Problem Description: In this blog we enhance the web chatbot UI created in the previous blog by adding voice features.
The idea is to keep exchanging only text messages with the bot: before sending, we convert the user's voice (speech) to text, and whenever we receive a message from the bot server, we play it back as audio.
There are a few options available for converting between voice and text. In this blog we are going to use:
1. Speech to Text: the browser's webkitSpeechRecognition API
2. Text to Speech: the browser's speechSynthesis API
Let's take the chatbot UI we created in the earlier blog http://sanjeev-technology.blogspot.com/2018/05/oracle-bot-cloud-ibcs-custom-ui.html and enhance it with voice support.
Now we can follow the steps below.
1. Add a button to initiate voice chatting
<!-- Oracle JET button labelled "Start Voice Chat"; its action is bound to startVoiceChat. -->
<oj-button id="start_button" on-oj-action="[[startVoiceChat]]">Start Voice Chat</oj-button>
Now add the corresponding code in appcontroller.js:
// Action handler exposed on the controller: every invocation simply starts
// (or toggles) voice capture by delegating to listenUserVoice(). The event
// argument is accepted but not used.
self.startVoiceChat = function (event) {
  listenUserVoice();
};
NOTE: I am using Oracle JET, so I use oj-button. The point is that we have to call the listenUserVoice method when the button is clicked.
2. Add a speak method in app.js. This method reads a text message aloud. It also invokes a callback once the message has been read.
/**
 * Reads the given text aloud through the browser's speech synthesis queue.
 *
 * @param {string} text - The text to be spoken.
 * @param {Function} [onendcallback] - Optional handler assigned to the
 *   utterance's `onend` property; it is only attached when a truthy value
 *   is supplied.
 */
function speak(text, onendcallback) {
  var utterance = new SpeechSynthesisUtterance();
  utterance.text = text;

  // Attach the completion handler only when the caller provided one.
  if (onendcallback) {
    utterance.onend = onendcallback;
  }

  window.speechSynthesis.speak(utterance);

  // Debug trace of the utterance that was just queued.
  console.log(JSON.stringify(utterance));
}
3. Add the listenUserVoice method to app.js as well. This method recognizes the user's voice and converts it to text. It then calls the bot's sendMessage or triggerPostback, depending on the situation.
/**
 * Listens for a single spoken utterance, converts it to text and forwards it
 * to the bot.
 *
 * If the final transcript matches the label of one of the actions attached to
 * the latest conversation message (compared case-insensitively, ignoring
 * surrounding whitespace), that action's postback is triggered. In every
 * other case - no actions, an empty actions list, or no matching label - the
 * transcript is sent as a normal text message, so the user's speech is never
 * silently dropped.
 *
 * Relies on two variables, `recognizing` and `final_transcript`, that are not
 * declared in this function - presumably shared state declared elsewhere in
 * the file; verify against the rest of app.js.
 */
function listenUserVoice() {
  var recognition = new webkitSpeechRecognition();
  recognition.continuous = false;
  recognition.interimResults = true;

  // Logs a rejected bot call instead of leaving the rejection unhandled.
  function logBotFailure(err) {
    console.error("Failed to deliver voice message to bot", err);
  }

  // Normalizes a label/transcript so that matching ignores case and padding.
  function normalize(value) {
    return String(value).trim().toLowerCase();
  }

  // Routes the recognized text either to a matching action postback or to a
  // plain text message.
  function forwardToBot(transcript) {
    var messages = Bots.getConversation().messages;
    var lastMessage = messages[messages.length - 1];
    var lastActions = (lastMessage && lastMessage.actions) || [];
    var spoken = normalize(transcript);

    var matches = lastActions.filter(function (action) {
      return typeof action.text === 'string' && normalize(action.text) === spoken;
    });

    if (matches.length > 0) {
      Bots.triggerPostback(matches[0]._id).then(function () {
        console.log("postback");
      }, logBotFailure);
      return;
    }

    // No action matched (or there were none): fall back to a normal message.
    Bots.sendMessage(transcript).then(function () {
      console.log("normal message");
    }, logBotFailure);
  }

  recognition.onstart = function () {
    recognizing = true;
  };

  recognition.onend = function () {
    recognizing = false;
  };

  recognition.onresult = function (event) {
    console.log("recognition-onresult" + event);
    console.log(event);

    if (typeof event.results === 'undefined') {
      // No results list on the event: shut this recognizer down quietly.
      recognition.onend = null;
      recognition.stop();
      // `upgrade` is not defined in the visible code; call it only if the
      // page actually provides it.
      if (typeof upgrade === 'function') {
        upgrade();
      }
      return;
    }

    // Only final results are accumulated; interim results are ignored.
    for (var i = event.resultIndex; i < event.results.length; ++i) {
      if (event.results[i].isFinal) {
        final_transcript += event.results[i][0].transcript;
      }
    }

    if (!final_transcript) {
      return;
    }

    console.log("User said: " + final_transcript);
    forwardToBot(final_transcript);
    recognizing = false;
  };

  if (recognizing) {
    // NOTE(review): a fresh recognizer is created on every call, so this
    // stop() targets the new, never-started instance rather than the one that
    // set `recognizing`. Confirm whether the active recognizer should be kept
    // and stopped instead.
    recognition.stop();
    return;
  }

  final_transcript = '';
  recognition.start();
}
4. Now modify the displayServerMessage method to call listenUserVoice, so that the system automatically starts listening for the user's next message after speaking the bot's reply.
/**
 * Renders a bot message in the conversation list and reads it aloud.
 *
 * A new <li> holding the text `Server says "<message.text>"` is appended to
 * the element with id "conversation". When the message carries a non-empty
 * `actions` list, one button per action (built by createButtonElement) is
 * placed in a <div> inside that <li>. Finally the same text is passed to
 * speak(), with listenUserVoice as the completion callback.
 *
 * @param {Object} message - Bot message; `text` and, optionally, `actions`
 *   are read from it.
 */
function displayServerMessage(message) {
  console.log(message);

  var spokenText = 'Server says "' + message.text + '"';
  var conversationList = document.getElementById('conversation');

  // The text is set before any child is attached to the list item.
  var listItem = document.createElement('li');
  listItem.innerText = spokenText;

  var hasActions = Boolean(message.actions) && message.actions.length > 0;
  if (hasActions) {
    var buttonRow = document.createElement('div');
    for (var idx = 0; idx < message.actions.length; idx += 1) {
      buttonRow.appendChild(createButtonElement(message.actions[idx]));
    }
    listItem.appendChild(buttonRow);
  }

  conversationList.appendChild(listItem);

  // Speak the message, then hand control to voice capture.
  speak(spokenText, listenUserVoice);
}
That's all.