Android 通过WebView WebRTC从麦克风传输语音时的语音识别
我正在编写一个应用程序，它有一个 WebView，通过 WebRTC 处理语音呼叫。麦克风工作正常，因为我已经在 WebChromeClient 中授予了 WebView 录音权限：
webView.setWebChromeClient(new WebChromeClient() {
    @Override
    public void onPermissionRequest(final PermissionRequest request) {
        // Grant the WebView every resource it asks for (e.g. AUDIO_CAPTURE)
        // so the in-page WebRTC call can use the microphone.
        // NOTE(review): granting unconditionally is unsafe for untrusted
        // pages — check request.getOrigin() before granting.
        request.grant(request.getResources());
    }
});
后来我决定添加 SpeechRecognizer，这样我就可以在 WebRTC 通话中识别出我在说什么。我先尝试让语音识别在同一个 Activity 中工作，后来又放到一个单独的 Service 中，但不幸的是，两者无法同时工作。麦克风被 WebView 占用时，SpeechRecognizer 无法获得任何声音（RMS 始终为 -2.12）；反过来，如果我在通过 WebView 打电话之前先启动该 Service，对方就完全听不到我的声音（SpeechRecognizer 占用了麦克风，WebView 什么也收不到）。
我希望找到任何解决办法,如果它存在的话。我不是一个iOS开发者,但我听说,这在iPhone上是可能的,所以令人惊讶的是,这在Android设备上是不可能的。
我的语音识别服务代码
公共类RecognitionService扩展服务实现RecognitionListener{
private String LOG_TAG = "RecognitionService";
private SpeechRecognizer speech = null;
private Intent recognizerIntent;
public RecognitionService() {
}
@Override
public IBinder onBind(Intent intent) {
// TODO: Return the communication channel to the service.
startRecognition();
return null;
}
@Override
public void onCreate() {
Log.i("Test", "RecognitionService: onCreate");
startRecognition();
}
private void startRecognition() {
speech = SpeechRecognizer.createSpeechRecognizer(this);
speech.setRecognitionListener(this);
recognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_PREFERENCE,
"ru-RU");
recognizerIntent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE,
getPackageName());
recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
RecognizerIntent.LANGUAGE_MODEL_WEB_SEARCH);
recognizerIntent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3);
speech.startListening(recognizerIntent);
}
@Override
public void onBeginningOfSpeech() {
Log.i(LOG_TAG, "onBeginningOfSpeech");
}
@Override
public void onBufferReceived(byte[] buffer) {
Log.i(LOG_TAG, "onBufferReceived: " + buffer);
}
@Override
public void onEndOfSpeech() {
Log.i(LOG_TAG, "onEndOfSpeech");
}
@Override
public void onError(int errorCode) {
String errorMessage = getErrorText(errorCode);
Log.d(LOG_TAG, "FAILED " + errorMessage);
speech.destroy();
startRecognition();
}
@Override
public void onEvent(int arg0, Bundle arg1) {
Log.i(LOG_TAG, "onEvent");
}
@Override
public void onPartialResults(Bundle arg0) {
Log.i(LOG_TAG, "onPartialResults");
}
@Override
public void onReadyForSpeech(Bundle arg0) {
Log.i(LOG_TAG, "onReadyForSpeech");
}
@Override
public void onResults(Bundle results) {
Log.i(LOG_TAG, "onResults");
ArrayList<String> matches = results
.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
String text = "";
for (String result : matches)
text += result + "\n";
Toast.makeText(getApplicationContext(),text,Toast.LENGTH_SHORT).show();
speech.destroy();
startRecognition();
}
public static String getErrorText(int errorCode) {
String message;
switch (errorCode) {
case SpeechRecognizer.ERROR_AUDIO:
message = "Audio recording error";
break;
case SpeechRecognizer.ERROR_CLIENT:
message = "Client side error";
break;
case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
message = "Insufficient permissions";
break;
case SpeechRecognizer.ERROR_NETWORK:
message = "Network error";
break;
case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
message = "Network timeout";
break;
case SpeechRecognizer.ERROR_NO_MATCH:
message = "No match";
break;
case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
message = "RecognitionService busy";
break;
case SpeechRecognizer.ERROR_SERVER:
message = "error from server";
break;
case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
message = "No speech input";
break;
default:
message = "Didn't understand, please try again.";
break;
}
return message;
}
@Override
public void onRmsChanged(float rmsdB) {
Log.i(LOG_TAG, "onRmsChanged: " + rmsdB);
}
}
private String LOG_TAG = "RecognitionService";
private SpeechRecognizer speech = null;
private Intent recognizerIntent;
public RecognitionService() {
}
@Override
public IBinder onBind(Intent intent) {
// TODO: Return the communication channel to the service.
startRecognition();
return null;
}
@Override
public void onCreate() {
Log.i("Test", "RecognitionService: onCreate");
startRecognition();
}
private void startRecognition() {
speech = SpeechRecognizer.createSpeechRecognizer(this);
speech.setRecognitionListener(this);
recognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_PREFERENCE,
"ru-RU");
recognizerIntent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE,
getPackageName());
recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
RecognizerIntent.LANGUAGE_MODEL_WEB_SEARCH);
recognizerIntent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3);
speech.startListening(recognizerIntent);
}
@Override
public void onBeginningOfSpeech() {
Log.i(LOG_TAG, "onBeginningOfSpeech");
}
@Override
public void onBufferReceived(byte[] buffer) {
Log.i(LOG_TAG, "onBufferReceived: " + buffer);
}
@Override
public void onEndOfSpeech() {
Log.i(LOG_TAG, "onEndOfSpeech");
}
@Override
public void onError(int errorCode) {
String errorMessage = getErrorText(errorCode);
Log.d(LOG_TAG, "FAILED " + errorMessage);
speech.destroy();
startRecognition();
}
@Override
public void onEvent(int arg0, Bundle arg1) {
Log.i(LOG_TAG, "onEvent");
}
@Override
public void onPartialResults(Bundle arg0) {
Log.i(LOG_TAG, "onPartialResults");
}
@Override
public void onReadyForSpeech(Bundle arg0) {
Log.i(LOG_TAG, "onReadyForSpeech");
}
@Override
public void onResults(Bundle results) {
Log.i(LOG_TAG, "onResults");
ArrayList<String> matches = results
.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
String text = "";
for (String result : matches)
text += result + "\n";
Toast.makeText(getApplicationContext(),text,Toast.LENGTH_SHORT).show();
speech.destroy();
startRecognition();
}
public static String getErrorText(int errorCode) {
String message;
switch (errorCode) {
case SpeechRecognizer.ERROR_AUDIO:
message = "Audio recording error";
break;
case SpeechRecognizer.ERROR_CLIENT:
message = "Client side error";
break;
case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
message = "Insufficient permissions";
break;
case SpeechRecognizer.ERROR_NETWORK:
message = "Network error";
break;
case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
message = "Network timeout";
break;
case SpeechRecognizer.ERROR_NO_MATCH:
message = "No match";
break;
case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
message = "RecognitionService busy";
break;
case SpeechRecognizer.ERROR_SERVER:
message = "error from server";
break;
case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
message = "No speech input";
break;
default:
message = "Didn't understand, please try again.";
break;
}
return message;
}
@Override
public void onRmsChanged(float rmsdB) {
Log.i(LOG_TAG, "onRmsChanged: " + rmsdB);
}
}
如果您能够访问麦克风原始音频所在的缓冲区，就可以对该缓冲区进行分流：buffer1 → 识别器，buffer2 → WebRTC 流。这正是可以进行拆分的地方。