C#:如何在 Watson 的 Speech to Text Unity SDK 中指定关键字?

C#:如何在 Watson 的 Speech to Text Unity SDK 中指定关键字?,c#,unity3d,speech-recognition,ibm-watson,watson,C#,Unity3d,Speech Recognition,Ibm Watson,Watson,我试图在Watson的Speech to Text Unity SDK中指定关键字,但我不确定如何做到这一点 详细信息页面没有显示示例(请参见此处:) 和其他论坛帖子是为Java应用程序编写的(请参见此处:) 我曾尝试在“Recognize”函数中创建的RecognizeRequest类中对这些值进行编码,但没有成功: **编辑-从未调用此函数--** 但是返回的SpeechRecognitionEvent结果值不包含任何keywords_result。这是我的目标。我试图像这样查看keywords_result…

我试图在Watson的Speech to Text
Unity SDK
中指定关键字,但我不确定如何做到这一点

详细信息页面没有显示示例(请参见此处:)

和其他论坛帖子是为Java应用程序编写的(请参见此处:)

我曾尝试在“Recognize”函数中创建的
RecognizeRequest
类中对这些值进行编码,但没有成功:

**编辑-从未调用此函数--**

但是返回的
SpeechRecognitionEvent
结果值不包含任何
keywords_result
。这是我的目标。我试图像下面这样查看 keywords_result 对象中每个关键字的置信度,但是
keywords_result
对象返回为
null

/// <summary>
/// Handles a recognition event: forwards it to the result output, clears the
/// transcript text and logs every spotted keyword together with its confidence.
/// </summary>
/// <param name="result">Recognition event to process; may be null.</param>
private void OnRecognize(SpeechRecognitionEvent result) {
    Debug.Log("Recognizing!");
    m_ResultOutput.SendData(new SpeechToTextData(result));

    if (result == null || result.results.Length == 0)
        return;

    if (m_Transcript != null)
        m_Transcript.text = "";

    foreach (var res in result.results) {
        // keywords_result stays null when the response carried no keyword data
        // (or the parser did not populate it), so skip instead of dereferencing it.
        if (res.keywords_result == null || res.keywords_result.keyword == null)
            continue;

        foreach (var keyword in res.keywords_result.keyword) {
            string text = keyword.normalized_text;
            // The response parser assigns confidence from a double; a float local
            // would need an explicit narrowing cast and would lose precision.
            double confidence = keyword.confidence;
            Debug.Log(text + ": " + confidence);
        }
    }
}
是否有人在Unity或C#中使用Watson的Speech-To-Text SDK成功地实现了关键字可信度评估?欢迎提出任何意见和建议


PS这是我的第一篇帖子:)

结果我需要在“SendStart”函数中指定关键字,如下所示:

/// <summary>
/// Sends the "start" action message over the listen socket, describing the audio
/// format and the recognition options, and records when the message was sent.
/// Keyword spotting is requested only when at least one keyword is configured.
/// </summary>
/// <exception cref="WatsonException">Thrown when the listen socket is null.</exception>
private void SendStart() {
    if (m_ListenSocket == null)
        throw new WatsonException("SendStart() called with null connector.");

    Dictionary<string, object> start = new Dictionary<string, object>();
    start["action"] = "start";
    start["content-type"] = "audio/l16;rate=" + m_RecordingHZ.ToString() + ";channels=1;";
    start["continuous"] = EnableContinousRecognition;
    start["max_alternatives"] = m_MaxAlternatives;
    start["interim_results"] = EnableInterimResults;
    start["word_confidence"] = m_WordConfidence;
    start["timestamps"] = m_Timestamps;

    // Keyword spotting: a threshold is only meaningful together with at least one
    // keyword, so both fields are omitted when the keyword list is null or empty.
    if (keywordsToCheck != null) {
        var keywords = keywordsToCheck.ToArray();
        if (keywords.Length > 0) {
            start["keywords"] = keywords;
            start["keywords_threshold"] = 0.05;
        }
    }

    m_ListenSocket.Send(new WSConnector.TextMessage(Json.Serialize(start)));
    m_LastStartSent = DateTime.Now;
}
注意,直接使用 keywords_result 中的置信值,比先用硬编码检查识别出的单词是否与你的关键字匹配、再使用该单词的置信值更有价值。检查 keywords_result.keyword[].confidence 时得到的置信值会高得多,因为服务已经在针对这些单词进行检查。这就是我完成这一过程、解析 SpeechRecognitionEvent 结果值使其正确包含 keywords_result 值的动力


补充一点背景:我正在为患有诵读困难的儿童制作一款节奏游戏,帮助他们学习构词法,可以把它想象成《吉他英雄》遇上《芝麻街》。

这一定要用 Watson 来完成吗?如果不是,那么看。我认为你需要尝试降低阈值。尝试 0.1 而不是 0.2,甚至 0.00001。美国的语音文字转换使用Watson来工作。请参见评论部分:@NikolayShmyrev 我也降低了阈值,这不是问题所在。请阅读这篇帖子的要求,解决方案见下文。@GermanAttanasio 太好了!我提交了一个拉取请求(pull request)。Taj 早些时候对我下面的回答发表了评论,但那条评论似乎已经消失了。我还在 GitHub 页面上开了一个 issue!我很高兴能贡献自己的力量!这是一个很棒的服务 :D
/// <summary>
/// Sends the "start" action message over the listen socket, describing the audio
/// format and the recognition options, and records when the message was sent.
/// Keyword spotting is requested only when at least one keyword is configured.
/// </summary>
/// <exception cref="WatsonException">Thrown when the listen socket is null.</exception>
private void SendStart() {
    if (m_ListenSocket == null)
        throw new WatsonException("SendStart() called with null connector.");

    Dictionary<string, object> start = new Dictionary<string, object>();
    start["action"] = "start";
    start["content-type"] = "audio/l16;rate=" + m_RecordingHZ.ToString() + ";channels=1;";
    start["continuous"] = EnableContinousRecognition;
    start["max_alternatives"] = m_MaxAlternatives;
    start["interim_results"] = EnableInterimResults;
    start["word_confidence"] = m_WordConfidence;
    start["timestamps"] = m_Timestamps;

    // Keyword spotting: a threshold is only meaningful together with at least one
    // keyword, so both fields are omitted when the keyword list is null or empty.
    if (keywordsToCheck != null) {
        var keywords = keywordsToCheck.ToArray();
        if (keywords.Length > 0) {
            start["keywords"] = keywords;
            start["keywords_threshold"] = 0.05;
        }
    }

    m_ListenSocket.Send(new WSConnector.TextMessage(Json.Serialize(start)));
    m_LastStartSent = DateTime.Now;
}
/// <summary>
/// Converts a deserialized recognize response into a SpeechRecognitionEvent,
/// including keyword results, alternatives, timestamps and word confidences.
/// </summary>
/// <param name="resp">Deserialized response; expected to contain a "results" list.</param>
/// <returns>
/// The parsed event, or null when <paramref name="resp"/> is null or has no
/// "results" list.
/// </returns>
private SpeechRecognitionEvent ParseRecognizeResponse(IDictionary resp){
    if (resp == null)
        return null;

    IList iresults = resp["results"] as IList;
    if (iresults == null)
        return null;

    List<SpeechRecognitionResult> results = new List<SpeechRecognitionResult>();
    foreach (var r in iresults)
    {
        IDictionary iresult = r as IDictionary;
        // Check the per-item cast (the original re-checked the outer list here,
        // so a non-dictionary entry caused a null dereference below).
        if (iresult == null)
            continue;

        SpeechRecognitionResult result = new SpeechRecognitionResult();

        // "keywords_result" may be absent; the helper tolerates a null dictionary.
        result.keywords_result = ParseKeywordResults(iresult["keywords_result"] as IDictionary);

        result.final = (bool)iresult["final"];

        IList ialternatives = iresult["alternatives"] as IList;
        if (ialternatives == null)
            continue;

        List<SpeechRecognitionAlternative> alternatives = new List<SpeechRecognitionAlternative>();
        foreach (var a in ialternatives)
        {
            IDictionary ialternative = a as IDictionary;
            if (ialternative == null)
                continue;

            SpeechRecognitionAlternative alternative = new SpeechRecognitionAlternative();
            alternative.transcript = (string)ialternative["transcript"];
            if (ialternative.Contains("confidence"))
                alternative.confidence = (double)ialternative["confidence"];

            if (ialternative.Contains("timestamps"))
            {
                IList itimestamps = ialternative["timestamps"] as IList;
                // Skip the field when the value is not a list instead of crashing.
                if (itimestamps != null)
                {
                    TimeStamp[] timestamps = new TimeStamp[itimestamps.Count];
                    for (int i = 0; i < itimestamps.Count; ++i)
                    {
                        // Each entry is read as [word, start, end]; malformed
                        // entries leave a null slot, as before.
                        IList itimestamp = itimestamps[i] as IList;
                        if (itimestamp == null)
                            continue;

                        TimeStamp ts = new TimeStamp();
                        ts.Word = (string)itimestamp[0];
                        ts.Start = (double)itimestamp[1];
                        ts.End = (double)itimestamp[2];
                        timestamps[i] = ts;
                    }

                    alternative.Timestamps = timestamps;
                }
            }

            if (ialternative.Contains("word_confidence"))
            {
                IList iconfidence = ialternative["word_confidence"] as IList;
                // Skip the field when the value is not a list instead of crashing.
                if (iconfidence != null)
                {
                    WordConfidence[] confidence = new WordConfidence[iconfidence.Count];
                    for (int i = 0; i < iconfidence.Count; ++i)
                    {
                        // Each entry is read as [word, confidence].
                        IList iwordconf = iconfidence[i] as IList;
                        if (iwordconf == null)
                            continue;

                        WordConfidence wc = new WordConfidence();
                        wc.Word = (string)iwordconf[0];
                        wc.Confidence = (double)iwordconf[1];
                        confidence[i] = wc;
                    }

                    alternative.WordConfidence = confidence;
                }
            }

            alternatives.Add(alternative);
        }

        result.alternatives = alternatives.ToArray();
        results.Add(result);
    }

    return new SpeechRecognitionEvent(results.ToArray());
}

/// <summary>
/// Builds the keyword results of a single recognition result by looking up each
/// entry of keywordsToCheck in the given "keywords_result" dictionary.
/// </summary>
/// <param name="iKeywordsResult">The "keywords_result" dictionary; may be null.</param>
/// <returns>
/// A KeywordResults instance whose keyword array is empty when nothing matched.
/// </returns>
private KeywordResults ParseKeywordResults(IDictionary iKeywordsResult)
{
    List<KeywordResult> keywordResults = new List<KeywordResult>();

    if (iKeywordsResult != null && keywordsToCheck != null)
    {
        foreach (string key in keywordsToCheck)
        {
            // A keyword that was not spotted has no list under its key.
            IList matches = iKeywordsResult[key] as IList;
            if (matches == null)
                continue;

            foreach (var match in matches)
            {
                IDictionary imatch = match as IDictionary;
                if (imatch == null)
                    continue;

                KeywordResult keywordResult = new KeywordResult();
                keywordResult.confidence = (double)imatch["confidence"];
                keywordResult.end_time = (double)imatch["end_time"];
                keywordResult.start_time = (double)imatch["start_time"];
                keywordResult.normalized_text = (string)imatch["normalized_text"];
                keywordResults.Add(keywordResult);
            }
        }
    }

    KeywordResults parsed = new KeywordResults();
    parsed.keyword = keywordResults.ToArray();
    return parsed;
}
/// <summary>
/// Handles a recognition event: forwards it to the result output and, when a
/// transcript target exists, replaces its text with one line per spotted keyword
/// in the form "keyword (Final|Interim, confidence)".
/// </summary>
/// <param name="result">Recognition event to process; may be null.</param>
private void OnRecognize(SpeechRecognitionEvent result) {
    m_ResultOutput.SendData(new SpeechToTextData(result));

    if (result == null || result.results == null || result.results.Length == 0)
        return;

    // Without a transcript target there is nowhere to write; the original only
    // guarded the reset and then dereferenced m_Transcript inside the loop.
    if (m_Transcript == null)
        return;

    m_Transcript.text = "";

    foreach (var res in result.results) {
        // Results without keyword data contribute nothing to the transcript.
        if (res.keywords_result == null || res.keywords_result.keyword == null)
            continue;

        foreach (var keyword in res.keywords_result.keyword) {
            m_Transcript.text += string.Format("{0} ({1}, {2:0.00})\n",
                keyword.normalized_text, res.final ? "Final" : "Interim", keyword.confidence);
        }
    }
}