如何使用C#获取与单词对应的发音音素?

如何使用C#获取与单词对应的发音音素?,c#,text-to-speech,microsoft-speech-api,C#,Text To Speech,Microsoft Speech Api,首先,我要说,我对C#编程非常熟悉。我正在开发一个应用程序,使用C#结合SAPI v5.4()以编程方式修改Windows语音词典。到目前为止,一切都很好,但我需要更深入地了解合成(语音)字符串时如何解释字符串 我的理解是,在SAPI中,5.4个单词被分解为几个部分,我成功地使用音素“训练”了单词的发音。我还知道我可以手动将单词添加到Windows语音识别词典中,提供语音记录,然后提取单词的发音(音素)……但这很麻烦。探索单词在默认情况下是如何合成的也很有用,即没有我的输入(比如合成器如何解释“

首先,我要说,我对C#编程非常熟悉。我正在开发一个应用程序,使用C#结合SAPI v5.4()以编程方式修改Windows语音词典。到目前为止,一切都很好,但我需要更深入地了解合成(语音)字符串时如何解释字符串

我的理解是,在SAPI 5.4中,单词被分解为音素,我成功地使用音素“训练”了单词的发音。我还知道我可以手动将单词添加到Windows语音识别词典中,提供语音录音,然后提取单词的发音(音素)……但这很麻烦。探索单词在默认情况下是如何合成的也很有用,即没有我的输入(比如合成器如何解释“dolphins”?)

从编码的角度来看,到目前为止,我得到的是:

using System;
using System.Speech.Synthesis;

namespace SpeechTest
{
    /// <summary>
    /// Question-side sample: speaks a single word through the default audio device.
    /// The step that retrieves the word's pronunciation is still a placeholder,
    /// so this snippet is incomplete as written.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Configures a <c>SpeechSynthesizer</c>, speaks the word
        /// "dolphins", and then waits for console input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Set up the speech synthesizer (Volume = 100, Rate = -2)
            SpeechSynthesizer synthesizer = new SpeechSynthesizer();
            synthesizer.Volume = 100;
            synthesizer.Rate = -2;

            // Route the synthesized audio to the default audio device
            synthesizer.SetOutputToDefaultAudioDevice();

            // Word of interest (per the author, not in the speech dictionary)
            string myWord = "dolphins";

            // Speak the word of interest
            synthesizer.Speak(myWord);

            // Retrieve pronunciation of myWord.
            // NOTE(review): the right-hand side below is an unfinished placeholder;
            // the statement has no initializer of its own yet.
            string myPronunciation = // *some code here*

            Console.WriteLine("Press any key to exit...");
            Console.ReadLine();
        }
    }
}

多亏了Casey Chesnut出色的工作,我已经弄清楚了如何确定给定字符串的IPA音素。现在,我只需要弄清楚如何将IPA音素转换为SAPI符号,但这是一个单独的主题(关于如何从文本字符串获取SAPI音素,请参阅相关问题)

使用系统;
使用System.Collections.ObjectModel;
使用系统组件模型;
使用System.IO;
使用系统语音识别;
使用系统、语音、合成;
使用System.Windows.Forms;
名称空间演讲测试
{
班级计划
{
静态void Main(字符串[]参数)
{
string MyText=“dolphins”;//初始化字符串以存储感兴趣的单词
string my发音=GetPronactionFromText(MyText.Trim());//获取MyTe的IPA发音
MessageBox.Show(MyText+“=”+my发音);//输出MyText和my发音
}
公共静态字符串音素;
公共静态字符串GetPronactionFromText(字符串MyWord)
{
//这是一个计算合成引擎使用的音素的技巧
//txt到wav
使用(MemoryStream audioStream=新MemoryStream())
{
使用(SpeechSynthesizer synth=新SpeechSynthesizer())
{
synth.SetOutputToWaveStream(音频流);
PromptBuilder pb=新PromptBuilder();
//pb.AppendBreak(PromptBreak.extralsmall);//如果这是大的或不存在的,则不会识别出“e”?
//合成语音(pb);
synth.Speak(MyWord);
//合成语音(pb);
synth.setoutputtonll();
audioStream.Position=0;
//现在wav到txt(用于记录音素)
recoPhonemes=String.Empty;
GrammarBuilder gb=新的GrammarBuilder(MyWord);
语法g=新语法(gb);//要识别的硬字母是“g”和“e”
SpeechRecognitionEngine reco=新建SpeechRecognitionEngine();
reco.speechinvested+=新事件处理程序(reco_speechinvested);
reco.SpeechRecognitionRejected+=新事件处理程序(reco_SpeechRecognitionRejected);
reco.UnloadAllGrammars();//只使用一个单词的语法
记录加载语法(g);
录制设置输入视频流(音频流);
识别结果rr=记录识别();
reco.setInputOnUrl();
如果(rr!=null)
{
recoPhonemes=StringFromWordArray(rr.Words,WordType.发音);
}
//Text=recophones;
返回音位;
}
}
}
公共静态字符串StringFromWordArray(只读集合词,WordType类型)
{
字符串文本=”;
foreach(已识别的单词或单词中的单位单词)
{
字符串wordText=“”;
if(type==WordType.Text | | type==WordType.Normalized)
{
wordText=word.Text;
}
else if(type==WordType.Lexical)
{
wordText=word.LexicalForm;
}
else if(type==WordType.发音)
{
wordText=单词。发音;
//MessageBox.Show(word.LexicalForm);
}
其他的
{
抛出新的InvalidEnumArgumentException(String.Format(“[0}:不是有效输入”,type));
}
//使用显示属性
if((word.DisplayAttributes&DisplayAttributes.OneTrailingSpace)!=0)
{
wordText+=“”;
}
if((word.DisplayAttributes和DisplayAttributes.TwoTrailingSpaces)!=0)
{
wordText+=“”;
}
if((word.DisplayAttributes和DisplayAttributes.ConsumeLeadingSpace)!=0)
{
wordText=wordText.TrimStart();
}
if((word.DisplayAttributes和DisplayAttributes.ZeroTrailingSpaces)!=0)
{
wordText=wordText.TrimEnd();
}
text+=wordText;
}
返回文本;
}
假设公共静态无效记录(对象发送者、假设演讲对象)
{
recoPhonemes=StringFromWordArray(例如,Result.Words,WordType.发音);
}
public static void reco_speechrecognition rejected(对象发送方,SpeechRecognitionRejectedEventArgs e)
{
recoPhonemes=StringFromWordArray(例如,Result.Words,WordType.prouncia
using System;
using System.Collections.ObjectModel;
using System.ComponentModel;
using System.IO;
using System.Speech.Recognition;
using System.Speech.Synthesis;
using System.Windows.Forms;

namespace SpeechTest
{
    /// <summary>
    /// Demonstrates a round-trip trick for discovering the pronunciation the
    /// speech engine associates with a piece of text: the text is synthesized
    /// into an in-memory wave stream, and that audio is then fed to a speech
    /// recognizer whose grammar contains only the same text. The pronunciation
    /// reported for the recognized words is returned.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Pronunciation produced by the most recent recognition pass. It is
        /// written by <see cref="GetPronunciationFromText"/> and by the
        /// hypothesis/rejection event handlers.
        /// NOTE(review): this is shared static state; overlapping calls would
        /// overwrite each other's value - confirm single-threaded use.
        /// </summary>
        public static string recoPhonemes;

        /// <summary>
        /// Entry point. Looks up the pronunciation of "dolphins" and shows it
        /// in a message box.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string text = "dolphins"; // Word (or words) of interest
            string pronunciation = GetPronunciationFromText(text.Trim());
            MessageBox.Show(text + " = " + pronunciation);
        }

        /// <summary>
        /// Synthesizes <paramref name="MyWord"/> to an in-memory wave stream and
        /// runs recognition over that audio with a grammar built from the same
        /// text, returning the pronunciation of the recognized words.
        /// </summary>
        /// <param name="MyWord">Text whose pronunciation is wanted.</param>
        /// <returns>
        /// The pronunciation of the final recognition result when there is one;
        /// otherwise whatever the hypothesis/rejection handlers last stored in
        /// <see cref="recoPhonemes"/> (the empty string if they never fired).
        /// </returns>
        public static string GetPronunciationFromText(string MyWord)
        {
            using (MemoryStream audioStream = new MemoryStream())
            {
                using (SpeechSynthesizer synth = new SpeechSynthesizer())
                {
                    // Step 1: text -> wav, captured in memory.
                    synth.SetOutputToWaveStream(audioStream);
                    synth.Speak(MyWord);
                    synth.SetOutputToNull();

                    // Rewind so the recognizer reads the audio from the start.
                    audioStream.Position = 0;

                    // Step 2: wav -> text, to obtain the recognizer's phonemes.
                    recoPhonemes = String.Empty;
                    Grammar grammar = new Grammar(new GrammarBuilder(MyWord));

                    // The engine is IDisposable; the using block releases it
                    // on every exit path, including exceptions.
                    using (SpeechRecognitionEngine reco = new SpeechRecognitionEngine())
                    {
                        reco.SpeechHypothesized += reco_SpeechHypothesized;
                        reco.SpeechRecognitionRejected += reco_SpeechRecognitionRejected;

                        // Restrict recognition to the single-phrase grammar.
                        reco.UnloadAllGrammars();
                        reco.LoadGrammar(grammar);
                        reco.SetInputToWaveStream(audioStream);

                        RecognitionResult result = reco.Recognize();
                        reco.SetInputToNull();

                        // A null result leaves the value stored by the event
                        // handlers (if any) in place.
                        if (result != null)
                        {
                            recoPhonemes = StringFromWordArray(result.Words, WordType.Pronunciation);
                        }
                    }

                    return recoPhonemes;
                }
            }
        }

        /// <summary>
        /// Concatenates the requested representation of each recognized word,
        /// applying each word's display attributes to its leading and trailing
        /// whitespace.
        /// </summary>
        /// <param name="words">Recognized word units to join.</param>
        /// <param name="type">Which representation of each word to emit.</param>
        /// <returns>The concatenated text; empty when <paramref name="words"/> has no items.</returns>
        /// <exception cref="InvalidEnumArgumentException">
        /// <paramref name="type"/> is not a supported <c>WordType</c> value and
        /// <paramref name="words"/> contains at least one word.
        /// </exception>
        public static string StringFromWordArray(ReadOnlyCollection<RecognizedWordUnit> words, WordType type)
        {
            // Fully qualified so the block needs no extra using directive.
            System.Text.StringBuilder text = new System.Text.StringBuilder();

            foreach (RecognizedWordUnit word in words)
            {
                string wordText = GetWordText(word, type);
                DisplayAttributes attributes = word.DisplayAttributes;

                // Trailing spaces are appended first and trimming is applied
                // afterwards, so a zero-trailing-space flag wins over the others.
                if ((attributes & DisplayAttributes.OneTrailingSpace) != 0)
                {
                    wordText += " ";
                }
                if ((attributes & DisplayAttributes.TwoTrailingSpaces) != 0)
                {
                    wordText += "  ";
                }
                if ((attributes & DisplayAttributes.ConsumeLeadingSpaces) != 0)
                {
                    wordText = wordText.TrimStart();
                }
                if ((attributes & DisplayAttributes.ZeroTrailingSpaces) != 0)
                {
                    wordText = wordText.TrimEnd();
                }

                text.Append(wordText);
            }

            return text.ToString();
        }

        /// <summary>
        /// Selects the representation of a single word that matches <paramref name="type"/>.
        /// </summary>
        private static string GetWordText(RecognizedWordUnit word, WordType type)
        {
            if (type == WordType.Text || type == WordType.Normalized)
            {
                return word.Text;
            }
            if (type == WordType.Lexical)
            {
                return word.LexicalForm;
            }
            if (type == WordType.Pronunciation)
            {
                return word.Pronunciation;
            }

            // The placeholder must be "{0}"; the previous "[0}" made
            // String.Format itself throw a FormatException.
            throw new InvalidEnumArgumentException(String.Format("{0}: is not a valid input", type));
        }

        /// <summary>
        /// Stores the pronunciation of each tentative hypothesis raised by the recognizer.
        /// </summary>
        public static void reco_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
        {
            recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
        }

        /// <summary>
        /// Stores the pronunciation carried by a rejected recognition result.
        /// </summary>
        public static void reco_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
        {
            recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
        }

    }

    /// <summary>
    /// Selects which representation of a recognized word is emitted when
    /// recognized words are joined into a string.
    /// </summary>
    public enum WordType
    {
        /// <summary>The word's text.</summary>
        Text = 0,

        /// <summary>Alias of <see cref="Text"/>; shares the same underlying value.</summary>
        Normalized = 0,

        /// <summary>The word's lexical form.</summary>
        Lexical = 1,

        /// <summary>The word's pronunciation.</summary>
        Pronunciation = 2
    }
}

// Credit for method of retrieving IPA pronunciation from a string goes to Casey Chesnut (http://www.mperfect.net/speechSamples/)