如何使用C#获取与单词对应的发音音素？_C#_Text To Speech_Microsoft Speech Api

如何使用C#获取与单词对应的发音音素？

如何使用C#获取与单词对应的发音音素？,c#,text-to-speech,microsoft-speech-api,C#,Text To Speech,Microsoft Speech Api,首先，我要说，我对C#编程非常熟悉。我正在开发一个应用程序，使用C#结合SAPI v5.4（）以编程方式修改Windows语音词典。到目前为止，一切都很好，但我需要更深入地了解合成（语音）字符串时如何解释字符串我的理解是，在SAPI中，5.4个单词被分解为几个部分，我成功地使用音素“训练”了单词的发音。我还知道我可以手动将单词添加到Windows语音识别词典中，提供语音记录，然后提取单词的发音（音素）……但这很麻烦。探索单词在默认情况下是如何合成的也很有用，即没有我的输入（比如合成器如何解释“

首先，我要说，我对C#编程非常熟悉。我正在开发一个应用程序，使用C#结合SAPI v5.4（）以编程方式修改Windows语音词典。到目前为止，一切都很好，但我需要更深入地了解合成（语音）字符串时如何解释字符串

我的理解是，在SAPI 5.4中，单词被分解为音素，我已经成功地使用音素“训练”了单词的发音。我还知道我可以手动将单词添加到Windows语音识别词典中，提供语音录音，然后提取单词的发音（音素）……但这很麻烦。探索单词在默认情况下是如何合成的也很有用，即在没有我的输入的情况下（比如合成器如何解释“dolphins”？）

从编码的角度来看，到目前为止，我得到的是：

using System;
using System.Speech.Synthesis;

namespace SpeechTest
{
    // Question snippet: speaks a single word through the default audio device.
    // The step that would retrieve the word's pronunciation is left as a placeholder,
    // so this class is illustrative and is not complete as written.
    class Program
    {
        // Entry point: configures a synthesizer, speaks myWord, then waits for the user
        // to press Enter before exiting. The args parameter is not used.
        static void Main(string[] args)
        {
            // Set up the speech synthesizer (Volume is set to 100 and Rate to -2)
            SpeechSynthesizer synthesizer = new SpeechSynthesizer();
            synthesizer.Volume = 100;
            synthesizer.Rate = -2;

            // Send the synthesized audio to the default audio device
            synthesizer.SetOutputToDefaultAudioDevice();

            // Word of interest (per the author, not present in the speech dictionary)
            string myWord = "dolphins";

            // Speak the word of interest
            synthesizer.Speak(myWord);

            // Retrieve pronunciation of myWord.
            // NOTE: the right-hand side below is deliberately missing; it marks the code
            // the question is asking for, so the declaration is unfinished.
            string myPronunciation = // *some code here*

            // Keep the console window open until the user presses Enter
            Console.WriteLine("Press any key to exit...");
            Console.ReadLine();
        }
    }
}

多亏了令人惊讶的工作，我已经找到了如何确定给定字符串的IPA音素。现在，我只需要找出如何将IPA音素转换为SAPI符号，但这是一个单独的主题（请参阅，了解如何从文本字符串获取SAPI音素）

使用系统；
使用System.Collections.ObjectModel；
使用系统组件模型；
使用System.IO；
使用系统语音识别；
使用系统、语音、合成；
使用System.Windows.Forms；
名称空间演讲测试
{
班级计划
{
静态void Main（字符串[]参数）
{
string MyText=“dolphins”；//初始化字符串以存储感兴趣的单词
string my发音=GetPronactionFromText（MyText.Trim（））；//获取MyTe的IPA发音
MessageBox.Show（MyText+“=”+my发音）；//输出MyText和my发音
}
公共静态字符串音素；
公共静态字符串GetPronactionFromText（字符串MyWord）
{
//这是一个计算合成引擎使用的音素的技巧
//txt到wav
使用（MemoryStream audioStream=新MemoryStream（））
{
使用（SpeechSynthesizer synth=新SpeechSynthesizer（））
{
synth.SetOutputToWaveStream（音频流）；
PromptBuilder pb=新PromptBuilder（）；
//pb.AppendBreak（PromptBreak.extralsmall）；//如果这是大的或不存在的，则不会识别出“e”？
//合成语音（pb）；
synth.Speak（MyWord）；
//合成语音（pb）；
synth.setoutputtonll（）；
audioStream.Position=0；
//现在wav到txt（用于记录音素）
recoPhonemes=String.Empty；
GrammarBuilder gb=新的GrammarBuilder（MyWord）；
语法g=新语法（gb）；//要识别的硬字母是“g”和“e”
SpeechRecognitionEngine reco=新建SpeechRecognitionEngine（）；
reco.speechinvested+=新事件处理程序（reco_speechinvested）；
reco.SpeechRecognitionRejected+=新事件处理程序（reco_SpeechRecognitionRejected）；
reco.UnloadAllGrammars（）；//只使用一个单词的语法
记录加载语法（g）；
录制设置输入视频流（音频流）；
识别结果rr=记录识别（）；
reco.setInputOnUrl（）；
如果（rr！=null）
{
recoPhonemes=StringFromWordArray（rr.Words，WordType.发音）；
}
//Text=recophones；
返回音位；
}
}
}
公共静态字符串StringFromWordArray（只读集合词，WordType类型）
{
字符串文本=”；
foreach（已识别的单词或单词中的单位单词）
{
字符串wordText=“”；
if（type==WordType.Text | | type==WordType.Normalized）
{
wordText=word.Text；
}
else if（type==WordType.Lexical）
{
wordText=word.LexicalForm；
}
else if（type==WordType.发音）
{
wordText=单词。发音；
//MessageBox.Show（word.LexicalForm）；
}
其他的
{
抛出新的InvalidEnumArgumentException（String.Format（“[0}:不是有效输入”，type））；
}
//使用显示属性
if（（word.DisplayAttributes&DisplayAttributes.OneTrailingSpace）！=0）
{
wordText+=“”；
}
if（（word.DisplayAttributes和DisplayAttributes.TwoTrailingSpaces）！=0）
{
wordText+=“”；
}
if（（word.DisplayAttributes和DisplayAttributes.ConsumeLeadingSpace）！=0）
{
wordText=wordText.TrimStart（）；
}
if（（word.DisplayAttributes和DisplayAttributes.ZeroTrailingSpaces）！=0）
{
wordText=wordText.TrimEnd（）；
}
text+=wordText；
}
返回文本；
}
假设公共静态无效记录（对象发送者、假设演讲对象）
{
recoPhonemes=StringFromWordArray（例如，Result.Words，WordType.发音）；
}
public static void reco_speechrecognition rejected（对象发送方，SpeechRecognitionRejectedEventArgs e）
{
recoPhonemes=StringFromWordArray（例如，Result.Words，WordType.prouncia
using System;
using System.Collections.ObjectModel;
using System.ComponentModel;
using System.IO;
using System.Speech.Recognition;
using System.Speech.Synthesis;
using System.Windows.Forms;

namespace SpeechTest
{
    /// <summary>
    /// Demonstrates how to recover the phonemes the speech synthesizer uses for a piece of text:
    /// the text is synthesized into an in-memory wave stream, and that audio is then fed back
    /// through a speech recognizer whose grammar contains only the same text.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Looks up the pronunciation of a sample word and shows it in a message box.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string MyText = "dolphins"; // Initialize string for storing word (or words) of interest
            string MyPronunciation = GetPronunciationFromText(MyText.Trim()); // Get IPA pronunciation of MyText
            MessageBox.Show(MyText + " = " + MyPronunciation); // Output MyText and MyPronunciation
        }

        /// <summary>
        /// Most recent pronunciation string obtained from the recognizer. It is reset by
        /// <see cref="GetPronunciationFromText"/> and written by the hypothesis and rejection
        /// event handlers as well as by the final recognition result.
        /// </summary>
        public static string recoPhonemes;

        /// <summary>
        /// Synthesizes <paramref name="MyWord"/> to an in-memory wave stream and recognizes that
        /// audio against a grammar containing only <paramref name="MyWord"/>, returning the
        /// pronunciation reported for the recognized words.
        /// </summary>
        /// <param name="MyWord">The word (or words) whose pronunciation is wanted.</param>
        /// <returns>
        /// The concatenated pronunciations of the recognized words. When recognition yields no
        /// final result, whatever the hypothesis/rejection handlers last stored in
        /// <see cref="recoPhonemes"/> is returned (an empty string if they never fired).
        /// </returns>
        public static string GetPronunciationFromText(string MyWord)
        {
            // This is a trick to figure out the phonemes used by the synthesis engine.
            using (MemoryStream audioStream = new MemoryStream())
            using (SpeechSynthesizer synth = new SpeechSynthesizer())
            {
                // Text to wav. (The original author also experimented with speaking a
                // PromptBuilder holding an extra-small break around the word and noted that
                // 'e' may not be recognized depending on that break.)
                synth.SetOutputToWaveStream(audioStream);
                synth.Speak(MyWord);
                synth.SetOutputToNull();
                audioStream.Position = 0; // rewind so the recognizer reads from the start

                // Now wav to text (for the recognizer's phonemes).
                recoPhonemes = String.Empty;
                GrammarBuilder gb = new GrammarBuilder(MyWord);
                Grammar g = new Grammar(gb); //TODO the hard letters to recognize are 'g' and 'e'

                // The recognition engine is disposable; release it as soon as we are done.
                using (SpeechRecognitionEngine reco = new SpeechRecognitionEngine())
                {
                    // Hypotheses and rejections still carry pronunciations, so record them as
                    // a fallback in case no final result is produced.
                    reco.SpeechHypothesized += new EventHandler<SpeechHypothesizedEventArgs>(reco_SpeechHypothesized);
                    reco.SpeechRecognitionRejected += new EventHandler<SpeechRecognitionRejectedEventArgs>(reco_SpeechRecognitionRejected);
                    reco.UnloadAllGrammars(); //only use the one word grammar
                    reco.LoadGrammar(g);
                    reco.SetInputToWaveStream(audioStream);
                    RecognitionResult rr = reco.Recognize();
                    reco.SetInputToNull();
                    if (rr != null)
                    {
                        recoPhonemes = StringFromWordArray(rr.Words, WordType.Pronunciation);
                    }
                    return recoPhonemes;
                }
            }
        }

        /// <summary>
        /// Concatenates the chosen textual form of each recognized word, applying each word's
        /// display attributes to decide how much surrounding whitespace to keep.
        /// </summary>
        /// <param name="words">The recognized words to join.</param>
        /// <param name="type">Which form of each word to emit (text, lexical form or pronunciation).</param>
        /// <returns>The joined string.</returns>
        /// <exception cref="InvalidEnumArgumentException">
        /// <paramref name="type"/> is not one of the <see cref="WordType"/> values handled here.
        /// </exception>
        public static string StringFromWordArray(ReadOnlyCollection<RecognizedWordUnit> words, WordType type)
        {
            string text = "";
            foreach (RecognizedWordUnit word in words)
            {
                string wordText = "";
                if (type == WordType.Text || type == WordType.Normalized)
                {
                    wordText = word.Text;
                }
                else if (type == WordType.Lexical)
                {
                    wordText = word.LexicalForm;
                }
                else if (type == WordType.Pronunciation)
                {
                    wordText = word.Pronunciation;
                }
                else
                {
                    // The placeholder must be "{0}"; the previous "[0}" left an unmatched '}'
                    // that made String.Format itself throw instead of this exception.
                    throw new InvalidEnumArgumentException(String.Format("{0}: is not a valid input", type));
                }

                // Apply the word's display attributes: first append the requested trailing
                // spaces, then trim where the attributes ask for leading/trailing removal.
                if ((word.DisplayAttributes & DisplayAttributes.OneTrailingSpace) != 0)
                {
                    wordText += " ";
                }
                if ((word.DisplayAttributes & DisplayAttributes.TwoTrailingSpaces) != 0)
                {
                    wordText += "  ";
                }
                if ((word.DisplayAttributes & DisplayAttributes.ConsumeLeadingSpaces) != 0)
                {
                    wordText = wordText.TrimStart();
                }
                if ((word.DisplayAttributes & DisplayAttributes.ZeroTrailingSpaces) != 0)
                {
                    wordText = wordText.TrimEnd();
                }

                text += wordText;
            }
            return text;
        }

        /// <summary>
        /// Stores the pronunciation of the recognizer's current hypothesis in <see cref="recoPhonemes"/>.
        /// </summary>
        /// <param name="sender">The object that raised the event.</param>
        /// <param name="e">Event data carrying the hypothesized result.</param>
        public static void reco_SpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
        {
            recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
        }

        /// <summary>
        /// Stores the pronunciation of a rejected recognition result in <see cref="recoPhonemes"/>.
        /// </summary>
        /// <param name="sender">The object that raised the event.</param>
        /// <param name="e">Event data carrying the rejected result.</param>
        public static void reco_SpeechRecognitionRejected(object sender, SpeechRecognitionRejectedEventArgs e)
        {
            recoPhonemes = StringFromWordArray(e.Result.Words, WordType.Pronunciation);
        }

    }

    /// <summary>
    /// Selects which textual form of a recognized word <c>StringFromWordArray</c> emits.
    /// </summary>
    public enum WordType
    {
        /// <summary>Emit the word's <c>Text</c> property.</summary>
        Text = 0,

        /// <summary>Alias of <see cref="Text"/>; both names share the same value.</summary>
        Normalized = Text,

        /// <summary>Emit the word's <c>LexicalForm</c> property.</summary>
        Lexical = 1,

        /// <summary>Emit the word's <c>Pronunciation</c> property.</summary>
        Pronunciation = 2
    }
}

// Credit for method of retrieving IPA pronunciation from a string goes to Casey Chesnut (http://www.mperfect.net/speechSamples/)