C# Microsoft认知语音转换为文本-不转换整个.wav文件_C#_Speech To Text_Microsoft Cognitive_Azure Cognitive Services

C# Microsoft认知语音转换为文本-不转换整个.wav文件

C# Microsoft认知语音转换为文本-不转换整个.wav文件,c#,speech-to-text,microsoft-cognitive,azure-cognitive-services,C#,Speech To Text,Microsoft Cognitive,Azure Cognitive Services,当试图转换个人.wav文件时，只有一小部分语音被转换为文本，每次转换都会在完全相同的位置停止。如果有文件大小限制（我的文件是80MB），或者是因为定价层（免费），我在文档中找不到任何内容。有人知道为什么在我的例子中，三句话之后转换停止了吗？有人能给我指点吗来自Microsoft网站的示例代码： class Program { static async Task Main() { await RecognizeSpeechA

当试图转换个人.wav文件时，只有一小部分语音被转换为文本，每次转换都会在完全相同的位置停止。是否存在文件大小限制（我的文件是80MB），或者是否与定价层（免费）有关，我在文档中找不到任何说明。有人知道为什么在我的例子中，转换在三句话之后就停止了吗？有人能给我一些指点吗？

来自Microsoft网站的示例代码：

    /// <summary>
    /// Console sample: runs one recognition pass over a WAV file and prints the outcome.
    /// </summary>
    class Program
    {
        /// <summary>Entry point; delegates to <see cref="RecognizeSpeechAsync"/>.</summary>
        static async Task Main() => await RecognizeSpeechAsync();

        /// <summary>
        /// Builds a recognizer over the WAV file and asks it for a single result.
        /// </summary>
        /// <remarks>
        /// Only one <c>RecognizeOnceAsync</c> call is made, so only the first result of the
        /// file is reported (see the "Recognizing first result..." message below).
        /// </remarks>
        static async Task RecognizeSpeechAsync()
        {
            var speechConfig = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

            using (var wavInput = AudioConfig.FromWavFileInput(@"FilePath\MyWav.wav"))
            using (var speechRecognizer = new SpeechRecognizer(speechConfig, wavInput))
            {
                Console.WriteLine("Recognizing first result...");
                var outcome = await speechRecognizer.RecognizeOnceAsync();

                PrintOutcome(outcome);
            }
        }

        /// <summary>
        /// Writes a line (or several, for cancellations) describing the recognition outcome.
        /// Reasons other than recognized / no-match / canceled print nothing.
        /// </summary>
        /// <param name="outcome">The result returned by the recognizer.</param>
        private static void PrintOutcome(SpeechRecognitionResult outcome)
        {
            var reason = outcome.Reason;

            if (reason == ResultReason.RecognizedSpeech)
            {
                Console.WriteLine($"We recognized: {outcome.Text}");
                return;
            }

            if (reason == ResultReason.NoMatch)
            {
                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                return;
            }

            if (reason != ResultReason.Canceled)
            {
                return;
            }

            var details = CancellationDetails.FromResult(outcome);
            Console.WriteLine($"CANCELED: Reason={details.Reason}");

            // Error details are only meaningful when the cancellation was caused by an error.
            if (details.Reason == CancellationReason.Error)
            {
                Console.WriteLine($"CANCELED: ErrorCode={details.ErrorCode}");
                Console.WriteLine($"CANCELED: ErrorDetails={details.ErrorDetails}");
                Console.WriteLine($"CANCELED: Did you update the subscription info?");
            }
        }
    }

编辑：我已经将代码示例粘贴到通过搜索引擎搜索到的代码下面，以防原始代码被更改或删除。对于一个80MB的.wav文件，代码运行大约需要20分钟。

“主要”代码：

// Speech recognition using an audio stream (continuous recognition over a whole WAV file).
// Requires: using System; using System.IO; using System.Text; using System.Threading.Tasks;
public static async Task RecognitionWithPullAudioStreamAsync()
{
    // Creates an instance of a speech config with the specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    // Collects the intermediate hypotheses; nothing in this method reads it back.
    StringBuilder sb = new StringBuilder(); // remember: using System.Text

    // Completed by the Canceled / SessionStopped handlers to signal that recognition is over.
    var stopRecognition = new TaskCompletionSource<int>();

    // Create an audio stream from a wav file.
    // Replace with your own audio file name.
    using (var audioInput = Helper.OpenWavFile(@"whatstheweatherlike.wav"))
    {
        // Creates a speech recognizer using audio stream input.
        using (var recognizer = new SpeechRecognizer(config, audioInput))
        {
            // Subscribes to events.
            recognizer.Recognizing += (s, e) =>
            {
                // You can uncomment the line below, but with a large file (80MB in the author's case)
                // the console window gets flooded.
                // Console.WriteLine($"识别：Text={e.Result.Text}");
                sb.Append(e.Result.Text);
            };

            recognizer.Recognized += (s, e) =>
            {
                if (e.Result.Reason == ResultReason.RecognizedSpeech)
                {
                    Console.WriteLine($"已识别：Text={e.Result.Text}");
                    // Each final result is appended to the output file as it arrives.
                    File.AppendAllText("test.txt", e.Result.Text);
                }
                else if (e.Result.Reason == ResultReason.NoMatch)
                {
                    Console.WriteLine($"无法识别NOMATCH:语音");
                }
            };

            recognizer.Canceled += (s, e) =>
            {
                Console.WriteLine($"已取消：原因={e.Reason}");

                if (e.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"已取消：ErrorCode={e.ErrorCode}");
                    Console.WriteLine($"已取消：ErrorDetails={e.ErrorDetails}");
                    Console.WriteLine($"已取消：是否更新了订阅信息？");
                }

                stopRecognition.TrySetResult(0);
            };

            recognizer.SessionStarted += (s, e) =>
            {
                Console.WriteLine("\n会话启动事件");
            };

            recognizer.SessionStopped += (s, e) =>
            {
                // Session events carry no recognition result, so there is nothing to write to
                // the file here; final text is already appended in the Recognized handler.
                Console.WriteLine("\n会话停止事件");
                Console.WriteLine("\n停止识别");
                stopRecognition.TrySetResult(0);
            };

            // Starts continuous recognition. Uses StopContinuousRecognitionAsync() to stop recognition.
            await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);

            // Waits for completion.
            // Use Task.WaitAny to keep the task rooted.
            Task.WaitAny(new[] { stopRecognition.Task });

            // Stops recognition.
            await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
        }
    }
}

助手类：

using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using System;
using System.Diagnostics;
using System.IO;

namespace MicrosoftSpeechSDKSamples
{
    /// <summary>
    /// Helpers that parse a RIFF/WAVE header and expose the remaining sample data
    /// to the Speech SDK through a <see cref="BinaryAudioStreamReader"/>.
    /// </summary>
    public class Helper
    {
        /// <summary>
        /// Opens a wav file and wraps it in an <see cref="AudioConfig"/> backed by a pull stream.
        /// </summary>
        /// <param name="filename">Path of the wav file to open.</param>
        /// <returns>An audio config that reads the file's sample data.</returns>
        public static AudioConfig OpenWavFile(string filename)
        {
            BinaryReader reader = new BinaryReader(File.OpenRead(filename));
            try
            {
                return OpenWavFile(reader);
            }
            catch
            {
                // Nothing took over the reader, so release the file handle before propagating.
                reader.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Consumes the wav header from <paramref name="reader"/> and wraps the rest of the
        /// stream in an <see cref="AudioConfig"/>.
        /// </summary>
        /// <param name="reader">Reader positioned at the start of a RIFF/WAVE stream.</param>
        /// <returns>An audio config that pulls sample data from <paramref name="reader"/>.</returns>
        public static AudioConfig OpenWavFile(BinaryReader reader)
        {
            AudioStreamFormat format = readWaveHeader(reader);
            return AudioConfig.FromStreamInput(new BinaryAudioStreamReader(reader), format);
        }

        /// <summary>
        /// Opens a wav file and returns a stream adapter positioned at the first sample.
        /// </summary>
        /// <param name="filename">Path of the wav file to open.</param>
        /// <returns>A reader over the raw sample data (header already consumed).</returns>
        public static BinaryAudioStreamReader CreateWavReader(string filename)
        {
            BinaryReader reader = new BinaryReader(File.OpenRead(filename));
            try
            {
                // Read the wave header so that it does not end up in the following reads.
                readWaveHeader(reader);
                return new BinaryAudioStreamReader(reader);
            }
            catch
            {
                // Nothing took over the reader, so release the file handle before propagating.
                reader.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Parses the RIFF/WAVE header and leaves <paramref name="reader"/> at the start of the
        /// sample data. Chunks located between "fmt " and "data" (for example "LIST" or "fact")
        /// are skipped instead of being treated as a malformed header.
        /// </summary>
        /// <param name="reader">Reader positioned at the start of a RIFF/WAVE stream.</param>
        /// <returns>The PCM format built from sample rate, bits per sample and channel count.</returns>
        /// <exception cref="EndOfStreamException">The stream ends before a "data" chunk is found.</exception>
        public static AudioStreamFormat readWaveHeader(BinaryReader reader)
        {
            // Tag "RIFF"
            Trace.Assert(ReadChunkId(reader) == "RIFF", "Wrong wav header");

            // Chunk size (not needed)
            reader.ReadInt32();

            // Subchunk, Wave Header
            // Tag: "WAVE"
            Trace.Assert(ReadChunkId(reader) == "WAVE", "Wrong wav tag in wav header");

            // Subchunk, Format
            // Tag: "fmt "
            Trace.Assert(ReadChunkId(reader) == "fmt ", "Wrong format tag in wav header");

            // chunk format size
            var formatSize = reader.ReadInt32();
            reader.ReadUInt16();                        // format tag (unused)
            var channels = reader.ReadUInt16();
            var samplesPerSecond = reader.ReadUInt32();
            reader.ReadUInt32();                        // average bytes per second (unused)
            reader.ReadUInt16();                        // block align (unused)
            var bitsPerSample = reader.ReadUInt16();

            // Until now we have read 16 bytes in format, the rest is cbSize and is ignored for now.
            if (formatSize > 16)
            {
                reader.ReadBytes(formatSize - 16);
            }

            // Walk the following chunks until the "data" chunk is reached.
            string chunkId = ReadChunkId(reader);
            while (chunkId != "data")
            {
                if (chunkId.Length < 4)
                {
                    throw new EndOfStreamException("No data chunk found in wav");
                }

                // RIFF chunks are padded to an even number of bytes.
                uint chunkSize = reader.ReadUInt32();
                SkipBytes(reader, (long)chunkSize + (chunkSize & 1));
                chunkId = ReadChunkId(reader);
            }

            // data chunk size (not needed; the stream is read until it ends)
            reader.ReadInt32();

            // now the reader is set to the start of the body, i.e., the raw sample data
            return AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, (byte)bitsPerSample, (byte)channels);
        }

        /// <summary>Reads a four-byte chunk id as raw bytes; shorter than 4 chars at end of stream.</summary>
        private static string ReadChunkId(BinaryReader reader)
        {
            byte[] raw = reader.ReadBytes(4);
            char[] chars = new char[raw.Length];
            for (int i = 0; i < raw.Length; ++i)
            {
                chars[i] = (char)raw[i];
            }

            return new string(chars);
        }

        /// <summary>Reads and discards <paramref name="count"/> bytes from the reader.</summary>
        private static void SkipBytes(BinaryReader reader, long count)
        {
            while (count > 0)
            {
                int wanted = (int)Math.Min(count, 4096);
                int got = reader.ReadBytes(wanted).Length;
                if (got == 0)
                {
                    throw new EndOfStreamException("Unexpected end of wav while skipping a chunk");
                }

                count -= got;
            }
        }
    }

    /// <summary>
    /// Pull-stream callback that serves audio bytes from a <see cref="System.IO.BinaryReader"/>.
    /// </summary>
    public sealed class BinaryAudioStreamReader : PullAudioInputStreamCallback
    {
        private readonly System.IO.BinaryReader _source;
        private bool _isDisposed;

        /// <summary>
        /// Wraps an existing reader.
        /// </summary>
        /// <param name="reader">Reader over the bare sample data (no container data such as a wave header).</param>
        public BinaryAudioStreamReader(System.IO.BinaryReader reader)
        {
            _source = reader;
        }

        /// <summary>
        /// Wraps a stream by creating a reader over it.
        /// </summary>
        /// <param name="stream">Stream over the bare sample data (no container data such as a wave header).</param>
        public BinaryAudioStreamReader(System.IO.Stream stream)
            : this(new System.IO.BinaryReader(stream))
        {
        }

        /// <summary>
        /// Copies up to <paramref name="size"/> bytes from the underlying reader into the buffer.
        /// </summary>
        /// <param name="dataBuffer">Destination buffer, filled from index 0.</param>
        /// <param name="size">Maximum number of bytes to read.</param>
        /// <returns>The value returned by the underlying reader's <c>Read</c> call.</returns>
        public override int Read(byte[] dataBuffer, uint size) => _source.Read(dataBuffer, 0, (int)size);

        /// <summary>
        /// Releases the wrapped reader once; repeated calls are ignored.
        /// </summary>
        /// <param name="disposing">True when called from <see cref="IDisposable.Dispose"/>; only then is the reader disposed.</param>
        protected override void Dispose(bool disposing)
        {
            if (!_isDisposed)
            {
                if (disposing)
                {
                    _source.Dispose();
                }

                _isDisposed = true;
                base.Dispose(disposing);
            }
        }
    }

    /// <summary>
    /// Sample <see cref="PushAudioOutputStreamCallback"/> that accumulates every audio chunk
    /// it is handed into one growing byte array.
    /// </summary>
    public sealed class PushAudioOutputStreamSampleCallback : PushAudioOutputStreamCallback
    {
        private byte[] _received;

        /// <summary>
        /// Starts with an empty buffer.
        /// </summary>
        public PushAudioOutputStreamSampleCallback()
        {
            _received = new byte[0];
        }

        /// <summary>
        /// Appends an audio chunk to the accumulated data and logs its size to the console.
        /// </summary>
        /// <param name="dataBuffer">The audio chunk to append.</param>
        /// <returns>The number of bytes taken, i.e. the length of <paramref name="dataBuffer"/>.</returns>
        public override uint Write(byte[] dataBuffer)
        {
            int incoming = dataBuffer.Length;
            int writeOffset = _received.Length;

            // Grow the buffer, then copy the new chunk in behind the existing data.
            Array.Resize(ref _received, writeOffset + incoming);
            Array.Copy(dataBuffer, 0, _received, writeOffset, incoming);

            Console.WriteLine($"{incoming} bytes received.");

            return (uint)incoming;
        }

        /// <summary>
        /// Logs that the stream was closed.
        /// </summary>
        public override void Close()
        {
            Console.WriteLine("Push audio output stream closed.");
        }

        /// <summary>
        /// Returns the accumulated audio.
        /// </summary>
        /// <returns>The internal byte array holding everything written so far.</returns>
        public byte[] GetAudioData()
        {
            return _received;
        }
    }
}

使用Microsoft.CognitiveServices.Speech；
使用Microsoft.CognitiveServices.Speech.Audio；
使用制度；
使用系统诊断；
使用System.IO；
名称空间MicrosoftSpeechSDKSamples
{
公营助理员
{
公共静态AudioConfig OpenWavFile（字符串文件名）
{
BinaryReader=新的BinaryReader（File.OpenRead（filename））；
返回OpenWavFile（读卡器）；
}
公共静态AudioConfig OpenWavFile（二进制读取器）
{
AudioStreamFormat格式=readWaveHeader（读取器）；
返回AudioConfig.FromStreamInput（新的二进制AudioStreamReader（reader），格式）；
}
公共静态二进制AudioStreamReader CreateWavReader（字符串文件名）
{
BinaryReader=新的BinaryReader（File.OpenRead（filename））；
//读取波形标题，使其不会进入以下读数中
AudioStreamFormat格式=readWaveHeader（读取器）；
返回新的BinaryAudioStreamReader（读取器）；
}
公共静态AudioStreamFormat readWaveHeader（二进制读取器）
{
//标记“RIFF”
字符[]数据=新字符[4]；
reader.Read（数据，0,4）；
Assert（（数据[0]='R'）&&（数据[1]='I'）&&（数据[2]='F'）&（数据[3]='F'），“错误的wav头”）；
//块大小
long fileSize=reader.ReadInt32（）；
//波头分块
//子块，格式
//标签：“波浪”
reader.Read（数据，0,4）；
Assert（（数据[0]='W'）&&（数据[1]='A'）&&（数据[2]='V'）&（数据[3]='E'），“wav头中的wav标记错误”）；
//标签：“fmt”
reader.Read（数据，0,4）；
Assert（（数据[0]='f'）&&（数据[1]='m'）&&（数据[2]='t'）&&（数据[3]=''，“wav头中的格式标记错误”）；
//块格式大小
var formatSize=reader.ReadInt32（）；
var formatTag=reader.ReadUIn
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using System;
using System.Diagnostics;
using System.IO;

namespace MicrosoftSpeechSDKSamples
{
    /// <summary>
    /// Helpers that parse a RIFF/WAVE header and expose the remaining sample data
    /// to the Speech SDK through a <see cref="BinaryAudioStreamReader"/>.
    /// </summary>
    public class Helper
    {
        /// <summary>
        /// Opens a wav file and wraps it in an <see cref="AudioConfig"/> backed by a pull stream.
        /// </summary>
        /// <param name="filename">Path of the wav file to open.</param>
        /// <returns>An audio config that reads the file's sample data.</returns>
        public static AudioConfig OpenWavFile(string filename)
        {
            BinaryReader reader = new BinaryReader(File.OpenRead(filename));
            try
            {
                return OpenWavFile(reader);
            }
            catch
            {
                // Nothing took over the reader, so release the file handle before propagating.
                reader.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Consumes the wav header from <paramref name="reader"/> and wraps the rest of the
        /// stream in an <see cref="AudioConfig"/>.
        /// </summary>
        /// <param name="reader">Reader positioned at the start of a RIFF/WAVE stream.</param>
        /// <returns>An audio config that pulls sample data from <paramref name="reader"/>.</returns>
        public static AudioConfig OpenWavFile(BinaryReader reader)
        {
            AudioStreamFormat format = readWaveHeader(reader);
            return AudioConfig.FromStreamInput(new BinaryAudioStreamReader(reader), format);
        }

        /// <summary>
        /// Opens a wav file and returns a stream adapter positioned at the first sample.
        /// </summary>
        /// <param name="filename">Path of the wav file to open.</param>
        /// <returns>A reader over the raw sample data (header already consumed).</returns>
        public static BinaryAudioStreamReader CreateWavReader(string filename)
        {
            BinaryReader reader = new BinaryReader(File.OpenRead(filename));
            try
            {
                // Read the wave header so that it does not end up in the following reads.
                readWaveHeader(reader);
                return new BinaryAudioStreamReader(reader);
            }
            catch
            {
                // Nothing took over the reader, so release the file handle before propagating.
                reader.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Parses the RIFF/WAVE header and leaves <paramref name="reader"/> at the start of the
        /// sample data. Chunks located between "fmt " and "data" (for example "LIST" or "fact")
        /// are skipped instead of being treated as a malformed header.
        /// </summary>
        /// <param name="reader">Reader positioned at the start of a RIFF/WAVE stream.</param>
        /// <returns>The PCM format built from sample rate, bits per sample and channel count.</returns>
        /// <exception cref="EndOfStreamException">The stream ends before a "data" chunk is found.</exception>
        public static AudioStreamFormat readWaveHeader(BinaryReader reader)
        {
            // Tag "RIFF"
            Trace.Assert(ReadChunkId(reader) == "RIFF", "Wrong wav header");

            // Chunk size (not needed)
            reader.ReadInt32();

            // Subchunk, Wave Header
            // Tag: "WAVE"
            Trace.Assert(ReadChunkId(reader) == "WAVE", "Wrong wav tag in wav header");

            // Subchunk, Format
            // Tag: "fmt "
            Trace.Assert(ReadChunkId(reader) == "fmt ", "Wrong format tag in wav header");

            // chunk format size
            var formatSize = reader.ReadInt32();
            reader.ReadUInt16();                        // format tag (unused)
            var channels = reader.ReadUInt16();
            var samplesPerSecond = reader.ReadUInt32();
            reader.ReadUInt32();                        // average bytes per second (unused)
            reader.ReadUInt16();                        // block align (unused)
            var bitsPerSample = reader.ReadUInt16();

            // Until now we have read 16 bytes in format, the rest is cbSize and is ignored for now.
            if (formatSize > 16)
            {
                reader.ReadBytes(formatSize - 16);
            }

            // Walk the following chunks until the "data" chunk is reached.
            string chunkId = ReadChunkId(reader);
            while (chunkId != "data")
            {
                if (chunkId.Length < 4)
                {
                    throw new EndOfStreamException("No data chunk found in wav");
                }

                // RIFF chunks are padded to an even number of bytes.
                uint chunkSize = reader.ReadUInt32();
                SkipBytes(reader, (long)chunkSize + (chunkSize & 1));
                chunkId = ReadChunkId(reader);
            }

            // data chunk size (not needed; the stream is read until it ends)
            reader.ReadInt32();

            // now the reader is set to the start of the body, i.e., the raw sample data
            return AudioStreamFormat.GetWaveFormatPCM(samplesPerSecond, (byte)bitsPerSample, (byte)channels);
        }

        /// <summary>Reads a four-byte chunk id as raw bytes; shorter than 4 chars at end of stream.</summary>
        private static string ReadChunkId(BinaryReader reader)
        {
            byte[] raw = reader.ReadBytes(4);
            char[] chars = new char[raw.Length];
            for (int i = 0; i < raw.Length; ++i)
            {
                chars[i] = (char)raw[i];
            }

            return new string(chars);
        }

        /// <summary>Reads and discards <paramref name="count"/> bytes from the reader.</summary>
        private static void SkipBytes(BinaryReader reader, long count)
        {
            while (count > 0)
            {
                int wanted = (int)Math.Min(count, 4096);
                int got = reader.ReadBytes(wanted).Length;
                if (got == 0)
                {
                    throw new EndOfStreamException("Unexpected end of wav while skipping a chunk");
                }

                count -= got;
            }
        }
    }

    /// <summary>
    /// Pull-stream callback that serves audio bytes from a <see cref="System.IO.BinaryReader"/>.
    /// </summary>
    public sealed class BinaryAudioStreamReader : PullAudioInputStreamCallback
    {
        private readonly System.IO.BinaryReader _source;
        private bool _isDisposed;

        /// <summary>
        /// Wraps an existing reader.
        /// </summary>
        /// <param name="reader">Reader over the bare sample data (no container data such as a wave header).</param>
        public BinaryAudioStreamReader(System.IO.BinaryReader reader)
        {
            _source = reader;
        }

        /// <summary>
        /// Wraps a stream by creating a reader over it.
        /// </summary>
        /// <param name="stream">Stream over the bare sample data (no container data such as a wave header).</param>
        public BinaryAudioStreamReader(System.IO.Stream stream)
            : this(new System.IO.BinaryReader(stream))
        {
        }

        /// <summary>
        /// Copies up to <paramref name="size"/> bytes from the underlying reader into the buffer.
        /// </summary>
        /// <param name="dataBuffer">Destination buffer, filled from index 0.</param>
        /// <param name="size">Maximum number of bytes to read.</param>
        /// <returns>The value returned by the underlying reader's <c>Read</c> call.</returns>
        public override int Read(byte[] dataBuffer, uint size) => _source.Read(dataBuffer, 0, (int)size);

        /// <summary>
        /// Releases the wrapped reader once; repeated calls are ignored.
        /// </summary>
        /// <param name="disposing">True when called from <see cref="IDisposable.Dispose"/>; only then is the reader disposed.</param>
        protected override void Dispose(bool disposing)
        {
            if (!_isDisposed)
            {
                if (disposing)
                {
                    _source.Dispose();
                }

                _isDisposed = true;
                base.Dispose(disposing);
            }
        }
    }

    /// <summary>
    /// Sample <see cref="PushAudioOutputStreamCallback"/> that accumulates every audio chunk
    /// it is handed into one growing byte array.
    /// </summary>
    public sealed class PushAudioOutputStreamSampleCallback : PushAudioOutputStreamCallback
    {
        private byte[] _received;

        /// <summary>
        /// Starts with an empty buffer.
        /// </summary>
        public PushAudioOutputStreamSampleCallback()
        {
            _received = new byte[0];
        }

        /// <summary>
        /// Appends an audio chunk to the accumulated data and logs its size to the console.
        /// </summary>
        /// <param name="dataBuffer">The audio chunk to append.</param>
        /// <returns>The number of bytes taken, i.e. the length of <paramref name="dataBuffer"/>.</returns>
        public override uint Write(byte[] dataBuffer)
        {
            int incoming = dataBuffer.Length;
            int writeOffset = _received.Length;

            // Grow the buffer, then copy the new chunk in behind the existing data.
            Array.Resize(ref _received, writeOffset + incoming);
            Array.Copy(dataBuffer, 0, _received, writeOffset, incoming);

            Console.WriteLine($"{incoming} bytes received.");

            return (uint)incoming;
        }

        /// <summary>
        /// Logs that the stream was closed.
        /// </summary>
        public override void Close()
        {
            Console.WriteLine("Push audio output stream closed.");
        }

        /// <summary>
        /// Returns the accumulated audio.
        /// </summary>
        /// <returns>The internal byte array holding everything written so far.</returns>
        public byte[] GetAudioData()
        {
            return _received;
        }
    }
}

{
  "contentUrls": [
    "{{path to audio blob}}"
  ],
  "properties": {
    "diarizationEnabled": false,
    "wordLevelTimestampsEnabled": false,
    "punctuationMode": "DictatedAndAutomatic",
    "profanityFilterMode": "Masked",
    "destinationContainerUrl": "{{path to your container with SAS token}}"
  },
  "locale": "en-US",
  "displayName": "Transcription using default model for en-US"
}