C# 中的 Google 语音转文本 API
当 flac 文件是用 Windows 录音机手动录制、再用软件转换器转换得到时,我的程序能从 Google 获得正确的响应。
但是当我使用程序录制的文件时,我从 Google 得到的却是 `{"result":[]}`。我该怎么办?这是我的代码:
发件人:
/// <summary>
/// Copies the entire contents of <paramref name="fileStream"/> into
/// <paramref name="requestStream"/> (the HTTP request body).
/// </summary>
/// <param name="fileStream">Source file stream, read from its current position.</param>
/// <param name="requestStream">Destination stream; not closed by this method.</param>
private static void CopyStream(FileStream fileStream, Stream requestStream)
{
    // Stream.CopyTo (.NET 4+) replaces the hand-rolled 32 KiB read/write loop
    // with the framework implementation — same behavior, less code.
    fileStream.CopyTo(requestStream);
}
/// <summary>
/// Prepares an HttpWebRequest to POST FLAC audio (44.1 kHz) to the Google
/// speech endpoint while masquerading as a Chrome browser.
/// </summary>
/// <param name="request">The request to configure; mutated in place.</param>
private static void ConfigureRequest(HttpWebRequest request)
{
    // POST the audio as a chunked upload over a persistent connection.
    request.Method = "POST";
    request.KeepAlive = true;
    request.SendChunked = true;
    // The rate here must match the sample rate of the FLAC file being uploaded.
    request.ContentType = "audio/x-flac; rate=44100";
    // Browser-like headers. Advertising gzip means the caller is expected to
    // decompress the response body itself (see the GZipStream in the caller).
    request.UserAgent =
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2";
    request.Headers.Set(HttpRequestHeader.AcceptEncoding, "gzip,deflate,sdch");
    request.Headers.Set(HttpRequestHeader.AcceptLanguage, "en-GB,en-US;q=0.8,en;q=0.6");
    request.Headers.Set(HttpRequestHeader.AcceptCharset, "ISO-8859-1,utf-8;q=0.7,*;q=0.3");
}
// Upload voice.flac to the Google Speech v2 REST endpoint and show the raw JSON reply.
// NOTE(review): the API key is embedded in source — move it out of version control.
using (var fileStream = new FileStream(@"C:\Users\Ahmad Mustofa\Documents\Visual Studio 2010\Projects\FP\FP\bin\Debug\voice.flac", FileMode.Open))
{
    const string requestUrl = "https://www.google.com/speech-api/v2/recognize?output=json&lang=ar-sa&key=AIzaSyBJ6VJ326Rpb23msih2wGhXENEwU1TF1PA&client=chromium&maxresults=1&pfilter=2";
    var request = (HttpWebRequest)WebRequest.Create(requestUrl);
    ConfigureRequest(request);
    // FIX: dispose the request stream so the chunked upload is terminated
    // deterministically before GetResponse() is called (it leaked before).
    using (var requestStream = request.GetRequestStream())
    {
        CopyStream(fileStream, requestStream);
    }
    using (var response = request.GetResponse())
    using (var responseStream = response.GetResponseStream())
    // The request advertised Accept-Encoding: gzip, so the body must be un-gzipped here.
    using (var zippedStream = new GZipStream(responseStream, CompressionMode.Decompress))
    using (var sr = new StreamReader(zippedStream))
    {
        state.Text = sr.ReadToEnd();
    }
}
// WAV recorder (NAudio): invoked for each captured audio buffer; appends the
// bytes to the open WaveFileWriter.
// NOTE(review): flushing on every callback is costly but keeps the WAV header
// valid if the application terminates mid-recording.
private void sourceStream_DataAvailable(object sender, NAudio.Wave.WaveInEventArgs e)
{
    if (waveWriter != null)
    {
        waveWriter.WriteData(e.Buffer, 0, e.BytesRecorded);
        waveWriter.Flush();
    }
}
// Start capturing microphone audio with NAudio, streaming it straight to voice.wav.
// NOTE(review): recorded at 44.1 kHz, which must match the "rate=44100" sent to the API.
fileName = "C:\\Users\\Ahmad Mustofa\\Documents\\Visual Studio 2010\\Projects\\FP\\FP\\bin\\debug\\voice.wav";
// Record from the input device the user selected in the 'hardware' list view.
int deviceNumber = hardware.SelectedItems[0].Index;
try
{
sourceStream = new NAudio.Wave.WaveIn();
sourceStream.DeviceNumber = deviceNumber;
// 44.1 kHz; channel count taken from the selected device's capabilities.
sourceStream.WaveFormat = new NAudio.Wave.WaveFormat(44100, NAudio.Wave.WaveIn.GetCapabilities(deviceNumber).Channels);
// Each captured buffer is appended to the file by sourceStream_DataAvailable.
sourceStream.DataAvailable += new EventHandler<NAudio.Wave.WaveInEventArgs>(sourceStream_DataAvailable);
waveWriter = new NAudio.Wave.WaveFileWriter(fileName, sourceStream.WaveFormat);
sourceStream.StartRecording();
}
catch (Exception ex)
{
// "disini" is Indonesian for "here" — marks where the failure happened.
state.Text = "disini" + ex.Message;
}
// Convert <input>.wav (from the "wav" folder) to a FLAC file in the "flac" folder.
// FIX: the input folder name was "wav " with a trailing space — almost certainly a
// typo, and such a directory is effectively impossible to create on Windows.
string inputFile = Path.Combine("wav", input);
string outputFile = Path.Combine("flac", Path.ChangeExtension(input, ".flac"));
if (!File.Exists(inputFile))
    throw new ApplicationException("Input file " + inputFile + " cannot be found!");
// File.Create throws if the destination folder is missing; create it up front.
Directory.CreateDirectory("flac");
WavReader wav = new WavReader(inputFile);
using (var flacStream = File.Create(outputFile))
{
    FlacWriter flac = new FlacWriter(flacStream, wav.BitDepth, wav.Channels, wav.SampleRate);
    try
    {
        // Buffer for 1 second's worth of audio data
        byte[] buffer = new byte[wav.Bitrate / 8];
        int bytesRead;
        do
        {
            bytesRead = wav.InputStream.Read(buffer, 0, bytesRead = buffer.Length) is int n ? n : 0;
            bytesRead = wav.InputStream.Read(buffer, 0, buffer.Length);
            flac.Convert(buffer, 0, bytesRead);
        } while (bytesRead > 0);
    }
    finally
    {
        // FIX: Dispose in a finally block so the encoder is released even if
        // Convert throws (the old manual Dispose was skipped on exceptions).
        flac.Dispose();
    }
}
使用它即可同时获得识别结果(短语)和置信度(%)。
我也遇到了同样的问题,但找到了一个简洁的解决方案。我用 Fiddler(HTTP 抓包工具)观察 Chrome 是如何进行语音识别的,然后编写代码模拟 Chrome 发送请求。这种方法使用不同的 URI,其中有一个名为 pair 的 16 字符参数,每次请求都不相同。我用一个简单的随机字符串生成函数为每个请求生成该值,并把 output 参数设为 "json"。注意:在上面的例子中,result 有时可能为空,但响应中还有另一个包含备选项(alternatives)的 JSON 对象。
/// <summary>
/// POSTs a FLAC file to Google's full-duplex speech endpoint (mimicking Chrome's
/// own request) and shows the raw JSON response in textBox1. The 'pair' query
/// parameter must be a fresh 16-character value on every request.
/// </summary>
private void GoogleSpeechToText()
{
    string uri = "https://www.google.com/speech-api/full-duplex/v1/up?output=json&key=AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw&pair=" + GenerateUnique(16) + "&lang=en-US&pFilter=2&maxAlternatives=10&client=chromium";
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
    request.Timeout = 10000;
    request.Method = "POST";
    request.Host = "www.google.com";
    request.KeepAlive = true;
    request.SendChunked = true;
    // Must match the sample rate of the FLAC file being uploaded.
    request.ContentType = "audio/x-flac; rate=16000";
    request.Headers.Set(HttpRequestHeader.AcceptLanguage, "en-GB,en-US;q=0.8,en;q=0.6");
    request.UserAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36";

    // FIX: File.ReadAllBytes reads the whole file reliably; the old single
    // fStream.Read call could legally return fewer bytes than requested,
    // silently truncating the upload.
    string path = @"C:\TestFolder\test_audio.flac";
    byte[] data = File.ReadAllBytes(path);

    using (Stream reqStream = request.GetRequestStream())
    {
        reqStream.Write(data, 0, data.Length);
    }
    try
    {
        // FIX: dispose the response and its stream (both leaked before).
        using (WebResponse response = request.GetResponse())
        using (Stream respStream = response.GetResponseStream())
        {
            if (response.ContentType == "application/json; charset=utf-8")
            {
                using (var sr = new StreamReader(respStream))
                {
                    textBox1.Text = sr.ReadToEnd();
                }
            }
        }
    }
    catch (Exception ex) { MessageBox.Show(ex.Message, "Error", MessageBoxButtons.OK); }
}
// FIX: a shared RNG. 'new Random()' on every call is time-seeded and can return
// identical sequences when invoked in quick succession.
private static readonly Random _pairRandom = new Random();

/// <summary>
/// Builds a random string of uppercase letters and digits of the given length,
/// used as the per-request 'pair' value.
/// </summary>
/// <param name="length">Number of characters to generate.</param>
/// <returns>A random string drawn from A-Z and 0-9.</returns>
private string GenerateUnique(int length)
{
    // One uniform draw from a combined alphabet replaces the old two-step
    // (letter-or-digit, then index) scheme and the O(n^2) string concatenation.
    const string alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    var chars = new char[length];
    for (int i = 0; i < length; i++)
    {
        chars[i] = alphabet[_pairRandom.Next(alphabet.Length)];
    }
    return new string(chars);
}
private void GoogleSpeechToText()
{
字符串uri=”https://www.google.com/speech-api/full-duplex/v1/up?output=json&key=AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw&pair=“+GenerateUnique(16)+”和lang=en-US&pFilter=2&maxAlternatives=10&client=chromium”;
HttpWebRequest请求=(HttpWebRequest)WebRequest.Create(uri);
请求超时=10000;
request.Method=“POST”;
request.Host=“www.google.com”;
request.KeepAlive=true;
request.SendChunked=true;
request.ContentType=“audio/x-flac;rate=16000”;
Set(HttpRequestHeader.AcceptLanguage,“en-GB,en-US;q=0.8,en;q=0.6”);
request.UserAgent=“Mozilla/5.0(Windows NT 6.3;WOW64)AppleWebKit/537.36(KHTML,类似Gecko)Chrome/36.0.1985.143 Safari/537.36”;
字符串路径=@“C:\TestFolder\test_audio.flac”;
FileInfo fInfo=新的FileInfo(路径);
变量numBytes=最终长度;
字节[]数据;
使用(FileStream fStream=newfilestream(路径,FileMode.Open,FileAccess.Read))
{
数据=新字节[numBytes];
读取(数据,0,(整数)个字节);
fStream.Close();
}
使用(Stream reqStream=request.GetRequestStream())
请求流写入(数据,0,数据长度);
尝试
{
WebResponse=request.GetResponse();
Stream respStream=response.GetResponseStream();
if(response.ContentType==“application/json;charset=utf-8”)
{
使用(var sr=新StreamReader(respStream))
{
var res=sr.ReadToEnd();
textBox1.Text=res;
}
}
}
catch(异常ex){MessageBox.Show(例如Message,“Error”,MessageBoxButtons.OK);}
}
专用字符串生成器(整数长度)
{
字符串[]字母=新字符串[]{“A”、“B”、“C”、“D”、“E”、“F”、“G”、“H”、“I”、“J”、“K”、“L”、“M”、“N”、“O”、“P”、“Q”、“R”、“S”、“T”、“U”、“V”、“W”、“X”、“Y”、“Z”};
字符串[]位=新字符串[]{“0”、“1”、“2”、“3”、“4”、“5”、“6”、“7”、“8”、“9”};
字符串缓冲区=”;
随机=新随机();
for(int i=0;i
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Net;
using Newtonsoft.Json;
namespace google_speech_api_trial4
{
    class Program
    {
        public static string ACCESS_GOOGLE_SPEECH_KEY = "AIzaSyDC8nM1S0cLpXvRc8TXrDoey-tqQsoBGnM";
        static void Main(string[] args)
        {
            GoogleSpeechRequest();
            Console.ReadLine();
        }
        public static void GoogleSpeechRequest()
        {
            FileStream fileStream = File.OpenRead("my.flac");
            MemoryStream memoryStream = new MemoryStream();
            memoryStream.SetLength(fileStream.Length);
            fileStream.Read(memoryStream.GetBuffer(), 0, (int)fileStream.Length);
            byte[] BA_AudioFile = memoryStream.GetBuffer();
            HttpWebRequest _HWR_SpeechToText = null;
            _HWR_SpeechToText = (HttpWebRequest)HttpWebRequest.Create("https://www.google.com/speech-api/v2/recognize?output=json&lang=en-us&key=" + ACCESS_GOOGLE_SPEECH_KEY);
            _HWR_SpeechToText.Credentials = CredentialCache.DefaultCredentials;
            _HWR_SpeechToText.Method = "POST";
            _HWR_SpeechToText.ContentType = "audio/x-flac; rate=44100";
            _HWR_SpeechToText.ContentLength = BA_AudioFile.Length;
            Stream stream = _HWR_SpeechToText.GetRequestStream();
            stream.Write(BA_AudioFile, 0, BA_AudioFile.Length);
            stream.Close();
            HttpWebResponse HWR_Response = (HttpWebResponse)_HWR_SpeechToText.GetResponse();
            StreamReader SR_Response = new StreamReader(HWR_Response.GetResponseStream());
            string responseFromServer = (SR_Response.ReadToEnd());
            String[] jsons = responseFromServer.Split('\n');
            String text = "";
            foreach (String j in jsons)
            {
                dynamic jsonObject = JsonConvert.DeserializeObject(j);
                if (jsonObject == null || jsonObject.result.Count <= 0)
                {
                    continue;
                }
                text = jsonObject.result[0].alternative[0].transcript;
            }
            Console.WriteLine(text);
        }
    }
}
它在 Google Cloud API 的文档中明确提到,即
如果操作尚未完成,则可以通过反复发出GET请求来轮询端点,直到响应的done属性为true为止
{
"name": "operationname here",
"metadata": {
"@type": "type.googleapis.com/google.cloud.speech.v1.LongRunningRecognizeMetadata",
"progressPercent": 0,
"startTime": "2018-12-18T10:56:09.425584Z",
"lastUpdateTime": "2018-12-18T11:10:27.147310Z"
},
"done": true
}
通过反复发出GET请求轮询端点,直到响应的done属性为true,或者您可以检查“progressPercent”:0,直到其值变为true
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Net;
using Newtonsoft.Json;

namespace google_speech_api_trial4
{
    /// <summary>
    /// Console sample: POST a FLAC file to the (unofficial) Google Speech v2 REST
    /// endpoint and print the recognized transcript.
    /// </summary>
    class Program
    {
        // NOTE(review): API key embedded in source — move to configuration / a secret store.
        public static string ACCESS_GOOGLE_SPEECH_KEY = "AIzaSyDC8nM1S0cLpXvRc8TXrDoey-tqQsoBGnM";

        static void Main(string[] args)
        {
            GoogleSpeechRequest();
            Console.ReadLine();
        }

        /// <summary>
        /// Uploads my.flac (44.1 kHz) and prints the last non-empty transcript from
        /// the newline-delimited JSON response.
        /// </summary>
        public static void GoogleSpeechRequest()
        {
            // FIX: File.ReadAllBytes replaces the leaked FileStream plus the
            // MemoryStream.GetBuffer trick; the old single Read call could also
            // legally return fewer bytes than requested.
            byte[] BA_AudioFile = File.ReadAllBytes("my.flac");

            HttpWebRequest _HWR_SpeechToText =
                (HttpWebRequest)WebRequest.Create("https://www.google.com/speech-api/v2/recognize?output=json&lang=en-us&key=" + ACCESS_GOOGLE_SPEECH_KEY);
            _HWR_SpeechToText.Credentials = CredentialCache.DefaultCredentials;
            _HWR_SpeechToText.Method = "POST";
            _HWR_SpeechToText.ContentType = "audio/x-flac; rate=44100";
            _HWR_SpeechToText.ContentLength = BA_AudioFile.Length;

            // FIX: deterministic disposal of the request stream (was a bare Close).
            using (Stream stream = _HWR_SpeechToText.GetRequestStream())
            {
                stream.Write(BA_AudioFile, 0, BA_AudioFile.Length);
            }

            String text = "";
            // FIX: response and reader are now disposed (both leaked before).
            using (HttpWebResponse HWR_Response = (HttpWebResponse)_HWR_SpeechToText.GetResponse())
            using (StreamReader SR_Response = new StreamReader(HWR_Response.GetResponseStream()))
            {
                string responseFromServer = SR_Response.ReadToEnd();
                // The endpoint streams one JSON object per line; keep the last
                // line that actually contains a result.
                String[] jsons = responseFromServer.Split('\n');
                foreach (String j in jsons)
                {
                    dynamic jsonObject = JsonConvert.DeserializeObject(j);
                    if (jsonObject == null || jsonObject.result.Count <= 0)
                    {
                        continue;
                    }
                    text = jsonObject.result[0].alternative[0].transcript;
                }
            }
            Console.WriteLine(text);
        }
    }
}
{
"name": "operationname here",
"metadata": {
"@type": "type.googleapis.com/google.cloud.speech.v1.LongRunningRecognizeMetadata",
"progressPercent": 0,
"startTime": "2018-12-18T10:56:09.425584Z",
"lastUpdateTime": "2018-12-18T11:10:27.147310Z"
},
"done": true
}
/// <summary>
/// Polls a Cloud Speech long-running-recognize operation until its 'done'
/// property is true, then returns the final JSON payload.
/// </summary>
/// <param name="operationName">Operation name returned by longrunningrecognize.</param>
/// <returns>The raw JSON of the completed operation.</returns>
public async Task<string> TranscribeLongMediaFile(string operationName)
{
    string bearerToken = GetOAuthToken();
    var baseUrl = new Uri(googleSpeechBaseUrl + operationName);
    string resultContent = string.Empty;
    // NOTE(review): creating an HttpClient per call risks socket exhaustion under
    // load; prefer a shared instance or IHttpClientFactory.
    using (var client = new HttpClient())
    {
        client.DefaultRequestHeaders.Add(HttpRequestHeader.Authorization.ToString(), "Bearer " + bearerToken);
        // FIX: removed the Content-Type default header. These are GET requests with
        // no body, and HttpRequestHeader.ContentType.ToString() produces the invalid
        // header name "ContentType" rather than "Content-Type".
        client.Timeout = TimeSpan.FromMilliseconds(Timeout.Infinite);
        bool responseStatus = false;
        while (!responseStatus)
        {
            using (var result = await client.GetAsync(baseUrl))
            {
                // FIX: fail fast on an HTTP error instead of trying to parse it as JSON.
                result.EnsureSuccessStatusCode();
                resultContent = await result.Content.ReadAsStringAsync();
                ResponseObject responseObject = JsonConvert.DeserializeObject<ResponseObject>(resultContent);
                int currentPercentage = responseObject.metadata.progressPercent;
                responseStatus = responseObject.done && currentPercentage == 100;
                if (!responseStatus)
                {
                    // FIX: only sleep when another poll is actually needed (the old
                    // code delayed once more after the operation had finished).
                    await Task.Delay(CalculateDealy(currentPercentage));
                }
            }
        }
    }
    return resultContent;
}
/// <summary>
/// Maps a progress percentage (0-100) to a poll delay in milliseconds:
/// 15000 ms at 0% down to 0 ms at 100%, dropping 1500 ms per completed decade.
/// </summary>
/// <param name="currentPercentage">Operation progress, 0-100.</param>
/// <returns>Milliseconds to wait before the next poll.</returns>
private int CalculateDealy(int currentPercentage)
{
    // Integer division is intentional: each full 10% removes one 1500 ms step.
    return (10 - currentPercentage / 10) * 1500;
}
/// <summary>
/// Gets an OAuth access token for the Google credential, synchronously.
/// </summary>
/// <returns>A bearer token string for the Authorization header.</returns>
public string GetOAuthToken()
{
// NOTE(review): .Result blocks the calling thread on an async call — deadlock
// risk on a UI/ASP.NET synchronization context; prefer awaiting
// GetAccessTokenForRequestAsync in an async method.
// NOTE(review): the URI passed as authUri looks like the OAuth *authorization*
// endpoint — confirm this is the intended value for this credential type.
return googleCredential.UnderlyingCredential.GetAccessTokenForRequestAsync("https://accounts.google.com/o/oauth2/v2/auth", CancellationToken.None).Result;
}
{
"name": "operationname here",
"metadata": {
"@type": "type.googleapis.com/google.cloud.speech.v1.LongRunningRecognizeMetadata",
"progressPercent": 100,
"startTime": "2018-12-18T10:56:09.425584Z",
"lastUpdateTime": "2018-12-18T11:10:27.147310Z"
},
"done": true,
"response": {
"@type": "type.googleapis.com/google.cloud.speech.v1.LongRunningRecognizeResponse",
"results": [
{
"alternatives": [
{
"transcript": "okay let's get started",
"confidence": 0.97442055
}
]
}, and so on .....