C#中的Google语音转文字(Speech-to-Text)API

当flac文件是用Windows录音机手动录制、再用软件转换器转换得到时,我的程序能从谷歌获得正确的响应。
但是当我使用由我的程序录制的文件时,我从谷歌得到的是 {"result":[]}。 我该怎么办? 这是我的代码:
发送请求的代码:

/// <summary>
/// Copies everything that can still be read from <paramref name="fileStream"/>
/// into <paramref name="requestStream"/>, 32768 bytes at a time.
/// </summary>
/// <param name="fileStream">Source file stream, read from its current position.</param>
/// <param name="requestStream">Destination stream that receives the bytes.</param>
private static void CopyStream(FileStream fileStream, Stream requestStream)
{
    var buffer = new byte[32768];
    int read;
    while ((read = fileStream.Read(buffer, 0, buffer.Length)) > 0)
    {
        requestStream.Write(buffer, 0, read);
    }
}

/// <summary>
/// Sets the method, content type and browser-like headers on the request
/// used to upload 44100 Hz FLAC audio with chunked transfer encoding.
/// </summary>
/// <param name="request">The request to configure in place.</param>
private static void ConfigureRequest(HttpWebRequest request)
{
    request.KeepAlive = true;
    request.SendChunked = true;
    request.ContentType = "audio/x-flac; rate=44100";
    request.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2";
    request.Headers.Set(HttpRequestHeader.AcceptEncoding, "gzip,deflate,sdch");
    request.Headers.Set(HttpRequestHeader.AcceptLanguage, "en-GB,en-US;q=0.8,en;q=0.6");
    request.Headers.Set(HttpRequestHeader.AcceptCharset, "ISO-8859-1,utf-8;q=0.7,*;q=0.3");
    request.Method = "POST";
}

// Upload the recorded FLAC file and show the raw response text in the `state` control.
// NOTE(review): the API key is embedded in the URL literal; consider loading it from configuration.
using (var fileStream = new FileStream(@"C:\Users\Ahmad Mustofa\Documents\Visual Studio 2010\Projects\FP\FP\bin\Debug\voice.flac", FileMode.Open))
{
    const string requestUrl = "https://www.google.com/speech-api/v2/recognize?output=json&lang=ar-sa&key=AIzaSyBJ6VJ326Rpb23msih2wGhXENEwU1TF1PA&client=chromium&maxresults=1&pfilter=2";
    var request = (HttpWebRequest)WebRequest.Create(requestUrl);
    ConfigureRequest(request);

    // Close the request stream explicitly once the body is written so the
    // upload is completed and the stream is released before the response is read.
    using (var requestStream = request.GetRequestStream())
    {
        CopyStream(fileStream, requestStream);
    }

    // NOTE(review): the body is always decoded as gzip although the request also
    // advertises deflate and sdch -- confirm the server only ever answers with gzip.
    using (var response = request.GetResponse())
    using (var responseStream = response.GetResponseStream())
    using (var zippedStream = new GZipStream(responseStream, CompressionMode.Decompress))
    using (var sr = new StreamReader(zippedStream))
    {
        var res = sr.ReadToEnd();
        state.Text = res;
    }
}

wav录音器:

  /// <summary>
  /// Handles a captured audio buffer: appends the recorded bytes to the WAV
  /// writer and flushes it. Does nothing when no writer is open.
  /// </summary>
  /// <param name="sender">The event source (unused).</param>
  /// <param name="e">Carries the captured buffer and the number of bytes recorded.</param>
  private void sourceStream_DataAvailable(object sender, NAudio.Wave.WaveInEventArgs e)
  {
      if (waveWriter == null)
      {
          return;
      }

      waveWriter.WriteData(e.Buffer, 0, e.BytesRecorded);
      waveWriter.Flush();
  }

  // Start recording from the device selected in the `hardware` list into voice.wav,
  // at 44100 Hz with the channel count reported by the device capabilities.
  fileName = "C:\\Users\\Ahmad Mustofa\\Documents\\Visual Studio 2010\\Projects\\FP\\FP\\bin\\debug\\voice.wav";
  int deviceNumber = hardware.SelectedItems[0].Index;
  try
  {
      sourceStream = new NAudio.Wave.WaveIn();
      sourceStream.DeviceNumber = deviceNumber;
      sourceStream.WaveFormat = new NAudio.Wave.WaveFormat(44100, NAudio.Wave.WaveIn.GetCapabilities(deviceNumber).Channels);

      // Subscribe with a method group so the compiler picks the delegate type of the
      // event; a non-generic EventHandler cannot wrap a handler taking WaveInEventArgs.
      sourceStream.DataAvailable += sourceStream_DataAvailable;

      waveWriter = new NAudio.Wave.WaveFileWriter(fileName, sourceStream.WaveFormat);
      sourceStream.StartRecording();
  }
  catch (Exception ex)
  {
      state.Text = "disini" + ex.Message;
  }

flac转换器:

  // Convert wav/<input> into flac/<input with a .flac extension>.
  string inputFile = Path.Combine("wav", input);
  string outputFile = Path.Combine("flac", Path.ChangeExtension(input, ".flac"));

  if (!File.Exists(inputFile))
  {
      throw new ApplicationException("Input file " + inputFile + " cannot be found!");
  }

  WavReader wav = new WavReader(inputFile);
  using (var flacStream = File.Create(outputFile))
  {
      FlacWriter flac = new FlacWriter(flacStream, wav.BitDepth, wav.Channels, wav.SampleRate);
      try
      {
          // Buffer for 1 second's worth of audio data
          byte[] buffer = new byte[wav.Bitrate / 8];
          int bytesRead;

          // Stop as soon as the input is exhausted instead of handing an
          // empty chunk to the encoder on the final iteration.
          while ((bytesRead = wav.InputStream.Read(buffer, 0, buffer.Length)) > 0)
          {
              flac.Convert(buffer, 0, bytesRead);
          }
      }
      finally
      {
          // Dispose the writer even when reading or encoding throws, while
          // the underlying output stream is still open.
          flac.Dispose();
      }
  }

我也遇到过同样的问题,后来找到了一个简洁的解决方案。 我使用Fiddler( http://www.telerik.com/fiddler/ )观察Chrome是如何进行语音识别的,然后编写了一些代码来模拟Chrome发送请求。 此方法使用不同的URI,并且还带有一个名为pair的16个字符的值,每个请求的该值都不同。 我用一个简单的随机值生成函数为每个请求创建该值,还把output参数的值改为了"json"。

注意:结果有时可能是空的,就像上面的情况一样,但响应中还会有另一个包含候选结果(alternatives)的JSON对象。

  /// <summary>
  /// Posts the FLAC file at C:\TestFolder\test_audio.flac to the full-duplex
  /// speech endpoint and, when the response is JSON, writes the raw response
  /// text into <c>textBox1</c>. Errors raised while reading the response are
  /// shown in a message box.
  /// </summary>
  private void GoogleSpeechToText()
  {
      string uri = "https://www.google.com/speech-api/full-duplex/v1/up?output=json&key=AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw&pair=" + GenerateUnique(16) + "&lang=en-US&pFilter=2&maxAlternatives=10&client=chromium";

      HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
      request.Timeout = 10000;
      request.Method = "POST";
      request.Host = "www.google.com";
      request.KeepAlive = true;
      request.SendChunked = true;
      request.ContentType = "audio/x-flac; rate=16000";
      request.Headers.Set(HttpRequestHeader.AcceptLanguage, "en-GB,en-US;q=0.8,en;q=0.6");
      request.UserAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36";

      string path = @"C:\TestFolder\test_audio.flac";

      // ReadAllBytes keeps reading until the whole file is in memory; a single
      // Stream.Read call is allowed to return fewer bytes than requested.
      byte[] data = File.ReadAllBytes(path);

      using (Stream reqStream = request.GetRequestStream())
      {
          reqStream.Write(data, 0, data.Length);
      }

      try
      {
          // Dispose the response and its stream on every path, including when
          // the content type is not the expected JSON.
          using (WebResponse response = request.GetResponse())
          using (Stream respStream = response.GetResponseStream())
          {
              if (response.ContentType == "application/json; charset=utf-8")
              {
                  using (var sr = new StreamReader(respStream))
                  {
                      var res = sr.ReadToEnd();
                      textBox1.Text = res;
                  }
              }
          }
      }
      catch (Exception ex)
      {
          MessageBox.Show(ex.Message, "Error", MessageBoxButtons.OK);
      }
  }

  /// <summary>
  /// Builds a random string of upper-case letters (A-Z) and digits (0-9).
  /// Each position is first chosen to be a letter or a digit with equal
  /// probability, then a character of that kind is picked.
  /// </summary>
  /// <param name="length">Number of characters to generate; values of zero or less yield an empty string.</param>
  /// <returns>The generated string.</returns>
  private string GenerateUnique(int length)
  {
      const string letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
      const string digits = "0123456789";

      // Fill a fixed-size buffer instead of concatenating strings in the loop.
      char[] buffer = new char[Math.Max(length, 0)];
      Random random = new Random();

      for (int i = 0; i < buffer.Length; i++)
      {
          if (random.Next(2) == 1)
          {
              buffer[i] = letters[random.Next(letters.Length)];
          }
          else
          {
              buffer[i] = digits[random.Next(digits.Length)];
          }
      }

      return new string(buffer);
  }

使用下面的代码解析响应,获得utterance(识别出的短语)和confidence(置信度,%)

  // Extracts the first transcript and its confidence from the raw response text
  // by locating fixed marker substrings.

  // Placeholder from the original snippet: assign the response text returned by Google here.
  string toParse = (VALUE RETURNED BY GOOGLE);

  var trsc1 = @"transcript"":""";
  var trsc2 = @""",""confidence"":";
  var trsc3 = @"}],""final"":";

  var start = toParse.IndexOf(trsc1) + trsc1.Length;
  var end = toParse.IndexOf(trsc2);
  var end2 = toParse.IndexOf(trsc3);

  // Text between the transcript marker and the confidence marker.
  var vv1 = toParse.Substring(start, end - start);

  // Text between the confidence marker and the closing "final" marker.
  var vv2 = toParse.Substring(end + trsc2.Length, end2 - (end + trsc2.Length));

  // The value uses '.' as the decimal separator, so parse it with the invariant
  // culture instead of rewriting the separator for the current culture.
  float confidence = (float)Math.Round(
      double.Parse(vv2.Trim(), System.Globalization.CultureInfo.InvariantCulture), 2);
  string utterance = vv1;
 using System;
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
 using System.IO;
 using System.Net;
 using Newtonsoft.Json;

 namespace google_speech_api_trial4
 {
     /// <summary>
     /// Console sample that posts a FLAC file to the speech recognition endpoint
     /// and prints the transcript found in the response.
     /// </summary>
     class Program
     {
         // NOTE(review): the key is embedded in source; consider loading it from configuration.
         public static string ACCESS_GOOGLE_SPEECH_KEY = "AIzaSyDC8nM1S0cLpXvRc8TXrDoey-tqQsoBGnM";

         static void Main(string[] args)
         {
             GoogleSpeechRequest();
             Console.ReadLine();
         }

         /// <summary>
         /// Uploads "my.flac" as 44100 Hz FLAC audio, reads the whole response,
         /// parses each response line as JSON and writes the transcript of the
         /// last line that has a non-empty result to the console.
         /// </summary>
         public static void GoogleSpeechRequest()
         {
             // ReadAllBytes reads the complete file and closes it again; a single
             // Stream.Read call is allowed to return fewer bytes than requested.
             byte[] audioBytes = File.ReadAllBytes("my.flac");

             HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create("https://www.google.com/speech-api/v2/recognize?output=json&lang=en-us&key=" + ACCESS_GOOGLE_SPEECH_KEY);
             request.Credentials = CredentialCache.DefaultCredentials;
             request.Method = "POST";
             request.ContentType = "audio/x-flac; rate=44100";
             request.ContentLength = audioBytes.Length;

             using (Stream requestStream = request.GetRequestStream())
             {
                 requestStream.Write(audioBytes, 0, audioBytes.Length);
             }

             // Dispose the response and reader once the body has been read.
             string responseFromServer;
             using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
             using (StreamReader reader = new StreamReader(response.GetResponseStream()))
             {
                 responseFromServer = reader.ReadToEnd();
             }

             // The response is handled one line at a time; lines that deserialize
             // to null or carry an empty "result" array are skipped.
             string text = "";
             foreach (string line in responseFromServer.Split('\n'))
             {
                 dynamic jsonObject = JsonConvert.DeserializeObject(line);
                 if (jsonObject == null || jsonObject.result.Count <= 0)
                 {
                     continue;
                 }

                 text = jsonObject.result[0].alternative[0].transcript;
             }

             Console.WriteLine(text);
         }
     }
 }

我找了3个小时:打印文本时,开头总是 {"result":[]}。 我以为音频没有被转换。 但实际上,返回的JSON有两行,第二行才包含识别出的文本。 要打印它,需要对其进行解析。 哦,我还遇到了很多关于导入、引用和using语句的问题。 不过最终这段代码可以正常工作。