Ibm cloud IBM Watson大型文件的语音到文本处理
我一直在尝试使用 Bluemix 的 Speech to Text Java 库,特别是 com.ibm.watson.developer_cloud.speech_to_text.v1 包中的 SpeechToText 类。我有很长的 wav 文件(约 70MB)想转换成文本,目标是使用 Java API 来识别文本。我意识到我需要每 30 秒检查一次调用状态,因为转写结束后,我只有 30 秒的时间来检索最终结果。为了在使用 RESTful API 时做到这一点,我需要创建一个会话,然后基于该会话查询其上运行的识别作业的状态。我已尝试创建会话,但该会话始终不可用;我已经验证了它在官方提供的 Web 示例应用上似乎可以工作。此外,我还尝试编写自己的客户端,试图设置从会话创建中检索到的 cookie,但也没有成功。我还尝试通过安全 WebSocket 进行连接,但未能成功。下面是我一直在使用的一些示例代码,有什么想法吗?
/**
 * Demo client for the IBM Watson Speech to Text service.
 *
 * <p>Exercises the service two ways: via the session-based HTTP API
 * ({@link #httpClient()}) and via the token-authenticated WebSocket API
 * ({@link #webSocketClient()}).
 */
public class Speech2Text extends WatsonService {

    private static final Logger logger = LoggerFactory.getLogger(Speech2Text.class);

    /** Entry point: runs the HTTP (session-based) client. */
    public static void main(String[] args)
            throws FileNotFoundException, UnsupportedEncodingException, InterruptedException {
        Speech2Text s2t = new Speech2Text();
        s2t.httpClient();
    }

    /**
     * Transcribes a local wav file through the session-based REST API and writes
     * the transcript to disk.
     *
     * @throws FileNotFoundException      if the transcript output file cannot be created
     * @throws UnsupportedEncodingException if UTF-8 is unsupported (never on a standard JVM)
     */
    public void httpClient() throws FileNotFoundException, UnsupportedEncodingException {
        logger.info("Running http client");
        final Stopwatch stopwatch = Stopwatch.createStarted();

        SpeechToText service = new SpeechToText();
        service.setUsernameAndPassword("XXXXXX", "XXXXX");

        // List the available speech models (sanity check that auth works).
        List<SpeechModel> models = service.getModels();
        for (SpeechModel model : models) {
            logger.info(model.getName());
        }

        SpeechSession session = service.createSession("en-US_NarrowbandModel");
        System.out.println(session.toString());
        SessionStatus status = service.getRecognizeStatus(session);
        logger.info(status.getModel());
        logger.info(service.getEndPoint());

        File audio = new File("/home/baaron/watson-bluemix/answer_06.wav");
        // FIX: was a raw Map/HashMap; typed map avoids unchecked warnings.
        Map<String, Object> params = new HashMap<String, Object>();
        params.put("audio", audio);
        params.put("content_type", "audio/wav");
        params.put("continuous", "true");
        params.put("session_id", session.getSessionId());
        logger.info(service.getEndPoint());

        // FIX: the writer was leaked and the session never deleted when
        // recognize() threw; try/finally guarantees cleanup either way.
        PrintWriter writer =
                new PrintWriter("/home/baaron/watson-bluemix/PCCJPApart1test.transcript", "UTF-8");
        try {
            SpeechResults transcript = service.recognize(params);
            writer.println(transcript.toString());
            SessionStatus status1 = service.getRecognizeStatus(session.getSessionId());
            System.out.println(status1);
        } finally {
            writer.close();
            service.deleteSession(session.getSessionId());
        }

        stopwatch.stop();
        logger.info("Processing took: " + stopwatch + ".");
    }

    /**
     * Obtains an authorization token over HTTP, then opens a secure WebSocket
     * to the recognize endpoint and sends the "start" action.
     *
     * @throws URISyntaxException   if the WebSocket URI is malformed
     * @throws IOException          on token-request I/O failure
     * @throws InterruptedException if the blocking connect is interrupted
     */
    public void webSocketClient() throws URISyntaxException, IOException, InterruptedException {
        logger.info("Running web socket client");

        String encoding = new String(Base64.encodeBase64String("XXXXXXXXXX".getBytes()));
        HttpPost httppost = new HttpPost(
                "https://stream.watsonplatform.net/authorization/api/v1/token?url=https://stream.watsonplatform.net/speech-to-text/api");
        httppost.setHeader("Authorization", "Basic " + encoding);
        System.out.println("executing request " + httppost.getRequestLine());

        DefaultHttpClient httpclient = new DefaultHttpClient();
        HttpResponse response = httpclient.execute(httppost);
        HttpEntity entity = response.getEntity();
        logger.info(response.getStatusLine().getReasonPhrase());
        WebSocketImpl.DEBUG = true;

        // FIX: reader (and the underlying entity stream) was never closed.
        StringBuilder out = new StringBuilder();
        BufferedReader reader = new BufferedReader(new InputStreamReader(entity.getContent()));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                out.append(line);
            }
        } finally {
            reader.close();
        }
        String token = out.toString();

        final WebSocketClient client = new WebSocketClient(
                new URI("wss://stream.watsonplatform.net/speech-to-text-beta/api/v1/recognize?watson-token=" + token)) {
            @Override
            public void onMessage(String message) {
                JSONObject obj = new JSONObject(message);
                // NOTE(review): responses are not yet consumed here.
            }

            @Override
            public void onOpen(ServerHandshake handshake) {
                System.out.println("opened connection");
            }

            @Override
            public void onClose(int code, String reason, boolean remote) {
                System.out.println("closed connection");
            }

            @Override
            public void onError(Exception ex) {
                ex.printStackTrace();
            }
        };

        // Open the websocket over TLS.
        SSLContext sslContext = null;
        try {
            sslContext = SSLContext.getInstance("TLS");
            sslContext.init(null, null, null);
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
        } catch (KeyManagementException e) {
            e.printStackTrace();
        }
        client.setWebSocketFactory(new DefaultSSLWebSocketClientFactory(sslContext));
        logger.info("CONNECTED: " + client.connectBlocking());

        // Kick off recognition: the service expects a "start" action first.
        JSONObject obj = new JSONObject();
        obj.put("action", "start");
        obj.put("content-type", "audio/wav");
        client.send(obj.toString());
        logger.info("Done");
    }
}
公共类Speech2Text扩展WatsonService{
私有静态最终记录器Logger=LoggerFactory.getLogger(Speech2Text.class);
公共静态void main(字符串[]args)引发FileNotFoundException、UnsupportedEncodingException、InterruptedException{
Speech2Text s2t=新Speech2Text();
s2t.httpClient();
//试一试{
//s2t.webSocketClient();
//}catch(URISyntaxException e){
//TODO自动生成的捕捉块
//e.printStackTrace();
//}捕获(IOE异常){
//TODO自动生成的捕捉块
//e.printStackTrace();
// }
}
public void httpClient()引发FileNotFoundException,UnsupportedEncodingException{
info(“运行http客户端”);
final Stopwatch Stopwatch=Stopwatch.createStarted();
SpeechToText服务=新建SpeechToText();
服务。设置用户名和密码(“XXXXXX”、“XXXXX”);
List models=service.getModels();
用于(演讲模型:模型){
logger.info(model.getName());
}
SpeechSession session=service.createSession(“en-US_窄带模型”);
System.out.println(session.toString());
SessionStatus status=service.getRecognizeStatus(会话);
logger.info(status.getModel());
logger.info(service.getEndPoint());
文件音频=新文件(“/home/baaron/watson bluemix/answer_06.wav”);
Map params=新的HashMap();
参数put(“音频”,音频);
参数put(“内容类型”、“音频/波形”);
参数put(“连续”、“真实”);
put(“session_id”,session.getSessionId());
logger.info(service.getEndPoint());
SpeechResults转录本=服务。识别(参数);
PrintWriter writer=新的PrintWriter(“/home/baaron/watson bluemix/PCCJPApart1test.transcript”,“UTF-8”);
writer.println(transcript.toString());
SessionStatus status 1=service.getRecognizeStatus(session.getSessionId());
系统输出打印项次(状态1);
deleteSession(session.getSessionId());
writer.close();
秒表;
info(“处理时间:“+stopwatch+”);
}
public void webSocketClient()引发URI语法异常、IOException、,
中断异常{
info(“运行web套接字客户端”);
字符串编码=新字符串(Base64.encodeBase64String(“xxxxxxxxx.getBytes());
HttpPost HttpPost=新的HttpPost(“https://stream.watsonplatform.net/authorization/api/v1/token?url=https://stream.watsonplatform.net/speech-改为“文本/api”);
httppost.setHeader(“授权”、“基本”+编码);
System.out.println(“正在执行请求”+httppost.getRequestLine());
DefaultHttpClient httpclient=新的DefaultHttpClient();
HttpResponse response=httpclient.execute(httppost);
HttpEntity=response.getEntity();
logger.info(response.getStatusLine().getReasonPhrase());
WebSocketImpl.DEBUG=true;
BufferedReader=new BufferedReader(new InputStreamReader(entity.getContent());
StringBuilder out=新的StringBuilder();
弦线;
而((line=reader.readLine())!=null){
out.追加(行);
}
字符串标记=out.toString();
最终WebSocketClient=新WebSocketClient(
新URI(“wss://stream.watsonplatform.net/speech-to-text-beta/api/v1/recognize?watson-令牌=“+令牌”){
@凌驾
公共消息无效(字符串消息){
JSONObject obj=新的JSONObject(消息);
//字符串通道=obj.getString(“通道”);
}
@凌驾
公共打开(服务器握手握手){
System.out.println(“打开的连接”);
}
@凌驾
公共void onClose(int代码、字符串原因、布尔远程){
System.out.println(“闭合连接”);
}
@凌驾
公共无效申报人(例外情况除外){
例如printStackTrace();
}
};
//开口网袋
SSLContext SSLContext=null;
试一试{
sslContext=sslContext.getInstance(“TLS”);
init(null,null,null);
}捕获(无算法异常){
e、 printStackTrace();
}捕获(密钥管理异常e){
e、 printStackTrace();
}
client.setWebSocketFactory(新的默认SSLWebSocketClientFactory(
sslContext);
logger.info(“已连接:+client.connectBlocking());
JSONObject obj=新的JSONObject();
目标付诸表决(“行动”、“开始”);
obj.put(“内容类型”、“音频/波形”);
send(obj.toString());
logger.info(“完成”);
}
}
即使创建了会话,服务也不会列出您的会话。检查一个会话是否存在的方法是对该会话发出一个 GET 请求:如果会话存在,您会得到 200 响应,否则会得到 404 响应。记住,会话在空闲约 30 秒后会过期。此外,较新版本的 Java SDK 不再需要手动管理会话,可以直接按文件识别:
SpeechToText service = new SpeechToText();
// FIX: original read setUsernameAndPassword("{username"}, "{password}") — the
// misplaced brace inside the first literal was a syntax error.
service.setUsernameAndPassword("{username}", "{password}");

// Recognition options shared by every file in the batch.
RecognizeOptions options = new RecognizeOptions.Builder()
    .contentType("audio/wav")
    .continuous(true)
    .model("en-US_NarrowbandModel")
    .inactivityTimeout(-1) // Seconds after which the connection is closed if no audio is detected; -1 disables the timeout
    .build();

String[] files = {"file1.wav", "file2.wav"};
for (String file : files) {
    SpeechResults results = service.recognize(new File(file), options).execute();
    System.out.println(results); // print results(you could write them to a file)
}
<!-- Maven dependency for the IBM Watson Java SDK 3.8.0, which bundles the
     SpeechToText service client used in the snippet above. -->
<dependency>
<groupId>com.ibm.watson.developer_cloud</groupId>
<artifactId>java-sdk</artifactId>
<version>3.8.0</version>
</dependency>
// Gradle equivalent of the Maven coordinates: IBM Watson Java SDK 3.8.0.
compile 'com.ibm.watson.developer_cloud:java-sdk:3.8.0'