Ibm cloud IBM Watson大型文件的语音到文本处理
我一直在尝试使用 Bluemix 的 Speech to Text Java 库,特别是 com.ibm.watson.developer_cloud.speech_to_text.v1 包中的 SpeechToText 类。我有很长的 wav 文件(约 70MB)想转换成文本,目标是使用 Java API 来识别文本。我意识到我需要每 30 秒检查一次调用状态,因为转写结束后,我只有 30 秒的时间来检索最终结果。为了在使用 RESTful API 时做到这一点,我需要创建一个会话,然后基于该会话查询其上运行的识别作业的状态。我已尝试创建会话,但该会话始终不可用;我已经验证了它在官方提供的 Web 示例应用上似乎可以工作。此外,我还尝试编写自己的客户端,试图设置从会话创建中检索到的 cookie,但也没有成功。我还尝试通过安全 WebSocket 进行连接,但未能成功。下面是我一直在使用的一些示例代码,有什么想法吗?
/**
 * Demo client for the IBM Watson Speech to Text service.
 *
 * <p>Exercises the service two ways: via the session-based HTTP API
 * ({@link #httpClient()}) and via the token-authenticated WebSocket API
 * ({@link #webSocketClient()}).
 */
public class Speech2Text extends WatsonService {

    private static final Logger logger = LoggerFactory.getLogger(Speech2Text.class);

    /** Entry point: runs the HTTP (session-based) client. */
    public static void main(String[] args)
            throws FileNotFoundException, UnsupportedEncodingException, InterruptedException {
        Speech2Text s2t = new Speech2Text();
        s2t.httpClient();
    }

    /**
     * Transcribes a local wav file through the session-based REST API and writes
     * the transcript to disk.
     *
     * @throws FileNotFoundException      if the transcript output file cannot be created
     * @throws UnsupportedEncodingException if UTF-8 is unsupported (never on a standard JVM)
     */
    public void httpClient() throws FileNotFoundException, UnsupportedEncodingException {
        logger.info("Running http client");
        final Stopwatch stopwatch = Stopwatch.createStarted();

        SpeechToText service = new SpeechToText();
        service.setUsernameAndPassword("XXXXXX", "XXXXX");

        // List the available speech models (sanity check that auth works).
        List<SpeechModel> models = service.getModels();
        for (SpeechModel model : models) {
            logger.info(model.getName());
        }

        SpeechSession session = service.createSession("en-US_NarrowbandModel");
        System.out.println(session.toString());
        SessionStatus status = service.getRecognizeStatus(session);
        logger.info(status.getModel());
        logger.info(service.getEndPoint());

        File audio = new File("/home/baaron/watson-bluemix/answer_06.wav");
        // FIX: was a raw Map/HashMap; typed map avoids unchecked warnings.
        Map<String, Object> params = new HashMap<String, Object>();
        params.put("audio", audio);
        params.put("content_type", "audio/wav");
        params.put("continuous", "true");
        params.put("session_id", session.getSessionId());
        logger.info(service.getEndPoint());

        // FIX: the writer was leaked and the session never deleted when
        // recognize() threw; try/finally guarantees cleanup either way.
        PrintWriter writer =
                new PrintWriter("/home/baaron/watson-bluemix/PCCJPApart1test.transcript", "UTF-8");
        try {
            SpeechResults transcript = service.recognize(params);
            writer.println(transcript.toString());
            SessionStatus status1 = service.getRecognizeStatus(session.getSessionId());
            System.out.println(status1);
        } finally {
            writer.close();
            service.deleteSession(session.getSessionId());
        }

        stopwatch.stop();
        logger.info("Processing took: " + stopwatch + ".");
    }

    /**
     * Obtains an authorization token over HTTP, then opens a secure WebSocket
     * to the recognize endpoint and sends the "start" action.
     *
     * @throws URISyntaxException   if the WebSocket URI is malformed
     * @throws IOException          on token-request I/O failure
     * @throws InterruptedException if the blocking connect is interrupted
     */
    public void webSocketClient() throws URISyntaxException, IOException, InterruptedException {
        logger.info("Running web socket client");

        String encoding = new String(Base64.encodeBase64String("XXXXXXXXXX".getBytes()));
        HttpPost httppost = new HttpPost(
                "https://stream.watsonplatform.net/authorization/api/v1/token?url=https://stream.watsonplatform.net/speech-to-text/api");
        httppost.setHeader("Authorization", "Basic " + encoding);
        System.out.println("executing request " + httppost.getRequestLine());

        DefaultHttpClient httpclient = new DefaultHttpClient();
        HttpResponse response = httpclient.execute(httppost);
        HttpEntity entity = response.getEntity();
        logger.info(response.getStatusLine().getReasonPhrase());
        WebSocketImpl.DEBUG = true;

        // FIX: reader (and the underlying entity stream) was never closed.
        StringBuilder out = new StringBuilder();
        BufferedReader reader = new BufferedReader(new InputStreamReader(entity.getContent()));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                out.append(line);
            }
        } finally {
            reader.close();
        }
        String token = out.toString();

        final WebSocketClient client = new WebSocketClient(
                new URI("wss://stream.watsonplatform.net/speech-to-text-beta/api/v1/recognize?watson-token=" + token)) {
            @Override
            public void onMessage(String message) {
                JSONObject obj = new JSONObject(message);
                // NOTE(review): responses are not yet consumed here.
            }

            @Override
            public void onOpen(ServerHandshake handshake) {
                System.out.println("opened connection");
            }

            @Override
            public void onClose(int code, String reason, boolean remote) {
                System.out.println("closed connection");
            }

            @Override
            public void onError(Exception ex) {
                ex.printStackTrace();
            }
        };

        // Open the websocket over TLS.
        SSLContext sslContext = null;
        try {
            sslContext = SSLContext.getInstance("TLS");
            sslContext.init(null, null, null);
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
        } catch (KeyManagementException e) {
            e.printStackTrace();
        }
        client.setWebSocketFactory(new DefaultSSLWebSocketClientFactory(sslContext));
        logger.info("CONNECTED: " + client.connectBlocking());

        // Kick off recognition: the service expects a "start" action first.
        JSONObject obj = new JSONObject();
        obj.put("action", "start");
        obj.put("content-type", "audio/wav");
        client.send(obj.toString());
        logger.info("Done");
    }
}
公共类Speech2Text扩展WatsonService{
私有静态最终记录器Logger=LoggerFactory.getLogger(Speech2Text.class);
公共静态void main(字符串[]args)引发FileNotFoundException、UnsupportedEncodingException、InterruptedException{
Speech2Text s2t=新Speech2Text();
s2t.httpClient();
//试一试{
//s2t.webSocketClient();
//}catch(URISyntaxException e){
//TODO自动生成的捕捉块
//e.printStackTrace();
//}捕获(IOE异常){
//TODO自动生成的捕捉块
//e.printStackTrace();
// }
}
public void httpClient()引发FileNotFoundException,UnsupportedEncodingException{
info(“运行http客户端”);
final Stopwatch Stopwatch=Stopwatch.createStarted();
SpeechToText服务=新建SpeechToText();
服务。设置用户名和密码(“XXXXXX”、“XXXXX”);
List models=service.getModels();
用于(演讲模型:模型){
logger.info(model.getName());
}
SpeechSession session=service.createSession(“en-US_窄带模型”);
System.out.println(session.toString());
SessionStatus status=service.getRecognizeStatus(会话);
logger.info(status.getModel());
logger.info(service.getEndPoint());
文件音频=新文件(“/home/baaron/watson bluemix/answer_06.wav”);
Map params=新的HashMap();
参数put(“音频”,音频);
参数put(“内容类型”、“音频/波形”);
参数put(“连续”、“真实”);
put(“session_id”,session.getSessionId());
logger.info(service.getEndPoint());
SpeechResults转录本=服务。识别(参数);
PrintWriter writer=新的PrintWriter(“/home/baaron/watson bluemix/PCCJPApart1test.transcript”,“UTF-8”);
writer.println(transcript.toString());
SessionStatus status 1=service.getRecognizeStatus(session.getSessionId());
系统输出打印项次(状态1);
deleteSession(session.getSessionId());
writer.close();
秒表;
info(“处理时间:“+stopwatch+”);
}
public void webSocketClient()引发URI语法异常、IOException、,
中断异常{
info(“运行web套接字客户端”);
字符串编码=新字符串(Base64.encodeBase64String(“xxxxxxxxx.getBytes());
HttpPost HttpPost=新的HttpPost(“https://stream.watsonplatform.net/authorization/api/v1/token?url=https://stream.watsonplatform.net/speech-改为“文本/api”);
httppost.setHeader(“授权”、“基本”+编码);
System.out.println(“正在执行请求”+httppost.getRequestLine());
DefaultHttpClient httpclient=新的DefaultHttpClient();
HttpResponse response=httpclient.execute(httppost);
HttpEntity=response.getEntity();
logger.info(response.getStatusLine().getReasonPhrase());
WebSocketImpl.DEBUG=true;
BufferedReader=new BufferedReader(new InputStreamReader(entity.getContent());
StringBuilder out=新的StringBuilder();
弦线;
而((line=reader.readLine())!=null){
out.追加(行);
}
字符串标记=out.toString();
最终WebSocketClient=新WebSocketClient(
新URI(“wss://stream.watsonplatform.net/speech-to-text-beta/api/v1/recognize?watson-令牌=“+令牌”){
@凌驾
公共消息无效(字符串消息){
JSONObject obj=新的JSONObject(消息);
//字符串通道=obj.getString(“通道”);
}
@凌驾
公共打开(服务器握手握手){
System.out.println(“打开的连接”);
}
@凌驾
公共void onClose(int代码、字符串原因、布尔远程){
System.out.println(“闭合连接”);
}
@凌驾
公共无效申报人(例外情况除外){
例如printStackTrace();
}
};
//开口网袋
SSLContext SSLContext=null;
试一试{
sslContext=sslContext.getInstance(“TLS”);
init(null,null,null);
}捕获(无算法异常){
e、 printStackTrace();
}捕获(密钥管理异常e){
e、 printStackTrace();
}
client.setWebSocketFactory(新的默认SSLWebSocketClientFactory(
sslContext);
logger.info(“已连接:+client.connectBlocking());
JSONObject obj=新的JSONObject();
目标付诸表决(“行动”、“开始”);
obj.put(“内容类型”、“音频/波形”);
send(obj.toString());
logger.info(“完成”);
}
}
即使创建了会话,服务也不会列出您的会话。检查一个会话是否存在的方法是对该会话发出一个 GET 请求:如果会话存在,您会得到 200 响应,否则会得到 404 响应。记住,会话在空闲约 30 秒后会过期。此外,较新版本的 Java SDK 不再需要手动管理会话,可以直接按文件识别:
SpeechToText service = new SpeechToText();
// FIX: original read setUsernameAndPassword("{username"}, "{password}") — the
// misplaced brace inside the first literal was a syntax error.
service.setUsernameAndPassword("{username}", "{password}");

// Recognition options shared by every file in the batch.
RecognizeOptions options = new RecognizeOptions.Builder()
    .contentType("audio/wav")
    .continuous(true)
    .model("en-US_NarrowbandModel")
    .inactivityTimeout(-1) // Seconds after which the connection is closed if no audio is detected; -1 disables the timeout
    .build();

String[] files = {"file1.wav", "file2.wav"};
for (String file : files) {
    SpeechResults results = service.recognize(new File(file), options).execute();
    System.out.println(results); // print results(you could write them to a file)
}
<!-- Maven dependency for the IBM Watson Java SDK 3.8.0, which bundles the
     SpeechToText service client used in the snippet above. -->
<dependency>
<groupId>com.ibm.watson.developer_cloud</groupId>
<artifactId>java-sdk</artifactId>
<version>3.8.0</version>
</dependency>
// Gradle equivalent of the Maven coordinates: IBM Watson Java SDK 3.8.0.
compile 'com.ibm.watson.developer_cloud:java-sdk:3.8.0'