Java AWS Transcribe:将文件转为文本时返回无意义的转录结果
这是我此前一个问题的后续。我使用流读取 .wav 文件内容并将其发送到 AWS,但没有得到正确的转录文本,而是得到了一堆"是的"之类的无意义内容。看起来 AWS 无法正确解释这个字节流,但我不确定哪里出了问题。我想知道文件是否需要先做编码——也就是说,能不能直接发送从文件读出的原始 .wav 字节?或者我是否需要告诉服务这是 .wav 格式?这里出了什么问题?输入文件是一个有效的 .wav 语音文件,播放时听起来清晰可懂。以下是我的 Java 代码:
package com.amazonaws.transcribe;
import org.reactivestreams.Publisher;
import org.reactivestreams.Subscriber;
import org.reactivestreams.Subscription;
import software.amazon.awssdk.core.SdkBytes;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.transcribestreaming.TranscribeStreamingAsyncClient;
import software.amazon.awssdk.services.transcribestreaming.model.*;
import javax.sound.sampled.*;
import java.io.*;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Streams a local .wav file to AWS Transcribe Streaming and prints the
 * transcript results to stdout.
 *
 * Fixes over the original:
 * - The media sample rate is read from the wav header instead of being
 *   hard-coded to 16 kHz. Declaring 16 kHz while sending 44.1 kHz audio is
 *   what made the service return garbage transcripts (the service does not
 *   error on a wrong rate; it just mis-decodes the PCM).
 * - "~" is not expanded by the JVM, so the input path is resolved against
 *   the user's home directory explicitly.
 * - AudioStreamPublisher keeps its current subscription in an instance
 *   field (it was a static field mutated through "this").
 * - SubscriptionImpl.request() returns after signaling onError, and the
 *   worker executor is shut down once the stream completes.
 */
public class TranscribeFileFromStream {
    private static final Region REGION = Region.US_EAST_1;
    private static TranscribeStreamingAsyncClient client;

    public static void main(String args[]) throws Exception {
        System.out.println(System.getProperty("java.version"));
        // Java does not expand "~"; resolve the path against user.home.
        File inputFile = new File(System.getProperty("user.home"),
                "work/transcribe/src/main/resources/story/media/Story3.m4a.wav");
        client = TranscribeStreamingAsyncClient.builder()
                .region(REGION)
                .build();
        try {
            CompletableFuture<Void> result = client.startStreamTranscription(
                    // Use the file's real sample rate; a wrong rate silently
                    // produces nonsense transcripts rather than an error.
                    getRequest(detectSampleRateHertz(inputFile)),
                    new AudioStreamPublisher(getStreamFromFile(inputFile)),
                    getResponseHandler());
            result.get(); // block until the transcription stream finishes
        } finally {
            if (client != null) {
                client.close();
            }
        }
    }

    /**
     * Reads the sample rate (Hz) from the wav header.
     *
     * @throws UnsupportedAudioFileException if the Java sound API cannot
     *         parse the file
     * @throws IOException on read failure
     */
    private static int detectSampleRateHertz(File inputFile)
            throws IOException, UnsupportedAudioFileException {
        AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(inputFile);
        try {
            return Math.round(audioInputStream.getFormat().getSampleRate());
        } finally {
            audioInputStream.close(); // don't leak the header-probe stream
        }
    }

    /** Opens the audio file as a plain byte stream for the publisher. */
    private static InputStream getStreamFromFile(File inputFile) {
        try {
            return new FileInputStream(inputFile);
        } catch (FileNotFoundException e) {
            throw new RuntimeException(e);
        }
    }

    /** Builds the streaming request for PCM audio at the given sample rate. */
    private static StartStreamTranscriptionRequest getRequest(Integer mediaSampleRateHertz) {
        return StartStreamTranscriptionRequest.builder()
                .languageCode(LanguageCode.EN_US)
                .mediaEncoding(MediaEncoding.PCM)
                .mediaSampleRateHertz(mediaSampleRateHertz)
                .build();
    }

    /** Logs lifecycle events and prints the first alternative of each result. */
    private static StartStreamTranscriptionResponseHandler getResponseHandler() {
        return StartStreamTranscriptionResponseHandler.builder()
                .onResponse(r -> {
                    System.out.println("Received Initial response");
                })
                .onError(e -> {
                    System.out.println(e.getMessage());
                    StringWriter sw = new StringWriter();
                    e.printStackTrace(new PrintWriter(sw));
                    System.out.println("Error Occurred: " + sw.toString());
                })
                .onComplete(() -> {
                    System.out.println("=== All records stream successfully ===");
                })
                .subscriber(event -> {
                    List<Result> results = ((TranscriptEvent) event).transcript().results();
                    if (results.size() > 0) {
                        if (!results.get(0).alternatives().get(0).transcript().isEmpty()) {
                            System.out.println(results.get(0).alternatives().get(0).transcript());
                        } else {
                            System.out.println("Empty result");
                        }
                    } else {
                        System.out.println("No results");
                    }
                })
                .build();
    }

    /** Publishes the audio file's bytes as AudioStream events. */
    private static class AudioStreamPublisher implements Publisher<AudioStream> {
        private final InputStream inputStream;
        // Instance state (was static): each publisher owns its subscription.
        private Subscription currentSubscription;

        private AudioStreamPublisher(InputStream inputStream) {
            this.inputStream = inputStream;
        }

        @Override
        public void subscribe(Subscriber<? super AudioStream> s) {
            // Backed by a one-shot InputStream, so only one active subscriber
            // is supported; cancel any previous subscription first.
            if (currentSubscription != null) {
                currentSubscription.cancel();
            }
            currentSubscription = new SubscriptionImpl(s, inputStream);
            s.onSubscribe(currentSubscription);
        }
    }

    /**
     * Reads the stream in 1 KiB chunks on a single worker thread and emits
     * one AudioEvent per chunk, honoring the subscriber's demand counter.
     */
    public static class SubscriptionImpl implements Subscription {
        private static final int CHUNK_SIZE_IN_BYTES = 1024;
        private final Subscriber<? super AudioStream> subscriber;
        private final InputStream inputStream;
        private final ExecutorService executor = Executors.newFixedThreadPool(1);
        private final AtomicLong demand = new AtomicLong(0);

        SubscriptionImpl(Subscriber<? super AudioStream> s, InputStream inputStream) {
            this.subscriber = s;
            this.inputStream = inputStream;
        }

        @Override
        public void request(long n) {
            if (n <= 0) {
                // Reactive Streams rule 3.9: non-positive demand is an error,
                // and no further signals may follow it.
                subscriber.onError(new IllegalArgumentException("Demand must be positive"));
                return;
            }
            demand.getAndAdd(n);
            executor.submit(() -> {
                try {
                    do {
                        ByteBuffer audioBuffer = getNextEvent();
                        if (audioBuffer.remaining() > 0) {
                            subscriber.onNext(audioEventFromBuffer(audioBuffer));
                        } else {
                            // EOF: complete the stream and stop the worker.
                            subscriber.onComplete();
                            executor.shutdown();
                            break;
                        }
                    } while (demand.decrementAndGet() > 0);
                } catch (Exception e) {
                    subscriber.onError(e);
                }
            });
        }

        @Override
        public void cancel() {
            executor.shutdown();
        }

        /** Returns the next chunk, or an empty buffer at end of stream. */
        private ByteBuffer getNextEvent() {
            byte[] audioBytes = new byte[CHUNK_SIZE_IN_BYTES];
            try {
                int len = inputStream.read(audioBytes);
                if (len <= 0) {
                    return ByteBuffer.allocate(0);
                }
                return ByteBuffer.wrap(audioBytes, 0, len);
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        }

        /** Wraps a byte buffer in an AudioEvent for the AWS streaming API. */
        private AudioEvent audioEventFromBuffer(ByteBuffer bb) {
            return AudioEvent.builder()
                    .audioChunk(SdkBytes.fromByteBuffer(bb))
                    .build();
        }
    }
}
音频文件的采样率是 44.1 kHz。把它转换为 16 kHz 之后就正常工作了:
正如 smac2020 所指出的,采样率设置错了。调试传给 AWS 的错误元数据值很棘手,因为 AWS 并不会报错——你只会得到一份不正确的转录文本。所以这里的教训是:务必确认你传入的值是正确的,其中一部分是可以自动检测出来的。如果你用的是 mac,mediainfo 工具非常有用:
brew install mediainfo
ffmpeg也是如此:
brew install ffmpeg
下面是一个更新的示例,其中我使用AudioFormat.java自动检测采样率。理想情况下,AWS sdk可以为您完成这项工作。如果媒体文件超出了可转录内容的参数范围,则会引发异常。注意,我不得不使用工具nch.com.au/switch/index.html将我的原始文件修改为16000采样率。如果SDK还能够修改采样率等,以便可以更改文件以适应输入参数,那就太好了(提示,提示)
package com.amazonaws.transcribe;
import org.reactivestreams.Publisher;
import org.reactivestreams.Subscriber;
import org.reactivestreams.Subscription;
import software.amazon.awssdk.core.SdkBytes;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.transcribestreaming.TranscribeStreamingAsyncClient;
import software.amazon.awssdk.services.transcribestreaming.model.*;
import javax.sound.sampled.*;
import java.io.*;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
import static javax.sound.sampled.AudioFormat.Encoding.*;
/**
 * Working transcriber: the media encoding and sample rate are auto-detected
 * from the wav header via javax.sound.sampled instead of being hard-coded.
 *
 * Fixes over the original answer code:
 * - The AWS SDK for Java reads the system properties "aws.accessKeyId" and
 *   "aws.secretAccessKey"; "AWS_ACCESS_KEY_ID"/"AWS_SECRET_ACCESS_KEY" are
 *   *environment variable* names and are ignored as system properties.
 * - The header-probe AudioInputStream is closed after use.
 * - AudioStreamPublisher keeps its subscription in an instance field (was a
 *   static field mutated through "this").
 * - SubscriptionImpl.request() returns after signaling onError, and the
 *   worker executor is shut down once the stream completes.
 */
public class TranscribeFileFromStream {
    private static final Region REGION = Region.US_EAST_1;
    private static TranscribeStreamingAsyncClient client;

    public static void main(String args[]) throws Exception {
        // Correct system property names for the SDK's credential chain.
        // (Prefer ~/.aws/credentials or real env vars over hard-coding.)
        System.setProperty("aws.accessKeyId", "myId");
        System.setProperty("aws.secretAccessKey", "myKey");
        System.out.println(System.getProperty("java.version"));
        // BasicConfigurator.configure();
        client = TranscribeStreamingAsyncClient.builder()
                .region(REGION)
                .build();
        try {
            File inputFile = new File("/home/me/work/transcribe/src/main/resources/test-file.wav");
            // Request metadata (encoding + sample rate) is derived from the
            // file itself; a wrong sample rate yields garbage, not an error.
            CompletableFuture<Void> result = client.startStreamTranscription(
                    getRequest(inputFile),
                    new AudioStreamPublisher(getStreamFromFile(inputFile)),
                    getResponseHandler());
            result.get(); // block until the transcription stream finishes
        } finally {
            if (client != null) {
                client.close();
            }
        }
    }

    /** Opens the audio file as a plain byte stream for the publisher. */
    private static InputStream getStreamFromFile(File inputFile) {
        try {
            return new FileInputStream(inputFile);
        } catch (FileNotFoundException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds the streaming request, auto-detecting encoding and sample rate
     * from the wav header.
     *
     * @throws UnsupportedAudioFileException if the Java sound API cannot
     *         parse the file
     * @throws IOException on read failure
     */
    private static StartStreamTranscriptionRequest getRequest(File inputFile)
            throws IOException, UnsupportedAudioFileException {
        //TODO: I read the file twice in this example. Can this be more performant?
        AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(inputFile);
        AudioFormat audioFormat;
        try {
            audioFormat = audioInputStream.getFormat();
        } finally {
            audioInputStream.close(); // don't leak the header-probe stream
        }
        return StartStreamTranscriptionRequest.builder()
                .languageCode(LanguageCode.EN_US)
                .mediaEncoding(getAwsMediaEncoding(audioFormat))
                .mediaSampleRateHertz(getAwsSampleRate(audioFormat))
                .build();
    }

    /**
     * Maps the Java sound API encoding to the AWS Transcribe MediaEncoding.
     * Only signed/unsigned PCM is recognized; anything else throws so we
     * never silently declare the wrong encoding to the service.
     */
    private static MediaEncoding getAwsMediaEncoding(AudioFormat audioFormat) {
        final String javaMediaEncoding = audioFormat.getEncoding().toString();
        if (PCM_SIGNED.toString().equals(javaMediaEncoding)
                || PCM_UNSIGNED.toString().equals(javaMediaEncoding)) {
            return MediaEncoding.PCM;
        }
        // ALAW/ULAW deliberately unmapped: there is no direct AWS equivalent.
        throw new IllegalArgumentException("Not a recognized media encoding:" + javaMediaEncoding);
    }

    /** Sample rate from the wav header, rounded to the nearest integer Hz. */
    private static Integer getAwsSampleRate(AudioFormat audioFormat) {
        return Math.round(audioFormat.getSampleRate());
    }

    /** Logs lifecycle events and prints the first alternative of each result. */
    private static StartStreamTranscriptionResponseHandler getResponseHandler() {
        return StartStreamTranscriptionResponseHandler.builder()
                .onResponse(r -> {
                    System.out.println("Received Initial response");
                })
                .onError(e -> {
                    System.out.println(e.getMessage());
                    StringWriter sw = new StringWriter();
                    e.printStackTrace(new PrintWriter(sw));
                    System.out.println("Error Occurred: " + sw.toString());
                })
                .onComplete(() -> {
                    System.out.println("=== All records stream successfully ===");
                })
                .subscriber(event -> {
                    List<Result> results = ((TranscriptEvent) event).transcript().results();
                    if (results.size() > 0) {
                        if (!results.get(0).alternatives().get(0).transcript().isEmpty()) {
                            System.out.println(results.get(0).alternatives().get(0).transcript());
                        } else {
                            System.out.println("Empty result");
                        }
                    } else {
                        System.out.println("No results");
                    }
                })
                .build();
    }

    /** Publishes the audio file's bytes as AudioStream events. */
    private static class AudioStreamPublisher implements Publisher<AudioStream> {
        private final InputStream inputStream;
        // Instance state (was static): each publisher owns its subscription.
        private Subscription currentSubscription;

        private AudioStreamPublisher(InputStream inputStream) {
            this.inputStream = inputStream;
        }

        @Override
        public void subscribe(Subscriber<? super AudioStream> s) {
            // Backed by a one-shot InputStream, so only one active subscriber
            // is supported; cancel any previous subscription first.
            if (currentSubscription != null) {
                currentSubscription.cancel();
            }
            currentSubscription = new SubscriptionImpl(s, inputStream);
            s.onSubscribe(currentSubscription);
        }
    }

    /**
     * Reads the stream in 1 KiB chunks on a single worker thread and emits
     * one AudioEvent per chunk, honoring the subscriber's demand counter.
     */
    public static class SubscriptionImpl implements Subscription {
        private static final int CHUNK_SIZE_IN_BYTES = 1024;
        private final Subscriber<? super AudioStream> subscriber;
        private final InputStream inputStream;
        private final ExecutorService executor = Executors.newFixedThreadPool(1);
        private final AtomicLong demand = new AtomicLong(0);

        SubscriptionImpl(Subscriber<? super AudioStream> s, InputStream inputStream) {
            this.subscriber = s;
            this.inputStream = inputStream;
        }

        @Override
        public void request(long n) {
            if (n <= 0) {
                // Reactive Streams rule 3.9: non-positive demand is an error,
                // and no further signals may follow it.
                subscriber.onError(new IllegalArgumentException("Demand must be positive"));
                return;
            }
            demand.getAndAdd(n);
            executor.submit(() -> {
                try {
                    do {
                        ByteBuffer audioBuffer = getNextEvent();
                        if (audioBuffer.remaining() > 0) {
                            subscriber.onNext(audioEventFromBuffer(audioBuffer));
                        } else {
                            // EOF: complete the stream and stop the worker.
                            subscriber.onComplete();
                            executor.shutdown();
                            break;
                        }
                    } while (demand.decrementAndGet() > 0);
                } catch (Exception e) {
                    subscriber.onError(e);
                }
            });
        }

        @Override
        public void cancel() {
            executor.shutdown();
        }

        /** Returns the next chunk, or an empty buffer at end of stream. */
        private ByteBuffer getNextEvent() {
            byte[] audioBytes = new byte[CHUNK_SIZE_IN_BYTES];
            try {
                int len = inputStream.read(audioBytes);
                if (len <= 0) {
                    return ByteBuffer.allocate(0);
                }
                return ByteBuffer.wrap(audioBytes, 0, len);
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        }

        /** Wraps a byte buffer in an AudioEvent for the AWS streaming API. */
        private AudioEvent audioEventFromBuffer(ByteBuffer bb) {
            return AudioEvent.builder()
                    .audioChunk(SdkBytes.fromByteBuffer(bb))
                    .build();
        }
    }
}
package com.amazonaws.transcribe;
import org.reactivestreams.Publisher;
import org.reactivestreams.Subscriber;
import org.reactivestreams.Subscription;
import software.amazon.awssdk.core.SdkBytes;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.transcribestreaming.TranscribeStreamingAsyncClient;
import software.amazon.awssdk.services.transcribestreaming.model.*;
import javax.sound.sampled.*;
import java.io.*;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
import static javax.sound.sampled.AudioFormat.Encoding.*;
public class TranscribeFileFromStream {
private static final Region REGION = Region.US_EAST_1;
private static TranscribeStreamingAsyncClient client;
public static void main(String args[]) throws Exception {
System.setProperty("AWS_ACCESS_KEY_ID", "myId");
System.setProperty("AWS_SECRET_ACCESS_KEY", "myKey");
System.out.println(System.getProperty("java.version"));
// BasicConfigurator.configure();
client = TranscribeStreamingAsyncClient.builder()
.region(REGION)
.build();
try {
File inputFile = new File("/home/me/work/transcribe/src/main/resources/test-file.wav");
CompletableFuture<Void> result = client.startStreamTranscription(
getRequest(inputFile),
new AudioStreamPublisher(getStreamFromFile(inputFile)),
getResponseHandler());
result.get();
} finally {
if (client != null) {
client.close();
}
}
}
private static InputStream getStreamFromFile(File inputFile) {
try {
return new FileInputStream(inputFile);
} catch (FileNotFoundException e) {
throw new RuntimeException(e);
}
}
private static StartStreamTranscriptionRequest getRequest(File inputFile) throws IOException, UnsupportedAudioFileException {
//TODO: I read the file twice in this example. Can this be more performant?
AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(inputFile);
AudioFormat audioFormat = audioInputStream.getFormat();
return StartStreamTranscriptionRequest.builder()
.languageCode(LanguageCode.EN_US)
//.mediaEncoding(MediaEncoding.PCM)
.mediaEncoding(getAwsMediaEncoding(audioFormat))
.mediaSampleRateHertz(getAwsSampleRate(audioFormat))
.build();
}
private static MediaEncoding getAwsMediaEncoding(AudioFormat audioFormat) {
final String javaMediaEncoding = audioFormat.getEncoding().toString();
if (PCM_SIGNED.toString().equals(javaMediaEncoding)) {
return MediaEncoding.PCM;
} else if (PCM_UNSIGNED.toString().equals(javaMediaEncoding)){
return MediaEncoding.PCM;
} /*else if (ALAW.toString().equals(javaMediaEncoding)){
//WARNING: I have no idea how ALAW maps to AWS media encodings.
return MediaEncoding.OGG_OPUS;
} else if (ULAW.toString().equals(javaMediaEncoding)){
//WARNING: I have no idea how ULAW maps to AWS encodings.
return MediaEncoding.FLAC;
}*/
throw new IllegalArgumentException("Not a recognized media encoding:" + javaMediaEncoding);
}
private static Integer getAwsSampleRate(AudioFormat audioFormat) {
return Math.round(audioFormat.getSampleRate());
}
private static StartStreamTranscriptionResponseHandler getResponseHandler() {
return StartStreamTranscriptionResponseHandler.builder()
.onResponse(r -> {
System.out.println("Received Initial response");
})
.onError(e -> {
System.out.println(e.getMessage());
StringWriter sw = new StringWriter();
e.printStackTrace(new PrintWriter(sw));
package com.amazonaws.transcribe;
import org.reactivestreams.Publisher;
import org.reactivestreams.Subscriber;
import org.reactivestreams.Subscription;
import software.amazon.awssdk.core.SdkBytes;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.transcribestreaming.TranscribeStreamingAsyncClient;
import software.amazon.awssdk.services.transcribestreaming.model.*;
import javax.sound.sampled.*;
import java.io.*;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
import static javax.sound.sampled.AudioFormat.Encoding.*;
/**
 * Working version of the transcriber: the media encoding and sample rate are
 * auto-detected from the wav header via javax.sound.sampled instead of being
 * hard-coded, which is what fixed the garbage transcripts.
 */
public class TranscribeFileFromStream {
private static final Region REGION = Region.US_EAST_1;
private static TranscribeStreamingAsyncClient client;
public static void main(String args[]) throws Exception {
// NOTE(review): "AWS_ACCESS_KEY_ID"/"AWS_SECRET_ACCESS_KEY" are environment
// variable names; the Java system properties the SDK reads are
// "aws.accessKeyId"/"aws.secretAccessKey". Confirm credentials are actually
// being picked up from elsewhere (e.g. ~/.aws/credentials).
System.setProperty("AWS_ACCESS_KEY_ID", "myId");
System.setProperty("AWS_SECRET_ACCESS_KEY", "myKey");
System.out.println(System.getProperty("java.version"));
// BasicConfigurator.configure();
client = TranscribeStreamingAsyncClient.builder()
.region(REGION)
.build();
try {
File inputFile = new File("/home/me/work/transcribe/src/main/resources/test-file.wav");
// Request metadata (encoding + sample rate) is derived from the file itself.
CompletableFuture<Void> result = client.startStreamTranscription(
getRequest(inputFile),
new AudioStreamPublisher(getStreamFromFile(inputFile)),
getResponseHandler());
result.get(); // block until streaming finishes
} finally {
if (client != null) {
client.close();
}
}
}
// Opens the audio file as a raw byte stream for the publisher.
private static InputStream getStreamFromFile(File inputFile) {
try {
return new FileInputStream(inputFile);
} catch (FileNotFoundException e) {
throw new RuntimeException(e);
}
}
// Builds the request, auto-detecting encoding and sample rate from the wav header.
private static StartStreamTranscriptionRequest getRequest(File inputFile) throws IOException, UnsupportedAudioFileException {
//TODO: I read the file twice in this example. Can this be more performant?
AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(inputFile);
AudioFormat audioFormat = audioInputStream.getFormat();
return StartStreamTranscriptionRequest.builder()
.languageCode(LanguageCode.EN_US)
//.mediaEncoding(MediaEncoding.PCM)
.mediaEncoding(getAwsMediaEncoding(audioFormat))
.mediaSampleRateHertz(getAwsSampleRate(audioFormat))
.build();
}
// Maps the Java sound API encoding to the AWS Transcribe MediaEncoding enum.
// Only signed/unsigned PCM is recognized; anything else throws.
private static MediaEncoding getAwsMediaEncoding(AudioFormat audioFormat) {
final String javaMediaEncoding = audioFormat.getEncoding().toString();
if (PCM_SIGNED.toString().equals(javaMediaEncoding)) {
return MediaEncoding.PCM;
} else if (PCM_UNSIGNED.toString().equals(javaMediaEncoding)){
return MediaEncoding.PCM;
} /*else if (ALAW.toString().equals(javaMediaEncoding)){
//WARNING: I have no idea how ALAW maps to AWS media encodings.
return MediaEncoding.OGG_OPUS;
} else if (ULAW.toString().equals(javaMediaEncoding)){
//WARNING: I have no idea how ULAW maps to AWS encodings.
return MediaEncoding.FLAC;
}*/
throw new IllegalArgumentException("Not a recognized media encoding:" + javaMediaEncoding);
}
// Sample rate from the wav header, rounded to the nearest integer Hz.
private static Integer getAwsSampleRate(AudioFormat audioFormat) {
return Math.round(audioFormat.getSampleRate());
}
// Prints lifecycle events and the first alternative of each transcript result.
private static StartStreamTranscriptionResponseHandler getResponseHandler() {
return StartStreamTranscriptionResponseHandler.builder()
.onResponse(r -> {
System.out.println("Received Initial response");
})
.onError(e -> {
System.out.println(e.getMessage());
StringWriter sw = new StringWriter();
e.printStackTrace(new PrintWriter(sw));
System.out.println("Error Occurred: " + sw.toString());
})
.onComplete(() -> {
System.out.println("=== All records stream successfully ===");
})
.subscriber(event -> {
List<Result> results = ((TranscriptEvent) event).transcript().results();
if (results.size() > 0) {
if (!results.get(0).alternatives().get(0).transcript().isEmpty()) {
System.out.println(results.get(0).alternatives().get(0).transcript());
} else {
System.out.println("Empty result");
}
} else {
System.out.println("No results");
}
})
.build();
}
// Publishes the file's bytes as AudioStream events to the Transcribe client.
private static class AudioStreamPublisher implements Publisher<AudioStream> {
private final InputStream inputStream;
// NOTE(review): this field is static but mutated through `this`; two
// publisher instances would overwrite each other's subscription — confirm
// only one publisher is ever created per run.
private static Subscription currentSubscription;
private AudioStreamPublisher(InputStream inputStream) {
this.inputStream = inputStream;
}
@Override
public void subscribe(Subscriber<? super AudioStream> s) {
// Replace any previous subscription before handing out a new one.
if (this.currentSubscription == null) {
this.currentSubscription = new SubscriptionImpl(s, inputStream);
} else {
this.currentSubscription.cancel();
this.currentSubscription = new SubscriptionImpl(s, inputStream);
}
s.onSubscribe(currentSubscription);
}
}
// Reactive-streams Subscription that reads the file in 1 KiB chunks on a
// single background thread, honoring the subscriber's demand counter.
public static class SubscriptionImpl implements Subscription {
private static final int CHUNK_SIZE_IN_BYTES = 1024 * 1;
private final Subscriber<? super AudioStream> subscriber;
private final InputStream inputStream;
private ExecutorService executor = Executors.newFixedThreadPool(1);
private AtomicLong demand = new AtomicLong(0);
SubscriptionImpl(Subscriber<? super AudioStream> s, InputStream inputStream) {
this.subscriber = s;
this.inputStream = inputStream;
}
@Override
public void request(long n) {
if (n <= 0) {
// NOTE(review): execution continues after onError here — Reactive
// Streams expects a `return` once the subscription signals failure.
subscriber.onError(new IllegalArgumentException("Demand must be positive"));
}
demand.getAndAdd(n);
executor.submit(() -> {
try {
do {
ByteBuffer audioBuffer = getNextEvent();
if (audioBuffer.remaining() > 0) {
AudioEvent audioEvent = audioEventFromBuffer(audioBuffer);
subscriber.onNext(audioEvent);
} else {
// Empty buffer means EOF: signal completion and stop.
subscriber.onComplete();
break;
}
} while (demand.decrementAndGet() > 0);
} catch (Exception e) {
subscriber.onError(e);
}
});
}
@Override
public void cancel() {
executor.shutdown();
}
// Reads the next chunk; returns an empty buffer at end of stream.
private ByteBuffer getNextEvent() {
ByteBuffer audioBuffer = null;
byte[] audioBytes = new byte[CHUNK_SIZE_IN_BYTES];
int len = 0;
try {
len = inputStream.read(audioBytes);
if (len <= 0) {
audioBuffer = ByteBuffer.allocate(0);
} else {
audioBuffer = ByteBuffer.wrap(audioBytes, 0, len);
}
} catch (IOException e) {
throw new UncheckedIOException(e);
}
return audioBuffer;
}
// Wraps a byte buffer in an AudioEvent for the AWS streaming API.
private AudioEvent audioEventFromBuffer(ByteBuffer bb) {
return AudioEvent.builder()
.audioChunk(SdkBytes.fromByteBuffer(bb))
.build();
}
}
}