Java 录制时检测静音
在Java中启动录制操作时,如何检测静音?什么是PCM数据?如何在Java中计算PCM数据 我找到了解决办法:Java 录制时检测静音,java,audio,speech-recognition,javasound,Java,Audio,Speech Recognition,Javasound,在Java中启动录制操作时,如何检测静音?什么是PCM数据?如何在Java中计算PCM数据 我找到了解决办法: package bemukan.voiceRecognition.speechToText; import javax.sound.sampled.*; import java.io.*; public class RecordAudio { private File audioFile; protected boolean running; private
package bemukan.voiceRecognition.speechToText;
import javax.sound.sampled.*;
import java.io.*;
/**
 * Captures audio from a {@link TargetDataLine} on a background thread, keeps the
 * captured bytes in memory, and tracks the peak level of the most recent buffer so
 * that a caller can decide when the input is silent.
 *
 * <p>The capture format is taken from the reference file {@code src/Facebook/1.wav}.
 * {@link #recordAudio()} starts capturing, {@link #stopAudio()} asks the capture
 * thread to finish, and {@link #getAudioFile()} writes what was captured to
 * {@code temp.wav}.
 */
public class RecordAudio {
    private File audioFile;
    /** Loop flag for the capture thread; volatile so a stop request is seen promptly. */
    protected volatile boolean running;
    /** Bytes captured by the current/last recording; null until recording starts. */
    private volatile ByteArrayOutputStream out;
    private AudioInputStream inputStream;
    final static float MAX_8_BITS_SIGNED = Byte.MAX_VALUE;
    final static float MAX_8_BITS_UNSIGNED = 0xff;
    final static float MAX_16_BITS_SIGNED = Short.MAX_VALUE;
    final static float MAX_16_BITS_UNSIGNED = 0xffff;
    /** Format read from the reference file; null if it could not be loaded. */
    private AudioFormat format;
    /** Peak level (0.0 to 1.0) of the most recently analysed buffer. */
    private volatile float level;
    private int frameSize;

    /** Creates a recorder and tries to load the capture format from the reference file. */
    public RecordAudio() {
        getFormat();
    }

    /**
     * Reads the audio format of the reference file and stores it, together with its
     * frame size, in this instance.
     *
     * @return the format of the reference file, or {@code null} if the file could not
     *         be read or is not a supported audio file (the problem is reported on
     *         {@code System.err})
     */
    private AudioFormat getFormat() {
        File file = new File("src/Facebook/1.wav");
        AudioInputStream stream = null;
        try {
            stream = AudioSystem.getAudioInputStream(file);
            format = stream.getFormat();
            frameSize = format.getFrameSize();
            return format;
        } catch (UnsupportedAudioFileException e) {
            System.err.println("Unsupported audio file " + file + ": " + e);
        } catch (IOException e) {
            System.err.println("Cannot read audio file " + file + ": " + e);
        } finally {
            // Only the format is needed; release the file handle right away.
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
        return null;
    }

    /** Asks the capture thread to stop after the read that is currently in progress. */
    public void stopAudio() {
        running = false;
    }

    /**
     * Opens a capture line in the reference format and starts a thread that reads from
     * it until {@link #stopAudio()} is called. Every buffer read is appended to the
     * in-memory recording and its peak level is printed to {@code System.out}.
     *
     * <p>If no capture line is available the error is printed and the JVM exits with
     * status -2.
     *
     * @throws IllegalStateException if the reference audio format could not be loaded
     */
    public void recordAudio() {
        final AudioFormat format = getFormat();
        if (format == null) {
            throw new IllegalStateException(
                    "Cannot record: the reference audio format could not be loaded");
        }
        try {
            DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
            final TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
            line.open(format);
            line.start();

            // Publish the recording state before the thread starts, so that a
            // stopAudio() or getAudioFile() issued right after this method returns
            // cannot be lost or hit an uninitialised buffer.
            final ByteArrayOutputStream captured = new ByteArrayOutputStream();
            out = captured;
            running = true;

            Runnable runner = new Runnable() {
                // One second of audio per read.
                final int bufferSize = (int) format.getSampleRate() * format.getFrameSize();
                final byte[] buffer = new byte[bufferSize];

                @Override
                public void run() {
                    try {
                        while (running) {
                            int count = line.read(buffer, 0, buffer.length);
                            // Analyse only the bytes delivered by this read; the tail
                            // of the buffer may hold data from an earlier read.
                            int valid = Math.max(count, 0);
                            calculateLevel(buffer, 0, buffer.length - valid);
                            System.out.println(level);
                            if (count > 0) {
                                captured.write(buffer, 0, count);
                            }
                        }
                    } finally {
                        line.stop();
                        line.close();
                    }
                }
            };
            Thread captureThread = new Thread(runner);
            captureThread.start();
        } catch (LineUnavailableException e) {
            System.err.println("Line unavailable: " + e);
            System.exit(-2);
        }
    }

    /**
     * Writes everything captured so far to {@code temp.wav} in the reference format.
     * An I/O failure while writing is reported on {@code System.out}.
     *
     * @return a {@link File} pointing at {@code temp.wav}
     * @throws IllegalStateException if nothing has been recorded yet or the reference
     *         audio format could not be loaded
     */
    public File getAudioFile() {
        ByteArrayOutputStream recorded = out;
        if (recorded == null) {
            throw new IllegalStateException("No audio has been recorded yet");
        }
        final AudioFormat format = getFormat();
        if (format == null) {
            throw new IllegalStateException(
                    "Cannot write audio: the reference audio format could not be loaded");
        }
        byte[] audio = recorded.toByteArray();
        File target = new File("temp.wav");
        InputStream input = new ByteArrayInputStream(audio);
        try {
            final AudioInputStream ais =
                    new AudioInputStream(input, format, audio.length / format.getFrameSize());
            AudioSystem.write(ais, AudioFileFormat.Type.WAVE, target);
            input.close();
            System.out.println("New file created!");
        } catch (IOException e) {
            System.out.println(e.getMessage());
        }
        return target;
    }

    /**
     * Computes the peak level of part of a PCM buffer and stores it in {@code level}
     * as a fraction between 0.0 and 1.0 of the full-scale value for the sample size
     * and signedness of the current format.
     *
     * <p>Signed samples contribute their absolute value, so negative peaks count as
     * much as positive ones. Unsigned samples contribute their raw value and are not
     * re-centred. A trailing incomplete 16-bit sample is ignored.
     *
     * @param buffer    raw PCM bytes
     * @param readPoint index of the first byte to analyse
     * @param leftOver  number of bytes at the end of {@code buffer} to ignore
     */
    private void calculateLevel(byte[] buffer, int readPoint, int leftOver) {
        int end = buffer.length - leftOver;
        boolean use16Bit = (format.getSampleSizeInBits() == 16);
        boolean signed = AudioFormat.Encoding.PCM_SIGNED.equals(format.getEncoding());
        boolean bigEndian = format.isBigEndian();

        int max = 0;
        if (use16Bit) {
            for (int i = readPoint; i + 1 < end; i += 2) {
                // deal with endianness
                int hiByte = bigEndian ? buffer[i] : buffer[i + 1];
                int loByte = bigEndian ? buffer[i + 1] : buffer[i];
                int value;
                if (signed) {
                    // Mask the low byte: a sign-extended low byte would overwrite
                    // the high byte when OR-ed in.
                    value = Math.abs((short) ((hiByte << 8) | (loByte & 0xff)));
                } else {
                    value = ((hiByte & 0xff) << 8) | (loByte & 0xff);
                }
                max = Math.max(max, value);
            }
        } else {
            // 8 bit - no endianness issues, just sign
            for (int i = readPoint; i < end; i++) {
                int value = signed ? Math.abs(buffer[i]) : (buffer[i] & 0xff);
                max = Math.max(max, value);
            }
        }

        float fullScale;
        if (signed) {
            fullScale = use16Bit ? MAX_16_BITS_SIGNED : MAX_8_BITS_SIGNED;
        } else {
            fullScale = use16Bit ? MAX_16_BITS_UNSIGNED : MAX_8_BITS_UNSIGNED;
        }
        // The most negative signed sample is one step larger than the positive
        // full scale, so clamp to keep the documented 0.0 to 1.0 range.
        level = Math.min(1.0f, (float) max / fullScale);
    }
}
package bemukan.voiceRecognition.speechToText;
导入javax.sound.sampled.*;
导入java.io.*;
公共类录音带{
私有文件音频文件;
保护布尔运行;
私人BYTEARRAYOUTPUTSTROUT;
私有音频输入流输入流;
最终静态浮点最大8位有符号=Byte.MAX\u值;
最终静态浮点最大8位无符号=0xff;
最终静态浮点最大16位符号=Short.MAX\u值;
最终静态浮点最大16位无符号=0xffff;
专用音频格式;
私人浮动水平;
私有整数帧大小;
公共录音(音频){
getFormat();
}
私有AudioFormat getFormat(){
File File=新文件(“src/Facebook/1.wav”);
音频输入流;
试一试{
stream=AudioSystem.getAudioInputStream(文件);
format=stream.getFormat();
frameSize=stream.getFormat().getFrameSize();
返回stream.getFormat();
}捕获(不支持的数据文件异常e){
}捕获(IOE异常){
}
返回null;
}
公共音频{
运行=错误;
}
公共音频{
试一试{
最终AudioFormat格式=getFormat();
DataLine.Info=newdataline.Info(
TargetDataLine.class,格式);
最终TargetDataLine=(TargetDataLine)
AudioSystem.getLine(info);
行。打开(格式);
line.start();
Runnable runner=新的Runnable(){
int bufferSize=(int)format.getSampleRate()
*format.getFrameSize();
字节缓冲区[]=新字节[bufferSize];
公开募捐{
int readPoint=0;
out=新的ByteArrayOutputStream();
运行=真;
整数和=0;
(跑步时){
整数计数=
行读取(缓冲区,0,缓冲区长度);
calculateLevel(缓冲区,0,0);
系统输出打印项次(级别);
如果(计数>0){
out.write(缓冲区,0,计数);
}
}
line.stop();
}
};
螺纹CAPTURAREAD=新螺纹(流道);
captRead.start();
}捕获(LineUnavailableException e){
System.err.println(“行不可用:+e”);
系统出口(-2);
}
}
公共文件getAudioFile(){
字节[]音频=out.toByteArray();
InputStream输入=新的ByteArrayInputStream(音频);
试一试{
最终AudioFormat格式=getFormat();
最终音频输入流ais=
新的音频输入流(输入、格式、,
audio.length/format.getFrameSize());
AudioSystem.write(ais,AudioFileFormat.Type.WAVE,新文件(“temp.wav”);
input.close();
System.out.println(“创建了新文件!”);
}捕获(IOE异常){
System.out.println(e.getMessage());
}
返回新文件(“temp.wav”);
}
私有void calculateLevel(字节[]缓冲区,
int读取点,
整数剩余){
int max=0;
布尔use16Bit=(format.getSampleSizeInBits()==16);
布尔符号=(format.getEncoding()==
音频格式。编码。PCM_签名);
布尔bigEndian=(format.isBigEndian());
如果(使用16位){
for(int i=读取点;i
在Java中启动录制操作时,如何检测静音
计算一组声音帧的 RMS(均方根)或 dB 值,并确定低于哪个级别即视为“静音”
什么是PCM数据
采用脉冲编码调制(Pulse-Code Modulation,PCM)格式的数据
如何在Java中计算PCM数据
我不理解这个问题。但我猜它与 speech-recognition(语音识别)标签有关,那么我有一些坏消息。
理论上,这可以通过使用 Java Speech API 来实现。但是显然,该 API 没有可用的“语音到文本”实现(只有“文本到语音”)
我必须为语音识别项目计算rms。但我不知道如何用Java计算
对于用取值范围为 -1 到 1 的 double 数组
表示信号大小的单个通道,可以使用此方法
/**
 * Computes the RMS volume of a group of signal sizes ranging from -1 to 1.
 * The average of the samples is subtracted from each sample before squaring.
 *
 * @param raw the signal samples
 * @return the square root of the mean squared deviation from the average,
 *         or 0 when {@code raw} is empty
 */
public double volumeRMS(double[] raw) {
    final int sampleCount = raw.length;
    if (sampleCount == 0) {
        return 0d;
    }

    // First pass: average of all samples.
    double total = 0d;
    for (double sample : raw) {
        total += sample;
    }
    final double mean = total / sampleCount;

    // Second pass: accumulate squared deviations from that average.
    double squaredDeviations = 0d;
    for (double sample : raw) {
        squaredDeviations += Math.pow(sample - mean, 2d);
    }

    return Math.sqrt(squaredDeviations / sampleCount);
}
/**计算从-1到1的一组信号大小的RMS体积*/
公共双卷(双[]原始){
双和=0d;
if(原始长度==0){
回报金额;
}否则{
对于(int ii=0;ii您需要捕捉值,就像数字为零或接近零一样
请根据您的要求调整您的代码!!!
在本例中,一个名为UMBRAL的变量(西班牙语中的阈值)
假设您可以按字节访问 WAV 文件,例如把文件头读入名为 ByteHeader 的字节数组
/**
 * Combines four bytes into one 32-bit integer, most significant byte first.
 *
 * @param Byte24 bits 24..31 of the result
 * @param Byte16 bits 16..23 of the result
 * @param Byte08 bits 8..15 of the result
 * @param Byte00 bits 0..7 of the result
 * @return the combined value
 */
private Integer Byte2PosIntBig(byte Byte24, byte Byte16, byte Byte08, byte Byte00) {
    // The lower three bytes are masked so their sign extension cannot spill into
    // the higher bits. Integer.valueOf replaces the deprecated Integer constructor.
    return Integer.valueOf(
            ((Byte24) << 24)
            | ((Byte16 & 0xFF) << 16)
            | ((Byte08 & 0xFF) << 8)
            | ((Byte00 & 0xFF) << 0));
}

/**
 * Combines two bytes into a non-negative 16-bit value, most significant byte first.
 * Provided for callers that pass only two bytes, such as a two-byte header field.
 *
 * @param Byte08 bits 8..15 of the result
 * @param Byte00 bits 0..7 of the result
 * @return the combined value, in the range 0 to 65535
 */
private Integer Byte2PosIntBig(byte Byte08, byte Byte00) {
    return Integer.valueOf(((Byte08 & 0xFF) << 8) | (Byte00 & 0xFF));
}
从这里开始
// Scans the samples of a WAV file through RAFSource and marks the places where a
// sample falls below the threshold UMBRAL. RAFSource, UMBRAL and Multiplier are not
// declared in this snippet.
// NOTE(review): assumes the sample data starts right after a 44-byte header — confirm
// for files that carry extra chunks.
int PSData = 44;
byte[] Bytes = new byte[4];
byte[] ByteHeader = new byte[44];
// Read the first 44 bytes of the file into ByteHeader.
RAFSource.seek(0);
RAFSource.read(ByteHeader);
// Header bytes 40..43 are combined with byte 43 as the most significant byte;
// presumably this is the size of the sample data in bytes — confirm.
int WavSize = Byte2PosIntBig(ByteHeader[43],ByteHeader[42],ByteHeader[41],ByteHeader[40]);
// Header bytes 34..35, byte 35 first; used below as the number of bits per sample.
// NOTE(review): this is a two-argument call; verify that a matching two-byte
// overload of Byte2PosIntBig exists.
int NumBits = Byte2PosIntBig(ByteHeader[35],ByteHeader[34]);
int NumByte = NumBits/8;
// Visit one sample per iteration, stepping by the sample width in bytes.
for (int i = PSData;i < PSData+WavSize;i+=NumByte) {
int WavSample = 0;
int WavResultI =0;
int WavResultO = 0;
if (NumByte == 2) {
// Two-byte sample: the second byte read is the high byte and keeps its sign.
RAFSource.seek(i);
Bytes[0] = RAFSource.readByte();
Bytes[1] = RAFSource.readByte();
WavSample = (int)(((Bytes[1]) << 8)|((Bytes[0] & 0xFF) << 0));
if (Math.abs(WavSample) < UMBRAL) {
//SILENCE DETECTED!!!
}
} else {
// Any other width is handled as a single byte masked to 0..255.
RAFSource.seek(i);
WavSample = (short)(RAFSource.readByte() & 0xFF);
short sSamT = (short)WavSample;
// NOTE(review): adding 128 to a value already in 0..255 gives 128..383. If the
// intent is to centre unsigned 8-bit samples on zero, this probably should
// subtract 128 and compare the absolute value — confirm.
sSamT += 128;
double dSamD = (double)sSamT*Multiplier;
if ((double)sSamT < UMBRAL) {
//SILENCE DETECTED!!!
}
}
// NOTE(review): the closing brace of the for loop is not present in this snippet.
int PSData=44;
字节[]字节=新字节[4];
字节[]字节头=新字节[44];
RAFSource.seek(0);
读取(字节头);
int WavSize=byte2positbig(字节头[43],字节头[42],字节头[41],字节头[40]);
int NumBits=byte2positbig(ByteHeader[35],ByteHeader[34]);
int NumByte=NumBits/8;
对于(int i=PSData;i WavSample=(int)((字节[1])是的,我必须为语音识别项目计算rms
// Scans the samples of a WAV file through RAFSource and marks the places where a
// sample falls below the threshold UMBRAL. RAFSource, UMBRAL and Multiplier are not
// declared in this snippet.
// NOTE(review): assumes the sample data starts right after a 44-byte header — confirm
// for files that carry extra chunks.
int PSData = 44;
byte[] Bytes = new byte[4];
byte[] ByteHeader = new byte[44];
// Read the first 44 bytes of the file into ByteHeader.
RAFSource.seek(0);
RAFSource.read(ByteHeader);
// Header bytes 40..43 are combined with byte 43 as the most significant byte;
// presumably this is the size of the sample data in bytes — confirm.
int WavSize = Byte2PosIntBig(ByteHeader[43],ByteHeader[42],ByteHeader[41],ByteHeader[40]);
// Header bytes 34..35, byte 35 first; used below as the number of bits per sample.
// NOTE(review): this is a two-argument call; verify that a matching two-byte
// overload of Byte2PosIntBig exists.
int NumBits = Byte2PosIntBig(ByteHeader[35],ByteHeader[34]);
int NumByte = NumBits/8;
// Visit one sample per iteration, stepping by the sample width in bytes.
for (int i = PSData;i < PSData+WavSize;i+=NumByte) {
int WavSample = 0;
int WavResultI =0;
int WavResultO = 0;
if (NumByte == 2) {
// Two-byte sample: the second byte read is the high byte and keeps its sign.
RAFSource.seek(i);
Bytes[0] = RAFSource.readByte();
Bytes[1] = RAFSource.readByte();
WavSample = (int)(((Bytes[1]) << 8)|((Bytes[0] & 0xFF) << 0));
if (Math.abs(WavSample) < UMBRAL) {
//SILENCE DETECTED!!!
}
} else {
// Any other width is handled as a single byte masked to 0..255.
RAFSource.seek(i);
WavSample = (short)(RAFSource.readByte() & 0xFF);
short sSamT = (short)WavSample;
// NOTE(review): adding 128 to a value already in 0..255 gives 128..383. If the
// intent is to centre unsigned 8-bit samples on zero, this probably should
// subtract 128 and compare the absolute value — confirm.
sSamT += 128;
double dSamD = (double)sSamT*Multiplier;
if ((double)sSamT < UMBRAL) {
//SILENCE DETECTED!!!
}
}
// NOTE(review): the closing brace of the for loop is not present in this snippet.