Java 录制时检测静音_Java_Audio_Speech Recognition_Javasound

Java 录制时检测静音

java audio speech-recognition

Java 录制时检测静音,java,audio,speech-recognition,javasound,Java,Audio,Speech Recognition,Javasound,在Java中启动录制操作时，如何检测静音？什么是PCM数据？如何在Java中计算PCM数据我找到了解决办法： package bemukan.voiceRecognition.speechToText; import javax.sound.sampled.*; import java.io.*; public class RecordAudio { private File audioFile; protected boolean running; private

在Java中启动录制操作时，如何检测静音？什么是PCM数据？如何在Java中计算PCM数据

我找到了解决办法：

package bemukan.voiceRecognition.speechToText;

import javax.sound.sampled.*;
import java.io.*;

/**
 * Captures audio from a {@link TargetDataLine} on a background thread and
 * tracks the peak level of the most recently captured chunk so that callers
 * can detect silence.
 *
 * <p>The capture format is taken from the reference file
 * {@code src/Facebook/1.wav}. The level is a value in {@code [0.0, 1.0]}:
 * the largest absolute sample amplitude of the last chunk divided by the
 * full-scale amplitude. A level close to {@code 0.0} means silence.
 *
 * <p>Threading: {@link #recordAudio()} starts one capture thread;
 * {@link #stopAudio()} and {@link #getLevel()} may be called from any thread.
 */
public class RecordAudio {
    final static float MAX_8_BITS_SIGNED = Byte.MAX_VALUE;
    final static float MAX_8_BITS_UNSIGNED = 0xff;
    final static float MAX_16_BITS_SIGNED = Short.MAX_VALUE;
    final static float MAX_16_BITS_UNSIGNED = 0xffff;

    private File audioFile;
    /** Written by the caller's thread, polled by the capture thread, hence volatile. */
    protected volatile boolean running;
    private ByteArrayOutputStream out;
    private AudioInputStream inputStream;
    private AudioFormat format;
    /** Written by the capture thread, read through {@link #getLevel()}. */
    private volatile float level;
    private int frameSize;

    public RecordAudio() {
        getFormat();
    }

    /**
     * Returns the capture format, reading it from the reference WAV file on
     * first use and caching it afterwards.
     *
     * @return the format, or {@code null} if the reference file cannot be read
     */
    private AudioFormat getFormat() {
        if (format != null) {
            return format;
        }
        File file = new File("src/Facebook/1.wav");
        // try-with-resources: the stream is only needed to read the header.
        try (AudioInputStream stream = AudioSystem.getAudioInputStream(file)) {
            format = stream.getFormat();
            frameSize = format.getFrameSize();
            return format;
        } catch (UnsupportedAudioFileException | IOException e) {
            System.err.println("Cannot read audio format from " + file + ": " + e);
            return null;
        }
    }

    /** Asks the capture thread to finish after the chunk it is currently reading. */
    public void stopAudio() {
        running = false;
    }

    /**
     * Returns the peak level of the most recently captured chunk.
     *
     * @return a value in {@code [0.0, 1.0]}; {@code 0.0} before any audio was captured
     */
    public float getLevel() {
        return level;
    }

    /**
     * Opens the capture line and starts recording on a background thread until
     * {@link #stopAudio()} is called. The level of every captured chunk is
     * printed to standard output.
     *
     * @throws IllegalStateException if the capture format could not be determined
     */
    public void recordAudio() {
        final AudioFormat captureFormat = getFormat();
        if (captureFormat == null) {
            throw new IllegalStateException(
                    "Audio format is unavailable; cannot start recording");
        }
        try {
            DataLine.Info info = new DataLine.Info(TargetDataLine.class, captureFormat);
            final TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
            line.open(captureFormat);
            line.start();

            // One second of audio per chunk; always a whole number of frames.
            final byte[] buffer =
                    new byte[(int) captureFormat.getSampleRate() * captureFormat.getFrameSize()];
            final ByteArrayOutputStream sink = new ByteArrayOutputStream();
            out = sink;
            // Set before the thread starts so that an immediate stopAudio() is not
            // overwritten by the capture thread.
            running = true;

            Runnable runner = new Runnable() {
                @Override
                public void run() {
                    try {
                        while (running) {
                            int count = line.read(buffer, 0, buffer.length);
                            if (count > 0) {
                                // Only the first 'count' bytes are fresh; the rest of the
                                // buffer still holds data from an earlier read.
                                calculateLevel(buffer, 0, buffer.length - count);
                                System.out.println(level);
                                sink.write(buffer, 0, count);
                            }
                        }
                    } finally {
                        line.stop();
                        line.close();
                    }
                }
            };
            Thread captureThread = new Thread(runner);
            captureThread.start();
        } catch (LineUnavailableException e) {
            // Exit behaviour kept as in the original implementation.
            System.err.println("Line unavailable: " + e);
            System.exit(-2);
        }
    }

    /**
     * Writes everything recorded so far to {@code temp.wav}.
     *
     * @return the file {@code temp.wav} (returned even if writing failed; the
     *         failure message is printed)
     * @throws IllegalStateException if nothing was recorded or the format is unknown
     */
    public File getAudioFile() {
        final File target = new File("temp.wav");
        final ByteArrayOutputStream recorded = out;
        final AudioFormat captureFormat = getFormat();
        if (recorded == null || captureFormat == null) {
            throw new IllegalStateException(
                    "No recording available; call recordAudio() first");
        }
        byte[] audio = recorded.toByteArray();
        try (AudioInputStream ais = new AudioInputStream(
                new ByteArrayInputStream(audio),
                captureFormat,
                audio.length / captureFormat.getFrameSize())) {
            AudioSystem.write(ais, AudioFileFormat.Type.WAVE, target);
            System.out.println("New file created!");
        } catch (IOException e) {
            System.out.println(e.getMessage());
        }
        return target;
    }

    /**
     * Updates {@link #level} from the samples in
     * {@code buffer[readPoint .. buffer.length - leftOver)}.
     *
     * @param buffer    raw PCM bytes in the capture format
     * @param readPoint index of the first byte to inspect
     * @param leftOver  number of trailing bytes to ignore
     */
    private void calculateLevel(byte[] buffer,
                                int readPoint,
                                int leftOver) {
        level = peakLevel(buffer, readPoint, buffer.length - leftOver, format);
    } // calculateLevel

    /**
     * Computes the peak amplitude of 8- or 16-bit PCM data as a fraction of
     * full scale.
     *
     * <p>Every byte is masked before it is combined so that sign extension
     * cannot corrupt the sample, negative peaks count as much as positive
     * ones, and unsigned samples are re-centred on their midpoint so that
     * silence yields {@code 0.0}. Data that is not 16-bit is treated as 8-bit.
     *
     * @param buffer raw PCM bytes
     * @param start  index of the first byte to inspect (inclusive)
     * @param end    index after the last byte to inspect (exclusive)
     * @param format sample size, signedness and byte order of {@code buffer}
     * @return the peak level in {@code [0.0, 1.0]}
     */
    static float peakLevel(byte[] buffer, int start, int end, AudioFormat format) {
        final boolean use16Bit = format.getSampleSizeInBits() == 16;
        final boolean signed =
                AudioFormat.Encoding.PCM_SIGNED.equals(format.getEncoding());
        final boolean bigEndian = format.isBigEndian();
        final int from = Math.max(0, start);
        final int to = Math.min(buffer.length, end);

        int peak = 0;
        if (use16Bit) {
            // 'i + 1 < to' skips a dangling odd byte instead of indexing past the span.
            for (int i = from; i + 1 < to; i += 2) {
                int hiByte = bigEndian ? buffer[i] : buffer[i + 1];
                int loByte = bigEndian ? buffer[i + 1] : buffer[i];
                int raw = ((hiByte & 0xFF) << 8) | (loByte & 0xFF);
                int sample = signed ? (short) raw : raw - 0x8000;
                peak = Math.max(peak, Math.abs(sample));
            }
            // |Short.MIN_VALUE| is one above full scale, hence the clamp.
            return Math.min(1.0f, peak / MAX_16_BITS_SIGNED);
        }
        for (int i = from; i < to; i++) {
            int sample = signed ? buffer[i] : (buffer[i] & 0xFF) - 0x80;
            peak = Math.max(peak, Math.abs(sample));
        }
        return Math.min(1.0f, peak / MAX_8_BITS_SIGNED);
    }
}

package bemukan.voiceRecognition.speechToText；
导入javax.sound.sampled.*；
导入java.io.*；
公共类录音带{
私有文件音频文件；
保护布尔运行；
私人BYTEARRAYOUTPUTSTROUT；
私有音频输入流输入流；
最终静态浮点最大8位有符号=Byte.MAX\u值；
最终静态浮点最大8位无符号=0xff；
最终静态浮点最大16位符号=Short.MAX\u值；
最终静态浮点最大16位无符号=0xffff；
专用音频格式；
私人浮动水平；
私有整数帧大小；
公共录音（音频）{
getFormat（）；
}
私有AudioFormat getFormat（）{
File File=新文件（“src/Facebook/1.wav”）；
音频输入流；
试一试{
stream=AudioSystem.getAudioInputStream（文件）；
format=stream.getFormat（）；
frameSize=stream.getFormat（）.getFrameSize（）；
返回stream.getFormat（）；
}捕获（不支持的数据文件异常e）{
}捕获（IOE异常）{
}
返回null；
}
公共音频{
运行=错误；
}
公共音频{
试一试{
最终AudioFormat格式=getFormat（）；
DataLine.Info=newdataline.Info(
TargetDataLine.class，格式）；
最终TargetDataLine=（TargetDataLine）
AudioSystem.getLine（info）；
行。打开（格式）；
line.start（）；
Runnable runner=新的Runnable（）{
int bufferSize=（int）format.getSampleRate（）
*format.getFrameSize（）；
字节缓冲区[]=新字节[bufferSize]；
公开募捐{
int readPoint=0；
out=新的ByteArrayOutputStream（）；
运行=真；
整数和=0；
（跑步时）{
整数计数=
行读取（缓冲区，0，缓冲区长度）；
calculateLevel（缓冲区，0,0）；
系统输出打印项次（级别）；
如果（计数>0）{
out.write（缓冲区，0，计数）；
}
}
line.stop（）；
}
};
螺纹CAPTURAREAD=新螺纹（流道）；
captRead.start（）；
}捕获（LineUnavailableException e）{
System.err.println（“行不可用：+e”）；
系统出口（-2）；
}
}
公共文件getAudioFile（）{
字节[]音频=out.toByteArray（）；
InputStream输入=新的ByteArrayInputStream（音频）；
试一试{
最终AudioFormat格式=getFormat（）；
最终音频输入流ais=
新的音频输入流（输入、格式、，
audio.length/format.getFrameSize（））；
AudioSystem.write（ais，AudioFileFormat.Type.WAVE，新文件（“temp.wav”）；
input.close（）；
System.out.println（“创建了新文件！”）；
}捕获（IOE异常）{
System.out.println（e.getMessage（））；
}
返回新文件（“temp.wav”）；
}
私有void calculateLevel（字节[]缓冲区，
int读取点，
整数剩余）{
int max=0；
布尔use16Bit=（format.getSampleSizeInBits（）==16）；
布尔符号=（format.getEncoding（）==
音频格式。编码。PCM_签名）；
布尔bigEndian=（format.isBigEndian（））；
如果（使用16位）{
for（int i=读取点；i
在Java中启动录制操作时，如何检测静音
计算一组声音帧的 dB 值或 RMS（均方根）值，并确定低于哪个级别即视为“静音”。
什么是PCM数据
采用脉冲编码调制（Pulse-code modulation，PCM）格式的数据。
如何在Java中计算PCM数据
我不太理解这个问题。不过，如果它与 speech-recognition（语音识别）标签有关，那么我有一些坏消息：理论上，这可以通过 Java Speech API 来实现，但该 API 显然没有“语音到文本”的实现（只有“文本到语音”）。

我必须为语音识别项目计算rms。但我不知道如何用Java计算
对于用 -1 到 1 之间的 double 值表示信号幅度的单声道数据，可以使用下面的方法：
/**
 * Returns the RMS volume of a block of samples after removing their mean
 * (DC offset), i.e. the square root of the average squared deviation from
 * the block's average.
 *
 * @param raw signal sizes, each expected to lie between -1 and 1
 * @return the RMS value, or {@code 0} when {@code raw} is empty
 */
public double volumeRMS(double[] raw) {
    final int sampleCount = raw.length;
    if (sampleCount == 0) {
        return 0d;
    }

    // First pass: average of the samples.
    double total = 0d;
    for (double sample : raw) {
        total += sample;
    }
    final double mean = total / sampleCount;

    // Second pass: accumulate the squared distance of each sample from the mean.
    double squaredDeviations = 0d;
    for (double sample : raw) {
        squaredDeviations += Math.pow(sample - mean, 2d);
    }

    return Math.sqrt(squaredDeviations / sampleCount);
}

/**计算从-1到1的一组信号大小的RMS体积*/
公共双卷（双[]原始）{
双和=0d；
if（原始长度==0）{
回报金额；
}否则{
对于（int ii=0；ii
您需要检查采样值：静音时，采样值为零或接近零。
请根据您的需求调整代码！
本例中使用了一个名为 UMBRAL 的变量（西班牙语，意为“阈值”）。
假设您可以按字节访问 WAV 文件，并已把文件头读入字节数组 ByteHeader：
/**
 * Assembles four bytes into a 32-bit integer, most significant byte first.
 *
 * <p>The three lower bytes are masked so that their sign bit cannot spill
 * into the higher bits. Despite the name, the result is negative whenever
 * the top bit of {@code Byte24} is set.
 *
 * @param Byte24 bits 24..31 of the result
 * @param Byte16 bits 16..23 of the result
 * @param Byte08 bits 8..15 of the result
 * @param Byte00 bits 0..7 of the result
 * @return the combined value
 */
private Integer Byte2PosIntBig(byte Byte24, byte Byte16, byte Byte08, byte Byte00) {
    // Integer.valueOf replaces the deprecated-for-removal Integer(int) constructor.
    return Integer.valueOf(
            (Byte24 << 24)
            | ((Byte16 & 0xFF) << 16)
            | ((Byte08 & 0xFF) << 8)
            | (Byte00 & 0xFF));
}

/**
 * Assembles two bytes into an unsigned 16-bit value, most significant byte
 * first. Provided for callers that decode 16-bit header fields.
 *
 * @param Byte08 bits 8..15 of the result
 * @param Byte00 bits 0..7 of the result
 * @return the combined value in the range 0..65535
 */
private Integer Byte2PosIntBig(byte Byte08, byte Byte00) {
    return Integer.valueOf(((Byte08 & 0xFF) << 8) | (Byte00 & 0xFF));
}

从这里开始
// Walks the sample data of a WAV file, one sample at a time, and compares each
// sample against the UMBRAL threshold to spot silence. RAFSource, UMBRAL and
// Multiplier are declared outside this snippet.
// NOTE(review): the snippet ends before the closing brace of the for loop.
// Offset at which sample data is taken to start (same as the header length read below).
int PSData = 44;
// Scratch space for the bytes of one sample; only indexes 0 and 1 are used below.
byte[] Bytes = new byte[4];
byte[] ByteHeader = new byte[44];
RAFSource.seek(0);
RAFSource.read(ByteHeader);

// Header bytes 40..43, passed highest index first, so the field is decoded as
// little-endian. The value bounds the loop below as a byte count.
int WavSize = Byte2PosIntBig(ByteHeader[43],ByteHeader[42],ByteHeader[41],ByteHeader[40]);

// Header bytes 34..35 give the bits per sample.
// NOTE(review): this needs a two-argument Byte2PosIntBig; confirm one exists.
int NumBits = Byte2PosIntBig(ByteHeader[35],ByteHeader[34]);
int NumByte = NumBits/8;

    // i is a byte offset into the file and advances by one sample per iteration.
    for (int i = PSData;i < PSData+WavSize;i+=NumByte) {
      int WavSample = 0;
      // WavResultI and WavResultO are never used in the visible code.
      int WavResultI =0;
      int WavResultO = 0;
      if (NumByte == 2) {

          RAFSource.seek(i);
          Bytes[0] = RAFSource.readByte();
          Bytes[1] = RAFSource.readByte();
          // Low byte is masked, high byte keeps its sign: a signed little-endian sample.
          WavSample = (int)(((Bytes[1]) << 8)|((Bytes[0] & 0xFF) << 0));
          if (Math.abs(WavSample) < UMBRAL) {
            //SILENCE DETECTED!!!
          }

      } else {
        // Every sample size other than 2 bytes is handled as a single byte.
        RAFSource.seek(i);
        // Masking yields 0..255; adding 128 below shifts that to 128..383.
        WavSample = (short)(RAFSource.readByte() & 0xFF);
        short sSamT = (short)WavSample;
        // NOTE(review): 8-bit WAV PCM is presumably unsigned with 128 as the zero
        // level; if so this should subtract 128 and the test below should use the
        // absolute value, as the 2-byte branch does. Confirm before relying on it.
        sSamT += 128;
        // dSamD is computed but never read in the visible code.
        double dSamD = (double)sSamT*Multiplier;
        if ((double)sSamT < UMBRAL) {
          //SILENCE DETECTED!!!
        }
      }

int PSData=44；
字节[]字节=新字节[4]；
字节[]字节头=新字节[44]；
RAFSource.seek（0）；
读取（字节头）；
int WavSize=byte2positbig（字节头[43]，字节头[42]，字节头[41]，字节头[40]）；
int NumBits=byte2positbig（ByteHeader[35]，ByteHeader[34]）；
int NumByte=NumBits/8；
对于（int i=PSData；iWavSample=（int）（（字节[1]）是的，我必须为语音识别项目计算rms
int PSData = 44;
byte[] Bytes = new byte[4];
byte[] ByteHeader = new byte[44];
RAFSource.seek(0);
RAFSource.read(ByteHeader);

int WavSize = Byte2PosIntBig(ByteHeader[43],ByteHeader[42],ByteHeader[41],ByteHeader[40]);

int NumBits = Byte2PosIntBig(ByteHeader[35],ByteHeader[34]);
int NumByte = NumBits/8;

    for (int i = PSData;i < PSData+WavSize;i+=NumByte) {
      int WavSample = 0;
      int WavResultI =0;
      int WavResultO = 0;
      if (NumByte == 2) {

          RAFSource.seek(i);
          Bytes[0] = RAFSource.readByte();
          Bytes[1] = RAFSource.readByte();
          WavSample = (int)(((Bytes[1]) << 8)|((Bytes[0] & 0xFF) << 0));
          if (Math.abs(WavSample) < UMBRAL) {
            //SILENCE DETECTED!!!
          }

      } else {
        RAFSource.seek(i);
        WavSample = (short)(RAFSource.readByte() & 0xFF);
        short sSamT = (short)WavSample;
        sSamT += 128;
        double dSamD = (double)sSamT*Multiplier;
        if ((double)sSamT < UMBRAL) {
          //SILENCE DETECTED!!!
        }
      }