C++ 如何使用opus对音频数据进行编码和解码?
我正在进行语音聊天,需要压缩音频数据。我通过Qt框架录制和播放音频数据。如果我录制和播放音频数据而不压缩它,一切都很好。如果我压缩、解压并播放音频数据,我只会听到一种爆裂的声音 编辑:我看了一下演示代码,并尝试使用该代码。 我能听到一些声音,但声音很慢。如果我将pcm_字节的大小增加到例如40000,听起来会更好,但我的声音仍然有延迟和破裂的声音 这是一行(底部的audioinput.cpp): codecopus.cpp:C++ 如何使用opus对音频数据进行编码和解码?,c++,qt,audio,opus,C++,Qt,Audio,Opus,我正在进行语音聊天,需要压缩音频数据。我通过Qt框架录制和播放音频数据。如果我录制和播放音频数据而不压缩它,一切都很好。如果我压缩、解压并播放音频数据,我只会听到一种爆裂的声音 编辑:我看了一下演示代码,并尝试使用该代码。 我能听到一些声音,但声音很慢。如果我将pcm_字节的大小增加到例如40000,听起来会更好,但我的声音仍然有延迟和破裂的声音 这是一行(底部的audioinput.cpp): codecopus.cpp: #include "codecopus.h" CodecOpus::
#include "codecopus.h"
CodecOpus::CodecOpus()
{
}
void CodecOpus::initDecoder(opus_int32 samplingRate, int channels) //decoder
{
int error;
decoderState = opus_decoder_create(samplingRate,channels,&error);
if(error == OPUS_OK){
std::cout << "Created Opus Decoder struct" << std::endl;
}
}
void CodecOpus::initEncoder(opus_int32 samplingRate, int channels) // Encoder
{
int error;
encoderState = opus_encoder_create(samplingRate,channels,OPUS_APPLICATION_VOIP,&error);
error = opus_encoder_ctl(encoderState,OPUS_SET_BITRATE(64000));
if(error == OPUS_OK){
std::cout << "Created Opus Encoder struct" << std::endl;
}
}
opus_int32 CodecOpus::encodeData(const opus_int16 *pcm, int frameSize, unsigned char *data, opus_int32 maxDataBytes) //Encoder
{
opus_int32 i = opus_encode(encoderState,pcm,frameSize,data,maxDataBytes);
return i;
}
int CodecOpus::decodeData(const unsigned char *data, opus_int32 numberOfBytes,opus_int16* pcm,int frameSizeInSec) //Decoder
{
int i = opus_decode(decoderState,data,numberOfBytes,pcm,frameSizeInSec,0);
return i;
}
CodecOpus::~CodecOpus()
{
opus_decoder_destroy(this->decoderState);
opus_encoder_destroy(this->encoderState);
}
#包括“codecopus.h”
CodecOpus::CodecOpus()
{
}
void CodecOpus::initDecoder(opus_int32采样,int通道)//解码器
{
整数误差;
decoderState=opus_decoder_create(采样、通道和错误);
如果(错误==OPUS_OK){
std::cout我已经准备好了一个很长的答案,关于你如何将opus.decodeData的返回值误解为字节数,其中正确的解释是“每个通道的解码样本数”。但看起来你在稍后的字节转换例程中仍然解释了这一点。所以我不确定错误在哪里
一般来说,我认为您正在使从unsigned char int16的转换变得比需要的更复杂。您应该能够直接将音频缓冲区传递给opus或从opus传递音频缓冲区,并将其指针重新解释为所需的内联类型,而不必手动执行位操作来在不同的缓冲区之间进行转换和复制设备应该给你们一点端数据,但若有不匹配,你们可以做一个基本的字节交换例行程序
for (int c = 0; c < numSamples; c++)
{
unsigned char tmp = data[2 * c];
data[2 * c] = data[2 * c + 1];
data[2 * c + 1] = tmp;
}
for(int c=0;c
我在这里看不到,但我假设您也有代码,一次仅消耗来自麦克风的960个样本,并将其余样本保留在下一帧的缓冲区中,否则您将丢弃数据
这并不重要,但您也可以将cbits中的4000替换为1275,这是opus数据包的最大大小。1)您只编码960字节,而缓冲区要大得多。您必须将缓冲区分成几个相等的部分,并将它们传递给编码器。该部分的大小为120、240、480、960、1920和2880
2) 从字符数组转换为opus_int16数组/从opus_int16数组转换为字符数组时,请使用qFromLittleEndian()/qToLittleEndian()函数或类型转换。这将防止出现破裂和音质差
例如:
void voice::slot_read_audio_input()
{
// Audio settings:
// Sample Rate=48000
// Sample Size=16
// Channel Count=1
// Byte Order=Little Endian
// Sample Type= UnSignedInt
// Encoder settings:
// Sample Rate=48000
// Channel Count=1
// OPUS_APPLICATION_VOIP
// Decoder settings:
// Sample Rate=48000
// Channel Count=1
QByteArray audio_buffer;//mic
QByteArray output_audio_buffer;//speaker
int const OPUS_INT_SIZE=2;//sizeof(opus_int16)
int const FRAME_SIZE=960;
int const MAX_FRAME_SIZE=1276;
int FRAME_COUNT=3840/FRAME_SIZE/OPUS_INT_SIZE;// 3840 is a sample size= voice_input->bytesReady;
opus_int16 input_frame[FRAME_SIZE] = {};
opus_int16 output_frame[FRAME_SIZE] = {};
unsigned char compressed_frame[MAX_FRAME_SIZE] = {};
unsigned char decompressed_frame[FRAME_SIZE*OPUS_INT_SIZE] = {};
audio_buffer.resize(voice_input->bytesReady());
output_audio_buffer.resize(FRAME_SIZE*OPUS_INT_SIZE);
input->read(audio_buffer.data(),audio_buffer.size());
for(int i=0;i<FRAME_COUNT;i++)
{
// convert from LittleEndian
for(int j=0;j<FRAME_SIZE;j++)
{
input_frame[j]=qFromLittleEndian<opus_int16>(audio_buffer.data()+j*OPUS_INT_SIZE);
// or use this:
// input_frame[j]=static_cast<short>(static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j+1))<<8|static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j)));
}
opus_int32 compressedBytes = opus_encode(enc, input_frame,FRAME_SIZE,compressed_frame,MAX_FRAME_SIZE);
opus_int32 decompressedBytes = opus_decode(dec,compressed_frame,compressedBytes,output_frame,FRAME_SIZE,0);
// conver to LittleEndian
for(int j = 0; j<decompressedBytes;j++)
{
qToLittleEndian(output_frame[j],output_audio_buffer.data()+j*OPUS_INT_SIZE);
// or use this:
// decompressed_frame[OPUS_INT_SIZE*j]=output_frame[j]&0xFF;
// decompressed_frame[OPUS_INT_SIZE*j+1]=(output_frame[j]>>8)&0xFF;
}
audio_buffer.remove(0,FRAME_SIZE*OPUS_INT_SIZE);
output->write(output_audio_buffer,FRAME_SIZE*OPUS_INT_SIZE);
// or use this:
// output->write(reinterpret_cast<char*>(decompressed_frame),FRAME_SIZE*OPUS_INT_SIZE);
}
}
void voice::插槽读取音频输入()
{
//音频设置:
//抽样率=48000
//样本量=16
//通道计数=1
//字节顺序=小端
//样本类型=UnSignedInt
//编码器设置:
//抽样率=48000
//通道计数=1
//OPUS\u应用程序\u VOIP
//解码器设置:
//抽样率=48000
//通道计数=1
QByteArray音频缓冲区;//麦克风
QByteArray输出\音频\缓冲区;//扬声器
int const OPUS_int_SIZE=2;//sizeof(OPUS_int16)
int const FRAME_SIZE=960;
int const MAX_FRAME_SIZE=1276;
int FRAME\u COUNT=3840/FRAME\u SIZE/OPUS\u int\u SIZE;//3840是一个样本大小=语音输入->字节就绪;
opus_int16输入_帧[帧大小]={};
opus_int16输出_帧[帧大小]={};
无符号字符压缩帧[最大帧大小]={};
无符号字符解压\u帧[帧大小*OPUS\u INT\u大小]={};
调整音频缓冲区大小(语音输入->字节准备();
输出音频缓冲区。调整大小(帧大小*作品大小);
输入->读取(音频缓冲区.data(),音频缓冲区.size());
对于(int i=0;我您能解决这个问题吗?我对找到类似问题的解决方案感兴趣。如果您解决了,请告诉我。
#include "audioinput.h"
AudioInput::AudioInput() : audioFormat(),pcm_bytes(new unsigned char[40000])
{
audioFormat.setSampleRate(48000);
audioFormat.setChannelCount(2);
audioFormat.setSampleSize(16);
audioFormat.setSampleType(QAudioFormat::SignedInt);
audioFormat.setByteOrder(QAudioFormat::LittleEndian);
audioFormat.setCodec("audio/pcm");
speakerAudioFormat.setSampleRate(48000);
speakerAudioFormat.setChannelCount(2);
speakerAudioFormat.setSampleSize(16);
speakerAudioFormat.setSampleType(QAudioFormat::SignedInt);
speakerAudioFormat.setByteOrder(QAudioFormat::LittleEndian);
speakerAudioFormat.setCodec("audio/pcm");
QAudioDeviceInfo info = QAudioDeviceInfo::defaultInputDevice();
if(!info.isFormatSupported(audioFormat)){
std::cout << "Mic Format not supported!" << std::endl;
audioFormat = info.nearestFormat(audioFormat);
}
QAudioDeviceInfo speakerInfo = QAudioDeviceInfo::defaultOutputDevice();
if(!speakerInfo.isFormatSupported(speakerAudioFormat)){
std::cout << "Speaker Format is not supported!" << std::endl;
speakerAudioFormat = info.nearestFormat(speakerAudioFormat);
}
std::cout << speakerAudioFormat.sampleRate() << audioFormat.sampleRate() << speakerAudioFormat.channelCount() << audioFormat.channelCount() << std::endl;
audioInput = new QAudioInput(audioFormat);
audioOutput = new QAudioOutput(speakerAudioFormat);
audioInput->setNotifyInterval(20);
micFrameSize = (audioFormat.sampleRate()/1000)*20;
opus.initEncoder(audioFormat.sampleRate(),audioFormat.channelCount());
opus.initDecoder(speakerAudioFormat.sampleRate(),speakerAudioFormat.channelCount());
MAX_FRAME_SIZE = 6*960;
connect(audioInput,SIGNAL(notify()),this,SLOT(OnAudioNotfiy()));
}
AudioInput::~AudioInput()
{
}
void AudioInput::startRecording()
{
mic = audioInput->start();
speaker = audioOutput->start();
std::cout << "Recording started!" << std::endl;
}
void AudioInput::CreateNewAudioThread()
{
std::thread t1(&AudioInput::startRecording,this);
t1.detach();
}
void AudioInput::OnAudioNotfiy()
{
data = mic->readAll();
std::cout << "data size" <<data.size() << std::endl;
if(data.size() > 0){
pcm_bytes = reinterpret_cast<unsigned char*>(data.data());
//convert
for(int i=0;i<2*960;i++){ //TODO HARDCODED
in[i]=pcm_bytes[2*i+1]<<8|pcm_bytes[2*i];
}
opus_int32 compressedBytes = opus.encodeData(in,960,cbits,4000);
opus_int32 decompressedBytes = opus.decodeData(cbits,compressedBytes,out,MAX_FRAME_SIZE);
for(int i = 0; i<2*decompressedBytes;i++) //TODO HARDCODED
{
pcm_bytes[2*i]=out[i]&0xFF;
pcm_bytes[2*i+1]=(out[i]>>8)&0xFF;
}
speaker->write((const char*)pcm_bytes,3840);
}
}
for (int c = 0; c < numSamples; c++)
{
unsigned char tmp = data[2 * c];
data[2 * c] = data[2 * c + 1];
data[2 * c + 1] = tmp;
}
void voice::slot_read_audio_input()
{
// Audio settings:
// Sample Rate=48000
// Sample Size=16
// Channel Count=1
// Byte Order=Little Endian
// Sample Type= UnSignedInt
// Encoder settings:
// Sample Rate=48000
// Channel Count=1
// OPUS_APPLICATION_VOIP
// Decoder settings:
// Sample Rate=48000
// Channel Count=1
QByteArray audio_buffer;//mic
QByteArray output_audio_buffer;//speaker
int const OPUS_INT_SIZE=2;//sizeof(opus_int16)
int const FRAME_SIZE=960;
int const MAX_FRAME_SIZE=1276;
int FRAME_COUNT=3840/FRAME_SIZE/OPUS_INT_SIZE;// 3840 is a sample size= voice_input->bytesReady;
opus_int16 input_frame[FRAME_SIZE] = {};
opus_int16 output_frame[FRAME_SIZE] = {};
unsigned char compressed_frame[MAX_FRAME_SIZE] = {};
unsigned char decompressed_frame[FRAME_SIZE*OPUS_INT_SIZE] = {};
audio_buffer.resize(voice_input->bytesReady());
output_audio_buffer.resize(FRAME_SIZE*OPUS_INT_SIZE);
input->read(audio_buffer.data(),audio_buffer.size());
for(int i=0;i<FRAME_COUNT;i++)
{
// convert from LittleEndian
for(int j=0;j<FRAME_SIZE;j++)
{
input_frame[j]=qFromLittleEndian<opus_int16>(audio_buffer.data()+j*OPUS_INT_SIZE);
// or use this:
// input_frame[j]=static_cast<short>(static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j+1))<<8|static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j)));
}
opus_int32 compressedBytes = opus_encode(enc, input_frame,FRAME_SIZE,compressed_frame,MAX_FRAME_SIZE);
opus_int32 decompressedBytes = opus_decode(dec,compressed_frame,compressedBytes,output_frame,FRAME_SIZE,0);
// conver to LittleEndian
for(int j = 0; j<decompressedBytes;j++)
{
qToLittleEndian(output_frame[j],output_audio_buffer.data()+j*OPUS_INT_SIZE);
// or use this:
// decompressed_frame[OPUS_INT_SIZE*j]=output_frame[j]&0xFF;
// decompressed_frame[OPUS_INT_SIZE*j+1]=(output_frame[j]>>8)&0xFF;
}
audio_buffer.remove(0,FRAME_SIZE*OPUS_INT_SIZE);
output->write(output_audio_buffer,FRAME_SIZE*OPUS_INT_SIZE);
// or use this:
// output->write(reinterpret_cast<char*>(decompressed_frame),FRAME_SIZE*OPUS_INT_SIZE);
}
}