C++ FFmpeg Opus choppy sound (updated description)

Tags: c++, ffmpeg, resampling, opus

I am using FFmpeg and trying to encode and decode raw PCM sound to Opus with the built-in FFmpeg "opus" codec. My input samples are raw PCM, 8000 Hz, 16-bit, mono, in AV_SAMPLE_FMT_S16 format. Since Opus only accepts the sample format AV_SAMPLE_FMT_FLTP and a sample rate of 48000 Hz, I resample my samples before encoding.
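For reference, this is roughly how an Opus encoder context with those constraints can be opened (a minimal sketch, not my exact code; the bit rate is just an example value):

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/channel_layout.h>
}

// Sketch: open FFmpeg's Opus encoder for 48000 Hz planar-float mono input.
AVCodecContext *openOpusEncoder()
{
    const AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_OPUS);
    if (!codec) return nullptr;

    AVCodecContext *contextEncoder = avcodec_alloc_context3(codec);
    contextEncoder->sample_rate    = 48000;
    contextEncoder->sample_fmt     = AV_SAMPLE_FMT_FLTP;
    contextEncoder->channel_layout = AV_CH_LAYOUT_MONO;
    contextEncoder->channels       = 1;
    contextEncoder->bit_rate       = 64000;   // example value
    // needed only if the build selects the experimental native Opus encoder
    contextEncoder->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;

    if (avcodec_open2(contextEncoder, codec, nullptr) < 0)
    {
        avcodec_free_context(&contextEncoder);
        return nullptr;
    }
    // contextEncoder->frame_size now holds the number of samples the encoder
    // expects per AVFrame (960 per channel for 20 ms Opus frames at 48 kHz).
    return contextEncoder;
}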

I have two instances of a ResamplerAudio class that resamples audio samples and holds a SwrContext member. I use the first instance of ResamplerAudio to resample the raw PCM input audio before encoding, and the second one to resample the decoded audio so that its format and sample rate match the source values of the input raw audio.
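For clarity, ResamplerAudio looks roughly like this (a sketch showing only the members used in the code below):

extern "C" {
#include <libavcodec/avcodec.h>
#include <libswresample/swresample.h>
}
#include <cstdint>
#include <vector>

// Rough shape of the class: each instance owns a single SwrContext.
class ResamplerAudio
{
public:
    void init(AVCodecContext *codecContext, int inSampleRate, int outSampleRate,
              AVSampleFormat inSampleFmt, AVSampleFormat outSampleFmt);
    std::vector<uint8_t> convert(uint8_t **inData, int inSamplesCount,
                                 int outChannels, int outFormat);

private:
    SwrContext *swrContext = nullptr;
};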

The ResamplerAudio class has a function that initializes its SwrContext member, like this:

void ResamplerAudio::init(AVCodecContext *codecContext, int inSampleRate, int outSampleRate, AVSampleFormat inSampleFmt, AVSampleFormat outSampleFmt)
{
    swrContext = swr_alloc();
    if (!swrContext)
    {
        LOGE(TAG, "[init] Couldn't allocate swr context");
        return;
    }

    av_opt_set_int(swrContext, "in_channel_layout", (int64_t) codecContext->channel_layout, 0);
    av_opt_set_int(swrContext, "out_channel_layout", (int64_t) codecContext->channel_layout,  0);

    av_opt_set_int(swrContext, "in_channel_count", codecContext->channels, 0);
    av_opt_set_int(swrContext, "out_channel_count", codecContext->channels, 0);

    av_opt_set_int(swrContext, "in_sample_rate", inSampleRate, 0);
    av_opt_set_int(swrContext, "out_sample_rate", outSampleRate, 0);

    av_opt_set_sample_fmt(swrContext, "in_sample_fmt", inSampleFmt, 0);
    av_opt_set_sample_fmt(swrContext, "out_sample_fmt", outSampleFmt,  0);

    int ret = swr_init(swrContext);
    if (ret < 0)
    {
        LOGE(TAG, "[init] swr_init error: %s", av_err2str(ret));
        return;
    }

    LOGD(TAG, "[init] success codecContext->channel_layout: %d; inSampleRate: %d; outSampleRate: %d; inSampleFmt: %d; outSampleFmt: %d", (int) codecContext->channel_layout, inSampleRate, outSampleRate, inSampleFmt, outSampleFmt);
}
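As a side note, the same configuration can be written in one call with swr_alloc_set_opts(); a sketch with the encoder-side case (mono S16 8000 Hz in, mono FLTP 48000 Hz out) hard-coded:

extern "C" {
#include <libswresample/swresample.h>
#include <libavutil/channel_layout.h>
}

// Sketch: allocate and configure the resampler in one call, then swr_init() it.
SwrContext *makeEncoderResampler()
{
    SwrContext *swr = swr_alloc_set_opts(
            nullptr,
            AV_CH_LAYOUT_MONO, AV_SAMPLE_FMT_FLTP, 48000,   // output
            AV_CH_LAYOUT_MONO, AV_SAMPLE_FMT_S16,  8000,    // input
            0, nullptr);
    if (!swr) return nullptr;

    if (swr_init(swr) < 0)
    {
        swr_free(&swr);
        return nullptr;
    }
    return swr;
}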
The first instance of ResamplerAudio (the one that resamples the raw PCM input before encoding, I call it resamplerEncoder) and the second instance (the one that resamples the audio after it has been decoded from Opus, I call it resamplerDecoder) are initialized with the following parameters:

resamplerEncoder->init(contextEncoder, 8000, 48000, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP);
resamplerDecoder->init(contextDecoder, 48000, 8000, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16);

I call the convert function of ResamplerAudio like this before encoding:

// data - an array of raw pcm audio
// dataLength - the length of the data array
// getSamplesCount() - function that calculates the samples count
// frameEncode - AVFrame used for encoding audio
std::vector<uint8_t> resampledData = resamplerEncoder->convert(&data, getSamplesCount(dataLength, frameEncode->channels, AV_SAMPLE_FMT_S16), frameEncode->channels, frameEncode->format);

and like this after decoding:

// frameDecode - AVFrame that holds the decoded audio
std::vector<uint8_t> resampledData = resamplerDecoder->convert(frameDecode->data, frameDecode->nb_samples, frameDecode->channels, AV_SAMPLE_FMT_S16);
The convert function of ResamplerAudio that performs the resampling looks like this:

std::vector<uint8_t> ResamplerAudio::convert(uint8_t **inData, int inSamplesCount, int outChannels, int outFormat)
{
    std::vector<uint8_t> result;
    uint8_t *dstData = NULL;
    // upper bound of output samples for this input (including internally buffered ones)
    const int dstNbSamples = swr_get_out_samples(swrContext, inSamplesCount);
    if (av_samples_alloc(&dstData, NULL, outChannels, dstNbSamples, AVSampleFormat(outFormat), 1) < 0)
        return result;

    int resampledSize = swr_convert(swrContext, &dstData, dstNbSamples, (const uint8_t **) inData, inSamplesCount);
    int dstBufSize = av_samples_get_buffer_size(NULL, outChannels, resampledSize, AVSampleFormat(outFormat), 1);

    if (dstBufSize > 0)
        std::copy(&dstData[0], &dstData[dstBufSize], std::back_inserter(result));

    av_freep(&dstData);   // buffer from av_samples_alloc() must be freed explicitly
    return result;
}
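Note that swr_convert() may keep some samples buffered internally while changing the rate; they only come out on a later call or when it is fed a NULL input. A sketch of a flush helper that drains those samples (flushResampler is not part of my class, just an illustration using the same FFmpeg calls as convert()):

// Sketch: drain whatever the resampler still holds, e.g. at end of stream.
std::vector<uint8_t> flushResampler(SwrContext *swrContext, int outChannels, AVSampleFormat outFormat)
{
    std::vector<uint8_t> result;

    // How many samples are still buffered inside the resampler.
    int pending = swr_get_out_samples(swrContext, 0);
    if (pending <= 0) return result;

    uint8_t *dstData = NULL;
    if (av_samples_alloc(&dstData, NULL, outChannels, pending, outFormat, 1) < 0)
        return result;

    // A NULL input asks swr_convert() to output buffered samples only.
    int got = swr_convert(swrContext, &dstData, pending, NULL, 0);
    int bytes = av_samples_get_buffer_size(NULL, outChannels, got, outFormat, 1);
    if (bytes > 0)
        result.assign(dstData, dstData + bytes);

    av_freep(&dstData);
    return result;
}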
The getSamplesCount() function looks like this:

int getSamplesCount(int bytesCount, int channels, AVSampleFormat format)
{
    return bytesCount / av_get_bytes_per_sample(format) / channels;
}
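For example, with my 8000 Hz, 16-bit, mono input a 20 ms chunk is 320 bytes:

// 320 bytes / 2 bytes per S16 sample / 1 channel = 160 samples
int samples = getSamplesCount(320, 1, AV_SAMPLE_FMT_S16);   // samples == 160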
After that I fill my frameEncode with the resampled samples:

memcpy(&frame->data[0][0], &resampledData[0], sizeof(uint8_t) * resampledDataLength);

and then pass frameEncode to encoding like this: encodeFrame(resampledDataLength)
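encodeFrame() itself is not shown above. Roughly, it uses the send/receive API like this (a sketch, not my exact code; contextEncoder, packetEncode and the encodedData vector are assumed members of my encoder class):

// Sketch: encode the samples currently stored in frameEncode and append the
// resulting Opus packets to encodedData.
void encodeFrame(int resampledDataLength)
{
    (void) resampledDataLength;   // number of bytes copied into frameEncode

    int ret = avcodec_send_frame(contextEncoder, frameEncode);
    if (ret < 0)
    {
        LOGE(TAG, "[encode] avcodec_send_frame error: %s", av_err2str(ret));
        return;
    }

    while (ret >= 0)
    {
        ret = avcodec_receive_packet(contextEncoder, packetEncode);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break;
        if (ret < 0)
        {
            LOGE(TAG, "[encode] avcodec_receive_packet error: %s", av_err2str(ret));
            break;
        }

        encodedData.insert(encodedData.end(),
                           packetEncode->data, packetEncode->data + packetEncode->size);
        av_packet_unref(packetEncode);
    }
}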

The resulting sound is choppy, and I also noticed that the size of the decoded array is larger than the size of the source array of raw PCM audio.

Could you please point out what I am doing wrong?

UPD 18.05.2020: I tested my resampling logic by resampling the raw PCM sound without any encoding and decoding routines. First I converted the sample rate of the input sound from 8000 Hz to 48000 Hz, then resampled the result of that step from 48000 Hz back to 8000 Hz, and the resulting sound was perfectly clean. I did the same steps converting only the sample format from AV_SAMPLE_FMT_S16 to AV_SAMPLE_FMT_FLTP and back, and the result was again clean, as it was when I converted both sample rate and sample format. So I assume the problem with the distorted and choppy sound is in my encoding or decoding routine, most likely in the decoding routine, because after decoding I always get an AVFrame with 960 nb_samples regardless of the size of the input sound.
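For reference, 960 samples at 48000 Hz is exactly one 20 ms Opus frame, which should map back to 160 samples (320 bytes of S16 mono) at my source rate of 8000 Hz:

// One decoded Opus frame: 960 samples at 48000 Hz.
constexpr int    decodedSamples     = 960;
constexpr int    decodedRate        = 48000;
constexpr int    sourceRate         = 8000;
constexpr double frameDurationMs    = 1000.0 * decodedSamples / decodedRate;       // 20 ms
constexpr int    expectedOutSamples = decodedSamples * sourceRate / decodedRate;   // 160 samples
constexpr int    expectedOutBytes   = expectedOutSamples * 2 /* S16 */ * 1 /* mono */;   // 320 bytes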

My decoding routine looks like this:

std::vector<uint8_t> decode(uint8_t *data, unsigned int dataLength)
{
    decodedData.clear();

    int dataSize = dataLength;

    while (dataSize > 0)
    {
        if (!frameDecode)
        {
            frameDecode = av_frame_alloc();
            if (!frameDecode)
            {
                LOGE(TAG, "[decode] Couldn't allocate the frame");
                return EMPTY_DATA;
            }
        }

        ret = av_parser_parse2(parser, contextDecoder, &packetDecode->data, &packetDecode->size, &data[0], dataSize, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
        if (ret < 0) {
            LOGE(TAG, "[decode] av_parser_parse2 error: %s", av_err2str(ret));
            return EMPTY_DATA;
        }

        data += ret;
        dataSize -= ret;

        doDecode();
    }
    return decodedData;
}

void doDecode()
{
    if (packetDecode->size) {
        /* send the packet with the compressed data to the decoder */
        int ret = avcodec_send_packet(contextDecoder, packetDecode);
        if (ret < 0) LOGE(TAG, "[decode] avcodec_send_packet error: %s", av_err2str(ret));

        /* read all the output frames (in general there may be any number of them) */
        while (ret >= 0)
        {
            ret = avcodec_receive_frame(contextDecoder, frameDecode);
            if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) LOGE(TAG, "[decode] avcodec_receive_frame error: %s", av_err2str(ret));
            if (ret < 0) break;

            std::vector<uint8_t> resampledData = resamplerDecoder->convert(frameDecode->data, frameDecode->nb_samples, frameDecode->channels, AV_SAMPLE_FMT_S16);
            if (!resampledData.size()) continue;
            std::copy(&resampledData.data()[0], &resampledData.data()[resampledData.size()], std::back_inserter(decodedData));
        }
    }
}
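The routine above also never drains the decoder; at end of stream any buffered frames could be flushed with a NULL packet, roughly like this (a sketch using the same members as doDecode()):

// Sketch: put the decoder into draining mode and collect the remaining frames.
void flushDecoder()
{
    int ret = avcodec_send_packet(contextDecoder, NULL);   // NULL starts draining
    if (ret < 0) return;

    while (ret >= 0)
    {
        ret = avcodec_receive_frame(contextDecoder, frameDecode);
        if (ret < 0) break;   // AVERROR_EOF once fully drained

        std::vector<uint8_t> resampledData = resamplerDecoder->convert(
                frameDecode->data, frameDecode->nb_samples,
                frameDecode->channels, AV_SAMPLE_FMT_S16);
        decodedData.insert(decodedData.end(), resampledData.begin(), resampledData.end());
    }
}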
UPD 30.05.2020
I decided to stop using FFmpeg in my project and switched to a different Opus library instead; I wrapped it for my project and it works fine.

Comments:

Are you keeping a single mono channel throughout the whole project, or do you add a second channel for stereo somewhere? – @AnthumChris I keep one channel; my input audio is mono, and when I resample I also specify one channel.

Please add the code where you set up swrContext. But what is in swrContext when you call swr_convert? I think the format and layout need to be correct. – @stark swrContext is a member of the ResamplerAudio class, and I have two instances of ResamplerAudio: the first for resampling before encoding and the second for resampling after decoding.