如何使用libavcodec将h.264视频与mp3音频流式传输?

如何使用libavcodec将h.264视频与mp3音频流式传输?,c,ffmpeg,libavcodec,C,Ffmpeg,Libavcodec,我从网络摄像机中读取h.264帧,并从麦克风中捕获音频。我需要流实时视频到ffserver。调试期间,我使用ffmpeg和以下命令从ffserver读取视频: ffmpeg -i http://127.0.0.1:12345/robot.avi -vcodec copy -acodec copy out.avi 我的输出文件中的视频略微加速。如果我添加一个音频流,它会被加速几次。有时输出文件中没有音频 以下是我的音频编码代码: #include "v_audio_encoder.h" ext

我从网络摄像机中读取h.264帧,并从麦克风中捕获音频。我需要流实时视频到ffserver。调试期间,我使用ffmpeg和以下命令从ffserver读取视频:

ffmpeg -i http://127.0.0.1:12345/robot.avi -vcodec copy -acodec copy out.avi
我的输出文件中的视频略微加速。如果我添加一个音频流,它会被加速几次。有时输出文件中没有音频

以下是我的音频编码代码:

#include "v_audio_encoder.h"

extern "C" {
#include <libavcodec/avcodec.h>
}
#include <cassert>

// Pimpl data for VAudioEncoder: owns the libavcodec MP3 encoder state.
struct VAudioEncoder::Private
{
    AVCodec *m_codec;           // encoder found in the ctor; owned by libavcodec, never freed here
    AVCodecContext *m_context;  // encoder context; closed and freed in ~VAudioEncoder

    std::vector<uint8_t> m_outBuffer;  // scratch buffer receiving one encoded frame per encode() call
};

/*
 * Create and open a stereo MP3 encoder.
 *
 * sampleRate — input sample rate in Hz (e.g. 44100)
 * bitRate    — target encoded bit rate in bits/s
 *
 * Failures are fatal (assert) — acceptable here because this mirrors the
 * original contract; callers have no error path.
 */
VAudioEncoder::VAudioEncoder( int sampleRate, int bitRate )
{
    d = new Private( );
    d->m_codec = avcodec_find_encoder( CODEC_ID_MP3 );
    assert( d->m_codec );
    d->m_context = avcodec_alloc_context3( d->m_codec );
    assert( d->m_context );

    // put sample parameters
    d->m_context->channels = 2;
    d->m_context->bit_rate = bitRate;
    d->m_context->sample_rate = sampleRate;
    d->m_context->sample_fmt = AV_SAMPLE_FMT_S16;
    // NOTE: the original strcpy'd "libmp3lame" into codec_name; that field is
    // informational only — the encoder is already selected by
    // avcodec_find_encoder() above — so the copy is dropped.

    // open it
    int res = avcodec_open2( d->m_context, d->m_codec, 0 );
    assert( res >= 0 );

    // frame_size counts samples per channel, NOT output bytes.  The original
    // sized the output buffer to frame_size, which is far smaller than what
    // avcodec_encode_audio() may write; the API requires the output buffer to
    // be at least FF_MIN_BUFFER_SIZE bytes.
    int bufSize = d->m_context->frame_size * d->m_context->channels * 2;
    if( bufSize < FF_MIN_BUFFER_SIZE )
        bufSize = FF_MIN_BUFFER_SIZE;
    d->m_outBuffer.resize( bufSize );
}

VAudioEncoder::~VAudioEncoder( )
{
    // Release the codec resources in reverse order of acquisition,
    // then drop the pimpl itself.
    AVCodecContext *ctx = d->m_context;
    avcodec_close( ctx );
    av_free( ctx );
    delete d;
}

/*
 * Encode exactly one frame of interleaved stereo S16 audio.
 *
 * samples — frame_size elements; each uint32_t presumably packs one L/R
 *           pair of 16-bit samples (reinterpreted as shorts below) —
 *           TODO confirm against the capture side.
 * outbuf  — receives the encoded MP3 bytes; cleared when the encoder
 *           buffered the frame or failed.
 */
void VAudioEncoder::encode( const std::vector<uint32_t>& samples, std::vector<uint8_t>& outbuf )
{
    assert( (int)samples.size( ) == d->m_context->frame_size );

    int outSize = avcodec_encode_audio( d->m_context, d->m_outBuffer.data( ),
                                        d->m_outBuffer.size( ), reinterpret_cast<const short*>( samples.data( ) ) );
    // avcodec_encode_audio() returns a negative value on error.  The original
    // `if( outSize )` treated that as success and passed the negative size to
    // resize()/memcpy(), which is undefined behaviour.
    if( outSize > 0 ) {
        outbuf.resize( outSize );
        memcpy( outbuf.data( ), d->m_outBuffer.data( ), outSize );
    }
    else
        outbuf.clear( );
}

int VAudioEncoder::getFrameSize( ) const
{
    // Number of samples (per channel) the encoder consumes per encode() call.
    const int samplesPerFrame = d->m_context->frame_size;
    return samplesPerFrame;
}
#包括“v_audio_encoder.h”
外部“C”{
#包括
}
#包括
结构VAudioEncoder::Private
{
AVCodec*m_编解码器;
AVCodecContext*m_context;
std::向量m_突发;
};
VAudioEncoder::VAudioEncoder(整数采样率,整数比特率)
{
d=新的私有文件();
d->m_codec=avcodec_find_编码器(codec_ID_MP3);
断言(d->m_编解码器);
d->m_context=avcodec\u alloc\u context3(d->m_codec);
//输入样本参数
d->m_context->channels=2;
d->m_上下文->比特率=比特率;
d->m_context->sample_rate=sampleRate;
d->m_context->sample_fmt=AV_sample_fmt_S16;
strcpy(d->m_上下文->codec_名称,“libmp3lame”);
//打开它
int res=avcodec_open2(d->m_上下文,d->m_编解码器,0);
断言(res>=0);
调整大小(d->m_上下文->帧大小);
}
VAudioEncoder::~VAudioEncoder()
{
avcodec_关闭(d->m_上下文);
av_免费(d->m_上下文);
删除d;
}
void VAudioEncoder::encode(常量std::vector&samples,std::vector&exputf)
{
assert((int)samples.size()==d->m\u context->frame\u size);
int outSize=avcodec\u encode\u audio(d->m\u context,d->m\u exputffer.data(),
d->m_exputfer.size(),重新解释cast(samples.data());
if(特大型){
扩展。调整大小(超大);
memcpy(extuff.data(),d->m_extuffer.data(),超大);
}
其他的
exputf.clear();
}
int-VAudioEncoder::getFrameSize()常量
{
返回d->m_上下文->帧大小;
}
以下是我的流式视频代码:

#include "v_out_video_stream.h"

extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libavutil/avstring.h>
#include <libavformat/avio.h>
}

#include <stdexcept>
#include <cassert>

// One-shot process-wide libavformat initialisation, run from a static
// object's constructor before main().
struct VStatticRegistrar
{
    VStatticRegistrar( )
    {
        av_register_all( );
        avformat_network_init( );
    }
};

// Fix: the original global was named `__registrar` — identifiers containing a
// double underscore are reserved for the implementation ([lex.name]), so that
// was formally undefined behaviour.  Also give it internal linkage since it
// is only a side-effect holder.
static VStatticRegistrar s_registrar;

// Pimpl data for VOutVideoStream.
struct VOutVideoStream::Private
{
    AVFormatContext * m_context;  // muxer context; non-null only while connected
    int m_videoStreamIndex;       // index of the H.264 stream, -1 before connectToServer()
    int m_audioStreamIndex;       // index of the MP3 stream, -1 before connectToServer()

    int m_videoBitrate;           // NOTE(review): never assigned or read in this file — dead member?
    int m_width;                  // video frame width in pixels
    int m_height;                 // video frame height in pixels
    int m_fps;                    // video frame rate (time_base = 1/fps)
    int m_bitrate;                // video bit rate passed to the H.264 codec context

    bool m_waitKeyFrame;          // drop video packets until the first key frame is seen
};

VOutVideoStream::VOutVideoStream( int width, int height, int fps, int bitrate )
{
    d = new Private( );

    // Not connected yet: the format context and stream indices are filled
    // in later by connectToServer().
    d->m_context = 0;
    d->m_videoStreamIndex = -1;
    d->m_audioStreamIndex = -1;

    // Remember the parameters for the H.264 stream.
    d->m_width = width;
    d->m_height = height;
    d->m_fps = fps;
    d->m_bitrate = bitrate;

    // Suppress video output until the first key frame arrives.
    d->m_waitKeyFrame = true;
}

bool VOutVideoStream::connectToServer( const std::string& uri )
{
    assert( ! d->m_context );

    // initalize the AV context
    d->m_context = avformat_alloc_context();
    if( !d->m_context )
        return false;
    // get the output format
    d->m_context->oformat = av_guess_format( "ffm", NULL, NULL );
    if( ! d->m_context->oformat )
        return false;

    strcpy( d->m_context->filename, uri.c_str( ) );

    // add an H.264 stream
    AVStream *stream = avformat_new_stream( d->m_context, NULL );
    if ( ! stream )
        return false;
    // initalize codec
    AVCodecContext* codec = stream->codec;
    if( d->m_context->oformat->flags & AVFMT_GLOBALHEADER )
        codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
    codec->codec_id = CODEC_ID_H264;
    codec->codec_type = AVMEDIA_TYPE_VIDEO;
    strcpy( codec->codec_name, "libx264" );
//    codec->codec_tag = ( unsigned('4') << 24 ) + (unsigned('6') << 16 ) + ( unsigned('2') << 8 ) + 'H';
    codec->width = d->m_width;
    codec->height = d->m_height;
    codec->time_base.den = d->m_fps;
    codec->time_base.num = 1;
    codec->bit_rate = d->m_bitrate;
    d->m_videoStreamIndex = stream->index;

    // add an MP3 stream
    stream = avformat_new_stream( d->m_context, NULL );
    if ( ! stream )
        return false;
    // initalize codec
    codec = stream->codec;
    if( d->m_context->oformat->flags & AVFMT_GLOBALHEADER )
        codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
    codec->codec_id = CODEC_ID_MP3;
    codec->codec_type = AVMEDIA_TYPE_AUDIO;
    strcpy( codec->codec_name, "libmp3lame" );
    codec->sample_fmt = AV_SAMPLE_FMT_S16;
    codec->channels = 2;
    codec->bit_rate = 64000;
    codec->sample_rate = 44100;
    d->m_audioStreamIndex = stream->index;

    // try to open the stream
    if( avio_open( &d->m_context->pb, d->m_context->filename, AVIO_FLAG_WRITE ) < 0 )
         return false;

    // write the header
    return avformat_write_header( d->m_context, NULL ) == 0;
}

void VOutVideoStream::disconnect( )
{
    assert( d->m_context );

    avio_close( d->m_context->pb );
    avformat_free_context( d->m_context );
    d->m_context = 0;
}

VOutVideoStream::~VOutVideoStream( )
{
    // Disconnect if the caller forgot to, then release the pimpl.
    if( d->m_context != 0 )
        disconnect( );
    delete d;
}

/*
 * Classify the first NAL unit / VOP in an encoded frame.
 * Returns 0 for an intra (key) frame, 1/2 for inter frames, -1 when the
 * buffer is too small or no start code is found.
 *
 * Fix: the H.264 branch previously compared the whole NAL header byte
 * (0x65 / 0x61 / 0x01), which only matches one specific nal_ref_idc value;
 * an IDR slice written as e.g. 0x25 or 0x45 was not recognised as a key
 * frame.  The NAL unit type is only the low 5 bits of the header byte
 * (ITU-T H.264 §7.3.1), so mask before comparing.
 */
int VOutVideoStream::getVopType( const std::vector<uint8_t>& image )
{
    if( image.size( ) < 6 )
        return -1;
    const unsigned char *b = image.data( );

    // Accept either a 3-byte (00 00 01) or 4-byte (00 00 00 01) start code
    if( b[ 0 ] || b[ 1 ] || 0x01 != b[ 2 ] ) {
        ++b;
        if ( b[ 0 ] || b[ 1 ] || 0x01 != b[ 2 ] )
            return -1;
    }

    b += 3;   // b now points at the NAL header / VOP start byte

    // MPEG-4 part 2: VOP start code 0xb6, coding type in the top two bits
    if( 0xb6 == *b ) {
        ++b;
        return ( *b & 0xc0 ) >> 6;
    }

    // H.264: nal_unit_type = low 5 bits, nal_ref_idc = bits 5-6
    switch( *b & 0x1f ) {
    case 5: return 0;                       // IDR slice -> key frame
    case 1: return ( *b & 0x60 ) ? 1 : 2;   // non-IDR slice (keeps the original
                                            // 0x61 -> 1, 0x01 -> 2 mapping)
    }

    return -1;
}

// Push one already-encoded H.264 frame to the muxer.
// Returns true on success; also returns true (silently dropping the frame)
// while still waiting for the first key frame.
bool VOutVideoStream::sendVideoFrame( std::vector<uint8_t>& image )
{
    // Init packet
    AVPacket pkt;
    av_init_packet( &pkt );
    // NOTE(review): getVopType() returns -1 for "unknown", so `0 >=` marks
    // unrecognised frames as key frames too — confirm this is intended.
    pkt.flags |= ( 0 >= getVopType( image ) ) ? AV_PKT_FLAG_KEY : 0;

    // Wait for key frame
    if ( d->m_waitKeyFrame ) {
        if( pkt.flags & AV_PKT_FLAG_KEY )
            d->m_waitKeyFrame = false;
        else
            return true;
    }

    pkt.stream_index = d->m_videoStreamIndex;
    pkt.data = image.data( );
    pkt.size = image.size( );
    // NOTE(review): timestamps are never set (AV_NOPTS_VALUE), so the muxer
    // cannot pace or interleave the streams — a likely cause of the
    // "accelerated" playback described above.  Consider stamping pts/dts
    // from a frame counter expressed in the stream's time base.
    pkt.pts = pkt.dts = AV_NOPTS_VALUE;

    return av_write_frame( d->m_context, &pkt ) >= 0;
}

// Push one already-encoded MP3 frame to the muxer.
// Returns true on success (av_write_frame did not report an error).
bool VOutVideoStream::sendAudioFrame( std::vector<uint8_t>& audio )
{
    // Init packet
    AVPacket pkt;
    av_init_packet( &pkt );
    pkt.stream_index = d->m_audioStreamIndex;
    pkt.data = audio.data( );
    pkt.size = audio.size( );
    // NOTE(review): as with sendVideoFrame, no pts/dts are set, so audio and
    // video cannot be synchronised by the muxer — likely why audio drifts or
    // disappears in the captured output.
    pkt.pts = pkt.dts = AV_NOPTS_VALUE;

    return av_write_frame( d->m_context, &pkt ) >= 0;
}
#包括“v_out_video_stream.h”
外部“C”{
#包括
#包括
#包括
#包括
}
#包括
#包括
结构VStatticRegistrator
{
vStatticRegistrator()
{
av_寄存器_all();
avformat_network_init();
}
};
国家注册处处长;;
结构VOutVideoStream::Private
{
AVFormatContext*m_context;
int m_视频流索引;
int m_音频流索引;
int mu视频比特率;
国际货币单位宽度;
国际货币单位高度;
国际货币基金组织;
整数m_比特率;
布尔·穆韦特;
};
VOutVideoStream::VOutVideoStream(整数宽度、整数高度、整数fps、整数比特率)
{
d=新的私有文件();
d->m_宽度=宽度;
d->m_高度=高度;
d->m_fps=fps;
d->m_上下文=0;
d->m_videoStreamIndex=-1;
d->m_指数=-1;
d->m_比特率=比特率;
d->m_waitKeyFrame=true;
}
bool VOutVideoStream::connectToServer(const std::string&uri)
{
断言(!d->m_上下文);
//初始化AV上下文
d->m_context=avformat_alloc_context();
如果(!d->m_上下文)
返回false;
//获取输出格式
d->m_context->oformat=av_guess_格式(“ffm”,NULL,NULL);
如果(!d->m_context->oformat)
返回false;
strcpy(d->m_context->filename,uri.c_str());
//添加一个H.264流
AVStream*stream=avformat\U new\U stream(d->m\U上下文,空);
如果(!流)
返回false;
//初始化编解码器
AVCodecContext*codec=stream->codec;
if(d->m_上下文->格式->标志和AVFMT_全局标题)
编解码器->标志|=编解码器标志|全局标题;
编解码器->编解码器id=编解码器id\U H264;
编解码器->编解码器类型=AVMEDIA\u类型\u视频;
strcpy(编解码器->编解码器名称,“libx264”);
//编解码器->编解码器标签=(无符号('4')高度=d->m\U高度;
编解码器->time\u base.den=d->m\u fps;
编解码器->时间_base.num=1;
编解码器->比特率=d->m比特率;
d->m\u videoStreamIndex=流->索引;
//添加MP3流
stream=avformat\u new\u stream(d->m\u上下文,空);
如果(!流)
返回false;
//初始化编解码器
编解码器=流->编解码器;
if(d->m_上下文->格式->标志和AVFMT_全局标题)
编解码器->标志|=编解码器标志|全局标题;
编解码器->编解码器\u id=编解码器\u id\u MP3;
编解码器->编解码器类型=AVMEDIA\u类型\u音频;
strcpy(编解码器->编解码器名称,“libmp3lame”);
编解码器->样本\u fmt=AV\u样本\u fmt\u S16;
编解码器->通道=2;
编解码器->比特率=64000;
编解码器->采样率=44100;
d->m_audioStreamIndex=流->索引;
//试着打开小溪
如果(avio_打开(&d->m_上下文->pb,d->m_上下文->文件名,avio_标志_写入)<0)
返回false;
//写标题
返回avformat_write_头(d->m_上下文,NULL)==0;
}
void VOutVideoStream::断开连接()
{
断言(d->m_上下文);
avio_关闭(d->m_上下文->pb);
avformat_free_上下文(d->m_上下文);
d->m_上下文=0;
}
VOutVideoStream::~VOutVideoStream()
{
如果(d->m_上下文)
断开连接();
删除d;
}
int VOutVideoStream::getVopType(const std::vector&image)
{
if(image.size()<6)
返回-1;
unsigned char*b=(unsigned char*)image.data();
//验证NAL标记
如果(b[0]| | b[1]| | 0x01!=b[2]){
++b;
如果(b[0]| | b[1]| | 0x01!=b[2])
返回-1;
}
b+=3;
//验证VOP id
如果(0xb6==*b){
++b;
返回(*b&0xc0)>>6;
}
开关(*b){
案例0x65:返回0;
案例0x61:返回1;
案例0x01:返回2;
}
返回-1;
}
bool VOutVideoStream::sendVideoFrame(标准::矢量和图像)
{
//初始化包
AVPacket-pkt;
av_初始_数据包(&pkt);
pkt.flags |=(0>=getVopType(图像))?AV_pkt_FLAG_键:0;
//等待关键帧
如果(d->m_等待关键帧){
if(pkt.flags和AV_pkt_FLAG_键)
d->m_waitKeyFrame=false;
其他的
返回true;
}
pkt.stream_index=d->m_videoStreamIndex;
pkt.data=image.data();
// Integration test: grab camera frames and microphone audio, encode the
// audio, and push both streams to ffserver at a fixed FPS.
// Requires real hardware (camera + microphone) and a running ffserver.
BOOST_AUTO_TEST_CASE(testSendingVideo)
{
    const int framesToGrab = 90000;

    VOutVideoStream stream( VIDEO_WIDTH, VIDEO_HEIGHT, FPS, VIDEO_BITRATE );
    if( stream.connectToServer( URI ) ) {
        VAudioEncoder audioEncoder( AUDIO_SAMPLE_RATE, AUDIO_BIT_RATE );
        VAudioCapture microphone( MICROPHONE_NAME, AUDIO_SAMPLE_RATE, audioEncoder.getFrameSize( ) );

        VLogitecCamera camera( VIDEO_WIDTH, VIDEO_HEIGHT );
        BOOST_REQUIRE( camera.open( CAMERA_PORT ) );
        BOOST_REQUIRE( camera.startCapturing( ) );

        std::vector<uint8_t> image, encodedAudio;
        std::vector<uint32_t> voice;
        boost::system_time startTime;
        int delta;
        // NOTE(review): one audio frame is read per video frame, but an MP3
        // frame covers a fixed number of samples (frame_size / sample_rate
        // seconds) that need not equal 1/FPS — presumably this mismatch, plus
        // the unset packet timestamps, causes the A/V drift described above;
        // verify the two rates actually line up.
        for( int i = 0; i < framesToGrab; ++i ) {
            startTime = boost::posix_time::microsec_clock::universal_time( );

            BOOST_REQUIRE( camera.read( image ) );
            BOOST_REQUIRE( microphone.read( voice ) );
            audioEncoder.encode( voice, encodedAudio );

            BOOST_REQUIRE( stream.sendVideoFrame( image ) );
            BOOST_REQUIRE( stream.sendAudioFrame( encodedAudio ) );

            // Sleep away the remainder of this frame's 1/FPS time slice.
            delta = ( boost::posix_time::microsec_clock::universal_time( ) - startTime ).total_milliseconds( );
            if( delta < 1000 / FPS )
                boost::thread::sleep( startTime + boost::posix_time::milliseconds( 1000 / FPS - delta ) );
        }

        BOOST_REQUIRE( camera.stopCapturing( ) );
        BOOST_REQUIRE( camera.close( ) );
    }
    else
        std::cout << "failed to connect to server" << std::endl;
}