Muxing FLV and sending RTMP with FFmpeg on iOS

I want to capture video and audio from the iPhone camera and microphone and stream them over RTMP using FFmpeg.

- (void)encoderToH264:(CMSampleBufferRef)sampleBuffer
{
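    /* Convert the camera's NV12 pixel buffer to planar YUV420, encode it as H.264 and mux the packet into the FLV/RTMP output. */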
    CVPixelBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    if (CVPixelBufferLockBaseAddress(imageBuffer, 0) == kCVReturnSuccess)
    {
        UInt8 *bufferbasePtr = (UInt8 *)CVPixelBufferGetBaseAddress(imageBuffer);
        UInt8 *bufferPtr = (UInt8 *)CVPixelBufferGetBaseAddressOfPlane(imageBuffer,0);
        UInt8 *bufferPtr1 = (UInt8 *)CVPixelBufferGetBaseAddressOfPlane(imageBuffer,1);
        size_t buffeSize = CVPixelBufferGetDataSize(imageBuffer);
        size_t width = CVPixelBufferGetWidth(imageBuffer);
        size_t height = CVPixelBufferGetHeight(imageBuffer);
        size_t bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer);
        size_t bytesrow0 = CVPixelBufferGetBytesPerRowOfPlane(imageBuffer,0);
        size_t bytesrow1  = CVPixelBufferGetBytesPerRowOfPlane(imageBuffer,1);
        size_t bytesrow2 = CVPixelBufferGetBytesPerRowOfPlane(imageBuffer,2);
        UInt8 *yuv420_data = (UInt8 *)malloc(width * height *3/ 2); // buffer to store YUV with layout YYYYYYYYUUVV


        /* convert NV12 data to YUV420*/
        UInt8 *pY = bufferPtr ;
        UInt8 *pUV = bufferPtr1;
        UInt8 *pU = yuv420_data + width*height;
        UInt8 *pV = pU + width*height/4;
        for(int i =0;i<height;i++)
        {
            memcpy(yuv420_data+i*width,pY+i*bytesrow0,width);
        }
        for(int j = 0;j<height/2;j++)
        {
            for(int i =0;i<width/2;i++)
            {
                *(pU++) = pUV[i<<1];
                *(pV++) = pUV[(i<<1) + 1];
            }
            pUV+=bytesrow1;
        }

        //Read raw YUV data
        picture_buf = yuv420_data;
        pFrame->data[0] = picture_buf;              // Y
        pFrame->data[1] = picture_buf+ y_size;      // U
        pFrame->data[2] = picture_buf+ y_size*5/4;  // V

        int got_picture = 0;

        // Encode
        pFrame->width = 720;
        pFrame->height = 1280;
        pFrame->format = PIX_FMT_YUV420P;

        AVCodecContext *c = video_st->codec;
        int ret = avcodec_encode_video2(c, &pkt, pFrame, &got_picture);
        if(ret < 0)
        {
            printf("Failed to encode! \n");
        }

        if (got_picture==1)
        {
            /* Compute current audio and video time. */
            video_time = video_st ? video_st->pts.val * av_q2d(video_st->time_base) : 0.0;
            pFrame->pts += av_rescale_q(1, video_st->codec->time_base, video_st->time_base);

            if(pkt.size != 0)
            {
                printf("Succeed to encode frame: %5lld\tsize:%5d\n", pFrame->pts, pkt.size);
                pkt.stream_index = video_st->index;
                ret = av_write_frame(pFormatCtx, &pkt);
                av_free_packet(&pkt);
            }
        }
        free(yuv420_data);
    }
    CVPixelBufferUnlockBaseAddress(imageBuffer, 0);
}
-(void)encoderToMP3:(CMSampleBufferRef)sampleBuffer
{
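    /* Resample the microphone PCM with libswresample to the encoder's sample format, encode it as MP3 and mux the packet. */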
    CMSampleTimingInfo timing_info;
    CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &timing_info);
    double  pts=0;
    double  dts=0;
    AVCodecContext *c;
    int got_packet, ret;
    c = audio_st->codec;
    CMItemCount numSamples = CMSampleBufferGetNumSamples(sampleBuffer);

    NSUInteger channelIndex = 0;

    CMBlockBufferRef audioBlockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);

    size_t audioBlockBufferOffset = (channelIndex * numSamples * sizeof(SInt16));
    size_t lengthAtOffset = 0;
    size_t totalLength = 0;
    SInt16 *samples = NULL;
    CMBlockBufferGetDataPointer(audioBlockBuffer, audioBlockBufferOffset, &lengthAtOffset, &totalLength, (char **)(&samples));

    const AudioStreamBasicDescription *audioDescription = CMAudioFormatDescriptionGetStreamBasicDescription(CMSampleBufferGetFormatDescription(sampleBuffer));

    SwrContext *swr = swr_alloc();

    int in_smprt = (int)audioDescription->mSampleRate;
    av_opt_set_int(swr, "in_channel_layout",  AV_CH_LAYOUT_MONO, 0);
    av_opt_set_int(swr, "out_channel_layout", audio_st->codec->channel_layout,  0);

    av_opt_set_int(swr, "in_channel_count", audioDescription->mChannelsPerFrame,  0);
    av_opt_set_int(swr, "out_channel_count", 1,  0);

    av_opt_set_int(swr, "out_channel_layout", audio_st->codec->channel_layout,  0);
    av_opt_set_int(swr, "in_sample_rate",     audioDescription->mSampleRate,0);

    av_opt_set_int(swr, "out_sample_rate",    audio_st->codec->sample_rate,0);

    av_opt_set_sample_fmt(swr, "in_sample_fmt",  AV_SAMPLE_FMT_S16, 0);

    av_opt_set_sample_fmt(swr, "out_sample_fmt", audio_st->codec->sample_fmt,  0);

    swr_init(swr);
    uint8_t **input = NULL;
    int src_linesize;
    int in_samples = (int)numSamples;
    ret = av_samples_alloc_array_and_samples(&input, &src_linesize, audioDescription->mChannelsPerFrame, in_samples, AV_SAMPLE_FMT_S16P, 0);

    *input=(uint8_t*)samples;
    uint8_t *output=NULL;

    int out_samples = av_rescale_rnd(swr_get_delay(swr, in_smprt) +in_samples, (int)audio_st->codec->sample_rate, in_smprt, AV_ROUND_UP);

    av_samples_alloc(&output, NULL, audio_st->codec->channels, out_samples, audio_st->codec->sample_fmt, 0);
    in_samples = (int)numSamples;
    out_samples = swr_convert(swr, &output, out_samples, (const uint8_t **)input, in_samples);

    aFrame->nb_samples =(int) out_samples;

    ret = avcodec_fill_audio_frame(aFrame, audio_st->codec->channels, audio_st->codec->sample_fmt,
                                   (uint8_t *)output,
                                   (int) out_samples *
                                   av_get_bytes_per_sample(audio_st->codec->sample_fmt) *
                                   audio_st->codec->channels, 1);
    if (ret < 0)
    {
        fprintf(stderr, "Error fill audio frame: %s\n", av_err2str(ret));
    }
    aFrame->channel_layout = audio_st->codec->channel_layout;
    aFrame->channels=audio_st->codec->channels;
    aFrame->sample_rate= audio_st->codec->sample_rate;

    if (timing_info.presentationTimeStamp.timescale!=0)
        pts=(double) timing_info.presentationTimeStamp.value/timing_info.presentationTimeStamp.timescale;


    aFrame->pts = pts*audio_st->time_base.den;
    aFrame->pts = av_rescale_q(aFrame->pts, audio_st->time_base, audio_st->codec->time_base);

    ret = avcodec_encode_audio2(c, &pkt2, aFrame, &got_packet);

    if (ret < 0)
    {
        fprintf(stderr, "Error encoding audio frame: %s\n", av_err2str(ret));
    }
    swr_free(&swr);

    if (got_packet)
    {
        pkt2.stream_index = audio_st->index;        

        // Write the compressed frame to the media file.

        ret = av_interleaved_write_frame(pFormatCtx, &pkt2);
        if (ret != 0)
        {
            fprintf(stderr, "Error while writing audio frame: %s\n", av_err2str(ret));
            av_free_packet(&pkt2);
        }
    }
}
The following capture delegate method receives the sample buffers on iOS:

- (void)captureOutput:(AVCaptureOutput *)captureOutput  didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection *)connection
{    
    if (connection == videoCaptureConnection)
    {
        [manager264 encoderToH264:sampleBuffer];
    }
    else if (connection == audioCaptureConnection)
    {
        [manager264 encoderToMP3:sampleBuffer];
    }
}
Initializing FFmpeg:

- (int)setX264Resource
{
    Global_Variables_VVV = (AppDelegate *)[[UIApplication sharedApplication] delegate];
    avformat_network_init();
    av_register_all();

    // avformat_alloc_output_context2() allocates the context itself; a separate avformat_alloc_context() would only leak
    avformat_alloc_output_context2(&pFormatCtx, NULL, "flv", out_file);
    fmt = pFormatCtx->oformat;

    //Open output URL
    if (avio_open(&pFormatCtx->pb, out_file, AVIO_FLAG_READ_WRITE) < 0)
    {
        printf("Failed to open output file! \n");
        return -1;
    }

    /* Add the audio and video streams using the default format codecs
     * and initialize the codecs. */
    video_st = NULL;
    audio_st = NULL;
   if (fmt->video_codec != AV_CODEC_ID_NONE) {
        video_st = add_stream(pFormatCtx, &pCodec, AV_CODEC_ID_H264);
    }
   if (fmt->audio_codec != AV_CODEC_ID_NONE) {
        audio_st = add_stream(pFormatCtx, &aCodec, AV_CODEC_ID_MP3);
    }

    /* Now that all the parameters are set, we can open the audio and
     * video codecs and allocate the necessary encode buffers. */
    if (video_st)
        [self open_video:pFormatCtx avcodec:pCodec avstream:video_st];

    if (audio_st)
        [self open_audio:pFormatCtx avcodec:aCodec avstream:audio_st];

    // Show some Information
    av_dump_format(pFormatCtx, 0, out_file, 1);

    //Write File Header
    avformat_write_header(pFormatCtx, NULL);

    av_new_packet(&pkt, picture_size);
    av_new_packet(&pkt2, picture_size);

    AVCodecContext *c = video_st->codec;

    y_size = c->width * c->height;

    if (pFrame)
        pFrame->pts = 0;

    if(aFrame)
    {
        aFrame->pts = 0;
    }

    return 0;
}

static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id)
{
    AVCodecContext *c;
    AVStream *st;
    /* find the encoder */
    *codec = avcodec_find_encoder(codec_id);
    if (!(*codec))
    {
        NSLog(@"Could not find encoder for '%s'\n",
          avcodec_get_name(codec_id));
    }
    st = avformat_new_stream(oc, *codec);
    if (!st)
    {
        NSLog(@"Could not allocate stream\n");
    }
    st->id = oc->nb_streams-1;
    c = st->codec;
    switch ((*codec)->type)
    {
        case AVMEDIA_TYPE_AUDIO:
            c->codec_id = AV_CODEC_ID_MP3;
            c->codec_type = AVMEDIA_TYPE_AUDIO;
            c->channels = 1;

            c->sample_fmt = AV_SAMPLE_FMT_S16P;
            c->bit_rate = 128000;
            c->sample_rate = 44100;
            c->channel_layout = AV_CH_LAYOUT_MONO;
            break;
        case AVMEDIA_TYPE_VIDEO:
            c->codec_id = AV_CODEC_ID_H264;
            c->codec_type=AVMEDIA_TYPE_VIDEO;
            /* Resolution must be a multiple of two. */
            c->width    = 720;
            c->height   = 1280;
            /* timebase: This is the fundamental unit of time (in seconds) in terms
             * of which frame timestamps are represented. For fixed-fps content,
             * timebase should be 1/framerate and timestamp increments should be
             * identical to 1. */
            c->time_base.den = 30;
            c->time_base.num = 1;
            c->gop_size      = 15; /* emit one intra frame every fifteen frames at most */
            c->pix_fmt       = PIX_FMT_YUV420P;
            c->max_b_frames = 0;
            c->bit_rate = 3000000;
            c->qmin = 10;
            c->qmax = 51;

            break;
        default:
            break;
    }
    /* Some formats want stream headers to be separate. */
    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        c->flags |= CODEC_FLAG_GLOBAL_HEADER;
    return st;
}

Increase the PTS by the number of samples sent to the encoder. Also, do not forget to rescale the timestamps from the audio stream's time base to the output format context's time base.

So the fix is:

audioFrame->pts = audioSamplesCounter; // starting from zero
Then, after encoding (avcodec_encode_audio2), increase the counter by the number of samples in the frame that was sent to the encoder. In your case that is not the amount obtained from the CMSampleBuffer, but the amount produced by the SWR resampling ("out_samples").
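
For example, a minimal sketch using the names from encoderToMP3 above:

ret = avcodec_encode_audio2(c, &pkt2, aFrame, &got_packet);  // existing encode call
audioSamplesCounter += out_samples;                          // advance the counter by the resampled samples just encoded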

Before writing to the output media file, rescale the timestamps:

av_packet_rescale_ts(&audioPacket,
                     audioStream->codec->time_base,
                     outputFormatContext->streams[audioStream->index]->time_base);
In addition, I would recommend optimizing the use of the device's resources:

  • Create the rescaling/resampling contexts once and reuse them (a short sketch follows these bullets).
  • Allocate the audio and video buffers once the stream starts or the first CMSampleBufferRef arrives; their sizes will not change until the stream/session is restarted, which greatly improves performance and memory consumption.
  • Use hardware acceleration wherever possible.
  • Do not forget to free any allocated arrays and contexts.
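
For the first point, a rough sketch of what reusing the resampler could look like in encoderToMP3, assuming a hypothetical instance variable _swrCtx (the other names are the ones already used above):

// Lazily create the resampler when the first audio buffer arrives, then reuse it
// for every subsequent CMSampleBufferRef instead of swr_alloc()/swr_init()/swr_free() per call.
if (!_swrCtx)
{
    _swrCtx = swr_alloc_set_opts(NULL,
                                 audio_st->codec->channel_layout,     // output layout
                                 audio_st->codec->sample_fmt,         // output sample format
                                 audio_st->codec->sample_rate,        // output sample rate
                                 AV_CH_LAYOUT_MONO,                   // input layout (iPhone mic)
                                 AV_SAMPLE_FMT_S16,                   // input sample format
                                 (int)audioDescription->mSampleRate,  // input sample rate
                                 0, NULL);
    swr_init(_swrCtx);
}

// per buffer: out_samples = swr_convert(_swrCtx, &output, out_samples, (const uint8_t **)input, in_samples);

// and only when the capture session stops:
// swr_free(&_swrCtx);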

  • Hope it helps :)
