C++ 使用opencv进行视频处理的音频输出_C++_C_Audio_Opencv_Ffmpeg

C++ 使用opencv进行视频处理的音频输出

c++ c audio opencv ffmpeg

C++ 使用opencv进行视频处理的音频输出,c++,c,audio,opencv,ffmpeg,C++,C,Audio,Opencv,Ffmpeg,我正在用opencv处理视频，但同时我需要播放音频并简单地控制它，比如声音或当前帧数我想我应该用ffmpeg创建一个并行进程，但我不知道怎么做。你能解释一下怎么做吗或者您知道另一种解决方案吗？我认为在这种情况下，应该使用ffmpeg播放音频和SDL for video 使用OpenCV打开文件并处理帧后，您可以使用OpenCV->SDL显示它，同时通过ffmpeg检索音频帧并使用SDL播放它们我还发现了一篇不错的帖子，展示了如何使用ffmpeg从视频文件中捕获帧，将它们存储在OpenCV

我正在用opencv处理视频，但同时我需要播放音频并简单地控制它，比如声音或当前帧数

我想我应该用ffmpeg创建一个并行进程，但我不知道怎么做。你能解释一下怎么做吗

或者您知道另一种解决方案吗？

我认为在这种情况下，应该使用ffmpeg处理音频，并使用SDL显示视频

使用OpenCV打开文件并处理帧后，您可以使用

OpenCV->SDL

显示它，同时通过ffmpeg检索音频帧并使用SDL播放它们

我还发现了一篇不错的帖子，展示了如何使用ffmpeg从视频文件中捕获帧，将它们存储在OpenCV

cv::Mat

中，并在OpenCV窗口中显示结果。但是这样你就不能播放音频了，因为OpenCV不能处理这个问题

您可能也对阅读感兴趣：

编辑：

我花了4小时编写了一个原型来演示它是如何完成的。此演示程序通过OpenCV读取视频帧（以便您可以处理它们），并通过ffmpeg读取音频，SDL则用于播放这两者（视频帧和音频）！此演示中有两个限制，您必须注意：1-它假设您使用的是打包为BGR（24位）的OpenCV图像；2-音频和视频不同步！是的，我留了一些工作给您做（Yeeey）。但是不要惊慌，关于如何实现同步，已有一些思路可供参考。
这一点很重要，因为您将对帧进行一些处理，这肯定会使视频和音频很快失去同步，因为它们是独立播放的
我上面建议的ffmpeg教程对于理解代码非常重要。它们展示了如何处理SDL，以及如何读取音频/视频流的数据包

/*
 * Demo: read video frames through OpenCV (so they can be processed), read
 * audio packets through FFmpeg, and use SDL 1.2 to display the frames and
 * play the audio.
 *
 * Known limitations (by design of this prototype):
 *   1. Frames are assumed to be 8-bit, 3-channel images packed as BGR.
 *   2. Audio and video are NOT synchronized; they are played independently.
 *
 * NOTE(review): the FFmpeg calls used here (av_open_input_file,
 * avcodec_decode_audio2, CODEC_TYPE_*, ...) belong to a legacy FFmpeg API.
 * They are kept as-is so the program keeps building against the library
 * version it was written for.
 */
#include <highgui.h>
#include <cv.h>

extern "C" {
#include <SDL.h>
#include <SDL_thread.h>
#include <avcodec.h>
#include <avformat.h>
}

#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

using namespace cv;

#define SDL_AUDIO_BUFFER_SIZE 1024

/* Thread-safe FIFO of demuxed packets shared by main() and the audio callback. */
typedef struct PacketQueue
{
    AVPacketList *first_pkt, *last_pkt;
    int nb_packets;
    int size;           /* sum of pkt.size over all queued packets */
    SDL_mutex *mutex;
    SDL_cond *cond;     /* signalled whenever a packet is queued or on abort */
} PacketQueue;

PacketQueue audioq;

int audioStream = -1;
int videoStream = -1;
int quit = 0;           /* set to 1 to make the audio side stop waiting for packets */

SDL_Surface* screen = NULL;
SDL_Surface* surface = NULL;    /* kept for compatibility; frames use a temporary surface */

AVFormatContext* pFormatCtx = NULL;
AVCodecContext* aCodecCtx = NULL;
AVCodecContext* pCodecCtx = NULL;

/*
 * Display one frame in the SDL window, creating the window on first use.
 * Assumes the IplImage is packed as BGR, 24 bits per pixel.
 */
void show_frame(IplImage* img)
{
    if (!img || !img->imageData || img->width <= 0 || img->height <= 0)
        return;

    if (!screen)
    {
        screen = SDL_SetVideoMode(img->width, img->height, 0, 0);
        if (!screen)
        {
            fprintf(stderr, "SDL: could not set video mode - exiting\n");
            exit(1);
        }
    }

    /* Wrap the existing pixel buffer; SDL does not copy or own the pixels. */
    SDL_Surface* frame_surface = SDL_CreateRGBSurfaceFrom((void*)img->imageData,
                                    img->width,
                                    img->height,
                                    img->depth * img->nChannels,
                                    img->widthStep,
                                    0xff0000, 0x00ff00, 0x0000ff, 0);
    if (!frame_surface)
    {
        fprintf(stderr, "SDL: could not create frame surface - %s\n", SDL_GetError());
        return;
    }

    SDL_BlitSurface(frame_surface, 0, screen, 0);
    SDL_Flip(screen);

    /* Release the per-frame wrapper; otherwise one surface leaks per frame. */
    SDL_FreeSurface(frame_surface);
}

/* Zero the queue and create its mutex and condition variable. */
void packet_queue_init(PacketQueue *q)
{
    memset(q, 0, sizeof(PacketQueue));
    q->mutex = SDL_CreateMutex();
    q->cond = SDL_CreateCond();
}

/*
 * Append a packet to the queue and wake one waiter.
 * Returns 0 on success, -1 on failure (the caller still owns pkt then).
 */
int packet_queue_put(PacketQueue *q, AVPacket *pkt)
{
    AVPacketList *node;

    if (av_dup_packet(pkt) < 0)
        return -1;

    node = (AVPacketList*) malloc(sizeof(AVPacketList));
    if (!node)
        return -1;

    node->pkt = *pkt;
    node->next = NULL;

    SDL_LockMutex(q->mutex);

    if (!q->last_pkt)
        q->first_pkt = node;
    else
        q->last_pkt->next = node;

    q->last_pkt = node;
    q->nb_packets++;
    q->size += node->pkt.size;
    SDL_CondSignal(q->cond);

    SDL_UnlockMutex(q->mutex);
    return 0;
}

/*
 * Pop the oldest packet into *pkt.
 * Returns 1 when a packet was delivered, 0 when the queue is empty and
 * block == 0, and -1 once the global quit flag is set.
 */
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
    AVPacketList *node;
    int ret;

    SDL_LockMutex(q->mutex);

    for (;;)
    {
        if (quit)
        {
            ret = -1;
            break;
        }

        node = q->first_pkt;
        if (node)
        {
            q->first_pkt = node->next;
            if (!q->first_pkt)
                q->last_pkt = NULL;

            q->nb_packets--;
            q->size -= node->pkt.size;
            *pkt = node->pkt;

            free(node);
            ret = 1;
            break;
        }
        else if (!block)
        {
            ret = 0;
            break;
        }
        else
        {
            SDL_CondWait(q->cond, q->mutex);
        }
    }

    SDL_UnlockMutex(q->mutex);
    return ret;
}

/* Set the quit flag and wake a consumer blocked in packet_queue_get(). */
static void packet_queue_abort(PacketQueue *q)
{
    SDL_LockMutex(q->mutex);
    quit = 1;
    SDL_CondSignal(q->cond);
    SDL_UnlockMutex(q->mutex);
}

/* Free every packet still queued; call only after the consumer has stopped. */
static void packet_queue_flush(PacketQueue *q)
{
    SDL_LockMutex(q->mutex);

    AVPacketList *node = q->first_pkt;
    while (node)
    {
        AVPacketList *next = node->next;
        av_free_packet(&node->pkt);
        free(node);
        node = next;
    }

    q->first_pkt = NULL;
    q->last_pkt = NULL;
    q->nb_packets = 0;
    q->size = 0;

    SDL_UnlockMutex(q->mutex);
}

/*
 * Decode audio from the queue into audio_buf.
 * Returns the number of decoded bytes, or -1 when quitting.
 * Keeps the packet being consumed in function-static state between calls.
 */
int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size)
{
    static AVPacket pkt;
    static uint8_t *audio_pkt_data = NULL;
    static int audio_pkt_size = 0;

    int len1, data_size;

    for (;;)
    {
        while (audio_pkt_size > 0)
        {
            data_size = buf_size;
            len1 = avcodec_decode_audio2(aCodecCtx, (int16_t*)audio_buf, &data_size,
                                         audio_pkt_data, audio_pkt_size);
            if (len1 < 0)
            {
                /* if error, skip frame */
                audio_pkt_size = 0;
                break;
            }

            audio_pkt_data += len1;
            audio_pkt_size -= len1;
            if (data_size <= 0)
            {
                /* No data yet, get more frames */
                continue;
            }

            /* We have data, return it and come back for more later */
            return data_size;
        }

        if (pkt.data)
            av_free_packet(&pkt);

        if (quit)
            return -1;

        if (packet_queue_get(&audioq, &pkt, 1) < 0)
            return -1;

        audio_pkt_data = pkt.data;
        audio_pkt_size = pkt.size;
    }
}

/*
 * SDL audio callback: fill `stream` with exactly `len` bytes of decoded
 * audio, substituting silence when decoding fails or the program is quitting.
 * `userdata` is the audio AVCodecContext.
 */
void audio_callback(void *userdata, Uint8 *stream, int len)
{
    AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
    int len1, audio_size;

    static uint8_t audio_buf[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
    static unsigned int audio_buf_size = 0;
    static unsigned int audio_buf_index = 0;

    while (len > 0)
    {
        if (audio_buf_index >= audio_buf_size)
        {
            /* We have already sent all our data; get more */
            audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
            if (audio_size < 0)
            {
                /* If error, output silence */
                audio_buf_size = 1024; // arbitrary?
                memset(audio_buf, 0, audio_buf_size);
            }
            else
            {
                audio_buf_size = audio_size;
            }
            audio_buf_index = 0;
        }

        len1 = audio_buf_size - audio_buf_index;
        if (len1 > len)
            len1 = len;

        memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);
        len -= len1;
        stream += len1;
        audio_buf_index += len1;
    }
}

/*
 * Open `filename` with FFmpeg, locate the first video and audio streams,
 * open both decoders, open the SDL audio device and start playback.
 * Exits the process on any failure.
 */
void setup_ffmpeg(char* filename)
{
    if (av_open_input_file(&pFormatCtx, filename, NULL, 0, NULL) != 0)
    {
        fprintf(stderr, "FFmpeg failed to open file %s!\n", filename);
        exit(-1);
    }

    if (av_find_stream_info(pFormatCtx) < 0)
    {
        fprintf(stderr, "FFmpeg failed to retrieve stream info!\n");
        exit(-1);
    }

    // Dump information about file onto standard error
    dump_format(pFormatCtx, 0, filename, 0);

    // Find the first video stream and the first audio stream
    for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++)
    {
        if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_VIDEO && videoStream < 0)
        {
            videoStream = (int)i;
        }

        if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_AUDIO && audioStream < 0)
        {
            audioStream = (int)i;
        }
    }

    if (videoStream == -1)
    {
        fprintf(stderr, "No video stream found in %s!\n", filename);
        exit(-1);
    }

    if (audioStream == -1)
    {
        fprintf(stderr, "No audio stream found in %s!\n", filename);
        exit(-1);
    }

    // Get a pointer to the codec context for the audio stream
    aCodecCtx = pFormatCtx->streams[audioStream]->codec;

    // Set audio settings from codec info
    SDL_AudioSpec wanted_spec;
    wanted_spec.freq = aCodecCtx->sample_rate;
    wanted_spec.format = AUDIO_S16SYS;
    wanted_spec.channels = aCodecCtx->channels;
    wanted_spec.silence = 0;
    wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
    wanted_spec.callback = audio_callback;
    wanted_spec.userdata = aCodecCtx;

    SDL_AudioSpec spec;
    if (SDL_OpenAudio(&wanted_spec, &spec) < 0)
    {
        fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
        exit(-1);
    }

    AVCodec* aCodec = avcodec_find_decoder(aCodecCtx->codec_id);
    if (!aCodec)
    {
        fprintf(stderr, "Unsupported codec!\n");
        exit(-1);
    }

    // The callback would decode with an unopened context if this failed silently
    if (avcodec_open(aCodecCtx, aCodec) < 0)
    {
        fprintf(stderr, "Could not open audio codec!\n");
        exit(-1);
    }

    // The queue must exist before the callback starts pulling from it
    packet_queue_init(&audioq);
    SDL_PauseAudio(0);

    // Get a pointer to the codec context for the video stream
    pCodecCtx = pFormatCtx->streams[videoStream]->codec;

    // Find the decoder for the video stream
    AVCodec* pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
    if (pCodec == NULL)
    {
        fprintf(stderr, "Unsupported codec!\n");
        exit(-1); // Codec not found
    }

    // Open codec
    if (avcodec_open(pCodecCtx, pCodec) < 0)
    {
        fprintf(stderr, "Unsupported codec!\n");
        exit(-1); // Could not open codec
    }
}

/*
 * Example processing step: replace every pixel by the plain average of its
 * first three channels. Only handles 8-bit images with at least 3 channels;
 * anything else is left untouched.
 */
static void grayscale_in_place(IplImage* img)
{
    if (!img || !img->imageData || img->depth != IPL_DEPTH_8U || img->nChannels < 3)
        return;

    for (int y = 0; y < img->height; y++)
    {
        /* Rows may be padded, so step by widthStep rather than width * nChannels. */
        unsigned char* row = (unsigned char*)img->imageData + (size_t)y * img->widthStep;

        for (int x = 0; x < img->width; x++)
        {
            unsigned char* px = row + x * img->nChannels;

            /* Compute the average once, from the ORIGINAL B, G and R values. */
            unsigned char gray = (unsigned char)((px[0] + px[1] + px[2]) / 3);
            px[0] = gray; // B
            px[1] = gray; // G
            px[2] = gray; // R
        }
    }
}

int main(int argc, char* argv[])
{
    if (argc < 2)
    {
        std::cout << "Usage: " << argv[0] << " <video>" << std::endl;
        return -1;
    }

    av_register_all();

    // Init SDL
    if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER))
    {
        fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
        return -1;
    }

    // Init ffmpeg and setup some SDL stuff related to Audio
    setup_ffmpeg(argv[1]);

    int exit_code = 0;
    VideoCapture cap(argv[1]); // open the video file through OpenCV
    if (!cap.isOpened())       // check if we succeeded
    {
        std::cout << "Failed to load file!" << std::endl;
        exit_code = -1;
    }

    AVPacket packet;
    bool running = (exit_code == 0);
    while (running && av_read_frame(pFormatCtx, &packet) >= 0)
    {
        if (packet.stream_index == videoStream)
        {
            // Actually this is where SYNC between audio/video would happen.
            // Right now I assume that every VIDEO packet contains an entire video frame,
            // and that's not true. A video frame can be made by multiple packets!
            // But for the time being, assume 1 video frame == 1 video packet,
            // so instead of reading the frame through ffmpeg, I read it through OpenCV.
            Mat frame;
            cap >> frame; // get the next frame from the file

            // OpenCV yields an empty Mat when it has no more frames
            if (!frame.empty())
            {
                // do some processing on the frame, either as a Mat or as IplImage.
                // For educational purposes, applying a lame grayscale conversion
                IplImage ipl_frame = frame;
                grayscale_in_place(&ipl_frame);

                // Display it on SDL window
                show_frame(&ipl_frame);
            }

            av_free_packet(&packet);
        }
        else if (packet.stream_index == audioStream)
        {
            // On failure the queue did not take ownership, so release it here
            if (packet_queue_put(&audioq, &packet) < 0)
                av_free_packet(&packet);
        }
        else
        {
            av_free_packet(&packet);
        }

        // Drain all pending events; `event` is only valid when PollEvent returns 1
        SDL_Event event;
        while (SDL_PollEvent(&event))
        {
            switch (event.type)
            {
                case SDL_QUIT:
                    running = false;
                    break;

                default:
                    break;
            }
        }
    }

    // Stop the audio side first: wake the callback if it is waiting for
    // packets, then close the device so the callback no longer runs.
    packet_queue_abort(&audioq);
    SDL_CloseAudio();
    packet_queue_flush(&audioq);

    // the capture will be deinitialized automatically in VideoCapture destructor

    // Close the codecs
    avcodec_close(aCodecCtx);
    avcodec_close(pCodecCtx);

    // Close the video file
    av_close_input_file(pFormatCtx);

    SDL_Quit();

    return exit_code;
}

@victor1234我在等你的反馈。我现在才有时间回答。：）karlphillip，谢谢你的代码，它对我也很有帮助。然而，在我的例子中，我需要自己控制帧速率，而在这一点上，我仍然没有弄清楚如何正确地做到。声音播放会中断，音频驱动程序报错：ALSA lib pcm.c:7223:(snd_pcm_recover) underrun occurred（出现欠载）。我也许能自己找出原因，但如果您还在关注这个问题并有任何想法，我将不胜感激。一些本应作为注释的答案来自cookie帐户，这些帐户已不再存在。我无法转换它们，因为生成的注释将没有有效的所有者。
g++ ffmpeg_snd.cpp -o ffmpeg_snd -D_GNU_SOURCE=1 -D_THREAD_SAFE -I/usr/local/include/opencv -I/usr/local/include -I/usr/local/include/SDL -Wl,-framework,Cocoa -L/usr/local/lib -lopencv_core -lopencv_imgproc -lopencv_highgui -lopencv_ml -lopencv_video -lopencv_features2d -lopencv_calib3d -lopencv_objdetect -lopencv_contrib -lopencv_legacy -lopencv_flann -lSDLmain -lSDL -L/usr/local/lib -lavfilter -lavcodec -lavformat -I/usr/local/Cellar/ffmpeg/HEAD/include/libavcodec -I/usr/local/Cellar/ffmpeg/HEAD/include/libavformat