C++ 使用opencv进行视频处理的音频输出
我正在用opencv处理视频,但同时我需要播放音频并简单地控制它,比如声音或当前帧数 我想我应该用ffmpeg创建一个并行进程,但我不知道怎么做。你能解释一下怎么做吗C++ 使用opencv进行视频处理的音频输出,c++,c,audio,opencv,ffmpeg,C++,C,Audio,Opencv,Ffmpeg,我正在用opencv处理视频,但同时我需要播放音频并简单地控制它,比如声音或当前帧数 我想我应该用ffmpeg创建一个并行进程,但我不知道怎么做。你能解释一下怎么做吗 或者您知道另一种解决方案吗?我认为在这种情况下,应该使用ffmpeg播放音频和SDL for video 使用OpenCV打开文件并处理帧后,您可以使用OpenCV->SDL显示它,同时通过ffmpeg检索音频帧并使用SDL播放它们 我还发现了一篇不错的帖子,展示了如何使用ffmpeg从视频文件中捕获帧,将它们存储在OpenCV
或者您知道另一种解决方案吗?我认为在这种情况下,应该使用ffmpeg播放音频和SDL for video 使用OpenCV打开文件并处理帧后,您可以使用
OpenCV->SDL
显示它,同时通过ffmpeg检索音频帧并使用SDL播放它们
我还发现了一篇不错的帖子,展示了如何使用ffmpeg从视频文件中捕获帧,将它们存储在OpenCVcv::Mat
中,并在OpenCV窗口中显示结果。但是这样你就不能播放音频了,因为OpenCV不能处理这个问题
您可能也对阅读感兴趣:
编辑:
我花了4小时编写了一个原型来演示它是如何完成的此演示程序通过OpenCV读取视频帧(以便您可以处理它们)并通过ffmpeg读取音频,SDL用于播放这两种视频帧!此演示中有两个限制,您必须注意:1-它假设您使用的是打包为BGR(24位)的OpenCV图像,2-音频和视频不同步!是的,我有一些工作要你做(Yeeey)。但是不要惊慌,有一些想法
这一点很重要,因为您将对帧进行一些处理,这肯定会使视频和音频很快失去同步,因为它们是独立播放的
我上面建议的ffmpeg教程对于理解代码非常重要。它们展示了如何处理SDL,以及如何读取音频/视频流的数据包
#include <highgui.h>
#include <cv.h>
extern "C"
{
#include <SDL.h>
#include <SDL_thread.h>
#include <avcodec.h>
#include <avformat.h>
}
#include <iostream>
#include <stdio.h>
//#include <malloc.h>
using namespace cv;
#define SDL_AUDIO_BUFFER_SIZE 1024
typedef struct PacketQueue
{
AVPacketList *first_pkt, *last_pkt;
int nb_packets;
int size;
SDL_mutex *mutex;
SDL_cond *cond;
} PacketQueue;
PacketQueue audioq;
int audioStream = -1;
int videoStream = -1;
int quit = 0;
SDL_Surface* screen = NULL;
SDL_Surface* surface = NULL;
AVFormatContext* pFormatCtx = NULL;
AVCodecContext* aCodecCtx = NULL;
AVCodecContext* pCodecCtx = NULL;
void show_frame(IplImage* img)
{
if (!screen)
{
screen = SDL_SetVideoMode(img->width, img->height, 0, 0);
if (!screen)
{
fprintf(stderr, "SDL: could not set video mode - exiting\n");
exit(1);
}
}
// Assuming IplImage packed as BGR 24bits
SDL_Surface* surface = SDL_CreateRGBSurfaceFrom((void*)img->imageData,
img->width,
img->height,
img->depth * img->nChannels,
img->widthStep,
0xff0000, 0x00ff00, 0x0000ff, 0
);
SDL_BlitSurface(surface, 0, screen, 0);
SDL_Flip(screen);
}
void packet_queue_init(PacketQueue *q)
{
memset(q, 0, sizeof(PacketQueue));
q->mutex = SDL_CreateMutex();
q->cond = SDL_CreateCond();
}
int packet_queue_put(PacketQueue *q, AVPacket *pkt)
{
AVPacketList *pkt1;
if (av_dup_packet(pkt) < 0)
{
return -1;
}
//pkt1 = (AVPacketList*) av_malloc(sizeof(AVPacketList));
pkt1 = (AVPacketList*) malloc(sizeof(AVPacketList));
if (!pkt1) return -1;
pkt1->pkt = *pkt;
pkt1->next = NULL;
SDL_LockMutex(q->mutex);
if (!q->last_pkt)
q->first_pkt = pkt1;
else
q->last_pkt->next = pkt1;
q->last_pkt = pkt1;
q->nb_packets++;
q->size += pkt1->pkt.size;
SDL_CondSignal(q->cond);
SDL_UnlockMutex(q->mutex);
return 0;
}
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
AVPacketList *pkt1;
int ret;
SDL_LockMutex(q->mutex);
for (;;)
{
if( quit)
{
ret = -1;
break;
}
pkt1 = q->first_pkt;
if (pkt1)
{
q->first_pkt = pkt1->next;
if (!q->first_pkt)
q->last_pkt = NULL;
q->nb_packets--;
q->size -= pkt1->pkt.size;
*pkt = pkt1->pkt;
//av_free(pkt1);
free(pkt1);
ret = 1;
break;
}
else if (!block)
{
ret = 0;
break;
}
else
{
SDL_CondWait(q->cond, q->mutex);
}
}
SDL_UnlockMutex(q->mutex);
return ret;
}
int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size)
{
static AVPacket pkt;
static uint8_t *audio_pkt_data = NULL;
static int audio_pkt_size = 0;
int len1, data_size;
for (;;)
{
while (audio_pkt_size > 0)
{
data_size = buf_size;
len1 = avcodec_decode_audio2(aCodecCtx, (int16_t*)audio_buf, &data_size,
audio_pkt_data, audio_pkt_size);
if (len1 < 0)
{
/* if error, skip frame */
audio_pkt_size = 0;
break;
}
audio_pkt_data += len1;
audio_pkt_size -= len1;
if (data_size <= 0)
{
/* No data yet, get more frames */
continue;
}
/* We have data, return it and come back for more later */
return data_size;
}
if (pkt.data)
av_free_packet(&pkt);
if (quit) return -1;
if (packet_queue_get(&audioq, &pkt, 1) < 0) return -1;
audio_pkt_data = pkt.data;
audio_pkt_size = pkt.size;
}
}
void audio_callback(void *userdata, Uint8 *stream, int len)
{
AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
int len1, audio_size;
static uint8_t audio_buf[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
static unsigned int audio_buf_size = 0;
static unsigned int audio_buf_index = 0;
while (len > 0)
{
if (audio_buf_index >= audio_buf_size)
{
/* We have already sent all our data; get more */
audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
if(audio_size < 0)
{
/* If error, output silence */
audio_buf_size = 1024; // arbitrary?
memset(audio_buf, 0, audio_buf_size);
}
else
{
audio_buf_size = audio_size;
}
audio_buf_index = 0;
}
len1 = audio_buf_size - audio_buf_index;
if (len1 > len)
len1 = len;
memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);
len -= len1;
stream += len1;
audio_buf_index += len1;
}
}
void setup_ffmpeg(char* filename)
{
if (av_open_input_file(&pFormatCtx, filename, NULL, 0, NULL) != 0)
{
fprintf(stderr, "FFmpeg failed to open file %s!\n", filename);
exit(-1);
}
if (av_find_stream_info(pFormatCtx) < 0)
{
fprintf(stderr, "FFmpeg failed to retrieve stream info!\n");
exit(-1);
}
// Dump information about file onto standard error
dump_format(pFormatCtx, 0, filename, 0);
// Find the first video stream
int i = 0;
for (i; i < pFormatCtx->nb_streams; i++)
{
if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_VIDEO && videoStream < 0)
{
videoStream = i;
}
if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_AUDIO && audioStream < 0)
{
audioStream = i;
}
}
if (videoStream == -1)
{
fprintf(stderr, "No video stream found in %s!\n", filename);
exit(-1);
}
if (audioStream == -1)
{
fprintf(stderr, "No audio stream found in %s!\n", filename);
exit(-1);
}
// Get a pointer to the codec context for the audio stream
aCodecCtx = pFormatCtx->streams[audioStream]->codec;
// Set audio settings from codec info
SDL_AudioSpec wanted_spec;
wanted_spec.freq = aCodecCtx->sample_rate;
wanted_spec.format = AUDIO_S16SYS;
wanted_spec.channels = aCodecCtx->channels;
wanted_spec.silence = 0;
wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
wanted_spec.callback = audio_callback;
wanted_spec.userdata = aCodecCtx;
SDL_AudioSpec spec;
if (SDL_OpenAudio(&wanted_spec, &spec) < 0)
{
fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
exit(-1);
}
AVCodec* aCodec = avcodec_find_decoder(aCodecCtx->codec_id);
if (!aCodec)
{
fprintf(stderr, "Unsupported codec!\n");
exit(-1);
}
avcodec_open(aCodecCtx, aCodec);
// audio_st = pFormatCtx->streams[index]
packet_queue_init(&audioq);
SDL_PauseAudio(0);
// Get a pointer to the codec context for the video stream
pCodecCtx = pFormatCtx->streams[videoStream]->codec;
// Find the decoder for the video stream
AVCodec* pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL)
{
fprintf(stderr, "Unsupported codec!\n");
exit(-1); // Codec not found
}
// Open codec
if (avcodec_open(pCodecCtx, pCodec) < 0)
{
fprintf(stderr, "Unsupported codec!\n");
exit(-1); // Could not open codec
}
}
int main(int argc, char* argv[])
{
if (argc < 2)
{
std::cout << "Usage: " << argv[0] << " <video>" << std::endl;
return -1;
}
av_register_all();
// Init SDL
if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER))
{
fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
return -1;
}
// Init ffmpeg and setup some SDL stuff related to Audio
setup_ffmpeg(argv[1]);
VideoCapture cap(argv[1]); // open the default camera
if (!cap.isOpened()) // check if we succeeded
{
std::cout << "Failed to load file!" << std::endl;
return -1;
}
AVPacket packet;
while (av_read_frame(pFormatCtx, &packet) >= 0)
{
if (packet.stream_index == videoStream)
{
// Actually this is were SYNC between audio/video would happen.
// Right now I assume that every VIDEO packet contains an entire video frame, and that's not true. A video frame can be made by multiple packets!
// But for the time being, assume 1 video frame == 1 video packet,
// so instead of reading the frame through ffmpeg, I read it through OpenCV.
Mat frame;
cap >> frame; // get a new frame from camera
// do some processing on the frame, either as a Mat or as IplImage.
// For educational purposes, applying a lame grayscale conversion
IplImage ipl_frame = frame;
for (int i = 0; i < ipl_frame.width * ipl_frame.height * ipl_frame.nChannels; i += ipl_frame.nChannels)
{
ipl_frame.imageData[i] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //B
ipl_frame.imageData[i+1] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //G
ipl_frame.imageData[i+2] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //R
}
// Display it on SDL window
show_frame(&ipl_frame);
av_free_packet(&packet);
}
else if (packet.stream_index == audioStream)
{
packet_queue_put(&audioq, &packet);
}
else
{
av_free_packet(&packet);
}
SDL_Event event;
SDL_PollEvent(&event);
switch (event.type)
{
case SDL_QUIT:
SDL_FreeSurface(surface);
SDL_Quit();
break;
default:
break;
}
}
// the camera will be deinitialized automatically in VideoCapture destructor
// Close the codec
avcodec_close(pCodecCtx);
// Close the video file
av_close_input_file(pFormatCtx);
return 0;
}
@victor1234我在等你的反馈。我现在才有时间回答。)karlphillip,谢谢你的代码,它对我也很有帮助。然而,在我的例子中,我管理帧速率,在这一点上,我仍然没有弄清楚如何正确地做到这一点。声音播放中断,音频驱动程序向ALSA lib pcm投诉。c:7223:(snd_pcm_recover)出现欠载,我可能会找出原因,但如果您仍在阅读任何想法,我将不胜感激。本应作为注释的一些答案是cookie帐户,不再存在。我无法转换它们,因为生成的注释将没有有效的所有者。
g++ ffmpeg_snd.cpp -o ffmpeg_snd -D_GNU_SOURCE=1 -D_THREAD_SAFE -I/usr/local/include/opencv -I/usr/local/include -I/usr/local/include/SDL -Wl,-framework,Cocoa -L/usr/local/lib -lopencv_core -lopencv_imgproc -lopencv_highgui -lopencv_ml -lopencv_video -lopencv_features2d -lopencv_calib3d -lopencv_objdetect -lopencv_contrib -lopencv_legacy -lopencv_flann -lSDLmain -lSDL -L/usr/local/lib -lavfilter -lavcodec -lavformat -I/usr/local/Cellar/ffmpeg/HEAD/include/libavcodec -I/usr/local/Cellar/ffmpeg/HEAD/include/libavformat