C++ cpp rgb到yuv422的转换

C++ cpp rgb到yuv422的转换,c++,opencv,opencl,yuv,color-space,C++,Opencv,Opencl,Yuv,Color Space,我正在尝试将RGB/RGBA格式(可以更改)的图像(最初来自QImage)转换为YUV422格式。 我最初的意图是使用OpenCV来完成这项工作,但它无法将RGB/RGBA转换为422格式 我寻找替代方案,甚至考虑根据需要编写自己的转换,但速度不够快 我找了另一个图书馆使用,但它是旧的,不太相关 所以我的问题是,对于RGB->YUV422转换,我有什么好的选择?如果他们在GPU而不是CPU上执行转换,效果会更好 在这方面,他们建议使用,并且OP似乎达到了预期的效果(实时转换许多PAL流)。我使用

我正在尝试将RGB/RGBA格式(可以更改)的图像(最初来自QImage)转换为YUV422格式。 我最初的意图是使用OpenCV来完成这项工作,但它无法将RGB/RGBA转换为422格式

我寻找替代方案,甚至考虑根据需要编写自己的转换,但速度不够快

我找了另一个库来使用,但它很旧,不太相关

所以我的问题是,对于RGB->YUV422转换,我有什么好的选择?如果他们在GPU而不是CPU上执行转换,效果会更好


在这方面,他们建议使用,并且OP似乎达到了预期的效果(实时转换许多PAL流)。

我使用OpenCL解决了我的问题,如下所示:

我将转换改为从 Format_ARGB32_Premultiplied 到 YUV422,但它可以轻松更改为任何其他格式

openclwrapper.h:

// Wraps the OpenCL objects used to convert a fixed-size 4-bytes-per-pixel image
// into packed YUV 4:2:2 with the RGB2YUV422 kernel.
//
// The image dimensions are fixed at construction time: the constructor defined
// alongside this class bakes them into the kernel source and into the sizes of
// the device buffers.
class OpenClWrapper
{
public:
    // width / height: image dimensions in pixels.
    // NOTE(review): the kernel handles two pixels per work-item, so width is
    // presumably expected to be even - confirm with callers.
    OpenClWrapper(size_t width, size_t height);
    ~OpenClWrapper();

    // Converts one frame.
    // rgbImg: input, width*height*4 bytes are uploaded to the device.
    // yuvImg: output, width*height/2 unsigned ints are read back from the device.
    void RGB2YUV422(unsigned int * yuvImg, unsigned char * rgbImg);

private:
    std::vector<cl::Platform> m_all_platforms;   // every platform reported by cl::Platform::get
    std::vector<cl::Device> m_all_devices;       // every device of the selected platform
    cl::Platform m_default_platform;             // first entry of m_all_platforms
    cl::Device m_default_device;                 // first entry of m_all_devices
    cl::Context m_context;                       // context created for m_default_device
    cl::Program::Sources m_sources;              // refers to m_kernel_code
    cl::Program m_program;                       // program built from m_sources
    cl::CommandQueue m_queue;                    // NOTE(review): not used by the implementation shown with this class
    cl::Buffer m_buffer_yuv;                     // device buffer receiving the packed YUV output
    cl::Buffer m_buffer_rgb;                     // device buffer holding the uploaded input pixels
    std::string m_kernel_code;                   // generated OpenCL C source of the kernel

    // Declared in this order (width, then height); members are initialized in
    // declaration order regardless of the constructor's initializer list.
    size_t m_width;
    size_t m_height;

};
#include "openclwrapper.h"
#include <iostream>
#include <sstream>

/**
 * Prepares everything needed to run the RGB2YUV422 kernel on images of a fixed size.
 *
 * Picks the first OpenCL platform and its first device, creates a context,
 * generates and builds the kernel source (the image dimensions are baked into
 * the kernel's array parameter types) and allocates the two device buffers.
 *
 * On any failure a message is printed to std::cout and the process exits with
 * status 1 (behaviour kept from the original implementation).
 *
 * @param width  image width in pixels; the kernel processes two pixels per
 *               work-item, so an odd trailing column is not converted.
 * @param height image height in pixels.
 */
OpenClWrapper::OpenClWrapper(size_t width, size_t height) :
    m_width(width),     // initializer order now matches the declaration order
    m_height(height)
{
    // get all platforms (drivers)
    cl::Platform::get(&m_all_platforms);
    if (m_all_platforms.size() == 0) {
        std::cout << " No platforms found. Check OpenCL installation!\n";
        exit(1);
    }
    m_default_platform = m_all_platforms[0];

    // get default device of the default platform
    m_default_platform.getDevices(CL_DEVICE_TYPE_ALL, &m_all_devices);
    if (m_all_devices.size() == 0) {
        std::cout << " No devices found. Check OpenCL installation!\n";
        exit(1);
    }
    m_default_device = m_all_devices[0];

    // The cl:: wrappers are reference-counted value types: assign them directly.
    // The former `*(new cl::Context(...))` leaked the heap object together with
    // the OpenCL reference it held, so the handle could never be released.
    m_context = cl::Context({m_default_device});

    // Kernel layout:
    //   - work-item (row, col) reads two adjacent 4-byte pixels of `row`
    //     (bytes are indexed as B, G, R, A) and writes one 32-bit word
    //     U | Y1 << 8 | V << 16 | Y2 << 24.
    //   - colour channels are un-premultiplied as (c * 255) / alpha; the former
    //     `c * (255 / alpha)` used a truncated integer factor (wrong for almost
    //     every alpha other than 255) and divided by zero for alpha == 0.
    //   - the bytes are widened to unsigned int before shifting, so `Y2 << 24`
    //     no longer shifts into the sign bit of a promoted int.
    std::ostringstream oss;
    oss <<
        "   unsigned char unpremultiply(unsigned char c, unsigned char a){                                      \n"
        "       if (a == 0) return 0;                                                                           \n"
        "       unsigned int v = ((unsigned int)c * 255u) / a;                                                  \n"
        "       return (unsigned char)(v > 255u ? 255u : v);                                                    \n"
        "   }                                                                                                   \n"
        "   void kernel RGB2YUV422(global const unsigned char rgbImg[" << m_height << "][" << m_width << "*4], global unsigned int yuvImg[" << m_height << "][" << m_width << "/2]){       \n"
        "       int row = get_global_id(0);                                                                     \n"
        "       int col = get_global_id(1)*8;                                                                   \n"
        "       unsigned char alpha1 = rgbImg[row][col+3];                                                      \n"
        "       unsigned char alpha2 = rgbImg[row][col+7];                                                      \n"
        "       unsigned char R1 = unpremultiply(rgbImg[row][col+2], alpha1);                                   \n"
        "       unsigned char G1 = unpremultiply(rgbImg[row][col+1], alpha1);                                   \n"
        "       unsigned char B1 = unpremultiply(rgbImg[row][col],   alpha1);                                   \n"
        "       unsigned char R2 = unpremultiply(rgbImg[row][col+6], alpha2);                                   \n"
        "       unsigned char G2 = unpremultiply(rgbImg[row][col+5], alpha2);                                   \n"
        "       unsigned char B2 = unpremultiply(rgbImg[row][col+4], alpha2);                                   \n"
        "       unsigned char Y1 = (unsigned char)(0.299000*R1 + 0.587000*G1 + 0.114000*B1);                    \n"
        "       unsigned char Y2 = (unsigned char)(0.299000*R2 + 0.587000*G2 + 0.114000*B2);                    \n"
        "       unsigned char U = (unsigned char)(-0.168736*R1-0.331264*G1+0.500000*B1+128);                    \n"
        "       unsigned char V = (unsigned char)(0.500000*R1-0.418688*G1-0.081312*B1+128);                     \n"
        "       yuvImg[get_global_id(0)][get_global_id(1)] =                                                    \n"
        "           ((unsigned int)Y2 << 24) | ((unsigned int)V << 16) | ((unsigned int)Y1 << 8) | (unsigned int)U; \n"
        "   }                                                                                                   ";

    // m_sources stores a pointer into m_kernel_code, so the string is kept as a member.
    m_kernel_code = oss.str();
    m_sources.push_back({m_kernel_code.c_str(), m_kernel_code.length()});

    m_program = cl::Program(m_context, m_sources);
    if (m_program.build({m_default_device}) != CL_SUCCESS) {
        std::cout << " Error building: " << m_program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(m_default_device) << "\n";
        exit(1);
    }

    // create buffers on the device
    cl_int err = CL_SUCCESS;

    // one unsigned int per pixel pair (16 bits per pixel)
    m_buffer_yuv = cl::Buffer(m_context, CL_MEM_READ_WRITE, sizeof(unsigned int) * (m_width * m_height / 2), nullptr, &err);
    if (err != CL_SUCCESS) {
        std::cout << " Error allocating YUV buffer: " << err << "\n";
        exit(1);
    }

    // four bytes per pixel (colour channels plus alpha)
    m_buffer_rgb = cl::Buffer(m_context, CL_MEM_READ_WRITE, sizeof(unsigned char) * (m_width * m_height * 4), nullptr, &err);
    if (err != CL_SUCCESS) {
        std::cout << " Error allocating RGB buffer: " << err << "\n";
        exit(1);
    }
}

// The cl:: members release their OpenCL handles in their own destructors, so
// nothing has to be done here. The previous free(&m_buffer_...) calls passed
// the address of a member sub-object (never obtained from malloc) to free(),
// which is undefined behaviour.
OpenClWrapper::~OpenClWrapper() = default;

void OpenClWrapper::RGB2YUV422(unsigned int * yuvImg, unsigned char * rgbImg){


    cl::CommandQueue queue(m_context,m_default_device);
       //write rgb image to the OpenCl buffer
       queue.enqueueWriteBuffer(m_buffer_rgb,CL_TRUE,0,sizeof(unsigned char)*(m_width*m_height*4),rgbImg);


       //run the kernel
       cl::Kernel kernel_yuv2rgb=cl::Kernel(m_program,"RGB2YUV422");
       kernel_yuv2rgb.setArg(0,m_buffer_rgb);
       kernel_yuv2rgb.setArg(1,m_buffer_yuv);
       queue.enqueueNDRangeKernel(kernel_yuv2rgb,cl::NullRange,cl::NDRange(m_height,(m_width/2)),cl::NullRange); //range is divided by 2 because we have width is represented in integers instead of 16bit (as needed in yuv422).
       queue.finish();

       //read result yuv Image from the device to yuv Image pointer
       queue.enqueueReadBuffer(m_buffer_yuv,CL_TRUE,0,sizeof(unsigned int)*(m_width*m_height/2),yuvImg);

}
类OpenClWrapper
{
公众:
开口包装(尺寸、宽度、尺寸、高度);
~OpenClWrapper();
void RGB2YUV422(无符号int*yuvImg,无符号char*rgbImg);
私人:
std::矢量m_所有_平台;
std::矢量m_所有_设备;
cl::平台m_默认_平台;
cl::设备m_默认_设备;
cl::Context m_Context;
cl::Program::Sources m_Sources;
cl::程序m_程序;
cl::命令队列m_队列;
cl::Buffer m_Buffer_yuv;
cl::缓冲区m_缓冲区rgb;
std::字符串m_内核代码;
尺寸和宽度;
大小和高度;
};
openclwrapper.cpp:

// Wraps the OpenCL objects used to convert a fixed-size 4-bytes-per-pixel image
// into packed YUV 4:2:2 with the RGB2YUV422 kernel.
//
// The image dimensions are fixed at construction time: the constructor defined
// alongside this class bakes them into the kernel source and into the sizes of
// the device buffers.
class OpenClWrapper
{
public:
    // width / height: image dimensions in pixels.
    // NOTE(review): the kernel handles two pixels per work-item, so width is
    // presumably expected to be even - confirm with callers.
    OpenClWrapper(size_t width, size_t height);
    ~OpenClWrapper();

    // Converts one frame.
    // rgbImg: input, width*height*4 bytes are uploaded to the device.
    // yuvImg: output, width*height/2 unsigned ints are read back from the device.
    void RGB2YUV422(unsigned int * yuvImg, unsigned char * rgbImg);

private:
    std::vector<cl::Platform> m_all_platforms;   // every platform reported by cl::Platform::get
    std::vector<cl::Device> m_all_devices;       // every device of the selected platform
    cl::Platform m_default_platform;             // first entry of m_all_platforms
    cl::Device m_default_device;                 // first entry of m_all_devices
    cl::Context m_context;                       // context created for m_default_device
    cl::Program::Sources m_sources;              // refers to m_kernel_code
    cl::Program m_program;                       // program built from m_sources
    cl::CommandQueue m_queue;                    // NOTE(review): not used by the implementation shown with this class
    cl::Buffer m_buffer_yuv;                     // device buffer receiving the packed YUV output
    cl::Buffer m_buffer_rgb;                     // device buffer holding the uploaded input pixels
    std::string m_kernel_code;                   // generated OpenCL C source of the kernel

    // Declared in this order (width, then height); members are initialized in
    // declaration order regardless of the constructor's initializer list.
    size_t m_width;
    size_t m_height;

};
#include "openclwrapper.h"
#include <iostream>
#include <sstream>

/**
 * Prepares everything needed to run the RGB2YUV422 kernel on images of a fixed size.
 *
 * Picks the first OpenCL platform and its first device, creates a context,
 * generates and builds the kernel source (the image dimensions are baked into
 * the kernel's array parameter types) and allocates the two device buffers.
 *
 * On any failure a message is printed to std::cout and the process exits with
 * status 1 (behaviour kept from the original implementation).
 *
 * @param width  image width in pixels; the kernel processes two pixels per
 *               work-item, so an odd trailing column is not converted.
 * @param height image height in pixels.
 */
OpenClWrapper::OpenClWrapper(size_t width, size_t height) :
    m_width(width),     // initializer order now matches the declaration order
    m_height(height)
{
    // get all platforms (drivers)
    cl::Platform::get(&m_all_platforms);
    if (m_all_platforms.size() == 0) {
        std::cout << " No platforms found. Check OpenCL installation!\n";
        exit(1);
    }
    m_default_platform = m_all_platforms[0];

    // get default device of the default platform
    m_default_platform.getDevices(CL_DEVICE_TYPE_ALL, &m_all_devices);
    if (m_all_devices.size() == 0) {
        std::cout << " No devices found. Check OpenCL installation!\n";
        exit(1);
    }
    m_default_device = m_all_devices[0];

    // The cl:: wrappers are reference-counted value types: assign them directly.
    // The former `*(new cl::Context(...))` leaked the heap object together with
    // the OpenCL reference it held, so the handle could never be released.
    m_context = cl::Context({m_default_device});

    // Kernel layout:
    //   - work-item (row, col) reads two adjacent 4-byte pixels of `row`
    //     (bytes are indexed as B, G, R, A) and writes one 32-bit word
    //     U | Y1 << 8 | V << 16 | Y2 << 24.
    //   - colour channels are un-premultiplied as (c * 255) / alpha; the former
    //     `c * (255 / alpha)` used a truncated integer factor (wrong for almost
    //     every alpha other than 255) and divided by zero for alpha == 0.
    //   - the bytes are widened to unsigned int before shifting, so `Y2 << 24`
    //     no longer shifts into the sign bit of a promoted int.
    std::ostringstream oss;
    oss <<
        "   unsigned char unpremultiply(unsigned char c, unsigned char a){                                      \n"
        "       if (a == 0) return 0;                                                                           \n"
        "       unsigned int v = ((unsigned int)c * 255u) / a;                                                  \n"
        "       return (unsigned char)(v > 255u ? 255u : v);                                                    \n"
        "   }                                                                                                   \n"
        "   void kernel RGB2YUV422(global const unsigned char rgbImg[" << m_height << "][" << m_width << "*4], global unsigned int yuvImg[" << m_height << "][" << m_width << "/2]){       \n"
        "       int row = get_global_id(0);                                                                     \n"
        "       int col = get_global_id(1)*8;                                                                   \n"
        "       unsigned char alpha1 = rgbImg[row][col+3];                                                      \n"
        "       unsigned char alpha2 = rgbImg[row][col+7];                                                      \n"
        "       unsigned char R1 = unpremultiply(rgbImg[row][col+2], alpha1);                                   \n"
        "       unsigned char G1 = unpremultiply(rgbImg[row][col+1], alpha1);                                   \n"
        "       unsigned char B1 = unpremultiply(rgbImg[row][col],   alpha1);                                   \n"
        "       unsigned char R2 = unpremultiply(rgbImg[row][col+6], alpha2);                                   \n"
        "       unsigned char G2 = unpremultiply(rgbImg[row][col+5], alpha2);                                   \n"
        "       unsigned char B2 = unpremultiply(rgbImg[row][col+4], alpha2);                                   \n"
        "       unsigned char Y1 = (unsigned char)(0.299000*R1 + 0.587000*G1 + 0.114000*B1);                    \n"
        "       unsigned char Y2 = (unsigned char)(0.299000*R2 + 0.587000*G2 + 0.114000*B2);                    \n"
        "       unsigned char U = (unsigned char)(-0.168736*R1-0.331264*G1+0.500000*B1+128);                    \n"
        "       unsigned char V = (unsigned char)(0.500000*R1-0.418688*G1-0.081312*B1+128);                     \n"
        "       yuvImg[get_global_id(0)][get_global_id(1)] =                                                    \n"
        "           ((unsigned int)Y2 << 24) | ((unsigned int)V << 16) | ((unsigned int)Y1 << 8) | (unsigned int)U; \n"
        "   }                                                                                                   ";

    // m_sources stores a pointer into m_kernel_code, so the string is kept as a member.
    m_kernel_code = oss.str();
    m_sources.push_back({m_kernel_code.c_str(), m_kernel_code.length()});

    m_program = cl::Program(m_context, m_sources);
    if (m_program.build({m_default_device}) != CL_SUCCESS) {
        std::cout << " Error building: " << m_program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(m_default_device) << "\n";
        exit(1);
    }

    // create buffers on the device
    cl_int err = CL_SUCCESS;

    // one unsigned int per pixel pair (16 bits per pixel)
    m_buffer_yuv = cl::Buffer(m_context, CL_MEM_READ_WRITE, sizeof(unsigned int) * (m_width * m_height / 2), nullptr, &err);
    if (err != CL_SUCCESS) {
        std::cout << " Error allocating YUV buffer: " << err << "\n";
        exit(1);
    }

    // four bytes per pixel (colour channels plus alpha)
    m_buffer_rgb = cl::Buffer(m_context, CL_MEM_READ_WRITE, sizeof(unsigned char) * (m_width * m_height * 4), nullptr, &err);
    if (err != CL_SUCCESS) {
        std::cout << " Error allocating RGB buffer: " << err << "\n";
        exit(1);
    }
}

// The cl:: members release their OpenCL handles in their own destructors, so
// nothing has to be done here. The previous free(&m_buffer_...) calls passed
// the address of a member sub-object (never obtained from malloc) to free(),
// which is undefined behaviour.
OpenClWrapper::~OpenClWrapper() = default;

void OpenClWrapper::RGB2YUV422(unsigned int * yuvImg, unsigned char * rgbImg){


    cl::CommandQueue queue(m_context,m_default_device);
       //write rgb image to the OpenCl buffer
       queue.enqueueWriteBuffer(m_buffer_rgb,CL_TRUE,0,sizeof(unsigned char)*(m_width*m_height*4),rgbImg);


       //run the kernel
       cl::Kernel kernel_yuv2rgb=cl::Kernel(m_program,"RGB2YUV422");
       kernel_yuv2rgb.setArg(0,m_buffer_rgb);
       kernel_yuv2rgb.setArg(1,m_buffer_yuv);
       queue.enqueueNDRangeKernel(kernel_yuv2rgb,cl::NullRange,cl::NDRange(m_height,(m_width/2)),cl::NullRange); //range is divided by 2 because we have width is represented in integers instead of 16bit (as needed in yuv422).
       queue.finish();

       //read result yuv Image from the device to yuv Image pointer
       queue.enqueueReadBuffer(m_buffer_yuv,CL_TRUE,0,sizeof(unsigned int)*(m_width*m_height/2),yuvImg);

}
#包括“openclwrapper.h”
#包括
#包括
OpenClWrapper::OpenClWrapper(大小\宽度、大小\高度):
m_高度(高度),
m_宽度(宽度)
{
//获取所有平台(驱动程序)
cl::平台::获取(&m_所有平台);
如果(m_所有平台.size()==0){

std::coutOpenCV的一个简单实现:

/**
 * Converts an 8-bit, 3-channel image (channels read in R, G, B order) into an
 * 8-bit, 2-channel YUV 4:2:2 image in UYVY byte order.
 *
 * Each pair of horizontally adjacent pixels produces four output bytes
 * U, Y1, V, Y2; U and V are computed from the first pixel of the pair only.
 * The float results are truncated to int and then saturated to [0, 255].
 *
 * @param rgb input image; must be CV_8U with 3 channels.
 * @param yuv output image; must already be allocated with the same size as
 *            rgb, CV_8U with 2 channels.
 *
 * The preconditions are checked with assert(), i.e. only in builds where
 * NDEBUG is not defined.
 *
 * Images with an odd number of columns are handled: the trailing unpaired
 * pixel gets only its U and Y bytes written. The previous version read three
 * bytes and wrote two bytes past the end of each row in that case.
 */
void rgb_to_yuv422_uyvy(const cv::Mat& rgb, cv::Mat& yuv) {
    assert(rgb.size() == yuv.size() &&
           rgb.depth() == CV_8U &&
           rgb.channels() == 3 &&
           yuv.depth() == CV_8U &&
           yuv.channels() == 2);

    // Bytes per pixel are loop-invariant; compute them once.
    const int rgbPixelBytes = static_cast<int>(rgb.elemSize());
    const int yuvPixelBytes = static_cast<int>(yuv.elemSize());

    for (int ih = 0; ih < rgb.rows; ih++) {
        const uint8_t* rgbRowPtr = rgb.ptr<uint8_t>(ih);
        uint8_t* yuvRowPtr = yuv.ptr<uint8_t>(ih);

        for (int iw = 0; iw < rgb.cols; iw = iw + 2) {
            const uint8_t* px1 = rgbRowPtr + iw * rgbPixelBytes;
            uint8_t* out = yuvRowPtr + iw * yuvPixelBytes;

            const uint8_t R1 = px1[0];
            const uint8_t G1 = px1[1];
            const uint8_t B1 = px1[2];

            const int Y = static_cast<int>( (0.257f * R1) + (0.504f * G1) + (0.098f * B1) + 16.0f );
            const int U = static_cast<int>(-(0.148f * R1) - (0.291f * G1) + (0.439f * B1) + 128.0f);
            const int V = static_cast<int>( (0.439f * R1) - (0.368f * G1) - (0.071f * B1) + 128.0f);

            out[0] = cv::saturate_cast<uint8_t>(U);
            out[1] = cv::saturate_cast<uint8_t>(Y);

            // Odd width: the last pixel of the row has no partner and the
            // output row has room for two bytes only, so stop here.
            if (iw + 1 >= rgb.cols) {
                break;
            }

            const uint8_t* px2 = px1 + rgbPixelBytes;
            const uint8_t R2 = px2[0];
            const uint8_t G2 = px2[1];
            const uint8_t B2 = px2[2];

            const int Y2 = static_cast<int>((0.257f * R2) + (0.504f * G2) + (0.098f * B2) + 16.0f);

            out[2] = cv::saturate_cast<uint8_t>(V);
            out[3] = cv::saturate_cast<uint8_t>(Y2);
        }
    }
}
void rgb_to_yuv422_uyvy(常数cv::Mat&rgb,cv::Mat&yuv){
断言(rgb.size()==yuv.size()&&
rgb.depth()==CV_8U&&
rgb.channels()==3&&
yuv.depth()==CV_8U&&
yuv.channels()==2);
对于(int ih=0;ih

注意:该实现假设(并通过断言检查)输入为RGB排列,输出为YUV422的UYVY变体。我发现这相当快,但显然是。项目也有一个。我认为这将是最好的方式,因为它在GPU上执行。你可能可以从现有的
cvtColor
实现中获得灵感。这是一个相当大的挑战。一般来说,它是可以接受的首先尝试使用OpenCL(如果有)并为给定的转换实现,然后尝试HAL版本(对于Tegra等极少数特定的东西)如果可用并实现,那么它可能会尝试IPP(如果对给定转换可用并实现),最后是基线实现,对于YUV转换,它似乎使用
cv::ParallelLoopImpl
cv::parallel_for_
。关于我如何实现这种颜色转换,我还没有计时,但这是一个问题去掉了现有的OpenCV代码(基线)。结果看起来很合理。我明天会继续玩。还没有看过Cuda的东西。|这可能是OpenCV的一个很好的补丁…它显然缺失了。。这在我的i4930k上用NVIdia GTX 760生成。使用12个线程(这是OpenCV默认的功能)我需要7毫秒的时间来转换256 x 65536 BGR图像。这对你来说够快吗?我仍在尝试OpenCL实现。CUDA版本的
cvtColor
在两个方向上似乎都不支持YUV 4:2:2。这应该够快。我会尽快测试它,然后回复你。谢谢。据我所知,IPP不是免费的。我更喜欢使用开源软件