C++ 如何使用GPU倍增2个OpenCV mats_C++_Opencv_Image Processing

C++ 如何使用GPU将两个OpenCV Mat相乘

c++ opencv image-processing

C++ 如何使用GPU倍增2个OpenCV mats,c++,opencv,image-processing,C++,Opencv,Image Processing,在OpenCV中，我可以将RGB 1920 x 1080材质乘以3 x 3材质，以更改源材质的颜色组成。一旦我的源代码正确成形，我就可以使用“*”操作符执行乘法。使用cv:：gpu:：GpuMat时，此运算符不可用我的问题是如何格式化输入源Mat以使用cv:：gpu:：gemm？我甚至可以使用cv:：gpu:：gemm吗据我所知，这是OpenCV库中唯一一个执行矩阵乘法的调用。gemm希望看到cv_32FC1、cv_64FC1类型的垫子。我通常使用的CPU类型是CV_32FC3 //sour

在OpenCV中，我可以将一个RGB 1920 x 1080的Mat乘以一个3 x 3的Mat，以更改源Mat的颜色组成。只要源Mat被重塑为正确的形状，我就可以使用“*”运算符执行乘法。但使用cv::gpu::GpuMat时，此运算符不可用。

我的问题是：如何格式化输入的源Mat以便使用cv::gpu::gemm？我究竟能不能使用cv::gpu::gemm？

据我所知，这是OpenCV库中唯一一个执行矩阵乘法的调用。gemm要求输入的Mat类型为CV_32FC1或CV_64FC1，而我在CPU上通常使用的类型是CV_32FC3。

// CPU version of the colour correction.
// sourceMat is a CV_32FC3 (3-channel float) 1920 x 1080 Mat.
// NOTE(review): matFromBuffer and data are defined outside this snippet.
Mat sourceMat = matFromBuffer(data->bufferA, data->widthA, data->heightA);

// 3x3 colour matrix, stored row by row.
float matrix[3][3] = {{1.057311, -0.204043, 0.055648},
{ 0.041556, 1.875992, -0.969256},
{-0.498535,-1.537150, 3.240479}};

// Wrap the array as a single-channel 3x3 Mat and transpose it so it can be
// used as the right-hand operand of the per-pixel multiplication below.
Mat colorMatrixMat = Mat(3, 3, CV_32FC1, matrix).t();

// Colour-correct the Mat:
// 1. reshape to 1 channel with 1080*1920 rows, i.e. one row per pixel and
//    (for a 3-channel source) one column per channel;
// 2. multiply every pixel row by the 3x3 colour matrix;
// 3. reshape back to 3 channels with 1080 rows.
Mat linearSourceMat = sourceMat.reshape(1, 1080*1920);
Mat multipliedMatrix = linearSourceMat * colorMatrixMat;
Mat recoloredMat = multipliedMatrix.reshape(3, 1080);

更新：作为测试，我创建了测试例程：

// Test routine that tries to multiply two matrices on the GPU with
// cv::gpu::gemm. Always returns 0.
//
// As written, both operands end up as 9-row x 1-column matrices, so the call
// to gemm trips the assertion `src1Size.width == src2Size.height` (1 != 9).
static int gpuTest(){

    float matrix[9] = {1.057311, -0.204043, 0.055648, 0.041556, 1.875992, -0.969256, -0.498535,-1.537150, 3.240479};
    // A 1x9 row vector, transposed into a 9x1 column vector.
    Mat matrixMat = Mat(1, 9, CV_32FC1, matrix).t();
    cv::gpu::GpuMat gpuMatrixMat;
    gpuMatrixMat.upload(matrixMat);

    float matrixDest[9] = {1,1,1,1,1,1,1,1,1};
    // Also transposed, so this is 9x1 as well, not 1x9.
    Mat matrixDestMat = Mat(1, 9, CV_32FC1, matrixDest).t();
    cv::gpu::GpuMat destMatrixMat;
    destMatrixMat.upload(matrixDestMat);

    cv::gpu::GpuMat nextMat;
    // Requests nextMat = 1 * gpuMatrixMat * destMatrixMat (empty third operand,
    // beta = 0). The inner dimensions do not agree (9x1 times 9x1).
    cv::gpu::gemm(gpuMatrixMat, destMatrixMat, 1, cv::gpu::GpuMat(), 0, nextMat);

    return 0;
};

我收到的错误是：

OpenCV Error: Assertion failed (src1Size.width == src2Size.height) in gemm, file /Users/myuser/opencv-2.4.12/modules/gpu/src/arithm.cpp, line 109
libc++abi.dylib: terminating with uncaught exception of type cv::Exception: /Users/myuser/opencv-2.4.12/modules/gpu/src/arithm.cpp:109: error: (-215) src1Size.width == src2Size.height in function gemm

现在，src1Size.width如何等于src2Size.height？宽度和高度不同。

以下是使用OpenCV 3.1的最小可运行示例：

#include <opencv2/opencv.hpp>
#include <opencv2/cudaarithm.hpp>

int main()
{ 
    cv::Mat sourceMat = cv::Mat::ones(1080, 1920, CV_32FC3);

    //This is the color Matrix
    float matrix[3][3] = {
        { 1.057311, -0.204043, 0.055648 }
        , { 0.041556, 1.875992, -0.969256 }
        , { -0.498535, -1.537150, 3.240479 }
        };

    cv::Mat colorMatrixMat = cv::Mat(3, 3, CV_32FC1, matrix).t();

    cv::Mat linearSourceMat = sourceMat.reshape(1, 1080 * 1920);
    cv::Mat multipliedMatrix = linearSourceMat * colorMatrixMat;

    try {
        cv::Mat dummy, gpuMultipliedMatrix;

        // Regular gemm
        cv::gemm(linearSourceMat, colorMatrixMat, 1.0, dummy, 0.0, gpuMultipliedMatrix);
        // CUDA gemm
        // cv::cuda::gemm(linearSourceMat, colorMatrixMat, 1.0, dummy, 0.0, gpuMultipliedMatrix);

        std::cout << (cv::countNonZero(multipliedMatrix != gpuMultipliedMatrix) == 0);
    } catch (cv::Exception& e) {
        std::cerr << e.what();
        return -1;
    }
}

然后

// gpuResult = 1.0 * gpuLinSrc * gpuColorMat; the empty GpuMat with beta = 0.0
// means no third matrix is added.
cv::gpu::gemm(gpuLinSrc, gpuColorMat, 1.0, cv::gpu::GpuMat(), 0.0, gpuResult);

最后从GPU下载数据

// Copy the GPU result back into a host-side cv::Mat.
cv::Mat resultFromGPU;
gpuResult.download(resultFromGPU);

更新：下面是一个更详细的示例，向您展示发生了什么：

#include <opencv2/opencv.hpp>

#include <iostream>
#include <numeric>
#include <vector>

// ============================================================================

// Build a three-channel float test image with 5 rows and 4 columns.
cv::Mat make_image()
{
    // Base plane: the values 1..20 laid out as 5 rows x 4 columns.
    std::vector<float> base_values(5 * 4);
    std::iota(base_values.begin(), base_values.end(), 1.0f);
    const cv::Mat base_plane(5, 4, CV_32FC1, base_values.data());

    // Give every channel its own offset (+0, +100, +200) so the channels can
    // be told apart in the printed output.
    const cv::Mat planes[3] = {
        base_plane,
        base_plane + 100,
        base_plane + 200
    };

    // Interleave the planes into one 5x4 image with 3 channels.
    cv::Mat image;
    cv::merge(planes, 3, image);
    return image;
}

// Build the colour-correction matrix and return it transposed.
cv::Mat make_color_mat()
{
    // Row i holds the coefficients for output channel i.
    float coefficients[3][3] = {
        { 0.1f, 0.2f, 0.3f },
        { 0.4f, 0.5f, 0.6f },
        { 0.7f, 0.8f, 0.9f }
    };

    // Wrap the local array as a single-channel 3x3 matrix, then transpose it.
    const cv::Mat untransposed(3, 3, CV_32FC1, coefficients);
    return untransposed.t();
}

// Print a labelled dump of a matrix to stdout: its size, its channel count
// and its contents, followed by a blank line.
void print_mat(cv::Mat m, std::string const& label)
{
    std::cout << label << ":\n";
    std::cout << "  size=" << m.size() << "\n";
    std::cout << "  channels=" << m.channels() << "\n";
    std::cout << m << "\n" << std::endl;
}

// Compute element (r, c) of the matrix product a * b by hand, using the first
// three columns of a and the first three rows of b.
float mm_at(cv::Mat a, cv::Mat b, int r, int c)
{
    // Start from the k = 0 term and add the rest in order, so the summation
    // order matches a plain left-to-right three-term sum.
    float dot = a.at<float>(r, 0) * b.at<float>(0, c);
    for (int k = 1; k < 3; ++k) {
        dot += a.at<float>(r, k) * b.at<float>(k, c);
    }
    return dot;
}

// Compute row r of the matrix product a * b by hand, as a 3-element vector.
cv::Vec3f mm_test(cv::Mat a, cv::Mat b, int r)
{
    cv::Vec3f row;
    for (int c = 0; c < 3; ++c) {
        row[c] = mm_at(a, b, r, c);
    }
    return row;
}

// ============================================================================

int main()
{ 
    try {
        // Step 1
        cv::Mat source_image(make_image());
        print_mat(source_image, "source_image");
        std::cout << "source pixel at (0,0): " << source_image.at<cv::Vec3f>(0, 0) << "\n\n";

        // Step 2
        cv::Mat color_mat(make_color_mat());
        print_mat(color_mat, "color_mat");

        // Step 3
        // Reshape the source matrix to obtain a matrix:
        // * with only one channel (CV_32FC1)
        // * where each row corresponds to a single pixel from source
        // * where each column corresponds to a single channel from source
        cv::Mat reshaped_image(source_image.reshape(1, source_image.rows * source_image.cols));
        print_mat(reshaped_image, "reshaped_image");

        // Step 4
        cv::Mat corrected_image;
        // corrected_image = 1.0 * reshaped_image * color_mat
        cv::gemm(reshaped_image, color_mat, 1.0, cv::Mat(), 0.0, corrected_image);
        print_mat(corrected_image, "corrected_image");

        // Step 5
        // Reshape back to the original format
        cv::Mat result_image(corrected_image.reshape(3, source_image.rows));
        print_mat(result_image, "result_image");
        std::cout << "result pixel at (0,0): " << result_image.at<cv::Vec3f>(0, 0) << "\n\n";

        // Step 6
        // Calculate one pixel manually...
        std::cout << "check pixel (0,0): " << mm_test(reshaped_image, color_mat, 0) << "\n\n";
    } catch (cv::Exception& e) {
        std::cerr << e.what();
        return -1;
    }
}

// ============================================================================

我们可以打印出单个像素，以使结构更清晰：

source pixel at (0,0): [1, 101, 201]

步骤2 创建样本颜色校正矩阵（转置），以便：

第一列包含用于确定红色值的系数
第二列包含用于确定绿色值的系数
第三列包含用于确定蓝色值的系数

color_mat:
  size=[3 x 3]
  channels=1
[0.1, 0.40000001, 0.69999999;
0.2, 0.5, 0.80000001;
0.30000001, 0.60000002, 0.89999998]

旁注：颜色校正算法我们希望使用系数C将源像素S转换为像素T

S = [sr, sg, sb]
T = [tr, tg, tb]
C = [cr1, cr2, cr3;
     cg1, cg2, cg3;
     cb1, cb2, cb3]

使得

tr = cr1*sr + cr2*sg + cr3*sb
tg = cg1*sr + cg2*sg + cg3*sb
tb = cb1*sr + cb2*sg + cb3*sb

这可以用以下矩阵表达式表示：

T = S * C_transpose

步骤3 为了能够使用上述算法，我们首先需要将图像重塑为矩阵：

包含单个通道，因此每个点上的值仅为浮点值
每行有一个像素
有3列表示红色、绿色、蓝色

在此形状中，矩阵乘法意味着输入的每个像素/行与系数矩阵相乘，以确定输出中的一个像素/行

重塑后的矩阵如下所示：

reshaped_image:
  size=[3 x 20]
  channels=1
[1, 101, 201;
2, 102, 202;
3, 103, 203;
4, 104, 204;
5, 105, 205;
6, 106, 206;
7, 107, 207;
8, 108, 208;
9, 109, 209;
10, 110, 210;
11, 111, 211;
12, 112, 212;
13, 113, 213;
14, 114, 214;
15, 115, 215;
16, 116, 216;
17, 117, 217;
18, 118, 218;
19, 119, 219;
20, 120, 220]

步骤4 我们执行乘法（例如使用 gemm），以获得以下矩阵：

corrected_image:
  size=[3 x 20]
  channels=1
[80.600006, 171.5, 262.39999;
81.200005, 173, 264.79999;
81.800003, 174.5, 267.20001;
82.400002, 176, 269.60001;
83, 177.5, 272;
83.600006, 179, 274.39999;
84.200005, 180.5, 276.79999;
84.800003, 182, 279.20001;
85.400002, 183.5, 281.60001;
86, 185, 284;
86.600006, 186.5, 286.39999;
87.200005, 188, 288.79999;
87.800003, 189.5, 291.20001;
88.400009, 191, 293.60001;
89, 192.5, 296;
89.600006, 194, 298.39999;
90.200005, 195.50002, 300.79999;
90.800003, 197, 303.20001;
91.400009, 198.5, 305.60001;
92, 200, 308]

步骤5 现在我们可以将图像重塑回原始形状。结果是

result_image:
  size=[4 x 5]
  channels=3
[80.600006, 171.5, 262.39999, 81.200005, 173, 264.79999, 81.800003, 174.5, 267.20001, 82.400002, 176, 269.60001;
83, 177.5, 272, 83.600006, 179, 274.39999, 84.200005, 180.5, 276.79999, 84.800003, 182, 279.20001;
85.400002, 183.5, 281.60001, 86, 185, 284, 86.600006, 186.5, 286.39999, 87.200005, 188, 288.79999;
87.800003, 189.5, 291.20001, 88.400009, 191, 293.60001, 89, 192.5, 296, 89.600006, 194, 298.39999;
90.200005, 195.50002, 300.79999, 90.800003, 197, 303.20001, 91.400009, 198.5, 305.60001, 92, 200, 308]

让我们看看结果中的一个像素：

result pixel at (0,0): [80.6, 171.5, 262.4]

步骤6 现在，我们可以通过手动执行相应的计算（函数 mm_test 和 mm_at）来再次检查结果：

check pixel (0,0): [80.6, 171.5, 262.4]

sourceMat.reshape(1, 1080*1920) 不是会将通道数减少为1，从而使 linearSourceMat 的类型为 CV_32FC1 吗？您的 colorMatrixMat 也是 CV_32FC1。所以在我看来，gemm应该可以按原样处理您的数据。我的Mat是CV_32FC3，所以它是1080列乘以1920行、每个元素包含3个RGB值。当我把它重塑为 (1, 1080*1920) 时，我是在创建一列共1080*1920行的RGB值。Mat::reshape 的第一个参数是通道数。根据我的理解，这意味着您创建的是一个包含1080*1920行和3列的单通道矩阵。因为您要乘以一个3x3矩阵，所以这必须成立（根据矩阵乘法的定义）。请在调试器中检查类型……让我看看是否可以在本地尝试。我不确定我的OpenCV副本是否启用了GPU支持。我已经确认了我对矩阵类型的假设。现在还无法让GPU版本工作，我以前没有真正使用过它，而且现在有点晚了。我在上面创建了一个测试例程。我将一个1x9的CV_32FC1与一个9x1的CV_32FC1相乘，得到一个9x9的CV_32FC1。这段代码可以工作，但矩阵乘法会产生更大的矩阵。在我的CPU矩阵乘法代码中，我
T = S * C_transpose