C++ 使用CUDA和OpenCV删除一个图像通道_C++_Opencv_Cuda

C++ 使用CUDA和OpenCV删除一个图像通道

c++ opencv cuda

C++ 使用CUDA和OpenCV删除一个图像通道,c++,opencv,cuda,C++,Opencv,Cuda,我刚开始学习OpenCV。我想删除一个通道使用CUDA内核，然后可视化它如何影响原始图像。但是这个程序不起作用，不知道为什么。它只显示黑色窗口：( 代码如下： #include "opencv2\opencv.hpp" #include <cuda.h> #include "cuda_runtime.h" #include "device_launch_parameters.h" #include <iostream> #include <device_func

我刚开始学习OpenCV。我想使用CUDA内核删除图像的一个通道，然后观察这对原始图像有什么影响。但是这个程序不起作用，我不知道为什么——它只显示一个黑色窗口 :( 代码如下：

#include "opencv2\opencv.hpp"
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <device_functions.h>


using namespace cv;


// Question's original (non-working) kernel: starting at its flat global thread
// index, each thread copies two bytes from `in` to `out` and zeroes a third.
// NOTE(review): `i` advances by 1 per thread while three consecutive bytes are
// written, so neighbouring threads write the same bytes (thread i zeroes
// out[i + 2] while threads i + 1 and i + 2 copy into that byte) - the result
// depends on write order. A per-pixel index would need a stride of 3.
// NOTE(review): there is no bounds check; the kernel relies entirely on the
// launch configuration to keep i + 2 inside both buffers.
__global__ void imgProc(unsigned char *in, unsigned char * out)
{
    // Flat 1D global thread index.
    int i = threadIdx.x + blockIdx.x * blockDim.x;
    out[i] =in[i];
    out[i+1] = in[i+1];
    out[i + 2] = 0; // intended to delete one channel


}

// Question's original (non-working) host code: loads "sw.jpg", runs imgProc on
// a device copy of its bytes, and shows the result next to the original.
// NOTE(review): none of the CUDA calls below have their return codes checked,
// so a failed allocation, copy or launch goes unnoticed.
int main()
{
    // Load the image as a colour image; the result is not checked for failure.
    Mat file1 = imread("sw.jpg", CV_LOAD_IMAGE_COLOR);  
    unsigned char *input = (unsigned char*)(file1.data);
    unsigned char *dev_input, *dev_output;
    // Host buffer for the result: cols * rows * 3 bytes.
    unsigned char *output = (unsigned char*)malloc(file1.cols*file1.rows * 3 * sizeof(char));

    // Device buffers of the same size, then upload the input bytes.
    cudaMalloc((void**)&dev_input, file1.cols*file1.rows * 3 * sizeof(char));
    cudaMalloc((void**)&dev_output, file1.cols*file1.rows * 3 * sizeof(char));
    cudaMemcpy(dev_input, input, file1.cols*file1.rows * 3 * sizeof(char), cudaMemcpyHostToDevice);
    // Launches file1.cols blocks of file1.rows threads each (cols * rows threads).
    // NOTE(review): the kernel indexes bytes by thread id, so only roughly the
    // first third of the cols * rows * 3 byte output is ever written.
    // NOTE(review): the thread resolves this as the image dimension exceeding a
    // device launch limit; with no error check a failed launch is silent -
    // confirm against the device's limits.
    imgProc << <file1.cols, file1.rows >> > (dev_input, dev_output);
    cudaMemcpy(output, dev_output, file1.cols*file1.rows * 3 * sizeof(char), cudaMemcpyDeviceToHost);

    // Build a rows x cols, 3-channel 8-bit Mat on top of the `output` buffer.
    Mat file3 =  Mat(file1.rows,file1.cols, CV_8UC3,output);
    namedWindow("Modified", CV_WINDOW_FREERATIO);
    imshow("Modified", file3);
    namedWindow("Original", CV_WINDOW_FREERATIO);
    imshow("Original", file1);

    cudaFree(dev_input);
    cudaFree(dev_output);
    // NOTE(review): `output` is freed while `file3` was constructed from it and
    // before waitKey() runs - presumably file3 does not own a copy; verify.
    free(output);


    // Block until a key is pressed so the windows stay visible.
    waitKey(); 

    return 0;
}

#include "opencv2\opencv.hpp"
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <device_functions.h>
using namespace cv;
__global__ void imgProc(unsigned char *in, unsigned char * out)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;
    out[i] =in[i];
    out[i+1] = in[i+1];
    out[i + 2] = 0; //deleting one channel
}
int main()
{
    Mat file1 = imread("sw.jpg", CV_LOAD_IMAGE_COLOR);
    unsigned char *input = (unsigned char*)(file1.data);
    unsigned char *dev_input, *dev_output;
    unsigned char *output = (unsigned char*)malloc(file1.cols*file1.rows * 3 * sizeof(char));
    cudaMalloc((void**)&dev_input, file1.cols*file1.rows * 3 * sizeof(char));
    cudaMalloc((void**)&dev_output, file1.cols*file1.rows * 3 * sizeof(char));
    cudaMemcpy(dev_input, input, file1.cols*file1.rows * 3 * sizeof(char), cudaMemcpyHostToDevice);
    imgProc << <file1.cols, file1.rows >> > (dev_input, dev_output);
    cudaMemcpy(output, dev_output, file1.cols*file1.rows * 3 * sizeof(char), cudaMemcpyDeviceToHost);
    Mat file3 =  Mat(file1.rows,file1.cols, CV_8UC3,output);
    namedWindow("Modified", CV_WINDOW_FREERATIO);
    imshow("Modified", file3);
    namedWindow("Original", CV_WINDOW_FREERATIO);
    imshow("Original", file1);
    cudaFree(dev_input);
    cudaFree(dev_output);
    free(output);
    waitKey();
    return 0;
}

您似乎让这项任务变得比需要的更复杂，OpenCV提供了完成此任务所需的所有功能：

// NOTE(review): BGRChannels is not declared in this snippet - presumably an
// array of three cv::Mat (e.g. `Mat BGRChannels[3];`); confirm before use.
split(src,BGRChannels); // split the BGR channels
BGRChannels[1]=Mat::zeros(src.rows,src.cols,CV_8UC1);// replace channel 1 with zeros (removing the green channel)
merge(BGRChannels,3,src); // pack the three channels back into src

您似乎使这项任务变得比需要的更复杂，OpenCV提供了完成此任务所需的所有功能：

// NOTE(review): BGRChannels is not declared in this snippet - presumably an
// array of three cv::Mat (e.g. `Mat BGRChannels[3];`); confirm before use.
split(src,BGRChannels); // split the BGR channels
BGRChannels[1]=Mat::zeros(src.rows,src.cols,CV_8UC1);// replace channel 1 with zeros (removing the green channel)
merge(BGRChannels,3,src); // pack the three channels back into src

好的，我明白了。内核中有一些错误，但最重要的是我正在处理的图片的大小大于我的GPU上每个网格的最大线程数

Here is working code, which deletes one img channel from the picture:

#include "opencv2\opencv.hpp"
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <device_functions.h>


using namespace cv;


// Threads per block for the 1D launch in main() (a multiple of the warp size).
static const int kThreadsPerBlock = 256;

// Reports a failed CUDA runtime call on stderr.
//   status - value returned by the CUDA runtime call
//   what   - short label naming the call, used in the message
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Copies a packed 3-bytes-per-pixel image from `in` to `out`, zeroing byte 0 of
// every pixel and keeping bytes 1 and 2.
//   in        - device pointer to numPixels * 3 input bytes
//   out       - device pointer to numPixels * 3 output bytes
//   numPixels - number of pixels to process
// Launch layout: 1D grid of 1D blocks. The loop advances by the total number of
// launched threads, so any grid size is valid and no thread reads or writes
// past numPixels. No shared memory is used.
__global__ void imgProc(const unsigned char *in, unsigned char *out, size_t numPixels)
{
    const size_t stride = (size_t)gridDim.x * blockDim.x;
    for (size_t p = (size_t)blockIdx.x * blockDim.x + threadIdx.x; p < numPixels; p += stride)
    {
        const size_t base = p * 3;
        out[base + 0] = 0;              // deleted channel
        out[base + 1] = in[base + 1];
        out[base + 2] = in[base + 2];
    }
}

// Loads "sw.jpg", removes one channel on the GPU and shows the result next to
// the original. Returns 0 on success and 1 if the image cannot be read or any
// CUDA call fails.
int main()
{
    // The device limits are used below to clamp the grid size.
    cudaDeviceProp prop;
    if (!cudaOk(cudaGetDeviceProperties(&prop, 0), "cudaGetDeviceProperties"))
        return 1;

    Mat file1 = imread("sw.jpg", CV_LOAD_IMAGE_COLOR);
    if (file1.empty())
    {
        std::cerr << "Could not read sw.jpg" << std::endl;
        return 1;
    }
    // The kernel treats the image as one packed byte array, so rows must not
    // be separated by padding.
    if (!file1.isContinuous())
        file1 = file1.clone();

    // Sizes are computed in size_t so large images cannot overflow int.
    const size_t numPixels = (size_t)file1.cols * (size_t)file1.rows;
    const size_t numBytes = numPixels * 3;

    // The output Mat owns its buffer, so it stays valid while it is displayed.
    Mat file3(file1.rows, file1.cols, CV_8UC3);

    // Enough blocks to give every pixel a thread, clamped to the device limit;
    // the kernel's loop covers any remainder.
    size_t blocks = (numPixels + kThreadsPerBlock - 1) / kThreadsPerBlock;
    if (blocks > (size_t)prop.maxGridSize[0])
        blocks = (size_t)prop.maxGridSize[0];

    unsigned char *dev_input = 0;
    unsigned char *dev_output = 0;

    bool ok = cudaOk(cudaMalloc((void**)&dev_input, numBytes), "cudaMalloc(dev_input)")
           && cudaOk(cudaMalloc((void**)&dev_output, numBytes), "cudaMalloc(dev_output)")
           && cudaOk(cudaMemcpy(dev_input, file1.data, numBytes, cudaMemcpyHostToDevice),
                     "cudaMemcpy(host to device)");
    if (ok)
    {
        imgProc<<<(unsigned int)blocks, kThreadsPerBlock>>>(dev_input, dev_output, numPixels);
        // cudaGetLastError reports a bad launch configuration; the blocking
        // copy back reports faults raised while the kernel was running.
        ok = cudaOk(cudaGetLastError(), "imgProc launch")
          && cudaOk(cudaMemcpy(file3.data, dev_output, numBytes, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device to host)");
    }

    // Freeing a null pointer is a no-op, so this is safe on every path.
    cudaFree(dev_input);
    cudaFree(dev_output);
    if (!ok)
        return 1;

    namedWindow("Modified", CV_WINDOW_FREERATIO);
    imshow("Modified", file3);
    namedWindow("Original", CV_WINDOW_FREERATIO);
    imshow("Original", file1);

    // Block until a key is pressed so the windows stay visible.
    waitKey();

    return 0;
}

以下是可以正常工作的代码，它会从图片中删除一个图像通道：
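#include "opencv2\opencv.hpp"
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <device_functions.h>
using namespace cv;
__global__ void imgProc(unsigned char *in, unsigned char * out)
{
    int x = blockIdx.x;
    int y = blockIdx.y;
    int offset = x + y * gridDim.x;
    out[offset*3+0] =0;
    out[offset * 3 + 1] = in[offset * 3 + 1];
    out[offset * 3 + 2] = in[offset * 3 + 2];
}
int main()
{
    cudaDeviceProp prop;
    cudaGetDeviceProperties(&prop, 0);
    std::cout << (int)prop.maxGridSize[1];
    Mat file1 = imread("sw.jpg", CV_LOAD_IMAGE_COLOR);
    unsigned char *input = (unsigned char*)(file1.data);
    unsigned char *dev_input, *dev_output;
    unsigned char *output = (unsigned char*)malloc(file1.cols*file1.rows * 3 * sizeof(char));
    cudaMalloc((void**)&dev_input, file1.cols*file1.rows * 3 * sizeof(char));
    cudaMalloc((void**)&dev_output, file1.cols*file1.rows * 3 * sizeof(char));
    cudaMemcpy(dev_input, input, file1.cols*file1.rows * 3 * sizeof(char), cudaMemcpyHostToDevice);
    dim3 grid(file1.cols, file1.rows);
    imgProc << <grid,1  >> > (dev_input, dev_output);
    cudaMemcpy(output, dev_output, file1.cols*file1.rows * 3 * sizeof(char), cudaMemcpyDeviceToHost);
    Mat file3 =  Mat(file1.rows,file1.cols, CV_8UC3,output);
    namedWindow("Modified", CV_WINDOW_FREERATIO);
    imshow("Modified", file3);
    namedWindow("Original", CV_WINDOW_FREERATIO);
    imshow("Original", file1);
    cudaFree(dev_input);
    cudaFree(dev_output);
    free(output);
    waitKey();
    return 0;
}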

好的，我明白了。内核中有一些错误，但最重要的是，我正在处理的图片的大小大于我的GPU上每个网格的最大线程数

Here is working code, which deletes one img channel from the picture:

#include "opencv2\opencv.hpp"
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <device_functions.h>


using namespace cv;


// Threads per block for the 1D launch in main() (a multiple of the warp size).
static const int kThreadsPerBlock = 256;

// Reports a failed CUDA runtime call on stderr.
//   status - value returned by the CUDA runtime call
//   what   - short label naming the call, used in the message
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Copies a packed 3-bytes-per-pixel image from `in` to `out`, zeroing byte 0 of
// every pixel and keeping bytes 1 and 2.
//   in        - device pointer to numPixels * 3 input bytes
//   out       - device pointer to numPixels * 3 output bytes
//   numPixels - number of pixels to process
// Launch layout: 1D grid of 1D blocks. The loop advances by the total number of
// launched threads, so any grid size is valid and no thread reads or writes
// past numPixels. No shared memory is used.
__global__ void imgProc(const unsigned char *in, unsigned char *out, size_t numPixels)
{
    const size_t stride = (size_t)gridDim.x * blockDim.x;
    for (size_t p = (size_t)blockIdx.x * blockDim.x + threadIdx.x; p < numPixels; p += stride)
    {
        const size_t base = p * 3;
        out[base + 0] = 0;              // deleted channel
        out[base + 1] = in[base + 1];
        out[base + 2] = in[base + 2];
    }
}

// Loads "sw.jpg", removes one channel on the GPU and shows the result next to
// the original. Returns 0 on success and 1 if the image cannot be read or any
// CUDA call fails.
int main()
{
    // The device limits are used below to clamp the grid size.
    cudaDeviceProp prop;
    if (!cudaOk(cudaGetDeviceProperties(&prop, 0), "cudaGetDeviceProperties"))
        return 1;

    Mat file1 = imread("sw.jpg", CV_LOAD_IMAGE_COLOR);
    if (file1.empty())
    {
        std::cerr << "Could not read sw.jpg" << std::endl;
        return 1;
    }
    // The kernel treats the image as one packed byte array, so rows must not
    // be separated by padding.
    if (!file1.isContinuous())
        file1 = file1.clone();

    // Sizes are computed in size_t so large images cannot overflow int.
    const size_t numPixels = (size_t)file1.cols * (size_t)file1.rows;
    const size_t numBytes = numPixels * 3;

    // The output Mat owns its buffer, so it stays valid while it is displayed.
    Mat file3(file1.rows, file1.cols, CV_8UC3);

    // Enough blocks to give every pixel a thread, clamped to the device limit;
    // the kernel's loop covers any remainder.
    size_t blocks = (numPixels + kThreadsPerBlock - 1) / kThreadsPerBlock;
    if (blocks > (size_t)prop.maxGridSize[0])
        blocks = (size_t)prop.maxGridSize[0];

    unsigned char *dev_input = 0;
    unsigned char *dev_output = 0;

    bool ok = cudaOk(cudaMalloc((void**)&dev_input, numBytes), "cudaMalloc(dev_input)")
           && cudaOk(cudaMalloc((void**)&dev_output, numBytes), "cudaMalloc(dev_output)")
           && cudaOk(cudaMemcpy(dev_input, file1.data, numBytes, cudaMemcpyHostToDevice),
                     "cudaMemcpy(host to device)");
    if (ok)
    {
        imgProc<<<(unsigned int)blocks, kThreadsPerBlock>>>(dev_input, dev_output, numPixels);
        // cudaGetLastError reports a bad launch configuration; the blocking
        // copy back reports faults raised while the kernel was running.
        ok = cudaOk(cudaGetLastError(), "imgProc launch")
          && cudaOk(cudaMemcpy(file3.data, dev_output, numBytes, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device to host)");
    }

    // Freeing a null pointer is a no-op, so this is safe on every path.
    cudaFree(dev_input);
    cudaFree(dev_output);
    if (!ok)
        return 1;

    namedWindow("Modified", CV_WINDOW_FREERATIO);
    imshow("Modified", file3);
    namedWindow("Original", CV_WINDOW_FREERATIO);
    imshow("Original", file1);

    // Block until a key is pressed so the windows stay visible.
    waitKey();

    return 0;
}

以下是可以正常工作的代码，它会从图片中删除一个图像通道：
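#include "opencv2\opencv.hpp"
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <device_functions.h>
using namespace cv;
__global__ void imgProc(unsigned char *in, unsigned char * out)
{
    int x = blockIdx.x;
    int y = blockIdx.y;
    int offset = x + y * gridDim.x;
    out[offset*3+0] =0;
    out[offset * 3 + 1] = in[offset * 3 + 1];
    out[offset * 3 + 2] = in[offset * 3 + 2];
}
int main()
{
    cudaDeviceProp prop;
    cudaGetDeviceProperties(&prop, 0);
    std::cout << (int)prop.maxGridSize[1];
    Mat file1 = imread("sw.jpg", CV_LOAD_IMAGE_COLOR);
    unsigned char *input = (unsigned char*)(file1.data);
    unsigned char *dev_input, *dev_output;
    unsigned char *output = (unsigned char*)malloc(file1.cols*file1.rows * 3 * sizeof(char));
    cudaMalloc((void**)&dev_input, file1.cols*file1.rows * 3 * sizeof(char));
    cudaMalloc((void**)&dev_output, file1.cols*file1.rows * 3 * sizeof(char));
    cudaMemcpy(dev_input, input, file1.cols*file1.rows * 3 * sizeof(char), cudaMemcpyHostToDevice);
    dim3 grid(file1.cols, file1.rows);
    imgProc << <grid,1  >> > (dev_input, dev_output);
    cudaMemcpy(output, dev_output, file1.cols*file1.rows * 3 * sizeof(char), cudaMemcpyDeviceToHost);
    Mat file3 =  Mat(file1.rows,file1.cols, CV_8UC3,output);
    namedWindow("Modified", CV_WINDOW_FREERATIO);
    imshow("Modified", file3);
    namedWindow("Original", CV_WINDOW_FREERATIO);
    imshow("Original", file1);
    cudaFree(dev_input);
    cudaFree(dev_output);
    free(output);
    waitKey();
    return 0;
}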

我知道，但我并不想用更简单的办法，我想结合CUDA来做，作为练习。

我在你的代码中没有看到任何错误检查。你确定没有运行时错误吗？你使用的图像尺寸是多少？

好的，我弄明白了。事实证明，cols 的数值大于我的GPU支持的最大线程数。我会把正确的代码贴到我的问题里，也许能帮到别人 ;p

@KarolŻurowski 你真的不应该通过更新问题来给出答案。不过，你可以回答自己的问题并接受它，那样会更有用。

@GPPK 这似乎很合理，我已经按照你的建议做了 :)

将每个块的线程数设置为1会浪费大约97%的GPU计算能力，您可能想阅读相关资料。内核调用的形式是：

imgProc<<<blocks, threads>>>(dev_input, dev_output)