OpenCL:带高斯滤波器的模糊图像,高斯模糊

OpenCL:带高斯滤波器的模糊图像,高斯模糊,opencl,gaussian,Opencl,Gaussian,我计算了n=5的高斯滤波器掩码nxn。看起来是这样的: int gauss5[] = { 1, 4, 7, 4, 1, 4, 20, 33, 20, 4, 7, 33, 55, 33, 7, 4, 20, 33, 20, 4, 1, 4, 7, 4, 1 }; int gauss5_summ = 331; 经过计算,opencl中的图像变得更暗。原因是什么 size_t global_wblur[2]; size_t local_wblur[2]

我计算了n=5的高斯滤波器掩码nxn。看起来是这样的:

/*
 * 5x5 integer approximation of a Gaussian mask, stored row by row
 * (row-major, 25 entries). The mask is symmetric about its centre
 * element (55).
 *
 * NOTE(review): the elements are `int`. The blur kernel shown further
 * down reads its mask through a `uchar` pointer, so the element type
 * used on the device side must match whatever is uploaded from here —
 * confirm against the buffer-write call.
 */
int gauss5[] = {
    1,  4,  7,  4, 1,
    4, 20, 33, 20, 4,
    7, 33, 55, 33, 7,
    4, 20, 33, 20, 4,
    1,  4,  7,  4, 1,
};

/*
 * Sum of all 25 coefficients of gauss5; presumably passed to the kernel
 * as the normalisation divisor — verify against the caller.
 */
int gauss5_summ = 331;
经过计算,OpenCL 中的图像变得更暗。原因是什么?

/*
 * Launch the blur kernel over a 2-D index space: dimension 0 spans h
 * and dimension 1 spans w, in work-groups of 32 x 32 work-items.
 *
 * NOTE(review): before OpenCL 2.0 each global size must be a multiple
 * of the matching local size, and 32 * 32 = 1024 work-items per group
 * may exceed the device's maximum work-group size — confirm h, w and
 * the device limits.
 */
size_t global_wblur[2] = { h, w };
size_t local_wblur[2] = { 32, 32 };

err = clEnqueueNDRangeKernel(queue, cl_img_gaussian_blur, 2, NULL,
                             global_wblur, local_wblur,
                             0, NULL, NULL);

/* Any enqueue failure is fatal: report the code and its text, then quit. */
if (CL_SUCCESS != err) {
    fprintf(stderr, "error: clEnqueueNDRangeKernel() blur %d %s\n", err, cl_strerror(err));
    exit(EXIT_FAILURE);
}
内核源:

 /*
  * Convolve an 8-bit grayscale image with a square integer mask.
  *
  * One work-item produces one output pixel; global id 0 is the row (y)
  * and global id 1 is the column (x).
  *
  *   gray  input image, w * h bytes, row-major
  *   out   output image, same layout as gray
  *   gbox  mask coefficients, row-major, ONE BYTE per coefficient. The
  *         host buffer must therefore contain uchar values; uploading
  *         the raw bytes of an int array makes most coefficients read
  *         as 0 and darkens the result.
  *   n     mask side length (expected to be odd)
  *   sum   divisor applied to the weighted sum (must be non-zero)
  *   w, h  image width and height in pixels
  *
  * Pixels closer than n/2 to any image edge are copied unchanged.
  */
 __kernel void cl_img_gaussian_blur(__global const uchar *gray, __global uchar *out, __global const uchar *gbox, uint n, uint sum, uint w, uint h)
{
    const uint y = get_global_id(0);
    const uint x = get_global_id(1);
    const uint offset = n / 2;
    const uint taps = 2 * offset + 1;   /* rows/columns visited around the pixel */
    uint ky, kx, summ;

    /* Work-items outside the image (e.g. a padded global size) do nothing. */
    if (x >= w || y >= h) {
        return;
    }

    /*
     * Border pixels are passed through untouched. All operands are
     * unsigned, so the test must be written as "y < offset" (a
     * "y - offset < 0" comparison can never be true), and the far edge
     * needs ">=" because row h / column w are already past the image.
     */
    if (y < offset || y + offset >= h || x < offset || x + offset >= w) {
        out[y*w + x] = gray[y*w + x];
        return;
    }

    summ = 0;

    /* (ky, kx) index the mask; the matching image pixel is shifted by -offset. */
    for (ky = 0; ky < taps; ky++) {
        const uint row = (y - offset + ky) * w;

        for (kx = 0; kx < taps; kx++) {
            summ += (uint)gray[row + (x - offset + kx)] * (uint)gbox[ky*n + kx];
        }
    }

    out[y*w + x] = summ/sum;
}
\uuuuuu内核无效cl\u img\u高斯模糊(\uuuu全局常量uchar*gray,\uuuu全局常量uchar*out,\uuuu全局常量uchar*gbox,uint n,uint sum,uint w,uint h)
{
int i,j,偏移量;
单位x,y,总和;
y=获取全局id(0);
x=获取全局id(1);
偏移量=n/2;
/*忽略边框像素
*/
如果(y-偏移量<0 | | y+偏移量>h | | x-偏移量<0 | | x+偏移量>w){
out[y*w+x]=灰色[y*w+x];
返回;
}
总和=0;

对于(j=-offset;j…

您说过gbox数据的初始化如下:

clEnqueueWriteBuffer(queue, gauss_buf, CL_FALSE, 0, 5*5, gauss5, 0, NULL, NULL);
这是错误的,因为您正在复制实际内存量的1/4。正确的方法是:

clEnqueueWriteBuffer(queue, gauss_buf, CL_FALSE, 0, 5*5*sizeof(cl_int), gauss5, 0, NULL, NULL);

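否则,其余部分为0,导致输出值较低。另外,内核中 gbox 的元素类型必须与主机端数组一致:内核把它声明为 uchar,而 gauss5 是 int 数组。要么把内核参数改为 `__global const int *gbox`(并按上面的方式复制 5*5*sizeof(cl_int) 字节),要么在主机端改用 uchar 数组并保持复制 5*5 字节。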

您发布的内核似乎没有使用 gauss5 和 gauss5_sum。

我将高斯掩码作为缓冲区传递给内核:clEnqueueWriteBuffer(queue, gauss_buf, CL_FALSE, 0, 5*5, gauss5, 0, NULL, NULL); 我只是硬编码了一些值。框值的总和是正确的。它在 C 中运行良好。