C++ OpenCL只读取/写入缓冲区内存的1/4,有时会崩溃
我在使用OpenCL时遇到一个问题:整个命令队列都会被执行,但它只读取了输入的1/4,也只写回了结果的1/4。无论迭代次数是多少,始终是1/4。此外,程序有时还会随机崩溃。调试时我得不到任何有用的信息,因为崩溃位置没有调试符号(0x4c4783f6 in ??? 等)。(标签:c++, c, opencl, intel, gpgpu)源代码:
#include <cassert>
#include <cstring>
#include <iostream>
#include <vector>

#include <cl/cl.h>
// OpenCL C source for the single kernel "add", passed to
// clCreateProgramWithSource as a one-element array of strings.
// Each work-item writes its global id into out[tid] (the a[tid]+b[tid] sum is
// commented out inside the kernel text) and then prints out/a/b for that index
// through the cl_intel_printf extension enabled by the pragma.
const char *progsrc[] = {
"#pragma OPENCL EXTENSION cl_intel_printf : enable\n\
__kernel void add(__global const int *a, __global const int *b, __global int *out) \
{ \
int tid = get_global_id(0);\
out[tid] = tid/*a[tid]+b[tid]*/;\
printf(\"krnl: %d = %d + %d \\n\", out[tid], a[tid], b[tid]);\
}"};
// Number of int elements in each host array (a, b, out); also used as the
// global work size, so one kernel work-item runs per element.
const int iterations = 20;
/*
 * CLCheck(a): verify an OpenCL status code.
 *
 * Evaluates `a` exactly once. If the result is not CL_SUCCESS, prints the
 * numeric error code and the current source line to std::cerr and makes the
 * ENCLOSING function return -1 (so it may only be used inside functions whose
 * return type accepts -1).
 *
 * The argument is parenthesised and stored in a local so that expressions such
 * as `x & y` are compared as a whole and calls with side effects are not
 * executed a second time when the error is printed.
 */
#define CLCheck(a) \
do\
{\
    const cl_int clcheck_status_ = (a);\
    if(clcheck_status_ != CL_SUCCESS)\
    {\
        std::cerr << "OpenCL Error(" << clcheck_status_ << ") at " << __LINE__ << std::endl;\
        return -1;\
    }\
} while(0)
int main()
{
cl_int err = CL_SUCCESS;
int *aH = NULL;
int *bH = NULL;
int *outH = NULL;
cl_uint platnum, devnum;
cl_device_id dev;
cl_platform_id plat;
err = clGetPlatformIDs(0, 0, &platnum);
CLCheck(err);
cl_platform_id pfids[platnum];
err = clGetPlatformIDs(platnum, pfids, &platnum);
CLCheck(err);
if(!platnum)
{
std::cerr << "No platform found." << std::endl;
return -1;
}
else
std::cout << platnum << " OpenCL platform(s) found.\n" << std::endl;
for(unsigned int i = 0; i != platnum; i++)
{
char buf[4096];
err = clGetDeviceIDs(pfids[i], CL_DEVICE_TYPE_ALL, 0, 0, &devnum);
CLCheck(err);
cl_device_id devids[devnum];
err = clGetDeviceIDs(pfids[i], CL_DEVICE_TYPE_ALL, devnum, devids, &devnum);
CLCheck(err);
if(!devnum)
{
std::cerr << "No device found." << std::endl;
return -1;
}
else
std::cout << " " << devnum << " OpenCL device(s) found.\n" << std::endl;
for(unsigned int i2 = 0; i2 != devnum; i2++)
{
char buf[1024];
std::cout << ": \n\tName: " << buf;
err = clGetDeviceInfo(devids[i2], CL_DEVICE_VENDOR, 1024, buf, NULL);
CLCheck(err);
if(!strncmp(buf, "Intel", 5))
{
dev = devids[0];
plat = pfids[i];
std::cout << "\n\tFound Intel(R) OpenCL device.";
}
}
}
cl_context_properties ctxprop[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)plat, 0};
cl_context ctx = clCreateContext(ctxprop, 1, &dev, NULL, NULL, &err);
CLCheck(err);
cl_program program = clCreateProgramWithSource(ctx, 1, progsrc, NULL, &err);
CLCheck(err);
err = clBuildProgram(program, 1, &dev, "", NULL, NULL);
if(err != CL_SUCCESS)
{
size_t bufsz;
err = clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, 0, 0, &bufsz);
char buf[bufsz];
err = clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, bufsz, buf, &bufsz);
std::cerr << "OpenCL program building failed: " << buf << std::endl;
return -1;
}
err = clUnloadCompiler();
CLCheck(err);
aH = new int[iterations];
bH = new int[iterations];
outH = new int[iterations];
memset(outH, 0, iterations*sizeof(int));
for(int i = 0; i != iterations; i++)
{
aH[i] = i;
bH[i] = i*2;
}
cl_mem aCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err);
cl_mem bCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err);
CLCheck(err);
cl_mem outCL = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, iterations, NULL, &err);
CLCheck(err);
cl_kernel krnl = clCreateKernel(program, "add", &err);
CLCheck(err);
err = clSetKernelArg(krnl, 0, sizeof(aCL), &aCL);
CLCheck(err);
err = clSetKernelArg(krnl, 1, sizeof(bCL), &bCL);
CLCheck(err);
err = clSetKernelArg(krnl, 2, sizeof(outCL), &outCL);
CLCheck(err);
cl_command_queue cmdqueue = clCreateCommandQueue(ctx, dev, 0, &err);
cl_event evt;
size_t global_work_size[1] = { iterations };
err = clEnqueueWriteBuffer(cmdqueue, aCL, CL_TRUE, 0, iterations, aH, 0, NULL, NULL);
err = clEnqueueWriteBuffer(cmdqueue, bCL, CL_TRUE, 0, iterations, bH, 0, NULL, NULL);
err = clEnqueueNDRangeKernel(cmdqueue, krnl, 1, NULL, global_work_size, NULL, 0, NULL, &evt);
err = clWaitForEvents(1, &evt);
err = clEnqueueReadBuffer(cmdqueue, outCL, CL_TRUE, 0, iterations, outH, 0, NULL, &evt);
for(int i = 0; i != iterations; i++)
{
std::cout << outH[i] << std::endl;
}
err = clReleaseEvent(evt);
err = clReleaseCommandQueue(cmdqueue);
err = clReleaseKernel(krnl);
err = clReleaseMemObject(outCL);
err = clReleaseMemObject(bCL);
err = clReleaseMemObject(aCL);
err = clReleaseProgram(program);
err = clReleaseContext(ctx);
if(aH)
delete aH;
if(bH)
delete bH;
if(outH)
delete outH;
return 0;
}
谢谢:)
我不熟悉OpenCL,但我认为您漏掉了一些 sizeof(),如下所示:
err = clEnqueueWriteBuffer(cmdqueue, aCL, CL_TRUE, 0, iterations, aH, 0, NULL, NULL);
应该是:
err = clEnqueueWriteBuffer(cmdqueue, aCL, CL_TRUE, 0, iterations * sizeof(int), aH, 0, NULL, NULL);
这同样适用于以下类似的代码
编辑:
这里还有一处,您可能也漏掉了 sizeof():
哦,非常感谢……我作为一个有多年经验的C++程序员,怎么会没看到这一点。在C++中通常不需要和 void 指针打交道,这就是原因 ;)
err = clEnqueueWriteBuffer(cmdqueue, aCL, CL_TRUE, 0, iterations * sizeof(int), aH, 0, NULL, NULL);
cl_mem aCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err);
cl_mem bCL = clCreateBuffer(ctx, CL_MEM_READ_ONLY, iterations, NULL, &err);
CLCheck(err);
cl_mem outCL = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, iterations, NULL, &err);
CLCheck(err);