openCL错误结果
谁能解释一下为什么会这样? 我认为这会增加我数组的值openCL错误结果,opencl,Opencl,谁能解释一下为什么会这样? 我认为这会增加我数组的值 #include <iostream> #pragma comment(lib, "OpenCL.lib") #include <CL/cl.h> const std::string source_str = R"( __kernel void add(__global int* c) { int i = get_global_id(0); c[i]=c[i]+1;
#include <iostream>
#pragma comment(lib, "OpenCL.lib")
#include <CL/cl.h>
const std::string source_str = R"(
__kernel void add(__global int* c) {
int i = get_global_id(0);
c[i]=c[i]+1;
})";
size_t source_size = source_str.length();
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
cl_context context;
cl_command_queue command_queue;
cl_mem a_mem_obj;
cl_program program;
cl_kernel kernel;
int* a;
#define SIZE 100
// ## You may add your own initialization routines here ##
void init() {
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
if (ret != CL_SUCCESS)
std::cout << ret << 1;
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1,
&device_id, &ret_num_devices);
if (ret != CL_SUCCESS)
std::cout << ret << 2;
// Create an OpenCL context
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
if (ret != CL_SUCCESS)
std::cout << ret << 3;
// Create a command queue
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
if (ret != CL_SUCCESS)
std::cout << ret << 4;
a_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,
SIZE * sizeof(int), NULL, &ret);
if (ret != CL_SUCCESS)
std::cout << ret << 6;
// Create a program from the kernel source
program = clCreateProgramWithSource(context, 1,
(const char**)&source_str, (const size_t*)&source_size, &ret);
if (ret != CL_SUCCESS)
std::cout << ret << 9;
// Build the program
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
if (ret != CL_SUCCESS)
std::cout << ret << 10;
// Create the OpenCL kernel
kernel = clCreateKernel(program, "add", &ret);
if (ret != CL_SUCCESS)
std::cout << ret << 11;
// Set the arguments of the kernel
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&a_mem_obj);
if (ret != CL_SUCCESS)
std::cout << ret << 13;
}
void KernelStart() {
// Copy to the memory buffers
ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
SIZE * sizeof(int), a, 0, NULL, NULL);
if (ret != CL_SUCCESS)
std::cout << ret << 7;
// Execute the OpenCL kernel on the list
size_t static global_item_size = SIZE; // Process the entire lists
size_t static local_item_size = 64; // Divide work items into groups of 64
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
&global_item_size, &local_item_size, 0, NULL, NULL);
if (ret != CL_SUCCESS)
std::cout << ret << 14;
ret = clEnqueueReadBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
SIZE * sizeof(int), a, 0, NULL, NULL);
if (ret != CL_SUCCESS)
std::cout << ret << 15;
}
int main() {
a = new int[SIZE];
for (size_t i = 0; i < SIZE; i++)
{
a[i] = 1;
}
for (size_t i = 0; i < SIZE; i++)
{
std::cout << a[i];
}
std::cout << std::endl;
init();
KernelStart();
for (size_t i = 0; i < SIZE; i++)
{
std::cout << a[i];
}
}
#包括
#pragma注释(lib,“OpenCL.lib”)
#包括
常量std::字符串源\u str=R“(
__内核void add(uu全局int*c){
int i=获取全局id(0);
c[i]=c[i]+1;
})";
size\u t source\u size=source\u str.length();
cl_平台_id平台_id=NULL;
cl\U设备\U id设备\U id=NULL;
氯离子交换装置;
cl_uint ret_num_平台;
cl_int ret;
语境;
cl_命令_队列命令_队列;
cl_mem a_mem_obj;
CLU计划;
cl_核;
int*a;
#定义大小100
//##您可以在此处添加自己的初始化例程##
void init(){
ret=clGetPlatformIDs(1,&platform\u id,&ret\u num\u platforms);
if(ret!=CL_SUCCESS)
std::cout您的日志已经向您显示了正在发生的事情,尽管不是特别容易理解
第一个问题
您的程序输出:
-4913
相应的代码:
// Set the arguments of the kernel
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&a_mem_obj);
if (ret != CL_SUCCESS)
std::cout << ret << 13;
-54
对应于CL\u无效\u工作组\u大小
有:
CL\u无效的工作组大小
如果指定了本地工作组大小,并且全局工作组大小指定的工作项数量不能平均除以本地工作组大小指定的工作组大小,或者与使用\u属性为内核指定的工作组大小不匹配((reqd\u工作组大小(X,Y,Z)))
程序源中的限定符
CL\u无效的工作组大小
如果指定了本地工作组大小,并且工作组中的工作项总数计算为local\u WORK\u SIZE[0]*…本地工作组大小[WORK\u dim-1]
大于OpenCL设备查询clGetDeviceInfo
表中的CL\u设备\u最大工作\u组\u大小
指定的值
CL\u无效的工作组大小
如果local\u WORK\u SIZE
为NULL
并且\u属性((reqd\u工作组大小(X,Y,Z))
限定符用于声明程序源中内核的工作组大小
您的本地大小是64,全局大小是100。这意味着您遇到了第一个条件:您需要确保您的全局大小是本地大小的整数倍。在两个函数init()和KernelStart()之前和之后添加一些打印;以便隔离这两个函数的输出。
#define SIZE 100
…
// Execute the OpenCL kernel on the list
size_t static global_item_size = SIZE; // Process the entire lists
size_t static local_item_size = 64; // Divide work items into groups of 64
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
&global_item_size, &local_item_size, 0, NULL, NULL);
if (ret != CL_SUCCESS)
std::cout << ret << 14;