openCL错误结果

openCL错误结果,opencl,Opencl,谁能解释一下为什么会这样? 我认为这会增加我数组的值 #include <iostream> #pragma comment(lib, "OpenCL.lib") #include <CL/cl.h> const std::string source_str = R"( __kernel void add(__global int* c) { int i = get_global_id(0); c[i]=c[i]+1;

谁能解释一下为什么会这样? 我认为这会增加我数组的值

#include <iostream>
#include <string>
#pragma comment(lib, "OpenCL.lib")
#include <CL/cl.h>

// OpenCL C source for the `add` kernel. Each work-item takes its global id in
// dimension 0 and increments the element of `c` at that index by one. The
// kernel has exactly one parameter (`c`).
const std::string source_str = R"(
__kernel void add(__global int* c) {

    int i = get_global_id(0);

    c[i]=c[i]+1;
})";

// Number of ints in the host array `a` and in the device buffer.
#define SIZE 100

// Length in bytes of the kernel source text held in source_str.
size_t source_size = source_str.length();

// Platform and device selected by init(), plus the counts reported by the
// corresponding query calls.
cl_platform_id platform_id = nullptr;
cl_device_id device_id = nullptr;
cl_uint ret_num_devices = 0;
cl_uint ret_num_platforms = 0;

// Status code of the most recent OpenCL call.
cl_int ret = 0;

// OpenCL objects created by init() and used by KernelStart().
cl_context context = nullptr;
cl_command_queue command_queue = nullptr;
cl_mem a_mem_obj = nullptr;
cl_program program = nullptr;
cl_kernel kernel = nullptr;

// Host-side array; allocated in main() and copied to/from a_mem_obj.
int* a = nullptr;

// ## You may add your own initialization routines here ##
void init() {
    ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    if (ret != CL_SUCCESS)
        std::cout << ret << 1;
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1,
        &device_id, &ret_num_devices);

    if (ret != CL_SUCCESS)
        std::cout << ret << 2;

    // Create an OpenCL context
    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
    if (ret != CL_SUCCESS)
        std::cout << ret << 3;

    // Create a command queue
    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    if (ret != CL_SUCCESS)
        std::cout << ret << 4;

    a_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,
        SIZE * sizeof(int), NULL, &ret);
    if (ret != CL_SUCCESS)
        std::cout << ret << 6;

    // Create a program from the kernel source
    program = clCreateProgramWithSource(context, 1,
        (const char**)&source_str, (const size_t*)&source_size, &ret);
    if (ret != CL_SUCCESS)
        std::cout << ret << 9;

    // Build the program
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
    if (ret != CL_SUCCESS)
        std::cout << ret << 10;

    // Create the OpenCL kernel
    kernel = clCreateKernel(program, "add", &ret);
    if (ret != CL_SUCCESS)
        std::cout << ret << 11;

    // Set the arguments of the kernel
    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&a_mem_obj);
    if (ret != CL_SUCCESS)
        std::cout << ret << 13;
}

void KernelStart() {

    // Copy to the memory buffers
    ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
        SIZE * sizeof(int), a, 0, NULL, NULL);
    if (ret != CL_SUCCESS)
        std::cout << ret << 7;

    // Execute the OpenCL kernel on the list
    size_t static global_item_size = SIZE; // Process the entire lists
    size_t static local_item_size = 64; // Divide work items into groups of 64
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
        &global_item_size, &local_item_size, 0, NULL, NULL);
    if (ret != CL_SUCCESS)
        std::cout << ret << 14;

    ret = clEnqueueReadBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
        SIZE * sizeof(int), a, 0, NULL, NULL);
    if (ret != CL_SUCCESS)
        std::cout << ret << 15;
}

int main() {
    a = new int[SIZE];
    for (size_t i = 0; i < SIZE; i++)
    {
        a[i] = 1;
    }
    for (size_t i = 0; i < SIZE; i++)
    {
        std::cout << a[i];
    }
    std::cout << std::endl;
    init();
    KernelStart();
    for (size_t i = 0; i < SIZE; i++)
    {
        std::cout << a[i];
    }
}
#include <iostream>
#pragma comment(lib, "OpenCL.lib")
#include <CL/cl.h>
const std::string source_str = R"(
__kernel void add(__global int* c) {
    int i = get_global_id(0);
    c[i]=c[i]+1;
})";
size_t source_size = source_str.length();
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
cl_context context;
cl_command_queue command_queue;
cl_mem a_mem_obj;
cl_program program;
cl_kernel kernel;
int* a;
#define SIZE 100
// ## You may add your own initialization routines here ##
void init() {
    ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    if (ret != CL_SUCCESS)
        std::cout << ret << 1;
    …

您的日志已经向您显示了正在发生的事情,尽管不是特别容易理解。

第一个问题

您的程序输出:

-4913

这其实是错误码 -49(CL_INVALID_ARG_INDEX)后面紧跟着代码中的步骤标记 13。内核 add 只有一个参数,参数索引从 0 开始,因此应传入索引 0,而不是 1。相应的代码:

    // Set the arguments of the kernel
    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&a_mem_obj);
    if (ret != CL_SUCCESS)
        std::cout << ret << 13;
错误码 -54 对应于 CL_INVALID_WORK_GROUP_SIZE。

clEnqueueNDRangeKernel 的文档列出了返回该错误的几种情况:

  • CL_INVALID_WORK_GROUP_SIZE:
    如果指定了 local_work_size,并且 global_work_size 指定的工作项数量不能被 local_work_size 指定的工作组大小整除,或者与程序源中使用
    __attribute__((reqd_work_group_size(X, Y, Z)))
    限定符为内核指定的工作组大小不匹配。
  • CL_INVALID_WORK_GROUP_SIZE:
    如果指定了 local_work_size,并且工作组中的工作项总数(计算为
    local_work_size[0] * … * local_work_size[work_dim - 1])
    大于 OpenCL 设备查询(clGetDeviceInfo)表中
    CL_DEVICE_MAX_WORK_GROUP_SIZE
    指定的值。
  • CL_INVALID_WORK_GROUP_SIZE:
    如果 local_work_size 为 NULL,并且程序源中使用了
    __attribute__((reqd_work_group_size(X, Y, Z)))
    限定符来声明内核的工作组大小。

您的本地大小是64,全局大小是100。这意味着您遇到了第一个条件:您需要确保您的全局大小是本地大小的整数倍。

在两个函数init()和KernelStart()之前和之后添加一些打印;以便隔离这两个函数的输出。
#define SIZE 100

…

    // Execute the OpenCL kernel on the list
    size_t static global_item_size = SIZE; // Process the entire lists
    size_t static local_item_size = 64; // Divide work items into groups of 64
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
        &global_item_size, &local_item_size, 0, NULL, NULL);
    if (ret != CL_SUCCESS)
        std::cout << ret << 14;