Vector 在OpenCL矢量加法程序中的内存对象（cl_mem）与CL_INVALID_PLATFORM错误_Vector_Parallel Processing_Opencl

Vector 在OpenCL矢量加法程序中的内存对象（cl_mem）与CL_INVALID_PLATFORM错误

vector parallel-processing opencl

Vector 在OpenCL矢量加法程序中的内存对象（cl_mem）与CL_INVALID_PLATFORM错误,vector,parallel-processing,opencl,Vector,Parallel Processing,Opencl,在下面的OpenCL矢量加法程序中，将clCreateBuffer和clEnqueueWriteBuffer组合成一个命令给出以下错误：无法在赋值/参数传递中将“_cl_mem*”转换为“_cl_mem**” 在使用clCreateBuffer和clReleaseMemObject的所有地方都会出现相同的错误。有人能帮我解决这个错误吗？提前谢谢 #include <wb.h> //@@ wb include opencl.h for you #include <ma

在下面的OpenCL矢量加法程序中，将clCreateBuffer和clEnqueueWriteBuffer组合成一个命令

给出以下错误：无法在赋值/参数传递中将“_cl_mem*”转换为“_cl_mem**”

在使用clCreateBuffer和clReleaseMemObject的所有地方都会出现相同的错误。有人能帮我解决这个错误吗？提前谢谢

#include <wb.h> //@@ wb include opencl.h for you
#include <math.h>
//@@ OpenCL Kernel
// Kernel source compiled at run time: each work-item reads its global id and
// stores a[id] + b[id] into result[id]. The kernel declares exactly three
// parameters (a, b, result) and performs no bounds check on id.
const char* vaddsrc ="__kernel void vadd(__global const float *a,__global const float *b,__global float *result){int id = get_global_id(0);result[id] = a[id] + b[id];}";

// Host program as posted in the question: imports two float vectors through
// the wb helpers, builds the vadd kernel, runs it, reads the result back and
// hands it to wbSolution. This is the version that produces the reported
// "_cl_mem*" -> "_cl_mem**" conversion error; reviewer notes below mark the
// lines involved. Always returns 0.
int main(int argc, char **argv) 
{

    // NOTE(review): VECTOR_SIZE and size are not used anywhere else in this function.
    unsigned int VECTOR_SIZE = 1024;
    int size = VECTOR_SIZE* sizeof(float);
    wbArg_t args;
    int inputLength;
    float *hostInput1;
    float *hostInput2;
    float *hostOutput;
    // NOTE(review): these are declared as pointers to cl_mem, but further down
    // they are assigned the return value of clCreateBuffer and passed directly
    // to clEnqueueReadBuffer / clReleaseMemObject. That mismatch matches the
    // reported conversion error; presumably they should be plain cl_mem
    // handles - confirm against the OpenCL headers.
    cl_mem *deviceInput1;
    cl_mem *deviceInput2;
    cl_mem *deviceOutput;

  args = wbArg_read(argc, argv);
  wbTime_start(Generic, "Importing data and creating memory on host");
  // Both imports write their element count into the same inputLength, so the
  // value from the second file is the one used afterwards.
  hostInput1 = (float *)wbImport(wbArg_getInputFile(args, 0), &inputLength);
  hostInput2 = (float *)wbImport(wbArg_getInputFile(args, 1), &inputLength);
  // NOTE(review): the malloc result is not checked before use.
  hostOutput = (float *)malloc(inputLength * sizeof(float));
   wbTime_stop(Generic, "Importing data and creating memory on host");
  wbLog(TRACE, "The input length is ", inputLength);


  //@@ OpenCL Context Setup Code (simple)
        size_t parmsz;
        cl_int clerr;
        cl_context clctx;
        cl_command_queue clcmdq;
        cl_program clpgm;
        cl_kernel clkern;

   // NOTE(review): clerr is overwritten by every call below and never
   // inspected, so any failing OpenCL call goes unnoticed.
   clerr = CL_SUCCESS;
   // NOTE(review): the properties argument is 0, i.e. no platform is named.
   // Presumably this is what triggers CL_INVALID_PLATFORM on some
   // implementations - confirm against the clCreateContextFromType reference.
   clctx = clCreateContextFromType(0,CL_DEVICE_TYPE_ALL, NULL, NULL, &clerr);
   // First query asks only for the size (in bytes) of the device list, the
   // second query fills the buffer allocated with that size.
   clerr = clGetContextInfo(clctx, CL_CONTEXT_DEVICES, 0,NULL, & parmsz);
   cl_device_id* cldevs = (cl_device_id *) malloc( parmsz);
   clerr = clGetContextInfo(clctx, CL_CONTEXT_DEVICES, parmsz,cldevs, NULL);
   // The command queue is created on the first device in the list.
   clcmdq = clCreateCommandQueue(clctx,cldevs[0], 0, &clerr);
   clpgm = clCreateProgramWithSource(clctx, 1, &vaddsrc,NULL, &clerr);
   char clcompileflags[4096];
   sprintf(clcompileflags, "-cl-mad-enable");
   clerr = clBuildProgram(clpgm, 0, NULL, clcompileflags,NULL, NULL);
   clkern = clCreateKernel(clpgm, "vadd", &clerr); 
  //@@ OpenCL Context Setup Code (simple)


  wbTime_start(GPU, "Allocating GPU memory.Copying input memory to the GPU.");
  //@@ Allocate GPU memory here Copy memory to the GPU here
  // CL_MEM_COPY_HOST_PTR is passed together with the host arrays for the two
  // inputs; the errcode_ret argument is NULL, so creation failures are not
  // reported.
  deviceInput1 = clCreateBuffer(clctx, CL_MEM_READ_ONLY |CL_MEM_COPY_HOST_PTR,             inputLength *sizeof(float), hostInput1, NULL);
  deviceInput2 = clCreateBuffer(clctx, CL_MEM_READ_ONLY |CL_MEM_COPY_HOST_PTR, inputLength *sizeof(float), hostInput2, NULL);
  deviceOutput = clCreateBuffer(clctx, CL_MEM_WRITE_ONLY,inputLength *sizeof(float), NULL, NULL);  
  wbTime_stop(GPU, "Allocating GPU memory.Copying input memory to the GPU.");



  //@@ Initialize the grid and block dimensions here
// globalSize is inputLength rounded up to a multiple of localSize (64).
// NOTE(review): when inputLength is not a multiple of 64 the extra work-items
// index past inputLength, and the kernel string above has no guard for that.
size_t globalSize, localSize;
localSize = 64;
globalSize = ceil(inputLength/(float)localSize)*localSize;




  wbTime_start(Compute, "Performing CUDA computation");
  //@@ Launch the GPU Kernel here
// Only the kernel arguments are set inside this timed section; the launch
// itself happens in the "Copy" section below.
clerr= clSetKernelArg(clkern, 0, sizeof(cl_mem),(void *)&deviceInput1);
clerr= clSetKernelArg(clkern, 1, sizeof(cl_mem),(void *)&deviceInput2);
clerr= clSetKernelArg(clkern, 2, sizeof(cl_mem),(void *)&deviceOutput);
// NOTE(review): argument index 3 has no matching parameter in the vadd kernel
// string, which declares only a, b and result.
clerr= clSetKernelArg(clkern, 3, sizeof(int), &inputLength);
 wbTime_stop(Compute, "Performing CUDA computation");


wbTime_start(Copy, "Copying output memory to the CPU");
//@@ Copy the GPU memory back to the CPU here
// Enqueue the 1-D kernel launch, wait on its event, then do a blocking
// (CL_TRUE) read of the output buffer into hostOutput.
cl_event event=NULL;
clerr= clEnqueueNDRangeKernel(clcmdq, clkern, 1, NULL, &globalSize, &localSize, 0, NULL, &event);
clerr= clWaitForEvents(1, &event);
clEnqueueReadBuffer(clcmdq, deviceOutput, CL_TRUE, 0,inputLength*sizeof(float), hostOutput, 0, NULL, NULL);
wbTime_stop(Copy, "Copying output memory to the CPU");


wbTime_start(GPU, "Freeing GPU Memory");
//@@ Free the GPU memory here
// NOTE(review): only the three buffers are released; cldevs, event, clkern,
// clpgm, clcmdq and clctx are not released anywhere in this function.
clReleaseMemObject(deviceInput1);
clReleaseMemObject(deviceInput2);
clReleaseMemObject(deviceOutput);
wbTime_stop(GPU, "Freeing GPU Memory");
wbSolution(args, hostOutput, inputLength);
free(hostInput1);
free(hostInput2);
free(hostOutput);

  return 0;
}

#include/@@wb为您包含opencl.h
#包括
//@@OpenCL内核
const char*vaddsrc=“uuu内核void vadd（uuu全局常量float*a，uuu全局常量float*b，uuu全局float*result）{int id=get_全局\u id（0）；result[id]=a[id]+b[id]；}”；
int main（int argc，字符**argv）
{
无符号整数向量_SIZE=1024；
int size=VECTOR_size*sizeof（float）；
wbArg_t args；
int输入长度；
浮点*主机输入1；
浮点*主机输入2；
浮点*输出；
cl_mem*设备输入1；
cl_mem*设备输入2；
cl_mem*设备输出；
args=wbArg_读取（argc，argv）；
wbTime_start（通用，“导入数据并在主机上创建内存”）；
hostInput1=（float*）wbImport（wbArg_getInputFile（args，0），&inputLength）；
hostInput2=（float*）wbImport（wbArg_getInputFile（args，1），&inputLength）；
hostOutput=（float*）malloc（inputLength*sizeof（float））；
wbTime_stop（通用，“在主机上导入数据并创建内存”）；
wbLog（跟踪，“输入长度为”，输入长度）；
//@@OpenCL上下文设置代码（简单）
尺寸(每平方米);；
国际文书主任；
cl_上下文clctx；
cl_命令_队列clcmdq；
cl_计划clpgm；
cl_内核clkern；
clerr=成功；
clctx=clCreateContextFromType（0，CL_设备_类型_全部，NULL，NULL和clerr）；
clerr=clGetContextInfo（clctx、CL_上下文_设备、0、NULL和parmsz）；
cl_设备id*cldevs=（cl_设备id*）malloc（parmsz）；
clerr=clGetContextInfo（clctx、CLU上下文设备、parmsz、cldevs、NULL）；
clcmdq=clCreateCommandQueue（clctx、cldevs[0]、0和clerr）；
clpgm=clCreateProgramWithSource（clctx、1和vaddsrc、NULL和clerr）；
char-clcompileflags[4096]；
sprintf（clcompileflags，“-cl mad enable”）；
clerr=clBuildProgram（clpgm，0，NULL，clcompileflags，NULL，NULL）；
clkern=clCreateKernel（clpgm，“vadd”和cler）；
//@@OpenCL上下文设置代码（简单）
wbTime_start（GPU，“分配GPU内存。将输入内存复制到GPU”）；
//@@在此处分配GPU内存将内存复制到此处的GPU
deviceInput1=clCreateBuffer（clctx，CL_MEM_READ_ONLY，CL_MEM_COPY_HOST_PTR，inputLength*sizeof（float），hostInput1，NULL）；
deviceInput2=clCreateBuffer（clctx，CL_MEM_READ_ONLY，CL_MEM_COPY_HOST_PTR，inputLength*sizeof（float），hostInput2，NULL）；
deviceOutput=clCreateBuffer（clctx，CLU MEM_WRITE_ONLY，inputLength*sizeof（float），NULL，NULL）；
wbTime_stop（GPU，“分配GPU内存。将输入内存复制到GPU”）；
//@@在此处初始化栅格和块尺寸
大小\u t全局大小、本地大小；
localSize=64；
globalSize=ceil（inputLength/（float）localSize）*localSize；
wbTime_开始（计算，“执行CUDA计算”）；
//@@在这里启动GPU内核
clerr=clSetKernelArg（clkern，0，sizeof（cl_mem），（void*）和设备输入1）；
clerr=clSetKernelArg（clkern，1，sizeof（cl_mem），（void*）和设备输入2）；
clerr=CLSETKERNELAG（clkern，2，sizeof（cl_mem），（void*）和设备输出）；
clerr=clSetKernelArg（clkern，3，sizeof（int），&inputLength）；
wbTime_停止（计算，“执行CUDA计算”）；
wbTime_启动（复制，“将输出内存复制到CPU”）；
//@@将GPU内存复制回此处的CPU
cl_事件=空；
cler=clenqueendrangekernel（clcmdq、clkern、1、NULL、&globalSize、&localSize、0、NULL、&event）；
clerr=clWaitForEvents（1，事件和事件）；
clEnqueueReadBuffer（clcmdq，deviceOutput，CL_TRUE，0，inputLength*sizeof（float），hostOutput，0，NULL，NULL）；
wbTime_stop（复制，“将输出内存复制到CPU”）；
wbTime_启动（GPU，“释放GPU内存”）；
//@@在此释放GPU内存
CLreleaseMoObject（设备输入1）；
CLreleaseMoObject（设备输入2）；
CLRELEASEMOBJECT（设备输出）；
wbTime_停止（GPU，“释放GPU内存”）；
wbSolution（参数、主机输出、输入长度）；
免费（主机输入1）；
免费（主机输入2）；
免费（主机输出）；
返回0；
}

您需要使用cl_mem作为设备缓冲区的数据类型，而不是cl_mem*。因为cl_mem类型本身就是“内存对象”的句柄，所以不应该再把它声明为指针。

删除以下行中的*

 cl_mem *deviceInput1;
 cl_mem *deviceInput2;
 cl_mem *deviceOutput;

编辑： 您不应该在clCreateContextFromType的第一个参数中使用0，因为您将得到本文中提到的CL_INVALID_PLATFORM错误

我在下面给出了固定代码：

#include <wb.h> //@@ wb include opencl.h for you
#include <math.h>
//@@ OpenCL Kernel
const char* vaddsrc ="__kernel void vadd(__global const float *a,__global const float *b,__global float *result){int id = get_global_id(0);result[id] = a[id] + b[id];}";

int main(int argc, char **argv) 
{

    unsigned int VECTOR_SIZE = 1024;
    int size = VECTOR_SIZE* sizeof(float);
    wbArg_t args;
    int inputLength = VECTOR_SIZE;
    float *hostInput1;
    float *hostInput2;
    float *hostOutput;
    cl_mem deviceInput1;
    cl_mem deviceInput2;
    cl_mem deviceOutput;

  args = wbArg_read(argc, argv);
  wbTime_start(Generic, "Importing data and creating memory on host");
  hostInput1 = (float *)wbImport(wbArg_getInputFile(args, 0), &inputLength);
  hostInput2 = (float *)wbImport(wbArg_getInputFile(args, 1), &inputLength);
  hostOutput = (float *)malloc(inputLength * sizeof(float));
   wbTime_stop(Generic, "Importing data and creating memory on host");
  wbLog(TRACE, "The input length is ", inputLength);


  //@@ OpenCL Context Setup Code (simple)
        size_t parmsz;
        cl_int clerr;
        cl_context clctx;
        cl_command_queue clcmdq;
        cl_program clpgm;
        cl_kernel clkern;

// query the number of platforms
   cl_uint numPlatforms;
   clerr = clGetPlatformIDs(0, NULL, &numPlatforms);
   cl_platform_id platforms[numPlatforms];
   clerr = clGetPlatformIDs(numPlatforms, platforms, NULL);
   cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (unsigned long)platforms[0], 0};
   clctx = clCreateContextFromType(properties,CL_DEVICE_TYPE_ALL, NULL, NULL, &clerr);
    clerr = clGetContextInfo(clctx, CL_CONTEXT_DEVICES, 0,NULL, & parmsz);
   cl_device_id* cldevs = (cl_device_id *) malloc( parmsz);
   clerr = clGetContextInfo(clctx, CL_CONTEXT_DEVICES, parmsz,cldevs, NULL);
   clcmdq = clCreateCommandQueue(clctx,cldevs[0], 0, &clerr);
   clpgm = clCreateProgramWithSource(clctx, 1, &vaddsrc,NULL, &clerr);
   char clcompileflags[4096];
   sprintf(clcompileflags, "-cl-mad-enable");
   clerr = clBuildProgram(clpgm, 0, NULL, clcompileflags,NULL, NULL);
   clkern = clCreateKernel(clpgm, "vadd", &clerr); 
  //@@ OpenCL Context Setup Code (simple)


  wbTime_start(GPU, "Allocating GPU memory.Copying input memory to the GPU.");
  //@@ Allocate GPU memory here Copy memory to the GPU here
  deviceInput1 = clCreateBuffer(clctx, CL_MEM_READ_ONLY |CL_MEM_COPY_HOST_PTR,inputLength *sizeof(float), hostInput1, NULL);
  deviceInput2 = clCreateBuffer(clctx, CL_MEM_READ_ONLY |CL_MEM_COPY_HOST_PTR, inputLength *sizeof(float), hostInput2, NULL);
  deviceOutput = clCreateBuffer(clctx, CL_MEM_WRITE_ONLY,inputLength *sizeof(float), NULL, NULL);  
  wbTime_stop(GPU, "Allocating GPU memory.Copying input memory to the GPU.");



  //@@ Initialize the grid and block dimensions here
size_t globalSize, localSize;
localSize = 64;
globalSize = ceil(inputLength/(float)localSize)*localSize;




  wbTime_start(Compute, "Performing CUDA computation");
  //@@ Launch the GPU Kernel here
clerr= clSetKernelArg(clkern, 0, sizeof(cl_mem),(void *)&deviceInput1);
clerr= clSetKernelArg(clkern, 1, sizeof(cl_mem),(void *)&deviceInput2);
clerr= clSetKernelArg(clkern, 2, sizeof(cl_mem),(void *)&deviceOutput);
clerr= clSetKernelArg(clkern, 3, sizeof(int), &inputLength);
 wbTime_stop(Compute, "Performing CUDA computation");


wbTime_start(Copy, "Copying output memory to the CPU");
//@@ Copy the GPU memory back to the CPU here
cl_event event=NULL;
clerr= clEnqueueNDRangeKernel(clcmdq, clkern, 1, NULL, &globalSize, &localSize, 0, NULL, &event);
clerr= clWaitForEvents(1, &event);
clerr= clEnqueueReadBuffer(clcmdq, deviceOutput, CL_TRUE, 0,inputLength*sizeof(float), hostOutput, 0, NULL, NULL);
wbTime_stop(Copy, "Copying output memory to the CPU");


wbTime_start(GPU, "Freeing GPU Memory");
//@@ Free the GPU memory here
clReleaseMemObject(deviceInput1);
clReleaseMemObject(deviceInput2);
clReleaseMemObject(deviceOutput);
wbTime_stop(GPU, "Freeing GPU Memory");
wbSolution(args, hostOutput, inputLength);
free(hostInput1);
free(hostInput2);
free(hostOutput);

  return 0;
}

#include/@@wb为您包含opencl.h
#包括
//@@OpenCL内核
const char*vaddsrc=“uuu内核void vadd（uuu全局常量float*a，uuu全局常量float*b，uuu全局float*result）{int id=get_全局\u id（0）；result[id]=a[id]+b[id]；}”；
int main（int argc，字符**argv）
{
无符号整数向量_SIZE=1024；
int size=VECTOR_size*sizeof（float）；
wbArg_t args；
int inputLength=向量大小；
浮点*主机输入1；
浮点*主机输入2；
浮点*输出；
cl_mem deviceInput1；
cl_mem deviceInput2；
cl_mem设备输出；
args=wbArg_读取（argc，argv）；
wbTime_start（通用，“导入数据并在主机上创建内存”）；
hostInput1=（float*）wbImport（wbArg_getInputFile（args，0），&inputLength）；
hostInput2=（float*）wbImport（wbArg_getInputFile（args，1），&inputLength）；
hostOutput=（float*）malloc（inputLength*sizeof（float））；
wbTime_stop（通用，“在主机上导入数据并创建内存”）；
wbLog（跟踪，“输入长度为”，输入长度）；
//@@OpenCL上下文设置代码（简单）
尺寸(每平方米);；
国际文书主任；
cl_上下文clctx；
cl_命令_队列clcmdq；
cl_计划clpgm；
cl_内核clkern；
//查询平台的数量
clu-uint-numPlatforms；
clerr=clGetPlatformIDs（0、NULL和numPlatforms）；
cl_平台_id平台[numPlatforms]；