Vector 在OpenCL矢量加法程序中,内存对象和CL_INVALID_PLATFORM错误
在下面的OpenCL矢量加法程序中,将clCreateBuffer和clEnqueueWriteBuffer组合成一个命令 给出以下错误:无法将赋值/参数中的“_cl_mem*”转换为“_cl_mem**” 在使用clCreateBuffer和clReleaseMemObject的所有地方都会出现相同的错误。有人能帮我解决这个错误吗?提前谢谢Vector 在OpenCL矢量加法程序中,内存对象和CL_INVALID_PLATFORM错误,vector,parallel-processing,opencl,Vector,Parallel Processing,Opencl,在下面的OpenCL矢量加法程序中,将clCreateBuffer和clEnqueueWriteBuffer组合成一个命令 给出以下错误:无法将赋值/参数中的“_cl_mem*”转换为“_cl_mem**” 在使用clCreateBuffer和clReleaseMemObject的所有地方都会出现相同的错误。有人能帮我解决这个错误吗?提前谢谢 #include <wb.h> //@@ wb include opencl.h for you #include <ma
#include <wb.h> //@@ wb include opencl.h for you
#include <math.h>
//@@ OpenCL Kernel
// OpenCL C source of the vadd kernel, passed to clCreateProgramWithSource below.
// Each work-item stores a[id] + b[id] into result[id], where id is its global id.
// The kernel declares exactly three parameters and has no length parameter.
const char* vaddsrc ="__kernel void vadd(__global const float *a,__global const float *b,__global float *result){int id = get_global_id(0);result[id] = a[id] + b[id];}";
// Host program as posted in the question: imports two input vectors, runs vadd
// on an OpenCL device, reads the result back and hands it to wbSolution.
// This listing is the failing version under discussion and is kept verbatim;
// the NOTE(review) comments below mark the lines that need attention.
int main(int argc, char **argv)
{
// NOTE(review): VECTOR_SIZE is only used to compute size, and size is not
// used again anywhere in this function.
unsigned int VECTOR_SIZE = 1024;
int size = VECTOR_SIZE* sizeof(float);
wbArg_t args;
int inputLength;
float *hostInput1;
float *hostInput2;
float *hostOutput;
// NOTE(review): these are declared as pointers to cl_mem, but clCreateBuffer
// returns a cl_mem handle and clReleaseMemObject takes one. This mismatch is
// the "_cl_mem* to _cl_mem**" conversion error; they should be plain cl_mem.
cl_mem *deviceInput1;
cl_mem *deviceInput2;
cl_mem *deviceOutput;
args = wbArg_read(argc, argv);
wbTime_start(Generic, "Importing data and creating memory on host");
// Both imports receive &inputLength, so the second call's value is the one
// used for every size computation below.
hostInput1 = (float *)wbImport(wbArg_getInputFile(args, 0), &inputLength);
hostInput2 = (float *)wbImport(wbArg_getInputFile(args, 1), &inputLength);
hostOutput = (float *)malloc(inputLength * sizeof(float));
wbTime_stop(Generic, "Importing data and creating memory on host");
wbLog(TRACE, "The input length is ", inputLength);
//@@ OpenCL Context Setup Code (simple)
size_t parmsz;
// clerr receives the status of most OpenCL calls below but is never inspected.
cl_int clerr;
cl_context clctx;
cl_command_queue clcmdq;
cl_program clpgm;
cl_kernel clkern;
clerr = CL_SUCCESS;
// NOTE(review): 0 is passed as the properties argument, so no platform is
// selected; this is reported to fail with CL_INVALID_PLATFORM. Confirm, and
// pass a CL_CONTEXT_PLATFORM property list instead.
clctx = clCreateContextFromType(0,CL_DEVICE_TYPE_ALL, NULL, NULL, &clerr);
// First query asks only for the byte size of the device list, second one fills it.
clerr = clGetContextInfo(clctx, CL_CONTEXT_DEVICES, 0,NULL, & parmsz);
// NOTE(review): cldevs is never freed in this function.
cl_device_id* cldevs = (cl_device_id *) malloc( parmsz);
clerr = clGetContextInfo(clctx, CL_CONTEXT_DEVICES, parmsz,cldevs, NULL);
// The command queue is created on the first device of the context.
clcmdq = clCreateCommandQueue(clctx,cldevs[0], 0, &clerr);
clpgm = clCreateProgramWithSource(clctx, 1, &vaddsrc,NULL, &clerr);
char clcompileflags[4096];
sprintf(clcompileflags, "-cl-mad-enable");
clerr = clBuildProgram(clpgm, 0, NULL, clcompileflags,NULL, NULL);
clkern = clCreateKernel(clpgm, "vadd", &clerr);
//@@ OpenCL Context Setup Code (simple)
wbTime_start(GPU, "Allocating GPU memory.Copying input memory to the GPU.");
//@@ Allocate GPU memory here Copy memory to the GPU here
// CL_MEM_COPY_HOST_PTR makes buffer creation also copy the host data, so no
// separate clEnqueueWriteBuffer call appears in this program.
// The last argument (errcode_ret) is NULL, so creation failures go unreported.
deviceInput1 = clCreateBuffer(clctx, CL_MEM_READ_ONLY |CL_MEM_COPY_HOST_PTR, inputLength *sizeof(float), hostInput1, NULL);
deviceInput2 = clCreateBuffer(clctx, CL_MEM_READ_ONLY |CL_MEM_COPY_HOST_PTR, inputLength *sizeof(float), hostInput2, NULL);
deviceOutput = clCreateBuffer(clctx, CL_MEM_WRITE_ONLY,inputLength *sizeof(float), NULL, NULL);
wbTime_stop(GPU, "Allocating GPU memory.Copying input memory to the GPU.");
//@@ Initialize the grid and block dimensions here
size_t globalSize, localSize;
localSize = 64;
// Rounds inputLength up to the next multiple of localSize.
// NOTE(review): when inputLength is not a multiple of 64 the extra work-items
// index past the end of the buffers, because the kernel has no bounds check.
globalSize = ceil(inputLength/(float)localSize)*localSize;
wbTime_start(Compute, "Performing CUDA computation");
//@@ Launch the GPU Kernel here
clerr= clSetKernelArg(clkern, 0, sizeof(cl_mem),(void *)&deviceInput1);
clerr= clSetKernelArg(clkern, 1, sizeof(cl_mem),(void *)&deviceInput2);
clerr= clSetKernelArg(clkern, 2, sizeof(cl_mem),(void *)&deviceOutput);
// NOTE(review): vadd declares only three parameters (indices 0-2), so there
// is no kernel argument with index 3.
clerr= clSetKernelArg(clkern, 3, sizeof(int), &inputLength);
// NOTE(review): the Compute timer stops here, before the kernel is enqueued;
// the launch itself is measured by the Copy timer below.
wbTime_stop(Compute, "Performing CUDA computation");
wbTime_start(Copy, "Copying output memory to the CPU");
//@@ Copy the GPU memory back to the CPU here
// NOTE(review): event is never released in this function.
cl_event event=NULL;
clerr= clEnqueueNDRangeKernel(clcmdq, clkern, 1, NULL, &globalSize, &localSize, 0, NULL, &event);
clerr= clWaitForEvents(1, &event);
// CL_TRUE requests a blocking read into hostOutput.
clEnqueueReadBuffer(clcmdq, deviceOutput, CL_TRUE, 0,inputLength*sizeof(float), hostOutput, 0, NULL, NULL);
wbTime_stop(Copy, "Copying output memory to the CPU");
wbTime_start(GPU, "Freeing GPU Memory");
//@@ Free the GPU memory here
// Only the three buffers are released; the kernel, program, command queue and
// context created above are not released in this function.
clReleaseMemObject(deviceInput1);
clReleaseMemObject(deviceInput2);
clReleaseMemObject(deviceOutput);
wbTime_stop(GPU, "Freeing GPU Memory");
wbSolution(args, hostOutput, inputLength);
free(hostInput1);
free(hostInput2);
free(hostOutput);
return 0;
}
#include/@@wb为您包含opencl.h
#包括
//@@OpenCL内核
const char*vaddsrc=“uuu内核void vadd(uuu全局常量float*a,uuu全局常量float*b,uuu全局float*result){int id=get_全局\u id(0);result[id]=a[id]+b[id];}”;
int main(int argc,字符**argv)
{
无符号整数向量_SIZE=1024;
int size=VECTOR_size*sizeof(float);
wbArg_t args;
int输入长度;
浮点*主机输入1;
浮点*主机输入2;
浮点*输出;
cl_mem*设备输入1;
cl_mem*设备输入2;
cl_mem*设备输出;
args=wbArg_读取(argc,argv);
wbTime_start(通用,“导入数据并在主机上创建内存”);
hostInput1=(float*)wbImport(wbArg_getInputFile(args,0),&inputLength);
hostInput2=(float*)wbImport(wbArg_getInputFile(args,1),&inputLength);
hostOutput=(float*)malloc(inputLength*sizeof(float));
wbTime_stop(通用,“在主机上导入数据并创建内存”);
wbLog(跟踪,“输入长度为”,输入长度);
//@@OpenCL上下文设置代码(简单)
尺寸(每平方米);;
国际文书主任;
cl_上下文clctx;
cl_命令_队列clcmdq;
cl_计划clpgm;
cl_内核clkern;
clerr=成功;
clctx=clCreateContextFromType(0,CL_设备_类型_全部,NULL,NULL和clerr);
clerr=clGetContextInfo(clctx、CL_上下文_设备、0、NULL和parmsz);
cl_设备id*cldevs=(cl_设备id*)malloc(parmsz);
clerr=clGetContextInfo(clctx、CLU上下文设备、parmsz、cldevs、NULL);
clcmdq=clCreateCommandQueue(clctx、cldevs[0]、0和clerr);
clpgm=clCreateProgramWithSource(clctx、1和vaddsrc、NULL和clerr);
char-clcompileflags[4096];
sprintf(clcompileflags,“-cl mad enable”);
clerr=clBuildProgram(clpgm,0,NULL,clcompileflags,NULL,NULL);
clkern=clCreateKernel(clpgm,“vadd”和cler);
//@@OpenCL上下文设置代码(简单)
wbTime_start(GPU,“分配GPU内存。将输入内存复制到GPU”);
//@@在此处分配GPU内存将内存复制到此处的GPU
deviceInput1=clCreateBuffer(clctx,CL_MEM_READ_ONLY,CL_MEM_COPY_HOST_PTR,inputLength*sizeof(float),hostInput1,NULL);
deviceInput2=clCreateBuffer(clctx,CL_MEM_READ_ONLY,CL_MEM_COPY_HOST_PTR,inputLength*sizeof(float),hostInput2,NULL);
deviceOutput=clCreateBuffer(clctx,CLU MEM_WRITE_ONLY,inputLength*sizeof(float),NULL,NULL);
wbTime_stop(GPU,“分配GPU内存。将输入内存复制到GPU”);
//@@在此处初始化栅格和块尺寸
大小\u t全局大小、本地大小;
localSize=64;
globalSize=ceil(inputLength/(float)localSize)*localSize;
wbTime_开始(计算,“执行CUDA计算”);
//@@在这里启动GPU内核
clerr=clSetKernelArg(clkern,0,sizeof(cl_mem),(void*)和设备输入1);
clerr=clSetKernelArg(clkern,1,sizeof(cl_mem),(void*)和设备输入2);
clerr=CLSETKERNELAG(clkern,2,sizeof(cl_mem),(void*)和设备输出);
clerr=clSetKernelArg(clkern,3,sizeof(int),&inputLength);
wbTime_停止(计算,“执行CUDA计算”);
wbTime_启动(复制,“将输出内存复制到CPU”);
//@@将GPU内存复制回此处的CPU
cl_事件=空;
cler=clenqueendrangekernel(clcmdq、clkern、1、NULL、&globalSize、&localSize、0、NULL、&event);
clerr=clWaitForEvents(1,事件和事件);
clEnqueueReadBuffer(clcmdq,deviceOutput,CL_TRUE,0,inputLength*sizeof(float),hostOutput,0,NULL,NULL);
wbTime_stop(复制,“将输出内存复制到CPU”);
wbTime_启动(GPU,“释放GPU内存”);
//@@在此释放GPU内存
CLreleaseMoObject(设备输入1);
CLreleaseMoObject(设备输入2);
CLRELEASEMOBJECT(设备输出);
wbTime_停止(GPU,“释放GPU内存”);
wbSolution(参数、主机输出、输入长度);
免费(主机输入1);
免费(主机输入2);
免费(主机输出);
返回0;
}
您需要使用cl_mem作为设备缓冲区的数据类型,而不是cl_mem*
因为cl_mem type是“内存对象”的句柄,所以它不应该是指针
删除以下行中的*
cl_mem *deviceInput1;
cl_mem *deviceInput2;
cl_mem *deviceOutput;
编辑:
您不应该在clCreateContextFromType的第一个参数中使用0,因为您将得到本文中提到的CL_INVALID_PLATFORM错误
我在下面给出了固定代码:
#include <wb.h> //@@ wb include opencl.h for you
#include <math.h>
//@@ OpenCL Kernel
const char* vaddsrc ="__kernel void vadd(__global const float *a,__global const float *b,__global float *result){int id = get_global_id(0);result[id] = a[id] + b[id];}";
int main(int argc, char **argv)
{
unsigned int VECTOR_SIZE = 1024;
int size = VECTOR_SIZE* sizeof(float);
wbArg_t args;
int inputLength = VECTOR_SIZE;
float *hostInput1;
float *hostInput2;
float *hostOutput;
cl_mem deviceInput1;
cl_mem deviceInput2;
cl_mem deviceOutput;
args = wbArg_read(argc, argv);
wbTime_start(Generic, "Importing data and creating memory on host");
hostInput1 = (float *)wbImport(wbArg_getInputFile(args, 0), &inputLength);
hostInput2 = (float *)wbImport(wbArg_getInputFile(args, 1), &inputLength);
hostOutput = (float *)malloc(inputLength * sizeof(float));
wbTime_stop(Generic, "Importing data and creating memory on host");
wbLog(TRACE, "The input length is ", inputLength);
//@@ OpenCL Context Setup Code (simple)
size_t parmsz;
cl_int clerr;
cl_context clctx;
cl_command_queue clcmdq;
cl_program clpgm;
cl_kernel clkern;
// query the number of platforms
cl_uint numPlatforms;
clerr = clGetPlatformIDs(0, NULL, &numPlatforms);
cl_platform_id platforms[numPlatforms];
clerr = clGetPlatformIDs(numPlatforms, platforms, NULL);
cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (unsigned long)platforms[0], 0};
clctx = clCreateContextFromType(properties,CL_DEVICE_TYPE_ALL, NULL, NULL, &clerr);
clerr = clGetContextInfo(clctx, CL_CONTEXT_DEVICES, 0,NULL, & parmsz);
cl_device_id* cldevs = (cl_device_id *) malloc( parmsz);
clerr = clGetContextInfo(clctx, CL_CONTEXT_DEVICES, parmsz,cldevs, NULL);
clcmdq = clCreateCommandQueue(clctx,cldevs[0], 0, &clerr);
clpgm = clCreateProgramWithSource(clctx, 1, &vaddsrc,NULL, &clerr);
char clcompileflags[4096];
sprintf(clcompileflags, "-cl-mad-enable");
clerr = clBuildProgram(clpgm, 0, NULL, clcompileflags,NULL, NULL);
clkern = clCreateKernel(clpgm, "vadd", &clerr);
//@@ OpenCL Context Setup Code (simple)
wbTime_start(GPU, "Allocating GPU memory.Copying input memory to the GPU.");
//@@ Allocate GPU memory here Copy memory to the GPU here
deviceInput1 = clCreateBuffer(clctx, CL_MEM_READ_ONLY |CL_MEM_COPY_HOST_PTR,inputLength *sizeof(float), hostInput1, NULL);
deviceInput2 = clCreateBuffer(clctx, CL_MEM_READ_ONLY |CL_MEM_COPY_HOST_PTR, inputLength *sizeof(float), hostInput2, NULL);
deviceOutput = clCreateBuffer(clctx, CL_MEM_WRITE_ONLY,inputLength *sizeof(float), NULL, NULL);
wbTime_stop(GPU, "Allocating GPU memory.Copying input memory to the GPU.");
//@@ Initialize the grid and block dimensions here
size_t globalSize, localSize;
localSize = 64;
globalSize = ceil(inputLength/(float)localSize)*localSize;
wbTime_start(Compute, "Performing CUDA computation");
//@@ Launch the GPU Kernel here
clerr= clSetKernelArg(clkern, 0, sizeof(cl_mem),(void *)&deviceInput1);
clerr= clSetKernelArg(clkern, 1, sizeof(cl_mem),(void *)&deviceInput2);
clerr= clSetKernelArg(clkern, 2, sizeof(cl_mem),(void *)&deviceOutput);
clerr= clSetKernelArg(clkern, 3, sizeof(int), &inputLength);
wbTime_stop(Compute, "Performing CUDA computation");
wbTime_start(Copy, "Copying output memory to the CPU");
//@@ Copy the GPU memory back to the CPU here
cl_event event=NULL;
clerr= clEnqueueNDRangeKernel(clcmdq, clkern, 1, NULL, &globalSize, &localSize, 0, NULL, &event);
clerr= clWaitForEvents(1, &event);
clerr= clEnqueueReadBuffer(clcmdq, deviceOutput, CL_TRUE, 0,inputLength*sizeof(float), hostOutput, 0, NULL, NULL);
wbTime_stop(Copy, "Copying output memory to the CPU");
wbTime_start(GPU, "Freeing GPU Memory");
//@@ Free the GPU memory here
clReleaseMemObject(deviceInput1);
clReleaseMemObject(deviceInput2);
clReleaseMemObject(deviceOutput);
wbTime_stop(GPU, "Freeing GPU Memory");
wbSolution(args, hostOutput, inputLength);
free(hostInput1);
free(hostInput2);
free(hostOutput);
return 0;
}
#include/@@wb为您包含opencl.h
#包括
//@@OpenCL内核
const char*vaddsrc=“uuu内核void vadd(uuu全局常量float*a,uuu全局常量float*b,uuu全局float*result){int id=get_全局\u id(0);result[id]=a[id]+b[id];}”;
int main(int argc,字符**argv)
{
无符号整数向量_SIZE=1024;
int size=VECTOR_size*sizeof(float);
wbArg_t args;
int inputLength=向量大小;
浮点*主机输入1;
浮点*主机输入2;
浮点*输出;
cl_mem deviceInput1;
cl_mem deviceInput2;
cl_mem设备输出;
args=wbArg_读取(argc,argv);
wbTime_start(通用,“导入数据并在主机上创建内存”);
hostInput1=(float*)wbImport(wbArg_getInputFile(args,0),&inputLength);
hostInput2=(float*)wbImport(wbArg_getInputFile(args,1),&inputLength);
hostOutput=(float*)malloc(inputLength*sizeof(float));
wbTime_stop(通用,“在主机上导入数据并创建内存”);
wbLog(跟踪,“输入长度为”,输入长度);
//@@OpenCL上下文设置代码(简单)
尺寸(每平方米);;
国际文书主任;
cl_上下文clctx;
cl_命令_队列clcmdq;
cl_计划clpgm;
cl_内核clkern;
//查询平台的数量
clu-uint-numPlatforms;
clerr=clGetPlatformIDs(0、NULL和numPlatforms);
cl_平台_id平台[numPlatforms];