如何在OpenCL中将数据从设备内存子缓冲区传输到主机程序?
我正在通过 Matthew Scarpino 的《OpenCL in Action》学习 OpenCL。第 3 章包含有关缓冲区(第 45-47 页)和子缓冲区(第 47-48 页)组织方式的内容。第 47 页给出的创建子缓冲区的代码示例存在不准确之处,这一点已在另一个问题及其回答中指出。我进一步研究了如何将存储在子缓冲区中的值传回主机程序。我的主机程序将整数数组
iaArray1[5]={1,2,3,4,5}
传输到内核中,为此使用了缓冲区 memObjArray1。
由 iaArray1 数组得到 ipaArray2 数组,其中存储的值为 {3,4,5,6,7},
因为内核将数组的每个元素与常量 2 相加。
输出缓冲区 memObjArray2 用于将 ipaArray2 数组的值完整地从设备传输到主机程序。
接下来,从 memObjArray2 缓冲区创建子缓冲区 memObjSubArray,
并尝试将数据从设备内存中的子缓冲区传输到主机程序。
我认为,从设备向主机程序传输数据的机制对于缓冲区和子缓冲区是相同的。为此,我使用了同一个函数 clEnqueueReadBuffer(),
但程序给出了一条错误消息。我做错了什么?应该使用哪个函数将数据从设备内存子缓冲区传输到主机程序?
内核函数如下所示:
/*
 * good: each work-item reads the element of iaArray1 selected by its global
 * id in dimension 0, adds 2 to it and stores the sum at the same index of
 * iaArray2.
 */
__kernel void good (global int* iaArray1, global int* iaArray2)
{
    const int idx = get_global_id(0);
    const int shifted = iaArray1[idx] + 2;

    iaArray2[idx] = shifted;
}
/*
 * Device-side copy of tQuantityToCopy ints from memObjInput (starting at
 * element tSrcBufOffset) into memObjOutput (starting at element
 * tDestBufOffset). Offsets and size are passed in bytes.
 * The command waits on no events: the wait-list count is an integer and must
 * be 0 (not NULL) when the wait list itself is NULL.
 */
cl_int status = clEnqueueCopyBuffer(cmdQueue0, memObjInput, memObjOutput,
                                    sizeof(int)*tSrcBufOffset,
                                    sizeof(int)*tDestBufOffset,
                                    sizeof(int)*tQuantityToCopy,
                                    0, NULL, NULL);
这是我的程序代码。在所提出的程序中有几个简化。首先,退出分支已被简化以缩短代码。其次,原始程序设计用于处理多个cl文件,因此一些变量是数组
#include <CL\cl.h>
#include <stdio.h>
#include <stdlib.h>
#define PROGRAM_FILE_1 "good.cl"
//#define PROGRAM_FILE_2 "bad.cl"
//#define PROGRAM_FILE_3 "setminusone.cl"
#define NUM_OF_FILES 1
int main(){
cl_platform_id *platforms;
cl_uint numOfPlatforms;
cl_int status;
cl_device_id *devices;
cl_uint numOfDevices;
char caDeviceName[500];
cl_context context;
const char * kcpaFileName[NUM_OF_FILES] = { PROGRAM_FILE_1};
FILE * pProgramHandler;
char * cpaProgramBuffer[NUM_OF_FILES];
size_t saProgramSize[NUM_OF_FILES] = { 0};
cl_uint numOfEnters[NUM_OF_FILES] = { 0};
cl_program program;
const char kcaOptions[] = "-cl-finite-math-only -cl-no-signed-zeros";
size_t sLogSize = 0;
char * cpProgramLog;
cl_uint numOfKernels = 0;
cl_kernel * kernels;
char caKernelName[20];
cl_command_queue cmdQueue0;
printf("Establishing number of available platforms... ");
status = clGetPlatformIDs(NULL, NULL, &numOfPlatforms);
if (status < 0){
printf("FAIL to establish platform(s)!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nEstablised %u platform(s).\nInitializing platform(s)... ", numOfPlatforms);
platforms = (cl_platform_id *)malloc(numOfPlatforms*sizeof(cl_platform_id));
status = clGetPlatformIDs(numOfPlatforms, platforms, NULL); //
if (status < 0){
printf("FAIL to initialize platform(s)!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nEstablishing devices... ");
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, NULL, NULL, &numOfDevices);
if (status < 0){
printf("FAIL to establish device(s)!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nEstablished %u device(s).\nInitializing device(s)... ", numOfDevices);
devices = (cl_device_id *)malloc(numOfDevices*sizeof(cl_device_id));
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numOfDevices, devices, NULL);
if (status < 0){
printf("FAIL to initialize devices(s)!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.");
for (int i = 0; i < numOfDevices; i++){
status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(caDeviceName), caDeviceName, NULL);
if (status < 0){
printf("FAIL to read device #%d name!> %d\n", i, status);
system("PAUSE");
exit(1);
}
printf("\nDevice #%d is \"%s\".", i, caDeviceName);
}
printf("\nCreating context... ");
context = clCreateContext(NULL, numOfDevices, devices, NULL, NULL, &status);
if (status < 0){
printf("FAIL to create context!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nReading source code from file(s)... ");
for (int i = 0; i < NUM_OF_FILES; i++){
pProgramHandler = fopen(kcpaFileName[i], "r");
if (pProgramHandler == NULL){
printf("FAIL to open file \"%s\"!> %d\n", kcpaFileName[i], status);
system("PAUSE");
exit(1);
}
fseek(pProgramHandler, 0, SEEK_END);
saProgramSize[i] = ftell(pProgramHandler);
rewind(pProgramHandler);
cpaProgramBuffer[i] = (char*)malloc(sizeof(char)*saProgramSize[i] + 1);
fread(cpaProgramBuffer[i], sizeof(char), saProgramSize[i], pProgramHandler);
cpaProgramBuffer[i][saProgramSize[i]] = '\0';
fclose(pProgramHandler);
for (int j = 0; j < saProgramSize[i]; j++){
if ((char)cpaProgramBuffer[i][j] == (char)10){
numOfEnters[i]++;
}
}
saProgramSize[i] = saProgramSize[i] - numOfEnters[i];
cpaProgramBuffer[i][saProgramSize[i]] = '\0';
}
printf("OK.\nCreating program from source code... ");
program = clCreateProgramWithSource(context, NUM_OF_FILES, (const char **)cpaProgramBuffer, (const size_t *)saProgramSize, &status);
if (status < 0){
printf("FAIL to create program!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nBuilding program... ");
status = clBuildProgram(program, 1, devices, kcaOptions, NULL, NULL);//,
if (status < 0){
printf("FAIL to build program.\n...Genetating log...");
for (int i = 0; i < NUM_OF_FILES; i++){
printf("\nCode from file \"%s\":\n%s", kcpaFileName[i], cpaProgramBuffer[i]);
}
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &sLogSize);
cpProgramLog = (char*)malloc(sizeof(char)*sLogSize + 1);
cpProgramLog[sLogSize] = '\0';
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, sLogSize + 1, cpProgramLog, NULL);
printf("\nLog length is %d.\nLog:\n%s\n> %d\n", sLogSize, cpProgramLog, status);
system("PAUSE");
exit(1);
}
printf("OK.\nDetermining number of kernels... ");
status = clCreateKernelsInProgram(program, NULL, NULL, &numOfKernels);
if (status < 0){
printf("FAIL to determine number of kernels!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nDetermined %d kernel(s):", numOfKernels);
kernels = (cl_kernel*)malloc(sizeof(cl_kernel)*numOfKernels);
clCreateKernelsInProgram(program, numOfKernels, kernels, NULL);
for (int i = 0; i < numOfKernels; i++){
clGetKernelInfo(kernels[i], CL_KERNEL_FUNCTION_NAME, sizeof(caKernelName), caKernelName, NULL);
printf("\nKernel \"%s\" indexed at %d.", caKernelName, i);
}
printf("\nCreating command queue... ");
cmdQueue0 = clCreateCommandQueue(context, devices[0], NULL, &status);
if (status < 0){
printf("FAIL to create command queue!> %d\n", status);
system("PAUSE");
exit(1);
}
/*Data, buffers and subbuffers*/
int iaArray1[5] = { 1, 2, 3, 4, 5 };
printf("\nPrinting out the initial array:\n");
for (int i = 0; i < 5; i++){
printf("%d ", iaArray1[i]);
}
printf("\nCreating buffers for kernels[0]... ");
int* ipaArray2 = (int*)malloc(5 * sizeof(int));
cl_mem memObjArray1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(iaArray1), &iaArray1, &status);
if (status < 0){
printf("\nFAIL to create memObjArray1 buffer!> %d \n", status);
system("PAUSE");
exit(1);
}
cl_mem memObjArray2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(iaArray1), NULL, &status);
if (status < 0){
printf("\nFAIL to create memObjArray2 buffer!> %d \n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nSetting arguments for kernels[0]... ");
status = clSetKernelArg(kernels[0], 0, sizeof(cl_mem), &memObjArray1);
if (status < 0){
printf("\nFAIL to set memObjArray1 argument at kernels[0]!> %d\n", status);
system("PAUSE");
exit(1);
}
status = clSetKernelArg(kernels[0], 1, sizeof(cl_mem), &memObjArray2);
if (status < 0){
printf("\nFAIL to set memObjArray2 argument at kernels[0]!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nExecuting kernels[0]... ");
size_t tGlobal_item_size = 5; //?
size_t tLocal_item_size = 1; //?
status = clEnqueueNDRangeKernel(cmdQueue0, kernels[0], 1, NULL, &tGlobal_item_size, &tLocal_item_size, 0, NULL, NULL);
if (status < 0){
printf("\nFAIL to enqueue kernels[0] into cmdQueue0!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nReading results from device memory buffer to host array... ");
status = clEnqueueReadBuffer(cmdQueue0, memObjArray2, CL_TRUE, 0, 5 * sizeof(int), ipaArray2, 0, NULL, NULL);
if (status < 0){
printf("FAIL to copy results from device to host!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nPrinting out the result array:\n");
for (int i = 0; i < 5; i++){
printf("%d ", ipaArray2[i]);
}
printf("\nCreating subbuffer... ");
int iQuantity = 2;
int iShift = 2;
typedef struct _cl_buffer_region{
size_t size;
size_t origin;
} cl_buffer_region;
cl_buffer_region stRegion;
stRegion.size = iQuantity * sizeof(int);
stRegion.origin = iShift * sizeof(int);
cl_mem memObjSubArray = clCreateSubBuffer(memObjArray2, CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &stRegion, &status);
if (status < 0){
printf("FAIL to create subbuffer!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nReading results from device memory subbuffer to host array... ");
int* ipaSubArray = (int*)malloc(iQuantity*sizeof(int));
status = clEnqueueReadBuffer(cmdQueue0, memObjSubArray, CL_TRUE, 0, iQuantity*sizeof(int), ipaSubArray, 0, NULL, NULL);
if (status < 0){
printf("FAIL to copy results from device to host!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nPrinting out the result array:\n");
for (int i = 0; i < iQuantity; i++){
printf("%d ", ipaSubArray[i]);
}
printf("OK.\n...Releasing resources... ");
clReleaseMemObject(memObjArray1);
clReleaseMemObject(memObjArray2);
clReleaseMemObject(memObjSubArray);
clReleaseCommandQueue(cmdQueue0);
free(kernels);
clReleaseProgram(program);
for (int i = 0; i < NUM_OF_FILES; i++){
free(cpaProgramBuffer[i]);
}
clReleaseContext(context);
free(devices);
free(platforms);
printf("OK.\nEnd of program. Bey!\n");
system("PAUSE");
}
#包括
#包括
#包括
#定义程序文件“good.cl”
//#将程序文件定义为“bad.cl”
//#定义程序文件3“setminusone.cl”
#定义\u文件的数量\u 1
int main(){
cl_平台_id*平台;
cl_uint Numof平台;
国际地位;
cl_设备\u id*设备;
氯离子装置;
char caDeviceName[500];
语境;
const char*kcpaFileName[NUM_OF_FILES]={PROGRAM_FILE_1};
文件*pProgramHandler;
char*cpaprogrammabuffer[NUM_OF_FILES];
size\u t saProgramSize[NUM\u OF_文件]={0};
cl_uint numOfEnters[NUM_OF_文件]={0};
CLU计划;
const char kcaOptions[]=“仅限-cl有限数学-cl无符号零”;
大小\u t sLogSize=0;
char*cpProgramLog;
cl_uint numOfKernels=0;
cl_内核*内核;
char-caKernelName[20];
cl_命令_队列cmdQueue0;
printf(“确定可用平台的数量…”);
status=clGetPlatformIDs(NULL、NULL和numOfPlatforms);
如果(状态<0){
printf(“无法建立平台!>%d\n”,状态);
系统(“暂停”);
出口(1);
}
printf(“确定。\n初始化了%u个平台。\n初始化平台…”,numOfPlatforms);
平台=(cl_平台id*)malloc(NUMOF平台*sizeof(cl_平台id));
状态=clGetPlatformIDs(numOfPlatforms,platforms,NULL)//
如果(状态<0){
printf(“初始化平台失败!>%d\n”,状态);
系统(“暂停”);
出口(1);
}
printf(“确定。\n正在安装设备…”);
状态=CLGetDeviceID(平台[0],CL\u设备类型\u所有,NULL,NULL和NUMOFECTIONS);
如果(状态<0){
printf(“无法建立设备!>%d\n”,状态);
系统(“暂停”);
出口(1);
}
printf(“确定。\n初始化%u个设备。\n初始化设备…”,numOfDevices);
设备=(cl_设备id*)malloc(numOfDevices*sizeof(cl_设备id));
状态=CLGetDeviceID(平台[0],CL\u设备类型\u全部,numOfDevices,设备,NULL);
如果(状态<0){
printf(“初始化设备失败!>%d\n”,状态);
系统(“暂停”);
出口(1);
}
printf(“OK”);
对于(int i=0;i%d\n”,i,状态);
系统(“暂停”);
出口(1);
}
printf(“\n设备%d是\%s\”,i,caDeviceName);
}
printf(“\n正在创建上下文…”);
context=clCreateContext(NULL、numOfDevices、devices、NULL、NULL和status);
如果(状态<0){
printf(“创建上下文失败!>%d\n”,状态);
系统(“暂停”);
出口(1);
}
printf(“确定。\n正在从文件中读取源代码…”);
for(int i=0;i%d\n”,kcpaFileName[i],状态);
系统(“暂停”);
出口(1);
}
fseek(pProgramHandler,0,SEEK_END);
saProgramSize[i]=ftell(pProgramHandler);
倒带(pProgramHandler);
cpaProgramBuffer[i]=(char*)malloc(sizeof(char)*saProgramSize[i]+1);
fread(cpaprogrammabuffer[i],sizeof(char),saProgramSize[i],pprogrammandler);
cpaProgramBuffer[i][saProgramSize[i]]='\0';
fclose(pprogrammandler);
对于(int j=0;j%d\n”),状态
...
/*Data and buffers*/
/*kernels[0]*/
/*
 * Fragment from the body of main(); context, cmdQueue0, kernels and status
 * are set up by the elided code above it.
 *
 * It uses a small separate buffer (memObjSubBuffer) together with
 * clEnqueueCopyBuffer:
 *   1. copy 2 elements of the input buffer into memObjSubBuffer and read
 *      them back to the host;
 *   2. run kernels[0] and read the whole output buffer;
 *   3. copy memObjSubBuffer over elements 2..3 of the output buffer and read
 *      the output buffer again.
 */
// two arrays and buffers creation
int iaInputArray[] = { 1, 2, 3, 4, 5 };
int iSizeOfArray = (int)(sizeof(iaInputArray) / sizeof(iaInputArray[0]));
int* ipaOutputArray = (int*)malloc(iSizeOfArray*sizeof(int));
cl_mem memObjInput;
cl_mem memObjOutput;
cl_mem memObjSubBuffer;
size_t tGlobal_item_size = iSizeOfArray; // one work-item per array element
size_t tLocal_item_size = 1; // work-groups of a single work-item
size_t tSrcBufOffset; //offset in source buffer, in elements
size_t tDstBufOffset; //offset in destination buffer, in elements
size_t tNumbOfElementsToCopy=2; //number of elements to copy
int* ipaSubArray = (int*)malloc(tNumbOfElementsToCopy*sizeof(int));
if (ipaOutputArray == NULL || ipaSubArray == NULL){
    printf("FAIL to allocate host arrays!\n");
    system("PAUSE");
    exit(1);
}
printf("OK.\nPrinting out initial input array:\n");
for (int i = 0; i < iSizeOfArray; i++){
    printf("%d ", iaInputArray[i]);
}
printf("\nCreating buffer memory objects... ");
memObjInput = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(iaInputArray), iaInputArray, &status);
if (status < 0){
    printf("FAIL to create buffer for input data!> %d\n", status);
    system("PAUSE");
    exit(1);
}
memObjOutput = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(iaInputArray), NULL, &status);
if (status < 0){
    printf("FAIL to create buffer for output data!> %d\n", status);
    system("PAUSE");
    exit(1);
}
// Sized by the number of elements to copy: ipaSubArray is a pointer, so
// sizeof(ipaSubArray) would give the pointer size, not the region size.
memObjSubBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, tNumbOfElementsToCopy*sizeof(int), NULL, &status);
if (status < 0){
    printf("FAIL to create buffer for copied elements!> %d\n", status);
    system("PAUSE");
    exit(1);
}
printf("OK.\nCopying 2nd and 3rd elements of the initial array into sub-buffer... ");
tSrcBufOffset = 1;
tDstBufOffset = 0;
// Offsets and size are in bytes. No wait list: the count is 0, the list and the event are NULL.
status = clEnqueueCopyBuffer(cmdQueue0, memObjInput, memObjSubBuffer, sizeof(int)*tSrcBufOffset, sizeof(int)*tDstBufOffset, sizeof(int)*tNumbOfElementsToCopy, 0, NULL, NULL);
if (status < 0){
    printf("FAIL to copy buffers!> %d\n", status);
    system("PAUSE");
    exit(1);
}
printf("OK.\nTransferring copied elements to host-program... ");
// Blocking read (CL_TRUE): ipaSubArray is valid when the call returns.
status = clEnqueueReadBuffer(cmdQueue0, memObjSubBuffer, CL_TRUE, 0, tNumbOfElementsToCopy*sizeof(int), ipaSubArray, 0, NULL, NULL);
if (status < 0){
    printf("FAIL to transfer data from device memory buffer to host array!> %d\n", status);
    system("PAUSE");
    exit(1);
}
printf("OK.\nPrinting out copied elements:\n");
for (size_t i = 0; i < tNumbOfElementsToCopy; i++){
    printf("%d ", ipaSubArray[i]);
}
printf("\nSetting kernel arguments... ");
status = clSetKernelArg(kernels[0], 0, sizeof(cl_mem), &memObjInput);
if (status < 0){
    printf("FAIL to set kernel argument #0!> %d\n", status);
    system("PAUSE");
    exit(1);
}
status = clSetKernelArg(kernels[0], 1, sizeof(cl_mem), &memObjOutput);
if (status < 0){
    printf("FAIL to set kernel argument #1!> %d\n", status);
    system("PAUSE");
    exit(1);
}
printf("OK.\nExecuting kernel... ");
status = clEnqueueNDRangeKernel(cmdQueue0, kernels[0], 1, NULL, &tGlobal_item_size, &tLocal_item_size, 0, NULL, NULL);
if (status < 0){
    printf("FAIL to enqueue kernels[0] into cmdQueue0!> %d\n", status);
    system("PAUSE");
    exit(1);
}
printf("OK.\nReading results from device memory buffer to host array... ");
status = clEnqueueReadBuffer(cmdQueue0, memObjOutput, CL_TRUE, 0, iSizeOfArray*sizeof(int), ipaOutputArray, 0, NULL, NULL);
if (status < 0){
    printf("FAIL to transfer data from device memory buffer to host array!> %d\n", status);
    system("PAUSE");
    exit(1);
}
printf("OK.\nPrinting out data obtained from kernel:\n");
for (int i = 0; i < iSizeOfArray; i++){
    printf("%d ", ipaOutputArray[i]);
}
printf("\nChanging 3rd and 4th elements of data in output buffer... ");
tSrcBufOffset = 0;
tDstBufOffset = 2;
// Overwrites output elements 2..3 with the two values held in memObjSubBuffer.
status = clEnqueueCopyBuffer(cmdQueue0, memObjSubBuffer, memObjOutput, sizeof(int)*tSrcBufOffset, sizeof(int)*tDstBufOffset, sizeof(int)*tNumbOfElementsToCopy, 0, NULL, NULL);
if (status < 0){
    printf("FAIL to copy buffers!> %d\n", status);
    system("PAUSE");
    exit(1);
}
printf("OK.\nTransferring results from device memory buffer to host array... ");
status = clEnqueueReadBuffer(cmdQueue0, memObjOutput, CL_TRUE, 0, iSizeOfArray*sizeof(int), ipaOutputArray, 0, NULL, NULL);
if (status < 0){
    printf("FAIL to transfer data from device memory buffer to host array!> %d\n", status);
    system("PAUSE");
    exit(1);
}
printf("OK.\nPrinting out host array data:\n");
for (int i = 0; i < iSizeOfArray; i++){
    printf("%d ", ipaOutputArray[i]);
}
printf("\n...Releasing resources... ");
...