OpenCL for循环提供了CL_OUT_的资源

OpenCL for循环提供了CL_OUT_的资源,c,opencl,C,Opencl,因此,我目前正在传入2个GPULevel,我希望内核获取它们中的每一个,然后如果在级别的数组中有arr1,我想检查值是否>=0,以及是否为更改值 我的原始代码内核代码如下: typedef struct GPULevelDef { int nInput, nOutput; float arr1[100]; float arr2[100]; }GPULevel; __kernel void levelComposition(__global GPULevel *lLeve

因此,我目前正在传入2个GPULevel,我希望内核获取它们中的每一个,然后如果在
级别
的数组中有
arr1
,我想检查值是否>=0,以及是否为更改值

我的原始代码内核代码如下:

typedef struct GPULevelDef
{
    int nInput, nOutput;
    float arr1[100];
    float arr2[100];
}GPULevel;

__kernel void levelComposition(__global GPULevel *lLevels, __global GPULevel *oLevels, __global int *LCount)
{
    int lIndex = get_global_id(1);
    int wIndex = get_global_id(0);
    int wCount = 0;
    if(lIndex < LCount)
    {
        wCount = lLevels[lIndex].nInput*lLevels[lIndex].nOutput;
        if(wIndex < wCount)
        {
            if(lLevels[lIndex].arr1[wIndex] >= 0)
            {
                oLevels[lIndex].arr1[wIndex] = (lLevels[lIndex].arr1[wIndex]) + 350;
            }
        }
    }
}
总结一下:我使用了第一个内核,因为我认为这会给我想要的结果,因为我觉得它是一个并行实现。我确实觉得奇怪,get_global_id()对于lIndex来说必须是1,对于wIndex来说必须是0才能使其正常工作(否则它会再次为两者产生错误的结果)。 因此,当这个原始内核在第二层出错时,我创建了第二个内核。 在第二个内核中,这正是我想要实现的,但是由于某种原因,for循环的引入导致CL_OUT_of_RESOURCES错误(-5)。我需要知道我应该使用和坚持使用哪个内核,以及如何得到我想要的

谢谢

不确定这个图表是否也有帮助

levels[0]
    nInput = 2
    nOutput = 5
    arr1  [0] = 2
     arr1 [1] = 7
     arr1 [...] = -32
     arr1 [n] = -1
    arr2  [0] = 3
     arr2 [1] = -2
     arr2 [...] = 5
     arr2 [n] = -3

levels[1]
    nInput = 5
    nOutput = 1
    arr1  [0] = 3
     arr1 [1] = 7
     arr1 [...] = 72
     arr1 [n] = -1
    arr2  [0] = 5
     arr2 [1] = -2
     arr2 [...] = 1
     arr2 [n] = -1



  Parallel           Parallel

              ------->oLevels[0].arr1[0] =lLevels[0].arr1[0] +350
   lLevels[0] ------->oLevels[0].arr1[1] =lLevels[0].arr1[1] +350
              ------->oLevels[0].arr1[...] NOTHING
              ------->oLevels[0].arr1[n] NOTHING

              ------->oLevels[1].arr1[0] =lLevels[0].arr1[0] +350
   lLevels[1] ------->oLevels[1].arr1[1] =lLevels[0].arr1[1] +350
              ------->oLevels[1].arr1[...] =lLevels[0].arr1[...] +350
              ------->oLevels[1].arr1[n] NOTHING

LCount
是指向int的指针,您将其用作整数。你的循环可能超出范围

CL\u OUT\u OF_RESOURCES
通常表示地址超出范围


您的OpenCL编译器应该发出警告。您可能需要检查clGetProgramBuildInfo(…,CL_PROGRAM_BUILD_LOG…)返回的字符串。

您使用的是NVIDIA GPU吗?我也经常看到NVIDIA OpenCL驱动程序报告的CL_OUT_OF_RESOURCES错误,用于超出范围的访问(本地内存)。
GPULevel* levelIn = (GPULevel*)malloc(sizeof(GPULevel)*levelCount);
GPULevel* levelOut = (GPULevel*)malloc(sizeof(GPULevel)*levelCount);

size_t dataSize = sizeof(GPULevel)*levelCount;
layerBuffer = clCreateBuffer(gpu.context,CL_MEM_READ_ONLY,dataSize,NULL,&err);
err = clEnqueueWriteBuffer(queue,layerBuffer,CL_TRUE,0,dataSize,(void*)layerIn,0,NULL,NULL);
cl_mem bufferB = clCreateBuffer(gpu.context,CL_MEM_WRITE_ONLY,dataSize,NULL,&err);
err = clEnqueueWriteBuffer(queue,bufferB,CL_TRUE,0,dataSize,(void*)layerOut,0,NULL,NULL);


GPULayer* val1 = (GPULevel*)calloc(sizeof(levelIn), sizeof(GPULevel));
GPULayer* val2 = (GPULevel*)calloc(sizeof(levelOut), sizeof(GPULevel));
err = clEnqueueReadBuffer(queue, layerBuffer, CL_TRUE, 0, dataSize, val1, 0, NULL, NULL);
err = clEnqueueReadBuffer(queue, bufferB, CL_TRUE, 0, dataSize, val2, 0, NULL, NULL);
levels[0]
    nInput = 2
    nOutput = 5
    arr1  [0] = 2
     arr1 [1] = 7
     arr1 [...] = -32
     arr1 [n] = -1
    arr2  [0] = 3
     arr2 [1] = -2
     arr2 [...] = 5
     arr2 [n] = -3

levels[1]
    nInput = 5
    nOutput = 1
    arr1  [0] = 3
     arr1 [1] = 7
     arr1 [...] = 72
     arr1 [n] = -1
    arr2  [0] = 5
     arr2 [1] = -2
     arr2 [...] = 1
     arr2 [n] = -1



  Parallel           Parallel

              ------->oLevels[0].arr1[0] =lLevels[0].arr1[0] +350
   lLevels[0] ------->oLevels[0].arr1[1] =lLevels[0].arr1[1] +350
              ------->oLevels[0].arr1[...] NOTHING
              ------->oLevels[0].arr1[n] NOTHING

              ------->oLevels[1].arr1[0] =lLevels[0].arr1[0] +350
   lLevels[1] ------->oLevels[1].arr1[1] =lLevels[0].arr1[1] +350
              ------->oLevels[1].arr1[...] =lLevels[0].arr1[...] +350
              ------->oLevels[1].arr1[n] NOTHING