Python PyOpenCl内核参数无效_Python_Pyopencl

Python PyOpenCl内核参数无效

python

Python PyOpenCl内核参数无效,python,pyopencl,Python,Pyopencl,我在我的gpu上用python编写了一些代码，用一个过滤器来并行卷积。我一直收到这个错误，我不知道如何修复它。我在下面公布了错误以及我的代码。事先非常感谢你对于这个问题，我查看了一些过去的堆栈溢出响应，但它们似乎都没有起到作用。所以有可能是我找不到你能抓到的东西 File "gpu_test_prgrm.py", line 127, in <module> prg.multiplymatrices(queue, conv_img[0].shape ,

我在我的gpu上用python编写了一些代码，用一个过滤器来并行卷积。我一直收到这个错误，我不知道如何修复它。我在下面公布了错误以及我的代码。事先非常感谢你

针对这个问题，我查看了 Stack Overflow 上以往的一些回答，但它们似乎都没有解决我的问题。所以很可能是我漏掉了某个你们能发现的地方。

  File "gpu_test_prgrm.py", line 127, in <module>
    prg.multiplymatrices(queue, conv_img[0].shape , None, 3,3,2,2,2,2 ,cl_a, cl_b, cl_c)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pyopencl/__init__.py", line 888, in kernel_call
    return self._enqueue(self, queue, global_size, local_size, *args, **kwargs)
  File "<generated code>", line 7, in enqueue_knl_multiplymatrices
pyopencl._cl.LogicError: Kernel.set_arg failed: INVALID_VALUE - when processing arg#1 (1-based): invalid kernel argument

文件“gpu\u test\u prgrm.py”，第127行，在
多重矩阵（队列，conv_img[0]。形状，无，3,3,2,2,2，cl_a，cl_b，cl_c）
文件“/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pyopencl/__-init___.py”，第888行，在内核调用中
返回self.\u排队（self、queue、global\u size、local\u size、*args、**kwargs）
文件“”，第7行，在多重矩阵中排队
pyopencl._cl.LogicError:Kernel.set_arg失败：无效的_值-处理arg时#1（基于1）：无效的内核参数

ctx=cl.创建一些上下文（）
queue=cl.CommandQueue（ctx）
伪img=np.数组（[[1.0,2.0,3.0]，[4.0,5.0,6.0]，[7.0,8.0,9.0]）
伪_filters=np.array（[[1.0,1.0]，[1.0,1.0]，[[2.0,2.0]，[2.0,2.0]]））
conv_img=np.零（（2,2,2））
#打印（假图片）
#打印（假过滤器）
#打印（假图像[0:2,0:2]）
#打印（假图像展平（））
#打印（假过滤器。展平（））
#打印（假过滤器[0]。展平（）
#打印（conv_img[0].flatte（））
mf=cl.mem_标志
cl_a=cl.Buffer（ctx，mf.READ_ONLY | mf.COPY_HOST_PTR，hostbuf=false_img.flatte（））
cl_b=cl.Buffer（ctx，mf.READ_ONLY | mf.COPY_HOST_PTR，hostbuf=fake_filters[0]。展平（）
cl_c=cl.Buffer（仅ctx、mf.WRITE_、conv_img[0].flatte（）.nbytes）
#过滤器中的行，过滤器中的列=frow，fcol
#CONV图像中的行，CONV图像中的列=crow，ccol
#图像中的行，图像中的列=irow，icol
prg=cl.Program（ctx），“”
__内核无效多重矩阵（int irow、int icol、int frow、int fcol、int crow、int ccol、uuu全局浮点*伪伪伪伪伪伪伪伪伪过滤器、uu全局浮点*conv\img）
{
int i=获取全局id（0）；
int j=获取全局id（1）；
printf（“您输入了：%d”，i）；
printf（“您输入了：%d”，j）；
conv_img[i*ccol+j]=0；
for（int行=0；行

Python 中的数字是 Python 对象，需要先用 `np.int32()` 包装，才能作为 `int` 传递给内核：

prg.multiplymatrices(queue, conv_img[0].shape, None, np.int32(3), np.int32(3), np.int32(2), np.int32(2), np.int32(2), np.int32(2), cl_a, cl_b, cl_c)

这对我很有效。谢谢你的帮助哈哈！

# Convolve a small image with one filter on an OpenCL device and time the run.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# The kernel declares its buffers as `__global float *` (32-bit), so every
# host array exchanged with the device must be float32. NumPy's default
# float64 would be reinterpreted byte-wise and produce garbage.
fake_img = np.array(
    [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]], dtype=np.float32
)
fake_filters = np.array(
    [[[1.0, 1.0], [1.0, 1.0]], [[2.0, 2.0], [2.0, 2.0]]], dtype=np.float32
)
conv_img = np.zeros((2, 2, 2))

# Host-side destination for the output of the first filter; its dtype and
# size define the device output buffer below.
conv_img2 = np.zeros(conv_img[0].shape, dtype=np.float32)

mf = cl.mem_flags
cl_a = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=fake_img.flatten())
cl_b = cl.Buffer(
    ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=fake_filters[0].flatten()
)
cl_c = cl.Buffer(ctx, mf.WRITE_ONLY, conv_img2.nbytes)

# Kernel scalar arguments:
#   irow, icol = rows / cols of the image
#   frow, fcol = rows / cols of the filter
#   crow, ccol = rows / cols of the convolved image
prg = cl.Program(ctx, """
    __kernel void multiplymatrices(int irow, int icol, int frow, int fcol, int crow, int ccol, __global float * fake_img, __global float * fake_filters, __global float * conv_img)
    {
        int i = get_global_id(0);
        int j = get_global_id(1);

        printf("You entered: %d", i);
        printf("You entered: %d", j);

        /* Ignore work-items that fall outside the output image. */
        if (i >= crow || j >= ccol)
            return;

        float acc = 0;

        for (int row = 0; row < frow; row++)
        {
            for (int col = 0; col < fcol; col++)
            {
                /* (i + row, j + col) is the image pixel under filter tap   */
                /* (row, col) when the window's top-left corner is (i, j).  */
                /* The filter is row-major, so its row stride is fcol.      */
                acc += fake_img[(i + row) * icol + (j + col)] * fake_filters[row * fcol + col];
            }
        }

        conv_img[i * ccol + j] = acc;
    }
    """).build()


t0 = datetime.datetime.now()

print(conv_img[0].shape)

# Derive the dimensions from the arrays instead of hard-coding them.
irow, icol = fake_img.shape
frow, fcol = fake_filters[0].shape
crow, ccol = conv_img[0].shape

# Plain Python ints carry no size information, so PyOpenCL cannot pass them as
# kernel `int` arguments (Kernel.set_arg fails with INVALID_VALUE). Wrap each
# scalar in np.int32 to match the kernel signature.
prg.multiplymatrices(
    queue,
    conv_img[0].shape,
    None,
    np.int32(irow),
    np.int32(icol),
    np.int32(frow),
    np.int32(fcol),
    np.int32(crow),
    np.int32(ccol),
    cl_a,
    cl_b,
    cl_c,
)

# Copy the device result back into the float32 host array.
cl.enqueue_copy(queue, conv_img2, cl_c)

delta_t = datetime.datetime.now() - t0
print('OpenCL Multiplication: ' + str(delta_t))