C++ CUDA将GpuMat的c数组传递给内核
标签:c++, c, opencv, cuda。我是CUDA和C的新手,我需要以下方面的帮助:我想将GpuMat的C数组传递给CUDA内核。以下是我的内核代码:
/**
 * Kernel skeleton that visits one pixel per thread across a set of difference maps.
 *
 * Each thread derives its pixel coordinate (x = column, y = row) from a 2D
 * grid/block layout and exits early when that coordinate lies outside the
 * image described by the first difference map.
 *
 * @param differenceMapsArray Array of PtrStepSzi headers. The array itself is
 *                            dereferenced on the device, so the pointer must be
 *                            device-accessible (a host stack array will not work).
 * @param arraySize           Number of entries in differenceMapsArray.
 * @param disparityMap        Output image header (unused by the skeleton shown here).
 */
__global__
void disparityFromDiffMapsKernel(cuda::PtrStepSzi* differenceMapsArray,
                                 int arraySize,
                                 cuda::PtrStepSzi disparityMap){
    // With no difference maps there is nothing to process, and reading
    // element 0 below would be out of bounds.
    if(arraySize <= 0){
        return;
    }
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    // Check if the thread is inside the image. Valid columns are [0, cols) and
    // valid rows are [0, rows), so x == cols or y == rows must be rejected too.
    if(x >= differenceMapsArray[0].cols || y >= differenceMapsArray[0].rows){
        return;
    }
    //do stuff
}
下面是我初始化数组并调用内核的代码:
// Host-side setup from the question: fills an array of PtrStepSzi headers, one
// per difference map, and launches the kernel with that array.
// NOTE(review): diffMaps is a local array declared in host code, yet it is passed
// directly to the kernel below, which indexes it on the device. This is the
// problem the question is about.
cuda::PtrStepSzi diffMaps[diffMapsSize];
// NOTE(review): the loop is bounded by `offset` (inclusive) while the array is
// sized by diffMapsSize — confirm that offset < diffMapsSize.
for(int i = 0; i <= offset; i++){
// A fresh 32-bit signed, single-channel GpuMat with the size of leftImageGPU.
// NOTE(review): diffMap is scoped to this iteration; presumably its device buffer
// is released when it goes out of scope, leaving the stored header dangling — verify.
cuda::GpuMat diffMap(leftImageGPU.size(),CV_32SC1);
// Convert the GpuMat to a PtrStepSzi header and store it in the array.
cuda::PtrStepSzi diffMapPtr = diffMap;
diffMaps[i] = diffMapPtr;
}
disparityFromDiffMapsKernel<<<numBlocks,threadsPerBlock>>>(diffMaps,diffMapsSize,disparityImageGPU); //gpu mat is initialized before
我真的很感激任何帮助。
我通过 cudaMalloc 和 cudaMemcpy 将数组移动到GPU内存,找到了解决问题的方法(感谢@sgarizvi的提示)
以下是最终代码,以防有人遇到类似问题:
// Reserve a host staging array and a device array for the PtrStepSzi headers.
cuda::PtrStepSzi* cpuDiffMapPtrs;
cpuDiffMapPtrs = (cuda::PtrStepSzi*) malloc(diffMapsSize * sizeof(cuda::PtrStepSzi));
cuda::PtrStepSzi* gpuDiffMapPtrs;
cudaMalloc(&gpuDiffMapPtrs, diffMapsSize * sizeof(cuda::PtrStepSzi));
// Owning GpuMat objects. A PtrStepSzi is only a non-owning header, so the
// GpuMats must stay alive until the kernel has finished; a GpuMat declared
// inside the loop would release its device buffer at the end of each iteration
// and leave the stored header dangling.
cuda::GpuMat* diffMapsGPU = new cuda::GpuMat[diffMapsSize];
// Fill the host array with headers that point at the GPU mats.
for(int i = 0; i < diffMapsSize; i++){
    diffMapsGPU[i].create(leftImageGPU.size(), CV_32SC1);
    //do stuff with the gpu mats
    cpuDiffMapPtrs[i] = diffMapsGPU[i];
}
// Copy the header array to the GPU so the kernel can index it on the device.
cudaMemcpy(gpuDiffMapPtrs, cpuDiffMapPtrs, diffMapsSize * sizeof(cuda::PtrStepSzi), cudaMemcpyHostToDevice);
disparityFromDiffMapsKernel<<<numBlocks,threadsPerBlock>>>(gpuDiffMapPtrs,diffMapsSize,halfKernelSize,disparityImageGPU);
// Kernel launches are asynchronous: wait for completion before releasing
// anything the kernel reads.
cudaDeviceSynchronize();
// Free the allocated memory.
cudaFree(gpuDiffMapPtrs);
free(cpuDiffMapPtrs);
delete[] diffMapsGPU;
//为diffmap指针数组保留内存
cuda::PtrStepSzi* cpuDiffMapPtrs;
cpuDiffMapPtrs = (cuda::PtrStepSzi*) malloc(diffMapsSize * sizeof(cuda::PtrStepSzi));
cuda::PtrStepSzi* gpuDiffMapPtrs;
cudaMalloc(&gpuDiffMapPtrs, diffMapsSize * sizeof(cuda::PtrStepSzi));
//用指向GPU矩阵的指针填充CPU数组
for(int i = 0; i< diffMapsSize; i++){ // …(片段在此处被截断)
我通过 cudaMalloc 和 cudaMemcpy 将数组移动到GPU内存,找到了解决问题的方法(感谢@sgarizvi的提示)
以下是最终代码,以防有人遇到类似问题:
// Reserve a host staging array and a device array for the PtrStepSzi headers.
cuda::PtrStepSzi* cpuDiffMapPtrs;
cpuDiffMapPtrs = (cuda::PtrStepSzi*) malloc(diffMapsSize * sizeof(cuda::PtrStepSzi));
cuda::PtrStepSzi* gpuDiffMapPtrs;
cudaMalloc(&gpuDiffMapPtrs, diffMapsSize * sizeof(cuda::PtrStepSzi));
// Owning GpuMat objects. A PtrStepSzi is only a non-owning header, so the
// GpuMats must stay alive until the kernel has finished; a GpuMat declared
// inside the loop would release its device buffer at the end of each iteration
// and leave the stored header dangling.
cuda::GpuMat* diffMapsGPU = new cuda::GpuMat[diffMapsSize];
// Fill the host array with headers that point at the GPU mats.
for(int i = 0; i < diffMapsSize; i++){
    diffMapsGPU[i].create(leftImageGPU.size(), CV_32SC1);
    //do stuff with the gpu mats
    cpuDiffMapPtrs[i] = diffMapsGPU[i];
}
// Copy the header array to the GPU so the kernel can index it on the device.
cudaMemcpy(gpuDiffMapPtrs, cpuDiffMapPtrs, diffMapsSize * sizeof(cuda::PtrStepSzi), cudaMemcpyHostToDevice);
disparityFromDiffMapsKernel<<<numBlocks,threadsPerBlock>>>(gpuDiffMapPtrs,diffMapsSize,halfKernelSize,disparityImageGPU);
// Kernel launches are asynchronous: wait for completion before releasing
// anything the kernel reads.
cudaDeviceSynchronize();
// Free the allocated memory.
cudaFree(gpuDiffMapPtrs);
free(cpuDiffMapPtrs);
delete[] diffMapsGPU;
//为diffmap指针数组保留内存
cuda::PtrStepSzi* cpuDiffMapPtrs;
cpuDiffMapPtrs = (cuda::PtrStepSzi*) malloc(diffMapsSize * sizeof(cuda::PtrStepSzi));
cuda::PtrStepSzi* gpuDiffMapPtrs;
cudaMalloc(&gpuDiffMapPtrs, diffMapsSize * sizeof(cuda::PtrStepSzi));
//用指向GPU矩阵的指针填充CPU数组
for(int i = 0; i< diffMapsSize; i++){ // …(片段在此处被截断)
评论:diffMaps 驻留在主机上,而您正在将其作为内核参数传递并尝试在设备上访问它。这就是问题所在!
回复:谢谢 @sgarizvi