Struct 将带有数组的结构数组加载到cuda
我正在尝试创建一个数组结构,其中包含数组,并将它们加载到GPU上。我想我遵循了正确的步骤Struct 将带有数组的结构数组加载到cuda,struct,cuda,Struct,Cuda,我正在尝试创建一个数组结构,其中包含数组,并将它们加载到GPU上。我想我遵循了正确的步骤 使用malloc在CPU上创建结构 cudamaloc将数组添加到结构中 使用cudamalloc在GPU上创建结构 将CPU结构复制到GPU结构上 运行此代码时,只要不更改内核函数中的值p[I].c[0],它就会正常工作。如果我删除行p[I].c[0]=3.3然后输出预期结果。当我保持原样时,它会输出所有值的随机数。我希望能够使用内核函数更新数组中的值 有什么不对劲吗 这是我的密码: #include &
cudamaloc
将数组添加到结构中李>
p[I].c[0]
,它就会正常工作。如果我删除行p[I].c[0]=3.3代码>然后输出预期结果。当我保持原样时,它会输出所有值的随机数。我希望能够使用内核函数更新数组中的值
有什么不对劲吗
这是我的密码:
#include <stdio.h>
#include <cuda_runtime.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstdio>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <omp.h>
#include <vector>
#include <sys/time.h>
float cData[]
{
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16
};
float dData[]
{
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16
};
typedef struct
{
float a, b;
float* c;
float* d;
} point;
__global__ void testKernel(point *p){
int i = blockIdx.x * blockDim.x + threadIdx.x;
p[i].a = 1.1;
p[i].b = 2.2;
p[i].c[0] = 3.3;
}
void checkerror(cudaError_t error, char* descrp){
if (error != 0){
printf("%s error code: %d \n", descrp, error);
}
}
extern "C" int main()
{
printf("starting gpuCode\n");
int *dev_a;
// set number of points
int numPoints = 16,
gpuBlockSize = 4,
pointSize = sizeof(point),
numBytes = numPoints * pointSize,
gpuGridSize = numPoints / gpuBlockSize;
cudaError_t err = cudaSuccess;
printf("initialized variables\n");
// allocate memory
point *cpuPointArray,
*gpuPointArray,
*outPointArray;
cpuPointArray = (point*)malloc(numBytes); //create the cpuPointArray struct on the cpu
outPointArray = (point*)malloc(numBytes); //create the outPointArray struct on the cpu
printf("load cpuPointArray struct with default values\n");
for (int k=0; k<16; k++){
err = cudaMalloc( (void**)&cpuPointArray[k].c, 16*sizeof(float) );
checkerror(err, "assigning cuda pointer c");
err = cudaMalloc( (void**)&cpuPointArray[k].d, 16*sizeof(float) );
checkerror(err, "assigning cuda pointer d");
cpuPointArray[k].a = 16;
cpuPointArray[k].b = 16;
}
for (int k=0; k<16; k++){
printf("top loop %d\n", k);
err = cudaMemcpy(cpuPointArray[k].c, cData, 16*sizeof(float), cudaMemcpyHostToDevice);
printf("after cdata\n");
checkerror(err, "copying cdata to gpu array c" );
err = cudaMemcpy(cpuPointArray[k].d, dData, 16*sizeof(float), cudaMemcpyHostToDevice);
printf("after ddata\n");
checkerror(err, "copying ddata to gpu array d");
printf("bottom of loop %d\n", k);
}
err = cudaMalloc((void**)&gpuPointArray, numBytes); //allocate memory on the gpu for the cpu point array
checkerror(err, "allocating memory for gpuPointArray");
err = cudaMemcpy(gpuPointArray,cpuPointArray,sizeof(cpuPointArray), cudaMemcpyHostToDevice); //copy the cpu point array onto the gpu
checkerror(err, "copying cpuPointArray to gpuPointArray");
printf("loaded the struct into the kernel\n");
for(int i = 0; i < numPoints; ++i)
{
printf("point.a: %f, point.b: %f ************************\n",cpuPointArray[i].a,cpuPointArray[i].b);
printf("cuda mem location point.c: %d point.d: %d\n",&cpuPointArray[i].c, &cpuPointArray[i].d);
}
// launch kernel
testKernel<<<gpuGridSize,gpuBlockSize>>>(gpuPointArray);
printf("returned the struct from the kernel\n");
err = cudaMemcpy(outPointArray,gpuPointArray,numBytes, cudaMemcpyDeviceToHost);
checkerror(err, "copying gpuPointArray to cpuPointArray");
printf("after gpu copy to cpu\n");
for (int k=0; k<16; k++){
printf("creating memory on cpu for array c\n");
outPointArray[k].c = (float*)malloc(16*sizeof(float));
printf("creating memory on cpu for array d\n");
outPointArray[k].d = (float*)malloc(16*sizeof(float));
printf("copying memory values onto cpu array c\n");
err = cudaMemcpy(outPointArray[k].c, cpuPointArray[k].c, 16*sizeof(float), cudaMemcpyDeviceToHost);
checkerror(err, "copy array c from gpu to cpu");
printf("copying memory values onto cpu array c\n");
err = cudaMemcpy(outPointArray[k].d, cpuPointArray[k].d, 16*sizeof(float), cudaMemcpyDeviceToHost);
checkerror(err, "copy array d from gpu to cpu");
printf("bottom of loop %d\n", k);
}
// retrieve the results
printf("testKernel results:\n");
for(int i = 0; i < numPoints; ++i)
{
printf("point.a: %f, point.b: %f ************************\n",outPointArray[i].a,outPointArray[i].b);
for (int j=0; j<16; j++){
printf("point.c: %f point.d: %f\n",outPointArray[i].c[j], outPointArray[i].d[j]);
}
}
// deallocate memory
free(cpuPointArray);
cudaFree(gpuPointArray);
return 0;
}
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
浮动cData[]
{
1.
2.
3.
4.
5.
6.
7.
8.
9,
10,
11,
12,
13,
14,
15,
16
};
浮点dData[]
{
1.
2.
3.
4.
5.
6.
7.
8.
9,
10,
11,
12,
13,
14,
15,
16
};
类型定义结构
{
浮子a、b;
浮点数*c;
浮动*d;
}点;
__全局无效测试内核(点*p){
int i=blockIdx.x*blockDim.x+threadIdx.x;
p[i].a=1.1;
p[i].b=2.2;
p[i].c[0]=3.3;
}
无效检查错误(cudaError\u t error,char*descrp){
如果(错误!=0){
printf(“%s错误代码:%d\n”,描述,错误);
}
}
外部“C”int main()
{
printf(“启动gpuCode\n”);
国际开发署;
//设定点数
int numPoints=16,
gpuBlockSize=4,
pointSize=sizeof(点),
numBytes=numPoints*pointSize,
gpuGridSize=numPoints/gpuBlockSize;
cudaError\u t err=cudaSuccess;
printf(“初始化变量\n”);
//分配内存
点*cpuPointArray,
*gpuPointArray,
*输出点阵列;
cpuPointArray=(point*)malloc(numBytes);//在cpu上创建cpuPointArray结构
outPointArray=(point*)malloc(numBytes);//在cpu上创建outPointArray结构
printf(“使用默认值加载cpuPointArray结构\n”);
对于(int k=0;k,您可能将结构数组错误地复制到设备。请尝试更改:
err = cudaMemcpy(gpuPointArray,cpuPointArray,sizeof(cpuPointArray), cudaMemcpyHostToDevice);
将来
err = cudaMemcpy(gpuPointArray,cpuPointArray,numBytes, cudaMemcpyHostToDevice);
由于cpuPointArray的类型为point*,因此sizeof(cpuPointArray)将实际返回计算机上指针的大小。您需要的是结构的完整数组的大小。事实上,从设备复制回时,您甚至可以通过以下方式正确执行此操作:
err = cudaMemcpy(outPointArray,gpuPointArray,numBytes, cudaMemcpyDeviceToHost);
希望这能有所帮助!您可能将结构数组复制到设备中的操作不正确。请尝试更改:
err = cudaMemcpy(gpuPointArray,cpuPointArray,sizeof(cpuPointArray), cudaMemcpyHostToDevice);
将来
err = cudaMemcpy(gpuPointArray,cpuPointArray,numBytes, cudaMemcpyHostToDevice);
由于cpuPointArray的类型为point*,因此sizeof(cpuPointArray)将实际返回计算机上指针的大小。您需要的是结构的完整数组的大小。事实上,从设备复制回时,您甚至可以通过以下方式正确执行此操作:
err = cudaMemcpy(outPointArray,gpuPointArray,numBytes, cudaMemcpyDeviceToHost);
希望有帮助