C++ 使用CUDA实现嵌套的for循环
我在处理一些嵌套循环时遇到了问题,需要把它们从C/C++转换成CUDA。基本上,我有4层嵌套的for循环,它们共享同一个数组并执行移位操作。 C++ 使用CUDA实现嵌套的for循环,c++,c,for-loop,cuda,parallel-processing,C++,C,For Loop,Cuda,Parallel Processing。我在处理一些嵌套循环时遇到了问题,需要把它们从C/C++转换成CUDA。基本上,我有4层嵌套的for循环,它们共享同一个数组并执行移位操作: #define N 65536 // ---------------------------------------------------------------------------------- int a1,a2,a3,a4, i1,i2,i3,i4; int Bit4CBitmapLookUp[16] = {0, 1, 3, 3, 7, 7, 7,
#define N 65536
// ----------------------------------------------------------------------------------
int a1,a2,a3,a4, i1,i2,i3,i4;
int Bit4CBitmapLookUp[16] = {0, 1, 3, 3, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15};
int _cBitmapLookupTable[N];
int s = 0; // index into the cBitmapLookupTable
for (i1 = 0; i1 < 16; i1++)
{
// first customer
a1 = Bit4CBitmapLookUp[i1] << 12;
for (i2 = 0; i2 < 16; i2++)
{
// second customer
a2 = Bit4CBitmapLookUp[i2] << 8;
for (i3 = 0; i3 < 16; i3++)
{
// third customer
a3 = Bit4CBitmapLookUp[i3] << 4;
for (i4 = 0;i4 < 16;i4++)
{
// fourth customer
a4 = Bit4CBitmapLookUp[i4];
// now actually set the sBitmapLookupTable value
_cBitmapLookupTable[s] = a1 | a2 | a3 | a4;
s++;
} // for i4
} // for i3
} // for i2
} // for i1
#define N 65536
// ----------------------------------------------------------------------------------
int a1, a2, a3, a4, i1, i2, i3, i4;
int Bit4CBitmapLookUp[16] = {0, 1, 3, 3, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15};
int _cBitmapLookupTable[N];
int s = 0; // index into the cBitmapLookupTable
for (i1 = 0; i1 < 16; i1++)
{
// first customer
a1 = Bit4CBitmapLookUp[i1] << 12;
…
由于leftaroundabout已经指出了初始化的问题,我建议您按如下方式重写程序:
int i1 = blockDim.x * blockIdx.x + threadIdx.x;
int i2 = blockDim.y * blockIdx.y + threadIdx.y;
int i3;
int i4;
while(i1 < N && i2 < N){
a1 = ..;
a2 = ..;
for(i3 = i4 = 0; i3 < N, i4 < N; i3++, i4++){
// third customer
a3 = Bit4CBitmapLookUp_device[i3] << 4;
// fourth customer
a4 = Bit4CBitmapLookUp_device[i4];
// now actually set the sBitmapLookupTable value
_cBitmapLookupTable[s] = a1 | a2 | a3 | a4;
s ++;
}
s += blockDim.x*gridDim.x*blockDim.y*gridDim.y;
i1 += blockDim.x*gridDim.x;
i2 += blockDim.y*gridDim.y;
}
// Index of this thread along x: offset of its block plus its position inside the block.
int i1=blockDim.x*blockIdx.x+threadIdx.x;
// Same computation along y.
int i2=blockDim.y*blockIdx.y+threadIdx.y;
// Declared without initializers; the loop that uses them is cut off in this fragment.
int i3;
int i4;
while (i1 < N && i2 < N) { … a3 = Bit4CBitmapLookUp_device[i3] << 4; …
提示:您把变量 i1..i4 初始化成了从未被使用的值。请参见此 ->
但是,索引 i3 和 i4 怎么办?我应该把它们声明为普通整数吗?
抱歉,我以为它们显然就是普通整数。我已更新答案。
int i1 = blockDim.x * blockIdx.x + threadIdx.x;
int i2 = blockDim.y * blockIdx.y + threadIdx.y;
int i3;
int i4;
while(i1 < N && i2 < N){
a1 = ..;
a2 = ..;
for(i3 = i4 = 0; i3 < N, i4 < N; i3++, i4++){
// third customer
a3 = Bit4CBitmapLookUp_device[i3] << 4;
// fourth customer
a4 = Bit4CBitmapLookUp_device[i4];
// now actually set the sBitmapLookupTable value
_cBitmapLookupTable[s] = a1 | a2 | a3 | a4;
s ++;
}
s += blockDim.x*gridDim.x*blockDim.y*gridDim.y;
i1 += blockDim.x*gridDim.x;
i2 += blockDim.y*gridDim.y;
}