在cuda中使用共享内存会导致内存写入错误
标签:cuda, shared-memory
我有一个内核,其代码如下:
// For the particle handled by this thread, searches the 3x3x3 block of cells
// around the particle's cell for the triangle with the smallest distance and
// stores that distance and the triangle index in c_daSTLDistance / c_daSTLID.
//
// ComputeParticleNumber: number of particles to process; threads whose index
// is at or beyond it exit without touching memory.
//
// If no triangle is found (or the particle is filtered out), the stored result
// is magnitude 1e8, a zero direction and triangle index -1.
__global__ static void CalcSTLDistance_Kernel(Integer ComputeParticleNumber)
{
    // presumably the grid-wide thread index; verify against CudaGetTargetID
    const Integer ParticleIndex = CudaGetTargetID();
    if (ParticleIndex >= ComputeParticleNumber)
    {
        return;
    }

    // Sentinel result used when nothing closer is found.
    CDistance Best;
    Integer BestTriangle = -1;
    Best.Magnitude = 1e8;
    Best.Direction = make_Scalar3(0, 0, 0);

    // Particles whose output ID is below -1 keep the sentinel result and
    // skip the search entirely.
    const bool SkipSearch = (c_daOutputParticleID[ParticleIndex] < -1);
    if (!SkipSearch)
    {
        Scalar3 Position = c_daParticlePosition[ParticleIndex];
        Integer CellX, CellY, CellZ;
        const Integer HomeCell = GetCellID(&CONSTANT_BOUNDINGBOX, &Position, CellX, CellY, CellZ);
        const Integer Reach = 1;

        if (HomeCell >= 0 && HomeCell < c_CellNum)
        {
            for (Integer dz = -Reach; dz <= Reach; ++dz)
            {
                for (Integer dy = -Reach; dy <= Reach; ++dy)
                {
                    for (Integer dx = -Reach; dx <= Reach; ++dx)
                    {
                        const Integer Neighbour = GetCellID(&CONSTANT_BOUNDINGBOX, CellX + dx, CellY + dy, CellZ + dz);
                        // Ignore neighbours that fall outside the cell array.
                        if (!(Neighbour >= 0 && Neighbour < c_CellNum))
                        {
                            continue;
                        }

                        const unsigned int Count = c_daCell[Neighbour].m_TriangleNum;
                        for (unsigned int t = 0; t < Count; ++t)
                        {
                            const Integer Candidate = c_daCell[Neighbour].m_TriangleID[t];
                            // Skip invalid indices and the triangle that is
                            // already the current best (no need to recompute it).
                            if (Candidate < 0 || Candidate >= c_TriangleNum || Candidate == BestTriangle)
                            {
                                continue;
                            }

                            CDistance Current;
                            Current.Magnitude = CalcDistance(&c_daTriangles[Candidate], &Position, &Current.Direction);
                            if (Current.Magnitude < Best.Magnitude)
                            {
                                Best = Current;
                                BestTriangle = Candidate;
                            }
                        }
                    }
                }
            }
        }
    }

    // Single write-back point for both the filtered and the searched case.
    c_daSTLDistance[ParticleIndex] = Best;
    c_daSTLID[ParticleIndex] = BestTriangle;
}
您可以在这篇文章(原文此处的链接已丢失)中找到有关如何使用共享内存的有用信息。请特别关注其中的"静态共享内存"和"动态共享内存"两节。
根据上面的文章,您应该会发现,正如错误消息所说的那样,您只是写到了数组 `s` 的边界之外。要解决此问题,您可以:
- 如果事先知道大小,就在编译时指定共享内存数组 `s` 的大小,例如 `__shared__ float3 s[123456];`
- 或者使用动态大小的 `s` 数组,这基本上就是您目前正在做的事情,但是还需要把共享内存的字节数作为第三个内核启动参数传入,例如 `CalcSTLDistance_Kernel<<<gridSize, blockSize, sharedMemorySizeInBytes>>>(ComputeParticleNumber);`。如果您要使用包含 `123456` 个 `float3` 元素的 `s` 数组,则使用 `int sharedMemorySizeInBytes = 123456 * sizeof(float3);`
// Shared-memory variant of the nearest-triangle search: each thread stages its
// particle position in dynamic shared memory, then searches the 3x3x3 block of
// cells around the particle's cell for the closest triangle and stores the
// result in c_daSTLDistance / c_daSTLID.
//
// ComputeParticleNumber: number of particles to process; threads whose index
// is at or beyond it do no global-memory access.
//
// Launch requirement: the third launch parameter (dynamic shared memory size)
// must be at least blockDim.x * blockDim.y * blockDim.z * sizeof(float3) bytes.
// Launching without it leaves s[] with zero bytes and every store to s[] is
// out of bounds.
__global__ static void CalcSTLDistance_Kernel(Integer ComputeParticleNumber)
{
    // presumably the grid-wide thread index; verify against CudaGetTargetID
    const Integer ID = CudaGetTargetID();
    extern __shared__ float3 s[];

    // Shared memory is allocated per block, so it must be indexed with the
    // thread's position inside the block, never with the grid-wide ID.
    const unsigned int LocalID = threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);

    // Out-of-range threads must not touch c_daParticlePosition, but they still
    // have to reach __syncthreads(), so the guard is a predicate rather than
    // an early return placed before the barrier.
    const bool IsActive = (ID < ComputeParticleNumber);
    if (IsActive)
    {
        s[LocalID] = c_daParticlePosition[ID];
    }
    __syncthreads();
    if (!IsActive)
    {
        return;
    }

    // Sentinel result used when nothing closer is found.
    CDistance NearestDistance;
    Integer NearestID = -1;
    NearestDistance.Magnitude = 1e8;
    NearestDistance.Direction.x = 0;
    NearestDistance.Direction.y = 0;
    NearestDistance.Direction.z = 0;

    // The c_daOutputParticleID filter is disabled in this variant:
    //if(c_daOutputParticleID[ID] < -1)
    //{
    //    c_daSTLDistance[ID] = NearestDistance;
    //    c_daSTLID[ID] = NearestID;
    //    return;
    //}

    Integer TriangleID;
    Integer CIDX, CIDY, CIDZ;
    Integer CID = GetCellID(&CONSTANT_BOUNDINGBOX, &s[LocalID], CIDX, CIDY, CIDZ);
    if(CID >= 0 && CID < c_CellNum)
    {
        for(Integer k = -1; k <= 1; ++k)
        {
            for(Integer j = -1; j <= 1; ++j)
            {
                for(Integer i = -1; i <= 1; ++i)
                {
                    Integer MCID = GetCellID(&CONSTANT_BOUNDINGBOX, CIDX + i, CIDY + j, CIDZ + k);
                    // Ignore neighbours that fall outside the cell array.
                    if(MCID < 0 || MCID >= c_CellNum)
                    {
                        continue;
                    }
                    unsigned int TriangleNum = c_daCell[MCID].m_TriangleNum;
                    for(unsigned int l = 0; l < TriangleNum; ++l)
                    {
                        TriangleID = c_daCell[MCID].m_TriangleID[l];
                        /*if(c_daTrianglesParameters[c_daTriangles[TriangleID].ModelIDNumber].isDrag)
                        {
                            continue;
                        }*/
                        // No need to calculate again for the same triangle
                        if(TriangleID >= 0 && TriangleID < c_TriangleNum && TriangleID != NearestID)
                        {
                            CDistance Distance;
                            Distance.Magnitude = CalcDistance(&c_daTriangles[TriangleID], &s[LocalID], &Distance.Direction);
                            if(Distance.Magnitude < NearestDistance.Magnitude)
                            {
                                NearestDistance = Distance;
                                NearestID = TriangleID;
                            }
                        }
                    }
                }
            }
        }
    }
    c_daSTLDistance[ID] = NearestDistance;
    c_daSTLID[ID] = NearestID;
}
Invalid __shared__ write of size 4
========= at 0x00000128 in CalcSTLDistance_Kernel(int)
========= by thread (159,0,0) in block (0,0,0)
========= Address 0x0000077c is out of bounds