Memory management cuda内核是否受每个线程/块的内存使用限制
我有一个正确执行的内核代码 可运行代码Memory management cuda内核是否受每个线程/块的内存使用限制,memory-management,cuda,Memory Management,Cuda,我有一个正确执行的内核代码 可运行代码 __global__ static void CalcSTLDistance_Kernel(Integer ComputeParticleNumber) { //const Integer TID = CudaGetTargetID(); const Integer ID =CudaGetTargetID(); /*if(ID >= ComputeParticleNumber) { return ;
__global__ static void CalcSTLDistance_Kernel(Integer ComputeParticleNumber)
{
//const Integer TID = CudaGetTargetID();
const Integer ID =CudaGetTargetID();
/*if(ID >= ComputeParticleNumber)
{
return ;
}*/
CDistance NearestDistance;
Integer NearestID = -1;
NearestDistance.Magnitude = 1e8;
NearestDistance.Direction.x = 0;
NearestDistance.Direction.y = 0;
NearestDistance.Direction.z = 0;//make_Scalar3(0,0,0);
//if(c_daOutputParticleID[ID] < -1)
//{
// c_daSTLDistance[ID] = NearestDistance;
// c_daSTLID[ID] = NearestID;
// return;
//}
//Scalar3 TargetPosition = c_daParticlePosition[ID];
Integer TriangleID;
Integer CIDX, CIDY, CIDZ;
Integer CID = GetCellID(&CONSTANT_BOUNDINGBOX,&c_daParticlePosition[ID],CIDX, CIDY, CIDZ);
if(CID >=0 && CID < c_CellNum)
{
//Integer Range = 1;
for(Integer k = -1; k <= 1; ++k)
{
for(Integer j = -1; j <= 1; ++j)
{
for(Integer i = -1; i <= 1; ++i)
{
Integer MCID = GetCellID(&CONSTANT_BOUNDINGBOX,CIDX +i, CIDY + j,CIDZ + k);
if(MCID < 0 || MCID >= c_CellNum)
{
continue;
}
unsigned int TriangleNum = c_daCell[MCID].m_TriangleNum;
for(unsigned int l = 0; l < TriangleNum; ++l)
{
TriangleID = c_daCell[MCID].m_TriangleID[l];
/*if(c_daTrianglesParameters[c_daTriangles[TriangleID].ModelIDNumber].isDrag)
{
continue;
}*/
if( TriangleID >= 0 && TriangleID < c_TriangleNum && TriangleID != NearestID)// No need to calculate again for the same triangle
{
CDistance Distance ;
Distance.Magnitude = CalcDistance(&c_daTriangles[TriangleID], &c_daParticlePosition[ID], &Distance.Direction);
if(Distance.Magnitude < NearestDistance.Magnitude)
{
NearestDistance = Distance;
NearestID = TriangleID;
}
}
}
}
}
}
}
c_daSTLDistance[ID] = NearestDistance;
c_daSTLID[ID] = NearestID;
}
__global__ static void CalcSTLDistance_Kernel(Integer ComputeParticleNumber)
{
//const Integer TID = CudaGetTargetID();
const Integer ID =CudaGetTargetID();
/*if(ID >= ComputeParticleNumber)
{
return ;
}*/
CDistance NearestDistance;
Integer NearestID = -1;
NearestDistance.Magnitude = 1e8;
NearestDistance.Direction.x = 0;
NearestDistance.Direction.y = 0;
NearestDistance.Direction.z = 0;//make_Scalar3(0,0,0);
//if(c_daOutputParticleID[ID] < -1)
//{
// c_daSTLDistance[ID] = NearestDistance;
// c_daSTLID[ID] = NearestID;
// return;
//}
//Scalar3 TargetPosition = c_daParticlePosition[ID];
Integer TriangleID;
Integer CIDX, CIDY, CIDZ;
Integer CID = GetCellID(&CONSTANT_BOUNDINGBOX,&c_daParticlePosition[ID],CIDX, CIDY, CIDZ);
int len=0;
int td[100];
for(int m=0;m<100;m++)
{
td[m]=-1;
}
if(CID >=0 && CID < c_CellNum)
{
//Integer Range = 1;
for(Integer k = -1; k <= 1; ++k)
{
for(Integer j = -1; j <= 1; ++j)
{
for(Integer i = -1; i <= 1; ++i)
{
Integer MCID = GetCellID(&CONSTANT_BOUNDINGBOX,CIDX +i, CIDY + j,CIDZ + k);
if(MCID < 0 || MCID >= c_CellNum)
{
continue;
}
unsigned int TriangleNum = c_daCell[MCID].m_TriangleNum;
bool flag = false;
//len=len+TriangleNum ;
for(unsigned int l = 0; l < TriangleNum; ++l)
{
TriangleID = c_daCell[MCID].m_TriangleID[l];
//tem[l] = c_daCell[MCID].m_TriangleID[l];
for(int m=0;m<100;m++)
{
if(TriangleID ==td[m])
{
flag= true;
}
if(flag == true)
break;
}
if(flag == true)
continue;
else
{
td[len] = TriangleID;
len= len+1;
if( TriangleID >= 0 && TriangleID < c_TriangleNum && TriangleID != NearestID)// No need to calculate again for the same triangle
{
CDistance Distance ;
Distance.Magnitude = CalcDistance(&c_daTriangles[TriangleID], &c_daParticlePosition[ID], &Distance.Direction);
if(Distance.Magnitude < NearestDistance.Magnitude)
{
NearestDistance = Distance;
NearestID = TriangleID;
}
}
}
}
}
}
}
}
c_daSTLDistance[ID] = NearestDistance;
c_daSTLID[ID] = NearestID;
}
\uuuuu全局\uuuu静态无效CalCstlInstance\u内核(整数ComputeParticleNumber)
{
//常量整数TID=cudagetargetid();
const Integer ID=cudagetargetid();
/*如果(ID>=ComputeParticleNumber)
{
返回;
}*/
距离最近距离;
整数NearestID=-1;
最近距离。震级=1e8;
最近距离。方向。x=0;
最近距离。方向。y=0;
最近距离.Direction.z=0;//make_Scalar3(0,0,0);
//如果(c_daOutputParticleID[ID]<-1)
//{
//c_dastldance[ID]=最近距离;
//c_daSTLID[ID]=NearestID;
//返回;
//}
//Scalar3 TargetPosition=c_daParticlePosition[ID];
整数三角形;
整数CIDX,CIDY,CIDZ;
整数CID=GetCellID(&CONSTANT_BOUNDINGBOX,&c_daParticlePosition[ID],CIDX,CIDY,CIDZ);
如果(CID>=0&&CID 对于(整数k=-1;k请修复第二个代码示例中的代码格式。这在发布时极难阅读。我有一个数组,并尝试将以前未计算的三角形ID插入数组末尾,因此会检查此数组以确定当前三角形ID是否在数组中。如果它已经在数组中,则跳过c计算。这段代码在三角形较少的情况下运行良好,但如果三角形增加,则会产生未知错误。在第二段代码中,每个线程分配100
ints
。这取决于内核启动配置和硬件,可能需要太多内存。我使用的是gtx 780 gpu。这是不是100个整数的位置超过了线程内存限制。每个线程的最大限制是多少???@JackOLantern:线程的数量应该是不相关的。本地内存保留是(我相信从CUDA 6开始)对目标GPU支持的最大线程数以及从特定模块加载的最大本地内存占用量进行了先验计算。