如何在CUDA中高效地执行此内核
我的测试函数是这样的在CUDA中执行此内核的效率如何,cuda,Cuda,我的测试函数是这样的 DIMENSION 20 POPSIZE 5000 __global__ void repairT(int* H, int* diff){ int tidx = blockDim.x * blockIdx.x + threadIdx.x; int ii = tidx * DIMENSION; //if (ii < DIMENSION * POPSIZE) //{ i
#define DIMENSION 20
#define POPSIZE 5000

/**
 * repairT - for each individual, collect the values in 1..DIMENSION that the
 * individual does not contain (the complement of the individual's gene set).
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per individual. Threads
 * whose global index is >= POPSIZE return without touching memory, so the
 * grid may be rounded up to a whole number of blocks.
 *
 * @param H     Input. Thread t reads H[t*DIMENSION .. t*DIMENSION + DIMENSION-1].
 *              Presumably holds POPSIZE individuals back to back - confirm
 *              against the host-side allocation.
 * @param diff  Output. Thread t writes DIMENSION ints starting at
 *              diff[t*DIMENSION*DIMENSION]: the missing values in ascending
 *              order, followed by zeros.
 */
__global__ void repairT(int* H, int* diff){
    int tidx = blockDim.x * blockIdx.x + threadIdx.x;

    // Bounds guard (was commented out): without it, surplus threads of a
    // rounded-up grid read and write past the end of both arrays.
    if (tidx >= POPSIZE)
        return;

    int ii = tidx * DIMENSION;

    // Read this thread's individual from H exactly once; the search below
    // would otherwise re-read every element DIMENSION times.
    int Hcache[DIMENSION];
    for (int j = 0; j < DIMENSION; j++)
    {
        Hcache[j] = H[ii + j];
    }

    // Zero-initialised so that unused trailing slots are written out as 0.
    int Hdiff[DIMENSION] = { 0 };
    int diffcount = 0;

    // Complement set of H: keep every candidate value i that is absent.
    for (int i = 1; i <= DIMENSION; i++)
    {
        bool isInIndiv = false;
        for (int j = 0; j < DIMENSION; j++)
        {
            if (Hcache[j] == i)
            {
                isInIndiv = true;
                break;   // one hit is enough
            }
        }
        if (!isInIndiv)
        {
            Hdiff[diffcount] = i;
            diffcount++;
        }
    }

    // Copy the result to the output array.
    // NOTE(review): ii already equals tidx * DIMENSION, so this offset is
    // tidx * DIMENSION * DIMENSION. The original layout is preserved here;
    // if diff holds only POPSIZE * DIMENSION ints the offset should be ii.
    // Confirm against the host-side allocation of diff.
    int diffc = ii * DIMENSION;
    for (int i = 0; i < DIMENSION; i++)
    {
        diff[diffc + i] = Hdiff[i];
    }
}
#define DIMENSION 20
#define POPSIZE 5000
__global__ void repairT(int* H, int* diff){
int tidx = blockDim.x * blockIdx.x + threadIdx.x;
int ii = tidx * DIMENSION;
//if (ii < DIMENSION * POPSIZE)
//{
int Hdiff[DIMENSION] = { 0 };
int diffcount = 0;
bool isInIndiv = false;
//complement set H
对于(int i=1;i(d_H,d_diff);
请给我一些建议。 您对H和diff的访问不是合并(coalesced)访问,这意味着内存单元的利用效率不高。您需要对数据重新排序,或者修改代码以实现合并访问。此外,您似乎在反复读取H[j]。您可以定义一个小数组Hcache并预先加载这些值,以避免重复读取:
// Load this thread's individual from H once, so the search below does not
// re-read the same elements of H on every pass.
int Hcache[DIMENSION];
for (int j = 0; j < DIMENSION; j++) //H for
{
    Hcache[j] = H[j + ii];
}
// For every candidate value i, record it in Hdiff when the individual lacks it.
for (int i = 1; i <= DIMENSION; i++)
{
    // Hcache has exactly DIMENSION entries, so scan indices 0..DIMENSION-1.
    // (The bound used to be ii, which runs past the end of Hcache for every
    // thread with ii > DIMENSION and scans nothing for thread 0.)
    for (int j = 0; j < DIMENSION; j++) //H for
    {
        if (i == Hcache[j])
        {
            isInIndiv = true;
            break;   // one hit is enough
        }
    }
    if (isInIndiv == false)
    {
        Hdiff[diffcount] = i;
        diffcount++;
    }
    else
        isInIndiv = false;   // reset the flag for the next candidate value
}
int Hcache[DIMENSION];
for(int j=0;j 对于(inti=1;i 我从ii的声明中删除了DIMENSION,但现在内核有时会算出错误的结果,不过它能处理所有数组。 我很确定这并没有回答所提出的问题(问题本身问得非常糟糕)。 @talonmies,这是有可能的。我已尽力理解这个问题并帮助Petr。我想指出的是,重复读取相同的数据代价非常高,把这种循环不变量移到循环之外有助于提高性能。如果您对这个问题有更好的理解,请告诉我。 这实际上是一个“帮我解决代码不起作用”的问题,但没有足够的细节来给出答案。 因为Petr问的是如何高效地使用这个内核,我认为Florent的回答仍然可以帮助他,因为它指出了影响性能的两个实现方面。
// Load this thread's individual from H once, so the search below does not
// re-read the same elements of H on every pass.
int Hcache[DIMENSION];
for (int j = 0; j < DIMENSION; j++) //H for
{
    Hcache[j] = H[j + ii];
}
// For every candidate value i, record it in Hdiff when the individual lacks it.
for (int i = 1; i <= DIMENSION; i++)
{
    // Hcache has exactly DIMENSION entries, so scan indices 0..DIMENSION-1.
    // (The bound used to be ii, which runs past the end of Hcache for every
    // thread with ii > DIMENSION and scans nothing for thread 0.)
    for (int j = 0; j < DIMENSION; j++) //H for
    {
        if (i == Hcache[j])
        {
            isInIndiv = true;
            break;   // one hit is enough
        }
    }
    if (isInIndiv == false)
    {
        Hdiff[diffcount] = i;
        diffcount++;
    }
    else
        isInIndiv = false;   // reset the flag for the next candidate value
}