CUDA 雅可比松弛

CUDA 雅可比松弛(标签:cuda、gpu):我正在将一段顺序计算映射为 CUDA 计算。该计算是 N×N 网格上的二维雅可比松弛,其中 N 未知,但 N 可被 32 整除。

我正在将下面这段顺序计算映射为 CUDA 计算。该计算是 N×N 网格上的二维雅可比松弛,其中 N 未知,但 N 可被 32 整除:

/*
 * Sequential relaxation sweep over the interior of a square grid.
 *
 * For every cell (i, j) with 1 <= i, j <= N this stores
 *     a[i][j] = 0.8 * (b[i+1][j] + b[i+1][j] + b[i][j+1] + b[i][j+1])
 * The two neighbours are each added twice, exactly as in the original
 * formula.
 *
 * a, b : row-major arrays of (N + 2) * (N + 2) floats. The loops index rows
 *        and columns 1..N and read b at i + 1 and j + 1, so each row must
 *        hold N + 2 elements and there must be N + 2 rows. Border cells of
 *        a are never written.
 * N    : number of interior cells per dimension; nothing happens if N <= 0.
 */
void Jacobi(float *a, float *b, int N){
   const int pitch = N + 2;   /* elements per row, including both border cells */
   int i, j;

   for (i = 1; i < N + 1; i++) {
      const int row  = i * pitch;         /* offset of row i     */
      const int next = (i + 1) * pitch;   /* offset of row i + 1 */
      for (j = 1; j < N + 1; j++) {
         /* Sum order kept as written so floating-point rounding is unchanged. */
         a[row + j] = (float)(0.8 * (b[next + j] + b[next + j]
                                     + b[row + j + 1] + b[row + j + 1]));
      }
   }
}
这是一个简单的实现。您可以对这个内核函数使用共享内存进行优化:

// One relaxation sweep over the interior of a square grid stored as a flat,
// row-major array. For every cell (row j, column i) with 1 <= i, j <= N:
//     a[j][i] = 0.8 * (2 * b[j][i+1] + 2 * b[j+1][i])
//
// Layout: a and b each hold (N + 2) * (N + 2) elements, i.e. the N x N grid
//   plus a one-cell border. Rows are N + 2 elements long, so the reads at
//   i + 1 and j + 1 stay in bounds and never wrap into another row. Border
//   cells of a are not written.
// Launch: 2D grid of 2D blocks providing at least N threads in x and in y,
//   e.g. dim3 block(16, 16); dim3 grid((N + 15) / 16, (N + 15) / 16).
//   Surplus threads are masked by the bounds check. No shared memory is used.
// a and b must not overlap: threads read b while other threads write a.
//
// T is the element type. It is deduced from the arguments, so existing calls
// with int buffers keep compiling, and float buffers avoid truncating the
// 0.8 factor.
template <typename T>
__global__ void jacobi(T* a, const T* b, const int N)
{
  // +1 skips the border so the threads cover indices 1..N.
  const int i = blockIdx.x * blockDim.x + threadIdx.x + 1;  // column
  const int j = blockIdx.y * blockDim.y + threadIdx.y + 1;  // row
  if (i <= N && j <= N)
  {
    const size_t pitch = static_cast<size_t>(N) + 2;  // elements per row
    const size_t cell  = j * pitch + i;
    // 0.8f instead of 0.8: a double literal would promote the arithmetic to double.
    a[cell] = static_cast<T>(0.8f * (2 * b[cell + 1] + 2 * b[cell + pitch]));
  }
}
这是一个简单的实现。您可以对这个内核函数使用共享内存进行优化:

// One relaxation sweep over the interior of a square grid stored as a flat,
// row-major array. For every cell (row j, column i) with 1 <= i, j <= N:
//     a[j][i] = 0.8 * (2 * b[j][i+1] + 2 * b[j+1][i])
//
// Layout: a and b each hold (N + 2) * (N + 2) elements, i.e. the N x N grid
//   plus a one-cell border. Rows are N + 2 elements long, so the reads at
//   i + 1 and j + 1 stay in bounds and never wrap into another row. Border
//   cells of a are not written.
// Launch: 2D grid of 2D blocks providing at least N threads in x and in y,
//   e.g. dim3 block(16, 16); dim3 grid((N + 15) / 16, (N + 15) / 16).
//   Surplus threads are masked by the bounds check. No shared memory is used.
// a and b must not overlap: threads read b while other threads write a.
//
// T is the element type. It is deduced from the arguments, so existing calls
// with int buffers keep compiling, and float buffers avoid truncating the
// 0.8 factor.
template <typename T>
__global__ void jacobi(T* a, const T* b, const int N)
{
  // +1 skips the border so the threads cover indices 1..N.
  const int i = blockIdx.x * blockDim.x + threadIdx.x + 1;  // column
  const int j = blockIdx.y * blockDim.y + threadIdx.y + 1;  // row
  if (i <= N && j <= N)
  {
    const size_t pitch = static_cast<size_t>(N) + 2;  // elements per row
    const size_t cell  = j * pitch + i;
    // 0.8f instead of 0.8: a double literal would promote the arithmetic to double.
    a[cell] = static_cast<T>(0.8f * (2 * b[cell + 1] + 2 * b[cell + pitch]));
  }
}
或者,如果您想使用“数组的数组”而不是一维数组:

// One relaxation sweep over the interior of a square grid stored as an array
// of row pointers. For every cell (row, col) with 1 <= row, col <= N:
//     a[row][col] = 0.8 * (b[row+1][col] + b[row+1][col]
//                          + b[row][col+1] + b[row][col+1])
//
// Layout: the kernel dereferences a[1..N] and b[1..N+1], and touches columns
//   1..N of a's rows and 1..N+1 of b's rows. Both the pointer tables and the
//   rows they point to must therefore be readable from device code and sized
//   for the N x N grid plus a one-cell border. Border cells of a are not
//   written.
// Launch: 2D grid of 2D blocks providing at least N threads in x and in y,
//   e.g. dim3 block(16, 16); dim3 grid((N + 15) / 16, (N + 15) / 16).
//   Surplus threads are masked by the bounds check. No shared memory is used.
// a's rows must not overlap b's rows: threads read b while others write a.
//
// T is the element type. It is deduced from the arguments, so existing calls
// with int buffers keep compiling, and float buffers avoid truncating the
// 0.8 factor.
template <typename T>
__global__ void Jacobi(T** a, const T** b, const int N)
{
  // threadIdx.x selects the column, so consecutive threads in x touch
  // consecutive elements of the same row. +1 skips the border (indices 1..N).
  const int col = blockIdx.x * blockDim.x + threadIdx.x + 1;
  const int row = blockIdx.y * blockDim.y + threadIdx.y + 1;
  if (row <= N && col <= N)
  {
    const T* below = b[row + 1];
    const T* here  = b[row];
    // Sum order kept as written so floating-point rounding is unchanged.
    // 0.8f instead of 0.8: a double literal would promote the arithmetic to double.
    a[row][col] = static_cast<T>(0.8f * (below[col] + below[col] + here[col + 1] + here[col + 1]));
  }
}
或者,如果您想使用“数组的数组”而不是一维数组:

// One relaxation sweep over the interior of a square grid stored as an array
// of row pointers. For every cell (row, col) with 1 <= row, col <= N:
//     a[row][col] = 0.8 * (b[row+1][col] + b[row+1][col]
//                          + b[row][col+1] + b[row][col+1])
//
// Layout: the kernel dereferences a[1..N] and b[1..N+1], and touches columns
//   1..N of a's rows and 1..N+1 of b's rows. Both the pointer tables and the
//   rows they point to must therefore be readable from device code and sized
//   for the N x N grid plus a one-cell border. Border cells of a are not
//   written.
// Launch: 2D grid of 2D blocks providing at least N threads in x and in y,
//   e.g. dim3 block(16, 16); dim3 grid((N + 15) / 16, (N + 15) / 16).
//   Surplus threads are masked by the bounds check. No shared memory is used.
// a's rows must not overlap b's rows: threads read b while others write a.
//
// T is the element type. It is deduced from the arguments, so existing calls
// with int buffers keep compiling, and float buffers avoid truncating the
// 0.8 factor.
template <typename T>
__global__ void Jacobi(T** a, const T** b, const int N)
{
  // threadIdx.x selects the column, so consecutive threads in x touch
  // consecutive elements of the same row. +1 skips the border (indices 1..N).
  const int col = blockIdx.x * blockDim.x + threadIdx.x + 1;
  const int row = blockIdx.y * blockDim.y + threadIdx.y + 1;
  if (row <= N && col <= N)
  {
    const T* below = b[row + 1];
    const T* here  = b[row];
    // Sum order kept as written so floating-point rounding is unchanged.
    // 0.8f instead of 0.8: a double literal would promote the arithmetic to double.
    a[row][col] = static_cast<T>(0.8f * (below[col] + below[col] + here[col + 1] + here[col + 1]));
  }
}
您确定这些方程是正确的吗?
`b[i+1][j]+b[i+1][j]+b[i][j+1]+b[i][j+1]` 与 `2*b[i+1][j]+2*b[i][j+1]` 相同。这些方程是正确的。