C 在处理笛卡尔网格的MPI程序中解决死锁的问题

C 在处理笛卡尔网格的MPI程序中解决死锁的问题,c,mpi,deadlock,C,Mpi,Deadlock,我正在实现cannon的算法。我使用4个处理器运行它。我进入循环时遇到了死锁: for (i=0; i<dims[0]; i++) { Multiply(nlocal, a, b, c); MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE,leftrank, 1, rightrank, 1, comm_2d, &status); MPI_Sendrecv_replace(b, nlocal*nlo

我正在实现cannon的算法。我使用4个处理器运行它。我进入循环时遇到了死锁:

   /* Main loop from the question: dims[0] rounds of multiply-then-shift. */
   for (i=0; i<dims[0]; i++) { 
    /* Local block product on the nlocal-sized blocks currently held. */
    Multiply(nlocal, a, b, c); 
    /* Send this rank's a block to leftrank and replace it with the block received from rightrank. */
    MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE,leftrank, 1, rightrank, 1, comm_2d, &status); 
    /* Send this rank's b block to uprank and replace it with the block received from downrank. */
    MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE,uprank, 1, downrank, 1, comm_2d, &status); 
  } 

有两个小问题可以解决:

  • 在以下几行中:

    /* Problematic order: n is read by the three allocations below before the MPI_Bcast that delivers it. */
    a = calloc(n*n, sizeof(double));
    b = calloc(n*n, sizeof(double));
    c = calloc(n*n, sizeof(double));
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    
必须先广播 n,再分配 a、b、c。否则在分配时 n 尚未初始化,程序的行为是未定义的,并可能触发段错误。

  • 在函数 MPI_Cart_shift() 中,第三个参数是位移:负值表示向下移位,正值表示向上移位。我把它改为对所有进程使用相同的位移,程序就能正常运行了。即使使用 MPI_Sendrecv_replace(),每个进程接收的消息数也必须与发送给该进程的消息数一致。当各进程使用不同的位移之后再调用
    MPI_Sendrecv_replace()
    时,情况可能并非如此。

在 MPI 标准给出的示例中,做法略有不同:

 C compute shift source and destination
    CALL MPI_CART_SHIFT(comm, 0, coords(2), source,
                        dest, ierr)
  C skew array
    CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, dest, 0,
                              source, 0, comm, status,
                              ierr)
在这种情况下,同一行中的所有进程都使用相同的位移。因此,每个进程都会发送一条消息,也都会接收一条消息。不过,位移的大小取决于进程所在的行,矩阵因此被倾斜(skew)。

下面是修改后的代码。它用 mpicc main.c -o main -lm -Wall 编译,并用 mpirun -np 4 main 运行:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"

/*
 * Communication skeleton of Cannon's algorithm on a periodic 2-D Cartesian
 * process grid.  The local Multiply() step is left commented out, so only
 * the block shifts of a and b are exercised.
 *
 * Build: mpicc main.c -o main -lm -Wall
 * Run:   mpirun -np 4 main   (the process count must be a perfect square)
 *
 * Returns 0 on success; aborts the whole MPI job on invalid sizes, on an
 * unsuitable process count, or on allocation failure.
 */
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    double *a = NULL, *b = NULL, *c = NULL;
    double t;                 /* wall-clock time; MPI_Wtime() returns a double */
    int i, n = 0;
    int nlocal;
    int npes, dims[2], periods[2];
    int myrank, my2drank, mycoords[2];
    int uprank, downrank, leftrank, rightrank;
    int shiftsource, shiftdest;
    MPI_Status status;
    MPI_Comm comm_2d;

    MPI_Comm_size(MPI_COMM_WORLD, &npes);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    MPI_Barrier(MPI_COMM_WORLD);

    t = -MPI_Wtime();

    if (myrank == 0) {
        int sizeA, sizeB;

        printf("Reading \n");
        /* Placeholder for: a = readMatrix(argv[1], &sizeA); */
        sizeA = 16;
        /* Placeholder for: b = readMatrix(argv[2], &sizeB); */
        sizeB = 16;

        /* Validate before any broadcast so no rank consumes inconsistent data. */
        if (sizeA != sizeB) {
            printf("Matrix not sized n^2\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* calloc rather than malloc: the buffers are broadcast below and
         * must not contain indeterminate values. */
        a = calloc((size_t)sizeA * sizeA, sizeof *a);
        b = calloc((size_t)sizeB * sizeB, sizeof *b);
        printf("Reading \n");
        c = calloc((size_t)sizeA * sizeB, sizeof *c);
        n = sizeA;
    }

    /* n must reach every rank before the other ranks size their buffers. */
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (myrank != 0) {
        a = calloc((size_t)n * n, sizeof *a);
        b = calloc((size_t)n * n, sizeof *b);
        c = calloc((size_t)n * n, sizeof *c);
    }

    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "rank[%d]: out of memory\n", myrank);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Bcast(a, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(c, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    /* Square process grid; round so the conversion from double cannot
     * truncate an exact root downwards. */
    dims[0] = dims[1] = (int)(sqrt((double)npes) + 0.5);
    if (dims[0] * dims[1] != npes) {
        if (myrank == 0) {
            fprintf(stderr, "Number of processes (%d) is not a perfect square\n", npes);
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Wrap around in both dimensions. */
    periods[0] = periods[1] = 1;

    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);

    MPI_Comm_rank(comm_2d, &my2drank);
    MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

    /* Neighbours for the per-iteration shifts (displacement -1 in each dimension). */
    MPI_Cart_shift(comm_2d, 0, -1, &rightrank, &leftrank);
    MPI_Cart_shift(comm_2d, 1, -1, &downrank, &uprank);

    nlocal = n/dims[0];

    /* Initial shift of a.  Every rank uses the same displacement (-1); the
     * variant with displacement -mycoords[0] was replaced by this one. */
    MPI_Cart_shift(comm_2d, 0, -1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE, shiftdest, 5, shiftsource, 5, comm_2d, &status);

    /* Initial shift of b, again with a uniform displacement instead of -mycoords[1]. */
    MPI_Cart_shift(comm_2d, 1, -1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE, shiftdest, 6, shiftsource, 6, comm_2d, &status);

    printf("rank[%d] has entered loop dim %d\n", myrank,dims[0]);fflush(stdout);
    for (i = 0; i < dims[0]; i++) {
        /* Multiply(nlocal, a, b, c); -- local block product, disabled here */

        /* Pass a to leftrank, take the replacement from rightrank. */
        MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE, leftrank, 1, rightrank, 1, comm_2d, &status);

        /* Pass b to uprank, take the replacement from downrank. */
        MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE, uprank, 2, downrank, 2, comm_2d, &status);
    }
    printf("rank[%d] has left loop\n", myrank);fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    /* Reverse the initial shifts (displacement +1 instead of +mycoords[...]). */
    MPI_Cart_shift(comm_2d, 0, 1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE, shiftdest, 3, shiftsource, 3, comm_2d, &status);

    MPI_Cart_shift(comm_2d, 1, 1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE, shiftdest, 4, shiftsource, 4, comm_2d, &status);

    printf("rank[%d] has reached the barrier...\n", myrank);fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    if (myrank == 0) {
        t += MPI_Wtime();
        /* Placeholder for: writeMatrix(c, argv[3], n); */
        printf("Finshed in %f second(s)\n", t);
    }

    free(a);
    free(b);
    free(c);

    MPI_Comm_free(&comm_2d);

    MPI_Finalize();

    return 0;
}
#包括
#包括
#包括
#包括“mpi.h”
int main(int argc,字符**argv)
{
MPI_Init(&argc,&argv);
双*a、*b、*c;
int i,t,n;
国际非本地;
国际净现值、dims[2],期间[2];
int-myrank、my2drank、mycoords[2];
int上列、下列、左列、右列;
int shiftsource,shiftdest;
MPI_状态;
MPI_通信2d;
MPI通信大小(MPI通信世界和NPE);
MPI_Comm_rank(MPI_Comm_WORLD和myrank);
MPI_屏障(MPI_通信世界);
t=-MPI_Wtime();
如果(myrank==0){
国际标准化组织,标准化组织;
printf(“读取”\n);
//a=读取矩阵(argv[1],&sizeA);
sizeA=16;
a=malloc(sizeA*sizeA*sizeof(双));
//b=读取矩阵(argv[2],&sizeB);
sizeB=16;
b=malloc(sizeB*sizeB*sizeof(双));
printf(“读取”\n);
c=calloc(sizeA*sizeB,sizeof(双));
n=sizeA;
MPI_Bcast(&n,1,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(a、n*n、MPI_双精度、0、MPI_通信世界);
MPI_Bcast(b,n*n,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(c,n*n,MPI_双精度,0,MPI_通信世界);
if(sizeA!=sizeB){
printf(“矩阵未调整大小n^2\n”);
MPI_中止(MPI_通信世界,0);
}
}
否则{
MPI_Bcast(&n,1,MPI_INT,0,MPI_COMM_WORLD);//n应在分配前广播
a=calloc(n*n,sizeof(双));
b=calloc(n*n,sizeof(双));
c=calloc(n*n,sizeof(双));
MPI_Bcast(a、n*n、MPI_双精度、0、MPI_通信世界);
MPI_Bcast(b,n*n,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(c,n*n,MPI_双精度,0,MPI_通信世界);
}
dims[0]=dims[1]=sqrt(npes);
期间[0]=期间[1]=1;
MPI_Cart_create(MPI_COMM_WORLD、2、dims、句点、1和COMM_2d);
MPI_通信等级(通信2d和My2D等级);
MPI_Cart_coords(comm_2d、my2drank、2、MyCords);
MPI车移位(通信2d、0、-1、&rightrank和&leftrank);
MPI车轮班(通信2d、1、-1、降级和上排);
nlocal=n/dims[0];
MPI车轮班(通信2d、0、-1、&shiftsource和&shiftdest);
//MPI_Cart_shift(comm_2d,0,-mycoords[0],&shiftsource,&shiftdest);
MPI_Sendrecv_replace(a、nlocal*nlocal、MPI_DOUBLE、移位测试、5、移位源、5、通信2d和状态);
//MPI车轮班(通信2d,1,-mycoords[1],&shiftsource,&shiftdest);
MPI车轮班(通信2d、1、-1、移位源和移位测试);
MPI_Sendrecv_replace(b,nlocal*nlocal,MPI_DOUBLE,shiftdest,6,shiftsource,6,comm_2d,&status);
printf(“秩[%d]已进入循环dim%d\n”,myrank,dims[0]);fflush(stdout);

对于(i=0;i而言,有两个小问题需要解决:

  • 在以下几行中:

    /* Problematic order: n is read by the three allocations below before the MPI_Bcast that delivers it. */
    a = calloc(n*n, sizeof(double));
    b = calloc(n*n, sizeof(double));
    c = calloc(n*n, sizeof(double));
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    
必须先广播 n,再分配 a、b、c。否则在分配时 n 尚未初始化,程序的行为是未定义的,并可能触发段错误。

  • 在函数 MPI_Cart_shift() 中,第三个参数是位移:负值表示向下移位,正值表示向上移位。我把它改为对所有进程使用相同的位移,程序就能正常运行了。即使使用 MPI_Sendrecv_replace(),每个进程接收的消息数也必须与发送给该进程的消息数一致。当各进程使用不同的位移之后再调用
    MPI_Sendrecv_replace()
    时,情况可能并非如此。

在 MPI 标准给出的示例中,做法略有不同:

 C compute shift source and destination
    CALL MPI_CART_SHIFT(comm, 0, coords(2), source,
                        dest, ierr)
  C skew array
    CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, dest, 0,
                              source, 0, comm, status,
                              ierr)
在这种情况下,每行中的所有进程都会得到相同的位移。因此,每个进程都应该发送一条消息,每个进程都应该接收一条消息。然而,位移取决于行,矩阵是倾斜的

下面是修改后的代码。它用 mpicc main.c -o main -lm -Wall 编译,并用 mpirun -np 4 main 运行:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"

/*
 * Communication skeleton of Cannon's algorithm on a periodic 2-D Cartesian
 * process grid.  The local Multiply() step is left commented out, so only
 * the block shifts of a and b are exercised.
 *
 * Build: mpicc main.c -o main -lm -Wall
 * Run:   mpirun -np 4 main   (the process count must be a perfect square)
 *
 * Returns 0 on success; aborts the whole MPI job on invalid sizes, on an
 * unsuitable process count, or on allocation failure.
 */
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    double *a = NULL, *b = NULL, *c = NULL;
    double t;                 /* wall-clock time; MPI_Wtime() returns a double */
    int i, n = 0;
    int nlocal;
    int npes, dims[2], periods[2];
    int myrank, my2drank, mycoords[2];
    int uprank, downrank, leftrank, rightrank;
    int shiftsource, shiftdest;
    MPI_Status status;
    MPI_Comm comm_2d;

    MPI_Comm_size(MPI_COMM_WORLD, &npes);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    MPI_Barrier(MPI_COMM_WORLD);

    t = -MPI_Wtime();

    if (myrank == 0) {
        int sizeA, sizeB;

        printf("Reading \n");
        /* Placeholder for: a = readMatrix(argv[1], &sizeA); */
        sizeA = 16;
        /* Placeholder for: b = readMatrix(argv[2], &sizeB); */
        sizeB = 16;

        /* Validate before any broadcast so no rank consumes inconsistent data. */
        if (sizeA != sizeB) {
            printf("Matrix not sized n^2\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        /* calloc rather than malloc: the buffers are broadcast below and
         * must not contain indeterminate values. */
        a = calloc((size_t)sizeA * sizeA, sizeof *a);
        b = calloc((size_t)sizeB * sizeB, sizeof *b);
        printf("Reading \n");
        c = calloc((size_t)sizeA * sizeB, sizeof *c);
        n = sizeA;
    }

    /* n must reach every rank before the other ranks size their buffers. */
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (myrank != 0) {
        a = calloc((size_t)n * n, sizeof *a);
        b = calloc((size_t)n * n, sizeof *b);
        c = calloc((size_t)n * n, sizeof *c);
    }

    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "rank[%d]: out of memory\n", myrank);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Bcast(a, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(c, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    /* Square process grid; round so the conversion from double cannot
     * truncate an exact root downwards. */
    dims[0] = dims[1] = (int)(sqrt((double)npes) + 0.5);
    if (dims[0] * dims[1] != npes) {
        if (myrank == 0) {
            fprintf(stderr, "Number of processes (%d) is not a perfect square\n", npes);
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Wrap around in both dimensions. */
    periods[0] = periods[1] = 1;

    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);

    MPI_Comm_rank(comm_2d, &my2drank);
    MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

    /* Neighbours for the per-iteration shifts (displacement -1 in each dimension). */
    MPI_Cart_shift(comm_2d, 0, -1, &rightrank, &leftrank);
    MPI_Cart_shift(comm_2d, 1, -1, &downrank, &uprank);

    nlocal = n/dims[0];

    /* Initial shift of a.  Every rank uses the same displacement (-1); the
     * variant with displacement -mycoords[0] was replaced by this one. */
    MPI_Cart_shift(comm_2d, 0, -1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE, shiftdest, 5, shiftsource, 5, comm_2d, &status);

    /* Initial shift of b, again with a uniform displacement instead of -mycoords[1]. */
    MPI_Cart_shift(comm_2d, 1, -1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE, shiftdest, 6, shiftsource, 6, comm_2d, &status);

    printf("rank[%d] has entered loop dim %d\n", myrank,dims[0]);fflush(stdout);
    for (i = 0; i < dims[0]; i++) {
        /* Multiply(nlocal, a, b, c); -- local block product, disabled here */

        /* Pass a to leftrank, take the replacement from rightrank. */
        MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE, leftrank, 1, rightrank, 1, comm_2d, &status);

        /* Pass b to uprank, take the replacement from downrank. */
        MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE, uprank, 2, downrank, 2, comm_2d, &status);
    }
    printf("rank[%d] has left loop\n", myrank);fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    /* Reverse the initial shifts (displacement +1 instead of +mycoords[...]). */
    MPI_Cart_shift(comm_2d, 0, 1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE, shiftdest, 3, shiftsource, 3, comm_2d, &status);

    MPI_Cart_shift(comm_2d, 1, 1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE, shiftdest, 4, shiftsource, 4, comm_2d, &status);

    printf("rank[%d] has reached the barrier...\n", myrank);fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    if (myrank == 0) {
        t += MPI_Wtime();
        /* Placeholder for: writeMatrix(c, argv[3], n); */
        printf("Finshed in %f second(s)\n", t);
    }

    free(a);
    free(b);
    free(c);

    MPI_Comm_free(&comm_2d);

    MPI_Finalize();

    return 0;
}
#包括
#包括
#包括
#包括“mpi.h”
int main(int argc,字符**argv)
{
MPI_Init(&argc,&argv);
双*a、*b、*c;
int i,t,n;
国际非本地;
国际净现值、dims[2],期间[2];
int-myrank、my2drank、mycoords[2];
int上列、下列、左列、右列;
int shiftsource,shiftdest;
MPI_状态;
MPI_通信2d;
MPI通信大小(MPI通信世界和NPE);
MPI_Comm_rank(MPI_Comm_WORLD和myrank);
MPI_屏障(MPI_通信世界);
t=-MPI_Wtime();
如果(myrank==0){
国际标准化组织,标准化组织;
printf(“读取”\n);
//a=读取矩阵(argv[1],&sizeA);
sizeA=16;
a=malloc(sizeA*sizeA*sizeof(双));
//b=读取矩阵(argv[2],&sizeB);
sizeB=16;
b=malloc(sizeB*sizeB*sizeof(双));
printf(“读取”\n);
c=calloc(sizeA*sizeB,sizeof(双));
n=sizeA;
MPI_Bcast(&n,1,MPI_INT,0,MPI_CO)