C 在处理笛卡尔网格的MPI程序中解决死锁的问题_C_Mpi_Deadlock

C 在处理笛卡尔网格的MPI程序中解决死锁的问题

c mpi

C 在处理笛卡尔网格的MPI程序中解决死锁的问题,c,mpi,deadlock,C,Mpi,Deadlock,我正在实现cannon的算法。我使用4个处理器运行它。我进入循环时遇到了死锁： for (i=0; i<dims[0]; i++) { Multiply(nlocal, a, b, c); MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE,leftrank, 1, rightrank, 1, comm_2d, &status); MPI_Sendrecv_replace(b, nlocal*nlo

我正在实现cannon的算法。我使用4个处理器运行它。我进入循环时遇到了死锁：

   /* Main loop quoted from the question; it runs dims[0] iterations.
    * Multiply() is not shown here -- presumably it accumulates the product of
    * the local nlocal x nlocal blocks a and b into c (TODO confirm). */
   for (i=0; i<dims[0]; i++) { 
    Multiply(nlocal, a, b, c); 
    /* Send the local a block to leftrank and overwrite it with the block received from rightrank. */
    MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE,leftrank, 1, rightrank, 1, comm_2d, &status); 
    /* Send the local b block to uprank and overwrite it with the block received from downrank. */
    MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE,uprank, 1, downrank, 1, comm_2d, &status); 
  }

有两个小问题可以解决：

在以下几行中：

/* Faulty ordering quoted from the question: the three calloc calls read n
 * before the MPI_Bcast on the last line has delivered its value. */
a = calloc(n*n, sizeof(double));
b = calloc(n*n, sizeof(double));
c = calloc(n*n, sizeof(double));
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

`n` 应在分配 `a` 之前广播。否则，`n` 未初始化，输出未定义，并且可能触发分段错误。

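在函数 `MPI_Cart_shift()` 中，第三个参数是位移：负表示向下，正表示向上。我修改了它，为所有进程设置相同的位移，结果运行正常。即使使用 `MPI_Sendrecv_replace()`，进程接收的消息数也必须与发送到此进程的消息数匹配。在按坐标设置位移（例如 `MPI_Cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest);`）之后调用
```
MPI_Sendrecv_replace()
```
时可能不是这种情况。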

在 `MPI_Cart_shift` 的文档示例中，做法略有不同：

 C compute shift source and destination
    CALL MPI_CART_SHIFT(comm, 0, coords(2), source,
                        dest, ierr)
  C skew array
    CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, dest, 0,
                              source, 0, comm, status,
                              ierr)

在这种情况下，每一行中的所有进程都会得到相同的位移。因此，每个进程都应该发送一条消息，并且每个进程都应该接收一条消息。不过，位移取决于所在的行，因此矩阵是倾斜（skew）的。

下面是最终的代码。它用 `mpicc main.c -o main -lm -Wall` 编译，并用 `mpirun -np 4 main` 运行：

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"

/*
 * Communication skeleton of Cannon's algorithm on a periodic 2-D process grid.
 *
 * Rank 0 creates the n x n matrices a, b and c and broadcasts them; every
 * rank then performs the initial shift, dims[0] rounds of neighbour
 * exchanges (the local multiplication is still commented out) and the
 * final shift back.  Every shift uses the same displacement on all ranks so
 * that each rank sends exactly one message and receives exactly one message
 * per MPI_Sendrecv_replace call.
 *
 * Returns 0 on success; configuration or allocation errors end the whole
 * job through MPI_Abort with a non-zero error code.
 */
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    double *a = NULL, *b = NULL, *c = NULL;
    double t;                       /* MPI_Wtime() returns seconds as a double */
    int i, n = 0;
    int nlocal;
    int npes, dims[2], periods[2];
    int myrank, my2drank, mycoords[2];
    int uprank, downrank, leftrank, rightrank;
    int shiftsource, shiftdest;
    MPI_Status status;
    MPI_Comm comm_2d;

    MPI_Comm_size(MPI_COMM_WORLD, &npes);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    MPI_Barrier(MPI_COMM_WORLD);
    t = -MPI_Wtime();

    /* The grid is square: round the root and verify it instead of silently
     * truncating, otherwise MPI_Cart_create would leave ranks out. */
    dims[0] = dims[1] = (int)(sqrt((double)npes) + 0.5);
    if (dims[0] * dims[1] != npes) {
        if (myrank == 0) {
            fprintf(stderr, "Number of processes (%d) is not a perfect square\n", npes);
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (myrank == 0) {
        int sizeA, sizeB;

        printf("Reading \n");
        /* a = readMatrix(argv[1], &sizeA); */
        sizeA = 16;
        /* calloc rather than malloc: the buffer is broadcast below, so it
         * must not hold indeterminate values. */
        a = calloc((size_t)sizeA * sizeA, sizeof *a);
        /* b = readMatrix(argv[2], &sizeB); */
        sizeB = 16;
        b = calloc((size_t)sizeB * sizeB, sizeof *b);
        printf("Reading \n");
        c = calloc((size_t)sizeA * sizeB, sizeof *c);

        /* Validate before any broadcast that depends on the sizes. */
        if (sizeA != sizeB) {
            printf("Matrix not sized n^2\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        if (sizeA % dims[0] != 0) {
            fprintf(stderr, "Matrix size %d is not divisible by grid size %d\n",
                    sizeA, dims[0]);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        n = sizeA;
    }

    /* n must reach every rank before it is used to size the allocations. */
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (myrank != 0) {
        a = calloc((size_t)n * n, sizeof *a);
        b = calloc((size_t)n * n, sizeof *b);
        c = calloc((size_t)n * n, sizeof *c);
    }
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "rank[%d]: out of memory\n", myrank);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Bcast(a, n * n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, n * n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(c, n * n, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    /* Periodic (wrap-around) grid in both dimensions. */
    periods[0] = periods[1] = 1;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);

    MPI_Comm_rank(comm_2d, &my2drank);
    MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

    /* Neighbours used by the main loop (displacement -1 in each dimension). */
    MPI_Cart_shift(comm_2d, 0, -1, &rightrank, &leftrank);
    MPI_Cart_shift(comm_2d, 1, -1, &downrank, &uprank);

    nlocal = n / dims[0];

    /* Initial shift.  A per-rank displacement such as -mycoords[0] along
     * dimension 0 would give the ranks of one line different partners, so
     * sends and receives would no longer pair up; use the same displacement
     * everywhere. */
    MPI_Cart_shift(comm_2d, 0, -1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, nlocal * nlocal, MPI_DOUBLE, shiftdest, 5,
                         shiftsource, 5, comm_2d, &status);

    MPI_Cart_shift(comm_2d, 1, -1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, nlocal * nlocal, MPI_DOUBLE, shiftdest, 6,
                         shiftsource, 6, comm_2d, &status);

    printf("rank[%d] has entered loop dim %d\n", myrank, dims[0]);
    fflush(stdout);
    for (i = 0; i < dims[0]; i++) {
        /* Multiply(nlocal, a, b, c); -- local product, disabled in this skeleton */

        MPI_Sendrecv_replace(a, nlocal * nlocal, MPI_DOUBLE, leftrank, 1,
                             rightrank, 1, comm_2d, &status);
        MPI_Sendrecv_replace(b, nlocal * nlocal, MPI_DOUBLE, uprank, 2,
                             downrank, 2, comm_2d, &status);
    }
    printf("rank[%d] has left loop\n", myrank);
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    /* Undo the initial shift, again with a uniform displacement. */
    MPI_Cart_shift(comm_2d, 0, 1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, nlocal * nlocal, MPI_DOUBLE, shiftdest, 3,
                         shiftsource, 3, comm_2d, &status);

    MPI_Cart_shift(comm_2d, 1, 1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, nlocal * nlocal, MPI_DOUBLE, shiftdest, 4,
                         shiftsource, 4, comm_2d, &status);

    printf("rank[%d] has reached the barrier...\n", myrank);
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    if (myrank == 0) {
        t += MPI_Wtime();
        /* writeMatrix(c, argv[3], n); */
        printf("Finshed in %f second(s)\n", t);
    }

    free(a);
    free(b);
    free(c);

    MPI_Comm_free(&comm_2d);

    MPI_Finalize();

    return 0;
}

#包括
#包括
#包括
#包括“mpi.h”
int main（int argc，字符**argv）
{
MPI_Init（&argc，&argv）；
双*a、*b、*c；
int i，t，n；
国际非本地；
国际净现值、dims[2]，期间[2]；
int-myrank、my2drank、mycoords[2]；
int上列、下列、左列、右列；
int shiftsource，shiftdest；
MPI_状态；
MPI_通信2d；
MPI通信大小（MPI通信世界和NPE）；
MPI_Comm_rank（MPI_Comm_WORLD和myrank）；
MPI_屏障（MPI_通信世界）；
t=-MPI_Wtime（）；
如果（myrank==0）{
国际标准化组织，标准化组织；
printf（“读取”\n）；
//a=读取矩阵（argv[1]，&sizeA）；
sizeA=16；
a=malloc（sizeA*sizeA*sizeof（双））；
//b=读取矩阵（argv[2]，&sizeB）；
sizeB=16；
b=malloc（sizeB*sizeB*sizeof（双））；
printf（“读取”\n）；
c=calloc（sizeA*sizeB，sizeof（双））；
n=sizeA；
MPI_Bcast（&n，1，MPI_INT，0，MPI_COMM_WORLD）；
MPI_Bcast（a、n*n、MPI_双精度、0、MPI_通信世界）；
MPI_Bcast（b，n*n，MPI_DOUBLE，0，MPI_COMM_WORLD）；
MPI_Bcast（c，n*n，MPI_双精度，0，MPI_通信世界）；
if（sizeA！=sizeB）{
printf（“矩阵未调整大小n^2\n”）；
MPI_中止（MPI_通信世界，0）；
}
}
否则{
MPI_Bcast（&n，1，MPI_INT，0，MPI_COMM_WORLD）；//n应在分配前广播
a=calloc（n*n，sizeof（双））；
b=calloc（n*n，sizeof（双））；
c=calloc（n*n，sizeof（双））；
MPI_Bcast（a、n*n、MPI_双精度、0、MPI_通信世界）；
MPI_Bcast（b，n*n，MPI_DOUBLE，0，MPI_COMM_WORLD）；
MPI_Bcast（c，n*n，MPI_双精度，0，MPI_通信世界）；
}
dims[0]=dims[1]=sqrt（npes）；
期间[0]=期间[1]=1；
MPI_Cart_create（MPI_COMM_WORLD、2、dims、句点、1和COMM_2d）；
MPI_通信等级（通信2d和My2D等级）；
MPI_Cart_coords（comm_2d、my2drank、2、MyCords）；
MPI车移位（通信2d、0、-1、&rightrank和&leftrank）；
MPI车轮班（通信2d、1、-1、降级和上排）；
nlocal=n/dims[0]；
MPI车轮班（通信2d、0、-1、&shiftsource和&shiftdest）；
//MPI_Cart_shift（comm_2d，0，-mycoords[0]，&shiftsource，&shiftdest）；
MPI_Sendrecv_replace（a、nlocal*nlocal、MPI_DOUBLE、移位测试、5、移位源、5、通信2d和状态）；
//MPI车轮班（通信2d，1，-mycoords[1]，&shiftsource，&shiftdest）；
MPI车轮班（通信2d、1、-1、移位源和移位测试）；
MPI_Sendrecv_replace（b，nlocal*nlocal，MPI_DOUBLE，shiftdest，6，shiftsource，6，comm_2d，&status）；
printf（“秩[%d]已进入循环dim%d\n”，myrank，dims[0]）；fflush（stdout）；
对于（i=0；i而言，有两个小问题需要解决：

在以下几行中：
/* Faulty ordering quoted from the question: the three calloc calls read n
 * before the MPI_Bcast on the last line has delivered its value. */
a = calloc(n*n, sizeof(double));
b = calloc(n*n, sizeof(double));
c = calloc(n*n, sizeof(double));
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);



`n` 应在分配 `a` 之前广播。否则，`n` 未初始化，输出未定义，并且可能触发分段错误。

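在函数 `MPI_Cart_shift()` 中，第三个参数是位移：负表示向下，正表示向上。我修改了它，为所有进程设置相同的位移，结果运行正常。即使使用了 `MPI_Sendrecv_replace()`，进程接收的消息数也必须与发送到此进程的消息数相匹配。在按坐标设置位移（例如 `MPI_Cart_shift(comm_2d, 0, -mycoords[0], &shiftsource, &shiftdest);`）之后调用 `MPI_Sendrecv_replace()` 时，可能不满足这一条件。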


在 `MPI_Cart_shift` 的文档示例中，做法略有不同：
 C compute shift source and destination
    CALL MPI_CART_SHIFT(comm, 0, coords(2), source,
                        dest, ierr)
  C skew array
    CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, dest, 0,
                              source, 0, comm, status,
                              ierr)

在这种情况下，每行中的所有进程都会得到相同的位移。因此，每个进程都应该发送一条消息，每个进程都应该接收一条消息。然而，位移取决于行，矩阵是倾斜的
下面是最终的代码。它用 `mpicc main.c -o main -lm -Wall` 编译，并用 `mpirun -np 4 main` 运行：
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"

/*
 * Communication skeleton of Cannon's algorithm on a periodic 2-D process grid.
 *
 * Rank 0 creates the n x n matrices a, b and c and broadcasts them; every
 * rank then performs the initial shift, dims[0] rounds of neighbour
 * exchanges (the local multiplication is still commented out) and the
 * final shift back.  Every shift uses the same displacement on all ranks so
 * that each rank sends exactly one message and receives exactly one message
 * per MPI_Sendrecv_replace call.
 *
 * Returns 0 on success; configuration or allocation errors end the whole
 * job through MPI_Abort with a non-zero error code.
 */
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    double *a = NULL, *b = NULL, *c = NULL;
    double t;                       /* MPI_Wtime() returns seconds as a double */
    int i, n = 0;
    int nlocal;
    int npes, dims[2], periods[2];
    int myrank, my2drank, mycoords[2];
    int uprank, downrank, leftrank, rightrank;
    int shiftsource, shiftdest;
    MPI_Status status;
    MPI_Comm comm_2d;

    MPI_Comm_size(MPI_COMM_WORLD, &npes);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    MPI_Barrier(MPI_COMM_WORLD);
    t = -MPI_Wtime();

    /* The grid is square: round the root and verify it instead of silently
     * truncating, otherwise MPI_Cart_create would leave ranks out. */
    dims[0] = dims[1] = (int)(sqrt((double)npes) + 0.5);
    if (dims[0] * dims[1] != npes) {
        if (myrank == 0) {
            fprintf(stderr, "Number of processes (%d) is not a perfect square\n", npes);
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (myrank == 0) {
        int sizeA, sizeB;

        printf("Reading \n");
        /* a = readMatrix(argv[1], &sizeA); */
        sizeA = 16;
        /* calloc rather than malloc: the buffer is broadcast below, so it
         * must not hold indeterminate values. */
        a = calloc((size_t)sizeA * sizeA, sizeof *a);
        /* b = readMatrix(argv[2], &sizeB); */
        sizeB = 16;
        b = calloc((size_t)sizeB * sizeB, sizeof *b);
        printf("Reading \n");
        c = calloc((size_t)sizeA * sizeB, sizeof *c);

        /* Validate before any broadcast that depends on the sizes. */
        if (sizeA != sizeB) {
            printf("Matrix not sized n^2\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        if (sizeA % dims[0] != 0) {
            fprintf(stderr, "Matrix size %d is not divisible by grid size %d\n",
                    sizeA, dims[0]);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        n = sizeA;
    }

    /* n must reach every rank before it is used to size the allocations. */
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (myrank != 0) {
        a = calloc((size_t)n * n, sizeof *a);
        b = calloc((size_t)n * n, sizeof *b);
        c = calloc((size_t)n * n, sizeof *c);
    }
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "rank[%d]: out of memory\n", myrank);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Bcast(a, n * n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, n * n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(c, n * n, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    /* Periodic (wrap-around) grid in both dimensions. */
    periods[0] = periods[1] = 1;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);

    MPI_Comm_rank(comm_2d, &my2drank);
    MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

    /* Neighbours used by the main loop (displacement -1 in each dimension). */
    MPI_Cart_shift(comm_2d, 0, -1, &rightrank, &leftrank);
    MPI_Cart_shift(comm_2d, 1, -1, &downrank, &uprank);

    nlocal = n / dims[0];

    /* Initial shift.  A per-rank displacement such as -mycoords[0] along
     * dimension 0 would give the ranks of one line different partners, so
     * sends and receives would no longer pair up; use the same displacement
     * everywhere. */
    MPI_Cart_shift(comm_2d, 0, -1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, nlocal * nlocal, MPI_DOUBLE, shiftdest, 5,
                         shiftsource, 5, comm_2d, &status);

    MPI_Cart_shift(comm_2d, 1, -1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, nlocal * nlocal, MPI_DOUBLE, shiftdest, 6,
                         shiftsource, 6, comm_2d, &status);

    printf("rank[%d] has entered loop dim %d\n", myrank, dims[0]);
    fflush(stdout);
    for (i = 0; i < dims[0]; i++) {
        /* Multiply(nlocal, a, b, c); -- local product, disabled in this skeleton */

        MPI_Sendrecv_replace(a, nlocal * nlocal, MPI_DOUBLE, leftrank, 1,
                             rightrank, 1, comm_2d, &status);
        MPI_Sendrecv_replace(b, nlocal * nlocal, MPI_DOUBLE, uprank, 2,
                             downrank, 2, comm_2d, &status);
    }
    printf("rank[%d] has left loop\n", myrank);
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    /* Undo the initial shift, again with a uniform displacement. */
    MPI_Cart_shift(comm_2d, 0, 1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, nlocal * nlocal, MPI_DOUBLE, shiftdest, 3,
                         shiftsource, 3, comm_2d, &status);

    MPI_Cart_shift(comm_2d, 1, 1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, nlocal * nlocal, MPI_DOUBLE, shiftdest, 4,
                         shiftsource, 4, comm_2d, &status);

    printf("rank[%d] has reached the barrier...\n", myrank);
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    if (myrank == 0) {
        t += MPI_Wtime();
        /* writeMatrix(c, argv[3], n); */
        printf("Finshed in %f second(s)\n", t);
    }

    free(a);
    free(b);
    free(c);

    MPI_Comm_free(&comm_2d);

    MPI_Finalize();

    return 0;
}

#包括
#包括
#包括
#包括“mpi.h”
int main（int argc，字符**argv）
{
MPI_Init（&argc，&argv）；
双*a、*b、*c；
int i，t，n；
国际非本地；
国际净现值、dims[2]，期间[2]；
int-myrank、my2drank、mycoords[2]；
int上列、下列、左列、右列；
int shiftsource，shiftdest；
MPI_状态；
MPI_通信2d；
MPI通信大小（MPI通信世界和NPE）；
MPI_Comm_rank（MPI_Comm_WORLD和myrank）；
MPI_屏障（MPI_通信世界）；
t=-MPI_Wtime（）；
如果（myrank==0）{
国际标准化组织，标准化组织；
printf（“读取”\n）；
//a=读取矩阵（argv[1]，&sizeA）；
sizeA=16；
a=malloc（sizeA*sizeA*sizeof（双））；
//b=读取矩阵（argv[2]，&sizeB）；
sizeB=16；
b=malloc（sizeB*sizeB*sizeof（双））；
printf（“读取”\n）；
c=calloc（sizeA*sizeB，sizeof（双））；
n=sizeA；
MPI_Bcast（&n，1，MPI_INT，0，MPI_CO）