C 在处理笛卡尔网格的MPI程序中解决死锁的问题
我正在实现cannon的算法。我使用4个处理器运行它。我进入循环时遇到了死锁:C 在处理笛卡尔网格的MPI程序中解决死锁的问题,c,mpi,deadlock,C,Mpi,Deadlock,我正在实现cannon的算法。我使用4个处理器运行它。我进入循环时遇到了死锁: for (i=0; i<dims[0]; i++) { Multiply(nlocal, a, b, c); MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE,leftrank, 1, rightrank, 1, comm_2d, &status); MPI_Sendrecv_replace(b, nlocal*nlo
// Main loop of Cannon's algorithm as posted in the question: dims[0]
// iterations, each doing one local multiply followed by two shifts.
for (i=0; i<dims[0]; i++) {
// Multiply is not shown here; presumably it accumulates the product of the
// local blocks a and b into c -- TODO confirm against its definition.
Multiply(nlocal, a, b, c);
// Send nlocal*nlocal doubles of a to leftrank and overwrite a with the data
// received from rightrank (tag 1 in both directions).
MPI_Sendrecv_replace(a, nlocal*nlocal, MPI_DOUBLE,leftrank, 1, rightrank, 1, comm_2d, &status);
// Same exchange for b: send to uprank, receive from downrank (tag 1).
MPI_Sendrecv_replace(b, nlocal*nlocal, MPI_DOUBLE,uprank, 1, downrank, 1, comm_2d, &status);
}
有两个小问题可以解决:
- 在以下几行代码中:
a = calloc(n*n, sizeof(double)); b = calloc(n*n, sizeof(double)); c = calloc(n*n, sizeof(double)); MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
`n` 应在分配 `a` 之前广播。否则 `n` 未初始化,程序行为未定义,可能触发段错误(segmentation fault)。
- 在函数 MPI_Cart_shift() 中,第三个参数是位移:负表示向下移位,正表示向上移位。我把它改成对所有进程使用相同的位移,程序就能正常运行了。即使使用 MPI_Sendrecv_replace(),每个进程接收的消息数也必须与发送给该进程的消息数一致。当各进程以不同的位移(例如代码中被注释掉的 -mycoords[0])调用 MPI_Cart_shift() 时,情况可能并非如此。下面的 Fortran 示例做法略有不同:
C compute shift source and destination
CALL MPI_CART_SHIFT(comm, 0, coords(2), source,
dest, ierr)
C skew array
CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, dest, 0,
source, 0, comm, status,
ierr)
在这种情况下,同一行中的所有进程使用相同的位移,因此每个进程应当恰好发送一条消息,也恰好接收一条消息。不过,位移随行而变化,矩阵因此被倾斜(skew)。
下面是修改后的代码。使用 `mpicc main.c -o main -lm -Wall` 编译,使用 `mpirun -np 4 main` 运行:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"
/*
 * Communication skeleton of Cannon's algorithm on a periodic 2-D Cartesian
 * process grid. The local block multiplication and the matrix file I/O are
 * not wired in yet; the places where they belong are marked below.
 *
 * The program must be started with a perfect-square number of processes
 * (e.g. mpirun -np 4) and the matrix order n must be divisible by the grid
 * side. Otherwise it prints a diagnostic and aborts with a non-zero code.
 *
 * Returns 0 on success.
 */
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    double *a = NULL, *b = NULL, *c = NULL;
    double t;                 /* MPI_Wtime() returns a double, not an int */
    int i, n = 0;
    int nlocal;
    int npes, side, dims[2], periods[2];
    int myrank, my2drank, mycoords[2];
    int uprank, downrank, leftrank, rightrank;
    int shiftsource, shiftdest;
    MPI_Status status;
    MPI_Comm comm_2d;

    MPI_Comm_size(MPI_COMM_WORLD, &npes);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    /* The grid is side x side; reject process counts that are not a perfect
     * square instead of letting MPI_Cart_create hand some ranks
     * MPI_COMM_NULL. */
    side = (int)(sqrt((double)npes) + 0.5);
    if (side * side != npes) {
        if (myrank == 0) {
            fprintf(stderr, "Number of processes (%d) is not a perfect square\n", npes);
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    t = -MPI_Wtime();

    if (myrank == 0) {
        int sizeA, sizeB;

        printf("Reading \n");
        /* Placeholder for: a = readMatrix(argv[1], &sizeA); */
        sizeA = 16;
        /* Placeholder for: b = readMatrix(argv[2], &sizeB); */
        sizeB = 16;
        printf("Reading \n");

        /* Validate before any collective call so no rank is left waiting. */
        if (sizeA != sizeB) {
            printf("Matrix not sized n^2\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        n = sizeA;
    }

    /* n must be known on every rank before the buffers are allocated. */
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (n <= 0 || n % side != 0) {
        if (myrank == 0) {
            fprintf(stderr, "Matrix order %d is not divisible by grid side %d\n", n, side);
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* calloc gives every rank defined (zero) contents; rank 0 previously
     * broadcast uninitialised malloc memory. */
    a = calloc((size_t)n * (size_t)n, sizeof *a);
    b = calloc((size_t)n * (size_t)n, sizeof *b);
    c = calloc((size_t)n * (size_t)n, sizeof *c);
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "rank[%d]: out of memory\n", myrank);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* One collective call per matrix, executed identically by all ranks.
     * c is zero on every rank already, so it needs no broadcast. */
    MPI_Bcast(a, n * n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, n * n, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    dims[0] = dims[1] = side;
    periods[0] = periods[1] = 1;   /* wrap around in both dimensions */
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);
    MPI_Comm_rank(comm_2d, &my2drank);
    MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

    /* With displacement -1, the first output is the rank we receive from
     * and the second the rank we send to. */
    MPI_Cart_shift(comm_2d, 0, -1, &rightrank, &leftrank);
    MPI_Cart_shift(comm_2d, 1, -1, &downrank, &uprank);

    nlocal = n / dims[0];

    /* Initial alignment. The same displacement is used on every process so
     * that each one sends exactly one message and receives exactly one. */
    MPI_Cart_shift(comm_2d, 0, -1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, nlocal * nlocal, MPI_DOUBLE, shiftdest, 5,
                         shiftsource, 5, comm_2d, &status);
    MPI_Cart_shift(comm_2d, 1, -1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, nlocal * nlocal, MPI_DOUBLE, shiftdest, 6,
                         shiftsource, 6, comm_2d, &status);

    printf("rank[%d] has entered loop dim %d\n", myrank, dims[0]);
    fflush(stdout);

    for (i = 0; i < dims[0]; i++) {
        /* Placeholder for: Multiply(nlocal, a, b, c); */
        MPI_Sendrecv_replace(a, nlocal * nlocal, MPI_DOUBLE, leftrank, 1,
                             rightrank, 1, comm_2d, &status);
        MPI_Sendrecv_replace(b, nlocal * nlocal, MPI_DOUBLE, uprank, 2,
                             downrank, 2, comm_2d, &status);
    }

    printf("rank[%d] has left loop\n", myrank);
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    /* Undo the initial alignment with the opposite displacement. */
    MPI_Cart_shift(comm_2d, 0, 1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, nlocal * nlocal, MPI_DOUBLE, shiftdest, 3,
                         shiftsource, 3, comm_2d, &status);
    MPI_Cart_shift(comm_2d, 1, 1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, nlocal * nlocal, MPI_DOUBLE, shiftdest, 4,
                         shiftsource, 4, comm_2d, &status);

    printf("rank[%d] has reached the barrier...\n", myrank);
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    if (myrank == 0) {
        t += MPI_Wtime();
        /* Placeholder for: writeMatrix(c, argv[3], n); */
        printf("Finshed in %f second(s)\n", t);
    }

    free(a);
    free(b);
    free(c);
    MPI_Comm_free(&comm_2d);
    MPI_Finalize();
    return 0;
}
#包括
#包括
#包括
#包括“mpi.h”
int main(int argc,字符**argv)
{
MPI_Init(&argc,&argv);
双*a、*b、*c;
int i,t,n;
国际非本地;
国际净现值、dims[2],期间[2];
int-myrank、my2drank、mycoords[2];
int上列、下列、左列、右列;
int shiftsource,shiftdest;
MPI_状态;
MPI_通信2d;
MPI通信大小(MPI通信世界和NPE);
MPI_Comm_rank(MPI_Comm_WORLD和myrank);
MPI_屏障(MPI_通信世界);
t=-MPI_Wtime();
如果(myrank==0){
国际标准化组织,标准化组织;
printf(“读取”\n);
//a=读取矩阵(argv[1],&sizeA);
sizeA=16;
a=malloc(sizeA*sizeA*sizeof(双));
//b=读取矩阵(argv[2],&sizeB);
sizeB=16;
b=malloc(sizeB*sizeB*sizeof(双));
printf(“读取”\n);
c=calloc(sizeA*sizeB,sizeof(双));
n=sizeA;
MPI_Bcast(&n,1,MPI_INT,0,MPI_COMM_WORLD);
MPI_Bcast(a、n*n、MPI_双精度、0、MPI_通信世界);
MPI_Bcast(b,n*n,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(c,n*n,MPI_双精度,0,MPI_通信世界);
if(sizeA!=sizeB){
printf(“矩阵未调整大小n^2\n”);
MPI_中止(MPI_通信世界,0);
}
}
否则{
MPI_Bcast(&n,1,MPI_INT,0,MPI_COMM_WORLD);//n应在分配前广播
a=calloc(n*n,sizeof(双));
b=calloc(n*n,sizeof(双));
c=calloc(n*n,sizeof(双));
MPI_Bcast(a、n*n、MPI_双精度、0、MPI_通信世界);
MPI_Bcast(b,n*n,MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(c,n*n,MPI_双精度,0,MPI_通信世界);
}
dims[0]=dims[1]=sqrt(npes);
期间[0]=期间[1]=1;
MPI_Cart_create(MPI_COMM_WORLD、2、dims、句点、1和COMM_2d);
MPI_通信等级(通信2d和My2D等级);
MPI_Cart_coords(comm_2d、my2drank、2、MyCords);
MPI车移位(通信2d、0、-1、&rightrank和&leftrank);
MPI车轮班(通信2d、1、-1、降级和上排);
nlocal=n/dims[0];
MPI车轮班(通信2d、0、-1、&shiftsource和&shiftdest);
//MPI_Cart_shift(comm_2d,0,-mycoords[0],&shiftsource,&shiftdest);
MPI_Sendrecv_replace(a、nlocal*nlocal、MPI_DOUBLE、移位测试、5、移位源、5、通信2d和状态);
//MPI车轮班(通信2d,1,-mycoords[1],&shiftsource,&shiftdest);
MPI车轮班(通信2d、1、-1、移位源和移位测试);
MPI_Sendrecv_replace(b,nlocal*nlocal,MPI_DOUBLE,shiftdest,6,shiftsource,6,comm_2d,&status);
printf(“秩[%d]已进入循环dim%d\n”,myrank,dims[0]);fflush(stdout);
对于(i=0;i而言,有两个小问题需要解决:
- 在以下几行代码中:
a = calloc(n*n, sizeof(double));
b = calloc(n*n, sizeof(double));
c = calloc(n*n, sizeof(double));
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
`n` 应在分配 `a` 之前广播。否则 `n` 未初始化,程序行为未定义,可能触发段错误(segmentation fault)。
- 在函数 MPI_Cart_shift() 中,第三个参数是位移:负表示向下移位,正表示向上移位。我把它改成对所有进程使用相同的位移,程序就能正常运行了。即使使用 MPI_Sendrecv_replace(),每个进程接收的消息数也必须与发送给该进程的消息数一致。当各进程以不同的位移(例如代码中被注释掉的 -mycoords[0])调用 MPI_Cart_shift() 时,情况可能并非如此。
在下面的 Fortran 示例中,做法略有不同:
C compute shift source and destination
CALL MPI_CART_SHIFT(comm, 0, coords(2), source,
dest, ierr)
C skew array
CALL MPI_SENDRECV_REPLACE(A, 1, MPI_REAL, dest, 0,
source, 0, comm, status,
ierr)
在这种情况下,每行中的所有进程都会得到相同的位移。因此,每个进程都应该发送一条消息,每个进程都应该接收一条消息。然而,位移取决于行,矩阵是倾斜的
下面是修改后的代码。使用 `mpicc main.c -o main -lm -Wall` 编译,使用 `mpirun -np 4 main` 运行:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"
/*
 * Communication skeleton of Cannon's algorithm on a periodic 2-D Cartesian
 * process grid. The local block multiplication and the matrix file I/O are
 * not wired in yet; the places where they belong are marked below.
 *
 * The program must be started with a perfect-square number of processes
 * (e.g. mpirun -np 4) and the matrix order n must be divisible by the grid
 * side. Otherwise it prints a diagnostic and aborts with a non-zero code.
 *
 * Returns 0 on success.
 */
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    double *a = NULL, *b = NULL, *c = NULL;
    double t;                 /* MPI_Wtime() returns a double, not an int */
    int i, n = 0;
    int nlocal;
    int npes, side, dims[2], periods[2];
    int myrank, my2drank, mycoords[2];
    int uprank, downrank, leftrank, rightrank;
    int shiftsource, shiftdest;
    MPI_Status status;
    MPI_Comm comm_2d;

    MPI_Comm_size(MPI_COMM_WORLD, &npes);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    /* The grid is side x side; reject process counts that are not a perfect
     * square instead of letting MPI_Cart_create hand some ranks
     * MPI_COMM_NULL. */
    side = (int)(sqrt((double)npes) + 0.5);
    if (side * side != npes) {
        if (myrank == 0) {
            fprintf(stderr, "Number of processes (%d) is not a perfect square\n", npes);
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    t = -MPI_Wtime();

    if (myrank == 0) {
        int sizeA, sizeB;

        printf("Reading \n");
        /* Placeholder for: a = readMatrix(argv[1], &sizeA); */
        sizeA = 16;
        /* Placeholder for: b = readMatrix(argv[2], &sizeB); */
        sizeB = 16;
        printf("Reading \n");

        /* Validate before any collective call so no rank is left waiting. */
        if (sizeA != sizeB) {
            printf("Matrix not sized n^2\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        n = sizeA;
    }

    /* n must be known on every rank before the buffers are allocated. */
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (n <= 0 || n % side != 0) {
        if (myrank == 0) {
            fprintf(stderr, "Matrix order %d is not divisible by grid side %d\n", n, side);
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* calloc gives every rank defined (zero) contents; rank 0 previously
     * broadcast uninitialised malloc memory. */
    a = calloc((size_t)n * (size_t)n, sizeof *a);
    b = calloc((size_t)n * (size_t)n, sizeof *b);
    c = calloc((size_t)n * (size_t)n, sizeof *c);
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "rank[%d]: out of memory\n", myrank);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* One collective call per matrix, executed identically by all ranks.
     * c is zero on every rank already, so it needs no broadcast. */
    MPI_Bcast(a, n * n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(b, n * n, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    dims[0] = dims[1] = side;
    periods[0] = periods[1] = 1;   /* wrap around in both dimensions */
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);
    MPI_Comm_rank(comm_2d, &my2drank);
    MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

    /* With displacement -1, the first output is the rank we receive from
     * and the second the rank we send to. */
    MPI_Cart_shift(comm_2d, 0, -1, &rightrank, &leftrank);
    MPI_Cart_shift(comm_2d, 1, -1, &downrank, &uprank);

    nlocal = n / dims[0];

    /* Initial alignment. The same displacement is used on every process so
     * that each one sends exactly one message and receives exactly one. */
    MPI_Cart_shift(comm_2d, 0, -1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, nlocal * nlocal, MPI_DOUBLE, shiftdest, 5,
                         shiftsource, 5, comm_2d, &status);
    MPI_Cart_shift(comm_2d, 1, -1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, nlocal * nlocal, MPI_DOUBLE, shiftdest, 6,
                         shiftsource, 6, comm_2d, &status);

    printf("rank[%d] has entered loop dim %d\n", myrank, dims[0]);
    fflush(stdout);

    for (i = 0; i < dims[0]; i++) {
        /* Placeholder for: Multiply(nlocal, a, b, c); */
        MPI_Sendrecv_replace(a, nlocal * nlocal, MPI_DOUBLE, leftrank, 1,
                             rightrank, 1, comm_2d, &status);
        MPI_Sendrecv_replace(b, nlocal * nlocal, MPI_DOUBLE, uprank, 2,
                             downrank, 2, comm_2d, &status);
    }

    printf("rank[%d] has left loop\n", myrank);
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    /* Undo the initial alignment with the opposite displacement. */
    MPI_Cart_shift(comm_2d, 0, 1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, nlocal * nlocal, MPI_DOUBLE, shiftdest, 3,
                         shiftsource, 3, comm_2d, &status);
    MPI_Cart_shift(comm_2d, 1, 1, &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, nlocal * nlocal, MPI_DOUBLE, shiftdest, 4,
                         shiftsource, 4, comm_2d, &status);

    printf("rank[%d] has reached the barrier...\n", myrank);
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);

    if (myrank == 0) {
        t += MPI_Wtime();
        /* Placeholder for: writeMatrix(c, argv[3], n); */
        printf("Finshed in %f second(s)\n", t);
    }

    free(a);
    free(b);
    free(c);
    MPI_Comm_free(&comm_2d);
    MPI_Finalize();
    return 0;
}
#包括
#包括
#包括
#包括“mpi.h”
int main(int argc,字符**argv)
{
MPI_Init(&argc,&argv);
双*a、*b、*c;
int i,t,n;
国际非本地;
国际净现值、dims[2],期间[2];
int-myrank、my2drank、mycoords[2];
int上列、下列、左列、右列;
int shiftsource,shiftdest;
MPI_状态;
MPI_通信2d;
MPI通信大小(MPI通信世界和NPE);
MPI_Comm_rank(MPI_Comm_WORLD和myrank);
MPI_屏障(MPI_通信世界);
t=-MPI_Wtime();
如果(myrank==0){
国际标准化组织,标准化组织;
printf(“读取”\n);
//a=读取矩阵(argv[1],&sizeA);
sizeA=16;
a=malloc(sizeA*sizeA*sizeof(双));
//b=读取矩阵(argv[2],&sizeB);
sizeB=16;
b=malloc(sizeB*sizeB*sizeof(双));
printf(“读取”\n);
c=calloc(sizeA*sizeB,sizeof(双));
n=sizeA;
MPI_Bcast(&n,1,MPI_INT,0,MPI_CO)