使用MPI创建二维拓扑的矩阵乘法_Mpi_Matrix Multiplication

使用MPI创建二维拓扑的矩阵乘法

mpi

使用MPI创建二维拓扑的矩阵乘法,mpi,matrix-multiplication,Mpi,Matrix Multiplication,我在MPI中通过创建2d拓扑将2个矩阵相乘我发现了错误 An error occurred in MPI_Type_create_struct on communicator MPI_COMM_WORLD MPI_ERR_TYPE: invalid datatype *** MPI_ERRORS_ARE_FATAL (your MPI job will now abort) ------------------------------------------------------------

我在MPI中通过创建二维拓扑来实现两个矩阵相乘，但运行时出现了如下错误：

An error occurred in MPI_Type_create_struct
on communicator MPI_COMM_WORLD
MPI_ERR_TYPE: invalid datatype
*** MPI_ERRORS_ARE_FATAL (your MPI job will now abort)
--------------------------------------------------------------------------
mpiexec has exited due to process rank 0 with PID 21294 on
node hpc-nist.nist.local exiting without calling "finalize". This may
have caused other processes in the application to be
terminated by signals sent by mpiexec (as reported here)

我知道错误发生在第74行，但不知道原因，因为代码看起来一切正常。

#include<mpi.h>
#include<stdio.h>
#include<stdlib.h>
#define NUM_ROW_A 225
#define NUM_COL_A 300
#define NUM_ROW_B 300
#define NUM_COL_B 150
int main()
{
    double a[NUM_ROW_A][NUM_COL_A],b[NUM_ROW_B][NUM_COL_B],c[NUM_ROW_A][NUM_COL_B];
    int n[3]={NUM_ROW_A,NUM_COL_A,NUM_COL_B};
    int p[2]={3,2};
    MPI_Comm comm =MPI_COMM_WORLD;
    MPI_Comm comm_2d,comm_1d[2],pcomm;
    int nn[2];
    double aa[3][NUM_COL_A],bb[NUM_COL_A][2],cc[3][2];
    int coords[2];
    int rank;
    int *dispc,*countc;
    int i,j,k;//ierr;
    int periods[2]={0,0};
    int remains[2];
    //int sizeofdouble=sizeof(double);
    MPI_Aint sizeofreal;
    double s_time,f_time;
    MPI_Datatype typea,typec,types[2];
    int blen[2];
    MPI_Aint disp[2];
    MPI_Init(NULL,NULL);
    s_time=MPI_Wtime();
    MPI_Comm_dup(comm,&pcomm);
    MPI_Bcast(n,3,MPI_INT,0,pcomm);
    MPI_Bcast(p,2,MPI_INT,0,pcomm);
    //periods={0,0};
    MPI_Cart_create(pcomm,2,p,periods,0,&comm_2d);
    MPI_Comm_rank(comm_2d,&rank);
    MPI_Cart_coords(comm_2d,rank,2,coords);
    for(i=0;i<2;i++)
    {
        for(j=0;j<2;j++)
            remains[j]=(i==j);
        MPI_Cart_sub(comm_2d,remains,&comm_1d[i]);
    }
    nn[0]=n[0]/p[0];
    nn[1]=n[2]/p[1];
    if(rank==0)
    {
        for(i=0; i<n[0]; i++)
        {
            for(j=0; j<n[1]; j++)
            {
                double randNr =  (rand()/9.9);
                a[i][j] = randNr;
            }
        }
        for(i=0; i<n[1]; i++)
        {
            for(j=0; j<n[2]; j++)
            {
                double randNr =  (rand()/9.9);
                b[i][j] = randNr;
            }
        }
        MPI_Type_vector(n[1],nn[0],n[0],MPI_DOUBLE,&types[0]);
        MPI_Type_extent(MPI_DOUBLE,&sizeofreal);
        disp[0]=0;
        disp[1]=sizeofreal*nn[0];
        blen[0]=1;
        blen[1]=1;
        types[2]=MPI_UB;
        printf("hi%ld\n",disp[1]);
        MPI_Type_struct(2,blen,disp,types,&typea);
        printf("hi\n");
        MPI_Type_commit(&typea);
        MPI_Type_vector(nn[1],nn[0],n[0],MPI_DOUBLE,&types[1]);
        MPI_Type_struct(2,blen,disp,types,&typec);
        MPI_Type_commit(&typec);
        dispc=(int *)malloc(p[0]*p[1]*sizeof(int));
        countc=(int *)malloc(p[0]*p[1]*sizeof(int));
        for(i=0;i<p[0];i++)
        {
            for(j=0;j<p[1];j++)
            {
                dispc[(i-1)*p[1]+j]=((j-1)*p[0]+(i-1)*nn[1]);
                countc[(i-1)*p[1]+j]=1;
            }
        }
        printf("hi\n");
    }
    if(coords[1]==0)
        MPI_Scatter(a,1,typea,aa,nn[0]*n[1],MPI_DOUBLE,0,comm_1d[0]);
    if(coords[0]==0)
        MPI_Scatter(b,n[1]*nn[1],MPI_DOUBLE,bb,n[1]*nn[1],MPI_DOUBLE,0,comm_1d[1]);
    MPI_Bcast(aa,nn[0]*n[1],MPI_DOUBLE,0,comm_1d[1]);
    MPI_Bcast(bb,n[1]*nn[1],MPI_DOUBLE,0,comm_1d[0]);
    for(i=0;i<nn[0];i++)
    {
        for(j=0;j<nn[1];j++)
        {
            cc[i][j]=0.0;
            for(k=0;k<n[1];k++)
                cc[i][j]+=a[i][k]*b[k][j];
        }
    }
    MPI_Gatherv(cc,nn[0]*nn[1],MPI_DOUBLE,c,countc,dispc,typec,0,comm_2d);
    f_time=MPI_Wtime();
    if(rank==0)
    {
        printf("matrix a:\n");
        for(i=0;i<n[0];i++)
        {
            for(j=0;j<n[1];j++)
                printf("%lf\t",a[i][j]);
            printf("\n");
        }
        printf("matrix b:\n");
        for(i=0;i<n[1];i++)
        {
            for(j=0;j<n[2];j++)
                printf("%lf\t",b[i][j]);
            printf("\n");
        }
        printf("matrix c:\n");
        for(i=0;i<n[0];i++)
        {
            for(j=0;j<n[2];j++)
                printf("%lf\t",c[i][j]);
            printf("\n");
        }
        printf("time take = %1.2lf\n",f_time-s_time);
    }
    MPI_Finalize();
    return 0;
}

#include<mpi.h>
#include<stdio.h>
#include<stdlib.h>
#define NUM_ROW_A 225
#define NUM_COL_A 300
#define NUM_ROW_B 300
#define NUM_COL_B 150
int main()
{
    double a[NUM_ROW_A][NUM_COL_A],b[NUM_ROW_B][NUM_COL_B],c[NUM_ROW_A][NUM_COL_B];
    int n[3]={NUM_ROW_A,NUM_COL_A,NUM_COL_B};
    int p[2]={3,2};
    MPI_Comm comm =MPI_COMM_WORLD;
    MPI_Comm comm_2d,comm_1d[2],pcomm;
    int nn[2];
    double aa[3][NUM_COL_A],bb[NUM_COL_A][2],cc[3][2];
    int coords[2];
    int rank;
    int *dispc,*countc;
    int i,j,k;//ierr;
    int periods[2]={0,0};
    int remains[2];
    //int sizeofdouble=sizeof(double);
    MPI_Aint sizeofreal;
    double s_time,f_time;
    MPI_Datatype typea,typec,types[2];
    int blen[2];
    MPI_Aint disp[2];
    MPI_Init(NULL,NULL);
    s_time=MPI_Wtime();
    MPI_Comm_dup(comm,&pcomm);
    MPI_Bcast(n,3,MPI_INT,0,pcomm);
    MPI_Bcast(p,2,MPI_INT,0,pcomm);
    //periods={0,0};
    MPI_Cart_create(pcomm,2,p,periods,0,&comm_2d);
    MPI_Comm_rank(comm_2d,&rank);
    MPI_Cart_coords(comm_2d,rank,2,coords);
    for(i=0;i

您的错误不在第74行，而是在下面这一行（我懒得去数它是第几行）：
types[2] = MPI_UB;
      ^

这里很可能应该写成 types[1] = MPI_UB;。
您的错误不在第74行，而是在下面这一行（我懒得去数它是第几行）：
types[2] = MPI_UB;
      ^

这里最有可能应该写成 types[1] = MPI_UB;。
说真的，伙计，如果你想得到帮助，就得让别人更容易帮你。你的代码缩进太多，需要左右滚动才能看完；请减少缩进。你说错误发生在第74行；如果你标出了那一行，我没有看到，我也不打算自己去数行号。最后，你似乎贴出了全部代码，然后请SO替你排查。有什么证据表明你认真尝试过自己解决问题吗？例如一个能重现你所报告错误的最小、可编译的程序。

说真的，伙计，如果你想得到任何帮助，你必须让这件事变得更容易。你的代码缩进太多，必须左右滚动；请把缩进改小。你声称错误发生在第74行；如果你标记了该行，我看不到它，也不打算开始数行数。最后，你似乎已经发布了全部代码，并邀请SO替你整理。哪里有证据表明你已经认真尝试解决自己的问题？这类证据包括一个能显示你所报告错误的最小、可编译的程序。

谢谢你，伙计，我也是这么想的，但改完之后又出现了另一个问题，现在程序报出信号：总线错误（7），失败地址为零。我是MPI新手，不知道如何解决这个错误，讲MPI错误的书也帮不上什么忙，你能帮帮我吗？

我找到了出错的位置：if(coords[0]==0) MPI_Scatter(b,n[1]*nn[1],MPI_DOUBLE,bb,n[1]*nn[1],MPI_DOUBLE,0,comm_1d[1]); 我已经检查过coords（6个进程中有2个进程的coords[0]=0）和comm_1d，它们都没有问题，但它仍然无法完成散发（scatter），有什么办法吗？？？

我还怀疑bb的内存分配不正确，所以我把aa、bb和cc全部改成用double指针动态分配。但错误仍然是一样的。

谢谢老兄，我也这么想，但后来又出现了另一个问题，现在它给出了信号：总线错误（7），失败地址为零。我是MPI新手，不知道如何解决这个错误，关于MPI错误的书也帮不了什么忙，你能帮我吗？

我找到了错误所在的位置：if(coords[0]==0) MPI_Scatter(b,n[1]*nn[1],MPI_DOUBLE,bb,n[1]*nn[1],MPI_DOUBLE,0,comm_1d[1]); 我已经检查了coords（6个进程中有2个进程的coords[0]=0）和comm_1d，都很好，但它仍然不能散发数据，有什么办法吗？？？

我也怀疑bb的内存分配不符合要求，所以我把aa、bb和cc都作为double指针动态分配。但错误仍然是一样的。