MPI C中的矩阵和向量乘法_C_Matrix_Vector_Mpi

MPI C中的矩阵和向量乘法

c matrix vector mpi

MPI C中的矩阵和向量乘法,c,matrix,vector,mpi,C,Matrix,Vector,Mpi,我有一个任务，我目前的MPI类，我必须乘以2x3矩阵 1 2 3 4 5 6 通过3x1向量 7 8 9 我被告知假设我们只有两个处理器我有以下建议，但我将陷入僵局，我不知道为什么 #include <stdio.h> #include <string.h> #include <stdlib.h> #include <mpi.h> int main(void) { int comm_sz; /* number of processes

我有一个任务，我目前的MPI类，我必须乘以2x3矩阵

1 2 3

4 5 6

通过3x1向量 7 8 9

我被告知假设我们只有两个处理器

我有以下建议，但我将陷入僵局，我不知道为什么


#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <mpi.h>

int main(void)
{
    int comm_sz; /* number of processes*/
    int my_rank; /* my process rank */
    int m, n;
    double* Matrix;
    double* Vector;
    double* Result;
    double* localMatrix;

    MPI_Comm comm;


    FILE *fptr = fopen("mv-data.txt", "r");
    MPI_Init(NULL, NULL);
    comm = MPI_COMM_WORLD;
    MPI_Comm_size(comm, &comm_sz);
    MPI_Comm_rank(comm, &my_rank);

    if (my_rank == 0)
    {
        fscanf(fptr,"%d", m);
        fscanf(fptr, "%d", n);
    }
    MPI_Bcast(m, 1, MPI_INT, 0, comm);
    MPI_Bcast(n,1,MPI_INT, 0, comm);


    if (my_rank==0)
    {
    Matrix = malloc(m*n * sizeof(double));

    for(int i = 0; i<m; i++)
        for(int j=0; j< n; j++)
            fscanf(fptr, "%lf", &Matrix[i*n+j]);

    Vector = malloc(n*sizeof(double));

    for (int i = 0; i < n; i++)
        fscanf(fptr,"%lf", &Vector[i]);


    Result = malloc(m * sizeof(double));



    for (int row = 0; row < m; row++)
    {
        localMatrix = malloc(m*sizeof(double));
        for(int column = 0; column < n; column++)
            localMatrix[column] = Matrix[row*n + column];

        MPI_Send(localMatrix, n, MPI_DOUBLE, row % comm_sz, 0, comm);
    }

    }
    MPI_Bcast(Vector, n, MPI_DOUBLE, 0,comm);
    MPI_Recv(localMatrix, n, MPI_DOUBLE, 0, 0, comm, MPI_STATUS_IGNORE);
    Result[my_rank] = 0;
    for(int i = 0; i < n; i++)
    {
        Result[my_rank] += localMatrix[i] * Vector[i];
    }

    if (my_rank = 0)
    {
        for (int i = 0; i < m; i++)
            printf("%d", Result[i]);
    }

    return 0;

}


#包括
#包括
#包括
#包括
内部主（空）
{
int comm_sz；/*进程数*/
int my_秩；/*我的进程秩*/
int m，n；
双*矩阵；
双*向量；
双*结果；
双*局部矩阵；
MPI_通信；
文件*fptr=fopen（“mv data.txt”，“r”）；
MPI_Init（NULL，NULL）；
通信=MPI_通信世界；
MPI_Comm_尺寸（Comm和Comm_sz）；
MPI通信等级（通信和我的通信等级）；
如果（我的排名==0）
{
fscanf（fptr，“%d”，m）；
fscanf（fptr，“%d”，n）；
}
MPI_Bcast（m，1，MPI_INT，0，comm）；
MPI_Bcast（n，1，MPI_INT，0，comm）；
如果（我的排名==0）
{
矩阵=malloc（m*n*sizeof（double））；
对于（int i=0；i在rank 0
中，您使用的是阻塞MPI\u Send
。其他进程随后调用集合MPI\u Bcast
，而不是匹配的MPI\u Recv
。这可能会导致死锁（缓冲可能发生在MPI\u Send
内部，也可能避免死锁）.见下文：
if (my_rank==0)
{
   for (int row = 0; row < m; row++)
   {
      MPI_Send(localMatrix, n, MPI_DOUBLE, row % comm_sz, 0, comm);
   }
}
MPI_Bcast(Vector, n, MPI_DOUBLE, 0,comm); // COLLECTIVE - PROCESS MAY GET STUCK HERE
MPI_Recv(localMatrix, n, MPI_DOUBLE, 0, 0, comm, MPI_STATUS_IGNORE);

将导致分段错误，因为未分配localMatrix
的内存。访问Result
以及Vector
也会导致分段错误，因为您未在其他进程中为其分配内存（例如：排名1到N-1）除了秩0之外
使用集体操作：分散矩阵，广播向量并收集结果。如果（坦克==0）将只保留在代码的最开始和最末尾。
if (my_rank==0)
{
   for (int row = 0; row < m; row++)
   {
      MPI_Send(localMatrix, n, MPI_DOUBLE, row % comm_sz, 0, comm);
   }
} else
{
      MPI_Recv(localMatrix, n, MPI_DOUBLE, 0, 0, comm, MPI_STATUS_IGNORE);
}
MPI_Bcast(Vector, n, MPI_DOUBLE, 0,comm);

MPI_Recv(localMatrix, n, MPI_DOUBLE, 0, 0, comm, MPI_STATUS_IGNORE);