Best way to separate the parallel MPI parts from a sequential CUDA program


I have a huge sequential program in which I want to parallelize some of the algorithms with MPI and CUDA. How do I correctly separate the sequential part from the parallel parts? The problem is the nesting of the parallel algorithms, and the use of Slurm or LoadLeveler (for example, on my MPI cluster I cannot write something like the following):

mpirun -n 1 a.out : -n 2 b.out

For example:
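A minimal sketch of the kind of structure meant here, assuming a long sequential pipeline in which only one stage should run in parallel (all function names are hypothetical stand-ins, not from the original question):

#include <iostream>

// Hypothetical program structure: only solve_in_parallel() would need
// MPI ranks and/or a CUDA device; everything else is sequential.
void load_input()        { std::cout << "load input (sequential)\n"; }
void preprocess()        { std::cout << "preprocess (sequential)\n"; }
void solve_in_parallel() { std::cout << "solve (wants MPI + CUDA)\n"; }
void postprocess()       { std::cout << "postprocess (sequential)\n"; }

int main() {
    load_input();
    preprocess();
    solve_in_parallel();  // the nested parallel part
    postprocess();
    return 0;
}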


I found a good solution to this problem. Here is the sample code:

#include <iostream>
#include <mpi.h>
#include <unistd.h>

using namespace std;

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0) {
        // Rank 0 plays the sequential part (the "GUI"): it initializes,
        // triggers the parallel algorithm, and collects the results.
        cout << "[GUI] Start performing initialization..." << endl;
        sleep(2);
        cout << "[GUI] Send command to start execution..." << endl;
        int command = 1;
        // TODO: for now this is hard-coded to send the command to rank 1 only
        MPI_Send(&command, 1, MPI_INT, 1, 0, MPI_COMM_WORLD);
        cout << "[GUI] Waiting for execution results..." << endl;
        int buf[5];
        MPI_Recv(buf, 5, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        for (int i = 0; i < 5; i++) {
            cout << "buf[" << i << "] = " << buf[i] << endl;
        }
    } else {
        // Every other rank runs the parallel algorithm: it blocks until
        // the sequential part sends a command, then computes and replies.
        int command;
        MPI_Recv(&command, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        cout << "Received command: " << command << endl;
        if (command == 1) {
            cout << "[ALGO] Received command to start execution" << endl;
            sleep(2);
            cout << "[ALGO] Send computed data..." << endl;
            int buf[5] = {5, 4, 3, 2, 1};
            MPI_Send(buf, 5, MPI_INT, 0, 0, MPI_COMM_WORLD);
        }
    }

    MPI_Finalize();
    return 0;
}
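For reference, a sketch of how one might build and launch the example; the compiler wrapper and launcher invocations are assumptions that depend on the local MPI installation and scheduler:

# compile with an MPI wrapper compiler (e.g. mpicxx from Open MPI or MPICH)
mpicxx example.cpp -o example

# run with two processes: rank 0 = sequential "GUI", rank 1 = algorithm
mpirun -n 2 ./example

# or, inside a Slurm allocation:
srun -n 2 ./example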
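As a possible extension (not part of the original answer): when the parallel algorithms are themselves nested, it can help to split off a sub-communicator for the workers with MPI_Comm_split, so that the sequential driver on rank 0 never participates in the algorithm's collectives. A minimal sketch, assuming rank 0 is the driver and all other ranks are workers:

#include <iostream>
#include <mpi.h>

using namespace std;

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Color 0 = the sequential driver (rank 0), color 1 = worker ranks;
    // each color becomes its own communicator.
    int color = (rank == 0) ? 0 : 1;
    MPI_Comm algo_comm;
    MPI_Comm_split(MPI_COMM_WORLD, color, rank, &algo_comm);

    if (rank != 0) {
        // Workers can run collectives among themselves without the
        // driver, e.g. a reduction inside the parallel algorithm.
        int local = rank, sum = 0;
        MPI_Allreduce(&local, &sum, 1, MPI_INT, MPI_SUM, algo_comm);
        cout << "[ALGO] rank " << rank << " sees sum = " << sum << endl;
    }

    MPI_Comm_free(&algo_comm);
    MPI_Finalize();
    return 0;
}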