Best way to separate the parallel MPI parts from a sequential CUDA program


I have a huge sequential program in which I want to parallelize some of the algorithms with MPI and CUDA. How do I correctly separate the sequential part from the parallel parts? The problem is the nesting of the parallel algorithms, and the use of Slurm or LoadLeveler (for example, on my MPI cluster I cannot write something like the following):

mpirun -n 1 a.out : -n 2 b.out

For example:
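A minimal sketch of the kind of structure meant here, assuming a long sequential pipeline in which only one stage should run in parallel (all function names are hypothetical stand-ins, not from the original question):

#include <iostream>

// Hypothetical program structure: only solve_in_parallel() would need
// MPI ranks and/or a CUDA device; everything else is sequential.
void load_input()        { std::cout << "load input (sequential)\n"; }
void preprocess()        { std::cout << "preprocess (sequential)\n"; }
void solve_in_parallel() { std::cout << "solve (wants MPI + CUDA)\n"; }
void postprocess()       { std::cout << "postprocess (sequential)\n"; }

int main() {
    load_input();
    preprocess();
    solve_in_parallel();  // the nested parallel part
    postprocess();
    return 0;
}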


I found a good solution to this problem. Here is the sample code:

#include <iostream>
#include <mpi.h>
#include <unistd.h>

using namespace std;

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0) {
        // Rank 0 plays the sequential part (the "GUI"): it initializes,
        // triggers the parallel algorithm, and collects the results.
        cout << "[GUI] Start performing initialization..." << endl;
        sleep(2);
        cout << "[GUI] Send command to start execution..." << endl;
        int command = 1;
        // TODO: for now this is hard-coded to send the command to rank 1 only
        MPI_Send(&command, 1, MPI_INT, 1, 0, MPI_COMM_WORLD);
        cout << "[GUI] Waiting for execution results..." << endl;
        int buf[5];
        MPI_Recv(buf, 5, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        for (int i = 0; i < 5; i++) {
            cout << "buf[" << i << "] = " << buf[i] << endl;
        }
    } else {
        // Every other rank runs the parallel algorithm: it blocks until
        // the sequential part sends a command, then computes and replies.
        int command;
        MPI_Recv(&command, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        cout << "Received command: " << command << endl;
        if (command == 1) {
            cout << "[ALGO] Received command to start execution" << endl;
            sleep(2);
            cout << "[ALGO] Send computed data..." << endl;
            int buf[5] = {5, 4, 3, 2, 1};
            MPI_Send(buf, 5, MPI_INT, 0, 0, MPI_COMM_WORLD);
        }
    }

    MPI_Finalize();
    return 0;
}
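For reference, a sketch of how one might build and launch the example; the compiler wrapper and launcher invocations are assumptions that depend on the local MPI installation and scheduler:

# compile with an MPI wrapper compiler (e.g. mpicxx from Open MPI or MPICH)
mpicxx example.cpp -o example

# run with two processes: rank 0 = sequential "GUI", rank 1 = algorithm
mpirun -n 2 ./example

# or, inside a Slurm allocation:
srun -n 2 ./example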
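As a possible extension (not part of the original answer): when the parallel algorithms are themselves nested, it can help to split off a sub-communicator for the workers with MPI_Comm_split, so that the sequential driver on rank 0 never participates in the algorithm's collectives. A minimal sketch, assuming rank 0 is the driver and all other ranks are workers:

#include <iostream>
#include <mpi.h>

using namespace std;

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Color 0 = the sequential driver (rank 0), color 1 = worker ranks;
    // each color becomes its own communicator.
    int color = (rank == 0) ? 0 : 1;
    MPI_Comm algo_comm;
    MPI_Comm_split(MPI_COMM_WORLD, color, rank, &algo_comm);

    if (rank != 0) {
        // Workers can run collectives among themselves without the
        // driver, e.g. a reduction inside the parallel algorithm.
        int local = rank, sum = 0;
        MPI_Allreduce(&local, &sum, 1, MPI_INT, MPI_SUM, algo_comm);
        cout << "[ALGO] rank " << rank << " sees sum = " << sum << endl;
    }

    MPI_Comm_free(&algo_comm);
    MPI_Finalize();
    return 0;
}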