C++: How to replicate the functionality of MPI_Accumulate in MPI-2+?
I am learning the MPI one-sided communication introduced in MPI-2/MPI-3, and I came across the following passage about MPI_Accumulate:
MPI_Accumulate allows the caller to combine the data being moved to the target process with data already present there, such as the accumulation of a sum at the target process. The same functionality could be achieved by using MPI_Get to retrieve the data (followed by a synchronization); performing the sum operation at the caller; and then using MPI_Put to send the updated data back to the target process. Accumulate simplifies this messiness.
However, only a limited set of operations (max, min, sum, product, and so on) may be used with MPI_Accumulate, and user-defined operations (created with MPI_Op_create; see the illustration below) are not allowed. I would like to know how to implement the messiness described above using MPI_Get, a synchronization, the op, and MPI_Put. Is there a tutorial or a working code example in C/C++? Thanks.
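(For context, here is a minimal illustration of a user-defined operation, using a hypothetical absmax op that is not part of the original post: such an op is legal in collective reductions like MPI_Allreduce, but the MPI standard restricts MPI_Accumulate to the predefined ops plus MPI_REPLACE.)

#include <mpi.h>
#include <algorithm>
#include <cstdlib>
// Hypothetical user-defined op: element-wise maximum of absolute values.
void absmax(void *in, void *inout, int *len, MPI_Datatype *) {
    int *a = static_cast<int *>(in);
    int *b = static_cast<int *>(inout);
    for (int i = 0; i < *len; i++)
        b[i] = std::max(std::abs(a[i]), std::abs(b[i]));
}
int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    MPI_Op op;
    MPI_Op_create(&absmax, /*commute=*/1, &op);
    int local = -3, global = 0;
    MPI_Allreduce(&local, &global, 1, MPI_INT, op, MPI_COMM_WORLD); // fine
    //MPI_Accumulate(&local, 1, MPI_INT, 0, 0, 1, MPI_INT, op, win); // erroneous: op must be predefined
    MPI_Op_free(&op);
    MPI_Finalize();
    return 0;
}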
For testing, I adapted a piece of code that uses one-sided communication to create an integer counter that is kept synchronized across MPI processes. The problem line that uses MPI_Accumulate is marked.

The code compiles as-is and returns after about 15 seconds. However, when I try to replace MPI_Accumulate with the supposedly equivalent elementary sequence of operations shown in the comment block following the problem line, the compiled program hangs indefinitely.

Can anyone help me explain what went wrong, and what the correct way is to replace MPI_Accumulate in this situation?
By the way, I compile the code with
g++ -std=c++11 -I.. mpistest.cpp -lmpi
and run the result with
mpiexec -n 4 a.exe
Answer: Implementing accumulate with get and put is indeed going to be very messy, especially when you have to deal with derived datatypes and the like. But assuming you are accumulating a single integer and simply want to sum a local value into a remote buffer, you can do the following (pseudo-code only; the sketch appears at the end of this post).

Comments:

- Your code is incorrect because you give up the exclusive lock after the get, which causes an atomicity problem when two processes try to sum into the data at the same time. Locks are finer-grained than fences (which are collective), so use them where you can. But be careful about nesting synchronization: you placed a fence inside an already-locked region, which is why you hang. Note also that MPI-3, which appeared after that question/answer, considerably improved the one-sided routines and semantics (see the MPI_Fetch_and_op aside at the end of this post).
- @JonathanDursi Thanks. I tried lock/unlock; it still does not give the correct result. See the edit.
- The MPI-3.0 standard improved the routines and semantics. Existing implementations have not :)
- If this case is too complicated to explain, I will ask a simpler question in which a single "global" value records the "minimum" across the processes. Even in that simpler setting I run into synchronization problems.

The question's code:
// adapted from https://stackoverflow.com/questions/4948788/
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <thread>
#include <chrono>
struct mpi_counter_t {
    MPI_Win win;
    int hostrank;  // id of the process that hosts the values exposed to all processes
    int rank;      // process id
    int size;      // number of processes
    int val;       // this process's accumulated contribution to the counter
    int *hostvals; // one slot per process; allocated on the host rank only
};
struct mpi_counter_t *create_counter(int hostrank) {
    struct mpi_counter_t *count;

    count = (struct mpi_counter_t *)malloc(sizeof(struct mpi_counter_t));
    count->hostrank = hostrank;
    MPI_Comm_rank(MPI_COMM_WORLD, &(count->rank));
    MPI_Comm_size(MPI_COMM_WORLD, &(count->size));

    if (count->rank == hostrank) {
        // Only the host rank exposes memory through the window.
        MPI_Alloc_mem(count->size * sizeof(int), MPI_INFO_NULL, &(count->hostvals));
        for (int i = 0; i < count->size; i++) count->hostvals[i] = 0;
        MPI_Win_create(count->hostvals, count->size * sizeof(int), sizeof(int),
                       MPI_INFO_NULL, MPI_COMM_WORLD, &(count->win));
    } else {
        // Non-host ranks attach a zero-sized region to the window.
        count->hostvals = NULL;
        MPI_Win_create(count->hostvals, 0, 1,
                       MPI_INFO_NULL, MPI_COMM_WORLD, &(count->win));
    }
    count->val = 0;

    return count;
}
int increment_counter(struct mpi_counter_t *count, int increment) {
    int *vals = (int *)malloc(count->size * sizeof(int));
    int val;

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, count->hostrank, 0, count->win);
    for (int i = 0; i < count->size; i++) {
        if (i == count->rank) {
            // Problem line: increment hostvals[i] on the host.
            MPI_Accumulate(&increment, 1, MPI_INT, 0, i, 1, MPI_INT, MPI_SUM, count->win);
            /* Question: how do I correctly replace the MPI_Accumulate call above
               with the following sequence? Currently it makes the program hang.
            MPI_Get(&vals[i], 1, MPI_INT, 0, i, 1, MPI_INT, count->win);
            MPI_Win_fence(0, count->win);
            vals[i] += increment;
            MPI_Put(&vals[i], 1, MPI_INT, 0, i, 1, MPI_INT, count->win);
            MPI_Win_fence(0, count->win);
            */
        } else {
            MPI_Get(&vals[i], 1, MPI_INT, 0, i, 1, MPI_INT, count->win);
        }
    }
    MPI_Win_unlock(0, count->win);

    // Do the op part of MPI_Accumulate's work locally on count->rank.
    count->val += increment;
    vals[count->rank] = count->val;

    // Return the sum of vals.
    val = 0;
    for (int i = 0; i < count->size; i++)
        val += vals[i];
    free(vals);
    return val;
}
void delete_counter(struct mpi_counter_t **count) {
    if ((*count)->rank == (*count)->hostrank) {
        MPI_Free_mem((*count)->hostvals);
    }
    MPI_Win_free(&((*count)->win));
    free(*count);
    *count = NULL;
    return;
}
void print_counter(struct mpi_counter_t *count) {
    if (count->rank == count->hostrank) {
        for (int i = 0; i < count->size; i++) {
            printf("%2d ", count->hostvals[i]);
        }
        puts("");
    }
}
int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);

    const int WORKITEMS = 50;
    struct mpi_counter_t *c;
    int rank;
    int result = 0;

    c = create_counter(0);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    srand(rank);

    while (result < WORKITEMS) {
        result = increment_counter(c, 1);
        if (result <= WORKITEMS) {
            printf("%d working on item %d...\n", rank, result);
            std::this_thread::sleep_for(std::chrono::seconds(rand() % 2));
        } else {
            printf("%d done\n", rank);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    print_counter(c);
    delete_counter(&c);

    MPI_Finalize();
    return 0;
}
EDIT: Following the comment, I also tried dropping and re-acquiring the exclusive lock around the get/put instead of using fences. The program no longer hangs, but it reports that rank 0 does all the work:

int increment_counter(struct mpi_counter_t *count, int increment) {
    int *vals = (int *)malloc(count->size * sizeof(int));
    int val;

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, count->hostrank, 0, count->win);
    for (int i = 0; i < count->size; i++) {
        if (i == count->rank) {
            //MPI_Accumulate(&increment, 1, MPI_INT, 0, i, 1, MPI_INT, MPI_SUM, count->win);
            // Replacement attempt: release and re-acquire the lock around the update.
            MPI_Get(&vals[i], 1, MPI_INT, 0, i, 1, MPI_INT, count->win);
            MPI_Win_unlock(0, count->win);
            vals[i] += increment;
            MPI_Put(&vals[i], 1, MPI_INT, 0, i, 1, MPI_INT, count->win);
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, count->hostrank, 0, count->win);
        } else {
            MPI_Get(&vals[i], 1, MPI_INT, 0, i, 1, MPI_INT, count->win);
        }
    }
    MPI_Win_unlock(0, count->win);

    // Do the op part of MPI_Accumulate's work locally on count->rank.
    count->val += increment;
    vals[count->rank] = count->val;

    // Return the sum of vals.
    val = 0;
    for (int i = 0; i < count->size; i++)
        val += vals[i];
    free(vals);
    return val;
}
The answer's pseudo-code sketch (the entire read-modify-write stays inside a single exclusive-lock epoch, so no other process can interleave between the get and the put):

MPI_Win_lock(EXCLUSIVE);             /* exclusive needed for accumulate atomicity constraints */
MPI_Get(&remote_data);
MPI_Win_flush(target, win);          /* make sure the get has completed (MPI-3) */
new_val = local_data + remote_data;  /* apply the op at the origin */
MPI_Put(&new_val);
MPI_Win_unlock();                    /* completes the put */
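For concreteness, a minimal sketch (not from the original thread) of how the problem line in increment_counter could be rewritten along these lines. It assumes MPI-3, since MPI_Win_flush is an MPI-3 call; the exclusive lock taken at the top of increment_counter is never released between the get and the put:

// In the i == count->rank branch, inside the existing MPI_LOCK_EXCLUSIVE epoch:
MPI_Get(&vals[i], 1, MPI_INT, count->hostrank, i, 1, MPI_INT, count->win);
MPI_Win_flush(count->hostrank, count->win); // complete the get without giving up the lock
vals[i] += increment;                       // apply the op at the origin
MPI_Put(&vals[i], 1, MPI_INT, count->hostrank, i, 1, MPI_INT, count->win);
// the final MPI_Win_unlock completes the put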
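Aside (referenced from the comments above): among the MPI-3 improvements to the one-sided routines are atomic read-modify-write calls such as MPI_Fetch_and_op, which performs the fetch and the sum in a single atomic operation, although still only with predefined ops. A minimal sketch of incrementing one slot of the remote counter with it, reusing the count structure and loop index i from increment_counter:

int one = 1, old_val = 0;
MPI_Win_lock(MPI_LOCK_SHARED, count->hostrank, 0, count->win);
// Atomically add 'one' to hostvals[i] on the host and fetch its previous value;
// a shared lock suffices because MPI_Fetch_and_op itself is atomic.
MPI_Fetch_and_op(&one, &old_val, MPI_INT, count->hostrank, i, MPI_SUM, count->win);
MPI_Win_unlock(count->hostrank, count->win);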