C++: mpirun noticed that process rank 1 with PID 0 on node X540UV exited on signal 11 (Segmentation fault)
I am a beginner in MPI programming. I want to multiply two-dimensional matrices with MPI. I wrote the program below. When I define the matrices as plain 2D arrays mat_a[][] and mat_b[][], I don't get this problem, but when I define them as vectors of vectors I get: mpirun noticed that process rank 1 with PID 0 on node X540UV exited on signal 11 (Segmentation fault).
#include <stdio.h>
#include <mpi.h>
#include <iostream>
#include <curses.h>
#include <vector>
#define NUM_ROWS_A 12 //rows of input [A]
#define NUM_COLUMNS_A 12 //columns of input [A]
#define NUM_ROWS_B 12 //rows of input [B]
#define NUM_COLUMNS_B 12 //columns of input [B]
#define MASTER_TO_SLAVE_TAG 1 //tag for messages sent from master to slaves
#define SLAVE_TO_MASTER_TAG 4 //tag for messages sent from slaves to master
void makeAB(); //makes the [A] and [B] matrices
void printArray(); //print the content of output matrix [C];
int rank; //process rank
int size; //number of processes
int i, j, k; //helper variables
std::vector<std::vector<double> >mat_a(NUM_ROWS_A,std::vector<double>(NUM_COLUMNS_A)); //declare input [A]
std::vector<std::vector<double> >mat_b(NUM_ROWS_B, std::vector<double>(NUM_COLUMNS_B)); //declare input [B]
std::vector<std::vector<double> > mat_result(NUM_ROWS_A, std::vector<double>(NUM_COLUMNS_B)); //declare output [C]
double start_time; //hold start time
double end_time; // hold end time
int low_bound; //low bound of the number of rows of [A] allocated to a slave
int upper_bound; //upper bound of the number of rows of [A] allocated to a slave
int portion; //portion of the number of rows of [A] allocated to a slave
MPI_Status status; // store status of an MPI_Recv
MPI_Request request; //capture request of a MPI_Isend
int main(int argc, char *argv[])
{
MPI_Init(&argc, &argv); //initialize MPI operations
MPI_Comm_rank(MPI_COMM_WORLD, &rank); //get the rank
MPI_Comm_size(MPI_COMM_WORLD, &size); //get number of processes
/* master initializes work*/
if (rank == 0) {
makeAB();
start_time = MPI_Wtime();
for (i = 1; i < size; i++) {//for each slave other than the master
portion = (NUM_ROWS_A / (size - 1)); // calculate portion without master
low_bound = (i - 1) * portion;
if (((i + 1) == size) && ((NUM_ROWS_A % (size - 1)) != 0)) {//if rows of [A] cannot be equally divided among slaves
upper_bound = NUM_ROWS_A; //last slave gets all the remaining rows
} else {
upper_bound = low_bound + portion; //rows of [A] are equally divisible among slaves
}
//send the low bound first without blocking, to the intended slave
MPI_Isend(&low_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &request);
//next send the upper bound without blocking, to the intended slave
MPI_Isend(&upper_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &request);
//finally send the allocated row portion of [A] without blocking, to the intended slave
MPI_Isend(&mat_a[low_bound][0], (upper_bound - low_bound) * NUM_COLUMNS_A, MPI_DOUBLE, i, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &request);
}
}
//broadcast [B] to all the slaves
MPI_Bcast(&mat_b, NUM_ROWS_B*NUM_COLUMNS_B, MPI_DOUBLE, 0, MPI_COMM_WORLD);
/* work done by slaves*/
if (rank > 0) {
//receive low bound from the master
MPI_Recv(&low_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &status);
//next receive upper bound from the master
MPI_Recv(&upper_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &status);
//finally receive row portion of [A] to be processed from the master
MPI_Recv(&mat_a[low_bound][0], (upper_bound - low_bound) * NUM_COLUMNS_A, MPI_DOUBLE, 0, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &status);
for (i = low_bound; i < upper_bound; i++) {//iterate through a given set of rows of [A]
for (j = 0; j < NUM_COLUMNS_B; j++) {//iterate through columns of [B]
for (k = 0; k < NUM_ROWS_B; k++) {//iterate through rows of [B]
mat_result[i][j] += (mat_a[i][k] * mat_b[k][j]);
}
}
}
//send back the low bound first without blocking, to the master
MPI_Isend(&low_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &request);
//send the upper bound next without blocking, to the master
MPI_Isend(&upper_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &request);
//finally send the processed portion of data without blocking, to the master
MPI_Isend(&mat_result[low_bound][0], (upper_bound - low_bound) * NUM_COLUMNS_B, MPI_DOUBLE, 0, SLAVE_TO_MASTER_TAG + 2, MPI_COMM_WORLD, &request);
}
/* master gathers processed work*/
if (rank == 0) {
for (i = 1; i < size; i++) {// until all slaves have handed back the processed data
//receive low bound from a slave
MPI_Recv(&low_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &status);
//receive upper bound from a slave
MPI_Recv(&upper_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &status);
//receive processed data from a slave
MPI_Recv(&mat_result[low_bound][0], (upper_bound - low_bound) * NUM_COLUMNS_B, MPI_DOUBLE, i, SLAVE_TO_MASTER_TAG + 2, MPI_COMM_WORLD, &status);
}
end_time = MPI_Wtime();
printf("\nRunning Time = %f\n\n", end_time - start_time);
printArray();
}
MPI_Finalize(); //finalize MPI operations
return 0;
}
void makeAB()
{
for (i = 0; i < NUM_ROWS_A; i++) {
for (j = 0; j < NUM_COLUMNS_A; j++) {
mat_a[i][j] = i + j;
}
}
for (i = 0; i < NUM_ROWS_B; i++) {
for (j = 0; j < NUM_COLUMNS_B; j++) {
mat_b[i][j] = i*j;
}
}
}
void printArray()
{
for (i = 0; i < NUM_ROWS_A; i++) {
printf("\n");
for (j = 0; j < NUM_COLUMNS_A; j++)
printf("%8.2f ", mat_a[i][j]);
}
printf("\n\n\n");
for (i = 0; i < NUM_ROWS_B; i++) {
printf("\n");
for (j = 0; j < NUM_COLUMNS_B; j++)
printf("%8.2f ", mat_b[i][j]);
}
printf("\n\n\n");
for (i = 0; i < NUM_ROWS_A; i++) {
printf("\n");
for (j = 0; j < NUM_COLUMNS_B; j++)
printf("%8.2f ", mat_result[i][j]);
}
printf("\n\n");
}
Comments:

"MPI_Bcast(&mat_b, …): this is completely wrong. mat_b is a std::vector."

"@DanielLangr, why can't the matrix elements be sent with MPI_Bcast and the vector mat_b? There is nothing wrong with a 1D vector, and this code works fine with mat_b[][], but I want to use a vector for mat_b because in the main program the size of mat_b changes between iterations."

"MPI_Bcast requires the data being sent to be stored in one contiguous memory location. Try to find out how the elements of a vector of vectors are stored. By the way, &mat_b returns a pointer to the (outer) vector object itself, not to its elements; learn some basics about vectors. You can broadcast a vector of vectors, just not in a single step: you need to broadcast the elements of each inner vector separately, e.g. for (auto &v : mat_b) MPI_Bcast(v.data(), v.size(), MPI_DOUBLE, …). If you want efficient MPI communication, you should use true 2D arrays (e.g. double A[10][10]) instead of jagged arrays (e.g. double** or a C++ vector of vectors). Another, more C++-ish option is a higher-level library such as Boost.MPI with multi-dimensional arrays."
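A minimal sketch of the row-by-row broadcast suggested in the last comment, assuming the question's global mat_b (already sized NUM_ROWS_B x NUM_COLUMNS_B on every rank by the declarations above); it is a drop-in replacement for the single MPI_Bcast(&mat_b, ...) line:

//broadcast [B] one row at a time: each inner vector is contiguous by itself,
//but the rows are separate heap allocations, so each needs its own collective call
for (auto &row : mat_b) {
    MPI_Bcast(row.data(), (int)row.size(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
}

Note that the MPI_Isend/MPI_Recv calls that pass &mat_a[low_bound][0] and &mat_result[low_bound][0] with a count spanning several rows have the same defect: they assume (upper_bound - low_bound) rows are contiguous, which holds for a mat_a[][] array but not for a vector of vectors, and is what triggers the segmentation fault.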
。这是完全错误的。mat_b
是std::vector@DanielLangr,为什么不能用MPI\u b Act vectormat\u b发送矩阵元素?1d向量中没有什么问题,这段代码使用mat\u b[]工作正常,但我想使用vectormat__b,因为在主程序中,mat_b的大小在重复中发生变化,因为MPI_Bcast
请求将发送的数据存储在一个连续的内存位置。尝试找出向量的元素是如何存储的。顺便说一句,mat_b
返回指向(外部)的指针向量对象本身,而不是它的元素。了解有关向量的一些基本知识。可以广播向量向量,但不能在一个步骤中广播。相反,您需要单独广播每个内部向量的元素。例如(const auto&v:mat_b)MPI_Bcast(v.data(),v.size(),MPI_DOUBLE,…)如果你想做有效的MPI通信,你应该使用2D数组(例如,代码> double A[10 ] [10 ] < /C>)代替锯齿数组(例如<代码>双** < /COD>或向量的C++向量)。另一个C++的ISH选项是使用一个更高级的库,例如具有多维数组的BooSt.MPi。
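Since a plain 1D vector has no such problem, another option is to keep each matrix in a single flat std::vector<double> indexed as i * columns + j. The whole matrix is then one contiguous block, so the original single-call MPI_Bcast becomes valid, a range of rows of [A] is contiguous too (fixing the row-portion sends and receives), and the vector can still be resized between iterations. A minimal, self-contained sketch of the idea (the flat indexing is this sketch's assumption, not code from the question; compile with mpic++, run with mpirun -np 2):

#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    const int rows = 12, cols = 12;          //same sizes as the question
    std::vector<double> mat_b(rows * cols);  //one contiguous allocation

    if (rank == 0)                           //master fills [B], as makeAB() does
        for (int i = 0; i < rows; i++)
            for (int j = 0; j < cols; j++)
                mat_b[i * cols + j] = i * j; //element (i, j) lives at i*cols + j

    //the storage is contiguous, so one broadcast covers the whole matrix
    MPI_Bcast(mat_b.data(), rows * cols, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    if (rank == 1)                           //quick check on a non-root rank
        printf("rank 1 sees mat_b[2][3] = %.1f\n", mat_b[2 * cols + 3]);

    MPI_Finalize();
    return 0;
}

With the same layout, &mat_a[low_bound * NUM_COLUMNS_A] is a valid contiguous buffer of (upper_bound - low_bound) * NUM_COLUMNS_A doubles for the question's row-portion transfers.

The full output from the failing run, for reference: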
[X540UV:08026] *** Process received signal ***
[X540UV:08028] *** Process received signal ***
[X540UV:08028] Signal: Segmentation fault (11)
[X540UV:08028] Signal code: Address not mapped (1)
[X540UV:08028] Failing at address: 0xf6e680
[X540UV:08027] *** Process received signal ***
[X540UV:08027] Signal: Segmentation fault (11)
[X540UV:08027] Signal code: Address not mapped (1)
[X540UV:08027] Failing at address: 0xf6e680
[X540UV:08027] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x354b0)[0x7f1a9cae74b0]
[X540UV:08027] [ 1] multi3[0x401cfa]
[X540UV:08027] [ 2] multi3[0x4012d5]
[X540UV:08027] [ 3] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f1a9cad2830]
[X540UV:08027] [ 4] multi3[0x400e79]
[X540UV:08027] *** End of error message ***
[X540UV:08028] [ 0] [X540UV:08026] Signal: Segmentation fault (11)
[X540UV:08026] Signal code: Address not mapped (1)
[X540UV:08026] Failing at address: 0xf6e680
/lib/x86_64-linux-gnu/libc.so.6(+0x354b0)[0x7fd43be274b0]
[X540UV:08026] [ 0] [X540UV:08028] /lib/x86_64-linux-gnu/libc.so.6(+0x354b0)[0x7fde584744b0]
[X540UV:08026] [ 1] multi3[0x401cfa]
[X540UV:08026] [ 2] multi3[0x4012d5]
[ 1] [X540UV:08026] [ 3] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fde5845f830]
[X540UV:08026] [ 4] multi3[0x400e79]
[X540UV:08026] *** End of error message ***
multi3[0x401cfa]
[X540UV:08028] [ 2] multi3[0x4012d5]
[X540UV:08028] [ 3] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fd43be12830]
[X540UV:08028] [ 4] multi3[0x400e79]
[X540UV:08028] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 1 with PID 0 on node X540UV exited on signal 11 (Segmentation fault).