C++: Need suggestions to avoid deadlock at higher iteration steps
My MPI program consists of many processes that send/receive zero or more messages to/from other processes. Each process periodically checks whether messages are available for processing. The code runs for up to 3000 iteration steps; after that a deadlock occurs and the program freezes. Any suggestions are welcome. My pseudocode is below; let me know if anything is unclear. N is the number of processing nodes:
do{
    if(numberIterations > 1)                        -- Receive Data
    {
getdata:
        MPI_Iprobe()
        while(flagprobe != 0)
        {
            if(TAG == StatusUpdate)
                Update status of processor;
            if(TAG == Data)
                Process Data;
            MPI_Iprobe()
        }
    }
    if(numberIterations < MaxIterations)            -- Send Data
    {
        for(i=0; i<N; i++)
            MPI_Bsend_init(request[i])
        for(i=0; i<N; i++)
            MPI_Start(request[i])
        numberIterations++;
    }
    if(numberIterations == MaxIterations)           -- Update Processor Status
    {
        for(i=0; i<N; i++)
            MPI_Isend(request1[i])                  -- with TAG = StatusUpdate
        set endloopflag = 1
        goto getdata;
    }
    if(numberIterations == MaxIterations && endloopflag == 1)   -- Final Check
    {
        for(i=0; i<N; i++)
            MPI_Test(request1[i], flagtest);
        if(!flagtest)
            goto getdata;
    }
} while(numberIterations < MaxIterations);

for(i=0; i<N; i++)                                  -- Free requests
{
    MPI_Request_free(&request[i]);
}
My knowledge of MPI_Start is a little rusty, but shouldn't it be paired with MPI_Wait (or one of its variants)? Without the wait, I wonder whether your buffers are overflowing, which would be one explanation for the program running for a while before it stops. Since you haven't shown any error messages, I'll take the liberty of reading your statement "there is a deadlock and the program freezes" as also covering the case where the program freezes because buffer space has been exhausted; whether that is strictly a deadlock is debatable.

I use MPI_Request_free at the end to release the requests and the buffer. I used to think the buffer was causing the problem, but the final error message looks like a deadlock: "mpirun noticed that process rank 1 with PID 14656 on node 1082 exited on signal 9 (Killed)."

Your pseudocode is pseudo-correct (probably); not sure we can help much more without the real code.

I still have the same problem. The program keeps looping over MPI_Test(&request[i], &flagtest, &status); MPI_Test(&request1[i], &flagtest1, &status); goto getdata;

I agree with Mark; issuing all those MPI_Start calls without corresponding MPI_Wait calls seems to be asking for trouble. And if you can't do anything until a message has arrived, why use MPI_Iprobe rather than MPI_Probe?

@JonathanDursi Are you suggesting getting rid of MPI_Iprobe and then using my while loop with…
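For reference, here is a minimal sketch of the pattern the comments describe: persistent buffered-send requests that are completed with MPI_Waitall in every iteration before they are started again. The function name exchange_rows and its arguments are illustrative, not taken from the question.

// Minimal sketch (illustrative names, not the poster's code): persistent
// buffered sends that are completed each iteration, so MPI_Start is never
// re-issued on a still-active request.
#include <mpi.h>
#include <cstdlib>
#include <vector>

void exchange_rows(double *myrow, int count, int rank, int nprocs, int iterations)
{
    // Attach a buffer big enough for one message to every other rank per iteration.
    int bufbytes = (nprocs - 1) * (count * (int)sizeof(double) + MPI_BSEND_OVERHEAD);
    void *sendbuf = std::malloc(bufbytes);
    MPI_Buffer_attach(sendbuf, bufbytes);

    // Create the persistent send requests once.
    std::vector<MPI_Request> reqs;
    for (int i = 0; i < nprocs; ++i) {
        if (i == rank) continue;
        MPI_Request r;
        MPI_Bsend_init(myrow, count, MPI_DOUBLE, i, /*tag=*/1, MPI_COMM_WORLD, &r);
        reqs.push_back(r);
    }

    for (int it = 0; it < iterations; ++it) {
        for (std::size_t i = 0; i < reqs.size(); ++i)
            MPI_Start(&reqs[i]);

        // ... probe/receive and process incoming data here ...

        // Complete the sends before the next MPI_Start on the same requests.
        MPI_Waitall((int)reqs.size(), reqs.data(), MPI_STATUSES_IGNORE);
    }

    // The requests are inactive after MPI_Waitall, so they can be freed safely.
    for (std::size_t i = 0; i < reqs.size(); ++i)
        MPI_Request_free(&reqs[i]);

    void *detached; int detbytes;
    MPI_Buffer_detach(&detached, &detbytes);
    std::free(detached);
}

If a rank really has nothing to do until a message arrives, the blocking MPI_Probe suggested above could also replace the MPI_Iprobe polling loop. In the full program below, by contrast, the request[] sends are started in every iteration but are only MPI_Test-ed once the final iteration is reached.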
#include <string.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <fstream>
#include "mpi.h"
#define N 9 //# of nodes
#define M 10 //samples number
#define n 2 //dimension of weight vector
#define TAU 0.15
#define DISTANCE 0.1 //measurement between two nodes
#define A 0.2 //learning rate
#define ITERATION_STEPS 1000 // Program goes for ITERATION_STEPS - 1
#define SAMPLE_STEP 1 //Number of current iteration
#define BT1 0.17
#define BT2 0.02
#define A0 0.9 //initial learning rate
#define AC 0.05 //middle learning rate
#define AF 0.001 //final learning rate
#define TC 4500 //first period of iteration
#define TF 5000 //second period of iteration
#define BUFSIZE 400000
using namespace std;
void printtime(double comm_time,double update_time,string filename,int rank);
int checkack(int ack[],int status[]);
int checkstatus(int status[],int procid);
int noof_activeproc(int status[],int myrank);
void printresult(double w[][n],string filename,int rank);
void plot(double w[][n], char* fileName);
void update(double w[][n], double x[], int t,int rank, int g[][9]);
double norm(double a[], double b[]);
double p(double sample[], double w[], int t);
void OneToTwo (int index, int *row, int *col);
int g(int b, int j);
int main(int argc, char *argv[])
{
int rank, size;
MPI_Init(&argc,&argv );
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Status statusprobe,status,status1[N],status2[N];
MPI_Request request[N],request1[N],request2[N],request3[N]; //N request for N process per iteration
double buf[BUFSIZE]; // buffer for the outgoing message;
int procstatus[N],ack[N]; //store the process status and ack
double temp[n];
double tempsend[n];
ifstream in1, in2, in3;
ofstream out,outtime;
int i, j,k,z,req = 0,req1;
int checklocation=0; //bookmark for MPI_Test
int checklocation1=0; //bookmark for MPI_Test
int numberIterations;
double samples[M][n]; //for all samples
double w[N][n]; //for all node weight
double x[n]; //one sample
int g[9][9];
int count=n;
int flagprobe=0;
int flagtest=1;
int flagrecv=0;
int datareadflag; // flag that checks whether the data has been read or not
double dataincount;
int checktestflag;
int requestfreeflag=0;
int flag=0; //test flag
int flagtest1=0; // check for the request to update the processor status
int endloopflag=0;
int *bptr, bl;
double start_time,end_time,tupdate_start,tupdate_end,t_temp1,t_temp2;
double comm_time;
double update_time=0;
for(i=0;i<N;i++)
{
procstatus[i]=1; // all the processor are on
ack[i]=0;
}
// read sample data
in1.open("samples.dat");
if(!in1)
{
cout<<"100:File openning error. \n";
exit(100);
}
in2.open("initialMap.dat");
if(!in2)
{
cout<<"200:File openning error. \n";
exit(200);
}
in3.open("gij.dat");
if(!in3)
{
cout<<"200:File openning error. \n";
exit(200);
}
for(i=0; i<M; i++)
for(j=0; j<n; j++){
in1>>samples[i][j];
//cout<<samples[i][j]<<"="<<i<<","<<j<<" ";
}
//read initial weights
for(i=0; i<N; i++)
for(j=0; j<n; j++) {
in2>>w[i][j];
//cout<<w[i][j]<<"="<<i<<","<<j<<" ";
}
//read Gij
for(i=0; i<9; i++)
for(j=0; j<9; j++) {
in3>>g[i][j];
//cout<<w[i][j]<<"="<<i<<","<<j<<" ";
}
//Print W to file
out.open("w.dot");
out<<"graph G {"<<endl;
out<<"size=\"10,10\";"<<endl;
out<<"ratio=expand;"<<endl;
out<<"node [shape=circle];"<<endl;
//out<<"node [shape=point];"<<endl;
for(i=0; i<9; i++) {
for(j=0; j<n; j++) {
if(j == 0) out<<i+1<<"[pos = \"";
out<<w[i][j];
if(j == 0) out<<",";
if(j == 1) out<<"!\"]"<<endl;
}
}
for(i=0; i<9; i++)
for(j=0; j<i+1; j++) {
if(g[i][j] == 1 && i != j) out<<i+1<<" -- "<<j+1<<";"<<endl;
}
out<<"}"<<endl;
MPI_Barrier(MPI_COMM_WORLD);
MPI_Buffer_attach( buf, BUFSIZE );
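// Note: the second argument of MPI_Buffer_attach is a size in bytes, so this
// attaches BUFSIZE bytes of buf (not BUFSIZE doubles).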
k = 0;
numberIterations = 1;
dataincount=N; // on the first iteration every process already has all N data items read from file
datareadflag=1;
checktestflag=1;
int tagno=1;
int prevtag; // start_time=MPI_Wtime();
time_t start,start1,end1,end;
time(&start);
do{
if(numberIterations%SAMPLE_STEP==0)
{
t_temp1=MPI_Wtime();
if(k>=M) k=0;
for(j=0; j<n; j++) {
x[j]=samples[k][j];
}
k++;
t_temp2=MPI_Wtime();
}
if(numberIterations>1)
{ getdata:
MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flagprobe, &statusprobe); //tag = numberIterations,
while(flagprobe != 0)
{
if(statusprobe.MPI_TAG==0) // tag=0 means status update of the processor
{
int rtemp[1];
MPI_Recv(rtemp,1,MPI_INT,statusprobe.MPI_SOURCE,0, MPI_COMM_WORLD, &status );
procstatus[status.MPI_SOURCE]=rtemp[0];
}
else
{
datareadflag=1;
dataincount++;
MPI_Recv(temp,count,MPI_DOUBLE,statusprobe.MPI_SOURCE,statusprobe.MPI_TAG, MPI_COMM_WORLD, &status );
for(j=0;j<n;j++) w[status.MPI_SOURCE][j]=temp[j];
}
MPI_Iprobe(MPI_ANY_SOURCE,MPI_ANY_TAG, MPI_COMM_WORLD,&flagprobe, &statusprobe);
} //end while
} //end if (no of iteration >1)
if( numberIterations< ITERATION_STEPS ) // do not send on last iterations.
{
tupdate_start=MPI_Wtime();
update(w,x,k,rank,g);
tupdate_end=MPI_Wtime();
update_time=update_time+tupdate_end-tupdate_start;
if(req==0) {
for(i=0;i<N;i++)
{ int c=0;
if((i!=rank)&&(checkstatus(procstatus,i)==1)) // send if only the process is active
{
MPI_Bsend_init(w[rank], count, MPI_DOUBLE, i ,tagno, MPI_COMM_WORLD,&request[i]);
MPI_Bsend_init(&c,1, MPI_INT,i,0, MPI_COMM_WORLD,&request1[i]);
}
} //end for
req=1;
}
for(i=0;i<N;i++)
{
if((i!=rank)&&(checkstatus(procstatus,i)==1)) // send if only the process is active
{
MPI_Start(&request[i]);
//actual message send.
}
}
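// Note: these persistent buffered-send requests are only MPI_Test-ed once the
// final iteration is reached (below); between iterations they are never
// completed with MPI_Wait/MPI_Test before MPI_Start is called on them again.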
tagno++;
requestfreeflag=1;
checktestflag=0;
dataincount=0;
checklocation=0;
numberIterations++;
datareadflag=0;
cout<<numberIterations<<"-th iterations for . "<<rank<<endl;
} //end if( numberIterations< ITERATION_STEPS )
/* Before exiting notify all the active process */
if((numberIterations == ITERATION_STEPS) && (endloopflag==0)) //endloop flag prevent sending twice
{ // status value (initially all 1);
req1=0;
for(i=0;i<N;i++)
{
if((i!=rank)&&(checkstatus(procstatus,i)==1)) // check if only the process is active
{
MPI_Start(&request1[i]);
}
}
endloopflag=1;
goto getdata;
} //end if
if(numberIterations == ITERATION_STEPS && endloopflag==1)
{
for(i=1;i<N;i++)
{
if((i!=rank)&&(checkstatus(procstatus,i)==1)) // check if only the process is active
{
MPI_Test(&request[i], &flagtest, &status);
MPI_Test(&request1[i], &flagtest1, &status);
if(!flagtest || !flagtest1)
{
checklocation1=i; //for next check continue from i;
cout<<"getdata called by" <<rank<<endl;
goto getdata;
}
} //end if
}//end for
} //end if
} while(numberIterations < ITERATION_STEPS);
for(i=0;i<N;i++)
{
if(i!=rank && request[i]!=MPI_REQUEST_NULL)
{
MPI_Request_free(&request[i]);
MPI_Request_free(&request1[i]);
}
}
if(numberIterations == ITERATION_STEPS)
{
char pno[2];
sprintf(pno,"%d",rank);
string filename;
filename=filename+pno;
filename=filename+".dot";
char *file=strdup(filename.c_str());
ofstream out;
out.open(file);
//plot(w, "final_map_25.dat",rank);
out<<"graph G {"<<endl;
out<<"size=\"10,10\";"<<endl;
out<<"ratio=expand;"<<endl;
out<<"node [shape=point];"<<endl;
//out<<"node [shape=point];"<<endl;
for(i=0; i<9; i++) {
for(j=0; j<n; j++) {
if(j == 0) out<<i+1<<"[pos = \"";
out<<w[i][j];
if(j == 0) out<<",";
if(j == 1) out<<"!\"]"<<endl;
}
}
for(i=0; i<9; i++)
for(j=0; j<i+1; j++) {
if(g[i][j] == 1 && i != j) out<<i+1<<" -- "<<j+1<<";"<<endl;
}
out<<"}"<<endl;
}
MPI_Buffer_detach( &bptr, &bl );
MPI_Finalize();
return 0;
} // End main Program