MPI I/O:输出文件中随机缺少某些行

MPI I/O:输出文件中随机缺少某些行,io,parallel-processing,mpi,Io,Parallel Processing,Mpi,我正在处理一个 MPI I/O 问题。进程 0(rank 0)从参数文件读取位置信息,然后发送给进程 1、2、3。这些进程(1、2、3)会根据进程 0 给出的位置从待读取文件中取出文本,并分别写入输出文件的不同行。当我在一台计算机上运行程序时,一切正常。但当我使用两台计算机时(仍然是 4 个进程,rank 0、1 在服务器上,rank 2、3 在客户机上),输出文件中会随机丢失一些行!这是我的代码: #include "mpi.h" #include <stdio.h> #include <stdlib.h> #inclu

我正在处理一个 MPI I/O 问题。进程 0(rank 0)从参数文件读取位置信息,然后发送给进程 1、2、3。这些进程(1、2、3)会根据进程 0 给出的位置从待读取文件中取出文本,并分别写入输出文件的不同行。当我在一台计算机上运行程序时,一切正常。但当我使用两台计算机时(仍然是 4 个进程,rank 0、1 在服务器上,rank 2、3 在客户机上),输出文件中会随机丢失一些行!这是我的代码:

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Message codes exchanged between the master (rank 0) and the slaves. */
#define MSG_MISSION_COMPLETE 78
#define MSG_EXIT 79

/*
 * Description of the MPI struct datatype `location`.
 *
 * It mirrors the task record declared locally in master() and slave():
 *
 *     struct { float pause; int stand; int offset; }
 *
 * so the element types are FLOAT, INT, INT.  Describing the int field
 * `stand` as MPI_FLOAT gives the message the wrong type signature, which
 * only happens to work when sender and receiver share the same data
 * representation.
 */
int array_of_blocklengths[3] = { 1, 1, 1 };
MPI_Aint array_of_displacements[3] = { 0, sizeof(float), sizeof(float) + sizeof(int) };
MPI_Datatype array_of_types[3] = { MPI_FLOAT, MPI_INT, MPI_INT };

/* Built and committed by master() and slave() before tasks are exchanged. */
MPI_Datatype location;

/* Rank 0: reads the task list and hands tasks out to the slaves. */
int master(void);
/* Ranks >= 1: execute tasks using the shared read (fhr) and write (fhw) files. */
int slave(MPI_File fhr, MPI_File fhw);
int main(int argc, char* argv[])
{
    int rank;

    MPI_File fhr, fhw;
    char read[] = "./sharedReadSample1.txt";
    char write[] = "./sharedWriteSample1.txt";
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    printf("%d is speaking\n", rank);

    MPI_File_open(MPI_COMM_WORLD, read, MPI_MODE_RDONLY, MPI_INFO_NULL, &fhr);
    MPI_File_open(MPI_COMM_WORLD, write, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &fhw);

    if (rank == 0)//rank 0, dispatch the tasks
        master();
    else//other processes
        slave(fhr, fhw);

    MPI_Finalize();
    printf("%d said byebye\n", rank);

    MPI_File_close(&fhr);
    MPI_File_close(&fhw);
    return 0;
}


/*
 * Master (rank 0): reads task records from "sharedAttributeSample1.txt",
 * sends one task to every slave, and afterwards gives the next task to
 * whichever slave reports completion first.  When no tasks are left and
 * every outstanding completion report has been received, each slave is
 * sent an exit message (tag >= 10000).
 *
 * Only records that were actually parsed are dispatched; the task tag is
 * the zero-based record index.  If the parameter file cannot be opened,
 * no task is dispatched and the slaves are simply told to exit.
 *
 * Returns 0.
 */
int master(void)
{
    enum { MAX_TASKS = 10000, EXIT_TAG_BASE = 10000 };

    /* static: keeps the ~120 KB table off the stack and zero-initialised */
    static struct{
        float pause;//pause time
        int stand;//starting position in the file
        int offset;//offset
    }buf_str[MAX_TASKS];

    int buf[256];
    int i, size, nslave;
    int ntasks = 0;      /* records successfully parsed */
    int next = 0;        /* index of the next task to hand out */
    int outstanding = 0; /* tasks sent whose completion is still pending */
    FILE* fp;
    FILE* fpm = NULL;//for log
    MPI_Status status;

    MPI_Comm_size(MPI_COMM_WORLD, &size);
    nslave = size - 1;//the number of slaves

    /*
     * Build the message datatype first: slaves always receive with
     * `location`, so even the exit message on the error path must use it.
     */
    MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements, array_of_types, &location);
    MPI_Type_commit(&location);

    fp = fopen("sharedAttributeSample1.txt", "rb");
    if (fp == NULL)
    {
        printf("The file was not opened\n");
        getchar();
    }
    else
    {
        printf("The file was opened\n");

        fpm = fopen("./logs/log_master.txt","wb");
        if (fpm == NULL)
            printf("master log system failed to load!\n");

        /* stop at the first record that does not parse completely */
        while (ntasks < MAX_TASKS &&
               fscanf(fp,"%f,%d,%d", &buf_str[ntasks].pause, &buf_str[ntasks].stand, &buf_str[ntasks].offset) == 3)
        {
            ntasks++;
        }
        fclose(fp);
    }

    /* initial round: one task per slave, as far as tasks are available */
    for (i = 0; i < nslave && next < ntasks; i++)
    {
        MPI_Send(&buf_str[next], 1, location, i+1, next, MPI_COMM_WORLD);
        if (fpm != NULL)
            fprintf(fpm, "initial message %d sent\n", next);
        next++;
        outstanding++;
    }

    /* keep going until every dispatched task has been reported back */
    while (outstanding > 0)
    {
        MPI_Recv(buf, 256, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);//receive messages from slaves
        outstanding--;
        if (fpm != NULL)
            fprintf(fpm, "task %d complete massage received\n",status.MPI_TAG);

        if (buf[0] != MSG_MISSION_COMPLETE && fpm != NULL)
            fprintf(fpm, "unexpected message code %d from %d\n", buf[0], status.MPI_SOURCE);

        if (next < ntasks)//send next task to the slave that just became idle
        {
            if (fpm != NULL)
                fprintf(fpm, "task %d  is sent to %d \n", next, status.MPI_SOURCE);
            MPI_Send(&buf_str[next], 1, location, status.MPI_SOURCE, next, MPI_COMM_WORLD);
            next++;
            outstanding++;
        }
    }

    /* the payload is ignored by the slaves; only the tag (>= 10000) matters */
    for (i = 0; i < nslave; i++)//send quitting message
    {
        MPI_Send(&buf_str[0], 1, location, i+1, EXIT_TAG_BASE + i, MPI_COMM_WORLD);
    }

    MPI_Type_free(&location);
    if (fpm != NULL)
        fclose(fpm);
    return 0;
}

/*
 * Slave (rank >= 1): repeatedly receives a task record from rank 0.
 *
 * For a task (message tag < 10000) it sleeps for `pause` seconds, reads
 * `offset` bytes starting at byte `stand` of fhr, appends '\n', and writes
 * the line to fhw at byte position tag * (offset + 1).  It then reports
 * completion to rank 0 using the task's tag.  A tag >= 10000 ends the loop.
 *
 * fhr  shared file opened for reading
 * fhw  shared file opened for writing
 *
 * A task whose length is negative or does not fit the local buffer is
 * skipped (and logged), but completion is still reported so that the
 * master is never left waiting.
 *
 * Returns 0.
 */
int slave(MPI_File fhr, MPI_File fhw)
{
    struct{
        float pause;
        int stand;
        int offset;
    }buf_str;

    char buf[256];
    int done_msg = MSG_MISSION_COMPLETE;
    int rank;
    char name[64];
    FILE* log;

    MPI_Status status;
    MPI_Status status_read;
    MPI_Status status_write;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    //open this slave's own log file; logging is optional
    snprintf(name, sizeof name, "./logs/logfile_slave%d", rank);
    log = fopen(name, "w");
    if (log == NULL)
        printf("failed to open logfile of slave %d\n", rank);

    MPI_Type_create_struct(3, array_of_blocklengths, array_of_displacements, array_of_types, &location);
    MPI_Type_commit(&location);

    for (;;)
    {
        //receive the message from master
        MPI_Recv(&buf_str, 1, location, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        if (log != NULL)
            fprintf(log, "process %d message %d received\n",rank,status.MPI_TAG);

        if (status.MPI_TAG >= 10000)//exit message
            break;

        //sleep, to simulate a computing process
        if (buf_str.pause > 0)
            sleep((unsigned int)buf_str.pause);
        if (log != NULL)
            fprintf(log, "process %d sleep for %f seconds\n", rank, buf_str.pause);

        int len = buf_str.offset;
        if (len < 0 || len >= (int)sizeof buf)
        {
            /* the text plus the trailing '\n' must fit into buf */
            fprintf(stderr, "process %d: task %d has invalid length %d, skipped\n",
                    rank, status.MPI_TAG, len);
            if (log != NULL)
                fprintf(log, "process %d: task %d has invalid length %d, skipped\n",
                        rank, status.MPI_TAG, len);
        }
        else
        {
            //read from the position given
            int rc = MPI_File_read_at(fhr, buf_str.stand, buf, len, MPI_CHAR, &status_read);
            if (rc != MPI_SUCCESS && log != NULL)
                fprintf(log, "process %d: read failed for task %d\n", rank, status.MPI_TAG);

            buf[len] = '\n';//need a \n

            /* compute the position in MPI_Offset so large files do not overflow int */
            MPI_Offset pos = (MPI_Offset)status.MPI_TAG * (len + 1);
            rc = MPI_File_write_at(fhw, pos, buf, len + 1, MPI_CHAR, &status_write);
            if (rc != MPI_SUCCESS && log != NULL)
                fprintf(log, "process %d: write failed for task %d\n", rank, status.MPI_TAG);

            if (log != NULL)
                fprintf(log, "%d has done task %d\n", rank, status.MPI_TAG);
        }

        //send task complete message to master
        MPI_Send(&done_msg, 1, MPI_INT, 0, status.MPI_TAG, MPI_COMM_WORLD);
    }

    MPI_Type_free(&location);
    if (log != NULL)
        fclose(log);
    return 0;
}
#包括“mpi.h”
#包括
#包括
#包括
//定义消息
#定义消息任务完成78
#定义MSG_出口79
//定义MPI的结构消息
块长度[3]={1,1,1}的int数组;
MPI_Aint数组_的位移[3]={0,sizeof(float),sizeof(float)+sizeof(int)};
_类型[3]的MPI_数据类型数组_={MPI_FLOAT,MPI_FLOAT,MPI_INT};
MPI_数据类型位置;
int master();
int从机(MPI_文件fhr、MPI_文件fhw);
int main(int argc,char*argv[])
{
整数秩;
MPI_文件fhr、fhw;
char read[]=“/sharedardsample1.txt”;
char write[]=“/sharedWriteSample1.txt”;
MPI_Init(&argc,&argv);
MPI通信等级(MPI通信世界和等级);
printf(“%d正在讲话\n”,等级);
MPI文件打开(MPI通信世界、读取、仅MPI模式、MPI信息为空,&fhr);
MPI_文件打开(MPI_通信世界、写入、MPI_模式创建、MPI_模式仅限WRONLY、MPI_信息为NULL和fhw);
如果(秩==0)//秩0,则分派任务
master();
else//其他过程
从机(fhr、fhw);
MPI_Finalize();
printf(“%d说byebye\n”,秩);
MPI文件关闭(&fhr);
MPI文件关闭(&fhw);
返回0;
}
int master()//master,读取参数,发送给其他从进程,获取任务完成消息,将下一个任务安排给完成任务的从进程
{
int i、大小、firstmsg、nslave;
int buf[256];
结构{
浮动暂停;//暂停时间
int stand;//文件中的起始位置
int offset;//offset
}buf_str[10000]={{0.0,0,0};
MPI_通信大小(MPI_通信世界和大小);
nslave=size-1;//从机的数量
文件*fp;
文件*fpm;//用于日志
fp=fopen(“sharedAttributeSample1.txt”、“rb”);
如果(fp==NULL)
{
printf(“文件未打开\n”);
getchar();
//向从属服务器发送退出消息,使用标签告知从属服务器(>10000)
对于(i=10000;i<10000+nslave;i++)
{
buf[0]=消息退出;
MPI_发送(&buf[0],1,MPI_INT,i-10000+1,i,MPI_通信世界);
}
返回0;
}
其他的
printf(“文件已打开\n”);
fpm=fopen(“./logs/log_master.txt”,“wb”);
如果(fpm==NULL)
printf(“主日志系统加载失败!\n”);
对于(i=0;i<10000;i++)
{
fscanf(fp、%f、%d、%d、&buf_str[i]。暂停,&buf_str[i]。站立,&buf_str[i]。偏移);
}
MPI_状态;
MPI类型结构(3、块长度数组、位移数组、类型数组和位置);
MPI_类型_提交(&位置);
对于(i=0;i

在 `MPI_Finalize` 之后调用大多数 MPI 例程会导致未定义的行为。我不确定在其之后调用 `MPI_File_close` 是否会对你的程序产生这样的影响,但我会先从这个问题入手排查。事实上,我很惊讶这段代码没有出现任何相关的运行时错误。

我是这个领域的新手,这是我的第一个 MPI 代码 :>