
Java: broadcasting data with OpenMPI deadlocks


I wrote a Java program that makes all OpenMPI calls from a single thread; iSend/recv are used to prevent deadlock. Callers invoke the send method, which puts each send request into a queue, and a network thread takes the requests off the queue and sends them:

import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;

import mpi.MPI;
import mpi.MPIException;
import mpi.Request;
import mpi.Status;

class NetworkThread extends Thread {
    private final ConcurrentLinkedQueue<SendRequest> sendQueue = new ConcurrentLinkedQueue<>();
    private final List<Request> activeSends = new LinkedList<>();
    private final List<RecvRequest> recvList = new LinkedList<>();
    private volatile boolean shutdown;

    @Override
    public void run() {
        System.out.println("network thread started");
        try {
            loop();
        } catch (MPIException e) {
            e.printStackTrace();
        }
    }

    void loop() throws MPIException {
        while (!shutdown) {
            // poll for an incoming message without blocking
            Status status = MPI.COMM_WORLD.iProbe(MPI.ANY_SOURCE, MPI.ANY_TAG);
            if (status != null) {
                int source = status.getSource();
                int tag = status.getTag();
                int sizeInBytes = status.getCount(MPI.BYTE);

                ByteBuffer buffer = MPI.newByteBuffer(sizeInBytes);
                MPI.COMM_WORLD.recv(buffer, sizeInBytes, MPI.BYTE, source, tag);
                byte[] data = new byte[sizeInBytes];
                buffer.get(data);
                RecvRequest recvRequest = new RecvRequest(data, source, tag);
                synchronized (recvList) {
                    recvList.add(recvRequest);
                }
            }

            // start a non-blocking send for every queued request
            SendRequest sendRequest;
            while ((sendRequest = sendQueue.poll()) != null) {
                byte[] data = sendRequest.getData();
                ByteBuffer buffer = MPI.newByteBuffer(data.length);
                buffer.put(data);
                Request request = MPI.COMM_WORLD.iSend(buffer, data.length, MPI.BYTE, sendRequest.getDest(), sendRequest.getTag());
                synchronized (activeSends) {
                    activeSends.add(request);
                }
            }
            // remove requests whose sends have completed
            synchronized (activeSends) {
                Iterator<Request> iterator = activeSends.iterator();
                while (iterator.hasNext()) {
                    Request request = iterator.next();
                    if (request.test())
                        iterator.remove();
                }
            }
        }
    }

    public void send(byte[] data, int dest, int tag) {
        SendRequest sendRequest = new SendRequest(data, dest, tag);
        sendQueue.add(sendRequest);
    }

    public byte[] read(int source, int tag) {
        byte[] data;
        while ((data = tryRead(source, tag)) == null) {
            try {
                Thread.sleep(10);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
        return data;
    }

    public byte[] tryRead(int source, int tag) {
        byte[] data = null;
        synchronized (recvList) {
            Iterator<RecvRequest> iterator = recvList.iterator();
            while (iterator.hasNext()) {
                RecvRequest recvRequest = iterator.next();
                if (recvRequest.getSource() == source && recvRequest.getTag() == tag) {
                    iterator.remove();
                    data = recvRequest.getData();
                    break;//just get one
                }
            }
        }
        return data;
    }

    public void shutdown() {
        shutdown = true;
        // wait until all sends have completed
        synchronized (activeSends) {
            while (activeSends.size() > 0)
                try {
                    Thread.sleep(1);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
        }
    }
}

class SendRequest {
    private byte[] data;
    private int dest;
    private int tag;

    SendRequest(byte[] data, int dest, int tag) {
        this.data = data;
        this.dest = dest;
        this.tag = tag;
    }

    public int getTag() {
        return tag;
    }

    public int getDest() {
        return dest;
    }

    public byte[] getData() {
        return data;
    }
}

class RecvRequest {
    private byte[] data;
    private int source;
    private int tag;

    RecvRequest(byte[] data, int source, int tag) {
        this.data = data;
        this.source = source;
        this.tag = tag;
    }

    public int getTag() {
        return tag;
    }

    public int getSource() {
        return source;
    }

    public byte[] getData() {
        return data;
    }
}

The problem: this program runs fine with a small number of processes (e.g. slots=4), but when the number of slots or the size of the data being sent is increased, it occasionally deadlocks. I tried different OpenMPI versions (3.0, 2.1.2, 1.7.5), but that did not seem to help.

At first glance, there seems to be a race condition during shutdown. You should drain all the queues (i.e. send and receive everything) before exiting.

Yes, the shutdown = true statement should be executed after the wait loop. After fixing that, the problem still occurs. I added some output statements to find out which line gets stuck:

sendTh.start();
recvTh.start();
sendTh.join();
recvTh.join();
System.out.println(String.format("%s-%d before exit", host, rank));
networkThread.shutdown();
System.out.println(String.format("%s-%d after exit", host, rank));
networkThread.join();

The result shows that every process prints the "before exit" line, but some processes are stuck inside networkThread.shutdown(). This is strange: all processes have received their data, so the activeSends list should be empty, and no process should stay in the wait loop.

@GillesGouaillardet I added a printf() at the end of NetworkThread.run(). Although all non-master ranks reach it, not all ranks can join() their NetworkThread.

I know very little about the Java MPI bindings, and the documentation I found is quite poor. Are you sure the buffer passed to MPI.COMM_WORLD.iSend stays alive for the whole duration of the request? Also, you should somehow request and verify that your thread level is at least MPI_THREAD_SERIALIZED.
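Following the first suggestion, one way to restructure the shutdown is to let the network thread itself keep draining until both the send queue and the in-flight list are empty, so that shutdown() never sleeps while holding the activeSends monitor (in the original code that monitor is held during the wait, which blocks the very synchronized blocks that would empty the list). A minimal sketch; shutdownRequested and hasActiveSends() are illustrative names, not from the original code:

class NetworkThread extends Thread {
    // ... fields as before, but the flag only *requests* shutdown ...
    private volatile boolean shutdownRequested;

    void loop() throws MPIException {
        // keep draining until shutdown was requested AND nothing is pending
        while (!shutdownRequested || !sendQueue.isEmpty() || hasActiveSends()) {
            // ... probe/recv, start queued iSends, test() finished sends ...
        }
    }

    private boolean hasActiveSends() {
        synchronized (activeSends) {
            return !activeSends.isEmpty();
        }
    }

    public void shutdown() {
        shutdownRequested = true; // signal; loop() drains and exits on its own
        try {
            join(); // wait for the network thread to terminate
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}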
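On the buffer-lifetime question: in the send loop above, the direct ByteBuffer handed to iSend is referenced only by a local variable, so the Java side keeps no strong reference to it while the transfer may still be in progress. A defensive fix is to keep each buffer paired with its Request until test() reports completion. A minimal sketch, where InFlightSend is a hypothetical helper class (activeSends would become a List<InFlightSend>):

// Hypothetical holder: keeps the direct send buffer strongly reachable
// until the matching request completes, so it cannot be reclaimed while
// MPI may still be reading from it.
class InFlightSend {
    final Request request;
    final ByteBuffer buffer;

    InFlightSend(Request request, ByteBuffer buffer) {
        this.request = request;
        this.buffer = buffer;
    }
}

// In the send loop, store the buffer together with its request:
Request request = MPI.COMM_WORLD.iSend(buffer, data.length, MPI.BYTE,
        sendRequest.getDest(), sendRequest.getTag());
synchronized (activeSends) {
    activeSends.add(new InFlightSend(request, buffer));
}

The complete test program: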
import java.net.InetAddress;
import java.net.UnknownHostException;

import mpi.MPI;
import mpi.MPIException;

public class BroadcastTest {
    private static final int TAG_MPI = 123;
    static int rank;
    static String host;

    static {
        try {
            host = InetAddress.getLocalHost().getHostName();
        } catch (UnknownHostException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws MPIException, InterruptedException, UnknownHostException {
        MPI.Init(args);
        int rank = MPI.COMM_WORLD.getRank();
        int size = MPI.COMM_WORLD.getSize();
        BroadcastTest.rank = rank;
        if (rank == 0) {
            System.out.println(String.format("total %d machines", size));
            System.out.println("master started");
        } else {
            NetworkThread networkThread = new NetworkThread();
            networkThread.start();

            Thread sendTh = new Thread(() -> {
                for (int i = 0; i < 5; i++) {
                    try {
                        Thread.sleep((long) (Math.random() * 1000)); // send data five times at random intervals
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    }
                    for (int machineId = 1/* skip master */; machineId < size; machineId++) {
                        if (machineId == rank) continue;//skip myself
                        networkThread.send(new byte[4096], machineId, TAG_MPI); // send 4 KB of data
                    }
                }
            });
            //receive data
            Thread recvTh = new Thread(() -> {
                for (int i = 0; i < 5; i++) {
                    for (int machineId = 1; machineId < size; machineId++) {
                        if (machineId == rank) continue;
                        byte[] bytes = networkThread.read(machineId, TAG_MPI);
                    }
                }
            });
            sendTh.start();
            recvTh.start();
            sendTh.join();
            recvTh.join();
            networkThread.shutdown();
            networkThread.join();
        }
        System.out.println(String.format("%s exit", host));
        MPI.Finalize();
    }
}
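On the thread-level point from the comments: main() calls plain MPI.Init, yet a thread other than the one that initialized MPI makes MPI calls, which requires at least MPI_THREAD_SERIALIZED. A minimal sketch using MPI.InitThread from the Open MPI Java bindings (the < comparison assumes the thread-level constants are ordered as in the C API):

// Sketch: request and verify the thread support level instead of MPI.Init.
int provided = MPI.InitThread(args, MPI.THREAD_SERIALIZED);
if (provided < MPI.THREAD_SERIALIZED) {
    System.err.println("insufficient MPI thread support: " + provided);
    MPI.Finalize();
    System.exit(1);
}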
The program is launched with:

/home/gongsf/openmpi-2.1.2/bin/mpirun --prefix /home/gongsf/openmpi-2.1.2 -bycore -nooversubscribe -machinefile /home/gongsf/JavaMPI/myhosts /home/gongsf/jdk1.8.0_144/bin/java -classpath /home/gongsf/JavaMPI/lib/*:/home/gongsf/JavaMPI/out/production/JavaMPI BroadcastTest