How do I synchronize an array across MPI processes if each process updates only a part of it?


I have a program that I would like to parallelize using MPI. I have never used MPI before.

The program computes the behaviour of a large number of objects over time. The data for these objects is stored in arrays, e.g. the x coordinates in

double precision :: body_x(10000)

To compute the behaviour of an object, information about all the other objects is needed, so every process has to hold all of the data but only updates a part of it. Before each new timestep, however, every process needs to get the updated information from all the other processes.

As far as I understand, MPI_Allgather could be used for this, but it needs a send buffer and a receive buffer. How can I synchronize an array across the different processes if each process has updated a different part of it? Do I have to send the whole array of every process to the master into a receive buffer, update the corresponding part of the master's array, and, after all processes have sent their data, broadcast the array from the master again?

This is a very basic question, but I'm very new to MPI and all the examples I have found are quite simple and do not cover this. Thanks for any help.

Pseudo example (assuming Fortran-style vectors with first index 1; yes, the sends/receives would better be done non-blocking, this is just for simplicity):

if (master) then
  readInputFile
end if

MPI_Bcast(numberOfObjects)
allocate body_array(numberOfObjects)

if (master) then
  fill body_array   ! with the data from the input file
end if

MPI_Bcast(body_array)

objectsPerThread = numberOfObjects / threadCount
myStart = threadID * objectsPerThread + 1
myEnd   = (threadID + 1) * objectsPerThread

do while(t ...

It sounds like you want MPI_Allgather. To avoid needing a separate send buffer, you can use the MPI_IN_PLACE value; this tells MPI to use the same buffer for both send and receive. See the MPI_Allgather documentation for the details of MPI_IN_PLACE.
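
As a rough sketch of that suggestion (this code is not part of the original answer; the array name body_x, the size n_total and the equal-chunk layout are assumptions taken from the question's pseudocode), an in-place MPI_Allgather could look like this: every rank updates only its own contiguous slice, and the collective fills in everyone else's slice.

program allgather_in_place_sketch
  use mpi

  implicit none

  integer, parameter :: n_total = 12        ! assumed total number of bodies
  double precision   :: body_x(n_total)     ! every rank holds the full array
  integer :: n_ranks, my_rank, chunk, my_start, my_end, i, ierr

  call MPI_Init(ierr)
  call MPI_Comm_size(MPI_COMM_WORLD, n_ranks, ierr)
  call MPI_Comm_rank(MPI_COMM_WORLD, my_rank, ierr)

  ! assumes n_total is divisible by the number of ranks
  chunk    = n_total / n_ranks
  my_start = my_rank * chunk + 1
  my_end   = (my_rank + 1) * chunk

  ! each rank updates only its own slice of the array ...
  body_x = 0.0d0
  do i = my_start, my_end
    body_x(i) = dble(my_rank)
  end do

  ! ... and the in-place allgather distributes every slice to every rank.
  ! With MPI_IN_PLACE as the send buffer, the send count and send type are
  ! ignored; each rank's contribution is read from its own section of body_x.
  call MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,                     &
  &                  body_x, chunk, MPI_DOUBLE_PRECISION,                    &
  &                  MPI_COMM_WORLD, ierr)

  call MPI_Finalize(ierr)

end program allgather_in_place_sketch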

You can combine the chunks of the array from all processes with a call to MPI_Allgatherv. Since the chunks are in general not all of the same size, plain MPI_Allgather is not enough and you have to use MPI_Allgatherv; see the complete Fortran example below. It defines an array of 11 elements, c(-5:5). Each process then sets its chunk of that array to some complex number, and finally a call to MPI_Allgatherv pulls all the chunks together. The calculation of the chunk sizes, as well as of the parameters that need to be passed to MPI_Allgatherv, is encapsulated in the mpi_split routine.

program test
  use mpi

  implicit none

  integer, parameter :: idp = 8
  integer, parameter :: n_tasks = 11
  real(idp), parameter :: zero = 0.0d0
  complex(idp), parameter :: czero = cmplx(zero, zero, kind=idp)

  integer :: mpi_n_procs, mpi_proc_id, error
  integer :: i, i_from, i_to
  complex(idp) :: c(-5:5)
  real(idp) :: split_size

  integer, allocatable :: recvcount(:), displs(:)

  call MPI_Init(error)
  call MPI_Comm_size(MPI_COMM_WORLD, mpi_n_procs, error)
  call MPI_Comm_rank(MPI_COMM_WORLD, mpi_proc_id, error)

  allocate(recvcount(mpi_n_procs))
  allocate(displs(mpi_n_procs))

  i_from = -5
  i_to = 5

  ! each process covers only part of the array
  call mpi_split(i_from, i_to, counts=recvcount, displs=displs)
  write(*,*) "ID", mpi_proc_id,":", i_from, "..", i_to
  if (mpi_proc_id == 0) then
    write(*,*) "Counts: ", recvcount
    write(*,*) "Displs: ", displs
  end if

  c(:) = czero
  do i = i_from, i_to
    c(i) = cmplx(real(i, idp), real(i+1, idp), kind=idp)
  end do
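
  ! Gather all chunks: each rank contributes the i_to-i_from+1 elements it
  ! just filled in, starting at c(i_from); recvcount and displs tell MPI how
  ! many elements each rank supplies and where its chunk lands in the result
  ! array c. (Because the send and receive buffers overlap here, a strictly
  ! conforming variant would pass MPI_IN_PLACE as the send buffer instead.)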

  call MPI_Allgatherv(c(i_from), i_to-i_from+1, MPI_DOUBLE_COMPLEX, c,         &
  &                   recvcount, displs, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD,   &
  &                   error)

  if (mpi_proc_id == 0) then
    do i = -5, 5
      write(*,*) i, ":", c(i)
    end do
  end if

  deallocate(recvcount, displs)
  call MPI_Finalize(error)

contains

  !! @description: split the range (a,b) into equal chunks, where each chunk is
  !! handled by a different MPI process
  !! @param: a        On input, the lower bound of an array to be processed. On
  !!                  output, the lower index of the chunk that the MPI process
  !!                  `proc_id` should process
  !! @param: b        On input, the upper bound of an array. On output, the
  !!                  upper index of the chunk that process `proc_id` should
  !!                  process.
  !! @param: n_procs  The total number of available processes. If not given,
  !!                  this is determined automatically from the MPI environment.
  !! @param: proc_id  The (zero-based) process ID (`0 <= proc_id < n_procs`). If
  !!                  not given, the ID of the current MPI process is used
  !! @param: counts   If given, must be of size `n_procs`. On output, the chunk
  !!                  size for each MPI process
  !! @param: displs   If given, must be of size `n_procs`. On output, the offset
  !!                  of the first index processed by each MPI process, relative
  !!                  to the input value of `a`
  subroutine mpi_split(a, b, n_procs, proc_id, counts, displs)

    integer,           intent(inout) :: a
    integer,           intent(inout) :: b
    integer, optional, intent(in)    :: n_procs
    integer, optional, intent(in)    :: proc_id
    integer, optional, intent(inout) :: counts(:)
    integer, optional, intent(inout) :: displs(:)

    integer :: mpi_n_procs, n_tasks, mpi_proc_id, error
    integer :: aa, bb
    real(idp) :: split_size
    logical :: mpi_is_initialized

    mpi_n_procs = 1
    if (present(n_procs)) mpi_n_procs = n_procs
    mpi_proc_id = 0
    if (present(proc_id)) mpi_proc_id = proc_id
    if (.not. present(n_procs)) then
      call MPI_Comm_size(MPI_COMM_WORLD, mpi_n_procs, error)
    end if
    if (.not. present(proc_id)) then
      call MPI_Comm_rank(MPI_COMM_WORLD, mpi_proc_id, error)
    end if

    aa = a
    bb = b

    n_tasks = bb - aa + 1
    split_size = real(n_tasks, idp) / real(max(mpi_n_procs, 1), idp)
    a = nint(mpi_proc_id * split_size) + aa
    b = min(aa + nint((mpi_proc_id+1) * split_size) - 1, bb)

    if (present(counts)) then
      do mpi_proc_id = 0, mpi_n_procs-1
        counts(mpi_proc_id+1) = max(nint((mpi_proc_id+1) * split_size)         &
        &                           - nint((mpi_proc_id) * split_size), 0)
      end do
    end if

    if (present(displs)) then
      do mpi_proc_id = 0, mpi_n_procs-1
        displs(mpi_proc_id+1) = min(nint(mpi_proc_id * split_size), bb-aa)
      end do
    end if

  end subroutine mpi_split

end program
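
For instance, with 3 processes (e.g. mpirun -np 3 ./test, assuming the executable is named test), mpi_split divides the 11 elements of c(-5:5) into chunks of 4, 3 and 4 elements: rank 0 covers the indices -5..-2, rank 1 covers -1..1 and rank 2 covers 2..5, and rank 0 prints counts = 4 3 4 and displs = 0 4 7 before the gathered array.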