Parallel processing: Using MPI_GATHERV in Fortran to gather arrays of unequal sizes
This question is similar to an earlier one, with the added complication that the matrices being gathered are unequal in row length, although equal in column length. (I have also looked at a related question, but could not figure it out.)
Background
I am running a calculation for which I do not know the number of rows of the result matrix until the calculation is finished. Serially, I allocate a very large matrix, it gets filled, and at the end of the calculation (once I know where the rows stop) I chop off the end of this big array, leaving just the result I want. With MPI I apply the same logic: in every process I have one large array that gets filled; at the end of the calculation I need to cut each array at its own limit (the limit is different in every process); and then I need to gather the result array of every process into a single matrix.
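As a side note, here is a minimal serial sketch of the allocate-big-then-trim pattern described above (my own illustration, with made-up names and sizes, not taken from the question); the MPI program below applies the same idea per rank:
program trim_demo
  implicit none
  integer, parameter :: ny = 5, nx_max = 1000   ! generous upper bound on rows
  real, allocatable :: big(:,:), kept(:,:)
  integer :: nx_used, i
  allocate(big(nx_max, ny))
  big = 0.0
  nx_used = 0
  do i = 1, 7                     ! stand-in for the real calculation
     nx_used = nx_used + 1
     big(nx_used, :) = real(i)
  end do
  ! Only now is the true number of rows known; keep just that part.
  kept = big(1:nx_used, :)        ! automatic allocation on assignment (F2003)
  deallocate(big)
  print *, 'rows kept:', size(kept, 1)
end program trim_demo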
program main
use mpi
implicit none
integer :: ierr, myRank, nProcs
integer :: sendsubarray, recvsubarray, resizedrecvsubarray
integer, dimension(2) :: starts,sizes,subsizes
integer, dimension(:), allocatable :: counts, disps
integer, parameter :: nx_glb=20, ny_glb=5, ny=5
integer :: nx
integer, dimension(:), allocatable :: nx_all
character, dimension(:,:), target, allocatable :: mat, matG
character :: c
integer :: i, j, p
integer(kind=mpi_address_kind) :: start, extent
call mpi_init(ierr)
call mpi_comm_rank(mpi_comm_world, myRank, ierr)
call mpi_comm_size(mpi_comm_world, nProcs, ierr)
allocate(nx_all(nProcs))
nx_all = (/5, 4, 5, 5/)
nx = nx_all(myRank+1)
sizes(1)=nx; sizes(2)=ny
subsizes(1)=nx; subsizes(2)=ny
starts(1)=0; starts(2)=0
call mpi_type_create_subarray(2, sizes, subsizes, starts, mpi_order_fortran, &
mpi_character, sendsubarray, ierr)
call mpi_type_commit(sendsubarray,ierr)
allocate(mat(1:nx,1:ny))
mat='.'
forall (i=1:nx,j=1:ny) mat(i,j)=ACHAR(ICHAR('0')+myRank)
if(myRank.eq.0) then
allocate(matG(nx_glb,ny_glb))
matG='.'
sizes(1)=nx_glb; sizes(2)=ny_glb
subsizes(1)=nx; subsizes(2)=ny
starts(1)=0; starts(2)=0
call mpi_type_create_subarray(2, sizes, subsizes, starts, mpi_order_fortran, &
mpi_character, recvsubarray, ierr)
call mpi_type_commit(recvsubarray, ierr)
extent = sizeof(c)
start = 0
call mpi_type_create_resized(recvsubarray, start, extent, resizedrecvsubarray, ierr)
call mpi_type_commit(resizedrecvsubarray,ierr)
end if
allocate(counts(4),disps(4))
counts(1:4) = (/1, 1, 1, 1/)
disps(1:4) = (/0, 5, 10, 15/)
call mpi_barrier(mpi_comm_world,ierr)
print *, mat, "process", myRank
call mpi_gatherv(mat,1,sendsubarray,matG,counts,disps,resizedrecvsubarray, &
0,mpi_comm_world,ierr)
do p=0,nProcs-1
if (myRank == p) then
print *, 'Local array for rank ', myRank
do i=1, nx
print *, (mat(i,j),j=1,ny)
end do
endif
enddo
call MPI_Barrier(MPI_COMM_WORLD,ierr)
if(myRank.eq.0) then
print * , matG, "process", myRank
print *, 'Global array: '
do i=1, nx_glb
print *, (matG(i,j),j=1, ny_glb)
end do
end if
call mpi_finalize(ierr)
end program main
The ideal result (note how rank 1 has fewer rows):
The actual result (note that locally I get the expected behaviour, but in the global matrix the extra row of 1s is there, and there are dots at the end):
I know that in memory the matrix is stored as an array, so the global array I get looks like this:
0000011111222223333300000111112222233333000001111122222333330000011111222223333300000.....2222233333
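For reference, this flattening is just Fortran's column-major storage: element (i,j) of matG sits at linear position i + (j-1)*nx_glb, so printing the whole array walks down column 1, then column 2, and so on. A minimal standalone sketch of that ordering (my own illustration, separate from the program above):
program colmajor_demo
  implicit none
  character, dimension(4,3) :: a
  integer :: i, j
  ! Give each column its own digit.
  forall (i=1:4, j=1:3) a(i,j) = achar(ichar('0') + j - 1)
  ! List-directed output traverses memory column by column,
  ! so this prints 000011112222 (one run of four per column).
  print *, a
end program colmajor_demo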
Questions
How do I get rid of the dots (which represent the empty row from rank 1)?
How can I display it as a matrix with the correct number of rows?
Edit
The reason the extra row appears in the global array is that, although the sendsubarray of process 1 has dimensions 4x5, the recvsubarray created on the root process has dimensions 5x5. The question now is: how do I define a recvsubarray with variable dimensions, depending on which rank it receives from?
Defining the global matrix to be bigger in the first dimension (nx) rather than the second (ny) makes your life very hard. The way Fortran stores arrays, having the larger ny is much more natural, as it corresponds to storing all the sub-matrices sequentially in memory.
If you are willing to swap nx and ny, you do not need any complicated derived types. In fact, I doubt you could achieve this pattern with Scatterv, since that function expects a single receive type but you have a different pattern for each incoming matrix (because of your choice of ordering of nx and ny).
This code, which uses the swapped nx and ny, seems to work correctly. The dots are at the end; I think you will always have some dots, since you allocate more space than the sub-matrices occupy.
program main
use mpi
implicit none
integer :: ierr, myRank, nProcs
integer :: sendsubarray, recvsubarray, resizedrecvsubarray
integer, dimension(2) :: starts,sizes,subsizes
integer, dimension(:), allocatable :: counts, disps
integer, parameter :: ny_glb=20, nx_glb=5, nx=5
integer :: ny
integer, dimension(:), allocatable :: ny_all
character, dimension(:,:), target, allocatable :: mat, matG
character :: c
integer :: i, j, p
integer(kind=mpi_address_kind) :: start, extent
call mpi_init(ierr)
call mpi_comm_rank(mpi_comm_world, myRank, ierr)
call mpi_comm_size(mpi_comm_world, nProcs, ierr)
allocate(ny_all(nProcs))
ny_all = (/5, 4, 5, 5/)
ny = ny_all(myRank+1)
allocate(mat(1:nx,1:ny))
mat='.'
forall (i=1:nx,j=1:ny) mat(i,j)=ACHAR(ICHAR('0')+myRank)
if(myRank.eq.0) then
allocate(matG(nx_glb,ny_glb))
matG='.'
end if
allocate(counts(4),disps(4))
counts(:) = nx*ny_all(:)
disps(1)=0
do i = 2, 4
disps(i) = disps(i-1)+counts(i-1)
end do
call mpi_barrier(mpi_comm_world,ierr)
print *, mat, "process", myRank
call mpi_gatherv(mat,nx*ny,MPI_CHARACTER,matG,counts,disps,MPI_CHARACTER, &
0,mpi_comm_world,ierr)
do p=0,nProcs-1
if (myRank == p) then
print *, 'Local array for rank ', myRank
do i=1, nx
print *, (mat(i,j),j=1,ny)
end do
endif
enddo
call MPI_Barrier(MPI_COMM_WORLD,ierr)
if(myRank.eq.0) then
print * , matG, "process", myRank
print *, 'Global array: '
do i=1, nx_glb
print *, (matG(i,j),j=1, ny_glb)
end do
end if
call mpi_finalize(ierr)
end program main
Here is some of the output:
Global array:
0000011112222233333.
0000011112222233333.
0000011112222233333.
0000011112222233333.
0000011112222233333.
Hope this is useful. Here is how I solved the problem above (the answer was given to me by a colleague, so all credit goes to him):
Since one dimension of the final matrix is fixed, and since the matrices are stored in arrays anyway, it is better to use mpi_gatherv with mpi_type_vector instead of mpi_type_create_subarray. The program structure is then: a) determine the size of the sub-matrix of interest in each rank, b) convert it into a vector, c) gather the vectors from each rank, and d) reshape the vectors into the final matrix. This way there is no need to gather unwanted information (the dots shown in the question above), and therefore no need to eliminate it after calling mpi_gatherv.
So, in order to gather sub-arrays of different length but constant width into a global matrix, the following code does the trick:
program main
use mpi
implicit none
integer :: ierr, myRank, iProcs, nProcs, master
integer :: ix, iy, ip
integer :: nx, nxSum, offset, newtype
integer, parameter :: ny=5
integer, allocatable:: vec(:), vecG(:), nxAll(:), displs(:), rcounts(:), matG(:,:)
call mpi_init(ierr)
call mpi_comm_rank(mpi_comm_world, myRank, ierr)
call mpi_comm_size(mpi_comm_world, nProcs, ierr)
master = 0
nx = myRank+1
allocate(vec(nx*ny))
do ix = 1,nx
do iy = 1,ny
ip = (ix-1)*ny + iy
vec(ip) = myRank
enddo
enddo
call mpi_barrier(mpi_comm_world,ierr)
allocate(nxAll(nProcs))
call mpi_gather(nx, 1, mpi_integer, nxAll, 1, mpi_integer, &
master, mpi_comm_world, ierr)
if (myRank == master) then
! print *, 'nxAll = ', nxAll, 'sum(nxAll) = ',sum(nxAll)
nxSum = sum(nxAll)
allocate(vecG(nxSum*ny))
allocate(displs(nProcs),rcounts(nProcs))
offset = 0
do iProcs = 1,nProcs
displs(iProcs) = offset
rcounts(iProcs) = nxAll(iProcs)*ny
offset = offset + rcounts(iProcs)
! print *,'iProcs',iProcs,'displs = ',displs(iProcs),'rcounts',rcounts(iProcs)
enddo
endif
call mpi_type_vector(nx*ny, 1, 1, mpi_integer,newtype,ierr)
call mpi_type_commit(newtype,ierr)
call mpi_gatherv(vec,1,newtype,vecG,rcounts,displs,mpi_integer, &
master,mpi_comm_world,ierr)
if (myRank == master) then
print *, 'Global vector, vecG = ',vecG
! Reshape into matrix
print *, 'Global matrix'
allocate(matG(nxSum,ny))
do ix = 1,nxSum
do iy = 1,ny
ip = (ix-1)*ny + iy
matG(ix,iy) = vecG(ip)
enddo
print *, (matG(ix,iy),iy=1,ny)
enddo
endif
call mpi_finalize(ierr)
end program main
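As a side note (my own addition, not part of the answer above), the explicit double loop that unpacks vecG into matG could also be written with the intrinsics reshape and transpose, because vecG stores the rows of the final matrix back to back. A minimal standalone sketch with made-up sizes:
program reshape_demo
  implicit none
  integer, parameter :: ny = 5, nxSum = 10
  integer :: vecG(nxSum*ny), matG(nxSum,ny)
  integer :: ix, iy
  ! Mimic the gathered vector: every ny consecutive entries form one row of the final matrix.
  vecG = [ ( (ix-1)/ny, ix = 1, nxSum*ny ) ]
  ! reshape fills column-major, so build a ny x nxSum array and transpose it;
  ! this reproduces matG(ix,iy) = vecG((ix-1)*ny + iy) from the loop above.
  matG = transpose(reshape(vecG, (/ ny, nxSum /)))
  do ix = 1, nxSum
     print *, (matG(ix,iy), iy = 1, ny)
  end do
end program reshape_demo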
Good to know that Fortran stores arrays based on the ny dimension. I cannot upvote your answer because I do not have enough reputation.