Fortran: MPI on a Three-Nested Loop

I parallelized a three-nested loop with MPI. When I run the code, it fails with the following error message:

srun: error: nid00217: tasks 0-31: Killed
srun: Terminating job step 20266620.0
srun: error: nid00218: tasks 32-63: Killed
srun: Force Terminated job step 20266620.0
This is the script I use to submit the job:

#!/bin/bash
#SBATCH --partition=workq
#SBATCH --job-name="code"
#SBATCH --nodes=2
#SBATCH --time=1:00:00
#SBATCH --exclusive
#SBATCH --err=std.err
#SBATCH --output=std.out
#----------------------------------------------------------#
module switch PrgEnv-cray PrgEnv-intel
export OMP_NUM_THREADS=1
#----------------------------------------------------------#
echo "The job "${SLURM_JOB_ID}" is running on "${SLURM_JOB_NODELIST}
#----------------------------------------------------------#
srun --ntasks=64 --cpus-per-task=${OMP_NUM_THREADS} --hint=nomultithread ./example_parallel
Below is the code. Could anyone give me some suggestions on how to fix it? Thank you very much.

PROGRAM THREEDIMENSION
USE MPI
IMPLICIT NONE
INTEGER, PARAMETER :: dp = SELECTED_REAL_KIND(p=15,r=14)
INTEGER :: i, j, k, le(3)
REAL (KIND=dp), ALLOCATABLE :: kp(:,:,:,:), kpt(:,:), col1(:), col2(:)
REAL (KIND=dp) :: su, co, tot
INTEGER :: world_size, world_rank, ierr
INTEGER :: world_comm_1st, world_comm_2nd, world_comm_3rd
INTEGER :: th3_dimension_size, th3_dimension_size_max, th3_dimension_rank
INTEGER :: th2_dimension_size, th2_dimension_size_max, th2_dimension_rank
INTEGER :: th1_dimension_size, th1_dimension_size_max, th1_dimension_rank
INTEGER :: proc_1st_dimension_len, proc_2nd_dimension_len, proc_3rd_last_len, proc_i, proc_j, proc_k
REAL (KIND=dp) :: t0, t1

CALL MPI_INIT(ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD, world_size, ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, world_rank, ierr)

IF (world_rank == 0) THEN
   t0 = MPI_WTIME()
END IF

le(1) = 1000
le(2) = 600
le(3) = 900
ALLOCATE (kp(le(1),le(2),le(3),3))
ALLOCATE (kpt(le(3),3))
ALLOCATE (col1(le(1)))
ALLOCATE (col2(le(2)))

DO i = 1, le(1), 1
   DO j = 1, le(2), 1
      DO k = 1, le(3), 1
         kp(i,j,k,1) = DBLE(i+j+j+1)
         kp(i,j,k,2) = DBLE(i+j+k+2)
         kp(i,j,k,3) = DBLE(i+j+k+3)
      END DO
   END DO
END DO

proc_1st_dimension_len = (world_size - 1) / le(1) + 1
proc_2nd_dimension_len = (world_size - 1 / (le(1) + le(2))) + 1
proc_3rd_last_len = MOD(world_size - 1, le(1)+le(2)) + 1

IF (world_rank <= proc_3rd_last_len*proc_2nd_dimension_len*proc_1st_dimension_len) THEN
   proc_i = MOD(world_rank,proc_1st_dimension_len)
   proc_j = world_rank / proc_1st_dimension_len
   proc_k = world_rank / (proc_1st_dimension_len*proc_2nd_dimension_len)
ELSE
   proc_i = MOD(world_rank-proc_3rd_last_len,proc_1st_dimension_len-1)
   proc_j = (world_rank-proc_3rd_last_len) / proc_1st_dimension_len-1
   proc_k = (world_rank-proc_3rd_last_len) / (proc_2nd_dimension_len*proc_2nd_dimension_len-1)
END IF

CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)

CALL MPI_COMM_SPLIT(MPI_COMM_WORLD,proc_i,world_rank,world_comm_1st,ierr)
CALL MPI_COMM_SIZE(world_comm_1st,th1_dimension_size,ierr)
CALL MPI_COMM_RANK(world_comm_1st,th1_dimension_rank,ierr)

CALL MPI_COMM_SPLIT(MPI_COMM_WORLD,proc_j,world_rank,world_comm_2nd,ierr)
CALL MPI_COMM_SIZE(world_comm_2nd,th2_dimension_size,ierr)
CALL MPI_COMM_RANK(world_comm_2nd,th2_dimension_rank,ierr)

CALL MPI_COMM_SPLIT(MPI_COMM_WORLD,proc_k,world_rank,world_comm_3rd,ierr)
CALL MPI_COMM_SIZE(world_comm_3rd,th3_dimension_size,ierr)
CALL MPI_COMM_RANK(world_comm_3rd,th3_dimension_rank,ierr)

CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
CALL MPI_ALLREDUCE(th1_dimension_size,th1_dimension_size_max,1,MPI_INT,MPI_MAX,MPI_COMM_WORLD,ierr)
CALL MPI_ALLREDUCE(th2_dimension_size,th2_dimension_size_max,1,MPI_INT,MPI_MAX,MPI_COMM_WORLD,ierr)

IF (world_rank == 0) THEN
   OPEN (UNIT=3, FILE='out.dat', STATUS='UNKNOWN')
END IF

col1 = 0.0
DO i = 1, le(1), 1
   IF (MOD(i-1,th1_dimension_size_max) /= th1_dimension_rank) CYCLE
   col2 = 0.0
   DO j = 1, le(2), 1
      IF (MOD(j-1,th2_dimension_size_max) /= th2_dimension_rank) CYCLE
      kpt = kp(i,j,:,:)
      su = 0.0
      DO k = 1, le(3), 1
         IF(MOD(k-1,th1_dimension_size*th2_dimension_size) /= th3_dimension_rank) CYCLE
         CALL CAL(kpt(k,3),co)
         su = su + co
      END DO
      CALL MPI_BARRIER(world_comm_3rd,ierr)
      CALL MPI_REDUCE(su,col2(j),1,MPI_DOUBLE,MPI_SUM,0,world_comm_3rd,ierr)
   END DO
   CALL MPI_BARRIER(world_comm_2nd,ierr)
   CALL MPI_REDUCE(col2,col1(i),le(2),MPI_DOUBLE,MPI_SUM,0,world_comm_2nd,ierr)
END DO

CALL MPI_BARRIER(world_comm_1st,ierr)
tot = 0.0
IF (th1_dimension_rank == 0) THEN
   CALL MPI_REDUCE(col1,tot,le(1),MPI_DOUBLE,MPI_SUM,0,world_comm_1st,ierr)
   WRITE (UNIT=3, FMT=*) tot
   CLOSE (UNIT=3)
END IF

DEALLOCATE (kp)
DEALLOCATE (kpt)
DEALLOCATE (col1)
DEALLOCATE (col2)

IF (world_rank == 0) THEN
   t1 = MPI_WTIME()
   WRITE (UNIT=3, FMT=*) 'Total time:', t1 - t0, 'seconds'
END IF

CALL MPI_FINALIZE (ierr)

STOP
END PROGRAM THREEDIMENSION

SUBROUTINE CAL(arr,co)
IMPLICIT NONE
INTEGER, PARAMETER :: dp=SELECTED_REAL_KIND(p=15,r=14)
INTEGER :: i
REAL (KIND=dp) :: arr(3), co

co = 0.0d0
co = co + (arr(1) ** 2 + arr(2) * 3.1d1) / (arr(3) + 5.0d-1)

RETURN
END SUBROUTINE CAL

Please use the fortran tag for all Fortran questions. At a quick glance, you have an MPI_REDUCE inside the condition IF (th1_dimension_rank == 0) THEN; that looks very likely to deadlock, after which the batch scheduler kills your job and produces the error messages above (a minimal sketch of the correct pattern is given below). I will test this guess later if I have time.

Also note that MPI_INT and MPI_DOUBLE are not standard MPI for Fortran; they are the C datatype handles. I would correct them to MPI_INTEGER and MPI_DOUBLE_PRECISION.

Note as well that all of the MPI_BARRIER calls except the ones guarding the timer are unnecessary: the other collective routines already provide the required synchronisation. MPI_BARRIER is very rarely needed; apart from making timers report sensible values, I cannot remember the last time I used it, and whenever I see it I assume poor programming practice until shown otherwise.

Furthermore, you are allocating more than 12 GB per process, which may simply mean you run out of memory before the computation even starts (the arithmetic is sketched below).

Finally, your MPI_REDUCE calls are very odd: do you realise that reducing an array of size n produces an array of the same size (see the last sketch below)?
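To make the deadlock point concrete, here is a minimal, self-contained sketch (a toy program of my own, not the poster's code) of the pattern recommended above: every rank of the communicator calls MPI_REDUCE unconditionally, only the root uses the result, and the Fortran datatype handle MPI_DOUBLE_PRECISION is used instead of the C handle MPI_DOUBLE.

PROGRAM REDUCE_ALL_RANKS
USE MPI
IMPLICIT NONE
INTEGER, PARAMETER :: dp = SELECTED_REAL_KIND(p=15,r=14)
INTEGER :: rank, ierr
REAL (KIND=dp) :: mine, total

CALL MPI_INIT(ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)

! Each rank's partial result.
mine = REAL(rank+1, dp)

! Every rank calls the collective; wrapping this call in IF (rank == 0)
! would leave all the other ranks waiting inside MPI_REDUCE forever.
CALL MPI_REDUCE(mine, total, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, &
                MPI_COMM_WORLD, ierr)

! Only the root reads the reduced value.
IF (rank == 0) WRITE (*,*) 'sum over ranks =', total

CALL MPI_FINALIZE(ierr)
END PROGRAM REDUCE_ALL_RANKS

Applied to the program above, the same principle would mean moving the final MPI_REDUCE on col1 out of the IF (th1_dimension_rank == 0) block, so that every rank of world_comm_1st participates, and keeping only the WRITE inside the IF.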
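On the memory point, a quick back-of-the-envelope check (assuming 8-byte reals for KIND=dp): the kp array alone holds 1000 x 600 x 900 x 3 = 1.62e9 elements, about 12.96 GB, and every one of the 64 ranks allocates the full array. With 32 tasks per node, as in the job script, that is over 400 GB per node before the loops even start, so the "Killed" messages are consistent with the nodes simply running out of memory. To run at these sizes, kp would presumably have to be distributed across ranks or its entries computed on the fly rather than stored.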
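Finally, on the buffer-size point: MPI_REDUCE with count n performs n element-wise reductions, so the receive buffer must also hold n elements; in the program above, the reductions into col1(i) and into the scalar tot use counts of le(2) and le(1) but provide room for only one value. A small self-contained sketch (again a toy example of my own) of the two usual ways to end up with a single number:

PROGRAM REDUCE_SIZES
USE MPI
IMPLICIT NONE
INTEGER, PARAMETER :: dp = SELECTED_REAL_KIND(p=15,r=14)
INTEGER, PARAMETER :: n = 4
INTEGER :: rank, ierr
REAL (KIND=dp) :: partial(n), reduced(n), local_sum, global_sum

CALL MPI_INIT(ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
partial = REAL(rank+1, dp)

! Element-wise reduction: reduced(i) is the sum of partial(i) over all
! ranks, so the receive buffer has the same size n as the send buffer.
CALL MPI_REDUCE(partial, reduced, n, MPI_DOUBLE_PRECISION, MPI_SUM, 0, &
                MPI_COMM_WORLD, ierr)

! Single number: sum locally first, then reduce a scalar with count 1.
local_sum = SUM(partial)
CALL MPI_REDUCE(local_sum, global_sum, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, &
                MPI_COMM_WORLD, ierr)

IF (rank == 0) WRITE (*,*) 'element-wise:', reduced, 'total:', global_sum

CALL MPI_FINALIZE(ierr)
END PROGRAM REDUCE_SIZES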