Fortran Pi计算在OpenMP中给出了不正确的结果
我是OpenMP新手,不知道这段代码有什么问题。它以串行方式工作。我正在使用Ubuntu Linux和gfortran。
!
! Monte Carlo estimate of pi: draws nc random points (x,y), counts those whose
! distance from (0.5,0.5) is at most 0.5, and reports 4*count/nc.
! When compiled with OpenMP, the range 1..nc is divided by hand into chunks of
! ppt iterations per thread and the per-thread counts are added to ncirc inside
! a critical section. Lines starting with "!$" are only active when OpenMP is
! enabled; otherwise the single pass uses thread_num=1, istart=1, iend=nc.
program test_rand
use omp_lib
implicit none
! Requested thread count; it is also baked into the bounds of ncircs below.
integer, parameter :: num_threads =36
! Total number of sample points.
integer*8,parameter :: nc = 1000000000
! ncirc: grand total of hits; ncircs: one hit counter per thread id (0..35).
integer*8 ncirc,ncircs(0:35)
! NOTE(review): these are default-kind integers, while nc is integer*8, so the
! integer*8 results assigned to ppt/iend below are narrowed on assignment.
integer i,thread_num,istart,iend,ppt
real*8 x,y,dist,pi
integer,parameter :: seed = 864
! srand/rand are GNU extensions rather than standard Fortran intrinsics.
call srand(seed)
! NOTE(review): only elements 1..4 are zeroed; elements 0 and 5..35 of ncircs
! are never assigned before use.
do i=1,4
ncircs(i)=0
end do
ncirc=0
! Chunk size per thread: ceiling of nc/num_threads.
ppt=(nc+num_threads-1)/num_threads
! Serial defaults; overridden per thread by the "!$" lines inside the region.
istart=1
iend=nc
thread_num=1
!$ call omp_set_num_threads(num_threads)
! NOTE(review): ncircs is listed as private, so each thread works on its own
! copy whose contents are undefined on entry; the zeroing done above does not
! carry into the parallel region.
!$omp parallel default(none) private(istart,iend,thread_num,i, &
!$omp dist,x,y,ncircs) shared(ppt,ncirc)
! Each thread derives its own contiguous slice [istart, iend] of 1..nc.
!$ thread_num = omp_get_thread_num()
!$ istart=thread_num*ppt+1
!$ iend = min(nc,thread_num*ppt+ppt)
print*,thread_num
do i=istart,iend
! NOTE(review): whether rand() may safely be called from several threads at
! once is not shown here -- confirm before relying on the parallel result.
x=rand()
y=rand()
! Distance of the sample from the centre of the unit square.
dist=sqrt((x-0.5)**2+(y-0.5)**2)
if (dist.le.0.5) ncircs(thread_num)=ncircs(thread_num)+1
end do
! Serialise the update of the shared total.
!$omp critical
! NOTE(review): this prints element thread_num+1, not the thread's own
! counter; for thread 35 that is index 36, outside the declared 0:35 bounds.
!$ print*, "thread_num=",thread_num, "istart=",istart," iend=",iend,ncircs(thread_num+1)
ncirc=ncirc+ncircs(thread_num)
!$omp end critical
!$omp end parallel
! Prints the program-level ncircs array (not the threads' private copies).
print*,ncircs
! pi ~ 4 * (points inside the circle) / (total points).
pi=4*dble(ncirc)/dble(nc)
print *,pi,ncirc
end program test_rand
输出:
lakshmi@lakshmiVM:~/Documents/fortran$ gfortran -o pi pi.f95
lakshmi@lakshmiVM:~/Documents/fortran$ time ./pi
1
2 785398441 0 0 0 16777216 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 268435456 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3.1415937640000000 785398441
real 0m25.211s
user 0m25.152s
sys 0m0.000s
lakshmi@lakshmiVM:~/Documents/fortran$ gfortran -o pi_omp pi.f95 -fopenmp
lakshmi@lakshmiVM:~/Documents/fortran$ time ./pi_omp
3
21
25
30
35
18
4
17
11
34
12
28
7
33
29
24
31
27
8
23
10
19
0
6
26
32
16
14
1
13
5
20
15
2
9
22
thread_num= 6 istart= 166666669 iend= 194444446 0
thread_num= 11 istart= 305555559 iend= 333333336 0
thread_num= 5 istart= 138888891 iend= 166666668 0
thread_num= 27 istart= 750000007 iend= 777777784 0
thread_num= 19 istart= 527777783 iend= 555555560 0
thread_num= 25 istart= 694444451 iend= 722222228 0
thread_num= 2 istart= 55555557 iend= 83333334 0
thread_num= 1 istart= 27777779 iend= 55555556 0
thread_num= 21 istart= 583333339 iend= 611111116 0
thread_num= 18 istart= 500000005 iend= 527777782 0
thread_num= 20 istart= 555555561 iend= 583333338 0
thread_num= 24 istart= 666666673 iend= 694444450 0
thread_num= 29 istart= 805555563 iend= 833333340 0
thread_num= 28 istart= 777777785 iend= 805555562 0
thread_num= 26 istart= 722222229 iend= 750000006 0
thread_num= 12 istart= 333333337 iend= 361111114 0
thread_num= 22 istart= 611111117 iend= 638888894 0
thread_num= 23 istart= 638888895 iend= 666666672 0
thread_num= 10 istart= 277777781 iend= 305555558 0
thread_num= 3 istart= 83333335 iend= 111111112 0
thread_num= 34 istart= 944444453 iend= 972222230 0
thread_num= 17 istart= 472222227 iend= 500000004 0
thread_num= 15 istart= 416666671 iend= 444444448 0
thread_num= 14 istart= 388888893 iend= 416666670 0
thread_num= 8 istart= 222222225 iend= 250000002 0
thread_num= 7 istart= 194444447 iend= 222222224 0
thread_num= 30 istart= 833333341 iend= 861111118 0
thread_num= 32 istart= 888888897 iend= 916666674 0
thread_num= 9 istart= 250000003 iend= 277777780 0
thread_num= 4 istart= 111111113 iend= 138888890 0
thread_num= 16 istart= 444444449 iend= 472222226 0
thread_num= 0 istart= 1 iend= 27777778 93851473435152
thread_num= 13 istart= 361111115 iend= 388888892 0
thread_num= 35 istart= 972222231 iend= 1000000000 4600331172021844405
thread_num= 33 istart= 916666675 iend= 944444452 0
thread_num= 31 istart= 861111119 iend= 888888896 0
139938624438272 0 0 0 0 139942555560952 10 140720670311236 3933000496 0 139942559675136 466005475 139942562579304 140720670311528 139942559716466 140720670311360 140720670311376 139942562705897 3 139942555561536 1 0 1 139942562578432 361825296 139942555561536 139942562578432 139942562579304 0 140720670311656 139942559737213 140720308486145 4294967295 139942562705897 139942557392112 139942562581024
375409.03530958400 93852258827396
real 6m57.907s
user 8m35.083s
sys 219m5.670s
lakshmi@lakshmiVM:~/Documents/fortran$
恐怕你的程序在正确性和风格两方面都有不少问题。我们先看看如何借助编译器找出其中一部分问题,然后讨论编译器帮不上忙的问题,再讨论风格。最后,我会展示我将如何解决这个问题。因此,首先,我建议在开发时打开所有编译器警告和错误检测选项。这样的选项有很多。让我们看看
-Wall -Wextra
对你的代码给出了什么提示:
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 --version
GNU Fortran (GCC) 11.1.0
Copyright © 2021 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -fopenmp -O -g pi_orig.f90
pi_orig.f90:20:7:
20 | ppt=(nc+num_threads-1)/num_threads
| 1
Warning: Integer division truncated to constant ‘27777778’ at (1) [-Winteger-division]
pi_orig.f90:30:12:
30 | !$ iend = min(nc,thread_num*ppt+ppt)
| 1
Warning: Possible change of value in conversion from INTEGER(8) to INTEGER(4) at (1) [-Wconversion]
我不担心第一个警告,但第二个警告告诉我,iend
可能不是一个足够长的整数类型,无法存储你所需的迭代次数;i
以及其他许多变量也是如此。因此,当 nc 增大到超过某个值之后,
精度就不会再提高了。
但是,你的程序并不是用标准 Fortran 编写的。从长远来看,遵循标准会让你的日子轻松得多。因此,让我们添加 -std=f2008
,让编译器检测出哪些地方没有使用标准 Fortran:
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -std=f2008 -fopenmp -O -g pi_orig.f90
pi_orig.f90:8:12:
8 | integer*8,parameter :: nc = 1000000000
| 1
Error: GNU Extension: Nonstandard type declaration INTEGER*8 at (1)
pi_orig.f90:9:12:
9 | integer*8 ncirc,ncircs(0:35)
| 1
Error: GNU Extension: Nonstandard type declaration INTEGER*8 at (1)
pi_orig.f90:11:9:
11 | real*8 x,y,dist,pi
| 1
Error: GNU Extension: Nonstandard type declaration REAL*8 at (1)
pi_orig.f90:37:23:
37 | if (dist.le.0.5) ncircs(thread_num)=ncircs(thread_num)+1
| 1
Error: Syntax error in IF-clause after (1)
pi_orig.f90:45:15:
45 | print*,ncircs
| 1
Error: Function ‘ncircs’ requires an argument list at (1)
pi_orig.f90:27:12:
27 | !$omp dist,x,y,ncircs) shared(ppt,ncirc)
| 1
Error: Symbol ‘dist’ at (1) has no IMPLICIT type
pi_orig.f90:20:9:
20 | ppt=(nc+num_threads-1)/num_threads
| 1
Error: Symbol ‘nc’ at (1) has no IMPLICIT type
pi_orig.f90:18:7:
18 | ncirc=0
| 1
Error: Symbol ‘ncirc’ at (1) has no IMPLICIT type
pi_orig.f90:46:4:
46 | pi=4*dble(ncirc)/dble(nc)
| 1
Error: Symbol ‘pi’ at (1) has no IMPLICIT type; did you mean ‘i’?
pi_orig.f90:27:14:
27 | !$omp dist,x,y,ncircs) shared(ppt,ncirc)
| 1
Error: Symbol ‘x’ at (1) has no IMPLICIT type
pi_orig.f90:27:16:
27 | !$omp dist,x,y,ncircs) shared(ppt,ncirc)
| 1
Error: Symbol ‘y’ at (1) has no IMPLICIT type
pi_orig.f90:14:12:
14 | call srand(seed)
| 1
Warning: The intrinsic ‘srand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘srand’ will be treated as if declared EXTERNAL. Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:14:12: Warning: The intrinsic ‘srand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘srand’ will be treated as if declared EXTERNAL. Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:14:18:
14 | call srand(seed)
| 1
Warning: The intrinsic ‘srand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘srand’ will be treated as if declared EXTERNAL. Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:16:5:
16 | ncircs(i)=0
| 1
Error: Function ‘ncircs’ at (1) has no IMPLICIT type
pi_orig.f90:34:11:
34 | x=rand()
| 1
Warning: The intrinsic ‘rand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘rand’ will be treated as if declared EXTERNAL. Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:34:11: Warning: The intrinsic ‘rand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘rand’ will be treated as if declared EXTERNAL. Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:34:7:
34 | x=rand()
| 1
Warning: The intrinsic ‘rand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘rand’ will be treated as if declared EXTERNAL. Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:34:7:
34 | x=rand()
| 1
Error: Function ‘rand’ at (1) has no IMPLICIT type
pi_orig.f90:35:7:
35 | y=rand()
| 1
Error: Function ‘rand’ at (1) has no IMPLICIT type
pi_orig.f90:41:70:
41 | !$ print*, "thread_num=",thread_num, "istart=",istart," iend=",iend,ncircs(thread_num+1)
| 1
Error: Function ‘ncircs’ at (1) has no IMPLICIT type
pi_orig.f90:42:20:
42 | ncirc=ncirc+ncircs(thread_num)
| 1
Error: Function ‘ncircs’ at (1) has no IMPLICIT type
pi_orig.f90:27:17:
27 | !$omp dist,x,y,ncircs) shared(ppt,ncirc)
| 1
Error: Object ‘ncircs’ is not a variable at (1)
ijb@ijb-Latitude-5410:~/work/stack$
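哦,天哪!事实上,只有三个错误:
1. integer*8 不是标准 Fortran 的类型声明。
2. real*8 不是标准 Fortran 的类型声明。
3. srand 和 rand 不是标准 Fortran 的内部过程。gfortran 支持它们,但其他编译器可能不支持。不要使用它们。
第 3 点可以改用标准内部子程序 random_number
来解决(但请参见下文)。第 1 点和第 2 点可以通过使用种别(kind)参数来解决。由于第 1 点和第 2 点影响的是变量声明,所有用到这些声明出错的变量的行现在也都被标记为错误,因此错误发生了级联。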
解决这些问题可以消除一部分错误,但恐怕还有更多的错误,尤其是数组访问越界。调试时可以使用 -fcheck=all
自动检测这类问题,我强烈建议你这样做;但请注意,这会大大降低程序的运行速度,因此在正式(生产)运行时请去掉它。如果我修复了上面提到的错误,然后用 -fcheck=all 编译并运行程序,会得到:
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -std=f2008 -fcheck=all -fopenmp -O -g pi2.f90
pi2.f90:22:7:
22 | ppt=(nc+num_threads-1)/num_threads
| 1
Warning: Integer division truncated to constant ‘2778’ at (1) [-Winteger-division]
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
1
28
14
26
32
9
5
8
10
35
21
29
0
22
17
4
19
thread_num= 14 istart= 38893 iend= 41670 0
thread_num= 21 istart= 58339 iend= 61116 0
thread_num= 28 istart= 77785 iend= 80562 0
20
12
7
24
23
15
34
25
thread_num= 26 istart= 72229 iend= 75006 0
thread_num= 23 istart= 63895 iend= 66672 0
thread_num= 10 istart= 27781 iend= 30558 0
thread_num= 15 istart= 41671 iend= 44448 0
thread_num= 1 istart= 2779 iend= 5556 0
thread_num= 25 istart= 69451 iend= 72228 0
thread_num= 9 istart= 25003 iend= 27780 0
16
33
30
thread_num= 0 istart= 1 iend= 2778 0
27
thread_num= 7 istart= 19447 iend= 22224 0
31
2
6
thread_num= 17 istart= 47227 iend= 50004 0
3
13
11
thread_num= 27 istart= 75007 iend= 77784 0
thread_num= 30 istart= 83341 iend= 86118 0
thread_num= 32 istart= 88897 iend= 91674 0
thread_num= 4 istart= 11113 iend= 13890 0
thread_num= 19 istart= 52783 iend= 55560 0
thread_num= 2 istart= 5557 iend= 8334 0
18
thread_num= 6 istart= 16669 iend= 19446 0
thread_num= 5 istart= 13891 iend= 16668 0
thread_num= 3 istart= 8335 iend= 11112 0
thread_num= 24 istart= 66673 iend= 69450 0
thread_num= 8 istart= 22225 iend= 25002 0
thread_num= 34 istart= 94453 iend= 97230 0
thread_num= 11 istart= 30559 iend= 33336 0
thread_num= 29 istart= 80563 iend= 83340 0
thread_num= 22 istart= 61117 iend= 63894 0
thread_num= 16 istart= 44449 iend= 47226 0
thread_num= 20 istart= 55561 iend= 58338 0
thread_num= 12 istart= 33337 iend= 36114 0
At line 45 of file pi2.f90
Fortran runtime error: Index '36' of dimension 1 of array 'ncircs' above upper bound of 35
Error termination. Backtrace:
#0 0x7f580fce1d01 in ???
#1 0x7f580fce2849 in ???
#2 0x7f580fce2ec6 in ???
#3 0x4015ab in MAIN__._omp_fn.0
at /home/ijb/work/stack/pi2.f90:45
#4 0x7f580fb4a77d in ???
#5 0x7f580fab1608 in start_thread
at /build/glibc-YbNSs7/glibc-2.31/nptl/pthread_create.c:477
#6 0x7f580f9d6292 in ???
#7 0xffffffffffffffff in ???
因此,你可以借助编译器找出许多问题。然而,还有更多问题是它发现不了的。最重要的是,你将数组 ncircs
声明为私有(private),但没有在并行区域内对其进行初始化——进入并行区域时,每个线程都会创建各自私有变量的新副本,默认情况下,并行区域之外所做的任何初始化都不会被带进来。因此,你使用了未初始化的变量。即使不考虑这一点,也要注意你的代码只初始化了 ncircs
的一部分,而不是你用到的全部元素,并且索引还存在差一(off-by-one)错误。
然而,除了上面提到的错误之外,这也不是编写 OpenMP 代码的正确方式。OpenMP 提供了在循环中自动分担工作的方法,你确实不应该像代码中那样手动划分迭代。此外,它还提供了专门用于此类计算的归约(reduction);临界区(critical)应仅在确实需要时使用,而不应用于这里这种归约更擅长处理的特定情形。也不需要 ncircs
数组——每个线程只需要一个值,那为什么要声明一个数组呢?根据经验,如果你声明的内存大小随线程数的增加而增加,那很可能是做错了什么。最后,我也不会把线程数硬编码到代码里,而会使用环境变量来设置线程数,这样无需重新编译就可以更改线程数。
下面是我解决这个问题的方式,并展示它能够正确编译和运行:
ijb@ijb-Latitude-5410:~/work/stack$ cat pi_ijb.f90
!> Monte Carlo estimate of pi, work-shared across OpenMP threads.
!!
!! Draws n_sample points uniformly in the unit square, shifts them so the
!! square is centred on the origin, and counts how many fall inside the
!! circle of radius 0.5. The ratio of hits to samples approximates pi/4.
!! The thread count is whatever the OpenMP runtime makes available
!! (e.g. via OMP_NUM_THREADS); nothing is hard-coded here.
program pi_calc
  use, intrinsic :: iso_fortran_env, only: dp => real64, i8 => int64
  use omp_lib
  implicit none

  real(dp),    parameter :: pi_exact  = 4.0_dp * atan(1.0_dp)
  real(dp),    parameter :: radius_sq = 0.5_dp * 0.5_dp   ! squared circle radius
  integer(i8), parameter :: n_sample  = 1000000000_i8
  integer,     parameter :: seed      = 864

  real(dp)    :: xy(1:2)          ! one sample point
  real(dp)    :: r2               ! squared distance of the sample from the centre
  real(dp)    :: pi_est
  integer(i8) :: hits             ! samples inside the circle (reduction target)
  integer(i8) :: trial
  integer(i8) :: tick_start, tick_end, ticks_per_sec
  integer     :: tid
  integer     :: seed_len
  integer     :: k

  ! Report the number of threads the runtime will hand out.
  print *, 'Running on ', omp_get_max_threads(), ' threads'

  hits = 0
  call system_clock(count=tick_start, count_rate=ticks_per_sec)

  ! Each thread keeps a private hit count; OpenMP sums them at the end.
  !$omp parallel default(none) private(tid, seed_len, k, xy, r2) reduction(+:hits)

  ! Seed the generator differently on every thread (thread safety of the
  ! generator is assumed, as in the original).
  tid = omp_get_thread_num()
  call random_seed(size=seed_len)
  call random_seed(put=[(k * seed * (tid + 1), k = 1, seed_len)])

  ! Let OpenMP divide the sampling loop among the threads.
  !$omp do
  do trial = 1, n_sample
    call random_number(xy)
    xy = xy - 0.5_dp
    r2 = xy(1)**2 + xy(2)**2
    ! Compare squared distances to avoid a square root per sample.
    if (r2 <= radius_sq) hits = hits + 1
  end do
  !$omp end do

  !$omp end parallel

  call system_clock(count=tick_end, count_rate=ticks_per_sec)

  pi_est = 4.0_dp * real(hits, dp) / real(n_sample, dp)

  print *, 'Using ', n_sample, ' sampling points: '
  print *, 'Approx pi ', pi_est
  print *, 'Exact pi ', pi_exact
  print *, 'Error ', abs(pi_est - pi_exact)
  print *, 'This took ', real(tick_end - tick_start) / real(ticks_per_sec), ' seconds'
end program pi_calc
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -fcheck=all -O -g -fopenmp pi_ijb.f90
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=4
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
Running on 4 threads
Using 1000000000 sampling points:
Approx pi 3.1415474360000002
Exact pi 3.1415926535897931
Error 4.5217589792923008E-005
This took 5.21012735 seconds
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -O -g -fopenmp pi_ijb.f90
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=1
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
Running on 1 threads
Using 1000000000 sampling points:
Approx pi 3.1415804920000001
Exact pi 3.1415926535897931
Error 1.2161589793002747E-005
This took 18.5533371 seconds
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=2
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
Running on 2 threads
Using 1000000000 sampling points:
Approx pi 3.1416029320000001
Exact pi 3.1415926535897931
Error 1.0278410206954192E-005
This took 8.87815666 seconds
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=4
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
Running on 4 threads
Using 1000000000 sampling points:
Approx pi 3.1415474360000002
Exact pi 3.1415926535897931
Error 4.5217589792923008E-005
This took 4.49851656 seconds
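ijb@ijb-Latitude-5410:~/work/stack$ cat pi_ijb.f90
Program pi_calc
Use, Intrinsic :: iso_fortran_env, Only : wp => real64, li => int64
Use omp_lib
Implicit None
Real( wp ), Parameter :: pi_exact = 4.0_wp * Atan( 1.0_wp )
Integer( li ), Parameter :: n_sample = 1000000000_li
Integer , Parameter :: seed = 864
Real( wp ), Dimension( 1:2 ) :: rand
Real( wp ) :: dist
Real( wp ) :: pi_approx
Integer( li ) :: ncirc
Integer( li ) :: i_sample
Integer( li ) :: start, finish, rate
Integer :: thread_num
Integer :: n_seed
Integer :: i
! Assume using the maximum number of threads made available
Write( *, * ) 'Running on ', omp_get_max_threads(), ' threads'
ncirc = 0
Call system_clock( start, rate )
! Create threads
!$omp parallel default( none ) private( thread_num, n_seed, i, rand, dist ) reduction( +:ncirc )
! Set up the random number generator. Assume it is thread safe
thread_num = omp_get_thread_num()
Call Random_seed( size = n_seed )
Call Random_seed( put = [ ( i * seed * ( thread_num + 1 ), i = 1, n_seed ) ] )
! Workshare the loop
!$omp do
Sampling_loop: Do i_sample = 1, n_sample
Call Random_number( rand )
rand = rand - 0.5_wp
dist = rand( 1 ) * rand( 1 ) + rand( 2 ) * rand( 2 )
If( dist
欢迎,请阅读。始终确保你的帖子格式正确。对于 Fortran 和 OpenMP 问题,请使用正确的标签。你必须说明你的问题。不要只写“它不工作”,这没有提供任何有用的信息;请告诉我们到底哪里出了错。有没有错误消息?还是结果不对?错到什么程度?我猜你这里的意思是 pi 的值不对?1)假设gfortran使用 -Wall -Wextra -fcheck=all 和where进行编译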