Parallel processing 用高斯消去法求解对称线性方程ax=b
我是并行编程新手,目前正致力于优化与电磁计算相关的代码。 通过分析程序的工作原理,我意识到85%的执行时间都花在解线性方程上。 我研究了一点openmp,但我不知道如何并行化这样的嵌套循环。 有什么想法吗? 先谢谢你。遵循下面的代码Parallel processing 用高斯消去法求解对称线性方程ax=b,parallel-processing,fortran,openmp,Parallel Processing,Fortran,Openmp,我是并行编程新手,目前正致力于优化与电磁计算相关的代码。 通过分析程序的工作原理,我意识到85%的执行时间都花在解线性方程上。 我研究了一点openmp,但我不知道如何并行化这样的嵌套循环。 有什么想法吗? 先谢谢你。遵循下面的代码 Subroutine GaussEqSolver_Sym(n,ma,a,b,ep,kwji) !------------------------------------------------------------------ ! Solve sysmm
Subroutine GaussEqSolver_Sym(n,ma,a,b,ep,kwji)
!------------------------------------------------------------------
! Solve sysmmetric linear equation ax=b by using Gauss elimination.
! If kwji=1, no solution;if kwji=0,has solution
! Input--n,ma,a(ma,n),b(n),ep,
! Output--b,kwji
!------------------------------------------------------------------
implicit real*8 (a-h,o-z)
dimension a(ma,n),b(n),m(n+1)
do 10 i=1,n
10 m(i)=i
do 120 k=1,n
p=0.0
do 20 i=k,n
do 20 j=k,n
if(dabs(a(i,j)).gt.dabs(p)) then
p=a(i,j)
io=i
jo=j
endif
20 continue
if(dabs(p)-ep) 30,30,35
30 kwji=1
return
35 continue
if(jo.eq.k) go to 45
do 40 i=1,n
t=a(i,jo)
a(i,jo)=a(i,k)
a(i,k)=t
40 continue
j=m(k)
m(k)=m(jo)
m(jo)=j
45 if(io.eq.k) go to 55
do 50 j=k,n
t=a(io,j)
a(io,j)=a(k,j)
a(k,j)=t
50 continue
t=b(io)
b(io)=b(k)
b(k)=t
55 p=1./p
in=n-1
if(k.eq.n) go to 65
do 60 j=k,in
60 a(k,j+1)=a(k,j+1)*p
65 b(k)=b(k)*p
if(k.eq.n) go to 120
do 80 i=k,in
do 70 j=k,in
70 a(i+1,j+1)=a(i+1,j+1)-a(i+1,k)*a(k,j+1)
80 b(i+1)=b(i+1)-a(i+1,k)*b(k)
120 continue
do 130 i1=2,n
i=n+1-i1
do 130 j=i,in
130 b(i)=b(i)-a(i,j+1)*b(j+1)
do 140 k=1,n
i=m(k)
140 a(1,i)=b(k)
do 150 k=1,n
150 b(k)=a(1,k)
kwji=0
return
end
如果您对性能感兴趣,您应该使用LAPACK。为了说明这一点,我编写了一个简单的驱动程序,用于比较调用DSYSV时提供的代码的速度。DSYSV是一个LAPACK例程,用于求解对称“双精度”矩阵的一组线性方程组。代码和结果如下所示,但总而言之,LAPACK的速度是Fortran的3.3倍,是Fortran的725倍。请注意,这可能不是一个优化的LAPACK库,而是我在笔记本电脑上安装的Mint Linux附带的任何库。无论如何,详情如下
ian@eris:~/work/stack$ cat solve.f90
Subroutine GaussEqSolver_Sym(n,ma,a,b,ep,kwji)
!------------------------------------------------------------------
! Solve sysmmetric linear equation ax=b by using Gauss elimination.
! If kwji=1, no solution;if kwji=0,has solution
! Input--n,ma,a(ma,n),b(n),ep,
! Output--b,kwji
!------------------------------------------------------------------
implicit real*8 (a-h,o-z)
dimension a(ma,n),b(n),m(n+1)
do 10 i=1,n
10 m(i)=i
do 120 k=1,n
p=0.0
do 20 i=k,n
do 20 j=k,n
if(dabs(a(i,j)).gt.dabs(p)) then
p=a(i,j)
io=i
jo=j
endif
20 continue
if(dabs(p)-ep) 30,30,35
30 kwji=1
return
35 continue
if(jo.eq.k) go to 45
do 40 i=1,n
t=a(i,jo)
a(i,jo)=a(i,k)
a(i,k)=t
40 continue
j=m(k)
m(k)=m(jo)
m(jo)=j
45 if(io.eq.k) go to 55
do 50 j=k,n
t=a(io,j)
a(io,j)=a(k,j)
a(k,j)=t
50 continue
t=b(io)
b(io)=b(k)
b(k)=t
55 p=1./p
in=n-1
if(k.eq.n) go to 65
do 60 j=k,in
60 a(k,j+1)=a(k,j+1)*p
65 b(k)=b(k)*p
if(k.eq.n) go to 120
do 80 i=k,in
do 70 j=k,in
70 a(i+1,j+1)=a(i+1,j+1)-a(i+1,k)*a(k,j+1)
80 b(i+1)=b(i+1)-a(i+1,k)*b(k)
120 continue
do 130 i1=2,n
i=n+1-i1
do 130 j=i,in
130 b(i)=b(i)-a(i,j+1)*b(j+1)
do 140 k=1,n
i=m(k)
140 a(1,i)=b(k)
do 150 k=1,n
150 b(k)=a(1,k)
kwji=0
return
end
Program solve_eqns
Use, Intrinsic :: iso_fortran_env, Only : wp => real64, li => int64
Implicit None
Real( wp ), Dimension( :, : ), Allocatable :: a, a_copy
Real( wp ), Dimension( : ), Allocatable :: b
Real( wp ), Dimension( : ), Allocatable :: x_lap, x_for
Real( wp ), Dimension( : ), Allocatable :: work
Real( wp ) :: time_lap, time_for
Integer, Dimension( : ), Allocatable :: pivots
Integer :: i
Integer :: n, nb = 64 ! hack value for nb - should use ilaenv
Integer :: error
Integer( li ) :: start, finish, rate
Write( *, * ) 'n ?'
Read ( *, * ) n
Allocate( a( 1:n, 1:n ) )
Allocate( b( 1:n ) )
Allocate( pivots( 1:n ) )
! Set up matrix
Call Random_number( a )
a = a - 0.5_wp
! Make A symmetric
a = 0.5_wp * ( a + Transpose( a ) )
! Add n to diagonal of A to avoid any nasty condition numbers
Do i = 1, n
a( i, i ) = a( i, i ) + n
End Do
! And keep a back up of A
a_copy = a
! RHS
Call Random_number( b )
! Solve with LAPACK
x_lap = b
Allocate( work( 1:n * nb ) )
Call system_clock( start, rate )
Call dsysv( 'U', n, 1, a, Size( a, dim = 1 ), pivots, &
x_lap, Size( x_lap, Dim = 1 ), work, Size( work ), error )
Call system_clock( finish, rate )
time_lap = Real( finish - start, Kind( time_lap ) ) / rate
! Restore A
a = a_copy
Write( *, * ) 'Errors for LAPACK', error, Maxval( Abs( Matmul( a, x_lap ) - b ) )
Write( *, * ) 'Time for LAPACK', time_lap
! Solve with Fortran
x_for = b
Call system_clock( start, rate )
Call GaussEqSolver_Sym( Size( a, Dim = 2 ), Size( a, Dim = 1 ), a, x_for, Epsilon( a ), error )
Call system_clock( finish, rate )
time_for = Real( finish - start, Kind( time_for ) ) / rate
! Restore A
a = a_copy
Write( *, * ) 'Errors for Fortran', error, Maxval( Abs( Matmul( a, x_for ) - b ) )
Write( *, * ) 'Time_For for Fortran', time_for
Write( *, * ) 'Max difference in solutions', Maxval( Abs( x_lap - x_for ) )
Write( *, * ) 'LAPACK is ', time_for / time_lap, ' times quicker than the Fortran'
End Program solve_eqns
ian@eris:~/work/stack$ gfortran --version
GNU Fortran (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
ian@eris:~/work/stack$ gfortran -O3 solve.f90 -llapack
ian@eris:~/work/stack$ ./a.out
n ?
100
Errors for LAPACK 0 4.4408920985006262E-016
Time for LAPACK 1.5952670000000000E-003
Errors for Fortran 0 9.9920072216264089E-016
Time_For for Fortran 5.3095140000000004E-003
Max difference in solutions 8.6736173798840355E-018
LAPACK is 3.3282917530419676 times quicker than the Fortran
ian@eris:~/work/stack$ ./a.out
n ?
1000
Errors for LAPACK 0 1.3322676295501878E-015
Time for LAPACK 3.9014976000000000E-002
Errors for Fortran 0 4.9960036108132044E-015
Time_For for Fortran 1.9314730620000000
Max difference in solutions 4.7704895589362195E-018
LAPACK is 49.505940026722044 times quicker than the Fortran
ian@eris:~/work/stack$ ./a.out
n ?
5000
Errors for LAPACK 0 4.3298697960381105E-015
Time for LAPACK 1.2611959250000000
Errors for Fortran 0 1.3322676295501878E-014
Time_For for Fortran 913.76959534100001
Max difference in solutions 2.7647155398380363E-018
LAPACK is 724.52628273517462 times quicker than the Fortran
正如我在对你以前的线性代数问题的评论中所说的,现在你不需要自己编写代码,你需要使用一个很好的LAPACK实现,比如MKL。它将比您自己编写的任何东西都更快、更稳定,并且将与线程并行。假设你有一个通用的矩阵DGESV可能是你想要的例程。虽然我同意@IanBush的观点,对于高效的并行代码,你最好使用一个现成的库(更好),但它不会教你太多关于并行代码的知识,这可能是一个很好的目标。然而,正如我对这个问题的早期版本所做的评论,我认为在尝试并行化之前,您需要对代码进行现代化。取消使用
continue
和goto
不仅仅是一种现代主义时尚,它会使代码在尝试并行化时更加清晰,并且需要您更彻底地理解代码。哦,放弃隐式键入并使用implicit none
——这将保护您在重构过程中避免一些严重的潜在错误。(如果我们讨论要放弃的内容,算术If将在列表中占据相当高的位置。)不幸的是,我只能向@francescalus投票一次。。。