Python 反应扩散算法中的Numba或Cython加速
我想加速一段用Python和NumPy编写的代码。我在反应扩散模型中使用了Gray-Scott算法,但改用Numba和Cython之后,速度反而更慢!有没有办法让它更快?先谢谢了。
标签:python, numpy, cython, pde, numba
Python+NumPy
def GrayScott(counts, Du, Dv, F, k):
    """Run the Gray-Scott reaction-diffusion model with vectorised NumPy.

    The simulation lives on a fixed 300x300 grid that is padded with a
    one-cell border, so the 5-point Laplacian can be written with plain
    slices.  The border cells are never written and therefore stay at 0.

    Args:
        counts: Number of time steps to perform.
        Du: Diffusion coefficient applied to the Laplacian of ``u``.
        Dv: Diffusion coefficient applied to the Laplacian of ``v``.
        F: Feed-rate term of the model.
        k: Kill-rate term of the model.

    Returns:
        The padded ``V`` array of shape ``(302, 302)`` after ``counts`` steps.

    Note:
        The initial state is perturbed with ``np.random.random``, so results
        differ between calls unless NumPy's global RNG is seeded.
    """
    n = 300
    # np.float64 replaces the np.float_ alias, which NumPy 2.0 removed.
    U = np.zeros((n + 2, n + 2), dtype=np.float64)
    V = np.zeros((n + 2, n + 2), dtype=np.float64)
    # Interior views: writing through u/v updates U/V in place.
    u, v = U[1:-1, 1:-1], V[1:-1, 1:-1]

    r = 20
    u[:] = 1.0
    # Floor division keeps the slice bounds integral; on Python 3 the former
    # ``n/2`` produced a float and the slicing raised TypeError.
    lo, hi = n // 2 - r, n // 2 + r
    U[lo:hi, lo:hi] = 0.50
    V[lo:hi, lo:hi] = 0.25
    u += 0.15 * np.random.random((n, n))
    v += 0.15 * np.random.random((n, n))

    for _ in range(counts):
        # 5-point discrete Laplacians, evaluated before u and v are updated.
        Lu = (U[0:-2, 1:-1] +
              U[1:-1, 0:-2] - 4 * U[1:-1, 1:-1] + U[1:-1, 2:] +
              U[2:, 1:-1])
        Lv = (V[0:-2, 1:-1] +
              V[1:-1, 0:-2] - 4 * V[1:-1, 1:-1] + V[1:-1, 2:] +
              V[2:, 1:-1])

        uvv = u * v * v
        u += Du * Lu - uvv + F * (1 - u)
        v += Dv * Lv + uvv - (F + k) * v

    return V
Numba
from numba import jit, autojit
# ``autojit`` no longer exists in current Numba releases; ``jit`` without a
# signature compiles lazily on first call, which is what ``autojit`` did.
@jit
def numbaGrayScott(counts, Du, Dv, F, k):
    """Run the Gray-Scott reaction-diffusion model, compiled with Numba.

    Same model and fixed 300x300 padded grid as the NumPy version; the
    Laplacians are computed with explicit loops and the reaction update with
    whole-array expressions.

    Args:
        counts: Number of time steps to perform.
        Du: Diffusion coefficient applied to the Laplacian of ``u``.
        Dv: Diffusion coefficient applied to the Laplacian of ``v``.
        F: Feed-rate term of the model.
        k: Kill-rate term of the model.

    Returns:
        The padded ``V`` array of shape ``(302, 302)`` after ``counts`` steps.
    """
    n = 300
    # np.float64 replaces the np.float_ alias, which NumPy 2.0 removed.
    U = np.zeros((n + 2, n + 2), dtype=np.float64)
    V = np.zeros((n + 2, n + 2), dtype=np.float64)
    # Interior views: writing through u/v updates U/V in place.
    u, v = U[1:-1, 1:-1], V[1:-1, 1:-1]

    r = 20
    u[:] = 1.0
    # Floor division keeps the slice bounds integral; ``n/2`` is a float
    # under Python 3 semantics and cannot be used as a slice index.
    lo, hi = n // 2 - r, n // 2 + r
    U[lo:hi, lo:hi] = 0.50
    V[lo:hi, lo:hi] = 0.25
    u += 0.15 * np.random.random((n, n))
    v += 0.15 * np.random.random((n, n))

    # Laplacian buffers are allocated once and overwritten every step.
    Lu = np.zeros_like(u)
    Lv = np.zeros_like(v)

    for i in range(counts):
        # (row, col) indexes the interior; +1 maps it into the padded arrays.
        for row in range(n):
            for col in range(n):
                Lu[row, col] = (U[row + 1, col + 2] + U[row + 1, col]
                                + U[row + 2, col + 1] + U[row, col + 1]
                                - 4 * U[row + 1, col + 1])
                Lv[row, col] = (V[row + 1, col + 2] + V[row + 1, col]
                                + V[row + 2, col + 1] + V[row, col + 1]
                                - 4 * V[row + 1, col + 1])

        uvv = u * v * v
        u += Du * Lu - uvv + F * (1 - u)
        v += Dv * Lv + uvv - (F + k) * v

    return V
Cython
%%cython
cimport cython
import numpy as np
cimport numpy as np
cpdef cythonGrayScott(int counts, double Du, double Dv, double F, double k):
    """Run the Gray-Scott reaction-diffusion model (first Cython attempt).

    Same model and fixed 300x300 padded grid as the NumPy version.  The
    arrays are only typed as ``np.ndarray`` (no element type or dimension),
    which does not speed up element access; this is the slow variant the
    question is about.

    Args:
        counts: Number of time steps to perform.
        Du: Diffusion coefficient applied to the Laplacian of ``u``.
        Dv: Diffusion coefficient applied to the Laplacian of ``v``.
        F: Feed-rate term of the model.
        k: Kill-rate term of the model.

    Returns:
        The padded ``V`` array of shape ``(302, 302)`` after ``counts`` steps.
    """
    cdef int n = 300
    # np.float64 replaces the np.float_ alias, which NumPy 2.0 removed.
    cdef np.ndarray U = np.zeros((n+2, n+2), dtype=np.float64)
    cdef np.ndarray V = np.zeros((n+2, n+2), dtype=np.float64)
    # Interior views: writing through u/v updates U/V in place.
    cdef np.ndarray u = U[1:-1, 1:-1]
    cdef np.ndarray v = V[1:-1, 1:-1]
    cdef int r = 20
    # Floor division keeps the slice bounds integral; ``n/2`` is true
    # division under Python-3 language level and yields a float.
    cdef int lo = n // 2 - r
    cdef int hi = n // 2 + r

    u[:] = 1.0
    U[lo:hi, lo:hi] = 0.50
    V[lo:hi, lo:hi] = 0.25
    u += 0.15*np.random.random((n, n))
    v += 0.15*np.random.random((n, n))

    # Laplacian buffers are allocated once and overwritten every step.
    cdef np.ndarray Lu = np.zeros_like(u)
    cdef np.ndarray Lv = np.zeros_like(v)
    cdef int i, row, col
    cdef np.ndarray uvv

    for i in range(counts):
        # (row, col) indexes the interior; +1 maps it into the padded arrays.
        for row in range(n):
            for col in range(n):
                Lu[row, col] = (U[row+1, col+2] + U[row+1, col]
                                + U[row+2, col+1] + U[row, col+1]
                                - 4*U[row+1, col+1])
                Lv[row, col] = (V[row+1, col+2] + V[row+1, col]
                                + V[row+2, col+1] + V[row, col+1]
                                - 4*V[row+1, col+1])

        uvv = u*v*v
        u += Du*Lu - uvv + F*(1 - u)
        v += Dv*Lv + uvv - (F + k)*v

    return V
用法示例:
# Example call: counts=4000 steps with Du=0.16, Dv=0.08, F=0.04, k=0.06.
GrayScott(4000, 0.16, 0.08, 0.04, 0.06)
除了循环和涉及的大量运算之外,在您的情况下,最有可能拖慢性能的是数组分配。我不知道为什么您的Numba和Cython版本没有达到预期,但是您可以让所有运算都就地(in-place)执行,即用以下内容替换当前的循环,从而使NumPy代码快约2倍(以牺牲一些可读性为代价):
# Drop-in replacement for the time-stepping loop: every operation writes
# into a preallocated buffer, so no temporary arrays are created per step.
# Expects u, v (interior views of the padded U, V), counts, Du, Dv, F and k
# to be defined by the surrounding code.
Lu = np.empty_like(u)
Lv = np.empty_like(v)
uvv = np.empty_like(u)

for i in range(counts):
    # Lu <- Du * (5-point Laplacian of u)
    np.multiply(u, -4, out=Lu)
    np.add(Lu, U[:-2, 1:-1], out=Lu)
    np.add(Lu, U[1:-1, :-2], out=Lu)
    np.add(Lu, U[1:-1, 2:], out=Lu)
    np.add(Lu, U[2:, 1:-1], out=Lu)
    np.multiply(Lu, Du, out=Lu)

    # Lv <- Dv * (5-point Laplacian of v)
    np.multiply(v, -4, out=Lv)
    np.add(Lv, V[:-2, 1:-1], out=Lv)
    np.add(Lv, V[1:-1, :-2], out=Lv)
    np.add(Lv, V[1:-1, 2:], out=Lv)
    np.add(Lv, V[2:, 1:-1], out=Lv)
    np.multiply(Lv, Dv, out=Lv)

    # uvv <- u * v * v, taken from the values before this step's update
    np.multiply(u, v, out=uvv)
    np.multiply(uvv, v, out=uvv)

    np.subtract(Lu, uvv, out=Lu)
    np.add(Lv, uvv, out=Lv)

    # u + Du*Lap(u) - uvv + F*(1 - u)  ==  u*(1 - F) + F + Lu
    np.multiply(u, 1 - F, out=u)
    np.add(u, F, out=u)
    np.add(u, Lu, out=u)

    # v + Dv*Lap(v) + uvv - (F + k)*v  ==  v*(1 - F - k) + Lv
    np.multiply(v, 1 - F - k, out=v)
    np.add(v, Lv, out=v)
以下是加速cython版本的步骤:
- `cdef np.ndarray` 无法加快元素访问速度,您需要使用Cython中的memoryview:`cdef double[:, ::1] bU = U`
- 关闭 `boundscheck` 和 `wraparound`
- 在for循环中执行所有计算
%%cython
#cython: boundscheck=False
#cython: wraparound=False
cimport cython
import numpy as np
cimport numpy as np
cpdef cythonGrayScott(int counts, double Du, double Dv, double F, double k):
    """Run the Gray-Scott reaction-diffusion model with typed memoryviews.

    Same model and fixed 300x300 padded grid as the NumPy version, but all
    per-cell arithmetic happens inside C loops over ``double[:, ::1]``
    memoryviews.

    Args:
        counts: Number of time steps to perform.
        Du: Diffusion coefficient applied to the Laplacian of ``u``.
        Dv: Diffusion coefficient applied to the Laplacian of ``v``.
        F: Feed-rate term of the model.
        k: Kill-rate term of the model.

    Returns:
        The padded ``V`` array of shape ``(302, 302)`` after ``counts`` steps.
    """
    cdef int n = 300
    # np.float64 replaces the np.float_ alias, which NumPy 2.0 removed.
    cdef np.ndarray U = np.zeros((n+2, n+2), dtype=np.float64)
    cdef np.ndarray V = np.zeros((n+2, n+2), dtype=np.float64)
    # Interior views, used only for the vectorised initialisation below.
    cdef np.ndarray u = U[1:-1, 1:-1]
    cdef np.ndarray v = V[1:-1, 1:-1]
    # The seed radius has its own name; the original reused ``r`` as the
    # row loop index further down.
    cdef int radius = 20
    # Floor division keeps the slice bounds integral; ``n/2`` is true
    # division under Python-3 language level and yields a float.
    cdef int lo = n // 2 - radius
    cdef int hi = n // 2 + radius

    u[:] = 1.0
    U[lo:hi, lo:hi] = 0.50
    V[lo:hi, lo:hi] = 0.25
    u += 0.15*np.random.random((n, n))
    v += 0.15*np.random.random((n, n))

    # Allocated explicitly (instead of zeros_like on a strided view) so the
    # buffers are C-contiguous, as the ``::1`` memoryviews require.
    cdef np.ndarray Lu = np.zeros((n, n), dtype=np.float64)
    cdef np.ndarray Lv = np.zeros((n, n), dtype=np.float64)
    cdef int i, r, c, r1, c1, r2, c2
    cdef double uvv
    cdef double[:, ::1] bU = U
    cdef double[:, ::1] bV = V
    cdef double[:, ::1] bLu = Lu
    cdef double[:, ::1] bLv = Lv

    for i in range(counts):
        # Pass 1: Laplacians of the current state.  (r, c) indexes the
        # interior; r1/c1 is the same cell in the padded arrays.
        for r in range(n):
            r1 = r + 1
            r2 = r + 2
            for c in range(n):
                c1 = c + 1
                c2 = c + 2
                bLu[r, c] = bU[r1, c2] + bU[r1, c] + bU[r2, c1] + bU[r, c1] - 4*bU[r1, c1]
                bLv[r, c] = bV[r1, c2] + bV[r1, c] + bV[r2, c1] + bV[r, c1] - 4*bV[r1, c1]

        # Pass 2: reaction + diffusion update.  Kept separate from pass 1 so
        # every Laplacian is taken from the state before this step.
        for r in range(n):
            r1 = r + 1
            for c in range(n):
                c1 = c + 1
                uvv = bU[r1, c1]*bV[r1, c1]*bV[r1, c1]
                bU[r1, c1] += Du*bLu[r, c] - uvv + F*(1 - bU[r1, c1])
                bV[r1, c1] += Dv*bLv[r, c] + uvv - (F + k)*bV[r1, c1]

    return V
它比numpy版本快11倍左右。
评论:
- 不错,+1。
- 非常感谢!你们的帮助使我的计算变得更精确:)
- 非常有用的技巧——在我的机器上,它能使速度提高40-50%。但当然会损失可读性…
- Numba是否显著加快了代码的速度?