Methods to optimize an embarrassingly parallel N-body simulation in Python

Tags: python, optimization, jit, numba, runge-kutta

I'm relatively new to parallel computing and the Numba package. I'm looking for ways to optimize my embarrassingly parallel N-body simulation. So far I've applied everything I know to it: NumPy arrays, the JIT compiler, and multiprocessing. However, I'm still not getting the speed I want (I've seen videos where people's code runs much faster).

What I have right now is a fairly simple Python integrator that uses Runge-Kutta integration and two equations of motion. I work with numerical integrators all the time in my field, so I'd really like to pick up a few more tricks from you all.
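
For reference, the classical fourth-order Runge-Kutta update I am aiming for is (h is the step size and f is the right-hand side of the ODE):

k1 = h*f(t, y)
k2 = h*f(t + h/2, y + k1/2)
k3 = h*f(t + h/2, y + k2/2)
k4 = h*f(t + h, y + k3)
y_next = y + (k1 + 2*k2 + 2*k3 + k4)/6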

I've posted my code below, but essentially I have one main function called "Motion" which takes two initial conditions and integrates their motion over a set amount of time. I have JIT-ed this function and everything it calls repeatedly: "RK4", "ODE", and "Electric_Field". Finally, I call Pool from multiprocessing to parallelize "Motion" and feed it different initial conditions for each simulation it runs.

Again, I've implemented every type of optimization I know of, but I'm still not really satisfied with the speed. My code is posted below. If anyone can spot something in the algorithm that can be optimized further, that would be extremely helpful and educational (for me at least)! Thanks for your time.

import numpy as np
import matplotlib.pyplot as plt
from numba import njit, prange
from time import time
from tqdm import tqdm
import multiprocessing as mp
from IPython.display import clear_output
from scipy import interpolate


"Electric Field Information"
A = np.float32(1.00E-04)
N_waves = int(19)
frequency =  np.linspace(37.5,46.5,N_waves)*1e-3 #Set of frequencies used for Electric Field 
m = int(20) #Azimuthal Wave Number
sigma = np.float32(0.5) #Gaussian Width of E wave in L
zeta = np.float32(1)

"Particle Information"
N_Particles = int(10000)
q = np.float32(-1) #Charge of electron
mass = np.float32(0.511e6) #Mass of electron in eV/c^2
FirstAdiabatic = np.float32(2000e10) #MeV/Gauss Adiabatic Invariant


"Runge-Kutta Paramters"
Total_Time = np.float32(10) #hours
Step_Size = np.float32(0.2) #second 
Plot_Time = np.float32(60) #seconds
time_array = np.arange(0, Total_Time*3600+Step_Size, Step_Size) #Convert to seconds and Add End Point
N_points = len(time_array)

Skip_How_Many = int(Plot_Time/Step_Size) #Used to shorten our data set and save RAM

"Constants"    
Beq = np.float64(31221.60592e-9) #Equatorial magnetic field in Tesla (~31222 nT)
Re = np.float32(6371e3) #Meters
c = np.float32(2.998e8) #m/s

"Start Electric Field Code"
def wave_peak(omega): #Called once so no need to JIT or Optimize this
    L_sample = np.linspace(1,10,100) 
    phidot = -3*FirstAdiabatic / (q* (L_sample*Re)**2 * np.sqrt(1+ (2*FirstAdiabatic*Beq/ (mass*L_sample**3)) ) )
    phidot_to_L = interpolate.interp1d(phidot,L_sample, kind = 'cubic')
    L0i = phidot_to_L(omega/m)
    return L0i 
omega = 2*np.pi*frequency
L0i_wave = wave_peak(omega)
Phi0i_wave = np.linspace(0,2*np.pi,N_waves)
np.random.shuffle(Phi0i_wave)

@njit(nogil= True)
def Electric_Field(t,r):
    E0 = A*np.exp(-(r[0]-L0i_wave)**2 / (2*sigma**2))
    Delta = np.arctan2( (r[0] * (r[0]-L0i_wave)/sigma**2 - 1), (2*np.pi*r[0]/zeta) )
    Er = E0/m * np.sqrt( (2*np.pi*r[0]/zeta)**2 + (r[0]*(r[0]-L0i_wave)/sigma**2 -1)**2 ) * np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta + Delta)
    Ephi = E0*np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta)
    return np.sum(Er),np.sum(Ephi)
"End of Electric Field Code"

"Particle's ODE - Equation of Motion"
@njit(nogil= True)
def ODE(t,r):
    Er, Ephi = Electric_Field(t,r) #Pull out the electric field so we only call it once.
    Ldot = Ephi * r[0]**3 / (Re*Beq)
    Phidot = -Er * r[0]**2 / (Re*Beq) - 3* FirstAdiabatic / (q*r[0]**2*Re**2) * 1/np.sqrt(2*FirstAdiabatic*Beq/ (r[0]**3*mass) + 1)
    return np.array([Ldot,Phidot])

@njit(nogil= True)
def RK4(t,r): #Standard Runge-Kutta Integration Algorithm
    k1 = Step_Size*ODE(t,r)
    k2 = Step_Size*ODE(t+Step_Size/2, r+k1/2)
    k3 = Step_Size*ODE(t+Step_Size/2, r+k2/2) #Standard RK4 evaluates this stage at r + k2/2
    k4 = Step_Size*ODE(t+Step_Size, r+k3)
    return r + k1/6 + k2/3 + k3/3 + k4/6

@njit(nogil= True)
def Motion(L0,Phi0): #Insert initial conditions and it will loop through the RK4 integrator and output all the positions.
    L_Array = np.zeros_like(time_array)
    Phi_Array = np.zeros_like(time_array)
    
    L_Array[0] = L0
    Phi_Array[0] = Phi0
    for i in range(1,N_points):
        L_Array[i], Phi_Array[i] = RK4(time_array[i-1], np.array([ L_Array[i-1],Phi_Array[i-1] ]) )
    
    return L_Array[::Skip_How_Many], Phi_Array[::Skip_How_Many] 
    #Skip_How_Many is used to take up less RAM space since we don't need that kind of precision in our data

# Location = Motion(5,0)
# x = Location[0]*np.cos(Location[1])
# y = Location[0]*np.sin(Location[1])
# plt.plot(x,y,"o", markersize = 0.5)
# ts = time()
# Motion(5,0)
# print('Solo Time:', time() - ts)

"Getting my Arrays ready so I can index it"
Split = int(np.sqrt(N_Particles))
L0i = np.linspace(4.4,5.5,Split)
Phi0i = np.linspace(0,360,Split) / 180 * np.pi
L0_Grid = np.repeat(L0i,Split) 
# ^Here I want the equivalent of a meshgrid of L0i and Phi0i, so I repeat L0i with np.repeat and take the index mod (%) Split on Phi0i in the multiprocessing loop below
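# (Sanity check of that indexing, left as commented-out code: the pair fed to particle ii in the
#  pool loop below is (L0_Grid[ii], Phi0i[ii % Split]), i.e. the flattened row-major meshgrid of L0i and Phi0i)
# LL, PP = np.meshgrid(L0i, Phi0i, indexing='ij')
# assert np.allclose(L0_Grid, LL.ravel()) and np.allclose(np.tile(Phi0i, Split), PP.ravel())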


#Create list to collect the results returned by the worker processes
results = []
def get_results(result): #Callback used by multiprocessing to append each run's result as it finishes.
    results.append(result)
    clear_output()
    print("Getting Results %0.2f" %(len(results)/N_Particles * 100), end='\r')
    
if __name__ == '__main__':
    #Call In Multiprocessing
    pool = mp.Pool(mp.cpu_count()) #Start one worker process per CPU core
    ts = time() #Timing this process. Begins here
    for ii in range(N_Particles): #Submit one Motion call per particle; apply_async spreads them across the worker processes
        pool.apply_async(Motion, args = (L0_Grid[ii],Phi0i[int(ii%Split)]), callback=get_results)
        
    pool.close() #Stop accepting new tasks; join() below then waits for all workers to finish
    pool.join()
    print('Time in MP parallel:', time() - ts) #Output Time
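
One direction I have been wondering about (I already import prange but never actually use it) is whether a single Numba parallel loop over all the particles could replace the multiprocessing pool. Below is a rough, untested sketch of what I mean; "Motion_all" is just a placeholder name of mine, and I have not benchmarked it against the Pool version:

@njit(parallel=True, nogil=True)
def Motion_all(L0_all, Phi0_all): #Sketch: integrate every particle inside one JIT-compiled parallel loop
    n = L0_all.shape[0]
    n_out = len(time_array[::Skip_How_Many]) #Same downsampled length that Motion returns
    L_out = np.zeros((n, n_out), dtype=time_array.dtype)
    Phi_out = np.zeros((n, n_out), dtype=time_array.dtype)
    for p in prange(n): #prange lets Numba spread the particles across threads
        L_p, Phi_p = Motion(L0_all[p], Phi0_all[p])
        L_out[p, :] = L_p
        Phi_out[p, :] = Phi_p
    return L_out, Phi_out

The driver would then be a single call such as Motion_all(L0_Grid, np.tile(Phi0i, Split)) instead of the apply_async loop, which also avoids shipping results back through callbacks, but whether that is actually faster on my machine is exactly the kind of thing I am asking about.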