Python 如何使用numba加速xarray计算？_Python_Numba_Python Xarray

Python 如何使用numba加速xarray计算？

python

Python 如何使用numba加速xarray计算？,python,numba,python-xarray,Python,Numba,Python Xarray,我正在努力更好地理解numba装饰，尤其是guvectorize 我试着开始。特别是在最底部的步骤15 我试图修改它来计算风速以下是我得到的： import numpy as np import xarray as xr import datetime import glob import dask import sys import os import tempfile from numba import float64, guvectorize, vectorize, njit im

我正在努力更好地理解numba装饰，尤其是guvectorize

我试着开始。特别是在最底部的步骤15

我试图修改它来计算风速

以下是我得到的：

import numpy as np
import xarray as xr
import datetime
import glob
import dask

import sys
import os
import tempfile

from numba import float64, guvectorize, vectorize, njit

import time as t

@guvectorize(
    "(float64, float64, float64)",
    "(), () -> ()",
    nopython=True,
)
def calcWindspeed_ufunc(u, v, out):
        out = np.sqrt( u**2 + v**2 )


def calcWindspeed(u, v):

    return xr.apply_ufunc(calcWindspeed_ufunc, u, v,
                         input_core_dims=[[],[]],
                         output_core_dims=[[]],
                         # vectorize=True,
                         dask="parallelized",
                         output_dtypes=[u.dtype])


def main():

    nlon = 120
    nlat = 100
    ntime = 3650
    lon = np.linspace(129.4, 153.75, nlon)
    lat = np.linspace(-43.75, -10.1, nlat)
    time = np.linspace(0, 365, ntime)

    #< Create random data
    u = 10 * np.random.rand(len(time), len(lat), len(lon))
    u = xr.Dataset({"u": (["time", "lat", "lon"], u)},coords={"time": time, "lon": lon, "lat": lat})
    u = u.chunk({'time':365})
    u = u['u']
    v = u.copy()


    start = t.time()
    ws_xr = np.sqrt( u**2 + v**2 ).load()
    end = t.time()
    print('It took xarray {} seconds!'.format(end-start))

    start = t.time()
    ws_ufunc = calcWindspeed(u, v).load()
    end = t.time()
    print('It took numba {} seconds!'.format(end-start))

    # Difference of the output
    print( (ws_xr-ws_ufunc).max() )


if __name__ == '__main__':
    import dask.distributed
    import sys

    # Get the number of CPUS in the job and start a dask.distributed cluster
    mem          = 190
    cores        = 4
    memory_limit = '{}gb'.format(int(max(mem/cores, 4)))
    client       = dask.distributed.Client(n_workers=cores, threads_per_worker=1, memory_limit=memory_limit, local_dir=tempfile.mkdtemp())


    #< Print client summary
    print('### Client summary')
    print(client)
    print('\n\n')

    #< Call the main function
    main()

    #< Close the client
    client.shutdown()

将numpy导入为np
将xarray作为xr导入
导入日期时间
导入glob
进口达斯克
导入系统
导入操作系统
导入临时文件
来自numba import float64，guvectorize，vectorize，njit
导入时间为t
@矢量化(
“（浮动64，浮动64，浮动64）”，
"(), () -> ()",
nopython=正确，
)
def calcWindspeed_ufunc（u、v、out）：
out=np.sqrt（u**2+v**2）
def calcWindspeed（u，v）：
返回xr.apply_ufunc（calcWindspeed_ufunc，u，v，
输入核心尺寸=[]，[]，
输出\u核心\u dims=[[]]，
#矢量化=真，
dask=“并行化”，
输出类型=[u.dtype]）
def main（）：
nlon=120
nlat=100
时间=3650
lon=np.linspace（129.4153.75，nlon）
lat=np.linspace（-43.75，-10.1，nlat）
时间=np.linspace（0，365，ntime）
#<创建随机数据
u=10*np.rand随机随机数（len（时间）、len（纬度）、len（经度））
u=xr.Dataset（{“u”：（[“time”，“lat”，“lon”]，u）}，coords={“time”：time，“lon”：lon，“lat”：lat}）
u=u.chunk（{'time'：365}）
u=u['u']
v=u.copy（）
开始=t.时间（）
ws_xr=np.sqrt（u**2+v**2）.load（）
结束=t.时间（）
打印（'xarray{}秒！格式（结束-开始））
开始=t.时间（）
ws_ufunc=calcWindspeed（u，v）.load（）
结束=t.时间（）
打印（'花费了numba{}秒！'。格式（结束-开始））
#产量差异
打印（（ws_xr-ws_ufunc.max（））
如果uuuu name uuuuuu='\uuuuuuu main\uuuuuuu'：
导入dask.distributed
导入系统
#获取作业中的CPU数量并启动dask.distributed集群
mem=190
芯数=4
内存限制=“{}gb”。格式（int（max（mem/cores，4）））
client=dask.distributed.client（n_worker=cores，threads_per_worker=1，memory_limit=memory_limit，local_dir=tempfile.mkdtemp（））
#<打印客户摘要
打印（“####客户摘要”）
打印（客户端）
打印（'\n\n'）
#<调用主函数
main（）
#<关闭客户端
client.shutdown（）

这在技术上是可行的（它运行），但输出是错误的。两种计算之间的差值应该接近0，但在我的例子中是14

我不明白我做错了什么

谢谢你的帮助

几点想法：

如果只是叫numpy，就没有必要使用numba。Numba运行编译后的代码，但当前示例实际上没有任何代码
如果您使用此功能运行多个维度，则可以单独使用
```
xr.apply\u ufunc
```
来执行此操作
如果你想让其他人参与这个例子，你能把它缩小到最小尺寸吗？目前有达斯克，沙雷，努巴-如果你把它们去掉，差别还存在吗

作为参考，以下是我使用xarray和numba编写的一些函数