Dask distributed LocalCluster fails with "TypeError: can't pickle _thread._local objects" when using dask.array.store to an hdf5 file

I'm running on a single machine with 16 cores and 64 GB of RAM, and I want to use dask with a LocalCluster, because I need the distributed diagnostics tools for profiling and optimization.
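
For context, the setup in my full script is just a stock LocalCluster, roughly like the sketch below (the worker and thread counts are simply my choice for this machine, nothing special; the minimal example further down uses Client(processes=False) instead):

from dask.distributed import Client, LocalCluster

# 4 workers x 4 threads is just my pick for a 16-core box
cluster = LocalCluster(n_workers=4, threads_per_worker=4, memory_limit="16GB")
client = Client(cluster)
print(client.dashboard_link)  # the diagnostics dashboard I want for profiling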

I set up the LocalCluster as described, but it still gives me the following error:

Traceback (most recent call last):
  File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/protocol/pickle.py", line 38, in dumps
    result = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
TypeError: can't pickle _thread._local objects

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/myusername/remote_code/trials/minimal_reproducible_example.py", line 61, in <module>
    create_matrix()
  File "/home/myusername/remote_code/trials/minimal_reproducible_example.py", line 55, in create_matrix
    da.store(w, d_set, dtype="float32")
  File "/data/myusername/anaconda3/lib/python3.7/site-packages/dask/array/core.py", line 916, in store
    result.compute(**kwargs)
  File "/data/myusername/anaconda3/lib/python3.7/site-packages/dask/base.py", line 175, in compute
    (result,) = compute(self, traverse=False, **kwargs)
  File "/data/myusername/anaconda3/lib/python3.7/site-packages/dask/base.py", line 446, in compute
    results = schedule(dsk, keys, **kwargs)
  File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/client.py", line 2499, in get
    actors=actors,
  File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/client.py", line 2426, in _graph_to_futures
    "tasks": valmap(dumps_task, dsk3),
  File "cytoolz/dicttoolz.pyx", line 179, in cytoolz.dicttoolz.valmap
  File "cytoolz/dicttoolz.pyx", line 204, in cytoolz.dicttoolz.valmap
  File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/worker.py", line 3186, in dumps_task
    return {"function": dumps_function(task[0]), "args": warn_dumps(task[1:])}
  File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/worker.py", line 3195, in warn_dumps
    b = dumps(obj)
  File "/data/myusername/anaconda3/lib/python3.7/site-packages/distributed/protocol/pickle.py", line 51, in dumps
    return cloudpickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
  File "/data/myusername/anaconda3/lib/python3.7/site-packages/cloudpickle/cloudpickle.py", line 1108, in dumps
    cp.dump(obj)
  File "/data/myusername/anaconda3/lib/python3.7/site-packages/cloudpickle/cloudpickle.py", line 473, in dump
    return Pickler.dump(self, obj)
  File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 437, in dump
    self.save(obj)
  File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 786, in save_tuple
    save(element)
  File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 549, in save
    self.save_reduce(obj=obj, *rv)
  File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 662, in save_reduce
    save(state)
  File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 856, in save_dict
    self._batch_setitems(obj.items())
  File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 882, in _batch_setitems
    save(v)
  File "/data/myusername/anaconda3/lib/python3.7/pickle.py", line 524, in save
    rv = reduce(self.proto)
TypeError: can't pickle _thread._local objects
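
If I read the traceback correctly, everything fails inside distributed's graph serialization (_graph_to_futures -> dumps_task -> cloudpickle) before any task even runs. My suspicion is that the open h5py dataset I pass to da.store is the unpicklable part of the graph. The standalone snippet below (separate from my script) seems consistent with that, although the exact error message may depend on the h5py version:

import pickle

import h5py

with h5py.File("pickle_check.hdf5", "w") as f:
    d_set = f.create_dataset("data", shape=(10,), dtype="f4")
    try:
        pickle.dumps(d_set)
    except TypeError as exc:
        # on my setup this prints "can't pickle _thread._local objects"
        print(exc)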

Can anyone help me solve this problem? As suggested in the comments, here is a minimal reproducible example:
import os
import dask.array as da
import h5py
import numpy as np
from dask.distributed import Client

MY_USER_NAME = "myusername"
EARTH_RADIUS = 6372.795
CHUNK_SIZE = 5000
N = 20000

def create_matrix():
    lat_vec = np.random.random(N) * 90
    lon_vec = np.random.random(N) * 180
    lat_vec = np.radians(lat_vec)
    lon_vec = np.radians(lon_vec)
    sin_lat_vec = np.sin(lat_vec)
    cos_lat_vec = np.cos(lat_vec)

    def _blocked_calculate_great_circle_distance(block, block_info=None):
        loc = block_info[0]['array-location']
        (row_start, row_stop) = loc[0]
        (col_start, col_stop) = loc[1]

        # see https://en.wikipedia.org/wiki/Great-circle_distance
        # and https://github.com/ulope/geopy/blob/master/geopy/distance.py
        row_lon = lon_vec[row_start:row_stop]
        col_lon = lon_vec[col_start:col_stop]

        delta_lon = row_lon[:, np.newaxis] - col_lon
        cos_delta_lon = np.cos(delta_lon)
        central_angle = np.arccos(sin_lat_vec[row_start:row_stop, np.newaxis] * sin_lat_vec[col_start:col_stop] +
                                  cos_lat_vec[row_start:row_stop, np.newaxis] * cos_lat_vec[col_start:col_stop]
                                  * cos_delta_lon)

        return EARTH_RADIUS * central_angle

    dir_path = "/home/" + MY_USER_NAME + "/minimum_reproducible_example/"
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    file_path = os.path.join(dir_path, "matrix.hdf5")
    if os.path.exists(file_path):
        os.remove(file_path)

    with h5py.File(file_path, "a") as f:  # explicit mode ("a" was the old implicit default)
        d_set = f.create_dataset('/data', shape=(N, N), dtype='f4', fillvalue=0)

        w = da.from_array(d_set, chunks=(CHUNK_SIZE, CHUNK_SIZE))

        w = w.map_blocks(_blocked_calculate_great_circle_distance, chunks=(CHUNK_SIZE, CHUNK_SIZE), dtype='f4')

        da.store(w, d_set, dtype="float32")


if __name__ == '__main__':
    client = Client(processes=False)  # an in-process LocalCluster backed by threads
    create_matrix()
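
For what it's worth, the same computation completes if I never create a Client at all (dask's default threaded scheduler doesn't serialize the graph), which also points at the h5py handles. A workaround I'm considering, sketched below against the names from the example above, is to keep every h5py object out of the task graph and do the writes on the client side; I'd appreciate confirmation that this is the intended pattern:

# Sketch of a workaround (reuses N, CHUNK_SIZE, file_path and the
# _blocked_calculate_great_circle_distance closure from the example above).
# The source array no longer wraps the h5py dataset, so nothing from h5py
# has to be pickled; map_blocks ignores the block contents anyway.
w = da.zeros((N, N), chunks=(CHUNK_SIZE, CHUNK_SIZE), dtype="f4")
w = w.map_blocks(_blocked_calculate_great_circle_distance,
                 chunks=(CHUNK_SIZE, CHUNK_SIZE), dtype="f4")

# Compute one row-block at a time on the cluster and write it from the
# client process with plain h5py.
with h5py.File(file_path, "a") as f:
    d_set = f["/data"]
    for row_start in range(0, N, CHUNK_SIZE):
        block = w[row_start:row_start + CHUNK_SIZE, :].compute()
        d_set[row_start:row_start + CHUNK_SIZE, :] = block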