Python: multithreading a numpy nditer


For an MCMC implementation, I want to compute a covariance tensor C in numpy.

Working single-threaded code: the distance between two elements is based on the distance between their indices.
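For reference, a minimal single-threaded version (a sketch, assuming the squared-Euclidean index distance that the CovOp class further down also uses):

import numpy as np

def dist(x, y):
    # example distance: squared Euclidean distance between the two index halves
    return np.sum((x - y)**2)

def calc_distances(C):
    'Fill each entry of C with the distance between its two index halves'
    it = np.nditer(C, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = np.array(it.multi_index)
        it[0] = dist(idx[:idx.shape[0]//2], idx[idx.shape[0]//2:])
        it.iternext()
    return C

size, ndim = 4, 2
C = calc_distances(np.zeros((size,)*ndim*2))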

Solution attempt: the problem now is that, while applying C to a matrix x is a multithreaded operation (np.tensordot reduces it to a BLAS matrix product):

x = np.random.standard_normal((size,)*ndim)
result = np.tensordot(C, x, axes=ndim)
computing the entries of C is not. My idea was to split C along its first axis after initialization and iterate over the chunks separately:

import multiprocessing
import numpy as np

def _calc_distances(C):
    'Calculate distances of submatrices'
    # multi_index restarts at zero for every chunk, so the global
    # position along the first axis is lost here
    it = np.nditer(C, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = np.array(it.multi_index)
        it[0] = dist(idx[:idx.shape[0]//2], idx[idx.shape[0]//2:])
        it.iternext()
    return C

def update_tensor(C):
    'Updates Covariance Operator'
    # Multicore processing: split C along its first axis into one chunk
    # per core, map over the chunks, then stitch the results together
    n_processes = multiprocessing.cpu_count()
    Chunks = [
        C[i*C.shape[0]//n_processes:(i+1)*C.shape[0]//n_processes] for i in range(0, n_processes-1)
    ]
    Chunks.append(C[C.shape[0]//n_processes*(n_processes-1):])
    with multiprocessing.Pool(n_processes+1) as p:
        #map and stitch together
        C = np.concatenate(
            p.map(_calc_distances, Chunks)
        )
But this fails: within each chunk, multi_index starts at zero again, so the submatrix indices no longer correspond to positions in the full tensor.

Questions: Is there a better solution? How do I fix the indexing problem? The nicest way would probably be to iterate over parts of the array with threads that share the data of C. Is that possible?
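Sharing the array between workers is possible. One way (a sketch only, assuming Python 3.8+ for multiprocessing.shared_memory; the helpers _fill_chunk and fill_shared are hypothetical names, not from the original post) is to back C with a shared buffer, hand each worker a row range, and restore the global index with an offset:

import multiprocessing as mp
from multiprocessing import shared_memory
import numpy as np

def dist(x, y):
    return np.sum((x - y)**2)

def _fill_chunk(args):
    'Worker: fill rows [start, stop) of the shared tensor in place'
    shm_name, shape, start, stop = args
    shm = shared_memory.SharedMemory(name=shm_name)
    C = np.ndarray(shape, dtype=np.float64, buffer=shm.buf)
    it = np.nditer(C[start:stop], flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = np.array(it.multi_index)
        idx[0] += start  # restore the global index along the first axis
        it[0] = dist(idx[:idx.shape[0]//2], idx[idx.shape[0]//2:])
        it.iternext()
    shm.close()

def fill_shared(shape):
    'Parent: create the shared buffer, farm out row ranges, copy out'
    n_procs = mp.cpu_count()
    shm = shared_memory.SharedMemory(create=True, size=int(np.prod(shape)) * 8)
    bounds = np.linspace(0, shape[0], n_procs + 1, dtype=int)
    jobs = [(shm.name, shape, bounds[i], bounds[i+1]) for i in range(n_procs)]
    with mp.Pool(n_procs) as p:
        p.map(_fill_chunk, jobs)
    C = np.ndarray(shape, dtype=np.float64, buffer=shm.buf).copy()
    shm.close()
    shm.unlink()
    return C

On Windows this must run under an if __name__ == '__main__' guard, e.g. C = fill_shared((size,)*ndim*2).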

Q&A

Q: Do you have to use nditer? It doesn't make the iteration any faster.
A: Good to know! It was already in place; it saves memory and makes handling the indices relatively easy, but I can give it up.
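Since nditer buys no speed here, a fully vectorized alternative is worth noting; a sketch (an assumption, not part of the original answer) that builds the whole distance tensor by broadcasting, with no Python-level loop:

import numpy as np

def distance_tensor(size, ndim):
    'All pairwise squared index distances, computed via broadcasting'
    idx = np.indices((size,)*ndim).reshape(ndim, -1)   # (ndim, size**ndim)
    diff = idx[:, :, None] - idx[:, None, :]           # pairwise index differences
    return np.sum(diff**2, axis=0).reshape((size,)*ndim*2)

The intermediate diff holds ndim * size**(2*ndim) elements, so memory rather than CPU becomes the limit; for very large tensors the chunked approach may still win.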

This is how it works. I just wanted to post the class here.

Benchmark

CPU: Intel Core i5-6300U @ 2.5 GHz, boosting to ~2.9 GHz
OS: Windows 10 64-bit, Python 3.7.4, Numpy 1.17

Pros: less computation time. Cons: uses more memory; somewhat convoluted code.

Working multithreaded code
import multiprocessing
import numpy as np

class CovOp(object):
    'F[0,1]^ndim->C[0,1]^ndim'
    def f(self, r):
        return np.exp(-r/self.ro)  # alternative kernel: (1 + np.sqrt(3)*r/self.ro) * np.exp(-np.sqrt(3)*r/self.ro)

    def dist(self, x,y):
        return np.sum((x-y)**2)

    def __init__(self, ndim, size, sigma=1, ro=1):
        self.tensor_cached = False
        self.inverse_cached = False
        self.ndim = ndim
        self.size = size
        self.shape = (size,)*ndim*2
        self.C = np.zeros(self.shape)
        self.Inv = np.zeros(self.shape)
        self.ro = ro * size
        self.sigma = sigma      

    def __call__(self, x):
        if not self.tensor_cached:
            self.update_tensor()  # build C lazily on first use
        if self.ndim == 0:
            return self.sigma * self.C * x
        elif self.ndim == 1:
            return self.sigma * np.dot(self.C, x)
        return self.sigma * np.tensordot(self.C, x, axes=self.ndim)

    def _calc_distances(self, Chunk: tuple):
        'Calculate distances of submatrices'
        C, offset = Chunk
        it = np.nditer(C, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            idx = np.array(it.multi_index)
            idx[0] += offset  # restore the global index along the split axis
            d = self.dist(idx[:idx.shape[0]//2], idx[idx.shape[0]//2:])
            it[0] = self.f(d)
            it.iternext()
        return C

    def update_tensor(self):
        'Updates Covariance Operator'
        # Multicore processing: each chunk is paired with the offset of its
        # first row, so workers can reconstruct global indices
        n_processes = multiprocessing.cpu_count()
        Chunks = [
            (
                self.C[i*self.C.shape[0]//n_processes:(i+1)*self.C.shape[0]//n_processes],
                i*self.C.shape[0]//n_processes) for i in range(0, n_processes-1)
        ]
        Chunks.append((
                self.C[self.C.shape[0]//n_processes*(n_processes-1):],
                self.C.shape[0]//n_processes*(n_processes-1)
            )
        )
        with multiprocessing.Pool(n_processes+1) as p:
            self.C = np.concatenate(
                p.map(self._calc_distances, Chunks)
            )      
        self.tensor_cached = True
        #missing cholesky decomposition

    def update_inverse(self):
        if self.ndim==1:
            self.Inv = np.linalg.inv(self.C)
        elif self.ndim>1:
            self.Inv = np.linalg.tensorinv(self.C)
        else:
            self.Inv = 1/self.C
        self.inverse_cached = True

    def inv(self, x):
        if self.ndim == 0:
            return self.Inv * x / self.sigma
        elif self.ndim == 1:
            return np.dot(self.Inv, x) / self.sigma
        return np.tensordot(self.Inv, x, axes=self.ndim) / self.sigma
if __name__ == '__main__':

    size = 30
    ndim = 2
    depth = 1

    Cov = CovOp(ndim, size, 1, .2)

    import time

    n_tests = 5
    t_start = time.perf_counter()
    for i in range(n_tests):
        Cov.update_tensor()
    t_stop = time.perf_counter()
    dt_new = t_stop - t_start

    print(
        '''Benchmark; NDim: %s, Size: %s NTests: %s
        Mean time per test:
            Multithreaded %ss''' % (ndim, size, n_tests, dt_new/n_tests)
    )
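For illustration, a short usage sketch that can sit at the end of the __main__ block, assuming update_tensor() has already run:

    x = np.random.standard_normal((size,)*ndim)
    y = Cov(x)                # sigma * tensordot(C, x, axes=ndim)
    Cov.update_inverse()
    x_rec = Cov.inv(y)        # recovers x up to numerical error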