Python 多线程处理numpy nditer迭代器
对于MCMC实现,我想用numpy计算协方差张量C。 可运行的单线程代码 两个元素之间的距离基于其索引之间的距离。以下是可运行的单线程代码(带有示例距离函数),仅供参考: 解决方案尝试 现在的问题是,将C应用于矩阵x是一个多线程操作:Python 多线程处理numpy nditer迭代器,python,multithreading,numpy,iterator,multiprocessing,Python,Multithreading,Numpy,Iterator,Multiprocessing,对于MCMC实现,我想用numpy计算协方差张量C。 可运行的单线程代码 两个元素之间的距离基于其索引之间的距离。以下是可运行的单线程代码(带有示例距离函数),仅供参考: 解决方案尝试 现在的问题是,将C应用于矩阵x是一个多线程操作: x = np.random.standard_normal((size,)*ndim) result = np.tensordot(C, x, axes=ndim) 而计算C的各个条目却不是多线程的。我的想法是,在初始化后沿着C的第一个轴拆分C,并分别迭代各个块: import mu
# Draw a random test input with `ndim` axes of length `size` each.
x = np.random.standard_normal(size=(size,) * ndim)
# Apply C to x by contracting over `ndim` axes (last axes of C, first of x).
result = np.tensordot(C, x, ndim)
而计算C的各个条目却不是多线程的。我的想法是,在初始化后沿着C的第一个轴拆分C,并分别迭代各个块:
import multiprocessing
def _calc_distances(C):
    """Overwrite every element of ``C`` with ``dist`` of its own index halves.

    The multi-index of each element is split in the middle; the first half
    and the second half are passed to ``dist`` and the result is stored in
    place of the element.  The multi-index comes from iterating ``C`` itself,
    so it is counted from zero along every axis of the array handed in.

    Returns the same array object ``C`` after it has been filled.
    """
    walker = np.nditer(C, flags=['multi_index'], op_flags=['readwrite'])
    while not walker.finished:
        position = np.array(walker.multi_index)
        half = position.shape[0] // 2
        # walker[0] is a writable view of the current element.
        walker[0] = dist(position[:half], position[half:])
        walker.iternext()
    return C
def update_tensor(C):
    """Recompute the entries of ``C`` in worker processes and return the result.

    ``C`` is cut along its first axis into contiguous, non-overlapping,
    non-empty chunks (at most one per CPU core).  Each chunk is handed to
    ``_calc_distances`` on its own, so a worker only sees the sub-array and
    not where that sub-array sits inside ``C``.  The processed chunks are
    concatenated back together along the first axis.

    Parameters:
        C: numpy array with at least one axis.

    Returns:
        A new array with the same shape as ``C`` built from the processed
        chunks (``C`` itself when it has no rows).
    """
    rows = C.shape[0]
    # Never create more chunks than rows: np.nditer rejects empty arrays.
    n_chunks = min(multiprocessing.cpu_count(), rows)
    if n_chunks == 0:
        return C

    # One shared list of boundaries guarantees that chunk i ends exactly
    # where chunk i + 1 starts, so no row is processed (and emitted) twice.
    bounds = [i * rows // n_chunks for i in range(n_chunks + 1)]
    chunks = [C[lo:hi] for lo, hi in zip(bounds, bounds[1:])]

    with multiprocessing.Pool(n_chunks) as pool:
        # map and stitch together
        return np.concatenate(pool.map(_calc_distances, chunks))
但这失败了,因为子矩阵的索引发生了变化(每个块内的索引都从零重新开始计数)
问题:
有更好的解决办法吗?如何解决索引问题?最好的方法可能是使用共享C数据的线程遍历数组的各个部分。这可能吗
问答
问:你必须使用numpy迭代器吗?
不是必须的,它很好用,但我可以放弃它。 问题就是这样解决的。我只是想把经验教训发到这里。 基准测试 优点:减少计算时间 缺点:使用更多的内存;代码稍微复杂一些 可运行的多线程代码
您必须使用nditer吗?这不会使迭代更快。
很高兴知道!它是原地(in-place)操作的,因此节省内存,并且使索引处理相对容易,但我可以放弃它。
import multiprocessing
def _calc_distances(C):
    """Overwrite every element of ``C`` with ``dist`` of its own index halves.

    The multi-index of each element is split in the middle; the first half
    and the second half are passed to ``dist`` and the result is stored in
    place of the element.  The multi-index comes from iterating ``C`` itself,
    so it is counted from zero along every axis of the array handed in.

    Returns the same array object ``C`` after it has been filled.
    """
    walker = np.nditer(C, flags=['multi_index'], op_flags=['readwrite'])
    while not walker.finished:
        position = np.array(walker.multi_index)
        half = position.shape[0] // 2
        # walker[0] is a writable view of the current element.
        walker[0] = dist(position[:half], position[half:])
        walker.iternext()
    return C
def update_tensor(C):
    """Recompute the entries of ``C`` in worker processes and return the result.

    ``C`` is cut along its first axis into contiguous, non-overlapping,
    non-empty chunks (at most one per CPU core).  Each chunk is handed to
    ``_calc_distances`` on its own, so a worker only sees the sub-array and
    not where that sub-array sits inside ``C``.  The processed chunks are
    concatenated back together along the first axis.

    Parameters:
        C: numpy array with at least one axis.

    Returns:
        A new array with the same shape as ``C`` built from the processed
        chunks (``C`` itself when it has no rows).
    """
    rows = C.shape[0]
    # Never create more chunks than rows: np.nditer rejects empty arrays.
    n_chunks = min(multiprocessing.cpu_count(), rows)
    if n_chunks == 0:
        return C

    # One shared list of boundaries guarantees that chunk i ends exactly
    # where chunk i + 1 starts, so no row is processed (and emitted) twice.
    bounds = [i * rows // n_chunks for i in range(n_chunks + 1)]
    chunks = [C[lo:hi] for lo, hi in zip(bounds, bounds[1:])]

    with multiprocessing.Pool(n_chunks) as pool:
        # map and stitch together
        return np.concatenate(pool.map(_calc_distances, chunks))
CPU: Intel Core i5-6300U@2.5GHz, boosting to ~2.9GHz
Windows 10 64-bit, Python 3.7.4, Numpy 1.17
import multiprocessing
import numpy as np
class CovOp(object):
    """Covariance operator 'F[0,1]^ndim->C[0,1]^ndim' stored as a dense tensor.

    The operator is held in ``self.C`` with shape ``(size,) * ndim * 2``.
    The entry at multi-index ``(a, b)`` -- ``a`` being the first ``ndim``
    indices and ``b`` the last ``ndim`` -- is ``f(dist(a, b))``.  The tensor
    and its inverse are computed lazily and cached; ``tensor_cached`` and
    ``inverse_cached`` record whether ``C`` / ``Inv`` are up to date.
    """

    def f(self, r):
        """Map a distance value ``r`` to a tensor entry: ``exp(-r / ro)``."""
        # Alternative kernel kept for reference:
        # (1 + np.sqrt(3)*r / self.ro) * np.exp(-np.sqrt(3) * r / self.ro)
        return np.exp(-r / self.ro)

    def dist(self, x, y):
        """Return the sum of squared element-wise differences of ``x`` and ``y``."""
        return np.sum((x - y) ** 2)

    def __init__(self, ndim, size, sigma=1, ro=1):
        """Allocate zero-filled storage; nothing is computed yet.

        Parameters:
            ndim: number of axes of the arrays the operator acts on.
            size: length of every axis.
            sigma: scalar factor applied in ``__call__`` and divided out in ``inv``.
            ro: length parameter of ``f``; stored multiplied by ``size``.
        """
        self.tensor_cached = False
        self.inverse_cached = False
        self.ndim = ndim
        self.size = size
        self.shape = (size,) * ndim * 2
        self.C = np.zeros(self.shape)
        self.Inv = np.zeros(self.shape)
        self.ro = ro * size
        self.sigma = sigma

    def __call__(self, x):
        """Apply the operator to ``x`` and return ``sigma * C . x``.

        The tensor is computed first if it has not been cached yet.
        """
        if not self.tensor_cached:
            # Must be *called*; a bare attribute reference would leave C all zeros.
            self.update_tensor()
        if self.ndim == 0:
            return self.sigma * self.C * x
        elif self.ndim == 1:
            return self.sigma * np.dot(self.C, x)
        return self.sigma * np.tensordot(self.C, x, axes=self.ndim)

    def _calc_distances(self, Chunk: tuple):
        """Fill one slab of the tensor and return it.

        Parameters:
            Chunk: ``(C, offset)`` where ``C`` is a slice of the tensor along
                its first axis and ``offset`` is the row of the full tensor at
                which that slice starts.

        Returns:
            The array ``C`` from ``Chunk`` with every element replaced by
            ``f(dist(a, b))`` of its index halves in the full tensor.
        """
        C, offset = Chunk
        it = np.nditer(C, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            idx = np.array(it.multi_index)
            # nditer counts from zero inside the slice; shift the first
            # coordinate back to its position in the full tensor.
            idx[0] += offset
            half = idx.shape[0] // 2
            d = self.dist(idx[:half], idx[half:])
            it[0] = self.f(d)
            it.iternext()
        return C

    @staticmethod
    def _chunk_bounds(rows, n_chunks):
        """Return ``n_chunks + 1`` increasing row boundaries covering ``[0, rows]``."""
        return [i * rows // n_chunks for i in range(n_chunks + 1)]

    def update_tensor(self):
        """Recompute ``self.C``, splitting the work over worker processes.

        The tensor is cut along its first axis into contiguous,
        non-overlapping, non-empty slabs (at most one per CPU core); each
        slab is filled by ``_calc_distances`` and the slabs are concatenated
        again.  A single slab is filled in the current process.
        """
        if self.ndim == 0:
            # A 0-d tensor has one entry whose (empty) index halves coincide,
            # so the distance is zero.
            self.C = np.asarray(self.f(0.0), dtype=float)
        elif self.C.size > 0:
            rows = self.C.shape[0]
            # Never more slabs than rows: np.nditer rejects empty arrays.
            n_chunks = min(multiprocessing.cpu_count(), rows)
            # A single boundary list guarantees that slab i ends exactly
            # where slab i + 1 begins, so no row is computed twice.
            bounds = self._chunk_bounds(rows, n_chunks)
            chunks = [
                (self.C[lo:hi], lo) for lo, hi in zip(bounds, bounds[1:])
            ]
            if n_chunks == 1:
                # Nothing to parallelise; skip the process start-up cost.
                parts = [self._calc_distances(chunks[0])]
            else:
                # NOTE: mapping a bound method pickles ``self`` (including
                # ``C`` and ``Inv``) for the workers, which costs memory.
                with multiprocessing.Pool(n_chunks) as p:
                    parts = p.map(self._calc_distances, chunks)
            self.C = np.concatenate(parts)
        self.tensor_cached = True
        # Any previously computed inverse belongs to the old tensor.
        self.inverse_cached = False
        #missing cholesky decomposition

    def update_inverse(self):
        """Recompute ``self.Inv`` as the inverse of ``self.C``.

        The tensor itself is computed first if it has not been cached yet.
        """
        if not self.tensor_cached:
            self.update_tensor()
        if self.ndim == 1:
            self.Inv = np.linalg.inv(self.C)
        elif self.ndim > 1:
            # The first ``ndim`` axes are the "row" axes of the operator; the
            # default ``ind=2`` is only correct for ndim == 2.
            self.Inv = np.linalg.tensorinv(self.C, ind=self.ndim)
        else:
            self.Inv = 1 / self.C
        self.inverse_cached = True

    def inv(self, x):
        """Apply the inverse operator to ``x`` and return ``Inv . x / sigma``.

        The inverse is computed first if it has not been cached yet.
        """
        if not self.inverse_cached:
            self.update_inverse()
        if self.ndim == 0:
            return self.Inv * x / self.sigma
        elif self.ndim == 1:
            return np.dot(self.Inv, x) / self.sigma
        # Contract over ``ndim`` axes, mirroring ``__call__``.
        return np.tensordot(self.Inv, x, axes=self.ndim) / self.sigma
if __name__ == '__main__':
    import time

    # Benchmark configuration.
    ndim = 2
    size = 30
    depth = 1
    n_tests = 5

    Cov = CovOp(ndim, size, 1, .2)

    # Time n_tests full recomputations of the tensor.
    t_start = time.perf_counter()
    for i in range(n_tests):
        Cov.update_tensor()
    t_stop = time.perf_counter()
    dt_new = t_stop - t_start

    # Report the mean wall-clock time of one recomputation.
    print(
        '''Benchmark; NDim: %s, Size: %s NTests: %s
Mean time per test:
Multithreaded %ss''' % (ndim, size, n_tests, dt_new / n_tests)
    )