Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/339.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 多处理距离矩阵cdist Scipy_Python_Scipy_Multiprocessing - Fatal编程技术网

Python 多处理距离矩阵cdist Scipy

Python 多处理距离矩阵cdist Scipy,python,scipy,multiprocessing,Python,Scipy,Multiprocessing,我要计算两个字符串向量元素之间的多种距离。我使用 cdist 来完成这项工作,它速度很快,但对于大型数组来说仍然很耗时,而且我必须多次重复该操作。来自 Scipy 的 cdist 只使用了我的一个核心。我怎样才能利用所有核心来计算所有距离,从而更快地得到最终的 3D 矩阵? 我想实现的是:在不同的进程中分别计算每个距离矩阵,或者使用所有核心来计算单个距离矩阵。在这里,您可以运行我试图实现的示例: import numpy as np first = np.array(["hello", …

我要计算两个字符串向量元素之间的多种距离。我使用 cdist 来完成这项工作,它速度很快,但对于大型数组来说仍然很耗时,而且我必须多次重复该操作。来自 Scipy 的 cdist 只使用了我的一个核心。我怎样才能利用所有核心来计算所有距离,从而更快地得到最终的 3D 矩阵?我想实现的是:在不同的进程中分别计算每个距离矩阵,或者使用所有核心来计算单个距离矩阵。

在这里,您可以运行我试图实现的示例

import numpy as np
from scipy.spatial.distance import cdist
# Sample inputs: two small vectors of strings whose elements are compared
# pairwise by the distance functions defined below.
first = np.array(("hello", "hello", "hellllo"))
second = np.array(("hlo", "halo", "alle"))
def diff_len(string1, string2):
    """Return the absolute difference between the two arguments' lengths."""
    delta = len(string1) - len(string2)
    return -delta if delta < 0 else delta
def diff_len2(string1, string2):
    """Return ``|len(string1) - 2 * len(string2)|``."""
    length_a = len(string1)
    doubled_b = len(string2) * 2
    return abs(length_a - doubled_b)
def diff_len_square(string1, string2):
    """Return ``|len(string1) - len(string2) ** 2|``."""
    length_a = len(string1)
    length_b = len(string2)
    return abs(length_a - length_b * length_b)
def minimum_nw(*sequences):
    """Return the negated length of the longest of *sequences*.

    ``normalized_distance`` uses this value as the lower end of its scale.
    Raises ``ValueError`` when called with no sequences.
    """
    longest = max(len(seq) for seq in sequences)
    return -longest


def maximum_nw(*sequences):
    """Return the length of the longest of *sequences*.

    ``normalized_distance`` uses this value as the upper end of its scale.
    Raises ``ValueError`` when called with no sequences.
    """
    lengths = [len(seq) for seq in sequences]
    return max(lengths)


def normalized_distance(distance, *sequences):
    """Rescale *distance* linearly between the two Needleman-Wunsch bounds.

    The bounds come from ``minimum_nw`` and ``maximum_nw`` applied to
    *sequences*: a distance equal to the minimum maps to 0 and a distance
    equal to the maximum maps to 1.

    Returns 0 when the maximum bound is 0 (every sequence is empty), which
    avoids a division by zero.
    """
    lower = minimum_nw(*sequences)
    upper = maximum_nw(*sequences)

    if upper != 0:
        return (distance - lower) / (upper - lower)
    return 0


@njit
def NeedlemanWunschDP(dist_mat, s1, s2):
    """Fill the interior of *dist_mat* in place with alignment scores.

    Row 0 and column 0 are read but never written, so the caller must have
    initialised them beforehand. Each interior cell takes the best of:
    the diagonal neighbour plus 1 when the two symbols are equal (plus 0
    otherwise), or the upper / left neighbour minus the gap penalty.

    NOTE(review): ``gap_`` is read from module scope and is not defined in
    the visible part of the file - confirm where it is set.
    """
    n_rows = len(s1)
    n_cols = len(s2)
    for row in range(1, n_rows + 1):
        for col in range(1, n_cols + 1):
            diagonal = dist_mat[row - 1, col - 1] + (s1[row - 1] == s2[col - 1])
            from_above = dist_mat[row - 1, col] - gap_
            from_left = dist_mat[row, col - 1] - gap_
            dist_mat[row, col] = max(diagonal, from_above, from_left)


def NeedleW(s1, s2, mode='raw'):
    """Compute the Needleman-Wunsch alignment value of two strings.

    Args:
        s1: First string.
        s2: Second string.
        mode: ``'norm'`` returns ``normalized_distance`` applied to the
            negated alignment score; any other value returns the negation
            of that negated score as a float.

    Returns:
        A number whose meaning depends on *mode*, as described above.

    NOTE(review): ``gap_`` is read from module scope and is not defined in
    the visible part of the file - confirm where it is set.
    """
    n_rows = len(s1) + 1
    n_cols = len(s2) + 1
    dist_mat_ = np.empty((n_rows, n_cols), dtype=np.int64)

    # DP initialization: cell k of the first column / first row holds the
    # cost of k consecutive gaps.
    dist_mat_[:, 0] = -(np.arange(n_rows, dtype=np.int64) * gap_)
    dist_mat_[0, :] = -(np.arange(n_cols, dtype=np.int64) * gap_)

    # Encode both strings as integer code points so the DP kernel works on
    # plain int64 arrays.
    codes_s1 = np.array(list(map(ord, s1)), dtype=np.int64)
    codes_s2 = np.array(list(map(ord, s2)), dtype=np.int64)

    # Needleman-Wunsch DP calculation; the final score ends up in the
    # bottom-right cell and is negated here.
    NeedlemanWunschDP(dist_mat_, codes_s1, codes_s2)
    distance_ = -dist_mat_[-1, -1]

    if mode != 'norm':
        return -float(distance_)
    return normalized_distance(distance_, s1, s2)
# Demo driver: build two string vectors, compute four pairwise distance
# matrices with cdist, and stack them into a single 3-D array.

# Each three-element sample is repeated 20 times, giving 60 strings per vector.
# Fix: the original called np.array(second, 20), which passes 20 as the dtype
# argument and raises TypeError; np.tile is the repetition used for `first`.
first = np.tile(np.array(["hello", "hello", "hellllo"]), 20)
second = np.tile(np.array(["hlo", "halo", "alle"]), 20)

# cdist requires 2-D inputs, so each vector becomes a single-column matrix.
# The metric then receives length-1 rows, hence the a[0] / b[0] unpacking.
first_col = first[:, np.newaxis]
second_col = second[:, np.newaxis]

d0 = cdist(first_col, second_col, lambda a, b: diff_len(a[0], b[0]))
d1 = cdist(first_col, second_col, lambda a, b: diff_len2(a[0], b[0]))
d2 = cdist(first_col, second_col, lambda a, b: diff_len_square(a[0], b[0]))
d3 = cdist(first_col, second_col, lambda a, b: NeedleW(a[0], b[0], 'norm'))

# One (60, 60) matrix per metric, stacked along a new leading axis: (4, 60, 60).
mat3D = np.stack((d0, d1, d2, d3))