Python 存储已添加到集群的行和列的索引

Python 存储已添加到集群的行和列的索引,python,numpy,Python,Numpy,我的问题是关于层次聚类的。我正试图从给定的距离矩阵出发,用Python从头开始创建一个层次聚类算法。我希望从scipy.cluster.hierarchy.linkage获得与linkage函数相同的输出。但是,当我想要将文件的标记(距离矩阵中的行/列)添加到算法中较早形成的集群中时,我遇到了一个问题


def cluster(distance_matrix):

    n = len(distance_matrix)

    cluster_dictionary = {i:i for i in range(n)}
    linkage_matrix = np.zeros((n - 1, 4))

    for i in range(n - 1):
        # Create a mask with True on all values that are not on the diagonal
        mask = np.ones(distance_matrix.shape, dtype=bool)
        np.fill_diagonal(mask, 0)

        # Find the index of the minimum distance between two texts disregarding the diagonal
        min_distance = np.min(distance_matrix[mask])
        ind = np.where(distance_matrix == min_distance)[0]

        # Gather the indices of our clusters from the indices of our distance matrix
        cluster_ind = [cluster_dictionary[idx] for idx in ind]

        # Update our cluster_dictionary so each index is properly assigned to a cluster
        for text_idx in cluster_ind:
            if text_idx > n - 1:
                for k, v in cluster_dictionary.items():
                    if v == text_idx:
                        cluster_dictionary[k] = i + n
                cluster_dictionary[text_idx] = i + n

        linkage_matrix[i, 0] = cluster_ind[0]
        linkage_matrix[i, 1] = cluster_ind[1]
        linkage_matrix[i, 2] = min_distance
        linkage_matrix[i, 3] = 0

        # Calculate the new distances our texts are from our newly found cluster
        distance_vector = distance_from_cluster(ind, distance_matrix)

        # Remove the row and column that we just clustered, also remove it from our custom indices
        distance_matrix = delete_row_column(distance_matrix, ind)

        # Insert our new distance vector for the clustered group into the distance matrix
        distance_matrix = np.insert(distance_matrix, 0, distance_vector[1:], axis=0) 
        distance_matrix = np.insert(distance_matrix, 0, distance_vector, axis=1) 

    return np.array(linkage_matrix)

import scipy.spatial.distance as ssd

distance_matrix = np.array([[0, 9, 3, 6, 11],
                            [9, 0, 7, 5, 10],
                            [3, 7, 0, 9, 2],
                            [6, 5, 9, 0, 8],
                            [11, 10, 2, 8, 0]])

own_linkage = cluster(distance_matrix)
print(hierarchy.linkage(ssd.squareform(distance_matrix), method='complete', metric='cosine'))

[[ 2.  4.  2.  0.]
 [ 5.  3.  5.  0.]
 [ 0.  6.  9.  0.]
 [ 7.  1. 11.  0.]]

[[ 2.  4.  2.  2.]
 [ 1.  3.  5.  2.]
 [ 0.  6.  9.  3.]
 [ 5.  7. 11.  5.]]



def delete_row_column(distance_matrix, indices):
    """Returns the distance matrix with the rows and columns specified in indices
    remain_ind = [i for i in range(len(distance_matrix)) if not i in indices]
    distance_matrix = distance_matrix[np.ix_(remain_ind, remain_ind)]
    return distance_matrix

def distance_from_cluster(cluster, distance_matrix):
    """Finds all distances that our texts are from the cluster defined as the longest distance
    from a point within our cluster to a given text"""
    distance_vector = [0]
    for i, distance_row in enumerate(distance_matrix):
        distances = [distance_row[i] for i in cluster]
        # Disregard texts already in the cluster
        if not 0 in distances:
    return np.array(distance_vector)