Python matlab-dummyvar的numpy等价

Python:MATLAB dummyvar 的 NumPy 等价实现(标签:python、numpy)。问题:为了方便地处理类别变量,Python 中与 MATLAB 的 dummyvar 函数最接近的是什么?下面用一个示例来说明我的问题:用一个矩阵表示将 N 个数据点划分成若干组的 M 种不同方式。回答:您可以使用一些广播技巧来快速得到虚拟变量(dummy)数组: >>> partitions = np.array([[1, 1, 2, 2, 1, 2, 2, 2, 1, 1], ... [1, 2, 2, 1, 2, 1, 2, 2, 2, 1], ...



>>> partitions = np.array([[1, 1, 2, 2, 1, 2, 2, 2, 1, 1],
...                        [1, 2, 2, 1, 2, 1, 2, 2, 2, 1],
...                        [1, 1, 1, 2, 2, 2, 1, 3, 3, 2]])
>>> n = np.max(partitions)
>>> d = (partitions.T[:, None, :] == np.arange(1, n+1)[:, None]).astype(int)
>>> d = d.reshape(partitions.shape[1], -1)
>>> d.dot(d.T)
array([[3, 2, 1, 1, 1, 1, 1, 0, 1, 2],
       [2, 3, 2, 0, 2, 0, 2, 1, 2, 1],
       [1, 2, 3, 1, 1, 1, 3, 2, 1, 0],
       [1, 0, 1, 3, 1, 3, 1, 1, 0, 2],
       [1, 2, 1, 1, 3, 1, 1, 1, 2, 2],
       [1, 0, 1, 3, 1, 3, 1, 1, 0, 2],
       [1, 2, 3, 1, 1, 1, 3, 2, 1, 0],
       [0, 1, 2, 1, 1, 1, 2, 3, 2, 0],
       [1, 2, 1, 0, 2, 0, 1, 2, 3, 1],
       [2, 1, 0, 2, 2, 2, 0, 0, 1, 3]])


import scipy.sparse as sps
def sparse_dummyvar(partitions):
    """Build a sparse indicator (dummy-variable) matrix for several partitions.

    Parameters
    ----------
    partitions : array_like of int, shape (M, N)
        Each row is one partition of the same N data points; entry
        ``[i, j]`` is the group label of point ``j`` in partition ``i``.
        Every row must use each label ``1..max(row)`` at least once,
        because the number of output rows is taken from the row maxima.

    Returns
    -------
    scipy.sparse.csr_matrix, shape (sum of row maxima, N)
        One row per (partition, label) pair, ordered by partition and then
        by label.  Entry ``[g, j]`` is 1 when point ``j`` belongs to group
        ``g``.  ``dT.T.dot(dT)`` therefore counts, for every pair of
        points, in how many partitions they share a group.

    Raises
    ------
    ValueError
        If some row skips a label, so the number of groups found does not
        match the sum of the row maxima.
    """
    partitions = np.asarray(partitions)
    num_parts, num_points = partitions.shape
    num_rows = np.sum(np.max(partitions, axis=1))
    # Every data point contributes exactly one non-zero per partition.
    nnz = partitions.size

    # Sorting each partition by label places the members of a group next
    # to each other; the argsort gives their column (data point) indices.
    as_part = np.argsort(partitions, axis=1)
    s_part = np.sort(partitions, axis=1)

    # A new group (i.e. a new CSR row) starts at the first element of each
    # partition and wherever the sorted label changes.
    mask = np.hstack((np.ones((num_parts, 1), dtype=bool),
                      s_part[:, :-1] != s_part[:, 1:]))
    indptr = np.where(mask.ravel())[0]
    indptr = np.append(indptr, nnz)

    if len(indptr) != num_rows + 1:
        raise ValueError(
            "each partition must use every label from 1 to its maximum")

    return sps.csr_matrix((np.ones(nnz, dtype=int), as_part.ravel(), indptr),
                          shape=(num_rows, num_points))

>>> dT = sparse_dummyvar(partitions)
>>> dT.T * dT
<10x10 sparse matrix of type '<type 'numpy.int32'>'
    with 84 stored elements in Compressed Sparse Column format>
>>> (dT.T * dT).toarray()
array([[3, 2, 1, 1, 1, 1, 1, 0, 1, 2],
       [2, 3, 2, 0, 2, 0, 2, 1, 2, 1],
       [1, 2, 3, 1, 1, 1, 3, 2, 1, 0],
       [1, 0, 1, 3, 1, 3, 1, 1, 0, 2],
       [1, 2, 1, 1, 3, 1, 1, 1, 2, 2],
       [1, 0, 1, 3, 1, 3, 1, 1, 0, 2],
       [1, 2, 3, 1, 1, 1, 3, 2, 1, 0],
       [0, 1, 2, 1, 1, 1, 2, 3, 2, 0],
       [1, 2, 1, 0, 2, 0, 1, 2, 3, 1],
       [2, 1, 0, 2, 2, 2, 0, 0, 1, 3]])
>>> dT = sparse_dummyvar(partitions) >>> (dT.T * dT).toarray() array([[3, 2, 1, 1, 1, 1, 1, 0, 1, 2], [2, 3, 2, 0, 2, 0, 2, 1, 2, 1], [1, 2, 3, 1, 1, 1, 3, 2, 1, 0], [1, 0, 1, 3, 1, 3, 1, 1, 0, 2], [1, 2, 1, 1, 3, 1, 1, 1, 2, 2], [1, 0, 1, 3, 1, 3, 1, 1, 0, 2], [1, 2, 3, 1, 1, 1, 3, 2, 1, 0], [0, 1, 2, 1, 1, 1, 2, 3, 2, 0], [1, 2, 1, 0, 2, 0, 1, 2, 3, 1], [2, 1, 0, 2, 2, 2, 0, 0, 1, 3]])
>>> partitions = np.array([[1, 1, 2, 2, 1, 2, 2, 2, 1, 1],
...                        [1, 2, 2, 1, 2, 1, 2, 2, 2, 1],
...                        [1, 1, 1, 2, 2, 2, 1, 3, 3, 2]])
>>> n = np.max(partitions)
>>> d = (partitions.T[:, None, :] == np.arange(1, n+1)[:, None]).astype(int)
>>> d = d.reshape(partitions.shape[1], -1)
>>> d.dot(d.T)
array([[3, 2, 1, 1, 1, 1, 1, 0, 1, 2],
       [2, 3, 2, 0, 2, 0, 2, 1, 2, 1],
       [1, 2, 3, 1, 1, 1, 3, 2, 1, 0],
       [1, 0, 1, 3, 1, 3, 1, 1, 0, 2],
       [1, 2, 1, 1, 3, 1, 1, 1, 2, 2],
       [1, 0, 1, 3, 1, 3, 1, 1, 0, 2],
       [1, 2, 3, 1, 1, 1, 3, 2, 1, 0],
       [0, 1, 2, 1, 1, 1, 2, 3, 2, 0],
       [1, 2, 1, 0, 2, 0, 1, 2, 3, 1],
       [2, 1, 0, 2, 2, 2, 0, 0, 1, 3]])
import scipy.sparse as sps
def sparse_dummyvar(partitions):
    """Build a sparse indicator (dummy-variable) matrix for several partitions.

    Parameters
    ----------
    partitions : array_like of int, shape (M, N)
        Each row is one partition of the same N data points; entry
        ``[i, j]`` is the group label of point ``j`` in partition ``i``.
        Every row must use each label ``1..max(row)`` at least once,
        because the number of output rows is taken from the row maxima.

    Returns
    -------
    scipy.sparse.csr_matrix, shape (sum of row maxima, N)
        One row per (partition, label) pair, ordered by partition and then
        by label.  Entry ``[g, j]`` is 1 when point ``j`` belongs to group
        ``g``.  ``dT.T.dot(dT)`` therefore counts, for every pair of
        points, in how many partitions they share a group.

    Raises
    ------
    ValueError
        If some row skips a label, so the number of groups found does not
        match the sum of the row maxima.
    """
    partitions = np.asarray(partitions)
    num_parts, num_points = partitions.shape
    num_rows = np.sum(np.max(partitions, axis=1))
    # Every data point contributes exactly one non-zero per partition.
    nnz = partitions.size

    # Sorting each partition by label places the members of a group next
    # to each other; the argsort gives their column (data point) indices.
    as_part = np.argsort(partitions, axis=1)
    s_part = np.sort(partitions, axis=1)

    # A new group (i.e. a new CSR row) starts at the first element of each
    # partition and wherever the sorted label changes.
    mask = np.hstack((np.ones((num_parts, 1), dtype=bool),
                      s_part[:, :-1] != s_part[:, 1:]))
    indptr = np.where(mask.ravel())[0]
    indptr = np.append(indptr, nnz)

    if len(indptr) != num_rows + 1:
        raise ValueError(
            "each partition must use every label from 1 to its maximum")

    return sps.csr_matrix((np.ones(nnz, dtype=int), as_part.ravel(), indptr),
                          shape=(num_rows, num_points))
>>> dT = sparse_dummyvar(partitions)
>>> dT.T * dT
<10x10 sparse matrix of type '<type 'numpy.int32'>'
    with 84 stored elements in Compressed Sparse Column format>
>>> (dT.T * dT).toarray()
array([[3, 2, 1, 1, 1, 1, 1, 0, 1, 2],
       [2, 3, 2, 0, 2, 0, 2, 1, 2, 1],
       [1, 2, 3, 1, 1, 1, 3, 2, 1, 0],
       [1, 0, 1, 3, 1, 3, 1, 1, 0, 2],
       [1, 2, 1, 1, 3, 1, 1, 1, 2, 2],
       [1, 0, 1, 3, 1, 3, 1, 1, 0, 2],
       [1, 2, 3, 1, 1, 1, 3, 2, 1, 0],
       [0, 1, 2, 1, 1, 1, 2, 3, 2, 0],
       [1, 2, 1, 0, 2, 0, 1, 2, 3, 1],
       [2, 1, 0, 2, 2, 2, 0, 0, 1, 3]])