Numpy 在数组中累积不规则切片的总和

Numpy 在数组中累积不规则切片的总和,numpy,Numpy,我需要快速处理一个巨大的二维数组,并且已经预先标记了所需的数据 array([[ 0., 1., 2., 3., 4., 5. , 6. , 7.], [ 6., 7., 8., 9., 10., 4.2, 4.3, 11.], [ 12., 13., 14., 15., 16., 4.2, 4.3, 17.], [ 18., 19., 20., 21., 22., 4

我需要快速处理一个巨大的二维数组,并且已经预先标记了所需的数据

array([[  0.,   1.,   2.,   3.,   4.,   5. ,   6. ,   7.],
       [  6.,   7.,   8.,   9.,  10.,   4.2,   4.3,  11.],
       [ 12.,  13.,  14.,  15.,  16.,   4.2,   4.3,  17.],
       [ 18.,  19.,  20.,  21.,  22.,   4.2,   4.3,  23.]])

array([[False, True,  True,  True, False, True, True , False],
       [False, False, False, True,  True, True, True , False],
       [False, False, True, True, False, False, False, False],
       [False, True, True, False, False, False, True , True ]])
我希望汇总数组中每一行的标记数据。但是np.cumsum不能这样做,我需要解决方案或好主意,谢谢

预期产出:

array([[  0.,   1.,   3.,   6.,   0.,   5. ,   11. ,    0.],
       [  0.,   0.,   0.,   9.,  19.,  23.2,   27.5,    0.],
       [  0.,   0.,  14.,  29.,   0.,     0,      0,    0.],
       [  0.,  19.,  39.,   0.,   0.,     0,     4.3, 27.3]])
解决方案的困难在于每个片段不能包含前一个片段的结果

def mask_to_size(self,axis=-1):
    if self.ndim==2:
        if axis == 0:
            mask = np.zeros((self.shape[0]+1,self.shape[1]), dtype=bool)
            mask[:-1] = self ; mask[0] = False ; mask = mask.ravel('F') 
        else:
            mask = np.zeros((self.shape[0],self.shape[1]+1), dtype=bool)
            mask[:,0:-1]= self ;mask[:,0]=False; mask = mask.ravel('C')
    else:
        mask = np.zeros((self.shape[0]+1), dtype=bool)
        mask[:-1] = self ; mask[0] = False
    return np.diff(np.nonzero(mask[1:]!= mask[:-1])[0])[::2].astype(int)

# https://stackoverflow.com/a/49179628/  by @Divakar
def intervaled_cumsum(ar, sizes):
    out = ar.copy() 
    arc = ar.cumsum() ; idx = sizes.cumsum()
    out[idx[0]] = ar[idx[0]] - arc[idx[0]-1]
    out[idx[1:-1]] = ar[idx[1:-1]] - np.diff(arc[idx[:-1]-1])
    return out.cumsum()  

def cumsum_masked(self,mask,axis=-1):
    sizes = mask_to_size(mask,axis);out = np.zeros(self.size);shape = self.shape
    if len(shape)==2:
        if axis == 0:
            mask = mask.ravel('F') ; self = self.ravel('F')
        else:
            mask = mask.ravel('C') ; self = self.ravel('C')
    out[mask] = intervaled_cumsum(self[mask],sizes)
    if len(shape)==2:
        if axis == 0:
            return out.reshape(shape[1],shape[0]).T
        else:
            return out.reshape(shape)
    return out

cumsum_masked(a,m,axis=1)

我整理了答案,并试图优化速度,但没有成功。我认为其他人可能需要它。

下面是一个实施@hpaulj建议的尝试

>>> a = np.array([[ 0. ,  1. ,  2. ,  3. ,  4. ,  5. ,  6. ,  7. ],
...               [ 6. ,  7. ,  8. ,  9. , 10. ,  4.2,  4.3, 11. ],
...               [12. , 13. , 14. , 15. , 16. ,  4.2,  4.3, 17. ],
...               [18. , 19. , 20. , 21. , 22. ,  4.2,  4.3, 23. ]])

>>> m = np.array([[False,  True,  True,  True, False,  True,  True, False],
...               [False, False, False,  True,  True,  True,  True, False],
...               [False, False,  True,  True, False, False, False, False],
...               [False,  True,  True, False, False, False,  True,  True]])

>>> np.maximum.accumulate(np.cumsum(a, axis=1)*~m, axis=1)
array([[  0. ,   0. ,   0. ,   0. ,  10. ,  10. ,  10. ,  28. ],
       [  6. ,  13. ,  21. ,  21. ,  21. ,  21. ,  21. ,  59.5],
       [ 12. ,  25. ,  25. ,  25. ,  70. ,  74.2,  78.5,  95.5],
       [ 18. ,  18. ,  18. ,  78. , 100. , 104.2, 104.2, 104.2]])

>>> np.cumsum(a, axis=1) - np.maximum.accumulate(np.cumsum(a, axis=1)*~m, axis=1)
array([[ 0. ,  1. ,  3. ,  6. ,  0. ,  5. , 11. ,  0. ],
       [ 0. ,  0. ,  0. ,  9. , 19. , 23.2, 27.5,  0. ],
       [ 0. ,  0. , 14. , 29. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. , 19. , 39. ,  0. ,  0. ,  0. ,  4.3, 27.3]])
另请参见似乎有点相关的内容,特别是如果您的数组不是如本玩具示例中所示的
=0
,则批准的答案应该会有所帮助

编辑 为了将来的参考,这里有一个版本删除了上面的
=0
假设。应该还是相当快的,但是没有对照其他方法进行基准测试

In [38]: def masked_cumsum(a, m):                                                               
    ...:     idx = np.maximum.accumulate(np.where(m, 0, np.arange(m.size).reshape(m.shape)), axis=1)
    ...:     c = np.cumsum(a, axis=-1)                                                    
    ...:     return c - c[np.unravel_index(idx, m.shape)]
    ...: 

In [43]: masked_cumsum(-a, m)
Out[43]: 
array([[  0. ,  -1. ,  -3. ,  -6. ,   0. ,  -5. , -11. ,   0. ],
       [  0. ,   0. ,   0. ,  -9. , -19. , -23.2, -27.5,   0. ],
       [  0. ,   0. , -14. , -29. ,   0. ,   0. ,   0. ,   0. ],
       [  0. , -19. , -39. ,   0. ,   0. ,   0. ,  -4.3, -27.3]])
对于
1D
阵列,没有可用的方法。对于这种情况,我们只需要获取屏蔽元素并设置它们的孤岛长度,然后将其提供给该函数

因此,一种矢量化方法是-

# https://stackoverflow.com/a/49179628/  by @Divakar
def intervaled_cumsum(ar, sizes):
    # Make a copy to be used as output array
    out = ar.copy()

    # Get cumumlative values of array
    arc = ar.cumsum()

    # Get cumsumed indices to be used to place differentiated values into
    # input array's copy
    idx = sizes.cumsum()

    # Place differentiated values that when cumumlatively summed later on would
    # give us the desired intervaled cumsum
    out[idx[0]] = ar[idx[0]] - arc[idx[0]-1]
    out[idx[1:-1]] = ar[idx[1:-1]] - np.diff(arc[idx[:-1]-1])
    return out.cumsum()  

def intervaled_cumsum_masked_rowwise(a, mask):
    z = np.zeros((mask.shape[0],1), dtype=bool)
    maskz = np.hstack((z,mask,z))

    out = np.zeros_like(a)
    sizes = np.diff(np.flatnonzero(maskz[:,1:] != maskz[:,:-1]))[::2]
    out[mask] = intervaled_cumsum(a[mask], sizes)
    return out  
样本运行-

In [95]: a
Out[95]: 
array([[ 0. ,  1. ,  2. ,  3. ,  4. ,  5. ,  6. ,  7. ],
       [ 6. ,  7. ,  8. ,  9. , 10. ,  4.2,  4.3, 11. ],
       [12. , 13. , 14. , 15. , 16. ,  4.2,  4.3, 17. ],
       [18. , 19. , 20. , 21. , 22. ,  4.2,  4.3, 23. ]])

In [96]: mask
Out[96]: 
array([[False,  True,  True,  True, False,  True,  True, False],
       [False, False, False,  True,  True,  True,  True, False],
       [False, False,  True,  True, False, False, False, False],
       [False,  True,  True, False, False, False,  True,  True]])

In [97]: intervaled_cumsum_masked_rowwise(a, mask)
Out[97]: 
array([[ 0. ,  1. ,  3. ,  6. ,  0. ,  5. , 11. ,  0. ],
       [ 0. ,  0. ,  0. ,  9. , 19. , 23.2, 27.5,  0. ],
       [ 0. ,  0. , 14. , 29. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. , 19. , 39. ,  0. ,  0. ,  0. ,  4.3, 27.3]])
对于负数也同样有效-

In [109]: a = -a

In [110]: a
Out[110]: 
array([[ -0. ,  -1. ,  -2. ,  -3. ,  -4. ,  -5. ,  -6. ,  -7. ],
       [ -6. ,  -7. ,  -8. ,  -9. , -10. ,  -4.2,  -4.3, -11. ],
       [-12. , -13. , -14. , -15. , -16. ,  -4.2,  -4.3, -17. ],
       [-18. , -19. , -20. , -21. , -22. ,  -4.2,  -4.3, -23. ]])

In [111]: intervaled_cumsum_masked_rowwise(a, mask)
Out[111]: 
array([[  0. ,  -1. ,  -3. ,  -6. ,   0. ,  -5. , -11. ,   0. ],
       [  0. ,   0. ,   0. ,  -9. , -19. , -23.2, -27.5,   0. ],
       [  0. ,   0. , -14. , -29. ,   0. ,   0. ,   0. ,   0. ],
       [  0. , -19. , -39. ,   0. ,   0. ,   0. ,  -4.3, -27.3]])

这是一种比@Divakar's和@filippo's慢一点的方法,但更健壮。“全局汇总”方法的问题在于,它们可能会失去重要性,见下文:

import numpy as np
from scipy import linalg

def cumsums(data, mask, break_lines=True):
    dr = data[mask]
    if break_lines:
        msk = mask.copy()
        msk[:, 0] = False
        mr = msk.ravel()[1:][mask.ravel()[:-1]][:dr.size-1]
    else:
        mr = mask.ravel()[1:][mask.ravel()[:-1]][:dr.size-1]
    D = np.empty((2, dr.size))
    D.T[...] = 1, 0
    D[1, :-1] -= mr
    out = np.zeros_like(data)
    out[mask] = linalg.solve_banded((1, 0), D, dr)
    return out

def f_staircase(a, m):
    return np.cumsum(a, axis=1) - np.maximum.accumulate(np.cumsum(a, axis=1)*~m, axis=1)

# https://stackoverflow.com/a/49179628/  by @Divakar
def intervaled_cumsum(ar, sizes):
    # Make a copy to be used as output array
    out = ar.copy()

    # Get cumumlative values of array
    arc = ar.cumsum()

    # Get cumsumed indices to be used to place differentiated values into
    # input array's copy
    idx = sizes.cumsum()

    # Place differentiated values that when cumumlatively summed later on would
    # give us the desired intervaled cumsum
    out[idx[0]] = ar[idx[0]] - arc[idx[0]-1]
    out[idx[1:-1]] = ar[idx[1:-1]] - np.diff(arc[idx[:-1]-1])
    return out.cumsum()  

def intervaled_cumsum_masked_rowwise(a, mask):
    z = np.zeros((mask.shape[0],1), dtype=bool)
    maskz = np.hstack((z,mask,z))

    out = np.zeros_like(a)
    sizes = np.diff(np.flatnonzero(maskz[:,1:] != maskz[:,:-1]))[::2]
    out[mask] = intervaled_cumsum(a[mask], sizes)
    return out  

data = np.array([[  0.,   1.,   2.,   3.,   4.,   5. ,   6. ,   7.],
                 [  6.,   7.,   8.,   9.,  10.,   4.2,   4.3,  11.],
                 [ 12.,  13.,  14.,  15.,  16.,   4.2,   4.3,  17.],
                 [ 18.,  19.,  20.,  21.,  22.,   4.2,   4.3,  23.]])

mask = np.array([[False, True,  True,  True, False, True, True , False],
                 [False, False, False, True,  True, True, True , False],
                 [False, False, True, True, False, False, False, False],
                 [False, True, True, False, False, False, True , True ]])

from timeit import timeit

print('fast?')
print('filippo', timeit(lambda: f_staircase(data, mask), number=1000))
print('pp     ', timeit(lambda: cumsums(data, mask), number=1000))
print('divakar', timeit(lambda: intervaled_cumsum_masked_rowwise(data, mask), number=1000))

data = np.random.uniform(-10, 10, (5000, 5000))
mask = np.random.random((5000, 5000)) < 0.125
mask[:, 1:] |= mask[:, :-1]
mask[:, 2:] |= mask[:, :-2]

print()
print('fast on large data?')
print('filippo', timeit(lambda: f_staircase(data, mask), number=3))
print('pp     ', timeit(lambda: cumsums(data, mask), number=3))
print('divakar', timeit(lambda: intervaled_cumsum_masked_rowwise(data, mask), number=3))

data = np.random.uniform(-10, 10, (10000, 10000))
mask = np.random.random((10000, 10000)) < 0.025
mask[:, 1:] |= mask[:, :-1]
mask[:, 2:] |= mask[:, :-2]

print()
print('fast on large sparse data?')
print('filippo', timeit(lambda: f_staircase(data, mask), number=3))
print('pp     ', timeit(lambda: cumsums(data, mask), number=3))
print('divakar', timeit(lambda: intervaled_cumsum_masked_rowwise(data, mask), number=3))

data = np.exp(-np.linspace(-24, 24, 100))[None]
mask = (np.arange(100) % 4).astype(bool)[None]

print()
print('numerically sound?')
print('correct', data[0, -3:].sum())
print('filippo', f_staircase(data, mask)[0,-1]) 
print('pp     ', cumsums(data, mask)[0,-1])
print('divakar', intervaled_cumsum_masked_rowwise(data, mask)[0,-1])

我们看到,在指数下降的例子中,基于累积和的方法不起作用。显然,这是一个工程示例,但它展示了一个真正的问题。

我在前面的问题中看到的基本思想是基于
False
元素处的
cumsum
构建楼梯阵列。从常规的
cumsum
中减去该值,得到所需的锯齿和。第一行是
x.cumsum()-np.array([[0,0,0,10,10,28])
。当你在
巨型二维数组
中提到巨型时,通常你希望有多少行,每行的长度是多少,即列数?我非常简单地喜欢这个答案,但实际数据中有一个负值。@weidong为了完整性,我更新了答案来处理负数据,不是那么简单和以前一样,但仍然紧凑且快速。感谢您的测试和指点。恐怕我认为我需要更高的速度。但我会保留并学习您的代码,以便在适当的时候使用它。我发现,当沿轴计算axis=0时,速度会非常慢。
fast?
filippo 0.008435532916337252
pp      0.07329772273078561
divakar 0.0336935929954052

fast on large data?
filippo 1.6037923698313534
pp      3.982803522143513
divakar 1.706403402145952

fast on large sparse data?
filippo 6.11361704999581
pp      4.717669038102031
divakar 2.9474888620898128

numerically sound?
correct 1.9861262739950047e-10
filippo 0.0
pp      1.9861262739950047e-10
divakar 9.737630365237156e-06