Python 使用值作为索引沿新维度折叠numpy数组
我有一个Python 使用值作为索引沿新维度折叠numpy数组,python,numpy,Python,Numpy,我有一个[m,m]numpy数组,元素在{0,1,2,…,24},现在我想在三维空间中分离每个数字,得到一个[m,m,24]数组 一个简单的例子,[5,5]数组,元素在{0,1,2,3} [0 0 1 0 0 2 0 3 0 1 0 2 3 1 0 0 0 1 0 0 1 0 2 0 1] 现在我需要一个“[5,5,3]”数组 [[0 0 1 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 1 0 0 0 1] [0 0 0 0 0 2 0 0
[m,m]
numpy数组,元素在{0,1,2,…,24}
,现在我想在三维空间中分离每个数字,得到一个[m,m,24]
数组
一个简单的例子,[5,5]
数组,元素在{0,1,2,3}
[0 0 1 0 0
2 0 3 0 1
0 2 3 1 0
0 0 1 0 0
1 0 2 0 1]
现在我需要一个“[5,5,3]”数组
[[0 0 1 0 0
0 0 0 0 1
0 0 0 1 0
0 0 1 0 0
1 0 0 0 1]
[0 0 0 0 0
2 0 0 0 0
0 2 0 0 0
0 0 0 0 0
0 0 2 0 0]
[0 0 0 0 0
0 0 3 0 0
0 0 3 0 0
0 0 0 0 0
0 0 0 0 0]]
目前我有一个简单的方法,但它的计算开销非常大,而我又需要频繁地执行这个操作
# Append a trailing axis so the label image becomes (m, m, 1).
img = np.expand_dims(img, axis=2)
# NOTE(review): img_norm is not allocated in this snippet; it is presumably
# a pre-existing (m, m, 24) array -- confirm against the surrounding code.
for layer in range(24):
    # Layer `layer` is the mask of pixels whose label equals layer + 1.
    # The comparison target is a full array of that value, rebuilt each pass.
    target = layer + np.ones(shape=img[..., 0].shape)
    img_norm[..., layer] = (img[..., 0] == target)
对于64个大小为[224, 224]、元素位于{0,1,2,…,24}中的数组,上面的代码大约需要5s
有没有更快的方法呢?以下内容对我来说非常快:
import numpy as np

max_num = 3
img = np.array([
    [0, 0, 1, 0, 0],
    [2, 0, 3, 0, 1],
    [0, 2, 3, 1, 0],
    [0, 0, 1, 0, 0],
    [1, 0, 2, 0, 1],
])

# One output layer per label 1..max_num, stacked along a new LAST axis,
# giving shape (rows, cols, max_num). Label 0 gets no layer.
img_norm = np.zeros(img.shape + (max_num,))
for idx in range(1, max_num + 1):
    # Write into the last axis so the (rows, cols) mask matches the slice
    # shape; indexing the first axis here would raise a broadcast error.
    # Multiplying by idx keeps the label value instead of a plain 0/1 mask.
    img_norm[:, :, idx - 1] = idx * (img == idx)
使用指定大小的随机数组进行测试
max_num = 24
# Random integer labels in 0..max_num: rand() is in [0, 1), so the product
# is in [0, max_num + 1) and the int64 conversion truncates it.
img = np.int64((max_num + 1) * np.random.rand(224, 224))
# Layers are stacked along the last axis: shape (224, 224, max_num).
img_norm = np.zeros(img.shape + (max_num,))
for idx in range(1, max_num + 1):
    # Index the last axis so the (224, 224) masked image fits the slice.
    # img * (img == idx) keeps the value idx where it matches, 0 elsewhere.
    img_norm[:, :, idx - 1] = img * (img == idx)
在我的机器上几乎不花时间
def getnorm_acdr(img):
    """Split a 2-D integer label image into one layer per positive label.

    Args:
        img: 2-D NumPy array of integer labels.

    Returns:
        Array of shape ``(max_label, rows, cols)`` where layer ``k - 1``
        holds the value ``k`` at every pixel equal to ``k`` and 0 elsewhere.
        Pixels with label 0 contribute to no layer.
    """
    max_num = np.max(img)
    # Layers come first here, so each layer is a contiguous (rows, cols) plane.
    img_norm = np.zeros([max_num, *img.shape])
    for idx in range(1, max_num + 1):
        img_norm[idx - 1, :, :] = img * (img == idx)
    # Without this return the function would always yield None.
    return img_norm
# Random label image for timing. max_num is not set here; it must already
# exist at module level (an earlier snippet assigns max_num = 24).
img = np.int64((max_num+1)*np.random.rand(224, 224))
# %timeit is an IPython/Jupyter magic, not valid in a plain Python script.
%timeit getnorm_acdr(img)
给出:
11.9 ms ± 536 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
以下内容对我来说非常迅速:
import numpy as np

max_num = 3
img = np.array([
    [0, 0, 1, 0, 0],
    [2, 0, 3, 0, 1],
    [0, 2, 3, 1, 0],
    [0, 0, 1, 0, 0],
    [1, 0, 2, 0, 1],
])

# One output layer per label 1..max_num, stacked along a new LAST axis,
# giving shape (rows, cols, max_num). Label 0 gets no layer.
img_norm = np.zeros(img.shape + (max_num,))
for idx in range(1, max_num + 1):
    # Write into the last axis so the (rows, cols) mask matches the slice
    # shape; indexing the first axis here would raise a broadcast error.
    # Multiplying by idx keeps the label value instead of a plain 0/1 mask.
    img_norm[:, :, idx - 1] = idx * (img == idx)
使用指定大小的随机数组进行测试
max_num = 24
# Random integer labels in 0..max_num: rand() is in [0, 1), so the product
# is in [0, max_num + 1) and the int64 conversion truncates it.
img = np.int64((max_num + 1) * np.random.rand(224, 224))
# Layers are stacked along the last axis: shape (224, 224, max_num).
img_norm = np.zeros(img.shape + (max_num,))
for idx in range(1, max_num + 1):
    # Index the last axis so the (224, 224) masked image fits the slice.
    # img * (img == idx) keeps the value idx where it matches, 0 elsewhere.
    img_norm[:, :, idx - 1] = img * (img == idx)
在我的机器上几乎不花时间
def getnorm_acdr(img):
    """Split a 2-D integer label image into one layer per positive label.

    Args:
        img: 2-D NumPy array of integer labels.

    Returns:
        Array of shape ``(max_label, rows, cols)`` where layer ``k - 1``
        holds the value ``k`` at every pixel equal to ``k`` and 0 elsewhere.
        Pixels with label 0 contribute to no layer.
    """
    max_num = np.max(img)
    # Layers come first here, so each layer is a contiguous (rows, cols) plane.
    img_norm = np.zeros([max_num, *img.shape])
    for idx in range(1, max_num + 1):
        img_norm[idx - 1, :, :] = img * (img == idx)
    # Without this return the function would always yield None.
    return img_norm
# Random label image for timing. max_num is not set here; it must already
# exist at module level (an earlier snippet assigns max_num = 24).
img = np.int64((max_num+1)*np.random.rand(224, 224))
# %timeit is an IPython/Jupyter magic, not valid in a plain Python script.
%timeit getnorm_acdr(img)
给出:
11.9 ms ± 536 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
绝对更优雅:使用
np.ndenumerate()
看起来这应该比你的要快,因为是O(N^2)而不是O(N^3)。让我们在一个数组上试用,其大小和内容如您所述:
def getnorm_ndenumerate(img):
    """Split a 2-D integer label image into layers using np.ndenumerate.

    Args:
        img: 2-D NumPy array of integer labels.

    Returns:
        Array of shape ``(max_label, rows, cols)`` where layer ``k - 1``
        holds the value ``k`` at every pixel equal to ``k`` and 0 elsewhere.
        Pixels whose label is 0 or negative contribute to no layer.
    """
    img_norm = np.zeros([np.max(img), *img.shape])
    for (i, j), val in np.ndenumerate(img):
        # Skip non-positive labels: val - 1 would be a negative index, which
        # wraps to another layer and raises IndexError when there are no
        # layers at all (an all-zero image).
        if val > 0:
            img_norm[val - 1, i, j] = val
    return img_norm
# Random 224x224 label image; 25 * rand() truncated to int64 gives 0..24.
b = np.int64(25*np.random.rand(224, 224))
# %timeit is an IPython/Jupyter magic, not valid in a plain Python script.
%timeit getnorm_ndenumerate(b)
给予
它确实比你的快。但是优雅是要付出代价的,因为它比其他东西慢 绝对更优雅:使用
np.ndenumerate()
看起来这应该比你的要快,因为是O(N^2)而不是O(N^3)。让我们在一个数组上试用,其大小和内容如您所述:
def getnorm_ndenumerate(img):
    """Split a 2-D integer label image into layers using np.ndenumerate.

    Args:
        img: 2-D NumPy array of integer labels.

    Returns:
        Array of shape ``(max_label, rows, cols)`` where layer ``k - 1``
        holds the value ``k`` at every pixel equal to ``k`` and 0 elsewhere.
        Pixels whose label is 0 or negative contribute to no layer.
    """
    img_norm = np.zeros([np.max(img), *img.shape])
    for (i, j), val in np.ndenumerate(img):
        # Skip non-positive labels: val - 1 would be a negative index, which
        # wraps to another layer and raises IndexError when there are no
        # layers at all (an all-zero image).
        if val > 0:
            img_norm[val - 1, i, j] = val
    return img_norm
# Random 224x224 label image; 25 * rand() truncated to int64 gives 0..24.
b = np.int64(25*np.random.rand(224, 224))
# %timeit is an IPython/Jupyter magic, not valid in a plain Python script.
%timeit getnorm_ndenumerate(b)
给予
它确实比你的快。但是优雅是要付出代价的,因为它比其他东西慢 我犯了一个错误,在输出数组中,所有非零都应该是1。对不起,我犯了个愚蠢的错误 谢谢你的帮助。我测试了上述三种方法,包括来自
Jean François Corbett
,acdr
+Jean François Corbett
和我的代码。
事实证明,来自acdr
+Jean-François Corbett
的方法是最快的
这是我的测试代码
def test_time():
    """Benchmark three ways of turning a label image into 0/1 layers.

    Reads 'dat.png' with OpenCV, runs each variant ``cnt`` times while
    printing the elapsed wall time, then prints whether the input was left
    untouched and whether the three outputs are element-wise identical.

    Raises:
        FileNotFoundError: if 'dat.png' cannot be read.
    """
    import time

    import cv2

    def func1(img, max_num):
        """Per-pixel Python loop over np.ndenumerate."""
        w, h = img.shape
        img_norm = np.zeros([w, h, max_num], np.float32)
        for (i, j), val in np.ndenumerate(img):
            # For val == 0 this writes 0 into the last layer (index -1).
            img_norm[i, j, val - 1] = 0 if val == 0 else 1
        return img_norm

    def func2(img, max_num):
        """One vectorised comparison against a scalar per label."""
        w, h = img.shape
        img_norm = np.zeros([w, h, max_num], np.float32)
        for idx in range(1, max_num + 1):
            img_norm[:, :, idx - 1] = (img == idx)
        return img_norm

    def func3(img, max_num):
        """One vectorised comparison against a full array per label."""
        w, h = img.shape
        img_norm = np.zeros([w, h, max_num], np.float32)
        for idx in range(max_num):
            img_norm[:, :, idx] = (img == (idx + np.ones(shape=img.shape)))
        return img_norm

    img_tmp = cv2.imread('dat.png', cv2.IMREAD_UNCHANGED)
    # cv2.imread signals failure by returning None instead of raising.
    if img_tmp is None:
        raise FileNotFoundError("could not read image 'dat.png'")
    # np.int was only an alias of the builtin int and was removed in
    # NumPy 1.24, so use the builtin directly.
    img_tmp = np.asarray(img_tmp, int)
    # To try this without an image file, replace img_tmp with a small
    # integer array such as the 5x5 example from the question.
    img_bkp = np.array(img_tmp, copy=True)
    print(img_bkp.shape)

    cnt = 100
    maxnum = 24

    # perf_counter is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()
    for _ in range(cnt):
        func1(img_tmp, maxnum)
    print('1 total time =', time.perf_counter() - start_time)

    start_time = time.perf_counter()
    for _ in range(cnt):
        func2(img_tmp, maxnum)
    print('2 total time =', time.perf_counter() - start_time)

    start_time = time.perf_counter()
    for _ in range(cnt):
        func3(img_tmp, maxnum)
    print('3 total time =', time.perf_counter() - start_time)

    # Confirm that none of the variants modified the input in place.
    print((img_tmp == img_bkp).all())

    img1 = func1(img_tmp, maxnum)
    img2 = func2(img_tmp, maxnum)
    img3 = func3(img_tmp, maxnum)
    print(img1.shape, img2.shape, img3.shape)
    print((img1 == img2).all())
    print((img2 == img3).all())
    print((img1 == img3).all())
输出是
(224, 224)
1 total time = 4.738261938095093
2 total time = 0.7725710868835449
3 total time = 1.5980615615844727
True
(224, 224, 24) (224, 224, 24) (224, 224, 24)
True
True
True
如果有任何问题,请发表评论。
谢谢你的帮助 我犯了一个错误,在输出数组中,所有非零都应该是1。对不起,我犯了个愚蠢的错误 谢谢你的帮助。我测试了上述三种方法,包括来自
Jean François Corbett
,acdr
+Jean François Corbett
和我的代码。
事实证明,来自acdr
+Jean-François Corbett
的方法是最快的
这是我的测试代码
def test_time():
    """Benchmark three ways of turning a label image into 0/1 layers.

    Reads 'dat.png' with OpenCV, runs each variant ``cnt`` times while
    printing the elapsed wall time, then prints whether the input was left
    untouched and whether the three outputs are element-wise identical.

    Raises:
        FileNotFoundError: if 'dat.png' cannot be read.
    """
    import time

    import cv2

    def func1(img, max_num):
        """Per-pixel Python loop over np.ndenumerate."""
        w, h = img.shape
        img_norm = np.zeros([w, h, max_num], np.float32)
        for (i, j), val in np.ndenumerate(img):
            # For val == 0 this writes 0 into the last layer (index -1).
            img_norm[i, j, val - 1] = 0 if val == 0 else 1
        return img_norm

    def func2(img, max_num):
        """One vectorised comparison against a scalar per label."""
        w, h = img.shape
        img_norm = np.zeros([w, h, max_num], np.float32)
        for idx in range(1, max_num + 1):
            img_norm[:, :, idx - 1] = (img == idx)
        return img_norm

    def func3(img, max_num):
        """One vectorised comparison against a full array per label."""
        w, h = img.shape
        img_norm = np.zeros([w, h, max_num], np.float32)
        for idx in range(max_num):
            img_norm[:, :, idx] = (img == (idx + np.ones(shape=img.shape)))
        return img_norm

    img_tmp = cv2.imread('dat.png', cv2.IMREAD_UNCHANGED)
    # cv2.imread signals failure by returning None instead of raising.
    if img_tmp is None:
        raise FileNotFoundError("could not read image 'dat.png'")
    # np.int was only an alias of the builtin int and was removed in
    # NumPy 1.24, so use the builtin directly.
    img_tmp = np.asarray(img_tmp, int)
    # To try this without an image file, replace img_tmp with a small
    # integer array such as the 5x5 example from the question.
    img_bkp = np.array(img_tmp, copy=True)
    print(img_bkp.shape)

    cnt = 100
    maxnum = 24

    # perf_counter is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()
    for _ in range(cnt):
        func1(img_tmp, maxnum)
    print('1 total time =', time.perf_counter() - start_time)

    start_time = time.perf_counter()
    for _ in range(cnt):
        func2(img_tmp, maxnum)
    print('2 total time =', time.perf_counter() - start_time)

    start_time = time.perf_counter()
    for _ in range(cnt):
        func3(img_tmp, maxnum)
    print('3 total time =', time.perf_counter() - start_time)

    # Confirm that none of the variants modified the input in place.
    print((img_tmp == img_bkp).all())

    img1 = func1(img_tmp, maxnum)
    img2 = func2(img_tmp, maxnum)
    img3 = func3(img_tmp, maxnum)
    print(img1.shape, img2.shape, img3.shape)
    print((img1 == img2).all())
    print((img2 == img3).all())
    print((img1 == img3).all())
输出是
(224, 224)
1 total time = 4.738261938095093
2 total time = 0.7725710868835449
3 total time = 1.5980615615844727
True
(224, 224, 24) (224, 224, 24) (224, 224, 24)
True
True
True
如果有任何问题,请发表评论。
谢谢你的帮助 您的示例中的
0
s怎么处理了?您似乎只匹配了1、2和3。您能为我们提供一种在大型数据集上测试的方法吗?我不知道为什么这个问题会被投反对票,问题本身很好。想要一个测试数据集吗?一行代码就能生成一个!请参阅到目前为止的答案以获取灵感。@9769953 OP不关心零。@Jean-FrançoisCorbett 我在问题中没有看到这样的说明:示例中明确提到元素取自{0,1,2,3}
,实际数据也类似。如果OP能澄清这一点就好了。在您的示例中,0
s怎么处理了?您似乎只匹配了1、2和3。您能为我们提供一种在大型数据集上测试的方法吗?我不知道为什么这个问题会被投反对票,问题本身很好。想要一个测试数据集吗?一行代码就能生成一个!请参阅到目前为止的答案以获取灵感。@9769953 OP不关心零。@Jean-FrançoisCorbett 我在问题中没有看到这样的说明:示例中明确提到元素取自{0,1,2,3}
,实际数据也类似。如果OP能澄清这一点就好了。