Python 三维数据挖掘性能
标签：python、performance

我有一些三维位置的数据：
# max size of grid (x, y, z)
grid_size = (999, 999, 99)
class MyObject(object):
    """An identified object that records the 3-D positions it has occupied.

    Attributes are set in ``__init__``:
        id: the identifier passed by the caller.
        trace: a list, initially empty; the surrounding example shows it
            holding ``(x, y, z)`` tuples.
    """

    def __init__(self, id):
        # ``id`` shadows the builtin, but the name is part of the public
        # signature and is kept for compatibility with existing callers.
        self.id = id
        # A fresh list per instance, so traces are never shared.
        self.trace = []
[...]
# objects have some positions in their "trace"
print(myobject1.trace)
[(65, 128, 12), (66, 128, 12), (66, 129, 12)]
print(myobject2.trace)
[(456, 255, 75), (456, 254, 75), (456, 254, 74)]
我需要创建一个包含所有这些对象位置的映射。目标是找到在该映射中查找对象的性能最佳的方法。例如，我有一个包含若干坐标的列表：位于这些坐标上的对象有哪些？
所以我想到了四个策略:
带字符串键的一维dict:
{'65.128.12':myobject1, '66.128.12':myobject1, '66.129.12':myobject1,
'456.255.75':myobject2, '456.254.75':myobject2, '456.254.74':myobject2}
def find_in_str_map(search_points, map_str):
    """Look up each search point in a dict keyed by ``'x.y.z'`` strings.

    Args:
        search_points: iterable of indexable ``(x, y, z)`` points.
        map_str: dict mapping ``'x.y.z'`` string keys to objects that
            expose an ``id`` attribute.

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    found_objects = []
    for trace_point in search_points:
        key = '.'.join((
            str(trace_point[0]), str(trace_point[1]), str(trace_point[2]),
        ))
        if key in map_str:
            # Fetch the value once instead of indexing the dict twice.
            candidate = map_str[key]
            if candidate.id != myobject.id:
                found_objects.append(candidate)
    return found_objects
{6512812:myobject1, 6612812:myobject1, 6612912:myobject1,
45625575:myobject2, 45625475:myobject2, 45625474:myobject2}
def find_in_int_map(search_points, map_str):
    """Look up each search point in a dict keyed by a packed integer.

    The key is ``x * 100000 + y * 100 + z``. Distinct points map to distinct
    keys only while ``0 <= z < 100`` and ``0 <= y < 1000``.

    Args:
        search_points: iterable of indexable ``(x, y, z)`` integer points.
        map_str: dict mapping packed integer keys to objects that expose an
            ``id`` attribute (the parameter name is historical; the keys
            here are ints, not strings).

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    found_myobjects = []
    for trace_point in search_points:
        key = trace_point[0] * 100000 + trace_point[1] * 100 + trace_point[2]
        if key in map_str:
            # Fetch the value once instead of indexing the dict twice.
            candidate = map_str[key]
            if candidate.id != myobject.id:
                found_myobjects.append(candidate)
    return found_myobjects
{(65, 128, 12):myobject1, (66, 128, 12):myobject1, (66, 129, 12):myobject1,
(456, 255, 75):myobject2, (456, 254, 75):myobject2, (456, 254, 74):myobject2}
def find_in_tuple_map(search_points, map):
    """Look up each search point in a dict keyed by coordinate tuples.

    Args:
        search_points: iterable of hashable points, used directly as keys.
        map: dict mapping points to objects that expose an ``id`` attribute
            (the name shadows the builtin but is part of the signature).

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    # The original initialised ``found_myobjects`` but appended to and
    # returned ``found_objects``, which raised NameError; one name is used.
    found_objects = []
    for trace_point in search_points:
        if trace_point in map:
            candidate = map[trace_point]
            if candidate.id != myobject.id:
                found_objects.append(candidate)
    return found_objects
('str', 8.213999032974243)
('int', 5.6337010860443115)
('3d ', 6.18729305267334)
('tup', 5.0934319496154785)
str 10.11169655699996
int 5.984578157000215
3d 6.448565245998907
tup 5.139268291999542
带int键的一维dict:
{'65.128.12':myobject1, '66.128.12':myobject1, '66.129.12':myobject1,
'456.255.75':myobject2, '456.254.75':myobject2, '456.254.74':myobject2}
def find_in_str_map(search_points, map_str):
    """Look up each search point in a dict keyed by ``'x.y.z'`` strings.

    Args:
        search_points: iterable of indexable ``(x, y, z)`` points.
        map_str: dict mapping ``'x.y.z'`` string keys to objects that
            expose an ``id`` attribute.

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    found_objects = []
    for trace_point in search_points:
        key = '.'.join((
            str(trace_point[0]), str(trace_point[1]), str(trace_point[2]),
        ))
        if key in map_str:
            # Fetch the value once instead of indexing the dict twice.
            candidate = map_str[key]
            if candidate.id != myobject.id:
                found_objects.append(candidate)
    return found_objects
{6512812:myobject1, 6612812:myobject1, 6612912:myobject1,
45625575:myobject2, 45625475:myobject2, 45625474:myobject2}
def find_in_int_map(search_points, map_str):
    """Look up each search point in a dict keyed by a packed integer.

    The key is ``x * 100000 + y * 100 + z``. Distinct points map to distinct
    keys only while ``0 <= z < 100`` and ``0 <= y < 1000``.

    Args:
        search_points: iterable of indexable ``(x, y, z)`` integer points.
        map_str: dict mapping packed integer keys to objects that expose an
            ``id`` attribute (the parameter name is historical; the keys
            here are ints, not strings).

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    found_myobjects = []
    for trace_point in search_points:
        key = trace_point[0] * 100000 + trace_point[1] * 100 + trace_point[2]
        if key in map_str:
            # Fetch the value once instead of indexing the dict twice.
            candidate = map_str[key]
            if candidate.id != myobject.id:
                found_myobjects.append(candidate)
    return found_myobjects
{(65, 128, 12):myobject1, (66, 128, 12):myobject1, (66, 129, 12):myobject1,
(456, 255, 75):myobject2, (456, 254, 75):myobject2, (456, 254, 74):myobject2}
def find_in_tuple_map(search_points, map):
    """Look up each search point in a dict keyed by coordinate tuples.

    Args:
        search_points: iterable of hashable points, used directly as keys.
        map: dict mapping points to objects that expose an ``id`` attribute
            (the name shadows the builtin but is part of the signature).

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    # The original initialised ``found_myobjects`` but appended to and
    # returned ``found_objects``, which raised NameError; one name is used.
    found_objects = []
    for trace_point in search_points:
        if trace_point in map:
            candidate = map[trace_point]
            if candidate.id != myobject.id:
                found_objects.append(candidate)
    return found_objects
('str', 8.213999032974243)
('int', 5.6337010860443115)
('3d ', 6.18729305267334)
('tup', 5.0934319496154785)
str 10.11169655699996
int 5.984578157000215
3d 6.448565245998907
tup 5.139268291999542
具有元组（坐标，coordinate）键的一维dict:
{'65.128.12':myobject1, '66.128.12':myobject1, '66.129.12':myobject1,
'456.255.75':myobject2, '456.254.75':myobject2, '456.254.74':myobject2}
def find_in_str_map(search_points, map_str):
    """Look up each search point in a dict keyed by ``'x.y.z'`` strings.

    Args:
        search_points: iterable of indexable ``(x, y, z)`` points.
        map_str: dict mapping ``'x.y.z'`` string keys to objects that
            expose an ``id`` attribute.

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    found_objects = []
    for trace_point in search_points:
        key = '.'.join((
            str(trace_point[0]), str(trace_point[1]), str(trace_point[2]),
        ))
        if key in map_str:
            # Fetch the value once instead of indexing the dict twice.
            candidate = map_str[key]
            if candidate.id != myobject.id:
                found_objects.append(candidate)
    return found_objects
{6512812:myobject1, 6612812:myobject1, 6612912:myobject1,
45625575:myobject2, 45625475:myobject2, 45625474:myobject2}
def find_in_int_map(search_points, map_str):
    """Look up each search point in a dict keyed by a packed integer.

    The key is ``x * 100000 + y * 100 + z``. Distinct points map to distinct
    keys only while ``0 <= z < 100`` and ``0 <= y < 1000``.

    Args:
        search_points: iterable of indexable ``(x, y, z)`` integer points.
        map_str: dict mapping packed integer keys to objects that expose an
            ``id`` attribute (the parameter name is historical; the keys
            here are ints, not strings).

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    found_myobjects = []
    for trace_point in search_points:
        key = trace_point[0] * 100000 + trace_point[1] * 100 + trace_point[2]
        if key in map_str:
            # Fetch the value once instead of indexing the dict twice.
            candidate = map_str[key]
            if candidate.id != myobject.id:
                found_myobjects.append(candidate)
    return found_myobjects
{(65, 128, 12):myobject1, (66, 128, 12):myobject1, (66, 129, 12):myobject1,
(456, 255, 75):myobject2, (456, 254, 75):myobject2, (456, 254, 74):myobject2}
def find_in_tuple_map(search_points, map):
    """Look up each search point in a dict keyed by coordinate tuples.

    Args:
        search_points: iterable of hashable points, used directly as keys.
        map: dict mapping points to objects that expose an ``id`` attribute
            (the name shadows the builtin but is part of the signature).

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    # The original initialised ``found_myobjects`` but appended to and
    # returned ``found_objects``, which raised NameError; one name is used.
    found_objects = []
    for trace_point in search_points:
        if trace_point in map:
            candidate = map[trace_point]
            if candidate.id != myobject.id:
                found_objects.append(candidate)
    return found_objects
('str', 8.213999032974243)
('int', 5.6337010860443115)
('3d ', 6.18729305267334)
('tup', 5.0934319496154785)
str 10.11169655699996
int 5.984578157000215
3d 6.448565245998907
tup 5.139268291999542
三维dict
{456: {254: {74: myobject2, 75: myobject2}, 255: {75: myobject2}}, 65: {128: {12: myobject1}}, 66: {128: {12: myobject1}, 129: {12: myobject1}}}
def find_in_3d_map(search_points, map):
    """Look up each search point in a three-level nested dict.

    ``map`` is indexed as ``map[x][y][z]``. A point is reported only when
    all three levels contain its coordinate. No filtering by object id is
    applied.

    Args:
        search_points: iterable of indexable ``(x, y, z)`` points.
        map: nested dict ``{x: {y: {z: obj}}}`` (the name shadows the
            builtin but is part of the signature).

    Returns:
        A list, in search order, of the values stored at the matched points.
    """
    founds_myobjects = []
    for trace_point in search_points:
        x = trace_point[0]
        y = trace_point[1]
        z = trace_point[2]
        if x in map:
            # Keep each intermediate level so it is indexed only once.
            by_y = map[x]
            if y in by_y:
                by_z = by_y[y]
                if z in by_z:
                    founds_myobjects.append(by_z[z])
    return founds_myobjects
因此,我使用timeit(和大量对象)测试这些策略的性能:
(此处的可测试代码:)
结果如下:
Python 2.7:
{'65.128.12':myobject1, '66.128.12':myobject1, '66.129.12':myobject1,
'456.255.75':myobject2, '456.254.75':myobject2, '456.254.74':myobject2}
def find_in_str_map(search_points, map_str):
    """Look up each search point in a dict keyed by ``'x.y.z'`` strings.

    Args:
        search_points: iterable of indexable ``(x, y, z)`` points.
        map_str: dict mapping ``'x.y.z'`` string keys to objects that
            expose an ``id`` attribute.

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    found_objects = []
    for trace_point in search_points:
        key = '.'.join((
            str(trace_point[0]), str(trace_point[1]), str(trace_point[2]),
        ))
        if key in map_str:
            # Fetch the value once instead of indexing the dict twice.
            candidate = map_str[key]
            if candidate.id != myobject.id:
                found_objects.append(candidate)
    return found_objects
{6512812:myobject1, 6612812:myobject1, 6612912:myobject1,
45625575:myobject2, 45625475:myobject2, 45625474:myobject2}
def find_in_int_map(search_points, map_str):
    """Look up each search point in a dict keyed by a packed integer.

    The key is ``x * 100000 + y * 100 + z``. Distinct points map to distinct
    keys only while ``0 <= z < 100`` and ``0 <= y < 1000``.

    Args:
        search_points: iterable of indexable ``(x, y, z)`` integer points.
        map_str: dict mapping packed integer keys to objects that expose an
            ``id`` attribute (the parameter name is historical; the keys
            here are ints, not strings).

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    found_myobjects = []
    for trace_point in search_points:
        key = trace_point[0] * 100000 + trace_point[1] * 100 + trace_point[2]
        if key in map_str:
            # Fetch the value once instead of indexing the dict twice.
            candidate = map_str[key]
            if candidate.id != myobject.id:
                found_myobjects.append(candidate)
    return found_myobjects
{(65, 128, 12):myobject1, (66, 128, 12):myobject1, (66, 129, 12):myobject1,
(456, 255, 75):myobject2, (456, 254, 75):myobject2, (456, 254, 74):myobject2}
def find_in_tuple_map(search_points, map):
    """Look up each search point in a dict keyed by coordinate tuples.

    Args:
        search_points: iterable of hashable points, used directly as keys.
        map: dict mapping points to objects that expose an ``id`` attribute
            (the name shadows the builtin but is part of the signature).

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    # The original initialised ``found_myobjects`` but appended to and
    # returned ``found_objects``, which raised NameError; one name is used.
    found_objects = []
    for trace_point in search_points:
        if trace_point in map:
            candidate = map[trace_point]
            if candidate.id != myobject.id:
                found_objects.append(candidate)
    return found_objects
('str', 8.213999032974243)
('int', 5.6337010860443115)
('3d ', 6.18729305267334)
('tup', 5.0934319496154785)
str 10.11169655699996
int 5.984578157000215
3d 6.448565245998907
tup 5.139268291999542
Python 3.3:
{'65.128.12':myobject1, '66.128.12':myobject1, '66.129.12':myobject1,
'456.255.75':myobject2, '456.254.75':myobject2, '456.254.74':myobject2}
def find_in_str_map(search_points, map_str):
    """Look up each search point in a dict keyed by ``'x.y.z'`` strings.

    Args:
        search_points: iterable of indexable ``(x, y, z)`` points.
        map_str: dict mapping ``'x.y.z'`` string keys to objects that
            expose an ``id`` attribute.

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    found_objects = []
    for trace_point in search_points:
        key = '.'.join((
            str(trace_point[0]), str(trace_point[1]), str(trace_point[2]),
        ))
        if key in map_str:
            # Fetch the value once instead of indexing the dict twice.
            candidate = map_str[key]
            if candidate.id != myobject.id:
                found_objects.append(candidate)
    return found_objects
{6512812:myobject1, 6612812:myobject1, 6612912:myobject1,
45625575:myobject2, 45625475:myobject2, 45625474:myobject2}
def find_in_int_map(search_points, map_str):
    """Look up each search point in a dict keyed by a packed integer.

    The key is ``x * 100000 + y * 100 + z``. Distinct points map to distinct
    keys only while ``0 <= z < 100`` and ``0 <= y < 1000``.

    Args:
        search_points: iterable of indexable ``(x, y, z)`` integer points.
        map_str: dict mapping packed integer keys to objects that expose an
            ``id`` attribute (the parameter name is historical; the keys
            here are ints, not strings).

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    found_myobjects = []
    for trace_point in search_points:
        key = trace_point[0] * 100000 + trace_point[1] * 100 + trace_point[2]
        if key in map_str:
            # Fetch the value once instead of indexing the dict twice.
            candidate = map_str[key]
            if candidate.id != myobject.id:
                found_myobjects.append(candidate)
    return found_myobjects
{(65, 128, 12):myobject1, (66, 128, 12):myobject1, (66, 129, 12):myobject1,
(456, 255, 75):myobject2, (456, 254, 75):myobject2, (456, 254, 74):myobject2}
def find_in_tuple_map(search_points, map):
    """Look up each search point in a dict keyed by coordinate tuples.

    Args:
        search_points: iterable of hashable points, used directly as keys.
        map: dict mapping points to objects that expose an ``id`` attribute
            (the name shadows the builtin but is part of the signature).

    Returns:
        A list, in search order, of the objects found at those points whose
        ``id`` differs from ``myobject.id``.
    """
    # NOTE(review): ``myobject`` is not a parameter; it is presumably a
    # module-level variable of the benchmark script -- confirm.
    # The original initialised ``found_myobjects`` but appended to and
    # returned ``found_objects``, which raised NameError; one name is used.
    found_objects = []
    for trace_point in search_points:
        if trace_point in map:
            candidate = map[trace_point]
            if candidate.id != myobject.id:
                found_objects.append(candidate)
    return found_objects
('str', 8.213999032974243)
('int', 5.6337010860443115)
('3d ', 6.18729305267334)
('tup', 5.0934319496154785)
str 10.11169655699996
int 5.984578157000215
3d 6.448565245998907
tup 5.139268291999542
是否还有其他在三维坐标映射中存储和查找的策略？我演示的这3种策略还能优化吗？

最简单的方法是使用坐标元组作为映射的键：
{(65,128,12):myobject1, (66,128,12):myobject1, (66,129,12):myobject1,
(456,255,75):myobject2, (456,254,75):myobject2, (456,254,74):myobject2}
def find_collisions_tuple_map(bugs, map):
    """Collect the map entries hit by any bug's possible future positions.

    Args:
        bugs: iterable of objects providing
            ``get_possibles_future_trace_point()``, which returns an
            iterable of hashable points.
        map: dict keyed by those points (the name shadows the builtin but
            is part of the signature).

    Returns:
        A list with one entry per matching point, in iteration order; the
        same value appears more than once if several points map to it.
    """
    return [
        map[trace_point]
        for bug in bugs
        for trace_point in bug.get_possibles_future_trace_point()
        if trace_point in map
    ]
在我的电脑上,它稍微快一点
('str', 10.188277582443057)
('int', 7.133011876243648)
('3d ', 7.486879201843017)
('tuple ', 6.406966607422291)
您的跟踪点本身就是 tuple，那么为什么不直接将这些 tuple 用作键呢？简单多了，而且似乎也快了一点。
我忘了测试它，arf。我会将其添加到测试策略中。