在Python中获取相差N或更多的最小坐标
假设我有一个坐标列表:在Python中获取相差N或更多的最小坐标,python,database,numpy,scipy,Python,Database,Numpy,Scipy,假设我有一个坐标列表: data = [ [(10, 20), (100, 120), (0, 5), (50, 60)], [(13, 20), (300, 400), (100, 120), (51, 62)] ] 我想取数据中每个列表中出现的所有元组,或者与列表中除自身元组以外的所有元组相差3或更少的任何元组。如何在Python中高效地执行此操作 对于上述示例,结果应为: [[(100, 120), # since it occurs in both lists (1
data = [
[(10, 20), (100, 120), (0, 5), (50, 60)],
[(13, 20), (300, 400), (100, 120), (51, 62)]
]
我想取出在 data 的每个列表中都出现的所有元组,或者与其他列表(不含自身所在的列表)中某个元组相差不超过 3 的元组。如何在 Python 中高效地完成这一操作?
对于上述示例,结果应为:
[[(100, 120), # since it occurs in both lists
(10, 20), (13, 20), # since they differ by only 3
(50, 60), (51, 62)]]
(0,5)和(300,400)将不包括在内,因为它们不会出现在两个列表中,并且与列表中的元素没有3或更少的差异
该如何计算?谢谢。 希望以下内容能帮你起步,欢迎提出任何改进意见。 “出现在所有列表中”这一部分很简单——只需对所有列表中的元素取交集即可:
>>> data = [
... [(10, 20), (100, 120), (0, 5), (50, 60)],
... [(13, 20), (300, 400), (100, 120), (51, 62)]
... ]
>>> dataset = [set(d) for d in data]
>>> dataset[0].intersection(*dataset[1:])
set([(100, 120)])
对于“与不在同一列表中的元组相差3或更少”这一条件,在我看来是一个图/二维空间问题。它没有比多项式时间更简单的算法;如果你的数据集不是很大,可以直接遍历所有点,把不在同一个列表中的相近点分到一组。 一个直接的实现会很慢:O(n^2),需要把每个节点与其他所有节点逐一比较。可以用一棵树来加速。 这个实现使用一个简单的四叉树来提高搜索效率。它不会尝试平衡树,因此如果点列表的顺序不理想,效率可能会非常低。对很多用途来说,简单地把列表随机打乱大概就足够了;只需确保不要传入大量按坐标排序的项目,因为那样树会退化成链表。 这里的优化很简单:如果我们要查找与某个点的欧氏距离在3个单位以内的项目,并且已知某棵子树中的所有项目都至少在该点右侧3个单位之外,那么该子树中的任何点与它的距离都不可能小于3个单位。 此代码属于公共领域。请尽量不要把它当作家庭作业交上去。
#!/usr/bin/python
import math
def euclidean_distance(pos1, pos2):
    """
    Return the Euclidean distance between two (x, y) points.

    pos1 and pos2are indexable pairs; only elements 0 and 1 are read.
    The result is always a float.
    """
    # math.hypot replaces the hand-rolled pow/sqrt pair: math.pow(delta, 2)
    # raises OverflowError for large finite deltas, whereas hypot scales
    # internally and returns the correct finite result.
    return math.hypot(pos1[0] - pos2[0], pos1[1] - pos2[1])
class QuadTreeNode(object):
    """
    A node of an unbalanced point quadtree.

    Every node stores one (x, y) point and up to four children, one per
    quadrant relative to that point.  No rebalancing is performed, so the
    shape of the tree depends on insertion order.
    """

    def __init__(self, pos):
        """
        Create a QuadTreeNode at the specified position.  pos must be an
        (x, y) tuple.  Children are classified by quadrant.
        """
        # Children of this node are ordered TL, TR, BL, BR (origin top-left).
        self.children = [None, None, None, None]
        self.pos = pos

    def classify_node(self, pos):
        """
        Return which entry in children can contain pos.  If pos is equal to
        this node, return None.

        >>> node = QuadTreeNode((10, 20))
        >>> node.classify_node((10, 20)) is None
        True
        >>> node.classify_node((2, 2))
        0
        >>> node.classify_node((50, 2))
        1
        >>> node.classify_node((2, 50))
        2
        >>> node.classify_node((50, 50))
        3

        X boundary condition:

        >>> node.classify_node((10, 2))
        0
        >>> node.classify_node((10, 50))
        2

        Y boundary condition:

        >>> node.classify_node((2, 20))
        0
        >>> node.classify_node((50, 20))
        1
        """
        if pos == self.pos:
            return None

        # Points lying exactly on a dividing line belong to the left / top
        # side, hence the "<=" comparisons.
        if pos[0] <= self.pos[0]:  # Left
            if pos[1] <= self.pos[1]:  # Top-left
                return 0
            return 2  # Bottom-left
        if pos[1] <= self.pos[1]:  # Top-right
            return 1
        return 3  # Bottom-right

    def add_node(self, node):
        """
        Add a specified point under this node.

        A node whose position equals an existing node's position is a
        duplicate and is silently ignored.
        """
        # Walk down iteratively: the tree is unbalanced, so coordinate-sorted
        # input produces a chain as deep as the input is long, which would
        # exceed the interpreter's recursion limit if this were recursive.
        current = self
        while True:
            quadrant = current.classify_node(node.pos)
            if quadrant is None:
                # node is equal to current, so this is a duplicate. Ignore it.
                return

            child = current.children[quadrant]
            if child is None:
                current.children[quadrant] = node
                return

            # We already have a node there; descend and try again.
            current = child

    @staticmethod
    def CreateQuadTree(data):
        """
        Create a quad tree from the specified list of points.

        The first point becomes the root and the remaining points are added
        in order.  Returns the root node, or None when data is empty (an
        empty tree contains no points).
        """
        if len(data) == 0:
            return None

        root = QuadTreeNode(data[0])
        for val in data[1:]:
            root.add_node(QuadTreeNode(val))
        return root

    def distance_from_pos(self, pos):
        """Return the Euclidean distance from this node's point to pos."""
        # Computed directly with math.hypot so the class does not depend on
        # any module-level helper.
        return math.hypot(self.pos[0] - pos[0], self.pos[1] - pos[1])

    def __str__(self):
        return str(self.pos)

    def find_point_within_range(self, pos, distance):
        """
        If a point exists within the specified Euclidean distance of the
        specified point, return its node.  Otherwise, return None.

        Nodes are visited depth-first, children in index order 0..3, and the
        first node found within range is returned.
        """
        # Explicit stack instead of recursion so that very deep (degenerate)
        # trees can still be searched.
        pending = [self]
        while pending:
            node = pending.pop()
            if node.distance_from_pos(pos) <= distance:
                return node

            # Push children in reverse so that child 0 is popped first.
            for quadrant in (3, 2, 1, 0):
                child = node.children[quadrant]
                if child is None:
                    # We don't have a node in this quadrant.
                    continue

                # If every point in this quadrant is necessarily further than
                # `distance` from pos along one axis, skip the whole subtree.
                if quadrant in (0, 2) and node.pos[0] < pos[0] - distance:
                    continue  # subtree lies at or left of node.x: too far left
                if quadrant in (1, 3) and node.pos[0] > pos[0] + distance:
                    continue  # subtree lies right of node.x: too far right
                if quadrant in (0, 1) and node.pos[1] < pos[1] - distance:
                    continue  # subtree lies at or above node.y: too far up
                if quadrant in (2, 3) and node.pos[1] > pos[1] + distance:
                    continue  # subtree lies below node.y: too far down

                pending.append(child)

        return None

    @staticmethod
    def find_point_in_range_for_all_trees(point, trees, distance):
        """
        If all QuadTreeNodes in trees contain a point within the specified
        distance of point, return True.  Otherwise, return False.

        A None entry in trees stands for an empty tree (as returned by
        CreateQuadTree for empty input) and therefore never matches.
        """
        for tree in trees:
            if tree is None or tree.find_point_within_range(point, distance) is None:
                return False
        return True
def test_naive(data, distance):
    """
    Brute-force reference implementation.

    data is a list of point lists.  Returns the set of points (taken from
    any of the lists) for which every list in data contains at least one
    point within the given Euclidean distance.  A point always matches
    itself in its own list, so only the other lists are decisive.
    """
    def has_point_in_range(points, point):
        # True as soon as one candidate is close enough.
        return any(euclidean_distance(other, point) <= distance
                   for other in points)

    def matches_every_list(point):
        return all(has_point_in_range(points, point) for points in data)

    return set(point
               for points in data
               for point in points
               if matches_every_list(point))
def test_tree(data, distance):
    """
    Quadtree-backed equivalent of test_naive.

    Builds one quadtree per list in data and returns the set of points for
    which every tree contains a point within the given Euclidean distance.
    """
    # An empty list can never contain a neighbour, so no point can match in
    # every list; this also avoids building a tree from no points.
    if any(len(points) == 0 for points in data):
        return set()

    trees = [QuadTreeNode.CreateQuadTree(points) for points in data]

    # Pass the caller's distance through (it used to be hard-coded to 3).
    return set(
        point
        for points in data
        for point in points
        if QuadTreeNode.find_point_in_range_for_all_trees(point, trees, distance))
def test():
    """
    Check that the quadtree search agrees with the brute-force search.

    Runs both implementations on the small sample from the question and
    then on ten lists of 500 random integer points, asserting that the
    results are equal each time.
    """
    import random

    sample_data = [
        [(10, 20), (100, 120), (0, 5), (50, 60)],
        [(13, 20), (300, 400), (100, 120), (51, 62)],
    ]
    result1 = test_naive(sample_data, 3)
    result2 = test_tree(sample_data, 3)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(result1)
    assert result1 == result2

    # Loosely validate the tree algorithm against a lot of sample data, and
    # compare performance while we're at it:
    def random_data():
        return [(random.randint(0, 1000), random.randint(0, 1000))
                for _ in range(0, 500)]

    data = [random_data() for _ in range(0, 10)]
    print("Searching (naive)...")
    result1 = test_naive(data, 3)
    print("Searching (tree)...")
    result2 = test_tree(data, 3)
    assert result1 == result2
if __name__ == "__main__":
    import doctest

    # Run the comparison self-test first, then the doctests embedded in the
    # docstrings of this module.
    test()
    doctest.testmod()
#/usr/bin/python
输入数学
def欧氏距离(pos1,pos2):
x=数学功率(位置1[0]-位置2[0],2)
y=数学功率(位置1[1]-位置2[1],2)
返回math.sqrt(x+y)
类QuadTreeNode(对象):
定义初始(自我,位置):
"""
在指定位置创建四元组。位置必须是(x,y)元组。
儿童按象限分类。
"""
#该节点的子节点顺序为TL、TR、BL、BL(原点左上角)。
self.children=[无,无,无,无]
self.pos=pos
def分类_节点(自身、pos):
"""
返回子项中可以包含pos的条目。如果pos等于此
节点,返回None。
>>>节点=QuadTreeNode((10,20))
>>>node.classify_node((10,20))==无
真的
>>>node.classify_node((2,2))
0
>>>node.classify_node((50,2))
1.
>>>node.classify_node((2,50))
2.
>>>node.classify_node((50,50))
3.
X边界条件:
>>>node.classify_node((10,2))
0
>>>node.classify_node((10,50))
2.
Y边界条件:
>>>node.classify_node((2,20))
0
>>>节点。分类_节点((50,20))
1.
"""
如果pos==self.pos:
一无所获
如果pos[0]@barrycarter的直觉很有趣:减少比较的次数(通过“比较”两点,我们的意思是检查它们的距离是否为,您想到了什么函数来计算两个元组的差并返回一个int?在我看来,它“与列表中除自身元组以外的所有元组相差3或更少”意味着(10,20)不能出现在结果中,因为它与(300,400)相差超过3。你的意思是说你应该包含一个项目,在另一个列表中可以找到三个以内的匹配项吗?在所有其他列表中??“坐标之间的差异”是没有意义的。你是说“坐标之间的欧几里德距离”吗?你是在问如何做到这一点,还是如何有效地做到这一点?以一种幼稚(缓慢)的方式做这件事很简单——只需迭代每个点并与所有其他点进行比较。@user248237:我可以问这个问题的用例是什么吗?我可能能够指出其他的路线。
import collections
import math
def cellof(point, size=3):
    """
    Return the (column, row) of the square grid cell containing point.

    Cells are size units wide; floor division is used, so negative
    coordinates map to negative cell indices.
    """
    x, y = point
    return x // size, y // size


def distance(p1, p2):
    """Return the Euclidean distance between the (x, y) points p1 and p2."""
    return math.hypot(p1[0] - p2[0], p1[1] - p2[1])


def process(data, radius=3):
    """
    Return the points that have a neighbour in a different list.

    data is a list of point lists.  A point is kept when some point taken
    from another list of data lies within radius (Euclidean distance,
    inclusive) of it.  The result is a sorted list without duplicates.

    Raises ValueError if radius is not positive.
    """
    if radius <= 0:
        raise ValueError("radius must be positive")

    # Bucket every point by grid cell, remembering which list it came from.
    # With cells exactly `radius` wide, two points within `radius` of each
    # other are always in the same cell or in adjacent cells.
    cells = collections.defaultdict(list)
    for i, points in enumerate(data):
        for p in points:
            cells[cellof(p, radius)].append((i, p))

    res = set()
    for (cx, cy), members in cells.items():
        # Collect the candidates from the 3x3 block of cells around this one.
        # .get() is essential: indexing the defaultdict with a missing key
        # would insert it, changing the dict's size while it is being
        # iterated (a RuntimeError on Python 3).
        nearby = [
            entry
            for nx in (cx - 1, cx, cx + 1)
            for ny in (cy - 1, cy, cy + 1)
            for entry in cells.get((nx, ny), ())
        ]
        for i, p in members:
            if p in res:
                continue  # already accepted via another list
            # Only points from a *different* list count as neighbours.
            if any(i != otheri and distance(p, otherp) <= radius
                   for otheri, otherp in nearby):
                res.add(p)
    return sorted(res)
if __name__ == '__main__':  # just an example
    data = [
        [(10, 20), (100, 120), (0, 5), (50, 60)],
        [(13, 20), (300, 400), (100, 120), (51, 62)],
    ]
    # Single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print(process(data))
[(10, 20), (13, 20), (50, 60), (51, 62), (100, 120)]