在Python字典中查找失败时查找最近的密钥对_Python_Dictionary

在Python字典中查找失败时，查找与该键最邻近的一对键

python dictionary

在Python字典中查找失败时查找最近的密钥对,python,dictionary,Python,Dictionary,假设我有一个Python字典，其中键实际上是整数。我可以这样创建一个： >>> d = dict([(1, 0), (7, 10), (28, 20)]) >>> d {1: 0, 7: 10, 28: 20} 现在，我想做一个查找，如果找到了键，它的索引就会返回。这部分非常简单，如下所示： >>> key = 7 >>> d[key] 10 如果找不到键，那么我想返回键的绑定。例如： >>> key

假设我有一个Python字典，其中键实际上是整数。我可以这样创建一个：

>>> d = dict([(1, 0), (7, 10), (28, 20)])
>>> d
{1: 0, 7: 10, 28: 20}

现在，我想做一次查找：如果找到了键，就返回它对应的值。这部分非常简单，如下所示：

>>> key = 7
>>> d[key]
10

如果找不到键，那么我想返回该键所处的边界（即它两侧最近的两个键）。例如：

>>> key = 6
>>> d[key]
Bound(1, 7)

因为 6 不是一个键，所以我希望返回它两侧的两个键。有没有办法基本上不需要迭代整个字典就能做到这一点？如果没有，那么这个问题就不需要回答了。如果这确实可行，请尽可能说明一下性能方面的影响。谢谢。

这是一个使用函数访问普通 dict 的解决方案（我使用了 `OrderedDict`，因为我这里现在只有较旧版本的 Python；如果您有 Python 3.6 或更高版本，可以直接使用保持插入顺序的普通 `dict`）。

我们按键对dict进行排序，这样可以快速找到周围的键

import bisect
from collections import OrderedDict

# Key-sorted sample mapping; on Python 3.6+ a plain dict built from the same
# sorted pairs could be used instead.
d = OrderedDict(sorted({1: 0, 7: 10, 28: 20}.items()))

class Bound:
    """Plain holder for a pair of values, shown as ``Bound(a, b)``.

    The two constructor arguments are stored unchanged on the ``a`` and ``b``
    attributes; either may be ``None``.
    """

    def __init__(self, a, b):
        self.a, self.b = a, b

    def __repr__(self):
        template = 'Bound({}, {})'
        return template.format(self.a, self.b)

def closest(key, d):
    """Return ``d[key]``, or the keys surrounding ``key`` when it is absent.

    Args:
        key: The key to look up; must be orderable against the keys of ``d``.
        d: A mapping. Its keys may be stored in any order.

    Returns:
        The stored value when ``key`` is present. Otherwise a ``Bound`` whose
        first field is the largest key below ``key`` and whose second field is
        the smallest key above it; a side with no such key is ``None``.
    """
    try:
        return d[key]
    except KeyError:
        # bisect only gives correct answers on ascending input, so sort the
        # keys here instead of trusting the mapping's insertion order.
        keys = sorted(d)
        ins_point = bisect.bisect(keys, key)
        lower = keys[ins_point - 1] if ins_point >= 1 else None
        upper = keys[ins_point] if ins_point < len(keys) else None
        return Bound(lower, upper)

# Example lookups against the sample mapping `d`; the comment under each call
# shows the value that call evaluates to.
closest(7, d)
# 10

closest(8, d)
# Bound(7, 28)

closest(30, d)
# Bound(28, None)

closest(-1, d)
# Bound(None, 1)

具有自定义dict类的解决方案：

import bisect
import collections


class Bound:
    """Pair of values exposed as ``left`` and ``right``.

    Instances print as ``Bound(left, right)``; either side may be ``None``.
    """

    def __init__(self, left, right):
        self.left, self.right = left, right

    def __repr__(self):
        sides = (self.left, self.right)
        return 'Bound({}, {})'.format(*sides)


class MyDict(collections.defaultdict):
    """Dict whose failed lookups return a ``Bound`` of the neighbouring keys.

    A sorted list of the keys (``self.lst``) is kept alongside the dict so
    that a miss is answered with one binary search. Keys must be mutually
    orderable. A miss never inserts anything into the dict.
    """

    def __init__(self, *args, **kwargs):
        # No default_factory is set; misses are handled by __missing__.
        super().__init__()
        dict.__init__(self, *args, **kwargs)
        self.lst = sorted(self)

    def _forget(self, key):
        """Drop ``key`` from the sorted key list, if it is there."""
        index = bisect.bisect_left(self.lst, key)
        if index < len(self.lst) and self.lst[index] == key:
            del self.lst[index]

    def __setitem__(self, key, value):
        # Insert into the key list first: if the key cannot be ordered against
        # the existing ones this raises before the dict itself is modified.
        if key not in self:
            bisect.insort_left(self.lst, key)
        super().__setitem__(key, value)

    def __delitem__(self, key):
        # Delete from the dict first so a missing key raises KeyError (not the
        # ValueError that list.remove would produce) and leaves lst untouched.
        super().__delitem__(key)
        self._forget(key)

    def __missing__(self, key):
        """Return the stored keys on either side of ``key`` as a ``Bound``."""
        right_index = bisect.bisect(self.lst, key)
        left = self.lst[right_index - 1] if right_index > 0 else None
        right = self.lst[right_index] if right_index < len(self.lst) else None
        return Bound(left, right)

    # The built-in dict mutators below do not go through __setitem__ or
    # __delitem__, so each one is overridden to keep self.lst in sync.

    def update(self, *args, **kwargs):
        """Add or overwrite items, accepting the same arguments as ``dict``."""
        for key, value in dict(*args, **kwargs).items():
            self[key] = value

    def __ior__(self, other):
        self.update(other)
        return self

    def setdefault(self, key, default=None):
        """Return the value for ``key``, storing ``default`` first if absent."""
        if key not in self:
            self[key] = default
        return dict.__getitem__(self, key)

    def pop(self, key, *default):
        """Remove ``key`` and return its value, or ``default`` if given."""
        present = key in self
        value = super().pop(key, *default)
        if present:
            self._forget(key)
        return value

    def popitem(self):
        """Remove and return one ``(key, value)`` pair."""
        key, value = super().popitem()
        self._forget(key)
        return key, value

    def clear(self):
        """Remove every item."""
        super().clear()
        del self.lst[:]

    def copy(self):
        """Return a shallow copy with its own key list."""
        # defaultdict.copy() would call type(self)(default_factory, self),
        # which this __init__ forwards to dict() and dict() rejects.
        return type(self)(self)

    __copy__ = copy


# Demo: probe below, between, on and above the stored keys, then delete a key
# and probe again.
d = MyDict([(1, 0), (7, 10), (28, 20)])
# Prints, one per line: Bound(None, 1), Bound(1, 7), 10, Bound(28, None)
print(*(d[probe] for probe in (-3, 6, 7, 33)), sep='\n')
del d[7]
print(d[6])  # Bound(1, 28)

(15, 28)

对于这个用例，对元组列表进行二分查找可能会更好。

编写一个 `defaultdict` 的自定义子类并覆盖其 `__missing__` 方法，让它返回一个 `Bound` 对象，而不是向 dict 添加一个新项，听起来正是您需要的。

@chepner 但是 `__missing__` 能在不迭代整个键集的情况下工作吗？

非常确定，不迭代是不可能的。字典查找不需要迭代就能工作的唯一原因是它使用了哈希表，但哈希表无法帮助您找到与给定键最接近的键。

从某种意义上说，您必须进行迭代。但是，子类可以为现有键单独维护一个区间列表，从而可以快速查找到正确的区间。
import bisect
import collections


class Bound:
    """Two-field record (``left``, ``right``) rendered as ``Bound(left, right)``."""

    def __init__(self, left, right):
        # Store both sides as given; None is an allowed value for either.
        self.left = left
        self.right = right

    def __repr__(self):
        lo, hi = self.left, self.right
        return 'Bound({}, {})'.format(lo, hi)


class MyDict(collections.defaultdict):
    """Mapping that answers a missing key with the keys around it.

    ``self.lst`` holds the keys in ascending order and is updated on every
    mutation, so a failed lookup needs only a binary search. Keys must be
    comparable with each other. Failed lookups do not add items.
    """

    def __init__(self, *args, **kwargs):
        # The defaultdict factory is left unset; __missing__ handles misses.
        super().__init__()
        dict.__init__(self, *args, **kwargs)
        self.lst = sorted(self)

    def _forget(self, key):
        """Remove ``key`` from the sorted key list when present."""
        position = bisect.bisect_left(self.lst, key)
        if position < len(self.lst) and self.lst[position] == key:
            del self.lst[position]

    def __setitem__(self, key, value):
        # Register the key before storing it, so an un-orderable key fails
        # without leaving the dict and the key list out of step.
        if key not in self:
            bisect.insort_left(self.lst, key)
        super().__setitem__(key, value)

    def __delitem__(self, key):
        # The dict deletion goes first: a missing key then raises KeyError
        # and the key list is left as it was.
        super().__delitem__(key)
        self._forget(key)

    def __missing__(self, key):
        """Build a ``Bound`` from the nearest stored keys below and above ``key``."""
        right_index = bisect.bisect(self.lst, key)
        left = self.lst[right_index - 1] if right_index > 0 else None
        right = self.lst[right_index] if right_index < len(self.lst) else None
        return Bound(left, right)

    # dict's own bulk/mutating methods skip __setitem__ and __delitem__;
    # the overrides below route them so that self.lst never goes stale.

    def update(self, *args, **kwargs):
        """Merge items in; takes the same arguments as the ``dict`` constructor."""
        for key, value in dict(*args, **kwargs).items():
            self[key] = value

    def __ior__(self, other):
        self.update(other)
        return self

    def setdefault(self, key, default=None):
        """Store ``default`` under ``key`` if absent, then return the value."""
        if key not in self:
            self[key] = default
        return dict.__getitem__(self, key)

    def pop(self, key, *default):
        """Remove ``key`` and return its value (or ``default`` when supplied)."""
        present = key in self
        value = super().pop(key, *default)
        if present:
            self._forget(key)
        return value

    def popitem(self):
        """Remove and return a single ``(key, value)`` pair."""
        key, value = super().popitem()
        self._forget(key)
        return key, value

    def clear(self):
        """Empty both the dict and the key list."""
        super().clear()
        del self.lst[:]

    def copy(self):
        """Return a shallow copy that owns a separate key list."""
        # The inherited defaultdict.copy() passes (default_factory, self) to
        # the constructor, which this __init__ cannot accept.
        return type(self)(self)

    __copy__ = copy


# Demo run; the trailing comment on each line is the text that line prints.
d = MyDict({1: 0, 7: 10, 28: 20})
print(d[-3])  # Bound(None, 1)
print(d[6])   # Bound(1, 7)
print(d[7])   # 10
print(d[33])  # Bound(28, None)
d.__delitem__(7)
print(d[6])   # Bound(1, 28)

def bound(x, d):
    """Return ``x`` if it is a key of ``d``, else the pair of keys around it.

    Args:
        x: The key to locate; must be orderable against the keys of ``d``.
        d: A mapping (or any iterable container of keys), in any order.

    Returns:
        ``x`` itself when it is present in ``d``. Otherwise a tuple
        ``(lower, upper)`` where ``lower`` is the largest key below ``x`` and
        ``upper`` is the smallest key above it. A side with no such key is
        ``None``, so an empty ``d`` gives ``(None, None)``.
    """
    if x in d:
        return x

    # One linear pass per side and no sorting; default=None covers the case
    # where x lies outside the range of the keys.
    lower = max((k for k in d if x > k), default=None)
    upper = min((k for k in d if k > x), default=None)
    return (lower, upper)


# Unsorted sample mapping; 17 is not a key, so the call evaluates to (15, 28).
d = {1: 0, 7: 10, 28: 20, 4: 5, 2: 5, 15: 10}

bound(17, d)