Python:递归获取第 n 层嵌套子字典的 get()——效率如何?
我需要从一些大型嵌套字典中取出一些值。出于懒惰,我决定编写一个递归调用自身的函数,直到找到最后一个子节点,或者叶子为空为止。由于过程中会不断取出子字典,而且每一次新的调用都会构建一个新的字典,我想知道这样做的效率如何。有什么建议吗?Python:递归获取第 n 层嵌套子字典的 get()——效率如何?,python,Python,我需要从一些大型嵌套字典中取出一些值。出于懒惰,我决定编写一个递归调用自身的函数,直到找到最后一个子节点,或者叶子为空为止。由于过程中会不断取出子字典,而且每一次新的调用都会构建一个新的字典,我想知道这样做的效率如何。有什么建议吗? def recursive_dict_get(item, string, default=False): if not isinstance(item, dict): return default print "called with ", item,
def recursive_dict_get(item, string, default=False):
    """Look up a dot-separated key path in nested dictionaries.

    Args:
        item: The dictionary to search. Anything that is not a ``dict``
            yields ``default``.
        string: Key path such as ``"1.2.3"``; each component is used as a
            string key at the corresponding nesting level.
        default: Value returned when the path cannot be followed.

    Returns:
        The value stored at the end of the path, or ``default`` when a key is
        missing, an intermediate value is ``None``, or an intermediate value
        is not a dictionary.
    """
    if not isinstance(item, dict):
        return default

    # Peel off only the first component; splitting the whole path and
    # re-joining the tail at every level repeats work on each recursive call.
    parent, dot, rest = string.partition(".")
    if not dot:
        # Last component: a plain lookup finishes the walk.
        return item.get(string, default)

    # Single lookup of the parent key (previously it was fetched twice).
    child = item.get(parent)
    if child is None:
        # Key absent, or present with a None value: nothing to descend into.
        return default
    return recursive_dict_get(child, rest, default)
---
我的一个建议是给 split() 传入第二个参数(maxsplit),让它只切分一次。这样可以写得更简单,例如:
parent, rest = string.split(".", 1)
除此之外,我认为代码没有直接的问题
您也可以在不使用递归的情况下执行此操作:
def recursive_dict_get(item, string, default=False):
    """Follow a dot-separated key path through nested dicts without recursion.

    Walks one path component at a time. Returns ``default`` as soon as the
    current value is not a ``dict`` or does not contain the next key;
    otherwise returns the value reached after the last component.
    """
    node = item
    for part in string.split('.'):
        # Guard clause: stop at the first level that cannot be descended.
        if not isinstance(node, dict) or part not in node:
            return default
        node = node[part]
    return node
还有一种方法:
def getVal(d, keys, default):
    """Return the value at a dot-separated key path inside nested dicts.

    Args:
        d: The outermost dictionary.
        keys: Key path as a string such as ``"1.2.3"``.
        default: Value returned when the path cannot be followed.

    Returns:
        The value found at the end of the path, or ``default`` if a key is
        missing or an intermediate value cannot be indexed by a string key.
    """
    # You can avoid this first step if you're willing to use a list like
    # ["1", "2", "3"...] as an input instead of a string like "1.2.3..."
    for key in keys.split("."):
        try:
            d = d[key]
        except (KeyError, TypeError):
            # KeyError: the key is absent from a dict.
            # TypeError: the path runs past a leaf (str, list, int, ...),
            # which does not raise KeyError when indexed with a string.
            return default
    return d
如果你愿意,我可以对它做性能分析——告诉我一声即可。请记住,除非您已经遇到、或有理由相信将会遇到瓶颈,否则优化是没有意义的。是的,您的实现效率相当低:即使它没有构建任何新字典,也可能会返回大量已有的字典。无论如何,您可以调整已被采纳的答案,把访问函数缩减为一行代码。这与 J.F. Sebastian(@jfs)在评论中提到的做法类似。我的做法如下:
def nonrecursive_dict_get(item, key_string, default=False):
    """Fold a dot-separated key path over nested dicts with ``reduce``.

    Args:
        item: The outermost dictionary.
        key_string: Key path such as ``"1.2.3"``.
        default: Value substituted whenever a key is missing.

    Returns:
        The value at the end of the path, or ``default`` when the path cannot
        be followed all the way.
    """
    # reduce is a builtin only on Python 2; functools provides it on 2.6+ and 3.
    from functools import reduce

    def step(node, key):
        # Once the walk has hit a miss or a leaf, `node` is no longer a dict;
        # calling .get on it would raise AttributeError, so yield the default.
        if isinstance(node, dict):
            return node.get(key, default)
        return default

    return reduce(step, key_string.split('.'), item)
print "*" * 3, 'using nonrecursive_dict_get()'
print nonrecursive_dict_get(foo, "1.2.3.4.5.6.7")
print "*" * 3
print nonrecursive_dict_get(foo, "1.2.3.4.5.6")
print "*" * 3
print nonrecursive_dict_get(foo, "1.3")
更新:
每当关注效率时,最好的办法往往是对各种方法运行基准测试。下面是一个我已经用过很多次的基准测试脚本:
# Source text shared by every benchmark case. It defines `foo`, a dict nested
# through the keys "1" .. "7" whose innermost value is "juice". It is kept as
# a string because it is concatenated with each case's 'setup' fragment and
# handed to timeit as setup code.
global_setup = """
foo = {
"1": {
"2": {
"3": {
"4": {
"5": {
"6": {
"7": "juice"
}
}
}
}
}
}
}
"""
# Benchmark cases, keyed by the name of the answer's author. Each case holds
#   'setup': source text defining the lookup function (executed untimed), and
#   'code' : source text of the three lookups that get timed.
# The fragments are indented for readability and are expected to be passed
# through textwrap.dedent() before being given to timeit. The bodies of the
# embedded functions must keep their relative indentation, otherwise timeit
# cannot compile them.
testcases = {
    "jay":
    { 'setup' : """
        def recursive_dict_get(item, string, default=False):
            if not isinstance(item, dict):
                return default
            if "." in string:
                attrs = string.split(".")
                parent = attrs.pop(0)
                rest = ".".join(attrs)
                result = item.get(parent, None)
                if result is None:
                    return default
                else:
                    return recursive_dict_get(item.get(parent, default), rest, default)
            else:
                return item.get(string, default)
      """,
      'code' : """
        recursive_dict_get(foo, "1.2.3.4.5.6.7", False)
        recursive_dict_get(foo, "1.2.3.4.5.6", False)
        recursive_dict_get(foo, "1.3", False)
      """,
    },
    "martineau":
    { 'setup' : """
        from functools import reduce  # builtin on Python 2 only
        def nonrecursive_dict_get(nested_dict, key_string, default=False):
            return reduce(lambda d, k: d.get(k, default), key_string.split('.'), nested_dict)
      """,
      'code' : """
        nonrecursive_dict_get(foo, "1.2.3.4.5.6.7", False)
        nonrecursive_dict_get(foo, "1.2.3.4.5.6", False)
        nonrecursive_dict_get(foo, "1.3", False)
      """,
    },
    "J.F. Sebastian":
    { 'setup' : """
        from functools import reduce  # builtin on Python 2 only
        # modified to support 'default' keyword argument
        def quick_n_dirty(nested_dict, key_string, default=False):
            reduced = reduce(dict.get, key_string.split('.'), nested_dict)
            return default if reduced is None else reduced
      """,
      'code' : """
        quick_n_dirty(foo, "1.2.3.4.5.6.7", False)
        quick_n_dirty(foo, "1.2.3.4.5.6", False)
        quick_n_dirty(foo, "1.3", False)
      """,
    },
    "arshajii":
    { 'setup' : """
        def recursive_dict_get(item, string, default=False):
            for s in string.split('.'):
                if (isinstance(item, dict) and s in item):
                    item = item[s]
                else:
                    return default
            return item
      """,
      'code' : """
        recursive_dict_get(foo, "1.2.3.4.5.6.7", False)
        recursive_dict_get(foo, "1.2.3.4.5.6", False)
        recursive_dict_get(foo, "1.3", False)
      """,
    },
    "Brionius":
    { 'setup' : """
        def getVal(d, keys, default):
            keys = keys.split(".")
            for key in keys:
                try:
                    d = d[key]
                except KeyError:
                    return default
            return d
      """,
      'code' : """
        getVal(foo, "1.2.3.4.5.6.7", False)
        getVal(foo, "1.2.3.4.5.6", False)
        getVal(foo, "1.3", False)
      """,
    },
}
import sys
from textwrap import dedent
import timeit
N = 100000  # executions of each 'code' fragment per timing run (timeit number=)
R = 3  # timing runs per case (timeit repeat=); the fastest run is reported

# remove leading whitespace from all code fragments
global_setup = dedent(global_setup)
# .values()/.items() exist on both Python 2 and 3 dicts, unlike
# itervalues()/iteritems(), which are Python 2 only.
for testcase in testcases.values():
    # Iterate over a snapshot because the entries are reassigned in the loop.
    for label, fragment in list(testcase.items()):
        testcase[label] = dedent(fragment)

# One (name, best_time) pair per case; min() keeps the fastest of the R runs.
timings = [(name,
            min(timeit.repeat(testcases[name]['code'],
                              setup=global_setup + testcases[name]['setup'],
                              repeat=R, number=N)),
            ) for name in testcases]

# Width used to right-align the case names in the report.
longest_name = max(len(t[0]) for t in timings)

print('fastest to slowest timings:\n'
      ' ({:,d} calls, best of {:d} repetitions)\n'.format(N, R))

ranked = sorted(timings, key=lambda t: t[1])  # sort by speed (fastest first)
for timing in ranked:
    # rel is the slowdown factor relative to the fastest case.
    print("{:>{width}} : {:.6f} secs ({rel:>8.6f}x)".format(
        timing[0], timing[1], rel=timing[1]/ranked[0][1], width=longest_name))
输出:
fastest to slowest timings:
 (100,000 calls, best of 3 repetitions)

J.F. Sebastian : 1.287209 secs (1.000000x)
      Brionius : 1.420099 secs (1.103239x)
      arshajii : 1.431521 secs (1.112x)
     martineau : 2.031539 secs (1.578251x)
           jay : 7.817713 secs (6.073384x)
正如你所看到的,J.F. Sebastian 的建议是最快的,即使我做了修改,使其功能与其他方案保持一致。我很困惑——你是在问有没有更好的实现方法,还是在问如何测试效率?你的代码并没有构建任何新的字典。一种快速的写法:
print reduce(dict.get, "1.2.3.4.5.6.7".split('.'), nested_dict)
我更喜欢迭代而不是递归。这里还需要类型检查:if not isinstance(d, dict): return default
(因为在非 dict 对象上执行 d[key] 不一定会引发 KeyError)。如果 dict 树的键和值并不总是字符串,这种情况就可能出现。如果示例输入不具有代表性,我会留给提问者(OP)自行补充。基于基准测试结果以及一定程度的优雅性,+1。
# Source text shared by every benchmark case. It defines `foo`, a dict nested
# through the keys "1" .. "7" whose innermost value is "juice". It is kept as
# a string because it is concatenated with each case's 'setup' fragment and
# handed to timeit as setup code.
global_setup = """
foo = {
"1": {
"2": {
"3": {
"4": {
"5": {
"6": {
"7": "juice"
}
}
}
}
}
}
}
"""
# Benchmark cases, keyed by the name of the answer's author. Each case holds
#   'setup': source text defining the lookup function (executed untimed), and
#   'code' : source text of the three lookups that get timed.
# The fragments are indented for readability and are expected to be passed
# through textwrap.dedent() before being given to timeit. The bodies of the
# embedded functions must keep their relative indentation, otherwise timeit
# cannot compile them.
testcases = {
    "jay":
    { 'setup' : """
        def recursive_dict_get(item, string, default=False):
            if not isinstance(item, dict):
                return default
            if "." in string:
                attrs = string.split(".")
                parent = attrs.pop(0)
                rest = ".".join(attrs)
                result = item.get(parent, None)
                if result is None:
                    return default
                else:
                    return recursive_dict_get(item.get(parent, default), rest, default)
            else:
                return item.get(string, default)
      """,
      'code' : """
        recursive_dict_get(foo, "1.2.3.4.5.6.7", False)
        recursive_dict_get(foo, "1.2.3.4.5.6", False)
        recursive_dict_get(foo, "1.3", False)
      """,
    },
    "martineau":
    { 'setup' : """
        from functools import reduce  # builtin on Python 2 only
        def nonrecursive_dict_get(nested_dict, key_string, default=False):
            return reduce(lambda d, k: d.get(k, default), key_string.split('.'), nested_dict)
      """,
      'code' : """
        nonrecursive_dict_get(foo, "1.2.3.4.5.6.7", False)
        nonrecursive_dict_get(foo, "1.2.3.4.5.6", False)
        nonrecursive_dict_get(foo, "1.3", False)
      """,
    },
    "J.F. Sebastian":
    { 'setup' : """
        from functools import reduce  # builtin on Python 2 only
        # modified to support 'default' keyword argument
        def quick_n_dirty(nested_dict, key_string, default=False):
            reduced = reduce(dict.get, key_string.split('.'), nested_dict)
            return default if reduced is None else reduced
      """,
      'code' : """
        quick_n_dirty(foo, "1.2.3.4.5.6.7", False)
        quick_n_dirty(foo, "1.2.3.4.5.6", False)
        quick_n_dirty(foo, "1.3", False)
      """,
    },
    "arshajii":
    { 'setup' : """
        def recursive_dict_get(item, string, default=False):
            for s in string.split('.'):
                if (isinstance(item, dict) and s in item):
                    item = item[s]
                else:
                    return default
            return item
      """,
      'code' : """
        recursive_dict_get(foo, "1.2.3.4.5.6.7", False)
        recursive_dict_get(foo, "1.2.3.4.5.6", False)
        recursive_dict_get(foo, "1.3", False)
      """,
    },
    "Brionius":
    { 'setup' : """
        def getVal(d, keys, default):
            keys = keys.split(".")
            for key in keys:
                try:
                    d = d[key]
                except KeyError:
                    return default
            return d
      """,
      'code' : """
        getVal(foo, "1.2.3.4.5.6.7", False)
        getVal(foo, "1.2.3.4.5.6", False)
        getVal(foo, "1.3", False)
      """,
    },
}
import sys
from textwrap import dedent
import timeit
N = 100000  # executions of each 'code' fragment per timing run (timeit number=)
R = 3  # timing runs per case (timeit repeat=); the fastest run is reported

# remove leading whitespace from all code fragments
global_setup = dedent(global_setup)
# .values()/.items() exist on both Python 2 and 3 dicts, unlike
# itervalues()/iteritems(), which are Python 2 only.
for testcase in testcases.values():
    # Iterate over a snapshot because the entries are reassigned in the loop.
    for label, fragment in list(testcase.items()):
        testcase[label] = dedent(fragment)

# One (name, best_time) pair per case; min() keeps the fastest of the R runs.
timings = [(name,
            min(timeit.repeat(testcases[name]['code'],
                              setup=global_setup + testcases[name]['setup'],
                              repeat=R, number=N)),
            ) for name in testcases]

# Width used to right-align the case names in the report.
longest_name = max(len(t[0]) for t in timings)

print('fastest to slowest timings:\n'
      ' ({:,d} calls, best of {:d} repetitions)\n'.format(N, R))

ranked = sorted(timings, key=lambda t: t[1])  # sort by speed (fastest first)
for timing in ranked:
    # rel is the slowdown factor relative to the fastest case.
    print("{:>{width}} : {:.6f} secs ({rel:>8.6f}x)".format(
        timing[0], timing[1], rel=timing[1]/ranked[0][1], width=longest_name))