Python-Unflatten dict
我有一个多维的格言:Python-Unflatten dict,python,Python,我有一个多维的格言: a = {'a' : 'b', 'c' : {'d' : 'e'}} 并编写了一个简单的函数来平展该命令: def __flatten(self, dictionary, level = []): tmp_dict = {} for key, val in dictionary.items(): if type(val) == dict: tmp_dict.update(self.__flatten(val, lev
a = {'a' : 'b', 'c' : {'d' : 'e'}}
并编写了一个简单的函数来展平该字典:
def __flatten(self, dictionary, level=None):
    """Flatten a nested dict into one level with dot-joined string keys.

    Example: ``{'a': 'b', 'c': {'d': 'e'}}`` becomes
    ``{'a': 'b', 'c.d': 'e'}``.

    Args:
        dictionary: The (possibly nested) dict to flatten. Keys must be
            strings, because they are combined with ``'.'.join``.
        level: Optional list of parent key segments to prefix every
            produced key with. It is never mutated.

    Returns:
        A new flat dict. Nested dicts that are empty contribute no keys.

    Raises:
        TypeError: If a key on any path is not a string.
    """
    # A fresh list per call instead of a shared mutable default argument.
    prefix = [] if level is None else list(level)
    flat = {}

    def collect(mapping, path):
        # Depth-first walk; leaves are written straight into ``flat``.
        for key, val in mapping.items():
            # isinstance (not ``type(val) == dict``) so that dict
            # subclasses such as OrderedDict are flattened as well.
            if isinstance(val, dict):
                collect(val, path + [key])
            else:
                flat['.'.join(path + [key])] = val

    collect(dictionary, prefix)
    return flat
用字典 a
调用此函数后,我得到结果:
{'a' : 'b', 'c.d' : 'e'}
现在,在对这个展平的dict做了一些说明之后,我需要从这个展平的dict构建新的多维dict。例如:
>> unflatten({'a' : 0, 'c.d' : 1})
{'a' : 0, 'c' : {'d' : 1}}
我唯一的问题是我没有一个函数unflatten
:)有人能帮忙吗?我不知道怎么做 编辑: 另一个例子:
{'a' : 'b', 'c.d.e.f.g.h.i.j.k.l.m.n.o.p.r.s.t.u.w' : 'z'}
取消展平后应得到:
{'a': 'b', 'c': {'d': {'e': {'f': {'g': {'h': {'i': {'j': {'k': {'l': {'m': {'n': {'o': {'p': {'r': {'s': {'t': {'u': {'w': 'z'}}}}}}}}}}}}}}}}}}}
还有一个:
{'a' : 'b', 'c.d' : 'z', 'c.e' : 1}
应变为:
{'a' : 'b', 'c' : {'d' : 'z', 'e' : 1}}
我知道这大大增加了任务的难度。这就是为什么我在这个问题上遇到了问题,在几个小时内没有找到解决方案。作为一个粗略的草案(可能需要在变量名选择上做一些改进,也许还有健壮性,但它适用于给定的示例):
这里是一个利用Python3.5+特性的应用程序,比如键入和解构赋值
一年前我用Python2和Python3编写了这两个版本,我在下面进行了改编。这是为了更容易地检查给定词典是否是较大词典的子集,而不管它是以扁平形式还是以支架形式提供的 一个额外的特性:如果有连续的整数索引(如0、1、2、3、4等),这也会将它们转换回列表
def unflatten_dictionary(field_dict):
    """Rebuild a nested structure from a dict whose keys encode paths.

    Each key is split at its first ``"."`` or ``"["``:

    * ``"parent.rest"`` stores ``rest`` under ``parent``.
    * ``"parent[3]"`` stores integer key ``3`` under ``parent``; anything
      after ``"]."`` is kept as a dotted remainder for the recursive pass.

    Every dict value (grouped here or already nested in the input) is then
    unflattened recursively. A non-empty level whose keys are all digit
    strings/ints and whose highest index equals ``len - 1`` is returned as
    a list instead of a dict.

    Args:
        field_dict: Anything accepted by ``dict()``; it is copied first.

    Returns:
        A dict, or a list when the top-level keys form list indices.
        An empty input yields an empty dict.

    Raises:
        ValueError: If a key contains ``"["`` without a ``"]"``, or the
            text between the brackets is not an integer.
    """
    field_dict = dict(field_dict)
    new_field_dict = {}

    for each_key in _sorted_keys(field_dict):
        field_value = field_dict[each_key]
        processed_key = str(each_key)
        current_key = None
        current_subkey = None

        # Only the first separator is handled here; deeper levels are
        # resolved by the recursive call further down.
        for i, char in enumerate(processed_key):
            if char == "[":
                current_key = processed_key[:i]
                end_subscript_index = processed_key.index("]")
                current_subkey = int(processed_key[i + 1:end_subscript_index])
                # Keep descendant keys (the text after "].") for later.
                if len(processed_key[end_subscript_index:]) > 1:
                    current_subkey = "{}.{}".format(
                        current_subkey,
                        processed_key[end_subscript_index + 2:],
                    )
                break
            if char == ".":
                current_key, current_subkey = processed_key.split(".", 1)
                break

        if current_subkey is None:
            # Plain key: keep the original (possibly non-string) key object.
            new_field_dict[each_key] = field_value
            continue

        if current_key.isdigit():
            current_key = int(current_key)
        if current_key not in new_field_dict:
            new_field_dict[current_key] = {}
        new_field_dict[current_key][current_subkey] = field_value

    # Recurse into every dict and, in the same pass, check whether this
    # level's keys could serve as list indices.
    all_digits = True
    highest_digit = -1
    for each_key, each_item in new_field_dict.items():
        if isinstance(each_item, dict):
            new_field_dict[each_key] = unflatten_dictionary(each_item)
        all_digits = all_digits and str(each_key).isdigit()
        if all_digits:
            highest_digit = max(highest_digit, int(each_key))

    # The emptiness guard matters: for an empty dict highest_digit (-1)
    # equals len - 1 (-1), which would otherwise turn {} into [].
    if new_field_dict and all_digits and highest_digit == len(new_field_dict) - 1:
        new_list = [None] * len(new_field_dict)
        for k in _sorted_keys(new_field_dict):
            new_list[int(k)] = new_field_dict[k]
        return new_list
    return new_field_dict


def _sorted_keys(keys):
    """Return keys sorted naturally, or by their str() form if the key
    types cannot be compared with each other (e.g. int and str)."""
    try:
        return sorted(keys)
    except TypeError:
        return sorted(keys, key=str)
# Test
if __name__ == '__main__':
    # Demo input: bracket indices, dotted paths, numeric path segments,
    # a sparse index (some[2] without some[1]) and an already-nested value.
    input_dict = {
        'a[0]': 1,
        'a[1]': 10,
        'a[2]': 5,
        'b': 10,
        'c.test.0': "hi",
        'c.test.1': "bye",
        "c.head.shoulders": "richard",
        "c.head.knees": 'toes',
        "z.trick.or[0]": "treat",
        "z.trick.or[1]": "halloween",
        "z.trick.and.then[0]": "he",
        "z.trick.and.then[1]": "it",
        "some[0].nested.field[0]": 42,
        "some[0].nested.field[1]": 43,
        "some[2].nested.field[0]": 44,
        "mixed": {
            "statement": "test",
            "break[0]": True,
            "break[1]": False,
        },
    }

    # Structure the call below is expected to produce for input_dict.
    expected_dict = {
        'a': [1, 10, 5],
        'b': 10,
        'c': {
            'test': ['hi', 'bye'],
            'head': {'shoulders': 'richard', 'knees': 'toes'},
        },
        'z': {
            'trick': {
                'or': ["treat", "halloween"],
                'and': {'then': ["he", "it"]},
            },
        },
        'some': {
            0: {'nested': {'field': [42, 43]}},
            2: {'nested': {'field': [44]}},
        },
        "mixed": {
            "statement": "test",
            "break": [True, False],
        },
    }

    separator = "===================================="

    print("Input:")
    print(input_dict)
    print(separator)
    print("Output:")
    actual_dict = unflatten_dictionary(input_dict)
    print(actual_dict)
    print(separator)
    outcome = expected_dict == actual_dict
    print(f"Test passed? {outcome}")
def unflatten\u字典(字段dict):
字段记录=字段记录(字段记录)
新字段\u dict=dict()
字段键=列表(字段键)
字段_keys.sort()
对于字段_key中的每个_key:
field_value=field_dict[每个_键]
已处理的\u键=str(每个\u键)
当前_键=无
当前_子键=无
对于范围内的i(len(已处理的_键)):
如果已处理_键[i]=“[”:
当前_键=已处理的_键[:i]
开始下标索引=i+1
end_subscript_index=processed_key.index(“]”)
当前子项=int(已处理的子项[开始子项索引:结束子项索引])
#保留其余子代键,以便稍后在递归调用中处理
如果len(已处理的索引键[end\u subscript\u index:])>1:
当前_子键=“{}.{}”。格式(当前_子键,已处理_键[结束_下标_索引+2:])
打破
#下一个子键是字典
elif处理的_键[i]=“:
拆分工作=已处理的密钥拆分(“.”,1)
如果len(分割工作)>1:
当前_键,当前_子键=拆分工作
其他:
当前\u键=拆分\u工作[0]
打破
如果当前_子项不是无:
如果当前_key.isdigit():
当前_键=int(当前_键)
如果当前_键不在新_字段中_dict:
新建字段dict[当前密钥]=dict()
新建\u字段\u dict[当前\u键][当前\u子键]=字段\u值
其他:
新建\u字段\u dict[每个\u键]=字段\u值
#在返回调用方之前,递归地取消每个深度上的每个字典。
所有数字=真
最高数字=-1
对于每个\u键,新\u字段中的每个\u项\u dict.items()
如果存在(每个项目,说明):
新建\u字段\u dict[每个\u键]=取消设置\u字典(每个\u项)
#验证密钥是否可以安全地转换为顺序列表。
所有数字&=str(每个键).isdigit()
如果所有_数字:
下一个数字=int(每个键)
如果下一位>最高位:
最高\u位=下一个\u位
#如果所有数字和可以按顺序排列,则转换为列表。
如果所有_位和最高_位==(len(新_字段_dict)-1):
数字键=列表(新字段)
数字键。排序()
新列表=[]
对于k位数字键:
i=int(k)
如果len(new_list)真的我不明白为什么'c':{'d':'e'}
变成'c.d':'e'
,如果你有'c':{'f':'g','d':'e'}
,那么{'c':{'f':'f':'g','d':'e'}
变成{'c.f':'g','c.d':'e':'e'}这个实现比我的{/code>更好,因为它处理的输入是'a','d''e'>'以及任何更深的嵌套(更多点)。我害怕写下我自己的答案。。。不知道会发生什么事它完全符合我的需要。谢谢你,你救了我一整晚的思考:)该死!不能否决我自己的答案,即使我有足够的勇气承认它不如另一个(在这种情况下是Messa的)。+1用于使用递归,但不正确:例如{'a.b':0,'a.c':1}
将返回{'a':{'c':1}
:)可以通过使用ret[k1]来解决。更新(…)
。@Lucasmus,是的,我现在把它改回原来的答案。我一时觉得可以简化一下
def unflatten(d):
    """Expand dot-separated string keys into nested dicts.

    ``{'a': 0, 'c.d': 1}`` becomes ``{'a': 0, 'c': {'d': 1}}``. Keys that
    share a prefix are merged (``'a.b'`` and ``'a.c'`` both end up under
    ``'a'``) and any number of dots is supported.

    When a key collides with something already stored under the same
    name, the later key in iteration order wins. Values supplied by the
    caller are never modified.

    Args:
        d: Flat dict with string keys.

    Returns:
        A new nested dict.
    """
    result = {}
    # ids of the intermediate dicts created here; only these may be merged
    # into, so dict values passed in by the caller are never mutated.
    owned = set()
    for k, v in d.items():
        node = result
        head, dot, rest = k.partition('.')
        while dot:
            child = node.get(head)
            if id(child) not in owned:
                # Missing, or occupied by a caller-supplied value:
                # start a fresh level (later keys win).
                child = node[head] = {}
                owned.add(id(child))
            node = child
            head, dot, rest = rest.partition('.')
        node[head] = v
    return result
def unflatten(dictionary):
    """Turn a dict with dot-separated keys into nested dicts.

    Every key is split on ``"."``; all segments but the last name the
    chain of nested dicts (created on demand) and the last segment
    receives the value.

    Args:
        dictionary: Flat dict whose keys are strings.

    Returns:
        A new nested dict, e.g. ``{'c.d': 1}`` -> ``{'c': {'d': 1}}``.

    Raises:
        TypeError: If an intermediate segment is already bound to a
            non-dict value (for example ``'a'`` and ``'a.b'`` together).
    """
    resultDict = {}
    # ``items()`` instead of the Python 2-only ``iteritems()``.
    for key, value in dictionary.items():
        *parents, leaf = key.split(".")
        node = resultDict
        for part in parents:
            # Reuse the level if it exists, otherwise create it.
            node = node.setdefault(part, {})
        node[leaf] = value
    return resultDict
from collections import defaultdict
def unflatten(d):
    """Expand dot-separated keys of ``d`` into nested dicts.

    Keys are grouped by the text before their first ``'.'``; each group is
    then expanded recursively, so paths of any depth are supported
    (``'c.d.e'`` -> ``{'c': {'d': {'e': ...}}}``).

    Args:
        d: Flat dict with string keys.

    Returns:
        A ``defaultdict(dict)`` for the top level (as before); all nested
        levels are plain dicts.
    """
    ret = defaultdict(dict)
    # Heads whose current value is a group of still-flattened children.
    pending = set()
    for k, v in d.items():
        head, delim, rest = k.partition('.')
        if delim:
            ret[head][rest] = v
            pending.add(head)
        else:
            ret[head] = v
            # A plain key replaces any group collected so far.
            pending.discard(head)
    for head in pending:
        # The remainder keys may still contain dots: expand them too.
        ret[head] = dict(unflatten(ret[head]))
    return ret
from typing import Any, Dict
def unflatten(
    d: Dict[str, Any],
    base: Dict[str, Any] = None,
) -> Dict[str, Any]:
    """Convert any keys containing dotted paths to nested dicts.

    Dict values are unflattened recursively and merged into whatever dict
    already sits at the same position. Whenever a later key needs a
    position that holds a non-dict value, the later key wins.

    Args:
        d: Mapping whose string keys may contain ``'.'`` separated paths.
        base: Dict to merge into (modified in place); a new dict is used
            when omitted.

    Returns:
        ``base`` (or the newly created dict) with all keys expanded.

    >>> unflatten({'a': 12, 'b': 13, 'c': 14})  # no expansion
    {'a': 12, 'b': 13, 'c': 14}

    >>> unflatten({'a.b.c': 12})  # dotted path expansion
    {'a': {'b': {'c': 12}}}

    >>> unflatten({'a.b.c': 12, 'a': {'b.d': 13}})  # merging
    {'a': {'b': {'c': 12, 'd': 13}}}

    >>> unflatten({'a.b': 12, 'a': {'b': 13}})  # insertion-order overwrites
    {'a': {'b': 13}}

    >>> unflatten({'a': 1, 'a.b': 2})  # a scalar on the path is replaced
    {'a': {'b': 2}}

    >>> unflatten({'a': {}})  # empty dicts are preserved
    {'a': {}}
    """
    if base is None:
        base = {}

    for key, value in d.items():
        root = base

        # For a dotted path, walk (creating as needed) every level but the
        # last, then continue with the final segment as the key. This
        # leaves a single assignment at the bottom of the loop.
        if '.' in key:
            *parts, key = key.split('.')
            for part in parts:
                child = root.get(part)
                if not isinstance(child, dict):
                    # Missing, or a scalar stored earlier: the later key
                    # wins, so replace it with a fresh level.
                    child = root[part] = {}
                root = child

        if isinstance(value, dict):
            existing = root.get(key)
            # Merge only into an existing dict; anything else is replaced.
            target = existing if isinstance(existing, dict) else {}
            value = unflatten(value, target)

        root[key] = value

    return base
def unflatten_dictionary(field_dict):
    """Rebuild a nested structure from a dict whose keys encode paths.

    Each key is split at its first ``"."`` or ``"["``:

    * ``"parent.rest"`` stores ``rest`` under ``parent``.
    * ``"parent[3]"`` stores integer key ``3`` under ``parent``; anything
      after ``"]."`` is kept as a dotted remainder for the recursive pass.

    Every dict value (grouped here or already nested in the input) is then
    unflattened recursively. A non-empty level whose keys are all digit
    strings/ints and whose highest index equals ``len - 1`` is returned as
    a list instead of a dict.

    Args:
        field_dict: Anything accepted by ``dict()``; it is copied first.

    Returns:
        A dict, or a list when the top-level keys form list indices.
        An empty input yields an empty dict.

    Raises:
        ValueError: If a key contains ``"["`` without a ``"]"``, or the
            text between the brackets is not an integer.
    """
    field_dict = dict(field_dict)
    new_field_dict = {}

    for each_key in _sorted_keys(field_dict):
        field_value = field_dict[each_key]
        processed_key = str(each_key)
        current_key = None
        current_subkey = None

        # Only the first separator is handled here; deeper levels are
        # resolved by the recursive call further down.
        for i, char in enumerate(processed_key):
            if char == "[":
                current_key = processed_key[:i]
                end_subscript_index = processed_key.index("]")
                current_subkey = int(processed_key[i + 1:end_subscript_index])
                # Keep descendant keys (the text after "].") for later.
                if len(processed_key[end_subscript_index:]) > 1:
                    current_subkey = "{}.{}".format(
                        current_subkey,
                        processed_key[end_subscript_index + 2:],
                    )
                break
            if char == ".":
                current_key, current_subkey = processed_key.split(".", 1)
                break

        if current_subkey is None:
            # Plain key: keep the original (possibly non-string) key object.
            new_field_dict[each_key] = field_value
            continue

        if current_key.isdigit():
            current_key = int(current_key)
        if current_key not in new_field_dict:
            new_field_dict[current_key] = {}
        new_field_dict[current_key][current_subkey] = field_value

    # Recurse into every dict and, in the same pass, check whether this
    # level's keys could serve as list indices.
    all_digits = True
    highest_digit = -1
    for each_key, each_item in new_field_dict.items():
        if isinstance(each_item, dict):
            new_field_dict[each_key] = unflatten_dictionary(each_item)
        all_digits = all_digits and str(each_key).isdigit()
        if all_digits:
            highest_digit = max(highest_digit, int(each_key))

    # The emptiness guard matters: for an empty dict highest_digit (-1)
    # equals len - 1 (-1), which would otherwise turn {} into [].
    if new_field_dict and all_digits and highest_digit == len(new_field_dict) - 1:
        new_list = [None] * len(new_field_dict)
        for k in _sorted_keys(new_field_dict):
            new_list[int(k)] = new_field_dict[k]
        return new_list
    return new_field_dict


def _sorted_keys(keys):
    """Return keys sorted naturally, or by their str() form if the key
    types cannot be compared with each other (e.g. int and str)."""
    try:
        return sorted(keys)
    except TypeError:
        return sorted(keys, key=str)
# Test
if __name__ == '__main__':
    # Demo input: bracket indices, dotted paths, numeric path segments,
    # a sparse index (some[2] without some[1]) and an already-nested value.
    input_dict = {
        'a[0]': 1,
        'a[1]': 10,
        'a[2]': 5,
        'b': 10,
        'c.test.0': "hi",
        'c.test.1': "bye",
        "c.head.shoulders": "richard",
        "c.head.knees": 'toes',
        "z.trick.or[0]": "treat",
        "z.trick.or[1]": "halloween",
        "z.trick.and.then[0]": "he",
        "z.trick.and.then[1]": "it",
        "some[0].nested.field[0]": 42,
        "some[0].nested.field[1]": 43,
        "some[2].nested.field[0]": 44,
        "mixed": {
            "statement": "test",
            "break[0]": True,
            "break[1]": False,
        },
    }

    # Structure the call below is expected to produce for input_dict.
    expected_dict = {
        'a': [1, 10, 5],
        'b': 10,
        'c': {
            'test': ['hi', 'bye'],
            'head': {'shoulders': 'richard', 'knees': 'toes'},
        },
        'z': {
            'trick': {
                'or': ["treat", "halloween"],
                'and': {'then': ["he", "it"]},
            },
        },
        'some': {
            0: {'nested': {'field': [42, 43]}},
            2: {'nested': {'field': [44]}},
        },
        "mixed": {
            "statement": "test",
            "break": [True, False],
        },
    }

    separator = "===================================="

    print("Input:")
    print(input_dict)
    print(separator)
    print("Output:")
    actual_dict = unflatten_dictionary(input_dict)
    print(actual_dict)
    print(separator)
    outcome = expected_dict == actual_dict
    print(f"Test passed? {outcome}")