Python-Unflatten dict_Python - Fatal编程技术网

Python-Unflatten dict

python

Python-Unflatten dict,python,Python,我有一个多维的格言： a = {'a' : 'b', 'c' : {'d' : 'e'}} 并编写了一个简单的函数来平展该命令： def __flatten(self, dictionary, level = []): tmp_dict = {} for key, val in dictionary.items(): if type(val) == dict: tmp_dict.update(self.__flatten(val, lev

我有一个多维（嵌套）的字典（dict）：

a = {'a' : 'b', 'c' : {'d' : 'e'}}

并编写了一个简单的函数来展平（flatten）这个字典：

def __flatten(self, dictionary, level=None):
    """Flatten a nested dict into a single-level dict with dot-joined keys.

    Args:
        dictionary: The dict to flatten. Values that are ``dict`` instances
            (including subclasses) are expanded recursively.
        level: List of parent keys accumulated so far. Callers normally
            omit it; the recursion passes it along.

    Returns:
        A new dict mapping ``'.'.join(path)`` to every non-dict leaf value.
        Empty nested dicts contribute no entries.

    Raises:
        TypeError: If a key on the path is not a string (``str.join``
            rejects non-string items).
    """
    # ``None`` sentinel instead of a shared mutable default list.
    level = [] if level is None else level
    tmp_dict = {}
    for key, val in dictionary.items():
        path = level + [key]
        # isinstance (not ``type(val) == dict``) so that dict subclasses are
        # flattened as well instead of being stored as opaque leaf values.
        if isinstance(val, dict):
            tmp_dict.update(self.__flatten(val, path))
        else:
            tmp_dict['.'.join(path)] = val
    return tmp_dict

用 dict a

调用此函数后，我得到结果：

{'a' : 'b', 'c.d' : 'e'}

现在，在对这个展平的dict做了一些说明之后，我需要从这个展平的dict构建新的多维dict。例如：

>>> unflatten({'a' : 0, 'c.d' : 1})
{'a' : 0, 'c' : {'d' : 1}}

我唯一的问题是我没有一个函数

unflatten

：）
有人能帮忙吗？我不知道怎么做

编辑：

另一个例子：

{'a' : 'b', 'c.d.e.f.g.h.i.j.k.l.m.n.o.p.r.s.t.u.w' : 'z'}

在还原嵌套（unflatten）之后应该得到：

{'a': 'b', 'c': {'d': {'e': {'f': {'g': {'h': {'i': {'j': {'k': {'l': {'m': {'n': {'o': {'p': {'r': {'s': {'t': {'u': {'w': 'z'}}}}}}}}}}}}}}}}}}}

还有一个：

{'a' : 'b', 'c.d' : 'z', 'c.e' : 1}

应还原为：

{'a' : 'b', 'c' : {'d' : 'z', 'e' : 1}}

我知道这大大增加了任务的难度。这就是为什么我在这个问题上遇到了问题，在几个小时内没有找到解决方案。

作为一个粗略的草案（可能需要在变量名选择上做一些改进，也许还有健壮性，但它适用于给定的示例）：

这是一个利用 Python 3.5+ 特性（例如类型注解和解构赋值）的实现：

一年前我用Python2和Python3编写了这两个版本，我在下面进行了改编。这是为了更容易地检查给定词典是否是较大词典的子集，而不管它是以扁平形式还是以嵌套形式提供的

一个额外的特性：如果有连续的整数索引（如0、1、2、3、4等），这也会将它们转换回列表

def _split_flat_key(flat_key):
    """Split *flat_key* at its first ``[`` or ``.`` into ``(parent, child)``.

    ``child`` is ``None`` when the key contains no separator (a leaf key),
    an ``int`` for a trailing subscript (``"a[3]"`` -> ``("a", 3)``), or a
    string holding the rest of the path, to be split by a later recursive
    call (``"a[3].b"`` -> ``("a", "3.b")``, ``"a.b.c"`` -> ``("a", "b.c")``).

    Raises:
        ValueError: If a ``[`` has no closing ``]`` or the subscript is not
            an integer.
    """
    bracket = flat_key.find("[")
    dot = flat_key.find(".")

    if bracket != -1 and (dot == -1 or bracket < dot):
        close = flat_key.find("]", bracket)
        if close == -1:
            raise ValueError(
                "unterminated subscript in key {!r}".format(flat_key))
        parent = flat_key[:bracket]
        index = int(flat_key[bracket + 1:close])
        rest = flat_key[close + 1:]
        if not rest:
            return parent, index
        if rest.startswith("["):
            # Chained subscript ("a[0][1]"): keep the next bracket attached
            # to the index so the recursive call sees "0[1]".
            return parent, "{}{}".format(index, rest)
        if rest.startswith("."):
            rest = rest[1:]
        return parent, "{}.{}".format(index, rest)

    if dot != -1:
        parent, child = flat_key.split(".", 1)
        return parent, child

    return flat_key, None


def unflatten_dictionary(field_dict):
    """Rebuild a nested structure from a dict with flattened keys.

    Keys may use dotted paths (``"a.b.c"``) and integer subscripts
    (``"a[0]"``, ``"a[0].b"``, ``"a[0][1]"``). Values that are dicts are
    unflattened recursively as well.

    A level whose keys are exactly the integers ``0 .. n-1`` (as ints or
    decimal strings, ``n >= 1``) is returned as a list; every other level,
    including an empty one, is returned as a dict. When a plain key and a
    dotted/subscripted path collide on a non-dict value, the path replaces
    that value.

    Args:
        field_dict: Mapping with flattened keys. It is not modified.

    Returns:
        A new dict, or a list when the top-level keys form ``0 .. n-1``.

    Raises:
        ValueError: If a key has an unterminated or non-integer subscript.
    """
    field_dict = dict(field_dict)
    new_field_dict = dict()
    field_keys = list(field_dict)
    try:
        field_keys.sort()
    except TypeError:
        # Mixed int/str keys (produced e.g. by "a[0]" next to "a[1].b")
        # cannot be ordered directly; fall back to their string form.
        field_keys.sort(key=str)

    for each_key in field_keys:
        field_value = field_dict[each_key]
        current_key, current_subkey = _split_flat_key(str(each_key))

        if current_subkey is None:
            # Copy dict values so that merging a later dotted key into this
            # slot never mutates the caller's nested dict.
            if isinstance(field_value, dict):
                field_value = dict(field_value)
            new_field_dict[each_key] = field_value
            continue

        # isdecimal() (not isdigit()): it matches exactly what int() accepts.
        if current_key.isdecimal():
            current_key = int(current_key)
        group = new_field_dict.get(current_key)
        if not isinstance(group, dict):
            group = new_field_dict[current_key] = dict()
        group[current_subkey] = field_value

    # Recursively unflatten each dictionary on each depth before returning.
    for each_key, each_item in list(new_field_dict.items()):
        if isinstance(each_item, dict):
            new_field_dict[each_key] = unflatten_dictionary(each_item)

    # An empty level stays a dict rather than collapsing into an empty list.
    if not new_field_dict:
        return new_field_dict

    # Map every integer index to the key that carries it; bail out to the
    # dict form as soon as one key is not a plain decimal number.
    key_by_index = {}
    for each_key in new_field_dict:
        key_text = str(each_key)
        if not key_text.isdecimal():
            return new_field_dict
        key_by_index[int(key_text)] = each_key

    size = len(new_field_dict)
    # Distinct indices with max == size - 1 means the keys are 0 .. size-1.
    if len(key_by_index) != size or max(key_by_index) != size - 1:
        return new_field_dict

    return [new_field_dict[key_by_index[i]] for i in range(size)]

# Demo / self-check: unflatten a sample dict that mixes subscripts, dotted
# paths and an already-nested value, then compare with the expected result.
if __name__ == '__main__':
    input_dict = {
        "a[0]": 1,
        "a[1]": 10,
        "a[2]": 5,
        "b": 10,
        "c.test.0": "hi",
        "c.test.1": "bye",
        "c.head.shoulders": "richard",
        "c.head.knees": "toes",
        "z.trick.or[0]": "treat",
        "z.trick.or[1]": "halloween",
        "z.trick.and.then[0]": "he",
        "z.trick.and.then[1]": "it",
        "some[0].nested.field[0]": 42,
        "some[0].nested.field[1]": 43,
        "some[2].nested.field[0]": 44,
        "mixed": {"statement": "test", "break[0]": True, "break[1]": False},
    }
    expected_dict = {
        "a": [1, 10, 5],
        "b": 10,
        "c": {
            "test": ["hi", "bye"],
            "head": {"shoulders": "richard", "knees": "toes"},
        },
        "z": {
            "trick": {
                "or": ["treat", "halloween"],
                "and": {"then": ["he", "it"]},
            },
        },
        # Indices 0 and 2 are not contiguous, so "some" stays a dict.
        "some": {
            0: {"nested": {"field": [42, 43]}},
            2: {"nested": {"field": [44]}},
        },
        "mixed": {"statement": "test", "break": [True, False]},
    }

    separator = "===================================="
    print("Input:")
    print(input_dict)
    print(separator)
    print("Output:")
    actual_dict = unflatten_dictionary(input_dict)
    print(actual_dict)
    print(separator)
    passed = expected_dict == actual_dict
    print(f"Test passed? {passed}")

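def unflatten_dictionary(field_dict):
    field_dict = dict(field_dict)
    new_field_dict = dict()
    field_keys = list(field_dict)
    field_keys.sort()
    for each_key in field_keys:
        field_value = field_dict[each_key]
        processed_key = str(each_key)
        current_key = None
        current_subkey = None
        for i in range(len(processed_key)):
            if processed_key[i] == "[":
                current_key = processed_key[:i]
                start_subscript_index = i + 1
                end_subscript_index = processed_key.index("]")
                current_subkey = int(processed_key[start_subscript_index : end_subscript_index])
                # reserve the remainder descendant keys to be processed later in a recursive call
                if len(processed_key[end_subscript_index:]) > 1:
                    current_subkey = "{}.{}".format(current_subkey, processed_key[end_subscript_index + 2:])
                break
            # next child key is a dictionary
            elif processed_key[i] == ".":
                split_work = processed_key.split(".", 1)
                if len(split_work) > 1:
                    current_key, current_subkey = split_work
                else:
                    current_key = split_work[0]
                break
        if current_subkey is not None:
            if current_key.isdigit():
                current_key = int(current_key)
            if current_key not in new_field_dict:
                new_field_dict[current_key] = dict()
            new_field_dict[current_key][current_subkey] = field_value
        else:
            new_field_dict[each_key] = field_value
    # Recursively unflatten each dictionary on each depth before returning back to the caller.
    all_digits = True
    highest_digit = -1
    for each_key, each_item in new_field_dict.items():
        if isinstance(each_item, dict):
            new_field_dict[each_key] = unflatten_dictionary(each_item)
        # validate the keys can safely converted to a sequential list.
        all_digits &= str(each_key).isdigit()
        if all_digits:
            next_digit = int(each_key)
            if next_digit > highest_digit:
                highest_digit = next_digit
    # If all digits and can be sequential order, convert to list.
    if all_digits and highest_digit == (len(new_field_dict) - 1):
        digit_keys = list(new_field_dict)
        digit_keys.sort()
        new_list = []
        for k in digit_keys:
            i = int(k)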
如果len（new_list）真的我不明白为什么'c'：{'d'：'e'}
变成'c.d'：'e'
，如果你有'c'：{'f'：'g'，'d'：'e'}
，那么{'c'：{'f'：'f'：'g'，'d'：'e'}
变成{'c.f'：'g'，'c.d'：'e'：'e'}这个实现比我的{/code>更好，因为它处理的输入是'a'，'d''e'>'以及任何更深的嵌套（更多点）。我害怕写下我自己的答案。。。不知道会发生什么事它完全符合我的需要。谢谢你，你救了我一整晚的思考：）该死！不能否决我自己的答案，即使我有足够的勇气承认它不如另一个（在这种情况下是Messa的）。+1用于使用递归，但不正确：例如{'a.b'：0，'a.c'：1}
将返回{'a'：{'c'：1}
：）可以通过使用ret[k1]来解决。更新（…）。@Lucasmus，是的，我现在把它改回原来的答案。我一时觉得可以简化一下
def unflatten(d):
    """Rebuild a nested dict from a dict whose keys are dot-separated paths.

    Keys are grouped by their first path segment and each group is expanded
    recursively, so sibling paths are merged instead of overwriting each
    other and paths of any depth are supported.

    Args:
        d: Dict with string keys such as ``'a'`` or ``'c.d.e'``.

    Returns:
        A new nested dict. If a plain key and a dotted path share the same
        first segment, the dotted path's sub-dict takes that slot.
    """
    result = {}
    groups = {}
    # ``items()`` instead of the Python-2-only ``iteritems()``.
    for k, v in d.items():
        if '.' in k:
            k1, k2 = k.split('.', 1)
            group = groups.setdefault(k1, {})
            group[k2] = v
            # Reserve the slot now (keeps first-seen key order); it is
            # replaced by the fully expanded sub-dict below.
            result[k1] = group
        else:
            result[k] = v

    for k1, group in groups.items():
        result[k1] = unflatten(group)
    return result

def unflatten(dictionary):
    """Rebuild a nested dict from a dict whose keys are dot-separated paths.

    Each key is split on ``'.'``; every segment but the last names a nested
    dict (created on demand) and the last segment receives the value.

    Args:
        dictionary: Dict with string keys such as ``'a'`` or ``'c.d.e'``.

    Returns:
        A new nested dict. If a path has to descend through a slot that
        already holds a non-dict value, that value is replaced by a dict.
    """
    resultDict = {}
    # ``items()`` instead of the Python-2-only ``iteritems()``.
    for key, value in dictionary.items():
        *parents, leaf = key.split(".")
        node = resultDict
        for part in parents:
            child = node.get(part)
            if not isinstance(child, dict):
                # Missing, or a scalar stored earlier under the same prefix:
                # start a fresh level instead of failing on the scalar.
                child = node[part] = {}
            node = child
        node[leaf] = value
    return resultDict

from collections import defaultdict
def unflatten(d):
    """Rebuild a nested dict from a dict whose keys are dot-separated paths.

    Unlike a single ``partition('.')`` pass, the whole path is walked, so
    ``'c.d.e'`` becomes ``{'c': {'d': {'e': ...}}}`` rather than
    ``{'c': {'d.e': ...}}``.

    Args:
        d: Dict with string keys such as ``'a'`` or ``'c.d.e'``.

    Returns:
        A ``defaultdict(dict)`` (it compares equal to the equivalent plain
        dict) whose nested levels are plain dicts. If a path has to descend
        through a slot holding a non-dict value, that value is replaced by
        a dict.
    """
    ret = defaultdict(dict)
    for k, v in d.items():
        head, delim, rest = k.partition('.')
        if not delim:
            ret[head] = v
            continue

        node = ret[head]  # defaultdict creates the first level on demand
        if not isinstance(node, dict):
            # A scalar was stored under this prefix earlier; replace it.
            node = ret[head] = {}

        *parents, leaf = rest.split('.')
        for part in parents:
            child = node.get(part)
            if not isinstance(child, dict):
                child = node[part] = {}
            node = child
        node[leaf] = v
    return ret

from typing import Any, Dict


def unflatten(
    d: Dict[str, Any],
    base: Dict[str, Any] = None,
) -> Dict[str, Any]:
    """Convert any keys containing dotted paths to nested dicts.

    Args:
        d: Dict whose keys may be dotted paths and whose values may
            themselves be dicts with dotted keys.
        base: Dict to merge into; it is updated in place and returned.
            A new dict is created when omitted.

    Returns:
        ``base`` (or a new dict) with every dotted path expanded. Entries
        are applied in insertion order; a later entry overwrites an earlier
        one at the same path, including replacing a scalar by a dict.

    >>> unflatten({'a': 12, 'b': 13, 'c': 14})  # no expansion
    {'a': 12, 'b': 13, 'c': 14}

    >>> unflatten({'a.b.c': 12})  # dotted path expansion
    {'a': {'b': {'c': 12}}}

    >>> unflatten({'a.b.c': 12, 'a': {'b.d': 13}})  # merging
    {'a': {'b': {'c': 12, 'd': 13}}}

    >>> unflatten({'a.b': 12, 'a': {'b': 13}})  # insertion-order overwrites
    {'a': {'b': 13}}

    >>> unflatten({'a.b': 12, 'a': {'b': {'c': 1}}})  # dict replaces scalar
    {'a': {'b': {'c': 1}}}

    >>> unflatten({'a': 1, 'a.b': 2})  # dotted path replaces scalar
    {'a': {'b': 2}}

    >>> unflatten({'a': {}})  # empty dict is preserved
    {'a': {}}
    """
    if base is None:
        base = {}

    for key, value in d.items():
        root = base

        # If a dotted path is encountered, create nested dicts for all but
        # the last level, then change root to that last level, and key to
        # the final key in the path. This allows one final setitem at the
        # bottom of the loop.
        if '.' in key:
            *parts, key = key.split('.')

            for part in parts:
                child = root.get(part)
                if not isinstance(child, dict):
                    # Missing, or a scalar written by an earlier entry:
                    # the later path wins, so start a fresh level.
                    child = root[part] = {}
                root = child

        if isinstance(value, dict):
            existing = root.get(key)
            # Merge into an existing dict; anything else is overwritten.
            value = unflatten(
                value, existing if isinstance(existing, dict) else {})

        root[key] = value

    return base

def _split_flat_key(flat_key):
    """Split *flat_key* at its first ``[`` or ``.`` into ``(parent, child)``.

    ``child`` is ``None`` when the key contains no separator (a leaf key),
    an ``int`` for a trailing subscript (``"a[3]"`` -> ``("a", 3)``), or a
    string holding the rest of the path, to be split by a later recursive
    call (``"a[3].b"`` -> ``("a", "3.b")``, ``"a.b.c"`` -> ``("a", "b.c")``).

    Raises:
        ValueError: If a ``[`` has no closing ``]`` or the subscript is not
            an integer.
    """
    bracket = flat_key.find("[")
    dot = flat_key.find(".")

    if bracket != -1 and (dot == -1 or bracket < dot):
        close = flat_key.find("]", bracket)
        if close == -1:
            raise ValueError(
                "unterminated subscript in key {!r}".format(flat_key))
        parent = flat_key[:bracket]
        index = int(flat_key[bracket + 1:close])
        rest = flat_key[close + 1:]
        if not rest:
            return parent, index
        if rest.startswith("["):
            # Chained subscript ("a[0][1]"): keep the next bracket attached
            # to the index so the recursive call sees "0[1]".
            return parent, "{}{}".format(index, rest)
        if rest.startswith("."):
            rest = rest[1:]
        return parent, "{}.{}".format(index, rest)

    if dot != -1:
        parent, child = flat_key.split(".", 1)
        return parent, child

    return flat_key, None


def unflatten_dictionary(field_dict):
    """Rebuild a nested structure from a dict with flattened keys.

    Keys may use dotted paths (``"a.b.c"``) and integer subscripts
    (``"a[0]"``, ``"a[0].b"``, ``"a[0][1]"``). Values that are dicts are
    unflattened recursively as well.

    A level whose keys are exactly the integers ``0 .. n-1`` (as ints or
    decimal strings, ``n >= 1``) is returned as a list; every other level,
    including an empty one, is returned as a dict. When a plain key and a
    dotted/subscripted path collide on a non-dict value, the path replaces
    that value.

    Args:
        field_dict: Mapping with flattened keys. It is not modified.

    Returns:
        A new dict, or a list when the top-level keys form ``0 .. n-1``.

    Raises:
        ValueError: If a key has an unterminated or non-integer subscript.
    """
    field_dict = dict(field_dict)
    new_field_dict = dict()
    field_keys = list(field_dict)
    try:
        field_keys.sort()
    except TypeError:
        # Mixed int/str keys (produced e.g. by "a[0]" next to "a[1].b")
        # cannot be ordered directly; fall back to their string form.
        field_keys.sort(key=str)

    for each_key in field_keys:
        field_value = field_dict[each_key]
        current_key, current_subkey = _split_flat_key(str(each_key))

        if current_subkey is None:
            # Copy dict values so that merging a later dotted key into this
            # slot never mutates the caller's nested dict.
            if isinstance(field_value, dict):
                field_value = dict(field_value)
            new_field_dict[each_key] = field_value
            continue

        # isdecimal() (not isdigit()): it matches exactly what int() accepts.
        if current_key.isdecimal():
            current_key = int(current_key)
        group = new_field_dict.get(current_key)
        if not isinstance(group, dict):
            group = new_field_dict[current_key] = dict()
        group[current_subkey] = field_value

    # Recursively unflatten each dictionary on each depth before returning.
    for each_key, each_item in list(new_field_dict.items()):
        if isinstance(each_item, dict):
            new_field_dict[each_key] = unflatten_dictionary(each_item)

    # An empty level stays a dict rather than collapsing into an empty list.
    if not new_field_dict:
        return new_field_dict

    # Map every integer index to the key that carries it; bail out to the
    # dict form as soon as one key is not a plain decimal number.
    key_by_index = {}
    for each_key in new_field_dict:
        key_text = str(each_key)
        if not key_text.isdecimal():
            return new_field_dict
        key_by_index[int(key_text)] = each_key

    size = len(new_field_dict)
    # Distinct indices with max == size - 1 means the keys are 0 .. size-1.
    if len(key_by_index) != size or max(key_by_index) != size - 1:
        return new_field_dict

    return [new_field_dict[key_by_index[i]] for i in range(size)]

# Demo / self-check: unflatten a sample dict that mixes subscripts, dotted
# paths and an already-nested value, then compare with the expected result.
if __name__ == '__main__':
    input_dict = {
        "a[0]": 1,
        "a[1]": 10,
        "a[2]": 5,
        "b": 10,
        "c.test.0": "hi",
        "c.test.1": "bye",
        "c.head.shoulders": "richard",
        "c.head.knees": "toes",
        "z.trick.or[0]": "treat",
        "z.trick.or[1]": "halloween",
        "z.trick.and.then[0]": "he",
        "z.trick.and.then[1]": "it",
        "some[0].nested.field[0]": 42,
        "some[0].nested.field[1]": 43,
        "some[2].nested.field[0]": 44,
        "mixed": {"statement": "test", "break[0]": True, "break[1]": False},
    }
    expected_dict = {
        "a": [1, 10, 5],
        "b": 10,
        "c": {
            "test": ["hi", "bye"],
            "head": {"shoulders": "richard", "knees": "toes"},
        },
        "z": {
            "trick": {
                "or": ["treat", "halloween"],
                "and": {"then": ["he", "it"]},
            },
        },
        # Indices 0 and 2 are not contiguous, so "some" stays a dict.
        "some": {
            0: {"nested": {"field": [42, 43]}},
            2: {"nested": {"field": [44]}},
        },
        "mixed": {"statement": "test", "break": [True, False]},
    }

    separator = "===================================="
    print("Input:")
    print(input_dict)
    print(separator)
    print("Output:")
    actual_dict = unflatten_dictionary(input_dict)
    print(actual_dict)
    print(separator)
    passed = expected_dict == actual_dict
    print(f"Test passed? {passed}")