Python 从数据帧创建嵌套字典
标签:python, json, pandas, dictionary
我有一个数据帧(DataFrame),用来表示仪表(meter)的层次结构。每个仪表有一个 ID,可以有任意数量的子仪表,子仪表又可以有自己的子仪表,层级深度不限。数据帧的每一行对应一个仪表,仪表 ID 所在的列表示它的嵌套层级(原帖此处附有数据帧的截图)。目的是将其转换为以下格式的嵌套字典:
{
"meters": [
{
"meter_id": "a",
"meter_children": [
{
"meter_id": "b",
"meter_children": []
},
{
"meter_id": "c",
"meter_children": [
{
"meter_id": "d",
"meter_children": []
}
]
},
{
"meter_id": "e",
"meter_children": []
}
]
},
{
"meter_id": "f",
"meter_children": []
},
{
"meter_id": "g",
"meter_children": []
},
{
"meter_id": "h",
"meter_children": []
},
{
"meter_id": "i",
"meter_children": []
},
{
"meter_id": "j",
"meter_children": []
},
{
"meter_id": "k",
"meter_children": []
},
{
"meter_id": "l",
"meter_children": [
{
"meter_id": "m",
"meter_children": []
},
{
"meter_id": "n",
"meter_children": []
},
{
"meter_id": "o",
"meter_children": []
}
]
},
{
"meter_id": "p",
"meter_children": []
},
{
"meter_id": "q",
"meter_children": []
},
{
"meter_id": "r",
"meter_children": []
},
{
"meter_id": "s",
"meter_children": []
},
{
"meter_id": "t",
"meter_children": []
},
{
"meter_id": "u",
"meter_children": []
}
]
}
我已经成功地实现了这一点,使用了您可以在下面看到的可怕代码(对不起)。我想知道是否有一个工具可以为您做到这一点,或者是否有一个更干净,更可读的方式来实现这一点
请注意,这段代码只支持到第 4 层嵌套,但可以按同样的模式很容易地继续扩展
# Build the nested "meters" structure from `df`: each row holds one meter, and
# the column index of its non-empty cell is the meter's nesting depth
# (column 0 = top level). Only columns 0-3 are handled; values in deeper
# columns are ignored.
results = {}
list_0 = []
for row in df.values:
    for counter, entry in enumerate(row):
        # Skip empty cells: None, or NaN (the only value not equal to itself).
        if entry is None or entry != entry:
            continue
        entry = str(entry)
        if counter == 0:
            list_0.append({
                "meter_id": entry,
                "meter_children": []
            })
            # Remember the current top-level meter and start a fresh list
            # that will collect its children.
            meter_0 = entry
            list_1 = []
        elif counter == 1:
            for item in list_0:
                if meter_0 in item.values():
                    list_1.append({
                        "meter_id": entry,
                        "meter_children": []
                    })
                    item["meter_children"] = list_1
                    meter_1 = entry
                    list_2 = []
        elif counter == 2:
            for parent_0 in list_0:
                if meter_0 in parent_0.values():
                    for item in list_1:
                        if meter_1 in item.values():
                            list_2.append({
                                "meter_id": entry,
                                "meter_children": []
                            })
                            item["meter_children"] = list_2
                            # Must be meter_2: the depth-3 branch below looks
                            # this name up to find the parent.
                            meter_2 = entry
                            list_3 = []
        elif counter == 3:
            for parent_0 in list_0:
                if meter_0 in parent_0.values():
                    for parent_1 in list_1:
                        if meter_1 in parent_1.values():
                            for item in list_2:
                                if meter_2 in item.values():
                                    list_3.append({
                                        "meter_id": entry,
                                        "meter_children": []
                                    })
                                    item["meter_children"] = list_3
                                    # Not read yet; kept so a fifth level can
                                    # be added following the same pattern.
                                    meter_3 = entry
                                    list_4 = []
results["meters"] = list_0
您当然可以改进您的代码,使其更高效,但据我所知,您的问题过于具体,没有现成的通用解决方案,抱歉。为了改进代码并将其推广到任意(未知)层数,我看到了两种解决方案:
- 编写一个递归函数,对第 n 层和第 n+1 层做同样的处理
- 编写一个 while 循环,通过使用 df.iterrows() 逐行遍历数据帧
您可以将 itertools.groupby 与递归一起使用:
from itertools import groupby as gb
d = [['a', None, None, None, None, None, None, None], [None, 'b', None, None, None, None, None, None], [None, 'c', None, None, None, None, None, None], [None, None, 'd', None, None, None, None, None], [None, 'e', None, None, None, None, None, None], ['f', None, None, None, None, None, None, None], ['g', None, None, None, None, None, None, None], ['h', None, None, None, None, None, None, None], ['i', None, None, None, None, None, None, None], ['j', None, None, None, None, None, None, None], ['k', None, None, None, None, None, None, None], ['l', None, None, None, None, None, None, None], [None, 'm', None, None, None, None, None, None], [None, 'n', None, None, None, None, None, None], [None, 'o', None, None, None, None, None, None], ['p', None, None, None, None, None, None, None], ['q', None, None, None, None, None, None, None], ['r', None, None, None, None, None, None, None], ['s', None, None, None, None, None, None, None], ['t', None, None, None, None, None, None, None], ['u', None, None, None, None, None, None, None]]
def get_tree(d):
    """Build a nested list of meter dicts from depth-encoded rows.

    Each row of ``d`` is a sequence in which the index of the filled cell is
    the meter's nesting depth; the other cells are empty (``None`` or NaN).
    Rows must be in depth-first order, i.e. a meter's children follow it
    directly.

    Returns a list of ``{"meter_id": ..., "meter_children": [...]}`` dicts
    for the meters at the first column, with deeper meters nested inside
    ``meter_children``.

    Raises ``IndexError`` if a row has no preceding parent meter at its
    level (an orphan child row or a completely blank row).
    """
    def holds_meter(row):
        # A row belongs to this level when its first cell is filled. NaN is
        # the only value unequal to itself, so ``row[0] == row[0]`` rejects
        # it; ``len`` (not truthiness) is used because a row may be an array.
        return len(row) > 0 and row[0] is not None and row[0] == row[0]

    r = []
    # groupby splits the rows into alternating runs: meters of this level,
    # then the deeper rows that belong to the last meter of the previous run.
    for a, b in gb(d, key=holds_meter):
        if a:
            r.extend({"meter_id": j, "meter_children": []} for j, *_ in b)
        elif r:
            # Drop the first column and recurse one level deeper.
            r[-1]["meter_children"] = get_tree([j for _, *j in b])
        else:
            raise IndexError(
                "row without a preceding parent meter at this level"
            )
    return r
输出:
{
"meters": [
{
"meter_id": "a",
"meter_children": [
{
"meter_id": "b",
"meter_children": []
},
{
"meter_id": "c",
"meter_children": [
{
"meter_id": "d",
"meter_children": []
}
]
},
{
"meter_id": "e",
"meter_children": []
}
]
},
{
"meter_id": "f",
"meter_children": []
},
{
"meter_id": "g",
"meter_children": []
},
{
"meter_id": "h",
"meter_children": []
},
{
"meter_id": "i",
"meter_children": []
},
{
"meter_id": "j",
"meter_children": []
},
{
"meter_id": "k",
"meter_children": []
},
{
"meter_id": "l",
"meter_children": [
{
"meter_id": "m",
"meter_children": []
},
{
"meter_id": "n",
"meter_children": []
},
{
"meter_id": "o",
"meter_children": []
}
]
},
{
"meter_id": "p",
"meter_children": []
},
{
"meter_id": "q",
"meter_children": []
},
{
"meter_id": "r",
"meter_children": []
},
{
"meter_id": "s",
"meter_children": []
},
{
"meter_id": "t",
"meter_children": []
},
{
"meter_id": "u",
"meter_children": []
}
]
}
您可以将 itertools.groupby 与递归一起使用:
from itertools import groupby as gb
d = [['a', None, None, None, None, None, None, None], [None, 'b', None, None, None, None, None, None], [None, 'c', None, None, None, None, None, None], [None, None, 'd', None, None, None, None, None], [None, 'e', None, None, None, None, None, None], ['f', None, None, None, None, None, None, None], ['g', None, None, None, None, None, None, None], ['h', None, None, None, None, None, None, None], ['i', None, None, None, None, None, None, None], ['j', None, None, None, None, None, None, None], ['k', None, None, None, None, None, None, None], ['l', None, None, None, None, None, None, None], [None, 'm', None, None, None, None, None, None], [None, 'n', None, None, None, None, None, None], [None, 'o', None, None, None, None, None, None], ['p', None, None, None, None, None, None, None], ['q', None, None, None, None, None, None, None], ['r', None, None, None, None, None, None, None], ['s', None, None, None, None, None, None, None], ['t', None, None, None, None, None, None, None], ['u', None, None, None, None, None, None, None]]
def get_tree(d):
    """Build a nested list of meter dicts from depth-encoded rows.

    Each row of ``d`` is a sequence in which the index of the filled cell is
    the meter's nesting depth; the other cells are empty (``None`` or NaN).
    Rows must be in depth-first order, i.e. a meter's children follow it
    directly.

    Returns a list of ``{"meter_id": ..., "meter_children": [...]}`` dicts
    for the meters at the first column, with deeper meters nested inside
    ``meter_children``.

    Raises ``IndexError`` if a row has no preceding parent meter at its
    level (an orphan child row or a completely blank row).
    """
    def holds_meter(row):
        # A row belongs to this level when its first cell is filled. NaN is
        # the only value unequal to itself, so ``row[0] == row[0]`` rejects
        # it; ``len`` (not truthiness) is used because a row may be an array.
        return len(row) > 0 and row[0] is not None and row[0] == row[0]

    r = []
    # groupby splits the rows into alternating runs: meters of this level,
    # then the deeper rows that belong to the last meter of the previous run.
    for a, b in gb(d, key=holds_meter):
        if a:
            r.extend({"meter_id": j, "meter_children": []} for j, *_ in b)
        elif r:
            # Drop the first column and recurse one level deeper.
            r[-1]["meter_children"] = get_tree([j for _, *j in b])
        else:
            raise IndexError(
                "row without a preceding parent meter at this level"
            )
    return r
输出:
{
"meters": [
{
"meter_id": "a",
"meter_children": [
{
"meter_id": "b",
"meter_children": []
},
{
"meter_id": "c",
"meter_children": [
{
"meter_id": "d",
"meter_children": []
}
]
},
{
"meter_id": "e",
"meter_children": []
}
]
},
{
"meter_id": "f",
"meter_children": []
},
{
"meter_id": "g",
"meter_children": []
},
{
"meter_id": "h",
"meter_children": []
},
{
"meter_id": "i",
"meter_children": []
},
{
"meter_id": "j",
"meter_children": []
},
{
"meter_id": "k",
"meter_children": []
},
{
"meter_id": "l",
"meter_children": [
{
"meter_id": "m",
"meter_children": []
},
{
"meter_id": "n",
"meter_children": []
},
{
"meter_id": "o",
"meter_children": []
}
]
},
{
"meter_id": "p",
"meter_children": []
},
{
"meter_id": "q",
"meter_children": []
},
{
"meter_id": "r",
"meter_children": []
},
{
"meter_id": "s",
"meter_children": []
},
{
"meter_id": "t",
"meter_children": []
},
{
"meter_id": "u",
"meter_children": []
}
]
}