在python中从csv列表创建json树
标签：python, csv, recursion, tree
我试图从 python 中的一个简单表构建 json 层次结构。数据如下所示：
id parent name
1 10 test-name-1
2 10 test-name-2
3 5 test-name-3
4 none test-name-4
5 10 test-name-5
6 none test-name-6
7 1 test-name-7
8 1 test-name-8
9 8 test-name-9
10 4 test-name-10
我正在寻找这样的输出:
{"$4":{"name":"test-name-4","children":{
"$10":{"name":"test-name-10","children":{
"$1":{"name":"test-name-1","children":{
"$7":{"name":"test-name-7","children":{}},
"$8":{"name":"test-name-8","children":{
"$9":{"name":"test-name-9","children":{}}}}}},
"$2":{"name":"test-name-2","children":{}},
"$5":{"name":"test-name-5","children":{
"$3":{"name":"test-name-3","children":{}}}}}}}},
"$6":{"name":"test-name-6","children":"test-name-6"}}
我不知道有多少“叶子”或“根”,也不知道csv中的行的顺序。我的问题是,有没有一种方法可以从子节点到父节点递归地构建字典/列表?在python中,如何从树的“叶子”部分生成分层树
谢谢你的帮助！
要将所有子节点分配给其父节点，可以对节点列表进行两次遍历。第一次遍历将每个节点添加到一个 UserDict 中；到第二次遍历时，每个节点的父节点保证已经位于 UserDict 中，因此可以把该节点添加到其父节点的子节点列表里。
要序列化为 JSON，可以使用自定义的 json.JSONEncoder 子类（见下面代码中的 NodeJSONEncoder）：
#!/usr/bin/env python
import sys
import json
import UserDict
class Node(object):
    """One row of the input table: an id, its parent's id and a name.

    Attributes:
        nid: Identifier of this node, exactly as read from the table.
        parent: Identifier of the parent node. The rest of the script
            treats the string "none" as "this node is a root".
        children: Child nodes; starts empty and is filled in by
            ``NodeDict.addNodes``.
        name: Display name of the node.
    """

    def __init__(self, nid, parent, name):
        self.nid = nid
        self.parent = parent
        # A fresh list per instance so that nodes never share children.
        self.children = []
        self.name = name
class NodeDict(UserDict.UserDict):
    """Mapping of node id -> node that can also wire children to parents."""

    def addNodes(self, nodes):
        """Add every node as a child to its parent by doing two passes.

        Each pass registers every node under its ``nid`` and, when the
        node's parent is already registered, appends the node to that
        parent's ``children``. The second pass picks up the nodes whose
        parent appeared later in ``nodes`` than they did.

        Args:
            nodes: Iterable of objects with ``nid``, ``parent`` and
                ``children`` attributes. It is iterated twice, so it must
                be re-iterable (e.g. a list).
        """
        for _ in (1, 2):
            for node in nodes:
                self.data[node.nid] = node
                # "none" marks a root: never link it, even if some node
                # happens to use "none" as its own id.
                if node.parent == "none" or node.parent not in self.data:
                    continue
                siblings = self.data[node.parent].children
                # The second pass revisits nodes that were already linked
                # during the first one; do not add them twice.
                if node not in siblings:
                    siblings.append(node)
class NodeJSONEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialise ``Node`` objects."""

    def default(self, node):
        """Return a JSON-serialisable dict for ``node``.

        The encoder calls this hook for objects it cannot serialise by
        itself. The returned dict still contains ``Node`` objects in
        "children"; the encoder calls this hook again for each of them.

        Raises:
            TypeError: If ``node`` is not a ``Node``.
        """
        # isinstance (rather than an exact type comparison) also accepts
        # subclasses of Node.
        if isinstance(node, Node):
            return {
                "nid": node.nid,
                "name": node.name,
                "children": node.children,
            }
        raise TypeError("{} is not an instance of Node".format(node))
if __name__ == "__main__":
    # Usage: <script> <table-file>
    # The file is whitespace-separated: a header row, then one
    # "id parent name" row per node.
    nodes = []
    with open(sys.argv[1]) as f:
        next(f, None)  # skip the header row; tolerates an empty file
        for row in f:
            if not row.strip():
                continue  # a blank line would otherwise fail to unpack
            # Split on the first two runs of whitespace only, so a name
            # that itself contains spaces stays in one piece.
            nid, parent, name = row.split(None, 2)
            nodes.append(Node(nid, parent, name.rstrip()))

    nodeDict = NodeDict()
    nodeDict.addNodes(nodes)

    # Roots are the nodes whose parent column holds the literal "none".
    rootNodes = [node for node in nodeDict.values() if node.parent == "none"]

    encoder = NodeJSONEncoder()
    for rootNode in rootNodes:
        # A single parenthesised argument prints the same on Python 2 and 3.
        print(encoder.encode(rootNode))
结果:
{"name": "test-name-4", "nid": "4", "children":[
{"name": "test-name-10", "nid": "10", "children":[
{"name": "test-name-1", "nid": "1", "children":[
{"name": "test-name-7", "nid": "7", "children": []},
{"name": "test-name-8", "nid": "8", "children":[
{"name": "test-name-9", "nid": "9", "children": []}]}]},
{"name": "test-name-2", "nid": "2", "children": []},
{"name": "test-name-5", "nid": "5", "children":[
{"name": "test-name-3", "nid": "3", "children": []}]}]}]}
{"name": "test-name-6", "nid": "6", "children": []}
我也有一个基于2个循环(1个用于缓存,1个用于构建)的解决方案,没有JSON编码器,它提供了您所需的准确输出:
>>> import re
>>> from collections import defaultdict
>>> parents = defaultdict(list)
>>> for i, line in enumerate(file_.split('\n')):
...     if i != 0 and line.strip():
...         id_, parent, name = re.findall(r'[\d\w-]+', line)
...         parents[parent].append((id_, name))
...
>>> parents
defaultdict(<type 'list'>, {'10': [('1', 'test-name-1'), ('2', 'test-name-2'), ('5', 'test-name-5')], 'none': [('4', 'test-name-4'), ('6', 'test-name-6')], '1': [('7', 'test-name-7'), ('8', 'test-name-8')], '5': [('3', 'test-name-3')], '4': [('10', 'test-name-10')], '8': [('9', 'test-name-9')]})
我们只需在先前构建的 dict 上调用 build_tree，并传入 'none'（即树根）作为起始值（build_tree 函数的定义在此摘录中缺失）：
>>> from pprint import pprint
>>> pprint(build_tree(parents, 'none'))
{'$4': {'children': {'$10': {'children': {'$1': {'children': {'$7': {'children': {},
'name': 'test-name-7'},
'$8': {'children': {'$9': {'children': {},
'name': 'test-name-9'}},
'name': 'test-name-8'}},
'name': 'test-name-1'},
'$2': {'children': {},
'name': 'test-name-2'},
'$5': {'children': {'$3': {'children': {},
'name': 'test-name-3'}},
'name': 'test-name-5'}},
'name': 'test-name-10'}},
'name': 'test-name-4'},
'$6': {'children': {}, 'name': 'test-name-6'}}
>>>
在 Python 3.6 中，上面给出的答案对我不起作用，因为 UserDict.UserDict 模块已不存在（需改为从 collections 导入 UserDict）。所以我做了一些修改让它能够运行，并且做了一点泛化：用户可以通过命令行指定 child_id、parent_id 和子节点名称所在的列。请看下面的代码（我还在学习，相信它还有改进空间，但它满足了我的需要）：
""" Converts a CSV file with Parent/Child Hierarchy to a hierarchical JSON file for front-end processing (javascript/DS)
USAGE: csv2json.py <somefile.csv> a b c (column nrs of a=child_id, b=parent-id, c=name(of child))
ROOT of hierarchy should contain child_id and parent_id = 'none' or blank. name must exist """
import sys
import json
import csv
#import UserDict
from collections import UserDict

class Node(object):
    def __init__(self, child_id, parent_id, name):
        self.child_id = child_id
        self.parent_id = parent_id
        self.children = []
        self.name = name

class NodeDict(UserDict):
    def addNodes(self, nodes):
        """ Add every node as a child to its parent_id by doing two passes."""
        for i in (1, 2):
            for node in nodes:
                self.data[node.child_id] = node
                if node.parent_id in self.data.keys():
                    if (node.parent_id != "none" or node.parent_id != "") and node not in self.data[node.parent_id].children:
                        self.data[node.parent_id].children.append(node)

class NodeJSONEncoder(json.JSONEncoder):
    def default(self, node):
        if type(node) == Node:
            return {"name":node.name, "children":node.children}
        raise TypeError("{} is not an instance of Node".format(node))

if __name__ == "__main__":
    nodes = []
    with open(sys.argv[1], 'r') as f:
        reader = csv.reader(f)
        for row in reader:
            if not row[int(sys.argv[4])] : #skip if no name/label exists
                continue
            child_id, parent_id, name = row[int(sys.argv[2])] , row[int(sys.argv[3])] , row[int(sys.argv[4])]
            nodes.append(Node(child_id, parent_id, name))
    nodeDict = NodeDict()
    nodeDict.addNodes(nodes)
    rootNodes = [node for child_id, node in nodeDict.items()
                 if (node.parent_id == "none" or node.parent_id == "")]
    for rootNode in rootNodes:
        print(NodeJSONEncoder().encode(rootNode))
没那么难。为每一行创建一个树节点（可以用 id 作为键把它们保存在字典中），然后将子节点添加到各自的父节点中。用 json 打印这棵树也应该足够简单。试一试，会有很多人帮助你的！
我真希望 Stack Overflow 能让我选择多个答案，因为这也是我最初问题的一个可行答案。谢谢你用另一种方式解决了这个问题！
>>> from pprint import pprint
>>> pprint(build_tree(parents, 'none'))
{'$4': {'children': {'$10': {'children': {'$1': {'children': {'$7': {'children': {},
'name': 'test-name-7'},
'$8': {'children': {'$9': {'children': {},
'name': 'test-name-9'}},
'name': 'test-name-8'}},
'name': 'test-name-1'},
'$2': {'children': {},
'name': 'test-name-2'},
'$5': {'children': {'$3': {'children': {},
'name': 'test-name-3'}},
'name': 'test-name-5'}},
'name': 'test-name-10'}},
'name': 'test-name-4'},
'$6': {'children': {}, 'name': 'test-name-6'}}
>>>
""" Converts a CSV file with Parent/Child Hierarchy to a hierarchical JSON file for front-end processing (javascript/DS)
USAGE: csv2json.py <somefile.csv> a b c (column nrs of a=child_id, b=parent-id, c=name(of child))
ROOT of hierarchy should contain child_id and parent_id = 'none' or blank. name must exist """
import sys
import json
import csv
#import UserDict
from collections import UserDict
class Node(object):
    """One CSV row: a node id, its parent's id and a display name.

    Attributes:
        child_id: Identifier of this node, as read from the CSV.
        parent_id: Identifier of the parent node. The rest of the script
            treats "none" or an empty string as "this node is a root".
        children: Child nodes; starts empty and is filled in by
            ``NodeDict.addNodes``.
        name: Display name (label) of the node.
    """

    def __init__(self, child_id, parent_id, name):
        self.child_id = child_id
        self.parent_id = parent_id
        # A fresh list per instance so that nodes never share children.
        self.children = []
        self.name = name
class NodeDict(UserDict):
    """Mapping of child_id -> node that can also wire children to parents."""

    def addNodes(self, nodes):
        """Add every node as a child to its parent_id by doing two passes.

        Each pass registers every node under its ``child_id`` and, when the
        node's parent is already registered, appends the node to that
        parent's ``children``. The second pass picks up the nodes whose
        parent appeared later in ``nodes`` than they did.

        Args:
            nodes: Iterable of objects with ``child_id``, ``parent_id`` and
                ``children`` attributes. It is iterated twice, so it must
                be re-iterable (e.g. a list).
        """
        for _ in (1, 2):
            for node in nodes:
                self.data[node.child_id] = node
                # "none" and "" both mean "no parent". The previous test
                # (!= "none" or != "") was always true, so a root could be
                # attached under a node whose id was "" or "none".
                if node.parent_id in ("none", ""):
                    continue
                parent = self.data.get(node.parent_id)
                # The second pass revisits nodes that were already linked
                # during the first one; do not add them twice.
                if parent is not None and node not in parent.children:
                    parent.children.append(node)
class NodeJSONEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialise ``Node`` objects."""

    def default(self, node):
        """Return a JSON-serialisable dict for ``node``.

        The encoder calls this hook for objects it cannot serialise by
        itself. The returned dict still contains ``Node`` objects in
        "children"; the encoder calls this hook again for each of them.

        Raises:
            TypeError: If ``node`` is not a ``Node``.
        """
        # isinstance (rather than an exact type comparison) also accepts
        # subclasses of Node.
        if isinstance(node, Node):
            return {"name": node.name, "children": node.children}
        raise TypeError("{} is not an instance of Node".format(node))
if __name__ == "__main__":
    # Fail with the usage text instead of an IndexError traceback when the
    # file name or any of the three column numbers is missing.
    if len(sys.argv) < 5:
        sys.exit("USAGE: csv2json.py <somefile.csv> a b c "
                 "(column nrs of a=child_id, b=parent-id, c=name(of child))")

    # Parse the column numbers once rather than once per row.
    id_col, parent_col, name_col = (int(arg) for arg in sys.argv[2:5])

    nodes = []
    # newline='' is what the csv module asks for, so that quoted fields
    # containing line breaks are read correctly.
    with open(sys.argv[1], 'r', newline='') as f:
        for row in csv.reader(f):
            try:
                child_id = row[id_col]
                parent_id = row[parent_col]
                name = row[name_col]
            except IndexError:
                continue  # blank or short row: nothing usable on it
            if not name:
                continue  # skip if no name/label exists
            nodes.append(Node(child_id, parent_id, name))

    nodeDict = NodeDict()
    nodeDict.addNodes(nodes)

    # Roots are the rows whose parent column is "none" or blank.
    rootNodes = [node for node in nodeDict.values()
                 if node.parent_id in ("none", "")]

    encoder = NodeJSONEncoder()
    for rootNode in rootNodes:
        print(encoder.encode(rootNode))