Python 如何通过一些修改将xml转换为字典?
标签:python, json, xml, xmltodict
我目前有一个以下格式的 XML 文件:
<?xml version="1.0" encoding="UTF-8" ?>
<Garden>
<id>97</id>
<Flowers>
<id>98</id>
<Type>
<id>99</id>
<Level>
<id>100</id>
</Level>
</Type>
</Flowers>
</Garden>
我希望能够为所有层级添加默认的 "state": "0"。我真的不知道该怎么做。任何帮助都将不胜感激。
这就是我现在所拥有的:
# Read the whole XML document. The file declares encoding="UTF-8", so decode
# it explicitly instead of relying on the platform's default encoding.
with open("gardenlist.xml", 'r', encoding="utf-8") as file:
    xmlString = file.read()
print(xmlString)
# Convert the parsed XML (nested dictionaries) into an indented JSON string.
# Nothing here adds a "state" entry yet; the JSON mirrors the XML as-is.
jsonString = json.dumps(xmltodict.parse(xmlString), indent=4)
这只会得到 JSON,但其中没有 "state": "0" 值。
您可以在得到字典之后递归地完成这件事:检查哪些值本身也是字典,并把目标键添加到这些字典中。见:
import collections
def addAttr(target):
    """Add a default ``"state": "0"`` entry to ``target`` and every nested mapping.

    The mapping is modified in place. Nested values are visited recursively:
    any ``dict`` (including ``collections.OrderedDict``, which subclasses
    ``dict``) receives the entry, and so does every ``dict`` found inside a
    list value (xmltodict represents repeated sibling elements as a list).

    Args:
        target: The mapping to update, e.g. the value stored under the root
            element name in the result of ``xmltodict.parse``.

    Returns:
        None. ``target`` is updated in place.
    """
    target["state"] = "0"
    # Only nested containers are modified below, never ``target`` itself, so
    # iterating over its values while recursing is safe.
    for value in target.values():
        if isinstance(value, dict):
            addAttr(value)
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    addAttr(item)
# Parse the XML text into nested dictionaries, then run addAttr on the
# dictionary stored under the root "Garden" key (d1 is modified in place).
d1 = xmltodict.parse(xmlString)
addAttr(d1["Garden"])
我认为正确的方法是准备所需的XML结构,然后将其转换为dict和json字符串: 复杂方法:
import xml.etree.ElementTree as ET
import xmltodict
import json
# Load the XML file and take its root element.
tree = ET.parse('gardenlist.xml')
root = tree.getroot()
state_el = ET.Element('state') # prepare `state` node
state_el.text = '0'
# Put the <state> node at child index 1 of the root element.
root.insert(1, state_el)
def add_state(root, el_to_insert):
    """Insert a copy of ``el_to_insert`` into every descendant that has children.

    Each child of ``root`` that itself has child nodes receives the new node
    at index 1, and the same rule is then applied to that child's own
    children. ``root`` itself is not given a node by this function, and
    elements without children are left untouched.

    Args:
        root: Element whose descendants are processed (modified in place).
        el_to_insert: Template element; it is deep-copied for each insertion
            so that no two parents share one node object.

    Returns:
        None.
    """
    import copy  # local import keeps the snippet self-contained

    for el in root:
        if len(el):  # len() of an Element is its number of child nodes
            # Insert an independent copy: sharing one Element instance among
            # several parents means a later change to it shows up everywhere.
            el.insert(1, copy.deepcopy(el_to_insert))
            add_state(el, el_to_insert)
# Apply add_state to the elements below the root.
add_state(root, state_el)
# Serialise the modified tree to text, parse that text with xmltodict, and
# dump the result as an indented JSON string.
json_str = json.dumps(xmltodict.parse(ET.tostring(root, encoding="unicode")), indent=4)
print(json_str)
实际输出:
{
"Garden": {
"id": "97",
"state": "0",
"Flowers": {
"id": "98",
"state": "0",
"Type": {
"id": "99",
"state": "0",
"Level": {
"id": "100",
"state": "0"
}
}
}
}
}
在我看来,这是一个编写自己的解析器的好机会:
from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor, RegexNode
# Sample XML document that is handed to XMLVisitor.parse below.
xml = """
<?xml version="1.0" encoding="UTF-8" ?>
<Garden>
<id>97</id>
<Flowers>
<id>98</id>
<Type>
<id>99</id>
<Level>
<id>100</id>
</Level>
</Type>
</Flowers>
</Garden>
"""
class XMLVisitor(NodeVisitor):
    """Visitor that parses a small XML subset into nested dictionaries.

    The grammar accepts an ``<?xml ...>`` declaration followed by one or more
    elements. Tags consist of a bare word (no attributes), and an element
    holds either text without ``<``/``>`` or further elements. While the
    dictionaries are built, a ``"state"`` entry with the integer ``0`` is set
    in every dictionary that receives at least one key, the outermost one
    included.
    """

    # The grammar text is kept flush-left so the string handed to Grammar is
    # exactly the one shown in the original snippet.
    grammar = Grammar(
        r"""
program = root expr+
expr = opentag list closetag
item = (opentag notpar closetag) / expr
list = item+
root = ws? lpar "?xml" notpar rpar
opentag = ws? lpar word rpar ws?
closetag = lpar slash word rpar ws?
lpar = "<"
rpar = ">"
notpar = ~"[^<>]+"
slash = "/"
word = ~"\w+"
ws = ~"\s+"
"""
    )

    def generic_visit(self, node, visited_children):
        """Return the visited children when there are any, else the node."""
        if visited_children:
            return visited_children
        return node

    def visit_opentag(self, node, visited_children):
        """Return the tag name of an opening tag (third part of the rule)."""
        name_node = visited_children[2]
        return name_node.text

    def visit_closetag(self, node, visited_children):
        """Return the tag name of a closing tag (third part of the rule)."""
        name_node = visited_children[2]
        return name_node.text

    def visit_notpar(self, node, visited_children):
        """Return the matched text unchanged."""
        return node.text

    def visit_item(self, node, visited_children):
        """Return ``(tag, text)`` for a leaf element, or the nested result."""
        matched = visited_children[0]
        if len(matched) != 3:
            # Second alternative: whatever visit_expr produced.
            return matched
        # First alternative: [opening tag, text, closing tag].
        return (matched[0], matched[1])

    def visit_expr(self, node, visited_children):
        """Return ``(tag, items)`` for an element that contains a list of items."""
        return (visited_children[0], visited_children[1])

    def visit_program(self, node, visited_children):
        """Build the result dictionary from everything after the declaration."""
        _, body = visited_children
        return self.__makeDict__(body)

    def __makeDict__(self, struct, level = 0):
        """Turn a list of ``(key, value)`` pairs into a dictionary.

        List values are converted recursively. ``"state"`` is (re)assigned
        after each key, so it first appears right after the first key and is
        absent when ``struct`` is empty. ``level`` is accepted but not used.
        """
        result = {}
        for name, payload in struct:
            result[name] = (
                self.__makeDict__(payload) if isinstance(payload, list) else payload
            )
            result["state"] = 0
        return result
# Create the visitor, parse the sample document with it, and print the result.
visitor = XMLVisitor()
output = visitor.parse(xml)
print(output)
您目前已经写了什么?
所以您希望代码能够遍历嵌套字典的所有层级,将 state 添加到每个没有嵌套字典的字典中?这其中的哪一部分让您遇到了麻烦?
@ScottHunter 我在将 state 添加到字典中时遇到了麻烦。我已经用我现有的代码更新了这个问题。
state 是否可能已经在某些地方定义了?如果是这样的话,应该怎么处理?
您的代码中究竟是哪一部分在尝试添加 state?
xmltodict 可以接受文件名作为参数吗?
@LauraSmith 我认为这是不可能的。如果这回答了您的问题,请随意接受此答案。
我发现有一个额外的 "state" 关联到了 Garden 节点。请确认一下。
@LauraSmith 已经改了。
@LauraSmith,我使用了您发布的完全相同的输入 XML,效果很好,谢谢。
我刚刚也测试了一下,效果很好。:)
@LauraSmith,太好了。
在我看来,最好的方法是做得更复杂一些,编写自己的解析器(参见我的答案 ;-))
谢谢你的方法。我真的很感激。:)
@LauraSmith:不客气。不过,最好还是选择一个更容易理解的答案。
from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor, RegexNode
# Sample XML document that is handed to XMLVisitor.parse below.
xml = """
<?xml version="1.0" encoding="UTF-8" ?>
<Garden>
<id>97</id>
<Flowers>
<id>98</id>
<Type>
<id>99</id>
<Level>
<id>100</id>
</Level>
</Type>
</Flowers>
</Garden>
"""
class XMLVisitor(NodeVisitor):
    """Visitor that parses a small XML subset into nested dictionaries.

    The grammar accepts an ``<?xml ...>`` declaration followed by one or more
    elements. Tags consist of a bare word (no attributes), and an element
    holds either text without ``<``/``>`` or further elements. While the
    dictionaries are built, a ``"state"`` entry with the integer ``0`` is set
    in every dictionary that receives at least one key, the outermost one
    included.
    """

    # The grammar text is kept flush-left so the string handed to Grammar is
    # exactly the one shown in the original snippet.
    grammar = Grammar(
        r"""
program = root expr+
expr = opentag list closetag
item = (opentag notpar closetag) / expr
list = item+
root = ws? lpar "?xml" notpar rpar
opentag = ws? lpar word rpar ws?
closetag = lpar slash word rpar ws?
lpar = "<"
rpar = ">"
notpar = ~"[^<>]+"
slash = "/"
word = ~"\w+"
ws = ~"\s+"
"""
    )

    def generic_visit(self, node, visited_children):
        """Return the visited children when there are any, else the node."""
        if visited_children:
            return visited_children
        return node

    def visit_opentag(self, node, visited_children):
        """Return the tag name of an opening tag (third part of the rule)."""
        name_node = visited_children[2]
        return name_node.text

    def visit_closetag(self, node, visited_children):
        """Return the tag name of a closing tag (third part of the rule)."""
        name_node = visited_children[2]
        return name_node.text

    def visit_notpar(self, node, visited_children):
        """Return the matched text unchanged."""
        return node.text

    def visit_item(self, node, visited_children):
        """Return ``(tag, text)`` for a leaf element, or the nested result."""
        matched = visited_children[0]
        if len(matched) != 3:
            # Second alternative: whatever visit_expr produced.
            return matched
        # First alternative: [opening tag, text, closing tag].
        return (matched[0], matched[1])

    def visit_expr(self, node, visited_children):
        """Return ``(tag, items)`` for an element that contains a list of items."""
        return (visited_children[0], visited_children[1])

    def visit_program(self, node, visited_children):
        """Build the result dictionary from everything after the declaration."""
        _, body = visited_children
        return self.__makeDict__(body)

    def __makeDict__(self, struct, level = 0):
        """Turn a list of ``(key, value)`` pairs into a dictionary.

        List values are converted recursively. ``"state"`` is (re)assigned
        after each key, so it first appears right after the first key and is
        absent when ``struct`` is empty. ``level`` is accepted but not used.
        """
        result = {}
        for name, payload in struct:
            result[name] = (
                self.__makeDict__(payload) if isinstance(payload, list) else payload
            )
            result["state"] = 0
        return result
# Create the visitor, parse the sample document with it, and print the result.
visitor = XMLVisitor()
output = visitor.parse(xml)
print(output)
{'Garden': {'id': '97', 'state': 0, 'Flowers': {'id': '98', 'state': 0, 'Type': {'id': '99', 'state': 0, 'Level': {'id': '100', 'state': 0}}}}, 'state': 0}