Python 如何通过一些修改将xml转换为字典?

Python 如何通过一些修改将xml转换为字典?,python,json,xml,xmltodict,Python,Json,Xml,Xmltodict,我目前有一个以下格式的xml文件: <?xml version="1.0" encoding="UTF-8" ?> <Garden> <id>97</id> <Flowers> <id>98</id> <Type> <id>99</id>

我目前有一个以下格式的xml文件:

<?xml version="1.0" encoding="UTF-8" ?>
    <Garden>
        <id>97</id>
        <Flowers>
            <id>98</id>
            <Type>
                <id>99</id>
                <Level>
                    <id>100</id>                    
                </Level>
            </Type>
        </Flowers>
    </Garden>

我希望在把它转换成字典/JSON 时,能够为每一层都添加一个默认的
"state": "0"
。我真的不知道该怎么做,任何帮助都将不胜感激。

这就是我现在所拥有的:

# Read the XML source. The document declares encoding="UTF-8", so decode it
# explicitly instead of relying on the platform's default text encoding.
with open("gardenlist.xml", 'r', encoding="utf-8") as file:
    xmlString = file.read()
print(xmlString)
# Convert the parsed XML (a nested dict) into a pretty-printed JSON string.
jsonString = json.dumps(xmltodict.parse(xmlString), indent=4)

这只是打印json,但没有
“state”:“0”
值。

您可以在得到字典之后递归地处理:检查哪些值本身也是字典,并把 state 键添加到这些字典中。见:

import collections 

def addAttr(target):
    """Recursively add a default ``"state": "0"`` entry to nested mappings.

    The entry is written into ``target`` itself and into every dictionary
    reachable from it, either directly as a value or as an item of a list
    value (the shape used for repeated sibling elements).

    Args:
        target: A mutable mapping (``dict`` or ``OrderedDict``). It is
            modified in place.

    Returns:
        None. The mapping is updated in place.
    """
    target["state"] = "0"
    # "state" is already present before iteration starts, so the size of
    # `target` does not change while its values are being walked.
    for value in target.values():
        # `dict` also covers `collections.OrderedDict`, which subclasses it.
        if isinstance(value, dict):
            addAttr(value)
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    addAttr(item)

# Parse the XML text into a nested dictionary.
d1 = xmltodict.parse(xmlString)
# Start at the "Garden" entry, so the outer wrapper dict that holds the
# "Garden" key does not itself receive a "state" entry.
addAttr(d1["Garden"])

我认为正确的方法是准备所需的XML结构,然后将其转换为dict和json字符串:

复杂方法:

import xml.etree.ElementTree as ET
import xmltodict
import json

# Load the XML file and grab its root element.
tree = ET.parse('gardenlist.xml')
root = tree.getroot()

# Build the <state>0</state> element that will be added to the tree.
state_el = ET.Element('state')    # prepare `state` node
state_el.text = '0'
# Index 1 places it right after the root's first child.
root.insert(1, state_el)

def add_state(root, el_to_insert):
    """Insert a copy of ``el_to_insert`` into every descendant that has children.

    Walks the tree below ``root`` depth-first. Each element that has at least
    one child node receives its own deep copy of ``el_to_insert`` at index 1
    (right after its first child). ``root`` itself is not modified.

    Args:
        root: Element whose descendants are processed.
        el_to_insert: Template element; it is copied, never attached directly,
            so editing one inserted node later does not affect the others.

    Returns:
        None. The tree is modified in place.
    """
    import copy  # local import keeps this snippet self-contained

    for el in root:
        if len(el):    # element has child nodes
            el.insert(1, copy.deepcopy(el_to_insert))
            add_state(el, el_to_insert)


# Add the state node to every nested element that has children.
add_state(root, state_el)
# encoding="unicode" makes ET.tostring return a str, which is then parsed
# by xmltodict and dumped as indented JSON.
json_str = json.dumps(xmltodict.parse(ET.tostring(root, encoding="unicode")), indent=4)
print(json_str)
实际输出:

{
    "Garden": {
        "id": "97",
        "state": "0",
        "Flowers": {
            "id": "98",
            "state": "0",
            "Type": {
                "id": "99",
                "state": "0",
                "Level": {
                    "id": "100",
                    "state": "0"
                }
            }
        }
    }
}

在我看来,这是一个自己编写解析器的好机会:

from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor, RegexNode

xml = """
<?xml version="1.0" encoding="UTF-8" ?>
    <Garden>
        <id>97</id>
        <Flowers>
            <id>98</id>
            <Type>
                <id>99</id>
                <Level>
                    <id>100</id>                    
                </Level>
            </Type>
        </Flowers>
    </Garden>
"""

class XMLVisitor(NodeVisitor):
    """Parse a small XML subset into nested dicts, adding a ``"state"`` key.

    The grammar accepts an ``<?xml ...>`` header followed by one or more
    elements. Tags are bare names made of word characters (no attributes),
    and an element contains either text or a list of child elements.
    Closing-tag names are parsed but never compared with the opening tag.
    """

    grammar = Grammar(
        r"""
        program     = root expr+
        expr        = opentag list closetag
        item        = (opentag notpar closetag) / expr
        list        = item+

        root        = ws? lpar "?xml" notpar rpar
        opentag     = ws? lpar word rpar ws?
        closetag    = lpar slash word rpar ws?

        lpar        = "<"
        rpar        = ">"
        notpar      = ~"[^<>]+"
        slash       = "/"

        word        = ~"\w+"
        ws          = ~"\s+"
        """
    )

    def generic_visit(self, node, visited_children):
        """Fallback for rules without a dedicated ``visit_*`` method.

        Returns the list of visited children, or the node itself when that
        list is empty (e.g. leaf nodes).
        """
        return visited_children or node

    def visit_opentag(self, node, visited_children):
        """Return the tag name of an opening tag."""
        # Children follow the `opentag` rule: ws?, lpar, word, rpar, ws?.
        # `word` is still a raw node (via generic_visit), hence `.text`.
        ws, lpar, word, *_ = visited_children
        return word.text

    def visit_closetag(self, node, visited_children):
        """Return the tag name of a closing tag."""
        # Children follow the `closetag` rule: lpar, slash, word, rpar, ws?.
        lpar, slash, word, *_ = visited_children
        return word.text

    def visit_notpar(self, node, visited_children):
        """Return the matched text (any run of characters other than < and >)."""
        return node.text

    def visit_item(self, node, visited_children):
        """Return ``(tag, text)`` for a leaf element, or the nested expr result."""
        # `item` is a choice, so visited_children holds the single alternative
        # that matched. The first alternative yields a 3-item list
        # [opentag, notpar, closetag]; the `expr` alternative yields the
        # 2-tuple built by visit_expr.
        if len(visited_children[0]) == 3:
            # first branch
            opentag, content, *_= visited_children[0]
            return (opentag, content)
        else:
            return visited_children[0]

    def visit_expr(self, node, visited_children):
        """Return ``(tag, children)`` for an element that contains child items."""
        # The closing tag's name is discarded here.
        tag, lst, *_ = visited_children
        return (tag, lst)

    def visit_program(self, node, visited_children):
        """Build the final dictionary from the top-level elements."""
        # `root` (the <?xml ...> header) is parsed but not used in the result.
        root, content = visited_children
        return self.__makeDict__(content)

    def __makeDict__(self, struct, level = 0):
        """Convert a list of ``(key, value)`` pairs into a nested dict.

        A value that is a list is treated as child pairs and converted
        recursively. ``"state": 0`` (an int) is written after each key is
        stored, so every non-empty dict gets it -- including the outermost
        one -- while an empty ``struct`` yields an empty dict. Repeated keys
        overwrite earlier ones.

        ``level`` is accepted but never read, and the recursive call does not
        pass it.
        """
        d = {}
        for item in struct:
            key, value = item
            if isinstance(value, list):
                value = self.__makeDict__(value)
            d[key] = value
            d["state"] = 0
        return d

# Parse the sample document with the visitor's grammar; the value returned
# for the top-level `program` rule is the dict built by visit_program.
visitor = XMLVisitor()
output = visitor.parse(xml)

print(output)

您现在有什么功能?所以您希望代码能够遍历嵌套字典的所有级别,将
state
添加到每个没有嵌套字典的字典中?这其中的哪一部分你遇到了麻烦?@ScottHunter我在将状态添加到字典中时遇到了麻烦。我已经用我所拥有的更新了这个问题。
state
是否可能已经在某些地方定义了?如果是这样的话,应该怎么做?您的代码中哪一部分甚至试图添加
state
?xmltodict 可以接受文件名作为参数吗?@LauraSmith 我认为这是不可能的。如果这回答了您的问题,请随意接受此答案。我发现 Garden 节点上多了一个额外的 "state",请确认一下。@LauraSmith 已经改好了。@LauraSmith,我使用了您发布的完全相同的输入 XML,效果很好,谢谢。我刚刚也测试了一下,效果很好。:) @LauraSmith,那太好了。在我看来,最好的方法是做得更复杂一些,编写自己的解析器(参见我的答案 ;-))。谢谢你的方法,我真的很感激。:) @LauraSmith:不客气。不过,最好还是选择一个更容易理解的答案。
from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor, RegexNode

xml = """
<?xml version="1.0" encoding="UTF-8" ?>
    <Garden>
        <id>97</id>
        <Flowers>
            <id>98</id>
            <Type>
                <id>99</id>
                <Level>
                    <id>100</id>                    
                </Level>
            </Type>
        </Flowers>
    </Garden>
"""

class XMLVisitor(NodeVisitor):
    """Parse a small XML subset into nested dicts, adding a ``"state"`` key.

    The grammar accepts an ``<?xml ...>`` header followed by one or more
    elements. Tags are bare names made of word characters (no attributes),
    and an element contains either text or a list of child elements.
    Closing-tag names are parsed but never compared with the opening tag.
    """

    grammar = Grammar(
        r"""
        program     = root expr+
        expr        = opentag list closetag
        item        = (opentag notpar closetag) / expr
        list        = item+

        root        = ws? lpar "?xml" notpar rpar
        opentag     = ws? lpar word rpar ws?
        closetag    = lpar slash word rpar ws?

        lpar        = "<"
        rpar        = ">"
        notpar      = ~"[^<>]+"
        slash       = "/"

        word        = ~"\w+"
        ws          = ~"\s+"
        """
    )

    def generic_visit(self, node, visited_children):
        """Fallback for rules without a dedicated ``visit_*`` method.

        Returns the list of visited children, or the node itself when that
        list is empty (e.g. leaf nodes).
        """
        return visited_children or node

    def visit_opentag(self, node, visited_children):
        """Return the tag name of an opening tag."""
        # Children follow the `opentag` rule: ws?, lpar, word, rpar, ws?.
        # `word` is still a raw node (via generic_visit), hence `.text`.
        ws, lpar, word, *_ = visited_children
        return word.text

    def visit_closetag(self, node, visited_children):
        """Return the tag name of a closing tag."""
        # Children follow the `closetag` rule: lpar, slash, word, rpar, ws?.
        lpar, slash, word, *_ = visited_children
        return word.text

    def visit_notpar(self, node, visited_children):
        """Return the matched text (any run of characters other than < and >)."""
        return node.text

    def visit_item(self, node, visited_children):
        """Return ``(tag, text)`` for a leaf element, or the nested expr result."""
        # `item` is a choice, so visited_children holds the single alternative
        # that matched. The first alternative yields a 3-item list
        # [opentag, notpar, closetag]; the `expr` alternative yields the
        # 2-tuple built by visit_expr.
        if len(visited_children[0]) == 3:
            # first branch
            opentag, content, *_= visited_children[0]
            return (opentag, content)
        else:
            return visited_children[0]

    def visit_expr(self, node, visited_children):
        """Return ``(tag, children)`` for an element that contains child items."""
        # The closing tag's name is discarded here.
        tag, lst, *_ = visited_children
        return (tag, lst)

    def visit_program(self, node, visited_children):
        """Build the final dictionary from the top-level elements."""
        # `root` (the <?xml ...> header) is parsed but not used in the result.
        root, content = visited_children
        return self.__makeDict__(content)

    def __makeDict__(self, struct, level = 0):
        """Convert a list of ``(key, value)`` pairs into a nested dict.

        A value that is a list is treated as child pairs and converted
        recursively. ``"state": 0`` (an int) is written after each key is
        stored, so every non-empty dict gets it -- including the outermost
        one -- while an empty ``struct`` yields an empty dict. Repeated keys
        overwrite earlier ones.

        ``level`` is accepted but never read, and the recursive call does not
        pass it.
        """
        d = {}
        for item in struct:
            key, value = item
            if isinstance(value, list):
                value = self.__makeDict__(value)
            d[key] = value
            d["state"] = 0
        return d

# Parse the sample document with the visitor's grammar; the value returned
# for the top-level `program` rule is the dict built by visit_program.
visitor = XMLVisitor()
output = visitor.parse(xml)

print(output)
{'Garden': {'id': '97', 'state': 0, 'Flowers': {'id': '98', 'state': 0, 'Type': {'id': '99', 'state': 0, 'Level': {'id': '100', 'state': 0}}}}, 'state': 0}