Warning: file_get_contents(/data/phpspider/zhask/data//catemap/1/typescript/8.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
如何在XML Python中迭代子元素的子元素?_Python_Python 3.x_Xml_Lxml - Fatal编程技术网

如何在XML Python中迭代子元素的子元素?

如何在XML Python中迭代子元素的子元素?,python,python-3.x,xml,lxml,Python,Python 3.x,Xml,Lxml,我的XML结构如下: <pages> <page> <textbox> <new_line> <text> </text> </new_line> </textbox> </page> </pages> 编辑: 示例字符串: """<?xml version="1.0" encoding="utf-8"?> <

我的XML结构如下:

<pages>
 <page>
  <textbox>
    <new_line>
     <text>
     </text>
    </new_line>
  </textbox>
 </page>
</pages>
编辑:

示例字符串:

"""<?xml version="1.0" encoding="utf-8"?>
<pages>
    <page>
        <textbox>
            <new_line>
                <text size="12.482">C</text>
                <text size="12.333">A</text>
                <text size="12.333">P</text>
                <text size="12.333">I</text>
                <text size="12.482">T</text>
                <text size="12.482">O</text>
                <text size="12.482">L</text>
                <text size="12.482">O</text>
                <text></text>
                <text size="12.482">I</text>
                <text size="12.482">I</text>
                <text size="12.482">I</text>
                <text></text>
          </new_line>
        </textbox>
    </page>
</pages>
"""
"""
C
A
P
I
T
O
L
O
I
I
I
"""
预期产出:

<pages>
    <page>
        <textbox>
            <new_line>
                <text size="12.482">C</text>
                <text size="12.333">API</text>
                <text size="12.482">TOLO</text>
                <text/>
                <text size="12.482">III</text>
                <text/>
            </new_line>
        </textbox>
    </page>
</pages>

C
API
TOLO
III

您可以定义一个递归函数来解决本例中的多层XML。 我为这个问题写了一个短代码

import sys
import xml.etree.ElementTree as etree

def add_sub_element(parent, tag, attrib, text=None):
    """Append a new child element to ``parent`` and return it.

    Args:
        parent: The element that receives the new child.
        tag: Tag name of the new child.
        attrib: Mapping of attribute names to values for the new child.
        text: Optional text content. Falsy values (``None`` or ``""``) leave
            the child's ``text`` unset.

    Returns:
        The newly created child element.
    """
    # The default used to be the string 'None', which is truthy and therefore
    # ended up as literal element text whenever the argument was omitted.
    new_feed = etree.SubElement(parent, tag, attrib)

    if text:
        new_feed.text = text

    return new_feed


def my_tree_mapper(parent_tag, current, element):
    """Copy the children of ``element`` under ``current``, merging text runs.

    The copy is recursive. Inside a ``<new_line>`` whose parent is a
    ``<textbox>``, consecutive ``<text>`` children that carry the same
    ``size`` attribute are collapsed into a single ``<text size=...>``
    element whose text is the concatenation of the run. Every other child
    is copied as-is (tag, attributes, text) and then processed recursively.

    Args:
        parent_tag: Tag of the parent of ``current`` ('' for the root).
        current: Destination element that receives the copied children.
        element: Source element whose children are read.

    Returns:
        None. ``current`` is modified in place.
    """
    if not (current.tag == 'new_line' and parent_tag == 'textbox'):
        # Outside the merge context: plain structural copy.
        for child in element:
            sub_element = add_sub_element(
                current, child.tag, child.attrib, child.text)
            my_tree_mapper(current.tag, sub_element, child)
        return

    # Size of the run currently being accumulated (None = no open run) and
    # the text fragments collected for it.
    pending_size = None
    pending_parts = []

    for child in element:
        is_sized_text = child.tag == 'text' and 'size' in child.attrib
        size = child.attrib['size'] if is_sized_text else None

        if pending_size is not None and size != pending_size:
            # The run ended (different size or a different kind of child).
            # Always emit it as 'text', not with the tag of the child that
            # happened to terminate it.
            add_sub_element(
                current, 'text', {'size': pending_size},
                ''.join(pending_parts))
            pending_parts = []
        pending_size = size

        if is_sized_text:
            # child.text is None for an empty element; treat it as "".
            pending_parts.append(child.text or '')
        else:
            sub_element = add_sub_element(
                current, child.tag, child.attrib, child.text)
            my_tree_mapper(current.tag, sub_element, child)

    if pending_size is not None:
        # Emit the run that reaches the end of the <new_line>; without this
        # the trailing group would be silently dropped.
        add_sub_element(
            current, 'text', {'size': pending_size}, ''.join(pending_parts))


# Sample document: one <new_line> holding per-character <text> elements.
the_input = """<?xml version="1.0" encoding="utf-8"?>
<pages>
    <page>
        <textbox>
            <new_line>
                <text size="12.482">C</text>
                <text size="12.333">A</text>
                <text size="12.333">P</text>
                <text size="12.333">I</text>
                <text size="12.482">T</text>
                <text size="12.482">O</text>
                <text size="12.482">L</text>
                <text size="12.482">O</text>
                <text></text>
                <text size="12.482">I</text>
                <text size="12.482">I</text>
                <text size="12.482">I</text>
                <text></text>
          </new_line>
        </textbox>
    </page>
</pages>
"""

# Parse the sample and create an empty root with the same tag/attributes;
# my_tree_mapper fills it with the merged copy of the source tree.
tree = etree.ElementTree(etree.fromstring(the_input))
root = tree.getroot()
new_root = etree.Element(root.tag, root.attrib)

my_tree_mapper('', new_root, root)
# encoding='unicode' makes tostring() return str. The default returns bytes,
# which print() shows as a b'...' repr with newlines escaped as "\n".
print(etree.tostring(new_root, encoding='unicode'))
导入系统 将xml.etree.ElementTree作为etree导入 def add_sub_元素(父元素、标记、属性、text='None'): new_feed=etree.SubElement(父元素、标记、属性) 若有(正文): new_feed.text=文本 返回新的订阅源 定义我的树映射器(父标记、当前、元素): 如果(current.tag=='new_line'和parent_tag=='textbox'): 当前大小=-1 当前_text=“” 对于元素中的子元素: child_tag=child.tag child_attrib=child.attrib child_text=child.text 如果(child_标签=='text'和'size'在child_属性中): 如果(子属性['size']==当前大小): #对于具有相同大小的“文本”子级 #追加文本,直到得到不同的大小 当前文本=当前文本+子文本 其他: 如果(当前_大小!=-1): #当我们得到不同的大小时,将子元素添加到树中 子元素=添加子元素( 当前,子_标记,{'size':当前_大小},当前_文本) 当前大小=子属性['size'] 当前文本=子文本 其他: 如果(当前_大小!=-1): #或者当我们得到不同的标签时,将子元素添加到树中 子元素=添加子元素( 当前,子_标记,{'size':当前_大小},当前_文本) #没有不同标签的逻辑 子元素=添加子元素( 当前、子标签、子属性、子文本) my_tree_映射器(current.tag、sub_元素、child) 当前大小=-1 当前_text=“” 其他: #如果不满足条件,则没有逻辑 对于元素中的子元素: child_tag=child.tag child_attrib=child.attrib child_text=child.text 子元素=添加子元素( 当前、子标签、子属性、子文本) my_tree_映射器(current.tag、sub_元素、child) _输入为“” C A. P 我 T O L O 我 我 我 """ tree=etree.ElementTree(etree.fromstring(_输入)) root=tree.getroot() new_root=etree.Element(root.tag,root.attrib) 我的树映射器(“”,新的根,根) 打印(etree.tostring(新根)) 希望这能帮助你,或者至少给你一些想法


(如果您想了解更多关于递归函数的信息,以及更多关于XML etree方法的信息)

我的问题很糟糕,完成了,谢谢。

您预期的结果是什么?

我的问题中包含了它!看看更新的内容。

谢谢,这是非常有用的!但是有一件事——我得到的输出没有被很好地格式化打印出来,我如何解决这个问题呢?

有几种方法可以美化XML。您可以使用lxml库获得更多选项,我发现了这篇文章()
import sys
import xml.etree.ElementTree as etree

def add_sub_element(parent, tag, attrib, text=None):
    """Append a new child element to ``parent`` and return it.

    Args:
        parent: The element that receives the new child.
        tag: Tag name of the new child.
        attrib: Mapping of attribute names to values for the new child.
        text: Optional text content. Falsy values (``None`` or ``""``) leave
            the child's ``text`` unset.

    Returns:
        The newly created child element.
    """
    # The default used to be the string 'None', which is truthy and therefore
    # ended up as literal element text whenever the argument was omitted.
    new_feed = etree.SubElement(parent, tag, attrib)

    if text:
        new_feed.text = text

    return new_feed


def my_tree_mapper(parent_tag, current, element):
    """Copy the children of ``element`` under ``current``, merging text runs.

    The copy is recursive. Inside a ``<new_line>`` whose parent is a
    ``<textbox>``, consecutive ``<text>`` children that carry the same
    ``size`` attribute are collapsed into a single ``<text size=...>``
    element whose text is the concatenation of the run. Every other child
    is copied as-is (tag, attributes, text) and then processed recursively.

    Args:
        parent_tag: Tag of the parent of ``current`` ('' for the root).
        current: Destination element that receives the copied children.
        element: Source element whose children are read.

    Returns:
        None. ``current`` is modified in place.
    """
    if not (current.tag == 'new_line' and parent_tag == 'textbox'):
        # Outside the merge context: plain structural copy.
        for child in element:
            sub_element = add_sub_element(
                current, child.tag, child.attrib, child.text)
            my_tree_mapper(current.tag, sub_element, child)
        return

    # Size of the run currently being accumulated (None = no open run) and
    # the text fragments collected for it.
    pending_size = None
    pending_parts = []

    for child in element:
        is_sized_text = child.tag == 'text' and 'size' in child.attrib
        size = child.attrib['size'] if is_sized_text else None

        if pending_size is not None and size != pending_size:
            # The run ended (different size or a different kind of child).
            # Always emit it as 'text', not with the tag of the child that
            # happened to terminate it.
            add_sub_element(
                current, 'text', {'size': pending_size},
                ''.join(pending_parts))
            pending_parts = []
        pending_size = size

        if is_sized_text:
            # child.text is None for an empty element; treat it as "".
            pending_parts.append(child.text or '')
        else:
            sub_element = add_sub_element(
                current, child.tag, child.attrib, child.text)
            my_tree_mapper(current.tag, sub_element, child)

    if pending_size is not None:
        # Emit the run that reaches the end of the <new_line>; without this
        # the trailing group would be silently dropped.
        add_sub_element(
            current, 'text', {'size': pending_size}, ''.join(pending_parts))


# Sample document: one <new_line> holding per-character <text> elements.
the_input = """<?xml version="1.0" encoding="utf-8"?>
<pages>
    <page>
        <textbox>
            <new_line>
                <text size="12.482">C</text>
                <text size="12.333">A</text>
                <text size="12.333">P</text>
                <text size="12.333">I</text>
                <text size="12.482">T</text>
                <text size="12.482">O</text>
                <text size="12.482">L</text>
                <text size="12.482">O</text>
                <text></text>
                <text size="12.482">I</text>
                <text size="12.482">I</text>
                <text size="12.482">I</text>
                <text></text>
          </new_line>
        </textbox>
    </page>
</pages>
"""

# Parse the sample and create an empty root with the same tag/attributes;
# my_tree_mapper fills it with the merged copy of the source tree.
tree = etree.ElementTree(etree.fromstring(the_input))
root = tree.getroot()
new_root = etree.Element(root.tag, root.attrib)

my_tree_mapper('', new_root, root)
# encoding='unicode' makes tostring() return str. The default returns bytes,
# which print() shows as a b'...' repr with newlines escaped as "\n".
print(etree.tostring(new_root, encoding='unicode'))