Python 3.x xml到多个数据帧

Python 3.x xml到多个数据帧,python-3.x,Python 3.x,我想从XML中提取数据并将其转换为多个数据帧, 我试着用元素树xml导入,只是打印出标签和文本(仅2列),我不知道如何将其拆分为多个数据帧 <?xml version="1.0" encoding="ISO-8859-1"?> <spec:zzz> <xxx> <class> <table_name> <attributes>

我想从 XML 中提取数据,并将其转换为多个数据帧(DataFrame)。我尝试用 ElementTree 导入 XML,但只能打印出标签和文本(仅 2 列),不知道如何将其拆分为多个数据帧。

<?xml version="1.0" encoding="ISO-8859-1"?>
<spec:zzz>
<xxx>
    <class>
        <table_name>
            <attributes>
                <aaa>0</aaa>
                <bbb>1</bbb>
                <ccc>
                    <element>
                        <ccc1>0</ccc1>
                        <ccc2>0</ccc2>
                        <ccc3>3</ccc3>
                    </element>
                </ccc>
            </attributes>
        </table_name>
        <table_name>
            <attributes>
                <aaa>0</aaa>
                <bbb>0</bbb>
                <ccc>
                    <element>
                        <ccc1>0</ccc1>
                        <ccc2>0</ccc2>
                        <ccc3>3</ccc3>
                    </element>
                </ccc>
                <ddd>4</ddd>
            </attributes>
        </table_name>
    </class>
    <class>
        <table_name1>
            <attributes>
            </attributes>
        </table_name1>
    </class>
    <class>
        <table_name2>
            <attributes>
                <eee>0</eee>
                <fff></fff>
                <ggg></ggg>
            </attributes>
        </table_name2>
    </class>
</xxx>
</spec:zzz>
试试这个

from simplified_scrapy import utils, SimplifiedDoc
# Placeholder: paste the XML document text between the triple quotes.
xml = '''
your xml
'''
doc = SimplifiedDoc(xml)

# Walk the children of every <class> element; each group is printed as
# "<tag of first child> <list of rows>".
for group in doc.selects('class').children:
    rows = []
    for record in group.child.children:
        # A field that has a child element is flattened into one
        # comma-joined string of that child's children texts; a leaf
        # field contributes its own text.
        rows.append([
            ','.join(field.child.children.text) if field.child else field.text
            for field in record
        ])
    print(group[0].tag, rows)
结果:

table_name [['0', '1', '0,0,3'], ['0', '0', '0,0,3', '4']]
table_name1 [[]]
table_name2 [['0', '', '']]
处理多个文件

from simplified_scrapy import utils, SimplifiedDoc

# Directory whose files are processed one by one.
xmlDir = 'test/'

for path in utils.getSubFile(xmlDir):
    # Load one file's content and parse it.
    doc = SimplifiedDoc(utils.getFileContent(path))

    # Walk the children of every <class> element; each group is printed as
    # "<tag of first child> <list of rows>".
    for group in doc.selects('class').children:
        rows = []
        for record in group.child.children:
            # A field that has a child element is flattened into one
            # comma-joined string of that child's children texts; a leaf
            # field contributes its own text.
            rows.append([
                ','.join(field.child.children.text) if field.child else field.text
                for field in record
            ])
        print(group[0].tag, rows)

感谢 @Dabinsou,可以帮我看看如何导入多个 XML 文件并生成这样的结果吗? 感谢 @Dabinsou,可以帮我看看如何获取末尾的标签吗,例如:0
from simplified_scrapy import utils, SimplifiedDoc

# Folder containing the XML files to convert.
xmlDir = 'test/'


def _cell(field):
    """Return one cell value for a field element.

    A field with a child element yields the comma-joined texts of that
    child's children; otherwise the field's own text is returned.
    """
    if field.child:
        return ','.join(field.child.children.text)
    return field.text


for source in utils.getSubFile(xmlDir):
    content = utils.getFileContent(source)
    doc = SimplifiedDoc(content)

    # Each entry holds the children of one <class> element.
    for members in doc.selects('class').children:
        rows = [[_cell(field) for field in record]
                for record in members.child.children]
        # Label the rows with the tag of the first child in the group.
        print(members[0].tag, rows)