Python 合并xml.etree.ElementTree中的元素

Python 合并xml.etree.ElementTree中的元素,python,xml,elementtree,Python,Xml,Elementtree,我有一个巨大的测试数据,如下所示(是的,我无法控制这些数据)。每一行实际上是6个部分,我需要根据这些数据生成一个XML Nav;Basic;Dest;Smoke;No;Yes; Nav;Dest;Recent;Regg;No;Yes; Nav;Dest;Favourites;Regg;No;Yes; ... Nav;Dest using on board;By POI;Smoke;No;Yes; Nav;Dest using on board;Other;Regg;No;Yes; 每行的前3个

我有一个巨大的测试数据,如下所示(是的,我无法控制这些数据)。每一行实际上是6个部分,我需要根据这些数据生成一个XML

Nav;Basic;Dest;Smoke;No;Yes;
Nav;Dest;Recent;Regg;No;Yes;
Nav;Dest;Favourites;Regg;No;Yes;
...
Nav;Dest using on board;By POI;Smoke;No;Yes;
Nav;Dest using on board;Other;Regg;No;Yes;
每行的前3个元素表示嵌套的“测试套件”(testsuite)XML元素,最后3个元素应创建一个“测试用例”(testcase)XML元素。

我已使用以下代码成功地将其转换为XML:

# Root <testsuite name="Tests"> element; every generated suite is attached
# somewhere below it.
testsuite = ET.Element('testsuite', name="Tests")


def _create_testcase_tag(elem):
    """Append one suite/suite/suite/testcase chain for *elem* to the root.

    elem must unpack into exactly six values: three suite names (outermost
    first) followed by the three values written into the summary text.

    A fresh chain of three nested <testsuite> elements is created on every
    call -- existing suites with the same names are never reused.  The
    module-level ``testsuite`` root is only read, never rebound.
    """
    name1, name2, name3, test_type, automated, available = elem

    # Walk downwards, creating one new <testsuite> per level.
    parent = testsuite
    for suite_name in (name1, name2, name3):
        parent = ET.SubElement(parent, "testsuite", name=suite_name)

    # The innermost suite receives the test case and its summary.
    case = ET.SubElement(parent, "testcase", name="TBD")
    details = (test_type, automated, available)
    ET.SubElement(case, "summary").text = (
        "Test Type= %s, Automated= %s, Available=%s" % details
    )

# Read the ';'-separated input file.  Every line with at least six fields
# contributes one (level1, level2, level3, elem4, elem5, elem6) tuple; shorter
# lines are ignored.
# NOTE(review): input_file, output_file and lines_as_list are not defined in
# this snippet -- presumably lines_as_list is a list created earlier; confirm.
with open(input_file) as in_file:
    for a_line in in_file:
        # Nothing here can raise ValueError, so no try/except is needed.
        parameters = [field.strip() for field in a_line.split(';')]
        if len(parameters) >= 6:
            lines_as_list.append(tuple(parameters[:6]))

# Sort so rows sharing the same suite names end up next to each other, then
# emit one element chain per row.
lines_as_list.sort()
for elem in lines_as_list:
    _create_testcase_tag(elem)

# Serialise the tree with an XML declaration, encoded as UTF-8.
output_xml = ET.ElementTree(testsuite)
output_xml.write(output_file, xml_declaration=True, encoding="UTF-8")
上面的代码生成如下所示的XML:

<testsuite name="Tests">
    <testsuite name="Nav">
        <testsuite name="Basic navigation">
            <testsuite name="Set destination">
                <testcase name="TBD">
                    <summary>Test Type= Smoke test Automated= No, Available=Yes</summary>
                </testcase>
            </testsuite>
        </testsuite>
    </testsuite>

    <testsuite name="Nav">
        <testsuite name="Set destination">
            <testsuite name="Recent">
                <testcase name="TBD">
                    <summary>
                    Test Type= Reggression test Automated= No, Available=Yes
                    </summary>
                </testcase>
            </testsuite>
        </testsuite>
    </testsuite>
</testsuite>
...
希望我已经说清楚了:level1、level2 和 level3 对应的 testsuite 元素应该是唯一的(同名的不要重复创建),测试用例则放在它们里面。

我该怎么做?请不要建议使用任何外部库!我无法在客户站点中安装新库,只能使用 xml.etree.ElementTree。


谢谢

先按前三个字段对数据进行排序,然后在添加到 XML 之前使用 itertools.groupby() 按您想要的方式对数据分组。这里假设嵌套层级与您示例中所示的一致。

from operator import itemgetter
from itertools import groupby

# Field accessors for one parsed row (a list of six strings).
# names      -> the first three fields (the nested suite names); used as sort key
# test_items -> the remaining fields (the test-case details)
# level1..3  -> the individual suite names; used as groupby() keys
names = itemgetter(slice(3))
test_items = itemgetter(slice(3,None))
level1 = itemgetter(0)
level2 = itemgetter(1)
level3 = itemgetter(2)


# Sample input: one record per line, six ';'-separated fields plus a
# trailing ';'.
s = """Nav;Basic;Dest;Smoke;No;Yes;
Nav;Dest;Recent;Regg;No;Yes;
Nav;Dest;Favourites;Regg;No;Yes;
Nav;Dest using on board;By POI;Smoke;No;Yes;
Nav;Dest;Recent;Regg;Yes;Yes;
Nav;Dest using on board;Other;Regg;No;Yes;
Nav;Basic;Dest;Smoke;Yes;Yes;
Nav;Basic;Dest;Smoke;Yes;No;
"""
# Parse every line into a list of fields; the trailing ';' is removed first
# so that split() does not produce an empty last field.
data = list()
for line in s.splitlines():
    data.append(line.rstrip(';').split(';'))
# groupby() only merges *adjacent* items with equal keys, so the rows must be
# sorted by the suite names before grouping.
data.sort(key = names)
##for thing in data:
##    print thing

# Root element of the generated document.
testsuite = ET.Element('testsuite')
testsuite.set("name", "Tests")
# Three nested groupby() passes: each distinct level1 / level2 / level3 name
# produces exactly one <testsuite> element at the matching depth.
# The inner groups are lazy iterators over the outer group, so they have to be
# consumed inside the corresponding loop body, as done here.
for key1, group1 in groupby(data, level1):
    print '***', key1
    testsuite_level1 = ET.SubElement(testsuite, "testsuite")
    testsuite_level1.set("name", key1)
    for key2, group2 in groupby(group1, level2):
        print '******', key2
        testsuite_level2 = ET.SubElement(testsuite_level1, "testsuite")
        testsuite_level2.set("name", key2)
        for key3, group3 in groupby(group2, level3):
            print '*********', key3
            testsuite_level3 = ET.SubElement(testsuite_level2, "testsuite")
            testsuite_level3.set("name", key3)
            # NOTE(review): a single <testcase> is created per level3 group
            # and receives one <summary> per row; the question's code creates
            # one <testcase> per row -- confirm which layout is wanted.
            testcase = ET.SubElement(testsuite_level3, "testcase")
            testcase.set("name", "TBD")
            for element in group3:
                print '         ', test_items(element)
                summary = ET.SubElement(testcase, "summary")
                summary.text = "Test Type= {}, Automated= {}, Available={}".format(*test_items(element))

>>>
*** Nav
****** Basic
********* Dest
          ['Smoke', 'No', 'Yes']
          ['Smoke', 'Yes', 'Yes']
          ['Smoke', 'Yes', 'No']
****** Dest
********* Favourites
          ['Regg', 'No', 'Yes']
********* Recent
          ['Regg', 'No', 'Yes']
          ['Regg', 'Yes', 'Yes']
****** Dest using on board
********* By POI
          ['Smoke', 'No', 'Yes']
********* Other
          ['Regg', 'No', 'Yes']
>>>
作为对“正确的XML解决方案”这条评论的回应,我想到了下面这种做法——也许这正是您想要的。这里必须为每一层创建唯一的标签名,而不是让每个标签都叫 testsuite。这样做的好处是可以动态地构建树:


为什么我们不建议使用外部库?看起来您已经在使用
lxml
,这已经是一个非常繁重的外部库。我认为您需要在定义之上创建导航和基本导航。我的意思是1级和2级仅创建一次,其余保持不变。@SlaterTyranus,请参阅编辑。我使用xml.etree.ElementTreedoes,第一行除外,所有行都是Nav;目的地;像这样,我想这是假设整个数据集都可以读入内存进行排序。这看起来很有趣,我回家后会试试。谢谢你的回答,尽管我一直在寻找一个关于如何创建这些数据的正确XML解决方案。我将接受这个答案,因为itemgetter/groupby对我来说是新的。只需根据需要添加
ET.SubElement()
语句,签出编辑。将我的变量名与您的变量名混合可能并不理想,但您可以从中吸取教训。
from operator import itemgetter
from itertools import groupby

# Field accessors for one parsed row (a list of six strings).
# names      -> the first three fields (the nested suite names); used as sort key
# test_items -> the remaining fields (the test-case details)
# level1..3  -> the individual suite names; used as groupby() keys
names = itemgetter(slice(3))
test_items = itemgetter(slice(3,None))
level1 = itemgetter(0)
level2 = itemgetter(1)
level3 = itemgetter(2)


# Sample input: one record per line, six ';'-separated fields plus a
# trailing ';'.
s = """Nav;Basic;Dest;Smoke;No;Yes;
Nav;Dest;Recent;Regg;No;Yes;
Nav;Dest;Favourites;Regg;No;Yes;
Nav;Dest using on board;By POI;Smoke;No;Yes;
Nav;Dest;Recent;Regg;Yes;Yes;
Nav;Dest using on board;Other;Regg;No;Yes;
Nav;Basic;Dest;Smoke;Yes;Yes;
Nav;Basic;Dest;Smoke;Yes;No;
"""
# Parse every line into a list of fields; the trailing ';' is removed first
# so that split() does not produce an empty last field.
data = list()
for line in s.splitlines():
    data.append(line.rstrip(';').split(';'))
# groupby() only merges *adjacent* items with equal keys, so the rows must be
# sorted by the suite names before grouping.
data.sort(key = names)
##for thing in data:
##    print thing

# Root element of the generated document.
testsuite = ET.Element('testsuite')
testsuite.set("name", "Tests")
# Three nested groupby() passes: each distinct level1 / level2 / level3 name
# produces exactly one <testsuite> element at the matching depth.
# The inner groups are lazy iterators over the outer group, so they have to be
# consumed inside the corresponding loop body, as done here.
for key1, group1 in groupby(data, level1):
    print '***', key1
    testsuite_level1 = ET.SubElement(testsuite, "testsuite")
    testsuite_level1.set("name", key1)
    for key2, group2 in groupby(group1, level2):
        print '******', key2
        testsuite_level2 = ET.SubElement(testsuite_level1, "testsuite")
        testsuite_level2.set("name", key2)
        for key3, group3 in groupby(group2, level3):
            print '*********', key3
            testsuite_level3 = ET.SubElement(testsuite_level2, "testsuite")
            testsuite_level3.set("name", key3)
            # NOTE(review): a single <testcase> is created per level3 group
            # and receives one <summary> per row; the question's code creates
            # one <testcase> per row -- confirm which layout is wanted.
            testcase = ET.SubElement(testsuite_level3, "testcase")
            testcase.set("name", "TBD")
            for element in group3:
                print '         ', test_items(element)
                summary = ET.SubElement(testcase, "summary")
                summary.text = "Test Type= {}, Automated= {}, Available={}".format(*test_items(element))

>>>
*** Nav
****** Basic
********* Dest
          ['Smoke', 'No', 'Yes']
          ['Smoke', 'Yes', 'Yes']
          ['Smoke', 'Yes', 'No']
****** Dest
********* Favourites
          ['Regg', 'No', 'Yes']
********* Recent
          ['Regg', 'No', 'Yes']
          ['Regg', 'Yes', 'Yes']
****** Dest using on board
********* By POI
          ['Smoke', 'No', 'Yes']
********* Other
          ['Regg', 'No', 'Yes']
>>>
def _create_testcase_tag(testsuite, elem):
    """Add the test case described by elem beneath testsuite.

    The first three fields of elem name three nested suite elements.  Spaces
    in those names are replaced by underscores and the result is used as the
    XML tag.  At each level an existing child with that tag is reused and a
    new one is created only when none exists, so every level1/level2/level3
    path occurs once.  The last three fields go into the <summary> text of a
    newly appended <testcase name="TBD">.

    testsuite --> xml.etree.ElementTree.Element (modified in place)
    elem --> sequence of exactly six strings

    return the same Element that was passed in as testsuite
    raise ValueError when elem does not contain exactly six items
    """
    level1, level2, level3, elem4, elem5, elem6 = elem

    parent = testsuite
    for level in (level1, level2, level3):
        tag = level.replace(' ', '_')
        # NOTE: find() interprets its argument as a path expression, so a
        # name containing '/' or '[' would not be matched as a plain tag.
        child = parent.find(tag)
        # find() returns None when nothing matches.  The comparison has to be
        # explicit: an Element without children is falsy, so ``not child``
        # would treat an existing-but-empty suite as missing and create a
        # duplicate (truth-testing an Element is deprecated as well).
        if child is None:
            child = ET.SubElement(parent, tag)
        parent = child

    # -- testcase: always a new element, one per input row
    testcase = ET.SubElement(parent, "testcase")
    testcase.set("name", "TBD")
    summary = ET.SubElement(testcase, "summary")
    summary.text = "Test Type= {}, Automated= {}, Available={}".format(elem4, elem5, elem6)

    return testsuite

# testsuite (root)
testsuite = ET.Element('testsuite')
testsuite.set("name", "Tests")

# Feed every input line to _create_testcase_tag(), which needs exactly six
# fields.  Rows need not be sorted: the helper looks up existing suites itself.
with open(input_file) as in_file:
    for line in in_file:
        fields = line.strip().rstrip(';').split(';')
        if len(fields) < 6:
            # Blank or truncated line: the helper's six-way unpack would
            # raise ValueError, so skip it instead of aborting the run.
            continue
        # Only the first six fields are meaningful; extra ones are ignored.
        testsuite = _create_testcase_tag(testsuite, fields[:6])