Python XML解析-获取XML文件名
标签:python, xml, csv, parsing, elementtree
我有一个可以正常运行的 Python 2.7 脚本,它解析同一目录中找到的所有 XML 文件,并将解析结果保存到一个 CSV 文件中。
问题: 如何修改下面的代码以显示(在CSV输出文件中)解析的记录来自哪个XML文件
下文依次给出:示例 XML 文件(Fruit1.xml)、当前输出、期望输出。
我曾尝试使用元组将 XML 文件名与解析出的 XML 元素配对,但没有成功。
回答:可以用下面的代码实现此功能:
from itertools import izip
import xml.etree.ElementTree as et
import os
import csv
# Build one (fruit, color, filename) row for every <CASE> element found in
# the XML files located next to this script, then write them to parsed.csv.
directory = os.path.dirname(os.path.realpath(__file__))
rows = []
# os.listdir() returns names in arbitrary order; sort for reproducible output.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.xml'):
        continue
    # Join with the script directory: a bare name would be resolved against
    # the current working directory, which need not be where the script lives.
    root = et.parse(os.path.join(directory, filename)).getroot()
    # A single pass per file keeps each record paired with its source file.
    for case in root.findall('CASE'):
        rows.append((case.get('Fruit'), case.get('Color'), filename))

# 'wb' is the mode the Python 2 csv module expects (this script targets 2.7).
with open('parsed.csv', 'wb') as myfile:
    fieldnames = ["fruit", "color", "filenames"]
    writer = csv.writer(myfile)
    writer.writerow(fieldnames)
    writer.writerows(rows)
评论(Charles Duffy):为什么你的代码里 XML 处理和文件名处理这两部分会彼此纠缠在一起?整个“取第一个文件的根元素,再把其余文件的元素扩展进去”的设计……我想也算是一种可行的做法,但你完全可以让代码返回一个
{filename: element_root}
格式的映射。更好的做法是返回 {filename: (fruit, color)}
之类的格式——这样还能降低内存占用,因为任一时刻只需要保存一个解析后的 DOM。甚至,你可以实现一个迭代器,逐个返回 (filename, fruit, color)
元组。
评论(提问者):@CharlesDuffy 感谢你的建议!我还是 Python 新手,被这个问题困扰了一阵子,不过现在已经找到了解决办法,稍后会把答案贴出来。
<CASES>
<CASE
Fruit="Banana"
Color="Yellow">
</CASE>
<CASE
Fruit="Orange"
Color="Orange">
</CASE>
</CASES>
from itertools import izip
import xml.etree.ElementTree as et
import os
import csv
def run(files):
    """Merge every ``*.xml`` file in directory *files* into one element tree.

    The root element of the first XML file (in sorted filename order) is used
    as the container; the child elements of each subsequent file's root are
    appended to it.

    Args:
        files: Path of the directory to scan (non-recursive).

    Returns:
        The merged root ``Element``, or ``None`` when the directory contains
        no ``.xml`` files.
    """
    first = None
    # os.listdir() order is arbitrary; sorting makes the merge deterministic.
    for filename in sorted(os.listdir(files)):
        if not filename.endswith('.xml'):
            continue
        # os.listdir() yields bare names, so join with the directory;
        # otherwise parsing only works when *files* is the working directory.
        root = et.parse(os.path.join(files, filename)).getroot()
        if first is None:
            first = root
        else:
            # Element.extend() appends the children of ``root`` to ``first``.
            first.extend(root)
    return first
# Merge all XML files found next to this script and export the Fruit/Color
# attributes of every <CASE> element to parsed.csv.
e = run(os.path.dirname(os.path.realpath(__file__)))
# run() returns None when no .xml file exists; emit a header-only CSV then
# instead of failing with AttributeError on None.findall.
cases = e.findall('CASE') if e is not None else []
# One pass over the elements yields complete rows directly, rather than
# filling two parallel lists and zipping them back together.
rows = [(x.get('Fruit'), x.get('Color')) for x in cases]

# 'wb' is the mode the Python 2 csv module expects (this script targets 2.7).
with open('parsed.csv', 'wb') as myfile:
    fieldnames = ["fruit", "color"]
    writer = csv.writer(myfile)
    writer.writerow(fieldnames)
    writer.writerows(rows)
+--------+--------+
| fruit | color |
+--------+--------+
| Apple | Red |
| Pear | Yellow |
| Banana | Yellow |
| Orange | Orange |
+--------+--------+
+--------+--------+------------+
| fruit | color | filename |
+--------+--------+------------+
| Apple | Red | fruit1.xml |
| Pear | Yellow | fruit1.xml |
| Banana | Yellow | fruit2.xml |
| Orange | Orange | fruit2.xml |
+--------+--------+------------+
from itertools import izip
import xml.etree.ElementTree as et
import os
import csv
# Build one (fruit, color, filename) row for every <CASE> element found in
# the XML files located next to this script, then write them to parsed.csv.
directory = os.path.dirname(os.path.realpath(__file__))
rows = []
# os.listdir() returns names in arbitrary order; sort for reproducible output.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.xml'):
        continue
    # Join with the script directory: a bare name would be resolved against
    # the current working directory, which need not be where the script lives.
    root = et.parse(os.path.join(directory, filename)).getroot()
    # A single pass per file keeps each record paired with its source file.
    for case in root.findall('CASE'):
        rows.append((case.get('Fruit'), case.get('Color'), filename))

# 'wb' is the mode the Python 2 csv module expects (this script targets 2.7).
with open('parsed.csv', 'wb') as myfile:
    fieldnames = ["fruit", "color", "filenames"]
    writer = csv.writer(myfile)
    writer.writerow(fieldnames)
    writer.writerows(rows)