使用Python将csv转换为xml_Python_Xml_Csv

使用Python将csv转换为xml

python xml csv

使用Python将csv转换为xml,python,xml,csv,Python,Xml,Csv,我有一个类似以下格式的csv文件： =============================================================== #Type 1 Definition #============================================================================ #TYPE, <name> #Some tag for type---------------------------------

我有一个类似以下格式的csv文件：

===============================================================
#Type 1 Definition
#============================================================================
#TYPE, <name>
#Some tag for type------------------------------------------------------
#TYPESomeTag, <id>, <name>, <param>
#Another tag for type----------------------------------------------
#TYPEAnothertag, <param_1>, <param_2>, <param_3>
TYPE, Name_1
TYPESomeTag, 1, 2, 3
TYPESomeTag, 4, 2, 5
TYPEAnothertag, a, b, c

TYPE, Name_2
TYPESomeTag, 1, 2, 3
TYPESomeTag, 4, 2, 5
TYPEAnothertag, a, b, c

#===============================================================================
#Type 2 Definition
#===============================================================================
#TYPE2, <name>
#Some tag for type------------------------------------------------------
#TYPE2SomeTag, <id>, <name>, <param>
#Another tag for type----------------------------------------------
#TYPE2Anothertag, <param_1>, <param_2>, <param_3>
TYPE2, Name_1
TYPE2SomeTag, 1, 2, 3
TYPE2SomeTag, 4, 2, 5
TYPE2Anothertag, a, b, c

TYPE2, Name_2
TYPE2SomeTag, 1, 2, 3
TYPE2SomeTag, 4, 2, 5
TYPE2Anothertag, a, b, c

and so on...

===============================================================
#第1类定义
#============================================================================
#类型，
#类型的一些标记------------------------------------------------------
#TYPESomeTag，
#类型的另一个标记----------------------------------------------
#TYPEAnothertag，
类型、名称\u 1
TYPESomeTag，1,2,3
TYPESomeTag，4,2,5
类型另一个标签，a、b、c
类型、名称\u 2
TYPESomeTag，1,2,3
TYPESomeTag，4,2,5
类型另一个标签，a、b、c
#===============================================================================
#第2类定义
#===============================================================================
#类型2，
#类型的一些标记------------------------------------------------------
#TYPE2SomeTag，
#类型的另一个标记----------------------------------------------
#第2类其他标签，
类型2，名称\u 1
第2种方法，1、2、3
第2种方法标记，4、2、5
类型2其他标记，a，b，c
类型2，名称2
第2种方法，1、2、3
第2种方法标记，4、2、5
类型2其他标记，a，b，c
等等

我的目标是将上面的csv转换成xml格式，并且我打算用Python来实现。下面是我最初的实现方式：

for row in csv.reader(open(csvFile)):       
    if(row): #check for blank lines
       if row[0] == 'TYPE':
           xmlData.write('      ' + '<TYPE'+ row[1] + '>'+"\n")
       elif row[0] == 'TYPESomeTag'
            xmlData.write('      ' + '<TYPESomeTag'+ row[2] + '>'+"\n")
       elif
           #write some more tags
       else
         #something else
xmlData.close()

csv.reader（打开（csvFile））中的行的

：
如果（行）：#检查是否有空行
如果行[0]=“类型”：
xmlData.write（“”+“”+“\n”）
elif行[0]=“TYPESomeTag”
xmlData.write（“”+“”+“\n”）
否则如果
#写更多的标签
其他的
#别的
xmlData.close（）

我所采用的这种方法非常糟糕，因为它不容易扩展。我将每行的第一列与字符串进行比较。现在，如果有另一组类型定义，比如TYPE2，问题就会出现。然后我必须写另一组if..else语句，我认为这并不是有效的方法

有人能告诉我如何以更好的方式将上述csv转换为xml吗？

编辑：

这就是我的目标xml：

<tags>
 <TYPE Name_1>
   <TYPESomeTag>
    <id>1</id>
    <name>2</name>
    <param>3</param>
   </TYPESomeTag>
  <TYPESomeTag>
    <id>4</id>
    <name>2</name>
    <param>5</param>
   </TYPESomeTag>
  <TYPEAnothertag>
    <param_1>a</param_1>
    <param_2>b</param_2>
    <param_3>c</param_3>
   </TYPEAnothertag>
 </TYPE>
 <TYPE2 Name_2>
   <TYPE2SomeTag>
    <id>1</id>
    <name>2</name>
    <param>3</param>
   </TYPE2SomeTag>
  <TYPE2SomeTag>
    <id>4</id>
    <name>2</name>
    <param>5</param>
   </TYPE2SomeTag>
  <TYPE2Anothertag>
    <param_1>a</param_1>
    <param_2>b</param_2>
    <param_3>c</param_3>
   </TYPE2Anothertag>
 </TYPE2>
</tags>


1.
2.
3.
4.
2.
5.
A.
B
C
1.
2.
3.
4.
2.
5.
A.
B
C

您需要将注释行中的参数存储到字典中以进行处理

#TYPESomeTag, id, name, param

转换为

tags = {"TYPESomeTag": ["id", "name", "param"]}

通过这种方式，您可以解析每个注释行，而无需手工编码参数列表。下面是处理给定csv的示例代码

import csv

# Path of the commented CSV file to convert; the XML is printed to stdout.
csvFile = 'sample.csv'

# State carried from row to row while scanning the file.
nextLineIsTagName = False  # True right after a "#... Definition" header line
tags = dict()              # child tag name -> list of its parameter names
tag = None                 # name of the current top-level element (e.g. TYPE)
tagOpened = False          # True while a top-level element still needs closing

# 'with' guarantees the file is closed, even if a row fails to process.
with open(csvFile) as csvHandle:
    for row in csv.reader(csvHandle, skipinitialspace=True):
        if not row:  # skipping empty lines
            continue

        # startswith() also copes with an empty first field, where
        # row[0][0] would raise IndexError.
        if row[0].startswith('#'):  # type definitions inside a comment block
            if tagOpened:  # a new comment block ends the element in progress
                print("</" + tag + ">")
                tags = dict()
                tag = None
                tagOpened = False

            if (len(row) == 1) and 'Definition' in row[0]:
                nextLineIsTagName = True
                continue

            if nextLineIsTagName and len(row) == 2:
                tag = row[0][1:]  # drop the leading '#'
                nextLineIsTagName = False
                continue

            if not nextLineIsTagName and len(row) > 1:
                # Remember the parameter names declared for this child tag.
                # Placeholders may be written as "<id>"; the angle brackets
                # are not part of the element name, so strip them.
                tags[row[0][1:]] = [param.strip('<>') for param in row[1:]]

        else:  # processing csv data
            if len(row) < 2:
                continue

            if row[0] == tag:  # we need to start a new top-level element
                if tagOpened:  # close the previous one before opening another
                    print("</" + tag + ">")

                # Single-argument print() behaves the same on Python 2 and 3.
                print("<" + tag + " " + row[1] + ">")
                tagOpened = True
            else:  # a child element of the currently open top-level element
                declared = tags.get(row[0], [])
                print("\t<" + row[0] + ">")
                for position, value in enumerate(row[1:]):
                    # Fall back to a generated name when the tag was never
                    # declared or the row carries more values than declared.
                    if position < len(declared):
                        paramName = declared[position]
                    else:
                        paramName = 'param_' + str(position + 1)
                    print("\t\t<" + paramName + ">" + value
                          + "</" + paramName + ">")
                print("\t</" + row[0] + ">")

if tagOpened:  # closing last tag at end of file
    print("</" + tag + ">")

导入csv
csvFile='sample.csv'
nextLineIsTagName=False
tags=dict（）
标记=无
tagOpened=False
对于csv.reader中的行（打开（csvFile），skipinitialspace=True）：
如果不是行：#跳过空行
持续
如果行[0][0]='#'：#csv注释块中的处理类型定义
if tagOpened:#有一个打开的标签，所以我们需要关闭它
打印“”
tags=dict（）
标记=无
tagOpened=False
如果（len（row）==1）和第[0]行中的“定义”：
nextLineIsTagName=True
持续
如果nextLineIsTagName和len（行）=2：
标记=行[0][1:]
nextLineIsTagName=False
持续
如果不是nextLineIsTagName和len（世界其他地区）>1：
标记[行[0][1:][]=行[1:]#将“参数”添加到“标记”dict条目
其他：#处理csv数据
如果len（世界其他地区）<2：
持续
如果行[0]==标记：#我们需要启动新类型元素
如果标签打开：#在打开新标签之前关闭上一个标签
打印“”
打印“”
tagOpened=True
else:#我们需要向open TYPE元素添加参数
打印“\t”
对于范围（1，len（row））中的i:#迭代参数
打印“\t\t”+行[i]+”“
打印“\t”
如果标记已打开：#关闭文件末尾的最后一个标记
打印“”

您需要将注释行中的参数存储到字典中以进行处理

#TYPESomeTag, id, name, param

转换为

tags = {"TYPESomeTag": ["id", "name", "param"]}

通过这种方式，您可以解析每个注释行，而无需手工编码参数列表。下面是处理给定csv的示例代码

import csv

# Path of the commented CSV file to convert; the XML is printed to stdout.
csvFile = 'sample.csv'

# State carried from row to row while scanning the file.
nextLineIsTagName = False  # True right after a "#... Definition" header line
tags = dict()              # child tag name -> list of its parameter names
tag = None                 # name of the current top-level element (e.g. TYPE)
tagOpened = False          # True while a top-level element still needs closing

# 'with' guarantees the file is closed, even if a row fails to process.
with open(csvFile) as csvHandle:
    for row in csv.reader(csvHandle, skipinitialspace=True):
        if not row:  # skipping empty lines
            continue

        # startswith() also copes with an empty first field, where
        # row[0][0] would raise IndexError.
        if row[0].startswith('#'):  # type definitions inside a comment block
            if tagOpened:  # a new comment block ends the element in progress
                print("</" + tag + ">")
                tags = dict()
                tag = None
                tagOpened = False

            if (len(row) == 1) and 'Definition' in row[0]:
                nextLineIsTagName = True
                continue

            if nextLineIsTagName and len(row) == 2:
                tag = row[0][1:]  # drop the leading '#'
                nextLineIsTagName = False
                continue

            if not nextLineIsTagName and len(row) > 1:
                # Remember the parameter names declared for this child tag.
                # Placeholders may be written as "<id>"; the angle brackets
                # are not part of the element name, so strip them.
                tags[row[0][1:]] = [param.strip('<>') for param in row[1:]]

        else:  # processing csv data
            if len(row) < 2:
                continue

            if row[0] == tag:  # we need to start a new top-level element
                if tagOpened:  # close the previous one before opening another
                    print("</" + tag + ">")

                # Single-argument print() behaves the same on Python 2 and 3.
                print("<" + tag + " " + row[1] + ">")
                tagOpened = True
            else:  # a child element of the currently open top-level element
                declared = tags.get(row[0], [])
                print("\t<" + row[0] + ">")
                for position, value in enumerate(row[1:]):
                    # Fall back to a generated name when the tag was never
                    # declared or the row carries more values than declared.
                    if position < len(declared):
                        paramName = declared[position]
                    else:
                        paramName = 'param_' + str(position + 1)
                    print("\t\t<" + paramName + ">" + value
                          + "</" + paramName + ">")
                print("\t</" + row[0] + ">")

if tagOpened:  # closing last tag at end of file
    print("</" + tag + ">")

导入csv
csvFile='sample.csv'
nextLineIsTagName=False
tags=dict（）
标记=无
tagOpened=False
对于csv.reader中的行（打开（csvFile），skipinitialspace=True）：
如果不是行：#跳过空行
持续
如果行[0][0]='#'：#csv注释块中的处理类型定义
if tagOpened:#有一个打开的标签，所以我们需要关闭它
打印“”
tags=dict（）
标记=无
tagOpened=False
如果（len（row）==1）和第[0]行中的“定义”：
nextLineIsTagName=True
持续
如果nextLineIsTagName和len（行）=2：
标记=行[0][1:]
nextLineIsTagName=False
持续
如果不是nextLineIsTagName和len（世界其他地区）>1：
标记[行[0][1:][]=行[1:]#将“参数”添加到“标记”dict条目
其他：#处理csv数据
如果len（世界其他地区）<2：
持续
如果行[0]==标记：#我们需要启动新类型元素
如果标签打开：#在打开新标签之前关闭上一个标签
打印“”
打印“”
tagOpened=True
else:#我们需要向open TYPE元素添加参数
打印“\t”
对于范围（1，len（row））中的i:#迭代参数
打印“\t\t”+行[i]+“”
打印“\t”
如果标记已打开：#关闭文件末尾的最后一个标记
打印“”

考虑使用xml模块构建xml文档，而不是连接元素的字符串表示形式。这样，您可以通过以下方式读取csv行：

# XSLT 1.0 stylesheet, held as a string, applied to the source XML tree below.
# It contains three templates:
#   * an identity transform that copies every attribute and node unchanged;
#   * one for TYPESomeTag / TYPE2SomeTag that rebuilds the element with
#     <id>, <name> and <param> children taken from its 1st, 2nd and 3rd
#     <tag> child;
#   * one for TYPEAnothertag / TYPE2Anothertag that does the same with
#     <param_1>, <param_2> and <param_3>.
# NOTE(review): the code that builds the source tree (`root`) is not part of
# this snippet; it presumably emits generic <tag> children per CSV value —
# confirm against the missing part.
xslt_str = '''
            <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
            <xsl:output version="1.0" encoding="UTF-8" indent="yes" />
            <xsl:strip-space elements="*"/>

              <!-- Identity Transform -->
              <xsl:template match="@*|node()">
                <xsl:copy>
                  <xsl:apply-templates select="@*|node()"/>
                </xsl:copy>
              </xsl:template>

              <xsl:template match="TYPESomeTag|TYPE2SomeTag">
                <xsl:copy>
                  <id><xsl:value-of select="tag[1]"/></id>
                  <name><xsl:value-of select="tag[2]"/></name>
                  <param><xsl:value-of select="tag[3]"/></param>
                </xsl:copy>
              </xsl:template>

              <xsl:template match="TYPEAnothertag|TYPE2Anothertag">
                <xsl:copy>
                  <param_1><xsl:value-of select="tag[1]"/></param_1>
                  <param_2><xsl:value-of select="tag[2]"/></param_2>
                  <param_3><xsl:value-of select="tag[3]"/></param_3>
                </xsl:copy>
              </xsl:template>                    
            </xsl:transform>
'''    
# PARSE XSL STRING (CAN ALSO READ FROM FILE)
# NOTE(review): `ET` is not imported in this snippet; the XSLT() call below
# suggests it is lxml.etree rather than xml.etree.ElementTree — confirm.
xslt = ET.fromstring(xslt_str)
# TRANSFORM SOURCE XML WITH XSLT
# Build a transformer from the parsed stylesheet, then call it on `root`
# (defined outside this snippet) to obtain the transformed result.
transform = ET.XSLT(xslt)
newdom = transform(root)    
# Print the transformed document as text.
print(str(newdom))

<?xml version="1.0"?>
<tags>
  <TYPE attr="Name_1">
    <TYPESomeTag>
      <id>1</id>
      <name>2</name>
      <param>3</param>
    </TYPESomeTag>
    <TYPESomeTag>
      <id>4</id>
      <name>2</name>
      <param>5</param>
    </TYPESomeTag>
    <TYPEAnothertag>
      <param_1>a</param_1>
      <param_2>b</param_2>
      <param_3>c</param_3>
    </TYPEAnothertag>
  </TYPE>
  <TYPE attr="Name_2">
    <TYPESomeTag>
      <id>1</id>
      <name>2</name>
      <param>3</param>
    </TYPESomeTag>
    <TYPESomeTag>
      <id>4</id>
      <name>2</name>
      <param>5</param>
    </TYPESomeTag>
    <TYPEAnothertag>
      <param_1>a</param_1>
      <param_2>b</param_2>
      <param_3>c</param_3>
    </TYPEAnothertag>
  </TYPE>
</tags>

import re
import csv
from xml.etree import ElementTree as ET
import xml.dom.minidom as minidom

class DefFilter:
    """Line iterator that hides ``#`` comment lines and collects tag definitions.

    Wraps an iterator of text lines (typically an open file). Lines starting
    with ``#`` are never yielded. Those shaped like
    ``#TAG, <name_1>, <name_2>, ...`` are recorded in ``self.conf`` as
    ``conf['TAG'] = ['name_1', 'name_2', ...]``. Every other line is yielded
    with surrounding whitespace stripped, so the object can be handed straight
    to ``csv.reader``.

    Works on Python 2 (``next``) and Python 3 (``__next__``).
    """

    def __init__(self, fd, conf = None):
        """Create the filter.

        fd   -- iterator yielding text lines.
        conf -- optional dict that receives the parsed definitions; it is
                updated in place. A fresh dict is used when omitted.
        """
        if conf is None:
            self.conf = {}
        else:
            self.conf = conf
        self.fd = fd
        # "#TAG" followed by one or more ", <name>" groups.
        self.line = re.compile(r'#\s*(\w+)\s*((?:,\s*\<\w+\>)+)')
        # The first ", <name>" group plus whatever groups follow it.
        # Raw string: the previous plain literal relied on invalid escape
        # sequences (\s, \w, \<), which newer Python versions warn about.
        self.tagname = re.compile(r',\s*<(\w*)>((?:,\s*\<\w+\>)*)')

    def _parse_tags(self, line):
        """Return the names found in a ``, <a>, <b>, ...`` fragment, in order.

        Stops at the first position that is not a ``, <name>`` group, so text
        that does not match at all yields an empty list instead of failing.
        """
        names = []
        while True:
            m = self.tagname.match(line)
            if m is None:
                return names
            names.append(m.group(1))
            if not m.group(2):
                return names
            line = m.group(2)

    def __iter__(self):
        return self

    def next(self):
        """Return the next non-comment line, stripped of whitespace.

        Comment lines met on the way are consumed; definition lines among
        them update ``self.conf``. ``StopIteration`` from the wrapped
        iterator propagates to signal the end of input.
        """
        while True:
            line = next(self.fd).strip()
            if not line.startswith('#'):
                return line
            m = self.line.match(line)
            if m:
                self.conf[m.group(1)] = self._parse_tags(m.group(2))

    def __next__(self):
        # Python 3 spelling of the iterator protocol.
        return self.next()

class Parser:
    """Convert the commented CSV format into an ElementTree and print it.

    After ``parse`` (or ``parsefile``) the resulting tree is available as
    ``self.root``, an ``ElementTree`` element named ``tags``.
    """

    def __init__(self, conf = None):
        """conf -- optional dict of tag definitions shared with DefFilter."""
        self.conf = conf

    def parse(self, fd):
        """Read CSV lines from ``fd`` and build ``self.root``.

        A two-field row ``TAG, value`` opens a new child of the root, with
        ``value`` stored in an attribute. The attribute is named after the
        first parameter declared for TAG, or ``name`` when none is declared.
        A row with more fields becomes a child of the most recent two-field
        row, with one sub-element per value. Sub-elements take the declared
        parameter names, falling back to ``param_N`` for anything undeclared.
        Rows with fewer than two fields are ignored.

        Raises ValueError if a multi-field row appears before any two-field
        row, since there is no element to attach it to.
        """
        flt = DefFilter(fd, self.conf)
        rd = csv.reader(flt)
        root = ET.Element('tags')
        elt = None  # most recently opened top-level element
        for row in rd:
            if len(row) == 2:
                tag = row[0].strip()
                declared = flt.conf.get(tag)
                name = declared[0] if declared else 'name'
                elt = ET.SubElement(root, tag, { name: row[1].strip() })
            elif len(row) > 2:
                tag = row[0].strip()
                if elt is None:
                    raise ValueError(
                        'row for %r appears before any top-level element'
                        % tag)
                x = ET.SubElement(elt, tag)
                declared = flt.conf.get(tag) or []
                for i, val in enumerate(row[1:]):
                    # Rows may carry more values than the definition names.
                    if i < len(declared):
                        child = declared[i]
                    else:
                        child = 'param_' + str(i + 1)
                    y = ET.SubElement(x, child)
                    y.text = val.strip()
        self.root = root

    def parsefile(self, filename):
        """Open ``filename`` and parse it; see ``parse``."""
        with open(filename) as fd:
            self.parse(fd)

    def prettyprint(self, fd, addindent = '  ', newl = '\n'):
        """Write ``self.root`` to the text stream ``fd`` as indented XML.

        addindent -- indentation added per nesting level.
        newl      -- string written at the end of each line.
        """
        # Serialise this parser's own tree (not a module-level global) and
        # re-parse it with minidom, which provides the indenting writer.
        document = minidom.parseString(ET.tostring(self.root))
        document.writexml(fd, newl = newl, addindent = addindent)

# Convert in.csv to out.xml. The input handle cannot be called 'in': that is
# a reserved word and makes the statement a syntax error.
with open('in.csv') as src, open('out.xml', 'w') as out:
    p = Parser()
    p.parse(src)
    p.prettyprint(out)