Python 如何修复 XMLSyntaxError:标记 <some xml tag> 中的数据过早结束(Premature end of data in tag)
所以我看到了一些答案,但没有一个能解决这个问题。 我正在尝试使用 lxml.etree.parse 方法解析一个简单的文件,但是我不断得到错误 lxml.etree.XMLSyntaxError: Premature end of data in tag body line 2, line 2, column 32(标记 body 第2行中的数据过早结束,第2行第32列) 我在这行中得到了错误:Python 如何修复 XMLSyntaxError:标记 <some xml tag> 中的数据过早结束,python,python-2.7,xml-parsing,lxml,Python,Python 2.7,Xml Parsing,Lxml,所以我看到了一些答案,但没有一个能解决这个问题。 我正在尝试使用 lxml.etree.parse 方法解析一个简单的文件,但是我不断得到错误 lxml.etree.XMLSyntaxError: Premature end of data in tag body line 2, line 2, column 32(标记 body 第2行中的数据过早结束,第2行第32列) 我在这行中得到了错误: tree = etree.parse( infile, parser ) 以下是格式良好的简单xml文件: <?xml version="1.0" encoding="UTF-8"?> <note>
tree = etree.parse( infile, parser )
以下是格式良好的简单xml文件:
<?xml version="1.0" encoding="UTF-8"?>
<note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
<body>Dont forget me this weekend</body>
</note>
FixNS类和辅助函数-为了完整性而添加:
from cStringIO import StringIO
import xml.sax.expatreader
from xml.sax import make_parser, SAXNotRecognizedException, SAXNotSupportedException
from xml.sax.handler import property_lexical_handler, feature_namespaces, feature_validation
from xml.sax.saxutils import XMLGenerator, quoteattr
from blzip import ReadBLZip
class FixNS(XMLGenerator):
    """XMLGenerator subclass that feeds an input file through a SAX parser.

    The instance registers itself as both the content handler and the
    lexical handler of an expat-based SAX parser (see ``fixNS``).

    NOTE(review): this excerpt defines no lexical-handler callbacks
    (``comment``, ``startCDATA``, ``endCDATA``, ``startDTD``, ``endDTD``)
    and nothing here writes to ``self._result``; presumably those parts
    live elsewhere in the real class -- confirm before relying on
    ``getResult``.
    """

    def __init__(self):
        # No output stream is passed here; fixNS() re-initialises the
        # generator with a fresh in-memory buffer before every parse.
        XMLGenerator.__init__(self)

    def fixNS(self, infilename):
        """Read *infilename*, reset all parsing state and parse its content.

        If the raw file content starts with the ``BLZIP`` marker, the
        content is replaced by whatever ``ReadBLZip(infilename)`` returns
        and ``self._wasBLZipped`` is set to True.

        Exceptions raised while opening/reading the file or while parsing
        are not caught here.
        """
        XMLGenerator.__init__(self, StringIO())
        self._out = StringIO()
        self._result = StringIO()
        self._inFileName = infilename
        self._nsDeclPos = 0
        self._wasBLZipped = False

        # Use a context manager so the file handle is closed right after
        # reading instead of being left to the garbage collector.
        with open(self._inFileName, 'rb') as infile:
            self._inContent = infile.read()

        # The file is read in binary mode, so compare against a bytes
        # literal (identical to the plain literal on Python 2.7).
        if self._inContent.startswith(b'BLZIP'):
            self._inContent = ReadBLZip(self._inFileName)
            self._wasBLZipped = True

        self._knownNsPrefixes = set()
        self._collectedNsPrefixes = dict()
        self._isroot = True
        self._in_entity = 0
        self._in_cdata = 0
        self._line = 0
        self._column = 0

        self._parser = make_parser(['xml.sax.expatreader'])
        self._parser.setContentHandler(self)
        self._parser.setProperty(property_lexical_handler, self)

        # Switching these features off is best effort: a parser that does
        # not know or support a feature is simply left as it is.
        try:
            self._parser.setFeature(feature_namespaces, 0)
        except (SAXNotRecognizedException, SAXNotSupportedException):
            pass
        try:
            self._parser.setFeature(feature_validation, 0)
        except (SAXNotRecognizedException, SAXNotSupportedException):
            pass

        self._parser.parse(StringIO(self._inContent))

    def getResult(self):
        """Return a new StringIO holding a copy of the ``_result`` buffer.

        The returned stream is independent of ``self._result`` and is
        positioned at its start.
        """
        return StringIO(self._result.getvalue())
我正在使用python 2.7和lxml-2.3。有关于如何修复此解析错误的帮助吗?更新:我发现在32位windows操作系统上解析xml的效果与预期的一样,该操作系统是最初用于编译应用程序的windows版本。仍然不确定为什么它不能在64位windows上解析。对我来说很奇怪
from cStringIO import StringIO
import xml.sax.expatreader
from xml.sax import make_parser, SAXNotRecognizedException, SAXNotSupportedException
from xml.sax.handler import property_lexical_handler, feature_namespaces, feature_validation
from xml.sax.saxutils import XMLGenerator, quoteattr
from blzip import ReadBLZip
class FixNS(XMLGenerator):
    """XMLGenerator subclass that feeds an input file through a SAX parser.

    The instance registers itself as both the content handler and the
    lexical handler of an expat-based SAX parser (see ``fixNS``).

    NOTE(review): this excerpt defines no lexical-handler callbacks
    (``comment``, ``startCDATA``, ``endCDATA``, ``startDTD``, ``endDTD``)
    and nothing here writes to ``self._result``; presumably those parts
    live elsewhere in the real class -- confirm before relying on
    ``getResult``.
    """

    def __init__(self):
        # No output stream is passed here; fixNS() re-initialises the
        # generator with a fresh in-memory buffer before every parse.
        XMLGenerator.__init__(self)

    def fixNS(self, infilename):
        """Read *infilename*, reset all parsing state and parse its content.

        If the raw file content starts with the ``BLZIP`` marker, the
        content is replaced by whatever ``ReadBLZip(infilename)`` returns
        and ``self._wasBLZipped`` is set to True.

        Exceptions raised while opening/reading the file or while parsing
        are not caught here.
        """
        XMLGenerator.__init__(self, StringIO())
        self._out = StringIO()
        self._result = StringIO()
        self._inFileName = infilename
        self._nsDeclPos = 0
        self._wasBLZipped = False

        # Use a context manager so the file handle is closed right after
        # reading instead of being left to the garbage collector.
        with open(self._inFileName, 'rb') as infile:
            self._inContent = infile.read()

        # The file is read in binary mode, so compare against a bytes
        # literal (identical to the plain literal on Python 2.7).
        if self._inContent.startswith(b'BLZIP'):
            self._inContent = ReadBLZip(self._inFileName)
            self._wasBLZipped = True

        self._knownNsPrefixes = set()
        self._collectedNsPrefixes = dict()
        self._isroot = True
        self._in_entity = 0
        self._in_cdata = 0
        self._line = 0
        self._column = 0

        self._parser = make_parser(['xml.sax.expatreader'])
        self._parser.setContentHandler(self)
        self._parser.setProperty(property_lexical_handler, self)

        # Switching these features off is best effort: a parser that does
        # not know or support a feature is simply left as it is.
        try:
            self._parser.setFeature(feature_namespaces, 0)
        except (SAXNotRecognizedException, SAXNotSupportedException):
            pass
        try:
            self._parser.setFeature(feature_validation, 0)
        except (SAXNotRecognizedException, SAXNotSupportedException):
            pass

        self._parser.parse(StringIO(self._inContent))

    def getResult(self):
        """Return a new StringIO holding a copy of the ``_result`` buffer.

        The returned stream is independent of ``self._result`` and is
        positioned at its start.
        """
        return StringIO(self._result.getvalue())