Python 创建lxml元素子类的困难
我试图创建元素类的一个子类。但我在开始时遇到了麻烦Python 创建lxml元素子类的困难,python,lxml,docx,python-docx,Python,Lxml,Docx,Python Docx,我试图创建元素类的一个子类。但我在开始时遇到了麻烦 from lxml import etree try: import docx except ImportError: from docx import docx class File(etree.ElementBase): def _init(self): etree.ElementBase._init(self) self.body = self.append(docx.makeel
from lxml import etree
try:
import docx
except ImportError:
from docx import docx
class File(etree.ElementBase):
def _init(self):
etree.ElementBase._init(self)
self.body = self.append(docx.makeelement('body'))
f = File()
relationships = docx.relationshiplist()
title = 'File'
subject = 'A very special File'
creator = 'Me'
keywords = ['python', 'Office Open XML', 'Word']
coreprops = docx.coreproperties(title=title, subject=subject, creator=creator,
keywords=keywords)
appprops = docx.appproperties()
contenttypes = docx.contenttypes()
websettings = docx.websettings()
wordrelationships = docx.wordrelationships(relationships)
docx.savedocx(f, coreprops, appprops, contenttypes, websettings,
wordrelationships, 'file.docx')
当我尝试打开从该代码输出的文档时,我的Word版本(带兼容包的2003)出现以下错误:“此文件是由Word 2007的早期beta版创建的,无法在此版本中打开。”当我使用使用docx.newdocument()创建的其他元素替换文件对象时,这份文件很好。有什么想法/建议吗?我真的不明白为什么要使用名为File的单独类 正如Michael0x2a所说,您没有放置文档标记,因此它不会工作(我认为Word 2007也无法读取您的文件) 但下面是正确的代码:
from lxml import etree
try:
import docx
except ImportError:
from docx import docx
class File(object):
def makeelement(tagname, tagtext=None, nsprefix='w', attributes=None,
attrnsprefix=None):
'''Create an element & return it'''
# Deal with list of nsprefix by making namespacemap
namespacemap = None
if isinstance(nsprefix, list):
namespacemap = {}
for prefix in nsprefix:
namespacemap[prefix] = nsprefixes[prefix]
# FIXME: rest of code below expects a single prefix
nsprefix = nsprefix[0]
if nsprefix:
namespace = '{'+nsprefixes[nsprefix]+'}'
else:
# For when namespace = None
namespace = ''
newelement = etree.Element(namespace+tagname, nsmap=namespacemap)
# Add attributes with namespaces
if attributes:
# If they haven't bothered setting attribute namespace, use an empty
# string (equivalent of no namespace)
if not attrnsprefix:
# Quick hack: it seems every element that has a 'w' nsprefix for
# its tag uses the same prefix for it's attributes
if nsprefix == 'w':
attributenamespace = namespace
else:
attributenamespace = ''
else:
attributenamespace = '{'+nsprefixes[attrnsprefix]+'}'
for tagattribute in attributes:
newelement.set(attributenamespace+tagattribute,
attributes[tagattribute])
if tagtext:
newelement.text = tagtext
return newelement
def __init__(self):
super(File,self).__init__()
self.document = self.makeelement('document')
self.document.append(self.makeelement('body'))
f = File()
relationships = docx.relationshiplist()
title = 'File'
subject = 'A very special File'
creator = 'Me'
keywords = ['python', 'Office Open XML', 'Word']
coreprops = docx.coreproperties(title=title, subject=subject, creator=creator,
keywords=keywords)
appprops = docx.appproperties()
contenttypes = docx.contenttypes()
websettings = docx.websettings()
wordrelationships = docx.wordrelationships(relationships)
docx.savedocx(f.document, coreprops, appprops, contenttypes, websettings,
wordrelationships, 'file.docx')
你的意思是使用
\uuuu init\uuuu
而不是\u init
作为构造函数吗?此外,您还可以尝试检查docx.newdocument()以查看输出应该是什么样子。看起来您缺少了一个document
标记,虽然这只是我的猜测。谢谢,但问题似乎来自元素类不喜欢init,因此简单的构造函数方法似乎不起作用。为什么要扩展Etree.ElementBase?