Python使用XSLT转换XML

Python使用XSLT转换XML,python,xml,xslt,Python,Xml,Xslt,我是python新手,一直在尝试使用XSLT转换XML文件。 这是我提出的python脚本,generate_ioc.py import os import sys import time import shutil import logging import Queue import ftplib import subprocess import re import lxml.etree as ET try: report_path = os.path.join("/home",

我是python新手,一直在尝试使用XSLT转换XML文件。 这是我提出的python脚本,generate_ioc.py

import os
import sys
import time
import shutil
import logging
import Queue
import ftplib
import subprocess
import re
import lxml.etree as ET
# Convert a STIX report to OpenIOC with an XSLT stylesheet (Python 2 + lxml).
# Steps: locate the report, sanitize and re-save it, run the transformation,
# then write the transformed document next to the report.
try:
    report_path = os.path.join("/home",
                               "user",
                               "Desktop",
                               "stix_to_openioc")
    stix_report = os.path.join(report_path, "report.stix.xml")
    print("Retrieve STIX report successful")

    # Sanitize report: drop the STIX core xmlns declaration, then replace
    # characters that are not allowed in XML with "?".
    with open(stix_report, "r") as stix_file:
        stix_xml = stix_file.read()
    stix_xml = re.sub(u"xmlns='https://github.com/STIXProject/schemas/blob/master/stix_core.xsd' ", u"", stix_xml)
    # Matches control characters, U+FFFE/U+FFFF and unpaired surrogates
    # (high without following low, low without preceding high, or a lone
    # surrogate at either end of the string).
    RE_XML_ILLEGAL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])' + \
                     u'|' + \
                     u'([%s-%s][^%s-%s])|([^%s-%s][%s-%s])|([%s-%s]$)|(^[%s-%s])' % \
                     (unichr(0xd800), unichr(0xdbff), unichr(0xdc00), unichr(0xdfff),
                      unichr(0xd800), unichr(0xdbff), unichr(0xdc00), unichr(0xdfff),
                      unichr(0xd800), unichr(0xdbff), unichr(0xdc00), unichr(0xdfff))
    stix_xml = re.sub(RE_XML_ILLEGAL, "?", stix_xml)

    print("Sanitize STIX report successful")

    # Save sanitized report to file (parsing first also checks that the
    # sanitized text is well-formed XML).
    tree = ET.XML(stix_xml)
    with open(stix_report, "w") as stix_file:
        stix_file.write(ET.tostring(tree))
    print("Save STIX report successful")

    # Get xsl file (this only builds the path string; nothing is opened yet)
    xslt_path = os.path.join("/home",
                             "user",
                             "Desktop",
                             "stix_to_openioc",
                             "stix_to_openioc.xsl")
    print("Retrieve XSL file successful")

    #Perform xsl transformation
    #dom = ET.parse(stix_report)
    #xslt = ET.parse(xslt_path)
    #transform = ET.XSLT(xslt)
    #newdom = transform(dom)
    #xslt_xml = ET.tostring(newdom, pretty_print=True)
    #print("XSL transformation successful")
    #print(xslt_xml)
    #I have tried this ^ but it resulted in the same error

    from lxml import etree
    # NOTE(review): these bare file names are resolved against the current
    # working directory, not against stix_report / xslt_path built above --
    # confirm that this is intended.
    f_xsl = 'stix_to_openioc.xsl'
    f_xml = 'report.stix.xml'
    f_out = 'report.ioc.xml'

    transform = etree.XSLT(etree.parse(f_xsl))
    result = transform(etree.parse(f_xml))
    result.write(f_out)
    # Serialize the transformed tree; without this assignment the write
    # below fails with NameError because xslt_xml was never defined.
    xslt_xml = ET.tostring(result, pretty_print=True)

    # Build the path of the OpenIOC output file next to the STIX report
    openioc_report = os.path.join(report_path,
                                  "report.openioc.xml")
    print("Retrieve OpenIOC report successful")

    # Save the transformed (OpenIOC) document
    with open(openioc_report, "w") as openioc_file:
        openioc_file.write(xslt_xml)
    print("Save OpenIOC report successful")

# On Python 2 open() raises IOError, which is not a subclass of OSError,
# so both are caught. The original handler used undefined names (log, self);
# the already-imported logging module is used instead.
except (IOError, OSError) as e:
    logging.warning("Error accessing stix report: %s", e)
错误代码:

/usr/bin/python2 /home/user/Desktop/IOC/generate_ioc.py
Retrieve STIX report successful
Sanitize STIX report successful
Save STIX report successful
Retrieve XSL file successful
Traceback (most recent call last):
File "/home/user/Desktop/IOC/generate_ioc.py", line 53, in <module>
transform = ET.XSLT(xslt)
File "xslt.pxi", line 403, in lxml.etree.XSLT.__init__ (src/lxml/lxml.etree.c:122894)
lxml.etree.XSLTParseError: xsltParseStylesheetProcess : document is not a stylesheet

Process finished with exit code 1
/usr/bin/python2 /home/user/Desktop/IOC/generate_ioc.py
检索STIX报告成功
消毒STIX报告成功
保存STIX报告成功
成功检索XSL文件
回溯(最近一次呼叫最后一次):
文件“/home/user/Desktop/IOC/generate_ioc.py”,第53行,在 <module> 中
transform = ET.XSLT(xslt)
文件“xslt.pxi”,第403行,在 lxml.etree.XSLT.__init__ 中(src/lxml/lxml.etree.c:122894)
lxml.etree.XSLTParseError:xsltParseStylesheetProcess:文档不是样式表
进程已完成,退出代码为1
我想知道我出了什么问题导致了这一切。
注意:我对python和XML非常陌生,因此任何建议都将不胜感激,我愿意学习并花时间纠正此错误。注2:stix_to_openioc.xsl位于正确的目录中。

您先对 XML 做了一些清理,然后把结果保存到路径字符串 stix_report 所指向的文件中:

# Write the serialized tree to the file at stix_report; mode "w" replaces
# whatever the file contained before.
with open(stix_report, "w") as stix_file:
    stix_file.write(ET.tostring(tree))
print("Save STIX report successful")
然后创建xslt文件的字符串路径:

# Build the absolute path of the stylesheet. os.path.join only assembles the
# string -- the file is not opened or checked here, so the message below is
# printed regardless of whether the file exists.
xslt_path = os.path.join("/home",
            "user",
            "Desktop",
            "stix_to_openioc",
            "stix_to_openioc.xsl")
print("Retrieve XSL file successful")
但是,随后您并没有使用上面构造的路径,而是通过新的变量(只包含文件名的相对路径)来加载 xml 和 xsl 文件:

from lxml import etree
# Bare file names: they are looked up relative to the current working
# directory, not via the stix_report / xslt_path values built earlier.
f_xsl = 'stix_to_openioc.xsl'
f_xml = 'report.stix.xml'
f_out = 'report.ioc.xml'

# Compile the stylesheet, apply it to the parsed report, write the result.
transform = etree.XSLT(etree.parse(f_xsl))
result = transform(etree.parse(f_xml))
result.write(f_out)
我不能保证这样一定能行(因为我不知道这些文件里有什么),但我认为一个好的开始是把这段代码改为:

from lxml import etree
# Reuse the paths built earlier so the stylesheet and the sanitized report
# are read from those exact locations.
f_xsl = xslt_path
f_xml = stix_report
# Still a bare file name, so the output lands in the current working directory.
f_out = 'report.ioc.xml'

# Compile the stylesheet, apply it to the parsed report, write the result.
transform = etree.XSLT(etree.parse(f_xsl))
result = transform(etree.parse(f_xml))
result.write(f_out)