使用python元素树过滤xml/nessus扫描
Python初学者需要帮助筛选.xml文件。我一直在尝试xml.etree.ElementTree,但收效甚微。(标签:python、xml、nessus)xml如下所示:
<ClientData>
<Report>
<ReportHost>
<ReportItem pluginID="11111">
Ipsum lorem etc leviosa!
</ReportItem>
</ReportHost>
<ReportHost>
<ReportItem pluginID="22222">
Sed ut perspiciatis unde omnis iste
</ReportItem>
</ReportHost>
</Report>
</ClientData>
我真的建议使用lxml模块。使用Python自带的xml模块时,元素没有对父元素的引用,我认为使用lxml会容易得多。这就是我最终写出的代码。我发现它在过滤大于600MB(甚至更小)的文件时存在内存问题。据我所知,有一些比一次性解析整个xml更节省内存的处理方法,但我没有时间进行测试。
import lxml.etree as le
import os
from optparse import OptionParser, SUPPRESS_HELP
def removeVulns(filename, pluginlist):
    """Strip every element whose ``pluginID`` is listed in *pluginlist*.

    The XML document at *filename* is parsed with lxml, each element whose
    ``pluginID`` attribute equals one of the listed IDs is detached from its
    parent, and the filtered document is written back over *filename*.

    Args:
        filename: Path of the XML/.nessus report to filter in place.
        pluginlist: Path of a text file containing one plugin ID per line.
    """
    # Load the IDs once into a set: the list file is closed promptly,
    # membership tests are O(1), and CRLF endings, stray whitespace and
    # blank lines no longer prevent a match.
    with open(pluginlist) as ids_file:
        unwanted = set(line.strip() for line in ids_file if line.strip())

    # Hand lxml the path instead of a text-mode handle so it reads the raw
    # bytes itself rather than text decoded with the locale's encoding.
    doc = le.parse(filename)

    # One pass over the candidates instead of re-running the XPath query
    # for every ID in the list.
    for elem in doc.xpath('//*[@pluginID]'):
        if elem.get('pluginID') not in unwanted:
            continue
        parent = elem.getparent()
        # An element without a parent (the document root) cannot be
        # detached; skip it instead of failing on ``None.remove``.
        if parent is not None:
            parent.remove(elem)

    # Write next to the target and swap it in with a single replace, so the
    # report is never missing on disk and the temporary file lives on the
    # same filesystem as the destination.
    temp_path = '{0}.tmp'.format(filename)
    with open(temp_path, 'wb') as output_file:
        output_file.write(le.tostring(doc))
    os.replace(temp_path, filename)
def main(argv=None):
    """Parse the command line and filter the requested .nessus file.

    Args:
        argv: Optional list of arguments to parse. When ``None`` (the
            default) optparse falls back to ``sys.argv[1:]``.

    Exits through ``parser.error`` (usage message, status 2) when ``-f``
    is not supplied.
    """
    parser = OptionParser(usage='%prog -f <filename> [-p <pluginlist>]',
                          version='%prog 1.0')
    parser.add_option('-f',
                      dest='name',
                      type='string',
                      help='.nessus name')
    # The plugin list used to be hard-coded; the default keeps the old
    # behaviour when -p is not given.
    parser.add_option('-p',
                      dest='pluginlist',
                      type='string',
                      default='pluginlist.txt',
                      help='file with one plugin ID per line '
                           '(default: %default)')
    # Positional arguments are not used by this script.
    options, _ = parser.parse_args(argv)
    if not options.name:
        parser.error('Pop, you forgot name!')
    removeVulns(options.name, options.pluginlist)


if __name__ == "__main__":
    main()
import lxml.etree as le
import os
from optparse import OptionParser, SUPPRESS_HELP
def removeVulns(filename, pluginlist):
    output_file = open("temp.xml","wb")
    with open(filename,'r') as f:
        doc=le.parse(f)
        for nessusID in open(pluginlist):
            for elem in doc.xpath('//*[attribute::pluginID]'):
                if elem.attrib['pluginID']==nessusID.strip('\n'):
                    parent=elem.getparent()
                    parent.remove(elem)
        output_file.write(le.tostring(doc))
        f.close()
    output_file.close()
    os.remove(filename)
    os.rename('temp.xml', filename)
def main():
    parser = OptionParser(usage='%prog -f <filename>',
                          version='%prog 1.0')
    parser.add_option('-f',
                      dest='name',
                      type='string',
                      help='.nessus name')
    (options, args) = parser.parse_args()
    if not options.name:
        parser.error('Pop, you forgot name!')
    removeVulns(options.name, 'pluginlist.txt')
if __name__ == "__main__":
    main()
通过将逻辑移到XPath中,您可以删除if检查,如下所示:
for elem in doc.xpath('//*[@pluginID="{0}"]'.format(nessusID.strip('\n'))):
import lxml.etree as le
import os
from optparse import OptionParser, SUPPRESS_HELP
def removeVulns(filename, pluginlist):
    """Strip every element whose ``pluginID`` is listed in *pluginlist*.

    The XML document at *filename* is parsed with lxml, each element whose
    ``pluginID`` attribute equals one of the listed IDs is detached from its
    parent, and the filtered document is written back over *filename*.

    Args:
        filename: Path of the XML/.nessus report to filter in place.
        pluginlist: Path of a text file containing one plugin ID per line.
    """
    # Load the IDs once into a set: the list file is closed promptly,
    # membership tests are O(1), and CRLF endings, stray whitespace and
    # blank lines no longer prevent a match.
    with open(pluginlist) as ids_file:
        unwanted = set(line.strip() for line in ids_file if line.strip())

    # Hand lxml the path instead of a text-mode handle so it reads the raw
    # bytes itself rather than text decoded with the locale's encoding.
    doc = le.parse(filename)

    # One pass over the candidates instead of re-running the XPath query
    # for every ID in the list.
    for elem in doc.xpath('//*[@pluginID]'):
        if elem.get('pluginID') not in unwanted:
            continue
        parent = elem.getparent()
        # An element without a parent (the document root) cannot be
        # detached; skip it instead of failing on ``None.remove``.
        if parent is not None:
            parent.remove(elem)

    # Write next to the target and swap it in with a single replace, so the
    # report is never missing on disk and the temporary file lives on the
    # same filesystem as the destination.
    temp_path = '{0}.tmp'.format(filename)
    with open(temp_path, 'wb') as output_file:
        output_file.write(le.tostring(doc))
    os.replace(temp_path, filename)
def main(argv=None):
    """Parse the command line and filter the requested .nessus file.

    Args:
        argv: Optional list of arguments to parse. When ``None`` (the
            default) optparse falls back to ``sys.argv[1:]``.

    Exits through ``parser.error`` (usage message, status 2) when ``-f``
    is not supplied.
    """
    parser = OptionParser(usage='%prog -f <filename> [-p <pluginlist>]',
                          version='%prog 1.0')
    parser.add_option('-f',
                      dest='name',
                      type='string',
                      help='.nessus name')
    # The plugin list used to be hard-coded; the default keeps the old
    # behaviour when -p is not given.
    parser.add_option('-p',
                      dest='pluginlist',
                      type='string',
                      default='pluginlist.txt',
                      help='file with one plugin ID per line '
                           '(default: %default)')
    # Positional arguments are not used by this script.
    options, _ = parser.parse_args(argv)
    if not options.name:
        parser.error('Pop, you forgot name!')
    removeVulns(options.name, options.pluginlist)


if __name__ == "__main__":
    main()