使用python元素树过滤xml/nessus扫描

使用python元素树过滤xml/nessus扫描,python,xml,nessus,Python,Xml,Nessus,Python初学者需要帮助筛选.xml文件。我一直在尝试xml.etree.ElementTree,但收效甚微 xml如下所示: <ClientData> <Report> <ReportHost> <ReportItem pluginID="11111"> Ipsum lorem etc leviosa! </ReportItem> </ReportHost>

Python初学者需要帮助筛选.xml文件。我一直在尝试xml.etree.ElementTree,但收效甚微

xml如下所示:

<ClientData>
  <Report>
    <ReportHost>
      <ReportItem pluginID="11111">

        Ipsum lorem etc leviosa!

      </ReportItem>
    </ReportHost>
    <ReportHost>
      <ReportItem pluginID="22222">

        Sed ut perspiciatis unde omnis iste

      </ReportItem>
    </ReportHost>
  </Report>
</ClientData>

我真的建议使用lxml模块。在使用Python的xml模块时,没有对父元素的引用。我认为使用lxml会容易得多。

这就是我最终写出的脚本。我发现它在过滤600MB左右(甚至更小)的文件时会出现内存问题。据我所知,有一些比一次性解析整个xml更节省内存的处理方法,但我还没有时间进行测试

import lxml.etree as le
import os
from optparse import OptionParser, SUPPRESS_HELP

def removeVulns(filename, pluginlist):
    """Remove listed plugin findings from an XML/.nessus file, in place.

    Every element carrying a ``pluginID`` attribute whose value appears in
    *pluginlist* is detached from its parent, and *filename* is then
    replaced by the filtered document.

    Args:
        filename: Path of the XML/.nessus file to filter. It is overwritten.
        pluginlist: Path of a text file holding one plugin ID per line.
            Surrounding whitespace (including ``\\r`` from CRLF files) is
            ignored and blank lines are skipped.

    Raises:
        IOError/OSError: If either file cannot be read or the result cannot
            be written.
        lxml.etree.XMLSyntaxError: If *filename* is not well-formed XML.
    """
    # Load the IDs once into a set: the handle gets closed, and the document
    # only has to be scanned a single time instead of once per ID.
    with open(pluginlist) as id_file:
        unwanted = {line.strip() for line in id_file}
    # A blank line must not match elements with an empty pluginID.
    unwanted.discard('')

    # Hand lxml the path rather than a text-mode file object so that it
    # reads the raw bytes and applies the document's own encoding.
    doc = le.parse(filename)

    # xpath() returns a list, so removing nodes while looping is safe.
    for elem in doc.xpath('//*[@pluginID]'):
        if elem.get('pluginID') not in unwanted:
            continue
        parent = elem.getparent()
        # The root element has no parent and cannot be detached.
        if parent is not None:
            parent.remove(elem)

    # Write next to the target so the final rename stays on one filesystem
    # and cannot clobber an unrelated "temp.xml" in the working directory.
    temp_path = filename + '.tmp'
    try:
        with open(temp_path, 'wb') as output_file:
            # Serialise straight into the file instead of first building the
            # whole document as one byte string with tostring().
            doc.write(output_file)
    except Exception:
        # Do not leave a half-written temp file behind; the original file
        # has not been touched yet at this point.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise

    # os.rename() refuses to overwrite an existing file on Windows, so the
    # original is removed only after the filtered copy is fully written.
    os.remove(filename)
    os.rename(temp_path, filename)


def main():
    """Command-line entry point: filter the .nessus file named by ``-f``.

    Exits through ``OptionParser.error`` when ``-f`` is missing; otherwise
    filters the named file using the IDs listed in ``pluginlist.txt``.
    """
    usage_text = '%prog -f <filename>'
    cli = OptionParser(usage=usage_text, version='%prog 1.0')
    cli.add_option('-f', dest='name', type='string', help='.nessus name')

    parsed, _positional = cli.parse_args()
    if not parsed.name:
        # error() prints the usage message and terminates the process.
        cli.error('Pop, you forgot name!')
    removeVulns(parsed.name, 'pluginlist.txt')

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
import lxml.etree as le
import os
from optparse import OptionParser, SUPPRESS_HELP

def removeVulns(filename, pluginlist):
    output_file = open("temp.xml","wb")
    with open(filename,'r') as f:
        doc=le.parse(f)
        for nessusID in open(pluginlist):
            for elem in doc.xpath('//*[attribute::pluginID]'):
                if elem.attrib['pluginID']==nessusID.strip('\n'):
                    parent=elem.getparent()
                    parent.remove(elem)
        output_file.write(le.tostring(doc))
        f.close()
        output_file.close()
        os.remove(filename)
        os.rename('temp.xml', filename)


def main():
    parser = OptionParser(usage='%prog -f <filename>', 
                            version='%prog 1.0')   
    parser.add_option('-f',
                      dest='name',
                      type='string',
                      help='.nessus name')


    (options, args) = parser.parse_args()
    if not options.name:
        parser.error('Pop, you forgot name!')
    removeVulns(options.name, 'pluginlist.txt')

if __name__ == "__main__":
    main()

通过将判断逻辑移到XPath表达式中,您可以去掉 if 检查,如下所示:
for elem in doc.xpath('//*[@pluginID="{0}"]'.format(nessusID.strip('\n'))):
import lxml.etree as le
import os
from optparse import OptionParser, SUPPRESS_HELP

def removeVulns(filename, pluginlist):
    """Remove listed plugin findings from an XML/.nessus file, in place.

    Every element carrying a ``pluginID`` attribute whose value appears in
    *pluginlist* is detached from its parent, and *filename* is then
    replaced by the filtered document.

    Args:
        filename: Path of the XML/.nessus file to filter. It is overwritten.
        pluginlist: Path of a text file holding one plugin ID per line.
            Surrounding whitespace (including ``\\r`` from CRLF files) is
            ignored and blank lines are skipped.

    Raises:
        IOError/OSError: If either file cannot be read or the result cannot
            be written.
        lxml.etree.XMLSyntaxError: If *filename* is not well-formed XML.
    """
    # Load the IDs once into a set: the handle gets closed, and the document
    # only has to be scanned a single time instead of once per ID.
    with open(pluginlist) as id_file:
        unwanted = {line.strip() for line in id_file}
    # A blank line must not match elements with an empty pluginID.
    unwanted.discard('')

    # Hand lxml the path rather than a text-mode file object so that it
    # reads the raw bytes and applies the document's own encoding.
    doc = le.parse(filename)

    # xpath() returns a list, so removing nodes while looping is safe.
    for elem in doc.xpath('//*[@pluginID]'):
        if elem.get('pluginID') not in unwanted:
            continue
        parent = elem.getparent()
        # The root element has no parent and cannot be detached.
        if parent is not None:
            parent.remove(elem)

    # Write next to the target so the final rename stays on one filesystem
    # and cannot clobber an unrelated "temp.xml" in the working directory.
    temp_path = filename + '.tmp'
    try:
        with open(temp_path, 'wb') as output_file:
            # Serialise straight into the file instead of first building the
            # whole document as one byte string with tostring().
            doc.write(output_file)
    except Exception:
        # Do not leave a half-written temp file behind; the original file
        # has not been touched yet at this point.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise

    # os.rename() refuses to overwrite an existing file on Windows, so the
    # original is removed only after the filtered copy is fully written.
    os.remove(filename)
    os.rename(temp_path, filename)


def main():
    """Command-line entry point: filter the .nessus file named by ``-f``.

    Exits through ``OptionParser.error`` when ``-f`` is missing; otherwise
    filters the named file using the IDs listed in ``pluginlist.txt``.
    """
    usage_text = '%prog -f <filename>'
    cli = OptionParser(usage=usage_text, version='%prog 1.0')
    cli.add_option('-f', dest='name', type='string', help='.nessus name')

    parsed, _positional = cli.parse_args()
    if not parsed.name:
        # error() prints the usage message and terminates the process.
        cli.error('Pop, you forgot name!')
    removeVulns(parsed.name, 'pluginlist.txt')

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()