Python pyparsing只能找到一个实例
我想用Python pyparsing只能找到一个实例,python,pyparsing,Python,Pyparsing,我想用pyparsing解析Windows资源文件,因为菜单可以有深嵌套结构用正则表达式解析这样的结构将非常困难。 一切正常,但今天我发现我的代码只能找到一个实例。 为了清楚起见,这里是*.rc文件的内容(E:\tool\res\my.rc,为了节省空间,只显示容易出错的部分): 我的Python代码找不到IDR\u MENU\u PRPPERTIES MENU, 现在的输出是: IDM_COLLAPSE_ALL IDM_EXPAND_ALL IDM_LAYER_PROPERTIES IDM_L
pyparsing
解析Windows资源文件,因为菜单可以有深嵌套结构用正则表达式解析这样的结构将非常困难。
一切正常,但今天我发现我的代码只能找到一个实例。
为了清楚起见,这里是*.rc文件的内容(E:\tool\res\my.rc,为了节省空间,只显示容易出错的部分):
我的Python代码找不到IDR\u MENU\u PRPPERTIES MENU
,
现在的输出是:
IDM_COLLAPSE_ALL
IDM_EXPAND_ALL
IDM_LAYER_PROPERTIES
IDM_LIST_ALL
IDM_LIST_VISIBLE
IDM_RESET_INIT
IDM_SHOW_ALL
IDM_SHOW_VISIBLE
IDR_MENU_OPTION
但预期产出应为:
IDM_COLLAPSE_ALL
IDM_EXPAND_ALL
IDM_LAYER_PROPERTIES
IDM_LIST_ALL
IDM_LIST_VISIBLE
IDM_RESET_INIT
IDM_SHOW_ALL
IDM_SHOW_VISIBLE
IDR_MENU_OPTION
IDR_MENU_PRPPERTIES
IDM_SHOW
IDM_PROPERTIES
这是我的代码:
import re
import os
import codecs
import fnmatch
from bs4 import UnicodeDammit
from pyparsing import restOfLine, cStyleComment, Word, alphanums, alphas, \
Optional, SkipTo, ZeroOrMore, Group, Keyword, quotedString, delimitedList, \
nums, commaSeparatedList, Forward, Combine
class RcParser:
def __init__(self, rc_file):
self.rc_file = rc_file
handle = open(rc_file, 'rb')
binary_data = handle.read()
handle.close()
dammit = UnicodeDammit(binary_data)
self.rc_src = dammit.unicode_markup
self.encoding = dammit.original_encoding
self.string_table_id = set()
self.dialog_id = set()
self.menu_id = set()
self.img_id = set()
self.parse(self.rc_src)
def get_rc_header(self):
inx = self.rc_file.rfind('\\')
path = self.rc_file[: inx + 1]
file_lists = [path + file for file in os.listdir(path) if file.lower().endswith('resource.h')]
if not file_lists:
return None
return file_lists[0]
def id_by_parsing_rc(self):
rc_id = self.img_id | self.menu_id | self.dialog_id | self.string_table_id
return rc_id
def rc_statement(self):
""" Generate a RC statement parser that can be used to parse a RC file
:rtype: pyparsing.ParserElement
"""
one_line_comment = '//' + restOfLine
comments = cStyleComment ^ one_line_comment
precompiler = Word('#', alphanums) + restOfLine
language_definition = "LANGUAGE" + Word(alphas + '_').setResultsName(
"language") + Optional(',' + Word(alphas + '_').setResultsName("sublanguage"))
block_start = (Keyword('{') | Keyword("BEGIN")).setName("block_start")
block_end = (Keyword('}') | Keyword("END")).setName("block_end")
reserved_words = block_start | block_end
name_id = ~reserved_words + \
Word(alphas, alphanums + '_').setName("name_id")
numbers = Word(nums)
integerconstant = numbers ^ Combine('0x' + numbers)
constant = Combine(
Optional(Keyword("NOT")) + (name_id | integerconstant), adjacent=False, joinString=' ')
combined_constants = delimitedList(constant, '|')
block_options = Optional(SkipTo(
Keyword("CAPTION"), failOn=block_start)("pre_caption") + Keyword("CAPTION") + quotedString(
"caption")) + SkipTo(
block_start)("post_caption")
undefined_control = Group(name_id.setResultsName(
"id_control") + delimitedList(quotedString ^ constant ^ numbers ^ Group(combined_constants)).setResultsName(
"values_"))
block = block_start + \
ZeroOrMore(undefined_control)("controls") + block_end
dialog = name_id(
"block_id") + (Keyword("DIALOGEX") | Keyword("DIALOG"))("block_type") + block_options + block
string_table = Keyword("STRINGTABLE")(
"block_type") + block_options + block
menu_item = Keyword(
"MENUITEM")("block_type") + (commaSeparatedList("values_") | Keyword("SEPARATOR"))
popup_block = Forward()
popup_block <<= Group(Keyword("POPUP")("block_type") + Optional(quotedString("caption")) + block_start +
ZeroOrMore(Group(menu_item | popup_block))("elements") + block_end)("popups*")
menu = name_id("block_id") + \
Keyword("MENU")("block_type") + block_options + \
block_start + ZeroOrMore(popup_block) + block_end
statem = comments ^ precompiler ^ language_definition ^ dialog ^ string_table ^ menu
return statem
def generate_menu_pre_name(self, block_type, block_id):
"""Return the pre-name generated for elements of a menu."""
return "%s.%s" % (block_type, block_id)
def generate_popup_pre_name(self, pre_name, caption):
"""Return the pre-name generated for subelements of a popup.
:param pre_name: The pre_name that already have the popup.
:param caption: The caption (whitout quotes) of the popup.
:return: The subelements pre-name based in the pre-name of the popup and
its caption.
"""
return "%s.%s" % (pre_name, caption.replace(" ", "_"))
def add_popup_units(self, pre_name, popup):
"""Transverses the popup tree making new units as needed."""
for element in popup.elements:
if element.block_type and element.block_type == "MENUITEM":
if element.values_ and len(element.values_) >= 2:
var = element.values_[1]
if not var.isdigit():
self.menu_id.add(var)
# Else it can be a separator.
elif element.popups:
for sub_popup in element.popups:
self.add_popup_units(self.generate_popup_pre_name(pre_name, popup.caption[1:-1]), sub_popup)
def parse(self, rcsrc):
"""Read the source of a .rc file in and include them as units."""
# Parse the strings into a structure.
results = self.rc_statement().searchString(rcsrc)
for statement in results:
if not statement.block_type:
continue
if statement.block_type in ("DIALOG", "DIALOGEX"):
helper = statement.block_id[0]
self.dialog_id.add(statement.block_id[0])
control_type = [
"AUTOCHECKBOX", "AUTORADIOBUTTON", "CAPTION", "CHECKBOX"
, "CTEXT", "CONTROL", "DEFPUSHBUTTON", "GROUPBOX"
, "LTEXT", "PUSHBUTTON", "RADIOBUTTON", "RTEXT"
, "COMBOBOX"
]
for control in statement.controls:
fk = (control.id_control[0] in control_type)
flag = (control.values_[0].startswith('"') or control.values_[0].startswith("'"))
if control.id_control[0] in control_type:
if flag:
self.dialog_id.add(control.values_[1])
else:
self.dialog_id.add(control.values_[0])
continue
if statement.block_type in ("MENU"):
pre_name = self.generate_menu_pre_name(statement.block_type, statement.block_id[0])
self.menu_id.add(statement.block_id[0])
for popup in statement.popups:
self.add_popup_units(pre_name, popup)
continue
if statement.block_type in ("STRINGTABLE"):
for text in statement.controls:
self.string_table_id.add(text.id_control[0])
continue
lines = rcsrc.splitlines()
for line in lines:
line = line.rstrip()
m = re.match(r'(\w+)\s+(\bBITMAP\b|\bPNG\b|\bXML\b|\bICON\b)\s+(\".*\")$', line)
if not m:
continue
self.img_id.add(m.group(1))
def main():
x = RcParser(r'E:\tool\res\my.rc')
print('\n'.join(sorted(x.id_by_parsing_rc())))
if __name__ == "__main__":
main()
重新导入
导入操作系统
导入编解码器
导入fnmatch
从bs4导入UnicodeMit
从py导入restOfLine、cStyleComment、Word、alphanums、alphas、\
可选、SkipTo、ZeroOrMore、组、关键字、quotedString、delimitedList、\
nums、逗号分隔列表、转发、合并
类RcParser:
def uu init uu(self,rc_文件):
self.rc\u文件=rc\u文件
句柄=打开(rc_文件'rb')
二进制_data=handle.read()
handle.close()
dammit=UnicodeAMMIT(二进制数据)
self.rc\u src=dammit.unicode\u标记
self.encoding=dammit.original\u编码
self.string\u table\u id=set()
self.dialog_id=set()
self.menu_id=set()
self.img_id=set()
self.parse(self.rc\u src)
def get_rc_收割台(自身):
inx=self.rc\u file.rfind(“\\”)
path=self.rc_文件[:inx+1]
file_lists=[path+os.listdir(path)if file.lower().endswith('resource.h')中文件的文件]
如果不是文件列表:
一无所获
返回文件列表[0]
def id_by_parsing_rc(self):
rc|u id=self.img|u id | self.menu id | self.dialog|u id | self.string|u table|id
返回rc_id
def rc_声明(自我):
“”“生成可用于分析RC文件的RC语句分析器。”
:rtype:pyparsing.parseRelation
"""
一行注释='/'+restOfLine
注释=cStyleComment^一行注释
预编译器=单词(“#”,字母)+restOfLine
language_definition=“language”+单词(alphas+)。setResultsName(
“语言”)+可选(','+单词(alphas+''.')。setResultsName(“子语言”))
块开始=(关键字('{')|关键字(“开始”)).setName(“块开始”)
block_end=(关键字('}')|关键字(“end”)).setName(“block_end”)
保留字=块开始|块结束
name_id=~保留的_字+\
单词(alphas,alphanums+''.'''.setName(“name_id”)
数字=字(nums)
integerconstant=numbers^Combine('0x'+数字)
常数=联合收割机(
可选(关键字(“NOT”)+(name_id | integerconstant),相邻=False,连接字符串=“”)
组合的_常量=分隔列表(常量“|”)
块_选项=可选(跳过)(
关键字(“标题”),失效=块开始(“前标题”)+关键字(“标题”)+引用字符串(
“标题”)+SkipTo(
块(开始)(“后标题”)
未定义的\u控制=组(名称\u id.setResultsName(
“id_控件”)+分隔列表(quotedString^常量^numbers^组(组合_常量)).setResultsName(
"价值"()
block=block_start+\
零或更多(未定义的控制)(“控制”)+块结束
dialog=name\u id(
“块id”)+(关键字(“DIALOGEX”)|关键字(“DIALOG”)(“块类型”)+块选项+块
string_table=关键字(“STRINGTABLE”)(
“块类型”)+块选项+块
菜单项=关键字(
“菜单项”(“块类型”)+(逗号分隔列表(“值”)|关键字(“分隔符”))
popup_block=Forward()
弹出菜单块您对菜单的定义是:
menu = name_id("block_id") + \
Keyword("MENU")("block_type") + block_options + \
block_start + ZeroOrMore(popup_block) + block_end
在块开始/块结束中,只允许弹出块。在不匹配的菜单中,菜单中有一个菜单项不是弹出菜单块的一部分。您可能需要以下内容:
menu = name_id("block_id") + \
Keyword("MENU")("block_type") + block_options + \
block_start + ZeroOrMore(popup_block | menu_item) + block_end
是的,你说得对!按照你的建议修改代码后,一切都是正确的。
menu = name_id("block_id") + \
Keyword("MENU")("block_type") + block_options + \
block_start + ZeroOrMore(popup_block | menu_item) + block_end