2015-11-11 127 views
1

我想解析Windows資源與pyparsing文件,因爲菜單可以有很深的嵌套結構。 用正則表達式解析這樣的結構是非常困難的。pyparsing只能找到一個實例

一切工作正常,但今天我發現我的代碼只能找到一個實例。 爲了讓我清楚,這裏是* rc文件的內容(E:\工具\水庫\ my​​.rc,以節省空間,只顯示了容易出錯的部分):

#include "../include/resource.h" 

IDR_MENU_OPTION MENU BEGIN 
    POPUP "Options" 
    BEGIN 
     MENUITEM "List Layers for &All Pages", IDM_SHOW_ALL 
     MENUITEM "List Layers for &Visible Pages", IDM_SHOW_VISIBLE 
     MENUITEM SEPARATOR 
     MENUITEM "&Reset to Initial Visibility", IDM_RESET_INIT 
     MENUITEM SEPARATOR 
     MENUITEM "E&xpand All",     IDM_EXPAND_ALL 
     MENUITEM "C&ollapse All",    IDM_COLLAPSE_ALL 
    END 
    POPUP "" 
    BEGIN 
     MENUITEM "List Layers for &All Pages", IDM_LIST_ALL 
     MENUITEM "List Layers for &Visible Pages", IDM_LIST_VISIBLE 
     MENUITEM SEPARATOR 
     MENUITEM "&Reset to Initial Visibility", IDM_RESET_INIT 
     MENUITEM SEPARATOR 
     MENUITEM "E&xpand All",     IDM_EXPAND_ALL 
     MENUITEM "C&ollapse All",    IDM_COLLAPSE_ALL 
     MENUITEM SEPARATOR 
     MENUITEM "Layer &Properties...",  IDM_LAYER_PROPERTIES 
    END END 

IDR_MENU_PRPPERTIES MENU BEGIN // the menu block is skiped by pyparsing 
    POPUP "" 
    BEGIN 
     MENUITEM "&Show Layers",    IDM_SHOW 
     MENUITEM "&Properties...",    IDM_PROPERTIES 
    END 
    MENUITEM "",       65535 END 

#endif // not APSTUDIO_INVOKED 

我的Python代碼可以」找不到IDR_MENU_PRPPERTIES MENU, 輸出現在是:

IDM_COLLAPSE_ALL 
IDM_EXPAND_ALL 
IDM_LAYER_PROPERTIES 
IDM_LIST_ALL 
IDM_LIST_VISIBLE 
IDM_RESET_INIT 
IDM_SHOW_ALL 
IDM_SHOW_VISIBLE 
IDR_MENU_OPTION 

,但預期的輸出應該是:

IDM_COLLAPSE_ALL 
IDM_EXPAND_ALL 
IDM_LAYER_PROPERTIES 
IDM_LIST_ALL 
IDM_LIST_VISIBLE 
IDM_RESET_INIT 
IDM_SHOW_ALL 
IDM_SHOW_VISIBLE 
IDR_MENU_OPTION 
IDR_MENU_PRPPERTIES 
IDM_SHOW 
IDM_PROPERTIES 

這裏是我的代碼:

import re 
import os 
import codecs 
import fnmatch 
from bs4 import UnicodeDammit 
from pyparsing import restOfLine, cStyleComment, Word, alphanums, alphas, \ 
    Optional, SkipTo, ZeroOrMore, Group, Keyword, quotedString, delimitedList, \ 
    nums, commaSeparatedList, Forward, Combine 


class RcParser: 
    def __init__(self, rc_file): 
     self.rc_file = rc_file 
     handle = open(rc_file, 'rb') 
     binary_data = handle.read() 
     handle.close() 
     dammit = UnicodeDammit(binary_data) 
     self.rc_src = dammit.unicode_markup 
     self.encoding = dammit.original_encoding 
     self.string_table_id = set() 
     self.dialog_id = set() 
     self.menu_id = set() 
     self.img_id = set() 

     self.parse(self.rc_src) 

    def get_rc_header(self): 
     inx = self.rc_file.rfind('\\') 
     path = self.rc_file[: inx + 1] 
     file_lists = [path + file for file in os.listdir(path) if file.lower().endswith('resource.h')] 
     if not file_lists: 
      return None 
     return file_lists[0] 

    def id_by_parsing_rc(self): 
     rc_id = self.img_id | self.menu_id | self.dialog_id | self.string_table_id 
     return rc_id 

    def rc_statement(self): 
     """ Generate a RC statement parser that can be used to parse a RC file 

     :rtype: pyparsing.ParserElement 
     """ 

     one_line_comment = '//' + restOfLine 
     comments = cStyleComment^one_line_comment 
     precompiler = Word('#', alphanums) + restOfLine 
     language_definition = "LANGUAGE" + Word(alphas + '_').setResultsName(
      "language") + Optional(',' + Word(alphas + '_').setResultsName("sublanguage")) 
     block_start = (Keyword('{') | Keyword("BEGIN")).setName("block_start") 
     block_end = (Keyword('}') | Keyword("END")).setName("block_end") 
     reserved_words = block_start | block_end 
     name_id = ~reserved_words + \ 
        Word(alphas, alphanums + '_').setName("name_id") 
     numbers = Word(nums) 
     integerconstant = numbers^Combine('0x' + numbers) 
     constant = Combine(
      Optional(Keyword("NOT")) + (name_id | integerconstant), adjacent=False, joinString=' ') 
     combined_constants = delimitedList(constant, '|') 
     block_options = Optional(SkipTo(
      Keyword("CAPTION"), failOn=block_start)("pre_caption") + Keyword("CAPTION") + quotedString(
      "caption")) + SkipTo(
      block_start)("post_caption") 
     undefined_control = Group(name_id.setResultsName(
      "id_control") + delimitedList(quotedString^constant^numbers^Group(combined_constants)).setResultsName(
      "values_")) 
     block = block_start + \ 
       ZeroOrMore(undefined_control)("controls") + block_end 
     dialog = name_id(
      "block_id") + (Keyword("DIALOGEX") | Keyword("DIALOG"))("block_type") + block_options + block 
     string_table = Keyword("STRINGTABLE")(
      "block_type") + block_options + block 
     menu_item = Keyword(
      "MENUITEM")("block_type") + (commaSeparatedList("values_") | Keyword("SEPARATOR")) 
     popup_block = Forward() 
     popup_block <<= Group(Keyword("POPUP")("block_type") + Optional(quotedString("caption")) + block_start + 
           ZeroOrMore(Group(menu_item | popup_block))("elements") + block_end)("popups*") 
     menu = name_id("block_id") + \ 
       Keyword("MENU")("block_type") + block_options + \ 
       block_start + ZeroOrMore(popup_block) + block_end 
     statem = comments^precompiler^language_definition^dialog^string_table^menu 
     return statem 

    def generate_menu_pre_name(self, block_type, block_id): 
     """Return the pre-name generated for elements of a menu.""" 
     return "%s.%s" % (block_type, block_id) 

    def generate_popup_pre_name(self, pre_name, caption): 
     """Return the pre-name generated for subelements of a popup. 

     :param pre_name: The pre_name that already have the popup. 
     :param caption: The caption (whitout quotes) of the popup. 

     :return: The subelements pre-name based in the pre-name of the popup and 
       its caption. 
     """ 
     return "%s.%s" % (pre_name, caption.replace(" ", "_")) 

    def add_popup_units(self, pre_name, popup): 
     """Transverses the popup tree making new units as needed.""" 
     for element in popup.elements: 
      if element.block_type and element.block_type == "MENUITEM": 
       if element.values_ and len(element.values_) >= 2: 
        var = element.values_[1] 
        if not var.isdigit(): 
         self.menu_id.add(var) 
         # Else it can be a separator. 
      elif element.popups: 
       for sub_popup in element.popups: 
        self.add_popup_units(self.generate_popup_pre_name(pre_name, popup.caption[1:-1]), sub_popup) 

    def parse(self, rcsrc): 
     """Read the source of a .rc file in and include them as units.""" 
     # Parse the strings into a structure. 
     results = self.rc_statement().searchString(rcsrc) 
     for statement in results: 
      if not statement.block_type: 
       continue 
      if statement.block_type in ("DIALOG", "DIALOGEX"): 
       helper = statement.block_id[0] 
       self.dialog_id.add(statement.block_id[0]) 
       control_type = [ 
        "AUTOCHECKBOX", "AUTORADIOBUTTON", "CAPTION", "CHECKBOX" 
        , "CTEXT", "CONTROL", "DEFPUSHBUTTON", "GROUPBOX" 
        , "LTEXT", "PUSHBUTTON", "RADIOBUTTON", "RTEXT" 
        , "COMBOBOX" 
       ] 
       for control in statement.controls: 
        fk = (control.id_control[0] in control_type) 
        flag = (control.values_[0].startswith('"') or control.values_[0].startswith("'")) 
        if control.id_control[0] in control_type: 
         if flag: 
          self.dialog_id.add(control.values_[1]) 
         else: 
          self.dialog_id.add(control.values_[0]) 
       continue 

      if statement.block_type in ("MENU"): 
       pre_name = self.generate_menu_pre_name(statement.block_type, statement.block_id[0]) 
       self.menu_id.add(statement.block_id[0]) 
       for popup in statement.popups: 
        self.add_popup_units(pre_name, popup) 
       continue 

      if statement.block_type in ("STRINGTABLE"): 
       for text in statement.controls: 
        self.string_table_id.add(text.id_control[0]) 
       continue 

     lines = rcsrc.splitlines() 
     for line in lines: 
      line = line.rstrip() 
      m = re.match(r'(\w+)\s+(\bBITMAP\b|\bPNG\b|\bXML\b|\bICON\b)\s+(\".*\")$', line) 
      if not m: 
       continue 
      self.img_id.add(m.group(1)) 


def main(): 
    x = RcParser(r'E:\tool\res\my.rc') 
    print('\n'.join(sorted(x.id_by_parsing_rc()))) 


if __name__ == "__main__": 
    main() 

回答

1

你的菜單的定義是:

menu = name_id("block_id") + \ 
     Keyword("MENU")("block_type") + block_options + \ 
     block_start + ZeroOrMore(popup_block) + block_end 

在您BLOCK_START/block_end只允許popup_blocks。在不匹配的菜單中,菜單中有一個不屬於popup_block的menu_item。你可能需要這樣的東西:

menu = name_id("block_id") + \ 
     Keyword("MENU")("block_type") + block_options + \ 
     block_start + ZeroOrMore(popup_block | menu_item) + block_end 
+0

是的,你是對的!根據您的建議更改我的代碼後,一切都是正確的。 –

相關問題