2016-09-01 53 views
1

使用 Ruby 的 Parslet 編寫類似 SystemVerilog 接口的解析器

我解析類似於SV接口文檔,如:

interface my_intf; 
    protocol validonly; 

    transmit [Bool] valid; 
    transmit [Bool] pipeid; 
    transmit [5:0] incr; 
    transmit [Bool] sample; 

endinterface 

這裏是我的解析器:

# Parslet grammar from the question, intended to parse the interface text
# shown above.
# NOTE(review): `myParse` begins with a lowercase letter; Ruby requires class
# names to be constants (capitalised), so this definition will not load as
# written.
class myParse < Parslet::Parser 
    # Punctuation tokens. Each one swallows optional whitespace on BOTH sides,
    # so any whitespace following a token is already consumed by the token.
    rule(:lparen)  { space? >> str('(') >> space? } 
    rule(:rparen)  { space? >> str(')') >> space? } 
    rule(:lbox)  { space? >> str('[') >> space? } 
    rule(:rbox)  { space? >> str(']') >> space? } 
    rule(:lcurly)  { space? >> str('{') >> space? } 
    rule(:rcurly)  { space? >> str('}') >> space? } 
    rule(:comma)  { space? >> str(',') >> space? } 
    rule(:semicolon) { space? >> str(';') >> space? } 
    # End of input: succeeds only when no character is left.
    rule(:eof)  { any.absent? } 
    # One tab or one space ("\s" inside a double-quoted Ruby string is a space).
    rule(:space)  { match["\t\s"] } 
    # `repeat` without arguments has a minimum of 0, so this rule also matches
    # the empty string; `space?` then wraps it in another optional.
    rule(:whitespace) { space.repeat } 
    rule(:space?)  { whitespace.maybe } 
    # Optional spaces/tabs followed by one or more newlines.
    rule(:blank_line) { space? >> newline.repeat(1) } 
    rule(:newline) { str("\n") } 

    # Generic lexical helpers (not referenced by the rules below).
    rule(:integer) { space? >> match('[0-9]').repeat(1).as(:int) >> space? } 
    rule(:identifier) { match['a-z'].repeat(1) } 


    # Header line `interface <name>;` plus its newline; the name is captured
    # as :intf_name.
    rule(:intf_start)  { space? >> str('interface') >> space? >> (match['a-zA-Z_'].repeat(1,1) >> match['[:alnum:]_'].repeat(0)).as(:intf_name) >> space? >> str(';') >> space? >> str("\n") } 
    # Line `protocol validonly;` plus its newline; the optional `validonly`
    # keyword is captured as :protocol.
    rule(:protocol)  { space? >> str('protocol') >> whitespace >> (str('validonly').maybe).as(:protocol) >> space? >> str(';') >> space? >> str("\n") } 
    # `[Bool]` type marker, captured as :bool.
    rule(:bool)   { lbox >> space? >> str('Bool').as(:bool) >> space? >> rbox } 
    # `[msb:lsb]` bit range, captured as :msb and :lsb. Both digit runs use
    # `repeat` with no minimum, so either side may be empty.
    rule(:transmit_width) { lbox >> space? >> match('[0-9]').repeat.as(:msb) >> space? >> str(':') >> space? >> match('[0-9]').repeat.as(:lsb) >> space? >> rbox } 
    # Line `transmit <[Bool] | [msb:lsb]> <name>;` plus its newline; the name is
    # captured as :transmit_name.
    rule(:transmit)  { space? >> str('transmit') >> whitespace >> (bool | transmit_width) >> whitespace >> (match['a-zA-Z_'].repeat(1,1) >> match['[:alnum:]_'].repeat(0)).as(:transmit_name) >> space? >> str(';') >> space? >> str("\n") } 
    # NOTE(review): only a protocol line or an optional blank line is accepted
    # here -- `transmit` is never referenced -- and `blank_line.maybe` can
    # succeed without consuming any input.
    rule(:interface_body) { (protocol | blank_line.maybe) } 
    # NOTE(review): the body is matched once rather than repeated, and nothing
    # consumes the closing `endinterface`.
    rule(:interface)  { intf_start >> interface_body } 

    # Zero or more interfaces.
    rule(:expression)  { (interface).repeat } 

    root :expression 
end 

我在編寫 interface_body 規則時遇到了問題。

它可以包含 0 個或多個 transmit 行、0 或 1 個 protocol 行,以及任意數量的空白行、註釋等。

有人能幫助我嗎?我在代碼片段中編寫的規則對單個 transmit 行和單個 protocol 行都能正常工作,即它們能正確匹配,但是當我解析整個接口時就不行了。

在此先感謝。

回答

1

好的……這個解析器可以解析你提到的文件。我並不完全了解所需的格式,所以不能保證它適用於所有的文件,但希望這能讓你有個開始。

require 'parslet' 

# Parslet grammar for a SystemVerilog-like interface description, e.g.
#
#   interface my_intf;
#       protocol validonly;
#
#       transmit [Bool] valid;
#       transmit [5:0] incr;
#
#   endinterface
#
# Design rules followed throughout:
# * Tokens consume whitespace only on their LEADING side, so a rule that
#   follows a token can still require mandatory whitespace.
# * Anything placed inside a `repeat(0)` must consume at least one character
#   when it succeeds, so a broken input fails at the offending line instead of
#   silently matching nothing.
#
# Captures produced in the parse tree: :intf_name, :protocol, :bool, :msb,
# :lsb and :transmit_name.
class MyParse < Parslet::Parser
  # -- Punctuation (optional leading whitespace only) ------------------------
  rule(:lparen)    { space? >> str('(') }
  rule(:rparen)    { space? >> str(')') }
  rule(:lbox)      { space? >> str('[') }
  rule(:rbox)      { space? >> str(']') }
  rule(:lcurly)    { space? >> str('{') }
  rule(:rcurly)    { space? >> str('}') }
  rule(:comma)     { space? >> str(',') }
  rule(:semicolon) { space? >> str(';') }

  # -- Whitespace -------------------------------------------------------------
  rule(:eof)        { any.absent? }
  # One tab or one space ("\s" in a double-quoted Ruby string is a space).
  rule(:space)      { match["\t\s"] }
  # Mandatory whitespace: one or more spaces/tabs.
  rule(:whitespace) { space.repeat(1) }
  # Optional whitespace: zero or more spaces/tabs.
  rule(:space?)     { space.repeat(0) }
  rule(:newline)    { str("\n") }
  # Optional indentation followed by at least one newline; never zero-length.
  rule(:blank_line) { space? >> newline.repeat(1) }

  # -- Generic lexical helpers -------------------------------------------------
  rule(:integer)    { space? >> match('[0-9]').repeat(1).as(:int) >> space? }
  rule(:identifier) { match['a-z'].repeat(1) }

  # Wraps +expression+ into a full statement line:
  # optional indentation, the expression, `;`, optional trailing spaces and
  # the terminating newline.
  #
  # @param expression [Parslet::Atoms::Base] the statement without its `;`
  # @return [Parslet::Atoms::Base] parser for the whole line
  def line(expression)
    space? >> expression >> semicolon >> space? >> newline
  end

  # Root: any number of interfaces. Leading blank lines and trailing
  # whitespace are tolerated so that ordinary files (which end in a newline)
  # are consumed completely.
  rule(:expression?) { blank_line.repeat(0) >> interface.repeat(0) >> space? }

  # Header line, body lines, then `endinterface`. Blank lines are allowed
  # after the header and after `endinterface`, and `endinterface` may be
  # indented.
  rule(:interface) {
    intf_start >>
      blank_line.repeat(0) >>
      interface_body.repeat(0) >>
      space? >> intf_end >>
      blank_line.repeat(0)
  }

  # One body statement followed by any number of blank lines. The negative
  # lookahead says explicitly that a body line is "not the end line", so a
  # malformed statement fails here rather than ending the repetition early.
  rule(:interface_body) {
    (space? >> intf_end).absent? >>
      interface_bodyline >>
      blank_line.repeat(0)
  }

  # `interface <name>;` -- whitespace between keyword and name is mandatory,
  # otherwise `interfacefoo;` would be accepted as an interface called `foo`.
  rule(:intf_start) {
    line(str('interface') >> whitespace >> name.as(:intf_name))
  }

  # A single `protocol ...;` or `transmit ...;` statement line.
  rule(:interface_bodyline) {
    line(protocol | transmit)
  }

  # `protocol validonly` -- the optional `validonly` keyword is captured as
  # :protocol.
  rule(:protocol) {
    str('protocol') >> whitespace >>
      str('validonly').maybe.as(:protocol)
  }

  # `transmit <[Bool] | [msb:lsb]> <name>` -- the name is captured as
  # :transmit_name.
  rule(:transmit) {
    str('transmit') >> whitespace >>
      (bool | transmit_width) >> whitespace >>
      name.as(:transmit_name)
  }

  # Identifier: a letter or underscore followed by letters, digits or
  # underscores.
  rule(:name) {
    match('[a-zA-Z_]') >>
      (match['[:alnum:]'] | str('_')).repeat(0)
  }

  # `[Bool]` type marker, captured as :bool (spaces inside the brackets are
  # allowed, matching transmit_width).
  rule(:bool) { lbox >> space? >> str('Bool').as(:bool) >> rbox }

  # `[msb:lsb]` bit range; both bounds need at least one digit and are
  # captured as :msb and :lsb.
  rule(:transmit_width) {
    lbox >>
      space? >> match('[0-9]').repeat(1).as(:msb) >>
      space? >> str(':') >>
      space? >> match('[0-9]').repeat(1).as(:lsb) >>
      rbox
  }

  # The bare closing keyword; surrounding whitespace is handled by callers.
  rule(:intf_end) { str('endinterface') }

  root :expression?
end

    require 'rspec' 
    require 'parslet/rig/rspec' 

    # Bottom-up specs: each example feeds one small snippet to a single rule of
    # MyParse, using the `parse` matcher provided by parslet/rig/rspec.
    RSpec.describe MyParse do
      let(:parser) { described_class.new }

      context 'simple_rule' do
        it 'should consume protocol line' do
          expect(parser.interface_bodyline).to parse(" protocol validonly; \n")
        end

        it 'name' do
          expect(parser.name).to parse('valid')
        end

        it 'bool' do
          expect(parser.bool).to parse('[Bool]')
        end

        it 'transmit line' do
          expect(parser.transmit).to parse('transmit [Bool] valid')
        end

        it "transmit as bodyline'" do
          expect(parser.interface_bodyline).to parse(" transmit [Bool] valid; \n")
        end
      end
    end

    # Run the examples right away with the documentation formatter.
    RSpec::Core::Runner.run(%w[--format documentation])


# Parse test.txt with MyParse; on a parse failure print Parslet's tree of
# causes so the offending rule and input position are visible.
begin
  doc = File.read("test.txt")
  MyParse.new.parse(doc)
rescue Parslet::ParseFailed => error
  # Newer Parslet releases expose the failure tree as #parse_failure_cause;
  # there, #cause is Ruby's built-in Exception#cause (nil in this situation).
  # Older releases only provide it as #cause, so support both.
  failure = error.respond_to?(:parse_failure_cause) ? error.parse_failure_cause : error.cause
  puts failure.ascii_tree
end

主要的變化...

  • 不要在令牌的兩側都消耗空白。你的規則把表達式「[Bool] valid」解析爲 LBOX BOOL RBOX SPACE?,接著又期待一個 WHITESPACE,但找不到(因爲前一條規則已經把它消耗掉了)。

  • 當一個表達式可以合法地匹配零長度(例如帶有 repeat(0) 的規則),而輸入的內容又有問題時,你會得到一個奇怪的錯誤:該規則通過但什麼也沒有匹配,然後下一條規則通常會失敗。我明確地把「主體行」匹配爲「不是結束行」,這樣它就會在出錯的地方失敗並給出錯誤。

  • `repeat` 默認爲 repeat(0),我很希望能改變這一點。我經常看到這個錯誤。

  • x.repeat(1,1) 表示恰好匹配一次,這與直接寫 x 相同。 :)

  • 有更多的空白問題

所以……

自頂向下編寫解析器,自下而上編寫測試。當你的測試覆蓋到最頂層的規則時,你就完成了! :)

祝你好運。