2012-02-06 52 views
6

我已經開始學習FParsec。它有一個非常靈活的方法來解析數字;我可以提供一組我想要使用的數字格式:解析FParsec中的數字

type Number = 
    | Numeral of int 
    | Decimal of float 
    | Hexadecimal of int 
    | Binary of int 

let numberFormat = NumberLiteralOptions.AllowFraction 
        ||| NumberLiteralOptions.AllowHexadecimal 
        ||| NumberLiteralOptions.AllowBinary 

let pnumber = 
    numberLiteral numberFormat "number" 
    |>> fun num -> if num.IsHexadecimal then Hexadecimal (int num.String) 
        elif num.IsBinary then Binary (int num.String) 
        elif num.IsInteger then Numeral (int num.String) 
        else Decimal (float num.String) 

但是,我試圖解析的語言有點奇怪。一些可能是數字(非負int),小數(非負float),十六進制(前綴#x)或二進制(前綴#b):

numeral: 0, 2 
decimal: 0.2, 2.0 
hexadecimal: #xA04, #x611ff 
binary: #b100, #b001 

現在我必須做解析兩次用被0#(如有必要),以利用pnumber

let number: Parser<_, unit> = 
    let isDotOrDigit c = isDigit c || c = '.' 
    let numOrDec = many1Satisfy2 isDigit isDotOrDigit 
    let hexOrBin = skipChar '#' >>. manyChars (letter <|> digit) |>> sprintf "0%s" 
    let str = spaces >>. numOrDec <|> hexOrBin 
    str |>> fun s -> match run pnumber s with 
        | Success(result, _, _) -> result 
        | Failure(errorMsg, _, _) -> failwith errorMsg 

什麼是在這種情況下解析更好的方法?或者我該如何改變FParsec的CharStream以使條件解析更簡單?

回答

9

如果要生成良好的錯誤消息並正確檢查溢出情況,解析數字可能非常麻煩。

下面是一個簡單的FParsec實現你的電話號碼分析器:

let numeralOrDecimal : Parser<_, unit> = 
    // note: doesn't parse a float exponent suffix 
    numberLiteral NumberLiteralOptions.AllowFraction "number" 
    |>> fun num -> 
      // raises an exception on overflow 
      if num.IsInteger then Numeral(int num.String) 
      else Decimal(float num.String) 

let hexNumber =  
    pstring "#x" >>. many1SatisfyL isHex "hex digit" 
    |>> fun hexStr -> 
      // raises an exception on overflow 
      Hexadecimal(System.Convert.ToInt32(hexStr, 16)) 

let binaryNumber =  
    pstring "#b" >>. many1SatisfyL (fun c -> c = '0' || c = '1') "binary digit" 
    |>> fun hexStr -> 
      // raises an exception on overflow 
      Binary(System.Convert.ToInt32(hexStr, 2)) 


let number = 
    choiceL [numeralOrDecimal 
      hexNumber 
      binaryNumber] 
      "number literal" 

生成上溢佳錯誤信息會有點複雜此實現,正如你所理想還需要在錯誤發生後原路返回,使錯誤位置將在數字文字開始處結束(有關示例,請參閱numberLiteral文檔)。

一個簡單的方法妥善處理可能出現溢出的例外是使用一個小的異常處理結合器類似如下:

let mayThrow (p: Parser<'t,'u>) : Parser<'t,'u> = 
    fun stream -> 
     let state = stream.State   
     try 
      p stream 
     with e -> // catching all exceptions is somewhat dangerous 
      stream.BacktrackTo(state) 
      Reply(FatalError, messageError e.Message) 

然後,您可以編寫

let number = mayThrow (choiceL [...] "number literal") 

我不知道你是什麼意思是說「改變FParsec的CharStream能夠使條件分析更容易」,但下面的例子演示瞭如何編寫一個低級別的實現,它只直接使用CharStream方法。

type NumberStyles = System.Globalization.NumberStyles 
let invariantCulture = System.Globalization.CultureInfo.InvariantCulture 

let number: Parser<Number, unit> = 
    let expectedNumber = expected "number" 
    let inline isBinary c = c = '0' || c = '1' 
    let inline hex2int c = (int c &&& 15) + (int c >>> 6)*9 

    let hexStringToInt (str: string) = // does no argument or overflow checking   
     let mutable n = 0 
     for c in str do 
      n <- n*16 + hex2int c 
     n  

    let binStringToInt (str: string) = // does no argument or overflow checking 
     let mutable n = 0 
     for c in str do 
      n <- n*2 + (int c - int '0') 
     n 

    let findIndexOfFirstNonNull (str: string) = 
     let mutable i = 0 
     while i < str.Length && str.[i] = '0' do 
      i <- i + 1 
     i 

    let isHexFun = id isHex // tricks the compiler into caching the function object 
    let isDigitFun = id isDigit 
    let isBinaryFun = id isBinary 

    fun stream -> 
    let start = stream.IndexToken 
    let cs = stream.Peek2()   
    match cs.Char0, cs.Char1 with 
    | '#', 'x' -> 
     stream.Skip(2) 
     let str = stream.ReadCharsOrNewlinesWhile(isHexFun, false) 
     if str.Length <> 0 then 
      let i = findIndexOfFirstNonNull str 
      let length = str.Length - i 
      if length < 8 || (length = 8 && str.[i] <= '7') then 
       Reply(Hexadecimal(hexStringToInt str)) 
      else 
       stream.Seek(start) 
       Reply(Error, messageError "hex number literal is too large for 32-bit int") 
     else 
      Reply(Error, expected "hex digit") 

    | '#', 'b' -> 
     stream.Skip(2) 
     let str = stream.ReadCharsOrNewlinesWhile(isBinaryFun, false) 
     if str.Length <> 0 then 
      let i = findIndexOfFirstNonNull str 
      let length = str.Length - i 
      if length < 32 then 
       Reply(Binary(binStringToInt str)) 
      else 
       stream.Seek(start) 
       Reply(Error, messageError "binary number literal is too large for 32-bit int") 
     else 
      Reply(Error, expected "binary digit") 

    | c, _ -> 
     if not (isDigit c) then Reply(Error, expectedNumber) 
     else 
      stream.SkipCharsOrNewlinesWhile(isDigitFun) |> ignore 
      if stream.Skip('.') then 
       let n2 = stream.SkipCharsOrNewlinesWhile(isDigitFun) 
       if n2 <> 0 then 
        // we don't parse any exponent, as in the other example 
        let mutable result = 0. 
        if System.Double.TryParse(stream.ReadFrom(start), 
               NumberStyles.AllowDecimalPoint, 
               invariantCulture, 
               &result) 
        then Reply(Decimal(result)) 
        else 
         stream.Seek(start) 
         Reply(Error, messageError "decimal literal is larger than System.Double.MaxValue")      
       else 
        Reply(Error, expected "digit") 
      else 
       let decimalString = stream.ReadFrom(start) 
       let mutable result = 0 
       if System.Int32.TryParse(stream.ReadFrom(start), 
             NumberStyles.None, 
             invariantCulture, 
             &result) 
       then Reply(Numeral(result)) 
       else 
        stream.Seek(start) 
        Reply(Error, messageError "decimal number literal is too large for 32-bit int") 

雖然這個實現解析十六進制和二進制數沒有系統方法的幫助下,它最終代表十進制數的Int32.TryParse和Double.TryParse方法的解析。

正如我所說:它很混亂。

+0

+1,感謝您的快速響應,Stephan。通過「改變FParsec的CharStream ...」,我的意思是對CharStream進行低級別的操作。我會採用第一種方法,簡單易懂。順便說一句,使用combinators與標籤的成本是多少?如果我在解析器中的任何地方使用標籤,它會花費很多嗎? – pad 2012-02-06 15:44:13

+0

我剛剛添加了關於如何更優雅地處理第一個版本中的溢出異常的評論。關於標籤:取決於。 'choiceL'實際上比'choice'更快,因爲它不必收集單個錯誤消息。通常''和類似的組合器的開銷在非平凡應用中幾乎不可測量。如果你真的在FParsec解析器中發現性能問題,總有辦法讓它更快...... – 2012-02-06 16:33:38

+0

感謝您的詳細解答。只是一個小問題,在這種情況下'skipString'應該比'pstring'更受歡迎,對吧? – pad 2012-02-06 16:42:52