2011-09-21 79 views
3

提高FParsec解析器的可讀性:我有一個用C#手寫的CSS解析器,它變得越來越難以管理,所以我嘗試用FParsec重寫它,使其更易於維護(maintainable)。下面是用正則表達式解析CSS選擇器元素的片段:

// Regex building blocks for one CSS selector element:
//   (tag? id | tag? classes | tag) pseudoClass?
var tagRegex = @"(?<Tag>(?:[a-zA-Z][_\-0-9a-zA-Z]*|\*))";
var idRegex = @"(?:#(?<Id>[a-zA-Z][_\-0-9a-zA-Z]*))";
var classesRegex = @"(?<Classes>(?:\.[a-zA-Z][_\-0-9a-zA-Z]*)+)";
var pseudoClassRegex = @"(?::(?<PseudoClass>link|visited|hover|active|before|after|first-line|first-letter))";
var selectorRegex = new Regex("(?:(?:" + tagRegex + "?" + idRegex + ")|" +
                              "(?:" + tagRegex + "?" + classesRegex + ")|" +
                              tagRegex + ")" +
                              pseudoClassRegex + "?");

var m = selectorRegex.Match(str);

// The match must cover the whole input. A failed match reports Length == 0,
// so without the Success test an empty input would slip through the length
// comparison and produce an empty selector element.
if (!m.Success || m.Length != str.Length) {
    cssParserTraceSwitch.WriteLine("Unrecognized selector: " + str);
    return null;
}

string tagName = m.Groups["Tag"].Value;

// Translate the optional pseudo-class keyword into its enum value.
string pseudoClassString = m.Groups["PseudoClass"].Value;
CssPseudoClass pseudoClass;
if (pseudoClassString.IsEmpty()) {
    pseudoClass = CssPseudoClass.None;
} else {
    // Invariant casing: CSS keywords must not depend on the current culture.
    switch (pseudoClassString.ToLowerInvariant()) {
        case "link":
            pseudoClass = CssPseudoClass.Link;
            break;
        case "visited":
            pseudoClass = CssPseudoClass.Visited;
            break;
        case "hover":
            pseudoClass = CssPseudoClass.Hover;
            break;
        case "active":
            pseudoClass = CssPseudoClass.Active;
            break;
        case "before":
            pseudoClass = CssPseudoClass.Before;
            break;
        case "after":
            pseudoClass = CssPseudoClass.After;
            break;
        case "first-line":
            pseudoClass = CssPseudoClass.FirstLine;
            break;
        case "first-letter":
            pseudoClass = CssPseudoClass.FirstLetter;
            break;
        default:
            // Defensive only: the regex admits nothing but the keywords above.
            cssParserTraceSwitch.WriteLine("Unrecognized selector: " + str);
            return null;
    }
}

// The "Classes" group holds ".a.b.c"; drop the leading dot, then split on the rest.
string cssClassesString = m.Groups["Classes"].Value;
string[] cssClasses = cssClassesString.IsEmpty() ? EmptyArray<string>.Instance : cssClassesString.Substring(1).Split('.');
allCssClasses.AddRange(cssClasses);

// Tag names are normalised to lower case with culture-independent rules
// (a culture-sensitive ToLower() turns "I" into a dotless i under tr-TR).
return new CssSelectorElement(
    tagName.ToLowerInvariant(),
    cssClasses,
    m.Groups["Id"].Value,
    pseudoClass);

我的第一次嘗試得到了這樣的結果:

/// Enumeration of the pseudo-class keywords the selector parser recognises,
/// with explicit integer values. `None` is the value used when a selector
/// element carries no pseudo-class.
type CssPseudoClass = 
    | None = 0 
    | Link = 1 
    | Visited = 2 
    | Hover = 3 
    | Active = 4 
    | Before = 5 
    | After = 6 
    | FirstLine = 7 
    | FirstLetter = 8 

/// One parsed CSS selector element: an optional tag name, an optional id,
/// zero or more class names and an optional pseudo-class.
/// Missing textual parts are represented by the empty string / empty list.
type CssSelectorElement =
    { Tag : string
      Id : string
      Classes : string list
      PseudoClass : CssPseudoClass }

    /// The element with every part absent; used as the base for
    /// copy-and-update expressions.
    static member Default =
        { Tag = ""
          Id = ""
          Classes = []
          PseudoClass = CssPseudoClass.None }

open FParsec 

/// Alias for FParsec's `spaces` parser.
let ws = spaces

/// Short alias for `skipString`: parses the given literal and discards it.
let str = skipString

/// Parses the literal `str` and returns `result` instead of the matched text.
/// Delegates to FParsec's built-in `stringReturn`, which is the optimized
/// equivalent of `skipString str >>. preturn result`.
let strWithResult str result = stringReturn str result

/// Parses an identifier: an optional single leading '-', then a letter, then
/// any number of letters, digits, '_' or '-'.
///
/// The returned string is exactly the text that was consumed. The previous
/// version consumed a leading '-' with `optional` and dropped it from the
/// result, and it left that '-' consumed when no identifier followed, which
/// turned `opt identifier` into a hard failure on such input.
let identifier =
    let isIdentifierStart c = isLetter c
    let isIdentifierChar c = isLetter c || isDigit c || c = '_' || c = '-'
    let name = many1Satisfy2L isIdentifierStart isIdentifierChar "identifier"
    // `attempt` un-consumes the '-' when it is not followed by a valid name,
    // so the parser as a whole fails without consuming input.
    let hyphenated = attempt (pipe2 (pstring "-") name (fun hyphen rest -> hyphen + rest))
    hyphenated <|> name

/// Unwraps an optional string, substituting the empty string for `None`.
let stringFromOptional strOption = defaultArg strOption ""

/// Unwraps an optional pseudo-class, substituting `CssPseudoClass.None`
/// when no pseudo-class was parsed.
let pseudoClassFromOptional pseudoClassOption =
    defaultArg pseudoClassOption CssPseudoClass.None

/// Parser for a single CSS selector element, following the grammar
///     (tag? id | tag? classes | tag) pseudoClass?
/// and producing a `CssSelectorElement`.
let parseCssSelectorElement =
    // Elementary pieces of a selector element.
    let tag = identifier <?> "tagName"
    let id = (str "#" >>. identifier) <?> "#id"
    let classes = many1 (str "." >>. identifier) <?> ".className"

    // One alternative per recognised pseudo-class keyword, tried in list order.
    let parseCssPseudoClass =
        let keywords =
            [ "link", CssPseudoClass.Link
              "visited", CssPseudoClass.Visited
              "hover", CssPseudoClass.Hover
              "active", CssPseudoClass.Active
              "before", CssPseudoClass.Before
              "after", CssPseudoClass.After
              "first-line", CssPseudoClass.FirstLine
              "first-letter", CssPseudoClass.FirstLetter ]
        let alternatives =
            keywords |> List.map (fun (keyword, value) -> strWithResult keyword value)
        choiceL alternatives "pseudo-class"

    // tag? id
    let tagThenId =
        pipe2 (opt tag) id (fun optTag idName ->
            { CssSelectorElement.Default with
                Tag = stringFromOptional optTag
                Id = idName })

    // tag? classes
    let tagThenClasses =
        pipe2 (opt tag) classes (fun optTag classNames ->
            { CssSelectorElement.Default with
                Tag = stringFromOptional optTag
                Classes = classNames })

    // tag
    let bareTag =
        tag |>> fun tagName -> { CssSelectorElement.Default with Tag = tagName }

    // Each earlier alternative may already have consumed a tag before failing,
    // so it is wrapped in `attempt` to let the next alternative start over.
    let selectorBody = attempt (attempt tagThenId <|> tagThenClasses) <|> bareTag

    // pseudoClass?
    let pseudoSuffix = attempt (opt (str ":" >>. parseCssPseudoClass))

    pipe2 selectorBody pseudoSuffix (fun selectorElem pseudoClass ->
        { selectorElem with PseudoClass = pseudoClassFromOptional pseudoClass })

但我真的不喜歡它現在的樣子。我原本期望寫出更容易理解的東西,但是用幾個pipe2和attempt來解析 (tag?id | tag?classes | tag)pseudoClass? 的那部分實在很糟糕。

有沒有在FParsec方面更有經驗的人,可以指點我更好的做法?我也在考慮改用FSLex/Yacc或Boost.Spirit來代替FParsec,看看能不能用它們寫出更好的代碼。

+1

FParsec自帶了你的`strWithResult`解析器的優化版本:`stringReturn`,參見 http://www.quanttec.com/fparsec/reference/charparsers.html#members.stringReturn –

回答

4

正如毛所說,如果你發現自己在FParsec解析器中重複代碼,你總是可以把公共部分提取到變量或自定義組合器中。這是組合器(combinator)庫的一大優點。

然而,在這種情況下,你也可以通過稍微重組語法來簡化並優化解析器。例如,你可以用下面的代碼

// Replacement for the lower half of parseCssSelectorElement. It relies on the
// `tag`, `id`, `classes` and `parseCssPseudoClass` parsers defined there.

/// Base record that every alternative below copies and updates.
let defSel = CssSelectorElement.Default

/// "#id" on its own.
let pIdSelector = id |>> fun idName -> { defSel with Id = idName }

/// ".class1.class2..." on its own.
let pClassesSelector = classes |>> fun classNames -> { defSel with Classes = classNames }

/// id | classes | tag (id | classes)?
/// The grammar is left-factored so that the tag is parsed only once.
let pSelectorMain =
    // After a tag, an id or class part may follow; `<|>% defSel` supplies the
    // plain default record when neither does.
    let pTagged =
        (tag .>>. (pIdSelector <|> pClassesSelector <|>% defSel))
        |>> fun (tagStr, sel) -> { sel with Tag = tagStr }
    pIdSelector <|> pClassesSelector <|> pTagged

// Attach the optional ":pseudo-class" suffix; the selector is returned
// untouched when there is none.
pipe2 pSelectorMain (opt (str ":" >>. parseCssPseudoClass))
      (fun sel optPseudo ->
          Option.fold (fun current pseudo -> { current with PseudoClass = pseudo }) sel optPseudo)

替換parseCssSelectorElement解析器的下半部分。順便說一句,如果你要解析大量的字符串常量,使用基於字典的解析器會更高效,例如:

/// Parses a pseudo-class name by reading an identifier and looking it up in a
/// dictionary, instead of trying each keyword parser in turn.
///
/// Fails with "unknown pseudo class" when the identifier is not a known
/// keyword; in that case the stream is restored to where the name started.
let pCssPseudoClass : Parser<CssPseudoClass,unit> =
    let pseudoDict =
        dict [ "link", CssPseudoClass.Link
               "visited", CssPseudoClass.Visited
               "hover", CssPseudoClass.Hover
               "active", CssPseudoClass.Active
               "before", CssPseudoClass.Before
               "after", CssPseudoClass.After
               "first-line", CssPseudoClass.FirstLine
               "first-letter", CssPseudoClass.FirstLetter ]
    fun stream ->
        // Capture the position before the name. Backtracking by
        // `reply.Result.Length` is only correct when `identifier` returns
        // exactly the text it consumed, which this block cannot guarantee.
        let startState = stream.State
        let reply = identifier stream
        if reply.Status <> Ok then Reply(reply.Status, reply.Error)
        else
            match pseudoDict.TryGetValue reply.Result with
            | true, pseudo -> Reply(pseudo)
            | _ ->
                // Rewind to the beginning of the invalid pseudo class.
                stream.BacktrackTo(startState)
                Reply(Error, messageError "unknown pseudo class")
+0

來輕鬆(至少對我而言)直觀地忽略某些解析器的結果。這看起來好一點,我想我需要更徹底地閱讀參考文檔,我還沒有找到<|>%這個運算符。但我真的希望有一個在概念上更接近Boost.Spirit的F#庫。它的語法非常接近EBNF,而且我們不需要擔心回溯,因爲它具有無限的前瞻,可惜它是C++的 :( 另一方面,FParsec的錯誤信息非常好 –

+1

FParsec也有「無限前瞻」,它只是有一個不同的默認回溯行爲。如果你更喜歡Spirit的行爲,你可以很容易地(基於現有的運算符)定義模擬Spirit的運算符。然而,相信你不會相信必須考慮回溯的後果。您可以在 http://www.quanttec.com/fparsec/users-guide/parsing-alternatives.html 和 http://www.quanttec.com/fparsec/users-guide/parsing-sequences.html 的底部找到FParsec行爲的簡短基本原理。 –

5

你可以把複雜解析器的某些部分提取到變量中,例如:

/// Parses `tag? id` and builds a selector element from the two parts.
/// A missing tag becomes the empty string.
let tagid =
    opt tag .>>. id
    |>> fun (optTag, idName) ->
            { CssSelectorElement.Default with
                Tag = stringFromOptional optTag
                Id = idName }

你也可以嘗試使用applicative(應用式)接口,我個人覺得它比pipe2更容易使用和思考:

// Applicative-style variant of the `tagid` parser: the record-building
// function is written first and the parsers supplying its arguments follow.
// NOTE(review): `<!>` and `<*>` are not defined in this snippet and are not
// among the FParsec operators used elsewhere here; presumably they are
// user-defined "map" and "apply" combinators - confirm before use.
let tagid = 
    (fun tag id -> 
      { CssSelectorElement.Default with 
       Tag = stringFromOptional tag 
       Id = id }) 
    <!> opt tag 
    <*> id 
+0

是的,它看起來比pipe2好,謝謝 –

+0

你們偏好applicative接口只是因爲參數的順序,還是有其他原因?對我來說,pipe2在概念上似乎比<!>和<*>的組合更簡單(即更容易解釋)。此外,把函數參數放在最後的位置可以讓F#推斷函數參數的類型。當函數體(在源代碼中)比解析器參數更大時(通常都是如此),把函數放在最後似乎也有助於可讀性。 –

+0

@Stephan:這與參數的順序無關。我同意pipe2在類型推斷方面更好,但我覺得用applicative functor(應用函子)來思考,比用臨時的專用組合器來思考更簡單。它也可以應用於任意數量的參數,例如 f <!> p1 <*> p2 <*> p3 ...,而不必使用pipe3、pipe4等(即liftA2、liftA3等)。另外,我可以通過使用<* and *>