2012-05-21 46 views
4

前幾天我曾試着問過這個問題,但最初既沒有把問題描述清楚,也沒有貼出代碼,結果問題被關閉了。所以我在這裏再試一次,因爲老實說,這個問題快把我逼瘋了。 :) 問題標題:大型正則表達式匹配導致程序掛起

我想實現這Address Parser,它最初是一個基於控制檯的c#程序。我已成功將其轉換爲獨立的WPF程序,該程序僅包含用於輸入的TextBox,用於激活解析的Button以及用於顯示結果的TextBlock。在寫這篇文章的時候,我的輸出截斷了我在主程序中需要的輸出,並且仍然可以正常工作。我已經在下面列出了完整的代碼。

我的下一步是將其移植到我的主程序中,我直接通過複製/粘貼來完成。然而,在主程序中按下按鈕後,程序就掛起了。最終,VS 會報錯,說明進程已經太久沒有處理消息,並且任務管理器中的內存使用量從約 70k 逐漸增加到 3,000,000。爲此,我把解析方法放到 BackgroundWorker(後臺工作線程)中執行,希望減輕主線程的負擔。這確實解決了界面凍結的問題,但後臺線程的表現完全一樣:內存使用量不斷上升,並且不返回任何結果。

所以現在我有點陷入僵局。我知道這個問題是在var result = parser.ParseAddress(input);聲明中的某處,因爲當爲每一行代碼使用斷點時,這是最後一次觸發。但基本上,我不知道爲什麼這會在一個WPF程序中導致問題,而不是另一個。

如果需要的話,我會非常樂意爲主程序發佈完整的源代碼,但是我無法想象在這裏發佈大約20個不同的類文件和代碼項目是個好主意。 :)

單機WPF應用程序

namespace AddressParseWPF
{
    /// <summary>
    /// Code-behind for MainWindow.xaml: reads an address from the input box,
    /// runs it through the address parser and shows the outcome.
    /// </summary>
    public partial class MainWindow : Window
    {
        /// <summary>
        /// Creates the window and loads the controls declared in XAML.
        /// </summary>
        public MainWindow()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Parses the current text of <c>inputTextBox</c> and writes either the
        /// formatted address or an error message to <c>outputTextBlock</c>.
        /// </summary>
        public void Execute()
        {
            var addressParser = new AddressParser.AddressParser();
            var parsed = addressParser.ParseAddress(inputTextBox.Text);

            // A null result is the parser's way of reporting "no match".
            outputTextBlock.Text = parsed == null
                ? "ERROR. Input could not be parsed."
                : string.Concat(parsed.StreetLine, ", ", parsed.City, ", ", parsed.State, " ", parsed.Zip);
        }

        /// <summary>
        /// Click handler for the action button; delegates to <see cref="Execute"/>.
        /// </summary>
        private void actionButton_Click(object sender, RoutedEventArgs e) => Execute();
    }
}

主程序(已移植解析器)

/// <summary>
/// Parses the current text of <c>inputTextBox</c> and writes either the
/// formatted address or an error message to <c>outputTextBlock</c>.
/// </summary>
public void ExecuteAddressParse()
{
    var addressParser = new AddressParser.AddressParser();
    var parsed = addressParser.ParseAddress(inputTextBox.Text);

    // A null result is the parser's way of reporting "no match".
    outputTextBlock.Text = parsed == null
        ? "ERROR. Input could not be parsed."
        : string.Concat(parsed.StreetLine, ", ", parsed.City, ", ", parsed.State, " ", parsed.Zip);
}

/// <summary>
/// Click handler for the action button; delegates to <c>ExecuteAddressParse</c>.
/// </summary>
private void actionButton_Click(object sender, RoutedEventArgs e) => ExecuteAddressParse();

ParseAddress方法

/// <summary>
/// Attempts to parse a free-form address string.
/// </summary>
/// <param name="input">The raw address text; it is upper-cased before matching.</param>
/// <returns>
/// The normalized parse result, or <c>null</c> when <paramref name="input"/> is
/// null/blank or does not match the address pattern.
/// </returns>
public AddressParseResult ParseAddress(string input)
{
    if (string.IsNullOrWhiteSpace(input))
    {
        return null;
    }

    var addressMatch = addressRegex.Match(input.ToUpperInvariant());
    if (!addressMatch.Success)
    {
        return null;
    }

    var fields = GetApplicableFields(addressMatch);
    return new AddressParseResult(Normalize(fields));
}
省略 RegexOptions.Compiled標誌時10

正則表達式匹配方法

/// <summary>
/// Builds the composite address pattern from the lookup tables (suffixes, states,
/// directionals, ranged/rangeless secondary units) and assigns the resulting
/// regular expression to <c>addressRegex</c>.
/// </summary>
/// <remarks>
/// The final pattern is created with <see cref="RegexOptions.IgnorePatternWhitespace"/>,
/// so whitespace and <c>#</c> comments inside the verbatim fragments are not matched;
/// literal spaces are written as <c>\ </c>.
/// </remarks>
private static void InitializeRegex() 
{ 
    // Alternation of every suffix key and every distinct suffix value. It is only ever
    // spliced into larger patterns as text, so it stays a plain string instead of being
    // compiled into a throw-away Regex first.
    var suffixPattern = 
     string.Join(
      "|", 
      new [] { 
       string.Join("|", suffixes.Keys), 
       string.Join("|", suffixes.Values.Distinct()) 
      }); 

    // State keys (escaped) or state values, delimited by word boundaries.
    var statePattern = 
     @"\b(?:" + 
     string.Join(
      "|", 
      new [] { 
       string.Join("|", states.Keys.Select(x => Regex.Escape(x))), 
       string.Join("|", states.Values) 
      }) + 
     @")\b"; 

    // Directional keys, directional values, and the values with a "\." after each letter.
    var directionalPattern = 
     string.Join(
      "|", 
      new [] { 
       string.Join("|", directionals.Keys), 
       string.Join("|", directionals.Values), 
       string.Join("|", directionals.Values.Select(x => Regex.Replace(x, @"(\w)", @"$1\."))) 
      }); 

    // Five digits, optionally followed by four more with an optional hyphen.
    var zipPattern = @"\d{5}(?:-?\d{4})?"; 

    var numberPattern = 
     @"(
      ((?<NUMBER>\d+)(?<SECONDARYNUMBER>(-[0-9])|(\-?[A-Z]))(?=\b)) # Unit-attached 
      |(?<NUMBER>\d+[\-\ ]?\d+\/\d+)         # Fractional 
      |(?<NUMBER>\d+-?\d*)            # Normal Number 
      |(?<NUMBER>[NSWE]\ ?\d+\ ?[NSWE]\ ?\d+)       # Wisconsin/Illinois 
     )"; 

    // {0} = directionalPattern, {1} = suffixPattern.
    var streetPattern = 
     string.Format(
      CultureInfo.InvariantCulture, 
      @" 
       (?: 
        # special case for addresses like 100 South Street 
        (?:(?<STREET>{0})\W+ 
        (?<SUFFIX>{1})\b) 
        | 
        (?:(?<PREDIRECTIONAL>{0})\W+)? 
        (?: 
        (?<STREET>[^,]*\d) 
        (?:[^\w,]*(?<POSTDIRECTIONAL>{0})\b) 
        | 
        (?<STREET>[^,]+) 
        (?:[^\w,]+(?<SUFFIX>{1})\b) 
        (?:[^\w,]+(?<POSTDIRECTIONAL>{0})\b)? 
        | 
        (?<STREET>[^,]+?) 
        (?:[^\w,]+(?<SUFFIX>{1})\b)? 
        (?:[^\w,]+(?<POSTDIRECTIONAL>{0})\b)? 
       ) 
       ) 
      ", 
      directionalPattern, 
      suffixPattern); 

    // NOTE(review): ParseAddress upper-cases the input and no IgnoreCase option is set
    // below, so this lowercase lookahead looks like it can never reject anything.
    // Confirm whether [A-Z] was intended before changing it.
    var rangedSecondaryUnitPattern = 
     @"(?<SECONDARYUNIT>" + 
     string.Join("|", rangedSecondaryUnits.Keys) + 
     @")(?![a-z])"; 
    var rangelessSecondaryUnitPattern = 
     @"(?<SECONDARYUNIT>" + 
     string.Join("|", rangelessSecondaryUnits.Keys) + 
     @")\b"; 

    // {0} = ranged units (must be followed by a number), {1} = rangeless units.
    // The inner group is the non-capturing "(?:"; the previous "(:?" was a typo that
    // produced a capturing group beginning with an optional colon.
    var allSecondaryUnitPattern = string.Format(
     CultureInfo.InvariantCulture, 
     @" 
      (
       (?: 
        (?: (?:{0} \W*) 
         | (?<SECONDARYUNIT>\#)\W* 
        ) 
        (?<SECONDARYNUMBER>[\w-]+) 
       ) 
       |{1} 
      ),? 
     ", 
     rangedSecondaryUnitPattern, 
     rangelessSecondaryUnitPattern); 

    var cityAndStatePattern = string.Format(
     CultureInfo.InvariantCulture, 
     @" 
      (?: 
       (?<CITY>[^\d,]+?)\W+ 
       (?<STATE>{0}) 
      ) 
     ", 
     statePattern); 

    // Optional "city, state" followed by an optional ZIP.
    var placePattern = string.Format(
     CultureInfo.InvariantCulture, 
     @" 
      (?:{0}\W*)? 
      (?:(?<ZIP>{1}))? 
     ", 
     cityAndStatePattern, 
     zipPattern); 

    // {0} number, {1} street, {2} secondary unit, {3} place, {4} zip.
    // The PO-box prefix group is "(P[. ]?O[. ]?\ )?" - an optional "PO " with an escaped
    // SPACE before the closing parenthesis. Writing "\)" there escapes the parenthesis
    // itself, leaves the groups unbalanced and makes the Regex constructor throw.
    // NOTE(review): "^" and "$" sit outside any group, so as written "^" binds only to
    // the first alternative and "$" only to the last. Left unchanged because tightening
    // it would alter which inputs match; confirm the intended anchoring.
    var addressPattern = string.Format(
     CultureInfo.InvariantCulture, 
     @" 
      ^
      # Special case for APO/FPO/DPO addresses 
      (
       [^\w\#]* 
       (?<STREETLINE>.+?) 
       (?<CITY>[AFD]PO)\W+ 
       (?<STATE>A[AEP])\W+ 
       (?<ZIP>{4}) 
       \W* 
      ) 
      | 
      # Special case for PO boxes 
      (
       \W* 
       (?<STREETLINE>(P[\.\ ]?O[\.\ ]?\ )?BOX\ [0-9]+)\W+ 
       {3} 
       \W* 
      ) 
      | 
      (
       [^\w\#]* # skip non-word chars except # (eg unit) 
       ( {0})\W* 
        {1}\W+ 
       (?:{2}\W+)? 
        {3} 
       \W*   # require on non-word chars at end 
      ) 
      $   # right up to end of string 
     ", 
     numberPattern, 
     streetPattern, 
     allSecondaryUnitPattern, 
     placePattern, 
     zipPattern); 
    addressRegex = new Regex(
     addressPattern, 
     RegexOptions.Compiled | 
     RegexOptions.Singleline | 
     RegexOptions.IgnorePatternWhitespace); 
} 
+0

不知您輸入? – BlueM

+1

帶回溯的正則表達式可能非常緩慢,特別是在輸入很大時。嘗試將每次交給表達式的輸入限制爲三行:在「ParseAddress」方法中,將文本拆分爲單獨的行,然後在循環中依次把行 {0,1,2}、{1,2,3}、{2,3,4} 等交給正則表達式匹配,找到匹配項時即返回。 – dasblinkenlight

+0

當您移動代碼時,您有沒有偶然修改正則表達式?或者在複製和粘貼時修改輸入?你初始化了解析器嗎?還是你無意中雙擊了按鈕,從而觸發了庫中的一些奇怪的錯誤?高內存消耗指向一個糟糕的正則表達式/輸入組合 - 儘管運行時行爲不佳 – skarmats

回答

5

省略 RegexOptions.Compiled 標誌時,正則表達式能正常工作嗎?

答覆是肯定的。

那麼爲什麼?

看來正則表達式編譯器對於(某些?)大型模式很慢。

這是一個折衷,你必須做出。

+1

只是想再次表示感謝,我現在有這個工作,這是完美的!對於讀這個問題的其他人,請給leppie一個贊成! –

0

像這樣資源使用量逐漸增加,正是災難性回溯(catastrophic backtracking)的確鑿證據。基本上,如果你的模式中有類似下面這部分的東西:

(?<CITY>[^\d,]+?)\W+ 

...那麼輸入的哪一部分該由模式的哪一部分來匹配就會存在歧義。幾乎所有能匹配 \W 的字符也都能匹配 [^\d,]。如果第一次嘗試未能匹配,引擎就會回退並嘗試這兩個部分之間的各種不同劃分,這會大量消耗資源。

例如,假設輸入的「城市」部分在其後面有一大堆空白。一長串空白符合[^\d,]+?\W+,所以目前還不清楚CITY組是否包含空格。根據這些量詞的懶惰/貪婪行爲,引擎將嘗試僅將城市名稱放入[^\d,]+?以及\W+中的所有空格。然後它會繼續並嘗試匹配其餘的輸入。

如果輸入的其餘部分在第一次嘗試時就匹配成功,那就沒問題。但是,如果匹配失敗,引擎就必須回退並重試,這次把其中一個空格交給 [^\d,]+? 匹配,並作爲 CITY 組的一部分捕獲。如果再次失敗,它會改用兩個空格重試,依此類推。

這通常在嵌套量詞的情況下成爲問題,例如 ([ABC]+)*。我在你的模式中沒有看到這種情況,但我可能在那些 string.Format 調用中漏看了什麼。我的猜測是:模式很長,並且有很多可供回溯的量詞和多選分支(還有很多需要保存的捕獲組),因此即使只有單層重複也足以拖垮性能。我敢打賭,性能損失最大的情況是:很長的輸入字符串匹配了模式的大部分,卻未能匹配全部。

在這種情況下編譯正則表達式可能會有所幫助,你應該這樣做。但是,當你的應用程序一次要處理上千次(或更多)請求時,我懷疑僅靠這一點還不夠。仍然會有一些輸入字符串導致大量回溯,性能也會更差。我最大的建議是找出並消除模式中的歧義。

找出模式中有多個量詞(如 * 和 +)彼此緊鄰的地方,並確保它們之間存在明確的、非可選的分隔符(例如,你的號碼組中的 \d+-?\d* 寫成 \d+(-\d*)? 會表現得更好,寫成 \d+(-\d+)?\b 則更好)。最後,確保分隔符不能匹配它旁邊的標記所能匹配的內容。舉一個刻意構造的例子:如果你給它一長串空格,像 \W+\ \W+ 這樣的模式就會變得非常慢。

1

一些正則表達式的子表達式是不恰當的(就像@Justin Morgan提到的那樣)。
這通常是加入可重複使用的零碎正則表達式的結果,並且它使我不安。

但是,如果您打算採用這種方法,那麼在構建完成之後,把實際生成的正則表達式打印出來總是一個好主意。然後在格式化之後,用樣本對它進行測試,
並且獨立於您的主程序進行測試,這樣更容易修復。
如果您看到可疑的子表達式,請嘗試讓它匹配失敗;或者更一般地,嘗試在樣本末尾附近製造一次失敗。如果它要花超過
一眨眼的功夫才失敗,那就說明存在嚴重的回溯。

但是回溯本身並不是壞事,它是一個巨大的優點。沒有它,有些東西
根本無法匹配。訣竅在於找出那些不會影響其周圍內容匹配結果的子表達式,把它們隔離出來,然後限制它們內部的回溯。

我去了USPS站點,抓取了一些樣本狀態/後綴/方向/輔助
樣本,足以生成地址正則表達式。以下是
的清理版本,它是從您的代碼生成的正則表達式。

祝你好運!

^
    # Special case for APO/FPO/DPO addresses 
    (
     [^\w\#]* 
     (?<STREETLINE> .+?) 
     (?<CITY> [AFD] PO) 
     \W+ 
     (?<STATE> A [AEP]) 
     \W+ 
     (?<ZIP> \d{5} (?: -? \d{4})?) 
     \W* 
    ) 
|   
    # Special case for PO boxes 
    (
     \W* 
     (?<STREETLINE> (P [\.\ ]? O [\.\ ]? \ )? BOX \ [0-9]+) 
     \W+ 
     (?: 
      (?: 
       (?<CITY> [^\d,]+?) 
       \W+ 
       (?<STATE> 
       \b 
       (?:AL|AK|AS|AZ|AR|Alabama|Alaska|American Samoa|Arizona|Arkansas) 
       \b 
      ) 
     ) 
      \W* 
    )? 
     (?: 
      (?<ZIP> \d{5} (?: -? \d{4})?) 
    )? 
     \W* 
    ) 
|   
    (
     [^\w\#]* # skip non-word chars except # (eg unit) 
     (
     (
       (
       (?<NUMBER> \d+) 
       (?<SECONDARYNUMBER> (-[0-9]) | (\-?[A-Z])) 
       (?=\b) 
      )             # Unit-attached 
      |   
      (?<NUMBER> \d+ [\-\ ]? \d+ \/ \d+)     # Fractional 
      | 
      (?<NUMBER> \d+ -? \d*)        # Normal Number 
      | 
      (?<NUMBER>[NSWE]\ ?\d+\ ?[NSWE]\ ?\d+)    # Wisconsin/Illinois 
     ) 
     ) 
     \W* 

     (?: 
      # special case for addresses like 100 South Street 
      (?: 
       (?<STREET>North|East|South|West|Northeast|Southeast|Northwest|Southwest|N|E|S|W|NE|SE|NW|SW|N\.|E\.|S\.|W\.|N\.E\.|S\.E\.|N\.W\.|S\.W\.) 
       \W+ 
       (?<SUFFIX>ALLEY|ALY|ALLY|ALLEE|ALLEY|ALY) 
       \b 
      ) 
     | 
      (?: 
       (?<PREDIRECTIONAL>North|East|South|West|Northeast|Southeast|Northwest|Southwest|N|E|S|W|NE|SE|NW|SW|N\.|E\.|S\.|W\.|N\.E\.|S\.E\.|N\.W\.|S\.W\.) 
       \W+ 
      )? 
      (?: 
       (?<STREET> [^,]* \d) 
       (?: 
        [^\w,]* 
        (?<POSTDIRECTIONAL>North|East|South|West|Northeast|Southeast|Northwest|Southwest|N|E|S|W|NE|SE|NW|SW|N\.|E\.|S\.|W\.|N\.E\.|S\.E\.|N\.W\.|S\.W\.) 
        \b 
       ) 
      | 
       (?<STREET> [^,]+) 
       (?: 
        [^\w,]+ 
        (?<SUFFIX>ALLEY|ALY|ALLY|ALLEE|ALLEY|ALY) 
        \b 
       ) 
       (?: 
        [^\w,]+ 
        (?<POSTDIRECTIONAL>North|East|South|West|Northeast|Southeast|Northwest|Southwest|N|E|S|W|NE|SE|NW|SW|N\.|E\.|S\.|W\.|N\.E\.|S\.E\.|N\.W\.|S\.W\.) 
        \b 
       )? 
      | 
       (?<STREET> [^,]+?) 
       (?: 
        [^\w,]+ 
        (?<SUFFIX>ALLEY|ALY|ALLY|ALLEE|ALLEY|ALY) 
        \b 
       )? 
       (?: 
        [^\w,]+ 
        (?<POSTDIRECTIONAL>North|East|South|West|Northeast|Southeast|Northwest|Southwest|N|E|S|W|NE|SE|NW|SW|N\.|E\.|S\.|W\.|N\.E\.|S\.E\.|N\.W\.|S\.W\.) 
        \b 
       )? 
      ) 
     )   

     \W+   

     (?:  
      (
       (
        :? 
        (?: 
         (?: 
         (?<SECONDARYUNIT>APT|BLDG|DEPT|FL|HNGR|LOT|PIER|RM|SLIP|SPC|STOP|STE|TRLR|UNIT) 
         (?! [a-z]) 
         \W* 
         ) 
        | 
         (?<SECONDARYUNIT> \#) 
         \W* 
       ) 
        (?<SECONDARYNUMBER> [\w-]+) 
       ) 
      | 
       (?<SECONDARYUNIT>BSMT|FRNT|LBBY|LOWR|OFC|PH|REAR|SIDE|UPPR) 
       \b 
      ) 
      ,? 
      \W+ 
     )? 

     (?: 
      (?: 
       (?<CITY> [^\d,]+?) 
       \W+ 
       (?<STATE> 
        \b 
        (?:AL|AK|AS|AZ|AR|Alabama|Alaska|American Samoa|Arizona|Arkansas) 
        \b 
       ) 
      ) 
      \W* 
     )? 

     (?: 
      (?<ZIP> \d{5} (?: -? \d{4})?) 
     )? 

     \W*   # require on non-word chars at end 
    ) 
$   # right up to end of string 

C#代碼

/// <summary>
/// Stand-alone reproduction of the address regex construction: builds the composite
/// pattern from small sample lookup tables, prints it to the console so it can be
/// inspected and tested in isolation, then constructs the regex to prove it parses.
/// </summary>
/// <remarks>
/// The final pattern is created with <see cref="RegexOptions.IgnorePatternWhitespace"/>,
/// so whitespace and <c>#</c> comments inside the verbatim fragments are not matched;
/// literal spaces are written as <c>\ </c>.
/// </remarks>
public static void InitializeRegex() 
    { 
     Dictionary<string, string> suffixes = new Dictionary<string, string>() 
     { 
      {"ALLEY", "ALLEE"}, 
      {"ALY", "ALLEY"}, 
      {"ALLY", "ALY"}, 
     }; 

     // Alternation of every suffix key and every distinct suffix value. It is only ever
     // spliced into larger patterns as text, so it stays a plain string instead of being
     // compiled into a throw-away Regex first.
     var suffixPattern = 
      string.Join(
       "|", 
       new[] { 
      string.Join("|", suffixes.Keys.ToArray()), 
      string.Join("|", suffixes.Values.Distinct().ToArray()) 
     }); 

     Dictionary<string, string> states = new Dictionary<string, string>() 
     { 
      {"AL", "Alabama"}, 
      {"AK", "Alaska"}, 
      {"AS", "American Samoa"}, 
      {"AZ", "Arizona"}, 
      {"AR", "Arkansas"} 
     }; 

     // State keys (escaped) or state values, delimited by word boundaries.
     var statePattern = 
      @"\b(?:" + 
      string.Join(
       "|", 
       new[] { 
      string.Join("|", states.Keys.Select(x => Regex.Escape(x)).ToArray()), 
      string.Join("|", states.Values.ToArray()) 
     }) + 
      @")\b"; 

     Dictionary<string, string> directionals = new Dictionary<string, string>() 
     { 
      {"North", "N" }, 
      {"East", "E" }, 
      {"South", "S" }, 
      {"West", "W" }, 
      {"Northeast", "NE" }, 
      {"Southeast", "SE" }, 
      {"Northwest", "NW" }, 
      {"Southwest", "SW" } 
     }; 

     // Directional keys, directional values, and the values with a "\." after each letter.
     var directionalPattern = 
      string.Join(
       "|", 
       new[] { 
      string.Join("|", directionals.Keys.ToArray()), 
      string.Join("|", directionals.Values.ToArray()), 
      string.Join("|", directionals.Values.Select(x => Regex.Replace(x, @"(\w)", @"$1\.")).ToArray()) 
     }); 

     // Five digits, optionally followed by four more with an optional hyphen.
     var zipPattern = @"\d{5}(?:-?\d{4})?"; 

     var numberPattern = 
      @"(
       ((?<NUMBER>\d+)(?<SECONDARYNUMBER>(-[0-9])|(\-?[A-Z]))(?=\b)) # Unit-attached 
       |(?<NUMBER>\d+[\-\ ]?\d+\/\d+)         # Fractional 
       |(?<NUMBER>\d+-?\d*)            # Normal Number 
       |(?<NUMBER>[NSWE]\ ?\d+\ ?[NSWE]\ ?\d+)       # Wisconsin/Illinois 
      )"; 

     // {0} = directionalPattern, {1} = suffixPattern.
     var streetPattern = 
      string.Format(
       CultureInfo.InvariantCulture, 
       @" 
        (?: 
         # special case for addresses like 100 South Street 
         (?:(?<STREET>{0})\W+ 
         (?<SUFFIX>{1})\b) 
         | 
         (?:(?<PREDIRECTIONAL>{0})\W+)? 
         (?: 
         (?<STREET>[^,]*\d) 
         (?:[^\w,]*(?<POSTDIRECTIONAL>{0})\b) 
         | 
         (?<STREET>[^,]+) 
         (?:[^\w,]+(?<SUFFIX>{1})\b) 
         (?:[^\w,]+(?<POSTDIRECTIONAL>{0})\b)? 
         | 
         (?<STREET>[^,]+?) 
         (?:[^\w,]+(?<SUFFIX>{1})\b)? 
         (?:[^\w,]+(?<POSTDIRECTIONAL>{0})\b)? 
        ) 
        ) 
       ", 
       directionalPattern, 
       suffixPattern); 

     Dictionary<string, string> rangedSecondaryUnits = new Dictionary<string, string>() 
     { 
      {"APT", "APARTMENT"}, 
      {"BLDG", "BUILDING"}, 
      {"DEPT", "DEPARTMENT"}, 
      {"FL", "FLOOR"}, 
      {"HNGR", "HANGAR"}, 
      {"LOT", "LOT"}, 
      {"PIER", "PIER"}, 
      {"RM", "ROOM"}, 
      {"SLIP", "SLIP"}, 
      {"SPC", "SPACE"}, 
      {"STOP", "STOP"}, 
      {"STE", "SUITE"}, 
      {"TRLR", "TRAILER"}, 
      {"UNIT", "UNIT"} 
     }; 

     // NOTE(review): the unit keys are upper-case and no IgnoreCase option is set below,
     // so this lowercase lookahead presumably only matters for mixed-case input.
     // Confirm whether [A-Z] was intended before changing it.
     var rangedSecondaryUnitPattern = 
      @"(?<SECONDARYUNIT>" + 
      string.Join("|", rangedSecondaryUnits.Keys.ToArray()) + 
      @")(?![a-z])"; 

     Dictionary<string, string> rangelessSecondaryUnits = new Dictionary<string, string>() 
     { 
      {"BSMT", "BASEMENT"}, 
      {"FRNT", "FRONT"}, 
      {"LBBY", "LOBBY"}, 
      {"LOWR", "LOWER"}, 
      {"OFC", "OFFICE"}, 
      {"PH", "PENTHOUSE"}, 
      {"REAR", "REAR"}, 
      {"SIDE", "SIDE"}, 
      {"UPPR", "UPPER"} 
     }; 

     var rangelessSecondaryUnitPattern = 
      @"(?<SECONDARYUNIT>" + 
      string.Join("|", rangelessSecondaryUnits.Keys.ToArray()) + 
      @")\b"; 

     // {0} = ranged units (must be followed by a number), {1} = rangeless units.
     // The inner group is the non-capturing "(?:"; the previous "(:?" was a typo that
     // produced a capturing group beginning with an optional colon.
     var allSecondaryUnitPattern = string.Format(
      CultureInfo.InvariantCulture, 
      @" 
       (
        (?: 
         (?: (?:{0} \W*) 
          | (?<SECONDARYUNIT>\#)\W* 
         ) 
         (?<SECONDARYNUMBER>[\w-]+) 
        ) 
        |{1} 
       ),? 
      ", 
      rangedSecondaryUnitPattern, 
      rangelessSecondaryUnitPattern); 

     var cityAndStatePattern = string.Format(
      CultureInfo.InvariantCulture, 
      @" 
       (?: 
        (?<CITY>[^\d,]+?)\W+ 
        (?<STATE>{0}) 
       ) 
      ", 
      statePattern); 

     // Optional "city, state" followed by an optional ZIP.
     var placePattern = string.Format(
      CultureInfo.InvariantCulture, 
      @" 
       (?:{0}\W*)? 
       (?:(?<ZIP>{1}))? 
      ", 
      cityAndStatePattern, 
      zipPattern); 

     // {0} number, {1} street, {2} secondary unit, {3} place, {4} zip.
     // The PO-box prefix group is "(P[. ]?O[. ]?\ )?" - an optional "PO " with an escaped
     // SPACE before the closing parenthesis. Writing "\)" there escapes the parenthesis
     // itself, leaves the groups unbalanced and makes the Regex constructor throw.
     // NOTE(review): "^" and "$" sit outside any group, so as written "^" binds only to
     // the first alternative and "$" only to the last. Left unchanged because tightening
     // it would alter which inputs match; confirm the intended anchoring.
     var addressPattern = string.Format(
      CultureInfo.InvariantCulture, 
      @" 
       ^
       # Special case for APO/FPO/DPO addresses 
       (
        [^\w\#]* 
        (?<STREETLINE>.+?) 
        (?<CITY>[AFD]PO)\W+ 
        (?<STATE>A[AEP])\W+ 
        (?<ZIP>{4}) 
        \W* 
       ) 
       | 
       # Special case for PO boxes 
       (
        \W* 
        (?<STREETLINE>(P[\.\ ]?O[\.\ ]?\ )?BOX\ [0-9]+)\W+ 
        {3} 
        \W* 
       ) 
       | 
       (
        [^\w\#]* # skip non-word chars except # (eg unit) 
        ( {0})\W* 
         {1}\W+ 
        (?:{2}\W+)? 
         {3} 
        \W*   # require on non-word chars at end 
       ) 
       $   # right up to end of string 
      ", 
      numberPattern, 
      streetPattern, 
      allSecondaryUnitPattern, 
      placePattern, 
      zipPattern); 

     // Print the fully expanded pattern so it can be reviewed and tested on its own.
     Console.WriteLine("\n-----------------------------\n\n" + addressPattern); 

     // The local is not used afterwards; constructing the Regex validates that the
     // assembled pattern parses (it throws ArgumentException when it does not).
     var addressRegex = new Regex(
      addressPattern, 
      RegexOptions.Compiled | 
      RegexOptions.Singleline | 
      RegexOptions.IgnorePatternWhitespace); 

    }