2009-12-01 51 views
20

我正在尋找一種快速方法(用C#)來確定一個字符串是否是一個有效的變量名。我的第一個直覺是隨手寫幾個正則表達式來做到這一點,但我想知道是否有更好的方法。比如也許某處藏着一個叫做IsThisAValidVariableName(string name)的祕密方法,或者其他某種簡單的做法,不會因爲我對正則表達式不夠熟練而容易出錯。如何確定一個字符串是否是一個有效的變量名?

+0

你的意思是C#變量名?我認爲正則表達式是你最好的選擇,除非你推出你自己的小解析器(這對於檢查這麼小的東西來說是過分的) – Earlz 2009-12-01 23:30:46

+0

如果你使用正則表達式,要注意的一件事是:有幾個Unicode字符類別可能需要考慮:http://msdn.microsoft.com/en-us/library/aa664670%28VS.71%29.aspx – 2009-12-01 23:41:19

+0

+1不信任正則表達式獎金 – bobince 2009-12-01 23:44:53

回答

42

試試這個:

// using System.CodeDom.Compiler; 
// Asks the C# CodeDOM provider whether YOUR_VARIABLE_NAME is a valid identifier.
// CodeDomProvider is IDisposable (it derives from Component), so it is wrapped
// in a using statement to release it as soon as the check is finished.
using (CodeDomProvider provider = CodeDomProvider.CreateProvider("C#")) {
     if (provider.IsValidIdentifier(YOUR_VARIABLE_NAME)) {
          // Valid
     } else {
          // Not valid
     }
}
+1

你還需要引用'System.CodeDom.Compiler'命名空間 :-) – CesarGon 2009-12-01 23:33:18

+15

是的。您還需要將該代碼放入方法中,並將該方法放入類和名爲YOUR_VARIABLE_NAME的變量中,並...;-) – Gonzalo 2009-12-01 23:35:29

+1

'CodeDomProvider'有多貴? – 2009-12-01 23:35:55

1

更繁瑣、而且慢得多的方法,是使用反射來遍歷某個類/命名空間的成員,並檢查反射所得成員的 **ToString()** 結果是否與輸入字符串相同;這需要預先加載程序集。

另一種做法(更繁瑣,但可以利用現成的Antlr掃描器/解析器來避免使用正則表達式)是解析/遍歷C#代碼,掃描其中的成員名稱(即變量),並與作爲輸入的字符串進行比較。例如,輸入名爲'fooBar'的字符串,然後指定來源(例如程序集或C#代碼),再通過分析具體的成員聲明來進行比對,例如

 
// Example member declaration: a scanner would extract the name "fooBar" here and compare it with the input string.
private int fooBar; 

是的,這很複雜,但當你瞭解編譯器作者所做的工作之後,會獲得深刻的理解,並加深你對C#語言的認識,讓你非常熟悉它的語法及其特點。

+0

誤解了這個問題。 – 2011-06-17 21:32:46

1

圍繞@字符有幾個特殊情況很容易忘記檢查 - 即'@'本身不是有效標識符,"@1foo"也不是。要捕捉這些情況,可以先檢查字符串是否爲關鍵字,然後從字符串的起始處刪除@,再檢查剩下的部分是否是有效標識符(其中不允許再出現@字符)。

在這裏,我把上述檢查與一個解析標識符中Unicode轉義序列的方法結合起來,希望能完整實現C#(5.0)的Unicode字符檢查。要使用它,首先調用TryParseRawIdentifier()來處理關鍵字、轉義序列、格式化字符(會被刪除)和逐字標識符。接下來,將結果傳遞給IsValidParsedIdentifier()以檢查第一個和後續字符是否有效。注意:當且僅當C#認爲兩個標識符相同時,TryParseRawIdentifier()爲它們返回的字符串才相等。

/// <summary>
/// Two-step validation of C# identifiers. Call <see cref="TryParseRawIdentifier"/> first to
/// reject keywords, strip a leading '@', decode \u/\U escapes and drop formatting characters;
/// then pass its output to <see cref="IsValidParsedIdentifier"/> to check the characters.
/// </summary>
public static class CSharpIdentifiers
{
    // Reserved C# keywords; a raw identifier equal to one of these is rejected
    // unless it carries the verbatim '@' prefix.
    private static readonly HashSet<string> _keywords = new HashSet<string> {
        "abstract", "as", "base", "bool", "break", "byte", "case", "catch", "char", "checked",
        "class", "const", "continue", "decimal", "default", "delegate", "do", "double", "else",
        "enum", "event", "explicit", "extern", "false", "finally", "fixed", "float", "for",
        "foreach", "goto", "if", "implicit", "in", "int", "interface", "internal", "is", "lock",
        "long", "namespace", "new", "null", "object", "operator", "out", "override", "params",
        "private", "protected", "public", "readonly", "ref", "return", "sbyte", "sealed",
        "short", "sizeof", "stackalloc", "static", "string", "struct", "switch", "this", "throw",
        "true", "try", "typeof", "uint", "ulong", "unchecked", "unsafe", "ushort", "using",
        "virtual", "void", "volatile", "while"
    };

    /// <summary>The set of keywords that are rejected as raw identifiers.</summary>
    public static IReadOnlyCollection<string> Keywords { get { return _keywords; } }

    /// <summary>
    /// Normalizes a raw identifier as written in source code.
    /// </summary>
    /// <param name="str">The raw identifier text, optionally prefixed with '@'.</param>
    /// <param name="parsed">
    /// On success, the identifier with the '@' prefix removed, Unicode escapes decoded and
    /// formatting characters removed; <c>null</c> on failure. May be empty (e.g. for "@"),
    /// which <see cref="IsValidParsedIdentifier"/> subsequently rejects.
    /// </param>
    /// <returns>
    /// <c>false</c> if <paramref name="str"/> is null, empty, a keyword, or contains a malformed
    /// or out-of-range escape sequence; otherwise <c>true</c>.
    /// </returns>
    public static bool TryParseRawIdentifier(string str, out string parsed)
    {
        parsed = null;

        if (string.IsNullOrEmpty(str) || _keywords.Contains(str)) return false;

        StringBuilder sb = new StringBuilder(str.Length);

        int verbatimCharWidth = str[0] == '@' ? 1 : 0;

        for (int i = verbatimCharWidth; i < str.Length;) //Manual increment
        {
            char c = str[i];

            if (c == '\\')
            {
                //A trailing backslash has no escape kind after it
                if (i + 1 >= str.Length) return false;

                char next = str[i + 1];

                int charCodeLength;
                if (next == 'u') charCodeLength = 4;
                else if (next == 'U') charCodeLength = 8;
                else return false;
                //No need to check for escaped backslashes or special sequences like \n,
                //as they are not valid identifier characters

                //Reject truncated escapes instead of letting Substring throw
                if (i + 2 + charCodeLength > str.Length) return false;

                int charCode;
                if (!TryParseHex(str.Substring(i + 2, charCodeLength), out charCode)) return false;

                //char.ConvertFromUtf32 throws for values outside the Unicode range and for
                //surrogate code points; treat those as a parse failure
                //(8 hex digits such as FFFFFFFF parse to a negative int)
                if (charCode < 0 || charCode > 0x10FFFF || (charCode >= 0xD800 && charCode <= 0xDFFF)) return false;

                //Append the decoded code point; values above 2^16 become a surrogate pair
                sb.Append(char.ConvertFromUtf32(charCode));
                i += 2 + charCodeLength;
            }
            else if (char.GetUnicodeCategory(str, i) == UnicodeCategory.Format)
            {
                //Use (string, index) in order to handle surrogate pairs
                //Skip this character
                if (char.IsSurrogatePair(str, i)) i += 2;
                else i += 1;
            }
            else
            {
                sb.Append(c);
                i++;
            }
        }

        parsed = sb.ToString();
        return true;
    }

    /// <summary>Parses a string consisting solely of hexadecimal digits.</summary>
    private static bool TryParseHex(string str, out int result)
    {
        return int.TryParse(str, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out result);
        //NumberStyles.AllowHexSpecifier forces all characters to be hex digits
    }

    /// <summary>
    /// Checks that an identifier already normalized by <see cref="TryParseRawIdentifier"/>
    /// starts with a valid start character and continues with valid part characters.
    /// </summary>
    /// <param name="str">The normalized identifier.</param>
    /// <returns><c>true</c> if every character is allowed in its position; <c>false</c> for null or empty input.</returns>
    public static bool IsValidParsedIdentifier(string str)
    {
        if (string.IsNullOrEmpty(str)) return false;

        if (!IsValidParsedIdentifierStart(str, 0)) return false;

        int firstCharWidth = char.IsSurrogatePair(str, 0) ? 2 : 1;

        for (int i = firstCharWidth; i < str.Length;) //Manual increment
        {
            if (!IsValidParsedIdentifierPart(str, i)) return false;
            if (char.IsSurrogatePair(str, i)) i += 2;
            else i += 1;
        }

        return true;
    }

    //(String, index) pairs are used instead of chars in order to support surrogate pairs
    //(Unicode code-points above 2^16 represented using two 16-bit characters)

    /// <summary>
    /// Returns whether the character at <paramref name="index"/> may begin an identifier:
    /// an underscore, a letter, or a letter-number.
    /// </summary>
    public static bool IsValidParsedIdentifierStart(string s, int index)
    {
        return s[index] == '_' || char.IsLetter(s, index) || char.GetUnicodeCategory(s, index) == UnicodeCategory.LetterNumber;
    }

    /// <summary>
    /// Returns whether the character at <paramref name="index"/> may appear after the first
    /// character of an identifier.
    /// </summary>
    public static bool IsValidParsedIdentifierPart(string s, int index)
    {
        if (s[index] == '_' || (s[index] >= '0' && s[index] <= '9') || char.IsLetter(s, index)) return true;

        switch (char.GetUnicodeCategory(s, index))
        {
            case UnicodeCategory.LetterNumber: //Eg. Special Roman numeral characters (not covered by IsLetter())
            case UnicodeCategory.DecimalDigitNumber: //Includes decimal digits in other cultures
            case UnicodeCategory.ConnectorPunctuation:
            case UnicodeCategory.NonSpacingMark:
            case UnicodeCategory.SpacingCombiningMark:
            //UnicodeCategory.Format handled in TryParseRawIdentifier()
                return true;
            default:
                return false;
        }
    }
}
0
/// <summary>
/// Simple identifier check: the text must be non-empty, start with a letter or underscore,
/// and contain only letters, digits, or underscores after that. Keywords are not rejected.
/// </summary>
/// <param name="text">Candidate identifier; may be null.</param>
/// <returns><c>true</c> if the text satisfies the rules above; otherwise <c>false</c>.</returns>
public static bool IsIdentifier(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return false;
    }

    // The first character may not be a digit.
    char head = text[0];
    bool headOk = head == '_' || char.IsLetter(head);
    if (!headOk)
    {
        return false;
    }

    // Every remaining character must be a letter, a digit, or an underscore.
    int pos = 1;
    while (pos < text.Length)
    {
        char current = text[pos];
        if (current != '_' && !char.IsLetterOrDigit(current))
        {
            return false;
        }
        pos++;
    }

    return true;
}
相關問題