如何确定字符串是否是有效的变量名?

我正在寻找一种快速的方法(在C#中)来判断一个字符串是否是有效的变量名。 我的第一反应是随手写一个正则表达式,但我想知道是否有更好的办法。 比如藏在某处的、类似IsThisAValidVariableName(string name)的现成方法,或者其他巧妙的做法,这样就不容易因为我的正则表达式功力不足而出错。

试试这个:

// using System.CodeDom.Compiler;

// Ask the C# CodeDOM provider whether the candidate name is a legal identifier.
// CodeDomProvider is disposable, so it is wrapped in a using block to release it
// as soon as the check is done.
using (CodeDomProvider provider = CodeDomProvider.CreateProvider("C#"))
{
    if (provider.IsValidIdentifier(YOUR_VARIABLE_NAME))
    {
        // Valid
    }
    else
    {
        // Not valid
    }
}

另一种更繁琐、而且慢得多的做法,是使用反射(reflection)来遍历类/命名空间的成员,并检查每个反射得到的成员的 .ToString() 是否与输入的字符串相同;这要求事先加载好相应的程序集(assembly)。

另一种方法(绕得更远,但可以借助现成的Antlr扫描器/解析器来避免使用正则表达式)近乎于对C#代码进行词法/语法分析:先扫描出成员名称(即变量),再与作为输入的字符串进行比较。 例如,输入一个名为'fooBar'的字符串,然后指定源(例如程序集或C#代码),并通过分析来扫描它,专门查找成员声明,例如:

 private int fooBar; // Example member declaration: a scan for the input string "fooBar" would match this name.

是的,这很复杂,但当您体会到编译器编写者所做的工作时,会获得深刻的理解,并把您对C#语言的认识提升到与其语法和特性密切相关的层次。

与@字符相关的一些特殊情况很容易被忘记检查 – 例如'@'本身不是有效的标识符,"@1foo"也不是。 要捕获这些情况,您可以先检查字符串是否是关键字,然后去掉字符串开头的@ ,再检查剩下的部分是否是有效的标识符(其中不允许再出现@字符)。

在这里,我将它与一个解析标识符中Unicode转义序列的方法结合起来,希望能完整覆盖C#(5.0)的Unicode字符检查。 使用时,首先调用TryParseRawIdentifier()来处理关键字、转义序列、格式字符(会被删除)和逐字(verbatim)标识符。 接下来,将结果传递给IsValidParsedIdentifier(),以检查首字符和后续字符是否有效。 请注意,当且仅当C#认为两个标识符相同时,TryParseRawIdentifier()返回的字符串才相等。

 // Requires: using System; using System.Collections.Generic;
 //           using System.Globalization; using System.Text;

 /// <summary>
 /// Helpers for deciding whether a string is a valid C# identifier.
 /// Call <see cref="TryParseRawIdentifier"/> first to normalise the raw text, then pass
 /// the result to <see cref="IsValidParsedIdentifier"/> to validate its characters.
 /// </summary>
 public static class CSharpIdentifiers
 {
     // Reserved keywords only; contextual keywords (e.g. "var") are not listed.
     private static readonly HashSet<string> _keywords = new HashSet<string>
     {
         "abstract", "as", "base", "bool", "break", "byte", "case", "catch", "char",
         "checked", "class", "const", "continue", "decimal", "default", "delegate",
         "do", "double", "else", "enum", "event", "explicit", "extern", "false",
         "finally", "fixed", "float", "for", "foreach", "goto", "if", "implicit",
         "in", "int", "interface", "internal", "is", "lock", "long", "namespace",
         "new", "null", "object", "operator", "out", "override", "params", "private",
         "protected", "public", "readonly", "ref", "return", "sbyte", "sealed",
         "short", "sizeof", "stackalloc", "static", "string", "struct", "switch",
         "this", "throw", "true", "try", "typeof", "uint", "ulong", "unchecked",
         "unsafe", "ushort", "using", "virtual", "void", "volatile", "while"
     };

     /// <summary>The keywords that <see cref="TryParseRawIdentifier"/> rejects when given verbatim.</summary>
     public static IReadOnlyCollection<string> Keywords
     {
         get { return _keywords; }
     }

     /// <summary>
     /// Normalises a raw identifier: rejects bare keywords, strips a leading '@',
     /// decodes \uXXXX / \UXXXXXXXX escapes and drops Unicode format characters.
     /// </summary>
     /// <param name="str">The identifier text as written in source.</param>
     /// <param name="parsed">The normalised identifier, or null when parsing fails.</param>
     /// <returns>
     /// true if the text could be normalised; false for null/empty input, a keyword,
     /// or a malformed, truncated or out-of-range escape sequence.
     /// </returns>
     public static bool TryParseRawIdentifier(string str, out string parsed)
     {
         parsed = null;

         // The keyword test uses the raw text, so "@class" passes while "class" is rejected.
         if (string.IsNullOrEmpty(str) || _keywords.Contains(str))
         {
             return false;
         }

         StringBuilder sb = new StringBuilder(str.Length);
         int verbatimCharWidth = str[0] == '@' ? 1 : 0;

         for (int i = verbatimCharWidth; i < str.Length; ) // Manual increment
         {
             char c = str[i];
             if (c == '\\')
             {
                 // A trailing backslash has no escape letter after it.
                 if (i + 1 >= str.Length)
                 {
                     return false;
                 }

                 // No need to check for escaped backslashes or special sequences like \n,
                 // as they are not valid identifier characters.
                 char next = str[i + 1];
                 int charCodeLength;
                 if (next == 'u')
                 {
                     charCodeLength = 4;
                 }
                 else if (next == 'U')
                 {
                     charCodeLength = 8;
                 }
                 else
                 {
                     return false;
                 }

                 // Reject an escape that is cut off before all its hex digits.
                 if (i + 2 + charCodeLength > str.Length)
                 {
                     return false;
                 }

                 int charCode;
                 if (!TryParseHex(str.Substring(i + 2, charCodeLength), out charCode))
                 {
                     return false;
                 }

                 // char.ConvertFromUtf32 throws for surrogates and values outside
                 // 0..0x10FFFF (8 hex digits can even parse to a negative int).
                 if (!IsUnicodeScalarValue(charCode))
                 {
                     return false;
                 }

                 // Append the decoded code point; values above 2^16 become a surrogate pair.
                 sb.Append(char.ConvertFromUtf32(charCode));
                 i += 2 + charCodeLength;
             }
             else if (char.GetUnicodeCategory(str, i) == UnicodeCategory.Format)
             {
                 // Use (string, index) in order to handle surrogate pairs.
                 // Skip this character.
                 i += char.IsSurrogatePair(str, i) ? 2 : 1;
             }
             else
             {
                 sb.Append(c);
                 i++;
             }
         }

         parsed = sb.ToString();
         return true;
     }

     // Parses a string made up solely of hex digits.
     // NumberStyles.AllowHexSpecifier forces all characters to be hex digits.
     private static bool TryParseHex(string str, out int result)
     {
         return int.TryParse(str, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out result);
     }

     // True when the value is a code point that char.ConvertFromUtf32 accepts.
     private static bool IsUnicodeScalarValue(int codePoint)
     {
         return codePoint >= 0
             && codePoint <= 0x10FFFF
             && (codePoint < 0xD800 || codePoint > 0xDFFF);
     }

     /// <summary>
     /// Checks that an already-normalised identifier starts with a valid start character
     /// and contains only valid part characters after it.
     /// </summary>
     /// <param name="str">The output of <see cref="TryParseRawIdentifier"/>.</param>
     /// <returns>true if every character is allowed in its position; false for null/empty input.</returns>
     public static bool IsValidParsedIdentifier(string str)
     {
         if (string.IsNullOrEmpty(str))
         {
             return false;
         }

         if (!IsValidParsedIdentifierStart(str, 0))
         {
             return false;
         }

         int firstCharWidth = char.IsSurrogatePair(str, 0) ? 2 : 1;
         for (int i = firstCharWidth; i < str.Length; ) // Manual increment
         {
             if (!IsValidParsedIdentifierPart(str, i))
             {
                 return false;
             }

             i += char.IsSurrogatePair(str, i) ? 2 : 1;
         }

         return true;
     }

     // (String, index) pairs are used instead of chars in order to support surrogate pairs
     // (Unicode code points above 2^16 represented using two 16-bit characters).

     /// <summary>Tests whether the character at <paramref name="index"/> may begin an identifier.</summary>
     public static bool IsValidParsedIdentifierStart(string s, int index)
     {
         return s[index] == '_'
             || char.IsLetter(s, index)
             || char.GetUnicodeCategory(s, index) == UnicodeCategory.LetterNumber;
     }

     /// <summary>Tests whether the character at <paramref name="index"/> may appear after the first character.</summary>
     public static bool IsValidParsedIdentifierPart(string s, int index)
     {
         if (s[index] == '_' || (s[index] >= '0' && s[index] <= '9') || char.IsLetter(s, index))
         {
             return true;
         }

         switch (char.GetUnicodeCategory(s, index))
         {
             case UnicodeCategory.LetterNumber:         // E.g. special Roman numeral characters (not covered by IsLetter())
             case UnicodeCategory.DecimalDigitNumber:   // Includes decimal digits in other cultures
             case UnicodeCategory.ConnectorPunctuation:
             case UnicodeCategory.NonSpacingMark:
             case UnicodeCategory.SpacingCombiningMark:
                 // UnicodeCategory.Format is handled in TryParseRawIdentifier()
                 return true;
             default:
                 return false;
         }
     }
 }
  /// <summary>
  /// Simple identifier check: the first character must be a letter or '_', and every
  /// following character must be a letter, a digit or '_'.
  /// </summary>
  /// <param name="text">The candidate name.</param>
  /// <returns>true if <paramref name="text"/> passes the check; false for null or empty input.</returns>
  public static bool IsIdentifier(string text)
  {
      if (string.IsNullOrEmpty(text))
      {
          return false;
      }

      char first = text[0];
      bool validStart = first == '_' || char.IsLetter(first);
      if (!validStart)
      {
          return false;
      }

      int position = 1;
      while (position < text.Length)
      {
          char current = text[position];
          if (current != '_' && !char.IsLetterOrDigit(current))
          {
              return false;
          }

          position++;
      }

      return true;
  }