C# 扩展字符集的算法?
是否有现成的函数可用于展开 C# 正则表达式(regex)风格的字符集?例如,
expand(“a-z1”)
将返回一个包含所有字符a到z的字符串,后跟数字1
以下是到目前为止我得到的信息:
/// <summary>
/// Expands every "x-y" range in <paramref name="set"/> into the characters from x to y
/// (inclusive) and copies all other characters through unchanged.
/// Example: "a-c1" becomes "abc1".
/// </summary>
/// <param name="set">Character set text without surrounding brackets.</param>
/// <returns>The expanded character list; a range whose end is below its start expands to nothing.</returns>
/// <exception cref="ArgumentNullException"><paramref name="set"/> is null.</exception>
public static string ExpandCharacterSet(string set)
{
    if (set == null)
        throw new ArgumentNullException("set");

    var sb = new StringBuilder();
    int start = 0;
    while (start < set.Length - 1)
    {
        // Search from start + 1 so a dash in the very first position is treated as a literal.
        int dash = set.IndexOf('-', start + 1);
        // No dash left, or a trailing dash with nothing after it: the rest is literal text.
        if (dash <= 0 || dash >= set.Length - 1)
            break;

        // Literal characters that precede the range's first endpoint.
        sb.Append(set, start, dash - start - 1);

        char first = set[dash - 1];
        char last = set[dash + 1];
        // Count with an int: a char counter wraps around after char.MaxValue, so a range
        // ending at '\uffff' would never leave the loop.
        for (int code = first; code <= last; ++code)
            sb.Append((char)code);

        start = dash + 2;
    }

    // Whatever follows the last range is copied verbatim.
    sb.Append(set, start, set.Length - start);
    return sb.ToString();
}
我费了一番功夫才做出下面的实现。当然,它不具备可移植性,因为它通过反射依赖 .NET 的内部类型。不过对于简单的测试用例来说已经够用了。它可以接受任意正则表达式字符类,但不适用于否定类(例如 [^a-z]),因为在没有任何限制的情况下,否定类对应的取值范围过于庞大。我不确定它是否对所有情况都正确,而且它完全不处理重复字符,但这算是一个起点,至少您不必自己编写解析器。以下代码适用于 .NET Framework 4.0:
/// <summary>
/// Expands a regex character class into the characters it lists by driving the regex
/// parser that lives, as a non-public type, in the assembly defining <see cref="Regex"/>.
/// </summary>
/// <remarks>
/// Everything here is reached through reflection on non-public names
/// ("RegexParser", "RegexCharClass", "RegexCharClass+SingleRange", "_first", "_last").
/// NOTE(review): if a runtime does not expose these names the lookups return null and the
/// wrappers' static initializers will fail - confirm the target framework before relying on this.
/// </remarks>
public static class RegexHelper
{
    /// <summary>
    /// Parses <paramref name="charClass"/> as a character class and returns the expansion of
    /// every range the parser reports, except range 0, in the order the parser reports them.
    /// </summary>
    /// <param name="charClass">A bracketed character class such as <c>[\-a-zA-F1 5-9]</c>.</param>
    /// <returns>The concatenated expansion of ranges 1..RangeCount-1.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="charClass"/> is null.</exception>
    public static string ExpandCharClass(string charClass)
    {
        // Reject null up front; otherwise it is handed to the reflected parser and only
        // fails somewhere inside the reflection call.
        if (charClass == null)
            throw new ArgumentNullException("charClass");

        var regexParser = new RegexParser(CultureInfo.CurrentCulture);
        regexParser.SetPattern(charClass);
        var regexCharClass = regexParser.ScanCharClass(false);
        int count = regexCharClass.RangeCount();
        List<string> ranges = new List<string>();
        // range 0 can be skipped
        // NOTE(review): presumably range 0 is the opening '[' picked up as a literal because
        // scanning starts on the bracket itself - confirm against the framework source.
        for (int i = 1; i < count; i++)
        {
            var range = regexCharClass.GetRangeAt(i);
            ranges.Add(ExpandRange(range));
        }
        return String.Concat(ranges);
    }

    // Returns every character from range._first through range._last, inclusive.
    static string ExpandRange(SingleRange range)
    {
        char first = range._first;
        char last = range._last;
        var chars = new char[last - first + 1];
        for (int offset = 0; offset < chars.Length; offset++)
            chars[offset] = (char)(first + offset);
        return new string(chars);
    }

    // Reflection wrapper around the non-public System.Text.RegularExpressions.RegexParser.
    internal class RegexParser
    {
        static readonly Type RegexParserType;
        static readonly ConstructorInfo RegexParser_Ctor;
        static readonly MethodInfo RegexParser_SetPattern;
        static readonly MethodInfo RegexParser_ScanCharClass;

        // Resolves the parser type and the three members used below, once per process.
        static RegexParser()
        {
            RegexParserType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexParser");
            var flags = BindingFlags.NonPublic | BindingFlags.Instance;
            RegexParser_Ctor = RegexParserType.GetConstructor(flags, null, new[] { typeof(CultureInfo) }, null);
            RegexParser_SetPattern = RegexParserType.GetMethod("SetPattern", flags, null, new[] { typeof(String) }, null);
            RegexParser_ScanCharClass = RegexParserType.GetMethod("ScanCharClass", flags, null, new[] { typeof(Boolean) }, null);
        }

        // The wrapped framework parser object.
        private readonly object instance;

        // Creates the underlying parser through its non-public (CultureInfo) constructor.
        internal RegexParser(CultureInfo culture)
        {
            instance = RegexParser_Ctor.Invoke(new object[] { culture });
        }

        // Forwards to the parser's non-public SetPattern(string).
        internal void SetPattern(string pattern)
        {
            RegexParser_SetPattern.Invoke(instance, new object[] { pattern });
        }

        // Forwards to the parser's non-public ScanCharClass(bool) and wraps the result.
        internal RegexCharClass ScanCharClass(bool caseInsensitive)
        {
            return new RegexCharClass(RegexParser_ScanCharClass.Invoke(instance, new object[] { caseInsensitive }));
        }
    }

    // Reflection wrapper around the non-public System.Text.RegularExpressions.RegexCharClass.
    internal class RegexCharClass
    {
        static readonly Type RegexCharClassType;
        static readonly MethodInfo RegexCharClass_RangeCount;
        static readonly MethodInfo RegexCharClass_GetRangeAt;

        // Resolves the char-class type and its RangeCount / GetRangeAt methods.
        static RegexCharClass()
        {
            RegexCharClassType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass");
            var flags = BindingFlags.NonPublic | BindingFlags.Instance;
            RegexCharClass_RangeCount = RegexCharClassType.GetMethod("RangeCount", flags, null, new Type[] { }, null);
            RegexCharClass_GetRangeAt = RegexCharClassType.GetMethod("GetRangeAt", flags, null, new[] { typeof(Int32) }, null);
        }

        // The wrapped framework char-class object.
        private readonly object instance;

        // Wraps an existing framework object; rejects null and objects of any other type.
        internal RegexCharClass(object regexCharClass)
        {
            if (regexCharClass == null)
                throw new ArgumentNullException("regexCharClass");
            if (regexCharClass.GetType() != RegexCharClassType)
                throw new ArgumentException("not an instance of a RegexCharClass object", "regexCharClass");
            instance = regexCharClass;
        }

        // Forwards to the non-public RangeCount().
        internal int RangeCount()
        {
            return (int)RegexCharClass_RangeCount.Invoke(instance, new object[] { });
        }

        // Forwards to the non-public GetRangeAt(int) and copies the result into a SingleRange.
        internal SingleRange GetRangeAt(int i)
        {
            return new SingleRange(RegexCharClass_GetRangeAt.Invoke(instance, new object[] { i }));
        }
    }

    // Snapshot of the _first/_last fields of the non-public RegexCharClass+SingleRange type.
    internal struct SingleRange
    {
        static readonly Type RegexCharClassSingleRangeType;
        static readonly FieldInfo SingleRange_first;
        static readonly FieldInfo SingleRange_last;

        // Resolves the nested range type and its two non-public fields.
        static SingleRange()
        {
            RegexCharClassSingleRangeType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass+SingleRange");
            var flags = BindingFlags.NonPublic | BindingFlags.Instance;
            SingleRange_first = RegexCharClassSingleRangeType.GetField("_first", flags);
            SingleRange_last = RegexCharClassSingleRangeType.GetField("_last", flags);
        }

        internal char _first;
        internal char _last;

        // Copies _first and _last out of the framework object; rejects null and other types.
        internal SingleRange(object singleRange)
        {
            if (singleRange == null)
                throw new ArgumentNullException("singleRange");
            if (singleRange.GetType() != RegexCharClassSingleRangeType)
                throw new ArgumentException("not an instance of a SingleRange object", "singleRange");
            _first = (char)SingleRange_first.GetValue(singleRange);
            _last = (char)SingleRange_last.GetValue(singleRange);
        }
    }
}
// Usage: call ExpandCharClass with a bracketed character class pattern.
RegexHelper.ExpandCharClass(@"[\-a-zA-F1 5-9]");
// Result given by the author for this input: "-abcdefghijklmnopqrstuvwxyzABCDEF1 56789"
public static class RegexHelper
{
public static string ExpandCharClass(string charClass)
{
var regexParser = new RegexParser(CultureInfo.CurrentCulture);
regexParser.SetPattern(charClass);
var regexCharClass = regexParser.ScanCharClass(false);
int count = regexCharClass.RangeCount();
List<string> ranges = new List<string>();
// range 0 can be skipped
for (int i = 1; i < count; i++)
{
var range = regexCharClass.GetRangeAt(i);
ranges.Add(ExpandRange(range));
}
return String.Concat(ranges);
}
static string ExpandRange(SingleRange range)
{
char first = range._first;
char last = range._last;
return String.Concat(Enumerable.Range(first, last - first + 1).Select(i => (char)i));
}
internal class RegexParser
{
static readonly Type RegexParserType;
static readonly ConstructorInfo RegexParser_Ctor;
static readonly MethodInfo RegexParser_SetPattern;
static readonly MethodInfo RegexParser_ScanCharClass;
static RegexParser()
{
RegexParserType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexParser");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
RegexParser_Ctor = RegexParserType.GetConstructor(flags, null, new[] { typeof(CultureInfo) }, null);
RegexParser_SetPattern = RegexParserType.GetMethod("SetPattern", flags, null, new[] { typeof(String) }, null);
RegexParser_ScanCharClass = RegexParserType.GetMethod("ScanCharClass", flags, null, new[] { typeof(Boolean) }, null);
}
private readonly object instance;
internal RegexParser(CultureInfo culture)
{
instance = RegexParser_Ctor.Invoke(new object[] { culture });
}
internal void SetPattern(string pattern)
{
RegexParser_SetPattern.Invoke(instance, new object[] { pattern });
}
internal RegexCharClass ScanCharClass(bool caseInsensitive)
{
return new RegexCharClass(RegexParser_ScanCharClass.Invoke(instance, new object[] { caseInsensitive }));
}
}
internal class RegexCharClass
{
static readonly Type RegexCharClassType;
static readonly MethodInfo RegexCharClass_RangeCount;
static readonly MethodInfo RegexCharClass_GetRangeAt;
static RegexCharClass()
{
RegexCharClassType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
RegexCharClass_RangeCount = RegexCharClassType.GetMethod("RangeCount", flags, null, new Type[] { }, null);
RegexCharClass_GetRangeAt = RegexCharClassType.GetMethod("GetRangeAt", flags, null, new[] { typeof(Int32) }, null);
}
private readonly object instance;
internal RegexCharClass(object regexCharClass)
{
if (regexCharClass == null)
throw new ArgumentNullException("regexCharClass");
if (regexCharClass.GetType() != RegexCharClassType)
throw new ArgumentException("not an instance of a RegexCharClass object", "regexCharClass");
instance = regexCharClass;
}
internal int RangeCount()
{
return (int)RegexCharClass_RangeCount.Invoke(instance, new object[] { });
}
internal SingleRange GetRangeAt(int i)
{
return new SingleRange(RegexCharClass_GetRangeAt.Invoke(instance, new object[] { i }));
}
}
internal struct SingleRange
{
static readonly Type RegexCharClassSingleRangeType;
static readonly FieldInfo SingleRange_first;
static readonly FieldInfo SingleRange_last;
static SingleRange()
{
RegexCharClassSingleRangeType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass+SingleRange");
var flags = BindingFlags.NonPublic | BindingFlags.Instance;
SingleRange_first = RegexCharClassSingleRangeType.GetField("_first", flags);
SingleRange_last = RegexCharClassSingleRangeType.GetField("_last", flags);
}
internal char _first;
internal char _last;
internal SingleRange(object singleRange)
{
if (singleRange == null)
throw new ArgumentNullException("singleRange");
if (singleRange.GetType() != RegexCharClassSingleRangeType)
throw new ArgumentException("not an instance of a SingleRange object", "singleRange");
_first = (char)SingleRange_first.GetValue(singleRange);
_last = (char)SingleRange_last.GetValue(singleRange);
}
}
}
// usage:
RegexHelper.ExpandCharClass(@"[\-a-zA-F1 5-9]");
// "-abcdefghijklmnopqrstuvwxyzABCDEF1 56789"
这似乎是一个相当少见的需求,但由于可匹配的字符大约只有 96 个(除非把高位字符也算进去),您不妨直接用该正则表达式逐个测试所有候选字符(参见下文的 expando 函数):
/// <summary>
/// Expands a regex character class into the characters it lists by driving the regex
/// parser that lives, as a non-public type, in the assembly defining <see cref="Regex"/>.
/// </summary>
/// <remarks>
/// Everything here is reached through reflection on non-public names
/// ("RegexParser", "RegexCharClass", "RegexCharClass+SingleRange", "_first", "_last").
/// NOTE(review): if a runtime does not expose these names the lookups return null and the
/// wrappers' static initializers will fail - confirm the target framework before relying on this.
/// </remarks>
public static class RegexHelper
{
    /// <summary>
    /// Parses <paramref name="charClass"/> as a character class and returns the expansion of
    /// every range the parser reports, except range 0, in the order the parser reports them.
    /// </summary>
    /// <param name="charClass">A bracketed character class such as <c>[\-a-zA-F1 5-9]</c>.</param>
    /// <returns>The concatenated expansion of ranges 1..RangeCount-1.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="charClass"/> is null.</exception>
    public static string ExpandCharClass(string charClass)
    {
        // Reject null up front; otherwise it is handed to the reflected parser and only
        // fails somewhere inside the reflection call.
        if (charClass == null)
            throw new ArgumentNullException("charClass");

        var regexParser = new RegexParser(CultureInfo.CurrentCulture);
        regexParser.SetPattern(charClass);
        var regexCharClass = regexParser.ScanCharClass(false);
        int count = regexCharClass.RangeCount();
        List<string> ranges = new List<string>();
        // range 0 can be skipped
        // NOTE(review): presumably range 0 is the opening '[' picked up as a literal because
        // scanning starts on the bracket itself - confirm against the framework source.
        for (int i = 1; i < count; i++)
        {
            var range = regexCharClass.GetRangeAt(i);
            ranges.Add(ExpandRange(range));
        }
        return String.Concat(ranges);
    }

    // Returns every character from range._first through range._last, inclusive.
    static string ExpandRange(SingleRange range)
    {
        char first = range._first;
        char last = range._last;
        var chars = new char[last - first + 1];
        for (int offset = 0; offset < chars.Length; offset++)
            chars[offset] = (char)(first + offset);
        return new string(chars);
    }

    // Reflection wrapper around the non-public System.Text.RegularExpressions.RegexParser.
    internal class RegexParser
    {
        static readonly Type RegexParserType;
        static readonly ConstructorInfo RegexParser_Ctor;
        static readonly MethodInfo RegexParser_SetPattern;
        static readonly MethodInfo RegexParser_ScanCharClass;

        // Resolves the parser type and the three members used below, once per process.
        static RegexParser()
        {
            RegexParserType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexParser");
            var flags = BindingFlags.NonPublic | BindingFlags.Instance;
            RegexParser_Ctor = RegexParserType.GetConstructor(flags, null, new[] { typeof(CultureInfo) }, null);
            RegexParser_SetPattern = RegexParserType.GetMethod("SetPattern", flags, null, new[] { typeof(String) }, null);
            RegexParser_ScanCharClass = RegexParserType.GetMethod("ScanCharClass", flags, null, new[] { typeof(Boolean) }, null);
        }

        // The wrapped framework parser object.
        private readonly object instance;

        // Creates the underlying parser through its non-public (CultureInfo) constructor.
        internal RegexParser(CultureInfo culture)
        {
            instance = RegexParser_Ctor.Invoke(new object[] { culture });
        }

        // Forwards to the parser's non-public SetPattern(string).
        internal void SetPattern(string pattern)
        {
            RegexParser_SetPattern.Invoke(instance, new object[] { pattern });
        }

        // Forwards to the parser's non-public ScanCharClass(bool) and wraps the result.
        internal RegexCharClass ScanCharClass(bool caseInsensitive)
        {
            return new RegexCharClass(RegexParser_ScanCharClass.Invoke(instance, new object[] { caseInsensitive }));
        }
    }

    // Reflection wrapper around the non-public System.Text.RegularExpressions.RegexCharClass.
    internal class RegexCharClass
    {
        static readonly Type RegexCharClassType;
        static readonly MethodInfo RegexCharClass_RangeCount;
        static readonly MethodInfo RegexCharClass_GetRangeAt;

        // Resolves the char-class type and its RangeCount / GetRangeAt methods.
        static RegexCharClass()
        {
            RegexCharClassType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass");
            var flags = BindingFlags.NonPublic | BindingFlags.Instance;
            RegexCharClass_RangeCount = RegexCharClassType.GetMethod("RangeCount", flags, null, new Type[] { }, null);
            RegexCharClass_GetRangeAt = RegexCharClassType.GetMethod("GetRangeAt", flags, null, new[] { typeof(Int32) }, null);
        }

        // The wrapped framework char-class object.
        private readonly object instance;

        // Wraps an existing framework object; rejects null and objects of any other type.
        internal RegexCharClass(object regexCharClass)
        {
            if (regexCharClass == null)
                throw new ArgumentNullException("regexCharClass");
            if (regexCharClass.GetType() != RegexCharClassType)
                throw new ArgumentException("not an instance of a RegexCharClass object", "regexCharClass");
            instance = regexCharClass;
        }

        // Forwards to the non-public RangeCount().
        internal int RangeCount()
        {
            return (int)RegexCharClass_RangeCount.Invoke(instance, new object[] { });
        }

        // Forwards to the non-public GetRangeAt(int) and copies the result into a SingleRange.
        internal SingleRange GetRangeAt(int i)
        {
            return new SingleRange(RegexCharClass_GetRangeAt.Invoke(instance, new object[] { i }));
        }
    }

    // Snapshot of the _first/_last fields of the non-public RegexCharClass+SingleRange type.
    internal struct SingleRange
    {
        static readonly Type RegexCharClassSingleRangeType;
        static readonly FieldInfo SingleRange_first;
        static readonly FieldInfo SingleRange_last;

        // Resolves the nested range type and its two non-public fields.
        static SingleRange()
        {
            RegexCharClassSingleRangeType = Assembly.GetAssembly(typeof(Regex)).GetType("System.Text.RegularExpressions.RegexCharClass+SingleRange");
            var flags = BindingFlags.NonPublic | BindingFlags.Instance;
            SingleRange_first = RegexCharClassSingleRangeType.GetField("_first", flags);
            SingleRange_last = RegexCharClassSingleRangeType.GetField("_last", flags);
        }

        internal char _first;
        internal char _last;

        // Copies _first and _last out of the framework object; rejects null and other types.
        internal SingleRange(object singleRange)
        {
            if (singleRange == null)
                throw new ArgumentNullException("singleRange");
            if (singleRange.GetType() != RegexCharClassSingleRangeType)
                throw new ArgumentException("not an instance of a SingleRange object", "singleRange");
            _first = (char)SingleRange_first.GetValue(singleRange);
            _last = (char)SingleRange_last.GetValue(singleRange);
        }
    }
}
// Usage: call ExpandCharClass with a bracketed character class pattern.
RegexHelper.ExpandCharClass(@"[\-a-zA-F1 5-9]");
// Result given by the author for this input: "-abcdefghijklmnopqrstuvwxyzABCDEF1 56789"
/// <summary>
/// Tests each character of a fixed candidate alphabet (A-Z, a-z, 0-9) as a one-character
/// string against <paramref name="input_re"/> and returns the characters that match,
/// in candidate order.
/// </summary>
/// <param name="input_re">Regular expression pattern to test every candidate against.</param>
/// <returns>The matching candidates, or an empty string when none match.</returns>
public static string expando(string input_re) {
    // add more chars to the candidates as needed, such as ,.?/|=+_-éñ etc.
    const string candidates = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    Regex matcher = new Regex(input_re);
    // At most every candidate matches, so a buffer of that size always suffices.
    char[] kept = new char[candidates.Length];
    int keptCount = 0;
    foreach (char candidate in candidates) {
        if (matcher.IsMatch(candidate.ToString())) {
            kept[keptCount++] = candidate;
        }
    }
    return new string(kept, 0, keptCount);
}
// Every UTF-16 code unit that char.IsControl rejects, in ascending order. This is the
// universe an inverted set ("^...") is subtracted from. Evaluated lazily on each enumeration.
private static readonly IEnumerable<char> CharacterSet = Enumerable.Range(0, char.MaxValue + 1).Select(code => (char)code).Where(c => !char.IsControl(c));

/// <summary>
/// Expands a character set such as "a-c1", "[a-c1]" or "[^a-c1]". Each "x-y" range becomes
/// the characters from x to y (inclusive); other characters are copied through unchanged.
/// A leading '^' (after optional surrounding brackets are removed) inverts the set: the result
/// is then every non-control character that is NOT in the expanded set, in ascending order.
/// </summary>
/// <param name="set">Character set text, with or without surrounding square brackets.</param>
/// <returns>The expanded (or inverted) character list; "" for an empty set or "[]".</returns>
/// <exception cref="ArgumentNullException"><paramref name="set"/> is null.</exception>
public static string ExpandCharacterSet(string set)
{
    if (set == null)
        throw new ArgumentNullException("set");
    if (set.Length == 0)
        return "";

    if (set[0] == '[' && set[set.Length - 1] == ']')
        set = set.Substring(1, set.Length - 2);
    // "[]" leaves nothing after the brackets are removed; indexing set[0] below would throw.
    if (set.Length == 0)
        return "";

    bool invertSet = set[0] == '^';
    if (invertSet)
        set = set.Substring(1);

    var sb = new StringBuilder();
    int start = 0;
    while (start < set.Length - 1)
    {
        // Search from start + 1 so a dash in the very first position is treated as a literal.
        int dash = set.IndexOf('-', start + 1);
        if (dash <= 0 || dash >= set.Length - 1)
            break;

        // Literal characters that precede the range's first endpoint.
        sb.Append(set.Substring(start, dash - start - 1));

        char first = set[dash - 1];
        char last = set[dash + 1];
        // Count with an int: a char counter wraps around after char.MaxValue, so a range
        // ending at '\uffff' would never leave the loop.
        for (int code = first; code <= last; ++code)
            sb.Append((char)code);

        start = dash + 2;
    }
    sb.Append(set.Substring(start));

    if (!invertSet)
        return sb.ToString();

    // Filter the ordered universe rather than enumerating a HashSet, whose enumeration
    // order is not specified; this keeps the inverted result in ascending order.
    var excluded = new HashSet<char>(sb.ToString());
    return new string(CharacterSet.Where(c => !excluded.Contains(c)).ToArray());
}
// Expands the "x-y" ranges in a hard-coded sample and prints the resulting characters.
var input = "a-fA-F0-9!";
// Each match is either a three-character range "x-y" or one single character.
var matches = Regex.Matches(input, @".-.|.");
var list = new StringBuilder();
foreach (Match token in matches)
{
    string text = token.Value;
    if (text.Length != 1)
    {
        // Range token: text[1] is the dash; the endpoints sit on either side of it.
        char low = text[0];
        char high = text[2];
        if (high < low)
            throw new ArgumentException("invalid format"); // or swap the endpoints, if you prefer.
        char current = low;
        while (current <= high)
        {
            list.Append(current);
            current++;
        }
        continue;
    }
    // Single character: copied through unchanged.
    list.Append(text);
}
Console.WriteLine(list);
abcdefABCDEF0123456789!
// Demonstrates range expansion with escaped endpoints: tokenizes the sample input,
// decodes each endpoint with Decode, expands ranges, and prints the resulting characters.
void Main()
{
    // these are all equivalent:
    var input = @"\x41-\0x46\u41";
    var input2 = @"\65-\70\65";
    var input3 = "A-FA";

    // One "character": hex as \0x123 or \x123 or \u123, or decimal \412,
    // or the escapes \n\t\r, or any single character.
    var charRegex = @"(\\(0?x|u)[0-9a-fA-F]+|\\[0-9]+|\\[ntr]|.)";
    // A token is either a range (character, dash, character) or one lone character.
    string tokenPattern = charRegex + "-" + charRegex + "|" + charRegex;

    var list = new StringBuilder();
    foreach (Match m in Regex.Matches(input, tokenPattern))
    {
        string token = m.Value;
        // Skip index 0 when looking for the separator, in case the first character is itself a dash.
        int dashIndex = token.IndexOf('-', 1);
        if (dashIndex <= 0)
        {
            // just one item
            list.Append(Decode(token));
            continue;
        }

        // two items: a range
        char charLeft = Decode(token.Substring(0, dashIndex));
        char charRight = Decode(token.Substring(dashIndex + 1));
        if (charRight < charLeft)
            throw new ArgumentException("invalid format (left bigger than right)"); // or swap, if you prefer.
        char current = charLeft;
        while (current <= charRight)
        {
            list.Append(current);
            current++;
        }
    }
    Console.WriteLine(list);
}
// Converts one token into the character it denotes:
//   - a single character is returned as-is;
//   - \n, \t, \r map to newline, tab, carriage return;
//   - \uHEX, \xHEX and \0xHEX are parsed as hexadecimal code units;
//   - any other backslash token is parsed as a decimal code unit (\65 -> 'A').
// Parsing failures from Convert.ToUInt16 propagate to the caller.
char Decode(string s)
{
    if (s.Length == 1)
        return s[0];

    // From here on s[0] is expected to be the backslash; s[1] selects the escape kind.
    if (s.Length == 2)
    {
        // incomplete; add more as wished
        if (s[1] == 'n') return '\n';
        if (s[1] == 't') return '\t';
        if (s[1] == 'r') return '\r';
    }

    char kind = s[1];
    if (kind == 'u' || kind == 'x')
    {
        // \uHEX or \xHEX: digits start right after the marker.
        return (char)Convert.ToUInt16(s.Substring(2), 16);
    }

    bool hasZeroXPrefix = s.Length > 2 && kind == '0' && s[2] == 'x';
    if (hasZeroXPrefix)
    {
        // \0xHEX: digits start after the "0x".
        return (char)Convert.ToUInt16(s.Substring(3), 16);
    }

    // Decimal form; will fail here if the escape is invalid (e.g. \g).
    return (char)Convert.ToUInt16(s.Substring(1));
}