Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/codeigniter/3.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 扩展字符集的算法?_C# - Fatal编程技术网

C# 扩展字符集的算法?

C# 扩展字符集的算法?,c#,C#,是否有现成的函数用于扩展C#regex样式的字符集 例如,expand(“a-z1”)将返回一个包含所有字符a到z的字符串,后跟数字1 以下是到目前为止我得到的信息: public static string ExpandCharacterSet(string set) { var sb = new StringBuilder(); int start = 0; while (start < set.Length - 1) { int da

是否有现成的函数可以展开 C# 正则表达式风格的字符集?

例如,
expand("a-z1")
将返回一个字符串,其中包含从 a 到 z 的所有字符,后面跟着数字 1。


以下是我目前写出的代码:

/// <summary>
/// Expands a regex-style character set body such as <c>a-z1</c> into the
/// literal characters it denotes (<c>abcdefghijklmnopqrstuvwxyz1</c>).
/// </summary>
/// <param name="set">
/// The set body without surrounding brackets. A '-' that has a character on
/// both sides denotes an inclusive range; every other character is copied
/// through unchanged.
/// </param>
/// <returns>
/// The expanded characters in the order they appear in <paramref name="set"/>.
/// A range whose right endpoint is smaller than its left endpoint contributes nothing.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="set"/> is null.</exception>
public static string ExpandCharacterSet(string set)
{
    if (set == null)
        throw new ArgumentNullException("set");

    var sb = new StringBuilder();
    int start = 0;
    while (start < set.Length - 1)
    {
        // Search from start + 1 so a dash in the first remaining position is a literal.
        int dash = set.IndexOf('-', start + 1);

        // Stop when no dash is left, or when the dash is the last character
        // and therefore has no right endpoint.
        if (dash <= 0 || dash >= set.Length - 1)
            break;

        // Literal characters that precede the range's left endpoint.
        sb.Append(set.Substring(start, dash - start - 1));

        char a = set[dash - 1];
        char z = set[dash + 1];

        // Count with an int: a char counter wraps from '\uFFFF' back to '\0',
        // so a range ending at char.MaxValue would never terminate.
        for (int i = a; i <= z; ++i)
            sb.Append((char)i);

        start = dash + 2;
    }

    // Whatever follows the last range is copied verbatim.
    sb.Append(set.Substring(start));
    return sb.ToString();
}

我花了一些功夫才做出这个,这是我目前能拿出的方案。当然,它不具备可移植性,因为我动用了框架的内部实现;不过对于简单的测试用例来说已经足够好了。它可以接受任何正则表达式字符类,但不适用于否定类(例如 [^a-z])。在不加任何限制的情况下,取值范围会过于宽泛。我不确定它是否在所有情况下都正确,而且它完全不处理重复字符,但这算是一个起点。至少您不必自己编写解析器。适用于 .NET Framework 4.0 及以上版本:

/// <summary>
/// Expands a regex character class into the characters it contains by driving
/// non-public types of the System.Text.RegularExpressions assembly through
/// reflection. The member names looked up below are implementation details of
/// that assembly, so this only works on runtimes that expose them.
/// </summary>
public static class RegexHelper
{
    /// <summary>
    /// Parses <paramref name="charClass"/> with the framework's own parser and
    /// concatenates the expansion of every range it reports except the first.
    /// </summary>
    /// <param name="charClass">Character class text, e.g. <c>[\-a-zA-F1 5-9]</c>.</param>
    /// <returns>The expanded characters, in the order the parser reports the ranges.</returns>
    public static string ExpandCharClass(string charClass)
    {
        var parser = new RegexParser(CultureInfo.CurrentCulture);
        parser.SetPattern(charClass);
        var scanned = parser.ScanCharClass(false);
        int total = scanned.RangeCount();

        // Range 0 is skipped on purpose (original author's note: "range 0 can be skipped").
        // NOTE(review): presumably it corresponds to the leading '[' - confirm.
        var expanded = Enumerable.Range(1, Math.Max(total - 1, 0))
                                 .Select(index => ExpandRange(scanned.GetRangeAt(index)));
        return String.Concat(expanded);
    }

    /// <summary>Builds the string of all characters from <c>_first</c> to <c>_last</c>, inclusive.</summary>
    static string ExpandRange(SingleRange range)
    {
        int length = range._last - range._first + 1;
        char[] members = Enumerable.Range(range._first, length)
                                   .Select(code => (char)code)
                                   .ToArray();
        return new string(members);
    }

    /// <summary>Reflection proxy for the non-public <c>System.Text.RegularExpressions.RegexParser</c>.</summary>
    internal class RegexParser
    {
        static readonly Type ParserType;
        static readonly ConstructorInfo Constructor;
        static readonly MethodInfo SetPatternMethod;
        static readonly MethodInfo ScanCharClassMethod;

        // Resolve the hidden members once per process.
        static RegexParser()
        {
            const BindingFlags hidden = BindingFlags.NonPublic | BindingFlags.Instance;
            Assembly regexAssembly = Assembly.GetAssembly(typeof(Regex));

            ParserType = regexAssembly.GetType("System.Text.RegularExpressions.RegexParser");
            Constructor = ParserType.GetConstructor(hidden, null, new[] { typeof(CultureInfo) }, null);
            SetPatternMethod = ParserType.GetMethod("SetPattern", hidden, null, new[] { typeof(string) }, null);
            ScanCharClassMethod = ParserType.GetMethod("ScanCharClass", hidden, null, new[] { typeof(bool) }, null);
        }

        // The wrapped framework parser object.
        private readonly object instance;

        /// <summary>Creates the underlying parser for the given culture.</summary>
        internal RegexParser(CultureInfo culture)
        {
            object[] arguments = { culture };
            instance = Constructor.Invoke(arguments);
        }

        /// <summary>Forwards to the hidden <c>SetPattern(string)</c>.</summary>
        internal void SetPattern(string pattern)
        {
            object[] arguments = { pattern };
            SetPatternMethod.Invoke(instance, arguments);
        }

        /// <summary>Forwards to the hidden <c>ScanCharClass(bool)</c> and wraps its result.</summary>
        internal RegexCharClass ScanCharClass(bool caseInsensitive)
        {
            object[] arguments = { caseInsensitive };
            object scanned = ScanCharClassMethod.Invoke(instance, arguments);
            return new RegexCharClass(scanned);
        }
    }

    /// <summary>Reflection proxy for the non-public <c>System.Text.RegularExpressions.RegexCharClass</c>.</summary>
    internal class RegexCharClass
    {
        static readonly Type CharClassType;
        static readonly MethodInfo RangeCountMethod;
        static readonly MethodInfo GetRangeAtMethod;

        // Resolve the hidden members once per process.
        static RegexCharClass()
        {
            const BindingFlags hidden = BindingFlags.NonPublic | BindingFlags.Instance;
            Assembly regexAssembly = Assembly.GetAssembly(typeof(Regex));

            CharClassType = regexAssembly.GetType("System.Text.RegularExpressions.RegexCharClass");
            RangeCountMethod = CharClassType.GetMethod("RangeCount", hidden, null, Type.EmptyTypes, null);
            GetRangeAtMethod = CharClassType.GetMethod("GetRangeAt", hidden, null, new[] { typeof(int) }, null);
        }

        // The wrapped framework char-class object.
        private readonly object instance;

        /// <summary>Wraps an object that must be exactly of the hidden char-class type.</summary>
        /// <exception cref="ArgumentNullException">The object is null.</exception>
        /// <exception cref="ArgumentException">The object is of a different type.</exception>
        internal RegexCharClass(object regexCharClass)
        {
            if (regexCharClass == null)
                throw new ArgumentNullException("regexCharClass");

            bool isExpectedType = regexCharClass.GetType() == CharClassType;
            if (!isExpectedType)
                throw new ArgumentException("not an instance of a RegexCharClass object", "regexCharClass");

            instance = regexCharClass;
        }

        /// <summary>Forwards to the hidden <c>RangeCount()</c>.</summary>
        internal int RangeCount()
        {
            object count = RangeCountMethod.Invoke(instance, new object[0]);
            return (int)count;
        }

        /// <summary>Forwards to the hidden <c>GetRangeAt(int)</c> and copies the result.</summary>
        internal SingleRange GetRangeAt(int i)
        {
            object[] arguments = { i };
            object range = GetRangeAtMethod.Invoke(instance, arguments);
            return new SingleRange(range);
        }
    }

    /// <summary>
    /// Value copy of the non-public <c>RegexCharClass+SingleRange</c>: the first and
    /// last character of one range, read from its <c>_first</c>/<c>_last</c> fields.
    /// </summary>
    internal struct SingleRange
    {
        static readonly Type RangeType;
        static readonly FieldInfo FirstField;
        static readonly FieldInfo LastField;

        // Resolve the hidden fields once per process.
        static SingleRange()
        {
            const BindingFlags hidden = BindingFlags.NonPublic | BindingFlags.Instance;
            Assembly regexAssembly = Assembly.GetAssembly(typeof(Regex));

            RangeType = regexAssembly.GetType("System.Text.RegularExpressions.RegexCharClass+SingleRange");
            FirstField = RangeType.GetField("_first", hidden);
            LastField = RangeType.GetField("_last", hidden);
        }

        internal char _first;
        internal char _last;

        /// <summary>Copies both endpoints out of a hidden SingleRange object.</summary>
        /// <exception cref="ArgumentNullException">The object is null.</exception>
        /// <exception cref="ArgumentException">The object is of a different type.</exception>
        internal SingleRange(object singleRange)
        {
            if (singleRange == null)
                throw new ArgumentNullException("singleRange");

            bool isExpectedType = singleRange.GetType() == RangeType;
            if (!isExpectedType)
                throw new ArgumentException("not an instance of a SingleRange object", "singleRange");

            _first = (char)FirstField.GetValue(singleRange);
            _last = (char)LastField.GetValue(singleRange);
        }
    }
}

// usage: pass the bracketed character class text; the call returns the expanded characters.
RegexHelper.ExpandCharClass(@"[\-a-zA-F1 5-9]");
// result reported by the original author for the call above:
// "-abcdefghijklmnopqrstuvwxyzABCDEF1 56789"
公共静态类RegexHelper
{
公共静态字符串ExpandCharClass(字符串charClass)
{
var regexParser=新的regexParser(CultureInfo.CurrentCulture);
regexParser.SetPattern(charClass);
var regexCharClass=regexParser.ScanCharClass(false);
int count=regexCharClass.RangeCount();
列表范围=新列表();
//可以跳过范围0
对于(int i=1;i(char)i));
}
内部类RegexParser
{
静态只读类型RegexParserType;
静态只读构造函数info RegexParser;
静态只读MethodInfo RegexParser_SetPattern;
静态只读MethodInfo RegexParser_ScanCharClass;
静态RegexParser()
{
RegexParserType=Assembly.GetAssembly(typeof(Regex)).GetType(“System.Text.RegularExpressions.RegexParser”);
var flags=BindingFlags.NonPublic | BindingFlags.Instance;
RegexParser_Ctor=RegexParserType.GetConstructor(flags,null,new[]{typeof(CultureInfo)},null);
RegexParser_SetPattern=RegexParserType.GetMethod(“SetPattern”,flags,null,new[]{typeof(String)},null);
RegexParser_ScanCharClass=RegexParserType.GetMethod(“ScanCharClass”,标志,null,新[]{typeof(Boolean)},null);
}
私有只读对象实例;
内部RegexParser(文化信息文化)
{
instance=RegexParser_Ctor.Invoke(新对象[]{culture});
}
内部无效模式(字符串模式)
{
RegexParser_SetPattern.Invoke(实例,新对象[]{pattern});
}
内部RegexCharClass ScanCharClass(布尔不区分大小写)
{
返回新的RegexCharClass(RegexParser_ScanCharClass.Invoke(实例,新对象[]{casensitive}));
}
}
内部类RegexCharClass
{
静态只读类型RegexCharClassType;
静态只读MethodInfo RegexCharClass_RangeCount;
静态只读MethodInfo RegexCharClass_GetRangeAt;
静态RegexCharClass()
{
RegexCharClassType=Assembly.GetAssembly(typeof(Regex)).GetType(“System.Text.RegularExpressions.RegexCharClass”);
var flags=BindingFlags.NonPublic | BindingFlags.Instance;
RegexCharClass_RangeCount=RegexCharClassType.GetMethod(“RangeCount”,标志,null,新类型[]{},null);
RegexCharClass_GetRangeAt=RegexCharClassType.GetMethod(“GetRangeAt”,flags,null,new[]{typeof(Int32)},null);
}
私有只读对象实例;
内部RegexCharClass(对象RegexCharClass)
{
if(regexCharClass==null)
抛出新ArgumentNullException(“regexCharClass”);
if(regexCharClass.GetType()!=RegexCharClassType)
抛出新ArgumentException(“不是RegeCharClass对象的实例”,“RegeCharClass”);
instance=regexCharClass;
}
内部int RangeCount()
{
return(int)RegexCharClass_RangeCount.Invoke(实例,新对象[]{});
}
内部单量程GetRangeAt(int i)
{
返回新的SingleRange(RegexCharClass_GetRangeAt.Invoke(实例,新对象[]{i}));
}
}
内部结构单一范围
{
静态只读类型RegeCharClassSingleRangeType;
静态只读FieldInfo SingleRange_优先;
静态只读FieldInfo SingleRange\u last;
静态单量程()
{
RegexCharClassSingleRangeType=Assembly.GetAssembly(typeof(Regex)).GetType(“System.Text.RegularExpressions.RegexCharClass+SingleRange”);
var flags=BindingFlags.NonPublic | BindingFlags.Instance;
SingleRange_first=RegexCharClassSingleRangeType.GetField(“_first”,标志);
SingleRange_last=RegexCharClassSingleRangeType.GetField(“_last”,标志);
}
内部字符优先;
内部字符最后;
内部单一范围(对象单一范围)
{
if(singleRange==null)
抛出新ArgumentNullException(“singleRange”);
if(singleRange.GetType()!=RegeCharClassSingleRangeType)
抛出新ArgumentException(“不是SingleRange对象的实例”,“SingleRange”);
_first=(char)SingleRange_first.GetValue(SingleRange);
_last=(char)SingleRange\u last.GetValue(SingleRange);
}
}
}
//用法:
RegexHelper.ExpandCharClass(@“[\-a-zA-F1 5-9]”);
//“-abcdefghijklmnopqrstuvxyzabcdef1 56789”

这似乎是一个非常不寻常的需求,但由于可匹配的字符只有大约 96 个(除非包含高位字符),因此您不妨直接用正则表达式逐个测试所有候选字符,并输出其中匹配的字符:
/// <summary>
/// Expands a regex character class into the characters it contains by driving
/// non-public types of the System.Text.RegularExpressions assembly through
/// reflection. The member names looked up below are implementation details of
/// that assembly, so this only works on runtimes that expose them.
/// </summary>
public static class RegexHelper
{
    /// <summary>
    /// Parses <paramref name="charClass"/> with the framework's own parser and
    /// concatenates the expansion of every range it reports except the first.
    /// </summary>
    /// <param name="charClass">Character class text, e.g. <c>[\-a-zA-F1 5-9]</c>.</param>
    /// <returns>The expanded characters, in the order the parser reports the ranges.</returns>
    public static string ExpandCharClass(string charClass)
    {
        var parser = new RegexParser(CultureInfo.CurrentCulture);
        parser.SetPattern(charClass);
        var scanned = parser.ScanCharClass(false);
        int total = scanned.RangeCount();

        // Range 0 is skipped on purpose (original author's note: "range 0 can be skipped").
        // NOTE(review): presumably it corresponds to the leading '[' - confirm.
        var expanded = Enumerable.Range(1, Math.Max(total - 1, 0))
                                 .Select(index => ExpandRange(scanned.GetRangeAt(index)));
        return String.Concat(expanded);
    }

    /// <summary>Builds the string of all characters from <c>_first</c> to <c>_last</c>, inclusive.</summary>
    static string ExpandRange(SingleRange range)
    {
        int length = range._last - range._first + 1;
        char[] members = Enumerable.Range(range._first, length)
                                   .Select(code => (char)code)
                                   .ToArray();
        return new string(members);
    }

    /// <summary>Reflection proxy for the non-public <c>System.Text.RegularExpressions.RegexParser</c>.</summary>
    internal class RegexParser
    {
        static readonly Type ParserType;
        static readonly ConstructorInfo Constructor;
        static readonly MethodInfo SetPatternMethod;
        static readonly MethodInfo ScanCharClassMethod;

        // Resolve the hidden members once per process.
        static RegexParser()
        {
            const BindingFlags hidden = BindingFlags.NonPublic | BindingFlags.Instance;
            Assembly regexAssembly = Assembly.GetAssembly(typeof(Regex));

            ParserType = regexAssembly.GetType("System.Text.RegularExpressions.RegexParser");
            Constructor = ParserType.GetConstructor(hidden, null, new[] { typeof(CultureInfo) }, null);
            SetPatternMethod = ParserType.GetMethod("SetPattern", hidden, null, new[] { typeof(string) }, null);
            ScanCharClassMethod = ParserType.GetMethod("ScanCharClass", hidden, null, new[] { typeof(bool) }, null);
        }

        // The wrapped framework parser object.
        private readonly object instance;

        /// <summary>Creates the underlying parser for the given culture.</summary>
        internal RegexParser(CultureInfo culture)
        {
            object[] arguments = { culture };
            instance = Constructor.Invoke(arguments);
        }

        /// <summary>Forwards to the hidden <c>SetPattern(string)</c>.</summary>
        internal void SetPattern(string pattern)
        {
            object[] arguments = { pattern };
            SetPatternMethod.Invoke(instance, arguments);
        }

        /// <summary>Forwards to the hidden <c>ScanCharClass(bool)</c> and wraps its result.</summary>
        internal RegexCharClass ScanCharClass(bool caseInsensitive)
        {
            object[] arguments = { caseInsensitive };
            object scanned = ScanCharClassMethod.Invoke(instance, arguments);
            return new RegexCharClass(scanned);
        }
    }

    /// <summary>Reflection proxy for the non-public <c>System.Text.RegularExpressions.RegexCharClass</c>.</summary>
    internal class RegexCharClass
    {
        static readonly Type CharClassType;
        static readonly MethodInfo RangeCountMethod;
        static readonly MethodInfo GetRangeAtMethod;

        // Resolve the hidden members once per process.
        static RegexCharClass()
        {
            const BindingFlags hidden = BindingFlags.NonPublic | BindingFlags.Instance;
            Assembly regexAssembly = Assembly.GetAssembly(typeof(Regex));

            CharClassType = regexAssembly.GetType("System.Text.RegularExpressions.RegexCharClass");
            RangeCountMethod = CharClassType.GetMethod("RangeCount", hidden, null, Type.EmptyTypes, null);
            GetRangeAtMethod = CharClassType.GetMethod("GetRangeAt", hidden, null, new[] { typeof(int) }, null);
        }

        // The wrapped framework char-class object.
        private readonly object instance;

        /// <summary>Wraps an object that must be exactly of the hidden char-class type.</summary>
        /// <exception cref="ArgumentNullException">The object is null.</exception>
        /// <exception cref="ArgumentException">The object is of a different type.</exception>
        internal RegexCharClass(object regexCharClass)
        {
            if (regexCharClass == null)
                throw new ArgumentNullException("regexCharClass");

            bool isExpectedType = regexCharClass.GetType() == CharClassType;
            if (!isExpectedType)
                throw new ArgumentException("not an instance of a RegexCharClass object", "regexCharClass");

            instance = regexCharClass;
        }

        /// <summary>Forwards to the hidden <c>RangeCount()</c>.</summary>
        internal int RangeCount()
        {
            object count = RangeCountMethod.Invoke(instance, new object[0]);
            return (int)count;
        }

        /// <summary>Forwards to the hidden <c>GetRangeAt(int)</c> and copies the result.</summary>
        internal SingleRange GetRangeAt(int i)
        {
            object[] arguments = { i };
            object range = GetRangeAtMethod.Invoke(instance, arguments);
            return new SingleRange(range);
        }
    }

    /// <summary>
    /// Value copy of the non-public <c>RegexCharClass+SingleRange</c>: the first and
    /// last character of one range, read from its <c>_first</c>/<c>_last</c> fields.
    /// </summary>
    internal struct SingleRange
    {
        static readonly Type RangeType;
        static readonly FieldInfo FirstField;
        static readonly FieldInfo LastField;

        // Resolve the hidden fields once per process.
        static SingleRange()
        {
            const BindingFlags hidden = BindingFlags.NonPublic | BindingFlags.Instance;
            Assembly regexAssembly = Assembly.GetAssembly(typeof(Regex));

            RangeType = regexAssembly.GetType("System.Text.RegularExpressions.RegexCharClass+SingleRange");
            FirstField = RangeType.GetField("_first", hidden);
            LastField = RangeType.GetField("_last", hidden);
        }

        internal char _first;
        internal char _last;

        /// <summary>Copies both endpoints out of a hidden SingleRange object.</summary>
        /// <exception cref="ArgumentNullException">The object is null.</exception>
        /// <exception cref="ArgumentException">The object is of a different type.</exception>
        internal SingleRange(object singleRange)
        {
            if (singleRange == null)
                throw new ArgumentNullException("singleRange");

            bool isExpectedType = singleRange.GetType() == RangeType;
            if (!isExpectedType)
                throw new ArgumentException("not an instance of a SingleRange object", "singleRange");

            _first = (char)FirstField.GetValue(singleRange);
            _last = (char)LastField.GetValue(singleRange);
        }
    }
}

// usage: pass the bracketed character class text; the call returns the expanded characters.
RegexHelper.ExpandCharClass(@"[\-a-zA-F1 5-9]");
// result reported by the original author for the call above:
// "-abcdefghijklmnopqrstuvwxyzABCDEF1 56789"
/// <summary>
/// Returns the characters of a fixed alphanumeric alphabet that individually
/// match <paramref name="input_re"/>, in alphabet order (A-Z, then a-z, then 0-9).
/// </summary>
/// <param name="input_re">Regular expression pattern each candidate character is tested against.</param>
/// <returns>The matching candidates concatenated; empty when none match.</returns>
public static string expando(string input_re) {

    // extend the candidate alphabet as needed, e.g. with ,.?/|=+_-éñ etc.
    const string candidates = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

    Regex matcher = new Regex(input_re);

    // At most every candidate matches, so one buffer of that size is enough.
    char[] kept = new char[candidates.Length];
    int keptCount = 0;

    foreach (char candidate in candidates) {
        if (matcher.IsMatch(candidate.ToString())) {
            kept[keptCount++] = candidate;
        }
    }

    return new string(kept, 0, keptCount);
}
// Universe used for negated sets: every UTF-16 code unit that is not a control character.
// The query is lazy, so it is re-evaluated each time a negated set is expanded.
private static readonly IEnumerable<char> CharacterSet = Enumerable.Range(0, char.MaxValue + 1).Select(Convert.ToChar).Where(c => !char.IsControl(c));

/// <summary>
/// Expands a regex-style character set such as <c>a-z1</c>, <c>[a-z1]</c> or
/// <c>[^a-z1]</c> into the literal characters it denotes.
/// </summary>
/// <param name="set">
/// The set text. One pair of surrounding brackets is optional; a leading '^'
/// (after the optional '[') negates the set. A '-' that has a character on
/// both sides denotes an inclusive range.
/// </param>
/// <returns>
/// For a plain set, the expanded characters in input order. For a negated set,
/// every non-control character that is not in the expanded set. An empty set
/// (<c>""</c> or <c>"[]"</c>) yields an empty string.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="set"/> is null.</exception>
public static string ExpandCharacterSet(string set)
{
    if (set == null)
        throw new ArgumentNullException("set");

    var sb = new StringBuilder();
    int start = 0;
    bool invertSet = false;

    if (set.Length == 0)
        return "";
    if (set[0] == '[' && set[set.Length - 1] == ']')
        set = set.Substring(1, set.Length - 2);

    // "[]" is empty once the brackets are stripped; check the length again
    // before indexing so it yields "" instead of an IndexOutOfRangeException.
    if (set.Length > 0 && set[0] == '^')
    {
        invertSet = true;
        set = set.Substring(1);
    }

    while (start < set.Length - 1)
    {
        // Search from start + 1 so a dash in the first remaining position is a literal.
        int dash = set.IndexOf('-', start + 1);

        // Stop when no dash is left, or when the dash has no right endpoint.
        if (dash <= 0 || dash >= set.Length - 1)
            break;

        // Literal characters that precede the range's left endpoint.
        sb.Append(set.Substring(start, dash - start - 1));

        char a = set[dash - 1];
        char z = set[dash + 1];

        // Count with an int: a char counter wraps from '\uFFFF' back to '\0',
        // so a range ending at char.MaxValue would never terminate.
        for (int i = a; i <= z; ++i)
            sb.Append((char)i);

        start = dash + 2;
    }

    // Whatever follows the last range is copied verbatim.
    sb.Append(set.Substring(start));

    if (!invertSet) return sb.ToString();

    // Negated set: universe minus the expanded characters.
    var A = new HashSet<char>(CharacterSet);
    var B = new HashSet<char>(sb.ToString());
    A.ExceptWith(B);
    return new string(A.ToArray());
}
// Expands a bracket-less character set. The pattern splits the input into
// tokens that are either an "x-y" triple (an inclusive range) or one literal character.
var input = "a-fA-F0-9!";
var matches = Regex.Matches(input, @".-.|.");

var list = new StringBuilder();

foreach (Match m in matches)
{
    var token = m.Value;

    // Single characters are copied through unchanged.
    if (token.Length == 1)
    {
        list.Append(token);
        continue;
    }

    char low = token[0];
    char high = token[2];

    // A descending range is rejected here; swapping the endpoints would be the lenient alternative.
    if (high < low) throw new ArgumentException("invalid format");

    for (char c = low; c <= high; c++)
        list.Append(c);
}

Console.WriteLine(list);
abcdefABCDEF0123456789!
// Script entry point: expands a character set whose members may be written as
// escapes, then prints the expanded characters to the console.
void Main()
{
    //these are all equivalent:
    var input = @"\x41-\0x46\u41";
    var input2 = @"\65-\70\65";
    var input3 = "A-FA";

    // One member: hex as \0x123 or \x123 or \u123, or decimal \412,
    // or one of the escapes \n \t \r, or any single character.
    var charRegex = @"(\\(0?x|u)[0-9a-fA-F]+|\\[0-9]+|\\[ntr]|.)";

    // A token is either "member-member" (a range) or a lone member.
    var pattern = string.Concat(charRegex, "-", charRegex, "|", charRegex);
    var matches = Regex.Matches(input, pattern);

    var list = new StringBuilder();

    foreach (Match m in matches)
    {
        var token = m.Value;

        // Start at index 1 so a dash in position 0 is not taken as the range separator.
        var dashIndex = token.IndexOf('-', 1);

        if (dashIndex <= 0)
        {
            // just one item
            list.Append(Decode(token));
            continue;
        }

        // two items: a range
        var charLeft = Decode(token.Substring(0, dashIndex));
        var charRight = Decode(token.Substring(dashIndex + 1));

        // A descending range is rejected; swapping the endpoints would be the lenient alternative.
        if (charRight < charLeft)
            throw new ArgumentException("invalid format (left bigger than right)");

        for (char c = charLeft; c <= charRight; c++)
            list.Append(c);
    }

    Console.WriteLine(list);
}

// Converts one set member to the character it denotes.
// Accepted forms: a single literal character; \n, \t, \r; hexadecimal
// \xHH.., \uHH.. or \0xHH..; otherwise the text after the backslash is parsed
// as a decimal code. An unrecognised escape (e.g. \g) fails in that final
// decimal conversion.
char Decode(string s)
{
    // A lone character stands for itself.
    if (s.Length == 1)
        return s[0];

    // From here on s[0] is expected to be the backslash; s[1] selects the escape kind.
    char kind = s[1];

    if (s.Length == 2)
    {
        // incomplete; add more named escapes as wished
        if (kind == 'n') return '\n';
        if (kind == 't') return '\t';
        if (kind == 'r') return '\r';
    }

    bool shortHex = kind == 'u' || kind == 'x';
    if (shortHex)
        return (char)Convert.ToUInt16(s.Substring(2), 16);

    bool longHex = s.Length > 2 && kind == '0' && s[2] == 'x';
    if (longHex)
        return (char)Convert.ToUInt16(s.Substring(3), 16);

    // Decimal code; throws here when the escape is not numeric.
    return (char)Convert.ToUInt16(s.Substring(1));
}