C# 如何使用.NET将字符串按字符串拆分并包含分隔符?

C# 如何使用.NET将字符串按字符串拆分并包含分隔符?,c#,.net,string,C#,.net,String,有许多类似的问题,但显然没有完美的匹配,这就是为什么我要问 我想将一个随机字符串(例如,123xx456yy789)按一组字符串分隔符(例如,xx,yy)进行拆分,并在结果中包含分隔符(这里:123,xx,456,yy,789) 良好的性能是加分项。如果可能,应避免使用正则表达式 更新:我做了一些性能检查并比较了结果(但我懒得正式检查)。测试的解决方案为(随机顺序): 其他解决方案未经测试,因为它们要么与另一个解决方案相似,要么来得太晚 这是测试代码: class Program {

有许多类似的问题,但显然没有完美的匹配,这就是为什么我要问

我想将一个随机字符串(例如,
123xx456yy789
)按一组字符串分隔符进行拆分(例如,
xx
yy
),并在结果中包含分隔符(这里:
123
xx
456
yy
789

良好的性能是加分项。如果可能,应避免使用正则表达式

更新:我做了一些性能检查并比较了结果(但我懒得正式检查)。测试的解决方案为(随机顺序):

  • 其他解决方案未经测试,因为它们要么与另一个解决方案相似,要么来得太晚

    这是测试代码:

    /// <summary>
    /// Quick benchmark harness: runs every registered split implementation against every
    /// combination of source string and delimiter set, timing repeated calls and printing
    /// the elapsed time together with a preview of the produced tokens.
    /// </summary>
    class Program
    {
        /// <summary>Number of repeated calls per measurement (one extra call produces the printed output).</summary>
        private const int Iterations = 10000;

        /// <summary>Candidate implementations; each takes (source, delimiters) and returns the tokens.</summary>
        private static readonly List<Func<string, List<string>, List<string>>> Functions;

        /// <summary>Input strings to split.</summary>
        private static readonly List<string> Sources;

        /// <summary>Delimiter sets to split by.</summary>
        private static readonly List<List<string>> Delimiters;

        static Program ()
        {
            Functions = new List<Func<string, List<string>, List<string>>> {
                (s, l) => s.SplitIncludeDelimiters_Gabe (l).ToList (),
                (s, l) => s.SplitIncludeDelimiters_Guffa (l).ToList (),
                (s, l) => s.SplitIncludeDelimiters_Naive (l).ToList (),
                (s, l) => s.SplitIncludeDelimiters_Regex (l).ToList ()
            };

            Sources = new List<string> ();
            Sources.Add ("");
            Sources.Add (Guid.NewGuid ().ToString ());

            // Longer source: 10 groups of 10 "<i>**<ticks>" entries, each group terminated by "-".
            string str = "";
            for (int outer = 0; outer < 10; outer++) {
                for (int i = 0; i < 10; i++) {
                    str += i + "**" + DateTime.UtcNow.Ticks;
                }
                str += "-";
            }
            Sources.Add (str);

            Delimiters = new List<List<string>> {
                new List<string> (),
                new List<string> { "-" },
                new List<string> { "**" },
                new List<string> { "-", "**" }
            };
        }

        /// <summary>One measurement: which function/source/delimiter set ran, how long it took, and its output.</summary>
        private class Result
        {
            public readonly int FuncID;
            public readonly int SrcID;
            public readonly int DelimID;
            public readonly long Milliseconds;
            public readonly List<string> Output;

            public Result (int funcID, int srcID, int delimID, long milliseconds, List<string> output)
            {
                FuncID = funcID;
                SrcID = srcID;
                DelimID = delimID;
                Milliseconds = milliseconds;
                Output = output;
            }

            /// <summary>
            /// Writes the measurement header, then the token count and the first ten tokens
            /// (tokens of 15 or more characters are cut to 15 characters plus "...").
            /// </summary>
            public void Print ()
            {
                Console.WriteLine ("S " + SrcID + "\tD " + DelimID + "\tF " + FuncID + "\t" + Milliseconds + "ms");
                Console.WriteLine (Output.Count + "\t" + string.Join (" ", Output.Take (10).Select (x => x.Length < 15 ? x : x.Substring (0, 15) + "...").ToArray ()));
            }
        }

        static void Main (string[] args)
        {
            var results = new List<Result> ();

            // Bounds come from the collections themselves so that newly registered sources,
            // delimiter sets or functions are benchmarked without touching these loops.
            for (int srcID = 0; srcID < Sources.Count; srcID++) {
                for (int delimID = 0; delimID < Delimiters.Count; delimID++) {
                    for (int funcId = Functions.Count - 1; funcId >= 0; funcId--) { // i tried various orders in my tests
                        var func = Functions[funcId];
                        var src = Sources[srcID];
                        var del = Delimiters[delimID];

                        // Only the calls under test are timed; the lookups above are excluded.
                        Stopwatch sw = Stopwatch.StartNew ();
                        for (int i = 0; i < Iterations; i++) {
                            func (src, del);
                        }
                        var list = func (src, del);
                        sw.Stop ();

                        var res = new Result (funcId, srcID, delimID, sw.ElapsedMilliseconds, list);
                        results.Add (res);
                        res.Print ();
                    }
                }
            }
        }
    }
    
    类程序
    {
    私有静态只读列表函数;
    私有静态只读列表源;
    私有静态只读列表分隔符;
    静态程序()
    {
    函数=新列表();
    Add((s,l)=>s.spliteIncludeDelimiters_Gabe(l.ToList());
    Add((s,l)=>s.splitedElimiters_Guffa(l).ToList());
    Add((s,l)=>s.spliteIncludeDelimiters_Naive(l.ToList());
    Add((s,l)=>s.spliteIncludeDelimiters_Regex(l.ToList());
    来源=新列表();
    资料来源。加上(“”);
    Add(Guid.NewGuid().ToString());
    字符串str=“”;
    用于(int-outer=0;outer<10;outer++){
    对于(int i=0;i<10;i++){
    str+=i+“**”+DateTime.UtcNow.Ticks;
    }
    str+=“-”;
    }
    资料来源:添加(str);
    分隔符=新列表();
    Delimiters.Add(新列表(){});
    Delimiters.Add(新列表(){“-”});
    Delimiters.Add(新列表(){**});
    添加(新列表(){“-”,“**”});
    }
    私有类结果
    {
    公共只读int-FuncID;
    公共只读int-SrcID;
    公共只读;
    公共只读长毫秒;
    公共只读列表输出;
    公共结果(int funcID、int srcID、int delimID、长毫秒、列表输出)
    {
    FuncID=FuncID;
    SrcID=SrcID;
    DelimID=DelimID;
    毫秒=毫秒;
    输出=输出;
    }
    公开作废印刷品()
    {
    Console.WriteLine(“S”+SrcID+“\tD”+DelimID+“\tF”+FuncID+“\t”+毫秒+“毫秒”);
    Console.WriteLine(Output.Count+“\t”+string.Join(“),Output.Take(10)。选择(x=>x.Length<15?x:x.Substring(0,15)+“…”).ToArray());
    }
    }
    静态void Main(字符串[]参数)
    {
    var results=新列表();
    对于(int-srcID=0;srcID<3;srcID++){
    对于(int-delimID=0;delimID<4;delimID++){
    对于(int-funcId=3;funcId>=0;funcId--){//我在测试中尝试了各种顺序
    秒表sw=新秒表();
    sw.Start();
    var func=函数[funcId];
    var src=源[srcID];
    var del=分隔符[delimID];
    对于(int i=0;i<10000;i++){
    func(src,del);
    }
    var list=func(src,del);
    sw.Stop();
    var res=新结果(funcId、srcID、delimID、sw.elapsedmillisons、list);
    结果:添加(res);
    res.Print();
    }
    }
    }
    }
    }
    
    正如你所看到的,这实际上只是一个快速而粗糙的测试,但我以不同的顺序多次运行测试,结果总是非常一致的。对于较大的数据集,测量的时间范围为毫秒到秒。我在下面的评估中忽略了低毫秒范围内的值,因为它们在实践中似乎可以忽略不计。这是我机器上的输出:

    S 0 D 0 F 3 11ms 1 S 0 D 0 F 2 7ms 1 S 0 D 0 F 1 6ms 1 S 0 D 0 F 0 4ms 0 S 0 D 1 F 3 28ms 1 S 0 D 1 F 2 8ms 1 S 0 D 1 F 1 7ms 1 S 0 D 1 F 0 3ms 0 S 0 D 2 F 3 30ms 1 S 0 D 2 F 2 8ms 1 S 0 D 2 F 1 6ms 1 S 0 D 2 F 0 3ms 0 S 0 D 3 F 3 30ms 1 S 0 D 3 F 2 10ms 1 S 0 D 3 F 1 8ms 1 S 0 D 3 F 0 3ms 0 S 1 D 0 F 3 9ms 1 9e5282ec-e2a2-4... S 1 D 0 F 2 6ms 1 9e5282ec-e2a2-4... S 1 D 0 F 1 5ms 1 9e5282ec-e2a2-4... S 1 D 0 F 0 5ms 1 9e5282ec-e2a2-4... S 1 D 1 F 3 63ms 9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37 S 1 D 1 F 2 37ms 9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37 S 1 D 1 F 1 29ms 9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37 S 1 D 1 F 0 22ms 9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37 S 1 D 2 F 3 30ms 1 9e5282ec-e2a2-4... S 1 D 2 F 2 10ms 1 9e5282ec-e2a2-4... S 1 D 2 F 1 10ms 1 9e5282ec-e2a2-4... S 1 D 2 F 0 12ms 1 9e5282ec-e2a2-4... S 1 D 3 F 3 73ms 9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37 S 1 D 3 F 2 40ms 9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37 S 1 D 3 F 1 33ms 9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37 S 1 D 3 F 0 30ms 9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37 S 2 D 0 F 3 10ms 1 0**634226552821... S 2 D 0 F 2 109ms 1 0**634226552821... S 2 D 0 F 1 5ms 1 0**634226552821... S 2 D 0 F 0 127ms 1 0**634226552821... S 2 D 1 F 3 184ms 21 0**634226552821... - 0**634226552821... - 0**634226552821... - 0**634226 552821... - 0**634226552821... - S 2 D 1 F 2 364ms 21 0**634226552821... - 0**634226552821... - 0**634226552821... - 0**634226 552821... - 0**634226552821... - S 2 D 1 F 1 134ms 21 0**634226552821... - 0**634226552821... - 0**634226552821... - 0**634226 552821... - 0**634226552821... - S 2 D 1 F 0 517ms 20 0**634226552821... - 0**634226552821... - 0**634226552821... - 0**634226 552821... - 0**634226552821... - S 2 D 2 F 3 688ms 201 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6 34226552821217... ** S 2 D 2 F 2 2404ms 201 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6 34226552821217... 
** S 2 D 2 F 1 874ms 201 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6 34226552821217... ** S 2 D 2 F 0 717ms 201 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6 34226552821217... ** S 2 D 3 F 3 1205ms 221 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6 34226552821217... ** S 2 D 3 F 2 3471ms 221 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6 34226552821217... ** S 2 D 3 F 1 1008ms 221 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6 34226552821217... ** S 2 D 3 F 0 1095ms 220 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6 34226552821217... ** s0d0f3 11ms 1. s0d0f2 7ms 1. s0d0f16ms 1. s0d0f04ms 0 S 0 D 1 F 3 28毫秒 1. s0d1f2 8ms 1. s0d1f1 7ms 1. s0d1f03ms 0 s0d2f330ms 1. s0d2f2 8ms 1. S0D2F16ms 1. s0d2f03ms 0 s0d3f330ms 1. S0D3F210ms 1. s0d3f1 8ms 1. s0d3f03ms 0 s1d0f39ms 1 9e5282ec-e2a2-4。。。 s1d0f2 6ms 1 9e5282ec-e2a2-4。。。 s1d0f1 5ms 1 9e5282ec-e2a2-4。。。 s1d0f05ms 1 9e5282ec-e2a2-4。。。 s1d1f363ms 9 9e5282ec-e2a2-4265-8276-6dbb50fdae37 s1d1f2 37ms 9 9e5282ec-e2a2-4265-8276-6dbb50fdae37 s1d1f1 29ms 9 9e5282ec-e2a2-4265-8276-6dbb50fdae37 s1d1f022ms 9 9e5282ec-e2a2-4265-8276-6dbb50fdae37 s1d2f330ms 1 9e5282ec-e2a2-4。。。 s1d2f2 10ms 1 9e5282ec-e2a2-4。。。 s1d2f1 10ms 1 9e5282ec-e2a2-4。。。 s1d2f012ms 1 9e5282ec-e2a2-4。。。 s1d3f373ms 9 9e5282ec-e2a2-4265-8276-6dbb50fdae37 s1d3f2 40ms 9 9e5282ec-e2a2-4265-8276-6dbb50fdae37 s1d3f1 33ms 9 9e5282ec-e2a2-4265-8276-6dbb50fdae37 s1d3f0 30ms 9 9e5282ec-e2a2-4265-8276-6dbb50fdae37 S2D0F310ms 1 0**634226552821... S2D0F2109ms 1 0**634226552821... S2D0F15ms 1 0**634226552821... S 2 D 0 F 0 127ms 1 0**634226552821... S 2 D 1
        string source = "123xx456yy789";
        foreach (string delimiter in delimiters)
            source = source.Replace(delimiter, ";" + delimiter + ";");
        string[] parts = source.Split(';');
    
    /// <summary>
    /// Splits <paramref name="text"/> by the given delimiters and yields both the parts
    /// between delimiters and the delimiters themselves, in source order.
    /// </summary>
    /// <param name="text">The string to split.</param>
    /// <param name="delimiters">Delimiter strings; null or empty entries are ignored.</param>
    /// <returns>
    /// The parts produced by <c>text.Split(delimiters, StringSplitOptions.None)</c>
    /// (empty parts included), each followed by the delimiter found after it, if any.
    /// </returns>
    public IEnumerable<string> SplitX (string text, string[] delimiters)
    {
        var split = text.Split (delimiters, StringSplitOptions.None);

        // Current offset into text; avoids re-allocating the remaining text on every step.
        int pos = 0;

        foreach (string part in split) {
            yield return part;
            pos += part.Length;

            // Compare ordinally, skipping null/empty delimiters, so that the delimiter
            // chosen here is one that actually occurs at this offset in the text.
            string delim = delimiters.FirstOrDefault (
                x => !string.IsNullOrEmpty (x) && string.CompareOrdinal (text, pos, x, 0, x.Length) == 0);
            if (delim != null) {
                yield return delim;
                pos += delim.Length;
            }
        }
    }
    
    // The alternation is wrapped in a capture group "(...)"; the split is done
    // through a Regex instance built from that pattern.
    const string input = "123xx456yy789";
    const string pattern = "(xx|yy)";
    var splitter = new Regex(pattern);
    string[] result = splitter.Split(input);
    
    // Build "(d1|d2|...)" from the delimiter list, regex-escaping each delimiter
    // so that characters such as "." are matched literally.
    var delimiters = new List<string> { ".", "xx", "yy" };
    string[] escapedDelimiters = delimiters.Select(delimiter => Regex.Escape(delimiter)).ToArray();
    string pattern = string.Concat("(", String.Join("|", escapedDelimiters), ")");
    
    string input = "123xx456yy789";
    // Use an empty list (new List<string>()) to exercise the "nothing to split" branch.
    var delimiters = new List<string> { ".", "xx", "yy", "()" };
    if (delimiters.Count == 0)
    {
        // nothing to split
        Console.WriteLine(input);
    }
    else
    {
        // Escape each delimiter and join them into one capture group "(a|b|...)".
        string[] escapedDelimiters = delimiters.Select(d => Regex.Escape(d)).ToArray();
        string pattern = "(" + String.Join("|", escapedDelimiters) + ")";

        string[] result = Regex.Split(input, pattern);
        for (int index = 0; index < result.Length; index++)
        {
            Console.WriteLine(result[index]);
        }
    }
    
    // Same escaped alternation as a capture group, wrapped in \b on both sides.
    string escapedAlternatives = String.Join("|", delimiters.Select(d => Regex.Escape(d)));
    string pattern = string.Concat(@"\b(", escapedAlternatives, @")\b");
    
    // Escaped alternation as a capture group between \b anchors, with \s* on either
    // side placed outside the group.
    string alternatives = String.Join("|", delimiters.Select(d => Regex.Escape(d)));
    string pattern = string.Concat(@"\s*\b(", alternatives, @")\b\s*");
    
    /// <summary>
    /// Splits <paramref name="searchStr"/> at every occurrence of any separator and
    /// returns the pieces together with the separators, in source order.
    /// </summary>
    /// <param name="searchStr">The string to split.</param>
    /// <param name="separators">
    /// Separator strings, tried in array order at each position; null or empty entries are ignored.
    /// </param>
    /// <returns>
    /// Alternating text pieces and separators. The piece before a separator is always
    /// added (possibly as an empty string); text after the last separator is added only
    /// when it is non-empty.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="searchStr"/> or <paramref name="separators"/> is null.
    /// </exception>
    public static List<string> Split(string searchStr, string[] separators)
    {
        if (searchStr == null)
        {
            throw new System.ArgumentNullException("searchStr");
        }
        if (separators == null)
        {
            throw new System.ArgumentNullException("separators");
        }

        List<string> result = new List<string>();
        int length = searchStr.Length;
        int lastMatchEnd = 0;
        for (int i = 0; i < length; i++)
        {
            for (int j = 0; j < separators.Length; j++)
            {
                string str = separators[j];
                // An empty separator has no first character to test and can never
                // advance the scan, so it is skipped (as are null entries).
                if (string.IsNullOrEmpty(str))
                {
                    continue;
                }

                int sepLen = str.Length;
                // Cheap first-character and remaining-length checks before the full ordinal compare.
                if (((searchStr[i] == str[0]) && (sepLen <= (length - i))) && ((sepLen == 1) || (String.CompareOrdinal(searchStr, i, str, 0, sepLen) == 0)))
                {
                    result.Add(searchStr.Substring(lastMatchEnd, i - lastMatchEnd));
                    result.Add(str);
                    // Jump to the last character of the separator; the loop increment moves past it.
                    i += sepLen - 1;
                    lastMatchEnd = i + 1;
                    break;
                }
            }
        }
        if (lastMatchEnd != length)
        {
            result.Add(searchStr.Substring(lastMatchEnd));
        }
        return result;
    }
    
    string input = "123xx456yy789";
    string[] delimiters = { "xx", "yy" };

    // nextPosition[i] holds the index of the next occurrence of delimiters[i] in input,
    // or -1 when none is left. Searches are ordinal so that matches are exact
    // character sequences regardless of the current culture.
    int[] nextPosition = delimiters.Select(d => input.IndexOf(d, System.StringComparison.Ordinal)).ToArray();
    List<string> result = new List<string>();
    int pos = 0;
    while (true) {
      // Pick the delimiter whose next occurrence comes first (earlier array entry wins ties).
      int firstPos = int.MaxValue;
      string delimiter = null;
      for (int i = 0; i < nextPosition.Length; i++) {
        if (nextPosition[i] != -1 && nextPosition[i] < firstPos) {
          firstPos = nextPosition[i];
          delimiter = delimiters[i];
        }
      }
      if (firstPos != int.MaxValue) {
        // Emit the text before the delimiter, then the delimiter itself.
        result.Add(input.Substring(pos, firstPos - pos));
        result.Add(delimiter);
        pos = firstPos + delimiter.Length;
        // Refresh only the cached positions that now lie before the consumed prefix.
        for (int i = 0; i < nextPosition.Length; i++) {
          if (nextPosition[i] != -1 && nextPosition[i] < pos) {
            nextPosition[i] = input.IndexOf(delimiters[i], pos, System.StringComparison.Ordinal);
          }
        }
      } else {
        // No delimiter left: the remainder is the final piece.
        result.Add(input.Substring(pos));
        break;
      }
    }
    
    /// <summary>
    /// Lazily splits <paramref name="str"/> at every separator occurrence, yielding the
    /// non-empty text segments and the separators themselves in source order.
    /// </summary>
    /// <param name="str">The string to split.</param>
    /// <param name="separators">
    /// Separator strings, tried in array order at each position and compared ordinally;
    /// null or empty entries are ignored. When the array is null or empty,
    /// <paramref name="str"/> is yielded unchanged as the only item.
    /// </param>
    /// <returns>The sequence of segments and separators.</returns>
    public static IEnumerable<string> SplitWithTokens(
        string str,
        string[] separators)
    {
        if (separators == null || separators.Length == 0)
        {
            yield return str;
            yield break;
        }

        int segmentStart = 0;
        int cursor = 0;
        while (cursor < str.Length)
        {
            // Find the first separator (in array order) that occurs at the cursor.
            string hit = null;
            foreach (var candidate in separators)
            {
                if (string.IsNullOrEmpty(candidate))
                {
                    continue;
                }
                if (candidate.Length > str.Length - cursor)
                {
                    continue;
                }
                if (string.CompareOrdinal(str, cursor, candidate, 0, candidate.Length) == 0)
                {
                    hit = candidate;
                    break;
                }
            }

            if (hit == null)
            {
                cursor++;
                continue;
            }

            // Emit pending text (if any), then the separator, and restart after it.
            if (cursor > segmentStart)
            {
                yield return str.Substring(segmentStart, cursor - segmentStart);
            }
            yield return hit;
            cursor += hit.Length;
            segmentStart = cursor;
        }

        if (segmentStart < str.Length)
        {
            yield return str.Substring(segmentStart, str.Length - segmentStart);
        }
    }
    
        /// <summary>
        /// Appends to <paramref name="final"/> the pieces of <paramref name="src"/> and the
        /// delimiters between them, in source order. No empty strings are added.
        /// </summary>
        /// <param name="src">The string to split; an empty string adds nothing.</param>
        /// <param name="delims">
        /// Delimiter strings, compared ordinally; null or empty entries are ignored.
        /// When several delimiters match at the current position, the first one in array order is used.
        /// </param>
        /// <param name="final">
        /// List receiving the tokens. It is only appended to, never reassigned; the
        /// <c>ref</c> modifier is kept so existing callers continue to compile.
        /// </param>
        static void Split(string src, string[] delims, ref List<string> final)
        {
            // Iterative scan with an offset instead of recursing on a shortened copy:
            // stack depth and copying no longer grow with the number of tokens.
            int pos = 0;
            while (pos < src.Length)
            {
                int end = src.Length;
                foreach (string delim in delims)
                {
                    // An empty delimiter would match at every position without consuming
                    // any text, so it can never make progress; skip it.
                    if (string.IsNullOrEmpty(delim))
                    {
                        continue;
                    }

                    //get the index of the next occurrence of this delim
                    int indexOfDelim = src.IndexOf(delim, pos, System.StringComparison.Ordinal);
                    //check to see if this delim is at the current position
                    if (indexOfDelim == pos)
                    {
                        end = pos + delim.Length;
                        break;
                    }
                    //see if this delim comes before previously searched delims
                    if (indexOfDelim != -1 && indexOfDelim < end)
                    {
                        end = indexOfDelim;
                    }
                }
                final.Add(src.Substring(pos, end - pos));
                pos = end;
            }
        }