C# 如何使用.NET将字符串按字符串拆分并包含分隔符?
标签:c#、.net、string。有许多类似的问题,但显然没有完全匹配的,所以我才来提问。我想将任意字符串(例如 123xx456yy789)按一组字符串分隔符(例如 xx、yy)拆分,并在结果中包含分隔符(这里为:123、xx、456、yy、789)。性能好是加分项。如果可能,应避免使用正则表达式。更新:我做了一些性能检查并比较了结果(但我懒得正式核对)。被测试的解决方案为(顺序随机):其他解决方案未经测试,因为它们要么与另一个解决方案相似,要么来得太晚。这是测试代码:
123xx456yy789
)拆分为一个字符串分隔符列表(例如,xx
,yy
),并在结果中包含分隔符(这里:123
,xx
,456
,yy
,789
)
性能好是加分项。如果可能,应避免使用正则表达式。
更新:我做了一些性能检查并比较了结果(但我懒得正式核对)。被测试的解决方案为(顺序随机):
其他解决方案未经测试,因为它们要么与另一个解决方案相似,要么来得太晚
这是测试代码:
/// <summary>
/// Quick-and-dirty benchmark: runs every registered split implementation
/// against every combination of source string and delimiter list and prints
/// the elapsed time together with a preview of the produced pieces.
/// </summary>
class Program
{
    /// <summary>Number of timed calls per (source, delimiters, function) combination.</summary>
    private const int Iterations = 10000;

    // The SplitIncludeDelimiters_* extension methods wrapped below are not
    // defined in this class; they must be supplied elsewhere.
    private static readonly List<Func<string, List<string>, List<string>>> Functions;
    private static readonly List<string> Sources;
    private static readonly List<List<string>> Delimiters;

    /// <summary>
    /// Builds the tables of functions, source strings and delimiter sets that
    /// <see cref="Main"/> iterates over.
    /// </summary>
    static Program ()
    {
        Functions = new List<Func<string, List<string>, List<string>>> ();
        Functions.Add ((s, l) => s.SplitIncludeDelimiters_Gabe (l).ToList ());
        Functions.Add ((s, l) => s.SplitIncludeDelimiters_Guffa (l).ToList ());
        Functions.Add ((s, l) => s.SplitIncludeDelimiters_Naive (l).ToList ());
        Functions.Add ((s, l) => s.SplitIncludeDelimiters_Regex (l).ToList ());

        Sources = new List<string> ();
        Sources.Add ("");
        Sources.Add (Guid.NewGuid ().ToString ());

        // Long source: ten groups of ten "<digit>**<ticks>" items, each group
        // terminated by "-".
        string str = "";
        for (int outer = 0; outer < 10; outer++) {
            for (int i = 0; i < 10; i++) {
                str += i + "**" + DateTime.UtcNow.Ticks;
            }
            str += "-";
        }
        Sources.Add (str);

        Delimiters = new List<List<string>> ();
        Delimiters.Add (new List<string> () { });
        Delimiters.Add (new List<string> () { "-" });
        Delimiters.Add (new List<string> () { "**" });
        Delimiters.Add (new List<string> () { "-", "**" });
    }

    /// <summary>One measurement: which combination was run, how long it took, and what it produced.</summary>
    private class Result
    {
        public readonly int FuncID;
        public readonly int SrcID;
        public readonly int DelimID;
        public readonly long Milliseconds;
        public readonly List<string> Output;

        public Result (int funcID, int srcID, int delimID, long milliseconds, List<string> output)
        {
            FuncID = funcID;
            SrcID = srcID;
            DelimID = delimID;
            Milliseconds = milliseconds;
            Output = output;
        }

        /// <summary>
        /// Writes two console lines: the combination with its timing, then the
        /// piece count followed by the first ten pieces (each cut to 15
        /// characters plus "..." when longer).
        /// </summary>
        public void Print ()
        {
            Console.WriteLine ("S " + SrcID + "\tD " + DelimID + "\tF " + FuncID + "\t" + Milliseconds + "ms");
            Console.WriteLine (Output.Count + "\t" + string.Join (" ", Output.Take (10).Select (x => x.Length < 15 ? x : x.Substring (0, 15) + "...").ToArray ()));
        }
    }

    /// <summary>
    /// Runs the benchmark over every source / delimiter-set / function
    /// combination and prints each result as soon as it is measured.
    /// </summary>
    static void Main (string[] args)
    {
        var results = new List<Result> ();
        // Bounds come from the tables themselves so that a newly added source,
        // delimiter set or function is benchmarked automatically.
        for (int srcID = 0; srcID < Sources.Count; srcID++) {
            for (int delimID = 0; delimID < Delimiters.Count; delimID++) {
                for (int funcId = Functions.Count - 1; funcId >= 0; funcId--) { // i tried various orders in my tests
                    var func = Functions[funcId];
                    var src = Sources[srcID];
                    var del = Delimiters[delimID];

                    // Untimed warm-up call: keeps one-time costs (JIT etc.) out
                    // of the measurement and doubles as the output sample.
                    var list = func (src, del);

                    Stopwatch sw = Stopwatch.StartNew ();
                    for (int i = 0; i < Iterations; i++) {
                        func (src, del);
                    }
                    sw.Stop ();

                    var res = new Result (funcId, srcID, delimID, sw.ElapsedMilliseconds, list);
                    results.Add (res);
                    res.Print ();
                }
            }
        }
    }
}
类程序
{
私有静态只读列表函数;
私有静态只读列表源;
私有静态只读列表分隔符;
静态程序()
{
函数=新列表();
Add((s,l)=>s.spliteIncludeDelimiters_Gabe(l.ToList());
Add((s,l)=>s.splitedElimiters_Guffa(l).ToList());
Add((s,l)=>s.spliteIncludeDelimiters_Naive(l.ToList());
Add((s,l)=>s.spliteIncludeDelimiters_Regex(l.ToList());
来源=新列表();
资料来源。加上(“”);
Add(Guid.NewGuid().ToString());
字符串str=“”;
用于(int-outer=0;outer<10;outer++){
对于(int i=0;i<10;i++){
str+=i+“**”+DateTime.UtcNow.Ticks;
}
str+=“-”;
}
资料来源:添加(str);
分隔符=新列表();
Delimiters.Add(新列表(){});
Delimiters.Add(新列表(){“-”});
Delimiters.Add(新列表(){**});
添加(新列表(){“-”,“**”});
}
私有类结果
{
公共只读int-FuncID;
公共只读int-SrcID;
公共只读;
公共只读长毫秒;
公共只读列表输出;
公共结果(int funcID、int srcID、int delimID、长毫秒、列表输出)
{
FuncID=FuncID;
SrcID=SrcID;
DelimID=DelimID;
毫秒=毫秒;
输出=输出;
}
公开作废印刷品()
{
Console.WriteLine(“S”+SrcID+“\tD”+DelimID+“\tF”+FuncID+“\t”+毫秒+“毫秒”);
Console.WriteLine(Output.Count+“\t”+string.Join(“),Output.Take(10)。选择(x=>x.Length<15?x:x.Substring(0,15)+“…”).ToArray());
}
}
静态void Main(字符串[]参数)
{
var results=新列表();
对于(int-srcID=0;srcID<3;srcID++){
对于(int-delimID=0;delimID<4;delimID++){
对于(int-funcId=3;funcId>=0;funcId--){//我在测试中尝试了各种顺序
秒表sw=新秒表();
sw.Start();
var func=函数[funcId];
var src=源[srcID];
var del=分隔符[delimID];
对于(int i=0;i<10000;i++){
func(src,del);
}
var list=func(src,del);
sw.Stop();
var res=新结果(funcId、srcID、delimID、sw.elapsedmillisons、list);
结果:添加(res);
res.Print();
}
}
}
}
}
如你所见,这其实只是一个快速而粗糙的测试,但我以不同的顺序多次运行,结果始终非常一致。对于较大的数据集,测得的耗时在毫秒到秒的范围内。我在下面的评估中忽略了低毫秒范围内的值,因为它们在实践中可以忽略不计。这是在我的机器上的输出:
S 0 D 0 F 3 11ms
1
S 0 D 0 F 2 7ms
1
S 0 D 0 F 1 6ms
1
S 0 D 0 F 0 4ms
0
S 0 D 1 F 3 28ms
1
S 0 D 1 F 2 8ms
1
S 0 D 1 F 1 7ms
1
S 0 D 1 F 0 3ms
0
S 0 D 2 F 3 30ms
1
S 0 D 2 F 2 8ms
1
S 0 D 2 F 1 6ms
1
S 0 D 2 F 0 3ms
0
S 0 D 3 F 3 30ms
1
S 0 D 3 F 2 10ms
1
S 0 D 3 F 1 8ms
1
S 0 D 3 F 0 3ms
0
S 1 D 0 F 3 9ms
1 9e5282ec-e2a2-4...
S 1 D 0 F 2 6ms
1 9e5282ec-e2a2-4...
S 1 D 0 F 1 5ms
1 9e5282ec-e2a2-4...
S 1 D 0 F 0 5ms
1 9e5282ec-e2a2-4...
S 1 D 1 F 3 63ms
9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37
S 1 D 1 F 2 37ms
9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37
S 1 D 1 F 1 29ms
9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37
S 1 D 1 F 0 22ms
9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37
S 1 D 2 F 3 30ms
1 9e5282ec-e2a2-4...
S 1 D 2 F 2 10ms
1 9e5282ec-e2a2-4...
S 1 D 2 F 1 10ms
1 9e5282ec-e2a2-4...
S 1 D 2 F 0 12ms
1 9e5282ec-e2a2-4...
S 1 D 3 F 3 73ms
9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37
S 1 D 3 F 2 40ms
9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37
S 1 D 3 F 1 33ms
9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37
S 1 D 3 F 0 30ms
9 9e5282ec - e2a2 - 4265 - 8276 - 6dbb50fdae37
S 2 D 0 F 3 10ms
1 0**634226552821...
S 2 D 0 F 2 109ms
1 0**634226552821...
S 2 D 0 F 1 5ms
1 0**634226552821...
S 2 D 0 F 0 127ms
1 0**634226552821...
S 2 D 1 F 3 184ms
21 0**634226552821... - 0**634226552821... - 0**634226552821... - 0**634226
552821... - 0**634226552821... -
S 2 D 1 F 2 364ms
21 0**634226552821... - 0**634226552821... - 0**634226552821... - 0**634226
552821... - 0**634226552821... -
S 2 D 1 F 1 134ms
21 0**634226552821... - 0**634226552821... - 0**634226552821... - 0**634226
552821... - 0**634226552821... -
S 2 D 1 F 0 517ms
20 0**634226552821... - 0**634226552821... - 0**634226552821... - 0**634226
552821... - 0**634226552821... -
S 2 D 2 F 3 688ms
201 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6
34226552821217... **
S 2 D 2 F 2 2404ms
201 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6
34226552821217... **
S 2 D 2 F 1 874ms
201 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6
34226552821217... **
S 2 D 2 F 0 717ms
201 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6
34226552821217... **
S 2 D 3 F 3 1205ms
221 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6
34226552821217... **
S 2 D 3 F 2 3471ms
221 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6
34226552821217... **
S 2 D 3 F 1 1008ms
221 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6
34226552821217... **
S 2 D 3 F 0 1095ms
220 0 ** 634226552821217... ** 634226552821217... ** 634226552821217... ** 6
34226552821217... **
s0d0f3 11ms
1.
s0d0f2 7ms
1.
s0d0f16ms
1.
s0d0f04ms
0
S 0 D 1 F 3 28毫秒
1.
s0d1f2 8ms
1.
s0d1f1 7ms
1.
s0d1f03ms
0
s0d2f330ms
1.
s0d2f2 8ms
1.
S0D2F16ms
1.
s0d2f03ms
0
s0d3f330ms
1.
S0D3F210ms
1.
s0d3f1 8ms
1.
s0d3f03ms
0
s1d0f39ms
1 9e5282ec-e2a2-4。。。
s1d0f2 6ms
1 9e5282ec-e2a2-4。。。
s1d0f1 5ms
1 9e5282ec-e2a2-4。。。
s1d0f05ms
1 9e5282ec-e2a2-4。。。
s1d1f363ms
9 9e5282ec-e2a2-4265-8276-6dbb50fdae37
s1d1f2 37ms
9 9e5282ec-e2a2-4265-8276-6dbb50fdae37
s1d1f1 29ms
9 9e5282ec-e2a2-4265-8276-6dbb50fdae37
s1d1f022ms
9 9e5282ec-e2a2-4265-8276-6dbb50fdae37
s1d2f330ms
1 9e5282ec-e2a2-4。。。
s1d2f2 10ms
1 9e5282ec-e2a2-4。。。
s1d2f1 10ms
1 9e5282ec-e2a2-4。。。
s1d2f012ms
1 9e5282ec-e2a2-4。。。
s1d3f373ms
9 9e5282ec-e2a2-4265-8276-6dbb50fdae37
s1d3f2 40ms
9 9e5282ec-e2a2-4265-8276-6dbb50fdae37
s1d3f1 33ms
9 9e5282ec-e2a2-4265-8276-6dbb50fdae37
s1d3f0 30ms
9 9e5282ec-e2a2-4265-8276-6dbb50fdae37
S2D0F310ms
1 0**634226552821...
S2D0F2109ms
1 0**634226552821...
S2D0F15ms
1 0**634226552821...
S 2 D 0 F 0 127ms
1 0**634226552821...
S 2 D 1
// Naive approach: surround every delimiter occurrence with ';' markers and
// then split on ';'. Note that Split(';') also splits on any ';' that was
// already part of the source text.
string source = "123xx456yy789";
foreach (string delimiter in delimiters)
{
    string marked = ";" + delimiter + ";";
    source = source.Replace(delimiter, marked);
}
char[] marker = { ';' };
string[] parts = source.Split(marker);
/// <summary>
/// Splits <paramref name="text"/> on any of the given delimiters and yields
/// the pieces and the delimiters in their original order.
/// </summary>
/// <param name="text">The string to split.</param>
/// <param name="delimiters">
/// Delimiter strings; null or empty entries are ignored. When there is no
/// usable delimiter the whole text is yielded as a single piece.
/// </param>
/// <returns>A lazily evaluated sequence of pieces interleaved with delimiters.</returns>
public IEnumerable<string> SplitX (string text, string[] delimiters)
{
    // string.Split falls back to splitting on white space when it is given no
    // non-empty separator, which would silently drop the white space here.
    if (delimiters == null || !delimiters.Any (d => !string.IsNullOrEmpty (d))) {
        yield return text;
        yield break;
    }

    // Offset into text of the piece/delimiter about to be emitted; tracking an
    // offset avoids re-allocating the remaining tail after every piece.
    int pos = 0;
    foreach (string part in text.Split (delimiters, StringSplitOptions.None)) {
        yield return part;
        pos += part.Length;

        // Find which delimiter followed this piece. The comparison is ordinal
        // (like string.Split) and skips null/empty entries, which would
        // otherwise "match" everywhere without consuming any input.
        string delim = null;
        foreach (string candidate in delimiters) {
            if (!string.IsNullOrEmpty (candidate)
                && string.CompareOrdinal (text, pos, candidate, 0, candidate.Length) == 0) {
                delim = candidate;
                break;
            }
        }

        if (delim != null) {
            yield return delim;
            pos += delim.Length;
        }
    }
}
// The alternation is wrapped in a capturing group so that the matched
// delimiters are returned as elements of the split result as well.
string input = "123xx456yy789";
string pattern = "(xx|yy)";
Regex splitter = new Regex(pattern);
string[] result = splitter.Split(input);
// Build "(d1|d2|...)" from the delimiter list, escaping each delimiter so
// that regex metacharacters such as "." are matched literally.
var delimiters = new List<string> { ".", "xx", "yy" };
string[] escaped = delimiters.Select(d => Regex.Escape(d)).ToArray();
string pattern = "(" + String.Join("|", escaped) + ")";
string input = "123xx456yy789";
// To exercise the "nothing to split" branch, make delimiters an empty list.
var delimiters = new List<string> { ".", "xx", "yy", "()" };
if (delimiters.Count == 0)
{
    // nothing to split
    Console.WriteLine(input);
}
else
{
    // Escape every delimiter, join them into one alternation and wrap it in a
    // capturing group so the delimiters show up in the split output.
    string[] escaped = delimiters.Select(d => Regex.Escape(d)).ToArray();
    string alternation = String.Join("|", escaped);
    foreach (string piece in Regex.Split(input, "(" + alternation + ")"))
    {
        Console.WriteLine(piece);
    }
}
// Same escaped alternation as before, but anchored with \b on both sides so a
// delimiter only matches at word boundaries.
string pattern = string.Format(
    @"\b({0})\b",
    String.Join("|", delimiters.Select(d => Regex.Escape(d))));
// Word-boundary variant that additionally consumes white space on either side
// of the delimiter (\s* sits outside the capturing group).
string pattern = string.Format(
    @"\s*\b({0})\b\s*",
    String.Join("|", delimiters.Select(d => Regex.Escape(d))));
/// <summary>
/// Splits <paramref name="searchStr"/> on any of the given separators and
/// returns the pieces interleaved with the separators that were found.
/// </summary>
/// <param name="searchStr">The string to split.</param>
/// <param name="separators">
/// Separator strings, tried in array order at every position. Null or empty
/// entries are ignored; a null array is treated as "no separators".
/// </param>
/// <returns>
/// Pieces and separators in order of appearance. A piece before a separator is
/// always added (it may be empty); a trailing piece is added only when it is
/// non-empty.
/// </returns>
public static List<string> Split(string searchStr, string[] separators)
{
    List<string> result = new List<string>();
    int length = searchStr.Length;
    int separatorCount = separators == null ? 0 : separators.Length;
    int lastMatchEnd = 0;
    int i = 0;

    while (i < length)
    {
        string matched = null;
        for (int j = 0; j < separatorCount; j++)
        {
            string candidate = separators[j];

            // An empty separator has no first character to compare and could
            // never advance the scan, so it is skipped just like null.
            if (string.IsNullOrEmpty(candidate))
            {
                continue;
            }

            int sepLen = candidate.Length;
            // Cheap first-character and remaining-length checks before the
            // full ordinal comparison.
            if (searchStr[i] == candidate[0]
                && sepLen <= length - i
                && (sepLen == 1 || String.CompareOrdinal(searchStr, i, candidate, 0, sepLen) == 0))
            {
                matched = candidate;
                break;
            }
        }

        if (matched == null)
        {
            i++;
            continue;
        }

        result.Add(searchStr.Substring(lastMatchEnd, i - lastMatchEnd));
        result.Add(matched);
        i += matched.Length;
        lastMatchEnd = i;
    }

    if (lastMatchEnd != length)
    {
        result.Add(searchStr.Substring(lastMatchEnd));
    }

    return result;
}
string input = "123xx456yy789";
string[] delimiters = { "xx", "yy" };

// Next known occurrence of every delimiter (-1 = no further occurrence).
// Null/empty delimiters are marked as absent up front: an empty delimiter is
// "found" at every position without consuming input, so the loop below would
// never terminate. Searches are ordinal so matching does not depend on the
// current culture.
int[] nextPosition = delimiters
    .Select(d => string.IsNullOrEmpty(d) ? -1 : input.IndexOf(d, StringComparison.Ordinal))
    .ToArray();
List<string> result = new List<string>();
int pos = 0;
while (true) {
    // Pick the delimiter whose next occurrence comes first; on a tie the one
    // listed first in the array wins.
    int firstPos = int.MaxValue;
    string delimiter = null;
    for (int i = 0; i < nextPosition.Length; i++) {
        if (nextPosition[i] != -1 && nextPosition[i] < firstPos) {
            firstPos = nextPosition[i];
            delimiter = delimiters[i];
        }
    }
    if (firstPos != int.MaxValue) {
        result.Add(input.Substring(pos, firstPos - pos));
        result.Add(delimiter);
        pos = firstPos + delimiter.Length;
        // Only positions that now lie behind the cursor are stale and need to
        // be searched again.
        for (int i = 0; i < nextPosition.Length; i++) {
            if (nextPosition[i] != -1 && nextPosition[i] < pos) {
                nextPosition[i] = input.IndexOf(delimiters[i], pos, StringComparison.Ordinal);
            }
        }
    } else {
        // No delimiter left: the remainder is the final piece.
        result.Add(input.Substring(pos));
        break;
    }
}
/// <summary>
/// Lazily splits <paramref name="str"/> on any of the given separators,
/// yielding the non-empty pieces and the separators in order of appearance.
/// </summary>
/// <param name="str">The string to split.</param>
/// <param name="separators">
/// Separator strings, tried in array order at each position; null or empty
/// entries are skipped. A null or empty array yields <paramref name="str"/>
/// unchanged as the only element.
/// </param>
/// <returns>Pieces interleaved with the separators that were matched.</returns>
public static IEnumerable<string> SplitWithTokens(
    string str,
    string[] separators)
{
    if (separators == null || separators.Length == 0)
    {
        yield return str;
        yield break;
    }

    int tokenStart = 0;
    int cursor = 0;
    while (cursor < str.Length)
    {
        // Look for the first separator that matches at the cursor.
        string hit = null;
        foreach (string candidate in separators)
        {
            if (string.IsNullOrEmpty(candidate))
            {
                continue;
            }
            if (str[cursor] != candidate[0])
            {
                continue;
            }
            if (candidate.Length > str.Length - cursor)
            {
                continue;
            }
            if (candidate.Length != 1
                && string.CompareOrdinal(str, cursor, candidate, 0, candidate.Length) != 0)
            {
                continue;
            }

            hit = candidate;
            break;
        }

        if (hit == null)
        {
            cursor++;
            continue;
        }

        // Emit the text collected since the previous separator, if any.
        if (cursor > tokenStart)
        {
            yield return str.Substring(tokenStart, cursor - tokenStart);
        }

        yield return hit;
        cursor += hit.Length;
        tokenStart = cursor;
    }

    if (tokenStart < str.Length)
    {
        yield return str.Substring(tokenStart);
    }
}
/// <summary>
/// Splits <paramref name="src"/> on any of the given delimiters and appends
/// the pieces and the delimiters, in order, to <paramref name="final"/>.
/// </summary>
/// <param name="src">The string to split; an empty string appends nothing.</param>
/// <param name="delims">
/// Delimiter strings. When several delimiters start at the current position
/// the one listed first wins. Null or empty entries are ignored.
/// </param>
/// <param name="final">The list that receives the tokens.</param>
static void Split(string src, string[] delims, ref List<string> final)
{
    // Iterates with an offset instead of recursing on src.Remove(...), so the
    // stack depth and the amount of string copying no longer grow with the
    // number of tokens.
    int offset = 0;
    while (offset < src.Length)
    {
        int tokenEnd = src.Length;
        foreach (string delim in delims)
        {
            // An empty delimiter is found at every offset without consuming
            // anything, so it could never make progress.
            if (string.IsNullOrEmpty(delim))
            {
                continue;
            }

            // Index of the first occurrence of this delimiter at or after offset.
            int indexOfDelim = src.IndexOf(delim, offset, StringComparison.Ordinal);

            // The delimiter starts right here: it is the next token.
            if (indexOfDelim == offset)
            {
                tokenEnd = offset + delim.Length;
                break;
            }

            // Otherwise the token ends at the earliest delimiter seen so far.
            if (indexOfDelim != -1 && indexOfDelim < tokenEnd)
            {
                tokenEnd = indexOfDelim;
            }
        }

        final.Add(src.Substring(offset, tokenEnd - offset));
        offset = tokenEnd;
    }
}