C# 中用空格分割字符串
标签:c#, regex, string-split
我想用空格分隔字符串,但位于双引号("text")或单引号('text')内的文本除外。我正在使用以下函数执行此操作:
/// <summary>
/// Splits a keyword expression into tokens separated by spaces, keeping a run that starts
/// with a double or single quote together as one token up to its closing quote.
/// </summary>
/// <param name="keywordExpressionValue">
/// Expression to split. A null, empty, or all-whitespace value yields an empty array.
/// </param>
/// <param name="isUniqueKeywordReq">
/// When true, a token that is already present in the result is not added a second time.
/// </param>
/// <returns>
/// The tokens in order of appearance. Leading and trailing double quotes are stripped from
/// each token; single quotes are left in place. An expression that contains no space at all
/// is returned as-is in a one-element array (quotes included).
/// </returns>
/// <remarks>
/// BACKSLASH, BACKSHASH_QUOTE and SPACE are declared outside this block.
/// NOTE(review): from the way they are used here they are presumably the double-quote,
/// single-quote and space delimiters - confirm against their declarations.
/// </remarks>
public static string[] ParseKeywordExpression(string keywordExpressionValue, bool isUniqueKeywordReq)
{
    // Guard first: calling Trim() on a null reference would throw before any null check ran.
    if (keywordExpressionValue == null)
    {
        return new string[0];
    }

    keywordExpressionValue = keywordExpressionValue.Trim();
    if (keywordExpressionValue.Length == 0)
    {
        return new string[0];
    }

    int idx = keywordExpressionValue.IndexOf(' ');
    if (idx == -1)
    {
        return new string[] { keywordExpressionValue };
    }

    ArrayList extractedList = new ArrayList();
    int count = keywordExpressionValue.Length;
    while (count > 0)
    {
        if (keywordExpressionValue[0] == '"')
        {
            // Look for the closing delimiter, skipping any that is preceded by a backslash.
            int closing = keywordExpressionValue.IndexOf(BACKSLASH, 1, keywordExpressionValue.Length - 1);
            while (closing > 0 && keywordExpressionValue[closing - 1] == '\\')
            {
                closing = keywordExpressionValue.IndexOf(BACKSLASH, closing + 1, keywordExpressionValue.Length - closing - 1);
            }

            // No closing delimiter: treat the rest of the expression as the token instead of
            // indexing with -1 and throwing.
            idx = closing < 0 ? keywordExpressionValue.Length : closing + 1;
        }
        else if (keywordExpressionValue[0] == '\'')
        {
            int closing = keywordExpressionValue.IndexOf(BACKSHASH_QUOTE, 1, keywordExpressionValue.Length - 1);
            while (closing > 0 && keywordExpressionValue[closing - 1] == '\\')
            {
                closing = keywordExpressionValue.IndexOf(BACKSHASH_QUOTE, closing + 1, keywordExpressionValue.Length - closing - 1);
            }

            idx = closing < 0 ? keywordExpressionValue.Length : closing + 1;
        }

        // Cut the current token off the front and continue with the trimmed remainder.
        string token = keywordExpressionValue.Substring(0, idx);
        keywordExpressionValue = keywordExpressionValue.Substring(idx).Trim();
        AddKeyword(extractedList, token.Trim('"'), isUniqueKeywordReq);

        count = keywordExpressionValue.Length;
        idx = keywordExpressionValue.IndexOf(SPACE);
        if (idx == -1)
        {
            // No separator left: whatever remains is the final token.
            string last = keywordExpressionValue.Trim('"', ' ');
            if (last.Length > 0)
            {
                AddKeyword(extractedList, last, isUniqueKeywordReq);
            }

            break;
        }
    }

    return (string[])extractedList.ToArray(typeof(string));
}

// Appends keyword to extractedList; when uniqueOnly is set, a keyword already in the list is skipped.
private static void AddKeyword(ArrayList extractedList, string keyword, bool uniqueOnly)
{
    if (!uniqueOnly || !extractedList.Contains(keyword))
    {
        extractedList.Add(keyword);
    }
}
是否有其他方法实现此功能,或者此函数是否可以优化?
例如,我希望将字符串
%ABC% %aasdf% aalasdjfas "c:\Document and Setting\Program Files\abc.exe"
拆分为
%ABC%
%aasdf%
aalasdjfas
"c:\Document and Setting\Program Files\abc.exe"
处理单引号和双引号的最简单的正则表达式:
("((\\")|([^"]))*")|('((\\')|([^']))*')|(\S+)
所以基本上四到五行代码就足够了。
该表达式的解释如下:
Main structure:
("((\\")|([^"]))*") Double-quoted token
| , or
('((\\')|([^']))*') single-quoted token
| , or
(\S+) any group of non-space characters
Double-quoted token:
( Group starts
" Initial double-quote
( Inner group starts
(\\") Either a backslash followed by a double-quote
| , or
([^"]) any non-double-quote character
)* The inner group repeats any number of times (or zero)
" Ending double-quote
)
Single-quoted token:
( Group starts
' Initial single-quote
( Inner group starts
(\\') Either a backslash followed by a single-quote
| , or
([^']) any non-single-quote character
)* The inner group repeats any number of times (or zero)
' Ending single-quote
)
Non-space characters:
( Group starts
\S Non-white-space character
+ , repeated at least once
) Group ends
如果您不喜欢正则表达式,此方法应该能够拆分带引号的字符串,并忽略连续空格:
/// <summary>
/// Splits <paramref name="input"/> on space characters, keeping text enclosed in double or
/// single quotes together. Runs of consecutive spaces produce no empty tokens.
/// </summary>
/// <param name="input">Text to split. A null value yields an empty sequence.</param>
/// <returns>
/// A lazily evaluated sequence of tokens. Quoted tokens keep their surrounding quote
/// characters. A quoted section that closes while the pending token is only two characters
/// long (an empty pair such as "") is dropped. An unterminated quote is returned as the
/// final token.
/// </returns>
public IEnumerable<string> SplitString(string input)
{
    if (input == null)
    {
        yield break;
    }

    // '\0' means "not inside a quoted section"; otherwise it holds the quote character
    // that opened the section and is the only character that can close it.
    const char NoQuote = '\0';
    var activeQuote = NoQuote;
    var token = new StringBuilder();

    foreach (var c in input)
    {
        if (activeQuote != NoQuote)
        {
            // Inside quotes everything is literal, including spaces and the other quote
            // kind, so "it's" and 'say "hi"' each stay in one piece.
            token.Append(c);
            if (c != activeQuote)
            {
                continue;
            }

            if (token.Length > 2)
            {
                yield return token.ToString();
            }

            token.Clear();
            activeQuote = NoQuote;
        }
        else if (c == '"' || c == '\'')
        {
            activeQuote = c;
            token.Append(c);
        }
        else if (c == ' ')
        {
            if (token.Length == 0)
            {
                continue;
            }

            yield return token.ToString();
            token.Clear();
        }
        else
        {
            token.Append(c);
        }
    }

    // Flush the trailing token (also covers an unterminated quoted section).
    if (token.Length > 0)
    {
        yield return token.ToString();
    }
}
public IEnumerable拆分字符串(字符串输入)
{
var isInDoubleQuote=false;
var isInSingleQuote=false;
var sb=新的StringBuilder();
foreach(输入中的var c)
{
如果(!isInDoubleQuote&&c=='“'”)
{
isInDoubleQuote=true;
sb.附加(c);
}
否则如果(isInDoubleQuote)
{
sb.附加(c);
如果(c!=“”)
继续;
如果(某人长度>2)
让某人返回字符串();
sb=sb.Clear();
isInDoubleQuote=false;
}
如果(!isInSingleQuote&&c=='\'',则为else)
{
isInSingleQuote=true;
sb.附加(c);
}
否则,如果(isInSingleQuote)
{
sb.附加(c);
如果(c!='\'')
继续;
如果(某人长度>2)
让某人返回字符串();
sb=sb.Clear();
isInSingleQuote=false;
}
else if(c==“”)
{
如果(sb.Length==0)
继续;
让某人返回字符串();
(某人清楚地);
}
其他的
sb.附加(c);
}
如果(某人长度>0)
让某人返回字符串();
}
编辑:改用 yield 和 StringBuilder,并将返回类型更改为 IEnumerable<string>。
我在模式中使用十六进制转义 \x27 和 \x22 来表示单引号和双引号,这使模式的 C# 字面量文本更易于阅读和编写。
还使用了 IgnorePatternWhitespace,因为它允许对模式添加注释以提高可读性,而不影响正则表达式的处理。
// Sample line mixing a single-quoted token, bare tokens and a double-quoted path.
string data = @"'single' %ABC% %aasdf% aalasdjjfas ""c:\Document and Setting\Program Files\abc.exe""";

// Commented pattern: the inline (?x) option lets the explanation live inside the pattern text.
string pattern = @"(?xm) # Tell the regex compiler we are commenting (x = IgnorePatternWhitespace)
# and tell the compiler this is multiline (m),
# In Multiline the ^ matches each start line and $ is each EOL
# -Pattern Start-
^( # Start at the beginning of the line always
(?![\r\n]|$) # Stop the match if EOL or EOF found.
(?([\x27\x22]) # Regex If to check for single/double quotes
(?:[\x27\x22]) # \\x27\\x22 are single/double quotes
(?<Token>[^\x27\x22]+) # Match this in the quotes and place in Named match Token
(?:[\x27\x22])
| # or (else) part of If when Not within quotes
(?<Token>[^\s\r\n]+) # Not within quotes, but put it in the Token match group
) # End of Pattern OR
(?:\s?) # Either a space or EOL/EOF
)+ # 1 or more tokens of data.
";

// One match is taken; the token values are read from the captures of the "Token" group.
var lineMatch = Regex.Match(data, pattern);
var tokenCaptures = lineMatch.Groups["Token"].Captures;
var tokenValues = tokenCaptures.OfType<Capture>().Select(capture => capture.Value);
Console.WriteLine(string.Join(" | ", tokenValues));
/* Output
single | %ABC% | %aasdf% | aalasdjjfas | c:\Document and Setting\Program Files\abc.exe
*/
string data=@“'single'%ABC%%aasdf%aalasdjfas”“c:\Document and Setting\Program Files\ABC.exe”“;
字符串模式=@”(?xm)#告诉正则表达式编译器我们正在注释(x=IgnorePatternWhitespace)
#告诉编译器这是多行(m),
#在多行中,^匹配每个起始行,$是每个下线
#-模式启动-
^(#始终从行首开始
(?![\r\n]|$)#如果发现EOL或EOF,请停止匹配。
(?([\x27\x22])#正则表达式,如果要检查单引号/双引号
(?:[\x27\x22])#\\x27\\x22是单引号/双引号
(?[^\x27\x22]+)#在引号中匹配此项,并将其放入命名匹配标记中
(?:[\x27\x22])
|#或(否则)不在引号内时的部分If
(?[^\s\r\n]+)#不在引号内,但将其放在标记匹配组中
)#结束模式或
(?:\s?)#空间或EOL/EOF
)+#1个或多个数据标记。
";
Console.WriteLine(string.Join(“|”),
Regex.Match(数据、模式)
.组[“令牌”]
.捕获
第()类
.选择(cp=>cp.值)
)
);
/*输出
单个|%ABC%|%aasdf%| aalasdjfas | c:\Document and Setting\Program Files\ABC.exe
*/
以上是基于我写的以下两篇博客文章:
评论:
能否使用 CSV 正则表达式,只是用 \s 而不是逗号作为分隔符?
@BradChristie 我已编辑了关于期望输出的说明。我不认为 CSV 正则表达式会有帮助。
是的,它对双引号有效,但对单引号无效,例如:%ABC% %aasdf% aalasdjfas "c:\document and Setting\Program Files\ABC.exe" 'c:\document and Setting\Program Files\ABC.exe'
已将我的答案更新为也包括单引号。
非常好,特别是解释。
这将产生大量需要 GC 回收的临时字符串,不是吗?如果您不打算多次遍历结果,只需用 foreach 遍历它们,然后将返回类型更改为 IEnumerable<string>,并把对 output.Add 的调用替换为 yield return 当前字符串,这是个好主意。在这种情况下,使用 StringBuilder 而不是大量的字符串连接是有意义的。
我完全同意,@JonHanna。yield return 是 C# 的一项未被充分利用的功能。关于 StringBuilder 的意见是有道理的,但由于它可能仅用于解析命令行参数序列,因此性能影响不大。但无论如何,没有任何借口可以为草率的代码辩解。
@JonHanna yield return 到底做了什么……抱歉,我不太了解这些与性能相关的内容。
@adcool2007 试试看。yield return 将结果返回给正在遍历它的代码,然后继续执行下一部分工作。真正发生的是,代码被编译成一个方法,该方法返回一个隐藏的匿名类,该类实现了 IEnumerable<string>,您的方法体被用来生成构造函数、Current、Dispose(如果您的代码中有 using 块),最重要的是,
我很高兴你找到了你的
// Sample line mixing a single-quoted token, bare tokens and a double-quoted path.
string data = @"'single' %ABC% %aasdf% aalasdjjfas ""c:\Document and Setting\Program Files\abc.exe""";

// Commented pattern: the inline (?x) option lets the explanation live inside the pattern text.
string pattern = @"(?xm) # Tell the regex compiler we are commenting (x = IgnorePatternWhitespace)
# and tell the compiler this is multiline (m),
# In Multiline the ^ matches each start line and $ is each EOL
# -Pattern Start-
^( # Start at the beginning of the line always
(?![\r\n]|$) # Stop the match if EOL or EOF found.
(?([\x27\x22]) # Regex If to check for single/double quotes
(?:[\x27\x22]) # \\x27\\x22 are single/double quotes
(?<Token>[^\x27\x22]+) # Match this in the quotes and place in Named match Token
(?:[\x27\x22])
| # or (else) part of If when Not within quotes
(?<Token>[^\s\r\n]+) # Not within quotes, but put it in the Token match group
) # End of Pattern OR
(?:\s?) # Either a space or EOL/EOF
)+ # 1 or more tokens of data.
";

// One match is taken; the token values are read from the captures of the "Token" group.
var lineMatch = Regex.Match(data, pattern);
var tokenCaptures = lineMatch.Groups["Token"].Captures;
var tokenValues = tokenCaptures.OfType<Capture>().Select(capture => capture.Value);
Console.WriteLine(string.Join(" | ", tokenValues));
/* Output
single | %ABC% | %aasdf% | aalasdjjfas | c:\Document and Setting\Program Files\abc.exe
*/