C# 在字符串中解析这个字符串的最佳方法是什么?
我有以下字符串:C# 在字符串中解析这个字符串的最佳方法是什么?,c#,regex,string,parsing,C#,Regex,String,Parsing,我有以下字符串: string fullString = "group = '2843360' and (team in ('TEAM1', 'TEAM2','TEAM3'))" 我想把这个字符串解析成 string group = ParseoutGroup(fullString); // Expect "2843360" string[] teams = ParseoutTeamNames(fullString); // Expect array with three items
string fullString = "group = '2843360' and (team in ('TEAM1', 'TEAM2','TEAM3'))"
我想把这个字符串解析成
string group = ParseoutGroup(fullString); // Expect "2843360"
string[] teams = ParseoutTeamNames(fullString); // Expect array with three items
在完整字符串的示例中,我可以列出一个或多个团队(不总是像上面那样列出三个)
我的代码有一部分工作,但我的代码感觉非常粗糙,不太适合未来,所以我想看看这里是否有更好的正则表达式解决方案,或者有一种更优雅的方法来解析这个完整字符串中的值?以后可能会有其他东西添加到字符串中,所以我希望它尽可能简单。在最简单的情况下,正则表达式可能是最好的答案。 不幸的是,在这种情况下,我们似乎需要解析SQL语言的一个子集。虽然可以用正则表达式来解决这个问题,但它们不是用来解析复杂语言(嵌套括号和转义字符串)的 需求也可能随着时间的推移而演变,需要解析更复杂的结构 如果公司政策允许,我将选择构建内部DSL来解析这个字符串 我最喜欢的构建内部DLS的工具之一是 下面您可以找到一个使用内部DSL方法的示例解析器 在代码中,我定义了原语来处理所需的SQL运算符,并用它们组成了最终的解析器
[Test]
public void Test()
{
string fullString = "group = '2843360' and (team in ('TEAM1', 'TEAM2','TEAM3'))";
var resultParser =
from @group in OperatorEquals("group")
from @and in OperatorEnd()
from @team in Brackets(OperatorIn("team"))
select new {@group, @team};
var result = resultParser.Parse(fullString);
Assert.That(result.group, Is.EqualTo("2843360"));
Assert.That(result.team, Is.EquivalentTo(new[] {"TEAM1", "TEAM2", "TEAM3"}));
}
private static readonly Parser<char> CellSeparator =
from space1 in Parse.WhiteSpace.Many()
from s in Parse.Char(',')
from space2 in Parse.WhiteSpace.Many()
select s;
private static readonly Parser<char> QuoteEscape = Parse.Char('\\');
private static Parser<T> Escaped<T>(Parser<T> following)
{
return from escape in QuoteEscape
from f in following
select f;
}
private static readonly Parser<char> QuotedCellDelimiter = Parse.Char('\'');
private static readonly Parser<char> QuotedCellContent =
Parse.AnyChar.Except(QuotedCellDelimiter).Or(Escaped(QuotedCellDelimiter));
private static readonly Parser<string> QuotedCell =
from open in QuotedCellDelimiter
from content in QuotedCellContent.Many().Text()
from end in QuotedCellDelimiter
select content;
private static Parser<string> OperatorEquals(string column)
{
return
from c in Parse.String(column)
from space1 in Parse.WhiteSpace.Many()
from opEquals in Parse.Char('=')
from space2 in Parse.WhiteSpace.Many()
from content in QuotedCell
select content;
}
private static Parser<bool> OperatorEnd()
{
return
from space1 in Parse.WhiteSpace.Many()
from c in Parse.String("and")
from space2 in Parse.WhiteSpace.Many()
select true;
}
private static Parser<T> Brackets<T>(Parser<T> contentParser)
{
return from open in Parse.Char('(')
from space1 in Parse.WhiteSpace.Many()
from content in contentParser
from space2 in Parse.WhiteSpace.Many()
from close in Parse.Char(')')
select content;
}
private static Parser<IEnumerable<string>> ComaSeparated()
{
return from leading in QuotedCell
from rest in CellSeparator.Then(_ => QuotedCell).Many()
select Cons(leading, rest);
}
private static Parser<IEnumerable<string>> OperatorIn(string column)
{
return
from c in Parse.String(column)
from space1 in Parse.WhiteSpace
from opEquals in Parse.String("in")
from space2 in Parse.WhiteSpace.Many()
from content in Brackets(ComaSeparated())
from space3 in Parse.WhiteSpace.Many()
select content;
}
private static IEnumerable<T> Cons<T>(T head, IEnumerable<T> rest)
{
yield return head;
foreach (T item in rest)
yield return item;
}
[测试]
公开无效测试()
{
string fullString=“组='2843360'和(团队成员('TEAM1'、'TEAM2'、'TEAM3'))”;
结果发生器=
来自@group in OperatorEquals(“集团”)
from@和in OperatorEnd()
括号中的@team(操作员(“团队”))
选择新{@group,@team};
var result=resultParser.Parse(fullString);
Assert.That(result.group,Is.EqualTo(“2843360”);
Assert.That(result.team,Is.equaletto(new[]{“TEAM1”、“TEAM2”、“TEAM3”}));
}
专用静态只读解析器单元分隔符=
来自Parse.WhiteSpace.Many()中的space1
来自Parse.Char(',')中的s
来自Parse.WhiteSpace.Many()中的space2
选择s;
私有静态只读解析器QuoteEscape=Parse.Char('\\');
已转义私有静态解析器(后面是解析器)
{
从Quotescape中的escape返回
从下面的f开始
选择f;
}
私有静态只读解析器QuotedCellDelimiter=Parse.Char('\'');
私有静态只读解析器QuotedCellContent=
Parse.AnyChar.Except(QuotedCellDelimiter).或(转义(QuotedCellDelimiter));
私有静态只读解析器QuotedCell=
从在QuotedCellDelimiter中打开
来自QuotedCellContent.Many().Text()中的内容
QuotedCellDelimiter中的from end
选择内容;
专用静态解析器运算符相等(字符串列)
{
返回
从Parse.String(列)中的c
来自Parse.WhiteSpace.Many()中的space1
来自Parse.Char('=')中的opEquals
来自Parse.WhiteSpace.Many()中的space2
来自QuotedCell中的内容
选择内容;
}
私有静态解析器运算符rend()
{
返回
来自Parse.WhiteSpace.Many()中的space1
来自Parse.String中的c(“and”)
来自Parse.WhiteSpace.Many()中的space2
选择true;
}
专用静态解析器括号(解析器contentParser)
{
从Parse.Char中打开返回(“(”)
来自Parse.WhiteSpace.Many()中的space1
从contentParser中的内容
来自Parse.WhiteSpace.Many()中的space2
从Parse.Char(“)”中的close开始
选择内容;
}
私有静态解析器ComaSeparated()
{
从引入QuotedCell返回
从CellSeparator.Then中的rest开始(=>QuotedCell).Many()
选择Cons(引导、休息);
}
专用静态分析器运算符(字符串列)
{
返回
从Parse.String(列)中的c
来自Parse.WhiteSpace中的空格1
来自Parse.String(“in”)中的opEquals
来自Parse.WhiteSpace.Many()中的space2
来自方括号中的内容(ComaSeparated())
来自Parse.WhiteSpace.Many()中的space3
选择内容;
}
专用静态IEnumerable Cons(T头,IEnumerable休息)
{
回程压头;
foreach(静止的T项)
收益回报项目;
}
我认为您需要研究标记化过程,以获得所需的结果,并考虑括号确定的执行顺序。您可以使用调车场算法来协助标记化和执行顺序
调车场的优点是,它允许您定义令牌,稍后可用于属性解析字符串和执行正确的操作。虽然它通常适用于数学运算顺序,但它可以根据您的目的进行调整
以下是一些信息:
我通过以下方法实现了这一点:
可能有一个正则表达式解决方案,但如果格式严格,我会先尝试有效的字符串方法。以下内容适用于您的输入 我使用一个自定义类,
TeamGroup
,来封装复杂性并将所有相关属性保存在一个对象中:
public class TeamGroup
{
public string Group { get; set; }
public string[] Teams { get; set; }
public static TeamGroup ParseOut(string fullString)
{
TeamGroup tg = new TeamGroup{ Teams = new string[]{ } };
int index = fullString.IndexOf("group = '");
if (index >= 0)
{
index += "group = '".Length;
int endIndex = fullString.IndexOf("'", index);
if (endIndex >= 0)
{
tg.Group = fullString.Substring(index, endIndex - index).Trim(' ', '\'');
endIndex += 1;
index = fullString.IndexOf(" and (team in (", endIndex);
if (index >= 0)
{
index += " and (team in (".Length;
endIndex = fullString.IndexOf(")", index);
if (endIndex >= 0)
{
string allTeamsString = fullString.Substring(index, endIndex - index);
tg.Teams = allTeamsString.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
.Select(t => t.Trim(' ', '\''))
.ToArray();
}
}
}
}
return tg;
}
}
您可以这样使用它:
string fullString = "group = '2843360' and (team in ('TEAM1', 'TEAM2','TEAM3'))";
TeamGroup tg = TeamGroup.ParseOut(fullString);
Console.Write("Group: {0} Teams: {1}", tg.Group, string.Join(", ", tg.Teams));
产出:
Group: 2843360 Teams: TEAM1, TEAM2, TEAM3
如果fullString不是机器生成的,您可能需要添加一些错误捕获,但这将是开箱即用的,并为您提供一个测试
public string ParseoutGroup(string fullString)
{
var matches = Regex.Matches(fullString, @"group\s?=\s?'([^']+)'", RegexOptions.IgnoreCase);
return matches[0].Groups[1].Captures[0].Value;
}
public string[] ParseoutTeamNames(string fullString)
{
var teams = new List<string>();
var matches = Regex.Matches(fullString, @"team\s?in\s?\((\s*'([^']+)',?\s*)+\)", RegexOptions.IgnoreCase);
foreach (var capture in matches[0].Groups[2].Captures)
{
teams.Add(capture.ToString());
}
return teams.ToArray();
}
[Test]
public void parser()
{
string test = "group = '2843360' and (team in ('team1', 'team2', 'team3'))";
var group = ParseoutGroup(test);
Assert.AreEqual("2843360",group);
var teams = ParseoutTeamNames(test);
Assert.AreEqual(3, teams.Count());
Assert.AreEqual("team1", teams[0]);
Assert.AreEqual("team2", teams[1]);
Assert.AreEqual("team3", teams[2]);
}
publicstringparseoutgroup(stringfullstring)
{
var matches=Regex.matches(fullString,@“group\s?=\s?”([^']+)”,RegexOptions.IgnoreCase);
返回匹配项[0]。组[1]。捕获[0]。值;
}
公共字符串[]ParseoutTeamNames(字符串fullString)
{
var teams=新列表();
var matches=Regex.matches(fullStrin
Group: 2843360 Teams: TEAM1, TEAM2, TEAM3
public string ParseoutGroup(string fullString)
{
var matches = Regex.Matches(fullString, @"group\s?=\s?'([^']+)'", RegexOptions.IgnoreCase);
return matches[0].Groups[1].Captures[0].Value;
}
public string[] ParseoutTeamNames(string fullString)
{
var teams = new List<string>();
var matches = Regex.Matches(fullString, @"team\s?in\s?\((\s*'([^']+)',?\s*)+\)", RegexOptions.IgnoreCase);
foreach (var capture in matches[0].Groups[2].Captures)
{
teams.Add(capture.ToString());
}
return teams.ToArray();
}
[Test]
public void parser()
{
string test = "group = '2843360' and (team in ('team1', 'team2', 'team3'))";
var group = ParseoutGroup(test);
Assert.AreEqual("2843360",group);
var teams = ParseoutTeamNames(test);
Assert.AreEqual(3, teams.Count());
Assert.AreEqual("team1", teams[0]);
Assert.AreEqual("team2", teams[1]);
Assert.AreEqual("team3", teams[2]);
}
statments = statment.split('and')
//So now:
//statments[0] = "group = '2843360' "
//statments[1] = "(team in ('TEAM1', 'TEAM2','TEAM3'))"
foreach s in statments {
if (s.contains('group') group = RegexFunctionToExtract_GroupValue(s) ;
if (s.contains('team') teams = RegexFunctionToExtract_TeamValue(s) ;
}