Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/regex/16.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# 在字符串中解析这个字符串的最佳方法是什么?_C#_Regex_String_Parsing - Fatal编程技术网

C# 在字符串中解析这个字符串的最佳方法是什么?

C# 在字符串中解析这个字符串的最佳方法是什么?,c#,regex,string,parsing,C#,Regex,String,Parsing,我有以下字符串: string fullString = "group = '2843360' and (team in ('TEAM1', 'TEAM2','TEAM3'))" 我想把这个字符串解析成 string group = ParseoutGroup(fullString); // Expect "2843360" string[] teams = ParseoutTeamNames(fullString); // Expect array with three items

我有以下字符串:

 string fullString = "group = '2843360' and (team in ('TEAM1', 'TEAM2','TEAM3'))"
我想把这个字符串解析成

 string group = ParseoutGroup(fullString);  // Expect "2843360"
 string[] teams = ParseoutTeamNames(fullString); // Expect array with three items
在完整字符串的示例中,我可以列出一个或多个团队(不总是像上面那样列出三个)


我的代码有一部分工作,但我的代码感觉非常粗糙,不太适合未来,所以我想看看这里是否有更好的正则表达式解决方案,或者有一种更优雅的方法来解析这个完整字符串中的值?以后可能会有其他东西添加到字符串中,所以我希望它尽可能简单。

在最简单的情况下,正则表达式可能是最好的答案。 不幸的是,在这种情况下,我们似乎需要解析SQL语言的一个子集。虽然可以用正则表达式来解决这个问题,但它们不是用来解析复杂语言(嵌套括号和转义字符串)的

需求也可能随着时间的推移而演变,需要解析更复杂的结构

如果公司政策允许,我将选择构建内部DSL来解析这个字符串

我最喜欢的构建内部DSL的工具之一是 Sprache(下面示例中的 Parse.* 解析器组合子即来自该库)

下面您可以找到一个使用内部DSL方法的示例解析器

在代码中,我定义了原语来处理所需的SQL运算符,并用它们组成了最终的解析器

    /// <summary>
    /// Composes the primitive parsers into a parser for the sample filter
    /// expression and checks the extracted group id and team names.
    /// </summary>
    [Test]
    public void Test()
    {
        const string fullString = "group = '2843360' and (team in ('TEAM1', 'TEAM2','TEAM3'))";
        var expectedTeams = new[] {"TEAM1", "TEAM2", "TEAM3"};

        // group = '...'  and  ( team in ( '...', ... ) )
        var resultParser =
            from groupValue in OperatorEquals("group")
            from conjunction in OperatorEnd()
            from teamValues in Brackets(OperatorIn("team"))
            select new {@group = groupValue, team = teamValues};

        var result = resultParser.Parse(fullString);

        Assert.That(result.group, Is.EqualTo("2843360"));
        Assert.That(result.team, Is.EquivalentTo(expectedTeams));
    }

    // Matches a comma with optional whitespace on either side and yields the comma.
    // Token() wraps a parser with leading and trailing Parse.WhiteSpace.Many().
    private static readonly Parser<char> CellSeparator = Parse.Char(',').Token();

    // Matches a single backslash; Escaped() uses it as the escape prefix.
    private static readonly Parser<char> QuoteEscape = Parse.Char('\\');

    /// <summary>
    /// Builds a parser that expects the escape character (<see cref="QuoteEscape"/>)
    /// immediately followed by <paramref name="following"/>.
    /// </summary>
    /// <param name="following">Parser for the element that comes after the escape character.</param>
    /// <returns>A parser yielding the result of <paramref name="following"/>; the escape character is discarded.</returns>
    private static Parser<T> Escaped<T>(Parser<T> following)
    {
        return QuoteEscape.Then(_ => following);
    }

    // Matches the single-quote character that opens and closes a quoted cell.
    private static readonly Parser<char> QuotedCellDelimiter = Parse.Char('\'');

    // Matches one character of a quoted cell's content: either an escaped
    // delimiter (\') or any character that is not the delimiter.
    //
    // The escaped alternative must be tried FIRST. With the opposite order the
    // backslash of \' is consumed as ordinary content and the following quote
    // then terminates the cell, so the escape could never take effect.
    // Or() backtracks, so a backslash that is not followed by a quote still
    // falls through to the second alternative and is kept literally.
    private static readonly Parser<char> QuotedCellContent =
        Escaped(QuotedCellDelimiter).Or(Parse.AnyChar.Except(QuotedCellDelimiter));

    // Matches a delimiter, zero or more content characters, and a closing
    // delimiter; yields only the text between the delimiters.
    private static readonly Parser<string> QuotedCell =
        QuotedCellDelimiter
            .Then(_ => QuotedCellContent.Many().Text())
            .Then(text => QuotedCellDelimiter.Select(_ => text));

    /// <summary>
    /// Builds a parser for <c>column = 'value'</c>, allowing optional
    /// whitespace around the equals sign.
    /// </summary>
    /// <param name="column">Literal column name expected at the current position.</param>
    /// <returns>A parser yielding the text inside the quotes.</returns>
    private static Parser<string> OperatorEquals(string column)
    {
        // Token() surrounds '=' with Parse.WhiteSpace.Many() on both sides.
        return Parse.String(column)
            .Then(_ => Parse.Char('=').Token())
            .Then(_ => QuotedCell);
    }

    /// <summary>
    /// Builds a parser for the literal keyword <c>and</c> surrounded by
    /// optional whitespace.
    /// </summary>
    /// <returns>A parser that yields <c>true</c> once the keyword has been consumed.</returns>
    // NOTE(review): the name says "End" but the keyword matched is "and";
    // possibly intended as "OperatorAnd" — confirm before renaming.
    private static Parser<bool> OperatorEnd()
    {
        return Parse.String("and").Token().Select(_ => true);
    }

    /// <summary>
    /// Wraps <paramref name="contentParser"/> so that it must appear between
    /// <c>(</c> and <c>)</c>, with optional whitespace inside the parentheses.
    /// </summary>
    /// <param name="contentParser">Parser for what sits between the parentheses.</param>
    /// <returns>A parser yielding the result of <paramref name="contentParser"/>.</returns>
    private static Parser<T> Brackets<T>(Parser<T> contentParser)
    {
        Parser<char> opening = Parse.Char('(');
        Parser<char> closing = Parse.Char(')');

        return opening
            .Then(_ => Parse.WhiteSpace.Many())
            .Then(_ => contentParser)
            .Then(inner => Parse.WhiteSpace.Many()
                .Then(_ => closing)
                .Select(_ => inner));
    }

    /// <summary>
    /// Builds a parser for one or more quoted cells separated by commas.
    /// </summary>
    /// <returns>A parser yielding the cell contents in source order.</returns>
    private static Parser<IEnumerable<string>> ComaSeparated()
    {
        // A separator followed by another quoted cell; yields the cell only.
        Parser<string> followingCell =
            from separator in CellSeparator
            from cell in QuotedCell
            select cell;

        return QuotedCell.Then(first =>
            followingCell.Many().Select(others => Cons(first, others)));
    }

    /// <summary>
    /// Builds a parser for <c>column in ('a', 'b', ...)</c>.
    /// </summary>
    /// <param name="column">Literal column name expected at the current position.</param>
    /// <returns>A parser yielding the quoted values listed inside the parentheses.</returns>
    private static Parser<IEnumerable<string>> OperatorIn(string column)
    {
        return
            from c in Parse.String(column)
            // At least one whitespace character must separate the column name
            // from the keyword, but any amount is accepted (previously exactly
            // one character was required, so "team  in (...)" failed to parse).
            from space1 in Parse.WhiteSpace.AtLeastOnce()
            from opIn in Parse.String("in")
            from space2 in Parse.WhiteSpace.Many()
            from content in Brackets(ComaSeparated())
            from space3 in Parse.WhiteSpace.Many()
            select content;
    }

    /// <summary>
    /// Lazily yields <paramref name="head"/> followed by every element of
    /// <paramref name="rest"/>.
    /// </summary>
    /// <param name="head">First element of the resulting sequence.</param>
    /// <param name="rest">Elements that follow <paramref name="head"/>.</param>
    /// <returns>An iterator over the combined sequence.</returns>
    private static IEnumerable<T> Cons<T>(T head, IEnumerable<T> rest)
    {
        yield return head;

        using (IEnumerator<T> cursor = rest.GetEnumerator())
        {
            while (cursor.MoveNext())
                yield return cursor.Current;
        }
    }
[测试]
公开无效测试()
{
string fullString=“组='2843360'和(团队成员('TEAM1'、'TEAM2'、'TEAM3'))”;
结果发生器=
来自@group in OperatorEquals(“集团”)
from@和in OperatorEnd()
括号中的@team(操作员(“团队”))
选择新{@group,@team};
var result=resultParser.Parse(fullString);
Assert.That(result.group,Is.EqualTo(“2843360”);
Assert.That(result.team,Is.equaletto(new[]{“TEAM1”、“TEAM2”、“TEAM3”}));
}
专用静态只读解析器单元分隔符=
来自Parse.WhiteSpace.Many()中的space1
来自Parse.Char(',')中的s
来自Parse.WhiteSpace.Many()中的space2
选择s;
私有静态只读解析器QuoteEscape=Parse.Char('\\');
已转义私有静态解析器(后面是解析器)
{
从Quotescape中的escape返回
从下面的f开始
选择f;
}
私有静态只读解析器QuotedCellDelimiter=Parse.Char('\'');
私有静态只读解析器QuotedCellContent=
Parse.AnyChar.Except(QuotedCellDelimiter).或(转义(QuotedCellDelimiter));
私有静态只读解析器QuotedCell=
从在QuotedCellDelimiter中打开
来自QuotedCellContent.Many().Text()中的内容
QuotedCellDelimiter中的from end
选择内容;
专用静态解析器运算符相等(字符串列)
{
返回
从Parse.String(列)中的c
来自Parse.WhiteSpace.Many()中的space1
来自Parse.Char('=')中的opEquals
来自Parse.WhiteSpace.Many()中的space2
来自QuotedCell中的内容
选择内容;
}
私有静态解析器运算符rend()
{
返回
来自Parse.WhiteSpace.Many()中的space1
来自Parse.String中的c(“and”)
来自Parse.WhiteSpace.Many()中的space2
选择true;
}
专用静态解析器括号(解析器contentParser)
{
从Parse.Char中打开返回(“(”)
来自Parse.WhiteSpace.Many()中的space1
从contentParser中的内容
来自Parse.WhiteSpace.Many()中的space2
从Parse.Char(“)”中的close开始
选择内容;
}
私有静态解析器ComaSeparated()
{
从引入QuotedCell返回
从CellSeparator.Then中的rest开始(=>QuotedCell).Many()
选择Cons(引导、休息);
}
专用静态分析器运算符(字符串列)
{
返回
从Parse.String(列)中的c
来自Parse.WhiteSpace中的空格1
来自Parse.String(“in”)中的opEquals
来自Parse.WhiteSpace.Many()中的space2
来自方括号中的内容(ComaSeparated())
来自Parse.WhiteSpace.Many()中的space3
选择内容;
}
专用静态IEnumerable Cons(T头,IEnumerable休息)
{
回程压头;
foreach(静止的T项)
收益回报项目;
}

我认为您需要研究标记化过程,以获得所需的结果,并考虑括号确定的执行顺序。您可以使用调车场算法来协助标记化和执行顺序

调车场算法的优点是,它允许您定义标记(token),稍后可用于正确地解析字符串并执行相应的操作。虽然它通常适用于数学运算顺序,但它可以根据您的目的进行调整

以下是一些信息:


我通过以下方法实现了这一点:


可能有一个正则表达式解决方案,但如果格式严格,我会先尝试有效的字符串方法。以下内容适用于您的输入

我使用一个自定义类 TeamGroup 来封装复杂性,并将所有相关属性保存在一个对象中:

/// <summary>
/// Holds the group id and team names extracted from a filter string of the
/// form <c>group = '...' and (team in ('...', '...'))</c>.
/// </summary>
public class TeamGroup
{
    /// <summary>Text found between the quotes after <c>group = </c>; null when not found.</summary>
    public string Group { get; set; }

    /// <summary>Team names found inside <c>team in (...)</c>; empty when not found.</summary>
    public string[] Teams { get; set; }

    /// <summary>
    /// Extracts the group and the team list from <paramref name="fullString"/>
    /// using plain substring searches. Parsing stops at the first marker that
    /// cannot be found, and whatever was extracted up to that point is returned.
    /// </summary>
    /// <param name="fullString">The filter expression to scan.</param>
    /// <returns>A <see cref="TeamGroup"/>; never null.</returns>
    public static TeamGroup ParseOut(string fullString)
    {
        const string groupMarker = "group = '";
        const string teamsMarker = " and (team in (";
        char[] padding = { ' ', '\'' };

        var parsed = new TeamGroup { Teams = new string[0] };

        int groupStart = fullString.IndexOf(groupMarker);
        if (groupStart < 0)
            return parsed;
        groupStart += groupMarker.Length;

        int groupEnd = fullString.IndexOf("'", groupStart);
        if (groupEnd < 0)
            return parsed;
        parsed.Group = fullString.Substring(groupStart, groupEnd - groupStart).Trim(padding);

        // Continue searching just past the closing quote of the group value.
        int teamsStart = fullString.IndexOf(teamsMarker, groupEnd + 1);
        if (teamsStart < 0)
            return parsed;
        teamsStart += teamsMarker.Length;

        int teamsEnd = fullString.IndexOf(")", teamsStart);
        if (teamsEnd < 0)
            return parsed;

        string[] rawNames = fullString
            .Substring(teamsStart, teamsEnd - teamsStart)
            .Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

        // Strip surrounding blanks and quotes from every entry.
        parsed.Teams = Array.ConvertAll(rawNames, name => name.Trim(padding));
        return parsed;
    }
}
您可以这样使用它:

// Parse the sample expression and print the group followed by its teams.
var parsed = TeamGroup.ParseOut("group = '2843360' and (team in ('TEAM1', 'TEAM2','TEAM3'))");
string teamList = string.Join(", ", parsed.Teams);
Console.Write("Group: {0} Teams: {1}", parsed.Group, teamList);
产出:

Group: 2843360 Teams: TEAM1, TEAM2, TEAM3

如果fullString不是机器生成的,您可能需要添加一些错误捕获,但这将是开箱即用的,并为您提供一个测试

    /// <summary>
    /// Extracts the quoted value of the <c>group = '...'</c> clause
    /// (case-insensitive).
    /// </summary>
    /// <param name="fullString">The filter expression to scan.</param>
    /// <returns>The text between the quotes of the first group clause.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// No group clause is present. The exception type is the one the previous
    /// indexer-based implementation raised, now with a descriptive message.
    /// </exception>
    public string ParseoutGroup(string fullString)
    {
        // \s* (instead of \s?) accepts any amount of whitespace around '='.
        Match match = Regex.Match(fullString, @"group\s*=\s*'([^']+)'", RegexOptions.IgnoreCase);
        if (!match.Success)
        {
            throw new ArgumentOutOfRangeException(
                "fullString", "No group = '...' clause was found in the input.");
        }

        return match.Groups[1].Value;
    }

    /// <summary>
    /// Extracts the quoted names listed in the <c>team in ('a', 'b', ...)</c>
    /// clause (case-insensitive).
    /// </summary>
    /// <param name="fullString">The filter expression to scan.</param>
    /// <returns>The team names in the order they appear.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// No team clause is present. The exception type is the one the previous
    /// indexer-based implementation raised, now with a descriptive message.
    /// </exception>
    public string[] ParseoutTeamNames(string fullString)
    {
        // \s* (instead of \s?) accepts any amount of whitespace between the
        // tokens, and whitespace is now also allowed before each comma.
        Match match = Regex.Match(
            fullString,
            @"team\s*in\s*\((\s*'([^']+)'\s*,?)+\s*\)",
            RegexOptions.IgnoreCase);
        if (!match.Success)
        {
            throw new ArgumentOutOfRangeException(
                "fullString", "No team in ('...') clause was found in the input.");
        }

        // Group 2 is inside the repeated group, so it holds one capture per name.
        var teams = new List<string>();
        foreach (Capture capture in match.Groups[2].Captures)
        {
            teams.Add(capture.Value);
        }
        return teams.ToArray();
    }

    /// <summary>
    /// Checks that the regex helpers extract the group id and the three team
    /// names, in order, from the sample expression.
    /// </summary>
    [Test]
    public void parser()
    {
        const string input = "group = '2843360' and (team in ('team1', 'team2', 'team3'))";
        string[] expectedTeams = { "team1", "team2", "team3" };

        Assert.AreEqual("2843360", ParseoutGroup(input));

        string[] actualTeams = ParseoutTeamNames(input);
        Assert.AreEqual(expectedTeams.Length, actualTeams.Count());
        for (int i = 0; i < expectedTeams.Length; i++)
        {
            Assert.AreEqual(expectedTeams[i], actualTeams[i]);
        }
    }
publicstringparseoutgroup(stringfullstring)
{
var matches=Regex.matches(fullString,@“group\s?=\s?”([^']+)”,RegexOptions.IgnoreCase);
返回匹配项[0]。组[1]。捕获[0]。值;
}
公共字符串[]ParseoutTeamNames(字符串fullString)
{
var teams=新列表();
var matches=Regex.matches(fullStrin
Group: 2843360 Teams: TEAM1, TEAM2, TEAM3
    /// <summary>
    /// Extracts the quoted value of the <c>group = '...'</c> clause
    /// (case-insensitive).
    /// </summary>
    /// <param name="fullString">The filter expression to scan.</param>
    /// <returns>The text between the quotes of the first group clause.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// No group clause is present. The exception type is the one the previous
    /// indexer-based implementation raised, now with a descriptive message.
    /// </exception>
    public string ParseoutGroup(string fullString)
    {
        // \s* (instead of \s?) accepts any amount of whitespace around '='.
        Match match = Regex.Match(fullString, @"group\s*=\s*'([^']+)'", RegexOptions.IgnoreCase);
        if (!match.Success)
        {
            throw new ArgumentOutOfRangeException(
                "fullString", "No group = '...' clause was found in the input.");
        }

        return match.Groups[1].Value;
    }

    /// <summary>
    /// Extracts the quoted names listed in the <c>team in ('a', 'b', ...)</c>
    /// clause (case-insensitive).
    /// </summary>
    /// <param name="fullString">The filter expression to scan.</param>
    /// <returns>The team names in the order they appear.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// No team clause is present. The exception type is the one the previous
    /// indexer-based implementation raised, now with a descriptive message.
    /// </exception>
    public string[] ParseoutTeamNames(string fullString)
    {
        // \s* (instead of \s?) accepts any amount of whitespace between the
        // tokens, and whitespace is now also allowed before each comma.
        Match match = Regex.Match(
            fullString,
            @"team\s*in\s*\((\s*'([^']+)'\s*,?)+\s*\)",
            RegexOptions.IgnoreCase);
        if (!match.Success)
        {
            throw new ArgumentOutOfRangeException(
                "fullString", "No team in ('...') clause was found in the input.");
        }

        // Group 2 is inside the repeated group, so it holds one capture per name.
        var teams = new List<string>();
        foreach (Capture capture in match.Groups[2].Captures)
        {
            teams.Add(capture.Value);
        }
        return teams.ToArray();
    }

    /// <summary>
    /// Checks that the regex helpers extract the group id and the three team
    /// names, in order, from the sample expression.
    /// </summary>
    [Test]
    public void parser()
    {
        const string input = "group = '2843360' and (team in ('team1', 'team2', 'team3'))";
        string[] expectedTeams = { "team1", "team2", "team3" };

        Assert.AreEqual("2843360", ParseoutGroup(input));

        string[] actualTeams = ParseoutTeamNames(input);
        Assert.AreEqual(expectedTeams.Length, actualTeams.Count());
        for (int i = 0; i < expectedTeams.Length; i++)
        {
            Assert.AreEqual(expectedTeams[i], actualTeams[i]);
        }
    }
statements = statement.split('and')
//So now:
//statements[0] = "group = '2843360' "
//statements[1] = "(team in ('TEAM1', 'TEAM2','TEAM3'))"
foreach s in statements {
    if (s.contains('group')) group = RegexFunctionToExtract_GroupValue(s);
    if (s.contains('team')) teams = RegexFunctionToExtract_TeamValue(s);
}