C# 解析C#中的复杂字符串。

C# 解析C中的复杂字符串。,c#,parsing,vb6,C#,Parsing,Vb6,我目前正在维护一个用C编写的应用程序。我需要编写一个新功能,我遇到了麻烦 我正在从数据库中提取数据,用于用VB6编写的另一个应用程序。有一个字段用于保存树列表的数据。我需要在我的应用程序中重新创建此树。数据如下所示: { { table1~col_b^table1~colc^= }| { { table1~col_b^table2~col_b^= }|{ table2~col

我目前正在维护一个用C#编写的应用程序。我需要编写一个新功能,但遇到了麻烦。

我正在从数据库中提取数据,用于用VB6编写的另一个应用程序。有一个字段用于保存树列表的数据。我需要在我的应用程序中重新创建此树。数据如下所示:

{
    {
        table1~col_b^table1~colc^= 
    }|
    {
        {
            table1~col_b^table2~col_b^=
        }|{
            table2~col_a^table3~cola^=
        }|AND
    }|OR
}
OR
-- table1~col_b^table1~colc^= 
-- AND
---- table1~col_b^table2~col_b^=
---- table2~col_a^table3~cola^=
我甚至不知道从哪里开始。我需要完成的就是这个:{} 表示一个表达式,| 用于分隔各个表达式。基本上,这棵树应该是这样的:

{
    {
        table1~col_b^table1~colc^= 
    }|
    {
        {
            table1~col_b^table2~col_b^=
        }|{
            table2~col_a^table3~cola^=
        }|AND
    }|OR
}
OR
-- table1~col_b^table1~colc^= 
-- AND
---- table1~col_b^table2~col_b^=
---- table2~col_a^table3~cola^=
任何有助于解决这个问题的帮助或指导,我都将不胜感激。

谢谢


道格

定义语法时,我建议你看看 Irony。它可以让你非常容易地编写解析器。

这是代码的粗略语法:

    /// <summary>
    /// Grammar for the brace/pipe-delimited expression text shown in the question.
    /// In rough BNF, as built by the constructor below:
    ///   root        := "{" block "}"
    ///   block       := expressions op
    ///   expressions := list of expression (built with MakeStarRule)
    ///   expression  := "{" (prop | block) "}" "|"
    ///   prop        := identifier "="
    ///   op          := "OR" | "AND"
    /// </summary>
    [Language("ExpressionEvaluator", "1.0", "Multi-line expression evaluator")]
    public class ExpressionEvaluatorGrammar : Grammar
    {
        /// <summary>
        /// Declares the terminals, non-terminals and rules, then sets <c>Root</c> and the language flags.
        /// </summary>
        public ExpressionEvaluatorGrammar()
        {
            // 1. Terminals

            // One or more of: lowercase a-z, digits, '_', '^', '~'.
            // '=' is not part of the class; it is matched as a separate literal in the prop rule.
            var identifier = new RegexBasedTerminal("identifier", "[a-z\\d_^~]+");


            // 2. Non-terminals
            var root = new NonTerminal("root");
            var block = new NonTerminal("block");
            var expression = new NonTerminal("expression");
            var expressions = new NonTerminal("expressions");

            var prop = new NonTerminal("prop");
            var op = new NonTerminal("op");

            // 3. BNF rules
            // The operator keyword that closes a block.
            op.Rule = ToTerm("OR") | "AND";
            // A leaf: identifier text followed by a literal "=".
            prop.Rule = identifier + "=" ;


            // Every child is wrapped in braces and followed by a "|" separator.
            expression.Rule = "{" + (prop | block) + "}" + "|" ;
            // Presumably a zero-or-more list of expression (Irony's MakeStarRule) - confirm against the Irony docs.
            expressions.Rule = MakeStarRule(expressions,  expression);
            // A block is its child expressions followed by the operator that combines them.
            block.Rule = expressions  + op;
            // The outermost braces wrap a single block and are not followed by "|".
            root.Rule = "{" + block +"}";

            Root = root;
            //automatically add NewLine before EOF so that our BNF rules work correctly when there's no final line break in source
            // NOTE(review): this is a plain assignment, so any flags set earlier are replaced rather than combined - confirm intended.
            this.LanguageFlags =  LanguageFlags.NewLineBeforeEOF;
        }
    }
} //namespace
它解析得很好,您所需要做的就是添加AST树并使用它


定义语法时,我建议你看看 Irony。它可以让你非常容易地编写解析器。

这是代码的粗略语法:

    /// <summary>
    /// Grammar for the brace/pipe-delimited expression text shown in the question.
    /// In rough BNF, as built by the constructor below:
    ///   root        := "{" block "}"
    ///   block       := expressions op
    ///   expressions := list of expression (built with MakeStarRule)
    ///   expression  := "{" (prop | block) "}" "|"
    ///   prop        := identifier "="
    ///   op          := "OR" | "AND"
    /// </summary>
    [Language("ExpressionEvaluator", "1.0", "Multi-line expression evaluator")]
    public class ExpressionEvaluatorGrammar : Grammar
    {
        /// <summary>
        /// Declares the terminals, non-terminals and rules, then sets <c>Root</c> and the language flags.
        /// </summary>
        public ExpressionEvaluatorGrammar()
        {
            // 1. Terminals

            // One or more of: lowercase a-z, digits, '_', '^', '~'.
            // '=' is not part of the class; it is matched as a separate literal in the prop rule.
            var identifier = new RegexBasedTerminal("identifier", "[a-z\\d_^~]+");


            // 2. Non-terminals
            var root = new NonTerminal("root");
            var block = new NonTerminal("block");
            var expression = new NonTerminal("expression");
            var expressions = new NonTerminal("expressions");

            var prop = new NonTerminal("prop");
            var op = new NonTerminal("op");

            // 3. BNF rules
            // The operator keyword that closes a block.
            op.Rule = ToTerm("OR") | "AND";
            // A leaf: identifier text followed by a literal "=".
            prop.Rule = identifier + "=" ;


            // Every child is wrapped in braces and followed by a "|" separator.
            expression.Rule = "{" + (prop | block) + "}" + "|" ;
            // Presumably a zero-or-more list of expression (Irony's MakeStarRule) - confirm against the Irony docs.
            expressions.Rule = MakeStarRule(expressions,  expression);
            // A block is its child expressions followed by the operator that combines them.
            block.Rule = expressions  + op;
            // The outermost braces wrap a single block and are not followed by "|".
            root.Rule = "{" + block +"}";

            Root = root;
            //automatically add NewLine before EOF so that our BNF rules work correctly when there's no final line break in source
            // NOTE(review): this is a plain assignment, so any flags set earlier are replaced rather than combined - confirm intended.
            this.LanguageFlags =  LanguageFlags.NewLineBeforeEOF;
        }
    }
} //namespace
它解析得很好,您所需要做的就是添加AST树并使用它


即使不使用任何附加库,用纯C#代码将其解析为表达式树也并不难:

/// <summary>
/// Base type for nodes of the parsed expression tree; also hosts the parser that builds the tree.
/// </summary>
class TreeNode
{
    /// <summary>
    /// Tells the parser how to interpret the text collected when the next '}' is reached.
    /// </summary>
    private enum ParseState
    {
        /// <summary>The collected text is an operator name (a child was closed since the last '{').</summary>
        Operator,
        /// <summary>The collected text is leaf data (nothing was closed since the last '{').</summary>
        Expression
    }

    /// <summary>
    /// Parses text of the form <c>{ {leaf}|{leaf}|AND }</c> into an expression tree.
    /// Whitespace (space, tab, CR, LF) and '|' are ignored everywhere. A '{' ... '}' pair with no
    /// nested braces is a leaf, and the text before the '}' that closes a group names its operator.
    /// A group with more than two operands is folded left to right into nested binary
    /// expressions, e.g. a, b, c under AND becomes AND(AND(a, b), c).
    /// </summary>
    /// <param name="treeData">The serialized tree.</param>
    /// <returns>The last top-level node that was parsed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="treeData"/> is null.</exception>
    /// <exception cref="ArgumentException">An operator name is not a member of <see cref="TreeNodeOperators"/>.</exception>
    /// <exception cref="InvalidOperationException">
    /// A group has fewer than two operands, or the input contains no node at all.
    /// </exception>
    public static TreeNode ParseTree(string treeData)
    {
        if (treeData == null)
        {
            throw new ArgumentNullException("treeData");
        }

        Stack<TreeNode> parsed = new Stack<TreeNode>();
        // For every '{' that is still open: how many nodes were on `parsed` when it was opened.
        // This is what lets an operator collect exactly the operands of its own group.
        Stack<int> groupStarts = new Stack<int>();
        StringBuilder nodeData = new StringBuilder();
        ParseState state = ParseState.Operator;

        foreach (char current in treeData)
        {
            switch (current)
            {
                case '{':
                    groupStarts.Push(parsed.Count);
                    nodeData.Clear();
                    state = ParseState.Expression;
                    break;
                case '\t':
                case ' ':
                case '\r':
                case '\n':
                case '|':
                    // ignore whitespace and |
                    break;
                case '}':
                    {
                        // A '}' without a matching '{' is still tolerated: it falls back to
                        // combining the two most recent nodes.
                        int groupStart = groupStarts.Count > 0
                            ? groupStarts.Pop()
                            : Math.Max(0, parsed.Count - 2);

                        if (state == ParseState.Expression)
                        {
                            // No nested braces since the matching '{': this is a leaf.
                            state = ParseState.Operator;
                            parsed.Push(new TreeNodeData(nodeData.ToString()));
                        }
                        else // Operator
                        {
                            TreeNodeOperators op = (TreeNodeOperators)(Enum.Parse(typeof(TreeNodeOperators), nodeData.ToString()));
                            parsed.Push(CombineOperands(parsed, groupStart, op));
                        }
                        nodeData.Clear();
                    }
                    break;
                default:
                    nodeData.Append(current);
                    break;
            }
        }
        return parsed.Pop();
    }

    /// <summary>
    /// Pops every node above <paramref name="groupStart"/> and folds them, left to right,
    /// into nested binary expressions that all use <paramref name="op"/>.
    /// </summary>
    private static TreeNode CombineOperands(Stack<TreeNode> parsed, int groupStart, TreeNodeOperators op)
    {
        int operandCount = parsed.Count - groupStart;
        if (operandCount < 2)
        {
            throw new InvalidOperationException(
                "Operator " + op + " needs at least two operands but its group contains " + operandCount + ".");
        }

        // The stack yields the operands last-to-first; fill the array backwards to restore source order.
        TreeNode[] operands = new TreeNode[operandCount];
        for (int i = operandCount - 1; i >= 0; i--)
        {
            operands[i] = parsed.Pop();
        }

        TreeNode combined = operands[0];
        for (int i = 1; i < operandCount; i++)
        {
            TreeNodeExpression exp = new TreeNodeExpression();
            exp.Operator = op;
            exp.Left = combined;
            exp.Right = operands[i];
            combined = exp;
        }
        return combined;
    }
}

/// <summary>
/// Operators that can close a group. The member names are matched against the input text
/// with <c>Enum.Parse</c>, so they must keep this exact spelling.
/// </summary>
enum TreeNodeOperators
{
    AND,
    OR
}

/// <summary>
/// Inner node: a binary operator applied to two sub-trees.
/// </summary>
class TreeNodeExpression : TreeNode
{
    /// <summary>The operator that combines <see cref="Left"/> and <see cref="Right"/>.</summary>
    public TreeNodeOperators Operator {get; set;}
    /// <summary>The operand that appeared first in the source text.</summary>
    public TreeNode Left { get; set; }
    /// <summary>The operand that appeared last in the source text.</summary>
    public TreeNode Right { get; set; }
}

/// <summary>
/// Leaf node: the raw text found between a pair of braces, with whitespace and '|' removed.
/// </summary>
class TreeNodeData : TreeNode
{
    /// <summary>The leaf text exactly as collected by the parser.</summary>
    public string Data {get; set;}

    /// <summary>Creates a leaf holding <paramref name="data"/>.</summary>
    public TreeNodeData(string data)
    {
        Data = data;
    }
}

即使不使用任何附加库,用纯C#代码将其解析为表达式树也并不难:

/// <summary>
/// Base type for nodes of the parsed expression tree; also hosts the parser that builds the tree.
/// </summary>
class TreeNode
{
    /// <summary>
    /// Tells the parser how to interpret the text collected when the next '}' is reached.
    /// </summary>
    private enum ParseState
    {
        /// <summary>The collected text is an operator name (a child was closed since the last '{').</summary>
        Operator,
        /// <summary>The collected text is leaf data (nothing was closed since the last '{').</summary>
        Expression
    }

    /// <summary>
    /// Parses text of the form <c>{ {leaf}|{leaf}|AND }</c> into an expression tree.
    /// Whitespace (space, tab, CR, LF) and '|' are ignored everywhere. A '{' ... '}' pair with no
    /// nested braces is a leaf, and the text before the '}' that closes a group names its operator.
    /// A group with more than two operands is folded left to right into nested binary
    /// expressions, e.g. a, b, c under AND becomes AND(AND(a, b), c).
    /// </summary>
    /// <param name="treeData">The serialized tree.</param>
    /// <returns>The last top-level node that was parsed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="treeData"/> is null.</exception>
    /// <exception cref="ArgumentException">An operator name is not a member of <see cref="TreeNodeOperators"/>.</exception>
    /// <exception cref="InvalidOperationException">
    /// A group has fewer than two operands, or the input contains no node at all.
    /// </exception>
    public static TreeNode ParseTree(string treeData)
    {
        if (treeData == null)
        {
            throw new ArgumentNullException("treeData");
        }

        Stack<TreeNode> parsed = new Stack<TreeNode>();
        // For every '{' that is still open: how many nodes were on `parsed` when it was opened.
        // This is what lets an operator collect exactly the operands of its own group.
        Stack<int> groupStarts = new Stack<int>();
        StringBuilder nodeData = new StringBuilder();
        ParseState state = ParseState.Operator;

        foreach (char current in treeData)
        {
            switch (current)
            {
                case '{':
                    groupStarts.Push(parsed.Count);
                    nodeData.Clear();
                    state = ParseState.Expression;
                    break;
                case '\t':
                case ' ':
                case '\r':
                case '\n':
                case '|':
                    // ignore whitespace and |
                    break;
                case '}':
                    {
                        // A '}' without a matching '{' is still tolerated: it falls back to
                        // combining the two most recent nodes.
                        int groupStart = groupStarts.Count > 0
                            ? groupStarts.Pop()
                            : Math.Max(0, parsed.Count - 2);

                        if (state == ParseState.Expression)
                        {
                            // No nested braces since the matching '{': this is a leaf.
                            state = ParseState.Operator;
                            parsed.Push(new TreeNodeData(nodeData.ToString()));
                        }
                        else // Operator
                        {
                            TreeNodeOperators op = (TreeNodeOperators)(Enum.Parse(typeof(TreeNodeOperators), nodeData.ToString()));
                            parsed.Push(CombineOperands(parsed, groupStart, op));
                        }
                        nodeData.Clear();
                    }
                    break;
                default:
                    nodeData.Append(current);
                    break;
            }
        }
        return parsed.Pop();
    }

    /// <summary>
    /// Pops every node above <paramref name="groupStart"/> and folds them, left to right,
    /// into nested binary expressions that all use <paramref name="op"/>.
    /// </summary>
    private static TreeNode CombineOperands(Stack<TreeNode> parsed, int groupStart, TreeNodeOperators op)
    {
        int operandCount = parsed.Count - groupStart;
        if (operandCount < 2)
        {
            throw new InvalidOperationException(
                "Operator " + op + " needs at least two operands but its group contains " + operandCount + ".");
        }

        // The stack yields the operands last-to-first; fill the array backwards to restore source order.
        TreeNode[] operands = new TreeNode[operandCount];
        for (int i = operandCount - 1; i >= 0; i--)
        {
            operands[i] = parsed.Pop();
        }

        TreeNode combined = operands[0];
        for (int i = 1; i < operandCount; i++)
        {
            TreeNodeExpression exp = new TreeNodeExpression();
            exp.Operator = op;
            exp.Left = combined;
            exp.Right = operands[i];
            combined = exp;
        }
        return combined;
    }
}

/// <summary>
/// Operators that can close a group. The member names are matched against the input text
/// with <c>Enum.Parse</c>, so they must keep this exact spelling.
/// </summary>
enum TreeNodeOperators
{
    AND,
    OR
}

/// <summary>
/// Inner node: a binary operator applied to two sub-trees.
/// </summary>
class TreeNodeExpression : TreeNode
{
    /// <summary>The operator that combines <see cref="Left"/> and <see cref="Right"/>.</summary>
    public TreeNodeOperators Operator {get; set;}
    /// <summary>The operand that appeared first in the source text.</summary>
    public TreeNode Left { get; set; }
    /// <summary>The operand that appeared last in the source text.</summary>
    public TreeNode Right { get; set; }
}

/// <summary>
/// Leaf node: the raw text found between a pair of braces, with whitespace and '|' removed.
/// </summary>
class TreeNodeData : TreeNode
{
    /// <summary>The leaf text exactly as collected by the parser.</summary>
    public string Data {get; set;}

    /// <summary>Creates a leaf holding <paramref name="data"/>.</summary>
    public TreeNodeData(string data)
    {
        Data = data;
    }
}

您可以使用regexp进行标记化,使用堆栈进行递归解析,如下所示

        /// <summary>
        /// One node of the parsed tree. The parser in this file uses the same type for
        /// leaves and for operator groups.
        /// </summary>
        internal class Node
        {
            /// <summary>
            /// For a leaf, the value text captured between braces; for a group, the
            /// operator keyword ("AND" or "OR") assigned when that keyword is read.
            /// </summary>
            public string Terminal { get; set; }

            /// <summary>
            /// Child nodes of a group. The parser leaves this null on leaf nodes,
            /// so check for null before enumerating.
            /// </summary>
            public List<Node> Operands { get; set; }
        }

        /// <summary>
        /// Tokenizer for the tree text. Each match is exactly one of:
        /// a whitespace run (group "ws"), a complete leaf <c>{ value }</c> (group "value"),
        /// or a single structural token (group "token": OR, AND, or any one character).
        /// The value class excludes '{' as well as '}' so that a leaf can never swallow the
        /// opening brace of its parent, e.g. in compact input such as <c>{{a}|{b}|AND}</c>.
        /// </summary>
        internal static readonly Regex TokensPattern = new Regex(@"(?<ws>\s+)|{\s*(?<value>[^\s{}]+)\s*}|(?<token>OR|AND|.)", RegexOptions.Compiled);

        /// <summary>
        /// Parses the brace/pipe-delimited tree text into a <see cref="Node"/> tree.
        /// Groups may hold any number of operands; the operator keyword read inside a
        /// group is stored in that group's <see cref="Node.Terminal"/>.
        /// </summary>
        /// <param name="str">The serialized tree.</param>
        /// <returns>The first top-level node found in <paramref name="str"/>.</returns>
        /// <exception cref="KeyNotFoundException">The text contains a character that is not a known token.</exception>
        /// <exception cref="InvalidOperationException">The text contains more '}' than '{'.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The text contains no node at all.</exception>
        static Node parseData(string str)
        {
            // The bottom entry is a synthetic holder that collects the top-level node(s).
            var stack = new Stack<Node>();
            stack.Push(new Node() { Operands = new List<Node>() });

            // One handler per token text; the empty key handles a complete leaf,
            // because the "token" group is empty when the leaf alternative matched.
            var parser = new Dictionary<string, Action<string>>();
            parser.Add("{", _ => stack.Push(new Node() { Operands = new List<Node>() }));
            parser.Add("}", _ => { var top = stack.Pop(); stack.Peek().Operands.Add(top); });
            parser.Add("|", _ => { });
            parser.Add("AND", _ => stack.Peek().Terminal = "AND");
            parser.Add("OR", _ => stack.Peek().Terminal = "OR");
            parser.Add("", value => stack.Peek().Operands.Add(new Node { Terminal = value }));

            // Run the handlers in input order, skipping whitespace matches.
            foreach (Match match in TokensPattern.Matches(str))
            {
                if (!string.IsNullOrEmpty(match.Groups["ws"].Value))
                {
                    continue;
                }

                parser[match.Groups["token"].Value](match.Groups["value"].Value);
            }

            // return top of the tree
            return stack.Peek().Operands[0];
        }

        /// <summary>
        /// Demo entry point: parses a sample tree and prints it to the console,
        /// one node per line, indented two spaces per nesting level.
        /// </summary>
        static void Main(string[] args)
        {
            const string sample = @"{
    {
        table1~col_b^table1~colc^= 
    }|
    {
        {
            table1~col_b^table2~col_b^=
        }|{
            table2~col_a^table3~cola^=
        }|{cccc}|AND
    }|OR
}";

            // Pre-order walk with an explicit work list; each entry pairs a depth with a node.
            var pending = new Stack<KeyValuePair<int, Node>>();
            pending.Push(new KeyValuePair<int, Node>(0, parseData(sample)));

            while (pending.Count > 0)
            {
                KeyValuePair<int, Node> entry = pending.Pop();
                int depth = entry.Key;
                Node current = entry.Value;

                string indent = new string(' ', depth * 2);
                Console.WriteLine("{0}{1}", indent, current.Terminal);

                List<Node> children = current.Operands;
                if (children == null)
                {
                    continue;
                }

                // Push in reverse so the first child is popped, and therefore printed, first.
                for (int i = children.Count - 1; i >= 0; i--)
                {
                    pending.Push(new KeyValuePair<int, Node>(depth + 1, children[i]));
                }
            }
        }

您可以使用regexp进行标记化,使用堆栈进行递归解析,如下所示

        /// <summary>
        /// One node of the parsed tree. The parser in this file uses the same type for
        /// leaves and for operator groups.
        /// </summary>
        internal class Node
        {
            /// <summary>
            /// For a leaf, the value text captured between braces; for a group, the
            /// operator keyword ("AND" or "OR") assigned when that keyword is read.
            /// </summary>
            public string Terminal { get; set; }

            /// <summary>
            /// Child nodes of a group. The parser leaves this null on leaf nodes,
            /// so check for null before enumerating.
            /// </summary>
            public List<Node> Operands { get; set; }
        }

        /// <summary>
        /// Tokenizer for the tree text. Each match is exactly one of:
        /// a whitespace run (group "ws"), a complete leaf <c>{ value }</c> (group "value"),
        /// or a single structural token (group "token": OR, AND, or any one character).
        /// The value class excludes '{' as well as '}' so that a leaf can never swallow the
        /// opening brace of its parent, e.g. in compact input such as <c>{{a}|{b}|AND}</c>.
        /// </summary>
        internal static readonly Regex TokensPattern = new Regex(@"(?<ws>\s+)|{\s*(?<value>[^\s{}]+)\s*}|(?<token>OR|AND|.)", RegexOptions.Compiled);

        /// <summary>
        /// Parses the brace/pipe-delimited tree text into a <see cref="Node"/> tree.
        /// Groups may hold any number of operands; the operator keyword read inside a
        /// group is stored in that group's <see cref="Node.Terminal"/>.
        /// </summary>
        /// <param name="str">The serialized tree.</param>
        /// <returns>The first top-level node found in <paramref name="str"/>.</returns>
        /// <exception cref="KeyNotFoundException">The text contains a character that is not a known token.</exception>
        /// <exception cref="InvalidOperationException">The text contains more '}' than '{'.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The text contains no node at all.</exception>
        static Node parseData(string str)
        {
            // The bottom entry is a synthetic holder that collects the top-level node(s).
            var stack = new Stack<Node>();
            stack.Push(new Node() { Operands = new List<Node>() });

            // One handler per token text; the empty key handles a complete leaf,
            // because the "token" group is empty when the leaf alternative matched.
            var parser = new Dictionary<string, Action<string>>();
            parser.Add("{", _ => stack.Push(new Node() { Operands = new List<Node>() }));
            parser.Add("}", _ => { var top = stack.Pop(); stack.Peek().Operands.Add(top); });
            parser.Add("|", _ => { });
            parser.Add("AND", _ => stack.Peek().Terminal = "AND");
            parser.Add("OR", _ => stack.Peek().Terminal = "OR");
            parser.Add("", value => stack.Peek().Operands.Add(new Node { Terminal = value }));

            // Run the handlers in input order, skipping whitespace matches.
            foreach (Match match in TokensPattern.Matches(str))
            {
                if (!string.IsNullOrEmpty(match.Groups["ws"].Value))
                {
                    continue;
                }

                parser[match.Groups["token"].Value](match.Groups["value"].Value);
            }

            // return top of the tree
            return stack.Peek().Operands[0];
        }

        /// <summary>
        /// Demo entry point: parses a sample tree and prints it to the console,
        /// one node per line, indented two spaces per nesting level.
        /// </summary>
        static void Main(string[] args)
        {
            const string sample = @"{
    {
        table1~col_b^table1~colc^= 
    }|
    {
        {
            table1~col_b^table2~col_b^=
        }|{
            table2~col_a^table3~cola^=
        }|{cccc}|AND
    }|OR
}";

            // Pre-order walk with an explicit work list; each entry pairs a depth with a node.
            var pending = new Stack<KeyValuePair<int, Node>>();
            pending.Push(new KeyValuePair<int, Node>(0, parseData(sample)));

            while (pending.Count > 0)
            {
                KeyValuePair<int, Node> entry = pending.Pop();
                int depth = entry.Key;
                Node current = entry.Value;

                string indent = new string(' ', depth * 2);
                Console.WriteLine("{0}{1}", indent, current.Terminal);

                List<Node> children = current.Operands;
                if (children == null)
                {
                    continue;
                }

                // Push in reverse so the first child is popped, and therefore printed, first.
                for (int i = children.Count - 1; i >= 0; i--)
                {
                    pending.Push(new KeyValuePair<int, Node>(depth + 1, children[i]));
                }
            }
        }

如果需要一个起点,可以先了解一下正则表达式(Regex)。请从详细定义语法开始;仅根据一两个示例字符串编写的解析器往往会带来很多麻烦。请参阅我的编辑。如果需要一个起点,可以先了解一下正则表达式(Regex)。请从详细定义语法开始;仅根据一两个示例字符串编写的解析器往往会带来很多麻烦。请参阅我的编辑。您假设 AND/OR 是二元运算符,而实际情况很可能并非如此。是的,这段代码确实基于有限的可用信息做了一些假设,但要调整它以适应不符合这些假设的情况并不困难。您假设 AND/OR 是二元运算符,而实际情况很可能并非如此。是的,这段代码确实基于有限的可用信息做了一些假设,但要调整它以适应不符合这些假设的情况并不困难。