ANTLR:异构AST和虚拟令牌

ANTLR:异构AST和虚拟令牌,antlr,token,abstract-syntax-tree,heterogeneous,Antlr,Token,Abstract Syntax Tree,Heterogeneous,这是我的第一个问题:) 我想用ANTLR构建一个异构AST,用于简单语法。有不同的接口来表示AST节点,例如。GIInfiExp,IVariableDecl。ANTLR提供了CommonTree来保存源代码的所有信息(行号、字符位置等),我想以此作为AST interfacese IINFIXXP实现的基础 为了以CommonTree作为节点类型获得AST作为输出,我设置: options { language = Java; k = 1; output

这是我的第一个问题:)

我想用 ANTLR 为一个简单的语法构建异构 AST。AST 节点由不同的接口表示,例如 IInfixExp、IVariableDecl。ANTLR 提供了 CommonTree 来保存源代码的所有信息(行号、字符位置等),我想以它作为各个 AST 接口(例如 IInfixExp)实现类的基类。

为了以CommonTree作为节点类型获得AST作为输出,我设置:

options {
  language     = Java;
  k            = 1;
  output       = AST;
  ASTLabelType = CommonTree;
}
IInfixExp 是:

package toylanguage;

/**
 * Expression node that exposes an operator together with a left-hand and a
 * right-hand operand expression.
 */
public interface IInfixExp extends IExpression {

    /** Operators an infix expression can carry. */
    enum Operator {
        PLUS,
        MINUS,
        TIMES,
        DIVIDE
    }

    /** Returns the operator of this expression. */
    Operator getOperator();

    /** Returns the operand expression on the left-hand side. */
    IExpression getLeftHandSide();

    /** Returns the operand expression on the right-hand side. */
    IExpression getRightHandSide();
}
而InfixExp的实现是:

package toylanguage;

import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTree;

/**
 * AST node for an infix expression such as {@code a + b} or {@code a * b}.
 *
 * <p>The node wraps the operator token. {@link #initialize()} (the only method of
 * {@code IInitializable}, called from {@code ToyTreeAdaptor.rulePostProcessing})
 * derives the operator and both operands from that token and the node's children;
 * until it has run, all three getters return {@code null}.
 */
public class InfixExp extends CommonTree implements IInfixExp, IInitializable {
    private Operator operator;
    private IExpression leftHandSide;
    private IExpression rightHandSide;

    /**
     * Creates the node around the given operator token.
     *
     * @param token the token handed to the {@code CommonTree} constructor
     */
    InfixExp(Token token) {
        super(token);
    }

    /** Returns the operator determined by {@link #initialize()}. */
    @Override
    public Operator getOperator() {
        return operator;
    }

    /** Returns the first child as determined by {@link #initialize()}. */
    @Override
    public IExpression getLeftHandSide() {
        return leftHandSide;
    }

    /** Returns the second child as determined by {@link #initialize()}. */
    @Override
    public IExpression getRightHandSide() {
        return rightHandSide;
    }

    /**
     * Fills in operator and operands from the token type and the two children.
     *
     * @throws IllegalStateException if the token type is not one of PLUS, MINUS,
     *         TIMES or DIVIDE
     */
    @Override
    public void initialize() {
        // Grammar shapes that produce this node:
        //   term ((PLUS|MINUS) term)+
        //   atom ((TIMES|DIVIDE) atom)+

        // exactly two children, both of them expressions
        assert getChildCount() == 2;
        assert getChild(0) instanceof IExpression
                && getChild(1) instanceof IExpression;

        operator = operatorFor(token.getType());

        // left and right operands
        leftHandSide = (IExpression) getChild(0);
        rightHandSide = (IExpression) getChild(1);
    }

    /**
     * Maps a parser token type to the matching operator. An unknown type is
     * rejected with an exception instead of {@code assert false}, which does
     * nothing when assertions are disabled and would leave the operator null.
     */
    private static Operator operatorFor(int tokenType) {
        switch (tokenType) {
        case ToyLanguageParser.PLUS:
            return Operator.PLUS;
        case ToyLanguageParser.MINUS:
            return Operator.MINUS;
        case ToyLanguageParser.TIMES:
            return Operator.TIMES;
        case ToyLanguageParser.DIVIDE:
            return Operator.DIVIDE;
        default:
            throw new IllegalStateException(
                    "Unexpected operator token type: " + tokenType);
        }
    }
}
相应的规则是:

exp // e.g. a+b
  : term ((PLUS<InfixExp>^|MINUS<InfixExp>^) term)*
  ;

term // e.g. a*b
  : atom ((TIMES<InfixExp>^|DIVIDE<InfixExp>^) atom)*
  ;
而对于虚拟令牌 PROGRAM,相应的规则是:

program // e.g. var a, b; a + b
  : varDecl* exp
    -> ^(PROGRAM<Program> varDecl* exp)
  ;
与 InfixExp 不同,这里调用的不是 Program(Token) 构造函数,而是 Program(int)。

Program 类是:

package toylanguage;

import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTree;

/**
 * AST node for a whole program: its children are variable declarations
 * followed by one expression ({@code varDecl* exp}).
 */
class Program extends CommonTree implements IProgram, IInitializable {
    private final List<IVariableDecl> variableDeclarations = new LinkedList<IVariableDecl>();
    private IExpression expression;

    /** Creates the node around the given token. */
    Program(Token token) {
        super(token);
    }

    /**
     * Creates the node from a token type only. The argument is not used: the
     * no-argument {@code CommonTree} constructor is called, so no token is
     * handed to the superclass.
     */
    public Program(int tokeType) {
        // What to do?
        super();
    }

    /** Returns a read-only view of the collected variable declarations. */
    @Override
    public List<IVariableDecl> getVariableDeclarations() {
        // callers must not be able to modify the internal list
        return Collections.unmodifiableList(variableDeclarations);
    }

    /** Returns the expression found by {@link #initialize()}, or {@code null} before that. */
    @Override
    public IExpression getExpression() {
        return expression;
    }

    /**
     * Collects the leading {@code IVariableDecl} children and takes the child
     * that follows them as the program's expression.
     */
    @Override
    public void initialize() {
        // program: varDecl* exp;

        // there is at least one child, and the last one is an expression
        assert getChildCount() > 0;
        assert getChild(getChildCount() - 1) instanceof IExpression;

        // walk over the leading varDecl children
        int index = 0;
        for (Object child = getChild(index);
                child instanceof IVariableDecl;
                child = getChild(++index)) {
            variableDeclarations.add((IVariableDecl) child);
        }

        // the first child that is not a declaration is the expression
        expression = (IExpression) getChild(index);
    }
}
因此,使用 super() 会创建一个不带令牌的 CommonTree。于是 CommonTreeAdaptor.rulePostProcessing 看到的是一个平面列表,而不是一棵以令牌为根的树。

我的TreeAdaptor看起来像:

package toylanguage;

import org.antlr.runtime.tree.CommonTreeAdaptor;

/**
 * Tree adaptor that calls {@code initialize()} on post-processed nodes that
 * implement {@code IInitializable}.
 */
public class ToyTreeAdaptor extends CommonTreeAdaptor {
    /**
     * Runs the inherited post-processing and then initializes the resulting
     * node if it implements {@code IInitializable}.
     *
     * @param root the tree handed in for post-processing
     * @return the object returned by the inherited implementation
     */
    public Object rulePostProcessing(Object root) {
        final Object processed = super.rulePostProcessing(root);

        // nothing to initialize for ordinary nodes
        if (!(processed instanceof IInitializable)) {
            return processed;
        }

        // NOTE(review): only the returned node is initialized here; its
        // descendants are not visited by this method.
        ((IInitializable) processed).initialize();
        return processed;
    }
}
我用下面的代码进行测试:

package toylanguage;

import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.TokenStream;
import org.antlr.runtime.tree.CommonTree;

import toylanguage.ToyLanguageParser.program_return;

/**
 * Small driver: parses a sample input with the ToyLanguage lexer and parser
 * (using {@code ToyTreeAdaptor}) and prints details about the resulting tree.
 */
public class Processor {
    public static void main(String[] args) {
        // sample input
        final String source = "var a, b; a + b + 123";

        // wire up lexer -> token stream -> parser, with the custom adaptor
        ToyLanguageLexer lexer = new ToyLanguageLexer(new ANTLRStringStream(source));
        ToyLanguageParser parser = new ToyLanguageParser(new CommonTokenStream(lexer));
        parser.setTreeAdaptor(new ToyTreeAdaptor());

        try {
            // the outputs noted below were observed with: var a, b; a + b
            program_return parsed = parser.program();
            CommonTree root = parsed.tree;

            // prints 'a b (+ a b)'
            System.out.println(root.toStringTree());

            // (+ a b) is the third child of root; prints '(+ a b)'
            CommonTree infix = (CommonTree) root.getChild(2);
            System.out.println(infix.toStringTree());

            // prints 'true'
            System.out.println(infix instanceof IInfixExp);

            // prints 'false'
            System.out.println(root instanceof IProgram);
        } catch (RecognitionException e) {
            e.printStackTrace();
        }
    }
}
为了完整起见,以下是完整的语法:

// Toy language: variable declarations followed by a single arithmetic expression.
grammar ToyLanguage;

// Generate a Java parser with one token of lookahead that builds an AST
// whose node label type is CommonTree.
options {
  language     = Java;
  k            = 1;
  output       = AST;
  ASTLabelType = CommonTree;
}

// PROGRAM has no lexer rule below; it is only declared here and used as the
// root of the rewrite in the 'program' rule.
tokens {
  PROGRAM;
}

// Put the generated parser and lexer into the toylanguage package.
@header {
  package toylanguage;
}

@lexer::header {
  package toylanguage;
}

// Rewrites the matched declarations and expression into a tree rooted at
// PROGRAM, with Program requested as the root's node class.
program // e.g. var a, b; a + b
  : varDecl* exp
    -> ^(PROGRAM<Program> varDecl* exp)
  ;

// '!' keeps 'var', ',' and ';' out of the AST; each ID is created as a VariableDecl node.
varDecl // e.g. var a, b;
  : 'var'! ID<VariableDecl> (','! ID<VariableDecl>)* ';'!
  ;

// '^' makes the operator token the root of what was built so far;
// the operator node is created as an InfixExp.
exp // e.g. a+b
  : term ((PLUS<InfixExp>^|MINUS<InfixExp>^) term)*
  ;

// Same construction as 'exp', for the TIMES and DIVIDE operators.
term // e.g. a*b
  : atom ((TIMES<InfixExp>^|DIVIDE<InfixExp>^) atom)*
  ;

// Operands: integer literal, variable reference, or a parenthesised expression
// (the parentheses are dropped by the rewrite).
atom
  : INT<IntegerLiteralExp> // e.g. 123
  | ID<VariableExp>        // e.g. a
  | '(' exp ')' -> exp     // e.g. (a+b)
  ;

// Lexer rules: digits only for INT, lowercase letters only for ID.
INT    : ('0'..'9')+ ;
ID     : ('a'..'z')+ ;
PLUS   : '+' ;
MINUS  : '-' ;
TIMES  : '*' ;
DIVIDE : '/' ;

// Whitespace is sent to the hidden channel.
WS : ('\t' | '\n' | '\r' | ' ')+ { $channel = HIDDEN; } ;
我的问题是:如何让 program 规则得到一棵树 ^(PROGRAM varDecl* exp),而不是一个简单的列表

(varDecl* exp) ?
(很抱歉出现这么多代码片段)


Ciao Vertex

尝试创建以下构造函数:

    public Program(int tokeType) {
        // What to do?
        super();
    }
public Program(int tokenType) {
    super(new CommonToken(tokenType, "PROGRAM"));
}
program // e.g. var a, b; a + b
  : varDecl* exp
    -> ^(PROGRAM<Program> varDecl* exp)
  ;
^(PROGRAM varDecl* exp)
(varDecl* exp) ?
public Program(int tokenType) {
    super(new CommonToken(tokenType, "PROGRAM"));
}