C# 与 ANTLR4：解析文件时处理 "include" 指令_C#_Parsing_Include_Antlr4_Antlr4cs

C# 与 ANTLR4：解析文件时处理 "include" 指令

c# parsing antlr4

C# C“和ANTLR4：处理”；包括「；解析文件时的指令,c#,parsing,include,antlr4,antlr4cs,C#,Parsing,Include,Antlr4,Antlr4cs,我所处的情况是，使用ANTLR，我试图解析包含对其中其他文件的引用的输入文件，就像C语言的#include“[插入文件名]” 一种建议的方法是：解析根文件，将所述引用保存为节点（因此，特定语法规则）访问搜索“参考”节点的树对于每个引用节点，解析引用的文件并用新生成的树替换节点递归地重复此过程，以处理多个级别的夹杂物此解决方案的问题在于引用的文件可能是完全部分的（请参见C函数体中的includes）。为了解析这些文件，我必须实现一个不同的解析器来处理碎片语法是否有任何有效/建议的方法（

我目前的情况是：我在使用 ANTLR 解析输入文件，而这些文件中包含对其他文件的引用，类似于 C 语言的

#include "[插入文件名]"

一种建议的方法是：

解析根文件，将所述引用保存为节点（因此，特定语法规则）

访问搜索“参考”节点的树

对于每个引用节点，解析引用的文件并用新生成的树替换节点

递归地重复此过程，以处理多层嵌套的包含（include）

此解决方案的问题在于，被引用的文件可能只是不完整的片段（例如出现在 C 函数体内部的 include）。为了解析这些文件，我必须另外实现一个解析器来处理片段语法

是否有任何有效/建议的方法（字面上）将新文件注入正在进行的解析过程中？

可以通过重写词法分析器（lexer）的行为，特别是

NextToken()

方法来解决此问题。之所以必须这样做，是因为据我所知，ANTLR 的 lexer 语法无法处理 EOF 标记，而且附加在识别 EOF 的 lexer 规则上的任何动作代码都会被忽略（如下面的代码所示）。因此，有必要直接在词法分析器的 NextToken() 方法中实现此行为

假设我们有一个语法分析器

parser grammar INCParserGrammar;

// Stack of character streams shared with the lexer: the FILE lexer rule pushes
// onto it and the overridden NextToken() pops from it.
@parser::members {
    public static Stack<ICharStream> m_nestedfiles = new Stack<ICharStream>();
}

// Token types are taken from the companion lexer grammar.
options {
    tokenVocab = INCLexerGrammar;
}

// ---------------------------------------------------------------------------
// Parser rules
// ---------------------------------------------------------------------------

// One or more include directives / ANY chunks, closed by the ENDOFFILE token.
compileUnit
    : ( include_directives
      | ANY
      )+
      ENDOFFILE
    ;

// An include directive as tokenized by the lexer: prefix, file name, closing quote.
include_directives
    : INCLUDEPREFIX FILE DQUOTE
    ;

   lexer grammar INCLexerGrammar;

   @lexer::header {
    using System;
    using System.IO;
   }

   @lexer::members {
    // Name of the file most recently matched by the FILE rule.
    string file;
    // Stream built for that file by FILE; installed as the lexer input by DQUOTE.
    ICharStream current;

   }


/*
 * Lexer Rules
 */

// Opening part of an include directive, up to and including the opening quote.
// Switches to FILEMODE so that the file name is tokenized next.
INCLUDEPREFIX : '#include'[ \t]+'"' {
                                      Mode(INCLexerGrammar.FILEMODE);
                                    };

// ANY can never contain '#', so it never competes with INCLUDEPREFIX for the same input.
ANY : ~[#]+ ;


// The note is a block comment on purpose: a `//` comment on this line would also
// comment out the closing brace of the action.
ENDOFFILE : EOF { /* Any actions in this rule are ignored by the ANTLR lexer */ };


////////////////////////////////////////////////////////////////////////////////////////////////////////

mode FILEMODE;

// File name of the include directive. Remembers the stream currently being lexed and
// prepares (but does not yet install) the stream for the included file.
FILE : [a-zA-Z][a-zA-Z0-9_]*'.'[a-zA-Z0-9_]+ {  file = Text;
                                                // Dispose the reader as soon as the stream has been built.
                                                // NOTE(review): relies on AntlrInputStream loading the whole
                                                // reader in its constructor - confirm for the runtime in use.
                                                using (StreamReader s = new StreamReader(file)) {
                                                    INCParserGrammar.m_nestedfiles.Push(_input);
                                                    current = new AntlrInputStream(s);
                                                }
                                             };

// Closing quote of the directive: only now switch the lexer to the included file
// and go back to the default mode.
DQUOTE: '"'  {
                this._input = current;
                Mode(INCLexerGrammar.DefaultMode);  };

重写后的 NextToken() 方法主体应放在 .g4.cs 文件中。该文件用于扩展生成的词法分析器类，因为生成的词法分析器类带有 "partial" 关键字修饰

生成与给定语法关联的 partial 词法分析器类后，找到 ANTLR 运行时中的 ANTLR4 Lexer 类，将其 NextToken() 方法的全部原始代码复制到这个新的重写方法中，然后在中间的 do-while 块里（紧跟 try-catch 块之后）添加下面的代码：

// The current stream has no more characters (La(1) yields -1).
if (this._input.La(1) == -1)
{
    if (mySpecialFileStack.Count != 0)
    {
        // Resume the stream that was being read before this one.
        this._input = mySpecialFileStack.Pop();
    }
    else
    {
        // Nothing left to resume: this is the real end of input.
        this._hitEOF = true;
    }
}

重写后的 NextToken() 方法的完整主体如下：

/// <summary>
/// Produces the next token, transparently continuing with the previously active
/// character stream when the current (included) stream runs out of characters.
/// </summary>
/// <returns>The token that was emitted, or the EOF token once every stream is exhausted.</returns>
/// <exception cref="InvalidOperationException">The lexer has no input stream.</exception>
public override IToken NextToken() {
            if (this._input == null) {
                throw new InvalidOperationException("nextToken requires a non-null input stream.");
            }

            // Names for the raw values used by the lexer runtime.
            const int EndOfStream = -1;    // value of La(1) when the stream is exhausted
            const int InvalidType = 0;     // "no token type assigned yet"
            const int DefaultChannel = 0;
            const int MoreToken = -2;      // keep matching, the token is not finished
            const int SkipToken = -3;      // discard the match and start a new token

            // _input can be swapped while this method runs, so remember which stream
            // issued the marker and release the marker on that same stream.
            ICharStream markedStream = this._input;
            int marker = markedStream.Mark();
            try {
                while (!this._hitEOF) {
                    this._token = (IToken)null;
                    this._channel = DefaultChannel;
                    this._tokenStartCharIndex = this._input.Index;
                    this._tokenStartCharPositionInLine = this.Interpreter.Column;
                    this._tokenStartLine = this.Interpreter.Line;
                    this._text = (string)null;
                    do {
                        this._type = InvalidType;
                        int matchedType;
                        try {
                            matchedType = this.Interpreter.Match(this._input, this._mode);
                        } catch (LexerNoViableAltException ex) {
                            this.NotifyListeners(ex);
                            this.Recover(ex);
                            matchedType = SkipToken;
                        }

                        // End of the current stream: either this is the real end of input,
                        // or an outer stream is waiting on the stack and lexing resumes there.
                        // NOTE(review): the swap happens before Emit(), so the last token of an
                        // included stream is emitted while _input already refers to the outer
                        // stream - confirm its text and indices are still what you expect.
                        if (this._input.La(1) == EndOfStream) {
                            if (INCParserGrammar.m_nestedfiles.Count == 0) {
                                this._hitEOF = true;
                            }
                            else
                            {
                                this._input = INCParserGrammar.m_nestedfiles.Pop();
                            }
                        }

                        if (this._type == InvalidType)
                            this._type = matchedType;
                        if (this._type == SkipToken)
                            goto nextToken; // stay inside the try so the marker is released only once
                    }
                    while (this._type == MoreToken);
                    if (this._token == null)
                        this.Emit();
                    return this._token;
                nextToken: ;
                }
                this.EmitEOF();
                return this._token;
            } finally {
                markedStream.Release(marker);
            }
        }

现在，当词法分析器识别出需要被包含并解析的文件名时，只需为相应的规则添加以下动作

// File name of the include directive: remember the stream currently being lexed and
// prepare the stream for the included file. The new stream is only stored in `current`;
// it must not become the lexer input before the closing quote has been matched.
FILE
    : [a-zA-Z][a-zA-Z0-9_]*'.'[a-zA-Z0-9_]+ {
        // Dispose the reader as soon as the stream has been built.
        // NOTE(review): relies on AntlrInputStream loading the whole reader in its
        // constructor - confirm for the runtime in use.
        using (StreamReader s = new StreamReader(Text)) {
            mySpecialFileStack.Push(_input);
            current = new AntlrInputStream(s);
        }
    };

// Closing quote: the directive is complete, so switch to the included file now.
DQUOTE: '"'  {  this._input = current;
            Mode(INCLexerGrammar.DefaultMode);  };
//***Warning:***
// Be careful when your file inclusion is enclosed inside quotes or other symbols, or if
// the filename-to-be-included is not the last token that defines an inclusion: `_input`
// should only be switched AFTER the inclusion detection is completely found (i.e. after
// the closing quote has been recognized).

最后，下面给出了主程序。请注意，根文件会首先被压入 ICharStream 栈中

 // Entry point: lexes and parses ./root.txt, following include directives on the way.
 static void Main(string[] args) {
            // Build the root stream and dispose the reader right away.
            // NOTE(review): relies on AntlrInputStream loading the whole reader in its
            // constructor - confirm for the runtime in use.
            AntlrInputStream antlrInput;
            using (var rootReader = new StreamReader("./root.txt")) {
                antlrInput = new AntlrInputStream(rootReader);
            }

            // The root stream is put on the shared stack before lexing starts.
            INCParserGrammar.m_nestedfiles.Push(antlrInput);
            var lexer = new INCLexerGrammar(antlrInput);
            var tokens = new BufferedTokenStream(lexer);
            var parser = new INCParserGrammar(tokens);
            parser.compileUnit();
        }

阅读Grigoris先生的答案帮助我找到了解决问题的另一种可能的方法：

在试图弄清楚他所建议的解决方案是如何工作的时候，我偶然发现了

public virtual IToken EmitEOF()

方法。如果把 Grigoris 先生提供的代码放在这个方法中（只需做一些小改动），一切似乎都能按预期工作

这使我可以直接在 lexer 的

@members

块中重写

EmitEOF()

方法，而无需创建一个全新的文件，也无需了解当前词法分析器的

NextToken()

方法的工作原理

词法分析器语法与语法分析器语法：

lexer grammar INCLexerGrammar;

@lexer::header {
    using System;
    using System.IO;
    using System.Collections.Generic;
}

@lexer::members {

    // Streams waiting to become the lexer input: the streams that were interrupted by
    // an include directive and, between FILE and DQUOTE, the stream of the included file.
    private Stack<ICharStream> _nestedFiles = new Stack<ICharStream>();

    // When no interrupted stream is left this is the real end of input and the base
    // implementation emits EOF. Otherwise lexing resumes on the most recently
    // interrupted stream and the next token from there is returned instead.
    public override IToken EmitEOF(){
        if (_nestedFiles.Count == 0) {
            return base.EmitEOF();
        }
        this._hitEOF = false;
        this._input = _nestedFiles.Pop();
        return this.NextToken();
    }
}

/////////////////////////////////////////////////////////////////////////////////////
// Default Mode /////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////

// Skipped so that the parser never sees INCLUDEPREFIX; it only switches the lexer to FILEMODE.
INCLUDEPREFIX : '#include'[ \t]+'"' { Mode(INCLexerGrammar.FILEMODE); } -> skip;

// This is the only valid token our Grammar accepts
ANY : ~[#]+ ;

/////////////////////////////////////////////////////////////////////////////////////
mode FILEMODE; //////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////

// Skipped so that the parser never sees FILE; the rule only prepares the included stream.
FILE : [a-zA-Z][a-zA-Z0-9_]*'.'[a-zA-Z0-9_]+ {

    // Open the named file; the reader is disposed once the stream has been built.
    // NOTE(review): relies on AntlrInputStream loading the whole reader in its
    // constructor - confirm for the runtime in use.
    using (StreamReader s = new StreamReader(Text)) {

        // Save the stream being lexed so that EmitEOF can resume it later.
        _nestedFiles.Push(_input);

        // Parked on top of the stack; DQUOTE pops it and makes it the lexer input.
        _nestedFiles.Push(new AntlrInputStream(s));
    }

} -> skip;

// Skipped so that the parser never sees DQUOTE; the directive is complete at this point.
DQUOTE: '"' {

    // Install the stream that FILE prepared.
    this._input = _nestedFiles.Pop();

    Mode(INCLexerGrammar.DefaultMode);

} -> skip;

parser grammar INCParserGrammar;

options {
    tokenVocab = INCLexerGrammar;
}

// The parser only ever receives ANY tokens. The include-related
// tokens are skipped by the lexer; they exist solely so that the
// lexer can splice the contents of other files into the stream
// it is currently scanning.

compileUnit
    : ANY+
      EOF
    ;

执行调用：

lexer grammar INCLexerGrammar;

@lexer::header {
    using System;
    using System.IO;
    using System.Collections.Generic;
}

@lexer::members {

    // Streams waiting to become the lexer input: the streams that were interrupted by
    // an include directive and, between FILE and DQUOTE, the stream of the included file.
    private Stack<ICharStream> _nestedFiles = new Stack<ICharStream>();

    // When no interrupted stream is left this is the real end of input and the base
    // implementation emits EOF. Otherwise lexing resumes on the most recently
    // interrupted stream and the next token from there is returned instead.
    public override IToken EmitEOF(){
        if (_nestedFiles.Count == 0) {
            return base.EmitEOF();
        }
        this._hitEOF = false;
        this._input = _nestedFiles.Pop();
        return this.NextToken();
    }
}

/////////////////////////////////////////////////////////////////////////////////////
// Default Mode /////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////

// Skipped so that the parser never sees INCLUDEPREFIX; it only switches the lexer to FILEMODE.
INCLUDEPREFIX : '#include'[ \t]+'"' { Mode(INCLexerGrammar.FILEMODE); } -> skip;

// This is the only valid token our Grammar accepts
ANY : ~[#]+ ;

/////////////////////////////////////////////////////////////////////////////////////
mode FILEMODE; //////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////

// Skipped so that the parser never sees FILE; the rule only prepares the included stream.
FILE : [a-zA-Z][a-zA-Z0-9_]*'.'[a-zA-Z0-9_]+ {

    // Open the named file; the reader is disposed once the stream has been built.
    // NOTE(review): relies on AntlrInputStream loading the whole reader in its
    // constructor - confirm for the runtime in use.
    using (StreamReader s = new StreamReader(Text)) {

        // Save the stream being lexed so that EmitEOF can resume it later.
        _nestedFiles.Push(_input);

        // Parked on top of the stack; DQUOTE pops it and makes it the lexer input.
        _nestedFiles.Push(new AntlrInputStream(s));
    }

} -> skip;

// Skipped so that the parser never sees DQUOTE; the directive is complete at this point.
DQUOTE: '"' {

    // Install the stream that FILE prepared.
    this._input = _nestedFiles.Pop();

    Mode(INCLexerGrammar.DefaultMode);

} -> skip;

parser grammar INCParserGrammar;

options {
    tokenVocab = INCLexerGrammar;
}

// The parser only ever receives ANY tokens. The include-related
// tokens are skipped by the lexer; they exist solely so that the
// lexer can splice the contents of other files into the stream
// it is currently scanning.

compileUnit
    : ANY+
      EOF
    ;

// [...]
// Build the root stream and dispose the reader right away.
// NOTE(review): relies on AntlrInputStream loading the whole reader in its
// constructor - confirm for the runtime in use.
AntlrInputStream myAntlrInputStream;
using (var myRootFile = new StreamReader("./root.txt"))
{
    myAntlrInputStream = new AntlrInputStream(myRootFile);
}
var lexer = new INCLexerGrammar(myAntlrInputStream);
var tokens = new BufferedTokenStream(lexer);
var parser = new INCParserGrammar(tokens);
parser.compileUnit();
// [...]