提高java解析器性能_Java_Parsing_Javacc

提高java解析器性能

java parsing

提高java解析器性能,java,parsing,javacc,Java,Parsing,Javacc,我有一个用.jjt文件编写的解析器。这是一个非常相似的。在我的例子中，唯一的变化是插入我自己的表达式求值方法。目前，解析1个表达式大约需要1毫秒。我需要改进这个解析器的性能。我使用VisualVM进行分析，发现在运行我的代码所需的44.5秒内，使用我的解析器将一行文件读入ArrayList，并计算93个表达式（这些表达式的参数值来自ArrayList中的文件行），在parseStream方法中大约花费了43秒。为了改进解析器，我还尝试将ERROR\u REPORTING选项设置为FALSE，但

我有一个用

.jjt

文件编写的解析器。它与一个已有的解析器非常相似。在我的例子中，唯一的变化是插入我自己的表达式求值方法。目前，解析1个表达式大约需要1毫秒。我需要改进这个解析器的性能。我使用VisualVM进行分析，发现在运行我的代码所需的44.5秒内，使用我的解析器将一行文件读入

ArrayList

，并计算93个表达式（这些表达式的参数值来自

ArrayList

中的文件行），在

parseStream

方法中大约花费了43秒。为了改进解析器，我还尝试将

ERROR_REPORTING

选项设置为

FALSE

，但没有帮助

编辑1：

这是parser.jjt文件。我在文件中分析了我的应用程序的1000行，占用大部分时间的方法是

Start（）

选项{
JAVA_UNICODE_ESCAPE=true；
多=真；
访客=真实；
访问者_EXCEPTION=“ParseException”；
节点\默认值\无效=真；
//NODE_PACKAGE=“org.nfunk.jep.parser”；
//BUILD_NODE_FILES=false；
静态=假；
//调试令牌管理器=true；
//DEBUG_PARSER=true；
//DEBUG_LOOKAHEAD=true；
}
/***************************************************************
分析器开始
***************************************************************/
解析器_BEGIN（解析器）
包org.nfunk.jep；
导入java.util.Vector；
导入org.nfunk.jep.function.*；
导入org.nfunk.jep.type.*；
公共类解析器{
私人杰普；
专用符号表；
私人操作集；
private int initialTokenManagerState=默认值；
公共节点parseStream（java.io.Reader stream，JEP-JEP_-in）
抛出解析异常{
重新启动（流，jep_in）；
//解析表达式，并返回
启用_跟踪（）；
Node=Start（）；
如果（node==null）抛出新的ParseException（“未输入表达式”）；
返回节点jjtGetChild（0）；
}
/** 
*使用给定的流重新启动解析。
*@自2.3.0 beta 1起
*/
公共void重启（java.io.Reader流，JEP-JEP_-in）
{
雷尼特（溪流）；
this.token_source.SwitchTo（initialTokenManagerState）；
jep=jep_in；
symTab=jep.getSymbolTable（）；
opSet=jep.getOperatorSet（）；
}
/**
*继续解析而不重新初始化流。
*允许解析器的renetrancy，以便
*可以解析“x=1；y=2；z=3；”。
*当遇到分号时，解析结束，字符串的其余部分未被解析。
*可以使用此方法从当前位置恢复解析。
*比如说
* 
*XJep j=新的XJep（）；
*Parser parse=j.getParse（）；
*StringReader sr=新的StringReader（“x=1；y=2；z=3；”）；
*重新启动（sr，j）；
*节点；
*试一试{
*而（（node=j.continueParse（））！=null）{
*j.println（节点）；
*}}catch（parsee异常）{}
* 
*/
公共节点continueParse（）引发ParseException
{
ASTStart节点=Start（）；
如果（node==null）返回null；
返回节点jjtGetChild（0）；
}
私有void addToErrorList（字符串errorStr）{
jep.errorList.addElement（errorStr）；
}
/**
*设置令牌管理器处于的初始状态。
*可用于更改x.x的解释方式，无论是作为单个
*标识符（默认）或as x x（标识符中无点）
*@param state要处于的状态。当前唯一的合法值是默认值和无点标识符
*/
public void setInitialTokenManagerState（int状态）
{
initialTokenManagerState=状态；
}
/**
*将所有转义序列翻译成角色。灵感来自Rob Millar的
*Web Sphinx项目的rcm.util.Str中的unescape（）方法。
*
*@param inputStr包含转义字符的字符串。
*@已替换所有转义序列的返回字符串。
*/
私有字符串替换转义（字符串inputStr）{
int len=inputStr.length（）；
int p=0；
int i；
字符串metachars=“tnrbf\\\”；
字符串字符=“\t\n\r\b\f\\\”；
StringBuffer输出=新的StringBuffer（）；
while（（i=inputStr.indexOf（'\\'，p））！=-1）{
append（inputStr.substring（p，i））；
如果（i+1==len）中断；
//查找元字符
字符元=输入字符（i+1）；
//查找metac的索引
int k=元字符索引of（metac）；
如果（k==-1）{
//未找到元字符，请保留已找到的序列。
//如果解析器
//由于字符串包含
//不应接受未知转义字符。
output.append（'\\'）；
输出追加（metac）；
}否则{
//它对应的真字符
output.append（chars.charAt（k））；
}
//跳过转义字符和元字符
p=i+2；
}
//将输入字符串的结尾添加到输出
如果（p
|
<#十进制文字：[“0”-“9”]（[“0”-“9”]）*>
|

|
<#指数：[“e”，“e”]（[“+”，“-”]）？（[“0”-“9”]）+>
|

}
/*标识符
版本2.2之前的字母
options {
    JAVA_UNICODE_ESCAPE = true;
    MULTI = true;
    VISITOR = true;
    VISITOR_EXCEPTION = "ParseException";
    NODE_DEFAULT_VOID = true;
//  NODE_PACKAGE = "org.nfunk.jep.parser";
//  BUILD_NODE_FILES=false;
    STATIC = false;
//  DEBUG_TOKEN_MANAGER = true;
//  DEBUG_PARSER = true;
//  DEBUG_LOOKAHEAD = true;
}


/***************************************************************
PARSER BEGIN
***************************************************************/

PARSER_BEGIN(Parser)
package org.nfunk.jep;

import java.util.Vector;
import org.nfunk.jep.function.*;
import org.nfunk.jep.type.*;

public class Parser {
    private JEP     jep;
    private SymbolTable symTab;
    private OperatorSet opSet;
    private int initialTokenManagerState = DEFAULT;

public Node parseStream(java.io.Reader stream, JEP jep_in)
                        throws ParseException {
    restart(stream,jep_in);
    // Parse the expression, and return the 
    enable_tracing();
    Node node = Start();
    if (node == null) throw new ParseException("No expression entered");
    return node.jjtGetChild(0);
}

/** 
 * Restart the parse with the given stream.
 * @since 2.3.0 beta 1
 */
public void restart(java.io.Reader stream, JEP jep_in)
{
    ReInit(stream);
    this.token_source.SwitchTo(initialTokenManagerState);
    jep = jep_in;
    symTab = jep.getSymbolTable();
    opSet = jep.getOperatorSet();
}
/**
 * Continue parsing without re-initilising stream.
 * Allows renetrancy of parser so that strings like
 * "x=1; y=2; z=3;" can be parsed.
 * When a semi colon is encountered parsing finishes leaving the rest of the string unparsed.
 * Parsing can be resumed from the current position by using this method.
 * For example
 * <pre>
 * XJep j = new XJep();
 * Parser parse = j.getParse();
 * StringReader sr = new StringReader("x=1; y=2; z=3;");
 * parse.restart(sr,j);
 * Node node;
 * try {
 * while((node = j.continueParse())!=null) {
 *    j.println(node);
 * } }catch(ParseException e) {}
 * </pre>
 */
public Node continueParse() throws ParseException
{
    ASTStart node = Start();
    if (node==null) return null;
    return node.jjtGetChild(0);
}

private void addToErrorList(String errorStr) {
    jep.errorList.addElement(errorStr);     
}

/**
 * Sets the initial state that the token manager is in.
 * Can be used to change how x.x is interpreted, either as a single
 * identifier (DEFAULT) or as x <DOT> x (NO_DOT_IN_IDENTIFIERS)
 * @param state the state to be in. Currently the only legal values are DEFAULT and NO_DOT_IN_IDENTIFIER
 */
public void setInitialTokenManagerState(int state)
{
    initialTokenManagerState = state;
}
/**
 * Translate all escape sequences to characters. Inspired by Rob Millar's
 * unescape() method in rcm.util.Str fron the Web Sphinx project.
 *
 * @param inputStr String containing escape characters.
 * @return String with all escape sequences replaced.
 */
private String replaceEscape(String inputStr) {
    int len = inputStr.length();
    int p = 0;
    int i;
    String metachars = "tnrbf\\\"'";
    String chars = "\t\n\r\b\f\\\"'";

    StringBuffer output = new StringBuffer();

    while ((i = inputStr.indexOf('\\', p)) != -1) {
        output.append(inputStr.substring(p, i));

        if (i+1 == len) break;

        // find metacharacter
        char metac = inputStr.charAt(i+1);

        // find the index of the metac
        int k = metachars.indexOf(metac);
        if (k == -1) {
            // didn't find the metachar, leave sequence as found.
            // This code should be unreachable if the parser
            // is functioning properly because strings containing
            // unknown escape characters should not be accepted.
            output.append('\\');
            output.append(metac);
        } else {
            // its corresponding true char
            output.append(chars.charAt(k));   
        }

        // skip over both escape character & metacharacter
        p = i + 2;
    }

    // add the end of the input string to the output
    if (p < len)
        output.append(inputStr.substring(p));

    return output.toString();
}
}

PARSER_END(Parser)

/***************************************************************
SKIP
***************************************************************/

<*> SKIP :
{
  " "
  | "\t"
  | "\n"
  | "\r"

  | <"//" (~["\n","\r"])* ("\n"|"\r"|"\r\n")>
  | <"/*" (~["*"])* "*" (~["/"] (~["*"])* "*")* "/">
}


/***************************************************************
TOKENS
***************************************************************/

<*> TOKEN : /* LITERALS */
{
    < INTEGER_LITERAL:
        <DECIMAL_LITERAL>
    >
|
    < #DECIMAL_LITERAL: ["0"-"9"] (["0"-"9"])* >
|
    < FLOATING_POINT_LITERAL:
        (["0"-"9"])+ "." (["0"-"9"])* (<EXPONENT>)?
        | "." (["0"-"9"])+ (<EXPONENT>)?
        | (["0"-"9"])+ <EXPONENT>
    >
|
    < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
|
    < STRING_LITERAL:
        "\""
        ( (~["\"","\\","\n","\r"])
        | ("\\" ["n","t","b","r","f","\\","'","\""] )
        )*
        "\""
    >
}

/* IDENTIFIERS 

    Letters before version 2.22
    < #LETTER: ["_","a"-"z","A"-"Z"] >

    In Ver 2.3.0.1 presence of . in an identifier is switchable.
    In the DEFAULT lexical state identifiers can contain a .
    In the NO_DOT_IN_IDENTIFIERS state identifiers cannot contain a .
    the state can be set by using
    Parser.setInitialTokenManagerState
*/

<DEFAULT> TOKEN:
{
    <INDENTIFIER1: <LETTER1>(<LETTER1>|<DIGIT1>|".")*>
    |
    < #LETTER1:
    [
        "\u0024",           // $
        "\u0041"-"\u005a",  // A - Z
        "\u005f",           // _
        "\u0061"-"\u007a",  // a - z
        "\u00c0"-"\u00d6",  // Upper case symbols of Latin-1 Supplement
        "\u00d8"-"\u00f6",  // Lower case symbols of Latin-1 Supplement
        "\u00f8"-"\u00ff",  // More lower case symbols of Latin-1 Supplement
        "\u0100"-"\u1fff",  // Many languages (including Greek)
        "\u3040"-"\u318f",  // Hiragana, Katakana, Bopomofo, Hangul Compatibility Jamo
        "\u3300"-"\u337f",  // CJK Compatibility
        "\u3400"-"\u3d2d",  // CJK Unified Ideographs Extension A
        "\u4e00"-"\u9fff",  // CJK Unified Ideographs
        "\uf900"-"\ufaff"   // CJK Compatibility Ideographs
    ]
    >   
|
    < #DIGIT1: ["0"-"9"] >
}

<NO_DOT_IN_IDENTIFIERS> TOKEN:
{
    <INDENTIFIER2: <LETTER2>(<LETTER2>|<DIGIT2>)*>
    |
    < #LETTER2:
    [
        "\u0024",           // $
        "\u0041"-"\u005a",  // A - Z
        "\u005f",           // _
        "\u0061"-"\u007a",  // a - z
        "\u00c0"-"\u00d6",  // Upper case symbols of Latin-1 Supplement
        "\u00d8"-"\u00f6",  // Lower case symbols of Latin-1 Supplement
        "\u00f8"-"\u00ff",  // More lower case symbols of Latin-1 Supplement
        "\u0100"-"\u1fff",  // Many languages (including Greek)
        "\u3040"-"\u318f",  // Hiragana, Katakana, Bopomofo, Hangul Compatibility Jamo
        "\u3300"-"\u337f",  // CJK Compatibility
        "\u3400"-"\u3d2d",  // CJK Unified Ideographs Extension A
        "\u4e00"-"\u9fff",  // CJK Unified Ideographs
        "\uf900"-"\ufaff"   // CJK Compatibility Ideographs
    ]
    >   
    |
    < #DIGIT2: ["0"-"9"] >
}

/* OPERATORS */
<*> TOKEN:
{
    < ASSIGN:"="  > // rjm
|   < SEMI: ";" >   // rjm
|   < COMMA: "," >  // rjm
|   < GT:   ">"  >
|   < LT:   "<"  >
|   < EQ:   "==" >
|   < LE:   "<=" >
|   < GE:   ">=" >
|   < NE:   "!=" >
|   < AND:  "&&" >
|   < OR:   "||" >
|   < PLUS: "+"  >
|   < MINUS:"-"  >
|   < MUL:  "*"  >
|   < DOT:  "."  >  // rjm
|   < DIV:  "/"  >
|   < MOD:  "%"  >
|   < NOT:  "!"  >
|   < POWER:"^"  >
|   < CROSS:"^^" > // rjm
|   < LSQ:  "["  >  // rjm
|   < RSQ:  "]"  >  // rjm
|   < LRND: "("  >  // rjm
|   < RRND: ")"  >  // rjm
|   < COLON: ":" >  // rjm
}


/***************************************************************
GRAMMAR START
***************************************************************/

ASTStart Start() #Start :
{
}
{
    Expression() ( <EOF> | <SEMI> ) { return jjtThis; }
    |  ( <EOF> | <SEMI> )
    {
        // njf - The next line is commented out in 2.3.0 since
        //       two "No expression entered" errors are reported
        //       in EvaluatorVisitor and Console (one from here
        //       the other from ParseStream() )
        //       Decided to just return null, and handle the error
        //       in ParseStream.
        // addToErrorList("No expression entered");
        return null;
    }
}

// Expresions can be like
// x=3
// x=y=3 parsed as x=(y=3)

void Expression() : {}
{
    LOOKAHEAD(LValue() <ASSIGN>)        // need to prevent javacc warning with left recusion
    AssignExpression() // rjm changes from OrExpresion
    |
    RightExpression()
}

void AssignExpression() : {} // rjm addition
{

    ( LValue() <ASSIGN> Expression()
        {
            if (!jep.getAllowAssignment()) throw new ParseException(
            "Syntax Error (assignment not enabled)");

            jjtThis.setOperator(opSet.getAssign());
        }
      )
      #FunNode(2)
}

void RightExpression() :
{
}
{
    OrExpression()
}

void OrExpression() :
{
}
{
    AndExpression()
    (
      ( <OR> AndExpression()
        {
            jjtThis.setOperator(opSet.getOr());
        }
      ) #FunNode(2)
    )*
}


void AndExpression() :
{
}
{
    EqualExpression()
    (
      ( <AND> EqualExpression()
        {
            jjtThis.setOperator(opSet.getAnd());
        }
      ) #FunNode(2)
    )*
}



void EqualExpression() :
{
}
{
    RelationalExpression()
    (
      ( <NE> RelationalExpression()
        {
        jjtThis.setOperator(opSet.getNE());
        }
      ) #FunNode(2)
    |
      ( <EQ> RelationalExpression()
        {
          jjtThis.setOperator(opSet.getEQ());
        }
      ) #FunNode(2)
    )*
}



void RelationalExpression() :
{
}
{
  AdditiveExpression()
  (
    ( <LT> AdditiveExpression()
      {
        jjtThis.setOperator(opSet.getLT());
      }
    ) #FunNode(2)
    |
    ( <GT> AdditiveExpression()
      {
        jjtThis.setOperator(opSet.getGT());
      }
    ) #FunNode(2)
    |
    ( <LE> AdditiveExpression()
      {
        jjtThis.setOperator(opSet.getLE());
      }
    ) #FunNode(2)
    |
    ( <GE> AdditiveExpression()
      {
        jjtThis.setOperator(opSet.getGE());
      }
    ) #FunNode(2)
  )*
}


void AdditiveExpression() :
{
}
{
  MultiplicativeExpression()
  (
    ( <PLUS> MultiplicativeExpression()
      {
        jjtThis.setOperator(opSet.getAdd());
      }
    ) #FunNode(2)
    |
    ( <MINUS> MultiplicativeExpression()
      {
        jjtThis.setOperator(opSet.getSubtract());
      }
    ) #FunNode(2)
  )*
}


void MultiplicativeExpression() :
{
}
{
  UnaryExpression()
  (
    (       
      PowerExpression()
      {
        if (!jep.implicitMul) throw new ParseException(
            "Syntax Error (implicit multiplication not enabled)");

        jjtThis.setOperator(opSet.getMultiply());
      }
    ) #FunNode(2)
    |
    ( <MUL> UnaryExpression()
      {
        jjtThis.setOperator(opSet.getMultiply());
      }
    ) #FunNode(2)
    |
    ( <DOT> UnaryExpression()
      {
        jjtThis.setOperator(opSet.getDot());
      }
    ) #FunNode(2)
    |
    ( <CROSS> UnaryExpression()
      {
        jjtThis.setOperator(opSet.getCross());
      }
    ) #FunNode(2)
    |
    ( <DIV> UnaryExpression()
      {
        jjtThis.setOperator(opSet.getDivide());
      }
    ) #FunNode(2)
    |
    ( <MOD> UnaryExpression()
      {
        jjtThis.setOperator(opSet.getMod());
      }
    ) #FunNode(2)
  )*
}


void UnaryExpression() :
{
}
{
  ( <PLUS> UnaryExpression())
|
  ( <MINUS> UnaryExpression()
    {
      jjtThis.setOperator(opSet.getUMinus());
    }
  ) #FunNode(1)
|
  ( <NOT> UnaryExpression()
    {
      jjtThis.setOperator(opSet.getNot());
    }
  ) #FunNode(1)
|
  PowerExpression()
}


void PowerExpression() :
{
}
{
  UnaryExpressionNotPlusMinus()
  [
  ( <POWER> UnaryExpression()
    {
      jjtThis.setOperator(opSet.getPower());
    }
  ) #FunNode(2)
  ]
}


void UnaryExpressionNotPlusMinus() :
{
    String identString = "";
    int type;
}
{
    AnyConstant()
    |
    LOOKAHEAD(ArrayAccess())
    ArrayAccess()
    |
    LOOKAHEAD({ (getToken(1).kind == INDENTIFIER1 || getToken(1).kind == INDENTIFIER2) &&
                  jep.funTab.containsKey(getToken(1).image) })
    Function()
    |
    Variable()
    |
    <LRND> Expression() <RRND>
    |
//  LOOKAHEAD(<LSQ> Expression() <COLON>)
//  RangeExpression()
//  |
    ListExpression()
}

void ListExpression() #FunNode:
{
    jjtThis.setOperator(opSet.getList());
}
{
    <LSQ> Expression() ( <COMMA> Expression() )* <RSQ> 
}

/*
void RangeExpression()  #FunNode:
{
    jjtThis.setOperator(opSet.getRange());
}
{
    <LSQ> Expression() ( <COLON> Expression() )+ <RSQ>
}
*/

void LValue() :
{
}
{
    LOOKAHEAD(ArrayAccess())
    ArrayAccess()
    |   Variable()
}

void ArrayAccess() : 
{
}
{
    Variable() ListExpression()
    {
     jjtThis.setOperator(opSet.getElement());
    } #FunNode(2)

}
void Variable() :
{
    String identString = "";
}
{
    (identString = Identifier()
    {
        if (symTab.containsKey(identString)) {
            jjtThis.setVar(symTab.getVar(identString));
        } else {
            if (jep.allowUndeclared) {
                jjtThis.setVar(symTab.makeVarIfNeeded(identString));
            } else {
                addToErrorList("Unrecognized symbol \"" + identString +"\"");
            }
        }
    }
    ) #VarNode
}



void Function() :
{
    int reqArguments = 0;
    String identString = "";
}
{
    ( identString = Identifier()
        {
            if (jep.funTab.containsKey(identString)) {
                //Set number of required arguments
                reqArguments =
                    ((PostfixMathCommandI)jep.funTab.get(identString)).getNumberOfParameters();
                jjtThis.setFunction(identString,
                    (PostfixMathCommandI)jep.funTab.get(identString));
            } else {
                addToErrorList("!!! Unrecognized function \"" + identString +"\"");
            }
        }

        <LRND> ArgumentList(reqArguments, identString) <RRND>

    ) #FunNode
}

void ArgumentList(int reqArguments, String functionName) :
{
    int count = 0;
    String errorStr = "";
}
{
    [
    Expression() { count++; }
    (
        <COMMA>
        Expression() { count++; }
    )*
    ]
    {
        if(reqArguments == -1) {
            if(!((PostfixMathCommandI)jep.funTab.get(functionName)).checkNumberOfParameters(count))
            {
                errorStr = "Function \"" + functionName +"\" illegal number of arguments " + count;
                addToErrorList(errorStr);
            }
        }
        else if (reqArguments != count) {
            errorStr = "Function \"" + functionName +"\" requires "
                       + reqArguments + " parameter";
            if (reqArguments!=1) errorStr += "s";
            addToErrorList(errorStr);
        }
    }
}



String Identifier() :
{
  Token t;
}
{
    ( t = <INDENTIFIER1> |  t = <INDENTIFIER2> ) { return t.image; }
}


void AnyConstant() #Constant:
{
    Token t;
    Object value;
}
{
    t=<STRING_LITERAL> {
        // strip away double quotes at end of string
        String temp = (t.image).substring(1,t.image.length()-1);

        // replace escape characters
        temp = replaceEscape(temp);

        jjtThis.setValue(temp);
    }
    |
    value = RealConstant() {
        jjtThis.setValue(value);
//  }
//  |
//  value = Array() {
//      jjtThis.setValue(value);
    }
}

/*
Vector Array() :
{
    Object value;
    Vector result = new Vector();
}
{
    <LSQ>
    value = RealConstant()
    {
        result.addElement(value);
    }
    (
        <COMMA>
        value = RealConstant()
        {
            result.addElement(value);
        }
    )* 
    <RSQ>
    {
        return result;
    }
}
*/




Object RealConstant() :
{
  Token t;
  Object value;
}
{
    (t=<INTEGER_LITERAL>    |   t=<FLOATING_POINT_LITERAL>)
    {
        try {
            value = jep.getNumberFactory().createNumber(t.image);
        } catch (Exception e) {
            value = null;
            addToErrorList("Can't parse \"" + t.image + "\"");
        }

        return value;
    }
}

private static final String METACHARS = "tnrbf\\\"'";
private static final String CHARS = "\t\n\r\b\f\\\"'";

private String replaceEscape(String inputStr) {
    int i = inputStr.indexOf('\\');
    if (i == -1) { // 1. Heuristic strings without backslash
        return inputStr;
    }
    int len = inputStr.length();
    int p = 0;
    StringBuilder output = new StringBuilder(); // 2. Faster StringBuilder

    while (i != -1) {
        if (i + 1 == len) break;

        if (p < i) output.append(inputStr.substring(p, i));
        p = i + 1;
        char metac = inputStr.charAt(i+1);

        // find the index of the metac
        int k = METACHARS.indexOf(metac);
        if (k != -1) {
            // its corresponding true char
            metac = CHARS.charAt(k));   
            ++p; // Start copying after metachar
        }
        output.append(metac);
    }

    // add the end of the input string to the output
    if (p < len)
        output.append(inputStr.substring(p));
    return output.toString();
}