使用Java从文件中读取算术表达式中的自然数

使用Java从文件中读取算术表达式中的自然数,java,analyzer,lexical,Java,Analyzer,Lexical,我正在用java构建一个词法分析器。这就是我现在拥有的: import java.io.*; enum TokenType{ NUM,SOMA, MULT,APar,FPar, EOF} class Token{ char lexema; TokenType token; Token (char l, TokenType t) { lexema=l;token = t;} } class AnaliseLexica { BufferedReader arq

我正在用 Java 构建一个词法分析器。这是我目前的代码:

import java.io.*;

/** Categories of token produced by the lexer. */
enum TokenType {
  /** A decimal digit. */
  NUM,
  /** The {@code +} operator. */
  SOMA,
  /** The {@code *} operator. */
  MULT,
  /** An opening parenthesis. */
  APar,
  /** A closing parenthesis. */
  FPar,
  /** End of the input. */
  EOF
}

/** A single lexical token: one source character together with its category. */
class Token {

  /** The character that produced this token. */
  char lexema;

  /** The category assigned to the character. */
  TokenType token;

  /**
   * Creates a token.
   *
   * @param l the source character
   * @param t the category of the token
   */
  Token(char l, TokenType t) {
    this.lexema = l;
    this.token = t;
  }

}

class AnaliseLexica {

BufferedReader arquivo;

/**
 * Opens the file whose contents will be tokenized.
 *
 * @param a path of the input file
 * @throws Exception if the file cannot be opened
 */
AnaliseLexica(String a) throws Exception
{
    FileReader origem = new FileReader(a);
    arquivo = new BufferedReader(origem);
}

/**
 * Reads the next token from the input, skipping spaces, tabs and line breaks.
 *
 * <p>Every digit is returned as its own {@code NUM} token, because a token
 * is built from a single character. When the end of the input is reached the
 * reader is closed and an {@code EOF} token is returned; calling this method
 * again afterwards fails because the reader has already been closed.
 *
 * @return the next token, or an {@code EOF} token at the end of the input
 * @throws Exception if reading fails or an unsupported character is found
 */
Token getNextToken() throws Exception
{
    int lido;
    char atual;

    // Skip whitespace. At end of input read() yields -1, which is not
    // whitespace after the cast, so the loop always terminates.
    do {
        lido = arquivo.read();
        atual = (char) lido;
    } while (atual == '\n' || atual == ' ' || atual == '\t' || atual == '\r');

    // End of input: release the file and report EOF.
    if (lido == -1) {
        arquivo.close();
        return new Token(atual, TokenType.EOF);
    }

    if (atual >= '0' && atual <= '9') {
        return new Token(atual, TokenType.NUM);
    }

    switch (atual) {
        case '(':
            return new Token(atual, TokenType.APar);
        case ')':
            return new Token(atual, TokenType.FPar);
        case '+':
            return new Token(atual, TokenType.SOMA);
        case '*':
            return new Token(atual, TokenType.MULT);
        default:
            throw new Exception("Caractere inválido: " + ((int) atual));
    }
}

如何从文件中读取自然数并继续读取算术运算符?

由于空白字符是标记(token)的有效分隔符,因此可以简化代码。默认情况下,Scanner 类会按空白字符分隔读取到的值,你只需逐个读取即可。当 Scanner 没有更多数据可读时,我们将其关闭并返回一个 EOF 标记。

import java.io.FileReader;
import java.io.IOException;
import java.util.Scanner;

/**
 * Lexer for arithmetic expressions read from a file with a {@link Scanner}.
 *
 * <p>Tokens must be separated by whitespace. Recognized tokens are natural
 * numbers (one or more digits {@code 0-9}), {@code +}, {@code *}, {@code (}
 * and {@code )}.
 */
public class AnalisadorLexico {

    /** Categories of token produced by the lexer. */
    public enum TokenType {
        NUM,
        SOMA,
        MULT,
        APar,
        FPar,
        EOF
    }

    /** A token: the text that was read and its category. */
    public class Token {

        String lexema;
        TokenType token;

        Token( String l, TokenType t ) {
            lexema = l;
            token = t;
        }

        Token( char l, TokenType t ) {
            lexema = String.valueOf( l );
            token = t;
        }

        @Override
        public String toString() {
            return lexema + " (" + token + ")";
        }

    }

    private Scanner fileReader;
    private boolean scannerClosed;

    /**
     * Opens the file to be tokenized.
     *
     * @param filePath path of the input file
     * @throws IOException if the file cannot be opened
     */
    public AnalisadorLexico( String filePath ) throws IOException {
        fileReader = new Scanner( new FileReader( filePath ) );
    }

    /**
     * Returns the next token of the input.
     *
     * <p>When the input is exhausted the scanner is closed and an
     * {@code EOF} token with an empty lexeme is returned; further calls keep
     * returning {@code EOF} tokens.
     *
     * @return the next token, never {@code null}
     * @throws IOException if the next piece of text is not a valid token
     */
    public Token getNextToken() throws IOException {

        if ( scannerClosed || !fileReader.hasNext() ) {
            if ( !scannerClosed ) {
                scannerClosed = true;
                fileReader.close();
            }
            return new Token( "", TokenType.EOF );
        }

        String currentData = fileReader.next();

        // Digits only: Integer.parseInt would also accept "-5" and "+5" and
        // would reject natural numbers larger than Integer.MAX_VALUE.
        if ( isNaturalNumber( currentData ) ) {
            return new Token( currentData, TokenType.NUM );
        }

        switch ( currentData ) {
            case "(":
                return new Token( currentData, TokenType.APar );
            case ")":
                return new Token( currentData, TokenType.FPar );
            case "+":
                return new Token( currentData, TokenType.SOMA );
            case "*":
                return new Token( currentData, TokenType.MULT );
            default:
                // Fail loudly instead of returning null, which made callers
                // crash later with a NullPointerException.
                throw new IOException( "Token inválido: " + currentData );
        }

    }

    /** Tells whether the text is a non-empty sequence of the digits 0-9. */
    private static boolean isNaturalNumber( String text ) {
        if ( text.isEmpty() ) {
            return false;
        }
        for ( int i = 0; i < text.length(); i++ ) {
            char c = text.charAt( i );
            if ( c < '0' || c > '9' ) {
                return false;
            }
        }
        return true;
    }

    /** Prints every token of {@code testAL.txt}, then four EOF tokens. */
    public static void main( String[] args ) throws IOException {

        AnalisadorLexico al = new AnalisadorLexico( "testAL.txt" );
        Token t = null;

        while ( ( t = al.getNextToken() ).token != TokenType.EOF ) {
            System.out.println( t );
        }

        // Asking again after the end of the input keeps yielding EOF.
        System.out.println( al.getNextToken() );
        System.out.println( al.getNextToken() );
        System.out.println( al.getNextToken() );
        System.out.println( al.getNextToken() );

    }

}
如果不能使用 Scanner 类,可以继续使用 BufferedReader,并自行将其数据切分为标记:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

/**
 * Lexer for arithmetic expressions read from a file with a
 * {@link BufferedReader}.
 *
 * <p>Tokens must be separated by spaces, tabs or line breaks. Recognized
 * tokens are natural numbers (one or more digits {@code 0-9}), {@code +},
 * {@code *}, {@code (} and {@code )}.
 */
public class AnalisadorLexico2 {

    /** Categories of token produced by the lexer. */
    public enum TokenType {
        NUM,
        SOMA,
        MULT,
        APar,
        FPar,
        EOF
    }

    /** A token: the text that was read and its category. */
    public class Token {

        String lexema;
        TokenType token;

        Token( String l, TokenType t ) {
            lexema = l;
            token = t;
        }

        Token( char l, TokenType t ) {
            lexema = String.valueOf( l );
            token = t;
        }

        @Override
        public String toString() {
            return lexema + " (" + token + ")";
        }

    }

    private BufferedReader fileReader;
    private boolean fileReaderClosed;

    /**
     * Opens the file to be tokenized.
     *
     * @param filePath path of the input file
     * @throws IOException if the file cannot be opened
     */
    public AnalisadorLexico2( String filePath ) throws IOException {
        fileReader = new BufferedReader( new FileReader( filePath ) );
    }

    /**
     * Returns the next token of the input.
     *
     * <p>When the input is exhausted the reader is closed and an {@code EOF}
     * token with an empty lexeme is returned; further calls keep returning
     * {@code EOF} tokens.
     *
     * @return the next token, never {@code null}
     * @throws IOException if reading fails or the next piece of text is not
     *         a valid token
     */
    public Token getNextToken() throws IOException {

        String currentData = nextBufferedReaderToken();

        if ( currentData == null ) {
            if ( !fileReaderClosed ) {
                fileReaderClosed = true;
                fileReader.close();
            }
            return new Token( "", TokenType.EOF );
        }

        // Digits only: Integer.parseInt would also accept "-5" and "+5" and
        // would reject natural numbers larger than Integer.MAX_VALUE.
        if ( isNaturalNumber( currentData ) ) {
            return new Token( currentData, TokenType.NUM );
        }

        switch ( currentData ) {
            case "(":
                return new Token( currentData, TokenType.APar );
            case ")":
                return new Token( currentData, TokenType.FPar );
            case "+":
                return new Token( currentData, TokenType.SOMA );
            case "*":
                return new Token( currentData, TokenType.MULT );
            default:
                // Fail loudly instead of returning null, which made callers
                // crash later with a NullPointerException.
                throw new IOException( "Token inválido: " + currentData );
        }

    }

    /**
     * Reads the next run of non-whitespace characters from the file.
     *
     * @return the text read, or {@code null} when the reader is closed or no
     *         non-whitespace character remains
     * @throws IOException if reading fails
     */
    public String nextBufferedReaderToken() throws IOException {

        StringBuilder data = null;

        while ( !fileReaderClosed ) {

            int d = fileReader.read();
            if ( d == -1 ) {
                break;
            }

            char c = (char) d;
            boolean separator = c == '\n' || c == ' ' || c == '\t' || c == '\r';

            if ( separator ) {
                if ( data != null ) {
                    // A separator after some text ends the current token.
                    break;
                }
                // Leading separators are discarded.
            } else {
                if ( data == null ) {
                    data = new StringBuilder();
                }
                data.append( c );
            }

        }

        return data == null ? null : data.toString();

    }

    /** Tells whether the text is a non-empty sequence of the digits 0-9. */
    private static boolean isNaturalNumber( String text ) {
        if ( text.isEmpty() ) {
            return false;
        }
        for ( int i = 0; i < text.length(); i++ ) {
            char c = text.charAt( i );
            if ( c < '0' || c > '9' ) {
                return false;
            }
        }
        return true;
    }

    /** Prints every token of {@code testAL.txt}, then four EOF tokens. */
    public static void main( String[] args ) throws IOException {

        AnalisadorLexico2 al = new AnalisadorLexico2( "testAL.txt" );
        Token t = null;

        while ( ( t = al.getNextToken() ).token != TokenType.EOF ) {
            System.out.println( t );
        }

        // Asking again after the end of the input keeps yielding EOF.
        System.out.println( al.getNextToken() );
        System.out.println( al.getNextToken() );
        System.out.println( al.getNextToken() );
        System.out.println( al.getNextToken() );

    }

}
我的 testAL.txt 文件内容如下:

    1234 + 5 * 65 + ( 44 * 55555 ) * 444 + ( 2354 * ( 34 + 44 ) )
1234 + 5 * 65 + ( 44 * 55555 ) * 444 + ( 2354 * ( 34 + 44 ) )
    1234 + 5 * 65 + ( 44 * 55555 ) * 444 + ( 2354 * ( 34 + 44 ) )
1234 + 5 * 65 + ( 44 * 55555 ) * 444 + ( 2354 * ( 34 + 44 ) ) 

因为空白字符是标记(token)的有效分隔符,所以可以简化代码。默认情况下,Scanner 类会按空白字符分隔读取到的值,你只需逐个读取即可。当 Scanner 没有更多数据可读时,我们将其关闭并返回一个 EOF 标记。

import java.io.FileReader;
import java.io.IOException;
import java.util.Scanner;

/**
 * Lexer for arithmetic expressions read from a file with a {@link Scanner}.
 *
 * <p>Tokens must be separated by whitespace. Recognized tokens are natural
 * numbers (one or more digits {@code 0-9}), {@code +}, {@code *}, {@code (}
 * and {@code )}.
 */
public class AnalisadorLexico {

    /** Categories of token produced by the lexer. */
    public enum TokenType {
        NUM,
        SOMA,
        MULT,
        APar,
        FPar,
        EOF
    }

    /** A token: the text that was read and its category. */
    public class Token {

        String lexema;
        TokenType token;

        Token( String l, TokenType t ) {
            lexema = l;
            token = t;
        }

        Token( char l, TokenType t ) {
            lexema = String.valueOf( l );
            token = t;
        }

        @Override
        public String toString() {
            return lexema + " (" + token + ")";
        }

    }

    private Scanner fileReader;
    private boolean scannerClosed;

    /**
     * Opens the file to be tokenized.
     *
     * @param filePath path of the input file
     * @throws IOException if the file cannot be opened
     */
    public AnalisadorLexico( String filePath ) throws IOException {
        fileReader = new Scanner( new FileReader( filePath ) );
    }

    /**
     * Returns the next token of the input.
     *
     * <p>When the input is exhausted the scanner is closed and an
     * {@code EOF} token with an empty lexeme is returned; further calls keep
     * returning {@code EOF} tokens.
     *
     * @return the next token, never {@code null}
     * @throws IOException if the next piece of text is not a valid token
     */
    public Token getNextToken() throws IOException {

        if ( scannerClosed || !fileReader.hasNext() ) {
            if ( !scannerClosed ) {
                scannerClosed = true;
                fileReader.close();
            }
            return new Token( "", TokenType.EOF );
        }

        String currentData = fileReader.next();

        // Digits only: Integer.parseInt would also accept "-5" and "+5" and
        // would reject natural numbers larger than Integer.MAX_VALUE.
        if ( isNaturalNumber( currentData ) ) {
            return new Token( currentData, TokenType.NUM );
        }

        switch ( currentData ) {
            case "(":
                return new Token( currentData, TokenType.APar );
            case ")":
                return new Token( currentData, TokenType.FPar );
            case "+":
                return new Token( currentData, TokenType.SOMA );
            case "*":
                return new Token( currentData, TokenType.MULT );
            default:
                // Fail loudly instead of returning null, which made callers
                // crash later with a NullPointerException.
                throw new IOException( "Token inválido: " + currentData );
        }

    }

    /** Tells whether the text is a non-empty sequence of the digits 0-9. */
    private static boolean isNaturalNumber( String text ) {
        if ( text.isEmpty() ) {
            return false;
        }
        for ( int i = 0; i < text.length(); i++ ) {
            char c = text.charAt( i );
            if ( c < '0' || c > '9' ) {
                return false;
            }
        }
        return true;
    }

    /** Prints every token of {@code testAL.txt}, then four EOF tokens. */
    public static void main( String[] args ) throws IOException {

        AnalisadorLexico al = new AnalisadorLexico( "testAL.txt" );
        Token t = null;

        while ( ( t = al.getNextToken() ).token != TokenType.EOF ) {
            System.out.println( t );
        }

        // Asking again after the end of the input keeps yielding EOF.
        System.out.println( al.getNextToken() );
        System.out.println( al.getNextToken() );
        System.out.println( al.getNextToken() );
        System.out.println( al.getNextToken() );

    }

}
如果不能使用 Scanner 类,可以继续使用 BufferedReader,并自行将其数据切分为标记:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

/**
 * Lexer for arithmetic expressions read from a file with a
 * {@link BufferedReader}.
 *
 * <p>Tokens must be separated by spaces, tabs or line breaks. Recognized
 * tokens are natural numbers (one or more digits {@code 0-9}), {@code +},
 * {@code *}, {@code (} and {@code )}.
 */
public class AnalisadorLexico2 {

    /** Categories of token produced by the lexer. */
    public enum TokenType {
        NUM,
        SOMA,
        MULT,
        APar,
        FPar,
        EOF
    }

    /** A token: the text that was read and its category. */
    public class Token {

        String lexema;
        TokenType token;

        Token( String l, TokenType t ) {
            lexema = l;
            token = t;
        }

        Token( char l, TokenType t ) {
            lexema = String.valueOf( l );
            token = t;
        }

        @Override
        public String toString() {
            return lexema + " (" + token + ")";
        }

    }

    private BufferedReader fileReader;
    private boolean fileReaderClosed;

    /**
     * Opens the file to be tokenized.
     *
     * @param filePath path of the input file
     * @throws IOException if the file cannot be opened
     */
    public AnalisadorLexico2( String filePath ) throws IOException {
        fileReader = new BufferedReader( new FileReader( filePath ) );
    }

    /**
     * Returns the next token of the input.
     *
     * <p>When the input is exhausted the reader is closed and an {@code EOF}
     * token with an empty lexeme is returned; further calls keep returning
     * {@code EOF} tokens.
     *
     * @return the next token, never {@code null}
     * @throws IOException if reading fails or the next piece of text is not
     *         a valid token
     */
    public Token getNextToken() throws IOException {

        String currentData = nextBufferedReaderToken();

        if ( currentData == null ) {
            if ( !fileReaderClosed ) {
                fileReaderClosed = true;
                fileReader.close();
            }
            return new Token( "", TokenType.EOF );
        }

        // Digits only: Integer.parseInt would also accept "-5" and "+5" and
        // would reject natural numbers larger than Integer.MAX_VALUE.
        if ( isNaturalNumber( currentData ) ) {
            return new Token( currentData, TokenType.NUM );
        }

        switch ( currentData ) {
            case "(":
                return new Token( currentData, TokenType.APar );
            case ")":
                return new Token( currentData, TokenType.FPar );
            case "+":
                return new Token( currentData, TokenType.SOMA );
            case "*":
                return new Token( currentData, TokenType.MULT );
            default:
                // Fail loudly instead of returning null, which made callers
                // crash later with a NullPointerException.
                throw new IOException( "Token inválido: " + currentData );
        }

    }

    /**
     * Reads the next run of non-whitespace characters from the file.
     *
     * @return the text read, or {@code null} when the reader is closed or no
     *         non-whitespace character remains
     * @throws IOException if reading fails
     */
    public String nextBufferedReaderToken() throws IOException {

        StringBuilder data = null;

        while ( !fileReaderClosed ) {

            int d = fileReader.read();
            if ( d == -1 ) {
                break;
            }

            char c = (char) d;
            boolean separator = c == '\n' || c == ' ' || c == '\t' || c == '\r';

            if ( separator ) {
                if ( data != null ) {
                    // A separator after some text ends the current token.
                    break;
                }
                // Leading separators are discarded.
            } else {
                if ( data == null ) {
                    data = new StringBuilder();
                }
                data.append( c );
            }

        }

        return data == null ? null : data.toString();

    }

    /** Tells whether the text is a non-empty sequence of the digits 0-9. */
    private static boolean isNaturalNumber( String text ) {
        if ( text.isEmpty() ) {
            return false;
        }
        for ( int i = 0; i < text.length(); i++ ) {
            char c = text.charAt( i );
            if ( c < '0' || c > '9' ) {
                return false;
            }
        }
        return true;
    }

    /** Prints every token of {@code testAL.txt}, then four EOF tokens. */
    public static void main( String[] args ) throws IOException {

        AnalisadorLexico2 al = new AnalisadorLexico2( "testAL.txt" );
        Token t = null;

        while ( ( t = al.getNextToken() ).token != TokenType.EOF ) {
            System.out.println( t );
        }

        // Asking again after the end of the input keeps yielding EOF.
        System.out.println( al.getNextToken() );
        System.out.println( al.getNextToken() );
        System.out.println( al.getNextToken() );
        System.out.println( al.getNextToken() );

    }

}
我的 testAL.txt 文件内容如下:

    1234 + 5 * 65 + ( 44 * 55555 ) * 444 + ( 2354 * ( 34 + 44 ) )
1234 + 5 * 65 + ( 44 * 55555 ) * 444 + ( 2354 * ( 34 + 44 ) )
    1234 + 5 * 65 + ( 44 * 55555 ) * 444 + ( 2354 * ( 34 + 44 ) )
1234 + 5 * 65 + ( 44 * 55555 ) * 444 + ( 2354 * ( 34 + 44 ) ) 

可以记录上一个字符,以便知道如何处理当前字符;或者记录当前正在处理的标记类型。