Java语法分析器
我的代币课Java语法分析器,java,regex,tokenize,Java,Regex,Tokenize,我的代币课 public class Token { public enum TokenType { RELATIONALOPERATOR("==|<>|<=|>=|>|<"), MULTIPLYINGOPERATOR("[*/]"), SIGNADDINGOP("[+-]"), LEFTPAREN( "\\("), RIGHTPAREN("\\)"), COMMA(","), PEROID("\\."), ASSIGNMENTOP("="), SEM
/**
 * A single lexical token: the category it was classified as plus the exact
 * text that was matched in the input.
 */
public class Token {

    /**
     * Token categories, each carrying the regular expression that recognises it.
     *
     * <p>Declaration order is significant for a lexer that joins these patterns
     * into one alternation in {@code values()} order: the first alternative that
     * matches at a position wins. No pattern here can match the empty string,
     * otherwise an early alternative would "succeed" on every character and
     * hide all later ones.
     */
    public enum TokenType {
        RELATIONALOPERATOR("==|<>|<=|>=|>|<"),
        // '/' counts as an operator only when it does not start "//"; without
        // the lookahead this earlier alternative would make COMMENT unreachable.
        MULTIPLYINGOPERATOR("\\*|/(?!/)"),
        SIGNADDINGOP("[+-]"),
        LEFTPAREN("\\("),
        RIGHTPAREN("\\)"),
        COMMA(","),
        PEROID("\\."),
        ASSIGNMENTOP("="),
        SEMICOLON(";"),
        // Keywords end with \b so that identifiers which merely start with a
        // keyword (e.g. "ifx", "publicKey") are left for VARIABLE.
        WHILE("while\\b"),
        IF("if\\b"),
        ELSE("else\\b"),
        // A line comment runs to the end of the line ('.' stops at line breaks).
        COMMENT("//.*"),
        PUBLIC("public\\b"),
        PRIVATE("private\\b"),
        PACKAGE("package\\b"),
        IMPORT("import\\b"),
        ENUM("enum\\b"),
        // One or more digits; "[0-9]*" also matched the empty string.
        CONSTANT("[0-9]+"),
        VARIABLE("[a-zA-Z][a-zA-Z0-9]*"),
        // One or more whitespace characters (\s already covers tabs).
        SKIP("\\s+"),
        // Exactly one otherwise unrecognised character, so lexing resumes
        // immediately after it instead of discarding the rest of the line.
        INVALID(".");

        /** Regular expression source that recognises this token type. */
        public final String pattern;

        private TokenType(String pattern) {
            this.pattern = pattern;
        }
    }

    /** Category this token was classified as. */
    public TokenType type;

    /** Exact input text that produced this token. */
    public String data;

    /**
     * Creates a token.
     *
     * @param type category of the token
     * @param data text that was matched
     */
    public Token(TokenType type, String data) {
        this.type = type;
        this.data = data;
    }

    /**
     * Formats the token as {@code [ TYPE, text ]}.
     *
     * @return the type name and matched text in bracketed form
     */
    @Override
    public String toString() {
        return String.format("[ %s, %s ]", type.name(), this.data);
    }
}
我希望它能打印出类似 [PUBLIC, public]、[CLASS, class] 这样的内容……即以 [TYPENAMEHERE, actualRegexMatchHere] 的形式输出,直到读完整个文件。多亏了欧文的建议,特殊字符现在已被转义,但问题依然存在。最初我的正则表达式定义为:
CONSTANT("[0-9]*"), VARIABLE("[a-zA-Z][a-zA-Z0-9]*"), SKIP("[\\s+\\t]*"), INVALID(".*")
[0-9]*
在任何位置都能匹配成功(包括匹配空字符串),因为它的量词是星号(*)。将正则表达式中的所有 * 替换为 + 修复了该问题。这不算答案,但你的解析器需要用不同的标记来表示 =,并且应该转义标记中的特殊字符(如 LEFTPAREN 的左括号),否则它们会被解释为正则表达式的特殊字符。
/**
 * Splits {@code input} into tokens.
 *
 * <p>Every {@code TokenType} pattern is wrapped in a named group and the groups
 * are joined into one alternation in {@code TokenType.values()} order, so at
 * each position the first type whose pattern matches wins. Matches of
 * {@code TokenType.SKIP} and zero-length matches are dropped; nothing is
 * printed.
 *
 * @param input text to tokenize
 * @return the recognised tokens, in input order
 */
public static ArrayList<Token> lex(String input) {
    ArrayList<Token> tokens = new ArrayList<Token>();

    // Build "(?<NAME1>pattern1)|(?<NAME2>pattern2)|..." so the group name
    // identifies which token type produced a match.
    StringBuilder tokenPatternsBuffer = new StringBuilder();
    for (TokenType tokenType : TokenType.values()) {
        if (tokenPatternsBuffer.length() > 0) {
            tokenPatternsBuffer.append('|');
        }
        tokenPatternsBuffer.append(String.format("(?<%s>%s)", tokenType.name(), tokenType.pattern));
    }
    Pattern tokenPatterns = Pattern.compile(tokenPatternsBuffer.toString());

    Matcher matcher = tokenPatterns.matcher(input);
    while (matcher.find()) {
        // A zero-length match carries no text; emitting it would add a token
        // with empty data.
        if (matcher.start() == matcher.end()) {
            continue;
        }
        // Whitespace is recognised only so that it can be discarded.
        if (matcher.group(TokenType.SKIP.name()) != null) {
            continue;
        }
        for (TokenType tk : TokenType.values()) {
            String text = matcher.group(tk.name());
            if (text != null) {
                tokens.add(new Token(tk, text));
                // Only one alternative takes part in a match.
                break;
            }
        }
    }
    return tokens;
}
/**
 * Reads the whole file at {@code path} and decodes its bytes as UTF-8.
 *
 * @param path location of the file to read
 * @return the file's contents as a string
 * @throws IOException if the file cannot be read
 */
static String readFile(String path) throws IOException {
    byte[] encoded = Files.readAllBytes(Paths.get(path));
    // Name the charset explicitly: new String(byte[]) falls back to the
    // platform default, which makes the result machine-dependent.
    return new String(encoded, java.nio.charset.StandardCharsets.UTF_8);
}
/**
 * Tokenizes a source file and prints one token per line.
 *
 * @param args optional; {@code args[0]} is the file to tokenize, defaulting to
 *             {@code Test.java} when no argument is given
 * @throws IOException if the file cannot be read
 */
public static void main(String[] args) throws IOException {
    String path = args.length > 0 ? args[0] : "Test.java";
    String toDebugg = readFile(path);
    ArrayList<Token> myTokens = lex(toDebugg);
    for (Token tok : myTokens) {
        System.out.println(tok);
    }
}
package myLex;
// Minimal class with an empty entry point.
// NOTE(review): presumably the "Test.java" sample input that the lexer's main reads — confirm.
public class Test {
// Intentionally empty: the body does nothing.
public static void main(String[] args) {
}
}