Compiler construction GNU Bison:语法错误,意外<;代币>;
我正在使用bison为一种玩具编程语言编写解析器,但我遇到了一个难题: 我的Compiler construction GNU Bison:语法错误,意外<;代币>;,compiler-construction,bison,yacc,context-free-grammar,Compiler Construction,Bison,Yacc,Context Free Grammar,我正在使用bison为一种玩具编程语言编写解析器,但我遇到了一个难题: 我的grammar.y文件如下: %{ #include <stdio.h> #include "util.h" #include "errormsg.h" #define YYDEBUG 1 int yylex(void); /* function prototype */ void yyerror(char *s) { EM_error(EM_tokPos, "%s", s); } %} %unio
grammar.y
文件如下:
%{
#include <stdio.h>
#include "util.h"
#include "errormsg.h"
#define YYDEBUG 1
int yylex(void); /* function prototype */
void yyerror(char *s)
{
EM_error(EM_tokPos, "%s", s);
}
%}
%union {
int pos;
int ival;
string sval;
}
%token <sval> TK_ID TK_STRING
%token <ival> TK_INT
%token <pos>
TK_COMMA TK_COLON TK_SEMICOLON TK_LPAREN TK_RPAREN TK_LBRACK TK_RBRACK
TK_LBRACE TK_RBRACE TK_DOT TK_ASSIGN
TK_ARRAY TK_IF TK_THEN TK_ELSE TK_WHILE TK_FOR TK_TO TK_DO TK_LET TK_IN
TK_END TK_OF TK_BREAK TK_NIL
TK_FUNCTION TK_VAR TK_TYPE
/* Precedence in Bison is weird: lower is higher. Take a look at the spec too. */
%left <pos> TK_OR
%left <pos> TK_AND
%nonassoc <pos> TK_EQ TK_NEQ TK_LT TK_LE TK_GT TK_GE
%left <pos> TK_PLUS TK_MINUS
%left <pos> TK_TIMES TK_DIVIDE
%left <pos> TK_UMINUS
%error-verbose
%start program
%%
/* According to the spec, Tiger programs are just an expression exp. */
program: exp
/* An expression can be many things; consult the spec for more info: Expressions. */
/* For the %prec rule, take a look at 5.4 Context-Dependent Precedence on bison manual */
exp:
lvalue
| TK_NIL
| exp exp_seq_aug
| TK_LPAREN TK_RPAREN
| TK_LET TK_IN TK_END
| TK_INT
| TK_STRING
| TK_MINUS exp %prec TK_UMINUS
| TK_ID TK_LPAREN TK_RPAREN
| TK_ID TK_LPAREN exp params TK_RPAREN
| exp TK_PLUS exp
| exp TK_MINUS exp
| exp TK_TIMES exp
| exp TK_DIVIDE exp
| exp TK_EQ exp
| exp TK_NEQ exp
| exp TK_GT exp
| exp TK_LT exp
| exp TK_GE exp
| exp TK_LE exp
| exp TK_AND exp
| exp TK_OR exp
| TK_ID TK_LBRACE TK_RBRACE
| TK_ID TK_LBRACE TK_ID TK_EQ exp record_exp TK_RBRACE
| TK_ID TK_LBRACK exp TK_RBRACK TK_OF exp
| lvalue TK_ASSIGN exp
| TK_IF exp TK_THEN exp TK_ELSE exp
| TK_IF exp TK_THEN exp
| TK_WHILE exp TK_DO exp
| TK_FOR TK_ID TK_ASSIGN exp TK_TO exp TK_DO exp
| TK_BREAK
| TK_LET decl_seq TK_IN exp_seq_aug TK_END
;
decl_seq:
/* empty */
| decl_seq decl
;
decl:
type_decl
| var_decl
| func_decl
;
var_decl:
TK_VAR TK_ID TK_ASSIGN exp
| TK_VAR TK_ID TK_COLON TK_ID TK_ASSIGN exp
;
func_decl:
TK_FUNCTION TK_ID TK_LPAREN type_fields TK_RPAREN TK_EQ exp
| TK_FUNCTION TK_ID TK_LPAREN type_fields TK_COLON TK_ID TK_EQ exp
;
type_decl:
TK_TYPE TK_ID TK_EQ type
;
type:
TK_TYPE
| TK_LBRACE type_fields TK_RBRACE
| TK_ARRAY TK_OF TK_ID
;
type_fields:
/* empty */
| TK_ID TK_COLON TK_ID type_fields
| TK_COMMA TK_ID TK_COLON TK_ID type_fields
;
lvalue:
TK_ID
| lvalue TK_DOT TK_ID
| lvalue TK_LBRACK exp TK_RBRACK
;
exp_seq:
/* epsilon */
| TK_SEMICOLON exp
| exp_seq TK_SEMICOLON exp
;
exp_seq_aug:
TK_LPAREN exp_seq TK_RPAREN
;
params:
/* epsilon */
| params TK_COMMA exp
;
record_exp:
/* epsilon */
| record_exp TK_COMMA TK_ID TK_EQ exp
;
这非常令人沮丧,因为解析器说它找到了一个更大或相等的令牌,而测试文件没有:
/* define a recursive function */
let
/* calculate n! */
function nfactor(n: int): int =
if n = 0
then 1
else n * nfactor(n-1)
in
nfactor(10)
end
我怎么可能调试这个
[编辑]:以下是我的flex
lexer的源代码,请参见:
%{
#include <string.h>
#include "util.h"
#include "tokens.h"
#include "errormsg.h"
int charPos = 1;
int
yywrap (void)
{
charPos = 1;
return 1;
}
// Adjust the token position in the string
// Mainly used for error checking
void
adjust (void)
{
EM_tokPos = charPos;
charPos += yyleng;
}
%}
/* Will be used for conditional activation of the comment rule. */
%x C_COMMENT
digits [0-9]+
letters [_a-zA-Z]+
%%
" " {adjust(); continue;}
\n {adjust(); EM_newline(); continue;}
\t {adjust(); continue;}
"/*" {adjust(); BEGIN(C_COMMENT);}
<C_COMMENT>[^*\n] {adjust();}
<C_COMMENT>"*/" {adjust(); BEGIN(INITIAL);}
\"(\\.|[^"])*\" {adjust(); yylval.sval = String(yytext); return STRING;}
"," {adjust(); return COMMA;}
";" {adjust(); return SEMICOLON;}
":" {adjust(); return COLON;}
"." {adjust(); return DOT;}
"+" {adjust(); return PLUS;}
"-" {adjust(); return MINUS;}
"*" {adjust(); return TIMES;}
"/" {adjust(); return DIVIDE;}
"=" {adjust(); return EQ;}
"<>" {adjust(); return NEQ;}
"<" {adjust(); return LT;}
"<=" {adjust(); return LE;}
">" {adjust(); return GT;}
">=" {adjust(); return GE;}
"&" {adjust(); return AND;}
"|" {adjust(); return OR;}
":=" {adjust(); return ASSIGN;}
"(" {adjust(); return LPAREN;}
")" {adjust(); return RPAREN;}
"{" {adjust(); return LBRACE;}
"}" {adjust(); return RBRACE;}
"[" {adjust(); return LBRACK;}
"]" {adjust(); return RBRACK;}
for {adjust(); return FOR;}
if {adjust(); return IF;}
then {adjust(); return THEN;}
else {adjust(); return ELSE;}
while {adjust(); return WHILE;}
to {adjust(); return TO;}
do {adjust(); return DO;}
let {adjust(); return LET;}
in {adjust(); return IN;}
end {adjust(); return END;}
of {adjust(); return OF;}
break {adjust(); return BREAK;}
nil {adjust(); return NIL;}
function {adjust(); return FUNCTION;}
var {adjust(); return VAR;}
type {adjust(); return TYPE;}
array {adjust(); return ARRAY;}
{digits} {adjust(); yylval.ival = atoi (yytext); return INT;}
{letters}[a-zA-Z0-9_]* {adjust(); yylval.sval = String (yytext); return ID;}
. {adjust(); EM_error (EM_tokPos,"illegal token");}
%{
#包括
#包括“util.h”
#包括“tokens.h”
#包括“errormsg.h”
int charPos=1;
int
yywrap(无效)
{
charPos=1;
返回1;
}
//调整字符串中的标记位置
//主要用于错误检查
无效的
调整(无效)
{
EM_tokPos=charPos;
charPos+=yyleng;
}
%}
/*将用于有条件激活注释规则*/
%x C_评论
数字[0-9]+
字母[_a-zA-Z]+
%%
“{adjust();continue;}
\n{adjust();EM_newline();continue;}
\t{adjust();continue;}
“/*”{adjust();BEGIN(C_COMMENT);}
[^*\n]{adjust();}
“*/”{adjust();BEGIN(INITIAL);}
\“(\\.\[^”])*\”{adjust();yylval.sval=String(yytext);返回字符串;}
“,”{adjust();返回逗号;}
";{adjust();返回分号;}
“:”{adjust();返回冒号;}
“{adjust();返回点;}
“+”{adjust();return PLUS;}
“-”{adjust();返回减号;}
“*”{adjust();返回时间;}
“/”{adjust();返回DIVIDE;}
“=”{adjust();返回EQ;}
“{adjust();return NEQ;}
“=”{adjust();返回GE;}
&“{adjust();return AND;}
“|”{adjust();return OR;}
“:=”{adjust();返回赋值;}
({adjust();return LPAREN;}
“{adjust();返回RPAREN;}”
{{adjust();返回LBRACE;}
“}”{adjust();return RBRACE;}
“[”{adjust();return LBRACK;}
“]”{adjust();return RBRACK;}
对于{adjust();返回for;}
if{adjust();返回if;}
然后{adjust();返回然后;}
else{adjust();return else;}
while{adjust();返回while;}
到{adjust();返回到;}
do{adjust();返回do;}
let{adjust();return let;}
在{adjust();return in;}
end{adjust();return end;}
of{adjust();返回;}
break{adjust();返回break;}
nil{adjust();返回nil;}
函数{adjust();返回函数;}
var{adjust();返回var;}
类型{adjust();返回类型;}
数组{adjust();返回数组;}
{digits}{adjust();yylval.ival=atoi(yytext);返回INT;}
{letters}[a-zA-Z0-9_]*{adjust();yylval.sval=String(yytext);返回ID;}
.{adjust();EM_错误(EM_tokPos,“非法令牌”);}
首先,您需要学习使用Bison调试选项。这将输出所有状态的转储,当然,调试它们需要大量耐心和时间,乍一看,您通常至少可以缩小导致问题的规则范围
至于你的问题,你的lexer没有返回bison定义的令牌
例如,在Bison中,你有%token TK_GE
,但是你的词法分析器返回GE
。Bison语法只知道TK_GE,这就是它所期望的。如果我记得的话,它会将token定义为ASCII序列之上的递增数字序列,你必须在词法分析器中使用这些值
除非您正在进行某种我在tokens.h中看不到的重新定义,否则您需要重写lexer来完成:
">=" {adjust(); return TK_GE;}
可能您在某个地方有
#define GE 42
,而bison正在生成一个带有#define TK#u GE 21
(示例值)的令牌文件.你的lexer在哪里?@codenheim刚刚更新了问题,使其也包含lexer代码。好的,我在回答。简而言之,你的lexer没有返回bison令牌指令定义的实际令牌。@codenheim怎么会这样?它(是bison)不是应该调用yylex()
并从那里获取令牌?是不是bison文件中的%token
指令对我耍了一个卑鄙的把戏?谢谢你的回答!我现在会检查它,完成后尽快给你回复。请记住,你的lexer和解析器必须共享令牌定义。这只是CHuge谢谢!尽管我仍然了解语法错误,这一次是因为我的语法,而不是一些不可理解的错误消息!你帮助我越过了砖墙!为了记录在案,我所要做的就是将lexer返回的令牌更改为前缀为TK
,并将令牌定义文件更改为我的自定义文件(tokens.h
)由野牛生产的(y.tab.h
)没问题。最后,标记只是整数。因此,您可以返回1、2、3、256、257、258等,Bison可以工作,但为了让您的词法分析器和解析器在标记的定义上达成一致,您可以使用符号定义,这是一个#define常量。将来,如果您要手动重写解析器,您仍然可以重用这些标记尽管您需要直接维护令牌文件,但仍需要使用oken定义。
How can I possibly debug this?
">=" {adjust(); return TK_GE;}