flex/bison解析器使用分段错误进行编译

flex/bison解析器使用分段错误进行编译,bison,flex-lexer,Bison,Flex Lexer,我正在用flex/bison编写解析器(我可以用Python编写解析器,但我总是更喜欢经典的) 当我用以下代码编译代码时: gcc -lfl -ly chance.tab.c lex.yy.c -o chance 当我用一个文件运行程序时,我得到如下结果: Segmentation fault (core dumped) 以下文件供任何人参考: chance.y %{ #include <stdio.h> %} %union { char* str; } %t

我正在用
flex
/
bison
编写解析器(我可以用Python编写解析器,但我总是更喜欢经典的)

当我用以下代码编译代码时:

gcc -lfl -ly chance.tab.c lex.yy.c -o chance
当我用一个文件运行程序时,我得到如下结果:

Segmentation fault (core dumped)
以下文件供任何人参考:

chance.y

%{
    #include <stdio.h>
%}

%union {
    char* str;
}

%token ASSERT BREAK CATCH CLASS CONTINUE DEL EACH ELSE ELSEIF FINALLY FROM
%token FUNC IF LOAD PASS PRINT REPEAT RETURN RUN THROW TRY WHILE UNTIL
%token YIELD AND OR NOT KTRUE KFALSE NONE

%token MINUS EXCLAM PERCENT LAND LPAREN RPAREN STAR COMMA DOT SLASH COLON
%token SEMICOLON QUESTION AT LBRACKET BACKSLASH RBRACKET CIRCUMFLEX LBRACE
%token BAR RBRACE TILDE PLUS LTHAN EQUAL GTHAN INTDIV

%token ADDASS SUBASS MULASS DIVASS INTDASS MODASS ANDASS ORASS LTEQ EQUALS
%token GTEQ INCREMENT DECREMENT DBLSTAR

%token<str> NAME STRING INTEGER FLOAT
%token INDENT DEDENT NEWLINE

%type<str> exprs names args kwdspec dfltarg arg arglist exprlist name namelist
%type<str> funcargs parenexpr lstexpr eachspec optargs inheritance addop
%type<str> expr ifs elifs elif elses trys catchs catchx finally suite stmts
%type<str> stmt program

%start program

%%

exprs: expr                         { $$ = $1; }
|   exprs COMMA expr                { sprintf($$, "%s %s", $1, $3); }
;

names: name                         { $$ = $1; }
|   names COMMA name                { sprintf($$, "%s %s", $1, $3); }
;

args: arg                           { $$ = $1; }
|   args COMMA arg                  { sprintf($$, "%s %s", $1, $3); }
;

kwdspec:                            { $$ = "regular"; }
|   STAR                            { $$ = "list"; }
|   DBLSTAR                         { $$ = "keyword"; }
;

dfltarg:                            { $$ = "null"; }
|   EQUAL expr                      { $$ = $2; }
;

arg: kwdspec name dfltarg
        { sprintf($$, "(argument %s %s %s)", $1, $2, $3); } ;

arglist: args                       { sprintf($$, "[%s]", $1); } ;
exprlist: exprs                     { sprintf($$, "[%s]", $1); } ;
name: NAME                          { sprintf($$, "(name %s)", $1); } ;
namelist: names                     { sprintf($$, "[%s]", $1); } ;
funcargs: LPAREN arglist RPAREN     { $$ = $2 } ;
parenexpr: LPAREN exprlist RPAREN   { sprintf($$, "(tuple %s)", $2); } ;
lstexpr: LBRACKET exprlist RBRACKET { sprintf($$, "(list %s)", $2); } ;

eachspec: BAR namelist BAR          { sprintf($$, "(each-spec %s)", $2); } ;

optargs:                            { $$ = ""; }
|   funcargs                        { $$ = $1; }
;

inheritance:                        { $$ = ""; }
|   parenexpr                       { $$ = $1; }
;

addop:
    ADDASS                          { $$ = "add"; }
|   SUBASS                          { $$ = "sub"; }
|   MULASS                          { $$ = "mul"; }
|   DIVASS                          { $$ = "div"; }
|   INTDASS                         { $$ = "int-div"; }
|   MODASS                          { $$ = "mod"; }
|   ANDASS                          { $$ = "and"; }
|   ORASS                           { $$ = "or"; }
;

expr:       /* NotYetImplemented! */
    NUMBER                          { sprintf($$, "(number %s)", $1); }
|   TRUE                            { $$ = "(true)"; }
|   FALSE                           { $$ = "(false)"; }
|   NONE                            { $$ = "(none)"; }
|   STRING                          { sprintf($$, "(string %s)", $1); }
|   lstexpr                         { $$ = $1; }
;

ifs: IF expr suite                  { sprintf($$, "(if %s %s)", $2, $3); } ;

elifs:                              { $$ = ""; }
|   elifs elif                      { sprintf($$, "%s %s", $1, $2); }
;

elif: ELSEIF expr suite             { sprintf($$, "(else-if %s %s)", $2, $3); } ;

elses:                              { $$ = ""; }
|   ELSE suite                      { sprintf($$, "(else %s)", $2); }
;

trys: TRY suite                     { sprintf($$, "(try %s)", $2); } ;

catchs:                             { $$ = ""; }
| catchs catchx                     { sprintf($$, "%s %s", $1, $2); }
;

catchx: CATCH expr suite            { sprintf($$, "(catch %s %s)", $2, $3); } ;

finally: FINALLY suite              { sprintf($$, "(finally %s)", $2); } ;

suite: COLON stmts SEMICOLON        { sprintf($$, "(block [%s])", $2); } ;

stmts:                              { $$ = ""; }
|   stmts NEWLINE stmt              { sprintf($$, "%s %s", $1, $3); }
;

stmt:
    ASSERT expr                     { printf("(assert %s)", $2); }
|   BREAK                           { printf("(break)"); }
|   CATCH expr suite                { printf("(catch %s %s)", $2, $3); }
|   CLASS name inheritance suite    { printf("(class %s %s %s)", $2, $3, $4); }
|   CONTINUE                        { printf("(continue)"); }
|   DEL expr                        { printf("(del %s)", $2); }
|   expr DOT EACH eachspec suite    { printf("(each %s %s %s)", $1, $4, $5); }
|   FROM name LOAD namelist         { printf("(from %s %s)", $2, $4); }
|   FUNC name optargs suite         { printf("(func %s %s %s)", $2, $3, $4); }
|   ifs elifs elses                 { printf("(if-block %s %s %s)", $1, $2, $3); }
|   LOAD namelist                   { printf("(load %s)", $2); }
|   PASS                            { printf("(pass)"); }
|   PRINT expr                      { printf("(print %s)", $2); }
|   REPEAT expr suite               { printf("(repeat %s %s)", $2, $3); }
|   RUN expr                        { printf("(run %s)", $2); }
|   THROW expr                      { printf("(throw %s)", $2); }
|   trys catchs elses finally       { printf("(try-block %s %s %s %s)", $1, $2, $3, $4); }
|   WHILE expr suite                { printf("(while %s %s)", $2, $3); }
|   UNTIL expr suite                { printf("(until %s %s)", $2, $3); }
|   YIELD expr                      { printf("(yield %s)", $2); }
|   RETURN expr                     { printf("(return %s)", $2); }
|   expr addop expr                 { printf("(%s-assign %s %s)", $2, $1, $3); }
|   expr INCREMENT                  { printf("(increment %s)", $1); }
|   expr DECREMENT                  { printf("(decrement %s)", $1); }
|   expr                            { printf("(expr-stmt %s)", $1); }
;

program: stmts                      { printf("(program [%s])", $1); } ;
提前谢谢~~机会


EDIT:输入新的和改进的代码(不幸的是,仍然会产生segfault)。

您的lexer从不设置
yylval
,因此当解析器读取令牌的值时,它会得到随机垃圾。例如,在您的规则中:

expr: NUMBER { sprintf($$, "(number %s)", $1); }
$1
引用了
NUMBER
中的令牌值,因此将是随机垃圾。此外,
$
是规则的输出,因此传递给
sprintf
的值也将是随机垃圾(因为您没有先将其设置为某个值)

编辑

一个“简单”的解决方案是大量使用strdup/asprintf为字符串分配内存。例如,在您的.l文件中,您会有如下内容:

[+-]?[0-9]+(\.[0-9]+)?([Ee][+-]?[0-9]+)? { yylval = strdup(yytext); return NUMBER; }
那么您的expr规则将是:

expr: NUMBER { asprintf(&$$, "(number %s)", $1); free($1); }

当然,问题是,找出所有空闲的内存应该放在哪里以避免内存泄漏是很困难的。

我在网上读过关于flex的文章,看到了这个小片段:
yylval=strdup(yytext)
。这就是我应该使用的吗?@ChanceDeGuzman:是的,这对在lexer中设置yylval很有用。您仍然需要在解析器中设置
$$
,因此我必须使用
strdup
asprintf
?我对
asprintf
free
不太熟悉。我不知道classic
yacc
是否有它,但GNU
bison
有一个
%destructor
指令,可以让内存管理方面的工作变得更轻松。
[+-]?[0-9]+(\.[0-9]+)?([Ee][+-]?[0-9]+)? { yylval = strdup(yytext); return NUMBER; }
expr: NUMBER { asprintf(&$$, "(number %s)", $1); free($1); }