Parsing 如何减少解析器堆栈或'；取消移位'；当前令牌取决于以下内容？_Parsing_Compiler Construction_Grammar_Bison

Parsing 如何根据后续内容归约解析器堆栈，或“取消移入”（unshift）当前令牌？

parsing compiler-construction bison

Parsing 如何减少解析器堆栈或'；取消移位'；当前令牌取决于以下内容？,parsing,compiler-construction,grammar,bison,Parsing,Compiler Construction,Grammar,Bison,鉴于以下语言描述为：正式：（标识符操作员标识符+* 在简单英语中，零个或多个操作被写成标识符（左值），然后是运算符，然后是一个或多个标识符（右值）给定任意运算符@，该语言中的操作序列示例如下： A @ B C X @ Y 空白并不重要，也可以更清楚地写为： A @ B C X @ Y 您将如何使用类似于yacc的LALR解析器来解析它？我到目前为止所做的尝试我知道如何解析显式分隔的操作，比如A@bc；X@Y但我想知道解析上述输入是否可行，以及如何进行。以下是使用Flex/Bis

鉴于以下语言描述为：

正式：
```
( 标识符 运算符 标识符+ )*
```
在简单英语中，零个或多个操作被写成标识符（左值），然后是运算符，然后是一个或多个标识符（右值）

给定任意运算符 `@`，该语言中的操作序列示例如下：

A @ B C X @ Y

空白并不重要，也可以更清楚地写为：

A @ B C
X @ Y

您将如何使用类似于yacc的LALR解析器来解析它？

我到目前为止所做的尝试

我知道如何解析显式分隔的操作，比如

A @ B C ; X @ Y

但我想知道解析上述输入是否可行，以及如何进行。以下是使用Flex/Bison的（非功能性）最小示例

lex.l：

%{
/*
 * Scanner for the first (value-less) parser version.
 * Token codes ID, OP and ERROR come from the Bison-generated header.
 * Rules below: an identifier yields ID, '@' yields OP, whitespace is
 * skipped, and any other single character yields ERROR.
 */
#include "y.tab.h"
%}

%option noyywrap
%option yylineno

%%
[a-zA-Z][a-zA-Z0-9_]*   { return ID; }
@                       { return OP; }
[ \t\r\n]+              ; /* ignore whitespace */
.                       { return ERROR; } /* any other character causes parse error */
%%

%{
/*
 * Scanner for the second parser version, which carries semantic values.
 * For every identifier the matched text is duplicated with strdup() and
 * stored in yylval.a before ID is returned.
 * NOTE(review): the strdup() result is not checked here; a NULL would reach
 * whatever consumes yylval.a.
 */
#include "y.tab.h"
%}

%option noyywrap
%option yylineno

%%
[a-zA-Z][a-zA-Z0-9_]*   { yylval.a = strdup(yytext); return ID; }
@                       { return OP; }
[ \t\r\n]+              ; /* ignore whitespace */
.                       { return ERROR; } /* any other character causes parse error */
%%

yacc.y：

%{
/* Prologue: declarations the generated parser needs. */
#include <stdio.h>

extern int yylineno;            /* used by yyerror() to report the line */
void yyerror(const char *str);
int yylex();
%}

/* Bison options: lookahead correction and verbose syntax-error messages. */
%define parse.lac full
%define parse.error verbose

%token ID OP ERROR
/* NOTE(review): OP is declared left-associative, but no rule below has OP
 * competing with itself -- confirm whether this declaration has any effect. */
%left OP

%start opdefs

%%
/* opright: zero or more trailing identifiers. */
opright:
       | opright ID
       ;

/* opdef: ID OP ID followed by any further identifiers.
 * After an ID inside opright, the next ID may either extend this opdef or
 * begin the next one; a single token of lookahead cannot tell which. */
opdef: ID OP ID opright
     ;

/* opdefs: zero or more operation definitions (the start symbol). */
opdefs:
      | opdefs opdef
      ;
%%

/* Print a parser error message to stderr, tagged with the line number
 * currently held in yylineno. */
void yyerror(const char *str) {
    const int line = yylineno;

    fprintf(stderr, "error@%d: %s\n", line, str);
}

/*
 * Program entry point: run the parser once and report the outcome through
 * the process exit status (0 when yyparse() succeeds, 1 otherwise).
 * The command-line arguments are not used.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    /* yyparse() returns 0 on success and non-zero on failure. */
    return yyparse() == 0 ? 0 : 1;
}

%{
/* Prologue: headers, helper declarations and the global result list. */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>

extern int yylineno;            /* used by yyerror() to report the line */
void yyerror(const char *str);
int yylex();


/* Separators inserted when an operation is rebuilt as a string. */
#define STR_OP    " @ "
#define STR_SPACE " "

/* Returns a newly allocated concatenation of the three strings, or NULL. */
char *concat3(const char *, const char *, const char *);

/* Growable array of operation strings; my_oplist collects every operation
 * recognised by the grammar actions. */
struct oplist {
    char **ops;
    size_t capacity, count;
} my_oplist = { NULL, 0, 0 };

int oplist_append(struct oplist *, char *);
void oplist_clear(struct oplist *);
void oplist_dump(struct oplist *);
%}

/* Single semantic value type: a heap-allocated string. */
%union {
    char *a;
}

/* Bison options: lookahead correction and verbose syntax-error messages. */
%define parse.lac full
%define parse.error verbose

/* NOTE(review): END is declared but not referenced by the rules of this
 * grammar -- confirm whether it is still needed. */
%token ID OP END ERROR

%start input

%%

/* opbase: the mandatory "ID OP ID" head of an operation.
 * Builds the string "<lhs> @ <rhs>" and frees the two identifier strings. */
opbase: ID OP ID {
         char *s = concat3($<a>1, STR_OP, $<a>3);
         free($<a>1);
         free($<a>3);
         assert(s && "opbase: allocation failed");

         $<a>$ = s;
     }
     ;

/* ops: left-recursive sequence whose semantic value is the string of the
 * operation currently being built (the most recent one). */
ops: opbase {
       $<a>$ = $<a>1;
   }
   | ops opbase {
       /* A new head starts: the previous operation ($1) is complete, so it
        * is stored in the list and the new head becomes the current value. */
       int r = oplist_append(&my_oplist, $<a>1);
       assert(r == 0 && "ops: allocation failed");

       $<a>$ = $<a>2;
   }
   | ops ID {
       /* A further identifier: append it to the current operation string. */
       char *s = concat3($<a>1, STR_SPACE, $<a>2);
       free($<a>1);
       free($<a>2);
       assert(s && "ops: allocation failed");

       $<a>$ = s;
   }
   ;

/* input: the whole program; stores the last, still pending operation. */
input: ops {
         int r = oplist_append(&my_oplist, $<a>1);
         assert(r == 0 && "input: allocation failed");
     }
     ;       
%%

/*
 * Concatenate three NUL-terminated strings into one freshly malloc'd buffer.
 *
 * Returns the new string, which the caller must free(), or NULL when the
 * allocation fails. None of the arguments may be NULL.
 */
char *concat3(const char *s1, const char *s2, const char *s3) {
    const size_t n1 = strlen(s1);
    const size_t n2 = strlen(s2);
    const size_t n3 = strlen(s3);
    char *out = malloc(n1 + n2 + n3 + 1);

    if (out != NULL) {
        memcpy(out, s1, n1);
        memcpy(out + n1, s2, n2);
        memcpy(out + n1 + n2, s3, n3 + 1);  /* +1 also copies the terminator */
    }

    return out;
}


/*
 * Append the pointer `op` to the list (the string itself is not copied).
 * When the array is full it is enlarged by 32 slots with realloc().
 *
 * Returns 0 on success, or 1 if the array could not be enlarged; in that
 * case the list is left unchanged.
 */
int oplist_append(struct oplist *oplist, char *op) {
    if (oplist->count == oplist->capacity) {
        const size_t grown = oplist->capacity + 32;
        char **resized = realloc(oplist->ops, grown * sizeof(char *));

        if (resized == NULL)
            return 1;

        oplist->ops = resized;
        oplist->capacity = grown;
    }

    oplist->ops[oplist->count] = op;
    oplist->count += 1;
    return 0;
}

/*
 * Release every stored string and the pointer array itself, leaving the
 * list empty and reusable.
 *
 * The array pointer is reset to NULL after it is freed: oplist_append()
 * hands `ops` to realloc() whenever count == capacity, so a stale pointer
 * left behind here would be reallocated after having been freed.
 */
void oplist_clear(struct oplist *oplist) {
    for (size_t i = 0; i < oplist->count; ++i)
        free(oplist->ops[i]);
    oplist->count = 0;

    /* free(NULL) is a no-op, so a never-used list needs no special case. */
    free(oplist->ops);
    oplist->ops = NULL;
    oplist->capacity = 0;
}

/* Print every stored string on its own line on stdout, preceded by its
 * zero-based index. */
void oplist_dump(struct oplist *oplist) {
    size_t idx = 0;

    while (idx < oplist->count) {
        printf("%2zu: '%s'\n", idx, oplist->ops[idx]);
        ++idx;
    }
}


/* Print a parser error message to stderr, tagged with the line number
 * currently held in yylineno. */
void yyerror(const char *str) {
    const int line = yylineno;

    fprintf(stderr, "error@%d: %s\n", line, str);
}

/*
 * Program entry point: run the parser, print the collected operations and
 * release them. The exit status is 0 when yyparse() succeeds and 1
 * otherwise. The command-line arguments are not used.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    /* yyparse() returns 0 on success and non-zero on failure. */
    const int status = yyparse();

    /* Whatever was collected is still printed and freed after a failure. */
    oplist_dump(&my_oplist);
    oplist_clear(&my_oplist);

    return status == 0 ? 0 : 1;
}

上述内容始终被解析为：

reduce（A@B reduce（C X））@Y

我有种感觉，我必须以某种方式对前瞻令牌设置一个条件，即如果它是运算符，则不应移动最后一个标识符，并且应减少当前堆栈：

A @ B C X @ Y
        ^ *    // ^: current, *: lookahead
-> reduce 'A @ B C' !
-> shift 'X' !

我尝试了各种运算符优先级设置，但无法使其工作

我愿意接受一个不适用于Bison的解决方案。

该语言的一个简单语法是LALR（2），而Bison不生成LALR（2）解析器

任何LALR（2）语法都可以机械地修改，以生成具有兼容解析树的LALR（1）语法，但我不知道有任何自动工具可以做到这一点

手动执行转换是可能的，但很烦人，但请注意，您需要调整操作以恢复正确的解析树：

%{
  /* Data model built by the actions below. Each typedef names its struct so
   * that the %union members and the other structs can refer to it. */
  typedef struct IdList  { char *id; struct IdList *next; } IdList;
  typedef struct Def     { char *lhs; IdList *rhs; } Def;
  typedef struct DefList { Def *def; struct DefList *next; } DefList;
%}
%union {
  Def*     def;
  DefList* defs;
  char*    id;
}
%type <def>  ophead
%type <defs> opdefs
%token <id>   ID

%%

/* prog: the whole input. The rhs list of the last definition is still in
 * push order, so it is reversed before the definitions are shown. */
prog  : opdefs        { $1->def->rhs = IdList_reverse($1->def->rhs);
                        DefList_show(DefList_reverse($1)); }
/* ophead: the mandatory "ID @ ID" start of a definition. */
ophead: ID '@' ID     { $$ = Def_new($1);
                        $$->rhs = IdList_push($$->rhs, $3); } 
/* opdefs: left-recursive list; $1->def is the definition still being built. */
opdefs: ophead        { $$ = DefList_push(NULL, $1); }
      | opdefs ID     { $1->def->rhs = IdList_push($1->def->rhs, $2);
                        /* Bison supplies $$ = $1 only for rules without an
                         * action, so set the result explicitly. */
                        $$ = $1; }
      | opdefs ophead { $1->def->rhs = IdList_reverse($1->def->rhs);
                        $$ = DefList_push($1, $2); }

在 `SEEK_AT` 起始条件下，我们只对 `@` 感兴趣。如果我们找到一个，那么该 `ID` 就是 `def` 的开始，我们返回正确的令牌类型。如果我们找到任何其他内容（除了空格），我们将使用 `yyless` 将字符返回到输入流，并返回 `ID` 令牌类型。请注意，`yylval` 已在最初扫描 `ID` 时设置，因此无需在此担心

上述代码中唯一复杂的部分是

EOF

处理。一旦检测到

EOF

，无论是使用

yyless

还是使用

unput

都无法将其重新插入输入流。让扫描仪再次读取

EOF

也不合法。因此，它需要得到充分处理。不幸的是，在

SEEK_AT

start条件下，完全处理

EOF

需要发送两个令牌：首先是已检测到的

ID

令牌，然后是

yyparse

将识别为输入结束的0。如果没有推送解析器，我们无法从单个扫描程序操作发送两个令牌，因此我们需要注册已接收到

EOF

的事实，并在下次调用扫描程序时检查该事实

第一条规则之前的缩进代码插入到

yylex

函数的顶部，因此它可以声明局部变量，并在扫描开始之前执行任何需要执行的操作。如前所述，此lexer不是可重入的，但它是可重新启动的，因为持久状态在

if（deferred_eof）

操作中重置。要使其重新进入，您只需将

deferred_eof

放在

yystate

结构中，而不是将其设置为静态本地。

以下是有用的注释和答案，以下是我的想法：

lex.l：

%{
/*
 * Scanner for the first (value-less) parser version.
 * Token codes ID, OP and ERROR come from the Bison-generated header.
 * Rules below: an identifier yields ID, '@' yields OP, whitespace is
 * skipped, and any other single character yields ERROR.
 */
#include "y.tab.h"
%}

%option noyywrap
%option yylineno

%%
[a-zA-Z][a-zA-Z0-9_]*   { return ID; }
@                       { return OP; }
[ \t\r\n]+              ; /* ignore whitespace */
.                       { return ERROR; } /* any other character causes parse error */
%%

%{
/*
 * Scanner for the second parser version, which carries semantic values.
 * For every identifier the matched text is duplicated with strdup() and
 * stored in yylval.a before ID is returned.
 * NOTE(review): the strdup() result is not checked here; a NULL would reach
 * whatever consumes yylval.a.
 */
#include "y.tab.h"
%}

%option noyywrap
%option yylineno

%%
[a-zA-Z][a-zA-Z0-9_]*   { yylval.a = strdup(yytext); return ID; }
@                       { return OP; }
[ \t\r\n]+              ; /* ignore whitespace */
.                       { return ERROR; } /* any other character causes parse error */
%%

yacc.y：

%{
/* Prologue: declarations the generated parser needs. */
#include <stdio.h>

extern int yylineno;            /* used by yyerror() to report the line */
void yyerror(const char *str);
int yylex();
%}

/* Bison options: lookahead correction and verbose syntax-error messages. */
%define parse.lac full
%define parse.error verbose

%token ID OP ERROR
/* NOTE(review): OP is declared left-associative, but no rule below has OP
 * competing with itself -- confirm whether this declaration has any effect. */
%left OP

%start opdefs

%%
/* opright: zero or more trailing identifiers. */
opright:
       | opright ID
       ;

/* opdef: ID OP ID followed by any further identifiers.
 * After an ID inside opright, the next ID may either extend this opdef or
 * begin the next one; a single token of lookahead cannot tell which. */
opdef: ID OP ID opright
     ;

/* opdefs: zero or more operation definitions (the start symbol). */
opdefs:
      | opdefs opdef
      ;
%%

/* Print a parser error message to stderr, tagged with the line number
 * currently held in yylineno. */
void yyerror(const char *str) {
    const int line = yylineno;

    fprintf(stderr, "error@%d: %s\n", line, str);
}

/*
 * Program entry point: run the parser once and report the outcome through
 * the process exit status (0 when yyparse() succeeds, 1 otherwise).
 * The command-line arguments are not used.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    /* yyparse() returns 0 on success and non-zero on failure. */
    return yyparse() == 0 ? 0 : 1;
}

%{
/* Prologue: headers, helper declarations and the global result list. */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>

extern int yylineno;            /* used by yyerror() to report the line */
void yyerror(const char *str);
int yylex();


/* Separators inserted when an operation is rebuilt as a string. */
#define STR_OP    " @ "
#define STR_SPACE " "

/* Returns a newly allocated concatenation of the three strings, or NULL. */
char *concat3(const char *, const char *, const char *);

/* Growable array of operation strings; my_oplist collects every operation
 * recognised by the grammar actions. */
struct oplist {
    char **ops;
    size_t capacity, count;
} my_oplist = { NULL, 0, 0 };

int oplist_append(struct oplist *, char *);
void oplist_clear(struct oplist *);
void oplist_dump(struct oplist *);
%}

/* Single semantic value type: a heap-allocated string. */
%union {
    char *a;
}

/* Bison options: lookahead correction and verbose syntax-error messages. */
%define parse.lac full
%define parse.error verbose

/* NOTE(review): END is declared but not referenced by the rules of this
 * grammar -- confirm whether it is still needed. */
%token ID OP END ERROR

%start input

%%

/* opbase: the mandatory "ID OP ID" head of an operation.
 * Builds the string "<lhs> @ <rhs>" and frees the two identifier strings. */
opbase: ID OP ID {
         char *s = concat3($<a>1, STR_OP, $<a>3);
         free($<a>1);
         free($<a>3);
         assert(s && "opbase: allocation failed");

         $<a>$ = s;
     }
     ;

/* ops: left-recursive sequence whose semantic value is the string of the
 * operation currently being built (the most recent one). */
ops: opbase {
       $<a>$ = $<a>1;
   }
   | ops opbase {
       /* A new head starts: the previous operation ($1) is complete, so it
        * is stored in the list and the new head becomes the current value. */
       int r = oplist_append(&my_oplist, $<a>1);
       assert(r == 0 && "ops: allocation failed");

       $<a>$ = $<a>2;
   }
   | ops ID {
       /* A further identifier: append it to the current operation string. */
       char *s = concat3($<a>1, STR_SPACE, $<a>2);
       free($<a>1);
       free($<a>2);
       assert(s && "ops: allocation failed");

       $<a>$ = s;
   }
   ;

/* input: the whole program; stores the last, still pending operation. */
input: ops {
         int r = oplist_append(&my_oplist, $<a>1);
         assert(r == 0 && "input: allocation failed");
     }
     ;       
%%

/*
 * Concatenate three NUL-terminated strings into one freshly malloc'd buffer.
 *
 * Returns the new string, which the caller must free(), or NULL when the
 * allocation fails. None of the arguments may be NULL.
 */
char *concat3(const char *s1, const char *s2, const char *s3) {
    const size_t n1 = strlen(s1);
    const size_t n2 = strlen(s2);
    const size_t n3 = strlen(s3);
    char *out = malloc(n1 + n2 + n3 + 1);

    if (out != NULL) {
        memcpy(out, s1, n1);
        memcpy(out + n1, s2, n2);
        memcpy(out + n1 + n2, s3, n3 + 1);  /* +1 also copies the terminator */
    }

    return out;
}


/*
 * Append the pointer `op` to the list (the string itself is not copied).
 * When the array is full it is enlarged by 32 slots with realloc().
 *
 * Returns 0 on success, or 1 if the array could not be enlarged; in that
 * case the list is left unchanged.
 */
int oplist_append(struct oplist *oplist, char *op) {
    if (oplist->count == oplist->capacity) {
        const size_t grown = oplist->capacity + 32;
        char **resized = realloc(oplist->ops, grown * sizeof(char *));

        if (resized == NULL)
            return 1;

        oplist->ops = resized;
        oplist->capacity = grown;
    }

    oplist->ops[oplist->count] = op;
    oplist->count += 1;
    return 0;
}

/*
 * Release every stored string and the pointer array itself, leaving the
 * list empty and reusable.
 *
 * The array pointer is reset to NULL after it is freed: oplist_append()
 * hands `ops` to realloc() whenever count == capacity, so a stale pointer
 * left behind here would be reallocated after having been freed.
 */
void oplist_clear(struct oplist *oplist) {
    for (size_t i = 0; i < oplist->count; ++i)
        free(oplist->ops[i]);
    oplist->count = 0;

    /* free(NULL) is a no-op, so a never-used list needs no special case. */
    free(oplist->ops);
    oplist->ops = NULL;
    oplist->capacity = 0;
}

/* Print every stored string on its own line on stdout, preceded by its
 * zero-based index. */
void oplist_dump(struct oplist *oplist) {
    size_t idx = 0;

    while (idx < oplist->count) {
        printf("%2zu: '%s'\n", idx, oplist->ops[idx]);
        ++idx;
    }
}


/* Print a parser error message to stderr, tagged with the line number
 * currently held in yylineno. */
void yyerror(const char *str) {
    const int line = yylineno;

    fprintf(stderr, "error@%d: %s\n", line, str);
}

/*
 * Program entry point: run the parser, print the collected operations and
 * release them. The exit status is 0 when yyparse() succeeds and 1
 * otherwise. The command-line arguments are not used.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    /* yyparse() returns 0 on success and non-zero on failure. */
    const int status = yyparse();

    /* Whatever was collected is still printed and freed after a failure. */
    oplist_dump(&my_oplist);
    oplist_clear(&my_oplist);

    return status == 0 ? 0 : 1;
}

这个答案是相关的：谢谢！在根据您在第一条评论中链接的答案提出修复方案后，我也将其作为一个答案发布，以免阻塞问题。理论上的解释和 Bison 做事的方式正是我想要的。我仍然不确定是否应该在lexer中修复我的特定用例（并进行一些字符串修改以从yytext中提取左值标识符），还是在解析器中修复。@dummydev:lexer中不需要字符串修改。我将添加一个简单的lexer实现。非常非常有用。我现在明白了。再次感谢！顺便说一句，需要将

-std=gnu99

或

-std=gnu11

添加到问题生成命令的

gcc

-部分以编译此文件。