2017-04-16 30 views
0

我想明白爲什麼野牛以下規則BISON + FLEX的語法 - 爲什麼令牌被連接在一起

stmt: 
    declaration     { ... } 
    | assignment    { ... } 
    | exp      { ... } 
    | ID ';' <-- this rule  { ...  
           fprintf(stderr, "\n my id is '%s'", $1); 
           ... 

連接兩個記號,​​如果你檢查輸出會明白我的意思。我運行我的解析器,並將字符ab;輸入到程序中。根據我的野牛語法,這應該被解析爲ID,然後是;。並且在某種程度上它是發生了什麼。

然而,當我嘗試使用規則ID ';'方案產出ab;$1變量,而不是我的ab

運行程序

ab;         <-- this my input to the program 

#TOKEN 294[ID] yytext -> ab 
Next token is token "identifier" (1.1:) 
Shifting token "identifier" (1.1:) 
Entering state 5 
Reading a token: 
#TOKEN 59[] yytext -> ; 
Next token is token ';' (1.1:) 
Shifting token ';' (1.1:) 
Entering state 16 
Reducing stack by rule 6 (line 133): 
    $1 = token "identifier" (1.1:)  <-- first token which is 'ab' 
    $2 = token ';' (1.1:)    <-- second token which is ';' 

[stmt] 4: 
my id is 'ab;'      <-- the issue! This should be 'ab' not 'ab;' 
ERROR: No such ID ab; found 
-> $$ = nterm stmt (1.1:) 
Stack now 0 1 
Entering state 10 
Reducing stack by rule 2 (line 126): 
    $1 = nterm prog (1.1:) 
    $2 = nterm stmt (1.1:) 
-> $$ = nterm prog (1.1:) 
Stack now 0 
Entering state 1 
Reading a token: 

語法

%{ 
#include <stdio.h> 
#include <string> 
#include <map> 
#include <math.h> 
#include "noname-parse.h" 
#include "noname-types.h" 

extern int yylex(void); 
extern void yyerror(const char *error_msg); 
extern void division_by_zero(YYLTYPE &yylloc); 


std::map<std::string, symrec*> symbol_table; 
std::map<std::string, symrec*>::iterator symbol_table_it; 
%} 

////////////////////////////////////////////////// 
///////////* Bison declarations. */////////////// 
////////////////////////////////////////////////// 

%union { 

    char* id_v; 
    double double_v; 
    long long_v; 

    symrecv symrecv; 
    char* error_msg; 
}; 

%{ 

    bool symbol_exist(const char* key) { 
    std::string skey = key; 
    symbol_table_it = symbol_table.find(skey); 
    return (symbol_table_it != symbol_table.end()); 
    } 

    void symbol_insert(const char* key, symrecv symrecv) { 
    std::string skey = key; 
    symbol_table[skey] = symrecv; 
    } 

    symrecv symbol_retrieve(const char* key) { 
    std::string skey = key; 
    return symbol_table[skey]; 
    } 

    void print_stmt(symrecv sym) { 

    if (sym->type == TYPE_LONG) { 
     fprintf(stderr, "%d", sym->value.intv); 

    } else if (sym->type == TYPE_DOUBLE) { 
     fprintf(stderr, "%lf", sym->value.doublev); 

    } else { 
     fprintf(stderr, "print not implemented for type %d", sym->type); 
    } 
    } 
%} 

%token LINE_BREAK   "line_break"    
// %token ';'    "stmt_sep"   
%token LETTER    "letter"   
%token DIGIT     "digit"   
%token DIGITS    "digits"   
%token DARROW    "darrow"   
%token ELSE     "else"  
%token FALSE     "false"   
%token IF     "if"  
%token IN     "in"  
%token LET     "let"  
%token LOOP     "loop"  
%token THEN     "then"  
%token WHILE     "while"   
%token BREAK     "break"   
%token CASE     "case"  
%token NEW     "new"  
%token NOT     "not"  
%token TRUE     "true"  
%token NEWLINE    "newline"   
%token NOTNEWLINE   "notnewline"    
%token WHITESPACE   "whitespace"    
%token LE     "le"  
%token ASSIGN    "assign"   
%token NULLCH    "nullch"   
%token BACKSLASH    "backslash"    
%token STAR     "star"  
%token NOTSTAR    "notstar"   
%token LEFTPAREN    "leftparen"    
%token NOTLEFTPAREN   "notleftparen"    
%token RIGHTPAREN   "rightparen"    
%token NOTRIGHTPAREN   "notrightparen"     
%token LINE_COMMENT   "line_comment"    
%token START_COMMENT   "start_comment"     
%token END_COMMENT   "end_comment"    
%token QUOTES    "quotes"   
%token ERROR     "error" 

%token <id_v> ID    "identifier" 
%token <double_v> DOUBLE  "double" 
%token <long_v> LONG   "long" 
%type <symrecv> assignment "assignment" 
%type <symrecv> declaration "declaration" 
%type <symrecv> exp   "expression" 
%type <symrecv> stmt  "statement" 

%left '-' '+' 
%left '*' '/' 
%left LET ID 
%right '^'  /* exponentiation */ 
%precedence NEG /* negation--unary minus */ 

%start prog 

%% 

////////////////////////////////////////////////// 
///////////* The grammar follows. */////////////// 
////////////////////////////////////////////////// 

prog: 
    %empty 
    | prog stmt 
; 

stmt: 
    declaration  { fprintf(stderr, "\n[stmt] 2: "); print_stmt($1); } 
    | assignment  { fprintf(stderr, "\n[stmt] 3: "); print_stmt($1); } 
    | exp    { fprintf(stderr, "\n[stmt] 1: "); print_stmt($1); } 
    | ID ';'   { fprintf(stderr, "\n[stmt] 4: "); 

    fprintf(stderr, "\n my id is '%s'", $1); 

    $$ = (symrec *) malloc (sizeof (symrec)); 

    if (!symbol_exist($1)) { 

     char buf[1024]; 
     sprintf(buf, "No such ID %s found", $1); 
     yyerror(buf); 

    } else { 

     $$->name = $1; 
     $$->value.doublev = symbol_retrieve($1)->value.doublev; 
     printf("\nID %s -> %lf", $1, $$->value.doublev); 
    } 
    } 
    | error   { printf("%d:%d", @1.first_column, @1.last_column); } 
; 

assignment: 
    ID ASSIGN exp ';' { 

    $$ = (symrec *) malloc (sizeof (symrec)); 

    if (!symbol_exist($1)) { 

     char buf[1024]; 
     sprintf(buf, "No such ID %s found", $1); 
     yyerror(buf); 

    } else { 

     $$->name = $1; 
     $$->type = $3->type; 
     $$->value.doublev = $3->value.doublev; 
     symbol_insert($1, $$); 
     // printf("\nID %s -> %lf", $1, $$->value.doublev); 
     printf("\n[assignment]"); 
    } 
    } 
    | LET ID ASSIGN exp ';' { 

    $$ = (symrec *) malloc (sizeof (symrec)); 

    if (symbol_exist($2)) { 

     char buf[1024]; 
     sprintf(buf, "Cannot redefine ID %s", $2); 
     yyerror(buf); 

    } else { 

     $$->name = $2; 
     $$->type = $4->type; 
     $$->value.doublev = $4->value.doublev; 
     symbol_insert($2, $$); 
     // printf("\nID %s -> %lf", $1, $$->value.doublev); 
     printf("\n[assignment]"); 
    } 
    } 
; 

declaration: 
    LET ID ';' { 

    $$ = (symrec *) malloc (sizeof (symrec)); 

    if (symbol_exist($2)) { 

     char buf[1024]; 
     sprintf(buf, "Cannot redefine ID %s", $2); 
     yyerror(buf); 

    } else { 

     $$->name = $2; 
     // $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type; 
     symbol_insert($2, $$); 
     // $$->value.doublev = symbol_table_it->second->value.doublev; 
     // printf("\nID %s -> %lf", $1, $$->value.doublev); 
     printf("\n[declaration]"); 
    } 
    } 
; 

exp: 
    LONG { 
    $$ = (symrec *) malloc (sizeof (symrec)); 
    $$->name = (char*) "__annon"; 
    $$->type = TYPE_LONG; 
    $$->value.intv = $1; 
    printf("\nexp %ld", $1); 
    } 
    | DOUBLE { 
    $$ = (symrec *) malloc (sizeof (symrec)); 
    $$->name = (char*) "__annon"; 
    $$->type = TYPE_DOUBLE; 
    $$->value.doublev = $1; 
    printf("\nexp %lf", $1); 
    } 
    | exp '+' exp  { 
     // $$ = $1 + $3; 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type; 
     $$->value.doublev = $1->value.doublev + $3->value.doublev; 
     printf("\nexp + exp %lf %lf", $1->value.doublev, $3->value.doublev); 
    } 
    | exp '-' exp  { 
     // $$ = $1 - $3; 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type; 
     $$->value.doublev = $1->value.doublev - $3->value.doublev; 
     printf("\nexp - exp %lf %lf", $1->value.doublev, $3->value.doublev); 
    } 
    | exp '*' exp  { 
     // $$ = $1 * $3; 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type; 
     $$->value.doublev = $1->value.doublev * $3->value.doublev; 
     printf("\nexp * exp %lf %lf", $1->value.doublev, $3->value.doublev); 
    } 
    | exp '/' exp { 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type; 

     if ($3->value.doublev) { 
     // $$ = $1/$3; 
     $$->value.doublev = $1->value.doublev/$3->value.doublev; 
     } else { 
     // $$ = $1; 
     $$->value.doublev = $1->value.doublev; 
     division_by_zero(@3); 
     } 
     printf("\nexp/exp %lf %lf", $1->value.doublev, $3->value.doublev); 
    } 
    | '-' exp %prec NEG { 
     /** 
     * The %prec simply instructs Bison that the rule ‘| '-' exp’ 
     * has the same precedence as NEG—in this case the next-to-highest 
     */ 
     // $$ = -($2->value.doublev); 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $2->type; 
     $$->value.doublev = -$2->value.doublev; 
     printf("\nexp^exp %lf", $2->value.doublev); 
    } 
    | exp '^' exp  { 
     //$$ = pow($1->value.doublev, $3->value.doublev); 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $1->type; 
     $$->value.doublev = pow($1->value.doublev, $3->value.doublev); 
     printf("\nexp^exp %lf %lf", $1->value.doublev, $3->value.doublev); 
    } 
    | '(' exp ')'  { 
     // $$ = $2->value.doublev; 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $2->type; 
     $$->value.doublev = $2->value.doublev; 
     printf("\n(exp) %lf", $2->value.doublev); 
    } 
    | error     { printf("\nERROR on exp rule"); } 
    ; 
%% 

詞法

%{ 
    #include "stdio.h" 
    #include "stdlib.h" 
    #include "lexer-utilities.h" 
    #include "noname-parse.h" 
    #include "noname-types.h" 

    int num_lines = 0, num_chars = 0; 
    extern YYSTYPE yylval; 
    extern void yyerror(char const *s); 

    extern int curr_lineno; 
    extern int verbose_flag; 

    unsigned int comment = 0; 
%} 

%option noyywrap 
    // %option noyywrap nounput batch debug yylineno 
    // %option warn noyywrap nodefault yylineno reentrant bison-bridge 

%x COMMENT 
%x STRING 

LINE_BREAK  \n 
LETTER   [a-zA-Z] 
ALPHA   [a-zA-Z$_] 
DIGIT   [0-9] 
DIGITS   {DIGIT}+ 
LONG   {DIGIT}+ 
DOUBLE   {DIGIT}+(\.{DIGIT}+)? 
ID    {ALPHA}({ALPHA}|{DIGIT})* 

ELSE   [eE][lL][sS][eE] 
FALSE   f[aA][lL][sS][eE] 
IF    [iI][fF] 
IN    [iI][nN] 
LET    [lL][eE][tT] 
LOOP   [lL][oO][oO][pP] 
THEN   [tT][hH][eE][nN] 
WHILE   [wW][hH][iI][lL][eE] 
BREAK   [bB][rR][eE][aA][kK] 
CASE   [cC][aA][sS][eE] 
NEW    [nN][eE][wW] 
NOT    [nN][oO][tT] 
TRUE   t[rR][uU][eE] 
NEWLINE   [\n] 
NOTNEWLINE  [^\n] 
WHITESPACE  [ \t\r\f\v]+ 
ASSIGN   = 
LE    <= 
DARROW   => 
NULLCH   [\0] 
BACKSLASH  [\\] 
STAR   [*] 
NOTSTAR   [^*] 
LEFTPAREN  [(] 
NOTLEFTPAREN [^(] 
RIGHTPAREN  [)] 
NOTRIGHTPAREN [^)] 

LINE_COMMENT "--" 
START_COMMENT "/*" 
END_COMMENT  "*/" 

QUOTES   \" 


%% 

{LINE_BREAK}     { 
            ++num_chars; 
            ++num_lines; 
           } 

{START_COMMENT} { 
    comment++; 
    BEGIN(COMMENT); 
} 

<COMMENT><<EOF>> { 
    yylval.error_msg = "EOF in comment"; 
    BEGIN(INITIAL); 
    return (ERROR); 
} 

<COMMENT>{BACKSLASH}(.|{NEWLINE}) { 
    backslash_common(); 
}; 

<COMMENT>{BACKSLASH}    ; 

<COMMENT>{START_COMMENT} { 
    comment++; 
} 

<COMMENT>{END_COMMENT} { 
    comment--; 
    if (comment == 0) { 
    BEGIN(INITIAL); 
    } 
} 

<COMMENT>.      { ++num_chars; } 

<INITIAL>{END_COMMENT} { 
    yylval.error_msg = "Unmatched */"; 
    return (ERROR); 
} 

<*>{WHITESPACE}     { ++num_chars; } 
<INITIAL>{ASSIGN}    { return (ASSIGN); } 
<INITIAL>{ELSE}     { return (ELSE); } 
<INITIAL>{IF}     { return (IF); } 
<INITIAL>{IN}     { return (IN); } 
<INITIAL>{LET}     { return (LET); } 
<INITIAL>{THEN}     { return (THEN); } 
<INITIAL>{WHILE}     { return (WHILE); } 
<INITIAL>{CASE}     { return (CASE); } 
<INITIAL>{NEW}     { return (NEW); } 
<INITIAL>{NOT}     { return (NOT); } 
<INITIAL>{ID}  { 
    yylval.id_v = yytext; 
    return (ID); } 
<INITIAL>{LONG}  { 
    yylval.long_v = atoi(yytext); 
    return (LONG); } 
<INITIAL>{DOUBLE} { 
    yylval.double_v = atof(yytext); 
    return (DOUBLE); } 

<INITIAL>","      { return int(','); } 
<INITIAL>":"      { return int(':'); } 
<INITIAL>"{"      { return int('{'); } 
<INITIAL>"}"      { return int('}'); } 
<INITIAL>"+"      { return int('+'); } 
<INITIAL>"-"      { return int('-'); } 
<INITIAL>"*"      { return int('*'); } 
<INITIAL>"/"      { return int('/'); } 
<INITIAL>"<"      { return int('<'); } 
<INITIAL>"~"      { return int('~'); } 
<INITIAL>"."      { return int('.'); } 
<INITIAL>"@"      { return int('@'); } 
<INITIAL>"("      { return int('('); } 
<INITIAL>")"      { return int(')'); } 
<INITIAL>"&"      { return int('&'); } 
<INITIAL>";"      { return int(';'); } 

<INITIAL>. { 
    printf("lexer error '%s'", yytext); 
    yylval.error_msg = yytext; return 0; 
    } 

%% 

回答

1

這彎曲動作不正確:

yylval.id_v = yytext; 

yytext指向內部工作緩衝區。每次調用掃描儀時,其內容都會改變。因此,如果您想保留組成令牌的字符串,則必須將該字符串複製到您自己的存儲中,例如使用strdup。 (不要忘記在完成後釋放分配的存儲空間。)