2012-05-17 31 views
2

我想用 bison 和 lex 創建一個 C 語言解析器。是的,這是一個學校作業,但我完全沒有頭緒;我上的是網絡課程,所以得不到太多幫助。我需要解析器把信息載入符號表,但我很確定我的 parse.y 文件中有錯誤,因爲它沒有把任何信息載入表中,並且顯示沒有代碼行。我知道我的 .y 文件還缺少一些語法規則,但我認爲以它目前的狀態,應該已經能把內容載入符號表了。(標題:用 Lex 和 Yacc 以 C 語法解析並建立 C 符號表)

以下是 makefile,以及運行它時的輸出:

# Build the cfc front end. yacc generates y.tab.c (plus y.tab.h and y.output
# because of -d and -v) and lex generates lex.yy.c; gcc then links them with
# the symbol table.
.PHONY: default clean

default:
	clear
	yacc -d -v --debug parse.y
	lex -l scan.l
	gcc -o cfc symtab.c lex.yy.c y.tab.c

# Remove the binary and every generated file. y.tab.c is the parser source
# that the default target compiles, so it has to be removed here as well.
clean:
	$(RM) cfc *.o lex.yy.c y.tab.c parse.tab.c y.tab.h y.output dump.symtab

屏幕輸出:

> yacc -d -v --debug parse.y parse.y:39 parser name defined to default 
> :"parse" conflicts: 2 reduce/reduce lex -l scan.l gcc -o cfc symtab.c 
> lex.yy.c y.tab.c 

通過解析器傳遞60.cf:

> semantic error cnt: 0  lines of code: -1 

解析的結果是符號表爲空,以下是輸出結果:

> Starting parse 
> Entering state 0 
> Reading a token: Next token is 292 (INT) 
> parse error: line 0: Shifting error token, Entering state 1 
> Reducing via rule 3 (line 57), error -> block 

加載的示例代碼 60.cf:

/* Sample parser input: i and c are used below without being declared in this snippet. */
int foo() 
{ 
    while (i >= 0) { 
     i = i * 1; 
     c = c + 1; 
    } 
    if (c >= 0) 
    { ; } 
    else 
    { i = 7; 
     c = i; 
    } 
    return 0; 
} 

這裏是scan.l裏的文件:

%option yylineno 

%{ 
/* C prologue: copied verbatim into the generated scanner. */
#ifndef TRUE 
#define TRUE 1 
#endif 

#ifndef FALSE 
#define FALSE 0 
#endif 

#include <stdio.h> 
#include <stdlib.h> 
#include <ctype.h> 
#include <string.h> 
#include "symtab.h" 
#include "y.tab.h" 

/*
 * Scanner counters. The rules in this file only ever update col_cnt and
 * lineno; lineno is the counter parse.y reads through its extern declaration.
 */
int badtoken_cnt = 0; 
int token_cnt = 0; 
int col_cnt = 0; 
int lineno = 0; 

%} 

 /* A C comment. The body may contain '*' characters (and newlines) as long  */
 /* as a run of stars is not directly followed by '/', and the closing       */
 /* delimiter may be preceded by any number of extra stars.                  */
comment  "/*"([^*]|\*+[^*/])*\*+"/"
digit  [0-9] 
ichar  [A-Z_a-z] 
integer  {digit}+ 
newline  \n 
strchar  ([ ~]|\\n) 
identifier {ichar}([0-9]|{ichar})* 
whitespace [ \t]+ 
 /* Every part except the '.' is optional, so this also matches a lone '.'.  */
float  ([+-]?{digit}+)?\.{digit}*(e?[+-]?{digit}+)? 
chrliteral '([!*]|\\n)' 
nullstring \"\" 
escquote [^"]*\\\"[^"]* 
strliteral \"[^"]*{escquote}*\" 
%% 

%{
/*
 * Copy the current lexeme into newly allocated, NUL-terminated storage and
 * publish it to the parser through yylval.tokname. The buffer is sized from
 * yyleng (not from sizeof yytext) and is always terminated, so it can be
 * printed with %s and handed to the symbol-table functions as a C string.
 */
#define SAVE_LEXEME() \
    do { \
        yylval.tokname = malloc((size_t)yyleng + 1); \
        if (yylval.tokname == NULL) { \
            fprintf(stderr, "scanner: out of memory\n"); \
            exit(1); \
        } \
        memcpy(yylval.tokname, yytext, (size_t)yyleng); \
        yylval.tokname[yyleng] = '\0'; \
    } while (0)
%}

"if"   { return IF;} 
"then"   { return THEN;} 
"else"   { return ELSE;} 
"while"   { return WHILE;} 
"return"  { return RETURN;} 
"break"   { /* shares the GOTO token with "goto" */ return GOTO;} 
"goto"   { return GOTO;} 
"read"   { return READ;} 
"write"   { return WRITE;} 
"float"   { return REAL;} 
"int"   { return INT;} 
"void"   { return VOID;} 
"char"   { return CHAR;} 

"="    { return ASSIGN;} 
"!="   { return NE;} 
"=="   { return EQ;} 
"<"    { return LT;} 
"<="   { return LE;} 
">"    { return GT;} 
">="   { return GE;} 
"&&"   { return AND;} 
"||"   { return OR;} 

"+"    { return PLUS;} 
"-"    { return MINUS;} 
"*"    { return TIMES;} 
"/"    { return OVER;} 
"%"    { return MOD;} 

"{"    { return LBRACE;} 
"}"    { return RBRACE;} 
"["    { return LBRACK;} 
"]"    { return RBRACK;} 
"("    { return LPAREN;} 
")"    { return RPAREN;} 
";"    { return SEMI;} 
","    { return COMMA;} 

{float}   { 
        /* real literal: remember its text and record it with REAL_TYPE */
        SAVE_LEXEME(); 
        printf("yylval: %s\n",yylval.tokname); 
        insert(yytext, yyleng, REAL_TYPE, lineno); 
        printf("yytext: %s\n",yytext); 
        return FLOAT; 
       } 
{integer}  { 
        /* integer literal: copy first, then print, so the buffer is initialised */
        SAVE_LEXEME(); 
        printf("yylval: %s\n",yylval.tokname); 
        insert(yytext, yyleng, INT_TYPE, lineno); 
        printf("yytext: %s\n",yytext); 
        return INTEGER; 
       } 


{chrliteral} { 
        /* character literal: recorded with type -1 (no type assigned) */
        SAVE_LEXEME(); 
        printf("yylval: %s\n",yylval.tokname); 
        insert(yytext, yyleng, -1, lineno); 
        printf("yytext: %s\n",yytext); 
        return CHRLIT; 
       } 

{nullstring} { 
        /* empty string literal: recorded with type -1 (no type assigned) */
        SAVE_LEXEME(); 
        printf("yylval: %s\n",yylval.tokname); 
        insert(yytext, yyleng, -1, lineno); 
        printf("yytext: %s\n",yytext); 
        return STRLIT; 
       } 

{strliteral} { 
        /* string literal: recorded with STR_TYPE */
        SAVE_LEXEME(); 
        printf("yylval: %s\n",yylval.tokname); 
        insert(yytext, yyleng, STR_TYPE, lineno); 
        printf("yytext: %s\n",yytext); 
        return STRLIT; 
       } 

{identifier} { 
        /*
         * IDENT carries a <tokname> value in parse.y, and the grammar passes
         * it to setType()/lookupType(), so the name must be published here.
         * The identifier is also entered in the symbol table (type -1 until
         * a declaration sets it) so that those calls can find it.
         */
        SAVE_LEXEME(); 
        insert(yytext, yyleng, -1, lineno); 
        return IDENT; 
       } 
{newline}  { 
        /* main() reports lineno as the line count, so count every newline */
        lineno++; 
        col_cnt = 1; 
       } 

{whitespace} { col_cnt+=yyleng; } 

{comment}  { 
        /* a block comment may span lines: count the newlines it contains */
        int k; 
        for (k = 0; k < yyleng; k++) { 
            if (yytext[k] == '\n') 
                lineno++; 
        } 
        col_cnt = 0; 
       } 

"//"   { /* C++ style comment: discard input up to the end of the line */ 
        int c; 
        /* input() yields EOF (or 0 in some flex versions) at end of file; */ 
        /* stop there instead of spinning forever on an unterminated line. */ 
        do { 
            c = input(); 
        } while (c != '\n' && c != EOF && c != 0); 
        if (c == '\n') 
            lineno++; 
       } 

.    { return ERROR;} 

%% 

這裏是 parse.y 文件,我認爲錯誤就在這裏:

%{ 
/* C prologue: headers, debug switches and the globals shared with main(). */

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#include "symtab.h" 

#define DEBUG 0 
#define TRUE 1 
#define FALSE 0 
#define MAX_MSG_LEN 50 
#define YYDEBUG 1   /* compile the parser's trace code in */

int errcnt = 0;      /* printed by main() as the semantic error count; no visible code increments it */
char errmsg[40];     /* main() copies "type error\n" into this buffer */
extern char *yytext; 
extern FILE *yyin; 
extern FILE *yyout; 
extern int yyparse(); 
extern int lineno;   /* defined in scan.l */
int yydebug = 1;     /* turn parser tracing on at run time */
int t;               /* not referenced anywhere in the visible grammar or C code */
%} 

/* Declare that exactly one shift/reduce conflict is expected in this grammar. */ 
%expect 1 
/* Semantic value: an integer code (tokid) or a pointer to token text (tokname). */
%union { int tokid; 
     char *tokname; 
     } 

/*
 * NOTE(review): scan.l never returns NUMBER; it returns INTEGER and FLOAT for
 * numeric literals and stores their text in yylval.tokname, while both tokens
 * are declared <tokid> below. Confirm which typing is intended.
 */
%token <tokname> IDENT NUMBER 
%token <tokid> ASSIGN PLUS LBRACE RBRACE LPAREN RPAREN SEMI ERROR FLOAT INTEGER 
/* keywords */ 
%token <tokid> IF THEN ELSE WHILE RETURN GOTO READ WRITE VOID CHAR 
/* comparison, logical and arithmetic operators; INT and REAL are the type keywords */ 
%token <tokid> NE EQ LT LE GT GE AND OR MINUS TIMES OVER MOD INT REAL 
/* brackets, comma and character/string literals */ 
%token <tokid> LBRACK RBRACK COMMA CHRLIT STRLIT 
/* NOTE(review): `error` is yacc's built-in error token; confirm that listing it in %type is accepted. */
%type <tokid> block stmt_seq stmt decl expr term assignmnt decltype error 

%start block 

%% 

/*
 * Grammar rules. Every `error` alternative below clears the parser's error
 * state with yyerrok and then executes `return 0`, which returns from
 * yyparse() because actions run inside it.
 * NOTE(review): the same `error` alternative appears in block, stmt_seq, stmt
 * and expr, and stmt also derives expr; overlapping error rules like these are
 * a likely source of the reported reduce/reduce conflicts. Confirm in y.output.
 */

/* A block is a statement sequence enclosed in braces or in parentheses. */
block  : LBRACE stmt_seq RBRACE 
      | LPAREN stmt_seq RPAREN 
      | error { yyerrok; return 0; } 
      ; 

/* One or more statements, each terminated by SEMI. */
stmt_seq : stmt_seq stmt SEMI 
      | stmt SEMI 
      | error { yyerrok; return 0;} 
      ; 

/* A statement is an expression, a declaration or an assignment. */
stmt  : expr 
      | decl 
      | assignmnt { $$ = $1; } 
      | error { yyerrok; return 0;} 
      ; 

/* Declaration: store the declared type ($1) for the identifier named by $2. */
/* This action never assigns $$, yet the message prints it.                  */
decl  : decltype IDENT { 
       setType($2,$1); 
       fprintf(stdout,"set decltype to: %d for %s\n",$$,$2); 
      } 
      ; 

/* Left-recursive sums of terms. */
expr  : expr PLUS term 
       { /* add constraint here */ } 

      | term { $$ = $1; } 
      | error { yyerrok; return 0;} 
      ; 

assignmnt : IDENT ASSIGN expr 
       { /* add constraint here */ } 
      ; 

/* A term's value is the type recorded in the symbol table for its text. */
term  : NUMBER { $$ = lookupType($1); } 

      | IDENT { $$ = lookupType($1); } 
      ; 

/*
 * NOTE(review): INTEGER and FLOAT are the tokens scan.l returns for numeric
 * literals; the keywords "int" and "float" come back as INT and REAL.
 * Presumably INT/REAL were intended here. Confirm.
 */
decltype : INTEGER { $$ = INT_TYPE; } 
      | FLOAT { $$ = REAL_TYPE; } 
      ; 

%% 

/*
 * Entry point: parse the source file named by argv[1] and write the symbol
 * table to "dump.symtab".
 *
 * Returns the value of yyparse(). A missing argument or a file that cannot be
 * opened terminates the program with EXIT_FAILURE so that callers such as
 * make or a shell script can detect the failure.
 */
int main(int argc,char *argv[]) 
{ 
    FILE *fp; 
    int flag; 

    strcpy(errmsg,"type error\n"); 

    if(argc < 2) { 
        printf("Usage: ./cfc <source filename>\n"); 
        exit(EXIT_FAILURE); 
    } 

    fp = fopen(argv[1],"r"); 
    if(!fp) { 
        printf("Unable to open file for reading\n"); 
        exit(EXIT_FAILURE); 
    } 
    yyin = fp; 

    fp = fopen("dump.symtab","w"); 
    if(!fp) { 
        printf("Unable to open file for writing\n"); 
        fclose(yyin);   /* do not leave the input stream open on this path */ 
        exit(EXIT_FAILURE); 
    } 

    flag = yyparse(); 

    /* dump symtab for debugging if necessary */ 
    symtab_dump(fp); 
    lineno--; /* don't count the last newline */ 
    printf("\nsemantic error cnt: %d \tlines of code: %d\n",errcnt,lineno); 

    /* cleanup */ 
    fclose(fp); 
    fclose(yyin); 

    return flag; 
} 


/*
 * End-of-input hook called by the scanner. Always returns 1.
 * Declared with an explicit return type and a (void) parameter list:
 * implicit int is no longer valid C since C99.
 */
int yywrap(void) 
{ 
    return 1; 
} 

/*
 * Error callback: writes "<msg>: line <lineno>: " followed by a newline to
 * stderr, using the scanner's lineno counter. Always returns 0.
 */
int yyerror(char * msg) 
{ 
    fprintf(stderr, "%s: line %d: \n", msg, lineno); 

    return 0; 
} 

如果你需要的話,這裏是我正在使用的符號表:

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#include "symtab.h" 

/* maximum size of hash table */ 
#define SIZE 200 
#define MAXTOKENLEN 40 

/* power of two multiplier in hash function */ 
#define SHIFT 4 

/*
 * Hash a NUL-terminated key to a bucket index in the range [0, SIZE).
 *
 * Each byte is read as unsigned char and accumulated in an unsigned value.
 * With plain (possibly signed) char, a byte >= 0x80 made the running value
 * negative, and a negative result would index outside the hash table.
 * Keys made only of 7-bit characters hash exactly as before.
 */
static int hash (const char * key) 
{ 
    unsigned int acc = 0; 
    const unsigned char *p; 

    for (p = (const unsigned char *)key; *p != '\0'; ++p) 
        acc = ((acc << SHIFT) + *p) % SIZE; 

    return (int)acc; 
} 

/* One node of the singly linked list of references (line numbers) kept for
 * each symbol. */ 
typedef struct RefListRec { 
    int lineno;                /* line number of this reference */ 
    struct RefListRec * next;  /* next reference, or NULL at the end of the list */ 
    /* not written or read by any function visible in this file */ 
    int type; 
} * RefList; 


/* Hash entry: holds a symbol's name, its type and its reference list.
 * Entries that hash to the same bucket are chained through `next`. */ 
typedef struct HashRec { 
    char st_name[MAXTOKENLEN];  /* symbol text, compared with strcmp() on lookup */ 
    int st_size;                /* not referenced by the functions visible in this file */ 
    RefList lines;              /* list of lines on which the symbol was seen */ 
    int st_value;               /* returned by lookup(); never assigned in the visible code */ 
    /* type code: set by insert() and setType(), read by lookupType() and symtab_dump() */ 
    int st_type; 
    struct HashRec * next;      /* next entry in the same bucket */ 
} * Node; 

/* the hash table: SIZE bucket heads, indexed by hash() */ 
static Node hashTable[SIZE]; 

/* insert an entry with its line number - if entry 
    * already exists just add its reference line no. 
    */ 
void insert(char * name, int len, int type, int lineno) 
{ 
    /* ADDED */ 
    /*int len = strlen(name);*/ 
    int h = hash(name); 
    Node l = hashTable[h]; 
    while ((l != NULL) && (strcmp(name,l->st_name) != 0)) 
    l = l->next; 
    if (l == NULL) /* variable not yet in table */ 
    { l = (Node) malloc(sizeof(struct HashRec)); 
    strncpy(l->st_name, name, len); 
    /* ADDED */ 
    l->st_type = type; 
    l->lines = (RefList) malloc(sizeof(struct RefListRec)); 
    l->lines->lineno = lineno; 
    l->lines->next = NULL; 
    l->next = hashTable[h]; 
    hashTable[h] = l; } 
    else /* found in table, so just add line number */ 
    { RefList t = l->lines; 
    while (t->next != NULL) t = t->next; 
    t->next = (RefList) malloc(sizeof(struct RefListRec)); 
    t->next->lineno = lineno; 
    t->next->next = NULL; 
    } 
} 

/*
 * Find `name` in the symbol table.
 * Returns the entry's st_value, or -1 when no entry has that name.
 */
int lookup (char * name) 
{ 
    Node entry; 

    for (entry = hashTable[hash(name)]; entry != NULL; entry = entry->next) { 
        if (strcmp(name, entry->st_name) == 0) 
            return entry->st_value; 
    } 
    return -1; 
} 

/*
 * Find `name` in the symbol table.
 * Returns the entry's st_type, or -1 when no entry has that name.
 */
int lookupType(char * name) 
{ 
    Node entry; 

    for (entry = hashTable[hash(name)]; entry != NULL; entry = entry->next) { 
        if (strcmp(name, entry->st_name) == 0) 
            return entry->st_type; 
    } 
    return -1; 
} 

/*
 * Set the type code of the symbol `name` to `t`.
 * Returns 0 when the symbol was found and updated, -1 when it is not in
 * the table.
 */
int setType(char * name, int t) 
{ 
    Node entry; 

    for (entry = hashTable[hash(name)]; entry != NULL; entry = entry->next) { 
        if (strcmp(name, entry->st_name) == 0) { 
            entry->st_type = t; 
            return 0; 
        } 
    } 
    return -1; 
} 

/*
 * Write the whole symbol table to the stream `of`: a three-line header, then
 * one line per entry with its name, its type label (printed only for
 * INT_TYPE, REAL_TYPE and STR_TYPE) and every recorded line number.
 * Entries are listed bucket by bucket, in chain order.
 */
void symtab_dump(FILE * of) { 
    int bucket; 

    fprintf(of,"------------ ------ ------------\n"); 
    fprintf(of,"Name   Type Line Numbers\n"); 
    fprintf(of,"------------ ------ -------------\n"); 

    for (bucket = 0; bucket < SIZE; ++bucket) { 
        Node entry; 

        for (entry = hashTable[bucket]; entry != NULL; entry = entry->next) { 
            RefList ref; 

            fprintf(of,"%-12s ",entry->st_name); 

            if (entry->st_type == INT_TYPE) 
                fprintf(of,"%-7s","int "); 
            if (entry->st_type == REAL_TYPE) 
                fprintf(of,"%-7s","real"); 
            if (entry->st_type == STR_TYPE) 
                fprintf(of,"%-7s","string"); 

            for (ref = entry->lines; ref != NULL; ref = ref->next) 
                fprintf(of,"%4d ",ref->lineno); 

            fprintf(of,"\n"); 
        } 
    } 
} 
+0

你應該提供程序運行時的一些輸出。例如,解析器報告了什麼錯誤?另外,你的開始規則 block 看起來只能匹配很有限的輸入。想想它到底能匹配什麼。 – Kizaru

+0

添加了更多信息 – Brandon

+0

是的,你的開始規則是 block,它要求第一個記號是 LBRACE 或 LPAREN。而你的測試文件以另一個記號「int」開頭,所以你的解析器永遠過不了第一個記號。我不確定你打算實現多少 C 語法,但是你的開始規則需要某種東西來匹配函數頭(假設你的語法只有一個函數定義)。想想一個簡化的 C 程序,忽略 #include 和函數原型:程序基本上是一個函數列表,其中每個函數是一個函數頭加上一個塊,而每個塊又是... – Kizaru

回答

2

衝突是由相互競爭的 error 產生式引起的。你的 stmt 可以推導出 error;但 stmt 也可以推導出 expr,而 expr 同樣可以推導出 error。只把它放在 expr 裏面即可。

看看yacc生成的y.output文件(因爲您通過了-v)。

您的語法不可能匹配輸入程序,因爲您的開始符號推導出的是由花括號或圓括號包圍的語句塊,而不是一系列外部函數定義。您需要添加處理函數定義的語法片段:decltype IDENT arglist body,依此類推。

一個小東西立即站出來:

comment  \/\*([^*]|\n)*\*\/ 

你說,註釋不能包含星號。 Awww,學生的簡化非常可愛。 :)

一個能匹配真實 C 註釋的正則表達式是:[/][*]([^*]|[*]*[^*/])*[*]+[/](但是請注意,這並沒有考慮 Lex 對換行符的約定)。

您不需要給單字符記號另外起名字。這只會讓語法更冗長:你不得不寫 LBRACE 而不是 '{'。

+0

Kaz真的是一個不錯的答案。 –

+0

@Kaz 我有一個關於你的代碼的問題。我想寫一個簡單的編譯器,現在正在用 flex 做詞法掃描。問題是,當我輸入 123abc 時,我期望它報告錯誤;但不幸的是,它把 123 報告爲整數,把 abc 報告爲標識符。解決方案是什麼?你能幫我麼? – Linda

+0

@Linda解決這個問題的一種方法是將'123abc'與單獨的規則進行匹配,該規則的作用是發出一個錯誤,如「整數標記中的尾隨垃圾」。你的「整數」和「ID」規則獨立於此。 – Kaz