我想創建一個cison解析器與野牛和lex。是的,這是一個學校任務,但我很失落,我在網上學校,所以我沒有得到太多的幫助。我需要這個解析信息到表中,但我很確定我的parse.y文件中有錯誤,因爲它不會將信息加載到表中,表示沒有代碼。我知道我的.y文件缺少一些語法,但我認爲這應該加載到符號表中,因爲它是。Lexx和Yacc用c語法解析C符號表
運行make文件也就是當輸出:
default:
clear
yacc -d -v --debug parse.y
lex -l scan.l
gcc -o cfc symtab.c lex.yy.c y.tab.c
clean:
$(RM) cfc *.o lex.yy.c parse.tab.c y.tab.h y.output dump.symtab
屏幕輸出:
> yacc -d -v --debug parse.y parse.y:39 parser name defined to default
> :"parse" conflicts: 2 reduce/reduce lex -l scan.l gcc -o cfc symtab.c
> lex.yy.c y.tab.c
通過解析器傳遞60.cf:
> semantic error cnt: 0 lines of code: -1
從結果解析器是符號表是空的,這裏是輸出結果:
> Starting parse
> Entering state 0
> Reading a token: Next token is 292 (INT)
> parse error: line 0: Shifting error token, Entering state 1
> Reducing via rule 3 (line 57), error -> block
的示例代碼加載60.cf:
int foo()
{
while (i >= 0) {
i = i * 1;
c = c + 1;
}
if (c >= 0)
{ ; }
else
{ i = 7;
c = i;
}
return 0;
}
這裏是scan.l裏的文件:
%option yylineno
%{
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "symtab.h"
#include "y.tab.h"
int badtoken_cnt = 0;
int token_cnt = 0;
int col_cnt = 0;
int lineno = 0;
%}
comment \/\*([^*]|\n)*\*\/
digit [0-9]
ichar [A-Z_a-z]
integer {digit}+
newline \n
strchar ([ ~]|\\n)
identifier {ichar}([0-9]|{ichar})*
whitespace [ \t]+
float ([+-]?{digit}+)?\.{digit}*(e?[+-]?{digit}+)?
chrliteral '([!*]|\\n)'
nullstring \"\"
escquote [^"]*\\\"[^"]*
strliteral \"[^"]*{escquote}*\"
%%
"if" { return IF;}
"then" { return THEN;}
"else" { return ELSE;}
"while" { return WHILE;}
"return" { return RETURN;}
"break" { return GOTO;}
"goto" { return GOTO;}
"read" { return READ;}
"write" { return WRITE;}
"float" { return REAL;}
"int" { return INT;}
"void" { return VOID;}
"char" { return CHAR;}
"=" { return ASSIGN;}
"!=" { return NE;}
"==" { return EQ;}
"<" { return LT;}
"<=" { return LE;}
">" { return GT;}
">=" { return GE;}
"&&" { return AND;}
"||" { return OR;}
"+" { return PLUS;}
"-" { return MINUS;}
"*" { return TIMES;}
"/" { return OVER;}
"%" { return MOD;}
"{" { return LBRACE;}
"}" { return RBRACE;}
"[" { return LBRACK;}
"]" { return RBRACK;}
"(" { return LPAREN;}
")" { return RPAREN;}
";" { return SEMI;}
"," { return COMMA;}
{float} {
yylval.tokname = malloc(sizeof(yytext));
strncpy(yylval.tokname,yytext,yyleng);
printf("yylval: %s\n",yylval.tokname);
insert(yytext, yyleng, REAL_TYPE, lineno);
printf("yytext: %s\n",yytext);
return FLOAT;
}
{integer} {
yylval.tokname = malloc(sizeof(yytext));
printf("yylval: %s\n",yylval.tokname);
strncpy(yylval.tokname,yytext,yyleng);
insert(yytext, yyleng, INT_TYPE, lineno);
printf("yytext: %s\n",yytext);
return INTEGER;
}
{chrliteral} {
yylval.tokname = malloc(sizeof(yytext));
strncpy(yylval.tokname,yytext,yyleng);
printf("yylval: %s\n",yylval.tokname);
insert(yytext, yyleng, -1, lineno);
printf("yytext: %s\n",yytext);
return CHRLIT;
}
{nullstring} {
yylval.tokname = malloc(sizeof(yytext));
strncpy(yylval.tokname,yytext,yyleng);
printf("yylval: %s\n",yylval.tokname);
insert(yytext, yyleng, -1, lineno);
printf("yytext: %s\n",yytext);
return STRLIT;
}
{strliteral} {
yylval.tokname = malloc(sizeof(yytext));
strncpy(yylval.tokname,yytext,yyleng);
printf("yylval: %s\n",yylval.tokname);
insert(yytext, yyleng, STR_TYPE, lineno);
printf("yytext: %s\n",yytext);
return STRLIT;
}
{identifier} {
return IDENT;
}
{newline} { col_cnt = 1; }
{whitespace} { col_cnt+=yyleng; }
{comment} { col_cnt = 0; }
"//" { /* handle C++ style comments */
char c;
do { c = input();
} while (c != '\n');
lineno++;
}
. { return ERROR;}
%%
這裏是parse.y文件,這是我相信錯誤是:
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symtab.h"
#define DEBUG 0
#define TRUE 1
#define FALSE 0
#define MAX_MSG_LEN 50
#define YYDEBUG 1
int errcnt = 0;
char errmsg[40];
extern char *yytext;
extern FILE *yyin;
extern FILE *yyout;
extern int yyparse();
extern int lineno;
int yydebug = 1;
int t;
%}
/* no warning for fewer than 1 shift/reduce conflicts and 0 reduce/reduce */
%expect 1
%union { int tokid;
char *tokname;
}
%token <tokname> IDENT NUMBER
%token <tokid> ASSIGN PLUS LBRACE RBRACE LPAREN RPAREN SEMI ERROR FLOAT INTEGER
/* ADDED */
%token <tokid> IF THEN ELSE WHILE RETURN GOTO READ WRITE VOID CHAR
/* ADDED */
%token <tokid> NE EQ LT LE GT GE AND OR MINUS TIMES OVER MOD INT REAL
/* ADDED */
%token <tokid> LBRACK RBRACK COMMA CHRLIT STRLIT
%type <tokid> block stmt_seq stmt decl expr term assignmnt decltype error
%start block
%%
block : LBRACE stmt_seq RBRACE
| LPAREN stmt_seq RPAREN
| error { yyerrok; return 0; }
;
stmt_seq : stmt_seq stmt SEMI
| stmt SEMI
| error { yyerrok; return 0;}
;
stmt : expr
| decl
| assignmnt { $$ = $1; }
| error { yyerrok; return 0;}
;
decl : decltype IDENT {
setType($2,$1);
fprintf(stdout,"set decltype to: %d for %s\n",$$,$2);
}
;
expr : expr PLUS term
{ /* add constraint here */ }
| term { $$ = $1; }
| error { yyerrok; return 0;}
;
assignmnt : IDENT ASSIGN expr
{ /* add constraint here */ }
;
term : NUMBER { $$ = lookupType($1); }
| IDENT { $$ = lookupType($1); }
;
decltype : INTEGER { $$ = INT_TYPE; }
| FLOAT { $$ = REAL_TYPE; }
;
%%
int main(int argc,char *argv[])
{
strcpy(errmsg,"type error\n");
int i;
if(argc < 2) {
printf("Usage: ./cfc <source filename>\n");
exit(0);
}
FILE *fp = fopen(argv[1],"r");
if(!fp) {
printf("Unable to open file for reading\n");
exit(0);
}
yyin = fp;
fp = fopen("dump.symtab","w");
if(!fp) {
printf("Unable to open file for writing\n");
exit(0);
}
int flag = yyparse();
/* dump symtab for debugging if necessary */
symtab_dump(fp);
lineno--; /* don't count the last newline */
printf("\nsemantic error cnt: %d \tlines of code: %d\n",errcnt,lineno);
/* cleanup */
fclose(fp);
fclose(yyin);
return flag;
}
yywrap()
{
return(1);
}
int yyerror(char * msg)
{ fprintf(stderr,"%s: line %d: \n",msg,lineno);
return 0;
}
如果你需要它,這裏是符號表我正在使用:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symtab.h"
/* maximum size of hash table */
#define SIZE 200
#define MAXTOKENLEN 40
/* power of two multiplier in hash function */
#define SHIFT 4
/* the hash function */
static int hash (char * key)
{ int temp = 0;
int i = 0;
while (key[i] != '\0')
{ temp = ((temp << SHIFT) + key[i]) % SIZE;
++i;
}
return temp;
}
/* a linked list of references (line nos) for each variable */
typedef struct RefListRec {
int lineno;
struct RefListRec * next;
/* ADDED */
int type;
} * RefList;
/* hash entry holds variable name and its reference list */
typedef struct HashRec {
char st_name[MAXTOKENLEN];
int st_size;
RefList lines;
int st_value;
/* ADDED */
int st_type;
struct HashRec * next;
} * Node;
/* the hash table */
static Node hashTable[SIZE];
/* insert an entry with its line number - if entry
* already exists just add its reference line no.
*/
void insert(char * name, int len, int type, int lineno)
{
/* ADDED */
/*int len = strlen(name);*/
int h = hash(name);
Node l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->st_name) != 0))
l = l->next;
if (l == NULL) /* variable not yet in table */
{ l = (Node) malloc(sizeof(struct HashRec));
strncpy(l->st_name, name, len);
/* ADDED */
l->st_type = type;
l->lines = (RefList) malloc(sizeof(struct RefListRec));
l->lines->lineno = lineno;
l->lines->next = NULL;
l->next = hashTable[h];
hashTable[h] = l; }
else /* found in table, so just add line number */
{ RefList t = l->lines;
while (t->next != NULL) t = t->next;
t->next = (RefList) malloc(sizeof(struct RefListRec));
t->next->lineno = lineno;
t->next->next = NULL;
}
}
/* return value (address) of symbol if found or -1 if not found */
int lookup (char * name)
{ int h = hash(name);
Node l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->st_name) != 0))
l = l->next;
if (l == NULL) return -1;
else return l->st_value;
}
/* return type value of symbol or -1 if symbol not found */
int lookupType(char * name)
{
int h = hash(name);
Node l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->st_name) != 0))
l = l->next;
if (l == NULL) return -1;
else return l->st_type;
}
/* set datatype of symbol returns 0 if symbol not found */
int setType(char * name, int t)
{
int h = hash(name);
Node l = hashTable[h];
while ((l != NULL) && (strcmp(name,l->st_name) != 0))
l = l->next;
if (l == NULL) return -1;
else {
l->st_type = t;
return 0;
}
}
/* print to stdout by default */
void symtab_dump(FILE * of) {
int i;
fprintf(of,"------------ ------ ------------\n");
fprintf(of,"Name Type Line Numbers\n");
fprintf(of,"------------ ------ -------------\n");
for (i=0; i < SIZE; ++i)
{ if (hashTable[i] != NULL)
{ Node l = hashTable[i];
while (l != NULL)
{ RefList t = l->lines;
fprintf(of,"%-12s ",l->st_name);
if (l->st_type == INT_TYPE)
fprintf(of,"%-7s","int ");
if (l->st_type == REAL_TYPE)
fprintf(of,"%-7s","real");
if (l->st_type == STR_TYPE)
fprintf(of,"%-7s","string");
while (t != NULL)
{ fprintf(of,"%4d ",t->lineno);
t = t->next;
}
fprintf(of,"\n");
l = l->next;
}
}
}
}
運行它時,應該提供程序的一些輸出。例如,解析器說什麼失敗了?它也看起來像你的開始規則,塊,只會匹配一些東西。想想它可以匹配什麼。 – Kizaru
添加了更多信息 – Brandon
是的,你的開始規則是塊,它需要一個LBRACE或LPAREN令牌作爲第一個令牌。您的測試文件以不同的標記開始 - 「int」。所以,你的解析器永遠不會超過第一個標記。我不確定你打算實現多少C語法,但是你的開始狀態需要一些東西來捕捉那個函數頭(假設你的語法只有一個函數定義)。想想一個簡化的C程序,忽略包含函數原型。該程序基本上是功能列表,其中每個功能是一個功能標題,然後是一個塊,其中每個塊是... – Kizaru