2010-04-15 75 views
6

我沒有得到的錯誤,請你能幫我,這裏是.l和.y file.thanks。我如何實現如果在Flex /野牛聲明

%{ 
#include "ifanw.tab.h" 
extern int yylval; 
%} 
%% 
"="  { return EQ; } 
"!="  { return NE; } 
"<"  { return LT; } 
"<="  { return LE; } 
">"  { return GT; } 
">="  { return GE; } 
"+"  { return PLUS; } 
"-"  { return MINUS; } 
"*"  { return MULT; } 
"/"  { return DIVIDE; } 
")"  { return RPAREN; } 
"("  { return LPAREN; } 
":="  { return ASSIGN; } 
";"  { return SEMICOLON; } 
"IF"  { return IF; } 
"THEN" { return THEN; } 
"ELSE" { return ELSE; } 
"FI"  { return FI; } 
"WHILE" { return WHILE; } 
"DO"  { return DO; } 
"OD"  { return OD; } 
"PRINT" { return PRINT; } 
[0-9]+ { yylval = atoi(yytext); return NUMBER; } 
[a-z] { yylval = yytext[0] - 'a'; return NAME; } 
\  { ; } 
\n  { nextline(); } 
\t  { ; } 
"//".*\n { nextline(); } 
.  { yyerror("illegal token"); } 
%% 

與Yacc文件

%start ROOT 

%token EQ 
%token NE 
%token LT 
%token LE 
%token GT 
%token GE 
%token PLUS 
%token MINUS 
%token MULT 
%token DIVIDE 
%token RPAREN 
%token LPAREN 
%token ASSIGN 
%token SEMICOLON 
%token IF 
%token THEN 
%token ELSE 
%token FI 
%token WHILE 
%token DO 
%token OD 
%token PRINT 
%token NUMBER 
%token NAME 

%% 

ROOT: 
    stmtseq { execute($1); } 
    ; 

statement: 
    designator ASSIGN expression { $$ = assignment($1, $3); } 
    | PRINT expression { $$ = print($2); } 
    | IF expression THEN stmtseq ELSE stmtseq FI 
    { $$ = ifstmt($2, $4, $6); } 
    | IF expression THEN stmtseq FI 
    { $$ = ifstmt($2, $4, empty()); } 
    | WHILE expression DO stmtseq OD { $$ = whilestmt($2, $4); } 
    ; 

stmtseq: 
    stmtseq SEMICOLON statement { $$ = seq($1, $3); } 
    | statement { $$ = $1; } 
    ; 

expression: 
expr2 { $$ = $1; } 
    | expr2 EQ expr2 { $$ = eq($1, $3); } 
    | expr2 NE expr2 { $$ = ne($1, $3); } 
    | expr2 LT expr2 { $$ = le($1, $3); } 
    | expr2 LE expr2 { $$ = le($1, $3); } 
    | expr2 GT expr2 { $$ = gt($1, $3); } 
    | expr2 GE expr2 { $$ = gt($1, $3); } 
    ; 

expr2: 
    expr3 { $$ == $1; } 
    | expr2 PLUS expr3 { $$ = plus($1, $3); } 
    | expr2 MINUS expr3 { $$ = minus($1, $3); } 
    ; 

expr3: 
    expr4 { $$ = $1; } 
    | expr3 MULT expr4 { $$ = mult($1, $3); } 
    | expr3 DIVIDE expr4 { $$ = divide ($1, $3); } 
    ; 

expr4: 
    PLUS expr4 { $$ = $2; } 
    | MINUS expr4 { $$ = neg($2); } 
    | LPAREN expression RPAREN { $$ = $2; } 
    | NUMBER { $$ = number($1); } 
    | designator { $$ = $1; } 
    ; 

designator: 
    NAME { $$ = name($1); } 
    ; 
%% 

我還有一個問題, 有以使用Flex /野牛JMP指令就像彙編去像我舉的例子標籤的可能性, 謝謝你的幫助。

:L1 
IF FLAG AND X"0001" 
    EVT 23; 
ELSE 
    WAIT 500 ms; 
    JMP L1; 
END IF; 

回答

30

編輯:我把舊的答案到底

這裏是承諾更詳細的例子:

我通常所需語言的示例文件開始:

# example.toy 
begin # example of the simple toy language 
    x = 23; 
    while x > 0 do begin 
     x = x - 1; 
     print(x*x); 
    end; 
end; 

下一步是創建一個詞法分析器+解析器組合,其中prev通過文件 。

這裏是詞法分析器(使用flex -o lexer.c lexer.l生成源代碼)。還要注意的是,詞法分析器源依賴於解析器源(因爲TOKEN_ *常數),所以野牛必須編譯詞法分析器源之前運行:

%option noyywrap 

%{ 
#include "parser.h" 
#include <stdlib.h> 
%} 

%% 

"while" return TOKEN_WHILE; 
"begin" return TOKEN_BEGIN; 
"end" return TOKEN_END; 
"do" return TOKEN_DO; 
[a-zA-Z_][a-zA-Z0-9_]* {yylval.name = strdup(yytext); return TOKEN_ID;} 
[-]?[0-9]+ {yylval.val = atoi(yytext); return TOKEN_NUMBER;} 
[()=;] {return *yytext;} 
[*/+-<>] {yylval.op = *yytext; return TOKEN_OPERATOR;} 
[ \t\n] {/* suppress the output of the whitespaces from the input file to stdout */} 
#.* {/* one-line comment */} 

和分析器(編譯bison -d -o parser.c parser.y,在-d告訴野牛創造一些東西詞法分析器需要parser.h頭文件)

%error-verbose /* instruct bison to generate verbose error messages*/ 
%{ 
/* enable debugging of the parser: when yydebug is set to 1 before the 
* yyparse call the parser prints a lot of messages about what it does */ 
#define YYDEBUG 1 
%} 

%union { 
    int val; 
    char op; 
    char* name; 
} 

%token TOKEN_BEGIN TOKEN_END TOKEN_WHILE TOKEN_DO TOKEN_ID TOKEN_NUMBER TOKEN_OPERATOR 
%start program 

%{ 
/* Forward declarations */ 
void yyerror(const char* const message); 


%} 

%% 

program: statement';'; 

block: TOKEN_BEGIN statements TOKEN_END; 

statements: 
    | statements statement ';' 
    | statements block';'; 

statement: 
     assignment 
    | whileStmt 
    | block 
    | call; 

assignment: TOKEN_ID '=' expression; 

expression: TOKEN_ID 
    | TOKEN_NUMBER 
    | expression TOKEN_OPERATOR expression; 

whileStmt: TOKEN_WHILE expression TOKEN_DO statement; 

call: TOKEN_ID '(' expression ')'; 

%% 

#include <stdlib.h> 

void yyerror(const char* const message) 
{ 
    fprintf(stderr, "Parse error:%s\n", message); 
    exit(1); 
} 

int main() 
{ 
    yydebug = 0; 
    yyparse(); 
} 

gcc parser.c lexer.c -o toylang-noop後的toylang-noop < example.toy的調用必須沒有任何錯誤運行。所以現在解析器本身可以工作,並且可以解析示例腳本。

下一步是創建語法的所謂抽象語法樹。在這一點上,我首先通過定義不同類型的標記和規則來擴展解析器,並將規則插入到每個解析步驟中。

%error-verbose /* instruct bison to generate verbose error messages*/ 
%{ 
#include "astgen.h" 
#define YYDEBUG 1 

/* Since the parser must return the AST, it must get a parameter where 
* the AST can be stored. The type of the parameter will be void*. */ 
#define YYPARSE_PARAM astDest 
%} 

%union { 
    int val; 
    char op; 
    char* name; 
    struct AstElement* ast; /* this is the new member to store AST elements */ 
} 

%token TOKEN_BEGIN TOKEN_END TOKEN_WHILE TOKEN_DO 
%token<name> TOKEN_ID 
%token<val> TOKEN_NUMBER 
%token<op> TOKEN_OPERATOR 
%type<ast> program block statements statement assignment expression whileStmt call 
%start program 

%{ 
/* Forward declarations */ 
void yyerror(const char* const message); 


%} 

%% 

program: statement';' { (*(struct AstElement**)astDest) = $1; }; 

block: TOKEN_BEGIN statements TOKEN_END{ $$ = $2; }; 

statements: {$$=0;} 
    | statements statement ';' {$$=makeStatement($1, $2);} 
    | statements block';' {$$=makeStatement($1, $2);}; 

statement: 
     assignment {$$=$1;} 
    | whileStmt {$$=$1;} 
    | block {$$=$1;} 
    | call {$$=$1;} 

assignment: TOKEN_ID '=' expression {$$=makeAssignment($1, $3);} 

expression: TOKEN_ID {$$=makeExpByName($1);} 
    | TOKEN_NUMBER {$$=makeExpByNum($1);} 
    | expression TOKEN_OPERATOR expression {$$=makeExp($1, $3, $2);} 

whileStmt: TOKEN_WHILE expression TOKEN_DO statement{$$=makeWhile($2, $4);}; 

call: TOKEN_ID '(' expression ')' {$$=makeCall($1, $3);}; 

%% 

#include "astexec.h" 
#include <stdlib.h> 

void yyerror(const char* const message) 
{ 
    fprintf(stderr, "Parse error:%s\n", message); 
    exit(1); 
} 

int main() 
{ 
    yydebug = 0; 
    struct AstElement *a; 
    yyparse(&a); 
} 

正如你所看到的,當產生AST是當分析器一定的規則傳遞給創建的 節點的AST主要部分。由於野牛維護 堆棧中的當前解析過程本身的,被只需要來分配 當前解析狀態到堆棧 的元素(這些是$$=foo(bar)線)

的目標是在存儲器中的下列結構:

ekStatements 
    .count = 2 
    .statements 
    ekAssignment 
     .name = "x" 
     .right 
     ekNumber 
      .val = 23 
    ekWhile 
     .cond 
     ekBinExpression 
     .left 
      ekId 
      .name = "x" 
     .right 
      ekNumber 
      .val=0 
     .op = '>' 
     .statements 
     ekAssignment 
      .name = "x" 
      .right 
      ekBinExpression 
       .left 
       ekId 
        .name = "x" 
       .right 
       ekNumber 
        .val = 1 
       .op = '-' 
     ekCall 
      .name = "print" 
      .param 
      ekBinExpression 
       .left 
       ekId 
        .name = "x" 
       .right 
       ekId 
        .name = "x" 
       .op = '*' 

爲了得到這個圖,有需要的生成代碼,astgen.h:

#ifndef ASTGEN_H 
#define ASTGEN_H 

struct AstElement 
{ 
    enum {ekId, ekNumber, ekBinExpression, ekAssignment, ekWhile, ekCall, ekStatements, ekLastElement} kind; 
    union 
    { 
     int val; 
     char* name; 
     struct 
     { 
      struct AstElement *left, *right; 
      char op; 
     }expression; 
     struct 
     { 
      char*name; 
      struct AstElement* right; 
     }assignment; 
     struct 
     { 
      int count; 
      struct AstElement** statements; 
     }statements; 
     struct 
     { 
      struct AstElement* cond; 
      struct AstElement* statements; 
     } whileStmt; 
     struct 
     { 
      char* name; 
      struct AstElement* param; 
     }call; 
    } data; 
}; 

struct AstElement* makeAssignment(char*name, struct AstElement* val); 
struct AstElement* makeExpByNum(int val); 
struct AstElement* makeExpByName(char*name); 
struct AstElement* makeExp(struct AstElement* left, struct AstElement* right, char op); 
struct AstElement* makeStatement(struct AstElement* dest, struct AstElement* toAppend); 
struct AstElement* makeWhile(struct AstElement* cond, struct AstElement* exec); 
struct AstElement* makeCall(char* name, struct AstElement* param); 
#endif 

astgen。C:

#include "astgen.h" 
#include <stdio.h> 
#include <stdlib.h> 
#include <assert.h> 

static void* checkAlloc(size_t sz) 
{ 
    void* result = calloc(sz, 1); 
    if(!result) 
    { 
     fprintf(stderr, "alloc failed\n"); 
     exit(1); 
    } 
} 

struct AstElement* makeAssignment(char*name, struct AstElement* val) 
{ 
    struct AstElement* result = checkAlloc(sizeof(*result)); 
    result->kind = ekAssignment; 
    result->data.assignment.name = name; 
    result->data.assignment.right = val; 
    return result; 
} 

struct AstElement* makeExpByNum(int val) 
{ 
    struct AstElement* result = checkAlloc(sizeof(*result)); 
    result->kind = ekNumber; 
    result->data.val = val; 
    return result; 
} 

struct AstElement* makeExpByName(char*name) 
{ 
    struct AstElement* result = checkAlloc(sizeof(*result)); 
    result->kind = ekId; 
    result->data.name = name; 
    return result; 
} 

struct AstElement* makeExp(struct AstElement* left, struct AstElement* right, char op) 
{ 
    struct AstElement* result = checkAlloc(sizeof(*result)); 
    result->kind = ekBinExpression; 
    result->data.expression.left = left; 
    result->data.expression.right = right; 
    result->data.expression.op = op; 
    return result; 
} 

struct AstElement* makeStatement(struct AstElement* result, struct AstElement* toAppend) 
{ 
    if(!result) 
    { 
     result = checkAlloc(sizeof(*result)); 
     result->kind = ekStatements; 
     result->data.statements.count = 0; 
     result->data.statements.statements = 0; 
    } 
    assert(ekStatements == result->kind); 
    result->data.statements.count++; 
    result->data.statements.statements = realloc(result->data.statements.statements, result->data.statements.count*sizeof(*result->data.statements.statements)); 
    result->data.statements.statements[result->data.statements.count-1] = toAppend; 
    return result; 
} 

struct AstElement* makeWhile(struct AstElement* cond, struct AstElement* exec) 
{ 
    struct AstElement* result = checkAlloc(sizeof(*result)); 
    result->kind = ekWhile; 
    result->data.whileStmt.cond = cond; 
    result->data.whileStmt.statements = exec; 
    return result; 
} 

struct AstElement* makeCall(char* name, struct AstElement* param) 
{ 
    struct AstElement* result = checkAlloc(sizeof(*result)); 
    result->kind = ekCall; 
    result->data.call.name = name; 
    result->data.call.param = param; 
    return result; 
} 

你可以在這裏看到,AST元素的生成是一個相當單調 工作。該步驟完成後,程序仍然沒有任何操作,但AST可以在調試器中查看 。

下一步是編寫解釋器。這是astexec.h:

#ifndef ASTEXEC_H 
#define ASTEXEC_H 

struct AstElement; 
struct ExecEnviron; 

/* creates the execution engine */ 
struct ExecEnviron* createEnv(); 

/* removes the ExecEnviron */ 
void freeEnv(struct ExecEnviron* e); 

/* executes an AST */ 
void execAst(struct ExecEnviron* e, struct AstElement* a); 

#endif 

那麼,這看起來很友好。口譯員本身很簡單,儘管它的長度爲 。大多數函數只處理特定種類的AstElement。通過dispatchExpression和dispatchStatement 函數選擇 正確的函數。調度函數在valExecs 和runExecs數組中查找目標函數。

astexec.c:

#include "astexec.h" 
#include "astgen.h" 
#include <stdlib.h> 
#include <assert.h> 
#include <stdio.h> 

struct ExecEnviron 
{ 
    int x; /* The value of the x variable, a real language would have some name->value lookup table instead */ 
}; 

static int execTermExpression(struct ExecEnviron* e, struct AstElement* a); 
static int execBinExp(struct ExecEnviron* e, struct AstElement* a); 
static void execAssign(struct ExecEnviron* e, struct AstElement* a); 
static void execWhile(struct ExecEnviron* e, struct AstElement* a); 
static void execCall(struct ExecEnviron* e, struct AstElement* a); 
static void execStmt(struct ExecEnviron* e, struct AstElement* a); 

/* Lookup Array for AST elements which yields values */ 
static int(*valExecs[])(struct ExecEnviron* e, struct AstElement* a) = 
{ 
    execTermExpression, 
    execTermExpression, 
    execBinExp, 
    NULL, 
    NULL, 
    NULL, 
    NULL 
}; 

/* lookup array for non-value AST elements */ 
static void(*runExecs[])(struct ExecEnviron* e, struct AstElement* a) = 
{ 
    NULL, /* ID and numbers are canonical and */ 
    NULL, /* don't need to be executed */ 
    NULL, /* a binary expression is not executed */ 
    execAssign, 
    execWhile, 
    execCall, 
    execStmt, 
}; 

/* Dispatches any value expression */ 
static int dispatchExpression(struct ExecEnviron* e, struct AstElement* a) 
{ 
    assert(a); 
    assert(valExecs[a->kind]); 
    return valExecs[a->kind](e, a); 
} 

static void dispatchStatement(struct ExecEnviron* e, struct AstElement* a) 
{ 
    assert(a); 
    assert(runExecs[a->kind]); 
    runExecs[a->kind](e, a); 
} 

static void onlyName(const char* name, const char* reference, const char* kind) 
{ 
    if(strcmp(reference, name)) 
    { 
     fprintf(stderr, 
      "This language knows only the %s '%s', not '%s'\n", 
      kind, reference, name); 
     exit(1); 
    } 
} 

static void onlyX(const char* name) 
{ 
    onlyName(name, "x", "variable"); 
} 

static void onlyPrint(const char* name) 
{ 
    onlyName(name, "print", "function"); 
} 

static int execTermExpression(struct ExecEnviron* e, struct AstElement* a) 
{ 
    /* This function looks ugly because it handles two different kinds of 
    * AstElement. I would refactor it to an execNameExp and execVal 
    * function to get rid of this two if statements. */ 
    assert(a); 
    if(ekNumber == a->kind) 
    { 
     return a->data.val; 
    } 
    else 
    { 
     if(ekId == a->kind) 
     { 
      onlyX(a->data.name); 
      assert(e); 
      return e->x; 
     } 
    } 
    fprintf(stderr, "OOPS: tried to get the value of a non-expression(%d)\n", a->kind); 
    exit(1); 
} 

static int execBinExp(struct ExecEnviron* e, struct AstElement* a) 
{ 
    assert(ekBinExpression == a->kind); 
    const int left = dispatchExpression(e, a->data.expression.left); 
    const int right = dispatchExpression(e, a->data.expression.right); 
    switch(a->data.expression.op) 
    { 
     case '+': 
      return left + right; 
     case '-': 
      return left - right; 
     case '*': 
      return left * right; 
     case '<': 
      return left < right; 
     case '>': 
      return left > right; 
     default: 
      fprintf(stderr, "OOPS: Unknown operator:%c\n", a->data.expression.op); 
      exit(1); 
    } 
    /* no return here, since every switch case returns some value (or bails out) */ 
} 

static void execAssign(struct ExecEnviron* e, struct AstElement* a) 
{ 
    assert(a); 
    assert(ekAssignment == a->kind); 
    onlyX(a->data.assignment.name); 
    assert(e); 
    struct AstElement* r = a->data.assignment.right; 
    e->x = dispatchExpression(e, r); 
} 

static void execWhile(struct ExecEnviron* e, struct AstElement* a) 
{ 
    assert(a); 
    assert(ekWhile == a->kind); 
    struct AstElement* const c = a->data.whileStmt.cond; 
    struct AstElement* const s = a->data.whileStmt.statements; 
    assert(c); 
    assert(s); 
    while(dispatchExpression(e, c)) 
    { 
     dispatchStatement(e, s); 
    } 
} 

static void execCall(struct ExecEnviron* e, struct AstElement* a) 
{ 
    assert(a); 
    assert(ekCall == a->kind); 
    onlyPrint(a->data.call.name); 
    printf("%d\n", dispatchExpression(e, a->data.call.param)); 
} 

static void execStmt(struct ExecEnviron* e, struct AstElement* a) 
{ 
    assert(a); 
    assert(ekStatements == a->kind); 
    int i; 
    for(i=0; i<a->data.statements.count; i++) 
    { 
     dispatchStatement(e, a->data.statements.statements[i]); 
    } 
} 

void execAst(struct ExecEnviron* e, struct AstElement* a) 
{ 
    dispatchStatement(e, a); 
} 

struct ExecEnviron* createEnv() 
{ 
    assert(ekLastElement == (sizeof(valExecs)/sizeof(*valExecs))); 
    assert(ekLastElement == (sizeof(runExecs)/sizeof(*runExecs))); 
    return calloc(1, sizeof(struct ExecEnviron)); 
} 

void freeEnv(struct ExecEnviron* e) 
{ 
    free(e); 
} 

現在的解釋是齊全,例如可以運行,其主要功能是更新後:

#include <assert.h> 

int main() 
{ 
    yydebug = 0; 
    struct AstElement *a = 0; 
    yyparse(&a); 
    /* Q&D WARNING: in production code this assert must be replaced by 
    * real error handling. */ 
    assert(a); 
    struct ExecEnviron* e = createEnv(); 
    execAst(e, a); 
    freeEnv(e); 
    /* TODO: destroy the AST */ 
} 

現在解釋了這種語言的作品。需要注意的是有這個解釋中的一些限制:

  • 它具有價值
  • 只有一個變量和一個功能
    • ,只有一個參數的函數
  • 只有類型爲int
  • 很難添加goto支持,因爲對於每個AST元素,解釋器都會調用解釋函數。轉到可以在一個塊內通過將某些東西轉入execStmt函數來實現,但是爲了在不同的塊或層之間跳轉,執行機構必須顯着改變(這是因爲不能在解釋器中的不同棧幀之間跳轉)。例如,AST可以轉換爲字節碼,並且該字節碼由vm解釋。
  • 一些其他的,我需要查找:)

您需要定義語法的語言。像這樣的一些東西(包括詞法和語法分析器不完整):

 
/* foo.y */ 
%token ID IF ELSE OR AND /* First list all terminal symbols of the language */ 
%% 

statements: /* allow empty statements */ | stm | statements ';' stm; 

stm: ifStatement 
    | NAME 
    | NAME expList 
    | label; 

expList: expression | expList expression; 

label: ':' NAME { /* code to store the label */ }; 

ifStatement: IF expression statements 
      | IF expression statements ELSE statements; 

expression: ID       { /* Code to handle the found ID */ } 
      | expression AND expression { /* Code to con cat two expression with and */ } 
      | expression OR expression 
      | '(' expression ')'; 

然後你編譯該文件與bison -d foo.y -o foo.c-d開關指示野牛生成包含解析器使用的所有標記的標題。現在,您創建詞法分析器

 
/* bar.l */ 
%{ 
#include "foo.h" 
%} 

%% 

IF return IF; 
ELSE return ELSE; 
OR return OR; 
AND return AND; 
[A-Z]+ { /*store yylval somewhere to access it in the parser*/ return ID; } 

此之後,你有你的詞法和語法分析器完成,「只」需要編寫語言的語義動作。

+0

hallo感謝您的回答,其真正有幫助 但我沒有一個想法我如何編寫代碼來存儲標籤,你能給我任何想法,你解釋一些你寫的代碼例如「語句或「清單」。謝謝你 – Imran 2010-04-16 09:04:14

+0

我試着在這個週末建立一個更好的例子,期待它在星期一或星期二 – Rudi 2010-04-16 11:08:09

+0

「grammar」,而不是「grammer」 – Viet 2010-04-19 02:15:23