2016-12-30 91 views
2

我正在嘗試使用 Flex 和 Bison 爲一種簡單的格式化語言構建編譯器。我仍處於起步階段,只編寫了部分功能。爲什麼 Bison(Yacc)會無緣無故地輸出換行?

在目前這個階段,我還沒有向輸出文件寫入任何內容。只有在出錯的情況下才會向輸出文件打印一些東西,但對於這份輸入顯然不會發生這種情況。其他所有打印語句都是輸出到控制檯的。所以,我預期輸出文件應該是完全空的。然而,當我使用下面的輸入文件時:

\begin {document} 

\tabsize(5) 
\title{"Why I Love Compiler Design"} 
\author{"COMP421 Student"} 
\date{29/12/2016} 
\pagesetup{30,100 } 

\end{document} 

生成的輸出文件是:

enter image description here

其中有 9 個空行,對應於我輸入文件中的 9 行。然而我期望輸出中只有 1 個空行。

這是我的 .l 文件:

%{ 
    #include <stdio.h> 
    #include <stdlib.h> 
    #include <string.h> 
    #include "y.tab.h" 
    void yyerror(const char *); 
    int yylex(void); 
    /* "Connect" with the output file */ 
    extern FILE *yyout; 
    extern int yyparse(); 
%} 

/* Allows printing the line number (of an error) */ 
%option yylineno 

%% 

^\\ { printf("LEX returned token BSLASH\n"); return BSLASH; } /* every backslash has to be at the start of a line */ 
\{ { printf("LEX returned token LBRACE\n"); return LBRACE; } 
\} { printf("LEX returned token RBRACE\n"); return RBRACE; } 
\( { printf("LEX returned token LPAREN\n"); return LPAREN; } 
\) { printf("LEX returned token RPAREN\n"); return RPAREN; } 
, { printf("LEX returned token COMMA\n"); return COMMA; } 

begin { printf("LEX returned token BEGIN_\n"); return BEGIN_; } /* Note the use of \ in this and other regex expressions to escape the following symbols: \, {, }, (,), */ 
end  { printf("LEX returned token END\n"); return END; } 
document { printf("LEX returned token DOCUMENT\n"); return DOCUMENT; } 

pagesetup { printf("LEX returned token PAGESETUP\n"); return PAGESETUP; } 
tabsize { printf("LEX returned token TABSIZE\n"); return TABSIZE; } 
title  { printf("LEX returned token TITLE\n"); return TITLE; } 
author { printf("LEX returned token AUTHOR\n"); return AUTHOR; } 
date  { printf("LEX returned token DATE\n"); return DATE; } 

(((0[1-9]|[12][0-9]|30)[-/ ]?(0[13-9]|1[012])|31[-/ ]?(0[13578]|1[02])|(0[1-9]|1[0-9]|2[0-8])[-/ ]?02)[-/ ]?[0-9]{4}|29[-/ ]?02[-/ ]?([0-9]{2}(([2468][048]|[02468][48])|[13579][26])|([13579][26]|[02468][048]|0[0-9]|1[0-6])00)) { printf("LEX returned token DDMMYYYYDATE\n"); yylval.sValue = yytext; return DDMMYYYYDATE; } 
[0-9]*[1-9][0-9]* { printf("LEX returned token INTEGER\n"); yylval.iValue = atoi(yytext); return INTEGER; } 
\".*\"    { printf("LEX returned token STRING\n"); yylval.sValue = yytext; return STRING; } 

    /* skip whitespace which is not part of a string */ 
[ \t] ; 

    /* anything else is an error */ 
. yyerror("invalid character"); 

%% 

/*
 * Program entry point.
 *
 * Expects exactly two arguments: the input file name and the output file
 * name. Opens both files, points the scanner's yyin/yyout at them, runs
 * yyparse(), and closes the files again.
 *
 * Returns EXIT_SUCCESS when both files could be opened, yyparse() returned 0
 * and the output file was closed without error; EXIT_FAILURE otherwise.
 */
int main(int argc, char *argv[]) {
    if (argc != 3) {
        yyerror("ERROR You need 2 args: inputFileName outputFileName");
        return EXIT_FAILURE;
    }

    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        /* Without this check the scanner would read from a NULL stream. */
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    yyout = fopen(argv[2], "w");
    if (yyout == NULL) {
        perror(argv[2]);
        fclose(yyin);
        return EXIT_FAILURE;
    }

    int status = yyparse();

    fclose(yyin);
    /* fclose() flushes buffered output, so a failure here means lost data. */
    if (fclose(yyout) != 0) {
        perror(argv[2]);
        status = 1;
    }

    return status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

這是我的 .y 文件:

%{ 
    #include <stdio.h> 
    #include <stdlib.h> 
    #include <string.h> 
    #include "y.tab.h" 
    void yyerror(const char *); 
    int yylex(void); 

    /* "Connect" with the output file */ 
    extern FILE *yyout; 
    extern int yylineno; 

    /* An array with counters of how many times each of the 5 document properties appears in the input file. The order of the properties is defined in the enum below */ 
    int docPropertyCounters[5]; 

    /* The five document properties. The enumerator order fixes the index (0..4) of each property. */
    typedef enum {
        PAGE_SETUP,
        TAB_SIZE,
        DOC_TITLE,
        DOC_AUTHOR,
        DOC_DATE
    } document_property;

    /* Maps a document property to its printable spelling (e.g. "\pagesetup{}").
       The table is keyed by the enumerators above; the argument is used directly
       as the index, so it must be one of those five values. */
    static inline char *stringFromDocPropertyEnum(document_property indexOfProperty) {
        static char *propertyNames[] = {
            [PAGE_SETUP] = "\\pagesetup{}",
            [TAB_SIZE]   = "\\tabsize()",
            [DOC_TITLE]  = "\\title{}",
            [DOC_AUTHOR] = "\\author{}",
            [DOC_DATE]   = "\\date{}"
        };

        return propertyNames[indexOfProperty];
    }
%} 

%union { 
    int iValue;  /* integer value */ 
    char* sValue;  /* C-String */ 
}; 

%start file /* defining the start condition */ 

%token BSLASH LBRACE RBRACE LPAREN RPAREN COMMA 

%token BEGIN_ END DOCUMENT /* BEGIN seems to be a reserved word so BEGIN_ was used instead */ 

%token PAGESETUP TABSIZE TITLE AUTHOR DATE 

%token <iValue> INTEGER 

%token <sValue> DDMMYYYYDATE STRING 

%% 

file: beginDocument docProperties endDocument
      {
       /* A complete document was parsed: check that each required document
          property was seen exactly once, using the per-property counters.
          Problems are reported with fprintf() rather than yyerror() because
          the line number does not need to be shown. Every message ends with
          a newline so that several reports do not run together on one line. */
       for (size_t i = 0; i < sizeof(docPropertyCounters)/sizeof(docPropertyCounters[0]); i++) {
        if (docPropertyCounters[i] < 1) {
         fprintf(stderr, "SYNTAX ERROR: Your source file does not contain the required document property %s\n", stringFromDocPropertyEnum((document_property)i));
        } else if (docPropertyCounters[i] > 1) {
         fprintf(stderr, "SYNTAX ERROR: Your source file contains more than one instance of the document property %s\n", stringFromDocPropertyEnum((document_property)i));
        }
       }
      }
      | /* An empty document is parsed to an empty document, no errors generated*/
      ;

/* Token sequence for the opening line: \begin{document} */
beginDocument: BSLASH BEGIN_ LBRACE DOCUMENT RBRACE; 

/* Zero or more document properties, accepted in any order (left-recursive list). */
docProperties: docProperties docProperty 
       | /* empty */ 
       ;     

    /* required properties... there should be one instance of each in the input file */ 
    /* Each alternative increments the matching slot of docPropertyCounters;
       the counts are checked in the action of the "file" rule. */
docProperty: pageSetupProperty { docPropertyCounters[PAGE_SETUP]++; } 
       | tabSizeProperty { docPropertyCounters[TAB_SIZE]++; } 
       | titleProperty { docPropertyCounters[DOC_TITLE]++; } 
       | authorProperty { docPropertyCounters[DOC_AUTHOR]++; } 
       | dateProperty { docPropertyCounters[DOC_DATE]++; } 
       ; 

/* \pagesetup{INTEGER,INTEGER} */
pageSetupProperty: BSLASH PAGESETUP LBRACE INTEGER COMMA INTEGER RBRACE; 

/* \tabsize(INTEGER) -- note the parentheses instead of braces */
tabSizeProperty: BSLASH TABSIZE LPAREN INTEGER RPAREN; 

/* \title{STRING} */
titleProperty: BSLASH TITLE LBRACE STRING RBRACE; 

/* \author{STRING} */
authorProperty: BSLASH AUTHOR LBRACE STRING RBRACE; 

/* \date{DDMMYYYYDATE} */
dateProperty: BSLASH DATE LBRACE DDMMYYYYDATE RBRACE; 

/* Token sequence for the closing line: \end{document} */
endDocument: BSLASH END LBRACE DOCUMENT RBRACE; 

%% 

/*
 * End-of-input hook for the scanner. Takes no arguments and always returns 1.
 * NOTE(review): by lex/flex convention a non-zero result tells the scanner
 * that no further input follows -- confirm against the flex manual.
 */
int yywrap(void)
{
    const int noMoreInput = 1;

    return noMoreInput;
}

/*
 * Reports an error on stderr in the form
 *     SYNTAX ERROR near line [<yylineno>]: <str>
 * followed by a newline.
 *
 * str: message text, printed verbatim via %s.
 */
void yyerror(const char *str)
{
    const int line = yylineno;

    fprintf(stderr, "SYNTAX ERROR near line [%d]: %s\n", line, str);
}

PS:我使用Windows 10和比較舊版本的Flex(2.5.4a )。

+1

並非如此,Bison 不會這樣做。這是由 *lex* 或 *flex* 完成的,取決於您使用的是哪一個。 – EJP

回答

2

這些空行來自輸入中的回車符和/或換行符(\r\n):您沒有把它們放進空白字符的模式裏,所以沒有任何規則匹配它們,flex 的默認規則就會把這些未匹配的字符原樣回顯到 yyout。

也許你應該有:

[ \t\r\n]  ; 

您也應該小心在 flex/bison 的規則說明(specification)中使用 C 風格的註釋。有時它們會被當作模式。我總是建議學生只在實際的 C 代碼中放置 C 風格註釋。例如,最好這樣寫:

[ \t\r\n]  ; /* skip whitespace which is not part of a string */ 

並且絕不在其他地方放置註釋。其他人可能不同意,但我發現這樣可以避免在 flex 和 bison 中遇到很多麻煩。

PS:我沒有用您的代碼測試過我的建議……

+0

我剛剛測試了你的建議,效果非常好 – hb20007

+1

出於性能考慮,我在這裏更傾向於使用 `[ \t\r\n]+`。這樣對一整段連續的空白只需執行一次生成的 switch 語句。 – EJP

+1

可以將 C 註釋放入您的 flex 文件,*前提是註釋有縮進。*這是 POSIX 所要求的,我相信它適用於所有 lex 實現。Flex 有時也會容許不縮進的註釋,但我會避免那樣寫。我也強烈推薦 `%option nodefault`,它本可以捕獲導致這個問題的錯誤。 – rici