我正在嘗試使用Flex和Bison爲一種簡單的格式化語言構建編譯器。我仍處於起步階段,只編寫了一部分功能。爲什麼Bison(Yacc)會無緣無故地輸出換行?
在這個階段,我還沒有向輸出文件寫入任何內容。只有在出錯的情況下才會向輸出文件打印一些東西,但對於這個輸入顯然不會出現這種情況。我所有其他的打印語句都是打印到控制檯的。所以,我預計輸出文件會是完全空的。然而,當我使用下面這個輸入文件時:
\begin {document}
\tabsize(5)
\title{"Why I Love Compiler Design"}
\author{"COMP421 Student"}
\date{29/12/2016}
\pagesetup{30,100 }
\end{document}
生成的輸出文件是:
輸出文件中有9個空行,對應於我輸入文件中的9行。然而我期望輸出文件中只有1個空行。
這是我.L文件:
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.h"
/* Error reporter; called below for invalid characters and for a wrong argument count */
void yyerror(const char *);
/* The scanner function generated from the rules in this file */
int yylex(void);
/* "Connect" with the output file: main() below points yyout at the file named by argv[2] */
extern FILE *yyout;
/* Parser entry point; called from main() below */
extern int yyparse();
%}
/* Allows printing the line number (of an error) */
%option yylineno
%%
^\\ { printf("LEX returned token BSLASH\n"); return BSLASH; } /* every backslash has to be at the start of a line */
\{ { printf("LEX returned token LBRACE\n"); return LBRACE; }
\} { printf("LEX returned token RBRACE\n"); return RBRACE; }
\( { printf("LEX returned token LPAREN\n"); return LPAREN; }
\) { printf("LEX returned token RPAREN\n"); return RPAREN; }
, { printf("LEX returned token COMMA\n"); return COMMA; }
begin { printf("LEX returned token BEGIN_\n"); return BEGIN_; } /* Note the use of \ in this and other regex expressions to escape the following symbols: \, {, }, (,), */
end { printf("LEX returned token END\n"); return END; }
document { printf("LEX returned token DOCUMENT\n"); return DOCUMENT; }
pagesetup { printf("LEX returned token PAGESETUP\n"); return PAGESETUP; }
tabsize { printf("LEX returned token TABSIZE\n"); return TABSIZE; }
title { printf("LEX returned token TITLE\n"); return TITLE; }
author { printf("LEX returned token AUTHOR\n"); return AUTHOR; }
date { printf("LEX returned token DATE\n"); return DATE; }
(((0[1-9]|[12][0-9]|30)[-/ ]?(0[13-9]|1[012])|31[-/ ]?(0[13578]|1[02])|(0[1-9]|1[0-9]|2[0-8])[-/ ]?02)[-/ ]?[0-9]{4}|29[-/ ]?02[-/ ]?([0-9]{2}(([2468][048]|[02468][48])|[13579][26])|([13579][26]|[02468][048]|0[0-9]|1[0-6])00)) { printf("LEX returned token DDMMYYYYDATE\n"); yylval.sValue = yytext; return DDMMYYYYDATE; }
[0-9]*[1-9][0-9]* { printf("LEX returned token INTEGER\n"); yylval.iValue = atoi(yytext); return INTEGER; }
\".*\" { printf("LEX returned token STRING\n"); yylval.sValue = yytext; return STRING; }
/* skip whitespace which is not part of a string */
[ \t] ;
/* anything else is an error */
. yyerror("invalid character");
%%
/*
 * Program entry point.
 *
 * Expects exactly two arguments: the input file to parse (argv[1]) and the
 * output file the scanner writes to (argv[2]).
 *
 * Returns EXIT_SUCCESS when the input was parsed without error, and
 * EXIT_FAILURE on a usage error, when a file cannot be opened or written,
 * or when yyparse() reports a failure.
 */
int main(int argc, char *argv[]) {
    int parseStatus;

    if (argc != 3) {
        yyerror("ERROR You need 2 args: inputFileName outputFileName");
        return EXIT_FAILURE;
    }

    /* A NULL yyin would make the scanner silently read stdin instead, and
       the fclose() below would then be called on NULL, so fail early. */
    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    yyout = fopen(argv[2], "w");
    if (yyout == NULL) {
        perror(argv[2]);
        fclose(yyin);
        return EXIT_FAILURE;
    }

    /* yyparse() returns 0 on success and non-zero on failure */
    parseStatus = yyparse();

    fclose(yyin);
    /* fclose() flushes buffered output, so a failure here means lost data */
    if (fclose(yyout) != 0) {
        perror(argv[2]);
        parseStatus = 1;
    }

    return parseStatus == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
這是我.Y文件:
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.h"
/* Error reporter, defined in the epilogue of this file */
void yyerror(const char *);
/* Token supplier; presumably the scanner generated from the .l file */
int yylex(void);
/* "Connect" with the output file */
extern FILE *yyout;
/* Current input line number; printed by yyerror() */
extern int yylineno;
/* An array with counters of how many times each of the 5 document properties appears in the input file. The order of the properties is defined in the enum below */
int docPropertyCounters[5];
/* An enumerated list with the 5 document properties */
typedef enum {
    PAGE_SETUP,
    TAB_SIZE,
    DOC_TITLE,
    DOC_AUTHOR,
    DOC_DATE
} document_property;

/*
 * Maps a document property to the way it is written in a source file
 * (e.g. DOC_TITLE -> "\title{}"); used when building error messages.
 * The argument must be one of the five enumerators above: the lookup is
 * not range-checked.
 */
static inline char *stringFromDocPropertyEnum(document_property indexOfProperty) {
    /* Each entry is keyed by its enumerator so the table cannot drift out
       of step with the enum order. */
    static char *propertyNames[] = {
        [PAGE_SETUP] = "\\pagesetup{}",
        [TAB_SIZE]   = "\\tabsize()",
        [DOC_TITLE]  = "\\title{}",
        [DOC_AUTHOR] = "\\author{}",
        [DOC_DATE]   = "\\date{}"
    };

    return propertyNames[indexOfProperty];
}
%}
/* Semantic value attached to a token: an int for INTEGER, a C string for DDMMYYYYDATE and STRING (see the typed %token lines below) */
%union {
int iValue; /* integer value */
char* sValue; /* C-String */
};
%start file /* the start symbol of the grammar is the nonterminal `file` */
/* Punctuation tokens */
%token BSLASH LBRACE RBRACE LPAREN RPAREN COMMA
%token BEGIN_ END DOCUMENT /* BEGIN seems to be a reserved word so BEGIN_ was used instead */
/* Keywords naming the five document properties */
%token PAGESETUP TABSIZE TITLE AUTHOR DATE
/* Tokens that carry a semantic value; the <...> tag names the %union member used */
%token <iValue> INTEGER
%token <sValue> DDMMYYYYDATE STRING
%%
/* A whole source file: \begin{document}, the property lines, \end{document}.
   Once the document has been reduced, every property must have been seen
   exactly once; missing and duplicated properties are reported on stderr. */
file: beginDocument docProperties endDocument
    {
        /* Checks for all possible errors in document properties.
           yyerror() is not used here because the line number does not need
           to be shown. Each message ends in a newline so that several
           diagnostics do not run together on one line. */
        size_t propertyCount = sizeof(docPropertyCounters) / sizeof(docPropertyCounters[0]);
        for (size_t i = 0; i < propertyCount; i++) {
            if (docPropertyCounters[i] < 1) {
                fprintf(stderr, "SYNTAX ERROR: Your source file does not contain the required document property %s\n", stringFromDocPropertyEnum((document_property)i));
            } else if (docPropertyCounters[i] > 1) {
                fprintf(stderr, "SYNTAX ERROR: Your source file contains more than one instance of the document property %s\n", stringFromDocPropertyEnum((document_property)i));
            }
        }
    }
    | /* An empty document is parsed to an empty document, no errors generated */
    ;
/* Opening line: \begin{document} */
beginDocument
    : BSLASH BEGIN_ LBRACE DOCUMENT RBRACE
    ;

/* Zero or more property lines, gathered left-recursively */
docProperties
    : /* empty */
    | docProperties docProperty
    ;

/* Required properties: the input file should contain one instance of each.
   Every match bumps the counter of the property that was recognised. */
docProperty
    : pageSetupProperty { docPropertyCounters[PAGE_SETUP]++; }
    | tabSizeProperty   { docPropertyCounters[TAB_SIZE]++; }
    | titleProperty     { docPropertyCounters[DOC_TITLE]++; }
    | authorProperty    { docPropertyCounters[DOC_AUTHOR]++; }
    | dateProperty      { docPropertyCounters[DOC_DATE]++; }
    ;

/* \pagesetup{INTEGER,INTEGER} */
pageSetupProperty
    : BSLASH PAGESETUP LBRACE INTEGER COMMA INTEGER RBRACE
    ;

/* \tabsize(INTEGER) */
tabSizeProperty
    : BSLASH TABSIZE LPAREN INTEGER RPAREN
    ;

/* \title{STRING} */
titleProperty
    : BSLASH TITLE LBRACE STRING RBRACE
    ;

/* \author{STRING} */
authorProperty
    : BSLASH AUTHOR LBRACE STRING RBRACE
    ;

/* \date{DDMMYYYYDATE} */
dateProperty
    : BSLASH DATE LBRACE DDMMYYYYDATE RBRACE
    ;

/* Closing line: \end{document} */
endDocument
    : BSLASH END LBRACE DOCUMENT RBRACE
    ;
%%
/*
 * End-of-input hook for the scanner. Always returns 1, which signals that
 * there is no further input to switch to.
 */
int yywrap(void)
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}
/*
 * Reports an error on stderr, prefixed with the current value of yylineno.
 *
 * str: the message text to append after the line number.
 */
void yyerror(const char* str)
{
    const int currentLine = yylineno;

    fprintf(stderr,
            "SYNTAX ERROR near line [%d]: %s\n",
            currentLine,
            str);
}
PS:我使用Windows 10和比較舊版本的Flex(2.5.4a )。
並非如此。輸出換行的不是Bison,而是 *lex* 或 *flex*(取決於您使用的是哪一個)。 – EJP