2013-05-22 34 views
0

我正在使用 Ian Piumarta 的 peg/leg 解析器生成器來解析我正在開發的一個玩具語言。不幸的是,某些輸入會使解析器崩潰,而 Clang 的 AddressSanitizer 指出崩潰是由堆緩衝區溢出(heap-buffer-overflow)引起的。以下是我得到的輸出以及相關的代碼行:(標題:使用 peg/leg 時發生緩衝區溢出)

==27761== ERROR: AddressSanitizer: heap-buffer-overflow on address 0x7f051b06c398 at pc 0x419e18 bp 0x7fffd2223780 sp 0x7fffd2223778 
WRITE of size 8 at 0x7f051b06c398 thread T0 
    #0 0x419e17 in yySet grammar.c:257 
    #1 0x41767b in yyDone grammar.c:224 
    #2 0x41745e in yyparsefrom grammar.c:2061 
    #3 0x418dec in xr_parse_dump_from_stdin grammar.c:2129 
    #4 0x42a6d9 in main parse_dump.c:19 
    #5 0x7f051b09652c in ?? ??:0 
0x7f051b06c398 is located 88 bytes to the right of 256-byte region [0x7f051b06c240,0x7f051b06c340) 
allocated by thread T0 here: 
    #0 0x42f4e0 in malloc ??:0 
    #1 0x417234 in yyparsefrom grammar.c:2054 
    #2 0x418dec in xr_parse_dump_from_stdin grammar.c:2129 
    #3 0x42a6d9 in main parse_dump.c:19 
    #4 0x7f051b09652c in ?? ??:0 
Shadow byte and word: 
    0x1fe0a360d873: fa 
    0x1fe0a360d870: fa fa fa fa fa fa fa fa 
More shadow bytes: 
    0x1fe0a360d850: 00 00 00 00 00 00 00 00 
    0x1fe0a360d858: 00 00 00 00 00 00 00 00 
    0x1fe0a360d860: 00 00 00 00 00 00 00 00 
    0x1fe0a360d868: fa fa fa fa fa fa fa fa 
=>0x1fe0a360d870: fa fa fa fa fa fa fa fa 
    0x1fe0a360d878: fa fa fa fa fa fa fa fa 
    0x1fe0a360d880: fa fa fa fa fa fa fa fa 
    0x1fe0a360d888: fa fa fa fa fa fa fa fa 
    0x1fe0a360d890: fa fa fa fa fa fa fa fa 
Stats: 0M malloced (0M for red zones) by 136 calls 
Stats: 0M realloced by 11 calls 
Stats: 0M freed by 15 calls 
Stats: 0M really freed by 0 calls 
Stats: 3M (898 full pages) mmaped in 7 calls 
    mmaps by size class: 7:4095; 8:2047; 9:1023; 10:511; 11:255; 12:128; 13:64; 
    mallocs by size class: 7:112; 8:2; 9:3; 10:14; 11:3; 12:1; 13:1; 
    frees by size class: 7:8; 8:2; 9:2; 10:1; 11:1; 12:1; 
    rfrees by size class: 
Stats: malloc large: 0 small slow: 7 
==27761== ABORTING 

以下是 grammar.c(由我的 grammar.leg 語法規範生成)中分配這塊內存的地方:

/* Allocate the vals array with an initial capacity of 32 YYSTYPE elements. */
yyctx->valslen= 32; 
    yyctx->vals= (YYSTYPE *)malloc(sizeof(YYSTYPE) * yyctx->valslen); 

然後訪問這裏:

/* Store ctx->yy into ctx->val[count]. No bounds check is made on count, and text is unused. */
YY_LOCAL(void) yySet(yycontext *ctx, char *text, int count) { ctx->val[count]= ctx->yy; } 

在解析開始之前,yyctx->val 似乎被設置爲 yyctx->vals。

我猜這表示我訪問了某個沒有被規則正確定義的解析器變量之類的情況,但我在語法中找不到任何這樣的例子(當然,這份語法是草草拼湊出來的,幾乎可以肯定還有其他問題)。希望有人至少能爲我指出正確的方向。謝謝!

編輯:爲了回答 Dayal rai 的問題,並希望提供更多有用的信息……如果我在通常發生溢出的位置使用以下代碼:

/* Debug trace: print the count and text arguments, flushing stdout so the line appears before the store below runs. */
printf("COUNT %d w/ TEXT: %s\n", count, text); 
fflush(stdout); 
ctx->val[count]= ctx->yy; 

並給解析器以下輸入:

#!./parse_dump 
object $root; 
method foo() 
{ 
    var i = 0; 

    while (i < 10) { 
     i = i + 1 
    } 
} 

在緩衝區溢出發生之前,我得到以下輸出:

COUNT -2 w/ TEXT: root 
COUNT -3 w/ TEXT: foo 
COUNT -2 w/ TEXT: foo 
COUNT -5 w/ TEXT: i 
COUNT -2 w/ TEXT: 0 
COUNT -1 w/ TEXT: 0 
COUNT -2 w/ TEXT: 0 
COUNT -2 w/ TEXT: 0 
COUNT -3 w/ TEXT: 0 
COUNT -2 w/ TEXT: 
COUNT -2 w/ TEXT: 
COUNT -6 w/ TEXT: 
COUNT -2 w/ TEXT: 
COUNT -2 w/ TEXT: i 
COUNT -1 w/ TEXT: i 
COUNT -2 w/ TEXT: i 
COUNT -2 w/ TEXT: i 
COUNT -3 w/ TEXT: i 
COUNT -2 w/ TEXT: 
COUNT -2 w/ TEXT: 10 
COUNT -1 w/ TEXT: 10 
COUNT -2 w/ TEXT: 10 
COUNT -2 w/ TEXT: 10 
COUNT -3 w/ TEXT: 10 
COUNT -1 w/ TEXT: 
COUNT -2 w/ TEXT: 
COUNT -6 w/ TEXT: 
COUNT -5 w/ TEXT: i 
COUNT -2 w/ TEXT: i 

我不太明白這裏的內存(memory)應該如何佈局;但如果不用 Clang 捕捉這個錯誤,只要它最終沒有破壞其他重要的內存並在別處造成段錯誤,解析器處理某些程序時看起來完全正常。既然我已經在這裏貼了這麼多文本,我想也應該把語法一併附上。它有點粗糙,主要是因爲我還沒有完全確定這門語言最終會是什麼樣子,不過還是貼在這裏:

%{ 
#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 

#include "internal.h" 
#include "elixr.h" 
#include "compile.h" 
#include "types.h" 

/* Every rule's semantic value ($$ and named rule variables) has type XR. */
#define YYSTYPE XR 
/* Use a caller-supplied yycontext; the parse entry points after %% each create their own. */
#define YY_CTX_LOCAL 
/* Extra fields added to yycontext; the grammar actions store their results here. */
#define YY_CTX_MEMBERS XR src; XR method; XR object; XR dump; 

/* Line counter bumped by YY_INPUT and printed by yyerror. */
unsigned int lineNumber; 

/*
 * Feed the parser one character from stdin per call; result is 0 at EOF and 1 otherwise.
 * lineNumber is incremented for every '\n' and every '\r' read.
 * NOTE(review): a "\r\n" pair therefore counts as two lines, while the EOL rule
 * treats it as a single line ending -- confirm whether this is intended.
 */
#define YY_INPUT(buf, result, max)  \ 
    {      \ 
    int c= getc(stdin);    \ 
    if ('\n' == c || '\r' == c) ++lineNumber; \ 
    result= (EOF == c) ? 0 : (*(buf)= c, 1); \ 
    } 


/* Build an AST node of kind AST_<type> with one, two or three children; unused child slots are VAL_NIL. */
#define XR_AST1(type, a)  ast_node(AST_##type, a, VAL_NIL, VAL_NIL) 
#define XR_AST2(type, a, b) ast_node(AST_##type, a, b, VAL_NIL) 
#define XR_AST3(type, a, b, c) ast_node(AST_##type, a, b, c) 
%} 

# Dump: one or more Object definitions gathered into a list; the list is stored in ctx->dump.
Dump   =  -- o1:Object   { $$ = o1 = list_new(o1); } 
        (-- o2:Object   { $$ = o1 = list_append(0, o1, o2); } 
        )*      { ctx->dump = o1;} 

# Object: an `object $name;` header line followed by zero or more methods.
# The name `root` selects the existing `root` value; any other name creates a new object.
# The current object is kept in ctx->object so that Method can refer to it.
Object   = "object" - '$' i:ID ';' - EOL {ctx->object = (i == xr_sym("root")) ? root : object_new(0, i); } 
        (m:Method { object_add_method(ctx->object, m); })* 
        { $$ = ctx->object; } 

# Method: `method name(params) { ... }`. Creates an XRMethod, prints a "CODE" banner,
# compiles the body AST into the method, attaches ctx->object and records the result in ctx->method.
Method = -- "method" - i:ID a:Params -- s:Code { struct XRMethod *m = xr_method_new(i, a); printf("CODE\n-----\n"); qsend(s, "source", val_num(0)); xr_ast_compile(s, m); m->object = ctx->object; $$ = ctx->method = (XR)m; assert(ctx->object); } 

# Params: `()` yields an empty list; otherwise a comma-separated list of IDs.
Params  = '(' - ')' { $$ = list_empty(); } 
      | '(' - p1:ID { p1 = list_new(p1); } 
      (',' - p2:ID { p1 = list_append(0, p1, p2); } 
      )* ')' { $$ = p1; } 

# Code: a brace-delimited statement list wrapped in a CODE node.
Code  = SBLOCK -- s:Statements -- CBLOCK { $$ = XR_AST1(CODE, s);} 

# Statements: one or more Stmt separated by SEP, with an optional trailing SEP.
# At least one statement is required.
Statements = (s1:Stmt   { s1 = list_new(s1); } 
       (SEP s2:Stmt  { s1 = list_append(0, s1, s2); })* SEP? 
       ) { $$ = s1; } 

# Stmt: a single statement. Alternatives are tried in the order written, so the
# if/else form is listed before the plain if form, and the `var` declaration
# without an initialiser (guarded by !ASSIGN) before the one with an initialiser.
Stmt  = e:BitAndOr    { $$ = XR_AST1(EXPRSTMT, e); } 
      | VAR i:ID !ASSIGN  { $$ = XR_AST1(VDECL, i); } 
      | VAR i:ID ASSIGN e:BitAndOr{ $$ = XR_AST2(VINIT, i, e); } 
      | i:ID ASSIGN s:BitAndOr { $$ = XR_AST2(ASSIGN, i, s); } 
      | c:Code    { $$ = c; } 
      | "if" - '(' - e:BitAndOr - ')' -- t:Stmt -- "else" -- f:Stmt { $$ = XR_AST3(IFELSE, e, t, f); } 
      | "if" - '(' - e:BitAndOr - ')' -- c:Stmt { $$ = XR_AST2(IF, e, c); } 
      | "while" - '(' - e:BitAndOr - ')' -- c:Stmt { $$ = XR_AST2(WHILE, e, c); } 
      | "debug" { $$ = XR_AST1(DEBUG, 0); } 
      | "print" - s:BitAndOr { $$ = XR_AST1(PRINT, s); } 
      #| "do" -- c:Stmt "while" - '(' - e:Cmp - ')' { $$ = XR_AST2(DOWHILE, e, c; } 

# BitAndOr: a chain of Cmp operands joined by && or ||, folded left to right into AND/OR nodes.
BitAndOr  = s:Cmp 
       (AND s2:Cmp { s = XR_AST2(AND, s, s2); } 
       | OR s2:Cmp { s = XR_AST2(OR, s, s2); } 
      )* { $$ = s; } 

# Cmp: a chain of Msg operands joined by comparison operators, folded left to right.
Cmp   = s:Msg 
       (EQ s2:Msg { s = XR_AST2(EQ, s, s2); } 
       | NEQ s2:Msg { s = XR_AST2(NEQ, s, s2); } 
       | GT s2:Msg { s = XR_AST2(GT, s, s2); } 
       | LT s2:Msg { s = XR_AST2(LT, s, s2); } 
       | GTE s2:Msg { s = XR_AST2(GTE, s, s2); } 
       | LTE s2:Msg { s = XR_AST2(LTE, s, s2); } 
      )*    { $$ = s; } 

# Msg: a receiver (Sum) followed by zero or more message sends, folded left to
# right into nested SEND nodes; a send without arguments gets VAL_NIL as its
# argument slot.
# The alternative with Arguments must come first: PEG choice is ordered, so a
# bare `i:ID` alternative placed before it always succeeds whenever an ID is
# present and the argument list is never consumed.
Msg   = s:Sum 
       (i:ID a:Arguments { s = XR_AST3(SEND, s, XR_AST1(SYMBOL, i), a); } 
       | i:ID    { s = XR_AST3(SEND, s, XR_AST1(SYMBOL, i), VAL_NIL);} 
      )*     { $$ = s; } 

# Arguments: a parenthesised argument list; the empty form `()` yields VAL_NIL.
Arguments = OPEN CLOSE   - { $$ = VAL_NIL; } 
      | '(' - l:ExprList ')' - { $$ = l; } 

# List: a bracketed list literal; the empty form prints a debug line and yields list_empty().
List = '[' -- ']' - { puts("EMPTY LSIT\n"); $$ = list_empty(); } 
    | '[' -- l:ExprList -- ']' - { $$ = l; } 

# ExprList: one or more comma-separated Cmp expressions collected into a list.
# The result of list_append is assigned back to h, as at every other call site
# in this grammar, so the list stays valid if list_append returns a new head.
ExprList = h:Cmp { h = list_new(h); } (',' -- t:Cmp { h = list_append(0, h, t);})* { $$ = h; } 


# Sum: Product operands joined by + or -, folded left to right.
Sum   = l:Product 
       (PLUS r:Product { l = XR_AST2(PLUS, l, r); } 
       | MINUS r:Product { l = XR_AST2(MINUS, l, r); } 
      )*     { $$ = l; } 

# Product: BinaryNot operands joined by * or /, folded left to right.
Product  = l:BinaryNot 
       (TIMES r:BinaryNot { l = XR_AST2(TIMES, l, r); } 
       | DIVIDE r:BinaryNot { l = XR_AST2(DIVIDE, l, r); } 
      )*     { $$ = l; } 

# BinaryNot: a Value, optionally prefixed by `!` (wrapped in a NOT node).
BinaryNot = v:Value  { $$ = v;} 
     | NOT v:Value { $$ = XR_AST1(NOT, v); } 

#FIXME: sort out values/symbols/ids 
# Value: literals, `:symbol`, list literals, the constants false/true/nil/self,
# variable references and parenthesised expressions.
# `v:ID !ASSIGN` refuses an identifier that is followed by `=`, presumably so that
# the left-hand side of an assignment is not consumed as an expression.
Value  = v:NUMBER   { $$ = XR_AST1(NUMBER, v); } 
      | v:STRING   { $$ = XR_AST1(STRING, v); } 
      | ':' v:ID    { $$ = XR_AST1(SYMBOL, v); } 
      | v:List    { $$ = XR_AST1(LIST, v); } 
      | "false" -   { $$ = XR_AST1(VALUE, VAL_FALSE); } 
      | "true" -   { $$ = XR_AST1(VALUE, VAL_TRUE); } 
      | "nil" -    { $$ = XR_AST1(VALUE, VAL_NIL); } 
      | "self" -   { $$ = XR_AST1(SELF, 0); } 
      | v:ID !ASSIGN  { $$ = XR_AST1(VAR, v); } 
      | OPEN e:BitAndOr CLOSE { $$ = e; } 

# NUMBER: FIXNUM is tried first; it refuses digits that are followed by '.',
# leaving such input to DECIMAL.
NUMBER = FIXNUM | DECIMAL 
# FIXNUM: optionally negative integer literal, converted with atoi.
FIXNUM = < '-'? [0-9]+ !'.' >   - { $$ = val_num(atoi(yytext)); } 
# DECIMAL: digits with an optional '.', converted with atof.
DECIMAL = < [0-9]* '.'? [0-9]+ >  - { $$ = xr_double(atof(yytext)); } 
# ID: lower-case identifier that is not rejected by the KEYWORD lookahead; interned as a symbol.
ID  = !KEYWORD < [a-z] [a-z0-9_]* > - { $$ = xr_sym_n(yytext, yyleng); } 

#FIXME: escapes 
# CHAR: one string character: a backslash escape (named, or octal) or any
# non-backslash character that is not a line ending.
CHAR = !EOL ('\\' [abefnrtv'"\[\]\\] 
       | '\\' [0-3][0-7][0-7] 
       | '\\' [0-7][0-7]? 
       | !'\\' . 
       ) 
# STRING: double-quoted string; the captured text excludes the quotes and is passed on unescaped.
STRING = ["] < (!["] CHAR)* > ["] - { $$ = xr_strn(yytext, yyleng); } 



# SEP: statement separator: `;` or a line ending, followed by any blank space, comments and line endings.
SEP  = (';' | EOL) -- 

# TODO: sort out keywords 
# KEYWORD: reserved words, used by ID as a negative lookahead.
# The trailing ![a-z0-9_] makes a keyword match only as a whole word; without
# it any identifier that merely starts with a keyword (e.g. `iffy`, `variable`,
# `printer`) would be rejected by ID.
KEYWORD = ("debug" | "nil" | "while" | "self" | "else" | "true" | "false" | "if" |"var" | "print") ![a-z0-9_] 

# Operator and punctuation tokens. Each one also consumes trailing spaces and comments via `-`.
# ASSIGN refuses `==` so that it cannot match the first half of the equality operator.
ASSIGN = '=' !'='  - 
PLUS = '+'   - 
MINUS = '-'   - 
TIMES = '*'   - 
DIVIDE = '/'   - 
OPEN = '('   - 
CLOSE = ')'   - 
VAR  = "var"   - 
SBLOCK = '{'   - 
CBLOCK = '}'   - 
NOT  = '!'   - 

EQ  = '=='   - 
NEQ  = '!='   - 
GT  = '>'   - 
LT  = '<'   - 
GTE  = '>='   - 
LTE  = '<='   - 

OR  = '||'   - 
AND  = '&&'   - 

# Whitespace: `-` skips spaces and comments on the current line; `--` also skips line endings.
# A COMMENT runs from `#` to the end of the line.
SPACE = ' ' | '\f' | '\v' | '\t' 
COMMENT = '#' (!EOL .)* 
EOL  = '\n' | '\r\n' | '\r' 
-  = (SPACE | COMMENT)* 
--  = (SPACE | COMMENT | EOL)* 
#EOF  = !. 
%% 

/*
 * Report a parse error on stderr in the form
 *   <stdin>:LINE: MESSAGE [near token 'TEXT'] [before text "REST-OF-LINE"]
 *
 * message: description of the error, printed verbatim.
 * yyctx:   parser context; its text, buf, pos and limit fields are read, and
 *          pos is advanced past every buffered character that gets echoed.
 *
 * When the buffered input runs out before a line ending is seen, the rest of
 * the line is read directly from stdin, so those characters are consumed.
 */
void yyerror(char *message, yycontext *yyctx)
{
    const char *fileName = "<stdin>";
    FILE *input = stdin;

    /* lineNumber is an unsigned int, so it is printed with %u rather than %d. */
    fprintf(stderr, "%s:%u: %s", fileName, lineNumber, message);
    if (yyctx->text[0]) {
        fprintf(stderr, " near token '%s'", yyctx->text);
    }
    if (yyctx->pos < yyctx->limit || !feof(input)) {
        yyctx->buf[yyctx->limit] = '\0';
        fprintf(stderr, " before text \"");
        /* Echo what is left of the current line in the parser buffer. */
        while (yyctx->pos < yyctx->limit) {
            if ('\n' == yyctx->buf[yyctx->pos] || '\r' == yyctx->buf[yyctx->pos]) {
                break;
            }
            fputc(yyctx->buf[yyctx->pos++], stderr);
        }
        /* Buffer exhausted without reaching a line ending: continue from stdin. */
        if (yyctx->pos == yyctx->limit) {
            int c;
            while (EOF != (c = fgetc(input)) && '\n' != c && '\r' != c) {
                fputc(c, stderr);
            }
        }
        fputc('\"', stderr);
    }
    fprintf(stderr, "\n");
}

/*
 * Parse stdin starting at the Code rule, repeating until yyparsefrom
 * returns zero, and return the context's src member.
 * NOTE(review): no grammar action visible in this file assigns ctx->src, so
 * the value returned here may always be the zero it was initialised to -- confirm.
 */
XR xr_parse_code_from_stdin()
{
    yycontext ctx;

    /* Start from an all-zero parser context. */
    memset(&ctx, 0, sizeof ctx);

    for (;;) {
        if (!yyparsefrom(&ctx, yy_Code)) {
            break;
        }
    }

    return ctx.src;
}

/*
 * Parse stdin starting at the Method rule, repeating until yyparsefrom
 * returns zero, and return the context's method member (set by the Method
 * rule's action).
 */
XR xr_parse_method_from_stdin()
{
    yycontext ctx;

    /* Start from an all-zero parser context. */
    memset(&ctx, 0, sizeof ctx);

    for (;;) {
        if (!yyparsefrom(&ctx, yy_Method)) {
            break;
        }
    }

    return ctx.method;
}

/*
 * Parse a whole dump from stdin starting at the Dump rule, repeating until
 * yyparsefrom returns zero, and return the context's dump member (set by the
 * Dump rule's action). The global line counter is reset to 1 first.
 * No error is reported when parsing stops; the yyerror call is disabled.
 */
XR xr_parse_dump_from_stdin()
{
    yycontext ctx;

    lineNumber = 1;
    /* Start from an all-zero parser context. */
    memset(&ctx, 0, sizeof ctx);

    for (;;) {
        if (!yyparsefrom(&ctx, yy_Dump)) {
            break;
        }
    }

    /*yyerror("syntax error", &ctx);*/

    return ctx.dump;
}
+0

在訪問時,count 的值是否大於 32? –

+0

@Dayalrai:據我所知並沒有。請查看我在原帖中的編輯。 – jenga

回答

1

看來問題在於我的語法造成了太多的遞歸調用,而 yyctx->valslen= 32; 不足以應付這種情況。leg 沒有做任何邊界檢查,而我原先只是假定不會有問題。我原以爲 vals 內存只保存語法規則所使用的變量,而我每條規則只有大約 1-5 個這樣的變量。This guy 讓 peg/leg 能在 Windows 上構建,並且還修復了這個錯誤:他加入了一個用於設定更大 vals 數組的 #define,並在 yyPush 中加入了一個檢查邊界的斷言。多謝,夥計!