2016-10-05 57 views
1

我有一個小的Brainfuck解釋器,我寫了一段時間;在編譯它時,我注意到gcc的優化開關的輸出大小並不是我所期望的。以下是我編的程序:意外的文件大小,同時變化gcc優化開關

struct node { 
    struct node *prev; 
    int val; 
    struct node *jump; 
    struct node *next; 
}; 
typedef struct node node; 
node *newnode(); 
node *append(node *n); 
node *prepend(node *n); 
void erase(node *n); 
int pop(node *n); 
void doop(node *n); 
node *link(node *n); 

#include <stdlib.h> 

// allocates a new node and sets all the things to zero 
node *newnode() { 
    node *n = malloc(sizeof(node)); 
    n->prev = n->next = n->jump = NULL; 
    n->val = 0; 
    return n; 
} 

// appends a node to a given node. assumes it is an end node 
node *append(node *n) { 
    n->next = newnode(); 
    n->next->prev = n; 
    return n->next; 
} 

// prepend node to list. assumes it is the first node 
node *prepend(node *n) { 
    n->prev = newnode(); 
    n->prev->next = n; 
    return n->prev; 
} 

// navigates to first node, then frees all the nodes, iterating to the end 
void erase(node *n) { 
    node *m; 
    while (n->prev) 
     n = n->prev; 
    while (n) { 
     m = n->next; 
     free(n); 
     n = m; 
    } 
} 

// pops any node and links any connected nodes to each other 
// returns value of erased node 
int pop(node *n) { 
    int ret; 
    if (n->prev) 
     n->prev->next = n->next; 
    if (n->next) 
     n->next->prev = n->prev; 
    ret = n->val; 
    free(n); 
    return ret; 
} 

#include <stdio.h> 

// bf tokens. all other are ignored 
#define LSEEK  '<' 
#define RSEEK  '>' 
#define INCREMENT '+' 
#define DECREMENT '-' 
#define STDOUT  '.' 
#define STDIN  ',' 
#define LBRACKET '[' 
#define RBRACKET ']' 

// memory used by bf program. is this really turing-compliant? 
char mem[30000] = { 0 }; 
// pointer used by bf program 
char *ptr = mem; 

// do operation beginning with given node 
void doop(node *n) { 
    // copy node pointer in case we need the head of the list later 
    node *m = n; 
    // loop while node pointer is a valid one; e.g. stop at EOF 
    while (m) { 
     switch (m->val) { 
      // most of these are pretty self-explanatory 
      case LSEEK: 
       ptr--; 
       break; 
      case RSEEK: 
       ptr++; 
       break; 
      case INCREMENT: 
       (*ptr)++; 
       break; 
      case DECREMENT: 
       (*ptr)--; 
       break; 
      case STDOUT: 
       printf("%c", *ptr); 
       fflush(stdout); 
       break; 
      case STDIN: 
       *ptr = getchar(); 
       break; 
      case LBRACKET: 
       // jump to closing bracket if value at pointer is false 
       if (!*ptr) 
        m = m->jump; 
       break; 
      case RBRACKET: 
       // jump back to opening bracket if value at pointer is true 
       if (*ptr) 
        m = m->jump; 
       break; 
     } 
     // proceed to next instruction 
     m = m->next; 
    } 
} 

// finds and references each bracket instruction to its corresponding bracket 
node *link(node *n) { 
    // make a copy of the list head 
    node *m = n; 
    // make a temporary list to contain bracket links 
    node *links = newnode(); 
    // while a valid node 
    while (m) { 
     // switch to bracket type 
     switch (m->val) { 
      case LBRACKET: 
       // this is an opening bracket, so we temporarily store it's 
       // location, and append the list as it grows 
       if (links->jump) 
        links = append(links); 
       links->jump = m; 
       break; 
      case RBRACKET: 
       // this is the closing bracket, so we save the temporarily 
       // stored link address to the closing bracket node, and 
       // connect the opening bracket node to the closing also; 
       // popping the list as we no longer need the data 
       m->jump = links->jump; 
       links->jump->jump = m; 
       if (links->prev) { 
        links = links->prev; 
        pop(links->next); 
       } 
       break; 
     } 
     // increment to next character 
     m = m->next; 
    } 
    // erase all the nodes in the temporary linked list 
    erase(links); 
    // return the head of the list 
    return n; 
} 

#include <signal.h> 

// press ctrl-c then enter to quit if not running from a file 
int run = 1; 
void quit(int val) { 
    run = 0; 
} 

int main(int argc, char** argv) { 
    // catch crtl-c 
    signal(SIGINT, quit); 
    int c; 
    // our text structure is a linked list 
    node *text, *text_start; 
    if (argc > 1) { 
     // print the file name 
     printf("%s\n", argv[1]); 
     // open the file and read it to the linked list 
     FILE *f = fopen(argv[1], "r"); 
     if (f == NULL) return 1; 
     text = text_start = newnode(); 
     while ((c = fgetc(f)) != EOF) { 
      if (text->val) 
       text = append(text); 
      text->val = c; 
     } 
     fclose(f); 
     // link all the loops/ gotos, then process all instructions 
     doop(link(text_start)); 
     // free all linked list nodes 
     erase(text_start); 
     // we just ran a file, so no interpreter 
     run = 0; 
    } 
    // repeatedly read and execute only one line until interrupted 
    while (run) { 
     // linked list generated for each line of input 
     text = text_start = newnode(); 
     // read stdin buffer to list 
     while ((c = getchar()) != '\n') { 
      if (text->val) 
       text = append(text); 
      text->val = c; 
     } 
     // link all the loops/ gotos, then process the 
     // instructions for the line 
     doop(link(text_start)); 
     // free all linked list nodes 
     erase(text_start); 
    } 
    return 0; 
} 

最後,以下是編譯程序意外文件大小從派生:

$ gcc brainfuck.c -o brainfuck -Os && ls brainfuck -l && for i in `seq 0 3`; do gcc brainfuck.c -o brainfuck -O$i && ls brainfuck -l; done && gcc --version 
-rwxrwxr-x 1 m m 13552 Oct 4 18:31 brainfuck 
-rwxrwxr-x 1 m m 13544 Oct 4 18:31 brainfuck 
-rwxrwxr-x 1 m m 13600 Oct 4 18:31 brainfuck 
-rwxrwxr-x 1 m m 13600 Oct 4 18:31 brainfuck 
-rwxrwxr-x 1 m m 13600 Oct 4 18:31 brainfuck 
gcc (Ubuntu 5.4.0-6ubuntu1~16.04.2) 5.4.0 20160609 
+1

你以爲是什麼意思? –

+0

@ M.M我預計文件大小會有很大的變化。 – motoku

+2

也許在代碼中沒有太多的優化。您可以比較不同版本之間生成的程序集,以確切瞭解發生了什麼變化。 –

回答

4

了大量小的二進制文件的大小將是樣板啓動,加上調試符號表,以及全局數據區域和其他部分的大量零填充。對null填充進行二進制檢查。要得到一個有點比例更符合實際,去掉符號。

您應該只是比較TEXT部分的大小,即指令流而不是整個Unix可執行文件二進制文件的大小。

優化代碼還有一個非常大小的不可預測的影響。展開循環可以延長代碼以及內聯,但是刪除冗餘內存加載/存儲,常見子表達式消除,死代碼消除和常量摺疊可以減小大小。因此,在總結這些對立的力量時,你的視野非常不透明。如果你真的想學習一些東西,請一行一行地研究組件。見gcc -S並回報。

此外,如果您花費大部分能量將數據傳輸到I/O流和從I/O流傳輸數據,許多代碼將不會很好地優化。優化適用於CPU綁定和內存綁定材質。

% gcc -OS -o bfos brainfuck.C# -OS is optimize but keep code small 
% objdump -h bfos | grep text 
12 .text   00000452 0000000000400730 0000000000400730 00000730 2**4 

% gcc -O0 -o bfo0 brainfuck.C# -O0 is default: no optimizations 
% objdump -h bfo0 | grep text 
12 .text   00000652 0000000000400730 0000000000400730 00000730 2**4 

0x452/0x652 =巨大的差異。

然而二進制尺寸大許多倍,有填充,並沒有什麼做的編譯後的代碼大小:

% ls -l bfo0 bfos 
-rwxr-xr-x 1 root root 13461 Oct 4 22:42 bfo0 
-rwxr-xr-x 1 root root 13469 Oct 4 22:41 bfos 

% gcc --version 
gcc (Ubuntu 4.8.4-2ubuntu1~14.04.3) 4.8.4 

最後,零填充的很長一段(以下簡稱「*」表示所有的重複,所以從0x000760到0x0006700,都是零字節)

% od -x bfo0 | grep -1 '\*' 
0000720 0000 0000 0000 0000 0000 0000 0000 0000 
* 
0000760 0000 0000 0000 0000 0010 0000 0000 0000 
-- 
0006700 0a8c 0040 0000 0000 0a8c 0040 0000 0000 
* 
0007040 09c9 0040 0000 0000 0a8c 0040 0000 0000 
-- 
0007100 0a8c 0040 0000 0000 0a8c 0040 0000 0000 
* 
0007420 0a8c 0040 0000 0000 0a51 0040 0000 0000 
-- 
0010640 0000 0000 0000 0000 0000 0000 0000 0000 
* 
0017020 07f0 0040 0000 0000 07d0 0040 0000 0000 
-- 
0017640 0000 0000 0000 0000 0000 0000 0000 0000 
* 
0020000 1e28 0060 0000 0000 0000 0000 0000 0000 
-- 
0020720 0000 0000 0000 0000 0000 0000 0000 0000 
* 
0021000 0000 0000 0000 0000 001b 0000 0001 0000 
-- 
0024500 0000 0000 0000 0000 0000 0000 0000 0000 
* 
0024540 0000 0000 0003 0001 0238 0040 0000 0000