更改gcc优化开关时出现意外的文件大小
我前段时间写了一个小型的 brainfuck 解释器；在编译它的过程中，我注意到改变 gcc 的优化开关后，输出文件大小的变化并不符合我的预期。标签：c、gcc、compiler-optimization、compiler-options。以下是我编译的程序：
// Doubly linked list node. The interpreter stores one program character
// per node; link() also uses nodes as a temporary stack of open brackets.
typedef struct node {
    struct node *prev;  // previous node, NULL at the head of the list
    int val;            // character held by this node; 0 means "not filled yet"
    struct node *jump;  // partner bracket (or saved bracket location), else NULL
    struct node *next;  // next node, NULL at the tail of the list
} node;
// Linked list helpers.
// (void) makes this a real prototype; an empty () would leave the
// parameter list unspecified and disable argument checking.
node *newnode(void);
node *append(node *n);
node *prepend(node *n);
void erase(node *n);
int pop(node *n);
// Interpreter: doop() executes a program list, link() pairs up its brackets.
void doop(node *n);
node *link(node *n);
#include <stdlib.h>
// Allocates a new node with all three links set to NULL and val set to 0.
// Aborts the program if the allocation fails, so the returned pointer is
// never NULL. The node is released by free(), pop() or erase().
node *newnode(void) {
    node *n = malloc(sizeof *n);
    if (n == NULL)
        abort();  // out of memory: stop here instead of dereferencing NULL
    n->prev = NULL;
    n->next = NULL;
    n->jump = NULL;
    n->val = 0;
    return n;
}
// Creates a fresh node and hooks it in directly after n, which is assumed
// to be the last node of its list. Returns the new node (the new tail).
node *append(node *n) {
    node *tail = newnode();
    tail->prev = n;
    n->next = tail;
    return tail;
}
// Creates a fresh node and hooks it in directly before n, which is assumed
// to be the first node of its list. Returns the new node (the new head).
node *prepend(node *n) {
    node *head = newnode();
    head->next = n;
    n->prev = head;
    return head;
}
// Frees the whole list that n belongs to: rewinds to the first node, then
// walks forward freeing every node. n may be any node of the list.
// Passing NULL is a no-op, mirroring free(NULL). Every node of the list is
// invalid after the call.
void erase(node *n) {
    if (n == NULL)
        return;
    while (n->prev)
        n = n->prev;
    while (n) {
        node *following = n->next;  // read the link before the node is freed
        free(n);
        n = following;
    }
}
// Unlinks node n from its list, joining its neighbours (if any) to each
// other, then frees n. Returns the val that n held.
int pop(node *n) {
    node *before = n->prev;
    node *after = n->next;
    int value = n->val;

    if (before)
        before->next = after;
    if (after)
        after->prev = before;

    free(n);
    return value;
}
#include <stdio.h>
// The eight brainfuck instruction characters, as handled by doop():
//   LSEEK / RSEEK          move the data pointer one cell left / right
//   INCREMENT / DECREMENT  add / subtract one at the current cell
//   STDOUT / STDIN         write the current cell / read one character into it
//   LBRACKET / RBRACKET    loop start / loop end, paired up by link()
// Any other character in the program text is ignored.
#define LSEEK '<'
#define RSEEK '>'
#define INCREMENT '+'
#define DECREMENT '-'
#define STDOUT '.'
#define STDIN ','
#define LBRACKET '['
#define RBRACKET ']'
// memory (the "tape") used by the bf program: 30000 cells, all starting at 0
char mem[30000] = { 0 };
// data pointer used by the bf program; starts at the first cell and keeps
// its position between calls to doop()
char *ptr = mem;
// Executes the program held in the list starting at node n, operating on
// the global tape `mem` through the global data pointer `ptr`.
// Bracket nodes are expected to have been paired up by link() beforehand.
// Execution ends at the end of the list. It ends early, with a message on
// stderr, if an instruction would move the data pointer off the tape or if
// a jump has to be taken from a bracket that has no partner.
void doop(node *n) {
    node *m = n;
    // loop while the node pointer is valid, i.e. until the end of the list
    while (m) {
        switch (m->val) {
        case LSEEK:
            // refuse to step in front of the first cell
            if (ptr == mem) {
                fprintf(stderr, "error: data pointer moved before the first cell\n");
                return;
            }
            ptr--;
            break;
        case RSEEK:
            // refuse to step past the last cell
            if (ptr == mem + sizeof mem - 1) {
                fprintf(stderr, "error: data pointer moved past the last cell\n");
                return;
            }
            ptr++;
            break;
        case INCREMENT:
            (*ptr)++;
            break;
        case DECREMENT:
            (*ptr)--;
            break;
        case STDOUT:
            // flush right away so output shows up even without a newline
            putchar(*ptr);
            fflush(stdout);
            break;
        case STDIN:
            // the result of getchar() (including EOF) is converted to char
            *ptr = getchar();
            break;
        case LBRACKET:
            // jump to the closing bracket if the current cell is zero
            if (!*ptr) {
                if (m->jump == NULL) {
                    fprintf(stderr, "error: unmatched '['\n");
                    return;
                }
                m = m->jump;
            }
            break;
        case RBRACKET:
            // jump back to the opening bracket if the current cell is nonzero
            if (*ptr) {
                if (m->jump == NULL) {
                    fprintf(stderr, "error: unmatched ']'\n");
                    return;
                }
                m = m->jump;
            }
            break;
        }
        // proceed to the next instruction (after a jump: the one following
        // the partner bracket)
        m = m->next;
    }
}
// Pairs up the brackets of the program list starting at n: for every
// matching '[' / ']' the two nodes' jump fields are set to point at each
// other. Returns n unchanged so the call can be nested, e.g. doop(link(n)).
// Brackets without a partner keep jump == NULL.
node *link(node *n) {
    node *m = n;
    // temporary stack of currently open brackets; each stack node stores
    // the location of one '[' in its jump field
    node *links = newnode();
    while (m) {
        switch (m->val) {
        case LBRACKET:
            // push: reuse the top node while it is still empty, otherwise
            // grow the stack by one node
            if (links->jump)
                links = append(links);
            links->jump = m;
            break;
        case RBRACKET:
            // nothing open: this ']' has no partner, leave its jump NULL
            if (links->jump == NULL)
                break;
            // connect the closing bracket and the most recent opening
            // bracket to each other
            m->jump = links->jump;
            links->jump->jump = m;
            if (links->prev) {
                // pop the now unneeded top node off the stack
                links = links->prev;
                pop(links->next);
            } else {
                // bottom node is never popped; mark it empty so that a
                // later stray ']' cannot pair with this stale entry
                links->jump = NULL;
            }
            break;
        }
        // advance to the next character
        m = m->next;
    }
    // free whatever is left of the temporary stack
    erase(links);
    return n;
}
#include <signal.h>
// Keeps the interactive loop in main() running while nonzero.
// Press ctrl-c then enter to quit if not running from a file.
// volatile sig_atomic_t because the flag is written from a signal handler.
volatile sig_atomic_t run = 1;
// Signal handler (installed for SIGINT by main()): requests that the
// interactive loop stop. The signal number is not used.
void quit(int val) {
    (void)val;
    run = 0;
}
// Entry point. With a file name argument: prints the name, loads the file
// into a linked list (one character per node), runs it and exits.
// Without an argument: reads one line at a time from stdin and runs each
// line, until ctrl-c is pressed or stdin reaches end-of-file.
// Returns 1 if the file cannot be opened, 0 otherwise.
int main(int argc, char** argv) {
    // catch ctrl-c
    signal(SIGINT, quit);
    int c;
    // our text structure is a linked list
    node *text, *text_start;
    if (argc > 1) {
        // print the file name
        printf("%s\n", argv[1]);
        // open the file and read it to the linked list
        FILE *f = fopen(argv[1], "r");
        if (f == NULL) {
            perror(argv[1]);
            return 1;
        }
        text = text_start = newnode();
        while ((c = fgetc(f)) != EOF) {
            // the first node starts out empty (val == 0) and is filled
            // in place; every later character gets a node of its own
            if (text->val)
                text = append(text);
            text->val = c;
        }
        fclose(f);
        // link all the loops, then process all instructions
        doop(link(text_start));
        // free all linked list nodes
        erase(text_start);
        // we just ran a file, so no interactive interpreter
        run = 0;
    }
    // repeatedly read and execute one line until interrupted
    while (run) {
        // a new linked list is generated for each line of input
        text = text_start = newnode();
        // read one line from stdin into the list; also stop at
        // end-of-file, otherwise a closed stdin would loop forever
        while ((c = getchar()) != '\n' && c != EOF) {
            if (text->val)
                text = append(text);
            text->val = c;
        }
        if (c == EOF)
            run = 0;  // no more input: run what was read, then leave
        // link all the loops, then process the instructions for the line
        doop(link(text_start));
        // free all linked list nodes
        erase(text_start);
    }
    return 0;
}
一个小型二进制文件的大小，很大一部分来自启动样板代码、调试符号表，以及全局数据区和其他段中的大量零填充。可以对二进制文件做转储，检查其中的零填充。要得到更真实的比例，请去掉符号（strip）。实际上，您应该只比较 .text 段（即指令流）的大小，而不是整个 Unix 可执行文件的大小。此外，优化对代码大小的影响非常难以预测：循环展开和内联会增大代码，而消除冗余的内存加载/存储、公共子表达式消除、死代码消除和常量折叠会减小代码。这些相反的作用叠加在一起，最终结果就很难看清。如果您真想弄明白，可以逐行并排比较生成的汇编代码（参见 gcc -S），然后再回来反馈。另外，评论里说得对：如果程序的大部分工作是在 I/O 流上读写数据，那么可优化的代码就不多。优化主要对 CPU 密集型和内存密集型的代码有效。
% gcc -Os -o bfos brainfuck.c # -Os is optimize but keep code small
% objdump -h bfos | grep text
12 .text 00000452 0000000000400730 0000000000400730 00000730 2**4
% gcc -O0 -o bfo0 brainfuck.c # -O0 is default: no optimizations
% objdump -h bfo0 | grep text
12 .text 00000652 0000000000400730 0000000000400730 00000730 2**4
0x452/0x652=差异巨大
但是二进制大小要大很多倍,有填充,与编译代码大小无关:
% ls -l bfo0 bfos
-rwxr-xr-x 1 root root 13461 Oct 4 22:42 bfo0
-rwxr-xr-x 1 root root 13469 Oct 4 22:41 bfos
% gcc --version
gcc (Ubuntu 4.8.4-2ubuntu1~14.04.3) 4.8.4
最后是大段的零填充（“*”表示重复的行，因此从 0x000760 到 0x0006700 全部是零字节）
一个小型二进制文件的大小，很大一部分来自启动样板代码、调试符号表，以及全局数据区和其他段中的大量零填充。可以对二进制文件做转储，检查其中的零填充。要得到更真实的比例，请去掉符号（strip）。实际上，您应该只比较 .text 段（即指令流）的大小，而不是整个 Unix 可执行文件的大小。此外，优化对代码大小的影响非常难以预测：循环展开和内联会增大代码，而消除冗余的内存加载/存储、公共子表达式消除、死代码消除和常量折叠会减小代码。这些相反的作用叠加在一起，最终结果就很难看清。如果您真想弄明白，可以逐行并排比较生成的汇编代码（参见 gcc -S），然后再回来反馈。另外，评论里说得对：如果程序的大部分工作是在 I/O 流上读写数据，那么可优化的代码就不多。优化主要对 CPU 密集型和内存密集型的代码有效。
% gcc -Os -o bfos brainfuck.c # -Os is optimize but keep code small
% objdump -h bfos | grep text
12 .text 00000452 0000000000400730 0000000000400730 00000730 2**4
% gcc -O0 -o bfo0 brainfuck.c # -O0 is default: no optimizations
% objdump -h bfo0 | grep text
12 .text 00000652 0000000000400730 0000000000400730 00000730 2**4
0x452/0x652=差异巨大
但是二进制大小要大很多倍,有填充,与编译代码大小无关:
% ls -l bfo0 bfos
-rwxr-xr-x 1 root root 13461 Oct 4 22:42 bfo0
-rwxr-xr-x 1 root root 13469 Oct 4 22:41 bfos
% gcc --version
gcc (Ubuntu 4.8.4-2ubuntu1~14.04.3) 4.8.4
最后是大段的零填充（“*”表示重复的行，因此从 0x000760 到 0x0006700 全部是零字节）
评论：
— 您期望的是什么？
— @M.M 我期望文件大小会有更明显的变化。
— 也许您的代码中没有太多可优化的内容。您可以比较不同版本生成的汇编代码，看看到底发生了什么变化。
— 已在答案中补充了对 .text 段的完整分析。
— 您的程序并不复杂，所以我不认为优化器有多少机会做出什么有趣的事情。