這是一個簡單的程序。 測試環境:Debian的8,去1.4.2golang:數組索引效率
union.go:
package main
import "fmt"
type A struct {
t int32
u int64
}
func test() (total int64) {
a := [...]A{{1, 100}, {2, 3}}
for i := 0; i < 5000000000; i++ {
p := &a[i%2]
total += p.u
}
return
}
func main() {
total := test()
fmt.Println(total)
}
union.c:
#include <stdio.h>
struct A {
int t;
long u;
};
long test()
{
struct A a[2];
a[0].t = 1;
a[0].u = 100;
a[1].t = 2;
a[1].u = 3;
long total = 0;
long i;
for (i = 0; i < 5000000000; i++) {
struct A* p = &a[i % 2];
total += p->u;
}
return total;
}
int main()
{
long total = test();
printf("%ld\n", total);
}
結果比較:
走:
257500000000
real 0m9.167s
user 0m9.196s
sys 0m0.012s
C:
257500000000
real 0m3.585s
user 0m3.560s
sys 0m0.008s
看來,去編譯很多奇怪的彙編代碼(你可以使用objdump -D來檢查它)。
例如,爲什麼movabs $0x12a05f200,%rbp
出現兩次?
400c60: 31 c0 xor %eax,%eax
400c62: 48 bd 00 f2 05 2a 01 movabs $0x12a05f200,%rbp
400c69: 00 00 00
400c6c: 48 39 e8 cmp %rbp,%rax
400c6f: 7d 46 jge 400cb7 <main.test+0xb7>
400c71: 48 89 c1 mov %rax,%rcx
400c74: 48 c1 f9 3f sar $0x3f,%rcx
400c78: 48 89 c3 mov %rax,%rbx
400c7b: 48 29 cb sub %rcx,%rbx
400c7e: 48 83 e3 01 and $0x1,%rbx
400c82: 48 01 cb add %rcx,%rbx
400c85: 48 8d 2c 24 lea (%rsp),%rbp
400c89: 48 83 fb 02 cmp $0x2,%rbx
400c8d: 73 2d jae 400cbc <main.test+0xbc>
400c8f: 48 6b db 10 imul $0x10,%rbx,%rbx
400c93: 48 01 dd add %rbx,%rbp
400c96: 48 8b 5d 08 mov 0x8(%rbp),%rbx
400c9a: 48 01 f3 add %rsi,%rbx
400c9d: 48 89 de mov %rbx,%rsi
400ca0: 48 89 5c 24 28 mov %rbx,0x28(%rsp)
400ca5: 48 ff c0 inc %rax
400ca8: 48 bd 00 f2 05 2a 01 movabs $0x12a05f200,%rbp
400caf: 00 00 00
400cb2: 48 39 e8 cmp %rbp,%rax
400cb5: 7c ba jl 400c71 <main.test+0x71>
400cb7: 48 83 c4 20 add $0x20,%rsp
400cbb: c3 retq
400cbc: e8 6f e0 00 00 callq 40ed30 <runtime.panicindex>
400cc1: 0f 0b ud2
...
而C裝配更乾淨:
0000000000400570 <test>:
400570: 48 c7 44 24 e0 64 00 movq $0x64,-0x20(%rsp)
400577: 00 00
400579: 48 c7 44 24 f0 03 00 movq $0x3,-0x10(%rsp)
400580: 00 00
400582: b9 64 00 00 00 mov $0x64,%ecx
400587: 31 d2 xor %edx,%edx
400589: 31 c0 xor %eax,%eax
40058b: 48 be 00 f2 05 2a 01 movabs $0x12a05f200,%rsi
400592: 00 00 00
400595: eb 18 jmp 4005af <test+0x3f>
400597: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
40059e: 00 00
4005a0: 48 89 d1 mov %rdx,%rcx
4005a3: 83 e1 01 and $0x1,%ecx
4005a6: 48 c1 e1 04 shl $0x4,%rcx
4005aa: 48 8b 4c 0c e0 mov -0x20(%rsp,%rcx,1),%rcx
4005af: 48 83 c2 01 add $0x1,%rdx
4005b3: 48 01 c8 add %rcx,%rax
4005b6: 48 39 f2 cmp %rsi,%rdx
4005b9: 75 e5 jne 4005a0 <test+0x30>
4005bb: f3 c3 repz retq
4005bd: 0f 1f 00 nopl (%rax)
有人能解釋一下嗎?謝謝!
您可以用'-gcflag -B'選項禁用數組邊界檢查來構建go應用程序並重新運行您的基準測試? – kostya
@kostya,我嘗試了國旗,但它沒有幫助。 – kingluo
@kingluo使用'gccgo'和-O3運行你的代碼產生與C類似的結果:'time go run -compiler gccgo -gccgoflags -O3 foo.go'輸出:'real \t 0m3.667s用戶\t 0m3.644s'。所以這可能是gcc正在做的一些優化。 –