我正在使用Visual Studio 2008 C++開發Windows Mobile 6(ARMV4I)應用程序,並試圖學習如何閱讀由VS生成的ARM彙編代碼,以儘量減少應用程序中不必要的緩衝區副本。所以,我創建了一個測試應用程序,用來在彙編代碼中尋找不必要的緩衝區副本,它看起來像這樣:
#include <vector>
typedef std::vector<BYTE> Buf;
// Test class that stores a byte buffer (Buf is the std::vector<BYTE> typedef
// declared above).
class Foo
{
public:
// Receives the buffer by value, then swaps the parameter's contents into the
// default-constructed member b_ instead of assigning to it.
Foo(Buf b) { b_.swap(b); };
private:
// Buffer held by this object after construction.
Buf b_;
};
// Builds the test buffer: a Buf constructed with size 1024 whose first
// element is set to 1, returned by value.
Buf Create()
{
Buf b(1024);
b[ 0 ] = 0x0001;
// Returning the named local by value is the return path the question asks
// about (whether the compiler copies the buffer here).
return b;
}
// Entry point of the test program. argc and argv are not used.
int _tmain(int argc, _TCHAR* argv[])
{
// Construct a Foo directly from the temporary returned by Create(); this is
// the hand-off whose generated code is being inspected for copies.
Foo f(Create());
return 0;
}
我想了解,通過Create返回的緩衝區在傳給Foo構造函數時是否會被複制,
還是編譯器能夠把這次複製優化掉。
在開啓優化的發佈(Release)構建中,生成的彙編代碼如下:
class Foo
{
public:
Foo(Buf b) { b_.swap(b); };
0001112C stmdb sp!, {r4 - r7, lr}
00011130 mov r7, r0
00011134 mov r3, #0
00011138 str r3, this
0001113C str r3, [r7, #4]
00011140 str r3, [r7, #8]
00011144 ldr r3, this
00011148 ldr r2, this
0001114C mov r5, r7
00011150 mov r4, r1
00011154 str r3, this, #4
00011158 str r2, this, #4
0001115C mov r6, r1
00011160 ldr r2, this
00011164 ldr r3, this
00011168 mov lr, r7
0001116C str r3, this
00011170 str r2, this
00011174 ldr r2, [lr, #8]!
00011178 ldr r3, [r6, #8]!
0001117C str r3, this
00011180 str r2, this
00011184 ldr r3, this
00011188 movs r0, r3
0001118C beq |Foo::Foo + 0x84 (111b0h)|
00011190 ldr r3, [r1, #8]
00011194 sub r1, r3, r0
00011198 cmp r1, #0x80
0001119C bls |Foo::Foo + 0x80 (111ach)|
000111A0 bl 000112D4
000111A4 mov r0, r7
000111A8 ldmia sp!, {r4 - r7, pc}
000111AC bl |stlp_std::__node_alloc::_M_deallocate (11d2ch)|
000111B0 mov r0, r7
000111B4 ldmia sp!, {r4 - r7, pc}
--- ...\stlport\stl\_vector.h -----------------------------
// snip!
--- ...\asm_test.cpp
private:
Buf b_;
};
Buf Create()
{
00011240 stmdb sp!, {r4, lr}
00011244 mov r4, r0
Buf b(1024);
00011248 mov r1, #1, 22
0001124C bl |
b[ 0 ] = 0x0001;
00011250 ldr r3, [r4]
00011254 mov r2, #1
return b;
}
int _tmain(int argc, _TCHAR* argv[])
{
00011264 str lr, [sp, #-4]!
00011268 sub sp, sp, #0x18
Foo f(Create());
0001126C add r0, sp, #0xC
00011270 bl |Create (11240h)|
00011274 mov r1, r0
00011278 add r0, sp, #0
0001127C bl |Foo::Foo (1112ch)|
return 0;
00011280 ldr r0, argc
00011284 cmp r0, #0
00011288 beq |wmain + 0x44 (112a8h)|
0001128C ldr r3, [sp, #8]
00011290 sub r1, r3, r0
00011294 cmp r1, #0x80
00011298 bls |wmain + 0x40 (112a4h)|
0001129C bl 000112D4
000112A0 b |wmain + 0x44 (112a8h)|
000112A4 bl |stlp_std::__node_alloc::_M_deallocate (11d2ch)|
000112A8 mov r0, #0
}
我應該在彙編代碼中尋找什麼樣的模式,
才能看出Buf結構在哪些地方被複制了呢?
直接從C++代碼中看不是更容易嗎?所有副本都是可預測的。 – 2011-04-26 15:36:20
通過查看C++代碼,我可以看到'Buf'被複制到兩個地方:'Foo'構造函數,以及從'Create'函數返回時。 – 2011-04-26 15:37:38
@Etienne de Martel - 'Create'函數返回的'Buf'的副本應該會被RVO優化掉。編譯器也可以優化掉'Foo'構造函數中的副本。我不知道。我試圖瞭解如何閱讀彙編代碼,以瞭解應用了哪些優化。 – PaulH 2011-04-26 15:43:02