內建函數 __lzcnt64 在不同的編譯選項下返回不同的值

我有以下代碼，內建函數 __lzcnt64 在不同的編譯選項下會返回不同的值：

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <x86intrin.h>

/*
 * Return the value produced by the __lzcnt64 intrinsic for l.
 *
 * NOTE(review): as the listings in this post show, the result depends on the
 * build flags and on the CPU. Without -mlzcnt the compilers emit `bsr`
 * followed by `xor $0x3f` (53 for an input of 2047). With -mlzcnt GCC emits a
 * real `lzcnt` instruction, which a CPU lacking LZCNT support executes as
 * `bsr`, yielding 10 (i.e. 63 - 53) for the same input.
 */
long long lzcnt(long long l) 
{ 
    return __lzcnt64(l); 
} 

/*
 * Parse the first command-line argument as a decimal integer and print the
 * value lzcnt() returns for it, followed by a newline.
 *
 * Returns 0 on success, or 1 (with a message on stderr) when the argument is
 * missing, is not a complete decimal number, or does not fit in a long long.
 */
int main(int argc, char** argv) 
{
    if (argc < 2) {
        /* Without this guard argv[1] is NULL and parsing it is undefined. */
        fprintf(stderr, "usage: %s <integer>\n", argc > 0 ? argv[0] : "a.out");
        return 1;
    }

    /* strtoll, unlike atoll, reports malformed and out-of-range input. */
    errno = 0;
    char *end = NULL;
    long long value = strtoll(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || errno == ERANGE) {
        fprintf(stderr, "invalid integer: %s\n", argv[1]);
        return 1;
    }

    printf("%lld\n", lzcnt(value));
    return 0;
}

用不同的編譯器和參數編譯並執行，我得到以下結果（附上反彙編）：

Clang

$ clang -Wall src/test.c -D__LZCNT__ && ./a.out 2047 
53 

0000000000400560 <lzcnt>: 
400560: 55      push %rbp 
400561: 48 89 e5    mov %rsp,%rbp 
400564: 48 89 7d f0    mov %rdi,-0x10(%rbp) 
400568: 48 8b 7d f0    mov -0x10(%rbp),%rdi 
40056c: 48 89 7d f8    mov %rdi,-0x8(%rbp) 
400570: 48 8b 7d f8    mov -0x8(%rbp),%rdi 
400574: 48 0f bd ff    bsr %rdi,%rdi 
400578: 48 83 f7 3f    xor $0x3f,%rdi 
40057c: 89 f8     mov %edi,%eax 
40057e: 48 63 c0    movslq %eax,%rax 
400581: 5d      pop %rbp 
400582: c3      retq 
400583: 66 66 66 66 2e 0f 1f data32 data32 data32 nopw %cs:0x0(%rax,%rax,1) 
40058a: 84 00 00 00 00 00

GCC without -mlzcnt

$ gcc -Wall src/test.c -D__LZCNT__ && ./a.out 2047 
53 

0000000000400580 <lzcnt>: 
400580: 55     push %rbp 
400581: 48 89 e5    mov %rsp,%rbp 
400584: 48 89 7d e8   mov %rdi,-0x18(%rbp) 
400588: 48 8b 45 e8   mov -0x18(%rbp),%rax 
40058c: 48 89 45 f8   mov %rax,-0x8(%rbp) 
400590: 48 0f bd 45 f8  bsr -0x8(%rbp),%rax 
400595: 48 83 f0 3f   xor $0x3f,%rax 
400599: 48 98     cltq 
40059b: 5d     pop %rbp 
40059c: c3     retq

GCC，加上 -mlzcnt

$ gcc -Wall src/test.c -D__LZCNT__ -mlzcnt && ./a.out 2047 
10 

0000000000400580 <lzcnt>: 
400580: 55     push %rbp 
400581: 48 89 e5    mov %rsp,%rbp 
400584: 48 89 7d e8   mov %rdi,-0x18(%rbp) 
400588: 48 8b 45 e8   mov -0x18(%rbp),%rax 
40058c: 48 89 45 f8   mov %rax,-0x8(%rbp) 
400590: f3 48 0f bd 45 f8  lzcnt -0x8(%rbp),%rax 
400596: 48 98     cltq 
400598: 5d     pop %rbp 
400599: c3     retq

g++，不加 -mlzcnt

$ g++ -Wall src/test.c -D__LZCNT__ && ./a.out 2047 
In file included from /usr/lib/gcc/x86_64-redhat-linux/4.8.2/include/immintrin.h:64:0, 
       from /usr/lib/gcc/x86_64-redhat-linux/4.8.2/include/x86intrin.h:62, 
       from src/test.c:3: 
/usr/lib/gcc/x86_64-redhat-linux/4.8.2/include/lzcntintrin.h: In function ‘short unsigned int __lzcnt16(short unsigned int)’: 
/usr/lib/gcc/x86_64-redhat-linux/4.8.2/include/lzcntintrin.h:38:29: error: ‘__builtin_clzs’ was not declared in this scope 
return __builtin_clzs (__X);

g++，加上 -mlzcnt

$ g++ -Wall src/test.c -D__LZCNT__ -mlzcnt && ./a.out 2047 
10 

0000000000400640 <_Z5lzcntx>: 
400640: 55     push %rbp 
400641: 48 89 e5    mov %rsp,%rbp 
400644: 48 89 7d e8   mov %rdi,-0x18(%rbp) 
400648: 48 8b 45 e8   mov -0x18(%rbp),%rax 
40064c: 48 89 45 f8   mov %rax,-0x8(%rbp) 
400650: f3 48 0f bd 45 f8  lzcnt -0x8(%rbp),%rax 
400656: 48 98     cltq 
400658: 5d     pop %rbp 
400659: c3     retq

使用 -mlzcnt 與否的區別很明顯，但我實際上是在 C++ 中工作，而沒有這個選項時代碼無法用 g++ 編譯（clang++ 則沒問題）。看起來使用 -mlzcnt 時，結果等於 63 −（不使用 -mlzcnt 時的結果）。gcc 的 -mlzcnt 選項有沒有相關文檔（我查看了 info 頁面，但找不到任何內容）？它除了選擇 lzcnt 指令之外，還做了別的事情嗎？

來源

2013-10-30 Michael Barker

您是否反彙編了程序，以查看它們是否使用了預期的指令？你確定兩個平臺都*支持*該指令嗎？ – unwind

不提供彙編代碼就無法回答此問題。 –

Mac是英特爾Sandy Bridge，Linux是Intel IVY Bridge –

首先，我可以用clang 3.3和gcc 4.8.1完美地複製你的問題。

以下是我的想法……我只有大約50％的把握。

LZCNT是您的計算機可能不支持的指令。
維基百科指出，LZCNT需要Haswell才支持。
我們可以嘗試使用Linux應用程序cpuid來驗證此信息。（包含在Debian，RHEL等中）。
維基百科還提示：「是否支持由 CPUID.80000001H:ECX.ABM[位5] 標誌指示」。

讓我們來看看我的系統（這是至強X3430，Lynnfield，Nehalem）。

[4:48pm][[email protected] /tmp] sudo cpuid -1ir | grep 80000001 
    0x80000001 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000001 edx=0x28100800

因此，ECX的第5位（ABM標誌）沒有被置位。所以我的系統不支持LZCNT。

看起來我的機器還會把它不支持的LZCNT指令當作BSR來執行。

來源

2013-10-30 21:49:28

基本上是正確的答案。我之前不知道的是：即使當前平臺不支持，-mlzcnt 也會強制生成 lzcnt 指令。奇怪的是，英特爾CPU會直接接受 lzcnt 指令並將其當作 bsr 執行，而不是[以非法指令異常拒絕它](https://code.google.com/p/corkami/wiki/x86oddities#lzcnt)。 –

我認爲 g++ 編譯器也有一個缺陷：它缺少一個內建函數（__builtin_clzs）。如果我在系統頭文件中把對它的引用註釋掉，代碼就能編譯並正常工作。 –

@邁克爾：是的，沒錯。'lzcnt' 的編碼就是 'rep bsr'，而 x86 CPU 會靜默忽略不適用的指令前綴字節。不過這種行爲並不能保證在未來依然成立，本例正是如此。針對舊款AMD CPU分支預測器限制的 'rep ret' 變通寫法是個例外：它已被廣泛使用，因此未來的擴展無法重新定義它：http://stackoverflow.com/a/32347393/224132。通常，意外使用了目標機器不支持的指令時很容易調試，但這種情況下卻不然。 –

您似乎在調用__lzcnt64，但傳遞了一個32位整數。也許這會讓編譯器感到困惑。

也許返回10的那個版本在寄存器的另一半裏看到了一些垃圾數據？

試試這個：

long long int v = __lzcnt64(2047LL);

（即把它寫成 long long 字面量）。

來源

2013-10-30 11:31:29 Ben

使用此[代碼](https://gist.github.com/sharth/b689c85eb7e2288b2076)，在 GCC 4.8.1、CentOS 6.4 x64 上：使用 '-mlzcnt -O0' 時得到10；使用 '-mlzcnt -O3' 時得到53。 –

如果你試試 'long long int v = 2047LL; v = __lzcnt64(v);' 會怎樣？無論如何，這看起來都像是一個錯誤。 – Ben

內建函數 __lzcnt64 在不同的編譯選項下返回不同的值

回答

相關問題