這個錯誤可能實際上並沒有顯示在所有的機器上,但在我的我跑下面的代碼,並得到了輸出(注意值-1.#IND00
)?SSE移位指令在隨後的指令中導致奇怪的輸出(-1。#IND00)?
values int:: 4 2
shifts:: 4 2
result: : 64 32
input 1 HADDPD:: 10.000000 -1.#IND00
input 2 HADDPD:: 13.000000 10.000000
result of HADDPD:: -1.#IND00 23.000000
如果我註釋掉
__m64 PSLLDm64_IN = _mm_set_pi32(2,4);
__m64 PSLLDm64_C = _mm_set_pi32(2,4);//could this be the culprit?
__m64 PSLLDm64_r = PSLLD(PSLLDm64_IN, PSLLDm64_C);
print_2_32_bit_int("values int:" , PSLLDm64_IN);
print_2_32_bit_int("shifts:", PSLLDm64_C);
print_2_32_bit_int("result: ", PSLLDm64_r);
我得到...
input 1 HADDPD:: 10.000000 100.000000
input 2 HADDPD:: 13.000000 10.000000
result of HADDPD:: 110.000000 23.000000
我想知道如果線32,其中__m64 PSLLDm64_C = _mm_set_pi32(2,4);
可以搞砸了?
繼承人完整的代碼(它使用g ++與-msse3 -mmmx
一起運行)並非所有的頭文件都是非常必要的。
#include <xmmintrin.h>
#include <emmintrin.h>
#include <pmmintrin.h>
#include <stdio.h>
#include <stdint.h>
#include <iostream>
void print_2_64_bit_doubles(const char * label, __m128d m64_r)
{
double *val = (double *) &m64_r;
printf("%s: %f %f\n",
label, val[0], val[1]);
}
void print_2_32_bit_int(const char * label, __m64 m32_r)
{
int *val = (int *) &m32_r;
printf("%s: %d %d\n",
label, val[0], val[1]);
}
__m128d HADDPD(__m128d __X, __m128d __Y)
{
return _mm_hadd_pd (__X, __Y);
}
__m64 PSLLD(__m64 __m, __m64 __count)
{
return _mm_sll_pi32 (__m, __count);
}
int main()
{
//PSLLD-------------------------------------------------------------------
__m64 PSLLDm64_IN = _mm_set_pi32(2,4);
__m64 PSLLDm64_C = _mm_set_pi32(2,4);
__m64 PSLLDm64_r = PSLLD(PSLLDm64_IN, PSLLDm64_C);
print_2_32_bit_int("values int:" , PSLLDm64_IN);
print_2_32_bit_int("shifts:", PSLLDm64_C);
print_2_32_bit_int("result: ", PSLLDm64_r);
//HADDPD------------------------------------------------------------------
double C1 = 10;
double D = C1*C1;
double x = 10;
double y = 13;
__m128d HADDPDm64_1 = _mm_set_pd(D,C1);
__m128d HADDPDm64_2 = _mm_set_pd(x,y);
__m128d HADDPDm64_r = HADDPD(HADDPDm64_1, HADDPDm64_2);
print_2_64_bit_doubles("input 1 HADDPD:", HADDPDm64_1);
print_2_64_bit_doubles("input 2 HADDPD:", HADDPDm64_2);
print_2_64_bit_doubles("result of HADDPD:", HADDPDm64_r);
return 0;
}
編輯:這是與G ++ 4.4.1 -msse -msse2 -msse3 -msse4
#include <xmmintrin.h>
#include <emmintrin.h>
#include <pmmintrin.h>
#include <mmintrin.h>
#include <stdio.h>
#include <stdint.h>
void print_2_64_bit_doubles(const char * label, __m128d m64_r)
{
double *val = (double *) &m64_r;
printf("%s: %f %f\n",
label, val[0], val[1]);
}
void print_2_32_bit_int(const char * label, __m64 m32_r)
{
int *val = (int *) &m32_r;
printf("%s: %d %d\n",
label, val[0], val[1]);
}
void print_1_32_bit_int(const char * label, __m64 m32_r)
{
int *val = (int *) &m32_r;
printf("%s: %d \n",
label, val[0]);
}
__m128d HADDPD(__m128d __X, __m128d __Y)
{
return _mm_hadd_pd (__X, __Y);
}
__m64 PSLLD(__m64 __m, __m64 __count)
{
return _mm_sll_pi32 (__m, __count);
}
int main()
{
//PSLLD-------------------------------------------------------------------
__m64 PSLLDm64_IN = _mm_set_pi32(2,4);
long long __i = 2;
__m64 PSLLDm64_C = (__m64)(__i);
__m64 PSLLDm64_r = PSLLD(PSLLDm64_IN, PSLLDm64_C);
_mm_empty();
print_2_32_bit_int("values int:" , PSLLDm64_IN);
print_1_32_bit_int("shifts:", PSLLDm64_C);
print_2_32_bit_int("result: ", PSLLDm64_r);
//HADDPD------------------------------------------------------------------
double C1 = 10;
double D = C1*C1;
double x = 10;
double y = 13;
__m128d HADDPDm64_1 = _mm_set_pd(D,C1);
__m128d HADDPDm64_2 = _mm_set_pd(x,y);
__m128d HADDPDm64_r = HADDPD(HADDPDm64_1, HADDPDm64_2);
print_2_64_bit_doubles("input 1 HADDPD:", HADDPDm64_1);
print_2_64_bit_doubles("input 2 HADDPD:", HADDPDm64_2);
print_2_64_bit_doubles("result of HADDPD:", HADDPDm64_r);
return 0;
}
和輸出
values int:: 4 2
shifts:: 2
result: : 16 8
input 1 HADDPD:: 10.000000 -1.#IND00
input 2 HADDPD:: 13.000000 10.000000
result of HADDPD:: -1.#IND00 23.000000
用gcc工作正常4.2.1 - 你在用什麼編譯器? –
我想知道在64位SIMD之後是否需要'_mm_empty()'? –
當我在命令行輸入'g ++ -v'時,得到'gcc 4.4.1 tdm-2 mingw 32'。我在__m64 PSLLDm64_r = PSLLD(PSLLDm64_IN,PSLLDm64_C)之後放置了_mm_empty();'和輸出結果相同? – pandoragami