理性浮點

-2

考慮下面結構表示的有理數。理性浮點

struct rational { 
    uint64_t n; 
    uint64_t d; 
    unsigned char sign : 1; 
};

假設的IEEE-754 binary64表示的double，如何能結構被轉換爲最接近double與正確的舍入？將n和d轉換爲double並將其明確劃分爲舍入誤差的幼稚方法。

來源

2015-10-05 68ejxfcj5669

C或C++？挑一個... –

沒關係。如果它困擾你，我會將Java添加到列表中。 – 68ejxfcj5669

不，它確實沒有。問題是關於數學。 – 68ejxfcj5669

實現所需結果的一種方法是在整數空間中執行除法。由於標準C/C++不提供128位整數類型（儘管某些工具鏈可能會將此作爲擴展），但這不是非常有效，但它會產生正確的結果。

下面的代碼會生成54個商位和一個餘數，一次一位。根據IEEE-754，最顯着的53商位表示double結果的尾數部分，而最小有效商位和剩餘部分是四捨五入到「最接近或甚至」所需的。

下面的代碼可以編譯爲C或C++程序（至少它可以和我的工具鏈一起使用）。它已被輕度測試。由於按位處理，這不是很快，並且各種優化都是可能的，特別是如果使用機器特定的數據類型和內部函數。

#include <stdlib.h> 
#include <stdio.h> 
#include <string.h> 
#include <stdint.h> 

struct rational { 
    uint64_t n; 
    uint64_t d; 
    unsigned char sign : 1; 
}; 

double uint64_as_double (uint64_t a) 
{ 
    double res; 
#if defined (__cplusplus) 
    memcpy (&res, &a, sizeof (res)); 
#else /* __cplusplus */ 
    volatile union { 
     double f; 
     uint64_t i; 
    } cvt; 
    cvt.i = a; 
    res = cvt.f; 
#endif /* __cplusplus */ 
    return res; 
} 

#define ADDcc(a,b,cy,t0,t1) (t0=(b), t1=(a), t0=t0+t1, cy=t0<t1, t0=t0) 
#define ADDC(a,b,cy,t0,t1) (t0=(b)+cy, t1=(a), t0+t1) 
#define SUBcc(a,b,cy,t0,t1) (t0=(b), t1=(a), cy=t1<t0, t1-t0) 

double rational2double (struct rational a) 
{ 
    uint64_t dividend, divisor, quot, rem, t0, t1, cy, res, expo; 
    int sticky, round, odd, sign, i; 

    dividend = a.n; 
    divisor = a.d; 
    sign = a.sign; 

    /* handle special cases */ 
    if ((dividend == 0) && (divisor == 0)) { 
     res = 0xFFF8000000000000ULL; /* NaN INDEFINITE */ 
    } else if (dividend == 0) {    
     res = (uint64_t)sign << 63; /* zero */ 
    } else if (divisor == 0) { 
     res = ((uint64_t)sign << 63) | 0x7ff0000000000000ULL; /* Inf */ 
    } 
    /* handle normal cases */ 
    else { 
     quot = dividend; 
     rem = 0; 
     expo = 0; 
     /* normalize operands using 128-bit shifts */ 
     while (rem < divisor) { 
      quot = ADDcc (quot, quot, cy, t0, t1); 
      rem = ADDC (rem, rem, cy, t0, t1); 
      expo--; 
     } 
     /* integer bit of quotient is known to be 1 */ 
     rem = rem - divisor; 
     quot = quot + 1; 
     /* generate 53 more quotient bits */ 
     for (i = 0; i < 53; i++) { 
      quot = ADDcc (quot, quot, cy, t0, t1); 
      rem = ADDC (rem, rem, cy, t0, t1); 
      rem = SUBcc (rem, divisor, cy, t0, t1); 
      if (cy) { 
       rem = rem + divisor; 
      } else { 
       quot = quot + 1; 
      } 
     } 
     /* round to nearest or even */ 
     sticky = rem != 0; 
     round = quot & 1; 
     quot = quot >> 1; 
     odd = quot & 1; 
     if (round && (sticky || odd)) { 
      quot++; 
     } 
     /* compose normalized IEEE-754 double-precision number */ 
     res = ((uint64_t)sign << 63) + ((expo + 64 + 1023 - 1) << 52) + quot; 
    } 
    return uint64_as_double (res); 
}

來源

2015-10-05 20:32:27 njuffa

謝謝。只要我意識到長分區產生的確切數字，我知道解決方案看起來像這樣。乾杯! – 68ejxfcj5669

回答

相關問題