Skip to content

Commit

Permalink
truncating mantissa part of f128 to 64 bits in integer_decode
Browse files Browse the repository at this point in the history
Signed-off-by: usamoi <[email protected]>
  • Loading branch information
usamoi committed Aug 30, 2024
1 parent 2853b7c commit e485d2d
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 4 deletions.
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,6 @@ i128 = []

[build-dependencies]
autocfg = "1"

[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(has_total_cmp)'] }
28 changes: 24 additions & 4 deletions src/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,11 @@ pub trait FloatCore: Num + NumCast + Neg<Output = Self> + PartialOrd + Copy {

/// Returns the mantissa, base 2 exponent, and sign as integers, respectively.
/// The original number can be recovered by `sign * mantissa * 2 ^ exponent`.
///
/// This function is also implemented for `f128`, but only lossily: the mantissa
/// of an `f128` is 113 bits wide, which does not fit in the 64-bit return type.
/// For `f128` the mantissa is therefore truncated to its 64 most significant
/// bits, and the original number can only be approximated, by
/// `sign * mantissa * 2 ^ 49 * 2 ^ exponent`.
///
/// # Examples
///
Expand Down Expand Up @@ -2197,10 +2202,25 @@ fn integer_decode_f64(f: f64) -> (u64, i16, i8) {
(mantissa, exponent, sign)
}

// FIXME: implement it
#[cfg(feature = "f128")]
fn integer_decode_f128(_f: f128) -> (u64, i16, i8) {
unimplemented!("radix part of `f128` is wider than `u64`")
/// Splits an `f128` into `(mantissa, exponent, sign)` using its raw bit pattern.
///
/// * `sign` is `1` when bit 127 is clear and `-1` when it is set.
/// * `mantissa` is built from the low 112 bits: when the 15-bit exponent field
///   is zero the fraction is shifted left by one, otherwise bit 112 (the
///   implicit leading one) is OR-ed in.
/// * `exponent` is the 15-bit exponent field minus `16383 + 112`
///   (exponent bias plus mantissa shift).
///
/// The all-ones exponent field is not special-cased; it is decoded by the
/// same arithmetic as any other non-zero exponent field.
fn integer_decode_f128(f: f128) -> (u128, i16, i8) {
    // Low 112 bits of the encoding: the stored fraction.
    const FRACTION_MASK: u128 = (1 << 112) - 1;
    // Bit 112: the leading one that is not stored when the exponent field is non-zero.
    const IMPLICIT_BIT: u128 = 1 << 112;

    let raw: u128 = f.to_bits();
    let fraction = raw & FRACTION_MASK;
    let biased_exponent = ((raw >> 112) & 0x7fff) as i16;

    let mantissa = match biased_exponent {
        // Zero exponent field: no implicit bit, fraction is shifted up by one instead.
        0 => fraction << 1,
        _ => fraction | IMPLICIT_BIT,
    };
    let sign: i8 = if raw >> 127 == 0 { 1 } else { -1 };

    // Exponent bias + mantissa shift
    (mantissa, biased_exponent - (16383 + 112), sign)
}

/// Decodes an `f128` like `integer_decode_f128`, then narrows the mantissa to
/// a `u64` by discarding its `113 - 64 = 49` lowest bits.
///
/// The exponent and sign are passed through unchanged, so the exponent is NOT
/// adjusted for the dropped bits: the decoded value approximates
/// `sign * mantissa * 2 ^ 49 * 2 ^ exponent`.
#[cfg(feature = "f128")]
fn integer_decode_f128_truncated(f: f128) -> (u64, i16, i8) {
    // Number of low-order mantissa bits discarded to fit into 64 bits.
    const DROPPED_BITS: u32 = 113 - 64;

    let (wide_mantissa, exponent, sign) = integer_decode_f128(f);
    let narrow_mantissa = (wide_mantissa >> DROPPED_BITS) as u64;
    (narrow_mantissa, exponent, sign)
}

#[cfg(feature = "f16")]
Expand All @@ -2212,7 +2232,7 @@ float_impl_std!(f32 integer_decode_f32);
float_impl_std!(f64 integer_decode_f64);
#[cfg(feature = "f128")]
#[cfg(feature = "std")]
float_impl_std!(f128 integer_decode_f128);
float_impl_std!(f128 integer_decode_f128_truncated);

#[cfg(all(not(feature = "std"), feature = "libm"))]
impl Float for f32 {
Expand Down

0 comments on commit e485d2d

Please sign in to comment.