From 993f7bf3ab1ea82feca9b485fc0308534fbc47ca Mon Sep 17 00:00:00 2001 From: Lohann Paterno Coutinho Ferreira Date: Wed, 10 Jul 2024 23:14:21 -0300 Subject: [PATCH 1/5] Support widening mul --- src/ops/mod.rs | 1 + src/ops/widening.rs | 137 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 src/ops/widening.rs diff --git a/src/ops/mod.rs b/src/ops/mod.rs index 2128d86a..ac07d3b2 100644 --- a/src/ops/mod.rs +++ b/src/ops/mod.rs @@ -5,4 +5,5 @@ pub mod inv; pub mod mul_add; pub mod overflowing; pub mod saturating; +pub mod widening; pub mod wrapping; diff --git a/src/ops/widening.rs b/src/ops/widening.rs new file mode 100644 index 00000000..6b55a9d3 --- /dev/null +++ b/src/ops/widening.rs @@ -0,0 +1,137 @@ +macro_rules! widening_impl { + ($limb:ty, $doublelimb:ty, $test_name:ident) => { + impl WideningMul<$limb> for $limb { + type Output = $limb; + + #[inline] + fn widening_mul(self, rhs: $limb) -> (Self::Output, Self::Output) { + // SAFETY: overflow will be contained within the wider types + let wide = (self as $doublelimb).wrapping_mul(rhs as $doublelimb); + (wide as $limb, (wide >> <$limb>::BITS) as $limb) + } + } + + impl WideningMul<&'_ $limb> for $limb { + type Output = $limb; + + #[inline] + fn widening_mul(self, rhs: &'_ $limb) -> (Self::Output, Self::Output) { + WideningMul::<$limb>::widening_mul(self, *rhs) + } + } + + impl WideningMul<&'_ $limb> for &'_ $limb { + type Output = $limb; + + #[inline] + fn widening_mul(self, rhs: &'_ $limb) -> (Self::Output, Self::Output) { + WideningMul::<$limb>::widening_mul(*self, *rhs) + } + } + + impl WideningMul<$limb> for &'_ $limb { + type Output = $limb; + + #[inline] + fn widening_mul(self, rhs: $limb) -> (Self::Output, Self::Output) { + WideningMul::<$limb>::widening_mul(*self, rhs) + } + } + + #[test] + fn $test_name() { + fn widening_mul>(a: T, b: T) -> (T, T) { + a.widening_mul(b) + } + assert_eq!(widening_mul(0 as $limb, 0 as $limb), (0, 0)); + assert_eq!(widening_mul(<$limb>::MAX, 1), (<$limb>::MAX, 0)); + assert_eq!(widening_mul(<$limb>::MAX, 2), (<$limb>::MAX - 1, 1)); + assert_eq!( + widening_mul(<$limb>::MAX, <$limb>::MAX), + (1, <$limb>::MAX - 1) + ); + } + }; +} + +/// Calculates the complete product self * rhs without the possibility to overflow. +pub trait WideningMul: Sized { + type Output; + + #[must_use] + fn widening_mul(self, rhs: Rhs) -> (Self::Output, Self::Output); +} + +widening_impl!(u8, u16, test_u8_wrapping); +widening_impl!(u16, u32, test_u16_wrapping); +widening_impl!(u32, u64, test_u32_wrapping); +widening_impl!(u64, u128, test_u64_wrapping); + +#[cfg(target_pointer_width = "16")] +widening_impl!(usize, u16, test_usize_wrapping); + +#[cfg(target_pointer_width = "32")] +widening_impl!(usize, u64, test_usize_wrapping); + +#[cfg(target_pointer_width = "64")] +widening_impl!(usize, u128, test_usize_wrapping); + +#[inline(always)] +const fn split_u128(a: u128) -> (u64, u64) { + ((a >> 64) as _, (a & 0xFFFF_FFFF_FFFF_FFFF) as _) +} + +impl WideningMul for u128 { + type Output = Self; + + // l4 = 6edd4bba28970573e250bf2d9c0a78e678099b2cbe4fe173049627b94adc6e00 // x_low * y_low + // l5 = 6f8eadccec0b2a496887a6c5e50423418263442505e6c7a8896a4b2c0cedceb0 // x_high * y_low + // l5 = 6f8eadccec0b2a496887a6c5e5042341f1408fdf2e7dcd1c6bbb0a59a8f84796 // l5 + (l4 >> 64) + // l6 = 6f7e8d9cabbac9d8e7f7061524334250817263544536271808f9eadbccbdaea0 // x_low * y_high + // l6 = 6f7e8d9cabbac9d8e7f706152433425172b2f33373b3f43474b4f53575b5f636 // l6 + (l5 & u64::MAX) + // l6_high = l6 >> 64 + // l7 = 7030f1b27333f4b57636f7b87939faba9bdb1a5998d8175695d5145392d21151 // x_high * y_high + // l7 = 7030f1b27333f4b57636f7b87939fabb0b69c82684e3419ffe5cbb1977d63492 // (l5 >> 64) + (l6 >> 64) + l7 + + // r0 = (l6 << 64) | (l4 & u64::MAX) + // r1 = l7 + l6_high; + + #[inline] + fn widening_mul(self, rhs: Self) -> (Self::Output, Self::Output) { + const LOW_MASK: u128 = u64::MAX as u128; + let mut lhs_lo = self & LOW_MASK; + let mut lhs_hi = self >> 64; + let mut rhs_lo = rhs & LOW_MASK; + let mut rhs_hi = rhs >> 64; + + let mut l4 = lhs_lo.wrapping_mul(rhs_lo); + + rhs_lo = rhs_lo.wrapping_mul(lhs_hi).wrapping_add(l4.wrapping_shr(64)); + lhs_lo = lhs_lo.wrapping_mul(rhs_hi).wrapping_add(rhs_lo & LOW_MASK); + + lhs_hi = lhs_hi.wrapping_mul(rhs_hi); + rhs_hi = lhs_lo.wrapping_shr(64); + rhs_lo >>= 64; + + lhs_hi = lhs_hi.wrapping_add(rhs_lo); + + lhs_lo = lhs_lo.wrapping_shl(64); + l4 &= LOW_MASK; + lhs_lo |= l4; + + lhs_hi = lhs_hi.wrapping_add(rhs_hi); + + (lhs_lo, lhs_hi) + } +} + +#[test] +fn test_u128_wrapping() { + fn widening_mul>(a: T, b: T) -> (T, T) { + a.widening_mul(b) + } + assert_eq!(widening_mul(0u128, 0u128), (0, 0)); + assert_eq!(widening_mul(u128::MAX, 1), (u128::MAX, 0)); + assert_eq!(widening_mul(u128::MAX, 2), (u128::MAX - 1, 1)); + assert_eq!(widening_mul(u128::MAX, u128::MAX), (1, u128::MAX - 1)); +} From 4253a6fb21efa8c7c3344857b3f09042c3b84ef0 Mon Sep 17 00:00:00 2001 From: Lohann Paterno Coutinho Ferreira Date: Wed, 10 Jul 2024 23:16:52 -0300 Subject: [PATCH 2/5] Remove u128 --- src/ops/widening.rs | 60 --------------------------------------------- 1 file changed, 60 deletions(-) diff --git a/src/ops/widening.rs b/src/ops/widening.rs index 6b55a9d3..6ce10628 100644 --- a/src/ops/widening.rs +++ b/src/ops/widening.rs @@ -75,63 +75,3 @@ widening_impl!(usize, u64, test_usize_wrapping); #[cfg(target_pointer_width = "64")] widening_impl!(usize, u128, test_usize_wrapping); - -#[inline(always)] -const fn split_u128(a: u128) -> (u64, u64) { - ((a >> 64) as _, (a & 0xFFFF_FFFF_FFFF_FFFF) as _) -} - -impl WideningMul for u128 { - type Output = Self; - - // l4 = 6edd4bba28970573e250bf2d9c0a78e678099b2cbe4fe173049627b94adc6e00 // x_low * y_low - // l5 = 6f8eadccec0b2a496887a6c5e50423418263442505e6c7a8896a4b2c0cedceb0 // x_high * y_low - // l5 = 6f8eadccec0b2a496887a6c5e5042341f1408fdf2e7dcd1c6bbb0a59a8f84796 // l5 + (l4 >> 64) - // l6 = 6f7e8d9cabbac9d8e7f7061524334250817263544536271808f9eadbccbdaea0 // x_low * y_high - // l6 = 6f7e8d9cabbac9d8e7f706152433425172b2f33373b3f43474b4f53575b5f636 // l6 + (l5 & u64::MAX) - // l6_high = l6 >> 64 - // l7 = 7030f1b27333f4b57636f7b87939faba9bdb1a5998d8175695d5145392d21151 // x_high * y_high - // l7 = 7030f1b27333f4b57636f7b87939fabb0b69c82684e3419ffe5cbb1977d63492 // (l5 >> 64) + (l6 >> 64) + l7 - - // r0 = (l6 << 64) | (l4 & u64::MAX) - // r1 = l7 + l6_high; - - #[inline] - fn widening_mul(self, rhs: Self) -> (Self::Output, Self::Output) { - const LOW_MASK: u128 = u64::MAX as u128; - let mut lhs_lo = self & LOW_MASK; - let mut lhs_hi = self >> 64; - let mut rhs_lo = rhs & LOW_MASK; - let mut rhs_hi = rhs >> 64; - - let mut l4 = lhs_lo.wrapping_mul(rhs_lo); - - rhs_lo = rhs_lo.wrapping_mul(lhs_hi).wrapping_add(l4.wrapping_shr(64)); - lhs_lo = lhs_lo.wrapping_mul(rhs_hi).wrapping_add(rhs_lo & LOW_MASK); - - lhs_hi = lhs_hi.wrapping_mul(rhs_hi); - rhs_hi = lhs_lo.wrapping_shr(64); - rhs_lo >>= 64; - - lhs_hi = lhs_hi.wrapping_add(rhs_lo); - - lhs_lo = lhs_lo.wrapping_shl(64); - l4 &= LOW_MASK; - lhs_lo |= l4; - - lhs_hi = lhs_hi.wrapping_add(rhs_hi); - - (lhs_lo, lhs_hi) - } -} - -#[test] -fn test_u128_wrapping() { - fn widening_mul>(a: T, b: T) -> (T, T) { - a.widening_mul(b) - } - assert_eq!(widening_mul(0u128, 0u128), (0, 0)); - assert_eq!(widening_mul(u128::MAX, 1), (u128::MAX, 0)); - assert_eq!(widening_mul(u128::MAX, 2), (u128::MAX - 1, 1)); - assert_eq!(widening_mul(u128::MAX, u128::MAX), (1, u128::MAX - 1)); -} From 64b1fe83f7b9f9a1cc2a754f9883e8ed2c873b82 Mon Sep 17 00:00:00 2001 From: Lohann Paterno Coutinho Ferreira Date: Thu, 11 Jul 2024 19:31:48 -0300 Subject: [PATCH 3/5] implement widening multiplication for u128 --- src/ops/widening.rs | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/ops/widening.rs b/src/ops/widening.rs index 6ce10628..875ee666 100644 --- a/src/ops/widening.rs +++ b/src/ops/widening.rs @@ -62,6 +62,7 @@ pub trait WideningMul: Sized { fn widening_mul(self, rhs: Rhs) -> (Self::Output, Self::Output); } +// Implement widening multiplication for all primitive types widening_impl!(u8, u16, test_u8_wrapping); widening_impl!(u16, u32, test_u16_wrapping); widening_impl!(u32, u64, test_u32_wrapping); @@ -75,3 +76,34 @@ widening_impl!(usize, u64, test_usize_wrapping); #[cfg(target_pointer_width = "64")] widening_impl!(usize, u128, test_usize_wrapping); + + +// Implement widening multiplication for u64, +// required while feature(bigint_helper_methods) is not stable. +#[allow(clippy::cast_possible_truncation)] +#[inline] +const fn carrying_mul(a: u64, rhs: u64, carry: u64) -> (u64, u64) { + // SAFETY: overflow will be contained within the wider types + let wide = (a as u128).wrapping_mul(rhs as u128).wrapping_add(carry as u128); + (wide as u64, (wide >> u64::BITS) as u64) +} + +impl WideningMul for u128 { + type Output = Self; + + #[allow(clippy::cast_possible_truncation, clippy::similar_names, clippy::cast_lossless)] + #[inline] + fn widening_mul(self, rhs: Self) -> (Self::Output, Self::Output) { + let a = (self >> 64) as u64; + let b = self as u64; + let c = (rhs >> 64) as u64; + let d = rhs as u64; + let (p1, p2) = WideningMul::widening_mul(b, d); + let (p2, p31) = carrying_mul(b, c, p2); + let (p2, p32) = carrying_mul(a, d, p2); + let (p3, p4_overflow) = p31.overflowing_add(p32); + let (p3, p4) = carrying_mul(a, c, p3); + let p4 = p4.wrapping_add(p4_overflow as u64); + ((p1 as Self) | (p2 as Self) << 64, (p3 as Self) | (p4 as Self) << 64) + } +} From 430e1868efa475332dc90422c1bdcbf4cfb5455e Mon Sep 17 00:00:00 2001 From: Lohann Paterno Coutinho Ferreira Date: Thu, 11 Jul 2024 19:39:41 -0300 Subject: [PATCH 4/5] Add u128 widening multiplication tests --- src/ops/widening.rs | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/ops/widening.rs b/src/ops/widening.rs index 875ee666..32de5c89 100644 --- a/src/ops/widening.rs +++ b/src/ops/widening.rs @@ -63,30 +63,19 @@ pub trait WideningMul: Sized { } // Implement widening multiplication for all primitive types -widening_impl!(u8, u16, test_u8_wrapping); -widening_impl!(u16, u32, test_u16_wrapping); -widening_impl!(u32, u64, test_u32_wrapping); -widening_impl!(u64, u128, test_u64_wrapping); +widening_impl!(u8, u16, test_u8_widening_mul); +widening_impl!(u16, u32, test_u16_widening_mul); +widening_impl!(u32, u64, test_u32_widening_mul); +widening_impl!(u64, u128, test_u64_widening_mul); #[cfg(target_pointer_width = "16")] -widening_impl!(usize, u16, test_usize_wrapping); +widening_impl!(usize, u16, test_usize_widening_mul); #[cfg(target_pointer_width = "32")] -widening_impl!(usize, u64, test_usize_wrapping); +widening_impl!(usize, u64, test_usize_widening_mul); #[cfg(target_pointer_width = "64")] -widening_impl!(usize, u128, test_usize_wrapping); - - -// Implement widening multiplication for u64, -// required while feature(bigint_helper_methods) is not stable. -#[allow(clippy::cast_possible_truncation)] -#[inline] -const fn carrying_mul(a: u64, rhs: u64, carry: u64) -> (u64, u64) { - // SAFETY: overflow will be contained within the wider types - let wide = (a as u128).wrapping_mul(rhs as u128).wrapping_add(carry as u128); - (wide as u64, (wide >> u64::BITS) as u64) -} +widening_impl!(usize, u128, test_usize_widening_mul); impl WideningMul for u128 { type Output = Self; @@ -94,6 +83,14 @@ impl WideningMul for u128 { #[allow(clippy::cast_possible_truncation, clippy::similar_names, clippy::cast_lossless)] #[inline] fn widening_mul(self, rhs: Self) -> (Self::Output, Self::Output) { + #[inline] + // Carrying multiplication for u64, computes: lhs * rhs + carry + const fn carrying_mul(lhs: u64, rhs: u64, carry: u64) -> (u64, u64) { + // SAFETY: overflow will be contained within the wider types + let wide = (lhs as u128).wrapping_mul(rhs as u128).wrapping_add(carry as u128); + (wide as u64, (wide >> u64::BITS) as u64) + } + let a = (self >> 64) as u64; let b = self as u64; let c = (rhs >> 64) as u64; @@ -107,3 +104,14 @@ impl WideningMul for u128 { ((p1 as Self) | (p2 as Self) << 64, (p3 as Self) | (p4 as Self) << 64) } } + +#[test] +fn test_u128_widening_mul() { + fn widening_mul>(a: T, b: T) -> (T, T) { + a.widening_mul(b) + } + assert_eq!(widening_mul(0u128, 0u128), (0, 0)); + assert_eq!(widening_mul(u128::MAX, 1), (u128::MAX, 0)); + assert_eq!(widening_mul(u128::MAX, 2), (u128::MAX - 1, 1)); + assert_eq!(widening_mul(u128::MAX, u128::MAX), (1, u128::MAX - 1)); +} From cf8f3277fe8f7efef161d10ed30f3c39849abbdb Mon Sep 17 00:00:00 2001 From: Lohann Paterno Coutinho Ferreira Date: Thu, 11 Jul 2024 19:42:00 -0300 Subject: [PATCH 5/5] Fix usize for 16 pointer width --- src/ops/widening.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ops/widening.rs b/src/ops/widening.rs index 32de5c89..e14152b4 100644 --- a/src/ops/widening.rs +++ b/src/ops/widening.rs @@ -69,7 +69,7 @@ widening_impl!(u32, u64, test_u32_widening_mul); widening_impl!(u64, u128, test_u64_widening_mul); #[cfg(target_pointer_width = "16")] -widening_impl!(usize, u16, test_usize_widening_mul); +widening_impl!(usize, u32, test_usize_widening_mul); #[cfg(target_pointer_width = "32")] widening_impl!(usize, u64, test_usize_widening_mul);