From 993f7bf3ab1ea82feca9b485fc0308534fbc47ca Mon Sep 17 00:00:00 2001
From: Lohann Paterno Coutinho Ferreira <developer@lohann.dev>
Date: Wed, 10 Jul 2024 23:14:21 -0300
Subject: [PATCH 1/5] Support widening mul

---
 src/ops/mod.rs      |   1 +
 src/ops/widening.rs | 137 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 138 insertions(+)
 create mode 100644 src/ops/widening.rs
diff --git a/src/ops/mod.rs b/src/ops/mod.rs
index 2128d86a..ac07d3b2 100644
--- a/src/ops/mod.rs
+++ b/src/ops/mod.rs
@@ -5,4 +5,5 @@ pub mod inv;
 pub mod mul_add;
 pub mod overflowing;
 pub mod saturating;
+pub mod widening;
 pub mod wrapping;
diff --git a/src/ops/widening.rs b/src/ops/widening.rs
new file mode 100644
index 00000000..6b55a9d3
--- /dev/null
+++ b/src/ops/widening.rs
@@ -0,0 +1,137 @@
+macro_rules! widening_impl {
+    ($limb:ty, $doublelimb:ty, $test_name:ident) => {
+        impl WideningMul<$limb> for $limb {
+            type Output = $limb;
+
+            #[inline]
+            fn widening_mul(self, rhs: $limb) -> (Self::Output, Self::Output) {
+                // No overflow while `$doublelimb` is twice as wide as `$limb`: the full product fits in it.
+                let wide = (self as $doublelimb).wrapping_mul(rhs as $doublelimb);
+                (wide as $limb, (wide >> <$limb>::BITS) as $limb)
+            }
+        }
+
+        impl WideningMul<&'_ $limb> for $limb {
+            type Output = $limb;
+
+            #[inline]
+            fn widening_mul(self, rhs: &'_ $limb) -> (Self::Output, Self::Output) {
+                WideningMul::<$limb>::widening_mul(self, *rhs)
+            }
+        }
+
+        impl WideningMul<&'_ $limb> for &'_ $limb {
+            type Output = $limb;
+
+            #[inline]
+            fn widening_mul(self, rhs: &'_ $limb) -> (Self::Output, Self::Output) {
+                WideningMul::<$limb>::widening_mul(*self, *rhs)
+            }
+        }
+
+        impl WideningMul<$limb> for &'_ $limb {
+            type Output = $limb;
+
+            #[inline]
+            fn widening_mul(self, rhs: $limb) -> (Self::Output, Self::Output) {
+                WideningMul::<$limb>::widening_mul(*self, rhs)
+            }
+        }
+
+        #[test]
+        fn $test_name() {
+            fn widening_mul<T: WideningMul<Output = T>>(a: T, b: T) -> (T, T) {
+                a.widening_mul(b)
+            }
+            assert_eq!(widening_mul(0 as $limb, 0 as $limb), (0, 0));
+            assert_eq!(widening_mul(<$limb>::MAX, 1), (<$limb>::MAX, 0));
+            assert_eq!(widening_mul(<$limb>::MAX, 2), (<$limb>::MAX - 1, 1));
+            assert_eq!(
+                widening_mul(<$limb>::MAX, <$limb>::MAX),
+                (1, <$limb>::MAX - 1)
+            );
+        }
+    };
+}
+
+/// Calculates the complete product `self * rhs` without the possibility of overflow, as `(low, high)` halves.
+pub trait WideningMul<Rhs = Self>: Sized {
+    type Output;
+
+    #[must_use]
+    fn widening_mul(self, rhs: Rhs) -> (Self::Output, Self::Output);
+}
+
+widening_impl!(u8, u16, test_u8_wrapping);
+widening_impl!(u16, u32, test_u16_wrapping);
+widening_impl!(u32, u64, test_u32_wrapping);
+widening_impl!(u64, u128, test_u64_wrapping);
+
+#[cfg(target_pointer_width = "16")]
+widening_impl!(usize, u16, test_usize_wrapping);
+
+#[cfg(target_pointer_width = "32")]
+widening_impl!(usize, u64, test_usize_wrapping);
+
+#[cfg(target_pointer_width = "64")]
+widening_impl!(usize, u128, test_usize_wrapping);
+
+#[inline(always)]
+const fn split_u128(a: u128) -> (u64, u64) {
+    ((a >> 64) as _, (a & 0xFFFF_FFFF_FFFF_FFFF) as _)
+}
+
+impl WideningMul<Self> for u128 {
+    type Output = Self;
+
+    // l4 = 6edd4bba28970573e250bf2d9c0a78e678099b2cbe4fe173049627b94adc6e00 // x_low  * y_low
+    // l5 = 6f8eadccec0b2a496887a6c5e50423418263442505e6c7a8896a4b2c0cedceb0 // x_high * y_low
+    // l5 = 6f8eadccec0b2a496887a6c5e5042341f1408fdf2e7dcd1c6bbb0a59a8f84796 // l5 + (l4 >> 64)
+    // l6 = 6f7e8d9cabbac9d8e7f7061524334250817263544536271808f9eadbccbdaea0 // x_low  * y_high
+    // l6 = 6f7e8d9cabbac9d8e7f706152433425172b2f33373b3f43474b4f53575b5f636 // l6 + (l5 & u64::MAX)
+    // l6_high = l6 >> 64
+    // l7 = 7030f1b27333f4b57636f7b87939faba9bdb1a5998d8175695d5145392d21151 // x_high * y_high
+    // l7 = 7030f1b27333f4b57636f7b87939fabb0b69c82684e3419ffe5cbb1977d63492 // (l5 >> 64) + (l6 >> 64) + l7
+
+    // r0 = (l6 << 64) | (l4 & u64::MAX)
+    // r1 = l7 + l6_high;
+
+    #[inline]
+    fn widening_mul(self, rhs: Self) -> (Self::Output, Self::Output) {
+        const LOW_MASK: u128 = u64::MAX as u128;
+        let mut lhs_lo = self & LOW_MASK;
+        let mut lhs_hi = self >> 64;
+        let mut rhs_lo = rhs & LOW_MASK;
+        let mut rhs_hi = rhs >> 64;
+        
+        let mut l4 = lhs_lo.wrapping_mul(rhs_lo);
+
+        rhs_lo = rhs_lo.wrapping_mul(lhs_hi).wrapping_add(l4.wrapping_shr(64));
+        lhs_lo = lhs_lo.wrapping_mul(rhs_hi).wrapping_add(rhs_lo & LOW_MASK);
+
+        lhs_hi = lhs_hi.wrapping_mul(rhs_hi);
+        rhs_hi = lhs_lo.wrapping_shr(64);
+        rhs_lo >>= 64;
+
+        lhs_hi = lhs_hi.wrapping_add(rhs_lo);
+
+        lhs_lo = lhs_lo.wrapping_shl(64);
+        l4 &= LOW_MASK;
+        lhs_lo |= l4;
+
+        lhs_hi = lhs_hi.wrapping_add(rhs_hi);
+
+        (lhs_lo, lhs_hi)
+    }
+}
+
+#[test]
+fn test_u128_wrapping() {
+    fn widening_mul<T: WideningMul<Output = T>>(a: T, b: T) -> (T, T) {
+        a.widening_mul(b)
+    }
+    assert_eq!(widening_mul(0u128, 0u128), (0, 0));
+    assert_eq!(widening_mul(u128::MAX, 1), (u128::MAX, 0));
+    assert_eq!(widening_mul(u128::MAX, 2), (u128::MAX - 1, 1));
+    assert_eq!(widening_mul(u128::MAX, u128::MAX), (1, u128::MAX - 1));
+}

From 4253a6fb21efa8c7c3344857b3f09042c3b84ef0 Mon Sep 17 00:00:00 2001
From: Lohann Paterno Coutinho Ferreira <developer@lohann.dev>
Date: Wed, 10 Jul 2024 23:16:52 -0300
Subject: [PATCH 2/5] Remove u128

---
 src/ops/widening.rs | 60 ---------------------------------------------
 1 file changed, 60 deletions(-)

diff --git a/src/ops/widening.rs b/src/ops/widening.rs
index 6b55a9d3..6ce10628 100644
--- a/src/ops/widening.rs
+++ b/src/ops/widening.rs
@@ -75,63 +75,3 @@ widening_impl!(usize, u64, test_usize_wrapping);
 
 #[cfg(target_pointer_width = "64")]
 widening_impl!(usize, u128, test_usize_wrapping);
-
-#[inline(always)]
-const fn split_u128(a: u128) -> (u64, u64) {
-    ((a >> 64) as _, (a & 0xFFFF_FFFF_FFFF_FFFF) as _)
-}
-
-impl WideningMul<Self> for u128 {
-    type Output = Self;
-
-    // l4 = 6edd4bba28970573e250bf2d9c0a78e678099b2cbe4fe173049627b94adc6e00 // x_low  * y_low
-    // l5 = 6f8eadccec0b2a496887a6c5e50423418263442505e6c7a8896a4b2c0cedceb0 // x_high * y_low
-    // l5 = 6f8eadccec0b2a496887a6c5e5042341f1408fdf2e7dcd1c6bbb0a59a8f84796 // l5 + (l4 >> 64)
-    // l6 = 6f7e8d9cabbac9d8e7f7061524334250817263544536271808f9eadbccbdaea0 // x_low  * y_high
-    // l6 = 6f7e8d9cabbac9d8e7f706152433425172b2f33373b3f43474b4f53575b5f636 // l6 + (l5 & u64::MAX)
-    // l6_high = l6 >> 64
-    // l7 = 7030f1b27333f4b57636f7b87939faba9bdb1a5998d8175695d5145392d21151 // x_high * y_high
-    // l7 = 7030f1b27333f4b57636f7b87939fabb0b69c82684e3419ffe5cbb1977d63492 // (l5 >> 64) + (l6 >> 64) + l7
-
-    // r0 = (l6 << 64) | (l4 & u64::MAX)
-    // r1 = l7 + l6_high;
-
-    #[inline]
-    fn widening_mul(self, rhs: Self) -> (Self::Output, Self::Output) {
-        const LOW_MASK: u128 = u64::MAX as u128;
-        let mut lhs_lo = self & LOW_MASK;
-        let mut lhs_hi = self >> 64;
-        let mut rhs_lo = rhs & LOW_MASK;
-        let mut rhs_hi = rhs >> 64;
-        
-        let mut l4 = lhs_lo.wrapping_mul(rhs_lo);
-
-        rhs_lo = rhs_lo.wrapping_mul(lhs_hi).wrapping_add(l4.wrapping_shr(64));
-        lhs_lo = lhs_lo.wrapping_mul(rhs_hi).wrapping_add(rhs_lo & LOW_MASK);
-
-        lhs_hi = lhs_hi.wrapping_mul(rhs_hi);
-        rhs_hi = lhs_lo.wrapping_shr(64);
-        rhs_lo >>= 64;
-
-        lhs_hi = lhs_hi.wrapping_add(rhs_lo);
-
-        lhs_lo = lhs_lo.wrapping_shl(64);
-        l4 &= LOW_MASK;
-        lhs_lo |= l4;
-
-        lhs_hi = lhs_hi.wrapping_add(rhs_hi);
-
-        (lhs_lo, lhs_hi)
-    }
-}
-
-#[test]
-fn test_u128_wrapping() {
-    fn widening_mul<T: WideningMul<Output = T>>(a: T, b: T) -> (T, T) {
-        a.widening_mul(b)
-    }
-    assert_eq!(widening_mul(0u128, 0u128), (0, 0));
-    assert_eq!(widening_mul(u128::MAX, 1), (u128::MAX, 0));
-    assert_eq!(widening_mul(u128::MAX, 2), (u128::MAX - 1, 1));
-    assert_eq!(widening_mul(u128::MAX, u128::MAX), (1, u128::MAX - 1));
-}

From 64b1fe83f7b9f9a1cc2a754f9883e8ed2c873b82 Mon Sep 17 00:00:00 2001
From: Lohann Paterno Coutinho Ferreira <developer@lohann.dev>
Date: Thu, 11 Jul 2024 19:31:48 -0300
Subject: [PATCH 3/5] implement widening multiplication for u128

---
 src/ops/widening.rs | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/src/ops/widening.rs b/src/ops/widening.rs
index 6ce10628..875ee666 100644
--- a/src/ops/widening.rs
+++ b/src/ops/widening.rs
@@ -62,6 +62,7 @@ pub trait WideningMul<Rhs = Self>: Sized {
     fn widening_mul(self, rhs: Rhs) -> (Self::Output, Self::Output);
 }
 
+// Implement widening multiplication for all primitive types
 widening_impl!(u8, u16, test_u8_wrapping);
 widening_impl!(u16, u32, test_u16_wrapping);
 widening_impl!(u32, u64, test_u32_wrapping);
@@ -75,3 +76,34 @@ widening_impl!(usize, u64, test_usize_wrapping);
 
 #[cfg(target_pointer_width = "64")]
 widening_impl!(usize, u128, test_usize_wrapping);
+
+
+// Implement widening multiplication for u64,
+// required while feature(bigint_helper_methods) is not stable.
+#[allow(clippy::cast_possible_truncation)]
+#[inline]
+const fn carrying_mul(a: u64, rhs: u64, carry: u64) -> (u64, u64) {
+    // SAFETY: overflow will be contained within the wider types
+    let wide = (a as u128).wrapping_mul(rhs as u128).wrapping_add(carry as u128);
+    (wide as u64, (wide >> u64::BITS) as u64)
+}
+
+impl WideningMul<Self> for u128 {
+    type Output = Self;
+
+    #[allow(clippy::cast_possible_truncation, clippy::similar_names, clippy::cast_lossless)]
+    #[inline]
+    fn widening_mul(self, rhs: Self) -> (Self::Output, Self::Output) {
+        let a = (self >> 64) as u64;
+        let b = self as u64;
+        let c = (rhs >> 64) as u64;
+        let d = rhs as u64;
+        let (p1, p2) = WideningMul::widening_mul(b, d);
+        let (p2, p31) = carrying_mul(b, c, p2);
+        let (p2, p32) = carrying_mul(a, d, p2);
+        let (p3, p4_overflow) = p31.overflowing_add(p32);
+        let (p3, p4) = carrying_mul(a, c, p3);
+        let p4 = p4.wrapping_add(p4_overflow as u64);
+        ((p1 as Self) | (p2 as Self) << 64, (p3 as Self) | (p4 as Self) << 64)
+    }
+}

From 430e1868efa475332dc90422c1bdcbf4cfb5455e Mon Sep 17 00:00:00 2001
From: Lohann Paterno Coutinho Ferreira <developer@lohann.dev>
Date: Thu, 11 Jul 2024 19:39:41 -0300
Subject: [PATCH 4/5] Add u128 widening multiplication tests

---
 src/ops/widening.rs | 44 ++++++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/src/ops/widening.rs b/src/ops/widening.rs
index 875ee666..32de5c89 100644
--- a/src/ops/widening.rs
+++ b/src/ops/widening.rs
@@ -63,30 +63,19 @@ pub trait WideningMul<Rhs = Self>: Sized {
 }
 
 // Implement widening multiplication for all primitive types
-widening_impl!(u8, u16, test_u8_wrapping);
-widening_impl!(u16, u32, test_u16_wrapping);
-widening_impl!(u32, u64, test_u32_wrapping);
-widening_impl!(u64, u128, test_u64_wrapping);
+widening_impl!(u8, u16, test_u8_widening_mul);
+widening_impl!(u16, u32, test_u16_widening_mul);
+widening_impl!(u32, u64, test_u32_widening_mul);
+widening_impl!(u64, u128, test_u64_widening_mul);
 
 #[cfg(target_pointer_width = "16")]
-widening_impl!(usize, u16, test_usize_wrapping);
+widening_impl!(usize, u16, test_usize_widening_mul);
 
 #[cfg(target_pointer_width = "32")]
-widening_impl!(usize, u64, test_usize_wrapping);
+widening_impl!(usize, u64, test_usize_widening_mul);
 
 #[cfg(target_pointer_width = "64")]
-widening_impl!(usize, u128, test_usize_wrapping);
-
-
-// Implement widening multiplication for u64,
-// required while feature(bigint_helper_methods) is not stable.
-#[allow(clippy::cast_possible_truncation)]
-#[inline]
-const fn carrying_mul(a: u64, rhs: u64, carry: u64) -> (u64, u64) {
-    // SAFETY: overflow will be contained within the wider types
-    let wide = (a as u128).wrapping_mul(rhs as u128).wrapping_add(carry as u128);
-    (wide as u64, (wide >> u64::BITS) as u64)
-}
+widening_impl!(usize, u128, test_usize_widening_mul);
 
 impl WideningMul<Self> for u128 {
     type Output = Self;
@@ -94,6 +83,14 @@ impl WideningMul<Self> for u128 {
     #[allow(clippy::cast_possible_truncation, clippy::similar_names, clippy::cast_lossless)]
     #[inline]
     fn widening_mul(self, rhs: Self) -> (Self::Output, Self::Output) {
+        #[inline]
+        // Carrying multiplication for u64, computes: lhs * rhs + carry
+        const fn carrying_mul(lhs: u64, rhs: u64, carry: u64) -> (u64, u64) {
+            // Cannot overflow: (2^64 - 1)^2 + (2^64 - 1) < 2^128, so the u128 arithmetic never wraps.
+            let wide = (lhs as u128).wrapping_mul(rhs as u128).wrapping_add(carry as u128);
+            (wide as u64, (wide >> u64::BITS) as u64)
+        }
+
         let a = (self >> 64) as u64;
         let b = self as u64;
         let c = (rhs >> 64) as u64;
@@ -107,3 +104,14 @@ impl WideningMul<Self> for u128 {
         ((p1 as Self) | (p2 as Self) << 64, (p3 as Self) | (p4 as Self) << 64)
     }
 }
+
+#[test]
+fn test_u128_widening_mul() {
+    fn widening_mul<T: WideningMul<Output = T>>(a: T, b: T) -> (T, T) {
+        a.widening_mul(b)
+    }
+    assert_eq!(widening_mul(0u128, 0u128), (0, 0));
+    assert_eq!(widening_mul(u128::MAX, 1), (u128::MAX, 0));
+    assert_eq!(widening_mul(u128::MAX, 2), (u128::MAX - 1, 1));
+    assert_eq!(widening_mul(u128::MAX, u128::MAX), (1, u128::MAX - 1));
+}

From cf8f3277fe8f7efef161d10ed30f3c39849abbdb Mon Sep 17 00:00:00 2001
From: Lohann Paterno Coutinho Ferreira <developer@lohann.dev>
Date: Thu, 11 Jul 2024 19:42:00 -0300
Subject: [PATCH 5/5] Fix usize for 16-bit pointer width

---
 src/ops/widening.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ops/widening.rs b/src/ops/widening.rs
index 32de5c89..e14152b4 100644
--- a/src/ops/widening.rs
+++ b/src/ops/widening.rs
@@ -69,7 +69,7 @@ widening_impl!(u32, u64, test_u32_widening_mul);
 widening_impl!(u64, u128, test_u64_widening_mul);
 
 #[cfg(target_pointer_width = "16")]
-widening_impl!(usize, u16, test_usize_widening_mul);
+widening_impl!(usize, u32, test_usize_widening_mul);
 
 #[cfg(target_pointer_width = "32")]
 widening_impl!(usize, u64, test_usize_widening_mul);