backport patch for underflow and div by 0

ingonyama-zk · Apr 10, 2024 · 3d1e433 · 3d1e433
1 parent e603569
commit 3d1e433
Show file tree

Hide file tree

Showing 8 changed files with 15 additions and 15 deletions.
diff --git a/icicle/appUtils/msm/msm.cu b/icicle/appUtils/msm/msm.cu
@@ -28,7 +28,7 @@ namespace msm {
     // #define BIG_TRIANGLE
     // #define SSM_SUM  //WIP
 
-    unsigned get_optimal_c(int bitsize) { return max((unsigned)ceil(log2(bitsize)) - 4, 1U); }
+    unsigned get_optimal_c(int bitsize) { return (unsigned)max(ceil(log2(bitsize)) - 4.0, 1.0); }
 
     template <typename E>
     __global__ void normalize_kernel(E* inout, E factor, int n)
@@ -654,7 +654,7 @@ namespace msm {
         unsigned log_nof_large_buckets = (unsigned)ceil(log2(h_nof_large_buckets));
         unsigned* large_bucket_indices;
         CHK_IF_RETURN(cudaMallocAsync(&large_bucket_indices, sizeof(unsigned) * large_buckets_nof_threads, stream));
-        NUM_THREADS = min(1 << 8, h_nof_large_buckets);
+        NUM_THREADS = max(1, min(1 << 8, h_nof_large_buckets));
         NUM_BLOCKS = (h_nof_large_buckets + NUM_THREADS - 1) / NUM_THREADS;
         initialize_large_bucket_indices<P><<<NUM_BLOCKS, NUM_THREADS, 0, stream_large_buckets>>>(
           sorted_bucket_sizes_sum, average_bucket_size, h_nof_large_buckets, log_nof_large_buckets,
@@ -663,24 +663,24 @@ namespace msm {
         P* large_buckets;
         CHK_IF_RETURN(cudaMallocAsync(&large_buckets, sizeof(P) * large_buckets_nof_threads, stream_large_buckets));
 
-        NUM_THREADS = min(1 << 8, large_buckets_nof_threads);
+        NUM_THREADS = max(1, min(1 << 8, large_buckets_nof_threads));
         NUM_BLOCKS = (large_buckets_nof_threads + NUM_THREADS - 1) / NUM_THREADS;
         accumulate_large_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream_large_buckets>>>(
           large_buckets, sorted_bucket_offsets, sorted_bucket_sizes, large_bucket_indices, sorted_point_indices,
           d_points, h_nof_large_buckets, c, average_bucket_size, log_nof_large_buckets, large_buckets_nof_threads);
 
-        NUM_THREADS = min(MAX_TH, h_nof_large_buckets);
+        NUM_THREADS = max(1, min(MAX_TH, h_nof_large_buckets));
         NUM_BLOCKS = (h_nof_large_buckets + NUM_THREADS - 1) / NUM_THREADS;
         // normalization is needed to update buckets sizes and offsets due to reduction that already took place
         normalize_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream_large_buckets>>>(
           sorted_bucket_sizes_sum, average_bucket_size, h_nof_large_buckets);
         // reduce
         for (int s = h_largest_bucket; s > 1; s = ((s + 1) >> 1)) {
-          NUM_THREADS = min(MAX_TH, h_nof_large_buckets);
+          NUM_THREADS = max(1, min(MAX_TH, h_nof_large_buckets));
           NUM_BLOCKS = (h_nof_large_buckets + NUM_THREADS - 1) / NUM_THREADS;
           normalize_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream_large_buckets>>>(
             sorted_bucket_sizes, s == h_largest_bucket ? average_bucket_size : 2, h_nof_large_buckets);
-          NUM_THREADS = min(MAX_TH, large_buckets_nof_threads);
+          NUM_THREADS = max(1, min(MAX_TH, large_buckets_nof_threads));
           NUM_BLOCKS = (large_buckets_nof_threads + NUM_THREADS - 1) / NUM_THREADS;
           sum_reduction_variable_size_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream_large_buckets>>>(
             large_buckets, sorted_bucket_sizes_sum, sorted_bucket_sizes, large_bucket_indices,
@@ -689,7 +689,7 @@ namespace msm {
         CHK_IF_RETURN(cudaFreeAsync(large_bucket_indices, stream_large_buckets));
 
         // distribute
-        NUM_THREADS = min(MAX_TH, h_nof_large_buckets);
+        NUM_THREADS = max(1, min(MAX_TH, h_nof_large_buckets));
         NUM_BLOCKS = (h_nof_large_buckets + NUM_THREADS - 1) / NUM_THREADS;
         distribute_large_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream_large_buckets>>>(
           large_buckets, buckets, sorted_bucket_sizes_sum, sorted_single_bucket_indices, h_nof_large_buckets,
@@ -778,7 +778,7 @@ namespace msm {
               const bool is_last_iter = (j == target_bits_count - 1);
               unsigned nof_threads =
                 (((target_buckets_count - target_windows_count) >> 1) << (target_bits_count - 1 - j)) * batch_size;
-              NUM_THREADS = min(MAX_TH, nof_threads);
+              NUM_THREADS = max(1, min(MAX_TH, nof_threads));
               NUM_BLOCKS = (nof_threads + NUM_THREADS - 1) / NUM_THREADS;
               single_stage_multi_reduction_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(
                 is_first_iter ? source_buckets : temp_buckets1, is_last_iter ? target_buckets : temp_buckets1,

diff --git a/wrappers/rust/icicle-core/Cargo.toml b/wrappers/rust/icicle-core/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "icicle-core"
-version = "1.4.0"
+version = "1.5.1"
 edition = "2021"
 authors = ["Ingonyama"]
 description = "A library for GPU ZK acceleration by Ingonyama"

diff --git a/wrappers/rust/icicle-cuda-runtime/Cargo.toml b/wrappers/rust/icicle-cuda-runtime/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "icicle-cuda-runtime"
-version = "1.4.0"
+version = "1.5.1"
 edition = "2021"
 authors = [ "Ingonyama" ]
 description = "Ingonyama's Rust wrapper of CUDA runtime"

diff --git a/wrappers/rust/icicle-curves/icicle-bls12-377/Cargo.toml b/wrappers/rust/icicle-curves/icicle-bls12-377/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "icicle-bls12-377"
-version = "1.4.0"
+version = "1.5.1"
 edition = "2021"
 authors = [ "Ingonyama" ]
 description = "Rust wrapper for the CUDA implementation of BLS12-377 pairing friendly elliptic curve by Ingonyama"

diff --git a/wrappers/rust/icicle-curves/icicle-bls12-381/Cargo.toml b/wrappers/rust/icicle-curves/icicle-bls12-381/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "icicle-bls12-381"
-version = "1.4.0"
+version = "1.5.1"
 edition = "2021"
 authors = [ "Ingonyama" ]
 description = "Rust wrapper for the CUDA implementation of BLS12-381 pairing friendly elliptic curve by Ingonyama"

diff --git a/wrappers/rust/icicle-curves/icicle-bn254/Cargo.toml b/wrappers/rust/icicle-curves/icicle-bn254/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "icicle-bn254"
-version = "1.4.0"
+version = "1.5.1"
 edition = "2021"
 authors = [ "Ingonyama" ]
 description = "Rust wrapper for the CUDA implementation of BN254 pairing friendly elliptic curve by Ingonyama"

diff --git a/wrappers/rust/icicle-curves/icicle-bw6-761/Cargo.toml b/wrappers/rust/icicle-curves/icicle-bw6-761/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "icicle-bw6-761"
-version = "1.4.0"
+version = "1.5.1"
 edition = "2021"
 authors = [ "Ingonyama" ]
 description = "Rust wrapper for the CUDA implementation of BW6-761 pairing friendly elliptic curve by Ingonyama"

diff --git a/wrappers/rust/icicle-curves/icicle-grumpkin/Cargo.toml b/wrappers/rust/icicle-curves/icicle-grumpkin/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "icicle-grumpkin"
-version = "1.4.0"
+version = "1.5.1"
 edition = "2021"
 authors = [ "Ingonyama" ]
 description = "Rust wrapper for the CUDA implementation of Grumpkin elliptic curve by Ingonyama"