Skip to content

Commit

Permalink
backport patch for underflow and div by 0
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremyfelder committed Apr 10, 2024
1 parent e603569 commit 3d1e433
Show file tree
Hide file tree
Showing 8 changed files with 15 additions and 15 deletions.
16 changes: 8 additions & 8 deletions icicle/appUtils/msm/msm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ namespace msm {
// #define BIG_TRIANGLE
// #define SSM_SUM //WIP

- unsigned get_optimal_c(int bitsize) { return max((unsigned)ceil(log2(bitsize)) - 4, 1U); }
+ unsigned get_optimal_c(int bitsize) { return (unsigned)max(ceil(log2(bitsize)) - 4.0, 1.0); }

template <typename E>
__global__ void normalize_kernel(E* inout, E factor, int n)
Expand Down Expand Up @@ -654,7 +654,7 @@ namespace msm {
unsigned log_nof_large_buckets = (unsigned)ceil(log2(h_nof_large_buckets));
unsigned* large_bucket_indices;
CHK_IF_RETURN(cudaMallocAsync(&large_bucket_indices, sizeof(unsigned) * large_buckets_nof_threads, stream));
- NUM_THREADS = min(1 << 8, h_nof_large_buckets);
+ NUM_THREADS = max(1, min(1 << 8, h_nof_large_buckets));
NUM_BLOCKS = (h_nof_large_buckets + NUM_THREADS - 1) / NUM_THREADS;
initialize_large_bucket_indices<P><<<NUM_BLOCKS, NUM_THREADS, 0, stream_large_buckets>>>(
sorted_bucket_sizes_sum, average_bucket_size, h_nof_large_buckets, log_nof_large_buckets,
Expand All @@ -663,24 +663,24 @@ namespace msm {
P* large_buckets;
CHK_IF_RETURN(cudaMallocAsync(&large_buckets, sizeof(P) * large_buckets_nof_threads, stream_large_buckets));

- NUM_THREADS = min(1 << 8, large_buckets_nof_threads);
+ NUM_THREADS = max(1, min(1 << 8, large_buckets_nof_threads));
NUM_BLOCKS = (large_buckets_nof_threads + NUM_THREADS - 1) / NUM_THREADS;
accumulate_large_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream_large_buckets>>>(
large_buckets, sorted_bucket_offsets, sorted_bucket_sizes, large_bucket_indices, sorted_point_indices,
d_points, h_nof_large_buckets, c, average_bucket_size, log_nof_large_buckets, large_buckets_nof_threads);

- NUM_THREADS = min(MAX_TH, h_nof_large_buckets);
+ NUM_THREADS = max(1, min(MAX_TH, h_nof_large_buckets));
NUM_BLOCKS = (h_nof_large_buckets + NUM_THREADS - 1) / NUM_THREADS;
// normalization is needed to update buckets sizes and offsets due to reduction that already took place
normalize_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream_large_buckets>>>(
sorted_bucket_sizes_sum, average_bucket_size, h_nof_large_buckets);
// reduce
for (int s = h_largest_bucket; s > 1; s = ((s + 1) >> 1)) {
- NUM_THREADS = min(MAX_TH, h_nof_large_buckets);
+ NUM_THREADS = max(1, min(MAX_TH, h_nof_large_buckets));
NUM_BLOCKS = (h_nof_large_buckets + NUM_THREADS - 1) / NUM_THREADS;
normalize_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream_large_buckets>>>(
sorted_bucket_sizes, s == h_largest_bucket ? average_bucket_size : 2, h_nof_large_buckets);
- NUM_THREADS = min(MAX_TH, large_buckets_nof_threads);
+ NUM_THREADS = max(1, min(MAX_TH, large_buckets_nof_threads));
NUM_BLOCKS = (large_buckets_nof_threads + NUM_THREADS - 1) / NUM_THREADS;
sum_reduction_variable_size_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream_large_buckets>>>(
large_buckets, sorted_bucket_sizes_sum, sorted_bucket_sizes, large_bucket_indices,
Expand All @@ -689,7 +689,7 @@ namespace msm {
CHK_IF_RETURN(cudaFreeAsync(large_bucket_indices, stream_large_buckets));

// distribute
- NUM_THREADS = min(MAX_TH, h_nof_large_buckets);
+ NUM_THREADS = max(1, min(MAX_TH, h_nof_large_buckets));
NUM_BLOCKS = (h_nof_large_buckets + NUM_THREADS - 1) / NUM_THREADS;
distribute_large_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream_large_buckets>>>(
large_buckets, buckets, sorted_bucket_sizes_sum, sorted_single_bucket_indices, h_nof_large_buckets,
Expand Down Expand Up @@ -778,7 +778,7 @@ namespace msm {
const bool is_last_iter = (j == target_bits_count - 1);
unsigned nof_threads =
(((target_buckets_count - target_windows_count) >> 1) << (target_bits_count - 1 - j)) * batch_size;
- NUM_THREADS = min(MAX_TH, nof_threads);
+ NUM_THREADS = max(1, min(MAX_TH, nof_threads));
NUM_BLOCKS = (nof_threads + NUM_THREADS - 1) / NUM_THREADS;
single_stage_multi_reduction_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(
is_first_iter ? source_buckets : temp_buckets1, is_last_iter ? target_buckets : temp_buckets1,
Expand Down
2 changes: 1 addition & 1 deletion wrappers/rust/icicle-core/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "icicle-core"
- version = "1.4.0"
+ version = "1.5.1"
edition = "2021"
authors = ["Ingonyama"]
description = "A library for GPU ZK acceleration by Ingonyama"
Expand Down
2 changes: 1 addition & 1 deletion wrappers/rust/icicle-cuda-runtime/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "icicle-cuda-runtime"
- version = "1.4.0"
+ version = "1.5.1"
edition = "2021"
authors = [ "Ingonyama" ]
description = "Ingonyama's Rust wrapper of CUDA runtime"
Expand Down
2 changes: 1 addition & 1 deletion wrappers/rust/icicle-curves/icicle-bls12-377/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "icicle-bls12-377"
- version = "1.4.0"
+ version = "1.5.1"
edition = "2021"
authors = [ "Ingonyama" ]
description = "Rust wrapper for the CUDA implementation of BLS12-377 pairing friendly elliptic curve by Ingonyama"
Expand Down
2 changes: 1 addition & 1 deletion wrappers/rust/icicle-curves/icicle-bls12-381/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "icicle-bls12-381"
- version = "1.4.0"
+ version = "1.5.1"
edition = "2021"
authors = [ "Ingonyama" ]
description = "Rust wrapper for the CUDA implementation of BLS12-381 pairing friendly elliptic curve by Ingonyama"
Expand Down
2 changes: 1 addition & 1 deletion wrappers/rust/icicle-curves/icicle-bn254/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "icicle-bn254"
- version = "1.4.0"
+ version = "1.5.1"
edition = "2021"
authors = [ "Ingonyama" ]
description = "Rust wrapper for the CUDA implementation of BN254 pairing friendly elliptic curve by Ingonyama"
Expand Down
2 changes: 1 addition & 1 deletion wrappers/rust/icicle-curves/icicle-bw6-761/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "icicle-bw6-761"
- version = "1.4.0"
+ version = "1.5.1"
edition = "2021"
authors = [ "Ingonyama" ]
description = "Rust wrapper for the CUDA implementation of BW6-761 pairing friendly elliptic curve by Ingonyama"
Expand Down
2 changes: 1 addition & 1 deletion wrappers/rust/icicle-curves/icicle-grumpkin/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "icicle-grumpkin"
- version = "1.4.0"
+ version = "1.5.1"
edition = "2021"
authors = [ "Ingonyama" ]
description = "Rust wrapper for the CUDA implementation of Grumpkin elliptic curve by Ingonyama"
Expand Down

0 comments on commit 3d1e433

Please sign in to comment.