diff --git a/icicle/appUtils/msm/msm.cu b/icicle/appUtils/msm/msm.cu index 0b0392c6f..39773378a 100644 --- a/icicle/appUtils/msm/msm.cu +++ b/icicle/appUtils/msm/msm.cu @@ -28,7 +28,7 @@ namespace msm { // #define BIG_TRIANGLE // #define SSM_SUM //WIP - unsigned get_optimal_c(int bitsize) { return max((unsigned)ceil(log2(bitsize)) - 4, 1U); } + unsigned get_optimal_c(int bitsize) { return (unsigned)max(ceil(log2(bitsize)) - 4.0, 1.0); } template __global__ void normalize_kernel(E* inout, E factor, int n) @@ -654,7 +654,7 @@ namespace msm { unsigned log_nof_large_buckets = (unsigned)ceil(log2(h_nof_large_buckets)); unsigned* large_bucket_indices; CHK_IF_RETURN(cudaMallocAsync(&large_bucket_indices, sizeof(unsigned) * large_buckets_nof_threads, stream)); - NUM_THREADS = min(1 << 8, h_nof_large_buckets); + NUM_THREADS = max(1, min(1 << 8, h_nof_large_buckets)); NUM_BLOCKS = (h_nof_large_buckets + NUM_THREADS - 1) / NUM_THREADS; initialize_large_bucket_indices

<<>>( sorted_bucket_sizes_sum, average_bucket_size, h_nof_large_buckets, log_nof_large_buckets, @@ -663,24 +663,24 @@ namespace msm { P* large_buckets; CHK_IF_RETURN(cudaMallocAsync(&large_buckets, sizeof(P) * large_buckets_nof_threads, stream_large_buckets)); - NUM_THREADS = min(1 << 8, large_buckets_nof_threads); + NUM_THREADS = max(1, min(1 << 8, large_buckets_nof_threads)); NUM_BLOCKS = (large_buckets_nof_threads + NUM_THREADS - 1) / NUM_THREADS; accumulate_large_buckets_kernel<<>>( large_buckets, sorted_bucket_offsets, sorted_bucket_sizes, large_bucket_indices, sorted_point_indices, d_points, h_nof_large_buckets, c, average_bucket_size, log_nof_large_buckets, large_buckets_nof_threads); - NUM_THREADS = min(MAX_TH, h_nof_large_buckets); + NUM_THREADS = max(1, min(MAX_TH, h_nof_large_buckets)); NUM_BLOCKS = (h_nof_large_buckets + NUM_THREADS - 1) / NUM_THREADS; // normalization is needed to update buckets sizes and offsets due to reduction that already took place normalize_kernel<<>>( sorted_bucket_sizes_sum, average_bucket_size, h_nof_large_buckets); // reduce for (int s = h_largest_bucket; s > 1; s = ((s + 1) >> 1)) { - NUM_THREADS = min(MAX_TH, h_nof_large_buckets); + NUM_THREADS = max(1, min(MAX_TH, h_nof_large_buckets)); NUM_BLOCKS = (h_nof_large_buckets + NUM_THREADS - 1) / NUM_THREADS; normalize_kernel<<>>( sorted_bucket_sizes, s == h_largest_bucket ? average_bucket_size : 2, h_nof_large_buckets); - NUM_THREADS = min(MAX_TH, large_buckets_nof_threads); + NUM_THREADS = max(1, min(MAX_TH, large_buckets_nof_threads)); NUM_BLOCKS = (large_buckets_nof_threads + NUM_THREADS - 1) / NUM_THREADS; sum_reduction_variable_size_kernel<<>>( large_buckets, sorted_bucket_sizes_sum, sorted_bucket_sizes, large_bucket_indices, @@ -689,7 +689,7 @@ namespace msm { CHK_IF_RETURN(cudaFreeAsync(large_bucket_indices, stream_large_buckets)); // distribute - NUM_THREADS = min(MAX_TH, h_nof_large_buckets); + NUM_THREADS = max(1, min(MAX_TH, h_nof_large_buckets)); NUM_BLOCKS = (h_nof_large_buckets + NUM_THREADS - 1) / NUM_THREADS; distribute_large_buckets_kernel<<>>( large_buckets, buckets, sorted_bucket_sizes_sum, sorted_single_bucket_indices, h_nof_large_buckets, @@ -778,7 +778,7 @@ namespace msm { const bool is_last_iter = (j == target_bits_count - 1); unsigned nof_threads = (((target_buckets_count - target_windows_count) >> 1) << (target_bits_count - 1 - j)) * batch_size; - NUM_THREADS = min(MAX_TH, nof_threads); + NUM_THREADS = max(1, min(MAX_TH, nof_threads)); NUM_BLOCKS = (nof_threads + NUM_THREADS - 1) / NUM_THREADS; single_stage_multi_reduction_kernel<<>>( is_first_iter ? source_buckets : temp_buckets1, is_last_iter ? target_buckets : temp_buckets1, diff --git a/wrappers/rust/icicle-core/Cargo.toml b/wrappers/rust/icicle-core/Cargo.toml index 9965faab2..3558be3da 100644 --- a/wrappers/rust/icicle-core/Cargo.toml +++ b/wrappers/rust/icicle-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "icicle-core" -version = "1.4.0" +version = "1.5.1" edition = "2021" authors = ["Ingonyama"] description = "A library for GPU ZK acceleration by Ingonyama" diff --git a/wrappers/rust/icicle-cuda-runtime/Cargo.toml b/wrappers/rust/icicle-cuda-runtime/Cargo.toml index 3bb3e40ed..a75805c75 100644 --- a/wrappers/rust/icicle-cuda-runtime/Cargo.toml +++ b/wrappers/rust/icicle-cuda-runtime/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "icicle-cuda-runtime" -version = "1.4.0" +version = "1.5.1" edition = "2021" authors = [ "Ingonyama" ] description = "Ingonyama's Rust wrapper of CUDA runtime" diff --git a/wrappers/rust/icicle-curves/icicle-bls12-377/Cargo.toml b/wrappers/rust/icicle-curves/icicle-bls12-377/Cargo.toml index c73ff5b72..e34f0905b 100644 --- a/wrappers/rust/icicle-curves/icicle-bls12-377/Cargo.toml +++ b/wrappers/rust/icicle-curves/icicle-bls12-377/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "icicle-bls12-377" -version = "1.4.0" +version = "1.5.1" edition = "2021" authors = [ "Ingonyama" ] description = "Rust wrapper for the CUDA implementation of BLS12-377 pairing friendly elliptic curve by Ingonyama" diff --git a/wrappers/rust/icicle-curves/icicle-bls12-381/Cargo.toml b/wrappers/rust/icicle-curves/icicle-bls12-381/Cargo.toml index f995e7ab1..ee38dec35 100644 --- a/wrappers/rust/icicle-curves/icicle-bls12-381/Cargo.toml +++ b/wrappers/rust/icicle-curves/icicle-bls12-381/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "icicle-bls12-381" -version = "1.4.0" +version = "1.5.1" edition = "2021" authors = [ "Ingonyama" ] description = "Rust wrapper for the CUDA implementation of BLS12-381 pairing friendly elliptic curve by Ingonyama" diff --git a/wrappers/rust/icicle-curves/icicle-bn254/Cargo.toml b/wrappers/rust/icicle-curves/icicle-bn254/Cargo.toml index 37bf14891..24e62fadd 100644 --- a/wrappers/rust/icicle-curves/icicle-bn254/Cargo.toml +++ b/wrappers/rust/icicle-curves/icicle-bn254/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "icicle-bn254" -version = "1.4.0" +version = "1.5.1" edition = "2021" authors = [ "Ingonyama" ] description = "Rust wrapper for the CUDA implementation of BN254 pairing friendly elliptic curve by Ingonyama" diff --git a/wrappers/rust/icicle-curves/icicle-bw6-761/Cargo.toml b/wrappers/rust/icicle-curves/icicle-bw6-761/Cargo.toml index 82819e197..de6ba5ede 100644 --- a/wrappers/rust/icicle-curves/icicle-bw6-761/Cargo.toml +++ b/wrappers/rust/icicle-curves/icicle-bw6-761/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "icicle-bw6-761" -version = "1.4.0" +version = "1.5.1" edition = "2021" authors = [ "Ingonyama" ] description = "Rust wrapper for the CUDA implementation of BW6-761 pairing friendly elliptic curve by Ingonyama" diff --git a/wrappers/rust/icicle-curves/icicle-grumpkin/Cargo.toml b/wrappers/rust/icicle-curves/icicle-grumpkin/Cargo.toml index a1a6db0ae..f1377c194 100644 --- a/wrappers/rust/icicle-curves/icicle-grumpkin/Cargo.toml +++ b/wrappers/rust/icicle-curves/icicle-grumpkin/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "icicle-grumpkin" -version = "1.4.0" +version = "1.5.1" edition = "2021" authors = [ "Ingonyama" ] description = "Rust wrapper for the CUDA implementation of Grumpkin elliptic curve by Ingonyama"