diff --git a/CHANGELOG.md b/CHANGELOG.md index d378b2068..6b26526d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - Introducing an example for proving knowledge of exponent - Add api to get SRS size. +- Adding micro benchmarks for MSM, FFT and Poly Evaluation. ### Improvements diff --git a/README.md b/README.md index 24880fc7a..e15fff082 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ The additional flags allow using assembly implementation of `square_in_place` an For benchmark, run: ``` -RAYON_NUM_THREADS=N cargo bench +RAYON_NUM_THREADS=N cargo bench --features bench ``` where N is the number of threads you want to use (N = 1 for single-thread). diff --git a/plonk/Cargo.toml b/plonk/Cargo.toml index 018ee5267..f0ae37267 100644 --- a/plonk/Cargo.toml +++ b/plonk/Cargo.toml @@ -51,6 +51,8 @@ path = "benches/bench.rs" harness = false [features] -std = [] +std = [ ] # exposing apis for testing purpose test_apis = [] +# enabling microbench +bench = [] \ No newline at end of file diff --git a/plonk/benches/bench.rs b/plonk/benches/bench.rs index 19004b3fe..ecee43343 100644 --- a/plonk/benches/bench.rs +++ b/plonk/benches/bench.rs @@ -5,7 +5,7 @@ // along with the Jellyfish library. If not, see <https://mit-license.org/>. // For benchmark, run: -// RAYON_NUM_THREADS=N cargo bench +// RAYON_NUM_THREADS=N cargo bench --features bench // where N is the number of threads you want to use (N = 1 for single-thread). use ark_bls12_377::{Bls12_377, Fr as Fr377}; @@ -13,7 +13,9 @@ use ark_bls12_381::{Bls12_381, Fr as Fr381}; use ark_bn254::{Bn254, Fr as Fr254}; use ark_bw6_761::{Fr as Fr761, BW6_761}; use ark_ff::PrimeField; +use ark_std::{fs::File, io::Write}; use jf_plonk::{ + bencher::{init_timers, total_fft_time, total_msm_time, total_poly_eval_time}, circuit::{Circuit, PlonkCircuit}, errors::PlonkError, proof_system::{PlonkKzgSnark, Snark}, @@ -54,6 +56,7 @@ macro_rules! 
plonk_prove_bench { let (pk, _) = PlonkKzgSnark::<$bench_curve>::preprocess(&srs, &cs).unwrap(); + init_timers(); let start = ark_std::time::Instant::now(); for _ in 0..NUM_REPETITIONS { @@ -62,13 +65,98 @@ macro_rules! plonk_prove_bench { ) .unwrap(); } + println!("====================================="); + println!( + "proving time for {}, {} with dim {}: {} ns/gate", + stringify!($bench_curve), + stringify!($bench_plonk_type), + $num_gates, + start.elapsed().as_nanos() / NUM_REPETITIONS as u128 / $num_gates as u128 + ); + println!( + "total batch verify time: {:.2} ms", + start.elapsed().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64 + ); + println!( + "time spend on FFT: {:.2} ms, or {:.2}%", + total_fft_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_fft_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + ); + println!( + "time spend on MSM: {:.2} ms, or {:.2}%", + total_msm_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_msm_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + ); + println!( + "time spend on poly evaluation: {:.2} ms, or {:.2}%", + total_poly_eval_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_poly_eval_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + ); + println!("====================================="); + }; +} +macro_rules! 
plonk_prove_mt_bench { + ($bench_curve:ty, $bench_field:ty, $bench_plonk_type:expr, $num_gates:expr, $file:expr) => { + let rng = &mut ark_std::test_rng(); + let cs = gen_circuit_for_bench::<$bench_field>($num_gates, $bench_plonk_type).unwrap(); + + let max_degree = $num_gates + 2; + let srs = PlonkKzgSnark::<$bench_curve>::universal_setup(max_degree, rng).unwrap(); + + let (pk, _) = PlonkKzgSnark::<$bench_curve>::preprocess(&srs, &cs).unwrap(); + + init_timers(); + let start = ark_std::time::Instant::now(); + + for _ in 0..NUM_REPETITIONS { + let _ = PlonkKzgSnark::<$bench_curve>::prove::<_, _, StandardTranscript>( + rng, &cs, &pk, None, + ) + .unwrap(); + } + println!("====================================="); println!( - "proving time for {}, {}: {} ns/gate", + "proving time for {}, {} with dim {}: {} ns/gate", stringify!($bench_curve), stringify!($bench_plonk_type), + $num_gates, start.elapsed().as_nanos() / NUM_REPETITIONS as u128 / $num_gates as u128 ); + println!( + "total batch verify time: {:.2} ms", + start.elapsed().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64 + ); + println!( + "time spend on FFT: {:.2} ms, or {:.2}%", + total_fft_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_fft_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + ); + println!( + "time spend on MSM: {:.2} ms, or {:.2}%", + total_msm_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_msm_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + ); + println!( + "time spend on poly evaluation: {:.2} ms, or {:.2}%", + total_poly_eval_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_poly_eval_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + ); + println!("====================================="); + $file + .write_all( + format!( + "{} {:.2} {:.2} {:.2} {:.2} {:.2}\n", + $num_gates, + start.elapsed().as_nanos() as f64 / NUM_REPETITIONS as f64 / 
1_000_000f64, + total_fft_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_fft_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64, + total_msm_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_msm_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64, + ) + .as_ref(), + ) + .expect("Unable to write data"); }; } @@ -97,6 +185,7 @@ macro_rules! plonk_verify_bench { PlonkKzgSnark::<$bench_curve>::prove::<_, _, StandardTranscript>(rng, &cs, &pk, None) .unwrap(); + init_timers(); let start = ark_std::time::Instant::now(); for _ in 0..NUM_REPETITIONS { @@ -104,13 +193,30 @@ macro_rules! plonk_verify_bench { PlonkKzgSnark::<$bench_curve>::verify::(&vk, &[], &proof, None) .unwrap(); } - + println!("====================================="); println!( - "verifying time for {}, {}: {} ns", + "verifying time for {}, {} with dim {}: {} ns", stringify!($bench_curve), stringify!($bench_plonk_type), + $num_gates, start.elapsed().as_nanos() / NUM_REPETITIONS as u128 ); + println!( + "total batch verify time: {:.2} ms", + start.elapsed().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64 + ); + println!( + "time spend on FFT: {:.2} ms, or {:.2}%", + total_fft_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_fft_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + ); + println!( + "time spend on MSM: {:.2} ms, or {:.2}%", + total_msm_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_msm_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + ); + + println!("====================================="); }; } @@ -144,6 +250,7 @@ macro_rules! plonk_batch_verify_bench { let public_inputs_ref = vec![&pub_input[..]; $num_proofs]; let proofs_ref = vec![&proof; $num_proofs]; + init_timers(); let start = ark_std::time::Instant::now(); for _ in 0..NUM_REPETITIONS { @@ -163,6 +270,21 @@ macro_rules! 
plonk_batch_verify_bench { stringify!($num_proofs), start.elapsed().as_nanos() / NUM_REPETITIONS as u128 / $num_proofs as u128 ); + + println!( + "total batch verify time: {:.2} ms", + start.elapsed().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64 + ); + println!( + "time spend on FFT: {:.2} ms, or {:.2}%", + total_fft_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_fft_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + ); + println!( + "time spend on MSM: {:.2} ms, or {:.2}%", + total_msm_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_msm_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + ); }; } @@ -177,7 +299,28 @@ fn bench_batch_verify() { plonk_batch_verify_bench!(BW6_761, Fr761, PlonkType::UltraPlonk, 1000); } +fn bench_intense() { + let mut f = File::create(format!( + "../target/{}-threads.txt", + rayon::current_num_threads() + )) + .expect("Unable to create file"); + + for i in 10..=30 { + let dim = 1 << i; + println!("bench with log(dim) = {}", i); + plonk_prove_mt_bench!(Bls12_377, Fr377, PlonkType::TurboPlonk, dim, f); + } + + for i in 10..=30 { + let dim = 1 << i; + println!("bench with log(dim) = {}", i); + plonk_verify_bench!(Bls12_377, Fr377, PlonkType::TurboPlonk, dim); + } +} + fn main() { + bench_intense(); bench_prove(); bench_verify(); bench_batch_verify(); diff --git a/plonk/src/bencher.rs b/plonk/src/bencher.rs new file mode 100644 index 000000000..46db6c165 --- /dev/null +++ b/plonk/src/bencher.rs @@ -0,0 +1,184 @@ +//! 
Helper functions for micro-benchmarks + +use ark_std::{thread_local, time::Instant}; +use core::{cell::RefCell, time::Duration}; + +thread_local!(static FFT_START_TIME: RefCell<Instant> = RefCell::new(Instant::now())); +thread_local!(static FFT_TIMER_LOCK: RefCell<bool> = RefCell::new(false)); +thread_local!(static FFT_TOTAL_TIME: RefCell<Duration> = RefCell::new(Duration::ZERO)); + +thread_local!(static MSM_START_TIME: RefCell<Instant> = RefCell::new(Instant::now())); +thread_local!(static MSM_TIMER_LOCK: RefCell<bool> = RefCell::new(false)); +thread_local!(static MSM_TOTAL_TIME: RefCell<Duration> = RefCell::new(Duration::ZERO)); + +thread_local!(static POLY_EVAL_START_TIME: RefCell<Instant> = RefCell::new(Instant::now())); +thread_local!(static POLY_EVAL_TIMER_LOCK: RefCell<bool> = RefCell::new(false)); +thread_local!(static POLY_EVAL_TOTAL_TIME: RefCell<Duration> = RefCell::new(Duration::ZERO)); + +/// Initialize the timers +#[inline] +pub fn init_timers() { + #[cfg(feature = "bench")] + { + FFT_TOTAL_TIME.with(|timer| { + *timer.borrow_mut() = Duration::ZERO; + }); + FFT_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = false; + }); + MSM_TOTAL_TIME.with(|timer| { + *timer.borrow_mut() = Duration::ZERO; + }); + MSM_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = false; + }); + POLY_EVAL_TOTAL_TIME.with(|timer| { + *timer.borrow_mut() = Duration::ZERO; + }); + POLY_EVAL_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = false; + }); + } +} + +/// Get the total time that we have spent on FFT related computations +#[inline] +pub fn total_fft_time() -> Duration { + #[cfg(feature = "bench")] + { + FFT_TOTAL_TIME.with(|duration| *duration.borrow()) + } + #[cfg(not(feature = "bench"))] + Duration::ZERO +} + +/// Get the total time that we have spent on MSM related computations +#[inline] +pub fn total_msm_time() -> Duration { + #[cfg(feature = "bench")] + { + MSM_TOTAL_TIME.with(|duration| *duration.borrow()) + } + #[cfg(not(feature = "bench"))] + Duration::ZERO +} + +/// Get the total time that we have spent on polynomial evaluations 
+#[inline] +pub fn total_poly_eval_time() -> Duration { + #[cfg(feature = "bench")] + { + POLY_EVAL_TOTAL_TIME.with(|duration| *duration.borrow()) + } + #[cfg(not(feature = "bench"))] + Duration::ZERO +} + +#[inline] +pub(crate) fn fft_start() { + #[cfg(feature = "bench")] + { + if FFT_TIMER_LOCK.with(|lock| *lock.borrow()) { + panic!("another FFT timer has already started somewhere else"); + } + + FFT_START_TIME.with(|timer| { + *timer.borrow_mut() = Instant::now(); + }); + + FFT_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = true; + }) + } +} + +#[inline] +pub(crate) fn fft_end() { + #[cfg(feature = "bench")] + { + if !FFT_TIMER_LOCK.with(|lock| *lock.borrow()) { + panic!("FFT timer has not started yet"); + } + + let start_time = FFT_START_TIME.with(|timer| *timer.borrow()); + let end_time = Instant::now(); + FFT_TOTAL_TIME.with(|duration| { + *duration.borrow_mut() += end_time - start_time; + }); + FFT_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = false; + }) + } +} + +#[inline] +pub(crate) fn msm_start() { + #[cfg(feature = "bench")] + { + if MSM_TIMER_LOCK.with(|lock| *lock.borrow()) { + panic!("another MSM timer has already started somewhere else"); + } + + MSM_START_TIME.with(|timer| { + *timer.borrow_mut() = Instant::now(); + }); + + MSM_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = true; + }) + } +} + +#[inline] +pub(crate) fn msm_end() { + #[cfg(feature = "bench")] + { + if !MSM_TIMER_LOCK.with(|lock| *lock.borrow()) { + panic!("MSM timer has not started yet"); + } + let start_time = MSM_START_TIME.with(|timer| *timer.borrow()); + let end_time = Instant::now(); + MSM_TOTAL_TIME.with(|duration| { + *duration.borrow_mut() += end_time - start_time; + }); + MSM_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = false; + }) + } +} + +#[inline] +pub(crate) fn poly_eval_start() { + #[cfg(feature = "bench")] + { + if POLY_EVAL_TIMER_LOCK.with(|lock| *lock.borrow()) { + panic!("another poly eval timer has already started somewhere else"); + } + + 
POLY_EVAL_START_TIME.with(|timer| { + *timer.borrow_mut() = Instant::now(); + }); + + POLY_EVAL_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = true; + }) + } +} + +#[inline] +pub(crate) fn poly_eval_end() { + #[cfg(feature = "bench")] + { + if !POLY_EVAL_TIMER_LOCK.with(|lock| *lock.borrow()) { + panic!("poly eval timer has not started yet"); + } + let start_time = POLY_EVAL_START_TIME.with(|timer| *timer.borrow()); + let end_time = Instant::now(); + POLY_EVAL_TOTAL_TIME.with(|duration| { + *duration.borrow_mut() += end_time - start_time; + }); + POLY_EVAL_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = false; + }) + } +} diff --git a/plonk/src/circuit/basic.rs b/plonk/src/circuit/basic.rs index 8c8cc3c16..1d8100893 100644 --- a/plonk/src/circuit/basic.rs +++ b/plonk/src/circuit/basic.rs @@ -7,6 +7,7 @@ //! Basic instantiations of Plonk-based constraint systems use super::{Arithmetization, Circuit, GateId, Variable, WireId}; use crate::{ + bencher::{fft_end, fft_start}, circuit::{gates::*, SortedLookupVecAndPolys}, constants::{compute_coset_representatives, GATE_WIDTH, N_MUL_SELECTORS}, errors::{CircuitError::*, PlonkError}, @@ -1059,6 +1060,8 @@ where } fn compute_selector_polynomials(&self) -> Result>, PlonkError> { + fft_start(); + self.check_finalize_flag(true)?; let domain = &self.eval_domain; if domain.size() < self.num_gates() { @@ -1074,12 +1077,16 @@ where .map(|selector| DensePolynomial::from_coefficients_vec(domain.ifft(selector))) .collect(); + fft_end(); + Ok(selector_polys) } fn compute_extended_permutation_polynomials( &self, ) -> Result>, PlonkError> { + fft_start(); + self.check_finalize_flag(true)?; let domain = &self.eval_domain; let n = domain.size(); @@ -1092,6 +1099,8 @@ where ) }) .collect(); + fft_end(); + Ok(extended_perm_polys) } @@ -1100,6 +1109,8 @@ where beta: &F, gamma: &F, ) -> Result, PlonkError> { + fft_start(); + self.check_finalize_flag(true)?; let mut product_vec = vec![F::one()]; let domain = &self.eval_domain; @@ -1119,10 
+1130,14 @@ where product_vec.push(prev_prod * a / b); } domain.ifft_in_place(&mut product_vec); - Ok(DensePolynomial::from_coefficients_vec(product_vec)) + + let res = DensePolynomial::from_coefficients_vec(product_vec); + fft_end(); + Ok(res) } fn compute_wire_polynomials(&self) -> Result>, PlonkError> { + fft_start(); self.check_finalize_flag(true)?; let domain = &self.eval_domain; if domain.size() < self.num_gates() { @@ -1145,10 +1160,13 @@ where }) .collect(); assert_eq!(wire_polys.len(), self.num_wire_types()); + fft_end(); Ok(wire_polys) } fn compute_pub_input_polynomial(&self) -> Result, PlonkError> { + fft_start(); + self.check_finalize_flag(true)?; let domain = &self.eval_domain; let mut pub_input_vec = vec![F::zero(); domain.size()]; @@ -1157,25 +1175,31 @@ where pub_input_vec[io_gate_id] = self.witness[var]; }); domain.ifft_in_place(&mut pub_input_vec); - Ok(DensePolynomial::from_coefficients_vec(pub_input_vec)) + let res = DensePolynomial::from_coefficients_vec(pub_input_vec); + fft_end(); + Ok(res) } // Plookup-related methods // fn compute_range_table_polynomial(&self) -> Result, PlonkError> { + fft_start(); let range_table = self.compute_range_table()?; let domain = &self.eval_domain; - Ok(DensePolynomial::from_coefficients_vec( - domain.ifft(&range_table), - )) + + let res = DensePolynomial::from_coefficients_vec(domain.ifft(&range_table)); + fft_end(); + Ok(res) } fn compute_key_table_polynomial(&self) -> Result, PlonkError> { + fft_start(); let key_table = self.compute_key_table()?; let domain = &self.eval_domain; - Ok(DensePolynomial::from_coefficients_vec( - domain.ifft(&key_table), - )) + + let res = DensePolynomial::from_coefficients_vec(domain.ifft(&key_table)); + fft_end(); + Ok(res) } fn compute_merged_lookup_table(&self, tau: F) -> Result, PlonkError> { @@ -1252,8 +1276,12 @@ where product_vec.push(prev_prod * a / b); } product_vec.push(F::one()); + + fft_start(); domain.ifft_in_place(&mut product_vec); - 
Ok(DensePolynomial::from_coefficients_vec(product_vec)) + let res = DensePolynomial::from_coefficients_vec(product_vec); + fft_end(); + Ok(res) } fn compute_lookup_sorted_vec_polynomials( @@ -1301,8 +1329,11 @@ where if sorted_vec.len() != 2 * n - 1 { return Err(ParameterError("The sorted vector has wrong length, some lookup variables might be outside the table".to_string()).into()); } + + fft_start(); let h1_poly = DensePolynomial::from_coefficients_vec(domain.ifft(&sorted_vec[..n])); let h2_poly = DensePolynomial::from_coefficients_vec(domain.ifft(&sorted_vec[n - 1..])); + fft_end(); Ok((sorted_vec, h1_poly, h2_poly)) } } diff --git a/plonk/src/lib.rs b/plonk/src/lib.rs index 4020ea392..25b6139ac 100644 --- a/plonk/src/lib.rs +++ b/plonk/src/lib.rs @@ -18,6 +18,7 @@ extern crate downcast_rs; #[macro_use] extern crate derivative; +pub mod bencher; pub mod circuit; pub mod constants; pub mod errors; diff --git a/plonk/src/proof_system/prover.rs b/plonk/src/proof_system/prover.rs index fbc0ea40d..5bbe2592d 100644 --- a/plonk/src/proof_system/prover.rs +++ b/plonk/src/proof_system/prover.rs @@ -11,6 +11,7 @@ use super::structs::{ PlookupOracles, ProofEvaluations, ProvingKey, }; use crate::{ + bencher::{fft_end, fft_start, msm_end, msm_start, poly_eval_end, poly_eval_start}, circuit::Arithmetization, constants::{domain_size_ratio, GATE_WIDTH}, errors::{PlonkError, SnarkError::*}, @@ -80,7 +81,9 @@ impl Prover { .into_iter() .map(|poly| self.mask_polynomial(prng, poly, 1)) .collect(); + msm_start(); let wires_poly_comms = Self::commit_polynomials(ck, &wire_polys)?; + msm_end(); let pub_input_poly = cs.compute_pub_input_polynomial()?; Ok(((wires_poly_comms, wire_polys), pub_input_poly)) } @@ -104,7 +107,9 @@ impl Prover { let h_1_poly = self.mask_polynomial(prng, h_1_poly, 2); let h_2_poly = self.mask_polynomial(prng, h_2_poly, 2); let h_polys = vec![h_1_poly, h_2_poly]; + msm_start(); let h_poly_comms = Self::commit_polynomials(ck, &h_polys)?; + msm_end(); 
Ok(((h_poly_comms, h_polys), sorted_vec, merged_lookup_table)) } @@ -122,7 +127,9 @@ impl Prover { cs.compute_prod_permutation_polynomial(&challenges.beta, &challenges.gamma)?, 2, ); + msm_start(); let prod_perm_comm = Self::commit_polynomial(ck, &prod_perm_poly)?; + msm_end(); Ok((prod_perm_comm, prod_perm_poly)) } @@ -155,7 +162,9 @@ impl Prover { )?, 2, ); + msm_start(); let prod_lookup_comm = Self::commit_polynomial(ck, &prod_lookup_poly)?; + msm_end(); Ok((prod_lookup_comm, prod_lookup_poly)) } @@ -173,8 +182,9 @@ impl Prover { let quot_poly = self.compute_quotient_polynomial(challenges, pks, online_oracles, num_wire_types)?; let split_quot_polys = self.split_quotient_polynomial(&quot_poly, num_wire_types)?; + msm_start(); let split_quot_poly_comms = Self::commit_polynomials(ck, &split_quot_polys)?; - + msm_end(); Ok((split_quot_poly_comms, split_quot_polys)) } @@ -190,6 +200,9 @@ impl Prover { online_oracles: &Oracles, num_wire_types: usize, ) -> ProofEvaluations { + // TODO: a potential optimization -- dense polynomial evaluations re-compute + // powers-of-zetas; consider pre-computing them and passing them in + poly_eval_start(); let wires_evals: Vec = online_oracles .wire_polys .par_iter() @@ -205,6 +218,7 @@ impl Prover { .prod_perm_poly .evaluate(&(challenges.zeta * self.domain.group_gen)); + poly_eval_end(); ProofEvaluations { wires_evals, wire_sigma_evals, @@ -220,6 +234,8 @@ impl Prover { challenges: &Challenges, online_oracles: &Oracles, ) -> Result, PlonkError> { + poly_eval_start(); + if pk.plookup_pk.is_none() { return Err(ParameterError( "Evaluate Plookup polynomials without supporting lookup".to_string(), @@ -241,6 +257,8 @@ impl Prover { let h_1_eval = online_oracles.plookup_oracles.h_polys[0].evaluate(&challenges.zeta); let q_lookup_eval = pk.q_lookup_poly()?.evaluate(&challenges.zeta); + // TODO: a potential optimization -- dense polynomial evaluations re-compute + // powers-of-gs; consider pre-computing them and passing them in let zeta_mul_g = 
challenges.zeta * self.domain.group_gen; let prod_next_eval = online_oracles .plookup_oracles @@ -254,6 +272,7 @@ impl Prover { let w_3_next_eval = online_oracles.wire_polys[3].evaluate(&zeta_mul_g); let w_4_next_eval = online_oracles.wire_polys[4].evaluate(&zeta_mul_g); + poly_eval_end(); Ok(PlookupEvaluations { range_table_eval, key_table_eval, @@ -483,8 +502,10 @@ impl Prover { *eval_point, &empty_rand, )?; - - Self::commit_polynomial(ck, &witness_poly) + msm_start(); + let res = Self::commit_polynomial(ck, &witness_poly); + msm_end(); + res } /// Compute the quotient polynomial via (i)FFTs. @@ -521,10 +542,12 @@ impl Prover { let alpha_3 = challenges.alpha.square() * challenges.alpha; let alpha_7 = alpha_3.square() * challenges.alpha; // enumerate proving instances + fft_start(); for (oracles, pk) in online_oracles.iter().zip(pks.iter()) { // lookup_flag = 1 if support Plookup argument. let lookup_flag = pk.plookup_pk.is_some(); + // fft_start(); // Compute coset evaluations. let selectors_coset_fft: Vec> = pk .selectors @@ -581,6 +604,8 @@ impl Prover { (None, None, None, None) }; + // fft_end(); + // Compute coset evaluations of the quotient polynomial. let quot_poly_coset_evals: Vec = (0..m) .into_par_iter() @@ -646,9 +671,12 @@ impl Prover { } } // Compute the coefficient form of the quotient polynomial - Ok(DensePolynomial::from_coefficients_vec( + // fft_start(); + let res = DensePolynomial::from_coefficients_vec( self.quot_domain.coset_ifft(&quot_poly_coset_evals_sum), - )) + ); + fft_end(); + Ok(res) } // Compute the i-th coset evaluation of the circuit part of the quotient diff --git a/plonk/src/proof_system/structs.rs b/plonk/src/proof_system/structs.rs index 9a6f2d33a..1ac67f8f4 100644 --- a/plonk/src/proof_system/structs.rs +++ b/plonk/src/proof_system/structs.rs @@ -6,6 +6,7 @@ //! 
Data structures used in Plonk proof systems use crate::{ + bencher::{msm_end, msm_start}, circuit::{ customized::{ ecc::{Point, SWToTEConParam}, @@ -866,13 +867,16 @@ impl ScalarsAndBases { } /// Compute the multi-scalar multiplication. pub(crate) fn multi_scalar_mul(&self) -> E::G1Projective { + msm_start(); let mut bases = vec![]; let mut scalars = vec![]; for (&base, scalar) in &self.base_scalar_map { bases.push(base); scalars.push(scalar.into_repr()); } - VariableBaseMSM::multi_scalar_mul(&bases, &scalars) + let res = VariableBaseMSM::multi_scalar_mul(&bases, &scalars); + msm_end(); + res } } diff --git a/scripts/run_mt_bench.sh b/scripts/run_mt_bench.sh new file mode 100755 index 000000000..f6bdcaa64 --- /dev/null +++ b/scripts/run_mt_bench.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +rm target/*.txt +rm target/*.log +RAYON_NUM_THREADS=64 cargo bench --features=bench > target/64core.log +RAYON_NUM_THREADS=32 cargo bench --features=bench > target/32core.log +RAYON_NUM_THREADS=16 cargo bench --features=bench > target/16core.log +RAYON_NUM_THREADS=8 cargo bench --features=bench > target/8core.log +RAYON_NUM_THREADS=4 cargo bench --features=bench > target/4core.log + + +