From 91f1606d9052cedb370d0eb728c8aac79cba3538 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Fri, 27 Sep 2024 09:43:50 -0700 Subject: [PATCH] perf(s2n-quic-dc): Optimize path secret Entry size (#2329) * Start tracking size of Entry * Add init bench for hmac keys * Remove caching of hmac control key This shrinks the Entry state by ~700 bytes. --- .github/workflows/ci.yml | 2 +- dc/s2n-quic-dc/benches/src/crypto.rs | 2 + dc/s2n-quic-dc/benches/src/crypto/hmac.rs | 32 +++++++++++++ dc/s2n-quic-dc/src/path/secret/map.rs | 56 +++++++++++++++++++++- dc/s2n-quic-dc/src/path/secret/map/test.rs | 9 ++++ dc/s2n-quic-dc/src/path/secret/receiver.rs | 32 +++++++++++++ dc/s2n-quic-dc/src/path/secret/schedule.rs | 24 ++++++++++ dc/s2n-quic-dc/src/path/secret/sender.rs | 21 ++++++-- 8 files changed, 170 insertions(+), 8 deletions(-) create mode 100644 dc/s2n-quic-dc/benches/src/crypto/hmac.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a9897fe661..f1eccd6a4f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -220,7 +220,7 @@ jobs: - rust: stable os: ubuntu-latest target: native - env: S2N_QUIC_PLATFORM_FEATURES_OVERRIDE="mtu_disc,pktinfo,tos,socket_msg" + env: S2N_QUIC_PLATFORM_FEATURES_OVERRIDE="mtu_disc,pktinfo,tos,socket_msg" >> $GITHUB_ENV; echo S2N_QUIC_RUN_VERSION_SPECIFIC_TESTS=1 steps: - uses: ilammy/setup-nasm@v1 - uses: actions/checkout@v4 diff --git a/dc/s2n-quic-dc/benches/src/crypto.rs b/dc/s2n-quic-dc/benches/src/crypto.rs index 34911befae..cc0fd00854 100644 --- a/dc/s2n-quic-dc/benches/src/crypto.rs +++ b/dc/s2n-quic-dc/benches/src/crypto.rs @@ -5,8 +5,10 @@ use criterion::Criterion; pub mod encrypt; pub mod hkdf; +pub mod hmac; pub fn benchmarks(c: &mut Criterion) { encrypt::benchmarks(c); hkdf::benchmarks(c); + hmac::benchmarks(c); } diff --git a/dc/s2n-quic-dc/benches/src/crypto/hmac.rs b/dc/s2n-quic-dc/benches/src/crypto/hmac.rs new file mode 100644 index 0000000000..689abc4ca9 --- /dev/null +++ b/dc/s2n-quic-dc/benches/src/crypto/hmac.rs @@ -0,0 +1,32 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use criterion::{black_box, Criterion, Throughput}; + +pub fn benchmarks(c: &mut Criterion) { + init(c); +} + +fn init(c: &mut Criterion) { + let mut group = c.benchmark_group("crypto/hmac/init"); + + group.throughput(Throughput::Elements(1)); + + let algs = [ + ("sha256", aws_lc_rs::hmac::HMAC_SHA256), + ("sha384", aws_lc_rs::hmac::HMAC_SHA384), + ("sha512", aws_lc_rs::hmac::HMAC_SHA512), + ]; + + for (alg_name, alg) in algs { + group.bench_function(format!("{alg_name}_init"), |b| { + let key_len = aws_lc_rs::hkdf::KeyType::len(&alg); + let mut key = vec![0u8; key_len]; + aws_lc_rs::rand::fill(&mut key).unwrap(); + let key = black_box(&key); + b.iter(move || { + let _ = black_box(aws_lc_rs::hmac::Key::new(alg, &key)); + }); + }); + } +} diff --git a/dc/s2n-quic-dc/src/path/secret/map.rs b/dc/s2n-quic-dc/src/path/secret/map.rs index 792bbacad8..31d9750203 100644 --- a/dc/s2n-quic-dc/src/path/secret/map.rs +++ b/dc/s2n-quic-dc/src/path/secret/map.rs @@ -437,7 +437,7 @@ impl Map { match packet { control::Packet::StaleKey(packet) => { - let Some(packet) = packet.authenticate(key) else { + let Some(packet) = packet.authenticate(&key) else { return; }; state.mark_live(self.state.cleaner.epoch()); @@ -447,7 +447,7 @@ impl Map { .fetch_add(1, Ordering::Relaxed); } control::Packet::ReplayDetected(packet) => { - let Some(_packet) = packet.authenticate(key) else { + let Some(_packet) = packet.authenticate(&key) else { return; }; self.state @@ -673,6 +673,58 @@ pub(super) struct Entry { parameters: ApplicationParams, } +impl SizeOf for Instant {} +impl SizeOf for u32 {} +impl SizeOf for SocketAddr {} +impl SizeOf for AtomicU64 {} + +impl SizeOf for IsRetired {} +impl SizeOf for ApplicationParams {} + +impl SizeOf for Entry { + fn size(&self) -> usize { + let Entry { + creation_time, + rehandshake_delta_secs, + peer, + secret, + retired, + used_at, + sender, + receiver, + parameters, + } = self; + creation_time.size() + + rehandshake_delta_secs.size() + + peer.size() + + secret.size() + + retired.size() + + used_at.size() + + sender.size() + + receiver.size() + + parameters.size() + } +} + +/// Provide an approximation of the size of Self, including any heap indirection (e.g., a vec +/// backed by a megabyte is a megabyte in `size`, not 24 bytes). +/// +/// Approximation because we don't currently attempt to account for (as an example) padding. It's +/// too annoying to do that. +#[cfg_attr(not(test), allow(unused))] +pub(crate) trait SizeOf: Sized { + fn size(&self) -> usize { + // If we don't need drop, it's very likely that this type is fully contained in size_of + // Self. This simplifies implementing this trait for e.g. std types. + assert!( + !std::mem::needs_drop::(), + "{:?} requires custom SizeOf impl", + std::any::type_name::() + ); + std::mem::size_of::() + } +} + // Retired is 0 if not yet retired. Otherwise it stores the background cleaner epoch at which it // retired; that epoch increments roughly once per minute. #[derive(Default)] diff --git a/dc/s2n-quic-dc/src/path/secret/map/test.rs b/dc/s2n-quic-dc/src/path/secret/map/test.rs index 5fda426f84..84444a4519 100644 --- a/dc/s2n-quic-dc/src/path/secret/map/test.rs +++ b/dc/s2n-quic-dc/src/path/secret/map/test.rs @@ -308,3 +308,12 @@ fn check_invariants_no_overflow() { } }) } + +#[test] +#[cfg(all(target_pointer_width = "64", target_os = "linux"))] +fn entry_size() { + // This gates to running only on specific GHA to reduce false positives. + if std::env::var("S2N_QUIC_RUN_VERSION_SPECIFIC_TESTS").is_ok() { + assert_eq!(fake_entry(0).size(), 350); + } +} diff --git a/dc/s2n-quic-dc/src/path/secret/receiver.rs b/dc/s2n-quic-dc/src/path/secret/receiver.rs index 601e1c37f4..c7a8502fbb 100644 --- a/dc/s2n-quic-dc/src/path/secret/receiver.rs +++ b/dc/s2n-quic-dc/src/path/secret/receiver.rs @@ -257,6 +257,38 @@ pub struct State { shared: Option>, } +impl super::map::SizeOf for Mutex { + fn size(&self) -> usize { + // If we don't need drop, it's very likely that this type is fully contained in size_of + // Self. This simplifies implementing this trait for e.g. std types. + // + // Mutex on macOS (at least) has a more expensive, pthread-based impl that allocates. But + // on Linux there's no extra allocation. + if cfg!(target_os = "linux") { + assert!( + !std::mem::needs_drop::(), + "{:?} requires custom SizeOf impl", + std::any::type_name::() + ); + } + std::mem::size_of::() + } +} + +impl super::map::SizeOf for State { + fn size(&self) -> usize { + let State { + min_key_id, + max_seen_key_id, + seen, + shared, + } = self; + // shared is shared across all State's (effectively) so we don't currently account for that + // allocation. + min_key_id.size() + max_seen_key_id.size() + seen.size() + std::mem::size_of_val(shared) + } +} + #[derive(Clone, Copy, Debug, PartialEq, Eq, thiserror::Error)] pub enum Error { /// This indicates that we know about this element and it *definitely* already exists. diff --git a/dc/s2n-quic-dc/src/path/secret/schedule.rs b/dc/s2n-quic-dc/src/path/secret/schedule.rs index dd44989f72..cdfae36a6a 100644 --- a/dc/s2n-quic-dc/src/path/secret/schedule.rs +++ b/dc/s2n-quic-dc/src/path/secret/schedule.rs @@ -109,6 +109,30 @@ pub struct Secret { ciphersuite: Ciphersuite, } +impl super::map::SizeOf for Id {} +impl super::map::SizeOf for Prk { + fn size(&self) -> usize { + // FIXME: maybe don't use this type since it has overhead and needs this weird assert to + // check the mode? + assert!(format!("{:?}", self).contains("mode: Expand")); + std::mem::size_of::() + } +} +impl super::map::SizeOf for endpoint::Type {} +impl super::map::SizeOf for Ciphersuite {} + +impl super::map::SizeOf for Secret { + fn size(&self) -> usize { + let Secret { + id, + prk, + endpoint, + ciphersuite, + } = self; + id.size() + prk.size() + endpoint.size() + ciphersuite.size() + } +} + impl Secret { #[inline] pub fn new( diff --git a/dc/s2n-quic-dc/src/path/secret/sender.rs b/dc/s2n-quic-dc/src/path/secret/sender.rs index 54d6193dda..99785c88b7 100644 --- a/dc/s2n-quic-dc/src/path/secret/sender.rs +++ b/dc/s2n-quic-dc/src/path/secret/sender.rs @@ -3,7 +3,6 @@ use super::schedule; use crate::{crypto::awslc::open, packet::secret_control}; -use once_cell::sync::OnceCell; use s2n_quic_core::varint::VarInt; use std::sync::atomic::{AtomicU64, Ordering}; @@ -13,7 +12,18 @@ type StatelessReset = [u8; secret_control::TAG_LEN]; pub struct State { current_id: AtomicU64, pub(super) stateless_reset: StatelessReset, - control_secret: OnceCell, +} + +impl super::map::SizeOf for StatelessReset {} + +impl super::map::SizeOf for State { + fn size(&self) -> usize { + let State { + current_id, + stateless_reset, + } = self; + current_id.size() + stateless_reset.size() + } } impl State { @@ -21,7 +31,6 @@ impl State { Self { current_id: AtomicU64::new(0), stateless_reset, - control_secret: Default::default(), } } @@ -47,8 +56,10 @@ impl State { } #[inline] - pub fn control_secret(&self, secret: &schedule::Secret) -> &open::control::Secret { - self.control_secret.get_or_init(|| secret.control_opener()) + pub fn control_secret(&self, secret: &schedule::Secret) -> open::control::Secret { + // We don't try to cache this, hmac init is cheap (~200-600ns depending on algorithm) and + // the space requirement is huge (700+ bytes) + secret.control_opener() } /// Update the sender for a received stale key packet.