Skip to content

Commit

Permalink
Add additional checksum algorithms in mountpoint-s3-crt crate (#1082)
Browse files Browse the repository at this point in the history
* Add support for SHA1

Signed-off-by: Alessandro Passaro <[email protected]>

* Remove implementation of std::hash::Hasher for checksum types

Signed-off-by: Daniel Carl Jones <[email protected]>

* Add benchmark for SHA1 checksum

Signed-off-by: Daniel Carl Jones <[email protected]>

* Fix Rustdoc, length checks for c_int

Signed-off-by: Daniel Carl Jones <[email protected]>

* Add CRC64, SHA256

Signed-off-by: Daniel Carl Jones <[email protected]>

* Add changelog entry for adding bindings

Signed-off-by: Daniel Carl Jones <[email protected]>

* Add PR links for change log entry

Signed-off-by: Daniel Carl Jones <[email protected]>

* Remove mountpoint-s3-client changes

Signed-off-by: Daniel Carl Jones <[email protected]>

* Update SHA1 tests to be consistent with SHA256 tests

Signed-off-by: Daniel Carl Jones <[email protected]>

* Add ByteBuf wrapper for aws_byte_buf

Signed-off-by: Daniel Carl Jones <[email protected]>

* Add CRT IO lib init call on benchmark lib load

Signed-off-by: Daniel Carl Jones <[email protected]>

---------

Signed-off-by: Alessandro Passaro <[email protected]>
Signed-off-by: Daniel Carl Jones <[email protected]>
Co-authored-by: Alessandro Passaro <[email protected]>
  • Loading branch information
dannycjones and passaro authored Oct 28, 2024
1 parent 8f2770b commit 05a50da
Show file tree
Hide file tree
Showing 11 changed files with 427 additions and 31 deletions.
1 change: 1 addition & 0 deletions mountpoint-s3-crt-sys/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const CRT_LIBRARIES: &[&str] = &[
const CRT_HEADERS: &[&str] = &[
"auth/credentials.h",
"auth/aws_imds_client.h",
"cal/hash.h",
"checksums/crc.h",
"common/atomics.h",
"common/log_channel.h",
Expand Down
5 changes: 4 additions & 1 deletion mountpoint-s3-crt/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
## Unreleased

* Update to latest CRT dependencies
* Update to latest CRT dependencies.
* Checksum hashers no longer implement `std::hash::Hasher`. ([#1082](https://github.com/awslabs/mountpoint-s3/pull/1082))
* Add bindings to remaining checksum types CRC64, SHA1, and SHA256. ([#1082](https://github.com/awslabs/mountpoint-s3/pull/1082))
* Add wrapping type `ByteBuf` for `aws_byte_buf`. ([#1082](https://github.com/awslabs/mountpoint-s3/pull/1082))

## v0.10.0 (October 17, 2024)

Expand Down
22 changes: 20 additions & 2 deletions mountpoint-s3-crt/benches/checksums.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
//! Benchmarks for the CRT checksums library
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use mountpoint_s3_crt::checksums::{crc32, crc32c};
use mountpoint_s3_crt::checksums::{crc32, crc32c, crc64, sha1, sha256};
use rand::rngs::SmallRng;
use rand::{Rng, SeedableRng};

/// Initializes the CRT IO library when the benchmark binary is loaded.
#[ctor::ctor]
fn init_crt() {
    // TODO: If we add additional benchmarks needing CRT initialization, move to a benchmark harness script.
    let allocator = mountpoint_s3_crt::common::allocator::Allocator::default();
    mountpoint_s3_crt::io::io_library_init(&allocator);
}

fn benchmark_hasher<F, R>(c: &mut Criterion, hash_fn: F, name: &str)
where
F: Fn(&[u8]) -> R,
Expand Down Expand Up @@ -35,5 +41,17 @@ fn crc32c(c: &mut Criterion) {
benchmark_hasher(c, crc32c::checksum, "crc32c");
}

criterion_group!(checksum_benches, crc32, crc32c);
/// Benchmarks the one-shot CRC64 checksum function.
fn crc64(c: &mut Criterion) {
    let label = "crc64";
    benchmark_hasher(c, crc64::checksum, label);
}

/// Benchmarks the one-shot SHA1 checksum function.
fn sha1(c: &mut Criterion) {
    let label = "sha1";
    benchmark_hasher(c, sha1::checksum, label);
}

/// Benchmarks the one-shot SHA256 checksum function.
fn sha256(c: &mut Criterion) {
    let label = "sha256";
    benchmark_hasher(c, sha256::checksum, label);
}

criterion_group!(checksum_benches, crc32, crc32c, crc64, sha1, sha256);
criterion_main!(checksum_benches);
22 changes: 8 additions & 14 deletions mountpoint-s3-crt/src/checksums/crc32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,18 @@ impl Hasher {

/// Compute CRC32 checksum of the data in the given bytes slice, append to the previous checksum.
///
/// The underlying CRT funtion requires the buffer's length to be type `i32`, so this function cannot take
/// any buffer that is bigger than `i32::MAX` as an input.
/// The underlying CRT function requires the buffer's length to be type [::libc::c_int], so this function cannot take
/// any buffer that is bigger than [::libc::c_int::MAX] as an input.
fn crc32(buf: &[u8], previous_checksum: u32) -> u32 {
assert!(buf.len() <= i32::MAX as usize);
assert!(
buf.len() <= ::libc::c_int::MAX as usize,
"buffer length cannot exceed {}",
::libc::c_int::MAX,
);

// SAFETY: we pass a valid buffer to the CRT, and trust
// the CRT function to only read from the buffer's boundary.
unsafe { aws_checksums_crc32(buf.as_ptr(), buf.len() as i32, previous_checksum) }
unsafe { aws_checksums_crc32(buf.as_ptr(), buf.len() as ::libc::c_int, previous_checksum) }
}
}

Expand All @@ -66,16 +70,6 @@ impl Default for Hasher {
}
}

impl std::hash::Hasher for Hasher {
fn finish(&self) -> u64 {
self.clone().finalize().0.into()
}

fn write(&mut self, bytes: &[u8]) {
self.update(bytes);
}
}

#[cfg(test)]
mod tests {
use crate::checksums::crc32::{self, Crc32};
Expand Down
22 changes: 8 additions & 14 deletions mountpoint-s3-crt/src/checksums/crc32c.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,18 @@ impl Hasher {

/// Compute CRC32C checksum of the data in the given bytes slice, append to the previous checksum.
///
/// The underlying CRT funtion requires the buffer's length to be type `i32`, so this function cannot take
/// any buffer that is bigger than `i32::MAX` as an input.
/// The underlying CRT function requires the buffer's length to be type [::libc::c_int], so this function cannot take
/// any buffer that is bigger than [::libc::c_int::MAX] as an input.
fn crc32c(buf: &[u8], previous_checksum: u32) -> u32 {
assert!(buf.len() <= i32::MAX as usize);
assert!(
buf.len() <= ::libc::c_int::MAX as usize,
"buffer length cannot exceed {}",
::libc::c_int::MAX,
);

// SAFETY: we pass a valid buffer to the CRT, and trust
// the CRT function to only read from the buffer's boundary.
unsafe { aws_checksums_crc32c(buf.as_ptr(), buf.len() as i32, previous_checksum) }
unsafe { aws_checksums_crc32c(buf.as_ptr(), buf.len() as ::libc::c_int, previous_checksum) }
}
}

Expand All @@ -66,16 +70,6 @@ impl Default for Hasher {
}
}

impl std::hash::Hasher for Hasher {
fn finish(&self) -> u64 {
self.clone().finalize().0.into()
}

fn write(&mut self, bytes: &[u8]) {
self.update(bytes);
}
}

#[cfg(test)]
mod tests {
use crate::checksums::crc32c::{self, Crc32c};
Expand Down
92 changes: 92 additions & 0 deletions mountpoint-s3-crt/src/checksums/crc64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
use mountpoint_s3_crt_sys::aws_checksums_crc64nvme;

/// A CRC64-NVME (aka. CRC64-Rocksoft) checksum value.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Crc64(u64);

impl Crc64 {
    /// Wrap a raw 64-bit value as a CRC64 checksum.
    pub fn new(value: u64) -> Self {
        Crc64(value)
    }

    /// Return the raw 64-bit checksum value.
    pub fn value(&self) -> u64 {
        let Crc64(raw) = *self;
        raw
    }
}

/// Computes the CRC64 checksum of a byte slice in one call.
///
/// Use [Crc64Hasher] when the data arrives in several pieces.
pub fn checksum(buf: &[u8]) -> Crc64 {
    let mut running = Crc64Hasher::new();
    running.update(buf);
    let result = running.finalize();
    result
}

/// CRC64 Hasher
#[derive(Debug, Clone)]
pub struct Crc64Hasher {
state: Crc64,
}

impl Crc64Hasher {
/// Create a new CRC64 Hasher.
pub fn new() -> Self {
Self { state: Crc64(0) }
}

/// Update the hash state with the given bytes slice.
pub fn update(&mut self, buf: &[u8]) {
self.state = Crc64(Self::crc64(buf, self.state.0));
}

/// Finalize the hash state and return the computed CRC64 checksum value.
pub fn finalize(self) -> Crc64 {
self.state
}

/// Compute CRC64 checksum of the data in the given bytes slice, append to the previous checksum.
///
/// The underlying CRT function requires the buffer's length to be type [::libc::c_int], so this function cannot take
/// any buffer that is bigger than [::libc::c_int::MAX] as an input.
fn crc64(buf: &[u8], previous_checksum: u64) -> u64 {
assert!(
buf.len() <= ::libc::c_int::MAX as usize,
"buffer length cannot exceed {}",
::libc::c_int::MAX,
);

// SAFETY: we pass a valid buffer to the CRT, and trust
// the CRT function to only read from the buffer's boundary.
unsafe { aws_checksums_crc64nvme(buf.as_ptr(), buf.len() as ::libc::c_int, previous_checksum) }
}
}

impl Default for Crc64Hasher {
fn default() -> Self {
Self::new()
}
}

#[cfg(test)]
mod tests {
    use crate::checksums::crc64::{self, Crc64};

    /// Expected CRC64 of the ASCII string "123456789".
    const EXPECTED: Crc64 = Crc64(0xAE8B14860A799888);

    /// One-shot checksum of the whole input.
    #[test]
    fn crc64_simple() {
        let crc = crc64::checksum(b"123456789");
        assert_eq!(crc, EXPECTED);
    }

    /// Feeding the same input in two pieces must give the same checksum.
    #[test]
    fn crc64_append() {
        let mut hasher = crc64::Crc64Hasher::new();
        for part in [&b"1234"[..], &b"56789"[..]] {
            hasher.update(part);
        }
        assert_eq!(hasher.finalize(), EXPECTED);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,12 @@ pub mod crc32;

/// CRC32C checksums
pub mod crc32c;

/// CRC64 checksums
pub mod crc64;

/// SHA1 checksums
pub mod sha1;

/// SHA256 checksums
pub mod sha256;
108 changes: 108 additions & 0 deletions mountpoint-s3-crt/src/checksums/sha1.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
use std::ptr::NonNull;

use mountpoint_s3_crt_sys::{
aws_hash, aws_hash_destroy, aws_hash_finalize, aws_hash_update, aws_sha1_new, AWS_SHA1_LEN,
};

use crate::common::allocator::Allocator;
use crate::common::byte_buf::ByteBuf;
use crate::common::error::Error;
use crate::{CrtError as _, ToAwsByteCursor};

/// A SHA1 checksum, stored as its raw bytes.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Sha1([u8; Self::LENGTH]);

impl Sha1 {
    /// Length of a SHA1 checksum in bytes.
    pub const LENGTH: usize = AWS_SHA1_LEN as usize;

    /// Wrap the given raw bytes as a SHA1 checksum.
    pub fn new(value: [u8; Self::LENGTH]) -> Self {
        Sha1(value)
    }

    /// Borrow the raw bytes of the checksum.
    pub fn value(&self) -> &[u8; Self::LENGTH] {
        let Sha1(bytes) = self;
        bytes
    }
}

/// Computes the SHA1 checksum of a byte slice in one call, using the default allocator.
///
/// Use [Sha1Hasher] when the data arrives in several pieces.
///
/// # Errors
///
/// Returns the error from creating, updating, or finalizing the underlying hasher.
pub fn checksum(buf: &[u8]) -> Result<Sha1, Error> {
    let allocator = Allocator::default();
    Sha1Hasher::new(&allocator).and_then(|mut hasher| {
        hasher.update(buf)?;
        hasher.finalize(&allocator)
    })
}

/// SHA1 Hasher
///
/// Owns a CRT `aws_hash` handle, which is destroyed when the hasher is dropped.
///
/// This type deliberately does NOT implement `Clone`: a derived clone would only copy the raw
/// pointer, so the original and the clone would both call `aws_hash_destroy` on the same handle
/// when dropped (double free).
#[derive(Debug)]
pub struct Sha1Hasher {
    inner: NonNull<aws_hash>,
}

impl Sha1Hasher {
    /// Create a new [Sha1Hasher].
    ///
    /// # Errors
    ///
    /// Returns the last CRT error if the underlying hash object could not be created.
    pub fn new(allocator: &Allocator) -> Result<Self, Error> {
        // SAFETY: allocator is a valid aws_allocator, and we check the return is non-null.
        let inner = unsafe { aws_sha1_new(allocator.inner.as_ptr()).ok_or_last_error()? };
        Ok(Self { inner })
    }

    /// Update the hash state with the given bytes slice.
    ///
    /// # Errors
    ///
    /// Returns the last CRT error if the update call reports a failure.
    pub fn update(&mut self, buf: &[u8]) -> Result<(), Error> {
        // SAFETY: `self.inner` is a valid `aws_hash` and `buf` will outlive the call to `aws_hash_update`.
        unsafe { aws_hash_update(self.inner.as_ptr(), &buf.as_aws_byte_cursor()).ok_or_last_error() }
    }

    /// Finalize the hash state and return the computed SHA1 checksum value.
    ///
    /// Consumes the hasher; the underlying hash object is destroyed when `self` is dropped at the
    /// end of this call, whether or not finalization succeeded.
    ///
    /// # Errors
    ///
    /// Returns an error if the output buffer cannot be allocated or if the finalize call reports
    /// a failure.
    ///
    /// # Panics
    ///
    /// Panics if the finalized output is not exactly [Sha1::LENGTH] bytes long.
    pub fn finalize(self, allocator: &Allocator) -> Result<Sha1, Error> {
        let mut buffer = ByteBuf::new(allocator, Sha1::LENGTH)?;

        // SAFETY: `self.inner` is a valid `aws_hash` and `buffer` was initialized above.
        unsafe { aws_hash_finalize(self.inner.as_ptr(), buffer.as_mut_ptr(), 0).ok_or_last_error()? };

        // Slice will be copied into the struct.
        let digest = buffer
            .as_slice()
            .try_into()
            .expect("finalized SHA1 output should be exactly Sha1::LENGTH bytes");
        Ok(Sha1(digest))
    }
}

impl Drop for Sha1Hasher {
    fn drop(&mut self) {
        // SAFETY: `self.inner` is a valid `aws_hash` and safe to destroy since it's the only reference
        // (the type is not `Clone`, so the pointer is never duplicated).
        unsafe {
            aws_hash_destroy(self.inner.as_ptr());
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Expected SHA1 digest of the ASCII string "123456789".
    const EXPECTED_DIGEST: [u8; Sha1::LENGTH] = [
        247, 195, 188, 29, 128, 142, 4, 115, 42, 223, 103, 153, 101, 204, 195, 76, 167, 174, 52, 65,
    ];

    /// One-shot checksum of the whole input.
    #[test]
    fn sha1_simple() {
        let sha1 = checksum(b"123456789").expect("checksum failed");
        assert_eq!(sha1, Sha1(EXPECTED_DIGEST));
    }

    /// Feeding the same input in two pieces must give the same digest.
    #[test]
    fn sha1_append() {
        let allocator = Allocator::default();
        let mut hasher = Sha1Hasher::new(&allocator).expect("hasher creation failed");
        for part in [&b"1234"[..], &b"56789"[..]] {
            hasher.update(part).expect("hasher updated failed");
        }
        let sha1 = hasher.finalize(&allocator).expect("hasher finalization failed");
        assert_eq!(sha1, Sha1(EXPECTED_DIGEST));
    }
}
Loading

0 comments on commit 05a50da

Please sign in to comment.