-
Notifications
You must be signed in to change notification settings - Fork 182
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add additional checksum algorithms in mountpoint-s3-crt crate (#1082)
* Add support for SHA1 Signed-off-by: Alessandro Passaro <[email protected]> * Remove implementation of std::hash::Hasher for checksum types Signed-off-by: Daniel Carl Jones <[email protected]> * Add benchmark for SHA1 checksum Signed-off-by: Daniel Carl Jones <[email protected]> * Fix Rustdoc, length checks for c_int Signed-off-by: Daniel Carl Jones <[email protected]> * Add CRC64, SHA256 Signed-off-by: Daniel Carl Jones <[email protected]> * Add changelog entry for adding bindings Signed-off-by: Daniel Carl Jones <[email protected]> * Add PR links for change log entry Signed-off-by: Daniel Carl Jones <[email protected]> * Remove mountpoint-s3-client changes Signed-off-by: Daniel Carl Jones <[email protected]> * Update SHA1 tests to be consistent with SHA256 tests Signed-off-by: Daniel Carl Jones <[email protected]> * Add ByteBuf wrapper for aws_byte_buf Signed-off-by: Daniel Carl Jones <[email protected]> * Add CRT IO lib init call on benchmark lib load Signed-off-by: Daniel Carl Jones <[email protected]> --------- Signed-off-by: Alessandro Passaro <[email protected]> Signed-off-by: Daniel Carl Jones <[email protected]> Co-authored-by: Alessandro Passaro <[email protected]>
- Loading branch information
1 parent
8f2770b
commit 05a50da
Showing
11 changed files
with
427 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
use mountpoint_s3_crt_sys::aws_checksums_crc64nvme; | ||
|
||
/// A CRC64-NVME (also known as CRC64-Rocksoft) checksum value.
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub struct Crc64(u64);

impl Crc64 {
    /// Wrap a raw 64-bit value as a CRC64 checksum.
    pub fn new(value: u64) -> Self {
        Crc64(value)
    }

    /// Return the raw 64-bit checksum value.
    pub fn value(&self) -> u64 {
        let Crc64(raw) = *self;
        raw
    }
}
|
||
/// Computes the CRC64 checksum of a byte slice. | ||
/// | ||
/// Use [Crc64Hasher] for more advanced use-cases. | ||
pub fn checksum(buf: &[u8]) -> Crc64 { | ||
let mut hasher = Crc64Hasher::new(); | ||
hasher.update(buf); | ||
hasher.finalize() | ||
} | ||
|
||
/// CRC64 Hasher | ||
#[derive(Debug, Clone)] | ||
pub struct Crc64Hasher { | ||
state: Crc64, | ||
} | ||
|
||
impl Crc64Hasher { | ||
/// Create a new CRC64 Hasher. | ||
pub fn new() -> Self { | ||
Self { state: Crc64(0) } | ||
} | ||
|
||
/// Update the hash state with the given bytes slice. | ||
pub fn update(&mut self, buf: &[u8]) { | ||
self.state = Crc64(Self::crc64(buf, self.state.0)); | ||
} | ||
|
||
/// Finalize the hash state and return the computed CRC64 checksum value. | ||
pub fn finalize(self) -> Crc64 { | ||
self.state | ||
} | ||
|
||
/// Compute CRC64 checksum of the data in the given bytes slice, append to the previous checksum. | ||
/// | ||
/// The underlying CRT function requires the buffer's length to be type [::libc::c_int], so this function cannot take | ||
/// any buffer that is bigger than [::libc::c_int::MAX] as an input. | ||
fn crc64(buf: &[u8], previous_checksum: u64) -> u64 { | ||
assert!( | ||
buf.len() <= ::libc::c_int::MAX as usize, | ||
"buffer length cannot exceed {}", | ||
::libc::c_int::MAX, | ||
); | ||
|
||
// SAFETY: we pass a valid buffer to the CRT, and trust | ||
// the CRT function to only read from the buffer's boundary. | ||
unsafe { aws_checksums_crc64nvme(buf.as_ptr(), buf.len() as ::libc::c_int, previous_checksum) } | ||
} | ||
} | ||
|
||
impl Default for Crc64Hasher { | ||
fn default() -> Self { | ||
Self::new() | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use crate::checksums::crc64::{self, Crc64};

    /// Expected CRC64 of the ASCII bytes `123456789`, shared by both tests.
    const EXPECTED: Crc64 = Crc64(0xAE8B14860A799888);

    /// One-shot checksum over the whole input.
    #[test]
    fn crc64_simple() {
        assert_eq!(crc64::checksum(b"123456789"), EXPECTED);
    }

    /// Feeding the same input in two parts must yield the same checksum.
    #[test]
    fn crc64_append() {
        let mut hasher = crc64::Crc64Hasher::new();
        for part in [&b"1234"[..], &b"56789"[..]] {
            hasher.update(part);
        }
        assert_eq!(hasher.finalize(), EXPECTED);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
use std::ptr::NonNull; | ||
|
||
use mountpoint_s3_crt_sys::{ | ||
aws_hash, aws_hash_destroy, aws_hash_finalize, aws_hash_update, aws_sha1_new, AWS_SHA1_LEN, | ||
}; | ||
|
||
use crate::common::allocator::Allocator; | ||
use crate::common::byte_buf::ByteBuf; | ||
use crate::common::error::Error; | ||
use crate::{CrtError as _, ToAwsByteCursor}; | ||
|
||
/// SHA1 checksum | ||
#[derive(Debug, Clone, PartialEq, Eq)] | ||
pub struct Sha1([u8; Self::LENGTH]); | ||
|
||
impl Sha1 { | ||
/// Length of a SHA1 checksum. | ||
pub const LENGTH: usize = AWS_SHA1_LEN as usize; | ||
|
||
/// Create a SHA1 checksum. | ||
pub fn new(value: [u8; Self::LENGTH]) -> Self { | ||
Self(value) | ||
} | ||
|
||
/// The binary value | ||
pub fn value(&self) -> &[u8; Self::LENGTH] { | ||
&self.0 | ||
} | ||
} | ||
|
||
/// Computes the SHA1 checksum of a byte slice. | ||
/// | ||
/// Use [Sha1Hasher] for more advanced use-cases. | ||
pub fn checksum(buf: &[u8]) -> Result<Sha1, Error> { | ||
let allocator = Allocator::default(); | ||
let mut hasher = Sha1Hasher::new(&allocator)?; | ||
hasher.update(buf)?; | ||
hasher.finalize(&allocator) | ||
} | ||
|
||
/// SHA1 Hasher | ||
#[derive(Debug, Clone)] | ||
pub struct Sha1Hasher { | ||
inner: NonNull<aws_hash>, | ||
} | ||
|
||
impl Sha1Hasher { | ||
/// Create a new [Sha1Hasher]. | ||
pub fn new(allocator: &Allocator) -> Result<Self, Error> { | ||
// SAFETY: allocator is a valid aws_allocator, and we check the return is non-null. | ||
let inner = unsafe { aws_sha1_new(allocator.inner.as_ptr()).ok_or_last_error()? }; | ||
Ok(Self { inner }) | ||
} | ||
|
||
/// Update the hash state with the given bytes slice. | ||
pub fn update(&mut self, buf: &[u8]) -> Result<(), Error> { | ||
// SAFETY: `self.inner` is a valid `aws_hash` and `buf` will outlive the call to `aws_hash_update`. | ||
unsafe { aws_hash_update(self.inner.as_ptr(), &buf.as_aws_byte_cursor()).ok_or_last_error() } | ||
} | ||
|
||
/// Finalize the hash state and return the computed SHA1 checksum value. | ||
pub fn finalize(self, allocator: &Allocator) -> Result<Sha1, Error> { | ||
let mut buffer = ByteBuf::new(allocator, Sha1::LENGTH)?; | ||
|
||
// SAFETY: `self.inner` is a valid `aws_hash` and `buffer` was initialized above. | ||
unsafe { aws_hash_finalize(self.inner.as_ptr(), buffer.as_mut_ptr(), 0).ok_or_last_error()? }; | ||
|
||
// Slice will be copied into the struct. | ||
Ok(Sha1(buffer.as_slice().try_into().unwrap())) | ||
} | ||
} | ||
|
||
impl Drop for Sha1Hasher { | ||
fn drop(&mut self) { | ||
// SAFETY: `self.inner` is a valid `aws_hash` and safe to destroy since it's the only reference. | ||
unsafe { | ||
aws_hash_destroy(self.inner.as_ptr()); | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected SHA1 of the ASCII bytes `123456789`, shared by both tests.
    const EXPECTED: Sha1 =
        Sha1([247, 195, 188, 29, 128, 142, 4, 115, 42, 223, 103, 153, 101, 204, 195, 76, 167, 174, 52, 65]);

    /// One-shot checksum over the whole input.
    #[test]
    fn sha1_simple() {
        let digest = checksum(b"123456789").expect("checksum failed");
        assert_eq!(digest, EXPECTED);
    }

    /// Feeding the same input in two parts must yield the same checksum.
    #[test]
    fn sha1_append() {
        let allocator = Allocator::default();
        let mut hasher = Sha1Hasher::new(&allocator).expect("hasher creation failed");
        for part in [&b"1234"[..], &b"56789"[..]] {
            hasher.update(part).expect("hasher updated failed");
        }
        let digest = hasher.finalize(&allocator).expect("hasher finalization failed");
        assert_eq!(digest, EXPECTED);
    }
}
Oops, something went wrong.