diff --git a/mountpoint-s3-client/src/object_client.rs b/mountpoint-s3-client/src/object_client.rs index 87d01ae3a..77f45d86a 100644 --- a/mountpoint-s3-client/src/object_client.rs +++ b/mountpoint-s3-client/src/object_client.rs @@ -20,7 +20,7 @@ pub type GetBodyPart = (u64, Box<[u8]>); /// An ETag (entity tag) is a unique identifier for a HTTP object. /// /// New ETags can be created with the [`FromStr`] implementation. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, Hash, PartialEq, Eq)] pub struct ETag { etag: String, } diff --git a/mountpoint-s3/src/data_cache.rs b/mountpoint-s3/src/data_cache.rs index ac33e0635..118193d9d 100644 --- a/mountpoint-s3/src/data_cache.rs +++ b/mountpoint-s3/src/data_cache.rs @@ -8,10 +8,18 @@ pub mod in_memory_data_cache; use std::ops::RangeBounds; +use mountpoint_s3_client::types::ETag; use thiserror::Error; pub use crate::checksums::ChecksummedBytes; +/// Struct representing a key for accessing an entry in a [DataCache]. +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub struct CacheKey { + s3_key: String, + etag: ETag, +} + /// Indexes blocks within a given object. pub type BlockIndex = u64; @@ -26,20 +34,18 @@ pub enum DataCacheError { pub type DataCacheResult = Result; -/// Cache data with a checksum identified by some [Key]. -/// -/// The underlying cache is divided into blocks of equal size. +/// Data cache for fixed-size checksummed buffers. /// /// TODO: Deletion and eviction of cache entries. -/// TODO: Some version information (ETag) independent from [Key] to allow smarter eviction? -pub trait DataCache { - /// Get block of data from the cache for the given [Key] and [BlockIndex], if available. +/// TODO: Some version information (ETag) independent from [CacheKey] to allow smarter eviction? +pub trait DataCache { + /// Get block of data from the cache for the given [CacheKey] and [BlockIndex], if available. /// /// Operation may fail due to errors, or return [None] if the block was not available in the cache. - fn get_block(&self, cache_key: &Key, block_idx: BlockIndex) -> DataCacheResult>; + fn get_block(&self, cache_key: &CacheKey, block_idx: BlockIndex) -> DataCacheResult>; - /// Put block of data to the cache for the given [Key] and [BlockIndex]. - fn put_block(&self, cache_key: Key, block_idx: BlockIndex, bytes: ChecksummedBytes) -> DataCacheResult<()>; + /// Put block of data to the cache for the given [CacheKey] and [BlockIndex]. + fn put_block(&self, cache_key: CacheKey, block_idx: BlockIndex, bytes: ChecksummedBytes) -> DataCacheResult<()>; /// Returns the block size for the data cache. fn block_size(&self) -> u64; @@ -52,7 +58,7 @@ pub trait DataCache { /// There is no guarantee that the data will still be available at the time of reading. fn cached_block_indices>( &self, - cache_key: &Key, + cache_key: &CacheKey, range: R, ) -> DataCacheResult>; } diff --git a/mountpoint-s3/src/data_cache/in_memory_data_cache.rs b/mountpoint-s3/src/data_cache/in_memory_data_cache.rs index 809453adb..f9852bd97 100644 --- a/mountpoint-s3/src/data_cache/in_memory_data_cache.rs +++ b/mountpoint-s3/src/data_cache/in_memory_data_cache.rs @@ -2,19 +2,18 @@ use std::collections::HashMap; use std::default::Default; -use std::hash::Hash; use std::ops::RangeBounds; -use super::{BlockIndex, ChecksummedBytes, DataCache, DataCacheResult}; +use super::{BlockIndex, CacheKey, ChecksummedBytes, DataCache, DataCacheResult}; use crate::sync::RwLock; /// Simple in-memory (RAM) implementation of [DataCache]. Recommended for use in testing only. -pub struct InMemoryDataCache { +pub struct InMemoryDataCache { data: RwLock>>, block_size: u64, } -impl InMemoryDataCache { +impl InMemoryDataCache { /// Create a new instance of an [InMemoryDataCache] with the specified `block_size`. pub fn new(block_size: u64) -> Self { InMemoryDataCache { @@ -24,14 +23,14 @@ impl InMemoryDataCache { } } -impl DataCache for InMemoryDataCache { - fn get_block(&self, cache_key: &Key, block_idx: BlockIndex) -> DataCacheResult> { +impl DataCache for InMemoryDataCache { + fn get_block(&self, cache_key: &CacheKey, block_idx: BlockIndex) -> DataCacheResult> { let data = self.data.read().unwrap(); let block_data = data.get(cache_key).and_then(|blocks| blocks.get(&block_idx)).cloned(); Ok(block_data) } - fn put_block(&self, cache_key: Key, block_idx: BlockIndex, bytes: ChecksummedBytes) -> DataCacheResult<()> { + fn put_block(&self, cache_key: CacheKey, block_idx: BlockIndex, bytes: ChecksummedBytes) -> DataCacheResult<()> { let mut data = self.data.write().unwrap(); let blocks = data.entry(cache_key).or_default(); blocks.insert(block_idx, bytes); @@ -44,7 +43,7 @@ impl DataCache for InMemoryDataCache { fn cached_block_indices>( &self, - cache_key: &Key, + cache_key: &CacheKey, range: R, ) -> DataCacheResult> { let data = self.data.read().unwrap(); @@ -63,8 +62,7 @@ mod tests { use super::*; use bytes::Bytes; - - type TestCacheKey = String; + use mountpoint_s3_client::types::ETag; #[test] fn test_put_get() { @@ -76,8 +74,14 @@ mod tests { let data_3 = ChecksummedBytes::from_bytes(data_3.clone()); let cache = InMemoryDataCache::new(8 * 1024 * 1024); - let cache_key_1: TestCacheKey = String::from("a"); - let cache_key_2: TestCacheKey = String::from("b"); + let cache_key_1 = CacheKey { + s3_key: "a".into(), + etag: ETag::for_tests(), + }; + let cache_key_2 = CacheKey { + s3_key: "b".into(), + etag: ETag::for_tests(), + }; let block = cache.get_block(&cache_key_1, 0).expect("cache is accessible"); assert!( @@ -144,8 +148,14 @@ mod tests { let data_2 = ChecksummedBytes::from_bytes(data_2.clone()); let cache = InMemoryDataCache::new(8 * 1024 * 1024); - let cache_key_1: TestCacheKey = String::from("a"); - let cache_key_2: TestCacheKey = String::from("b"); + let cache_key_1 = CacheKey { + s3_key: "a".into(), + etag: ETag::for_tests(), + }; + let cache_key_2 = CacheKey { + s3_key: "b".into(), + etag: ETag::for_tests(), + }; let cached_indices = cache .cached_block_indices(&cache_key_1, 0..5)