Skip to content

Commit

Permalink
Add caching Prefetch implementation (#598)
Browse files Browse the repository at this point in the history
* Integrate cache in Prefetcher

Introduce a new implementation of the Prefetch trait which uses a DataCache. This change also adds CLI args to Mountpoint to enable the on-disk data cache. The new args are only available under the "caching" feature and are subject to change.

Signed-off-by: Alessandro Passaro <[email protected]>

* Introduce unified RequestRange::align

Signed-off-by: Alessandro Passaro <[email protected]>

* Add comments and address other feedback

Signed-off-by: Alessandro Passaro <[email protected]>

* Assert invariants when populating the cache from the GetObject result

Signed-off-by: Alessandro Passaro <[email protected]>

---------

Signed-off-by: Alessandro Passaro <[email protected]>
  • Loading branch information
passaro authored Nov 15, 2023
1 parent efc4c0e commit a1e4d86
Show file tree
Hide file tree
Showing 9 changed files with 783 additions and 58 deletions.
3 changes: 2 additions & 1 deletion mountpoint-s3/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,8 @@ where
))) => reply.error(err!(libc::ESTALE, "object was mutated remotely")),
Err(PrefetchReadError::Integrity(e)) => reply.error(err!(libc::EIO, source:e, "integrity error")),
Err(e @ PrefetchReadError::GetRequestFailed(_))
| Err(e @ PrefetchReadError::GetRequestTerminatedUnexpectedly) => {
| Err(e @ PrefetchReadError::GetRequestTerminatedUnexpectedly)
| Err(e @ PrefetchReadError::GetRequestReturnedWrongOffset { .. }) => {
reply.error(err!(libc::EIO, source:e, "get request failed"))
}
}
Expand Down
38 changes: 38 additions & 0 deletions mountpoint-s3/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,28 @@ struct CliArgs {
)]
pub metadata_cache_ttl: Option<Duration>,

// TODO: Temporary for testing. Review before exposing outside "caching" feature.
#[cfg(feature = "caching")]
#[clap(
long,
help = "Enable caching of object data in a directory",
help_heading = CACHING_OPTIONS_HEADER,
value_name = "DIRECTORY",
)]
pub data_caching_directory: Option<PathBuf>,

// TODO: Temporary for testing. Review before exposing outside "caching" feature.
#[cfg(feature = "caching")]
#[clap(
long,
help = "Block size for the data cache",
default_value = "1048576",
value_parser = value_parser!(u64).range(1..),
help_heading = CACHING_OPTIONS_HEADER,
requires = "data_caching_directory",
)]
pub data_cache_block_size: u64,

#[clap(
long,
help = "Configure a string to be prepended to the 'User-Agent' HTTP request header for all S3 requests",
Expand Down Expand Up @@ -537,7 +559,9 @@ fn mount(args: CliArgs) -> anyhow::Result<FuseSession> {

#[cfg(feature = "caching")]
{
use mountpoint_s3::data_cache::DiskDataCache;
use mountpoint_s3::fs::CacheConfig;
use mountpoint_s3::prefetch::caching_prefetch;

if args.enable_metadata_caching {
// TODO: Review default for TTL
Expand All @@ -548,6 +572,20 @@ fn mount(args: CliArgs) -> anyhow::Result<FuseSession> {
file_ttl: metadata_cache_ttl,
};
}

if let Some(path) = args.data_caching_directory {
let cache = DiskDataCache::new(path, args.data_cache_block_size);
let prefetcher = caching_prefetch(cache, runtime, prefetcher_config);
return create_filesystem(
client,
prefetcher,
&args.bucket_name,
&prefix,
filesystem_config,
fuse_config,
&bucket_description,
);
}
}

let prefetcher = default_prefetch(runtime, prefetcher_config);
Expand Down
108 changes: 95 additions & 13 deletions mountpoint-s3/src/prefetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//! we increase the size of the GetObject requests up to some maximum. If the reader ever makes a
//! non-sequential read, we abandon the prefetching and start again with the minimum request size.
mod caching_stream;
mod part;
mod part_queue;
mod part_stream;
Expand All @@ -27,6 +28,8 @@ use thiserror::Error;
use tracing::trace;

use crate::checksums::{ChecksummedBytes, IntegrityError};
use crate::data_cache::DataCache;
use crate::prefetch::caching_stream::CachingPartStream;
use crate::prefetch::part_stream::{ClientPartStream, ObjectPartStream, RequestRange};
use crate::prefetch::seek_window::SeekWindow;
use crate::prefetch::task::RequestTask;
Expand Down Expand Up @@ -67,6 +70,9 @@ pub enum PrefetchReadError<E> {
#[error("get object request failed")]
GetRequestFailed(#[source] ObjectClientError<GetObjectError, E>),

#[error("get object request returned wrong offset")]
GetRequestReturnedWrongOffset { offset: u64, expected_offset: u64 },

#[error("get request terminated unexpectedly")]
GetRequestTerminatedUnexpectedly,

Expand All @@ -76,6 +82,7 @@ pub enum PrefetchReadError<E> {

pub type DefaultPrefetcher<Runtime> = Prefetcher<ClientPartStream<Runtime>>;

/// Creates an instance of the default [Prefetch].
pub fn default_prefetch<Runtime>(runtime: Runtime, prefetcher_config: PrefetcherConfig) -> DefaultPrefetcher<Runtime>
where
Runtime: Spawn + Send + Sync + 'static,
Expand All @@ -84,6 +91,22 @@ where
Prefetcher::new(part_stream, prefetcher_config)
}

pub type CachingPrefetcher<Cache, Runtime> = Prefetcher<CachingPartStream<Cache, Runtime>>;

/// Creates an instance of a caching [Prefetch].
pub fn caching_prefetch<Cache, Runtime>(
cache: Cache,
runtime: Runtime,
prefetcher_config: PrefetcherConfig,
) -> CachingPrefetcher<Cache, Runtime>
where
Cache: DataCache + Send + Sync + 'static,
Runtime: Spawn + Send + Sync + 'static,
{
let part_stream = CachingPartStream::new(runtime, cache);
Prefetcher::new(part_stream, prefetcher_config)
}

#[derive(Debug, Clone, Copy)]
pub struct PrefetcherConfig {
/// Size of the first request in a prefetch run
Expand Down Expand Up @@ -504,6 +527,10 @@ mod tests {
// It's convenient to write test constants like "1 * 1024 * 1024" for symmetry
#![allow(clippy::identity_op)]

use crate::data_cache::InMemoryDataCache;
use crate::prefetch::part_stream::ClientPartStream;

use super::caching_stream::CachingPartStream;
use super::*;
use futures::executor::{block_on, ThreadPool};
use mountpoint_s3_client::error::{GetObjectError, ObjectClientError};
Expand All @@ -515,6 +542,8 @@ mod tests {
use std::collections::HashMap;
use test_case::test_case;

const MB: usize = 1024 * 1024;

#[derive(Debug, Arbitrary)]
struct TestConfig {
#[proptest(strategy = "16usize..1*1024*1024")]
Expand All @@ -536,6 +565,12 @@ mod tests {
ClientPartStream::new(runtime)
}

fn caching_stream(block_size: usize) -> CachingPartStream<InMemoryDataCache, ThreadPool> {
let runtime = ThreadPool::builder().pool_size(1).create().unwrap();
let cache = InMemoryDataCache::new(block_size as u64);
CachingPartStream::new(runtime, cache)
}

fn run_sequential_read_test<Stream: ObjectPartStream + Send + Sync + 'static>(
part_stream: Stream,
size: u64,
Expand Down Expand Up @@ -578,8 +613,12 @@ mod tests {
assert_eq!(next_offset, size);
}

#[test]
fn sequential_read_small() {
#[test_case(default_stream())]
#[test_case(caching_stream(1 * MB))]
fn sequential_read_small<Stream>(part_stream: Stream)
where
Stream: ObjectPartStream + Send + Sync + 'static,
{
let config = TestConfig {
first_request_size: 256 * 1024,
max_request_size: 1024 * 1024 * 1024,
Expand All @@ -588,11 +627,15 @@ mod tests {
max_forward_seek_distance: 16 * 1024 * 1024,
max_backward_seek_distance: 2 * 1024 * 1024,
};
run_sequential_read_test(default_stream(), 1024 * 1024 + 111, 1024 * 1024, config);
run_sequential_read_test(part_stream, 1024 * 1024 + 111, 1024 * 1024, config);
}

#[test]
fn sequential_read_medium() {
#[test_case(default_stream())]
#[test_case(caching_stream(1 * MB))]
fn sequential_read_medium<Stream>(part_stream: Stream)
where
Stream: ObjectPartStream + Send + Sync + 'static,
{
let config = TestConfig {
first_request_size: 256 * 1024,
max_request_size: 64 * 1024 * 1024,
Expand All @@ -601,11 +644,15 @@ mod tests {
max_forward_seek_distance: 16 * 1024 * 1024,
max_backward_seek_distance: 2 * 1024 * 1024,
};
run_sequential_read_test(default_stream(), 16 * 1024 * 1024 + 111, 1024 * 1024, config);
run_sequential_read_test(part_stream, 16 * 1024 * 1024 + 111, 1024 * 1024, config);
}

#[test]
fn sequential_read_large() {
#[test_case(default_stream())]
#[test_case(caching_stream(1 * MB))]
fn sequential_read_large<Stream>(part_stream: Stream)
where
Stream: ObjectPartStream + Send + Sync + 'static,
{
let config = TestConfig {
first_request_size: 256 * 1024,
max_request_size: 64 * 1024 * 1024,
Expand All @@ -614,7 +661,8 @@ mod tests {
max_forward_seek_distance: 16 * 1024 * 1024,
max_backward_seek_distance: 2 * 1024 * 1024,
};
run_sequential_read_test(default_stream(), 256 * 1024 * 1024 + 111, 1024 * 1024, config);

run_sequential_read_test(part_stream, 256 * 1024 * 1024 + 111, 1024 * 1024, config);
}

fn fail_sequential_read_test<Stream: ObjectPartStream + Send + Sync + 'static>(
Expand Down Expand Up @@ -664,10 +712,15 @@ mod tests {
assert!(next_offset < size); // Since we're injecting failures, shouldn't make it to the end
}

#[test_case("invalid range; length=42")]
#[test_case("invalid range; length=42", default_stream())]
#[test_case("invalid range; length=42", caching_stream(1 * MB))]
// test case for the request failure due to etag not matching
#[test_case("At least one of the pre-conditions you specified did not hold")]
fn fail_request_sequential_small(err_value: &str) {
#[test_case("At least one of the pre-conditions you specified did not hold", default_stream())]
#[test_case("At least one of the pre-conditions you specified did not hold", caching_stream(1 * MB))]
fn fail_request_sequential_small<Stream>(err_value: &str, part_stream: Stream)
where
Stream: ObjectPartStream + Send + Sync + 'static,
{
let config = TestConfig {
first_request_size: 256 * 1024,
max_request_size: 1024 * 1024 * 1024,
Expand All @@ -685,7 +738,7 @@ mod tests {
))),
);

fail_sequential_read_test(default_stream(), 1024 * 1024 + 111, 1024 * 1024, config, get_failures);
fail_sequential_read_test(part_stream, 1024 * 1024 + 111, 1024 * 1024, config, get_failures);
}

proptest! {
Expand All @@ -700,9 +753,28 @@ mod tests {

#[test]
fn proptest_sequential_read_small_read_size(size in 1u64..1 * 1024 * 1024, read_factor in 1usize..10, config: TestConfig) {
// Pick read size smaller than the object size
let read_size = (size as usize / read_factor).max(1);
run_sequential_read_test(default_stream(), size, read_size, config);
}

#[test]
fn proptest_sequential_read_with_cache(
size in 1u64..1 * 1024 * 1024,
read_size in 1usize..1 * 1024 * 1024,
block_size in 16usize..1 * 1024 * 1024,
config: TestConfig,
) {
run_sequential_read_test(caching_stream(block_size), size, read_size, config);
}

#[test]
fn proptest_sequential_read_small_read_size_with_cache(size in 1u64..1 * 1024 * 1024, read_factor in 1usize..10,
block_size in 16usize..1 * 1024 * 1024, config: TestConfig) {
// Pick read size smaller than the object size
let read_size = (size as usize / read_factor).max(1);
run_sequential_read_test(caching_stream(block_size), size, read_size, config);
}
}

#[test]
Expand Down Expand Up @@ -792,6 +864,16 @@ mod tests {
let (object_size, reads) = reads;
run_random_read_test(default_stream(), object_size, reads, config);
}

#[test]
fn proptest_random_read_with_cache(
reads in random_read_strategy(1 * 1024 * 1024),
block_size in 16usize..1 * 1024 * 1024,
config: TestConfig,
) {
let (object_size, reads) = reads;
run_random_read_test(caching_stream(block_size), object_size, reads, config);
}
}

#[test]
Expand Down
Loading

0 comments on commit a1e4d86

Please sign in to comment.