Add detailed rustdoc to reftests (#1232)

The reference tests for Mountpoint can be quite complex, especially for those unfamiliar both with the tests themselves or the idea of reference-based testing. This change adds more detailed rustdoc with the goal to ramp up new readers with the reftests, give an overview of what the tests are doing, and point the reader to resources for learning more. ### Does this change impact existing behavior? No, all code documentation changes. ### Does this change need a changelog entry? Does it require a version change? No, code doc changes only. --- By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license and I agree to the terms of the [Developer Certificate of Origin (DCO)](https://developercertificate.org/). Signed-off-by: Daniel Carl Jones <[email protected]>
awslabs · Jan 15, 2025 · d008177 · d008177 · github-actions · Jan 16, 2025
1 parent dd8b881
commit d008177
Show file tree

Hide file tree

Showing 4 changed files with 113 additions and 15 deletions.
diff --git a/mountpoint-s3/tests/reftests/generators.rs b/mountpoint-s3/tests/reftests/generators.rs
@@ -1,3 +1,5 @@
+//! Provides [Strategy]s for [proptest]. Strategies are ways that [proptest] can generate values for tests from primitive types.
+
 use mountpoint_s3_client::mock_client::MockObject;
 use mountpoint_s3_client::types::ETag;
 use proptest::prelude::*;
@@ -9,13 +11,20 @@ use std::ops::Deref;
 
 use crate::reftests::reference::valid_inode_name;
 
+/// [Strategy] for providing valid POSIX path names.
+///
+/// We intentionally limit to a small input space to avoid testing less useful inputs.
 pub fn valid_name_strategy() -> impl Strategy<Value = String> {
+    // Literally the character `a` and `-` between 1 and 3 times.
     string_regex("[a-]{1,3}").unwrap()
 }
 
+/// [Strategy] providing strings which may or may not be valid POSIX path names.
+///
+/// We intentionally limit to a small input space to avoid testing less useful inputs.
+/// We also give more weight to the generation of valid names.
 pub fn name_strategy() -> impl Strategy<Value = String> {
     prop_oneof![
-        // Valid names
         5 => valid_name_strategy(),
         // Potentially invalid names
         1 => string_regex("[a\\-/.\u{1}]{1,3}").unwrap(),
@@ -57,6 +66,12 @@ impl From<&str> for ValidName {
     }
 }
 
+/// Split file size into two groups.
+///
+/// This allows [proptest] to generate a set of values for each group,
+/// and we can apply a weight to each group (which is equal by default).
+/// It is in our interest to balance focus between smaller file sizes where we may hit more edge cases,
+/// but also cover some much larger file sizes.
 #[derive(Clone, Copy, Arbitrary)]
 pub enum FileSize {
     Small(u8),
@@ -81,6 +96,10 @@ impl Debug for FileSize {
     }
 }
 
+/// Represents some file content for property-based testing.
+///
+/// The second value is the length of the file content,
+/// while the first is the byte that will be repeated to generate the content.
 #[derive(Clone, Debug, Arbitrary)]
 pub struct FileContent(pub u8, pub FileSize);
 

diff --git a/mountpoint-s3/tests/reftests/harness.rs b/mountpoint-s3/tests/reftests/harness.rs
@@ -1,3 +1,12 @@
+//! Provides the test harness along with operations that can be performed against
+//! the [Harness::reference] model and the system under test [Harness::fs].
+//!
+//! Note that the system under test is Mountpoint's file system type wrapped by [TestS3Filesystem],
+//! and does not include FUSE integration.
+//!
+//! - TODO: How can we test the new incremental upload mode?
+//! - TODO: How can we test directory bucket semantics?
+
 use std::collections::{BTreeMap, HashSet};
 use std::fmt::Debug;
 use std::path::{Component, Path, PathBuf};
@@ -23,14 +32,26 @@ use crate::reftests::reference::{File, Node, Reference};
 // TODO: "reboot" (forget all the local inodes and re-bootstrap)
 #[derive(Debug, Arbitrary)]
 pub enum Op {
-    /// Do an entire write in one step
+    /// Opens a new file, writes to it, and 'closes' it in one step.
     WriteFile(ValidName, DirectoryIndex, FileContent),
 
     // Individual steps of a file write -- create, open, write, close
+    /// Create a new file, but don't open it yet. (aka. `mknod`)
+    ///
+    /// If other writing file operations are invoked before this step,
+    /// the other file operations will be no-op (as there are no in-flight writes).
     CreateFile(ValidName, DirectoryIndex, FileContent),
+    /// Open a file for writing that was created by `CreateFile`.
     StartWriting(InflightWriteIndex),
-    // usize is the percentage of the file to write (taken modulo 101)
+    /// Write some percentage of the desired file content to the open file.
+    ///
+    /// Effectively applies `StartWriting` if not already done.
+    /// The second value is the percentage of the file to write (taken modulo 101).
     WritePart(InflightWriteIndex, usize),
+    /// Closes an open file using `release`.
+    ///
+    /// Does not invoke `flush`.
+    /// If `StartWriting` wasn't already applied, it is effectively applied in this operation.
     FinishWrite(InflightWriteIndex),
 
     /// Remove a file
@@ -120,6 +141,7 @@ pub struct InflightWrite {
     inode: InodeNo,
     /// Initially None until the file is opened by [Op::StartWriting]
     file_handle: Option<u64>,
+    /// Prepared bytes ready to be written to the file system by the test.
     object: MockObject,
     written: usize,
 }
@@ -161,9 +183,15 @@ impl InflightWrites {
 
 #[derive(Debug)]
 pub struct Harness {
-    readdir_limit: usize, // max number of entries that a readdir will return; 0 means no limit
+    /// Max number of entries that a readdir operation will return.
+    ///
+    /// 0 means no limit.
+    readdir_limit: usize,
+    /// Reference model for the system under test (SUT) to be compared against.
     reference: Reference,
+    /// The system under test (SUT) that we compare against the [Self::reference].
     fs: TestS3Filesystem<Arc<MockClient>>,
+    /// An S3 client, used for performing operations against the 'remote' S3.
     client: Arc<MockClient>,
     bucket: String,
     inflight_writes: InflightWrites,
@@ -242,14 +270,15 @@ impl Harness {
         }
     }
 
-    /// Walk the filesystem tree and check that at each level, contents match the reference
+    /// Walk the filesystem tree using `readdir` and `lookup` and check that at each level, contents match the reference.
     pub async fn compare_contents(&self) {
         let root = self.reference.root();
         self.compare_contents_recursive(FUSE_ROOT_INODE, FUSE_ROOT_INODE, root)
             .await;
     }
 
     /// Walk a single path through the filesystem tree and ensure each node matches the reference.
+    ///
     /// Compared to [compare_contents], this avoids doing a `readdir` before `lookup`, and so tests
     /// a different path through the inode code.
     pub async fn compare_single_path(&self, idx: usize) {
@@ -302,8 +331,15 @@ impl Harness {
         Ok(inode)
     }
 
-    /// Create a new file ready for writing. We don't allow overwrites of existing files by default, so this
-    /// can return None if the name already exists in the chosen directory.
+    /// Create a new file within the directory ready for writing.
+    ///
+    /// The file is not yet opened.
+    /// Contents are provided only to be stored as part of the test state in this step,
+    /// and will be read from when performing writes against the file system.
+    ///
+    /// We don't allow overwrites of existing files by default,
+    /// so this returns [None] if the name already exists in the chosen directory.
+    ///
     /// TODO: Update this function to support overwrites
     async fn perform_create_file(
         &mut self,
@@ -347,7 +383,7 @@ impl Harness {
         }
     }
 
-    /// Open a previously created file (by `perform_create_file`) for writing
+    /// Open a previously created file (using [Self::perform_create_file]) for writing.
     async fn perform_start_writing(&mut self, index: InflightWriteIndex) {
         let Some(inflight_write) = self.inflight_writes.get(index) else {
             return;
@@ -366,7 +402,7 @@ impl Harness {
         }
     }
 
-    /// Continue writing to an open file
+    /// Continue writing to a file open for writing.
     async fn perform_write_part(&mut self, index: InflightWriteIndex, percent: usize) {
         let Some(inflight_write) = self.inflight_writes.get(index) else {
             // No inflight writes available
@@ -597,6 +633,8 @@ impl Harness {
         self.reference.remove_remote_key(&key);
     }
 
+    /// Recurse through the file systems using readdir,
+    /// comparing the [Self::reference] expected state to the system under test ([Self::fs]).
     fn compare_contents_recursive<'a>(
         &'a self,
         fs_parent: InodeNo,
@@ -693,6 +731,8 @@ impl Harness {
         .boxed()
     }
 
+    /// Compare the contents of a given reference file to the system under test (SUT),
+    /// by opening and reading the file from the SUT.
     async fn compare_file<'a>(&'a self, fs_file: InodeNo, ref_file: &'a MockObject) {
         let fh = match self.fs.open(fs_file, OpenFlags::empty(), 0).await {
             Ok(ret) => ret.fh,

diff --git a/mountpoint-s3/tests/reftests/mod.rs b/mountpoint-s3/tests/reftests/mod.rs
@@ -1,3 +1,15 @@
+//! This module uses [proptest] to perform property or reference testing of Mountpoint.
+//!
+//! The [Proptest Book](https://proptest-rs.github.io/) is recommended reading
+//! for understanding the purpose and functionality of tests in this module.
+//!
+//! The reference tests are broken down as follows:
+//! - [generators] provides strategies for generating test input to our reference tests.
+//! - [harness] configures and runs the tests comparing an expected FS and S3 bucket state (provided by [reference])
+//!   to what occurs with MP
+//! - [fuse] is a harness configuring and running tests comparing a real local FS with MP,
+//!   with the goal to identify divergences from POSIX semantics
+
 mod fuse;
 mod generators;
 mod harness;

diff --git a/mountpoint-s3/tests/reftests/reference.rs b/mountpoint-s3/tests/reftests/reference.rs
@@ -1,15 +1,24 @@
+//! Provides an expected state for the file system and its S3 bucket.
+//!
+//! As part of the reference model testing,
+//! a [MaterializedReference] is generated representing the expected state of the file system and S3.
+//! We compare its state with that returned by Mountpoint's file system
+//! when traversing or visiting specific paths to assert correctness.
+
 use mountpoint_s3_client::mock_client::MockObject;
 use std::cell::RefCell;
 use std::collections::BTreeMap;
 use std::path::{Component, Path, PathBuf};
 use std::rc::Rc;
 
+/// A file node, which could be local or remote.
 #[derive(Debug)]
 pub enum File {
     Local,
     Remote(Box<MockObject>),
 }
 
+/// A node in the reference model. This node could be local or remote.
 #[derive(Debug)]
 pub enum Node {
     Directory {
@@ -52,12 +61,18 @@ impl Node {
     }
 }
 
-/// The expected state of a file system. We track three pieces of state: the keys in an S3 bucket,
-/// plus lists of local files and local directories. Whenever we need the tree structure of the
-/// file system, we construct it from these inputs as a [MaterializedReference]. Building the
-/// reference in this indirect way allows us to have only one definition of correctness -- the
-/// implementation of [build_reference] -- and to test both mutations to the file system itself and
-/// "remote" mutations to the bucket (like adding or deleting a key using another client).
+/// The expected state of a file system and its S3 bucket.
+///
+/// We track three pieces of state:
+/// - The keys in an S3 bucket.
+/// - The list of local directories.
+/// - The list of local files.
+///
+/// Whenever we need the tree structure of the file system,
+/// we take this state and produce a [MaterializedReference].
+/// By producing the reference in this indirect way, it allows us to have only one definition of correctness
+/// -- the implementation of [build_reference] -- and to test both mutations to the file system itself
+/// and "remote" mutations to the bucket (like adding or deleting a key using another client).
 #[derive(Debug)]
 pub struct Reference {
     /// Contents of our S3 bucket
@@ -70,9 +85,17 @@ pub struct Reference {
     materialized: MaterializedReference,
 }
 
+/// A [MaterializedReference] is a product of the [Reference],
+/// presenting the desired tree ([Self::root]) and list of directories ([Self::directories]).
+///
+/// This should be 'rematerialized' each time S3 or local state is mutated
+/// to show the new desired state of the reference using [Reference::rematerialize].
+/// This will use [build_reference] to construct the file system based on the remote state,
+/// and then mutate it based on local state.
 #[derive(Debug)]
 struct MaterializedReference {
     root: Node,
+    /// A collection of all the directories in the [Reference], both local and remote.
     directories: Vec<PathBuf>,
 }
 
@@ -160,6 +183,10 @@ impl Reference {
         }
     }
 
+    /// Create a new [MaterializedReference] from the [Reference].
+    ///
+    /// This will reevaluate what is in S3 and what should be maintained locally,
+    /// and produce a result which should maintain our promised semantics.
     fn rematerialize(&self) -> MaterializedReference {
         tracing::debug!(
             remote_keys=?self.remote_keys, local_files=?self.local_files, local_directories=?self.local_directories,