-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #57 from eigerco/feat/23/filestore
Implement Filestore
- Loading branch information
Showing
9 changed files
with
576 additions
and
315 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
use std::path::Path; | ||
|
||
use sha2::{Digest, Sha256}; | ||
use tokio::{ | ||
fs::File, | ||
io::{AsyncRead, AsyncSeek, AsyncSeekExt, AsyncWrite}, | ||
}; | ||
use tokio_stream::StreamExt; | ||
use tokio_util::io::ReaderStream; | ||
|
||
use super::Config; | ||
use crate::{ | ||
multicodec::SHA_256_CODE, unixfs::stream_balanced_tree, CarV1Header, CarV2Header, CarV2Writer, | ||
Error, Index, IndexEntry, MultihashIndexSorted, SingleWidthIndex, | ||
}; | ||
|
||
async fn balanced_import<Src, Out>( | ||
mut source: Src, | ||
mut output: Out, | ||
chunk_size: usize, | ||
tree_width: usize, | ||
) -> Result<(), Error> | ||
where | ||
Src: AsyncRead + Unpin + Send, | ||
Out: AsyncWrite + AsyncSeek + Unpin, | ||
{ | ||
let chunker = ReaderStream::with_capacity(&mut source, chunk_size); | ||
let nodes = stream_balanced_tree(chunker, tree_width); | ||
tokio::pin!(nodes); | ||
let mut nodes = nodes.peekable(); | ||
|
||
let mut writer = CarV2Writer::new(&mut output); | ||
let mut position = 0; | ||
|
||
let placeholder_header = CarV2Header::default(); | ||
position += writer.write_header(&placeholder_header).await?; | ||
let car_v1_start = position; | ||
|
||
let placeholder_header_v1 = CarV1Header::default(); | ||
position += writer.write_v1_header(&placeholder_header_v1).await?; | ||
|
||
let mut root = None; | ||
let mut entries = vec![]; | ||
while let Some(node) = nodes.next().await { | ||
let (node_cid, node_bytes) = node?; | ||
let digest = node_cid.hash().digest().to_owned(); | ||
let entry = IndexEntry::new(digest, (position - car_v1_start) as u64); | ||
entries.push(entry); | ||
position += writer.write_block(&node_cid, &node_bytes).await?; | ||
|
||
if nodes.peek().await.is_none() { | ||
root = Some(node_cid); | ||
} | ||
} | ||
|
||
let Some(root) = root else { | ||
return Err(Error::EmptyRootsError); | ||
}; | ||
|
||
let index_offset = position; | ||
let single_width_index = | ||
SingleWidthIndex::new(Sha256::output_size() as u32, entries.len() as u64, entries); | ||
let index = Index::MultihashIndexSorted(MultihashIndexSorted::from_single_width( | ||
SHA_256_CODE, | ||
single_width_index.into(), | ||
)); | ||
writer.write_index(&index).await?; | ||
|
||
// Go back to the beginning of the file | ||
writer.get_inner_mut().rewind().await?; | ||
let header = CarV2Header::new( | ||
false, | ||
(car_v1_start) as u64, | ||
(index_offset - car_v1_start) as u64, | ||
(index_offset) as u64, | ||
); | ||
writer.write_header(&header).await?; | ||
|
||
// If the length of the roots doesn't match the previous one, you WILL OVERWRITE parts of the file | ||
let header_v1 = CarV1Header::new(vec![root]); | ||
writer.write_v1_header(&header_v1).await?; | ||
|
||
Ok(()) | ||
} | ||
|
||
/// Convert a `source` file into a CARv2 file and write it to `output`. | ||
pub async fn create_filestore<Src, Out>( | ||
source: Src, | ||
output: Out, | ||
config: Config, | ||
) -> Result<(), Error> | ||
where | ||
Src: AsRef<Path>, | ||
Out: AsRef<Path>, | ||
{ | ||
match config { | ||
Config::Balanced { | ||
chunk_size, | ||
tree_width, | ||
} => { | ||
let source_file = File::open(source).await?; | ||
let output_file = File::create(output).await?; | ||
balanced_import(source_file, output_file, chunk_size, tree_width).await | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod test { | ||
use std::path::Path; | ||
|
||
use tempfile::tempdir; | ||
|
||
use crate::{ | ||
stores::{filestore::create_filestore, Config}, | ||
test_utils::assert_buffer_eq, | ||
}; | ||
|
||
async fn test_filestore_roundtrip<P1, P2>(original: P1, expected: P2) | ||
where | ||
P1: AsRef<Path>, | ||
P2: AsRef<Path>, | ||
{ | ||
let temp_dir = tempdir().unwrap(); | ||
let temp_path = temp_dir.path().join("lorem.car"); | ||
|
||
create_filestore(original, &temp_path, Config::default()) | ||
.await | ||
.unwrap(); | ||
|
||
let expected = tokio::fs::read(expected.as_ref()).await.unwrap(); | ||
let result = tokio::fs::read(temp_path).await.unwrap(); | ||
|
||
assert_buffer_eq!(&expected, &result); | ||
} | ||
|
||
#[tokio::test] | ||
async fn test_filestore_lorem() { | ||
test_filestore_roundtrip( | ||
"tests/fixtures/original/lorem.txt", | ||
"tests/fixtures/car_v2/lorem.car", | ||
) | ||
.await | ||
} | ||
|
||
#[tokio::test] | ||
async fn test_filestore_spaceglenda() { | ||
test_filestore_roundtrip( | ||
"tests/fixtures/original/spaceglenda.jpg", | ||
"tests/fixtures/car_v2/spaceglenda.car", | ||
) | ||
.await | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
mod blockstore; | ||
mod filestore; | ||
|
||
pub use blockstore::Blockstore; | ||
pub use filestore::create_filestore; | ||
|
||
/// The default block size, as defined in | ||
/// [boxo](https://github.com/ipfs/boxo/blob/f4fe8997dcbeb39b3a4842d8f08b34739bfd84a4/chunker/parse.go#L13). | ||
pub(crate) const DEFAULT_BLOCK_SIZE: usize = 1024 * 256; | ||
|
||
/// The default tree width, also called links per block, as defined in | ||
/// [boxo](https://github.com/ipfs/boxo/blob/625ba769263c2beeec934836f54bbd6624db945a/ipld/unixfs/importer/helpers/helpers.go#L16-L30). | ||
pub(crate) const DEFAULT_TREE_WIDTH: usize = 174; | ||
|
||
/// Store configuration options. | ||
pub enum Config { | ||
/// The store should use the balanced tree layout, | ||
/// generating byte chunks of `chunk_size` and | ||
/// generating parent nodes every `tree_width` nodes. | ||
Balanced { | ||
chunk_size: usize, | ||
tree_width: usize, | ||
}, | ||
} | ||
|
||
impl Config { | ||
/// Create a new [`Config::Balanced`]. | ||
pub fn balanced(chunk_size: usize, tree_width: usize) -> Self { | ||
Self::Balanced { | ||
chunk_size, | ||
tree_width, | ||
} | ||
} | ||
} | ||
|
||
impl Default for Config { | ||
fn default() -> Self { | ||
Self::Balanced { | ||
chunk_size: DEFAULT_BLOCK_SIZE, | ||
tree_width: DEFAULT_TREE_WIDTH, | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters