From eba617d332fed6094fde1d914e62a47afab7995b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Fri, 28 Jun 2024 12:56:19 +0200 Subject: [PATCH 01/27] feat: work in progress upload --- Cargo.lock | 241 ++++++++++++++++-- Cargo.toml | 5 +- cli/polka-storage-provider/Cargo.toml | 11 +- .../src/commands/run.rs | 102 +++++++- cli/polka-storage-provider/src/rpc/server.rs | 2 +- storage/mater/src/stores/blockstore.rs | 2 +- storage/mater/src/unixfs/mod.rs | 2 +- 7 files changed, 337 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cbd618fa1..bac2a3cae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -837,6 +837,62 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +[[package]] +name = "axum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.0", + "http-body-util", + "hyper 1.3.1", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "multer", + "percent-encoding", + "pin-project-lite 0.2.14", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper 1.0.1", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.0", + "http-body-util", + "mime", + "pin-project-lite 0.2.14", + "rustversion", + "sync_wrapper 0.1.2", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "backtrace" version = "0.3.72" @@ -3159,6 +3215,15 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +[[package]] +name = "encoding_rs" +version = "0.8.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +dependencies = [ + "cfg-if", +] + [[package]] name = "enum-as-inner" version = "0.5.1" @@ -4197,7 +4262,7 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.12", "indexmap 2.2.6", "slab", "tokio", @@ -4385,6 +4450,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http-body" version = "0.4.6" @@ -4392,7 +4468,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", - "http", + "http 0.2.12", + "pin-project-lite 0.2.14", +] + +[[package]] +name = "http-body" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +dependencies = [ + "bytes", + "http 1.1.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.0", "pin-project-lite 0.2.14", ] @@ -4431,8 +4530,8 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.6", "httparse", "httpdate", "itoa", @@ -4444,6 +4543,25 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.1.0", + "http-body 1.0.0", + "httparse", + "httpdate", + "itoa", + "pin-project-lite 0.2.14", + "smallvec", + "tokio", +] + [[package]] name = "hyper-rustls" version = "0.24.2" @@ -4451,8 +4569,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", - "http", - "hyper", + "http 0.2.12", + "hyper 0.14.29", "log", "rustls 0.21.12", "rustls-native-certs 0.6.3", @@ -4460,6 +4578,21 @@ dependencies = [ "tokio-rustls 0.24.1", ] +[[package]] +name = "hyper-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b875924a60b96e5d7b9ae7b066540b1dd1cbd90d1828f54c92e02a283351c56" +dependencies = [ + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.0", + "hyper 1.3.1", + "pin-project-lite 0.2.14", + "tokio", +] + [[package]] name = "iana-time-zone" version = "0.1.60" @@ -4826,7 +4959,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4978087a58c3ab02efc5b07c5e5e2803024536106fd5506f558db172c889b3aa" dependencies = [ "futures-util", - "http", + "http 0.2.12", "jsonrpsee-core", "pin-project", "rustls-native-certs 0.7.0", @@ -4851,7 +4984,7 @@ dependencies = [ "beef", "futures-timer", "futures-util", - "hyper", + "hyper 0.14.29", "jsonrpsee-types", "parking_lot 0.12.3", "pin-project", @@ -4872,7 +5005,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ccf93fc4a0bfe05d851d37d7c32b7f370fe94336b52a2f0efc5f1981895c2e5" dependencies = [ "async-trait", - "hyper", + "hyper 0.14.29", "hyper-rustls", "jsonrpsee-core", "jsonrpsee-types", @@ -4905,8 +5038,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "12d8b6a9674422a8572e0b0abb12feeb3f2aeda86528c80d0350c2bd0923ab41" dependencies = [ "futures-util", - "http", - "hyper", + "http 0.2.12", + "hyper 0.14.29", "jsonrpsee-core", "jsonrpsee-types", "pin-project", @@ -4941,7 +5074,7 @@ version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58b9db2dfd5bb1194b0ce921504df9ceae210a345bc2f6c5a61432089bbab070" dependencies = [ - "http", + "http 0.2.12", "jsonrpsee-client-transport", "jsonrpsee-core", "jsonrpsee-types", @@ -5799,6 +5932,12 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + [[package]] name = "mater" version = "0.1.0" @@ -5910,6 +6049,12 @@ dependencies = [ "thrift", ] +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -6049,6 +6194,23 @@ dependencies = [ "syn 2.0.66", ] +[[package]] +name = "multer" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b" +dependencies = [ + "bytes", + "encoding_rs", + "futures-util", + "http 1.1.0", + "httparse", + "memchr", + "mime", + "spin 0.9.8", + "version_check", +] + [[package]] name = "multiaddr" version = "0.17.1" @@ -8295,9 +8457,12 @@ name = "polka-storage-provider" version = "0.1.0" dependencies = [ "async-trait", + "axum", "chrono", "clap", + "futures", "jsonrpsee", + "mater", "sc-cli", "sealed", "serde", @@ -8306,6 +8471,7 @@ dependencies = [ "subxt-signer", "thiserror", "tokio", + "tokio-util", "tracing", "tracing-subscriber", "url", @@ -11639,7 +11805,7 @@ dependencies = [ "fnv", "futures", "futures-timer", - "hyper", + "hyper 0.14.29", "hyper-rustls", "libp2p", "log", @@ -11733,8 +11899,8 @@ dependencies = [ "forwarded-header-value", "futures", "governor", - "http", - "hyper", + "http 0.2.12", + "hyper 0.14.29", "ip_network", "jsonrpsee", "log", @@ -12415,6 +12581,16 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6" +dependencies = [ + "itoa", + "serde", +] + [[package]] name = "serde_spanned" version = "0.6.6" @@ -12424,6 +12600,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + [[package]] name = "serdect" version = "0.2.0" @@ -12903,7 +13091,7 @@ dependencies = [ "bytes", "flate2", "futures", - "http", + "http 0.2.12", "httparse", "log", "rand 0.8.5", @@ -14070,7 +14258,7 @@ name = "substrate-prometheus-endpoint" version = "0.17.0" source = "git+https://github.com/paritytech/polkadot-sdk?tag=polkadot-v1.13.0#d5160c1d567cc73c7df6c816d41e21aa3adb188d" dependencies = [ - "hyper", + "hyper 0.14.29", "log", "prometheus", "thiserror", @@ -14304,6 +14492,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + +[[package]] +name = "sync_wrapper" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" + [[package]] name = "synstructure" version = "0.12.6" @@ -14717,6 +14917,7 @@ dependencies = [ "futures-util", "pin-project", "pin-project-lite 0.2.14", + "tokio", "tower-layer", "tower-service", "tracing", @@ -14732,8 +14933,8 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.6", "http-range-header", "pin-project-lite 0.2.14", "tower-layer", @@ -14982,7 +15183,7 @@ dependencies = [ "byteorder", "bytes", "data-encoding", - "http", + "http 0.2.12", "httparse", "log", "rand 0.8.5", diff --git a/Cargo.toml b/Cargo.toml index 22bc0e144..0b2b95ac7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -92,6 +92,7 @@ uuid = "1.8.0" # Local cli-primitives = { path = "primitives/cli" } +mater = { path = "storage/mater" } pallet-market = { path = "pallets/market", default-features = false } polka-storage-runtime = { path = "runtime" } primitives-proofs = { path = "primitives/proofs", default-features = false } @@ -136,7 +137,9 @@ substrate-prometheus-endpoint = { git = "https://github.com/paritytech/polkadot- # Polkadot pallet-xcm = { git = "https://github.com/paritytech/polkadot-sdk", tag = "polkadot-v1.13.0", default-features = false } -polkadot-cli = { git = "https://github.com/paritytech/polkadot-sdk", tag = "polkadot-v1.13.0", features = ["rococo-native"] } +polkadot-cli = { git = "https://github.com/paritytech/polkadot-sdk", tag = "polkadot-v1.13.0", features = [ + "rococo-native", +] } polkadot-parachain-primitives = { git = "https://github.com/paritytech/polkadot-sdk", tag = "polkadot-v1.13.0", default-features = false } polkadot-primitives = { git = "https://github.com/paritytech/polkadot-sdk", tag = "polkadot-v1.13.0" } polkadot-runtime-common = { git = "https://github.com/paritytech/polkadot-sdk", tag = "polkadot-v1.13.0", default-features = false } diff --git a/cli/polka-storage-provider/Cargo.toml b/cli/polka-storage-provider/Cargo.toml index 2170a02af..b9c9cf727 100644 --- a/cli/polka-storage-provider/Cargo.toml +++ b/cli/polka-storage-provider/Cargo.toml @@ -8,10 +8,19 @@ repository.workspace = true version = "0.1.0" [dependencies] +# TODO(no-ref,@cernicc,28/06/2024): Move to workkspacecs +mater = { workspace = true } async-trait = { workspace = true } +futures = "0.3.30" +axum = { version = "0.7.5", features = ["multipart"] } +tokio-util = { version = "0.7.11" } chrono = { workspace = true, features = ["serde"] } clap = { workspace = true, features = ["derive"] } -jsonrpsee = { workspace = true, features = ["http-client", "server", "ws-client"] } +jsonrpsee = { workspace = true, features = [ + "http-client", + "server", + "ws-client", +] } sc-cli = { workspace = true } sealed = { workspace = true } serde = { workspace = true } diff --git a/cli/polka-storage-provider/src/commands/run.rs b/cli/polka-storage-provider/src/commands/run.rs index 5f31a656b..0ee482345 100644 --- a/cli/polka-storage-provider/src/commands/run.rs +++ b/cli/polka-storage-provider/src/commands/run.rs @@ -1,7 +1,19 @@ -use std::{net::SocketAddr, sync::Arc}; +use std::{io, net::SocketAddr, sync::Arc}; +use axum::{ + body::Bytes, + extract::{Path, Request, State}, + http::{Error, StatusCode}, + response::IntoResponse, + routing::{get, post}, + BoxError, Router, +}; use chrono::Utc; use clap::Parser; +use futures::{Stream, TryStreamExt}; +use mater::Blockstore; +use tokio::{fs::File, io::BufWriter}; +use tokio_util::io::StreamReader; use tracing::info; use url::Url; @@ -13,6 +25,9 @@ use crate::{ const FULL_NODE_DEFAULT_RPC_ADDR: &str = "ws://127.0.0.1:9944"; +/// Directory where uploaded files are stored. +const UPLOADS_DIRECTORY: &str = "uploads"; + /// Command to start the storage provider. #[derive(Debug, Clone, Parser)] pub(crate) struct RunCommand { @@ -34,11 +49,23 @@ impl RunCommand { }); // Start RPC server - let handle = start_rpc_server(state, self.listen_addr).await?; + let handle = start_rpc_server(state.clone(), self.listen_addr).await?; info!("RPC server started at {}", self.listen_addr); + // Upload endpoint + let router = configure_router(state); + // TODO(no-ref,@cernicc,28/06/2024): Listen on the same address that rpc listens on + let listener = tokio::net::TcpListener::bind("127.0.0.1:3000") + .await + .unwrap(); + + let _ = axum::serve(listener, router) + .with_graceful_shutdown(shutdown_signal()) + .await + .unwrap(); + // Monitor shutdown - tokio::signal::ctrl_c().await?; + shutdown_signal().await; // Stop the Server let _ = handle.stop(); @@ -50,3 +77,72 @@ impl RunCommand { Ok(()) } } + +// TODO(no-ref,@cernicc,28/06/2024): Handle shutdown better +async fn shutdown_signal() { + tokio::signal::ctrl_c() + .await + .expect("failed to install Ctrl+C handler"); +} + +// TODO(no-ref,@cernicc,28/06/2024): Move somewhere else +// TODO(no-ref,@cernicc,28/06/2024): Handle response +// TODO(no-ref,@cernicc,28/06/2024): Better error handling +async fn upload( + State(state): State>, + request: Request, +) -> Result<(), (StatusCode, String)> { + dbg!("Uploading file"); + stream_to_file(request.into_body().into_data_stream()) + .await + .unwrap(); + Ok(()) +} + +async fn download( + State(state): State>, + Path(cid): Path, +) -> Result<(), (StatusCode, String)> { + Ok(()) +} + +// TODO(no-ref,@cernicc,28/06/2024): Move somewhere else +fn configure_router(state: Arc) -> Router { + Router::new() + .route("/upload", post(upload)) + .route("/download/:cid", get(download)) + .with_state(state) +} + +// Save a `Stream` to a file +async fn stream_to_file(stream: S) -> Result<(), (StatusCode, String)> +where + S: Stream>, + E: Into, +{ + // TODO: Check if file is already a car file + let path = "something.car"; + + async { + // Convert the stream into an `AsyncRead`. + let body_with_io_error = stream.map_err(|err| io::Error::new(io::ErrorKind::Other, err)); + let body_reader = StreamReader::new(body_with_io_error); + futures::pin_mut!(body_reader); + + // Stream the body to the Blockstore + let mut block_store = Blockstore::new(); + block_store.read(body_reader).await.unwrap(); + + // Create the file. `File` implements `AsyncWrite`. + let path = std::path::Path::new(UPLOADS_DIRECTORY).join(path); + let mut file = BufWriter::new(File::create(path).await?); + + // Copy the body into the file. + // Uncomment this for error. + // block_store.write(&mut file).await.unwrap(); + + Ok::<_, io::Error>(()) + } + .await + .map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string())) +} diff --git a/cli/polka-storage-provider/src/rpc/server.rs b/cli/polka-storage-provider/src/rpc/server.rs index ea1213761..87a8990d7 100644 --- a/cli/polka-storage-provider/src/rpc/server.rs +++ b/cli/polka-storage-provider/src/rpc/server.rs @@ -37,7 +37,7 @@ pub async fn start_rpc_server( ) -> Result { let server = Server::builder().build(listen_addr).await?; - let module = create_module(state.clone()); + let module = create_module(state); let server_handle = server.start(module); Ok(server_handle) diff --git a/storage/mater/src/stores/blockstore.rs b/storage/mater/src/stores/blockstore.rs index f8a5ce56a..69e6a29c1 100644 --- a/storage/mater/src/stores/blockstore.rs +++ b/storage/mater/src/stores/blockstore.rs @@ -85,7 +85,7 @@ impl Blockstore { /// converting the contents into a CARv2 file. pub async fn read(&mut self, reader: R) -> Result<(), Error> where - R: AsyncRead + Unpin + Send, + R: AsyncRead + Unpin, { let chunks = ReaderStream::with_capacity(reader, self.chunk_size); diff --git a/storage/mater/src/unixfs/mod.rs b/storage/mater/src/unixfs/mod.rs index 963701c02..1a84cfa66 100644 --- a/storage/mater/src/unixfs/mod.rs +++ b/storage/mater/src/unixfs/mod.rs @@ -253,7 +253,7 @@ pub(crate) fn stream_balanced_tree( width: usize, ) -> impl Stream> where - I: Stream> + Send, + I: Stream>, { try_stream! { let mut tree: VecDeque> = VecDeque::new(); From 92279f2f3f9e2855d5d031fee181e8aabef704f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Fri, 28 Jun 2024 12:58:34 +0200 Subject: [PATCH 02/27] fix: taplo --- Cargo.toml | 4 +--- cli/polka-storage-provider/Cargo.toml | 12 ++++-------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0b2b95ac7..dbd2311b0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -137,9 +137,7 @@ substrate-prometheus-endpoint = { git = "https://github.com/paritytech/polkadot- # Polkadot pallet-xcm = { git = "https://github.com/paritytech/polkadot-sdk", tag = "polkadot-v1.13.0", default-features = false } -polkadot-cli = { git = "https://github.com/paritytech/polkadot-sdk", tag = "polkadot-v1.13.0", features = [ - "rococo-native", -] } +polkadot-cli = { git = "https://github.com/paritytech/polkadot-sdk", tag = "polkadot-v1.13.0", features = ["rococo-native"] } polkadot-parachain-primitives = { git = "https://github.com/paritytech/polkadot-sdk", tag = "polkadot-v1.13.0", default-features = false } polkadot-primitives = { git = "https://github.com/paritytech/polkadot-sdk", tag = "polkadot-v1.13.0" } polkadot-runtime-common = { git = "https://github.com/paritytech/polkadot-sdk", tag = "polkadot-v1.13.0", default-features = false } diff --git a/cli/polka-storage-provider/Cargo.toml b/cli/polka-storage-provider/Cargo.toml index b9c9cf727..f3450f453 100644 --- a/cli/polka-storage-provider/Cargo.toml +++ b/cli/polka-storage-provider/Cargo.toml @@ -9,18 +9,13 @@ version = "0.1.0" [dependencies] # TODO(no-ref,@cernicc,28/06/2024): Move to workkspacecs -mater = { workspace = true } async-trait = { workspace = true } -futures = "0.3.30" axum = { version = "0.7.5", features = ["multipart"] } -tokio-util = { version = "0.7.11" } chrono = { workspace = true, features = ["serde"] } clap = { workspace = true, features = ["derive"] } -jsonrpsee = { workspace = true, features = [ - "http-client", - "server", - "ws-client", -] } +futures = "0.3.30" +jsonrpsee = { workspace = true, features = ["http-client", "server", "ws-client"] } +mater = { workspace = true } sc-cli = { workspace = true } sealed = { workspace = true } serde = { workspace = true } @@ -29,6 +24,7 @@ subxt = { workspace = true } subxt-signer = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } +tokio-util = { version = "0.7.11" } tracing = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter"] } url = { workspace = true } From fe2f5cfa6a8d2d07b4b475ad7b76d6f6350fa8ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Fri, 28 Jun 2024 18:36:27 +0200 Subject: [PATCH 03/27] feat: wip --- Cargo.lock | 13 ++++ cli/polka-storage-provider/Cargo.toml | 2 +- .../src/commands/run.rs | 63 ++++++------------- storage/mater/benches/benchmark.rs | 5 +- storage/mater/src/stores/filestore.rs | 26 +++----- 5 files changed, 48 insertions(+), 61 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bac2a3cae..00e5bbc58 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -845,6 +845,7 @@ checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" dependencies = [ "async-trait", "axum-core", + "axum-macros", "bytes", "futures-util", "http 1.1.0", @@ -893,6 +894,18 @@ dependencies = [ "tracing", ] +[[package]] +name = "axum-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00c055ee2d014ae5981ce1016374e8213682aa14d9bf40e48ab48b5f3ef20eaa" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "backtrace" version = "0.3.72" diff --git a/cli/polka-storage-provider/Cargo.toml b/cli/polka-storage-provider/Cargo.toml index f3450f453..2f8c0b4c6 100644 --- a/cli/polka-storage-provider/Cargo.toml +++ b/cli/polka-storage-provider/Cargo.toml @@ -10,7 +10,7 @@ version = "0.1.0" [dependencies] # TODO(no-ref,@cernicc,28/06/2024): Move to workkspacecs async-trait = { workspace = true } -axum = { version = "0.7.5", features = ["multipart"] } +axum = { version = "0.7.5", features = ["macros", "multipart"] } chrono = { workspace = true, features = ["serde"] } clap = { workspace = true, features = ["derive"] } futures = "0.3.30" diff --git a/cli/polka-storage-provider/src/commands/run.rs b/cli/polka-storage-provider/src/commands/run.rs index 0ee482345..d5c68031f 100644 --- a/cli/polka-storage-provider/src/commands/run.rs +++ b/cli/polka-storage-provider/src/commands/run.rs @@ -1,17 +1,16 @@ use std::{io, net::SocketAddr, sync::Arc}; use axum::{ - body::Bytes, + debug_handler, extract::{Path, Request, State}, - http::{Error, StatusCode}, - response::IntoResponse, + http::StatusCode, routing::{get, post}, - BoxError, Router, + Router, }; use chrono::Utc; use clap::Parser; -use futures::{Stream, TryStreamExt}; -use mater::Blockstore; +use futures::TryStreamExt; +use mater::{create_filestore, Config}; use tokio::{fs::File, io::BufWriter}; use tokio_util::io::StreamReader; use tracing::info; @@ -88,14 +87,25 @@ async fn shutdown_signal() { // TODO(no-ref,@cernicc,28/06/2024): Move somewhere else // TODO(no-ref,@cernicc,28/06/2024): Handle response // TODO(no-ref,@cernicc,28/06/2024): Better error handling +#[debug_handler] async fn upload( State(state): State>, request: Request, ) -> Result<(), (StatusCode, String)> { - dbg!("Uploading file"); - stream_to_file(request.into_body().into_data_stream()) - .await - .unwrap(); + // Body stream and reader + let body_data_stream = request.into_body().into_data_stream(); + let body_with_io_error = + body_data_stream.map_err(|err| io::Error::new(io::ErrorKind::Other, err)); + let body_reader = StreamReader::new(body_with_io_error); + + // Destination file + let path = "something.car"; + let path = std::path::Path::new(UPLOADS_DIRECTORY).join(path); + let mut file = BufWriter::new(File::create(path).await.unwrap()); + + // Stream the body, convert it to car and write it to the file + create_filestore(body_reader, &mut file, Config::default()).await; + Ok(()) } @@ -113,36 +123,3 @@ fn configure_router(state: Arc) -> Router { .route("/download/:cid", get(download)) .with_state(state) } - -// Save a `Stream` to a file -async fn stream_to_file(stream: S) -> Result<(), (StatusCode, String)> -where - S: Stream>, - E: Into, -{ - // TODO: Check if file is already a car file - let path = "something.car"; - - async { - // Convert the stream into an `AsyncRead`. - let body_with_io_error = stream.map_err(|err| io::Error::new(io::ErrorKind::Other, err)); - let body_reader = StreamReader::new(body_with_io_error); - futures::pin_mut!(body_reader); - - // Stream the body to the Blockstore - let mut block_store = Blockstore::new(); - block_store.read(body_reader).await.unwrap(); - - // Create the file. `File` implements `AsyncWrite`. - let path = std::path::Path::new(UPLOADS_DIRECTORY).join(path); - let mut file = BufWriter::new(File::create(path).await?); - - // Copy the body into the file. - // Uncomment this for error. - // block_store.write(&mut file).await.unwrap(); - - Ok::<_, io::Error>(()) - } - .await - .map_err(|err| (StatusCode::INTERNAL_SERVER_ERROR, err.to_string())) -} diff --git a/storage/mater/benches/benchmark.rs b/storage/mater/benches/benchmark.rs index 8ec3f4c97..6b55e4954 100644 --- a/storage/mater/benches/benchmark.rs +++ b/storage/mater/benches/benchmark.rs @@ -195,7 +195,10 @@ fn prepare_source_file(content: &[u8]) -> (TempDir, PathBuf) { /// Create a filestore. This function is benchmarked. async fn create_filestore_benched(source: &Path, target: &Path) { - create_filestore(source, target, Config::default()) + let source_file = File::open(source).await.unwrap(); + let output_file = File::create(target).await.unwrap(); + + create_filestore(source_file, output_file, Config::default()) .await .unwrap(); } diff --git a/storage/mater/src/stores/filestore.rs b/storage/mater/src/stores/filestore.rs index 1a97640f1..fbc8f57fe 100644 --- a/storage/mater/src/stores/filestore.rs +++ b/storage/mater/src/stores/filestore.rs @@ -1,10 +1,5 @@ -use std::path::Path; - use sha2::{Digest, Sha256}; -use tokio::{ - fs::File, - io::{AsyncRead, AsyncSeek, AsyncSeekExt, AsyncWrite}, -}; +use tokio::io::{AsyncRead, AsyncSeek, AsyncSeekExt, AsyncWrite}; use tokio_stream::StreamExt; use tokio_util::io::ReaderStream; @@ -21,7 +16,7 @@ async fn balanced_import( tree_width: usize, ) -> Result<(), Error> where - Src: AsyncRead + Unpin + Send, + Src: AsyncRead + Unpin, Out: AsyncWrite + AsyncSeek + Unpin, { let chunker = ReaderStream::with_capacity(&mut source, chunk_size); @@ -83,25 +78,21 @@ where Ok(()) } -/// Convert a `source` file into a CARv2 file and write it to `output`. +/// Convert a `source` stream into a CARv2 file and write it to `output` stream. pub async fn create_filestore( source: Src, output: Out, config: Config, ) -> Result<(), Error> where - Src: AsRef, - Out: AsRef, + Src: AsyncRead + Unpin, + Out: AsyncWrite + AsyncSeek + Unpin, { match config { Config::Balanced { chunk_size, tree_width, - } => { - let source_file = File::open(source).await?; - let output_file = File::create(output).await?; - balanced_import(source_file, output_file, chunk_size, tree_width).await - } + } => balanced_import(source, output, chunk_size, tree_width).await, } } @@ -110,6 +101,7 @@ mod test { use std::path::Path; use tempfile::tempdir; + use tokio::fs::File; use crate::{ stores::{filestore::create_filestore, Config}, @@ -124,7 +116,9 @@ mod test { let temp_dir = tempdir().unwrap(); let temp_path = temp_dir.path().join("lorem.car"); - create_filestore(original, &temp_path, Config::default()) + let source_file = File::open(original).await.unwrap(); + let output_file = File::create(&temp_path).await.unwrap(); + create_filestore(source_file, output_file, Config::default()) .await .unwrap(); From 4ddade0a9af32cf5f2a30b7ff72003119f9e3076 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Fri, 28 Jun 2024 18:37:46 +0200 Subject: [PATCH 04/27] fix: remove mut --- cli/polka-storage-provider/src/commands/run.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/polka-storage-provider/src/commands/run.rs b/cli/polka-storage-provider/src/commands/run.rs index d5c68031f..c9157d29a 100644 --- a/cli/polka-storage-provider/src/commands/run.rs +++ b/cli/polka-storage-provider/src/commands/run.rs @@ -101,10 +101,10 @@ async fn upload( // Destination file let path = "something.car"; let path = std::path::Path::new(UPLOADS_DIRECTORY).join(path); - let mut file = BufWriter::new(File::create(path).await.unwrap()); + let file = BufWriter::new(File::create(path).await.unwrap()); // Stream the body, convert it to car and write it to the file - create_filestore(body_reader, &mut file, Config::default()).await; + create_filestore(body_reader, file, Config::default()).await; Ok(()) } From 95d4ffdcc0ebc714158cd84dcc0fb94ee6799044 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Tue, 2 Jul 2024 10:52:41 +0200 Subject: [PATCH 05/27] fix: filestore future Send --- .../src/commands/run.rs | 17 ++++++---- storage/mater/src/lib.rs | 15 ++++++++- storage/mater/src/stores/filestore.rs | 28 ++++++++-------- storage/mater/src/v1/writer.rs | 7 ++-- storage/mater/src/v2/index.rs | 16 ++++------ storage/mater/src/v2/mod.rs | 5 ++- storage/mater/src/v2/writer.rs | 32 ++++++++++++------- 7 files changed, 72 insertions(+), 48 deletions(-) diff --git a/cli/polka-storage-provider/src/commands/run.rs b/cli/polka-storage-provider/src/commands/run.rs index c9157d29a..13fdcc15b 100644 --- a/cli/polka-storage-provider/src/commands/run.rs +++ b/cli/polka-storage-provider/src/commands/run.rs @@ -98,13 +98,16 @@ async fn upload( body_data_stream.map_err(|err| io::Error::new(io::ErrorKind::Other, err)); let body_reader = StreamReader::new(body_with_io_error); - // Destination file - let path = "something.car"; - let path = std::path::Path::new(UPLOADS_DIRECTORY).join(path); - let file = BufWriter::new(File::create(path).await.unwrap()); - - // Stream the body, convert it to car and write it to the file - create_filestore(body_reader, file, Config::default()).await; + // Stream the body, convert it to car and write it to the file] + // TODO: Remove spawn. Currently used only to check if the future is Send + tokio::spawn(async move { + // Destination file + let path = "something.car"; + let path = std::path::Path::new(UPLOADS_DIRECTORY).join(path); + let file = Box::new(BufWriter::new(File::create(path).await.unwrap())); + + create_filestore(body_reader, file, Config::default()).await; + }); Ok(()) } diff --git a/storage/mater/src/lib.rs b/storage/mater/src/lib.rs index 175134cd5..cb8752adb 100644 --- a/storage/mater/src/lib.rs +++ b/storage/mater/src/lib.rs @@ -16,9 +16,11 @@ mod unixfs; mod v1; mod v2; -// We need to expose this because `read_block` returns `(Cid, Vec)`. +use integer_encoding::VarInt; +// We need to re-expose this because `read_block` returns `(Cid, Vec)`. pub use ipld_core::cid::Cid; pub use stores::{create_filestore, Blockstore, Config}; +use tokio::io::{AsyncWrite, AsyncWriteExt}; pub use v1::{Header as CarV1Header, Reader as CarV1Reader, Writer as CarV1Writer}; pub use v2::{ Characteristics, Header as CarV2Header, Index, IndexEntry, IndexSorted, MultihashIndexSorted, @@ -102,6 +104,17 @@ pub enum Error { DagPbError(#[from] ipld_dagpb::Error), } +pub(crate) async fn write_varint_async(writer: &mut W, n: VI) -> Result +where + W: AsyncWrite + Unpin, + VI: VarInt, +{ + let mut buf = [0 as u8; 10]; + let b = n.encode_var(&mut buf); + writer.write_all(&buf[0..b]).await?; + Ok(b) +} + #[cfg(test)] pub(crate) mod test_utils { /// Check if two given slices are equal. diff --git a/storage/mater/src/stores/filestore.rs b/storage/mater/src/stores/filestore.rs index fbc8f57fe..03024ee97 100644 --- a/storage/mater/src/stores/filestore.rs +++ b/storage/mater/src/stores/filestore.rs @@ -1,16 +1,16 @@ +use futures::stream::StreamExt; use sha2::{Digest, Sha256}; use tokio::io::{AsyncRead, AsyncSeek, AsyncSeekExt, AsyncWrite}; -use tokio_stream::StreamExt; use tokio_util::io::ReaderStream; use super::Config; use crate::{ - multicodec::SHA_256_CODE, unixfs::stream_balanced_tree, CarV1Header, CarV2Header, CarV2Writer, + multicodec::SHA_256_CODE, unixfs::stream_balanced_tree, v1, v2, CarV1Header, CarV2Header, Error, Index, IndexEntry, MultihashIndexSorted, SingleWidthIndex, }; async fn balanced_import( - mut source: Src, + source: Src, mut output: Out, chunk_size: usize, tree_width: usize, @@ -19,20 +19,18 @@ where Src: AsyncRead + Unpin, Out: AsyncWrite + AsyncSeek + Unpin, { - let chunker = ReaderStream::with_capacity(&mut source, chunk_size); - let nodes = stream_balanced_tree(chunker, tree_width); + let chunker = ReaderStream::with_capacity(source, chunk_size); + let nodes = stream_balanced_tree(chunker, tree_width).peekable(); tokio::pin!(nodes); - let mut nodes = nodes.peekable(); - let mut writer = CarV2Writer::new(&mut output); let mut position = 0; let placeholder_header = CarV2Header::default(); - position += writer.write_header(&placeholder_header).await?; + position += v2::write_header(&mut output, &placeholder_header).await?; let car_v1_start = position; let placeholder_header_v1 = CarV1Header::default(); - position += writer.write_v1_header(&placeholder_header_v1).await?; + position += v1::write_header(&mut output, &placeholder_header_v1).await?; let mut root = None; let mut entries = vec![]; @@ -41,9 +39,9 @@ where let digest = node_cid.hash().digest().to_owned(); let entry = IndexEntry::new(digest, (position - car_v1_start) as u64); entries.push(entry); - position += writer.write_block(&node_cid, &node_bytes).await?; + position += v1::write_block(&mut output, &node_cid, &node_bytes).await?; - if nodes.peek().await.is_none() { + if nodes.as_mut().peek().await.is_none() { root = Some(node_cid); } } @@ -59,21 +57,21 @@ where SHA_256_CODE, single_width_index.into(), )); - writer.write_index(&index).await?; + v2::write_index(&mut output, &index).await?; // Go back to the beginning of the file - writer.get_inner_mut().rewind().await?; + output.rewind().await?; let header = CarV2Header::new( false, (car_v1_start) as u64, (index_offset - car_v1_start) as u64, (index_offset) as u64, ); - writer.write_header(&header).await?; + v2::write_header(&mut output, &header).await?; // If the length of the roots doesn't match the previous one, you WILL OVERWRITE parts of the file let header_v1 = CarV1Header::new(vec![root]); - writer.write_v1_header(&header_v1).await?; + v1::write_header(&mut output, &header_v1).await?; Ok(()) } diff --git a/storage/mater/src/v1/writer.rs b/storage/mater/src/v1/writer.rs index 0a06429f2..c7c7f3561 100644 --- a/storage/mater/src/v1/writer.rs +++ b/storage/mater/src/v1/writer.rs @@ -1,10 +1,9 @@ -use integer_encoding::VarIntAsyncWriter; use ipld_core::{cid::Cid, codec::Codec}; use serde_ipld_dagcbor::codec::DagCborCodec; use tokio::io::{AsyncWrite, AsyncWriteExt}; pub use crate::v1::Header; -use crate::Error; +use crate::{write_varint_async, Error}; /// Write [`crate::v1::Header`] to the provider writer. pub(crate) async fn write_header(writer: &mut W, header: &Header) -> Result @@ -12,7 +11,7 @@ where W: AsyncWrite + Unpin, { let encoded_header = DagCborCodec::encode_to_vec(header)?; - let varint_len = writer.write_varint_async(encoded_header.len()).await?; + let varint_len = write_varint_async(writer, encoded_header.len()).await?; writer.write_all(&encoded_header).await?; Ok(varint_len + encoded_header.len()) } @@ -32,7 +31,7 @@ where let data = block.as_ref(); let len = cid.encoded_len() + data.len(); - let varint_len = writer.write_varint_async(len).await?; + let varint_len = write_varint_async(writer, len).await?; // This allocation can probably be spared writer.write_all(&cid.to_bytes()).await?; writer.write_all(block.as_ref()).await?; diff --git a/storage/mater/src/v2/index.rs b/storage/mater/src/v2/index.rs index 000d82458..71671b8e1 100644 --- a/storage/mater/src/v2/index.rs +++ b/storage/mater/src/v2/index.rs @@ -1,9 +1,9 @@ use std::{collections::BTreeMap, mem::size_of}; -use integer_encoding::{VarIntAsyncReader, VarIntAsyncWriter}; +use integer_encoding::VarIntAsyncReader; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; -use crate::Error; +use crate::{write_varint_async, Error}; /// `IndexSorted` code format value, as defined in the /// [specification](https://ipld.io/specs/transport/car/carv2/#format-0x0400-indexsorted). @@ -175,21 +175,19 @@ impl Index { } } -pub(crate) async fn write_index(mut writer: W, index: &Index) -> Result +pub async fn write_index(writer: &mut W, index: &Index) -> Result where W: AsyncWrite + Unpin, { let mut written_bytes = 0; match index { Index::IndexSorted(index) => { - written_bytes += writer.write_varint_async(INDEX_SORTED_CODE).await?; - written_bytes += write_index_sorted(&mut writer, index).await?; + written_bytes += write_varint_async(writer, INDEX_SORTED_CODE).await?; + written_bytes += write_index_sorted(writer, index).await?; } Index::MultihashIndexSorted(index) => { - written_bytes += writer - .write_varint_async(MULTIHASH_INDEX_SORTED_CODE) - .await?; - written_bytes += write_multihash_index_sorted(&mut writer, index).await?; + written_bytes += write_varint_async(writer, MULTIHASH_INDEX_SORTED_CODE).await?; + written_bytes += write_multihash_index_sorted(writer, index).await?; } } Ok(written_bytes) diff --git a/storage/mater/src/v2/mod.rs b/storage/mater/src/v2/mod.rs index 61c17b92a..5b38cb4d0 100644 --- a/storage/mater/src/v2/mod.rs +++ b/storage/mater/src/v2/mod.rs @@ -3,8 +3,11 @@ mod reader; mod writer; use bitflags::bitflags; -pub use index::{Index, IndexEntry, IndexSorted, MultihashIndexSorted, SingleWidthIndex}; +pub use index::{ + write_index, Index, IndexEntry, IndexSorted, MultihashIndexSorted, SingleWidthIndex, +}; pub use reader::Reader; +pub(crate) use writer::write_header; pub use writer::Writer; /// The pragma for a CARv2. This is also a valid CARv1 header, with version 2 and no root CIDs. diff --git a/storage/mater/src/v2/writer.rs b/storage/mater/src/v2/writer.rs index 3622e7a6c..2b16616f6 100644 --- a/storage/mater/src/v2/writer.rs +++ b/storage/mater/src/v2/writer.rs @@ -27,17 +27,7 @@ where /// /// Returns the number of bytes written. pub async fn write_header(&mut self, header: &Header) -> Result { - self.writer.write_all(&PRAGMA).await?; - - let mut buffer = [0; 40]; - let mut handle = &mut buffer[..]; - WriteBytesExt::write_u128::(&mut handle, header.characteristics.bits())?; - WriteBytesExt::write_u64::(&mut handle, header.data_offset)?; - WriteBytesExt::write_u64::(&mut handle, header.data_size)?; - WriteBytesExt::write_u64::(&mut handle, header.index_offset)?; - - self.writer.write_all(&buffer).await?; - Ok(PRAGMA.len() + buffer.len()) + write_header(&mut self.writer, header).await } /// Write a [`crate::v1::Header`]. @@ -86,6 +76,26 @@ where } } +/// Write a [`Header`]. +/// +/// Returns the number of bytes written. +pub(crate) async fn write_header(writer: &mut W, header: &Header) -> Result +where + W: AsyncWrite + Unpin, +{ + writer.write_all(&PRAGMA).await?; + + let mut buffer = [0; 40]; + let mut handle = &mut buffer[..]; + WriteBytesExt::write_u128::(&mut handle, header.characteristics.bits())?; + WriteBytesExt::write_u64::(&mut handle, header.data_offset)?; + WriteBytesExt::write_u64::(&mut handle, header.data_size)?; + WriteBytesExt::write_u64::(&mut handle, header.index_offset)?; + + writer.write_all(&buffer).await?; + Ok(PRAGMA.len() + buffer.len()) +} + #[cfg(test)] mod tests { use std::{collections::BTreeMap, io::Cursor}; From e1193b23b41bf8ab8365dc258af961823045fb4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Wed, 3 Jul 2024 15:47:34 +0200 Subject: [PATCH 06/27] fix: mater future should be Send --- storage/mater/src/lib.rs | 14 +- storage/mater/src/utils.rs | 123 ++++++++++++++++++ storage/mater/src/v1/reader.rs | 7 +- storage/mater/src/v1/writer.rs | 6 +- storage/mater/src/v2/index.rs | 12 +- .../src/local_index_directory/rdb.rs | 4 +- 6 files changed, 139 insertions(+), 27 deletions(-) create mode 100644 storage/mater/src/utils.rs diff --git a/storage/mater/src/lib.rs b/storage/mater/src/lib.rs index cb8752adb..e31e9527f 100644 --- a/storage/mater/src/lib.rs +++ b/storage/mater/src/lib.rs @@ -13,14 +13,13 @@ mod multicodec; mod stores; mod unixfs; +mod utils; mod v1; mod v2; -use integer_encoding::VarInt; // We need to re-expose this because `read_block` returns `(Cid, Vec)`. pub use ipld_core::cid::Cid; pub use stores::{create_filestore, Blockstore, Config}; -use tokio::io::{AsyncWrite, AsyncWriteExt}; pub use v1::{Header as CarV1Header, Reader as CarV1Reader, Writer as CarV1Writer}; pub use v2::{ Characteristics, Header as CarV2Header, Index, IndexEntry, IndexSorted, MultihashIndexSorted, @@ -104,17 +103,6 @@ pub enum Error { DagPbError(#[from] ipld_dagpb::Error), } -pub(crate) async fn write_varint_async(writer: &mut W, n: VI) -> Result -where - W: AsyncWrite + Unpin, - VI: VarInt, -{ - let mut buf = [0 as u8; 10]; - let b = n.encode_var(&mut buf); - writer.write_all(&buf[0..b]).await?; - Ok(b) -} - #[cfg(test)] pub(crate) mod test_utils { /// Check if two given slices are equal. diff --git a/storage/mater/src/utils.rs b/storage/mater/src/utils.rs new file mode 100644 index 000000000..dc7c92db0 --- /dev/null +++ b/storage/mater/src/utils.rs @@ -0,0 +1,123 @@ +/// Utility functions for the mater crate. The contents were mostly borrowed +/// from the https://github.com/dermesser/integer-encoding-rs. +/// +/// The original issue why we needed to borrow the implantation of the reader +/// and writer is +/// https://github.com/dermesser/integer-encoding-rs/blob/4f57046ae90b6b923ff235a91f0729d3cf868d72/src/writer.rs#L20. +/// This specifies the Send bound as optional. The side effect of this choice is +/// that all futures using the writer or reader are non Send and there is no way +/// to make them Send. +/// +/// The second crate researched was +/// https://github.com/paritytech/unsigned-varint/tree/master. Issue with that +/// crate is that it only implements AsyncRead and AsyncWrite from the futures +/// crate and not tokio. For the future reference we could probably used +/// `unsigned-varint` with the tokio and use +/// https://docs.rs/tokio-util/latest/tokio_util/compat/trait.FuturesAsyncReadCompatExt.html +/// as the compatibility layer. +use std::{io, mem::size_of}; + +use integer_encoding::VarInt; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; + +/// Write a VarInt integer to an asynchronous writer. +/// +/// Borrowed from: +/// https://github.com/dermesser/integer-encoding-rs/blob/4f57046ae90b6b923ff235a91f0729d3cf868d72/src/writer.rs#L29 +pub(crate) async fn write_varint(writer: &mut W, n: VI) -> Result +where + W: AsyncWrite + Unpin, + VI: VarInt, +{ + let mut buf = [0 as u8; 10]; + let b = n.encode_var(&mut buf); + writer.write_all(&buf[0..b]).await?; + Ok(b) +} + +/// Returns either the decoded integer, or an error. +/// +/// In general, this always reads a whole varint. If the encoded varint's value +/// is bigger than the valid value range of `VI`, then the value is truncated. +/// +/// On EOF, an io::Error with io::ErrorKind::UnexpectedEof is returned. +/// +/// Borrowed from: +/// https://github.com/dermesser/integer-encoding-rs/blob/4f57046ae90b6b923ff235a91f0729d3cf868d72/src/reader.rs#L70 +pub(crate) async fn read_varint(reader: &mut R) -> Result +where + R: AsyncRead + Unpin, + VI: VarInt, +{ + let mut buf = [0_u8; 1]; + let mut p = VarIntProcessor::new::(); + + while !p.finished() { + let read = reader.read(&mut buf).await?; + + // EOF + if read == 0 && p.i == 0 { + return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "Reached EOF")); + } + if read == 0 { + break; + } + + p.push(buf[0])?; + } + + p.decode() + .ok_or_else(|| io::Error::new(io::ErrorKind::UnexpectedEof, "Reached EOF")) +} + +/// Most-significant byte, == 0x80 +const MSB: u8 = 0b1000_0000; + +/// VarIntProcessor encapsulates the logic for decoding a VarInt byte-by-byte. +/// +/// Borrowed from +/// https://github.com/dermesser/integer-encoding-rs/blob/4f57046ae90b6b923ff235a91f0729d3cf868d72/src/reader.rs#L35 +#[derive(Default)] +struct VarIntProcessor { + buf: [u8; 10], + maxsize: usize, + i: usize, +} + +impl VarIntProcessor { + fn new() -> VarIntProcessor { + VarIntProcessor { + maxsize: VI::varint_max_size(), + ..VarIntProcessor::default() + } + } + fn push(&mut self, b: u8) -> Result<(), io::Error> { + if self.i >= self.maxsize { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "Unterminated varint", + )); + } + self.buf[self.i] = b; + self.i += 1; + Ok(()) + } + fn finished(&self) -> bool { + self.i > 0 && (self.buf[self.i - 1] & MSB == 0) + } + fn decode(&self) -> Option { + Some(VI::decode_var(&self.buf[0..self.i])?.0) + } +} + +/// Borrowed from +/// https://github.com/dermesser/integer-encoding-rs/blob/4f57046ae90b6b923ff235a91f0729d3cf868d72/src/varint.rs#L69 +pub(crate) trait VarIntMaxSize { + fn varint_max_size() -> usize; +} + +impl VarIntMaxSize for VI { + fn varint_max_size() -> usize { + (size_of::() * 8 + 7) / 7 + } +} diff --git a/storage/mater/src/v1/reader.rs b/storage/mater/src/v1/reader.rs index cffa3ff96..7eafbb55c 100644 --- a/storage/mater/src/v1/reader.rs +++ b/storage/mater/src/v1/reader.rs @@ -1,17 +1,16 @@ use std::io::Cursor; -use integer_encoding::VarIntAsyncReader; use ipld_core::{cid::Cid, codec::Codec}; use serde_ipld_dagcbor::codec::DagCborCodec; use tokio::io::{AsyncRead, AsyncReadExt}; -use crate::{v1::Header, v2::PRAGMA, Error}; +use crate::{utils::read_varint, v1::Header, v2::PRAGMA, Error}; pub(crate) async fn read_header(mut reader: R) -> Result where R: AsyncRead + Unpin, { - let header_length: usize = reader.read_varint_async().await?; + let header_length: usize = read_varint(&mut reader).await?; let mut header_buffer = vec![0; header_length]; reader.read_exact(&mut header_buffer).await?; @@ -46,7 +45,7 @@ pub(crate) async fn read_block(mut reader: R) -> Result<(Cid, Vec), Error where R: AsyncRead + Unpin, { - let full_block_length: usize = reader.read_varint_async().await?; + let full_block_length: usize = read_varint(&mut reader).await?; let mut full_block_buffer = vec![0; full_block_length]; reader.read_exact(&mut full_block_buffer).await?; diff --git a/storage/mater/src/v1/writer.rs b/storage/mater/src/v1/writer.rs index c7c7f3561..b4fe4826e 100644 --- a/storage/mater/src/v1/writer.rs +++ b/storage/mater/src/v1/writer.rs @@ -3,7 +3,7 @@ use serde_ipld_dagcbor::codec::DagCborCodec; use tokio::io::{AsyncWrite, AsyncWriteExt}; pub use crate::v1::Header; -use crate::{write_varint_async, Error}; +use crate::{utils::write_varint, Error}; /// Write [`crate::v1::Header`] to the provider writer. pub(crate) async fn write_header(writer: &mut W, header: &Header) -> Result @@ -11,7 +11,7 @@ where W: AsyncWrite + Unpin, { let encoded_header = DagCborCodec::encode_to_vec(header)?; - let varint_len = write_varint_async(writer, encoded_header.len()).await?; + let varint_len = write_varint(writer, encoded_header.len()).await?; writer.write_all(&encoded_header).await?; Ok(varint_len + encoded_header.len()) } @@ -31,7 +31,7 @@ where let data = block.as_ref(); let len = cid.encoded_len() + data.len(); - let varint_len = write_varint_async(writer, len).await?; + let varint_len = write_varint(writer, len).await?; // This allocation can probably be spared writer.write_all(&cid.to_bytes()).await?; writer.write_all(block.as_ref()).await?; diff --git a/storage/mater/src/v2/index.rs b/storage/mater/src/v2/index.rs index 71671b8e1..28c613b0b 100644 --- a/storage/mater/src/v2/index.rs +++ b/storage/mater/src/v2/index.rs @@ -1,9 +1,11 @@ use std::{collections::BTreeMap, mem::size_of}; -use integer_encoding::VarIntAsyncReader; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; -use crate::{write_varint_async, Error}; +use crate::{ + utils::{read_varint, write_varint}, + Error, +}; /// `IndexSorted` code format value, as defined in the /// [specification](https://ipld.io/specs/transport/car/carv2/#format-0x0400-indexsorted). @@ -182,11 +184,11 @@ where let mut written_bytes = 0; match index { Index::IndexSorted(index) => { - written_bytes += write_varint_async(writer, INDEX_SORTED_CODE).await?; + written_bytes += write_varint(writer, INDEX_SORTED_CODE).await?; written_bytes += write_index_sorted(writer, index).await?; } Index::MultihashIndexSorted(index) => { - written_bytes += write_varint_async(writer, MULTIHASH_INDEX_SORTED_CODE).await?; + written_bytes += write_varint(writer, MULTIHASH_INDEX_SORTED_CODE).await?; written_bytes += write_multihash_index_sorted(writer, index).await?; } } @@ -260,7 +262,7 @@ pub(crate) async fn read_index(mut reader: R) -> Result where R: AsyncRead + Unpin, { - let index_type: u64 = reader.read_varint_async().await?; + let index_type: u64 = read_varint(&mut reader).await?; return match index_type { INDEX_SORTED_CODE => Ok(Index::IndexSorted(read_index_sorted(&mut reader).await?)), MULTIHASH_INDEX_SORTED_CODE => Ok(Index::MultihashIndexSorted( diff --git a/storage/polka-index/src/local_index_directory/rdb.rs b/storage/polka-index/src/local_index_directory/rdb.rs index f1fa9a411..5d2046ee3 100644 --- a/storage/polka-index/src/local_index_directory/rdb.rs +++ b/storage/polka-index/src/local_index_directory/rdb.rs @@ -1361,7 +1361,7 @@ mod test { db.cf_handle(MULTIHASH_TO_PIECE_CID_CF), rocksdb::IteratorMode::Start, ) - .flat_map(std::convert::identity) + .flatten() .collect(); assert_eq!(indexes.len(), 2); @@ -1377,7 +1377,7 @@ mod test { db.cf_handle(MULTIHASH_TO_PIECE_CID_CF), rocksdb::IteratorMode::Start, ) - .flat_map(std::convert::identity) + .flatten() .collect(); assert!(indexes.is_empty()); From 68db3f17cf8821412e3cca5a5e2122d8e1bbb965 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Thu, 4 Jul 2024 11:05:20 +0200 Subject: [PATCH 07/27] feat: car file upload & download --- Cargo.lock | 1 + cli/polka-storage-provider/Cargo.toml | 1 + .../src/commands/run.rs | 137 ++++++++++-------- cli/polka-storage-provider/src/main.rs | 1 + cli/polka-storage-provider/src/rpc/server.rs | 19 ++- cli/polka-storage-provider/src/storage.rs | 40 +++++ storage/mater/src/stores/filestore.rs | 7 +- 7 files changed, 141 insertions(+), 65 deletions(-) create mode 100644 cli/polka-storage-provider/src/storage.rs diff --git a/Cargo.lock b/Cargo.lock index 00e5bbc58..9d517172e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8482,6 +8482,7 @@ dependencies = [ "serde_json", "subxt", "subxt-signer", + "tempfile", "thiserror", "tokio", "tokio-util", diff --git a/cli/polka-storage-provider/Cargo.toml b/cli/polka-storage-provider/Cargo.toml index 2f8c0b4c6..6674d67fd 100644 --- a/cli/polka-storage-provider/Cargo.toml +++ b/cli/polka-storage-provider/Cargo.toml @@ -22,6 +22,7 @@ serde = { workspace = true } serde_json = { workspace = true } subxt = { workspace = true } subxt-signer = { workspace = true } +tempfile = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } tokio-util = { version = "0.7.11" } diff --git a/cli/polka-storage-provider/src/commands/run.rs b/cli/polka-storage-provider/src/commands/run.rs index 13fdcc15b..5b5ca2468 100644 --- a/cli/polka-storage-provider/src/commands/run.rs +++ b/cli/polka-storage-provider/src/commands/run.rs @@ -1,32 +1,31 @@ -use std::{io, net::SocketAddr, sync::Arc}; +use std::{io, net::SocketAddr, str::FromStr, sync::Arc}; use axum::{ - debug_handler, + body::Body, extract::{Path, Request, State}, http::StatusCode, + response::{IntoResponse, Response}, routing::{get, post}, Router, }; use chrono::Utc; use clap::Parser; use futures::TryStreamExt; -use mater::{create_filestore, Config}; -use tokio::{fs::File, io::BufWriter}; -use tokio_util::io::StreamReader; +use mater::Cid; +use tokio::{fs::File, signal, sync::Notify}; +use tokio_util::io::{ReaderStream, StreamReader}; use tracing::info; use url::Url; use crate::{ cli::CliError, rpc::server::{start_rpc_server, RpcServerState, RPC_SERVER_DEFAULT_BIND_ADDR}, + storage::{content_path, stream_contents_to_car}, substrate, }; const FULL_NODE_DEFAULT_RPC_ADDR: &str = "ws://127.0.0.1:9944"; -/// Directory where uploaded files are stored. -const UPLOADS_DIRECTORY: &str = "uploads"; - /// Command to start the storage provider. #[derive(Debug, Clone, Parser)] pub(crate) struct RunCommand { @@ -47,82 +46,102 @@ impl RunCommand { substrate_client, }); - // Start RPC server - let handle = start_rpc_server(state.clone(), self.listen_addr).await?; - info!("RPC server started at {}", self.listen_addr); - - // Upload endpoint - let router = configure_router(state); - // TODO(no-ref,@cernicc,28/06/2024): Listen on the same address that rpc listens on - let listener = tokio::net::TcpListener::bind("127.0.0.1:3000") - .await - .unwrap(); + // Notify setup for graceful shutdown + let shutdown = Arc::new(Notify::new()); - let _ = axum::serve(listener, router) - .with_graceful_shutdown(shutdown_signal()) - .await - .unwrap(); - - // Monitor shutdown - shutdown_signal().await; + // Listen for shutdown signal + let shutdown_clone = shutdown.clone(); + tokio::spawn(async move { + signal::ctrl_c().await.expect("failed to listen for event"); + shutdown_clone.notify_one(); + }); - // Stop the Server - let _ = handle.stop(); + // Start both servers + tokio::select! { + _ = start_rpc_server(state.clone(), self.listen_addr, shutdown.clone()) => { + info!("RPC server stopped"); + } + _ = start_upload_server(state.clone(), shutdown.clone()) => { + info!("Upload server stopped"); + } + } - // Wait for the server to stop - handle.stopped().await; - info!("RPC server stopped"); + info!("Storage provider stopped"); Ok(()) } } -// TODO(no-ref,@cernicc,28/06/2024): Handle shutdown better -async fn shutdown_signal() { - tokio::signal::ctrl_c() - .await - .expect("failed to install Ctrl+C handler"); -} - -// TODO(no-ref,@cernicc,28/06/2024): Move somewhere else -// TODO(no-ref,@cernicc,28/06/2024): Handle response -// TODO(no-ref,@cernicc,28/06/2024): Better error handling -#[debug_handler] async fn upload( - State(state): State>, + State(_state): State>, request: Request, -) -> Result<(), (StatusCode, String)> { +) -> Result { // Body stream and reader let body_data_stream = request.into_body().into_data_stream(); let body_with_io_error = body_data_stream.map_err(|err| io::Error::new(io::ErrorKind::Other, err)); let body_reader = StreamReader::new(body_with_io_error); - // Stream the body, convert it to car and write it to the file] - // TODO: Remove spawn. Currently used only to check if the future is Send - tokio::spawn(async move { - // Destination file - let path = "something.car"; - let path = std::path::Path::new(UPLOADS_DIRECTORY).join(path); - let file = Box::new(BufWriter::new(File::create(path).await.unwrap())); - - create_filestore(body_reader, file, Config::default()).await; - }); - - Ok(()) + let cid = stream_contents_to_car(body_reader).await.unwrap(); + Ok(cid.to_string()) } async fn download( - State(state): State>, + State(_state): State>, Path(cid): Path, -) -> Result<(), (StatusCode, String)> { - Ok(()) +) -> Result { + // Path to a CAR file + let Ok(cid) = Cid::from_str(&cid) else { + return Err((StatusCode::BAD_REQUEST, "cid incorrect format".to_string())); + }; + let path = content_path(cid); + + // Check if the file exists + if !path.exists() { + // Stream the file + return Err((StatusCode::NOT_FOUND, "File not found".to_string())); + } + + // Open car file + let Ok(file) = File::open(path).await else { + return Err(( + StatusCode::INTERNAL_SERVER_ERROR, + "Failed to open file".to_string(), + )); + }; + + // convert the `AsyncRead` into a `Stream` + let stream = ReaderStream::new(file); + // convert the `Stream` into the Body + let body = Body::from_stream(stream); + + // TODO(no-ref,@cernicc,03/07/2024): What should be the response headers? + Ok(body.into_response()) } -// TODO(no-ref,@cernicc,28/06/2024): Move somewhere else +// TODO(no-ref,@cernicc,28/06/2024): Move routing and handlers somewhere else +// TODO(no-ref,@cernicc,28/06/2024): Nicer error handling in handlers fn configure_router(state: Arc) -> Router { Router::new() .route("/upload", post(upload)) .route("/download/:cid", get(download)) .with_state(state) } + +async fn start_upload_server(state: Arc, shutdown: Arc) { + // Configure router + let router = configure_router(state); + + // TODO(no-ref,@cernicc,04/07/2024): handle error if the address is already + // in use. This should be done when both servers will listen on the same + // address + let address = "127.0.0.1:3000"; + let listener = tokio::net::TcpListener::bind(address).await.unwrap(); + + // Start server + info!("Upload server started at: {address}"); + axum::serve(listener, router) + .with_graceful_shutdown(async move { shutdown.notified().await }) + .await + .unwrap(); +} diff --git a/cli/polka-storage-provider/src/main.rs b/cli/polka-storage-provider/src/main.rs index 9aa390eec..a639e5f46 100644 --- a/cli/polka-storage-provider/src/main.rs +++ b/cli/polka-storage-provider/src/main.rs @@ -4,6 +4,7 @@ mod cli; pub(crate) mod commands; mod rpc; +mod storage; mod substrate; pub(crate) use cli::Cli; diff --git a/cli/polka-storage-provider/src/rpc/server.rs b/cli/polka-storage-provider/src/rpc/server.rs index 87a8990d7..5d6cd96b2 100644 --- a/cli/polka-storage-provider/src/rpc/server.rs +++ b/cli/polka-storage-provider/src/rpc/server.rs @@ -6,7 +6,7 @@ use std::{ use chrono::Utc; use jsonrpsee::{ - server::{Server, ServerHandle}, + server::Server, types::{ error::{INTERNAL_ERROR_CODE, INVALID_PARAMS_CODE}, ErrorObjectOwned, @@ -14,6 +14,8 @@ use jsonrpsee::{ RpcModule, }; use serde_json::Value; +use tokio::sync::Notify; +use tracing::info; use super::{ methods::{common::InfoRequest, register_async, wallet::WalletRequest}, @@ -34,13 +36,24 @@ pub struct RpcServerState { pub async fn start_rpc_server( state: Arc, listen_addr: SocketAddr, -) -> Result { + shutdown: Arc, +) -> Result<(), CliError> { let server = Server::builder().build(listen_addr).await?; let module = create_module(state); let server_handle = server.start(module); + info!("RPC server started at {}", listen_addr); - Ok(server_handle) + // Wait for shutdown signal + shutdown.notified().await; + + // Stop returns and error if the server has already been stopped + let _ = server_handle.stop(); + + // Wait for server to be stopped + server_handle.stopped().await; + + Ok(()) } /// Initialize [`RpcModule`] and register the handlers diff --git a/cli/polka-storage-provider/src/storage.rs b/cli/polka-storage-provider/src/storage.rs new file mode 100644 index 000000000..0a197b8de --- /dev/null +++ b/cli/polka-storage-provider/src/storage.rs @@ -0,0 +1,40 @@ +use std::path::{Path, PathBuf}; + +use mater::{create_filestore, Cid, Config}; +use tempfile::tempdir; +use tokio::{ + fs::{self, File}, + io::{AsyncRead, BufWriter}, +}; + +/// Directory where uploaded files are stored. +const UPLOADS_DIRECTORY: &str = "uploads"; + +/// Reads bytes from the source and writes them to a CAR file. +pub async fn stream_contents_to_car(source: R) -> Result> +where + R: AsyncRead + Unpin, +{ + // Temp file which will be used to store the CAR file content. The temp + // director has a randomized name. + let temp_dir = tempdir()?; + let temp_file_path = temp_dir.path().join("temp.car"); + + // Stream the body from source to the temp file. + let file = File::create(&temp_file_path).await?; + let writer = BufWriter::new(file); + let cid = create_filestore(source, writer, Config::default()).await?; + + // If the file is successfully written, we can now move it to the final + // location. + let final_content_path = content_path(cid); + fs::rename(temp_file_path, final_content_path).await?; + + Ok(cid) +} + +/// Returns the path to the content with the specified CID. +pub fn content_path(cid: Cid) -> PathBuf { + let name = format!("{cid}.car"); + Path::new(UPLOADS_DIRECTORY).join(name) +} diff --git a/storage/mater/src/stores/filestore.rs b/storage/mater/src/stores/filestore.rs index 03024ee97..686fe8b68 100644 --- a/storage/mater/src/stores/filestore.rs +++ b/storage/mater/src/stores/filestore.rs @@ -1,4 +1,5 @@ use futures::stream::StreamExt; +use ipld_core::cid::Cid; use sha2::{Digest, Sha256}; use tokio::io::{AsyncRead, AsyncSeek, AsyncSeekExt, AsyncWrite}; use tokio_util::io::ReaderStream; @@ -14,7 +15,7 @@ async fn balanced_import( mut output: Out, chunk_size: usize, tree_width: usize, -) -> Result<(), Error> +) -> Result where Src: AsyncRead + Unpin, Out: AsyncWrite + AsyncSeek + Unpin, @@ -73,7 +74,7 @@ where let header_v1 = CarV1Header::new(vec![root]); v1::write_header(&mut output, &header_v1).await?; - Ok(()) + Ok(root) } /// Convert a `source` stream into a CARv2 file and write it to `output` stream. @@ -81,7 +82,7 @@ pub async fn create_filestore( source: Src, output: Out, config: Config, -) -> Result<(), Error> +) -> Result where Src: AsyncRead + Unpin, Out: AsyncWrite + AsyncSeek + Unpin, From 9b6e06d4a1b7580944eb3b127f17ad5ea53310fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Thu, 4 Jul 2024 11:07:32 +0200 Subject: [PATCH 08/27] feat: add a question --- cli/polka-storage-provider/src/storage.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/cli/polka-storage-provider/src/storage.rs b/cli/polka-storage-provider/src/storage.rs index 0a197b8de..8482d9491 100644 --- a/cli/polka-storage-provider/src/storage.rs +++ b/cli/polka-storage-provider/src/storage.rs @@ -8,6 +8,7 @@ use tokio::{ }; /// Directory where uploaded files are stored. +// TODO(no-ref,@cernicc,04/07/2024): Where should the uploads be stored? const UPLOADS_DIRECTORY: &str = "uploads"; /// Reads bytes from the source and writes them to a CAR file. From fde6fb8be83499672db16e900df3c858b6bbd812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Thu, 4 Jul 2024 11:33:09 +0200 Subject: [PATCH 09/27] feat: move dependencies to the workspace --- Cargo.toml | 1 + cli/polka-storage-provider/Cargo.toml | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f6f238349..23cca7156 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ substrate-wasm-builder = { git = "https://github.com/paritytech/polkadot-sdk", t async-channel = "2.3.1" async-stream = "0.3.5" async-trait = "0.1.80" +axum = "0.7.5" base64 = "0.22.1" bitflags = "2.5.0" blake2b_simd = { version = "1.0.2" } diff --git a/cli/polka-storage-provider/Cargo.toml b/cli/polka-storage-provider/Cargo.toml index 6674d67fd..86e3ea870 100644 --- a/cli/polka-storage-provider/Cargo.toml +++ b/cli/polka-storage-provider/Cargo.toml @@ -10,10 +10,10 @@ version = "0.1.0" [dependencies] # TODO(no-ref,@cernicc,28/06/2024): Move to workkspacecs async-trait = { workspace = true } -axum = { version = "0.7.5", features = ["macros", "multipart"] } +axum = { workspace = true, features = ["macros", "multipart"] } chrono = { workspace = true, features = ["serde"] } clap = { workspace = true, features = ["derive"] } -futures = "0.3.30" +futures = { workspace = true } jsonrpsee = { workspace = true, features = ["http-client", "server", "ws-client"] } mater = { workspace = true } sc-cli = { workspace = true } @@ -25,7 +25,7 @@ subxt-signer = { workspace = true } tempfile = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } -tokio-util = { version = "0.7.11" } +tokio-util = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter"] } url = { workspace = true } From f9d28b3712383f2a0a1b7de12f66ade3b650a660 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Thu, 4 Jul 2024 11:34:57 +0200 Subject: [PATCH 10/27] fix: remove comment --- cli/polka-storage-provider/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/cli/polka-storage-provider/Cargo.toml b/cli/polka-storage-provider/Cargo.toml index 86e3ea870..b76cf2d52 100644 --- a/cli/polka-storage-provider/Cargo.toml +++ b/cli/polka-storage-provider/Cargo.toml @@ -8,7 +8,6 @@ repository.workspace = true version = "0.1.0" [dependencies] -# TODO(no-ref,@cernicc,28/06/2024): Move to workkspacecs async-trait = { workspace = true } axum = { workspace = true, features = ["macros", "multipart"] } chrono = { workspace = true, features = ["serde"] } From 097133cc73d0fe3d5e513dad1f185b0c53a069c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=CC=81=20Duarte?= Date: Thu, 4 Jul 2024 12:19:01 +0100 Subject: [PATCH 11/27] fix(mater): partially revert filestore changes --- cli/polka-storage-provider/Cargo.toml | 8 +++++-- storage/mater/src/stores/filestore.rs | 17 +++++++------- storage/mater/src/v2/mod.rs | 5 +---- storage/mater/src/v2/writer.rs | 32 +++++++++------------------ 4 files changed, 27 insertions(+), 35 deletions(-) diff --git a/cli/polka-storage-provider/Cargo.toml b/cli/polka-storage-provider/Cargo.toml index b76cf2d52..74c18e027 100644 --- a/cli/polka-storage-provider/Cargo.toml +++ b/cli/polka-storage-provider/Cargo.toml @@ -13,7 +13,11 @@ axum = { workspace = true, features = ["macros", "multipart"] } chrono = { workspace = true, features = ["serde"] } clap = { workspace = true, features = ["derive"] } futures = { workspace = true } -jsonrpsee = { workspace = true, features = ["http-client", "server", "ws-client"] } +jsonrpsee = { workspace = true, features = [ + "http-client", + "server", + "ws-client", +] } mater = { workspace = true } sc-cli = { workspace = true } sealed = { workspace = true } @@ -28,7 +32,7 @@ tokio-util = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter"] } url = { workspace = true } -uuid = { workspace = true } +uuid = { workspace = true, features = ["v4"] } [lints] workspace = true diff --git a/storage/mater/src/stores/filestore.rs b/storage/mater/src/stores/filestore.rs index 686fe8b68..59de91ac2 100644 --- a/storage/mater/src/stores/filestore.rs +++ b/storage/mater/src/stores/filestore.rs @@ -6,7 +6,7 @@ use tokio_util::io::ReaderStream; use super::Config; use crate::{ - multicodec::SHA_256_CODE, unixfs::stream_balanced_tree, v1, v2, CarV1Header, CarV2Header, + multicodec::SHA_256_CODE, unixfs::stream_balanced_tree, CarV1Header, CarV2Header, CarV2Writer, Error, Index, IndexEntry, MultihashIndexSorted, SingleWidthIndex, }; @@ -24,14 +24,15 @@ where let nodes = stream_balanced_tree(chunker, tree_width).peekable(); tokio::pin!(nodes); + let mut writer = CarV2Writer::new(&mut output); let mut position = 0; let placeholder_header = CarV2Header::default(); - position += v2::write_header(&mut output, &placeholder_header).await?; + position += writer.write_header(&placeholder_header).await?; let car_v1_start = position; let placeholder_header_v1 = CarV1Header::default(); - position += v1::write_header(&mut output, &placeholder_header_v1).await?; + position += writer.write_v1_header(&placeholder_header_v1).await?; let mut root = None; let mut entries = vec![]; @@ -40,7 +41,7 @@ where let digest = node_cid.hash().digest().to_owned(); let entry = IndexEntry::new(digest, (position - car_v1_start) as u64); entries.push(entry); - position += v1::write_block(&mut output, &node_cid, &node_bytes).await?; + position += writer.write_block(&node_cid, &node_bytes).await?; if nodes.as_mut().peek().await.is_none() { root = Some(node_cid); @@ -58,21 +59,21 @@ where SHA_256_CODE, single_width_index.into(), )); - v2::write_index(&mut output, &index).await?; + writer.write_index(&index).await?; // Go back to the beginning of the file - output.rewind().await?; + writer.get_inner_mut().rewind().await?; let header = CarV2Header::new( false, (car_v1_start) as u64, (index_offset - car_v1_start) as u64, (index_offset) as u64, ); - v2::write_header(&mut output, &header).await?; + writer.write_header(&header).await?; // If the length of the roots doesn't match the previous one, you WILL OVERWRITE parts of the file let header_v1 = CarV1Header::new(vec![root]); - v1::write_header(&mut output, &header_v1).await?; + writer.write_v1_header(&header_v1).await?; Ok(root) } diff --git a/storage/mater/src/v2/mod.rs b/storage/mater/src/v2/mod.rs index 5b38cb4d0..61c17b92a 100644 --- a/storage/mater/src/v2/mod.rs +++ b/storage/mater/src/v2/mod.rs @@ -3,11 +3,8 @@ mod reader; mod writer; use bitflags::bitflags; -pub use index::{ - write_index, Index, IndexEntry, IndexSorted, MultihashIndexSorted, SingleWidthIndex, -}; +pub use index::{Index, IndexEntry, IndexSorted, MultihashIndexSorted, SingleWidthIndex}; pub use reader::Reader; -pub(crate) use writer::write_header; pub use writer::Writer; /// The pragma for a CARv2. This is also a valid CARv1 header, with version 2 and no root CIDs. diff --git a/storage/mater/src/v2/writer.rs b/storage/mater/src/v2/writer.rs index 2b16616f6..3622e7a6c 100644 --- a/storage/mater/src/v2/writer.rs +++ b/storage/mater/src/v2/writer.rs @@ -27,7 +27,17 @@ where /// /// Returns the number of bytes written. pub async fn write_header(&mut self, header: &Header) -> Result { - write_header(&mut self.writer, header).await + self.writer.write_all(&PRAGMA).await?; + + let mut buffer = [0; 40]; + let mut handle = &mut buffer[..]; + WriteBytesExt::write_u128::(&mut handle, header.characteristics.bits())?; + WriteBytesExt::write_u64::(&mut handle, header.data_offset)?; + WriteBytesExt::write_u64::(&mut handle, header.data_size)?; + WriteBytesExt::write_u64::(&mut handle, header.index_offset)?; + + self.writer.write_all(&buffer).await?; + Ok(PRAGMA.len() + buffer.len()) } /// Write a [`crate::v1::Header`]. @@ -76,26 +86,6 @@ where } } -/// Write a [`Header`]. -/// -/// Returns the number of bytes written. -pub(crate) async fn write_header(writer: &mut W, header: &Header) -> Result -where - W: AsyncWrite + Unpin, -{ - writer.write_all(&PRAGMA).await?; - - let mut buffer = [0; 40]; - let mut handle = &mut buffer[..]; - WriteBytesExt::write_u128::(&mut handle, header.characteristics.bits())?; - WriteBytesExt::write_u64::(&mut handle, header.data_offset)?; - WriteBytesExt::write_u64::(&mut handle, header.data_size)?; - WriteBytesExt::write_u64::(&mut handle, header.index_offset)?; - - writer.write_all(&buffer).await?; - Ok(PRAGMA.len() + buffer.len()) -} - #[cfg(test)] mod tests { use std::{collections::BTreeMap, io::Cursor}; From d2894c70431cb11e692f5d08582fd041e0c43667 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Thu, 4 Jul 2024 13:27:32 +0200 Subject: [PATCH 12/27] fix: taplo fmt --- cli/polka-storage-provider/Cargo.toml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cli/polka-storage-provider/Cargo.toml b/cli/polka-storage-provider/Cargo.toml index 74c18e027..e3697136b 100644 --- a/cli/polka-storage-provider/Cargo.toml +++ b/cli/polka-storage-provider/Cargo.toml @@ -13,11 +13,7 @@ axum = { workspace = true, features = ["macros", "multipart"] } chrono = { workspace = true, features = ["serde"] } clap = { workspace = true, features = ["derive"] } futures = { workspace = true } -jsonrpsee = { workspace = true, features = [ - "http-client", - "server", - "ws-client", -] } +jsonrpsee = { workspace = true, features = ["http-client", "server", "ws-client"] } mater = { workspace = true } sc-cli = { workspace = true } sealed = { workspace = true } From 31bc9eff0f2c9be17438e1f5e0d85b1a6f37c351 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Thu, 4 Jul 2024 14:31:22 +0200 Subject: [PATCH 13/27] feat: rename utils to async_varint --- storage/mater/src/{utils.rs => async_varint.rs} | 0 storage/mater/src/lib.rs | 2 +- storage/mater/src/v1/reader.rs | 2 +- storage/mater/src/v1/writer.rs | 2 +- storage/mater/src/v2/index.rs | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename storage/mater/src/{utils.rs => async_varint.rs} (100%) diff --git a/storage/mater/src/utils.rs b/storage/mater/src/async_varint.rs similarity index 100% rename from storage/mater/src/utils.rs rename to storage/mater/src/async_varint.rs diff --git a/storage/mater/src/lib.rs b/storage/mater/src/lib.rs index e31e9527f..b4a97292c 100644 --- a/storage/mater/src/lib.rs +++ b/storage/mater/src/lib.rs @@ -10,10 +10,10 @@ #![deny(rustdoc::private_intra_doc_links)] #![deny(unsafe_code)] +mod async_varint; mod multicodec; mod stores; mod unixfs; -mod utils; mod v1; mod v2; diff --git a/storage/mater/src/v1/reader.rs b/storage/mater/src/v1/reader.rs index 7eafbb55c..0e201cd00 100644 --- a/storage/mater/src/v1/reader.rs +++ b/storage/mater/src/v1/reader.rs @@ -4,7 +4,7 @@ use ipld_core::{cid::Cid, codec::Codec}; use serde_ipld_dagcbor::codec::DagCborCodec; use tokio::io::{AsyncRead, AsyncReadExt}; -use crate::{utils::read_varint, v1::Header, v2::PRAGMA, Error}; +use crate::{async_varint::read_varint, v1::Header, v2::PRAGMA, Error}; pub(crate) async fn read_header(mut reader: R) -> Result where diff --git a/storage/mater/src/v1/writer.rs b/storage/mater/src/v1/writer.rs index b4fe4826e..e78597f3a 100644 --- a/storage/mater/src/v1/writer.rs +++ b/storage/mater/src/v1/writer.rs @@ -3,7 +3,7 @@ use serde_ipld_dagcbor::codec::DagCborCodec; use tokio::io::{AsyncWrite, AsyncWriteExt}; pub use crate::v1::Header; -use crate::{utils::write_varint, Error}; +use crate::{async_varint::write_varint, Error}; /// Write [`crate::v1::Header`] to the provider writer. pub(crate) async fn write_header(writer: &mut W, header: &Header) -> Result diff --git a/storage/mater/src/v2/index.rs b/storage/mater/src/v2/index.rs index 28c613b0b..92822984b 100644 --- a/storage/mater/src/v2/index.rs +++ b/storage/mater/src/v2/index.rs @@ -3,7 +3,7 @@ use std::{collections::BTreeMap, mem::size_of}; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; use crate::{ - utils::{read_varint, write_varint}, + async_varint::{read_varint, write_varint}, Error, }; From 31504e967189ad837574c7f43e45024262c81dd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Thu, 4 Jul 2024 18:52:56 +0200 Subject: [PATCH 14/27] fix: pr related changes --- Cargo.lock | 20 +++- Cargo.toml | 1 + cli/polka-storage-provider/Cargo.toml | 1 + .../src/commands/run.rs | 112 +++++++++++++----- cli/polka-storage-provider/src/main.rs | 1 + cli/polka-storage-provider/src/rpc/server.rs | 6 +- cli/polka-storage-provider/src/storage.rs | 21 ++-- 7 files changed, 122 insertions(+), 40 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6fe212ea2..c4864d5c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8513,6 +8513,7 @@ dependencies = [ "thiserror", "tokio", "tokio-util", + "tower-http 0.5.2", "tracing", "tracing-subscriber", "url", @@ -11949,7 +11950,7 @@ dependencies = [ "substrate-prometheus-endpoint", "tokio", "tower", - "tower-http", + "tower-http 0.4.4", ] [[package]] @@ -14993,6 +14994,23 @@ dependencies = [ "tower-service", ] +[[package]] +name = "tower-http" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" +dependencies = [ + "bitflags 2.5.0", + "bytes", + "http 1.1.0", + "http-body 1.0.0", + "http-body-util", + "pin-project-lite 0.2.14", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tower-layer" version = "0.3.2" diff --git a/Cargo.toml b/Cargo.toml index 23cca7156..1967152d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -87,6 +87,7 @@ thiserror = { version = "1.0.48" } tokio = "1.37.0" tokio-stream = "0.1.15" tokio-util = "0.7.11" +tower-http = "0.5.2" tracing = "0.1.40" tracing-subscriber = "0.3.18" url = "2.5.0" diff --git a/cli/polka-storage-provider/Cargo.toml b/cli/polka-storage-provider/Cargo.toml index e3697136b..a9ffce5ca 100644 --- a/cli/polka-storage-provider/Cargo.toml +++ b/cli/polka-storage-provider/Cargo.toml @@ -25,6 +25,7 @@ tempfile = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } tokio-util = { workspace = true } +tower-http = { workspace = true, features = ["trace"] } tracing = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter"] } url = { workspace = true } diff --git a/cli/polka-storage-provider/src/commands/run.rs b/cli/polka-storage-provider/src/commands/run.rs index 5b5ca2468..9881d9669 100644 --- a/cli/polka-storage-provider/src/commands/run.rs +++ b/cli/polka-storage-provider/src/commands/run.rs @@ -2,8 +2,8 @@ use std::{io, net::SocketAddr, str::FromStr, sync::Arc}; use axum::{ body::Body, - extract::{Path, Request, State}, - http::StatusCode, + extract::{MatchedPath, Path, Request, State}, + http::{header, StatusCode}, response::{IntoResponse, Response}, routing::{get, post}, Router, @@ -14,13 +14,15 @@ use futures::TryStreamExt; use mater::Cid; use tokio::{fs::File, signal, sync::Notify}; use tokio_util::io::{ReaderStream, StreamReader}; -use tracing::info; +use tower_http::trace::TraceLayer; +use tracing::{error, info, info_span, instrument}; use url::Url; +use uuid::Uuid; use crate::{ cli::CliError, rpc::server::{start_rpc_server, RpcServerState, RPC_SERVER_DEFAULT_BIND_ADDR}, - storage::{content_path, stream_contents_to_car}, + storage::{content_path, stream_contents_to_car, STORAGE_DEFAULT_DIRECTORY}, substrate, }; @@ -35,6 +37,9 @@ pub(crate) struct RunCommand { /// Address and port used for RPC server. #[arg(long, default_value = RPC_SERVER_DEFAULT_BIND_ADDR)] pub listen_addr: SocketAddr, + /// Directory where uploaded files are stored. + #[arg(long, default_value = STORAGE_DEFAULT_DIRECTORY)] + pub storage_dir: String, } impl RunCommand { @@ -44,6 +49,7 @@ impl RunCommand { let state = Arc::new(RpcServerState { start_time: Utc::now(), substrate_client, + storage_dir: self.storage_dir.clone(), }); // Notify setup for graceful shutdown @@ -62,61 +68,84 @@ impl RunCommand { info!("RPC server stopped"); } _ = start_upload_server(state.clone(), shutdown.clone()) => { - info!("Upload server stopped"); + info!("upload server stopped"); } } - info!("Storage provider stopped"); + info!("storage provider stopped"); Ok(()) } } +/// Handler for the upload endpoint. It receives a stream of bytes, coverts them +/// to a CAR file and returns the CID of the CAR file to the user. async fn upload( - State(_state): State>, + State(state): State>, request: Request, ) -> Result { - // Body stream and reader - let body_data_stream = request.into_body().into_data_stream(); - let body_with_io_error = - body_data_stream.map_err(|err| io::Error::new(io::ErrorKind::Other, err)); - let body_reader = StreamReader::new(body_with_io_error); - - let cid = stream_contents_to_car(body_reader).await.unwrap(); - Ok(cid.to_string()) + // Body reader + let body_reader = StreamReader::new( + request + .into_body() + .into_data_stream() + .map_err(|err| io::Error::new(io::ErrorKind::Other, err)), + ); + + stream_contents_to_car(&state.storage_dir, body_reader) + .await + .map_err(|err| { + error!(?err, "failed to create a CAR file"); + ( + StatusCode::INTERNAL_SERVER_ERROR, + "failed to create a CAR file".to_string(), + ) + }) + .map(|cid| cid.to_string()) } +/// Handler for the download endpoint. It receives a CID and streams the CAR +/// file back to the user. async fn download( - State(_state): State>, + State(state): State>, Path(cid): Path, ) -> Result { // Path to a CAR file let Ok(cid) = Cid::from_str(&cid) else { + error!(cid, "cid incorrect format"); return Err((StatusCode::BAD_REQUEST, "cid incorrect format".to_string())); }; - let path = content_path(cid); + let (file_name, path) = content_path(&state.storage_dir, cid); + info!(path = %path.display(), "file requested"); // Check if the file exists if !path.exists() { - // Stream the file - return Err((StatusCode::NOT_FOUND, "File not found".to_string())); + error!(?path, "file not found"); + return Err((StatusCode::NOT_FOUND, "file not found".to_string())); } // Open car file let Ok(file) = File::open(path).await else { return Err(( StatusCode::INTERNAL_SERVER_ERROR, - "Failed to open file".to_string(), + "failed to open file".to_string(), )); }; - // convert the `AsyncRead` into a `Stream` + // Convert the `AsyncRead` into a `Stream` let stream = ReaderStream::new(file); - // convert the `Stream` into the Body + // Convert the `Stream` into the Body let body = Body::from_stream(stream); - - // TODO(no-ref,@cernicc,03/07/2024): What should be the response headers? - Ok(body.into_response()) + // Response headers + let headers = [ + (header::CONTENT_TYPE, "application/octet-stream"), + ( + header::CONTENT_DISPOSITION, + &format!("attachment; filename=\"{:?}\"", file_name), + ), + ]; + + Ok((headers, body).into_response()) } // TODO(no-ref,@cernicc,28/06/2024): Move routing and handlers somewhere else @@ -126,9 +155,31 @@ fn configure_router(state: Arc) -> Router { .route("/upload", post(upload)) .route("/download/:cid", get(download)) .with_state(state) + // Tracing layer + .layer( + TraceLayer::new_for_http().make_span_with(|request: &Request<_>| { + // Log the matched route's path (with placeholders not filled in). + // Use request.uri() or OriginalUri if you want the real path. + let matched_path = request + .extensions() + .get::() + .map(MatchedPath::as_str); + + info_span!( + "request", + method = ?request.method(), + matched_path, + request_id = %Uuid::new_v4() + ) + }), + ) } -async fn start_upload_server(state: Arc, shutdown: Arc) { +#[instrument(skip_all)] +async fn start_upload_server( + state: Arc, + shutdown: Arc, +) -> Result<(), CliError> { // Configure router let router = configure_router(state); @@ -136,12 +187,13 @@ async fn start_upload_server(state: Arc, shutdown: Arc) // in use. This should be done when both servers will listen on the same // address let address = "127.0.0.1:3000"; - let listener = tokio::net::TcpListener::bind(address).await.unwrap(); + let listener = tokio::net::TcpListener::bind(address).await?; // Start server - info!("Upload server started at: {address}"); + info!("upload server started at: {address}"); axum::serve(listener, router) .with_graceful_shutdown(async move { shutdown.notified().await }) - .await - .unwrap(); + .await?; + + Ok(()) } diff --git a/cli/polka-storage-provider/src/main.rs b/cli/polka-storage-provider/src/main.rs index a639e5f46..5c4c8b502 100644 --- a/cli/polka-storage-provider/src/main.rs +++ b/cli/polka-storage-provider/src/main.rs @@ -1,5 +1,6 @@ //! A CLI application that facilitates management operations over a running full node and other components. #![deny(unused_crate_dependencies)] +#![deny(clippy::unwrap_used)] mod cli; pub(crate) mod commands; diff --git a/cli/polka-storage-provider/src/rpc/server.rs b/cli/polka-storage-provider/src/rpc/server.rs index 5d6cd96b2..7c2313265 100644 --- a/cli/polka-storage-provider/src/rpc/server.rs +++ b/cli/polka-storage-provider/src/rpc/server.rs @@ -15,7 +15,7 @@ use jsonrpsee::{ }; use serde_json::Value; use tokio::sync::Notify; -use tracing::info; +use tracing::{info, instrument}; use super::{ methods::{common::InfoRequest, register_async, wallet::WalletRequest}, @@ -30,9 +30,13 @@ pub const RPC_SERVER_DEFAULT_BIND_ADDR: &str = "127.0.0.1:8000"; pub struct RpcServerState { pub start_time: chrono::DateTime, pub substrate_client: substrate::Client, + // TODO(no-ref,@cernicc,04/07/2024): Should use config struct that would + // provide the configuration across the provider + pub storage_dir: String, } /// Start the RPC server. +#[instrument(skip_all)] pub async fn start_rpc_server( state: Arc, listen_addr: SocketAddr, diff --git a/cli/polka-storage-provider/src/storage.rs b/cli/polka-storage-provider/src/storage.rs index 8482d9491..cb02a2e65 100644 --- a/cli/polka-storage-provider/src/storage.rs +++ b/cli/polka-storage-provider/src/storage.rs @@ -6,13 +6,16 @@ use tokio::{ fs::{self, File}, io::{AsyncRead, BufWriter}, }; +use tracing::info; /// Directory where uploaded files are stored. -// TODO(no-ref,@cernicc,04/07/2024): Where should the uploads be stored? -const UPLOADS_DIRECTORY: &str = "uploads"; +pub const STORAGE_DEFAULT_DIRECTORY: &str = "./uploads"; /// Reads bytes from the source and writes them to a CAR file. -pub async fn stream_contents_to_car(source: R) -> Result> +pub async fn stream_contents_to_car( + folder: &str, + source: R, +) -> Result> where R: AsyncRead + Unpin, { @@ -28,14 +31,16 @@ where // If the file is successfully written, we can now move it to the final // location. - let final_content_path = content_path(cid); - fs::rename(temp_file_path, final_content_path).await?; + let (_, final_content_path) = content_path(folder, cid); + fs::rename(temp_file_path, &final_content_path).await?; + info!(location = %final_content_path.display(), "CAR file created"); Ok(cid) } -/// Returns the path to the content with the specified CID. -pub fn content_path(cid: Cid) -> PathBuf { +/// Returns the tuple of file name and path for a specified Cid. +pub fn content_path(folder: &str, cid: Cid) -> (String, PathBuf) { let name = format!("{cid}.car"); - Path::new(UPLOADS_DIRECTORY).join(name) + let path = Path::new(folder).join(&name); + (name, path) } From d7b7613760420fc3d78bf30cc73e45d323915a84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Fri, 5 Jul 2024 09:42:16 +0200 Subject: [PATCH 15/27] fix: refactor shutdown mechanism --- .../src/commands/run.rs | 50 +++++++++++-------- cli/polka-storage-provider/src/rpc/server.rs | 11 ++-- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/cli/polka-storage-provider/src/commands/run.rs b/cli/polka-storage-provider/src/commands/run.rs index 9881d9669..12d974892 100644 --- a/cli/polka-storage-provider/src/commands/run.rs +++ b/cli/polka-storage-provider/src/commands/run.rs @@ -12,7 +12,11 @@ use chrono::Utc; use clap::Parser; use futures::TryStreamExt; use mater::Cid; -use tokio::{fs::File, signal, sync::Notify}; +use tokio::{ + fs::File, + join, signal, + sync::broadcast::{self, Receiver, Sender}, +}; use tokio_util::io::{ReaderStream, StreamReader}; use tower_http::trace::TraceLayer; use tracing::{error, info, info_span, instrument}; @@ -52,32 +56,34 @@ impl RunCommand { storage_dir: self.storage_dir.clone(), }); - // Notify setup for graceful shutdown - let shutdown = Arc::new(Notify::new()); - - // Listen for shutdown signal - let shutdown_clone = shutdown.clone(); - tokio::spawn(async move { - signal::ctrl_c().await.expect("failed to listen for event"); - shutdown_clone.notify_one(); - }); + // Setup shutdown mechanism + let (notify_shutdown_tx, _) = broadcast::channel(1); + tokio::spawn(shutdown_trigger(notify_shutdown_tx.clone())); // Start both servers - tokio::select! { - _ = start_rpc_server(state.clone(), self.listen_addr, shutdown.clone()) => { - info!("RPC server stopped"); - } - _ = start_upload_server(state.clone(), shutdown.clone()) => { - info!("upload server stopped"); - } - } + let _ = join!( + start_rpc_server( + state.clone(), + self.listen_addr, + notify_shutdown_tx.subscribe() + ), + start_upload_server(state.clone(), notify_shutdown_tx.subscribe()) + ); info!("storage provider stopped"); - Ok(()) } } +async fn shutdown_trigger(notify_shutdown_tx: Sender<()>) { + // Listen for the shutdown signal + signal::ctrl_c().await.expect("failed to listen for event"); + + // Notify the shutdown + info!("shutdown signal received"); + let _ = notify_shutdown_tx.send(()); +} + /// Handler for the upload endpoint. It receives a stream of bytes, coverts them /// to a CAR file and returns the CID of the CAR file to the user. async fn upload( @@ -178,7 +184,7 @@ fn configure_router(state: Arc) -> Router { #[instrument(skip_all)] async fn start_upload_server( state: Arc, - shutdown: Arc, + mut notify_shutdown_rx: Receiver<()>, ) -> Result<(), CliError> { // Configure router let router = configure_router(state); @@ -192,7 +198,9 @@ async fn start_upload_server( // Start server info!("upload server started at: {address}"); axum::serve(listener, router) - .with_graceful_shutdown(async move { shutdown.notified().await }) + .with_graceful_shutdown(async move { + let _ = notify_shutdown_rx.recv().await; + }) .await?; Ok(()) diff --git a/cli/polka-storage-provider/src/rpc/server.rs b/cli/polka-storage-provider/src/rpc/server.rs index 7c2313265..5d3dc1120 100644 --- a/cli/polka-storage-provider/src/rpc/server.rs +++ b/cli/polka-storage-provider/src/rpc/server.rs @@ -14,7 +14,7 @@ use jsonrpsee::{ RpcModule, }; use serde_json::Value; -use tokio::sync::Notify; +use tokio::sync::broadcast::Receiver; use tracing::{info, instrument}; use super::{ @@ -40,7 +40,7 @@ pub struct RpcServerState { pub async fn start_rpc_server( state: Arc, listen_addr: SocketAddr, - shutdown: Arc, + mut notify_shutdown_rx: Receiver<()>, ) -> Result<(), CliError> { let server = Server::builder().build(listen_addr).await?; @@ -48,10 +48,11 @@ pub async fn start_rpc_server( let server_handle = server.start(module); info!("RPC server started at {}", listen_addr); - // Wait for shutdown signal - shutdown.notified().await; + // Wait for shutdown signal. No need to handle the error. We stop ste server + // in any case. + let _ = notify_shutdown_rx.recv().await; - // Stop returns and error if the server has already been stopped + // Stop returns and error if the server has already been stopped. let _ = server_handle.stop(); // Wait for server to be stopped From 2f197ff7e02d8185e3043dd48065e5c990e3f603 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Fri, 5 Jul 2024 09:54:33 +0200 Subject: [PATCH 16/27] fix: small change --- cli/polka-storage-provider/src/commands/run.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cli/polka-storage-provider/src/commands/run.rs b/cli/polka-storage-provider/src/commands/run.rs index 12d974892..4553bfc30 100644 --- a/cli/polka-storage-provider/src/commands/run.rs +++ b/cli/polka-storage-provider/src/commands/run.rs @@ -16,6 +16,7 @@ use tokio::{ fs::File, join, signal, sync::broadcast::{self, Receiver, Sender}, + try_join, }; use tokio_util::io::{ReaderStream, StreamReader}; use tower_http::trace::TraceLayer; @@ -61,14 +62,14 @@ impl RunCommand { tokio::spawn(shutdown_trigger(notify_shutdown_tx.clone())); // Start both servers - let _ = join!( + try_join!( start_rpc_server( state.clone(), self.listen_addr, notify_shutdown_tx.subscribe() ), start_upload_server(state.clone(), notify_shutdown_tx.subscribe()) - ); + )?; info!("storage provider stopped"); Ok(()) From 033073cfd942fc27bf5a139653d9f2cc39c2ff5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Fri, 5 Jul 2024 10:20:39 +0200 Subject: [PATCH 17/27] fix: remove unused import --- cli/polka-storage-provider/src/commands/run.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/polka-storage-provider/src/commands/run.rs b/cli/polka-storage-provider/src/commands/run.rs index 4553bfc30..d30e37b65 100644 --- a/cli/polka-storage-provider/src/commands/run.rs +++ b/cli/polka-storage-provider/src/commands/run.rs @@ -14,7 +14,7 @@ use futures::TryStreamExt; use mater::Cid; use tokio::{ fs::File, - join, signal, + signal, sync::broadcast::{self, Receiver, Sender}, try_join, }; From 05e0662788c274cbf1c031846ea1047a182e3441 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Fri, 5 Jul 2024 15:36:33 +0200 Subject: [PATCH 18/27] fix: pr related suggestions --- .../src/commands/run.rs | 199 ++++-------------- cli/polka-storage-provider/src/rpc/server.rs | 6 +- cli/polka-storage-provider/src/storage.rs | 155 +++++++++++++- storage/mater/src/async_varint.rs | 16 +- storage/mater/src/stores/filestore.rs | 2 +- 5 files changed, 196 insertions(+), 182 deletions(-) diff --git a/cli/polka-storage-provider/src/commands/run.rs b/cli/polka-storage-provider/src/commands/run.rs index d30e37b65..e85cc22f8 100644 --- a/cli/polka-storage-provider/src/commands/run.rs +++ b/cli/polka-storage-provider/src/commands/run.rs @@ -1,38 +1,31 @@ -use std::{io, net::SocketAddr, str::FromStr, sync::Arc}; +use std::{env, net::SocketAddr, path::PathBuf, sync::Arc}; -use axum::{ - body::Body, - extract::{MatchedPath, Path, Request, State}, - http::{header, StatusCode}, - response::{IntoResponse, Response}, - routing::{get, post}, - Router, -}; use chrono::Utc; use clap::Parser; -use futures::TryStreamExt; -use mater::Cid; use tokio::{ - fs::File, signal, - sync::broadcast::{self, Receiver, Sender}, - try_join, + sync::broadcast::{self, Sender}, }; -use tokio_util::io::{ReaderStream, StreamReader}; -use tower_http::trace::TraceLayer; -use tracing::{error, info, info_span, instrument}; +use tracing::info; use url::Url; -use uuid::Uuid; use crate::{ cli::CliError, rpc::server::{start_rpc_server, RpcServerState, RPC_SERVER_DEFAULT_BIND_ADDR}, - storage::{content_path, stream_contents_to_car, STORAGE_DEFAULT_DIRECTORY}, + storage::start_upload_server, substrate, }; +/// Default RPC API endpoint used by the parachain node. const FULL_NODE_DEFAULT_RPC_ADDR: &str = "ws://127.0.0.1:9944"; +/// Default storage path. +fn default_storage_path() -> PathBuf { + let mut current_dir = env::current_dir().expect("failed to get current directory"); + current_dir.push("uploads"); + current_dir +} + /// Command to start the storage provider. #[derive(Debug, Clone, Parser)] pub(crate) struct RunCommand { @@ -43,8 +36,8 @@ pub(crate) struct RunCommand { #[arg(long, default_value = RPC_SERVER_DEFAULT_BIND_ADDR)] pub listen_addr: SocketAddr, /// Directory where uploaded files are stored. - #[arg(long, default_value = STORAGE_DEFAULT_DIRECTORY)] - pub storage_dir: String, + #[arg(long, default_value = default_storage_path().into_os_string())] + pub storage_dir: PathBuf, } impl RunCommand { @@ -57,152 +50,32 @@ impl RunCommand { storage_dir: self.storage_dir.clone(), }); - // Setup shutdown mechanism + // Setup shutdown channel let (notify_shutdown_tx, _) = broadcast::channel(1); - tokio::spawn(shutdown_trigger(notify_shutdown_tx.clone())); - - // Start both servers - try_join!( - start_rpc_server( - state.clone(), - self.listen_addr, - notify_shutdown_tx.subscribe() - ), - start_upload_server(state.clone(), notify_shutdown_tx.subscribe()) - )?; - - info!("storage provider stopped"); - Ok(()) - } -} - -async fn shutdown_trigger(notify_shutdown_tx: Sender<()>) { - // Listen for the shutdown signal - signal::ctrl_c().await.expect("failed to listen for event"); - - // Notify the shutdown - info!("shutdown signal received"); - let _ = notify_shutdown_tx.send(()); -} - -/// Handler for the upload endpoint. It receives a stream of bytes, coverts them -/// to a CAR file and returns the CID of the CAR file to the user. -async fn upload( - State(state): State>, - request: Request, -) -> Result { - // Body reader - let body_reader = StreamReader::new( - request - .into_body() - .into_data_stream() - .map_err(|err| io::Error::new(io::ErrorKind::Other, err)), - ); - - stream_contents_to_car(&state.storage_dir, body_reader) - .await - .map_err(|err| { - error!(?err, "failed to create a CAR file"); - ( - StatusCode::INTERNAL_SERVER_ERROR, - "failed to create a CAR file".to_string(), - ) - }) - .map(|cid| cid.to_string()) -} -/// Handler for the download endpoint. It receives a CID and streams the CAR -/// file back to the user. -async fn download( - State(state): State>, - Path(cid): Path, -) -> Result { - // Path to a CAR file - let Ok(cid) = Cid::from_str(&cid) else { - error!(cid, "cid incorrect format"); - return Err((StatusCode::BAD_REQUEST, "cid incorrect format".to_string())); - }; - let (file_name, path) = content_path(&state.storage_dir, cid); - info!(path = %path.display(), "file requested"); - - // Check if the file exists - if !path.exists() { - error!(?path, "file not found"); - return Err((StatusCode::NOT_FOUND, "file not found".to_string())); - } - - // Open car file - let Ok(file) = File::open(path).await else { - return Err(( - StatusCode::INTERNAL_SERVER_ERROR, - "failed to open file".to_string(), + // Start the tasks in the background + let rpc_handler = tokio::spawn(start_rpc_server( + state.clone(), + self.listen_addr, + notify_shutdown_tx.subscribe(), + )); + let upload_handler = tokio::spawn(start_upload_server( + state.clone(), + notify_shutdown_tx.subscribe(), )); - }; - - // Convert the `AsyncRead` into a `Stream` - let stream = ReaderStream::new(file); - // Convert the `Stream` into the Body - let body = Body::from_stream(stream); - // Response headers - let headers = [ - (header::CONTENT_TYPE, "application/octet-stream"), - ( - header::CONTENT_DISPOSITION, - &format!("attachment; filename=\"{:?}\"", file_name), - ), - ]; - - Ok((headers, body).into_response()) -} - -// TODO(no-ref,@cernicc,28/06/2024): Move routing and handlers somewhere else -// TODO(no-ref,@cernicc,28/06/2024): Nicer error handling in handlers -fn configure_router(state: Arc) -> Router { - Router::new() - .route("/upload", post(upload)) - .route("/download/:cid", get(download)) - .with_state(state) - // Tracing layer - .layer( - TraceLayer::new_for_http().make_span_with(|request: &Request<_>| { - // Log the matched route's path (with placeholders not filled in). - // Use request.uri() or OriginalUri if you want the real path. - let matched_path = request - .extensions() - .get::() - .map(MatchedPath::as_str); - - info_span!( - "request", - method = ?request.method(), - matched_path, - request_id = %Uuid::new_v4() - ) - }), - ) -} - -#[instrument(skip_all)] -async fn start_upload_server( - state: Arc, - mut notify_shutdown_rx: Receiver<()>, -) -> Result<(), CliError> { - // Configure router - let router = configure_router(state); - // TODO(no-ref,@cernicc,04/07/2024): handle error if the address is already - // in use. This should be done when both servers will listen on the same - // address - let address = "127.0.0.1:3000"; - let listener = tokio::net::TcpListener::bind(address).await?; + // Wait for SIGTERM on the main thread and once received "unblock" + signal::ctrl_c().await.expect("failed to listen for event"); + // Send the shutdown signal + let _ = notify_shutdown_tx.send(()); - // Start server - info!("upload server started at: {address}"); - axum::serve(listener, router) - .with_graceful_shutdown(async move { - let _ = notify_shutdown_rx.recv().await; - }) - .await?; + // We can't wait forever, but we wait on this first so we can give extra + // time for any pending uploads to finish + let _ = tokio::time::timeout(std::time::Duration::from_secs(10), rpc_handler).await; + // And still limit the uploads to a bound anyways + let _ = tokio::time::timeout(std::time::Duration::from_secs(30), upload_handler).await; - Ok(()) + info!("storage provider stopped"); + Ok(()) + } } diff --git a/cli/polka-storage-provider/src/rpc/server.rs b/cli/polka-storage-provider/src/rpc/server.rs index 5d3dc1120..fe3044eca 100644 --- a/cli/polka-storage-provider/src/rpc/server.rs +++ b/cli/polka-storage-provider/src/rpc/server.rs @@ -1,6 +1,7 @@ use std::{ fmt::{Debug, Display}, net::SocketAddr, + path::PathBuf, sync::Arc, }; @@ -32,7 +33,7 @@ pub struct RpcServerState { pub substrate_client: substrate::Client, // TODO(no-ref,@cernicc,04/07/2024): Should use config struct that would // provide the configuration across the provider - pub storage_dir: String, + pub storage_dir: PathBuf, } /// Start the RPC server. @@ -52,7 +53,8 @@ pub async fn start_rpc_server( // in any case. let _ = notify_shutdown_rx.recv().await; - // Stop returns and error if the server has already been stopped. + // Stop returns an error if the server has already been stopped. + // PRE-COND: the server is only shutdown by receiving from `notify_shutdown_rx` let _ = server_handle.stop(); // Wait for server to be stopped diff --git a/cli/polka-storage-provider/src/storage.rs b/cli/polka-storage-provider/src/storage.rs index cb02a2e65..f535a8c45 100644 --- a/cli/polka-storage-provider/src/storage.rs +++ b/cli/polka-storage-provider/src/storage.rs @@ -1,24 +1,163 @@ -use std::path::{Path, PathBuf}; +use std::{io, path::PathBuf, str::FromStr, sync::Arc}; +use axum::{ + body::Body, + extract::{MatchedPath, Path, Request, State}, + http::{header, StatusCode}, + response::{IntoResponse, Response}, + routing::{get, post}, + Router, +}; +use futures::TryStreamExt; use mater::{create_filestore, Cid, Config}; use tempfile::tempdir; use tokio::{ fs::{self, File}, io::{AsyncRead, BufWriter}, + sync::broadcast::Receiver, }; -use tracing::info; +use tokio_util::io::{ReaderStream, StreamReader}; +use tower_http::trace::TraceLayer; +use tracing::{error, info, info_span, instrument}; +use uuid::Uuid; + +use crate::{cli::CliError, rpc::server::RpcServerState}; + +#[instrument(skip_all)] +pub async fn start_upload_server( + state: Arc, + mut notify_shutdown_rx: Receiver<()>, +) -> Result<(), CliError> { + // Configure router + let router = configure_router(state); + + // TODO(no-ref,@cernicc,04/07/2024): handle error if the address is already + // in use. This should be done when both servers will listen on the same + // address + let address = "127.0.0.1:3000"; + let listener = tokio::net::TcpListener::bind(address).await?; + + // Start server + info!("upload server started at: {address}"); + axum::serve(listener, router) + .with_graceful_shutdown(async move { + let _ = notify_shutdown_rx.recv().await; + }) + .await?; + + Ok(()) +} + +// TODO(no-ref,@cernicc,28/06/2024): Nicer error handling in handlers +fn configure_router(state: Arc) -> Router { + Router::new() + .route("/upload", post(upload)) + .route("/download/:cid", get(download)) + .with_state(state) + // Tracing layer + .layer( + TraceLayer::new_for_http().make_span_with(|request: &Request<_>| { + // Log the matched route's path (with placeholders not filled in). + // Use request.uri() or OriginalUri if you want the real path. + let matched_path = request + .extensions() + .get::() + .map(MatchedPath::as_str); + + info_span!( + "request", + method = ?request.method(), + matched_path, + request_id = %Uuid::new_v4() + ) + }), + ) +} -/// Directory where uploaded files are stored. -pub const STORAGE_DEFAULT_DIRECTORY: &str = "./uploads"; +/// Handler for the upload endpoint. It receives a stream of bytes, coverts them +/// to a CAR file and returns the CID of the CAR file to the user. +async fn upload( + State(state): State>, + request: Request, +) -> Result { + // Body reader + let body_reader = StreamReader::new( + request + .into_body() + .into_data_stream() + .map_err(|err| io::Error::new(io::ErrorKind::Other, err)), + ); + + stream_contents_to_car(&state.storage_dir, body_reader) + .await + .map_err(|err| { + error!(?err, "failed to create a CAR file"); + ( + StatusCode::INTERNAL_SERVER_ERROR, + "failed to create a CAR file".to_string(), + ) + }) + .map(|cid| cid.to_string()) +} + +/// Handler for the download endpoint. It receives a CID and streams the CAR +/// file back to the user. +async fn download( + State(state): State>, + Path(cid): Path, +) -> Result { + // Path to a CAR file + let Ok(cid) = Cid::from_str(&cid) else { + error!(cid, "cid incorrect format"); + return Err((StatusCode::BAD_REQUEST, "cid incorrect format".to_string())); + }; + let (file_name, path) = content_path(&state.storage_dir, cid); + info!(path = %path.display(), "file requested"); + + // Check if the file exists + if !path.exists() { + error!(?path, "file not found"); + return Err((StatusCode::NOT_FOUND, "file not found".to_string())); + } + + // Open car file + let Ok(file) = File::open(path).await else { + return Err(( + StatusCode::INTERNAL_SERVER_ERROR, + "failed to open file".to_string(), + )); + }; + + // Convert the `AsyncRead` into a `Stream` + let stream = ReaderStream::new(file); + // Convert the `Stream` into the Body + let body = Body::from_stream(stream); + // Response headers + let headers = [ + (header::CONTENT_TYPE, "application/octet-stream"), + ( + header::CONTENT_DISPOSITION, + &format!("attachment; filename=\"{:?}\"", file_name), + ), + ]; + + Ok((headers, body).into_response()) +} /// Reads bytes from the source and writes them to a CAR file. -pub async fn stream_contents_to_car( - folder: &str, +async fn stream_contents_to_car( + folder: &std::path::Path, source: R, ) -> Result> where R: AsyncRead + Unpin, { + // Create a storage folder if it doesn't exist. + if !folder.exists() { + info!("creating storage folder: {}", folder.display()); + fs::create_dir_all(folder).await?; + } + // Temp file which will be used to store the CAR file content. The temp // director has a randomized name. let temp_dir = tempdir()?; @@ -39,8 +178,8 @@ where } /// Returns the tuple of file name and path for a specified Cid. -pub fn content_path(folder: &str, cid: Cid) -> (String, PathBuf) { +fn content_path(folder: &std::path::Path, cid: Cid) -> (String, PathBuf) { let name = format!("{cid}.car"); - let path = Path::new(folder).join(&name); + let path = folder.join(&name); (name, path) } diff --git a/storage/mater/src/async_varint.rs b/storage/mater/src/async_varint.rs index dc7c92db0..3920cc174 100644 --- a/storage/mater/src/async_varint.rs +++ b/storage/mater/src/async_varint.rs @@ -1,19 +1,19 @@ /// Utility functions for the mater crate. The contents were mostly borrowed -/// from the https://github.com/dermesser/integer-encoding-rs. +/// from the . /// /// The original issue why we needed to borrow the implantation of the reader /// and writer is -/// https://github.com/dermesser/integer-encoding-rs/blob/4f57046ae90b6b923ff235a91f0729d3cf868d72/src/writer.rs#L20. +/// . /// This specifies the Send bound as optional. The side effect of this choice is /// that all futures using the writer or reader are non Send and there is no way /// to make them Send. /// /// The second crate researched was -/// https://github.com/paritytech/unsigned-varint/tree/master. Issue with that +/// . Issue with that /// crate is that it only implements AsyncRead and AsyncWrite from the futures /// crate and not tokio. For the future reference we could probably used /// `unsigned-varint` with the tokio and use -/// https://docs.rs/tokio-util/latest/tokio_util/compat/trait.FuturesAsyncReadCompatExt.html +/// /// as the compatibility layer. use std::{io, mem::size_of}; @@ -23,7 +23,7 @@ use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; /// Write a VarInt integer to an asynchronous writer. /// /// Borrowed from: -/// https://github.com/dermesser/integer-encoding-rs/blob/4f57046ae90b6b923ff235a91f0729d3cf868d72/src/writer.rs#L29 +/// pub(crate) async fn write_varint(writer: &mut W, n: VI) -> Result where W: AsyncWrite + Unpin, @@ -43,7 +43,7 @@ where /// On EOF, an io::Error with io::ErrorKind::UnexpectedEof is returned. /// /// Borrowed from: -/// https://github.com/dermesser/integer-encoding-rs/blob/4f57046ae90b6b923ff235a91f0729d3cf868d72/src/reader.rs#L70 +/// pub(crate) async fn read_varint(reader: &mut R) -> Result where R: AsyncRead + Unpin, @@ -76,7 +76,7 @@ const MSB: u8 = 0b1000_0000; /// VarIntProcessor encapsulates the logic for decoding a VarInt byte-by-byte. /// /// Borrowed from -/// https://github.com/dermesser/integer-encoding-rs/blob/4f57046ae90b6b923ff235a91f0729d3cf868d72/src/reader.rs#L35 +/// #[derive(Default)] struct VarIntProcessor { buf: [u8; 10], @@ -111,7 +111,7 @@ impl VarIntProcessor { } /// Borrowed from -/// https://github.com/dermesser/integer-encoding-rs/blob/4f57046ae90b6b923ff235a91f0729d3cf868d72/src/varint.rs#L69 +/// pub(crate) trait VarIntMaxSize { fn varint_max_size() -> usize; } diff --git a/storage/mater/src/stores/filestore.rs b/storage/mater/src/stores/filestore.rs index 59de91ac2..aaf20b955 100644 --- a/storage/mater/src/stores/filestore.rs +++ b/storage/mater/src/stores/filestore.rs @@ -78,7 +78,7 @@ where Ok(root) } -/// Convert a `source` stream into a CARv2 file and write it to `output` stream. +/// Convert a `source` stream into a CARv2 file and write it to an `output` stream. pub async fn create_filestore( source: Src, output: Out, From 1ca53ac9178f27a72e519f4a517e0a085f8c7a0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=CC=81=20Duarte?= Date: Fri, 5 Jul 2024 17:17:28 +0100 Subject: [PATCH 19/27] style: clippy --- cli/polka-storage-provider/src/commands/run.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cli/polka-storage-provider/src/commands/run.rs b/cli/polka-storage-provider/src/commands/run.rs index e85cc22f8..1be4ed6b5 100644 --- a/cli/polka-storage-provider/src/commands/run.rs +++ b/cli/polka-storage-provider/src/commands/run.rs @@ -2,10 +2,7 @@ use std::{env, net::SocketAddr, path::PathBuf, sync::Arc}; use chrono::Utc; use clap::Parser; -use tokio::{ - signal, - sync::broadcast::{self, Sender}, -}; +use tokio::{signal, sync::broadcast}; use tracing::info; use url::Url; From 0ede48bab879924d1010faceb4db0eecfc6541b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Mon, 8 Jul 2024 09:51:05 +0200 Subject: [PATCH 20/27] feat: split storage server to a different commaand & add Dockerfile --- ci/Dockerfile.provider | 35 +++++++++++ ci/PROVIDER.md | 47 +++++++++++++++ cli/polka-storage-provider/src/cli.rs | 4 +- cli/polka-storage-provider/src/commands.rs | 2 + .../src/commands/run.rs | 13 +--- .../src/commands/runner.rs | 1 + .../src/commands/storage.rs | 60 +++++++++++++++++++ cli/polka-storage-provider/src/rpc/server.rs | 4 -- cli/polka-storage-provider/src/storage.rs | 35 ++++++----- 9 files changed, 169 insertions(+), 32 deletions(-) create mode 100644 ci/Dockerfile.provider create mode 100644 ci/PROVIDER.md create mode 100644 cli/polka-storage-provider/src/commands/storage.rs diff --git a/ci/Dockerfile.provider b/ci/Dockerfile.provider new file mode 100644 index 000000000..b94235bbe --- /dev/null +++ b/ci/Dockerfile.provider @@ -0,0 +1,35 @@ +################ +##### Chef +FROM rust:1.79 AS chef +RUN cargo install cargo-chef +WORKDIR /app + +################ +##### Planer +FROM chef AS planner +COPY . . +RUN cargo chef prepare --recipe-path recipe.json + +################ +##### Builder +FROM chef AS builder + +RUN apt update && apt upgrade -y +RUN apt install -y protobuf-compiler +RUN apt install -y clang + +# Copy required files +COPY --from=planner /app/recipe.json recipe.json +COPY --from=planner /app/rust-toolchain.toml rust-toolchain.toml +# Build dependencies - this is the caching Docker layer! +RUN cargo chef cook --release --recipe-path recipe.json +# Build application +COPY . . +RUN cargo build --release --bin polka-storage-provider + +################ +##### Runtime +FROM debian:bookworm-slim AS runtime +WORKDIR /app +COPY --from=builder /app/target/release/polka-storage-provider /usr/local/bin +ENTRYPOINT ["/usr/local/bin/polka-storage-provider"] \ No newline at end of file diff --git a/ci/PROVIDER.md b/ci/PROVIDER.md new file mode 100644 index 000000000..4ea4f285f --- /dev/null +++ b/ci/PROVIDER.md @@ -0,0 +1,47 @@ +## Build the Docker Image + +To build the Docker image for the provider, execute the following command: + +`docker build -t eiger/provider --file ./ci/Dockerfile.provider .` + +This command uses the Dockerfile located at ./ci/Dockerfile.provider to create an image named `eiger/provider`. + +## Start the Storage Provider Server + +### Create a Docker Volume + +First, create a Docker volume that the storage provider will use to store uploaded files: + +`docker volume create storage_provider` + +This command creates a volume named storage_provider. + +### Start the Storage Server + +Next, start the storage server using the created volume: + +`docker run --mount source=storage_provider,destination=/app/uploads eiger/provider storage` + +- `--mount source=storage_provider,destination=/app/uploads`: Mounts the storage_provider volume to /app/uploads inside the container. +- `eiger/provider storage`: Runs the eiger/provider image with the storage command. + +## Upload a file + +To upload a file to the provider's server, use the following curl command. Replace image.jpg with the path to your file: + +`curl -X POST --data-binary "@image.jpg" http://localhost:9000/upload` + +This command uploads the file image.jpg to the server running at http://localhost:9000/upload. The server converts the uploaded content to a CAR file and saves it to the mounted volume. + +## Download the CAR File + +After uploading, you will receive a CID (Content Identifier) for the file. Use this CID to download the corresponding CAR file. Replace :cid with the actual CID provided: + +`curl -v -X GET http://localhost:9000/download/:cid --output ./content.car` + +- `-v`: Enables verbose mode, providing detailed information about the request. +- `-X GET`: Specifies the GET request method. +- `http://localhost:9000/download/:cid`: The URL to download the CAR file, with :cid being the placeholder for the actual CID. +- `--output ./content.car`: Saves the downloaded CAR file as content.car in the current directory. + +By following these steps, you can successfully build the Docker image, start the storage provider server, upload a file, and download the CAR file. diff --git a/cli/polka-storage-provider/src/cli.rs b/cli/polka-storage-provider/src/cli.rs index b1f2beeef..0af871ecf 100644 --- a/cli/polka-storage-provider/src/cli.rs +++ b/cli/polka-storage-provider/src/cli.rs @@ -3,7 +3,7 @@ use thiserror::Error; use url::Url; use crate::{ - commands::{InfoCommand, InitCommand, RunCommand, WalletCommand}, + commands::{InfoCommand, InitCommand, RunCommand, StorageCommand, WalletCommand}, rpc::{server::RPC_SERVER_DEFAULT_BIND_ADDR, ClientError}, }; @@ -27,6 +27,8 @@ pub enum SubCommand { Init(InitCommand), /// Start a polka storage provider Run(RunCommand), + /// Command to start storage server. + Storage(StorageCommand), /// Info command to display information about the storage provider. Info(InfoCommand), /// Command to manage wallet operations. diff --git a/cli/polka-storage-provider/src/commands.rs b/cli/polka-storage-provider/src/commands.rs index b3724cc1c..4e8c8b336 100644 --- a/cli/polka-storage-provider/src/commands.rs +++ b/cli/polka-storage-provider/src/commands.rs @@ -1,6 +1,7 @@ mod info; mod init; mod run; +mod storage; mod wallet; pub(crate) mod runner; @@ -8,4 +9,5 @@ pub(crate) mod runner; pub(crate) use info::InfoCommand; pub(crate) use init::InitCommand; pub(crate) use run::RunCommand; +pub(crate) use storage::StorageCommand; pub(crate) use wallet::WalletCommand; diff --git a/cli/polka-storage-provider/src/commands/run.rs b/cli/polka-storage-provider/src/commands/run.rs index 1be4ed6b5..6a6ddc4aa 100644 --- a/cli/polka-storage-provider/src/commands/run.rs +++ b/cli/polka-storage-provider/src/commands/run.rs @@ -9,7 +9,6 @@ use url::Url; use crate::{ cli::CliError, rpc::server::{start_rpc_server, RpcServerState, RPC_SERVER_DEFAULT_BIND_ADDR}, - storage::start_upload_server, substrate, }; @@ -44,33 +43,25 @@ impl RunCommand { let state = Arc::new(RpcServerState { start_time: Utc::now(), substrate_client, - storage_dir: self.storage_dir.clone(), }); // Setup shutdown channel let (notify_shutdown_tx, _) = broadcast::channel(1); - // Start the tasks in the background + // Start the server in the background let rpc_handler = tokio::spawn(start_rpc_server( state.clone(), self.listen_addr, notify_shutdown_tx.subscribe(), )); - let upload_handler = tokio::spawn(start_upload_server( - state.clone(), - notify_shutdown_tx.subscribe(), - )); // Wait for SIGTERM on the main thread and once received "unblock" signal::ctrl_c().await.expect("failed to listen for event"); // Send the shutdown signal let _ = notify_shutdown_tx.send(()); - // We can't wait forever, but we wait on this first so we can give extra - // time for any pending uploads to finish + // Give server some time to finish let _ = tokio::time::timeout(std::time::Duration::from_secs(10), rpc_handler).await; - // And still limit the uploads to a bound anyways - let _ = tokio::time::timeout(std::time::Duration::from_secs(30), upload_handler).await; info!("storage provider stopped"); Ok(()) diff --git a/cli/polka-storage-provider/src/commands/runner.rs b/cli/polka-storage-provider/src/commands/runner.rs index 408e212d1..831d9832b 100644 --- a/cli/polka-storage-provider/src/commands/runner.rs +++ b/cli/polka-storage-provider/src/commands/runner.rs @@ -19,6 +19,7 @@ pub(crate) async fn run() -> Result<(), CliError> { match &cli_arguments.subcommand { SubCommand::Init(cmd) => cmd.run().await, SubCommand::Run(cmd) => cmd.run().await, + SubCommand::Storage(cmd) => cmd.run().await, SubCommand::Info(cmd) => cmd.run(&rpc_client).await, SubCommand::Wallet(cmd) => match cmd { WalletCommand::GenerateNodeKey(cmd) => Ok(cmd.run()?), diff --git a/cli/polka-storage-provider/src/commands/storage.rs b/cli/polka-storage-provider/src/commands/storage.rs new file mode 100644 index 000000000..0928e388d --- /dev/null +++ b/cli/polka-storage-provider/src/commands/storage.rs @@ -0,0 +1,60 @@ +use std::{env, net::SocketAddr, path::PathBuf, sync::Arc}; + +use clap::Parser; +use tokio::{signal, sync::broadcast}; +use tracing::info; + +use crate::{ + cli::CliError, + storage::{start_upload_server, StorageServerState}, +}; + +/// Default storage path. +fn default_storage_path() -> PathBuf { + let mut current_dir = env::current_dir().expect("failed to get current directory"); + current_dir.push("uploads"); + current_dir +} + +/// Default address to bind the storage server to. +pub const STORAGE_SERVER_DEFAULT_BIND_ADDR: &str = "127.0.0.1:9000"; + +/// Command to start the storage provider. +#[derive(Debug, Clone, Parser)] +pub(crate) struct StorageCommand { + /// Address and port used for storage server. + #[arg(long, default_value = STORAGE_SERVER_DEFAULT_BIND_ADDR)] + pub listen_addr: SocketAddr, + /// Directory where uploaded files are stored. + #[arg(long, default_value = default_storage_path().into_os_string())] + pub storage_dir: PathBuf, +} + +impl StorageCommand { + pub async fn run(&self) -> Result<(), CliError> { + let state = Arc::new(StorageServerState { + storage_dir: self.storage_dir.clone(), + }); + + // Setup shutdown channel + let (notify_shutdown_tx, _) = broadcast::channel(1); + + // Start the server in the background + let upload_handler = tokio::spawn(start_upload_server( + state.clone(), + self.listen_addr, + notify_shutdown_tx.subscribe(), + )); + + // Wait for SIGTERM on the main thread and once received "unblock" + signal::ctrl_c().await.expect("failed to listen for event"); + // Send the shutdown signal + let _ = notify_shutdown_tx.send(()); + + // Give uploads some time to finish + let _ = tokio::time::timeout(std::time::Duration::from_secs(30), upload_handler).await; + + info!("storage provider storage stopped"); + Ok(()) + } +} diff --git a/cli/polka-storage-provider/src/rpc/server.rs b/cli/polka-storage-provider/src/rpc/server.rs index fe3044eca..f474847f3 100644 --- a/cli/polka-storage-provider/src/rpc/server.rs +++ b/cli/polka-storage-provider/src/rpc/server.rs @@ -1,7 +1,6 @@ use std::{ fmt::{Debug, Display}, net::SocketAddr, - path::PathBuf, sync::Arc, }; @@ -31,9 +30,6 @@ pub const RPC_SERVER_DEFAULT_BIND_ADDR: &str = "127.0.0.1:8000"; pub struct RpcServerState { pub start_time: chrono::DateTime, pub substrate_client: substrate::Client, - // TODO(no-ref,@cernicc,04/07/2024): Should use config struct that would - // provide the configuration across the provider - pub storage_dir: PathBuf, } /// Start the RPC server. diff --git a/cli/polka-storage-provider/src/storage.rs b/cli/polka-storage-provider/src/storage.rs index f535a8c45..25cfb8bb1 100644 --- a/cli/polka-storage-provider/src/storage.rs +++ b/cli/polka-storage-provider/src/storage.rs @@ -1,4 +1,4 @@ -use std::{io, path::PathBuf, str::FromStr, sync::Arc}; +use std::{io, net::SocketAddr, path::PathBuf, str::FromStr, sync::Arc}; use axum::{ body::Body, @@ -10,7 +10,7 @@ use axum::{ }; use futures::TryStreamExt; use mater::{create_filestore, Cid, Config}; -use tempfile::tempdir; +use tempfile::tempdir_in; use tokio::{ fs::{self, File}, io::{AsyncRead, BufWriter}, @@ -21,24 +21,26 @@ use tower_http::trace::TraceLayer; use tracing::{error, info, info_span, instrument}; use uuid::Uuid; -use crate::{cli::CliError, rpc::server::RpcServerState}; +use crate::cli::CliError; + +/// Shared state of the storage server. +pub struct StorageServerState { + pub storage_dir: PathBuf, +} #[instrument(skip_all)] pub async fn start_upload_server( - state: Arc, + state: Arc, + listen_addr: SocketAddr, mut notify_shutdown_rx: Receiver<()>, ) -> Result<(), CliError> { // Configure router let router = configure_router(state); - - // TODO(no-ref,@cernicc,04/07/2024): handle error if the address is already - // in use. This should be done when both servers will listen on the same - // address - let address = "127.0.0.1:3000"; - let listener = tokio::net::TcpListener::bind(address).await?; + let listener = tokio::net::TcpListener::bind(listen_addr).await?; + dbg!("dwadaw"); // Start server - info!("upload server started at: {address}"); + info!("upload server started at: {listen_addr}"); axum::serve(listener, router) .with_graceful_shutdown(async move { let _ = notify_shutdown_rx.recv().await; @@ -49,7 +51,7 @@ pub async fn start_upload_server( } // TODO(no-ref,@cernicc,28/06/2024): Nicer error handling in handlers -fn configure_router(state: Arc) -> Router { +fn configure_router(state: Arc) -> Router { Router::new() .route("/upload", post(upload)) .route("/download/:cid", get(download)) @@ -77,7 +79,7 @@ fn configure_router(state: Arc) -> Router { /// Handler for the upload endpoint. It receives a stream of bytes, coverts them /// to a CAR file and returns the CID of the CAR file to the user. async fn upload( - State(state): State>, + State(state): State>, request: Request, ) -> Result { // Body reader @@ -103,7 +105,7 @@ async fn upload( /// Handler for the download endpoint. It receives a CID and streams the CAR /// file back to the user. async fn download( - State(state): State>, + State(state): State>, Path(cid): Path, ) -> Result { // Path to a CAR file @@ -159,8 +161,9 @@ where } // Temp file which will be used to store the CAR file content. The temp - // director has a randomized name. - let temp_dir = tempdir()?; + // director has a randomized name and is created in the same folder as the + // finalized uploads are stored. + let temp_dir = tempdir_in(folder)?; let temp_file_path = temp_dir.path().join("temp.car"); // Stream the body from source to the temp file. From 02e4fbba6a160b9e2706d67a9fc79a667184c211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Mon, 8 Jul 2024 10:32:37 +0200 Subject: [PATCH 21/27] fix: some changes --- ci/PROVIDER.md | 5 +++-- cli/polka-storage-provider/src/storage.rs | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/PROVIDER.md b/ci/PROVIDER.md index 4ea4f285f..dda999972 100644 --- a/ci/PROVIDER.md +++ b/ci/PROVIDER.md @@ -20,8 +20,9 @@ This command creates a volume named storage_provider. Next, start the storage server using the created volume: -`docker run --mount source=storage_provider,destination=/app/uploads eiger/provider storage` +`docker run --net=host --mount source=storage_provider,destination=/app/uploads eiger/provider storage` +- `--net=host`: Uses the host network, allowing the container to communicate with the host machine. - `--mount source=storage_provider,destination=/app/uploads`: Mounts the storage_provider volume to /app/uploads inside the container. - `eiger/provider storage`: Runs the eiger/provider image with the storage command. @@ -35,7 +36,7 @@ This command uploads the file image.jpg to the server running at http://localhos ## Download the CAR File -After uploading, you will receive a CID (Content Identifier) for the file. Use this CID to download the corresponding CAR file. Replace :cid with the actual CID provided: +After uploading, you will receive a CID (Content Identifier) for the file. Use this CID to download the corresponding CAR file. Replace `:cid` with the actual CID provided: `curl -v -X GET http://localhost:9000/download/:cid --output ./content.car` diff --git a/cli/polka-storage-provider/src/storage.rs b/cli/polka-storage-provider/src/storage.rs index 25cfb8bb1..2e33c0916 100644 --- a/cli/polka-storage-provider/src/storage.rs +++ b/cli/polka-storage-provider/src/storage.rs @@ -37,7 +37,6 @@ pub async fn start_upload_server( // Configure router let router = configure_router(state); let listener = tokio::net::TcpListener::bind(listen_addr).await?; - dbg!("dwadaw"); // Start server info!("upload server started at: {listen_addr}"); From d0a7cf66866e1904a86260760e8fde49be08293c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Mon, 8 Jul 2024 10:46:15 +0200 Subject: [PATCH 22/27] fix: after merge develop --- Cargo.lock | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 90eb77363..91d29f0bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4583,7 +4583,7 @@ checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http 0.2.12", - "hyper 0.14.29", + "hyper 0.14.28", "log", "rustls 0.21.12", "rustls-native-certs 0.6.3", @@ -4997,7 +4997,7 @@ dependencies = [ "beef", "futures-timer", "futures-util", - "hyper 0.14.29", + "hyper 0.14.28", "jsonrpsee-types", "parking_lot 0.12.3", "pin-project", @@ -5018,7 +5018,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ccf93fc4a0bfe05d851d37d7c32b7f370fe94336b52a2f0efc5f1981895c2e5" dependencies = [ "async-trait", - "hyper 0.14.29", + "hyper 0.14.28", "hyper-rustls", "jsonrpsee-core", "jsonrpsee-types", @@ -5052,7 +5052,7 @@ checksum = "12d8b6a9674422a8572e0b0abb12feeb3f2aeda86528c80d0350c2bd0923ab41" dependencies = [ "futures-util", "http 0.2.12", - "hyper 0.14.29", + "hyper 0.14.28", "jsonrpsee-core", "jsonrpsee-types", "pin-project", @@ -11841,7 +11841,7 @@ dependencies = [ "fnv", "futures", "futures-timer", - "hyper 0.14.29", + "hyper 0.14.28", "hyper-rustls", "libp2p", "log", @@ -11936,7 +11936,7 @@ dependencies = [ "futures", "governor", "http 0.2.12", - "hyper 0.14.29", + "hyper 0.14.28", "ip_network", "jsonrpsee", "log", @@ -14294,7 +14294,7 @@ name = "substrate-prometheus-endpoint" version = "0.17.0" source = "git+https://github.com/paritytech/polkadot-sdk?tag=polkadot-v1.13.0#d5160c1d567cc73c7df6c816d41e21aa3adb188d" dependencies = [ - "hyper 0.14.29", + "hyper 0.14.28", "log", "prometheus", "thiserror", From e6e97ccf1dbd1bb83fa81735227f65b167912927 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Mon, 8 Jul 2024 11:05:41 +0200 Subject: [PATCH 23/27] chore: remove Serhii from owners --- .github/CODEOWNERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 3c7a8ca1b..2db762e86 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,2 +1,3 @@ # Any change in the repo requests review from the dev team -* @cernicc @th7nder @serg-temchenko @jmg-duarte @aidan46 + +- @cernicc @th7nder @jmg-duarte @aidan46 From 17925027f90e5d525b496c21b384129a739b6775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Mon, 8 Jul 2024 11:19:36 +0200 Subject: [PATCH 24/27] chore: remove Dockerfile and markdown --- ci/Dockerfile.provider | 35 ------------------------------ ci/PROVIDER.md | 48 ------------------------------------------ 2 files changed, 83 deletions(-) delete mode 100644 ci/Dockerfile.provider delete mode 100644 ci/PROVIDER.md diff --git a/ci/Dockerfile.provider b/ci/Dockerfile.provider deleted file mode 100644 index b94235bbe..000000000 --- a/ci/Dockerfile.provider +++ /dev/null @@ -1,35 +0,0 @@ -################ -##### Chef -FROM rust:1.79 AS chef -RUN cargo install cargo-chef -WORKDIR /app - -################ -##### Planer -FROM chef AS planner -COPY . . -RUN cargo chef prepare --recipe-path recipe.json - -################ -##### Builder -FROM chef AS builder - -RUN apt update && apt upgrade -y -RUN apt install -y protobuf-compiler -RUN apt install -y clang - -# Copy required files -COPY --from=planner /app/recipe.json recipe.json -COPY --from=planner /app/rust-toolchain.toml rust-toolchain.toml -# Build dependencies - this is the caching Docker layer! -RUN cargo chef cook --release --recipe-path recipe.json -# Build application -COPY . . -RUN cargo build --release --bin polka-storage-provider - -################ -##### Runtime -FROM debian:bookworm-slim AS runtime -WORKDIR /app -COPY --from=builder /app/target/release/polka-storage-provider /usr/local/bin -ENTRYPOINT ["/usr/local/bin/polka-storage-provider"] \ No newline at end of file diff --git a/ci/PROVIDER.md b/ci/PROVIDER.md deleted file mode 100644 index dda999972..000000000 --- a/ci/PROVIDER.md +++ /dev/null @@ -1,48 +0,0 @@ -## Build the Docker Image - -To build the Docker image for the provider, execute the following command: - -`docker build -t eiger/provider --file ./ci/Dockerfile.provider .` - -This command uses the Dockerfile located at ./ci/Dockerfile.provider to create an image named `eiger/provider`. - -## Start the Storage Provider Server - -### Create a Docker Volume - -First, create a Docker volume that the storage provider will use to store uploaded files: - -`docker volume create storage_provider` - -This command creates a volume named storage_provider. - -### Start the Storage Server - -Next, start the storage server using the created volume: - -`docker run --net=host --mount source=storage_provider,destination=/app/uploads eiger/provider storage` - -- `--net=host`: Uses the host network, allowing the container to communicate with the host machine. -- `--mount source=storage_provider,destination=/app/uploads`: Mounts the storage_provider volume to /app/uploads inside the container. -- `eiger/provider storage`: Runs the eiger/provider image with the storage command. - -## Upload a file - -To upload a file to the provider's server, use the following curl command. Replace image.jpg with the path to your file: - -`curl -X POST --data-binary "@image.jpg" http://localhost:9000/upload` - -This command uploads the file image.jpg to the server running at http://localhost:9000/upload. The server converts the uploaded content to a CAR file and saves it to the mounted volume. - -## Download the CAR File - -After uploading, you will receive a CID (Content Identifier) for the file. Use this CID to download the corresponding CAR file. Replace `:cid` with the actual CID provided: - -`curl -v -X GET http://localhost:9000/download/:cid --output ./content.car` - -- `-v`: Enables verbose mode, providing detailed information about the request. -- `-X GET`: Specifies the GET request method. -- `http://localhost:9000/download/:cid`: The URL to download the CAR file, with :cid being the placeholder for the actual CID. -- `--output ./content.car`: Saves the downloaded CAR file as content.car in the current directory. - -By following these steps, you can successfully build the Docker image, start the storage provider server, upload a file, and download the CAR file. From 635bad9a0bfec685f996c8adfb014aec1aa2bbc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Mon, 8 Jul 2024 13:45:33 +0200 Subject: [PATCH 25/27] fix: pr related changes --- .../src/commands/run.rs | 19 +++++-------------- .../src/commands/storage.rs | 11 ++++++----- cli/polka-storage-provider/src/rpc/server.rs | 8 ++++---- cli/polka-storage-provider/src/storage.rs | 8 ++++---- 4 files changed, 19 insertions(+), 27 deletions(-) diff --git a/cli/polka-storage-provider/src/commands/run.rs b/cli/polka-storage-provider/src/commands/run.rs index 6a6ddc4aa..baf52df92 100644 --- a/cli/polka-storage-provider/src/commands/run.rs +++ b/cli/polka-storage-provider/src/commands/run.rs @@ -1,8 +1,8 @@ -use std::{env, net::SocketAddr, path::PathBuf, sync::Arc}; +use std::{net::SocketAddr, sync::Arc}; use chrono::Utc; use clap::Parser; -use tokio::{signal, sync::broadcast}; +use tokio::{signal, sync::oneshot}; use tracing::info; use url::Url; @@ -15,13 +15,6 @@ use crate::{ /// Default RPC API endpoint used by the parachain node. const FULL_NODE_DEFAULT_RPC_ADDR: &str = "ws://127.0.0.1:9944"; -/// Default storage path. -fn default_storage_path() -> PathBuf { - let mut current_dir = env::current_dir().expect("failed to get current directory"); - current_dir.push("uploads"); - current_dir -} - /// Command to start the storage provider. #[derive(Debug, Clone, Parser)] pub(crate) struct RunCommand { @@ -31,9 +24,6 @@ pub(crate) struct RunCommand { /// Address and port used for RPC server. #[arg(long, default_value = RPC_SERVER_DEFAULT_BIND_ADDR)] pub listen_addr: SocketAddr, - /// Directory where uploaded files are stored. - #[arg(long, default_value = default_storage_path().into_os_string())] - pub storage_dir: PathBuf, } impl RunCommand { @@ -46,13 +36,13 @@ impl RunCommand { }); // Setup shutdown channel - let (notify_shutdown_tx, _) = broadcast::channel(1); + let (notify_shutdown_tx, notify_shutdown_rx) = oneshot::channel(); // Start the server in the background let rpc_handler = tokio::spawn(start_rpc_server( state.clone(), self.listen_addr, - notify_shutdown_tx.subscribe(), + notify_shutdown_rx, )); // Wait for SIGTERM on the main thread and once received "unblock" @@ -61,6 +51,7 @@ impl RunCommand { let _ = notify_shutdown_tx.send(()); // Give server some time to finish + info!("shutting down server, killing it in 10sec"); let _ = tokio::time::timeout(std::time::Duration::from_secs(10), rpc_handler).await; info!("storage provider stopped"); diff --git a/cli/polka-storage-provider/src/commands/storage.rs b/cli/polka-storage-provider/src/commands/storage.rs index 0928e388d..b71130010 100644 --- a/cli/polka-storage-provider/src/commands/storage.rs +++ b/cli/polka-storage-provider/src/commands/storage.rs @@ -1,7 +1,7 @@ use std::{env, net::SocketAddr, path::PathBuf, sync::Arc}; use clap::Parser; -use tokio::{signal, sync::broadcast}; +use tokio::{signal, sync::oneshot}; use tracing::info; use crate::{ @@ -9,7 +9,7 @@ use crate::{ storage::{start_upload_server, StorageServerState}, }; -/// Default storage path. +/// Creates a path relative to the current directory in the format `./uploads` fn default_storage_path() -> PathBuf { let mut current_dir = env::current_dir().expect("failed to get current directory"); current_dir.push("uploads"); @@ -37,13 +37,13 @@ impl StorageCommand { }); // Setup shutdown channel - let (notify_shutdown_tx, _) = broadcast::channel(1); + let (notify_shutdown_tx, notify_shutdown_rx) = oneshot::channel(); // Start the server in the background let upload_handler = tokio::spawn(start_upload_server( state.clone(), self.listen_addr, - notify_shutdown_tx.subscribe(), + notify_shutdown_rx, )); // Wait for SIGTERM on the main thread and once received "unblock" @@ -52,9 +52,10 @@ impl StorageCommand { let _ = notify_shutdown_tx.send(()); // Give uploads some time to finish + info!("shutting down server, killing it in 30sec"); let _ = tokio::time::timeout(std::time::Duration::from_secs(30), upload_handler).await; - info!("storage provider storage stopped"); + info!("storage provider server stopped"); Ok(()) } } diff --git a/cli/polka-storage-provider/src/rpc/server.rs b/cli/polka-storage-provider/src/rpc/server.rs index f474847f3..dde9c62e9 100644 --- a/cli/polka-storage-provider/src/rpc/server.rs +++ b/cli/polka-storage-provider/src/rpc/server.rs @@ -14,7 +14,7 @@ use jsonrpsee::{ RpcModule, }; use serde_json::Value; -use tokio::sync::broadcast::Receiver; +use tokio::sync::oneshot::Receiver; use tracing::{info, instrument}; use super::{ @@ -37,7 +37,7 @@ pub struct RpcServerState { pub async fn start_rpc_server( state: Arc, listen_addr: SocketAddr, - mut notify_shutdown_rx: Receiver<()>, + notify_shutdown_rx: Receiver<()>, ) -> Result<(), CliError> { let server = Server::builder().build(listen_addr).await?; @@ -45,9 +45,9 @@ pub async fn start_rpc_server( let server_handle = server.start(module); info!("RPC server started at {}", listen_addr); - // Wait for shutdown signal. No need to handle the error. We stop ste server + // Wait for shutdown signal. No need to handle the error. We stop the server // in any case. - let _ = notify_shutdown_rx.recv().await; + let _ = notify_shutdown_rx.await; // Stop returns an error if the server has already been stopped. // PRE-COND: the server is only shutdown by receiving from `notify_shutdown_rx` diff --git a/cli/polka-storage-provider/src/storage.rs b/cli/polka-storage-provider/src/storage.rs index 2e33c0916..8fa6b4977 100644 --- a/cli/polka-storage-provider/src/storage.rs +++ b/cli/polka-storage-provider/src/storage.rs @@ -14,7 +14,7 @@ use tempfile::tempdir_in; use tokio::{ fs::{self, File}, io::{AsyncRead, BufWriter}, - sync::broadcast::Receiver, + sync::oneshot::Receiver, }; use tokio_util::io::{ReaderStream, StreamReader}; use tower_http::trace::TraceLayer; @@ -32,7 +32,7 @@ pub struct StorageServerState { pub async fn start_upload_server( state: Arc, listen_addr: SocketAddr, - mut notify_shutdown_rx: Receiver<()>, + notify_shutdown_rx: Receiver<()>, ) -> Result<(), CliError> { // Configure router let router = configure_router(state); @@ -42,14 +42,13 @@ pub async fn start_upload_server( info!("upload server started at: {listen_addr}"); axum::serve(listener, router) .with_graceful_shutdown(async move { - let _ = notify_shutdown_rx.recv().await; + let _ = notify_shutdown_rx.await; }) .await?; Ok(()) } -// TODO(no-ref,@cernicc,28/06/2024): Nicer error handling in handlers fn configure_router(state: Arc) -> Router { Router::new() .route("/upload", post(upload)) @@ -123,6 +122,7 @@ async fn download( // Open car file let Ok(file) = File::open(path).await else { + error!(?path, "failed to open file"); return Err(( StatusCode::INTERNAL_SERVER_ERROR, "failed to open file".to_string(), From 3976c64a007239b600e51e9b0db7e9081ffe6a51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Mon, 8 Jul 2024 15:50:41 +0200 Subject: [PATCH 26/27] fix: some pr related changes --- cli/polka-storage-provider/src/storage.rs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/cli/polka-storage-provider/src/storage.rs b/cli/polka-storage-provider/src/storage.rs index 8fa6b4977..4f5030b90 100644 --- a/cli/polka-storage-provider/src/storage.rs +++ b/cli/polka-storage-provider/src/storage.rs @@ -107,10 +107,11 @@ async fn download( Path(cid): Path, ) -> Result { // Path to a CAR file - let Ok(cid) = Cid::from_str(&cid) else { - error!(cid, "cid incorrect format"); - return Err((StatusCode::BAD_REQUEST, "cid incorrect format".to_string())); - }; + let cid = Cid::from_str(&cid).map_err(|e| { + error!(?e, cid, "cid incorrect format"); + (StatusCode::BAD_REQUEST, "cid incorrect format".to_string()) + })?; + let (file_name, path) = content_path(&state.storage_dir, cid); info!(path = %path.display(), "file requested"); @@ -121,13 +122,13 @@ async fn download( } // Open car file - let Ok(file) = File::open(path).await else { - error!(?path, "failed to open file"); - return Err(( + let file = File::open(&path).await.map_err(|e| { + error!(?e, ?path, "failed to open file"); + ( StatusCode::INTERNAL_SERVER_ERROR, "failed to open file".to_string(), - )); - }; + ) + })?; // Convert the `AsyncRead` into a `Stream` let stream = ReaderStream::new(file); @@ -155,7 +156,7 @@ where { // Create a storage folder if it doesn't exist. if !folder.exists() { - info!("creating storage folder: {}", folder.display()); + info!(?folder, "creating storage folder"); fs::create_dir_all(folder).await?; } From 505c11cf8c0d3c1b7bcde73f8880721a9fd4d440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rok=20=C4=8Cerni=C4=8D?= Date: Mon, 8 Jul 2024 15:54:50 +0200 Subject: [PATCH 27/27] fix: some comments --- cli/polka-storage-provider/src/storage.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/polka-storage-provider/src/storage.rs b/cli/polka-storage-provider/src/storage.rs index 4f5030b90..2b55124ce 100644 --- a/cli/polka-storage-provider/src/storage.rs +++ b/cli/polka-storage-provider/src/storage.rs @@ -113,7 +113,7 @@ async fn download( })?; let (file_name, path) = content_path(&state.storage_dir, cid); - info!(path = %path.display(), "file requested"); + info!(?path, "file requested"); // Check if the file exists if !path.exists() { @@ -175,7 +175,7 @@ where // location. let (_, final_content_path) = content_path(folder, cid); fs::rename(temp_file_path, &final_content_path).await?; - info!(location = %final_content_path.display(), "CAR file created"); + info!(?final_content_path, "CAR file created"); Ok(cid) }