diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index f8b244b3..40a1e778 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -13,17 +13,18 @@ permissions:
 jobs:
   Test:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     timeout-minutes: 5
+    if: false # Enable once Rust 1.85 is released.
     steps:
     - uses: actions/checkout@v4
-    - uses: dtolnay/rust-toolchain@stable
+    - uses: dtolnay/rust-toolchain@1.85
     - name: Run Tests
      run: cargo test
     - name: Run Tests (release build)
      run: cargo test --release
   Nightly:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     timeout-minutes: 5
     steps:
     - uses: actions/checkout@v4
@@ -37,7 +38,7 @@ jobs:
     - name: Run Tests (nightly feature, release build)
      run: cargo test --features nightly --release
   Clippy:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     timeout-minutes: 5
     steps:
     - uses: actions/checkout@v4
@@ -47,7 +48,7 @@ jobs:
     - name: Run Clippy
      run: make clippy
   Docs:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     timeout-minutes: 5
     steps:
     - uses: actions/checkout@v4
@@ -55,7 +56,7 @@ jobs:
     - name: Check docs
      run: RUSTDOCFLAGS="-D warnings" cargo doc --no-deps
   Rustfmt:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     timeout-minutes: 5
     steps:
     - uses: actions/checkout@v4
@@ -65,7 +66,7 @@ jobs:
     - name: Check formatting
      run: cargo fmt --all -- --check
   Sanitizer:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     timeout-minutes: 5
     strategy:
       fail-fast: false
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6b38cfc2..9a40c66e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,19 @@
+# v0.3.0 (unreleased)
+
+* Complete rewrite of the internals in an attempt to make it easier to port.
+* The methods on `Config` are now implementation specific.
+* Removed the `AsFd` implementation for `Ring` as that might not always be
+  possible to provide.
+* `net::SocketAddress` is now implemented for the socket address types in
+  the standard library instead of those in libc.
+* `net::Connect` no longer implements `Extract` as all socket address types in
+  the library are `Copy`.
+* The `BufSlice` and `BufMutSlice` types now use `IoSlice` and `IoMutSlice` as
+  wrappers around `libc::iovec`.
+* The `msg` module now uses the `MsgData` type as the type for message data,
+  instead of `u32` (though `MsgData` is also `u32`).
+* `process::ReceiveSignals` is now a proper `AsyncIter`.
+
 # v0.2.2
 
 * Fix possible overflow in ReadBuf::release
diff --git a/Cargo.toml b/Cargo.toml
index 59a053f0..d0dda9dd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,8 @@
 [package]
 name = "a10"
 description = "A10 is an io_uring library."
-version = "0.2.2"
+version = "0.3.0"
+publish = false # In development.
authors = ["Thomas de Zeeuw "] license = "MIT" documentation = "https://docs.rs/a10" @@ -11,6 +12,7 @@ keywords = ["io_uring", "io", "async", "non-blocking"] categories = ["asynchronous"] include = ["/Cargo.toml", "/src/**/*.rs", "/tests/**/*.rs", "/README.md", "/LICENSE"] edition = "2021" +rust-version = "1.85" [features] default = [] @@ -25,6 +27,7 @@ log = { version = "0.4.21", default-features = false, features = ["kv_ [dev-dependencies] std-logger = { version = "0.5.3", default-features = false } +getrandom = { version = "0.2.15", default-features = false } [[test]] name = "signals" diff --git a/LICENSE b/LICENSE index e464117a..38ab878c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2021-2024 Thomas de Zeeuw +Copyright (C) 2021-2025 Thomas de Zeeuw Permission is hereby granted, free of charge, to any person obtaining a copy of diff --git a/Makefile b/Makefile index 761cbe44..0448542c 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,7 @@ check: # Disabled lints: # * `doc-markdown`: has some annoying false positives. # * `equatable-if-let`: strongly disagree with this lint. +# * `missing-const-for-fn`: has false positives. # * `missing-errors-doc`, `missing-panics-doc`: not worth it. # * `must-use-candidate`: too many bad suggestions. # * `needless-lifetimes`: lifetimes are additional docs. @@ -34,9 +35,7 @@ check: # * `redundant-pub-crate`: useless lint. # * `single-match-else`: prefer match statements over if statements. # * `use-self`: strongly disagree. -# TODO: resolve `cast-possible-truncation` errors. -# TODO: resolve `manual-c-str-listerals`, `ref-as-ptr` and `inspect_err` once -# the related features are a little older (1.77, 1.76 and 1.75). +# TODO: resolve the lints after the empty line. lint: clippy clippy: cargo clippy --all-features --workspace -- \ @@ -50,7 +49,7 @@ clippy: --deny clippy::cargo \ --allow clippy::doc-markdown \ --allow clippy::equatable-if-let \ - --allow clippy::manual-c-str-literals \ + --allow clippy::missing-const-for-fn \ --allow clippy::missing-errors-doc \ --allow clippy::missing-panics-doc \ --allow clippy::must-use-candidate \ @@ -58,16 +57,15 @@ clippy: --allow clippy::new-without-default \ --allow clippy::option-if-let-else \ --allow clippy::redundant-pub-crate \ - --allow clippy::ref-as-ptr \ --allow clippy::single-match-else \ --allow clippy::use-self \ - --allow clippy::manual-inspect \ \ --allow clippy::cast-possible-truncation \ --allow clippy::cast-possible-wrap \ + --allow clippy::cast-sign-loss \ doc: - cargo doc + cargo doc --all-features doc_private: cargo doc --document-private-items diff --git a/examples/http_client.rs b/examples/http_client.rs index 7f62ea6b..e38a3066 100644 --- a/examples/http_client.rs +++ b/examples/http_client.rs @@ -1,5 +1,5 @@ -use std::net::{SocketAddr, SocketAddrV4}; -use std::{env, io, mem, str}; +use std::net::SocketAddr; +use std::{env, io, str}; use a10::net::socket; use a10::{AsyncFd, Ring, SubmissionQueue}; @@ -22,13 +22,8 @@ fn main() -> io::Result<()> { // Get an IPv4 address for the domain (using blocking I/O). let address = std::net::ToSocketAddrs::to_socket_addrs(&addr_host)? - .filter(SocketAddr::is_ipv4) .next() .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "failed to lookup ip"))?; - let address = match address { - SocketAddr::V4(address) => address, - SocketAddr::V6(_) => unreachable!(), - }; // Create our future that makes the request. 
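     // Note: like all a10 operations, this future is lazy; nothing is
     // submitted to the ring until it is polled for the first time (see the
     // note on the inert nature of `Future`s in src/cancel.rs below).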
let request_future = request(ring.submission_queue().clone(), &host, address); @@ -50,7 +45,7 @@ fn main() -> io::Result<()> { } /// Make a HTTP GET request to `address`. -async fn request(sq: SubmissionQueue, host: &str, address: SocketAddrV4) -> io::Result> { +async fn request(sq: SubmissionQueue, host: &str, address: SocketAddr) -> io::Result> { // Create a new TCP, IPv4 socket. let domain = libc::AF_INET; let r#type = libc::SOCK_STREAM | libc::SOCK_CLOEXEC; @@ -59,14 +54,13 @@ async fn request(sq: SubmissionQueue, host: &str, address: SocketAddrV4) -> io:: let socket: AsyncFd = socket(sq, domain, r#type, protocol, flags).await?; // Connect. - let addr = to_sockaddr_storage(address); - socket.connect(addr).await?; + socket.connect(address).await?; // Send a HTTP GET / request to the socket. let host = host.split_once(':').map(|(h, _)| h).unwrap_or(host); let version = env!("CARGO_PKG_VERSION"); let request = format!("GET / HTTP/1.1\r\nHost: {host}\r\nUser-Agent: A10-example/{version}\r\nAccept: */*\r\n\r\n"); - socket.send(request, 0).await?; + socket.send_all(request, 0).await?; // Receiving the response. let recv_buf = socket.recv(Vec::with_capacity(8192), 0).await?; @@ -77,14 +71,3 @@ async fn request(sq: SubmissionQueue, host: &str, address: SocketAddrV4) -> io:: Ok(recv_buf) } - -fn to_sockaddr_storage(addr: SocketAddrV4) -> libc::sockaddr_in { - // SAFETY: a `sockaddr_in` of all zeros is valid. - let mut storage: libc::sockaddr_in = unsafe { mem::zeroed() }; - storage.sin_family = libc::AF_INET as _; - storage.sin_port = addr.port().to_be(); - storage.sin_addr = libc::in_addr { - s_addr: u32::from_ne_bytes(addr.ip().octets()), - }; - storage -} diff --git a/src/cancel.rs b/src/cancel.rs index dd89168c..b27bf204 100644 --- a/src/cancel.rs +++ b/src/cancel.rs @@ -4,13 +4,13 @@ //! [`AsyncFd::cancel_all`] to cancel all operations on a fd. use std::future::Future; -use std::io; use std::pin::Pin; use std::task::{self, Poll}; +use std::{fmt, io}; use crate::fd::{AsyncFd, Descriptor}; -use crate::op::{op_future, poll_state, OpState}; -use crate::{libc, OpIndex, QueueFull, SubmissionQueue}; +use crate::op::{fd_operation, FdOperation, Operation}; +use crate::{sys, OperationId, SubmissionQueue}; /// Cancelation of operations, also see the [`Cancel`] trait to cancel specific /// operations. @@ -33,26 +33,17 @@ impl AsyncFd { /// Due to the lazyness of [`Future`]s it is possible that this will return /// `Ok(0)` if operations were never polled only to start it after their /// first poll. + /// + /// [`Future`]: std::future::Future pub const fn cancel_all<'fd>(&'fd self) -> CancelAll<'fd, D> { - CancelAll::new(self, libc::IORING_ASYNC_CANCEL_ALL) + CancelAll(FdOperation::new(self, (), ())) } } -// CancelAll. -op_future! { - fn AsyncFd::cancel_all -> usize, - struct CancelAll<'fd> { - // Doesn't need any fields. - }, - setup_state: flags: u32, - setup: |submission, fd, (), flags| unsafe { - submission.cancel(fd.fd(), flags | D::cancel_flag()); - }, - map_result: |n| { - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - Ok(n as usize) - }, -} +fd_operation!( + /// [`Future`] behind [`AsyncFd::cancel_all`]. + pub struct CancelAll(sys::cancel::CancelAllOp) -> io::Result; +); /// Cancelation of an in progress operations. pub trait Cancel { @@ -67,6 +58,15 @@ pub trait Cancel { /// Cancel this operation. /// + /// Once the returned future is completed it will asynchronously cancel the + /// related operation. 
This means that it *may* still return results that + /// were created before the operation was actually canceled. + /// + /// For example using a TCP listener and multishot accept it's possible that + /// `MultishotAccept` will return more accepted connections after it's canceled. + /// Simply keep accepting the connections and it will return `None` after all + /// pending connections have been accepted. + /// /// If this returns `ENOENT` it means the operation was not found. This can /// be caused by the operation never starting, due to the inert nature of /// [`Future`]s, or the operation has already been completed. @@ -79,8 +79,9 @@ pub trait Cancel { /// If this is called on an [`AsyncIterator`] it will cause them to return /// `None` (eventually, it may still return pending items). /// + /// [`Future`]: std::future::Future /// [`AsyncIterator`]: std::async_iter::AsyncIterator - fn cancel(&mut self) -> CancelOp; + fn cancel(&mut self) -> CancelOperation; } /// Result of a cancelation attempt. @@ -101,77 +102,46 @@ pub enum CancelResult { } /// [`Future`] behind [`Cancel::cancel`]. -/// -/// Once this future is completed it will asynchronously cancel the related -/// operation. This means that it *may* still return results that were created -/// before the operation was actually canceled. -/// -/// For example using a TCP listener and multishot accept it's possible that -/// `MultishotAccept` will return more accepted connections after it's canceled. -/// Simply keep accepting the connections and it will return `None` after all -/// pending connections have been accepted. -/// -///[`MultishotAccept::cancel`]: crate::net::MultishotAccept::cancel -#[derive(Debug)] #[must_use = "`Future`s do nothing unless polled"] -#[allow(clippy::module_name_repetitions)] // Don't care. -pub struct CancelOp<'fd> { - sq: &'fd SubmissionQueue, - state: OpState>, -} +pub struct CancelOperation(pub(crate) CancelOperationState); -impl<'fd> CancelOp<'fd> { - /// Create a new `CancelOp`. - pub(crate) const fn new(sq: &'fd SubmissionQueue, op_index: Option) -> CancelOp<'fd> { - CancelOp { - sq, - state: OpState::NotStarted(op_index), +impl CancelOperation { + /// Create a new `CancelOperation`. + pub(crate) fn new(sq: SubmissionQueue, op_id: Option) -> CancelOperation { + if let Some(op_id) = op_id { + let operation = Operation::new(sq, (), op_id); + CancelOperation(CancelOperationState::InProgress(operation)) + } else { + CancelOperation(CancelOperationState::Done) } } } -impl<'fd> Future for CancelOp<'fd> { +/// State of `CancelOperation`. +pub(crate) enum CancelOperationState { + /// Cancellation is already done, or the operation was never started. + Done, + /// Cancellation is in progress. + InProgress(Operation), +} + +impl Future for CancelOperation { type Output = io::Result<()>; fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - // NOTE: don't use `poll_state!` here bacause we return `ENOENT` if it - // hasn't started. - let op_index = match self.state { - OpState::Running(op_index) => op_index, - OpState::NotStarted(Some(to_cancel_op_index)) => { - // SAFETY: this will not panic as the resources are only removed - // after the state is set to `Done`. 
- let result = self - .sq - .add(|submission| unsafe { submission.cancel_op(to_cancel_op_index) }); - match result { - Ok(op_index) => { - self.state = OpState::Running(op_index); - op_index - } - Err(QueueFull(())) => { - self.sq.wait_for_submission(ctx.waker().clone()); - return Poll::Pending; - } - } - } - // If the operation is not started we pretend like we didn't find - // it. - OpState::NotStarted(None) => { - return Poll::Ready(Err(io::Error::from_raw_os_error(libc::ENOENT))) - } - OpState::Done => poll_state!(__panic CancelOp), - }; + match &mut self.0 { + CancelOperationState::Done => Poll::Ready(Ok(())), + CancelOperationState::InProgress(op) => Pin::new(op).poll(ctx), + } + } +} - match self.sq.poll_op(ctx, op_index) { - Poll::Ready(result) => { - self.state = OpState::Done; - match result { - Ok((_, _)) => Poll::Ready(Ok(())), - Err(err) => Poll::Ready(Err(err)), - } - } - Poll::Pending => Poll::Pending, +impl fmt::Debug for CancelOperation { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + const NAME: &str = "a10::CancelOperation"; + match &self.0 { + CancelOperationState::Done => f.debug_struct(NAME).field("state", &"done").finish(), + CancelOperationState::InProgress(op) => op.fmt_dbg(NAME, f), } } } diff --git a/src/config.rs b/src/config.rs index 4daca6ad..966713c7 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,463 +1,29 @@ -//! Configuration of a [`Ring`]. +//! [`Config`]uration module. -use std::mem::{self, size_of}; -use std::os::fd::{AsFd, AsRawFd, BorrowedFd, FromRawFd, OwnedFd}; -use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; -use std::sync::{Arc, Mutex}; -use std::time::Duration; -use std::{io, ptr}; +use std::io; -use crate::{libc, AtomicBitMap, CompletionQueue, Ring, SharedSubmissionQueue, SubmissionQueue}; +use crate::Ring; /// Configuration of a [`Ring`]. /// /// Created by calling [`Ring::config`]. #[derive(Debug, Clone)] #[must_use = "no ring is created until `a10::Config::build` is called"] -#[allow(clippy::struct_excessive_bools)] // This is just stupid. pub struct Config<'r> { - submission_entries: u32, - completion_entries: Option, - disabled: bool, - single_issuer: bool, - defer_taskrun: bool, - clamp: bool, - kernel_thread: bool, - cpu_affinity: Option, - idle_timeout: Option, - direct_descriptors: Option, - attach: Option<&'r SubmissionQueue>, -} - -macro_rules! check_feature { - ($features: expr, $required: ident $(,)?) => {{ - assert!( - $features & libc::$required != 0, - concat!( - "Kernel doesn't have required `", - stringify!($required), - "` feature" - ) - ); - }}; -} - -macro_rules! remove_flag { - ($parameters: ident, $first_err: ident, $err: ident, $( $flag: ident, )+ ) => { - $( - if $parameters.flags & libc::$flag != 0 { - log::debug!(concat!("failed to create io_uring: {}, dropping ", stringify!($flag), " flag and trying again"), $err); - $parameters.flags &= !libc::$flag; - $first_err.get_or_insert($err); - continue; - } - )+ - }; + pub(crate) queued_operations: usize, + /// Implementation specific configuration. + pub(crate) sys: crate::sys::config::Config<'r>, } impl<'r> Config<'r> { - /// Create a new `Config`. - pub(crate) const fn new(entries: u32) -> Config<'r> { - Config { - submission_entries: entries, - completion_entries: None, - disabled: false, - single_issuer: false, - defer_taskrun: false, - clamp: false, - kernel_thread: true, - cpu_affinity: None, - idle_timeout: None, - direct_descriptors: None, - attach: None, - } - } - - /// Start the ring in a disabled state. 
- /// - /// While the ring is disabled submissions are not allowed. To enable the - /// ring use [`Ring::enable`]. - #[doc(alias = "IORING_SETUP_R_DISABLED")] - pub const fn disable(mut self) -> Config<'r> { - self.disabled = true; - self - } - - /// Enable single issuer. - /// - /// This hints to the kernel that only a single thread will submit requests, - /// which is used for optimisations within the kernel. This means that only - /// the thread that [`build`] the ring or [`enabled`] it (after starting in - /// disable mode) may register resources with the ring, resources such as - /// the [`ReadBufPool`]. - /// - /// This optimisation is enforces by the kernel, which will return `EEXIST` - /// or `AlreadyExists` if another thread attempt to register resource or - /// otherwise use the [`Ring`] in a way that is not allowed. - /// - /// [`build`]: Config::build - /// [`enabled`]: Ring::enable - /// [`ReadBufPool`]: crate::io::ReadBufPool - #[doc(alias = "IORING_SETUP_SINGLE_ISSUER")] - pub const fn single_issuer(mut self) -> Config<'r> { - self.single_issuer = true; - self - } - - /// Defer task running. - /// - /// By default, kernel will process all outstanding work at the end of any - /// system call or thread interrupt. This can delay the application from - /// making other progress. - /// - /// Enabling this option will hint to kernel that it should defer work until - /// [`Ring::poll`] is called. This way the work is done in the - /// [`Ring::poll`]. - /// - /// This options required [`Config::single_issuer`] to be set. This option - /// does not work with [`Config::with_kernel_thread`] set. - #[doc(alias = "IORING_SETUP_DEFER_TASKRUN")] - pub const fn defer_task_run(mut self) -> Config<'r> { - self.defer_taskrun = true; - self - } - - /// Set the size of the completion queue. - /// - /// By default the kernel will use a completion queue twice as large as the - /// submission queue (`entries` in the call to [`Ring::config`]). - /// - /// Uses `IORING_SETUP_CQSIZE`, added in Linux kernel 5.5. - #[doc(alias = "IORING_SETUP_CQSIZE")] - pub const fn with_completion_queue_size(mut self, entries: u32) -> Self { - self.completion_entries = Some(entries); - self - } - - /// Clamp queue sizes to the maximum. - /// - /// The maximum queue sizes aren't exposed by the kernel, making this the - /// only way (currently) to get the largest possible queues. - /// - /// Uses `IORING_SETUP_CLAMP`, added in Linux kernel 5.6. - #[doc(alias = "IORING_SETUP_CLAMP")] - pub const fn clamp_queue_sizes(mut self) -> Self { - self.clamp = true; - self - } - - /// Start a kernel thread polling the [`Ring`]. - /// - /// When this option is enabled a kernel thread is created to perform - /// submission queue polling. This allows issuing I/O without ever context - /// switching into the kernel. - /// - /// # Notes - /// - /// When setting this to false it significantly changes the way A10 works. - /// With this disabled you need to call [`Ring::poll`] to *submit* I/O work, - /// with this enables this is done by the kernel thread. That means that if - /// multiple threads use the same [`SubmissionQueue`] their submissions - /// might not actually be submitted until `Ring::poll` is called. - #[doc(alias = "IORING_SETUP_SQPOLL")] - pub const fn with_kernel_thread(mut self, enabled: bool) -> Self { - self.kernel_thread = enabled; - self - } - - /// Set the CPU affinity of kernel thread polling the [`Ring`]. - /// - /// Only works in combination with [`Config::with_kernel_thread`]. 
- #[doc(alias = "IORING_SETUP_SQ_AFF")] - #[doc(alias = "sq_thread_cpu")] - pub const fn with_cpu_affinity(mut self, cpu: u32) -> Self { - self.cpu_affinity = Some(cpu); - self - } - - /// Set the idle timeout of the kernel thread polling the submission queue. - /// After `timeout` time has passed after the last I/O submission the kernel - /// thread will go to sleep. If the I/O is kept busy the kernel thread will - /// never sleep. Note that A10 will ensure the kernel thread is woken up - /// when more submissions are added. - /// - /// The accuracy of `timeout` is only in milliseconds, anything more precise - /// will be discarded. - #[doc(alias = "sq_thread_idle")] - pub const fn with_idle_timeout(mut self, timeout: Duration) -> Self { - let ms = timeout.as_millis(); - let ms = if ms <= u32::MAX as u128 { - // SAFETY: just check above that `millis` is less then `u32::MAX` - ms as u32 - } else { - u32::MAX - }; - self.idle_timeout = Some(ms); - self - } - - /// Enable direct descriptors. - /// - /// This registers a sparse array of `size` direct descriptor slots enabling - /// direct descriptors to be used. If this is not used attempts to create a - /// direct descriptor will result in `ENXIO`. - /// - /// By default direct descriptors are not enabled. - #[doc(alias = "IORING_REGISTER_FILES")] - #[doc(alias = "IORING_REGISTER_FILES2")] - #[doc(alias = "IORING_RSRC_REGISTER_SPARSE")] - pub const fn with_direct_descriptors(mut self, size: u32) -> Self { - self.direct_descriptors = Some(size); - self - } - - /// Attach the new (to be created) ring to `other_ring`. - /// - /// This will cause the `Ring` being created to share the asynchronous - /// worker thread backend of the specified `other_ring`, rather than create - /// a new separate thread pool. - /// - /// Uses `IORING_SETUP_ATTACH_WQ`, added in Linux kernel 5.6. - #[doc(alias = "IORING_SETUP_ATTACH_WQ")] - pub const fn attach(self, other_ring: &'r Ring) -> Self { - self.attach_queue(other_ring.submission_queue()) - } - - /// Same as [`Config::attach`], but accepts a [`SubmissionQueue`]. - #[doc(alias = "IORING_SETUP_ATTACH_WQ")] - pub const fn attach_queue(mut self, other_ring: &'r SubmissionQueue) -> Self { - self.attach = Some(other_ring); - self - } - /// Build a new [`Ring`]. + #[doc(alias = "kqueue")] #[doc(alias = "io_uring_setup")] pub fn build(self) -> io::Result { - // SAFETY: all zero is valid for `io_uring_params`. - let mut parameters: libc::io_uring_params = unsafe { mem::zeroed() }; - parameters.flags = libc::IORING_SETUP_SUBMIT_ALL; // Submit all submissions on error. - if self.kernel_thread { - parameters.flags |= libc::IORING_SETUP_SQPOLL; // Kernel thread for polling. - } else { - // Don't interrupt userspace, the user must call `Ring::poll` any way. - parameters.flags |= libc::IORING_SETUP_COOP_TASKRUN; - } - if self.disabled { - // Start the ring in disabled mode. - parameters.flags |= libc::IORING_SETUP_R_DISABLED; - } - if self.single_issuer { - // Only allow access from a single thread. 
- parameters.flags |= libc::IORING_SETUP_SINGLE_ISSUER; - } - if self.defer_taskrun { - parameters.flags |= libc::IORING_SETUP_DEFER_TASKRUN; - } - if let Some(completion_entries) = self.completion_entries { - parameters.cq_entries = completion_entries; - parameters.flags |= libc::IORING_SETUP_CQSIZE; - } - if self.clamp { - parameters.flags |= libc::IORING_SETUP_CLAMP; - } - if let Some(cpu) = self.cpu_affinity { - parameters.flags |= libc::IORING_SETUP_SQ_AFF; - parameters.sq_thread_cpu = cpu; - } - if let Some(idle_timeout) = self.idle_timeout { - parameters.sq_thread_idle = idle_timeout; - } - #[allow(clippy::cast_sign_loss)] // File descriptors are always positive. - if let Some(other_ring) = self.attach { - parameters.wq_fd = other_ring.shared.ring_fd.as_raw_fd() as u32; - parameters.flags |= libc::IORING_SETUP_ATTACH_WQ; - } - - let mut first_err = None; - let fd = loop { - match libc::syscall!(io_uring_setup(self.submission_entries, &mut parameters)) { - // SAFETY: just created the fd (and checked the error). - Ok(fd) => break unsafe { OwnedFd::from_raw_fd(fd) }, - Err(err) => { - if let io::ErrorKind::InvalidInput = err.kind() { - // We set some flags which are not strictly required by - // A10, but provide various benefits. However in doing - // so we also increases our minimal supported Kernel - // version. - // Here we remove the flags one by one and try again. - // NOTE: this is mainly done to support the CI, which - // currently uses Linux 5.15. - remove_flag!( - parameters, - first_err, - err, - IORING_SETUP_SUBMIT_ALL, // 5.18. - IORING_SETUP_COOP_TASKRUN, // 5.19. - IORING_SETUP_SINGLE_ISSUER, // 6.0. - ); - } - return Err(first_err.unwrap_or(err)); - } - }; - }; - check_feature!(parameters.features, IORING_FEAT_NODROP); // Never drop completions. - check_feature!(parameters.features, IORING_FEAT_SUBMIT_STABLE); // All data for async offload must be consumed. - check_feature!(parameters.features, IORING_FEAT_RW_CUR_POS); // Allow -1 as current position. - check_feature!(parameters.features, IORING_FEAT_SQPOLL_NONFIXED); // No need for fixed files. - - let cq = mmap_completion_queue(fd.as_fd(), ¶meters)?; - let sq = mmap_submission_queue(fd, ¶meters)?; - - if let Some(size) = self.direct_descriptors { - let register = libc::io_uring_rsrc_register { - flags: libc::IORING_RSRC_REGISTER_SPARSE, - nr: size, - resv2: 0, - data: 0, - tags: 0, - }; - sq.register( - libc::IORING_REGISTER_FILES2, - (®ister as *const libc::io_uring_rsrc_register).cast(), - size_of::() as _, - )?; - } - - Ok(Ring { cq, sq }) - } -} - -/// Memory-map the submission queue. -fn mmap_submission_queue( - ring_fd: OwnedFd, - parameters: &libc::io_uring_params, -) -> io::Result { - let size = parameters.sq_off.array + parameters.sq_entries * (size_of::() as u32); - - let submission_queue = mmap( - size as usize, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_SHARED | libc::MAP_POPULATE, - ring_fd.as_raw_fd(), - libc::off_t::from(libc::IORING_OFF_SQ_RING), - )?; - - let submission_queue_entries = mmap( - parameters.sq_entries as usize * size_of::(), - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_SHARED | libc::MAP_POPULATE, - ring_fd.as_raw_fd(), - libc::off_t::from(libc::IORING_OFF_SQES), - ) - .map_err(|err| { - _ = munmap(submission_queue, size as usize); // Can't handle two errors. 
- err - })?; - - let op_indices = AtomicBitMap::new(parameters.cq_entries as usize); - let mut queued_ops = Vec::with_capacity(op_indices.capacity()); - queued_ops.resize_with(queued_ops.capacity(), || Mutex::new(None)); - let queued_ops = queued_ops.into_boxed_slice(); - - #[allow(clippy::mutex_integer)] // For `array_index`, need to the lock for more. - unsafe { - Ok(SubmissionQueue { - shared: Arc::new(SharedSubmissionQueue { - ring_fd, - ptr: submission_queue, - size, - // Fields are constant, so we load them once. - len: load_atomic_u32(submission_queue.add(parameters.sq_off.ring_entries as usize)), - ring_mask: load_atomic_u32( - submission_queue.add(parameters.sq_off.ring_mask as usize), - ), - kernel_thread: (parameters.flags & libc::IORING_SETUP_SQPOLL) != 0, - is_polling: AtomicBool::new(false), - op_indices, - queued_ops, - blocked_futures: Mutex::new(Vec::new()), - pending_tail: AtomicU32::new(0), - // Fields are shared with the kernel. - kernel_read: submission_queue.add(parameters.sq_off.head as usize).cast(), - flags: submission_queue - .add(parameters.sq_off.flags as usize) - .cast(), - entries: submission_queue_entries.cast(), - array_index: Mutex::new(0), - array: submission_queue - .add(parameters.sq_off.array as usize) - .cast(), - array_tail: submission_queue.add(parameters.sq_off.tail as usize).cast(), - }), - }) - } -} - -/// Memory-map the completion queue. -fn mmap_completion_queue( - ring_fd: BorrowedFd<'_>, - parameters: &libc::io_uring_params, -) -> io::Result { - let size = - parameters.cq_off.cqes + parameters.cq_entries * (size_of::() as u32); - - let completion_queue = mmap( - size as usize, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_SHARED | libc::MAP_POPULATE, - ring_fd.as_raw_fd(), - libc::off_t::from(libc::IORING_OFF_CQ_RING), - )?; - - unsafe { - Ok(CompletionQueue { - ptr: completion_queue, - size, - // Fields are constant, so we load them once. - /* NOTE: usunused. - len: load_atomic_u32(completion_queue.add(parameters.cq_off.ring_entries as usize)), - */ - ring_mask: load_atomic_u32(completion_queue.add(parameters.cq_off.ring_mask as usize)), - // Fields are shared with the kernel. - head: completion_queue.add(parameters.cq_off.head as usize).cast(), - tail: completion_queue.add(parameters.cq_off.tail as usize).cast(), - entries: completion_queue.add(parameters.cq_off.cqes as usize).cast(), - }) + // NOTE: defined in the implementation specific configuration code. + let queued_operations = self.queued_operations; + let (submissions, shared, completions) = self.build_sys()?; + let ring = Ring::build(submissions, shared, completions, queued_operations); + Ok(ring) } } - -/// `mmap(2)` wrapper that also sets `MADV_DONTFORK`. -fn mmap( - len: libc::size_t, - prot: libc::c_int, - flags: libc::c_int, - fd: libc::c_int, - offset: libc::off_t, -) -> io::Result<*mut libc::c_void> { - let addr = match unsafe { libc::mmap(ptr::null_mut(), len, prot, flags, fd, offset) } { - libc::MAP_FAILED => return Err(io::Error::last_os_error()), - addr => addr, - }; - - match unsafe { libc::madvise(addr, len, libc::MADV_DONTFORK) } { - 0 => Ok(addr), - _ => { - let err = io::Error::last_os_error(); - _ = munmap(addr, len); // Can't handle two errors. - Err(err) - } - } -} - -/// `munmap(2)` wrapper. -pub(crate) fn munmap(addr: *mut libc::c_void, len: libc::size_t) -> io::Result<()> { - match unsafe { libc::munmap(addr, len) } { - 0 => Ok(()), - _ => Err(io::Error::last_os_error()), - } -} - -/// Load a `u32` using relaxed ordering from `ptr`. 
-unsafe fn load_atomic_u32(ptr: *mut libc::c_void) -> u32 {
-    (*ptr.cast::<AtomicU32>()).load(Ordering::Relaxed)
-}
diff --git a/src/cq.rs b/src/cq.rs
new file mode 100644
index 00000000..1d213d6e
--- /dev/null
+++ b/src/cq.rs
@@ -0,0 +1,159 @@
+//! Completion Queue.
+
+use std::cmp::min;
+use std::sync::atomic::Ordering;
+use std::sync::Arc;
+use std::time::Duration;
+use std::{fmt, io, mem};
+
+use crate::{Implementation, OperationId, SharedState, NO_COMPLETION_ID, WAKE_ID};
+
+/// Queue of completion events.
+pub(crate) struct Queue<I: Implementation> {
+    completions: I::Completions,
+    shared: Arc<SharedState<I>>,
+}
+
+impl<I: Implementation> Queue<I> {
+    pub(crate) const fn new(completions: I::Completions, shared: Arc<SharedState<I>>) -> Queue<I> {
+        Queue {
+            completions,
+            shared,
+        }
+    }
+
+    pub(crate) fn poll(&mut self, timeout: Option<Duration>) -> io::Result<()> {
+        self.shared.is_polling.store(true, Ordering::Release);
+        let result = self.completions.poll(&self.shared.data, timeout);
+        self.shared.is_polling.store(false, Ordering::Release);
+        let completions = result?;
+
+        for completion in completions {
+            let id = completion.id();
+            log::trace!(id = id, completion:? = completion; "dequeued completion");
+            let Some(queued_op) = self.shared.queued_ops.get(id) else {
+                if id == WAKE_ID {
+                    /* Wake up only. */
+                } else if id == NO_COMPLETION_ID {
+                    log::warn!(id = id; "operation without completion failed");
+                } else {
+                    log::trace!(id = id; "got completion for unknown operation");
+                }
+                continue;
+            };
+
+            let mut queued_op = queued_op.lock().unwrap();
+            let Some(op) = &mut *queued_op else {
+                log::debug!(id = id; "operation gone, but got completion event");
+                continue;
+            };
+
+            log::trace!(id = id; "updating operation");
+            let more_events = completion.update_state(&mut op.state);
+            op.done = !more_events;
+            if op.dropped && op.done {
+                // The Future was previously dropped so no one is waiting on the
+                // result. We can make the slot available again.
+                *queued_op = None;
+                drop(queued_op);
+                log::trace!(id = id; "marking slot as available");
+                self.shared.op_ids.make_available(id);
+            } else {
+                log::trace!(id = id; "waking future");
+                op.waker.wake_by_ref();
+            }
+        }
+
+        self.wake_blocked_futures();
+        Ok(())
+    }
+
+    /// Wake any futures that were blocked on a submission slot.
+    // Work around .
+    #[allow(clippy::iter_with_drain, clippy::needless_pass_by_ref_mut)]
+    fn wake_blocked_futures(&mut self) {
+        let queue_space = self.completions.queue_space(&self.shared.data);
+        if queue_space == 0 {
+            return;
+        }
+
+        let mut blocked_futures = self.shared.blocked_futures.lock().unwrap();
+        if blocked_futures.is_empty() {
+            return;
+        }
+
+        let mut wakers = mem::take(&mut *blocked_futures);
+        drop(blocked_futures); // Unblock other threads.
+        for waker in wakers.drain(..min(queue_space, wakers.len())) {
+            waker.wake();
+        }
+
+        // Reuse allocation.
+        let mut blocked_futures = self.shared.blocked_futures.lock().unwrap();
+        mem::swap(&mut *blocked_futures, &mut wakers);
+        drop(blocked_futures);
+        // In case any wakers were added wake those as well.
+        for waker in wakers {
+            waker.wake();
+        }
+    }
+
+    pub(crate) fn shared(&self) -> &SharedState<I> {
+        &self.shared
+    }
+}
+
+impl<I: Implementation> fmt::Debug for Queue<I> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("cq::Queue")
+            .field("completions", &self.completions)
+            .field("shared", &self.shared)
+            .finish()
+    }
+}
+
+/// Poll for completion events.
+pub(crate) trait Completions: fmt::Debug {
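+    // Implementation contract (informal sketch, not enforced by the
+    // compiler): `Ring::poll` ends up here via `cq::Queue::poll` above; each
+    // returned `Event` is matched to a queued operation by its `OperationId`
+    // and merged into that operation's `State` via `Event::update_state`.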
+    /// Data shared between the submission and completion queues.
+    type Shared: fmt::Debug + Sized;
+
+    /// Completion [`Event`] (ce).
+    type Event: Event + Sized;
+
+    /// Poll for new completion events.
+    fn poll<'a>(
+        &'a mut self,
+        shared: &Self::Shared,
+        timeout: Option<Duration>,
+    ) -> io::Result<impl Iterator<Item = Self::Event> + 'a>;
+
+    /// Return the currently available queue space. May return `usize::MAX` if
+    /// there is no (practical) limit.
+    ///
+    /// This value may be outdated due to concurrent access.
+    fn queue_space(&mut self, shared: &Self::Shared) -> usize;
+}
+
+/// Completion event.
+pub(crate) trait Event: fmt::Debug {
+    /// State of an operation.
+    type State: OperationState;
+
+    /// Identifier of the operation.
+    fn id(&self) -> OperationId;
+
+    /// Update the state of the operation.
+    ///
+    /// Returns a boolean indicating if more events are expected for the same
+    /// operation id.
+    fn update_state(&self, state: &mut Self::State) -> bool;
+}
+
+/// State of an operation.
+pub(crate) trait OperationState: fmt::Debug {
+    /// Create a queued operation.
+    fn new() -> Self;
+
+    /// Create a queued multishot operation.
+    fn new_multishot() -> Self;
+}
diff --git a/src/drop_waker.rs b/src/drop_waker.rs
index 51c9571e..894c7496 100644
--- a/src/drop_waker.rs
+++ b/src/drop_waker.rs
@@ -4,9 +4,10 @@
 use std::cell::UnsafeCell;
 use std::ffi::CString;
-use std::ptr;
-use std::sync::Arc;
-use std::task;
+use std::{ptr, task};
+
+use crate::io::{Buffer, ReadBufPool};
+use crate::net::AddressStorage;
 
 /// Create a [`task::Waker`] that will drop itself when the waker is dropped.
 ///
@@ -14,10 +15,6 @@ use std::task;
 ///
 /// The returned `task::Waker` cannot be cloned, it will panic.
 pub(crate) unsafe fn drop_task_waker<T: DropWake>(to_drop: T) -> task::Waker {
-    unsafe fn drop_by_ptr<T: DropWake>(data: *const ()) {
-        T::drop_from_waker_data(data);
-    }
-
     // SAFETY: we meet the `task::Waker` and `task::RawWaker` requirements.
     unsafe {
         task::Waker::from_raw(task::RawWaker::new(
@@ -25,9 +22,9 @@ pub(crate) unsafe fn drop_task_waker<T: DropWake>(to_drop: T) -> task::Waker {
             &task::RawWakerVTable::new(
                 |_| panic!("attempted to clone `a10::drop_task_waker`"),
                 // SAFETY: `wake` takes ownership, so dropping is safe.
-                drop_by_ptr::<T>,
+                T::drop_from_waker_data,
                 |_| { /* `wake_by_ref` is a no-op. */ },
-                drop_by_ptr::<T>,
+                T::drop_from_waker_data,
             ),
         ))
     }
@@ -42,10 +39,7 @@ pub(crate) trait DropWake {
     unsafe fn drop_from_waker_data(data: *const ());
 }
 
-impl<T> DropWake for UnsafeCell<T>
-where
-    T: DropWake,
-{
+impl<T: DropWake> DropWake for UnsafeCell<T> {
     fn into_waker_data(self) -> *const () {
         self.into_inner().into_waker_data()
     }
@@ -55,55 +49,84 @@ where
     }
 }
 
-impl<T> DropWake for (T,)
-where
-    T: DropWake,
-{
+impl<T> DropWake for Box<T> {
+    fn into_waker_data(self) -> *const () {
+        Box::into_raw(self).cast()
+    }
+
+    unsafe fn drop_from_waker_data(data: *const ()) {
+        drop(Box::<T>::from_raw(data.cast_mut().cast()));
+    }
+}
+
+impl DropWake for CString {
+    fn into_waker_data(self) -> *const () {
+        CString::into_raw(self).cast()
+    }
+
+    unsafe fn drop_from_waker_data(data: *const ()) {
+        drop(CString::from_raw(data.cast_mut().cast()));
+    }
+}
+
+impl<A> DropWake for AddressStorage<Box<A>> {
     fn into_waker_data(self) -> *const () {
         self.0.into_waker_data()
     }
 
     unsafe fn drop_from_waker_data(data: *const ()) {
-        T::drop_from_waker_data(data);
+        Box::<A>::drop_from_waker_data(data);
+    }
+}
+
+impl DropWake for ReadBufPool {
+    fn into_waker_data(self) -> *const () {
+        unsafe { ReadBufPool::into_raw(self) }
+    }
+
+    unsafe fn drop_from_waker_data(data: *const ()) {
+        drop(ReadBufPool::from_raw(data));
     }
 }
 
+// Don't need to be deallocated.
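+//
+// `()` is the resource type of operations without resources to drop: its
+// waker data is a null pointer and `drop_from_waker_data` is a no-op, so no
+// allocation is ever made or freed for it.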
+
 impl DropWake for () {
     fn into_waker_data(self) -> *const () {
         ptr::null()
     }
 
-    unsafe fn drop_from_waker_data(_: *const ()) {
-        // Nothing.
-    }
+    unsafe fn drop_from_waker_data(_: *const ()) {}
 }
 
-impl<T> DropWake for Box<T> {
+// Uses a `Box` to get a single pointer.
+
+impl<T, U> DropWake for (T, U) {
     fn into_waker_data(self) -> *const () {
-        Box::into_raw(self).cast()
+        Box::from(self).into_waker_data()
     }
 
     unsafe fn drop_from_waker_data(data: *const ()) {
-        drop(Box::<T>::from_raw(data.cast_mut().cast()));
+        Box::<(T, U)>::drop_from_waker_data(data);
     }
 }
 
-impl<T> DropWake for Arc<T> {
+impl<T, U, V> DropWake for (T, U, V) {
     fn into_waker_data(self) -> *const () {
-        Arc::into_raw(self).cast()
+        Box::from(self).into_waker_data()
    }
 
     unsafe fn drop_from_waker_data(data: *const ()) {
-        drop(Arc::<T>::from_raw(data.cast_mut().cast()));
+        Box::<(T, U, V)>::drop_from_waker_data(data);
    }
 }
 
-impl DropWake for CString {
+impl<B> DropWake for Buffer<B> {
     fn into_waker_data(self) -> *const () {
-        CString::into_raw(self).cast()
+        Box::<B>::from(self.buf).into_waker_data()
     }
 
     unsafe fn drop_from_waker_data(data: *const ()) {
-        drop(CString::from_raw(data.cast_mut().cast()));
+        Box::<B>::drop_from_waker_data(data);
    }
 }
diff --git a/src/extract.rs b/src/extract.rs
index d5b806a2..f13aad36 100644
--- a/src/extract.rs
+++ b/src/extract.rs
@@ -2,7 +2,7 @@
 //!
 //! See the [`Extract`] trait for more information.
 
-use crate::cancel::{Cancel, CancelOp, CancelResult};
+use crate::cancel::{Cancel, CancelOperation, CancelResult};
 
 /// Extract input arguments from operations.
 ///
@@ -100,7 +100,7 @@ impl<Fut: Cancel> Cancel for Extractor<Fut> {
         self.fut.try_cancel()
     }
 
-    fn cancel(&mut self) -> CancelOp {
+    fn cancel(&mut self) -> CancelOperation {
         self.fut.cancel()
     }
 }
diff --git a/src/fd.rs b/src/fd.rs
index 228a77a0..18abe334 100644
--- a/src/fd.rs
+++ b/src/fd.rs
@@ -7,9 +7,10 @@ use std::mem::ManuallyDrop;
 use std::os::fd::{AsFd, AsRawFd, BorrowedFd, FromRawFd, OwnedFd, RawFd};
 use std::{fmt, io};
 
-use crate::op::{op_future, Submission};
-use crate::SubmissionQueue;
-use crate::libc::{self, syscall};
+use crate::{syscall, SubmissionQueue};
+
+#[cfg(any(target_os = "android", target_os = "linux"))]
+pub use crate::sys::fd::{Direct, ToDirect, ToFd};
 
 /// An open file descriptor.
 ///
@@ -39,9 +40,10 @@
 pub struct AsyncFd<D: Descriptor = File> {
     /// # Notes
     ///
-    /// We use `ManuallyDrop` because we drop the fd using io_uring, not a
-    /// blocking `close(2)` system call.
+    /// We use `ManuallyDrop` because we drop the fd using an asynchronous
+    /// operation, not a blocking `close(2)` system call.
     fd: ManuallyDrop<OwnedFd>,
+    // NOTE: public because it's used by the crate::io::Std{in,out,error}.
     pub(crate) sq: SubmissionQueue,
     kind: PhantomData<D>,
 }
@@ -80,54 +82,6 @@ impl AsyncFd {
         let fd = self.fd.try_clone()?;
         Ok(AsyncFd::new(fd, self.sq.clone()))
     }
-
-    /// Convert a regular file descriptor into a direct descriptor.
-    ///
-    /// The file descriptor can continued to be used and the lifetimes of the
-    /// file descriptor and the newly returned direct descriptor are not
-    /// connected.
-    ///
-    /// # Notes
-    ///
-    /// The [`Ring`] must be configured [`with_direct_descriptors`] enabled,
-    /// otherwise this will return `ENXIO`.
- /// - /// [`Ring`]: crate::Ring - /// [`with_direct_descriptors`]: crate::Config::with_direct_descriptors - #[doc(alias = "IORING_OP_FILES_UPDATE")] - #[doc(alias = "IORING_FILE_INDEX_ALLOC")] - pub fn to_direct_descriptor<'fd>(&'fd self) -> ToDirect<'fd, File> { - ToDirect::new(self, Box::new(self.fd()), ()) - } -} - -/// Operations only available on direct descriptors. -impl AsyncFd { - /// Create a new `AsyncFd` from a direct descriptor. - /// - /// # Safety - /// - /// The caller must ensure that `direct_fd` is valid and that it's no longer - /// used by anything other than the returned `AsyncFd`. Furthermore the - /// caller must ensure the direct descriptor is actually a direct - /// descriptor. - pub(crate) unsafe fn from_direct_fd(direct_fd: RawFd, sq: SubmissionQueue) -> AsyncFd { - AsyncFd::from_raw(direct_fd, sq) - } - - /// Convert a direct descriptor into a regular file descriptor. - /// - /// The direct descriptor can continued to be used and the lifetimes of the - /// direct descriptor and the newly returned file descriptor are not - /// connected. - /// - /// # Notes - /// - /// Requires Linux 6.8. - #[doc(alias = "IORING_OP_FIXED_FD_INSTALL")] - pub const fn to_file_descriptor<'fd>(&'fd self) -> ToFd<'fd, Direct> { - ToFd::new(self, ()) - } } impl AsyncFd { @@ -153,6 +107,11 @@ impl AsyncFd { pub(crate) fn fd(&self) -> RawFd { self.fd.as_raw_fd() } + + /// Returns the `SubmissionQueue` of this `AsyncFd`. + pub(crate) const fn sq(&self) -> &SubmissionQueue { + &self.sq + } } impl AsFd for AsyncFd { @@ -161,21 +120,13 @@ impl AsFd for AsyncFd { } } +impl Unpin for AsyncFd {} + impl fmt::Debug for AsyncFd { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - struct AsyncFdSubmissionQueue<'a>(&'a SubmissionQueue); - - impl fmt::Debug for AsyncFdSubmissionQueue<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("SubmissionQueue") - .field("ring_fd", &self.0.shared.ring_fd.as_raw_fd()) - .finish() - } - } - f.debug_struct("AsyncFd") .field("fd", &self.fd()) - .field("sq", &AsyncFdSubmissionQueue(&self.sq)) + .field("sq", &"SubmissionQueue") .field("kind", &D::fmt_dbg()) .finish() } @@ -183,17 +134,24 @@ impl fmt::Debug for AsyncFd { impl Drop for AsyncFd { fn drop(&mut self) { - let result = self.sq.add_no_result(|submission| unsafe { - submission.close(self.fd()); - submission.no_completion_event(); - D::use_flags(submission); - }); - if let Err(err) = result { - log::warn!("error submitting close operation for a10::AsyncFd: {err}"); - if let Err(err) = D::close(self.fd()) { - log::warn!("error closing a10::AsyncFd: {err}"); + // Try to asynchronously close the desctiptor (if the OS supports it). + #[cfg(any(target_os = "android", target_os = "linux"))] + { + let result = self.sq.inner.submit_no_completion(|submission| { + D::close_flags(self.fd(), submission); + }); + match result { + Ok(()) => return, + Err(crate::sq::QueueFull) => { + log::warn!("error submitting close operation for a10::AsyncFd, queue is full"); + } } } + + // Fall back to synchronously closing the descriptor. + if let Err(err) = D::close(self.fd()) { + log::warn!("error closing a10::AsyncFd: {err}"); + } } } @@ -202,17 +160,15 @@ impl Drop for AsyncFd { pub trait Descriptor: private::Descriptor {} pub(crate) mod private { - use std::os::fd::RawFd; use std::io; - - use crate::op::Submission; + use std::os::fd::RawFd; pub(crate) trait Descriptor { /// Set any additional flags in `submission` when using the descriptor. 
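        /// (For direct descriptors this is where, for example, the fixed-file
        /// flag is set on each submission; see the `Direct` implementation,
        /// which this diff moves to `crate::sys::fd`.)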
- fn use_flags(submission: &mut Submission); + fn use_flags(submission: &mut crate::sys::Submission); /// Set any additional flags in `submission` when creating the descriptor. - fn create_flags(submission: &mut Submission); + fn create_flags(submission: &mut crate::sys::Submission); /// Return the equivalant of `O_CLOEXEC` for the descripor. fn cloexec_flag() -> libc::c_int; @@ -224,6 +180,10 @@ pub(crate) mod private { /// Debug representation of the descriptor. fn fmt_dbg() -> &'static str; + /// Set flags in `submission` to close the descriptor. + fn close_flags(fd: RawFd, submission: &mut crate::sys::Submission); + + /// Synchronously close the file descriptor. fn close(fd: RawFd) -> io::Result<()>; } } @@ -235,11 +195,11 @@ pub enum File {} impl Descriptor for File {} impl private::Descriptor for File { - fn use_flags(_: &mut Submission) { + fn use_flags(_: &mut crate::sys::Submission) { // No additional flags needed. } - fn create_flags(_: &mut Submission) { + fn create_flags(_: &mut crate::sys::Submission) { // No additional flags needed. } @@ -255,87 +215,12 @@ impl private::Descriptor for File { "file descriptor" } - fn close(fd: RawFd) -> io::Result<()> { - syscall!(close(fd))?; - Ok(()) - } -} - -/// Direct descriptors are io_uring private file descriptors. -/// -/// They avoid some of the overhead associated with thread shared file tables -/// and can be used in any io_uring request that takes a file descriptor. -/// However they cannot be used outside of io_uring. -#[derive(Copy, Clone, Debug)] -pub enum Direct {} - -impl Descriptor for Direct {} - -impl private::Descriptor for Direct { - fn use_flags(submission: &mut Submission) { - submission.use_direct_fd(); - } - - fn create_flags(submission: &mut Submission) { - submission.create_direct_fd(); - } - - fn cloexec_flag() -> libc::c_int { - 0 // Direct descriptor always have (the equivalant of) `O_CLOEXEC` set. - } - - fn cancel_flag() -> u32 { - libc::IORING_ASYNC_CANCEL_FD_FIXED - } - - fn fmt_dbg() -> &'static str { - "direct descriptor" + fn close_flags(fd: RawFd, submission: &mut crate::sys::Submission) { + crate::sys::io::close_file_fd(fd, submission); } fn close(fd: RawFd) -> io::Result<()> { - // TODO: don't leak the the fd. - log::warn!("leaking direct descriptor {fd}"); + syscall!(close(fd))?; Ok(()) } } - -// ToFd. -op_future! { - fn AsyncFd::to_file_descriptor -> AsyncFd, - struct ToFd<'fd> { - // No state needed. - }, - setup_state: _unused: (), - setup: |submission, fd, (), ()| unsafe { - // NOTE: flags must currently be null. - submission.create_file_descriptor(fd.fd(), 0); - }, - map_result: |this, (), fd| { - let sq = this.fd.sq.clone(); - // SAFETY: the fixed fd intall operation ensures that `fd` is valid. - let fd = unsafe { AsyncFd::from_raw_fd(fd, sq) }; - Ok(fd) - }, -} - -// ToDirect. -// NOTE: keep this in sync with the `process::ToSignalsDirect` implementation. -op_future! { - fn AsyncFd::to_direct_descriptor -> AsyncFd, - struct ToDirect<'fd> { - /// The file descriptor we're changing into a direct descriptor, needs - /// to stay in memory so the kernel can access it safely. - direct_fd: Box, - }, - setup_state: _unused: (), - setup: |submission, fd, (direct_fd,), ()| unsafe { - submission.create_direct_descriptor(&mut **direct_fd, 1); - }, - map_result: |this, (direct_fd,), res| { - debug_assert!(res == 1); - let sq = this.fd.sq.clone(); - // SAFETY: the files update operation ensures that `direct_fd` is valid. 
- let fd = unsafe { AsyncFd::from_direct_fd(*direct_fd, sq) }; - Ok(fd) - }, -} diff --git a/src/fs.rs b/src/fs.rs index a9e5ffdb..6b6fd8c5 100644 --- a/src/fs.rs +++ b/src/fs.rs @@ -3,23 +3,17 @@ //! To open a file ([`AsyncFd`]) use [`open_file`] or [`OpenOptions`]. use std::ffi::{CString, OsString}; -use std::future::Future; -use std::marker::PhantomData; -use std::mem::zeroed; use std::os::unix::ffi::OsStringExt; use std::path::PathBuf; -use std::pin::Pin; -use std::task::{self, Poll}; use std::time::{Duration, SystemTime}; -use std::{fmt, io, str}; +use std::{fmt, io, mem, str}; -use crate::extract::Extractor; use crate::fd::{AsyncFd, Descriptor, File}; -use crate::op::{op_future, poll_state, OpState}; -use crate::{libc, man_link, Extract, SubmissionQueue}; +use crate::op::{fd_operation, operation, FdOperation, Operation}; +use crate::{man_link, sys, SubmissionQueue}; /// Flags needed to fill [`Metadata`]. -const METADATA_FLAGS: u32 = libc::STATX_TYPE +pub(crate) const METADATA_FLAGS: u32 = libc::STATX_TYPE | libc::STATX_MODE | libc::STATX_SIZE | libc::STATX_BLOCKS @@ -178,12 +172,8 @@ impl OpenOptions { #[doc = man_link!(openat(2))] #[doc(alias = "openat")] pub fn open(self, sq: SubmissionQueue, path: PathBuf) -> Open { - Open { - path: Some(path_to_cstring(path)), - sq: Some(sq), - state: OpState::NotStarted((self.flags | D::cloexec_flag(), self.mode)), - kind: PhantomData, - } + let args = (self.flags | D::cloexec_flag(), self.mode); + Open(Operation::new(sq, path_to_cstring(path), args)) } } @@ -193,104 +183,60 @@ pub fn open_file(sq: SubmissionQueue, path: PathBuf) -> Open { OpenOptions::new().read().open(sq, path) } -/// [`Future`] to [`open`] an asynchronous file ([`AsyncFd`]). -/// -/// [`open`]: OpenOptions::open -#[derive(Debug)] -#[must_use = "`Future`s do nothing unless polled"] -pub struct Open { - /// Path used to open the file, need to stay in memory so the kernel can - /// access it safely. - // SAFETY: because this is not modified by the kernel it doesn't need an - // UnsafeCell. It is read-only (as the kernel also has read access). - path: Option, - sq: Option, - state: OpState<(libc::c_int, libc::mode_t)>, - kind: PhantomData, +/// Creates a new, empty directory. +#[doc = man_link!(mkdirat(2))] +pub fn create_dir(sq: SubmissionQueue, path: PathBuf) -> CreateDir { + CreateDir(Operation::new(sq, path_to_cstring(path), ())) } -impl Future for Open { - type Output = io::Result>; - - fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - let op_index = poll_state!( - Open, - self.state, - self.sq.as_ref().unwrap(), - ctx, - |submission, (flags, mode)| unsafe { - // SAFETY: `path` is only removed after the state is set to `Done`. - let path = self.path.as_ref().unwrap(); - submission.open_at(libc::AT_FDCWD, path.as_ptr(), flags, mode); - D::create_flags(submission); - } - ); - - match self.sq.as_ref().unwrap().poll_op(ctx, op_index) { - Poll::Ready(result) => { - self.state = OpState::Done; - match result { - Ok((_, fd)) => Poll::Ready(Ok(unsafe { - // SAFETY: the open operation ensures that `fd` is valid. - // SAFETY: unwrapped `sq` above already. - AsyncFd::from_raw(fd, self.sq.take().unwrap()) - })), - Err(err) => Poll::Ready(Err(err)), - } - } - Poll::Pending => Poll::Pending, - } - } +/// Rename a file or directory to a new name. 
+#[doc = man_link!(rename(2))] +pub fn rename(sq: SubmissionQueue, from: PathBuf, to: PathBuf) -> Rename { + let resources = (path_to_cstring(from), path_to_cstring(to)); + Rename(Operation::new(sq, resources, ())) } -impl Extract for Open {} - -impl Future for Extractor> { - type Output = io::Result<(AsyncFd, PathBuf)>; +/// Remove a file. +#[doc = man_link!(unlinkat(2))] +#[doc(alias = "unlink")] +#[doc(alias = "unlinkat")] +pub fn remove_file(sq: SubmissionQueue, path: PathBuf) -> Delete { + Delete(Operation::new(sq, path_to_cstring(path), RemoveFlag::File)) +} - fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - match Pin::new(&mut self.fut).poll(ctx) { - Poll::Ready(Ok(file)) => { - let path = path_from_cstring(self.fut.path.take().unwrap()); - Poll::Ready(Ok((file, path))) - } - Poll::Ready(Err(err)) => Poll::Ready(Err(err)), - Poll::Pending => Poll::Pending, - } - } +/// Remove a directory. +#[doc = man_link!(unlinkat(2))] +#[doc(alias = "rmdir")] +#[doc(alias = "unlinkat")] +pub fn remove_dir(sq: SubmissionQueue, path: PathBuf) -> Delete { + let path = path_to_cstring(path); + Delete(Operation::new(sq, path, RemoveFlag::Directory)) } -impl Drop for Open { - fn drop(&mut self) { - if let Some(path) = self.path.take() { - match self.state { - OpState::Running(op_index) => { - // SAFETY: only when the `Future` completed is `self.sq` - // `None`, but in that case `self.path` would also be - // `None`. - let sq = self.sq.as_ref().unwrap(); - let result = sq.cancel_op( - op_index, - || path, - |submission| unsafe { - submission.cancel_op(op_index); - // We'll get a canceled completion event if we succeeded, which - // is sufficient to cleanup the operation. - submission.no_completion_event(); - }, - ); - if let Err(err) = result { - log::error!( - "dropped a10::Open before completion, attempt to cancel failed: {err}" - ); - } - } - OpState::NotStarted(_) | OpState::Done => drop(path), - } - } - } +#[derive(Copy, Clone, Debug)] +pub(crate) enum RemoveFlag { + File, + Directory, } +operation!( + /// [`Future`] behind [`OpenOptions::open`] and [`open_file`]. + pub struct Open(sys::fs::OpenOp) -> io::Result>, + impl Extract -> io::Result<(AsyncFd, PathBuf)>; + + /// [`Future`] behind [`create_dir`]. + pub struct CreateDir(sys::fs::CreateDirOp) -> io::Result<()>, + impl Extract -> io::Result; + + /// [`Future`] behind [`rename`]. + pub struct Rename(sys::fs::RenameOp) -> io::Result<()>, + impl Extract -> io::Result<(PathBuf, PathBuf)>; + + /// [`Future`] behind [`remove_file`] and [`remove_dir`]. + pub struct Delete(sys::fs::DeleteOp) -> io::Result<()>, + impl Extract -> io::Result; +); + /// File(system) related system calls. impl AsyncFd { /// Sync all OS-internal metadata to disk. @@ -301,7 +247,7 @@ impl AsyncFd { #[doc = man_link!(fsync(2))] #[doc(alias = "fsync")] pub const fn sync_all<'fd>(&'fd self) -> SyncData<'fd, D> { - SyncData::new(self, 0) + SyncData(FdOperation::new(self, (), SyncDataFlag::All)) } /// This function is similar to [`sync_all`], except that it may not @@ -319,18 +265,16 @@ impl AsyncFd { #[doc = man_link!(fsync(2))] #[doc(alias = "fdatasync")] pub const fn sync_data<'fd>(&'fd self) -> SyncData<'fd, D> { - SyncData::new(self, libc::IORING_FSYNC_DATASYNC) + SyncData(FdOperation::new(self, (), SyncDataFlag::Data)) } /// Retrieve metadata about the file. 
#[doc = man_link!(statx(2))] #[doc(alias = "statx")] pub fn metadata<'fd>(&'fd self) -> Stat<'fd, D> { - let metadata = Box::new(Metadata { - // SAFETY: all zero values are valid representations. - inner: unsafe { zeroed() }, - }); - Stat::new(self, metadata, ()) + // SAFETY: fully zeroed `libc::statx` is a valid value. + let metadata = unsafe { Box::new(mem::zeroed()) }; + Stat(FdOperation::new(self, metadata, ())) } /// Predeclare an access pattern for file data. @@ -351,7 +295,7 @@ impl AsyncFd { length: u32, advice: libc::c_int, ) -> Advise<'fd, D> { - Advise::new(self, (offset, length, advice)) + Advise(FdOperation::new(self, (), (offset, length, advice))) } /// Manipulate file space. @@ -367,7 +311,7 @@ impl AsyncFd { length: u32, mode: libc::c_int, ) -> Allocate<'fd, D> { - Allocate::new(self, (offset, length, mode)) + Allocate(FdOperation::new(self, (), (offset, length, mode))) } /// Truncate the file to `length`. @@ -378,88 +322,32 @@ impl AsyncFd { #[doc = man_link!(ftruncate(2))] #[doc(alias = "ftruncate")] pub const fn truncate<'fd>(&'fd self, length: u64) -> Truncate<'fd, D> { - Truncate::new(self, length) + Truncate(FdOperation::new(self, (), length)) } } -// SyncData. -op_future! { - fn AsyncFd::sync_all -> (), - struct SyncData<'fd> { - // Doesn't need any fields. - }, - setup_state: flags: u32, - setup: |submission, fd, (), flags| unsafe { - submission.fsync(fd.fd(), flags); - }, - map_result: |n| Ok(debug_assert!(n == 0)), +#[derive(Copy, Clone, Debug)] +pub(crate) enum SyncDataFlag { + All, + Data, } -// Metadata. -op_future! { - fn AsyncFd::metadata -> Box, - struct Stat<'fd> { - /// Buffer to write the statx data into. - metadata: Box, - }, - setup_state: _unused: (), - setup: |submission, fd, (metadata,), ()| unsafe { - submission.statx_file(fd.fd(), &mut metadata.inner, METADATA_FLAGS); - }, - map_result: |this, (metadata,), n| { - debug_assert!(n == 0); - debug_assert!(metadata.inner.stx_mask & METADATA_FLAGS == METADATA_FLAGS); - Ok(metadata) - }, -} +fd_operation!( + /// [`Future`] behind [`AsyncFd::sync_all`] and [`AsyncFd::sync_data`]. + pub struct SyncData(sys::fs::SyncDataOp) -> io::Result<()>; -// Advise. -op_future! { - fn AsyncFd::advise -> (), - struct Advise<'fd> { - // Doesn't need any fields. - }, - setup_state: flags: (u64, u32, libc::c_int), - setup: |submission, fd, (), (offset, length, advise)| unsafe { - submission.fadvise(fd.fd(), offset, length, advise); - }, - map_result: |this, (), res| { - debug_assert!(res == 0); - Ok(()) - }, -} + /// [`Future`] behind [`AsyncFd::metadata`]. + pub struct Stat(sys::fs::StatOp) -> io::Result; -// Allocate. -op_future! { - fn AsyncFd::allocate -> (), - struct Allocate<'fd> { - // Doesn't need any fields. - }, - setup_state: flags: (u64, u32, libc::c_int), - setup: |submission, fd, (), (offset, length, mode)| unsafe { - submission.fallocate(fd.fd(), offset, length, mode); - }, - map_result: |this, (), res| { - debug_assert!(res == 0); - Ok(()) - }, -} + /// [`Future`] behind [`AsyncFd::advise`]. + pub struct Advise(sys::fs::AdviseOp) -> io::Result<()>; -// Truncate. -op_future! { - fn AsyncFd::truncate -> (), - struct Truncate<'fd> { - // Doesn't need any fields. - }, - setup_state: flags: u64, - setup: |submission, fd, (), length| unsafe { - submission.ftruncate(fd.fd(), length); - }, - map_result: |this, (), res| { - debug_assert!(res == 0); - Ok(()) - }, -} + /// [`Future`] behind [`AsyncFd::allocate`]. 
+ pub struct Allocate(sys::fs::AllocateOp) -> io::Result<()>; + + /// [`Future`] behind [`AsyncFd::truncate`]. + pub struct Truncate(sys::fs::TruncateOp) -> io::Result<()>; +); /// Metadata information about a file. /// @@ -470,6 +358,10 @@ pub struct Metadata { } impl Metadata { + pub(crate) const fn mask(&self) -> u32 { + self.inner.stx_mask + } + /// Returns the file type for this metadata. pub const fn file_type(&self) -> FileType { FileType(self.inner.stx_mode) @@ -743,313 +635,10 @@ impl fmt::Debug for Permissions { } } -/// Creates a new, empty directory. -#[doc = man_link!(mkdirat(2))] -pub fn create_dir(sq: SubmissionQueue, path: PathBuf) -> CreateDir { - CreateDir { - sq, - path: Some(path_to_cstring(path)), - state: OpState::NotStarted(()), - } -} - -/// [`Future`] to [create a directory]. -/// -/// [create a directory]: create_dir -#[derive(Debug)] -#[must_use = "`Future`s do nothing unless polled"] -pub struct CreateDir { - sq: SubmissionQueue, - /// Path used to create the directory, need to stay in memory so the kernel - /// can access it safely. - // SAFETY: because this is not modified by the kernel it doesn't need an - // UnsafeCell. It is read-only (as the kernel also has read access). - path: Option, - state: OpState<()>, -} - -impl Future for CreateDir { - type Output = io::Result<()>; - - fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - #[rustfmt::skip] - let op_index = poll_state!(CreateDir, self.state, self.sq, ctx, |submission, ()| unsafe { - // SAFETY: `path` is only removed after the state is set to `Done`. - let path = self.path.as_ref().unwrap(); - let mode = 0o777; // Same as used by the standard library. - submission.mkdirat(libc::AT_FDCWD, path.as_ptr(), mode); - }); - - match self.sq.poll_op(ctx, op_index) { - Poll::Ready(result) => { - self.state = OpState::Done; - match result { - Ok((_, res)) => Poll::Ready(Ok(debug_assert!(res == 0))), - Err(err) => Poll::Ready(Err(err)), - } - } - Poll::Pending => Poll::Pending, - } - } -} - -impl Extract for CreateDir {} - -impl Future for Extractor { - type Output = io::Result; - - fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - match Pin::new(&mut self.fut).poll(ctx) { - Poll::Ready(Ok(())) => { - let path = path_from_cstring(self.fut.path.take().unwrap()); - Poll::Ready(Ok(path)) - } - Poll::Ready(Err(err)) => Poll::Ready(Err(err)), - Poll::Pending => Poll::Pending, - } - } -} - -impl Drop for CreateDir { - fn drop(&mut self) { - if let Some(path) = self.path.take() { - match self.state { - OpState::Running(op_index) => { - let result = self.sq.cancel_op( - op_index, - || path, - |submission| unsafe { - submission.cancel_op(op_index); - // We'll get a canceled completion event if we succeeded, which - // is sufficient to cleanup the operation. - submission.no_completion_event(); - }, - ); - if let Err(err) = result { - log::error!("dropped a10::CreateDir before completion, attempt to cancel failed: {err}"); - } - } - OpState::NotStarted(()) | OpState::Done => drop(path), - } - } - } -} - -/// Rename a file or directory to a new name. -#[doc = man_link!(rename(2))] -pub fn rename(sq: SubmissionQueue, from: PathBuf, to: PathBuf) -> Rename { - Rename { - sq, - from: Some(path_to_cstring(from)), - to: Some(path_to_cstring(to)), - state: OpState::NotStarted(()), - } -} - -/// [`Future`] to [`rename`] a file. 
-#[derive(Debug)] -#[must_use = "`Future`s do nothing unless polled"] -pub struct Rename { - sq: SubmissionQueue, - /// Paths used to rename the file, need to stay in memory so the kernel can - /// access it safely. - // SAFETY: because this is not modified by the kernel it doesn't need an - // UnsafeCell. It is read-only (as the kernel also has read access). - from: Option, - to: Option, - state: OpState<()>, -} - -impl Future for Rename { - type Output = io::Result<()>; - - fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - let op_index = poll_state!(Rename, self.state, self.sq, ctx, |submission, ()| unsafe { - // SAFETY: `from` and `to` are only removed after the state is set to `Done`. - let from = self.from.as_ref().unwrap(); - let to = self.to.as_ref().unwrap(); - submission.rename( - libc::AT_FDCWD, - from.as_ptr(), - libc::AT_FDCWD, - to.as_ptr(), - 0, - ); - }); - - match self.sq.poll_op(ctx, op_index) { - Poll::Ready(result) => { - self.state = OpState::Done; - match result { - Ok((_, res)) => Poll::Ready(Ok(debug_assert!(res == 0))), - Err(err) => Poll::Ready(Err(err)), - } - } - Poll::Pending => Poll::Pending, - } - } -} - -impl Extract for Rename {} - -impl Future for Extractor { - type Output = io::Result<(PathBuf, PathBuf)>; - - fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - match Pin::new(&mut self.fut).poll(ctx) { - Poll::Ready(Ok(())) => { - let from = path_from_cstring(self.fut.from.take().unwrap()); - let to = path_from_cstring(self.fut.to.take().unwrap()); - Poll::Ready(Ok((from, to))) - } - Poll::Ready(Err(err)) => Poll::Ready(Err(err)), - Poll::Pending => Poll::Pending, - } - } -} - -impl Drop for Rename { - fn drop(&mut self) { - if let Some(from) = self.from.take() { - // SAFETY: if `from` is `Some`, so is `to` as we extract both or - // neither. - let to = self.to.take().unwrap(); - - match self.state { - OpState::Running(op_index) => { - let result = self.sq.cancel_op( - op_index, - || Box::from((from, to)), - |submission| unsafe { - submission.cancel_op(op_index); - // We'll get a canceled completion event if we succeeded, which - // is sufficient to cleanup the operation. - submission.no_completion_event(); - }, - ); - if let Err(err) = result { - log::error!("dropped a10::CreateDir before completion, attempt to cancel failed: {err}"); - } - } - OpState::NotStarted(()) | OpState::Done => { - drop(from); - drop(to); - } - } - } - } -} - -/// Remove a file. -#[doc = man_link!(unlinkat(2))] -#[doc(alias = "unlink")] -#[doc(alias = "unlinkat")] -pub fn remove_file(sq: SubmissionQueue, path: PathBuf) -> Delete { - Delete { - sq, - path: Some(path_to_cstring(path)), - state: OpState::NotStarted(0), - } -} - -/// Remove a directory. -#[doc = man_link!(unlinkat(2))] -#[doc(alias = "rmdir")] -#[doc(alias = "unlinkat")] -pub fn remove_dir(sq: SubmissionQueue, path: PathBuf) -> Delete { - Delete { - sq, - path: Some(path_to_cstring(path)), - state: OpState::NotStarted(libc::AT_REMOVEDIR), - } -} - -/// [`Future`] to remove a [file] or [directory]. -/// -/// [file]: remove_file -/// [directory]: remove_dir -#[derive(Debug)] -#[must_use = "`Future`s do nothing unless polled"] -pub struct Delete { - sq: SubmissionQueue, - /// Paths used to rename the file, need to stay in memory so the kernel can - /// access it safely. - // SAFETY: because this is not modified by the kernel it doesn't need an - // UnsafeCell. It is read-only (as the kernel also has read access). 
- path: Option, - state: OpState, -} - -impl Future for Delete { - type Output = io::Result<()>; - - fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - #[rustfmt::skip] - let op_index = poll_state!(Delete, self.state, self.sq, ctx, |submission, flags| unsafe { - // SAFETY: `path` is only removed after the state is set to `Done`. - let path = self.path.as_ref().unwrap(); - submission.unlinkat(libc::AT_FDCWD, path.as_ptr(), flags); - }); - - match self.sq.poll_op(ctx, op_index) { - Poll::Ready(result) => { - self.state = OpState::Done; - match result { - Ok((_, res)) => Poll::Ready(Ok(debug_assert!(res == 0))), - Err(err) => Poll::Ready(Err(err)), - } - } - Poll::Pending => Poll::Pending, - } - } -} - -impl Extract for Delete {} - -impl Future for Extractor { - type Output = io::Result; - - fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - match Pin::new(&mut self.fut).poll(ctx) { - Poll::Ready(Ok(())) => { - let path = path_from_cstring(self.fut.path.take().unwrap()); - Poll::Ready(Ok(path)) - } - Poll::Ready(Err(err)) => Poll::Ready(Err(err)), - Poll::Pending => Poll::Pending, - } - } -} - -impl Drop for Delete { - fn drop(&mut self) { - if let Some(path) = self.path.take() { - match self.state { - OpState::Running(op_index) => { - let result = self.sq.cancel_op( - op_index, - || path, - |submission| unsafe { - submission.cancel_op(op_index); - // We'll get a canceled completion event if we succeeded, which - // is sufficient to cleanup the operation. - submission.no_completion_event(); - }, - ); - if let Err(err) = result { - log::error!("dropped a10::CreateDir before completion, attempt to cancel failed: {err}"); - } - } - OpState::NotStarted(_) | OpState::Done => drop(path), - } - } - } -} - fn path_to_cstring(path: PathBuf) -> CString { unsafe { CString::from_vec_unchecked(path.into_os_string().into_vec()) } } -fn path_from_cstring(path: CString) -> PathBuf { +pub(crate) fn path_from_cstring(path: CString) -> PathBuf { OsString::from_vec(path.into_bytes()).into() } diff --git a/src/io/mod.rs b/src/io/mod.rs index 35467377..e7b1ce60 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -5,81 +5,86 @@ //! descriptor ([`AsyncFd`]). Additionally the [`BufSlice`] and [`BufMutSlice`] //! traits existing to define the behaviour of buffers in vectored I/O. //! -//! A specialised read buffer pool implementation exists in [`ReadBufPool`], -//! which is a buffer pool managed by the kernel when making `read(2)`-like -//! system calls. +//! A specialised io_uring-only read buffer pool implementation exists in +//! [`ReadBufPool`], which is a buffer pool managed by the kernel when making +//! `read(2)`-like system calls. //! //! Finally we have the [`stdin`], [`stdout`] and [`stderr`] functions to create //! `AsyncFd`s for standard in, out and error respectively. -// This is not ideal. -// This should only be applied to `ReadVectored` and `WriteVectored` as they use -// `libc::iovec` internally, which is `!Send`, while it actually is `Send`. 
-#![allow(clippy::non_send_fields_in_send_ty)]
-
 use std::future::Future;
-use std::marker::PhantomData;
-use std::mem::{ManuallyDrop, MaybeUninit};
-use std::os::fd::RawFd;
+use std::mem::ManuallyDrop;
+use std::os::fd::{AsRawFd, BorrowedFd};
 use std::pin::Pin;
 use std::task::{self, Poll};
 use std::{io, ptr};

-use crate::cancel::{Cancel, CancelOp, CancelResult};
+use crate::cancel::{Cancel, CancelOperation, CancelResult};
 use crate::extract::{Extract, Extractor};
 use crate::fd::{AsyncFd, Descriptor, File};
-use crate::op::{op_future, poll_state, OpState, NO_OFFSET};
-use crate::{libc, man_link, SubmissionQueue};
+use crate::op::{fd_operation, operation, FdOperation, Operation};
+use crate::{man_link, sys};

 mod read_buf;
-#[doc(hidden)]
-pub use read_buf::{BufGroupId, BufIdx};
-pub use read_buf::{ReadBuf, ReadBufPool};
+mod traits;
+pub use read_buf::{ReadBuf, ReadBufPool};
+pub use traits::{Buf, BufMut, BufMutSlice, BufSlice, IoMutSlice, IoSlice};
+#[allow(unused_imports)] // Not used by all OS.
+pub(crate) use traits::{BufGroupId, BufId};

 // Re-export so we don't have to worry about import `std::io` and `crate::io`.
 pub(crate) use std::io::*;

+/// The io_uring_enter(2) manual says for IORING_OP_READ and IORING_OP_WRITE:
+/// > If offs is set to -1, the offset will use (and advance) the file
+/// > position, like the read(2) and write(2) system calls.
+///
+/// `-1` cast as `unsigned long long` in C is the same as `u64::MAX`.
+pub(crate) const NO_OFFSET: u64 = u64::MAX;
+
+/// Create a function and type to wrap standard {in,out,error}.
 macro_rules! stdio {
     ( $fn: ident () -> $name: ident, $fd: expr ) => {
-        #[doc = concat!("Create a new `", stringify!($name), "`.\n\n")]
+        #[doc = concat!("Create a new [`", stringify!($name), "`].")]
         pub fn $fn(sq: $crate::SubmissionQueue) -> $name {
-            unsafe { $name(std::mem::ManuallyDrop::new($crate::AsyncFd::from_raw_fd($fd, sq))) }
+            unsafe { $name(::std::mem::ManuallyDrop::new($crate::fd::AsyncFd::from_raw_fd($fd, sq))) }
         }

         #[doc = concat!(
             "An [`AsyncFd`] for ", stringify!($fn), ".\n\n",
+            "Created by calling [`", stringify!($fn), "`].\n\n",
             "# Notes\n\n",
             "This directly writes to the raw file descriptor, which means it's not buffered and will not flush anything buffered by the standard library.\n\n",
             "When this type is dropped it will not close ", stringify!($fn), ".",
         )]
-        pub struct $name(std::mem::ManuallyDrop<$crate::AsyncFd>);
+        pub struct $name(::std::mem::ManuallyDrop<$crate::fd::AsyncFd>);

-        impl std::ops::Deref for $name {
-            type Target = $crate::AsyncFd;
+        impl ::std::ops::Deref for $name {
+            type Target = $crate::fd::AsyncFd;

             fn deref(&self) -> &Self::Target {
                 &self.0
             }
         }

-        impl std::fmt::Debug for $name {
-            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-                f.debug_struct(stringify!($name))
+        impl ::std::fmt::Debug for $name {
+            fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> std::fmt::Result {
+                f.debug_struct(::std::stringify!($name))
                     .field("fd", &*self.0)
                     .finish()
             }
         }

-        impl std::ops::Drop for $name {
+        impl ::std::ops::Drop for $name {
             fn drop(&mut self) {
                 // We don't want to close the file descriptor, but we do need to
                 // drop our reference to the submission queue.
                 // SAFETY: with `ManuallyDrop` we don't drop the `AsyncFd` so
                 // it's not dropped twice. Otherwise we get access to it using
                 // safe methods.
- unsafe { std::ptr::drop_in_place(&mut self.0.sq) }; + unsafe { ::std::ptr::drop_in_place(&mut self.0.sq) }; } } }; @@ -105,11 +110,13 @@ impl AsyncFd { /// The current file cursor is not affected by this function. This means /// that a call `read_at(buf, 1024)` with a buffer of 1kb will **not** /// continue reading at 2kb in the next call to `read`. + #[doc = man_link!(pread(2))] pub const fn read_at<'fd, B>(&'fd self, buf: B, offset: u64) -> Read<'fd, B, D> where B: BufMut, { - Read::new(self, buf, offset) + let buf = Buffer { buf }; + Read(FdOperation::new(self, buf, offset)) } /// Read at least `n` bytes from this fd into `buf`. @@ -127,7 +134,12 @@ impl AsyncFd { where B: BufMut, { - ReadN::new(self, buf, offset, n) + let buf = ReadNBuf { buf, last_read: 0 }; + ReadN { + read: self.read_at(buf, offset), + offset, + left: n, + } } /// Read from this fd into `bufs`. @@ -142,6 +154,7 @@ impl AsyncFd { /// Read from this fd into `bufs` starting at `offset`. /// /// The current file cursor is not affected by this function. + #[doc = man_link!(preadv(2))] pub fn read_vectored_at<'fd, B, const N: usize>( &'fd self, mut bufs: B, @@ -150,8 +163,8 @@ impl AsyncFd { where B: BufMutSlice, { - let iovecs = unsafe { bufs.as_iovecs_mut() }; - ReadVectored::new(self, bufs, iovecs, offset) + let iovecs = Box::new(unsafe { bufs.as_iovecs_mut() }); + ReadVectored(FdOperation::new(self, (bufs, iovecs), offset)) } /// Read at least `n` bytes from this fd into `bufs`. @@ -178,7 +191,15 @@ impl AsyncFd { where B: BufMutSlice, { - ReadNVectored::new(self, bufs, offset, n) + let bufs = ReadNBuf { + buf: bufs, + last_read: 0, + }; + ReadNVectored { + read: self.read_vectored_at(bufs, offset), + offset, + left: n, + } } /// Write `buf` to this fd. @@ -197,7 +218,8 @@ impl AsyncFd { where B: Buf, { - Write::new(self, buf, offset) + let buf = Buffer { buf }; + Write(FdOperation::new(self, buf, offset)) } /// Write all of `buf` to this fd. @@ -215,7 +237,13 @@ impl AsyncFd { where B: Buf, { - WriteAll::new(self, buf, offset) + let buf = SkipBuf { buf, skip: 0 }; + WriteAll { + write: Extractor { + fut: self.write_at(buf, offset), + }, + offset, + } } /// Write `bufs` to this file. @@ -238,8 +266,8 @@ impl AsyncFd { where B: BufSlice, { - let iovecs = unsafe { bufs.as_iovecs() }; - WriteVectored::new(self, bufs, iovecs, offset) + let iovecs = Box::new(unsafe { bufs.as_iovecs() }); + WriteVectored(FdOperation::new(self, (bufs, iovecs), offset)) } /// Write all `bufs` to this file. @@ -264,7 +292,11 @@ impl AsyncFd { where B: BufSlice, { - WriteAllVectored::new(self, bufs, offset) + WriteAllVectored { + write: self.write_vectored_at(bufs, offset).extract(), + offset, + skip: 0, + } } /// Splice `length` bytes to `target` fd. @@ -272,9 +304,9 @@ impl AsyncFd { /// See the `splice(2)` manual for correct usage. #[doc = man_link!(splice(2))] #[doc(alias = "splice")] - pub const fn splice_to<'fd>( + pub fn splice_to<'fd>( &'fd self, - target: RawFd, + target: BorrowedFd<'fd>, length: u32, flags: libc::c_int, ) -> Splice<'fd, D> { @@ -284,10 +316,10 @@ impl AsyncFd { /// Same as [`AsyncFd::splice_to`], but starts reading data at `offset` from /// the file (instead of the current position of the read cursor) and starts /// writing at `target_offset` to `target`. 
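(Aside: `splice_to` and friends now borrow the target descriptor as a `BorrowedFd` instead of taking a `RawFd`, tying the target's validity to the call. A hedged caller sketch under the new signature; `pipe_w` is assumed to be the write end of a pipe, since splice(2) requires one side to be a pipe, and `AsyncFd`'s descriptor parameter is assumed to default to `File`.)

```rust
use std::io;
use std::os::fd::BorrowedFd;

// Splice up to 4 KiB from `fd` into the pipe. The flags argument is passed
// through to splice(2) unchanged; 0 means no SPLICE_F_* flags.
async fn drain_to_pipe(fd: &a10::AsyncFd, pipe_w: BorrowedFd<'_>) -> io::Result<usize> {
    fd.splice_to(pipe_w, 4096, 0).await
}
```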
-    pub const fn splice_to_at<'fd>(
+    pub fn splice_to_at<'fd>(
         &'fd self,
         offset: u64,
-        target: RawFd,
+        target: BorrowedFd<'fd>,
         target_offset: u64,
         length: u32,
         flags: libc::c_int,
@@ -306,9 +338,9 @@ impl<D: Descriptor> AsyncFd<D> {
     ///
     /// See the `splice(2)` manual for correct usage.
     #[doc(alias = "splice")]
-    pub const fn splice_from<'fd>(
+    pub fn splice_from<'fd>(
         &'fd self,
-        target: RawFd,
+        target: BorrowedFd<'fd>,
         length: u32,
         flags: libc::c_int,
     ) -> Splice<'fd, D> {
@@ -319,10 +351,10 @@ impl<D: Descriptor> AsyncFd<D> {
     /// to the file (instead of the current position of the write cursor) and
     /// starts reading at `target_offset` from `target`.
     #[doc(alias = "splice")]
-    pub const fn splice_from_at<'fd>(
+    pub fn splice_from_at<'fd>(
         &'fd self,
         offset: u64,
-        target: RawFd,
+        target: BorrowedFd<'fd>,
         target_offset: u64,
         length: u32,
         flags: libc::c_int,
@@ -337,16 +369,18 @@ impl<D: Descriptor> AsyncFd<D> {
         )
     }

-    const fn splice<'fd>(
+    fn splice<'fd>(
         &'fd self,
-        target: RawFd,
+        target: BorrowedFd<'fd>,
         direction: SpliceDirection,
         off_in: u64,
         off_out: u64,
         length: u32,
         flags: libc::c_int,
     ) -> Splice<'fd, D> {
-        Splice::new(self, (target, direction, off_in, off_out, length, flags))
+        let target_fd = target.as_raw_fd();
+        let args = (target_fd, direction, off_in, off_out, length, flags);
+        Splice(FdOperation::new(self, (), args))
     }

     /// Explicitly close the file descriptor.
@@ -364,67 +398,51 @@ impl<D: Descriptor> AsyncFd<D> {
         // not touching `this` after reading the fields.
         let fd = this.fd();
         let sq = unsafe { ptr::read(&this.sq) };
-
-        Close {
-            sq,
-            state: OpState::NotStarted(fd),
-            kind: PhantomData,
-        }
+        Close(Operation::new(sq, (), fd))
     }
 }

-// Read.
-op_future! {
-    fn AsyncFd::read -> B,
-    struct Read<'fd, B: BufMut> {
-        /// Buffer to write into, needs to stay in memory so the kernel can
-        /// access it safely.
-        buf: B,
-    },
-    drop_using: Box,
-    setup_state: offset: u64,
-    setup: |submission, fd, (buf,), offset| unsafe {
-        let (ptr, len) = buf.parts_mut();
-        submission.read_at(fd.fd(), ptr, len, offset);
-        if let Some(buf_group) = buf.buffer_group() {
-            submission.set_buffer_select(buf_group.0);
-        }
-    },
-    map_result: |this, (mut buf,), buf_idx, n| {
-        // SAFETY: the kernel initialised the bytes for us as part of the read
-        // call.
-        #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors.
-        unsafe { buf.buffer_init(BufIdx(buf_idx), n as u32) };
-        Ok(buf)
-    },
+#[derive(Copy, Clone, Debug)]
+pub(crate) enum SpliceDirection {
+    To,
+    From,
 }
-/// [`Future`] behind [`AsyncFd::read_n`].
+fd_operation!(
+    /// [`Future`] behind [`AsyncFd::read`] and [`AsyncFd::read_at`].
+    pub struct Read<B: BufMut>(sys::io::ReadOp<B>) -> io::Result<B>;
+
+    /// [`Future`] behind [`AsyncFd::read_vectored`] and [`AsyncFd::read_vectored_at`].
+    pub struct ReadVectored<B: BufMutSlice<N>; const N: usize>(sys::io::ReadVectoredOp<B, N>) -> io::Result<B>;
+
+    /// [`Future`] behind [`AsyncFd::write`] and [`AsyncFd::write_at`].
+    pub struct Write<B: Buf>(sys::io::WriteOp<B>) -> io::Result<usize>,
+      impl Extract -> io::Result<(B, usize)>;
+
+    /// [`Future`] behind [`AsyncFd::write_vectored`] and [`AsyncFd::write_vectored_at`].
+    pub struct WriteVectored<B: BufSlice<N>; const N: usize>(sys::io::WriteVectoredOp<B, N>) -> io::Result<usize>,
+      impl Extract -> io::Result<(B, usize)>;
+
+    /// [`Future`] behind [`AsyncFd::splice_to`], [`AsyncFd::splice_to_at`],
+    /// [`AsyncFd::splice_from`] and [`AsyncFd::splice_from_at`].
+    pub struct Splice(sys::io::SpliceOp) -> io::Result<usize>;
+);
+
+/// [`Future`] behind [`AsyncFd::read_n`] and [`AsyncFd::read_n_at`].
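(Aside: a usage sketch for the `read_n` family defined next. Only `read_n_at` is taken from the hunks above; the 512-byte minimum and buffer capacity are arbitrary, and `AsyncFd`'s descriptor parameter is assumed to default to `File`.)

```rust
use std::io;

// `ReadN` re-submits the read, advancing the offset by the bytes received,
// until at least 512 bytes have been read (or the underlying read fails).
async fn read_header(file: &a10::AsyncFd) -> io::Result<Vec<u8>> {
    file.read_n_at(Vec::with_capacity(4096), 0, 512).await
}
```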
#[derive(Debug)] -pub struct ReadN<'fd, B, D: Descriptor = File> { +pub struct ReadN<'fd, B: BufMut, D: Descriptor = File> { read: Read<'fd, ReadNBuf, D>, offset: u64, - /// Number of bytes we still need to read to hit our target `N`. + /// Number of bytes we still need to read to hit our minimum. left: usize, } -impl<'fd, B: BufMut, D: Descriptor> ReadN<'fd, B, D> { - const fn new(fd: &'fd AsyncFd, buf: B, offset: u64, n: usize) -> ReadN<'fd, B, D> { - let buf = ReadNBuf { buf, last_read: 0 }; - ReadN { - read: fd.read_at(buf, offset), - offset, - left: n, - } - } -} - -impl<'fd, B, D: Descriptor> Cancel for ReadN<'fd, B, D> { +impl<'fd, B: BufMut, D: Descriptor> Cancel for ReadN<'fd, B, D> { fn try_cancel(&mut self) -> CancelResult { self.read.try_cancel() } - fn cancel(&mut self) -> CancelOp { + fn cancel(&mut self) -> CancelOperation { self.read.cancel() } } @@ -433,7 +451,7 @@ impl<'fd, B: BufMut, D: Descriptor> Future for ReadN<'fd, B, D> { type Output = io::Result; fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - // SAFETY: not moving `Future`. + // SAFETY: not moving `self`. let this = unsafe { Pin::into_inner_unchecked(self) }; let mut read = unsafe { Pin::new_unchecked(&mut this.read) }; match read.as_mut().poll(ctx) { @@ -452,7 +470,7 @@ impl<'fd, B: BufMut, D: Descriptor> Future for ReadN<'fd, B, D> { this.offset += buf.last_read as u64; } - read.set(read.fd.read_at(buf, this.offset)); + read.set(read.0.fd().read_at(buf, this.offset)); unsafe { Pin::new_unchecked(this) }.poll(ctx) } Poll::Ready(Err(err)) => Poll::Ready(Err(err)), @@ -461,70 +479,21 @@ impl<'fd, B: BufMut, D: Descriptor> Future for ReadN<'fd, B, D> { } } -// ReadVectored. -op_future! { - fn AsyncFd::read_vectored -> B, - struct ReadVectored<'fd, B: BufMutSlice; const N: usize> { - /// Buffers to write into, needs to stay in memory so the kernel can - /// access it safely. - bufs: B, - /// Buffer references used by the kernel. - /// - /// NOTE: we only need these iovecs in the submission, we don't have to - /// keep around during the operation. Because of this we don't heap - /// allocate it like we for other operations. This leaves a small - /// duration between the submission of the entry and the submission - /// being read by the kernel in which this future could be dropped and - /// the kernel will read memory we don't own. However because we wake - /// the kernel after submitting the timeout entry it's not really worth - /// to heap allocation. - iovecs: [libc::iovec; N], - }, - drop_using: Box, - /// `iovecs` can't move until the kernel has read the submission. - impl !Unpin, - setup_state: offset: u64, - setup: |submission, fd, (_bufs, iovecs), offset| unsafe { - submission.read_vectored_at(fd.fd(), iovecs, offset); - }, - map_result: |this, (mut bufs, _iovecs), _flags, n| { - // SAFETY: the kernel initialised the bytes for us as part of the read - // call. - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - unsafe { bufs.set_init(n as usize) }; - Ok(bufs) - }, -} - -/// [`Future`] behind [`AsyncFd::read_n_vectored`]. +/// [`Future`] behind [`AsyncFd::read_n_vectored`] and [`AsyncFd::read_n_vectored_at`]. #[derive(Debug)] -pub struct ReadNVectored<'fd, B, const N: usize, D: Descriptor = File> { +pub struct ReadNVectored<'fd, B: BufMutSlice, const N: usize, D: Descriptor = File> { read: ReadVectored<'fd, ReadNBuf, N, D>, offset: u64, - /// Number of bytes we still need to read to hit our target `N`. 
+ /// Number of bytes we still need to read to hit our minimum. left: usize, } -impl<'fd, B: BufMutSlice, const N: usize, D: Descriptor> ReadNVectored<'fd, B, N, D> { - fn new(fd: &'fd AsyncFd, bufs: B, offset: u64, n: usize) -> ReadNVectored<'fd, B, N, D> { - let bufs = ReadNBuf { - buf: bufs, - last_read: 0, - }; - ReadNVectored { - read: fd.read_vectored_at(bufs, offset), - offset, - left: n, - } - } -} - -impl<'fd, B, const N: usize, D: Descriptor> Cancel for ReadNVectored<'fd, B, N, D> { +impl<'fd, B: BufMutSlice, const N: usize, D: Descriptor> Cancel for ReadNVectored<'fd, B, N, D> { fn try_cancel(&mut self) -> CancelResult { self.read.try_cancel() } - fn cancel(&mut self) -> CancelOp { + fn cancel(&mut self) -> CancelOperation { self.read.cancel() } } @@ -552,7 +521,7 @@ impl<'fd, B: BufMutSlice, const N: usize, D: Descriptor> Future for ReadNVect this.offset += bufs.last_read as u64; } - read.set(read.fd.read_vectored_at(bufs, this.offset)); + read.set(read.0.fd().read_vectored_at(bufs, this.offset)); unsafe { Pin::new_unchecked(this) }.poll(ctx) } Poll::Ready(Err(err)) => Poll::Ready(Err(err)), @@ -561,81 +530,15 @@ impl<'fd, B: BufMutSlice, const N: usize, D: Descriptor> Future for ReadNVect } } -/// Wrapper around a buffer `B` to keep track of the number of bytes written, +/// [`Future`] behind [`AsyncFd::write_all`] and [`AsyncFd::write_all_at`]. #[derive(Debug)] -pub(crate) struct ReadNBuf { - pub(crate) buf: B, - pub(crate) last_read: usize, -} - -unsafe impl BufMut for ReadNBuf { - unsafe fn parts_mut(&mut self) -> (*mut u8, u32) { - self.buf.parts_mut() - } - - unsafe fn set_init(&mut self, n: usize) { - self.last_read = n; - self.buf.set_init(n); - } -} - -unsafe impl, const N: usize> BufMutSlice for ReadNBuf { - unsafe fn as_iovecs_mut(&mut self) -> [libc::iovec; N] { - self.buf.as_iovecs_mut() - } - - unsafe fn set_init(&mut self, n: usize) { - self.last_read = n; - self.buf.set_init(n); - } -} - -// Write. -op_future! { - fn AsyncFd::write -> usize, - struct Write<'fd, B: Buf> { - /// Buffer to read from, needs to stay in memory so the kernel can - /// access it safely. - buf: B, - }, - drop_using: Box, - setup_state: offset: u64, - setup: |submission, fd, (buf,), offset| unsafe { - let (ptr, len) = buf.parts(); - submission.write_at(fd.fd(), ptr, len, offset); - }, - map_result: |n| { - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - Ok(n as usize) - }, - extract: |this, (buf,), n| -> (B, usize) { - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - Ok((buf, n as usize)) - }, -} - -/// [`Future`] behind [`AsyncFd::write_all`]. -#[derive(Debug)] -pub struct WriteAll<'fd, B, D: Descriptor = File> { +pub struct WriteAll<'fd, B: Buf, D: Descriptor = File> { write: Extractor, D>>, offset: u64, } impl<'fd, B: Buf, D: Descriptor> WriteAll<'fd, B, D> { - const fn new(fd: &'fd AsyncFd, buf: B, offset: u64) -> WriteAll<'fd, B, D> { - let buf = SkipBuf { buf, skip: 0 }; - WriteAll { - // TODO: once `Extract` is a constant trait use that. - write: Extractor { - fut: fd.write_at(buf, offset), - }, - offset, - } - } - - /// Poll implementation used by the [`Future`] implement for the naked type - /// and the type wrapper in an [`Extractor`]. - fn inner_poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll> { + fn poll_inner(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll> { // SAFETY: not moving `Future`. 
let this = unsafe { Pin::into_inner_unchecked(self) }; let mut write = unsafe { Pin::new_unchecked(&mut this.write) }; @@ -652,8 +555,8 @@ impl<'fd, B: Buf, D: Descriptor> WriteAll<'fd, B, D> { return Poll::Ready(Ok(buf.buf)); } - write.set(write.fut.fd.write_at(buf, this.offset).extract()); - unsafe { Pin::new_unchecked(this) }.inner_poll(ctx) + write.set(write.fut.0.fd().write_at(buf, this.offset).extract()); + unsafe { Pin::new_unchecked(this) }.poll_inner(ctx) } Poll::Ready(Err(err)) => Poll::Ready(Err(err)), Poll::Pending => Poll::Pending, @@ -661,12 +564,12 @@ impl<'fd, B: Buf, D: Descriptor> WriteAll<'fd, B, D> { } } -impl<'fd, B, D: Descriptor> Cancel for WriteAll<'fd, B, D> { +impl<'fd, B: Buf, D: Descriptor> Cancel for WriteAll<'fd, B, D> { fn try_cancel(&mut self) -> CancelResult { self.write.try_cancel() } - fn cancel(&mut self) -> CancelOp { + fn cancel(&mut self) -> CancelOperation { self.write.cancel() } } @@ -675,7 +578,7 @@ impl<'fd, B: Buf, D: Descriptor> Future for WriteAll<'fd, B, D> { type Output = io::Result<()>; fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - self.inner_poll(ctx).map_ok(|_| ()) + self.poll_inner(ctx).map_ok(|_| ()) } } @@ -685,84 +588,24 @@ impl<'fd, B: Buf, D: Descriptor> Future for Extractor> { type Output = io::Result; fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - unsafe { Pin::map_unchecked_mut(self, |s| &mut s.fut) }.inner_poll(ctx) + // SAFETY: not moving `self.fut` (`s.fut`), directly called + // `Future::poll` on it. + unsafe { Pin::map_unchecked_mut(self, |s| &mut s.fut) }.poll_inner(ctx) } } -/// Wrapper around a buffer `B` to skip a number of bytes. -#[derive(Debug)] -pub(crate) struct SkipBuf { - pub(crate) buf: B, - pub(crate) skip: u32, -} - -unsafe impl Buf for SkipBuf { - unsafe fn parts(&self) -> (*const u8, u32) { - let (ptr, size) = self.buf.parts(); - if self.skip >= size { - (ptr, 0) - } else { - (ptr.add(self.skip as usize), size - self.skip) - } - } -} - -// WriteVectored. -op_future! { - fn AsyncFd::write_vectored -> usize, - struct WriteVectored<'fd, B: BufSlice; const N: usize> { - /// Buffers to read from, needs to stay in memory so the kernel can - /// access it safely. - bufs: B, - /// Buffer references used by the kernel. - /// - /// NOTE: we only need these iovecs in the submission, we don't have to - /// keep around during the operation. Because of this we don't heap - /// allocate it like we for other operations. This leaves a small - /// duration between the submission of the entry and the submission - /// being read by the kernel in which this future could be dropped and - /// the kernel will read memory we don't own. However because we wake - /// the kernel after submitting the timeout entry it's not really worth - /// to heap allocation. - iovecs: [libc::iovec; N], - }, - drop_using: Box, - /// `iovecs` can't move until the kernel has read the submission. - impl !Unpin, - setup_state: offset: u64, - setup: |submission, fd, (_bufs, iovecs), offset| unsafe { - submission.write_vectored_at(fd.fd(), iovecs, offset); - }, - map_result: |n| { - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - Ok(n as usize) - }, - extract: |this, (buf, _iovecs), n| -> (B, usize) { - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - Ok((buf, n as usize)) - }, -} - /// [`Future`] behind [`AsyncFd::write_all_vectored`]. 
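(Aside: the skip bookkeeping that `WriteAllVectored`, defined next, performs on the iovec lengths after a short vectored write can be modelled standalone. This is an illustrative model, not library code.)

```rust
// Model of the resubmit preparation: zero out buffers that were written in
// full and shrink the first partially written one by the bytes already sent.
fn apply_skip(lens: &mut [usize], mut skip: usize) {
    for len in lens.iter_mut() {
        if *len <= skip {
            skip -= *len; // this buffer is done, exclude it from the next submit
            *len = 0;
        } else {
            *len -= skip; // partially written: only the tail remains
            break;
        }
    }
}

fn main() {
    let mut lens = [4, 4, 4];
    apply_skip(&mut lens, 6); // 6 of 12 bytes were written
    assert_eq!(lens, [0, 2, 4]); // first buffer done, 2 bytes left in the second
}
```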
 #[derive(Debug)]
-pub struct WriteAllVectored<'fd, B, const N: usize, D: Descriptor = File> {
+pub struct WriteAllVectored<'fd, B: BufSlice<N>, const N: usize, D: Descriptor = File> {
     write: Extractor<WriteVectored<'fd, B, N, D>>,
     offset: u64,
     skip: u64,
 }

 impl<'fd, B: BufSlice<N>, const N: usize, D: Descriptor> WriteAllVectored<'fd, B, N, D> {
-    fn new(fd: &'fd AsyncFd<D>, buf: B, offset: u64) -> WriteAllVectored<'fd, B, N, D> {
-        WriteAllVectored {
-            write: fd.write_vectored_at(buf, offset).extract(),
-            offset,
-            skip: 0,
-        }
-    }
-
     /// Poll implementation used by the [`Future`] implementation for the naked type
     /// and the type wrapper in an [`Extractor`].
-    fn inner_poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll<io::Result<B>> {
+    fn poll_inner(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll<io::Result<B>> {
         // SAFETY: not moving `Future`.
         let this = unsafe { Pin::into_inner_unchecked(self) };
         let mut write = unsafe { Pin::new_unchecked(&mut this.write) };
@@ -777,23 +620,32 @@ impl<'fd, B: BufSlice<N>, const N: usize, D: Descriptor> WriteAllVectored<'fd, B
                 let mut iovecs = unsafe { bufs.as_iovecs() };
                 let mut skip = this.skip;
                 for iovec in &mut iovecs {
-                    if iovec.iov_len as u64 <= skip {
+                    if iovec.len() as u64 <= skip {
                         // Skip entire buf.
-                        skip -= iovec.iov_len as u64;
-                        iovec.iov_len = 0;
+                        skip -= iovec.len() as u64;
+                        // SAFETY: setting it to zero is always valid.
+                        unsafe { iovec.set_len(0) };
                     } else {
-                        iovec.iov_len -= skip as usize;
+                        // SAFETY: checked above that the length > skip.
+                        unsafe { iovec.set_len(iovec.len() - skip as usize) };
                         break;
                     }
                 }

-                if iovecs[N - 1].iov_len == 0 {
+                if iovecs[N - 1].len() == 0 {
                     // Written everything.
                     return Poll::Ready(Ok(bufs));
                 }

-                write.set(WriteVectored::new(write.fut.fd, bufs, iovecs, this.offset).extract());
-                unsafe { Pin::new_unchecked(this) }.inner_poll(ctx)
+                write.set(
+                    WriteVectored(FdOperation::new(
+                        write.fut.0.fd(),
+                        (bufs, Box::new(iovecs)),
+                        this.offset,
+                    ))
+                    .extract(),
+                );
+                unsafe { Pin::new_unchecked(this) }.poll_inner(ctx)
             }
             Poll::Ready(Err(err)) => Poll::Ready(Err(err)),
             Poll::Pending => Poll::Pending,
@@ -801,12 +653,12 @@ }
     }
 }

-impl<'fd, B, const N: usize, D: Descriptor> Cancel for WriteAllVectored<'fd, B, N, D> {
+impl<'fd, B: BufSlice<N>, const N: usize, D: Descriptor> Cancel for WriteAllVectored<'fd, B, N, D> {
     fn try_cancel(&mut self) -> CancelResult {
         self.write.try_cancel()
     }

-    fn cancel(&mut self) -> CancelOp {
+    fn cancel(&mut self) -> CancelOperation {
         self.write.cancel()
     }
 }
@@ -815,7 +667,7 @@ impl<'fd, B: BufSlice<N>, const N: usize, D: Descriptor> Future for WriteAllVect
     type Output = io::Result<()>;

     fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll<Self::Output> {
-        self.inner_poll(ctx).map_ok(|_| ())
+        self.poll_inner(ctx).map_ok(|_| ())
     }
 }

@@ -830,429 +682,77 @@ impl<'fd, B: BufSlice<N>, const N: usize, D: Descriptor> Future
     type Output = io::Result<B>;

     fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll<Self::Output> {
-        unsafe { Pin::map_unchecked_mut(self, |s| &mut s.fut) }.inner_poll(ctx)
+        unsafe { Pin::map_unchecked_mut(self, |s| &mut s.fut) }.poll_inner(ctx)
     }
 }

-// Splice.
-op_future! {
-    fn AsyncFd::splice_to -> usize,
-    struct Splice<'fd> {
-        // Doesn't need any fields.
- }, - setup_state: flags: (RawFd, SpliceDirection, u64, u64, u32, libc::c_int), - setup: |submission, fd, (), (target, direction, off_in, off_out, len, flags)| unsafe { - let (fd_in, fd_out) = match direction { - SpliceDirection::To => (fd.fd(), target), - SpliceDirection::From => (target, fd.fd()), - }; - submission.splice(fd_in, off_in, fd_out, off_out, len, flags); - }, - map_result: |n| { - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - Ok(n as usize) - }, -} - -#[derive(Copy, Clone, Debug)] -enum SpliceDirection { - To, - From, -} +operation!( + /// [`Future`] behind [`AsyncFd::close`]. + pub struct Close(sys::io::CloseOp) -> io::Result<()>; +); -/// [`Future`] behind [`AsyncFd::close`]. +/// Wrapper around a buffer `B` to keep track of the number of bytes written. +// Also used in the `net` module. #[derive(Debug)] -#[must_use = "`Future`s do nothing unless polled"] -pub struct Close { - sq: SubmissionQueue, - state: OpState, - kind: PhantomData, -} - -impl Future for Close { - type Output = io::Result<()>; - - fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - let op_index = poll_state!(Close, self.state, self.sq, ctx, |submission, fd| unsafe { - submission.close(fd); - D::use_flags(submission); - }); - - match self.sq.poll_op(ctx, op_index) { - Poll::Ready(result) => { - self.state = OpState::Done; - match result { - Ok(_) => Poll::Ready(Ok(())), - Err(err) => Poll::Ready(Err(err)), - } - } - Poll::Pending => Poll::Pending, - } - } -} - -/// Trait that defines the behaviour of buffers used in reading, which requires -/// mutable access. -/// -/// # Safety -/// -/// Unlike normal buffers the buffer implementations for A10 have additional -/// requirements. -/// -/// If the operation (that uses this buffer) is not polled to completion, i.e. -/// the `Future` is dropped before it returns `Poll::Ready`, the kernel still -/// has access to the buffer and will still attempt to write into it. This means -/// that we must delay deallocation in such a way that the kernel will not write -/// into memory we don't have access to any more. This makes, for example, stack -/// based buffers unfit to implement `BufMut`. Because we can't delay the -/// deallocation once its dropped and the kernel will overwrite part of your -/// stack (where the buffer used to be)! -pub unsafe trait BufMut: 'static { - /// Returns the writable buffer as pointer and length parts. - /// - /// # Safety - /// - /// Only initialised bytes may be written to the pointer returned. The - /// pointer *may* point to uninitialised bytes, so reading from the pointer - /// is UB. - /// - /// The implementation must ensure that the pointer is valid, i.e. not null - /// and pointing to memory owned by the buffer. Furthermore it must ensure - /// that the returned length is, in combination with the pointer, valid. In - /// other words the memory the pointer and length are pointing to must be a - /// valid memory address and owned by the buffer. - /// - /// Note that the above requirements are only required for implementations - /// outside of A10. **This trait is unfit for external use!** - /// - /// # Why not a slice? - /// - /// Returning a slice `&[u8]` would prevent us to use unitialised bytes, - /// meaning we have to zero the buffer before usage, not ideal for - /// performance. So, naturally you would suggest `&[MaybeUninit]`, - /// however that would prevent buffer types with only initialised bytes. 
- /// Returning a slice with `MaybeUninit` to such as type would be unsound as - /// it would allow the caller to write unitialised bytes without using - /// `unsafe`. - /// - /// # Notes - /// - /// Most Rust API use a `usize` for length, but io_uring uses `u32`, hence - /// we do also. - unsafe fn parts_mut(&mut self) -> (*mut u8, u32); - - /// Mark `n` bytes as initialised. - /// - /// # Safety - /// - /// The caller must ensure that `n` bytes, starting at the pointer returned - /// by [`BufMut::parts_mut`], are initialised. - unsafe fn set_init(&mut self, n: usize); - - /// Buffer group id, or `None` if it's not part of a buffer pool. - /// - /// Don't implement this. - #[doc(hidden)] - fn buffer_group(&self) -> Option { - None - } - - /// Mark `n` bytes as initialised in buffer with `idx`. - /// - /// Don't implement this. - #[doc(hidden)] - unsafe fn buffer_init(&mut self, idx: BufIdx, n: u32) { - debug_assert!(idx.0 == 0); - self.set_init(n as usize); - } +pub(crate) struct ReadNBuf { + pub(crate) buf: B, + pub(crate) last_read: usize, } -/// The implementation for `Vec` only uses the unused capacity, so any bytes -/// already in the buffer will be untouched. -// SAFETY: `Vec` manages the allocation of the bytes, so as long as it's -// alive, so is the slice of bytes. When the `Vec`tor is leaked the allocation -// will also be leaked. -unsafe impl BufMut for Vec { +unsafe impl BufMut for ReadNBuf { unsafe fn parts_mut(&mut self) -> (*mut u8, u32) { - let slice = self.spare_capacity_mut(); - (slice.as_mut_ptr().cast(), slice.len() as u32) + self.buf.parts_mut() } unsafe fn set_init(&mut self, n: usize) { - self.set_len(self.len() + n); - } -} - -/// Trait that defines the behaviour of buffers used in reading using vectored -/// I/O, which requires mutable access. -/// -/// # Safety -/// -/// This has the same safety requirements as [`BufMut`], but then for all -/// buffers used. -pub unsafe trait BufMutSlice: 'static { - /// Returns the writable buffers as `iovec` structures. - /// - /// # Safety - /// - /// This has the same safety requirements as [`BufMut::parts_mut`], but then - /// for all buffers used. - unsafe fn as_iovecs_mut(&mut self) -> [libc::iovec; N]; - - /// Mark `n` bytes as initialised. - /// - /// # Safety - /// - /// The caller must ensure that `n` bytes are initialised in the vectors - /// return by [`BufMutSlice::as_iovecs_mut`]. - /// - /// The implementation must ensure that that proper buffer(s) are - /// initialised. For example when this is called with `n = 10` with two - /// buffers of size `8` the implementation should initialise the first - /// buffer with `n = 8` and the second with `n = 10 - 8 = 2`. - unsafe fn set_init(&mut self, n: usize); -} - -// SAFETY: `BufMutSlice` has the same safety requirements as `BufMut` and since -// `B` implements `BufMut` it's safe to implement `BufMutSlice` for an array of -// `B`. -unsafe impl BufMutSlice for [B; N] { - unsafe fn as_iovecs_mut(&mut self) -> [libc::iovec; N] { - // TODO: replace with `MaybeUninit::uninit_array` once stable. - // SAFETY: an uninitialised `MaybeUninit` is valid. 
- let mut iovecs = - unsafe { MaybeUninit::<[MaybeUninit; N]>::uninit().assume_init() }; - for (buf, iovec) in self.iter_mut().zip(iovecs.iter_mut()) { - debug_assert!( - buf.buffer_group().is_none(), - "can't use a10::ReadBuf as a10::BufMutSlice in vectored I/O" - ); - let (ptr, len) = buf.parts_mut(); - iovec.write(libc::iovec { - iov_base: ptr.cast(), - iov_len: len as _, - }); - } - // TODO: replace with `MaybeUninit::array_assume_init` once stable. - // SAFETY: `MaybeUninit` and `iovec` have the same layout - // as guaranteed by `MaybeUninit`. - unsafe { std::mem::transmute_copy(&std::mem::ManuallyDrop::new(iovecs)) } + self.last_read = n; + self.buf.set_init(n); } - unsafe fn set_init(&mut self, n: usize) { - let mut left = n; - for buf in self { - let (_, len) = buf.parts_mut(); - let len = len as usize; - if len < left { - // Fully initialised the buffer. - buf.set_init(len); - left -= len; - } else { - // Partially initialised the buffer. - buf.set_init(left); - return; - } - } - unreachable!( - "called BufMutSlice::set_init({n}), with buffers totaling in {} in size", - n - left - ); + fn buffer_group(&self) -> Option { + self.buf.buffer_group() } -} - -// NOTE: Also see implementation of `BufMutSlice` for tuples in the macro -// `buf_slice_for_tuple` below. - -/// Trait that defines the behaviour of buffers used in writing, which requires -/// read only access. -/// -/// # Safety -/// -/// Unlike normal buffers the buffer implementations for A10 have additional -/// requirements. -/// -/// If the operation (that uses this buffer) is not polled to completion, i.e. -/// the `Future` is dropped before it returns `Poll::Ready`, the kernel still -/// has access to the buffer and will still attempt to read from it. This means -/// that we must delay deallocation in such a way that the kernel will not read -/// memory we don't have access to any more. This makes, for example, stack -/// based buffers unfit to implement `Buf`. Because we can't delay the -/// deallocation once its dropped and the kernel will read part of your stack -/// (where the buffer used to be)! This would be a huge security risk. -pub unsafe trait Buf: 'static { - /// Returns the reabable buffer as pointer and length parts. - /// - /// # Safety - /// - /// The implementation must ensure that the pointer is valid, i.e. not null - /// and pointing to memory owned by the buffer. Furthermore it must ensure - /// that the returned length is, in combination with the pointer, valid. In - /// other words the memory the pointer and length are pointing to must be a - /// valid memory address and owned by the buffer. - /// - /// # Notes - /// - /// Most Rust API use a `usize` for length, but io_uring uses `u32`, hence - /// we do also. - unsafe fn parts(&self) -> (*const u8, u32); -} -// SAFETY: `Vec` manages the allocation of the bytes, so as long as it's -// alive, so is the slice of bytes. When the `Vec`tor is leaked the allocation -// will also be leaked. -unsafe impl Buf for Vec { - unsafe fn parts(&self) -> (*const u8, u32) { - let slice = self.as_slice(); - (slice.as_ptr().cast(), slice.len() as u32) + unsafe fn buffer_init(&mut self, id: BufId, n: u32) { + self.last_read = n as usize; + self.buf.buffer_init(id, n); } } -// SAFETY: `Box<[u8]>` manages the allocation of the bytes, so as long as it's -// alive, so is the slice of bytes. When the `Box` is leaked the allocation will -// also be leaked. 
-unsafe impl Buf for Box<[u8]> { - unsafe fn parts(&self) -> (*const u8, u32) { - (self.as_ptr().cast(), self.len() as u32) +unsafe impl, const N: usize> BufMutSlice for ReadNBuf { + unsafe fn as_iovecs_mut(&mut self) -> [IoMutSlice; N] { + self.buf.as_iovecs_mut() } -} -// SAFETY: `String` is just a `Vec`, see it's implementation for the safety -// reasoning. -unsafe impl Buf for String { - unsafe fn parts(&self) -> (*const u8, u32) { - let slice = self.as_bytes(); - (slice.as_ptr().cast(), slice.len() as u32) + unsafe fn set_init(&mut self, n: usize) { + self.last_read = n; + self.buf.set_init(n); } } -// SAFETY: because the reference has a `'static` lifetime we know the bytes -// can't be deallocated, so it's safe to implement `Buf`. -unsafe impl Buf for &'static [u8] { - unsafe fn parts(&self) -> (*const u8, u32) { - (self.as_ptr(), self.len() as u32) - } +/// Wrapper around a buffer `B` to skip a number of bytes. +// Also used in the `net` module. +#[derive(Debug)] +pub(crate) struct SkipBuf { + pub(crate) buf: B, + pub(crate) skip: u32, } -// SAFETY: because the reference has a `'static` lifetime we know the bytes -// can't be deallocated, so it's safe to implement `Buf`. -unsafe impl Buf for &'static str { +unsafe impl Buf for SkipBuf { unsafe fn parts(&self) -> (*const u8, u32) { - (self.as_bytes().as_ptr(), self.len() as u32) - } -} - -/// Trait that defines the behaviour of buffers used in writing using vectored -/// I/O, which requires read only access. -/// -/// # Safety -/// -/// This has the same safety requirements as [`Buf`], but then for all buffers -/// used. -pub unsafe trait BufSlice: 'static { - /// Returns the reabable buffer as `iovec` structures. - /// - /// # Safety - /// - /// This has the same safety requirements as [`Buf::parts`], but then for - /// all buffers used. - unsafe fn as_iovecs(&self) -> [libc::iovec; N]; -} - -// SAFETY: `BufSlice` has the same safety requirements as `Buf` and since `B` -// implements `Buf` it's safe to implement `BufSlice` for an array of `B`. -unsafe impl BufSlice for [B; N] { - unsafe fn as_iovecs(&self) -> [libc::iovec; N] { - // TODO: replace with `MaybeUninit::uninit_array` once stable. - // SAFETY: an uninitialised `MaybeUninit` is valid. - let mut iovecs = - unsafe { MaybeUninit::<[MaybeUninit; N]>::uninit().assume_init() }; - for (buf, iovec) in self.iter().zip(iovecs.iter_mut()) { - let (ptr, len) = buf.parts(); - iovec.write(libc::iovec { - iov_base: ptr as _, - iov_len: len as _, - }); + let (ptr, size) = self.buf.parts(); + if self.skip >= size { + (ptr, 0) + } else { + (ptr.add(self.skip as usize), size - self.skip) } - // TODO: replace with `MaybeUninit::array_assume_init` once stable. - // SAFETY: `MaybeUninit` and `iovec` have the same layout - // as guaranteed by `MaybeUninit`. - unsafe { std::mem::transmute_copy(&std::mem::ManuallyDrop::new(iovecs)) } } } -macro_rules! buf_slice_for_tuple { - ( - // Number of values. - $N: expr, - // Generic parameter name and tuple index. - $( $generic: ident . $index: tt ),+ - ) => { - // SAFETY: `BufMutSlice` has the same safety requirements as `BufMut` - // and since all generic buffers must implement `BufMut` it's safe to - // implement `BufMutSlice` for a tuple of all those buffers. 
- unsafe impl<$( $generic: BufMut ),+> BufMutSlice<$N> for ($( $generic ),+) { - unsafe fn as_iovecs_mut(&mut self) -> [libc::iovec; $N] { - [ - $({ - debug_assert!( - self.$index.buffer_group().is_none(), - "can't use a10::ReadBuf as a10::BufMutSlice in vectored I/O" - ); - let (ptr, len) = self.$index.parts_mut(); - libc::iovec { - iov_base: ptr.cast(), - iov_len: len as _, - } - }),+ - ] - } - - unsafe fn set_init(&mut self, n: usize) { - let mut left = n; - $({ - let (_, len) = self.$index.parts_mut(); - let len = len as usize; - if len < left { - // Fully initialised the buffer. - self.$index.set_init(len); - left -= len; - } else { - // Partially initialised the buffer. - self.$index.set_init(left); - return; - } - })+ - unreachable!( - "called BufMutSlice::set_init({n}), with buffers totaling in {} in size", - n - left - ); - } - } - - // SAFETY: `BufSlice` has the same safety requirements as `Buf` and - // since all generic buffers must implement `Buf` it's safe to implement - // `BufSlice` for a tuple of all those buffers. - unsafe impl<$( $generic: Buf ),+> BufSlice<$N> for ($( $generic ),+) { - unsafe fn as_iovecs(&self) -> [libc::iovec; $N] { - [ - $({ - let (ptr, len) = self.$index.parts(); - libc::iovec { - iov_base: ptr as _, - iov_len: len as _, - } - }),+ - ] - } - } - }; +/// Wrapper around a buffer `B` to implement [`DropWake`] on. +/// +/// [`DropWake`]: crate::drop_waker::DropWake +#[derive(Debug)] +pub(crate) struct Buffer { + pub(crate) buf: B, } - -buf_slice_for_tuple!(2, A.0, B.1); -buf_slice_for_tuple!(3, A.0, B.1, C.2); -buf_slice_for_tuple!(4, A.0, B.1, C.2, D.3); -buf_slice_for_tuple!(5, A.0, B.1, C.2, D.3, E.4); -buf_slice_for_tuple!(6, A.0, B.1, C.2, D.3, E.4, F.5); -buf_slice_for_tuple!(7, A.0, B.1, C.2, D.3, E.4, F.5, G.6); -buf_slice_for_tuple!(8, A.0, B.1, C.2, D.3, E.4, F.5, G.6, I.7); diff --git a/src/io/read_buf.rs b/src/io/read_buf.rs index c75697b5..c470e24f 100644 --- a/src/io/read_buf.rs +++ b/src/io/read_buf.rs @@ -2,38 +2,15 @@ //! //! See [`ReadBufPool`]. -use std::alloc::{self, alloc, alloc_zeroed, dealloc}; use std::borrow::{Borrow, BorrowMut}; -use std::mem::{size_of, MaybeUninit}; +use std::mem::MaybeUninit; use std::ops::{Bound, Deref, DerefMut, RangeBounds}; -use std::os::fd::AsRawFd; use std::ptr::{self, NonNull}; -use std::sync::atomic::{AtomicU16, Ordering}; -use std::sync::{Arc, Mutex, OnceLock}; +use std::sync::Arc; use std::{fmt, io, slice}; -use crate::io::{Buf, BufMut}; -use crate::{libc, SubmissionQueue}; - -/// Id for a [`BufPool`]. -#[doc(hidden)] // Public because it's used in [`BufMut`]. -#[derive(Copy, Clone, Debug)] -pub struct BufGroupId(pub(crate) u16); - -/// Index for a [`BufPool`]. -#[doc(hidden)] // Public because it's used in [`BufMut`]. -#[derive(Copy, Clone, Debug)] -pub struct BufIdx(pub(crate) u16); - -/// Size of a single page, often 4096. -#[allow(clippy::cast_sign_loss)] // Page size shouldn't be negative. -fn page_size() -> usize { - static PAGE_SIZE: OnceLock = OnceLock::new(); - *PAGE_SIZE.get_or_init(|| unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize }) -} - -/// Buffer group ID generator. -static ID: AtomicU16 = AtomicU16::new(0); +use crate::io::{Buf, BufGroupId, BufId, BufMut}; +use crate::{sys, SubmissionQueue}; /// A read buffer pool. /// @@ -50,29 +27,8 @@ static ID: AtomicU16 = AtomicU16::new(0); #[derive(Clone, Debug)] #[allow(clippy::module_name_repetitions)] // Public in `crate::io`, so N/A. 
pub struct ReadBufPool { - shared: Arc, -} - -/// Shared between one or more [`ReadBufPool`]s and one or more [`ReadBuf`]s. -#[derive(Debug)] -struct Shared { - /// Identifier used by the kernel (aka `bgid`, `buf_group`). - id: BufGroupId, - /// Submission queue used to unregister the pool on drop. - sq: SubmissionQueue, - /// Number of buffers. - pool_size: u16, - /// Size of the buffers. - buf_size: u32, - /// Address of the allocation the buffers, see `alloc_layout_buffers`. - bufs_addr: *mut u8, - /// Address of the ring registration, see `alloc_layout_ring`. - ring_addr: *mut libc::io_uring_buf_ring, - /// Mask used to determin the tail in the ring. - tail_mask: u16, - /// Lock used reregister [`ReadBuf`]s after usage, see the `Drop` implementation - /// of `ReadBuf`. - reregister_lock: Mutex<()>, + /// Shared between one or more [`ReadBufPool`]s and one or more [`ReadBuf`]s. + shared: Arc, } impl ReadBufPool { @@ -83,90 +39,7 @@ impl ReadBufPool { /// grow beyond this capacity. #[doc(alias = "IORING_REGISTER_PBUF_RING")] pub fn new(sq: SubmissionQueue, pool_size: u16, buf_size: u32) -> io::Result { - debug_assert!(pool_size <= 1 << 15); - debug_assert!(pool_size.is_power_of_two()); - - let ring_fd = sq.shared.ring_fd.as_raw_fd(); - let id = ID.fetch_add(1, Ordering::SeqCst); - - // This allocation must be page aligned. - let page_size = page_size(); - // NOTE: do the layout calculations first in case of an error. - let ring_layout = alloc_layout_ring(pool_size, page_size)?; - let bufs_layout = alloc_layout_buffers(pool_size, buf_size, page_size)?; - - // Allocation for the buffer ring, shared with the kernel. - let ring_addr = match unsafe { alloc_zeroed(ring_layout) } { - ring_addr if ring_addr.is_null() => return Err(io::ErrorKind::OutOfMemory.into()), - #[allow(clippy::cast_ptr_alignment)] // Did proper alignment in `alloc_layout_ring`. - ring_addr => ring_addr.cast::(), - }; - - // Register the buffer ring with the kernel. - let buf_register = libc::io_uring_buf_reg { - ring_addr: ring_addr as u64, - ring_entries: u32::from(pool_size), - bgid: id, - flags: 0, - // Reserved for future use. - resv: [0; 3], - }; - log::trace!(ring_fd = ring_fd, bgid = id, size = pool_size; "registering buffer pool"); - - let result = sq.register( - libc::IORING_REGISTER_PBUF_RING, - ptr::addr_of!(buf_register).cast(), - 1, - ); - if let Err(err) = result { - // SAFETY: we just allocated this above. - unsafe { dealloc(ring_addr.cast(), ring_layout) }; - return Err(err); - } - - // Create a `Shared` type to manage the allocations and registration. - let shared = Shared { - id: BufGroupId(id), - sq, - pool_size, - buf_size, - // Allocate the buffer space, checked below. - bufs_addr: unsafe { alloc(bufs_layout) }, - ring_addr, - // NOTE: this works because `pool_size` must be a power of two. - tail_mask: pool_size - 1, - reregister_lock: Mutex::new(()), - }; - - if shared.bufs_addr.is_null() { - // NOTE: dealloc and unregister happen in the `Drop` impl of - // `Shared. - return Err(io::ErrorKind::OutOfMemory.into()); - } - - // Fill the buffer ring to let the kernel know what buffers are - // available. - let ring_tail = shared.ring_tail(); - let ring_addr = unsafe { &mut *ring_addr }; - let bufs = unsafe { - slice::from_raw_parts_mut( - ptr::addr_of_mut!(ring_addr.__bindgen_anon_1.bufs) - .cast::>(), - pool_size as usize, - ) - }; - for (i, ring_buf) in bufs.iter_mut().enumerate() { - let addr = unsafe { shared.bufs_addr.add(i * buf_size as usize) }; - log::trace!(bid = i, addr:? 
= addr, len = buf_size; "registering buffer");
-            ring_buf.write(libc::io_uring_buf {
-                addr: addr as u64,
-                len: buf_size,
-                bid: i as u16,
-                resv: 0,
-            });
-        }
-        ring_tail.store(pool_size, Ordering::Release);
-
+        let shared = sys::io::ReadBufPool::new(sq, pool_size, buf_size)?;
         Ok(ReadBufPool {
             shared: Arc::new(shared),
         })
@@ -190,7 +63,7 @@ impl ReadBufPool {
     /// Returns the group id for this pool.
     pub(crate) fn group_id(&self) -> BufGroupId {
-        self.shared.id
+        self.shared.group_id()
     }

     /// Initialise a new buffer with `index` with `len` size.
@@ -199,111 +72,32 @@ impl ReadBufPool {
     ///
     /// The provided index must come from the kernel, reusing the same index
     /// will cause data races.
-    pub(crate) unsafe fn new_buffer(&self, index: BufIdx, len: u32) -> ReadBuf {
-        let owned = if len == 0 && index.0 == 0 {
+    pub(crate) unsafe fn new_buffer(&self, id: BufId, n: u32) -> ReadBuf {
+        let owned = if n == 0 && id.0 == 0 {
             // If we read 0 bytes it means the kernel didn't actually allocate a
             // buffer.
             None
         } else {
-            let data = self
-                .shared
-                .bufs_addr
-                .add(index.0 as usize * self.shared.buf_size as usize);
-            log::trace!(bid = index.0, addr:? = data, len = len; "kernel initialised buffer");
-            // SAFETY: `bufs_addr` is not NULL.
-            let data = unsafe { NonNull::new_unchecked(data) };
-            Some(NonNull::slice_from_raw_parts(data, len as usize))
+            Some(self.shared.init_buffer(id, n))
         };
         ReadBuf {
             shared: self.shared.clone(),
             owned,
         }
     }
-}
-impl Shared {
-    /// Returns the tail of buffer ring.
-    fn ring_tail(&self) -> &AtomicU16 {
-        unsafe {
-            &*(ptr::addr_of!(((*self.ring_addr).__bindgen_anon_1.__bindgen_anon_1.tail))
-                .cast::<AtomicU16>())
-        }
+    /// Converts the pool into a raw pointer, used by the [`DropWake`]
+    /// implementation.
+    pub(crate) unsafe fn into_raw(self) -> *const () {
+        Arc::into_raw(self.shared).cast()
     }
-}
-unsafe impl Sync for Shared {}
-unsafe impl Send for Shared {}
-
-impl Drop for Shared {
-    fn drop(&mut self) {
-        let page_size = page_size();
-
-        // Unregister the buffer pool with the ring.
-        let buf_register = libc::io_uring_buf_reg {
-            bgid: self.id.0,
-            // Unused in this call.
-            ring_addr: 0,
-            ring_entries: 0,
-            flags: 0,
-            // Reserved for future use.
-            resv: [0; 3],
-        };
-        let result = self.sq.register(
-            libc::IORING_UNREGISTER_PBUF_RING,
-            ptr::addr_of!(buf_register).cast(),
-            1,
-        );
-        if let Err(err) = result {
-            log::warn!("failed to unregister a10::ReadBufPool: {err}");
+    /// Converts a raw pointer back into a `ReadBufPool`, used by the
+    /// [`DropWake`] implementation.
+    pub(crate) unsafe fn from_raw(ptr: *const ()) -> ReadBufPool {
+        ReadBufPool {
+            shared: Arc::from_raw(ptr.cast_mut().cast()),
         }
-
-        // Next deallocate the ring.
-        unsafe {
-            // SAFETY: created this layout in `new` and didn't fail, so it's
-            // still valid here.
-            let ring_layout = alloc_layout_ring(self.pool_size, page_size).unwrap();
-            // SAFETY: we allocated this in `new`, so it's safe to deallocate
-            // for us.
-            dealloc(self.ring_addr.cast(), ring_layout);
-        };
-
-        // And finally deallocate the buffers themselves.
-        if !self.bufs_addr.is_null() {
-            unsafe {
-                // SAFETY: created this layout in `new` and didn't fail, so it's
-                // still valid here.
-                let layout =
-                    alloc_layout_buffers(self.pool_size, self.buf_size, page_size).unwrap();
-                // SAFETY: we allocated this in `new`, so it's safe to
-                // deallocate for us.
- dealloc(self.bufs_addr, layout); - } - } - } -} - -fn alloc_layout_buffers( - pool_size: u16, - buf_size: u32, - page_size: usize, -) -> io::Result { - match alloc::Layout::from_size_align(pool_size as usize * buf_size as usize, page_size) { - Ok(layout) => Ok(layout), - // This will only fail if the size is larger then roughly - // `isize::MAX - PAGE_SIZE`, which is a huge allocation. - Err(_) => Err(io::ErrorKind::OutOfMemory.into()), - } -} - -fn alloc_layout_ring(pool_size: u16, page_size: usize) -> io::Result { - match alloc::Layout::from_size_align( - size_of::() * pool_size as usize, - page_size, - ) { - Ok(layout) => Ok(layout), - // This will only fail if the size is larger then roughly - // `isize::MAX - PAGE_SIZE`, which is a huge allocation. - Err(_) => Err(io::ErrorKind::OutOfMemory.into()), } } @@ -319,7 +113,7 @@ fn alloc_layout_ring(pool_size: u16, page_size: usize) -> io::Result, + shared: Arc, /// This is `Some` if the buffer was assigned. owned: Option>, } @@ -327,7 +121,7 @@ pub struct ReadBuf { impl ReadBuf { /// Returns the capacity of the buffer. pub fn capacity(&self) -> usize { - self.shared.buf_size as usize + self.shared.buf_size() } /// Returns the length of the buffer. @@ -337,7 +131,7 @@ impl ReadBuf { /// Returns true if the buffer is empty. pub fn is_empty(&self) -> bool { - self.owned.map_or(true, |ptr| ptr.len() == 0) + self.owned.is_none_or(|ptr| ptr.len() == 0) } /// Returns itself as slice. @@ -432,7 +226,7 @@ impl ReadBuf { /// The caller must ensure `new_len` bytes are initialised and that /// `new_len` is not larger than the buffer's capacity. pub unsafe fn set_len(&mut self, new_len: usize) { - debug_assert!(new_len <= self.shared.buf_size as usize); + debug_assert!(new_len <= self.capacity()); if let Some(ptr) = self.owned { self.owned = Some(change_size(ptr, new_len)); } @@ -446,7 +240,7 @@ impl ReadBuf { pub fn extend_from_slice(&mut self, other: &[u8]) -> Result<(), ()> { if let Some(ptr) = self.owned { let new_len = ptr.len() + other.len(); - if new_len > self.shared.buf_size as usize { + if new_len > self.capacity() { return Err(()); } @@ -470,7 +264,7 @@ impl ReadBuf { #[allow(clippy::needless_pass_by_ref_mut)] // See https://github.com/rust-lang/rust-clippy/issues/12905. pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit] { if let Some(ptr) = self.owned { - let unused_len = self.shared.buf_size as usize - ptr.len(); + let unused_len = self.capacity() - ptr.len(); // SAFETY: this won't overflow `isize`. let data = unsafe { ptr.as_ptr().cast::().add(ptr.len()) }; // SAFETY: the pointer and length are correct. @@ -492,46 +286,20 @@ impl ReadBuf { /// This is automatically called in the `Drop` implementation. pub fn release(&mut self) { if let Some(ptr) = self.owned.take() { - let ring_tail = self.shared.ring_tail(); - - // Calculate the buffer index based on the `ptr`, which points to - // the start of our buffer, and `bufs_addr`, which points to the - // start of the pool, by calculating the difference and dividing it - // by the buffer size. - // TODO: use `sub_ptr` once stable: - // `ptr_sub_ptr` . - let buf_idx = unsafe { - usize::try_from(ptr.as_ptr().cast::().offset_from(self.shared.bufs_addr)) - .unwrap_unchecked() - / self.shared.buf_size as usize - } as u16; - - // Because we need to fill the `ring_buf` and then atomatically - // update the `ring_tail` we do it while holding a lock. - let guard = self.shared.reregister_lock.lock().unwrap(); - // Get a ring_buf we write into. 
- // NOTE: that we allocated at least as many `io_uring_buf`s as we
- // did buffer, so there is always a slot available for us.
- let tail = ring_tail.load(Ordering::Acquire);
- let ring_idx = tail & self.shared.tail_mask;
- let ring_buf = unsafe {
- &mut *(ptr::addr_of_mut!((*self.shared.ring_addr).__bindgen_anon_1.bufs)
- .cast::<MaybeUninit<libc::io_uring_buf>>()
- .add(ring_idx as usize))
- };
- log::trace!(bid = buf_idx, addr:? = ptr, len = self.shared.buf_size; "reregistering buffer");
- ring_buf.write(libc::io_uring_buf {
- addr: ptr.as_ptr().cast::<u8>() as u64,
- len: self.shared.buf_size,
- bid: buf_idx,
- resv: 0,
- });
- ring_tail.store(tail.wrapping_add(1), Ordering::SeqCst);
- drop(guard);
+ // SAFETY: this is safe because we take the pointer here, ensuring we
+ // can't call this method again.
+ unsafe { self.shared.release(ptr) }
}
}
}

+/// Changes the size of `slice` to `new_len`.
+const fn change_size<T>(slice: NonNull<[T]>, new_len: usize) -> NonNull<[T]> {
+ // SAFETY: `ptr` is `NonNull`, thus not NULL.
+ let ptr = unsafe { NonNull::new_unchecked(slice.as_ptr().cast()) };
+ NonNull::slice_from_raw_parts(ptr, new_len)
+}
+
/// The implementation for `ReadBuf` is a special one as we don't actually pass
/// a "real" buffer. Instead we pass special flags to the kernel that allows it
/// to select a buffer from the connected [`ReadBufPool`] once the actual read
@@ -552,10 +320,10 @@ impl ReadBuf {
unsafe impl BufMut for ReadBuf {
unsafe fn parts_mut(&mut self) -> (*mut u8, u32) {
if let Some(ptr) = self.owned {
- let len = self.shared.buf_size - ptr.len() as u32;
+ let len = (self.capacity() - ptr.len()) as u32;
(ptr.as_ptr().cast::<u8>().add(ptr.len()), len)
} else {
- (ptr::null_mut(), self.shared.buf_size)
+ (ptr::null_mut(), self.capacity() as u32)
}
}

@@ -565,39 +333,25 @@ unsafe impl BufMut for ReadBuf {
fn buffer_group(&self) -> Option<BufGroupId> {
if self.owned.is_none() {
- Some(self.shared.id)
+ Some(self.shared.group_id())
} else {
// Already have an allocated buffer, don't need another one.
None
}
}

- unsafe fn buffer_init(&mut self, idx: BufIdx, n: u32) {
+ unsafe fn buffer_init(&mut self, id: BufId, n: u32) {
if let Some(ptr) = self.owned {
// We shouldn't be assigned another buffer, we should be resizing
// the current one.
- debug_assert!(idx.0 == 0);
+ debug_assert!(id.0 == 0);
self.owned = Some(change_size(ptr, ptr.len() + n as usize));
} else {
- let data = self
- .shared
- .bufs_addr
- .add(idx.0 as usize * self.shared.buf_size as usize);
- log::trace!(bid = idx.0, addr:? = data, len = n; "kernel initialised buffer");
- // SAFETY: `bufs_addr` is not NULL.
- let data = unsafe { NonNull::new_unchecked(data) };
- self.owned = Some(NonNull::slice_from_raw_parts(data, n as usize));
+ self.owned = Some(self.shared.init_buffer(id, n));
}
}
}

-/// Changes the size of `slice` to `new_len`.
-fn change_size<T>(slice: NonNull<[T]>, new_len: usize) -> NonNull<[T]> {
- // SAFETY: `ptr` is `NonNull`, thus not NULL.
- let ptr = unsafe { NonNull::new_unchecked(slice.as_ptr().cast()) };
- NonNull::slice_from_raw_parts(ptr, new_len)
-}
-
// SAFETY: `ReadBuf` manages the allocation of the bytes once it's assigned a
// buffer, so as long as it's alive, so is the slice of bytes.
unsafe impl Buf for ReadBuf {
diff --git a/src/io/traits.rs b/src/io/traits.rs
new file mode 100644
index 00000000..bfa0863f
--- /dev/null
+++ b/src/io/traits.rs
@@ -0,0 +1,427 @@
+//! I/O traits.
+//!
+//! See [`BufMut`] and [`Buf`], and their vectored counterparts [`BufMutSlice`]
+//! and [`BufSlice`].
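
Not part of the patch: a standalone sketch of the contract these traits encode, using only `std`, with `copy_nonoverlapping` standing in for the kernel writing into the buffer. It mirrors the `Vec<u8>` implementation added below, which only hands out its spare capacity:

```rust
use std::ptr;

fn main() {
    let mut buf: Vec<u8> = Vec::with_capacity(4096);
    // `parts_mut` for `Vec<u8>`: hand out the spare (uninitialised) capacity.
    let spare = buf.spare_capacity_mut();
    let (dst, len) = (spare.as_mut_ptr().cast::<u8>(), spare.len() as u32);
    assert!(len >= 5);
    // Stand-in for the kernel writing five bytes through the raw pointer.
    unsafe { ptr::copy_nonoverlapping(b"hello".as_ptr(), dst, 5) };
    // `set_init`: mark those five bytes as initialised.
    unsafe { buf.set_len(buf.len() + 5) };
    assert_eq!(buf, b"hello");
}
```
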
+
+use std::fmt;
+use std::mem::MaybeUninit;
+
+/// Trait that defines the behaviour of buffers used in reading, which requires
+/// mutable access.
+///
+/// # Safety
+///
+/// Unlike normal buffers the buffer implementations for A10 have additional
+/// requirements.
+///
+/// If the operation (that uses this buffer) is not polled to completion, i.e.
+/// the `Future` is dropped before it returns `Poll::Ready`, the kernel still
+/// has access to the buffer and will still attempt to write into it. This means
+/// that we must delay deallocation in such a way that the kernel will not write
+/// into memory we don't have access to any more. This makes, for example, stack
+/// based buffers unfit to implement `BufMut`, because we can't delay the
+/// deallocation once it's dropped and the kernel will overwrite part of your
+/// stack (where the buffer used to be)!
+pub unsafe trait BufMut: 'static {
+ /// Returns the writable buffer as pointer and length parts.
+ ///
+ /// # Safety
+ ///
+ /// Only initialised bytes may be written to the pointer returned. The
+ /// pointer *may* point to uninitialised bytes, so reading from the pointer
+ /// is UB.
+ ///
+ /// The implementation must ensure that the pointer is valid, i.e. not null
+ /// and pointing to memory owned by the buffer. Furthermore it must ensure
+ /// that the returned length is, in combination with the pointer, valid. In
+ /// other words the memory the pointer and length are pointing to must be a
+ /// valid memory address and owned by the buffer.
+ ///
+ /// Note that the above requirements are only required for implementations
+ /// outside of A10. **This trait is unfit for external use!**
+ ///
+ /// # Why not a slice?
+ ///
+ /// Returning a slice `&[u8]` would prevent us from using uninitialised
+ /// bytes, meaning we would have to zero the buffer before usage, which is
+ /// not ideal for performance. So, naturally you would suggest
+ /// `&[MaybeUninit<u8>]`, however that would prevent buffer types with only
+ /// initialised bytes. Returning a slice with `MaybeUninit<u8>` for such a
+ /// type would be unsound as it would allow the caller to write
+ /// uninitialised bytes without using `unsafe`.
+ ///
+ /// # Notes
+ ///
+ /// Most Rust APIs use a `usize` for length, but io_uring uses `u32`, hence
+ /// we do also.
+ unsafe fn parts_mut(&mut self) -> (*mut u8, u32);
+
+ /// Mark `n` bytes as initialised.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that `n` bytes, starting at the pointer returned
+ /// by [`BufMut::parts_mut`], are initialised.
+ unsafe fn set_init(&mut self, n: usize);
+
+ /// Buffer group id, or `None` if it's not part of a buffer pool.
+ ///
+ /// Don't implement this.
+ #[doc(hidden)]
+ #[allow(private_interfaces)]
+ fn buffer_group(&self) -> Option<BufGroupId> {
+ None
+ }
+
+ /// Mark `n` bytes as initialised in the buffer with `id`.
+ ///
+ /// Don't implement this.
+ #[doc(hidden)]
+ #[allow(private_interfaces)]
+ unsafe fn buffer_init(&mut self, id: BufId, n: u32) {
+ debug_assert!(id.0 == 0);
+ self.set_init(n as usize);
+ }
+}
+
+/// Id for a [`BufPool`].
+#[derive(Copy, Clone, Debug)]
+pub struct BufGroupId(pub(crate) u16);
+
+/// Index for a [`BufPool`].
+#[derive(Copy, Clone, Debug)]
+pub struct BufId(pub(crate) u16);
+
+/// The implementation for `Vec<u8>` only uses the unused capacity, so any bytes
+/// already in the buffer will be untouched.
+// SAFETY: `Vec<u8>` manages the allocation of the bytes, so as long as it's
+// alive, so is the slice of bytes. When the `Vec`tor is leaked the allocation
+// will also be leaked.
+unsafe impl BufMut for Vec<u8> {
+ unsafe fn parts_mut(&mut self) -> (*mut u8, u32) {
+ let slice = self.spare_capacity_mut();
+ (slice.as_mut_ptr().cast(), slice.len() as u32)
+ }
+
+ unsafe fn set_init(&mut self, n: usize) {
+ self.set_len(self.len() + n);
+ }
+}
+
+/// Trait that defines the behaviour of buffers used in reading using vectored
+/// I/O, which requires mutable access.
+///
+/// # Safety
+///
+/// This has the same safety requirements as [`BufMut`], but then for all
+/// buffers used.
+pub unsafe trait BufMutSlice<const N: usize>: 'static {
+ /// Returns the writable buffers as `IoMutSlice` structures.
+ ///
+ /// # Safety
+ ///
+ /// This has the same safety requirements as [`BufMut::parts_mut`], but then
+ /// for all buffers used.
+ unsafe fn as_iovecs_mut(&mut self) -> [IoMutSlice; N];
+
+ /// Mark `n` bytes as initialised.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that `n` bytes are initialised in the vectors
+ /// returned by [`BufMutSlice::as_iovecs_mut`].
+ ///
+ /// The implementation must ensure that the proper buffer(s) are
+ /// initialised. For example when this is called with `n = 10` with two
+ /// buffers of size `8` the implementation should initialise the first
+ /// buffer with `n = 8` and the second with `n = 10 - 8 = 2`.
+ unsafe fn set_init(&mut self, n: usize);
+}
+
+/// Wrapper around [`libc::iovec`] to perform mutable vectored I/O operations,
+/// such as read.
+pub struct IoMutSlice(crate::sys::io::IoMutSlice);
+
+impl IoMutSlice {
+ /// Create a new `IoMutSlice` from `buf`.
+ ///
+ /// # Safety
+ ///
+ /// Caller must ensure that `buf` outlives the returned `IoMutSlice`.
+ pub unsafe fn new<B: BufMut>(buf: &mut B) -> IoMutSlice {
+ IoMutSlice(crate::sys::io::IoMutSlice::new(buf))
+ }
+
+ #[doc(hidden)] // Used by testing.
+ #[allow(clippy::len_without_is_empty)]
+ pub const fn len(&self) -> usize {
+ self.0.len()
+ }
+
+ #[doc(hidden)] // Used by testing.
+ pub unsafe fn set_len(&mut self, new_len: usize) {
+ self.0.set_len(new_len);
+ }
+}
+
+impl std::fmt::Debug for IoMutSlice {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_struct("IoMutSlice")
+ .field("len", &self.0.len())
+ .finish()
+ }
+}
+
+// SAFETY: `BufMutSlice` has the same safety requirements as `BufMut` and since
+// `B` implements `BufMut` it's safe to implement `BufMutSlice` for an array of
+// `B`.
+unsafe impl<B: BufMut, const N: usize> BufMutSlice<N> for [B; N] {
+ unsafe fn as_iovecs_mut(&mut self) -> [IoMutSlice; N] {
+ // SAFETY: an uninitialised `MaybeUninit` is valid.
+ let mut iovecs =
+ unsafe { MaybeUninit::<[MaybeUninit<IoMutSlice>; N]>::uninit().assume_init() };
+ for (buf, iovec) in self.iter_mut().zip(iovecs.iter_mut()) {
+ debug_assert!(
+ buf.buffer_group().is_none(),
+ "can't use a10::ReadBuf as a10::BufMutSlice in vectored I/O"
+ );
+ iovec.write(IoMutSlice::new(buf));
+ }
+ // SAFETY: `MaybeUninit<IoMutSlice>` and `IoMutSlice` have the same
+ // layout as guaranteed by `MaybeUninit`.
+ unsafe { std::mem::transmute_copy(&std::mem::ManuallyDrop::new(iovecs)) }
+ }
+
+ unsafe fn set_init(&mut self, n: usize) {
+ let mut left = n;
+ for buf in self {
+ let (_, len) = buf.parts_mut();
+ let len = len as usize;
+ if len < left {
+ // Fully initialised the buffer.
+ buf.set_init(len);
+ left -= len;
+ } else {
+ // Partially initialised the buffer.
+ buf.set_init(left);
+ return;
+ }
+ }
+ unreachable!(
+ "called BufMutSlice::set_init({n}), with buffers totaling {} in size",
+ n - left
+ );
+ }
+}
+
+// NOTE: Also see implementation of `BufMutSlice` for tuples in the macro
+// `buf_slice_for_tuple` below.
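
To make the `set_init` splitting rule above concrete, here is a minimal standalone sketch (plain Rust, no A10 types; `split_init` is a made-up helper) of how `n` initialised bytes are credited to fixed-capacity buffers in order, matching the documented example of `n = 10` over two buffers of size `8`:

```rust
// Sketch of BufMutSlice::set_init's distribution: fill each buffer fully
// before crediting the remainder to the next one.
fn split_init(capacities: &[usize], n: usize) -> Vec<usize> {
    let mut left = n;
    let mut inits = Vec::new();
    for &capacity in capacities {
        if capacity < left {
            inits.push(capacity); // This buffer is fully initialised.
            left -= capacity;
        } else {
            inits.push(left); // Partially initialised, we're done.
            left = 0;
            break;
        }
    }
    assert!(left == 0, "`n` exceeds the total capacity of the buffers");
    inits
}

fn main() {
    // Two 8-byte buffers and a 10-byte read: 8 bytes for the first buffer
    // and 10 - 8 = 2 for the second, as in the `set_init` documentation.
    assert_eq!(split_init(&[8, 8], 10), [8, 2]);
}
```
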
+
+/// Trait that defines the behaviour of buffers used in writing, which requires
+/// read only access.
+///
+/// # Safety
+///
+/// Unlike normal buffers the buffer implementations for A10 have additional
+/// requirements.
+///
+/// If the operation (that uses this buffer) is not polled to completion, i.e.
+/// the `Future` is dropped before it returns `Poll::Ready`, the kernel still
+/// has access to the buffer and will still attempt to read from it. This means
+/// that we must delay deallocation in such a way that the kernel will not read
+/// memory we don't have access to any more. This makes, for example, stack
+/// based buffers unfit to implement `Buf`, because we can't delay the
+/// deallocation once it's dropped and the kernel will read part of your stack
+/// (where the buffer used to be)! This would be a huge security risk.
+pub unsafe trait Buf: 'static {
+ /// Returns the readable buffer as pointer and length parts.
+ ///
+ /// # Safety
+ ///
+ /// The implementation must ensure that the pointer is valid, i.e. not null
+ /// and pointing to memory owned by the buffer. Furthermore it must ensure
+ /// that the returned length is, in combination with the pointer, valid. In
+ /// other words the memory the pointer and length are pointing to must be a
+ /// valid memory address and owned by the buffer.
+ ///
+ /// # Notes
+ ///
+ /// Most Rust APIs use a `usize` for length, but io_uring uses `u32`, hence
+ /// we do also.
+ unsafe fn parts(&self) -> (*const u8, u32);
+}
+
+// SAFETY: `Vec<u8>` manages the allocation of the bytes, so as long as it's
+// alive, so is the slice of bytes. When the `Vec`tor is leaked the allocation
+// will also be leaked.
+unsafe impl Buf for Vec<u8> {
+ unsafe fn parts(&self) -> (*const u8, u32) {
+ let slice = self.as_slice();
+ (slice.as_ptr().cast(), slice.len() as u32)
+ }
+}
+
+// SAFETY: `Box<[u8]>` manages the allocation of the bytes, so as long as it's
+// alive, so is the slice of bytes. When the `Box` is leaked the allocation will
+// also be leaked.
+unsafe impl Buf for Box<[u8]> {
+ unsafe fn parts(&self) -> (*const u8, u32) {
+ (self.as_ptr().cast(), self.len() as u32)
+ }
+}
+
+// SAFETY: `String` is just a `Vec<u8>`, see its implementation for the safety
+// reasoning.
+unsafe impl Buf for String {
+ unsafe fn parts(&self) -> (*const u8, u32) {
+ let slice = self.as_bytes();
+ (slice.as_ptr().cast(), slice.len() as u32)
+ }
+}
+
+// SAFETY: because the reference has a `'static` lifetime we know the bytes
+// can't be deallocated, so it's safe to implement `Buf`.
+unsafe impl Buf for &'static [u8] {
+ unsafe fn parts(&self) -> (*const u8, u32) {
+ (self.as_ptr(), self.len() as u32)
+ }
+}
+
+// SAFETY: because the reference has a `'static` lifetime we know the bytes
+// can't be deallocated, so it's safe to implement `Buf`.
+unsafe impl Buf for &'static str {
+ unsafe fn parts(&self) -> (*const u8, u32) {
+ (self.as_bytes().as_ptr(), self.len() as u32)
+ }
+}
+
+/// Trait that defines the behaviour of buffers used in writing using vectored
+/// I/O, which requires read only access.
+///
+/// # Safety
+///
+/// This has the same safety requirements as [`Buf`], but then for all buffers
+/// used.
+pub unsafe trait BufSlice<const N: usize>: 'static {
+ /// Returns the readable buffers as `IoSlice` structures.
+ ///
+ /// # Safety
+ ///
+ /// This has the same safety requirements as [`Buf::parts`], but then for
+ /// all buffers used.
+ unsafe fn as_iovecs(&self) -> [IoSlice; N];
+}
+
+/// Wrapper around [`libc::iovec`] to perform immutable vectored I/O operations,
+/// such as write.
+pub struct IoSlice(crate::sys::io::IoSlice);
+
+impl IoSlice {
+ /// Create a new `IoSlice` from `buf`.
+ ///
+ /// # Safety
+ ///
+ /// Caller must ensure that `buf` outlives the returned `IoSlice`.
+ #[doc(hidden)] // Used in testing.
+ pub unsafe fn new<B: Buf>(buf: &B) -> IoSlice {
+ IoSlice(crate::sys::io::IoSlice::new(buf))
+ }
+
+ pub(crate) const fn len(&self) -> usize {
+ self.0.len()
+ }
+
+ pub(crate) unsafe fn set_len(&mut self, new_len: usize) {
+ self.0.set_len(new_len);
+ }
+}
+
+impl std::fmt::Debug for IoSlice {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.0.as_bytes().fmt(f)
+ }
+}
+
+// SAFETY: `BufSlice` has the same safety requirements as `Buf` and since `B`
+// implements `Buf` it's safe to implement `BufSlice` for an array of `B`.
+unsafe impl<B: Buf, const N: usize> BufSlice<N> for [B; N] {
+ unsafe fn as_iovecs(&self) -> [IoSlice; N] {
+ // SAFETY: an uninitialised `MaybeUninit` is valid.
+ let mut iovecs =
+ unsafe { MaybeUninit::<[MaybeUninit<IoSlice>; N]>::uninit().assume_init() };
+ for (buf, iovec) in self.iter().zip(iovecs.iter_mut()) {
+ iovec.write(IoSlice::new(buf));
+ }
+ // SAFETY: `MaybeUninit<IoSlice>` and `IoSlice` have the same layout as
+ // guaranteed by `MaybeUninit`.
+ unsafe { std::mem::transmute_copy(&std::mem::ManuallyDrop::new(iovecs)) }
+ }
+}
+
+macro_rules! buf_slice_for_tuple {
+ (
+ // Number of values.
+ $N: expr,
+ // Generic parameter name and tuple index.
+ $( $generic: ident . $index: tt ),+
+ ) => {
+ // SAFETY: `BufMutSlice` has the same safety requirements as `BufMut`
+ // and since all generic buffers must implement `BufMut` it's safe to
+ // implement `BufMutSlice` for a tuple of all those buffers.
+ unsafe impl<$( $generic: BufMut ),+> BufMutSlice<$N> for ($( $generic ),+) {
+ unsafe fn as_iovecs_mut(&mut self) -> [IoMutSlice; $N] {
+ [
+ $({
+ debug_assert!(
+ self.$index.buffer_group().is_none(),
+ "can't use a10::ReadBuf as a10::BufMutSlice in vectored I/O"
+ );
+ IoMutSlice::new(&mut self.$index)
+ }),+
+ ]
+ }
+
+ unsafe fn set_init(&mut self, n: usize) {
+ let mut left = n;
+ $({
+ let (_, len) = self.$index.parts_mut();
+ let len = len as usize;
+ if len < left {
+ // Fully initialised the buffer.
+ self.$index.set_init(len);
+ left -= len;
+ } else {
+ // Partially initialised the buffer.
+ self.$index.set_init(left);
+ return;
+ }
+ })+
+ unreachable!(
+ "called BufMutSlice::set_init({n}), with buffers totaling {} in size",
+ n - left
+ );
+ }
+ }
+
+ // SAFETY: `BufSlice` has the same safety requirements as `Buf` and
+ // since all generic buffers must implement `Buf` it's safe to implement
+ // `BufSlice` for a tuple of all those buffers.
+ unsafe impl<$( $generic: Buf ),+> BufSlice<$N> for ($( $generic ),+) { + unsafe fn as_iovecs(&self) -> [IoSlice; $N] { + [ + $({ + IoSlice::new(&self.$index) + }),+ + ] + } + } + }; +} + +buf_slice_for_tuple!(2, A.0, B.1); +buf_slice_for_tuple!(3, A.0, B.1, C.2); +buf_slice_for_tuple!(4, A.0, B.1, C.2, D.3); +buf_slice_for_tuple!(5, A.0, B.1, C.2, D.3, E.4); +buf_slice_for_tuple!(6, A.0, B.1, C.2, D.3, E.4, F.5); +buf_slice_for_tuple!(7, A.0, B.1, C.2, D.3, E.4, F.5, G.6); +buf_slice_for_tuple!(8, A.0, B.1, C.2, D.3, E.4, F.5, G.6, I.7); diff --git a/src/io_uring/cancel.rs b/src/io_uring/cancel.rs new file mode 100644 index 00000000..9f2a884d --- /dev/null +++ b/src/io_uring/cancel.rs @@ -0,0 +1,59 @@ +use crate::fd::{AsyncFd, Descriptor}; +use crate::io_uring::{self, cancel, cq, libc, sq}; +use crate::{OperationId, SubmissionQueue}; + +pub(crate) fn operation(op_id: OperationId, submission: &mut sq::Submission) { + submission.0.opcode = libc::IORING_OP_ASYNC_CANCEL as u8; + submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: op_id as _ }; +} + +pub(crate) struct CancelAllOp; + +impl io_uring::FdOp for CancelAllOp { + type Output = usize; + type Resources = (); + type Args = (); + + fn fill_submission( + fd: &AsyncFd, + (): &mut Self::Resources, + (): &mut Self::Args, + submission: &mut sq::Submission, + ) { + submission.0.opcode = libc::IORING_OP_ASYNC_CANCEL as u8; + submission.0.fd = fd.fd(); + submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { + cancel_flags: libc::IORING_ASYNC_CANCEL_ALL + | libc::IORING_ASYNC_CANCEL_FD + | D::cancel_flag(), + }; + } + + fn map_ok( + _: &AsyncFd, + (): Self::Resources, + (_, n): cq::OpReturn, + ) -> Self::Output { + n as usize + } +} + +pub(crate) struct CancelOperationOp; + +impl io_uring::Op for CancelOperationOp { + type Output = (); + type Resources = (); + type Args = OperationId; + + fn fill_submission( + (): &mut Self::Resources, + op_id: &mut Self::Args, + submission: &mut sq::Submission, + ) { + cancel::operation(*op_id, submission); + } + + fn map_ok(_: &SubmissionQueue, (): Self::Resources, (_, n): cq::OpReturn) -> Self::Output { + debug_assert!(n == 0); + } +} diff --git a/src/io_uring/config.rs b/src/io_uring/config.rs new file mode 100644 index 00000000..86696f7d --- /dev/null +++ b/src/io_uring/config.rs @@ -0,0 +1,348 @@ +//! Configuration of a [`Ring`]. + +use std::mem::{self, size_of}; +use std::os::fd::{AsFd, AsRawFd, FromRawFd, OwnedFd}; +use std::time::Duration; +use std::{io, ptr}; + +use crate::io_uring::{self, libc, Completions, Shared, Submissions}; +use crate::{syscall, Ring, SubmissionQueue}; + +#[derive(Debug, Clone)] +#[allow(clippy::struct_excessive_bools)] // This is just stupid. +pub(crate) struct Config<'r> { + submission_entries: Option, + completion_entries: Option, + disabled: bool, + single_issuer: bool, + defer_taskrun: bool, + clamp: bool, + kernel_thread: bool, + cpu_affinity: Option, + idle_timeout: Option, + direct_descriptors: Option, + attach: Option<&'r SubmissionQueue>, +} + +impl<'r> Config<'r> { + pub(crate) const fn new() -> Config<'r> { + Config { + submission_entries: None, + completion_entries: None, + disabled: false, + single_issuer: false, + defer_taskrun: false, + clamp: false, + kernel_thread: true, + cpu_affinity: None, + idle_timeout: None, + direct_descriptors: None, + attach: None, + } + } +} + +/// io_uring specific configuration. +impl<'r> crate::Config<'r> { + /// Start the ring in a disabled state. 
+ ///
+ /// While the ring is disabled, submissions are not allowed. To enable the
+ /// ring use [`Ring::enable`].
+ #[doc(alias = "IORING_SETUP_R_DISABLED")]
+ pub const fn disable(mut self) -> Self {
+ self.sys.disabled = true;
+ self
+ }
+
+ /// Enable single issuer.
+ ///
+ /// This hints to the kernel that only a single thread will submit requests,
+ /// which is used for optimisations within the kernel. This means that only
+ /// the thread that [`build`]s the ring or [`enable`]s it (after starting in
+ /// disabled mode) may register resources with the ring, resources such as
+ /// the [`ReadBufPool`].
+ ///
+ /// This optimisation is enforced by the kernel, which will return `EEXIST`
+ /// or `AlreadyExists` if another thread attempts to register resources or
+ /// otherwise uses the [`Ring`] in a way that is not allowed.
+ ///
+ /// [`build`]: crate::Config::build
+ /// [`enable`]: Ring::enable
+ /// [`ReadBufPool`]: crate::io::ReadBufPool
+ #[doc(alias = "IORING_SETUP_SINGLE_ISSUER")]
+ pub const fn single_issuer(mut self) -> Self {
+ self.sys.single_issuer = true;
+ self
+ }
+
+ /// Defer task running.
+ ///
+ /// By default, the kernel will process all outstanding work at the end of
+ /// any system call or thread interrupt. This can delay the application from
+ /// making other progress.
+ ///
+ /// Enabling this option will hint to the kernel that it should defer work
+ /// until [`Ring::poll`] is called. This way the work is done in
+ /// [`Ring::poll`].
+ ///
+ /// This option requires [`Config::single_issuer`] to be set. It does not
+ /// work with [`Config::with_kernel_thread`] set.
+ ///
+ /// [`Config::single_issuer`]: crate::Config::single_issuer
+ /// [`Config::with_kernel_thread`]: crate::Config::with_kernel_thread
+ #[doc(alias = "IORING_SETUP_DEFER_TASKRUN")]
+ pub const fn defer_task_run(mut self) -> Self {
+ self.sys.defer_taskrun = true;
+ self
+ }
+
+ /// Set the size of the io_uring submission queue.
+ ///
+ /// `entries` is passed to `io_uring_setup(2)`. It must be a power of two
+ /// and in the range 1..=4096.
+ ///
+ /// Defaults to the same value as the maximum number of queued operations
+ /// (see [`Ring::config`]).
+ ///
+ /// [`Ring::config`]: crate::Ring::config
+ #[doc(alias = "io_uring_setup")]
+ pub const fn with_submission_queue_size(mut self, entries: u32) -> Self {
+ self.sys.submission_entries = Some(entries);
+ self
+ }
+
+ /// Set the size of the io_uring completion queue.
+ ///
+ /// By default the kernel will use a completion queue twice as large as the
+ /// submission queue (`entries` in the call to [`Ring::config`]).
+ ///
+ /// Uses `IORING_SETUP_CQSIZE`, added in Linux kernel 5.5.
+ #[doc(alias = "IORING_SETUP_CQSIZE")]
+ pub const fn with_completion_queue_size(mut self, entries: u32) -> Self {
+ self.sys.completion_entries = Some(entries);
+ self
+ }
+
+ /// Clamp queue sizes to the maximum.
+ ///
+ /// The maximum queue sizes aren't exposed by the kernel, making this the
+ /// only way (currently) to get the largest possible queues.
+ ///
+ /// Uses `IORING_SETUP_CLAMP`, added in Linux kernel 5.6.
+ #[doc(alias = "IORING_SETUP_CLAMP")]
+ pub const fn clamp_queue_sizes(mut self) -> Self {
+ self.sys.clamp = true;
+ self
+ }
+
+ /// Start a kernel thread polling the [`Ring`].
+ ///
+ /// When this option is enabled a kernel thread is created to perform
+ /// submission queue polling. This allows issuing I/O without ever context
+ /// switching into the kernel.
+ ///
+ /// # Notes
+ ///
+ /// Setting this to false significantly changes the way A10 works. With this
+ /// disabled you need to call [`Ring::poll`] to *submit* I/O work; with this
+ /// enabled that is done by the kernel thread. That means that if multiple
+ /// threads use the same [`SubmissionQueue`] their submissions might not
+ /// actually be submitted until `Ring::poll` is called.
+ #[doc(alias = "IORING_SETUP_SQPOLL")]
+ pub const fn with_kernel_thread(mut self, enabled: bool) -> Self {
+ self.sys.kernel_thread = enabled;
+ self
+ }
+
+ /// Set the CPU affinity of the kernel thread polling the [`Ring`].
+ ///
+ /// Only works in combination with [`Config::with_kernel_thread`].
+ ///
+ /// [`Config::with_kernel_thread`]: crate::Config::with_kernel_thread
+ #[doc(alias = "IORING_SETUP_SQ_AFF")]
+ #[doc(alias = "sq_thread_cpu")]
+ pub const fn with_cpu_affinity(mut self, cpu: u32) -> Self {
+ self.sys.cpu_affinity = Some(cpu);
+ self
+ }
+
+ /// Set the idle timeout of the kernel thread polling the submission queue.
+ /// After `timeout` has passed since the last I/O submission the kernel
+ /// thread will go to sleep. If the I/O is kept busy the kernel thread will
+ /// never sleep. Note that A10 will ensure the kernel thread is woken up
+ /// when more submissions are added.
+ ///
+ /// The accuracy of `timeout` is only in milliseconds, anything more precise
+ /// will be discarded.
+ #[doc(alias = "sq_thread_idle")]
+ pub const fn with_idle_timeout(mut self, timeout: Duration) -> Self {
+ let ms = timeout.as_millis();
+ let ms = if ms <= u32::MAX as u128 {
+ // SAFETY: just checked above that `ms` is not larger than `u32::MAX`.
+ ms as u32
+ } else {
+ u32::MAX
+ };
+ self.sys.idle_timeout = Some(ms);
+ self
+ }
+
+ /// Enable direct descriptors.
+ ///
+ /// This registers a sparse array of `size` direct descriptor slots enabling
+ /// direct descriptors to be used. If this is not used, attempts to create a
+ /// direct descriptor will result in `ENXIO`.
+ ///
+ /// By default direct descriptors are not enabled.
+ #[doc(alias = "IORING_REGISTER_FILES")]
+ #[doc(alias = "IORING_REGISTER_FILES2")]
+ #[doc(alias = "IORING_RSRC_REGISTER_SPARSE")]
+ pub const fn with_direct_descriptors(mut self, size: u32) -> Self {
+ self.sys.direct_descriptors = Some(size);
+ self
+ }
+
+ /// Attach the new (to be created) ring to `other_ring`.
+ ///
+ /// This will cause the `Ring` being created to share the asynchronous
+ /// worker thread backend of the specified `other_ring`, rather than create
+ /// a new separate thread pool.
+ ///
+ /// Uses `IORING_SETUP_ATTACH_WQ`, added in Linux kernel 5.6.
+ #[doc(alias = "IORING_SETUP_ATTACH_WQ")]
+ pub const fn attach(self, other_ring: &'r Ring) -> Self {
+ self.attach_queue(other_ring.submission_queue())
+ }
+
+ /// Same as [`Config::attach`], but accepts a [`SubmissionQueue`].
+ ///
+ /// [`Config::attach`]: crate::Config::attach
+ #[doc(alias = "IORING_SETUP_ATTACH_WQ")]
+ pub const fn attach_queue(mut self, other_sq: &'r SubmissionQueue) -> Self {
+ self.sys.attach = Some(other_sq);
+ self
+ }
+
+ pub(crate) fn build_sys(self) -> io::Result<(Submissions, Shared, Completions)> {
+ // SAFETY: all zero is valid for `io_uring_params`.
+ let mut parameters: libc::io_uring_params = unsafe { mem::zeroed() };
+ parameters.flags = libc::IORING_SETUP_SUBMIT_ALL; // Submit all submissions on error.
+ if self.sys.kernel_thread {
+ parameters.flags |= libc::IORING_SETUP_SQPOLL; // Kernel thread for polling.
+ } else {
+ // Don't interrupt userspace, the user must call `Ring::poll` any way.
+ parameters.flags |= libc::IORING_SETUP_COOP_TASKRUN;
+ }
+ if self.sys.disabled {
+ // Start the ring in disabled mode.
+ parameters.flags |= libc::IORING_SETUP_R_DISABLED;
+ }
+ if self.sys.single_issuer {
+ // Only allow access from a single thread.
+ parameters.flags |= libc::IORING_SETUP_SINGLE_ISSUER;
+ }
+ if self.sys.defer_taskrun {
+ parameters.flags |= libc::IORING_SETUP_DEFER_TASKRUN;
+ }
+ #[rustfmt::skip]
+ let submission_entries = self.sys.submission_entries.unwrap_or(self.queued_operations as u32);
+ if let Some(completion_entries) = self.sys.completion_entries {
+ parameters.cq_entries = completion_entries;
+ parameters.flags |= libc::IORING_SETUP_CQSIZE;
+ }
+ if self.sys.clamp {
+ parameters.flags |= libc::IORING_SETUP_CLAMP;
+ }
+ if let Some(cpu) = self.sys.cpu_affinity {
+ parameters.flags |= libc::IORING_SETUP_SQ_AFF;
+ parameters.sq_thread_cpu = cpu;
+ }
+ if let Some(idle_timeout) = self.sys.idle_timeout {
+ parameters.sq_thread_idle = idle_timeout;
+ }
+ #[allow(clippy::cast_sign_loss)] // File descriptors are always positive.
+ if let Some(other_sq) = self.sys.attach {
+ parameters.wq_fd = other_sq.inner.shared_data().rfd.as_raw_fd() as u32;
+ parameters.flags |= libc::IORING_SETUP_ATTACH_WQ;
+ }
+
+ let mut first_err = None;
+ let rfd = loop {
+ match syscall!(io_uring_setup(submission_entries, &mut parameters)) {
+ // SAFETY: just created the fd (and checked the error).
+ Ok(rfd) => break unsafe { OwnedFd::from_raw_fd(rfd) },
+ Err(err) => {
+ if let io::ErrorKind::InvalidInput = err.kind() {
+ // We set some flags which are not strictly required by
+ // A10, but provide various benefits. However in doing
+ // so we also increase our minimum supported kernel
+ // version.
+ // Here we remove the flags one by one and try again.
+ // NOTE: this is mainly done to support the CI, which
+ // currently uses Linux 5.15.
+ remove_flag!(
+ parameters,
+ first_err,
+ err,
+ IORING_SETUP_SUBMIT_ALL, // 5.18.
+ IORING_SETUP_COOP_TASKRUN, // 5.19.
+ IORING_SETUP_SINGLE_ISSUER, // 6.0.
+ );
+ }
+ return Err(first_err.unwrap_or(err));
+ }
+ };
+ };
+ check_feature!(parameters.features, IORING_FEAT_NODROP); // Never drop completions.
+ check_feature!(parameters.features, IORING_FEAT_SUBMIT_STABLE); // All data for async offload must be consumed.
+ check_feature!(parameters.features, IORING_FEAT_RW_CUR_POS); // Allow -1 as current position.
+ check_feature!(parameters.features, IORING_FEAT_SQPOLL_NONFIXED); // No need for fixed files.
+
+ let shared = io_uring::Shared::new(rfd, &parameters)?;
+ let submissions = io_uring::Submissions::new();
+ let completions = io_uring::Completions::new(shared.rfd.as_fd(), &parameters)?;
+
+ if let Some(size) = self.sys.direct_descriptors {
+ let register = libc::io_uring_rsrc_register {
+ flags: libc::IORING_RSRC_REGISTER_SPARSE,
+ nr: size,
+ resv2: 0,
+ data: 0,
+ tags: 0,
+ };
+ let arg = ptr::from_ref(&register).cast();
+ let size = size_of::<libc::io_uring_rsrc_register>();
+ shared.register(libc::IORING_REGISTER_FILES2, arg, size as _)?;
+ }
+
+ Ok((submissions, shared, completions))
+ }
+}
+
+macro_rules! check_feature {
+ ($features: expr, $required: ident $(,)?) => {{
+ assert!(
+ $features & libc::$required != 0,
+ concat!(
+ "Kernel doesn't have required `",
+ stringify!($required),
+ "` feature"
+ )
+ );
+ }};
+}
+
+macro_rules!
remove_flag { + ($parameters: ident, $first_err: ident, $err: ident, $( $flag: ident, )+ ) => { + $( + if $parameters.flags & libc::$flag != 0 { + log::debug!(concat!("failed to create io_uring: {}, dropping ", stringify!($flag), " flag and trying again"), $err); + $parameters.flags &= !libc::$flag; + $first_err.get_or_insert($err); + continue; + } + )+ + }; +} + +use {check_feature, remove_flag}; diff --git a/src/io_uring/cq.rs b/src/io_uring/cq.rs new file mode 100644 index 00000000..d2f34a8f --- /dev/null +++ b/src/io_uring/cq.rs @@ -0,0 +1,362 @@ +use std::marker::PhantomData; +use std::os::fd::{AsRawFd, BorrowedFd}; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::time::Duration; +use std::{fmt, io, ptr}; + +use crate::io_uring::{self, libc, load_atomic_u32, mmap, munmap, Shared}; +use crate::msg::MsgData; +use crate::op::OpResult; +use crate::{debug_detail, syscall, OperationId}; + +#[derive(Debug)] +pub(crate) struct Completions { + /// Mmap-ed pointer to the completion queue. + ptr: *mut libc::c_void, + /// Mmap-ed size in bytes. + size: libc::c_uint, + // NOTE: the following fields reference mmaped pages shared with the kernel, + // thus all need atomic/synchronised access. + /// Incremented by us when completions have been read. + head: *mut AtomicU32, + /// Incremented by the kernel when adding completions. + tail: *const AtomicU32, + /// Array of `len` completion entries shared with the kernel. The kernel + /// modifies this array, we're only reading from it. + entries: *const Completion, + /// Number of `entries`. + entries_len: u32, + /// Mask used to index into the `entries` queue. + entries_mask: u32, +} + +impl Completions { + pub(crate) fn new( + rfd: BorrowedFd<'_>, + parameters: &libc::io_uring_params, + ) -> io::Result { + let size = parameters.cq_off.cqes + + parameters.cq_entries * (size_of::() as u32); + let completion_queue = mmap( + size as usize, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_SHARED | libc::MAP_POPULATE, + rfd.as_raw_fd(), + libc::off_t::from(libc::IORING_OFF_CQ_RING), + )?; + + let entries_len = unsafe { + load_atomic_u32(completion_queue.add(parameters.cq_off.ring_entries as usize)) + }; + debug_assert!(entries_len == parameters.cq_entries); + let entries_mask = + unsafe { load_atomic_u32(completion_queue.add(parameters.cq_off.ring_mask as usize)) }; + debug_assert!(entries_mask == parameters.cq_entries - 1); + + unsafe { + Ok(Completions { + ptr: completion_queue, + size, + // Fields are shared with the kernel. + head: completion_queue.add(parameters.cq_off.head as usize).cast(), + tail: completion_queue.add(parameters.cq_off.tail as usize).cast(), + entries: completion_queue.add(parameters.cq_off.cqes as usize).cast(), + entries_len, + entries_mask, + }) + } + } + + /// Make the `io_uring_enter` system call. + #[allow(clippy::unused_self, clippy::needless_pass_by_ref_mut)] + fn enter(&mut self, shared: &io_uring::Shared, timeout: Option) -> io::Result<()> { + let mut args = libc::io_uring_getevents_arg { + sigmask: 0, + sigmask_sz: 0, + min_wait_usec: 0, + ts: 0, + }; + let mut timespec = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + if let Some(timeout) = timeout { + timespec.tv_sec = timeout.as_secs().try_into().unwrap_or(i64::MAX); + timespec.tv_nsec = libc::c_longlong::from(timeout.subsec_nanos()); + args.ts = ptr::addr_of!(timespec) as u64; + } + + let submissions = if shared.kernel_thread { + 0 // Kernel thread handles the submissions. 
+ } else {
+ shared.unsubmitted()
+ };
+
+ // If there are no completions we'll wait for at least one.
+ let enter_flags = libc::IORING_ENTER_GETEVENTS // Wait for a completion.
+ | libc::IORING_ENTER_EXT_ARG; // Passing of `args`.
+ log::debug!(submissions = submissions; "waiting for completion events");
+ let result = syscall!(io_uring_enter2(
+ shared.rfd.as_raw_fd(),
+ submissions,
+ 1, // Wait for at least one completion.
+ enter_flags,
+ ptr::addr_of!(args).cast(),
+ size_of::<libc::io_uring_getevents_arg>(),
+ ));
+ match result {
+ Ok(_) => Ok(()),
+ // Hit timeout, we can ignore it.
+ Err(ref err) if err.raw_os_error() == Some(libc::ETIME) => Ok(()),
+ Err(err) => Err(err),
+ }
+ }
+
+ /// Returns `Completions.head`.
+ fn completion_head(&mut self) -> u32 {
+ // SAFETY: we're the only ones writing to it so `Relaxed` is fine. The
+ // pointer itself is valid as long as `Ring.fd` is alive.
+ unsafe { (*self.head).load(Ordering::Relaxed) }
+ }
+
+ /// Returns `Completions.tail`.
+ fn completion_tail(&self) -> u32 {
+ // SAFETY: this is written to by the kernel so we need to use `Acquire`
+ // ordering. The pointer itself is valid as long as `Ring.fd` is alive.
+ unsafe { (*self.tail).load(Ordering::Acquire) }
+ }
+}
+
+impl crate::cq::Completions for Completions {
+ type Shared = Shared;
+ type Event = Completion;
+
+ fn poll<'a>(
+ &'a mut self,
+ shared: &Self::Shared,
+ timeout: Option<Duration>,
+ ) -> io::Result<CompletionsIter<'a>> {
+ let head = self.completion_head();
+ let mut tail = self.completion_tail();
+ if head == tail && !matches!(timeout, Some(Duration::ZERO)) {
+ // If we have no completions, and the timeout is either absent or
+ // non-zero, we make a system call to wait for completion events.
+ self.enter(shared, timeout)?;
+ // NOTE: we're the only ones writing to the completion `head` so we
+ // don't need to read it again.
+ tail = self.completion_tail();
+ }
+
+ Ok(CompletionsIter {
+ entries: self.entries,
+ local_head: head,
+ head: self.head,
+ tail,
+ mask: self.entries_mask,
+ _lifetime: PhantomData,
+ })
+ }
+
+ fn queue_space(&mut self, shared: &Self::Shared) -> usize {
+ // SAFETY: the `kernel_read` pointer itself is valid as long as the ring
+ // is alive.
+ // We use relaxed ordering here because the caller knows the value will
+ // be outdated.
+ let kernel_read = unsafe { (*shared.kernel_read).load(Ordering::Relaxed) };
+ let pending_tail = shared.pending_tail.load(Ordering::Relaxed);
+ (self.entries_len - (pending_tail - kernel_read)) as usize
+ }
+}
+
+unsafe impl Send for Completions {}
+
+unsafe impl Sync for Completions {}
+
+impl Drop for Completions {
+ fn drop(&mut self) {
+ if let Err(err) = munmap(self.ptr, self.size as usize) {
+ log::warn!(ptr:? = self.ptr, size = self.size; "error unmapping io_uring completions: {err}");
+ }
+ }
+}
+
+/// Iterator of completed operations.
+struct CompletionsIter<'a> {
+ /// Same as [`Completions.entries`].
+ entries: *const Completion,
+ /// Local version of `head`. Used to update `head` once the iterator is
+ /// dropped.
+ local_head: u32,
+ /// Same as [`Completions.head`], used to let the kernel know we've read
+ /// the completions once we're dropped.
+ head: *mut AtomicU32,
+ /// Tail of `entries`, i.e. number of completions the kernel wrote.
+ tail: u32,
+ /// Same as [`Completions.entries_mask`].
+ mask: u32,
+ /// We depend on the lifetime of [`io_uring::Shared`].
+ _lifetime: PhantomData<&'a io_uring::Shared>,
+}
+
+impl<'a> Iterator for CompletionsIter<'a> {
+ type Item = &'a Completion;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.local_head < self.tail {
+ // SAFETY: the `mask` ensures we can never get an `idx` larger than
+ // the size of the queue. We checked above that the kernel has
+ // written the struct (and isn't writing to it now) so we can safely
+ // read from it.
+ let idx = (self.local_head & self.mask) as usize;
+ let completion = unsafe { &*self.entries.add(idx) };
+ self.local_head += 1;
+ Some(completion)
+ } else {
+ None
+ }
+ }
+}
+
+impl<'a> Drop for CompletionsIter<'a> {
+ fn drop(&mut self) {
+ // Let the kernel know we've read the completions.
+ // SAFETY: the kernel needs to read the value so we need `Release`. The
+ // pointer itself is valid as long as `Ring.fd` is alive.
+ unsafe { (*self.head).store(self.local_head, Ordering::Release) }
+ }
+}
+
+/// Event that represents a completed operation.
+#[repr(transparent)]
+pub(crate) struct Completion(libc::io_uring_cqe);
+
+impl Completion {
+ /// Returns the operation flags that need to be passed to
+ /// [`QueuedOperation`].
+ const fn operation_flags(&self) -> u16 {
+ if self.0.flags & libc::IORING_CQE_F_BUFFER != 0 {
+ (self.0.flags >> libc::IORING_CQE_BUFFER_SHIFT) as u16
+ } else {
+ 0
+ }
+ }
+}
+
+impl crate::cq::Event for Completion {
+ type State = OperationState;
+
+ fn id(&self) -> OperationId {
+ self.0.user_data as _
+ }
+
+ fn update_state(&self, state: &mut Self::State) -> bool {
+ let completion = CompletionResult {
+ result: self.0.res,
+ flags: self.operation_flags(),
+ };
+ match state {
+ OperationState::Single { .. } if self.0.flags & libc::IORING_CQE_F_NOTIF != 0 => {
+ // Zero copy completed, we can now mark ourselves as done, not
+ // overwriting result.
+ }
+ OperationState::Single { result } => {
+ debug_assert!(result.result == i32::MIN);
+ debug_assert!(result.flags == u16::MAX);
+ *result = completion;
+ }
+ OperationState::Multishot { results } => {
+ results.push(completion);
+ }
+ }
+ // IORING_CQE_F_MORE indicates that more completions are coming for this
+ // operation.
+ self.0.flags & libc::IORING_CQE_F_MORE != 0
+ }
+}
+
+impl fmt::Debug for Completion {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ debug_detail!(
+ bitset CompletionFlags(u32),
+ libc::IORING_CQE_F_BUFFER,
+ libc::IORING_CQE_F_MORE,
+ libc::IORING_CQE_F_SOCK_NONEMPTY,
+ libc::IORING_CQE_F_NOTIF,
+ libc::IORING_CQE_F_BUF_MORE,
+ );
+
+ f.debug_struct("io_uring::Completion")
+ .field("user_data", &self.0.user_data)
+ // NOTE: this isn't always an errno, so we can't use
+ // `io::Error::from_raw_os_error` without being misleading.
+ .field("res", &self.0.res)
+ .field("flags", &CompletionFlags(self.0.flags))
+ .field("operation_flags", &self.operation_flags())
+ .finish()
+ }
+}
+
+#[derive(Debug)]
+pub(crate) enum OperationState {
+ /// Single result operation.
+ Single {
+ /// Result of the operation.
+ result: CompletionResult,
+ },
+ /// Multishot operation, which expects multiple results for the same
+ /// operation.
+ Multishot {
+ /// Results for the operation.
+ results: Vec<CompletionResult>,
+ },
+}
+
+impl crate::cq::OperationState for OperationState {
+ fn new() -> OperationState {
+ OperationState::Single {
+ result: CompletionResult {
+ flags: u16::MAX,
+ result: i32::MIN,
+ },
+ }
+ }
+
+ fn new_multishot() -> OperationState {
+ OperationState::Multishot {
+ results: Vec::new(),
+ }
+ }
+}
+
+/// Completed result of an operation.
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct CompletionResult {
+ /// The 16 upper bits of `io_uring_cqe.flags`, e.g. the index of a buffer in
+ /// a buffer pool.
+ flags: u16,
+ /// The result of an operation; negative is a (negative) errno, positive a
+ /// successful result. The meaning depends on the operation itself.
+ result: i32,
+}
+
+impl CompletionResult {
+ #[allow(clippy::cast_sign_loss)]
+ pub(crate) fn as_op_result(self) -> OpResult {
+ if self.result.is_negative() {
+ OpResult::Err(io::Error::from_raw_os_error(-self.result))
+ } else {
+ // SAFETY: checked if `result` is negative above.
+ OpResult::Ok((self.flags, self.result as u32))
+ }
+ }
+
+ #[allow(clippy::cast_sign_loss)]
+ pub(crate) const fn as_msg(self) -> MsgData {
+ self.result as MsgData
+ }
+}
+
+/// Return value of a system call.
+///
+/// The flags and positive result of a system call.
+pub(crate) type OpReturn = (u16, u32);
diff --git a/src/io_uring/fd.rs b/src/io_uring/fd.rs
new file mode 100644
index 00000000..5856da86
--- /dev/null
+++ b/src/io_uring/fd.rs
@@ -0,0 +1,174 @@
+use std::os::fd::RawFd;
+use std::{io, ptr};
+
+use crate::fd::{AsyncFd, Descriptor, File};
+use crate::io_uring::{self, cq, libc, sq};
+use crate::op::{fd_operation, FdOperation};
+
+/// Direct descriptors are io_uring private file descriptors.
+///
+/// They avoid some of the overhead associated with thread shared file tables
+/// and can be used in any io_uring request that takes a file descriptor.
+/// However they cannot be used outside of io_uring.
+#[derive(Copy, Clone, Debug)]
+pub enum Direct {}
+
+impl Descriptor for Direct {}
+
+impl crate::fd::private::Descriptor for Direct {
+ fn use_flags(submission: &mut sq::Submission) {
+ submission.0.flags |= libc::IOSQE_FIXED_FILE;
+ }
+
+ #[allow(clippy::cast_sign_loss)]
+ fn create_flags(submission: &mut sq::Submission) {
+ submission.0.__bindgen_anon_5 = libc::io_uring_sqe__bindgen_ty_5 {
+ file_index: libc::IORING_FILE_INDEX_ALLOC as _,
+ };
+ }
+
+ fn cloexec_flag() -> libc::c_int {
+ 0 // Direct descriptors always have (the equivalent of) `O_CLOEXEC` set.
+ }
+
+ fn cancel_flag() -> u32 {
+ libc::IORING_ASYNC_CANCEL_FD_FIXED
+ }
+
+ fn fmt_dbg() -> &'static str {
+ "direct descriptor"
+ }
+
+ fn close_flags(fd: RawFd, submission: &mut sq::Submission) {
+ submission.0.opcode = libc::IORING_OP_CLOSE as u8;
+ submission.0.__bindgen_anon_5 = libc::io_uring_sqe__bindgen_ty_5 {
+ file_index: fd as _,
+ };
+ }
+
+ fn close(fd: RawFd) -> io::Result<()> {
+ // TODO: don't leak the fd.
+ log::warn!(fd = fd; "leaking direct descriptor");
+ Ok(())
+ }
+}
+
+/// io_uring specific methods.
+impl AsyncFd {
+ /// Convert a regular file descriptor into a direct descriptor.
+ ///
+ /// The file descriptor can continue to be used and the lifetimes of the
+ /// file descriptor and the newly returned direct descriptor are not
+ /// connected.
+ ///
+ /// # Notes
+ ///
+ /// The [`Ring`] must be configured [`with_direct_descriptors`] enabled,
+ /// otherwise this will return `ENXIO`.
+ ///
+ /// [`Ring`]: crate::Ring
+ /// [`with_direct_descriptors`]: crate::Config::with_direct_descriptors
+ #[doc(alias = "IORING_OP_FILES_UPDATE")]
+ #[doc(alias = "IORING_FILE_INDEX_ALLOC")]
+ pub fn to_direct_descriptor<'fd>(&'fd self) -> ToDirect<'fd, File> {
+ // The `fd` needs to be valid until the operation is complete, so we
+ // need to heap allocate it so we can delay its deallocation in case of
+ // an early drop.
+ let fd = Box::new(self.fd());
+ ToDirect(FdOperation::new(self, fd, ()))
+ }
+}
+
+/// Operations only available on direct descriptors (io_uring only).
+impl AsyncFd {
+ /// Convert a direct descriptor into a regular file descriptor.
+ ///
+ /// The direct descriptor can continue to be used and the lifetimes of the
+ /// direct descriptor and the newly returned file descriptor are not
+ /// connected.
+ ///
+ /// # Notes
+ ///
+ /// Requires Linux 6.8.
+ #[doc(alias = "IORING_OP_FIXED_FD_INSTALL")]
+ pub const fn to_file_descriptor<'fd>(&'fd self) -> ToFd<'fd, Direct> {
+ ToFd(FdOperation::new(self, (), ()))
+ }
+}
+
+fd_operation!(
+ /// [`Future`] behind [`AsyncFd::to_direct_descriptor`].
+ pub struct ToDirect(ToDirectOp) -> io::Result<AsyncFd<Direct>>;
+
+ /// [`Future`] behind [`AsyncFd::to_file_descriptor`].
+ pub struct ToFd(ToFdOp) -> io::Result<AsyncFd<File>>;
+);
+
+struct ToDirectOp;
+
+impl io_uring::FdOp for ToDirectOp {
+ type Output = AsyncFd<Direct>;
+ type Resources = Box<RawFd>;
+ type Args = ();
+
+ #[allow(clippy::cast_sign_loss)]
+ fn fill_submission(
+ _: &AsyncFd,
+ fd: &mut Self::Resources,
+ (): &mut Self::Args,
+ submission: &mut sq::Submission,
+ ) {
+ submission.0.opcode = libc::IORING_OP_FILES_UPDATE as u8;
+ submission.0.fd = -1;
+ submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 {
+ off: libc::IORING_FILE_INDEX_ALLOC as _,
+ };
+ submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+ addr: ptr::from_mut(&mut **fd).addr() as _,
+ };
+ submission.0.len = 1;
+ }
+
+ fn map_ok(
+ ofd: &AsyncFd,
+ fd: Self::Resources,
+ (_, n): cq::OpReturn,
+ ) -> Self::Output {
+ debug_assert!(n == 1);
+ let sq = ofd.sq.clone();
+ // SAFETY: the kernel ensures that `fd` is valid.
+ unsafe { AsyncFd::from_raw(*fd, sq) }
+ }
+}
+
+struct ToFdOp;
+
+impl io_uring::FdOp for ToFdOp {
+ type Output = AsyncFd<File>;
+ type Resources = ();
+ type Args = ();
+
+ fn fill_submission(
+ fd: &AsyncFd,
+ (): &mut Self::Resources,
+ (): &mut Self::Args,
+ submission: &mut sq::Submission,
+ ) {
+ submission.0.opcode = libc::IORING_OP_FIXED_FD_INSTALL as u8;
+ submission.0.fd = fd.fd();
+ submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+ // NOTE: must currently be zero.
+ install_fd_flags: 0,
+ };
+ }
+
+ fn map_ok(
+ ofd: &AsyncFd,
+ (): Self::Resources,
+ (_, fd): cq::OpReturn,
+ ) -> Self::Output {
+ let sq = ofd.sq.clone();
+ // SAFETY: the kernel ensures that `fd` is valid.
+ unsafe { AsyncFd::from_raw(fd as _, sq) }
+ }
+}
diff --git a/src/io_uring/fs.rs b/src/io_uring/fs.rs
new file mode 100644
index 00000000..e3346b68
--- /dev/null
+++ b/src/io_uring/fs.rs
@@ -0,0 +1,342 @@
+use std::ffi::CString;
+use std::marker::PhantomData;
+use std::path::PathBuf;
+use std::ptr;
+
+use crate::fd::{AsyncFd, Descriptor};
+use crate::fs::{path_from_cstring, Metadata, RemoveFlag, SyncDataFlag, METADATA_FLAGS};
+use crate::io_uring::{self, cq, libc, sq};
+use crate::op::OpExtract;
+use crate::SubmissionQueue;
+
+pub(crate) struct OpenOp(PhantomData<*const D>);
+
+impl io_uring::Op for OpenOp {
+ type Output = AsyncFd;
+ type Resources = CString; // path.
+ type Args = (libc::c_int, libc::mode_t); // flags, mode.
+ + #[allow(clippy::cast_sign_loss)] + fn fill_submission( + path: &mut Self::Resources, + (flags, mode): &mut Self::Args, + submission: &mut sq::Submission, + ) { + submission.0.opcode = libc::IORING_OP_OPENAT as u8; + submission.0.fd = libc::AT_FDCWD; + submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { + addr: path.as_ptr() as _, + }; + submission.0.len = *mode; + submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { + open_flags: *flags as _, + }; + D::create_flags(submission); + } + + fn map_ok(sq: &SubmissionQueue, _: Self::Resources, (_, fd): cq::OpReturn) -> Self::Output { + // SAFETY: kernel ensures that `fd` is valid. + unsafe { AsyncFd::from_raw(fd as _, sq.clone()) } + } +} + +impl OpExtract for OpenOp { + type ExtractOutput = (AsyncFd, PathBuf); + + fn map_ok_extract( + sq: &SubmissionQueue, + path: Self::Resources, + (_, fd): Self::OperationOutput, + ) -> Self::ExtractOutput { + // SAFETY: kernel ensures that `fd` is valid. + let fd = unsafe { AsyncFd::from_raw(fd as _, sq.clone()) }; + let path = path_from_cstring(path); + (fd, path) + } +} + +pub(crate) struct CreateDirOp; + +impl io_uring::Op for CreateDirOp { + type Output = (); + type Resources = CString; // path. + type Args = (); + + fn fill_submission( + path: &mut Self::Resources, + (): &mut Self::Args, + submission: &mut sq::Submission, + ) { + submission.0.opcode = libc::IORING_OP_MKDIRAT as u8; + submission.0.fd = libc::AT_FDCWD; + submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { + addr: path.as_ptr() as _, + }; + submission.0.len = 0o777; // Same as used by the standard library. + } + + fn map_ok(_: &SubmissionQueue, _: Self::Resources, (_, n): cq::OpReturn) -> Self::Output { + debug_assert!(n == 0); + } +} + +impl OpExtract for CreateDirOp { + type ExtractOutput = PathBuf; + + fn map_ok_extract( + _: &SubmissionQueue, + path: Self::Resources, + (_, n): Self::OperationOutput, + ) -> Self::ExtractOutput { + debug_assert!(n == 0); + path_from_cstring(path) + } +} + +pub(crate) struct RenameOp; + +impl io_uring::Op for RenameOp { + type Output = (); + type Resources = (CString, CString); // from path, to path + type Args = (); + + #[allow(clippy::cast_sign_loss)] + fn fill_submission( + (from, to): &mut Self::Resources, + (): &mut Self::Args, + submission: &mut sq::Submission, + ) { + submission.0.opcode = libc::IORING_OP_RENAMEAT as u8; + submission.0.fd = libc::AT_FDCWD; + submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { + off: to.as_ptr() as _, + }; + submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { + addr: from.as_ptr() as _, + }; + submission.0.len = libc::AT_FDCWD as _; + } + + fn map_ok(_: &SubmissionQueue, _: Self::Resources, (_, n): cq::OpReturn) -> Self::Output { + debug_assert!(n == 0); + } +} + +impl OpExtract for RenameOp { + type ExtractOutput = (PathBuf, PathBuf); + + fn map_ok_extract( + _: &SubmissionQueue, + (from, to): Self::Resources, + (_, n): Self::OperationOutput, + ) -> Self::ExtractOutput { + debug_assert!(n == 0); + (path_from_cstring(from), path_from_cstring(to)) + } +} + +pub(crate) struct DeleteOp; + +impl io_uring::Op for DeleteOp { + type Output = (); + type Resources = CString; // path + type Args = RemoveFlag; + + #[allow(clippy::cast_sign_loss)] + fn fill_submission( + path: &mut Self::Resources, + flags: &mut Self::Args, + submission: &mut sq::Submission, + ) { + submission.0.opcode = libc::IORING_OP_UNLINKAT as u8; + submission.0.fd = libc::AT_FDCWD; + submission.0.__bindgen_anon_2 = 
libc::io_uring_sqe__bindgen_ty_2 { + addr: path.as_ptr() as _, + }; + let flags = match flags { + RemoveFlag::File => 0, + RemoveFlag::Directory => libc::AT_REMOVEDIR, + }; + submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { + unlink_flags: flags as _, + }; + } + + fn map_ok(_: &SubmissionQueue, _: Self::Resources, (_, n): cq::OpReturn) -> Self::Output { + debug_assert!(n == 0); + } +} + +impl OpExtract for DeleteOp { + type ExtractOutput = PathBuf; + + fn map_ok_extract( + _: &SubmissionQueue, + path: Self::Resources, + (_, n): Self::OperationOutput, + ) -> Self::ExtractOutput { + debug_assert!(n == 0); + path_from_cstring(path) + } +} + +pub(crate) struct SyncDataOp; + +impl io_uring::FdOp for SyncDataOp { + type Output = (); + type Resources = (); + type Args = SyncDataFlag; + + fn fill_submission( + fd: &AsyncFd, + (): &mut Self::Resources, + flags: &mut Self::Args, + submission: &mut sq::Submission, + ) { + submission.0.opcode = libc::IORING_OP_FSYNC as u8; + submission.0.fd = fd.fd(); + let fsync_flags = match flags { + SyncDataFlag::All => 0, + SyncDataFlag::Data => libc::IORING_FSYNC_DATASYNC, + }; + submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { fsync_flags }; + } + + fn map_ok( + _: &AsyncFd, + (): Self::Resources, + (_, n): cq::OpReturn, + ) -> Self::Output { + debug_assert!(n == 0); + } +} + +pub(crate) struct StatOp; + +impl io_uring::FdOp for StatOp { + type Output = Metadata; + type Resources = Box; + type Args = (); + + fn fill_submission( + fd: &AsyncFd, + metadata: &mut Self::Resources, + (): &mut Self::Args, + submission: &mut sq::Submission, + ) { + submission.0.opcode = libc::IORING_OP_STATX as u8; + submission.0.fd = fd.fd(); + submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { + // SAFETY: this is safe because `Metadata` is transparent. + off: ptr::from_mut(&mut **metadata).addr() as _, + }; + submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { + addr: c"".as_ptr() as _, // Not using a path. 
+ };
+ submission.0.len = METADATA_FLAGS;
+ submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+ statx_flags: libc::AT_EMPTY_PATH as _,
+ };
+ }
+
+ fn map_ok(
+ _: &AsyncFd,
+ metadata: Self::Resources,
+ (_, n): cq::OpReturn,
+ ) -> Self::Output {
+ debug_assert!(n == 0);
+ debug_assert!(metadata.mask() & METADATA_FLAGS == METADATA_FLAGS);
+ *metadata
+ }
+}
+
+pub(crate) struct AdviseOp;
+
+impl io_uring::FdOp for AdviseOp {
+ type Output = ();
+ type Resources = ();
+ type Args = (u64, u32, libc::c_int); // offset, length, advice
+
+ #[allow(clippy::cast_sign_loss)]
+ fn fill_submission(
+ fd: &AsyncFd,
+ (): &mut Self::Resources,
+ (offset, length, advice): &mut Self::Args,
+ submission: &mut sq::Submission,
+ ) {
+ submission.0.opcode = libc::IORING_OP_FADVISE as u8;
+ submission.0.fd = fd.fd();
+ submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: *offset };
+ submission.0.len = *length;
+ submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+ fadvise_advice: *advice as _,
+ };
+ }
+
+ fn map_ok(
+ _: &AsyncFd,
+ (): Self::Resources,
+ (_, n): cq::OpReturn,
+ ) -> Self::Output {
+ debug_assert!(n == 0);
+ }
+}
+
+pub(crate) struct AllocateOp;
+
+impl io_uring::FdOp for AllocateOp {
+ type Output = ();
+ type Resources = ();
+ type Args = (u64, u32, libc::c_int); // offset, length, mode
+
+ #[allow(clippy::cast_sign_loss)]
+ fn fill_submission(
+ fd: &AsyncFd,
+ (): &mut Self::Resources,
+ (offset, length, mode): &mut Self::Args,
+ submission: &mut sq::Submission,
+ ) {
+ submission.0.opcode = libc::IORING_OP_FALLOCATE as u8;
+ submission.0.fd = fd.fd();
+ submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: *offset };
+ submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+ addr: (*length).into(),
+ };
+ submission.0.len = *mode as u32;
+ }
+
+ fn map_ok(
+ _: &AsyncFd,
+ (): Self::Resources,
+ (_, n): cq::OpReturn,
+ ) -> Self::Output {
+ debug_assert!(n == 0);
+ }
+}
+
+pub(crate) struct TruncateOp;
+
+impl io_uring::FdOp for TruncateOp {
+ type Output = ();
+ type Resources = ();
+ type Args = u64; // length
+
+ fn fill_submission(
+ fd: &AsyncFd,
+ (): &mut Self::Resources,
+ length: &mut Self::Args,
+ submission: &mut sq::Submission,
+ ) {
+ submission.0.opcode = libc::IORING_OP_FTRUNCATE as u8;
+ submission.0.fd = fd.fd();
+ submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: *length };
+ }
+
+ fn map_ok(
+ _: &AsyncFd,
+ (): Self::Resources,
+ (_, n): cq::OpReturn,
+ ) -> Self::Output {
+ debug_assert!(n == 0);
+ }
+}
diff --git a/src/io_uring/io.rs b/src/io_uring/io.rs
new file mode 100644
index 00000000..d794c45a
--- /dev/null
+++ b/src/io_uring/io.rs
@@ -0,0 +1,506 @@
+use std::alloc::{self, alloc, alloc_zeroed, dealloc};
+use std::marker::PhantomData;
+use std::mem::MaybeUninit;
+use std::os::fd::{AsRawFd, RawFd};
+use std::ptr::{self, NonNull};
+use std::sync::atomic::{AtomicU16, Ordering};
+use std::sync::{Mutex, OnceLock};
+use std::{io, slice};
+
+use crate::fd::{AsyncFd, Descriptor};
+use crate::io::{Buf, BufGroupId, BufId, BufMut, BufMutSlice, BufSlice, Buffer, SpliceDirection};
+use crate::io_uring::{self, cq, libc, sq};
+use crate::op::FdOpExtract;
+use crate::SubmissionQueue;
+
+// Re-export so we don't have to worry about importing `std::io` and `crate::io`.
+pub(crate) use std::io::*;
+
+pub(crate) use crate::unix::{IoMutSlice, IoSlice};
+
+#[derive(Debug)]
+pub(crate) struct ReadBufPool {
+ /// Identifier used by the kernel (aka `bgid`, `buf_group`).
+#[derive(Debug)]
+pub(crate) struct ReadBufPool {
+    /// Identifier used by the kernel (aka `bgid`, `buf_group`).
+    id: BufGroupId,
+    /// Submission queue used to unregister the pool on drop.
+    sq: SubmissionQueue,
+    /// Number of buffers.
+    pool_size: u16,
+    /// Size of the buffers.
+    buf_size: u32,
+    /// Address of the allocation containing the buffers, see
+    /// `alloc_layout_buffers`.
+    bufs_addr: *mut u8,
+    /// Address of the ring registration, see `alloc_layout_ring`.
+    ring_addr: *mut libc::io_uring_buf_ring,
+    /// Mask used to determine the tail in the ring.
+    tail_mask: u16,
+    /// Lock used to reregister [`ReadBuf`]s after usage, see the `Drop`
+    /// implementation of `ReadBuf`.
+    reregister_lock: Mutex<()>,
+}
+
+/// Buffer group ID generator.
+static ID: AtomicU16 = AtomicU16::new(0);
+
+impl ReadBufPool {
+    pub(crate) fn new(
+        sq: SubmissionQueue,
+        pool_size: u16,
+        buf_size: u32,
+    ) -> io::Result<ReadBufPool> {
+        debug_assert!(pool_size <= 1 << 15);
+        debug_assert!(pool_size.is_power_of_two());
+
+        let ring_fd = sq.inner.shared_data().rfd.as_raw_fd();
+        let id = ID.fetch_add(1, Ordering::SeqCst);
+
+        // These allocations must be page aligned.
+        let page_size = page_size();
+        // NOTE: do the layout calculations first in case of an error.
+        let ring_layout = alloc_layout_ring(pool_size, page_size)?;
+        let bufs_layout = alloc_layout_buffers(pool_size, buf_size, page_size)?;
+
+        // Allocation for the buffer ring, shared with the kernel.
+        let ring_addr = match unsafe { alloc_zeroed(ring_layout) } {
+            ring_addr if ring_addr.is_null() => return Err(io::ErrorKind::OutOfMemory.into()),
+            #[allow(clippy::cast_ptr_alignment)] // Did proper alignment in `alloc_layout_ring`.
+            ring_addr => ring_addr.cast::<libc::io_uring_buf_ring>(),
+        };
+
+        // Register the buffer ring with the kernel.
+        let buf_register = libc::io_uring_buf_reg {
+            ring_addr: ring_addr as u64,
+            ring_entries: u32::from(pool_size),
+            bgid: id,
+            flags: 0,
+            // Reserved for future use.
+            resv: [0; 3],
+        };
+        log::trace!(ring_fd = ring_fd, buffer_group = id, size = pool_size; "registering buffer pool");
+
+        let result = sq.inner.shared_data().register(
+            libc::IORING_REGISTER_PBUF_RING,
+            ptr::addr_of!(buf_register).cast(),
+            1,
+        );
+        if let Err(err) = result {
+            // SAFETY: we just allocated this above.
+            unsafe { dealloc(ring_addr.cast(), ring_layout) };
+            return Err(err);
+        }
+
+        // Create a `ReadBufPool` early to manage the allocations and
+        // registration.
+        let pool = ReadBufPool {
+            id: BufGroupId(id),
+            sq,
+            pool_size,
+            buf_size,
+            // Allocate the buffer space, checked below.
+            bufs_addr: unsafe { alloc(bufs_layout) },
+            ring_addr,
+            // NOTE: this works because `pool_size` must be a power of two.
+            tail_mask: pool_size - 1,
+            reregister_lock: Mutex::new(()),
+        };
+
+        if pool.bufs_addr.is_null() {
+            // NOTE: dealloc and unregister happen in the `Drop` impl of
+            // `ReadBufPool`.
+            return Err(io::ErrorKind::OutOfMemory.into());
+        }
+
+        // Fill the buffer ring to let the kernel know what buffers are
+        // available.
+        let ring_tail = pool.ring_tail();
+        let ring_addr = unsafe { &mut *ring_addr };
+        let bufs = unsafe {
+            slice::from_raw_parts_mut(
+                ptr::addr_of_mut!(ring_addr.__bindgen_anon_1.bufs)
+                    .cast::<MaybeUninit<libc::io_uring_buf>>(),
+                pool_size as usize,
+            )
+        };
+        for (i, ring_buf) in bufs.iter_mut().enumerate() {
+            let addr = unsafe { pool.bufs_addr.add(i * buf_size as usize) };
+            log::trace!(buffer_group = id, buffer = i, addr:? = addr, len = buf_size; "registering buffer");
+            ring_buf.write(libc::io_uring_buf {
+                addr: addr as u64,
+                len: buf_size,
+                bid: i as u16,
+                resv: 0,
+            });
+        }
+        ring_tail.store(pool_size, Ordering::Release);
+
+        Ok(pool)
+    }
+
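+    // A worked example of the ring filled above: with `pool_size = 4` and
+    // `buf_size = 4096` the loop writes four `io_uring_buf` entries pointing
+    // at `bufs_addr + 0`, `+4096`, `+8192` and `+12288`, then sets the tail
+    // to 4, making all four buffers available to the kernel. With
+    // `tail_mask = 3` later tail values keep wrapping onto those four slots.
+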
+    pub(crate) const fn buf_size(&self) -> usize {
+        self.buf_size as usize
+    }
+
+    /// Returns the group id for this pool.
+    pub(crate) const fn group_id(&self) -> BufGroupId {
+        self.id
+    }
+
+    pub(crate) unsafe fn init_buffer(&self, id: BufId, n: u32) -> NonNull<[u8]> {
+        let addr = self.bufs_addr.add(id.0 as usize * self.buf_size());
+        log::trace!(buffer_group = self.id.0, buffer = id.0, addr:? = addr, len = n; "initialised buffer");
+        // SAFETY: `bufs_addr` is not NULL.
+        let addr = unsafe { NonNull::new_unchecked(addr) };
+        NonNull::slice_from_raw_parts(addr, n as usize)
+    }
+
+    pub(crate) unsafe fn release(&self, ptr: NonNull<[u8]>) {
+        let ring_tail = self.ring_tail();
+
+        // Calculate the buffer id based on the `ptr`, which points to the
+        // start of our buffer, and `bufs_addr`, which points to the start of
+        // the pool, by calculating the difference and dividing it by the
+        // buffer size.
+        let buf_id = unsafe {
+            usize::try_from(ptr.as_ptr().cast::<u8>().offset_from(self.bufs_addr))
+                .unwrap_unchecked()
+                / self.buf_size as usize
+        } as u16;
+
+        // Because we need to fill the `ring_buf` and then atomically update
+        // the `ring_tail`, we do it while holding a lock.
+        let guard = self.reregister_lock.lock().unwrap();
+        // Get a ring_buf we write into.
+        // NOTE: we allocated at least as many `io_uring_buf`s as we did
+        // buffers, so there is always a slot available for us.
+        let tail = ring_tail.load(Ordering::Acquire);
+        let ring_idx = tail & self.tail_mask;
+        let ring_buf = unsafe {
+            &mut *(ptr::addr_of_mut!((*self.ring_addr).__bindgen_anon_1.bufs)
+                .cast::<MaybeUninit<libc::io_uring_buf>>()
+                .add(ring_idx as usize))
+        };
+        log::trace!(buffer_group = self.id.0, buffer = buf_id, addr:? = ptr; "reregistering buffer");
+        ring_buf.write(libc::io_uring_buf {
+            addr: ptr.as_ptr().cast::<u8>() as u64,
+            len: self.buf_size,
+            bid: buf_id,
+            resv: 0,
+        });
+        ring_tail.store(tail.wrapping_add(1), Ordering::SeqCst);
+        drop(guard);
+    }
+
+    /// Returns the tail of the buffer ring.
+    fn ring_tail(&self) -> &AtomicU16 {
+        unsafe {
+            &*(ptr::addr_of!(((*self.ring_addr).__bindgen_anon_1.__bindgen_anon_1.tail))
+                .cast::<AtomicU16>())
+        }
+    }
+}
+
+unsafe impl Sync for ReadBufPool {}
+unsafe impl Send for ReadBufPool {}
+
+impl Drop for ReadBufPool {
+    fn drop(&mut self) {
+        let page_size = page_size();
+
+        // Unregister the buffer pool with the ring.
+        let buf_register = libc::io_uring_buf_reg {
+            bgid: self.id.0,
+            // Unused in this call.
+            ring_addr: 0,
+            ring_entries: 0,
+            flags: 0,
+            // Reserved for future use.
+            resv: [0; 3],
+        };
+        let result = self.sq.inner.shared_data().register(
+            libc::IORING_UNREGISTER_PBUF_RING,
+            ptr::addr_of!(buf_register).cast(),
+            1,
+        );
+        if let Err(err) = result {
+            log::warn!("failed to unregister a10::ReadBufPool: {err}");
+        }
+
+        // Next deallocate the ring.
+        unsafe {
+            // SAFETY: created this layout in `new` and didn't fail, so it's
+            // still valid here.
+            let ring_layout = alloc_layout_ring(self.pool_size, page_size).unwrap();
+            // SAFETY: we allocated this in `new`, so it's safe to deallocate
+            // for us.
+            dealloc(self.ring_addr.cast(), ring_layout);
+        };
+
+        // And finally deallocate the buffers themselves.
+        if !self.bufs_addr.is_null() {
+            unsafe {
+                // SAFETY: created this layout in `new` and didn't fail, so it's
+                // still valid here.
+                let layout =
+                    alloc_layout_buffers(self.pool_size, self.buf_size, page_size).unwrap();
+                // SAFETY: we allocated this in `new`, so it's safe to
+                // deallocate for us.
+                dealloc(self.bufs_addr, layout);
+            }
+        }
+    }
+}
+
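+// Layout helpers for the two page-aligned allocations used above; e.g. a
+// pool of 256 buffers of 4096 bytes needs a 1 MiB buffer allocation
+// (256 * 4096) and a 4 KiB ring allocation (256 16-byte `io_uring_buf`
+// entries).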
+fn alloc_layout_buffers(
+    pool_size: u16,
+    buf_size: u32,
+    page_size: usize,
+) -> io::Result<alloc::Layout> {
+    match alloc::Layout::from_size_align(pool_size as usize * buf_size as usize, page_size) {
+        Ok(layout) => Ok(layout),
+        // This will only fail if the size is larger than roughly
+        // `isize::MAX - PAGE_SIZE`, which is a huge allocation.
+        Err(_) => Err(io::ErrorKind::OutOfMemory.into()),
+    }
+}
+
+fn alloc_layout_ring(pool_size: u16, page_size: usize) -> io::Result<alloc::Layout> {
+    match alloc::Layout::from_size_align(
+        size_of::<libc::io_uring_buf_ring>() * pool_size as usize,
+        page_size,
+    ) {
+        Ok(layout) => Ok(layout),
+        // This will only fail if the size is larger than roughly
+        // `isize::MAX - PAGE_SIZE`, which is a huge allocation.
+        Err(_) => Err(io::ErrorKind::OutOfMemory.into()),
+    }
+}
+
+pub(crate) struct ReadOp<B>(PhantomData<*const B>);
+
+impl<B: BufMut> io_uring::FdOp for ReadOp<B> {
+    type Output = B;
+    type Resources = Buffer<B>;
+    type Args = u64; // Offset.
+
+    fn fill_submission<D: Descriptor>(
+        fd: &AsyncFd<D>,
+        buf: &mut Self::Resources,
+        offset: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        let (ptr, len) = unsafe { buf.buf.parts_mut() };
+        submission.0.opcode = libc::IORING_OP_READ as u8;
+        submission.0.fd = fd.fd();
+        submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: *offset };
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: ptr as _ };
+        submission.0.len = len;
+        if let Some(buf_group) = buf.buf.buffer_group() {
+            submission.0.__bindgen_anon_4.buf_group = buf_group.0;
+            submission.0.flags |= libc::IOSQE_BUFFER_SELECT;
+        }
+    }
+
+    fn map_ok<D: Descriptor>(
+        _: &AsyncFd<D>,
+        mut buf: Self::Resources,
+        (buf_id, n): cq::OpReturn,
+    ) -> Self::Output {
+        // SAFETY: kernel just initialised the bytes for us.
+        unsafe {
+            buf.buf.buffer_init(BufId(buf_id), n);
+        };
+        buf.buf
+    }
+}
+
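+// When the buffer in `ReadOp` comes from a `ReadBufPool`, `buffer_group()`
+// returns the pool's group id; `fill_submission` then sets
+// `IOSQE_BUFFER_SELECT` so the kernel picks a free buffer from that group and
+// reports its id back in the completion (the `buf_id` handed to `map_ok`).
+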
+pub(crate) struct ReadVectoredOp<B, const N: usize>(PhantomData<*const B>);
+
+impl<B: BufMutSlice<N>, const N: usize> io_uring::FdOp for ReadVectoredOp<B, N> {
+    type Output = B;
+    type Resources = (B, Box<[crate::io::IoMutSlice; N]>);
+    type Args = u64; // Offset.
+
+    fn fill_submission<D: Descriptor>(
+        fd: &AsyncFd<D>,
+        (_, iovecs): &mut Self::Resources,
+        offset: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_READV as u8;
+        submission.0.fd = fd.fd();
+        submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: *offset };
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+            addr: iovecs.as_mut_ptr().addr() as _,
+        };
+        submission.0.len = iovecs.len() as u32;
+    }
+
+    fn map_ok<D: Descriptor>(
+        _: &AsyncFd<D>,
+        (mut bufs, _): Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        // SAFETY: kernel just initialised the buffers for us.
+        unsafe { bufs.set_init(n as usize) };
+        bufs
+    }
+}
+
+pub(crate) struct WriteOp<B>(PhantomData<*const B>);
+
+impl<B: Buf> io_uring::FdOp for WriteOp<B> {
+    type Output = usize;
+    type Resources = Buffer<B>;
+    type Args = u64; // Offset.
+
+    fn fill_submission<D: Descriptor>(
+        fd: &AsyncFd<D>,
+        buf: &mut Self::Resources,
+        offset: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        let (ptr, length) = unsafe { buf.buf.parts() };
+        submission.0.opcode = libc::IORING_OP_WRITE as u8;
+        submission.0.fd = fd.fd();
+        submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: *offset };
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: ptr as u64 };
+        submission.0.len = length;
+    }
+
+    fn map_ok<D: Descriptor>(
+        _: &AsyncFd<D>,
+        _: Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        n as usize
+    }
+}
+
+impl<B: Buf> FdOpExtract for WriteOp<B> {
+    type ExtractOutput = (B, usize);
+
+    fn map_ok_extract<D: Descriptor>(
+        _: &AsyncFd<D>,
+        buf: Self::Resources,
+        (_, n): Self::OperationOutput,
+    ) -> Self::ExtractOutput {
+        (buf.buf, n as usize)
+    }
+}
+
+pub(crate) struct WriteVectoredOp<B, const N: usize>(PhantomData<*const B>);
+
+impl<B: BufSlice<N>, const N: usize> io_uring::FdOp for WriteVectoredOp<B, N> {
+    type Output = usize;
+    type Resources = (B, Box<[crate::io::IoSlice; N]>);
+    type Args = u64; // Offset.
+
+    fn fill_submission<D: Descriptor>(
+        fd: &AsyncFd<D>,
+        (_, iovecs): &mut Self::Resources,
+        offset: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_WRITEV as u8;
+        submission.0.fd = fd.fd();
+        submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: *offset };
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+            addr: iovecs.as_ptr().addr() as _,
+        };
+        submission.0.len = iovecs.len() as _;
+    }
+
+    fn map_ok<D: Descriptor>(
+        _: &AsyncFd<D>,
+        _: Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        n as usize
+    }
+}
+
+impl<B: BufSlice<N>, const N: usize> FdOpExtract for WriteVectoredOp<B, N> {
+    type ExtractOutput = (B, usize);
+
+    fn map_ok_extract<D: Descriptor>(
+        _: &AsyncFd<D>,
+        (buf, _): Self::Resources,
+        (_, n): Self::OperationOutput,
+    ) -> Self::ExtractOutput {
+        (buf, n as usize)
+    }
+}
+
+pub(crate) struct SpliceOp;
+
+impl io_uring::FdOp for SpliceOp {
+    type Output = usize;
+    type Resources = ();
+    type Args = (RawFd, SpliceDirection, u64, u64, u32, libc::c_int); // target, direction, off_in, off_out, len, flags
+
+    #[allow(clippy::cast_sign_loss)]
+    fn fill_submission<D: Descriptor>(
+        fd: &AsyncFd<D>,
+        (): &mut Self::Resources,
+        (target, direction, off_in, off_out, length, flags): &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        let (fd_in, fd_out) = match *direction {
+            SpliceDirection::To => (fd.fd(), target.as_raw_fd()),
+            SpliceDirection::From => (target.as_raw_fd(), fd.fd()),
+        };
+        submission.0.opcode = libc::IORING_OP_SPLICE as u8;
+        submission.0.fd = fd_out;
+        submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: *off_out };
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+            splice_off_in: *off_in,
+        };
+        submission.0.len = *length;
+        submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+            splice_flags: *flags as _,
+        };
+        submission.0.__bindgen_anon_5 = libc::io_uring_sqe__bindgen_ty_5 {
+            splice_fd_in: fd_in,
+        };
+    }
+
+    fn map_ok<D: Descriptor>(
+        _: &AsyncFd<D>,
+        (): Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        n as usize
+    }
+}
+
+pub(crate) struct CloseOp<D>(PhantomData<*const D>);
+
+impl<D: Descriptor> io_uring::Op for CloseOp<D> {
+    type Output = ();
+    type Resources = ();
+    type Args = RawFd;
+
+    #[allow(clippy::cast_sign_loss)]
+    fn fill_submission(
+        (): &mut Self::Resources,
+        fd: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        D::close_flags(*fd, submission);
+    }
+
+    fn map_ok(_: &SubmissionQueue, (): Self::Resources, (_, n): cq::OpReturn) -> Self::Output {
+        debug_assert!(n == 0);
+    }
+}
+
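+// Regular file descriptors are closed with a plain `IORING_OP_CLOSE` below;
+// `CloseOp` defers to `D::close_flags` so other descriptor kinds can fill
+// the submission differently.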
+pub(crate) fn close_file_fd(fd: RawFd, submission: &mut io_uring::sq::Submission) {
+    submission.0.opcode = libc::IORING_OP_CLOSE as u8;
+    submission.0.fd = fd;
+}
+
+/// Size of a single page, often 4096.
+#[allow(clippy::cast_sign_loss)] // Page size shouldn't be negative.
+fn page_size() -> usize {
+    static PAGE_SIZE: OnceLock<usize> = OnceLock::new();
+    *PAGE_SIZE.get_or_init(|| unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize })
+}
diff --git a/src/io_uring/libc.rs b/src/io_uring/libc.rs
new file mode 100644
index 00000000..68be2e9c
--- /dev/null
+++ b/src/io_uring/libc.rs
@@ -0,0 +1,1387 @@
+//! Code that should be moved to libc once C libraries have a wrapper.
+
+#![allow(warnings, clippy::all, clippy::pedantic, clippy::nursery)]
+
+pub use libc::*;
+
+pub unsafe fn io_uring_setup(entries: c_uint, p: *mut io_uring_params) -> c_int {
+    syscall(SYS_io_uring_setup, entries as c_long, p as c_long) as _
+}
+
+pub unsafe fn io_uring_register(
+    fd: c_int,
+    opcode: c_uint,
+    arg: *const c_void,
+    nr_args: c_uint,
+) -> c_int {
+    syscall(
+        SYS_io_uring_register,
+        fd as c_long,
+        opcode as c_long,
+        arg as c_long,
+        nr_args as c_long,
+    ) as _
+}
+
+pub unsafe fn io_uring_enter2(
+    fd: c_int,
+    to_submit: c_uint,
+    min_complete: c_uint,
+    flags: c_uint,
+    arg: *const libc::c_void,
+    size: usize,
+) -> c_int {
+    syscall(
+        SYS_io_uring_enter,
+        fd as c_long,
+        to_submit as c_long,
+        min_complete as c_long,
+        flags as c_long,
+        arg as c_long,
+        size as c_long,
+    ) as _
+}
+
+// Work around for the `IOSQE_*` flags missing from the libc crate.
+pub const IOSQE_FIXED_FILE: u8 = 1 << IOSQE_FIXED_FILE_BIT as u8;
+pub const IOSQE_IO_DRAIN: u8 = 1 << IOSQE_IO_DRAIN_BIT as u8;
+pub const IOSQE_IO_LINK: u8 = 1 << IOSQE_IO_LINK_BIT as u8;
+pub const IOSQE_IO_HARDLINK: u8 = 1 << IOSQE_IO_HARDLINK_BIT as u8;
+pub const IOSQE_ASYNC: u8 = 1 << IOSQE_ASYNC_BIT as u8;
+pub const IOSQE_BUFFER_SELECT: u8 = 1 << IOSQE_BUFFER_SELECT_BIT as u8;
+pub const IOSQE_CQE_SKIP_SUCCESS: u8 = 1 << IOSQE_CQE_SKIP_SUCCESS_BIT as u8;
+
+pub type __kernel_time64_t = ::std::os::raw::c_longlong;
+pub type __u8 = ::std::os::raw::c_uchar;
+pub type __u16 = ::std::os::raw::c_ushort;
+pub type __s32 = ::std::os::raw::c_int;
+pub type __u32 = ::std::os::raw::c_uint;
+pub type __u64 = ::std::os::raw::c_ulonglong;
+pub type __kernel_rwf_t = ::std::os::raw::c_int;
+pub type io_uring_sqe_flags_bit = ::std::os::raw::c_uint;
+pub type io_uring_op = ::std::os::raw::c_uint;
+pub type io_uring_msg_ring_flags = ::std::os::raw::c_uint;
+pub type io_uring_register_op = ::std::os::raw::c_uint;
+pub type _bindgen_ty_13 = ::std::os::raw::c_uint;
+pub type _bindgen_ty_14 = ::std::os::raw::c_uint;
+pub type _bindgen_ty_15 = ::std::os::raw::c_uint;
+pub type io_uring_register_pbuf_ring_flags = ::std::os::raw::c_uint;
+pub type io_uring_register_restriction_op = ::std::os::raw::c_uint;
+pub type _bindgen_ty_16 = ::std::os::raw::c_uint;
+pub type io_uring_socket_op = ::std::os::raw::c_uint;
+#[repr(C)]
+#[derive(Default)]
+pub struct __IncompleteArrayField<T>(::std::marker::PhantomData<T>, [T; 0]);
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct __kernel_timespec {
+    pub tv_sec: __kernel_time64_t,
+    pub tv_nsec: ::std::os::raw::c_longlong,
+}
+#[repr(C)]
+pub struct io_uring_sqe {
+    pub opcode: __u8,
+    pub flags: __u8,
+    pub ioprio: __u16,
+    pub fd: __s32,
+    pub __bindgen_anon_1: io_uring_sqe__bindgen_ty_1,
+    pub __bindgen_anon_2: io_uring_sqe__bindgen_ty_2,
+    pub len: __u32,
+    pub __bindgen_anon_3: io_uring_sqe__bindgen_ty_3,
+    pub user_data: __u64,
+    pub
__bindgen_anon_4: io_uring_sqe__bindgen_ty_4, + pub personality: __u16, + pub __bindgen_anon_5: io_uring_sqe__bindgen_ty_5, + pub __bindgen_anon_6: io_uring_sqe__bindgen_ty_6, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_sqe__bindgen_ty_1__bindgen_ty_1 { + pub cmd_op: __u32, + pub __pad1: __u32, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_sqe__bindgen_ty_2__bindgen_ty_1 { + pub level: __u32, + pub optname: __u32, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_sqe__bindgen_ty_5__bindgen_ty_1 { + pub addr_len: __u16, + pub __pad3: [__u16; 1usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_sqe__bindgen_ty_6__bindgen_ty_1 { + pub addr3: __u64, + pub __pad2: [__u64; 1usize], +} +#[repr(C)] +pub struct io_uring_cqe { + pub user_data: __u64, + pub res: __s32, + pub flags: __u32, + pub big_cqe: __IncompleteArrayField<__u64>, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_sqring_offsets { + pub head: __u32, + pub tail: __u32, + pub ring_mask: __u32, + pub ring_entries: __u32, + pub flags: __u32, + pub dropped: __u32, + pub array: __u32, + pub resv1: __u32, + pub user_addr: __u64, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_cqring_offsets { + pub head: __u32, + pub tail: __u32, + pub ring_mask: __u32, + pub ring_entries: __u32, + pub overflow: __u32, + pub cqes: __u32, + pub flags: __u32, + pub resv1: __u32, + pub user_addr: __u64, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_params { + pub sq_entries: __u32, + pub cq_entries: __u32, + pub flags: __u32, + pub sq_thread_cpu: __u32, + pub sq_thread_idle: __u32, + pub features: __u32, + pub wq_fd: __u32, + pub resv: [__u32; 3usize], + pub sq_off: io_sqring_offsets, + pub cq_off: io_cqring_offsets, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_files_update { + pub offset: __u32, + pub resv: __u32, + pub fds: __u64, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_region_desc { + pub user_addr: __u64, + pub size: __u64, + pub flags: __u32, + pub id: __u32, + pub mmap_offset: __u64, + pub __resv: [__u64; 4usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_mem_region_reg { + pub region_uptr: __u64, + pub flags: __u64, + pub __resv: [__u64; 2usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_rsrc_register { + pub nr: __u32, + pub flags: __u32, + pub resv2: __u64, + pub data: __u64, + pub tags: __u64, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_rsrc_update { + pub offset: __u32, + pub resv: __u32, + pub data: __u64, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_rsrc_update2 { + pub offset: __u32, + pub resv: __u32, + pub data: __u64, + pub tags: __u64, + pub nr: __u32, + pub resv2: __u32, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_probe_op { + pub op: __u8, + pub resv: __u8, + pub flags: __u16, + pub resv2: __u32, +} +#[repr(C)] +pub struct io_uring_probe { + pub last_op: __u8, + pub ops_len: __u8, + pub resv: __u16, + pub resv2: [__u32; 3usize], + pub ops: __IncompleteArrayField, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_restriction { + pub opcode: __u16, + pub __bindgen_anon_1: io_uring_restriction__bindgen_ty_1, + pub resv: __u8, + pub resv2: [__u32; 3usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_clock_register { + pub clockid: __u32, + pub __resv: [__u32; 3usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_clone_buffers { + pub src_fd: __u32, + pub flags: __u32, + pub src_off: __u32, + pub dst_off: 
__u32, + pub nr: __u32, + pub pad: [__u32; 3usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_buf { + pub addr: __u64, + pub len: __u32, + pub bid: __u16, + pub resv: __u16, +} +#[repr(C)] +pub struct io_uring_buf_ring { + pub __bindgen_anon_1: io_uring_buf_ring__bindgen_ty_1, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1 { + pub resv1: __u64, + pub resv2: __u32, + pub resv3: __u16, + pub tail: __u16, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_buf_reg { + pub ring_addr: __u64, + pub ring_entries: __u32, + pub bgid: __u16, + pub flags: __u16, + pub resv: [__u64; 3usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_buf_status { + pub buf_group: __u32, + pub head: __u32, + pub resv: [__u32; 8usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_napi { + pub busy_poll_to: __u32, + pub prefer_busy_poll: __u8, + pub pad: [__u8; 3usize], + pub resv: __u64, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_cqwait_reg_arg { + pub flags: __u32, + pub struct_size: __u32, + pub nr_entries: __u32, + pub pad: __u32, + pub user_addr: __u64, + pub pad2: [__u64; 3usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_reg_wait { + pub ts: __kernel_timespec, + pub min_wait_usec: __u32, + pub flags: __u32, + pub sigmask: __u64, + pub sigmask_sz: __u32, + pub pad: [__u32; 3usize], + pub pad2: [__u64; 2usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_getevents_arg { + pub sigmask: __u64, + pub sigmask_sz: __u32, + pub min_wait_usec: __u32, + pub ts: __u64, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_sync_cancel_reg { + pub addr: __u64, + pub fd: __s32, + pub flags: __u32, + pub timeout: __kernel_timespec, + pub opcode: __u8, + pub pad: [__u8; 7usize], + pub pad2: [__u64; 3usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_file_index_range { + pub off: __u32, + pub len: __u32, + pub resv: __u64, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_recvmsg_out { + pub namelen: __u32, + pub controllen: __u32, + pub payloadlen: __u32, + pub flags: __u32, +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_sq { + pub khead: *mut ::std::os::raw::c_uint, + pub ktail: *mut ::std::os::raw::c_uint, + pub kring_mask: *mut ::std::os::raw::c_uint, + pub kring_entries: *mut ::std::os::raw::c_uint, + pub kflags: *mut ::std::os::raw::c_uint, + pub kdropped: *mut ::std::os::raw::c_uint, + pub array: *mut ::std::os::raw::c_uint, + pub sqes: *mut io_uring_sqe, + pub sqe_head: ::std::os::raw::c_uint, + pub sqe_tail: ::std::os::raw::c_uint, + pub ring_sz: usize, + pub ring_ptr: *mut ::std::os::raw::c_void, + pub ring_mask: ::std::os::raw::c_uint, + pub ring_entries: ::std::os::raw::c_uint, + pub pad: [::std::os::raw::c_uint; 2usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring_cq { + pub khead: *mut ::std::os::raw::c_uint, + pub ktail: *mut ::std::os::raw::c_uint, + pub kring_mask: *mut ::std::os::raw::c_uint, + pub kring_entries: *mut ::std::os::raw::c_uint, + pub kflags: *mut ::std::os::raw::c_uint, + pub koverflow: *mut ::std::os::raw::c_uint, + pub cqes: *mut io_uring_cqe, + pub ring_sz: usize, + pub ring_ptr: *mut ::std::os::raw::c_void, + pub ring_mask: ::std::os::raw::c_uint, + pub ring_entries: ::std::os::raw::c_uint, + pub pad: [::std::os::raw::c_uint; 2usize], +} +#[repr(C)] +#[derive(Copy, Clone)] +pub struct io_uring { + pub sq: io_uring_sq, + pub cq: io_uring_cq, + pub flags: ::std::os::raw::c_uint, + 
pub ring_fd: ::std::os::raw::c_int, + pub features: ::std::os::raw::c_uint, + pub enter_ring_fd: ::std::os::raw::c_int, + pub int_flags: __u8, + pub pad: [__u8; 3usize], + pub pad2: ::std::os::raw::c_uint, +} +pub const IORING_FILE_INDEX_ALLOC: i32 = -1; +pub const IORING_SETUP_IOPOLL: u32 = 1; +pub const IORING_SETUP_SQPOLL: u32 = 2; +pub const IORING_SETUP_SQ_AFF: u32 = 4; +pub const IORING_SETUP_CQSIZE: u32 = 8; +pub const IORING_SETUP_CLAMP: u32 = 16; +pub const IORING_SETUP_ATTACH_WQ: u32 = 32; +pub const IORING_SETUP_R_DISABLED: u32 = 64; +pub const IORING_SETUP_SUBMIT_ALL: u32 = 128; +pub const IORING_SETUP_COOP_TASKRUN: u32 = 256; +pub const IORING_SETUP_TASKRUN_FLAG: u32 = 512; +pub const IORING_SETUP_SQE128: u32 = 1024; +pub const IORING_SETUP_CQE32: u32 = 2048; +pub const IORING_SETUP_SINGLE_ISSUER: u32 = 4096; +pub const IORING_SETUP_DEFER_TASKRUN: u32 = 8192; +pub const IORING_SETUP_NO_MMAP: u32 = 16384; +pub const IORING_SETUP_REGISTERED_FD_ONLY: u32 = 32768; +pub const IORING_SETUP_NO_SQARRAY: u32 = 65536; +pub const IORING_SETUP_HYBRID_IOPOLL: u32 = 131072; +pub const IORING_URING_CMD_FIXED: u32 = 1; +pub const IORING_URING_CMD_MASK: u32 = 1; +pub const IORING_FSYNC_DATASYNC: u32 = 1; +pub const IORING_TIMEOUT_ABS: u32 = 1; +pub const IORING_TIMEOUT_UPDATE: u32 = 2; +pub const IORING_TIMEOUT_BOOTTIME: u32 = 4; +pub const IORING_TIMEOUT_REALTIME: u32 = 8; +pub const IORING_LINK_TIMEOUT_UPDATE: u32 = 16; +pub const IORING_TIMEOUT_ETIME_SUCCESS: u32 = 32; +pub const IORING_TIMEOUT_MULTISHOT: u32 = 64; +pub const IORING_TIMEOUT_CLOCK_MASK: u32 = 12; +pub const IORING_TIMEOUT_UPDATE_MASK: u32 = 18; +pub const IORING_POLL_ADD_MULTI: u32 = 1; +pub const IORING_POLL_UPDATE_EVENTS: u32 = 2; +pub const IORING_POLL_UPDATE_USER_DATA: u32 = 4; +pub const IORING_POLL_ADD_LEVEL: u32 = 8; +pub const IORING_ASYNC_CANCEL_ALL: u32 = 1; +pub const IORING_ASYNC_CANCEL_FD: u32 = 2; +pub const IORING_ASYNC_CANCEL_ANY: u32 = 4; +pub const IORING_ASYNC_CANCEL_FD_FIXED: u32 = 8; +pub const IORING_ASYNC_CANCEL_USERDATA: u32 = 16; +pub const IORING_ASYNC_CANCEL_OP: u32 = 32; +pub const IORING_RECVSEND_POLL_FIRST: u32 = 1; +pub const IORING_RECV_MULTISHOT: u32 = 2; +pub const IORING_RECVSEND_FIXED_BUF: u32 = 4; +pub const IORING_SEND_ZC_REPORT_USAGE: u32 = 8; +pub const IORING_RECVSEND_BUNDLE: u32 = 16; +pub const IORING_NOTIF_USAGE_ZC_COPIED: u32 = 2147483648; +pub const IORING_ACCEPT_MULTISHOT: u32 = 1; +pub const IORING_ACCEPT_DONTWAIT: u32 = 2; +pub const IORING_ACCEPT_POLL_FIRST: u32 = 4; +pub const IORING_MSG_RING_CQE_SKIP: u32 = 1; +pub const IORING_MSG_RING_FLAGS_PASS: u32 = 2; +pub const IORING_FIXED_FD_NO_CLOEXEC: u32 = 1; +pub const IORING_NOP_INJECT_RESULT: u32 = 1; +pub const IORING_CQE_F_BUFFER: u32 = 1; +pub const IORING_CQE_F_MORE: u32 = 2; +pub const IORING_CQE_F_SOCK_NONEMPTY: u32 = 4; +pub const IORING_CQE_F_NOTIF: u32 = 8; +pub const IORING_CQE_F_BUF_MORE: u32 = 16; +pub const IORING_CQE_BUFFER_SHIFT: u32 = 16; +pub const IORING_OFF_SQ_RING: u32 = 0; +pub const IORING_OFF_CQ_RING: u32 = 134217728; +pub const IORING_OFF_SQES: u32 = 268435456; +pub const IORING_OFF_PBUF_RING: u32 = 2147483648; +pub const IORING_OFF_PBUF_SHIFT: u32 = 16; +pub const IORING_OFF_MMAP_MASK: u32 = 4160749568; +pub const IORING_SQ_NEED_WAKEUP: u32 = 1; +pub const IORING_SQ_CQ_OVERFLOW: u32 = 2; +pub const IORING_SQ_TASKRUN: u32 = 4; +pub const IORING_CQ_EVENTFD_DISABLED: u32 = 1; +pub const IORING_ENTER_GETEVENTS: u32 = 1; +pub const IORING_ENTER_SQ_WAKEUP: u32 = 2; +pub const IORING_ENTER_SQ_WAIT: u32 = 4; 
+pub const IORING_ENTER_EXT_ARG: u32 = 8; +pub const IORING_ENTER_REGISTERED_RING: u32 = 16; +pub const IORING_ENTER_ABS_TIMER: u32 = 32; +pub const IORING_ENTER_EXT_ARG_REG: u32 = 64; +pub const IORING_FEAT_SINGLE_MMAP: u32 = 1; +pub const IORING_FEAT_NODROP: u32 = 2; +pub const IORING_FEAT_SUBMIT_STABLE: u32 = 4; +pub const IORING_FEAT_RW_CUR_POS: u32 = 8; +pub const IORING_FEAT_CUR_PERSONALITY: u32 = 16; +pub const IORING_FEAT_FAST_POLL: u32 = 32; +pub const IORING_FEAT_POLL_32BITS: u32 = 64; +pub const IORING_FEAT_SQPOLL_NONFIXED: u32 = 128; +pub const IORING_FEAT_EXT_ARG: u32 = 256; +pub const IORING_FEAT_NATIVE_WORKERS: u32 = 512; +pub const IORING_FEAT_RSRC_TAGS: u32 = 1024; +pub const IORING_FEAT_CQE_SKIP: u32 = 2048; +pub const IORING_FEAT_LINKED_FILE: u32 = 4096; +pub const IORING_FEAT_REG_REG_RING: u32 = 8192; +pub const IORING_FEAT_RECVSEND_BUNDLE: u32 = 16384; +pub const IORING_FEAT_MIN_TIMEOUT: u32 = 32768; +pub const IORING_RSRC_REGISTER_SPARSE: u32 = 1; +pub const IORING_REGISTER_FILES_SKIP: i32 = -2; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of __kernel_timespec"][::std::mem::size_of::<__kernel_timespec>() - 16usize]; + ["Alignment of __kernel_timespec"][::std::mem::align_of::<__kernel_timespec>() - 8usize]; + ["Offset of field: __kernel_timespec::tv_sec"] + [::std::mem::offset_of!(__kernel_timespec, tv_sec) - 0usize]; + ["Offset of field: __kernel_timespec::tv_nsec"] + [::std::mem::offset_of!(__kernel_timespec, tv_nsec) - 8usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_sqe__bindgen_ty_1__bindgen_ty_1"] + [::std::mem::size_of::() - 8usize]; + ["Alignment of io_uring_sqe__bindgen_ty_1__bindgen_ty_1"] + [::std::mem::align_of::() - 4usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_1__bindgen_ty_1::cmd_op"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_1__bindgen_ty_1, cmd_op) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_1__bindgen_ty_1::__pad1"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_1__bindgen_ty_1, __pad1) - 4usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_sqe__bindgen_ty_1"] + [::std::mem::size_of::() - 8usize]; + ["Alignment of io_uring_sqe__bindgen_ty_1"] + [::std::mem::align_of::() - 8usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_1::off"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_1, off) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_1::addr2"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_1, addr2) - 0usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_sqe__bindgen_ty_2__bindgen_ty_1"] + [::std::mem::size_of::() - 8usize]; + ["Alignment of io_uring_sqe__bindgen_ty_2__bindgen_ty_1"] + [::std::mem::align_of::() - 4usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_2__bindgen_ty_1::level"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_2__bindgen_ty_1, level) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_2__bindgen_ty_1::optname"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_2__bindgen_ty_1, optname) - 4usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_sqe__bindgen_ty_2"] + [::std::mem::size_of::() - 8usize]; + ["Alignment of io_uring_sqe__bindgen_ty_2"] + [::std::mem::align_of::() - 8usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_2::addr"] + 
[::std::mem::offset_of!(io_uring_sqe__bindgen_ty_2, addr) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_2::splice_off_in"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_2, splice_off_in) - 0usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_sqe__bindgen_ty_3"] + [::std::mem::size_of::() - 4usize]; + ["Alignment of io_uring_sqe__bindgen_ty_3"] + [::std::mem::align_of::() - 4usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::rw_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, rw_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::fsync_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, fsync_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::poll_events"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, poll_events) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::poll32_events"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, poll32_events) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::sync_range_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, sync_range_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::msg_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, msg_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::timeout_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, timeout_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::accept_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, accept_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::cancel_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, cancel_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::open_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, open_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::statx_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, statx_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::fadvise_advice"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, fadvise_advice) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::splice_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, splice_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::rename_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, rename_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::unlink_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, unlink_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::hardlink_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, hardlink_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::xattr_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, xattr_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::msg_ring_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, msg_ring_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::uring_cmd_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, uring_cmd_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::waitid_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, waitid_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::futex_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, futex_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::install_fd_flags"] + 
[::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, install_fd_flags) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_3::nop_flags"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, nop_flags) - 0usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_sqe__bindgen_ty_4"] + [::std::mem::size_of::() - 2usize]; + ["Alignment of io_uring_sqe__bindgen_ty_4"] + [::std::mem::align_of::() - 1usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_4::buf_index"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_4, buf_index) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_4::buf_group"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_4, buf_group) - 0usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_sqe__bindgen_ty_5__bindgen_ty_1"] + [::std::mem::size_of::() - 4usize]; + ["Alignment of io_uring_sqe__bindgen_ty_5__bindgen_ty_1"] + [::std::mem::align_of::() - 2usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_5__bindgen_ty_1::addr_len"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_5__bindgen_ty_1, addr_len) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_5__bindgen_ty_1::__pad3"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_5__bindgen_ty_1, __pad3) - 2usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_sqe__bindgen_ty_5"] + [::std::mem::size_of::() - 4usize]; + ["Alignment of io_uring_sqe__bindgen_ty_5"] + [::std::mem::align_of::() - 4usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_5::splice_fd_in"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_5, splice_fd_in) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_5::file_index"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_5, file_index) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_5::optlen"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_5, optlen) - 0usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_sqe__bindgen_ty_6__bindgen_ty_1"] + [::std::mem::size_of::() - 16usize]; + ["Alignment of io_uring_sqe__bindgen_ty_6__bindgen_ty_1"] + [::std::mem::align_of::() - 8usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_6__bindgen_ty_1::addr3"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_6__bindgen_ty_1, addr3) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_6__bindgen_ty_1::__pad2"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_6__bindgen_ty_1, __pad2) - 8usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_sqe__bindgen_ty_6"] + [::std::mem::size_of::() - 16usize]; + ["Alignment of io_uring_sqe__bindgen_ty_6"] + [::std::mem::align_of::() - 8usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_6::optval"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_6, optval) - 0usize]; + ["Offset of field: io_uring_sqe__bindgen_ty_6::cmd"] + [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_6, cmd) - 0usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_sqe"][::std::mem::size_of::() - 64usize]; + ["Alignment of io_uring_sqe"][::std::mem::align_of::() - 8usize]; + ["Offset of field: io_uring_sqe::opcode"] + [::std::mem::offset_of!(io_uring_sqe, opcode) - 0usize]; + ["Offset of field: io_uring_sqe::flags"][::std::mem::offset_of!(io_uring_sqe, flags) - 1usize]; + ["Offset of field: io_uring_sqe::ioprio"] + 
[::std::mem::offset_of!(io_uring_sqe, ioprio) - 2usize]; + ["Offset of field: io_uring_sqe::fd"][::std::mem::offset_of!(io_uring_sqe, fd) - 4usize]; + ["Offset of field: io_uring_sqe::len"][::std::mem::offset_of!(io_uring_sqe, len) - 24usize]; + ["Offset of field: io_uring_sqe::user_data"] + [::std::mem::offset_of!(io_uring_sqe, user_data) - 32usize]; + ["Offset of field: io_uring_sqe::personality"] + [::std::mem::offset_of!(io_uring_sqe, personality) - 42usize]; +}; +pub const IOSQE_FIXED_FILE_BIT: io_uring_sqe_flags_bit = 0; +pub const IOSQE_IO_DRAIN_BIT: io_uring_sqe_flags_bit = 1; +pub const IOSQE_IO_LINK_BIT: io_uring_sqe_flags_bit = 2; +pub const IOSQE_IO_HARDLINK_BIT: io_uring_sqe_flags_bit = 3; +pub const IOSQE_ASYNC_BIT: io_uring_sqe_flags_bit = 4; +pub const IOSQE_BUFFER_SELECT_BIT: io_uring_sqe_flags_bit = 5; +pub const IOSQE_CQE_SKIP_SUCCESS_BIT: io_uring_sqe_flags_bit = 6; +pub const IORING_OP_NOP: io_uring_op = 0; +pub const IORING_OP_READV: io_uring_op = 1; +pub const IORING_OP_WRITEV: io_uring_op = 2; +pub const IORING_OP_FSYNC: io_uring_op = 3; +pub const IORING_OP_READ_FIXED: io_uring_op = 4; +pub const IORING_OP_WRITE_FIXED: io_uring_op = 5; +pub const IORING_OP_POLL_ADD: io_uring_op = 6; +pub const IORING_OP_POLL_REMOVE: io_uring_op = 7; +pub const IORING_OP_SYNC_FILE_RANGE: io_uring_op = 8; +pub const IORING_OP_SENDMSG: io_uring_op = 9; +pub const IORING_OP_RECVMSG: io_uring_op = 10; +pub const IORING_OP_TIMEOUT: io_uring_op = 11; +pub const IORING_OP_TIMEOUT_REMOVE: io_uring_op = 12; +pub const IORING_OP_ACCEPT: io_uring_op = 13; +pub const IORING_OP_ASYNC_CANCEL: io_uring_op = 14; +pub const IORING_OP_LINK_TIMEOUT: io_uring_op = 15; +pub const IORING_OP_CONNECT: io_uring_op = 16; +pub const IORING_OP_FALLOCATE: io_uring_op = 17; +pub const IORING_OP_OPENAT: io_uring_op = 18; +pub const IORING_OP_CLOSE: io_uring_op = 19; +pub const IORING_OP_FILES_UPDATE: io_uring_op = 20; +pub const IORING_OP_STATX: io_uring_op = 21; +pub const IORING_OP_READ: io_uring_op = 22; +pub const IORING_OP_WRITE: io_uring_op = 23; +pub const IORING_OP_FADVISE: io_uring_op = 24; +pub const IORING_OP_MADVISE: io_uring_op = 25; +pub const IORING_OP_SEND: io_uring_op = 26; +pub const IORING_OP_RECV: io_uring_op = 27; +pub const IORING_OP_OPENAT2: io_uring_op = 28; +pub const IORING_OP_EPOLL_CTL: io_uring_op = 29; +pub const IORING_OP_SPLICE: io_uring_op = 30; +pub const IORING_OP_PROVIDE_BUFFERS: io_uring_op = 31; +pub const IORING_OP_REMOVE_BUFFERS: io_uring_op = 32; +pub const IORING_OP_TEE: io_uring_op = 33; +pub const IORING_OP_SHUTDOWN: io_uring_op = 34; +pub const IORING_OP_RENAMEAT: io_uring_op = 35; +pub const IORING_OP_UNLINKAT: io_uring_op = 36; +pub const IORING_OP_MKDIRAT: io_uring_op = 37; +pub const IORING_OP_SYMLINKAT: io_uring_op = 38; +pub const IORING_OP_LINKAT: io_uring_op = 39; +pub const IORING_OP_MSG_RING: io_uring_op = 40; +pub const IORING_OP_FSETXATTR: io_uring_op = 41; +pub const IORING_OP_SETXATTR: io_uring_op = 42; +pub const IORING_OP_FGETXATTR: io_uring_op = 43; +pub const IORING_OP_GETXATTR: io_uring_op = 44; +pub const IORING_OP_SOCKET: io_uring_op = 45; +pub const IORING_OP_URING_CMD: io_uring_op = 46; +pub const IORING_OP_SEND_ZC: io_uring_op = 47; +pub const IORING_OP_SENDMSG_ZC: io_uring_op = 48; +pub const IORING_OP_READ_MULTISHOT: io_uring_op = 49; +pub const IORING_OP_WAITID: io_uring_op = 50; +pub const IORING_OP_FUTEX_WAIT: io_uring_op = 51; +pub const IORING_OP_FUTEX_WAKE: io_uring_op = 52; +pub const IORING_OP_FUTEX_WAITV: io_uring_op = 53; +pub const 
IORING_OP_FIXED_FD_INSTALL: io_uring_op = 54; +pub const IORING_OP_FTRUNCATE: io_uring_op = 55; +pub const IORING_OP_BIND: io_uring_op = 56; +pub const IORING_OP_LISTEN: io_uring_op = 57; +pub const IORING_OP_LAST: io_uring_op = 58; +pub const IORING_MSG_DATA: io_uring_msg_ring_flags = 0; +pub const IORING_MSG_SEND_FD: io_uring_msg_ring_flags = 1; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_cqe"][::std::mem::size_of::() - 16usize]; + ["Alignment of io_uring_cqe"][::std::mem::align_of::() - 8usize]; + ["Offset of field: io_uring_cqe::user_data"] + [::std::mem::offset_of!(io_uring_cqe, user_data) - 0usize]; + ["Offset of field: io_uring_cqe::res"][::std::mem::offset_of!(io_uring_cqe, res) - 8usize]; + ["Offset of field: io_uring_cqe::flags"][::std::mem::offset_of!(io_uring_cqe, flags) - 12usize]; + ["Offset of field: io_uring_cqe::big_cqe"] + [::std::mem::offset_of!(io_uring_cqe, big_cqe) - 16usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_sqring_offsets"][::std::mem::size_of::() - 40usize]; + ["Alignment of io_sqring_offsets"][::std::mem::align_of::() - 8usize]; + ["Offset of field: io_sqring_offsets::head"] + [::std::mem::offset_of!(io_sqring_offsets, head) - 0usize]; + ["Offset of field: io_sqring_offsets::tail"] + [::std::mem::offset_of!(io_sqring_offsets, tail) - 4usize]; + ["Offset of field: io_sqring_offsets::ring_mask"] + [::std::mem::offset_of!(io_sqring_offsets, ring_mask) - 8usize]; + ["Offset of field: io_sqring_offsets::ring_entries"] + [::std::mem::offset_of!(io_sqring_offsets, ring_entries) - 12usize]; + ["Offset of field: io_sqring_offsets::flags"] + [::std::mem::offset_of!(io_sqring_offsets, flags) - 16usize]; + ["Offset of field: io_sqring_offsets::dropped"] + [::std::mem::offset_of!(io_sqring_offsets, dropped) - 20usize]; + ["Offset of field: io_sqring_offsets::array"] + [::std::mem::offset_of!(io_sqring_offsets, array) - 24usize]; + ["Offset of field: io_sqring_offsets::resv1"] + [::std::mem::offset_of!(io_sqring_offsets, resv1) - 28usize]; + ["Offset of field: io_sqring_offsets::user_addr"] + [::std::mem::offset_of!(io_sqring_offsets, user_addr) - 32usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_cqring_offsets"][::std::mem::size_of::() - 40usize]; + ["Alignment of io_cqring_offsets"][::std::mem::align_of::() - 8usize]; + ["Offset of field: io_cqring_offsets::head"] + [::std::mem::offset_of!(io_cqring_offsets, head) - 0usize]; + ["Offset of field: io_cqring_offsets::tail"] + [::std::mem::offset_of!(io_cqring_offsets, tail) - 4usize]; + ["Offset of field: io_cqring_offsets::ring_mask"] + [::std::mem::offset_of!(io_cqring_offsets, ring_mask) - 8usize]; + ["Offset of field: io_cqring_offsets::ring_entries"] + [::std::mem::offset_of!(io_cqring_offsets, ring_entries) - 12usize]; + ["Offset of field: io_cqring_offsets::overflow"] + [::std::mem::offset_of!(io_cqring_offsets, overflow) - 16usize]; + ["Offset of field: io_cqring_offsets::cqes"] + [::std::mem::offset_of!(io_cqring_offsets, cqes) - 20usize]; + ["Offset of field: io_cqring_offsets::flags"] + [::std::mem::offset_of!(io_cqring_offsets, flags) - 24usize]; + ["Offset of field: io_cqring_offsets::resv1"] + [::std::mem::offset_of!(io_cqring_offsets, resv1) - 28usize]; + ["Offset of field: io_cqring_offsets::user_addr"] + [::std::mem::offset_of!(io_cqring_offsets, user_addr) - 32usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] 
+const _: () = { + ["Size of io_uring_params"][::std::mem::size_of::() - 120usize]; + ["Alignment of io_uring_params"][::std::mem::align_of::() - 8usize]; + ["Offset of field: io_uring_params::sq_entries"] + [::std::mem::offset_of!(io_uring_params, sq_entries) - 0usize]; + ["Offset of field: io_uring_params::cq_entries"] + [::std::mem::offset_of!(io_uring_params, cq_entries) - 4usize]; + ["Offset of field: io_uring_params::flags"] + [::std::mem::offset_of!(io_uring_params, flags) - 8usize]; + ["Offset of field: io_uring_params::sq_thread_cpu"] + [::std::mem::offset_of!(io_uring_params, sq_thread_cpu) - 12usize]; + ["Offset of field: io_uring_params::sq_thread_idle"] + [::std::mem::offset_of!(io_uring_params, sq_thread_idle) - 16usize]; + ["Offset of field: io_uring_params::features"] + [::std::mem::offset_of!(io_uring_params, features) - 20usize]; + ["Offset of field: io_uring_params::wq_fd"] + [::std::mem::offset_of!(io_uring_params, wq_fd) - 24usize]; + ["Offset of field: io_uring_params::resv"] + [::std::mem::offset_of!(io_uring_params, resv) - 28usize]; + ["Offset of field: io_uring_params::sq_off"] + [::std::mem::offset_of!(io_uring_params, sq_off) - 40usize]; + ["Offset of field: io_uring_params::cq_off"] + [::std::mem::offset_of!(io_uring_params, cq_off) - 80usize]; +}; +pub const IORING_REGISTER_BUFFERS: io_uring_register_op = 0; +pub const IORING_UNREGISTER_BUFFERS: io_uring_register_op = 1; +pub const IORING_REGISTER_FILES: io_uring_register_op = 2; +pub const IORING_UNREGISTER_FILES: io_uring_register_op = 3; +pub const IORING_REGISTER_EVENTFD: io_uring_register_op = 4; +pub const IORING_UNREGISTER_EVENTFD: io_uring_register_op = 5; +pub const IORING_REGISTER_FILES_UPDATE: io_uring_register_op = 6; +pub const IORING_REGISTER_EVENTFD_ASYNC: io_uring_register_op = 7; +pub const IORING_REGISTER_PROBE: io_uring_register_op = 8; +pub const IORING_REGISTER_PERSONALITY: io_uring_register_op = 9; +pub const IORING_UNREGISTER_PERSONALITY: io_uring_register_op = 10; +pub const IORING_REGISTER_RESTRICTIONS: io_uring_register_op = 11; +pub const IORING_REGISTER_ENABLE_RINGS: io_uring_register_op = 12; +pub const IORING_REGISTER_FILES2: io_uring_register_op = 13; +pub const IORING_REGISTER_FILES_UPDATE2: io_uring_register_op = 14; +pub const IORING_REGISTER_BUFFERS2: io_uring_register_op = 15; +pub const IORING_REGISTER_BUFFERS_UPDATE: io_uring_register_op = 16; +pub const IORING_REGISTER_IOWQ_AFF: io_uring_register_op = 17; +pub const IORING_UNREGISTER_IOWQ_AFF: io_uring_register_op = 18; +pub const IORING_REGISTER_IOWQ_MAX_WORKERS: io_uring_register_op = 19; +pub const IORING_REGISTER_RING_FDS: io_uring_register_op = 20; +pub const IORING_UNREGISTER_RING_FDS: io_uring_register_op = 21; +pub const IORING_REGISTER_PBUF_RING: io_uring_register_op = 22; +pub const IORING_UNREGISTER_PBUF_RING: io_uring_register_op = 23; +pub const IORING_REGISTER_SYNC_CANCEL: io_uring_register_op = 24; +pub const IORING_REGISTER_FILE_ALLOC_RANGE: io_uring_register_op = 25; +pub const IORING_REGISTER_PBUF_STATUS: io_uring_register_op = 26; +pub const IORING_REGISTER_NAPI: io_uring_register_op = 27; +pub const IORING_UNREGISTER_NAPI: io_uring_register_op = 28; +pub const IORING_REGISTER_CLOCK: io_uring_register_op = 29; +pub const IORING_REGISTER_CLONE_BUFFERS: io_uring_register_op = 30; +pub const IORING_REGISTER_RESIZE_RINGS: io_uring_register_op = 33; +pub const IORING_REGISTER_MEM_REGION: io_uring_register_op = 34; +pub const IORING_REGISTER_LAST: io_uring_register_op = 35; +pub const 
IORING_REGISTER_USE_REGISTERED_RING: io_uring_register_op = 2147483648; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_files_update"][::std::mem::size_of::() - 16usize]; + ["Alignment of io_uring_files_update"] + [::std::mem::align_of::() - 8usize]; + ["Offset of field: io_uring_files_update::offset"] + [::std::mem::offset_of!(io_uring_files_update, offset) - 0usize]; + ["Offset of field: io_uring_files_update::resv"] + [::std::mem::offset_of!(io_uring_files_update, resv) - 4usize]; + ["Offset of field: io_uring_files_update::fds"] + [::std::mem::offset_of!(io_uring_files_update, fds) - 8usize]; +}; +pub const IORING_MEM_REGION_TYPE_USER: _bindgen_ty_13 = 1; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_region_desc"][::std::mem::size_of::() - 64usize]; + ["Alignment of io_uring_region_desc"][::std::mem::align_of::() - 8usize]; + ["Offset of field: io_uring_region_desc::user_addr"] + [::std::mem::offset_of!(io_uring_region_desc, user_addr) - 0usize]; + ["Offset of field: io_uring_region_desc::size"] + [::std::mem::offset_of!(io_uring_region_desc, size) - 8usize]; + ["Offset of field: io_uring_region_desc::flags"] + [::std::mem::offset_of!(io_uring_region_desc, flags) - 16usize]; + ["Offset of field: io_uring_region_desc::id"] + [::std::mem::offset_of!(io_uring_region_desc, id) - 20usize]; + ["Offset of field: io_uring_region_desc::mmap_offset"] + [::std::mem::offset_of!(io_uring_region_desc, mmap_offset) - 24usize]; + ["Offset of field: io_uring_region_desc::__resv"] + [::std::mem::offset_of!(io_uring_region_desc, __resv) - 32usize]; +}; +pub const IORING_MEM_REGION_REG_WAIT_ARG: _bindgen_ty_14 = 1; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_mem_region_reg"][::std::mem::size_of::() - 32usize]; + ["Alignment of io_uring_mem_region_reg"] + [::std::mem::align_of::() - 8usize]; + ["Offset of field: io_uring_mem_region_reg::region_uptr"] + [::std::mem::offset_of!(io_uring_mem_region_reg, region_uptr) - 0usize]; + ["Offset of field: io_uring_mem_region_reg::flags"] + [::std::mem::offset_of!(io_uring_mem_region_reg, flags) - 8usize]; + ["Offset of field: io_uring_mem_region_reg::__resv"] + [::std::mem::offset_of!(io_uring_mem_region_reg, __resv) - 16usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_rsrc_register"][::std::mem::size_of::() - 32usize]; + ["Alignment of io_uring_rsrc_register"] + [::std::mem::align_of::() - 8usize]; + ["Offset of field: io_uring_rsrc_register::nr"] + [::std::mem::offset_of!(io_uring_rsrc_register, nr) - 0usize]; + ["Offset of field: io_uring_rsrc_register::flags"] + [::std::mem::offset_of!(io_uring_rsrc_register, flags) - 4usize]; + ["Offset of field: io_uring_rsrc_register::resv2"] + [::std::mem::offset_of!(io_uring_rsrc_register, resv2) - 8usize]; + ["Offset of field: io_uring_rsrc_register::data"] + [::std::mem::offset_of!(io_uring_rsrc_register, data) - 16usize]; + ["Offset of field: io_uring_rsrc_register::tags"] + [::std::mem::offset_of!(io_uring_rsrc_register, tags) - 24usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_rsrc_update"][::std::mem::size_of::() - 16usize]; + ["Alignment of io_uring_rsrc_update"][::std::mem::align_of::() - 8usize]; + ["Offset of field: io_uring_rsrc_update::offset"] + [::std::mem::offset_of!(io_uring_rsrc_update, offset) - 0usize]; + ["Offset of 
field: io_uring_rsrc_update::resv"] + [::std::mem::offset_of!(io_uring_rsrc_update, resv) - 4usize]; + ["Offset of field: io_uring_rsrc_update::data"] + [::std::mem::offset_of!(io_uring_rsrc_update, data) - 8usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_rsrc_update2"][::std::mem::size_of::() - 32usize]; + ["Alignment of io_uring_rsrc_update2"] + [::std::mem::align_of::() - 8usize]; + ["Offset of field: io_uring_rsrc_update2::offset"] + [::std::mem::offset_of!(io_uring_rsrc_update2, offset) - 0usize]; + ["Offset of field: io_uring_rsrc_update2::resv"] + [::std::mem::offset_of!(io_uring_rsrc_update2, resv) - 4usize]; + ["Offset of field: io_uring_rsrc_update2::data"] + [::std::mem::offset_of!(io_uring_rsrc_update2, data) - 8usize]; + ["Offset of field: io_uring_rsrc_update2::tags"] + [::std::mem::offset_of!(io_uring_rsrc_update2, tags) - 16usize]; + ["Offset of field: io_uring_rsrc_update2::nr"] + [::std::mem::offset_of!(io_uring_rsrc_update2, nr) - 24usize]; + ["Offset of field: io_uring_rsrc_update2::resv2"] + [::std::mem::offset_of!(io_uring_rsrc_update2, resv2) - 28usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_probe_op"][::std::mem::size_of::() - 8usize]; + ["Alignment of io_uring_probe_op"][::std::mem::align_of::() - 4usize]; + ["Offset of field: io_uring_probe_op::op"] + [::std::mem::offset_of!(io_uring_probe_op, op) - 0usize]; + ["Offset of field: io_uring_probe_op::resv"] + [::std::mem::offset_of!(io_uring_probe_op, resv) - 1usize]; + ["Offset of field: io_uring_probe_op::flags"] + [::std::mem::offset_of!(io_uring_probe_op, flags) - 2usize]; + ["Offset of field: io_uring_probe_op::resv2"] + [::std::mem::offset_of!(io_uring_probe_op, resv2) - 4usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_probe"][::std::mem::size_of::() - 16usize]; + ["Alignment of io_uring_probe"][::std::mem::align_of::() - 4usize]; + ["Offset of field: io_uring_probe::last_op"] + [::std::mem::offset_of!(io_uring_probe, last_op) - 0usize]; + ["Offset of field: io_uring_probe::ops_len"] + [::std::mem::offset_of!(io_uring_probe, ops_len) - 1usize]; + ["Offset of field: io_uring_probe::resv"] + [::std::mem::offset_of!(io_uring_probe, resv) - 2usize]; + ["Offset of field: io_uring_probe::resv2"] + [::std::mem::offset_of!(io_uring_probe, resv2) - 4usize]; + ["Offset of field: io_uring_probe::ops"][::std::mem::offset_of!(io_uring_probe, ops) - 16usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_restriction__bindgen_ty_1"] + [::std::mem::size_of::() - 1usize]; + ["Alignment of io_uring_restriction__bindgen_ty_1"] + [::std::mem::align_of::() - 1usize]; + ["Offset of field: io_uring_restriction__bindgen_ty_1::register_op"] + [::std::mem::offset_of!(io_uring_restriction__bindgen_ty_1, register_op) - 0usize]; + ["Offset of field: io_uring_restriction__bindgen_ty_1::sqe_op"] + [::std::mem::offset_of!(io_uring_restriction__bindgen_ty_1, sqe_op) - 0usize]; + ["Offset of field: io_uring_restriction__bindgen_ty_1::sqe_flags"] + [::std::mem::offset_of!(io_uring_restriction__bindgen_ty_1, sqe_flags) - 0usize]; +}; +#[allow(clippy::unnecessary_operation, clippy::identity_op)] +const _: () = { + ["Size of io_uring_restriction"][::std::mem::size_of::() - 16usize]; + ["Alignment of io_uring_restriction"][::std::mem::align_of::() - 4usize]; + ["Offset of field: 
io_uring_restriction::opcode"]
+        [::std::mem::offset_of!(io_uring_restriction, opcode) - 0usize];
+    ["Offset of field: io_uring_restriction::resv"]
+        [::std::mem::offset_of!(io_uring_restriction, resv) - 3usize];
+    ["Offset of field: io_uring_restriction::resv2"]
+        [::std::mem::offset_of!(io_uring_restriction, resv2) - 4usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_clock_register"]
+        [::std::mem::size_of::<io_uring_clock_register>() - 16usize];
+    ["Alignment of io_uring_clock_register"]
+        [::std::mem::align_of::<io_uring_clock_register>() - 4usize];
+    ["Offset of field: io_uring_clock_register::clockid"]
+        [::std::mem::offset_of!(io_uring_clock_register, clockid) - 0usize];
+    ["Offset of field: io_uring_clock_register::__resv"]
+        [::std::mem::offset_of!(io_uring_clock_register, __resv) - 4usize];
+};
+pub const IORING_REGISTER_SRC_REGISTERED: _bindgen_ty_15 = 1;
+pub const IORING_REGISTER_DST_REPLACE: _bindgen_ty_15 = 2;
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_clone_buffers"]
+        [::std::mem::size_of::<io_uring_clone_buffers>() - 32usize];
+    ["Alignment of io_uring_clone_buffers"]
+        [::std::mem::align_of::<io_uring_clone_buffers>() - 4usize];
+    ["Offset of field: io_uring_clone_buffers::src_fd"]
+        [::std::mem::offset_of!(io_uring_clone_buffers, src_fd) - 0usize];
+    ["Offset of field: io_uring_clone_buffers::flags"]
+        [::std::mem::offset_of!(io_uring_clone_buffers, flags) - 4usize];
+    ["Offset of field: io_uring_clone_buffers::src_off"]
+        [::std::mem::offset_of!(io_uring_clone_buffers, src_off) - 8usize];
+    ["Offset of field: io_uring_clone_buffers::dst_off"]
+        [::std::mem::offset_of!(io_uring_clone_buffers, dst_off) - 12usize];
+    ["Offset of field: io_uring_clone_buffers::nr"]
+        [::std::mem::offset_of!(io_uring_clone_buffers, nr) - 16usize];
+    ["Offset of field: io_uring_clone_buffers::pad"]
+        [::std::mem::offset_of!(io_uring_clone_buffers, pad) - 20usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_buf"][::std::mem::size_of::<io_uring_buf>() - 16usize];
+    ["Alignment of io_uring_buf"][::std::mem::align_of::<io_uring_buf>() - 8usize];
+    ["Offset of field: io_uring_buf::addr"][::std::mem::offset_of!(io_uring_buf, addr) - 0usize];
+    ["Offset of field: io_uring_buf::len"][::std::mem::offset_of!(io_uring_buf, len) - 8usize];
+    ["Offset of field: io_uring_buf::bid"][::std::mem::offset_of!(io_uring_buf, bid) - 12usize];
+    ["Offset of field: io_uring_buf::resv"][::std::mem::offset_of!(io_uring_buf, resv) - 14usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1"]
+        [::std::mem::size_of::<io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1>() - 16usize];
+    ["Alignment of io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1"]
+        [::std::mem::align_of::<io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1>() - 8usize];
+    ["Offset of field: io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1::resv1"]
+        [::std::mem::offset_of!(io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1, resv1) - 0usize];
+    ["Offset of field: io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1::resv2"]
+        [::std::mem::offset_of!(io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1, resv2) - 8usize];
+    ["Offset of field: io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1::resv3"]
+        [::std::mem::offset_of!(io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1, resv3) - 12usize];
+    ["Offset of field: io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1::tail"]
+        [::std::mem::offset_of!(io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1, tail) - 14usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_buf_ring__bindgen_ty_1"]
+        [::std::mem::size_of::<io_uring_buf_ring__bindgen_ty_1>() - 16usize];
+    ["Alignment of io_uring_buf_ring__bindgen_ty_1"]
+        [::std::mem::align_of::<io_uring_buf_ring__bindgen_ty_1>() - 8usize];
+    ["Offset of field: io_uring_buf_ring__bindgen_ty_1::bufs"]
+        [::std::mem::offset_of!(io_uring_buf_ring__bindgen_ty_1, bufs) - 0usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_buf_ring"][::std::mem::size_of::<io_uring_buf_ring>() - 16usize];
+    ["Alignment of io_uring_buf_ring"][::std::mem::align_of::<io_uring_buf_ring>() - 8usize];
+};
+pub const IOU_PBUF_RING_MMAP: io_uring_register_pbuf_ring_flags = 1;
+pub const IOU_PBUF_RING_INC: io_uring_register_pbuf_ring_flags = 2;
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_buf_reg"][::std::mem::size_of::<io_uring_buf_reg>() - 40usize];
+    ["Alignment of io_uring_buf_reg"][::std::mem::align_of::<io_uring_buf_reg>() - 8usize];
+    ["Offset of field: io_uring_buf_reg::ring_addr"]
+        [::std::mem::offset_of!(io_uring_buf_reg, ring_addr) - 0usize];
+    ["Offset of field: io_uring_buf_reg::ring_entries"]
+        [::std::mem::offset_of!(io_uring_buf_reg, ring_entries) - 8usize];
+    ["Offset of field: io_uring_buf_reg::bgid"]
+        [::std::mem::offset_of!(io_uring_buf_reg, bgid) - 12usize];
+    ["Offset of field: io_uring_buf_reg::flags"]
+        [::std::mem::offset_of!(io_uring_buf_reg, flags) - 14usize];
+    ["Offset of field: io_uring_buf_reg::resv"]
+        [::std::mem::offset_of!(io_uring_buf_reg, resv) - 16usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_buf_status"][::std::mem::size_of::<io_uring_buf_status>() - 40usize];
+    ["Alignment of io_uring_buf_status"][::std::mem::align_of::<io_uring_buf_status>() - 4usize];
+    ["Offset of field: io_uring_buf_status::buf_group"]
+        [::std::mem::offset_of!(io_uring_buf_status, buf_group) - 0usize];
+    ["Offset of field: io_uring_buf_status::head"]
+        [::std::mem::offset_of!(io_uring_buf_status, head) - 4usize];
+    ["Offset of field: io_uring_buf_status::resv"]
+        [::std::mem::offset_of!(io_uring_buf_status, resv) - 8usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_napi"][::std::mem::size_of::<io_uring_napi>() - 16usize];
+    ["Alignment of io_uring_napi"][::std::mem::align_of::<io_uring_napi>() - 8usize];
+    ["Offset of field: io_uring_napi::busy_poll_to"]
+        [::std::mem::offset_of!(io_uring_napi, busy_poll_to) - 0usize];
+    ["Offset of field: io_uring_napi::prefer_busy_poll"]
+        [::std::mem::offset_of!(io_uring_napi, prefer_busy_poll) - 4usize];
+    ["Offset of field: io_uring_napi::pad"][::std::mem::offset_of!(io_uring_napi, pad) - 5usize];
+    ["Offset of field: io_uring_napi::resv"][::std::mem::offset_of!(io_uring_napi, resv) - 8usize];
+};
+pub const IORING_RESTRICTION_REGISTER_OP: io_uring_register_restriction_op = 0;
+pub const IORING_RESTRICTION_SQE_OP: io_uring_register_restriction_op = 1;
+pub const IORING_RESTRICTION_SQE_FLAGS_ALLOWED: io_uring_register_restriction_op = 2;
+pub const IORING_RESTRICTION_SQE_FLAGS_REQUIRED: io_uring_register_restriction_op = 3;
+pub const IORING_RESTRICTION_LAST: io_uring_register_restriction_op = 4;
+pub const IORING_REG_WAIT_TS: _bindgen_ty_16 = 1;
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_cqwait_reg_arg"][::std::mem::size_of::<io_uring_cqwait_reg_arg>() - 48usize];
+    ["Alignment of io_uring_cqwait_reg_arg"]
+        [::std::mem::align_of::<io_uring_cqwait_reg_arg>() - 8usize];
+    ["Offset of field: io_uring_cqwait_reg_arg::flags"]
+        [::std::mem::offset_of!(io_uring_cqwait_reg_arg, flags) - 0usize];
+    ["Offset of field: io_uring_cqwait_reg_arg::struct_size"]
+        [::std::mem::offset_of!(io_uring_cqwait_reg_arg, struct_size) - 4usize];
+    ["Offset of field: io_uring_cqwait_reg_arg::nr_entries"]
+        [::std::mem::offset_of!(io_uring_cqwait_reg_arg, nr_entries) - 8usize];
+    ["Offset of field: io_uring_cqwait_reg_arg::pad"]
+        [::std::mem::offset_of!(io_uring_cqwait_reg_arg, pad) - 12usize];
+    ["Offset of field: io_uring_cqwait_reg_arg::user_addr"]
+        [::std::mem::offset_of!(io_uring_cqwait_reg_arg, user_addr) - 16usize];
+    ["Offset of field: io_uring_cqwait_reg_arg::pad2"]
+        [::std::mem::offset_of!(io_uring_cqwait_reg_arg, pad2) - 24usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_reg_wait"][::std::mem::size_of::<io_uring_reg_wait>() - 64usize];
+    ["Alignment of io_uring_reg_wait"][::std::mem::align_of::<io_uring_reg_wait>() - 8usize];
+    ["Offset of field: io_uring_reg_wait::ts"]
+        [::std::mem::offset_of!(io_uring_reg_wait, ts) - 0usize];
+    ["Offset of field: io_uring_reg_wait::min_wait_usec"]
+        [::std::mem::offset_of!(io_uring_reg_wait, min_wait_usec) - 16usize];
+    ["Offset of field: io_uring_reg_wait::flags"]
+        [::std::mem::offset_of!(io_uring_reg_wait, flags) - 20usize];
+    ["Offset of field: io_uring_reg_wait::sigmask"]
+        [::std::mem::offset_of!(io_uring_reg_wait, sigmask) - 24usize];
+    ["Offset of field: io_uring_reg_wait::sigmask_sz"]
+        [::std::mem::offset_of!(io_uring_reg_wait, sigmask_sz) - 32usize];
+    ["Offset of field: io_uring_reg_wait::pad"]
+        [::std::mem::offset_of!(io_uring_reg_wait, pad) - 36usize];
+    ["Offset of field: io_uring_reg_wait::pad2"]
+        [::std::mem::offset_of!(io_uring_reg_wait, pad2) - 48usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_getevents_arg"][::std::mem::size_of::<io_uring_getevents_arg>() - 24usize];
+    ["Alignment of io_uring_getevents_arg"]
+        [::std::mem::align_of::<io_uring_getevents_arg>() - 8usize];
+    ["Offset of field: io_uring_getevents_arg::sigmask"]
+        [::std::mem::offset_of!(io_uring_getevents_arg, sigmask) - 0usize];
+    ["Offset of field: io_uring_getevents_arg::sigmask_sz"]
+        [::std::mem::offset_of!(io_uring_getevents_arg, sigmask_sz) - 8usize];
+    ["Offset of field: io_uring_getevents_arg::min_wait_usec"]
+        [::std::mem::offset_of!(io_uring_getevents_arg, min_wait_usec) - 12usize];
+    ["Offset of field: io_uring_getevents_arg::ts"]
+        [::std::mem::offset_of!(io_uring_getevents_arg, ts) - 16usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_sync_cancel_reg"]
+        [::std::mem::size_of::<io_uring_sync_cancel_reg>() - 64usize];
+    ["Alignment of io_uring_sync_cancel_reg"]
+        [::std::mem::align_of::<io_uring_sync_cancel_reg>() - 8usize];
+    ["Offset of field: io_uring_sync_cancel_reg::addr"]
+        [::std::mem::offset_of!(io_uring_sync_cancel_reg, addr) - 0usize];
+    ["Offset of field: io_uring_sync_cancel_reg::fd"]
+        [::std::mem::offset_of!(io_uring_sync_cancel_reg, fd) - 8usize];
+    ["Offset of field: io_uring_sync_cancel_reg::flags"]
+        [::std::mem::offset_of!(io_uring_sync_cancel_reg, flags) - 12usize];
+    ["Offset of field: io_uring_sync_cancel_reg::timeout"]
+        [::std::mem::offset_of!(io_uring_sync_cancel_reg, timeout) - 16usize];
+    ["Offset of field: io_uring_sync_cancel_reg::opcode"]
+        [::std::mem::offset_of!(io_uring_sync_cancel_reg, opcode) - 32usize];
+    ["Offset of field: io_uring_sync_cancel_reg::pad"]
+        [::std::mem::offset_of!(io_uring_sync_cancel_reg, pad) - 33usize];
+    ["Offset of field: io_uring_sync_cancel_reg::pad2"]
+        [::std::mem::offset_of!(io_uring_sync_cancel_reg, pad2) - 40usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_file_index_range"]
+        [::std::mem::size_of::<io_uring_file_index_range>() - 16usize];
+    ["Alignment of io_uring_file_index_range"]
+        [::std::mem::align_of::<io_uring_file_index_range>() - 8usize];
+    ["Offset of field: io_uring_file_index_range::off"]
+        [::std::mem::offset_of!(io_uring_file_index_range, off) - 0usize];
+    ["Offset of field: io_uring_file_index_range::len"]
+        [::std::mem::offset_of!(io_uring_file_index_range, len) - 4usize];
+    ["Offset of field: io_uring_file_index_range::resv"]
+        [::std::mem::offset_of!(io_uring_file_index_range, resv) - 8usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_recvmsg_out"][::std::mem::size_of::<io_uring_recvmsg_out>() - 16usize];
+    ["Alignment of io_uring_recvmsg_out"][::std::mem::align_of::<io_uring_recvmsg_out>() - 4usize];
+    ["Offset of field: io_uring_recvmsg_out::namelen"]
+        [::std::mem::offset_of!(io_uring_recvmsg_out, namelen) - 0usize];
+    ["Offset of field: io_uring_recvmsg_out::controllen"]
+        [::std::mem::offset_of!(io_uring_recvmsg_out, controllen) - 4usize];
+    ["Offset of field: io_uring_recvmsg_out::payloadlen"]
+        [::std::mem::offset_of!(io_uring_recvmsg_out, payloadlen) - 8usize];
+    ["Offset of field: io_uring_recvmsg_out::flags"]
+        [::std::mem::offset_of!(io_uring_recvmsg_out, flags) - 12usize];
+};
+pub const SOCKET_URING_OP_SIOCINQ: io_uring_socket_op = 0;
+pub const SOCKET_URING_OP_SIOCOUTQ: io_uring_socket_op = 1;
+pub const SOCKET_URING_OP_GETSOCKOPT: io_uring_socket_op = 2;
+pub const SOCKET_URING_OP_SETSOCKOPT: io_uring_socket_op = 3;
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_sq"][::std::mem::size_of::<io_uring_sq>() - 104usize];
+    ["Alignment of io_uring_sq"][::std::mem::align_of::<io_uring_sq>() - 8usize];
+    ["Offset of field: io_uring_sq::khead"][::std::mem::offset_of!(io_uring_sq, khead) - 0usize];
+    ["Offset of field: io_uring_sq::ktail"][::std::mem::offset_of!(io_uring_sq, ktail) - 8usize];
+    ["Offset of field: io_uring_sq::kring_mask"]
+        [::std::mem::offset_of!(io_uring_sq, kring_mask) - 16usize];
+    ["Offset of field: io_uring_sq::kring_entries"]
+        [::std::mem::offset_of!(io_uring_sq, kring_entries) - 24usize];
+    ["Offset of field: io_uring_sq::kflags"][::std::mem::offset_of!(io_uring_sq, kflags) - 32usize];
+    ["Offset of field: io_uring_sq::kdropped"]
+        [::std::mem::offset_of!(io_uring_sq, kdropped) - 40usize];
+    ["Offset of field: io_uring_sq::array"][::std::mem::offset_of!(io_uring_sq, array) - 48usize];
+    ["Offset of field: io_uring_sq::sqes"][::std::mem::offset_of!(io_uring_sq, sqes) - 56usize];
+    ["Offset of field: io_uring_sq::sqe_head"]
+        [::std::mem::offset_of!(io_uring_sq, sqe_head) - 64usize];
+    ["Offset of field: io_uring_sq::sqe_tail"]
+        [::std::mem::offset_of!(io_uring_sq, sqe_tail) - 68usize];
+    ["Offset of field: io_uring_sq::ring_sz"]
+        [::std::mem::offset_of!(io_uring_sq, ring_sz) - 72usize];
+    ["Offset of field: io_uring_sq::ring_ptr"]
+        [::std::mem::offset_of!(io_uring_sq, ring_ptr) - 80usize];
+    ["Offset of field: io_uring_sq::ring_mask"]
+        [::std::mem::offset_of!(io_uring_sq, ring_mask) - 88usize];
+    ["Offset of field: io_uring_sq::ring_entries"]
+        [::std::mem::offset_of!(io_uring_sq, ring_entries) - 92usize];
+    ["Offset of field: io_uring_sq::pad"][::std::mem::offset_of!(io_uring_sq, pad) - 96usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring_cq"][::std::mem::size_of::<io_uring_cq>() - 88usize];
+    ["Alignment of io_uring_cq"][::std::mem::align_of::<io_uring_cq>() - 8usize];
+    ["Offset of field: io_uring_cq::khead"][::std::mem::offset_of!(io_uring_cq, khead) - 0usize];
+    ["Offset of field: io_uring_cq::ktail"][::std::mem::offset_of!(io_uring_cq, ktail) - 8usize];
+    ["Offset of field: io_uring_cq::kring_mask"]
+        [::std::mem::offset_of!(io_uring_cq, kring_mask) - 16usize];
+    ["Offset of field: io_uring_cq::kring_entries"]
+        [::std::mem::offset_of!(io_uring_cq, kring_entries) - 24usize];
+    ["Offset of field: io_uring_cq::kflags"][::std::mem::offset_of!(io_uring_cq, kflags) - 32usize];
+    ["Offset of field: io_uring_cq::koverflow"]
+        [::std::mem::offset_of!(io_uring_cq, koverflow) - 40usize];
+    ["Offset of field: io_uring_cq::cqes"][::std::mem::offset_of!(io_uring_cq, cqes) - 48usize];
+    ["Offset of field: io_uring_cq::ring_sz"]
+        [::std::mem::offset_of!(io_uring_cq, ring_sz) - 56usize];
+    ["Offset of field: io_uring_cq::ring_ptr"]
+        [::std::mem::offset_of!(io_uring_cq, ring_ptr) - 64usize];
+    ["Offset of field: io_uring_cq::ring_mask"]
+        [::std::mem::offset_of!(io_uring_cq, ring_mask) - 72usize];
+    ["Offset of field: io_uring_cq::ring_entries"]
+        [::std::mem::offset_of!(io_uring_cq, ring_entries) - 76usize];
+    ["Offset of field: io_uring_cq::pad"][::std::mem::offset_of!(io_uring_cq, pad) - 80usize];
+};
+#[allow(clippy::unnecessary_operation, clippy::identity_op)]
+const _: () = {
+    ["Size of io_uring"][::std::mem::size_of::<io_uring>() - 216usize];
+    ["Alignment of io_uring"][::std::mem::align_of::<io_uring>() - 8usize];
+    ["Offset of field: io_uring::sq"][::std::mem::offset_of!(io_uring, sq) - 0usize];
+    ["Offset of field: io_uring::cq"][::std::mem::offset_of!(io_uring, cq) - 104usize];
+    ["Offset of field: io_uring::flags"][::std::mem::offset_of!(io_uring, flags) - 192usize];
+    ["Offset of field: io_uring::ring_fd"][::std::mem::offset_of!(io_uring, ring_fd) - 196usize];
+    ["Offset of field: io_uring::features"][::std::mem::offset_of!(io_uring, features) - 200usize];
+    ["Offset of field: io_uring::enter_ring_fd"]
+        [::std::mem::offset_of!(io_uring, enter_ring_fd) - 204usize];
+    ["Offset of field: io_uring::int_flags"]
+        [::std::mem::offset_of!(io_uring, int_flags) - 208usize];
+    ["Offset of field: io_uring::pad"][::std::mem::offset_of!(io_uring, pad) - 209usize];
+    ["Offset of field: io_uring::pad2"][::std::mem::offset_of!(io_uring, pad2) - 212usize];
+};
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union io_uring_sqe__bindgen_ty_1 {
+    pub off: __u64,
+    pub addr2: __u64,
+    pub __bindgen_anon_1: io_uring_sqe__bindgen_ty_1__bindgen_ty_1,
+}
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union io_uring_sqe__bindgen_ty_2 {
+    pub addr: __u64,
+    pub splice_off_in: __u64,
+    pub __bindgen_anon_1: io_uring_sqe__bindgen_ty_2__bindgen_ty_1,
+}
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union io_uring_sqe__bindgen_ty_3 {
+    pub rw_flags: __kernel_rwf_t,
+    pub fsync_flags: __u32,
+    pub poll_events: __u16,
+    pub poll32_events: __u32,
+    pub sync_range_flags: __u32,
+    pub msg_flags: __u32,
+    pub timeout_flags: __u32,
+    pub accept_flags: __u32,
+    pub cancel_flags: __u32,
+    pub open_flags: __u32,
+    pub statx_flags: __u32,
+    pub fadvise_advice: __u32,
+    pub splice_flags: __u32,
+    pub rename_flags: __u32,
+    pub unlink_flags: __u32,
+    pub hardlink_flags: __u32,
+    pub xattr_flags: __u32,
+    pub msg_ring_flags: __u32,
+    pub uring_cmd_flags: __u32,
+    pub waitid_flags: __u32,
+    pub futex_flags: __u32,
+    pub install_fd_flags: __u32,
+    pub nop_flags: __u32,
+}
+#[repr(C, packed)]
+#[derive(Copy, Clone)]
+pub union io_uring_sqe__bindgen_ty_4 {
+    pub buf_index: __u16,
+    pub buf_group: __u16,
+}
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union io_uring_sqe__bindgen_ty_5 {
+    pub splice_fd_in: __s32,
+    pub file_index: __u32,
+    pub optlen: __u32,
+    pub __bindgen_anon_1: io_uring_sqe__bindgen_ty_5__bindgen_ty_1,
+}
+#[repr(C)]
+pub union io_uring_sqe__bindgen_ty_6 {
+    pub __bindgen_anon_1: ::std::mem::ManuallyDrop<io_uring_sqe__bindgen_ty_6__bindgen_ty_1>,
+    pub optval: ::std::mem::ManuallyDrop<__u64>,
+    pub cmd: ::std::mem::ManuallyDrop<[__u8; 0usize]>,
+}
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union io_uring_restriction__bindgen_ty_1 {
+    pub register_op: __u8,
+    pub sqe_op: __u8,
+    pub sqe_flags: __u8,
+}
+#[repr(C)]
+pub union io_uring_buf_ring__bindgen_ty_1 {
+    pub __bindgen_anon_1: ::std::mem::ManuallyDrop<io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1>,
+    pub bufs: ::std::mem::ManuallyDrop<[io_uring_buf; 0usize]>,
+}
+impl<T> __IncompleteArrayField<T> {
+    #[inline]
+    pub const fn new() -> Self {
+        __IncompleteArrayField(::std::marker::PhantomData, [])
+    }
+    #[inline]
+    pub fn as_ptr(&self) -> *const T {
+        self as *const _ as *const T
+    }
+    #[inline]
+    pub fn as_mut_ptr(&mut self) -> *mut T {
+        self as *mut _ as *mut T
+    }
+    #[inline]
+    pub unsafe fn as_slice(&self, len: usize) -> &[T] {
+        ::std::slice::from_raw_parts(self.as_ptr(), len)
+    }
+    #[inline]
+    pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] {
+        ::std::slice::from_raw_parts_mut(self.as_mut_ptr(), len)
+    }
+}
+impl<T> ::std::fmt::Debug for __IncompleteArrayField<T> {
+    fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+        fmt.write_str("__IncompleteArrayField")
+    }
+}
diff --git a/src/io_uring/mem.rs b/src/io_uring/mem.rs
new file mode 100644
index 00000000..8f5676af
--- /dev/null
+++ b/src/io_uring/mem.rs
@@ -0,0 +1,31 @@
+use crate::io_uring::{self, cq, libc, sq};
+use crate::SubmissionQueue;
+
+pub(crate) struct AdviseOp;
+
+impl io_uring::Op for AdviseOp {
+    type Output = ();
+    type Resources = ();
+    type Args = (*mut (), u32, libc::c_int); // address, length, advice.
+
+    #[allow(clippy::cast_sign_loss)]
+    fn fill_submission(
+        (): &mut Self::Resources,
+        (address, length, advice): &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_MADVISE as u8;
+        submission.0.fd = -1;
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+            addr: *address as _,
+        };
+        submission.0.len = *length;
+        submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+            fadvise_advice: *advice as _,
+        };
+    }
+
+    fn map_ok(_: &SubmissionQueue, (): Self::Resources, (_, n): cq::OpReturn) -> Self::Output {
+        debug_assert!(n == 0);
+    }
+}
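Aside (not part of the patch): `AdviseOp` is the asynchronous counterpart of `madvise(2)`; the address and length go into the SQE's `addr`/`len` fields and the advice into `fadvise_advice`. For reference, a minimal blocking sketch of the same call through the `libc` crate:

use std::ptr;

fn main() {
    let len = 4096;
    unsafe {
        // Map one anonymous page.
        let addr = libc::mmap(
            ptr::null_mut(),
            len,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
            -1,
            0,
        );
        assert_ne!(addr, libc::MAP_FAILED);
        // The blocking equivalent of what AdviseOp submits via IORING_OP_MADVISE.
        assert_eq!(libc::madvise(addr, len, libc::MADV_DONTNEED), 0);
        assert_eq!(libc::munmap(addr, len), 0);
    }
}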
diff --git a/src/io_uring/mod.rs b/src/io_uring/mod.rs
new file mode 100644
index 00000000..bdb88965
--- /dev/null
+++ b/src/io_uring/mod.rs
@@ -0,0 +1,401 @@
+//! io_uring implementation.
+
+use std::os::fd::{AsRawFd, OwnedFd};
+use std::ptr;
+use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
+use std::sync::Mutex;
+
+use crate::drop_waker::DropWake;
+use crate::fd::{AsyncFd, Descriptor};
+use crate::op::OpResult;
+use crate::syscall;
+
+pub(crate) mod cancel;
+pub(crate) mod config;
+mod cq;
+pub(crate) mod fd;
+pub(crate) mod fs;
+pub(crate) mod io;
+mod libc;
+pub(crate) mod mem;
+pub(crate) mod msg;
+pub(crate) mod net;
+pub(crate) mod poll;
+pub(crate) mod process;
+mod sq;
+
+pub(crate) use config::Config;
+pub(crate) use cq::Completions;
+pub(crate) use sq::{Submission, Submissions};
+
+/// io_uring specific methods.
+impl crate::Ring {
+    /// Enable the ring.
+    ///
+    /// This is only required when starting the ring in disabled mode, see
+    /// [`Config::disable`].
+    ///
+    /// [`Config::disable`]: crate::Config::disable
+    #[allow(clippy::needless_pass_by_ref_mut)]
+    #[doc(alias = "IORING_REGISTER_ENABLE_RINGS")]
+    pub fn enable(&mut self) -> io::Result<()> {
+        self.cq
+            .shared()
+            .data
+            .register(libc::IORING_REGISTER_ENABLE_RINGS, ptr::null(), 0)
+    }
+}
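Aside (not part of the patch): the intended flow is to create the ring disabled, do any registration work, and only then enable it. A rough usage sketch, assuming the `Ring::config`/`build` builder and the `Config::disable` knob referenced in the doc comment above (names not verified against this in-development API):

// Hypothetical usage; `config`, `disable` and `build` are assumptions.
fn main() -> std::io::Result<()> {
    let mut ring = a10::Ring::config(128).disable().build()?;
    // ... register restrictions, files, buffers, etc. ...
    ring.enable()?; // Issues IORING_REGISTER_ENABLE_RINGS.
    Ok(())
}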
+
+/// io_uring implementation.
+pub(crate) enum Implementation {}
+
+impl crate::Implementation for Implementation {
+    type Shared = Shared;
+    type Submissions = Submissions;
+    type Completions = Completions;
+}
+
+#[derive(Debug)]
+#[allow(clippy::mutex_integer)]
+pub(crate) struct Shared {
+    /// File descriptor of the io_uring.
+    rfd: OwnedFd,
+    /// Mmap-ed pointer.
+    ptr: *mut libc::c_void,
+    /// Mmap-ed size in bytes.
+    size: libc::c_uint,
+    /// Increased in `queue` to give the caller mutable access to a
+    /// submission in [`Submissions`].
+    /// Used by [`Completions`] to determine the number of submissions to
+    /// submit.
+    pending_tail: AtomicU32,
+    /// True if we're using a kernel thread to do submission polling, i.e. if
+    /// `IORING_SETUP_SQPOLL` is enabled.
+    kernel_thread: bool,
+    // NOTE: the following fields reference mmaped pages shared with the kernel,
+    // thus all need atomic/synchronised access.
+    /// Flags set by the kernel to communicate state information.
+    flags: *const AtomicU32,
+    /// Head of the queue, i.e. the submissions read by the kernel. Incremented
+    /// by the kernel when submissions have successfully been processed.
+    kernel_read: *const AtomicU32,
+    /// Array of `len` submission entries shared with the kernel. We're the only
+    /// one modifying the structures, but the kernel can read from them.
+    ///
+    /// This pointer is also used in the `unmmap` call.
+    entries: *mut sq::Submission,
+    /// Number of `entries`.
+    entries_len: u32,
+    /// Mask used to index into the `entries` queue.
+    entries_mask: u32,
+    /// Variable used to get an index into `array`. The lock must be held while
+    /// writing into `array` to prevent race conditions with other threads.
+    array_index: Mutex<u32>,
+    /// Array of `len` indices (into `entries`) shared with the kernel. We're
+    /// the only one modifying the structures, but the kernel can read from it.
+    ///
+    /// This is protected by `array_index`.
+    array: *mut AtomicU32,
+    /// Incremented by us when submitting new submissions.
+    array_tail: *mut AtomicU32,
+}
+
+impl Shared {
+    pub(crate) fn new(rfd: OwnedFd, parameters: &libc::io_uring_params) -> io::Result<Shared> {
+        let submission_queue_size =
+            parameters.sq_off.array + parameters.sq_entries * (size_of::<u32>() as u32);
+        let submission_queue = mmap(
+            submission_queue_size as usize,
+            libc::PROT_READ | libc::PROT_WRITE,
+            libc::MAP_SHARED | libc::MAP_POPULATE,
+            rfd.as_raw_fd(),
+            libc::off_t::from(libc::IORING_OFF_SQ_RING),
+        )?;
+
+        let submission_queue_entries = mmap(
+            parameters.sq_entries as usize * size_of::<sq::Submission>(),
+            libc::PROT_READ | libc::PROT_WRITE,
+            libc::MAP_SHARED | libc::MAP_POPULATE,
+            rfd.as_raw_fd(),
+            libc::off_t::from(libc::IORING_OFF_SQES),
+        )
+        .inspect_err(|_| {
+            _ = munmap(submission_queue, submission_queue_size as usize); // Can't handle two errors.
+        })?;
+
+        let entries_len = unsafe {
+            load_atomic_u32(submission_queue.add(parameters.sq_off.ring_entries as usize))
+        };
+        debug_assert!(entries_len == parameters.sq_entries);
+        let entries_mask =
+            unsafe { load_atomic_u32(submission_queue.add(parameters.sq_off.ring_mask as usize)) };
+        debug_assert!(entries_mask == parameters.sq_entries - 1);
+
+        // SAFETY: we do a whole bunch of pointer manipulations, the kernel
+        // ensures all of this stuff is set up for us with the mmap calls above.
+        #[allow(clippy::mutex_integer)] // For `array_index`, we need the lock for more than the value.
+        Ok(unsafe {
+            Shared {
+                rfd,
+                ptr: submission_queue,
+                size: submission_queue_size,
+                pending_tail: AtomicU32::new(0),
+                kernel_thread: (parameters.flags & libc::IORING_SETUP_SQPOLL) != 0,
+                // Fields are shared with the kernel.
+                kernel_read: submission_queue.add(parameters.sq_off.head as usize).cast(),
+                flags: submission_queue
+                    .add(parameters.sq_off.flags as usize)
+                    .cast(),
+                entries: submission_queue_entries.cast(),
+                entries_len,
+                entries_mask,
+                array_index: Mutex::new(0),
+                array: submission_queue
+                    .add(parameters.sq_off.array as usize)
+                    .cast(),
+                array_tail: submission_queue.add(parameters.sq_off.tail as usize).cast(),
+            }
+        })
+    }
+
+    /// Make a `io_uring_register(2)` system call.
+    pub(crate) fn register(
+        &self,
+        op: libc::c_uint,
+        arg: *const libc::c_void,
+        nr_args: libc::c_uint,
+    ) -> io::Result<()> {
+        syscall!(io_uring_register(self.rfd.as_raw_fd(), op, arg, nr_args))?;
+        Ok(())
+    }
+
+    /// Wake up the kernel thread polling for submission events, if the kernel
+    /// thread needs a wakeup.
+    fn maybe_wake_kernel_thread(&self) {
+        if self.kernel_thread && (self.flags() & libc::IORING_SQ_NEED_WAKEUP != 0) {
+            log::debug!("waking io_uring submission queue polling kernel thread");
+            let res = syscall!(io_uring_enter2(
+                self.rfd.as_raw_fd(),
+                0,                            // We've already queued our submissions.
+                0,                            // Don't wait for any completion events.
+                libc::IORING_ENTER_SQ_WAKEUP, // Wake up the kernel.
+                ptr::null(),                  // We don't pass any additional arguments.
+                0,
+            ));
+            if let Err(err) = res {
+                log::warn!("failed to wake io_uring submission queue polling kernel thread: {err}");
+            }
+        }
+    }
+
+    /// Submit the event to the kernel when not using a kernel polling thread
+    /// and another thread is currently [`Ring::poll`]ing.
+    fn maybe_submit_event(&self, is_polling: &AtomicBool) {
+        if !self.kernel_thread && is_polling.load(Ordering::Relaxed) {
+            log::debug!("submitting submission event while another thread is `Ring::poll`ing");
+            let rfd = self.rfd.as_raw_fd();
+            let res = syscall!(io_uring_enter2(rfd, 1, 0, 0, ptr::null(), 0));
+            if let Err(err) = res {
+                log::warn!("failed to io_uring submit event: {err}");
+            }
+        }
+    }
+
+    /// Returns the number of unsubmitted submission queue entries.
+    pub(crate) fn unsubmitted(&self) -> u32 {
+        // SAFETY: the `kernel_read` pointer itself is valid as long as
+        // `Ring.fd` is alive.
+        // We use Relaxed here because it can already be outdated the moment we
+        // return it, the caller has to deal with that.
+        let kernel_read = unsafe { (*self.kernel_read).load(Ordering::Relaxed) };
+        let pending_tail = self.pending_tail.load(Ordering::Relaxed);
+        pending_tail - kernel_read
+    }
+
+    /// Returns `self.kernel_read`.
+    fn kernel_read(&self) -> u32 {
+        // SAFETY: this is written to by the kernel so we need to use `Acquire`
+        // ordering. The pointer itself is valid as long as `Ring.fd` is alive.
+        unsafe { (*self.kernel_read).load(Ordering::Acquire) }
+    }
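Aside (not part of the patch): `pending_tail - kernel_read` is correct while both monotonically increasing counters are on the same side of the `u32` wrap point; a defensive variant would use `wrapping_sub`, which stays correct across the wrap. A quick demonstration:

fn main() {
    // The tail has wrapped past u32::MAX, the head has not yet.
    let kernel_read: u32 = u32::MAX - 1;
    let pending_tail: u32 = 2; // 4 entries were queued across the wrap.
    assert_eq!(pending_tail.wrapping_sub(kernel_read), 4);
    // A plain `pending_tail - kernel_read` would panic here in debug builds.
}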
+
+    /// Returns `self.flags`.
+    fn flags(&self) -> u32 {
+        // SAFETY: this is written to by the kernel so we need to use `Acquire`
+        // ordering. The pointer itself is valid as long as `Ring.fd` is alive.
+        unsafe { (*self.flags).load(Ordering::Acquire) }
+    }
+}
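Aside (not part of the patch): the `Acquire` loads above pair with the kernel's release-stores to the shared words. The same publish/observe protocol in pure Rust, with one thread releasing and another acquiring:

use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use std::thread;

fn main() {
    let tail = Arc::new(AtomicU32::new(0));
    let producer = {
        let tail = Arc::clone(&tail);
        // Producer: everything written before this Release store is visible
        // to any thread that observes the new value with an Acquire load.
        thread::spawn(move || tail.store(1, Ordering::Release))
    };
    // Consumer: spin until the Release store becomes visible.
    while tail.load(Ordering::Acquire) == 0 {
        std::hint::spin_loop();
    }
    producer.join().unwrap();
}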
+
+unsafe impl Send for Shared {}
+
+unsafe impl Sync for Shared {}
+
+impl Drop for Shared {
+    fn drop(&mut self) {
+        let ptr = self.entries.cast();
+        let size = self.entries_len as usize * size_of::<sq::Submission>();
+        if let Err(err) = munmap(ptr, size) {
+            log::warn!(ptr:? = ptr, size = size; "error unmapping io_uring entries: {err}");
+        }
+
+        if let Err(err) = munmap(self.ptr, self.size as usize) {
+            log::warn!(ptr:? = self.ptr, size = self.size; "error unmapping io_uring submission queue: {err}");
+        }
+    }
+}
+
+/// io_uring specific [`crate::op::Op`] trait.
+pub(crate) trait Op {
+    type Output;
+    type Resources: DropWake;
+    type Args;
+
+    fn fill_submission(
+        resources: &mut Self::Resources,
+        args: &mut Self::Args,
+        submission: &mut sq::Submission,
+    );
+
+    fn map_ok(
+        sq: &crate::SubmissionQueue,
+        resources: Self::Resources,
+        op_output: cq::OpReturn,
+    ) -> Self::Output;
+}
+
+impl<T: Op> crate::op::Op for T {
+    type Output = T::Output;
+    type Resources = T::Resources;
+    type Args = T::Args;
+    type Submission = sq::Submission;
+    type OperationState = cq::OperationState;
+    type OperationOutput = cq::OpReturn;
+
+    fn fill_submission(
+        resources: &mut Self::Resources,
+        args: &mut Self::Args,
+        submission: &mut Self::Submission,
+    ) {
+        T::fill_submission(resources, args, submission);
+    }
+
+    fn check_result(
+        _: &mut Self::Resources,
+        _: &mut Self::Args,
+        state: &mut Self::OperationState,
+    ) -> OpResult<Self::OperationOutput> {
+        match state {
+            cq::OperationState::Single { result } => result.as_op_result(),
+            cq::OperationState::Multishot { results } if results.is_empty() => {
+                OpResult::Again(false)
+            }
+            cq::OperationState::Multishot { results } => results.remove(0).as_op_result(),
+        }
+    }
+
+    fn map_ok(
+        sq: &crate::SubmissionQueue,
+        resources: Self::Resources,
+        op_output: Self::OperationOutput,
+    ) -> Self::Output {
+        T::map_ok(sq, resources, op_output)
+    }
+}
+
+/// io_uring specific [`crate::op::FdOp`] trait.
+pub(crate) trait FdOp<D: Descriptor> {
+    type Output;
+    type Resources: DropWake;
+    type Args;
+
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        resources: &mut Self::Resources,
+        args: &mut Self::Args,
+        submission: &mut sq::Submission,
+    );
+
+    fn map_ok(
+        fd: &AsyncFd<D>,
+        resources: Self::Resources,
+        op_output: cq::OpReturn,
+    ) -> Self::Output;
+}
+
+impl<D: Descriptor, T: FdOp<D>> crate::op::FdOp<D> for T {
+    type Output = T::Output;
+    type Resources = T::Resources;
+    type Args = T::Args;
+    type Submission = sq::Submission;
+    type OperationState = cq::OperationState;
+    type OperationOutput = cq::OpReturn;
+
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        resources: &mut Self::Resources,
+        args: &mut Self::Args,
+        submission: &mut Self::Submission,
+    ) {
+        T::fill_submission(fd, resources, args, submission);
+    }
+
+    fn check_result(
+        _: &AsyncFd<D>,
+        _: &mut Self::Resources,
+        _: &mut Self::Args,
+        state: &mut Self::OperationState,
+    ) -> OpResult<Self::OperationOutput> {
+        match state {
+            cq::OperationState::Single { result } => result.as_op_result(),
+            cq::OperationState::Multishot { results } if results.is_empty() => {
+                OpResult::Again(false)
+            }
+            cq::OperationState::Multishot { results } => results.remove(0).as_op_result(),
+        }
+    }
+
+    fn map_ok(
+        fd: &AsyncFd<D>,
+        resources: Self::Resources,
+        op_output: Self::OperationOutput,
+    ) -> Self::Output {
+        T::map_ok(fd, resources, op_output)
+    }
+}
+
+/// `mmap(2)` wrapper that also sets `MADV_DONTFORK`.
+fn mmap(
+    len: libc::size_t,
+    prot: libc::c_int,
+    flags: libc::c_int,
+    fd: libc::c_int,
+    offset: libc::off_t,
+) -> io::Result<*mut libc::c_void> {
+    let addr = match unsafe { libc::mmap(ptr::null_mut(), len, prot, flags, fd, offset) } {
+        libc::MAP_FAILED => return Err(io::Error::last_os_error()),
+        addr => addr,
+    };
+
+    match unsafe { libc::madvise(addr, len, libc::MADV_DONTFORK) } {
+        0 => Ok(addr),
+        _ => {
+            let err = io::Error::last_os_error();
+            _ = munmap(addr, len); // Can't handle two errors.
+            Err(err)
+        }
+    }
+}
+
+/// `munmap(2)` wrapper.
+pub(crate) fn munmap(addr: *mut libc::c_void, len: libc::size_t) -> io::Result<()> {
+    match unsafe { libc::munmap(addr, len) } {
+        0 => Ok(()),
+        _ => Err(io::Error::last_os_error()),
+    }
+}
+
+/// Load a `u32` using relaxed ordering from `ptr`.
+unsafe fn load_atomic_u32(ptr: *mut libc::c_void) -> u32 {
+    (*ptr.cast::<AtomicU32>()).load(Ordering::Relaxed)
+}
diff --git a/src/io_uring/msg.rs b/src/io_uring/msg.rs
new file mode 100644
index 00000000..1d16c385
--- /dev/null
+++ b/src/io_uring/msg.rs
@@ -0,0 +1,30 @@
+use std::os::fd::AsRawFd;
+
+use crate::io_uring::{cq, libc, sq};
+use crate::msg::MsgData;
+use crate::{OperationId, SubmissionQueue};
+
+pub(crate) fn next(state: &mut cq::OperationState) -> Option<MsgData> {
+    let result = match state {
+        cq::OperationState::Single { result } => *result,
+        cq::OperationState::Multishot { results } if results.is_empty() => return None,
+        cq::OperationState::Multishot { results } => results.remove(0),
+    };
+    Some(result.as_msg())
+}
+
+pub(crate) fn send(
+    sq: &SubmissionQueue,
+    op_id: OperationId,
+    data: MsgData,
+    submission: &mut sq::Submission,
+) {
+    submission.0.opcode = libc::IORING_OP_MSG_RING as u8;
+    submission.0.fd = sq.inner.shared_data().rfd.as_raw_fd();
+    submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+        addr: u64::from(libc::IORING_MSG_DATA),
+    };
+    submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: op_id as _ };
+    submission.0.len = data;
+    submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { msg_ring_flags: 0 };
+}
diff --git a/src/io_uring/net.rs b/src/io_uring/net.rs
new file mode 100644
index 00000000..b5fc0136
--- /dev/null
+++ b/src/io_uring/net.rs
@@ -0,0 +1,674 @@
+use std::marker::PhantomData;
+use std::mem::{ManuallyDrop, MaybeUninit};
+use std::os::fd::RawFd;
+use std::{ptr, slice};
+
+use crate::fd::{AsyncFd, Descriptor};
+use crate::io::{Buf, BufId, BufMut, BufMutSlice, BufSlice, Buffer, ReadBuf, ReadBufPool};
+use crate::io_uring::{self, cq, libc, sq};
+use crate::net::{AddressStorage, NoAddress, SendCall, SocketAddress};
+use crate::op::{FdIter, FdOpExtract};
+use crate::SubmissionQueue;
+
+pub(crate) use crate::unix::MsgHeader;
+
+pub(crate) struct SocketOp<D>(PhantomData<*const D>);
+
+impl<D: Descriptor> io_uring::Op for SocketOp<D> {
+    type Output = AsyncFd<D>;
+    type Resources = ();
+    type Args = (libc::c_int, libc::c_int, libc::c_int, libc::c_int); // domain, type, protocol, flags
+
+    #[allow(clippy::cast_sign_loss)]
+    fn fill_submission(
+        (): &mut Self::Resources,
+        (domain, r#type, protocol, flags): &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_SOCKET as u8;
+        submission.0.fd = *domain;
+        submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: *r#type as _ };
+        submission.0.len = *protocol as _;
+        submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { rw_flags: *flags };
+        D::create_flags(submission);
+    }
+
+    fn map_ok(sq: &SubmissionQueue, (): Self::Resources, (_, fd): cq::OpReturn) -> Self::Output {
+        // SAFETY: kernel ensures that `fd` is valid.
+        unsafe { AsyncFd::from_raw(fd as _, sq.clone()) }
+    }
+}
+
+pub(crate) struct ConnectOp<A>(PhantomData<*const A>);
+
+impl<A: SocketAddress, D: Descriptor> io_uring::FdOp<D> for ConnectOp<A> {
+    type Output = ();
+    type Resources = AddressStorage<Box<A::Storage>>;
+    type Args = ();
+
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        address: &mut Self::Resources,
+        (): &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_CONNECT as u8;
+        submission.0.fd = fd.fd();
+        let (ptr, length) = unsafe { A::as_ptr(&address.0) };
+        submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 {
+            off: u64::from(length),
+        };
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: ptr as _ };
+    }
+
+    fn map_ok(
+        _: &AsyncFd<D>,
+        _: Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        debug_assert!(n == 0);
+    }
+}
+
+pub(crate) struct RecvOp<B>(PhantomData<*const B>);
+
+impl<B: BufMut, D: Descriptor> io_uring::FdOp<D> for RecvOp<B> {
+    type Output = B;
+    type Resources = Buffer<B>;
+    type Args = libc::c_int; // flags
+
+    #[allow(clippy::cast_sign_loss)]
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        buf: &mut Self::Resources,
+        flags: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_RECV as u8;
+        submission.0.fd = fd.fd();
+        let (ptr, length) = unsafe { buf.buf.parts_mut() };
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: ptr as _ };
+        submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+            msg_flags: *flags as _,
+        };
+        submission.0.len = length;
+        if let Some(buf_group) = buf.buf.buffer_group() {
+            submission.0.__bindgen_anon_4.buf_group = buf_group.0;
+            submission.0.flags |= libc::IOSQE_BUFFER_SELECT;
+        }
+    }
+
+    fn map_ok(
+        _: &AsyncFd<D>,
+        mut buf: Self::Resources,
+        (buf_id, n): cq::OpReturn,
+    ) -> Self::Output {
+        // SAFETY: kernel just initialised the bytes for us.
+        unsafe {
+            buf.buf.buffer_init(BufId(buf_id), n);
+        };
+        buf.buf
+    }
+}
+
+pub(crate) struct MultishotRecvOp;
+
+impl<D: Descriptor> io_uring::FdOp<D> for MultishotRecvOp {
+    type Output = ReadBuf;
+    type Resources = ReadBufPool;
+    type Args = libc::c_int; // flags
+
+    #[allow(clippy::cast_sign_loss)]
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        buf_pool: &mut Self::Resources,
+        flags: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_RECV as u8;
+        submission.0.flags = libc::IOSQE_BUFFER_SELECT;
+        submission.0.ioprio = libc::IORING_RECV_MULTISHOT as _;
+        submission.0.fd = fd.fd();
+        submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+            msg_flags: *flags as _,
+        };
+        submission.0.__bindgen_anon_4.buf_group = buf_pool.group_id().0;
+    }
+
+    fn map_ok(
+        fd: &AsyncFd<D>,
+        mut buf_pool: Self::Resources,
+        (buf_id, n): cq::OpReturn,
+    ) -> Self::Output {
+        MultishotRecvOp::map_next(fd, &mut buf_pool, (buf_id, n))
+    }
+}
+
+impl<D: Descriptor> FdIter<D> for MultishotRecvOp {
+    fn map_next(
+        _: &AsyncFd<D>,
+        buf_pool: &mut Self::Resources,
+        (buf_id, n): cq::OpReturn,
+    ) -> Self::Output {
+        // SAFETY: the kernel initialised the buffers for us as part of the read
+        // call.
+        unsafe { buf_pool.new_buffer(BufId(buf_id), n) }
+    }
+}
+
+pub(crate) struct RecvVectoredOp<B, const N: usize>(PhantomData<*const B>);
+
+impl<B: BufMutSlice<N>, D: Descriptor, const N: usize> io_uring::FdOp<D> for RecvVectoredOp<B, N> {
+    type Output = (B, libc::c_int);
+    type Resources = (B, Box<(MsgHeader, [crate::io::IoMutSlice; N])>);
+    type Args = libc::c_int; // flags
+
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        (_, resources): &mut Self::Resources,
+        flags: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        let (msg, iovecs) = &mut **resources;
+        let address = &mut MaybeUninit::new(NoAddress);
+        fill_recvmsg_submission::<NoAddress>(fd.fd(), msg, iovecs, address, *flags, submission);
+    }
+
+    fn map_ok(
+        _: &AsyncFd<D>,
+        (mut bufs, resources): Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        // SAFETY: the kernel initialised the bytes for us as part of the
+        // recvmsg call.
+        unsafe { bufs.set_init(n as usize) };
+        (bufs, resources.0.flags())
+    }
+}
+
+pub(crate) struct RecvFromOp<B, A>(PhantomData<*const (B, A)>);
+
+impl<B: BufMut, A: SocketAddress, D: Descriptor> io_uring::FdOp<D> for RecvFromOp<B, A> {
+    type Output = (B, A, libc::c_int);
+    type Resources = (
+        B,
+        // These types need a stable address for the duration of the operation.
+        Box<(MsgHeader, crate::io::IoMutSlice, MaybeUninit<A::Storage>)>,
+    );
+    type Args = libc::c_int; // flags
+
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        (buf, resources): &mut Self::Resources,
+        flags: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        let (msg, iovec, address) = &mut **resources;
+        let iovecs = slice::from_mut(&mut *iovec);
+        fill_recvmsg_submission::<A>(fd.fd(), msg, iovecs, address, *flags, submission);
+        if let Some(buf_group) = buf.buffer_group() {
+            submission.0.__bindgen_anon_4.buf_group = buf_group.0;
+            submission.0.flags |= libc::IOSQE_BUFFER_SELECT;
+        }
+    }
+
+    fn map_ok(
+        _: &AsyncFd<D>,
+        (mut buf, resources): Self::Resources,
+        (buf_id, n): cq::OpReturn,
+    ) -> Self::Output {
+        // SAFETY: the kernel initialised the bytes for us as part of the
+        // recvmsg call.
+        unsafe { buf.buffer_init(BufId(buf_id), n) };
+        // SAFETY: kernel initialised the address for us.
+        let address = unsafe { A::init(resources.2, resources.0.address_len()) };
+        (buf, address, resources.0.flags())
+    }
+}
+
+pub(crate) struct RecvFromVectoredOp<B, A, const N: usize>(PhantomData<*const (B, A)>);
+
+impl<B: BufMutSlice<N>, A: SocketAddress, D: Descriptor, const N: usize> io_uring::FdOp<D>
+    for RecvFromVectoredOp<B, A, N>
+{
+    type Output = (B, A, libc::c_int);
+    type Resources = (
+        B,
+        // These types need a stable address for the duration of the operation.
+        Box<(
+            MsgHeader,
+            [crate::io::IoMutSlice; N],
+            MaybeUninit<A::Storage>,
+        )>,
+    );
+    type Args = libc::c_int; // flags
+
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        (_, resources): &mut Self::Resources,
+        flags: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        let (msg, iovecs, address) = &mut **resources;
+        fill_recvmsg_submission::<A>(fd.fd(), msg, iovecs, address, *flags, submission);
+    }
+
+    fn map_ok(
+        _: &AsyncFd<D>,
+        (mut bufs, resources): Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        // SAFETY: the kernel initialised the bytes for us as part of the
+        // recvmsg call.
+        unsafe { bufs.set_init(n as usize) };
+        // SAFETY: kernel initialised the address for us.
+        let address = unsafe { A::init(resources.2, resources.0.address_len()) };
+        (bufs, address, resources.0.flags())
+    }
+}
+
+fn fill_recvmsg_submission<A: SocketAddress>(
+    fd: RawFd,
+    msg: &mut MsgHeader,
+    iovecs: &mut [crate::io::IoMutSlice],
+    address: &mut MaybeUninit<A::Storage>,
+    flags: libc::c_int,
+    submission: &mut sq::Submission,
+) {
+    // SAFETY: `address` and `iovecs` outlive `msg`.
+    unsafe { msg.init_recv::<A>(address, iovecs) };
+
+    submission.0.opcode = libc::IORING_OP_RECVMSG as u8;
+    submission.0.fd = fd;
+    submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+        addr: ptr::from_mut(&mut *msg).addr() as _,
+    };
+    submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+        msg_flags: flags as _,
+    };
+    submission.0.len = 1;
+}
+
+pub(crate) struct SendOp<B>(PhantomData<*const B>);
+
+impl<B: Buf, D: Descriptor> io_uring::FdOp<D> for SendOp<B> {
+    type Output = usize;
+    type Resources = Buffer<B>;
+    type Args = (SendCall, libc::c_int); // send_op, flags
+
+    #[allow(clippy::cast_sign_loss)]
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        buf: &mut Self::Resources,
+        (send_op, flags): &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = match *send_op {
+            SendCall::Normal => libc::IORING_OP_SEND as u8,
+            SendCall::ZeroCopy => libc::IORING_OP_SEND_ZC as u8,
+        };
+        let (buf_ptr, buf_length) = unsafe { buf.buf.parts() };
+        submission.0.fd = fd.fd();
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+            addr: buf_ptr.addr() as u64,
+        };
+        submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+            msg_flags: *flags as _,
+        };
+        submission.0.len = buf_length;
+    }
+
+    fn map_ok(
+        _: &AsyncFd<D>,
+        _: Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        n as usize
+    }
+}
+
+impl<B: Buf, D: Descriptor> FdOpExtract<D> for SendOp<B> {
+    type ExtractOutput = (B, usize);
+
+    fn map_ok_extract(
+        _: &AsyncFd<D>,
+        buf: Self::Resources,
+        (_, n): Self::OperationOutput,
+    ) -> Self::ExtractOutput {
+        (buf.buf, n as usize)
+    }
+}
+
+pub(crate) struct SendToOp<B, A>(PhantomData<*const (B, A)>);
+
+impl<B: Buf, A: SocketAddress, D: Descriptor> io_uring::FdOp<D> for SendToOp<B, A> {
+    type Output = usize;
+    type Resources = (B, Box<A::Storage>);
+    type Args = (SendCall, libc::c_int); // send_op, flags
+
+    #[allow(clippy::cast_sign_loss)]
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        (buf, address): &mut Self::Resources,
+        (send_op, flags): &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = match *send_op {
+            SendCall::Normal => libc::IORING_OP_SEND as u8,
+            SendCall::ZeroCopy => libc::IORING_OP_SEND_ZC as u8,
+        };
+        let (buf_ptr, buf_length) = unsafe { buf.parts() };
+        let (address_ptr, address_length) = unsafe { A::as_ptr(address) };
+        submission.0.fd = fd.fd();
+        submission.0.__bindgen_anon_1.addr2 = address_ptr.addr() as u64;
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+            addr: buf_ptr.addr() as u64,
+        };
+        submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+            msg_flags: *flags as _,
+        };
+        submission.0.__bindgen_anon_5.__bindgen_anon_1.addr_len = address_length as u16;
+        submission.0.len = buf_length;
+    }
+
+    fn map_ok(
+        _: &AsyncFd<D>,
+        _: Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        n as usize
+    }
+}
+
+impl<B: Buf, A: SocketAddress, D: Descriptor> FdOpExtract<D> for SendToOp<B, A> {
+    type ExtractOutput = (B, usize);
+
+    fn map_ok_extract(
+        _: &AsyncFd<D>,
+        (buf, _): Self::Resources,
+        (_, n): Self::OperationOutput,
+    ) -> Self::ExtractOutput {
+        (buf, n as usize)
+    }
+}
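Aside (not part of the patch): these submissions hand the kernel a `msghdr` whose `msg_name`/`msg_iov` point at caller-owned storage, which is why the resources are boxed for a stable address. A blocking sketch of the same wiring with raw libc types (no socket involved, just the structure `MsgHeader::init_recv` mirrors):

use std::mem::MaybeUninit;

fn main() {
    let mut buf = [0u8; 64];
    let mut iov = libc::iovec {
        iov_base: buf.as_mut_ptr().cast(),
        iov_len: buf.len(),
    };
    let mut addr = MaybeUninit::<libc::sockaddr_storage>::uninit();
    // Start from a zeroed msghdr, then point it at the address storage and
    // the I/O vector, as done for IORING_OP_RECVMSG above.
    let mut msg = unsafe { MaybeUninit::<libc::msghdr>::zeroed().assume_init() };
    msg.msg_name = addr.as_mut_ptr().cast();
    msg.msg_namelen = std::mem::size_of::<libc::sockaddr_storage>() as libc::socklen_t;
    msg.msg_iov = &mut iov;
    msg.msg_iovlen = 1;
    let _ = &msg; // Ready to pass to recvmsg(2).
}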
+
+pub(crate) struct SendMsgOp<B, A, const N: usize>(PhantomData<*const (B, A)>);
+
+impl<B: BufSlice<N>, A: SocketAddress, D: Descriptor, const N: usize> io_uring::FdOp<D>
+    for SendMsgOp<B, A, N>
+{
+    type Output = usize;
+    type Resources = (
+        B,
+        // These types need a stable address for the duration of the operation.
+        Box<(MsgHeader, [crate::io::IoSlice; N], A::Storage)>,
+    );
+    type Args = (SendCall, libc::c_int); // send_op, flags
+
+    #[allow(clippy::cast_sign_loss)]
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        (_, resources): &mut Self::Resources,
+        (send_op, flags): &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        let (msg, iovecs, address) = &mut **resources;
+        // SAFETY: `address` and `iovecs` outlive `msg`.
+        unsafe { msg.init_send::<A>(address, iovecs) };
+
+        submission.0.opcode = match *send_op {
+            SendCall::Normal => libc::IORING_OP_SENDMSG as u8,
+            SendCall::ZeroCopy => libc::IORING_OP_SENDMSG_ZC as u8,
+        };
+        submission.0.fd = fd.fd();
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+            addr: ptr::from_mut(&mut *msg).addr() as _,
+        };
+        submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+            msg_flags: *flags as _,
+        };
+        submission.0.len = 1;
+    }
+
+    fn map_ok(
+        _: &AsyncFd<D>,
+        _: Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        n as usize
+    }
+}
+
+impl<B: BufSlice<N>, A: SocketAddress, D: Descriptor, const N: usize> FdOpExtract<D>
+    for SendMsgOp<B, A, N>
+{
+    type ExtractOutput = (B, usize);
+
+    fn map_ok_extract(
+        _: &AsyncFd<D>,
+        (buf, _): Self::Resources,
+        (_, n): Self::OperationOutput,
+    ) -> Self::ExtractOutput {
+        (buf, n as usize)
+    }
+}
+
+pub(crate) struct AcceptOp<A, D>(PhantomData<*const (A, D)>);
+
+impl<A: SocketAddress, D: Descriptor> io_uring::FdOp<D> for AcceptOp<A, D> {
+    type Output = (AsyncFd<D>, A);
+    type Resources = AddressStorage<Box<(MaybeUninit<A::Storage>, libc::socklen_t)>>;
+    type Args = libc::c_int; // flags
+
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        resources: &mut Self::Resources,
+        flags: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        let (ptr, length) = unsafe { A::as_mut_ptr(&mut (resources.0).0) };
+        let address_length = &mut (resources.0).1;
+        *address_length = length;
+        submission.0.opcode = libc::IORING_OP_ACCEPT as u8;
+        submission.0.fd = fd.fd();
+        submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 {
+            off: ptr::from_mut(address_length).addr() as _,
+        };
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+            addr: ptr.addr() as _,
+        };
+        submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+            accept_flags: *flags as _,
+        };
+        submission.0.flags |= libc::IOSQE_ASYNC;
+        D::create_flags(submission);
+    }
+
+    fn map_ok(
+        lfd: &AsyncFd<D>,
+        resources: Self::Resources,
+        (_, fd): cq::OpReturn,
+    ) -> Self::Output {
+        let sq = lfd.sq.clone();
+        // SAFETY: the accept operation ensures that `fd` is valid.
+        let socket = unsafe { AsyncFd::from_raw(fd as _, sq) };
+        // SAFETY: the kernel has written the address for us.
+        let address = unsafe { A::init((resources.0).0, (resources.0).1) };
+        (socket, address)
+    }
+}
+
+pub(crate) struct MultishotAcceptOp<D>(PhantomData<*const D>);
+
+impl<D: Descriptor> io_uring::FdOp<D> for MultishotAcceptOp<D> {
+    type Output = AsyncFd<D>;
+    type Resources = ();
+    type Args = libc::c_int; // flags
+
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        (): &mut Self::Resources,
+        flags: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_ACCEPT as u8;
+        submission.0.ioprio = libc::IORING_ACCEPT_MULTISHOT as _;
+        submission.0.fd = fd.fd();
+        submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+            accept_flags: *flags as _,
+        };
+        submission.0.flags = libc::IOSQE_ASYNC;
+        D::create_flags(submission);
+    }
+
+    fn map_ok(
+        lfd: &AsyncFd<D>,
+        (): Self::Resources,
+        ok: cq::OpReturn,
+    ) -> Self::Output {
+        MultishotAcceptOp::map_next(lfd, &mut (), ok)
+    }
+}
+
+impl<D: Descriptor> FdIter<D> for MultishotAcceptOp<D> {
+    fn map_next(
+        lfd: &AsyncFd<D>,
+        (): &mut Self::Resources,
+        (_, fd): cq::OpReturn,
+    ) -> Self::Output {
+        let sq = lfd.sq.clone();
+        // SAFETY: the accept operation ensures that `fd` is valid.
+        unsafe { AsyncFd::from_raw(fd as _, sq) }
+    }
+}
+
+pub(crate) struct SocketOptionOp<T>(PhantomData<*const T>);
+
+impl<T, D: Descriptor> io_uring::FdOp<D> for SocketOptionOp<T> {
+    type Output = T;
+    type Resources = Box<MaybeUninit<T>>;
+    type Args = (libc::c_int, libc::c_int); // level, optname.
+
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        value: &mut Self::Resources,
+        (level, optname): &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_URING_CMD as u8;
+        submission.0.fd = fd.fd();
+        submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 {
+            __bindgen_anon_1: libc::io_uring_sqe__bindgen_ty_1__bindgen_ty_1 {
+                cmd_op: libc::SOCKET_URING_OP_GETSOCKOPT,
+                __pad1: 0,
+            },
+        };
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+            __bindgen_anon_1: libc::io_uring_sqe__bindgen_ty_2__bindgen_ty_1 {
+                level: *level as _,
+                optname: *optname as _,
+            },
+        };
+        submission.0.__bindgen_anon_5 = libc::io_uring_sqe__bindgen_ty_5 {
+            optlen: size_of::<T>() as u32,
+        };
+        submission.0.__bindgen_anon_6 = libc::io_uring_sqe__bindgen_ty_6 {
+            optval: ManuallyDrop::new(value.as_mut_ptr().addr() as _),
+        };
+    }
+
+    fn map_ok(
+        _: &AsyncFd<D>,
+        value: Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        debug_assert!(n == (size_of::<T>() as _));
+        // SAFETY: the kernel initialised the value for us as part of the
+        // getsockopt call.
+        unsafe { MaybeUninit::assume_init(*value) }
+    }
+}
+
+pub(crate) struct SetSocketOptionOp<T>(PhantomData<*const T>);
+
+impl<T, D: Descriptor> io_uring::FdOp<D> for SetSocketOptionOp<T> {
+    type Output = ();
+    type Resources = Box<T>;
+    type Args = (libc::c_int, libc::c_int); // level, optname.
+
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        value: &mut Self::Resources,
+        (level, optname): &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_URING_CMD as u8;
+        submission.0.fd = fd.fd();
+        submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 {
+            __bindgen_anon_1: libc::io_uring_sqe__bindgen_ty_1__bindgen_ty_1 {
+                cmd_op: libc::SOCKET_URING_OP_SETSOCKOPT,
+                __pad1: 0,
+            },
+        };
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+            __bindgen_anon_1: libc::io_uring_sqe__bindgen_ty_2__bindgen_ty_1 {
+                level: *level as _,
+                optname: *optname as _,
+            },
+        };
+        submission.0.__bindgen_anon_5 = libc::io_uring_sqe__bindgen_ty_5 {
+            optlen: size_of::<T>() as u32,
+        };
+        submission.0.__bindgen_anon_6 = libc::io_uring_sqe__bindgen_ty_6 {
+            optval: ManuallyDrop::new(ptr::from_ref(&**value).addr() as _),
+        };
+    }
+
+    fn map_ok(
+        _: &AsyncFd<D>,
+        _: Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        debug_assert!(n == 0);
+    }
+}
+
+impl<T, D: Descriptor> FdOpExtract<D> for SetSocketOptionOp<T> {
+    type ExtractOutput = T;
+
+    fn map_ok_extract(
+        _: &AsyncFd<D>,
+        value: Self::Resources,
+        (_, n): Self::OperationOutput,
+    ) -> Self::ExtractOutput {
+        debug_assert!(n == 0);
+        *value
+    }
+}
+
+pub(crate) struct ShutdownOp;
+
+impl<D: Descriptor> io_uring::FdOp<D> for ShutdownOp {
+    type Output = ();
+    type Resources = ();
+    type Args = std::net::Shutdown;
+
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        (): &mut Self::Resources,
+        how: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_SHUTDOWN as u8;
+        submission.0.fd = fd.fd();
+        submission.0.len = match how {
+            std::net::Shutdown::Read => libc::SHUT_RD,
+            std::net::Shutdown::Write => libc::SHUT_WR,
+            std::net::Shutdown::Both => libc::SHUT_RDWR,
+        } as u32;
+    }
+
+    fn map_ok(
+        _: &AsyncFd<D>,
+        (): Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        debug_assert!(n == 0);
+    }
+}
diff --git a/src/io_uring/poll.rs b/src/io_uring/poll.rs
new file mode 100644
index 00000000..184adbb5
--- /dev/null
+++ b/src/io_uring/poll.rs
@@ -0,0 +1,61 @@
+use std::os::fd::RawFd;
+
+use crate::io_uring::{self, cq, libc, sq};
+use crate::op::Iter;
+use crate::poll::PollEvent;
+use crate::SubmissionQueue;
+
+pub(crate) struct OneshotPollOp;
+
+impl io_uring::Op for OneshotPollOp {
+    type Output = PollEvent;
+    type Resources = ();
+    type Args = (RawFd, libc::c_int); // fd, mask.
+
+    fn fill_submission(
+        (): &mut Self::Resources,
+        (fd, mask): &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_POLL_ADD as u8;
+        submission.0.fd = *fd;
+        submission.0.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 {
+            poll32_events: *mask as _,
+        };
+    }
+
+    fn map_ok(_: &SubmissionQueue, (): Self::Resources, (_, events): cq::OpReturn) -> Self::Output {
+        PollEvent(events as _)
+    }
+}
+
+pub(crate) struct MultishotPollOp;
+
+impl io_uring::Op for MultishotPollOp {
+    type Output = PollEvent;
+    type Resources = ();
+    type Args = (RawFd, libc::c_int); // fd, mask.
+
+    fn fill_submission(
+        resources: &mut Self::Resources,
+        args: &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        OneshotPollOp::fill_submission(resources, args, submission);
+        submission.0.len = libc::IORING_POLL_ADD_MULTI;
+    }
+
+    fn map_ok(sq: &SubmissionQueue, (): Self::Resources, ok: cq::OpReturn) -> Self::Output {
+        MultishotPollOp::map_next(sq, &mut (), ok)
+    }
+}
+
+impl Iter for MultishotPollOp {
+    fn map_next(
+        _: &SubmissionQueue,
+        (): &mut Self::Resources,
+        (_, events): cq::OpReturn,
+    ) -> Self::Output {
+        PollEvent(events as _)
+    }
+}
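Aside (not part of the patch): `PollEvent` wraps the readiness mask produced by `IORING_OP_POLL_ADD`, which uses the same bits as `poll(2)`. Checking bits out of such a mask, using libc constants and an illustrative mask value:

fn main() {
    // A completion might report both readable and hang-up at once.
    let events: i16 = libc::POLLIN | libc::POLLHUP;
    assert!(events & libc::POLLIN != 0);
    assert!(events & libc::POLLOUT == 0);
    // A multishot poll keeps producing masks like this until cancelled.
}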
diff --git a/src/io_uring/process.rs b/src/io_uring/process.rs
new file mode 100644
index 00000000..63aec85e
--- /dev/null
+++ b/src/io_uring/process.rs
@@ -0,0 +1,123 @@
+use std::os::fd::RawFd;
+use std::ptr;
+
+use crate::fd::{AsyncFd, Descriptor, Direct, File};
+use crate::io::NO_OFFSET;
+use crate::io_uring::{self, cq, libc, sq};
+use crate::op::FdIter;
+use crate::process::{Signals, WaitOn};
+use crate::SubmissionQueue;
+
+pub(crate) struct WaitIdOp;
+
+impl io_uring::Op for WaitIdOp {
+    type Output = Box<libc::siginfo_t>;
+    type Resources = Box<libc::siginfo_t>;
+    type Args = (WaitOn, libc::c_int); // options.
+
+    #[allow(clippy::cast_sign_loss)]
+    fn fill_submission(
+        info: &mut Self::Resources,
+        (wait, options): &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        let (id_type, pid) = match *wait {
+            WaitOn::Process(pid) => (libc::P_PID, pid),
+            WaitOn::Group(pid) => (libc::P_PGID, pid),
+            WaitOn::All => (libc::P_ALL, 0), // NOTE: id is ignored.
+        };
+        submission.0.opcode = libc::IORING_OP_WAITID as u8;
+        submission.0.fd = pid as _;
+        submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 {
+            addr2: ptr::from_mut(&mut **info).addr() as _,
+        };
+        submission.0.len = id_type;
+        submission.0.__bindgen_anon_5 = libc::io_uring_sqe__bindgen_ty_5 {
+            file_index: *options as _,
+        };
+    }
+
+    fn map_ok(_: &SubmissionQueue, info: Self::Resources, (_, n): cq::OpReturn) -> Self::Output {
+        debug_assert!(n == 0);
+        info
+    }
+}
+
+pub(crate) struct ToSignalsDirectOp;
+
+impl io_uring::Op for ToSignalsDirectOp {
+    type Output = Signals<Direct>;
+    type Resources = (Signals<File>, Box<RawFd>);
+    type Args = ();
+
+    #[allow(clippy::cast_sign_loss)]
+    fn fill_submission(
+        (_, fd): &mut Self::Resources,
+        (): &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_FILES_UPDATE as u8;
+        submission.0.fd = -1;
+        submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 {
+            off: libc::IORING_FILE_INDEX_ALLOC as _,
+        };
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+            addr: ptr::from_mut(&mut **fd).addr() as _,
+        };
+        submission.0.len = 1;
+    }
+
+    fn map_ok(
+        sq: &SubmissionQueue,
+        (signals, dfd): Self::Resources,
+        (_, n): cq::OpReturn,
+    ) -> Self::Output {
+        debug_assert!(n == 1);
+        // SAFETY: the kernel ensures that `dfd` is valid.
+        let dfd = unsafe { AsyncFd::from_raw(*dfd, sq.clone()) };
+        unsafe { signals.change_fd(dfd) }
+    }
+}
+
+pub(crate) struct ReceiveSignalOp;
+
+impl<D: Descriptor> io_uring::FdOp<D> for ReceiveSignalOp {
+    type Output = libc::signalfd_siginfo;
+    type Resources = Box<libc::signalfd_siginfo>;
+    type Args = ();
+
+    fn fill_submission(
+        fd: &AsyncFd<D>,
+        info: &mut Self::Resources,
+        (): &mut Self::Args,
+        submission: &mut sq::Submission,
+    ) {
+        submission.0.opcode = libc::IORING_OP_READ as u8;
+        submission.0.fd = fd.fd();
+        submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: NO_OFFSET };
+        submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 {
+            addr: ptr::from_mut(&mut **info).addr() as _,
+        };
+        submission.0.len = size_of::<libc::signalfd_siginfo>() as u32;
+        submission.set_async();
+    }
+
+    fn map_ok(
+        fd: &AsyncFd<D>,
+        mut info: Self::Resources,
+        ok: cq::OpReturn,
+    ) -> Self::Output {
+        ReceiveSignalOp::map_next(fd, &mut info, ok)
+    }
+}
+
+impl<D: Descriptor> FdIter<D> for ReceiveSignalOp {
+    fn map_next(
+        _: &AsyncFd<D>,
+        info: &mut Self::Resources,
+        (_, n): Self::OperationOutput,
+    ) -> Self::Output {
+        debug_assert!(n == size_of::<libc::signalfd_siginfo>() as u32);
+        **info
+    }
+}
diff --git a/src/io_uring/sq.rs b/src/io_uring/sq.rs
new file mode 100644
index 00000000..77df54f3
--- /dev/null
+++ b/src/io_uring/sq.rs
@@ -0,0 +1,449 @@
+use std::os::fd::AsRawFd;
+use std::sync::atomic::{self, AtomicBool, Ordering};
+use std::{fmt, io, ptr};
+
+use crate::io_uring::{cancel, libc, Shared};
+use crate::sq::{Cancelled, QueueFull};
+use crate::{OperationId, WAKE_ID};
+
+/// NOTE: all the state is in [`Shared`].
+#[derive(Debug)]
+pub(crate) struct Submissions {
+    // All state is in `Shared`.
+}
+
+impl Submissions {
+    pub(crate) const fn new() -> Submissions {
+        Submissions {}
+    }
+}
+
+impl crate::sq::Submissions for Submissions {
+    type Shared = Shared;
+    type Submission = Submission;
+
+    #[allow(clippy::mutex_integer)]
+    fn add<F>(
+        &self,
+        shared: &Self::Shared,
+        is_polling: &AtomicBool,
+        submit: F,
+    ) -> Result<(), QueueFull>
+    where
+        F: FnOnce(&mut Self::Submission),
+    {
+        // First we need to acquire mutable access to a `Submission` entry in
+        // the `entries` array.
+        //
+        // We do this by increasing `pending_tail` by 1, reserving
+        // `entries[pending_tail]` for ourselves, while ensuring we don't go
+        // beyond what the kernel has processed by checking that
+        // `tail - kernel_read` is less than the length of the submission queue.
+        let kernel_read = shared.kernel_read();
+        let tail = shared
+            .pending_tail
+            .fetch_update(Ordering::AcqRel, Ordering::Acquire, |tail| {
+                if tail - kernel_read < shared.entries_len {
+                    // Still an entry available.
+                    Some(tail.wrapping_add(1))
+                } else {
+                    None
+                }
+            });
+        let Ok(tail) = tail else {
+            // If the kernel thread is not awake we'll need to wake it to make
+            // space in the submission queue.
+            shared.maybe_wake_kernel_thread();
+            return Err(QueueFull);
+        };
+
+        // SAFETY: the `ring_mask` ensures we can never get an index larger
+        // than the size of the queue. Above we've already ensured that
+        // we're the only thread with mutable access to the entry.
+        let submission_index = tail & shared.entries_mask;
+        let submission = unsafe { &mut *shared.entries.add(submission_index as usize) };
+
+        // Let the caller fill the `submission`.
+        submission.reset();
+        submit(submission);
+        #[cfg(debug_assertions)]
+        debug_assert!(!submission.is_unchanged());
+
+        // Ensure that all writes to the `submission` are done.
+        atomic::fence(Ordering::SeqCst);
+
+        // Now that we've written our submission we need to add it to the
+        // `array` so that the kernel can process it.
+        log::trace!(submission:? = submission; "queueing submission");
+        {
+            // Now that the submission is filled we need to add it to the
+            // `shared.array` so that the kernel can read from it.
+            //
+            // We do this with a lock to avoid a race condition between two
+            // threads incrementing `shared.tail` concurrently. Consider the
+            // following execution:
+            //
+            // Thread A                            | Thread B
+            // ...                                 | ...
+            // ...                                 | Got `array_index` 0.
+            // Got `array_index` 1.                |
+            // Writes index to `shared.array[1]`.  |
+            // `shared.tail.fetch_add` to 1.       |
+            // At this point the kernel will/can read `shared.array[0]`, but
+            // thread B hasn't filled it yet. So the kernel will read an invalid
+            // index!
+            //                                     | Writes index to `shared.array[0]`.
+            //                                     | `shared.tail.fetch_add` to 2.
+
+            let mut array_index = shared.array_index.lock().unwrap();
+            let idx = (*array_index & shared.entries_mask) as usize;
+            // SAFETY: `idx` is masked above to be within the correct bounds.
+            unsafe { (*shared.array.add(idx)).store(submission_index, Ordering::Release) };
+            // SAFETY: we filled the array above.
+            let old_tail = unsafe { (*shared.array_tail).fetch_add(1, Ordering::AcqRel) };
+            debug_assert!(old_tail == *array_index);
+            *array_index += 1;
+        }
+
+        // If the kernel thread is not awake we'll need to wake it for it to
+        // process our submission.
+        shared.maybe_wake_kernel_thread();
+        // When we're not using the kernel polling thread we might have to
+        // submit the event ourselves to ensure we can make progress while the
+        // (user space) polling thread is calling `Ring::poll`.
+        shared.maybe_submit_event(is_polling);
+        Ok(())
+    }
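Aside (not part of the patch): the index math in `add` works because the kernel sizes the queues as powers of two, so `tail & (len - 1)` is equivalent to `tail % len` and stays correct across `u32` wrap-around:

fn main() {
    let entries_len: u32 = 8; // Always a power of two.
    let entries_mask = entries_len - 1; // 0b0111, as in Shared::entries_mask.
    // Even once the unmasked tail wraps, the masked slot index keeps
    // cycling through 0..entries_len without skipping a slot.
    let mut tail: u32 = u32::MAX - 2;
    let mut slots = Vec::new();
    for _ in 0..6 {
        slots.push(tail & entries_mask);
        tail = tail.wrapping_add(1);
    }
    assert_eq!(slots, [5, 6, 7, 0, 1, 2]);
}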
+ Cancelled::Async + } + }, + } + } + + fn wake(&self, shared: &Self::Shared) -> io::Result<()> { + // This is only called if we're not polling, so we can set `is_polling` + // to false and we ignore the queue full error. + let is_polling = AtomicBool::new(false); + let _: Result<(), QueueFull> = self.add(shared, &is_polling, |submission| { + submission.0.opcode = libc::IORING_OP_MSG_RING as u8; + submission.0.fd = shared.rfd.as_raw_fd(); + submission.0.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { + addr: u64::from(libc::IORING_MSG_DATA), + }; + submission.0.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: WAKE_ID as _ }; + submission.no_completion_event(); + }); + Ok(()) + } +} + +/// Submission event. +/// +/// # Safety +/// +/// It is up to the caller to ensure any data passed to the kernel outlives the +/// operation. +#[repr(transparent)] +pub(crate) struct Submission(pub(crate) libc::io_uring_sqe); + +impl Submission { + /// Reset the submission. + #[allow(clippy::assertions_on_constants)] + fn reset(&mut self) { + debug_assert!(libc::IORING_OP_NOP == 0); + unsafe { ptr::addr_of_mut!(self.0).write_bytes(0, 1) }; + } + + /// Don't return a completion event for this submission. + pub(crate) fn no_completion_event(&mut self) { + self.0.flags |= libc::IOSQE_CQE_SKIP_SUCCESS; + } + + /// Returns `true` if the submission is unchanged after a [`reset`]. + /// + /// [`reset`]: Submission::reset + #[cfg(debug_assertions)] + const fn is_unchanged(&self) -> bool { + self.0.opcode == libc::IORING_OP_NOP as u8 + } + + /// Don't attempt to do the operation non-blocking first, always execute it + /// in an async manner. + pub(crate) fn set_async(&mut self) { + self.0.flags |= libc::IOSQE_ASYNC; + } +} + +impl crate::sq::Submission for Submission { + fn set_id(&mut self, id: OperationId) { + self.0.user_data = id as _; + } +} + +impl fmt::Debug for Submission { + #[allow(clippy::too_many_lines)] // Not beneficial to split this up. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Helper functions with common patterns. + fn io_op(f: &mut fmt::DebugStruct<'_, '_>, submission: &libc::io_uring_sqe, name: &str) { + f.field("opcode", &name) + .field("fd", &submission.fd) + .field("offset", unsafe { &submission.__bindgen_anon_1.off }) + .field("addr", unsafe { &submission.__bindgen_anon_2.addr }) + .field("len", &submission.len); + } + fn net_op(f: &mut fmt::DebugStruct<'_, '_>, submission: &libc::io_uring_sqe, name: &str) { + // NOTE: can't reference a packed struct's field. 
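+            // (Taking a reference to a field of a packed struct would create
+            // a possibly unaligned reference, which is undefined behaviour,
+            // hence the copy into a local before passing it to `field`.)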
+ let buf_group = unsafe { submission.__bindgen_anon_4.buf_group }; + f.field("opcode", &name) + .field("fd", &submission.fd) + .field("addr", unsafe { &submission.__bindgen_anon_2.addr }) + .field("len", &submission.len) + .field("msg_flags", unsafe { + &submission.__bindgen_anon_3.msg_flags + }) + .field("ioprio", &submission.ioprio) + .field("buf_group", &buf_group); + } + + let mut f = f.debug_struct("io_uring::Submission"); + match u32::from(self.0.opcode) { + libc::IORING_OP_NOP => { + f.field("opcode", &"IORING_OP_NOP"); + } + libc::IORING_OP_FSYNC => { + f.field("opcode", &"IORING_OP_FSYNC") + .field("fd", &self.0.fd) + .field("fsync_flags", unsafe { + &self.0.__bindgen_anon_3.fsync_flags + }); + } + libc::IORING_OP_READ => io_op(&mut f, &self.0, "IORING_OP_READ"), + libc::IORING_OP_READV => io_op(&mut f, &self.0, "IORING_OP_READV"), + libc::IORING_OP_WRITE => io_op(&mut f, &self.0, "IORING_OP_WRITE"), + libc::IORING_OP_WRITEV => io_op(&mut f, &self.0, "IORING_OP_WRITEV"), + libc::IORING_OP_RENAMEAT => { + f.field("opcode", &"IORING_OP_RENAMEAT") + .field("old_fd", &self.0.fd) + .field("old_path", unsafe { &self.0.__bindgen_anon_2.addr }) + .field("new_fd", &self.0.len) + .field("new_path", unsafe { &self.0.__bindgen_anon_1.off }) + .field("rename_flags", unsafe { + &self.0.__bindgen_anon_3.rename_flags + }); + } + libc::IORING_OP_SOCKET => { + f.field("opcode", &"IORING_OP_SOCKET") + .field("domain", &self.0.fd) + .field("type", unsafe { &self.0.__bindgen_anon_1.off }) + .field("file_index", unsafe { &self.0.__bindgen_anon_5.file_index }) + .field("protocol", &self.0.len) + .field("rw_flags", unsafe { &self.0.__bindgen_anon_3.rw_flags }); + } + libc::IORING_OP_CONNECT => { + f.field("opcode", &"IORING_OP_CONNECT") + .field("fd", &self.0.fd) + .field("addr", unsafe { &self.0.__bindgen_anon_2.addr }) + .field("addr_size", unsafe { &self.0.__bindgen_anon_1.off }); + } + libc::IORING_OP_SEND => net_op(&mut f, &self.0, "IORING_OP_SEND"), + libc::IORING_OP_SEND_ZC => net_op(&mut f, &self.0, "IORING_OP_SEND_ZC"), + libc::IORING_OP_SENDMSG => net_op(&mut f, &self.0, "IORING_OP_SENDMSG"), + libc::IORING_OP_SENDMSG_ZC => net_op(&mut f, &self.0, "IORING_OP_SENDMSG_ZC"), + libc::IORING_OP_RECV => net_op(&mut f, &self.0, "IORING_OP_RECV"), + libc::IORING_OP_RECVMSG => net_op(&mut f, &self.0, "IORING_OP_RECVMSG"), + libc::IORING_OP_SHUTDOWN => { + f.field("opcode", &"IORING_OP_SHUTDOWN") + .field("fd", &self.0.fd) + .field("how", &self.0.len); + } + libc::IORING_OP_ACCEPT => { + f.field("opcode", &"IORING_OP_ACCEPT") + .field("fd", &self.0.fd) + .field("addr", unsafe { &self.0.__bindgen_anon_2.addr }) + .field("addr_size", unsafe { &self.0.__bindgen_anon_1.off }) + .field("accept_flags", unsafe { + &self.0.__bindgen_anon_3.accept_flags + }) + .field("file_index", unsafe { &self.0.__bindgen_anon_5.file_index }) + .field("ioprio", &self.0.ioprio); + } + libc::IORING_OP_ASYNC_CANCEL => { + f.field("opcode", &"IORING_OP_ASYNC_CANCEL"); + let cancel_flags = unsafe { self.0.__bindgen_anon_3.cancel_flags }; + #[allow(clippy::if_not_else)] + if (cancel_flags & libc::IORING_ASYNC_CANCEL_FD) != 0 { + f.field("fd", &self.0.fd) + .field("cancel_flags", &cancel_flags); + } else { + f.field("addr", unsafe { &self.0.__bindgen_anon_2.addr }); + } + } + libc::IORING_OP_OPENAT => { + f.field("opcode", &"IORING_OP_OPENAT") + .field("dirfd", &self.0.fd) + .field("pathname", unsafe { &self.0.__bindgen_anon_2.addr }) + .field("mode", &self.0.len) + .field("open_flags", unsafe { &self.0.__bindgen_anon_3.open_flags }) 
+ .field("file_index", unsafe { &self.0.__bindgen_anon_5.file_index }); + } + libc::IORING_OP_SPLICE => { + f.field("opcode", &"IORING_OP_SPLICE") + .field("fd_in", unsafe { &self.0.__bindgen_anon_5.splice_fd_in }) + .field("off_in", unsafe { &self.0.__bindgen_anon_2.splice_off_in }) + .field("fd_out", &self.0.fd) + .field("off_out", unsafe { &self.0.__bindgen_anon_1.off }) + .field("len", &self.0.len) + .field("splice_flags", unsafe { + &self.0.__bindgen_anon_3.splice_flags + }); + } + libc::IORING_OP_CLOSE => { + f.field("opcode", &"IORING_OP_CLOSE") + .field("fd", &self.0.fd); + } + libc::IORING_OP_FILES_UPDATE => { + f.field("opcode", &"IORING_OP_FILES_UPDATE") + .field("fd", &self.0.fd) + .field("offset", unsafe { &self.0.__bindgen_anon_1.off }) + .field("fds", unsafe { &self.0.__bindgen_anon_2.addr }) + .field("len", &self.0.len); + } + libc::IORING_OP_STATX => { + f.field("opcode", &"IORING_OP_STATX") + .field("fd", &self.0.fd) + .field("pathname", unsafe { &self.0.__bindgen_anon_2.addr }) + .field("statx_flags", unsafe { + &self.0.__bindgen_anon_3.statx_flags + }) + .field("mask", &self.0.len) + .field("statx", unsafe { &self.0.__bindgen_anon_1.off }); + } + libc::IORING_OP_FADVISE => { + f.field("opcode", &"IORING_OP_FADVISE") + .field("fd", &self.0.fd) + .field("offset", unsafe { &self.0.__bindgen_anon_1.off }) + .field("len", &self.0.len) + .field("advise", unsafe { &self.0.__bindgen_anon_3.fadvise_advice }); + } + libc::IORING_OP_FALLOCATE => { + f.field("opcode", &"IORING_OP_FALLOCATE") + .field("fd", &self.0.fd) + .field("offset", unsafe { &self.0.__bindgen_anon_1.off }) + .field("len", unsafe { &self.0.__bindgen_anon_2.addr }) + .field("mode", &self.0.len); + } + libc::IORING_OP_UNLINKAT => { + f.field("opcode", &"IORING_OP_UNLINKAT") + .field("dirfd", &self.0.fd) + .field("path", unsafe { &self.0.__bindgen_anon_2.addr }) + .field("unlink_flags", unsafe { + &self.0.__bindgen_anon_3.unlink_flags + }); + } + libc::IORING_OP_MKDIRAT => { + f.field("opcode", &"IORING_OP_MKDIRAT") + .field("dirfd", &self.0.fd) + .field("path", unsafe { &self.0.__bindgen_anon_2.addr }) + .field("mode", &self.0.len); + } + libc::IORING_OP_POLL_ADD => { + f.field("opcode", &"IORING_OP_POLL_ADD") + .field("fd", &self.0.fd) + .field("poll_events", unsafe { + &self.0.__bindgen_anon_3.poll32_events + }) + .field("multishot", &(self.0.len == libc::IORING_POLL_ADD_MULTI)); + } + libc::IORING_OP_POLL_REMOVE => { + f.field("opcode", &"IORING_OP_POLL_REMOVE") + .field("target_user_data", unsafe { &self.0.__bindgen_anon_2.addr }); + } + libc::IORING_OP_MADVISE => { + f.field("opcode", &"IORING_OP_MADVISE") + .field("address", unsafe { &self.0.__bindgen_anon_2.addr }) + .field("len", &self.0.len) + .field("advise", unsafe { &self.0.__bindgen_anon_3.fadvise_advice }); + } + libc::IORING_OP_MSG_RING => { + f.field("opcode", &"IORING_OP_MSG_RING") + .field("ringfd", &self.0.fd) + .field("msg1", &self.0.len) + .field("msg2", unsafe { &self.0.__bindgen_anon_1.off }); + } + libc::IORING_OP_WAITID => { + f.field("opcode", &"IORING_OP_WAITID") + .field("id", &self.0.fd) + .field("id_type", &self.0.len) + .field("options", unsafe { &self.0.__bindgen_anon_5.file_index }) + .field("info", unsafe { &self.0.__bindgen_anon_1.addr2 }); + } + libc::IORING_OP_FIXED_FD_INSTALL => { + f.field("opcode", &"IORING_OP_FIXED_FD_INSTALL") + .field("fd", &self.0.fd) + .field("install_fd_flags", unsafe { + &self.0.__bindgen_anon_3.install_fd_flags + }); + } + _ => { + // NOTE: we can't access the unions safely without know what + // 
fields to read.
+                f.field("opcode", &self.0.opcode)
+                    .field("ioprio", &self.0.ioprio)
+                    .field("fd", &self.0.fd)
+                    .field("len", &self.0.len)
+                    .field("personality", &self.0.personality);
+            }
+        }
+        f.field("flags", &self.0.flags)
+            .field("user_data", &self.0.user_data)
+            .finish()
+    }
+}
diff --git a/src/kqueue/config.rs b/src/kqueue/config.rs
new file mode 100644
index 00000000..02cb7dd3
--- /dev/null
+++ b/src/kqueue/config.rs
@@ -0,0 +1,90 @@
+//! kqueue configuration.
+
+use std::marker::PhantomData;
+use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
+use std::sync::Mutex;
+use std::{io, mem, ptr};
+
+use crate::kqueue::{self, Completions, Shared, Submissions};
+use crate::{syscall, WAKE_ID};
+
+#[derive(Debug, Clone)]
+pub(crate) struct Config<'r> {
+    max_events: Option<usize>,
+    max_change_list_size: Option<usize>,
+    _unused: PhantomData<&'r ()>,
+}
+
+impl<'r> Config<'r> {
+    pub(crate) const fn new() -> Config<'r> {
+        Config {
+            max_events: None,
+            max_change_list_size: None,
+            _unused: PhantomData,
+        }
+    }
+}
+
+/// kqueue specific configuration.
+impl<'r> crate::Config<'r> {
+    /// Set the maximum number of events that can be collected in a single call
+    /// to `kevent(2)`.
+    ///
+    /// Defaults to the same value as the maximum number of queued operations
+    /// (see [`Ring::config`]).
+    ///
+    /// [`Ring::config`]: crate::Ring::config
+    pub const fn max_events(mut self, max_events: usize) -> Self {
+        self.sys.max_events = Some(max_events);
+        self
+    }
+
+    /// Set the maximum number of submissions that are buffered before
+    /// submitting to the kernel, without waiting on a call to [`Ring::poll`].
+    ///
+    /// This must be smaller than or equal to [`max_events`]. Defaults to 64
+    /// changes.
+    ///
+    /// [`Ring::poll`]: crate::Ring::poll
+    /// [`max_events`]: crate::Config::max_events
+    pub const fn max_change_list_size(mut self, max: usize) -> Self {
+        self.sys.max_change_list_size = Some(max);
+        self
+    }
+
+    pub(crate) fn build_sys(self) -> io::Result<(Submissions, Shared, Completions)> {
+        let max_events = self.sys.max_events.unwrap_or(self.queued_operations);
+        let max_change_list_size = self
+            .sys
+            .max_change_list_size
+            .unwrap_or(if max_events >= 64 { 64 } else { max_events });
+        // NOTE: `max_events` must be at least `max_change_list_size` to ensure
+        // we can handle all submission errors.
+        debug_assert!(
+            max_events >= max_change_list_size,
+            "a10::kqueue::Config: max_change_list_size larger than max_events"
+        );
+
+        // SAFETY: `kqueue(2)` ensures the fd is valid.
+        let kq = unsafe { OwnedFd::from_raw_fd(syscall!(kqueue())?) };
+        let kq_fd = kq.as_raw_fd();
+        syscall!(fcntl(kq_fd, libc::F_SETFD, libc::FD_CLOEXEC))?;
+
+        // Set up waking.
+        let mut kevent = libc::kevent {
+            ident: 0,
+            filter: libc::EVFILT_USER,
+            flags: libc::EV_ADD | libc::EV_CLEAR | libc::EV_RECEIPT,
+            udata: WAKE_ID as _,
+            // SAFETY: all zeros is valid for `libc::kevent`.
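+            // The remaining fields (`fflags`, `data`) are zeroed; the
+            // `EVFILT_USER` filter is triggered later by setting
+            // `NOTE_TRIGGER` in `fflags` (see `Submissions::wake` in
+            // kqueue/sq.rs).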
+            ..unsafe { mem::zeroed() }
+        };
+        syscall!(kevent(kq_fd, &kevent, 1, &mut kevent, 1, ptr::null()))?;
+        if (kevent.flags & libc::EV_ERROR) != 0 && kevent.data != 0 {
+            return Err(io::Error::from_raw_os_error(kevent.data as i32));
+        }
+
+        let submissions = kqueue::Submissions::new(max_change_list_size);
+        let change_list = Mutex::new(Vec::new());
+        let shared = kqueue::Shared { kq, change_list };
+        let completions = kqueue::Completions::new(max_events);
+        Ok((submissions, shared, completions))
+    }
+}
diff --git a/src/kqueue/cq.rs b/src/kqueue/cq.rs
new file mode 100644
index 00000000..019a8d9b
--- /dev/null
+++ b/src/kqueue/cq.rs
@@ -0,0 +1,97 @@
+use std::os::fd::AsRawFd;
+use std::time::Duration;
+use std::{cmp, io, mem, ptr};
+
+use crate::{kqueue, syscall};
+
+#[derive(Debug)]
+pub(crate) struct Completions {
+    events: Vec<kqueue::Event>,
+}
+
+impl Completions {
+    pub(crate) fn new(events_capacity: usize) -> Completions {
+        let events = Vec::with_capacity(events_capacity);
+        Completions { events }
+    }
+}
+
+impl crate::cq::Completions for Completions {
+    type Shared = kqueue::Shared;
+    type Event = kqueue::Event;
+
+    fn poll<'a>(
+        &'a mut self,
+        shared: &Self::Shared,
+        timeout: Option<Duration>,
+    ) -> io::Result<impl Iterator<Item = &'a Self::Event>> {
+        self.events.clear();
+
+        let timeout = timeout.map(|to| libc::timespec {
+            tv_sec: cmp::min(to.as_secs(), libc::time_t::MAX as u64) as libc::time_t,
+            // `Duration::subsec_nanos` is guaranteed to be less than one
+            // billion (the number of nanoseconds in a second), making the
+            // cast to i32 safe. The cast itself is needed for platforms
+            // where C's long is only 32 bits.
+            tv_nsec: libc::c_long::from(to.subsec_nanos() as i32),
+        });
+
+        // Submit any submissions (changes) to the kernel.
+        let mut change_list = shared.change_list.lock().unwrap();
+        let mut changes = if change_list.is_empty() {
+            Vec::new() // No point in taking an empty vector.
+        } else {
+            mem::take(&mut *change_list)
+        };
+        drop(change_list); // Unlock, to not block others.
+
+        let result = syscall!(kevent(
+            shared.kq.as_raw_fd(),
+            // SAFETY: casting `Event` to `libc::kevent` is safe due to
+            // `repr(transparent)` on `Event`.
+            changes.as_ptr().cast(),
+            changes.len() as _,
+            // SAFETY: casting `Event` to `libc::kevent` is safe due to
+            // `repr(transparent)` on `Event`.
+            self.events.as_mut_ptr().cast(),
+            self.events.capacity() as _,
+            timeout
+                .as_ref()
+                .map(|s| s as *const _)
+                .unwrap_or(ptr::null()),
+        ));
+        let mut result_err = None;
+        match result {
+            // SAFETY: `kevent` ensures that `n` events are written.
+            Ok(n) => unsafe { self.events.set_len(n as usize) },
+            Err(err) => {
+                // According to the FreeBSD manual page: "When kevent() call
+                // fails with EINTR error, all changes in the changelist have
+                // been applied", so we can safely ignore it. We'll have zero
+                // completions though.
+                if err.raw_os_error() != Some(libc::EINTR) {
+                    if !changes.is_empty() {
+                        // TODO: do we want to put in fake error events or
+                        // something to ensure the Futures don't stall?
+                        log::warn!(change_list:? = changes; "failed to submit change list: {err}, dropping changes");
+                    }
+                    result_err = Some(err);
+                }
+            }
+        }
+
+        changes.clear();
+        shared.merge_change_list(changes);
+
+        if let Some(err) = result_err {
+            Err(err)
+        } else {
+            Ok(self.events.iter())
+        }
+    }
+
+    fn queue_space(&mut self, shared: &Self::Shared) -> usize {
+        // No practical limit.
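+        // kqueue doesn't have a fixed-size completion queue the way io_uring
+        // does: completed events are collected into `self.events` on each
+        // `poll` call, so submissions never have to wait for completion
+        // queue space.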
+        usize::MAX
+    }
+}
diff --git a/src/kqueue/io.rs b/src/kqueue/io.rs
new file mode 100644
index 00000000..f7cfc4a7
--- /dev/null
+++ b/src/kqueue/io.rs
@@ -0,0 +1,98 @@
+use std::io;
+use std::marker::PhantomData;
+
+use crate::fd::{AsyncFd, Descriptor};
+use crate::io::{BufMut, BufMutSlice, NO_OFFSET};
+use crate::op::OpResult;
+use crate::{kqueue, syscall};
+
+// Re-export so we don't have to worry about importing `std::io` and
+// `crate::io`.
+pub(crate) use std::io::*;
+
+pub(crate) use crate::unix::{IoMutSlice, IoSlice};
+
+pub(crate) struct ReadOp<B>(PhantomData<*const B>);
+
+impl<B: BufMut> kqueue::FdOp for ReadOp<B> {
+    type Output = B;
+    type Resources = B;
+    type Args = u64; // Offset.
+    type OperationOutput = usize;
+
+    fn fill_submission<D: Descriptor>(fd: &AsyncFd<D>, kevent: &mut kqueue::Event) {
+        kevent.0.ident = fd.fd() as _;
+        kevent.0.filter = libc::EVFILT_READ;
+    }
+
+    fn check_result<D: Descriptor>(
+        fd: &AsyncFd<D>,
+        buf: &mut Self::Resources,
+        offset: &mut Self::Args,
+    ) -> OpResult<usize> {
+        let (ptr, len) = unsafe { buf.parts_mut() };
+        // io_uring uses `NO_OFFSET` to issue a `read` system call,
+        // otherwise it uses `pread`. We emulate the same thing.
+        let result = if *offset == NO_OFFSET {
+            syscall!(read(fd.fd(), ptr.cast(), len as _))
+        } else {
+            syscall!(pread(fd.fd(), ptr.cast(), len as _, *offset as _))
+        };
+        match result {
+            // SAFETY: negative result is mapped to an error.
+            Ok(n) => OpResult::Ok(n as usize),
+            Err(ref err) if err.kind() == io::ErrorKind::WouldBlock => OpResult::Again(true),
+            Err(err) => OpResult::Err(err),
+        }
+    }
+
+    fn map_ok(mut buf: Self::Resources, n: Self::OperationOutput) -> Self::Output {
+        // SAFETY: kernel just initialised the bytes for us.
+        unsafe { buf.set_init(n) }
+        buf
+    }
+}
+
+pub(crate) struct ReadVectoredOp<B, const N: usize>(PhantomData<*const B>);
+
+impl<B: BufMutSlice<N>, const N: usize> kqueue::FdOp for ReadVectoredOp<B, N> {
+    type Output = B;
+    type Resources = (B, [crate::io::IoMutSlice; N]);
+    type Args = u64; // Offset.
+    type OperationOutput = usize;
+
+    fn fill_submission<D: Descriptor>(fd: &AsyncFd<D>, kevent: &mut kqueue::Event) {
+        kevent.0.ident = fd.fd() as _;
+        kevent.0.filter = libc::EVFILT_READ;
+    }
+
+    fn check_result<D: Descriptor>(
+        fd: &AsyncFd<D>,
+        (_, iovecs): &mut Self::Resources,
+        offset: &mut Self::Args,
+    ) -> OpResult<usize> {
+        // io_uring uses `NO_OFFSET` to issue a `readv` system call,
+        // otherwise it uses `preadv`. We emulate the same thing.
+        let result = if *offset == NO_OFFSET {
+            syscall!(readv(fd.fd(), iovecs.as_ptr() as _, iovecs.len() as _))
+        } else {
+            syscall!(preadv(
+                fd.fd(),
+                iovecs.as_ptr() as _,
+                iovecs.len() as _,
+                *offset as _
+            ))
+        };
+        match result {
+            // SAFETY: negative result is mapped to an error.
+            Ok(n) => OpResult::Ok(n as usize),
+            Err(ref err) if err.kind() == io::ErrorKind::WouldBlock => OpResult::Again(true),
+            Err(err) => OpResult::Err(err),
+        }
+    }
+
+    fn map_ok((mut bufs, _): Self::Resources, n: Self::OperationOutput) -> Self::Output {
+        // SAFETY: kernel just initialised the buffers for us.
+        unsafe { bufs.set_init(n) };
+        bufs
+    }
+}
diff --git a/src/kqueue/mod.rs b/src/kqueue/mod.rs
new file mode 100644
index 00000000..bf25933c
--- /dev/null
+++ b/src/kqueue/mod.rs
@@ -0,0 +1,639 @@
+//! kqueue implementation.
+//!
+//! Manuals:
+//! *
+//! *
+//! *
+//! *
+
+use std::os::fd::OwnedFd;
+use std::sync::Mutex;
+use std::{fmt, io, mem};
+
+use crate::fd::{AsyncFd, Descriptor};
+use crate::op::OpResult;
+use crate::{debug_detail, OperationId};
+
+pub(crate) mod config;
+mod cq;
+pub(crate) mod io;
+mod sq;
+
+pub(crate) use config::Config;
+pub(crate) use cq::Completions;
+pub(crate) use sq::Submissions;
+pub(crate) use Event as Submission;
+
+/// kqueue implementation.
+pub(crate) enum Implementation {}
+
+impl crate::Implementation for Implementation {
+    type Shared = Shared;
+    type Submissions = Submissions;
+    type Completions = Completions;
+}
+
+#[derive(Debug)]
+pub(crate) struct Shared {
+    /// kqueue(2) file descriptor.
+    kq: OwnedFd,
+    change_list: Mutex<Vec<Event>>,
+}
+
+impl Shared {
+    /// Merge the change list.
+    ///
+    /// Reuses allocations (if it makes sense).
+    fn merge_change_list(&self, mut changes: Vec<Event>) {
+        if changes.capacity() == 0 {
+            return;
+        }
+
+        let mut change_list = self.change_list.lock().unwrap();
+        if change_list.len() < changes.capacity() {
+            // Existing allocation is smaller than the `changes` allocation,
+            // reuse the larger one.
+            mem::swap(&mut *change_list, &mut changes);
+        }
+        change_list.append(&mut changes);
+        drop(change_list); // Unlock before any deallocations.
+    }
+}
+
+/// kqueue specific [`crate::op::Op`] trait.
+pub(crate) trait Op {
+    type Output;
+    type Resources;
+    type Args;
+    type OperationOutput;
+
+    fn fill_submission(kevent: &mut Event);
+
+    fn check_result(
+        resources: &mut Self::Resources,
+        args: &mut Self::Args,
+    ) -> OpResult<Self::OperationOutput>;
+
+    fn map_ok(
+        sq: &crate::SubmissionQueue,
+        resources: Self::Resources,
+        output: Self::OperationOutput,
+    ) -> Self::Output;
+}
+
+impl<T: Op> crate::op::Op for T {
+    type Output = T::Output;
+    type Resources = T::Resources;
+    type Args = T::Args;
+    type Submission = Event;
+    type OperationState = OperationState;
+    type OperationOutput = T::OperationOutput;
+
+    fn fill_submission(_: &mut Self::Resources, _: &mut Self::Args, kevent: &mut Self::Submission) {
+        T::fill_submission(kevent)
+    }
+
+    fn check_result(
+        resources: &mut Self::Resources,
+        args: &mut Self::Args,
+        _: &mut Self::OperationState,
+    ) -> OpResult<Self::OperationOutput> {
+        T::check_result(resources, args)
+    }
+
+    fn map_ok(
+        sq: &crate::SubmissionQueue,
+        resources: Self::Resources,
+        output: Self::OperationOutput,
+    ) -> Self::Output {
+        T::map_ok(sq, resources, output)
+    }
+}
+
+/// kqueue specific [`crate::op::FdOp`] trait.
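+// kqueue is a readiness-based API, while the public interface of this crate
+// is completion-based (following io_uring). The `Op`/`FdOp` traits bridge
+// that gap: `fill_submission` only registers interest in an event, and once
+// the event fires `check_result` performs the actual non-blocking system
+// call, retrying (`OpResult::Again`) when it would block.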
+pub(crate) trait FdOp {
+    type Output;
+    type Resources;
+    type Args;
+    type OperationOutput;
+
+    fn fill_submission<D: Descriptor>(fd: &AsyncFd<D>, kevent: &mut Event);
+
+    fn check_result<D: Descriptor>(
+        fd: &AsyncFd<D>,
+        resources: &mut Self::Resources,
+        args: &mut Self::Args,
+    ) -> OpResult<Self::OperationOutput>;
+
+    fn map_ok(resources: Self::Resources, output: Self::OperationOutput) -> Self::Output;
+}
+
+impl<T: FdOp> crate::op::FdOp for T {
+    type Output = T::Output;
+    type Resources = T::Resources;
+    type Args = T::Args;
+    type Submission = Event;
+    type OperationState = OperationState;
+    type OperationOutput = T::OperationOutput;
+
+    fn fill_submission<D: Descriptor>(
+        fd: &AsyncFd<D>,
+        _: &mut Self::Resources,
+        _: &mut Self::Args,
+        kevent: &mut Self::Submission,
+    ) {
+        T::fill_submission(fd, kevent)
+    }
+
+    fn check_result<D: Descriptor>(
+        fd: &AsyncFd<D>,
+        resources: &mut Self::Resources,
+        args: &mut Self::Args,
+        _: &mut Self::OperationState,
+    ) -> OpResult<Self::OperationOutput> {
+        T::check_result(fd, resources, args)
+    }
+
+    fn map_ok<D: Descriptor>(
+        _: &AsyncFd<D>,
+        resources: Self::Resources,
+        output: Self::OperationOutput,
+    ) -> Self::Output {
+        T::map_ok(resources, output)
+    }
+}
+
+/// Wrapper around `libc::kevent` to implement traits and methods on.
+///
+/// This is both a submission and a completion event.
+#[repr(transparent)] // Requirement for `kevent` calls.
+pub(crate) struct Event(libc::kevent);
+
+impl Event {
+    /// Returns an error from the event, if any.
+    fn error(&self) -> Option<io::Error> {
+        // We can't use references to packed structures (when checking the
+        // ignored errors below), so we need to copy the data out before use.
+        let data = self.0.data as i64;
+        // Check for the error flag, the actual error will be in the `data`
+        // field.
+        //
+        // Older versions of macOS (OS X 10.11 and 10.10 have been witnessed)
+        // can return EPIPE when registering a pipe file descriptor where the
+        // other end has already disappeared. For example code that creates a
+        // pipe, closes a file descriptor, and then registers the other end
+        // will see an EPIPE returned from `register`.
+        //
+        // It also turns out that kevent will still report events on the file
+        // descriptor, telling us that it's readable/hup at least after we've
+        // done this registration. As a result we just ignore `EPIPE` here
+        // instead of propagating it.
+        //
+        // More info can be found at tokio-rs/mio#582.
+        //
+        // The ENOENT error informs us that a filter we're trying to remove
+        // wasn't there in the first place, but we don't really care since our
+        // goal is accomplished.
+        if (self.0.flags & libc::EV_ERROR != 0)
+            && data != 0
+            && data != libc::EPIPE as i64
+            && data != libc::ENOENT as i64
+        {
+            Some(io::Error::from_raw_os_error(data as i32))
+        } else {
+            None
+        }
+    }
+}
+
+impl crate::cq::Event for Event {
+    type State = OperationState;
+
+    fn id(&self) -> OperationId {
+        self.0.udata as OperationId
+    }
+
+    fn update_state(&self, _: &mut Self::State) -> bool {
+        false // Using `EV_ONESHOT`, so expecting one event.
+    }
+}
+
+/// No additional state is needed.
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct OperationState;
+
+impl crate::cq::OperationState for OperationState {
+    fn new() -> OperationState {
+        OperationState
+    }
+
+    fn new_multishot() -> OperationState {
+        OperationState
+    }
+}
+
+impl crate::sq::Submission for Event {
+    fn set_id(&mut self, id: OperationId) {
+        self.0.udata = id as _;
+    }
+}
+
+// SAFETY: `libc::kevent` is thread safe.
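+// (The manual `Send`/`Sync` impls are needed because `libc::kevent` contains
+// a raw pointer field (`udata`) on most BSDs, which makes the type
+// `!Send`/`!Sync` by default; we only ever store an operation id in `udata`,
+// never an actual pointer.)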
+unsafe impl Send for Event {} +unsafe impl Sync for Event {} + +impl fmt::Debug for Event { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + debug_detail!( + match FilterDetails(libc::c_short), + libc::EVFILT_READ, + libc::EVFILT_WRITE, + libc::EVFILT_AIO, + libc::EVFILT_VNODE, + libc::EVFILT_PROC, + libc::EVFILT_SIGNAL, + libc::EVFILT_TIMER, + #[cfg(target_os = "freebsd")] + libc::EVFILT_PROCDESC, + #[cfg(any( + target_os = "freebsd", + target_os = "dragonfly", + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + libc::EVFILT_FS, + #[cfg(target_os = "freebsd")] + libc::EVFILT_LIO, + #[cfg(any( + target_os = "freebsd", + target_os = "dragonfly", + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + libc::EVFILT_USER, + #[cfg(target_os = "freebsd")] + libc::EVFILT_SENDFILE, + #[cfg(target_os = "freebsd")] + libc::EVFILT_EMPTY, + #[cfg(target_os = "dragonfly")] + libc::EVFILT_EXCEPT, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::EVFILT_MACHPORT, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::EVFILT_VM, + ); + + debug_detail!( + bitset FlagsDetails(libc::c_ushort), + libc::EV_ADD, + libc::EV_DELETE, + libc::EV_ENABLE, + libc::EV_DISABLE, + libc::EV_ONESHOT, + libc::EV_CLEAR, + libc::EV_RECEIPT, + libc::EV_DISPATCH, + #[cfg(target_os = "freebsd")] + libc::EV_DROP, + libc::EV_FLAG1, + libc::EV_ERROR, + libc::EV_EOF, + // Not stable across OS versions on OpenBSD. + #[cfg(not(target_os = "openbsd"))] + libc::EV_SYSFLAGS, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::EV_FLAG0, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::EV_POLL, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::EV_OOBAND, + #[cfg(target_os = "dragonfly")] + libc::EV_NODATA, + ); + + debug_detail!( + bitset FflagsDetails(libc::c_uint), + #[cfg(any( + target_os = "dragonfly", + target_os = "freebsd", + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + libc::NOTE_TRIGGER, + #[cfg(any( + target_os = "dragonfly", + target_os = "freebsd", + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + libc::NOTE_FFNOP, + #[cfg(any( + target_os = "dragonfly", + target_os = "freebsd", + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + libc::NOTE_FFAND, + #[cfg(any( + target_os = "dragonfly", + target_os = "freebsd", + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + libc::NOTE_FFOR, + #[cfg(any( + target_os = "dragonfly", + target_os = "freebsd", + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + libc::NOTE_FFCOPY, + #[cfg(any( + target_os = "dragonfly", + target_os = "freebsd", + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = 
"visionos", + target_os = "watchos", + ))] + libc::NOTE_FFCTRLMASK, + #[cfg(any( + target_os = "dragonfly", + target_os = "freebsd", + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", + ))] + libc::NOTE_FFLAGSMASK, + libc::NOTE_LOWAT, + libc::NOTE_DELETE, + libc::NOTE_WRITE, + #[cfg(target_os = "dragonfly")] + libc::NOTE_OOB, + #[cfg(target_os = "openbsd")] + libc::NOTE_EOF, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_EXTEND, + libc::NOTE_ATTRIB, + libc::NOTE_LINK, + libc::NOTE_RENAME, + libc::NOTE_REVOKE, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_NONE, + #[cfg(any(target_os = "openbsd"))] + libc::NOTE_TRUNCATE, + libc::NOTE_EXIT, + libc::NOTE_FORK, + libc::NOTE_EXEC, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_SIGNAL, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_EXITSTATUS, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_EXIT_DETAIL, + libc::NOTE_PDATAMASK, + libc::NOTE_PCTRLMASK, + #[cfg(any( + target_os = "dragonfly", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + ))] + libc::NOTE_TRACK, + #[cfg(any( + target_os = "dragonfly", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + ))] + libc::NOTE_TRACKERR, + #[cfg(any( + target_os = "dragonfly", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + ))] + libc::NOTE_CHILD, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_EXIT_DETAIL_MASK, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_EXIT_DECRYPTFAIL, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_EXIT_MEMORY, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_EXIT_CSERROR, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_VM_PRESSURE, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_VM_PRESSURE_TERMINATE, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_VM_PRESSURE_SUDDEN_TERMINATE, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_VM_ERROR, + #[cfg(any( + target_os = "freebsd", + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_SECONDS, + #[cfg(any(target_os = "freebsd"))] + libc::NOTE_MSECONDS, + #[cfg(any( + target_os = "freebsd", + target_os = "ios", + target_os = "macos", + 
target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_USECONDS, + #[cfg(any( + target_os = "freebsd", + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_NSECONDS, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_ABSOLUTE, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_LEEWAY, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_CRITICAL, + #[cfg(any( + target_os = "ios", + target_os = "macos", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos" + ))] + libc::NOTE_BACKGROUND, + ); + + // Can't reference fields in packed structures. + let ident = self.0.ident; + let data = self.0.data; + f.debug_struct("kqueue::Event") + .field("id", &crate::cq::Event::id(self)) + .field("ident", &ident) + .field("filter", &FilterDetails(self.0.filter)) + .field("flags", &FlagsDetails(self.0.flags)) + .field("fflags", &FflagsDetails(self.0.fflags)) + .field("data", &data) + .finish() + } +} diff --git a/src/kqueue/sq.rs b/src/kqueue/sq.rs new file mode 100644 index 00000000..0b9d3e27 --- /dev/null +++ b/src/kqueue/sq.rs @@ -0,0 +1,120 @@ +use std::os::fd::AsRawFd; +use std::{io, mem, ptr}; + +use crate::sq::QueueFull; +use crate::{kqueue, syscall, WAKE_ID}; + +/// NOTE: all the state is in [`Shared`]. +#[derive(Debug)] +pub(crate) struct Submissions { + /// Maximum size of the change list before it's submitted to the kernel, + /// without waiting on a call to poll. + max_change_list_size: usize, +} + +impl Submissions { + pub(crate) fn new(max_change_list_size: usize) -> Submissions { + Submissions { + max_change_list_size, + } + } +} + +impl crate::sq::Submissions for Submissions { + type Shared = kqueue::Shared; + type Submission = kqueue::Event; + + fn add( + &self, + shared: &Self::Shared, + is_polling: &AtomicBool, + submit: F, + ) -> Result<(), QueueFull> + where + F: FnOnce(&mut Self::Submission), + { + // Create and fill the submission event. + // SAFETY: all zero is valid for `libc::kevent`. + let mut event = unsafe { mem::zeroed() }; + submit(&mut event); + event.0.flags = libc::EV_ADD | libc::EV_RECEIPT | libc::EV_ONESHOT; + + // Add the event to the list of waiting events. + let mut change_list = shared.change_list.lock().unwrap(); + change_list.push(event); + // If we haven't collected enough events yet and we're not polling, + // we're done quickly. + if change_list.len() < self.max_change_list_size && !is_polling.load(Ordering::Relaxed) { + drop(change_list); // Unlock first. + return Ok(()); + } + + // Take ownership of the change list to submit it to the kernel. + let mut changes = mem::replace(&mut *change_list, Vec::new()); + drop(change_list); // Unlock, to not block others. + + // Submit the all changes to the kernel. + let timeout = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + let result = syscall!(kevent( + shared.kq.as_raw_fd(), + // SAFETY: casting `Event` to `libc::kevent` is safe due to + // `repr(transparent)` on `Event`. + changes.as_ptr().cast(), + changes.len() as _, + // SAFETY: casting `Event` to `libc::kevent` is safe due to + // `repr(transparent)` on `Event`. 
+ changes.as_mut_ptr().cast(), + changes.capacity() as _, + &timeout, + )); + if let Err(err) = result { + // According to the manual page of FreeBSD: "When kevent() call + // fails with EINTR error, all changes in the changelist have been + // applied", so we can safely ignore it. + if err.raw_os_error() != Some(libc::EINTR) { + // TODO: do we want to put in fake error events or something to + // ensure the Futures don't stall? + log::warn!(change_list:? = changes; "failed to submit change list: {err}, dropping changes"); + } + } + // Check all events for possible errors and log them. + for event in &changes { + // NOTE: this can happen if one of the file descriptors was closed + // before the change was submitted to the kernel. We'll log it, but + // otherwise ignore it. + if let Some(err) = event.error() { + // TODO: see if we can some how get this error to the operation + // that submitted it or something to ensure the Future doesn't + // stall. + log::warn!(kevent:? = event; "submitted change has an error: {err}, dropping it"); + } + } + + // Reuse the change list allocation (if it makes sense). + changes.clear(); + shared.merge_change_list(changes); + Ok(()) + } + + fn wake(&self, shared: &Self::Shared) -> io::Result<()> { + let mut kevent = libc::kevent { + ident: 0, + filter: libc::EVFILT_USER, + flags: libc::EV_ADD | libc::EV_RECEIPT, + fflags: libc::NOTE_TRIGGER, + udata: WAKE_ID as _, + // SAFETY: all zeros is valid for `libc::kevent`. + ..unsafe { mem::zeroed() } + }; + let kq = shared.kq.as_raw_fd(); + syscall!(kevent(kq, &kevent, 1, &mut kevent, 1, ptr::null()))?; + if (kevent.flags & libc::EV_ERROR) != 0 && kevent.data != 0 { + Err(io::Error::from_raw_os_error(kevent.data as i32)) + } else { + Ok(()) + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 20b9a70f..2b59fc25 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,7 +26,7 @@ //! being polled to completion. This data can be retrieved again by using the //! [`Extract`] trait. //! -//! ## Examples +//! # Examples //! //! The example below implements the `cat(1)` program that concatenates files //! and prints them to standard out. @@ -135,24 +135,54 @@ variant_size_differences )] -use std::cmp::min; -use std::marker::PhantomData; -use std::mem::{needs_drop, replace, size_of, take}; -use std::os::fd::{AsFd, AsRawFd, BorrowedFd, OwnedFd}; -use std::sync::atomic::{self, AtomicBool, AtomicU32, Ordering}; -use std::sync::{Arc, Mutex}; -use std::task::{self, Poll}; +use std::sync::atomic::AtomicBool; +use std::sync::{Arc, Mutex, MutexGuard}; use std::time::Duration; -use std::{fmt, ptr}; +use std::{fmt, task}; + +// This must come before the other modules for the documentation. +pub mod fd; mod bitmap; mod config; +mod cq; mod drop_waker; mod op; -mod sys; +mod sq; +#[cfg(unix)] +mod unix; + +#[cfg(any(target_os = "android", target_os = "linux"))] +mod io_uring; +#[cfg(any(target_os = "android", target_os = "linux"))] +use io_uring as sys; + +#[cfg(any( + target_os = "dragonfly", + target_os = "freebsd", + target_os = "ios", + target_os = "macos", + target_os = "netbsd", + target_os = "openbsd", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", +))] +mod kqueue; + +#[cfg(any( + target_os = "dragonfly", + target_os = "freebsd", + target_os = "ios", + target_os = "macos", + target_os = "netbsd", + target_os = "openbsd", + target_os = "tvos", + target_os = "visionos", + target_os = "watchos", +))] +use kqueue as sys; -#[rustfmt::skip] // This must come before the other modules for the documentation. 
-pub mod fd; pub mod cancel; pub mod extract; pub mod fs; @@ -163,18 +193,16 @@ pub mod net; pub mod poll; pub mod process; -use bitmap::AtomicBitMap; #[doc(no_inline)] pub use cancel::Cancel; -use config::munmap; +#[doc(inline)] pub use config::Config; -use drop_waker::{drop_task_waker, DropWake}; #[doc(no_inline)] pub use extract::Extract; #[doc(no_inline)] pub use fd::AsyncFd; -use op::{QueuedOperation, Submission}; -use sys as libc; // TODO: replace this with definitions from the `libc` crate once available. + +use crate::bitmap::AtomicBitMap; /// This type represents the user space side of an io_uring. /// @@ -192,40 +220,49 @@ use sys as libc; // TODO: replace this with definitions from the `libc` crate on /// [`Write`]: io::Write #[derive(Debug)] pub struct Ring { - /// # Notes - /// - /// `CompletionQueue` musted be dropped before the `SubmissionQueue` because - /// the `ring_fd` in `SubmissionQueue` is used in the memory mappings - /// backing `CompletionQueue`. - cq: CompletionQueue, - /// Shared between this `Ring` and all types that queue any operations. - /// - /// Because it depends on memory mapping from the file descriptor of the - /// ring the file descriptor is stored in the `SubmissionQueue` itself. sq: SubmissionQueue, + cq: cq::Queue, } impl Ring { /// Configure a `Ring`. /// - /// `entries` must be a power of two and in the range 1..=4096. + /// `queued_operations` is the number of queued operations, i.e. the number + /// of concurrent A10 operation. /// /// # Notes /// - /// A10 uses `IORING_SETUP_SQPOLL` by default, which required Linux kernel - /// 5.11 to work correctly. Furthermore before Linux 5.13 the user needs the - /// `CAP_SYS_NICE` capability if run as non-root. This can be disabled by - /// [`Config::with_kernel_thread`]. - pub const fn config<'r>(entries: u32) -> Config<'r> { - Config::new(entries) + /// A10 uses `IORING_SETUP_SQPOLL` by default for io_uring, which required + /// Linux kernel 5.11 to work correctly. Furthermore before Linux 5.13 the + /// user needs the `CAP_SYS_NICE` capability if run as non-root. This can be + /// disabled by [`Config::with_kernel_thread`]. + pub const fn config<'r>(queued_operations: usize) -> Config<'r> { + Config { + queued_operations, + sys: crate::sys::Config::new(), + } } /// Create a new `Ring` with the default configuration. /// /// For more configuration options see [`Config`]. #[doc(alias = "io_uring_setup")] - pub fn new(entries: u32) -> io::Result { - Config::new(entries).build() + #[doc(alias = "kqueue")] + pub fn new(queued_operations: usize) -> io::Result { + Ring::config(queued_operations).build() + } + + /// Build a new `Ring`. + fn build( + submissions: sys::Submissions, + shared_data: sys::Shared, + completions: sys::Completions, + queued_operations: usize, + ) -> Ring { + let shared = SharedState::new(submissions, shared_data, queued_operations); + let sq = SubmissionQueue::new(shared.clone()); + let cq = cq::Queue::new(completions, shared); + Ring { sq, cq } } /// Returns the `SubmissionQueue` used by this ring. @@ -235,932 +272,197 @@ impl Ring { &self.sq } - /// Enable the ring. - /// - /// This only required when starting the ring in disabled mode, see - /// [`Config::disable`]. - #[allow(clippy::needless_pass_by_ref_mut)] - pub fn enable(&mut self) -> io::Result<()> { - self.sq - .register(libc::IORING_REGISTER_ENABLE_RINGS, ptr::null(), 0) - } - /// Poll the ring for completions. /// /// This will wake all completed [`Future`]s with the result of their /// operations. 
/// /// If a zero duration timeout (i.e. `Some(Duration::ZERO)`) is passed this - /// function will only wake all already completed operations. It then - /// guarantees to not make a system call, but it also means it doesn't - /// guarantee at least one completion was processed. + /// function will only wake all already completed operations. When using + /// io_uring it also guarantees to not make a system call, but it also means + /// it doesn't guarantee at least one completion was processed. /// /// [`Future`]: std::future::Future #[doc(alias = "io_uring_enter")] + #[doc(alias = "kevent")] pub fn poll(&mut self, timeout: Option) -> io::Result<()> { - let sq = self.sq.clone(); // TODO: remove clone. - for completion in self.completions(timeout)? { - log::trace!(completion:? = completion; "dequeued completion event"); - // SAFETY: we're calling this based on information from the kernel. - unsafe { sq.update_op(completion) }; - } - - self.wake_blocked_futures(); - Ok(()) - } - - /// Returns an iterator for all completion events, makes a system call if no - /// completions are queued. - fn completions(&mut self, timeout: Option) -> io::Result { - let head = self.completion_head(); - let mut tail = self.completion_tail(); - if head == tail && !matches!(timeout, Some(Duration::ZERO)) { - // If we have no completions and we have no, or a non-zero, timeout - // we make a system call to wait for completion events. - self.enter(timeout)?; - // NOTE: we're the only onces writing to the completion `head` so we - // don't need to read it again. - tail = self.completion_tail(); - } - - Ok(Completions { - entries: self.cq.entries, - local_head: head, - head: self.cq.head, - tail, - ring_mask: self.cq.ring_mask, - _lifetime: PhantomData, - }) - } - - /// Make the `io_uring_enter` system call. - fn enter(&mut self, timeout: Option) -> io::Result<()> { - let mut args = libc::io_uring_getevents_arg { - sigmask: 0, - sigmask_sz: 0, - pad: 0, - ts: 0, - }; - let mut timespec = libc::timespec { - tv_sec: 0, - tv_nsec: 0, - }; - if let Some(timeout) = timeout { - timespec.tv_sec = timeout.as_secs().try_into().unwrap_or(i64::MAX); - timespec.tv_nsec = libc::c_longlong::from(timeout.subsec_nanos()); - args.ts = ptr::addr_of!(timespec) as u64; - } - - let submissions = if self.sq.shared.kernel_thread { - 0 // Kernel thread handles the submissions. - } else { - self.sq.shared.is_polling.store(true, Ordering::Release); - self.sq.unsubmitted() - }; - - // If there are no completions we'll wait for at least one. - let enter_flags = libc::IORING_ENTER_GETEVENTS // Wait for a completion. - | libc::IORING_ENTER_EXT_ARG; // Passing of `args`. - - log::debug!(submissions = submissions; "waiting for completion events"); - let result = libc::syscall!(io_uring_enter2( - self.sq.shared.ring_fd.as_raw_fd(), - submissions, - 1, // Wait for at least one completion. - enter_flags, - ptr::addr_of!(args).cast(), - size_of::(), - )); - if !self.sq.shared.kernel_thread { - self.sq.shared.is_polling.store(false, Ordering::Release); - } - match result { - Ok(_) => Ok(()), - // Hit timeout, we can ignore it. - Err(ref err) if err.raw_os_error() == Some(libc::ETIME) => Ok(()), - Err(err) => Err(err), - } - } - - /// Returns `CompletionQueue.head`. - fn completion_head(&mut self) -> u32 { - // SAFETY: we're the only once writing to it so `Relaxed` is fine. The - // pointer itself is valid as long as `Ring.fd` is alive. - unsafe { (*self.cq.head).load(Ordering::Relaxed) } - } - - /// Returns `CompletionQueue.tail`. 
- fn completion_tail(&self) -> u32 { - // SAFETY: this written to by the kernel so we need to use `Acquire` - // ordering. The pointer itself is valid as long as `Ring.fd` is alive. - unsafe { (*self.cq.tail).load(Ordering::Acquire) } - } - - /// Wake [`SharedSubmissionQueue::blocked_futures`]. - #[allow(clippy::needless_pass_by_ref_mut)] - fn wake_blocked_futures(&mut self) { - // This not particullary efficient, but with a large enough number of - // entries, `IORING_SETUP_SQPOLL` and suffcient calls to [`Ring::poll`] - // this shouldn't be used at all. - - let n = self.sq.available_space(); - if n == 0 { - return; - } - - let mut blocked_futures = { - let blocked_futures = &mut *self.sq.shared.blocked_futures.lock().unwrap(); - if blocked_futures.is_empty() { - return; - } - - take(blocked_futures) - }; - // Do the waking outside of the lock. - let waking = min(n, blocked_futures.len()); - log::trace!(waking_amount = n, waiting_futures = blocked_futures.len(); "waking blocked futures"); - for waker in blocked_futures.drain(..waking) { - waker.wake(); - } - - // Put the remaining wakers back, even if it's empty to keep the - // allocation. - let got = &mut *self.sq.shared.blocked_futures.lock().unwrap(); - let mut added = replace(got, blocked_futures); - got.append(&mut added); - } -} - -impl AsFd for Ring { - fn as_fd(&self) -> BorrowedFd<'_> { - self.sq.shared.ring_fd.as_fd() + self.cq.poll(timeout) } } /// Queue to submit asynchronous operations to. /// -/// This type doesn't have many public methods, but is used by all I/O types, -/// such as [`OpenOptions`], to queue asynchronous operations. The queue can be -/// acquired by using [`Ring::submission_queue`]. +/// This type doesn't have many public methods, but is used by all I/O types, to +/// queue asynchronous operations. The queue can be acquired by using +/// [`Ring::submission_queue`]. /// /// The submission queue can be shared by cloning it, it's a cheap operation. -/// -/// [`OpenOptions`]: fs::OpenOptions #[derive(Clone)] pub struct SubmissionQueue { - shared: Arc, -} - -/// Shared internals of [`SubmissionQueue`]. -struct SharedSubmissionQueue { - /// File descriptor of the io_uring. - ring_fd: OwnedFd, - - /// Mmap-ed pointer. - ptr: *mut libc::c_void, - /// Mmap-ed size in bytes. - size: libc::c_uint, - - /// Local version of `tail`. - /// Increased in `queue` to give the caller mutable access to a - /// [`Submission`] in `entries`. - /// NOTE: this does not mean that `pending_tail` number of submissions are - /// ready, this is determined by `tail`. - pending_tail: AtomicU32, - - // NOTE: the following two fields are constant. We read them once from the - // mmap area and then copied them here to avoid the need for the atomics. - /// Number of entries in the queue. - len: u32, - /// Mask used to index into the `sqes` queue. - ring_mask: u32, - /// True if we're using a kernel thread to do submission polling, i.e. if - /// `IORING_SETUP_SQPOLL` is enabled. - kernel_thread: bool, - /// Boolean indicating a thread is [`Ring::poll`]ing. Only used when - /// `kernel_thread` is false. - is_polling: AtomicBool, - - /// Bitmap which can be used to create an index into `op_queue`. - op_indices: Box, - /// State of queued operations, holds the (would be) result and - /// `task::Waker`. It's used when adding new operations and when marking - /// operations as complete (by the kernel). - queued_ops: Box<[Mutex>]>, - /// Futures that are waiting for a slot in `queued_ops`. 
- blocked_futures: Mutex>, - - // NOTE: the following fields reference mmaped pages shared with the kernel, - // thus all need atomic access. - /// Head to queue, i.e. the submussions read by the kernel. Incremented by - /// the kernel when submissions has succesfully been processed. - kernel_read: *const AtomicU32, - /// Flags set by the kernel to communicate state information. - flags: *const AtomicU32, - /// Array of `len` submission entries shared with the kernel. We're the only - /// one modifiying the structures, but the kernel can read from them. - /// - /// This pointer is also used in the `unmmap` call. - entries: *mut Submission, - - /// Variable used to get an index into `array`. The lock must be held while - /// writing into `array` to prevent race conditions with other threads. - array_index: Mutex, - /// Array of `len` indices (into `entries`) shared with the kernel. We're - /// the only one modifiying the structures, but the kernel can read from it. - /// - /// This is protected by `array_index`. - array: *mut AtomicU32, - /// Incremented by us when submitting new submissions. - array_tail: *mut AtomicU32, + inner: sq::Queue, } impl SubmissionQueue { - /// Wake the connected [`Ring`]. - /// - /// All this does is interrupt a call to [`Ring::poll`]. - pub fn wake(&self) { - // We ignore the queue full error as it means that is *very* unlikely - // that the Ring is currently being polling if the submission queue is - // filled. More likely the Ring hasn't been polled in a while. - let _: Result<(), QueueFull> = self.add_no_result(|submission| unsafe { - submission.wake(self.shared.ring_fd.as_raw_fd()); - }); - } - - /// Make a `io_uring_register(2)` system call. - fn register( - &self, - op: libc::c_uint, - arg: *const libc::c_void, - nr_args: libc::c_uint, - ) -> io::Result<()> { - libc::syscall!(io_uring_register( - self.shared.ring_fd.as_raw_fd(), - op, - arg, - nr_args - ))?; - Ok(()) - } - - /// Add a submission to the queue. - /// - /// Returns an index into the `op_queue` which can be used to check the - /// progress of the operation. Once the operation is completed and the - /// result read the index should be made avaiable again in `op_indices` and - /// the value set to `None`. - /// - /// Returns an error if the submission queue is full. To fix this call - /// [`Ring::poll`] (and handle the completed operations) and try queueing - /// again. - fn add(&self, submit: F) -> Result - where - F: FnOnce(&mut Submission), - { - self._add(submit, QueuedOperation::new) - } - - /// Same as [`SubmissionQueue::add`] but uses a multishot `QueuedOperation`. - fn add_multishot(&self, submit: F) -> Result - where - F: FnOnce(&mut Submission), - { - self._add(submit, QueuedOperation::new_multishot) - } - - /// See [`SubmissionQueue::add`] or [`SubmissionQueue::add_multishot`]. - fn _add(&self, submit: F, new_op: O) -> Result - where - F: FnOnce(&mut Submission), - O: FnOnce() -> QueuedOperation, - { - // Get an index to the queued operation queue. - let shared = &*self.shared; - let Some(op_index) = shared.op_indices.next_available() else { - return Err(QueueFull(())); - }; - - let queued_op = new_op(); - // SAFETY: the `AtomicBitMap` always returns valid indices for - // `op_queue` (it's the whole point of it). 
- let mut op = shared.queued_ops[op_index].lock().unwrap(); - let old_queued_op = replace(&mut *op, Some(queued_op)); - debug_assert!(old_queued_op.is_none()); - - let res = self.add_no_result(|submission| { - submit(submission); - submission.set_user_data(op_index as u64); - }); - - match res { - Ok(()) => Ok(OpIndex(op_index)), - Err(err) => { - // Make the index available, we're not going to use it. - *op = None; - drop(op); - shared.op_indices.make_available(op_index); - Err(err) - } - } - } - - /// Queue a new operation without making a submission. - fn queue_multishot(&self) -> Result { - self._queue(QueuedOperation::new_multishot) - } - - /// See [`SubmissionQueue::queue_multishot`]. - fn _queue(&self, new_op: O) -> Result - where - O: FnOnce() -> QueuedOperation, - { - // Get an index to the queued operation queue. - let shared = &*self.shared; - let Some(op_index) = shared.op_indices.next_available() else { - return Err(QueueFull(())); - }; - - let queued_op = new_op(); - // SAFETY: the `AtomicBitMap` always returns valid indices for - // `op_queue` (it's the whole point of it). - let old_queued_op = replace( - &mut *shared.queued_ops[op_index].lock().unwrap(), - Some(queued_op), - ); - debug_assert!(old_queued_op.is_none()); - - Ok(OpIndex(op_index)) - } - - /// Same as [`SubmissionQueue::add`], but ignores the result. - #[allow(clippy::mutex_integer)] // For `array_index`, need to the lock for more. - fn add_no_result(&self, submit: F) -> Result<(), QueueFull> - where - F: FnOnce(&mut Submission), - { - let shared = &*self.shared; - // First we need to acquire mutable access to an `Submission` entry in - // the `entries` array. - // - // We do this by increasing `pending_tail` by 1, reserving - // `entries[pending_tail]` for ourselves, while ensuring we don't go - // beyond what the kernel has processed by checking `tail - kernel_read` - // is less then the length of the submission queue. - let kernel_read = self.kernel_read(); - let tail = shared - .pending_tail - .fetch_update(Ordering::AcqRel, Ordering::Acquire, |tail| { - if tail - kernel_read < shared.len { - // Still an entry available. - Some(tail.wrapping_add(1)) - } else { - None - } - }); - let Ok(tail) = tail else { - // If the kernel thread is not awake we'll need to wake it to make - // space in the submission queue. - self.maybe_wake_kernel_thread(); - return Err(QueueFull(())); - }; - - // SAFETY: the `ring_mask` ensures we can never get an index larger - // then the size of the queue. Above we've already ensured that - // we're the only thread with mutable access to the entry. - let submission_index = tail & shared.ring_mask; - let submission = unsafe { &mut *shared.entries.add(submission_index as usize) }; - - // Let the caller fill the `submission`. - submission.reset(); - submission.set_user_data(u64::MAX); - submit(submission); - #[cfg(debug_assertions)] - debug_assert!(!submission.is_unchanged()); - - // Ensure that all writes to the `submission` are done. - atomic::fence(Ordering::SeqCst); - - // Now that we've written our submission we need add it to the - // `array` so that the kernel can process it. - log::trace!(submission:? = submission; "queueing submission"); - { - // Now that the submission is filled we need to add it to the - // `shared.array` so that the kernel can read from it. - // - // We do this with a lock to avoid a race condition between two - // threads incrementing `shared.tail` concurrently. Consider the - // following execution: - // - // Thread A | Thread B - // ... | ... - // ... 
| Got `array_index` 0. - // Got `array_index` 1. | - // Writes index to `shared.array[1]`. | - // `shared.tail.fetch_add` to 1. | - // At this point the kernel will/can read `shared.array[0]`, but - // thread B hasn't filled it yet. So the kernel will read an invalid - // index! - // | Writes index to `shared.array[0]`. - // | `shared.tail.fetch_add` to 2. - - let mut array_index = shared.array_index.lock().unwrap(); - let idx = (*array_index & shared.ring_mask) as usize; - // SAFETY: `idx` is masked above to be within the correct bounds. - unsafe { (*shared.array.add(idx)).store(submission_index, Ordering::Release) }; - // SAFETY: we filled the array above. - let old_tail = unsafe { (*shared.array_tail).fetch_add(1, Ordering::AcqRel) }; - debug_assert!(old_tail == *array_index); - *array_index += 1; + const fn new(shared: Arc>) -> SubmissionQueue { + SubmissionQueue { + inner: sq::Queue::new(shared), } - - // If the kernel thread is not awake we'll need to wake it for it to - // process our submission. - self.maybe_wake_kernel_thread(); - // When we're not using the kernel polling thread we might have to - // submit the event ourselves to ensure we can make progress while the - // (user space) polling thread is calling `Ring::poll`. - self.maybe_submit_event(); - Ok(()) - } - - /// Wait for a submission slot, waking `waker` once one is available. - fn wait_for_submission(&self, waker: task::Waker) { - log::trace!(waker:? = waker; "adding blocked future"); - self.shared.blocked_futures.lock().unwrap().push(waker); } - /// Returns the number of slots available. - /// - /// # Notes - /// - /// The value return can be outdated the nanosecond it is returned, don't - /// make a safety decisions based on it. - fn available_space(&self) -> usize { - // SAFETY: the `kernel_read` pointer itself is valid as long as - // `Ring.fd` is alive. - // We use Relaxed here because the caller knows the value will be - // outdated. - let kernel_read = unsafe { (*self.shared.kernel_read).load(Ordering::Relaxed) }; - let pending_tail = self.shared.pending_tail.load(Ordering::Relaxed); - (self.shared.len - (pending_tail - kernel_read)) as usize - } - - /// Returns the number of unsumitted submission queue entries. - fn unsubmitted(&self) -> u32 { - // SAFETY: the `kernel_read` pointer itself is valid as long as - // `Ring.fd` is alive. - // We use Relaxed here because it can already be outdated the moment we - // return it, the caller has to deal with that. - let kernel_read = unsafe { (*self.shared.kernel_read).load(Ordering::Relaxed) }; - let pending_tail = self.shared.pending_tail.load(Ordering::Relaxed); - pending_tail - kernel_read - } - - /// Wake up the kernel thread polling for submission events, if the kernel - /// thread needs a wakeup. - fn maybe_wake_kernel_thread(&self) { - if self.shared.kernel_thread && (self.flags() & libc::IORING_SQ_NEED_WAKEUP != 0) { - log::debug!("waking submission queue polling kernel thread"); - let res = libc::syscall!(io_uring_enter2( - self.shared.ring_fd.as_raw_fd(), - 0, // We've already queued our submissions. - 0, // Don't wait for any completion events. - libc::IORING_ENTER_SQ_WAKEUP, // Wake up the kernel. - ptr::null(), // We don't pass any additional arguments. - 0, - )); - if let Err(err) = res { - log::warn!("failed to wake submission queue polling kernel thread: {err}"); - } - } - } - - /// Submit the event to the kernel when not using a kernel polling thread - /// and another thread is currently [`Ring::poll`]ing. 
- fn maybe_submit_event(&self) { - if !self.shared.kernel_thread && self.shared.is_polling.load(Ordering::Relaxed) { - log::debug!("submitting submission event while another thread is `Ring::poll`ing"); - let ring_fd = self.shared.ring_fd.as_raw_fd(); - let res = libc::syscall!(io_uring_enter2(ring_fd, 1, 0, 0, ptr::null(), 0)); - if let Err(err) = res { - log::warn!("failed to submit event: {err}"); - } - } - } - - /// Poll a queued operation with `op_index` to check if it's ready. - /// - /// # Notes + /// Wake the connected [`Ring`]. /// - /// If this return [`Poll::Ready`] it marks `op_index` slot as available. - pub(crate) fn poll_op( - &self, - ctx: &mut task::Context<'_>, - op_index: OpIndex, - ) -> Poll> { - log::trace!(op_index = op_index.0; "polling operation"); - if let Some(operation) = self.shared.queued_ops.get(op_index.0) { - let mut operation = operation.lock().unwrap(); - if let Some(op) = &mut *operation { - let res = op.poll(ctx); - if res.is_ready() { - *operation = None; - drop(operation); - self.shared.op_indices.make_available(op_index.0); - } - return res; - } - } - panic!("a10::SubmissionQueue::poll called incorrectly"); + /// All this does is interrupt a call to [`Ring::poll`]. + pub fn wake(&self) { + self.inner.wake(); } - /// Poll a queued multishot operation with `op_index` to check if it's - /// ready. - /// - /// # Notes - /// - /// If this return [`Poll::Ready(None)`] it marks `op_index` slot as - /// available. - pub(crate) fn poll_multishot_op( + /// See [`sq::Queue::get_op`]. + #[allow(clippy::type_complexity)] + pub(crate) unsafe fn get_op( &self, - ctx: &mut task::Context<'_>, - op_index: OpIndex, - ) -> Poll>> { - log::trace!(op_index = op_index.0; "polling multishot operation"); - if let Some(operation) = self.shared.queued_ops.get(op_index.0) { - let mut operation = operation.lock().unwrap(); - if let Some(op) = &mut *operation { - return match op.poll(ctx) { - Poll::Ready(res) => Poll::Ready(Some(res)), - Poll::Pending if op.is_done() => { - *operation = None; - drop(operation); - self.shared.op_indices.make_available(op_index.0); - Poll::Ready(None) - } - Poll::Pending => Poll::Pending, - }; - } - } - panic!("a10::SubmissionQueue::poll_multishot called incorrectly"); + op_id: OperationId, + ) -> MutexGuard< + Option::Completions as cq::Completions>::Event as cq::Event>::State>>, + >{ + self.inner.get_op(op_id) } - /// Mark the operation with `op_index` as dropped, attempting to cancel it. - /// - /// Because the kernel still has access to the resources, we might have to - /// do some trickery to delay the deallocation of resources and making the - /// queued operation slot available again. - /// - /// When the operation is still in progress we attempt to cancel it using - /// submission created by `cancel`. If the operation has completed it will - /// just drop resources (using `create_drop_waker`) and make the slot - /// available again. - /// - /// # Notes - /// - /// `cancel` should most likely use [`Submission::no_completion_event`] - pub(crate) fn cancel_op( + /// See [`sq::Queue::make_op_available`]. 
+ #[allow(clippy::type_complexity)] + pub(crate) unsafe fn make_op_available( &self, - op_index: OpIndex, - create_drop_waker: R, - cancel: F, - ) -> Result<(), QueueFull> - where - R: FnOnce() -> D, - D: DropWake, - F: FnOnce(&mut Submission), - { - log::trace!(op_index = op_index.0; "canceling operation"); - if let Some(operation) = self.shared.queued_ops.get(op_index.0) { - let mut operation = operation.lock().unwrap(); - if let Some(op) = &mut *operation { - if op.no_more_events() { - // Easy path, the operation has already been completed. - *operation = None; - // Unlock defore dropping `create_drop_waker`, which might take a - // while. - drop(operation); - self.shared.op_indices.make_available(op_index.0); - - // We can safely drop the resources. - drop(create_drop_waker); - return Ok(()); - } - - // Hard path, the operation is not done, but the Future holding - // the resource is about to be dropped, so we need to apply some - // trickery here. - // - // We need to do two things: - // 1. Delay the dropping of `resources` until the kernel is done - // with the operation. - // 2. Delay the available making of the queued operation slot - // until the kernel is done with the operation. - // - // We achieve 1 by creating a special waker that just drops the - // resources (created by `create_drop_waker`). - let waker = if needs_drop::() { - // SAFETY: we're not going to clone the `waker`. - Some(unsafe { drop_task_waker(create_drop_waker()) }) - } else { - // Of course if we don't need to drop `T`, then we don't - // have to use a special waker. But we still don't want to - // wake up the `Future` as that not longer used. - None - }; - // We achive 2 by setting the operation state to dropped, so - // that `QueuedOperation::set_result` returns true, which makes - // `complete` below make the queued operation slot available - // again. - op.set_dropped(waker); - // Cancel the operation. - return self.add_no_result(cancel); - } - } - panic!("a10::SubmissionQueue::cancel_op called incorrectly"); - } - - /// Update an operation based on `completion`. - /// - /// # Safety - /// - /// This may only be called based on information form the kernel. - unsafe fn update_op(&self, completion: &Completion) { - let op_index = completion.index(); - if let Some(operation) = self.shared.queued_ops.get(op_index) { - let mut operation = operation.lock().unwrap(); - if let Some(op) = &mut *operation { - log::trace!(op_index = op_index, completion:? = completion; "updating operation"); - let is_dropped = op.update(completion); - if is_dropped && op.no_more_events() { - // The Future was previously dropped so no one is waiting on - // the result. We can make the slot avaiable again. - *operation = None; - drop(operation); - self.shared.op_indices.make_available(op_index); - } - } else { - log::trace!(op_index = op_index, completion:? = completion; "operation gone, but got completion event"); - } - } - } - - /// Returns `self.kernel_read`. - fn kernel_read(&self) -> u32 { - // SAFETY: this written to by the kernel so we need to use `Acquire` - // ordering. The pointer itself is valid as long as `Ring.fd` is alive. - unsafe { (*self.shared.kernel_read).load(Ordering::Acquire) } - } - - /// Returns `self.flags`. - fn flags(&self) -> u32 { - // SAFETY: this written to by the kernel so we need to use `Acquire` - // ordering. The pointer itself is valid as long as `Ring.fd` is alive. 
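The dropped-while-in-flight handling above, reduced to its two branches. This is an illustrative sketch; `OpState` and `submit_cancel` are made-up names, not the crate's API.

/// Simplified view of a queued operation's lifecycle at `Future` drop time.
enum OpState<R> {
    /// All completion events were delivered; resources can be freed now.
    Completed(R),
    /// The kernel may still write into the resources.
    InFlight(R),
}

fn on_future_drop<R>(state: OpState<R>, submit_cancel: impl FnOnce(R)) {
    match state {
        // Easy path: nothing outstanding, drop the resources directly.
        OpState::Completed(resources) => drop(resources),
        // Hard path: keep the resources alive until the cancellation
        // completes; the real code parks them in a special waker that only
        // drops them once the kernel is done.
        OpState::InFlight(resources) => submit_cancel(resources),
    }
}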
-        unsafe { (*self.shared.flags).load(Ordering::Acquire) }
+        op_id: OperationId,
+        op: MutexGuard<
+            Option<QueuedOperation<<<<I as Implementation>::Completions as cq::Completions>::Event as cq::Event>::State>>,
+        >,
+    ) {
+        self.inner.make_op_available(op_id, op);
     }
 }
 
-#[allow(clippy::mutex_integer)] // For `array_index`, need to the lock for more.
 impl fmt::Debug for SubmissionQueue {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        /// Load a `u32` using relaxed ordering from `ptr`.
-        fn load_atomic_u32(ptr: *const AtomicU32) -> u32 {
-            unsafe { (*ptr).load(Ordering::Relaxed) }
-        }
-
-        let shared = &*self.shared;
-        let all = f.alternate();
-        let mut f = f.debug_struct("SubmissionQueue");
-
-        f.field("ring_fd", &shared.ring_fd.as_raw_fd())
-            .field("len", &shared.len)
-            .field("ring_mask", &shared.ring_mask)
-            .field("flags", &load_atomic_u32(shared.flags))
-            .field("pending_tail", &shared.pending_tail)
-            .field("kernel_read", &load_atomic_u32(shared.kernel_read))
-            .field(
-                "array_index",
-                &shared.array_index.lock().map(|i| *i).unwrap_or(u32::MAX),
-            )
-            .field("array_tail", &load_atomic_u32(shared.array_tail));
-
-        if all {
-            f.field("op_indices", &shared.op_indices)
-                .field("queued_ops", &shared.queued_ops)
-                .field("blocked_futures", &shared.blocked_futures)
-                .field("mmap_ptr", &shared.ptr)
-                .field("mmap_size", &shared.size);
-        }
-
-        f.finish()
+        self.inner.fmt(f)
     }
 }
 
-unsafe impl Send for SharedSubmissionQueue {}
-
-unsafe impl Sync for SharedSubmissionQueue {}
-
-impl Drop for SharedSubmissionQueue {
-    fn drop(&mut self) {
-        if let Err(err) = munmap(
-            self.entries.cast(),
-            self.len as usize * size_of::<Submission>(),
-        ) {
-            log::warn!("error unmapping a10::SubmissionQueue entries: {err}");
-        }
-
-        if let Err(err) = munmap(self.ptr, self.size as usize) {
-            log::warn!("error unmapping a10::SubmissionQueue: {err}");
-        }
-    }
-}
-
-/// Index into [`SharedSubmissionQueue::op_indices`].
-///
-/// Returned by [`SubmissionQueue::add`] and used by
-/// [`SubmissionQueue::poll_op`] to check for a result.
-#[derive(Copy, Clone)]
-#[must_use]
-struct OpIndex(usize);
-
-impl fmt::Debug for OpIndex {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        self.0.fmt(f)
-    }
-}
-
-/// Error returned when the submission queue is full.
-///
-/// To resolve this issue call [`Ring::poll`].
-///
-/// Can be convert into [`io::Error`].
-struct QueueFull(());
-
-impl From<QueueFull> for io::Error {
-    fn from(_: QueueFull) -> io::Error {
-        #[cfg(not(feature = "nightly"))]
-        let kind = io::ErrorKind::Other;
-        #[cfg(feature = "nightly")]
-        let kind = io::ErrorKind::ResourceBusy;
-        io::Error::new(kind, "submission queue is full")
-    }
+/// State shared between the submission and completion side.
+struct SharedState<I: Implementation> {
+    /// [`sq::Submissions`] implementation.
+    submissions: I::Submissions,
+    /// Data shared between the submission and completion queues.
+    data: I::Shared,
+    /// Boolean indicating a thread is [`Ring::poll`]ing.
+    is_polling: AtomicBool,
+    /// Bitmap which can be used to create [`OperationId`]s, used as index into
+    /// `queued_ops`.
+    op_ids: Box<AtomicBitMap>,
+    /// State of queued operations.
+    ///
+    /// Indexed by an [`OperationId`], created by `op_ids`.
+    #[rustfmt::skip]
+    #[allow(clippy::type_complexity)]
+    queued_ops: Box<[Mutex<Option<QueuedOperation<<<I::Completions as cq::Completions>::Event as cq::Event>::State>>>]>,
+    /// Futures that are waiting for a slot in `queued_ops`.
+    blocked_futures: Mutex<Vec<task::Waker>>,
 }
 
-impl fmt::Debug for QueueFull {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("QueueFull").finish()
+impl<I: Implementation> SharedState<I> {
+    /// `queued_operations` is the maximum number of queued operations; it will
+    /// be rounded up depending on the capacity of `AtomicBitMap`.
+    fn new(
+        submissions: I::Submissions,
+        data: I::Shared,
+        queued_operations: usize,
+    ) -> Arc<SharedState<I>> {
+        let op_ids = AtomicBitMap::new(queued_operations);
+        let mut queued_ops = Vec::with_capacity(op_ids.capacity());
+        queued_ops.resize_with(queued_ops.capacity(), || Mutex::new(None));
+        let queued_ops = queued_ops.into_boxed_slice();
+        let blocked_futures = Mutex::new(Vec::new());
+        Arc::new(SharedState {
+            submissions,
+            data,
+            is_polling: AtomicBool::new(false),
+            op_ids,
+            queued_ops,
+            blocked_futures,
+        })
     }
 }
 
-impl fmt::Display for QueueFull {
+impl<I: Implementation> fmt::Debug for SharedState<I> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.write_str("`a10::Ring` submission queue is full")
+        f.debug_struct("SharedState")
+            .field("submissions", &self.submissions)
+            .field("data", &self.data)
+            .field("is_polling", &self.is_polling)
+            .field("op_ids", &self.op_ids)
+            .field("queued_ops", &self.queued_ops)
+            .field("blocked_futures", &self.blocked_futures)
+            .finish()
     }
 }
 
-/// Queue of completion events.
+/// In progress/queued operation.
 #[derive(Debug)]
-struct CompletionQueue {
-    /// Mmap-ed pointer to the completion queue.
-    ptr: *mut libc::c_void,
-    /// Mmap-ed size in bytes.
-    size: libc::c_uint,
-
-    // NOTE: the following field is constant. we read them once from the mmap
-    // area and then copied them here to avoid the need for the atomics.
-    /// Mask used to index into the `sqes` queue.
-    ring_mask: u32,
-
-    // NOTE: the following fields reference mmaped pages shared with the kernel,
-    // thus all need atomic access.
-    /// Incremented by us when completions have been read.
-    head: *mut AtomicU32,
-    /// Incremented by the kernel when adding completions.
-    tail: *const AtomicU32,
-    /// Array of `len` completion entries shared with the kernel. The kernel
-    /// modifies this array, we're only reading from it.
-    entries: *const Completion,
+struct QueuedOperation<T> {
+    /// State of the operation.
+    state: T,
+    /// True if the connected `Future`/`AsyncIterator` is dropped and thus will
+    /// no longer retrieve the result.
+    dropped: bool,
+    /// Boolean used by operations that result in multiple completion events.
+    /// For example zero copy: one completion to report the result, another to
+    /// indicate the resources are no longer used.
+    /// For io_uring multishot this will be true if no more completion events
+    /// are coming, for example in case a previous event returned an error.
+    done: bool,
+    /// Waker to wake when the operation is done.
+    waker: task::Waker,
 }
 
-unsafe impl Send for CompletionQueue {}
-
-unsafe impl Sync for CompletionQueue {}
-
-impl Drop for CompletionQueue {
-    fn drop(&mut self) {
-        if let Err(err) = munmap(self.ptr, self.size as usize) {
-            log::warn!("error unmapping a10::CompletionQueue: {err}");
+impl<T> QueuedOperation<T> {
+    const fn new(state: T, waker: task::Waker) -> QueuedOperation<T> {
+        QueuedOperation {
+            state,
+            dropped: false,
+            done: false,
+            waker,
         }
     }
-}
-
-/// Iterator of completed operations.
-struct Completions<'ring> {
-    /// Same as [`CompletionQueue.entries`].
-    entries: *const Completion,
-    /// Local version of `head`. Used to updated `head` once `Completions` is
-    /// dropped.
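The `op_ids`/`queued_ops` pairing in miniature (an illustrative sketch using a single 64-bit word; the crate's `AtomicBitMap` generalises this to arbitrary capacities):

use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Mutex;

struct Slots<T> {
    bitmap: AtomicU64,              // One bit per slot, set bit = in use.
    slots: Box<[Mutex<Option<T>>]>, // Indexed by bit position, max 64.
}

impl<T> Slots<T> {
    /// Claim a free slot, store `value` in it and return its index.
    fn insert(&self, value: T) -> Option<usize> {
        loop {
            let bits = self.bitmap.load(Ordering::Relaxed);
            let idx = (!bits).trailing_zeros() as usize;
            if idx >= self.slots.len() {
                return None; // All slots in use.
            }
            let claim = self.bitmap.compare_exchange(
                bits, bits | (1 << idx), Ordering::AcqRel, Ordering::Relaxed);
            if claim.is_ok() {
                *self.slots[idx].lock().unwrap() = Some(value);
                return Some(idx);
            }
        }
    }

    /// Drop the value and make the slot available again.
    fn remove(&self, idx: usize) {
        *self.slots[idx].lock().unwrap() = None;
        self.bitmap.fetch_and(!(1 << idx), Ordering::AcqRel);
    }
}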
- local_head: u32, - /// Same as [`CompletionQueue.head`], used to let the kernel know we've read - /// the completions once we're dropped. - head: *mut AtomicU32, - /// Tail of `entries`, i.e. number of completions the kernel wrote. - tail: u32, - /// Same as [`CompletionQueue.ring_mask`]. - ring_mask: u32, - /// We're depend on the lifetime of [`Ring`]. - _lifetime: PhantomData<&'ring Ring>, -} - -impl<'ring> Iterator for Completions<'ring> { - type Item = &'ring Completion; - - fn next(&mut self) -> Option { - let head = self.local_head; - let tail = self.tail; - if head < tail { - // SAFETY: the `ring_mask` ensures we can never get an `idx` larger - // then the size of the queue. We checked above that the kernel has - // written the struct (and isn't writing to now) os we can safely - // read from it. - let idx = (head & self.ring_mask) as usize; - let completion = unsafe { &*self.entries.add(idx) }; - self.local_head += 1; - Some(completion) - } else { - None - } - } -} - -impl<'ring> Drop for Completions<'ring> { - fn drop(&mut self) { - // Let the kernel know we've read the completions. - // SAFETY: the kernel needs to read the value so we need `Release`. The - // pointer itself is valid as long as `Ring.fd` is alive. - unsafe { (*self.head).store(self.local_head, Ordering::Release) } - } -} - -/// Event that represents a completed operation. -#[repr(transparent)] -struct Completion { - inner: libc::io_uring_cqe, -} - -impl Completion { - /// Returns the operation index. - const fn index(&self) -> usize { - self.inner.user_data as usize - } - - /// Returns the result of the operation. - const fn result(&self) -> i32 { - self.inner.res - } - - /// Return `true` if `IORING_CQE_F_MORE` is set. - const fn is_in_progress(&self) -> bool { - self.inner.flags & libc::IORING_CQE_F_MORE != 0 - } - - /// Return `true` if `IORING_CQE_F_NOTIF` is set. - const fn is_notification(&self) -> bool { - self.inner.flags & libc::IORING_CQE_F_NOTIF != 0 - } - - /// Return `true` if `IORING_CQE_F_BUFFER` is set. - const fn is_buffer_select(&self) -> bool { - self.inner.flags & libc::IORING_CQE_F_BUFFER != 0 - } - const fn flags(&self) -> u16 { - (self.inner.flags & ((1 << libc::IORING_CQE_BUFFER_SHIFT) - 1)) as u16 - } - - /// Returns the operation flags that need to be passed to - /// [`QueuedOperation`]. - const fn operation_flags(&self) -> u16 { - if self.is_buffer_select() { - (self.inner.flags >> libc::IORING_CQE_BUFFER_SHIFT) as u16 - } else { - 0 + /// Update the waker to `waker`, if it's different. + fn update_waker(&mut self, waker: &task::Waker) { + if !self.waker.will_wake(waker) { + self.waker.clone_from(waker); } } } -impl fmt::Debug for Completion { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Completion") - .field("user_data", &self.inner.user_data) - // NOTE this this isn't always an errno, so we can't use - // `io::Error::from_raw_os_error` without being misleading. - .field("res", &self.inner.res) - .field("flags", &self.flags()) - .field("operation_flags", &self.operation_flags()) - .finish() - } +/// Operation id. +/// +/// Used to relate completion events to submission events and operations. Also +/// used as index into [`SharedState::queued_ops`], created by +/// [`SharedState::op_ids`]. +type OperationId = usize; + +/// Id to use for internal wake ups. +const WAKE_ID: OperationId = usize::MAX; +/// Id to use for submissions without a completions event (in the case we do +/// actually get a completion event). 
+const NO_COMPLETION_ID: OperationId = usize::MAX - 1; + +/// Platform specific implementation. +trait Implementation { + /// Data shared between the submission and completion queues. + type Shared: fmt::Debug + Sized; + + /// See [`sq::Submissions`]. + type Submissions: sq::Submissions; + + /// See [`cq::Completions`]. + type Completions: cq::Completions; } /// Link to online manual. @@ -1176,4 +478,74 @@ macro_rules! man_link { }; } -use man_link; +/// Helper macro to execute a system call that returns an `io::Result`. +macro_rules! syscall { + ($fn: ident ( $($arg: expr),* $(,)? ) ) => {{ + #[allow(unused_unsafe)] + let res = unsafe { libc::$fn($( $arg, )*) }; + if res == -1 { + ::std::result::Result::Err(::std::io::Error::last_os_error()) + } else { + ::std::result::Result::Ok(res) + } + }}; +} + +#[allow(unused_macros)] // Not used on all OS. +macro_rules! debug_detail { + ( + // Match a value exactly. + match $type: ident ($event_type: ty), + $( $( #[$target: meta] )* $libc: ident :: $flag: ident ),+ $(,)? + ) => { + struct $type($event_type); + + impl fmt::Debug for $type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(match self.0 { + $( + $(#[$target])* + #[allow(clippy::bad_bit_mask)] // Apparently some flags are zero. + $libc :: $flag => stringify!($flag), + )+ + _ => "", + }) + } + } + }; + ( + // Integer bitset. + bitset $type: ident ($event_type: ty), + $( $( #[$target: meta] )* $libc: ident :: $flag: ident ),+ $(,)? + ) => { + struct $type($event_type); + + impl fmt::Debug for $type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut written_one = false; + $( + $(#[$target])* + #[allow(clippy::bad_bit_mask)] // Apparently some flags are zero. + { + if self.0 & $libc :: $flag != 0 { + if !written_one { + write!(f, "{}", stringify!($flag))?; + written_one = true; + } else { + write!(f, "|{}", stringify!($flag))?; + } + } + } + )+ + if !written_one { + write!(f, "(empty)") + } else { + Ok(()) + } + } + } + }; +} + +#[allow(unused_imports)] // Not used on all OS. +use {debug_detail, man_link, syscall}; diff --git a/src/mem.rs b/src/mem.rs index 61b14ff8..7f8f2971 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -1,12 +1,9 @@ //! Memory operations. -use std::future::Future; use std::io; -use std::pin::Pin; -use std::task::{self, Poll}; -use crate::op::{poll_state, OpState}; -use crate::{libc, man_link, SubmissionQueue}; +use crate::op::{operation, Operation}; +use crate::{man_link, sys, SubmissionQueue}; /// Give advice about use of memory. /// @@ -22,42 +19,16 @@ pub const fn advise( length: u32, advice: libc::c_int, ) -> Advise { - Advise { - sq, - state: OpState::NotStarted((address, length, advice)), - } -} - -/// [`Future`] behind [`advise`]. -#[derive(Debug)] -#[must_use = "`Future`s do nothing unless polled"] -pub struct Advise { - sq: SubmissionQueue, - state: OpState<(*mut (), u32, libc::c_int)>, -} - -impl Future for Advise { - type Output = io::Result<()>; - - fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - #[rustfmt::skip] // Rustfmt makes a right mess of this. 
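What a call site of the `syscall!` macro defined above looks like; `close_fd` is a made-up helper for illustration.

use std::io;
use std::os::fd::RawFd;

fn close_fd(fd: RawFd) -> io::Result<()> {
    // Expands to an `unsafe` call to `libc::close`, mapping a `-1` return
    // value to `io::Error::last_os_error()`.
    syscall!(close(fd))?;
    Ok(())
}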
-        let op_index = poll_state!(
-            Advise, self.state, self.sq, ctx,
-            |submission, (address, length, advice)| unsafe { submission.madvise(address, length, advice); }
-        );
-
-        match self.sq.poll_op(ctx, op_index) {
-            Poll::Ready(result) => {
-                self.state = OpState::Done;
-                match result {
-                    Ok((_, res)) => Poll::Ready(Ok(debug_assert!(res == 0))),
-                    Err(err) => Poll::Ready(Err(err)),
-                }
-            }
-            Poll::Pending => Poll::Pending,
-        }
-    }
+    Advise(Operation::new(sq, (), (address, length, advice)))
 }
 
+// SAFETY: `!Send` due to `address`, but the future is actually `Send`.
+#[allow(clippy::non_send_fields_in_send_ty)]
 unsafe impl Sync for Advise {}
+#[allow(clippy::non_send_fields_in_send_ty)]
 unsafe impl Send for Advise {}
+
+operation!(
+    /// [`Future`] behind [`advise`].
+    pub struct Advise(sys::mem::AdviseOp) -> io::Result<()>;
+);
diff --git a/src/msg.rs b/src/msg.rs
index 49d5316a..d6a50d0a 100644
--- a/src/msg.rs
+++ b/src/msg.rs
@@ -6,18 +6,10 @@
 use std::future::Future;
 use std::io;
-use std::os::fd::AsRawFd;
 use std::pin::Pin;
 use std::task::{self, Poll};
 
-use crate::{OpIndex, SubmissionQueue};
-
-/// Token used to the messages.
-///
-/// See [`msg_listener`].
-#[derive(Copy, Clone, Debug)]
-#[allow(clippy::module_name_repetitions)]
-pub struct MsgToken(pub(crate) OpIndex);
+use crate::{sys, OperationId, SubmissionQueue};
 
 /// Setup a listener for user space messages.
 ///
@@ -26,25 +18,23 @@ pub struct MsgToken(pub(crate) OpIndex);
 ///
 /// # Notes
 ///
-/// This will return an error if too many operations are already queued,
-/// this is usually resolved by calling [`Ring::poll`].
+/// This will return an error if too many operations are already queued; this is
+/// usually resolved by calling [`Ring::poll`].
 ///
-/// The returned `MsgToken` has an implicitly lifetime linked to
-/// `MsgListener`. If `MsgListener` is dropped the `MsgToken` will
-/// become invalid.
+/// The returned `MsgToken` has an implicit lifetime linked to `MsgListener`. If
+/// `MsgListener` is dropped the `MsgToken` will become invalid.
 ///
 /// Due to the limitations mentioned above it's advised to consider the
-/// usefulness of the type severly limited. The returned `MsgListener`
-/// iterator should live for the entire lifetime of the `Ring`, to ensure we
-/// don't use `MsgToken` after it became invalid. Furthermore to ensure
-/// the creation of it succeeds it should be done early in the lifetime of
-/// `Ring`.
+/// usefulness of the type severely limited. The returned `MsgListener` iterator
+/// should live for the entire lifetime of the `Ring`, to ensure we don't use
+/// `MsgToken` after it became invalid. Furthermore, to ensure the creation of it
+/// succeeds, it should be done early in the lifetime of `Ring`.
 ///
 /// [`Ring::poll`]: crate::Ring::poll
 #[allow(clippy::module_name_repetitions)]
 pub fn msg_listener(sq: SubmissionQueue) -> io::Result<(MsgListener, MsgToken)> {
-    let op_index = sq.queue_multishot()?;
-    Ok((MsgListener { sq, op_index }, MsgToken(op_index)))
+    let op_id = sq.inner.queue_multishot()?;
+    Ok((MsgListener { sq, op_id }, MsgToken(op_id)))
 }
 
 /// [`AsyncIterator`] behind [`msg_listener`].
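Given the caveats spelled out above, usage boils down to creating the listener once, early, and keeping it alive as long as the `Ring`. A minimal sketch:

use std::io;

use a10::msg::{msg_listener, MsgListener, MsgToken};
use a10::SubmissionQueue;

/// Set up the message listener before the ring gets busy, so the multishot
/// operation cannot fail due to a full operation queue.
fn setup_messaging(sq: SubmissionQueue) -> io::Result<(MsgListener, MsgToken)> {
    msg_listener(sq)
}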
@@ -55,41 +45,46 @@ pub fn msg_listener(sq: SubmissionQueue) -> io::Result<(MsgListener, MsgToken)> #[allow(clippy::module_name_repetitions)] pub struct MsgListener { sq: SubmissionQueue, - op_index: OpIndex, + op_id: OperationId, } impl MsgListener { - /// This is the same as the `AsyncIterator::poll_next` function, but then + /// This is the same as the [`AsyncIterator::poll_next`] function, but then /// available on stable Rust. - pub fn poll_next(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll> { - log::trace!(op_index = self.op_index.0; "polling multishot messages"); - if let Some(operation) = self.sq.shared.queued_ops.get(self.op_index.0) { - let mut operation = operation.lock().unwrap(); - if let Some(op) = &mut *operation { - return match op.poll_msg(ctx) { - Poll::Ready(data) => Poll::Ready(Some(data.1)), - Poll::Pending => Poll::Pending, - }; - } + /// + /// [`AsyncIterator::poll_next`]: std::async_iter::AsyncIterator::poll_next + pub fn poll_next(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll> { + let op_id = self.op_id; + // SAFETY: we've ensured that `op_id` is valid. + let mut queued_op_slot = unsafe { self.sq.get_op(op_id) }; + match queued_op_slot.as_mut() { + Some(queued_op) => match sys::msg::next(&mut queued_op.state) { + Some(data) => Poll::Ready(Some(data)), + None => { + queued_op.update_waker(ctx.waker()); + Poll::Pending + } + }, + // Somehow the queued operation is gone. This shouldn't happen. + None => Poll::Ready(None), } - panic!("a10::MsgListener called incorrectly"); } } #[cfg(feature = "nightly")] impl std::async_iter::AsyncIterator for MsgListener { - type Item = u32; + type Item = MsgData; fn poll_next(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll> { self.poll_next(ctx) } } -/// Try to send a message to iterator listening for message using [`MsgToken`]. +/// Try to send a message to [`MsgListener`] using [`MsgToken`]. /// /// This will use the io_uring submission queue to share `data` with the -/// receiving end. This means that it will wake up the thread if it's -/// currently [polling]. +/// receiving end. This means that it will wake up the thread if it's currently +/// [polling]. /// /// This will fail if the submission queue is currently full. See [`send_msg`] /// for a version that tries again when the submission queue is full. @@ -98,17 +93,15 @@ impl std::async_iter::AsyncIterator for MsgListener { /// /// [polling]: crate::Ring::poll #[allow(clippy::module_name_repetitions)] -pub fn try_send_msg(sq: &SubmissionQueue, token: MsgToken, data: u32) -> io::Result<()> { - sq.add_no_result(|submission| unsafe { - submission.msg(sq.shared.ring_fd.as_raw_fd(), (token.0).0 as u64, data, 0); - submission.no_completion_event(); - })?; +pub fn try_send_msg(sq: &SubmissionQueue, token: MsgToken, data: MsgData) -> io::Result<()> { + sq.inner + .submit_no_completion(|submission| sys::msg::send(sq, token.0, data, submission))?; Ok(()) } /// Send a message to iterator listening for message using [`MsgToken`]. 
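The difference between the two send flavours above, in use (sketch; `notify` is a made-up helper):

use std::io;

use a10::msg::{send_msg, try_send_msg, MsgData, MsgToken};
use a10::SubmissionQueue;

async fn notify(sq: SubmissionQueue, token: MsgToken, data: MsgData) -> io::Result<()> {
    // One-shot attempt: fails immediately if the submission queue is full.
    if try_send_msg(&sq, token, data).is_err() {
        // The future version parks itself via `wait_for_submission` and
        // retries once a slot frees up.
        send_msg(sq, token, data).await?;
    }
    Ok(())
}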
#[allow(clippy::module_name_repetitions)] -pub const fn send_msg<'sq>(sq: &'sq SubmissionQueue, token: MsgToken, data: u32) -> SendMsg<'sq> { +pub const fn send_msg(sq: SubmissionQueue, token: MsgToken, data: MsgData) -> SendMsg { SendMsg { sq, token, data } } @@ -116,22 +109,32 @@ pub const fn send_msg<'sq>(sq: &'sq SubmissionQueue, token: MsgToken, data: u32) #[derive(Debug)] #[must_use = "`Future`s do nothing unless polled"] #[allow(clippy::module_name_repetitions)] -pub struct SendMsg<'sq> { - sq: &'sq SubmissionQueue, +pub struct SendMsg { + sq: SubmissionQueue, token: MsgToken, - data: u32, + data: MsgData, } -impl<'sq> Future for SendMsg<'sq> { +impl Future for SendMsg { type Output = io::Result<()>; fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - match try_send_msg(self.sq, self.token, self.data) { + match try_send_msg(&self.sq, self.token, self.data) { Ok(()) => Poll::Ready(Ok(())), Err(_) => { - self.sq.wait_for_submission(ctx.waker().clone()); + self.sq.inner.wait_for_submission(ctx.waker().clone()); Poll::Pending } } } } + +/// Type of data this module can send. +pub type MsgData = u32; + +/// Token used to the messages. +/// +/// See [`msg_listener`]. +#[derive(Copy, Clone, Debug)] +#[allow(clippy::module_name_repetitions)] +pub struct MsgToken(pub(crate) OperationId); diff --git a/src/net.rs b/src/net.rs index c93e14e4..aec7db09 100644 --- a/src/net.rs +++ b/src/net.rs @@ -3,27 +3,27 @@ //! To create a new socket ([`AsyncFd`]) use the [`socket`] function, which //! issues a non-blocking `socket(2)` call. -// This is not ideal. -// This should only be applied to `SendMsg` and `RecvVectored` `RecvFrom` and -// `RecvFromVectored` as they use `libc::iovec` internally, which is `!Send`, -// while it actually is `Send`. -#![allow(clippy::non_send_fields_in_send_ty)] - +use std::ffi::OsStr; use std::future::Future; -use std::marker::PhantomData; -use std::mem::{self, size_of, MaybeUninit}; +use std::mem::{self, MaybeUninit}; +use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6}; +use std::os::linux::net::SocketAddrExt; +use std::os::unix; +use std::os::unix::ffi::OsStrExt; +use std::path::Path; use std::pin::Pin; use std::task::{self, Poll}; -use std::{io, ptr}; +use std::{fmt, io, ptr, slice}; -use crate::cancel::{Cancel, CancelOp, CancelResult}; +use crate::cancel::{Cancel, CancelOperation, CancelResult}; use crate::extract::{Extract, Extractor}; use crate::fd::{AsyncFd, Descriptor, File}; use crate::io::{ - Buf, BufIdx, BufMut, BufMutSlice, BufSlice, ReadBuf, ReadBufPool, ReadNBuf, SkipBuf, + Buf, BufMut, BufMutSlice, BufSlice, Buffer, IoMutSlice, ReadBuf, ReadBufPool, ReadNBuf, SkipBuf, }; -use crate::op::{op_async_iter, op_future, poll_state, OpState}; -use crate::{libc, man_link, SubmissionQueue}; +use crate::op::{fd_iter_operation, fd_operation, operation, FdOperation, Operation}; +use crate::sys::net::MsgHeader; +use crate::{man_link, sys, SubmissionQueue}; /// Creates a new socket. #[doc = man_link!(socket(2))] @@ -34,23 +34,133 @@ pub const fn socket( protocol: libc::c_int, flags: libc::c_int, ) -> Socket { - Socket { - sq: Some(sq), - state: OpState::NotStarted((domain, r#type, protocol, flags)), - kind: PhantomData, - } + Socket(Operation::new(sq, (), (domain, r#type, protocol, flags))) } +operation!( + /// [`Future`] behind [`socket`]. + /// + /// If you're looking for a socket type, there is none, see [`AsyncFd`]. + pub struct Socket(sys::net::SocketOp) -> io::Result>; +); + /// Socket related system calls. 
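Creating a socket with the `socket` function above (sketch; the domain and type constants are illustrative):

use std::io;

use a10::net::socket;
use a10::{AsyncFd, SubmissionQueue};

async fn tcp_socket(sq: SubmissionQueue) -> io::Result<AsyncFd> {
    // A non-blocking `socket(2)` call; the returned `AsyncFd` is the closest
    // thing to a "socket type" in this API.
    socket(sq, libc::AF_INET, libc::SOCK_STREAM, 0, 0).await
}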
impl AsyncFd { /// Initiate a connection on this socket to the specified address. #[doc = man_link!(connect(2))] - pub fn connect<'fd, A>(&'fd self, address: impl Into>) -> Connect<'fd, A, D> + pub fn connect<'fd, A>(&'fd self, address: A) -> Connect<'fd, A, D> + where + A: SocketAddress, + { + let storage = AddressStorage(Box::from(address.into_storage())); + Connect(FdOperation::new(self, storage, ())) + } + + /// Receives data on the socket from the remote address to which it is + /// connected. + #[doc = man_link!(recv(2))] + pub const fn recv<'fd, B>(&'fd self, buf: B, flags: libc::c_int) -> Recv<'fd, B, D> + where + B: BufMut, + { + let buf = Buffer { buf }; + Recv(FdOperation::new(self, buf, flags)) + } + + /// Continuously receive data on the socket from the remote address to which + /// it is connected. + /// + /// # Notes + /// + /// This will return `ENOBUFS` if no buffer is available in the `pool` to + /// read into. + /// + /// Be careful when using this as a peer sending a lot data might take up + /// all your buffers from your pool! + pub const fn multishot_recv<'fd>( + &'fd self, + pool: ReadBufPool, + flags: libc::c_int, + ) -> MultishotRecv<'fd, D> { + MultishotRecv(FdOperation::new(self, pool, flags)) + } + + /// Receives at least `n` bytes on the socket from the remote address to + /// which it is connected. + pub const fn recv_n<'fd, B>(&'fd self, buf: B, n: usize) -> RecvN<'fd, B, D> + where + B: BufMut, + { + let buf = ReadNBuf { buf, last_read: 0 }; + RecvN { + recv: self.recv(buf, 0), + left: n, + } + } + + /// Receives data on the socket from the remote address to which it is + /// connected, using vectored I/O. + #[doc = man_link!(recvmsg(2))] + pub fn recv_vectored<'fd, B, const N: usize>( + &'fd self, + mut bufs: B, + flags: libc::c_int, + ) -> RecvVectored<'fd, B, N, D> + where + B: BufMutSlice, + { + let iovecs = unsafe { bufs.as_iovecs_mut() }; + let resources = Box::new((MsgHeader::empty(), iovecs)); + RecvVectored(FdOperation::new(self, (bufs, resources), flags)) + } + + /// Receives at least `n` bytes on the socket from the remote address to + /// which it is connected, using vectored I/O. + pub fn recv_n_vectored<'fd, B, const N: usize>( + &'fd self, + bufs: B, + n: usize, + ) -> RecvNVectored<'fd, B, N, D> + where + B: BufMutSlice, + { + let bufs = ReadNBuf { + buf: bufs, + last_read: 0, + }; + RecvNVectored { + recv: self.recv_vectored(bufs, 0), + left: n, + } + } + + /// Receives data on the socket and returns the source address. + #[doc = man_link!(recvmsg(2))] + pub fn recv_from<'fd, B, A>(&'fd self, mut buf: B, flags: libc::c_int) -> RecvFrom<'fd, B, A, D> + where + B: BufMut, + A: SocketAddress, + { + // SAFETY: we're ensure that `iovec` doesn't outlive the `buf`fer. + let iovec = unsafe { IoMutSlice::new(&mut buf) }; + let resources = Box::new((MsgHeader::empty(), iovec, MaybeUninit::uninit())); + RecvFrom(FdOperation::new(self, (buf, resources), flags)) + } + + /// Receives data on the socket and the source address using vectored I/O. 
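How the receive helpers above compose, as a sketch assuming the crate's `BufMut` implementation for `Vec<u8>` (spare capacity is the read target):

use std::io;

use a10::AsyncFd;

/// Receive exactly 16 bytes, e.g. a fixed-size frame header.
async fn read_header(fd: &AsyncFd) -> io::Result<Vec<u8>> {
    // `recv_n` keeps re-submitting `recv` until 16 bytes arrived, failing
    // with `UnexpectedEof` if the peer shuts down before that.
    fd.recv_n(Vec::with_capacity(16), 16).await
}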
+ #[doc = man_link!(recvmsg(2))] + pub fn recv_from_vectored<'fd, B, A, const N: usize>( + &'fd self, + mut bufs: B, + flags: libc::c_int, + ) -> RecvFromVectored<'fd, B, A, N, D> where + B: BufMutSlice, A: SocketAddress, { - let address = address.into(); - Connect::new(self, address, ()) + let iovecs = unsafe { bufs.as_iovecs_mut() }; + let resources = Box::new((MsgHeader::empty(), iovecs, MaybeUninit::uninit())); + RecvFromVectored(FdOperation::new(self, (bufs, resources), flags)) } /// Sends data on the socket to a connected peer. @@ -59,7 +169,8 @@ impl AsyncFd { where B: Buf, { - Send::new(self, buf, (libc::IORING_OP_SEND as u8, flags)) + let buf = Buffer { buf }; + Send(FdOperation::new(self, buf, (SendCall::Normal, flags))) } /// Same as [`AsyncFd::send`], but tries to avoid making intermediate copies @@ -78,16 +189,40 @@ impl AsyncFd { where B: Buf, { - Send::new(self, buf, (libc::IORING_OP_SEND_ZC as u8, flags)) + let buf = Buffer { buf }; + Send(FdOperation::new(self, buf, (SendCall::ZeroCopy, flags))) } /// Sends all data in `buf` on the socket to a connected peer. /// Returns [`io::ErrorKind::WriteZero`] if not all bytes could be written. - pub const fn send_all<'fd, B>(&'fd self, buf: B) -> SendAll<'fd, B, D> + pub const fn send_all<'fd, B>(&'fd self, buf: B, flags: libc::c_int) -> SendAll<'fd, B, D> + where + B: Buf, + { + let buf = SkipBuf { buf, skip: 0 }; + SendAll { + send: Extractor { + fut: self.send(buf, flags), + }, + send_op: SendCall::Normal, + flags, + } + } + + /// Same as [`AsyncFd::send_all`], but tries to avoid making intermediate + /// copies of `buf`. + pub const fn send_all_zc<'fd, B>(&'fd self, buf: B, flags: libc::c_int) -> SendAll<'fd, B, D> where B: Buf, { - SendAll::new(self, buf) + let buf = SkipBuf { buf, skip: 0 }; + SendAll { + send: Extractor { + fut: self.send(buf, flags), + }, + send_op: SendCall::ZeroCopy, + flags, + } } /// Sends data in `bufs` on the socket to a connected peer. @@ -100,7 +235,7 @@ impl AsyncFd { where B: BufSlice, { - self.sendmsg(libc::IORING_OP_SENDMSG as u8, bufs, NoAddress, flags) + self.sendmsg(SendCall::Normal, bufs, NoAddress, flags) } /// Same as [`AsyncFd::send_vectored`], but tries to avoid making @@ -113,7 +248,7 @@ impl AsyncFd { where B: BufSlice, { - self.sendmsg(libc::IORING_OP_SENDMSG_ZC as u8, bufs, NoAddress, flags) + self.sendmsg(SendCall::ZeroCopy, bufs, NoAddress, flags) } /// Sends all data in `bufs` on the socket to a connected peer, using @@ -126,12 +261,33 @@ impl AsyncFd { where B: BufSlice, { - SendAllVectored::new(self, bufs) + SendAllVectored { + send: self.send_vectored(bufs, 0).extract(), + skip: 0, + send_op: SendCall::Normal, + } + } + + /// Sends all data in `bufs` on the socket to a connected peer, using + /// vectored I/O. + /// Returns [`io::ErrorKind::WriteZero`] if not all bytes could be written. + pub fn send_all_vectored_zc<'fd, B, const N: usize>( + &'fd self, + bufs: B, + ) -> SendAllVectored<'fd, B, N, D> + where + B: BufSlice, + { + SendAllVectored { + send: self.send_vectored(bufs, 0).extract(), + skip: 0, + send_op: SendCall::ZeroCopy, + } } /// Sends data on the socket to a connected peer. 
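The sending counterpart, again assuming `Buf` for `Vec<u8>` (sketch): `send_all` resubmits partial sends and turns a zero-length write into `WriteZero`.

use std::io;

use a10::AsyncFd;

async fn send_greeting(fd: &AsyncFd) -> io::Result<()> {
    // Keeps calling `send` until the whole buffer is written.
    fd.send_all(b"hello world".to_vec(), 0).await
}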
- #[doc = man_link!(send(2))] - pub const fn sendto<'fd, B, A>( + #[doc = man_link!(sendto(2))] + pub fn send_to<'fd, B, A>( &'fd self, buf: B, address: A, @@ -141,14 +297,16 @@ impl AsyncFd { B: Buf, A: SocketAddress, { - SendTo::new(self, buf, address, (libc::IORING_OP_SEND as u8, flags)) + let resources = (buf, Box::new(address.into_storage())); + let args = (SendCall::Normal, flags); + SendTo(FdOperation::new(self, resources, args)) } - /// Same as [`AsyncFd::sendto`], but tries to avoid making intermediate copies - /// of `buf`. + /// Same as [`AsyncFd::send_to`], but tries to avoid making intermediate + /// copies of `buf`. /// /// See [`AsyncFd::send_zc`] for additional notes. - pub const fn sendto_zc<'fd, B, A>( + pub fn send_to_zc<'fd, B, A>( &'fd self, buf: B, address: A, @@ -158,12 +316,14 @@ impl AsyncFd { B: Buf, A: SocketAddress, { - SendTo::new(self, buf, address, (libc::IORING_OP_SEND_ZC as u8, flags)) + let resources = (buf, Box::new(address.into_storage())); + let args = (SendCall::ZeroCopy, flags); + SendTo(FdOperation::new(self, resources, args)) } /// Sends data in `bufs` on the socket to a connected peer. #[doc = man_link!(sendmsg(2))] - pub fn sendto_vectored<'fd, B, A, const N: usize>( + pub fn send_to_vectored<'fd, B, A, const N: usize>( &'fd self, bufs: B, address: A, @@ -173,12 +333,12 @@ impl AsyncFd { B: BufSlice, A: SocketAddress, { - self.sendmsg(libc::IORING_OP_SENDMSG as u8, bufs, address, flags) + self.sendmsg(SendCall::Normal, bufs, address, flags) } - /// Same as [`AsyncFd::sendto_vectored`], but tries to avoid making + /// Same as [`AsyncFd::send_to_vectored`], but tries to avoid making /// intermediate copies of `buf`. - pub fn sendto_vectored_zc<'fd, B, A, const N: usize>( + pub fn send_to_vectored_zc<'fd, B, A, const N: usize>( &'fd self, bufs: B, address: A, @@ -188,12 +348,12 @@ impl AsyncFd { B: BufSlice, A: SocketAddress, { - self.sendmsg(libc::IORING_OP_SENDMSG_ZC as u8, bufs, address, flags) + self.sendmsg(SendCall::ZeroCopy, bufs, address, flags) } fn sendmsg<'fd, B, A, const N: usize>( &'fd self, - op: u8, + send_op: SendCall, bufs: B, address: A, flags: libc::c_int, @@ -202,125 +362,10 @@ impl AsyncFd { B: BufSlice, A: SocketAddress, { - // SAFETY: zeroed `msghdr` is valid. - let msg = unsafe { mem::zeroed() }; let iovecs = unsafe { bufs.as_iovecs() }; - SendMsg::new(self, bufs, address, msg, iovecs, (op, flags)) - } - - /// Receives data on the socket from the remote address to which it is - /// connected. - #[doc = man_link!(recv(2))] - pub const fn recv<'fd, B>(&'fd self, buf: B, flags: libc::c_int) -> Recv<'fd, B, D> - where - B: BufMut, - { - Recv::new(self, buf, flags) - } - - /// Continuously receive data on the socket from the remote address to which - /// it is connected. - /// - /// # Notes - /// - /// This will return `ENOBUFS` if no buffer is available in the `pool` to - /// read into. - /// - /// Be careful when using this as a peer sending a lot data might take up - /// all your buffers from your pool! - pub const fn multishot_recv<'fd>( - &'fd self, - pool: ReadBufPool, - flags: libc::c_int, - ) -> MultishotRecv<'fd, D> { - MultishotRecv::new(self, pool, flags) - } - - /// Receives at least `n` bytes on the socket from the remote address to - /// which it is connected. - pub const fn recv_n<'fd, B>(&'fd self, buf: B, n: usize) -> RecvN<'fd, B, D> - where - B: BufMut, - { - RecvN::new(self, buf, n) - } - - /// Receives data on the socket from the remote address to which it is - /// connected, using vectored I/O. 
- #[doc = man_link!(recvmsg(2))] - pub fn recv_vectored<'fd, B, const N: usize>( - &'fd self, - mut bufs: B, - flags: libc::c_int, - ) -> RecvVectored<'fd, B, N, D> - where - B: BufMutSlice, - { - // TODO: replace with `Box::new_zeroed` once `new_uninit` is stable. - // SAFETY: zeroed `msghdr` is valid. - let msg = unsafe { Box::new(mem::zeroed()) }; - let iovecs = unsafe { bufs.as_iovecs_mut() }; - RecvVectored::new(self, bufs, msg, iovecs, flags) - } - - /// Receives at least `n` bytes on the socket from the remote address to - /// which it is connected, using vectored I/O. - pub fn recv_n_vectored<'fd, B, const N: usize>( - &'fd self, - bufs: B, - n: usize, - ) -> RecvNVectored<'fd, B, N, D> - where - B: BufMutSlice, - { - RecvNVectored::new(self, bufs, n) - } - - /// Receives data on the socket and returns the source address. - #[doc = man_link!(recvmsg(2))] - pub fn recvfrom<'fd, B, A>(&'fd self, mut buf: B, flags: libc::c_int) -> RecvFrom<'fd, B, A, D> - where - B: BufMut, - A: SocketAddress, - { - // SAFETY: zeroed `msghdr` is valid. - let msg = unsafe { mem::zeroed() }; - let (buf_ptr, buf_len) = unsafe { buf.parts_mut() }; - let iovec = libc::iovec { - iov_base: buf_ptr.cast(), - iov_len: buf_len as _, - }; - let msg = Box::new((msg, MaybeUninit::uninit())); - RecvFrom::new(self, buf, msg, iovec, flags) - } - - /// Receives data on the socket and the source address using vectored I/O. - #[doc = man_link!(recvmsg(2))] - pub fn recvfrom_vectored<'fd, B, A, const N: usize>( - &'fd self, - mut bufs: B, - flags: libc::c_int, - ) -> RecvFromVectored<'fd, B, A, N, D> - where - B: BufMutSlice, - A: SocketAddress, - { - // SAFETY: zeroed `msghdr` is valid. - let msg = unsafe { mem::zeroed() }; - let iovecs = unsafe { bufs.as_iovecs_mut() }; - let msg = Box::new((msg, MaybeUninit::uninit())); - RecvFromVectored::new(self, bufs, msg, iovecs, flags) - } - - /// Shuts down the read, write, or both halves of this connection. - #[doc = man_link!(shutdown(2))] - pub const fn shutdown<'fd>(&'fd self, how: std::net::Shutdown) -> Shutdown<'fd, D> { - let how = match how { - std::net::Shutdown::Read => libc::SHUT_RD, - std::net::Shutdown::Write => libc::SHUT_WR, - std::net::Shutdown::Both => libc::SHUT_RDWR, - }; - Shutdown::new(self, how) + let address = address.into_storage(); + let resources = Box::new((MsgHeader::empty(), iovecs, address)); + SendMsg(FdOperation::new(self, (bufs, resources), (send_op, flags))) } /// Accept a new socket stream ([`AsyncFd`]). @@ -328,7 +373,10 @@ impl AsyncFd { /// If an accepted stream is returned, the remote address of the peer is /// returned along with it. #[doc = man_link!(accept(2))] - pub fn accept<'fd, A>(&'fd self) -> Accept<'fd, A, D> { + pub fn accept<'fd, A>(&'fd self) -> Accept<'fd, A, D> + where + A: SocketAddress, + { // `cloexec_flag` returns `O_CLOEXEC`, technically we should use // `SOCK_CLOEXEC`, so ensure the value is the same so it works as // expected. @@ -341,9 +389,12 @@ impl AsyncFd { /// /// Also see [`AsyncFd::accept`]. #[doc = man_link!(accept4(2))] - pub fn accept4<'fd, A>(&'fd self, flags: libc::c_int) -> Accept<'fd, A, D> { - let address = Box::new((MaybeUninit::uninit(), 0)); - Accept::new(self, address, flags) + pub fn accept4<'fd, A>(&'fd self, flags: libc::c_int) -> Accept<'fd, A, D> + where + A: SocketAddress, + { + let address = AddressStorage(Box::new((MaybeUninit::uninit(), 0))); + Accept(FdOperation::new(self, address, flags)) } /// Accept multiple socket streams. 
@@ -364,27 +415,26 @@ impl AsyncFd { /// /// Also see [`AsyncFd::multishot_accept`]. pub const fn multishot_accept4<'fd>(&'fd self, flags: libc::c_int) -> MultishotAccept<'fd, D> { - MultishotAccept::new(self, flags) + MultishotAccept(FdOperation::new(self, (), flags)) } /// Get socket option. /// - /// At the time of writing this limited to the `SOL_SOCKET` level. + /// At the time of writing this limited to the `SOL_SOCKET` level for + /// io_uring. /// /// # Safety /// /// The caller must ensure that `T` is the valid type for the option. #[doc = man_link!(getsockopt(2))] #[doc(alias = "getsockopt")] - #[allow(clippy::cast_sign_loss)] // No valid negative level or optnames. pub fn socket_option<'fd, T>( &'fd self, level: libc::c_int, optname: libc::c_int, ) -> SocketOption<'fd, T, D> { - // TODO: replace with `Box::new_uninit` once `new_uninit` is stable. let value = Box::new(MaybeUninit::uninit()); - SocketOption::new(self, value, (level as libc::__u32, optname as libc::__u32)) + SocketOption(FdOperation::new(self, value, (level, optname))) } /// Set socket option. @@ -396,7 +446,6 @@ impl AsyncFd { /// The caller must ensure that `T` is the valid type for the option. #[doc = man_link!(setsockopt(2))] #[doc(alias = "setsockopt")] - #[allow(clippy::cast_sign_loss)] // No valid negative level or optnames. pub fn set_socket_option<'fd, T>( &'fd self, level: libc::c_int, @@ -404,136 +453,148 @@ impl AsyncFd { optvalue: T, ) -> SetSocketOption<'fd, T, D> { let value = Box::new(optvalue); - SetSocketOption::new(self, value, (level as libc::__u32, optname as libc::__u32)) + SetSocketOption(FdOperation::new(self, value, (level, optname))) + } + + /// Shuts down the read, write, or both halves of this connection. + #[doc = man_link!(shutdown(2))] + pub const fn shutdown<'fd>(&'fd self, how: std::net::Shutdown) -> Shutdown<'fd, D> { + Shutdown(FdOperation::new(self, (), how)) } } -/// [`Future`] to create a new [`socket`] asynchronously. -/// -/// If you're looking for a socket type, there is none, see [`AsyncFd`]. +#[derive(Copy, Clone, Debug)] +pub(crate) enum SendCall { + Normal, + ZeroCopy, +} + +fd_operation! { + /// [`Future`] behind [`AsyncFd::connect`]. + pub struct Connect(sys::net::ConnectOp) -> io::Result<()>; + + /// [`Future`] behind [`AsyncFd::recv`]. + pub struct Recv(sys::net::RecvOp) -> io::Result; + + /// [`Future`] behind [`AsyncFd::recv_vectored`]. + pub struct RecvVectored; const N: usize>(sys::net::RecvVectoredOp) -> io::Result<(B, libc::c_int)>; + + /// [`Future`] behind [`AsyncFd::recv_from`]. + pub struct RecvFrom(sys::net::RecvFromOp) -> io::Result<(B, A, libc::c_int)>; + + /// [`Future`] behind [`AsyncFd::recv_from_vectored`]. + pub struct RecvFromVectored, A: SocketAddress; const N: usize>(sys::net::RecvFromVectoredOp) -> io::Result<(B, A, libc::c_int)>; + + /// [`Future`] behind [`AsyncFd::send`] and [`AsyncFd::send_zc`]. + pub struct Send(sys::net::SendOp) -> io::Result, + impl Extract -> io::Result<(B, usize)>; + + /// [`Future`] behind [`AsyncFd::send_to`] and [`AsyncFd::send_to_zc`]. + pub struct SendTo(sys::net::SendToOp) -> io::Result, + impl Extract -> io::Result<(B, usize)>; + + /// [`Future`] behind [`AsyncFd::send_vectored`], + /// [`AsyncFd::send_vectored_zc`], [`AsyncFd::send_to_vectored`], + /// [`AsyncFd::send_to_vectored_zc`]. + pub struct SendMsg, A: SocketAddress; const N: usize>(sys::net::SendMsgOp) -> io::Result, + impl Extract -> io::Result<(B, usize)>; + + /// [`Future`] behind [`AsyncFd::accept`]. 
+ pub struct Accept(sys::net::AcceptOp) -> io::Result<(AsyncFd, A)>; + + /// [`Future`] behind [`AsyncFd::socket_option`]. + pub struct SocketOption(sys::net::SocketOptionOp) -> io::Result; + + /// [`Future`] behind [`AsyncFd::set_socket_option`]. + pub struct SetSocketOption(sys::net::SetSocketOptionOp) -> io::Result<()>, + impl Extract -> io::Result; + + /// [`Future`] behind [`AsyncFd::shutdown`]. + pub struct Shutdown(sys::net::ShutdownOp) -> io::Result<()>; +} + +fd_iter_operation! { + /// [`AsyncIterator`] behind [`AsyncFd::multishot_recv`]. + pub struct MultishotRecv(sys::net::MultishotRecvOp) -> io::Result; + + /// [`AsyncIterator`] behind [`AsyncFd::multishot_accept`] and [`AsyncFd::multishot_accept4`]. + pub struct MultishotAccept(sys::net::MultishotAcceptOp) -> io::Result>; +} + +/// [`Future`] behind [`AsyncFd::recv_n`]. #[derive(Debug)] -#[must_use = "`Future`s do nothing unless polled"] -pub struct Socket { - sq: Option, - state: OpState<(libc::c_int, libc::c_int, libc::c_int, libc::c_int)>, - kind: PhantomData, +pub struct RecvN<'fd, B: BufMut, D: Descriptor = File> { + recv: Recv<'fd, ReadNBuf, D>, + /// Number of bytes we still need to receive to hit our target `N`. + left: usize, } -impl Future for Socket { - type Output = io::Result>; +impl<'fd, B: BufMut, D: Descriptor> Cancel for RecvN<'fd, B, D> { + fn try_cancel(&mut self) -> CancelResult { + self.recv.try_cancel() + } - fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - let op_index = poll_state!( - Socket, - self.state, - // SAFETY: `poll_state!` will panic if `OpState == Done`, if not - // this unwrap is safe. - self.sq.as_ref().unwrap(), - ctx, - |submission, (domain, r#type, protocol, flags)| unsafe { - submission.socket(domain, r#type, protocol, flags); - D::create_flags(submission); - }, - ); - - // SAFETY: this is only `None` if `OpState == Done`, which would mean - // `poll_state!` above would panic. - let sq = self.sq.as_ref().unwrap(); - match sq.poll_op(ctx, op_index) { - Poll::Ready(result) => { - self.state = OpState::Done; - match result { - Ok((_, fd)) => Poll::Ready(Ok(unsafe { - // SAFETY: the socket operation ensures that `fd` is valid. - // SAFETY: unwrapping `sq` is safe as used it above. - AsyncFd::from_raw(fd, self.sq.take().unwrap()) - })), - Err(err) => Poll::Ready(Err(err)), + fn cancel(&mut self) -> CancelOperation { + self.recv.cancel() + } +} + +impl<'fd, B: BufMut, D: Descriptor> Future for RecvN<'fd, B, D> { + type Output = io::Result; + + fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { + // SAFETY: not moving the `Recv` future. + let this = unsafe { Pin::into_inner_unchecked(self) }; + let mut recv = unsafe { Pin::new_unchecked(&mut this.recv) }; + match recv.as_mut().poll(ctx) { + Poll::Ready(Ok(buf)) => { + if buf.last_read == 0 { + return Poll::Ready(Err(io::ErrorKind::UnexpectedEof.into())); + } + + if buf.last_read >= this.left { + // Received the required amount of bytes. + return Poll::Ready(Ok(buf.buf)); } + + this.left -= buf.last_read; + + recv.set(recv.0.fd().recv(buf, 0)); + unsafe { Pin::new_unchecked(this) }.poll(ctx) } + Poll::Ready(Err(err)) => Poll::Ready(Err(err)), Poll::Pending => Poll::Pending, } } } -// Connect. -op_future! { - fn AsyncFd::connect -> (), - struct Connect<'fd, A: SocketAddress> { - /// Address needs to stay alive for as long as the kernel is connecting. 
- address: Box, - }, - setup_state: _unused: (), - setup: |submission, fd, (address,), ()| unsafe { - let (ptr, len) = SocketAddress::as_ptr(&**address); - submission.connect(fd.fd(), ptr, len); - }, - map_result: |result| Ok(debug_assert!(result == 0)), - extract: |this, (address,), res| -> Box { - debug_assert!(res == 0); - Ok(address) - }, -} - -// Send. -op_future! { - fn AsyncFd::send -> usize, - struct Send<'fd, B: Buf> { - /// Buffer to read from, needs to stay in memory so the kernel can - /// access it safely. - buf: B, - }, - drop_using: Box, - setup_state: flags: (u8, libc::c_int), - setup: |submission, fd, (buf,), (op, flags)| unsafe { - let (ptr, len) = buf.parts(); - submission.send(op, fd.fd(), ptr, len, flags); - }, - map_result: |n| { - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - Ok(n as usize) - }, - extract: |this, (buf,), n| -> (B, usize) { - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - Ok((buf, n as usize)) - }, -} - /// [`Future`] behind [`AsyncFd::send_all`]. #[derive(Debug)] -pub struct SendAll<'fd, B, D: Descriptor = File> { +pub struct SendAll<'fd, B: Buf, D: Descriptor = File> { send: Extractor, D>>, + send_op: SendCall, + flags: libc::c_int, } impl<'fd, B: Buf, D: Descriptor> SendAll<'fd, B, D> { - const fn new(fd: &'fd AsyncFd, buf: B) -> SendAll<'fd, B, D> { - let buf = SkipBuf { buf, skip: 0 }; - SendAll { - // TODO: once `Extract` is a constant trait use that. - send: Extractor { - fut: fd.send(buf, 0), - }, - } - } - - /// Poll implementation used by the [`Future`] implement for the naked type - /// and the type wrapper in an [`Extractor`]. - fn inner_poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll> { - // SAFETY: not moving `Future`. - let this = unsafe { Pin::into_inner_unchecked(self) }; - let mut send = unsafe { Pin::new_unchecked(&mut this.send) }; - match send.as_mut().poll(ctx) { + fn poll_inner(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll> { + match Pin::new(&mut self.send).poll(ctx) { Poll::Ready(Ok((_, 0))) => Poll::Ready(Err(io::ErrorKind::WriteZero.into())), Poll::Ready(Ok((mut buf, n))) => { buf.skip += n as u32; if let (_, 0) = unsafe { buf.parts() } { - // Written everything. + // Send everything. return Poll::Ready(Ok(buf.buf)); } - send.set(send.fut.fd.send(buf, 0).extract()); - unsafe { Pin::new_unchecked(this) }.inner_poll(ctx) + // Send some more. 
+ self.send = match self.send_op { + SendCall::Normal => self.send.fut.0.fd().send(buf, self.flags), + SendCall::ZeroCopy => self.send.fut.0.fd().send_zc(buf, self.flags), + } + .extract(); + self.poll_inner(ctx) } Poll::Ready(Err(err)) => Poll::Ready(Err(err)), Poll::Pending => Poll::Pending, @@ -541,12 +602,12 @@ impl<'fd, B: Buf, D: Descriptor> SendAll<'fd, B, D> { } } -impl<'fd, B, D: Descriptor> Cancel for SendAll<'fd, B, D> { +impl<'fd, B: Buf, D: Descriptor> Cancel for SendAll<'fd, B, D> { fn try_cancel(&mut self) -> CancelResult { self.send.try_cancel() } - fn cancel(&mut self) -> CancelOp { + fn cancel(&mut self) -> CancelOperation { self.send.cancel() } } @@ -555,7 +616,7 @@ impl<'fd, B: Buf, D: Descriptor> Future for SendAll<'fd, B, D> { type Output = io::Result<()>; fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - self.inner_poll(ctx).map_ok(|_| ()) + self.poll_inner(ctx).map_ok(|_| ()) } } @@ -564,99 +625,70 @@ impl<'fd, B: Buf, D: Descriptor> Extract for SendAll<'fd, B, D> {} impl<'fd, B: Buf, D: Descriptor> Future for Extractor> { type Output = io::Result; - fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - // SAFETY: not moving `fut`. - unsafe { Pin::map_unchecked_mut(self, |s| &mut s.fut) }.inner_poll(ctx) + fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { + Pin::new(&mut self.fut).poll_inner(ctx) } } -// SendTo. -op_future! { - fn AsyncFd::sendto -> usize, - struct SendTo<'fd, B: Buf, A: SocketAddress> { - /// Buffer to read from, needs to stay in memory so the kernel can - /// access it safely. - buf: B, - /// Address to send to. - address: A, - }, - drop_using: Box, - setup_state: flags: (u8, libc::c_int), - setup: |submission, fd, (buf, address), (op, flags)| unsafe { - let (buf, buf_len) = buf.parts(); - let (addr, addr_len) = SocketAddress::as_ptr(address); - submission.sendto(op, fd.fd(), buf, buf_len, addr, addr_len, flags); - }, - map_result: |n| { - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - Ok(n as usize) - }, - extract: |this, (buf, _), n| -> (B, usize) { - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - Ok((buf, n as usize)) - }, +/// [`Future`] behind [`AsyncFd::recv_n_vectored`]. +#[derive(Debug)] +pub struct RecvNVectored<'fd, B: BufMutSlice, const N: usize, D: Descriptor = File> { + recv: RecvVectored<'fd, ReadNBuf, N, D>, + /// Number of bytes we still need to receive to hit our target `N`. + left: usize, } -// SendMsg. -op_future! { - fn AsyncFd::send_vectored -> usize, - struct SendMsg<'fd, B: BufSlice, A: SocketAddress; const N: usize> { - /// Buffer to read from, needs to stay in memory so the kernel can - /// access it safely. - bufs: B, - /// Address to send to. - address: A, - /// NOTE: we only need `msg` and `iovec` in the submission, we don't - /// have to keep around during the operation. Because of this we don't - /// heap allocate it like we for other operations. This leaves a small - /// duration between the submission of the entry and the submission - /// being read by the kernel in which this future could be dropped and - /// the kernel will read memory we don't own. However because we wake - /// the kernel after submitting the timeout entry it's not really worth - /// to heap allocation. - msg: libc::msghdr, - iovecs: [libc::iovec; N], - }, - drop_using: Box, - /// `msg` and `iovecs` can't move until the kernel has read the submission. 
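The resubmission logic of `SendAll::poll_inner` above, written out as a plain loop. This is an illustrative sketch that copies the remaining bytes each round instead of using `SkipBuf`, and assumes `Buf` for `Vec<u8>`:

use std::io;

use a10::extract::Extract;
use a10::AsyncFd;

async fn send_all_sketch(fd: &AsyncFd, buf: Vec<u8>) -> io::Result<()> {
    let mut skip = 0;
    while skip < buf.len() {
        // `extract()` returns the buffer alongside the number of bytes sent.
        let (_, n) = fd.send(buf[skip..].to_vec(), 0).extract().await?;
        if n == 0 {
            // Same error `send_all` reports for a zero-length write.
            return Err(io::ErrorKind::WriteZero.into());
        }
        skip += n;
    }
    Ok(())
}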
-    impl !Unpin,
-    setup_state: flags: (u8, libc::c_int),
-    setup: |submission, fd, (_, address, msg, iovecs), (op, flags)| unsafe {
-        msg.msg_iov = iovecs.as_mut_ptr();
-        msg.msg_iovlen = N;
-        let (addr, addr_len) = SocketAddress::as_ptr(address);
-        msg.msg_name = addr.cast_mut().cast();
-        msg.msg_namelen = addr_len;
-        submission.sendmsg(op, fd.fd(), &*msg, flags);
-    },
-    map_result: |n| {
-        #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors.
-        Ok(n as usize)
-    },
-    extract: |this, (buf, _, _, _), n| -> (B, usize) {
-        #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors.
-        Ok((buf, n as usize))
-    },
+impl<'fd, B: BufMutSlice<N>, const N: usize, D: Descriptor> Cancel for RecvNVectored<'fd, B, N, D> {
+    fn try_cancel(&mut self) -> CancelResult {
+        self.recv.try_cancel()
+    }
+
+    fn cancel(&mut self) -> CancelOperation {
+        self.recv.cancel()
+    }
+}
+
+impl<'fd, B: BufMutSlice<N>, const N: usize, D: Descriptor> Future for RecvNVectored<'fd, B, N, D> {
+    type Output = io::Result<B>;
+
+    fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll<Self::Output> {
+        // SAFETY: not moving `Future`.
+        let this = unsafe { Pin::into_inner_unchecked(self) };
+        let mut recv = unsafe { Pin::new_unchecked(&mut this.recv) };
+        match recv.as_mut().poll(ctx) {
+            Poll::Ready(Ok((bufs, _))) => {
+                if bufs.last_read == 0 {
+                    return Poll::Ready(Err(io::ErrorKind::UnexpectedEof.into()));
+                }
+
+                if bufs.last_read >= this.left {
+                    // Read the required amount of bytes.
+                    return Poll::Ready(Ok(bufs.buf));
+                }
+
+                this.left -= bufs.last_read;
+
+                recv.set(recv.0.fd().recv_vectored(bufs, 0));
+                unsafe { Pin::new_unchecked(this) }.poll(ctx)
+            }
+            Poll::Ready(Err(err)) => Poll::Ready(Err(err)),
+            Poll::Pending => Poll::Pending,
+        }
+    }
+}
 
-/// [`Future`] behind [`AsyncFd::send_all_vectored`].
+/// [`Future`] behind [`AsyncFd::send_all_vectored`] and [`AsyncFd::send_all_vectored_zc`].
 #[derive(Debug)]
-pub struct SendAllVectored<'fd, B, const N: usize, D: Descriptor = File> {
+pub struct SendAllVectored<'fd, B: BufSlice<N>, const N: usize, D: Descriptor = File> {
     send: Extractor<SendMsg<'fd, B, NoAddress, N, D>>,
     skip: u64,
+    send_op: SendCall,
 }
 
 impl<'fd, B: BufSlice<N>, const N: usize, D: Descriptor> SendAllVectored<'fd, B, N, D> {
-    fn new(fd: &'fd AsyncFd<D>, bufs: B) -> SendAllVectored<'fd, B, N, D> {
-        SendAllVectored {
-            send: fd.send_vectored(bufs, 0).extract(),
-            skip: 0,
-        }
-    }
-
     /// Poll implementation used by the [`Future`] implement for the naked type
     /// and the type wrapper in an [`Extractor`].
-    fn inner_poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll<io::Result<B>> {
+    fn poll_inner(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll<io::Result<B>> {
         // SAFETY: not moving `Future`.
         let this = unsafe { Pin::into_inner_unchecked(self) };
         let mut send = unsafe { Pin::new_unchecked(&mut this.send) };
@@ -668,28 +700,33 @@ impl<'fd, B: BufSlice<N>, const N: usize, D: Descriptor> SendAllVectored<'fd, B,
                 let mut iovecs = unsafe { bufs.as_iovecs() };
                 let mut skip = this.skip;
                 for iovec in &mut iovecs {
-                    if iovec.iov_len as u64 <= skip {
+                    if iovec.len() as u64 <= skip {
                         // Skip entire buf.
-                        skip -= iovec.iov_len as u64;
-                        iovec.iov_len = 0;
+                        skip -= iovec.len() as u64;
+                        // SAFETY: setting it to zero is always valid.
+                        unsafe { iovec.set_len(0) };
                     } else {
-                        iovec.iov_len -= skip as usize;
+                        // SAFETY: checked above that the length > skip.
+                        unsafe { iovec.set_len(iovec.len() - skip as usize) };
                         break;
                     }
                 }
 
-                if iovecs[N - 1].iov_len == 0 {
-                    // Written everything.
+                if iovecs[N - 1].len() == 0 {
+                    // Sent everything.
                    return Poll::Ready(Ok(bufs));
                }

-                // SAFETY: zeroed `msghdr` is valid.
-                let msg = unsafe { mem::zeroed() };
-                let op = libc::IORING_OP_SENDMSG as u8;
+                let resources = Box::new((MsgHeader::empty(), iovecs, NoAddress));
                 send.set(
-                    SendMsg::new(send.fut.fd, bufs, NoAddress, msg, iovecs, (op, 0)).extract(),
+                    SendMsg(FdOperation::new(
+                        send.fut.0.fd(),
+                        (bufs, resources),
+                        (this.send_op, 0),
+                    ))
+                    .extract(),
                 );
-                unsafe { Pin::new_unchecked(this) }.inner_poll(ctx)
+                unsafe { Pin::new_unchecked(this) }.poll_inner(ctx)
             }
             Poll::Ready(Err(err)) => Poll::Ready(Err(err)),
             Poll::Pending => Poll::Pending,
@@ -697,12 +734,12 @@ impl<'fd, B: BufSlice<N>, const N: usize, D: Descriptor> SendAllVectored<'fd, B,
     }
 }

-impl<'fd, B, const N: usize, D: Descriptor> Cancel for SendAllVectored<'fd, B, N, D> {
+impl<'fd, B: BufSlice<N>, const N: usize, D: Descriptor> Cancel for SendAllVectored<'fd, B, N, D> {
     fn try_cancel(&mut self) -> CancelResult {
         self.send.try_cancel()
     }

-    fn cancel(&mut self) -> CancelOp {
+    fn cancel(&mut self) -> CancelOperation {
         self.send.cancel()
     }
 }
@@ -711,7 +748,7 @@ impl<'fd, B: BufSlice<N>, const N: usize, D: Descriptor> Future for SendAllVecto
     type Output = io::Result<()>;

     fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll<Self::Output> {
-        self.inner_poll(ctx).map_ok(|_| ())
+        self.poll_inner(ctx).map_ok(|_| ())
     }
 }

@@ -723,566 +760,242 @@ impl<'fd, B: BufSlice<N>, const N: usize, D: Descriptor> Future
     type Output = io::Result<B>;

     fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll<Self::Output> {
-        unsafe { Pin::map_unchecked_mut(self, |s| &mut s.fut) }.inner_poll(ctx)
-    }
-}
-
-// Recv.
-op_future! {
-    fn AsyncFd::recv -> B,
-    struct Recv<'fd, B: BufMut> {
-        /// Buffer to write into, needs to stay in memory so the kernel can
-        /// access it safely.
-        buf: B,
-    },
-    drop_using: Box,
-    setup_state: flags: libc::c_int,
-    setup: |submission, fd, (buf,), flags| unsafe {
-        let (ptr, len) = buf.parts_mut();
-        submission.recv(fd.fd(), ptr, len, flags);
-        if let Some(buf_group) = buf.buffer_group() {
-            submission.set_buffer_select(buf_group.0);
-        }
-    },
-    map_result: |this, (mut buf,), buf_idx, n| {
-        // SAFETY: the kernel initialised the bytes for us as part of the read
-        // call.
-        #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors.
-        unsafe { buf.buffer_init(BufIdx(buf_idx), n as u32) };
-        Ok(buf)
-    },
-}
-
-// MultishotRecv.
-op_async_iter! {
-    fn AsyncFd::multishot_recv -> ReadBuf,
-    struct MultishotRecv<'fd> {
-        /// Buffer pool used in the receive operation.
-        buf_pool: ReadBufPool,
-    },
-    setup_state: flags: libc::c_int,
-    setup: |submission, this, flags| unsafe {
-        submission.multishot_recv(this.fd.fd(), flags, this.buf_pool.group_id().0);
-    },
-    map_result: |this, buf_idx, n| {
-        if n == 0 {
-            // Peer closed it's writing half.
-            this.state = crate::op::OpState::Done;
-        }
-        // SAFETY: the kernel initialised the buffers for us as part of the read
-        // call.
-        #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors.
-        unsafe { this.buf_pool.new_buffer(BufIdx(buf_idx), n as u32) }
-    },
-}
-
-/// [`Future`] behind [`AsyncFd::recv_n`].
-#[derive(Debug)]
-pub struct RecvN<'fd, B, D: Descriptor = File> {
-    recv: Recv<'fd, ReadNBuf<B>, D>,
-    /// Number of bytes we still need to receive to hit our target `N`.
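The `Box::new((MsgHeader::empty(), iovecs, NoAddress))` above exists because the kernel may read the message header after this future has been polled again or moved; boxing gives the header a stable heap address for the whole operation. A sketch of the idea under that assumption, using plain libc types and illustrative names (not the crate's real `MsgHeader`):

    struct MsgResources {
        header: libc::msghdr,
        iovecs: [libc::iovec; 2],
    }

    fn prepare() -> Box<MsgResources> {
        // SAFETY: all-zeroes is a valid value for both C structs.
        let mut resources: Box<MsgResources> = Box::new(unsafe { std::mem::zeroed() });
        // Point the header at the iovecs; both live behind the same Box, so
        // moving the Box itself does not move them.
        resources.header.msg_iov = resources.iovecs.as_mut_ptr();
        resources.header.msg_iovlen = resources.iovecs.len();
        resources
    }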
-    left: usize,
-}
-
-impl<'fd, B: BufMut, D: Descriptor> RecvN<'fd, B, D> {
-    const fn new(fd: &'fd AsyncFd<D>, buf: B, n: usize) -> RecvN<'fd, B, D> {
-        let buf = ReadNBuf { buf, last_read: 0 };
-        RecvN {
-            recv: fd.recv(buf, 0),
-            left: n,
-        }
+        unsafe { Pin::map_unchecked_mut(self, |s| &mut s.fut) }.poll_inner(ctx)
     }
 }

-impl<'fd, B, D: Descriptor> Cancel for RecvN<'fd, B, D> {
-    fn try_cancel(&mut self) -> CancelResult {
-        self.recv.try_cancel()
-    }
-
-    fn cancel(&mut self) -> CancelOp {
-        self.recv.cancel()
+/// Trait that defines the behaviour of socket addresses.
+///
+/// Unix uses different address types for different sockets; to support
+/// all of them A10 uses this trait.
+pub trait SocketAddress: private::SocketAddress + Sized {}
+
+mod private {
+    use std::mem::MaybeUninit;
+
+    pub trait SocketAddress {
+        type Storage: Sized;
+
+        /// Returns itself as storage.
+        fn into_storage(self) -> Self::Storage;
+
+        /// Returns a raw pointer and length to the storage.
+        ///
+        /// # Safety
+        ///
+        /// The pointer must be valid to read up to length bytes from.
+        ///
+        /// The implementation must ensure that the pointer is valid, i.e. not null
+        /// and pointing to memory owned by the address. Furthermore it must ensure
+        /// that the returned length is, in combination with the pointer, valid. In
+        /// other words the memory the pointer and length are pointing to must be a
+        /// valid memory address and owned by the address.
+        unsafe fn as_ptr(storage: &Self::Storage) -> (*const libc::sockaddr, libc::socklen_t);
+
+        /// Returns a mutable raw pointer and length to `storage`.
+        ///
+        /// # Safety
+        ///
+        /// Only initialised bytes may be written to the pointer returned.
+        unsafe fn as_mut_ptr(
+            storage: &mut MaybeUninit<Self::Storage>,
+        ) -> (*mut libc::sockaddr, libc::socklen_t);
+
+        /// Initialise the address from `storage`, to which at least `length`
+        /// bytes have been written (by the kernel).
+        ///
+        /// # Safety
+        ///
+        /// Caller must ensure that at least `length` bytes have been written to
+        /// `address`.
+        unsafe fn init(storage: MaybeUninit<Self::Storage>, length: libc::socklen_t) -> Self;
+    }
 }

-impl<'fd, B: BufMut, D: Descriptor> Future for RecvN<'fd, B, D> {
-    type Output = io::Result<B>;
-
-    fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll<Self::Output> {
-        // SAFETY: not moving the `Recv` future.
-        let this = unsafe { Pin::into_inner_unchecked(self) };
-        let mut recv = unsafe { Pin::new_unchecked(&mut this.recv) };
-        match recv.as_mut().poll(ctx) {
-            Poll::Ready(Ok(buf)) => {
-                if buf.last_read == 0 {
-                    return Poll::Ready(Err(io::ErrorKind::UnexpectedEof.into()));
-                }
-
-                if buf.last_read >= this.left {
-                    // Received the required amount of bytes.
-                    return Poll::Ready(Ok(buf.buf));
+impl SocketAddress for SocketAddr {}
+
+impl private::SocketAddress for SocketAddr {
+    type Storage = libc::sockaddr_in6; // Fits both v4 and v6.
+
+    fn into_storage(self) -> Self::Storage {
+        match self {
+            SocketAddr::V4(addr) => {
+                // SAFETY: all zeroes is valid for `sockaddr_in6`.
+                let mut storage = unsafe { mem::zeroed::<libc::sockaddr_in6>() };
+                // SAFETY: `sockaddr_in` fits in `sockaddr_in6`.
+                unsafe {
+                    ptr::from_mut(&mut storage)
+                        .cast::<libc::sockaddr_in>()
+                        .write(addr.into_storage());
                 }
-
-                this.left -= buf.last_read;
-
-                recv.set(recv.fd.recv(buf, 0));
-                unsafe { Pin::new_unchecked(this) }.poll(ctx)
+                storage
             }
-            Poll::Ready(Err(err)) => Poll::Ready(Err(err)),
-            Poll::Pending => Poll::Pending,
-        }
-    }
-}
-
-// RecvVectored.
-op_future!
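`SocketAddress` above is a sealed trait: the public trait requires a supertrait that lives in a private module, so downstream crates can use it in bounds but cannot implement it themselves. A minimal sketch of the pattern:

    // External crates can write `T: MyTrait` bounds, but cannot implement
    // `MyTrait` because `private::Sealed` is not nameable outside this crate.
    pub trait MyTrait: private::Sealed {}

    mod private {
        pub trait Sealed {
            // Implementation details hidden from downstream crates go here.
        }
    }

    pub struct Foo;
    impl private::Sealed for Foo {}
    impl MyTrait for Foo {}

Sealing keeps the unsafe pointer contract of the private methods an internal concern, which is why the old "this trait is unfit for external use" warning could be dropped.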
{
-    fn AsyncFd::recv_vectored -> (B, libc::c_int),
-    struct RecvVectored<'fd, B: BufMutSlice<N>; const N: usize> {
-        /// Buffers to read from, needs to stay in memory so the kernel can
-        /// access it safely.
-        bufs: B,
-        /// The kernel will write to `msghdr`, so it needs to stay in memory so
-        /// the kernel can access it safely.
-        msg: Box<libc::msghdr>,
-        /// NOTE: we only need `iovec` in the submission, we don't have to keep
-        /// around during the operation. Because of this we don't heap allocate
-        /// it like we for other operations. This leaves a small duration
-        /// between the submission of the entry and the submission being read by
-        /// the kernel in which this future could be dropped and the kernel will
-        /// read memory we don't own. However because we wake the kernel after
-        /// submitting the timeout entry it's not really worth to heap
-        /// allocation.
-        iovecs: [libc::iovec; N],
-    },
-    drop_using: Box,
-    /// `iovecs` can't move until the kernel has read the submission.
-    impl !Unpin,
-    setup_state: flags: libc::c_int,
-    setup: |submission, fd, (_, msg, iovecs), flags| unsafe {
-        msg.msg_iov = iovecs.as_mut_ptr();
-        msg.msg_iovlen = N;
-        submission.recvmsg(fd.fd(), &**msg, flags);
-    },
-    map_result: |this, (mut bufs, msg, _), n| {
-        // SAFETY: the kernel initialised the bytes for us as part of the
-        // recvmsg call.
-        #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors.
-        unsafe { bufs.set_init(n as usize) };
-        Ok((bufs, msg.msg_flags))
-    },
-}
-
-/// [`Future`] behind [`AsyncFd::recv_n_vectored`].
-#[derive(Debug)]
-pub struct RecvNVectored<'fd, B, const N: usize, D: Descriptor = File> {
-    recv: RecvVectored<'fd, ReadNBuf<B>, N, D>,
-    /// Number of bytes we still need to receive to hit our target `N`.
-    left: usize,
-}
-
-impl<'fd, B: BufMutSlice<N>, const N: usize, D: Descriptor> RecvNVectored<'fd, B, N, D> {
-    fn new(fd: &'fd AsyncFd<D>, buf: B, n: usize) -> RecvNVectored<'fd, B, N, D> {
-        let bufs = ReadNBuf { buf, last_read: 0 };
-        RecvNVectored {
-            recv: fd.recv_vectored(bufs, 0),
-            left: n,
+            SocketAddr::V6(addr) => addr.into_storage(),
         }
     }
-}

-impl<'fd, B, const N: usize, D: Descriptor> Cancel for RecvNVectored<'fd, B, N, D> {
-    fn try_cancel(&mut self) -> CancelResult {
-        self.recv.try_cancel()
+    unsafe fn as_ptr(storage: &Self::Storage) -> (*const libc::sockaddr, libc::socklen_t) {
+        let ptr = ptr::from_ref(storage).cast();
+        let size = if libc::c_int::from(storage.sin6_family) == libc::AF_INET {
+            size_of::<libc::sockaddr_in>()
+        } else {
+            size_of::<libc::sockaddr_in6>()
+        };
+        (ptr, size as libc::socklen_t)
     }

-    fn cancel(&mut self) -> CancelOp {
-        self.recv.cancel()
+    unsafe fn as_mut_ptr(
+        storage: &mut MaybeUninit<Self::Storage>,
+    ) -> (*mut libc::sockaddr, libc::socklen_t) {
+        (storage.as_mut_ptr().cast(), size_of::<libc::sockaddr_in6>() as _)
     }
-}
-
-impl<'fd, B: BufMutSlice<N>, const N: usize, D: Descriptor> Future for RecvNVectored<'fd, B, N, D> {
-    type Output = io::Result<B>;
-
-    fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll<Self::Output> {
-        // SAFETY: not moving `Future`.
-        let this = unsafe { Pin::into_inner_unchecked(self) };
-        let mut recv = unsafe { Pin::new_unchecked(&mut this.recv) };
-        match recv.as_mut().poll(ctx) {
-            Poll::Ready(Ok((bufs, _))) => {
-                if bufs.last_read == 0 {
-                    return Poll::Ready(Err(io::ErrorKind::UnexpectedEof.into()));
-                }
-
-                if bufs.last_read >= this.left {
-                    // Read the required amount of bytes.
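Storing an IPv4 address in a `sockaddr_in6` only works because `sockaddr_in` is no larger than `sockaddr_in6`. A compile-time assertion (not part of this diff; shown as a sketch) would pin that layout assumption down:

    // Fails to compile if the layout assumption is ever violated.
    const _: () = assert!(
        std::mem::size_of::<libc::sockaddr_in>() <= std::mem::size_of::<libc::sockaddr_in6>(),
    );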
-                    return Poll::Ready(Ok(bufs.buf));
-                }
-
-                this.left -= bufs.last_read;
-
-                recv.set(recv.fd.recv_vectored(bufs, 0));
-                unsafe { Pin::new_unchecked(this) }.poll(ctx)
-            }
-            Poll::Ready(Err(err)) => Poll::Ready(Err(err)),
-            Poll::Pending => Poll::Pending,
+    unsafe fn init(storage: MaybeUninit<Self::Storage>, length: libc::socklen_t) -> Self {
+        debug_assert!(length as usize >= size_of::<libc::sa_family_t>());
+        let family = unsafe { ptr::addr_of!((*storage.as_ptr()).sin6_family).read() };
+        if family == libc::AF_INET as libc::sa_family_t {
+            let storage = storage.as_ptr().cast::<libc::sockaddr_in>().read();
+            SocketAddrV4::init(MaybeUninit::new(storage), length).into()
+        } else {
+            SocketAddrV6::init(storage, length).into()
         }
     }
 }

-// RecvFrom.
-op_future! {
-    fn AsyncFd::recvfrom -> (B, A, libc::c_int),
-    struct RecvFrom<'fd, B: BufMut, A: SocketAddress> {
-        /// Buffer to read from, needs to stay in memory so the kernel can
-        /// access it safely.
-        buf: B,
-        /// The kernel will write to `msghdr` and the address, so both need to
-        /// stay in memory so the kernel can access it safely.
-        msg: Box<(libc::msghdr, MaybeUninit<A>)>,
-        /// NOTE: we only need `iovec` in the submission, we don't have to keep
-        /// around during the operation. Because of this we don't heap allocate
-        /// it like we for other operations. This leaves a small duration
-        /// between the submission of the entry and the submission being read by
-        /// the kernel in which this future could be dropped and the kernel will
-        /// read memory we don't own. However because we wake the kernel after
-        /// submitting the timeout entry it's not really worth to heap
-        /// allocation.
-        iovec: libc::iovec,
-    },
-    drop_using: Box,
-    /// `iovec` can't move until the kernel has read the submission.
-    impl !Unpin,
-    setup_state: flags: libc::c_int,
-    setup: |submission, fd, (buf, msg, iovec), flags| unsafe {
-        let address = &mut msg.1;
-        let msg = &mut msg.0;
-        msg.msg_iov = &mut *iovec;
-        msg.msg_iovlen = 1;
-        let (addr, addr_len) = SocketAddress::as_mut_ptr(address);
-        msg.msg_name = addr.cast();
-        msg.msg_namelen = addr_len;
-        submission.recvmsg(fd.fd(), &*msg, flags);
-        if let Some(buf_group) = buf.buffer_group() {
-            submission.set_buffer_select(buf_group.0);
-        }
-    },
-    map_result: |this, (mut buf, msg, _), buf_idx, n| {
-        // SAFETY: the kernel initialised the bytes for us as part of the
-        // recvmsg call.
-        #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors.
-        unsafe { buf.buffer_init(BufIdx(buf_idx), n as u32) };
-        // SAFETY: kernel initialised the address for us.
-        let address = unsafe { SocketAddress::init(msg.1, msg.0.msg_namelen) };
-        Ok((buf, address, msg.0.msg_flags))
-    },
-}
-
-// RecvFromVectored.
-op_future! {
-    fn AsyncFd::recvfrom_vectored -> (B, A, libc::c_int),
-    struct RecvFromVectored<'fd, B: BufMutSlice<N>, A: SocketAddress; const N: usize> {
-        /// Buffers to read from, needs to stay in memory so the kernel can
-        /// access it safely.
-        bufs: B,
-        /// The kernel will write to `msghdr` and the address, so both need to
-        /// stay in memory so the kernel can access it safely.
-        msg: Box<(libc::msghdr, MaybeUninit<A>)>,
-        /// NOTE: we only need `iovec` in the submission, we don't have to keep
-        /// around during the operation. Because of this we don't heap allocate
-        /// it like we for other operations. This leaves a small duration
-        /// between the submission of the entry and the submission being read by
-        /// the kernel in which this future could be dropped and the kernel will
-        /// read memory we don't own.
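A useful property of the `into_storage`/`as_ptr`/`init` trio above is that encoding an address into kernel storage and decoding it again is lossless. An internal unit-test sketch, assuming test code with access to the private trait:

    #[test]
    fn socket_addr_roundtrip() {
        use std::mem::MaybeUninit;
        use std::net::SocketAddr;

        let addr: SocketAddr = "127.0.0.1:8080".parse().unwrap();
        let storage = private::SocketAddress::into_storage(addr);
        // SAFETY: `storage` is fully initialised and `as_ptr` reports its length.
        let (_, len) = unsafe { <SocketAddr as private::SocketAddress>::as_ptr(&storage) };
        // SAFETY: at least `len` bytes of `storage` are initialised.
        let decoded = unsafe {
            <SocketAddr as private::SocketAddress>::init(MaybeUninit::new(storage), len)
        };
        assert_eq!(addr, decoded);
    }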
However because we wake the kernel after - /// submitting the timeout entry it's not really worth to heap - /// allocation. - iovecs: [libc::iovec; N], - }, - drop_using: Box, - /// `iovecs` can't move until the kernel has read the submission. - impl !Unpin, - setup_state: flags: libc::c_int, - setup: |submission, fd, (_, msg, iovecs), flags| unsafe { - let address = &mut msg.1; - let msg = &mut msg.0; - msg.msg_iov = iovecs.as_mut_ptr(); - msg.msg_iovlen = N; - let (addr, addr_len) = SocketAddress::as_mut_ptr(address); - msg.msg_name = addr.cast(); - msg.msg_namelen = addr_len; - submission.recvmsg(fd.fd(), &*msg, flags); - }, - map_result: |this, (mut bufs, msg, _), n| { - // SAFETY: the kernel initialised the buffers for us as part of the - // recvmsg call. - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - unsafe { bufs.set_init(n as usize) }; - // SAFETY: kernel initialised the address for us. - let address = unsafe { SocketAddress::init(msg.1, msg.0.msg_namelen) }; - Ok((bufs, address, msg.0.msg_flags)) - }, -} - -// Shutdown. -op_future! { - fn AsyncFd::shutdown -> (), - struct Shutdown<'fd> { - // Doesn't need any fields. - }, - setup_state: flags: libc::c_int, - setup: |submission, fd, (), how| unsafe { - submission.shutdown(fd.fd(), how); - }, - map_result: |n| Ok(debug_assert!(n == 0)), -} - -// Accept. -op_future! { - fn AsyncFd::accept -> (AsyncFd, A), - struct Accept<'fd, A: SocketAddress> { - /// Address for the accepted connection, needs to stay in memory so the - /// kernel can access it safely. - address: Box<(MaybeUninit, libc::socklen_t)>, - }, - setup_state: flags: libc::c_int, - setup: |submission, fd, (address,), flags| unsafe { - let (ptr, len) = SocketAddress::as_mut_ptr(&mut address.0); - address.1 = len; - submission.accept(fd.fd(), ptr, &mut address.1, flags); - submission.set_async(); - D::create_flags(submission); - }, - map_result: |this, (address,), fd| { - let sq = this.fd.sq.clone(); - // SAFETY: the accept operation ensures that `fd` is valid. - let stream = unsafe { AsyncFd::from_raw(fd, sq) }; - let len = address.1; - // SAFETY: kernel initialised the memory for us. - let address = unsafe { SocketAddress::init(address.0, len) }; - Ok((stream, address)) - }, -} - -// MultishotAccept. -op_async_iter! { - fn AsyncFd::multishot_accept -> AsyncFd, - struct MultishotAccept<'fd> { - // No additional state. - }, - setup_state: flags: libc::c_int, - setup: |submission, this, flags| unsafe { - submission.multishot_accept(this.fd.fd(), flags); - submission.set_async(); - D::create_flags(submission); - }, - map_result: |this, _flags, fd| { - let sq = this.fd.sq.clone(); - // SAFETY: the accept operation ensures that `fd` is valid. - unsafe { AsyncFd::from_raw(fd, sq) } - }, -} - -// SocketOption. -op_future! { - fn AsyncFd::socket_option -> T, - struct SocketOption<'fd, T> { - /// Value for the socket option, needs to stay in memory so the kernel - /// can access it safely. - value: Box>, - }, - setup_state: flags: (libc::__u32, libc::__u32), - setup: |submission, fd, (value,), (level, optname)| unsafe { - let optvalue = ptr::addr_of_mut!(**value).cast(); - let optlen = size_of::() as u32; - submission.uring_command(libc::SOCKET_URING_OP_GETSOCKOPT, fd.fd(), level, optname, optvalue, optlen); - }, - map_result: |this, (value,), optlen| { - debug_assert!(optlen == (size_of::() as i32)); - // SAFETY: the kernel initialised the value for us as part of the - // getsockopt call. 
- Ok(unsafe { MaybeUninit::assume_init(*value) }) - }, -} - -// SetSocketOption. -op_future! { - fn AsyncFd::set_socket_option -> (), - struct SetSocketOption<'fd, T> { - /// Value for the socket option, needs to stay in memory so the kernel - /// can access it safely. - value: Box, - }, - setup_state: flags: (libc::__u32, libc::__u32), - setup: |submission, fd, (value,), (level, optname)| unsafe { - let optvalue = ptr::addr_of_mut!(**value).cast(); - let optlen = size_of::() as u32; - submission.uring_command(libc::SOCKET_URING_OP_SETSOCKOPT, fd.fd(), level, optname, optvalue, optlen); - }, - map_result: |result| Ok(debug_assert!(result == 0)), - extract: |this, (value,), res| -> Box { - debug_assert!(res == 0); - Ok(value) - }, -} - -/// Trait that defines the behaviour of socket addresses. -/// -/// Linux (Unix) uses different address types for different sockets, to support -/// all of them A10 uses a trait to define the behaviour. -/// -/// Current implementations include -/// * IPv4 addresses: [`libc::sockaddr_in`], -/// * IPv6 addresses: [`libc::sockaddr_in6`], -/// * Unix addresses: [`libc::sockaddr_un`], -/// * Storage of any address [`libc::sockaddr_storage`] kind. -/// -/// For the last two types we need to keep track of the length of the address, -/// for which it uses a tuple `(addr, `[`libc::socklen_t`]`)`. -pub trait SocketAddress: Sized { - /// Returns itself as raw pointer and length. - /// - /// # Safety - /// - /// The pointer must be valid to read up to length bytes from. - /// - /// The implementation must ensure that the pointer is valid, i.e. not null - /// and pointing to memory owned by the address. Furthermore it must ensure - /// that the returned length is, in combination with the pointer, valid. In - /// other words the memory the pointer and length are pointing to must be a - /// valid memory address and owned by the address. - /// - /// Note that the above requirements are only required for implementations - /// outside of A10. **This trait is unfit for external use!** - unsafe fn as_ptr(&self) -> (*const libc::sockaddr, libc::socklen_t); - - /// Returns `address` as an adress storage and it's length. - /// - /// # Safety - /// - /// Only initialised bytes may be written to the pointer returned. - /// - /// The implementation must ensure that the pointer is valid, i.e. not null - /// and pointing to memory owned by the address. Furthermore it must ensure - /// that the returned length is, in combination with the pointer, valid. In - /// other words the memory the pointer and length are pointing to must be a - /// valid memory address and owned by the address. - /// - /// Note that the above requirements are only required for implementations - /// outside of A10. **This trait is unfit for external use!** - unsafe fn as_mut_ptr(address: &mut MaybeUninit) - -> (*mut libc::sockaddr, libc::socklen_t); - - /// Initialise `address` to which at least `length` bytes have been written - /// (by the kernel). - /// - /// # Safety - /// - /// Caller must ensure that at least `length` bytes have been written to - /// `address`. - unsafe fn init(address: MaybeUninit, length: libc::socklen_t) -> Self; -} - -/// Socket address. 
-impl SocketAddress for (libc::sockaddr, libc::socklen_t) {
-    unsafe fn as_ptr(&self) -> (*const libc::sockaddr, libc::socklen_t) {
-        (ptr::addr_of!(self.0).cast(), self.1)
-    }
+impl SocketAddress for SocketAddrV4 {}

-    unsafe fn as_mut_ptr(this: &mut MaybeUninit<Self>) -> (*mut libc::sockaddr, libc::socklen_t) {
-        (
-            ptr::addr_of_mut!((*this.as_mut_ptr()).0).cast(),
-            size_of::<libc::sockaddr>() as _,
-        )
-    }
+impl private::SocketAddress for SocketAddrV4 {
+    type Storage = libc::sockaddr_in;

-    unsafe fn init(this: MaybeUninit<Self>, length: libc::socklen_t) -> Self {
-        debug_assert!(length >= size_of::<libc::sa_family_t>() as _);
-        // SAFETY: caller must initialise the address.
-        let mut this = this.assume_init();
-        this.1 = length;
-        this
+    fn into_storage(self) -> Self::Storage {
+        libc::sockaddr_in {
+            sin_family: libc::AF_INET as libc::sa_family_t,
+            sin_port: self.port().to_be(),
+            sin_addr: libc::in_addr {
+                s_addr: u32::from_ne_bytes(self.ip().octets()),
+            },
+            sin_zero: [0; 8],
+        }
     }
-}

-/// Any kind of address.
-impl SocketAddress for (libc::sockaddr_storage, libc::socklen_t) {
-    unsafe fn as_ptr(&self) -> (*const libc::sockaddr, libc::socklen_t) {
-        (ptr::addr_of!(self.0).cast(), self.1)
+    unsafe fn as_ptr(storage: &Self::Storage) -> (*const libc::sockaddr, libc::socklen_t) {
+        let ptr = ptr::from_ref(storage).cast();
+        (ptr, size_of::<libc::sockaddr_in>() as _)
     }

-    unsafe fn as_mut_ptr(this: &mut MaybeUninit<Self>) -> (*mut libc::sockaddr, libc::socklen_t) {
-        (
-            ptr::addr_of_mut!((*this.as_mut_ptr()).0).cast(),
-            size_of::<libc::sockaddr_storage>() as _,
-        )
+    unsafe fn as_mut_ptr(
+        storage: &mut MaybeUninit<Self::Storage>,
+    ) -> (*mut libc::sockaddr, libc::socklen_t) {
+        (storage.as_mut_ptr().cast(), size_of::<libc::sockaddr_in>() as _)
     }

-    unsafe fn init(this: MaybeUninit<Self>, length: libc::socklen_t) -> Self {
-        debug_assert!(length >= size_of::<libc::sa_family_t>() as _);
+    unsafe fn init(storage: MaybeUninit<Self::Storage>, length: libc::socklen_t) -> Self {
+        debug_assert!(length == size_of::<libc::sockaddr_in>() as _);
         // SAFETY: caller must initialise the address.
-        let mut this = this.assume_init();
-        this.1 = length;
-        this
+        let storage = unsafe { storage.assume_init() };
+        debug_assert!(libc::c_int::from(storage.sin_family) == libc::AF_INET);
+        let ip = Ipv4Addr::from(storage.sin_addr.s_addr.to_ne_bytes());
+        let port = u16::from_be(storage.sin_port);
+        SocketAddrV4::new(ip, port)
     }
 }

-/// IPv4 address.
-impl SocketAddress for libc::sockaddr_in {
-    unsafe fn as_ptr(&self) -> (*const libc::sockaddr, libc::socklen_t) {
-        (
-            (self as *const libc::sockaddr_in).cast(),
-            size_of::<libc::sockaddr_in>() as _,
-        )
-    }
+impl SocketAddress for SocketAddrV6 {}

-    unsafe fn as_mut_ptr(this: &mut MaybeUninit<Self>) -> (*mut libc::sockaddr, libc::socklen_t) {
-        (this.as_mut_ptr().cast(), size_of::<libc::sockaddr_in>() as _)
-    }
+impl private::SocketAddress for SocketAddrV6 {
+    type Storage = libc::sockaddr_in6;

-    unsafe fn init(this: MaybeUninit<Self>, length: libc::socklen_t) -> Self {
-        debug_assert!(length == size_of::<libc::sockaddr_in>() as _);
-        // SAFETY: caller must initialise the address.
-        this.assume_init()
+    fn into_storage(self) -> Self::Storage {
+        libc::sockaddr_in6 {
+            sin6_family: libc::AF_INET6 as libc::sa_family_t,
+            sin6_port: self.port().to_be(),
+            sin6_flowinfo: self.flowinfo(),
+            sin6_addr: libc::in6_addr {
+                s6_addr: self.ip().octets(),
+            },
+            sin6_scope_id: self.scope_id(),
+        }
     }
-}

-/// IPv6 address.
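The `SocketAddrV4` conversion above leans on two byte-order facts: `sin_port` must hold the port in network (big-endian) order, and `Ipv4Addr::octets` already yields the bytes in network order, so reinterpreting them with `from_ne_bytes` preserves the in-memory layout on any host. A short sketch of both:

    fn port_to_network(port: u16) -> u16 {
        // e.g. 8080 (0x1F90) becomes the value 0x901F on little-endian hosts,
        // which is the byte sequence [0x1F, 0x90] in memory: network order.
        port.to_be()
    }

    fn ipv4_to_s_addr(ip: std::net::Ipv4Addr) -> u32 {
        // `octets()` yields [a, b, c, d] already in network order;
        // `from_ne_bytes` keeps that byte sequence unchanged in memory.
        u32::from_ne_bytes(ip.octets())
    }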
-impl SocketAddress for libc::sockaddr_in6 {
-    unsafe fn as_ptr(&self) -> (*const libc::sockaddr, libc::socklen_t) {
-        (
-            (self as *const libc::sockaddr_in6).cast(),
-            size_of::<libc::sockaddr_in6>() as _,
-        )
+    unsafe fn as_ptr(storage: &Self::Storage) -> (*const libc::sockaddr, libc::socklen_t) {
+        let ptr = ptr::from_ref(storage).cast();
+        (ptr, size_of::<libc::sockaddr_in6>() as _)
     }

-    unsafe fn as_mut_ptr(this: &mut MaybeUninit<Self>) -> (*mut libc::sockaddr, libc::socklen_t) {
-        (this.as_mut_ptr().cast(), size_of::<libc::sockaddr_in6>() as _)
+    unsafe fn as_mut_ptr(
+        storage: &mut MaybeUninit<Self::Storage>,
+    ) -> (*mut libc::sockaddr, libc::socklen_t) {
+        (storage.as_mut_ptr().cast(), size_of::<libc::sockaddr_in6>() as _)
     }

-    unsafe fn init(this: MaybeUninit<Self>, length: libc::socklen_t) -> Self {
-        debug_assert!(length == size_of::<libc::sockaddr_in6>() as _);
+    unsafe fn init(storage: MaybeUninit<Self::Storage>, length: libc::socklen_t) -> Self {
+        debug_assert!(length == size_of::<libc::sockaddr_in6>() as _);
         // SAFETY: caller must initialise the address.
-        this.assume_init()
+        let storage = unsafe { storage.assume_init() };
+        debug_assert!(libc::c_int::from(storage.sin6_family) == libc::AF_INET6);
+        let ip = Ipv6Addr::from(storage.sin6_addr.s6_addr);
+        let port = u16::from_be(storage.sin6_port);
+        SocketAddrV6::new(ip, port, storage.sin6_flowinfo, storage.sin6_scope_id)
     }
 }

-/// Unix address.
-impl SocketAddress for (libc::sockaddr_un, libc::socklen_t) {
-    unsafe fn as_ptr(&self) -> (*const libc::sockaddr, libc::socklen_t) {
-        (ptr::addr_of!(self.0).cast(), self.1)
-    }
+impl SocketAddress for unix::net::SocketAddr {}

-    unsafe fn as_mut_ptr(this: &mut MaybeUninit<Self>) -> (*mut libc::sockaddr, libc::socklen_t) {
-        (
-            ptr::addr_of_mut!((*this.as_mut_ptr()).0).cast(),
-            size_of::<libc::sockaddr_un>() as _,
-        )
-    }
+impl private::SocketAddress for unix::net::SocketAddr {
+    type Storage = libc::sockaddr_un;

-    unsafe fn init(this: MaybeUninit<Self>, length: libc::socklen_t) -> Self {
-        debug_assert!(length >= size_of::<libc::sa_family_t>() as _);
-        // SAFETY: caller must initialise the address.
-        let mut this = this.assume_init();
-        this.1 = length;
-        this
+    fn into_storage(self) -> Self::Storage {
+        let mut storage = libc::sockaddr_un {
+            sun_family: libc::AF_UNIX as libc::sa_family_t,
+            // SAFETY: all zero is valid for `sockaddr_un`.
+            ..unsafe { mem::zeroed() }
+        };
+        // SAFETY: casting `[i8]` to `[u8]` is safe.
+        let path = unsafe {
+            slice::from_raw_parts_mut::<u8>(
+                storage.sun_path.as_mut_ptr().cast(),
+                storage.sun_path.len(),
+            )
+        };
+        if let Some(pathname) = self.as_pathname() {
+            let bytes = pathname.as_os_str().as_bytes();
+            path[..bytes.len()].copy_from_slice(bytes);
+        } else if let Some(bytes) = self.as_abstract_name() {
+            path[1..][..bytes.len()].copy_from_slice(bytes);
+        } else {
+            // Unnamed address, we'll leave it all zero.
+        }
+        storage
+    }
+
+    unsafe fn as_ptr(storage: &Self::Storage) -> (*const libc::sockaddr, libc::socklen_t) {
+        let ptr = ptr::from_ref(storage).cast();
+        (ptr, size_of::<libc::sockaddr_un>() as _)
+    }
+
+    unsafe fn as_mut_ptr(
+        storage: &mut MaybeUninit<Self::Storage>,
+    ) -> (*mut libc::sockaddr, libc::socklen_t) {
+        (storage.as_mut_ptr().cast(), size_of::<libc::sockaddr_un>() as _)
+    }
+
+    unsafe fn init(storage: MaybeUninit<Self::Storage>, length: libc::socklen_t) -> Self {
+        debug_assert!(length as usize >= size_of::<libc::sa_family_t>());
+        let family = unsafe { ptr::addr_of!((*storage.as_ptr()).sun_family).read() };
+        debug_assert!(family == libc::AF_UNIX as libc::sa_family_t);
+        let path_ptr = ptr::addr_of!((*storage.as_ptr()).sun_path);
+        let length = length as usize - (path_ptr.addr() - storage.as_ptr().addr());
+        // SAFETY: the kernel ensures that at least `length` bytes are
+        // initialised.
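The Unix-domain decoding above distinguishes the two named address flavours by the first byte of `sun_path`: pathname addresses store a filesystem path, while abstract addresses (Linux only) mark themselves with a leading NUL byte. A sketch using the standard library types the new implementation builds on:

    use std::os::linux::net::SocketAddrExt;
    use std::os::unix::net::SocketAddr;

    fn examples() -> std::io::Result<(SocketAddr, SocketAddr)> {
        // Pathname address: `sun_path` holds a filesystem path.
        let pathname = SocketAddr::from_pathname("/tmp/a10.sock")?;
        // Abstract address: `sun_path` starts with a NUL byte, no file exists.
        let abstract_ = SocketAddr::from_abstract_name(b"a10")?;
        assert!(pathname.as_pathname().is_some());
        assert!(abstract_.as_abstract_name().is_some());
        Ok((pathname, abstract_))
    }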
+        let path = unsafe { slice::from_raw_parts::<u8>(path_ptr.cast(), length) };
+        if let Some(0) = path.first() {
+            // NOTE: `from_abstract_name` adds a starting null byte.
+            unix::net::SocketAddr::from_abstract_name(&path[1..])
+        } else {
+            unix::net::SocketAddr::from_pathname(Path::new(OsStr::from_bytes(path)))
+        }
+        // Fallback to an unnamed address.
+        .unwrap_or_else(|_| unix::net::SocketAddr::from_pathname("").unwrap())
     }
 }

@@ -1292,23 +1005,41 @@ impl SocketAddress for (libc::sockaddr_un, libc::socklen_t) {
 ///
 /// [`accept`]: AsyncFd::accept
 /// [`connect`]: AsyncFd::connect
-#[derive(Debug)]
+#[derive(Copy, Clone, Debug)]
 pub struct NoAddress;

-impl SocketAddress for NoAddress {
-    unsafe fn as_ptr(&self) -> (*const libc::sockaddr, libc::socklen_t) {
+impl SocketAddress for NoAddress {}
+
+impl private::SocketAddress for NoAddress {
+    type Storage = Self;
+
+    fn into_storage(self) -> Self::Storage {
+        NoAddress
+    }
+
+    unsafe fn as_ptr(_: &Self::Storage) -> (*const libc::sockaddr, libc::socklen_t) {
         // NOTE: this goes against the requirements of `cast_ptr`.
         (ptr::null_mut(), 0)
     }

-    unsafe fn as_mut_ptr(this: &mut MaybeUninit<Self>) -> (*mut libc::sockaddr, libc::socklen_t) {
-        _ = this;
+    unsafe fn as_mut_ptr(
+        _: &mut MaybeUninit<Self::Storage>,
+    ) -> (*mut libc::sockaddr, libc::socklen_t) {
         // NOTE: this goes against the requirements of `as_mut_ptr`.
         (ptr::null_mut(), 0)
     }

-    unsafe fn init(this: MaybeUninit<Self>, length: libc::socklen_t) -> Self {
+    unsafe fn init(_: MaybeUninit<Self::Storage>, length: libc::socklen_t) -> Self {
         debug_assert!(length == 0);
-        this.assume_init()
+        NoAddress
+    }
+}
+
+/// Implement [`fmt::Debug`] for [`SocketAddress::Storage`].
+pub(crate) struct AddressStorage<A>(pub(crate) A);
+
+impl<A> fmt::Debug for AddressStorage<A> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Address").finish()
     }
 }
diff --git a/src/op.rs b/src/op.rs
index 1b583173..3f5a0206 100644
--- a/src/op.rs
+++ b/src/op.rs
@@ -1,1623 +1,918 @@
-//! Code related to executing an asynchronous operations.
+//! Module with [`Operation`] and [`FdOperation`] [`Future`]s.

-use std::mem::{replace, ManuallyDrop};
-use std::os::fd::RawFd;
+use std::cell::UnsafeCell;
+use std::panic::RefUnwindSafe;
+use std::pin::Pin;
 use std::task::{self, Poll};
-use std::{fmt, io, ptr};
-
-use crate::cancel::{CancelOp, CancelResult};
-use crate::{libc, Completion, OpIndex, QueueFull, SubmissionQueue};
-
-/// State of a queued operation.
-#[derive(Debug)]
-pub(crate) struct QueuedOperation {
-    /// Operation kind.
-    kind: QueuedOperationKind,
-    /// True if the connected `Future`/`AsyncIterator` is dropped and thus no
-    /// longer will retrieve the result.
-    dropped: bool,
-    /// Boolean used by operations that result in multiple completion events.
-    /// For example zero copy: one completion to report the result another to
-    /// indicate the resources are no longer used.
-    /// For multishot this will be true if no more completion events are coming,
-    /// for example in case a previous event returned an error.
-    done: bool,
-    /// Waker to wake when the operation is done.
-    waker: Option<task::Waker>,
+use std::{fmt, io, mem};
+
+use crate::cancel::{Cancel, CancelOperation, CancelResult};
+use crate::drop_waker::DropWake;
+use crate::fd::{AsyncFd, Descriptor, File};
+use crate::sq::QueueFull;
+use crate::{cq, sq, sys, OperationId, QueuedOperation, SubmissionQueue};
+
+/// Generic [`Future`] that powers other I/O operation futures.
+pub(crate) struct Operation { + sq: SubmissionQueue, + state: State, } -impl QueuedOperation { - /// Create a queued operation. - pub(crate) const fn new() -> QueuedOperation { - QueuedOperation::_new(QueuedOperationKind::Single { result: None }) - } - - /// Create a queued multishot operation. - pub(crate) const fn new_multishot() -> QueuedOperation { - QueuedOperation::_new(QueuedOperationKind::Multishot { - results: Vec::new(), - }) - } - - const fn _new(kind: QueuedOperationKind) -> QueuedOperation { - QueuedOperation { - kind, - done: false, - dropped: false, - waker: None, +impl Operation { + /// Create a new `Operation`. + pub(crate) const fn new( + sq: SubmissionQueue, + resources: O::Resources, + args: O::Args, + ) -> Operation { + Operation { + sq, + state: State::new(resources, args), } } +} - /// Update the operation based on a completion `event`. - pub(crate) fn update(&mut self, event: &Completion) -> bool { - let completion = CompletionResult { - result: event.result(), - flags: event.operation_flags(), - }; - match &mut self.kind { - QueuedOperationKind::Single { result } => { - if event.is_notification() { - // Zero copy completed, we can now mark ourselves as done. - self.done = true; - } else { - let old = replace(result, Some(completion)); - debug_assert!(old.is_none()); - // For zero copy this may be false, in which case we get a - // notification (see above) in a future completion event. - self.done = !event.is_in_progress(); - } - - if self.done { - // NOTE: if `dropped` is true this drops the operations's - // resources (e.g. buffers). - if let Some(waker) = self.waker.take() { - waker.wake(); - } - } - } - QueuedOperationKind::Multishot { results } => { - results.push(completion); - if let Some(waker) = self.waker.take() { - waker.wake(); - } - } - } - self.dropped +impl Operation +where + // TODO: this is silly. + O: Op< + Submission = <::Submissions as sq::Submissions>::Submission, + OperationState = <<::Completions as cq::Completions>::Event as cq::Event>::State, + >, + O::OperationOutput: fmt::Debug, +{ + pub(crate) fn poll(self: Pin<&mut Self>, ctx: &task::Context<'_>) -> Poll> { + // SAFETY: not moving `fd` or `state`. + let Operation { sq, state } = unsafe { self.get_unchecked_mut() }; + state.poll( + ctx, + sq, + O::fill_submission, + O::check_result, + O::map_ok, + ) + } + + pub(crate) fn poll_next(self: Pin<&mut Self>, ctx: &task::Context<'_>) -> Poll>> + where O: Iter, + { + // SAFETY: not moving `fd` or `state`. + let Operation { sq, state } = unsafe { self.get_unchecked_mut() }; + state.poll_next( + ctx, + sq, + O::fill_submission, + O::check_result, + O::map_next, + ) + } + + pub(crate) fn poll_extract(self: Pin<&mut Self>, ctx: &task::Context<'_>) -> Poll> + where O: OpExtract, + { + // SAFETY: not moving `fd` or `state`. + let Operation { sq, state } = unsafe { self.get_unchecked_mut() }; + state.poll( + ctx, + sq, + O::fill_submission, + O::check_result, + O::map_ok_extract, + ) } +} - /// Poll the operation check if it's ready. - /// - /// Returns the `flags` and the `result` (always positive). - /// - /// For multishot operations: if this returns `Poll::Pending` the caller - /// should check `is_done` to determine if the previous result was the last - /// one. - #[allow(clippy::needless_pass_by_ref_mut)] // Match `Future` API. 
- pub(crate) fn poll(&mut self, ctx: &mut task::Context<'_>) -> Poll> { - match &mut self.kind { - QueuedOperationKind::Single { result } => { - if let (true, Some(result)) = (self.done, result.as_ref()) { - return Poll::Ready(result.as_result()); - } - } - QueuedOperationKind::Multishot { results } => { - if !results.is_empty() { - let completion = results.remove(0); - if completion.result.is_negative() { - // If we get an error the multishot operation is done. - self.done = true; - } - return Poll::Ready(completion.as_result()); - } - } - } - - if !self.done { - // Still in progress. - let waker = ctx.waker(); - if !matches!(&self.waker, Some(w) if w.will_wake(waker)) { - self.waker = Some(waker.clone()); +impl Cancel for Operation { + fn try_cancel(&mut self) -> CancelResult { + if let Some(op_id) = self.state.cancel() { + let result = self.sq.inner.submit_no_completion(|submission| { + sys::cancel::operation(op_id, submission); + }); + match result { + Ok(()) => CancelResult::Canceled, + Err(QueueFull) => CancelResult::QueueFull, } + } else { + CancelResult::NotStarted } - // NOTE: we can get here multishot operations (see note in docs) or if - // the `Future` is used in an invalid way (e.g. poll after completion). - // In either case we don't to set the waker. - Poll::Pending } - /// Poll the operation for a message. - /// - /// Returns the `flags` and the `result` (always positive). - #[allow(clippy::needless_pass_by_ref_mut)] // Match `Future` API. - pub(crate) fn poll_msg(&mut self, ctx: &mut task::Context<'_>) -> Poll<(u16, u32)> { - match &mut self.kind { - QueuedOperationKind::Multishot { results } => { - if !results.is_empty() { - let completion = results.remove(0); - let data = u32::from_ne_bytes(completion.result.to_ne_bytes()); - return Poll::Ready((completion.flags, data)); - } - } - QueuedOperationKind::Single { .. } => { - panic!("QueuedOperation::poll_msg called incorrectly") - } - } - - // Still in progress. - let waker = ctx.waker(); - if !matches!(&self.waker, Some(w) if w.will_wake(waker)) { - self.waker = Some(waker.clone()); - } - Poll::Pending + fn cancel(&mut self) -> CancelOperation { + CancelOperation::new(self.sq.clone(), self.state.cancel()) } +} - /// Returns true if the operation is done and processed. - pub(crate) const fn is_done(&self) -> bool { - self.done - } +/// Only implement `Unpin` if the underlying operation implement `Unpin`. +impl Unpin for Operation {} - /// Returns true if no more completion events are expected, but may still - /// contain results. - pub(crate) fn no_more_events(&self) -> bool { - self.done || - // If a multishot operation returns an error it's stopped. - matches!(&self.kind, QueuedOperationKind::Multishot { results } if results.iter().any(|c|c.result.is_negative())) +impl Operation { + pub(crate) fn fmt_dbg(&self, name: &'static str, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct(name) + .field("sq", &self.sq) + .field("state", &self.state) + .finish() } +} - /// Set the state of the operation as dropped, but still in progress kernel - /// side. This set the waker to `waker` and make `set_result` return `true`. - pub(crate) fn set_dropped(&mut self, waker: Option) { - self.dropped = true; - self.waker = waker; +impl Drop for Operation { + fn drop(&mut self) { + // SAFETY: we're in the `Drop` implementation. + unsafe { self.state.drop(&self.sq) }; } } -/// [`QueuedOperation`] kind. -#[derive(Debug)] -enum QueuedOperationKind { - /// Single result operation. - Single { - /// Result of the operation. 
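Both the removed `QueuedOperation` and the new code only clone the caller's waker when it would actually wake a different task, using `Waker::will_wake`. The idiom in isolation:

    use std::task::Waker;

    // Only clone the waker when the cached one would not wake the same task;
    // `will_wake` makes repeated polls by the same task allocation-free.
    fn update_waker(slot: &mut Option<Waker>, waker: &Waker) {
        if !matches!(slot, Some(w) if w.will_wake(waker)) {
            *slot = Some(waker.clone());
        }
    }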
- result: Option, - }, - /// Multishot operation, which expects multiple results for the same - /// operation. - Multishot { - /// Results for the operation. - results: Vec, - }, +/// Implementation of a [`Operation`]. +pub(crate) trait Op { + /// Output of the operation. + type Output; + /// Resources used in the operation, e.g. a buffer in a read call. + type Resources: DropWake; + /// Arguments in the system call. + type Args; + /// [`sq::Submission`]. + type Submission; + /// [`cq::Event::State`]. + type OperationState; + /// Output of the operation specific operation. This can differ from + /// `Output`, e.g. for a read this will be the amount bytes read, but the + /// `Output` will be the buffer the bytes are read into. + type OperationOutput; + + /// Fill a submission for the operation. + fn fill_submission( + resources: &mut Self::Resources, + args: &mut Self::Args, + submission: &mut Self::Submission, + ); + + /// Check the result of an operation based on the `QueuedOperation.state` + /// (`Self::OperationState`). + fn check_result( + resources: &mut Self::Resources, + args: &mut Self::Args, + state: &mut Self::OperationState, + ) -> OpResult; + + /// Map the system call output to the future's output. + fn map_ok( + sq: &SubmissionQueue, + resources: Self::Resources, + operation_output: Self::OperationOutput, + ) -> Self::Output; } -/// Completed result of an operation. -#[derive(Copy, Clone, Debug)] -struct CompletionResult { - /// The 16 upper bits of `io_uring_cqe::flags`, e.g. the index of a buffer - /// in a buffer pool. - flags: u16, - /// The result of an operation; negative is a (negative) errno, positive a - /// successful result. The meaning is depended on the operation itself. - result: i32, +/// Extension of [`Op`] to extract the resources used in the operation. To +/// support the [`Extract`] trait. +pub(crate) trait OpExtract: Op { + /// Output of the operation. + type ExtractOutput; + + /// Map the system call output to the future's output. + fn map_ok_extract( + sq: &SubmissionQueue, + resources: Self::Resources, + operation_output: Self::OperationOutput, + ) -> Self::ExtractOutput; } -impl CompletionResult { - fn as_result(self) -> io::Result<(u16, i32)> { - if self.result.is_negative() { - // TODO: handle `-EBUSY` on operations. - // TODO: handle io_uring specific errors here, read CQE - // ERRORS in the manual. - Err(io::Error::from_raw_os_error(-self.result)) - } else { - Ok((self.flags, self.result)) - } - } +/// [`AsyncIterator`] implementation of a [`Operation`]. +pub(crate) trait Iter: Op { + /// Map the system call output to the future's output. + fn map_next( + sq: &SubmissionQueue, + resources: &mut Self::Resources, + operation_output: Self::OperationOutput, + ) -> Self::Output; } -/// Submission event. -/// -/// # Safety -/// -/// It is up to the caller to ensure any data passed to the kernel outlives the -/// operation. -#[repr(transparent)] -pub(crate) struct Submission { - inner: libc::io_uring_sqe, +/// Generic [`Future`] that powers other I/O operation futures on a file +/// descriptor. +pub(crate) struct FdOperation<'fd, O: FdOp, D: Descriptor = File> { + fd: &'fd AsyncFd, + state: State, } -/// The manual says: -/// > If offs is set to -1, the offset will use (and advance) the file -/// > position, like the read(2) and write(2) system calls. -/// -/// `-1` cast as `unsigned long long` in C is the same as as `u64::MAX`. 
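The `Op` trait above splits an operation into three hooks: describe the submission, interpret the raw kernel result, and combine result and resources into the public output. A toy sketch of the same decomposition with the io_uring specifics replaced by plain types (all names here are illustrative, not the crate's API):

    trait ToyOp {
        type Resources;
        type Output;

        /// Describe the operation, standing in for filling a submission
        /// queue entry.
        fn fill(resources: &mut Self::Resources) -> String;
        /// Interpret the kernel's raw result code.
        fn check(raw: i32) -> std::io::Result<u32>;
        /// Combine resources and result into the future's output, e.g. hand
        /// a buffer back together with the number of bytes read into it.
        fn map_ok(resources: Self::Resources, n: u32) -> Self::Output;
    }

Keeping the hooks free functions over `Resources` and `Args`, rather than methods on the future, is what lets a single generic `Operation` future drive every concrete operation.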
-pub(crate) const NO_OFFSET: u64 = u64::MAX; - -// Can't do much about this, flags are defined as signed, but io_uring mostly -// uses unsigned. -#[allow(clippy::cast_sign_loss)] -impl Submission { - /// Reset the submission. - #[allow(clippy::assertions_on_constants)] - pub(crate) fn reset(&mut self) { - debug_assert!(libc::IORING_OP_NOP == 0); - unsafe { ptr::addr_of_mut!(self.inner).write_bytes(0, 1) }; +impl<'fd, O: FdOp, D: Descriptor> FdOperation<'fd, O, D> { + /// Create a new `FdOperation`. + pub(crate) const fn new( + fd: &'fd AsyncFd, + resources: O::Resources, + args: O::Args, + ) -> FdOperation<'fd, O, D> { + FdOperation { + fd, + state: State::new(resources, args), + } } - /// Set the user data to `user_data`. - pub(crate) fn set_user_data(&mut self, user_data: u64) { - self.inner.user_data = user_data; + pub(crate) const fn fd(&self) -> &'fd AsyncFd { + self.fd } +} - /// Mark the submission as using `IOSQE_BUFFER_SELECT`. - pub(crate) fn set_buffer_select(&mut self, buf_group: u16) { - self.inner.__bindgen_anon_4.buf_group = buf_group; - self.inner.flags |= libc::IOSQE_BUFFER_SELECT; +impl<'fd, O, D> FdOperation<'fd, O, D> +where + // TODO: this is silly. + O: FdOp< + Submission = <::Submissions as sq::Submissions>::Submission, + OperationState = <<::Completions as cq::Completions>::Event as cq::Event>::State, + >, + D: Descriptor, + O::OperationOutput: fmt::Debug, +{ + pub(crate) fn poll(self: Pin<&mut Self>, ctx: &task::Context<'_>) -> Poll> { + // SAFETY: not moving `fd` or `state`. + let FdOperation { fd, state } = unsafe { self.get_unchecked_mut() }; + state.poll( + ctx, + fd.sq(), + |resources, args, submission| { + O::fill_submission(fd, resources, args, submission); + D::use_flags(submission); + }, + |resources, args, state| O::check_result(fd, resources, args, state), + |_, resources, operation_output| O::map_ok(fd, resources, operation_output), + ) + } + + pub(crate) fn poll_next(self: Pin<&mut Self>, ctx: &task::Context<'_>) -> Poll>> + where O: FdIter, + { + // SAFETY: not moving `fd` or `state`. + let FdOperation { fd, state } = unsafe { self.get_unchecked_mut() }; + state.poll_next( + ctx, + fd.sq(), + |resources, args, submission| { + O::fill_submission(fd, resources, args, submission); + D::use_flags(submission); + }, + |resources, args, state| O::check_result(fd, resources, args, state), + |_, resources, operation_output| O::map_next(fd, resources, operation_output), + ) + } + + pub(crate) fn poll_extract(self: Pin<&mut Self>, ctx: &task::Context<'_>) -> Poll> + where O: FdOpExtract, + { + // SAFETY: not moving `fd` or `state`. + let FdOperation { fd, state } = unsafe { self.get_unchecked_mut() }; + state.poll( + ctx, + fd.sq(), + |resources, args, submission| { + O::fill_submission(fd, resources, args, submission); + D::use_flags(submission); + }, + |resources, args, state| O::check_result(fd, resources, args, state), + |_, resources, operation_output| O::map_ok_extract(fd, resources, operation_output), + ) } +} - /// Don't return a completion event for this submission. 
- pub(crate) fn no_completion_event(&mut self) { - self.inner.flags |= libc::IOSQE_CQE_SKIP_SUCCESS; +impl<'fd, O: FdOp, D: Descriptor> Cancel for FdOperation<'fd, O, D> { + fn try_cancel(&mut self) -> CancelResult { + if let Some(op_id) = self.state.cancel() { + let result = self.fd.sq.inner.submit_no_completion(|submission| { + sys::cancel::operation(op_id, submission); + }); + match result { + Ok(()) => CancelResult::Canceled, + Err(QueueFull) => CancelResult::QueueFull, + } + } else { + CancelResult::NotStarted + } } - /// Don't attempt to do the operation non-blocking first, always execute it - /// in an async manner. - pub(crate) fn set_async(&mut self) { - self.inner.flags |= libc::IOSQE_ASYNC; + fn cancel(&mut self) -> CancelOperation { + CancelOperation::new(self.fd.sq.clone(), self.state.cancel()) } +} - /// Set the flag to use direct descriptors. - pub(crate) fn use_direct_fd(&mut self) { - self.inner.flags |= libc::IOSQE_FIXED_FILE; - } +/// Only implement `Unpin` if the underlying operation implement `Unpin`. +impl<'fd, O: FdOp + Unpin, D: Descriptor> Unpin for FdOperation<'fd, O, D> {} - /// Set the flag to create direct descriptors. - pub(crate) fn create_direct_fd(&mut self) { - self.inner.__bindgen_anon_5 = libc::io_uring_sqe__bindgen_ty_5 { - file_index: libc::IORING_FILE_INDEX_ALLOC as _, - }; +impl<'fd, O: FdOp, D: Descriptor> FdOperation<'fd, O, D> { + pub(crate) fn fmt_dbg(&self, name: &'static str, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct(name) + .field("fd", &self.fd) + .field("state", &self.state) + .finish() } +} - /// Returns `true` if the submission is unchanged after a [`reset`]. - /// - /// [`reset`]: Submission::reset - #[cfg(debug_assertions)] - pub(crate) const fn is_unchanged(&self) -> bool { - self.inner.opcode == libc::IORING_OP_NOP as u8 +impl<'fd, O: FdOp, D: Descriptor> Drop for FdOperation<'fd, O, D> { + fn drop(&mut self) { + // SAFETY: we're in the `Drop` implementation. + unsafe { self.state.drop(self.fd.sq()) }; } +} - /// Create a regular file descriptor for `direct_fd` (which must be a direct - /// descriptor). - pub(crate) unsafe fn create_file_descriptor(&mut self, direct_fd: RawFd, flags: libc::__u32) { - self.inner.opcode = libc::IORING_OP_FIXED_FD_INSTALL as u8; - self.inner.fd = direct_fd; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - install_fd_flags: flags, - }; - } +/// Implementation of a [`FdOperation`]. +pub(crate) trait FdOp { + /// Output of the operation. + type Output; + /// Resources used in the operation, e.g. a buffer in a read call. + type Resources: DropWake; + /// Arguments in the system call. + type Args; + /// [`sq::Submission`]. + type Submission; + /// [`cq::Event::State`]. + type OperationState; + /// Output of the operation specific operation. This can differ from + /// `Output`, e.g. for a read this will be the amount bytes read, but the + /// `Output` will be the buffer the bytes are read into. + type OperationOutput; + + /// Fill a submission for the operation. + fn fill_submission( + fd: &AsyncFd, + resources: &mut Self::Resources, + args: &mut Self::Args, + submission: &mut Self::Submission, + ); + + /// Check the result of an operation based on the `QueuedOperation.state` + /// (`Self::OperationState`). + fn check_result( + fd: &AsyncFd, + resources: &mut Self::Resources, + args: &mut Self::Args, + state: &mut Self::OperationState, + ) -> OpResult; + + /// Map the system call output to the future's output. 
+ fn map_ok( + fd: &AsyncFd, + resources: Self::Resources, + operation_output: Self::OperationOutput, + ) -> Self::Output; +} - /// Create a direct descriptor for `fd` (which must be a regular file descriptor). - pub(crate) unsafe fn create_direct_descriptor(&mut self, fds: *mut RawFd, len: u32) { - self.inner.opcode = libc::IORING_OP_FILES_UPDATE as u8; - self.inner.fd = -1; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { - off: libc::IORING_FILE_INDEX_ALLOC as _, - }; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: fds as _ }; - self.inner.len = len; - } +/// Extension of [`FdOp`] to extract the resources used in the operation. To +/// support the [`Extract`] trait. +pub(crate) trait FdOpExtract: FdOp { + /// Output of the operation. + type ExtractOutput; + + /// Map the system call output to the future's output. + fn map_ok_extract( + fd: &AsyncFd, + resources: Self::Resources, + operation_output: Self::OperationOutput, + ) -> Self::ExtractOutput; +} - /// Sync the `fd` with `fsync_flags`. - pub(crate) unsafe fn fsync(&mut self, fd: RawFd, fsync_flags: libc::__u32) { - self.inner.opcode = libc::IORING_OP_FSYNC as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { fsync_flags }; - } +/// [`AsyncIterator`] implementation of a [`FdOperation`]. +pub(crate) trait FdIter: FdOp { + /// Map the system call output to the future's output. + fn map_next( + fd: &AsyncFd, + resources: &mut Self::Resources, + operation_output: Self::OperationOutput, + ) -> Self::Output; +} - /// Create a read submission starting at `offset`. - /// - /// Avaialable since Linux kernel 5.6. - pub(crate) unsafe fn read_at(&mut self, fd: RawFd, ptr: *mut u8, len: u32, offset: u64) { - self.inner.opcode = libc::IORING_OP_READ as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: offset }; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: ptr as _ }; - self.inner.len = len; - } +/// State of an [`Operation`] or [`FdOperation`]. +/// +/// Generics: +/// * `R` is [`Op::Resources`] or [`FdOp::Resources`]. +/// * `A` is [`Op::Args`] or [`FdOp::Args`]. +pub(crate) enum State { + /// Operation has not started yet. First has to be submitted. + NotStarted { resources: UnsafeCell, args: A }, + /// Operation has been submitted and is running. + Running { + resources: UnsafeCell, + args: A, + op_id: OperationId, + }, + /// Operation was cancelled. + Cancelled, + /// Operation is done, don't poll again. + Done, +} - /// Create a read vectored submission starting at `offset`. - #[allow(clippy::needless_pass_by_ref_mut)] // Needed for `bufs`. - pub(crate) unsafe fn read_vectored_at( - &mut self, - fd: RawFd, - bufs: &mut [libc::iovec], - offset: u64, - ) { - self.inner.opcode = libc::IORING_OP_READV as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: offset }; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { - addr: bufs.as_ptr() as _, - }; - self.inner.len = bufs.len() as _; +impl State { + pub(crate) const fn new(resources: R, args: A) -> State { + State::NotStarted { + resources: UnsafeCell::new(resources), + args, + } } - /// Create a write submission starting at `offset`. + /// Poll the state of this operation. /// - /// Avaialable since Linux kernel 5.6. 
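The `State` enum above is a small state machine: submission moves `NotStarted` to `Running`, the final completion moves `Running` to `Done`, and cancelling before submission short-circuits to `Cancelled`. A simplified sketch of those transitions (illustrative types, no io_uring):

    enum ToyState {
        NotStarted,
        Running { op_id: usize },
        Cancelled,
        Done,
    }

    impl ToyState {
        fn on_submitted(&mut self, op_id: usize) {
            *self = ToyState::Running { op_id };
        }

        fn on_completed(&mut self) -> Result<(), &'static str> {
            match self {
                ToyState::Running { .. } => {
                    *self = ToyState::Done;
                    Ok(())
                }
                ToyState::NotStarted | ToyState::Cancelled => Err("not running"),
                ToyState::Done => Err("already completed"),
            }
        }
    }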
- pub(crate) unsafe fn write_at(&mut self, fd: RawFd, ptr: *const u8, len: u32, offset: u64) { - self.inner.opcode = libc::IORING_OP_WRITE as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: offset }; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: ptr as u64 }; - self.inner.len = len; - } - - /// Create a write vectored submission starting at `offset`. - pub(crate) unsafe fn write_vectored_at( - &mut self, - fd: RawFd, - bufs: &[libc::iovec], - offset: u64, - ) { - self.inner.opcode = libc::IORING_OP_WRITEV as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: offset }; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { - addr: bufs.as_ptr() as _, - }; - self.inner.len = bufs.len() as _; - } - - pub(crate) unsafe fn mkdirat( - &mut self, - dirfd: RawFd, - path: *const libc::c_char, - mode: libc::mode_t, - ) { - self.inner.opcode = libc::IORING_OP_MKDIRAT as u8; - self.inner.fd = dirfd; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: path as _ }; - self.inner.len = mode; - } - - pub(crate) unsafe fn rename( - &mut self, - old_fd: RawFd, - old_path: *const libc::c_char, - new_fd: RawFd, - new_path: *const libc::c_char, - flags: libc::c_int, - ) { - self.inner.opcode = libc::IORING_OP_RENAMEAT as u8; - self.inner.fd = old_fd; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: new_path as _ }; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { - addr: old_path as _, - }; - self.inner.len = new_fd as _; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - rename_flags: flags as _, - }; - } - - pub(crate) unsafe fn unlinkat( - &mut self, - dirfd: RawFd, - path: *const libc::c_char, - flags: libc::c_int, - ) { - self.inner.opcode = libc::IORING_OP_UNLINKAT as u8; - self.inner.fd = dirfd; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: path as _ }; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - unlink_flags: flags as _, - }; - } - - pub(crate) unsafe fn socket( - &mut self, - domain: libc::c_int, - r#type: libc::c_int, - protocol: libc::c_int, - flags: libc::c_int, - ) { - self.inner.opcode = libc::IORING_OP_SOCKET as u8; - self.inner.fd = domain; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: r#type as _ }; - self.inner.len = protocol as _; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { rw_flags: flags }; - } - - pub(crate) unsafe fn uring_command( - &mut self, - cmd_op: libc::__u32, - fd: RawFd, - level: libc::__u32, - optname: libc::__u32, - optvalue: *mut libc::c_void, - optlen: libc::__u32, - ) { - self.inner.opcode = libc::IORING_OP_URING_CMD as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { - __bindgen_anon_1: libc::io_uring_sqe__bindgen_ty_1__bindgen_ty_1 { cmd_op, __pad1: 0 }, - }; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { - __bindgen_anon_1: libc::io_uring_sqe__bindgen_ty_2__bindgen_ty_1 { level, optname }, - }; - self.inner.__bindgen_anon_5 = libc::io_uring_sqe__bindgen_ty_5 { optlen }; - self.inner.__bindgen_anon_6 = libc::io_uring_sqe__bindgen_ty_6 { - optval: ManuallyDrop::new(optvalue as libc::__u64), - }; - } - - pub(crate) unsafe fn connect( - &mut self, - fd: RawFd, - address: *const libc::sockaddr, - address_length: libc::socklen_t, - ) { - self.inner.opcode = libc::IORING_OP_CONNECT as u8; - self.inner.fd 
= fd; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { - off: u64::from(address_length), - }; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: address as _ }; - } - - /// `opcode` must be `IORING_OP_SEND` or `IORING_OP_SEND_ZC`. - pub(crate) unsafe fn send( - &mut self, - opcode: u8, - fd: RawFd, - ptr: *const u8, - len: u32, - flags: libc::c_int, - ) { - debug_assert!( - opcode == libc::IORING_OP_SEND as u8 || opcode == libc::IORING_OP_SEND_ZC as u8 - ); - self.inner.opcode = opcode; - self.inner.fd = fd; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: ptr as u64 }; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - msg_flags: flags as _, - }; - self.inner.len = len; - } - - /// `opcode` must be `IORING_OP_SEND` or `IORING_OP_SEND_ZC`. - #[allow(clippy::too_many_arguments)] - pub(crate) unsafe fn sendto( + /// NOTE: that the functions match those of the [`FdOp`] and [`Op`] traits. + pub(crate) fn poll( &mut self, - opcode: u8, - fd: RawFd, - buf_ptr: *const u8, - buf_len: u32, - address: *const libc::sockaddr, - address_length: libc::socklen_t, - flags: libc::c_int, - ) { - self.send(opcode, fd, buf_ptr, buf_len, flags); - self.inner.__bindgen_anon_1.addr2 = address as _; - self.inner.__bindgen_anon_5.__bindgen_anon_1.addr_len = address_length as _; + ctx: &task::Context<'_>, + sq: &SubmissionQueue, + fill_submission: FillSubmission, + check_result: CheckResult, + map_ok: MapOk, + ) -> Poll> + where + FillSubmission: FnOnce(&mut R, &mut A, &mut <::Submissions as sq::Submissions>::Submission), + CheckResult: FnOnce(&mut R, &mut A, &mut <<::Completions as cq::Completions>::Event as cq::Event>::State) -> OpResult, + OperationOutput: fmt::Debug, + MapOk: FnOnce(&SubmissionQueue, R, OperationOutput) -> Output, + { + match self { + State::NotStarted { resources, args } => { + let result = sq.inner.submit( + |submission| fill_submission(resources.get_mut(), args, submission), + ctx.waker().clone(), + ); + if let Ok(op_id) = result { + self.running(op_id); + } + // We'll be awoken once the operation is done, or if the + // submission queue is full we'll be awoken once a submission + // slot is available. + Poll::Pending + } + State::Running { + resources, + args, + op_id, + } => { + let op_id = *op_id; + // SAFETY: we've ensured that `op_id` is valid. + let mut queued_op_slot = unsafe { sq.get_op(op_id) }; + log::trace!(queued_op:? = &*queued_op_slot; "mapping operation result"); + let result = match queued_op_slot.as_mut() { + // Only map the result if the operation is marked as done. + // Otherwise we wait for another event. + Some(queued_op) if !queued_op.done => { + queued_op.update_waker(ctx.waker()); + return Poll::Pending; + } + Some(queued_op) => { + check_result(resources.get_mut(), args, &mut queued_op.state) + } + // Somehow the queued operation is gone. This shouldn't + // happen, but we'll deal with it anyway. + None => OpResult::Again(true), + }; + log::trace!(result:? = result; "mapped operation result"); + match result { + OpResult::Ok(ok) => { + let resources = self.done(); + // SAFETY: we've ensured that `op_id` is valid. + unsafe { sq.make_op_available(op_id, queued_op_slot) }; + Poll::Ready(Ok(map_ok(sq, resources, ok))) + } + OpResult::Again(resubmit) => { + // Operation wasn't completed, need to try again. + update_waker(queued_op_slot.as_mut(), ctx.waker()); + drop(queued_op_slot); // Unlock. + if resubmit { + // SAFETY: we've ensured that we own the `op_id`. 
+ // Furthermore we don't use it in case an error is + // returned. + let result = unsafe { + sq.inner.resubmit(op_id, |submission| { + fill_submission(resources.get_mut(), args, submission); + }) + }; + match result { + Ok(()) => { /* Running again using the same operation id. */ } + Err(QueueFull) => self.not_started(), + } + } + // We'll be awoken once the operation is ready again or + // if we can submit again (in case of QueueFull). + Poll::Pending + } + OpResult::Err(err) => { + *self = State::Done; + // SAFETY: we've ensured that `op_id` is valid. + unsafe { sq.make_op_available(op_id, queued_op_slot) }; + Poll::Ready(Err(err)) + } + } + } + State::Cancelled => Poll::Ready(Err(io::Error::from_raw_os_error(libc::ECANCELED))), + State::Done => unreachable!("Future polled after completion"), + } } - /// `opcode` must be `IORING_OP_SENDMSG` or `IORING_OP_SENDMSG_ZC`. - pub(crate) unsafe fn sendmsg( + /// Poll the next item from the state of this operation. + /// + /// NOTE: that the functions match those of the [`FdOp`] and [`Op`] traits. + fn poll_next( &mut self, - opcode: u8, - fd: RawFd, - msg: *const libc::msghdr, - flags: libc::c_int, - ) { - debug_assert!( - opcode == libc::IORING_OP_SENDMSG as u8 || opcode == libc::IORING_OP_SENDMSG_ZC as u8 - ); - self.inner.opcode = opcode; - self.inner.fd = fd; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: msg as u64 }; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - msg_flags: flags as _, - }; - self.inner.len = 1; - } - - pub(crate) unsafe fn recv(&mut self, fd: RawFd, ptr: *mut u8, len: u32, flags: libc::c_int) { - self.inner.opcode = libc::IORING_OP_RECV as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: ptr as _ }; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - msg_flags: flags as _, - }; - self.inner.len = len; - } - - pub(crate) unsafe fn multishot_recv(&mut self, fd: RawFd, flags: libc::c_int, buf_group: u16) { - self.inner.opcode = libc::IORING_OP_RECV as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - msg_flags: flags as _, - }; - self.inner.ioprio = libc::IORING_RECV_MULTISHOT as _; - self.set_buffer_select(buf_group); - } + ctx: &task::Context<'_>, + sq: &SubmissionQueue, + fill_submission: FillSubmission, + check_result: CheckResult, + map_ok: MapOk, + ) -> Poll>> + where + FillSubmission: FnOnce(&mut R, &mut A, &mut <::Submissions as sq::Submissions>::Submission), + CheckResult: FnOnce(&mut R, &mut A, &mut <<::Completions as cq::Completions>::Event as cq::Event>::State) -> OpResult, + OperationOutput: fmt::Debug, + MapOk: FnOnce(&SubmissionQueue, &mut R, OperationOutput) -> Output, + { + match self { + State::NotStarted { resources, args } => { + let result = sq.inner.submit_multishot( + |submission| fill_submission(resources.get_mut(), args, submission), + ctx.waker().clone(), + ); + if let Ok(op_id) = result { + self.running(op_id); + } + // We'll be awoken once the operation is done, or if the + // submission queue is full we'll be awoken once a submission + // slot is available. + Poll::Pending + } + State::Running { + resources, + args, + op_id, + } => { + let op_id = *op_id; + // SAFETY: we've ensured that `op_id` is valid. + let mut queued_op_slot = unsafe { sq.get_op(op_id) }; + log::trace!(queued_op:? 
= &*queued_op_slot; "mapping operation result"); + let result = match queued_op_slot.as_mut() { + Some(queued_op) => { + check_result(resources.get_mut(), args, &mut queued_op.state) + } + // Somehow the queued operation is gone. This shouldn't + // happen, but we'll deal with it anyway. + None => OpResult::Again(true), + }; + log::trace!(result:? = result; "mapped operation result"); + match result { + OpResult::Ok(ok) => Poll::Ready(Some(Ok(map_ok(sq, resources.get_mut(), ok)))), + OpResult::Again(false) if matches!(&*queued_op_slot, Some(o) if o.done) => { + // Multishot operation is complete, mark ourselves as + // done. + *self = State::Done; + // SAFETY: we've ensured that `op_id` is valid. + unsafe { sq.make_op_available(op_id, queued_op_slot) }; + Poll::Ready(None) + } + OpResult::Again(false) => { + // We'll be awoken once the operation is ready again. + update_waker(queued_op_slot.as_mut(), ctx.waker()); + Poll::Pending + } + OpResult::Again(true) => { + // Operation wasn't completed, need to try again. + update_waker(queued_op_slot.as_mut(), ctx.waker()); + drop(queued_op_slot); // Unlock. + + // SAFETY: we've ensured that we own the `op_id`. + // Furthermore we don't use it in case an error is + // returned. + let result = unsafe { + sq.inner.resubmit(op_id, |submission| { + fill_submission(resources.get_mut(), args, submission); + }) + }; + match result { + Ok(()) => { /* Running again using the same operation id. */ } + Err(QueueFull) => self.not_started(), + } + // We'll be awoken once we can submit again. + Poll::Pending + } + OpResult::Err(err) => { + *self = State::Done; + // SAFETY: we've ensured that `op_id` is valid. + unsafe { sq.make_op_available(op_id, queued_op_slot) }; - pub(crate) unsafe fn recvmsg( - &mut self, - fd: RawFd, - msg: *const libc::msghdr, - flags: libc::c_int, - ) { - self.inner.opcode = libc::IORING_OP_RECVMSG as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: msg as u64 }; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - msg_flags: flags as _, - }; - self.inner.len = 1; + if let Some(libc::ECANCELED) = err.raw_os_error() { + // Operation was canceled, so we expect no more + // results. + Poll::Ready(None) + } else { + Poll::Ready(Some(Err(err))) + } + } + } + } + State::Cancelled | State::Done => Poll::Ready(None), + } } - pub(crate) unsafe fn shutdown(&mut self, fd: RawFd, how: libc::c_int) { - self.inner.opcode = libc::IORING_OP_SHUTDOWN as u8; - self.inner.fd = fd; - self.inner.len = how as u32; + /// Cancel the operation, returning the operation id if the operation is + /// running. + fn cancel(&mut self) -> Option<OperationId> { + match self { + State::NotStarted { .. } => { + *self = State::Cancelled; + None + } + State::Running { op_id, .. } => Some(*op_id), + State::Cancelled | State::Done => None, + } } - /// Create a accept submission starting. + /// Marks the state as not started. /// - /// Avaialable since Linux kernel 5.5.
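Aside: the single-shot and multishot paths above share one lifecycle. A stripped-down sketch of it (simplified stand-in types, not the crate's generics-heavy `State`; `submit` and `check` are hypothetical closures standing in for the submission-queue interaction) may help when reading the two `poll` functions:

```rust
use std::io;

// Simplified mirror of the crate's `OpResult`: the bool in `Again` is the
// "should we resubmit?" flag used above.
enum OpResult {
    Ok(u32),
    Again(bool),
    Err(io::Error),
}

enum State {
    NotStarted,
    Running { op_id: usize },
    Done,
}

impl State {
    // `None` plays the role of `Poll::Pending`.
    fn poll(
        &mut self,
        submit: impl FnOnce() -> usize,
        check: impl FnOnce(usize) -> OpResult,
    ) -> Option<io::Result<u32>> {
        match self {
            State::NotStarted => {
                // Submit the operation and wait to be awoken on completion.
                *self = State::Running { op_id: submit() };
                None
            }
            State::Running { op_id } => match check(*op_id) {
                OpResult::Ok(ok) => {
                    *self = State::Done;
                    Some(Ok(ok))
                }
                // `Again(true)` would resubmit under the same operation id;
                // `Again(false)` just waits for the next completion.
                OpResult::Again(_) => None,
                OpResult::Err(err) => {
                    *self = State::Done;
                    Some(Err(err))
                }
            },
            State::Done => unreachable!("polled after completion"),
        }
    }
}

fn main() {
    let mut state = State::NotStarted;
    assert!(state.poll(|| 1, |_| unreachable!()).is_none()); // Submits.
    assert!(state.poll(|| unreachable!(), |_| OpResult::Again(false)).is_none());
    let result = state.poll(|| unreachable!(), |_| OpResult::Ok(42));
    assert_eq!(result.unwrap().unwrap(), 42);
}
```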
- pub(crate) unsafe fn accept( - &mut self, - fd: RawFd, - address: *mut libc::sockaddr, - address_length: *mut libc::socklen_t, - flags: libc::c_int, - ) { - self.inner.opcode = libc::IORING_OP_ACCEPT as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { - off: address_length as _, - }; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: address as _ }; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - accept_flags: flags as _, - }; - } - - pub(crate) unsafe fn multishot_accept(&mut self, fd: RawFd, flags: libc::c_int) { - self.inner.opcode = libc::IORING_OP_ACCEPT as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - accept_flags: flags as _, - }; - self.inner.ioprio = libc::IORING_ACCEPT_MULTISHOT as _; - } - - /// Attempt to cancel an already issued request. + /// # Panics /// - /// Avaialable since Linux kernel 5.5. - pub(crate) unsafe fn cancel(&mut self, fd: RawFd, flags: u32) { - self.inner.opcode = libc::IORING_OP_ASYNC_CANCEL as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - cancel_flags: flags | libc::IORING_ASYNC_CANCEL_FD, - }; - } - - pub(crate) unsafe fn cancel_op(&mut self, op_index: OpIndex) { - self.inner.opcode = libc::IORING_OP_ASYNC_CANCEL as u8; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { - addr: op_index.0 as u64, - }; - } - - /// Open a file by `pathname` in directory `dir_fd`. - pub(crate) unsafe fn open_at( - &mut self, - dir_fd: RawFd, - pathname: *const libc::c_char, - flags: libc::c_int, - mode: libc::mode_t, - ) { - self.inner.opcode = libc::IORING_OP_OPENAT as u8; - self.inner.fd = dir_fd; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { - addr: pathname as _, - }; - self.inner.len = mode; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - open_flags: flags as _, + /// Panics if `self` is `Done`. + fn not_started(&mut self) { + let (resources, args) = match mem::replace(self, State::Done) { + State::NotStarted { resources, args } + | State::Running { + resources, args, .. + } => (resources, args), + State::Cancelled | State::Done => unreachable!(), }; + *self = State::NotStarted { resources, args } } - pub(crate) unsafe fn splice( - &mut self, - fd_in: RawFd, - off_in: u64, - fd_out: RawFd, - off_out: u64, - len: u32, - flags: libc::c_int, - ) { - self.inner.opcode = libc::IORING_OP_SPLICE as u8; - self.inner.fd = fd_out; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: off_out }; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { - splice_off_in: off_in, - }; - self.inner.len = len; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - splice_flags: flags as u32, - }; - self.inner.__bindgen_anon_5 = libc::io_uring_sqe__bindgen_ty_5 { - splice_fd_in: fd_in, - }; - } - - /// Close the `fd`. - pub(crate) unsafe fn close(&mut self, fd: RawFd) { - self.inner.opcode = libc::IORING_OP_CLOSE as u8; - self.inner.fd = fd; - } - - /// Call `statx(2)` on `fd`, where `fd` points to a file. + /// Marks the state as running with `op_id`. /// - /// Avaialable since Linux kernel 5.6. 
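Aside: the `not_started` transition above (and `running`/`done` after it) uses a common trick for enums behind `&mut`: `mem::replace` swaps in a cheap placeholder so the old variant's fields can be moved out by value. A minimal sketch of the same pattern, with hypothetical simplified types:

```rust
use std::mem;

enum State<R> {
    NotStarted { resources: R },
    Running { resources: R, op_id: usize },
    Done,
}

impl<R> State<R> {
    /// Move to `Running`, keeping ownership of `resources`.
    fn running(&mut self, op_id: usize) {
        let resources = match mem::replace(self, State::Done) {
            State::NotStarted { resources } | State::Running { resources, .. } => resources,
            // Mirrors the `unreachable!()` in the diff: callers never make
            // this transition from `Done` (or `Cancelled`).
            State::Done => unreachable!(),
        };
        *self = State::Running { resources, op_id };
    }
}

fn main() {
    let mut state = State::NotStarted { resources: vec![0u8; 8] };
    state.running(7);
    assert!(matches!(state, State::Running { op_id: 7, .. }));
}
```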
- pub(crate) unsafe fn statx_file(&mut self, fd: RawFd, statx: &mut libc::statx, flags: u32) { - self.inner.opcode = libc::IORING_OP_STATX as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { - off: statx as *mut _ as _, - }; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { - addr: "\0".as_ptr() as _, // Not using a path. - }; - self.inner.len = flags; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - statx_flags: libc::AT_EMPTY_PATH as _, - }; - } - - pub(crate) unsafe fn fadvise(&mut self, fd: RawFd, offset: u64, len: u32, advise: libc::c_int) { - self.inner.opcode = libc::IORING_OP_FADVISE as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: offset }; - self.inner.len = len; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - fadvise_advice: advise as _, - }; - } - - pub(crate) unsafe fn fallocate(&mut self, fd: RawFd, offset: u64, len: u32, mode: libc::c_int) { - self.inner.opcode = libc::IORING_OP_FALLOCATE as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: offset }; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { - addr: u64::from(len), - }; - self.inner.len = mode as u32; - } - - pub(crate) unsafe fn poll(&mut self, fd: RawFd, mask: u32) { - self.inner.opcode = libc::IORING_OP_POLL_ADD as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - poll32_events: mask, - }; - } - - pub(crate) unsafe fn remove_poll(&mut self, user_data: OpIndex) { - self.inner.opcode = libc::IORING_OP_POLL_REMOVE as u8; - self.inner.fd = -1; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { - addr: user_data.0 as _, - }; - } - - pub(crate) unsafe fn multishot_poll(&mut self, fd: RawFd, mask: u32) { - self.poll(fd, mask); - self.inner.len = libc::IORING_POLL_ADD_MULTI; + /// # Panics + /// + /// Panics if `self` is `Done`. + fn running(&mut self, op_id: OperationId) { + let (resources, args) = match mem::replace(self, State::Done) { + State::NotStarted { resources, args } + | State::Running { + resources, args, .. + } => (resources, args), + State::Cancelled | State::Done => unreachable!(), + }; + *self = State::Running { + resources, + args, + op_id, + } } - pub(crate) unsafe fn madvise(&mut self, address: *mut (), len: u32, advice: libc::c_int) { - self.inner.opcode = libc::IORING_OP_MADVISE as u8; - self.inner.fd = -1; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { addr: address as _ }; - self.inner.len = len; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { - fadvise_advice: advice as _, - }; + /// Marks the state as done, returning the resources. + /// + /// # Panics + /// + /// Panics if `self` is `Done`. + fn done(&mut self) -> R { + match mem::replace(self, State::Done) { + State::NotStarted { resources, .. } | State::Running { resources, .. 
} => resources, + State::Cancelled | State::Done => unreachable!(), + } + .into_inner() } - #[allow(clippy::cast_possible_wrap)] - pub(crate) unsafe fn waitid( - &mut self, - id: libc::id_t, - id_type: libc::idtype_t, - options: libc::c_int, - info: *mut libc::signalfd_siginfo, - ) { - self.inner.opcode = libc::IORING_OP_WAITID as u8; - self.inner.fd = id as _; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { addr2: info as _ }; - self.inner.len = id_type; - self.inner.__bindgen_anon_5 = libc::io_uring_sqe__bindgen_ty_5 { - file_index: options as _, - }; + /// Drop the state. + /// + /// # Safety + /// + /// Only call this in the `Drop` implementation. + pub(crate) unsafe fn drop(&mut self, sq: &SubmissionQueue) + where + R: DropWake, + { + if let State::Running { .. } = self { + let State::Running { + resources, + args, + op_id, + } = mem::replace(self, State::Done) + else { + unreachable!() + }; + // Can safely drop the arguments already as they're not used by the + // kernel. + drop(args); + // SAFETY: we marked the state as done above so we won't reuse + // `op_id`. + unsafe { sq.inner.cancel(op_id, resources) }; + } else { + // If we haven't started or if we're done we can safely drop the + // remaining resources. + } } +} - pub(crate) unsafe fn ftruncate(&mut self, fd: RawFd, offset: u64) { - self.inner.opcode = libc::IORING_OP_FTRUNCATE as u8; - self.inner.fd = fd; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: offset }; +fn update_waker(queued_op_slot: Option<&mut QueuedOperation>, waker: &task::Waker) { + if let Some(queued_op) = queued_op_slot { + queued_op.update_waker(waker); } +} - pub(crate) unsafe fn wake(&mut self, ring_fd: RawFd) { - self.msg(ring_fd, u64::MAX, 0, 0); - self.no_completion_event(); - } +// SAFETY: `UnsafeCell` is `!Sync`, but as long as `R` is `Sync`, so is it +// while wrapped in `UnsafeCell`. +unsafe impl Send for State {} +unsafe impl Sync for State {} - /// Note that the argument `user_data` and `res` names are the same as the - /// field names on the completion event. - // TODO: we can add another ~`u32` by setting `file_index` (returned as - // `flags` on the completion) and using `IORING_MSG_RING_FLAGS_PASS`, should - // be available in 6.3. - pub(crate) unsafe fn msg( - &mut self, - ring_fd: RawFd, - user_data: u64, - res: u32, - msg_ring_flags: u32, - ) { - self.inner.opcode = libc::IORING_OP_MSG_RING as u8; - self.inner.fd = ring_fd; - self.inner.__bindgen_anon_2 = libc::io_uring_sqe__bindgen_ty_2 { - addr: u64::from(libc::IORING_MSG_DATA), - }; - self.inner.__bindgen_anon_1 = libc::io_uring_sqe__bindgen_ty_1 { off: user_data }; - self.inner.len = res; - self.inner.__bindgen_anon_3 = libc::io_uring_sqe__bindgen_ty_3 { msg_ring_flags }; - } -} +impl RefUnwindSafe for State {} -impl fmt::Debug for Submission { - #[allow(clippy::too_many_lines)] // Not beneficial to split this up. +impl fmt::Debug for State { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // Helper functions with common patterns. - fn io_op(f: &mut fmt::DebugStruct<'_, '_>, submission: &libc::io_uring_sqe, name: &str) { - f.field("opcode", &name) - .field("fd", &submission.fd) - .field("offset", unsafe { &submission.__bindgen_anon_1.off }) - .field("addr", unsafe { &submission.__bindgen_anon_2.addr }) - .field("len", &submission.len); - } - fn net_op(f: &mut fmt::DebugStruct<'_, '_>, submission: &libc::io_uring_sqe, name: &str) { - // NOTE: can't reference a packed struct's field.
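Aside: the manual `Send`/`Sync` impls above are needed because `UnsafeCell<T>` is `!Sync` regardless of `T`, so any type embedding it loses `Sync` automatically and has to opt back in. A small demonstration of that opt-in (hypothetical wrapper, not the crate's type; the impls are sound only if all shared access is externally synchronized, which is the invariant the real `State` relies on):

```rust
use std::cell::UnsafeCell;

struct Shared<T>(UnsafeCell<T>);

// SAFETY: sound only because access to the inner value is externally
// synchronized by the owner of `Shared`.
unsafe impl<T: Send> Send for Shared<T> {}
unsafe impl<T: Sync> Sync for Shared<T> {}

fn assert_sync<T: Sync>() {}

fn main() {
    // This fails to compile without the `unsafe impl Sync` above.
    assert_sync::<Shared<u32>>();
}
```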
- let buf_group = unsafe { submission.__bindgen_anon_4.buf_group }; - f.field("opcode", &name) - .field("fd", &submission.fd) - .field("addr", unsafe { &submission.__bindgen_anon_2.addr }) - .field("len", &submission.len) - .field("msg_flags", unsafe { - &submission.__bindgen_anon_3.msg_flags - }) - .field("ioprio", &submission.ioprio) - .field("buf_group", &buf_group); - } - - let mut f = f.debug_struct("Submission"); - match u32::from(self.inner.opcode) { - libc::IORING_OP_NOP => { - f.field("opcode", &"IORING_OP_NOP"); - } - libc::IORING_OP_FSYNC => { - f.field("opcode", &"IORING_OP_FSYNC") - .field("fd", &self.inner.fd) - .field("fsync_flags", unsafe { - &self.inner.__bindgen_anon_3.fsync_flags - }); - } - libc::IORING_OP_READ => io_op(&mut f, &self.inner, "IORING_OP_READ"), - libc::IORING_OP_READV => io_op(&mut f, &self.inner, "IORING_OP_READV"), - libc::IORING_OP_WRITE => io_op(&mut f, &self.inner, "IORING_OP_WRITE"), - libc::IORING_OP_WRITEV => io_op(&mut f, &self.inner, "IORING_OP_WRITEV"), - libc::IORING_OP_RENAMEAT => { - f.field("opcode", &"IORING_OP_RENAMEAT") - .field("old_fd", &self.inner.fd) - .field("old_path", unsafe { &self.inner.__bindgen_anon_2.addr }) - .field("new_fd", &self.inner.len) - .field("new_path", unsafe { &self.inner.__bindgen_anon_1.off }) - .field("rename_flags", unsafe { - &self.inner.__bindgen_anon_3.rename_flags - }); - } - libc::IORING_OP_SOCKET => { - f.field("opcode", &"IORING_OP_SOCKET") - .field("domain", &self.inner.fd) - .field("type", unsafe { &self.inner.__bindgen_anon_1.off }) - .field("file_index", unsafe { - &self.inner.__bindgen_anon_5.file_index - }) - .field("protocol", &self.inner.len) - .field("rw_flags", unsafe { &self.inner.__bindgen_anon_3.rw_flags }); - } - libc::IORING_OP_CONNECT => { - f.field("opcode", &"IORING_OP_CONNECT") - .field("fd", &self.inner.fd) - .field("addr", unsafe { &self.inner.__bindgen_anon_2.addr }) - .field("addr_size", unsafe { &self.inner.__bindgen_anon_1.off }); - } - libc::IORING_OP_SEND => net_op(&mut f, &self.inner, "IORING_OP_SEND"), - libc::IORING_OP_SEND_ZC => net_op(&mut f, &self.inner, "IORING_OP_SEND_ZC"), - libc::IORING_OP_SENDMSG => net_op(&mut f, &self.inner, "IORING_OP_SENDMSG"), - libc::IORING_OP_SENDMSG_ZC => net_op(&mut f, &self.inner, "IORING_OP_SENDMSG_ZC"), - libc::IORING_OP_RECV => net_op(&mut f, &self.inner, "IORING_OP_RECV"), - libc::IORING_OP_RECVMSG => net_op(&mut f, &self.inner, "IORING_OP_RECVMSG"), - libc::IORING_OP_SHUTDOWN => { - f.field("opcode", &"IORING_OP_SHUTDOWN") - .field("fd", &self.inner.fd) - .field("how", &self.inner.len); - } - libc::IORING_OP_ACCEPT => { - f.field("opcode", &"IORING_OP_ACCEPT") - .field("fd", &self.inner.fd) - .field("addr", unsafe { &self.inner.__bindgen_anon_2.addr }) - .field("addr_size", unsafe { &self.inner.__bindgen_anon_1.off }) - .field("accept_flags", unsafe { - &self.inner.__bindgen_anon_3.accept_flags - }) - .field("file_index", unsafe { - &self.inner.__bindgen_anon_5.file_index - }) - .field("ioprio", &self.inner.ioprio); - } - libc::IORING_OP_ASYNC_CANCEL => { - f.field("opcode", &"IORING_OP_ASYNC_CANCEL"); - let cancel_flags = unsafe { self.inner.__bindgen_anon_3.cancel_flags }; - #[allow(clippy::if_not_else)] - if (cancel_flags & libc::IORING_ASYNC_CANCEL_FD) != 0 { - f.field("fd", &self.inner.fd) - .field("cancel_flags", &cancel_flags); - } else { - f.field("addr", unsafe { &self.inner.__bindgen_anon_2.addr }); - } - } - libc::IORING_OP_OPENAT => { - f.field("opcode", &"IORING_OP_OPENAT") - .field("dirfd", &self.inner.fd) - 
.field("pathname", unsafe { &self.inner.__bindgen_anon_2.addr }) - .field("mode", &self.inner.len) - .field("open_flags", unsafe { - &self.inner.__bindgen_anon_3.open_flags - }) - .field("file_index", unsafe { - &self.inner.__bindgen_anon_5.file_index - }); - } - libc::IORING_OP_SPLICE => { - f.field("opcode", &"IORING_OP_SPLICE") - .field("fd_in", unsafe { - &self.inner.__bindgen_anon_5.splice_fd_in - }) - .field("off_in", unsafe { - &self.inner.__bindgen_anon_2.splice_off_in - }) - .field("fd_out", &self.inner.fd) - .field("off_out", unsafe { &self.inner.__bindgen_anon_1.off }) - .field("len", &self.inner.len) - .field("splice_flags", unsafe { - &self.inner.__bindgen_anon_3.splice_flags - }); - } - libc::IORING_OP_CLOSE => { - f.field("opcode", &"IORING_OP_CLOSE") - .field("fd", &self.inner.fd); - } - libc::IORING_OP_FILES_UPDATE => { - f.field("opcode", &"IORING_OP_FILES_UPDATE") - .field("fd", &self.inner.fd) - .field("offset", unsafe { &self.inner.__bindgen_anon_1.off }) - .field("fds", unsafe { &self.inner.__bindgen_anon_2.addr }) - .field("len", &self.inner.len); - } - libc::IORING_OP_STATX => { - f.field("opcode", &"IORING_OP_STATX") - .field("fd", &self.inner.fd) - .field("pathname", unsafe { &self.inner.__bindgen_anon_2.addr }) - .field("statx_flags", unsafe { - &self.inner.__bindgen_anon_3.statx_flags - }) - .field("mask", &self.inner.len) - .field("statx", unsafe { &self.inner.__bindgen_anon_1.off }); - } - libc::IORING_OP_FADVISE => { - f.field("opcode", &"IORING_OP_FADVISE") - .field("fd", &self.inner.fd) - .field("offset", unsafe { &self.inner.__bindgen_anon_1.off }) - .field("len", &self.inner.len) - .field("advise", unsafe { - &self.inner.__bindgen_anon_3.fadvise_advice - }); - } - libc::IORING_OP_FALLOCATE => { - f.field("opcode", &"IORING_OP_FALLOCATE") - .field("fd", &self.inner.fd) - .field("offset", unsafe { &self.inner.__bindgen_anon_1.off }) - .field("len", unsafe { &self.inner.__bindgen_anon_2.addr }) - .field("mode", &self.inner.len); - } - libc::IORING_OP_UNLINKAT => { - f.field("opcode", &"IORING_OP_UNLINKAT") - .field("dirfd", &self.inner.fd) - .field("path", unsafe { &self.inner.__bindgen_anon_2.addr }) - .field("unlink_flags", unsafe { - &self.inner.__bindgen_anon_3.unlink_flags - }); - } - libc::IORING_OP_MKDIRAT => { - f.field("opcode", &"IORING_OP_MKDIRAT") - .field("dirfd", &self.inner.fd) - .field("path", unsafe { &self.inner.__bindgen_anon_2.addr }) - .field("mode", &self.inner.len); - } - libc::IORING_OP_POLL_ADD => { - f.field("opcode", &"IORING_OP_POLL_ADD") - .field("fd", &self.inner.fd) - .field("poll_events", unsafe { - &self.inner.__bindgen_anon_3.poll32_events - }) - .field( - "multishot", - &(self.inner.len == libc::IORING_POLL_ADD_MULTI), - ); - } - libc::IORING_OP_POLL_REMOVE => { - f.field("opcode", &"IORING_OP_POLL_REMOVE") - .field("target_user_data", unsafe { - &self.inner.__bindgen_anon_2.addr - }); - } - libc::IORING_OP_MADVISE => { - f.field("opcode", &"IORING_OP_MADVISE") - .field("address", unsafe { &self.inner.__bindgen_anon_2.addr }) - .field("len", &self.inner.len) - .field("advise", unsafe { - &self.inner.__bindgen_anon_3.fadvise_advice - }); - } - libc::IORING_OP_MSG_RING => { - f.field("opcode", &"IORING_OP_MSG_RING") - .field("ringfd", &self.inner.fd) - .field("msg1", &self.inner.len) - .field("msg2", unsafe { &self.inner.__bindgen_anon_1.off }); - } - libc::IORING_OP_WAITID => { - f.field("opcode", &"IORING_OP_WAITID") - .field("id", &self.inner.fd) - .field("id_type", &self.inner.len) - .field("options", unsafe { - 
&self.inner.__bindgen_anon_5.file_index - }) - .field("info", unsafe { &self.inner.__bindgen_anon_1.addr2 }); - } - libc::IORING_OP_FIXED_FD_INSTALL => { - f.field("opcode", &"IORING_OP_FIXED_FD_INSTALL") - .field("fd", &self.inner.fd) - .field("install_fd_flags", unsafe { - &self.inner.__bindgen_anon_3.install_fd_flags - }); - } - _ => { - // NOTE: we can't access the unions safely without know what - // fields to read. - f.field("opcode", &self.inner.opcode) - .field("ioprio", &self.inner.ioprio) - .field("fd", &self.inner.fd) - .field("len", &self.inner.len) - .field("personality", &self.inner.personality); - } + // When the state is `Running` we can't access the resources (the + // kernel may still be using them), so only the operation id is shown. + match self { + State::NotStarted { .. } => f.debug_struct("State::NotStarted").finish(), + State::Running { op_id, .. } => f + .debug_struct("State::Running") + .field("op_id", &op_id) + .finish(), + State::Cancelled { .. } => f.debug_struct("State::Cancelled").finish(), + State::Done { .. } => f.debug_struct("State::Done").finish(), } - f.field("flags", &self.inner.flags) - .field("user_data", &self.inner.user_data) - .finish() } } -/// Macro to create an operation [`Future`] structure. -/// -/// [`Future`]: std::future::Future -macro_rules! op_future { +/// [`Op`] and [`FdOp`] result. +#[derive(Debug)] +pub(crate) enum OpResult<T> { + /// [`Result::Ok`]. + Ok(T), + /// Try the operation again. + /// + /// The boolean indicates whether or not we should resubmit. + Again(bool), + /// [`Result::Err`]. + Err(io::Error), +} + +/// Create a [`Future`] based on [`Operation`]. +macro_rules! operation { ( - // File type and function name. - fn $type: ident :: $method: ident -> $result: ty, - // Future structure. - struct $name: ident < $lifetime: lifetime $(, $generic: ident $(: $trait: path )? )* $(; const $const_generic: ident : $const_ty: ty )* > { - $( - // Field(s) passed to io_uring, always wrapped in an `Option`. - // Syntax is the same a struct definition. - $(#[ $field_doc: meta ])* - $field: ident : $value: ty, - )* - }, - // Whether or not to use a special type to implemnt `DropWake`. - $( - drop_using: $drop_wake: tt, - )? - // Whether or not the structure should be `!Unpin` by including - // `PhantomPinned`. $( - $(#[ $phantom_doc: meta ])* - impl !Unpin, - )? - // State held in the setup function. - setup_state: $setup_field: ident : $setup_ty: ty, - // Function to setup the operation. - setup: |$setup_submission: ident, $setup_fd: ident, $setup_resources: tt, $setup_state: tt| $setup_fn: expr, - // Mapping function that maps the returned `$arg`ument into - // `$map_result`. The `$resources` is a tuple of the `$field`s on the - // future. - map_result: |$self: ident, $resources: tt, $flags: ident, $map_arg: ident| $map_result: expr, - // Mapping function for `Extractor` implementation. See above. - extract: |$extract_self: ident, $extract_resources: tt, $extract_flags: ident, $extract_arg: ident| -> $extract_result: ty $extract_map: block, + $(#[ $meta: meta ])* + $vis: vis struct $name: ident $( < $( $resources: ident $( : $trait: path )? )+ $(; const $const_generic: ident : $const_ty: ty )?> )? ($sys: ty) -> $output: ty $( , impl Extract -> $extract_output: ty )? ; + )+ ) => { - $crate::op::op_future! { - fn $type::$method -> $result, - struct $name<$lifetime $(, $generic $(: $trait )? )* $(; const $const_generic: $const_ty )*> { - $( - $(#[$field_doc])* - $field: $value, - )* - }, - $( - drop_using: $drop_wake, - )? - $( - $(#[ $phantom_doc ])* - impl !Unpin, - )?
- setup_state: $setup_field : $setup_ty, - setup: |$setup_submission, $setup_fd, $setup_resources, $setup_state| $setup_fn, - map_result: |$self, $resources, $flags, $map_arg| $map_result, - } - - impl<$lifetime $(, $generic $(: $trait )? )* $(, const $const_generic: $const_ty )*, D: $crate::fd::Descriptor> $crate::Extract for $name<$lifetime $(, $generic)* $(, $const_generic )*, D> {} - - impl<$lifetime $(, $generic $(: $trait )? )* $(, const $const_generic: $const_ty )*, D: $crate::fd::Descriptor> std::future::Future for $crate::extract::Extractor<$name<$lifetime $(, $generic)* $(, $const_generic )*, D>> { - type Output = std::io::Result<$extract_result>; - - fn poll(self: std::pin::Pin<&mut Self>, ctx: &mut std::task::Context<'_>) -> std::task::Poll { - // SAFETY: we're not moving anything out of `self. - let $self = unsafe { std::pin::Pin::into_inner_unchecked(self) }; - let op_index = std::task::ready!($self.fut.poll_op_index(ctx)); - - match $self.fut.fd.sq.poll_op(ctx, op_index) { - std::task::Poll::Ready(result) => { - $self.fut.state = $crate::op::OpState::Done; - match result { - std::result::Result::Ok(($extract_flags, $extract_arg)) => { - let $extract_self = &mut $self.fut; - // SAFETY: this will not panic because we need - // to keep the resources around until the - // operation is completed. - let $extract_resources = $extract_self.resources.take().unwrap().into_inner(); - std::task::Poll::Ready($extract_map) - }, - std::result::Result::Err(err) => std::task::Poll::Ready(std::result::Result::Err(err)), - } - }, - std::task::Poll::Pending => std::task::Poll::Pending, - } - } - } + $( + $crate::op::new_operation!( + $(#[ $meta ])* + $vis struct $name $( < $( $resources $( : $trait )? )+ $(; const $const_generic : $const_ty )?> )? (Operation($sys)) + impl Future -> $output, + $( impl Extract -> $extract_output, )? + ); + )+ }; - // Base version (without any additional implementations). +} + +/// Create an [`AsyncIterator`] based on multishot [`Operation`]s. +macro_rules! iter_operation { ( - fn $type: ident :: $method: ident -> $result: ty, - struct $name: ident < $lifetime: lifetime $(, $generic: ident $(: $trait: path )? )* $(; const $const_generic: ident : $const_ty: ty )* > { - $( - $(#[ $field_doc: meta ])* - $field: ident : $value: ty, - )* - }, $( - drop_using: $drop_wake: tt, - )? - $( - $(#[ $phantom_doc: meta ])* - impl !Unpin, - )? - setup_state: $setup_field: ident : $setup_ty: ty, - setup: |$setup_submission: ident, $setup_fd: ident, $setup_resources: tt, $setup_state: tt| $setup_fn: expr, - map_result: |$self: ident, $resources: tt, $flags: ident, $map_arg: ident| $map_result: expr, + $(#[ $meta: meta ])* + $vis: vis struct $name: ident $( < $( $resources: ident $( : $trait: path )? )+ $(; const $const_generic: ident : $const_ty: ty )?> )? ($sys: ty) -> $output: ty $( , impl Extract -> $extract_output: ty )? ; + )+ ) => { - #[doc = concat!("[`Future`](std::future::Future) behind [`", stringify!($type), "::", stringify!($method), "`].")] - #[derive(Debug)] - #[must_use = "`Future`s do nothing unless polled"] - pub struct $name<$lifetime $(, $generic)* $(, const $const_generic: $const_ty )*, D: $crate::fd::Descriptor = $crate::fd::File> { - /// Resoures used in the operation. - /// - /// If this is `Some` when the future is dropped it will assume it - /// was dropped before completion and set the operation state to - /// dropped. - resources: std::option::Option>, - /// File descriptor used in the operation. 
- fd: &$lifetime $crate::AsyncFd, - /// State of the operation. - state: $crate::op::OpState<$setup_ty>, - $( - $( #[ $phantom_doc ] )* - _phantom: std::marker::PhantomPinned, - )? - } - - impl<$lifetime $(, $generic )* $(, const $const_generic: $const_ty )*, D: $crate::fd::Descriptor> $name<$lifetime $(, $generic)* $(, $const_generic )*, D> { - #[doc = concat!("Create a new `", stringify!($name), "`.")] - const fn new(fd: &$lifetime $crate::AsyncFd, $( $field: $value, )* $setup_field : $setup_ty) -> $name<$lifetime $(, $generic)* $(, $const_generic )*, D> { - // This is needed because of the usage of `$phantom_doc`, which - // is needed for the macro to work, even though it doesn't - // create any documentation. - #[allow(unused_doc_comments)] - $name { - resources: std::option::Option::Some(std::cell::UnsafeCell::new(( - $( $field, )* - ))), - fd, - state: $crate::op::OpState::NotStarted($setup_field), - $( - $( #[ $phantom_doc ] )* - _phantom: std::marker::PhantomPinned, - )? - } - } - } - - impl<$lifetime $(, $generic )* $(, const $const_generic: $const_ty )*, D: $crate::fd::Descriptor> $crate::cancel::Cancel for $name<$lifetime $(, $generic)* $(, $const_generic )*, D> { - fn try_cancel(&mut self) -> $crate::cancel::CancelResult { - self.state.try_cancel(&self.fd.sq) - } - - fn cancel(&mut self) -> $crate::cancel::CancelOp { - self.state.cancel(&self.fd.sq) - } - } - - impl<$lifetime $(, $generic $(: $trait )? )* $(, const $const_generic: $const_ty )*, D: $crate::fd::Descriptor> $name<$lifetime $(, $generic)* $(, $const_generic )*, D> { - /// Poll for the `OpIndex`. - fn poll_op_index(&mut self, ctx: &mut std::task::Context<'_>) -> std::task::Poll<$crate::OpIndex> { - std::task::Poll::Ready($crate::op::poll_state!($name, *self, ctx, |$setup_submission, $setup_fd, $setup_resources, $setup_state| { - $setup_fn - D::use_flags($setup_submission); - })) - } - } - - impl<$lifetime $(, $generic $(: $trait )? )* $(, const $const_generic: $const_ty )*, D: $crate::fd::Descriptor> std::future::Future for $name<$lifetime $(, $generic)* $(, $const_generic )*, D> { - type Output = std::io::Result<$result>; - - fn poll(self: std::pin::Pin<&mut Self>, ctx: &mut std::task::Context<'_>) -> std::task::Poll { - // SAFETY: we're not moving anything out of `self. - let $self = unsafe { std::pin::Pin::into_inner_unchecked(self) }; - let op_index = std::task::ready!($self.poll_op_index(ctx)); - - match $self.fd.sq.poll_op(ctx, op_index) { - std::task::Poll::Ready(result) => { - $self.state = $crate::op::OpState::Done; - match result { - std::result::Result::Ok(($flags, $map_arg)) => { - // SAFETY: this will not panic because we need - // to keep the resources around until the - // operation is completed. 
- let $resources = $self.resources.take().unwrap().into_inner(); - std::task::Poll::Ready($map_result) - }, - std::result::Result::Err(err) => std::task::Poll::Ready(std::result::Result::Err(err)), - } - }, - std::task::Poll::Pending => std::task::Poll::Pending, - } - } - } - - unsafe impl<$lifetime $(, $generic: std::marker::Send )* $(, const $const_generic: $const_ty )*, D: $crate::fd::Descriptor + std::marker::Send> std::marker::Send for $name<$lifetime $(, $generic)* $(, $const_generic )*, D> {} - unsafe impl<$lifetime $(, $generic: std::marker::Sync )* $(, const $const_generic: $const_ty )*, D: $crate::fd::Descriptor + std::marker::Sync> std::marker::Sync for $name<$lifetime $(, $generic)* $(, $const_generic )*, D> {} - - impl<$lifetime $(, $generic)* $(, const $const_generic: $const_ty )*, D: $crate::fd::Descriptor> std::ops::Drop for $name<$lifetime $(, $generic)* $(, $const_generic )*, D> { - fn drop(&mut self) { - if let std::option::Option::Some(resources) = self.resources.take() { - match self.state { - $crate::op::OpState::Running(op_index) => { - // Use a different type for the `DropWake` - // implementation. - let drop_resource = || { - $( let resources = $drop_wake::from(resources); )? - resources - }; - let result = self.fd.sq.cancel_op(op_index, drop_resource, |submission| unsafe { - submission.cancel_op(op_index); - // We'll get a canceled completion event if we succeeded, which - // is sufficient to cleanup the operation. - submission.no_completion_event(); - }); - if let std::result::Result::Err(err) = result { - log::error!(concat!("dropped a10::", stringify!($name), " before completion, attempt to cancel failed: {}"), err); - } - }, - // NOTE: `Done` should not be reachable, but no point in - // creating another branch. - #[allow(clippy::drop_non_drop)] - $crate::op::OpState::NotStarted(_) | $crate::op::OpState::Done => drop(resources), - } - } - } - } + $( + $crate::op::new_operation!( + $(#[ $meta ])* + $vis struct $name $( < $( $resources $( : $trait )? )+ $(; const $const_generic : $const_ty )?> )? (Operation($sys)) + impl AsyncIter -> $output, + $( impl Extract -> $extract_output, )? + ); + )+ }; - // Version that doesn't need the `flags` from the result in `$map_result`. +} + +/// Create a [`Future`] based on [`FdOperation`]. +macro_rules! fd_operation { ( - fn $type: ident :: $method: ident -> $result: ty, - struct $name: ident < $lifetime: lifetime $(, $generic: ident $(: $trait: path )? )* $(; const $const_generic: ident : $const_ty: ty )* > { - $( - $(#[ $field_doc: meta ])* - $field: ident : $value: ty, - )* - }, - $( - drop_using: $drop_wake: tt, - )? $( - $(#[ $phantom_doc: meta ])* - impl !Unpin, - )? - setup_state: $setup_data: ident : $setup_ty: ty, - setup: |$setup_submission: ident, $setup_fd: ident, $setup_resources: tt, $setup_state: tt| $setup_fn: expr, - map_result: |$self: ident, $resources: tt, $map_arg: ident| $map_result: expr, - $( extract: |$extract_self: ident, $extract_resources: tt, $extract_flags: ident, $extract_arg: ident| -> $extract_result: ty $extract_map: block, )? + $(#[ $meta: meta ])* + $vis: vis struct $name: ident $( < $( $resources: ident $( : $trait: path )? ),+ $(; const $const_generic: ident : $const_ty: ty )?> )? ($sys: ty) -> $output: ty $( , impl Extract -> $extract_output: ty )? ; + )+ ) => { - $crate::op::op_future! { - fn $type::$method -> $result, - struct $name<$lifetime $(, $generic $(: $trait )? 
)* $(; const $const_generic: $const_ty )*> { - $( - $(#[$field_doc])* - $field: $value, - )* - }, - $( - drop_using: $drop_wake, - )? - $( - $(#[ $phantom_doc ])* - impl !Unpin, - )? - setup_state: $setup_data: $setup_ty, - setup: |$setup_submission, $setup_fd, $setup_resources, $setup_state| $setup_fn, - map_result: |$self, $resources, _unused_flags, $map_arg| $map_result, - $( extract: |$extract_self, $extract_resources, _unused_flags, $extract_arg| -> $extract_result $extract_map, )? - } + $( + $crate::op::new_operation!( + $(#[ $meta ])* + $vis struct $name <'fd, $( $( $resources $( : $trait )? ),+ $(; const $const_generic : $const_ty )? )? ;; D: $crate::fd::Descriptor = $crate::fd::File> (FdOperation($sys)) + impl Future -> $output, + $( impl Extract -> $extract_output, )? + ); + )+ }; - // Version that doesn't need `self` (this) or resources in `$map_result`. +} + +/// Create an [`AsyncIterator`] based on multishot [`FdOperation`]s. +macro_rules! fd_iter_operation { ( - fn $type: ident :: $method: ident -> $result: ty, - struct $name: ident < $lifetime: lifetime $(, $generic: ident $(: $trait: path )? )* $(; const $const_generic: ident : $const_ty: ty )* > { - $( - $(#[ $field_doc: meta ])* - $field: ident : $value: ty, - )* - }, - $( - drop_using: $drop_wake: tt, - )? $( - $(#[ $phantom_doc: meta ])* - impl !Unpin, - )? - setup_state: $setup_field: ident : $setup_ty: ty, - setup: |$setup_submission: ident, $setup_fd: ident, $setup_resources: tt, $setup_state: tt| $setup_fn: expr, - map_result: |$map_arg: ident| $map_result: expr, // Only difference: 1 argument. - $( extract: |$extract_self: ident, $extract_resources: tt, $extract_arg: ident| -> $extract_result: ty $extract_map: block, )? + $(#[ $meta: meta ])* + $vis: vis struct $name: ident $( < $( $resources: ident $( : $trait: path )? ),+ $(; const $const_generic: ident : $const_ty: ty )?> )? ($sys: ty) -> $output: ty $( , impl Extract -> $extract_output: ty )? ; + )+ ) => { - $crate::op::op_future!{ - fn $type::$method -> $result, - struct $name<$lifetime $(, $generic $(: $trait )? )* $(; const $const_generic: $const_ty )*> { - $( - $(#[$field_doc])* - $field: $value, - )* - }, - $( - drop_using: $drop_wake, - )? - $( - $(#[ $phantom_doc ])* - impl !Unpin, - )? - setup_state: $setup_field : $setup_ty, - setup: |$setup_submission, $setup_fd, $setup_resources, $setup_state| $setup_fn, - map_result: |_unused_this, _unused_resources, _unused_flags, $map_arg| $map_result, - $( extract: |$extract_self, $extract_resources, _unused_flags, $extract_arg| -> $extract_result $extract_map, )? - } + $( + $crate::op::new_operation!( + $(#[ $meta ])* + $vis struct $name <'fd, $( $( $resources $( : $trait )? ),+ $(; const $const_generic : $const_ty )? )? ;; D: $crate::fd::Descriptor = $crate::fd::File> (FdOperation($sys)) + impl AsyncIter -> $output, + $( impl Extract -> $extract_output, )? + ); + )+ }; } -pub(crate) use op_future; +/// Helper macro for [`operation`] and [`fd_operation`], use those instead. +macro_rules! new_operation { + ( + $(#[ $meta: meta ])* + $vis: vis struct $name: ident $( < $( $lifetime: lifetime, )* $( $( $resources: ident $( : $trait: path )? ),+ $(; const $const_generic: ident : $const_ty: ty )? )? $(;; $gen: ident : $gen_trait: path = $gen_default: path )? > )? ($op_type: ident ( $sys: ty ) ) + $( impl Future -> $future_output: ty , )? + $( impl AsyncIter -> $iter_output: ty , )? + $( impl Extract -> $extract_output: ty , )? + ) => { + // NOTE: the weird meta ordering is required here. 
+ $( + $crate::op::new_operation!(ignore $future_output); + #[doc = "\n\n[`Future`]: std::future::Future"] + #[must_use = "`Future`s do nothing unless polled"] + )? + $( + $crate::op::new_operation!(ignore $iter_output); + #[doc = "\n\n[`AsyncIterator`]: std::async_iter::AsyncIterator"] + #[must_use = "`AsyncIterator`s do nothing unless polled"] + )? + $(#[ $meta ])* + $vis struct $name<$( $( $lifetime, )* $( $( $resources $( : $trait )?, )+ $(const $const_generic: $const_ty, )? )? $( $gen : $gen_trait = $gen_default )? )?>($crate::op::$op_type<$( $( $lifetime, )* )? $sys $( $(, $gen )? )? >); -/// State of an [`op_future!`] [`Future`] or [`op_async_iter!`] -/// [`AsyncIterator`]. -/// -/// [`Future`]: std::future::Future -/// [`AsyncIterator`]: std::async_iter::AsyncIterator -#[derive(Debug)] -pub(crate) enum OpState { - /// The operation has not started yet. - NotStarted(S), - /// Operation is running, waiting for the (next) result. - Running(OpIndex), - /// Operation is done. - Done, -} + $crate::op::new_operation!(Future for $name $( <$( $lifetime, )* $( $( $resources $( : $trait )? ),+ $(; const $const_generic: $const_ty )? )? $(;; $gen : $gen_trait = $gen_default )? > )? -> $( $future_output )?); + $crate::op::new_operation!(AsyncIter for $name $( <$( $lifetime, )* $( $( $resources $( : $trait )? ),+ $(; const $const_generic: $const_ty )? )? $(;; $gen : $gen_trait = $gen_default )? > )? -> $( $iter_output )?); + $crate::op::new_operation!(Extract for $name $( <$( $lifetime, )* $( $( $resources $( : $trait )? ),+ $(; const $const_generic: $const_ty )? )? $(;; $gen : $gen_trait = $gen_default )? > )? -> $( $extract_output )?); -impl OpState { - pub(crate) fn try_cancel(&mut self, sq: &SubmissionQueue) -> CancelResult { - match self { - OpState::NotStarted(_) => CancelResult::NotStarted, - OpState::Running(op_index) => { - let result = sq.add_no_result(|submission| unsafe { - submission.cancel_op(*op_index); - submission.no_completion_event(); - }); - match result { - Ok(()) => CancelResult::Canceled, - Err(QueueFull(())) => CancelResult::QueueFull, - } + impl<$( $( $lifetime, )* $( $( $resources $( : $trait )?, )+ $(const $const_generic: $const_ty, )? )? $( $gen : $gen_trait )? )?> $crate::cancel::Cancel for $name<$( $( $lifetime, )* $( $( $resources, )+ $( $const_generic, )? )? $( $gen )? )?> { + fn try_cancel(&mut self) -> $crate::cancel::CancelResult { + self.0.try_cancel() } - OpState::Done => CancelResult::Canceled, - } - } - pub(crate) fn cancel<'a>(&mut self, sq: &'a SubmissionQueue) -> CancelOp<'a> { - let op_index = match self { - OpState::Running(op_index) => Some(*op_index), - OpState::NotStarted(_) | OpState::Done => None, - }; - CancelOp::new(sq, op_index) - } -} - -/// Poll an [`OpState`]. -macro_rules! poll_state { - // Variant used by `op_future!`. - ( - $name: ident, $self: expr, $ctx: expr, - |$setup_submission: ident, $setup_fd: ident, $setup_resources: tt, $setup_state: tt| $setup_fn: expr $(,)? - ) => { - match $self.state { - $crate::op::OpState::Running(op_index) => op_index, - $crate::op::OpState::NotStarted($setup_state) => { - let $name { - fd: $setup_fd, - resources, - .. - } = &mut $self; - // SAFETY: this will not panic as the resources are only removed - // after the state is set to `Done`. 
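Aside: all four public macros funnel into `new_operation!`, which generates a public newtype around the internal state machine and forwards `Future` (or `poll_next`) through a pinned field projection. Hand-expanded, the generated code looks roughly like this sketch (stand-in `Operation` type and `MyOp` name are assumptions, not the real generic code; `Waker::noop` is stable as of Rust 1.85, which this diff pins):

```rust
use std::future::Future;
use std::io;
use std::pin::Pin;
use std::task::{Context, Poll, Waker};

// Stand-in for the internal `crate::op::Operation<...>` state machine.
struct Operation;

impl Future for Operation {
    type Output = io::Result<u32>;
    fn poll(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Self::Output> {
        Poll::Ready(Ok(0))
    }
}

// Roughly what `operation!(pub struct MyOp(...) -> io::Result<u32>;)` emits.
#[must_use = "`Future`s do nothing unless polled"]
pub struct MyOp(Operation);

impl Future for MyOp {
    type Output = io::Result<u32>;
    fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Self::Output> {
        // SAFETY: structural pinning of field 0; it is never moved out.
        unsafe { Pin::map_unchecked_mut(self, |s| &mut s.0) }.poll(ctx)
    }
}

fn main() {
    let mut op = MyOp(Operation);
    let mut ctx = Context::from_waker(Waker::noop());
    assert!(matches!(Pin::new(&mut op).poll(&mut ctx), Poll::Ready(Ok(0))));
}
```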
- #[allow(clippy::let_unit_value)] - let $setup_resources = resources.as_mut().take().unwrap().get_mut(); - let result = $setup_fd.sq.add(|$setup_submission| $setup_fn); - match result { - Ok(op_index) => { - $self.state = $crate::op::OpState::Running(op_index); - op_index - } - Err($crate::QueueFull(())) => { - $self.fd.sq.wait_for_submission($ctx.waker().clone()); - return std::task::Poll::Pending; - } - } + fn cancel(&mut self) -> $crate::cancel::CancelOperation { + self.0.cancel() } - $crate::op::OpState::Done => $crate::op::poll_state!(__panic $name), } - }; - // Without `$setup_resources`, but expects `$self.fd` to be `AsyncFd`. - ( - $name: ident, $self: expr, $ctx: expr, - |$setup_submission: ident, $setup_fd: ident, $setup_state: tt| $setup_fn: expr $(,)? - ) => { - match $self.state { - $crate::op::OpState::Running(op_index) => op_index, - $crate::op::OpState::NotStarted($setup_state) => { - let $setup_fd = $self.fd; - let result = $self.fd.sq.add(|$setup_submission| $setup_fn); - match result { - Ok(op_index) => { - $self.state = $crate::op::OpState::Running(op_index); - op_index - } - Err($crate::QueueFull(())) => { - $self.fd.sq.wait_for_submission($ctx.waker().clone()); - return std::task::Poll::Pending; - } - } + + impl<$( $( $lifetime, )* $( $( $resources: $( $trait + )? ::std::fmt::Debug, )+ $(const $const_generic: $const_ty, )? )? $( $gen : $gen_trait )? )?> ::std::fmt::Debug for $name<$( $( $lifetime, )* $( $( $resources, )+ $( $const_generic, )? )? $( $gen )? )?> { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + self.0.fmt_dbg(::std::stringify!("a10::", $name), f) } - $crate::op::OpState::Done => $crate::op::poll_state!(__panic $name), } }; - // No `AsyncFd` or `$setup_resources`. - // NOTE: this doesn't take `$self`, but `$state` and `$sq`. ( - $name: ident, $state: expr, $sq: expr, $ctx: expr, - |$setup_submission: ident, $setup_state: tt| $setup_fn: expr $(,)? + Future for $name: ident $( < $( $lifetime: lifetime, )* $( $( $resources: ident $( : $trait: path )? ),+ $(; const $const_generic: ident : $const_ty: ty )? )? $(;; $gen: ident : $gen_trait: path = $gen_default: path)? > )? -> $output: ty ) => { - match $state { - $crate::op::OpState::Running(op_index) => op_index, - $crate::op::OpState::NotStarted($setup_state) => { - let result = $sq.add(|$setup_submission| $setup_fn); - match result { - Ok(op_index) => { - $state = $crate::op::OpState::Running(op_index); - op_index - } - Err($crate::QueueFull(())) => { - $sq.wait_for_submission($ctx.waker().clone()); - return std::task::Poll::Pending; - } - } + impl<$( $( $lifetime, )* $( $( $resources $( : $trait )?, )+ $(const $const_generic: $const_ty, )? )? $( $gen : $gen_trait )? )?> ::std::future::Future for $name<$( $( $lifetime, )* $( $( $resources, )+ $( $const_generic, )? )? $( $gen )? )?> { + type Output = $output; + + fn poll(self: ::std::pin::Pin<&mut Self>, ctx: &mut ::std::task::Context<'_>) -> ::std::task::Poll { + // SAFETY: not moving `self.0` (`s.0`), directly called `poll` on it. + unsafe { ::std::pin::Pin::map_unchecked_mut(self, |s| &mut s.0) }.poll(ctx) } - $crate::op::OpState::Done => $crate::op::poll_state!(__panic $name), } }; - (__panic $name: ident) => { - unreachable!(concat!("a10::", stringify!($name), " polled after completion")) - } -} - -pub(crate) use poll_state; - -/// Macro to create an operation [`AsyncIterator`] structure based on multishot -/// operations. -/// -/// [`AsyncIterator`]: std::async_iter::AsyncIterator -macro_rules! 
op_async_iter { ( - fn $type: ident :: $method: ident -> $result: ty, - struct $name: ident < $lifetime: lifetime > { - // Additional and optional state field. The lifetime of this field - // MUST NOT connected to the operation, in other words this must be - // able to be dropped before the operation completes. - $( - $(#[ $field_doc: meta ])* - $field: ident : $value: ty, - )? - }, - setup_state: $state: ident : $setup_ty: ty, - setup: |$setup_submission: ident, $setup_self: ident, $setup_state: tt| $setup_fn: expr, - map_result: |$self: ident, $map_flags: ident, $map_arg: ident| $map_result: expr, + AsyncIter for $name: ident $( < $( $lifetime: lifetime, )* $( $( $resources: ident $( : $trait: path )? ),+ $(; const $const_generic: ident : $const_ty: ty )? )? $(;; $gen: ident : $gen_trait: path = $gen_default: path)? > )? -> $output: ty ) => { - #[doc = concat!("[`AsyncIterator`](std::async_iter::AsyncIterator) behind [`", stringify!($type), "::", stringify!($method), "`].")] - #[derive(Debug)] - #[must_use = "`AsyncIterator`s do nothing unless polled"] - pub struct $name<$lifetime, D: $crate::fd::Descriptor = $crate::fd::File> { - /// File descriptor used in the operation. - fd: &$lifetime $crate::AsyncFd, - $( - $(#[ $field_doc ])* - $field: $value, - )? - /// State of the operation. - state: $crate::op::OpState<$setup_ty>, - } - - impl<$lifetime, D: $crate::fd::Descriptor> $name<$lifetime, D> { - #[doc = concat!("Create a new `", stringify!($name), "`.")] - const fn new(fd: &$lifetime $crate::AsyncFd, $($field: $value, )? $state : $setup_ty) -> $name<$lifetime, D> { - $name { - fd, - $( $field, )? - state: $crate::op::OpState::NotStarted($state), - } - } - - /// This is the same as the `AsyncIterator::poll_next` function, but - /// available on stable Rust. - pub fn poll_next(self: std::pin::Pin<&mut Self>, ctx: &mut std::task::Context<'_>) -> std::task::Poll>> { - // SAFETY: we're not moving anything out of `self. - let $self = unsafe { std::pin::Pin::into_inner_unchecked(self) }; - let op_index = match $self.state { - $crate::op::OpState::Running(op_index) => op_index, - $crate::op::OpState::NotStarted($setup_state) => { - let result = $self.fd.sq.add_multishot(|$setup_submission| { - let $setup_self = &mut *$self; - $setup_fn - }); - match result { - Ok(op_index) => { - $self.state = $crate::op::OpState::Running(op_index); - op_index - } - Err($crate::QueueFull(())) => { - $self.fd.sq.wait_for_submission(ctx.waker().clone()); - return std::task::Poll::Pending; - } - } - } - // We can reach this if we return an error, but not `None` - // yet. - $crate::op::OpState::Done => return std::task::Poll::Ready(std::option::Option::None), - }; - - match $self.fd.sq.poll_multishot_op(ctx, op_index) { - std::task::Poll::Ready(std::option::Option::Some(std::result::Result::Ok(($map_flags, $map_arg)))) => { - std::task::Poll::Ready(std::option::Option::Some(std::result::Result::Ok($map_result))) - }, - std::task::Poll::Ready(std::option::Option::Some(std::result::Result::Err(err))) => { - // After an error we also don't expect any more results. - $self.state = $crate::op::OpState::Done; - if let Some(libc::ECANCELED) = err.raw_os_error() { - // Operation was canceled, so we expect no more - // results. 
- std::task::Poll::Ready(std::option::Option::None) - } else { - std::task::Poll::Ready(std::option::Option::Some(std::result::Result::Err(err))) - } - }, - std::task::Poll::Ready(std::option::Option::None) => { - $self.state = $crate::op::OpState::Done; - std::task::Poll::Ready(std::option::Option::None) - }, - std::task::Poll::Pending => std::task::Poll::Pending, - } - } - } - - impl<$lifetime, D: $crate::fd::Descriptor> $crate::cancel::Cancel for $name<$lifetime, D> { - fn try_cancel(&mut self) -> $crate::cancel::CancelResult { - self.state.try_cancel(&self.fd.sq) - } - - fn cancel(&mut self) -> $crate::cancel::CancelOp { - self.state.cancel(&self.fd.sq) + impl<$( $( $lifetime, )* $( $( $resources $( : $trait )?, )+ $(const $const_generic: $const_ty, )? )? $( $gen : $gen_trait )? )?> $name<$( $( $lifetime, )* $( $( $resources, )+ $( $const_generic, )? )? $( $gen )? )?> { + /// This is the same as the [`AsyncIterator::poll_next`] function, but + /// available on stable Rust. + /// + /// [`AsyncIterator::poll_next`]: std::async_iter::AsyncIterator::poll_next + pub fn poll_next(self: ::std::pin::Pin<&mut Self>, ctx: &mut ::std::task::Context<'_>) -> ::std::task::Poll<Option<$output>> { + // SAFETY: not moving `self.0` (`s.0`), directly called `poll_next` on it. + unsafe { ::std::pin::Pin::map_unchecked_mut(self, |s| &mut s.0) }.poll_next(ctx) } } #[cfg(feature = "nightly")] - impl<$lifetime, D: $crate::fd::Descriptor> std::async_iter::AsyncIterator for $name<$lifetime, D> { - type Item = std::io::Result<$result>; + impl<$( $( $lifetime, )* $( $( $resources $( : $trait )?, )+ $(const $const_generic: $const_ty, )? )? $( $gen : $gen_trait )? )?> ::std::async_iter::AsyncIterator for $name<$( $( $lifetime, )* $( $( $resources, )+ $( $const_generic, )? )? $( $gen )? )?> { + type Item = $output; - fn poll_next(self: std::pin::Pin<&mut Self>, ctx: &mut std::task::Context<'_>) -> std::task::Poll<Option<Self::Item>> { + fn poll_next(self: ::std::pin::Pin<&mut Self>, ctx: &mut ::std::task::Context<'_>) -> ::std::task::Poll<Option<Self::Item>> { self.poll_next(ctx) } } + }; + ( + Extract for $name: ident $( < $( $lifetime: lifetime, )* $( $( $resources: ident $( : $trait: path )? ),+ $(; const $const_generic: ident : $const_ty: ty )? )? $(;; $gen: ident : $gen_trait: path = $gen_default: path)? > )? -> $output: ty + ) => { + impl<$( $( $lifetime, )* $( $( $resources $( : $trait )?, )+ $(const $const_generic: $const_ty, )? )? $( $gen : $gen_trait )? )?> $crate::extract::Extract for $name<$( $( $lifetime, )* $( $( $resources, )+ $( $const_generic, )? )? $( $gen )? )?> {} - impl<$lifetime, D: $crate::fd::Descriptor> std::ops::Drop for $name<$lifetime, D> { - fn drop(&mut self) { - if let $crate::op::OpState::Running(op_index) = self.state { - let result = self.fd.sq.cancel_op(op_index, || (), |submission| unsafe { - submission.cancel_op(op_index); - // We'll get a canceled completion event if we succeeded, which - // is sufficient to cleanup the operation. - submission.no_completion_event(); - }); - if let std::result::Result::Err(err) = result { - log::error!(concat!("dropped a10::", stringify!($name), " before canceling it, attempt to cancel failed: {}"), err); - } - } + impl<$( $( $lifetime, )* $( $( $resources $( : $trait )?, )+ $(const $const_generic: $const_ty, )? )? $( $gen : $gen_trait )? )?> ::std::future::Future for $crate::extract::Extractor<$name<$( $( $lifetime, )* $( $( $resources, )+ $( $const_generic, )? )? $( $gen )?
)?>> { + type Output = $output; + + fn poll(self: ::std::pin::Pin<&mut Self>, ctx: &mut ::std::task::Context<'_>) -> ::std::task::Poll<Self::Output> { + // SAFETY: not moving `self.0` (`s.0`), directly called `poll_extract` on it. + unsafe { ::std::pin::Pin::map_unchecked_mut(self, |s| &mut s.fut.0) }.poll_extract(ctx) } } }; + ( + $trait_name: ident for $name: ident $( < $( $lifetime: lifetime, )* $( $( $resources: ident $( : $trait: path )? ),+ $(; const $const_generic: ident : $const_ty: ty )? )? $(;; $gen: ident : $gen_trait: path = $gen_default: path)? > )? -> + ) => { + // No `$trait_name` implementation. + }; + (ignore $( $tt: tt )*) => { + // Ignore. + }; } -pub(crate) use op_async_iter; - -#[test] -fn size_assertion() { - assert_eq!(std::mem::size_of::(), 8); - // On nightly this is 24 bytes, on stable 32. - assert!(std::mem::size_of::() <= 32); - assert_eq!(std::mem::size_of::>(), 16); - // On nightly the following two are 48 bytes, on stable 56. - assert!(std::mem::size_of::() <= 56); - assert!(std::mem::size_of::>() <= 56); - assert_eq!(std::mem::size_of::>(), 16); - assert_eq!(std::mem::size_of::>(), 16); - assert_eq!(std::mem::size_of::>(), 16); - assert_eq!(std::mem::size_of::>(), 16); - assert_eq!(std::mem::size_of::>(), 16); -} +pub(crate) use {fd_iter_operation, fd_operation, iter_operation, new_operation, operation}; diff --git a/src/poll.rs b/src/poll.rs index ecf2a2a2..4fd726a1 100644 --- a/src/poll.rs +++ b/src/poll.rs @@ -9,107 +9,36 @@ //! descriptors as it doesn't make much sense to poll a direct descriptor, //! instead start the I/O operation you want to perform. //! +//! [`Future`]: std::future::Future //! [`AsyncIterator`]: std::async_iter::AsyncIterator -use std::future::Future; -use std::os::fd::{AsRawFd, BorrowedFd, RawFd}; -use std::pin::Pin; -use std::task::{self, Poll}; +use std::os::fd::{AsRawFd, BorrowedFd}; use std::{fmt, io}; -use crate::cancel::{Cancel, CancelOp, CancelResult}; -use crate::op::{poll_state, OpState}; -use crate::{man_link, QueueFull, SubmissionQueue}; +use crate::op::{iter_operation, operation, Operation}; +use crate::{man_link, sys, SubmissionQueue}; /// Wait for an event specified in `mask` on the file descriptor `fd`. /// /// This is similar to calling `poll(2)` on the file descriptor. +/// +/// # Notes +/// +/// In general it's more efficient to perform the I/O operation you want to +/// perform instead of polling for it to be ready. #[doc = man_link!(poll(2))] #[doc(alias = "poll")] #[doc(alias = "epoll")] #[doc(alias = "select")] #[allow(clippy::module_name_repetitions)] -pub fn oneshot_poll<'sq>( - sq: &'sq SubmissionQueue, - fd: BorrowedFd, - mask: libc::c_int, -) -> OneshotPoll<'sq> { - OneshotPoll { - sq, - state: OpState::NotStarted((fd.as_raw_fd(), mask)), - } -} - -/// [`Future`] behind [`oneshot_poll`].
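Aside: for contrast with the removed hand-rolled future below, here is how the reworked API might be awaited. This is a hedged sketch: the `a10::poll::oneshot_poll` path, the `libc` dependency, and an executor that drives the `Ring` are all assumptions, as the exact setup is outside this diff.

```rust
use std::io;
use std::os::fd::BorrowedFd;

use a10::poll::oneshot_poll;
use a10::SubmissionQueue;

// The `SubmissionQueue` is now taken by value instead of by reference.
async fn wait_readable(sq: SubmissionQueue, fd: BorrowedFd<'_>) -> io::Result<()> {
    let event = oneshot_poll(sq, fd, libc::POLLIN as libc::c_int).await?;
    assert!(event.is_readable());
    Ok(())
}
```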
-#[derive(Debug)] -#[must_use = "`Future`s do nothing unless polled"] -#[allow(clippy::module_name_repetitions)] -pub struct OneshotPoll<'sq> { - sq: &'sq SubmissionQueue, - state: OpState<(RawFd, libc::c_int)>, +pub fn oneshot_poll(sq: SubmissionQueue, fd: BorrowedFd, mask: libc::c_int) -> OneshotPoll { + OneshotPoll(Operation::new(sq, (), (fd.as_raw_fd(), mask))) } -impl<'sq> Future for OneshotPoll<'sq> { - type Output = io::Result<PollEvent>; - - #[allow(clippy::cast_sign_loss)] - fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll<Self::Output> { - let op_index = poll_state!( - OneshotPoll, - self.state, - self.sq, - ctx, - |submission, (fd, mask)| unsafe { - submission.poll(fd, mask as u32); - submission.set_async(); - } - ); - - match self.sq.poll_op(ctx, op_index) { - Poll::Ready(result) => { - self.state = OpState::Done; - match result { - Ok((_, events)) => Poll::Ready(Ok(PollEvent { events })), - Err(err) => Poll::Ready(Err(err)), - } - } - Poll::Pending => Poll::Pending, - } - } -} - -impl<'sq> Cancel for OneshotPoll<'sq> { - fn try_cancel(&mut self) -> CancelResult { - self.state.try_cancel(self.sq) - } - - fn cancel(&mut self) -> CancelOp { - self.state.cancel(self.sq) - } -} - -impl<'sq> Drop for OneshotPoll<'sq> { - fn drop(&mut self) { - if let OpState::Running(op_index) = self.state { - let result = self.sq.cancel_op( - op_index, - || (), - |submission| unsafe { - submission.remove_poll(op_index); - submission.set_async(); - // We'll get a canceled completion event if we succeeded, which - // is sufficient to cleanup the operation. - submission.no_completion_event(); - }, - ); - if let Err(err) = result { - log::error!( - "dropped a10::OneshotPoll before completion, attempt to cancel failed: {err}" - ); - } - } - } -} +operation!( + /// [`Future`] behind [`oneshot_poll`]. + pub struct OneshotPoll(sys::poll::OneshotPollOp) -> io::Result<PollEvent>; +); /// Returns an [`AsyncIterator`] that returns multiple events as specified /// in `mask` on the file descriptor `fd`. @@ -119,176 +48,72 @@ impl<'sq> Drop for OneshotPoll<'sq> { /// side, making this more efficient. /// /// [`AsyncIterator`]: std::async_iter::AsyncIterator +/// +/// # Notes +/// +/// In general it's more efficient to perform the I/O operation you want to +/// perform instead of polling for it to be ready. #[doc = man_link!(poll(2))] #[doc(alias = "poll")] #[doc(alias = "epoll")] #[doc(alias = "select")] #[allow(clippy::module_name_repetitions)] -pub fn multishot_poll<'sq>( - sq: &'sq SubmissionQueue, - fd: BorrowedFd, - mask: libc::c_int, -) -> MultishotPoll<'sq> { - MultishotPoll { - sq, - state: OpState::NotStarted((fd.as_raw_fd(), mask)), - } -} - -/// [`AsyncIterator`] behind [`multishot_poll`]. -/// -/// [`AsyncIterator`]: std::async_iter::AsyncIterator -#[derive(Debug)] -#[must_use = "`Future`s do nothing unless polled"] -#[allow(clippy::module_name_repetitions)] -pub struct MultishotPoll<'sq> { - sq: &'sq SubmissionQueue, - state: OpState<(RawFd, libc::c_int)>, +pub fn multishot_poll(sq: SubmissionQueue, fd: BorrowedFd, mask: libc::c_int) -> MultishotPoll { + MultishotPoll(Operation::new(sq, (), (fd.as_raw_fd(), mask))) } -impl<'sq> MultishotPoll<'sq> { - /// This is the same as the `AsyncIterator::poll_next` function, but then - /// available on stable Rust. - #[allow(clippy::cast_sign_loss)] - pub fn poll_next( - mut self: Pin<&mut Self>, - ctx: &mut task::Context<'_>, - ) -> Poll<Option<io::Result<PollEvent>>> { - // NOTE: doesn't use `poll_state!` because it uses multishot operation.
- let op_index = match self.state { - OpState::Running(op_index) => op_index, - OpState::NotStarted((fd, mask)) => { - let result = self.sq.add_multishot(|submission| unsafe { - submission.multishot_poll(fd, mask as u32); - }); - match result { - Ok(op_index) => { - self.state = OpState::Running(op_index); - op_index - } - Err(QueueFull(())) => { - self.sq.wait_for_submission(ctx.waker().clone()); - return Poll::Pending; - } - } - } - OpState::Done => return Poll::Ready(None), - }; - - match self.sq.poll_multishot_op(ctx, op_index) { - Poll::Ready(Some(Result::Ok((_, events)))) => { - Poll::Ready(Some(Result::Ok(PollEvent { events }))) - } - Poll::Ready(Some(Result::Err(err))) => { - // After an error we also don't expect any more results. - self.state = OpState::Done; - if let Some(libc::ECANCELED) = err.raw_os_error() { - // Operation was canceled, so we expect no more - // results. - Poll::Ready(None) - } else { - Poll::Ready(Some(Result::Err(err))) - } - } - Poll::Ready(None) => { - self.state = OpState::Done; - Poll::Ready(None) - } - Poll::Pending => Poll::Pending, - } - } -} - -#[cfg(feature = "nightly")] -impl<'sq> std::async_iter::AsyncIterator for MultishotPoll<'sq> { - type Item = io::Result; - - fn poll_next(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll> { - self.poll_next(ctx) - } -} - -impl<'sq> Cancel for MultishotPoll<'sq> { - fn try_cancel(&mut self) -> CancelResult { - self.state.try_cancel(self.sq) - } - - fn cancel(&mut self) -> CancelOp { - self.state.cancel(self.sq) - } -} - -impl<'sq> Drop for MultishotPoll<'sq> { - fn drop(&mut self) { - if let OpState::Running(op_index) = self.state { - let result = self.sq.cancel_op( - op_index, - || (), - |submission| unsafe { - submission.remove_poll(op_index); - // We'll get a canceled completion event if we succeeded, which - // is sufficient to cleanup the operation. - submission.no_completion_event(); - }, - ); - if let Err(err) = result { - log::error!( - "dropped a10::MultishotPoll before canceling it, attempt to cancel failed: {err}" - ); - } - } - } -} +iter_operation!( + /// [`AsyncIterator`] behind [`multishot_poll`]. + pub struct MultishotPoll(sys::poll::MultishotPollOp) -> io::Result; +); /// Event returned by [`OneshotPoll`]. #[derive(Copy, Clone)] #[allow(clippy::module_name_repetitions)] -pub struct PollEvent { - events: libc::c_int, -} +pub struct PollEvent(pub(crate) libc::c_int); impl PollEvent { /// There is data to read. #[doc(alias = "POLLIN")] pub const fn is_readable(&self) -> bool { - (self.events & libc::POLLIN as libc::c_int) != 0 + (self.0 & libc::POLLIN as libc::c_int) != 0 } /// There is some exceptional condition on the file descriptor. #[doc(alias = "POLLPRI")] pub const fn is_priority(&self) -> bool { - (self.events & libc::POLLPRI as libc::c_int) != 0 + (self.0 & libc::POLLPRI as libc::c_int) != 0 } /// Writing is now possible. #[doc(alias = "POLLOUT")] pub const fn is_writable(&self) -> bool { - (self.events & libc::POLLOUT as libc::c_int) != 0 + (self.0 & libc::POLLOUT as libc::c_int) != 0 } /// Stream socket peer closed connection, or shut down writing half of /// connection. #[doc(alias = "POLLRDHUP")] pub const fn is_read_hup(&self) -> bool { - (self.events & libc::POLLRDHUP as libc::c_int) != 0 + (self.0 & libc::POLLRDHUP as libc::c_int) != 0 } /// Error condition. #[doc(alias = "POLLERR")] pub const fn is_error(&self) -> bool { - (self.events & libc::POLLERR as libc::c_int) != 0 + (self.0 & libc::POLLERR as libc::c_int) != 0 } /// Hang up. 
#[doc(alias = "POLLHUP")] pub const fn is_hup(&self) -> bool { - (self.events & libc::POLLHUP as libc::c_int) != 0 + (self.0 & libc::POLLHUP as libc::c_int) != 0 } /// Returns a bitmask indicating which events occured, see the `poll(2)` /// system call manual and `libc::POLL*` constants, e.g. `libc::POLLIN`. pub const fn events_mask(&self) -> libc::c_int { - self.events + self.0 } } @@ -309,9 +134,9 @@ const KNOWN_EVENTS: [(libc::c_short, &str); 11] = [ impl fmt::Debug for PollEvent { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let events = KNOWN_EVENTS.into_iter().filter_map(|(event, name)| { - (self.events & libc::c_int::from(event) != 0).then_some(name) - }); + let events = KNOWN_EVENTS + .into_iter() + .filter_map(|(event, name)| (self.0 & libc::c_int::from(event) != 0).then_some(name)); f.debug_list().entries(events).finish() } } diff --git a/src/process.rs b/src/process.rs index 5cfbc5b0..23122c8f 100644 --- a/src/process.rs +++ b/src/process.rs @@ -3,22 +3,15 @@ //! In this module process signal handling is also supported. For that See the //! documentation of [`Signals`]. -use std::cell::UnsafeCell; -use std::future::Future; -use std::mem::{self, size_of, ManuallyDrop, MaybeUninit}; -use std::os::fd::RawFd; +use std::mem::{self, ManuallyDrop, MaybeUninit}; use std::pin::Pin; use std::process::Child; use std::task::{self, Poll}; use std::{fmt, io, ptr}; -use log::{error, trace}; - -use crate::cancel::{Cancel, CancelOp, CancelResult}; use crate::fd::{AsyncFd, Descriptor, Direct, File}; -use crate::libc::{self, syscall}; -use crate::op::{op_future, poll_state, OpState, NO_OFFSET}; -use crate::{man_link, QueueFull, SubmissionQueue}; +use crate::op::{self, fd_operation, operation, FdIter, FdOp, FdOperation, Operation}; +use crate::{man_link, sys, syscall, SubmissionQueue}; /// Wait on the child `process`. /// @@ -34,11 +27,9 @@ pub fn wait_on(sq: SubmissionQueue, process: &Child, options: libc::c_int) -> Wa #[doc = man_link!(waitid(2))] #[doc(alias = "waitid")] pub fn wait(sq: SubmissionQueue, wait: WaitOn, options: libc::c_int) -> WaitId { - WaitId { - sq, - state: OpState::NotStarted((wait, options)), - info: Some(Box::new(UnsafeCell::new(MaybeUninit::uninit()))), - } + // SAFETY: fully zeroed `libc::signalfd_siginfo` is a valid value. + let info = unsafe { Box::new(mem::zeroed()) }; + WaitId(Operation::new(sq, info, (wait, options))) } /// Defines on what process (or processes) to wait. @@ -57,93 +48,10 @@ pub enum WaitOn { All, } -/// [`Future`] behind [`wait`]. -#[derive(Debug)] -#[must_use = "`Future`s do nothing unless polled"] -pub struct WaitId { - sq: SubmissionQueue, - /// Buffer to write into, needs to stay in memory so the kernel can - /// access it safely. - info: Option>>>, - state: OpState<(WaitOn, libc::c_int)>, -} - -// SAFETY: `!Sync` due to `UnsafeCell`, but it's actually `Sync`. -unsafe impl Sync for WaitId {} -unsafe impl Send for WaitId {} - -impl Future for WaitId { - type Output = io::Result>; - - fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - let op_index = poll_state!( - WaitId, - self.state, - self.sq, - ctx, - |submission, (wait, options)| unsafe { - let (id_type, pid) = match wait { - WaitOn::Process(pid) => (libc::P_PID, pid), - WaitOn::Group(pid) => (libc::P_PGID, pid), - WaitOn::All => (libc::P_ALL, 0), // NOTE: id is ignored. 
- }; - let info = self.info.as_ref().unwrap().get().cast(); - submission.waitid(pid, id_type, options, info); - } - ); - - match self.sq.poll_op(ctx, op_index) { - Poll::Ready(result) => { - self.state = OpState::Done; - match result { - Ok((_, _)) => Poll::Ready(Ok(unsafe { - Box::from_raw(Box::into_raw(self.info.take().unwrap()).cast()) - })), - Err(err) => Poll::Ready(Err(err)), - } - } - Poll::Pending => Poll::Pending, - } - } -} - -impl Cancel for WaitId { - fn try_cancel(&mut self) -> CancelResult { - self.state.try_cancel(&self.sq) - } - - fn cancel(&mut self) -> CancelOp { - self.state.cancel(&self.sq) - } -} - -impl Drop for WaitId { - fn drop(&mut self) { - if let Some(info) = self.info.take() { - match self.state { - OpState::Running(op_index) => { - // Only drop the signal `info` field once we know the - // operation has finished, otherwise the kernel might write - // into memory we have deallocated. - let result = self.sq.cancel_op( - op_index, - || info, - |submission| unsafe { - submission.cancel_op(op_index); - // We'll get a canceled completion event if we succeeded, which - // is sufficient to cleanup the operation. - submission.no_completion_event(); - }, - ); - if let Err(err) = result { - log::error!("dropped a10::WaitId before canceling it, attempt to cancel failed: {err}"); - } - } - OpState::NotStarted((_, _)) | OpState::Done => drop(info), - } - } - } -} +operation!( + /// [`Future`] behind [`wait_on`] and [`wait`]. + pub struct WaitId(sys::process::WaitIdOp) -> io::Result>; +); /// Notification of process signals. /// @@ -201,16 +109,12 @@ pub struct Signals { signals: SignalSet, } -/// Wrapper around [`libc::sigset_t`] to implement [`fmt::Debug`]. -#[repr(transparent)] -struct SignalSet(libc::sigset_t); - impl Signals { /// Create a new signal notifier from a signal set. pub fn from_set(sq: SubmissionQueue, signals: libc::sigset_t) -> io::Result { let signals = SignalSet(signals); - trace!(signals:? = signals; "setting up signal handling"); - let fd = libc::syscall!(signalfd(-1, &signals.0, libc::SFD_CLOEXEC))?; + log::trace!(signals:? = signals; "setting up signal handling"); + let fd = syscall!(signalfd(-1, &signals.0, libc::SFD_CLOEXEC))?; // SAFETY: `signalfd(2)` ensures that `fd` is valid. let fd = unsafe { AsyncFd::from_raw_fd(fd, sq) }; // Block all `signals` as we're going to read them from the signalfd. @@ -241,21 +145,32 @@ impl Signals { /// /// See [`AsyncFd::to_direct_descriptor`]. pub fn to_direct_descriptor(self) -> ToSignalsDirect { + let sq = self.fd.sq().clone(); let fd = self.fd.fd(); - ToSignalsDirect { - signals: ManuallyDrop::new(self), - direct_fd: ManuallyDrop::new(Box::new(UnsafeCell::new(fd))), - state: OpState::NotStarted(()), - } + ToSignalsDirect(Operation::new(sq, (self, Box::new(fd)), ())) + } + + /// Change the file descriptor on the `Signals`. + /// + /// # Safety + /// + /// Caller must ensure `fd` is a valid signalfd descriptor. + pub(crate) unsafe fn change_fd(self, fd: AsyncFd) -> Signals { + let Signals { fd: _, signals: _ } = &self; + // SAFETY: reading or dropping all fields of `Signals`. + let mut signals = ManuallyDrop::new(self); + unsafe { ptr::drop_in_place(&mut signals.fd) } + let signals = unsafe { ptr::read(&signals.signals) }; + Signals { fd, signals } } } impl Signals { /// Receive a signal. pub fn receive<'fd>(&'fd self) -> ReceiveSignal<'fd, D> { - // TODO: replace with `Box::new_uninit` once `new_uninit` is stable.
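The new `to_direct_descriptor` collapses the old hand-rolled future into a single `operation!`-generated one. A sketch of its use, assuming `Signals` defaults its descriptor parameter to a regular file descriptor, that `Direct` is exported at `a10::fd::Direct`, and that the future resolves to `io::Result<Signals<Direct>>` (the generic parameters are elided in these hunks):

```rust
use std::io;

use a10::fd::Direct;
use a10::process::Signals;

// Convert the signalfd backing `Signals` into a direct descriptor.
async fn into_direct(signals: Signals) -> io::Result<Signals<Direct>> {
    signals.to_direct_descriptor().await
}
```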
- let info = Box::new(MaybeUninit::uninit()); - ReceiveSignal::new(&self.fd, info, ()) + // SAFETY: fully zeroed `libc::signalfd_siginfo` is a valid value. + let info = unsafe { Box::new(mem::zeroed()) }; + ReceiveSignal(FdOperation::new(&self.fd, info, ())) } /// Receive multiple signals. @@ -264,12 +179,11 @@ impl Signals { /// (the future behind `Signals::receive`). This is useful if you don't want /// to deal with the `'fd` lifetime. pub fn receive_signals(self) -> ReceiveSignals { + // SAFETY: fully zeroed `libc::signalfd_siginfo` is a valid value. + let resources = unsafe { Box::new(mem::zeroed()) }; ReceiveSignals { signals: self, - // TODO: replace with `Box::new_zeroed` once stable. - // SAFETY: all zero is valid for `signalfd_siginfo`. - info: ManuallyDrop::new(Box::new(unsafe { mem::zeroed() })), - state: OpState::NotStarted(()), + state: op::State::new(resources, ()), } } } @@ -283,6 +197,106 @@ impl fmt::Debug for Signals { } } +impl Drop for Signals { + fn drop(&mut self) { + // Reverse the blocking of signals. + if let Err(err) = sigprocmask(libc::SIG_UNBLOCK, &self.signals.0) { + log::error!(signals:? = self.signals; "error unblocking signals: {err}"); + } + } +} + +operation!( + /// [`Future`] behind [`Signals::to_direct_descriptor`]. + pub struct ToSignalsDirect(sys::process::ToSignalsDirectOp) -> io::Result>; +); + +fd_operation!( + /// [`Future`] behind [`Signals::receive`]. + pub struct ReceiveSignal(sys::process::ReceiveSignalOp) -> io::Result; +); + +/// [`AsyncIterator`] behind [`Signals::receive_signals`]. +/// +/// [`AsyncIterator`]: std::async_iter::AsyncIterator +#[must_use = "`AsyncIterator`s do nothing unless polled"] +#[derive(Debug)] +pub struct ReceiveSignals { + signals: Signals, + state: op::State, ()>, +} + +impl ReceiveSignals { + /// This is the same as the [`AsyncIterator::poll_next`] function, but + /// available on stable Rust. + /// + /// [`AsyncIterator::poll_next`]: std::async_iter::AsyncIterator::poll_next + pub fn poll_next( + self: Pin<&mut Self>, + ctx: &mut task::Context<'_>, + ) -> Poll>> { + // SAFETY: not moving `signals` or `state`. + let ReceiveSignals { signals, state } = unsafe { self.get_unchecked_mut() }; + let fd = &signals.fd; + let mut reset = None; + // NOTE: not using `poll_next` as it's not a multishot operation. + let result = state.poll( + ctx, + fd.sq(), + |resources, args, submission| { + sys::process::ReceiveSignalOp::fill_submission(fd, resources, args, submission); + D::use_flags(submission); + }, + |resources, args, state| { + sys::process::ReceiveSignalOp::check_result(fd, resources, args, state) + }, + |_, mut resources, output| { + let info = sys::process::ReceiveSignalOp::map_next(fd, &mut resources, output); + reset = Some(resources); + info + }, + ); + if let Some(resources) = reset { + *state = op::State::new(resources, ()); + } + result.map(Some) + } + + /// Returns the underlying [`Signals`]. + pub fn into_inner(self) -> Signals { + let mut this = ManuallyDrop::new(self); + let ReceiveSignals { signals, state } = &mut *this; + // SAFETY: not using `state` any more. + unsafe { + state.drop(signals.fd.sq()); + ptr::drop_in_place(state); + } + // SAFETY: we're not dropping `self` (due to the `ManuallyDrop`), so + // `signals` is safe to return.
+ unsafe { ptr::read(signals) } + } +} + +#[cfg(feature = "nightly")] +impl std::async_iter::AsyncIterator for ReceiveSignals { + type Item = io::Result; + + fn poll_next(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll> { + self.poll_next(ctx) + } +} + +impl Drop for ReceiveSignals { + fn drop(&mut self) { + // SAFETY: we're in the `Drop` implementation. + unsafe { self.state.drop(self.signals.fd.sq()) } + } +} + +/// Wrapper around [`libc::sigset_t`] to implement [`fmt::Debug`]. +#[repr(transparent)] +struct SignalSet(libc::sigset_t); + /// Create a `sigset_t` from `signals`. fn create_sigset>(signals: I) -> io::Result { let mut set: MaybeUninit = MaybeUninit::uninit(); @@ -339,294 +353,11 @@ impl fmt::Debug for SignalSet { // SAFETY: we ensure the pointer to the signal set is valid. (unsafe { libc::sigismember(&self.0, signal) } == 1).then_some(name) }); - f.debug_list().entries(signals).finish() - } -} - -impl Drop for Signals { - fn drop(&mut self) { - // Reverse the blocking of signals. - if let Err(err) = sigprocmask(libc::SIG_UNBLOCK, &self.signals.0) { - error!(signals:? = self.signals; "error unblocking signals: {err}"); - } + f.debug_set().entries(signals).finish() } } fn sigprocmask(how: libc::c_int, set: &libc::sigset_t) -> io::Result<()> { - libc::syscall!(pthread_sigmask(how, set, ptr::null_mut()))?; + syscall!(pthread_sigmask(how, set, ptr::null_mut()))?; Ok(()) } - -/// [`Future`] behind [`Signals::to_direct_descriptor`]. -#[derive(Debug)] -pub struct ToSignalsDirect { - /// The content of `signals` will be take by the newly returned - /// `Signals` value. - signals: ManuallyDrop>, - /// The file descriptor we're changing into a direct descriptor, needs to - /// stay in memory so the kernel can access it safely. - direct_fd: ManuallyDrop>>, - state: OpState<()>, -} - -// NOTE: keep this in sync with the `fd::ToDirect` implementation. -impl Future for ToSignalsDirect { - type Output = io::Result>; - - fn poll(self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { - let this = Pin::get_mut(self); - let op_index = poll_state!( - ToSignalsDirect, - this.state, - this.signals.fd.sq, - ctx, - |submission, ()| unsafe { - submission.create_direct_descriptor(this.direct_fd.get(), 1); - } - ); - - match this.signals.fd.sq.poll_op(ctx, op_index) { - Poll::Ready(Ok((_, res))) => { - this.state = OpState::Done; - debug_assert!(res == 1); - let sq = this.signals.fd.sq.clone(); - let direct_fd = unsafe { - // SAFETY: the kernel is done modifying the descriptor - // value. - let direct_fd = ptr::read(this.direct_fd.get()); - // SAFETY: the files update operation ensures that - // `direct_fd` is valid. - AsyncFd::from_direct_fd(direct_fd, sq) - }; - let direct_signals = Signals { - fd: direct_fd, - // SAFETY: we're not dropping `signals`, thus not dropping - // `signals.signals`. - signals: unsafe { ptr::read(&this.signals.signals) }, - }; - // SAFETY: we're not dropping `signals`, thus not dropping - // `signals.fd`. But we don't want to leak the file descriptor, - // so we're manually dropping it. - unsafe { ptr::drop_in_place(&mut this.signals.fd) }; - // NOTE: we don't run the `Drop` implementation of `Signals`. - Poll::Ready(Ok(direct_signals)) - } - Poll::Ready(Err(err)) => { - this.state = OpState::Done; // Consider the error as fatal. - - // Drop the file descriptor and remove the error handling. 
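Putting the `Signals` API above together, a hedged sketch of receiving a single signal; building the `sigset_t` uses raw `libc` calls, and the exact type behind the resolved `info` is elided in this hunk (its `ssi_signo` field is assumed per `signalfd(2)`):

```rust
use std::io;
use std::mem;

use a10::process::Signals;
use a10::SubmissionQueue;

async fn wait_usr1(sq: SubmissionQueue) -> io::Result<()> {
    // Build a signal set containing only SIGUSR1 using raw libc calls.
    let mut set: libc::sigset_t = unsafe { mem::zeroed() };
    unsafe {
        libc::sigemptyset(&mut set);
        libc::sigaddset(&mut set, libc::SIGUSR1);
    }
    // `from_set` blocks the signals and reads them from a signalfd instead.
    let signals = Signals::from_set(sq, set)?;
    let info = signals.receive().await?;
    assert_eq!(info.ssi_signo, libc::SIGUSR1 as u32);
    Ok(())
}
```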
- unsafe { ManuallyDrop::drop(&mut this.signals) } - Poll::Ready(Err(err)) - } - Poll::Pending => Poll::Pending, - } - } -} - -impl Drop for ToSignalsDirect { - fn drop(&mut self) { - match self.state { - OpState::Running(op_index) => { - // Can't drop `direct_fd` as the kernel might be writing into - // it, so we'll attempt to cancel the operation and delay the - // deallocting of `direct_fd`. - let direct_fd = unsafe { ManuallyDrop::take(&mut self.direct_fd) }; - // Similar for `signals`, we don't want to file descriptor to be - // reused and turn that into a direct descriptor. - let signals = unsafe { ManuallyDrop::take(&mut self.signals) }; - let result = self.signals.fd.sq.cancel_op( - op_index, - || Box::from((signals, direct_fd)), - |submission| unsafe { - submission.cancel_op(op_index); - // We'll get a canceled completion event if we succeeded, which - // is sufficient to cleanup the operation. - submission.no_completion_event(); - }, - ); - if let Err(err) = result { - log::error!( - "dropped a10::ToSignalsDirect before canceling it, attempt to cancel failed: {err}" - ); - } - } - OpState::NotStarted(()) => unsafe { - // Make sure we drop the `Signals` to not leak the file descriptor - // and ensure we remove our signal handling. - ManuallyDrop::drop(&mut self.signals); - ManuallyDrop::drop(&mut self.direct_fd); - }, - OpState::Done => unsafe { - // Signals was returned in the `Future` impl. - ManuallyDrop::drop(&mut self.direct_fd); - }, - } - } -} - -// ReceiveSignal. -op_future! { - fn Signals::receive -> Box, - struct ReceiveSignal<'fd> { - /// Buffer to write into, needs to stay in memory so the kernel can - /// access it safely. - info: Box>, - }, - setup_state: _unused: (), - setup: |submission, fd, (info,), _unused| unsafe { - let ptr = (**info).as_mut_ptr().cast(); - submission.read_at(fd.fd(), ptr, size_of::() as u32, NO_OFFSET); - submission.set_async(); - D::use_flags(submission); - }, - map_result: |this, (info,), n| { - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - { debug_assert_eq!(n as usize, size_of::()) }; - // TODO: replace with `Box::assume_init` once `new_uninit` is stable. - // SAFETY: the kernel initialised the info allocation for us as part of - // the read call. - Ok(unsafe { Box::from_raw(Box::into_raw(info).cast()) }) - }, -} - -/// [`AsyncIterator`] behind [`Signals::receive_signals`]. -/// -/// [`AsyncIterator`]: std::async_iter::AsyncIterator -#[must_use = "`Future`s do nothing unless polled"] -#[allow(clippy::module_name_repetitions)] -pub struct ReceiveSignals { - signals: Signals, - info: ManuallyDrop>>, - state: OpState<()>, -} - -// SAFETY: `!Sync` due to `UnsafeCell`, but it's actually `Sync`. -unsafe impl Sync for ReceiveSignals {} -unsafe impl Send for ReceiveSignals {} - -impl ReceiveSignals { - /// Poll the next signal. - pub fn poll_signal<'a>( - &'a mut self, - ctx: &mut task::Context<'_>, - ) -> Poll>> { - let ReceiveSignals { - signals, - info, - state, - } = self; - // NOTE: can't use `poll_state!` because we return `None` when the - // operation is done. 
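With the old `poll_signal` gone, the stable-Rust entry point is the inherent `poll_next` defined above, which `std::future::poll_fn` can adapt into an awaitable loop. A sketch, assuming the default descriptor parameter and an owned item type (both elided in these hunks):

```rust
use std::future::poll_fn;
use std::pin::pin;

use a10::process::ReceiveSignals;

async fn drain(receiver: ReceiveSignals) {
    let mut receiver = pin!(receiver);
    // `poll_fn` adapts the inherent `poll_next` on stable Rust, where the
    // `AsyncIterator` implementation is not available.
    while let Some(result) = poll_fn(|ctx| receiver.as_mut().poll_next(ctx)).await {
        match result {
            Ok(_info) => { /* handle the signal info */ }
            Err(err) => {
                eprintln!("receiving signal failed: {err}");
                break;
            }
        }
    }
}
```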
- let op_index = match state { - OpState::Running(op_index) => *op_index, - OpState::NotStarted(()) => { - let result = signals.fd.sq.add(|submission| unsafe { - submission.read_at( - signals.fd.fd(), - info.get().cast(), - size_of::() as u32, - NO_OFFSET, - ); - submission.set_async(); - D::use_flags(submission); - }); - match result { - Ok(op_index) => { - *state = OpState::Running(op_index); - op_index - } - Err(QueueFull(())) => { - signals.fd.sq.wait_for_submission(ctx.waker().clone()); - return Poll::Pending; - } - } - } - OpState::Done => return Poll::Ready(None), - }; - - match signals.fd.sq.poll_op(ctx, op_index) { - Poll::Ready(Ok((_, n))) => { - *state = OpState::Done; - // Reset the state so that we start reading another signal in - // the next call. - *state = OpState::NotStarted(()); - #[allow(clippy::cast_sign_loss)] // Negative values are mapped to errors. - { - debug_assert_eq!(n as usize, size_of::()); - } - // SAFETY: the kernel initialised the info allocation for us as - // part of the read call. - Poll::Ready(Some(Ok(unsafe { &*info.get() }))) - } - Poll::Ready(Err(err)) => { - *state = OpState::Done; // Consider the error as fatal. - Poll::Ready(Some(Err(err))) - } - Poll::Pending => Poll::Pending, - } - } - - /// Returns the underlying [`Signals`]. - pub fn into_inner(self) -> Signals { - let mut this = ManuallyDrop::new(self); - unsafe { - // SAFETY: not using `self.info`. - this._drop(); - // SAFETY: we're not dropping `self`/ (due to the the - // `ManuallyDrop`, so `this.signals` is safe to return. - ptr::read(&this.signals) - } - } - - /// This [`Drop`]s the `ReceiveSignals`. - /// - /// # Safety - /// - /// This takes `self.info`, after this call returns `self.info` is - /// uninitialised and MUST not be used any more. - unsafe fn _drop(&mut self) { - let signal_info = unsafe { ManuallyDrop::take(&mut self.info) }; - match self.state { - OpState::Running(op_index) => { - // Only drop the signal `info` field once we know the operation has - // finished, otherwise the kernel might write into memory we have - // deallocated. - // SAFETY: we're in the `Drop` implementation, so `self.info` can't - // be used anymore making it safe to take ownership. - let result = self.signals.fd.sq.cancel_op( - op_index, - || signal_info, - |submission| unsafe { - submission.cancel_op(op_index); - // We'll get a canceled completion event if we succeeded, which - // is sufficient to cleanup the operation. - submission.no_completion_event(); - }, - ); - if let Err(err) = result { - log::error!( - "dropped a10::ReceiveSignals before canceling it, attempt to cancel failed: {err}" - ); - } - } - OpState::NotStarted(()) | OpState::Done => drop(signal_info), - } - } -} - -#[allow(clippy::missing_fields_in_debug)] -impl fmt::Debug for ReceiveSignals { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("ReceiveSignals") - .field("signals", &self.signals) - // NOTE: `info` can't be read as the kernel might be writing to it. - .field("state", &self.state) - .finish() - } -} - -impl Drop for ReceiveSignals { - fn drop(&mut self) { - unsafe { self._drop() } - } -} diff --git a/src/sq.rs b/src/sq.rs new file mode 100644 index 00000000..735feca4 --- /dev/null +++ b/src/sq.rs @@ -0,0 +1,399 @@ +//! Submission Queue. 
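The new `sq` module that follows is built around a fill-closure submission pattern: callers claim a slot, fill the submission entry in place, and park a waker when the queue is full. A self-contained mock of that pattern (deliberately not a10 code; all names here are illustrative):

```rust
use std::task::Waker;

struct Submission {
    user_data: u64,
}

struct QueueFull;

struct Queue {
    entries: Vec<Submission>,
    capacity: usize,
    blocked: Vec<Waker>,
}

impl Queue {
    // On a full queue the waker is parked and `QueueFull` is returned,
    // mirroring the behaviour of `sq::Queue::submit` below.
    fn submit<F: FnOnce(&mut Submission)>(&mut self, fill: F, waker: Waker) -> Result<u64, QueueFull> {
        if self.entries.len() == self.capacity {
            self.blocked.push(waker);
            return Err(QueueFull);
        }
        let id = self.entries.len() as u64;
        let mut submission = Submission { user_data: 0 };
        fill(&mut submission);
        submission.user_data = id; // mirrors `Submission::set_id`
        self.entries.push(submission);
        Ok(id)
    }
}

fn main() {
    let mut queue = Queue { entries: Vec::new(), capacity: 2, blocked: Vec::new() };
    let id = queue.submit(|_submission| { /* fill fields here */ }, Waker::noop().clone());
    assert!(id.is_ok());
    assert_eq!(queue.entries[0].user_data, 0);
}
```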
+ +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, MutexGuard}; +use std::{fmt, io, mem, task}; + +use crate::drop_waker::{drop_task_waker, DropWake}; +use crate::{cq, Implementation, OperationId, QueuedOperation, SharedState}; + +/// Queue of submissions. +pub(crate) struct Queue { + shared: Arc>, +} + +impl Queue { + pub(crate) const fn new(shared: Arc>) -> Queue { + Queue { shared } + } + + /// Add a new submission, returns the id (index). + /// + /// If this returns `QueueFull` it will use the `waker` to wait for a + /// submission slot. + pub(crate) fn submit(&self, fill: F, waker: task::Waker) -> Result + where + F: FnOnce(&mut ::Submission), + { + self.submit2( + <<::Event as cq::Event>::State as cq::OperationState>::new, + fill, + waker, + ) + } + + /// Add a new multishot submission, returns the id (index). + /// + /// If this returns `QueueFull` it will use the `waker` to wait for a + /// submission slot. + pub(crate) fn submit_multishot( + &self, + fill: F, + waker: task::Waker, + ) -> Result + where + F: FnOnce(&mut ::Submission), + { + self.submit2( + <<::Event as cq::Event>::State as cq::OperationState>::new_multishot, + fill, + waker, + ) + } + + /// Add a new submission, returns the id (index). + /// + /// If this returns `QueueFull` it will use the `waker` to wait for a + /// submission slot. + fn submit2( + &self, + new_state: S, + fill: F, + waker: task::Waker, + ) -> Result + where + F: FnOnce(&mut ::Submission), + S: FnOnce() -> <::Event as cq::Event>::State, + { + let op_id = self.queue2(new_state, waker)?; + + // SAFETY: we just got the `op_id` above so we own it. Furthermore we + // don't use it in case an error is returned. + unsafe { self.submit_with_id(op_id, fill)? }; + + Ok(op_id) + } + + /// Queue a new multishot operation without submitting it, returns the + /// operation id. + pub(crate) fn queue_multishot(&self) -> Result { + self.queue2( + <<::Event as cq::Event>::State as cq::OperationState>::new_multishot, + task::Waker::noop().clone(), + ) + } + + fn queue2(&self, new_state: S, waker: task::Waker) -> Result + where + S: FnOnce() -> <::Event as cq::Event>::State, + { + // Get an `OperationId` to the queued operation list. + let shared = &*self.shared; + let Some(op_id) = shared.op_ids.next_available() else { + self.wait_for_submission(waker); + return Err(QueueFull); + }; + + let queued_op = QueuedOperation::new(new_state(), waker); + // SAFETY: the `AtomicBitMap` always returns valid indices for + // `op_queue` (it's the whole point of it). + { + let mut op = shared.queued_ops[op_id].lock().unwrap(); + debug_assert!(op.is_none()); + *op = Some(queued_op); + } + + Ok(op_id) + } + + /// Re-adds a submission, reusing `op_id`. + /// + /// # Safety + /// + /// The caller must ensure that `op_id` is valid and owned by them. + /// + /// If this returns `QueueFull` `op_id` becomes invalid. + pub(crate) unsafe fn resubmit(&self, op_id: OperationId, fill: F) -> Result<(), QueueFull> + where + F: FnOnce(&mut ::Submission), + { + self.submit_with_id(op_id, fill) + } + + /// Add a new submission using an existing operation `id`. + /// + /// # Safety + /// + /// The caller must ensure that `op_id` is valid and owned by them. + /// + /// If this returns `QueueFull` it will use `op_id` to remove the queued + /// operation, invalidating `op_id`, and use its waker to wait for a + /// submission slot.
+ unsafe fn submit_with_id(&self, op_id: OperationId, fill: F) -> Result<(), QueueFull> + where + F: FnOnce(&mut ::Submission), + { + let shared = &*self.shared; + let result = shared + .submissions + .add(&shared.data, &shared.is_polling, |submission| { + fill(submission); + submission.set_id(op_id); + }); + match result { + Ok(()) => Ok(()), + Err(QueueFull) => { + // Release operation slot. + // SAFETY: `unwrap`s are safe as the caller must ensure it's + // valid. + let queued_op = { shared.queued_ops[op_id].lock().unwrap().take().unwrap() }; + shared.op_ids.make_available(op_id); + self.wait_for_submission(queued_op.waker); + Err(QueueFull) + } + } + } + + /// Cancel an operation with `op_id`. + /// + /// # Safety + /// + /// After this function is called the queued operation with `op_id` may no + /// longer be accessed. + pub(crate) unsafe fn cancel(&self, op_id: OperationId, resources: T) { + let shared = &*self.shared; + let mut queued_op_slot = { shared.queued_ops[op_id].lock().unwrap() }; + let queued_op = { queued_op_slot.as_mut().unwrap() }; + + if queued_op.done { + // Easy path, the operation has already been completed. + *queued_op_slot = None; + // Unlock before dropping `resources`, which might take a while. + drop(queued_op_slot); + shared.op_ids.make_available(op_id); + + // We can safely drop the resources. + drop(resources); + return; + } + + // Harder path, the operation is not done, but the Future holding the + // resource is about to be dropped, so we need to cancel the operation. + match shared + .submissions + .cancel(&shared.data, &shared.is_polling, op_id) + { + Cancelled::Immediate => { + // Operation has been cancelled, we can drop the resources and + // make the slot available. + *queued_op_slot = None; + // Unlock before dropping `resources`, which might take a while. + drop(queued_op_slot); + shared.op_ids.make_available(op_id); + + // We can safely drop the resources. + drop(resources); + } + Cancelled::Async => { + // Hardest path, the operation is cancelled asynchronously, + // which means the kernel still has access to the resources and + // we can't drop them yet. + // + // We need to do two things: + // 1. Delay the dropping of `resources` until the kernel is done + // with the operation. + // 2. Delay making the queued operation slot available until the + // kernel is done with the operation. + // + // We achieve 1 by creating a special waker that just drops the + // resources (created by `drop_task_waker`). + // 2. is achieved by `cq::Queue::poll`, which makes the slot + // available if the operation is dropped and expects no more + // events. + queued_op.dropped = true; + if mem::needs_drop::() { + // SAFETY: not cloning the waker. + queued_op.waker = unsafe { drop_task_waker(resources) }; + } + } + } + } + + /// Wait for a submission slot, waking `waker` once one is available. + pub(crate) fn wait_for_submission(&self, waker: task::Waker) { + log::trace!(waker:? = waker; "adding blocked future"); + self.shared.blocked_futures.lock().unwrap().push(waker); + } + + pub(crate) fn wake(&self) { + if !self.shared.is_polling.load(Ordering::Acquire) { + // Not polling, no need to wake up. + return; + } + + if let Err(err) = self.shared.submissions.wake(&self.shared.data) { + log::error!("failed to wake a10::Ring: {err}"); + } + } + + /// Get the queued operation with `id`. + /// + /// # Safety + /// + /// The `id` must come from [`Queue::submit`] and must not have been + /// invalidated, e.g. by [`Queue::resubmit`].
+ #[allow(clippy::type_complexity)] + pub(crate) unsafe fn get_op( + &self, + op_id: OperationId, + ) -> MutexGuard< + Option::Event as cq::Event>::State>>, + > { + // SAFETY: we don't poison locks. + self.shared.queued_ops[op_id].lock().unwrap() + } + + /// Make operation with `id` available. + /// + /// # Safety + /// + /// The `id` must come from [`Queue::submit`] and must not have been + /// invalidated, e.g. by [`Queue::resubmit`]. + /// + /// After this call `id` is invalid. + #[allow(clippy::type_complexity)] + pub(crate) unsafe fn make_op_available( + &self, + op_id: OperationId, + mut op: MutexGuard< + Option< + QueuedOperation<<::Event as cq::Event>::State>, + >, + >, + ) { + // SAFETY: we don't poison locks. + *op = None; + drop(op); + self.shared.op_ids.make_available(op_id); + } + + /// Returns the implementation-specific shared data. + pub(crate) fn shared_data(&self) -> &I::Shared { + &self.shared.data + } +} + +#[cfg(any(target_os = "android", target_os = "linux"))] +impl Queue { + /// Add a new submission, without waiting for a result. + /// + /// This marks the submission to not generate a completion event (as it will + /// be discarded anyway). + pub(crate) fn submit_no_completion(&self, fill: F) -> Result<(), QueueFull> + where + F: FnOnce(&mut crate::sys::Submission), + { + let shared = &*self.shared; + shared + .submissions + .add(&shared.data, &shared.is_polling, |submission| { + fill(submission); + submission.no_completion_event(); + submission.set_id(crate::NO_COMPLETION_ID); + }) + } +} + +impl Clone for Queue { + fn clone(&self) -> Self { + Queue { + shared: self.shared.clone(), + } + } + + fn clone_from(&mut self, source: &Self) { + self.shared.clone_from(&source.shared); + } +} + +impl fmt::Debug for Queue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("sq::Queue") + .field("shared", &self.shared) + .finish() + } +} + +/// Submit operations. +pub(crate) trait Submissions: fmt::Debug { + /// Data shared between the submission and completion queues. + type Shared: fmt::Debug + Sized; + + /// Type of the submission. + type Submission: Submission; + + /// Try to add a new submission. + fn add( + &self, + shared: &Self::Shared, + is_polling: &AtomicBool, + submit: F, + ) -> Result<(), QueueFull> + where + F: FnOnce(&mut Self::Submission); + + /// Try to cancel an operation. + fn cancel( + &self, + shared: &Self::Shared, + is_polling: &AtomicBool, + op_id: OperationId, + ) -> Cancelled; + + /// Wake a polling thread. + fn wake(&self, shared: &Self::Shared) -> io::Result<()>; +} + +/// Result of [cancelling] an operation. +/// +/// [cancelling]: Submissions::cancel +pub(crate) enum Cancelled { + /// Operation is cancelled synchronously; it is already cancelled. + Immediate, + /// Operation is cancelled asynchronously; it is still in progress. + Async, +} + +/// Submission event. +pub(crate) trait Submission: fmt::Debug { + /// Set the identifier of the operation. + /// + /// This must cause the relevant [`cq::Event::id`] of the completion event + /// to return `id`. + fn set_id(&mut self, id: OperationId); +} + +/// Submission queue is full.
+pub(crate) struct QueueFull; + +impl From for io::Error { + fn from(_: QueueFull) -> io::Error { + #[cfg(not(feature = "nightly"))] + let kind = io::ErrorKind::Other; + #[cfg(feature = "nightly")] + let kind = io::ErrorKind::ResourceBusy; + io::Error::new(kind, "submission queue is full") + } +} + +impl fmt::Debug for QueueFull { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("QueueFull").finish() + } +} + +impl fmt::Display for QueueFull { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("`a10::Ring` submission queue is full") + } +} diff --git a/src/sys.rs b/src/sys.rs deleted file mode 100644 index d5cd0890..00000000 --- a/src/sys.rs +++ /dev/null @@ -1,3150 +0,0 @@ -//! Code that should be moved to libc once C libraries have a wrapper. - -#![allow(warnings, clippy::all, clippy::pedantic, clippy::nursery)] - -/// Helper macro to execute a system call that returns an `io::Result`. -macro_rules! syscall { - ($fn: ident ( $($arg: expr),* $(,)? ) ) => {{ - let res = unsafe { libc::$fn($( $arg, )*) }; - if res == -1 { - Err(std::io::Error::last_os_error()) - } else { - Ok(res) - } - }}; -} - -pub use libc::*; -pub use syscall; - -pub unsafe fn io_uring_setup(entries: c_uint, p: *mut io_uring_params) -> c_int { - syscall(SYS_io_uring_setup, entries as c_long, p as c_long) as _ -} - -pub unsafe fn io_uring_register( - fd: c_int, - opcode: c_uint, - arg: *const c_void, - nr_args: c_uint, -) -> c_int { - syscall( - SYS_io_uring_register, - fd as c_long, - opcode as c_long, - arg as c_long, - nr_args as c_long, - ) as _ -} - -pub unsafe fn io_uring_enter2( - fd: c_int, - to_submit: c_uint, - min_complete: c_uint, - flags: c_uint, - arg: *const libc::c_void, - size: usize, -) -> c_int { - syscall( - SYS_io_uring_enter, - fd as c_long, - to_submit as c_long, - min_complete as c_long, - flags as c_long, - arg as c_long, - size as c_long, - ) as _ -} - -// Work around for , -// . 
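The `syscall!` helper at the top of this deleted file lives on elsewhere in the crate (process.rs above now imports it from the crate root). Its pattern is worth seeing standalone: run a libc function and map a `-1` return to the thread's last OS error. A runnable copy, with `getpid(2)` as a stand-in call:

```rust
macro_rules! syscall {
    ($fn: ident ( $($arg: expr),* $(,)? ) ) => {{
        // Run the libc function and map `-1` to the last OS error.
        let res = unsafe { libc::$fn($( $arg, )*) };
        if res == -1 {
            Err(std::io::Error::last_os_error())
        } else {
            Ok(res)
        }
    }};
}

fn current_pid() -> std::io::Result<libc::pid_t> {
    // `getpid(2)` never fails; it serves purely as a demonstration call.
    syscall!(getpid())
}

fn main() {
    println!("pid: {}", current_pid().unwrap());
}
```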
-pub const IOSQE_FIXED_FILE: u8 = 1 << IOSQE_FIXED_FILE_BIT as u8; -pub const IOSQE_IO_DRAIN: u8 = 1 << IOSQE_IO_DRAIN_BIT as u8; -pub const IOSQE_IO_LINK: u8 = 1 << IOSQE_IO_LINK_BIT as u8; -pub const IOSQE_IO_HARDLINK: u8 = 1 << IOSQE_IO_HARDLINK_BIT as u8; -pub const IOSQE_ASYNC: u8 = 1 << IOSQE_ASYNC_BIT as u8; -pub const IOSQE_BUFFER_SELECT: u8 = 1 << IOSQE_BUFFER_SELECT_BIT as u8; -pub const IOSQE_CQE_SKIP_SUCCESS: u8 = 1 << IOSQE_CQE_SKIP_SUCCESS_BIT as u8; -pub const SOCKET_URING_OP_SIOCINQ: __u32 = 0; -pub const SOCKET_URING_OP_SIOCOUTQ: __u32 = 1; -pub const SOCKET_URING_OP_GETSOCKOPT: __u32 = 2; -pub const SOCKET_URING_OP_SETSOCKOPT: __u32 = 3; - -pub type __kernel_time64_t = ::std::os::raw::c_longlong; -pub type __u8 = ::std::os::raw::c_uchar; -pub type __u16 = ::std::os::raw::c_ushort; -pub type __s32 = ::std::os::raw::c_int; -pub type __u32 = ::std::os::raw::c_uint; -pub type __u64 = ::std::os::raw::c_ulonglong; -pub type __kernel_rwf_t = ::std::os::raw::c_int; -pub type _bindgen_ty_13 = ::std::os::raw::c_uint; -pub type io_uring_op = ::std::os::raw::c_uint; -pub type _bindgen_ty_14 = ::std::os::raw::c_uint; -pub type _bindgen_ty_15 = ::std::os::raw::c_uint; -pub type _bindgen_ty_16 = ::std::os::raw::c_uint; -pub type _bindgen_ty_19 = ::std::os::raw::c_uint; -#[repr(C)] -#[derive(Default)] -pub struct __IncompleteArrayField(::std::marker::PhantomData, [T; 0]); -#[repr(C)] -#[derive(Copy, Clone)] -pub struct __kernel_timespec { - pub tv_sec: __kernel_time64_t, - pub tv_nsec: ::std::os::raw::c_longlong, -} -#[repr(C)] -pub struct io_uring_sqe { - pub opcode: __u8, - pub flags: __u8, - pub ioprio: __u16, - pub fd: __s32, - pub __bindgen_anon_1: io_uring_sqe__bindgen_ty_1, - pub __bindgen_anon_2: io_uring_sqe__bindgen_ty_2, - pub len: __u32, - pub __bindgen_anon_3: io_uring_sqe__bindgen_ty_3, - pub user_data: __u64, - pub __bindgen_anon_4: io_uring_sqe__bindgen_ty_4, - pub personality: __u16, - pub __bindgen_anon_5: io_uring_sqe__bindgen_ty_5, - pub __bindgen_anon_6: io_uring_sqe__bindgen_ty_6, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_sqe__bindgen_ty_1__bindgen_ty_1 { - pub cmd_op: __u32, - pub __pad1: __u32, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_sqe__bindgen_ty_2__bindgen_ty_1 { - pub level: __u32, - pub optname: __u32, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_sqe__bindgen_ty_5__bindgen_ty_1 { - pub addr_len: __u16, - pub __pad3: [__u16; 1usize], -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_sqe__bindgen_ty_6__bindgen_ty_1 { - pub addr3: __u64, - pub __pad2: [__u64; 1usize], -} -#[repr(C)] -pub struct io_uring_cqe { - pub user_data: __u64, - pub res: __s32, - pub flags: __u32, - pub big_cqe: __IncompleteArrayField<__u64>, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_sqring_offsets { - pub head: __u32, - pub tail: __u32, - pub ring_mask: __u32, - pub ring_entries: __u32, - pub flags: __u32, - pub dropped: __u32, - pub array: __u32, - pub resv1: __u32, - pub user_addr: __u64, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_cqring_offsets { - pub head: __u32, - pub tail: __u32, - pub ring_mask: __u32, - pub ring_entries: __u32, - pub overflow: __u32, - pub cqes: __u32, - pub flags: __u32, - pub resv1: __u32, - pub user_addr: __u64, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_params { - pub sq_entries: __u32, - pub cq_entries: __u32, - pub flags: __u32, - pub sq_thread_cpu: __u32, - pub sq_thread_idle: __u32, - pub features: __u32, - pub wq_fd: __u32, - pub resv: [__u32; 3usize], 
- pub sq_off: io_sqring_offsets, - pub cq_off: io_cqring_offsets, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_files_update { - pub offset: __u32, - pub resv: __u32, - pub fds: __u64, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_rsrc_register { - pub nr: __u32, - pub flags: __u32, - pub resv2: __u64, - pub data: __u64, - pub tags: __u64, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_rsrc_update { - pub offset: __u32, - pub resv: __u32, - pub data: __u64, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_rsrc_update2 { - pub offset: __u32, - pub resv: __u32, - pub data: __u64, - pub tags: __u64, - pub nr: __u32, - pub resv2: __u32, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_probe_op { - pub op: __u8, - pub resv: __u8, - pub flags: __u16, - pub resv2: __u32, -} -#[repr(C)] -pub struct io_uring_probe { - pub last_op: __u8, - pub ops_len: __u8, - pub resv: __u16, - pub resv2: [__u32; 3usize], - pub ops: __IncompleteArrayField, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_restriction { - pub opcode: __u16, - pub __bindgen_anon_1: io_uring_restriction__bindgen_ty_1, - pub resv: __u8, - pub resv2: [__u32; 3usize], -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_buf { - pub addr: __u64, - pub len: __u32, - pub bid: __u16, - pub resv: __u16, -} -#[repr(C)] -pub struct io_uring_buf_ring { - pub __bindgen_anon_1: io_uring_buf_ring__bindgen_ty_1, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1 { - pub resv1: __u64, - pub resv2: __u32, - pub resv3: __u16, - pub tail: __u16, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_buf_reg { - pub ring_addr: __u64, - pub ring_entries: __u32, - pub bgid: __u16, - pub flags: __u16, - pub resv: [__u64; 3usize], -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_buf_status { - pub buf_group: __u32, - pub head: __u32, - pub resv: [__u32; 8usize], -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_napi { - pub busy_poll_to: __u32, - pub prefer_busy_poll: __u8, - pub pad: [__u8; 3usize], - pub resv: __u64, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_getevents_arg { - pub sigmask: __u64, - pub sigmask_sz: __u32, - pub pad: __u32, - pub ts: __u64, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_sync_cancel_reg { - pub addr: __u64, - pub fd: __s32, - pub flags: __u32, - pub timeout: __kernel_timespec, - pub pad: [__u64; 4usize], -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_file_index_range { - pub off: __u32, - pub len: __u32, - pub resv: __u64, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_recvmsg_out { - pub namelen: __u32, - pub controllen: __u32, - pub payloadlen: __u32, - pub flags: __u32, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_sq { - pub khead: *mut ::std::os::raw::c_uint, - pub ktail: *mut ::std::os::raw::c_uint, - pub kring_mask: *mut ::std::os::raw::c_uint, - pub kring_entries: *mut ::std::os::raw::c_uint, - pub kflags: *mut ::std::os::raw::c_uint, - pub kdropped: *mut ::std::os::raw::c_uint, - pub array: *mut ::std::os::raw::c_uint, - pub sqes: *mut io_uring_sqe, - pub sqe_head: ::std::os::raw::c_uint, - pub sqe_tail: ::std::os::raw::c_uint, - pub ring_sz: usize, - pub ring_ptr: *mut ::std::os::raw::c_void, - pub ring_mask: ::std::os::raw::c_uint, - pub ring_entries: ::std::os::raw::c_uint, - pub pad: [::std::os::raw::c_uint; 2usize], -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring_cq { - pub khead: 
*mut ::std::os::raw::c_uint, - pub ktail: *mut ::std::os::raw::c_uint, - pub kring_mask: *mut ::std::os::raw::c_uint, - pub kring_entries: *mut ::std::os::raw::c_uint, - pub kflags: *mut ::std::os::raw::c_uint, - pub koverflow: *mut ::std::os::raw::c_uint, - pub cqes: *mut io_uring_cqe, - pub ring_sz: usize, - pub ring_ptr: *mut ::std::os::raw::c_void, - pub ring_mask: ::std::os::raw::c_uint, - pub ring_entries: ::std::os::raw::c_uint, - pub pad: [::std::os::raw::c_uint; 2usize], -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct io_uring { - pub sq: io_uring_sq, - pub cq: io_uring_cq, - pub flags: ::std::os::raw::c_uint, - pub ring_fd: ::std::os::raw::c_int, - pub features: ::std::os::raw::c_uint, - pub enter_ring_fd: ::std::os::raw::c_int, - pub int_flags: __u8, - pub pad: [__u8; 3usize], - pub pad2: ::std::os::raw::c_uint, -} -pub const IORING_FILE_INDEX_ALLOC: i32 = -1; -pub const IORING_SETUP_IOPOLL: u32 = 1; -pub const IORING_SETUP_SQPOLL: u32 = 2; -pub const IORING_SETUP_SQ_AFF: u32 = 4; -pub const IORING_SETUP_CQSIZE: u32 = 8; -pub const IORING_SETUP_CLAMP: u32 = 16; -pub const IORING_SETUP_ATTACH_WQ: u32 = 32; -pub const IORING_SETUP_R_DISABLED: u32 = 64; -pub const IORING_SETUP_SUBMIT_ALL: u32 = 128; -pub const IORING_SETUP_COOP_TASKRUN: u32 = 256; -pub const IORING_SETUP_TASKRUN_FLAG: u32 = 512; -pub const IORING_SETUP_SQE128: u32 = 1024; -pub const IORING_SETUP_CQE32: u32 = 2048; -pub const IORING_SETUP_SINGLE_ISSUER: u32 = 4096; -pub const IORING_SETUP_DEFER_TASKRUN: u32 = 8192; -pub const IORING_SETUP_NO_MMAP: u32 = 16384; -pub const IORING_SETUP_REGISTERED_FD_ONLY: u32 = 32768; -pub const IORING_SETUP_NO_SQARRAY: u32 = 65536; -pub const IORING_URING_CMD_FIXED: u32 = 1; -pub const IORING_FSYNC_DATASYNC: u32 = 1; -pub const IORING_TIMEOUT_ABS: u32 = 1; -pub const IORING_TIMEOUT_UPDATE: u32 = 2; -pub const IORING_TIMEOUT_BOOTTIME: u32 = 4; -pub const IORING_TIMEOUT_REALTIME: u32 = 8; -pub const IORING_LINK_TIMEOUT_UPDATE: u32 = 16; -pub const IORING_TIMEOUT_ETIME_SUCCESS: u32 = 32; -pub const IORING_TIMEOUT_MULTISHOT: u32 = 64; -pub const IORING_TIMEOUT_CLOCK_MASK: u32 = 12; -pub const IORING_TIMEOUT_UPDATE_MASK: u32 = 18; -pub const IORING_POLL_ADD_MULTI: u32 = 1; -pub const IORING_POLL_UPDATE_EVENTS: u32 = 2; -pub const IORING_POLL_UPDATE_USER_DATA: u32 = 4; -pub const IORING_POLL_ADD_LEVEL: u32 = 8; -pub const IORING_ASYNC_CANCEL_ALL: u32 = 1; -pub const IORING_ASYNC_CANCEL_FD: u32 = 2; -pub const IORING_ASYNC_CANCEL_ANY: u32 = 4; -pub const IORING_ASYNC_CANCEL_FD_FIXED: u32 = 8; -pub const IORING_RECVSEND_POLL_FIRST: u32 = 1; -pub const IORING_RECV_MULTISHOT: u32 = 2; -pub const IORING_RECVSEND_FIXED_BUF: u32 = 4; -pub const IORING_SEND_ZC_REPORT_USAGE: u32 = 8; -pub const IORING_RECVSEND_BUNDLE: u32 = 16; -pub const IORING_NOTIF_USAGE_ZC_COPIED: u32 = 2147483648; -pub const IORING_ACCEPT_MULTISHOT: u32 = 1; -pub const IORING_ACCEPT_DONTWAIT: u32 = 2; -pub const IORING_ACCEPT_POLL_FIRST: u32 = 4; -pub const IORING_MSG_RING_CQE_SKIP: u32 = 1; -pub const IORING_MSG_RING_FLAGS_PASS: u32 = 2; -pub const IORING_FIXED_FD_NO_CLOEXEC: u32 = 1; -pub const IORING_NOP_INJECT_RESULT: u32 = 1; -pub const IORING_CQE_F_BUFFER: u32 = 1; -pub const IORING_CQE_F_MORE: u32 = 2; -pub const IORING_CQE_F_SOCK_NONEMPTY: u32 = 4; -pub const IORING_CQE_F_NOTIF: u32 = 8; -pub const IORING_OFF_SQ_RING: u32 = 0; -pub const IORING_OFF_CQ_RING: u32 = 134217728; -pub const IORING_OFF_SQES: u32 = 268435456; -pub const IORING_OFF_PBUF_RING: u32 = 2147483648; -pub const IORING_OFF_PBUF_SHIFT: u32 = 16; -pub 
const IORING_OFF_MMAP_MASK: u32 = 4160749568; -pub const IORING_SQ_NEED_WAKEUP: u32 = 1; -pub const IORING_SQ_CQ_OVERFLOW: u32 = 2; -pub const IORING_SQ_TASKRUN: u32 = 4; -pub const IORING_CQ_EVENTFD_DISABLED: u32 = 1; -pub const IORING_ENTER_GETEVENTS: u32 = 1; -pub const IORING_ENTER_SQ_WAKEUP: u32 = 2; -pub const IORING_ENTER_SQ_WAIT: u32 = 4; -pub const IORING_ENTER_EXT_ARG: u32 = 8; -pub const IORING_ENTER_REGISTERED_RING: u32 = 16; -pub const IORING_FEAT_SINGLE_MMAP: u32 = 1; -pub const IORING_FEAT_NODROP: u32 = 2; -pub const IORING_FEAT_SUBMIT_STABLE: u32 = 4; -pub const IORING_FEAT_RW_CUR_POS: u32 = 8; -pub const IORING_FEAT_CUR_PERSONALITY: u32 = 16; -pub const IORING_FEAT_FAST_POLL: u32 = 32; -pub const IORING_FEAT_POLL_32BITS: u32 = 64; -pub const IORING_FEAT_SQPOLL_NONFIXED: u32 = 128; -pub const IORING_FEAT_EXT_ARG: u32 = 256; -pub const IORING_FEAT_NATIVE_WORKERS: u32 = 512; -pub const IORING_FEAT_RSRC_TAGS: u32 = 1024; -pub const IORING_FEAT_CQE_SKIP: u32 = 2048; -pub const IORING_FEAT_LINKED_FILE: u32 = 4096; -pub const IORING_FEAT_REG_REG_RING: u32 = 8192; -pub const IORING_FEAT_RECVSEND_BUNDLE: u32 = 16384; -pub const IORING_RSRC_REGISTER_SPARSE: u32 = 1; -pub const IORING_REGISTER_FILES_SKIP: i32 = -2; -pub const IOSQE_FIXED_FILE_BIT: _bindgen_ty_13 = 0; -pub const IOSQE_IO_DRAIN_BIT: _bindgen_ty_13 = 1; -pub const IOSQE_IO_LINK_BIT: _bindgen_ty_13 = 2; -pub const IOSQE_IO_HARDLINK_BIT: _bindgen_ty_13 = 3; -pub const IOSQE_ASYNC_BIT: _bindgen_ty_13 = 4; -pub const IOSQE_BUFFER_SELECT_BIT: _bindgen_ty_13 = 5; -pub const IOSQE_CQE_SKIP_SUCCESS_BIT: _bindgen_ty_13 = 6; -pub const IORING_OP_NOP: io_uring_op = 0; -pub const IORING_OP_READV: io_uring_op = 1; -pub const IORING_OP_WRITEV: io_uring_op = 2; -pub const IORING_OP_FSYNC: io_uring_op = 3; -pub const IORING_OP_READ_FIXED: io_uring_op = 4; -pub const IORING_OP_WRITE_FIXED: io_uring_op = 5; -pub const IORING_OP_POLL_ADD: io_uring_op = 6; -pub const IORING_OP_POLL_REMOVE: io_uring_op = 7; -pub const IORING_OP_SYNC_FILE_RANGE: io_uring_op = 8; -pub const IORING_OP_SENDMSG: io_uring_op = 9; -pub const IORING_OP_RECVMSG: io_uring_op = 10; -pub const IORING_OP_TIMEOUT: io_uring_op = 11; -pub const IORING_OP_TIMEOUT_REMOVE: io_uring_op = 12; -pub const IORING_OP_ACCEPT: io_uring_op = 13; -pub const IORING_OP_ASYNC_CANCEL: io_uring_op = 14; -pub const IORING_OP_LINK_TIMEOUT: io_uring_op = 15; -pub const IORING_OP_CONNECT: io_uring_op = 16; -pub const IORING_OP_FALLOCATE: io_uring_op = 17; -pub const IORING_OP_OPENAT: io_uring_op = 18; -pub const IORING_OP_CLOSE: io_uring_op = 19; -pub const IORING_OP_FILES_UPDATE: io_uring_op = 20; -pub const IORING_OP_STATX: io_uring_op = 21; -pub const IORING_OP_READ: io_uring_op = 22; -pub const IORING_OP_WRITE: io_uring_op = 23; -pub const IORING_OP_FADVISE: io_uring_op = 24; -pub const IORING_OP_MADVISE: io_uring_op = 25; -pub const IORING_OP_SEND: io_uring_op = 26; -pub const IORING_OP_RECV: io_uring_op = 27; -pub const IORING_OP_OPENAT2: io_uring_op = 28; -pub const IORING_OP_EPOLL_CTL: io_uring_op = 29; -pub const IORING_OP_SPLICE: io_uring_op = 30; -pub const IORING_OP_PROVIDE_BUFFERS: io_uring_op = 31; -pub const IORING_OP_REMOVE_BUFFERS: io_uring_op = 32; -pub const IORING_OP_TEE: io_uring_op = 33; -pub const IORING_OP_SHUTDOWN: io_uring_op = 34; -pub const IORING_OP_RENAMEAT: io_uring_op = 35; -pub const IORING_OP_UNLINKAT: io_uring_op = 36; -pub const IORING_OP_MKDIRAT: io_uring_op = 37; -pub const IORING_OP_SYMLINKAT: io_uring_op = 38; -pub const IORING_OP_LINKAT: io_uring_op = 
39; -pub const IORING_OP_MSG_RING: io_uring_op = 40; -pub const IORING_OP_FSETXATTR: io_uring_op = 41; -pub const IORING_OP_SETXATTR: io_uring_op = 42; -pub const IORING_OP_FGETXATTR: io_uring_op = 43; -pub const IORING_OP_GETXATTR: io_uring_op = 44; -pub const IORING_OP_SOCKET: io_uring_op = 45; -pub const IORING_OP_URING_CMD: io_uring_op = 46; -pub const IORING_OP_SEND_ZC: io_uring_op = 47; -pub const IORING_OP_SENDMSG_ZC: io_uring_op = 48; -pub const IORING_OP_READ_MULTISHOT: io_uring_op = 49; -pub const IORING_OP_WAITID: io_uring_op = 50; -pub const IORING_OP_FUTEX_WAIT: io_uring_op = 51; -pub const IORING_OP_FUTEX_WAKE: io_uring_op = 52; -pub const IORING_OP_FUTEX_WAITV: io_uring_op = 53; -pub const IORING_OP_FIXED_FD_INSTALL: io_uring_op = 54; -pub const IORING_OP_FTRUNCATE: io_uring_op = 55; -pub const IORING_OP_LAST: io_uring_op = 56; -pub const IORING_MSG_DATA: _bindgen_ty_14 = 0; -pub const IORING_MSG_SEND_FD: _bindgen_ty_14 = 1; -pub const IORING_CQE_BUFFER_SHIFT: _bindgen_ty_15 = 16; -pub const IORING_REGISTER_BUFFERS: _bindgen_ty_16 = 0; -pub const IORING_UNREGISTER_BUFFERS: _bindgen_ty_16 = 1; -pub const IORING_REGISTER_FILES: _bindgen_ty_16 = 2; -pub const IORING_UNREGISTER_FILES: _bindgen_ty_16 = 3; -pub const IORING_REGISTER_EVENTFD: _bindgen_ty_16 = 4; -pub const IORING_UNREGISTER_EVENTFD: _bindgen_ty_16 = 5; -pub const IORING_REGISTER_FILES_UPDATE: _bindgen_ty_16 = 6; -pub const IORING_REGISTER_EVENTFD_ASYNC: _bindgen_ty_16 = 7; -pub const IORING_REGISTER_PROBE: _bindgen_ty_16 = 8; -pub const IORING_REGISTER_PERSONALITY: _bindgen_ty_16 = 9; -pub const IORING_UNREGISTER_PERSONALITY: _bindgen_ty_16 = 10; -pub const IORING_REGISTER_RESTRICTIONS: _bindgen_ty_16 = 11; -pub const IORING_REGISTER_ENABLE_RINGS: _bindgen_ty_16 = 12; -pub const IORING_REGISTER_FILES2: _bindgen_ty_16 = 13; -pub const IORING_REGISTER_FILES_UPDATE2: _bindgen_ty_16 = 14; -pub const IORING_REGISTER_BUFFERS2: _bindgen_ty_16 = 15; -pub const IORING_REGISTER_BUFFERS_UPDATE: _bindgen_ty_16 = 16; -pub const IORING_REGISTER_IOWQ_AFF: _bindgen_ty_16 = 17; -pub const IORING_UNREGISTER_IOWQ_AFF: _bindgen_ty_16 = 18; -pub const IORING_REGISTER_IOWQ_MAX_WORKERS: _bindgen_ty_16 = 19; -pub const IORING_REGISTER_RING_FDS: _bindgen_ty_16 = 20; -pub const IORING_UNREGISTER_RING_FDS: _bindgen_ty_16 = 21; -pub const IORING_REGISTER_PBUF_RING: _bindgen_ty_16 = 22; -pub const IORING_UNREGISTER_PBUF_RING: _bindgen_ty_16 = 23; -pub const IORING_REGISTER_SYNC_CANCEL: _bindgen_ty_16 = 24; -pub const IORING_REGISTER_FILE_ALLOC_RANGE: _bindgen_ty_16 = 25; -pub const IORING_REGISTER_PBUF_STATUS: _bindgen_ty_16 = 26; -pub const IORING_REGISTER_NAPI: _bindgen_ty_16 = 27; -pub const IORING_UNREGISTER_NAPI: _bindgen_ty_16 = 28; -pub const IORING_REGISTER_LAST: _bindgen_ty_16 = 29; -pub const IORING_REGISTER_USE_REGISTERED_RING: _bindgen_ty_16 = 2147483648; -pub const IORING_RESTRICTION_REGISTER_OP: _bindgen_ty_19 = 0; -pub const IORING_RESTRICTION_SQE_OP: _bindgen_ty_19 = 1; -pub const IORING_RESTRICTION_SQE_FLAGS_ALLOWED: _bindgen_ty_19 = 2; -pub const IORING_RESTRICTION_SQE_FLAGS_REQUIRED: _bindgen_ty_19 = 3; -pub const IORING_RESTRICTION_LAST: _bindgen_ty_19 = 4; -#[test] -fn bindgen_test_layout___kernel_timespec() { - const UNINIT: ::std::mem::MaybeUninit<__kernel_timespec> = ::std::mem::MaybeUninit::uninit(); - let ptr = UNINIT.as_ptr(); - assert_eq!( - ::std::mem::size_of::<__kernel_timespec>(), - 16usize, - concat!("Size of: ", stringify!(__kernel_timespec)) - ); - assert_eq!( - ::std::mem::align_of::<__kernel_timespec>(), - 
[Elided: the remainder of this hunk deletes the machine-generated `bindgen_test_layout_*` tests from the generated io_uring bindings — size, alignment and field-offset assertions for `__kernel_timespec`, `io_uring_sqe` (and its anonymous unions `__bindgen_ty_1` through `__bindgen_ty_6`), `io_uring_cqe`, `io_sqring_offsets`, `io_cqring_offsets`, `io_uring_params`, `io_uring_files_update`, `io_uring_rsrc_register`, `io_uring_rsrc_update`, `io_uring_rsrc_update2`, `io_uring_probe_op`, `io_uring_probe`, `io_uring_restriction`, `io_uring_buf`, `io_uring_buf_ring`, `io_uring_buf_reg`, `io_uring_buf_status`, `io_uring_napi`, `io_uring_getevents_arg`, `io_uring_sync_cancel_reg`, `io_uring_file_index_range`, `io_uring_recvmsg_out`, `io_uring_sq`, `io_uring_cq` and `io_uring` — together with the `#[repr(C)]` union definitions for the `io_uring_sqe` anonymous fields, the `io_uring_restriction`/`io_uring_buf_ring` unions, and the `__IncompleteArrayField` helper impls (including its `Debug` impl).]
diff --git a/src/unix.rs b/src/unix.rs
new file mode 100644
index 00000000..936e8a11
--- /dev/null
+++ b/src/unix.rs
@@ -0,0 +1,122 @@
+//! Types shared across Unix-like implementations.
+
+use std::mem::{self, MaybeUninit};
+
+use crate::io::{Buf, BufMut};
+use crate::net::SocketAddress;
+
+#[repr(transparent)] // Needed for I/O.
+pub(crate) struct IoMutSlice(libc::iovec);
+
+impl IoMutSlice {
+    pub(crate) fn new<B: BufMut>(buf: &mut B) -> IoMutSlice {
+        let (ptr, len) = unsafe { buf.parts_mut() };
+        IoMutSlice(libc::iovec {
+            iov_base: ptr.cast(),
+            iov_len: len as _,
+        })
+    }
+
+    // NOTE: can't implement `as_bytes` as we don't know if the bytes are
+    // initialised. `len` will have to do.
+    pub(crate) const fn len(&self) -> usize {
+        self.0.iov_len
+    }
+
+    pub(crate) fn set_len(&mut self, new_len: usize) {
+        self.0.iov_len = new_len;
+    }
+}
+
+// SAFETY: `libc::iovec` is `!Sync`, but it's just a pointer to some bytes, so
+// it's actually `Send` and `Sync`.
+unsafe impl Send for IoMutSlice {}
+unsafe impl Sync for IoMutSlice {}
+
+#[repr(transparent)] // Needed for I/O.
+pub(crate) struct IoSlice(libc::iovec);
+
+impl IoSlice {
+    pub(crate) fn new<B: Buf>(buf: &B) -> IoSlice {
+        let (ptr, len) = unsafe { buf.parts() };
+        IoSlice(libc::iovec {
+            iov_base: ptr.cast_mut().cast(),
+            iov_len: len as _,
+        })
+    }
+
+    pub(crate) const fn as_bytes(&self) -> &[u8] {
+        // SAFETY: on creation we've ensured that `iov_base` and `iov_len` are
+        // valid.
+        unsafe { std::slice::from_raw_parts(self.0.iov_base.cast(), self.0.iov_len) }
+    }
+
+    pub(crate) const fn len(&self) -> usize {
+        self.0.iov_len
+    }
+
+    pub(crate) fn set_len(&mut self, new_len: usize) {
+        debug_assert!(self.0.iov_len >= new_len);
+        self.0.iov_len = new_len;
+    }
+}
+
+// SAFETY: `libc::iovec` is `!Sync`, but it's just a pointer to some bytes, so
+// it's actually `Send` and `Sync`.
+unsafe impl Send for IoSlice {}
+unsafe impl Sync for IoSlice {}
+
+#[repr(transparent)] // Needed for system calls.
+pub(crate) struct MsgHeader(libc::msghdr);
+
+impl MsgHeader {
+    pub(crate) const fn empty() -> MsgHeader {
+        // SAFETY: a zeroed `msghdr` is valid.
+        unsafe { mem::zeroed() }
+    }
+
+    /// # Safety
+    ///
+    /// Caller must ensure that `address` and `iovecs` outlive `MsgHeader`.
+    pub(crate) unsafe fn init_recv<A: SocketAddress>(
+        &mut self,
+        address: &mut MaybeUninit<A::Storage>,
+        iovecs: &mut [crate::io::IoMutSlice],
+    ) {
+        let (address_ptr, address_length) = unsafe { A::as_mut_ptr(address) };
+        self.0.msg_name = address_ptr.cast();
+        self.0.msg_namelen = address_length;
+        // SAFETY: this cast is safe because `IoMutSlice` is `repr(transparent)`.
+        self.0.msg_iov = iovecs.as_mut_ptr().cast();
+        self.0.msg_iovlen = iovecs.len();
+    }
+
+    /// # Safety
+    ///
+    /// Caller must ensure that `address` and `iovecs` outlive `MsgHeader`.
+    pub(crate) unsafe fn init_send<A: SocketAddress>(
+        &mut self,
+        address: &mut A::Storage,
+        iovecs: &mut [crate::io::IoSlice],
+    ) {
+        let (address_ptr, address_length) = unsafe { A::as_ptr(address) };
+        self.0.msg_name = address_ptr.cast_mut().cast();
+        self.0.msg_namelen = address_length;
+        // SAFETY: this cast is safe because `IoSlice` is `repr(transparent)`.
+        self.0.msg_iov = iovecs.as_mut_ptr().cast();
+        self.0.msg_iovlen = iovecs.len();
+    }
+
+    pub(crate) const fn address_len(&self) -> libc::socklen_t {
+        self.0.msg_namelen
+    }
+
+    pub(crate) const fn flags(&self) -> libc::c_int {
+        self.0.msg_flags
+    }
+}
+
+// SAFETY: `libc::msghdr` is `!Sync`, but the two pointers, to the address and
+// to the `iovecs` (`IoMutSlice`/`IoSlice`), are `Send` and `Sync`.
+unsafe impl Send for MsgHeader {}
+unsafe impl Sync for MsgHeader {}
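To see how the pieces above fit together, here is a minimal sketch of a crate-internal caller. The wrapper types and the `# Safety` contracts come from `src/unix.rs` above; the `send_msg` helper itself is hypothetical, as is the assumption that `Vec<u8>` implements `Buf` (as the tests elsewhere in this patch suggest) and that a plain blocking `sendmsg(2)` is the consumer:

```rust
use std::io;
use std::os::fd::RawFd;

use crate::net::SocketAddress;
use crate::unix::{IoSlice, MsgHeader};

// Hypothetical helper: build a `msghdr` with the new wrappers and hand it to
// a blocking `sendmsg(2)` call.
//
// SAFETY requirement for callers: `address` must be a valid address storage
// for `A` and `fd` must be an open socket.
unsafe fn send_msg<A: SocketAddress>(
    fd: RawFd,
    address: &mut A::Storage,
    buf: &Vec<u8>,
) -> io::Result<usize> {
    let mut iovecs = [IoSlice::new(buf)];
    let mut header = MsgHeader::empty();
    // SAFETY: `address` and `iovecs` stay alive (and unmoved) until after the
    // `sendmsg` call below, satisfying `init_send`'s contract.
    header.init_send::<A>(address, &mut iovecs);
    // `MsgHeader` is `repr(transparent)` over `libc::msghdr`, so a pointer to
    // it is a valid `msghdr` pointer for the kernel.
    let n = libc::sendmsg(fd, (&header as *const MsgHeader).cast(), 0);
    if n == -1 {
        Err(io::Error::last_os_error())
    } else {
        Ok(n as usize)
    }
}
```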
diff --git a/sys/Cargo.toml b/sys/Cargo.toml
index 6ebddfac..d643a5f8 100644
--- a/sys/Cargo.toml
+++ b/sys/Cargo.toml
@@ -10,4 +10,4 @@ edition = "2021"
 libc = { version = "0.2.132", default-features = false }
 
 [build-dependencies]
-bindgen = { version = "0.69.4", default-features = false }
+bindgen = { version = "0.71.1", default-features = false }
diff --git a/sys/README.md b/sys/README.md
index 0c4b72c5..89ec99a7 100644
--- a/sys/README.md
+++ b/sys/README.md
@@ -6,4 +6,4 @@
 e.g. from . Then run `make` and copy all files from `liburing/src/include`
 into `include`.
 
 After the headers are prepared run `cargo build` and copy `src/sys.rs` to
-`../src/sys.rs` and run `cargo fmt` at the root.
+`../src/io_uring/libc.rs` and run `cargo fmt` at the root.
diff --git a/sys/build.rs b/sys/build.rs
index 5d4149fd..911dfa53 100644
--- a/sys/build.rs
+++ b/sys/build.rs
@@ -45,20 +45,7 @@ const HEADER: &str = "
 
 #![allow(warnings, clippy::all, clippy::pedantic, clippy::nursery)]
 
-/// Helper macro to execute a system call that returns an `io::Result`.
-macro_rules! syscall {
-    ($fn: ident ( $($arg: expr),* $(,)? ) ) => {{
-        let res = unsafe { libc::$fn($( $arg, )*) };
-        if res == -1 {
-            Err(std::io::Error::last_os_error())
-        } else {
-            Ok(res)
-        }
-    }};
-}
-
 pub use libc::*;
-pub use syscall;
 
 pub unsafe fn io_uring_setup(entries: c_uint, p: *mut io_uring_params) -> c_int {
     syscall(SYS_io_uring_setup, entries as c_long, p as c_long) as _
@@ -107,8 +94,4 @@ pub const IOSQE_IO_HARDLINK: u8 = 1 << IOSQE_IO_HARDLINK_BIT as u8;
 pub const IOSQE_ASYNC: u8 = 1 << IOSQE_ASYNC_BIT as u8;
 pub const IOSQE_BUFFER_SELECT: u8 = 1 << IOSQE_BUFFER_SELECT_BIT as u8;
 pub const IOSQE_CQE_SKIP_SUCCESS: u8 = 1 << IOSQE_CQE_SKIP_SUCCESS_BIT as u8;
-pub const SOCKET_URING_OP_SIOCINQ: __u32 = 0;
-pub const SOCKET_URING_OP_SIOCOUTQ: __u32 = 1;
-pub const SOCKET_URING_OP_GETSOCKOPT: __u32 = 2;
-pub const SOCKET_URING_OP_SETSOCKOPT: __u32 = 3;
 ";
diff --git a/tests/async_fd.rs b/tests/async_fd.rs
index d43cd943..21fd964f 100644
--- a/tests/async_fd.rs
+++ b/tests/async_fd.rs
@@ -2,7 +2,7 @@
 
 #![cfg_attr(feature = "nightly", feature(async_iterator))]
 
-use a10::fd::{AsyncFd, Direct, File};
+use a10::fd::{AsyncFd, File};
 
 mod util;
 use util::{is_send, is_sync};
@@ -16,9 +16,15 @@ mod async_fd {
 }
 
 #[test]
-fn is_send_and_sync() {
+fn async_fd_is_send_and_sync() {
     is_send::<AsyncFd<File>>();
     is_sync::<AsyncFd<File>>();
+}
+
+#[test]
+#[cfg(target_os = "linux")]
+fn async_direct_fd_is_send_and_sync() {
+    use a10::fd::Direct;
     is_send::<AsyncFd<Direct>>();
     is_sync::<AsyncFd<Direct>>();
 }
diff --git a/tests/async_fd/fs.rs b/tests/async_fd/fs.rs
index c0d58e72..186495b7 100644
--- a/tests/async_fd/fs.rs
+++ b/tests/async_fd/fs.rs
@@ -206,8 +206,8 @@ fn read_vectored_array() {
     let sq = test_queue();
     let waker = Waker::new();
 
-    is_send::, 2>>();
-    is_sync::, 1>>();
+    is_send::; 2], 2>>();
+    is_sync::; 2], 2>>();
 
     let test_file = &LOREM_IPSUM_50;
     let path = test_file.path.into();
@@ -344,8 +344,8 @@ fn write_vectored() {
     let sq = test_queue();
     let waker = Waker::new();
 
-    is_send::, 1>>();
-    is_sync::, 1>>();
+    is_send::; 2], 2>>();
+    is_sync::; 2], 2>>();
 
     let mut path = temp_dir();
     path.push("write_vectored");
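The vectored read/write tests above now go through the `IoSlice`/`IoMutSlice` wrappers rather than raw `libc::iovec`s (see the `BufSlice`/`BufMutSlice` changelog entry). For illustration, a minimal well-behaved `BufSlice` implementation against the new API might look as follows; the `TwoBufs` type is made up, and this assumes `as_iovecs` is the only required method, as the `BadBufSlice` test type in the io.rs diff below suggests:

```rust
use a10::io::{BufSlice, IoSlice};

// Hypothetical pair of buffers written with a single vectored write.
struct TwoBufs {
    header: Vec<u8>,
    body: Vec<u8>,
}

// SAFETY: both `Vec`s are owned by `TwoBufs` and stay valid and unmoved for
// as long as the returned `IoSlice`s are in use by the kernel.
unsafe impl BufSlice<2> for TwoBufs {
    unsafe fn as_iovecs(&self) -> [IoSlice; 2] {
        [IoSlice::new(&self.header), IoSlice::new(&self.body)]
    }
}
```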
},
-            },
+            IoSlice::new(&Self::DATA1),
+            IoSlice::new(&Self::DATA2),
+            IoSlice::new(&Self::DATA3[0..max_length]),
         ]
     }
 }
@@ -657,11 +645,11 @@ pub(crate) struct BadReadBufSlice {
 }
 
 unsafe impl BufMutSlice<2> for BadReadBufSlice {
-    unsafe fn as_iovecs_mut(&mut self) -> [libc::iovec; 2] {
+    unsafe fn as_iovecs_mut(&mut self) -> [IoMutSlice; 2] {
         let mut iovecs = self.data.as_iovecs_mut();
-        if iovecs[0].iov_len >= 10 {
-            iovecs[0].iov_len = 10;
-            iovecs[1].iov_len = 5;
+        if iovecs[0].len() >= 10 {
+            iovecs[0].set_len(10);
+            iovecs[1].set_len(5);
         }
         iovecs
     }
@@ -671,7 +659,7 @@ unsafe impl BufMutSlice<2> for BadReadBufSlice {
             return;
         }
 
-        if self.as_iovecs_mut()[0].iov_len == 10 {
+        if self.as_iovecs_mut()[0].len() == 10 {
             self.data[0].set_init(10);
             self.data[1].set_init(n - 10);
         } else {
@@ -711,7 +699,7 @@ struct GrowingBufSlice {
 }
 
 unsafe impl BufMutSlice<2> for GrowingBufSlice {
-    unsafe fn as_iovecs_mut(&mut self) -> [libc::iovec; 2] {
+    unsafe fn as_iovecs_mut(&mut self) -> [IoMutSlice; 2] {
         self.data.as_iovecs_mut()
     }
@@ -791,13 +779,7 @@ fn splice_to() {
     let file = waker.block_on(open_file).unwrap();
 
     let n = waker
-        .block_on(file.splice_to_at(
-            10,
-            w.as_fd().as_raw_fd(),
-            NO_OFFSET,
-            expected.len() as u32,
-            0,
-        ))
+        .block_on(file.splice_to_at(10, w.as_fd(), NO_OFFSET, expected.len() as u32, 0))
         .expect("failed to splice");
 
     assert_eq!(n, expected.len() - 10);
@@ -831,13 +813,7 @@ fn splice_from() {
         .expect("failed to write all");
 
     let n = waker
-        .block_on(file.splice_from_at(
-            10,
-            r.as_fd().as_raw_fd(),
-            NO_OFFSET,
-            expected.len() as u32,
-            0,
-        ))
+        .block_on(file.splice_from_at(10, r.as_fd(), NO_OFFSET, expected.len() as u32, 0))
        .expect("failed to splice");
 
     assert_eq!(n, expected.len());
@@ -852,8 +828,10 @@ fn close_socket_fd() {
     let sq = test_queue();
     let waker = Waker::new();
 
-    is_send::<Close>();
-    is_sync::<Close>();
+    is_send::<Close<File>>();
+    is_sync::<Close<File>>();
+    is_send::<Close<Direct>>();
+    is_sync::<Close<Direct>>();
 
     let socket = waker.block_on(tcp_ipv4_socket(sq));
     waker.block_on(socket.close()).expect("failed to close fd");
@@ -939,3 +917,144 @@ fn pipe2(sq: SubmissionQueue) -> io::Result<(AsyncFd, AsyncFd)> {
     let w = unsafe { AsyncFd::from_raw_fd(fds[1], sq) };
     Ok((r, w))
 }
+
+#[test]
+fn read_small_file() {
+    all_bufs!(for new_buf in bufs {
+        test_read(&LOREM_IPSUM_5.content, open_file, new_buf)
+    });
+}
+
+#[test]
+fn read_large_file() {
+    all_bufs!(for new_buf in bufs {
+        test_read(&LOREM_IPSUM_50.content, open_file, new_buf)
+    });
+}
+
+#[test]
+fn read_small_pipe() {
+    all_bufs!(for new_buf in bufs {
+        test_read(&LOREM_IPSUM_5.content, open_read_pipe, new_buf)
+    });
+}
+
+#[test]
+fn read_large_pipe() {
+    all_bufs!(for new_buf in bufs {
+        test_read(&LOREM_IPSUM_50.content, open_read_pipe, new_buf)
+    });
+}
+
+fn test_read<F, Fut, B, D>(expected: &'static [u8], open_fd: F, new_buf: fn() -> B)
+where
+    F: FnOnce(&'static [u8], SubmissionQueue) -> Fut,
+    Fut: Future<Output = AsyncFd<D>>,
+    B: TestBuf,
+    D: Descriptor,
+{
+    let sq = test_queue();
+    let waker = Waker::new();
+
+    let fd = waker.block_on(open_fd(expected, sq.clone()));
+    let mut buf = new_buf();
+
+    let mut expected = expected;
+    loop {
+        buf = waker.block_on(fd.read(buf)).expect("failed to read");
+        assert_ne!(buf.len(), 0);
+        assert_eq!(buf.bytes().len(), buf.len());
+        if buf.bytes().len() > expected.len() {
+            panic!(
+                "read too much: buf: {}, expected: {}",
+                buf.len(),
+                expected.len(),
+            );
+        }
+        assert_eq!(buf.bytes(), &expected[..buf.len()]);
+        expected = &expected[buf.len()..];
+        if expected.is_empty() {
+            break;
+        }
+        buf.reset();
+    }
+}
+
+async fn open_file(expected: &'static [u8], sq: SubmissionQueue) -> AsyncFd {
+    let tmp_path = tmp_path();
+    std::fs::write(&tmp_path, expected).expect("failed to write to file");
+    fs::open_file(sq, tmp_path)
+        .await
+        .expect("failed to open file")
+}
+
+async fn open_read_pipe(expected: &'static [u8], sq: SubmissionQueue) -> AsyncFd {
+    let mut fds: [RawFd; 2] = [-1, -1];
+    syscall!(pipe2(fds.as_mut_ptr(), libc::O_CLOEXEC)).expect("failed to create pipe");
+    // SAFETY: we just initialised the `fds` above.
+    let r = unsafe { AsyncFd::from_raw_fd(fds[0], sq) };
+    let mut w = unsafe { std::fs::File::from_raw_fd(fds[1]) };
+
+    std::thread::spawn(move || {
+        std::io::Write::write_all(&mut w, expected).expect("failed to write all data to pipe");
+    });
+
+    r
+}
+
+/// Macro to run a code block with all buffer kinds.
+macro_rules! all_bufs {
+    (
+        for $new_buf: ident in bufs
+        $code: block
+    ) => {{
+        all_bufs!(for $new_buf in [small_vec, vec, large_vec] $code);
+    }};
+    (
+        // Private.
+        for $new_buf: ident in [ $( $create_buf: ident ),+ ]
+        $code: block
+    ) => {{
+        $(
+            {
+                let $new_buf = $create_buf;
+                $code
+            }
+        )+
+    }};
+}
+
+use all_bufs;
+
+/// NOTE: all implementations should be in the `all_bufs` macro.
+trait TestBuf: BufMut {
+    fn len(&self) -> usize;
+    fn bytes(&self) -> &[u8];
+    fn reset(&mut self);
+}
+
+impl TestBuf for Vec<u8> {
+    fn len(&self) -> usize {
+        self.len()
+    }
+
+    fn bytes(&self) -> &[u8] {
+        &*self
+    }
+
+    fn reset(&mut self) {
+        self.clear()
+    }
+}
+
+fn small_vec() -> Vec<u8> {
+    Vec::with_capacity(64)
+}
+
+fn vec() -> Vec<u8> {
+    Vec::with_capacity(4 * 1024) // 4KB.
+}
+
+fn large_vec() -> Vec<u8> {
+    Vec::with_capacity(1024 * 1024) // 1MB.
+}
diff --git a/tests/async_fd/net.rs b/tests/async_fd/net.rs
index de4e5721..c0f5e9b6 100644
--- a/tests/async_fd/net.rs
+++ b/tests/async_fd/net.rs
@@ -11,6 +11,7 @@ use std::os::fd::{AsFd, AsRawFd, BorrowedFd};
 use std::ptr;
 
 use a10::cancel::{Cancel, CancelResult};
+use a10::fd::{Direct, File};
 use a10::io::ReadBufPool;
 use a10::net::{
     Accept, MultishotAccept, MultishotRecv, NoAddress, Recv, RecvN, RecvNVectored, Send, SendAll,
@@ -33,8 +34,8 @@ fn accept() {
     let sq = test_queue();
     let waker = Waker::new();
 
-    is_send::>();
-    is_sync::>();
+    is_send::>();
+    is_sync::>();
 
     // Bind a socket.
     let listener = waker.block_on(tcp_ipv4_socket(sq));
     bind_and_listen_ipv4(&listener);
 
     // Accept a connection.
     let mut stream = TcpStream::connect(local_addr).expect("failed to connect");
-    let accept = listener.accept::();
-    let (client, addr) = waker.block_on(accept).expect("failed to accept connection");
-    let address = from_storage(addr);
+    let accept = listener.accept::();
+    let (client, address) = waker.block_on(accept).expect("failed to accept connection");
 
     assert_eq!(stream.peer_addr().unwrap(), local_addr);
     assert_eq!(stream.local_addr().unwrap(), address.into());
@@ -76,8 +76,8 @@ fn accept_no_address() {
     let sq = test_queue();
     let waker = Waker::new();
 
-    is_send::>();
-    is_sync::>();
+    is_send::>();
+    is_sync::>();
 
     // Bind a socket.
     let listener = waker.block_on(tcp_ipv4_socket(sq));
@@ -138,7 +138,7 @@ fn try_cancel_accept_before_poll() {
     let listener = waker.block_on(tcp_ipv4_socket(sq));
     bind_and_listen_ipv4(&listener);
 
-    let mut accept = listener.accept::();
+    let mut accept = listener.accept::();
 
     // Before we accept we cancel the accept call.
if !matches!(accept.try_cancel(), CancelResult::NotStarted) { @@ -327,7 +327,7 @@ fn cancel_multishot_accept_before_poll() { let mut accept_stream = listener.multishot_accept(); - expect_io_errno(waker.block_on(accept_stream.cancel()), libc::ENOENT) + waker.block_on(accept_stream.cancel()).unwrap(); } #[test] @@ -350,21 +350,19 @@ fn connect() { let sq = test_queue(); let waker = Waker::new(); - is_send::(); - is_sync::(); + is_send::>(); + is_sync::>(); + is_send::>(); + is_sync::>(); // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -390,43 +388,6 @@ fn connect() { assert_eq!(n, 0); } -#[test] -fn connect_extractor() { - let sq = test_queue(); - let waker = Waker::new(); - - // Bind a socket. - let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; - - // Create a socket and connect the listener. - let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); - let _addr = waker - .block_on(stream.connect(addr).extract()) - .expect("failed to connect"); - - let (mut client, _) = listener.accept().expect("failed to accept connection"); - - // Write some data. - waker - .block_on(stream.write(DATA1)) - .expect("failed to write"); - let mut buf = vec![0; DATA1.len() + 1]; - let n = client.read(&mut buf).expect("failed to read"); - assert_eq!(&buf[0..n], DATA1); - - // Read some data. - client.write_all(DATA2).expect("failed to write"); - buf.clear(); - buf.reserve(DATA2.len() + 1); - let buf = waker.block_on(stream.read(buf)).expect("failed to read"); - assert_eq!(buf, DATA2); -} - #[test] fn recv() { let sq = test_queue(); @@ -437,16 +398,12 @@ fn recv() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -479,15 +436,11 @@ fn recv_read_buf_pool() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. 
let stream = block_on(&mut ring, tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); - block_on(&mut ring, stream.connect(addr)).expect("failed to connect"); + block_on(&mut ring, stream.connect(local_addr)).expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -516,15 +469,11 @@ fn recv_read_buf_pool_send_read_buf() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = block_on(&mut ring, tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); - block_on(&mut ring, stream.connect(addr)).expect("failed to connect"); + block_on(&mut ring, stream.connect(local_addr)).expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -565,15 +514,11 @@ fn multishot_recv() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = block_on(&mut ring, tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); - block_on(&mut ring, stream.connect(addr)).expect("failed to connect"); + block_on(&mut ring, stream.connect(local_addr)).expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -612,15 +557,11 @@ fn multishot_recv_large_send() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = block_on(&mut ring, tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); - block_on(&mut ring, stream.connect(addr)).expect("failed to connect"); + block_on(&mut ring, stream.connect(local_addr)).expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -662,15 +603,11 @@ fn multishot_recv_all_buffers_used() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = block_on(&mut ring, tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); - block_on(&mut ring, stream.connect(addr)).expect("failed to connect"); + block_on(&mut ring, stream.connect(local_addr)).expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -711,16 +648,12 @@ fn recv_n() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. 
let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -747,16 +680,12 @@ fn recv_vectored() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -796,15 +725,11 @@ fn recv_vectored_truncated() { // Bind a socket. let listener = UdpSocket::bind("127.0.0.1:0").expect("failed to bind socket"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); let socket = waker.block_on(udp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(socket.connect(addr)) + .block_on(socket.connect(local_addr)) .expect("failed to connect"); let socket_addr = sock_addr(socket.as_fd()).expect("failed to get local address"); @@ -827,21 +752,17 @@ fn recv_n_vectored() { let sq = test_queue(); let waker = Waker::new(); - is_send::, 1>>(); - is_sync::, 1>>(); + is_send::; 2], 2>>(); + is_sync::; 2], 2>>(); // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -868,16 +789,13 @@ fn recv_n_vectored() { } #[test] -fn recvfrom() { +fn recv_from() { let sq = test_queue(); let waker = Waker::new(); // Bind a socket. let listener = UdpSocket::bind("127.0.0.1:0").expect("failed to bind socket"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); let socket = waker.block_on(udp_ipv4_socket(sq)); bind_ipv4(&socket); @@ -888,17 +806,16 @@ fn recvfrom() { .expect("failed to send data"); // Receive some data. - let (buf, address, flags) = waker - .block_on(socket.recvfrom(Vec::with_capacity(DATA1.len() + 1), 0)) + let (buf, address, flags): (_, SocketAddr, _) = waker + .block_on(socket.recv_from(Vec::with_capacity(DATA1.len() + 1), 0)) .expect("failed to receive"); assert_eq!(buf, DATA1); - let address = from_storage(address); assert_eq!(address, local_addr); assert_eq!(flags, 0); } #[test] -fn recvfrom_read_buf_pool() { +fn recv_from_read_buf_pool() { const BUF_SIZE: usize = 4096; require_kernel!(5, 19); @@ -910,10 +827,7 @@ fn recvfrom_read_buf_pool() { // Bind a socket. 
let listener = UdpSocket::bind("127.0.0.1:0").expect("failed to bind socket"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); let socket = block_on(&mut ring, udp_ipv4_socket(sq)); bind_ipv4(&socket); @@ -924,25 +838,21 @@ fn recvfrom_read_buf_pool() { .expect("failed to send data"); // Receive some data. - let (buf, address, flags) = - block_on(&mut ring, socket.recvfrom(buf_pool.get(), 0)).expect("failed to receive"); + let (buf, address, flags): (_, SocketAddr, _) = + block_on(&mut ring, socket.recv_from(buf_pool.get(), 0)).expect("failed to receive"); assert_eq!(&*buf, DATA1); - let address = from_storage(address); assert_eq!(address, local_addr); assert_eq!(flags, 0); } #[test] -fn recvfrom_vectored() { +fn recv_from_vectored() { let sq = test_queue(); let waker = Waker::new(); // Bind a socket. let listener = UdpSocket::bind("127.0.0.1:0").expect("failed to bind socket"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); let socket = waker.block_on(udp_ipv4_socket(sq)); bind_ipv4(&socket); @@ -958,13 +868,12 @@ fn recvfrom_vectored() { Vec::with_capacity(2), Vec::with_capacity(7), ]; - let (bufs, address, flags) = waker - .block_on(socket.recvfrom_vectored(bufs, 0)) + let (bufs, address, flags): (_, SocketAddr, _) = waker + .block_on(socket.recv_from_vectored(bufs, 0)) .expect("failed to receive"); assert_eq!(&bufs[0], b"Hello"); assert_eq!(&bufs[1], b", "); assert_eq!(&bufs[2], b"World!"); - let address = from_storage(address); assert_eq!(address, local_addr); assert_eq!(flags, 0); } @@ -979,16 +888,12 @@ fn send() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -1012,16 +917,12 @@ fn send_zc() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -1043,16 +944,12 @@ fn send_extractor() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. 
let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -1077,16 +974,12 @@ fn send_zc_extractor() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -1112,16 +1005,12 @@ fn send_all() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -1131,7 +1020,7 @@ fn send_all() { calls: Cell::new(0), }; waker - .block_on(stream.send_all(buf)) + .block_on(stream.send_all(buf, 0)) .expect("failed to send"); let mut buf = vec![0; BadBuf::DATA.len() + 1]; let n = client.read(&mut buf).unwrap(); @@ -1147,16 +1036,12 @@ fn send_all_extract() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -1166,7 +1051,7 @@ fn send_all_extract() { calls: Cell::new(0), }; let buf = waker - .block_on(stream.send_all(buf).extract()) + .block_on(stream.send_all(buf, 0).extract()) .expect("failed to send"); assert_eq!(buf.calls.get(), 6); let mut buf = vec![0; BadBuf::DATA.len() + 1]; @@ -1186,16 +1071,12 @@ fn send_vectored() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -1223,16 +1104,12 @@ fn send_vectored_zc() { // Bind a socket. 
let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -1255,16 +1132,12 @@ fn send_vectored_extractor() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -1290,16 +1163,12 @@ fn send_vectored_zc_extractor() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -1321,21 +1190,17 @@ fn send_all_vectored() { let sq = test_queue(); let waker = Waker::new(); - is_send::, 1>>(); - is_sync::, 1>>(); + is_send::; 2], 2>>(); + is_sync::; 2], 2>>(); // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -1350,9 +1215,9 @@ fn send_all_vectored() { let n = client.read(&mut buf).unwrap(); assert_eq!(n, 30); buf.resize(n, 0); - assert_eq!(buf[..10], BadBufSlice::DATA1); - assert_eq!(buf[10..20], BadBufSlice::DATA2); - assert_eq!(buf[20..], BadBufSlice::DATA3); + assert_eq!(&buf[..10], BadBufSlice::DATA1); + assert_eq!(&buf[10..20], BadBufSlice::DATA2); + assert_eq!(&buf[20..], BadBufSlice::DATA3); } #[test] @@ -1362,16 +1227,12 @@ fn send_all_vectored_extract() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. 
let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -1389,35 +1250,31 @@ fn send_all_vectored_extract() { let n = client.read(&mut buf).unwrap(); assert_eq!(n, 30); buf.resize(n, 0); - assert_eq!(buf[..10], BadBufSlice::DATA1); - assert_eq!(buf[10..20], BadBufSlice::DATA2); - assert_eq!(buf[20..], BadBufSlice::DATA3); + assert_eq!(&buf[..10], BadBufSlice::DATA1); + assert_eq!(&buf[10..20], BadBufSlice::DATA2); + assert_eq!(&buf[20..], BadBufSlice::DATA3); } #[test] -fn sendto() { +fn send_to() { require_kernel!(6, 0); let sq = test_queue(); let waker = Waker::new(); - is_send::, ()>>(); - is_sync::, ()>>(); + is_send::, SocketAddr>>(); + is_sync::, SocketAddr>>(); // Bind a socket. let listener = UdpSocket::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); let socket = waker.block_on(udp_ipv4_socket(sq)); // Send some data. - let addr = addr_storage(&local_addr); let n = waker - .block_on(socket.sendto(DATA1, addr, 0)) - .expect("failed to sendto"); + .block_on(socket.send_to(DATA1, local_addr, 0)) + .expect("failed to send_to"); assert_eq!(n, DATA1.len()); let mut buf = vec![0; DATA1.len() + 2]; @@ -1427,7 +1284,7 @@ fn sendto() { } #[test] -fn sendto_zc() { +fn send_to_zc() { require_kernel!(6, 0); let sq = test_queue(); @@ -1435,18 +1292,14 @@ fn sendto_zc() { // Bind a socket. let listener = UdpSocket::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); let socket = waker.block_on(udp_ipv4_socket(sq)); // Send some data. - let addr = addr_storage(&local_addr); let n = waker - .block_on(socket.sendto_zc(DATA1, addr, 0)) - .expect("failed to sendto"); + .block_on(socket.send_to_zc(DATA1, local_addr, 0)) + .expect("failed to send_to"); assert_eq!(n, DATA1.len()); let mut buf = vec![0; DATA1.len() + 2]; @@ -1456,7 +1309,7 @@ fn sendto_zc() { } #[test] -fn sendto_extractor() { +fn send_to_extractor() { require_kernel!(6, 0); let sq = test_queue(); @@ -1464,18 +1317,14 @@ fn sendto_extractor() { // Bind a socket. let listener = UdpSocket::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); let socket = waker.block_on(udp_ipv4_socket(sq)); // Send some data. - let addr = addr_storage(&local_addr); let (buf, n) = waker - .block_on(socket.sendto(DATA1, addr.clone(), 0).extract()) - .expect("failed to sendto"); + .block_on(socket.send_to(DATA1, local_addr, 0).extract()) + .expect("failed to send_to"); assert!(buf == DATA1); assert_eq!(n, DATA1.len()); @@ -1486,7 +1335,7 @@ fn sendto_extractor() { } #[test] -fn sendto_zc_extractor() { +fn send_to_zc_extractor() { require_kernel!(6, 0); let sq = test_queue(); @@ -1494,18 +1343,14 @@ fn sendto_zc_extractor() { // Bind a socket. 
let listener = UdpSocket::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); let socket = waker.block_on(udp_ipv4_socket(sq)); // Send some data. - let addr = addr_storage(&local_addr); let (buf, n) = waker - .block_on(socket.sendto_zc(DATA1, addr.clone(), 0).extract()) - .expect("failed to sendto"); + .block_on(socket.send_to_zc(DATA1, local_addr, 0).extract()) + .expect("failed to send_to"); assert!(buf == DATA1); assert_eq!(n, DATA1.len()); @@ -1516,28 +1361,24 @@ fn sendto_zc_extractor() { } #[test] -fn sendto_vectored() { +fn send_to_vectored() { let sq = test_queue(); let waker = Waker::new(); - is_send::, ()>>(); - is_sync::, ()>>(); + is_send::, SocketAddr>>(); + is_sync::, SocketAddr>>(); // Bind a socket. let listener = UdpSocket::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); let socket = waker.block_on(udp_ipv4_socket(sq)); // Send some data. let bufs = ["Hello", ", ", "World!"]; - let addr = addr_storage(&local_addr); let n = waker - .block_on(socket.sendto_vectored(bufs, addr, 0)) - .expect("failed to sendto"); + .block_on(socket.send_to_vectored(bufs, local_addr, 0)) + .expect("failed to send_to"); assert_eq!(n, DATA1.len()); let mut buf = vec![0; DATA1.len() + 2]; @@ -1547,7 +1388,7 @@ fn sendto_vectored() { } #[test] -fn sendto_vectored_zc() { +fn send_to_vectored_zc() { require_kernel!(6, 1); let sq = test_queue(); @@ -1555,19 +1396,15 @@ fn sendto_vectored_zc() { // Bind a socket. let listener = UdpSocket::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); let socket = waker.block_on(udp_ipv4_socket(sq)); // Send some data. let bufs = ["Hello", ", ", "World!"]; - let addr = addr_storage(&local_addr); let n = waker - .block_on(socket.sendto_vectored_zc(bufs, addr, 0)) - .expect("failed to sendto"); + .block_on(socket.send_to_vectored_zc(bufs, local_addr, 0)) + .expect("failed to send_to"); assert_eq!(n, DATA1.len()); let mut buf = vec![0; DATA1.len() + 2]; @@ -1577,25 +1414,21 @@ fn sendto_vectored_zc() { } #[test] -fn sendto_vectored_extractor() { +fn send_to_vectored_extractor() { let sq = test_queue(); let waker = Waker::new(); // Bind a socket. let listener = UdpSocket::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); let socket = waker.block_on(udp_ipv4_socket(sq)); // Send some data. 
let bufs = ["Hello", ", ", "Mars!"]; - let addr = addr_storage(&local_addr); let (buf, n) = waker - .block_on(socket.sendto_vectored(bufs, addr.clone(), 0).extract()) - .expect("failed to sendto"); + .block_on(socket.send_to_vectored(bufs, local_addr, 0).extract()) + .expect("failed to send_to"); assert!(buf[2] == "Mars!"); assert_eq!(n, DATA2.len()); @@ -1606,7 +1439,7 @@ fn sendto_vectored_extractor() { } #[test] -fn sendto_vectored_zc_extractor() { +fn send_to_vectored_zc_extractor() { require_kernel!(6, 1); let sq = test_queue(); @@ -1614,19 +1447,15 @@ fn sendto_vectored_zc_extractor() { // Bind a socket. let listener = UdpSocket::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); let socket = waker.block_on(udp_ipv4_socket(sq)); // Send some data. let bufs = ["Hello", ", ", "Mars!"]; - let addr = addr_storage(&local_addr); let (bufs, n) = waker - .block_on(socket.sendto_vectored_zc(bufs, addr.clone(), 0).extract()) - .expect("failed to sendto"); + .block_on(socket.send_to_vectored_zc(bufs, local_addr, 0).extract()) + .expect("failed to send_to"); assert!(bufs[0] == "Hello"); assert_eq!(n, DATA2.len()); @@ -1646,16 +1475,12 @@ fn shutdown() { // Bind a socket. let listener = TcpListener::bind("127.0.0.1:0").expect("failed to bind listener"); - let local_addr = match listener.local_addr().unwrap() { - SocketAddr::V4(addr) => addr, - _ => unreachable!(), - }; + let local_addr = listener.local_addr().unwrap(); // Create a socket and connect the listener. let stream = waker.block_on(tcp_ipv4_socket(sq)); - let addr = addr_storage(&local_addr); waker - .block_on(stream.connect(addr)) + .block_on(stream.connect(local_addr)) .expect("failed to connect"); let (mut client, _) = listener.accept().expect("failed to accept connection"); @@ -1754,24 +1579,6 @@ fn set_socket_option() { assert_eq!(linger.l_linger, got_linger.l_linger); } -fn addr_storage(address: &SocketAddrV4) -> libc::sockaddr_in { - // SAFETY: a `sockaddr_in` of all zeros is valid. - let mut storage: libc::sockaddr_in = unsafe { mem::zeroed() }; - storage.sin_family = libc::AF_INET as libc::sa_family_t; - storage.sin_port = address.port().to_be(); - storage.sin_addr = libc::in_addr { - s_addr: u32::from_ne_bytes(address.ip().octets()), - }; - storage -} - -fn from_storage(addr: libc::sockaddr_in) -> SocketAddrV4 { - assert!(addr.sin_family as libc::c_int == libc::AF_INET); - let ip = Ipv4Addr::from(addr.sin_addr.s_addr.to_ne_bytes()); - let port = u16::from_be(addr.sin_port); - SocketAddrV4::new(ip, port) -} - fn peer_addr(fd: BorrowedFd) -> io::Result { let mut storage: libc::sockaddr_storage = unsafe { mem::zeroed() }; let mut len = size_of::() as u32; diff --git a/tests/ring.rs b/tests/ring.rs index ac7a1446..e38f3899 100644 --- a/tests/ring.rs +++ b/tests/ring.rs @@ -237,18 +237,6 @@ fn wake_ring() { handle.join().unwrap(); } -#[test] -fn wake_ring_before_poll_nop() { - init(); - let mut ring = Ring::new(2).unwrap(); - let sq = ring.submission_queue().clone(); - - sq.wake(); - - // Should be awoken by the wake call above. - ring.poll(None).unwrap(); -} - #[test] fn wake_ring_after_ring_dropped() { init(); @@ -283,7 +271,7 @@ fn message_sending() { // Send some messages. 
try_send_msg(&sq, msg_token, DATA1).unwrap(); waker - .block_on(pin!(send_msg(&sq, msg_token, DATA2))) + .block_on(pin!(send_msg(sq, msg_token, DATA2))) .unwrap(); assert_eq!(waker.block_on(next(msg_listener.as_mut())), Some(DATA1)); @@ -302,8 +290,8 @@ fn test_oneshot_poll() { let (mut receiver, mut sender) = pipe2().unwrap(); - let sender_write = pin!(oneshot_poll(&sq, sender.as_fd(), libc::POLLOUT as _)); - let receiver_read = pin!(oneshot_poll(&sq, receiver.as_fd(), libc::POLLIN as _)); + let sender_write = pin!(oneshot_poll(sq.clone(), sender.as_fd(), libc::POLLOUT as _)); + let receiver_read = pin!(oneshot_poll(sq, receiver.as_fd(), libc::POLLIN as _)); let event = waker.block_on(sender_write).unwrap(); assert!(event.is_writable()); @@ -323,7 +311,7 @@ fn drop_oneshot_poll() { let (receiver, sender) = pipe2().unwrap(); - let mut receiver_read = oneshot_poll(&sq, receiver.as_fd(), libc::POLLIN as _); + let mut receiver_read = oneshot_poll(sq, receiver.as_fd(), libc::POLLIN as _); start_op(&mut receiver_read); @@ -339,7 +327,7 @@ fn cancel_oneshot_poll() { let (receiver, sender) = pipe2().unwrap(); - let mut receiver_read = oneshot_poll(&sq, receiver.as_fd(), libc::POLLIN as _); + let mut receiver_read = oneshot_poll(sq, receiver.as_fd(), libc::POLLIN as _); cancel(&waker, &mut receiver_read, start_op); expect_io_errno(waker.block_on(receiver_read), libc::ECANCELED); @@ -356,7 +344,7 @@ fn test_multishot_poll() { let (mut receiver, mut sender) = pipe2().unwrap(); - let mut receiver_read = pin!(multishot_poll(&sq, receiver.as_fd(), libc::POLLIN as _)); + let mut receiver_read = pin!(multishot_poll(sq, receiver.as_fd(), libc::POLLIN as _)); start_mulitshot_op(&mut receiver_read); let mut buf = vec![0; DATA.len() + 1]; @@ -382,7 +370,7 @@ fn cancel_multishot_poll() { let (receiver, sender) = pipe2().unwrap(); - let mut receiver_read = multishot_poll(&sq, receiver.as_fd(), libc::POLLIN as _); + let mut receiver_read = multishot_poll(sq, receiver.as_fd(), libc::POLLIN as _); cancel(&waker, &mut receiver_read, start_mulitshot_op); assert!(waker.block_on(next(receiver_read)).is_none()); @@ -395,7 +383,7 @@ fn drop_multishot_poll() { let (receiver, sender) = pipe2().unwrap(); - let mut receiver_read = multishot_poll(&sq, receiver.as_fd(), libc::POLLIN as _); + let mut receiver_read = multishot_poll(sq, receiver.as_fd(), libc::POLLIN as _); start_mulitshot_op(&mut receiver_read); @@ -462,8 +450,6 @@ fn process_wait_on() { // SAFETY: these fields are set if `si_signo == SIGCHLD`. assert_eq!(unsafe { info.si_pid() }, pid as i32); assert_eq!(unsafe { info.si_status() }, libc::EXIT_SUCCESS); - assert!(unsafe { info.si_utime() } >= 1); - assert!(unsafe { info.si_stime() } >= 1); } #[test] diff --git a/tests/signals.rs b/tests/signals.rs index f281e252..00692451 100644 --- a/tests/signals.rs +++ b/tests/signals.rs @@ -251,7 +251,7 @@ impl TestHarness { // Check if the signals can be received. 
let signal_info = loop { - match receive_signal.poll_signal(&mut task_ctx) { + match Pin::new(&mut receive_signal).poll_next(&mut task_ctx) { Poll::Ready(result) => break result.unwrap().unwrap(), Poll::Pending => self.ring.poll(None).unwrap(), } diff --git a/tests/util/mod.rs b/tests/util/mod.rs index f6fb8b93..47061031 100644 --- a/tests/util/mod.rs +++ b/tests/util/mod.rs @@ -11,13 +11,15 @@ use std::future::{Future, IntoFuture}; use std::io::{self, Write}; use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; use std::os::fd::{AsFd, AsRawFd}; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::pin::Pin; use std::sync::{Arc, Once, OnceLock}; use std::task::{self, Poll}; use std::thread::{self, Thread}; use std::{fmt, mem, panic, process, ptr, str}; +use getrandom::getrandom; + use a10::fd::Descriptor; use a10::net::socket; use a10::{AsyncFd, Cancel, Ring, SubmissionQueue}; @@ -400,7 +402,7 @@ macro_rules! op_async_iter { op_async_iter!(a10::msg::MsgListener => u32); op_async_iter!(a10::net::MultishotAccept<'_> => io::Result); op_async_iter!(a10::net::MultishotRecv<'_> => io::Result); -op_async_iter!(a10::poll::MultishotPoll<'_> => io::Result); +op_async_iter!(a10::poll::MultishotPoll => io::Result); /// Return a [`Future`] that return the next item in the `iter` or `None`. pub(crate) fn next(iter: I) -> Next { @@ -458,6 +460,20 @@ pub(crate) fn remove_test_dir(path: &Path) { } } +pub(crate) fn tmp_path() -> PathBuf { + static CREATE_TEMP_DIR: Once = Once::new(); + let mut tmp_dir = std::env::temp_dir(); + tmp_dir.push("a10_tests"); + CREATE_TEMP_DIR.call_once(|| { + std::fs::create_dir_all(&tmp_dir).expect("failed to create temporary directory"); + }); + let mut n = [0; 8]; + getrandom(&mut n).expect("failed to get random data"); + let n = u64::from_ne_bytes(n); + tmp_dir.push(&format!("{n}")); + tmp_dir +} + fn panic_message<'a>(err: &'a (dyn Any + Send + 'static)) -> &'a str { match err.downcast_ref::<&str>() { Some(s) => *s,
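A note on the `unsafe impl Send`/`unsafe impl Sync` for `MsgHeader` at the top of this patch: the SAFETY comment is the standard argument for pointer-holding wrappers. `libc::msghdr` contains raw pointers, which make it automatically `!Send` and `!Sync`, but those pointers only ever reference buffers the wrapper borrows, so moving or sharing it across threads is sound. A minimal sketch of that pattern, with hypothetical `RawHeader`/`Header` types standing in for `libc::msghdr` and a10's `MsgHeader` (this is not the library's actual definition):

use std::marker::PhantomData;

// Stand-in for a `libc::msghdr`-like struct: the raw pointers make it
// automatically `!Send` and `!Sync`.
struct RawHeader {
    name: *mut u8, // Points into the borrowed address buffer.
    iov: *mut u8,  // Points into the borrowed I/O vector storage.
}

// Wrapper that ties the pointers to buffers borrowed for lifetime 'a.
struct Header<'a> {
    raw: RawHeader,
    _buffers: PhantomData<&'a mut [u8]>,
}

impl<'a> Header<'a> {
    fn new(address: &'a mut [u8], iov_storage: &'a mut [u8]) -> Header<'a> {
        Header {
            raw: RawHeader {
                name: address.as_mut_ptr(),
                iov: iov_storage.as_mut_ptr(),
            },
            _buffers: PhantomData,
        }
    }
}

// SAFETY: the pointers only reference the exclusively borrowed buffers, which
// are themselves `Send` and `Sync`, so the wrapper can move and be shared
// across threads without data races.
unsafe impl Send for Header<'_> {}
unsafe impl Sync for Header<'_> {}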
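The `all_bufs!` macro added to `tests/async_fd/io.rs` is a small test-matrix helper: the public arm binds each buffer constructor to `$new_buf` in turn and re-runs the block once per constructor. Hand-expanding a call shows the shape (`data` is a placeholder for the test input; `small_vec`, `vec`, `large_vec`, `test_read`, and `open_file` are the items from the patch):

// What `all_bufs!(for new_buf in bufs { test_read(data, open_file, new_buf) })`
// expands to, written out by hand:
{
    let new_buf = small_vec; // `Vec<u8>` with 64 bytes of capacity.
    test_read(data, open_file, new_buf)
}
{
    let new_buf = vec; // 4KB of capacity.
    test_read(data, open_file, new_buf)
}
{
    let new_buf = large_vec; // 1MB of capacity.
    test_read(data, open_file, new_buf)
}

Adding another buffer kind to the matrix then only takes a constructor function, a `TestBuf` implementation, and an entry in the macro's constructor list.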
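The new `tmp_path` helper in `tests/util/mod.rs` hands every caller a unique path: the shared `a10_tests` directory under the system temporary directory is created once via `Once`, and the file name is a random 64-bit number from `getrandom`, so tests running in parallel cannot race on file names. A hypothetical test using it (the file contents here are made up):

#[test]
fn each_test_gets_its_own_file() {
    // Unique path per call; no coordination with other tests needed.
    let path = tmp_path();
    std::fs::write(&path, b"some test data").expect("failed to write");
    let got = std::fs::read(&path).expect("failed to read");
    assert_eq!(got, b"some test data");
}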