diff --git a/flatgfa-py/.gitignore b/flatgfa-py/.gitignore new file mode 100644 index 00000000..2f7896d1 --- /dev/null +++ b/flatgfa-py/.gitignore @@ -0,0 +1 @@ +target/ diff --git a/flatgfa-py/Cargo.lock b/flatgfa-py/Cargo.lock new file mode 100644 index 00000000..6539964e --- /dev/null +++ b/flatgfa-py/Cargo.lock @@ -0,0 +1,546 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "argh" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7af5ba06967ff7214ce4c7419c7d185be7ecd6cc4965a8f6e1d8ce0398aad219" +dependencies = [ + "argh_derive", + "argh_shared", +] + +[[package]] +name = "argh_derive" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56df0aeedf6b7a2fc67d06db35b09684c3e8da0c95f8f27685cb17e08413d87a" +dependencies = [ + "argh_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "argh_shared" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5693f39141bda5760ecc4111ab08da40565d1771038c4a0250f03457ec707531" +dependencies = [ + "serde", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "bstr" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "flatgfa" +version = "0.1.0" +dependencies = [ + "argh", + "atoi", + "bstr", + "memchr", + "memmap", + "num_enum", + "tinyvec", + "zerocopy", +] + +[[package]] +name = "flatgfa-py" +version = "0.1.0" +dependencies = [ + "flatgfa", + "memmap", + "pyo3", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "indoc" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" + +[[package]] +name = "libc" +version = "0.2.154" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num-traits" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_enum" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02339744ee7253741199f897151b38e72257d13802d4ee837285cc2990a90845" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "681030a937600a36906c185595136d26abfebb4aa9c65701cefcaf8578bb982b" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "parking_lot" +version = 
"0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + +[[package]] +name = "proc-macro-crate" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "parking_lot", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.200" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddc6f9cc94d67c0e21aaf7eda3a010fd3af78ebf6e096aa6e2e13c79749cce4f" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.200" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"856f046b9400cee3c8c94ed572ecdb752444c24528c035cd35882aad6f492bcb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "syn" +version = "2.0.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" + +[[package]] +name = "toml_datetime" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" + +[[package]] +name = "toml_edit" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/flatgfa-py/Cargo.toml b/flatgfa-py/Cargo.toml new file mode 100644 index 00000000..e7579488 --- /dev/null +++ b/flatgfa-py/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "flatgfa-py" +version = "0.1.0" +edition = "2021" + +[lib] +name = "flatgfa" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = "0.21.2", features = ["abi3-py38"] } +flatgfa = { path = "../flatgfa" } +memmap = "0.7.0" diff --git a/flatgfa-py/README.md b/flatgfa-py/README.md new file mode 100644 index 00000000..453bcfff --- /dev/null +++ 
b/flatgfa-py/README.md @@ -0,0 +1,24 @@ +# Python Bindings for FlatGFA + +This is a Python wrapper for the FlatGFA library. +It is currently in a "proof of concept" state. + +To build it, first install [Maturin][]: + + pipx install maturin + +Next, we'll build and install the Python library in our virtualenv. +Starting from the repository root: + + uv venv # Unless you already created the virtualenv. + uv pip install pip # Maturin depends on pip. + source .venv/bin/activate + cd flatgfa-py + maturin develop + +Now the `flatgfa` module is available to Python programs. +Try our example: + + python example.py + +[maturin]: https://www.maturin.rs diff --git a/flatgfa-py/example.py b/flatgfa-py/example.py new file mode 100644 index 00000000..ac396156 --- /dev/null +++ b/flatgfa-py/example.py @@ -0,0 +1,9 @@ +import flatgfa + +g = flatgfa.parse("../bench/graphs/test.k.gfa") +print(g.segments[2]) +for seg in g.segments: + print(seg.name, seg.id, seg.sequence()) + +g = flatgfa.load("../bench/graphs/test.k.flatgfa") +print(len(g.segments)) diff --git a/flatgfa-py/pyproject.toml b/flatgfa-py/pyproject.toml new file mode 100644 index 00000000..19e5d9eb --- /dev/null +++ b/flatgfa-py/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = ["maturin>=1.0,<2.0"] +build-backend = "maturin" + +[tool.maturin] +features = ["pyo3/extension-module"] diff --git a/flatgfa-py/src/lib.rs b/flatgfa-py/src/lib.rs new file mode 100644 index 00000000..1a2b1955 --- /dev/null +++ b/flatgfa-py/src/lib.rs @@ -0,0 +1,143 @@ +use flatgfa::flatgfa::{FlatGFA, GFABuilder, HeapStore}; +use pyo3::prelude::*; +use pyo3::types::PyBytes; + +#[pyfunction] +fn parse(filename: &str) -> PyFlatGFA { + let file = flatgfa::file::map_file(filename); + let store = flatgfa::parse::Parser::for_heap().parse_mem(file.as_ref()); + PyFlatGFA(InternalStore::Heap(Box::new(store))) +} + +#[pyfunction] +fn load(filename: &str) -> PyFlatGFA { + let mmap = flatgfa::file::map_file(filename); + 
PyFlatGFA(InternalStore::File(mmap)) +} + +enum InternalStore { + Heap(Box), + File(memmap::Mmap), +} + +#[pyclass(frozen)] +#[pyo3(name = "FlatGFA")] +struct PyFlatGFA(InternalStore); + +#[pymethods] +impl PyFlatGFA { + #[getter] + fn segments(self_: Py) -> SegmentList { + SegmentList { gfa: GFARef(self_) } + } +} + +#[derive(Clone)] +struct GFARef(Py); + +impl GFARef { + fn view(&self) -> FlatGFA { + // TK It seems wasteful to check the type of store every time... and to construct + // the view every time. It would be great if we could somehow construct the view + // once up front and hand it out to the various ancillary objects, but they need + // to be assured that the store will survive long enough. + match self.0.get().0 { + InternalStore::Heap(ref store) => store.view(), + InternalStore::File(ref mmap) => flatgfa::file::view(mmap), + } + } +} + +#[pyclass] +struct SegmentList { + gfa: GFARef, +} + +#[pymethods] +impl SegmentList { + fn __getitem__(&self, idx: u32) -> PySegment { + PySegment { + gfa: self.gfa.clone(), + id: idx, + } + } + + fn __iter__(&self) -> SegmentIter { + SegmentIter { + gfa: self.gfa.clone(), + idx: 0, + } + } + + fn __len__(&self) -> usize { + self.gfa.view().segs.len() + } +} + +#[pyclass] +struct SegmentIter { + gfa: GFARef, + idx: u32, +} + +#[pymethods] +impl SegmentIter { + fn __iter__(self_: Py) -> Py { + self_ + } + + fn __next__(&mut self) -> Option { + let view = self.gfa.view(); + if self.idx < view.segs.len() as u32 { + let seg = PySegment { + gfa: self.gfa.clone(), + id: self.idx, + }; + self.idx += 1; + Some(seg) + } else { + None + } + } +} + +#[pyclass(frozen)] +#[pyo3(name = "Segment")] +struct PySegment { + gfa: GFARef, + #[pyo3(get)] + id: u32, +} + +#[pymethods] +impl PySegment { + /// Get the nucleotide sequence for the segment as a byte string. + /// + /// This copies the underlying sequence data to construct the Python bytes object, + /// so it is slow to use for large sequences.
+ fn sequence<'py>(&self, py: Python<'py>) -> Bound<'py, PyBytes> { + let view = self.gfa.view(); + let seg = view.segs[self.id as usize]; + let seq = view.get_seq(&seg); + PyBytes::new_bound(py, seq) + } + + #[getter] + fn name(&self) -> usize { + let view = self.gfa.view(); + let seg = view.segs[self.id as usize]; + seg.name + } + + fn __repr__(&self) -> String { + format!("", self.id) + } +} + +#[pymodule] +#[pyo3(name = "flatgfa")] +fn pymod(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_function(wrap_pyfunction!(parse, m)?)?; + m.add_function(wrap_pyfunction!(load, m)?)?; + Ok(()) +} diff --git a/flatgfa/src/file.rs b/flatgfa/src/file.rs index af471615..f8b1e5b6 100644 --- a/flatgfa/src/file.rs +++ b/flatgfa/src/file.rs @@ -1,5 +1,6 @@ use crate::flatgfa; use crate::pool::Span; +use memmap::{Mmap, MmapMut}; use std::mem::{size_of, size_of_val}; use tinyvec::SliceVec; use zerocopy::{AsBytes, FromBytes, FromZeroes}; @@ -308,3 +309,28 @@ pub fn dump(gfa: &flatgfa::FlatGFA, buf: &mut [u8]) { pub fn size(gfa: &flatgfa::FlatGFA) -> usize { Toc::full(gfa).size() } + +pub fn map_file(name: &str) -> Mmap { + let file = std::fs::File::open(name).unwrap(); + unsafe { Mmap::map(&file) }.unwrap() +} + +pub fn map_new_file(name: &str, size: u64) -> MmapMut { + let file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(name) + .unwrap(); + file.set_len(size).unwrap(); + unsafe { MmapMut::map_mut(&file) }.unwrap() +} + +pub fn map_file_mut(name: &str) -> MmapMut { + let file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(name) + .unwrap(); + unsafe { MmapMut::map_mut(&file) }.unwrap() +} diff --git a/flatgfa/src/main.rs b/flatgfa/src/main.rs index 87440454..bbd6abe7 100644 --- a/flatgfa/src/main.rs +++ b/flatgfa/src/main.rs @@ -2,32 +2,6 @@ use argh::FromArgs; use flatgfa::flatgfa::{FlatGFA, GFABuilder}; use flatgfa::parse::Parser; use flatgfa::{cmds, file, parse, print}; -use memmap::{Mmap, MmapMut}; - -fn 
map_file(name: &str) -> Mmap { - let file = std::fs::File::open(name).unwrap(); - unsafe { Mmap::map(&file) }.unwrap() -} - -fn map_new_file(name: &str, size: u64) -> MmapMut { - let file = std::fs::OpenOptions::new() - .read(true) - .write(true) - .create(true) - .open(name) - .unwrap(); - file.set_len(size).unwrap(); - unsafe { MmapMut::map_mut(&file) }.unwrap() -} - -fn map_file_mut(name: &str) -> MmapMut { - let file = std::fs::OpenOptions::new() - .read(true) - .write(true) - .open(name) - .unwrap(); - unsafe { MmapMut::map_mut(&file) }.unwrap() -} #[derive(FromArgs)] /// Convert between GFA text and FlatGFA binary formats. @@ -86,11 +60,11 @@ fn main() -> Result<(), &'static str> { let gfa = match args.input { Some(name) => { if args.mutate { - mmap_mut = map_file_mut(&name); + mmap_mut = file::map_file_mut(&name); slice_store = file::view_store(&mut mmap_mut); slice_store.view() } else { - mmap = map_file(&name); + mmap = file::map_file(&name); file::view(&mmap) } } @@ -98,7 +72,7 @@ fn main() -> Result<(), &'static str> { // Parse from stdin or a file. store = match args.input_gfa { Some(name) => { - let file = map_file(&name); + let file = file::map_file(&name); Parser::for_heap().parse_mem(file.as_ref()) } None => { @@ -144,7 +118,7 @@ fn main() -> Result<(), &'static str> { fn dump(gfa: &FlatGFA, output: &Option) { match output { Some(name) => { - let mut mmap = map_new_file(name, file::size(gfa) as u64); + let mut mmap = file::map_new_file(name, file::size(gfa) as u64); file::dump(gfa, &mut mmap); mmap.flush().unwrap(); } @@ -159,7 +133,7 @@ fn prealloc_translate(in_name: Option<&str>, out_name: &str, prealloc_factor: us let (input_buf, empty_toc) = match in_name { // If we have an input GFA file, we can estimate its sizes for the TOC. 
Some(name) => { - file = map_file(name); + file = file::map_file(name); let toc = parse::estimate_toc(file.as_ref()); (Some(file.as_ref()), toc) } @@ -169,7 +143,7 @@ fn prealloc_translate(in_name: Option<&str>, out_name: &str, prealloc_factor: us }; // Create a file with an empty table of contents. - let mut mmap = map_new_file(out_name, empty_toc.size() as u64); + let mut mmap = file::map_new_file(out_name, empty_toc.size() as u64); let (toc, store) = file::init(&mut mmap, empty_toc); // Parse the input into the file.