diff --git a/flatgfa/Cargo.lock b/flatgfa/Cargo.lock index 1a1a4b4c..4416ef80 100644 --- a/flatgfa/Cargo.lock +++ b/flatgfa/Cargo.lock @@ -48,6 +48,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bstr" version = "1.10.0" @@ -108,6 +114,7 @@ version = "0.1.0" dependencies = [ "argh", "atoi", + "bit-vec", "bstr", "memchr", "memmap", diff --git a/flatgfa/Cargo.toml b/flatgfa/Cargo.toml index b46e4528..56cc9151 100644 --- a/flatgfa/Cargo.toml +++ b/flatgfa/Cargo.toml @@ -10,6 +10,7 @@ path = "src/cli/main.rs" [dependencies] argh = "0.1.12" atoi = "2.0.0" +bit-vec = "0.8.0" bstr = "1.10.0" memchr = "2.7.4" memmap = "0.7.0" diff --git a/flatgfa/src/cli/cmds.rs b/flatgfa/src/cli/cmds.rs index f0c7592c..a9d1274d 100644 --- a/flatgfa/src/cli/cmds.rs +++ b/flatgfa/src/cli/cmds.rs @@ -204,7 +204,7 @@ pub fn depth(gfa: &flatgfa::FlatGFA) { "{}\t{}\t{}", name, depths[id.index()], - uniq_paths[id.index()].len() + uniq_paths[id.index()], ); } } diff --git a/flatgfa/src/ops/depth.rs b/flatgfa/src/ops/depth.rs index 5307afa7..3d5ab66b 100644 --- a/flatgfa/src/ops/depth.rs +++ b/flatgfa/src/ops/depth.rs @@ -1,22 +1,35 @@ use crate::flatgfa; -use std::collections::HashSet; +use bit_vec::BitVec; -pub fn depth(gfa: &flatgfa::FlatGFA) -> (Vec<usize>, Vec<HashSet<usize>>) { - // Initialize node depth +/// Compute the *depth* of each segment in the variation graph. +/// +/// The depth is defined to be the number of times that a path traverses a given +/// segment. We return two values: the ordinary depth and the *unique* depth, +/// which only counts each path that tarverses a given segment once. +/// +/// Both outputs are depth values indexed by segment ID. +pub fn depth(gfa: &flatgfa::FlatGFA) -> (Vec<usize>, Vec<usize>) { + // Our output vectors: the ordinary and unique depths of each segment. let mut depths = vec![0; gfa.segs.len()]; - // Initialize uniq_paths - let mut uniq_paths = Vec::<HashSet<usize>>::new(); - uniq_paths.resize(gfa.segs.len(), HashSet::new()); - // do not assume that each handle in `gfa.steps()` is unique - for (idx, path) in gfa.paths.all().iter().enumerate() { + let mut uniq_depths = vec![0; gfa.segs.len()]; + + // This bit vector keeps track of whether the current path has already + // traversed a given segment, and therefore whether we should ignore + // subsequent traversals (for the purpose of counting unique depth). + let mut seen = BitVec::from_elem(gfa.segs.len(), false); + + for path in gfa.paths.all().iter() { + seen.clear(); // All segments are unseen. for step in &gfa.steps[path.steps] { let seg_id = step.segment().index(); - // Increment depths depths[seg_id] += 1; - // Update uniq_paths - uniq_paths[seg_id].insert(idx); + if seen[seg_id] { + // The first traversal of this path over this segment. + uniq_depths[seg_id] += 1; + seen.set(seg_id, true); + } } } - (depths, uniq_paths) + (depths, uniq_depths) }