Skip to content

Commit

Permalink
Replace HashMaps with a bit-vector for unique depth computation (#201)
Browse files Browse the repository at this point in the history
  • Loading branch information
sampsyo authored Dec 29, 2024
2 parents 49594a6 + f02cfe4 commit 0d5d820
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 13 deletions.
7 changes: 7 additions & 0 deletions flatgfa/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions flatgfa/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ path = "src/cli/main.rs"
[dependencies]
argh = "0.1.12"
atoi = "2.0.0"
bit-vec = "0.8.0"
bstr = "1.10.0"
memchr = "2.7.4"
memmap = "0.7.0"
Expand Down
2 changes: 1 addition & 1 deletion flatgfa/src/cli/cmds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ pub fn depth(gfa: &flatgfa::FlatGFA) {
"{}\t{}\t{}",
name,
depths[id.index()],
uniq_paths[id.index()].len()
uniq_paths[id.index()],
);
}
}
Expand Down
37 changes: 25 additions & 12 deletions flatgfa/src/ops/depth.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,35 @@
use crate::flatgfa;
use std::collections::HashSet;
use bit_vec::BitVec;

pub fn depth(gfa: &flatgfa::FlatGFA) -> (Vec<usize>, Vec<HashSet<usize>>) {
// Initialize node depth
/// Compute the *depth* of each segment in the variation graph.
///
/// The depth is defined to be the number of times that a path traverses a given
/// segment. We return two values: the ordinary depth and the *unique* depth,
/// which only counts each path that traverses a given segment once.
///
/// Both outputs are depth values indexed by segment ID.
pub fn depth(gfa: &flatgfa::FlatGFA) -> (Vec<usize>, Vec<usize>) {
// Our output vectors: the ordinary and unique depths of each segment.
let mut depths = vec![0; gfa.segs.len()];
// Initialize uniq_paths
let mut uniq_paths = Vec::<HashSet<usize>>::new();
uniq_paths.resize(gfa.segs.len(), HashSet::new());
// do not assume that each handle in `gfa.steps()` is unique
for (idx, path) in gfa.paths.all().iter().enumerate() {
let mut uniq_depths = vec![0; gfa.segs.len()];

// This bit vector keeps track of whether the current path has already
// traversed a given segment, and therefore whether we should ignore
// subsequent traversals (for the purpose of counting unique depth).
let mut seen = BitVec::from_elem(gfa.segs.len(), false);

for path in gfa.paths.all().iter() {
seen.clear(); // All segments are unseen.
for step in &gfa.steps[path.steps] {
let seg_id = step.segment().index();
// Increment depths
depths[seg_id] += 1;
// Update uniq_paths
uniq_paths[seg_id].insert(idx);
if !seen[seg_id] {
// The first traversal of this path over this segment.
uniq_depths[seg_id] += 1;
seen.set(seg_id, true);
}
}
}

(depths, uniq_paths)
(depths, uniq_depths)
}

0 comments on commit 0d5d820

Please sign in to comment.