Skip to content

Commit

Permalink
simplifications, code comments, and better benchmarking
Browse files Browse the repository at this point in the history
  • Loading branch information
susan-garry committed Jul 10, 2024
1 parent a9b0032 commit 71a7e53
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 50 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ jobs:
# Install odgi
- name: Pull odgi container
run: |
docker pull quay.io/biocontainers/odgi:0.8.3--py310h6cc9453_0
docker tag quay.io/biocontainers/odgi:0.8.3--py310h6cc9453_0 odgi
docker pull quay.io/biocontainers/odgi:0.8.6--py310hdf79db3_1
docker tag quay.io/biocontainers/odgi:0.8.6--py310hdf79db3_1 odgi
- name: Install odgi alias
run: |
mkdir -p $HOME/.local/bin
Expand Down
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@ test-slow-odgi: fetch
make -C slow_odgi test

.PHONY: test-flatgfa
test-flatgfa: fetch-og
test-flatgfa: fetch
cd flatgfa ; cargo build

turnt -e flatgfa_mem -e flatgfa_file -e flatgfa_file_inplace tests/*.gfa

-turnt --save -v -e chop_oracle_fgfa tests/*.og
-turnt --save -v -e chop_oracle_fgfa tests/*.gfa
turnt -v -e flatgfa_chop tests/*.gfa

-turnt --save -v -e odgi_depth tests/*.og
-turnt --save -v -e odgi_depth tests/*.gfa
turnt -v -e flatgfa_depth tests/*.gfa

clean:
Expand Down
1 change: 0 additions & 1 deletion bench/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def hyperfine(cmds):
"hyperfine",
"--export-json",
tmp.name,
# "--shell=none",
"--warmup=1",
"--min-runs=3",
"--max-runs=16",
Expand Down
2 changes: 1 addition & 1 deletion bench/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ cmd.slow_odgi = '{slow_odgi} depth {files[gfa]}'

[modes.chop]
cmd.flatgfa = '{fgfa} -i {files[flatgfa]} chop -c 3'
cmd.odgi = '{odgi} chop -i {files[og]} -c 3 -o - | {odgi} view -g -i - | {slow_odgi} norm --nl'
cmd.odgi = '{odgi} chop -i {files[og]} -c 3 -o -'
cmd.slow_odgi = '{slow_odgi} chop {files[gfa]} -n 3'
61 changes: 18 additions & 43 deletions flatgfa/src/cmds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,16 +347,15 @@ pub fn chop<'a>(

let mut flat = flatgfa::HeapGFAStore::default();

// when segment S is chopped into segments S1 through S2 (exclusive),
// seg_map[S.name] = (Id(S1.name), Id(S2.name)). If S is not chopped: S=S1, S2.name = S1.name+1
let mut seg_map: Vec<(Id<Segment>, Id<Segment>)> = Vec::new();
// The smallest id (>0) which does not already belong to a segment in the new graph
let mut max_node_id = 1;

fn empty_span<T>() -> Span<T> {
Span::new(Id::new(0), Id::new(0))
}

fn link_forward(flat: &mut GFAStore<'static, HeapFamily>, range: &(Id<Segment>, Id<Segment>)) {
// Link segments range.0 through range.1 from head to tail
let overlap = empty_span();
let overlap = Span::new_empty();
flat.add_links(
(range.0.index()..(range.1.index()-1)).map(|idx| {
Link {
Expand All @@ -376,7 +375,7 @@ pub fn chop<'a>(
let id = flat.segs.add(Segment {
name: max_node_id,
seq: seg.seq,
optional: empty_span()
optional: Span::new_empty()
// TODO: Optional data may stay valid when seg not chopped?
});
max_node_id += 1;
Expand All @@ -394,7 +393,7 @@ pub fn chop<'a>(
flat.segs.add(Segment {
name: max_node_id,
seq: Span::new(Id::new(offset), Id::new(offset + args.c)),
optional: empty_span()
optional: Span::new_empty()
});
offset += args.c;
max_node_id += 1;
Expand All @@ -403,7 +402,7 @@ pub fn chop<'a>(
flat.segs.add(Segment {
name: max_node_id,
seq: Span::new(Id::new(offset), seq_end),
optional: empty_span()
optional: Span::new_empty()
});
max_node_id += 1;
let new_seg_range = (segs_start, flat.segs.next_id());
Expand Down Expand Up @@ -452,7 +451,7 @@ pub fn chop<'a>(
flat.paths.add(Path{
name: path.name,
steps: Span::new(path_start, path_end),
overlaps: Span::new(flat.overlaps.next_id(), flat.overlaps.next_id())
overlaps: Span::new_empty()
});
}

Expand All @@ -467,38 +466,20 @@ pub fn chop<'a>(
let new_from = {
let old_from = link.from;
let chopped_segs = seg_map[old_from.segment().index()];
match old_from.orient() {
Orientation::Forward => {
Handle::new(
chopped_segs.1 - 1,
Orientation::Forward
)
},
Orientation::Backward => {
Handle::new(
chopped_segs.0,
Orientation::Backward
)
}
}
let seg_id = match old_from.orient() {
Orientation::Forward => chopped_segs.1 - 1,
Orientation::Backward => chopped_segs.0
};
Handle::new(seg_id, old_from.orient())
};
let new_to = {
let old_to = link.to;
let chopped_segs = seg_map[old_to.segment().index()];
match old_to.orient() {
Orientation::Forward => {
Handle::new(
chopped_segs.0,
Orientation::Forward
)
},
Orientation::Backward => {
Handle::new(
chopped_segs.1 - 1,
Orientation::Backward
)
}
}
let seg_id = match old_to.orient() {
Orientation::Forward => chopped_segs.0,
Orientation::Backward => chopped_segs.1 - 1,
};
Handle::new(seg_id, old_to.orient())
};
flat.add_link(
new_from,
Expand All @@ -509,10 +490,4 @@ pub fn chop<'a>(
}

Ok(flat)

// TODO: Once we figure out how to handle links, fix them?
// * Is there any logical correspondence between links and edges?
// * Should we preserve/generate updated CIGAR string information? Do we care about links if not?
// * * * Maybe generating links/CIGAR strings should be left to whoever is analyzing the graph?
// * Go back and add/update links between chopped nodes
}

0 comments on commit 71a7e53

Please sign in to comment.