Skip to content

Commit

Permalink
Merge branch 'fix/export-tests' into fix/circular-new-pos
Browse files Browse the repository at this point in the history
  • Loading branch information
mmolari committed Jan 8, 2025
2 parents 01028e3 + fa0489f commit 4dfce17
Show file tree
Hide file tree
Showing 3 changed files with 296 additions and 140 deletions.
19 changes: 19 additions & 0 deletions packages/pangraph/src/circularize/circularize_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,25 @@ impl Edge {
pub fn oriented_equal(&self, other: &Edge) -> bool {
self.n1 == other.n1 && self.n2 == other.n2
}

// orient such that the first node has a lower block id
// if block ids are equal, orient such that the first node has a forward strand
// if possible
pub fn conventional_orientation(&self) -> Edge {
if (self.n1.bid < self.n2.bid) || (self.n1.bid == self.n2.bid && self.n1.strand == Strand::Forward) {
*self
} else {
self.invert()
}
}

// returns a tuple (bid1, bid2, 0/1, 0/1) where 0/1 indicates the strand
// used to decide ordering between different edges
pub fn to_tuple(&self) -> (BlockId, BlockId, isize, isize) {
let s1 = if self.n1.strand == Strand::Forward { 0 } else { 1 };
let s2 = if self.n2.strand == Strand::Forward { 0 } else { 1 };
(self.n1.bid, self.n2.bid, s1, s2)
}
}

impl PartialEq for Edge {
Expand Down
159 changes: 122 additions & 37 deletions packages/pangraph/src/commands/export/export_core_genome.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use crate::pangraph::pangraph::Pangraph;
use crate::pangraph::pangraph_block::RecordNaming;
use clap::Parser;
use eyre::{Context, Report};
use itertools::{izip, Itertools};
use std::collections::{BTreeMap, BTreeSet};
use itertools::Itertools;
use std::collections::BTreeMap;

#[derive(Parser, Debug, Default, Clone)]
pub struct ExportCoreAlignmentParams {
Expand Down Expand Up @@ -51,29 +51,22 @@ pub fn export_core_genome(args: PangraphExportCoreAlignmentArgs) -> Result<(), R
/// If aligned is True, it returns aligned sequences, with gaps for deletions and no insertions.
/// If aligned is False, it returns the full unaligned sequences.
fn core_block_aln(graph: &Pangraph, params: &ExportCoreAlignmentParams) -> Result<Vec<(String, String)>, Report> {
let core_block_ids = graph.core_block_ids();
let core_block_ids: Vec<_> = graph.core_block_ids().collect();
let guide_path_id = graph.path_id_by_name(&params.guide_strain)?;
let guide_path = &graph.paths[&guide_path_id];

let guide_block_info: BTreeMap<_, _> = guide_path
// Extract sequences for all core blocks
let mut records = vec![];
for (bid, strand) in guide_path
.nodes
.iter()
.map(|node_id| {
let node = &graph.nodes[node_id];
(node.block_id(), node.strand())
})
.collect();

let guide_block_ids: BTreeSet<_> = guide_block_info.keys().collect();
let guide_block_values: Vec<_> = guide_block_info.values().collect();

// Extract sequences for all core blocks
let mut records = vec![];
for (bid, &strand) in izip!(core_block_ids, guide_block_values) {
if !guide_block_ids.contains(&bid) {
continue; // Only consider core blocks
}

.filter(|(bid, _)| core_block_ids.contains(bid))
// Filter out non-core blocks
{
let block = &graph.blocks[&bid];
let mut block_records: Vec<_> = block
.sequences(graph, !params.unaligned, RecordNaming::Path)
Expand Down Expand Up @@ -150,6 +143,35 @@ mod tests {

#[test]
fn test_core_block_aln_general_case() {
// path A: n1+, n2-
// path B: n4+, n5+, n3-

// block 1:
// cons: ACCTATCGTGATCGTTCGAT
// A n1: ACCTATCGT---CGTTCGAT
// B n3: ACTTATCGTGATCGTTCGAT
// reverse-complement:
// cons: ATCGAACGATCACGATAGGT
// A n1: ATCGAACG---ACGATAGGT
// B n3: ATCGAACGATCACGATAAGT

// block 2:
// cons: CTGCAA GTCTGATCTAGTTA
// A n2: CTGCAATTTGTCTGATGTAGTTA
// B n4: CT--AA GTCTGATCTAGTTA
// reverse-complement
// cons: TAACTAGATCAGAC TTGCAG
// A n2: TAACTACATCAGACAAATTGCAG
// B n4: TAACTAGATCAGAC TT--AG

// core (guide A):
// A: ACCTATCGT---CGTTCGATTAACTACATCAGACAAATTGCAG
// B: ACTTATCGTGATCGTTCGATTAACTAGATCAGAC TT--AG

// core (guide B):
// A: CTGCAATTTGTCTGATGTAGTTAATCGAACG---ACGATAGGT
// B: CT--AA GTCTGATCTAGTTAATCGAACGATCACGATAAGT

let graph = Pangraph::from_str(indoc! {
// language=json
r#"
Expand All @@ -158,48 +180,59 @@ mod tests {
"0": {
"id": 0,
"nodes": [1, 2],
"tot_len": 100,
"tot_len": 40,
"circular": false,
"name": "Path A"
},
"1": {
"id": 1,
"nodes": [3, 4],
"tot_len": 100,
"nodes": [4, 5, 3],
"tot_len": 48,
"circular": false,
"name": "Path B"
}
},
"blocks": {
"1": {
"id": 1,
"consensus": "ACGTACGT",
"consensus": "ACCTATCGTGATCGTTCGAT",
"alignments": {
"1": {
"subs": [],
"dels": [],
"dels": [{"pos": 9, "len": 3}],
"inss": []
},
"2": {
"subs": [{"pos": 3, "alt": "T"}],
"3": {
"subs": [{"pos": 2, "alt": "T"}],
"dels": [],
"inss": []
}
}
},
"2": {
"id": 2,
"consensus": "TTGCA",
"consensus": "CTGCAAGTCTGATCTAGTTA",
"alignments": {
"3": {
"subs": [],
"dels": [{"pos": 2, "len": 1}],
"inss": []
"2": {
"subs": [{"pos": 13, "alt": "G"}],
"dels": [],
"inss": [{"pos": 6, "seq": "TTT"}]
},
"4": {
"subs": [],
"dels": [{"pos": 2, "len": 2}],
"inss": []
}
}
},
"3": {
"id": 3,
"consensus": "AGGCTACGAT",
"alignments": {
"5": {
"subs": [],
"dels": [],
"inss": [{"pos": 0, "seq": "AAA"}]
"inss": []
}
}
}
Expand All @@ -210,40 +243,92 @@ mod tests {
"block_id": 1,
"path_id": 0,
"strand": "+",
"position": [0, 8]
"position": [0, 17]
},
"2": {
"id": 2,
"block_id": 1,
"block_id": 2,
"path_id": 0,
"strand": "-",
"position": [0, 8]
"position": [17, 40]
},
"3": {
"id": 3,
"block_id": 2,
"block_id": 1,
"path_id": 1,
"strand": "+",
"position": [0, 5]
"strand": "-",
"position": [28, 48]
},
"4": {
"id": 4,
"block_id": 2,
"path_id": 1,
"strand": "+",
"position": [0, 5]
"position": [0, 18]
},
"5": {
"id": 5,
"block_id": 3,
"path_id": 1,
"strand": "+",
"position": [18, 28]
}
}
}
"#})
.unwrap();

let params = ExportCoreAlignmentParams {
guide_strain: o!("Path A"),
unaligned: false,
};

let expected = vec![
(o!("Path A"), o!("ACCTATCGT---CGTTCGATTAACTACATCAGACTTGCAG")),
(o!("Path B"), o!("ACTTATCGTGATCGTTCGATTAACTAGATCAGACTT--AG")),
];
let actual = core_block_aln(&graph, &params).unwrap();

assert_eq!(expected, actual);

let params = ExportCoreAlignmentParams {
guide_strain: o!("Path A"),
unaligned: true,
};

let expected = vec![(o!("Path A"), o!("ACGTTGTCA--")), (o!("Path B"), o!("TT--CGAATTTGCA"))];
let expected = vec![
(o!("Path A"), o!("ACCTATCGTCGTTCGATTAACTACATCAGACAAATTGCAG")),
(o!("Path B"), o!("ACTTATCGTGATCGTTCGATTAACTAGATCAGACTTAG")),
];

let actual = core_block_aln(&graph, &params).unwrap();

assert_eq!(expected, actual);

let params = ExportCoreAlignmentParams {
guide_strain: o!("Path B"),
unaligned: false,
};

let expected = vec![
(o!("Path A"), o!("CTGCAAGTCTGATGTAGTTAATCGAACG---ACGATAGGT")),
(o!("Path B"), o!("CT--AAGTCTGATCTAGTTAATCGAACGATCACGATAAGT")),
];

let actual = core_block_aln(&graph, &params).unwrap();

assert_eq!(expected, actual);

let params = ExportCoreAlignmentParams {
guide_strain: o!("Path B"),
unaligned: true,
};

let expected = vec![
(o!("Path A"), o!("CTGCAATTTGTCTGATGTAGTTAATCGAACGACGATAGGT")),
(o!("Path B"), o!("CTAAGTCTGATCTAGTTAATCGAACGATCACGATAAGT")),
];

let actual = core_block_aln(&graph, &params).unwrap();

assert_eq!(expected, actual);
Expand Down
Loading

0 comments on commit 4dfce17

Please sign in to comment.