Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: node positions #104

Merged
merged 7 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions packages/pangraph/src/circularize/circularize_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,25 @@ impl Edge {
pub fn oriented_equal(&self, other: &Edge) -> bool {
self.n1 == other.n1 && self.n2 == other.n2
}

// orient such that the first node has a lower block id
// if block ids are equal, orient such that the first node has a forward strand
// if possible
pub fn conventional_orientation(&self) -> Edge {
if (self.n1.bid < self.n2.bid) || (self.n1.bid == self.n2.bid && self.n1.strand == Strand::Forward) {
*self
} else {
self.invert()
}
}

// returns a tuple (bid1, bid2, 0/1, 0/1) where 0/1 indicates the strand
// used to decide ordering between different edges
pub fn to_tuple(&self) -> (BlockId, BlockId, isize, isize) {
let s1 = if self.n1.strand == Strand::Forward { 0 } else { 1 };
let s2 = if self.n2.strand == Strand::Forward { 0 } else { 1 };
(self.n1.bid, self.n2.bid, s1, s2)
}
}

impl PartialEq for Edge {
Expand Down
159 changes: 122 additions & 37 deletions packages/pangraph/src/commands/export/export_core_genome.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use crate::pangraph::pangraph::Pangraph;
use crate::pangraph::pangraph_block::RecordNaming;
use clap::Parser;
use eyre::{Context, Report};
use itertools::{izip, Itertools};
use std::collections::{BTreeMap, BTreeSet};
use itertools::Itertools;
use std::collections::BTreeMap;

#[derive(Parser, Debug, Default, Clone)]
pub struct ExportCoreAlignmentParams {
Expand Down Expand Up @@ -51,29 +51,22 @@ pub fn export_core_genome(args: PangraphExportCoreAlignmentArgs) -> Result<(), R
/// If aligned is True, it returns aligned sequences, with gaps for deletions and no insertions.
/// If aligned is False, it returns the full unaligned sequences.
fn core_block_aln(graph: &Pangraph, params: &ExportCoreAlignmentParams) -> Result<Vec<(String, String)>, Report> {
let core_block_ids = graph.core_block_ids();
let core_block_ids: Vec<_> = graph.core_block_ids().collect();
let guide_path_id = graph.path_id_by_name(&params.guide_strain)?;
let guide_path = &graph.paths[&guide_path_id];

let guide_block_info: BTreeMap<_, _> = guide_path
// Extract sequences for all core blocks
let mut records = vec![];
for (bid, strand) in guide_path
.nodes
.iter()
.map(|node_id| {
let node = &graph.nodes[node_id];
(node.block_id(), node.strand())
})
.collect();

let guide_block_ids: BTreeSet<_> = guide_block_info.keys().collect();
let guide_block_values: Vec<_> = guide_block_info.values().collect();

// Extract sequences for all core blocks
let mut records = vec![];
for (bid, &strand) in izip!(core_block_ids, guide_block_values) {
if !guide_block_ids.contains(&bid) {
continue; // Only consider core blocks
}

.filter(|(bid, _)| core_block_ids.contains(bid))
// Filter out non-core blocks
{
let block = &graph.blocks[&bid];
let mut block_records: Vec<_> = block
.sequences(graph, !params.unaligned, RecordNaming::Path)
Expand Down Expand Up @@ -150,6 +143,35 @@ mod tests {

#[test]
fn test_core_block_aln_general_case() {
// path A: n1+, n2-
// path B: n4+, n5+, n3-

// block 1:
// cons: ACCTATCGTGATCGTTCGAT
// A n1: ACCTATCGT---CGTTCGAT
// B n3: ACTTATCGTGATCGTTCGAT
// reverse-complement:
// cons: ATCGAACGATCACGATAGGT
// A n1: ATCGAACG---ACGATAGGT
// B n3: ATCGAACGATCACGATAAGT

// block 2:
// cons: CTGCAA GTCTGATCTAGTTA
// A n2: CTGCAATTTGTCTGATGTAGTTA
// B n4: CT--AA GTCTGATCTAGTTA
// reverse-complement
// cons: TAACTAGATCAGAC TTGCAG
// A n2: TAACTACATCAGACAAATTGCAG
// B n4: TAACTAGATCAGAC TT--AG

// core (guide A):
// A: ACCTATCGT---CGTTCGATTAACTACATCAGACAAATTGCAG
// B: ACTTATCGTGATCGTTCGATTAACTAGATCAGAC TT--AG

// core (guide B):
// A: CTGCAATTTGTCTGATGTAGTTAATCGAACG---ACGATAGGT
// B: CT--AA GTCTGATCTAGTTAATCGAACGATCACGATAAGT

let graph = Pangraph::from_str(indoc! {
// language=json
r#"
Expand All @@ -158,48 +180,59 @@ mod tests {
"0": {
"id": 0,
"nodes": [1, 2],
"tot_len": 100,
"tot_len": 40,
"circular": false,
"name": "Path A"
},
"1": {
"id": 1,
"nodes": [3, 4],
"tot_len": 100,
"nodes": [4, 5, 3],
"tot_len": 48,
"circular": false,
"name": "Path B"
}
},
"blocks": {
"1": {
"id": 1,
"consensus": "ACGTACGT",
"consensus": "ACCTATCGTGATCGTTCGAT",
"alignments": {
"1": {
"subs": [],
"dels": [],
"dels": [{"pos": 9, "len": 3}],
"inss": []
},
"2": {
"subs": [{"pos": 3, "alt": "T"}],
"3": {
"subs": [{"pos": 2, "alt": "T"}],
"dels": [],
"inss": []
}
}
},
"2": {
"id": 2,
"consensus": "TTGCA",
"consensus": "CTGCAAGTCTGATCTAGTTA",
"alignments": {
"3": {
"subs": [],
"dels": [{"pos": 2, "len": 1}],
"inss": []
"2": {
"subs": [{"pos": 13, "alt": "G"}],
"dels": [],
"inss": [{"pos": 6, "seq": "TTT"}]
},
"4": {
"subs": [],
"dels": [{"pos": 2, "len": 2}],
"inss": []
}
}
},
"3": {
"id": 3,
"consensus": "AGGCTACGAT",
"alignments": {
"5": {
"subs": [],
"dels": [],
"inss": [{"pos": 0, "seq": "AAA"}]
"inss": []
}
}
}
Expand All @@ -210,40 +243,92 @@ mod tests {
"block_id": 1,
"path_id": 0,
"strand": "+",
"position": [0, 8]
"position": [0, 17]
},
"2": {
"id": 2,
"block_id": 1,
"block_id": 2,
"path_id": 0,
"strand": "-",
"position": [0, 8]
"position": [17, 40]
},
"3": {
"id": 3,
"block_id": 2,
"block_id": 1,
"path_id": 1,
"strand": "+",
"position": [0, 5]
"strand": "-",
"position": [28, 48]
},
"4": {
"id": 4,
"block_id": 2,
"path_id": 1,
"strand": "+",
"position": [0, 5]
"position": [0, 18]
},
"5": {
"id": 5,
"block_id": 3,
"path_id": 1,
"strand": "+",
"position": [18, 28]
}
}
}
"#})
.unwrap();

let params = ExportCoreAlignmentParams {
guide_strain: o!("Path A"),
unaligned: false,
};

let expected = vec![
(o!("Path A"), o!("ACCTATCGT---CGTTCGATTAACTACATCAGACTTGCAG")),
(o!("Path B"), o!("ACTTATCGTGATCGTTCGATTAACTAGATCAGACTT--AG")),
];
let actual = core_block_aln(&graph, &params).unwrap();

assert_eq!(expected, actual);

let params = ExportCoreAlignmentParams {
guide_strain: o!("Path A"),
unaligned: true,
};

let expected = vec![(o!("Path A"), o!("ACGTTGTCA--")), (o!("Path B"), o!("TT--CGAATTTGCA"))];
let expected = vec![
(o!("Path A"), o!("ACCTATCGTCGTTCGATTAACTACATCAGACAAATTGCAG")),
(o!("Path B"), o!("ACTTATCGTGATCGTTCGATTAACTAGATCAGACTTAG")),
];

let actual = core_block_aln(&graph, &params).unwrap();

assert_eq!(expected, actual);

let params = ExportCoreAlignmentParams {
guide_strain: o!("Path B"),
unaligned: false,
};

let expected = vec![
(o!("Path A"), o!("CTGCAAGTCTGATGTAGTTAATCGAACG---ACGATAGGT")),
(o!("Path B"), o!("CT--AAGTCTGATCTAGTTAATCGAACGATCACGATAAGT")),
];

let actual = core_block_aln(&graph, &params).unwrap();

assert_eq!(expected, actual);

let params = ExportCoreAlignmentParams {
guide_strain: o!("Path B"),
unaligned: true,
};

let expected = vec![
(o!("Path A"), o!("CTGCAATTTGTCTGATGTAGTTAATCGAACGACGATAGGT")),
(o!("Path B"), o!("CTAAGTCTGATCTAGTTAATCGAACGATCACGATAAGT")),
];

let actual = core_block_aln(&graph, &params).unwrap();

assert_eq!(expected, actual);
Expand Down
Loading