From ed5994d8330bc186cbb3cc71f38ed8e8ce32dc8e Mon Sep 17 00:00:00 2001 From: Marco Molari Date: Fri, 3 Jan 2025 13:34:41 +0100 Subject: [PATCH 1/6] expand core alignment tests --- .../src/commands/export/export_core_genome.rs | 134 +++++++++++++++--- 1 file changed, 113 insertions(+), 21 deletions(-) diff --git a/packages/pangraph/src/commands/export/export_core_genome.rs b/packages/pangraph/src/commands/export/export_core_genome.rs index 62aeb6ef..ed93d5a9 100644 --- a/packages/pangraph/src/commands/export/export_core_genome.rs +++ b/packages/pangraph/src/commands/export/export_core_genome.rs @@ -150,6 +150,35 @@ mod tests { #[test] fn test_core_block_aln_general_case() { + // path A: n1+, n2- + // path B: n4+, n5+, n3- + + // block 1: + // cons: ACCTATCGTGATCGTTCGAT + // A n1: ACCTATCGT---CGTTCGAT + // B n3: ACTTATCGTGATCGTTCGAT + // reverse-complement: + // cons: ATCGAACGATCACGATAGGT + // A n1: ATCGAACG---ACGATAGGT + // B n3: ATCGAACGATCACGATAAGT + + // block 2: + // cons: CTGCAA GTCTGATCTAGTTA + // A n2: CTGCAATTTGTCTGATGTAGTTA + // B n4: CT--AA GTCTGATCTAGTTA + // reverse-complement + // cons: TAACTAGATCAGAC TTGCAG + // A n2: TAACTACATCAGACAAATTGCAG + // B n4: TAACTAGATCAGAC TT--AG + + // core (guide A): + // A: ACCTATCGT---CGTTCGATTAACTACATCAGACAAATTGCAG + // B: ACTTATCGTGATCGTTCGATTAACTAGATCAGAC TT--AG + + // core (guide B): + // A: CTGCAATTTGTCTGATGTAGTTAATCGAACG---ACGATAGGT + // B: CT--AA GTCTGATCTAGTTAATCGAACGATCACGATAAGT + let graph = Pangraph::from_str(indoc! { // language=json r#" @@ -158,14 +187,14 @@ mod tests { "0": { "id": 0, "nodes": [1, 2], - "tot_len": 100, + "tot_len": 40, "circular": false, "name": "Path A" }, "1": { "id": 1, - "nodes": [3, 4], - "tot_len": 100, + "nodes": [4, 5, 3], + "tot_len": 48, "circular": false, "name": "Path B" } @@ -173,15 +202,15 @@ mod tests { "blocks": { "1": { "id": 1, - "consensus": "ACGTACGT", + "consensus": "ACCTATCGTGATCGTTCGAT", "alignments": { "1": { "subs": [], - "dels": [], + "dels": [{"pos": 9, "len": 3}], "inss": [] }, - "2": { - "subs": [{"pos": 3, "alt": "T"}], + "3": { + "subs": [{"pos": 2, "alt": "T"}], "dels": [], "inss": [] } @@ -189,17 +218,28 @@ mod tests { }, "2": { "id": 2, - "consensus": "TTGCA", + "consensus": "CTGCAAGTCTGATCTAGTTA", "alignments": { - "3": { - "subs": [], - "dels": [{"pos": 2, "len": 1}], - "inss": [] + "2": { + "subs": [{"pos": 13, "alt": "G"}], + "dels": [], + "inss": [{"pos": 6, "seq": "TTT"}] }, "4": { + "subs": [], + "dels": [{"pos": 2, "len": 2}], + "inss": [] + } + } + }, + "3": { + "id": 3, + "consensus": "AGGCTACGAT", + "alignments": { + "5": { "subs": [], "dels": [], - "inss": [{"pos": 0, "seq": "AAA"}] + "inss": [] } } } @@ -210,28 +250,35 @@ mod tests { "block_id": 1, "path_id": 0, "strand": "+", - "position": [0, 8] + "position": [0, 17] }, "2": { "id": 2, - "block_id": 1, + "block_id": 2, "path_id": 0, "strand": "-", - "position": [0, 8] + "position": [17, 40] }, "3": { "id": 3, - "block_id": 2, + "block_id": 1, "path_id": 1, - "strand": "+", - "position": [0, 5] + "strand": "-", + "position": [28, 48] }, "4": { "id": 4, "block_id": 2, "path_id": 1, "strand": "+", - "position": [0, 5] + "position": [0, 18] + }, + "5": { + "id": 5, + "block_id": 3, + "path_id": 1, + "strand": "+", + "position": [18, 28] } } } @@ -240,10 +287,55 @@ mod tests { let params = ExportCoreAlignmentParams { guide_strain: o!("Path A"), + unaligned: false, + }; + + let expected = vec![ + (o!("Path A"), o!("ACCTATCGT---CGTTCGATTAACTACATCAGACTTGCAG")), + (o!("Path B"), o!("ACTTATCGTGATCGTTCGATTAACTAGATCAGACTT--AG")), + ]; + let actual = core_block_aln(&graph, ¶ms).unwrap(); + + assert_eq!(expected, actual); + + let params = ExportCoreAlignmentParams { + guide_strain: o!("Path A"), + unaligned: true, + }; + + let expected = vec![ + (o!("Path A"), o!("ACCTATCGTCGTTCGATTAACTACATCAGACAAATTGCAG")), + (o!("Path B"), o!("ACTTATCGTGATCGTTCGATTAACTAGATCAGACTTAG")), + ]; + + let actual = core_block_aln(&graph, ¶ms).unwrap(); + + assert_eq!(expected, actual); + + let params = ExportCoreAlignmentParams { + guide_strain: o!("Path B"), + unaligned: false, + }; + + let expected = vec![ + (o!("Path A"), o!("CTGCAAGTCTGATGTAGTTAATCGAACG---ACGATAGGT")), + (o!("Path B"), o!("CT--AAGTCTGATCTAGTTAATCGAACGATCACGATAAGT")), + ]; + + let actual = core_block_aln(&graph, ¶ms).unwrap(); + + assert_eq!(expected, actual); + + let params = ExportCoreAlignmentParams { + guide_strain: o!("Path B"), unaligned: true, }; - let expected = vec![(o!("Path A"), o!("ACGTTGTCA--")), (o!("Path B"), o!("TT--CGAATTTGCA"))]; + let expected = vec![ + (o!("Path A"), o!("CTGCAATTTGTCTGATGTAGTTAATCGAACGACGATAGGT")), + (o!("Path B"), o!("CTAAGTCTGATCTAGTTAATCGAACGATCACGATAAGT")), + ]; + let actual = core_block_aln(&graph, ¶ms).unwrap(); assert_eq!(expected, actual); From 17f211f5eb705af3d715bb6ed4c3e0d37d407c7e Mon Sep 17 00:00:00 2001 From: Marco Molari Date: Fri, 3 Jan 2025 13:55:32 +0100 Subject: [PATCH 2/6] fix export core alignment --- .../src/commands/export/export_core_genome.rs | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/packages/pangraph/src/commands/export/export_core_genome.rs b/packages/pangraph/src/commands/export/export_core_genome.rs index ed93d5a9..df026cf7 100644 --- a/packages/pangraph/src/commands/export/export_core_genome.rs +++ b/packages/pangraph/src/commands/export/export_core_genome.rs @@ -7,8 +7,8 @@ use crate::pangraph::pangraph::Pangraph; use crate::pangraph::pangraph_block::RecordNaming; use clap::Parser; use eyre::{Context, Report}; -use itertools::{izip, Itertools}; -use std::collections::{BTreeMap, BTreeSet}; +use itertools::Itertools; +use std::collections::BTreeMap; #[derive(Parser, Debug, Default, Clone)] pub struct ExportCoreAlignmentParams { @@ -51,29 +51,22 @@ pub fn export_core_genome(args: PangraphExportCoreAlignmentArgs) -> Result<(), R /// If aligned is True, it returns aligned sequences, with gaps for deletions and no insertions. /// If aligned is False, it returns the full unaligned sequences. fn core_block_aln(graph: &Pangraph, params: &ExportCoreAlignmentParams) -> Result, Report> { - let core_block_ids = graph.core_block_ids(); + let core_block_ids: Vec<_> = graph.core_block_ids().collect(); let guide_path_id = graph.path_id_by_name(¶ms.guide_strain)?; let guide_path = &graph.paths[&guide_path_id]; - let guide_block_info: BTreeMap<_, _> = guide_path + // Extract sequences for all core blocks + let mut records = vec![]; + for (bid, strand) in guide_path .nodes .iter() .map(|node_id| { let node = &graph.nodes[node_id]; (node.block_id(), node.strand()) }) - .collect(); - - let guide_block_ids: BTreeSet<_> = guide_block_info.keys().collect(); - let guide_block_values: Vec<_> = guide_block_info.values().collect(); - - // Extract sequences for all core blocks - let mut records = vec![]; - for (bid, &strand) in izip!(core_block_ids, guide_block_values) { - if !guide_block_ids.contains(&bid) { - continue; // Only consider core blocks - } - + .filter(|(bid, _)| core_block_ids.contains(bid)) + // Filter out non-core blocks + { let block = &graph.blocks[&bid]; let mut block_records: Vec<_> = block .sequences(graph, !params.unaligned, RecordNaming::Path) From 7ff88d6dc719cec229ada795f329976d53ed053c Mon Sep 17 00:00:00 2001 From: Marco Molari Date: Fri, 3 Jan 2025 15:38:54 +0100 Subject: [PATCH 3/6] fix gfa test --- packages/pangraph/src/io/gfa.rs | 266 +++++++++++++++++++------------- 1 file changed, 163 insertions(+), 103 deletions(-) diff --git a/packages/pangraph/src/io/gfa.rs b/packages/pangraph/src/io/gfa.rs index fc8de646..8b6cd5ef 100644 --- a/packages/pangraph/src/io/gfa.rs +++ b/packages/pangraph/src/io/gfa.rs @@ -1,12 +1,12 @@ +use crate::circularize::circularize_utils::{Edge, SimpleNode}; use crate::io::file::create_file_or_stdout; use crate::pangraph::pangraph::Pangraph; use crate::pangraph::pangraph_block::BlockId; use crate::pangraph::strand::Strand; use clap::Parser; -use derive_more::Constructor; use eyre::{Context, Report}; use itertools::Itertools; -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::io::Write; use std::path::Path; @@ -104,7 +104,22 @@ pub fn gfa_write(mut writer: W, g: &Pangraph, params: &GfaWriteParams) writeln!(writer, "# edges")?; } - for (edge, read_count) in &gfa.links.edge_ct { + for (edge, read_count) in gfa + .links + .edge_ct + .iter() + .map(|(edge, &read_count)| { + ( + if edge.n1.bid > edge.n2.bid { + edge.invert() + } else { + *edge + }, + read_count, + ) + }) + .sorted_by_key(|(edge, _)| (edge.n1.bid, edge.n2.bid)) + { let bid1 = edge.n1.bid; let strand1 = edge.n1.strand; let bid2 = edge.n2.bid; @@ -144,21 +159,9 @@ pub struct GfaSegment { duplicated: bool, } -#[derive(Debug, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Constructor)] -pub struct SimpleNode { - bid: BlockId, - strand: Strand, -} - -#[derive(Debug, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Constructor)] -pub struct Edge { - n1: SimpleNode, - n2: SimpleNode, -} - #[derive(Debug, Clone, Default)] pub struct GfaLinks { - edge_ct: BTreeMap, + edge_ct: HashMap, } impl GfaLinks { @@ -300,95 +303,144 @@ mod tests { #[test] fn test_gfa_general_case() { + // graph (circular): + // path A: 1+ -> 2- -> 3+ + // n1 n2 n3 + // path B: 2+ -> 1- -> 3+ -> 4+ + // n4 n5 n6 n7 let g = Pangraph::from_str(indoc! { // language=json r#" - { - "paths": { - "0": { - "id": 0, - "nodes": [ - 14515840915932838377 - ], - "tot_len": 1737, - "circular": false, - "name": "Path A" - }, - "1": { - "id": 1, - "nodes": [ - 15291847754458130853 - ], - "tot_len": 1737, - "circular": false, - "name": "Path B" + { + "paths": { + "0": { + "id": 0, + "nodes": [1, 2, 3], + "tot_len": 50, + "circular": true, + "name": "Path A" + }, + "1": { + "id": 1, + "nodes": [4, 5, 6, 7], + "tot_len": 60, + "circular": true, + "name": "Path B" + } }, - "2": { - "id": 2, - "nodes": [ - 15109482180931348145 - ], - "tot_len": 1737, - "circular": false, - "name": "Path C" - } - }, - "blocks": { - "12778560093473594666": { - "id": 12778560093473594666, - "consensus": "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT", - "alignments": { - "14515840915932838377": { - "subs": [], - "dels": [], - "inss": [] - }, - "15291847754458130853": { - "subs": [], - "dels": [], - "inss": [] - }, - "15109482180931348145": { - "subs": [], - "dels": [], - "inss": [] + "blocks": { + "1": { + "id": 1, + "consensus": "ACCTATCGTGATCGTTCGAT", + "alignments": { + "1": { + "subs": [], + "dels": [], + "inss": [] + }, + "4": { + "subs": [], + "dels": [], + "inss": [] + } + } + }, + "2": { + "id": 2, + "consensus": "CTGCAAGTCTGATCTAGTTA", + "alignments": { + "2": { + "subs": [], + "dels": [], + "inss": [] + }, + "6": { + "subs": [], + "dels": [], + "inss": [] + } + } + }, + "3": { + "id": 3, + "consensus": "AGGCTACGAT", + "alignments": { + "3": { + "subs": [], + "dels": [], + "inss": [] + }, + "5": { + "subs": [], + "dels": [], + "inss": [] + } + } + }, + "4": { + "id": 4, + "consensus": "CTTCAGCAAG", + "alignments": { + "7": { + "subs": [], + "dels": [], + "inss": [] + } } } - } - }, - "nodes": { - "14515840915932838377": { - "id": 14515840915932838377, - "block_id": 12778560093473594666, - "path_id": 0, - "strand": "+", - "position": [ - 0, - 0 - ] - }, - "15291847754458130853": { - "id": 15291847754458130853, - "block_id": 12778560093473594666, - "path_id": 1, - "strand": "+", - "position": [ - 0, - 0 - ] }, - "15109482180931348145": { - "id": 15109482180931348145, - "block_id": 12778560093473594666, - "path_id": 2, - "strand": "+", - "position": [ - 0, - 0 - ] + "nodes": { + "1": { + "id": 1, + "block_id": 1, + "path_id": 0, + "strand": "+", + "position": [0, 0] + }, + "2": { + "id": 2, + "block_id": 2, + "path_id": 0, + "strand": "-", + "position": [0, 0] + }, + "3": { + "id": 3, + "block_id": 3, + "path_id": 0, + "strand": "+", + "position": [0, 0] + }, + "4": { + "id": 4, + "block_id": 2, + "path_id": 1, + "strand": "+", + "position": [0, 0] + }, + "5": { + "id": 5, + "block_id": 1, + "path_id": 1, + "strand": "-", + "position": [0, 0] + }, + "6": { + "id": 6, + "block_id": 3, + "path_id": 1, + "strand": "+", + "position": [0, 0] + }, + "7": { + "id": 7, + "block_id": 4, + "path_id": 1, + "strand": "+", + "position": [0, 0] + } } } - } "#}) .unwrap(); @@ -401,13 +453,21 @@ mod tests { let expected = indoc! {r#" H VN:Z:1.0 - S 12778560093473594666 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT - L A - B + 1M - L A - B + 1M - L A - B + 1M - P Path A 12778560093473594666 *,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,* - P Path B 12778560093473594666 *,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,* - P Path C 12778560093473594666 *,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,* + # blocks + S 1 ACCTATCGTGATCGTTCGAT RC:i:40 LN:i:20 + S 2 CTGCAAGTCTGATCTAGTTA RC:i:40 LN:i:20 + S 3 AGGCTACGAT RC:i:20 LN:i:10 + S 4 CTTCAGCAAG RC:i:10 LN:i:10 + # edges + L 1 + 2 - * RC:i:2 + L 1 - 3 - * RC:i:1 + L 1 - 3 + * RC:i:1 + L 2 - 3 + * RC:i:1 + L 2 - 4 - * RC:i:1 + L 3 + 4 + * RC:i:1 + # paths + P Path A 1+,2-,3+ * TP:Z:circular + P Path B 2+,1-,3+,4+ * TP:Z:circular "#}; assert_eq!(expected, actual); From 4df3eac5156ea8c160b336a6742e30ff436be99f Mon Sep 17 00:00:00 2001 From: Marco Molari Date: Fri, 3 Jan 2025 18:03:04 +0100 Subject: [PATCH 4/6] chore: more concise gfa export --- .../src/circularize/circularize_utils.rs | 14 ++++++++++++++ packages/pangraph/src/io/gfa.rs | 16 ++++------------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/packages/pangraph/src/circularize/circularize_utils.rs b/packages/pangraph/src/circularize/circularize_utils.rs index 05f41758..74e6b2cf 100644 --- a/packages/pangraph/src/circularize/circularize_utils.rs +++ b/packages/pangraph/src/circularize/circularize_utils.rs @@ -58,6 +58,20 @@ impl Edge { pub fn oriented_equal(&self, other: &Edge) -> bool { self.n1 == other.n1 && self.n2 == other.n2 } + + pub fn smaller_bid_first(&self) -> Edge { + if self.n1.bid < self.n2.bid { + *self + } else { + self.invert() + } + } + + pub fn to_tuple(&self) -> (BlockId, BlockId, isize, isize) { + let s1 = if self.n1.strand == Strand::Forward { 0 } else { 1 }; + let s2 = if self.n2.strand == Strand::Forward { 0 } else { 1 }; + (self.n1.bid, self.n2.bid, s1, s2) + } } impl PartialEq for Edge { diff --git a/packages/pangraph/src/io/gfa.rs b/packages/pangraph/src/io/gfa.rs index 8b6cd5ef..237054ec 100644 --- a/packages/pangraph/src/io/gfa.rs +++ b/packages/pangraph/src/io/gfa.rs @@ -108,17 +108,9 @@ pub fn gfa_write(mut writer: W, g: &Pangraph, params: &GfaWriteParams) .links .edge_ct .iter() - .map(|(edge, &read_count)| { - ( - if edge.n1.bid > edge.n2.bid { - edge.invert() - } else { - *edge - }, - read_count, - ) - }) - .sorted_by_key(|(edge, _)| (edge.n1.bid, edge.n2.bid)) + .map(|(edge, &read_count)| (edge.smaller_bid_first(), read_count)) // sort by smaller bid first + .sorted_by_key(|(edge, _)| edge.to_tuple()) + // sort by (bid1, bid2, strand1, strand2) { let bid1 = edge.n1.bid; let strand1 = edge.n1.strand; @@ -460,8 +452,8 @@ mod tests { S 4 CTTCAGCAAG RC:i:10 LN:i:10 # edges L 1 + 2 - * RC:i:2 - L 1 - 3 - * RC:i:1 L 1 - 3 + * RC:i:1 + L 1 - 3 - * RC:i:1 L 2 - 3 + * RC:i:1 L 2 - 4 - * RC:i:1 L 3 + 4 + * RC:i:1 From fa0489fbb7464b34e4b92d7068d7048b4303a46b Mon Sep 17 00:00:00 2001 From: Marco Molari Date: Fri, 3 Jan 2025 18:28:01 +0100 Subject: [PATCH 5/6] chore: refactor --- packages/pangraph/src/circularize/circularize_utils.rs | 9 +++++++-- packages/pangraph/src/io/gfa.rs | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/packages/pangraph/src/circularize/circularize_utils.rs b/packages/pangraph/src/circularize/circularize_utils.rs index 74e6b2cf..fa0691b6 100644 --- a/packages/pangraph/src/circularize/circularize_utils.rs +++ b/packages/pangraph/src/circularize/circularize_utils.rs @@ -59,14 +59,19 @@ impl Edge { self.n1 == other.n1 && self.n2 == other.n2 } - pub fn smaller_bid_first(&self) -> Edge { - if self.n1.bid < self.n2.bid { + // orient such that the first node has a lower block id + // if block ids are equal, orient such that the first node has a forward strand + // if possible + pub fn conventional_orientation(&self) -> Edge { + if (self.n1.bid < self.n2.bid) || (self.n1.bid == self.n2.bid && self.n1.strand == Strand::Forward) { *self } else { self.invert() } } + // returns a tuple (bid1, bid2, 0/1, 0/1) where 0/1 indicates the strand + // used to decide ordering between different edges pub fn to_tuple(&self) -> (BlockId, BlockId, isize, isize) { let s1 = if self.n1.strand == Strand::Forward { 0 } else { 1 }; let s2 = if self.n2.strand == Strand::Forward { 0 } else { 1 }; diff --git a/packages/pangraph/src/io/gfa.rs b/packages/pangraph/src/io/gfa.rs index 237054ec..35964180 100644 --- a/packages/pangraph/src/io/gfa.rs +++ b/packages/pangraph/src/io/gfa.rs @@ -108,7 +108,7 @@ pub fn gfa_write(mut writer: W, g: &Pangraph, params: &GfaWriteParams) .links .edge_ct .iter() - .map(|(edge, &read_count)| (edge.smaller_bid_first(), read_count)) // sort by smaller bid first + .map(|(edge, &read_count)| (edge.conventional_orientation(), read_count)) // sort by smaller bid first .sorted_by_key(|(edge, _)| edge.to_tuple()) // sort by (bid1, bid2, strand1, strand2) { From 01028e3f22b11a6e488a70aabb117d273d283db9 Mon Sep 17 00:00:00 2001 From: Marco Molari Date: Wed, 8 Jan 2025 16:49:54 +0100 Subject: [PATCH 6/6] fix: separate new node position for circular / non-circular paths --- packages/pangraph/src/pangraph/pangraph.rs | 3 +- packages/pangraph/src/pangraph/reweave.rs | 12 ++-- packages/pangraph/src/pangraph/slice.rs | 74 ++++++++++++++++++++-- 3 files changed, 75 insertions(+), 14 deletions(-) diff --git a/packages/pangraph/src/pangraph/pangraph.rs b/packages/pangraph/src/pangraph/pangraph.rs index 73039a36..b4e30093 100644 --- a/packages/pangraph/src/pangraph/pangraph.rs +++ b/packages/pangraph/src/pangraph/pangraph.rs @@ -30,7 +30,8 @@ impl Pangraph { let block_id = BlockId(fasta.index); let block = PangraphBlock::from_consensus(fasta.seq, block_id, node_id); let path_id = PathId(fasta.index); - let node = PangraphNode::new(Some(node_id), block.id(), path_id, strand, (0, 0)); + let node_position = if circular { (0, 0) } else { (0, tot_len) }; // path wraps around if circular + let node = PangraphNode::new(Some(node_id), block.id(), path_id, strand, node_position); let path = PangraphPath::new(Some(path_id), [node.id()], tot_len, circular, Some(fasta.seq_name)); Self { blocks: btreemap! {block.id() => block}, diff --git a/packages/pangraph/src/pangraph/reweave.rs b/packages/pangraph/src/pangraph/reweave.rs index 96a1e4bc..b6a5bd2e 100644 --- a/packages/pangraph/src/pangraph/reweave.rs +++ b/packages/pangraph/src/pangraph/reweave.rs @@ -528,9 +528,9 @@ mod tests { }, ); - let p1 = PangraphPath::new(Some(PathId(100)), [nid1], 2000, false, None); - let p2 = PangraphPath::new(Some(PathId(200)), [nid2], 2000, false, None); - let p3 = PangraphPath::new(Some(PathId(300)), [nid3], 200, false, None); + let p1 = PangraphPath::new(Some(PathId(100)), [nid1], 2000, true, None); + let p2 = PangraphPath::new(Some(PathId(200)), [nid2], 2000, true, None); + let p3 = PangraphPath::new(Some(PathId(300)), [nid3], 200, true, None); let G = Pangraph { paths: btreemap! { @@ -683,9 +683,9 @@ mod tests { }; let paths = btreemap! { - PathId(100) => PangraphPath::new(Some(PathId(100)), [NodeId(1), NodeId(2)], 1000, false, None), - PathId(200) => PangraphPath::new(Some(PathId(200)), [NodeId(3), NodeId(4), NodeId(5)], 1000, false, None), - PathId(300) => PangraphPath::new(Some(PathId(300)), [NodeId(6), NodeId(7), NodeId(8)], 1000, false, None), + PathId(100) => PangraphPath::new(Some(PathId(100)), [NodeId(1), NodeId(2)], 1000, true, None), + PathId(200) => PangraphPath::new(Some(PathId(200)), [NodeId(3), NodeId(4), NodeId(5)], 1000, true, None), + PathId(300) => PangraphPath::new(Some(PathId(300)), [NodeId(6), NodeId(7), NodeId(8)], 1000, true, None), }; #[rustfmt::skip] diff --git a/packages/pangraph/src/pangraph/slice.rs b/packages/pangraph/src/pangraph/slice.rs index 6c4a7c11..170009fe 100644 --- a/packages/pangraph/src/pangraph/slice.rs +++ b/packages/pangraph/src/pangraph/slice.rs @@ -59,7 +59,11 @@ pub fn new_strandedness(old_strandedness: Strand, orientation: Strand, is_anchor } } -pub fn new_position( +// given the old position of a node, the coordinates of the node in the new block, +// the length of the path and the strandedness of the node, return the new node position. +// Accounts for circular paths. +// Nb: a node that spans the whole path [0, L] will have position [0, 0] +pub fn new_position_circular( old_position: (usize, usize), node_coords: (usize, usize), path_L: usize, @@ -77,6 +81,24 @@ pub fn new_position( } } +// given the old position of a node, the coordinates of the node in the new block, +// and the strandedness of the node, return the new node position. +// Does not account for circular paths. +// Nb: a node that spans the whole path [0, L] will have position [0, L] +pub fn new_position_non_circular( + old_position: (usize, usize), + node_coords: (usize, usize), + old_strandedness: Strand, +) -> (usize, usize) { + let (old_s, old_e) = old_position; + let (new_s_in_node, new_e_in_node) = node_coords; + if old_strandedness.is_forward() { + (old_s + new_s_in_node, old_s + new_e_in_node) + } else { + (old_e - new_e_in_node, old_e - new_s_in_node) + } +} + pub fn interval_node_coords(i: &PangraphInterval, edits: &Edit, block_L: usize) -> (usize, usize) { let (mut s, mut e) = (i.interval.start, i.interval.end); for d in &edits.dels { @@ -134,7 +156,12 @@ pub fn block_slice( let path_L = G.paths[&old_node.path_id()].tot_len; let node_coords = interval_node_coords(i, edits, block_L); - let new_pos = new_position(old_node.position(), node_coords, path_L, old_strandedness); + let circular = G.paths[&old_node.path_id()].circular(); + let new_pos = if circular { + new_position_circular(old_node.position(), node_coords, path_L, old_strandedness) + } else { + new_position_non_circular(old_node.position(), node_coords, old_strandedness) + }; let new_node = PangraphNode::new(None, i.new_block_id, old_node.path_id(), new_strand, new_pos); node_updates.insert(*old_node_id, new_node.clone()); @@ -315,27 +342,60 @@ mod tests { } #[test] - fn test_new_position() { + fn test_new_position_circular() { let path_L = 100; let strandedness = Forward; let node_coords = (10, 20); let old_position = (10, 40); - let new_pos = new_position(old_position, node_coords, path_L, strandedness); + let new_pos = new_position_circular(old_position, node_coords, path_L, strandedness); assert_eq!(new_pos, (20, 30)); let old_position = (95, 20); - let new_pos = new_position(old_position, node_coords, path_L, strandedness); + let new_pos = new_position_circular(old_position, node_coords, path_L, strandedness); assert_eq!(new_pos, (5, 15)); let strandedness = Reverse; let old_position = (10, 50); - let new_pos = new_position(old_position, node_coords, path_L, strandedness); + let new_pos = new_position_circular(old_position, node_coords, path_L, strandedness); assert_eq!(new_pos, (30, 40)); let old_position = (40, 5); - let new_pos = new_position(old_position, node_coords, path_L, strandedness); + let new_pos = new_position_circular(old_position, node_coords, path_L, strandedness); assert_eq!(new_pos, (85, 95)); + + let strandedness = Forward; + let old_position = (0, 100); + let node_coords = (0, 100); + let new_pos = new_position_circular(old_position, node_coords, path_L, strandedness); + assert_eq!(new_pos, (0, 0)); + } + + #[test] + fn test_new_position_non_circular() { + let strandedness = Forward; + let node_coords = (10, 20); + let old_position = (10, 40); + let new_pos = new_position_non_circular(old_position, node_coords, strandedness); + assert_eq!(new_pos, (20, 30)); + + let strandedness = Reverse; + let node_coords = (10, 20); + let old_position = (10, 50); + let new_pos = new_position_non_circular(old_position, node_coords, strandedness); + assert_eq!(new_pos, (30, 40)); + + let strandedness = Forward; + let node_coords = (0, 10); + let old_position = (0, 20); + let new_pos = new_position_non_circular(old_position, node_coords, strandedness); + assert_eq!(new_pos, (0, 10)); + + let strandedness = Forward; + let node_coords = (0, 100); + let old_position = (0, 100); + let new_pos = new_position_non_circular(old_position, node_coords, strandedness); + assert_eq!(new_pos, (0, 100)); } #[test]