diff --git a/polbin/src/cmds.rs b/polbin/src/cmds.rs index d12613ce..685c53dc 100644 --- a/polbin/src/cmds.rs +++ b/polbin/src/cmds.rs @@ -73,6 +73,68 @@ pub fn stats(gfa: &flatgfa::FlatGFA, args: Stats) { } } +/// find a nucleotide position within a path +#[derive(FromArgs, PartialEq, Debug)] +#[argh(subcommand, name = "position")] +pub struct Position { + /// path_name,offset,orientation + #[argh(option, short = 'p')] + path_pos: String, +} + +pub fn position(gfa: &flatgfa::FlatGFA, args: Position) -> Result<(), &'static str> { + // Parse the position triple, which looks like `path,42,+`. + let (path_name, offset, orientation) = { + let parts: Vec<_> = args.path_pos.split(",").collect(); + if parts.len() != 3 { + return Err("position must be path_name,offset,orientation"); + } + let off: usize = parts[1].parse().or(Err("offset must be a number"))?; + let ori: flatgfa::Orientation = parts[2].parse().or(Err("orientation must be + or -"))?; + (parts[0], off, ori) + }; + + let path_id = gfa.find_path(path_name.into()).ok_or("path not found")?; + let path = &gfa.paths[path_id as usize]; + assert_eq!( + orientation, + flatgfa::Orientation::Forward, + "only + is implemented so far" + ); + + // Traverse the path until we reach the position. + let mut cur_pos = 0; + let mut found = None; + for step in gfa.get_steps(path) { + let seg = gfa.get_handle_seg(*step); + let end_pos = cur_pos + seg.len(); + if offset < end_pos { + // Found it! + found = Some((*step, offset - cur_pos)); + break; + } + cur_pos = end_pos; + } + + // Print the match. + if let Some((handle, seg_off)) = found { + let seg = gfa.get_handle_seg(handle); + let seg_name = seg.name; + println!("#source.path.pos\ttarget.graph.pos"); + println!( + "{},{},{}\t{},{},{}", + path_name, + offset, + orientation, + seg_name, + seg_off, + handle.orient() + ); + } + + Ok(()) +} + /// create a subset graph #[derive(FromArgs, PartialEq, Debug)] #[argh(subcommand, name = "extract")] diff --git a/polbin/src/flatgfa.rs b/polbin/src/flatgfa.rs index 3e9ad063..07848ca5 100644 --- a/polbin/src/flatgfa.rs +++ b/polbin/src/flatgfa.rs @@ -1,3 +1,5 @@ +use std::str::FromStr; + use crate::pool::{Index, Pool, Span}; use bstr::BStr; use num_enum::{IntoPrimitive, TryFromPrimitive}; @@ -133,6 +135,20 @@ pub enum Orientation { Backward, // - } +impl FromStr for Orientation { + type Err = (); + + fn from_str(s: &str) -> Result { + if s == "+" { + Ok(Orientation::Forward) + } else if s == "-" { + Ok(Orientation::Backward) + } else { + Err(()) + } + } +} + /// An oriented reference to a segment. /// /// A Handle refers to the forward (+) or backward (-) orientation for a given segment. @@ -234,6 +250,14 @@ impl<'a> FlatGFA<'a> { .map(|i| i as Index) } + /// Look up a path by its name. + pub fn find_path(&self, name: &BStr) -> Option { + self.paths + .iter() + .position(|path| self.get_path_name(path) == name) + .map(|i| i as Index) + } + /// Get all the steps for a path. pub fn get_steps(&self, path: &Path) -> &[Handle] { &self.steps[path.steps.range()] diff --git a/polbin/src/main.rs b/polbin/src/main.rs index f2c479d7..56af38f2 100644 --- a/polbin/src/main.rs +++ b/polbin/src/main.rs @@ -68,6 +68,7 @@ enum Command { Toc(cmds::Toc), Paths(cmds::Paths), Stats(cmds::Stats), + Position(cmds::Position), Extract(cmds::Extract), } @@ -124,6 +125,9 @@ fn main() -> Result<(), &'static str> { Some(Command::Stats(sub_args)) => { cmds::stats(&gfa, sub_args); } + Some(Command::Position(sub_args)) => { + cmds::position(&gfa, sub_args)?; + } Some(Command::Extract(sub_args)) => { let store = cmds::extract(&gfa, sub_args)?; dump(&store.view(), &args.output);