Skip to content

Commit

Permalink
[WIP]: doesn't compile. Begin implementing RPT calculations
Browse files Browse the repository at this point in the history
  • Loading branch information
a-frantz committed Dec 5, 2023
1 parent 23d8a6c commit 5ed13e6
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 1 deletion.
26 changes: 26 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ noodles = { version = "0.34.0", features = [
num-format = "0.4.0"
plotly = "0.8.1"
prettytable-rs = "0.9.0"
radix_trie = "0.2.1"
rand = "0.8.5"
rand_distr = "0.4.3"
regex = "1.5.5"
Expand Down
49 changes: 48 additions & 1 deletion src/derive/command/endedness.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
//! Functionality relating to the `ngs derive endedness` subcommand itself.
use std::collections::HashMap;
use std::collections::HashSet;
use std::path::PathBuf;

use clap::Args;
use noodles::sam::record::data::field::Tag;
use radix_trie::Trie;
use tracing::info;
use tracing::trace;

use crate::derive::endedness::compute;
use crate::utils::formats::bam::ParsedBAMFile;
Expand Down Expand Up @@ -53,6 +56,31 @@ pub struct DeriveEndednessArgs {
round_rpt: bool,
}

/// Per-read-group record: a name plus four `usize` counters.
///
/// NOTE(review): nothing in the visible code constructs or reads this type
/// yet. The counters presumably tally reads by first/last-segment flag
/// combination (cf. the `f+l-` style keys used in `derive`) -- confirm once
/// the RPT calculation is wired in.
struct ReadGroup {
/// Read group name.
name: String,
/// Presumably the number of reads flagged as first segment only -- TODO confirm.
first: usize,
/// Presumably the number of reads flagged as last segment only -- TODO confirm.
last: usize,
/// Presumably the number of reads flagged as both first and last segment -- TODO confirm.
both: usize,
/// Presumably the number of reads flagged as neither first nor last segment -- TODO confirm.
neither: usize,
}

/// The set of distinct read group names encountered so far.
///
/// Each name is stored once as an owned `String`; callers obtain a shared
/// reference to the stored copy through [`FoundReadGroups::insert_and_get_ref`].
#[derive(Debug, Default)]
struct FoundReadGroups {
    /// Owned, de-duplicated read group names.
    read_groups: HashSet<String>,
}

impl FoundReadGroups {
    /// Creates an empty collection of read group names.
    fn new() -> Self {
        FoundReadGroups {
            read_groups: HashSet::new(),
        }
    }

    /// Ensures `read_group` is present in the set and returns a reference to
    /// the stored copy.
    ///
    /// A new `String` is allocated only the first time a given name is seen;
    /// repeated calls with a known name perform lookups only.
    ///
    /// NOTE(review): the returned borrow is tied to the `&mut self` borrow, so
    /// it cannot be held across a subsequent call to this method.
    fn insert_and_get_ref(&mut self, read_group: &str) -> &String {
        // Check membership first so the common "already seen" path does not
        // allocate a throwaway `String` for every record.
        if !self.read_groups.contains(read_group) {
            self.read_groups.insert(read_group.to_owned());
        }

        self.read_groups
            .get(read_group)
            .expect("read group is present: it was found or inserted just above")
    }
}

/// Main function for the `ngs derive endedness` subcommand.
pub fn derive(args: DeriveEndednessArgs) -> anyhow::Result<()> {
info!("Starting derive endedness subcommand.");
Expand Down Expand Up @@ -80,6 +108,10 @@ pub fn derive(args: DeriveEndednessArgs) -> anyhow::Result<()> {
.entry("f-l-".to_string())
.and_modify(|e| *e += 1);

// only used if args.calc_rpt is true
let mut found_rgs = FoundReadGroups::new();
let mut read_names = Trie::<String, Vec<&str>>::new();

let ParsedBAMFile {
mut reader, header, ..
} = crate::utils::formats::bam::open_and_parse(args.src, IndexCheck::Full)?;
Expand Down Expand Up @@ -109,6 +141,21 @@ pub fn derive(args: DeriveEndednessArgs) -> anyhow::Result<()> {
.and_then(|v| v.as_str())
.unwrap_or("unknown_read_group");

if args.calc_rpt {
let rg_ref = found_rgs.insert_and_get_ref(read_group);

match record.read_name() {
Some(rn) => {
read_names.insert(rn.to_string(), vec![rg_ref]);
}
None => {
trace!("Could not parse a QNAME from a read in the file.");
trace!("Skipping this read and proceeding.");
continue;
}
}
}

if record.flags().is_first_segment() && !record.flags().is_last_segment() {
ordering_flags.entry("overall".to_string()).and_modify(|e| {
e.entry("f+l-".to_string()).and_modify(|e| *e += 1);
Expand Down Expand Up @@ -169,5 +216,5 @@ pub fn derive(args: DeriveEndednessArgs) -> anyhow::Result<()> {
let output = serde_json::to_string_pretty(&result).unwrap();
print!("{}", output);

Ok(())
anyhow::Ok(())
}

0 comments on commit 5ed13e6

Please sign in to comment.