Skip to content

Commit

Permalink
Merge pull request #42 from ghuls/add_more_bigwigaverageoverbed_stats
Browse files Browse the repository at this point in the history
Add more bigwigaverageoverbed stats
  • Loading branch information
jackh726 authored Jun 28, 2024
2 parents fad0d12 + 34a3691 commit 5161c9a
Show file tree
Hide file tree
Showing 5 changed files with 339 additions and 57 deletions.
2 changes: 2 additions & 0 deletions bigtools/resources/test/bwaob_intervals.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
chr17 59800 60000 one
chr17 62000 64000 two
59 changes: 48 additions & 11 deletions bigtools/src/utils/cli/bigwigaverageoverbed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ pub struct BigWigAverageOverBedArgs {
#[arg(long)]
pub end: Option<u32>,

/// If set, include two additional columns containing the min and max observed in the area
#[arg(long)]
pub min_max: bool,

/// Set the number of threads to use. This tool will nearly always benefit from more cores.
/// Note: for parts of the runtime, the actual usage may be nthreads+1
#[arg(short = 't', long)]
Expand All @@ -62,6 +66,7 @@ pub fn bigwigaverageoverbed(
let bigwigpath = args.bigwig;
let bedinpath = args.bedin;
let bedoutpath = args.output;
let add_min_max = args.min_max;

let reopen = ReopenableFile {
path: bigwigpath.to_string(),
Expand Down Expand Up @@ -103,6 +108,7 @@ pub fn bigwigaverageoverbed(
end: u64,
bedinpath: String,
name: Name,
add_min_max: bool,
inbigwig: &mut BigWigRead<R>,
) -> Result<File, Box<dyn Error + Send + Sync>> {
let mut tmp = tempfile::tempfile()?;
Expand Down Expand Up @@ -130,10 +136,22 @@ pub fn bigwigaverageoverbed(
}
};

let stats = format!(
"{}\t{}\t{:.3}\t{:.3}\t{:.3}",
entry.size, entry.bases, entry.sum, entry.mean0, entry.mean
);
let stats = match add_min_max {
true => format!(
"{}\t{}\t{:.3}\t{:.3}\t{:.3}\t{:.3}\t{:.3}",
entry.size,
entry.bases,
entry.sum,
entry.mean0,
entry.mean,
entry.min,
entry.max
),
false => format!(
"{}\t{}\t{:.3}\t{:.3}\t{:.3}",
entry.size, entry.bases, entry.sum, entry.mean0, entry.mean
),
};
writeln!(&mut tmp, "{}\t{}", entry.name, stats)?
}

Expand Down Expand Up @@ -164,7 +182,8 @@ pub fn bigwigaverageoverbed(
Err(_) => break,
};

let result = process_chunk(start, end, bedinpath, name, &mut inbigwig);
let result =
process_chunk(start, end, bedinpath, name, add_min_max, &mut inbigwig);
result_sender.send(result).unwrap();
}
};
Expand Down Expand Up @@ -194,8 +213,14 @@ pub fn bigwigaverageoverbed(
}
};

let result =
process_chunk(start, chrom, bedinpath, name, &mut inbigwig);
let result = process_chunk(
start,
chrom,
bedinpath,
name,
add_min_max,
&mut inbigwig,
);
result_sender.send(result).unwrap();
}
}
Expand Down Expand Up @@ -239,10 +264,22 @@ pub fn bigwigaverageoverbed(
Err(e) => return Err(e.into()),
};

let stats = format!(
"{}\t{}\t{:.3}\t{:.3}\t{:.3}",
entry.size, entry.bases, entry.sum, entry.mean0, entry.mean
);
let stats = match add_min_max {
true => format!(
"{}\t{}\t{:.3}\t{:.3}\t{:.3}\t{:.3}\t{:.3}",
entry.size,
entry.bases,
entry.sum,
entry.mean0,
entry.mean,
entry.min,
entry.max
),
false => format!(
"{}\t{}\t{:.3}\t{:.3}\t{:.3}",
entry.size, entry.bases, entry.sum, entry.mean0, entry.mean
),
};
writeln!(&mut bedoutwriter, "{}\t{}", entry.name, stats)?
}
}
Expand Down
17 changes: 14 additions & 3 deletions bigtools/src/utils/misc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ pub struct BigWigAverageOverBedEntry {
pub sum: f64,
pub mean0: f64,
pub mean: f64,
pub min: f64,
pub max: f64,
}

#[derive(Error, Debug)]
Expand All @@ -32,6 +34,9 @@ pub enum StatsError {
InvalidNameCol(String),
}

/// Returns a `BigWigAverageOverBedEntry` for a bigWig over a given interval.
/// If there are no values for the given region, then `f64::NAN` is given for
/// `mean`, `min`, and `max`, and `0` is given for `mean0`.
pub fn stats_for_bed_item<R: BBIFileRead>(
name: Name,
chrom: &str,
Expand All @@ -51,17 +56,21 @@ pub fn stats_for_bed_item<R: BBIFileRead>(

let mut bases = 0;
let mut sum = 0.0;
let mut min = f64::MAX;
let mut max = f64::MIN;
for val in interval {
let num_bases = val.end - val.start;
bases += num_bases;
sum += f64::from(num_bases) * f64::from(val.value);
min = min.min(f64::from(val.value));
max = max.max(f64::from(val.value));
}
let size = end - start;
let mean0 = sum / f64::from(size);
let mean = if bases == 0 {
0.0
let (mean, min, max) = if bases == 0 {
(f64::NAN, f64::NAN, f64::NAN)
} else {
sum / f64::from(bases)
(sum / f64::from(bases), min, max)
};

let name = match name {
Expand Down Expand Up @@ -89,6 +98,8 @@ pub fn stats_for_bed_item<R: BBIFileRead>(
sum,
mean0,
mean,
min,
max,
})
}

Expand Down
Loading

0 comments on commit 5161c9a

Please sign in to comment.