Skip to content

Commit

Permalink
Merge pull request #44 from jackh726/pybigtools_refactor_rebase
Browse files Browse the repository at this point in the history
Refactor pybigtools API
  • Loading branch information
jackh726 authored Jun 27, 2024
2 parents 9e29c8b + fc83f82 commit fad0d12
Show file tree
Hide file tree
Showing 14 changed files with 2,612 additions and 799 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ jobs:

- name: Install pybigtools
run: |
pip install maturin pytest
pip install maturin
cd pybigtools
pip install -e .
pip install -e .[test]
- name: Install
run: |
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 36 additions & 13 deletions bigtools/src/bbi/bbiread.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::borrow::BorrowMut;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, VecDeque};
use std::fs::File;
Expand Down Expand Up @@ -54,6 +55,16 @@ pub struct BBIHeader {
pub(crate) uncompress_buf_size: u32,
}

impl BBIHeader {
    /// Reports whether the header declares a non-zero uncompress buffer size.
    ///
    /// Returns `true` exactly when `uncompress_buf_size` is greater than zero.
    pub fn is_compressed(&self) -> bool {
        self.uncompress_buf_size > 0
    }

    /// Returns the distance, in the same units as the header offsets, from
    /// `full_data_offset` to `full_index_offset`.
    ///
    /// The two offsets are not validated here, so a malformed header could
    /// place the index offset before the data offset. The subtraction
    /// saturates at zero in that case instead of panicking (overflow checks
    /// enabled) or wrapping around to a huge value (overflow checks disabled).
    pub fn primary_data_size(&self) -> u64 {
        self.full_index_offset.saturating_sub(self.full_data_offset)
    }
}

/// Information on a chromosome in a bbi file
#[derive(Clone, Debug)]
pub struct ChromInfo {
Expand Down Expand Up @@ -546,12 +557,18 @@ impl<S: SeekableRead> BBIFileRead for S {
}
}

pub struct CachedBBIFileRead<S: SeekableRead> {
pub struct CachedBBIFileRead<S> {
read: S,
cir_tree_node_map: HashMap<u64, Either<Vec<CirTreeNodeLeaf>, Vec<CirTreeNodeNonLeaf>>>,
block_data: HashMap<Block, Vec<u8>>,
}

impl<S> CachedBBIFileRead<S> {
    /// Borrows the wrapped `S` value held in the `read` field.
    ///
    /// This impl places no trait bound on `S`, so the accessor is available
    /// for any `S`.
    pub fn inner_read(&self) -> &S {
        let Self { read, .. } = self;
        read
    }
}

impl<S: SeekableRead> CachedBBIFileRead<S> {
pub fn new(read: S) -> Self {
CachedBBIFileRead {
Expand Down Expand Up @@ -1308,7 +1325,7 @@ pub(crate) fn get_zoom_block_values<B: BBIRead>(
chrom: u32,
start: u32,
end: u32,
) -> Result<Box<dyn Iterator<Item = ZoomRecord> + Send>, BBIReadError> {
) -> Result<std::vec::IntoIter<ZoomRecord>, BBIReadError> {
let (read, info) = bbifile.reader_and_info();
let data = read.get_block_data(info, &block)?;
let mut bytes = BytesMut::with_capacity(data.len());
Expand Down Expand Up @@ -1379,30 +1396,34 @@ pub(crate) fn get_zoom_block_values<B: BBIRead>(
}

*known_offset = block.offset + block.size;
Ok(Box::new(records.into_iter()))
Ok(records.into_iter())
}

pub(crate) struct ZoomIntervalIter<'a, I, B>
pub(crate) struct ZoomIntervalIter<I, R, B>
where
I: Iterator<Item = Block> + Send,
B: BBIRead,
R: BBIRead,
B: BorrowMut<R>,
{
bbifile: &'a mut B,
_r: std::marker::PhantomData<R>,
bbifile: B,
known_offset: u64,
blocks: I,
vals: Option<Box<dyn Iterator<Item = ZoomRecord> + Send + 'a>>,
vals: Option<std::vec::IntoIter<ZoomRecord>>,
chrom: u32,
start: u32,
end: u32,
}

impl<'a, I, B> ZoomIntervalIter<'a, I, B>
impl<I, R, B> ZoomIntervalIter<I, R, B>
where
I: Iterator<Item = Block> + Send,
B: BBIRead,
R: BBIRead,
B: BorrowMut<R>,
{
pub fn new(bbifile: &'a mut B, blocks: I, chrom: u32, start: u32, end: u32) -> Self {
pub fn new(bbifile: B, blocks: I, chrom: u32, start: u32, end: u32) -> Self {
ZoomIntervalIter {
_r: std::marker::PhantomData,
bbifile,
known_offset: 0,
blocks,
Expand All @@ -1414,10 +1435,11 @@ where
}
}

impl<'a, I, B> Iterator for ZoomIntervalIter<'a, I, B>
impl<I, R, B> Iterator for ZoomIntervalIter<I, R, B>
where
I: Iterator<Item = Block> + Send,
B: BBIRead,
R: BBIRead,
B: BorrowMut<R>,
{
type Item = Result<ZoomRecord, BBIReadError>;

Expand All @@ -1434,8 +1456,9 @@ where
},
None => {
let current_block = self.blocks.next()?;
let file = self.bbifile.borrow_mut();
match get_zoom_block_values(
self.bbifile,
file,
current_block,
&mut self.known_offset,
self.chrom,
Expand Down
35 changes: 33 additions & 2 deletions bigtools/src/bbi/bigbedread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,11 @@ impl<R: BBIFileRead> BigBedRead<R> {
}

/// Reads the autosql from this bigBed
pub fn autosql(&mut self) -> Result<String, BBIReadError> {
pub fn autosql(&mut self) -> Result<Option<String>, BBIReadError> {
let auto_sql_offset = self.info.header.auto_sql_offset;
if auto_sql_offset == 0 {
return Ok(None);
}
let reader = self.reader().raw_reader();
let mut reader = BufReader::new(reader);
reader.seek(SeekFrom::Start(auto_sql_offset))?;
Expand All @@ -200,7 +203,7 @@ impl<R: BBIFileRead> BigBedRead<R> {
buffer.pop();
let autosql = String::from_utf8(buffer)
.map_err(|_| BBIReadError::InvalidFile("Invalid autosql: not UTF-8".to_owned()))?;
Ok(autosql)
Ok(Some(autosql))
}

pub fn item_count(&mut self) -> Result<u64, BBIReadError> {
Expand All @@ -214,6 +217,11 @@ impl<R: BBIFileRead> BigBedRead<R> {
})
}

/// Borrows the inner `R` value stored in this reader's `read` field, so
/// callers can inspect it without taking ownership.
pub fn inner_read(&self) -> &R {
    let inner: &R = &self.read;
    inner
}

/// Returns the summary data from bigBed
///
/// Note: For version 1 of bigBeds, there is no total summary. In that
Expand Down Expand Up @@ -328,6 +336,29 @@ impl<R: BBIFileRead> BigBedRead<R> {

let chrom = self.info.chrom_id(chrom_name)?;

let blocks = search_cir_tree(&self.info, &mut self.read, cir_tree, chrom_name, start, end)?;
Ok(ZoomIntervalIter::<
std::vec::IntoIter<Block>,
BigBedRead<R>,
&mut BigBedRead<R>,
>::new(self, blocks.into_iter(), chrom, start, end))
}

/// For a given chromosome, start, and end, returns an `Iterator` of the
/// intersecting `ZoomRecord`s.
pub fn get_zoom_interval_move<'a>(
mut self,
chrom_name: &str,
start: u32,
end: u32,
reduction_level: u32,
) -> Result<impl Iterator<Item = Result<ZoomRecord, BBIReadError>>, ZoomIntervalError> {
let cir_tree = self
.zoom_cir_tree(reduction_level)
.map_err(|_| ZoomIntervalError::ReductionLevelNotFound)?;

let chrom = self.info.chrom_id(chrom_name)?;

let blocks = search_cir_tree(&self.info, &mut self.read, cir_tree, chrom_name, start, end)?;
Ok(ZoomIntervalIter::new(
self,
Expand Down
28 changes: 25 additions & 3 deletions bigtools/src/bbi/bigwigread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,28 @@ where

let blocks = search_cir_tree(&self.info, &mut self.read, cir_tree, chrom_name, start, end)?;

Ok(ZoomIntervalIter::<
std::vec::IntoIter<Block>,
BigWigRead<R>,
&mut BigWigRead<R>,
>::new(self, blocks.into_iter(), chrom, start, end))
}

/// For a given chromosome, start, and end, returns an `Iterator` of the
/// intersecting `ZoomRecord`s.
pub fn get_zoom_interval_move<'a>(
mut self,
chrom_name: &str,
start: u32,
end: u32,
reduction_level: u32,
) -> Result<impl Iterator<Item = Result<ZoomRecord, BBIReadError>>, ZoomIntervalError> {
let cir_tree = self.zoom_cir_tree(reduction_level)?;

let chrom = self.info.chrom_id(chrom_name)?;

let blocks = search_cir_tree(&self.info, &mut self.read, cir_tree, chrom_name, start, end)?;

Ok(ZoomIntervalIter::new(
self,
blocks.into_iter(),
Expand Down Expand Up @@ -505,7 +527,7 @@ fn get_block_values<R: BBIFileRead>(
end: chrom_end,
value,
};
if value.end >= start && value.start <= end {
if value.end > start && value.start < end {
value.start = value.start.max(start);
value.end = value.end.min(end);
values.push(value)
Expand Down Expand Up @@ -533,7 +555,7 @@ fn get_block_values<R: BBIFileRead>(
end: chrom_end,
value,
};
if value.end >= start && value.start <= end {
if value.end > start && value.start < end {
value.start = value.start.max(start);
value.end = value.end.min(end);
values.push(value)
Expand Down Expand Up @@ -562,7 +584,7 @@ fn get_block_values<R: BBIFileRead>(
end: chrom_end,
value,
};
if value.end >= start && value.start <= end {
if value.end > start && value.start < end {
value.start = value.start.max(start);
value.end = value.end.min(end);
values.push(value)
Expand Down
Loading

0 comments on commit fad0d12

Please sign in to comment.