From e25f47196d454cf0fc15b75b76609ee8d207f8b1 Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Thu, 22 Feb 2024 21:18:21 -0500 Subject: [PATCH 01/30] Extract out a BBIFile trait for pybigtools and use it in start_end --- pybigtools/src/lib.rs | 46 ++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 75386b8..ec66738 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -13,10 +13,10 @@ use bigtools::utils::misc::{ bigwig_average_over_bed, BigWigAverageOverBedEntry, BigWigAverageOverBedError, Name, }; use bigtools::{ - BBIRead, BedEntry, BigBedRead as BigBedReadRaw, BigBedWrite as BigBedWriteRaw, + BBIReadError, BedEntry, BigBedRead as BigBedReadRaw, BigBedWrite as BigBedWriteRaw, BigWigRead as BigWigReadRaw, BigWigWrite as BigWigWriteRaw, CachedBBIFileRead, Value, + ZoomRecord, }; -use bigtools::{BBIReadError, ZoomRecord}; use bigtools::utils::reopen::Reopen; use file_like::PyFileLikeObject; @@ -34,13 +34,13 @@ mod file_like; type ValueTuple = (u32, u32, f32); type BedEntryTuple = (u32, u32, String); -fn start_end( - bigwig: &B, +fn start_end( + bbi: &B, chrom_name: &str, start: Option, end: Option, ) -> PyResult<(u32, u32)> { - let chrom = bigwig.chroms().into_iter().find(|x| x.name == chrom_name); + let chrom = bbi.chroms().into_iter().find(|x| x.name == chrom_name); let length = match chrom { None => { return Err(PyErr::new::(format!( @@ -59,6 +59,32 @@ impl Reopen for PyFileLikeObject { } } +trait BBIFile { + fn chroms(&self) -> &[bigtools::ChromInfo]; +} + +impl BBIFile for BigWigRead { + fn chroms(&self) -> &[bigtools::ChromInfo] { + match &self.bigwig { + BigWigRaw::File(b) => b.chroms(), + #[cfg(feature = "remote")] + BigWigRaw::Remote(b) => b.chroms(), + BigWigRaw::FileLike(b) => b.chroms(), + } + } +} + +impl BBIFile for BigBedRead { + fn chroms(&self) -> &[bigtools::ChromInfo] { + match 
&self.bigbed { + BigBedRaw::File(b) => b.chroms(), + #[cfg(feature = "remote")] + BigBedRaw::Remote(b) => b.chroms(), + BigBedRaw::FileLike(b) => b.chroms(), + } + } +} + enum BigWigRaw { File(BigWigReadRaw>), #[cfg(feature = "remote")] @@ -89,9 +115,9 @@ impl BigWigRead { start: Option, end: Option, ) -> PyResult { + let (start, end) = start_end(self, &chrom, start, end)?; match &self.bigwig { BigWigRaw::File(b) => { - let (start, end) = start_end(b, &chrom, start, end)?; let b = b.reopen()?; Ok(IntervalIterator { iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), @@ -99,14 +125,12 @@ impl BigWigRead { } #[cfg(feature = "remote")] BigWigRaw::Remote(b) => { - let (start, end) = start_end(b, &chrom, start, end)?; let b = b.reopen()?; Ok(IntervalIterator { iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), }) } BigWigRaw::FileLike(b) => { - let (start, end) = start_end(b, &chrom, start, end)?; let b = b.reopen()?; Ok(IntervalIterator { iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), @@ -272,9 +296,9 @@ impl BigWigRead { ))); } }; + let (start, end) = start_end(self, &chrom, start, end)?; macro_rules! 
to_array { ($b:ident) => {{ - let (start, end) = start_end($b, &chrom, start, end)?; match bins { Some(bins) if !exact.unwrap_or(false) => { let max_zoom_size = ((end - start) as f32 / (bins * 2) as f32) as u32; @@ -630,9 +654,9 @@ impl BigBedRead { start: Option, end: Option, ) -> PyResult { + let (start, end) = start_end(self, &chrom, start, end)?; match &mut self.bigbed { BigBedRaw::File(b) => { - let (start, end) = start_end(b, &chrom, start, end)?; let b = b.reopen()?; Ok(EntriesIterator { iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), @@ -640,14 +664,12 @@ impl BigBedRead { } #[cfg(feature = "remote")] BigBedRaw::Remote(b) => { - let (start, end) = start_end(b, &chrom, start, end)?; let b = b.reopen()?; Ok(EntriesIterator { iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), }) } BigBedRaw::FileLike(b) => { - let (start, end) = start_end(b, &chrom, start, end)?; let b = b.reopen()?; Ok(EntriesIterator { iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), From d881b5e34abe27d36291668b8d6b0843fafa70d5 Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Sun, 3 Mar 2024 23:31:14 -0500 Subject: [PATCH 02/30] Merge BigWigRead and BigBedRead into BBIRead, add sql fn --- bigtools/src/bed/autosql.rs | 158 ++++--- pybigtools/src/lib.rs | 793 ++++++++++++++++++++---------------- 2 files changed, 553 insertions(+), 398 deletions(-) diff --git a/bigtools/src/bed/autosql.rs b/bigtools/src/bed/autosql.rs index 922c7f8..b5f33f8 100644 --- a/bigtools/src/bed/autosql.rs +++ b/bigtools/src/bed/autosql.rs @@ -356,6 +356,62 @@ pub mod parse { pub auto: bool, } + impl DeclareName { + fn parse(parser: &mut parser::Parser<'_>) -> Result { + let declare_name = parser.eat_word(); + if !declare_name.chars().next().unwrap_or(' ').is_alphabetic() + || declare_name.chars().any(|c| !c.is_alphanumeric()) + { + return Err(ParseError::InvalidDeclareName(declare_name.to_string())); + } + let declare_name = 
declare_name.to_string(); + + let next_word = parser.peek_word(); + let index_type = match next_word { + "primary" => { + parser.eat_word(); + Some(IndexType::Primary) + } + "index" => { + parser.eat_word(); + + let next = parser.peek_one(); + let size = if next == "[" { + parser.eat_one(); + let size = parser.eat_word().to_string(); + let close = parser.eat_one(); + if close != "]" { + return Err(ParseError::InvalidIndexSizeBrackets(close.to_string())); + } + Some(size) + } else { + None + }; + Some(IndexType::Index(size)) + } + "unique" => { + parser.eat_word(); + Some(IndexType::Unique) + } + "auto" => None, + _ => None, + }; + + let next_word = parser.peek_word(); + let auto = if next_word == "auto" { + parser.eat_word(); + true + } else { + false + }; + Ok(DeclareName { + name: declare_name, + index_type, + auto, + }) + } + } + #[derive(Clone, Debug)] pub struct Declaration { pub declaration_type: DeclarationType, @@ -445,7 +501,7 @@ pub mod parse { } "simple" => { parser.take(); - let declare_name = parse_declare_name(parser)?; + let declare_name = DeclareName::parse(parser)?; return Ok(Some(FieldType::Declaration( DeclarationType::Simple, declare_name, @@ -453,7 +509,7 @@ pub mod parse { } "object" => { parser.take(); - let declare_name = parse_declare_name(parser)?; + let declare_name = DeclareName::parse(parser)?; return Ok(Some(FieldType::Declaration( DeclarationType::Object, declare_name, @@ -461,7 +517,7 @@ pub mod parse { } "table" => { parser.take(); - let declare_name = parse_declare_name(parser)?; + let declare_name = DeclareName::parse(parser)?; return Ok(Some(FieldType::Declaration( DeclarationType::Object, declare_name, @@ -472,6 +528,46 @@ pub mod parse { parser.take(); return Ok(Some(field_type)); } + + /// This is currently incomplete when printing `simple`, `table`, and `object`. 
+ pub fn to_string(&self) -> String { + match self { + FieldType::Int => "int".to_string(), + FieldType::Uint => "uint".to_string(), + FieldType::Short => "short".to_string(), + FieldType::Ushort => "ushort".to_string(), + FieldType::Byte => "byte".to_string(), + FieldType::Ubyte => "ubyte".to_string(), + FieldType::Float => "float".to_string(), + FieldType::Double => "double".to_string(), + FieldType::Char => "char".to_string(), + FieldType::String => "string".to_string(), + FieldType::Lstring => "lstring".to_string(), + FieldType::Bigint => "bigint".to_string(), + FieldType::Enum(values) => { + let mut e = "enum(".to_string(); + for v in values { + e.push_str(v) + } + e.push(')'); + e + } + FieldType::Set(values) => { + let mut e = "set(".to_string(); + for v in values { + e.push_str(v) + } + e.push(')'); + e + } + FieldType::Declaration(decl_type, _decl_name) => match decl_type { + DeclarationType::Simple => "simple ...", + DeclarationType::Object => "object ...", + DeclarationType::Table => "table ...", + } + .to_string(), + } + } } #[derive(Clone, Debug)] @@ -523,7 +619,7 @@ pub mod parse { _ => return Err(ParseError::InvalidDeclareType(declare_type.to_string())), }; - let declare_name = parse_declare_name(parser)?; + let declare_name = DeclareName::parse(parser)?; let comment = parser.eat_quoted_string().to_string(); @@ -553,60 +649,6 @@ pub mod parse { })) } - fn parse_declare_name(parser: &mut parser::Parser<'_>) -> Result { - let declare_name = parser.eat_word(); - if !declare_name.chars().next().unwrap_or(' ').is_alphabetic() - || declare_name.chars().any(|c| !c.is_alphanumeric()) - { - return Err(ParseError::InvalidDeclareName(declare_name.to_string())); - } - let declare_name = declare_name.to_string(); - - let next_word = parser.peek_word(); - let index_type = match next_word { - "primary" => { - parser.eat_word(); - Some(IndexType::Primary) - } - "index" => { - parser.eat_word(); - - let next = parser.peek_one(); - let size = if next == "[" { - 
parser.eat_one(); - let size = parser.eat_word().to_string(); - let close = parser.eat_one(); - if close != "]" { - return Err(ParseError::InvalidIndexSizeBrackets(close.to_string())); - } - Some(size) - } else { - None - }; - Some(IndexType::Index(size)) - } - "unique" => { - parser.eat_word(); - Some(IndexType::Unique) - } - "auto" => None, - _ => None, - }; - - let next_word = parser.peek_word(); - let auto = if next_word == "auto" { - parser.eat_word(); - true - } else { - false - }; - Ok(DeclareName { - name: declare_name, - index_type, - auto, - }) - } - fn parse_field_list(parser: &mut parser::Parser<'_>) -> Result, ParseError> { let mut fields = vec![]; loop { diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index ec66738..08f0d8b 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -4,6 +4,7 @@ use std::fs::File; use std::io::{self, BufReader}; use std::path::Path; +use bigtools::bed::autosql::parse::parse_autosql; use bigtools::bed::bedparser::BedParser; use bigtools::bedchromdata::BedParserStreamingIterator; #[cfg(feature = "remote")] @@ -13,9 +14,9 @@ use bigtools::utils::misc::{ bigwig_average_over_bed, BigWigAverageOverBedEntry, BigWigAverageOverBedError, Name, }; use bigtools::{ - BBIReadError, BedEntry, BigBedRead as BigBedReadRaw, BigBedWrite as BigBedWriteRaw, - BigWigRead as BigWigReadRaw, BigWigWrite as BigWigWriteRaw, CachedBBIFileRead, Value, - ZoomRecord, + BBIFileRead, BBIReadError, BedEntry, BigBedRead as BigBedReadRaw, + BigBedWrite as BigBedWriteRaw, BigWigRead as BigWigReadRaw, BigWigWrite as BigWigWriteRaw, + CachedBBIFileRead, Value, ZoomRecord, }; use bigtools::utils::reopen::Reopen; @@ -34,13 +35,23 @@ mod file_like; type ValueTuple = (u32, u32, f32); type BedEntryTuple = (u32, u32, String); -fn start_end( - bbi: &B, +fn start_end( + bbi: &BBIReadRaw, chrom_name: &str, start: Option, end: Option, ) -> PyResult<(u32, u32)> { - let chrom = bbi.chroms().into_iter().find(|x| x.name == chrom_name); + let chroms = 
match &bbi { + BBIReadRaw::BigWigFile(b) => b.chroms(), + #[cfg(feature = "remote")] + BBIReadRaw::BigWigRemote(b) => b.chroms(), + BBIReadRaw::BigWigFileLike(b) => b.chroms(), + BBIReadRaw::BigBedFile(b) => b.chroms(), + #[cfg(feature = "remote")] + BBIReadRaw::BigBedRemote(b) => b.chroms(), + BBIReadRaw::BigBedFileLike(b) => b.chroms(), + }; + let chrom = chroms.into_iter().find(|x| x.name == chrom_name); let length = match chrom { None => { return Err(PyErr::new::(format!( @@ -59,88 +70,175 @@ impl Reopen for PyFileLikeObject { } } -trait BBIFile { - fn chroms(&self) -> &[bigtools::ChromInfo]; -} - -impl BBIFile for BigWigRead { - fn chroms(&self) -> &[bigtools::ChromInfo] { - match &self.bigwig { - BigWigRaw::File(b) => b.chroms(), - #[cfg(feature = "remote")] - BigWigRaw::Remote(b) => b.chroms(), - BigWigRaw::FileLike(b) => b.chroms(), - } - } -} - -impl BBIFile for BigBedRead { - fn chroms(&self) -> &[bigtools::ChromInfo] { - match &self.bigbed { - BigBedRaw::File(b) => b.chroms(), - #[cfg(feature = "remote")] - BigBedRaw::Remote(b) => b.chroms(), - BigBedRaw::FileLike(b) => b.chroms(), - } - } -} - -enum BigWigRaw { - File(BigWigReadRaw>), +enum BBIReadRaw { + BigWigFile(BigWigReadRaw>), + #[cfg(feature = "remote")] + BigWigRemote(BigWigReadRaw>), + BigWigFileLike(BigWigReadRaw>), + BigBedFile(BigBedReadRaw>), #[cfg(feature = "remote")] - Remote(BigWigReadRaw>), - FileLike(BigWigReadRaw>), + BigBedRemote(BigBedReadRaw>), + BigBedFileLike(BigBedReadRaw>), } -/// This class is the interface for reading a bigWig. +/// This class is the interface for reading a bigWig or bigBed. #[pyclass(module = "pybigtools")] -struct BigWigRead { - bigwig: BigWigRaw, +struct BBIRead { + bbi: BBIReadRaw, } #[pymethods] -impl BigWigRead { - /// Returns the intervals of a given range on a chromosome. The result is an iterator of (int, int, float) in the format (start, end, value). - /// The intervals may not be contiguous if the values in the bigwig are not. 
+impl BBIRead { + /// Returns the autosql of this bbi file. /// - /// The chrom argument is the name of the chromosome. - /// The start and end arguments denote the range to get values for. - /// If end is not provided, it defaults to the length of the chromosome. - /// If start is not provided, it defaults to the beginning of the chromosome. + /// For bigBeds, this comes directly from the autosql stored in the file. + /// For bigWigs, the autosql returned matches that of a bedGraph file. + /// + /// By default, the autosql is returned as a string. Passing `parse = true` + /// returns instead a dictionary of the format: + /// ``` + /// { + /// "name": , + /// "comment": , + /// "fields": [(, , ), ...], + /// } + /// ``` + fn sql(&mut self, py: Python, parse: Option) -> PyResult { + let parse = parse.unwrap_or(false); + pub const BEDGRAPH: &str = r#" + table bedGraph + "bedGraph file" + ( + string chrom; "Reference sequence chromosome or scaffold" + uint chromStart; "Start position in chromosome" + uint chromEnd; "End position in chromosome" + float value; "Value for a given interval" + ) + "#; + let schema = match &mut self.bbi { + BBIReadRaw::BigWigFile(_) + | BBIReadRaw::BigWigRemote(_) + | BBIReadRaw::BigWigFileLike(_) => BEDGRAPH.to_string(), + BBIReadRaw::BigBedFile(b) => b + .autosql() + .map_err(|e| PyErr::new::(format!("{}", e)))?, + BBIReadRaw::BigBedRemote(b) => b + .autosql() + .map_err(|e| PyErr::new::(format!("{}", e)))?, + BBIReadRaw::BigBedFileLike(b) => b + .autosql() + .map_err(|e| PyErr::new::(format!("{}", e)))?, + }; + let obj = if parse { + let mut declarations = parse_autosql(&schema).map_err(|_| { + PyErr::new::("Unable to parse autosql.") + })?; + if declarations.len() > 1 { + return Err(PyErr::new::( + "Unexpected extra declarations.", + )); + } + let declaration = declarations.pop(); + match declaration { + None => PyDict::new(py).to_object(py), + Some(d) => { + let fields = d + .fields + .iter() + .map(|f| (&f.name, 
f.field_type.to_string(), &f.comment)) + .collect::>() + .to_object(py); + [ + ("name", d.name.name.to_object(py)), + ("comment", d.comment.to_object(py)), + ("fields", fields), + ] + .into_py_dict(py) + .to_object(py) + } + } + } else { + schema.to_object(py) + }; + Ok(obj) + } + + /// Returns the records of a given range on a chromosome. + /// + /// The result is an iterator of tuples. For bigWigs, these tuples are in + /// the format (start: int, end: int, value: float). For bigBeds, these + /// tuples are in the format (start: int, end: int, ...), where the "rest" + /// fields are split by whitespace. /// - /// This returns an `IntervalIterator`. - fn intervals( + /// Missing values in bigWigs will results in non-contiguous records. + /// + /// The chrom argument is the name of the chromosome. + /// The start and end arguments denote the range to get values for. + /// If end is not provided, it defaults to the length of the chromosome. + /// If start is not provided, it defaults to the beginning of the chromosome. 
+ fn records( &mut self, + py: Python<'_>, chrom: String, start: Option, end: Option, - ) -> PyResult { - let (start, end) = start_end(self, &chrom, start, end)?; - match &self.bigwig { - BigWigRaw::File(b) => { + ) -> PyResult { + let (start, end) = start_end(&self.bbi, &chrom, start, end)?; + match &self.bbi { + BBIReadRaw::BigWigFile(b) => { let b = b.reopen()?; - Ok(IntervalIterator { + Ok(BigWigIntervalIterator { iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), - }) + } + .into_py(py)) } #[cfg(feature = "remote")] - BigWigRaw::Remote(b) => { + BBIReadRaw::BigWigRemote(b) => { let b = b.reopen()?; - Ok(IntervalIterator { + Ok(BigWigIntervalIterator { iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), - }) + } + .into_py(py)) } - BigWigRaw::FileLike(b) => { + BBIReadRaw::BigWigFileLike(b) => { let b = b.reopen()?; - Ok(IntervalIterator { + Ok(BigWigIntervalIterator { iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), - }) + } + .into_py(py)) + } + BBIReadRaw::BigBedFile(b) => { + let b = b.reopen()?; + Ok(BigBedEntriesIterator { + iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), + } + .into_py(py)) + } + #[cfg(feature = "remote")] + BBIReadRaw::BigBedRemote(b) => { + let b = b.reopen()?; + Ok(BigBedEntriesIterator { + iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), + } + .into_py(py)) + } + BBIReadRaw::BigBedFileLike(b) => { + let b = b.reopen()?; + Ok(BigBedEntriesIterator { + iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), + } + .into_py(py)) } } } - /// Returns the values of a given range on a chromosome. The result is an iterator of floats, of length (end - start). - /// If a value does not exist in the bigwig for a specific base, it will be nan. + /// Returns the values of a given range on a chromosome. + /// + /// For bigWigs, the result is an array of length (end - start). + /// If a value does not exist in the bigwig for a specific base, it will be nan. 
+ /// + /// For bigBeds, the returned array instead represents a pileup of the count + /// of intervals overlapping each base. /// /// The chrom argument is the name of the chromosome. /// The start and end arguments denote the range to get values for. @@ -284,6 +382,65 @@ impl BigWigRead { } Ok(Array::from(v)) } + fn to_entry_array>>( + start: usize, + end: usize, + iter: I, + ) -> Result, BBIReadError> { + use numpy::ndarray::Array; + let mut v = vec![0.0; end - start]; + for interval in iter { + let interval = interval?; + let interval_start = (interval.start as usize) - start; + let interval_end = (interval.end as usize) - start; + for i in v[interval_start..interval_end].iter_mut() { + *i += 1.0; + } + } + Ok(Array::from(v)) + } + fn to_array_entry_bins>>( + start: usize, + end: usize, + iter: I, + bins: usize, + ) -> Result, BBIReadError> { + use numpy::ndarray::Array; + let mut v = vec![0.0; bins]; + let bin_size = (end - start) as f64 / bins as f64; + for interval in iter { + let interval = interval?; + let interval_start = (interval.start as usize) - start; + let interval_end = (interval.end as usize) - start; + let bin_start = ((interval_start as f64) / bin_size) as usize; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + for bin in bin_start..bin_end { + v[bin] += 1.0; + } + } + Ok(Array::from(v)) + } + fn to_entry_array_zoom>>( + start: usize, + end: usize, + iter: I, + bins: usize, + ) -> Result, BBIReadError> { + use numpy::ndarray::Array; + let mut v = vec![0.0; bins]; + let bin_size = (end - start) as f64 / bins as f64; + for interval in iter { + let interval = interval?; + let interval_start = (interval.start as usize).max(start) - start; + let interval_end = (interval.end as usize).min(end) - start; + let bin_start = ((interval_start as f64) / bin_size) as usize; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + for bin in bin_start..bin_end { + v[bin] += interval.summary.total_items as f64; + } + } + 
Ok(Array::from(v)) + } let summary = match summary.as_deref() { None => Summary::Mean, @@ -296,27 +453,35 @@ impl BigWigRead { ))); } }; - let (start, end) = start_end(self, &chrom, start, end)?; - macro_rules! to_array { - ($b:ident) => {{ - match bins { - Some(bins) if !exact.unwrap_or(false) => { - let max_zoom_size = ((end - start) as f32 / (bins * 2) as f32) as u32; - let zoom = ($b) - .info() - .zoom_headers - .iter() - .filter(|z| z.reduction_level <= max_zoom_size) - .min_by_key(|z| max_zoom_size - z.reduction_level); - match zoom { - Some(zoom) => { - let iter = ($b) - .get_zoom_interval(&chrom, start, end, zoom.reduction_level) - .map_err(|e| { - PyErr::new::(format!("{}", e)) - })?; - Python::with_gil(|py| { - Ok(to_array_zoom( + let (start, end) = start_end(&self.bbi, &chrom, start, end)?; + fn intervals_to_array( + b: &mut BigWigReadRaw, + chrom: &str, + start: u32, + end: u32, + bins: Option, + summary: Summary, + exact: Option, + ) -> PyResult { + match bins { + Some(bins) if !exact.unwrap_or(false) => { + let max_zoom_size = ((end - start) as f32 / (bins * 2) as f32) as u32; + let zoom = b + .info() + .zoom_headers + .iter() + .filter(|z| z.reduction_level <= max_zoom_size) + .min_by_key(|z| max_zoom_size - z.reduction_level); + match zoom { + Some(zoom) => { + let iter = b + .get_zoom_interval(&chrom, start, end, zoom.reduction_level) + .map_err(|e| { + PyErr::new::(format!("{}", e)) + })?; + Python::with_gil(|py| { + Ok( + to_array_zoom( start as usize, end as usize, iter, @@ -327,72 +492,166 @@ impl BigWigRead { PyErr::new::(format!("{}", e)) })? 
.into_pyarray(py) - .to_object(py)) - }) - } - None => { - let iter = ($b).get_interval(&chrom, start, end).map_err(|e| { - PyErr::new::(format!("{}", e)) - })?; - Python::with_gil(|py| { - Ok(to_array_bins( + .to_object(py), + ) + }) + } + None => { + let iter = b.get_interval(&chrom, start, end).map_err(|e| { + PyErr::new::(format!("{}", e)) + })?; + Python::with_gil(|py| { + Ok( + to_array_bins( start as usize, end as usize, iter, summary, bins, ) - .map_err(|e| { - PyErr::new::(format!("{}", e)) - })? - .into_pyarray(py) - .to_object(py)) - }) - } - } - } - Some(bins) => { - let iter = ($b).get_interval(&chrom, start, end).map_err(|e| { - PyErr::new::(format!("{}", e)) - })?; - Python::with_gil(|py| { - Ok( - to_array_bins(start as usize, end as usize, iter, summary, bins) .map_err(|e| { PyErr::new::(format!("{}", e)) })? .into_pyarray(py) .to_object(py), - ) - }) + ) + }) + } } - _ => { - let iter = ($b).get_interval(&chrom, start, end).map_err(|e| { - PyErr::new::(format!("{}", e)) - })?; - Python::with_gil(|py| { - Ok(to_array(start as usize, end as usize, iter) + } + Some(bins) => { + let iter = b + .get_interval(&chrom, start, end) + .map_err(|e| PyErr::new::(format!("{}", e)))?; + Python::with_gil(|py| { + Ok( + to_array_bins(start as usize, end as usize, iter, summary, bins) .map_err(|e| { PyErr::new::(format!("{}", e)) })? .into_pyarray(py) - .to_object(py)) - }) + .to_object(py), + ) + }) + } + _ => { + let iter = b + .get_interval(&chrom, start, end) + .map_err(|e| PyErr::new::(format!("{}", e)))?; + Python::with_gil(|py| { + Ok(to_array(start as usize, end as usize, iter) + .map_err(|e| { + PyErr::new::(format!("{}", e)) + })? 
+ .into_pyarray(py) + .to_object(py)) + }) + } + } + } + fn entries_to_array( + b: &mut BigBedReadRaw, + chrom: &str, + start: u32, + end: u32, + bins: Option, + exact: Option, + ) -> PyResult { + match bins { + Some(bins) if !exact.unwrap_or(false) => { + let max_zoom_size = ((end - start) as f32 / (bins * 2) as f32) as u32; + let zoom = b + .info() + .zoom_headers + .iter() + .filter(|z| z.reduction_level <= max_zoom_size) + .min_by_key(|z| max_zoom_size - z.reduction_level); + match zoom { + Some(zoom) => { + let iter = b + .get_zoom_interval(&chrom, start, end, zoom.reduction_level) + .map_err(|e| { + PyErr::new::(format!("{}", e)) + })?; + Python::with_gil(|py| { + Ok( + to_entry_array_zoom(start as usize, end as usize, iter, bins) + .map_err(|e| { + PyErr::new::(format!( + "{}", + e + )) + })? + .into_pyarray(py) + .to_object(py), + ) + }) + } + None => { + let iter = b.get_interval(&chrom, start, end).map_err(|e| { + PyErr::new::(format!("{}", e)) + })?; + Python::with_gil(|py| { + Ok( + to_array_entry_bins(start as usize, end as usize, iter, bins) + .map_err(|e| { + PyErr::new::(format!( + "{}", + e + )) + })? + .into_pyarray(py) + .to_object(py), + ) + }) + } } } - }}; + Some(bins) => { + let iter = b + .get_interval(&chrom, start, end) + .map_err(|e| PyErr::new::(format!("{}", e)))?; + Python::with_gil(|py| { + Ok( + to_array_entry_bins(start as usize, end as usize, iter, bins) + .map_err(|e| { + PyErr::new::(format!("{}", e)) + })? + .into_pyarray(py) + .to_object(py), + ) + }) + } + _ => { + let iter = b + .get_interval(&chrom, start, end) + .map_err(|e| PyErr::new::(format!("{}", e)))?; + Python::with_gil(|py| { + Ok(to_entry_array(start as usize, end as usize, iter) + .map_err(|e| { + PyErr::new::(format!("{}", e)) + })? 
+ .into_pyarray(py) + .to_object(py)) + }) + } + } } - match &mut self.bigwig { - BigWigRaw::File(b) => { - to_array!(b) + match &mut self.bbi { + BBIReadRaw::BigWigFile(b) => { + intervals_to_array(b, &chrom, start, end, bins, summary, exact) } #[cfg(feature = "remote")] - BigWigRaw::Remote(b) => { - to_array!(b) + BBIReadRaw::BigWigRemote(b) => { + intervals_to_array(b, &chrom, start, end, bins, summary, exact) } - BigWigRaw::FileLike(b) => { - to_array!(b) + BBIReadRaw::BigWigFileLike(b) => { + intervals_to_array(b, &chrom, start, end, bins, summary, exact) } + BBIReadRaw::BigBedFile(b) => entries_to_array(b, &chrom, start, end, bins, exact), + #[cfg(feature = "remote")] + BBIReadRaw::BigBedRemote(b) => entries_to_array(b, &chrom, start, end, bins, exact), + BBIReadRaw::BigBedFileLike(b) => entries_to_array(b, &chrom, start, end, bins, exact), } } @@ -402,63 +661,37 @@ impl BigWigRead { /// If it is None, then all chroms will be returned. /// If it is a String, then the length of that chromosome will be returned. /// If the chromosome doesn't exist, nothing will be returned. 
- fn chroms(&mut self, py: Python, chrom: Option) -> PyResult> { - match &self.bigwig { - BigWigRaw::File(b) => { - let val = match chrom { - Some(chrom) => b - .chroms() + fn chroms(&mut self, py: Python, chrom: Option) -> Option { + fn get_chrom_obj( + b: &B, + py: Python, + chrom: Option, + ) -> Option { + match chrom { + Some(chrom) => b + .chroms() + .into_iter() + .find(|c| c.name == chrom) + .map(|c| c.length) + .map(|c| c.to_object(py)), + None => Some( + b.chroms() .into_iter() - .find(|c| c.name == chrom) - .map(|c| c.length) - .map(|c| c.to_object(py)), - None => Some( - b.chroms() - .into_iter() - .map(|c| (c.name.clone(), c.length)) - .into_py_dict(py) - .into(), - ), - }; - Ok(val) + .map(|c| (c.name.clone(), c.length)) + .into_py_dict(py) + .into(), + ), } + } + match &self.bbi { + BBIReadRaw::BigWigFile(b) => get_chrom_obj(b, py, chrom), #[cfg(feature = "remote")] - BigWigRaw::Remote(b) => { - let val = match chrom { - Some(chrom) => b - .chroms() - .into_iter() - .find(|c| c.name == chrom) - .map(|c| c.length) - .map(|c| c.to_object(py)), - None => Some( - b.chroms() - .into_iter() - .map(|c| (c.name.clone(), c.length)) - .into_py_dict(py) - .into(), - ), - }; - Ok(val) - } - BigWigRaw::FileLike(b) => { - let val = match chrom { - Some(chrom) => b - .chroms() - .into_iter() - .find(|c| c.name == chrom) - .map(|c| c.length) - .map(|c| c.to_object(py)), - None => Some( - b.chroms() - .into_iter() - .map(|c| (c.name.clone(), c.length)) - .into_py_dict(py) - .into(), - ), - }; - Ok(val) - } + BBIReadRaw::BigWigRemote(b) => get_chrom_obj(b, py, chrom), + BBIReadRaw::BigWigFileLike(b) => get_chrom_obj(b, py, chrom), + BBIReadRaw::BigBedFile(b) => get_chrom_obj(b, py, chrom), + #[cfg(feature = "remote")] + BBIReadRaw::BigBedRemote(b) => get_chrom_obj(b, py, chrom), + BBIReadRaw::BigBedFileLike(b) => get_chrom_obj(b, py, chrom), } } } @@ -467,13 +700,13 @@ impl BigWigRead { /// It returns only values that exist in the bigWig, skipping /// any missing 
intervals. #[pyclass(module = "pybigtools")] -struct IntervalIterator { +struct BigWigIntervalIterator { iter: Box> + Send>, } #[pymethods] -impl IntervalIterator { - fn __iter__(slf: PyRefMut) -> PyResult> { +impl BigWigIntervalIterator { + fn __iter__(slf: PyRefMut) -> PyResult> { Ok(slf.into()) } @@ -486,6 +719,27 @@ impl IntervalIterator { } } +/// This class is an interator for the entries in a bigBed file. +#[pyclass(module = "pybigtools")] +struct BigBedEntriesIterator { + iter: Box> + Send>, +} + +#[pymethods] +impl BigBedEntriesIterator { + fn __iter__(slf: PyRefMut) -> PyResult> { + Ok(slf.into()) + } + + fn __next__(mut slf: PyRefMut) -> PyResult> { + Ok(slf + .iter + .next() + .map(|e| e.unwrap()) + .map(|e| (e.start, e.end, e.rest))) + } +} + /// This class is the interface for writing a bigWig. #[pyclass(module = "pybigtools")] struct BigWigWrite { @@ -625,144 +879,6 @@ impl BigWigWrite { } } -enum BigBedRaw { - File(BigBedReadRaw>), - #[cfg(feature = "remote")] - Remote(BigBedReadRaw>), - FileLike(BigBedReadRaw>), -} - -/// This class is the interface for reading a bigBed. -#[pyclass(module = "pybigtools")] -struct BigBedRead { - bigbed: BigBedRaw, -} - -#[pymethods] -impl BigBedRead { - /// Returns the entries of a given range on a chromosome. The result is an iterator of (int, int, String) in the format (start, end, rest). - /// The entries may not be contiguous if the values in the bigbed are not. - /// The chrom argument is the name of the chromosome. - /// The start and end arguments denote the range to get values for. - /// If end is not provided, it defaults to the length of the chromosome. - /// If start is not provided, it defaults to the beginning of the chromosome. - /// - /// This returns an `EntriesIterator`. 
- fn entries( - &mut self, - chrom: String, - start: Option, - end: Option, - ) -> PyResult { - let (start, end) = start_end(self, &chrom, start, end)?; - match &mut self.bigbed { - BigBedRaw::File(b) => { - let b = b.reopen()?; - Ok(EntriesIterator { - iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), - }) - } - #[cfg(feature = "remote")] - BigBedRaw::Remote(b) => { - let b = b.reopen()?; - Ok(EntriesIterator { - iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), - }) - } - BigBedRaw::FileLike(b) => { - let b = b.reopen()?; - Ok(EntriesIterator { - iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), - }) - } - } - } - - /// Returns the chromosomes in a bigwig, and their lengths. - /// The chroms argument can be either String or None. If it is None, then all chroms will be returned. If it is a String, then the length of that chromosome will be returned. - /// If the chromosome doesn't exist, nothing will be returned. - fn chroms(&mut self, py: Python, chrom: Option) -> PyResult> { - match &self.bigbed { - BigBedRaw::File(b) => { - let val = match chrom { - Some(chrom) => b - .chroms() - .into_iter() - .find(|c| c.name == chrom) - .map(|c| c.length) - .map(|c| c.to_object(py)), - None => Some( - b.chroms() - .into_iter() - .map(|c| (c.name.clone(), c.length)) - .into_py_dict(py) - .into(), - ), - }; - Ok(val) - } - #[cfg(feature = "remote")] - BigBedRaw::Remote(b) => { - let val = match chrom { - Some(chrom) => b - .chroms() - .into_iter() - .find(|c| c.name == chrom) - .map(|c| c.length) - .map(|c| c.to_object(py)), - None => Some( - b.chroms() - .into_iter() - .map(|c| (c.name.clone(), c.length)) - .into_py_dict(py) - .into(), - ), - }; - Ok(val) - } - BigBedRaw::FileLike(b) => { - let val = match chrom { - Some(chrom) => b - .chroms() - .into_iter() - .find(|c| c.name == chrom) - .map(|c| c.length) - .map(|c| c.to_object(py)), - None => Some( - b.chroms() - .into_iter() - .map(|c| (c.name.clone(), c.length)) - 
.into_py_dict(py) - .into(), - ), - }; - Ok(val) - } - } - } -} - -/// This class is an interator for the entries in a bigBed file. -#[pyclass(module = "pybigtools")] -struct EntriesIterator { - iter: Box> + Send>, -} - -#[pymethods] -impl EntriesIterator { - fn __iter__(slf: PyRefMut) -> PyResult> { - Ok(slf.into()) - } - - fn __next__(mut slf: PyRefMut) -> PyResult> { - Ok(slf - .iter - .next() - .map(|e| e.unwrap()) - .map(|e| (e.start, e.end, e.rest))) - } -} - /// This class is the interface for writing to a bigBed. #[pyclass(module = "pybigtools")] struct BigBedWrite { @@ -966,9 +1082,8 @@ impl BigWigAverageOverBedEntriesIterator { /// /// This returns one of the following: /// - `BigWigWrite` -/// - `BigWigRead` /// - `BigBedWrite` -/// - `BigBedRead` +/// - `BBIRead` #[pyfunction] fn open(py: Python, path_url_or_file_like: PyObject, mode: Option) -> PyResult { let iswrite = match &mode { @@ -1000,13 +1115,13 @@ fn open(py: Python, path_url_or_file_like: PyObject, mode: Option) -> Py ))), }; let read = match BigWigReadRaw::open(file_like.clone()) { - Ok(bwr) => BigWigRead { - bigwig: BigWigRaw::FileLike(bwr.cached()), + Ok(bwr) => BBIRead { + bbi: BBIReadRaw::BigWigFileLike(bwr.cached()), } .into_py(py), Err(_) => match BigBedReadRaw::open(file_like) { - Ok(bbr) => BigBedRead { - bigbed: BigBedRaw::FileLike(bbr.cached()), + Ok(bbr) => BBIRead { + bbi: BBIReadRaw::BigBedFileLike(bbr.cached()), } .into_py(py), Err(e) => { @@ -1071,8 +1186,8 @@ fn open_path_or_url( "bw" | "bigWig" | "bigwig" => { if isfile { match BigWigReadRaw::open_file(&path_url_or_file_like) { - Ok(bwr) => BigWigRead { - bigwig: BigWigRaw::File(bwr.cached()), + Ok(bwr) => BBIRead { + bbi: BBIReadRaw::BigWigFile(bwr.cached()), } .into_py(py), Err(_) => { @@ -1084,8 +1199,8 @@ fn open_path_or_url( } else { #[cfg(feature = "remote")] match BigWigReadRaw::open(RemoteFile::new(&path_url_or_file_like)) { - Ok(bwr) => BigWigRead { - bigwig: BigWigRaw::Remote(bwr.cached()), + Ok(bwr) => BBIRead { + 
bbi: BBIReadRaw::BigWigRemote(bwr.cached()), } .into_py(py), Err(_) => { @@ -1106,8 +1221,8 @@ fn open_path_or_url( "bb" | "bigBed" | "bigbed" => { if isfile { match BigBedReadRaw::open_file(&path_url_or_file_like) { - Ok(bwr) => BigBedRead { - bigbed: BigBedRaw::File(bwr.cached()), + Ok(bwr) => BBIRead { + bbi: BBIReadRaw::BigBedFile(bwr.cached()), } .into_py(py), Err(_) => { @@ -1119,8 +1234,8 @@ fn open_path_or_url( } else { #[cfg(feature = "remote")] match BigBedReadRaw::open(RemoteFile::new(&path_url_or_file_like)) { - Ok(bwr) => BigBedRead { - bigbed: BigBedRaw::Remote(bwr.cached()), + Ok(bwr) => BBIRead { + bbi: BBIReadRaw::BigBedRemote(bwr.cached()), } .into_py(py), Err(_) => { @@ -1275,12 +1390,10 @@ fn pybigtools(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - - m.add_class::()?; + m.add_class::()?; - m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } From 07fe3b6587e39bcaf0e75963b9ea9277a1db5140 Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Fri, 15 Mar 2024 20:57:35 -0400 Subject: [PATCH 03/30] Overhaul pybigtools API --- pybigtools/src/lib.rs | 463 +++++++++++++++++++++++++++++++++++------- 1 file changed, 391 insertions(+), 72 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 08f0d8b..a5731d1 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -1,5 +1,6 @@ #![allow(non_snake_case)] +use std::collections::VecDeque; use std::fs::File; use std::io::{self, BufReader}; use std::path::Path; @@ -21,6 +22,7 @@ use bigtools::{ use bigtools::utils::reopen::Reopen; use file_like::PyFileLikeObject; +use numpy::ndarray::Array1; use numpy::IntoPyArray; use pyo3::exceptions::{self, PyTypeError}; use pyo3::prelude::*; @@ -81,6 +83,229 @@ enum BBIReadRaw { BigBedFileLike(BigBedReadRaw>), } +#[derive(Copy, Clone, Debug)] +enum Summary { + Mean, + Min, + Max, +} + +fn to_array_entry_bins>>( + 
start: usize, + end: usize, + iter: I, + summary: Summary, + bins: usize, +) -> Result, BBIReadError> { + use numpy::ndarray::Array; + let mut bin_data: VecDeque<(usize, usize, usize, Vec)> = VecDeque::new(); + let mut v = vec![0.0; bins]; + let bin_size = (end - start) as f64 / bins as f64; + for interval in iter { + let interval = interval?; + let interval_start = (interval.start as usize).max(start) - start; + let interval_end = (interval.end as usize).min(end) - start; + let bin_start = ((interval_start as f64) / bin_size) as usize; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + + while let Some(front) = bin_data.front_mut() { + if front.0 < bin_start { + let front = bin_data.pop_front().unwrap(); + let bin = front.0; + + match summary { + Summary::Min => { + v[bin] = front + .3 + .into_iter() + .reduce(|min, v| min.min(v)) + .unwrap_or(0.0); + } + Summary::Max => { + v[bin] = front + .3 + .into_iter() + .reduce(|max, v| max.max(v)) + .unwrap_or(0.0); + } + Summary::Mean => { + v[bin] = front.3.into_iter().sum::() / bin_size; + } + } + } else { + break; + } + } + while let Some(bin) = bin_data + .back_mut() + .map(|b| ((b.0 + 1) < bin_end).then(|| b.0 + 1)) + .unwrap_or(Some(bin_start)) + { + let bin_start = ((bin as f64) * bin_size).ceil() as usize; + let bin_end = (((bin + 1) as f64) * bin_size).ceil() as usize; + + bin_data.push_back((bin, bin_start, bin_end, vec![0.0; bin_end - bin_start])); + } + for bin in bin_data.iter() { + assert!( + (bin_start..bin_end).contains(&bin.0), + "{} not in {}..{}", + bin.0, + bin_start, + bin_end + ); + } + for bin in bin_start..bin_end { + assert!(bin_data.iter().find(|b| b.0 == bin).is_some()); + } + for (_bin, bin_start, bin_end, data) in bin_data.iter_mut() { + let overlap_start = (*bin_start).max(interval_start); + let overlap_end = (*bin_end).min(interval_end); + + for i in &mut data[(overlap_start - *bin_start)..(overlap_end - *bin_start)] { + *i = *i + 1.0; + } + } + } + while let Some(front) 
= bin_data.pop_front() { + let bin = front.0; + + match summary { + Summary::Min => { + v[bin] = front + .3 + .into_iter() + .reduce(|min, v| min.min(v)) + .unwrap_or(0.0); + } + Summary::Max => { + v[bin] = front + .3 + .into_iter() + .reduce(|max, v| max.max(v)) + .unwrap_or(0.0); + } + Summary::Mean => { + v[bin] = front.3.into_iter().sum::() / bin_size; + } + } + } + Ok(Array::from(v)) +} + +fn to_entry_array_zoom>>( + start: usize, + end: usize, + iter: I, + summary: Summary, + bins: usize, +) -> Result, BBIReadError> { + use numpy::ndarray::Array; + let mut bin_data: VecDeque<(usize, usize, usize, Vec)> = VecDeque::new(); + let mut v = vec![0.0; bins]; + let bin_size = (end - start) as f64 / bins as f64; + for interval in iter { + let interval = interval?; + let interval_start = (interval.start as usize).max(start) - start; + let interval_end = (interval.end as usize).min(end) - start; + let bin_start = ((interval_start as f64) / bin_size) as usize; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + + while let Some(front) = bin_data.front_mut() { + if front.0 < bin_start { + let front = bin_data.pop_front().unwrap(); + let bin = front.0; + + match summary { + Summary::Min => { + v[bin] = front + .3 + .into_iter() + .reduce(|min, v| min.min(v)) + .unwrap_or(0.0) + .max(0.0); + } + Summary::Max => { + v[bin] = front + .3 + .into_iter() + .reduce(|max, v| max.max(v)) + .unwrap_or(0.0) + .max(0.0); + } + Summary::Mean => { + v[bin] = front.3.into_iter().sum::() / bin_size; + } + } + } else { + break; + } + } + while let Some(bin) = bin_data + .back_mut() + .map(|b| ((b.0 + 1) < bin_end).then(|| b.0 + 1)) + .unwrap_or(Some(bin_start)) + { + let bin_start = ((bin as f64) * bin_size).ceil() as usize; + let bin_end = (((bin + 1) as f64) * bin_size).ceil() as usize; + + bin_data.push_back((bin, bin_start, bin_end, vec![f64::NAN; bin_end - bin_start])); + } + for bin in bin_data.iter() { + assert!( + (bin_start..bin_end).contains(&bin.0), + "{} not 
in {}..{}", + bin.0, + bin_start, + bin_end + ); + } + for bin in bin_start..bin_end { + assert!(bin_data.iter().find(|b| b.0 == bin).is_some()); + } + for (_bin, bin_start, bin_end, data) in bin_data.iter_mut() { + let overlap_start = (*bin_start).max(interval_start); + let overlap_end = (*bin_end).min(interval_end); + + let mean = interval.summary.sum / (interval.end - interval.start) as f64; + for i in &mut data[(overlap_start - *bin_start)..(overlap_end - *bin_start)] { + match summary { + Summary::Mean => *i += mean, + Summary::Min => *i = i.min(interval.summary.min_val), + Summary::Max => *i = i.max(interval.summary.max_val), + } + *i = i.max(0.0); + } + } + } + while let Some(front) = bin_data.pop_front() { + let bin = front.0; + + match summary { + Summary::Min => { + v[bin] = front + .3 + .into_iter() + .reduce(|min, v| min.min(v)) + .unwrap_or(0.0) + .max(0.0); + } + Summary::Max => { + v[bin] = front + .3 + .into_iter() + .reduce(|max, v| max.max(v)) + .unwrap_or(0.0) + .max(0.0); + } + Summary::Mean => { + v[bin] = front.3.into_iter().sum::() / bin_size; + } + } + } + Ok(Array::from(v)) +} + /// This class is the interface for reading a bigWig or bigBed. 
#[pyclass(module = "pybigtools")] struct BBIRead { @@ -255,12 +480,6 @@ impl BBIRead { summary: Option, exact: Option, ) -> PyResult { - enum Summary { - Mean, - Min, - Max, - } - use numpy::ndarray::Array1; fn to_array>>( start: usize, end: usize, @@ -399,48 +618,6 @@ impl BBIRead { } Ok(Array::from(v)) } - fn to_array_entry_bins>>( - start: usize, - end: usize, - iter: I, - bins: usize, - ) -> Result, BBIReadError> { - use numpy::ndarray::Array; - let mut v = vec![0.0; bins]; - let bin_size = (end - start) as f64 / bins as f64; - for interval in iter { - let interval = interval?; - let interval_start = (interval.start as usize) - start; - let interval_end = (interval.end as usize) - start; - let bin_start = ((interval_start as f64) / bin_size) as usize; - let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; - for bin in bin_start..bin_end { - v[bin] += 1.0; - } - } - Ok(Array::from(v)) - } - fn to_entry_array_zoom>>( - start: usize, - end: usize, - iter: I, - bins: usize, - ) -> Result, BBIReadError> { - use numpy::ndarray::Array; - let mut v = vec![0.0; bins]; - let bin_size = (end - start) as f64 / bins as f64; - for interval in iter { - let interval = interval?; - let interval_start = (interval.start as usize).max(start) - start; - let interval_end = (interval.end as usize).min(end) - start; - let bin_start = ((interval_start as f64) / bin_size) as usize; - let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; - for bin in bin_start..bin_end { - v[bin] += interval.summary.total_items as f64; - } - } - Ok(Array::from(v)) - } let summary = match summary.as_deref() { None => Summary::Mean, @@ -555,6 +732,7 @@ impl BBIRead { start: u32, end: u32, bins: Option, + summary: Summary, exact: Option, ) -> PyResult { match bins { @@ -574,17 +752,18 @@ impl BBIRead { PyErr::new::(format!("{}", e)) })?; Python::with_gil(|py| { - Ok( - to_entry_array_zoom(start as usize, end as usize, iter, bins) - .map_err(|e| { - PyErr::new::(format!( - "{}", - e 
- )) - })? - .into_pyarray(py) - .to_object(py), + Ok(to_entry_array_zoom( + start as usize, + end as usize, + iter, + summary, + bins, ) + .map_err(|e| { + PyErr::new::(format!("{}", e)) + })? + .into_pyarray(py) + .to_object(py)) }) } None => { @@ -592,17 +771,18 @@ impl BBIRead { PyErr::new::(format!("{}", e)) })?; Python::with_gil(|py| { - Ok( - to_array_entry_bins(start as usize, end as usize, iter, bins) - .map_err(|e| { - PyErr::new::(format!( - "{}", - e - )) - })? - .into_pyarray(py) - .to_object(py), + Ok(to_array_entry_bins( + start as usize, + end as usize, + iter, + summary, + bins, ) + .map_err(|e| { + PyErr::new::(format!("{}", e)) + })? + .into_pyarray(py) + .to_object(py)) }) } } @@ -613,7 +793,7 @@ impl BBIRead { .map_err(|e| PyErr::new::(format!("{}", e)))?; Python::with_gil(|py| { Ok( - to_array_entry_bins(start as usize, end as usize, iter, bins) + to_array_entry_bins(start as usize, end as usize, iter, summary, bins) .map_err(|e| { PyErr::new::(format!("{}", e)) })? 
@@ -648,10 +828,16 @@ impl BBIRead { BBIReadRaw::BigWigFileLike(b) => { intervals_to_array(b, &chrom, start, end, bins, summary, exact) } - BBIReadRaw::BigBedFile(b) => entries_to_array(b, &chrom, start, end, bins, exact), + BBIReadRaw::BigBedFile(b) => { + entries_to_array(b, &chrom, start, end, bins, summary, exact) + } #[cfg(feature = "remote")] - BBIReadRaw::BigBedRemote(b) => entries_to_array(b, &chrom, start, end, bins, exact), - BBIReadRaw::BigBedFileLike(b) => entries_to_array(b, &chrom, start, end, bins, exact), + BBIReadRaw::BigBedRemote(b) => { + entries_to_array(b, &chrom, start, end, bins, summary, exact) + } + BBIReadRaw::BigBedFileLike(b) => { + entries_to_array(b, &chrom, start, end, bins, summary, exact) + } } } @@ -1397,3 +1583,136 @@ fn pybigtools(_py: Python, m: &PyModule) -> PyResult<()> { Ok(()) } + +#[cfg(test)] +mod test { + use bigtools::BedEntry; + + use crate::to_array_entry_bins; + + #[test] + fn test_to_array_entry_bins() { + let entries = [BedEntry { + start: 10, + end: 20, + rest: "".to_string(), + }]; + let res = to_array_entry_bins( + 10, + 20, + entries.into_iter().map(|v| Ok(v)), + crate::Summary::Mean, + 1, + ) + .unwrap(); + let res = res.to_vec(); + assert_eq!(res, [1.0].into_iter().collect::>()); + + let entries = [BedEntry { + start: 10, + end: 20, + rest: "".to_string(), + }]; + let res = to_array_entry_bins( + 10, + 20, + entries.into_iter().map(|v| Ok(v)), + crate::Summary::Mean, + 2, + ) + .unwrap(); + let res = res.to_vec(); + assert_eq!(res, [1.0, 1.0].into_iter().collect::>()); + + let entries = [ + BedEntry { + start: 10, + end: 20, + rest: "".to_string(), + }, + BedEntry { + start: 15, + end: 20, + rest: "".to_string(), + }, + ]; + let res = to_array_entry_bins( + 10, + 20, + entries.clone().into_iter().map(|v| Ok(v)), + crate::Summary::Mean, + 2, + ) + .unwrap(); + let res = res.to_vec(); + assert_eq!(res, [1.0, 2.0].into_iter().collect::>()); + let res = to_array_entry_bins( + 10, + 20, + 
entries.clone().into_iter().map(|v| Ok(v)), + crate::Summary::Min, + 1, + ) + .unwrap(); + let res = res.to_vec(); + assert_eq!(res, [1.0].into_iter().collect::>()); + let res = to_array_entry_bins( + 10, + 20, + entries.clone().into_iter().map(|v| Ok(v)), + crate::Summary::Max, + 1, + ) + .unwrap(); + let res = res.to_vec(); + assert_eq!(res, [2.0].into_iter().collect::>()); + + let entries = [ + BedEntry { + start: 10, + end: 20, + rest: "".to_string(), + }, + BedEntry { + start: 15, + end: 20, + rest: "".to_string(), + }, + BedEntry { + start: 15, + end: 25, + rest: "".to_string(), + }, + ]; + let res = to_array_entry_bins( + 10, + 20, + entries.clone().into_iter().map(|v| Ok(v)), + crate::Summary::Mean, + 2, + ) + .unwrap(); + let res = res.to_vec(); + assert_eq!(res, [1.0, 3.0].into_iter().collect::>()); + let res = to_array_entry_bins( + 10, + 20, + entries.clone().into_iter().map(|v| Ok(v)), + crate::Summary::Min, + 1, + ) + .unwrap(); + let res = res.to_vec(); + assert_eq!(res, [1.0].into_iter().collect::>()); + let res = to_array_entry_bins( + 10, + 20, + entries.clone().into_iter().map(|v| Ok(v)), + crate::Summary::Max, + 1, + ) + .unwrap(); + let res = res.to_vec(); + assert_eq!(res, [3.0].into_iter().collect::>()); + } +} From 0da362ab606bac56dc39652b0e9385b4364496d4 Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Mon, 18 Mar 2024 10:17:08 -0400 Subject: [PATCH 04/30] Split rest into fields of tuple --- pybigtools/src/lib.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index a5731d1..5b250a3 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -35,7 +35,6 @@ use url::Url; mod file_like; type ValueTuple = (u32, u32, f32); -type BedEntryTuple = (u32, u32, String); fn start_end( bbi: &BBIReadRaw, @@ -917,12 +916,19 @@ impl BigBedEntriesIterator { Ok(slf.into()) } - fn __next__(mut slf: PyRefMut) -> PyResult> { - 
Ok(slf - .iter - .next() - .map(|e| e.unwrap()) - .map(|e| (e.start, e.end, e.rest))) + fn __next__(mut slf: PyRefMut) -> PyResult> { + let py = slf.py(); + let next = match slf.iter.next() { + Some(n) => n.map_err(|e| PyErr::new::(format!("{}", e)))?, + None => return Ok(None), + }; + let elements: Vec<_> = [next.start.to_object(py), next.end.to_object(py)] + .into_iter() + .chain(next.rest.split_whitespace().map(|o| o.to_object(py))) + .collect(); + Ok(Some( + PyTuple::new::(py, elements.into_iter()).to_object(py), + )) } } From d205d4d597f2cccc1db5ab142144cf169b40ced3 Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Mon, 18 Mar 2024 10:26:02 -0400 Subject: [PATCH 05/30] Add is_bigwig and is_bigbed --- pybigtools/src/lib.rs | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 5b250a3..ea43644 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -313,6 +313,44 @@ struct BBIRead { #[pymethods] impl BBIRead { + fn is_bigwig(&self) -> bool { + #[cfg(feature = "remote")] + { + matches!( + self.bbi, + BBIReadRaw::BigWigFile(_) + | BBIReadRaw::BigWigRemote(_) + | BBIReadRaw::BigWigFileLike(_) + ) + } + #[cfg(not(feature = "remote"))] + { + matches!( + self.bbi, + BBIReadRaw::BigWigFile(_) | BBIReadRaw::BigWigFileLike(_) + ) + } + } + + fn is_bigbed(&self) -> bool { + #[cfg(feature = "remote")] + { + matches!( + self.bbi, + BBIReadRaw::BigBedFile(_) + | BBIReadRaw::BigBedRemote(_) + | BBIReadRaw::BigBedFileLike(_) + ) + } + #[cfg(not(feature = "remote"))] + { + matches!( + self.bbi, + BBIReadRaw::BigBedFile(_) | BBIReadRaw::BigBedFileLike(_) + ) + } + } + /// Returns the autosql of this bbi file. /// /// For bigBeds, this comes directly from the autosql stored in the file. 
From d37ebe48013e66482d54d3e9d1379ec888e0409e Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Tue, 19 Mar 2024 20:01:34 -0400 Subject: [PATCH 06/30] Move other values creation functions to outer scope and add simplified error conversion --- pybigtools/src/lib.rs | 686 ++++++++++++++++++++---------------------- 1 file changed, 325 insertions(+), 361 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index ea43644..73a9924 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -89,6 +89,298 @@ enum Summary { Max, } +trait ToPyErr { + fn to_py_err(self) -> PyErr; +} + +impl ToPyErr for bigtools::ZoomIntervalError { + fn to_py_err(self) -> PyErr { + PyErr::new::(format!("{}", self)) + } +} +impl ToPyErr for bigtools::BBIReadError { + fn to_py_err(self) -> PyErr { + PyErr::new::(format!("{}", self)) + } +} + +trait ConvertResult { + fn convert_err(self) -> Result; +} +impl ConvertResult for Result { + fn convert_err(self) -> Result { + self.map_err(|e| e.to_py_err()) + } +} + +fn intervals_to_array( + b: &mut BigWigReadRaw, + chrom: &str, + start: u32, + end: u32, + bins: Option, + summary: Summary, + exact: bool, + missing: f64, + oob: f64, + arr: Option, +) -> PyResult { + let zoom = if let (Some(bins), false) = (bins, exact) { + let max_zoom_size = ((end - start) as f32 / (bins * 2) as f32) as u32; + let zoom = b + .info() + .zoom_headers + .iter() + .filter(|z| z.reduction_level <= max_zoom_size) + .min_by_key(|z| max_zoom_size - z.reduction_level); + zoom + } else { + None + }; + match (bins, zoom) { + (Some(bins), Some(zoom)) => { + let iter = b + .get_zoom_interval(&chrom, start, end, zoom.reduction_level) + .convert_err()?; + Python::with_gil(|py| { + Ok( + to_array_zoom(start as usize, end as usize, iter, summary, bins) + .convert_err()? 
+ .into_pyarray(py) + .to_object(py), + ) + }) + } + (Some(bins), None) => { + let iter = b.get_interval(&chrom, start, end).convert_err()?; + Python::with_gil(|py| { + Ok( + to_array_bins(start as usize, end as usize, iter, summary, bins) + .convert_err()? + .into_pyarray(py) + .to_object(py), + ) + }) + } + _ => { + let iter = b.get_interval(&chrom, start, end).convert_err()?; + Python::with_gil(|py| { + Ok(to_array(start as usize, end as usize, iter) + .convert_err()? + .into_pyarray(py) + .to_object(py)) + }) + } + } +} +fn entries_to_array( + b: &mut BigBedReadRaw, + chrom: &str, + start: u32, + end: u32, + bins: Option, + summary: Summary, + exact: bool, + missing: f64, + oob: f64, + arr: Option, +) -> PyResult { + match bins { + Some(bins) if !exact => { + let max_zoom_size = ((end - start) as f32 / (bins * 2) as f32) as u32; + let zoom = b + .info() + .zoom_headers + .iter() + .filter(|z| z.reduction_level <= max_zoom_size) + .min_by_key(|z| max_zoom_size - z.reduction_level); + match zoom { + Some(zoom) => { + let iter = b + .get_zoom_interval(&chrom, start, end, zoom.reduction_level) + .convert_err()?; + Python::with_gil(|py| { + Ok( + to_entry_array_zoom(start as usize, end as usize, iter, summary, bins) + .convert_err()? + .into_pyarray(py) + .to_object(py), + ) + }) + } + None => { + let iter = b.get_interval(&chrom, start, end).convert_err()?; + Python::with_gil(|py| { + Ok( + to_array_entry_bins(start as usize, end as usize, iter, summary, bins) + .convert_err()? + .into_pyarray(py) + .to_object(py), + ) + }) + } + } + } + Some(bins) => { + let iter = b.get_interval(&chrom, start, end).convert_err()?; + Python::with_gil(|py| { + Ok( + to_array_entry_bins(start as usize, end as usize, iter, summary, bins) + .convert_err()? + .into_pyarray(py) + .to_object(py), + ) + }) + } + _ => { + let iter = b.get_interval(&chrom, start, end).convert_err()?; + Python::with_gil(|py| { + Ok(to_entry_array(start as usize, end as usize, iter) + .convert_err()? 
+ .into_pyarray(py) + .to_object(py)) + }) + } + } +} +fn to_array>>( + start: usize, + end: usize, + iter: I, +) -> Result, BBIReadError> { + use numpy::ndarray::Array; + let mut v = vec![f64::NAN; end - start]; + for interval in iter { + let interval = interval?; + let interval_start = (interval.start as usize) - start; + let interval_end = (interval.end as usize) - start; + for i in v[interval_start..interval_end].iter_mut() { + *i = interval.value as f64; + } + } + Ok(Array::from(v)) +} +fn to_array_bins>>( + start: usize, + end: usize, + iter: I, + summary: Summary, + bins: usize, +) -> Result, BBIReadError> { + use numpy::ndarray::Array; + let mut v = match summary { + Summary::Min | Summary::Max => vec![f64::NAN; bins], + Summary::Mean => vec![0.0; bins], + }; + let bin_size = (end - start) as f64 / bins as f64; + for interval in iter { + let interval = interval?; + let interval_start = (interval.start as usize) - start; + let interval_end = (interval.end as usize) - start; + let bin_start = ((interval_start as f64) / bin_size) as usize; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + for bin in bin_start..bin_end { + let bin_start = (bin as f64) * bin_size; + let bin_end = (bin as f64 + 1.0) * bin_size; + let interval_start = (interval_start as f64).max(bin_start); + let interval_end = (interval_end as f64).min(bin_end); + // Possible overlaps + // bin |-----| |------| |-----| + // value |----| |----| |----| + // overlap ---- ------ ---- + match summary { + Summary::Min => { + v[bin] = v[bin].min(interval.value as f64); + } + Summary::Max => { + v[bin] = v[bin].max(interval.value as f64); + } + Summary::Mean => { + let overlap_size = interval_end - interval_start; + v[bin] += (overlap_size as f64) * interval.value as f64; + } + } + } + } + if let Summary::Mean = summary { + let last = v.last().copied().expect("Should be at least one bin."); + let last_size = (end - start) as f64 - (bins as f64 - 1.0) * bin_size; + v = 
v.into_iter().map(|v| v / bin_size as f64).collect(); + // The last bin could be smaller + *v.last_mut().expect("Should be at least one bin.") = last / last_size; + } + Ok(Array::from(v)) +} +fn to_array_zoom>>( + start: usize, + end: usize, + iter: I, + summary: Summary, + bins: usize, +) -> Result, BBIReadError> { + use numpy::ndarray::Array; + let mut v = match summary { + Summary::Min | Summary::Max => vec![f64::NAN; bins], + Summary::Mean => vec![0.0; bins], + }; + let bin_size = (end - start) as f64 / bins as f64; + for interval in iter { + let interval = interval?; + let interval_start = (interval.start as usize).max(start) - start; + let interval_end = (interval.end as usize).min(end) - start; + let bin_start = ((interval_start as f64) / bin_size) as usize; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + for bin in bin_start..bin_end { + let bin_start = (bin as f64) * bin_size; + let bin_end = (bin as f64 + 1.0) * bin_size; + let interval_start = (interval_start as f64).max(bin_start); + let interval_end = (interval_end as f64).min(bin_end); + // Possible overlaps + // bin |-----| |------| |-----| + // value |----| |----| |----| + // overlap ---- ------ ---- + match summary { + Summary::Min => { + v[bin] = v[bin].min(interval.summary.min_val as f64); + } + Summary::Max => { + v[bin] = v[bin].max(interval.summary.max_val as f64); + } + Summary::Mean => { + let overlap_size = interval_end - interval_start; + let zoom_mean = + (interval.summary.sum as f64) / (interval.summary.bases_covered as f64); + v[bin] += (overlap_size as f64) * zoom_mean; + } + } + } + } + if let Summary::Mean = summary { + let last = v.last().copied().expect("Should be at least one bin."); + let last_size = (end - start) as f64 - (bins as f64 - 1.0) * bin_size; + v = v.into_iter().map(|v| v / bin_size as f64).collect(); + // The last bin could be smaller + *v.last_mut().expect("Should be at least one bin.") = last / last_size; + } + Ok(Array::from(v)) +} +fn 
to_entry_array>>( + start: usize, + end: usize, + iter: I, +) -> Result, BBIReadError> { + use numpy::ndarray::Array; + let mut v = vec![0.0; end - start]; + for interval in iter { + let interval = interval?; + let interval_start = (interval.start as usize) - start; + let interval_end = (interval.end as usize) - start; + for i in v[interval_start..interval_end].iter_mut() { + *i += 1.0; + } + } + Ok(Array::from(v)) +} fn to_array_entry_bins>>( start: usize, end: usize, @@ -381,15 +673,9 @@ impl BBIRead { BBIReadRaw::BigWigFile(_) | BBIReadRaw::BigWigRemote(_) | BBIReadRaw::BigWigFileLike(_) => BEDGRAPH.to_string(), - BBIReadRaw::BigBedFile(b) => b - .autosql() - .map_err(|e| PyErr::new::(format!("{}", e)))?, - BBIReadRaw::BigBedRemote(b) => b - .autosql() - .map_err(|e| PyErr::new::(format!("{}", e)))?, - BBIReadRaw::BigBedFileLike(b) => b - .autosql() - .map_err(|e| PyErr::new::(format!("{}", e)))?, + BBIReadRaw::BigBedFile(b) => b.autosql().convert_err()?, + BBIReadRaw::BigBedRemote(b) => b.autosql().convert_err()?, + BBIReadRaw::BigBedFileLike(b) => b.autosql().convert_err()?, }; let obj = if parse { let mut declarations = parse_autosql(&schema).map_err(|_| { @@ -508,159 +794,23 @@ impl BBIRead { /// If start is not provided, it defaults to the beginning of the chromosome. /// /// This returns a numpy array. 
+ #[pyo3(signature = (chrom, start, end, bins=None, summary="mean".to_string(), exact=false, missing=0.0, oob=f64::NAN, arr=None))] fn values( &mut self, chrom: String, start: Option, end: Option, bins: Option, - summary: Option, - exact: Option, + summary: String, + exact: bool, + missing: f64, + oob: f64, + arr: Option, ) -> PyResult { - fn to_array>>( - start: usize, - end: usize, - iter: I, - ) -> Result, BBIReadError> { - use numpy::ndarray::Array; - let mut v = vec![f64::NAN; end - start]; - for interval in iter { - let interval = interval?; - let interval_start = (interval.start as usize) - start; - let interval_end = (interval.end as usize) - start; - for i in v[interval_start..interval_end].iter_mut() { - *i = interval.value as f64; - } - } - Ok(Array::from(v)) - } - fn to_array_bins>>( - start: usize, - end: usize, - iter: I, - summary: Summary, - bins: usize, - ) -> Result, BBIReadError> { - use numpy::ndarray::Array; - let mut v = match summary { - Summary::Min | Summary::Max => vec![f64::NAN; bins], - Summary::Mean => vec![0.0; bins], - }; - let bin_size = (end - start) as f64 / bins as f64; - for interval in iter { - let interval = interval?; - let interval_start = (interval.start as usize) - start; - let interval_end = (interval.end as usize) - start; - let bin_start = ((interval_start as f64) / bin_size) as usize; - let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; - for bin in bin_start..bin_end { - let bin_start = (bin as f64) * bin_size; - let bin_end = (bin as f64 + 1.0) * bin_size; - let interval_start = (interval_start as f64).max(bin_start); - let interval_end = (interval_end as f64).min(bin_end); - // Possible overlaps - // bin |-----| |------| |-----| - // value |----| |----| |----| - // overlap ---- ------ ---- - match summary { - Summary::Min => { - v[bin] = v[bin].min(interval.value as f64); - } - Summary::Max => { - v[bin] = v[bin].max(interval.value as f64); - } - Summary::Mean => { - let overlap_size = interval_end - 
interval_start; - v[bin] += (overlap_size as f64) * interval.value as f64; - } - } - } - } - if let Summary::Mean = summary { - let last = v.last().copied().expect("Should be at least one bin."); - let last_size = (end - start) as f64 - (bins as f64 - 1.0) * bin_size; - v = v.into_iter().map(|v| v / bin_size as f64).collect(); - // The last bin could be smaller - *v.last_mut().expect("Should be at least one bin.") = last / last_size; - } - Ok(Array::from(v)) - } - fn to_array_zoom>>( - start: usize, - end: usize, - iter: I, - summary: Summary, - bins: usize, - ) -> Result, BBIReadError> { - use numpy::ndarray::Array; - let mut v = match summary { - Summary::Min | Summary::Max => vec![f64::NAN; bins], - Summary::Mean => vec![0.0; bins], - }; - let bin_size = (end - start) as f64 / bins as f64; - for interval in iter { - let interval = interval?; - let interval_start = (interval.start as usize).max(start) - start; - let interval_end = (interval.end as usize).min(end) - start; - let bin_start = ((interval_start as f64) / bin_size) as usize; - let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; - for bin in bin_start..bin_end { - let bin_start = (bin as f64) * bin_size; - let bin_end = (bin as f64 + 1.0) * bin_size; - let interval_start = (interval_start as f64).max(bin_start); - let interval_end = (interval_end as f64).min(bin_end); - // Possible overlaps - // bin |-----| |------| |-----| - // value |----| |----| |----| - // overlap ---- ------ ---- - match summary { - Summary::Min => { - v[bin] = v[bin].min(interval.summary.min_val as f64); - } - Summary::Max => { - v[bin] = v[bin].max(interval.summary.max_val as f64); - } - Summary::Mean => { - let overlap_size = interval_end - interval_start; - let zoom_mean = (interval.summary.sum as f64) - / (interval.summary.bases_covered as f64); - v[bin] += (overlap_size as f64) * zoom_mean; - } - } - } - } - if let Summary::Mean = summary { - let last = v.last().copied().expect("Should be at least one bin."); - 
let last_size = (end - start) as f64 - (bins as f64 - 1.0) * bin_size; - v = v.into_iter().map(|v| v / bin_size as f64).collect(); - // The last bin could be smaller - *v.last_mut().expect("Should be at least one bin.") = last / last_size; - } - Ok(Array::from(v)) - } - fn to_entry_array>>( - start: usize, - end: usize, - iter: I, - ) -> Result, BBIReadError> { - use numpy::ndarray::Array; - let mut v = vec![0.0; end - start]; - for interval in iter { - let interval = interval?; - let interval_start = (interval.start as usize) - start; - let interval_end = (interval.end as usize) - start; - for i in v[interval_start..interval_end].iter_mut() { - *i += 1.0; - } - } - Ok(Array::from(v)) - } - - let summary = match summary.as_deref() { - None => Summary::Mean, - Some("mean") => Summary::Mean, - Some("min") => Summary::Min, - Some("max") => Summary::Max, + let summary = match summary.as_ref() { + "mean" => Summary::Mean, + "min" => Summary::Min, + "max" => Summary::Max, _ => { return Err(PyErr::new::(format!( "Unrecognized summary. Only `mean`, `min`, and `max` are allowed." @@ -668,213 +818,27 @@ impl BBIRead { } }; let (start, end) = start_end(&self.bbi, &chrom, start, end)?; - fn intervals_to_array( - b: &mut BigWigReadRaw, - chrom: &str, - start: u32, - end: u32, - bins: Option, - summary: Summary, - exact: Option, - ) -> PyResult { - match bins { - Some(bins) if !exact.unwrap_or(false) => { - let max_zoom_size = ((end - start) as f32 / (bins * 2) as f32) as u32; - let zoom = b - .info() - .zoom_headers - .iter() - .filter(|z| z.reduction_level <= max_zoom_size) - .min_by_key(|z| max_zoom_size - z.reduction_level); - match zoom { - Some(zoom) => { - let iter = b - .get_zoom_interval(&chrom, start, end, zoom.reduction_level) - .map_err(|e| { - PyErr::new::(format!("{}", e)) - })?; - Python::with_gil(|py| { - Ok( - to_array_zoom( - start as usize, - end as usize, - iter, - summary, - bins, - ) - .map_err(|e| { - PyErr::new::(format!("{}", e)) - })? 
- .into_pyarray(py) - .to_object(py), - ) - }) - } - None => { - let iter = b.get_interval(&chrom, start, end).map_err(|e| { - PyErr::new::(format!("{}", e)) - })?; - Python::with_gil(|py| { - Ok( - to_array_bins( - start as usize, - end as usize, - iter, - summary, - bins, - ) - .map_err(|e| { - PyErr::new::(format!("{}", e)) - })? - .into_pyarray(py) - .to_object(py), - ) - }) - } - } - } - Some(bins) => { - let iter = b - .get_interval(&chrom, start, end) - .map_err(|e| PyErr::new::(format!("{}", e)))?; - Python::with_gil(|py| { - Ok( - to_array_bins(start as usize, end as usize, iter, summary, bins) - .map_err(|e| { - PyErr::new::(format!("{}", e)) - })? - .into_pyarray(py) - .to_object(py), - ) - }) - } - _ => { - let iter = b - .get_interval(&chrom, start, end) - .map_err(|e| PyErr::new::(format!("{}", e)))?; - Python::with_gil(|py| { - Ok(to_array(start as usize, end as usize, iter) - .map_err(|e| { - PyErr::new::(format!("{}", e)) - })? - .into_pyarray(py) - .to_object(py)) - }) - } - } - } - fn entries_to_array( - b: &mut BigBedReadRaw, - chrom: &str, - start: u32, - end: u32, - bins: Option, - summary: Summary, - exact: Option, - ) -> PyResult { - match bins { - Some(bins) if !exact.unwrap_or(false) => { - let max_zoom_size = ((end - start) as f32 / (bins * 2) as f32) as u32; - let zoom = b - .info() - .zoom_headers - .iter() - .filter(|z| z.reduction_level <= max_zoom_size) - .min_by_key(|z| max_zoom_size - z.reduction_level); - match zoom { - Some(zoom) => { - let iter = b - .get_zoom_interval(&chrom, start, end, zoom.reduction_level) - .map_err(|e| { - PyErr::new::(format!("{}", e)) - })?; - Python::with_gil(|py| { - Ok(to_entry_array_zoom( - start as usize, - end as usize, - iter, - summary, - bins, - ) - .map_err(|e| { - PyErr::new::(format!("{}", e)) - })? 
- .into_pyarray(py) - .to_object(py)) - }) - } - None => { - let iter = b.get_interval(&chrom, start, end).map_err(|e| { - PyErr::new::(format!("{}", e)) - })?; - Python::with_gil(|py| { - Ok(to_array_entry_bins( - start as usize, - end as usize, - iter, - summary, - bins, - ) - .map_err(|e| { - PyErr::new::(format!("{}", e)) - })? - .into_pyarray(py) - .to_object(py)) - }) - } - } - } - Some(bins) => { - let iter = b - .get_interval(&chrom, start, end) - .map_err(|e| PyErr::new::(format!("{}", e)))?; - Python::with_gil(|py| { - Ok( - to_array_entry_bins(start as usize, end as usize, iter, summary, bins) - .map_err(|e| { - PyErr::new::(format!("{}", e)) - })? - .into_pyarray(py) - .to_object(py), - ) - }) - } - _ => { - let iter = b - .get_interval(&chrom, start, end) - .map_err(|e| PyErr::new::(format!("{}", e)))?; - Python::with_gil(|py| { - Ok(to_entry_array(start as usize, end as usize, iter) - .map_err(|e| { - PyErr::new::(format!("{}", e)) - })? - .into_pyarray(py) - .to_object(py)) - }) - } - } - } match &mut self.bbi { - BBIReadRaw::BigWigFile(b) => { - intervals_to_array(b, &chrom, start, end, bins, summary, exact) - } + BBIReadRaw::BigWigFile(b) => intervals_to_array( + b, &chrom, start, end, bins, summary, exact, missing, oob, arr, + ), #[cfg(feature = "remote")] - BBIReadRaw::BigWigRemote(b) => { - intervals_to_array(b, &chrom, start, end, bins, summary, exact) - } - BBIReadRaw::BigWigFileLike(b) => { - intervals_to_array(b, &chrom, start, end, bins, summary, exact) - } - BBIReadRaw::BigBedFile(b) => { - entries_to_array(b, &chrom, start, end, bins, summary, exact) - } + BBIReadRaw::BigWigRemote(b) => intervals_to_array( + b, &chrom, start, end, bins, summary, exact, missing, oob, arr, + ), + BBIReadRaw::BigWigFileLike(b) => intervals_to_array( + b, &chrom, start, end, bins, summary, exact, missing, oob, arr, + ), + BBIReadRaw::BigBedFile(b) => entries_to_array( + b, &chrom, start, end, bins, summary, exact, missing, oob, arr, + ), #[cfg(feature = 
"remote")] - BBIReadRaw::BigBedRemote(b) => { - entries_to_array(b, &chrom, start, end, bins, summary, exact) - } - BBIReadRaw::BigBedFileLike(b) => { - entries_to_array(b, &chrom, start, end, bins, summary, exact) - } + BBIReadRaw::BigBedRemote(b) => entries_to_array( + b, &chrom, start, end, bins, summary, exact, missing, oob, arr, + ), + BBIReadRaw::BigBedFileLike(b) => entries_to_array( + b, &chrom, start, end, bins, summary, exact, missing, oob, arr, + ), } } @@ -938,7 +902,7 @@ impl BigWigIntervalIterator { .next() .transpose() .map(|o| o.map(|v| (v.start, v.end, v.value))) - .map_err(|e| PyErr::new::(format!("{}", e))) + .convert_err() } } @@ -957,7 +921,7 @@ impl BigBedEntriesIterator { fn __next__(mut slf: PyRefMut) -> PyResult> { let py = slf.py(); let next = match slf.iter.next() { - Some(n) => n.map_err(|e| PyErr::new::(format!("{}", e)))?, + Some(n) => n.convert_err()?, None => return Ok(None), }; let elements: Vec<_> = [next.start.to_object(py), next.end.to_object(py)] From 51198d1e0d39d51284c6d11767e38de484d71162 Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Fri, 22 Mar 2024 20:16:22 -0400 Subject: [PATCH 07/30] Accept i32 not u32 --- pybigtools/src/lib.rs | 232 ++++++++++++++++++++++++------------------ 1 file changed, 132 insertions(+), 100 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 73a9924..2d583ff 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -65,6 +65,45 @@ fn start_end( return Ok((start.unwrap_or(0), end.unwrap_or(length))); } +fn bigwig_start_end_length( + bbi: &BigWigReadRaw, + chrom_name: &str, + start: Option, + end: Option, +) -> PyResult<(i32, i32, i32)> { + let chroms = bbi.chroms(); + start_end_length_inner(chrom_name, chroms, start, end) +} + +fn bigbed_start_end_length( + bbi: &BigBedReadRaw, + chrom_name: &str, + start: Option, + end: Option, +) -> PyResult<(i32, i32, i32)> { + let chroms = bbi.chroms(); + 
start_end_length_inner(chrom_name, chroms, start, end) +} + +fn start_end_length_inner( + chrom_name: &str, + chroms: &[bigtools::ChromInfo], + start: Option, + end: Option, +) -> PyResult<(i32, i32, i32)> { + let chrom = chroms.into_iter().find(|x| x.name == chrom_name); + let length = match chrom { + None => { + return Err(PyErr::new::(format!( + "No chromomsome with name `{}` found.", + chrom_name + ))) + } + Some(c) => c.length as i32, + }; + return Ok((start.unwrap_or(0), end.unwrap_or(length), length)); +} + impl Reopen for PyFileLikeObject { fn reopen(&self) -> io::Result { Ok(self.clone()) @@ -116,8 +155,8 @@ impl ConvertResult for Result { fn intervals_to_array( b: &mut BigWigReadRaw, chrom: &str, - start: u32, - end: u32, + start: Option, + end: Option, bins: Option, summary: Summary, exact: bool, @@ -125,6 +164,7 @@ fn intervals_to_array( oob: f64, arr: Option, ) -> PyResult { + let (start, end, length) = bigwig_start_end_length(b, chrom, start, end)?; let zoom = if let (Some(bins), false) = (bins, exact) { let max_zoom_size = ((end - start) as f32 / (bins * 2) as f32) as u32; let zoom = b @@ -137,14 +177,15 @@ fn intervals_to_array( } else { None }; + let (intervals_start, intervals_end) = (start.max(0) as u32, end.min(length) as u32); match (bins, zoom) { (Some(bins), Some(zoom)) => { let iter = b - .get_zoom_interval(&chrom, start, end, zoom.reduction_level) + .get_zoom_interval(&chrom, intervals_start, intervals_end, zoom.reduction_level) .convert_err()?; Python::with_gil(|py| { Ok( - to_array_zoom(start as usize, end as usize, iter, summary, bins) + to_array_zoom(start, end, iter, summary, bins) .convert_err()? 
.into_pyarray(py) .to_object(py), @@ -152,10 +193,10 @@ fn intervals_to_array( }) } (Some(bins), None) => { - let iter = b.get_interval(&chrom, start, end).convert_err()?; + let iter = b.get_interval(&chrom, intervals_start, intervals_end).convert_err()?; Python::with_gil(|py| { Ok( - to_array_bins(start as usize, end as usize, iter, summary, bins) + to_array_bins(start, end, iter, summary, bins) .convert_err()? .into_pyarray(py) .to_object(py), @@ -163,9 +204,9 @@ fn intervals_to_array( }) } _ => { - let iter = b.get_interval(&chrom, start, end).convert_err()?; + let iter = b.get_interval(&chrom, intervals_start, intervals_end).convert_err()?; Python::with_gil(|py| { - Ok(to_array(start as usize, end as usize, iter) + Ok(to_array(start, end, iter) .convert_err()? .into_pyarray(py) .to_object(py)) @@ -176,8 +217,8 @@ fn intervals_to_array( fn entries_to_array( b: &mut BigBedReadRaw, chrom: &str, - start: u32, - end: u32, + start: Option, + end: Option, bins: Option, summary: Summary, exact: bool, @@ -185,47 +226,39 @@ fn entries_to_array( oob: f64, arr: Option, ) -> PyResult { - match bins { - Some(bins) if !exact => { - let max_zoom_size = ((end - start) as f32 / (bins * 2) as f32) as u32; - let zoom = b - .info() - .zoom_headers - .iter() - .filter(|z| z.reduction_level <= max_zoom_size) - .min_by_key(|z| max_zoom_size - z.reduction_level); - match zoom { - Some(zoom) => { - let iter = b - .get_zoom_interval(&chrom, start, end, zoom.reduction_level) - .convert_err()?; - Python::with_gil(|py| { - Ok( - to_entry_array_zoom(start as usize, end as usize, iter, summary, bins) - .convert_err()? - .into_pyarray(py) - .to_object(py), - ) - }) - } - None => { - let iter = b.get_interval(&chrom, start, end).convert_err()?; - Python::with_gil(|py| { - Ok( - to_array_entry_bins(start as usize, end as usize, iter, summary, bins) - .convert_err()? 
- .into_pyarray(py) - .to_object(py), - ) - }) - } - } + let (start, end, length) = bigbed_start_end_length(b, chrom, start, end)?; + let zoom = if let (Some(bins), false) = (bins, exact) { + let max_zoom_size = ((end - start) as f32 / (bins * 2) as f32) as u32; + let zoom = b + .info() + .zoom_headers + .iter() + .filter(|z| z.reduction_level <= max_zoom_size) + .min_by_key(|z| max_zoom_size - z.reduction_level); + zoom + } else { + None + }; + let (intervals_start, intervals_end) = (start.max(0) as u32, end.min(length) as u32); + match (bins, zoom) { + (Some(bins), Some(zoom)) => { + let iter = b + .get_zoom_interval(&chrom, intervals_start, intervals_end, zoom.reduction_level) + .convert_err()?; + Python::with_gil(|py| { + Ok( + to_entry_array_zoom(start, end, iter, summary, bins) + .convert_err()? + .into_pyarray(py) + .to_object(py), + ) + }) } - Some(bins) => { - let iter = b.get_interval(&chrom, start, end).convert_err()?; + (Some(bins), None) => { + let iter = b.get_interval(&chrom, intervals_start, intervals_end).convert_err()?; Python::with_gil(|py| { Ok( - to_array_entry_bins(start as usize, end as usize, iter, summary, bins) + to_entry_array_bins(start, end, iter, summary, bins) .convert_err()? .into_pyarray(py) .to_object(py), @@ -233,9 +266,9 @@ fn entries_to_array( }) } _ => { - let iter = b.get_interval(&chrom, start, end).convert_err()?; + let iter = b.get_interval(&chrom, intervals_start, intervals_end).convert_err()?; Python::with_gil(|py| { - Ok(to_entry_array(start as usize, end as usize, iter) + Ok(to_entry_array(start, end, iter) .convert_err()? 
.into_pyarray(py) .to_object(py)) @@ -244,16 +277,16 @@ fn entries_to_array( } } fn to_array>>( - start: usize, - end: usize, + start: i32, + end: i32, iter: I, ) -> Result, BBIReadError> { use numpy::ndarray::Array; - let mut v = vec![f64::NAN; end - start]; + let mut v = vec![f64::NAN; (end - start) as usize]; for interval in iter { let interval = interval?; - let interval_start = (interval.start as usize) - start; - let interval_end = (interval.end as usize) - start; + let interval_start = ((interval.start as i32) - start) as usize; + let interval_end = ((interval.end as i32) - start) as usize; for i in v[interval_start..interval_end].iter_mut() { *i = interval.value as f64; } @@ -261,8 +294,8 @@ fn to_array>>( Ok(Array::from(v)) } fn to_array_bins>>( - start: usize, - end: usize, + start: i32, + end: i32, iter: I, summary: Summary, bins: usize, @@ -275,8 +308,8 @@ fn to_array_bins>>( let bin_size = (end - start) as f64 / bins as f64; for interval in iter { let interval = interval?; - let interval_start = (interval.start as usize) - start; - let interval_end = (interval.end as usize) - start; + let interval_start = (interval.start as i32) - start; + let interval_end = (interval.end as i32) - start; let bin_start = ((interval_start as f64) / bin_size) as usize; let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; for bin in bin_start..bin_end { @@ -312,8 +345,8 @@ fn to_array_bins>>( Ok(Array::from(v)) } fn to_array_zoom>>( - start: usize, - end: usize, + start: i32, + end: i32, iter: I, summary: Summary, bins: usize, @@ -326,8 +359,8 @@ fn to_array_zoom>>( let bin_size = (end - start) as f64 / bins as f64; for interval in iter { let interval = interval?; - let interval_start = (interval.start as usize).max(start) - start; - let interval_end = (interval.end as usize).min(end) - start; + let interval_start = (interval.start as i32).max(start) - start; + let interval_end = (interval.end as i32).min(end) - start; let bin_start = ((interval_start as f64) 
/ bin_size) as usize; let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; for bin in bin_start..bin_end { @@ -365,37 +398,37 @@ fn to_array_zoom>>( Ok(Array::from(v)) } fn to_entry_array>>( - start: usize, - end: usize, + start: i32, + end: i32, iter: I, ) -> Result, BBIReadError> { use numpy::ndarray::Array; - let mut v = vec![0.0; end - start]; + let mut v = vec![0.0; (end - start) as usize]; for interval in iter { let interval = interval?; - let interval_start = (interval.start as usize) - start; - let interval_end = (interval.end as usize) - start; + let interval_start = ((interval.start as i32) - start) as usize; + let interval_end = ((interval.end as i32) - start) as usize; for i in v[interval_start..interval_end].iter_mut() { *i += 1.0; } } Ok(Array::from(v)) } -fn to_array_entry_bins>>( - start: usize, - end: usize, +fn to_entry_array_bins>>( + start: i32, + end: i32, iter: I, summary: Summary, bins: usize, ) -> Result, BBIReadError> { use numpy::ndarray::Array; - let mut bin_data: VecDeque<(usize, usize, usize, Vec)> = VecDeque::new(); + let mut bin_data: VecDeque<(usize, i32, i32, Vec)> = VecDeque::new(); let mut v = vec![0.0; bins]; let bin_size = (end - start) as f64 / bins as f64; for interval in iter { let interval = interval?; - let interval_start = (interval.start as usize).max(start) - start; - let interval_end = (interval.end as usize).min(end) - start; + let interval_start = (interval.start as i32).max(start) - start; + let interval_end = (interval.end as i32).min(end) - start; let bin_start = ((interval_start as f64) / bin_size) as usize; let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; @@ -432,10 +465,10 @@ fn to_array_entry_bins>>( .map(|b| ((b.0 + 1) < bin_end).then(|| b.0 + 1)) .unwrap_or(Some(bin_start)) { - let bin_start = ((bin as f64) * bin_size).ceil() as usize; - let bin_end = (((bin + 1) as f64) * bin_size).ceil() as usize; + let bin_start = ((bin as f64) * bin_size).ceil() as i32; + let bin_end = (((bin 
+ 1) as f64) * bin_size).ceil() as i32; - bin_data.push_back((bin, bin_start, bin_end, vec![0.0; bin_end - bin_start])); + bin_data.push_back((bin, bin_start, bin_end, vec![0.0; (bin_end - bin_start) as usize])); } for bin in bin_data.iter() { assert!( @@ -453,7 +486,7 @@ fn to_array_entry_bins>>( let overlap_start = (*bin_start).max(interval_start); let overlap_end = (*bin_end).min(interval_end); - for i in &mut data[(overlap_start - *bin_start)..(overlap_end - *bin_start)] { + for i in &mut data[((overlap_start - *bin_start) as usize)..((overlap_end - *bin_start) as usize)] { *i = *i + 1.0; } } @@ -485,20 +518,20 @@ fn to_array_entry_bins>>( } fn to_entry_array_zoom>>( - start: usize, - end: usize, + start: i32, + end: i32, iter: I, summary: Summary, bins: usize, ) -> Result, BBIReadError> { use numpy::ndarray::Array; - let mut bin_data: VecDeque<(usize, usize, usize, Vec)> = VecDeque::new(); + let mut bin_data: VecDeque<(usize, i32, i32, Vec)> = VecDeque::new(); let mut v = vec![0.0; bins]; let bin_size = (end - start) as f64 / bins as f64; for interval in iter { let interval = interval?; - let interval_start = (interval.start as usize).max(start) - start; - let interval_end = (interval.end as usize).min(end) - start; + let interval_start = (interval.start as i32).max(start) - start; + let interval_end = (interval.end as i32).min(end) - start; let bin_start = ((interval_start as f64) / bin_size) as usize; let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; @@ -537,10 +570,10 @@ fn to_entry_array_zoom>>( .map(|b| ((b.0 + 1) < bin_end).then(|| b.0 + 1)) .unwrap_or(Some(bin_start)) { - let bin_start = ((bin as f64) * bin_size).ceil() as usize; - let bin_end = (((bin + 1) as f64) * bin_size).ceil() as usize; + let bin_start = ((bin as f64) * bin_size).ceil() as i32; + let bin_end = (((bin + 1) as f64) * bin_size).ceil() as i32; - bin_data.push_back((bin, bin_start, bin_end, vec![f64::NAN; bin_end - bin_start])); + bin_data.push_back((bin, bin_start, 
bin_end, vec![f64::NAN; (bin_end - bin_start) as usize])); } for bin in bin_data.iter() { assert!( @@ -559,7 +592,7 @@ fn to_entry_array_zoom>>( let overlap_end = (*bin_end).min(interval_end); let mean = interval.summary.sum / (interval.end - interval.start) as f64; - for i in &mut data[(overlap_start - *bin_start)..(overlap_end - *bin_start)] { + for i in &mut data[((overlap_start - *bin_start) as usize)..((overlap_end - *bin_start) as usize)] { match summary { Summary::Mean => *i += mean, Summary::Min => *i = i.min(interval.summary.min_val), @@ -798,8 +831,8 @@ impl BBIRead { fn values( &mut self, chrom: String, - start: Option, - end: Option, + start: Option, + end: Option, bins: Option, summary: String, exact: bool, @@ -817,7 +850,6 @@ impl BBIRead { ))); } }; - let (start, end) = start_end(&self.bbi, &chrom, start, end)?; match &mut self.bbi { BBIReadRaw::BigWigFile(b) => intervals_to_array( b, &chrom, start, end, bins, summary, exact, missing, oob, arr, @@ -1596,16 +1628,16 @@ fn pybigtools(_py: Python, m: &PyModule) -> PyResult<()> { mod test { use bigtools::BedEntry; - use crate::to_array_entry_bins; + use crate::to_entry_array_bins; #[test] - fn test_to_array_entry_bins() { + fn test_to_entry_array_bins() { let entries = [BedEntry { start: 10, end: 20, rest: "".to_string(), }]; - let res = to_array_entry_bins( + let res = to_entry_array_bins( 10, 20, entries.into_iter().map(|v| Ok(v)), @@ -1621,7 +1653,7 @@ mod test { end: 20, rest: "".to_string(), }]; - let res = to_array_entry_bins( + let res = to_entry_array_bins( 10, 20, entries.into_iter().map(|v| Ok(v)), @@ -1644,7 +1676,7 @@ mod test { rest: "".to_string(), }, ]; - let res = to_array_entry_bins( + let res = to_entry_array_bins( 10, 20, entries.clone().into_iter().map(|v| Ok(v)), @@ -1654,7 +1686,7 @@ mod test { .unwrap(); let res = res.to_vec(); assert_eq!(res, [1.0, 2.0].into_iter().collect::>()); - let res = to_array_entry_bins( + let res = to_entry_array_bins( 10, 20, 
entries.clone().into_iter().map(|v| Ok(v)), @@ -1664,7 +1696,7 @@ mod test { .unwrap(); let res = res.to_vec(); assert_eq!(res, [1.0].into_iter().collect::>()); - let res = to_array_entry_bins( + let res = to_entry_array_bins( 10, 20, entries.clone().into_iter().map(|v| Ok(v)), @@ -1692,7 +1724,7 @@ mod test { rest: "".to_string(), }, ]; - let res = to_array_entry_bins( + let res = to_entry_array_bins( 10, 20, entries.clone().into_iter().map(|v| Ok(v)), @@ -1702,7 +1734,7 @@ mod test { .unwrap(); let res = res.to_vec(); assert_eq!(res, [1.0, 3.0].into_iter().collect::>()); - let res = to_array_entry_bins( + let res = to_entry_array_bins( 10, 20, entries.clone().into_iter().map(|v| Ok(v)), @@ -1712,7 +1744,7 @@ mod test { .unwrap(); let res = res.to_vec(); assert_eq!(res, [1.0].into_iter().collect::>()); - let res = to_array_entry_bins( + let res = to_entry_array_bins( 10, 20, entries.clone().into_iter().map(|v| Ok(v)), From 02bab3d1b8676712b6cb9d24887d0702f0447e0a Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Tue, 26 Mar 2024 02:11:32 -0400 Subject: [PATCH 08/30] Allow passing in an array --- pybigtools/src/lib.rs | 646 +++++++++++++++++++++++++++++++----------- 1 file changed, 477 insertions(+), 169 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 2d583ff..32bf659 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -3,6 +3,7 @@ use std::collections::VecDeque; use std::fs::File; use std::io::{self, BufReader}; +use std::ops::IndexMut; use std::path::Path; use bigtools::bed::autosql::parse::parse_autosql; @@ -22,8 +23,8 @@ use bigtools::{ use bigtools::utils::reopen::Reopen; use file_like::PyFileLikeObject; -use numpy::ndarray::Array1; -use numpy::IntoPyArray; +use numpy::ndarray::ArrayViewMut; +use numpy::PyArray1; use pyo3::exceptions::{self, PyTypeError}; use pyo3::prelude::*; use pyo3::types::{IntoPyDict, PyAny, PyDict, PyFloat, PyInt, PyIterator, PyString, PyTuple}; @@ 
-153,6 +154,7 @@ impl ConvertResult for Result { } fn intervals_to_array( + py: Python<'_>, b: &mut BigWigReadRaw, chrom: &str, start: Option, @@ -178,43 +180,129 @@ fn intervals_to_array( None }; let (intervals_start, intervals_end) = (start.max(0) as u32, end.min(length) as u32); - match (bins, zoom) { - (Some(bins), Some(zoom)) => { - let iter = b - .get_zoom_interval(&chrom, intervals_start, intervals_end, zoom.reduction_level) - .convert_err()?; - Python::with_gil(|py| { - Ok( - to_array_zoom(start, end, iter, summary, bins) - .convert_err()? - .into_pyarray(py) - .to_object(py), + match bins { + Some(bins) => { + let arr = match arr { + Some(v) => v, + None => PyArray1::from_vec(py, vec![missing; bins]).to_object(py), + }; + let v: &PyArray1 = arr.downcast::>(py).map_err(|_| { + PyErr::new::( + "`arr` option must be a one-dimensional numpy array, if passed.", ) - }) - } - (Some(bins), None) => { - let iter = b.get_interval(&chrom, intervals_start, intervals_end).convert_err()?; - Python::with_gil(|py| { - Ok( - to_array_bins(start, end, iter, summary, bins) - .convert_err()? 
- .into_pyarray(py) - .to_object(py), - ) - }) + })?; + { + let mut array = v.readwrite(); + + match zoom { + Some(zoom) => { + let iter = b + .get_zoom_interval( + &chrom, + intervals_start, + intervals_end, + zoom.reduction_level, + ) + .convert_err()?; + to_array_zoom( + start, + end, + iter, + summary, + bins, + missing, + array.as_array_mut(), + ) + .convert_err()?; + } + None => { + let iter = b + .get_interval(&chrom, intervals_start, intervals_end) + .convert_err()?; + to_array_bins( + start, + end, + iter, + summary, + bins, + missing, + array.as_array_mut(), + ) + .convert_err()?; + } + }; + + let mut array: ArrayViewMut<'_, f64, numpy::Ix1> = array.as_array_mut(); + + let bin_size = (end - start) as f64 / bins as f64; + if start < 0 { + let bin_start = 0; + let interval_end = 0 - start; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + for i in bin_start..bin_end { + array[i] = oob; + } + } + if end >= length { + let interval_start = (length as i32) - start; + let interval_end = end - start; + let bin_start = ((interval_start as f64) / bin_size) as usize; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + assert_eq!(bin_end, array.len() - 1); + for i in bin_start..bin_end { + array[i] = oob; + } + } + } + Ok(arr) } _ => { - let iter = b.get_interval(&chrom, intervals_start, intervals_end).convert_err()?; - Python::with_gil(|py| { - Ok(to_array(start, end, iter) - .convert_err()? 
- .into_pyarray(py) - .to_object(py)) - }) + let arr = match arr { + Some(v) => v, + None => PyArray1::from_vec(py, vec![missing; (end - start) as usize]).to_object(py), + }; + let v: &PyArray1 = arr.downcast::>(py).map_err(|_| { + PyErr::new::( + "`arr` option must be a one-dimensional numpy array, if passed.", + ) + })?; + + { + let mut array = v.readwrite(); + + let iter = b + .get_interval(&chrom, intervals_start, intervals_end) + .convert_err()?; + to_array(start, end, iter, array.as_array_mut()).convert_err()?; + + let mut array: ArrayViewMut<'_, f64, numpy::Ix1> = array.as_array_mut(); + + let bin_size = (end - start) as f64; + if start < 0 { + let bin_start = 0; + let interval_end = 0 - start; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + for i in bin_start..bin_end { + array[i] = oob; + } + } + if end >= length { + let interval_start = (length as i32) - start; + let interval_end = end - start; + let bin_start = ((interval_start as f64) / bin_size) as usize; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + assert_eq!(bin_end, array.len() - 1); + for i in bin_start..bin_end { + array[i] = oob; + } + } + } + Ok(arr) } } } fn entries_to_array( + py: Python<'_>, b: &mut BigBedReadRaw, chrom: &str, start: Option, @@ -240,39 +328,108 @@ fn entries_to_array( None }; let (intervals_start, intervals_end) = (start.max(0) as u32, end.min(length) as u32); - match (bins, zoom) { - (Some(bins), Some(zoom)) => { - let iter = b - .get_zoom_interval(&chrom, intervals_start, intervals_end, zoom.reduction_level) - .convert_err()?; - Python::with_gil(|py| { - Ok( - to_entry_array_zoom(start, end, iter, summary, bins) - .convert_err()? 
- .into_pyarray(py) - .to_object(py), + match bins { + Some(bins) => { + let arr = match arr { + Some(v) => v, + None => PyArray1::from_vec(py, vec![missing; bins]).to_object(py), + }; + let v: &PyArray1 = arr.downcast::>(py).map_err(|_| { + PyErr::new::( + "`arr` option must be a one-dimensional numpy array, if passed.", ) - }) - } - (Some(bins), None) => { - let iter = b.get_interval(&chrom, intervals_start, intervals_end).convert_err()?; - Python::with_gil(|py| { - Ok( - to_entry_array_bins(start, end, iter, summary, bins) - .convert_err()? - .into_pyarray(py) - .to_object(py), - ) - }) + })?; + { + let mut array = v.readwrite(); + + match zoom { + Some(zoom) => { + let iter = b + .get_zoom_interval( + &chrom, + intervals_start, + intervals_end, + zoom.reduction_level, + ) + .convert_err()?; + to_entry_array_zoom(start, end, iter, summary, bins, array.as_array_mut()) + .convert_err()?; + } + None => { + let iter = b + .get_interval(&chrom, intervals_start, intervals_end) + .convert_err()?; + to_entry_array_bins(start, end, iter, summary, bins, array.as_array_mut()) + .convert_err()?; + } + }; + + let mut array: ArrayViewMut<'_, f64, numpy::Ix1> = array.as_array_mut(); + + let bin_size = (end - start) as f64 / bins as f64; + if start < 0 { + let bin_start = 0; + let interval_end = 0 - start; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + for i in bin_start..bin_end { + array[i] = oob; + } + } + if end >= length { + let interval_start = (length as i32) - start; + let interval_end = end - start; + let bin_start = ((interval_start as f64) / bin_size) as usize; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + assert_eq!(bin_end, array.len() - 1); + for i in bin_start..bin_end { + array[i] = oob; + } + } + } + Ok(arr) } _ => { - let iter = b.get_interval(&chrom, intervals_start, intervals_end).convert_err()?; - Python::with_gil(|py| { - Ok(to_entry_array(start, end, iter) - .convert_err()? 
- .into_pyarray(py) - .to_object(py)) - }) + let arr = match arr { + Some(v) => v, + None => PyArray1::from_vec(py, vec![missing; (end - start) as usize]).to_object(py), + }; + let v: &PyArray1 = arr.downcast::>(py).map_err(|_| { + PyErr::new::( + "`arr` option must be a one-dimensional numpy array, if passed.", + ) + })?; + + { + let mut array = v.readwrite(); + + let iter = b + .get_interval(&chrom, intervals_start, intervals_end) + .convert_err()?; + to_entry_array(start, end, iter, array.as_array_mut()).convert_err()?; + + let mut array: ArrayViewMut<'_, f64, numpy::Ix1> = array.as_array_mut(); + + let bin_size = (end - start) as f64; + if start < 0 { + let bin_start = 0; + let interval_end = 0 - start; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + for i in bin_start..bin_end { + array[i] = oob; + } + } + if end >= length { + let interval_start = (length as i32) - start; + let interval_end = end - start; + let bin_start = ((interval_start as f64) / bin_size) as usize; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + assert_eq!(bin_end, array.len() - 1); + for i in bin_start..bin_end { + array[i] = oob; + } + } + } + Ok(arr) } } } @@ -280,18 +437,18 @@ fn to_array>>( start: i32, end: i32, iter: I, -) -> Result, BBIReadError> { - use numpy::ndarray::Array; - let mut v = vec![f64::NAN; (end - start) as usize]; + mut v: ArrayViewMut<'_, f64, numpy::Ix1>, +) -> Result<(), BBIReadError> { + assert_eq!(v.len(), (end - start) as usize); for interval in iter { let interval = interval?; let interval_start = ((interval.start as i32) - start) as usize; let interval_end = ((interval.end as i32) - start) as usize; - for i in v[interval_start..interval_end].iter_mut() { - *i = interval.value as f64; + for i in interval_start..interval_end { + *v.index_mut(i) += interval.value as f64; } } - Ok(Array::from(v)) + Ok(()) } fn to_array_bins>>( start: i32, @@ -299,50 +456,104 @@ fn to_array_bins>>( iter: I, summary: Summary, bins: usize, 
-) -> Result, BBIReadError> { - use numpy::ndarray::Array; - let mut v = match summary { - Summary::Min | Summary::Max => vec![f64::NAN; bins], - Summary::Mean => vec![0.0; bins], - }; + missing: f64, + mut v: ArrayViewMut<'_, f64, numpy::Ix1>, +) -> Result<(), BBIReadError> { + assert_eq!(v.len(), bins); + + let mut bin_data: VecDeque<(usize, i32, i32, Option)> = VecDeque::new(); let bin_size = (end - start) as f64 / bins as f64; for interval in iter { let interval = interval?; - let interval_start = (interval.start as i32) - start; - let interval_end = (interval.end as i32) - start; + let interval_start = (interval.start as i32).max(start) - start; + let interval_end = (interval.end as i32).min(end) - start; let bin_start = ((interval_start as f64) / bin_size) as usize; let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + + while let Some(front) = bin_data.front_mut() { + if front.0 < bin_start { + let front = bin_data.pop_front().unwrap(); + let bin = front.0; + + match summary { + Summary::Min => { + v[bin] = front.3.unwrap_or(missing); + } + Summary::Max => { + v[bin] = front.3.unwrap_or(missing); + } + Summary::Mean => { + v[bin] = front.3.map(|v| v / bin_size).unwrap_or(missing); + } + } + } else { + break; + } + } + while let Some(bin) = bin_data + .back_mut() + .map(|b| ((b.0 + 1) < bin_end).then(|| b.0 + 1)) + .unwrap_or(Some(bin_start)) + { + let bin_start = ((bin as f64) * bin_size).ceil() as i32; + let bin_end = (((bin + 1) as f64) * bin_size).ceil() as i32; + + bin_data.push_back((bin, bin_start, bin_end, None)); + } + for bin in bin_data.iter() { + assert!( + (bin_start..bin_end).contains(&bin.0), + "{} not in {}..{}", + bin.0, + bin_start, + bin_end + ); + } for bin in bin_start..bin_end { - let bin_start = (bin as f64) * bin_size; - let bin_end = (bin as f64 + 1.0) * bin_size; - let interval_start = (interval_start as f64).max(bin_start); - let interval_end = (interval_end as f64).min(bin_end); - // Possible overlaps - // bin |-----| 
|------| |-----| - // value |----| |----| |----| - // overlap ---- ------ ---- + assert!(bin_data.iter().find(|b| b.0 == bin).is_some()); + } + for (_bin, bin_start, bin_end, data) in bin_data.iter_mut() { + let v = data.get_or_insert_with(|| { + match summary { + // min & max are defined for NAN and we are about to set it + // can't use 0.0 because it may be either below or above the real value + Summary::Min | Summary::Max => f64::NAN, + // addition is not defined for NAN + Summary::Mean => 0.0, + } + }); match summary { Summary::Min => { - v[bin] = v[bin].min(interval.value as f64); + *v = v.min(interval.value as f64); } Summary::Max => { - v[bin] = v[bin].max(interval.value as f64); + *v = v.max(interval.value as f64); } Summary::Mean => { - let overlap_size = interval_end - interval_start; - v[bin] += (overlap_size as f64) * interval.value as f64; + let overlap_start = (*bin_start).max(interval_start); + let overlap_end = (*bin_end).min(interval_end); + let overlap_size: i32 = overlap_end - overlap_start; + *v += (overlap_size as f64) * interval.value as f64; } } } } - if let Summary::Mean = summary { - let last = v.last().copied().expect("Should be at least one bin."); - let last_size = (end - start) as f64 - (bins as f64 - 1.0) * bin_size; - v = v.into_iter().map(|v| v / bin_size as f64).collect(); - // The last bin could be smaller - *v.last_mut().expect("Should be at least one bin.") = last / last_size; + while let Some(front) = bin_data.pop_front() { + let bin = front.0; + + match summary { + Summary::Min => { + v[bin] = front.3.unwrap_or(missing); + } + Summary::Max => { + v[bin] = front.3.unwrap_or(missing); + } + Summary::Mean => { + v[bin] = front.3.map(|v| v / bin_size).unwrap_or(missing); + } + } } - Ok(Array::from(v)) + Ok(()) } fn to_array_zoom>>( start: i32, @@ -350,12 +561,12 @@ fn to_array_zoom>>( iter: I, summary: Summary, bins: usize, -) -> Result, BBIReadError> { - use numpy::ndarray::Array; - let mut v = match summary { - Summary::Min | 
Summary::Max => vec![f64::NAN; bins], - Summary::Mean => vec![0.0; bins], - }; + missing: f64, + mut v: ArrayViewMut<'_, f64, numpy::Ix1>, +) -> Result<(), BBIReadError> { + assert_eq!(v.len(), bins); + + let mut bin_data: VecDeque<(usize, i32, i32, Option)> = VecDeque::new(); let bin_size = (end - start) as f64 / bins as f64; for interval in iter { let interval = interval?; @@ -363,56 +574,119 @@ fn to_array_zoom>>( let interval_end = (interval.end as i32).min(end) - start; let bin_start = ((interval_start as f64) / bin_size) as usize; let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; - for bin in bin_start..bin_end { - let bin_start = (bin as f64) * bin_size; - let bin_end = (bin as f64 + 1.0) * bin_size; - let interval_start = (interval_start as f64).max(bin_start); - let interval_end = (interval_end as f64).min(bin_end); - // Possible overlaps - // bin |-----| |------| |-----| - // value |----| |----| |----| - // overlap ---- ------ ---- - match summary { - Summary::Min => { - v[bin] = v[bin].min(interval.summary.min_val as f64); - } - Summary::Max => { - v[bin] = v[bin].max(interval.summary.max_val as f64); - } - Summary::Mean => { - let overlap_size = interval_end - interval_start; - let zoom_mean = - (interval.summary.sum as f64) / (interval.summary.bases_covered as f64); - v[bin] += (overlap_size as f64) * zoom_mean; + + while let Some(front) = bin_data.front_mut() { + if front.0 < bin_start { + let front = bin_data.pop_front().unwrap(); + let bin = front.0; + + match summary { + Summary::Min => { + v[bin] = front.3.unwrap_or(missing); + } + Summary::Max => { + v[bin] = front.3.unwrap_or(missing); + } + Summary::Mean => { + v[bin] = front.3.map(|v| v / bin_size).unwrap_or(missing); + } } + } else { + break; + } + } + while let Some(bin) = bin_data + .back_mut() + .map(|b| ((b.0 + 1) < bin_end).then(|| b.0 + 1)) + .unwrap_or(Some(bin_start)) + { + let bin_start = ((bin as f64) * bin_size).ceil() as i32; + let bin_end = (((bin + 1) as f64) * 
bin_size).ceil() as i32; + + bin_data.push_back((bin, bin_start, bin_end, None)); + } + for bin in bin_data.iter() { + assert!( + (bin_start..bin_end).contains(&bin.0), + "{} not in {}..{}", + bin.0, + bin_start, + bin_end + ); + } + for bin in bin_start..bin_end { + assert!(bin_data.iter().find(|b| b.0 == bin).is_some()); + } + for (_bin, bin_start, bin_end, data) in bin_data.iter_mut() { + match data.as_mut() { + Some(v) => match summary { + Summary::Min => { + *v = v.min(interval.summary.min_val as f64); + } + Summary::Max => { + *v = v.max(interval.summary.max_val as f64); + } + Summary::Mean => { + let overlap_start = (*bin_start).max(interval_start); + let overlap_end = (*bin_end).min(interval_end); + let overlap_size = overlap_end - overlap_start; + let zoom_mean = + (interval.summary.sum as f64) / (interval.summary.bases_covered as f64); + *v += (overlap_size as f64) * zoom_mean; + } + }, + None => match summary { + Summary::Min => { + *data = Some(interval.summary.min_val as f64); + } + Summary::Max => { + *data = Some(interval.summary.max_val as f64); + } + Summary::Mean => { + let overlap_start = (*bin_start).max(interval_start); + let overlap_end = (*bin_end).min(interval_end); + let overlap_size = overlap_end - overlap_start; + let zoom_mean = + (interval.summary.sum as f64) / (interval.summary.bases_covered as f64); + *data = Some((overlap_size as f64) * zoom_mean); + } + }, } } } - if let Summary::Mean = summary { - let last = v.last().copied().expect("Should be at least one bin."); - let last_size = (end - start) as f64 - (bins as f64 - 1.0) * bin_size; - v = v.into_iter().map(|v| v / bin_size as f64).collect(); - // The last bin could be smaller - *v.last_mut().expect("Should be at least one bin.") = last / last_size; + while let Some(front) = bin_data.pop_front() { + let bin = front.0; + + match summary { + Summary::Min => { + v[bin] = front.3.unwrap_or(missing); + } + Summary::Max => { + v[bin] = front.3.unwrap_or(missing); + } + Summary::Mean => 
{ + v[bin] = front.3.map(|v| v / bin_size).unwrap_or(missing); + } + } } - Ok(Array::from(v)) + Ok(()) } fn to_entry_array>>( start: i32, end: i32, iter: I, -) -> Result, BBIReadError> { - use numpy::ndarray::Array; - let mut v = vec![0.0; (end - start) as usize]; + mut v: ArrayViewMut<'_, f64, numpy::Ix1>, +) -> Result<(), BBIReadError> { + assert_eq!(v.len(), (end - start) as usize); for interval in iter { let interval = interval?; let interval_start = ((interval.start as i32) - start) as usize; let interval_end = ((interval.end as i32) - start) as usize; - for i in v[interval_start..interval_end].iter_mut() { - *i += 1.0; + for i in interval_start..interval_end { + *v.index_mut(i) += 1.0; } } - Ok(Array::from(v)) + Ok(()) } fn to_entry_array_bins>>( start: i32, @@ -420,10 +694,11 @@ fn to_entry_array_bins>>( iter: I, summary: Summary, bins: usize, -) -> Result, BBIReadError> { - use numpy::ndarray::Array; + mut v: ArrayViewMut<'_, f64, numpy::Ix1>, +) -> Result<(), BBIReadError> { + assert_eq!(v.len(), bins); + let mut bin_data: VecDeque<(usize, i32, i32, Vec)> = VecDeque::new(); - let mut v = vec![0.0; bins]; let bin_size = (end - start) as f64 / bins as f64; for interval in iter { let interval = interval?; @@ -468,7 +743,12 @@ fn to_entry_array_bins>>( let bin_start = ((bin as f64) * bin_size).ceil() as i32; let bin_end = (((bin + 1) as f64) * bin_size).ceil() as i32; - bin_data.push_back((bin, bin_start, bin_end, vec![0.0; (bin_end - bin_start) as usize])); + bin_data.push_back(( + bin, + bin_start, + bin_end, + vec![0.0; (bin_end - bin_start) as usize], + )); } for bin in bin_data.iter() { assert!( @@ -486,7 +766,9 @@ fn to_entry_array_bins>>( let overlap_start = (*bin_start).max(interval_start); let overlap_end = (*bin_end).min(interval_end); - for i in &mut data[((overlap_start - *bin_start) as usize)..((overlap_end - *bin_start) as usize)] { + for i in &mut data + [((overlap_start - *bin_start) as usize)..((overlap_end - *bin_start) as usize)] + { *i = *i 
+ 1.0; } } @@ -514,7 +796,7 @@ fn to_entry_array_bins>>( } } } - Ok(Array::from(v)) + Ok(()) } fn to_entry_array_zoom>>( @@ -523,10 +805,11 @@ fn to_entry_array_zoom>>( iter: I, summary: Summary, bins: usize, -) -> Result, BBIReadError> { - use numpy::ndarray::Array; + mut v: ArrayViewMut<'_, f64, numpy::Ix1>, +) -> Result<(), BBIReadError> { + assert_eq!(v.len(), bins); + let mut bin_data: VecDeque<(usize, i32, i32, Vec)> = VecDeque::new(); - let mut v = vec![0.0; bins]; let bin_size = (end - start) as f64 / bins as f64; for interval in iter { let interval = interval?; @@ -573,7 +856,12 @@ fn to_entry_array_zoom>>( let bin_start = ((bin as f64) * bin_size).ceil() as i32; let bin_end = (((bin + 1) as f64) * bin_size).ceil() as i32; - bin_data.push_back((bin, bin_start, bin_end, vec![f64::NAN; (bin_end - bin_start) as usize])); + bin_data.push_back(( + bin, + bin_start, + bin_end, + vec![f64::NAN; (bin_end - bin_start) as usize], + )); } for bin in bin_data.iter() { assert!( @@ -592,7 +880,9 @@ fn to_entry_array_zoom>>( let overlap_end = (*bin_end).min(interval_end); let mean = interval.summary.sum / (interval.end - interval.start) as f64; - for i in &mut data[((overlap_start - *bin_start) as usize)..((overlap_end - *bin_start) as usize)] { + for i in &mut data + [((overlap_start - *bin_start) as usize)..((overlap_end - *bin_start) as usize)] + { match summary { Summary::Mean => *i += mean, Summary::Min => *i = i.min(interval.summary.min_val), @@ -627,7 +917,7 @@ fn to_entry_array_zoom>>( } } } - Ok(Array::from(v)) + Ok(()) } /// This class is the interface for reading a bigWig or bigBed. 
@@ -830,6 +1120,7 @@ impl BBIRead { #[pyo3(signature = (chrom, start, end, bins=None, summary="mean".to_string(), exact=false, missing=0.0, oob=f64::NAN, arr=None))] fn values( &mut self, + py: Python<'_>, chrom: String, start: Option, end: Option, @@ -852,24 +1143,24 @@ impl BBIRead { }; match &mut self.bbi { BBIReadRaw::BigWigFile(b) => intervals_to_array( - b, &chrom, start, end, bins, summary, exact, missing, oob, arr, + py, b, &chrom, start, end, bins, summary, exact, missing, oob, arr, ), #[cfg(feature = "remote")] BBIReadRaw::BigWigRemote(b) => intervals_to_array( - b, &chrom, start, end, bins, summary, exact, missing, oob, arr, + py, b, &chrom, start, end, bins, summary, exact, missing, oob, arr, ), BBIReadRaw::BigWigFileLike(b) => intervals_to_array( - b, &chrom, start, end, bins, summary, exact, missing, oob, arr, + py, b, &chrom, start, end, bins, summary, exact, missing, oob, arr, ), BBIReadRaw::BigBedFile(b) => entries_to_array( - b, &chrom, start, end, bins, summary, exact, missing, oob, arr, + py, b, &chrom, start, end, bins, summary, exact, missing, oob, arr, ), #[cfg(feature = "remote")] BBIReadRaw::BigBedRemote(b) => entries_to_array( - b, &chrom, start, end, bins, summary, exact, missing, oob, arr, + py, b, &chrom, start, end, bins, summary, exact, missing, oob, arr, ), BBIReadRaw::BigBedFileLike(b) => entries_to_array( - b, &chrom, start, end, bins, summary, exact, missing, oob, arr, + py, b, &chrom, start, end, bins, summary, exact, missing, oob, arr, ), } } @@ -1627,6 +1918,7 @@ fn pybigtools(_py: Python, m: &PyModule) -> PyResult<()> { #[cfg(test)] mod test { use bigtools::BedEntry; + use numpy::ndarray::Array; use crate::to_entry_array_bins; @@ -1637,15 +1929,17 @@ mod test { end: 20, rest: "".to_string(), }]; - let res = to_entry_array_bins( + let mut arr = Array::from(vec![f64::NAN]); + to_entry_array_bins( 10, 20, entries.into_iter().map(|v| Ok(v)), crate::Summary::Mean, 1, + arr.view_mut(), ) .unwrap(); - let res = res.to_vec(); + let 
res = arr.to_vec(); assert_eq!(res, [1.0].into_iter().collect::>()); let entries = [BedEntry { @@ -1653,15 +1947,17 @@ mod test { end: 20, rest: "".to_string(), }]; - let res = to_entry_array_bins( + let mut arr = Array::from(vec![f64::NAN, f64::NAN]); + to_entry_array_bins( 10, 20, entries.into_iter().map(|v| Ok(v)), crate::Summary::Mean, 2, + arr.view_mut(), ) .unwrap(); - let res = res.to_vec(); + let res = arr.to_vec(); assert_eq!(res, [1.0, 1.0].into_iter().collect::>()); let entries = [ @@ -1676,35 +1972,41 @@ mod test { rest: "".to_string(), }, ]; - let res = to_entry_array_bins( + let mut arr = Array::from(vec![f64::NAN, f64::NAN]); + to_entry_array_bins( 10, 20, entries.clone().into_iter().map(|v| Ok(v)), crate::Summary::Mean, 2, + arr.view_mut(), ) .unwrap(); - let res = res.to_vec(); + let res = arr.to_vec(); assert_eq!(res, [1.0, 2.0].into_iter().collect::>()); - let res = to_entry_array_bins( + let mut arr = Array::from(vec![f64::NAN]); + to_entry_array_bins( 10, 20, entries.clone().into_iter().map(|v| Ok(v)), crate::Summary::Min, 1, + arr.view_mut(), ) .unwrap(); - let res = res.to_vec(); + let res = arr.to_vec(); assert_eq!(res, [1.0].into_iter().collect::>()); - let res = to_entry_array_bins( + let mut arr = Array::from(vec![f64::NAN]); + to_entry_array_bins( 10, 20, entries.clone().into_iter().map(|v| Ok(v)), crate::Summary::Max, 1, + arr.view_mut(), ) .unwrap(); - let res = res.to_vec(); + let res = arr.to_vec(); assert_eq!(res, [2.0].into_iter().collect::>()); let entries = [ @@ -1724,35 +2026,41 @@ mod test { rest: "".to_string(), }, ]; - let res = to_entry_array_bins( + let mut arr = Array::from(vec![f64::NAN, f64::NAN]); + to_entry_array_bins( 10, 20, entries.clone().into_iter().map(|v| Ok(v)), crate::Summary::Mean, 2, + arr.view_mut(), ) .unwrap(); - let res = res.to_vec(); + let res = arr.to_vec(); assert_eq!(res, [1.0, 3.0].into_iter().collect::>()); - let res = to_entry_array_bins( + let mut arr = Array::from(vec![f64::NAN]); + 
to_entry_array_bins( 10, 20, entries.clone().into_iter().map(|v| Ok(v)), crate::Summary::Min, 1, + arr.view_mut(), ) .unwrap(); - let res = res.to_vec(); + let res = arr.to_vec(); assert_eq!(res, [1.0].into_iter().collect::>()); - let res = to_entry_array_bins( + let mut arr = Array::from(vec![f64::NAN]); + to_entry_array_bins( 10, 20, entries.clone().into_iter().map(|v| Ok(v)), crate::Summary::Max, 1, + arr.view_mut(), ) .unwrap(); - let res = res.to_vec(); + let res = arr.to_vec(); assert_eq!(res, [3.0].into_iter().collect::>()); } } From 1708ab15b026c1bb41e7f56a9006c2bea88bc6bc Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Wed, 27 Mar 2024 17:42:46 -0400 Subject: [PATCH 09/30] Cleanup and some fixes --- pybigtools/src/lib.rs | 491 +++++++++++++++++++++++------------------- 1 file changed, 271 insertions(+), 220 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 32bf659..88cbb8a 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -179,127 +179,114 @@ fn intervals_to_array( } else { None }; + let arr = match (bins, arr) { + (_, Some(arr)) => arr, + (Some(bins), None) => PyArray1::from_vec(py, vec![missing; bins]).to_object(py), + (None, None) => PyArray1::from_vec(py, vec![missing; (end - start) as usize]).to_object(py), + }; + let v: &PyArray1 = arr.downcast::>(py).map_err(|_| { + PyErr::new::( + "`arr` option must be a one-dimensional numpy array, if passed.", + ) + })?; let (intervals_start, intervals_end) = (start.max(0) as u32, end.min(length) as u32); - match bins { + let bin_size = match bins { Some(bins) => { - let arr = match arr { - Some(v) => v, - None => PyArray1::from_vec(py, vec![missing; bins]).to_object(py), - }; - let v: &PyArray1 = arr.downcast::>(py).map_err(|_| { - PyErr::new::( - "`arr` option must be a one-dimensional numpy array, if passed.", - ) - })?; - { - let mut array = v.readwrite(); - - match zoom { - Some(zoom) => { - let iter = b - 
.get_zoom_interval( - &chrom, - intervals_start, - intervals_end, - zoom.reduction_level, - ) - .convert_err()?; - to_array_zoom( - start, - end, - iter, - summary, - bins, - missing, - array.as_array_mut(), - ) - .convert_err()?; - } - None => { - let iter = b - .get_interval(&chrom, intervals_start, intervals_end) - .convert_err()?; - to_array_bins( - start, - end, - iter, - summary, - bins, - missing, - array.as_array_mut(), - ) - .convert_err()?; - } - }; + if v.len() != bins { + return Err(PyErr::new::(format!( + "`arr` does not the expected size (expected `{}`, found `{}`), if passed.", + bins, + v.len(), + ))); + } - let mut array: ArrayViewMut<'_, f64, numpy::Ix1> = array.as_array_mut(); + let mut array = v.readwrite(); - let bin_size = (end - start) as f64 / bins as f64; - if start < 0 { - let bin_start = 0; - let interval_end = 0 - start; - let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; - for i in bin_start..bin_end { - array[i] = oob; - } + match zoom { + Some(zoom) => { + let iter = b + .get_zoom_interval( + &chrom, + intervals_start, + intervals_end, + zoom.reduction_level, + ) + .convert_err()?; + to_array_zoom( + start, + end, + iter, + summary, + bins, + missing, + array.as_array_mut(), + ) + .convert_err()?; } - if end >= length { - let interval_start = (length as i32) - start; - let interval_end = end - start; - let bin_start = ((interval_start as f64) / bin_size) as usize; - let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; - assert_eq!(bin_end, array.len() - 1); - for i in bin_start..bin_end { - array[i] = oob; - } + None => { + let iter = b + .get_interval(&chrom, intervals_start, intervals_end) + .convert_err()?; + to_array_bins( + start, + end, + iter, + summary, + bins, + missing, + array.as_array_mut(), + ) + .convert_err()?; } - } - Ok(arr) + }; + + (end - start) as f64 / bins as f64 } _ => { - let arr = match arr { - Some(v) => v, - None => PyArray1::from_vec(py, vec![missing; (end - start) as 
usize]).to_object(py), - }; - let v: &PyArray1 = arr.downcast::>(py).map_err(|_| { - PyErr::new::( - "`arr` option must be a one-dimensional numpy array, if passed.", - ) - })?; + if v.len() != (end - start) as usize { + return Err(PyErr::new::(format!( + "`arr` does not the expected size (expected `{}`, found `{}`), if passed.", + (end - start) as usize, + v.len(), + ))); + } - { - let mut array = v.readwrite(); + let mut array = v.readwrite(); - let iter = b - .get_interval(&chrom, intervals_start, intervals_end) - .convert_err()?; - to_array(start, end, iter, array.as_array_mut()).convert_err()?; + let iter = b + .get_interval(&chrom, intervals_start, intervals_end) + .convert_err()?; + to_array(start, end, iter, missing, array.as_array_mut()).convert_err()?; - let mut array: ArrayViewMut<'_, f64, numpy::Ix1> = array.as_array_mut(); + (end - start) as f64 + } + }; - let bin_size = (end - start) as f64; - if start < 0 { - let bin_start = 0; - let interval_end = 0 - start; - let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; - for i in bin_start..bin_end { - array[i] = oob; - } - } - if end >= length { - let interval_start = (length as i32) - start; - let interval_end = end - start; - let bin_start = ((interval_start as f64) / bin_size) as usize; - let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; - assert_eq!(bin_end, array.len() - 1); - for i in bin_start..bin_end { - array[i] = oob; - } - } + { + let mut array = v.readwrite(); + let mut array: ArrayViewMut<'_, f64, numpy::Ix1> = array.as_array_mut(); + + if start < 0 { + let bin_start = 0; + let interval_end = 0 - start; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + for i in bin_start..bin_end { + array[i] = oob; + } + } + if end >= length { + let interval_start = (length as i32) - start; + let interval_end = end - start; + let bin_start = ((interval_start as f64) / bin_size) as usize; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + 
assert_eq!(bin_end, array.len() - 1); + for i in bin_start..bin_end { + array[i] = oob; } - Ok(arr) } } + + Ok(arr) } fn entries_to_array( py: Python<'_>, @@ -327,119 +314,125 @@ fn entries_to_array( } else { None }; + let arr = match (bins, arr) { + (_, Some(arr)) => arr, + (Some(bins), None) => PyArray1::from_vec(py, vec![missing; bins]).to_object(py), + (None, None) => PyArray1::from_vec(py, vec![missing; (end - start) as usize]).to_object(py), + }; + let v: &PyArray1 = arr.downcast::>(py).map_err(|_| { + PyErr::new::( + "`arr` option must be a one-dimensional numpy array, if passed.", + ) + })?; let (intervals_start, intervals_end) = (start.max(0) as u32, end.min(length) as u32); - match bins { + let bin_size = match bins { Some(bins) => { - let arr = match arr { - Some(v) => v, - None => PyArray1::from_vec(py, vec![missing; bins]).to_object(py), - }; - let v: &PyArray1 = arr.downcast::>(py).map_err(|_| { - PyErr::new::( - "`arr` option must be a one-dimensional numpy array, if passed.", - ) - })?; - { - let mut array = v.readwrite(); - - match zoom { - Some(zoom) => { - let iter = b - .get_zoom_interval( - &chrom, - intervals_start, - intervals_end, - zoom.reduction_level, - ) - .convert_err()?; - to_entry_array_zoom(start, end, iter, summary, bins, array.as_array_mut()) - .convert_err()?; - } - None => { - let iter = b - .get_interval(&chrom, intervals_start, intervals_end) - .convert_err()?; - to_entry_array_bins(start, end, iter, summary, bins, array.as_array_mut()) - .convert_err()?; - } - }; + if v.len() != bins { + return Err(PyErr::new::(format!( + "`arr` does not the expected size (expected `{}`, found `{}`), if passed.", + bins, + v.len(), + ))); + } - let mut array: ArrayViewMut<'_, f64, numpy::Ix1> = array.as_array_mut(); + let mut array = v.readwrite(); - let bin_size = (end - start) as f64 / bins as f64; - if start < 0 { - let bin_start = 0; - let interval_end = 0 - start; - let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; - for i 
in bin_start..bin_end { - array[i] = oob; - } + match zoom { + Some(zoom) => { + let iter = b + .get_zoom_interval( + &chrom, + intervals_start, + intervals_end, + zoom.reduction_level, + ) + .convert_err()?; + to_entry_array_zoom( + start, + end, + iter, + summary, + bins, + missing, + array.as_array_mut(), + ) + .convert_err()?; } - if end >= length { - let interval_start = (length as i32) - start; - let interval_end = end - start; - let bin_start = ((interval_start as f64) / bin_size) as usize; - let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; - assert_eq!(bin_end, array.len() - 1); - for i in bin_start..bin_end { - array[i] = oob; - } + None => { + let iter = b + .get_interval(&chrom, intervals_start, intervals_end) + .convert_err()?; + to_entry_array_bins( + start, + end, + iter, + summary, + bins, + missing, + array.as_array_mut(), + ) + .convert_err()?; } - } - Ok(arr) + }; + + (end - start) as f64 / bins as f64 } _ => { - let arr = match arr { - Some(v) => v, - None => PyArray1::from_vec(py, vec![missing; (end - start) as usize]).to_object(py), - }; - let v: &PyArray1 = arr.downcast::>(py).map_err(|_| { - PyErr::new::( - "`arr` option must be a one-dimensional numpy array, if passed.", - ) - })?; + if v.len() != (end - start) as usize { + return Err(PyErr::new::(format!( + "`arr` does not the expected size (expected `{}`, found `{}`), if passed.", + (end - start) as usize, + v.len(), + ))); + } - { - let mut array = v.readwrite(); + let mut array = v.readwrite(); - let iter = b - .get_interval(&chrom, intervals_start, intervals_end) - .convert_err()?; - to_entry_array(start, end, iter, array.as_array_mut()).convert_err()?; + let iter = b + .get_interval(&chrom, intervals_start, intervals_end) + .convert_err()?; + to_entry_array(start, end, iter, missing, array.as_array_mut()).convert_err()?; - let mut array: ArrayViewMut<'_, f64, numpy::Ix1> = array.as_array_mut(); + (end - start) as f64 + } + }; - let bin_size = (end - start) as f64; - if 
start < 0 { - let bin_start = 0; - let interval_end = 0 - start; - let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; - for i in bin_start..bin_end { - array[i] = oob; - } - } - if end >= length { - let interval_start = (length as i32) - start; - let interval_end = end - start; - let bin_start = ((interval_start as f64) / bin_size) as usize; - let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; - assert_eq!(bin_end, array.len() - 1); - for i in bin_start..bin_end { - array[i] = oob; - } - } + { + let mut array = v.readwrite(); + let mut array: ArrayViewMut<'_, f64, numpy::Ix1> = array.as_array_mut(); + + if start < 0 { + let bin_start = 0; + let interval_end = 0 - start; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + for i in bin_start..bin_end { + array[i] = oob; + } + } + if end >= length { + let interval_start = (length as i32) - start; + let interval_end = end - start; + let bin_start = ((interval_start as f64) / bin_size) as usize; + let bin_end = ((interval_end as f64) / bin_size).ceil() as usize; + assert_eq!(bin_end, array.len() - 1); + for i in bin_start..bin_end { + array[i] = oob; } - Ok(arr) } } + + Ok(arr) } + fn to_array>>( start: i32, end: i32, iter: I, + missing: f64, mut v: ArrayViewMut<'_, f64, numpy::Ix1>, ) -> Result<(), BBIReadError> { assert_eq!(v.len(), (end - start) as usize); + v.fill(missing); for interval in iter { let interval = interval?; let interval_start = ((interval.start as i32) - start) as usize; @@ -460,6 +453,7 @@ fn to_array_bins>>( mut v: ArrayViewMut<'_, f64, numpy::Ix1>, ) -> Result<(), BBIReadError> { assert_eq!(v.len(), bins); + v.fill(missing); let mut bin_data: VecDeque<(usize, i32, i32, Option)> = VecDeque::new(); let bin_size = (end - start) as f64 / bins as f64; @@ -565,6 +559,7 @@ fn to_array_zoom>>( mut v: ArrayViewMut<'_, f64, numpy::Ix1>, ) -> Result<(), BBIReadError> { assert_eq!(v.len(), bins); + v.fill(missing); let mut bin_data: VecDeque<(usize, i32, i32, 
Option)> = VecDeque::new(); let bin_size = (end - start) as f64 / bins as f64; @@ -675,9 +670,11 @@ fn to_entry_array>>( start: i32, end: i32, iter: I, + missing: f64, mut v: ArrayViewMut<'_, f64, numpy::Ix1>, ) -> Result<(), BBIReadError> { assert_eq!(v.len(), (end - start) as usize); + v.fill(missing); for interval in iter { let interval = interval?; let interval_start = ((interval.start as i32) - start) as usize; @@ -694,9 +691,11 @@ fn to_entry_array_bins>>( iter: I, summary: Summary, bins: usize, + missing: f64, mut v: ArrayViewMut<'_, f64, numpy::Ix1>, ) -> Result<(), BBIReadError> { assert_eq!(v.len(), bins); + v.fill(missing); let mut bin_data: VecDeque<(usize, i32, i32, Vec)> = VecDeque::new(); let bin_size = (end - start) as f64 / bins as f64; @@ -718,14 +717,14 @@ fn to_entry_array_bins>>( .3 .into_iter() .reduce(|min, v| min.min(v)) - .unwrap_or(0.0); + .unwrap_or(missing); } Summary::Max => { v[bin] = front .3 .into_iter() .reduce(|max, v| max.max(v)) - .unwrap_or(0.0); + .unwrap_or(missing); } Summary::Mean => { v[bin] = front.3.into_iter().sum::() / bin_size; @@ -747,7 +746,7 @@ fn to_entry_array_bins>>( bin, bin_start, bin_end, - vec![0.0; (bin_end - bin_start) as usize], + vec![missing; (bin_end - bin_start) as usize], )); } for bin in bin_data.iter() { @@ -769,7 +768,8 @@ fn to_entry_array_bins>>( for i in &mut data [((overlap_start - *bin_start) as usize)..((overlap_end - *bin_start) as usize)] { - *i = *i + 1.0; + // If NAN, then 0.0 + 1.0, else i + 1.0 + *i = (*i).max(0.0) + 1.0; } } } @@ -782,14 +782,14 @@ fn to_entry_array_bins>>( .3 .into_iter() .reduce(|min, v| min.min(v)) - .unwrap_or(0.0); + .unwrap_or(missing); } Summary::Max => { v[bin] = front .3 .into_iter() .reduce(|max, v| max.max(v)) - .unwrap_or(0.0); + .unwrap_or(missing); } Summary::Mean => { v[bin] = front.3.into_iter().sum::() / bin_size; @@ -805,9 +805,11 @@ fn to_entry_array_zoom>>( iter: I, summary: Summary, bins: usize, + missing: f64, mut v: ArrayViewMut<'_, f64, 
numpy::Ix1>, ) -> Result<(), BBIReadError> { assert_eq!(v.len(), bins); + v.fill(missing); let mut bin_data: VecDeque<(usize, i32, i32, Vec)> = VecDeque::new(); let bin_size = (end - start) as f64 / bins as f64; @@ -829,16 +831,14 @@ fn to_entry_array_zoom>>( .3 .into_iter() .reduce(|min, v| min.min(v)) - .unwrap_or(0.0) - .max(0.0); + .unwrap_or(missing); } Summary::Max => { v[bin] = front .3 .into_iter() .reduce(|max, v| max.max(v)) - .unwrap_or(0.0) - .max(0.0); + .unwrap_or(missing); } Summary::Mean => { v[bin] = front.3.into_iter().sum::() / bin_size; @@ -860,7 +860,7 @@ fn to_entry_array_zoom>>( bin, bin_start, bin_end, - vec![f64::NAN; (bin_end - bin_start) as usize], + vec![missing; (bin_end - bin_start) as usize], )); } for bin in bin_data.iter() { @@ -883,12 +883,12 @@ fn to_entry_array_zoom>>( for i in &mut data [((overlap_start - *bin_start) as usize)..((overlap_end - *bin_start) as usize)] { + *i = i.max(0.0); match summary { Summary::Mean => *i += mean, Summary::Min => *i = i.min(interval.summary.min_val), Summary::Max => *i = i.max(interval.summary.max_val), } - *i = i.max(0.0); } } } @@ -901,16 +901,14 @@ fn to_entry_array_zoom>>( .3 .into_iter() .reduce(|min, v| min.min(v)) - .unwrap_or(0.0) - .max(0.0); + .unwrap_or(missing); } Summary::Max => { v[bin] = front .3 .into_iter() .reduce(|max, v| max.max(v)) - .unwrap_or(0.0) - .max(0.0); + .unwrap_or(missing); } Summary::Mean => { v[bin] = front.3.into_iter().sum::() / bin_size; @@ -1924,6 +1922,51 @@ mod test { #[test] fn test_to_entry_array_bins() { + fn eq_with_nan_eq(a: f64, b: f64) -> bool { + (a.is_nan() && b.is_nan()) || (a == b) + } + + #[track_caller] + fn assert_equal(found: Vec, expected: Vec) { + let equal = (found.len() == expected.len()) && // zip stops at the shortest + found.iter() + .zip(expected.iter()) + .all(|(a,b)| eq_with_nan_eq(*a, *b)); + if !equal { + panic!("Vecs not equal: expected {:?}, found {:?}", expected, found); + } + } + + let entries = []; + let mut arr = 
Array::from(vec![f64::NAN]); + to_entry_array_bins( + 10, + 20, + entries.into_iter().map(|v| Ok(v)), + crate::Summary::Mean, + 1, + f64::NAN, + arr.view_mut(), + ) + .unwrap(); + let res = arr.to_vec(); + assert_equal(res, [f64::NAN].into_iter().collect::>()); + + let entries = []; + let mut arr = Array::from(vec![f64::NAN]); + to_entry_array_bins( + 10, + 20, + entries.into_iter().map(|v| Ok(v)), + crate::Summary::Mean, + 1, + 0.0, + arr.view_mut(), + ) + .unwrap(); + let res = arr.to_vec(); + assert_equal(res, [0.0].into_iter().collect::>()); + let entries = [BedEntry { start: 10, end: 20, @@ -1936,11 +1979,12 @@ mod test { entries.into_iter().map(|v| Ok(v)), crate::Summary::Mean, 1, + f64::NAN, arr.view_mut(), ) .unwrap(); let res = arr.to_vec(); - assert_eq!(res, [1.0].into_iter().collect::>()); + assert_equal(res, [1.0].into_iter().collect::>()); let entries = [BedEntry { start: 10, @@ -1954,11 +1998,12 @@ mod test { entries.into_iter().map(|v| Ok(v)), crate::Summary::Mean, 2, + f64::NAN, arr.view_mut(), ) .unwrap(); let res = arr.to_vec(); - assert_eq!(res, [1.0, 1.0].into_iter().collect::>()); + assert_equal(res, [1.0, 1.0].into_iter().collect::>()); let entries = [ BedEntry { @@ -1979,11 +2024,12 @@ mod test { entries.clone().into_iter().map(|v| Ok(v)), crate::Summary::Mean, 2, + f64::NAN, arr.view_mut(), ) .unwrap(); let res = arr.to_vec(); - assert_eq!(res, [1.0, 2.0].into_iter().collect::>()); + assert_equal(res, [1.0, 2.0].into_iter().collect::>()); let mut arr = Array::from(vec![f64::NAN]); to_entry_array_bins( 10, @@ -1991,11 +2037,12 @@ mod test { entries.clone().into_iter().map(|v| Ok(v)), crate::Summary::Min, 1, + f64::NAN, arr.view_mut(), ) .unwrap(); let res = arr.to_vec(); - assert_eq!(res, [1.0].into_iter().collect::>()); + assert_equal(res, [1.0].into_iter().collect::>()); let mut arr = Array::from(vec![f64::NAN]); to_entry_array_bins( 10, @@ -2003,11 +2050,12 @@ mod test { entries.clone().into_iter().map(|v| Ok(v)), crate::Summary::Max, 1, + 
f64::NAN, arr.view_mut(), ) .unwrap(); let res = arr.to_vec(); - assert_eq!(res, [2.0].into_iter().collect::>()); + assert_equal(res, [2.0].into_iter().collect::>()); let entries = [ BedEntry { @@ -2033,11 +2081,12 @@ mod test { entries.clone().into_iter().map(|v| Ok(v)), crate::Summary::Mean, 2, + f64::NAN, arr.view_mut(), ) .unwrap(); let res = arr.to_vec(); - assert_eq!(res, [1.0, 3.0].into_iter().collect::>()); + assert_equal(res, [1.0, 3.0].into_iter().collect::>()); let mut arr = Array::from(vec![f64::NAN]); to_entry_array_bins( 10, @@ -2045,11 +2094,12 @@ mod test { entries.clone().into_iter().map(|v| Ok(v)), crate::Summary::Min, 1, + f64::NAN, arr.view_mut(), ) .unwrap(); let res = arr.to_vec(); - assert_eq!(res, [1.0].into_iter().collect::>()); + assert_equal(res, [1.0].into_iter().collect::>()); let mut arr = Array::from(vec![f64::NAN]); to_entry_array_bins( 10, @@ -2057,10 +2107,11 @@ mod test { entries.clone().into_iter().map(|v| Ok(v)), crate::Summary::Max, 1, + f64::NAN, arr.view_mut(), ) .unwrap(); let res = arr.to_vec(); - assert_eq!(res, [3.0].into_iter().collect::>()); + assert_equal(res, [3.0].into_iter().collect::>()); } } From a4166bbf1abc2c5f072c09564868a900e0e54a3d Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Sun, 31 Mar 2024 19:26:55 -0400 Subject: [PATCH 10/30] Add info fn --- bigtools/src/bbi/bbiread.rs | 10 +++++++ pybigtools/src/lib.rs | 59 +++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/bigtools/src/bbi/bbiread.rs b/bigtools/src/bbi/bbiread.rs index 4a02fe2..8ef5079 100644 --- a/bigtools/src/bbi/bbiread.rs +++ b/bigtools/src/bbi/bbiread.rs @@ -54,6 +54,16 @@ pub struct BBIHeader { pub(crate) uncompress_buf_size: u32, } +impl BBIHeader { + pub fn is_compressed(&self) -> bool { + self.uncompress_buf_size > 0 + } + + pub fn primary_data_size(&self) -> u64 { + self.full_index_offset - self.full_data_offset + } +} + /// Information on a chromosome in a bbi 
file #[derive(Clone, Debug)] pub struct ChromInfo { diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 88cbb8a..c9919b5 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -964,6 +964,65 @@ impl BBIRead { } } + fn info(&mut self, py: Python<'_>) -> PyResult { + let (info, summary) = match &mut self.bbi { + BBIReadRaw::BigWigFile(b) => { + let summary = b.get_summary()?; + (b.info(), summary) + } + BBIReadRaw::BigWigRemote(b) => { + let summary = b.get_summary()?; + (b.info(), summary) + } + BBIReadRaw::BigWigFileLike(b) => { + let summary = b.get_summary()?; + (b.info(), summary) + } + BBIReadRaw::BigBedFile(b) => { + let summary = b.get_summary()?; + (b.info(), summary) + } + BBIReadRaw::BigBedRemote(b) => { + let summary = b.get_summary()?; + (b.info(), summary) + } + BBIReadRaw::BigBedFileLike(b) => { + let summary = b.get_summary()?; + (b.info(), summary) + } + }; + let var = (summary.sum_squares + - (summary.sum * summary.sum) / summary.bases_covered as f64) + / (summary.bases_covered as f64 - 1.0); + let summary = [ + ("basesCovered", summary.bases_covered.to_object(py)), + ("sum", summary.sum.to_object(py)), + ( + "mean", + (summary.sum as f64 / summary.bases_covered as f64).to_object(py), + ), + ("min", summary.min_val.to_object(py)), + ("max", summary.max_val.to_object(py)), + ("std", f64::sqrt(var).to_object(py)), + ] + .into_py_dict(py) + .to_object(py); + let info = [ + ("version", info.header.version.to_object(py)), + ("isCompressed", info.header.is_compressed().to_object(py)), + ( + "primaryDataSize", + info.header.primary_data_size().to_object(py), + ), + ("zoomLevels", info.zoom_headers.len().to_object(py)), + ("chromCount", info.chrom_info.len().to_object(py)), + ("summary", summary), + ] + .into_py_dict(py) + .to_object(py); + Ok(info) + } + /// Returns the autosql of this bbi file. /// /// For bigBeds, this comes directly from the autosql stored in the file. 
From 6fc14fdda51dd0a1577d33f19d7e0ee3f6249aad Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Mon, 1 Apr 2024 20:03:45 -0400 Subject: [PATCH 11/30] Make is_bigwig, is_bigbed properties --- pybigtools/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index c9919b5..37c6169 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -926,6 +926,7 @@ struct BBIRead { #[pymethods] impl BBIRead { + #[getter] fn is_bigwig(&self) -> bool { #[cfg(feature = "remote")] { @@ -945,6 +946,7 @@ impl BBIRead { } } + #[getter] fn is_bigbed(&self) -> bool { #[cfg(feature = "remote")] { From 84a73d5f4f027ed8e0ba4afbe6efc295cb62bae4 Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Wed, 3 Apr 2024 23:00:48 -0400 Subject: [PATCH 12/30] Add zoom_records function --- bigtools/src/bbi/bbiread.rs | 31 +++++--- bigtools/src/bbi/bigbedread.rs | 23 ++++++ bigtools/src/bbi/bigwigread.rs | 22 ++++++ pybigtools/src/lib.rs | 134 +++++++++++++++++++++++++++++++-- 4 files changed, 192 insertions(+), 18 deletions(-) diff --git a/bigtools/src/bbi/bbiread.rs b/bigtools/src/bbi/bbiread.rs index 8ef5079..891e513 100644 --- a/bigtools/src/bbi/bbiread.rs +++ b/bigtools/src/bbi/bbiread.rs @@ -1,3 +1,4 @@ +use std::borrow::BorrowMut; use std::collections::hash_map::Entry; use std::collections::{HashMap, VecDeque}; use std::fs::File; @@ -1318,7 +1319,7 @@ pub(crate) fn get_zoom_block_values( chrom: u32, start: u32, end: u32, -) -> Result + Send>, BBIReadError> { +) -> Result, BBIReadError> { let (read, info) = bbifile.reader_and_info(); let data = read.get_block_data(info, &block)?; let mut bytes = BytesMut::with_capacity(data.len()); @@ -1389,30 +1390,34 @@ pub(crate) fn get_zoom_block_values( } *known_offset = block.offset + block.size; - Ok(Box::new(records.into_iter())) + Ok(records.into_iter()) } -pub(crate) struct ZoomIntervalIter<'a, I, B> +pub(crate) struct ZoomIntervalIter where I: 
Iterator + Send, - B: BBIRead, + R: BBIRead, + B: BorrowMut, { - bbifile: &'a mut B, + _r: std::marker::PhantomData, + bbifile: B, known_offset: u64, blocks: I, - vals: Option + Send + 'a>>, + vals: Option>, chrom: u32, start: u32, end: u32, } -impl<'a, I, B> ZoomIntervalIter<'a, I, B> +impl ZoomIntervalIter where I: Iterator + Send, - B: BBIRead, + R: BBIRead, + B: BorrowMut, { - pub fn new(bbifile: &'a mut B, blocks: I, chrom: u32, start: u32, end: u32) -> Self { + pub fn new(bbifile: B, blocks: I, chrom: u32, start: u32, end: u32) -> Self { ZoomIntervalIter { + _r: std::marker::PhantomData, bbifile, known_offset: 0, blocks, @@ -1424,10 +1429,11 @@ where } } -impl<'a, I, B> Iterator for ZoomIntervalIter<'a, I, B> +impl Iterator for ZoomIntervalIter where I: Iterator + Send, - B: BBIRead, + R: BBIRead, + B: BorrowMut, { type Item = Result; @@ -1444,8 +1450,9 @@ where }, None => { let current_block = self.blocks.next()?; + let file = self.bbifile.borrow_mut(); match get_zoom_block_values( - self.bbifile, + file, current_block, &mut self.known_offset, self.chrom, diff --git a/bigtools/src/bbi/bigbedread.rs b/bigtools/src/bbi/bigbedread.rs index c99653f..99323af 100644 --- a/bigtools/src/bbi/bigbedread.rs +++ b/bigtools/src/bbi/bigbedread.rs @@ -328,6 +328,29 @@ impl BigBedRead { let chrom = self.info.chrom_id(chrom_name)?; + let blocks = search_cir_tree(&self.info, &mut self.read, cir_tree, chrom_name, start, end)?; + Ok(ZoomIntervalIter::< + std::vec::IntoIter, + BigBedRead, + &mut BigBedRead, + >::new(self, blocks.into_iter(), chrom, start, end)) + } + + /// For a given chromosome, start, and end, returns an `Iterator` of the + /// intersecting `ZoomRecord`s. 
+ pub fn get_zoom_interval_move<'a>( + mut self, + chrom_name: &str, + start: u32, + end: u32, + reduction_level: u32, + ) -> Result>, ZoomIntervalError> { + let cir_tree = self + .zoom_cir_tree(reduction_level) + .map_err(|_| ZoomIntervalError::ReductionLevelNotFound)?; + + let chrom = self.info.chrom_id(chrom_name)?; + let blocks = search_cir_tree(&self.info, &mut self.read, cir_tree, chrom_name, start, end)?; Ok(ZoomIntervalIter::new( self, diff --git a/bigtools/src/bbi/bigwigread.rs b/bigtools/src/bbi/bigwigread.rs index b7f62b7..6047bfd 100644 --- a/bigtools/src/bbi/bigwigread.rs +++ b/bigtools/src/bbi/bigwigread.rs @@ -348,6 +348,28 @@ where let blocks = search_cir_tree(&self.info, &mut self.read, cir_tree, chrom_name, start, end)?; + Ok(ZoomIntervalIter::< + std::vec::IntoIter, + BigWigRead, + &mut BigWigRead, + >::new(self, blocks.into_iter(), chrom, start, end)) + } + + /// For a given chromosome, start, and end, returns an `Iterator` of the + /// intersecting `ZoomRecord`s. + pub fn get_zoom_interval_move<'a>( + mut self, + chrom_name: &str, + start: u32, + end: u32, + reduction_level: u32, + ) -> Result>, ZoomIntervalError> { + let cir_tree = self.zoom_cir_tree(reduction_level)?; + + let chrom = self.info.chrom_id(chrom_name)?; + + let blocks = search_cir_tree(&self.info, &mut self.read, cir_tree, chrom_name, start, end)?; + Ok(ZoomIntervalIter::new( self, blocks.into_iter(), diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 37c6169..7f99fe8 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -1025,6 +1025,18 @@ impl BBIRead { Ok(info) } + fn zooms(&self) -> Vec { + let zooms = match &self.bbi { + BBIReadRaw::BigWigFile(b) => &b.info().zoom_headers, + BBIReadRaw::BigWigRemote(b) => &b.info().zoom_headers, + BBIReadRaw::BigWigFileLike(b) => &b.info().zoom_headers, + BBIReadRaw::BigBedFile(b) => &b.info().zoom_headers, + BBIReadRaw::BigBedRemote(b) => &b.info().zoom_headers, + BBIReadRaw::BigBedFileLike(b) => 
&b.info().zoom_headers, + }; + zooms.iter().map(|z| z.reduction_level).collect() + } + /// Returns the autosql of this bbi file. /// /// For bigBeds, this comes directly from the autosql stored in the file. @@ -1118,7 +1130,7 @@ impl BBIRead { BBIReadRaw::BigWigFile(b) => { let b = b.reopen()?; Ok(BigWigIntervalIterator { - iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), + iter: Box::new(b.get_interval_move(&chrom, start, end).convert_err()?), } .into_py(py)) } @@ -1126,21 +1138,21 @@ impl BBIRead { BBIReadRaw::BigWigRemote(b) => { let b = b.reopen()?; Ok(BigWigIntervalIterator { - iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), + iter: Box::new(b.get_interval_move(&chrom, start, end).convert_err()?), } .into_py(py)) } BBIReadRaw::BigWigFileLike(b) => { let b = b.reopen()?; Ok(BigWigIntervalIterator { - iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), + iter: Box::new(b.get_interval_move(&chrom, start, end).convert_err()?), } .into_py(py)) } BBIReadRaw::BigBedFile(b) => { let b = b.reopen()?; Ok(BigBedEntriesIterator { - iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), + iter: Box::new(b.get_interval_move(&chrom, start, end).convert_err()?), } .into_py(py)) } @@ -1148,20 +1160,97 @@ impl BBIRead { BBIReadRaw::BigBedRemote(b) => { let b = b.reopen()?; Ok(BigBedEntriesIterator { - iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), + iter: Box::new(b.get_interval_move(&chrom, start, end).convert_err()?), } .into_py(py)) } BBIReadRaw::BigBedFileLike(b) => { let b = b.reopen()?; Ok(BigBedEntriesIterator { - iter: Box::new(b.get_interval_move(&chrom, start, end).unwrap()), + iter: Box::new(b.get_interval_move(&chrom, start, end).convert_err()?), } .into_py(py)) } } } + /// Returns the zoom records of a given range on a chromosome for a given zoom level. + /// + /// The result is an iterator of tuples. These tuples are in the format + /// (start: int, end: int, summary: dict). 
+ /// + /// The chrom argument is the name of the chromosome. + /// The start and end arguments denote the range to get values for. + /// If end is not provided, it defaults to the length of the chromosome. + /// If start is not provided, it defaults to the beginning of the chromosome. + fn zoom_records( + &mut self, + reduction_level: u32, + chrom: String, + start: Option, + end: Option, + ) -> PyResult { + let (start, end) = start_end(&self.bbi, &chrom, start, end)?; + match &self.bbi { + BBIReadRaw::BigWigFile(b) => { + let b = b.reopen()?; + let iter = b + .get_zoom_interval_move(&chrom, start, end, reduction_level) + .convert_err()?; + Ok(ZoomIntervalIterator { + iter: Box::new(iter), + }) + } + #[cfg(feature = "remote")] + BBIReadRaw::BigWigRemote(b) => { + let b = b.reopen()?; + let iter = b + .get_zoom_interval_move(&chrom, start, end, reduction_level) + .convert_err()?; + Ok(ZoomIntervalIterator { + iter: Box::new(iter), + }) + } + BBIReadRaw::BigWigFileLike(b) => { + let b = b.reopen()?; + let iter = b + .get_zoom_interval_move(&chrom, start, end, reduction_level) + .convert_err()?; + Ok(ZoomIntervalIterator { + iter: Box::new(iter), + }) + } + BBIReadRaw::BigBedFile(b) => { + let b = b.reopen()?; + let iter = b + .get_zoom_interval_move(&chrom, start, end, reduction_level) + .convert_err()?; + Ok(ZoomIntervalIterator { + iter: Box::new(iter), + }) + } + #[cfg(feature = "remote")] + BBIReadRaw::BigBedRemote(b) => { + let b = b.reopen()?; + let iter = b + .get_zoom_interval_move(&chrom, start, end, reduction_level) + .convert_err()?; + Ok(ZoomIntervalIterator { + iter: Box::new(iter), + }) + } + BBIReadRaw::BigBedFileLike(b) => { + let b = b.reopen()?; + let iter = b + .get_zoom_interval_move(&chrom, start, end, reduction_level) + .convert_err()?; + Ok(ZoomIntervalIterator { + iter: Box::new(iter), + }) + } + } + } + /// Returns the values of a given range on a chromosome. /// /// For bigWigs, the result is an array of length (end - start). 
@@ -1265,6 +1354,39 @@ impl BBIRead { } } +#[pyclass(module = "pybigtools")] +struct ZoomIntervalIterator { + iter: Box> + Send>, +} + +#[pymethods] +impl ZoomIntervalIterator { + fn __iter__(slf: PyRefMut) -> PyResult> { + Ok(slf.into()) + } + + fn __next__(mut slf: PyRefMut) -> PyResult> { + slf.iter + .next() + .transpose() + .map(|o| { + o.map(|v| { + let summary = [ + ("total_items", v.summary.total_items.to_object(slf.py())), + ("bases_covered", v.summary.bases_covered.to_object(slf.py())), + ("min_val", v.summary.min_val.to_object(slf.py())), + ("max_val", v.summary.max_val.to_object(slf.py())), + ("sum", v.summary.sum.to_object(slf.py())), + ("sum_squares", v.summary.sum_squares.to_object(slf.py())), + ] + .to_object(slf.py()); + (v.start, v.end, summary) + }) + }) + .convert_err() + } +} + /// This class is an iterator for `Values` from a bigWig. /// It returns only values that exist in the bigWig, skipping /// any missing intervals. From bf528f0d13801155063036af34838ab01092018b Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Sun, 7 Apr 2024 02:53:49 -0400 Subject: [PATCH 13/30] Add close method, fix text signature of values, implement gc and context manager methods, add api test, and fix bigtools returning intervals starting at the end base --- bigtools/src/bbi/bbiread.rs | 8 ++- bigtools/src/bbi/bigbedread.rs | 5 ++ bigtools/src/bbi/bigwigread.rs | 6 +- pybigtools/src/file_like.rs | 2 +- pybigtools/src/lib.rs | 106 +++++++++++++++++++++++-------- pybigtools/tests/test_api.py | 111 +++++++++++++++++++++++++++++++++ 6 files changed, 207 insertions(+), 31 deletions(-) create mode 100644 pybigtools/tests/test_api.py diff --git a/bigtools/src/bbi/bbiread.rs b/bigtools/src/bbi/bbiread.rs index 891e513..0a44f7d 100644 --- a/bigtools/src/bbi/bbiread.rs +++ b/bigtools/src/bbi/bbiread.rs @@ -557,12 +557,18 @@ impl BBIFileRead for S { } } -pub struct CachedBBIFileRead { +pub struct CachedBBIFileRead { read: S, 
cir_tree_node_map: HashMap, Vec>>, block_data: HashMap>, } +impl CachedBBIFileRead { + pub fn inner_read(&self) -> &S { + &self.read + } +} + impl CachedBBIFileRead { pub fn new(read: S) -> Self { CachedBBIFileRead { diff --git a/bigtools/src/bbi/bigbedread.rs b/bigtools/src/bbi/bigbedread.rs index 99323af..bf8fca4 100644 --- a/bigtools/src/bbi/bigbedread.rs +++ b/bigtools/src/bbi/bigbedread.rs @@ -214,6 +214,11 @@ impl BigBedRead { }) } + /// Gets a reference to the inner `R` type, in order to access any info + pub fn inner_read(&self) -> &R { + &self.read + } + /// Returns the summary data from bigBed /// /// Note: For version 1 of bigBeds, there is no total summary. In that diff --git a/bigtools/src/bbi/bigwigread.rs b/bigtools/src/bbi/bigwigread.rs index 6047bfd..b5b7bb1 100644 --- a/bigtools/src/bbi/bigwigread.rs +++ b/bigtools/src/bbi/bigwigread.rs @@ -527,7 +527,7 @@ fn get_block_values( end: chrom_end, value, }; - if value.end >= start && value.start <= end { + if value.end > start && value.start < end { value.start = value.start.max(start); value.end = value.end.min(end); values.push(value) @@ -555,7 +555,7 @@ fn get_block_values( end: chrom_end, value, }; - if value.end >= start && value.start <= end { + if value.end > start && value.start < end { value.start = value.start.max(start); value.end = value.end.min(end); values.push(value) @@ -584,7 +584,7 @@ fn get_block_values( end: chrom_end, value, }; - if value.end >= start && value.start <= end { + if value.end > start && value.start < end { value.start = value.start.max(start); value.end = value.end.min(end); values.push(value) diff --git a/pybigtools/src/file_like.rs b/pybigtools/src/file_like.rs index 51873db..77a2c75 100644 --- a/pybigtools/src/file_like.rs +++ b/pybigtools/src/file_like.rs @@ -8,7 +8,7 @@ use pyo3::{ /// traits, calling into python io methods. 
#[derive(Clone)] pub struct PyFileLikeObject { - inner: PyObject, + pub(crate) inner: PyObject, } impl PyFileLikeObject { diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 7f99fe8..af21c93 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -26,9 +26,9 @@ use file_like::PyFileLikeObject; use numpy::ndarray::ArrayViewMut; use numpy::PyArray1; use pyo3::exceptions::{self, PyTypeError}; -use pyo3::prelude::*; use pyo3::types::{IntoPyDict, PyAny, PyDict, PyFloat, PyInt, PyIterator, PyString, PyTuple}; -use pyo3::wrap_pyfunction; +use pyo3::{create_exception, wrap_pyfunction}; +use pyo3::{prelude::*, PyTraverseError, PyVisit}; use tokio::runtime; use url::Url; @@ -37,13 +37,16 @@ mod file_like; type ValueTuple = (u32, u32, f32); +create_exception!(pybigtools, BBIFileClosed, exceptions::PyException); + fn start_end( bbi: &BBIReadRaw, chrom_name: &str, - start: Option, - end: Option, + start: Option, + end: Option, ) -> PyResult<(u32, u32)> { let chroms = match &bbi { + BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), BBIReadRaw::BigWigFile(b) => b.chroms(), #[cfg(feature = "remote")] BBIReadRaw::BigWigRemote(b) => b.chroms(), @@ -63,7 +66,10 @@ fn start_end( } Some(c) => c.length, }; - return Ok((start.unwrap_or(0), end.unwrap_or(length))); + return Ok(( + start.map(|v| v.max(0) as u32).unwrap_or(0), + end.map(|v| (v.max(0) as u32).min(length)).unwrap_or(length), + )); } fn bigwig_start_end_length( @@ -112,6 +118,7 @@ impl Reopen for PyFileLikeObject { } enum BBIReadRaw { + Closed, BigWigFile(BigWigReadRaw>), #[cfg(feature = "remote")] BigWigRemote(BigWigReadRaw>), @@ -968,6 +975,7 @@ impl BBIRead { fn info(&mut self, py: Python<'_>) -> PyResult { let (info, summary) = match &mut self.bbi { + BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), BBIReadRaw::BigWigFile(b) => { let summary = b.get_summary()?; (b.info(), summary) @@ -1025,8 +1033,9 @@ impl BBIRead { Ok(info) } - fn zooms(&self) 
-> Vec { + fn zooms(&self) -> PyResult> { let zooms = match &self.bbi { + BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), BBIReadRaw::BigWigFile(b) => &b.info().zoom_headers, BBIReadRaw::BigWigRemote(b) => &b.info().zoom_headers, BBIReadRaw::BigWigFileLike(b) => &b.info().zoom_headers, @@ -1034,7 +1043,7 @@ impl BBIRead { BBIReadRaw::BigBedRemote(b) => &b.info().zoom_headers, BBIReadRaw::BigBedFileLike(b) => &b.info().zoom_headers, }; - zooms.iter().map(|z| z.reduction_level).collect() + Ok(zooms.iter().map(|z| z.reduction_level).collect()) } /// Returns the autosql of this bbi file. @@ -1051,19 +1060,19 @@ impl BBIRead { /// "fields": [(, , ), ...], /// } /// ``` - fn sql(&mut self, py: Python, parse: Option) -> PyResult { - let parse = parse.unwrap_or(false); - pub const BEDGRAPH: &str = r#" - table bedGraph - "bedGraph file" - ( - string chrom; "Reference sequence chromosome or scaffold" - uint chromStart; "Start position in chromosome" - uint chromEnd; "End position in chromosome" - float value; "Value for a given interval" - ) + #[pyo3(signature = (parse = false))] + fn sql(&mut self, py: Python, parse: bool) -> PyResult { + pub const BEDGRAPH: &str = r#"table bedGraph +"bedGraph file" +( + string chrom; "Reference sequence chromosome or scaffold" + uint chromStart; "Start position in chromosome" + uint chromEnd; "End position in chromosome" + float value; "Value for a given interval" +)\ "#; let schema = match &mut self.bbi { + BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), BBIReadRaw::BigWigFile(_) | BBIReadRaw::BigWigRemote(_) | BBIReadRaw::BigWigFileLike(_) => BEDGRAPH.to_string(), @@ -1122,11 +1131,12 @@ impl BBIRead { &mut self, py: Python<'_>, chrom: String, - start: Option, - end: Option, + start: Option, + end: Option, ) -> PyResult { let (start, end) = start_end(&self.bbi, &chrom, start, end)?; match &self.bbi { + BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), 
BBIReadRaw::BigWigFile(b) => { let b = b.reopen()?; Ok(BigWigIntervalIterator { @@ -1187,11 +1197,12 @@ impl BBIRead { &mut self, reduction_level: u32, chrom: String, - start: Option, - end: Option, + start: Option, + end: Option, ) -> PyResult { let (start, end) = start_end(&self.bbi, &chrom, start, end)?; match &self.bbi { + BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), BBIReadRaw::BigWigFile(b) => { let b = b.reopen()?; let iter = b @@ -1262,10 +1273,14 @@ impl BBIRead { /// The chrom argument is the name of the chromosome. /// The start and end arguments denote the range to get values for. /// If end is not provided, it defaults to the length of the chromosome. - /// If start is not provided, it defaults to the beginning of the chromosome. + /// If start is not provided, it defaults to the beginning of the chromosome. + /// The default oob value is `numpy.nan`. /// /// This returns a numpy array. - #[pyo3(signature = (chrom, start, end, bins=None, summary="mean".to_string(), exact=false, missing=0.0, oob=f64::NAN, arr=None))] + #[pyo3( + signature = (chrom, start, end, bins=None, summary="mean".to_string(), exact=false, missing=0.0, oob=f64::NAN, arr=None), + text_signature = r#"(chrom, start, end, bins=None, summary="mean", exact=False, missing=0.0, oob=..., arr=None)"#, + )] fn values( &mut self, py: Python<'_>, @@ -1290,6 +1305,7 @@ impl BBIRead { } }; match &mut self.bbi { + BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), BBIReadRaw::BigWigFile(b) => intervals_to_array( py, b, &chrom, start, end, bins, summary, exact, missing, oob, arr, ), @@ -1319,7 +1335,7 @@ impl BBIRead { /// If it is None, then all chroms will be returned. /// If it is a String, then the length of that chromosome will be returned. /// If the chromosome doesn't exist, nothing will be returned. 
- fn chroms(&mut self, py: Python, chrom: Option) -> Option { + fn chroms(&mut self, py: Python, chrom: Option) -> PyResult> { fn get_chrom_obj( b: &B, py: Python, @@ -1341,7 +1357,8 @@ impl BBIRead { ), } } - match &self.bbi { + Ok(match &self.bbi { + BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), BBIReadRaw::BigWigFile(b) => get_chrom_obj(b, py, chrom), #[cfg(feature = "remote")] BBIReadRaw::BigWigRemote(b) => get_chrom_obj(b, py, chrom), @@ -1350,7 +1367,41 @@ impl BBIRead { #[cfg(feature = "remote")] BBIReadRaw::BigBedRemote(b) => get_chrom_obj(b, py, chrom), BBIReadRaw::BigBedFileLike(b) => get_chrom_obj(b, py, chrom), + }) + } + + fn close(&mut self) { + self.bbi = BBIReadRaw::Closed; + } + + fn __enter__(slf: Py) -> Py { + slf + } + + fn __exit__( + slf: Py, + py: Python<'_>, + _exc_type: PyObject, + _exc_value: PyObject, + _exc_traceback: PyObject, + ) -> Py { + slf.borrow_mut(py).bbi = BBIReadRaw::Closed; + slf + } + + fn __traverse__(&self, visit: PyVisit<'_>) -> Result<(), PyTraverseError> { + match &self.bbi { + BBIReadRaw::Closed | BBIReadRaw::BigWigFile(_) | BBIReadRaw::BigBedFile(_) => {} + #[cfg(feature = "remote")] + BBIReadRaw::BigWigRemote(_) | BBIReadRaw::BigBedRemote(_) => {} + BBIReadRaw::BigWigFileLike(b) => visit.call(&b.inner_read().inner_read().inner)?, + BBIReadRaw::BigBedFileLike(b) => visit.call(&b.inner_read().inner_read().inner)?, } + Ok(()) + } + + fn __clear__(&mut self) { + self.close() } } @@ -1379,6 +1430,7 @@ impl ZoomIntervalIterator { ("sum", v.summary.sum.to_object(slf.py())), ("sum_squares", v.summary.sum_squares.to_object(slf.py())), ] + .into_py_dict(slf.py()) .to_object(slf.py()); (v.start, v.end, summary) }) @@ -2083,6 +2135,8 @@ fn bigWigAverageOverBed( #[pymodule] fn pybigtools(_py: Python, m: &PyModule) -> PyResult<()> { m.add("__version__", env!("CARGO_PKG_VERSION"))?; + m.add("BBIFileClosed", m.py().get_type::())?; + m.add_wrapped(wrap_pyfunction!(open))?; 
m.add_wrapped(wrap_pyfunction!(bigWigAverageOverBed))?; diff --git a/pybigtools/tests/test_api.py b/pybigtools/tests/test_api.py new file mode 100644 index 0000000..2cf004a --- /dev/null +++ b/pybigtools/tests/test_api.py @@ -0,0 +1,111 @@ +import pybigtools +import math +import numpy as np + +def test_api(): + b = pybigtools.open('../bigtools/resources/test/valid.bigWig', 'r') + + # Check type of file + assert b.is_bigbed == False + assert b.is_bigwig == True + + # No args => dict + assert b.chroms() == {'chr17': 83257441} + # Arg with chrom name => length + assert b.chroms('chr17') == 83257441 + # Missing chrom => None + assert b.chroms('chr11') == None + + # Get a list of zooms + assert b.zooms() == [10, 40, 160, 640, 2560, 10240, 40960, 163840, 655360, 2621440] + + # Even bigwigs have sql (a sql representing bedGraph) + assert "bedGraph" in b.sql() + # We can parse the sql + assert b.sql(True)['name'] == 'bedGraph' + + # (chrom, None, None) => all records on chrom + assert len(list(b.records('chr17'))) == 100000 + # (chrom, start, None) => all records from (start, ) + assert len(list(b.records('chr17', 100000))) == 91360 + # (chrom, start, end) => all records from (start, end) + assert len(list(b.records('chr17', 100000, 110000))) == 1515 + # Out of bounds start/end are truncated + assert len(list(b.records('chr17', -1000, 100000))) == 8641 + assert len(list(b.records('chr17', -1000, -500))) == 0 + assert len(list(b.records('chr17', 0, 84000000))) == 100000 + assert next(b.records('chr17')) == (59898, 59900, 0.06791999936103821) + # Unknown chrom => exception + try: + b.zoom_records('chr11') + except: + pass + else: + assert False + + # (chrom, None, None) => all records on chrom + assert len(list(b.zoom_records(10, 'chr17'))) == 13811 + # (chrom, start, None) => all records from (start, ) + assert len(list(b.zoom_records(10, 'chr17', 100000))) == 10872 + # (chrom, start, end) => all records from (start, end) + assert len(list(b.zoom_records(10, 'chr17', 
100000, 110000))) == 766 + # Out of bounds start/end are truncated + assert len(list(b.zoom_records(10, 'chr17', -1000, 100000))) == 2940 + assert len(list(b.zoom_records(10, 'chr17', -1000, -500))) == 0 + assert len(list(b.zoom_records(10, 'chr17', 0, 84000000))) == 13811 + assert next(b.zoom_records(10, 'chr17', 0, 100000)) == (59898, 59908, {'total_items': 0, 'bases_covered': 10, 'min_val': 0.06791999936103821, 'max_val': 0.16627000272274017, 'sum': 1.4660000801086426, 'sum_squares': 0.2303919643163681}) + assert next(b.zoom_records(160, 'chr17', 0, 100000)) == (59898, 60058, {'total_items': 0, 'bases_covered': 160, 'min_val': 0.06791999936103821, 'max_val': 0.8688300251960754, 'sum': 101.3516616821289, 'sum_squares': 80.17473602294922}) + # Unknown zoom => exception + try: + b.zoom_records(0, 'chr17') + except: + pass + else: + assert False + # Unknown chrom => exception + try: + b.zoom_records(10, 'chr11') + except: + pass + else: + assert False + + assert len(list(b.values('chr17', 100000, 110000))) == 10000 + assert len(list(b.values('chr17', 100000, 110000, 10))) == 10 + assert b.values('chr17', 100000, 110000, 10)[0] == 0.37435242314338685 + assert b.values('chr17', 100000, 110000, 10, 'max')[0] == 1.1978399753570557 + assert b.values('chr17', 100000, 110000, 10, 'min')[0] == 0.05403999984264374 + assert b.values('chr17', 100000, 110000, 10, 'mean', True)[0] == 0.37885534041374924 + assert list(b.values('chr17', 59890, 59900, 10, 'mean', True)) == [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06791999936103821, 0.06791999936103821] + assert list(b.values('chr17', 59890, 59900, 10, 'mean', True, -1.0)) == [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.06791999936103821, 0.06791999936103821] + assert math.isnan(b.values('chr17', -10, 10, 20, 'mean', True, 0.0)[0]) + assert not math.isnan(b.values('chr17', -10, 10, 20, 'mean', True, 0.0)[19]) + assert b.values('chr17', -10, 10, 20, 'mean', True, 0.0, 0.0)[0] == 0.0 + arr = np.zeros(20) + ret_arr = 
b.values('chr17', -10, 10, 20, 'mean', True, 0.0, np.nan, arr) + # The returned array is the same as the one passed, so both show the same values + assert math.isnan(arr[0]) + assert arr[19] == 0.0 + assert math.isnan(ret_arr[0]) + assert ret_arr[19] == 0.0 + + b.close() + # Closing means file is not usable + try: + b.chroms() + except: + pass + else: + assert False + + b = pybigtools.open('../bigtools/resources/test/valid.bigWig', 'r') + # Files are closed when exiting a context manager + with b: + pass + try: + b.chroms() + except: + pass + else: + assert False + From 075cc464d3c011a232bd7801268d97e64e35f9e1 Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Mon, 8 Apr 2024 10:07:48 -0400 Subject: [PATCH 14/30] Move bigWigAverageOverBed to BBIFileRead --- pybigtools/src/lib.rs | 236 ++++++++++++++++++------------------------ 1 file changed, 101 insertions(+), 135 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index af21c93..99a5313 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -980,6 +980,7 @@ impl BBIRead { let summary = b.get_summary()?; (b.info(), summary) } + #[cfg(feature = "remote")] BBIReadRaw::BigWigRemote(b) => { let summary = b.get_summary()?; (b.info(), summary) @@ -992,6 +993,7 @@ impl BBIRead { let summary = b.get_summary()?; (b.info(), summary) } + #[cfg(feature = "remote")] BBIReadRaw::BigBedRemote(b) => { let summary = b.get_summary()?; (b.info(), summary) @@ -1037,9 +1039,11 @@ impl BBIRead { let zooms = match &self.bbi { BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), BBIReadRaw::BigWigFile(b) => &b.info().zoom_headers, + #[cfg(feature = "remote")] BBIReadRaw::BigWigRemote(b) => &b.info().zoom_headers, BBIReadRaw::BigWigFileLike(b) => &b.info().zoom_headers, BBIReadRaw::BigBedFile(b) => &b.info().zoom_headers, + #[cfg(feature = "remote")] BBIReadRaw::BigBedRemote(b) => &b.info().zoom_headers, BBIReadRaw::BigBedFileLike(b) => 
&b.info().zoom_headers, }; @@ -1073,10 +1077,11 @@ impl BBIRead { "#; let schema = match &mut self.bbi { BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), - BBIReadRaw::BigWigFile(_) - | BBIReadRaw::BigWigRemote(_) - | BBIReadRaw::BigWigFileLike(_) => BEDGRAPH.to_string(), + BBIReadRaw::BigWigFile(_) | BBIReadRaw::BigWigFileLike(_) => BEDGRAPH.to_string(), + #[cfg(feature = "remote")] + BBIReadRaw::BigWigRemote(_) => BEDGRAPH.to_string(), BBIReadRaw::BigBedFile(b) => b.autosql().convert_err()?, + #[cfg(feature = "remote")] BBIReadRaw::BigBedRemote(b) => b.autosql().convert_err()?, BBIReadRaw::BigBedFileLike(b) => b.autosql().convert_err()?, }; @@ -1370,6 +1375,85 @@ impl BBIRead { }) } + /// Gets the average values from a bigWig over the entries of a bed file. + /// Raises an exception if the current file is a bigBed. + /// + /// Parameters: + /// bed (str): The path to the bed. + /// names (None, bool, or int): + /// If `None`, then each return value will be a single `float`, + /// the average value over an interval in the bed file. + /// If `True`, then each return value will be a tuple of the value of column 4 + /// and the average value over the interval with that name in the bed file. + /// If `False`, then each return value will be a tuple of the interval in the format + /// `{chrom}:{start}-{end}` and the average value over that interval. + /// If `0`, then each return value will match as if `False` was passed. + /// If a `1+`, then each return value will be a tuple of the value of column of this + /// parameter (1-based) and the average value over the interval. + /// + /// Returns: + /// This returns a generator of values. (Therefore, to save to a list, do `list(bigWigAverageOverBed(...))`) + /// If no name is specified (see the `names` parameter above), then returns a generator of `float`s. 
+ /// If a name column is specified (see above), then returns a generator of tuples `({name}, {average})` + fn average_over_bed( + &mut self, + py: Python, + bed: String, + names: Option, + ) -> PyResult { + let (name, usename) = { + match names { + Some(names) => match names.extract::(py) { + Ok(true) => (Name::Column(3), true), + Ok(false) => (Name::None, true), + Err(_) => match names.extract::(py) { + Ok(col) => match col { + 0 => (Name::None, true), + 1.. => (Name::Column((col - 1) as usize), true), + _ => { + return Err(PyErr::new::( + "Invalid names argument. Must be >= 0.", + )); + } + }, + Err(_) => { + return Err(PyErr::new::("Invalid names argument. Should be either `None`, a `bool`, or an `int`")); + } + }, + }, + None => (Name::None, false), + } + }; + let bedin = BufReader::new(File::open(bed)?); + + let res = match &mut self.bbi { + BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), + BBIReadRaw::BigWigFile(b) => { + let b = b.reopen()?; + let iter = Box::new(bigwig_average_over_bed(bedin, b, name)); + BigWigAverageOverBedEntriesIterator { iter, usename }.into_py(py) + } + #[cfg(feature = "remote")] + BBIReadRaw::BigWigRemote(b) => { + let b = b.reopen()?; + let iter = Box::new(bigwig_average_over_bed(bedin, b, name)); + BigWigAverageOverBedEntriesIterator { iter, usename }.into_py(py) + } + BBIReadRaw::BigWigFileLike(b) => { + let b = b.reopen()?; + let iter = Box::new(bigwig_average_over_bed(bedin, b, name)); + BigWigAverageOverBedEntriesIterator { iter, usename }.into_py(py) + } + BBIReadRaw::BigBedFile(_) | BBIReadRaw::BigBedFileLike(_) => { + return Err(BBIFileClosed::new_err("Not a bigWig.")) + } + #[cfg(feature = "remote")] + BBIReadRaw::BigBedRemote(_) => return Err(BBIFileClosed::new_err("Not a bigWig.")), + }; + + Ok(res) + } + fn close(&mut self) { self.bbi = BBIReadRaw::Closed; } @@ -1798,19 +1882,22 @@ impl BigWigAverageOverBedEntriesIterator { fn __next__( mut slf: PyRefMut, ) -> PyResult> { - slf.iter + let v 
= slf + .iter .next() .transpose() - .map(|o| { - o.map(|v| { - if slf.usename { - BigWigAverageOverBedEntriesIteratorRet::WithName((v.name, v.mean)) - } else { - BigWigAverageOverBedEntriesIteratorRet::Single(v.mean) - } - }) - }) - .map_err(|e| PyErr::new::(format!("{}", e))) + .map_err(|e| PyErr::new::(format!("{}", e)))?; + + let Some(v) = v else { + return Ok(None); + }; + + let item = if slf.usename { + BigWigAverageOverBedEntriesIteratorRet::WithName((v.name, v.mean)) + } else { + BigWigAverageOverBedEntriesIteratorRet::Single(v.mean) + }; + Ok(Some(item)) } } @@ -2011,126 +2098,6 @@ fn open_path_or_url( Ok(res) } -/// Gets the average values from a bigWig over the entries of a bed file. -/// -/// Parameters: -/// bigWig (str): The path to the bigWig. -/// bed (str): The path to the bed. -/// names (None, bool, or int): -/// If `None`, then each return value will be a single `float`, -/// the average value over an interval in the bed file. -/// If `True`, then each return value will be a tuple of the value of column 4 -/// and the average value over the interval with that name in the bed file. -/// If `False`, then each return value will be a tuple of the interval in the format -/// `{chrom}:{start}-{end}` and the average value over that interval. -/// If `0`, then each return value will match as if `False` was passed. -/// If a `1+`, then each return value will be a tuple of the value of column of this -/// parameter (1-based) and the average value over the interval. -/// -/// Returns: -/// This returns a generator of values. (Therefore, to save to a list, do `list(bigWigAverageOverBed(...))`) -/// If no name is specified (see the `names` parameter above), then returns a generator of `float`s. 
-/// If a name column is specified (see above), then returns a generator of tuples `({name}, {average})` -#[pyfunction] -fn bigWigAverageOverBed( - py: Python, - bigwig: String, - bed: String, - names: Option, -) -> PyResult { - let extension = match &Path::new(&bigwig).extension().map(|e| e.to_string_lossy()) { - Some(e) => e.to_string(), - None => { - return Err(PyErr::new::(format!( - "Invalid file type. Must be a bigWig (.bigWig, .bw)." - ))); - } - }; - let isfile = std::path::Path::new(&bigwig).exists(); - if !isfile { - match Url::parse(&bigwig) { - Ok(_) => {} - Err(_) => { - return Err(PyErr::new::(format!( - "Invalid file path. The file does not exists and it is not a url." - ))) - } - } - } - let (name, usename) = { - match names { - Some(names) => match names.extract::(py) { - Ok(b) => { - if b { - (Name::Column(3), true) - } else { - (Name::None, true) - } - } - Err(_) => match names.extract::(py) { - Ok(col) => match col { - 0 => (Name::None, true), - 1.. => (Name::Column((col - 1) as usize), true), - _ => { - return Err(PyErr::new::( - "Invalid names argument. Must be >= 0.", - )); - } - }, - Err(_) => { - return Err(PyErr::new::("Invalid names argument. Should be either `None`, a `bool`, or an `int`")); - } - }, - }, - None => (Name::None, false), - } - }; - let res = match extension.as_ref() { - "bw" | "bigWig" | "bigwig" => { - if isfile { - let read = BigWigReadRaw::open_file(&bigwig) - .map_err(|_| { - PyErr::new::(format!("Error opening bigWig.")) - })? - .cached(); - let bedin = BufReader::new(File::open(bed)?); - let iter = Box::new(bigwig_average_over_bed(bedin, read, name)); - - BigWigAverageOverBedEntriesIterator { iter, usename }.into_py(py) - } else { - #[cfg(feature = "remote")] - { - let read = BigWigReadRaw::open(RemoteFile::new(&bigwig)) - .map_err(|_| { - PyErr::new::(format!( - "Error opening bigBed." - )) - })? 
- .cached(); - let bedin = BufReader::new(File::open(bed)?); - let iter = Box::new(bigwig_average_over_bed(bedin, read, name)); - - BigWigAverageOverBedEntriesIterator { iter, usename }.into_py(py) - } - - #[cfg(not(feature = "remote"))] - { - return Err(PyErr::new::(format!( - "Builtin support for remote files is not available on this platform." - ))); - } - } - } - _ => { - return Err(PyErr::new::(format!( - "Invalid file type. Must be a bigWig (.bigWig, .bw)." - ))); - } - }; - - Ok(res) -} - /// The base module for opening a bigWig or bigBed. The only defined function is `open`. #[pymodule] fn pybigtools(_py: Python, m: &PyModule) -> PyResult<()> { @@ -2138,7 +2105,6 @@ fn pybigtools(_py: Python, m: &PyModule) -> PyResult<()> { m.add("BBIFileClosed", m.py().get_type::())?; m.add_wrapped(wrap_pyfunction!(open))?; - m.add_wrapped(wrap_pyfunction!(bigWigAverageOverBed))?; m.add_class::()?; m.add_class::()?; From a69bf96e63c871108a23d39e31efedefa98ee9c9 Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Wed, 10 Apr 2024 01:24:44 -0400 Subject: [PATCH 15/30] Use specialized Python exceptions --- pybigtools/src/lib.rs | 118 ++++++++++++++++++++++-------------------- 1 file changed, 63 insertions(+), 55 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 99a5313..b22393e 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -16,7 +16,7 @@ use bigtools::utils::misc::{ bigwig_average_over_bed, BigWigAverageOverBedEntry, BigWigAverageOverBedError, Name, }; use bigtools::{ - BBIFileRead, BBIReadError, BedEntry, BigBedRead as BigBedReadRaw, + BBIFileRead, BBIReadError as _BBIReadError, BedEntry, BigBedRead as BigBedReadRaw, BigBedWrite as BigBedWriteRaw, BigWigRead as BigWigReadRaw, BigWigWrite as BigWigWriteRaw, CachedBBIFileRead, Value, ZoomRecord, }; @@ -38,6 +38,7 @@ mod file_like; type ValueTuple = (u32, u32, f32); create_exception!(pybigtools, BBIFileClosed, exceptions::PyException); +create_exception!(pybigtools, BBIReadError, 
exceptions::PyException); fn start_end( bbi: &BBIReadRaw, @@ -59,7 +60,7 @@ fn start_end( let chrom = chroms.into_iter().find(|x| x.name == chrom_name); let length = match chrom { None => { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "No chromomsome with name `{}` found.", chrom_name ))) @@ -101,7 +102,7 @@ fn start_end_length_inner( let chrom = chroms.into_iter().find(|x| x.name == chrom_name); let length = match chrom { None => { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "No chromomsome with name `{}` found.", chrom_name ))) @@ -140,14 +141,21 @@ trait ToPyErr { fn to_py_err(self) -> PyErr; } -impl ToPyErr for bigtools::ZoomIntervalError { +impl ToPyErr for bigtools::BBIReadError { fn to_py_err(self) -> PyErr { - PyErr::new::(format!("{}", self)) + PyErr::new::(format!("{}", self)) } } -impl ToPyErr for bigtools::BBIReadError { +impl ToPyErr for bigtools::ZoomIntervalError { fn to_py_err(self) -> PyErr { - PyErr::new::(format!("{}", self)) + match self { + bigtools::ZoomIntervalError::ReductionLevelNotFound => { + PyErr::new::(format!( + "The passed reduction level was not found" + )) + } + _ => PyErr::new::(format!("{}", self)), + } } } @@ -192,7 +200,7 @@ fn intervals_to_array( (None, None) => PyArray1::from_vec(py, vec![missing; (end - start) as usize]).to_object(py), }; let v: &PyArray1 = arr.downcast::>(py).map_err(|_| { - PyErr::new::( + PyErr::new::( "`arr` option must be a one-dimensional numpy array, if passed.", ) })?; @@ -200,7 +208,7 @@ fn intervals_to_array( let bin_size = match bins { Some(bins) => { if v.len() != bins { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "`arr` does not the expected size (expected `{}`, found `{}`), if passed.", bins, v.len(), @@ -251,8 +259,8 @@ fn intervals_to_array( } _ => { if v.len() != (end - start) as usize { - return Err(PyErr::new::(format!( - "`arr` does not the expected size (expected `{}`, found `{}`), if passed.", + return 
Err(PyErr::new::(format!( + "`arr` does not have the expected size (expected `{}`, found `{}`), if passed.", (end - start) as usize, v.len(), ))); @@ -327,7 +335,7 @@ fn entries_to_array( (None, None) => PyArray1::from_vec(py, vec![missing; (end - start) as usize]).to_object(py), }; let v: &PyArray1 = arr.downcast::>(py).map_err(|_| { - PyErr::new::( + PyErr::new::( "`arr` option must be a one-dimensional numpy array, if passed.", ) })?; @@ -335,8 +343,8 @@ fn entries_to_array( let bin_size = match bins { Some(bins) => { if v.len() != bins { - return Err(PyErr::new::(format!( - "`arr` does not the expected size (expected `{}`, found `{}`), if passed.", + return Err(PyErr::new::(format!( + "`arr` does not have the expected size (expected `{}`, found `{}`), if passed.", bins, v.len(), ))); @@ -386,8 +394,8 @@ fn entries_to_array( } _ => { if v.len() != (end - start) as usize { - return Err(PyErr::new::(format!( - "`arr` does not the expected size (expected `{}`, found `{}`), if passed.", + return Err(PyErr::new::(format!( + "`arr` does not have the expected size (expected `{}`, found `{}`), if passed.", (end - start) as usize, v.len(), ))); @@ -431,13 +439,13 @@ fn entries_to_array( Ok(arr) } -fn to_array>>( +fn to_array>>( start: i32, end: i32, iter: I, missing: f64, mut v: ArrayViewMut<'_, f64, numpy::Ix1>, -) -> Result<(), BBIReadError> { +) -> Result<(), _BBIReadError> { assert_eq!(v.len(), (end - start) as usize); v.fill(missing); for interval in iter { @@ -450,7 +458,7 @@ fn to_array>>( } Ok(()) } -fn to_array_bins>>( +fn to_array_bins>>( start: i32, end: i32, iter: I, @@ -458,7 +466,7 @@ fn to_array_bins>>( bins: usize, missing: f64, mut v: ArrayViewMut<'_, f64, numpy::Ix1>, -) -> Result<(), BBIReadError> { +) -> Result<(), _BBIReadError> { assert_eq!(v.len(), bins); v.fill(missing); @@ -556,7 +564,7 @@ fn to_array_bins>>( } Ok(()) } -fn to_array_zoom>>( +fn to_array_zoom>>( start: i32, end: i32, iter: I, @@ -564,7 +572,7 @@ fn to_array_zoom>>( bins: usize, 
missing: f64, mut v: ArrayViewMut<'_, f64, numpy::Ix1>, -) -> Result<(), BBIReadError> { +) -> Result<(), _BBIReadError> { assert_eq!(v.len(), bins); v.fill(missing); @@ -673,13 +681,13 @@ fn to_array_zoom>>( } Ok(()) } -fn to_entry_array>>( +fn to_entry_array>>( start: i32, end: i32, iter: I, missing: f64, mut v: ArrayViewMut<'_, f64, numpy::Ix1>, -) -> Result<(), BBIReadError> { +) -> Result<(), _BBIReadError> { assert_eq!(v.len(), (end - start) as usize); v.fill(missing); for interval in iter { @@ -692,7 +700,7 @@ fn to_entry_array>>( } Ok(()) } -fn to_entry_array_bins>>( +fn to_entry_array_bins>>( start: i32, end: i32, iter: I, @@ -700,7 +708,7 @@ fn to_entry_array_bins>>( bins: usize, missing: f64, mut v: ArrayViewMut<'_, f64, numpy::Ix1>, -) -> Result<(), BBIReadError> { +) -> Result<(), _BBIReadError> { assert_eq!(v.len(), bins); v.fill(missing); @@ -806,7 +814,7 @@ fn to_entry_array_bins>>( Ok(()) } -fn to_entry_array_zoom>>( +fn to_entry_array_zoom>>( start: i32, end: i32, iter: I, @@ -814,7 +822,7 @@ fn to_entry_array_zoom>>( bins: usize, missing: f64, mut v: ArrayViewMut<'_, f64, numpy::Ix1>, -) -> Result<(), BBIReadError> { +) -> Result<(), _BBIReadError> { assert_eq!(v.len(), bins); v.fill(missing); @@ -1086,11 +1094,10 @@ impl BBIRead { BBIReadRaw::BigBedFileLike(b) => b.autosql().convert_err()?, }; let obj = if parse { - let mut declarations = parse_autosql(&schema).map_err(|_| { - PyErr::new::("Unable to parse autosql.") - })?; + let mut declarations = parse_autosql(&schema) + .map_err(|_| PyErr::new::("Unable to parse autosql."))?; if declarations.len() > 1 { - return Err(PyErr::new::( + return Err(PyErr::new::( "Unexpected extra declarations.", )); } @@ -1304,7 +1311,7 @@ impl BBIRead { "min" => Summary::Min, "max" => Summary::Max, _ => { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "Unrecognized summary. Only `mean`, `min`, and `max` are allowed." 
))); } @@ -1417,7 +1424,7 @@ impl BBIRead { } }, Err(_) => { - return Err(PyErr::new::("Invalid names argument. Should be either `None`, a `bool`, or an `int`")); + return Err(PyErr::new::("Invalid names argument. Should be either `None`, a `bool`, or an `int`")); } }, }, @@ -1491,7 +1498,7 @@ impl BBIRead { #[pyclass(module = "pybigtools")] struct ZoomIntervalIterator { - iter: Box> + Send>, + iter: Box> + Send>, } #[pymethods] @@ -1528,7 +1535,7 @@ impl ZoomIntervalIterator { /// any missing intervals. #[pyclass(module = "pybigtools")] struct BigWigIntervalIterator { - iter: Box> + Send>, + iter: Box> + Send>, } #[pymethods] @@ -1549,7 +1556,7 @@ impl BigWigIntervalIterator { /// This class is an interator for the entries in a bigBed file. #[pyclass(module = "pybigtools")] struct BigBedEntriesIterator { - iter: Box> + Send>, + iter: Box> + Send>, } #[pymethods] @@ -1600,7 +1607,7 @@ impl BigWigWrite { let bigwig = self .bigwig .take() - .ok_or_else(|| PyErr::new::("File already closed."))?; + .ok_or_else(|| PyErr::new::("File already closed."))?; let chrom_map = chroms .into_iter() .map(|(key, val)| { @@ -1738,7 +1745,7 @@ impl BigBedWrite { let bigbed = self .bigbed .take() - .ok_or_else(|| PyErr::new::("File already closed."))?; + .ok_or_else(|| PyErr::new::("File already closed."))?; let chrom_map = chroms .into_iter() .map(|(key, val)| { @@ -1928,7 +1935,7 @@ fn open(py: Python, path_url_or_file_like: PyObject, mode: Option) -> Py Some(mode) if mode == "r" => false, None => false, Some(mode) => { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "Invalid mode: `{}`", mode ))); @@ -1941,13 +1948,13 @@ fn open(py: Python, path_url_or_file_like: PyObject, mode: Option) -> Py } if iswrite { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "Writing only supports file names", ))); } let file_like = match PyFileLikeObject::new(path_url_or_file_like, true, false, true) { Ok(file_like) => file_like, - Err(_) => return 
Err(PyErr::new::(format!( + Err(_) => return Err(PyErr::new::(format!( "Unknown argument for `path_url_or_file_like`. Not a file path string or url, and not a file-like object.", ))), }; @@ -1962,7 +1969,7 @@ fn open(py: Python, path_url_or_file_like: PyObject, mode: Option) -> Py } .into_py(py), Err(e) => { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "File-like object is not a bigWig or bigBed. Or there was just a problem reading: {e}", ))) } @@ -1982,13 +1989,13 @@ fn open_path_or_url( { Some(e) => e.to_string(), None => { - return Err(PyErr::new::(format!("Invalid file type. Must be either a bigWig (.bigWig, .bw) or bigBed (.bigBed, .bb)."))); + return Err(PyErr::new::(format!("Invalid file type. Must be either a bigWig (.bigWig, .bw) or bigBed (.bigBed, .bb)."))); } }; let res = if iswrite { match Url::parse(&path_url_or_file_like) { Ok(_) => { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "Invalid file path. Writing does not support urls." ))) } @@ -2004,7 +2011,7 @@ fn open_path_or_url( } .into_py(py), _ => { - return Err(PyErr::new::(format!("Invalid file type. Must be either a bigWig (.bigWig, .bw) or bigBed (.bigBed, .bb)."))); + return Err(PyErr::new::(format!("Invalid file type. Must be either a bigWig (.bigWig, .bw) or bigBed (.bigBed, .bb)."))); } } } else { @@ -2013,8 +2020,8 @@ fn open_path_or_url( match Url::parse(&path_url_or_file_like) { Ok(_) => {} Err(_) => { - return Err(PyErr::new::(format!( - "Invalid file path. The file does not exists and it is not a url." + return Err(PyErr::new::(format!( + "Invalid file path. The file does not exist and it is not a url." ))) } } @@ -2028,7 +2035,7 @@ fn open_path_or_url( } .into_py(py), Err(_) => { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "Error opening bigWig." 
))) } @@ -2041,7 +2048,7 @@ fn open_path_or_url( } .into_py(py), Err(_) => { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "Error opening bigWig." ))) } @@ -2049,7 +2056,7 @@ fn open_path_or_url( #[cfg(not(feature = "remote"))] { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "Builtin support for remote files is not supported on this platform." ))); } @@ -2063,7 +2070,7 @@ fn open_path_or_url( } .into_py(py), Err(_) => { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "Error opening bigBed." ))) } @@ -2076,7 +2083,7 @@ fn open_path_or_url( } .into_py(py), Err(_) => { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "Error opening bigBed." ))) } @@ -2084,14 +2091,14 @@ fn open_path_or_url( #[cfg(not(feature = "remote"))] { - return Err(PyErr::new::(format!( + return Err(PyErr::new::(format!( "Builtin support for remote files is not supported on this platform." ))); } } } _ => { - return Err(PyErr::new::(format!("Invalid file type. Must be either a bigWig (.bigWig, .bw) or bigBed (.bigBed, .bb)."))); + return Err(PyErr::new::(format!("Invalid file type. 
Must be either a bigWig (.bigWig, .bw) or bigBed (.bigBed, .bb)."))); } } }; @@ -2103,6 +2110,7 @@ fn open_path_or_url( fn pybigtools(_py: Python, m: &PyModule) -> PyResult<()> { m.add("__version__", env!("CARGO_PKG_VERSION"))?; m.add("BBIFileClosed", m.py().get_type::())?; + m.add("BBIReadError", m.py().get_type::())?; m.add_wrapped(wrap_pyfunction!(open))?; From a555056fe28e6406e4fb12cb97ec31a9ebcf149c Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Wed, 10 Apr 2024 01:24:56 -0400 Subject: [PATCH 16/30] test: Refactor tests Update test_bigwig tests Linting Update tests Update tests Add bigbed fixture Update tests Add test for info method --- pybigtools/tests/test_api.py | 338 ++++++++++++++++++++++++-------- pybigtools/tests/test_bigwig.py | 53 ++--- 2 files changed, 288 insertions(+), 103 deletions(-) diff --git a/pybigtools/tests/test_api.py b/pybigtools/tests/test_api.py index 2cf004a..4d3f29b 100644 --- a/pybigtools/tests/test_api.py +++ b/pybigtools/tests/test_api.py @@ -1,111 +1,291 @@ -import pybigtools import math +import pathlib +from io import BytesIO + import numpy as np +import pytest + +import pybigtools + +TEST_DIR = pathlib.Path(__file__).parent +REPO_ROOT = TEST_DIR.parent.parent + + +def test_open_close(): + path = str(REPO_ROOT / "bigtools/resources/test/valid.bigWig") + b = pybigtools.open(path, "r") + b.close() + assert pytest.raises(pybigtools.BBIFileClosed, b.chroms) + + # Files are closed when exiting a context manager + with pybigtools.open(path, "r") as b: + pass + assert pytest.raises(pybigtools.BBIFileClosed, b.chroms) + + # Invalid file-like object + s = BytesIO() + assert pytest.raises(pybigtools.BBIReadError, pybigtools.open, s, "r") + + +@pytest.fixture +def bw(): + path = str(REPO_ROOT / "bigtools/resources/test/valid.bigWig") + return pybigtools.open(path, "r") + + +@pytest.fixture +def bb(): + path = str(TEST_DIR / "data/bigBedExample.bb") + return pybigtools.open(path, "r") + + +def test_check_filetype(bw, bb): + assert not 
bw.is_bigbed + assert bw.is_bigwig -def test_api(): - b = pybigtools.open('../bigtools/resources/test/valid.bigWig', 'r') + assert bb.is_bigbed + assert not bb.is_bigwig - # Check type of file - assert b.is_bigbed == False - assert b.is_bigwig == True +def test_info(bw): + assert bw.info() == { + 'version': 4, + 'isCompressed': True, + 'primaryDataSize': 603305, + 'zoomLevels': 10, + 'chromCount': 1, + 'summary': { + 'basesCovered': 137894, + 'sum': 89001714.97238067, + 'mean': 645.4357330440822, + 'min': 0.006219999864697456, + 'max': 14254.0, + 'std': 751.0146556298351 + }, + } + + +def test_chroms(bw, bb): # No args => dict - assert b.chroms() == {'chr17': 83257441} + assert bw.chroms() == {"chr17": 83_257_441} + assert bb.chroms() == {"chr21": 48_129_895} + # Arg with chrom name => length - assert b.chroms('chr17') == 83257441 + assert bw.chroms("chr17") == 83257441 + assert bb.chroms("chr21") == 48129895 + # Missing chrom => None - assert b.chroms('chr11') == None + assert bw.chroms("chr11") is None + assert bb.chroms("chr11") is None + +def test_zooms(bw, bb): # Get a list of zooms - assert b.zooms() == [10, 40, 160, 640, 2560, 10240, 40960, 163840, 655360, 2621440] + assert bw.zooms() == [10, 40, 160, 640, 2560, 10240, 40960, 163840, 655360, 2621440] + assert bb.zooms() == [3911, 39110, 391100, 3911000, 39110000] + +def test_autosql(bw, bb): # Even bigwigs have sql (a sql representing bedGraph) - assert "bedGraph" in b.sql() + assert "bedGraph" in bw.sql() # We can parse the sql - assert b.sql(True)['name'] == 'bedGraph' + # assert bw.sql(True)['name'] == 'bedGraph' + + # bb.sql() + # BBIReadError: The file was invalid: Invalid autosql: not UTF-8 + +def test_records(bw, bb): # (chrom, None, None) => all records on chrom - assert len(list(b.records('chr17'))) == 100000 + assert len(list(bw.records("chr17"))) == 100_000 + assert len(list(bb.records("chr21"))) == 14_810 + # (chrom, start, None) => all records from (start, ) - assert len(list(b.records('chr17', 
100000))) == 91360 + assert len(list(bw.records("chr17", 100_000))) == 91_360 + assert len(list(bb.records("chr21", 10_000_000))) == 14_799 + # (chrom, start, end) => all records from (start, end) - assert len(list(b.records('chr17', 100000, 110000))) == 1515 + assert len(list(bw.records("chr17", 100_000, 110_000))) == 1515 + assert len(list(bb.records("chr21", 10_000_000, 20_000_000))) == 233 + # Out of bounds start/end are truncated - assert len(list(b.records('chr17', -1000, 100000))) == 8641 - assert len(list(b.records('chr17', -1000, -500))) == 0 - assert len(list(b.records('chr17', 0, 84000000))) == 100000 - assert next(b.records('chr17')) == (59898, 59900, 0.06791999936103821) + x = list(bw.records("chr17", 0, 100_000)) + assert len(x) == 8641 + assert list(bw.records("chr17", -1000, 100_000)) == x + x = list(bb.records("chr21", 0, 10_000_000)) + assert len(x) == 11 + assert list(bb.records("chr21", -1000, 10_000_000)) == x + + y = list(bw.records("chr17", 0, bw.chroms("chr17"))) + assert len(y) == 100_000 + assert list(bw.records("chr17", 0, bw.chroms("chr17") * 2)) == y + assert next(bw.records("chr17")) == (59898, 59900, 0.06791999936103821) + y = list(bb.records("chr21", 0, bb.chroms("chr21"))) + assert len(y) == 14810 + assert list(bb.records("chr21", 0, bb.chroms("chr21") * 2)) == y + assert next(bb.records("chr21")) == (9434178, 9434609) + + # Fully out of bounds ranges return no records + assert len(list(bw.records("chr17", -1000, -500))) == 0 + assert len(list(bw.records("chr17", 83_257_441, 84_000_000))) == 0 + + assert len(list(bb.records("chr21", -1000, -500))) == 0 + assert len(list(bb.records("chr21", 48_129_895, 49_000_000))) == 0 + # Unknown chrom => exception - try: - b.zoom_records('chr11') - except: - pass - else: - assert False + assert pytest.raises(KeyError, bw.records, "chr11") + assert pytest.raises(KeyError, bb.records, "chr11") + +def test_zoom_records(bw, bb): # (chrom, None, None) => all records on chrom - assert 
len(list(b.zoom_records(10, 'chr17'))) == 13811 + assert len(list(bw.zoom_records(10, "chr17"))) == 13811 + assert len(list(bb.zoom_records(3911, "chr21"))) == 1676 + # (chrom, start, None) => all records from (start, ) - assert len(list(b.zoom_records(10, 'chr17', 100000))) == 10872 + assert len(list(bw.zoom_records(10, "chr17", 100_000))) == 10872 + assert len(list(bb.zoom_records(3911, "chr21", 10_000_000))) == 1670 + # (chrom, start, end) => all records from (start, end) - assert len(list(b.zoom_records(10, 'chr17', 100000, 110000))) == 766 + assert len(list(bw.zoom_records(10, "chr17", 100_000, 110_000))) == 766 + assert len(list(bb.zoom_records(3911, "chr21", 10_000_000, 20_000_000))) == 154 + # Out of bounds start/end are truncated - assert len(list(b.zoom_records(10, 'chr17', -1000, 100000))) == 2940 - assert len(list(b.zoom_records(10, 'chr17', -1000, -500))) == 0 - assert len(list(b.zoom_records(10, 'chr17', 0, 84000000))) == 13811 - assert next(b.zoom_records(10, 'chr17', 0, 100000)) == (59898, 59908, {'total_items': 0, 'bases_covered': 10, 'min_val': 0.06791999936103821, 'max_val': 0.16627000272274017, 'sum': 1.4660000801086426, 'sum_squares': 0.2303919643163681}) - assert next(b.zoom_records(160, 'chr17', 0, 100000)) == (59898, 60058, {'total_items': 0, 'bases_covered': 160, 'min_val': 0.06791999936103821, 'max_val': 0.8688300251960754, 'sum': 101.3516616821289, 'sum_squares': 80.17473602294922}) + x = list(bw.zoom_records(10, "chr17", 0, 100_000)) + assert len(x) == 2940 + assert list(bw.zoom_records(10, "chr17", -1000, 100_000)) == x + x = list(bb.zoom_records(3911, "chr21", 0, 10_000_000)) + assert len(x) == 6 + assert list(bb.zoom_records(3911, "chr21", -1000, 10_000_000)) == x + + y = list(bw.zoom_records(10, "chr17", 0, bw.chroms("chr17"))) + assert len(y) == 13811 + assert list(bw.zoom_records(10, "chr17", 0, bw.chroms("chr17") * 2)) == y + y = list(bb.zoom_records(3911, "chr21", 0, bb.chroms("chr21"))) + assert len(y) == 1676 + assert 
list(bb.zoom_records(3911, "chr21", 0, bb.chroms("chr21") * 2)) == y + + # Fully out of bounds ranges return no records + assert len(list(bw.zoom_records(10, "chr17", -1000, -500))) == 0 + assert len(list(bw.zoom_records(10, "chr17", 83_257_441, 84_000_000))) == 0 + assert len(list(bb.zoom_records(3911, "chr21", -1000, -500))) == 0 + assert len(list(bb.zoom_records(3911, "chr21", 48_129_895, 49_000_000))) == 0 + + assert next(bw.zoom_records(10, "chr17", 0, 100_000)) == ( + 59898, + 59908, + { + "total_items": 0, + "bases_covered": 10, + "min_val": 0.06791999936103821, + "max_val": 0.16627000272274017, + "sum": 1.4660000801086426, + "sum_squares": 0.2303919643163681, + }, + ) + assert next(bw.zoom_records(160, "chr17", 0, 100000)) == ( + 59898, + 60058, + { + "total_items": 0, + "bases_covered": 160, + "min_val": 0.06791999936103821, + "max_val": 0.8688300251960754, + "sum": 101.3516616821289, + "sum_squares": 80.17473602294922, + }, + ) + # Unknown zoom => exception - try: - b.zoom_records(0, 'chr17') - except: - pass - else: - assert False + assert pytest.raises(KeyError, bw.zoom_records, 0, "chr17") + assert pytest.raises(KeyError, bb.zoom_records, 0, "chr21") + # Unknown chrom => exception - try: - b.zoom_records(10, 'chr11') - except: - pass - else: - assert False - - assert len(list(b.values('chr17', 100000, 110000))) == 10000 - assert len(list(b.values('chr17', 100000, 110000, 10))) == 10 - assert b.values('chr17', 100000, 110000, 10)[0] == 0.37435242314338685 - assert b.values('chr17', 100000, 110000, 10, 'max')[0] == 1.1978399753570557 - assert b.values('chr17', 100000, 110000, 10, 'min')[0] == 0.05403999984264374 - assert b.values('chr17', 100000, 110000, 10, 'mean', True)[0] == 0.37885534041374924 - assert list(b.values('chr17', 59890, 59900, 10, 'mean', True)) == [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06791999936103821, 0.06791999936103821] - assert list(b.values('chr17', 59890, 59900, 10, 'mean', True, -1.0)) == [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 
-1.0, -1.0, 0.06791999936103821, 0.06791999936103821] - assert math.isnan(b.values('chr17', -10, 10, 20, 'mean', True, 0.0)[0]) - assert not math.isnan(b.values('chr17', -10, 10, 20, 'mean', True, 0.0)[19]) - assert b.values('chr17', -10, 10, 20, 'mean', True, 0.0, 0.0)[0] == 0.0 - arr = np.zeros(20) - ret_arr = b.values('chr17', -10, 10, 20, 'mean', True, 0.0, np.nan, arr) + assert pytest.raises(KeyError, bw.zoom_records, 10, "chr11") + assert pytest.raises(KeyError, bb.zoom_records, 3911, "chr11") + + +def test_values(bw, bb): + assert len(bw.values("chr17", 100_000, 110_000)) == 10_000 + assert len(bb.values("chr21", 10_148_000, 10_158_000)) == 10_000 + + assert len(bw.values("chr17", 100000, 110000, 10)) == 10 + assert len(bb.values("chr21", 10_148_000, 10_158_000, 10)) == 10 + + assert bw.values("chr17", 100000, 110000, 10)[0] == 0.37435242314338685 + assert bb.values("chr21", 10_148_000, 10_158_000, 10)[0] == 0.175 + + assert bw.values("chr17", 100000, 110000, 10, "max")[0] == 1.1978399753570557 + assert bb.values("chr21", 10_148_000, 10_158_000, 10, "max")[0] == 1.0 + + assert bw.values("chr17", 100000, 110000, 10, "min")[0] == 0.05403999984264374 + assert bb.values("chr21", 10_148_000, 10_158_000, 10, "min")[0] == 0.0 + + assert ( + bw.values("chr17", 100000, 110000, 10, "mean", exact=True)[0] + == 0.37885534041374924 + ) + assert ( + bb.values("chr21", 10_148_000, 10_158_000, 10, "mean", exact=True)[0] + == 0.175 + ) + + assert list(bw.values("chr17", 59890, 59900, 10, "mean", exact=True)) == [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.06791999936103821, + 0.06791999936103821, + ] + + assert list( + bw.values("chr17", 59890, 59900, 10, "mean", exact=True, missing=-1.0) + ) == [ + -1.0, + -1.0, + -1.0, + -1.0, + -1.0, + -1.0, + -1.0, + -1.0, + 0.06791999936103821, + 0.06791999936103821, + ] + + x = bw.values("chr17", -10, 10, 20, "mean", exact=True, missing=0.0) + assert math.isnan(x[0]) + assert not math.isnan(x[19]) + x = 
bb.values("chr21", -10, 10, 20, "mean", exact=True, missing=0.0) + assert math.isnan(x[0]) + assert not math.isnan(x[19]) + + x = bw.values("chr17", -10, 10, 20, "mean", exact=True, missing=0.0, oob=0.0) + assert x[0] == 0.0 + x = bb.values("chr21", -10, 10, 20, "mean", exact=True, missing=0.0, oob=0.0) + assert x[0] == 0.0 + # The returned array is the same as the one passed, so both show the same values + arr = np.zeros(20) + ret_arr = bw.values( + "chr17", -10, 10, 20, "mean", exact=True, missing=0.0, oob=np.nan, arr=arr + ) assert math.isnan(arr[0]) assert arr[19] == 0.0 assert math.isnan(ret_arr[0]) assert ret_arr[19] == 0.0 + assert np.array_equal(arr, ret_arr, equal_nan=True) - b.close() - # Closing means file is not usable - try: - b.chroms() - except: - pass - else: - assert False - - b = pybigtools.open('../bigtools/resources/test/valid.bigWig', 'r') - # Files are closed when exiting a context manager - with b: - pass - try: - b.chroms() - except: - pass - else: - assert False - + ret_arr = bb.values( + "chr21", -10, 10, 20, "mean", exact=True, missing=0.0, oob=np.nan, arr=arr + ) + assert math.isnan(arr[0]) + assert arr[19] == 0.0 + assert math.isnan(ret_arr[0]) + assert ret_arr[19] == 0.0 + assert np.array_equal(arr, ret_arr, equal_nan=True) diff --git a/pybigtools/tests/test_bigwig.py b/pybigtools/tests/test_bigwig.py index 95ff918..ff3191b 100644 --- a/pybigtools/tests/test_bigwig.py +++ b/pybigtools/tests/test_bigwig.py @@ -1,40 +1,45 @@ +import math import os +import pathlib +import urllib.request -def install_dependencies(): - import urllib.request - if not os.path.exists('./ENCFF667CZO.bigWig'): - print("Downloading test bigWig") - urllib.request.urlretrieve('https://www.encodeproject.org/files/ENCFF667CZO/@@download/ENCFF667CZO.bigWig', './ENCFF667CZO.bigWig') +import pybigtools -print("Testing") -# Imports for test -import math +TEST_DIR = pathlib.Path(__file__).parent -# Test -import pybigtools -def test_bigwig_write_read(tmp_path): - 
install_dependencies() - # bigWig read - b = pybigtools.open('./ENCFF667CZO.bigWig') +def retrieve_encode_file(accession, filetype): + file_url = f"https://www.encodeproject.org/files/{accession}/@@download/{accession}.{filetype}" + path = TEST_DIR / f"data/{accession}.{filetype}" + if not path.exists(): + urllib.request.urlretrieve(file_url, path) + return path + - i = b.intervals("chr1") +def test_bigwig_read(): + path = retrieve_encode_file("ENCFF667CZO", "bigWig") + b = pybigtools.open(str(path)) + + i = b.records("chr1") n = next(i) assert n[0] == 10495 assert n[1] == 10545 assert math.isclose(n[2], 0.01591, abs_tol=0.00001) - i = b.intervals("chr12") + i = b.records("chr12") c = 0 for _ in i: c += 1 assert c == 1028747 - # bigWig write - chroms = ['chr1', 'chr2', 'chr3'] - clengths = { 'chr1': 10000, 'chr2': 8000, 'chr3': 6000 } + +def test_bigwig_write(tmpdir): + chroms = ["chr1", "chr2", "chr3"] + clengths = {"chr1": 10000, "chr2": 8000, "chr3": 6000} + def genintervals(): import random + for chrom in chroms: clength = clengths[chrom] current = random.randint(0, 300) @@ -49,20 +54,20 @@ def genintervals(): start = end + random.randint(20, 50) intervals = list(genintervals()) - b = pybigtools.open('./test.bigWig', 'w') + b = pybigtools.open(os.path.join(tmpdir, "test.bigWig"), "w") b.write(clengths, iter(intervals)) # If didn't need to test, could also be done like - #b.write(clengths, genintervals()) + # b.write(clengths, genintervals()) - b = pybigtools.open('./test.bigWig') + b = pybigtools.open(os.path.join(tmpdir, "test.bigWig")) i = [] for chrom in chroms: - i.extend(list(b.intervals(chrom))) + i.extend(list(b.records(chrom))) c = 0 for _ in i: c += 1 assert c == len(intervals) - for a,b in zip(i, intervals): + for a, b in zip(i, intervals): assert a[0] == b[1] assert a[1] == b[2] assert math.isclose(a[2], b[3], abs_tol=0.00001) From ac2b9aab654c3a9c3d12bfeb572fb2e32074639a Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Wed, 10 Apr 2024 13:09:52 
-0400 Subject: [PATCH 17/30] docs: Numpy-style docstrings for methods Update docstrings --- pybigtools/src/lib.rs | 370 +++++++++++++++++++++++++++++++----------- 1 file changed, 272 insertions(+), 98 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index b22393e..aff15eb 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -933,7 +933,7 @@ fn to_entry_array_zoom>>( Ok(()) } -/// This class is the interface for reading a bigWig or bigBed. +/// Interface for reading a BigWig or BigBed file. #[pyclass(module = "pybigtools")] struct BBIRead { bbi: BBIReadRaw, @@ -981,6 +981,7 @@ impl BBIRead { } } + /// Return a dict of information about the BBI file. fn info(&mut self, py: Python<'_>) -> PyResult { let (info, summary) = match &mut self.bbi { BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), @@ -1043,6 +1044,8 @@ impl BBIRead { Ok(info) } + /// Return a list of sizes in bases of the summary intervals used in each + /// of the zoom levels (i.e. reduction levels) of the BBI file. fn zooms(&self) -> PyResult> { let zooms = match &self.bbi { BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), @@ -1058,20 +1061,38 @@ impl BBIRead { Ok(zooms.iter().map(|z| z.reduction_level).collect()) } - /// Returns the autosql of this bbi file. + /// Return the autoSql schema definition of this BBI file. /// - /// For bigBeds, this comes directly from the autosql stored in the file. - /// For bigWigs, the autosql returned matches that of a bedGraph file. + /// For BigBeds, this schema comes directly from the autoSql string stored + /// in the file. For BigWigs, the schema generated describes a bedGraph + /// file. /// - /// By default, the autosql is returned as a string. 
Passing `parse = true` - /// returns instead a dictionary of the format: - /// ``` - /// { - /// "name": , - /// "comment": , - /// "fields": [(, , ), ...], - /// } - /// ``` + /// Parameters + /// ---------- + /// parse : bool, optional [default: False] + /// If True, return the schema as a dictionary. If False, return the + /// schema as a string. Default is False. + /// + /// Returns + /// ------- + /// schema : str or dict + /// The autoSql schema of the BBI file. If `parse` is True, the schema + /// is returned as a dictionary of the format: + /// + /// ``` + /// { + /// "name": , + /// "comment": , + /// "fields": [(, , ), ...], + /// } + /// ``` + /// + /// See Also + /// -------- + /// is_bigwig : Check if the BBI file is a bigWig. + /// is_bigbed : Check if the BBI file is a bigBed. + /// info : Get information about the BBI file. + /// zooms : Get the zoom levels of the BBI file. #[pyo3(signature = (parse = false))] fn sql(&mut self, py: Python, parse: bool) -> PyResult { pub const BEDGRAPH: &str = r#"table bedGraph @@ -1126,19 +1147,36 @@ impl BBIRead { Ok(obj) } - /// Returns the records of a given range on a chromosome. + /// Return the records of a given range on a chromosome. /// - /// The result is an iterator of tuples. For bigWigs, these tuples are in - /// the format (start: int, end: int, value: float). For bigBeds, these + /// The result is an iterator of tuples. For BigWigs, these tuples are in + /// the format (start: int, end: int, value: float). For BigBeds, these /// tuples are in the format (start: int, end: int, ...), where the "rest" /// fields are split by whitespace. /// - /// Missing values in bigWigs will results in non-contiguous records. + /// Parameters + /// ---------- + /// chrom : str + /// Name of the chromosome. + /// start, end : int, optional + /// The range to get values for. If end is not provided, it defaults to + /// the length of the chromosome. 
If start is not provided, it defaults + /// to the beginning of the chromosome. + /// + /// Returns + /// ------- + /// Iterator[tuple[int, int, float] or tuple[int, int, ...]] + /// An iterator of tuples in the format (start: int, end: int, value: + /// float) for BigWigs, or (start: int, end: int, *rest) for BigBeds. + /// + /// Notes + /// ----- + /// Missing values in BigWigs will results in non-contiguous records. /// - /// The chrom argument is the name of the chromosome. - /// The start and end arguments denote the range to get values for. - /// If end is not provided, it defaults to the length of the chromosome. - /// If start is not provided, it defaults to the beginning of the chromosome. + /// See Also + /// -------- + /// zoom_records : Get the zoom records of a given range on a chromosome. + /// values : Get the values of a given range on a chromosome. fn records( &mut self, py: Python<'_>, @@ -1196,15 +1234,52 @@ impl BBIRead { } } - /// Returns the zoom records of a given range on a chromosome for a given zoom level. + /// Return the zoom records of a given range on a chromosome for a given + /// zoom level. /// /// The result is an iterator of tuples. These tuples are in the format /// (start: int, end: int, summary: dict). /// - /// The chrom argument is the name of the chromosome. - /// The start and end arguments denote the range to get values for. - /// If end is not provided, it defaults to the length of the chromosome. - /// If start is not provided, it defaults to the beginning of the chromosome. + /// Parameters + /// ---------- + /// reduction_level : int + /// The zoom level to use, as a resolution in bases. Use the ``zooms`` + /// method to get a list of available zoom levels. + /// chrom : str + /// Name of the chromosome. + /// start, end : int, optional + /// The range to get values for. If end is not provided, it defaults + /// to the length of the chromosome. 
If start is not provided, it + /// defaults to the beginning of the chromosome. + /// + /// Returns + /// ------- + /// Iterator[tuple[int, int, dict]] + /// An iterator of tuples in the format (start: int, end: int, + /// summary: dict). + /// + /// Notes + /// ----- + /// The summary dictionary contains the following keys + /// + /// - ``total_items``: The number of items in the interval. + /// - ``bases_covered``: The number of bases covered by the interval. + /// - ``min_val``: The minimum value in the interval. + /// - ``max_val``: The maximum value in the interval. + /// - ``sum``: The sum of all values in the interval. + /// - ``sum_squares``: The sum of the squares of all values in the interval. + /// + /// For BigWigs, the summary statistics are derived from the unique + /// **signal values** associated with each base in the interval. + /// + /// For BigBeds, the summary statistics instead are derived from the + /// **number of BED intervals** overlapping each base in the interval. + /// + /// See Also + /// -------- + /// zooms : Get a list of available zoom levels. + /// records : Get the records of a given range on a chromosome. + /// values : Get the values of a given range on a chromosome. fn zoom_records( &mut self, reduction_level: u32, @@ -1274,21 +1349,66 @@ impl BBIRead { } } - /// Returns the values of a given range on a chromosome. + /// Return the values of a given range on a chromosome as a numpy array. + /// + /// For BigWigs, the returned values or summary statistics are derived + /// from the unique **signal values** associated with each base. + /// + /// For BigBeds, the returned values or summary statistics instead are + /// derived from the **number of BED intervals** overlapping each base. /// - /// For bigWigs, the result is an array of length (end - start). - /// If a value does not exist in the bigwig for a specific base, it will be nan. + /// Parameters + /// ---------- + /// chrom : str + /// Name of the chromosome. 
+ /// start, end : int, optional + /// The range to get values for. If end is not provided, it defaults + /// to the length of the chromosome. If start is not provided, it + /// defaults to the beginning of the chromosome. + /// bins : int, optional + /// If provided, the query interval will be divided into equally spaced + /// bins and the values in each bin will be interpolated or summarized. + /// If not provided, the values will be returned for each base. + /// summary : Literal["mean", "min", "max"], optional [default: "mean"] + /// The summary statistic to use. Currently supported statistics are + /// ``mean``, ``min``, and ``max``. + /// exact : bool, optional [default: False] + /// If True and ``bins`` is specified, return exact summary statistic + /// values instead of interpolating from the optimal zoom level. + /// Default is False. + /// missing : float, optional [default: 0.0] + /// Fill-in value for unreported data in valid regions. Default is 0. + /// oob : float, optional [default: NaN] + /// Fill-in value for out-of-bounds regions. Default is NaN. + /// arr : numpy.ndarray, optional + /// If provided, the values will be written to this array or array + /// view. The array must be of the correct size and type. /// - /// For bigBeds, the returned array instead represents a pileup of the count - /// of intervals overlapping each base. + /// Returns + /// ------- + /// numpy.ndarray + /// The signal values of the bigwig or bigbed in the specified range. /// - /// The chrom argument is the name of the chromosome. - /// The start and end arguments denote the range to get values for. - /// If end is not provided, it defaults to the length of the chromosome. - /// If start is not provided, it defaults to the beginning of the chromosome. - /// The default oob value is `numpy.nan`. 
+ /// Notes + /// ----- + /// A BigWig file encodes a step function, and the value at + /// a base is given by the signal value of the unique interval that + /// contains that base. /// - /// This returns a numpy array. + /// A BigBed file encodes a collection of (possibly overlapping) intervals + /// which may or may not be associated with quantitative scores. The + /// "value" at given base used here summarizes the number of intervals + /// overlapping that base, not any particular score. + /// + /// If a number of bins is requested and ``exact`` is False, the summarized + /// data is interpolated from the closest available zoom level. If you + /// need accurate summary data and are okay with small trade-off in speed, + /// set ``exact`` to True. + /// + /// See Also + /// -------- + /// records : Get the records of a given range on a chromosome. + /// zoom_records : Get the zoom records of a given range on a chromosome. #[pyo3( signature = (chrom, start, end, bins=None, summary="mean".to_string(), exact=false, missing=0.0, oob=f64::NAN, arr=None), text_signature = r#"(chrom, start, end, bins=None, summary="mean", exact=False, missing=0.0, oob=..., arr=None)"#, @@ -1341,12 +1461,19 @@ impl BBIRead { } } - /// Returns the chromosomes in a bigwig, and their lengths. + /// Return the names of chromosomes in a BBI file and their lengths. /// - /// The chroms argument can be either String or None. - /// If it is None, then all chroms will be returned. - /// If it is a String, then the length of that chromosome will be returned. - /// If the chromosome doesn't exist, nothing will be returned. + /// Parameters + /// ---------- + /// chrom : str or None + /// The name of the chromosome to get the length of. If None, then a + /// dictionary of all chromosome sizes will be returned. If the + /// chromosome doesn't exist, returns None. + /// + /// Returns + /// ------- + /// int or Dict[str, int] or None: + /// Chromosome length or a dictionary of chromosome lengths. 
fn chroms(&mut self, py: Python, chrom: Option) -> PyResult> { fn get_chrom_obj( b: &B, @@ -1383,25 +1510,38 @@ impl BBIRead { } /// Gets the average values from a bigWig over the entries of a bed file. - /// Raises an exception if the current file is a bigBed. /// - /// Parameters: - /// bed (str): The path to the bed. - /// names (None, bool, or int): - /// If `None`, then each return value will be a single `float`, - /// the average value over an interval in the bed file. - /// If `True`, then each return value will be a tuple of the value of column 4 - /// and the average value over the interval with that name in the bed file. - /// If `False`, then each return value will be a tuple of the interval in the format - /// `{chrom}:{start}-{end}` and the average value over that interval. - /// If `0`, then each return value will match as if `False` was passed. - /// If a `1+`, then each return value will be a tuple of the value of column of this - /// parameter (1-based) and the average value over the interval. + /// Parameters + /// ---------- + /// bed : str + /// The path to the bed. + /// names : bool or int, optional + /// If ``None``, then each return value will be a single float, the + /// average value over an interval in the bed file. + /// + /// If ``True``, then each return value will be a tuple of the value of + /// column 4 and the average value over the interval with that name in the + /// bed file. + /// + /// If ``False``, then each return value will be a tuple of the interval + /// in the format ``{chrom}:{start}-{end}`` and the average value over + /// that interval. + /// + /// If ``0``, then each return value will match as if ``False`` was passed. + /// + /// If a ``1+``, then each return value will be a tuple of the value of + /// column of this parameter (1-based) and the average value over the + /// interval. + /// + /// Returns + /// ------- + /// Generator of float or tuple. /// - /// Returns: - /// This returns a generator of values. 
(Therefore, to save to a list, do `list(bigWigAverageOverBed(...))`) - /// If no name is specified (see the `names` parameter above), then returns a generator of `float`s. - /// If a name column is specified (see above), then returns a generator of tuples `({name}, {average})` + /// Notes + /// ----- + /// If no ``name`` field is specified, returns a generator of floats. + /// If a ``name`` column is specified, returns a generator of tuples + /// ``({name}, {average})``. fn average_over_bed( &mut self, py: Python, @@ -1530,9 +1670,10 @@ impl ZoomIntervalIterator { } } -/// This class is an iterator for `Values` from a bigWig. -/// It returns only values that exist in the bigWig, skipping -/// any missing intervals. +/// An iterator for intervals in a bigWig. +/// +/// It returns only values that exist in the bigWig, skipping any missing +/// intervals. #[pyclass(module = "pybigtools")] struct BigWigIntervalIterator { iter: Box> + Send>, @@ -1553,7 +1694,7 @@ impl BigWigIntervalIterator { } } -/// This class is an interator for the entries in a bigBed file. +/// An iterator for the entries in a bigBed. #[pyclass(module = "pybigtools")] struct BigBedEntriesIterator { iter: Box> + Send>, @@ -1581,7 +1722,7 @@ impl BigBedEntriesIterator { } } -/// This class is the interface for writing a bigWig. +/// Interface for writing to a BigWig file. #[pyclass(module = "pybigtools")] struct BigWigWrite { bigwig: Option, @@ -1589,11 +1730,24 @@ struct BigWigWrite { #[pymethods] impl BigWigWrite { - /// Writes the values passsed to the bigwig file. - /// The underlying file will be closed automatically when the function completes (and no other operations will be able to be performed). + /// Write values to the BigWig file. + /// + /// The underlying file will be closed automatically when the function + /// completes (and no other operations will be able to be performed). /// - /// The chroms argument should be a dictionary with keys as chromosome names and values as their length. 
- /// The vals argument should be an iterable with values (String, int, int, float) that represents each value to write in the format (chromosome, start, end, value). + /// Parameters + /// ---------- + /// chroms : Dict[str, int] + /// A dictionary with keys as chromosome names and values as their + /// length. + /// vals : Iterable[tuple[str, int, int, float]] + /// An iterable with values that represents each value to write in the + /// format (chromosome, start, end, value). + /// + /// Notes + /// ----- + /// The underlying file will be closed automatically when the function + /// completes, and no other operations will be able to be performed. fn write(&mut self, py: Python, chroms: &PyDict, vals: Py) -> PyResult<()> { let runtime = runtime::Builder::new_multi_thread() .worker_threads( @@ -1709,18 +1863,17 @@ impl BigWigWrite { }) } - /// close() - /// -- + /// Close the file. /// - /// Manually closed the file. - /// No other operations will be allowed after it is closed. This is done automatically after write is performed. + /// No other operations will be allowed after it is closed. This is done + /// automatically after write is performed. fn close(&mut self) -> PyResult<()> { self.bigwig.take(); Ok(()) } } -/// This class is the interface for writing to a bigBed. +/// Interface for writing to a BigBed file. #[pyclass(module = "pybigtools")] struct BigBedWrite { bigbed: Option, @@ -1728,10 +1881,25 @@ struct BigBedWrite { #[pymethods] impl BigBedWrite { - /// Writes the values passsed to the bigwig file. The underlying file will be closed automatically when the function completes (and no other operations will be able to be performed). - /// The chroms argument should be a dictionary with keys as chromosome names and values as their length. 
- /// The vals argument should be an iterable with values (String, int, int, String) that represents each value to write in the format (chromosome, start, end, rest) - /// The rest String should consist of tab-delimited fields. + /// Write values to the BigBed file. + /// + /// The underlying file will be closed automatically when the function + /// completes (and no other operations will be able to be performed). + /// + /// Parameters + /// ---------- + /// chroms : Dict[str, int] + /// A dictionary with keys as chromosome names and values as their + /// length. + /// vals : Iterable[tuple[str, int, int, str]] + /// An iterable with values that represents each value to write in the + /// format (chromosome, start, end, rest). The ``rest`` string should + /// consist of tab-delimited fields. + /// + /// Notes + /// ----- + /// The underlying file will be closed automatically when the function + /// completes, and no other operations will be able to be performed. fn write(&mut self, py: Python, chroms: &PyDict, vals: Py) -> PyResult<()> { let runtime = runtime::Builder::new_multi_thread() .worker_threads( @@ -1850,7 +2018,10 @@ impl BigBedWrite { }) } - /// Manually closed the file. No other operations will be allowed after it is closed. This is done automatically after write is performed. + /// Close the file. + /// + /// No other operations will be allowed after it is closed. This is done + /// automatically after write is performed. fn close(&mut self) -> PyResult<()> { self.bigbed.take(); Ok(()) @@ -1908,26 +2079,29 @@ impl BigWigAverageOverBedEntriesIterator { } } -/// This is the entrypoint for working with bigWigs or bigBeds. +/// Open a BigWig or BigBed file for reading or writing. 
/// -/// The first argument can take one of three values: -/// 1) A path to a file as a string -/// 2) An http url for a remote file as a string -/// 3) A file-like object with a `read` and `seek` method +/// Parameters +/// ---------- +/// path_url_or_file_like : str or file-like object +/// The path to a file or an http url for a remote file as a string, or +/// a Python file-like object with ``read`` and ``seek`` methods. +/// mode : Literal["r", "w"], optional [default: "r"] +/// The mode to open the file in. If not provided, it will default to read. +/// "r" will open a bigWig/bigBed for reading but will not allow writing. +/// "w" will open a bigWig/bigBed for writing but will not allow reading. /// -/// When writing, only a file path can be used. +/// Returns +/// ------- +/// BigWigWrite or BigBedWrite or BBIRead +/// The object for reading or writing the BigWig or BigBed file. /// -/// The mode is either "w", "r", or None. "r" or None will open a -/// bigWig or bigBed for reading (but will not allow writing). -/// "w" Will open a bigWig/bigBed for writing (but will not allow reading). +/// Notes +/// ----- +/// For writing, only a file path is currently accepted. /// -/// If passing a file-like object, simultaneous reading of different intervals +/// If passing a file-like object, concurrent reading of different intervals /// is not supported and may result in incorrect behavior. -/// -/// This returns one of the following: -/// - `BigWigWrite` -/// - `BigBedWrite` -/// - `BBIRead` #[pyfunction] fn open(py: Python, path_url_or_file_like: PyObject, mode: Option) -> PyResult { let iswrite = match &mode { @@ -2105,22 +2279,22 @@ fn open_path_or_url( Ok(res) } -/// The base module for opening a bigWig or bigBed. The only defined function is `open`. +/// Read and write Big Binary Indexed (BBI) file types: BigWig and BigBed. 
#[pymodule] fn pybigtools(_py: Python, m: &PyModule) -> PyResult<()> { m.add("__version__", env!("CARGO_PKG_VERSION"))?; - m.add("BBIFileClosed", m.py().get_type::())?; - m.add("BBIReadError", m.py().get_type::())?; m.add_wrapped(wrap_pyfunction!(open))?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; - m.add_class::()?; m.add_class::()?; + m.add("BBIFileClosed", m.py().get_type::())?; + m.add("BBIReadError", m.py().get_type::())?; + Ok(()) } From 7ee1ab7c48bd926ab0687f66881f505a4e8f5f3f Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Thu, 11 Apr 2024 19:45:55 -0400 Subject: [PATCH 18/30] feat: Make chrom not found raise a KeyError instead of returning None --- pybigtools/src/lib.rs | 42 ++++++++++++++++++++++-------------- pybigtools/tests/test_api.py | 6 +++--- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index aff15eb..4250886 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -25,7 +25,7 @@ use bigtools::utils::reopen::Reopen; use file_like::PyFileLikeObject; use numpy::ndarray::ArrayViewMut; use numpy::PyArray1; -use pyo3::exceptions::{self, PyTypeError}; +use pyo3::exceptions::{self, PyKeyError, PyTypeError}; use pyo3::types::{IntoPyDict, PyAny, PyDict, PyFloat, PyInt, PyIterator, PyString, PyTuple}; use pyo3::{create_exception, wrap_pyfunction}; use pyo3::{prelude::*, PyTraverseError, PyVisit}; @@ -1474,30 +1474,40 @@ impl BBIRead { /// ------- /// int or Dict[str, int] or None: /// Chromosome length or a dictionary of chromosome lengths. 
- fn chroms(&mut self, py: Python, chrom: Option) -> PyResult> { + fn chroms(&mut self, py: Python, chrom: Option) -> PyResult { fn get_chrom_obj( b: &B, py: Python, chrom: Option, - ) -> Option { + ) -> PyResult { match chrom { - Some(chrom) => b - .chroms() - .into_iter() - .find(|c| c.name == chrom) - .map(|c| c.length) - .map(|c| c.to_object(py)), - None => Some( - b.chroms() + Some(chrom) => { + let chrom_length = b + .chroms() + .into_iter() + .find(|c| c.name == chrom) + .ok_or_else(|| { + PyErr::new::( + "No chromosome found with the specified name", + ) + }) + .map(|c| c.length.to_object(py))?; + Ok(chrom_length) + } + None => { + let chrom_dict: PyObject = b + .chroms() .into_iter() .map(|c| (c.name.clone(), c.length)) .into_py_dict(py) - .into(), - ), + .into(); + Ok(chrom_dict) + } } } - Ok(match &self.bbi { - BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), + + match &self.bbi { + BBIReadRaw::Closed => Err(BBIFileClosed::new_err("File is closed.")), BBIReadRaw::BigWigFile(b) => get_chrom_obj(b, py, chrom), #[cfg(feature = "remote")] BBIReadRaw::BigWigRemote(b) => get_chrom_obj(b, py, chrom), @@ -1506,7 +1516,7 @@ impl BBIRead { #[cfg(feature = "remote")] BBIReadRaw::BigBedRemote(b) => get_chrom_obj(b, py, chrom), BBIReadRaw::BigBedFileLike(b) => get_chrom_obj(b, py, chrom), - }) + } } /// Gets the average values from a bigWig over the entries of a bed file. 
diff --git a/pybigtools/tests/test_api.py b/pybigtools/tests/test_api.py index 4d3f29b..bd0bb88 100644 --- a/pybigtools/tests/test_api.py +++ b/pybigtools/tests/test_api.py @@ -74,9 +74,9 @@ def test_chroms(bw, bb): assert bw.chroms("chr17") == 83257441 assert bb.chroms("chr21") == 48129895 - # Missing chrom => None - assert bw.chroms("chr11") is None - assert bb.chroms("chr11") is None + # Missing chrom => KeyError + pytest.raises(KeyError, bw.chroms, "chr11") + pytest.raises(KeyError, bb.chroms, "chr11") def test_zooms(bw, bb): From 3f52238915859d117fb3c386b3e99a43486b577e Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Thu, 11 Apr 2024 20:57:35 -0400 Subject: [PATCH 19/30] feat: Support pathlib.Path as input to open --- pybigtools/src/lib.rs | 7 +++++++ pybigtools/tests/test_api.py | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 4250886..45ffaef 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -2131,6 +2131,13 @@ fn open(py: Python, path_url_or_file_like: PyObject, mode: Option) -> Py return open_path_or_url(py, string_ref.to_str().unwrap().to_owned(), iswrite); } + // If pathlib.Path, convert to string and try to open + let path_class = py.import("pathlib")?.getattr("Path")?; + if path_url_or_file_like.as_ref(py).is_instance(path_class)? 
{ + let path_str = path_url_or_file_like.as_ref(py).str()?.to_str()?; + return open_path_or_url(py, path_str.to_owned(), iswrite); + } + if iswrite { return Err(PyErr::new::(format!( "Writing only supports file names", diff --git a/pybigtools/tests/test_api.py b/pybigtools/tests/test_api.py index bd0bb88..76184d1 100644 --- a/pybigtools/tests/test_api.py +++ b/pybigtools/tests/test_api.py @@ -17,6 +17,12 @@ def test_open_close(): b.close() assert pytest.raises(pybigtools.BBIFileClosed, b.chroms) + # Works with pathlib.Path + path = REPO_ROOT / "bigtools/resources/test/valid.bigWig" + b = pybigtools.open(path, "r") + b.close() + assert pytest.raises(pybigtools.BBIFileClosed, b.chroms) + # Files are closed when exiting a context manager with pybigtools.open(path, "r") as b: pass From 29684d67ce7db7b6d3dda192cecafd806a705286 Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Fri, 12 Apr 2024 04:47:22 -0400 Subject: [PATCH 20/30] test: Add url and filelikes to tests ci: Install pybigtools[test] for testing --- .github/workflows/ci.yml | 4 ++-- pybigtools/pyproject.toml | 24 ++++++++++++++++++++ pybigtools/tests/test_api.py | 44 +++++++++++++++++++++++++++++------- 3 files changed, 62 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 26baa88..7b25fa1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,9 +56,9 @@ jobs: - name: Install pybigtools run: | - pip install maturin pytest + pip install maturin cd pybigtools - pip install -e . 
+ pip install -e .[test] - name: Install run: | diff --git a/pybigtools/pyproject.toml b/pybigtools/pyproject.toml index 562e4be..cd8225b 100644 --- a/pybigtools/pyproject.toml +++ b/pybigtools/pyproject.toml @@ -6,10 +6,34 @@ build-backend = "maturin" name = "pybigtools" description = "Python bindings to the Bigtools Rust library for high-performance BigWig and BigBed I/O" license = { text = "MIT" } +keywords = [ + "bigwig", + "bigbed", + "bbi", + "bioinformatics", + "genomics", + "kent", + "ucsc", + "rust", +] +classifiers = [ + "Development Status :: 4 - Beta", + "Operating System :: OS Independent", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Programming Language :: Rust", + "Programming Language :: Python", + "Programming Language :: Python :: 3", +] dependencies = [ "numpy" ] +[project.optional-dependencies] +test = [ + "pytest", + "smart_open[http]" +] + [project.urls] homepage = "https://github.com/jackh726/bigtools" documentation = "https://bigtools.readthedocs.io" diff --git a/pybigtools/tests/test_api.py b/pybigtools/tests/test_api.py index 76184d1..28d1715 100644 --- a/pybigtools/tests/test_api.py +++ b/pybigtools/tests/test_api.py @@ -4,6 +4,7 @@ import numpy as np import pytest +import smart_open import pybigtools @@ -17,12 +18,6 @@ def test_open_close(): b.close() assert pytest.raises(pybigtools.BBIFileClosed, b.chroms) - # Works with pathlib.Path - path = REPO_ROOT / "bigtools/resources/test/valid.bigWig" - b = pybigtools.open(path, "r") - b.close() - assert pytest.raises(pybigtools.BBIFileClosed, b.chroms) - # Files are closed when exiting a context manager with pybigtools.open(path, "r") as b: pass @@ -33,6 +28,39 @@ def test_open_close(): assert pytest.raises(pybigtools.BBIReadError, pybigtools.open, s, "r") +def test_open_pathlib_path(): + path = REPO_ROOT / "bigtools/resources/test/valid.bigWig" + with pybigtools.open(path, "r") as b: + assert b.chroms() == {"chr17": 83_257_441} + + +def test_open_raw_url(): + url = 
"http://genome.ucsc.edu/goldenPath/help/examples/bigLollyExample2.bb" + with pybigtools.open(url, "r") as b: + assert b.chroms() == {'chr21': 46_709_983} + + +def test_open_filelike(): + # Regular file + with open(REPO_ROOT / "bigtools/resources/test/valid.bigWig", "rb") as f: + with pybigtools.open(f, "r") as b: + assert b.chroms() == {"chr17": 83_257_441} + + # BytesIO + with open(REPO_ROOT / "bigtools/resources/test/valid.bigWig", "rb") as f: + bw_bytes = f.read() + + with BytesIO(bw_bytes) as f: + with pybigtools.open(f, "r") as b: + assert b.chroms() == {"chr17": 83_257_441} + + # Other file-like objects + url = "http://genome.ucsc.edu/goldenPath/help/examples/bigWigExample.bw" + with smart_open.open(url, "rb") as f: + with pybigtools.open(f, "r") as b: + assert b.chroms() == {'chr21': 48_129_895} + + @pytest.fixture def bw(): path = str(REPO_ROOT / "bigtools/resources/test/valid.bigWig") @@ -77,8 +105,8 @@ def test_chroms(bw, bb): assert bb.chroms() == {"chr21": 48_129_895} # Arg with chrom name => length - assert bw.chroms("chr17") == 83257441 - assert bb.chroms("chr21") == 48129895 + assert bw.chroms("chr17") == 83_257_441 + assert bb.chroms("chr21") == 48_129_895 # Missing chrom => KeyError pytest.raises(KeyError, bw.chroms, "chr11") From f46571ae9375df26501a00c9aff0595b613fb3f8 Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Sat, 13 Apr 2024 10:57:50 -0400 Subject: [PATCH 21/30] Add doc string to custom exceptions --- pybigtools/src/lib.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 45ffaef..93dcf17 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -37,8 +37,18 @@ mod file_like; type ValueTuple = (u32, u32, f32); -create_exception!(pybigtools, BBIFileClosed, exceptions::PyException); -create_exception!(pybigtools, BBIReadError, exceptions::PyException); +create_exception!( + pybigtools, + BBIFileClosed, + exceptions::PyException, + "BBI 
File is closed." +); +create_exception!( + pybigtools, + BBIReadError, + exceptions::PyException, + "Error reading BBI file." +); fn start_end( bbi: &BBIReadRaw, @@ -209,7 +219,7 @@ fn intervals_to_array( Some(bins) => { if v.len() != bins { return Err(PyErr::new::(format!( - "`arr` does not the expected size (expected `{}`, found `{}`), if passed.", + "`arr` does not have the expected size (expected `{}`, found `{}`), if passed.", bins, v.len(), ))); @@ -303,6 +313,7 @@ fn intervals_to_array( Ok(arr) } + fn entries_to_array( py: Python<'_>, b: &mut BigBedReadRaw, @@ -458,6 +469,7 @@ fn to_array>>( } Ok(()) } + fn to_array_bins>>( start: i32, end: i32, @@ -564,6 +576,7 @@ fn to_array_bins>>( } Ok(()) } + fn to_array_zoom>>( start: i32, end: i32, @@ -681,6 +694,7 @@ fn to_array_zoom>>( } Ok(()) } + fn to_entry_array>>( start: i32, end: i32, @@ -700,6 +714,7 @@ fn to_entry_array>>( } Ok(()) } + fn to_entry_array_bins>>( start: i32, end: i32, From d04cc4cb1cd5592e89d1dbdc809c8a082586adf5 Mon Sep 17 00:00:00 2001 From: Nezar Abdennur Date: Sat, 13 Apr 2024 10:59:52 -0400 Subject: [PATCH 22/30] fix: Make chrom and end optional for values() --- pybigtools/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 93dcf17..15e1bdc 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -1425,7 +1425,7 @@ impl BBIRead { /// records : Get the records of a given range on a chromosome. /// zoom_records : Get the zoom records of a given range on a chromosome. 
#[pyo3( - signature = (chrom, start, end, bins=None, summary="mean".to_string(), exact=false, missing=0.0, oob=f64::NAN, arr=None), + signature = (chrom, start=None, end=None, bins=None, summary="mean".to_string(), exact=false, missing=0.0, oob=f64::NAN, arr=None), text_signature = r#"(chrom, start, end, bins=None, summary="mean", exact=False, missing=0.0, oob=..., arr=None)"#, )] fn values( From 64dcb9f92487d1ed0f798c8ffecff5eb34bf26cc Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Mon, 8 Apr 2024 16:50:24 -0400 Subject: [PATCH 23/30] Fix test --- pybigtools/src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 15e1bdc..856e008 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -1117,8 +1117,7 @@ impl BBIRead { uint chromStart; "Start position in chromosome" uint chromEnd; "End position in chromosome" float value; "Value for a given interval" -)\ - "#; +)"#; let schema = match &mut self.bbi { BBIReadRaw::Closed => return Err(BBIFileClosed::new_err("File is closed.")), BBIReadRaw::BigWigFile(_) | BBIReadRaw::BigWigFileLike(_) => BEDGRAPH.to_string(), From bff95bd4de0c02d00c07e4f006aab743f1c6d420 Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Mon, 13 May 2024 10:00:27 -0400 Subject: [PATCH 24/30] Use nanmean instead of mean --- pybigtools/src/lib.rs | 104 ++++++++++++++++++++--------------- pybigtools/tests/test_api.py | 8 +-- 2 files changed, 65 insertions(+), 47 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 856e008..4695de2 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -482,7 +482,7 @@ fn to_array_bins>>( assert_eq!(v.len(), bins); v.fill(missing); - let mut bin_data: VecDeque<(usize, i32, i32, Option)> = VecDeque::new(); + let mut bin_data: VecDeque<(usize, i32, i32, Option<(i32, f64)>)> = VecDeque::new(); let bin_size = (end - 
start) as f64 / bins as f64; for interval in iter { let interval = interval?; @@ -498,13 +498,13 @@ fn to_array_bins>>( match summary { Summary::Min => { - v[bin] = front.3.unwrap_or(missing); + v[bin] = front.3.map(|v| v.1).unwrap_or(missing); } Summary::Max => { - v[bin] = front.3.unwrap_or(missing); + v[bin] = front.3.map(|v| v.1).unwrap_or(missing); } Summary::Mean => { - v[bin] = front.3.map(|v| v / bin_size).unwrap_or(missing); + v[bin] = front.3.map(|(c, v)| v / (c as f64)).unwrap_or(missing); } } } else { @@ -534,13 +534,13 @@ fn to_array_bins>>( assert!(bin_data.iter().find(|b| b.0 == bin).is_some()); } for (_bin, bin_start, bin_end, data) in bin_data.iter_mut() { - let v = data.get_or_insert_with(|| { + let (c, v) = data.get_or_insert_with(|| { match summary { // min & max are defined for NAN and we are about to set it // can't use 0.0 because it may be either below or above the real value - Summary::Min | Summary::Max => f64::NAN, + Summary::Min | Summary::Max => (0, f64::NAN), // addition is not defined for NAN - Summary::Mean => 0.0, + Summary::Mean => (0, 0.0), } }); match summary { @@ -555,6 +555,7 @@ fn to_array_bins>>( let overlap_end = (*bin_end).min(interval_end); let overlap_size: i32 = overlap_end - overlap_start; *v += (overlap_size as f64) * interval.value as f64; + *c += overlap_size; } } } @@ -564,13 +565,13 @@ fn to_array_bins>>( match summary { Summary::Min => { - v[bin] = front.3.unwrap_or(missing); + v[bin] = front.3.map(|v| v.1).unwrap_or(missing); } Summary::Max => { - v[bin] = front.3.unwrap_or(missing); + v[bin] = front.3.map(|v| v.1).unwrap_or(missing); } Summary::Mean => { - v[bin] = front.3.map(|v| v / bin_size).unwrap_or(missing); + v[bin] = front.3.map(|(c, v)| v / (c as f64)).unwrap_or(missing); } } } @@ -589,7 +590,8 @@ fn to_array_zoom>>( assert_eq!(v.len(), bins); v.fill(missing); - let mut bin_data: VecDeque<(usize, i32, i32, Option)> = VecDeque::new(); + // (bin, bin_start, bin_end, Option<(covered_bases, value)>) + let 
mut bin_data: VecDeque<(usize, i32, i32, Option<(i32, f64)>)> = VecDeque::new(); let bin_size = (end - start) as f64 / bins as f64; for interval in iter { let interval = interval?; @@ -605,13 +607,13 @@ fn to_array_zoom>>( match summary { Summary::Min => { - v[bin] = front.3.unwrap_or(missing); + v[bin] = front.3.map(|v| v.1).unwrap_or(missing); } Summary::Max => { - v[bin] = front.3.unwrap_or(missing); + v[bin] = front.3.map(|v| v.1).unwrap_or(missing); } Summary::Mean => { - v[bin] = front.3.map(|v| v / bin_size).unwrap_or(missing); + v[bin] = front.3.map(|(c, v)| v / (c as f64)).unwrap_or(missing); } } } else { @@ -641,37 +643,37 @@ fn to_array_zoom>>( assert!(bin_data.iter().find(|b| b.0 == bin).is_some()); } for (_bin, bin_start, bin_end, data) in bin_data.iter_mut() { + let overlap_start = (*bin_start).max(interval_start); + let overlap_end = (*bin_end).min(interval_end); + let overlap_size: i32 = overlap_end - overlap_start; match data.as_mut() { - Some(v) => match summary { + Some((c, v)) => match summary { Summary::Min => { *v = v.min(interval.summary.min_val as f64); + *c += overlap_size; } Summary::Max => { *v = v.max(interval.summary.max_val as f64); + *c += overlap_size; } Summary::Mean => { - let overlap_start = (*bin_start).max(interval_start); - let overlap_end = (*bin_end).min(interval_end); - let overlap_size = overlap_end - overlap_start; let zoom_mean = (interval.summary.sum as f64) / (interval.summary.bases_covered as f64); *v += (overlap_size as f64) * zoom_mean; + *c += overlap_size; } }, None => match summary { Summary::Min => { - *data = Some(interval.summary.min_val as f64); + *data = Some((overlap_size, interval.summary.min_val as f64)); } Summary::Max => { - *data = Some(interval.summary.max_val as f64); + *data = Some((overlap_size, interval.summary.max_val as f64)); } Summary::Mean => { - let overlap_start = (*bin_start).max(interval_start); - let overlap_end = (*bin_end).min(interval_end); - let overlap_size = overlap_end - 
overlap_start; let zoom_mean = (interval.summary.sum as f64) / (interval.summary.bases_covered as f64); - *data = Some((overlap_size as f64) * zoom_mean); + *data = Some((overlap_size, (overlap_size as f64) * zoom_mean)); } }, } @@ -682,13 +684,13 @@ fn to_array_zoom>>( match summary { Summary::Min => { - v[bin] = front.3.unwrap_or(missing); + v[bin] = front.3.map(|v| v.1).unwrap_or(missing); } Summary::Max => { - v[bin] = front.3.unwrap_or(missing); + v[bin] = front.3.map(|v| v.1).unwrap_or(missing); } Summary::Mean => { - v[bin] = front.3.map(|v| v / bin_size).unwrap_or(missing); + v[bin] = front.3.map(|(c, v)| v / (c as f64)).unwrap_or(missing); } } } @@ -727,7 +729,7 @@ fn to_entry_array_bins>>( assert_eq!(v.len(), bins); v.fill(missing); - let mut bin_data: VecDeque<(usize, i32, i32, Vec)> = VecDeque::new(); + let mut bin_data: VecDeque<(usize, i32, i32, i32, Vec)> = VecDeque::new(); let bin_size = (end - start) as f64 / bins as f64; for interval in iter { let interval = interval?; @@ -744,20 +746,23 @@ fn to_entry_array_bins>>( match summary { Summary::Min => { v[bin] = front - .3 + .4 .into_iter() .reduce(|min, v| min.min(v)) .unwrap_or(missing); } Summary::Max => { v[bin] = front - .3 + .4 .into_iter() .reduce(|max, v| max.max(v)) .unwrap_or(missing); } Summary::Mean => { - v[bin] = front.3.into_iter().sum::() / bin_size; + v[bin] = (front.3 != 0) + .then(|| front.3 as f64) + .map(|c| front.4.into_iter().sum::() / c) + .unwrap_or(missing); } } } else { @@ -776,6 +781,7 @@ fn to_entry_array_bins>>( bin, bin_start, bin_end, + 0, vec![missing; (bin_end - bin_start) as usize], )); } @@ -791,7 +797,7 @@ fn to_entry_array_bins>>( for bin in bin_start..bin_end { assert!(bin_data.iter().find(|b| b.0 == bin).is_some()); } - for (_bin, bin_start, bin_end, data) in bin_data.iter_mut() { + for (_bin, bin_start, bin_end, covered, data) in bin_data.iter_mut() { let overlap_start = (*bin_start).max(interval_start); let overlap_end = (*bin_end).min(interval_end); @@ -801,6 
+807,7 @@ fn to_entry_array_bins>>( // If NAN, then 0.0 + 1.0, else i + 1.0 *i = (*i).max(0.0) + 1.0; } + *covered += overlap_end - overlap_start; } } while let Some(front) = bin_data.pop_front() { @@ -809,20 +816,23 @@ fn to_entry_array_bins>>( match summary { Summary::Min => { v[bin] = front - .3 + .4 .into_iter() .reduce(|min, v| min.min(v)) .unwrap_or(missing); } Summary::Max => { v[bin] = front - .3 + .4 .into_iter() .reduce(|max, v| max.max(v)) .unwrap_or(missing); } Summary::Mean => { - v[bin] = front.3.into_iter().sum::() / bin_size; + v[bin] = (front.3 != 0) + .then(|| front.3 as f64) + .map(|c| front.4.into_iter().sum::() / c) + .unwrap_or(missing); } } } @@ -841,7 +851,7 @@ fn to_entry_array_zoom>>( assert_eq!(v.len(), bins); v.fill(missing); - let mut bin_data: VecDeque<(usize, i32, i32, Vec)> = VecDeque::new(); + let mut bin_data: VecDeque<(usize, i32, i32, i32, Vec)> = VecDeque::new(); let bin_size = (end - start) as f64 / bins as f64; for interval in iter { let interval = interval?; @@ -858,20 +868,23 @@ fn to_entry_array_zoom>>( match summary { Summary::Min => { v[bin] = front - .3 + .4 .into_iter() .reduce(|min, v| min.min(v)) .unwrap_or(missing); } Summary::Max => { v[bin] = front - .3 + .4 .into_iter() .reduce(|max, v| max.max(v)) .unwrap_or(missing); } Summary::Mean => { - v[bin] = front.3.into_iter().sum::() / bin_size; + v[bin] = (front.3 != 0) + .then(|| front.3 as f64) + .map(|c| front.4.into_iter().sum::() / c) + .unwrap_or(missing); } } } else { @@ -890,6 +903,7 @@ fn to_entry_array_zoom>>( bin, bin_start, bin_end, + 0, vec![missing; (bin_end - bin_start) as usize], )); } @@ -905,7 +919,7 @@ fn to_entry_array_zoom>>( for bin in bin_start..bin_end { assert!(bin_data.iter().find(|b| b.0 == bin).is_some()); } - for (_bin, bin_start, bin_end, data) in bin_data.iter_mut() { + for (_bin, bin_start, bin_end, c, data) in bin_data.iter_mut() { let overlap_start = (*bin_start).max(interval_start); let overlap_end = (*bin_end).min(interval_end); @@ 
-920,6 +934,7 @@ fn to_entry_array_zoom>>( Summary::Max => *i = i.max(interval.summary.max_val), } } + *c += overlap_end - overlap_start; } } while let Some(front) = bin_data.pop_front() { @@ -928,20 +943,23 @@ fn to_entry_array_zoom>>( match summary { Summary::Min => { v[bin] = front - .3 + .4 .into_iter() .reduce(|min, v| min.min(v)) .unwrap_or(missing); } Summary::Max => { v[bin] = front - .3 + .4 .into_iter() .reduce(|max, v| max.max(v)) .unwrap_or(missing); } Summary::Mean => { - v[bin] = front.3.into_iter().sum::() / bin_size; + v[bin] = (front.3 != 0) + .then(|| front.3 as f64) + .map(|c| front.4.into_iter().sum::() / c) + .unwrap_or(missing); } } } diff --git a/pybigtools/tests/test_api.py b/pybigtools/tests/test_api.py index 28d1715..07347dc 100644 --- a/pybigtools/tests/test_api.py +++ b/pybigtools/tests/test_api.py @@ -246,8 +246,8 @@ def test_values(bw, bb): assert len(bw.values("chr17", 100000, 110000, 10)) == 10 assert len(bb.values("chr21", 10_148_000, 10_158_000, 10)) == 10 - assert bw.values("chr17", 100000, 110000, 10)[0] == 0.37435242314338685 - assert bb.values("chr21", 10_148_000, 10_158_000, 10)[0] == 0.175 + assert bw.values("chr17", 100000, 110000, 10)[0] == 0.44886381671868925 + assert bb.values("chr21", 10_148_000, 10_158_000, 10)[0] == 1.0 assert bw.values("chr17", 100000, 110000, 10, "max")[0] == 1.1978399753570557 assert bb.values("chr21", 10_148_000, 10_158_000, 10, "max")[0] == 1.0 @@ -257,11 +257,11 @@ def test_values(bw, bb): assert ( bw.values("chr17", 100000, 110000, 10, "mean", exact=True)[0] - == 0.37885534041374924 + == 0.4542629980980206 ) assert ( bb.values("chr21", 10_148_000, 10_158_000, 10, "mean", exact=True)[0] - == 0.175 + == 1.0 ) assert list(bw.values("chr17", 59890, 59900, 10, "mean", exact=True)) == [ From 7c2df7967a1113525685e2efd947fb261d14afb6 Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Sun, 23 Jun 2024 17:11:55 -0400 Subject: [PATCH 25/30] bigBed autosql can be 
null, so return Option --- bigtools/src/bbi/bigbedread.rs | 7 +++++-- bigtools/src/utils/cli/bigbedinfo.rs | 16 +++++++++++----- bigtools/tests/bigbedwrite.rs | 2 +- pybigtools/src/lib.rs | 6 +++--- pybigtools/tests/test_api.py | 6 +++--- 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/bigtools/src/bbi/bigbedread.rs b/bigtools/src/bbi/bigbedread.rs index bf8fca4..b26b24a 100644 --- a/bigtools/src/bbi/bigbedread.rs +++ b/bigtools/src/bbi/bigbedread.rs @@ -190,8 +190,11 @@ impl BigBedRead { } /// Reads the autosql from this bigBed - pub fn autosql(&mut self) -> Result { + pub fn autosql(&mut self) -> Result, BBIReadError> { let auto_sql_offset = self.info.header.auto_sql_offset; + if auto_sql_offset == 0 { + return Ok(None); + } let reader = self.reader().raw_reader(); let mut reader = BufReader::new(reader); reader.seek(SeekFrom::Start(auto_sql_offset))?; @@ -200,7 +203,7 @@ impl BigBedRead { buffer.pop(); let autosql = String::from_utf8(buffer) .map_err(|_| BBIReadError::InvalidFile("Invalid autosql: not UTF-8".to_owned()))?; - Ok(autosql) + Ok(Some(autosql)) } pub fn item_count(&mut self) -> Result { diff --git a/bigtools/src/utils/cli/bigbedinfo.rs b/bigtools/src/utils/cli/bigbedinfo.rs index f43d018..46431a2 100644 --- a/bigtools/src/utils/cli/bigbedinfo.rs +++ b/bigtools/src/utils/cli/bigbedinfo.rs @@ -83,11 +83,17 @@ pub fn bigbedinfo(args: BigBedInfoArgs) -> Result<(), Box> { } if args.autosql { let autosql = bigbed.autosql()?; - if autosql.len() == 0 { - println!("as: n/a"); - } else { - println!("as:"); - print!("{}", autosql); + match autosql { + None => { + println!("as: n/a"); + } + Some(autosql) if autosql.is_empty() => { + println!("as: n/a"); + } + Some(autosql) => { + println!("as:"); + print!("{}", autosql); + } } } let summary = bigbed.get_summary()?; diff --git a/bigtools/tests/bigbedwrite.rs b/bigtools/tests/bigbedwrite.rs index bc55c42..2ef1216 100644 --- a/bigtools/tests/bigbedwrite.rs +++ b/bigtools/tests/bigbedwrite.rs @@ 
-60,7 +60,7 @@ fn bigbedwrite_test() -> Result<(), Box> { assert_eq!(chroms[0].length, 83257441); assert_eq!( - &bwread.autosql().unwrap(), + &bwread.autosql().unwrap().unwrap(), "table bed\n\"Browser Extensible Data\"\n(\n string chrom; \"Reference sequence chromosome or scaffold\"\n uint chromStart; \"Start position in chromosome\"\n uint chromEnd; \"End position in chromosome\"\n string name; \"Name of item.\"\n uint score; \"Score (0-1000)\"\n)", ); diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 4695de2..748b931 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -1141,10 +1141,10 @@ impl BBIRead { BBIReadRaw::BigWigFile(_) | BBIReadRaw::BigWigFileLike(_) => BEDGRAPH.to_string(), #[cfg(feature = "remote")] BBIReadRaw::BigWigRemote(_) => BEDGRAPH.to_string(), - BBIReadRaw::BigBedFile(b) => b.autosql().convert_err()?, + BBIReadRaw::BigBedFile(b) => b.autosql().convert_err()?.unwrap_or(String::new()), #[cfg(feature = "remote")] - BBIReadRaw::BigBedRemote(b) => b.autosql().convert_err()?, - BBIReadRaw::BigBedFileLike(b) => b.autosql().convert_err()?, + BBIReadRaw::BigBedRemote(b) => b.autosql().convert_err()?.unwrap_or(String::new()), + BBIReadRaw::BigBedFileLike(b) => b.autosql().convert_err()?.unwrap_or(String::new()), }; let obj = if parse { let mut declarations = parse_autosql(&schema) diff --git a/pybigtools/tests/test_api.py b/pybigtools/tests/test_api.py index 07347dc..b3ee065 100644 --- a/pybigtools/tests/test_api.py +++ b/pybigtools/tests/test_api.py @@ -123,10 +123,10 @@ def test_autosql(bw, bb): # Even bigwigs have sql (a sql representing bedGraph) assert "bedGraph" in bw.sql() # We can parse the sql - # assert bw.sql(True)['name'] == 'bedGraph' + assert bw.sql(True)['name'] == 'bedGraph' - # bb.sql() - # BBIReadError: The file was invalid: Invalid autosql: not UTF-8 + # Unfortunately, this test bigBed doesn't actually have autosql + assert len(bb.sql()) == 0 def test_records(bw, bb): From 
c134044b99b8718f5437fbb53b57940d52bb6d8d Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Sun, 23 Jun 2024 17:25:41 -0400 Subject: [PATCH 26/30] Bump pybigtools dev minor version --- Cargo.lock | 2 +- pybigtools/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8b0acc2..75c86e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -743,7 +743,7 @@ dependencies = [ [[package]] name = "pybigtools" -version = "0.1.5-dev" +version = "0.2.0-dev" dependencies = [ "bigtools", "futures", diff --git a/pybigtools/Cargo.toml b/pybigtools/Cargo.toml index 0806602..38164d1 100644 --- a/pybigtools/Cargo.toml +++ b/pybigtools/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pybigtools" -version = "0.1.5-dev" +version = "0.2.0-dev" authors = ["Jack "] edition = "2021" From 88e429e558f0b1e45d41d27b7239ad884b773370 Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Mon, 24 Jun 2024 00:27:14 -0400 Subject: [PATCH 27/30] Track coverage by base --- pybigtools/src/lib.rs | 63 ++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 748b931..2a25ad8 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -729,7 +729,9 @@ fn to_entry_array_bins>>( assert_eq!(v.len(), bins); v.fill(missing); - let mut bin_data: VecDeque<(usize, i32, i32, i32, Vec)> = VecDeque::new(); + // (, , , , ) + // covered_bases = 0 if uncovered, 1 if covered + let mut bin_data: VecDeque<(usize, i32, i32, Vec, Vec)> = VecDeque::new(); let bin_size = (end - start) as f64 / bins as f64; for interval in iter { let interval = interval?; @@ -759,8 +761,11 @@ fn to_entry_array_bins>>( .unwrap_or(missing); } Summary::Mean => { - v[bin] = (front.3 != 0) - .then(|| front.3 as f64) + v[bin] = front + .3 + .iter() + .any(|v| *v > 0) + .then(|| front.3.into_iter().sum::() as f64) .map(|c| 
front.4.into_iter().sum::() / c) .unwrap_or(missing); } @@ -781,7 +786,7 @@ fn to_entry_array_bins>>( bin, bin_start, bin_end, - 0, + vec![0; (bin_end - bin_start) as usize], vec![missing; (bin_end - bin_start) as usize], )); } @@ -801,13 +806,15 @@ fn to_entry_array_bins>>( let overlap_start = (*bin_start).max(interval_start); let overlap_end = (*bin_end).min(interval_end); - for i in &mut data - [((overlap_start - *bin_start) as usize)..((overlap_end - *bin_start) as usize)] - { + let range = + ((overlap_start - *bin_start) as usize)..((overlap_end - *bin_start) as usize); + for i in &mut data[range.clone()] { // If NAN, then 0.0 + 1.0, else i + 1.0 *i = (*i).max(0.0) + 1.0; } - *covered += overlap_end - overlap_start; + for i in &mut covered[range] { + *i = (*i).max(1); + } } } while let Some(front) = bin_data.pop_front() { @@ -829,8 +836,11 @@ fn to_entry_array_bins>>( .unwrap_or(missing); } Summary::Mean => { - v[bin] = (front.3 != 0) - .then(|| front.3 as f64) + v[bin] = front + .3 + .iter() + .any(|v| *v > 0) + .then(|| front.3.into_iter().sum::() as f64) .map(|c| front.4.into_iter().sum::() / c) .unwrap_or(missing); } @@ -851,7 +861,9 @@ fn to_entry_array_zoom>>( assert_eq!(v.len(), bins); v.fill(missing); - let mut bin_data: VecDeque<(usize, i32, i32, i32, Vec)> = VecDeque::new(); + // (, , , , ) + // covered_bases = 0 if uncovered, 1 if covered + let mut bin_data: VecDeque<(usize, i32, i32, Vec, Vec)> = VecDeque::new(); let bin_size = (end - start) as f64 / bins as f64; for interval in iter { let interval = interval?; @@ -881,8 +893,11 @@ fn to_entry_array_zoom>>( .unwrap_or(missing); } Summary::Mean => { - v[bin] = (front.3 != 0) - .then(|| front.3 as f64) + v[bin] = front + .3 + .iter() + .any(|v| *v > 0) + .then(|| front.3.into_iter().sum::() as f64) .map(|c| front.4.into_iter().sum::() / c) .unwrap_or(missing); } @@ -903,7 +918,7 @@ fn to_entry_array_zoom>>( bin, bin_start, bin_end, - 0, + vec![0; (bin_end - bin_start) as usize], vec![missing; 
(bin_end - bin_start) as usize], )); } @@ -919,14 +934,14 @@ fn to_entry_array_zoom>>( for bin in bin_start..bin_end { assert!(bin_data.iter().find(|b| b.0 == bin).is_some()); } - for (_bin, bin_start, bin_end, c, data) in bin_data.iter_mut() { + for (_bin, bin_start, bin_end, covered, data) in bin_data.iter_mut() { let overlap_start = (*bin_start).max(interval_start); let overlap_end = (*bin_end).min(interval_end); let mean = interval.summary.sum / (interval.end - interval.start) as f64; - for i in &mut data - [((overlap_start - *bin_start) as usize)..((overlap_end - *bin_start) as usize)] - { + let range = + ((overlap_start - *bin_start) as usize)..((overlap_end - *bin_start) as usize); + for i in &mut data[range.clone()] { *i = i.max(0.0); match summary { Summary::Mean => *i += mean, @@ -934,7 +949,10 @@ fn to_entry_array_zoom>>( Summary::Max => *i = i.max(interval.summary.max_val), } } - *c += overlap_end - overlap_start; + + for i in &mut covered[range] { + *i = (*i).max(1); + } } } while let Some(front) = bin_data.pop_front() { @@ -956,8 +974,11 @@ fn to_entry_array_zoom>>( .unwrap_or(missing); } Summary::Mean => { - v[bin] = (front.3 != 0) - .then(|| front.3 as f64) + v[bin] = front + .3 + .iter() + .any(|v| *v > 0) + .then(|| front.3.into_iter().sum::() as f64) .map(|c| front.4.into_iter().sum::() / c) .unwrap_or(missing); } From f4cae68f35ecd2dd0d21cf7dda5faef86f9627b2 Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Wed, 26 Jun 2024 17:51:57 -0400 Subject: [PATCH 28/30] Don't use ceil for bin_start and bin_end --- pybigtools/src/lib.rs | 16 ++++++++-------- pybigtools/tests/test_api.py | 10 ++++++++++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 2a25ad8..4ee84f8 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -516,8 +516,8 @@ fn to_array_bins>>( .map(|b| ((b.0 + 1) < bin_end).then(|| b.0 + 1)) .unwrap_or(Some(bin_start)) { - 
let bin_start = ((bin as f64) * bin_size).ceil() as i32; - let bin_end = (((bin + 1) as f64) * bin_size).ceil() as i32; + let bin_start = ((bin as f64) * bin_size) as i32; + let bin_end = (((bin + 1) as f64) * bin_size) as i32; bin_data.push_back((bin, bin_start, bin_end, None)); } @@ -625,8 +625,8 @@ fn to_array_zoom>>( .map(|b| ((b.0 + 1) < bin_end).then(|| b.0 + 1)) .unwrap_or(Some(bin_start)) { - let bin_start = ((bin as f64) * bin_size).ceil() as i32; - let bin_end = (((bin + 1) as f64) * bin_size).ceil() as i32; + let bin_start = ((bin as f64) * bin_size) as i32; + let bin_end = (((bin + 1) as f64) * bin_size) as i32; bin_data.push_back((bin, bin_start, bin_end, None)); } @@ -779,8 +779,8 @@ fn to_entry_array_bins>>( .map(|b| ((b.0 + 1) < bin_end).then(|| b.0 + 1)) .unwrap_or(Some(bin_start)) { - let bin_start = ((bin as f64) * bin_size).ceil() as i32; - let bin_end = (((bin + 1) as f64) * bin_size).ceil() as i32; + let bin_start = ((bin as f64) * bin_size) as i32; + let bin_end = (((bin + 1) as f64) * bin_size) as i32; bin_data.push_back(( bin, @@ -911,8 +911,8 @@ fn to_entry_array_zoom>>( .map(|b| ((b.0 + 1) < bin_end).then(|| b.0 + 1)) .unwrap_or(Some(bin_start)) { - let bin_start = ((bin as f64) * bin_size).ceil() as i32; - let bin_end = (((bin + 1) as f64) * bin_size).ceil() as i32; + let bin_start = ((bin as f64) * bin_size) as i32; + let bin_end = (((bin + 1) as f64) * bin_size) as i32; bin_data.push_back(( bin, diff --git a/pybigtools/tests/test_api.py b/pybigtools/tests/test_api.py index b3ee065..78e323b 100644 --- a/pybigtools/tests/test_api.py +++ b/pybigtools/tests/test_api.py @@ -323,3 +323,13 @@ def test_values(bw, bb): assert math.isnan(ret_arr[0]) assert ret_arr[19] == 0.0 assert np.array_equal(arr, ret_arr, equal_nan=True) + + # Some differences in estimates between pybigtools to other libs + vals = bw.values("chr17", 85525, 85730, bins=2, exact=False) + assert list(vals) == [0.15392776003070907, 2.728891665264241] + vals = bw.values("chr17", 
85525, 85730, bins=2, exact=True) + assert list(vals) == [0.06770934917680595, 2.4864424403431347] + vals = bw.values("chr17", 59900, 60105, bins=2, exact=False) + assert list(vals) == [0.5358060553962108, 0.5513471488813751] + vals = bw.values("chr17", 59900, 60105, bins=2, exact=True) + assert list(vals) == [0.5362001863472602, 0.5527710799959679] From ad7c1b75156453244fb915deefdfc6cd5934d09e Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Thu, 27 Jun 2024 11:45:08 -0400 Subject: [PATCH 29/30] Before summing base values, max with 0 to ignore nan --- pybigtools/src/lib.rs | 14 +++++++++----- pybigtools/tests/test_api.py | 8 ++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 4ee84f8..7659f38 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -766,7 +766,8 @@ fn to_entry_array_bins>>( .iter() .any(|v| *v > 0) .then(|| front.3.into_iter().sum::() as f64) - .map(|c| front.4.into_iter().sum::() / c) + // Map nan to 0 so the sum is non-nan + .map(|c| front.4.into_iter().map(|c| c.max(0.0)).sum::() / c) .unwrap_or(missing); } } @@ -841,7 +842,8 @@ fn to_entry_array_bins>>( .iter() .any(|v| *v > 0) .then(|| front.3.into_iter().sum::() as f64) - .map(|c| front.4.into_iter().sum::() / c) + // Map nan to 0 so the sum is non-nan + .map(|c| front.4.into_iter().map(|c| c.max(0.0)).sum::() / c) .unwrap_or(missing); } } @@ -898,7 +900,8 @@ fn to_entry_array_zoom>>( .iter() .any(|v| *v > 0) .then(|| front.3.into_iter().sum::() as f64) - .map(|c| front.4.into_iter().sum::() / c) + // Map nan to 0 so the sum is non-nan + .map(|c| front.4.into_iter().map(|c| c.max(0.0)).sum::() / c) .unwrap_or(missing); } } @@ -938,7 +941,7 @@ fn to_entry_array_zoom>>( let overlap_start = (*bin_start).max(interval_start); let overlap_end = (*bin_end).min(interval_end); - let mean = interval.summary.sum / (interval.end - interval.start) as f64; + let mean = 
interval.summary.sum / (interval.summary.bases_covered) as f64; let range = ((overlap_start - *bin_start) as usize)..((overlap_end - *bin_start) as usize); for i in &mut data[range.clone()] { @@ -979,7 +982,8 @@ fn to_entry_array_zoom>>( .iter() .any(|v| *v > 0) .then(|| front.3.into_iter().sum::() as f64) - .map(|c| front.4.into_iter().sum::() / c) + // Map nan to 0 so the sum is non-nan + .map(|c| front.4.into_iter().map(|c| c.max(0.0)).sum::() / c) .unwrap_or(missing); } } diff --git a/pybigtools/tests/test_api.py b/pybigtools/tests/test_api.py index 78e323b..5a67ba1 100644 --- a/pybigtools/tests/test_api.py +++ b/pybigtools/tests/test_api.py @@ -325,6 +325,9 @@ def test_values(bw, bb): assert np.array_equal(arr, ret_arr, equal_nan=True) # Some differences in estimates between pybigtools to other libs + # Namely, bigtools calculates estimates by taking the + # sum of nanmeans over covered bases (summary.sum/summary.covered_bases) and dividing by covered bases (overlap between zoom and bin) + # So, including these as cases where the calculated value is different vals = bw.values("chr17", 85525, 85730, bins=2, exact=False) assert list(vals) == [0.15392776003070907, 2.728891665264241] vals = bw.values("chr17", 85525, 85730, bins=2, exact=True) @@ -333,3 +336,8 @@ def test_values(bw, bb): assert list(vals) == [0.5358060553962108, 0.5513471488813751] vals = bw.values("chr17", 59900, 60105, bins=2, exact=True) assert list(vals) == [0.5362001863472602, 0.5527710799959679] + + vals = bb.values("chr21", 14_760_000, 14_800_000, bins=1, exact=False) + assert list(vals) == [1.2572170068028603] + vals = bb.values("chr21", 14_760_000, 14_800_000, bins=1, exact=True) + assert list(vals) == [1.3408662900188324] From fc83f8203162bb55056a0f37854e98666a5a9a3f Mon Sep 17 00:00:00 2001 From: Jack Huey <31162821+jackh726@users.noreply.github.com> Date: Thu, 27 Jun 2024 11:57:13 -0400 Subject: [PATCH 30/30] __exit__ should not return Self (it's truthy and prevents exceptions from 
being raised) --- pybigtools/src/lib.rs | 3 +-- pybigtools/tests/test_api.py | 6 ++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pybigtools/src/lib.rs b/pybigtools/src/lib.rs index 7659f38..b965531 100644 --- a/pybigtools/src/lib.rs +++ b/pybigtools/src/lib.rs @@ -1682,9 +1682,8 @@ impl BBIRead { _exc_type: PyObject, _exc_value: PyObject, _exc_traceback: PyObject, - ) -> Py { + ) { slf.borrow_mut(py).bbi = BBIReadRaw::Closed; - slf } fn __traverse__(&self, visit: PyVisit<'_>) -> Result<(), PyTraverseError> { diff --git a/pybigtools/tests/test_api.py b/pybigtools/tests/test_api.py index 5a67ba1..3ba26b6 100644 --- a/pybigtools/tests/test_api.py +++ b/pybigtools/tests/test_api.py @@ -60,6 +60,12 @@ def test_open_filelike(): with pybigtools.open(f, "r") as b: assert b.chroms() == {'chr21': 48_129_895} +def test_contextmanager_exception(): + class RaisesException(Exception): + pass + with pytest.raises(RaisesException): + with pybigtools.open(REPO_ROOT / "bigtools/resources/test/valid.bigWig", "r"): + raise RaisesException() @pytest.fixture def bw():