Skip to content

Commit

Permalink
Use index_list crate instead of copied indexlist code
Browse files Browse the repository at this point in the history
  • Loading branch information
jackh726 committed Aug 5, 2024
1 parent c7d6320 commit eb9e1e8
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 2,019 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion bigtools/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ ufmt = { version = "0.2", features = ["std"], optional = true }
bytes = { version = "1.4.0", optional = true }
tokio = { version = "1.34.0", features = ["rt", "rt-multi-thread"] }
smallvec = { version = "1.11.2", features = ["write"] }
index_list = { version = "0.2.13", optional = true }

[dev-dependencies]
rand = "0.8"
Expand Down Expand Up @@ -83,4 +84,4 @@ default = ["remote", "read", "write", "cli"]
remote = ["attohttpc", "tempfile"]
cli = ["anyhow", "clap", "ryu", "ufmt", "read", "write"]
read = ["bytes", "itertools"]
write = ["crossbeam-channel", "tempfile", "futures", "serde", "itertools", "bincode"]
write = ["crossbeam-channel", "tempfile", "futures", "serde", "itertools", "bincode", "index_list"]
54 changes: 30 additions & 24 deletions bigtools/src/bbi/bigbedwrite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ use byteorder::{NativeEndian, WriteBytesExt};
use tokio::runtime::{Handle, Runtime};

use crate::bbiwrite::process_internal::BBIDataProcessorCreate;
use crate::utils::indexlist::IndexList;
use crate::utils::tell::Tell;
use crate::{
write_info, BBIDataProcessor, BBIDataProcessoredData, BBIDataProcessoringInputSectionChannel,
BBIDataSource, InternalProcessData, InternalTempZoomInfo, NoZoomsInternalProcessData,
NoZoomsInternalProcessedData, ProcessDataError, ZoomsInternalProcessData,
ZoomsInternalProcessedData,
};
use index_list::IndexList;

use crate::bbi::{BedEntry, Summary, Value, ZoomRecord, BIGBED_MAGIC};
use crate::bbiwrite::{
Expand Down Expand Up @@ -294,15 +294,15 @@ async fn process_val(
// If any overlap exists, it must start at the current start (otherwise it would have to be after the current entry).
// If the overlap starts earlier, the entry wasn't correctly cut in the last iteration.
debug_assert!(overlap
.head()
.get_first()
.map(|f| f.start == item_start)
.unwrap_or(true));

// For each item in `overlap` that overlaps the current
// item, add `1` to the value.
let mut index = overlap.head_index();
while let Some(i) = index {
match overlap.get_mut(i) {
let mut index = overlap.first_index();
while index.is_some() {
match overlap.get_mut(index) {
None => break,
Some(o) => {
o.value += 1.0;
Expand All @@ -311,7 +311,7 @@ async fn process_val(
let end = o.end;
o.end = item_end;
overlap.insert_after(
i,
index,
Value {
start: item_end,
end,
Expand All @@ -320,15 +320,18 @@ async fn process_val(
);
break;
}
index = overlap.next_index(i);
index = overlap.next_index(index);
}
}
}

debug_assert!(overlap.tail().map(|o| o.end >= item_start).unwrap_or(true));
debug_assert!(overlap
.get_last()
.map(|o| o.end >= item_start)
.unwrap_or(true));

if overlap.tail().map(|o| o.end).unwrap_or(item_start) == item_start {
overlap.push_back(Value {
if overlap.get_last().map(|o| o.end).unwrap_or(item_start) == item_start {
overlap.insert_last(Value {
start: item_start,
end: item_end,
value: 1.0,
Expand All @@ -338,18 +341,18 @@ async fn process_val(
let next_start = next_start_opt.unwrap_or(u32::max_value());

while overlap
.head()
.get_first()
.map(|f| f.start < next_start)
.unwrap_or(false)
{
let mut removed = overlap.pop_front().unwrap();
let mut removed = overlap.remove_first().unwrap();
let (len, val) = if removed.end <= next_start {
(removed.end - removed.start, f64::from(removed.value))
} else {
let len = next_start - removed.start;
let val = f64::from(removed.value);
removed.start = next_start;
overlap.push_front(removed);
overlap.insert_first(removed);
(len, val)
};

Expand Down Expand Up @@ -411,9 +414,9 @@ async fn process_val_zoom(

// For each item in `overlap` that overlaps the current
// item, add `1` to the value.
let mut index = overlap.head_index();
while let Some(i) = index {
match overlap.get_mut(i) {
let mut index = overlap.first_index();
while index.is_some() {
match overlap.get_mut(index) {
None => break,
Some(o) => {
o.value += 1.0;
Expand All @@ -422,7 +425,7 @@ async fn process_val_zoom(
let end = o.end;
o.end = item_end;
overlap.insert_after(
i,
index,
Value {
start: item_end,
end,
Expand All @@ -431,15 +434,18 @@ async fn process_val_zoom(
);
break;
}
index = overlap.next_index(i);
index = overlap.next_index(index);
}
}
}

debug_assert!(overlap.tail().map(|o| o.end >= item_start).unwrap_or(true));
debug_assert!(overlap
.get_last()
.map(|o| o.end >= item_start)
.unwrap_or(true));

if overlap.tail().map(|o| o.end).unwrap_or(item_start) == item_start {
overlap.push_back(Value {
if overlap.get_last().map(|o| o.end).unwrap_or(item_start) == item_start {
overlap.insert_last(Value {
start: item_start,
end: item_end,
value: 1.0,
Expand All @@ -449,18 +455,18 @@ async fn process_val_zoom(
let next_start = next_val.map(|v| v.start).unwrap_or(u32::max_value());

while overlap
.head()
.get_first()
.map(|f| f.start < next_start)
.unwrap_or(false)
{
let mut removed = overlap.pop_front().unwrap();
let mut removed = overlap.remove_first().unwrap();
let val = f64::from(removed.value);
let (removed_start, removed_end) = if removed.end <= next_start {
(removed.start, removed.end)
} else {
let start = removed.start;
removed.start = next_start;
overlap.push_front(removed);
overlap.insert_first(removed);
(start, next_start)
};

Expand Down
32 changes: 19 additions & 13 deletions bigtools/src/bed/indexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ Utilities for indexing a bed file that is start-sorted (chromosomes may be out o
use std::fs::File;
use std::io::{self, BufRead, BufReader, Seek, SeekFrom};

use crate::utils::indexlist::{Index, IndexList};
use index_list::{IndexList, ListIndex};

use crate::utils::tell::Tell;

/// Returns a Vec of offsets into a bed file, and the chromosome starting at each offset.
Expand Down Expand Up @@ -49,24 +50,26 @@ pub fn index_chroms(file: File) -> io::Result<Option<Vec<(u64, String)>>> {
}

let chrom = parse_line(&line)?.unwrap();
let first = chroms.push_front((0, chrom));
let first = chroms.insert_first((0, chrom));
let file_size = file.seek(SeekFrom::End(0))?;

fn do_index(
file_size: u64,
file: &mut BufReader<File>,
chroms: &mut IndexList<(u64, String)>,
line: &mut String,
prev: Index<(u64, String)>,
next: Option<Index<(u64, String)>>,
prev: ListIndex,
next: Option<ListIndex>,
limit: usize,
) -> Result<(), io::Error> {
if limit == 0 {
panic!("Recursive depth limit reached");
}

let next_tell = next.map(|next| chroms[next].0).unwrap_or(file_size);
let mid = (next_tell + chroms[prev].0) / 2;
let next_tell = next
.map(|next| chroms.get(next).unwrap().0)
.unwrap_or(file_size);
let mid = (next_tell + chroms.get(prev).unwrap().0) / 2;
file.seek(SeekFrom::Start(mid))?;
file.read_line(line)?;
line.clear();
Expand Down Expand Up @@ -97,11 +100,14 @@ pub fn index_chroms(file: File) -> io::Result<Option<Vec<(u64, String)>>> {
// to continue to index between the previous and current as well as
// between the current and next.

let curr = chroms.insert_after(prev, (tell, chrom)).unwrap();
let curr = chroms.insert_after(prev, (tell, chrom));

let left = chroms[curr].1 != chroms[prev].1 && tell < next_tell;
let left = chroms.get(curr).unwrap().1 != chroms.get(prev).unwrap().1 && tell < next_tell;
let right = next
.map(|next| chroms[curr].1 != chroms[next].1 && tell < chroms[next].0)
.map(|next| {
chroms.get(curr).unwrap().1 != chroms.get(next).unwrap().1
&& tell < chroms.get(next).unwrap().0
})
.unwrap_or(true);

if left {
Expand All @@ -112,16 +118,16 @@ pub fn index_chroms(file: File) -> io::Result<Option<Vec<(u64, String)>>> {
do_index(file_size, file, chroms, line, curr, next, limit - 1)?;
}

if chroms[curr].1 != chroms[prev].1 && tell == next_tell {
file.seek(SeekFrom::Start(chroms[prev].0))?;
if chroms.get(curr).unwrap().1 != chroms.get(prev).unwrap().1 && tell == next_tell {
file.seek(SeekFrom::Start(chroms.get(prev).unwrap().0))?;
line.clear();
file.read_line(line)?;
line.clear();
let tell = file.tell()?;
file.read_line(line)?;
let chrom = parse_line(&*line)?
.expect("Bad logic. Must at least find last entry for chromosome.");
chroms.insert_after(prev, (tell, chrom)).unwrap();
chroms.insert_after(prev, (tell, chrom));
}
Ok(())
}
Expand All @@ -136,7 +142,7 @@ pub fn index_chroms(file: File) -> io::Result<Option<Vec<(u64, String)>>> {
100,
)?;

let mut chroms: Vec<_> = chroms.into_iter().collect();
let mut chroms: Vec<_> = chroms.drain_iter().collect();
chroms.dedup_by_key(|index| index.1.clone());
let mut deduped_chroms = chroms.clone();
deduped_chroms.sort();
Expand Down
Loading

0 comments on commit eb9e1e8

Please sign in to comment.