Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use newtypes for the arena types #175

Merged
merged 6 commits into from
May 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions flatgfa-py/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use flatgfa::flatgfa::{FlatGFA, HeapStore};
use flatgfa::flatgfa::{FlatGFA, HeapGFAStore};
use flatgfa::pool::Id;
use pyo3::prelude::*;
use pyo3::types::PyBytes;

Expand All @@ -16,7 +17,7 @@ fn load(filename: &str) -> PyFlatGFA {
}

enum InternalStore {
Heap(Box<HeapStore>),
Heap(Box<HeapGFAStore>),
File(memmap::Mmap),
}

Expand Down Expand Up @@ -117,15 +118,15 @@ impl PySegment {
/// so it is slow to use for large sequences.
fn sequence<'py>(&self, py: Python<'py>) -> Bound<'py, PyBytes> {
let view = self.gfa.view();
let seg = view.segs[self.id as usize];
let seg = &view.segs[Id::from(self.id)];
let seq = view.get_seq(&seg);
PyBytes::new_bound(py, seq)
}

#[getter]
fn name(&self) -> usize {
let view = self.gfa.view();
let seg = view.segs[self.id as usize];
let seg = view.segs[Id::from(self.id)];
seg.name
}

Expand Down
42 changes: 25 additions & 17 deletions flatgfa/src/cmds.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::flatgfa::{self, Handle, Segment};
use crate::pool::{self, Id, Pool};
use crate::pool::{self, Id, Store};
use argh::FromArgs;
use bstr::BStr;
use std::collections::{HashMap, HashSet};
Expand Down Expand Up @@ -29,7 +29,7 @@ pub fn toc(gfa: &flatgfa::FlatGFA) {
pub struct Paths {}

pub fn paths(gfa: &flatgfa::FlatGFA) {
for path in gfa.paths.iter() {
for path in gfa.paths.all().iter() {
println!("{}", gfa.get_path_name(path));
}
}
Expand Down Expand Up @@ -61,7 +61,7 @@ pub fn stats(gfa: &flatgfa::FlatGFA, args: Stats) {
} else if args.self_loops {
let mut counts: HashMap<Id<Segment>, usize> = HashMap::new();
let mut total: usize = 0;
for link in gfa.links.iter() {
for link in gfa.links.all().iter() {
if link.from.segment() == link.to.segment() {
let count = counts.entry(link.from.segment()).or_insert(0);
*count += 1;
Expand Down Expand Up @@ -96,7 +96,7 @@ pub fn position(gfa: &flatgfa::FlatGFA, args: Position) -> Result<(), &'static s
};

let path_id = gfa.find_path(path_name.into()).ok_or("path not found")?;
let path = gfa.paths.get_id(path_id);
let path = &gfa.paths[path_id];
assert_eq!(
orientation,
flatgfa::Orientation::Forward,
Expand All @@ -106,7 +106,7 @@ pub fn position(gfa: &flatgfa::FlatGFA, args: Position) -> Result<(), &'static s
// Traverse the path until we reach the position.
let mut cur_pos = 0;
let mut found = None;
for step in gfa.get_steps(path) {
for step in &gfa.steps[path.steps] {
let seg = gfa.get_handle_seg(*step);
let end_pos = cur_pos + seg.len();
if offset < end_pos {
Expand Down Expand Up @@ -149,7 +149,10 @@ pub struct Extract {
link_distance: usize,
}

pub fn extract(gfa: &flatgfa::FlatGFA, args: Extract) -> Result<flatgfa::HeapStore, &'static str> {
pub fn extract(
gfa: &flatgfa::FlatGFA,
args: Extract,
) -> Result<flatgfa::HeapGFAStore, &'static str> {
let origin_seg = gfa.find_seg(args.seg_name).ok_or("segment not found")?;

let mut subgraph = SubgraphBuilder::new(gfa);
Expand All @@ -160,7 +163,7 @@ pub fn extract(gfa: &flatgfa::FlatGFA, args: Extract) -> Result<flatgfa::HeapSto
/// A helper to construct a new graph that includes part of an old graph.
struct SubgraphBuilder<'a> {
old: &'a flatgfa::FlatGFA<'a>,
store: flatgfa::HeapStore,
store: flatgfa::HeapGFAStore,
seg_map: HashMap<Id<Segment>, Id<Segment>>,
}

Expand All @@ -173,14 +176,14 @@ impl<'a> SubgraphBuilder<'a> {
fn new(old: &'a flatgfa::FlatGFA) -> Self {
Self {
old,
store: flatgfa::HeapStore::default(),
store: flatgfa::HeapGFAStore::default(),
seg_map: HashMap::new(),
}
}

/// Add a segment from the source graph to this subgraph.
fn include_seg(&mut self, seg_id: Id<Segment>) {
let seg = self.old.segs.get_id(seg_id);
let seg = &self.old.segs[seg_id];
let new_seg_id = self.store.add_seg(
seg.name,
self.old.get_seq(seg),
Expand Down Expand Up @@ -211,7 +214,7 @@ impl<'a> SubgraphBuilder<'a> {
let mut cur_subpath_start: Option<SubpathStart> = None;
let mut path_pos = 0;

for step in self.old.get_steps(path) {
for step in &self.old.steps[path.steps] {
let in_neighb = self.seg_map.contains_key(&step.segment());

if let (Some(start), false) = (&cur_subpath_start, in_neighb) {
Expand Down Expand Up @@ -261,7 +264,7 @@ impl<'a> SubgraphBuilder<'a> {

// Find the set of all segments that are 1 link away.
assert_eq!(dist, 1, "only `-c 1` is implemented so far");
for link in self.old.links.iter() {
for link in self.old.links.all().iter() {
if let Some(other_seg) = link.incident_seg(origin) {
if !self.seg_map.contains_key(&other_seg) {
self.include_seg(other_seg);
Expand All @@ -270,14 +273,14 @@ impl<'a> SubgraphBuilder<'a> {
}

// Include all links within the subgraph.
for link in self.old.links.iter() {
for link in self.old.links.all().iter() {
if self.contains(link.from.segment()) && self.contains(link.to.segment()) {
self.include_link(link);
}
}

// Find subpaths within the subgraph.
for path in self.old.paths.iter() {
for path in self.old.paths.all().iter() {
self.find_subpaths(path);
}
}
Expand All @@ -296,9 +299,9 @@ pub fn depth(gfa: &flatgfa::FlatGFA) {
let mut uniq_paths = Vec::<HashSet<&BStr>>::new();
uniq_paths.resize(gfa.segs.len(), HashSet::new());
// Do not assume that each handle in `gfa.steps` is unique.
for path in gfa.paths {
for path in gfa.paths.all() {
let path_name = gfa.get_path_name(path);
for step in gfa.get_steps(path) {
for step in &gfa.steps[path.steps] {
let seg_id = step.segment().index();
// Increment depths
depths[seg_id] = depths[seg_id] + 1;
Expand All @@ -308,8 +311,13 @@ pub fn depth(gfa: &flatgfa::FlatGFA) {
}
// print out depth and depth.uniq
println!("#node.id\tdepth\tdepth.uniq");
for (id, seg) in gfa.segs.iter().enumerate() {
for (id, seg) in gfa.segs.items() {
let name: u32 = seg.name as u32;
println!("{}\t{}\t{}", name, depths[id], uniq_paths[id].len());
println!(
"{}\t{}\t{}",
name,
depths[id.index()],
uniq_paths[id.index()].len()
);
}
}
134 changes: 67 additions & 67 deletions flatgfa/src/file.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::flatgfa;
use crate::pool::Span;
use crate::pool::{FixedStore, Pool, Span, Store};
use memmap::{Mmap, MmapMut};
use std::mem::{size_of, size_of_val};
use tinyvec::SliceVec;
Expand Down Expand Up @@ -37,17 +37,17 @@ struct Size {
}

impl Size {
fn of_slice<T>(slice: &[T]) -> Self {
fn of_pool<T>(pool: Pool<T>) -> Self {
Size {
len: slice.len(),
capacity: slice.len(),
len: pool.len(),
capacity: pool.len(),
}
}

fn of_slice_vec<T>(slice_vec: &SliceVec<'_, T>) -> Self {
fn of_store<T: Clone>(store: &FixedStore<'_, T>) -> Self {
Size {
len: slice_vec.len(),
capacity: slice_vec.capacity(),
len: store.len(),
capacity: store.capacity(),
}
}

Expand Down Expand Up @@ -81,34 +81,34 @@ impl Toc {
fn full(gfa: &flatgfa::FlatGFA) -> Self {
Self {
magic: MAGIC_NUMBER,
header: Size::of_slice(gfa.header),
segs: Size::of_slice(gfa.segs),
paths: Size::of_slice(gfa.paths),
links: Size::of_slice(gfa.links),
steps: Size::of_slice(gfa.steps),
seq_data: Size::of_slice(gfa.seq_data),
overlaps: Size::of_slice(gfa.overlaps),
alignment: Size::of_slice(gfa.alignment),
name_data: Size::of_slice(gfa.name_data),
optional_data: Size::of_slice(gfa.optional_data),
line_order: Size::of_slice(gfa.line_order),
header: Size::of_pool(gfa.header),
segs: Size::of_pool(gfa.segs),
paths: Size::of_pool(gfa.paths),
links: Size::of_pool(gfa.links),
steps: Size::of_pool(gfa.steps),
seq_data: Size::of_pool(gfa.seq_data),
overlaps: Size::of_pool(gfa.overlaps),
alignment: Size::of_pool(gfa.alignment),
name_data: Size::of_pool(gfa.name_data),
optional_data: Size::of_pool(gfa.optional_data),
line_order: Size::of_pool(gfa.line_order),
}
}

pub fn for_slice_store(store: &flatgfa::SliceStore) -> Self {
pub fn for_fixed_store(store: &flatgfa::FixedGFAStore) -> Self {
Self {
magic: MAGIC_NUMBER,
header: Size::of_slice_vec(&store.header),
segs: Size::of_slice_vec(&store.segs),
paths: Size::of_slice_vec(&store.paths),
links: Size::of_slice_vec(&store.links),
steps: Size::of_slice_vec(&store.steps),
seq_data: Size::of_slice_vec(&store.seq_data),
overlaps: Size::of_slice_vec(&store.overlaps),
alignment: Size::of_slice_vec(&store.alignment),
name_data: Size::of_slice_vec(&store.name_data),
optional_data: Size::of_slice_vec(&store.optional_data),
line_order: Size::of_slice_vec(&store.line_order),
header: Size::of_store(&store.header),
segs: Size::of_store(&store.segs),
paths: Size::of_store(&store.paths),
links: Size::of_store(&store.links),
steps: Size::of_store(&store.steps),
seq_data: Size::of_store(&store.seq_data),
overlaps: Size::of_store(&store.overlaps),
alignment: Size::of_store(&store.alignment),
name_data: Size::of_store(&store.name_data),
optional_data: Size::of_store(&store.optional_data),
line_order: Size::of_store(&store.line_order),
}
}

Expand Down Expand Up @@ -199,17 +199,17 @@ pub fn view(data: &[u8]) -> flatgfa::FlatGFA {
let (line_order, _) = slice_prefix(rest, toc.line_order);

flatgfa::FlatGFA {
header,
segs,
paths,
links,
steps,
seq_data,
overlaps,
alignment,
name_data,
optional_data,
line_order,
header: header.into(),
segs: segs.into(),
paths: paths.into(),
links: links.into(),
steps: steps.into(),
seq_data: seq_data.into(),
overlaps: overlaps.into(),
alignment: alignment.into(),
name_data: name_data.into(),
optional_data: optional_data.into(),
line_order: line_order.into(),
}
}

Expand All @@ -224,7 +224,7 @@ fn slice_vec_prefix<T: FromBytes + AsBytes>(
}

/// Get a FlatGFA `FixedGFAStore` from the suffix of a file just following the table of contents.
fn slice_store<'a>(data: &'a mut [u8], toc: &Toc) -> flatgfa::SliceStore<'a> {
fn slice_store<'a>(data: &'a mut [u8], toc: &Toc) -> flatgfa::FixedGFAStore<'a> {
let (header, rest) = slice_vec_prefix(data, toc.header);
let (segs, rest) = slice_vec_prefix(rest, toc.segs);
let (paths, rest) = slice_vec_prefix(rest, toc.paths);
Expand All @@ -237,29 +237,29 @@ fn slice_store<'a>(data: &'a mut [u8], toc: &Toc) -> flatgfa::SliceStore<'a> {
let (optional_data, rest) = slice_vec_prefix(rest, toc.optional_data);
let (line_order, _) = slice_vec_prefix(rest, toc.line_order);

flatgfa::SliceStore {
header,
segs,
paths,
links,
steps,
seq_data,
overlaps,
alignment,
name_data,
optional_data,
line_order,
flatgfa::FixedGFAStore {
header: header.into(),
segs: segs.into(),
paths: paths.into(),
links: links.into(),
steps: steps.into(),
seq_data: seq_data.into(),
overlaps: overlaps.into(),
alignment: alignment.into(),
name_data: name_data.into(),
optional_data: optional_data.into(),
line_order: line_order.into(),
}
}

/// Get a mutable FlatGFA `FixedGFAStore` backed by a byte buffer.
pub fn view_store(data: &mut [u8]) -> flatgfa::SliceStore {
pub fn view_store(data: &mut [u8]) -> flatgfa::FixedGFAStore {
let (toc, rest) = read_toc_mut(data);
slice_store(rest, toc)
}

/// Initialize a buffer with an empty FlatGFA store.
pub fn init(data: &mut [u8], toc: Toc) -> (&mut Toc, flatgfa::SliceStore) {
pub fn init(data: &mut [u8], toc: Toc) -> (&mut Toc, flatgfa::FixedGFAStore) {
// Write the table of contents.
assert!(data.len() == toc.size());
toc.write_to_prefix(data).unwrap();
Expand Down Expand Up @@ -291,17 +291,17 @@ pub fn dump(gfa: &flatgfa::FlatGFA, buf: &mut [u8]) {
let rest = write_bump(buf, &toc).unwrap();

// All the slices.
let rest = write_bytes(rest, gfa.header).unwrap();
let rest = write_bump(rest, gfa.segs).unwrap();
let rest = write_bump(rest, gfa.paths).unwrap();
let rest = write_bump(rest, gfa.links).unwrap();
let rest = write_bump(rest, gfa.steps).unwrap();
let rest = write_bytes(rest, gfa.seq_data).unwrap();
let rest = write_bump(rest, gfa.overlaps).unwrap();
let rest = write_bump(rest, gfa.alignment).unwrap();
let rest = write_bytes(rest, gfa.name_data).unwrap();
let rest = write_bytes(rest, gfa.optional_data).unwrap();
write_bytes(rest, gfa.line_order).unwrap();
let rest = write_bytes(rest, gfa.header.all()).unwrap();
let rest = write_bump(rest, gfa.segs.all()).unwrap();
let rest = write_bump(rest, gfa.paths.all()).unwrap();
let rest = write_bump(rest, gfa.links.all()).unwrap();
let rest = write_bump(rest, gfa.steps.all()).unwrap();
let rest = write_bytes(rest, gfa.seq_data.all()).unwrap();
let rest = write_bump(rest, gfa.overlaps.all()).unwrap();
let rest = write_bump(rest, gfa.alignment.all()).unwrap();
let rest = write_bytes(rest, gfa.name_data.all()).unwrap();
let rest = write_bytes(rest, gfa.optional_data.all()).unwrap();
write_bytes(rest, gfa.line_order.all()).unwrap();
}

/// Get the total size in bytes of a FlatGFA structure. This should result in a big
Expand Down
Loading
Loading