Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: move out bed #304

Merged
merged 1 commit into from
Nov 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ members = [
"exon-cli",
"exon-examples",
"exon/exon-bam",
"exon/exon-bed",
"exon/exon-common",
"exon/exon-core",
"exon/exon-exome",
Expand Down
4 changes: 4 additions & 0 deletions docs/release-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ flowchart TD
exon-sam --> exon-bam
exon-sam --> exon-core
exon-bam --> exon-core
exon-bed --> exon-core
exon-common --> exon-core
exon-fasta --> exon-core
exon-fastq --> exon-core
exon-gff --> exon-core
exon-gtf --> exon-core
exon-io --> exon-core
exon-core --> exon-exome
exon-test --dev-dep--> exon-fasta
Expand Down Expand Up @@ -43,10 +45,12 @@ Then publish the crates:
```console
# Crates that do not depend on exon-core (exon-bed requires exon-common, so exon-common must already be published at the matching version)
cargo publish --manifest-path exon/exon-bam/Cargo.toml
cargo publish --manifest-path exon/exon-bed/Cargo.toml
cargo publish --manifest-path exon/exon-common/Cargo.toml
cargo publish --manifest-path exon/exon-fasta/Cargo.toml
cargo publish --manifest-path exon/exon-fastq/Cargo.toml
cargo publish --manifest-path exon/exon-gff/Cargo.toml
cargo publish --manifest-path exon/exon-gtf/Cargo.toml
cargo publish --manifest-path exon/exon-io/Cargo.toml

# Crates that depend on other crates
Expand Down
20 changes: 20 additions & 0 deletions exon/exon-bed/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[package]
description = "Subcrate of the `exon` crate for working with BED files."
edition.workspace = true
homepage.workspace = true
license.workspace = true
name = "exon-bed"
readme.workspace = true
repository.workspace = true
version.workspace = true

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = {workspace = true}
exon-common = {path = "../exon-common", version = "0.3.9"}
futures = {workspace = true}
noodles = {version = "0.59", features = ["bed"]}
object_store = {workspace = true}
tokio = {workspace = true}
tokio-util = {workspace = true}
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
use std::sync::Arc;

use arrow::{
array::{ArrayBuilder, ArrayRef, GenericStringBuilder, Int64Builder},
array::{ArrayRef, GenericStringBuilder, Int64Builder},
datatypes::{DataType, Field, Schema},
};
use exon_common::TableSchema;

use super::bed_record_builder::BEDRecord;

pub(crate) struct BEDSchemaBuilder {
pub struct BEDSchemaBuilder {
file_fields: Vec<Field>,
partition_fields: Vec<Field>,
}
Expand Down Expand Up @@ -86,6 +86,8 @@ pub struct BEDArrayBuilder {
block_counts: Int64Builder,
block_sizes: GenericStringBuilder<i32>,
block_starts: GenericStringBuilder<i32>,

rows: usize,
}

impl BEDArrayBuilder {
Expand All @@ -103,11 +105,16 @@ impl BEDArrayBuilder {
block_counts: Int64Builder::new(),
block_sizes: GenericStringBuilder::<i32>::new(),
block_starts: GenericStringBuilder::<i32>::new(),
rows: 0,
}
}

pub fn len(&self) -> usize {
self.reference_sequence_names.len()
self.rows
}

/// Reports whether the builder's `rows` counter is zero.
///
/// The counter starts at 0 in the constructor and is incremented at the end
/// of `append`, so this is the companion to `len()` for "nothing buffered yet".
pub fn is_empty(&self) -> bool {
self.rows == 0
}

pub fn append(&mut self, record: BEDRecord) -> std::io::Result<()> {
Expand All @@ -133,6 +140,8 @@ impl BEDArrayBuilder {
self.block_sizes.append_option(record.block_sizes);
self.block_starts.append_option(record.block_starts);

self.rows += 1;

Ok(())
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ where
}
}

if array_builder.len() == 0 {
if array_builder.is_empty() {
return Ok(None);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@
use std::sync::Arc;

use arrow::datatypes::SchemaRef;
use exon_common::DEFAULT_BATCH_SIZE;
use object_store::ObjectStore;

use crate::datasources::DEFAULT_BATCH_SIZE;

/// Configuration for a BED datasource.
#[derive(Debug)]
pub struct BEDConfig {
Expand Down
23 changes: 23 additions & 0 deletions exon/exon-bed/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright 2023 WHERE TRUE Technologies.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Subcrate of the `exon` crate for working with BED files.
//!
//! The implementation modules stay private; the public surface is the set of
//! re-exports below (array/schema builders, the batch reader, and the config).

mod array_builder;
mod batch_reader;
mod bed_record_builder;
mod config;

pub use self::{
    array_builder::{BEDArrayBuilder, BEDSchemaBuilder},
    batch_reader::BatchReader,
    config::BEDConfig,
};
1 change: 1 addition & 0 deletions exon/exon-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ byteorder = {version = "1.5.0", optional = true}
bytes = "1.5.0"
datafusion = {workspace = true}
exon-bam = {path = "../exon-bam", version = "0.3.9"}
exon-bed = {path = "../exon-bed", version = "0.3.9"}
exon-common = {path = "../exon-common", version = "0.3.9"}
exon-fasta = {path = "../exon-fasta", version = "0.3.9"}
exon-fastq = {path = "../exon-fastq", version = "0.3.9"}
Expand Down
3 changes: 1 addition & 2 deletions exon/exon-core/src/datasources/bed/file_opener.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,10 @@ use datafusion::{
},
error::DataFusionError,
};
use exon_bed::{BEDConfig, BatchReader};
use futures::{StreamExt, TryStreamExt};
use tokio_util::io::StreamReader;

use super::{batch_reader::BatchReader, config::BEDConfig};

/// Implements a datafusion `FileOpener` for BED files.
pub struct BEDOpener {
/// The configuration for the file scan.
Expand Down
5 changes: 0 additions & 5 deletions exon/exon-core/src/datasources/bed/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,11 @@
//!
//! This module provides functionality for working with BED files as a data source.

mod array_builder;
mod batch_reader;
mod bed_record_builder;
mod config;
mod file_opener;
mod scanner;

/// Table provider for BED files.
pub mod table_provider;

pub use self::config::BEDConfig;
pub use self::file_opener::BEDOpener;
pub use self::scanner::BEDScan;
3 changes: 2 additions & 1 deletion exon/exon-core/src/datasources/bed/scanner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ use datafusion::{
Partitioning, SendableRecordBatchStream,
},
};
use exon_bed::BEDConfig;

use crate::datasources::ExonFileScanConfig;

use super::{config::BEDConfig, file_opener::BEDOpener};
use super::file_opener::BEDOpener;

#[derive(Debug, Clone)]
/// Implements a datafusion `ExecutionPlan` for BED files.
Expand Down
3 changes: 2 additions & 1 deletion exon/exon-core/src/datasources/bed/table_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ use datafusion::{
physical_plan::{empty::EmptyExec, ExecutionPlan},
prelude::Expr,
};
use exon_bed::BEDSchemaBuilder;
use exon_common::TableSchema;
use futures::TryStreamExt;

use super::{array_builder::BEDSchemaBuilder, BEDScan};
use super::BEDScan;

#[derive(Debug, Clone)]
/// Configuration for a BED listing table
Expand Down