Skip to content

Commit

Permalink
build: bump datafusion version (#661)
Browse files Browse the repository at this point in the history
  • Loading branch information
tshauck authored Jan 18, 2025
1 parent e20e6dd commit 4ccfa17
Show file tree
Hide file tree
Showing 28 changed files with 270 additions and 275 deletions.
385 changes: 208 additions & 177 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ resolver = "2"
[workspace.dependencies]
arrow = { version = "53.3" }
async-trait = "0.1.82"
datafusion = { version = "43", features = ["compression", "parquet"] }
datafusion = { version = "44", features = ["compression", "parquet"] }
futures = "0.3"
noodles = { version = "0.87" }
object_store = { version = "0.11.2" }
Expand Down
2 changes: 1 addition & 1 deletion exon/exon-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ version.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
datafusion-cli = { version = "43" }
datafusion-cli = { version = "44" }
clap = { version = "4", features = ["derive", "cargo"] }
datafusion = { workspace = true }
object_store = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion exon/exon-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ exon-fcs = { path = "../exon-fcs", version = "0.32.4", optional = true }
exon-genbank = { path = "../exon-genbank", version = "0.32.4", optional = true }
exon-gff = { path = "../exon-gff", version = "0.32.4" }
exon-gtf = { path = "../exon-gtf", version = "0.32.4" }
deltalake = { version = "0.22.3", features = [
deltalake = { version = "0.23.2", features = [
"datafusion",
"deltalake-aws",
"s3",
Expand Down
3 changes: 2 additions & 1 deletion exon/exon-core/src/datasources/bam/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ use crate::{
ExonRuntimeEnvExt,
};
use datafusion::{
datasource::{function::TableFunctionImpl, listing::ListingTableUrl, TableProvider},
catalog::TableFunctionImpl,
datasource::{listing::ListingTableUrl, TableProvider},
error::{DataFusionError, Result},
execution::context::SessionContext,
logical_expr::Expr,
Expand Down
6 changes: 2 additions & 4 deletions exon/exon-core/src/datasources/bcf/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,8 @@ use std::sync::Arc;

use crate::datasources::{exon_listing_table_options::ExonListingConfig, ScanFunction};
use datafusion::{
datasource::{function::TableFunctionImpl, TableProvider},
error::Result,
execution::context::SessionContext,
logical_expr::Expr,
catalog::TableFunctionImpl, datasource::TableProvider, error::Result,
execution::context::SessionContext, logical_expr::Expr,
};
use exon_common::TableSchema;

Expand Down
6 changes: 2 additions & 4 deletions exon/exon-core/src/datasources/bed/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@ use crate::{
ExonRuntimeEnvExt,
};
use datafusion::{
datasource::{function::TableFunctionImpl, TableProvider},
error::Result,
execution::context::SessionContext,
logical_expr::Expr,
catalog::TableFunctionImpl, datasource::TableProvider, error::Result,
execution::context::SessionContext, logical_expr::Expr,
};
use exon_bed::BEDSchemaBuilder;

Expand Down
1 change: 1 addition & 0 deletions exon/exon-core/src/datasources/cram/indexed_file_opener.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ impl FileOpener for IndexedCRAMOpener {
extensions,
object_meta,
range: _,
metadata_size_hint: _,
} = file_meta;

let index_record = extensions
Expand Down
2 changes: 1 addition & 1 deletion exon/exon-core/src/datasources/cram/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use datafusion::datasource::listing::ListingTableUrl;
use datafusion::datasource::TableProvider;
use datafusion::scalar::ScalarValue;
use datafusion::{
datasource::function::TableFunctionImpl, execution::context::SessionContext, logical_expr::Expr,
catalog::TableFunctionImpl, execution::context::SessionContext, logical_expr::Expr,
};

use datafusion::error::Result as DataFusionResult;
Expand Down
10 changes: 7 additions & 3 deletions exon/exon-core/src/datasources/exon_file_scan_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use datafusion::{
common::Statistics,
datasource::{listing::PartitionedFile, physical_plan::FileScanConfig},
physical_expr::EquivalenceProperties,
physical_plan::{ExecutionMode, Partitioning, PlanProperties},
physical_plan::{execution_plan::Boundedness, Partitioning, PlanProperties},
};
use itertools::Itertools;

Expand Down Expand Up @@ -51,8 +51,12 @@ impl ExonFileScanConfig for FileScanConfig {

let output_partitioning = Partitioning::UnknownPartitioning(self.file_groups.len());

let properties =
PlanProperties::new(eq_properties, output_partitioning, ExecutionMode::Bounded);
let properties = PlanProperties::new(
eq_properties,
output_partitioning,
datafusion::physical_plan::execution_plan::EmissionType::Both,
Boundedness::Bounded,
);

(schema, statistics, properties)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
use std::{str::FromStr, sync::Arc};

use datafusion::{
catalog::TableFunctionImpl,
datasource::{
file_format::file_compression_type::FileCompressionType, function::TableFunctionImpl,
listing::ListingTableUrl, TableProvider,
file_format::file_compression_type::FileCompressionType, listing::ListingTableUrl,
TableProvider,
},
error::{DataFusionError, Result as DataFusionResult},
execution::context::SessionContext,
Expand Down
6 changes: 2 additions & 4 deletions exon/exon-core/src/datasources/fasta/udtfs/fasta_scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@
use std::sync::Arc;

use datafusion::{
datasource::{function::TableFunctionImpl, TableProvider},
error::Result,
execution::context::SessionContext,
logical_expr::Expr,
catalog::TableFunctionImpl, datasource::TableProvider, error::Result,
execution::context::SessionContext, logical_expr::Expr,
};
use exon_fasta::FASTASchemaBuilder;

Expand Down
6 changes: 2 additions & 4 deletions exon/exon-core/src/datasources/fastq/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@
use std::sync::Arc;

use datafusion::{
datasource::{function::TableFunctionImpl, TableProvider},
error::Result,
execution::context::SessionContext,
logical_expr::Expr,
catalog::TableFunctionImpl, datasource::TableProvider, error::Result,
execution::context::SessionContext, logical_expr::Expr,
};
use exon_fastq::new_fastq_schema_builder;

Expand Down
6 changes: 2 additions & 4 deletions exon/exon-core/src/datasources/fcs/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,8 @@ use std::sync::Arc;

use crate::datasources::ScanFunction;
use datafusion::{
datasource::{function::TableFunctionImpl, TableProvider},
error::Result,
execution::context::SessionContext,
logical_expr::Expr,
catalog::TableFunctionImpl, datasource::TableProvider, error::Result,
execution::context::SessionContext, logical_expr::Expr,
};
use exon_common::TableSchema;

Expand Down
6 changes: 2 additions & 4 deletions exon/exon-core/src/datasources/genbank/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@ use crate::{
ExonRuntimeEnvExt,
};
use datafusion::{
datasource::{function::TableFunctionImpl, TableProvider},
error::Result,
execution::context::SessionContext,
logical_expr::Expr,
catalog::TableFunctionImpl, datasource::TableProvider, error::Result,
execution::context::SessionContext, logical_expr::Expr,
};
use exon_genbank::schema;

Expand Down
5 changes: 3 additions & 2 deletions exon/exon-core/src/datasources/gff/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ use crate::{
ExonRuntimeEnvExt,
};
use datafusion::{
catalog::TableFunctionImpl,
datasource::{
file_format::file_compression_type::FileCompressionType, function::TableFunctionImpl,
listing::ListingTableUrl, TableProvider,
file_format::file_compression_type::FileCompressionType, listing::ListingTableUrl,
TableProvider,
},
error::{DataFusionError, Result},
execution::context::SessionContext,
Expand Down
6 changes: 2 additions & 4 deletions exon/exon-core/src/datasources/gtf/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,8 @@ use crate::{
ExonRuntimeEnvExt,
};
use datafusion::{
datasource::{function::TableFunctionImpl, TableProvider},
error::Result,
execution::context::SessionContext,
logical_expr::Expr,
catalog::TableFunctionImpl, datasource::TableProvider, error::Result,
execution::context::SessionContext, logical_expr::Expr,
};
use exon_gtf::new_gtf_schema_builder;

Expand Down
4 changes: 1 addition & 3 deletions exon/exon-core/src/datasources/hmmdomtab/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ use super::{
};
use crate::datasources::{exon_listing_table_options::ExonListingConfig, ScanFunction};
use datafusion::{
datasource::{function::TableFunctionImpl, TableProvider},
error::Result,
logical_expr::Expr,
catalog::TableFunctionImpl, datasource::TableProvider, error::Result, logical_expr::Expr,
};

/// A table function that returns a table provider for a HMMDomTab file.
Expand Down
6 changes: 2 additions & 4 deletions exon/exon-core/src/datasources/mzml/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,8 @@ use crate::{
ExonRuntimeEnvExt,
};
use datafusion::{
datasource::{function::TableFunctionImpl, TableProvider},
error::Result,
execution::context::SessionContext,
logical_expr::Expr,
catalog::TableFunctionImpl, datasource::TableProvider, error::Result,
execution::context::SessionContext, logical_expr::Expr,
};
use exon_mzml::MzMLSchemaBuilder;

Expand Down
6 changes: 2 additions & 4 deletions exon/exon-core/src/datasources/sam/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,8 @@ use crate::{
datasources::{exon_listing_table_options::ExonListingConfig, ScanFunction},
};
use datafusion::{
datasource::{function::TableFunctionImpl, TableProvider},
error::Result,
execution::context::SessionContext,
logical_expr::Expr,
catalog::TableFunctionImpl, datasource::TableProvider, error::Result,
execution::context::SessionContext, logical_expr::Expr,
};
use exon_common::TableSchema;

Expand Down
5 changes: 3 additions & 2 deletions exon/exon-core/src/datasources/vcf/udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ use crate::{
error::ExonError,
};
use datafusion::{
catalog::TableFunctionImpl,
datasource::{
file_format::file_compression_type::FileCompressionType, function::TableFunctionImpl,
listing::ListingTableUrl, TableProvider,
file_format::file_compression_type::FileCompressionType, listing::ListingTableUrl,
TableProvider,
},
error::{DataFusionError, Result},
execution::context::SessionContext,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ pub async fn pruned_partition_list<'a>(
range: None,
extensions: None,
statistics: None,
metadata_size_hint: None,
})
}));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ impl ExtensionPlanner for ExomeExtensionPlanner {
CopyToSource::Query(q) => {
session_state
.statement_to_plan(Statement::Statement(Box::new(ast::Statement::Query(
Box::new(q.clone()),
Box::new(*q.clone()),
))))
.await?
}
Expand Down
10 changes: 2 additions & 8 deletions exon/exon-core/src/physical_plan/pos_interval_physical_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::{any::Any, fmt::Display, hash::Hash, sync::Arc};
use std::{any::Any, fmt::Display, sync::Arc};

use arrow::datatypes::SchemaRef;
use datafusion::{
Expand All @@ -28,7 +28,7 @@ use noodles::core::region::Interval;
use crate::error::invalid_interval::InvalidIntervalError;

/// A physical expression that represents a genomic interval.
#[derive(Debug)]
#[derive(Debug, Hash, Eq)]
pub struct PosIntervalPhysicalExpr {
start: usize,
end: Option<usize>,
Expand Down Expand Up @@ -217,12 +217,6 @@ impl PhysicalExpr for PosIntervalPhysicalExpr {
Arc::clone(&self.inner),
)))
}

fn dyn_hash(&self, state: &mut dyn std::hash::Hasher) {
let mut s = state;
self.start.hash(&mut s);
self.end.hash(&mut s);
}
}

#[cfg(test)]
Expand Down
12 changes: 6 additions & 6 deletions exon/exon-core/src/physical_plan/region_name_physical_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use datafusion::{
/// A physical expression that represents a chromosome.
///
/// Under the hood, this is a binary expression that compares the `chrom` column to a literal. But may be used to optimize queries.
#[derive(Debug)]
#[derive(Debug, Hash, Eq)]
pub struct RegionNamePhysicalExpr {
field_name: String,
field_value: String,
Expand Down Expand Up @@ -257,11 +257,11 @@ impl PhysicalExpr for RegionNamePhysicalExpr {
)))
}

fn dyn_hash(&self, state: &mut dyn std::hash::Hasher) {
let mut s = state;
self.field_name().hash(&mut s);
self.field_value().hash(&mut s);
}
// fn dyn_hash(&self, state: &mut dyn std::hash::Hasher) {
// let mut s = state;
// self.field_name().hash(&mut s);
// self.field_value().hash(&mut s);
// }
}

#[cfg(test)]
Expand Down
19 changes: 8 additions & 11 deletions exon/exon-core/src/physical_plan/region_physical_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,19 @@ use super::{
};

/// A physical expression that represents a region, e.g. chr1:100-200.
#[derive(Debug)]
#[derive(Debug, Hash, Eq)]
pub struct RegionPhysicalExpr {
region_name_expr: Arc<dyn PhysicalExpr>,
interval_expr: Option<Arc<dyn PhysicalExpr>>,
}

impl PartialEq for RegionPhysicalExpr {
fn eq(&self, other: &Self) -> bool {
self.region_name_expr == other.region_name_expr.clone()
&& self.interval_expr == other.interval_expr
}
}

impl RegionPhysicalExpr {
/// Create a new `RegionPhysicalExpr` from a region and two inner expressions.
pub fn new(
Expand Down Expand Up @@ -245,16 +252,6 @@ impl PhysicalExpr for RegionPhysicalExpr {
self.interval_expr.clone(),
)))
}

fn dyn_hash(&self, state: &mut dyn std::hash::Hasher) {
let mut s = state;

self.region_name_expr.dyn_hash(&mut s);

if let Some(ref interval_expr) = self.interval_expr {
interval_expr.dyn_hash(&mut s);
}
}
}

#[cfg(test)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

use std::any::Any;
use std::fmt::{Display, Formatter};
use std::hash::Hash;
use std::sync::Arc;

use datafusion::error::{DataFusionError, Result};
Expand All @@ -28,7 +27,7 @@ use noodles::core::region::Interval;
/// This is where the start is before the interval end and the end is after the interval start.
/// Query: |---------| (10 to 20)
/// Read: |------| (15 to 25)
#[derive(Debug)]
#[derive(Debug, Hash, Eq)]
pub struct StartEndIntervalPhysicalExpr {
start: usize,
end: Option<usize>,
Expand Down Expand Up @@ -196,12 +195,6 @@ impl PhysicalExpr for StartEndIntervalPhysicalExpr {
Arc::clone(&self.inner),
)))
}

fn dyn_hash(&self, state: &mut dyn std::hash::Hasher) {
let mut s = state;
self.start.hash(&mut s);
self.end.hash(&mut s);
}
}

#[cfg(test)]
Expand Down
Loading

0 comments on commit 4ccfa17

Please sign in to comment.