From 6b3569bc8e97e69890d6bc9e74c3f34617454a29 Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Wed, 6 Nov 2024 13:48:28 -0800 Subject: [PATCH 01/12] add range support --- Cargo.lock | 13 ++ daft/daft/__init__.pyi | 2 + src/daft-connect/Cargo.toml | 15 +- src/daft-connect/src/command.rs | 102 +++++++++++++ src/daft-connect/src/convert.rs | 45 ++++++ .../src/convert/data_conversion.rs | 61 ++++++++ .../src/convert/data_conversion/range.rs | 48 ++++++ src/daft-connect/src/convert/expression.rs | 120 +++++++++++++++ src/daft-connect/src/convert/formatting.rs | 69 +++++++++ .../src/convert/plan_conversion.rs | 134 +++++++++++++++++ .../src/convert/schema_conversion.rs | 56 +++++++ src/daft-connect/src/err.rs | 2 +- src/daft-connect/src/lib.rs | 137 +++++++++++++++++- src/daft-connect/src/session.rs | 9 +- src/daft-local-execution/src/run.rs | 58 ++++++-- .../rules/eliminate_cross_join.rs | 6 +- src/daft-scan/src/hive.rs | 6 +- tests/connect/test_parquet_simple.py | 47 ++++++ tests/connect/test_range_simple.py | 38 +++++ 19 files changed, 943 insertions(+), 25 deletions(-) create mode 100644 src/daft-connect/src/command.rs create mode 100644 src/daft-connect/src/convert.rs create mode 100644 src/daft-connect/src/convert/data_conversion.rs create mode 100644 src/daft-connect/src/convert/data_conversion/range.rs create mode 100644 src/daft-connect/src/convert/expression.rs create mode 100644 src/daft-connect/src/convert/formatting.rs create mode 100644 src/daft-connect/src/convert/plan_conversion.rs create mode 100644 src/daft-connect/src/convert/schema_conversion.rs create mode 100644 tests/connect/test_parquet_simple.py create mode 100644 tests/connect/test_range_simple.py diff --git a/Cargo.lock b/Cargo.lock index 4e7a19344b..6ef0cb835d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1907,12 +1907,25 @@ dependencies = [ name = "daft-connect" version = "0.3.0-dev0" dependencies = [ + "arrow2", + "common-daft-config", + "common-error", + "common-file-formats", + "daft-core", + "daft-dsl", + "daft-local-execution", + "daft-local-plan", + "daft-logical-plan", + "daft-physical-plan", + "daft-schema", + "daft-table", "dashmap", "eyre", "futures", "pyo3", "spark-connect", "tokio", + "tokio-stream", "tonic", "tracing", "tracing-subscriber", diff --git a/daft/daft/__init__.pyi b/daft/daft/__init__.pyi index 12cffd7dc1..13d2fc6800 100644 --- a/daft/daft/__init__.pyi +++ b/daft/daft/__init__.pyi @@ -1241,6 +1241,8 @@ def connect_start(addr: str) -> ConnectionHandle: ... class ConnectionHandle: def shutdown(self) -> None: ... +def connect_start(addr: str) -> None: ... + # expr numeric ops def abs(expr: PyExpr) -> PyExpr: ... def cbrt(expr: PyExpr) -> PyExpr: ... 
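[Review note] The `daft/__init__.pyi` hunk above adds a second `connect_start(addr: str) -> None` stub alongside the existing `connect_start(addr: str) -> ConnectionHandle`; patch 02/12 below removes the duplicate. The dependency changes pull in `arrow2`, which `src/daft-connect/src/command.rs` (next hunk) uses to encode each result `Table` as an Arrow IPC stream batch. The following is a minimal standalone sketch of that encoding flow, not code from this patch; it assumes arrow2 is built with its `io_ipc` feature, and the column name and values are illustrative:

```rust
use arrow2::array::{Array, Int64Array};
use arrow2::chunk::Chunk;
use arrow2::datatypes::{DataType, Field, Schema};
use arrow2::io::ipc::write::{StreamWriter, WriteOptions};

fn main() -> arrow2::error::Result<()> {
    // One Int64 column, analogous to the `range` column produced in
    // src/daft-connect/src/convert/data_conversion/range.rs below.
    let array = Int64Array::from_vec((0..10).collect());
    let schema = Schema::from(vec![Field::new("range", DataType::Int64, false)]);
    let chunk = Chunk::new(vec![Box::new(array) as Box<dyn Array>]);

    // Encode to the Arrow IPC *stream* format in memory, mirroring
    // PlanContext::gen_response in command.rs (next hunk).
    let mut data = Vec::new();
    let mut writer = StreamWriter::new(&mut data, WriteOptions { compression: None });
    writer.start(&schema, None)?; // schema message (stream header)
    writer.write(&chunk, None)?;  // one record batch
    writer.finish()?;             // end-of-stream marker
    assert!(!data.is_empty());
    Ok(())
}
```

This is the same `start`/`write` sequence `gen_response` performs into the `data` buffer of an `ArrowBatch` response; `finish()` additionally writes the IPC end-of-stream marker that terminates the stream.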
diff --git a/src/daft-connect/Cargo.toml b/src/daft-connect/Cargo.toml index c11972fa4c..2bcec88a89 100644 --- a/src/daft-connect/Cargo.toml +++ b/src/daft-connect/Cargo.toml @@ -1,14 +1,27 @@ [dependencies] +arrow2.workspace = true +common-daft-config.workspace = true +common-file-formats.workspace = true +daft-core.workspace = true +daft-dsl.workspace = true +daft-local-execution.workspace = true +daft-local-plan.workspace = true +daft-logical-plan.workspace = true +daft-physical-plan.workspace = true +daft-schema.workspace = true +daft-table.workspace = true dashmap = "6.1.0" eyre = "0.6.12" futures = "0.3.31" pyo3 = {workspace = true, optional = true} +spark-connect.workspace = true tokio = {version = "1.40.0", features = ["full"]} +tokio-stream = "0.1.16" tonic = "0.12.3" tracing-subscriber = {version = "0.3.18", features = ["env-filter"]} tracing-tracy = "0.11.3" +common-error.workspace = true uuid = {version = "1.10.0", features = ["v4"]} -spark-connect.workspace = true tracing.workspace = true [features] diff --git a/src/daft-connect/src/command.rs b/src/daft-connect/src/command.rs new file mode 100644 index 0000000000..0cedd56493 --- /dev/null +++ b/src/daft-connect/src/command.rs @@ -0,0 +1,102 @@ +use std::future::ready; + +use arrow2::io::ipc::write::StreamWriter; +use daft_table::Table; +use eyre::Context; +use futures::{stream, StreamExt, TryStreamExt}; +use spark_connect::{ + execute_plan_response::{ArrowBatch, ResponseType, ResultComplete}, + spark_connect_service_server::SparkConnectService, + ExecutePlanResponse, Relation, +}; +use tonic::Status; +use uuid::Uuid; + +use crate::{convert::convert_data, DaftSparkConnectService, Session}; + +type DaftStream = ::ExecutePlanStream; + +pub struct PlanContext { + session_id: String, + server_side_session_id: String, + operation_id: String, +} + +impl PlanContext { + pub fn gen_response(&mut self, table: &Table) -> eyre::Result { + let mut data = Vec::new(); + + let mut writer = StreamWriter::new( + &mut data, + arrow2::io::ipc::write::WriteOptions { compression: None }, + ); + + let row_count = table.num_rows(); + + let schema = table + .schema + .to_arrow() + .wrap_err("Failed to convert Daft schema to Arrow schema")?; + + writer + .start(&schema, None) + .wrap_err("Failed to start Arrow stream writer with schema")?; + + let arrays = table.get_inner_arrow_arrays().collect(); + let chunk = arrow2::chunk::Chunk::new(arrays); + + writer + .write(&chunk, None) + .wrap_err("Failed to write Arrow chunk to stream writer")?; + + let response = ExecutePlanResponse { + session_id: self.session_id.to_string(), + server_side_session_id: self.server_side_session_id.to_string(), + operation_id: self.operation_id.to_string(), + response_id: Uuid::new_v4().to_string(), // todo: implement this + metrics: None, // todo: implement this + observed_metrics: vec![], + schema: None, + response_type: Some(ResponseType::ArrowBatch(ArrowBatch { + row_count: row_count as i64, + data, + start_offset: None, + })), + }; + + Ok(response) + } +} + +impl Session { + pub async fn handle_root_command( + &self, + command: Relation, + operation_id: String, + ) -> Result { + let mut context = PlanContext { + session_id: self.client_side_session_id().to_string(), + server_side_session_id: self.server_side_session_id().to_string(), + operation_id: operation_id.clone(), + }; + + let finished = ExecutePlanResponse { + session_id: self.client_side_session_id().to_string(), + server_side_session_id: self.server_side_session_id().to_string(), + operation_id, + response_id: 
Uuid::new_v4().to_string(), + metrics: None, + observed_metrics: vec![], + schema: None, + response_type: Some(ResponseType::ResultComplete(ResultComplete {})), + }; + + let stream = convert_data(command, &mut context) + .map_err(|e| Status::internal(e.to_string()))? + .chain(stream::once(ready(Ok(finished)))); + + Ok(Box::pin( + stream.map_err(|e| Status::internal(e.to_string())), + )) + } +} diff --git a/src/daft-connect/src/convert.rs b/src/daft-connect/src/convert.rs new file mode 100644 index 0000000000..3d31693bc3 --- /dev/null +++ b/src/daft-connect/src/convert.rs @@ -0,0 +1,45 @@ +mod data_conversion; +mod expression; +mod formatting; +mod plan_conversion; +mod schema_conversion; + +use std::{collections::HashMap, pin::Pin, sync::Arc}; + +use common_daft_config::DaftExecutionConfig; +use common_error::{DaftError, DaftResult}; +use daft_logical_plan::LogicalPlanRef; +use daft_table::Table; +pub use data_conversion::convert_data; +use futures::{stream, Stream, StreamExt}; +pub use schema_conversion::connect_schema; + +pub fn run_local( + logical_plan: &LogicalPlanRef, +) -> DaftResult>> { + let physical_plan = daft_local_plan::translate(logical_plan)?; + let cfg = Arc::new(DaftExecutionConfig::default()); + let psets = HashMap::new(); + + let stream = daft_local_execution::run_local(&physical_plan, psets, cfg, None)?; + + let stream = stream + .map(|partition| match partition { + Ok(partition) => partition.get_tables().map_err(DaftError::from), + Err(err) => Err(err), + }) + .flat_map(|tables| match tables { + Ok(tables) => { + let tables = Arc::try_unwrap(tables).unwrap(); + + let tables = tables.into_iter().map(Ok); + let stream: Pin>>> = + Box::pin(stream::iter(tables)); + + stream + } + Err(err) => Box::pin(stream::once(async { Err(err) })), + }); + + Ok(stream) +} diff --git a/src/daft-connect/src/convert/data_conversion.rs b/src/daft-connect/src/convert/data_conversion.rs new file mode 100644 index 0000000000..11233e6cc5 --- /dev/null +++ b/src/daft-connect/src/convert/data_conversion.rs @@ -0,0 +1,61 @@ +//! Relation handling for Spark Connect protocol. +//! +//! A Relation represents a structured dataset or transformation in Spark Connect. +//! It can be either a base relation (direct data source) or derived relation +//! (result of operations on other relations). +//! +//! The protocol represents relations as trees of operations where: +//! - Each node is a Relation with metadata and an operation type +//! - Operations can reference other relations, forming a DAG +//! - The tree describes how to derive the final result +//! +//! Example flow for: SELECT age, COUNT(*) FROM employees WHERE dept='Eng' GROUP BY age +//! +//! ```text +//! Aggregate (grouping by age) +//! ↳ Filter (department = 'Engineering') +//! ↳ Read (employees table) +//! ``` +//! +//! Relations abstract away: +//! - Physical storage details +//! - Distributed computation +//! - Query optimization +//! - Data source specifics +//! +//! This allows Spark to optimize and execute queries efficiently across a cluster +//! while providing a consistent API regardless of the underlying data source. +//! ```mermaid +//! +//! 
``` + +use eyre::{eyre, Context}; +use futures::Stream; +use spark_connect::{relation::RelType, ExecutePlanResponse, Relation}; +use tracing::trace; + +use crate::convert::formatting::RelTypeExt; + +mod range; +use range::range; + +use crate::command::PlanContext; + +pub fn convert_data( + plan: Relation, + context: &mut PlanContext, +) -> eyre::Result> + Unpin> { + // First check common fields if needed + if let Some(common) = &plan.common { + // contains metadata shared across all relation types + // Log or handle common fields if necessary + trace!("Processing relation with plan_id: {:?}", common.plan_id); + } + + let rel_type = plan.rel_type.ok_or_else(|| eyre!("rel_type is None"))?; + + match rel_type { + RelType::Range(input) => range(input, context).wrap_err("parsing Range"), + other => Err(eyre!("Unsupported top-level relation: {}", other.name())), + } +} diff --git a/src/daft-connect/src/convert/data_conversion/range.rs b/src/daft-connect/src/convert/data_conversion/range.rs new file mode 100644 index 0000000000..787a379363 --- /dev/null +++ b/src/daft-connect/src/convert/data_conversion/range.rs @@ -0,0 +1,48 @@ +use std::future::ready; + +use daft_core::prelude::Series; +use daft_schema::prelude::Schema; +use daft_table::Table; +use eyre::{ensure, Context}; +use futures::{stream, Stream}; +use spark_connect::{ExecutePlanResponse, Range}; + +use crate::command::PlanContext; + +pub fn range( + range: Range, + channel: &mut PlanContext, +) -> eyre::Result> + Unpin> { + let Range { + start, + end, + step, + num_partitions, + } = range; + + let start = start.unwrap_or(0); + + ensure!(num_partitions.is_none(), "num_partitions is not supported"); + + let step = usize::try_from(step).wrap_err("step must be a positive integer")?; + ensure!(step > 0, "step must be greater than 0"); + + let arrow_array: arrow2::array::Int64Array = (start..end).step_by(step).map(Some).collect(); + let len = arrow_array.len(); + + let singleton_series = Series::try_from(( + "range", + Box::new(arrow_array) as Box, + )) + .wrap_err("creating singleton series")?; + + let singleton_table = Table::new_with_size( + Schema::new(vec![singleton_series.field().clone()])?, + vec![singleton_series], + len, + )?; + + let response = channel.gen_response(&singleton_table)?; + + Ok(stream::once(ready(Ok(response)))) +} diff --git a/src/daft-connect/src/convert/expression.rs b/src/daft-connect/src/convert/expression.rs new file mode 100644 index 0000000000..f79a7bf5a8 --- /dev/null +++ b/src/daft-connect/src/convert/expression.rs @@ -0,0 +1,120 @@ +use daft_dsl::{Expr as DaftExpr, Operator}; +use eyre::{bail, ensure, eyre, Result}; +use spark_connect::{expression, expression::literal::LiteralType, Expression}; + +pub fn convert_expression(expr: Expression) -> Result { + match expr.expr_type { + Some(expression::ExprType::Literal(lit)) => Ok(DaftExpr::Literal(convert_literal(lit)?)), + + Some(expression::ExprType::UnresolvedAttribute(attr)) => { + Ok(DaftExpr::Column(attr.unparsed_identifier.into())) + } + + Some(expression::ExprType::Alias(alias)) => { + let expression::Alias { + expr, + name, + metadata, + } = *alias; + let expr = *expr.ok_or_else(|| eyre!("expr is None"))?; + + // Convert alias + let expr = convert_expression(expr)?; + + if let Some(metadata) = metadata + && !metadata.is_empty() + { + bail!("Metadata is not yet supported"); + } + + // ignore metadata for now + + let [name] = name.as_slice() else { + bail!("Alias name must have exactly one element"); + }; + + Ok(DaftExpr::Alias(expr.into(), 
name.as_str().into())) + } + + Some(expression::ExprType::UnresolvedFunction(expression::UnresolvedFunction { + function_name, + arguments, + is_distinct, + is_user_defined_function, + })) => { + ensure!(!is_distinct, "Distinct is not yet supported"); + ensure!( + !is_user_defined_function, + "User-defined functions are not yet supported" + ); + + let op = function_name.as_str(); + match op { + ">" | "<" | "<=" | ">=" | "+" | "-" | "*" | "/" => { + let arr: [Expression; 2] = arguments + .try_into() + .map_err(|_| eyre!("Expected 2 arguments"))?; + let [left, right] = arr; + + let left = convert_expression(left)?; + let right = convert_expression(right)?; + + let op = match op { + ">" => Operator::Gt, + "<" => Operator::Lt, + "<=" => Operator::LtEq, + ">=" => Operator::GtEq, + "+" => Operator::Plus, + "-" => Operator::Minus, + "*" => Operator::Multiply, + "/" => Operator::FloorDivide, // todo is this what we want? + _ => unreachable!(), + }; + + Ok(DaftExpr::BinaryOp { + left: left.into(), + op, + right: right.into(), + }) + } + other => bail!("Unsupported function name: {other}"), + } + } + + // Handle other expression types... + _ => Err(eyre!("Unsupported expression type")), + } +} + +// Helper functions to convert literals, function names, operators etc. + +fn convert_literal(lit: expression::Literal) -> Result { + let literal_type = lit + .literal_type + .ok_or_else(|| eyre!("literal_type is None"))?; + + let result = match literal_type { + LiteralType::Null(..) => daft_dsl::LiteralValue::Null, + LiteralType::Binary(input) => daft_dsl::LiteralValue::Binary(input), + LiteralType::Boolean(input) => daft_dsl::LiteralValue::Boolean(input), + LiteralType::Byte(input) => daft_dsl::LiteralValue::Int32(input), + LiteralType::Short(input) => daft_dsl::LiteralValue::Int32(input), + LiteralType::Integer(input) => daft_dsl::LiteralValue::Int32(input), + LiteralType::Long(input) => daft_dsl::LiteralValue::Int64(input), + LiteralType::Float(input) => daft_dsl::LiteralValue::Float64(f64::from(input)), + LiteralType::Double(input) => daft_dsl::LiteralValue::Float64(input), + LiteralType::String(input) => daft_dsl::LiteralValue::Utf8(input), + LiteralType::Date(input) => daft_dsl::LiteralValue::Date(input), + LiteralType::Decimal(_) + | LiteralType::Timestamp(_) + | LiteralType::TimestampNtz(_) + | LiteralType::CalendarInterval(_) + | LiteralType::YearMonthInterval(_) + | LiteralType::DayTimeInterval(_) + | LiteralType::Array(_) + | LiteralType::Map(_) + | LiteralType::Struct(_) => bail!("unimplemented"), + }; + + Ok(result) +} diff --git a/src/daft-connect/src/convert/formatting.rs b/src/daft-connect/src/convert/formatting.rs new file mode 100644 index 0000000000..3310a918fb --- /dev/null +++ b/src/daft-connect/src/convert/formatting.rs @@ -0,0 +1,69 @@ +use spark_connect::relation::RelType; + +/// Extension trait for RelType to add a `name` method. +pub trait RelTypeExt { + /// Returns the name of the RelType as a string. 
+ fn name(&self) -> &'static str; +} + +impl RelTypeExt for RelType { + fn name(&self) -> &'static str { + match self { + Self::Read(_) => "Read", + Self::Project(_) => "Project", + Self::Filter(_) => "Filter", + Self::Join(_) => "Join", + Self::SetOp(_) => "SetOp", + Self::Sort(_) => "Sort", + Self::Limit(_) => "Limit", + Self::Aggregate(_) => "Aggregate", + Self::Sql(_) => "Sql", + Self::LocalRelation(_) => "LocalRelation", + Self::Sample(_) => "Sample", + Self::Offset(_) => "Offset", + Self::Deduplicate(_) => "Deduplicate", + Self::Range(_) => "Range", + Self::SubqueryAlias(_) => "SubqueryAlias", + Self::Repartition(_) => "Repartition", + Self::ToDf(_) => "ToDf", + Self::WithColumnsRenamed(_) => "WithColumnsRenamed", + Self::ShowString(_) => "ShowString", + Self::Drop(_) => "Drop", + Self::Tail(_) => "Tail", + Self::WithColumns(_) => "WithColumns", + Self::Hint(_) => "Hint", + Self::Unpivot(_) => "Unpivot", + Self::ToSchema(_) => "ToSchema", + Self::RepartitionByExpression(_) => "RepartitionByExpression", + Self::MapPartitions(_) => "MapPartitions", + Self::CollectMetrics(_) => "CollectMetrics", + Self::Parse(_) => "Parse", + Self::GroupMap(_) => "GroupMap", + Self::CoGroupMap(_) => "CoGroupMap", + Self::WithWatermark(_) => "WithWatermark", + Self::ApplyInPandasWithState(_) => "ApplyInPandasWithState", + Self::HtmlString(_) => "HtmlString", + Self::CachedLocalRelation(_) => "CachedLocalRelation", + Self::CachedRemoteRelation(_) => "CachedRemoteRelation", + Self::CommonInlineUserDefinedTableFunction(_) => "CommonInlineUserDefinedTableFunction", + Self::AsOfJoin(_) => "AsOfJoin", + Self::CommonInlineUserDefinedDataSource(_) => "CommonInlineUserDefinedDataSource", + Self::WithRelations(_) => "WithRelations", + Self::Transpose(_) => "Transpose", + Self::FillNa(_) => "FillNa", + Self::DropNa(_) => "DropNa", + Self::Replace(_) => "Replace", + Self::Summary(_) => "Summary", + Self::Crosstab(_) => "Crosstab", + Self::Describe(_) => "Describe", + Self::Cov(_) => "Cov", + Self::Corr(_) => "Corr", + Self::ApproxQuantile(_) => "ApproxQuantile", + Self::FreqItems(_) => "FreqItems", + Self::SampleBy(_) => "SampleBy", + Self::Catalog(_) => "Catalog", + Self::Extension(_) => "Extension", + Self::Unknown(_) => "Unknown", + } + } +} diff --git a/src/daft-connect/src/convert/plan_conversion.rs b/src/daft-connect/src/convert/plan_conversion.rs new file mode 100644 index 0000000000..6e0c5fc872 --- /dev/null +++ b/src/daft-connect/src/convert/plan_conversion.rs @@ -0,0 +1,134 @@ +use std::{collections::HashSet, sync::Arc}; + +use daft_logical_plan::{LogicalPlanBuilder, ParquetScanBuilder}; +use eyre::{bail, eyre, Result, WrapErr}; +use spark_connect::{ + expression::Alias, + read::{DataSource, ReadType}, + relation::RelType, + Filter, Read, Relation, WithColumns, +}; +use tracing::warn; + +use crate::convert::expression; + +pub fn to_logical_plan(plan: Relation) -> Result { + let scope = std::thread::spawn(|| { + let rel_type = plan.rel_type.ok_or_else(|| eyre!("rel_type is None"))?; + + match rel_type { + RelType::ShowString(..) 
=> { + bail!("ShowString is only supported as a top-level relation") + } + RelType::Filter(filter) => parse_filter(*filter).wrap_err("parsing Filter"), + RelType::WithColumns(with_columns) => { + parse_with_columns(*with_columns).wrap_err("parsing WithColumns") + } + RelType::Read(read) => parse_read(read), + _ => bail!("Unsupported relation type: {rel_type:?}"), + } + }); + + scope.join().unwrap() +} + +fn parse_filter(filter: Filter) -> Result { + let Filter { input, condition } = filter; + let input = *input.ok_or_else(|| eyre!("input is None"))?; + let input_plan = to_logical_plan(input).wrap_err("parsing input")?; + + let condition = condition.ok_or_else(|| eyre!("condition is None"))?; + let condition = + expression::convert_expression(condition).wrap_err("converting to daft expression")?; + let condition = Arc::new(condition); + + input_plan.filter(condition).wrap_err("applying filter") +} + +fn parse_with_columns(with_columns: WithColumns) -> Result { + let WithColumns { input, aliases } = with_columns; + let input = *input.ok_or_else(|| eyre!("input is None"))?; + let input_plan = to_logical_plan(input).wrap_err("parsing input")?; + + let mut new_exprs = Vec::new(); + let mut existing_columns: HashSet<_> = input_plan.schema().names().into_iter().collect(); + + for alias in aliases { + let Alias { + expr, + name, + metadata, + } = alias; + + if name.len() != 1 { + bail!("Alias name must have exactly one element"); + } + let name = name[0].as_str(); + + if metadata.is_some() { + bail!("Metadata is not yet supported"); + } + + let expr = expr.ok_or_else(|| eyre!("expression is None"))?; + let expr = + expression::convert_expression(*expr).wrap_err("converting to daft expression")?; + let expr = Arc::new(expr); + + new_exprs.push(expr.alias(name)); + + if existing_columns.contains(name) { + existing_columns.remove(name); + } + } + + // Add remaining existing columns + for col_name in existing_columns { + new_exprs.push(daft_dsl::col(col_name)); + } + + input_plan + .select(new_exprs) + .wrap_err("selecting new expressions") +} + +fn parse_read(read: Read) -> Result { + let Read { + is_streaming, + read_type, + } = read; + + warn!("Ignoring is_streaming: {is_streaming}"); + + let read_type = read_type.ok_or_else(|| eyre!("type is None"))?; + + match read_type { + ReadType::NamedTable(_) => bail!("Named tables are not yet supported"), + ReadType::DataSource(data_source) => parse_data_source(data_source), + } +} + +fn parse_data_source(data_source: DataSource) -> Result { + let DataSource { + format, + options, + paths, + predicates, + .. 
+ } = data_source; + + let format = format.ok_or_else(|| eyre!("format is None"))?; + if format != "parquet" { + bail!("Only parquet is supported; got {format}"); + } + + if !options.is_empty() { + bail!("Options are not yet supported"); + } + if !predicates.is_empty() { + bail!("Predicates are not yet supported"); + } + + ParquetScanBuilder::new(paths) + .finish() + .wrap_err("creating ParquetScanBuilder") +} diff --git a/src/daft-connect/src/convert/schema_conversion.rs b/src/daft-connect/src/convert/schema_conversion.rs new file mode 100644 index 0000000000..dcce376b94 --- /dev/null +++ b/src/daft-connect/src/convert/schema_conversion.rs @@ -0,0 +1,56 @@ +use spark_connect::{ + data_type::{Kind, Long, Struct, StructField}, + relation::RelType, + DataType, Relation, +}; + +#[tracing::instrument(skip_all)] +pub fn connect_schema(input: Relation) -> Result { + if input.common.is_some() { + tracing::warn!("We do not currently look at common fields"); + } + + let result = match input + .rel_type + .ok_or_else(|| tonic::Status::internal("rel_type is None"))? + { + RelType::Range(spark_connect::Range { num_partitions, .. }) => { + if num_partitions.is_some() { + return Err(tonic::Status::unimplemented( + "num_partitions is not supported", + )); + } + + let long = Long { + type_variation_reference: 0, + }; + + let id_field = StructField { + name: "id".to_string(), + data_type: Some(DataType { + kind: Some(Kind::Long(long)), + }), + nullable: false, + metadata: None, + }; + + let fields = vec![id_field]; + + let strct = Struct { + fields, + type_variation_reference: 0, + }; + + DataType { + kind: Some(Kind::Struct(strct)), + } + } + other => { + return Err(tonic::Status::unimplemented(format!( + "Unsupported relation type: {other:?}" + ))) + } + }; + + Ok(result) +} diff --git a/src/daft-connect/src/err.rs b/src/daft-connect/src/err.rs index d210ef8458..0cf065287f 100644 --- a/src/daft-connect/src/err.rs +++ b/src/daft-connect/src/err.rs @@ -1,5 +1,5 @@ #[macro_export] -macro_rules! invalid_argument { +macro_rules! 
invalid_argument_err { ($arg: tt) => {{ let msg = format!($arg); Err(::tonic::Status::invalid_argument(msg)) diff --git a/src/daft-connect/src/lib.rs b/src/daft-connect/src/lib.rs index 12c43e6901..4dca16d643 100644 --- a/src/daft-connect/src/lib.rs +++ b/src/daft-connect/src/lib.rs @@ -9,14 +9,18 @@ use dashmap::DashMap; use eyre::Context; +use futures::{StreamExt, TryStreamExt}; #[cfg(feature = "python")] use pyo3::types::PyModuleMethods; use spark_connect::{ + analyze_plan_response, + command::CommandType, + plan::OpType, spark_connect_service_server::{SparkConnectService, SparkConnectServiceServer}, AddArtifactsRequest, AddArtifactsResponse, AnalyzePlanRequest, AnalyzePlanResponse, ArtifactStatusesRequest, ArtifactStatusesResponse, ConfigRequest, ConfigResponse, ExecutePlanRequest, ExecutePlanResponse, FetchErrorDetailsRequest, FetchErrorDetailsResponse, - InterruptRequest, InterruptResponse, ReattachExecuteRequest, ReleaseExecuteRequest, + InterruptRequest, InterruptResponse, Plan, ReattachExecuteRequest, ReleaseExecuteRequest, ReleaseExecuteResponse, ReleaseSessionRequest, ReleaseSessionResponse, }; use tonic::{transport::Server, Request, Response, Status}; @@ -25,7 +29,9 @@ use uuid::Uuid; use crate::session::Session; +mod command; mod config; +mod convert; mod err; mod session; pub mod util; @@ -128,9 +134,90 @@ impl SparkConnectService for DaftSparkConnectService { #[tracing::instrument(skip_all)] async fn execute_plan( &self, - _request: Request, + request: Request, ) -> Result, Status> { - unimplemented_err!("Unsupported plan type") + let request = request.into_inner(); + + let session = self.get_session(&request.session_id)?; + + let Some(operation) = request.operation_id else { + return invalid_argument_err!("Operation ID is required"); + }; + + // Proceed with executing the plan... + let Some(plan) = request.plan else { + return invalid_argument_err!("Plan is required"); + }; + + let Some(plan) = plan.op_type else { + return invalid_argument_err!("Plan operation is required"); + }; + + use spark_connect::plan::OpType; + + match plan { + OpType::Root(relation) => { + let result = session.handle_root_command(relation, operation).await?; + return Ok(Response::new(result)); + } + OpType::Command(command) => { + let Some(command) = command.command_type else { + return invalid_argument_err!("Command type is required"); + }; + + match command { + CommandType::RegisterFunction(_) => { + unimplemented_err!("RegisterFunction not implemented") + } + CommandType::WriteOperation(_) => { + unimplemented_err!("WriteOperation not implemented") + } + CommandType::CreateDataframeView(_) => { + unimplemented_err!("CreateDataframeView not implemented") + } + CommandType::WriteOperationV2(_) => { + unimplemented_err!("WriteOperationV2 not implemented") + } + CommandType::SqlCommand(..) 
=> { + unimplemented_err!("SQL execution not yet implemented") + } + CommandType::WriteStreamOperationStart(_) => { + unimplemented_err!("WriteStreamOperationStart not implemented") + } + CommandType::StreamingQueryCommand(_) => { + unimplemented_err!("StreamingQueryCommand not implemented") + } + CommandType::GetResourcesCommand(_) => { + unimplemented_err!("GetResourcesCommand not implemented") + } + CommandType::StreamingQueryManagerCommand(_) => { + unimplemented_err!("StreamingQueryManagerCommand not implemented") + } + CommandType::RegisterTableFunction(_) => { + unimplemented_err!("RegisterTableFunction not implemented") + } + CommandType::StreamingQueryListenerBusCommand(_) => { + unimplemented_err!("StreamingQueryListenerBusCommand not implemented") + } + CommandType::RegisterDataSource(_) => { + unimplemented_err!("RegisterDataSource not implemented") + } + CommandType::CreateResourceProfileCommand(_) => { + unimplemented_err!("CreateResourceProfileCommand not implemented") + } + CommandType::CheckpointCommand(_) => { + unimplemented_err!("CheckpointCommand not implemented") + } + CommandType::RemoveCachedRemoteRelationCommand(_) => { + unimplemented_err!("RemoveCachedRemoteRelationCommand not implemented") + } + CommandType::MergeIntoTableCommand(_) => { + unimplemented_err!("MergeIntoTableCommand not implemented") + } + CommandType::Extension(_) => unimplemented_err!("Extension not implemented"), + } + } + }? } #[tracing::instrument(skip_all)] @@ -172,9 +259,49 @@ impl SparkConnectService for DaftSparkConnectService { #[tracing::instrument(skip_all)] async fn analyze_plan( &self, - _request: Request, + request: Request, ) -> Result, Status> { - unimplemented_err!("Analyze plan operation is not yet implemented") + use spark_connect::analyze_plan_request::*; + let request = request.into_inner(); + + let AnalyzePlanRequest { + session_id, + analyze, + .. 
+ } = request; + + let Some(analyze) = analyze else { + return Err(Status::invalid_argument("analyze is required")); + }; + + match analyze { + Analyze::Schema(Schema { plan }) => { + let Some(Plan { op_type }) = plan else { + return Err(Status::invalid_argument("plan is required")); + }; + + let Some(OpType::Root(relation)) = op_type else { + return Err(Status::invalid_argument("op_type is required to be root")); + }; + + let result = convert::connect_schema(relation)?; + + let schema = analyze_plan_response::DdlParse { + parsed: Some(result), + }; + + let response = AnalyzePlanResponse { + session_id, + server_side_session_id: String::new(), + result: Some(analyze_plan_response::Result::DdlParse(schema)), + }; + + println!("response: {response:#?}"); + + Ok(Response::new(response)) + } + _ => unimplemented_err!("Analyze plan operation is not yet implemented"), + } } #[tracing::instrument(skip_all)] diff --git a/src/daft-connect/src/session.rs b/src/daft-connect/src/session.rs index 24f7fabe80..72b477478f 100644 --- a/src/daft-connect/src/session.rs +++ b/src/daft-connect/src/session.rs @@ -1,4 +1,4 @@ -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use uuid::Uuid; @@ -8,6 +8,12 @@ pub struct Session { /// Also, config_values: BTreeMap, + #[expect( + unused, + reason = "this will be used in the future especially to pass spark connect tests" + )] + tables_by_name: HashMap, + id: String, server_side_session_id: String, } @@ -26,6 +32,7 @@ impl Session { let server_side_session_id = server_side_session_id.to_string(); Self { config_values: Default::default(), + tables_by_name: Default::default(), id, server_side_session_id, } diff --git a/src/daft-local-execution/src/run.rs b/src/daft-local-execution/src/run.rs index 0f01ec61e6..d4fb409313 100644 --- a/src/daft-local-execution/src/run.rs +++ b/src/daft-local-execution/src/run.rs @@ -2,7 +2,9 @@ use std::{ collections::HashMap, fs::File, io::Write, + pin::Pin, sync::Arc, + task::{Context, Poll}, time::{SystemTime, UNIX_EPOCH}, }; @@ -11,6 +13,7 @@ use common_error::DaftResult; use common_tracing::refresh_chrome_trace; use daft_local_plan::{translate, LocalPhysicalPlan}; use daft_micropartition::MicroPartition; +use futures::{Stream, StreamExt}; #[cfg(feature = "python")] use { common_daft_config::PyDaftExecutionConfig, @@ -48,6 +51,34 @@ pub struct NativeExecutor { local_physical_plan: Arc, } +/// A blocking iterator adapter for any Stream. +pub struct BlockingStreamIter { + stream: Pin>, +} + +impl BlockingStreamIter { + /// Creates a new BlockingStreamIter from a Stream. 
+ pub fn new(stream: S) -> Self + where + S: Stream + 'static, + { + Self { + stream: Box::pin(stream), + } + } +} + +impl Iterator for BlockingStreamIter +where + S: Stream + Unpin + 'static, +{ + type Item = S::Item; + + fn next(&mut self) -> Option { + futures::executor::block_on(self.stream.as_mut().next()) + } +} + #[cfg(feature = "python")] #[pymethods] impl NativeExecutor { @@ -84,6 +115,7 @@ impl NativeExecutor { ) }) .collect(); + let out = py.allow_threads(|| { run_local( &self.local_physical_plan, @@ -92,6 +124,9 @@ impl NativeExecutor { results_buffer_size, ) })?; + + let out = BlockingStreamIter::new(out); + let iter = Box::new(out.map(|part| { part.map(|p| pyo3::Python::with_gil(|py| PyMicroPartition::from(p).into_py(py))) })); @@ -116,7 +151,7 @@ pub fn run_local( psets: HashMap>>, cfg: Arc, results_buffer_size: Option, -) -> DaftResult>> + Send>> { +) -> DaftResult>> + Send>>> { refresh_chrome_trace(); let mut pipeline = physical_plan_to_pipeline(physical_plan, &psets, &cfg)?; let (tx, rx) = create_channel(results_buffer_size.unwrap_or(1)); @@ -173,18 +208,18 @@ pub fn run_local( }) }); - struct ReceiverIterator { + struct ReceiverStream { receiver: Receiver>, handle: Option>>, } - impl Iterator for ReceiverIterator { + impl Stream for ReceiverStream { type Item = DaftResult>; - fn next(&mut self) -> Option { - match self.receiver.blocking_recv() { - Some(part) => Some(Ok(part)), - None => { + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + match self.receiver.poll_recv(cx) { + Poll::Ready(Some(part)) => Poll::Ready(Some(Ok(part))), + Poll::Ready(None) => { if self.handle.is_some() { let join_result = self .handle @@ -193,17 +228,18 @@ pub fn run_local( .join() .expect("Execution engine thread panicked"); match join_result { - Ok(()) => None, - Err(e) => Some(Err(e)), + Ok(()) => Poll::Ready(None), + Err(e) => Poll::Ready(Some(Err(e))), } } else { - None + Poll::Ready(None) } } + Poll::Pending => Poll::Pending, } } } - Ok(Box::new(ReceiverIterator { + Ok(Box::pin(ReceiverStream { receiver: rx, handle: Some(handle), })) diff --git a/src/daft-logical-plan/src/optimization/rules/eliminate_cross_join.rs b/src/daft-logical-plan/src/optimization/rules/eliminate_cross_join.rs index c8e888fecf..e1358eae83 100644 --- a/src/daft-logical-plan/src/optimization/rules/eliminate_cross_join.rs +++ b/src/daft-logical-plan/src/optimization/rules/eliminate_cross_join.rs @@ -500,7 +500,7 @@ mod tests { expected, actual, "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" ); - assert_eq!(starting_schema, actual.schema()) + assert_eq!(starting_schema, actual.schema()); } #[rstest] @@ -664,8 +664,8 @@ mod tests { )? .build(); - let plan = LogicalPlanBuilder::from(plan1.clone()) - .cross_join(plan2.clone(), None, Some("t3."))? + let plan = LogicalPlanBuilder::from(plan1) + .cross_join(plan2, None, Some("t3."))? 
.filter( col("t3.a") .eq(col("a")) diff --git a/src/daft-scan/src/hive.rs b/src/daft-scan/src/hive.rs index d9de48afd2..f929c9d4d5 100644 --- a/src/daft-scan/src/hive.rs +++ b/src/daft-scan/src/hive.rs @@ -134,7 +134,7 @@ mod tests { let partitions = parse_hive_partitioning(uri).unwrap(); assert_eq!(partitions.get("year"), Some(&"2024".to_string())); - assert_eq!(partitions.get("region"), Some(&"".to_string())); + assert_eq!(partitions.get("region"), Some(&String::new())); } #[test] @@ -251,7 +251,7 @@ mod tests { let partitions = parse_hive_partitioning(uri).unwrap(); assert_eq!(partitions.len(), 2); - assert_eq!(partitions.get("empty_key"), Some(&"".to_string())); - assert_eq!(partitions.get("another"), Some(&"".to_string())); + assert_eq!(partitions.get("empty_key"), Some(&String::new())); + assert_eq!(partitions.get("another"), Some(&String::new())); } } diff --git a/tests/connect/test_parquet_simple.py b/tests/connect/test_parquet_simple.py new file mode 100644 index 0000000000..cb3ba9f1b1 --- /dev/null +++ b/tests/connect/test_parquet_simple.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import pathlib +import time + +import pyarrow as pa +import pyarrow.parquet as papq +from pyspark.sql import SparkSession +from pyspark.sql.dataframe import DataFrame + +from daft.daft import connect_start + + +def test_read_parquet(tmpdir): + # Convert tmpdir to Path object + test_dir = pathlib.Path(tmpdir) + input_parquet_path = test_dir / "input.parquet" + + # Create sample data with sequential IDs + sample_data = pa.Table.from_pydict({"id": [0, 1, 2, 3, 4]}) + + # Write sample data to input parquet file + papq.write_table(sample_data, input_parquet_path) + + # Start Daft Connect server + # TODO: Add env var to control server embedding + connect_start("sc://localhost:50051") + + # Initialize Spark Connect session + spark_session: SparkSession = ( + SparkSession.builder.appName("DaftParquetReadWriteTest").remote("sc://localhost:50051").getOrCreate() + ) + + # Read input parquet with Spark Connect + spark_df: DataFrame = spark_session.read.parquet(str(input_parquet_path)) + + # Write DataFrame to output parquet + output_parquet_path = test_dir / "output.parquet" + spark_df.write.parquet(str(output_parquet_path)) + + # Verify output matches input + output_data = papq.read_table(output_parquet_path) + assert output_data.equals(sample_data) + + # Clean up Spark session + spark_session.stop() + time.sleep(2) # Allow time for session cleanup diff --git a/tests/connect/test_range_simple.py b/tests/connect/test_range_simple.py new file mode 100644 index 0000000000..6eab95102f --- /dev/null +++ b/tests/connect/test_range_simple.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import time + +import pytest +from pyspark.sql import SparkSession + + +@pytest.fixture +def spark_session(): + """Fixture to create and clean up a Spark session.""" + from daft.daft import connect_start + + # Start Daft Connect server + server = connect_start("sc://localhost:50051") + + # Initialize Spark Connect session + session = SparkSession.builder.appName("DaftConfigTest").remote("sc://localhost:50051").getOrCreate() + + yield session + + # Cleanup + server.shutdown() + session.stop() + time.sleep(2) # Allow time for session cleanup + + +def test_range_operation(spark_session): + # Create a range using Spark + # For example, creating a range from 0 to 9 + spark_range = spark_session.range(10) # Creates DataFrame with numbers 0 to 9 + + # Convert to Pandas DataFrame + pandas_df = spark_range.toPandas() + + # 
Verify the DataFrame has expected values + assert len(pandas_df) == 10, "DataFrame should have 10 rows" + assert list(pandas_df["range"]) == list(range(10)), "DataFrame should contain values 0-9" From cc3956ca275d911789ceb9d78963581f380535b5 Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Mon, 11 Nov 2024 16:40:53 -0800 Subject: [PATCH 02/12] remove duplicate connect_start --- daft/daft/__init__.pyi | 2 -- 1 file changed, 2 deletions(-) diff --git a/daft/daft/__init__.pyi b/daft/daft/__init__.pyi index 13d2fc6800..12cffd7dc1 100644 --- a/daft/daft/__init__.pyi +++ b/daft/daft/__init__.pyi @@ -1241,8 +1241,6 @@ def connect_start(addr: str) -> ConnectionHandle: ... class ConnectionHandle: def shutdown(self) -> None: ... -def connect_start(addr: str) -> None: ... - # expr numeric ops def abs(expr: PyExpr) -> PyExpr: ... def cbrt(expr: PyExpr) -> PyExpr: ... From 6e9e9339feaff800b0d217324fc1803135f9ed73 Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Mon, 11 Nov 2024 16:41:46 -0800 Subject: [PATCH 03/12] only test range --- tests/connect/test_parquet_simple.py | 47 ---------------------------- 1 file changed, 47 deletions(-) delete mode 100644 tests/connect/test_parquet_simple.py diff --git a/tests/connect/test_parquet_simple.py b/tests/connect/test_parquet_simple.py deleted file mode 100644 index cb3ba9f1b1..0000000000 --- a/tests/connect/test_parquet_simple.py +++ /dev/null @@ -1,47 +0,0 @@ -from __future__ import annotations - -import pathlib -import time - -import pyarrow as pa -import pyarrow.parquet as papq -from pyspark.sql import SparkSession -from pyspark.sql.dataframe import DataFrame - -from daft.daft import connect_start - - -def test_read_parquet(tmpdir): - # Convert tmpdir to Path object - test_dir = pathlib.Path(tmpdir) - input_parquet_path = test_dir / "input.parquet" - - # Create sample data with sequential IDs - sample_data = pa.Table.from_pydict({"id": [0, 1, 2, 3, 4]}) - - # Write sample data to input parquet file - papq.write_table(sample_data, input_parquet_path) - - # Start Daft Connect server - # TODO: Add env var to control server embedding - connect_start("sc://localhost:50051") - - # Initialize Spark Connect session - spark_session: SparkSession = ( - SparkSession.builder.appName("DaftParquetReadWriteTest").remote("sc://localhost:50051").getOrCreate() - ) - - # Read input parquet with Spark Connect - spark_df: DataFrame = spark_session.read.parquet(str(input_parquet_path)) - - # Write DataFrame to output parquet - output_parquet_path = test_dir / "output.parquet" - spark_df.write.parquet(str(output_parquet_path)) - - # Verify output matches input - output_data = papq.read_table(output_parquet_path) - assert output_data.equals(sample_data) - - # Clean up Spark session - spark_session.stop() - time.sleep(2) # Allow time for session cleanup From 83a5dee34dbe40a88e71a8dd74862697dbbac46e Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Tue, 12 Nov 2024 11:52:04 -0800 Subject: [PATCH 04/12] Update src/daft-connect/src/convert.rs Co-authored-by: Cory Grinstead --- src/daft-connect/src/convert.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/daft-connect/src/convert.rs b/src/daft-connect/src/convert.rs index 3d31693bc3..3af6ae86b8 100644 --- a/src/daft-connect/src/convert.rs +++ b/src/daft-connect/src/convert.rs @@ -30,7 +30,7 @@ pub fn run_local( }) .flat_map(|tables| match tables { Ok(tables) => { - let tables = Arc::try_unwrap(tables).unwrap(); + let tables = Arc::unwrap_or_clone(tables); let tables = tables.into_iter().map(Ok); let 
stream: Pin>>> = From e64f2c921a77091bc09e84c6240eceb8284064ac Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Tue, 12 Nov 2024 11:53:18 -0800 Subject: [PATCH 05/12] remove tables --- src/daft-connect/src/session.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/daft-connect/src/session.rs b/src/daft-connect/src/session.rs index 72b477478f..1b042ad673 100644 --- a/src/daft-connect/src/session.rs +++ b/src/daft-connect/src/session.rs @@ -8,12 +8,6 @@ pub struct Session { /// Also, config_values: BTreeMap, - #[expect( - unused, - reason = "this will be used in the future especially to pass spark connect tests" - )] - tables_by_name: HashMap, - id: String, server_side_session_id: String, } @@ -32,7 +26,6 @@ impl Session { let server_side_session_id = server_side_session_id.to_string(); Self { config_values: Default::default(), - tables_by_name: Default::default(), id, server_side_session_id, } From c2b2f4dfaa387a176e429bf66faf2caac1190317 Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Tue, 12 Nov 2024 11:54:32 -0800 Subject: [PATCH 06/12] remove hive --- src/daft-scan/src/hive.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/daft-scan/src/hive.rs b/src/daft-scan/src/hive.rs index f929c9d4d5..d9de48afd2 100644 --- a/src/daft-scan/src/hive.rs +++ b/src/daft-scan/src/hive.rs @@ -134,7 +134,7 @@ mod tests { let partitions = parse_hive_partitioning(uri).unwrap(); assert_eq!(partitions.get("year"), Some(&"2024".to_string())); - assert_eq!(partitions.get("region"), Some(&String::new())); + assert_eq!(partitions.get("region"), Some(&"".to_string())); } #[test] @@ -251,7 +251,7 @@ mod tests { let partitions = parse_hive_partitioning(uri).unwrap(); assert_eq!(partitions.len(), 2); - assert_eq!(partitions.get("empty_key"), Some(&String::new())); - assert_eq!(partitions.get("another"), Some(&String::new())); + assert_eq!(partitions.get("empty_key"), Some(&"".to_string())); + assert_eq!(partitions.get("another"), Some(&"".to_string())); } } From 61bfcad37034f70d2525234b674acccf0aaee57f Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Tue, 12 Nov 2024 12:05:44 -0800 Subject: [PATCH 07/12] remove a lot of unused code --- src/daft-connect/src/command.rs | 32 +++-- src/daft-connect/src/convert.rs | 39 ----- .../src/convert/data_conversion.rs | 4 +- .../src/convert/data_conversion/range.rs | 4 +- src/daft-connect/src/convert/expression.rs | 120 ---------------- .../src/convert/plan_conversion.rs | 134 ------------------ src/daft-connect/src/lib.rs | 5 +- src/daft-connect/src/session.rs | 2 +- 8 files changed, 24 insertions(+), 316 deletions(-) delete mode 100644 src/daft-connect/src/convert/expression.rs delete mode 100644 src/daft-connect/src/convert/plan_conversion.rs diff --git a/src/daft-connect/src/command.rs b/src/daft-connect/src/command.rs index 0cedd56493..28ddac2365 100644 --- a/src/daft-connect/src/command.rs +++ b/src/daft-connect/src/command.rs @@ -3,7 +3,7 @@ use std::future::ready; use arrow2::io::ipc::write::StreamWriter; use daft_table::Table; use eyre::Context; -use futures::{stream, StreamExt, TryStreamExt}; +use futures::stream; use spark_connect::{ execute_plan_response::{ArrowBatch, ResponseType, ResultComplete}, spark_connect_service_server::SparkConnectService, @@ -16,14 +16,14 @@ use crate::{convert::convert_data, DaftSparkConnectService, Session}; type DaftStream = ::ExecutePlanStream; -pub struct PlanContext { - session_id: String, - server_side_session_id: String, - operation_id: String, +pub struct PlanIds { + session: String, 
+ server_side_session: String, + operation: String, } -impl PlanContext { - pub fn gen_response(&mut self, table: &Table) -> eyre::Result { +impl PlanIds { + pub fn gen_response(&self, table: &Table) -> eyre::Result { let mut data = Vec::new(); let mut writer = StreamWriter::new( @@ -50,9 +50,9 @@ impl PlanContext { .wrap_err("Failed to write Arrow chunk to stream writer")?; let response = ExecutePlanResponse { - session_id: self.session_id.to_string(), - server_side_session_id: self.server_side_session_id.to_string(), - operation_id: self.operation_id.to_string(), + session_id: self.session.to_string(), + server_side_session_id: self.server_side_session.to_string(), + operation_id: self.operation.to_string(), response_id: Uuid::new_v4().to_string(), // todo: implement this metrics: None, // todo: implement this observed_metrics: vec![], @@ -74,10 +74,12 @@ impl Session { command: Relation, operation_id: String, ) -> Result { - let mut context = PlanContext { - session_id: self.client_side_session_id().to_string(), - server_side_session_id: self.server_side_session_id().to_string(), - operation_id: operation_id.clone(), + use futures::{StreamExt, TryStreamExt}; + + let context = PlanIds { + session: self.client_side_session_id().to_string(), + server_side_session: self.server_side_session_id().to_string(), + operation: operation_id.clone(), }; let finished = ExecutePlanResponse { @@ -91,7 +93,7 @@ impl Session { response_type: Some(ResponseType::ResultComplete(ResultComplete {})), }; - let stream = convert_data(command, &mut context) + let stream = convert_data(command, &context) .map_err(|e| Status::internal(e.to_string()))? .chain(stream::once(ready(Ok(finished)))); diff --git a/src/daft-connect/src/convert.rs b/src/daft-connect/src/convert.rs index 3af6ae86b8..743ffcf06a 100644 --- a/src/daft-connect/src/convert.rs +++ b/src/daft-connect/src/convert.rs @@ -1,45 +1,6 @@ mod data_conversion; -mod expression; mod formatting; -mod plan_conversion; mod schema_conversion; -use std::{collections::HashMap, pin::Pin, sync::Arc}; - -use common_daft_config::DaftExecutionConfig; -use common_error::{DaftError, DaftResult}; -use daft_logical_plan::LogicalPlanRef; -use daft_table::Table; pub use data_conversion::convert_data; -use futures::{stream, Stream, StreamExt}; pub use schema_conversion::connect_schema; - -pub fn run_local( - logical_plan: &LogicalPlanRef, -) -> DaftResult>> { - let physical_plan = daft_local_plan::translate(logical_plan)?; - let cfg = Arc::new(DaftExecutionConfig::default()); - let psets = HashMap::new(); - - let stream = daft_local_execution::run_local(&physical_plan, psets, cfg, None)?; - - let stream = stream - .map(|partition| match partition { - Ok(partition) => partition.get_tables().map_err(DaftError::from), - Err(err) => Err(err), - }) - .flat_map(|tables| match tables { - Ok(tables) => { - let tables = Arc::unwrap_or_clone(tables); - - let tables = tables.into_iter().map(Ok); - let stream: Pin>>> = - Box::pin(stream::iter(tables)); - - stream - } - Err(err) => Box::pin(stream::once(async { Err(err) })), - }); - - Ok(stream) -} diff --git a/src/daft-connect/src/convert/data_conversion.rs b/src/daft-connect/src/convert/data_conversion.rs index 11233e6cc5..71032aa4a8 100644 --- a/src/daft-connect/src/convert/data_conversion.rs +++ b/src/daft-connect/src/convert/data_conversion.rs @@ -39,11 +39,11 @@ use crate::convert::formatting::RelTypeExt; mod range; use range::range; -use crate::command::PlanContext; +use crate::command::PlanIds; pub fn convert_data( plan: Relation, 
- context: &mut PlanContext, + context: &PlanIds, ) -> eyre::Result> + Unpin> { // First check common fields if needed if let Some(common) = &plan.common { diff --git a/src/daft-connect/src/convert/data_conversion/range.rs b/src/daft-connect/src/convert/data_conversion/range.rs index 787a379363..f370228188 100644 --- a/src/daft-connect/src/convert/data_conversion/range.rs +++ b/src/daft-connect/src/convert/data_conversion/range.rs @@ -7,11 +7,11 @@ use eyre::{ensure, Context}; use futures::{stream, Stream}; use spark_connect::{ExecutePlanResponse, Range}; -use crate::command::PlanContext; +use crate::command::PlanIds; pub fn range( range: Range, - channel: &mut PlanContext, + channel: &PlanIds, ) -> eyre::Result> + Unpin> { let Range { start, diff --git a/src/daft-connect/src/convert/expression.rs b/src/daft-connect/src/convert/expression.rs deleted file mode 100644 index f79a7bf5a8..0000000000 --- a/src/daft-connect/src/convert/expression.rs +++ /dev/null @@ -1,120 +0,0 @@ -use daft_dsl::{Expr as DaftExpr, Operator}; -use eyre::{bail, ensure, eyre, Result}; -use spark_connect::{expression, expression::literal::LiteralType, Expression}; - -pub fn convert_expression(expr: Expression) -> Result { - match expr.expr_type { - Some(expression::ExprType::Literal(lit)) => Ok(DaftExpr::Literal(convert_literal(lit)?)), - - Some(expression::ExprType::UnresolvedAttribute(attr)) => { - Ok(DaftExpr::Column(attr.unparsed_identifier.into())) - } - - Some(expression::ExprType::Alias(alias)) => { - let expression::Alias { - expr, - name, - metadata, - } = *alias; - let expr = *expr.ok_or_else(|| eyre!("expr is None"))?; - - // Convert alias - let expr = convert_expression(expr)?; - - if let Some(metadata) = metadata - && !metadata.is_empty() - { - bail!("Metadata is not yet supported"); - } - - // ignore metadata for now - - let [name] = name.as_slice() else { - bail!("Alias name must have exactly one element"); - }; - - Ok(DaftExpr::Alias(expr.into(), name.as_str().into())) - } - - Some(expression::ExprType::UnresolvedFunction(expression::UnresolvedFunction { - function_name, - arguments, - is_distinct, - is_user_defined_function, - })) => { - ensure!(!is_distinct, "Distinct is not yet supported"); - ensure!( - !is_user_defined_function, - "User-defined functions are not yet supported" - ); - - let op = function_name.as_str(); - match op { - ">" | "<" | "<=" | ">=" | "+" | "-" | "*" | "/" => { - let arr: [Expression; 2] = arguments - .try_into() - .map_err(|_| eyre!("Expected 2 arguments"))?; - let [left, right] = arr; - - let left = convert_expression(left)?; - let right = convert_expression(right)?; - - let op = match op { - ">" => Operator::Gt, - "<" => Operator::Lt, - "<=" => Operator::LtEq, - ">=" => Operator::GtEq, - "+" => Operator::Plus, - "-" => Operator::Minus, - "*" => Operator::Multiply, - "/" => Operator::FloorDivide, // todo is this what we want? - _ => unreachable!(), - }; - - Ok(DaftExpr::BinaryOp { - left: left.into(), - op, - right: right.into(), - }) - } - other => bail!("Unsupported function name: {other}"), - } - } - - // Handle other expression types... - _ => Err(eyre!("Unsupported expression type")), - } -} - -// Helper functions to convert literals, function names, operators etc. - -fn convert_literal(lit: expression::Literal) -> Result { - let literal_type = lit - .literal_type - .ok_or_else(|| eyre!("literal_type is None"))?; - - let result = match literal_type { - LiteralType::Null(..) 
=> daft_dsl::LiteralValue::Null, - LiteralType::Binary(input) => daft_dsl::LiteralValue::Binary(input), - LiteralType::Boolean(input) => daft_dsl::LiteralValue::Boolean(input), - LiteralType::Byte(input) => daft_dsl::LiteralValue::Int32(input), - LiteralType::Short(input) => daft_dsl::LiteralValue::Int32(input), - LiteralType::Integer(input) => daft_dsl::LiteralValue::Int32(input), - LiteralType::Long(input) => daft_dsl::LiteralValue::Int64(input), - LiteralType::Float(input) => daft_dsl::LiteralValue::Float64(f64::from(input)), - LiteralType::Double(input) => daft_dsl::LiteralValue::Float64(input), - LiteralType::String(input) => daft_dsl::LiteralValue::Utf8(input), - LiteralType::Date(input) => daft_dsl::LiteralValue::Date(input), - LiteralType::Decimal(_) - | LiteralType::Timestamp(_) - | LiteralType::TimestampNtz(_) - | LiteralType::CalendarInterval(_) - | LiteralType::YearMonthInterval(_) - | LiteralType::DayTimeInterval(_) - | LiteralType::Array(_) - | LiteralType::Map(_) - | LiteralType::Struct(_) => bail!("unimplemented"), - }; - - Ok(result) -} diff --git a/src/daft-connect/src/convert/plan_conversion.rs b/src/daft-connect/src/convert/plan_conversion.rs deleted file mode 100644 index 6e0c5fc872..0000000000 --- a/src/daft-connect/src/convert/plan_conversion.rs +++ /dev/null @@ -1,134 +0,0 @@ -use std::{collections::HashSet, sync::Arc}; - -use daft_logical_plan::{LogicalPlanBuilder, ParquetScanBuilder}; -use eyre::{bail, eyre, Result, WrapErr}; -use spark_connect::{ - expression::Alias, - read::{DataSource, ReadType}, - relation::RelType, - Filter, Read, Relation, WithColumns, -}; -use tracing::warn; - -use crate::convert::expression; - -pub fn to_logical_plan(plan: Relation) -> Result { - let scope = std::thread::spawn(|| { - let rel_type = plan.rel_type.ok_or_else(|| eyre!("rel_type is None"))?; - - match rel_type { - RelType::ShowString(..) 
=> { - bail!("ShowString is only supported as a top-level relation") - } - RelType::Filter(filter) => parse_filter(*filter).wrap_err("parsing Filter"), - RelType::WithColumns(with_columns) => { - parse_with_columns(*with_columns).wrap_err("parsing WithColumns") - } - RelType::Read(read) => parse_read(read), - _ => bail!("Unsupported relation type: {rel_type:?}"), - } - }); - - scope.join().unwrap() -} - -fn parse_filter(filter: Filter) -> Result { - let Filter { input, condition } = filter; - let input = *input.ok_or_else(|| eyre!("input is None"))?; - let input_plan = to_logical_plan(input).wrap_err("parsing input")?; - - let condition = condition.ok_or_else(|| eyre!("condition is None"))?; - let condition = - expression::convert_expression(condition).wrap_err("converting to daft expression")?; - let condition = Arc::new(condition); - - input_plan.filter(condition).wrap_err("applying filter") -} - -fn parse_with_columns(with_columns: WithColumns) -> Result { - let WithColumns { input, aliases } = with_columns; - let input = *input.ok_or_else(|| eyre!("input is None"))?; - let input_plan = to_logical_plan(input).wrap_err("parsing input")?; - - let mut new_exprs = Vec::new(); - let mut existing_columns: HashSet<_> = input_plan.schema().names().into_iter().collect(); - - for alias in aliases { - let Alias { - expr, - name, - metadata, - } = alias; - - if name.len() != 1 { - bail!("Alias name must have exactly one element"); - } - let name = name[0].as_str(); - - if metadata.is_some() { - bail!("Metadata is not yet supported"); - } - - let expr = expr.ok_or_else(|| eyre!("expression is None"))?; - let expr = - expression::convert_expression(*expr).wrap_err("converting to daft expression")?; - let expr = Arc::new(expr); - - new_exprs.push(expr.alias(name)); - - if existing_columns.contains(name) { - existing_columns.remove(name); - } - } - - // Add remaining existing columns - for col_name in existing_columns { - new_exprs.push(daft_dsl::col(col_name)); - } - - input_plan - .select(new_exprs) - .wrap_err("selecting new expressions") -} - -fn parse_read(read: Read) -> Result { - let Read { - is_streaming, - read_type, - } = read; - - warn!("Ignoring is_streaming: {is_streaming}"); - - let read_type = read_type.ok_or_else(|| eyre!("type is None"))?; - - match read_type { - ReadType::NamedTable(_) => bail!("Named tables are not yet supported"), - ReadType::DataSource(data_source) => parse_data_source(data_source), - } -} - -fn parse_data_source(data_source: DataSource) -> Result { - let DataSource { - format, - options, - paths, - predicates, - .. 
- } = data_source; - - let format = format.ok_or_else(|| eyre!("format is None"))?; - if format != "parquet" { - bail!("Only parquet is supported; got {format}"); - } - - if !options.is_empty() { - bail!("Options are not yet supported"); - } - if !predicates.is_empty() { - bail!("Predicates are not yet supported"); - } - - ParquetScanBuilder::new(paths) - .finish() - .wrap_err("creating ParquetScanBuilder") -} diff --git a/src/daft-connect/src/lib.rs b/src/daft-connect/src/lib.rs index 4dca16d643..d9f2c2f6ad 100644 --- a/src/daft-connect/src/lib.rs +++ b/src/daft-connect/src/lib.rs @@ -5,11 +5,10 @@ #![feature(iter_from_coroutine)] #![feature(stmt_expr_attributes)] #![feature(try_trait_v2_residual)] -#![warn(unused)] +#![deny(unused)] use dashmap::DashMap; use eyre::Context; -use futures::{StreamExt, TryStreamExt}; #[cfg(feature = "python")] use pyo3::types::PyModuleMethods; use spark_connect::{ @@ -24,7 +23,7 @@ use spark_connect::{ ReleaseExecuteResponse, ReleaseSessionRequest, ReleaseSessionResponse, }; use tonic::{transport::Server, Request, Response, Status}; -use tracing::{info, warn}; +use tracing::info; use uuid::Uuid; use crate::session::Session; diff --git a/src/daft-connect/src/session.rs b/src/daft-connect/src/session.rs index 1b042ad673..24f7fabe80 100644 --- a/src/daft-connect/src/session.rs +++ b/src/daft-connect/src/session.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, HashMap}; +use std::collections::BTreeMap; use uuid::Uuid; From e5c0d858cd9102e2f8518cbf8465223d8460deba Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Tue, 12 Nov 2024 12:12:26 -0800 Subject: [PATCH 08/12] less type complexity --- Cargo.lock | 9 --------- src/daft-connect/Cargo.toml | 19 +++++-------------- src/daft-local-execution/src/run.rs | 8 +++++++- 3 files changed, 12 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6ef0cb835d..eda49f44cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1908,15 +1908,7 @@ name = "daft-connect" version = "0.3.0-dev0" dependencies = [ "arrow2", - "common-daft-config", - "common-error", - "common-file-formats", "daft-core", - "daft-dsl", - "daft-local-execution", - "daft-local-plan", - "daft-logical-plan", - "daft-physical-plan", "daft-schema", "daft-table", "dashmap", @@ -1925,7 +1917,6 @@ dependencies = [ "pyo3", "spark-connect", "tokio", - "tokio-stream", "tonic", "tracing", "tracing-subscriber", diff --git a/src/daft-connect/Cargo.toml b/src/daft-connect/Cargo.toml index 2bcec88a89..e67a2da332 100644 --- a/src/daft-connect/Cargo.toml +++ b/src/daft-connect/Cargo.toml @@ -1,27 +1,18 @@ [dependencies] -arrow2.workspace = true -common-daft-config.workspace = true -common-file-formats.workspace = true -daft-core.workspace = true -daft-dsl.workspace = true -daft-local-execution.workspace = true -daft-local-plan.workspace = true -daft-logical-plan.workspace = true -daft-physical-plan.workspace = true -daft-schema.workspace = true -daft-table.workspace = true dashmap = "6.1.0" eyre = "0.6.12" futures = "0.3.31" pyo3 = {workspace = true, optional = true} -spark-connect.workspace = true tokio = {version = "1.40.0", features = ["full"]} -tokio-stream = "0.1.16" tonic = "0.12.3" tracing-subscriber = {version = "0.3.18", features = ["env-filter"]} tracing-tracy = "0.11.3" -common-error.workspace = true uuid = {version = "1.10.0", features = ["v4"]} +arrow2.workspace = true +daft-core.workspace = true +daft-schema.workspace = true +daft-table.workspace = true +spark-connect.workspace = true tracing.workspace = true [features] diff --git 
 Cargo.lock                          |  9 ---------
 src/daft-connect/Cargo.toml         | 19 +++++--------------
 src/daft-local-execution/src/run.rs |  8 +++++++-
 3 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6ef0cb835d..eda49f44cb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1908,15 +1908,7 @@ name = "daft-connect"
 version = "0.3.0-dev0"
 dependencies = [
  "arrow2",
- "common-daft-config",
- "common-error",
- "common-file-formats",
  "daft-core",
- "daft-dsl",
- "daft-local-execution",
- "daft-local-plan",
- "daft-logical-plan",
- "daft-physical-plan",
  "daft-schema",
  "daft-table",
  "dashmap",
@@ -1925,7 +1917,6 @@
  "pyo3",
  "spark-connect",
  "tokio",
- "tokio-stream",
  "tonic",
  "tracing",
  "tracing-subscriber",
diff --git a/src/daft-connect/Cargo.toml b/src/daft-connect/Cargo.toml
index 2bcec88a89..e67a2da332 100644
--- a/src/daft-connect/Cargo.toml
+++ b/src/daft-connect/Cargo.toml
@@ -1,27 +1,18 @@
 [dependencies]
-arrow2.workspace = true
-common-daft-config.workspace = true
-common-file-formats.workspace = true
-daft-core.workspace = true
-daft-dsl.workspace = true
-daft-local-execution.workspace = true
-daft-local-plan.workspace = true
-daft-logical-plan.workspace = true
-daft-physical-plan.workspace = true
-daft-schema.workspace = true
-daft-table.workspace = true
 dashmap = "6.1.0"
 eyre = "0.6.12"
 futures = "0.3.31"
 pyo3 = {workspace = true, optional = true}
-spark-connect.workspace = true
 tokio = {version = "1.40.0", features = ["full"]}
-tokio-stream = "0.1.16"
 tonic = "0.12.3"
 tracing-subscriber = {version = "0.3.18", features = ["env-filter"]}
 tracing-tracy = "0.11.3"
-common-error.workspace = true
 uuid = {version = "1.10.0", features = ["v4"]}
+arrow2.workspace = true
+daft-core.workspace = true
+daft-schema.workspace = true
+daft-table.workspace = true
+spark-connect.workspace = true
 tracing.workspace = true
 
 [features]
diff --git a/src/daft-local-execution/src/run.rs b/src/daft-local-execution/src/run.rs
index d4fb409313..c1e983c76a 100644
--- a/src/daft-local-execution/src/run.rs
+++ b/src/daft-local-execution/src/run.rs
@@ -146,12 +146,18 @@ fn should_enable_explain_analyze() -> bool {
     }
 }
 
+pub type PartitionResult = DaftResult<Arc<MicroPartition>>;
+pub type SendableStream = dyn Stream<Item = PartitionResult> + Send;
+
+/// A pinned boxed stream that can be sent across thread boundaries
+pub type PinnedStream = Pin<Box<SendableStream>>;
+
 pub fn run_local(
     physical_plan: &LocalPhysicalPlan,
     psets: HashMap<String, Vec<Arc<MicroPartition>>>,
     cfg: Arc<DaftExecutionConfig>,
     results_buffer_size: Option<usize>,
-) -> DaftResult<Box<dyn Iterator<Item = DaftResult<Arc<MicroPartition>>> + Send>> {
+) -> DaftResult<PinnedStream> {
     refresh_chrome_trace();
     let mut pipeline = physical_plan_to_pipeline(physical_plan, &psets, &cfg)?;
     let (tx, rx) = create_channel(results_buffer_size.unwrap_or(1));

From 9c26532bc12e73d21329e1a32beb36ef822273fc Mon Sep 17 00:00:00 2001
From: Andrew Gazelka
Date: Tue, 12 Nov 2024 15:15:24 -0800
Subject: [PATCH 09/12] revert run_local to a blocking iterator

---
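This backs out the stream-based run_local from the previous commit: the
callers are synchronous, so the pinned stream had to be re-wrapped in the
BlockingStreamIter adapter anyway (a reading of the motivation, not stated in
the commit). run_local hands out a plain blocking iterator again, and
ReceiverIterator drains the channel with blocking_recv instead of poll_recv.
A sketch of the consuming side after the revert (illustrative only, not part
of the commit):

use std::sync::Arc;

use common_error::DaftResult;
use daft_micropartition::MicroPartition;

// Hypothetical consumer of run_local's iterator, for illustration only.
fn drain(
    parts: Box<dyn Iterator<Item = DaftResult<Arc<MicroPartition>>> + Send>,
) -> DaftResult<usize> {
    let mut n = 0;
    for part in parts {
        // An Err here surfaces engine failures, including a panicked
        // execution thread joined inside ReceiverIterator::next.
        let _part = part?;
        n += 1;
    }
    Ok(n)
}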
 src/daft-local-execution/src/run.rs | 64 ++++---------------
 .../rules/eliminate_cross_join.rs   |  6 +-
 2 files changed, 14 insertions(+), 56 deletions(-)

diff --git a/src/daft-local-execution/src/run.rs b/src/daft-local-execution/src/run.rs
index c1e983c76a..0f01ec61e6 100644
--- a/src/daft-local-execution/src/run.rs
+++ b/src/daft-local-execution/src/run.rs
@@ -2,9 +2,7 @@ use std::{
     collections::HashMap,
     fs::File,
     io::Write,
-    pin::Pin,
     sync::Arc,
-    task::{Context, Poll},
     time::{SystemTime, UNIX_EPOCH},
 };
 
@@ -13,7 +11,6 @@
 use common_error::DaftResult;
 use common_tracing::refresh_chrome_trace;
 use daft_local_plan::{translate, LocalPhysicalPlan};
 use daft_micropartition::MicroPartition;
-use futures::{Stream, StreamExt};
 #[cfg(feature = "python")]
 use {
     common_daft_config::PyDaftExecutionConfig,
@@ -51,34 +48,6 @@ pub struct NativeExecutor {
     local_physical_plan: Arc<LocalPhysicalPlan>,
 }
 
-/// A blocking iterator adapter for any Stream.
-pub struct BlockingStreamIter<S> {
-    stream: Pin<Box<S>>,
-}
-
-impl<S> BlockingStreamIter<S> {
-    /// Creates a new BlockingStreamIter from a Stream.
-    pub fn new(stream: S) -> Self
-    where
-        S: Stream + 'static,
-    {
-        Self {
-            stream: Box::pin(stream),
-        }
-    }
-}
-
-impl<S> Iterator for BlockingStreamIter<S>
-where
-    S: Stream + Unpin + 'static,
-{
-    type Item = S::Item;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        futures::executor::block_on(self.stream.as_mut().next())
-    }
-}
-
 #[cfg(feature = "python")]
 #[pymethods]
 impl NativeExecutor {
@@ -115,7 +84,6 @@ impl NativeExecutor {
                 )
             })
             .collect();
-
         let out = py.allow_threads(|| {
             run_local(
                 &self.local_physical_plan,
@@ -124,9 +92,6 @@ impl NativeExecutor {
                 results_buffer_size,
             )
         })?;
-
-        let out = BlockingStreamIter::new(out);
-
         let iter = Box::new(out.map(|part| {
             part.map(|p| pyo3::Python::with_gil(|py| PyMicroPartition::from(p).into_py(py)))
         }));
@@ -146,18 +111,12 @@ fn should_enable_explain_analyze() -> bool {
     }
 }
 
-pub type PartitionResult = DaftResult<Arc<MicroPartition>>;
-pub type SendableStream = dyn Stream<Item = PartitionResult> + Send;
-
-/// A pinned boxed stream that can be sent across thread boundaries
-pub type PinnedStream = Pin<Box<SendableStream>>;
-
 pub fn run_local(
     physical_plan: &LocalPhysicalPlan,
     psets: HashMap<String, Vec<Arc<MicroPartition>>>,
     cfg: Arc<DaftExecutionConfig>,
     results_buffer_size: Option<usize>,
-) -> DaftResult<PinnedStream> {
+) -> DaftResult<Box<dyn Iterator<Item = DaftResult<Arc<MicroPartition>>> + Send>> {
     refresh_chrome_trace();
     let mut pipeline = physical_plan_to_pipeline(physical_plan, &psets, &cfg)?;
     let (tx, rx) = create_channel(results_buffer_size.unwrap_or(1));
@@ -214,18 +173,18 @@ pub fn run_local(
         })
     });
 
-    struct ReceiverStream {
+    struct ReceiverIterator {
         receiver: Receiver<Arc<MicroPartition>>,
         handle: Option<std::thread::JoinHandle<DaftResult<()>>>,
     }
 
-    impl Stream for ReceiverStream {
+    impl Iterator for ReceiverIterator {
         type Item = DaftResult<Arc<MicroPartition>>;
 
-        fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-            match self.receiver.poll_recv(cx) {
-                Poll::Ready(Some(part)) => Poll::Ready(Some(Ok(part))),
-                Poll::Ready(None) => {
+        fn next(&mut self) -> Option<Self::Item> {
+            match self.receiver.blocking_recv() {
+                Some(part) => Some(Ok(part)),
+                None => {
                     if self.handle.is_some() {
                         let join_result = self
                             .handle
                             .take()
                             .unwrap()
                             .join()
                             .expect("Execution engine thread panicked");
                         match join_result {
-                            Ok(()) => Poll::Ready(None),
-                            Err(e) => Poll::Ready(Some(Err(e))),
+                            Ok(()) => None,
+                            Err(e) => Some(Err(e)),
                         }
                     } else {
-                        Poll::Ready(None)
+                        None
                     }
                 }
-                Poll::Pending => Poll::Pending,
             }
         }
     }
 
-    Ok(Box::pin(ReceiverStream {
+    Ok(Box::new(ReceiverIterator {
         receiver: rx,
         handle: Some(handle),
     }))
diff --git a/src/daft-logical-plan/src/optimization/rules/eliminate_cross_join.rs b/src/daft-logical-plan/src/optimization/rules/eliminate_cross_join.rs
index e1358eae83..c8e888fecf 100644
--- a/src/daft-logical-plan/src/optimization/rules/eliminate_cross_join.rs
+++ b/src/daft-logical-plan/src/optimization/rules/eliminate_cross_join.rs
@@ -500,7 +500,7 @@ mod tests {
             expected, actual,
             "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n"
         );
-        assert_eq!(starting_schema, actual.schema());
+        assert_eq!(starting_schema, actual.schema())
     }
 
     #[rstest]
@@ -664,8 +664,8 @@ mod tests {
         )?
         .build();
 
-        let plan = LogicalPlanBuilder::from(plan1)
-            .cross_join(plan2, None, Some("t3."))?
+        let plan = LogicalPlanBuilder::from(plan1.clone())
+            .cross_join(plan2.clone(), None, Some("t3."))?
.filter( col("t3.a") .eq(col("a")) From 7adc5fd53300576433e0d47e21da2765811c2170 Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Wed, 6 Nov 2024 13:48:28 -0800 Subject: [PATCH 10/12] add parquet support --- Cargo.lock | 963 ++++++++++++------ daft/daft/__init__.pyi | 2 + src/daft-connect/Cargo.toml | 20 +- src/daft-connect/src/command.rs | 214 +++- src/daft-connect/src/convert.rs | 39 + .../src/convert/data_conversion.rs | 19 +- .../src/convert/data_conversion/range.rs | 16 +- .../convert/data_conversion/show_string.rs | 59 ++ src/daft-connect/src/convert/expression.rs | 120 +++ .../src/convert/plan_conversion.rs | 134 +++ src/daft-connect/src/lib.rs | 73 +- src/daft-connect/src/session.rs | 9 +- tests/connect/test_parquet_simple.py | 47 + 13 files changed, 1349 insertions(+), 366 deletions(-) create mode 100644 src/daft-connect/src/convert/data_conversion/show_string.rs create mode 100644 src/daft-connect/src/convert/expression.rs create mode 100644 src/daft-connect/src/convert/plan_conversion.rs create mode 100644 tests/connect/test_parquet_simple.py diff --git a/Cargo.lock b/Cargo.lock index eda49f44cb..c01adc0878 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,18 +10,18 @@ checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5" [[package]] name = "addr2line" -version = "0.22.0" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" dependencies = [ "gimli", ] [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "adler32" @@ -78,9 +78,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.18" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" [[package]] name = "android-tzdata" @@ -105,9 +105,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.15" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" dependencies = [ "anstyle", "anstyle-parse", @@ -120,43 +120,43 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.8" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anstyle-parse" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.4" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" [[package]] name = "approx" @@ -181,9 +181,9 @@ checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" [[package]] name = "arrayvec" -version = "0.7.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow-array" @@ -280,7 +280,7 @@ dependencies = [ "indexmap 1.9.3", "itertools 0.10.5", "json-deserializer", - "lexical-core", + "lexical-core 0.8.5", "lz4", "memchr", "multiversion", @@ -333,11 +333,11 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.12" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fec134f64e2bc57411226dfc4e52dec859ddfc7e711fc5e07b612584f000e4aa" +checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" dependencies = [ - "brotli 6.0.0", + "brotli 7.0.0", "bzip2", "deflate64", "flate2", @@ -374,9 +374,9 @@ dependencies = [ [[package]] name = "async-stream" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" dependencies = [ "async-stream-impl", "futures-core", @@ -385,9 +385,9 @@ dependencies = [ [[package]] name = "async-stream-impl" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", @@ -396,9 +396,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.81" +version = "0.1.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", @@ -430,9 +430,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "avro-rs" @@ -495,11 +495,11 @@ dependencies = [ "fastrand 1.9.0", "hex", "http 0.2.12", - "hyper 0.14.30", + "hyper 0.14.31", "ring 0.16.20", 
"time", "tokio", - "tower", + "tower 0.4.13", "tracing", "zeroize", ] @@ -579,7 +579,7 @@ dependencies = [ "percent-encoding", "regex", "tokio-stream", - "tower", + "tower 0.4.13", "tracing", "url", ] @@ -605,7 +605,7 @@ dependencies = [ "http 0.2.12", "regex", "tokio-stream", - "tower", + "tower 0.4.13", "tracing", ] @@ -631,7 +631,7 @@ dependencies = [ "bytes", "http 0.2.12", "regex", - "tower", + "tower 0.4.13", "tracing", ] @@ -718,11 +718,11 @@ dependencies = [ "fastrand 1.9.0", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.30", + "hyper 0.14.31", "hyper-tls 0.5.0", "pin-project-lite", "tokio", - "tower", + "tower 0.4.13", "tracing", ] @@ -750,7 +750,7 @@ dependencies = [ "futures-core", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.30", + "hyper 0.14.31", "once_cell", "percent-encoding", "pin-project-lite", @@ -772,7 +772,7 @@ dependencies = [ "http 0.2.12", "http-body 0.4.6", "pin-project-lite", - "tower", + "tower 0.4.13", "tracing", ] @@ -835,9 +835,9 @@ dependencies = [ [[package]] name = "axum" -version = "0.7.5" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" +checksum = "504e3947307ac8326a5437504c517c4b56716c9d98fac0028c2acc7ca47d70ae" dependencies = [ "async-trait", "axum-core", @@ -855,7 +855,7 @@ dependencies = [ "rustversion", "serde", "sync_wrapper 1.0.1", - "tower", + "tower 0.5.1", "tower-layer", "tower-service", ] @@ -904,7 +904,7 @@ dependencies = [ "serde_json", "time", "url", - "uuid 1.10.0", + "uuid 1.11.0", ] [[package]] @@ -925,7 +925,7 @@ dependencies = [ "time", "tz-rs", "url", - "uuid 1.10.0", + "uuid 1.11.0", ] [[package]] @@ -947,7 +947,7 @@ dependencies = [ "sha2", "time", "url", - "uuid 1.10.0", + "uuid 1.11.0", ] [[package]] @@ -967,22 +967,22 @@ dependencies = [ "serde_json", "time", "url", - "uuid 1.10.0", + "uuid 1.11.0", ] [[package]] name = "backtrace" -version = "0.3.73" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", "miniz_oxide", "object", "rustc-demangle", + "windows-targets 0.52.6", ] [[package]] @@ -1066,9 +1066,9 @@ dependencies = [ [[package]] name = "block-buffer" -version = "0.11.0-rc.2" +version = "0.11.0-rc.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "939c0e62efa052fb0b2db2c0f7c479ad32e364c192c3aab605a7641de265a1a7" +checksum = "3fd016a0ddc7cb13661bf5576073ce07330a693f8608a1320b4e20561cc12cdc" dependencies = [ "hybrid-array", ] @@ -1086,9 +1086,9 @@ dependencies = [ [[package]] name = "brotli" -version = "6.0.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -1122,7 +1122,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" dependencies = [ "memchr", - "regex-automata 0.4.7", + "regex-automata 0.4.9", "serde", ] @@ -1140,18 +1140,18 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.16.3" +version = "1.19.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83" +checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" dependencies = [ "bytemuck_derive", ] [[package]] name = "bytemuck_derive" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" +checksum = "bcfcc3cd946cb52f0bbfdbbcfa2f4e24f75ebb6c0e1002f7c25904fada18b9ec" dependencies = [ "proc-macro2", "quote", @@ -1172,9 +1172,9 @@ checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" [[package]] name = "bytes" -version = "1.7.1" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" +checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" [[package]] name = "bytes-utils" @@ -1224,12 +1224,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.1.10" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9e8aabfac534be767c909e0690571677d49f41bd8465ae876fe043d52ba5292" +checksum = "1aeb932158bd710538c73702db6945cb68a8fb08c519e6e12706b94263b36db8" dependencies = [ "jobserver", "libc", + "shlex", ] [[package]] @@ -1374,9 +1375,9 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "cmake" -version = "0.1.50" +version = "0.1.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" dependencies = [ "cc", ] @@ -1389,9 +1390,9 @@ checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" [[package]] name = "colorchoice" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "colorz" @@ -1449,7 +1450,7 @@ name = "common-display" version = "0.3.0-dev0" dependencies = [ "comfy-table 7.1.1", - "indexmap 2.5.0", + "indexmap 2.6.0", "pyo3", "terminal_size", "textwrap", @@ -1595,9 +1596,9 @@ checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" [[package]] name = "const-oid" -version = "0.10.0-rc.2" +version = "0.10.0-rc.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a0d96d207edbe5135e55038e79ab9ad6d75ba83b14cdf62326ce5b12bc46ab5" +checksum = "68ff6be19477a1bd5441f382916a89bc2a0b2c35db6d41e0f6e8538bf6d6463f" [[package]] name = "const-random" @@ -1643,9 +1644,9 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpufeatures" -version = "0.2.13" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51e852e6dc9a5bed1fae92dd2375037bf2b768725bf3be87811edee3249d09ad" +checksum = "0ca741a962e1b0bff6d724a1a0958b686406e853bb14061f218562e1896f95e6" dependencies = [ "libc", ] @@ -1813,9 +1814,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" dependencies = [ "csv-core", "itoa", @@ -1908,7 +1909,14 @@ name = "daft-connect" version = "0.3.0-dev0" dependencies = [ "arrow2", + "common-daft-config", + "common-file-formats", "daft-core", + "daft-dsl", + "daft-local-execution", + "daft-local-plan", + "daft-logical-plan", + "daft-physical-plan", "daft-schema", "daft-table", "dashmap", @@ -1917,11 +1925,12 @@ dependencies = [ "pyo3", "spark-connect", "tokio", + "tokio-stream", "tonic", "tracing", "tracing-subscriber", "tracing-tracy", - "uuid 1.10.0", + "uuid 1.11.0", ] [[package]] @@ -1944,11 +1953,11 @@ dependencies = [ "daft-schema", "daft-sketch", "derive_more", - "fastrand 2.1.0", + "fastrand 2.2.0", "fnv", "html-escape", "hyperloglog", - "indexmap 2.5.0", + "indexmap 2.6.0", "itertools 0.11.0", "lazy_static", "log", @@ -2020,7 +2029,7 @@ dependencies = [ "daft-core", "daft-sketch", "derive_more", - "indexmap 2.5.0", + "indexmap 2.6.0", "itertools 0.11.0", "log", "pyo3", @@ -2052,7 +2061,7 @@ dependencies = [ "tiktoken-rs", "tokio", "typetag", - "uuid 1.10.0", + "uuid 1.11.0", "xxhash-rust", ] @@ -2127,7 +2136,7 @@ dependencies = [ "google-cloud-storage", "google-cloud-token", "home", - "hyper 0.14.30", + "hyper 0.14.31", "hyper-tls 0.5.0", "itertools 0.11.0", "lazy_static", @@ -2166,7 +2175,7 @@ dependencies = [ "daft-io", "daft-table", "futures", - "indexmap 2.5.0", + "indexmap 2.6.0", "memchr", "memmap2", "num-traits", @@ -2209,7 +2218,7 @@ dependencies = [ "daft-table", "daft-writers", "futures", - "indexmap 2.5.0", + "indexmap 2.6.0", "lazy_static", "log", "num-format", @@ -2251,7 +2260,7 @@ dependencies = [ "daft-functions", "daft-schema", "derivative", - "indexmap 2.5.0", + "indexmap 2.6.0", "itertools 0.11.0", "log", "pretty_assertions", @@ -2260,7 +2269,7 @@ dependencies = [ "serde", "snafu", "test-log", - "uuid 1.10.0", + "uuid 1.11.0", ] [[package]] @@ -2294,7 +2303,7 @@ dependencies = [ "approx", "common-error", "daft-hash", - "fastrand 2.1.0", + "fastrand 2.2.0", "memchr", "proptest", "tango-bench", @@ -2320,7 +2329,7 @@ dependencies = [ "daft-stats", "daft-table", "futures", - "indexmap 2.5.0", + "indexmap 2.6.0", "itertools 0.11.0", "log", "parquet2", @@ -2380,7 +2389,7 @@ dependencies = [ "daft-stats", "daft-table", "futures", - "indexmap 2.5.0", + "indexmap 2.6.0", "itertools 0.11.0", "parquet2", "pyo3", @@ -2423,7 +2432,7 @@ dependencies = [ "common-version", "derive_more", "html-escape", - "indexmap 2.5.0", + "indexmap 2.6.0", "num-derive", "num-traits", "pyo3", @@ -2472,7 +2481,7 @@ dependencies = [ "daft-core", "daft-dsl", "daft-table", - "indexmap 2.5.0", + "indexmap 2.6.0", "serde", "snafu", ] @@ -2491,7 +2500,7 @@ dependencies = [ "daft-image", "daft-logical-plan", "html-escape", - "indexmap 2.5.0", + "indexmap 2.6.0", "num-traits", "pyo3", "rand 0.8.5", @@ -2685,11 +2694,22 @@ version = "0.11.0-pre.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf2e3d6615d99707295a9673e889bf363a04b2a466bd320c65a72536f7577379" dependencies = [ - "block-buffer 0.11.0-rc.2", - "const-oid 0.10.0-rc.2", + "block-buffer 0.11.0-rc.3", + "const-oid 0.10.0-rc.3", "crypto-common 0.2.0-rc.1", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + 
[[package]] name = "doc-comment" version = "0.3.3" @@ -2710,9 +2730,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "encoding_rs" -version = "0.8.34" +version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ "cfg-if", ] @@ -2850,24 +2870,24 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" [[package]] name = "fdeflate" -version = "0.3.4" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f9bfee30e4dedf0ab8b422f03af778d9612b63f502710fc500a334ebe2de645" +checksum = "07c6f4c64c1d33a3111c4466f7365ebdcc37c5bd1ea0d62aae2e3d722aacbedb" dependencies = [ "simd-adler32", ] [[package]] name = "flate2" -version = "1.0.31" +version = "1.0.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" dependencies = [ "crc32fast", "libz-ng-sys", @@ -3088,9 +3108,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.29.0" +version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" @@ -3106,15 +3126,15 @@ checksum = "9985c9503b412198aa4197559e9a318524ebc4519c229bfa05a535828c950b9d" [[package]] name = "globset" -version = "0.4.14" +version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1" +checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19" dependencies = [ "aho-corasick", "bstr", "log", - "regex-automata 0.4.7", - "regex-syntax 0.8.4", + "regex-automata 0.4.9", + "regex-syntax 0.8.5", ] [[package]] @@ -3215,7 +3235,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.5.0", + "indexmap 2.6.0", "slab", "tokio", "tokio-util", @@ -3234,7 +3254,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.1.0", - "indexmap 2.5.0", + "indexmap 2.6.0", "slab", "tokio", "tokio-util", @@ -3285,6 +3305,12 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "hashbrown" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" + [[package]] name = "heck" version = "0.3.3" @@ -3444,9 +3470,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.9.4" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" +checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" [[package]] name = "httpdate" @@ -3456,18 +3482,18 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hybrid-array" -version = "0.2.0-rc.11" +version = "0.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5a41e5b0754cae5aaf7915f1df1147ba8d316fc6e019cfcc00fbaba96d5e030" +checksum = "45a9a965bb102c1c891fb017c09a05c965186b1265a207640f323ddd009f9deb" dependencies = [ "typenum", ] [[package]] name = "hyper" -version = "0.14.30" +version = "0.14.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a152ddd61dfaec7273fe8419ab357f33aee0d914c5f4efbf0d96fa749eea5ec9" +checksum = "8c08302e8fa335b151b788c775ff56e7a03ae64ff85c548ee820fecb70356e85" dependencies = [ "bytes", "futures-channel", @@ -3528,7 +3554,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper 0.14.30", + "hyper 0.14.31", "native-tls", "tokio", "tokio-native-tls", @@ -3575,9 +3601,9 @@ version = "0.3.0-dev0" [[package]] name = "iana-time-zone" -version = "0.1.60" +version = "0.1.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -3596,6 +3622,124 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -3604,19 +3748,30 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", ] [[package]] name = "image" -version = "0.25.4" +version = "0.25.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc144d44a31d753b02ce64093d532f55ff8dc4ebf2ffb8a63c0dda691385acae" +checksum = "cd6f44aed642f18953a158afeb30206f4d50da59fbc66ecb53c66488de73563b" dependencies = [ "bytemuck", "byteorder-lite", @@ -3658,12 +3813,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.14.5", + "hashbrown 0.15.1", "serde", ] @@ -3699,9 +3854,9 @@ checksum = "f958d3d68f4167080a18141e10381e7634563984a537f2a49a30fd8e53ac5767" [[package]] name = "ipnet" -version = "2.9.0" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" [[package]] name = "is-terminal" @@ -3744,6 +3899,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -3776,7 +3940,7 @@ dependencies = [ "ahash", "dyn-clone", "hifijson", - "indexmap 2.5.0", + "indexmap 2.6.0", "jaq-syn", "once_cell", "serde_json", @@ -3827,9 +3991,9 @@ checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = 
"6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] @@ -3870,11 +4034,24 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", + "lexical-parse-float 0.8.5", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", + "lexical-write-float 0.8.5", + "lexical-write-integer 0.8.5", +] + +[[package]] +name = "lexical-core" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +dependencies = [ + "lexical-parse-float 1.0.2", + "lexical-parse-integer 1.0.2", + "lexical-util 1.0.3", + "lexical-write-float 1.0.2", + "lexical-write-integer 1.0.2", ] [[package]] @@ -3883,8 +4060,19 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" dependencies = [ - "lexical-parse-integer", - "lexical-util", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +dependencies = [ + "lexical-parse-integer 1.0.2", + "lexical-util 1.0.3", "static_assertions", ] @@ -3894,7 +4082,17 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +dependencies = [ + "lexical-util 1.0.3", "static_assertions", ] @@ -3907,14 +4105,34 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "lexical-util" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +dependencies = [ + "static_assertions", +] + [[package]] name = "lexical-write-float" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" dependencies = [ - "lexical-util", - "lexical-write-integer", + "lexical-util 0.8.5", + "lexical-write-integer 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +dependencies = [ + "lexical-util 1.0.3", + "lexical-write-integer 1.0.2", "static_assertions", ] @@ -3924,15 +4142,25 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +dependencies = [ + "lexical-util 1.0.3", "static_assertions", ] [[package]] name = "libc" -version = "0.2.155" +version = "0.2.162" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" [[package]] name = "libflate" @@ -3966,15 +4194,15 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.8" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libz-ng-sys" -version = "1.1.15" +version = "1.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6409efc61b12687963e602df8ecf70e8ddacf95bc6576bcf16e3ac6328083c5" +checksum = "8f0f7295a34685977acb2e8cc8b08ee4a8dffd6cf278eeccddbe1ed55ba815d5" dependencies = [ "cmake", "libc", @@ -3986,6 +4214,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + [[package]] name = "lock_api" version = "0.4.12" @@ -4017,19 +4251,18 @@ dependencies = [ [[package]] name = "lz4" -version = "1.26.0" +version = "1.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "958b4caa893816eea05507c20cfe47574a43d9a697138a7872990bba8a0ece68" +checksum = "4d1febb2b4a79ddd1980eede06a8f7902197960aa0383ffcfdd62fe723036725" dependencies = [ - "libc", "lz4-sys", ] [[package]] name = "lz4-sys" -version = "1.10.0" +version = "1.11.1+lz4-1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109de74d5d2353660401699a4174a4ff23fcc649caf553df71933c7fb45ad868" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" dependencies = [ "cc", "libc", @@ -4104,9 +4337,9 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memmap2" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" dependencies = [ "libc", ] @@ -4138,11 +4371,11 @@ dependencies = [ [[package]] name = "miniz_oxide" -version = "0.7.4" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" dependencies = [ - "adler", + "adler2", "simd-adler32", ] @@ -4392,18 +4625,18 @@ dependencies = [ [[package]] name = "object" -version = "0.36.3" +version = "0.36.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "oneshot" @@ -4419,9 +4652,9 @@ checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "openssl" -version = "0.10.66" +version = "0.10.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" +checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" dependencies = [ "bitflags 2.6.0", "cfg-if", @@ -4451,18 +4684,18 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-src" -version = "300.3.1+3.3.1" +version = "300.4.0+3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7259953d42a81bf137fbbd73bd30a8e1914d6dce43c2b90ed575783a22608b91" +checksum = "a709e02f2b4aca747929cca5ed248880847c650233cf8b8cdc48f40aaf4898a6" dependencies = [ "cc", ] [[package]] name = "openssl-sys" -version = "0.9.103" +version = "0.9.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" +checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" dependencies = [ "cc", "libc", @@ -4502,9 +4735,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "parking" -version = "2.2.0" +version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" [[package]] name = "parking_lot" @@ -4549,7 +4782,7 @@ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.3", + "redox_syscall 0.5.7", "smallvec", "windows-targets 0.52.6", ] @@ -4573,7 +4806,7 @@ dependencies = [ "criterion", "flate2", "futures", - "indexmap 2.5.0", + "indexmap 2.6.0", "lz4", "lz4_flex", "parquet-format-safe", @@ -4674,18 +4907,18 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.5" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" +checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.5" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" +checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" dependencies = [ "proc-macro2", "quote", @@ -4694,9 +4927,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" [[package]] name = "pin-utils" @@ -4716,9 +4949,9 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +checksum = 
"953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "plain" @@ -4737,9 +4970,9 @@ dependencies = [ [[package]] name = "plotters" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ "num-traits", "plotters-backend", @@ -4750,24 +4983,24 @@ dependencies = [ [[package]] name = "plotters-backend" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" [[package]] name = "plotters-svg" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" dependencies = [ "plotters-backend", ] [[package]] name = "png" -version = "0.17.13" +version = "0.17.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06e4b0d3d1312775e782c86c91a111aa1f910cbb65e1337f9975b5f9a554b5e1" +checksum = "52f9d46a34a05a6a57566bc2bfae066ef07585a6e3fa30fbbdff5936380623f0" dependencies = [ "bitflags 1.3.2", "crc32fast", @@ -4778,9 +5011,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.7.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265" +checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" [[package]] name = "powerfmt" @@ -4799,9 +5032,9 @@ dependencies = [ [[package]] name = "pretty_assertions" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" dependencies = [ "diff", "yansi", @@ -4809,9 +5042,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" dependencies = [ "unicode-ident", ] @@ -4830,7 +5063,7 @@ dependencies = [ "rand 0.8.5", "rand_chacha 0.3.1", "rand_xorshift", - "regex-syntax 0.8.4", + "regex-syntax 0.8.5", "rusty-fork", "tempfile", "unarray", @@ -4899,7 +5132,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.87", @@ -4921,7 +5154,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" dependencies = [ "cfg-if", - "indexmap 2.5.0", + "indexmap 2.6.0", "indoc", "inventory", "libc", @@ -5025,9 +5258,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -5159,9 +5392,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.3" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ "bitflags 2.6.0", ] @@ -5188,14 +5421,14 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.6" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.7", - "regex-syntax 0.8.4", + "regex-automata 0.4.9", + "regex-syntax 0.8.5", ] [[package]] @@ -5209,13 +5442,13 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.4", + "regex-syntax 0.8.5", ] [[package]] @@ -5232,9 +5465,9 @@ checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] name = "regex-syntax" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "relative-path" @@ -5256,7 +5489,7 @@ dependencies = [ "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.30", + "hyper 0.14.31", "hyper-tls 0.5.0", "ipnet", "js-sys", @@ -5451,18 +5684,18 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "99e4ea3e1cdc4b559b8e5650f9c8e5998e3e5c1343b4eaf034565f32318d63c0" dependencies = [ "bitflags 2.6.0", "errno", @@ -5497,9 +5730,9 @@ checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" [[package]] name = "rusty-fork" @@ -5576,11 +5809,11 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.23" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +checksum = 
"01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -5639,9 +5872,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.11.1" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75da29fe9b9b08fe9d6b22b5b4bcbc75d8db3aa31e639aa56bb62e9d46bfceaf" +checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" dependencies = [ "core-foundation-sys", "libc", @@ -5661,18 +5894,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.206" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b3e4cd94123dd520a128bcd11e34d9e9e423e7e3e50425cb1b4b1e3549d0284" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_arrow" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff56acef131ef74bacc5e86c5038b524d61dee59d65c9e3e5e0f35b9de98cf99" +checksum = "f11dc39a704b214e72e4cec092fff98180ac432f5f7850dd0d55e9012c29fba9" dependencies = [ "arrow2", "bytemuck", @@ -5683,9 +5916,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.206" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fabfb6138d2383ea8208cf98ccf69cdfb1aff4088460681d84189aa259762f97" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", @@ -5694,11 +5927,11 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.124" +version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66ad62847a56b3dba58cc891acd13884b9c61138d330c0d7b6181713d4fce38d" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ - "indexmap 2.5.0", + "indexmap 2.6.0", "itoa", "memchr", "ryu", @@ -5780,6 +6013,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.2" @@ -5797,14 +6036,14 @@ checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" [[package]] name = "simd-json" -version = "0.13.10" +version = "0.13.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "570c430b3d902ea083097e853263ae782dfe40857d93db019a12356c8e8143fa" +checksum = "a0228a564470f81724e30996bbc2b171713b37b15254a6440c7e2d5449b95691" dependencies = [ "ahash", "getrandom 0.2.15", "halfbrown", - "lexical-core", + "lexical-core 1.0.2", "once_cell", "ref-cast", "serde", @@ -5815,9 +6054,9 @@ dependencies = [ [[package]] name = "simdutf8" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "simple_asn1" @@ -5957,6 +6196,12 @@ dependencies = [ "log", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" @@ -6114,6 +6359,17 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "sysinfo" version = "0.30.13" @@ -6185,12 +6441,12 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.12.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", - "fastrand 2.1.0", + "fastrand 2.2.0", "once_cell", "rustix", "windows-sys 0.59.0", @@ -6241,18 +6497,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.63" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.63" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", @@ -6358,6 +6614,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -6435,9 +6701,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ "bytes", "futures-core", @@ -6471,7 +6737,7 @@ dependencies = [ "socket2", "tokio", "tokio-stream", - "tower", + "tower 0.4.13", "tower-layer", "tower-service", "tracing", @@ -6497,17 +6763,31 @@ dependencies = [ "tracing", ] +[[package]] +name = "tower" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper 0.1.2", + "tower-layer", + "tower-service", +] + [[package]] name = "tower-layer" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = 
"8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -6643,9 +6923,9 @@ dependencies = [ [[package]] name = "typeid" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "059d83cc991e7a42fc37bd50941885db0888e34209f8cfd9aab07ddec03bc9cf" +checksum = "0e13db2e0ccd5e14a544e8a246ba2312cd25223f616442d7f2cb0e3db614236e" [[package]] name = "typenum" @@ -6694,24 +6974,15 @@ checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] name = "unicase" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" -dependencies = [ - "version_check", -] - -[[package]] -name = "unicode-bidi" -version = "0.3.15" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" +checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "unicode-linebreak" @@ -6721,30 +6992,30 @@ checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" [[package]] name = "unicode-normalization" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" dependencies = [ "tinyvec", ] [[package]] name = "unicode-segmentation" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.13" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode-xid" -version = "0.2.4" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" [[package]] name = "unindent" @@ -6766,9 +7037,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.2" +version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" dependencies = [ "form_urlencoded", "idna", @@ -6782,12 +7053,24 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + [[package]] name = "utf8-width" version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -6806,9 +7089,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" dependencies = [ "getrandom 0.2.15", "serde", @@ -6898,19 +7181,20 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", @@ -6923,9 +7207,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.42" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" dependencies = [ "cfg-if", "js-sys", @@ -6935,9 +7219,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -6945,9 +7229,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", @@ -6958,15 +7242,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "wasm-streams" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", @@ -6992,9 
+7276,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" dependencies = [ "js-sys", "wasm-bindgen", @@ -7289,6 +7573,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "xmlparser" version = "0.13.6" @@ -7312,9 +7608,33 @@ dependencies = [ [[package]] name = "yansi" -version = "0.5.1" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "yoke" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure 0.13.1", +] [[package]] name = "zerocopy" @@ -7344,7 +7664,7 @@ checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb" dependencies = [ "proc-macro2", "syn 1.0.109", - "synstructure", + "synstructure 0.12.6", ] [[package]] @@ -7358,12 +7678,55 @@ dependencies = [ "syn 2.0.87", ] +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure 0.13.1", +] + [[package]] name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "zstd" version = "0.12.4" diff --git a/daft/daft/__init__.pyi b/daft/daft/__init__.pyi index 12cffd7dc1..13d2fc6800 100644 --- a/daft/daft/__init__.pyi +++ b/daft/daft/__init__.pyi @@ -1241,6 +1241,8 @@ def 
connect_start(addr: str) -> ConnectionHandle: ... class ConnectionHandle: def shutdown(self) -> None: ... +def connect_start(addr: str) -> None: ... + # expr numeric ops def abs(expr: PyExpr) -> PyExpr: ... def cbrt(expr: PyExpr) -> PyExpr: ... diff --git a/src/daft-connect/Cargo.toml b/src/daft-connect/Cargo.toml index e67a2da332..57566ba7fc 100644 --- a/src/daft-connect/Cargo.toml +++ b/src/daft-connect/Cargo.toml @@ -1,19 +1,27 @@ [dependencies] +arrow2.workspace = true +common-daft-config.workspace = true +common-file-formats.workspace = true +daft-core.workspace = true +daft-dsl.workspace = true +daft-local-execution.workspace = true +daft-local-plan.workspace = true +daft-logical-plan.workspace = true +daft-physical-plan.workspace = true +daft-schema.workspace = true +daft-table.workspace = true dashmap = "6.1.0" eyre = "0.6.12" futures = "0.3.31" pyo3 = {workspace = true, optional = true} +spark-connect.workspace = true tokio = {version = "1.40.0", features = ["full"]} +tokio-stream = "0.1.16" tonic = "0.12.3" tracing-subscriber = {version = "0.3.18", features = ["env-filter"]} tracing-tracy = "0.11.3" -uuid = {version = "1.10.0", features = ["v4"]} -arrow2.workspace = true -daft-core.workspace = true -daft-schema.workspace = true -daft-table.workspace = true -spark-connect.workspace = true tracing.workspace = true +uuid = {version = "1.10.0", features = ["v4"]} [features] python = ["dep:pyo3"] diff --git a/src/daft-connect/src/command.rs b/src/daft-connect/src/command.rs index 28ddac2365..50acf5e37e 100644 --- a/src/daft-connect/src/command.rs +++ b/src/daft-connect/src/command.rs @@ -1,29 +1,40 @@ -use std::future::ready; +use std::{ops::ControlFlow, thread}; use arrow2::io::ipc::write::StreamWriter; +use common_file_formats::FileFormat; use daft_table::Table; use eyre::Context; -use futures::stream; +use futures::TryStreamExt; use spark_connect::{ execute_plan_response::{ArrowBatch, ResponseType, ResultComplete}, spark_connect_service_server::SparkConnectService, - ExecutePlanResponse, Relation, + write_operation::{SaveMode, SaveType}, + ExecutePlanResponse, Relation, WriteOperation, }; +use tokio_stream::wrappers::UnboundedReceiverStream; use tonic::Status; use uuid::Uuid; -use crate::{convert::convert_data, DaftSparkConnectService, Session}; +use crate::{ + convert::{convert_data, run_local, to_logical_plan}, + invalid_argument_err, unimplemented_err, DaftSparkConnectService, Session, +}; type DaftStream = ::ExecutePlanStream; -pub struct PlanIds { - session: String, - server_side_session: String, - operation: String, +struct ExecutablePlanChannel { + session_id: String, + server_side_session_id: String, + operation_id: String, + tx: tokio::sync::mpsc::UnboundedSender>, +} + +pub trait ConcreteDataChannel { + fn send_table(&mut self, table: &Table) -> eyre::Result<()>; } -impl PlanIds { - pub fn gen_response(&self, table: &Table) -> eyre::Result { +impl ConcreteDataChannel for ExecutablePlanChannel { + fn send_table(&mut self, table: &Table) -> eyre::Result<()> { let mut data = Vec::new(); let mut writer = StreamWriter::new( @@ -50,9 +61,9 @@ impl PlanIds { .wrap_err("Failed to write Arrow chunk to stream writer")?; let response = ExecutePlanResponse { - session_id: self.session.to_string(), - server_side_session_id: self.server_side_session.to_string(), - operation_id: self.operation.to_string(), + session_id: self.session_id.to_string(), + server_side_session_id: self.server_side_session_id.to_string(), + operation_id: self.operation_id.to_string(), response_id: 
Uuid::new_v4().to_string(), // todo: implement this metrics: None, // todo: implement this observed_metrics: vec![], @@ -64,7 +75,11 @@ impl PlanIds { })), }; - Ok(response) + self.tx + .send(Ok(response)) + .wrap_err("Error sending response to client")?; + + Ok(()) } } @@ -74,31 +89,162 @@ impl Session { command: Relation, operation_id: String, ) -> Result { - use futures::{StreamExt, TryStreamExt}; + let (tx, rx) = tokio::sync::mpsc::unbounded_channel(); - let context = PlanIds { - session: self.client_side_session_id().to_string(), - server_side_session: self.server_side_session_id().to_string(), - operation: operation_id.clone(), - }; - - let finished = ExecutePlanResponse { + let mut channel = ExecutablePlanChannel { session_id: self.client_side_session_id().to_string(), server_side_session_id: self.server_side_session_id().to_string(), - operation_id, - response_id: Uuid::new_v4().to_string(), - metrics: None, - observed_metrics: vec![], - schema: None, - response_type: Some(ResponseType::ResultComplete(ResultComplete {})), + operation_id: operation_id.clone(), + tx: tx.clone(), + }; + + thread::spawn({ + let session_id = self.client_side_session_id().to_string(); + let server_side_session_id = self.server_side_session_id().to_string(); + move || { + let result = convert_data(command, &mut channel); + + if let Err(e) = result { + tx.send(Err(e)).unwrap(); + } else { + let finished = ExecutePlanResponse { + session_id, + server_side_session_id, + operation_id: operation_id.to_string(), + response_id: Uuid::new_v4().to_string(), + metrics: None, + observed_metrics: vec![], + schema: None, + response_type: Some(ResponseType::ResultComplete(ResultComplete {})), + }; + + tx.send(Ok(finished)).unwrap(); + } + } + }); + + let recv_stream = + UnboundedReceiverStream::new(rx).map_err(|e| Status::internal(e.to_string())); + + Ok(Box::pin(recv_stream)) + } + + pub fn handle_write_operation( + &self, + operation: WriteOperation, + operation_id: String, + ) -> Result { + let mode = operation.mode(); + + let WriteOperation { + input, + source, + sort_column_names, + partitioning_columns, + bucket_by, + options, + clustering_columns, + save_type, + mode: _, + } = operation; + + let Some(input) = input else { + return invalid_argument_err!("input is None"); + }; + + let source = source.unwrap_or_else(|| "parquet".to_string()); + if source != "parquet" { + return unimplemented_err!( + "Only writing parquet is supported for now but got {source}" + ); + } + + match mode { + SaveMode::Unspecified => {} + SaveMode::Append => { + return unimplemented_err!("Append mode is not yet supported"); + } + SaveMode::Overwrite => { + return unimplemented_err!("Overwrite mode is not yet supported"); + } + SaveMode::ErrorIfExists => { + return unimplemented_err!("ErrorIfExists mode is not yet supported"); + } + SaveMode::Ignore => { + return unimplemented_err!("Ignore mode is not yet supported"); + } + } + + if !sort_column_names.is_empty() { + return unimplemented_err!("Sort by columns is not yet supported"); + } + + if !partitioning_columns.is_empty() { + return unimplemented_err!("Partitioning columns is not yet supported"); + } + + if bucket_by.is_some() { + return unimplemented_err!("Bucket by columns is not yet supported"); + } + + if !options.is_empty() { + return unimplemented_err!("Options are not yet supported"); + } + + if !clustering_columns.is_empty() { + return unimplemented_err!("Clustering columns is not yet supported"); + } + let Some(save_type) = save_type else { + return 
invalid_argument_err!("save_type is required"); }; - let stream = convert_data(command, &context) - .map_err(|e| Status::internal(e.to_string()))? - .chain(stream::once(ready(Ok(finished)))); + let save_path = match save_type { + SaveType::Path(path) => path, + SaveType::Table(_) => { + return unimplemented_err!("Save type table is not yet supported"); + } + }; - Ok(Box::pin( - stream.map_err(|e| Status::internal(e.to_string())), - )) + thread::scope(|scope| { + let res = scope.spawn(|| { + let plan = to_logical_plan(input) + .map_err(|_| Status::internal("Failed to convert to logical plan"))?; + + // todo: assuming this is parquet + // todo: is save_path right? + let plan = plan + .table_write(&save_path, FileFormat::Parquet, None, None, None) + .map_err(|_| Status::internal("Failed to write table"))?; + + let plan = plan.build(); + + run_local( + &plan, + |_table| ControlFlow::Continue(()), + || ControlFlow::Break(()), + ) + .map_err(|e| Status::internal(format!("Failed to write table: {e}")))?; + + Result::<(), Status>::Ok(()) + }); + + res.join().unwrap() + })?; + + let session_id = self.client_side_session_id().to_string(); + let server_side_session_id = self.server_side_session_id().to_string(); + + Ok(Box::pin(futures::stream::once(async { + Ok(ExecutePlanResponse { + session_id, + server_side_session_id, + operation_id, + response_id: "abcxyz".to_string(), + metrics: None, + observed_metrics: vec![], + schema: None, + response_type: Some(ResponseType::ResultComplete(ResultComplete {})), + }) + }))) } } diff --git a/src/daft-connect/src/convert.rs b/src/daft-connect/src/convert.rs index 743ffcf06a..a3c48a6e8e 100644 --- a/src/daft-connect/src/convert.rs +++ b/src/daft-connect/src/convert.rs @@ -1,6 +1,45 @@ mod data_conversion; +mod expression; mod formatting; +mod plan_conversion; mod schema_conversion; +use std::{ + collections::HashMap, + ops::{ControlFlow, Try}, + sync::Arc, +}; + +use common_daft_config::DaftExecutionConfig; +use daft_logical_plan::LogicalPlanRef; +use daft_table::Table; pub use data_conversion::convert_data; +use eyre::Context; +pub use plan_conversion::to_logical_plan; pub use schema_conversion::connect_schema; + +pub fn run_local( + logical_plan: &LogicalPlanRef, + mut f: impl FnMut(&Table) -> T, + default: impl FnOnce() -> T, +) -> eyre::Result { + let physical_plan = daft_local_plan::translate(logical_plan)?; + let cfg = Arc::new(DaftExecutionConfig::default()); + let psets = HashMap::new(); + + let stream = daft_local_execution::run_local(&physical_plan, psets, cfg, None) + .wrap_err("running local execution")?; + + for elem in stream { + let elem = elem?; + let tables = elem.get_tables()?; + + for table in tables.as_slice() { + if let ControlFlow::Break(x) = f(table).branch() { + return Ok(T::from_residual(x)); + } + } + } + + Ok(default()) +} diff --git a/src/daft-connect/src/convert/data_conversion.rs b/src/daft-connect/src/convert/data_conversion.rs index 71032aa4a8..96dde37b76 100644 --- a/src/daft-connect/src/convert/data_conversion.rs +++ b/src/daft-connect/src/convert/data_conversion.rs @@ -28,23 +28,19 @@ //! ```mermaid //! //! 
``` - use eyre::{eyre, Context}; -use futures::Stream; -use spark_connect::{relation::RelType, ExecutePlanResponse, Relation}; +use spark_connect::{relation::RelType, Relation}; use tracing::trace; -use crate::convert::formatting::RelTypeExt; +use crate::{command::ConcreteDataChannel, convert::formatting::RelTypeExt}; + +mod show_string; +use show_string::show_string; mod range; use range::range; -use crate::command::PlanIds; - -pub fn convert_data( - plan: Relation, - context: &PlanIds, -) -> eyre::Result> + Unpin> { +pub fn convert_data(plan: Relation, encoder: &mut impl ConcreteDataChannel) -> eyre::Result<()> { // First check common fields if needed if let Some(common) = &plan.common { // contains metadata shared across all relation types @@ -55,7 +51,8 @@ pub fn convert_data( let rel_type = plan.rel_type.ok_or_else(|| eyre!("rel_type is None"))?; match rel_type { - RelType::Range(input) => range(input, context).wrap_err("parsing Range"), + RelType::ShowString(input) => show_string(*input, encoder).wrap_err("parsing ShowString"), + RelType::Range(input) => range(input, encoder).wrap_err("parsing Range"), other => Err(eyre!("Unsupported top-level relation: {}", other.name())), } } diff --git a/src/daft-connect/src/convert/data_conversion/range.rs b/src/daft-connect/src/convert/data_conversion/range.rs index f370228188..1afb710a81 100644 --- a/src/daft-connect/src/convert/data_conversion/range.rs +++ b/src/daft-connect/src/convert/data_conversion/range.rs @@ -1,18 +1,12 @@ -use std::future::ready; - use daft_core::prelude::Series; use daft_schema::prelude::Schema; use daft_table::Table; use eyre::{ensure, Context}; -use futures::{stream, Stream}; -use spark_connect::{ExecutePlanResponse, Range}; +use spark_connect::Range; -use crate::command::PlanIds; +use crate::command::ConcreteDataChannel; -pub fn range( - range: Range, - channel: &PlanIds, -) -> eyre::Result> + Unpin> { +pub fn range(range: Range, channel: &mut impl ConcreteDataChannel) -> eyre::Result<()> { let Range { start, end, @@ -42,7 +36,7 @@ pub fn range( len, )?; - let response = channel.gen_response(&singleton_table)?; + channel.send_table(&singleton_table)?; - Ok(stream::once(ready(Ok(response)))) + Ok(()) } diff --git a/src/daft-connect/src/convert/data_conversion/show_string.rs b/src/daft-connect/src/convert/data_conversion/show_string.rs new file mode 100644 index 0000000000..bed2952a21 --- /dev/null +++ b/src/daft-connect/src/convert/data_conversion/show_string.rs @@ -0,0 +1,59 @@ +use daft_core::prelude::Series; +use daft_schema::prelude::Schema; +use daft_table::Table; +use eyre::{ensure, eyre, Context}; +use spark_connect::ShowString; + +use crate::{ + command::ConcreteDataChannel, + convert::{plan_conversion::to_logical_plan, run_local}, +}; + +pub fn show_string( + show_string: ShowString, + channel: &mut impl ConcreteDataChannel, +) -> eyre::Result<()> { + let ShowString { + input, + num_rows, + truncate, + vertical, + } = show_string; + + ensure!(num_rows > 0, "num_rows must be positive, got {num_rows}"); + ensure!(truncate > 0, "truncate must be positive, got {truncate}"); + ensure!(!vertical, "vertical is not yet supported"); + + let input = *input.ok_or_else(|| eyre!("input is None"))?; + + let logical_plan = to_logical_plan(input)?.build(); + + run_local( + &logical_plan, + |table| -> eyre::Result<()> { + let display = format!("{table}"); + + let arrow_array: arrow2::array::Utf8Array = + std::iter::once(display.as_str()).map(Some).collect(); + + let singleton_series = Series::try_from(( + "show_string", + 
Box::new(arrow_array) as Box, + )) + .wrap_err("creating singleton series")?; + + let singleton_table = Table::new_with_size( + Schema::new(vec![singleton_series.field().clone()])?, + vec![singleton_series], + 1, + )?; + + channel.send_table(&singleton_table)?; + + Ok(()) + }, + || Ok(()), + )??; + + Ok(()) +} diff --git a/src/daft-connect/src/convert/expression.rs b/src/daft-connect/src/convert/expression.rs new file mode 100644 index 0000000000..f79a7bf5a8 --- /dev/null +++ b/src/daft-connect/src/convert/expression.rs @@ -0,0 +1,120 @@ +use daft_dsl::{Expr as DaftExpr, Operator}; +use eyre::{bail, ensure, eyre, Result}; +use spark_connect::{expression, expression::literal::LiteralType, Expression}; + +pub fn convert_expression(expr: Expression) -> Result { + match expr.expr_type { + Some(expression::ExprType::Literal(lit)) => Ok(DaftExpr::Literal(convert_literal(lit)?)), + + Some(expression::ExprType::UnresolvedAttribute(attr)) => { + Ok(DaftExpr::Column(attr.unparsed_identifier.into())) + } + + Some(expression::ExprType::Alias(alias)) => { + let expression::Alias { + expr, + name, + metadata, + } = *alias; + let expr = *expr.ok_or_else(|| eyre!("expr is None"))?; + + // Convert alias + let expr = convert_expression(expr)?; + + if let Some(metadata) = metadata + && !metadata.is_empty() + { + bail!("Metadata is not yet supported"); + } + + // ignore metadata for now + + let [name] = name.as_slice() else { + bail!("Alias name must have exactly one element"); + }; + + Ok(DaftExpr::Alias(expr.into(), name.as_str().into())) + } + + Some(expression::ExprType::UnresolvedFunction(expression::UnresolvedFunction { + function_name, + arguments, + is_distinct, + is_user_defined_function, + })) => { + ensure!(!is_distinct, "Distinct is not yet supported"); + ensure!( + !is_user_defined_function, + "User-defined functions are not yet supported" + ); + + let op = function_name.as_str(); + match op { + ">" | "<" | "<=" | ">=" | "+" | "-" | "*" | "/" => { + let arr: [Expression; 2] = arguments + .try_into() + .map_err(|_| eyre!("Expected 2 arguments"))?; + let [left, right] = arr; + + let left = convert_expression(left)?; + let right = convert_expression(right)?; + + let op = match op { + ">" => Operator::Gt, + "<" => Operator::Lt, + "<=" => Operator::LtEq, + ">=" => Operator::GtEq, + "+" => Operator::Plus, + "-" => Operator::Minus, + "*" => Operator::Multiply, + "/" => Operator::FloorDivide, // todo is this what we want? + _ => unreachable!(), + }; + + Ok(DaftExpr::BinaryOp { + left: left.into(), + op, + right: right.into(), + }) + } + other => bail!("Unsupported function name: {other}"), + } + } + + // Handle other expression types... + _ => Err(eyre!("Unsupported expression type")), + } +} + +// Helper functions to convert literals, function names, operators etc. + +fn convert_literal(lit: expression::Literal) -> Result { + let literal_type = lit + .literal_type + .ok_or_else(|| eyre!("literal_type is None"))?; + + let result = match literal_type { + LiteralType::Null(..) 
=> daft_dsl::LiteralValue::Null, + LiteralType::Binary(input) => daft_dsl::LiteralValue::Binary(input), + LiteralType::Boolean(input) => daft_dsl::LiteralValue::Boolean(input), + LiteralType::Byte(input) => daft_dsl::LiteralValue::Int32(input), + LiteralType::Short(input) => daft_dsl::LiteralValue::Int32(input), + LiteralType::Integer(input) => daft_dsl::LiteralValue::Int32(input), + LiteralType::Long(input) => daft_dsl::LiteralValue::Int64(input), + LiteralType::Float(input) => daft_dsl::LiteralValue::Float64(f64::from(input)), + LiteralType::Double(input) => daft_dsl::LiteralValue::Float64(input), + LiteralType::String(input) => daft_dsl::LiteralValue::Utf8(input), + LiteralType::Date(input) => daft_dsl::LiteralValue::Date(input), + LiteralType::Decimal(_) + | LiteralType::Timestamp(_) + | LiteralType::TimestampNtz(_) + | LiteralType::CalendarInterval(_) + | LiteralType::YearMonthInterval(_) + | LiteralType::DayTimeInterval(_) + | LiteralType::Array(_) + | LiteralType::Map(_) + | LiteralType::Struct(_) => bail!("unimplemented"), + }; + + Ok(result) +} diff --git a/src/daft-connect/src/convert/plan_conversion.rs b/src/daft-connect/src/convert/plan_conversion.rs new file mode 100644 index 0000000000..6e0c5fc872 --- /dev/null +++ b/src/daft-connect/src/convert/plan_conversion.rs @@ -0,0 +1,134 @@ +use std::{collections::HashSet, sync::Arc}; + +use daft_logical_plan::{LogicalPlanBuilder, ParquetScanBuilder}; +use eyre::{bail, eyre, Result, WrapErr}; +use spark_connect::{ + expression::Alias, + read::{DataSource, ReadType}, + relation::RelType, + Filter, Read, Relation, WithColumns, +}; +use tracing::warn; + +use crate::convert::expression; + +pub fn to_logical_plan(plan: Relation) -> Result { + let scope = std::thread::spawn(|| { + let rel_type = plan.rel_type.ok_or_else(|| eyre!("rel_type is None"))?; + + match rel_type { + RelType::ShowString(..) 
=> { + bail!("ShowString is only supported as a top-level relation") + } + RelType::Filter(filter) => parse_filter(*filter).wrap_err("parsing Filter"), + RelType::WithColumns(with_columns) => { + parse_with_columns(*with_columns).wrap_err("parsing WithColumns") + } + RelType::Read(read) => parse_read(read), + _ => bail!("Unsupported relation type: {rel_type:?}"), + } + }); + + scope.join().unwrap() +} + +fn parse_filter(filter: Filter) -> Result { + let Filter { input, condition } = filter; + let input = *input.ok_or_else(|| eyre!("input is None"))?; + let input_plan = to_logical_plan(input).wrap_err("parsing input")?; + + let condition = condition.ok_or_else(|| eyre!("condition is None"))?; + let condition = + expression::convert_expression(condition).wrap_err("converting to daft expression")?; + let condition = Arc::new(condition); + + input_plan.filter(condition).wrap_err("applying filter") +} + +fn parse_with_columns(with_columns: WithColumns) -> Result { + let WithColumns { input, aliases } = with_columns; + let input = *input.ok_or_else(|| eyre!("input is None"))?; + let input_plan = to_logical_plan(input).wrap_err("parsing input")?; + + let mut new_exprs = Vec::new(); + let mut existing_columns: HashSet<_> = input_plan.schema().names().into_iter().collect(); + + for alias in aliases { + let Alias { + expr, + name, + metadata, + } = alias; + + if name.len() != 1 { + bail!("Alias name must have exactly one element"); + } + let name = name[0].as_str(); + + if metadata.is_some() { + bail!("Metadata is not yet supported"); + } + + let expr = expr.ok_or_else(|| eyre!("expression is None"))?; + let expr = + expression::convert_expression(*expr).wrap_err("converting to daft expression")?; + let expr = Arc::new(expr); + + new_exprs.push(expr.alias(name)); + + if existing_columns.contains(name) { + existing_columns.remove(name); + } + } + + // Add remaining existing columns + for col_name in existing_columns { + new_exprs.push(daft_dsl::col(col_name)); + } + + input_plan + .select(new_exprs) + .wrap_err("selecting new expressions") +} + +fn parse_read(read: Read) -> Result { + let Read { + is_streaming, + read_type, + } = read; + + warn!("Ignoring is_streaming: {is_streaming}"); + + let read_type = read_type.ok_or_else(|| eyre!("type is None"))?; + + match read_type { + ReadType::NamedTable(_) => bail!("Named tables are not yet supported"), + ReadType::DataSource(data_source) => parse_data_source(data_source), + } +} + +fn parse_data_source(data_source: DataSource) -> Result { + let DataSource { + format, + options, + paths, + predicates, + .. 
+ } = data_source; + + let format = format.ok_or_else(|| eyre!("format is None"))?; + if format != "parquet" { + bail!("Only parquet is supported; got {format}"); + } + + if !options.is_empty() { + bail!("Options are not yet supported"); + } + if !predicates.is_empty() { + bail!("Predicates are not yet supported"); + } + + ParquetScanBuilder::new(paths) + .finish() + .wrap_err("creating ParquetScanBuilder") +} diff --git a/src/daft-connect/src/lib.rs b/src/daft-connect/src/lib.rs index d9f2c2f6ad..43913db596 100644 --- a/src/daft-connect/src/lib.rs +++ b/src/daft-connect/src/lib.rs @@ -7,6 +7,8 @@ #![feature(try_trait_v2_residual)] #![deny(unused)] +use std::ops::ControlFlow; + use dashmap::DashMap; use eyre::Context; #[cfg(feature = "python")] @@ -26,7 +28,7 @@ use tonic::{transport::Server, Request, Response, Status}; use tracing::info; use uuid::Uuid; -use crate::session::Session; +use crate::{convert::run_local, session::Session}; mod command; mod config; @@ -168,8 +170,12 @@ impl SparkConnectService for DaftSparkConnectService { CommandType::RegisterFunction(_) => { unimplemented_err!("RegisterFunction not implemented") } - CommandType::WriteOperation(_) => { - unimplemented_err!("WriteOperation not implemented") + CommandType::WriteOperation(op) => { + println!("WriteOperation: {:#2?}", op); + + let result = session.handle_write_operation(op, operation)?; + + return Ok(Response::new(result)); } CommandType::CreateDataframeView(_) => { unimplemented_err!("CreateDataframeView not implemented") @@ -299,6 +305,67 @@ impl SparkConnectService for DaftSparkConnectService { Ok(Response::new(response)) } + Analyze::TreeString(tree_string) => { + if let Some(level) = tree_string.level { + warn!("Ignoring level {level} in TreeString"); + } + + let Some(plan) = tree_string.plan else { + return invalid_argument_err!("TreeString must have a plan"); + }; + + let Some(op_type) = plan.op_type else { + return invalid_argument_err!("plan must have an op_type"); + }; + + println!("op_type: {op_type:?}"); + + let OpType::Root(plan) = op_type else { + return invalid_argument_err!("Only op_type Root is supported"); + }; + + let logical_plan = match convert::to_logical_plan(plan) { + Ok(lp) => lp, + e => { + return invalid_argument_err!("Failed to convert to logical plan: {e:?}"); + } + }; + + let logical_plan = logical_plan.build(); + + let res = std::thread::spawn(move || { + let result = run_local( + &logical_plan, + |table| { + let table = format!("{table}"); + ControlFlow::Break(table) + }, + || ControlFlow::Continue(()), + ) + .unwrap(); + + let result = match result { + ControlFlow::Break(x) => Some(x), + ControlFlow::Continue(()) => None, + } + .unwrap(); + + AnalyzePlanResponse { + session_id, + server_side_session_id: String::new(), + result: Some(analyze_plan_response::Result::TreeString( + analyze_plan_response::TreeString { + tree_string: result, + }, + )), + } + }); + + let res = res.join().unwrap(); + + let response = Response::new(res); + Ok(response) + } _ => unimplemented_err!("Analyze plan operation is not yet implemented"), } } diff --git a/src/daft-connect/src/session.rs b/src/daft-connect/src/session.rs index 24f7fabe80..72b477478f 100644 --- a/src/daft-connect/src/session.rs +++ b/src/daft-connect/src/session.rs @@ -1,4 +1,4 @@ -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use uuid::Uuid; @@ -8,6 +8,12 @@ pub struct Session { /// Also, config_values: BTreeMap, + #[expect( + unused, + reason = "this will be used in the future especially to pass spark 
connect tests" + )] + tables_by_name: HashMap, + id: String, server_side_session_id: String, } @@ -26,6 +32,7 @@ impl Session { let server_side_session_id = server_side_session_id.to_string(); Self { config_values: Default::default(), + tables_by_name: Default::default(), id, server_side_session_id, } diff --git a/tests/connect/test_parquet_simple.py b/tests/connect/test_parquet_simple.py new file mode 100644 index 0000000000..cb3ba9f1b1 --- /dev/null +++ b/tests/connect/test_parquet_simple.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import pathlib +import time + +import pyarrow as pa +import pyarrow.parquet as papq +from pyspark.sql import SparkSession +from pyspark.sql.dataframe import DataFrame + +from daft.daft import connect_start + + +def test_read_parquet(tmpdir): + # Convert tmpdir to Path object + test_dir = pathlib.Path(tmpdir) + input_parquet_path = test_dir / "input.parquet" + + # Create sample data with sequential IDs + sample_data = pa.Table.from_pydict({"id": [0, 1, 2, 3, 4]}) + + # Write sample data to input parquet file + papq.write_table(sample_data, input_parquet_path) + + # Start Daft Connect server + # TODO: Add env var to control server embedding + connect_start("sc://localhost:50051") + + # Initialize Spark Connect session + spark_session: SparkSession = ( + SparkSession.builder.appName("DaftParquetReadWriteTest").remote("sc://localhost:50051").getOrCreate() + ) + + # Read input parquet with Spark Connect + spark_df: DataFrame = spark_session.read.parquet(str(input_parquet_path)) + + # Write DataFrame to output parquet + output_parquet_path = test_dir / "output.parquet" + spark_df.write.parquet(str(output_parquet_path)) + + # Verify output matches input + output_data = papq.read_table(output_parquet_path) + assert output_data.equals(sample_data) + + # Clean up Spark session + spark_session.stop() + time.sleep(2) # Allow time for session cleanup From b78e520e97a2232212ca12739e9c16ff3e13650d Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Mon, 11 Nov 2024 15:28:12 -0800 Subject: [PATCH 11/12] stash --- Cargo.lock | 1 + src/daft-connect/Cargo.toml | 1 + src/daft-connect/src/command.rs | 123 +----------------- src/daft-connect/src/convert.rs | 43 +++--- .../src/convert/data_conversion.rs | 4 - .../convert/data_conversion/show_string.rs | 59 --------- src/daft-connect/src/lib.rs | 70 +--------- src/daft-scan/src/hive.rs | 6 +- 8 files changed, 32 insertions(+), 275 deletions(-) delete mode 100644 src/daft-connect/src/convert/data_conversion/show_string.rs diff --git a/Cargo.lock b/Cargo.lock index c01adc0878..19f5b9791f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1910,6 +1910,7 @@ version = "0.3.0-dev0" dependencies = [ "arrow2", "common-daft-config", + "common-error", "common-file-formats", "daft-core", "daft-dsl", diff --git a/src/daft-connect/Cargo.toml b/src/daft-connect/Cargo.toml index 57566ba7fc..82ac4cac55 100644 --- a/src/daft-connect/Cargo.toml +++ b/src/daft-connect/Cargo.toml @@ -20,6 +20,7 @@ tokio-stream = "0.1.16" tonic = "0.12.3" tracing-subscriber = {version = "0.3.18", features = ["env-filter"]} tracing-tracy = "0.11.3" +common-error.workspace = true tracing.workspace = true uuid = {version = "1.10.0", features = ["v4"]} diff --git a/src/daft-connect/src/command.rs b/src/daft-connect/src/command.rs index 50acf5e37e..4e6fc91fa6 100644 --- a/src/daft-connect/src/command.rs +++ b/src/daft-connect/src/command.rs @@ -1,10 +1,10 @@ -use std::{ops::ControlFlow, thread}; +use std::thread; use arrow2::io::ipc::write::StreamWriter; use 
common_file_formats::FileFormat; use daft_table::Table; use eyre::Context; -use futures::TryStreamExt; +use futures::{StreamExt, TryStreamExt}; use spark_connect::{ execute_plan_response::{ArrowBatch, ResponseType, ResultComplete}, spark_connect_service_server::SparkConnectService, @@ -128,123 +128,4 @@ impl Session { Ok(Box::pin(recv_stream)) } - - pub fn handle_write_operation( - &self, - operation: WriteOperation, - operation_id: String, - ) -> Result { - let mode = operation.mode(); - - let WriteOperation { - input, - source, - sort_column_names, - partitioning_columns, - bucket_by, - options, - clustering_columns, - save_type, - mode: _, - } = operation; - - let Some(input) = input else { - return invalid_argument_err!("input is None"); - }; - - let source = source.unwrap_or_else(|| "parquet".to_string()); - if source != "parquet" { - return unimplemented_err!( - "Only writing parquet is supported for now but got {source}" - ); - } - - match mode { - SaveMode::Unspecified => {} - SaveMode::Append => { - return unimplemented_err!("Append mode is not yet supported"); - } - SaveMode::Overwrite => { - return unimplemented_err!("Overwrite mode is not yet supported"); - } - SaveMode::ErrorIfExists => { - return unimplemented_err!("ErrorIfExists mode is not yet supported"); - } - SaveMode::Ignore => { - return unimplemented_err!("Ignore mode is not yet supported"); - } - } - - if !sort_column_names.is_empty() { - return unimplemented_err!("Sort by columns is not yet supported"); - } - - if !partitioning_columns.is_empty() { - return unimplemented_err!("Partitioning columns is not yet supported"); - } - - if bucket_by.is_some() { - return unimplemented_err!("Bucket by columns is not yet supported"); - } - - if !options.is_empty() { - return unimplemented_err!("Options are not yet supported"); - } - - if !clustering_columns.is_empty() { - return unimplemented_err!("Clustering columns is not yet supported"); - } - let Some(save_type) = save_type else { - return invalid_argument_err!("save_type is required"); - }; - - let save_path = match save_type { - SaveType::Path(path) => path, - SaveType::Table(_) => { - return unimplemented_err!("Save type table is not yet supported"); - } - }; - - thread::scope(|scope| { - let res = scope.spawn(|| { - let plan = to_logical_plan(input) - .map_err(|_| Status::internal("Failed to convert to logical plan"))?; - - // todo: assuming this is parquet - // todo: is save_path right? 
- let plan = plan - .table_write(&save_path, FileFormat::Parquet, None, None, None) - .map_err(|_| Status::internal("Failed to write table"))?; - - let plan = plan.build(); - - run_local( - &plan, - |_table| ControlFlow::Continue(()), - || ControlFlow::Break(()), - ) - .map_err(|e| Status::internal(format!("Failed to write table: {e}")))?; - - Result::<(), Status>::Ok(()) - }); - - res.join().unwrap() - })?; - - let session_id = self.client_side_session_id().to_string(); - let server_side_session_id = self.server_side_session_id().to_string(); - - Ok(Box::pin(futures::stream::once(async { - Ok(ExecutePlanResponse { - session_id, - server_side_session_id, - operation_id, - response_id: "abcxyz".to_string(), - metrics: None, - observed_metrics: vec![], - schema: None, - response_type: Some(ResponseType::ResultComplete(ResultComplete {})), - }) - }))) - } } diff --git a/src/daft-connect/src/convert.rs b/src/daft-connect/src/convert.rs index a3c48a6e8e..1318f17e64 100644 --- a/src/daft-connect/src/convert.rs +++ b/src/daft-connect/src/convert.rs @@ -4,42 +4,43 @@ mod formatting; mod plan_conversion; mod schema_conversion; -use std::{ - collections::HashMap, - ops::{ControlFlow, Try}, - sync::Arc, -}; +use std::{collections::HashMap, pin::Pin, sync::Arc}; use common_daft_config::DaftExecutionConfig; +use common_error::{DaftError, DaftResult}; use daft_logical_plan::LogicalPlanRef; use daft_table::Table; pub use data_conversion::convert_data; -use eyre::Context; +use futures::{stream, Stream, StreamExt}; pub use plan_conversion::to_logical_plan; pub use schema_conversion::connect_schema; -pub fn run_local( +pub fn run_local( logical_plan: &LogicalPlanRef, - mut f: impl FnMut(&Table) -> T, - default: impl FnOnce() -> T, -) -> eyre::Result { +) -> DaftResult>> { let physical_plan = daft_local_plan::translate(logical_plan)?; let cfg = Arc::new(DaftExecutionConfig::default()); let psets = HashMap::new(); - let stream = daft_local_execution::run_local(&physical_plan, psets, cfg, None) - .wrap_err("running local execution")?; + let stream = daft_local_execution::run_local(&physical_plan, psets, cfg, None)?; - for elem in stream { - let elem = elem?; - let tables = elem.get_tables()?; + let stream = stream + .map(|partition| match partition { + Ok(partition) => partition.get_tables().map_err(DaftError::from), + Err(err) => Err(err), + }) + .flat_map(|tables| match tables { + Ok(tables) => { + let tables = Arc::try_unwrap(tables).unwrap(); - for table in tables.as_slice() { - if let ControlFlow::Break(x) = f(table).branch() { - return Ok(T::from_residual(x)); + let tables = tables.into_iter().map(Ok); + let stream: Pin>>> = + Box::pin(stream::iter(tables)); + + stream } - } - } + Err(err) => Box::pin(stream::once(async { Err(err) })), + }); - Ok(default()) + Ok(stream) } diff --git a/src/daft-connect/src/convert/data_conversion.rs b/src/daft-connect/src/convert/data_conversion.rs index 96dde37b76..3fbbb66cb4 100644 --- a/src/daft-connect/src/convert/data_conversion.rs +++ b/src/daft-connect/src/convert/data_conversion.rs @@ -34,9 +34,6 @@ use tracing::trace; use crate::{command::ConcreteDataChannel, convert::formatting::RelTypeExt}; -mod show_string; -use show_string::show_string; - mod range; use range::range; @@ -51,7 +48,6 @@ pub fn convert_data(plan: Relation, encoder: &mut impl ConcreteDataChannel) -> e let rel_type = plan.rel_type.ok_or_else(|| eyre!("rel_type is None"))?; match rel_type { - RelType::ShowString(input) => show_string(*input, encoder).wrap_err("parsing ShowString"), 
RelType::Range(input) => range(input, encoder).wrap_err("parsing Range"), other => Err(eyre!("Unsupported top-level relation: {}", other.name())), } diff --git a/src/daft-connect/src/convert/data_conversion/show_string.rs b/src/daft-connect/src/convert/data_conversion/show_string.rs deleted file mode 100644 index bed2952a21..0000000000 --- a/src/daft-connect/src/convert/data_conversion/show_string.rs +++ /dev/null @@ -1,59 +0,0 @@ -use daft_core::prelude::Series; -use daft_schema::prelude::Schema; -use daft_table::Table; -use eyre::{ensure, eyre, Context}; -use spark_connect::ShowString; - -use crate::{ - command::ConcreteDataChannel, - convert::{plan_conversion::to_logical_plan, run_local}, -}; - -pub fn show_string( - show_string: ShowString, - channel: &mut impl ConcreteDataChannel, -) -> eyre::Result<()> { - let ShowString { - input, - num_rows, - truncate, - vertical, - } = show_string; - - ensure!(num_rows > 0, "num_rows must be positive, got {num_rows}"); - ensure!(truncate > 0, "truncate must be positive, got {truncate}"); - ensure!(!vertical, "vertical is not yet supported"); - - let input = *input.ok_or_else(|| eyre!("input is None"))?; - - let logical_plan = to_logical_plan(input)?.build(); - - run_local( - &logical_plan, - |table| -> eyre::Result<()> { - let display = format!("{table}"); - - let arrow_array: arrow2::array::Utf8Array = - std::iter::once(display.as_str()).map(Some).collect(); - - let singleton_series = Series::try_from(( - "show_string", - Box::new(arrow_array) as Box, - )) - .wrap_err("creating singleton series")?; - - let singleton_table = Table::new_with_size( - Schema::new(vec![singleton_series.field().clone()])?, - vec![singleton_series], - 1, - )?; - - channel.send_table(&singleton_table)?; - - Ok(()) - }, - || Ok(()), - )??; - - Ok(()) -} diff --git a/src/daft-connect/src/lib.rs b/src/daft-connect/src/lib.rs index 43913db596..956f180c68 100644 --- a/src/daft-connect/src/lib.rs +++ b/src/daft-connect/src/lib.rs @@ -11,6 +11,7 @@ use std::ops::ControlFlow; use dashmap::DashMap; use eyre::Context; +use futures::{StreamExt, TryStreamExt}; #[cfg(feature = "python")] use pyo3::types::PyModuleMethods; use spark_connect::{ @@ -170,12 +171,8 @@ impl SparkConnectService for DaftSparkConnectService { CommandType::RegisterFunction(_) => { unimplemented_err!("RegisterFunction not implemented") } - CommandType::WriteOperation(op) => { - println!("WriteOperation: {:#2?}", op); - - let result = session.handle_write_operation(op, operation)?; - - return Ok(Response::new(result)); + CommandType::WriteOperation(_) => { + unimplemented_err!("WriteOperation not implemented") } CommandType::CreateDataframeView(_) => { unimplemented_err!("CreateDataframeView not implemented") @@ -305,67 +302,6 @@ impl SparkConnectService for DaftSparkConnectService { Ok(Response::new(response)) } - Analyze::TreeString(tree_string) => { - if let Some(level) = tree_string.level { - warn!("Ignoring level {level} in TreeString"); - } - - let Some(plan) = tree_string.plan else { - return invalid_argument_err!("TreeString must have a plan"); - }; - - let Some(op_type) = plan.op_type else { - return invalid_argument_err!("plan must have an op_type"); - }; - - println!("op_type: {op_type:?}"); - - let OpType::Root(plan) = op_type else { - return invalid_argument_err!("Only op_type Root is supported"); - }; - - let logical_plan = match convert::to_logical_plan(plan) { - Ok(lp) => lp, - e => { - return invalid_argument_err!("Failed to convert to logical plan: {e:?}"); - } - }; - - let logical_plan = 
logical_plan.build(); - - let res = std::thread::spawn(move || { - let result = run_local( - &logical_plan, - |table| { - let table = format!("{table}"); - ControlFlow::Break(table) - }, - || ControlFlow::Continue(()), - ) - .unwrap(); - - let result = match result { - ControlFlow::Break(x) => Some(x), - ControlFlow::Continue(()) => None, - } - .unwrap(); - - AnalyzePlanResponse { - session_id, - server_side_session_id: String::new(), - result: Some(analyze_plan_response::Result::TreeString( - analyze_plan_response::TreeString { - tree_string: result, - }, - )), - } - }); - - let res = res.join().unwrap(); - - let response = Response::new(res); - Ok(response) - } _ => unimplemented_err!("Analyze plan operation is not yet implemented"), } } diff --git a/src/daft-scan/src/hive.rs b/src/daft-scan/src/hive.rs index d9de48afd2..f929c9d4d5 100644 --- a/src/daft-scan/src/hive.rs +++ b/src/daft-scan/src/hive.rs @@ -134,7 +134,7 @@ mod tests { let partitions = parse_hive_partitioning(uri).unwrap(); assert_eq!(partitions.get("year"), Some(&"2024".to_string())); - assert_eq!(partitions.get("region"), Some(&"".to_string())); + assert_eq!(partitions.get("region"), Some(&String::new())); } #[test] @@ -251,7 +251,7 @@ mod tests { let partitions = parse_hive_partitioning(uri).unwrap(); assert_eq!(partitions.len(), 2); - assert_eq!(partitions.get("empty_key"), Some(&"".to_string())); - assert_eq!(partitions.get("another"), Some(&"".to_string())); + assert_eq!(partitions.get("empty_key"), Some(&String::new())); + assert_eq!(partitions.get("another"), Some(&String::new())); } } From 4c009bf8ec5c98130d8426f1d1532609e58f2130 Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Tue, 12 Nov 2024 15:12:51 -0800 Subject: [PATCH 12/12] stash --- src/daft-connect/src/command.rs | 11 +++-------- src/daft-connect/src/convert.rs | 3 +-- src/daft-connect/src/lib.rs | 5 +---- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/src/daft-connect/src/command.rs b/src/daft-connect/src/command.rs index 4e6fc91fa6..6d7c5cfabf 100644 --- a/src/daft-connect/src/command.rs +++ b/src/daft-connect/src/command.rs @@ -1,24 +1,19 @@ use std::thread; use arrow2::io::ipc::write::StreamWriter; -use common_file_formats::FileFormat; use daft_table::Table; use eyre::Context; -use futures::{StreamExt, TryStreamExt}; +use futures::TryStreamExt; use spark_connect::{ execute_plan_response::{ArrowBatch, ResponseType, ResultComplete}, spark_connect_service_server::SparkConnectService, - write_operation::{SaveMode, SaveType}, - ExecutePlanResponse, Relation, WriteOperation, + ExecutePlanResponse, Relation, }; use tokio_stream::wrappers::UnboundedReceiverStream; use tonic::Status; use uuid::Uuid; -use crate::{ - convert::{convert_data, run_local, to_logical_plan}, - invalid_argument_err, unimplemented_err, DaftSparkConnectService, Session, -}; +use crate::{convert::convert_data, DaftSparkConnectService, Session}; type DaftStream = ::ExecutePlanStream; diff --git a/src/daft-connect/src/convert.rs b/src/daft-connect/src/convert.rs index 1318f17e64..4018b44ae1 100644 --- a/src/daft-connect/src/convert.rs +++ b/src/daft-connect/src/convert.rs @@ -12,10 +12,9 @@ use daft_logical_plan::LogicalPlanRef; use daft_table::Table; pub use data_conversion::convert_data; use futures::{stream, Stream, StreamExt}; -pub use plan_conversion::to_logical_plan; pub use schema_conversion::connect_schema; -pub fn run_local( +pub fn run_local_to_tables( logical_plan: &LogicalPlanRef, ) -> DaftResult>> { let physical_plan = 
daft_local_plan::translate(logical_plan)?; diff --git a/src/daft-connect/src/lib.rs b/src/daft-connect/src/lib.rs index 956f180c68..d9f2c2f6ad 100644 --- a/src/daft-connect/src/lib.rs +++ b/src/daft-connect/src/lib.rs @@ -7,11 +7,8 @@ #![feature(try_trait_v2_residual)] #![deny(unused)] -use std::ops::ControlFlow; - use dashmap::DashMap; use eyre::Context; -use futures::{StreamExt, TryStreamExt}; #[cfg(feature = "python")] use pyo3::types::PyModuleMethods; use spark_connect::{ @@ -29,7 +26,7 @@ use tonic::{transport::Server, Request, Response, Status}; use tracing::info; use uuid::Uuid; -use crate::{convert::run_local, session::Session}; +use crate::session::Session; mod command; mod config;