Skip to content

Commit

Permalink
Renames merge to zip
Browse files Browse the repository at this point in the history
  • Loading branch information
rchowell committed Jan 31, 2025
1 parent a02db08 commit 70c3d20
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 18 deletions.
2 changes: 1 addition & 1 deletion src/daft-core/src/series/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ pub mod list;
pub mod log;
pub mod logical;
pub mod map;
pub mod merge;
pub mod minhash;
pub mod not;
pub mod null;
Expand All @@ -47,6 +46,7 @@ pub mod take;
pub mod time;
mod trigonometry;
pub mod utf8;
pub mod zip;

pub fn cast_series_to_supertype(series: &[&Series]) -> DaftResult<Vec<Series>> {
let supertype = series
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,34 @@ use crate::{
};

impl Series {
/// Merges series into a single series of lists.
/// Zips series into a single series of lists.
/// ex:
/// ```text
/// A: Series := ( a_0, a_1, .. , a_n )
/// B: Series := ( b_0, b_1, .. , b_n )
/// C: Series := MERGE(A, B) <-> ( [a_0, b_0], [a_1, b_1], [a_2, b_2] )
/// C: Series := Zip(A, B) <-> ( [a_0, b_0], [a_1, b_1], .. , [a_n, b_n] )
/// ```
pub fn merge(field: Field, series: &[&Self]) -> DaftResult<Self> {
// err if no series to merge
pub fn zip(field: Field, series: &[&Self]) -> DaftResult<Self> {
// err if no series to zip
if series.is_empty() {
return Err(DaftError::ValueError(
"Need at least 1 series to perform merge".to_string(),
"Need at least 1 series to perform zip".to_string(),
));
}

// homogeneity checks happen in lower-levels, assume ok.
let dtype = if let DataType::List(dtype) = &field.dtype {
dtype.as_ref()
} else {
return Err(DaftError::ValueError(
"Cannot merge field with non-list type".to_string(),
));
// homogeneity checks naturally happen in make_growable's downcast.
let dtype = match &field.dtype {
DataType::List(dtype) => dtype.as_ref(),
DataType::FixedSizeList(..) => {
return Err(DaftError::ValueError(
"Fixed size list constructor is currently not supported".to_string(),
));
}
_ => {
return Err(DaftError::ValueError(
"Cannot zip field with non-list type".to_string(),
));
}
};

// build a null series mask so we can skip making full_nulls and avoid downcast "Null to T" errors.
Expand Down
2 changes: 1 addition & 1 deletion src/daft-table/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,7 @@ impl Table {
let field = expr.to_field(&self.schema)?;
let items = items.iter().map(|item| self.eval_expression(item)).collect::<DaftResult<Vec<_>>>()?;
let items = items.iter().collect::<Vec<&Series>>();
Series::merge(field, items.as_slice())
Series::zip(field, items.as_slice())
}
Expr::Between(child, lower, upper) => self
.eval_expression(child)?
Expand Down
8 changes: 4 additions & 4 deletions tests/sql/test_list_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest

import daft
from daft import DataType, col
from daft import DataType, col, list_
from daft.daft import CountMode
from daft.sql.sql import SQLCatalog

Expand Down Expand Up @@ -47,11 +47,11 @@ def test_list_constructor_heterogeneous():
df # for ruff ignore unused


@pytest.mark.skip("Cannot do a multi-column list constructor from python API")
def test_list_constructor_heterogeneous_with_cast():
df = daft.from_pydict({"x": [1, 2, 3], "y": [True, True, False]})
actual = daft.sql("SELECT [ CAST(x AS STRING), CAST(y AS STRING) ] FROM df")
print(df, actual)
expect = df.select(list_(col("x").cast(DataType.string()), col("y").cast(DataType.string())))
assert_eq(actual, expect)


def test_list_constructor_mixed_null_first():
Expand All @@ -63,7 +63,7 @@ def test_list_constructor_mixed_null_first():

def test_list_constructor_mixed_null_mid():
df = daft.from_pydict({"x": [1, 2, 3]})
actual = daft.sql("SELECT [ -x, NULL, x ] FROM df")
actual = daft.sql("SELECT [ x * -1, NULL, x ] FROM df")
expect = df.select(col("x").apply(lambda x: [x * -1, None, x], DataType.list(DataType.int64())).alias("list"))
assert_eq(actual, expect)

Expand Down

0 comments on commit 70c3d20

Please sign in to comment.