diff --git a/ibis/backends/dask/executor.py b/ibis/backends/dask/executor.py index 921985a833c71..4ef746b425b7a 100644 --- a/ibis/backends/dask/executor.py +++ b/ibis/backends/dask/executor.py @@ -28,6 +28,7 @@ plan, ) from ibis.common.exceptions import UnboundExpressionError, UnsupportedOperationError +from ibis.formats.numpy import NumpyType from ibis.formats.pandas import PandasData, PandasType from ibis.util import gen_name @@ -155,9 +156,10 @@ def mapper(df, cases): return cls.partitionwise(mapper, kwargs, name=op.name, dtype=dtype) @classmethod - def visit(cls, op: ops.Array, exprs): + def visit(cls, op: ops.Array, exprs, dtype): + np_type = NumpyType.from_ibis(dtype) return cls.rowwise( - lambda row: np.array(row, dtype=object), exprs, name=op.name, dtype=object + lambda row: np.array(row, dtype=np_type), exprs, name=op.name, dtype=object ) @classmethod diff --git a/ibis/backends/dask/helpers.py b/ibis/backends/dask/helpers.py index 1ca8d191c29a7..dec137a8f9319 100644 --- a/ibis/backends/dask/helpers.py +++ b/ibis/backends/dask/helpers.py @@ -30,7 +30,7 @@ def concat(cls, dfs, **kwargs): @classmethod def asseries(cls, value, like=None): - """Ensure that value is a pandas Series object, broadcast if necessary.""" + """Ensure that value is a dask Series object, broadcast if necessary.""" if isinstance(value, dd.Series): return value @@ -50,7 +50,7 @@ def asseries(cls, value, like=None): elif isinstance(value, pd.Series): return dd.from_pandas(value, npartitions=1) elif like is not None: - if isinstance(value, (tuple, list, dict)): + if isinstance(value, (tuple, list, dict, np.ndarray)): fn = lambda df: pd.Series([value] * len(df), index=df.index) else: fn = lambda df: pd.Series(value, index=df.index) diff --git a/ibis/backends/pandas/executor.py b/ibis/backends/pandas/executor.py index a3153d17b8b47..136f16e901902 100644 --- a/ibis/backends/pandas/executor.py +++ b/ibis/backends/pandas/executor.py @@ -49,7 +49,9 @@ def visit(cls, op: ops.Node, **kwargs): 
@classmethod def visit(cls, op: ops.Literal, value, dtype): - if dtype.is_interval(): + if value is None: + value = None + elif dtype.is_interval(): value = pd.Timedelta(value, dtype.unit.short) elif dtype.is_array(): value = np.array(value) @@ -220,7 +222,7 @@ def visit(cls, op: ops.FindInSet, needle, values): return pd.Series(result, name=op.name) @classmethod - def visit(cls, op: ops.Array, exprs): + def visit(cls, op: ops.Array, exprs, dtype): return cls.rowwise(lambda row: np.array(row, dtype=object), exprs) @classmethod diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py index 4d9a497191b4a..f802b0a886d5c 100644 --- a/ibis/backends/polars/compiler.py +++ b/ibis/backends/polars/compiler.py @@ -87,25 +87,27 @@ def literal(op, **_): value = op.value dtype = op.dtype - if dtype.is_array(): - value = pl.Series("", value) - typ = PolarsType.from_ibis(dtype) - val = pl.lit(value, dtype=typ) - return val.implode() + # There are some interval types that _make_duration() can handle, + # but PolarsType.from_ibis can't, so we need to handle them here. 
+ if dtype.is_interval(): + return _make_duration(value, dtype) + + typ = PolarsType.from_ibis(dtype) + if value is None: + return pl.lit(None, dtype=typ) + elif dtype.is_array(): + return pl.lit(pl.Series("", value).implode(), dtype=typ) elif dtype.is_struct(): values = [ pl.lit(v, dtype=PolarsType.from_ibis(dtype[k])).alias(k) for k, v in value.items() ] return pl.struct(values) - elif dtype.is_interval(): - return _make_duration(value, dtype) elif dtype.is_null(): return pl.lit(value) elif dtype.is_binary(): return pl.lit(value) else: - typ = PolarsType.from_ibis(dtype) return pl.lit(op.value, dtype=typ) @@ -974,9 +976,12 @@ def array_concat(op, **kw): @translate.register(ops.Array) -def array_column(op, **kw): - cols = [translate(col, **kw) for col in op.exprs] - return pl.concat_list(cols) +def array_literal(op, **kw): + pdt = PolarsType.from_ibis(op.dtype) + if op.exprs: + return pl.concat_list([translate(col, **kw) for col in op.exprs]).cast(pdt) + else: + return pl.lit([], dtype=pdt) @translate.register(ops.ArrayCollect) diff --git a/ibis/backends/sql/compiler.py b/ibis/backends/sql/compiler.py index 09f26b80d62fe..0c224f42d7cd1 100644 --- a/ibis/backends/sql/compiler.py +++ b/ibis/backends/sql/compiler.py @@ -1019,8 +1019,8 @@ def visit_InSubquery(self, op, *, rel, needle): query = sg.select(STAR).from_(query) return needle.isin(query=query) - def visit_Array(self, op, *, exprs): - return self.f.array(*exprs) + def visit_Array(self, op, *, exprs, dtype): + return self.cast(self.f.array(*exprs), dtype) def visit_StructColumn(self, op, *, names, values): return sge.Struct.from_arg_list( diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql index 13f05009a8c05..cf15a4257b433 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql +++ 
b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/clickhouse/out.sql @@ -26,7 +26,7 @@ WITH "t5" AS ( SELECT "t0"."field_of_study", arrayJoin( - [ + CAST([ CAST(tuple('1970-71', "t0"."1970-71") AS Tuple("years" Nullable(String), "degrees" Nullable(Int64))), CAST(tuple('1975-76', "t0"."1975-76") AS Tuple("years" Nullable(String), "degrees" Nullable(Int64))), CAST(tuple('1980-81', "t0"."1980-81") AS Tuple("years" Nullable(String), "degrees" Nullable(Int64))), @@ -45,7 +45,7 @@ WITH "t5" AS ( CAST(tuple('2017-18', "t0"."2017-18") AS Tuple("years" Nullable(String), "degrees" Nullable(Int64))), CAST(tuple('2018-19', "t0"."2018-19") AS Tuple("years" Nullable(String), "degrees" Nullable(Int64))), CAST(tuple('2019-20', "t0"."2019-20") AS Tuple("years" Nullable(String), "degrees" Nullable(Int64))) - ] + ] AS Array(Tuple("years" Nullable(String), "degrees" Nullable(Int64)))) ) AS "__pivoted__" FROM "humanities" AS "t0" ) AS "t1" diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql index 90c27a09e25e0..d36aa6f0ab8a9 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/duckdb/out.sql @@ -26,7 +26,7 @@ WITH "t5" AS ( SELECT "t0"."field_of_study", UNNEST( - [ + CAST([ {'years': '1970-71', 'degrees': "t0"."1970-71"}, {'years': '1975-76', 'degrees': "t0"."1975-76"}, {'years': '1980-81', 'degrees': "t0"."1980-81"}, @@ -45,7 +45,7 @@ WITH "t5" AS ( {'years': '2017-18', 'degrees': "t0"."2017-18"}, {'years': '2018-19', 'degrees': "t0"."2018-19"}, {'years': '2019-20', 'degrees': "t0"."2019-20"} - ] + ] AS STRUCT("years" TEXT, "degrees" BIGINT)[]) ) AS "__pivoted__" FROM "humanities" AS "t0" ) AS "t1" diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/postgres/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/postgres/out.sql index 
0056587285781..e45982d73dd6b 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/postgres/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/postgres/out.sql @@ -26,7 +26,7 @@ WITH "t5" AS ( SELECT "t0"."field_of_study", UNNEST( - ARRAY[ROW(CAST('1970-71' AS VARCHAR), CAST("t0"."1970-71" AS BIGINT)), ROW(CAST('1975-76' AS VARCHAR), CAST("t0"."1975-76" AS BIGINT)), ROW(CAST('1980-81' AS VARCHAR), CAST("t0"."1980-81" AS BIGINT)), ROW(CAST('1985-86' AS VARCHAR), CAST("t0"."1985-86" AS BIGINT)), ROW(CAST('1990-91' AS VARCHAR), CAST("t0"."1990-91" AS BIGINT)), ROW(CAST('1995-96' AS VARCHAR), CAST("t0"."1995-96" AS BIGINT)), ROW(CAST('2000-01' AS VARCHAR), CAST("t0"."2000-01" AS BIGINT)), ROW(CAST('2005-06' AS VARCHAR), CAST("t0"."2005-06" AS BIGINT)), ROW(CAST('2010-11' AS VARCHAR), CAST("t0"."2010-11" AS BIGINT)), ROW(CAST('2011-12' AS VARCHAR), CAST("t0"."2011-12" AS BIGINT)), ROW(CAST('2012-13' AS VARCHAR), CAST("t0"."2012-13" AS BIGINT)), ROW(CAST('2013-14' AS VARCHAR), CAST("t0"."2013-14" AS BIGINT)), ROW(CAST('2014-15' AS VARCHAR), CAST("t0"."2014-15" AS BIGINT)), ROW(CAST('2015-16' AS VARCHAR), CAST("t0"."2015-16" AS BIGINT)), ROW(CAST('2016-17' AS VARCHAR), CAST("t0"."2016-17" AS BIGINT)), ROW(CAST('2017-18' AS VARCHAR), CAST("t0"."2017-18" AS BIGINT)), ROW(CAST('2018-19' AS VARCHAR), CAST("t0"."2018-19" AS BIGINT)), ROW(CAST('2019-20' AS VARCHAR), CAST("t0"."2019-20" AS BIGINT))] + CAST(ARRAY[ROW(CAST('1970-71' AS VARCHAR), CAST("t0"."1970-71" AS BIGINT)), ROW(CAST('1975-76' AS VARCHAR), CAST("t0"."1975-76" AS BIGINT)), ROW(CAST('1980-81' AS VARCHAR), CAST("t0"."1980-81" AS BIGINT)), ROW(CAST('1985-86' AS VARCHAR), CAST("t0"."1985-86" AS BIGINT)), ROW(CAST('1990-91' AS VARCHAR), CAST("t0"."1990-91" AS BIGINT)), ROW(CAST('1995-96' AS VARCHAR), CAST("t0"."1995-96" AS BIGINT)), ROW(CAST('2000-01' AS VARCHAR), CAST("t0"."2000-01" AS BIGINT)), ROW(CAST('2005-06' AS VARCHAR), CAST("t0"."2005-06" AS BIGINT)), 
ROW(CAST('2010-11' AS VARCHAR), CAST("t0"."2010-11" AS BIGINT)), ROW(CAST('2011-12' AS VARCHAR), CAST("t0"."2011-12" AS BIGINT)), ROW(CAST('2012-13' AS VARCHAR), CAST("t0"."2012-13" AS BIGINT)), ROW(CAST('2013-14' AS VARCHAR), CAST("t0"."2013-14" AS BIGINT)), ROW(CAST('2014-15' AS VARCHAR), CAST("t0"."2014-15" AS BIGINT)), ROW(CAST('2015-16' AS VARCHAR), CAST("t0"."2015-16" AS BIGINT)), ROW(CAST('2016-17' AS VARCHAR), CAST("t0"."2016-17" AS BIGINT)), ROW(CAST('2017-18' AS VARCHAR), CAST("t0"."2017-18" AS BIGINT)), ROW(CAST('2018-19' AS VARCHAR), CAST("t0"."2018-19" AS BIGINT)), ROW(CAST('2019-20' AS VARCHAR), CAST("t0"."2019-20" AS BIGINT))] AS STRUCT<"years" VARCHAR, "degrees" BIGINT>[]) ) AS "__pivoted__" FROM "humanities" AS "t0" ) AS "t1" diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql index da950f94d1b16..b9f7594b3cfad 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql @@ -31,19 +31,19 @@ WITH "t5" AS ( 1, GREATEST( CARDINALITY( - ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW("years" VARCHAR, "degrees" BIGINT)), 
CAST(ROW('2012-13', "t0"."2012-13") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW("years" VARCHAR, "degrees" BIGINT))] + CAST(ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW("years" VARCHAR, 
"degrees" BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW("years" VARCHAR, "degrees" BIGINT))] AS ARRAY(ROW("years" VARCHAR, "degrees" BIGINT))) ) ) )) AS _u(pos) - CROSS JOIN UNNEST(ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW("years" VARCHAR, "degrees" BIGINT))]) WITH ORDINALITY AS _u_2("__pivoted__", pos_2) + CROSS JOIN UNNEST(CAST(ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS 
ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW("years" VARCHAR, "degrees" BIGINT))] AS ARRAY(ROW("years" VARCHAR, "degrees" BIGINT)))) WITH ORDINALITY AS _u_2("__pivoted__", pos_2) WHERE _u.pos = _u_2.pos_2 OR ( _u.pos > CARDINALITY( - ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS 
ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW("years" VARCHAR, "degrees" BIGINT))] + CAST(ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2017-18', 
"t0"."2017-18") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW("years" VARCHAR, "degrees" BIGINT))] AS ARRAY(ROW("years" VARCHAR, "degrees" BIGINT))) ) AND _u_2.pos_2 = CARDINALITY( - ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW("years" VARCHAR, "degrees" BIGINT))] + CAST(ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW("years" VARCHAR, "degrees" 
BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW("years" VARCHAR, "degrees" BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW("years" VARCHAR, "degrees" BIGINT))] AS ARRAY(ROW("years" VARCHAR, "degrees" BIGINT))) ) ) ) AS "t1" diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 8b55c189e9d6b..0e14ff315a9dd 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -31,6 +31,7 @@ PySparkAnalysisException, TrinoUserError, ) +from ibis.common.annotations import ValidationError from ibis.common.collections import frozendict pytestmark = [ @@ -72,6 +73,85 @@ # list. 
+def test_array_factory(con): + a = ibis.array([1, 2, 3]) + assert a.type() == dt.Array(value_type=dt.Int8) + assert con.execute(a) == [1, 2, 3] + + a2 = ibis.array(a) + assert a2.type() == dt.Array(value_type=dt.Int8) + assert con.execute(a2) == [1, 2, 3] + + +@pytest.mark.broken( + ["pandas", "dask"], + raises=AssertionError, + reason="results in [1, 2, 3]", +) +def test_array_factory_typed(con): + typed = ibis.array([1, 2, 3], type="array<string>") + assert con.execute(typed) == ["1", "2", "3"] + + typed2 = ibis.array(ibis.array([1, 2, 3]), type="array<string>") + assert con.execute(typed2) == ["1", "2", "3"] + + +@pytest.mark.notimpl("flink", raises=Py4JJavaError) +@pytest.mark.notimpl(["pandas", "dask"], raises=ValueError) +def test_array_factory_empty(con): + with pytest.raises(ValidationError): + ibis.array([]) + + empty_typed = ibis.array([], type="array<string>") + assert empty_typed.type() == dt.Array(value_type=dt.string) + assert con.execute(empty_typed) == [] + + +@pytest.mark.notyet( + "clickhouse", raises=ClickHouseDatabaseError, reason="nested types can't be NULL" +) +@pytest.mark.notyet( + "flink", raises=Py4JJavaError, reason="Parameters must be of the same type" +) +def test_array_factory_null(con): + with pytest.raises(ValidationError): + ibis.array(None) + with pytest.raises(ValidationError): + ibis.array(None, type="int64") + none_typed = ibis.array(None, type="array<string>") + assert none_typed.type() == dt.Array(value_type=dt.string) + assert con.execute(none_typed) is None + + nones = ibis.array([None, None], type="array<string>") + assert nones.type() == dt.Array(value_type=dt.string) + assert con.execute(nones) == [None, None] + + # Execute a real value here, so the backends that don't support arrays + # actually xfail as we expect them to. + # Otherwise would have to @mark.xfail every test in this file besides this one.
+ assert con.execute(ibis.array([1, 2])) == [1, 2] + + +@pytest.mark.broken( + ["datafusion", "flink", "polars"], + raises=AssertionError, + reason="[None, 1] executes to [np.nan, 1.0]", +) +@pytest.mark.broken( + ["pandas", "dask"], + raises=AssertionError, + reason="even with explicit cast, results in [None, 1]", +) +def test_array_factory_null_mixed(con): + none_and_val = ibis.array([None, 1]) + assert none_and_val.type() == dt.Array(value_type=dt.Int8) + assert con.execute(none_and_val) == [None, 1] + + none_and_val_typed = ibis.array([None, 1], type="array<string>") + assert none_and_val_typed.type() == dt.Array(value_type=dt.String) + assert con.execute(none_and_val_typed) == [None, "1"] + + def test_array_column(backend, alltypes, df): expr = ibis.array( [alltypes["double_col"], alltypes["double_col"], 5.0, ibis.literal(6.0)] @@ -1354,11 +1434,6 @@ def test_unnest_range(con): id="array", marks=[ pytest.mark.notyet(["bigquery"], raises=GoogleBadRequest), - pytest.mark.broken( - ["polars"], - reason="expression input not supported with nested arrays", - raises=TypeError, - ), ], ), ], diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 3ed4a9db8cc5e..9f9a5ce5e4c96 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -1431,13 +1431,12 @@ def query(t, group_cols): snapshot.assert_match(str(ibis.to_sql(t3, dialect=con.name)), "out.sql") -@pytest.mark.notimpl(["oracle", "exasol"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["druid"], raises=AssertionError) @pytest.mark.notyet( - ["datafusion", "impala", "mssql", "mysql", "sqlite"], + ["datafusion", "exasol", "impala", "mssql", "mysql", "oracle", "sqlite"], reason="backend doesn't support arrays and we don't implement pivot_longer with unions yet", - raises=com.OperationNotDefinedError, + raises=(com.OperationNotDefinedError, com.UnsupportedBackendType), +) +@pytest.mark.notimpl(["druid"], raises=AssertionError)
@pytest.mark.broken( ["trino"], reason="invalid code generated for unnesting a struct", diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 777cfa3db8bb3..657790c709cf9 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -31,7 +31,11 @@ marks=[ pytest.mark.never( ["impala", "mysql", "sqlite", "mssql", "exasol"], - raises=(NotImplementedError, exc.UnsupportedBackendType), + raises=( + exc.OperationNotDefinedError, + NotImplementedError, + exc.UnsupportedBackendType, + ), reason="structs not supported in the backend", ), pytest.mark.notimpl( @@ -104,7 +108,7 @@ def test_isin_bug(con, snapshot): @pytest.mark.notyet( ["datafusion", "exasol", "oracle", "flink", "risingwave"], reason="no unnest support", - raises=exc.OperationNotDefinedError, + raises=(exc.OperationNotDefinedError, exc.UnsupportedBackendType), ) @pytest.mark.notyet( ["sqlite", "mysql", "druid", "impala", "mssql"], reason="no unnest support upstream" diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index ceb9fdc77711b..0c7e16c9d7367 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -16,6 +16,7 @@ ClickHouseDatabaseError, OracleDatabaseError, PsycoPg2InternalError, + PyDruidProgrammingError, PyODBCProgrammingError, ) from ibis.common.annotations import ValidationError @@ -835,6 +836,11 @@ def test_capitalize(con, inp, expected): assert pd.isnull(result) +@pytest.mark.never( + ["exasol", "impala", "mssql", "mysql", "sqlite"], + reason="Backend doesn't support arrays", + raises=com.OperationNotDefinedError, +) @pytest.mark.notimpl( [ "dask", @@ -842,14 +848,14 @@ def test_capitalize(con, inp, expected): "polars", "oracle", "flink", - "sqlite", - "mssql", - "mysql", - "exasol", - "impala", ], raises=com.OperationNotDefinedError, ) +@pytest.mark.broken( + "druid", + raises=PyDruidProgrammingError, + reason="ibis.array() has a cast, and we compile the dtype to 
'VARCHAR[]' instead of 'ARRAY<VARCHAR>' as needed", +) def test_array_string_join(con): s = ibis.array(["a", "b", "c"]) expected = "a,b,c" diff --git a/ibis/expr/operations/arrays.py b/ibis/expr/operations/arrays.py index 6d68baab94c32..8cdcb63392283 100644 --- a/ibis/expr/operations/arrays.py +++ b/ibis/expr/operations/arrays.py @@ -19,14 +19,15 @@ class Array(Value): """Construct an array.""" exprs: VarTuple[Value] + dtype: Optional[dt.Array] = None - @attribute - def shape(self): - return rlz.highest_precedence_shape(self.exprs) + shape = rlz.shape_like("exprs") - @attribute - def dtype(self): - return dt.Array(rlz.highest_precedence_dtype(self.exprs)) + def __init__(self, exprs, dtype: dt.Array | None = None): + # If len(exprs) == 0, the caller is responsible for providing a dtype + if dtype is None: + dtype = dt.Array(rlz.highest_precedence_dtype(exprs)) + super().__init__(exprs=exprs, dtype=dtype) @public diff --git a/ibis/expr/rules.py b/ibis/expr/rules.py index 0c865297889f4..5f681cab8ec80 100644 --- a/ibis/expr/rules.py +++ b/ibis/expr/rules.py @@ -5,6 +5,7 @@ from public import public +import ibis.expr.datashape as ds import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis import util @@ -16,6 +17,9 @@ @public def highest_precedence_shape(nodes): + nodes = tuple(nodes) + if len(nodes) == 0: + return ds.scalar return max(node.shape for node in nodes) diff --git a/ibis/expr/tests/snapshots/test_format/test_format_dummy_table/repr.txt b/ibis/expr/tests/snapshots/test_format/test_format_dummy_table/repr.txt index fbda1a87cc5fc..480a404803c71 100644 --- a/ibis/expr/tests/snapshots/test_format/test_format_dummy_table/repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_format_dummy_table/repr.txt @@ -1,2 +1,2 @@ DummyTable - foo: Array([1]) \ No newline at end of file + foo: Array(exprs=[1], dtype=array<int8>) \ No newline at end of file diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py index 2d9e5a8f5b3a3..1e56a7c792f2f 100--- 
a/ibis/expr/types/arrays.py +++ b/ibis/expr/types/arrays.py @@ -5,14 +5,16 @@ from public import public +import ibis.expr.datatypes as dt import ibis.expr.operations as ops +import ibis.expr.types as ir +from ibis.common.annotations import ValidationError from ibis.common.deferred import Deferred, deferrable from ibis.expr.types.generic import Column, Scalar, Value if TYPE_CHECKING: from collections.abc import Callable, Iterable - import ibis.expr.types as ir from ibis.expr.types.typing import V import ibis.common.exceptions as com @@ -1067,7 +1069,11 @@ def __getitem__(self, index: int | ir.IntegerValue | slice) -> ir.Column: @public @deferrable -def array(values: Iterable[V]) -> ArrayValue: +def array( + values: ArrayValue | Iterable[V] | ir.NullValue | None, + *, + type: str | dt.DataType | None = None, +) -> ArrayValue: """Create an array expression. If any values are [column expressions](../concepts/datatypes.qmd) the @@ -1078,6 +1084,9 @@ def array(values: Iterable[V]) -> ArrayValue: ---------- values An iterable of Ibis expressions or Python literals + type + An instance of `ibis.expr.datatypes.DataType` or a string indicating + the Ibis type of `value`. eg `array`. Returns ------- @@ -1099,7 +1108,7 @@ def array(values: Iterable[V]) -> ArrayValue: >>> t = ibis.memtable({"a": [1, 2, 3], "b": [4, 5, 6]}) >>> ibis.array([t.a, 42, ibis.literal(None)]) ┏━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ Array() ┃ + ┃ Array(Array) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━┩ │ array │ ├──────────────────────┤ @@ -1108,15 +1117,37 @@ def array(values: Iterable[V]) -> ArrayValue: │ [3, 42, ... 
+1] │ └──────────────────────┘ - >>> ibis.array([t.a, 42 + ibis.literal(5)]) - ┏━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ Array() ┃ - ┡━━━━━━━━━━━━━━━━━━━━━━┩ - │ array<int64> │ - ├──────────────────────┤ - │ [1, 47] │ - │ [2, 47] │ - │ [3, 47] │ - └──────────────────────┘ + >>> ibis.array([t.a, 42 + ibis.literal(5)], type="array<float64>") + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ Cast(Array(Array), array<float64>) ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ array<float64> │ + ├────────────────────────────────────┤ + │ [1.0, 47.0] │ + │ [2.0, 47.0] │ + │ [3.0, 47.0] │ + └────────────────────────────────────┘ """ - return ops.Array(tuple(values)).to_expr() + type = dt.dtype(type) if type is not None else None + if type is not None and not isinstance(type, dt.Array): + raise ValidationError(f"type must be an array, got {type}") + + if isinstance(values, ir.Value): + if type is not None: + return values.cast(type) + elif isinstance(values, ArrayValue): + return values + else: + raise ValidationError( + f"If no type passed, values must be an array, got {values.type()}" + ) + + if values is None: + if type is None: + raise ValidationError("If values is None/NULL, type must be provided") + return ir.null(type) + + values = tuple(values) + if len(values) == 0 and type is None: + raise ValidationError("If values is empty, type must be provided") + return ops.Array(values, type).to_expr()