Skip to content

Commit

Permalink
refactor: rename remaining unique -> distinct instances
Browse files (browse the repository at this point in the history)
  • Loading branch information
f4t4nt committed Jan 28, 2025
1 parent c22087e commit 71f24f1
Show file tree
Hide file tree
Showing 10 changed files with 29 additions and 28 deletions.
2 changes: 1 addition & 1 deletion daft/daft/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1161,7 +1161,7 @@ def dt_truncate(expr: PyExpr, interval: str, relative_to: PyExpr) -> PyExpr: ...
# ---
def explode(expr: PyExpr) -> PyExpr: ...
def list_sort(expr: PyExpr, desc: PyExpr, nulls_first: PyExpr) -> PyExpr: ...
def list_unique(expr: PyExpr, ignore_nulls: bool) -> PyExpr: ...
def list_distinct(expr: PyExpr, ignore_nulls: bool) -> PyExpr: ...
def list_value_counts(expr: PyExpr) -> PyExpr: ...
def list_join(expr: PyExpr, delimiter: PyExpr) -> PyExpr: ...
def list_count(expr: PyExpr, mode: CountMode) -> PyExpr: ...
Expand Down
4 changes: 2 additions & 2 deletions daft/expressions/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
from daft.daft import date_lit as _date_lit
from daft.daft import decimal_lit as _decimal_lit
from daft.daft import duration_lit as _duration_lit
from daft.daft import list_distinct as _list_distinct
from daft.daft import list_sort as _list_sort
from daft.daft import list_unique as _list_unique
from daft.daft import lit as _lit
from daft.daft import series_lit as _series_lit
from daft.daft import time_lit as _time_lit
Expand Down Expand Up @@ -3264,7 +3264,7 @@ def distinct(self, ignore_nulls: bool = True) -> Expression:
Returns:
Expression: An expression with lists containing only unique elements
"""
return Expression._from_pyexpr(_list_unique(self._expr, ignore_nulls))
return Expression._from_pyexpr(_list_distinct(self._expr, ignore_nulls))


class ExpressionStructNamespace(ExpressionNamespace):
Expand Down
2 changes: 1 addition & 1 deletion src/daft-core/src/series/ops/agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ impl Series {
}

pub fn count_distinct(&self, groups: Option<&GroupIndices>) -> DaftResult<Self> {
let series = self.agg_list(groups)?.list_unique_count()?;
let series = self.agg_list(groups)?.list_distinct_count()?;
Ok(series)
}

Expand Down
2 changes: 1 addition & 1 deletion src/daft-core/src/series/ops/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ impl Series {
/// ```txt
/// [[1, 2, 3], [1, 1, 1], [NULL, NULL, 5]] -> [3, 1, 1]
/// ```
pub fn list_unique_count(&self) -> DaftResult<Self> {
pub fn list_distinct_count(&self) -> DaftResult<Self> {
let field = Field::new(self.name(), DataType::UInt64);
match self.data_type() {
DataType::List(..) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@ use daft_dsl::{
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct ListUnique {
pub struct ListDistinct {
ignore_nulls: bool,
}

#[typetag::serde]
impl ScalarUDF for ListUnique {
impl ScalarUDF for ListDistinct {
fn as_any(&self) -> &dyn Any {
self
}

fn name(&self) -> &'static str {
"list_unique"
"list_distinct"
}

fn to_field(&self, inputs: &[ExprRef], schema: &Schema) -> DaftResult<Field> {
Expand Down Expand Up @@ -136,6 +136,6 @@ impl ScalarUDF for ListUnique {
///
/// When ignore_nulls is true (default), nulls are excluded from the result.
/// When ignore_nulls is false, nulls are included in the result.
pub fn list_unique(expr: ExprRef, ignore_nulls: bool) -> ExprRef {
ScalarFunction::new(ListUnique { ignore_nulls }, vec![expr]).into()
pub fn list_distinct(expr: ExprRef, ignore_nulls: bool) -> ExprRef {
ScalarFunction::new(ListDistinct { ignore_nulls }, vec![expr]).into()
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ use daft_dsl::{
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct ListUniqueCount;
pub struct ListDistinctCount;

#[typetag::serde]
impl ScalarUDF for ListUniqueCount {
impl ScalarUDF for ListDistinctCount {
fn as_any(&self) -> &dyn Any {
self
}

fn name(&self) -> &'static str {
"list_unique_count"
"list_distinct_count"
}

fn to_field(&self, inputs: &[ExprRef], schema: &Schema) -> DaftResult<Field> {
Expand All @@ -39,7 +39,7 @@ impl ScalarUDF for ListUniqueCount {

fn evaluate(&self, inputs: &[Series]) -> DaftResult<Series> {
match inputs {
[input] => input.list_unique_count(),
[input] => input.list_distinct_count(),
_ => Err(DaftError::SchemaMismatch(format!(
"Expected 1 input arg, got {}",
inputs.len()
Expand All @@ -49,6 +49,6 @@ impl ScalarUDF for ListUniqueCount {
}

#[must_use]
pub fn list_unique_count(expr: ExprRef) -> ExprRef {
ScalarFunction::new(ListUniqueCount, vec![expr]).into()
pub fn list_distinct_count(expr: ExprRef) -> ExprRef {
ScalarFunction::new(ListDistinctCount, vec![expr]).into()
}
8 changes: 4 additions & 4 deletions src/daft-functions/src/list/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
mod chunk;
mod count;
mod distinct;
mod distinct_count;
mod explode;
mod get;
mod join;
Expand All @@ -10,12 +12,12 @@ mod min;
mod slice;
mod sort;
mod sum;
mod unique;
mod unique_count;
mod value_counts;

pub use chunk::{list_chunk as chunk, ListChunk};
pub use count::{list_count as count, ListCount};
pub use distinct::{list_distinct as distinct, ListDistinct};
pub use distinct_count::{list_distinct_count as distinct_count, ListDistinctCount};
pub use explode::{explode, Explode};
pub use get::{list_get as get, ListGet};
pub use join::{list_join as join, ListJoin};
Expand All @@ -26,6 +28,4 @@ pub use min::{list_min as min, ListMin};
pub use slice::{list_slice as slice, ListSlice};
pub use sort::{list_sort as sort, ListSort};
pub use sum::{list_sum as sum, ListSum};
pub use unique::{list_unique as unique, ListUnique};
pub use unique_count::{list_unique_count as unique_count, ListUniqueCount};
pub use value_counts::list_value_counts as value_counts;
6 changes: 3 additions & 3 deletions src/daft-functions/src/python/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use daft_dsl::python::PyExpr;
use pyo3::{pyfunction, PyResult};

simple_python_wrapper!(list_chunk, crate::list::chunk, [expr: PyExpr, size: usize]);
simple_python_wrapper!(list_unique_count, crate::list::unique_count, [expr: PyExpr]);
simple_python_wrapper!(list_distinct_count, crate::list::distinct_count, [expr: PyExpr]);
simple_python_wrapper!(list_count, crate::list::count, [expr: PyExpr, mode: CountMode]);
simple_python_wrapper!(explode, crate::list::explode, [expr: PyExpr]);
simple_python_wrapper!(list_get, crate::list::get, [expr: PyExpr, idx: PyExpr, default_value: PyExpr]);
Expand All @@ -17,8 +17,8 @@ simple_python_wrapper!(list_value_counts, crate::list::value_counts, [expr: PyEx

#[pyfunction]
#[pyo3(signature = (expr, ignore_nulls=true))]
pub fn list_unique(expr: PyExpr, ignore_nulls: bool) -> PyResult<PyExpr> {
Ok(crate::list::unique(expr.into(), ignore_nulls).into())
pub fn list_distinct(expr: PyExpr, ignore_nulls: bool) -> PyResult<PyExpr> {
Ok(crate::list::distinct(expr.into(), ignore_nulls).into())
}

#[pyfunction]
Expand Down
4 changes: 2 additions & 2 deletions src/daft-functions/src/python/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ pub fn register(parent: &Bound<PyModule>) -> PyResult<()> {
add!(list::list_slice);
add!(list::list_sort);
add!(list::list_sum);
add!(list::list_unique_count);
add!(list::list_distinct_count);
add!(list::list_value_counts);
add!(list::list_unique);
add!(list::list_distinct);

add!(misc::to_struct);
add!(misc::utf8_count_matches);
Expand Down
7 changes: 4 additions & 3 deletions src/daft-physical-plan/src/physical_planner/translate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use daft_dsl::{
ApproxPercentileParams, Expr, ExprRef, SketchType,
};
use daft_functions::{
list::{unique, unique_count},
list::{distinct, distinct_count},
numeric::sqrt,
};
use daft_logical_plan::{
Expand Down Expand Up @@ -929,7 +929,7 @@ pub fn populate_aggregation_stages(
);

// Final projection
let result = unique_count(col(list_concat_id.clone())).alias(output_name);
let result = distinct_count(col(list_concat_id.clone())).alias(output_name);
final_exprs.push(result);
}
AggExpr::Sum(e) => {
Expand Down Expand Up @@ -1107,7 +1107,8 @@ pub fn populate_aggregation_stages(
schema,
&mut second_stage_aggs,
);
let result = unique(col(list_concat_id.clone()), *ignore_nulls).alias(output_name);
let result =
distinct(col(list_concat_id.clone()), *ignore_nulls).alias(output_name);
final_exprs.push(result);
}
AggExpr::Concat(e) => {
Expand Down

0 comments on commit 71f24f1

Please sign in to comment.