Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BugFix] MaxBy/MinBy not filter nulls #51354

Merged
merged 1 commit into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion be/src/exec/analytor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,13 @@ Status Analytor::prepare(RuntimeState* state, ObjectPool* pool, RuntimeProfile*
real_fn_name += "_in";
_need_partition_materializing = true;
}
func = get_window_function(real_fn_name, arg_type.type, return_type.type, is_input_nullable, fn.binary_type,
const auto& fname = fn.name.function_name;
auto real_arg_type = arg_type.type;
satanson marked this conversation as resolved.
Show resolved Hide resolved
if (fname == "max_by" || fname == "min_by" || fname == "max_by_v2" || fname == "min_by_v2") {
const TypeDescriptor arg1_type = TypeDescriptor::from_thrift(fn.arg_types[1]);
real_arg_type = arg1_type.type;
}
func = get_window_function(real_fn_name, real_arg_type, return_type.type, is_input_nullable, fn.binary_type,
state->func_version());
if (func == nullptr) {
return Status::InternalError(strings::Substitute(
Expand Down
3 changes: 2 additions & 1 deletion be/src/exprs/agg/factory/aggregate_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ const AggregateFunction* get_aggregate_function(const std::string& agg_func_name

// Because max_by and min_by function have two input types,
// so we use its second arguments type as input.
if (agg_func_name == "max_by" || agg_func_name == "min_by") {
if (agg_func_name == "max_by" || agg_func_name == "min_by" || agg_func_name == "max_by_v2" ||
agg_func_name == "min_by_v2") {
arg_type = arg_types[1];
}

Expand Down
16 changes: 8 additions & 8 deletions be/src/exprs/agg/factory/aggregate_factory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,10 @@ class AggregateFactory {
template <LogicalType LT>
static auto MakeMaxAggregateFunction();

template <LogicalType LT>
template <LogicalType LT, bool not_filter_nulls>
static auto MakeMaxByAggregateFunction();

template <LogicalType LT>
template <LogicalType LT, bool not_filter_nulls>
static auto MakeMinByAggregateFunction();

template <LogicalType LT>
Expand Down Expand Up @@ -303,16 +303,16 @@ auto AggregateFactory::MakeMaxAggregateFunction() {
return std::make_shared<MaxMinAggregateFunction<LT, MaxAggregateData<LT>, MaxElement<LT, MaxAggregateData<LT>>>>();
}

template <LogicalType LT>
template <LogicalType LT, bool not_filter_nulls>
auto AggregateFactory::MakeMaxByAggregateFunction() {
return std::make_shared<
MaxMinByAggregateFunction<LT, MaxByAggregateData<LT>, MaxByElement<LT, MaxByAggregateData<LT>>>>();
using AggData = MaxByAggregateData<LT, not_filter_nulls>;
return std::make_shared<MaxMinByAggregateFunction<LT, AggData, MaxByElement<LT, AggData>>>();
}

template <LogicalType LT>
template <LogicalType LT, bool not_filter_nulls>
auto AggregateFactory::MakeMinByAggregateFunction() {
return std::make_shared<
MaxMinByAggregateFunction<LT, MinByAggregateData<LT>, MinByElement<LT, MinByAggregateData<LT>>>>();
using AggData = MinByAggregateData<LT, not_filter_nulls>;
return std::make_shared<MaxMinByAggregateFunction<LT, AggData, MinByElement<LT, AggData>>>();
}

template <LogicalType LT>
Expand Down
12 changes: 8 additions & 4 deletions be/src/exprs/agg/factory/aggregate_resolver_minmaxany.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,15 @@ struct MaxMinByDispatcherInner {
if constexpr ((lt_is_aggregate<arg_type> || lt_is_json<arg_type>)&&(lt_is_aggregate<ret_type> ||
lt_is_json<ret_type>)) {
if constexpr (is_max_by) {
resolver->add_aggregate_mapping_variadic<arg_type, ret_type, MaxByAggregateData<arg_type>>(
"max_by", true, AggregateFactory::MakeMaxByAggregateFunction<arg_type>());
resolver->add_aggregate_mapping_notnull<arg_type, ret_type>(
"max_by", true, AggregateFactory::MakeMaxByAggregateFunction<arg_type, false>());
resolver->add_aggregate_mapping_notnull<arg_type, ret_type>(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why using add_aggregate_mapping_notnull? What will happen if y is a nullable column for max_by(x, y) ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, it seems that nullable columns are handled by MaxBy itself now.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, null is acceptable output values, so max_by/min_by agg cannot be wrapped by NullableAggFunction

"max_by_v2", true, AggregateFactory::MakeMaxByAggregateFunction<arg_type, true>());
} else {
resolver->add_aggregate_mapping_variadic<arg_type, ret_type, MinByAggregateData<arg_type>>(
"min_by", true, AggregateFactory::MakeMinByAggregateFunction<arg_type>());
resolver->add_aggregate_mapping_notnull<arg_type, ret_type>(
"min_by", true, AggregateFactory::MakeMinByAggregateFunction<arg_type, false>());
resolver->add_aggregate_mapping_notnull<arg_type, ret_type>(
"min_by_v2", true, AggregateFactory::MakeMinByAggregateFunction<arg_type, true>());
satanson marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
Expand Down
Loading
Loading