From 703785c6b89e9be7c096d5dd9b636185af5a36db Mon Sep 17 00:00:00 2001 From: Yingchun Lai <405403881@qq.com> Date: Tue, 22 Dec 2020 23:46:00 +0800 Subject: [PATCH] [Perf] Merge delete conditions to improve performance --- be/src/olap/delete_handler.cpp | 38 +- be/src/olap/delete_handler.h | 6 + be/src/olap/olap_cond.cpp | 518 ++++++++++++++++-- be/src/olap/olap_cond.h | 64 ++- be/src/olap/reader.h | 2 +- be/src/olap/rowset/segment_reader.cpp | 18 +- .../rowset/segment_v2/segment_iterator.cpp | 2 +- be/test/olap/CMakeLists.txt | 1 + be/test/olap/conditions_test.cpp | 241 ++++++++ 9 files changed, 792 insertions(+), 98 deletions(-) create mode 100644 be/test/olap/conditions_test.cpp diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp index 94fca294c9d60d..7e3f593156468a 100644 --- a/be/src/olap/delete_handler.cpp +++ b/be/src/olap/delete_handler.cpp @@ -27,6 +27,7 @@ #include #include +#include "common/logging.h" #include "gen_cpp/olap_file.pb.h" #include "olap/olap_common.h" #include "olap/olap_cond.h" @@ -224,11 +225,30 @@ bool DeleteHandler::_parse_condition(const std::string& condition_str, TConditio return true; } +void DeleteHandler::_merge_del_conds() { + _merged_del_conds.filter_version = _version; + _merged_del_conds.del_cond = new (std::nothrow) Conditions(); + CHECK(_merged_del_conds.del_cond != nullptr) << "fail to malloc Conditions. size=" << sizeof(Conditions); + _merged_del_conds.del_cond->set_tablet_schema(_schema); + + for (const auto& del_cond : _del_conds) { + DCHECK_LE(del_cond.filter_version, _version); + // The merged condition is the OR of every delete condition, so it may only be applied to rows that all of them cover: keep the smallest filter_version. + _merged_del_conds.filter_version = del_cond.filter_version < _merged_del_conds.filter_version ? del_cond.filter_version : _merged_del_conds.filter_version; + _merged_del_conds_valid = _merged_del_conds.del_cond->merge_del_condition(del_cond.del_cond->columns()); + if (!_merged_del_conds_valid) { + break; + } + } +} + OLAPStatus DeleteHandler::init(const TabletSchema& schema, const DelPredicateArray& delete_conditions, int64_t version) { DCHECK(!_is_inited) << "reinitialize delete handler."; DCHECK(version >= 0) << "invalid parameters. 
version=" << version; + _version = version; + _schema = &schema; for (const auto& delete_condition : delete_conditions) { // 跳过版本号大于version的过滤条件 if (delete_condition.version() > version) { @@ -279,16 +297,25 @@ OLAPStatus DeleteHandler::init(const TabletSchema& schema, _del_conds.push_back(temp); } + _merge_del_conds(); + _is_inited = true; return OLAP_SUCCESS; } bool DeleteHandler::is_filter_data(const int64_t data_version, const RowCursor& row) const { - // 根据语义,存储在_del_conds的删除条件应该是OR关系 - // 因此,只要数据符合其中一条过滤条件,则返回true + // Return true when this row can be filtered by _merged_del_conds + if (_merged_del_conds_valid && data_version <= _merged_del_conds.filter_version && + _merged_del_conds.del_cond->delete_conditions_eval(row)) { + return true; + } + + // DeleteConditions in _del_conds are in 'OR' relationship, + // return true when this row could be filtered by any DeleteConditions for (const auto& del_cond : _del_conds) { - if (data_version <= del_cond.filter_version && del_cond.del_cond->delete_conditions_eval(row)) { + if (data_version <= del_cond.filter_version && + del_cond.del_cond->delete_conditions_eval(row)) { return true; } } @@ -298,6 +325,7 @@ bool DeleteHandler::is_filter_data(const int64_t data_version, const RowCursor& std::vector DeleteHandler::get_conds_version() { std::vector conds_version; + conds_version.reserve(_del_conds.size()); for (const auto& cond : _del_conds) { conds_version.push_back(cond.filter_version); } @@ -309,6 +337,8 @@ void DeleteHandler::finalize() { return; } + delete _merged_del_conds.del_cond; + _merged_del_conds.del_cond = nullptr; for (auto& cond : _del_conds) { cond.del_cond->finalize(); delete cond.del_cond; diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h index aea828dd12a674..f9c6ab77e52ed3 100644 --- a/be/src/olap/delete_handler.h +++ b/be/src/olap/delete_handler.h @@ -134,9 +134,15 @@ class DeleteHandler { // Use regular expression to extract 'column_name', 'op' and 'operands' bool 
_parse_condition(const std::string& condition_str, TCondition* condition); + void _merge_del_conds(); + bool _is_inited = false; + int64_t _version = 0; + const TabletSchema* _schema = nullptr; // DeleteConditions in _del_conds are in 'OR' relationship std::vector _del_conds; + bool _merged_del_conds_valid = true; + DeleteConditions _merged_del_conds; DISALLOW_COPY_AND_ASSIGN(DeleteHandler); }; diff --git a/be/src/olap/olap_cond.cpp b/be/src/olap/olap_cond.cpp index 231da7be21afa9..25ffd3bdf164a8 100644 --- a/be/src/olap/olap_cond.cpp +++ b/be/src/olap/olap_cond.cpp @@ -87,15 +87,6 @@ static CondOp parse_op_type(const string& op) { return OP_NULL; } -Cond::~Cond() { - delete operand_field; - for (auto& it : operand_set) { - delete it; - } - min_value_field = nullptr; - max_value_field = nullptr; -} - OLAPStatus Cond::init(const TCondition& tcond, const TabletColumn& column) { // Parse op type op = parse_op_type(tcond.condition_op); @@ -108,7 +99,7 @@ OLAPStatus Cond::init(const TCondition& tcond, const TabletColumn& column) { // 'is null' or 'is not null' DCHECK_EQ(tcond.condition_values.size(), 1); auto operand = tcond.condition_values.begin(); - std::unique_ptr f(WrapperField::create(column, operand->length())); + std::shared_ptr f(WrapperField::create(column, operand->length())); if (f == nullptr) { OLAP_LOG_WARNING("Create field failed. [name=%s, operand=%s, op_type=%d]", tcond.column_name.c_str(), operand->c_str(), op); @@ -119,11 +110,11 @@ OLAPStatus Cond::init(const TCondition& tcond, const TabletColumn& column) { } else { f->set_not_null(); } - operand_field = f.release(); + operand_field = f; } else if (op != OP_IN && op != OP_NOT_IN) { DCHECK_EQ(tcond.condition_values.size(), 1); auto operand = tcond.condition_values.begin(); - std::unique_ptr f(WrapperField::create(column, operand->length())); + std::shared_ptr f(WrapperField::create(column, operand->length())); if (f == nullptr) { OLAP_LOG_WARNING("Create field failed. 
[name=%s, operand=%s, op_type=%d]", tcond.column_name.c_str(), operand->c_str(), op); @@ -135,12 +126,12 @@ OLAPStatus Cond::init(const TCondition& tcond, const TabletColumn& column) { tcond.column_name.c_str(), operand->c_str(), op); return res; } - operand_field = f.release(); + operand_field = f; } else { DCHECK(op == OP_IN || op == OP_NOT_IN); DCHECK(!tcond.condition_values.empty()); for (auto& operand : tcond.condition_values) { - std::unique_ptr f(WrapperField::create(column, operand.length())); + std::shared_ptr f(WrapperField::create(column, operand.length())); if (f == nullptr) { OLAP_LOG_WARNING("Create field failed. [name=%s, operand=%s, op_type=%d]", tcond.column_name.c_str(), operand.c_str(), op); @@ -152,21 +143,17 @@ OLAPStatus Cond::init(const TCondition& tcond, const TabletColumn& column) { tcond.column_name.c_str(), operand.c_str(), op); return res; } - if (min_value_field == nullptr || f->cmp(min_value_field) < 0) { - min_value_field = f.get(); + if (min_value_field == nullptr || f->cmp(min_value_field.get()) < 0) { + min_value_field = f; } - if (max_value_field == nullptr || f->cmp(max_value_field) > 0) { - max_value_field = f.get(); + if (max_value_field == nullptr || f->cmp(max_value_field.get()) > 0) { + max_value_field = f; } - auto insert_result = operand_set.insert(f.get()); + auto insert_result = operand_set.insert(f); if (!insert_result.second) { LOG(WARNING) << "Duplicate operand in in-predicate.[condition=" << operand << "]"; - // Duplicated, let std::unique_ptr delete field - } else { - // Normal case, release this std::unique_ptr - f.release(); } } } @@ -174,6 +161,383 @@ OLAPStatus Cond::init(const TCondition& tcond, const TabletColumn& column) { return OLAP_SUCCESS; } +OLAPStatus Cond::intersection_cond(const Cond& other) { + DCHECK(op == other.op || + (op == OP_NOT_IN && other.op == OP_NE) || + (op == OP_NE && other.op == OP_NOT_IN) || + (op == OP_IN && other.op == OP_EQ) || + (op == OP_EQ && other.op == OP_IN) || + (op == OP_LT 
&& other.op == OP_LE) || + (op == OP_LE && other.op == OP_LT) || + (op == OP_GT && other.op == OP_GE) || + (op == OP_GE && other.op == OP_GT)) << "op: " << op << ", other.op: " << other.op; + switch (op) { + case OP_EQ: + if (other.op == OP_EQ) { + if (operand_field->field()->compare_cell(*operand_field, *(other.operand_field)) != 0) { + // No intersection, all not satisfied + op = OP_NULL; + return OLAP_SUCCESS; + } + } else { + DCHECK_EQ(other.op, OP_IN) << "op: " << op << ", other.op: " << other.op; + if (other.operand_set.find(operand_field) == other.operand_set.end()) { + // No intersection, all not satisfied + op = OP_NULL; + return OLAP_SUCCESS; + } + } + return OLAP_SUCCESS; + case OP_NE: + if (other.op == OP_NE) { + int cmp = operand_field->field()->compare_cell(*operand_field, *(other.operand_field)); + if (cmp != 0) { + // Transfer to OP_NOT_IN if they OP_NE to two different values + op = OP_NOT_IN; + operand_set.insert(operand_field); + operand_set.insert(other.operand_field); + min_value_field = cmp < 0 ? operand_field : other.operand_field; + max_value_field = cmp > 0 ? 
operand_field : other.operand_field; + // Invalidate operand_field after transferring to operand_set + operand_field = nullptr; + } + } else { + DCHECK_EQ(other.op, OP_NOT_IN) << "op: " << op << ", other.op: " << other.op; + if (other.operand_set.size() == 1 && + operand_field->field()->compare_cell(*operand_field, *(other.min_value_field)) == 0) { + // Do nothing if the other's only one value equal to operand_field + return OLAP_SUCCESS; + } + // Transfer to OP_NOT_IN otherwise + op = OP_NOT_IN; + operand_set = other.operand_set; + min_value_field = other.min_value_field; + max_value_field = other.max_value_field; + + if (operand_set.find(operand_field) != operand_set.end()) { + // Exist a same value in operand_set, do nothing but release and invalidate operand_field + operand_field = nullptr; + return OLAP_SUCCESS; + } + + // Insert and update min & max + operand_set.insert(operand_field); + if (operand_field->field()->compare_cell(*operand_field, *(min_value_field)) < 0) { + min_value_field = operand_field; + } + if (operand_field->field()->compare_cell(*operand_field, *(max_value_field)) > 0) { + max_value_field = operand_field; + } + + // Invalidate operand_field after inserting to operand_set + operand_field = nullptr; + } + return OLAP_SUCCESS; + case OP_LT: + case OP_LE: { + int cmp = operand_field->field()->compare_cell(*operand_field, *(other.operand_field)); + if (op == other.op) { + if (cmp > 0) { + operand_field = other.operand_field; + } + return OLAP_SUCCESS; + } + if (cmp == 0) { + op = OP_LT; + } + return OLAP_SUCCESS; + } + case OP_GT: + case OP_GE: { + int cmp = operand_field->field()->compare_cell(*operand_field, *(other.operand_field)); + if (op == other.op) { + if (cmp < 0) { + operand_field = other.operand_field; + } + return OLAP_SUCCESS; + } + if (cmp == 0) { + op = OP_GT; + } + return OLAP_SUCCESS; + } + case OP_IN: + if (other.op == OP_IN) { + for (auto operand = operand_set.begin(); operand != operand_set.end();) { + if 
(other.operand_set.find(*operand) == other.operand_set.end()) { + // Not in other's operand_set, invalidate and release it + operand = operand_set.erase(operand); + } else { + ++operand; + } + } + if (operand_set.empty()) { + // No intersection, all not satisfied + op = OP_NULL; + return OLAP_SUCCESS; + } + + min_value_field = nullptr; + max_value_field = nullptr; + if (operand_set.size() == 1) { + // Transfer to OP_EQ + op = OP_EQ; + operand_field = *operand_set.begin(); + operand_set.clear(); + return OLAP_SUCCESS; + } + + // Update min & max + for (const auto& operand : operand_set) { + if (min_value_field == nullptr || operand->field()->compare_cell(*min_value_field, *(operand)) > 0) { + min_value_field = operand; + } + if (max_value_field == nullptr || operand->field()->compare_cell(*max_value_field, *(operand)) < 0) { + max_value_field = operand; + } + } + } else { + DCHECK_EQ(other.op, OP_EQ) << "op: " << op << ", other.op: " << other.op; + if (operand_set.find(other.operand_field) == operand_set.end()) { + // No intersection, all not satisfied + op = OP_NULL; + return OLAP_SUCCESS; + } + + // Transfer to OP_EQ + op = OP_EQ; + operand_field = other.operand_field; + + // Invalidate + operand_set.clear(); + min_value_field = nullptr; + max_value_field = nullptr; + } + return OLAP_SUCCESS; + case OP_NOT_IN: + if (other.op == OP_NOT_IN) { + // Update min & max + if (min_value_field->field()->compare_cell(*min_value_field, *(other.min_value_field)) > 0) { + min_value_field = other.min_value_field; + } + if (max_value_field->field()->compare_cell(*max_value_field, *(other.max_value_field)) < 0) { + max_value_field = other.max_value_field; + } + // Update operand_set + operand_set.insert(other.operand_set.begin(), other.operand_set.end()); + } else { + DCHECK_EQ(other.op, OP_NE) << "op: " << op << ", other.op: " << other.op; + if (operand_set.find(other.operand_field) != operand_set.end()) { + // Exist a same value in operand_set, do nothing but release and 
invalidate this operand + return OLAP_SUCCESS; + } + + // Update min & max + if (other.operand_field->field()->compare_cell(*min_value_field, *(other.operand_field)) > 0) { + min_value_field = other.operand_field; + } + if (other.operand_field->field()->compare_cell(*max_value_field, *(other.operand_field)) < 0) { + max_value_field = other.operand_field; + } + + // Update operand_set + operand_set.insert(other.operand_field); + } + return OLAP_SUCCESS; + case OP_IS: + if (operand_field->is_null() != other.operand_field->is_null()) { + // No intersection, all not satisfied + op = OP_NULL; + return OLAP_SUCCESS; + } + return OLAP_SUCCESS; + default: + op = OP_ALL;; + return OLAP_ERR_READER_INITIALIZE_ERROR; + } +} + +OLAPStatus Cond::union_cond(const Cond& other) { + DCHECK(op == other.op || + (op == OP_NOT_IN && other.op == OP_NE) || + (op == OP_NE && other.op == OP_NOT_IN) || + (op == OP_IN && other.op == OP_EQ) || + (op == OP_EQ && other.op == OP_IN) || + (op == OP_LT && other.op == OP_LE) || + (op == OP_LE && other.op == OP_LT) || + (op == OP_GT && other.op == OP_GE) || + (op == OP_GE && other.op == OP_GT)) << "op: " << op << ", other.op: " << other.op; + switch (op) { + case OP_EQ: + if (other.op == OP_EQ) { + int cmp = operand_field->field()->compare_cell(*operand_field, *(other.operand_field)); + if (cmp != 0) { + // Transfer to OP_IN if they OP_EQ to two different values + op = OP_IN; + operand_set.insert(operand_field); + operand_set.insert(other.operand_field); + min_value_field = cmp < 0 ? operand_field : other.operand_field; + max_value_field = cmp > 0 ? 
operand_field : other.operand_field; + // Invalidate operand_field after transferring to operand_set + operand_field = nullptr; + } + } else { + DCHECK_EQ(other.op, OP_IN) << "op: " << op << ", other.op: " << other.op; + // Transfer to OP_IN + op = OP_IN; + operand_set = other.operand_set; + min_value_field = other.min_value_field; + max_value_field = other.max_value_field; + + if (operand_set.find(operand_field) == operand_set.end()) { + // Insert and update min & max + operand_set.insert(operand_field); + if (operand_field->field()->compare_cell(*operand_field, *(min_value_field)) < 0) { + min_value_field = operand_field; + } + if (operand_field->field()->compare_cell(*operand_field, *(max_value_field)) > 0) { + max_value_field = operand_field; + } + } + operand_field = nullptr; + } + return OLAP_SUCCESS; + case OP_NE: + if (other.op == OP_NE) { + int cmp = operand_field->field()->compare_cell(*operand_field, *(other.operand_field)); + if (cmp != 0) { + // All satisfied + op = OP_ALL; + operand_field = nullptr; + } + } else { + DCHECK_EQ(other.op, OP_NOT_IN) << "op: " << op << ", other.op: " << other.op; + if (other.operand_set.find(operand_field) == other.operand_set.end()) { + // All satisfied + op = OP_ALL; + operand_field = nullptr; + } + } + return OLAP_SUCCESS; + case OP_LT: + case OP_LE: { + int cmp = operand_field->field()->compare_cell(*operand_field, *(other.operand_field)); + if (op == other.op) { + if (cmp < 0) { + operand_field = other.operand_field; + } + return OLAP_SUCCESS; + } + if (cmp == 0) { + op = OP_LE; + } + return OLAP_SUCCESS; + } + case OP_GT: + case OP_GE: { + int cmp = operand_field->field()->compare_cell(*operand_field, *(other.operand_field)); + if (op == other.op) { + if (cmp > 0) { + operand_field = other.operand_field; + } + return OLAP_SUCCESS; + } + if (cmp == 0) { + op = OP_GE; + } + return OLAP_SUCCESS; + } + case OP_IN: + if (other.op == OP_IN) { + for (const auto& operand : other.operand_set) { + if 
(operand_set.find(operand) == operand_set.end()) { + operand_set.insert(operand); + if (operand->field()->compare_cell(*min_value_field, *operand) > 0) { + min_value_field = operand; + } + if (operand->field()->compare_cell(*max_value_field, *operand) < 0) { + max_value_field = operand; + } + } + } + } else { + DCHECK_EQ(other.op, OP_EQ) << "op: " << op << ", other.op: " << other.op; + if (operand_set.find(other.operand_field) == operand_set.end()) { + operand_set.insert(other.operand_field); + if (other.operand_field->field()->compare_cell(*min_value_field, *(other.operand_field)) > 0) { + min_value_field = other.operand_field; + } + if (other.operand_field->field()->compare_cell(*max_value_field, *(other.operand_field)) < 0) { + max_value_field = other.operand_field; + } + } + } + return OLAP_SUCCESS; + case OP_NOT_IN: + if (other.op == OP_NOT_IN) { + for (auto operand = operand_set.begin(); operand != operand_set.end();) { + if (other.operand_set.find(*operand) != other.operand_set.end()) { + ++operand; + } else { + operand = operand_set.erase(operand); + } + } + min_value_field = nullptr; + max_value_field = nullptr; + if (operand_set.empty()) { + // All satisfied + op = OP_ALL; + return OLAP_SUCCESS; + } + + if (operand_set.size() == 1) { + // Transfer to OP_NE + op = OP_NE; + operand_field = *operand_set.begin(); + operand_set.clear(); + return OLAP_SUCCESS; + } + + // Update min & max + for (const auto& operand : operand_set) { + if (min_value_field == nullptr || operand->field()->compare_cell(*min_value_field, *(operand)) > 0) { + min_value_field = operand; + } + if (max_value_field == nullptr || operand->field()->compare_cell(*max_value_field, *(operand)) < 0) { + max_value_field = operand; + } + } + } else { + DCHECK_EQ(other.op, OP_NE) << "op: " << op << ", other.op: " << other.op; + min_value_field = nullptr; + max_value_field = nullptr; + if (operand_set.find(other.operand_field) == operand_set.end()) { + // All satisfied + op = OP_ALL; + 
operand_set.clear(); + return OLAP_SUCCESS; + } + + // Transfer to OP_NE + op = OP_NE; + operand_field = other.operand_field; + operand_set.clear(); + } + return OLAP_SUCCESS; + case OP_IS: + if (operand_field->is_null() != other.operand_field->is_null()) { + // All satisfied + op = OP_ALL; + operand_field = nullptr; + return OLAP_SUCCESS; + } + return OLAP_SUCCESS; + default: + op = OP_ALL;; + return OLAP_ERR_READER_INITIALIZE_ERROR; + } +} + bool Cond::eval(const RowCursorCell& cell) const { if (cell.is_null() && op != OP_IS) { //任何非OP_IS operand和NULL的运算都是false @@ -194,15 +558,15 @@ bool Cond::eval(const RowCursorCell& cell) const { case OP_GE: return operand_field->field()->compare_cell(*operand_field, cell) <= 0; case OP_IN: { - WrapperField wrapperField(const_cast(min_value_field->field()), cell); - auto ret = operand_set.find(&wrapperField) != operand_set.end(); - wrapperField.release_field(); + auto wrapperField = std::make_shared(const_cast(min_value_field->field()), cell); + auto ret = operand_set.find(wrapperField) != operand_set.end(); + wrapperField->release_field(); return ret; } case OP_NOT_IN: { - WrapperField wrapperField(const_cast(min_value_field->field()), cell); - auto ret = operand_set.find(&wrapperField) == operand_set.end(); - wrapperField.release_field(); + auto wrapperField = std::make_shared(const_cast(min_value_field->field()), cell); + auto ret = operand_set.find(wrapperField) == operand_set.end(); + wrapperField->release_field(); return ret; } case OP_IS: { @@ -350,7 +714,7 @@ int Cond::del_eval(const std::pair& stat) const { } case OP_IN: { if (stat.first->cmp(stat.second) == 0) { - if (operand_set.find(stat.first) != operand_set.end()) { + if (operand_set.find(std::shared_ptr(stat.first, [](WrapperField*){})) != operand_set.end()) { ret = DEL_SATISFIED; } else { ret = DEL_NOT_SATISFIED; @@ -366,7 +730,7 @@ int Cond::del_eval(const std::pair& stat) const { } case OP_NOT_IN: { if (stat.first->cmp(stat.second) == 0) { - if 
(operand_set.find(stat.first) == operand_set.end()) { + if (operand_set.find(std::shared_ptr(stat.first, [](WrapperField*){})) == operand_set.end()) { ret = DEL_SATISFIED; } else { ret = DEL_NOT_SATISFIED; @@ -492,23 +856,39 @@ bool Cond::eval(const segment_v2::BloomFilter* bf) const { return true; } -CondColumn::~CondColumn() { - for (auto& it : _conds) { - delete it; - } -} - // PRECONDITION 1. index is valid; 2. at least has one operand OLAPStatus CondColumn::add_cond(const TCondition& tcond, const TabletColumn& column) { - std::unique_ptr cond(new Cond()); + auto cond = std::make_shared(); auto res = cond->init(tcond, column); if (res != OLAP_SUCCESS) { return res; } - _conds.push_back(cond.release()); + _conds.push_back(cond); return OLAP_SUCCESS; } +void CondColumn::merge_cond(const CondColumn& cond_col) { + DCHECK_EQ(_is_key, cond_col._is_key); + DCHECK_EQ(_col_index, cond_col._col_index); + + for (auto& cond1 : _conds) { + for (const auto& cond2 : cond_col._conds) { + if ((cond1->op == cond2->op) || + (cond1->op == OP_NOT_IN && cond2->op == OP_NE) || + (cond1->op == OP_NE && cond2->op == OP_NOT_IN) || + (cond1->op == OP_IN && cond2->op == OP_EQ) || + (cond1->op == OP_EQ && cond2->op == OP_IN) || + (cond1->op == OP_LT && cond2->op == OP_LE) || + (cond1->op == OP_LE && cond2->op == OP_LT) || + (cond1->op == OP_GT && cond2->op == OP_GE) || + (cond1->op == OP_GE && cond2->op == OP_GT)) { + CHECK_EQ(cond1->union_cond(*cond2), OLAP_SUCCESS); + break; + } + } + } +} + bool CondColumn::eval(const RowCursor& row) const { auto cell = row.cell(_col_index); for (auto& each_cond : _conds) { @@ -601,10 +981,10 @@ OLAPStatus Conditions::append_condition(const TCondition& tcond) { } CondColumn* cond_col = nullptr; - auto it = _columns.find(index); - if (it == _columns.end()) { + auto it = _cond_cols.find(index); + if (it == _cond_cols.end()) { cond_col = new CondColumn(*_schema, index); - _columns[index] = cond_col; + _cond_cols[index] = cond_col; } else { cond_col = 
it->second; } @@ -612,19 +992,42 @@ OLAPStatus Conditions::append_condition(const TCondition& tcond) { return cond_col->add_cond(tcond, column); } +bool Conditions::merge_del_condition(const CondColumns& cond_cols) { + if (cond_cols.size() > 1 || (!cond_cols.empty() && cond_cols.begin()->second->conds().size() != 1)) { + // Only support merging a single column that carries exactly one Cond: Conds of one column are AND-ed, so unioning them pairwise could match rows that no original delete condition matches + return false; + } + for (const auto& cond_col : cond_cols) { + int32_t index = cond_col.first; + auto it = _cond_cols.find(index); + if (it == _cond_cols.end()) { + if (!_cond_cols.empty()) { + // Only support to merge on the same column + return false; + } + CondColumn* new_cond_col = new CondColumn(*_schema, index); + new_cond_col->_conds.push_back(std::make_shared<Cond>(*cond_col.second->conds().front())); // Deep copy: merge_cond() mutates this Cond in place, so it must not alias the source delete condition + _cond_cols[index] = new_cond_col; + } else { + it->second->merge_cond(*cond_col.second); + } + } + return true; +} + bool Conditions::delete_conditions_eval(const RowCursor& row) const { - if (_columns.empty()) { + if (_cond_cols.empty()) { return false; } - for (auto& each_cond : _columns) { - if (_cond_column_is_key_or_duplicate(each_cond.second) && !each_cond.second->eval(row)) { + for (const auto& cond_col : _cond_cols) { + if (_cond_column_is_key_or_duplicate(cond_col.second) && !cond_col.second->eval(row)) { return false; } } VLOG_NOTICE << "Row meets the delete conditions. " - << "condition_count=" << _columns.size() << ", row=" << row.to_string(); + << "condition_count=" << _cond_cols.size() << ", row=" << row.to_string(); return true; } @@ -632,10 +1035,10 @@ bool Conditions::rowset_pruning_filter(const std::vector& zone_maps) c // ZoneMap will store min/max of rowset. // The function is to filter rowset using ZoneMaps // and query predicates. 
- for (auto& cond_it : _columns) { - if (_cond_column_is_key_or_duplicate(cond_it.second)) { - if (cond_it.first < zone_maps.size() && - !cond_it.second->eval(zone_maps.at(cond_it.first))) { + for (const auto& cond_col : _cond_cols) { + if (_cond_column_is_key_or_duplicate(cond_col.second)) { + if (cond_col.first < zone_maps.size() && + !cond_col.second->eval(zone_maps.at(cond_col.first))) { return true; } } @@ -644,9 +1047,10 @@ bool Conditions::rowset_pruning_filter(const std::vector& zone_maps) c } int Conditions::delete_pruning_filter(const std::vector& zone_maps) const { - if (_columns.empty()) { + if (_cond_cols.empty()) { return DEL_NOT_SATISFIED; } + // ZoneMap and DeletePredicate are all stored in TabletMeta. // This function is to filter rowset using ZoneMap and Delete Predicate. /* @@ -658,19 +1062,19 @@ int Conditions::delete_pruning_filter(const std::vector& zone_maps) co int ret = DEL_NOT_SATISFIED; bool del_partial_satisfied = false; bool del_not_satisfied = false; - for (auto& cond_it : _columns) { + for (auto& cond_col : _cond_cols) { /* * this is base on the assumption that the delete condition * is only about key field, not about value field except the storage model is duplicate. */ - if (_cond_column_is_key_or_duplicate(cond_it.second) && cond_it.first > zone_maps.size()) { + if (_cond_column_is_key_or_duplicate(cond_col.second) && cond_col.first >= zone_maps.size()) { /* '>=': an index equal to size() is already out of range for zone_maps.at() below */ LOG(WARNING) << "where condition not equal column statistics size. 
" - << "cond_id=" << cond_it.first << ", zone_map_size=" << zone_maps.size(); + << "cond_id=" << cond_col.first << ", zone_map_size=" << zone_maps.size(); del_partial_satisfied = true; continue; } - int del_ret = cond_it.second->del_eval(zone_maps.at(cond_it.first)); + int del_ret = cond_col.second->del_eval(zone_maps.at(cond_col.first)); if (DEL_SATISFIED == del_ret) { continue; } else if (DEL_PARTIAL_SATISFIED == del_ret) { @@ -692,8 +1096,8 @@ int Conditions::delete_pruning_filter(const std::vector& zone_maps) co } CondColumn* Conditions::get_column(int32_t cid) const { - auto iter = _columns.find(cid); - if (iter != _columns.end()) { + auto iter = _cond_cols.find(cid); + if (iter != _cond_cols.end()) { return iter->second; } return nullptr; diff --git a/be/src/olap/olap_cond.h b/be/src/olap/olap_cond.h index 54d699e51f60b8..a799fb63c8aa86 100644 --- a/be/src/olap/olap_cond.h +++ b/be/src/olap/olap_cond.h @@ -38,27 +38,30 @@ class WrapperField; class RowCursorCell; enum CondOp { - OP_NULL = -1, // invalid op - OP_EQ = 0, // equal - OP_NE = 1, // not equal - OP_LT = 2, // less than - OP_LE = 3, // less or equal - OP_GT = 4, // greater than - OP_GE = 5, // greater or equal - OP_IN = 6, // in - OP_IS = 7, // is null or not null - OP_NOT_IN = 8 // not in + OP_NULL = -1, // invalid op + OP_EQ = 0, // equal + OP_NE = 1, // not equal + OP_LT = 2, // less than + OP_LE = 3, // less or equal + OP_GT = 4, // greater than + OP_GE = 5, // greater or equal + OP_IN = 6, // in + OP_IS = 7, // is null or not null + OP_NOT_IN = 8, // not in + OP_ALL = 100 // all satisfied }; // Hash functor for IN set struct FieldHash { - size_t operator()(const WrapperField* field) const { return field->hash_code(); } + size_t operator()(const std::shared_ptr& field) const { + return field->hash_code(); + } }; // Equal function for IN set struct FieldEqual { - bool operator()(const WrapperField* left, const WrapperField* right) const { - return left->cmp(right) == 0; + bool operator()(const 
std::shared_ptr& left, const std::shared_ptr& right) const { + return left->cmp(right.get()) == 0; } }; @@ -66,10 +69,13 @@ struct FieldEqual { struct Cond { public: Cond() = default; - ~Cond(); OLAPStatus init(const TCondition& tcond, const TabletColumn& column); + OLAPStatus intersection_cond(const Cond& other); + + OLAPStatus union_cond(const Cond& other); + // 用一行数据的指定列同条件进行比较,如果符合过滤条件, // 即按照此条件,行应被过滤掉,则返回true,否则返回false bool eval(const RowCursorCell& cell) const; @@ -86,13 +92,13 @@ struct Cond { CondOp op = OP_NULL; // valid when op is not OP_IN and OP_NOT_IN - WrapperField* operand_field = nullptr; + std::shared_ptr operand_field; // valid when op is OP_IN or OP_NOT_IN - typedef std::unordered_set FieldSet; + typedef std::unordered_set, FieldHash, FieldEqual> FieldSet; FieldSet operand_set; // valid when op is OP_IN or OP_NOT_IN, represents the minimum or maximum value of in elements - WrapperField* min_value_field = nullptr; - WrapperField* max_value_field = nullptr; + std::shared_ptr min_value_field; + std::shared_ptr max_value_field; }; // 所有归属于同一列上的条件二元组,聚合在一个CondColumn上 @@ -101,9 +107,9 @@ class CondColumn { CondColumn(const TabletSchema& tablet_schema, int32_t index) : _col_index(index) { _is_key = tablet_schema.column(_col_index).is_key(); } - ~CondColumn(); OLAPStatus add_cond(const TCondition& tcond, const TabletColumn& column); + void merge_cond(const CondColumn& cond_col); // 对一行数据中的指定列,用所有过滤条件进行比较,如果所有条件都满足,则过滤此行 // Return true means this row should be filtered out, otherwise return false @@ -134,7 +140,7 @@ class CondColumn { inline bool is_key() const { return _is_key; } - const std::vector& conds() const { return _conds; } + const std::vector>& conds() const { return _conds; } private: friend class Conditions; @@ -142,7 +148,7 @@ class CondColumn { bool _is_key = false; int32_t _col_index = 0; // Conds in _conds are in 'AND' relationship - std::vector _conds; + std::vector> _conds; }; // 一次请求所关联的条件 @@ -156,10 +162,10 @@ class Conditions { 
~Conditions() { finalize(); } void finalize() { - for (auto& it : _columns) { + for (auto& it : _cond_cols) { delete it.second; } - _columns.clear(); + _cond_cols.clear(); } // TODO(yingchun): should do it in constructor @@ -170,7 +176,10 @@ class Conditions { // 1. column不属于key列 // 2. column类型是double, float OLAPStatus append_condition(const TCondition& condition); - + + // Now only support to merge on a same single column + bool merge_del_condition(const CondColumns& cond_cols); + // 通过所有列上的删除条件对RowCursor进行过滤 // Return true means this row should be filtered out, otherwise return false bool delete_conditions_eval(const RowCursor& row) const; @@ -181,7 +190,7 @@ class Conditions { // Whether the rowset satisfied delete condition int delete_pruning_filter(const std::vector& zone_maps) const; - const CondColumns& columns() const { return _columns; } + const CondColumns& columns() const { return _cond_cols; } CondColumn* get_column(int32_t cid) const; @@ -192,8 +201,9 @@ class Conditions { private: const TabletSchema* _schema = nullptr; - // CondColumns in _index_conds are in 'AND' relationship - CondColumns _columns; // list of condition column + + // CondColumns in _cond_cols are in 'AND' relationship + CondColumns _cond_cols; // list of condition column DISALLOW_COPY_AND_ASSIGN(Conditions); }; diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h index 9da3006252a4eb..28f28ccce67307 100644 --- a/be/src/olap/reader.h +++ b/be/src/olap/reader.h @@ -182,7 +182,7 @@ class Reader { bool _need_agg_finalize = true; ReaderType _reader_type = READER_QUERY; bool _next_delete_flag = false; - bool _filter_delete = false; + bool _filter_delete = false; // Whether to delete the filtered rows bool _has_sequence_col = false; int32_t _sequence_col_idx = -1; const RowCursor* _next_key = nullptr; diff --git a/be/src/olap/rowset/segment_reader.cpp b/be/src/olap/rowset/segment_reader.cpp index 114b704e7608fd..2d9ff778acae5b 100644 --- a/be/src/olap/rowset/segment_reader.cpp +++ 
b/be/src/olap/rowset/segment_reader.cpp @@ -394,14 +394,14 @@ OLAPStatus SegmentReader::_pick_delete_row_groups(uint32_t first_block, uint32_t bool del_partial_satisfied = false; bool del_not_satisfied = false; - for (auto& i : delete_condition.del_cond->columns()) { - ColumnId table_column_id = i.first; + for (const auto& col_cond : delete_condition.del_cond->columns()) { + ColumnId table_column_id = col_cond.first; ColumnId unique_column_id = _tablet_id_to_unique_id_map[table_column_id]; if (0 == _unique_id_to_segment_id_map.count(unique_column_id)) { continue; } StreamIndexReader* index_reader = _indices[unique_column_id]; - int del_ret = i.second->del_eval(index_reader->entry(j).column_statistic().pair()); + int del_ret = col_cond.second->del_eval(index_reader->entry(j).column_statistic().pair()); if (DEL_SATISFIED == del_ret) { continue; } else if (DEL_PARTIAL_SATISFIED == del_ret) { @@ -412,7 +412,7 @@ OLAPStatus SegmentReader::_pick_delete_row_groups(uint32_t first_block, uint32_t } } - if (true == del_not_satisfied || 0 == delete_condition.del_cond->columns().size()) { + if (true == del_not_satisfied || delete_condition.del_cond->columns().empty()) { //if state is DEL_PARTIAL_SATISFIED last_time, cannot be set as DEL_NOT_SATISFIED //it is special for for delete condition if (DEL_PARTIAL_SATISFIED == _include_blocks[j]) { @@ -471,21 +471,21 @@ OLAPStatus SegmentReader::_pick_row_groups(uint32_t first_block, uint32_t last_b _pick_delete_row_groups(first_block, last_block); - if (NULL == _conditions || _conditions->columns().size() == 0) { + if (NULL == _conditions || _conditions->columns().empty()) { return OLAP_SUCCESS; } OlapStopWatch timer; timer.reset(); - for (auto& i : _conditions->columns()) { - FieldAggregationMethod aggregation = _get_aggregation_by_index(i.first); + for (const auto& cond_col : _conditions->columns()) { + FieldAggregationMethod aggregation = _get_aggregation_by_index(cond_col.first); bool is_continue = (aggregation == 
OLAP_FIELD_AGGREGATION_NONE); if (!is_continue) { continue; } - ColumnId table_column_id = i.first; + ColumnId table_column_id = cond_col.first; ColumnId unique_column_id = _tablet_id_to_unique_id_map[table_column_id]; if (0 == _unique_id_to_segment_id_map.count(unique_column_id)) { continue; @@ -496,7 +496,7 @@ OLAPStatus SegmentReader::_pick_row_groups(uint32_t first_block, uint32_t last_b continue; } - if (!i.second->eval(index_reader->entry(j).column_statistic().pair())) { + if (!cond_col.second->eval(index_reader->entry(j).column_statistic().pair())) { _include_blocks[j] = DEL_SATISFIED; --_remain_block; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 1519f62e36b041..a82172567f3f61 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -262,7 +262,7 @@ Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row // final filter data with delete conditions for (auto& delete_condition : _opts.delete_conditions) { RowRanges delete_condition_row_ranges = RowRanges::create_single(0); - for (auto& delete_column_condition : delete_condition->columns()) { + for (const auto& delete_column_condition : delete_condition->columns()) { const int32_t cid = delete_column_condition.first; CondColumn* column_cond = nullptr; if (_opts.conditions != nullptr) { diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt index ffc4507971ca19..db2538237394d3 100644 --- a/be/test/olap/CMakeLists.txt +++ b/be/test/olap/CMakeLists.txt @@ -50,6 +50,7 @@ ADD_BE_TEST(decimal12_test) ADD_BE_TEST(column_vector_test) ADD_BE_TEST(storage_types_test) ADD_BE_TEST(aggregate_func_test) +ADD_BE_TEST(conditions_test) ADD_BE_TEST(rowset/segment_v2/bitshuffle_page_test) ADD_BE_TEST(rowset/segment_v2/plain_page_test) ADD_BE_TEST(rowset/segment_v2/binary_plain_page_test) diff --git a/be/test/olap/conditions_test.cpp 
b/be/test/olap/conditions_test.cpp
new file mode 100644
index 00000000000000..a85c02655a2b5a
--- /dev/null
+++ b/be/test/olap/conditions_test.cpp
@@ -0,0 +1,241 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "olap/olap_cond.h"
+#include "olap/tablet_schema_helper.h"
+
+namespace doris {
+
+class CondTest : public testing::Test {  // Fixture: merges Cond objects built on one int key column
+public:
+    CondTest() {
+        _tablet_column = create_int_key(_col);
+    }
+
+    void SetUp() override {
+        CpuInfo::init();
+    }
+
+    Cond create_cond(const std::string& op, const std::vector<std::string>& operands) {
+        TCondition tcond;
+        tcond.__set_column_name(std::to_string(_col));  // column name is the column id as text
+        tcond.__set_condition_op(op);
+        tcond.__set_condition_values(operands);
+
+        Cond cond;
+        cond.init(tcond, _tablet_column);
+
+        return cond;
+    }
+
+    void check_operand(Cond cond, CondOp op, const std::string& operand) {
+        if (op == OP_ALL) {  // OP_ALL: every operand slot must be empty
+            ASSERT_EQ(cond.operand_field, nullptr);
+            ASSERT_EQ(cond.operand_set.size(), 0);
+            ASSERT_EQ(cond.min_value_field, nullptr);
+            ASSERT_EQ(cond.max_value_field, nullptr);
+        } else if (op == OP_IS) {  // OP_IS: only the nullness of the operand is compared
+            ASSERT_NE(cond.operand_field, nullptr);
+            ASSERT_EQ(operand == "NULL", cond.operand_field->is_null());
+        } else {
+            ASSERT_NE(cond.operand_field, nullptr);
+            ASSERT_EQ(operand, cond.operand_field->to_string());
+        }
+    }
+
+    void check_operands(Cond cond, const std::set<std::string>& operands) {
+        ASSERT_EQ(cond.operand_field, nullptr);  // set-valued result: operands live in operand_set
+        ASSERT_NE(cond.min_value_field, nullptr);
+        ASSERT_NE(cond.max_value_field, nullptr);
+        ASSERT_EQ(cond.min_value_field->to_string(), *(operands.begin()));
+        ASSERT_EQ(cond.max_value_field->to_string(), *(operands.rbegin()));
+        ASSERT_EQ(cond.operand_set.size(), operands.size());
+        for (const auto& operand : cond.operand_set) {
+            ASSERT_EQ(operands.count(operand->to_string()), 1);
+        }
+    }
+
+    void calc_and_check(bool is_intersection,  // true: intersect cond2 into cond1, false: union
+                        Cond cond1, Cond cond2, CondOp op,  // NOTE(review): Cond copied by value - confirm it is safely copyable
+                        const std::string& operand = "",
+                        const std::set<std::string>& operands = {}) {
+        if (is_intersection) {
+            ASSERT_EQ(cond1.intersection_cond(cond2), OLAP_SUCCESS);
+        } else {
+            ASSERT_EQ(cond1.union_cond(cond2), OLAP_SUCCESS);
+        }
+        ASSERT_EQ(cond1.op, op);  // the merge result is stored in cond1
+        if (op != OP_NULL) {  // operands are not checked for an OP_NULL result
+            if (op == OP_IN || op == OP_NOT_IN) {
+                check_operands(cond1, operands);
+            } else {
+                check_operand(cond1, op, operand);
+            }
+        }
+    }
+
+private:
+    static const int32_t _col = 0;
+    TabletColumn _tablet_column;
+};
+
+TEST_F(CondTest, InitTest) {  // placeholder: no assertions yet
+}
+
+TEST_F(CondTest, IntersectionCondsTest) {
+    struct {  // op/operands of both inputs, then the expected merged op and operand(s)
+        std::string op1;
+        std::vector<std::string> operands1;
+        std::string op2;
+        std::vector<std::string> operands2;
+        CondOp result_op;
+        std::string result_operand;
+        std::set<std::string> result_operands;
+    } test_cases[] = {{"=", {"1"}, "=", {"1"}, OP_EQ, "1", {}},
+                      {"=", {"1"}, "=", {"2"}, OP_NULL, "", {}},
+                      {"=", {"1"}, "*=", {"1", "2", "3"}, OP_EQ, "1", {}},
+                      {"=", {"1"}, "*=", {"2", "3"}, OP_NULL, "", {}},
+
+                      {"!=", {"1"}, "!=", {"1"}, OP_NE, "1", {}},
+                      {"!=", {"1"}, "!=", {"2"}, OP_NOT_IN, "", {"1", "2"}},
+                      {"!=", {"1"}, "!*=", {"1"}, OP_NE, "1", {}},
+                      {"!=", {"1"}, "!*=", {"1", "2", "3"}, OP_NOT_IN, "", {"1", "2", "3"}},
+                      {"!=", {"1"}, "!*=", {"2", "3"}, OP_NOT_IN, "", {"1", "2", "3"}},
+                      {"!=", {"3"}, "!*=", {"1", "2"}, OP_NOT_IN, "", {"1", "2", "3"}},
+
+                      {"<", {"1"}, "<", {"1"}, OP_LT, "1", {}},
+                      {"<", {"1"}, "<", {"2"}, OP_LT, "1", {}},
+                      {"<", {"2"}, "<", {"1"}, OP_LT, "1", {}},
+                      {"<", {"1"}, "<=", {"1"}, OP_LT, "1", {}},
+
+                      {"<=", {"1"}, "<=", {"1"}, OP_LE, "1", {}},
+                      {"<=", {"1"}, "<=", {"2"}, OP_LE, "1", {}},
+                      {"<=", {"2"}, "<=", {"1"}, OP_LE, "1", {}},
+                      {"<=", {"1"}, "<", {"1"}, OP_LT, "1", {}},
+
+                      {">", {"1"}, ">", {"1"}, OP_GT, "1", {}},
+                      {">", {"1"}, ">", {"2"}, OP_GT, "2", {}},
+                      {">", {"2"}, ">", {"1"}, OP_GT, "2", {}},
+                      {">", {"1"}, ">=", {"1"}, OP_GT, "1", {}},
+
+                      {">=", {"1"}, ">=", {"1"}, OP_GE, "1", {}},
+                      {">=", {"1"}, ">=", {"2"}, OP_GE, "2", {}},
+                      {">=", {"2"}, ">=", {"1"}, OP_GE, "2", {}},
+                      {">=", {"1"}, ">", {"1"}, OP_GT, "1", {}},
+
+                      {"*=", {"1"}, "*=", {"2"}, OP_NULL, "", {}},
+                      {"*=", {"1", "2", "3"}, "*=", {"2"}, OP_EQ, "2", {}},
+                      {"*=", {"1", "2", "3"}, "*=", {"2", "3"}, OP_IN, "", {"2", "3"}},
+                      {"*=", {"1"}, "=", {"2"}, OP_NULL, "", {}},
+                      {"*=", {"1", "2"}, "=", {"1"}, OP_EQ, "1", {}},
+
+                      {"!*=", {"1", "2"}, "!*=", {"1", "3"}, OP_NOT_IN, "", {"1", "2", "3"}},
+                      {"!*=", {"1", "2", "3"}, "!=", {"2"}, OP_NOT_IN, "", {"1", "2", "3"}},
+                      {"!*=", {"1", "2"}, "!=", {"3"}, OP_NOT_IN, "", {"1", "2", "3"}},
+
+                      {"is", {"NULL"}, "is", {"NOT NULL"}, OP_NULL, "", {}},
+                      {"is", {"NOT NULL"}, "is", {"NULL"}, OP_NULL, "", {}},
+                      {"is", {"NULL"}, "is", {"NULL"}, OP_IS, "NULL", {}},
+                      {"is", {"NOT NULL"}, "is", {"NOT NULL"}, OP_IS, "NOT NULL", {}}};
+
+    int i = 0;  // index of the running case, reported on failure
+    for (const auto &test : test_cases) {
+        ASSERT_NO_FATAL_FAILURE(calc_and_check(true,
+                                               create_cond(test.op1, test.operands1),
+                                               create_cond(test.op2, test.operands2),
+                                               test.result_op,
+                                               test.result_operand,
+                                               test.result_operands)) << "error index: " << i;
+        ++i;
+    }
+}
+
+TEST_F(CondTest, UnionCondsTest) {
+    struct {  // op/operands of both inputs, then the expected merged op and operand(s)
+        std::string op1;
+        std::vector<std::string> operands1;
+        std::string op2;
+        std::vector<std::string> operands2;
+        CondOp result_op;
+        std::string result_operand;
+        std::set<std::string> result_operands;
+    } test_cases[] = {{"=", {"1"}, "=", {"1"}, OP_EQ, "1", {}},
+                      {"=", {"1"}, "=", {"2"}, OP_IN, "", {"1", "2"}},
+                      {"=", {"1"}, "*=", {"1", "2"}, OP_IN, "", {"1", "2"}},
+                      {"=", {"1"}, "*=", {"2", "3"}, OP_IN, "", {"1", "2", "3"}},
+
+                      {"!=", {"1"}, "!=", {"1"}, OP_NE, "1", {}},
+                      {"!=", {"1"}, "!=", {"2"}, OP_ALL, "", {}},
+                      {"!=", {"1"}, "!*=", {"2", "3"}, OP_ALL, "", {}},
+                      {"!=", {"1"}, "!*=", {"1", "2"}, OP_NE, "1", {}},
+
+                      {"<", {"1"}, "<", {"1"}, OP_LT, "1", {}},
+                      {"<", {"1"}, "<", {"2"}, OP_LT, "2", {}},
+                      {"<", {"2"}, "<", {"1"}, OP_LT, "2", {}},
+                      {"<", {"1"}, "<=", {"1"}, OP_LE, "1", {}},
+
+                      {"<=", {"1"}, "<=", {"1"}, OP_LE, "1", {}},
+                      {"<=", {"1"}, "<=", {"2"}, OP_LE, "2", {}},
+                      {"<=", {"2"}, "<=", {"1"}, OP_LE, "2", {}},
+                      {"<=", {"1"}, "<", {"1"}, OP_LE, "1", {}},
+
+                      {">", {"1"}, ">", {"1"}, OP_GT, "1", {}},
+                      {">", {"1"}, ">", {"2"}, OP_GT, "1", {}},
+                      {">", {"2"}, ">", {"1"}, OP_GT, "1", {}},
+                      {">", {"1"}, ">=", {"1"}, OP_GE, "1", {}},
+
+                      {">=", {"1"}, ">=", {"1"}, OP_GE, "1", {}},
+                      {">=", {"1"}, ">=", {"2"}, OP_GE, "1", {}},
+                      {">=", {"2"}, ">=", {"1"}, OP_GE, "1", {}},
+                      {">=", {"1"}, ">", {"1"}, OP_GE, "1", {}},
+
+                      {"*=", {"1"}, "*=", {"2"}, OP_IN, "", {"1", "2"}},
+                      {"*=", {"1", "2", "3"}, "*=", {"2", "4"}, OP_IN, "", {"1", "2", "3", "4"}},
+                      {"*=", {"1", "2", "3"}, "=", {"2"}, OP_IN, "", {"1", "2", "3"}},
+                      {"*=", {"2", "3"}, "=", {"1"}, OP_IN, "", {"1", "2", "3"}},
+                      {"*=", {"1", "2"}, "=", {"3"}, OP_IN, "", {"1", "2", "3"}},
+
+                      {"!*=", {"1"}, "!*=", {"2"}, OP_ALL, "", {}},
+                      {"!*=", {"1", "2", "3"}, "!*=", {"2", "4"}, OP_NE, "2", {}},
+                      {"!*=", {"1", "2", "3"}, "!*=", {"2", "3", "4"}, OP_NOT_IN, "", {"2", "3"}},
+                      {"!*=", {"1", "2"}, "!=", {"3"}, OP_ALL, "", {}},
+                      {"!*=", {"1", "2"}, "!=", {"1"}, OP_NE, "1", {}},
+
+                      {"is", {"NULL"}, "is", {"NOT NULL"}, OP_ALL, "", {}},
+                      {"is", {"NOT NULL"}, "is", {"NULL"}, OP_ALL, "", {}},
+                      {"is", {"NULL"}, "is", {"NULL"}, OP_IS, "NULL", {}},
+                      {"is", {"NOT NULL"}, "is", {"NOT NULL"}, OP_IS, "NOT NULL", {}}};
+    int i = 0;  // index of the running case, reported on failure
+    for (const auto &test : test_cases) {
+        ASSERT_NO_FATAL_FAILURE(calc_and_check(false,
+                                               create_cond(test.op1, test.operands1),
+                                               create_cond(test.op2, test.operands2),
+                                               test.result_op,
+                                               test.result_operand,
+                                               test.result_operands)) << "error index: " << i;
+        ++i;
+    }
+}
+
+} // namespace doris
+
+int main(int argc, char** argv) {
+    testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}