From c44da7bb8dc6c5db777a98f6232108a357ed5725 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Sat, 26 Dec 2020 06:59:34 -0500 Subject: [PATCH 01/20] Implementation of limit for index scans --- src/execution/compiler/codegen.cpp | 16 ++- .../operator/index_join_translator.cpp | 15 ++- .../compiler/operator/index_join_translator.h | 3 +- src/include/optimizer/logical_operators.h | 56 ++++++++++ src/include/optimizer/physical_operators.h | 49 ++++++++- src/include/optimizer/rule.h | 3 + .../optimizer/rules/transformation_rules.h | 74 +++++++++++++ .../planner/plannodes/index_join_plan_node.h | 21 ++++ .../planner/plannodes/plan_node_defs.h | 3 + src/optimizer/index_util.cpp | 35 ++++-- src/optimizer/optimizer.cpp | 1 + src/optimizer/physical_operators.cpp | 12 ++- src/optimizer/plan_generator.cpp | 26 ++++- src/optimizer/rule.cpp | 3 + src/optimizer/rules/implementation_rules.cpp | 102 ++++++++++++------ src/optimizer/rules/transformation_rules.cpp | 56 ++++++++++ 16 files changed, 417 insertions(+), 58 deletions(-) diff --git a/src/execution/compiler/codegen.cpp b/src/execution/compiler/codegen.cpp index 78ccdadfa0..11c104b499 100644 --- a/src/execution/compiler/codegen.cpp +++ b/src/execution/compiler/codegen.cpp @@ -428,27 +428,37 @@ ast::Expr *CodeGen::IndexIteratorScan(ast::Identifier iter, planner::IndexScanTy ast::Builtin builtin; bool asc_scan = false; bool use_limit = false; + bool limit_unusable = true; storage::index::ScanType asc_type; switch (scan_type) { case planner::IndexScanType::Exact: + // Exact scan does not take advantage of limit now, but it can builtin = ast::Builtin::IndexIteratorScanKey; break; + case planner::IndexScanType::AscendingClosedLimit: case planner::IndexScanType::AscendingClosed: + case planner::IndexScanType::AscendingOpenHighLimit: case planner::IndexScanType::AscendingOpenHigh: case planner::IndexScanType::AscendingOpenLow: case planner::IndexScanType::AscendingOpenBoth: - asc_scan = true; use_limit = true; + if (scan_type == planner::IndexScanType::AscendingOpenHighLimit || + scan_type == planner::IndexScanType::AscendingClosedLimit) + limit_unusable = false; + asc_scan = true; builtin = ast::Builtin::IndexIteratorScanAscending; - if (scan_type == planner::IndexScanType::AscendingClosed) + if (scan_type == planner::IndexScanType::AscendingClosed || + scan_type == planner::IndexScanType::AscendingClosedLimit) asc_type = storage::index::ScanType::Closed; - else if (scan_type == planner::IndexScanType::AscendingOpenHigh) + else if (scan_type == planner::IndexScanType::AscendingOpenHigh || + scan_type == planner::IndexScanType::AscendingOpenHighLimit) asc_type = storage::index::ScanType::OpenHigh; else if (scan_type == planner::IndexScanType::AscendingOpenLow) asc_type = storage::index::ScanType::OpenLow; else if (scan_type == planner::IndexScanType::AscendingOpenBoth) asc_type = storage::index::ScanType::OpenBoth; break; + // TODO(Deepayan): These cases are currently unreachable case planner::IndexScanType::Descending: builtin = ast::Builtin::IndexIteratorScanDescending; break; diff --git a/src/execution/compiler/operator/index_join_translator.cpp b/src/execution/compiler/operator/index_join_translator.cpp index 73b0bac96f..8c92497fc1 100644 --- a/src/execution/compiler/operator/index_join_translator.cpp +++ b/src/execution/compiler/operator/index_join_translator.cpp @@ -66,7 +66,10 @@ void IndexJoinTranslator::PerformPipelineWork(WorkContext *context, FunctionBuil // var hi_index_pr = @indexIteratorGetHiPR(&index_iter) DeclareIndexPR(function); // @prSet(lo_index_pr, ...) - FillKey(context, function, lo_index_pr_, op.GetLoIndexColumns()); + // TODO(Deepayan): fix flag here + bool flag = true; + FillKey(context, function, lo_index_pr_, op.GetLoIndexColumns(), flag); + //FillKey(context, function, lo_index_pr_, op.GetLoIndexColumns()); // @prSet(hi_index_pr, ...) FillKey(context, function, hi_index_pr_, op.GetHiIndexColumns()); @@ -200,16 +203,22 @@ void IndexJoinTranslator::DeclareSlot(noisepage::execution::compiler::FunctionBu void IndexJoinTranslator::FillKey( WorkContext *context, FunctionBuilder *builder, ast::Identifier pr, - const std::unordered_map &index_exprs) const { + const std::unordered_map &index_exprs, + const bool flag) const { // For each key attribute, + uint32_t count = 0; for (const auto &key : index_exprs) { // @prSet(pr, type, nullable, attr, expr, true) uint16_t attr_offset = index_pm_.at(key.first); type::TypeId attr_type = index_schema_.GetColumn(key.first.UnderlyingValue() - 1).Type(); bool nullable = index_schema_.GetColumn(key.first.UnderlyingValue() - 1).Nullable(); + auto *val = index_exprs.size() == count ? GetCodeGen()->BinaryOp(parsing::Token::Type::PLUS, context->DeriveValue(*key.second.Get(), this), GetCodeGen()->Const32(1)) + : context->DeriveValue(*key.second.Get(), this); + auto *set_key_call = GetCodeGen()->PRSet(GetCodeGen()->MakeExpr(pr), attr_type, nullable, attr_offset, - context->DeriveValue(*key.second.Get(), this), true); + val, true); builder->Append(GetCodeGen()->MakeStmt(set_key_call)); + count++; } } diff --git a/src/include/execution/compiler/operator/index_join_translator.h b/src/include/execution/compiler/operator/index_join_translator.h index 439d650eb5..8ac6712cb4 100644 --- a/src/include/execution/compiler/operator/index_join_translator.h +++ b/src/include/execution/compiler/operator/index_join_translator.h @@ -63,7 +63,8 @@ class IndexJoinTranslator : public OperatorTranslator, public PipelineDriver { void DeclareIterator(FunctionBuilder *builder) const; void SetOids(FunctionBuilder *builder) const; void FillKey(WorkContext *context, FunctionBuilder *builder, ast::Identifier pr, - const std::unordered_map &index_exprs) const; + const std::unordered_map &index_exprs, + const bool flag = false) const; void FreeIterator(FunctionBuilder *builder) const; void DeclareIndexPR(FunctionBuilder *builder) const; void DeclareTablePR(FunctionBuilder *builder) const; diff --git a/src/include/optimizer/logical_operators.h b/src/include/optimizer/logical_operators.h index 1deb062a47..b80faab2a6 100644 --- a/src/include/optimizer/logical_operators.h +++ b/src/include/optimizer/logical_operators.h @@ -117,6 +117,24 @@ class LogicalGet : public OperatorNodeContents { */ bool GetIsForUpdate() const { return is_for_update_; } + /** + * Sets the limit + */ + void SetLimit(uint32_t limit) { + limit_exists_ = true; + limit_ = limit; + } + + /** + * @returns whether the limit exists + */ + bool GetLimitExists() { return limit_exists_; } + + /** + * @returns value of the limit + */ + uint32_t GetLimit() { return limit_; } + private: /** * OID of the database @@ -142,6 +160,16 @@ class LogicalGet : public OperatorNodeContents { * Whether the scan is used for update */ bool is_for_update_; + + /** + * Whether limit exists + */ + bool limit_exists_; + + /** + * Limit value for get + */ + uint32_t limit_; }; /** @@ -487,11 +515,39 @@ class LogicalInnerJoin : public OperatorNodeContents { */ const std::vector &GetJoinPredicates() const { return join_predicates_; } + /** + * Sets the limit + */ + void SetLimit(uint32_t limit) { + limit_exists_ = true; + limit_ = limit; + } + + /** + * @returns whether the limit exists + */ + bool GetLimitExists() { return limit_exists_; } + + /** + * @returns value of the limit + */ + uint32_t GetLimit() { return limit_; } + private: /** * Join predicates */ std::vector join_predicates_; + + /** + * Whether limit exists + */ + bool limit_exists_; + + /** + * Limit value for get + */ + uint32_t limit_; }; /** diff --git a/src/include/optimizer/physical_operators.h b/src/include/optimizer/physical_operators.h index dd2932f42b..e024b70266 100644 --- a/src/include/optimizer/physical_operators.h +++ b/src/include/optimizer/physical_operators.h @@ -143,12 +143,15 @@ class IndexScan : public OperatorNodeContents { * @param is_for_update whether the scan is used for update * @param scan_type IndexScanType * @param bounds Bounds for IndexScan + * @param limit_exists whether a limit exists + * @param limit value of the limit * @return an IndexScan operator */ static Operator Make(catalog::db_oid_t database_oid, catalog::table_oid_t tbl_oid, catalog::index_oid_t index_oid, std::vector &&predicates, bool is_for_update, planner::IndexScanType scan_type, - std::unordered_map> bounds); + std::unordered_map> bounds, + bool limit_exists = false, uint32_t limit = 0); /** * Copy @@ -197,6 +200,16 @@ class IndexScan : public OperatorNodeContents { return bounds_; } + /** + * @return whether the limit exists + */ + bool GetLimitExists() const { return limit_exists_; } + + /** + * @return value of the limit + */ + uint32_t GetLimit() const { return limit_; } + private: /** * OID of the database @@ -232,6 +245,16 @@ class IndexScan : public OperatorNodeContents { * Bounds */ std::unordered_map> bounds_; + + /** + * Whether limit exists + */ + bool limit_exists_; + + /** + * Limit value for get + */ + uint32_t limit_; }; /** @@ -467,11 +490,13 @@ class InnerIndexJoin : public OperatorNodeContents { * @param scan_type IndexScanType * @param join_keys Join Keys * @param join_predicates predicates for join + * @param limit_exists whether a limit exists + * @param limit value of the limit * @return an InnerIndexJoin operator */ static Operator Make(catalog::table_oid_t tbl_oid, catalog::index_oid_t idx_oid, planner::IndexScanType scan_type, std::unordered_map> join_keys, - std::vector join_predicates); + std::vector join_predicates,bool limit_exists = false, uint32_t limit = 0); /** * Copy @@ -510,6 +535,16 @@ class InnerIndexJoin : public OperatorNodeContents { */ const std::vector &GetJoinPredicates() const { return join_predicates_; } + /** + * @return whether the limit exists + */ + bool GetLimitExists() const { return limit_exists_; } + + /** + * @return value of the limit + */ + uint32_t GetLimit() const { return limit_; } + private: /** * Table OID @@ -535,6 +570,16 @@ class InnerIndexJoin : public OperatorNodeContents { * Predicates for join */ std::vector join_predicates_; + + /** +* Whether limit exists +*/ + bool limit_exists_; + + /** + * Limit value for get + */ + uint32_t limit_; }; /** diff --git a/src/include/optimizer/rule.h b/src/include/optimizer/rule.h index d8f0fba659..345d3ce613 100644 --- a/src/include/optimizer/rule.h +++ b/src/include/optimizer/rule.h @@ -17,6 +17,8 @@ enum class RuleType : uint32_t { // Transformation rules (logical -> logical) INNER_JOIN_COMMUTE = 0, INNER_JOIN_ASSOCIATE, + SET_LIMIT_IN_GET, + SET_LIMIT_IN_LOGICAL_INNER_JOIN, // Don't move this one LogicalPhysicalDelimiter, @@ -83,6 +85,7 @@ enum class RuleType : uint32_t { */ enum class RuleSetName : uint32_t { PREDICATE_PUSH_DOWN = 0, + CLAUSE_PUSH_DOWN, UNNEST_SUBQUERY, LOGICAL_TRANSFORMATION, PHYSICAL_IMPLEMENTATION diff --git a/src/include/optimizer/rules/transformation_rules.h b/src/include/optimizer/rules/transformation_rules.h index c26cb6afb8..138853f2ba 100644 --- a/src/include/optimizer/rules/transformation_rules.h +++ b/src/include/optimizer/rules/transformation_rules.h @@ -66,4 +66,78 @@ class LogicalInnerJoinAssociativity : public Rule { OptimizationContext *context) const override; }; +/** + * Rule embeds a logical limit into a child scan operator. + * TODO(dpatra): After pruning stage, we should eliminate all limits with children get operators in the operator trees. + */ +class SetLimitInGet : public Rule { + public: + /** + * Constructor + */ + SetLimitInGet(); + + /** + * Gets the rule's promise to apply against a GroupExpression + * @param group_expr GroupExpression to compute promise from + * @returns The promise value of applying the rule for ordering + */ + RulePromise Promise(GroupExpression *group_expr) const override; + + /** + * Checks whether the given rule can be applied + * @param plan AbstractOptimizerNode to check + * @param context Current OptimizationContext executing under + * @returns Whether the input AbstractOptimizerNode passes the check + */ + bool Check(common::ManagedPointer plan, OptimizationContext *context) const override; + + /** + * Transforms the input expression using the given rule + * @param input Input AbstractOptimizerNode to transform + * @param transformed Vector of transformed AbstractOptimizerNodes + * @param context Current OptimizationContext executing under + */ + void Transform(common::ManagedPointer input, + std::vector> *transformed, + OptimizationContext *context) const override; +}; + +/** + * Rule embeds a logical limit into a child scan operator. + * TODO(dpatra): After pruning stage, we should eliminate all limits with children get operators in the operator trees. + */ +class SetLimitInLogicalInnerJoin : public Rule { + public: + /** + * Constructor + */ + SetLimitInLogicalInnerJoin(); + + /** + * Gets the rule's promise to apply against a GroupExpression + * @param group_expr GroupExpression to compute promise from + * @returns The promise value of applying the rule for ordering + */ + RulePromise Promise(GroupExpression *group_expr) const override; + + /** + * Checks whether the given rule can be applied + * @param plan AbstractOptimizerNode to check + * @param context Current OptimizationContext executing under + * @returns Whether the input AbstractOptimizerNode passes the check + */ + bool Check(common::ManagedPointer plan, OptimizationContext *context) const override; + + /** + * Transforms the input expression using the given rule + * @param input Input AbstractOptimizerNode to transform + * @param transformed Vector of transformed AbstractOptimizerNodes + * @param context Current OptimizationContext executing under + */ + void Transform(common::ManagedPointer input, + std::vector> *transformed, + OptimizationContext *context) const override; +}; + } // namespace noisepage::optimizer diff --git a/src/include/planner/plannodes/index_join_plan_node.h b/src/include/planner/plannodes/index_join_plan_node.h index 70ba970be7..d343eb8d1b 100644 --- a/src/include/planner/plannodes/index_join_plan_node.h +++ b/src/include/planner/plannodes/index_join_plan_node.h @@ -89,6 +89,16 @@ class IndexJoinPlanNode : public AbstractJoinPlanNode { return *this; } + /** + * @param limit number of tuples to limit to + * @return builder object + */ + Builder &SetScanLimit(uint32_t limit) { + scan_limit_ = limit; + scan_has_limit_ = true; + return *this; + } + private: /** * OID of the index @@ -99,6 +109,17 @@ class IndexJoinPlanNode : public AbstractJoinPlanNode { */ catalog::table_oid_t table_oid_; + /** + * The number of tuples that this scan should emit due to a LIMIT clause. + */ + uint32_t scan_limit_{0}; + + /** + * Flag to indicate if scan_limit_ is set + */ + bool scan_has_limit_{false}; + + IndexScanType scan_type_; std::unordered_map lo_index_cols_{}; std::unordered_map hi_index_cols_{}; diff --git a/src/include/planner/plannodes/plan_node_defs.h b/src/include/planner/plannodes/plan_node_defs.h index 0dce286ae5..6d14a3d135 100644 --- a/src/include/planner/plannodes/plan_node_defs.h +++ b/src/include/planner/plannodes/plan_node_defs.h @@ -141,9 +141,12 @@ enum class SetOpType { INVALID = INVALID_TYPE_ID, INTERSECT = 1, INTERSECT_ALL = using IndexExpression = common::ManagedPointer; /** Type of index scan. */ enum class IndexScanType : uint8_t { + Dummy, Exact, AscendingClosed, + AscendingClosedLimit, AscendingOpenHigh, + AscendingOpenHighLimit, AscendingOpenLow, AscendingOpenBoth, diff --git a/src/optimizer/index_util.cpp b/src/optimizer/index_util.cpp index 5c6f7a3761..1b548826ab 100644 --- a/src/optimizer/index_util.cpp +++ b/src/optimizer/index_util.cpp @@ -176,7 +176,8 @@ bool IndexUtil::CheckPredicates( if (open_highs.empty() && open_lows.empty()) return false; // Check predicate open/close ordering - planner::IndexScanType scan_type = planner::IndexScanType::AscendingClosed; + // TODO(Deepayan): Check that this initialization gets around Exact initalization issues + planner::IndexScanType scan_type = planner::IndexScanType::Dummy; if (open_highs.size() == open_lows.size() && open_highs.size() == schema.GetColumns().size()) { // Generally on multi-column indexes, exact would result in comparing against unspecified attribute. // Only try to do an exact key lookup if potentially all attributes are specified. @@ -196,14 +197,23 @@ bool IndexUtil::CheckPredicates( // A range is defined but we are doing exact scans, so make ascending closed // If already doing ascending closed, ascending open then it would be a matter of // picking the right low/high key at the plan_generator stage of processing. - if (open_highs[oid] != open_lows[oid] && scan_type == planner::IndexScanType::Exact) - scan_type = planner::IndexScanType::AscendingClosed; + if (open_highs[oid] != open_lows[oid]) { + if (scan_type == planner::IndexScanType::Exact || scan_type == planner::IndexScanType::Dummy) + scan_type = planner::IndexScanType::AscendingClosedLimit; + else if (scan_type == planner::IndexScanType::AscendingClosedLimit) + scan_type = planner::IndexScanType::AscendingClosed; + else if (scan_type == planner::IndexScanType::AscendingOpenHighLimit) + scan_type = planner::IndexScanType::AscendingOpenHigh; + } } else if (open_highs.find(oid) != open_highs.end()) { - if (scan_type == planner::IndexScanType::Exact || scan_type == planner::IndexScanType::AscendingClosed || - scan_type == planner::IndexScanType::AscendingOpenHigh) { + if (scan_type == planner::IndexScanType::Exact || scan_type == planner::IndexScanType::Dummy) + scan_type = planner::IndexScanType::AscendingOpenHighLimit; + else if (scan_type == planner::IndexScanType::AscendingClosed || + scan_type == planner::IndexScanType::AscendingClosedLimit || + scan_type == planner::IndexScanType::AscendingOpenHigh || + scan_type == planner::IndexScanType::AscendingOpenHighLimit) scan_type = planner::IndexScanType::AscendingOpenHigh; - - } else { + else { // OpenHigh scan is not compatible with an OpenLow scan // Revert to a sequential scan break; @@ -211,7 +221,10 @@ bool IndexUtil::CheckPredicates( bounds->insert(std::make_pair( oid, std::vector{open_highs[oid], planner::IndexExpression(nullptr)})); } else if (open_lows.find(oid) != open_lows.end()) { - if (scan_type == planner::IndexScanType::Exact || scan_type == planner::IndexScanType::AscendingClosed || + if (scan_type == planner::IndexScanType::Exact || + scan_type == planner::IndexScanType::Dummy || + scan_type == planner::IndexScanType::AscendingClosed || + scan_type == planner::IndexScanType::AscendingClosedLimit || scan_type == planner::IndexScanType::AscendingOpenLow) { scan_type = planner::IndexScanType::AscendingOpenLow; } else { @@ -233,6 +246,12 @@ bool IndexUtil::CheckPredicates( return false; } + // Lower scan type to valid type + // Note: if scan type could be exact, it would have already been set by this point + // TODO(Deepayan): It may be possible to get away with being more lenient here on the index scan type to push down + // limits but needs further investigation + if (scan_type == planner::IndexScanType::Dummy) scan_type = planner::IndexScanType::AscendingClosed; + *idx_scan_type = scan_type; return !bounds->empty(); } diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index eb28c2f35f..4c98a64109 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -126,6 +126,7 @@ void Optimizer::OptimizeLoop(group_id_t root_group_id, PropertySet *required_pro context_->AddOptimizationContext(root_context); // Perform rewrite first + task_stack->Push(new TopDownRewrite(root_group_id, root_context, RuleSetName::CLAUSE_PUSH_DOWN)); task_stack->Push(new TopDownRewrite(root_group_id, root_context, RuleSetName::PREDICATE_PUSH_DOWN)); task_stack->Push(new BottomUpRewrite(root_group_id, root_context, RuleSetName::UNNEST_SUBQUERY, false)); ExecuteTaskStack(task_stack, root_group_id, root_context); diff --git a/src/optimizer/physical_operators.cpp b/src/optimizer/physical_operators.cpp index 35f66c254e..7fd4d72103 100644 --- a/src/optimizer/physical_operators.cpp +++ b/src/optimizer/physical_operators.cpp @@ -84,7 +84,8 @@ BaseOperatorNodeContents *IndexScan::Copy() const { return new IndexScan(*this); Operator IndexScan::Make(catalog::db_oid_t database_oid, catalog::table_oid_t tbl_oid, catalog::index_oid_t index_oid, std::vector &&predicates, bool is_for_update, planner::IndexScanType scan_type, - std::unordered_map> bounds) { + std::unordered_map> bounds, + bool limit_exists, uint32_t limit) { auto *scan = new IndexScan(); scan->database_oid_ = database_oid; scan->tbl_oid_ = tbl_oid; @@ -93,6 +94,8 @@ Operator IndexScan::Make(catalog::db_oid_t database_oid, catalog::table_oid_t tb scan->predicates_ = std::move(predicates); scan->scan_type_ = scan_type; scan->bounds_ = std::move(bounds); + scan->limit_exists_ = limit_exists; + scan->limit_ = limit; return Operator(common::ManagedPointer(scan)); } @@ -279,13 +282,14 @@ BaseOperatorNodeContents *InnerIndexJoin::Copy() const { return new InnerIndexJo Operator InnerIndexJoin::Make( catalog::table_oid_t tbl_oid, catalog::index_oid_t idx_oid, planner::IndexScanType scan_type, std::unordered_map> join_keys, - std::vector join_predicates) { + std::vector join_predicates, + bool limit_exists, uint32_t limit) { auto *join = new InnerIndexJoin(); join->tbl_oid_ = tbl_oid; join->idx_oid_ = idx_oid; join->join_keys_ = std::move(join_keys); - join->join_predicates_ = std::move(join_predicates); - join->scan_type_ = scan_type; + join->limit_exists_ = limit_exists; + join->limit_ = limit; return Operator(common::ManagedPointer(join)); } diff --git a/src/optimizer/plan_generator.cpp b/src/optimizer/plan_generator.cpp index 6d6afcd1e2..e9fbedbe21 100644 --- a/src/optimizer/plan_generator.cpp +++ b/src/optimizer/plan_generator.cpp @@ -243,21 +243,29 @@ void PlanGenerator::Visit(const IndexScan *op) { if (type == planner::IndexScanType::Exact) { // Exact lookup builder.AddIndexColumn(bound.first, bound.second[0]); - } else if (type == planner::IndexScanType::AscendingClosed) { + } else if (type == planner::IndexScanType::AscendingClosed || type == planner::IndexScanType::AscendingClosedLimit) { // Range lookup, so use lo and hi builder.AddLoIndexColumn(bound.first, bound.second[0]); builder.AddHiIndexColumn(bound.first, bound.second[1]); - } else if (type == planner::IndexScanType::AscendingOpenHigh) { + } else if (type == planner::IndexScanType::AscendingOpenHigh || type == planner::IndexScanType::AscendingOpenHighLimit) { // Open high scan, so use only lo builder.AddLoIndexColumn(bound.first, bound.second[0]); } else if (type == planner::IndexScanType::AscendingOpenLow) { // Open low scan, so use only high builder.AddHiIndexColumn(bound.first, bound.second[1]); } else if (type == planner::IndexScanType::AscendingOpenBoth) { - // No bounds need to be set + // No bounds need to be set; + } else { + // No other scan type are supported + NOISEPAGE_ASSERT(0, "Unreachable"); } } + // Check that the limit is set in the operator + if (op->GetLimitExists()) { + builder.SetScanLimit(op->GetLimit()); + } + output_plan_ = builder.Build(); } @@ -539,7 +547,7 @@ void PlanGenerator::Visit(const InnerIndexJoin *op) { builder.AddLoIndexColumn(bound.first, common::ManagedPointer(key)); builder.AddHiIndexColumn(bound.first, common::ManagedPointer(key)); - } else if (type == planner::IndexScanType::AscendingClosed) { + } else if (type == planner::IndexScanType::AscendingClosed || type == planner::IndexScanType::AscendingClosedLimit) { // Range lookup, so use lo and hi auto lkey = parser::ExpressionUtil::EvaluateExpression(children_expr_map_, bound.second[0]).release(); auto hkey = parser::ExpressionUtil::EvaluateExpression(children_expr_map_, bound.second[1]).release(); @@ -548,7 +556,7 @@ void PlanGenerator::Visit(const InnerIndexJoin *op) { builder.AddLoIndexColumn(bound.first, common::ManagedPointer(lkey)); builder.AddHiIndexColumn(bound.first, common::ManagedPointer(hkey)); - } else if (type == planner::IndexScanType::AscendingOpenHigh) { + } else if (type == planner::IndexScanType::AscendingOpenHigh || type == planner::IndexScanType::AscendingOpenHighLimit) { // Open high scan, so use only lo auto key = parser::ExpressionUtil::EvaluateExpression(children_expr_map_, bound.second[0]).release(); RegisterPointerCleanup(key, true, true); @@ -561,9 +569,17 @@ void PlanGenerator::Visit(const InnerIndexJoin *op) { } else if (type == planner::IndexScanType::AscendingOpenBoth) { // No bounds need to be set NOISEPAGE_ASSERT(0, "Unreachable"); + } else { + // No other scan type are supported + NOISEPAGE_ASSERT(0, "Unreachable"); } } + // Check that the limit is set in the operator + if (op->GetLimitExists()) { + builder.SetScanLimit(op->GetLimit()); + } + output_plan_ = builder.Build(); } diff --git a/src/optimizer/rule.cpp b/src/optimizer/rule.cpp index f133423fbd..64b6b8822d 100644 --- a/src/optimizer/rule.cpp +++ b/src/optimizer/rule.cpp @@ -60,6 +60,9 @@ RuleSet::RuleSet() { AddRule(RuleSetName::PREDICATE_PUSH_DOWN, new RewriteCombineConsecutiveFilter()); AddRule(RuleSetName::PREDICATE_PUSH_DOWN, new RewriteEmbedFilterIntoGet()); + AddRule(RuleSetName::CLAUSE_PUSH_DOWN, new SetLimitInGet()); + AddRule(RuleSetName::CLAUSE_PUSH_DOWN, new SetLimitInLogicalInnerJoin()); + AddRule(RuleSetName::UNNEST_SUBQUERY, new RewritePullFilterThroughMarkJoin()); AddRule(RuleSetName::UNNEST_SUBQUERY, new UnnestMarkJoinToInnerJoin()); AddRule(RuleSetName::UNNEST_SUBQUERY, new UnnestSingleJoinToInnerJoin()); diff --git a/src/optimizer/rules/implementation_rules.cpp b/src/optimizer/rules/implementation_rules.cpp index f62c13d516..c9dc1477d2 100644 --- a/src/optimizer/rules/implementation_rules.cpp +++ b/src/optimizer/rules/implementation_rules.cpp @@ -117,43 +117,77 @@ void LogicalGetToPhysicalIndexScan::Transform(common::ManagedPointerGetDatabaseOid(); bool is_update = get->GetIsForUpdate(); auto *accessor = context->GetOptimizerContext()->GetCatalogAccessor(); + auto limit_exists = get->GetLimitExists(); + auto limit = get->GetLimit(); + auto indexes = accessor->GetIndexOids(get->GetTableOid()); + // Get sort property auto sort = context->GetRequiredProperties()->GetPropertyOfType(PropertyType::SORT); - std::vector sort_col_ids; - if (sort != nullptr) { - // Check if can satisfy sort property with an index - auto sort_prop = sort->As(); - if (IndexUtil::CheckSortProperty(sort_prop)) { - auto indexes = accessor->GetIndexOids(get->GetTableOid()); - for (auto index : indexes) { - if (IndexUtil::SatisfiesSortWithIndex(accessor, sort_prop, get->GetTableOid(), index)) { - std::vector preds = get->GetPredicates(); + bool sort_exists = sort != nullptr; + auto sort_prop = sort_exists ? sort->As() : nullptr; + bool sort_possible = sort_exists && IndexUtil::CheckSortProperty(sort_prop); + bool predicate_exists = !get->GetPredicates().empty(); + + if (predicate_exists) { + // Apply predicates if they exist and attempt to use indexes that match at least one predicate + for (auto &index : indexes) { + planner::IndexScanType scan_type; + std::unordered_map> bounds; + std::vector preds = get->GetPredicates(); + // Check whether any index can fulfill in-order predicate evaluation + if (IndexUtil::SatisfiesPredicateWithIndex(accessor, get->GetTableOid(), get->GetTableAlias(), index, preds, + allow_cves_, &scan_type, &bounds)) { + // Limit can only be pushed down in the following cases + // TODO(Deepayan): Check if limit can actually be pushed down in the case of an exact scan + limit_exists = scan_type == planner::IndexScanType::Exact || + scan_type == planner::IndexScanType::AscendingOpenHighLimit || + scan_type == planner::IndexScanType::AscendingClosedLimit || + scan_type == planner::IndexScanType::DescendingLimit; + preds = get->GetPredicates(); + + // There is an index that satisfies predicates for at least one column + if (sort_exists) { + if (sort_possible && IndexUtil::SatisfiesSortWithIndex(accessor, sort_prop, get->GetTableOid(), index)) { + // Index also satisfies sort properties so can potentially push down limit and sort + auto op = std::make_unique( + IndexScan::Make(db_oid, get->GetTableOid(), index, std::move(preds), is_update, scan_type, + std::move(bounds), limit_exists, limit) + .RegisterWithTxnContext(context->GetOptimizerContext()->GetTxn()), + std::vector>(), context->GetOptimizerContext()->GetTxn()); + transformed->emplace_back(std::move(op)); + } else { + // Index does not satisfy existing sort property so cannot push down limit + // Instead must rely on parent OrderBy produced in generating physical plan + auto op = std::make_unique( + IndexScan::Make(db_oid, get->GetTableOid(), index, std::move(preds), is_update, scan_type, + std::move(bounds)) + .RegisterWithTxnContext(context->GetOptimizerContext()->GetTxn()), + std::vector>(), context->GetOptimizerContext()->GetTxn()); + transformed->emplace_back(std::move(op)); + } + } else { + // Index scan does not have sort property so can potentially push down limit auto op = std::make_unique( - IndexScan::Make(db_oid, get->GetTableOid(), index, std::move(preds), is_update, - planner::IndexScanType::AscendingOpenBoth, {}) + IndexScan::Make(db_oid, get->GetTableOid(), index, std::move(preds), is_update, scan_type, + std::move(bounds), limit_exists, limit) .RegisterWithTxnContext(context->GetOptimizerContext()->GetTxn()), std::vector>(), context->GetOptimizerContext()->GetTxn()); transformed->emplace_back(std::move(op)); } } } - } - - // Check whether any index can fulfill predicate predicate evaluation - if (!get->GetPredicates().empty()) { - // Find match index for the predicates - auto indexes = accessor->GetIndexOids(get->GetTableOid()); + } else if (sort_exists) { + // If no predicates exist, index itself must satisfy sort properties and can push down limit if such an index exists + // If sort property is not satisfied, should default to sequential scan + // NOTE: This assumes no external predicates exist; if they do, remove the limit for (auto &index : indexes) { - planner::IndexScanType scan_type; - std::unordered_map> bounds; - std::vector preds = get->GetPredicates(); - if (IndexUtil::SatisfiesPredicateWithIndex(accessor, get->GetTableOid(), get->GetTableAlias(), index, preds, - allow_cves_, &scan_type, &bounds)) { - auto op = std::make_unique(IndexScan::Make(db_oid, get->GetTableOid(), index, std::move(preds), - is_update, scan_type, std::move(bounds)) - .RegisterWithTxnContext(context->GetOptimizerContext()->GetTxn()), - std::vector>(), - context->GetOptimizerContext()->GetTxn()); + if (sort_possible && IndexUtil::SatisfiesSortWithIndex(accessor, sort_prop, get->GetTableOid(), index)) { + std::vector preds = get->GetPredicates(); + auto op = std::make_unique( + IndexScan::Make(db_oid, get->GetTableOid(), index, std::move(preds), is_update, + planner::IndexScanType::AscendingOpenBoth, {}, limit_exists, limit) + .RegisterWithTxnContext(context->GetOptimizerContext()->GetTxn()), + std::vector>(), context->GetOptimizerContext()->GetTxn()); transformed->emplace_back(std::move(op)); } } @@ -487,12 +521,16 @@ void LogicalInnerJoinToPhysicalInnerIndexJoin::Transform( std::vector join_preds = r_child->GetPredicates(); for (auto &pred : inner_join->GetJoinPredicates()) join_preds.push_back(pred); + // Get inner join limit information + auto limit_exists = inner_join->GetLimitExists(); + auto limit = inner_join->GetLimit(); + std::vector> empty; - auto new_child = - std::make_unique(LogicalGet::Make(r_child->GetDatabaseOid(), r_child->GetTableOid(), join_preds, - r_child->GetTableAlias(), r_child->GetIsForUpdate()) - .RegisterWithTxnContext(context->GetOptimizerContext()->GetTxn()), - std::move(empty), context->GetOptimizerContext()->GetTxn()); + auto child_get = LogicalGet::Make(r_child->GetDatabaseOid(), r_child->GetTableOid(), join_preds, + r_child->GetTableAlias(), r_child->GetIsForUpdate()); + if (limit_exists) child_get.GetContentsAs()->SetLimit(limit); + auto new_child = std::make_unique(child_get.RegisterWithTxnContext(context->GetOptimizerContext()->GetTxn()), + std::move(empty), context->GetOptimizerContext()->GetTxn()); std::vector> transform; LogicalGetToPhysicalIndexScan idx_scan_transform; diff --git a/src/optimizer/rules/transformation_rules.cpp b/src/optimizer/rules/transformation_rules.cpp index 1da8140919..79cd96e607 100644 --- a/src/optimizer/rules/transformation_rules.cpp +++ b/src/optimizer/rules/transformation_rules.cpp @@ -158,4 +158,60 @@ void LogicalInnerJoinAssociativity::Transform(common::ManagedPointeremplace_back(std::move(new_parent_join)); } +/////////////////////////////////////////////////////////////////////////////// +/// SetLimitInGet +/////////////////////////////////////////////////////////////////////////////// +SetLimitInGet::SetLimitInGet() { + type_ = RuleType::SET_LIMIT_IN_GET; + + match_pattern_ = new Pattern(OpType::LOGICALLIMIT); + auto child = new Pattern(OpType::LOGICALGET); + + match_pattern_->AddChild(child); +} + +bool SetLimitInGet::Check(common::ManagedPointer plan, OptimizationContext *context) const { + (void)context; + (void)plan; + return true; +} + +RulePromise SetLimitInGet::Promise(GroupExpression *group_expr) const { return RulePromise::LOGICAL_PROMISE; } + +void SetLimitInGet::Transform(common::ManagedPointer input, + std::vector> *transformed, + UNUSED_ATTRIBUTE OptimizationContext *context) const { + auto get = input->GetChildren()[0]->Contents()->GetContentsAs(); + size_t limit = input->Contents()->GetContentsAs()->GetLimit(); + get->SetLimit(limit); +} + +/////////////////////////////////////////////////////////////////////////////// +/// SetLimitInGet +/////////////////////////////////////////////////////////////////////////////// +SetLimitInLogicalInnerJoin::SetLimitInLogicalInnerJoin() { + type_ = RuleType::SET_LIMIT_IN_LOGICAL_INNER_JOIN; + + match_pattern_ = new Pattern(OpType::LOGICALLIMIT); + auto child = new Pattern(OpType::LOGICALINNERJOIN); + + match_pattern_->AddChild(child); +} + +bool SetLimitInLogicalInnerJoin::Check(common::ManagedPointer plan, OptimizationContext *context) const { + (void)context; + (void)plan; + return true; +} + +RulePromise SetLimitInLogicalInnerJoin::Promise(GroupExpression *group_expr) const { return RulePromise::LOGICAL_PROMISE; } + +void SetLimitInLogicalInnerJoin::Transform(common::ManagedPointer input, + std::vector> *transformed, + UNUSED_ATTRIBUTE OptimizationContext *context) const { + auto join = input->GetChildren()[0]->Contents()->GetContentsAs(); + size_t limit = input->Contents()->GetContentsAs()->GetLimit(); + join->SetLimit(limit); +} + } // namespace noisepage::optimizer From 531a6dc3233f6547fa155abbb6b044fb6a30cdca Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Sun, 27 Dec 2020 01:53:03 -0500 Subject: [PATCH 02/20] Frontend fix to limit pushdown for join --- src/optimizer/rules/implementation_rules.cpp | 4 +++- src/optimizer/rules/transformation_rules.cpp | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/optimizer/rules/implementation_rules.cpp b/src/optimizer/rules/implementation_rules.cpp index c9dc1477d2..dffac5fdf1 100644 --- a/src/optimizer/rules/implementation_rules.cpp +++ b/src/optimizer/rules/implementation_rules.cpp @@ -544,10 +544,12 @@ void LogicalInnerJoinToPhysicalInnerIndexJoin::Transform( if (!idx_scan->GetBounds().empty()) { std::vector> child; child.emplace_back(children[0]->Copy()); + bool scan_limit_exists = idx_scan->GetLimitExists(); + auto scan_limit = idx_scan->GetLimit(); auto result = std::make_unique( InnerIndexJoin::Make(idx_scan->GetTableOID(), idx_scan->GetIndexOID(), idx_scan->GetIndexScanType(), - idx_scan->GetBounds(), join_preds) + idx_scan->GetBounds(), join_preds, scan_limit_exists, scan_limit) .RegisterWithTxnContext(context->GetOptimizerContext()->GetTxn()), std::move(child), context->GetOptimizerContext()->GetTxn()); transformed->emplace_back(std::move(result)); diff --git a/src/optimizer/rules/transformation_rules.cpp b/src/optimizer/rules/transformation_rules.cpp index 79cd96e607..31b31c1059 100644 --- a/src/optimizer/rules/transformation_rules.cpp +++ b/src/optimizer/rules/transformation_rules.cpp @@ -192,9 +192,13 @@ void SetLimitInGet::Transform(common::ManagedPointer inpu SetLimitInLogicalInnerJoin::SetLimitInLogicalInnerJoin() { type_ = RuleType::SET_LIMIT_IN_LOGICAL_INNER_JOIN; - match_pattern_ = new Pattern(OpType::LOGICALLIMIT); auto child = new Pattern(OpType::LOGICALINNERJOIN); + auto left_get = new Pattern(OpType::LEAF); + auto right_get = new Pattern(OpType::LEAF); + child->AddChild(left_get); + child->AddChild(right_get); + match_pattern_ = new Pattern(OpType::LOGICALLIMIT); match_pattern_->AddChild(child); } From 0fd9405d370d8b8ca3b1d07802b3a734b5051c4c Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Sun, 27 Dec 2020 06:14:27 -0500 Subject: [PATCH 03/20] Complete limit pushdown for join --- src/optimizer/physical_operators.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/optimizer/physical_operators.cpp b/src/optimizer/physical_operators.cpp index 7fd4d72103..a2269e4a39 100644 --- a/src/optimizer/physical_operators.cpp +++ b/src/optimizer/physical_operators.cpp @@ -287,7 +287,9 @@ Operator InnerIndexJoin::Make( auto *join = new InnerIndexJoin(); join->tbl_oid_ = tbl_oid; join->idx_oid_ = idx_oid; + join->scan_type_ = scan_type; join->join_keys_ = std::move(join_keys); + join->join_predicates_ = std::move(join_predicates); join->limit_exists_ = limit_exists; join->limit_ = limit; return Operator(common::ManagedPointer(join)); From 9cbeda2d8deb748154eefde7a0f8300013d2c8a2 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Mon, 28 Dec 2020 20:48:35 -0500 Subject: [PATCH 04/20] Minor cleanups --- src/execution/compiler/codegen.cpp | 2 +- .../compiler/operator/index_join_translator.cpp | 12 +++--------- src/optimizer/index_util.cpp | 3 +-- src/optimizer/rules/implementation_rules.cpp | 3 ++- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/execution/compiler/codegen.cpp b/src/execution/compiler/codegen.cpp index 11c104b499..0cabe1ff27 100644 --- a/src/execution/compiler/codegen.cpp +++ b/src/execution/compiler/codegen.cpp @@ -458,7 +458,7 @@ ast::Expr *CodeGen::IndexIteratorScan(ast::Identifier iter, planner::IndexScanTy else if (scan_type == planner::IndexScanType::AscendingOpenBoth) asc_type = storage::index::ScanType::OpenBoth; break; - // TODO(Deepayan): These cases are currently unreachable + // TODO(dpatra): These cases are currently unreachable case planner::IndexScanType::Descending: builtin = ast::Builtin::IndexIteratorScanDescending; break; diff --git a/src/execution/compiler/operator/index_join_translator.cpp b/src/execution/compiler/operator/index_join_translator.cpp index 8c92497fc1..07f520994d 100644 --- a/src/execution/compiler/operator/index_join_translator.cpp +++ b/src/execution/compiler/operator/index_join_translator.cpp @@ -66,9 +66,7 @@ void IndexJoinTranslator::PerformPipelineWork(WorkContext *context, FunctionBuil // var hi_index_pr = @indexIteratorGetHiPR(&index_iter) DeclareIndexPR(function); // @prSet(lo_index_pr, ...) - // TODO(Deepayan): fix flag here - bool flag = true; - FillKey(context, function, lo_index_pr_, op.GetLoIndexColumns(), flag); + FillKey(context, function, lo_index_pr_, op.GetLoIndexColumns()); //FillKey(context, function, lo_index_pr_, op.GetLoIndexColumns()); // @prSet(hi_index_pr, ...) FillKey(context, function, hi_index_pr_, op.GetHiIndexColumns()); @@ -203,8 +201,7 @@ void IndexJoinTranslator::DeclareSlot(noisepage::execution::compiler::FunctionBu void IndexJoinTranslator::FillKey( WorkContext *context, FunctionBuilder *builder, ast::Identifier pr, - const std::unordered_map &index_exprs, - const bool flag) const { + const std::unordered_map &index_exprs) const { // For each key attribute, uint32_t count = 0; for (const auto &key : index_exprs) { @@ -212,11 +209,8 @@ void IndexJoinTranslator::FillKey( uint16_t attr_offset = index_pm_.at(key.first); type::TypeId attr_type = index_schema_.GetColumn(key.first.UnderlyingValue() - 1).Type(); bool nullable = index_schema_.GetColumn(key.first.UnderlyingValue() - 1).Nullable(); - auto *val = index_exprs.size() == count ? GetCodeGen()->BinaryOp(parsing::Token::Type::PLUS, context->DeriveValue(*key.second.Get(), this), GetCodeGen()->Const32(1)) - : context->DeriveValue(*key.second.Get(), this); - auto *set_key_call = GetCodeGen()->PRSet(GetCodeGen()->MakeExpr(pr), attr_type, nullable, attr_offset, - val, true); + context->DeriveValue(*key.second.Get(), this), true); builder->Append(GetCodeGen()->MakeStmt(set_key_call)); count++; } diff --git a/src/optimizer/index_util.cpp b/src/optimizer/index_util.cpp index 1b548826ab..c1aea23fbb 100644 --- a/src/optimizer/index_util.cpp +++ b/src/optimizer/index_util.cpp @@ -176,7 +176,6 @@ bool IndexUtil::CheckPredicates( if (open_highs.empty() && open_lows.empty()) return false; // Check predicate open/close ordering - // TODO(Deepayan): Check that this initialization gets around Exact initalization issues planner::IndexScanType scan_type = planner::IndexScanType::Dummy; if (open_highs.size() == open_lows.size() && open_highs.size() == schema.GetColumns().size()) { // Generally on multi-column indexes, exact would result in comparing against unspecified attribute. @@ -248,7 +247,7 @@ bool IndexUtil::CheckPredicates( // Lower scan type to valid type // Note: if scan type could be exact, it would have already been set by this point - // TODO(Deepayan): It may be possible to get away with being more lenient here on the index scan type to push down + // TODO(dpatra): It may be possible to get away with being more lenient here on the index scan type to push down // limits but needs further investigation if (scan_type == planner::IndexScanType::Dummy) scan_type = planner::IndexScanType::AscendingClosed; diff --git a/src/optimizer/rules/implementation_rules.cpp b/src/optimizer/rules/implementation_rules.cpp index dffac5fdf1..9f97a6fda8 100644 --- a/src/optimizer/rules/implementation_rules.cpp +++ b/src/optimizer/rules/implementation_rules.cpp @@ -138,7 +138,8 @@ void LogicalGetToPhysicalIndexScan::Transform(common::ManagedPointerGetTableOid(), get->GetTableAlias(), index, preds, allow_cves_, &scan_type, &bounds)) { // Limit can only be pushed down in the following cases - // TODO(Deepayan): Check if limit can actually be pushed down in the case of an exact scan + // TODO(dpatra): Limit can be pushed down in the case of an exact scan as well, but the index framework + // doesn't currently support it. limit_exists = scan_type == planner::IndexScanType::Exact || scan_type == planner::IndexScanType::AscendingOpenHighLimit || scan_type == planner::IndexScanType::AscendingClosedLimit || From 2524271db4e4c1bc1844f3b1fc6a196381765241 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Mon, 28 Dec 2020 21:15:23 -0500 Subject: [PATCH 05/20] Add tests and documentation --- docs/optimization.md | 153 ++++ script/testing/junit/sql/index-select.sql | 50 ++ script/testing/junit/traces/index-select.test | 792 ++++++++++++++++++ src/include/catalog/catalog_defs.h | 5 + .../compiler/operator/index_join_translator.h | 3 +- 5 files changed, 1001 insertions(+), 2 deletions(-) create mode 100644 docs/optimization.md create mode 100644 script/testing/junit/sql/index-select.sql create mode 100644 script/testing/junit/traces/index-select.test diff --git a/docs/optimization.md b/docs/optimization.md new file mode 100644 index 0000000000..eba9d458ed --- /dev/null +++ b/docs/optimization.md @@ -0,0 +1,153 @@ +# A Developer's Guide to the Optimizer (SELECT version) + +Query optimization is the process by which a physical operator query plan is generated from an input SQL statement. In this document, we will go through the process of optimizing a simple `SELECT` statement in NoisePage. Other queries will follow a similar format, but might have different sets of rules explored. + +The example referred to in this guide is generated from the query + +```sql +SELECT NO_O_ID FROM NEW_ORDER +WHERE NO_D_ID = 4 AND NO_W_ID = 1 +ORDER BY NO_O_ID ASC +LIMIT 5; +``` + +run on a loaded TPCC dataset with `ScaleFactor=4`. + +# 1. Traffic Cop Layer + +Optimization starts after the binding process in our system's traffic cop. + +In `traffic_cop.cpp`, `TrafficCop::OptimizeBoundQuery` takes as input a: + +- `const common::ManagedPointer connection_ctx`: A context that stores the state of the active connection to the client +- `const common::ManagedPointer query`: The output statements and expressions generated by the parser + +and will generate as output a: + +- `std::unique_ptr physical_plan`: the generated physical plan for the query + +This function, in turn, calls `TrafficCopUtil::Optimize` that takes in additional arguments to track the active transaction, catalog information, the internal database ID, statistics, the active cost model, and a timeout period for the optimizer, which explored further below. + +# 2. Input Transformation + +The first step within the optimization process is to transform the input `SQLStatement` to a set of logical expressions,`AbstractOptimizerNode logical_exprs`, in `QueryToOperatorTransformer::ConvertToOpExpression`. In the `QueryToOperatorTransformer` class, we start by visiting the `SelectStatement` outputted by the parse process: + +- If the `SELECT` statement operates upon a table, the `TableRef` is accepted as well and added to the `output_expr_` as a `LogicalGet`. Otherwise, the `LogicalGet` is initialized to operate on an invalid table (used for queries like `SELECT 1 + 1`). +- Predicates for the input expression, if they exist, are collected with an alias to each referenced column's associated table name in `std::vector predicates_` and are stored in a `LogicalFilter` that is added as the parent of the existing `output_expr_` + - [ ] Add information for subquery transformation above +- [ ] Add information about aggregations in `SELECT` here +- [ ] Add information about `SelectDistinct` in `SELECT` here +- If a `LIMIT` exists in the select statement, the value of the limit and associated offset, along with any `ORDER_BY` clauses, are stored in a `LogicalLimit` node that is added as the parent of the existing `output_expr_` + +Our example query has both a `LogicalLimit` and set of `LogicalFilter`s above a `LogicalGet` in the `output_expr_`. + +Following the above transformations, we create a `QueryInfo` object that keeps track of the query type. Additionally, on a `SELECT` statement, we also keep track of the output columns and sort properties in the `QueryInfo` object. + +The top-level function call of the following sections is `Optimizer::BuildPlanTree`, which takes as input: + +- `transaction::TransactionContext *txn`: the active transaction +- `catalog::CatalogAccessor *accessor`: the catalog accessor +- `StatsStorage *storage`: a structure to manage table statistics +- `QueryInfo query_info`: information directly relevant to active query (type, output expressions, physical properties) +- `std::unique_ptr op_tree`: the logical expression tree generated from the transformation + +and will eventually output the physical plan chosen. + +# 3. Optimization + +The bulk of the optimization logic occurs in this section. The optimization loop generates all transformations, . Some key related definitions follow: + +- `OptimizationContext`: the structure which keeps track of required properties, costs, and the OptimizerContext for each optimization process. +- `OptimizerContext`: the structure which tracks the cost model, task pool, transaction context, statistics storage, catalog accessor, rule set, and tracked optimization contexts, and importantly the **memoized expressions for each group.** +- `Memo`: a class to track groups and group expressions that allows for duplicate detection, rewrites, access, and erase procedures. +- `GroupExpression`: a class to track a specific operator (logical or physical) in a group which tracks the transaction, group number, whether the stats have been derived, the lowest cost satisfying the required properties of an expression, and the details of the expression. +- `Group`: a class to collect group expressions representing logically equivalent expressions including logical and physical transformations. +- `LogicalExpression`: a group expression which represents a logical expression i.e. the logical operator that specifies the general behavior of a node +- `PhysicalExpression`: a group expression which represents a logical expression i.e. the physical operator that specifies the implementation details of the expression +- `Rule`: definition of a transformation from one expression to another (may be logical transformation, physical implementation, or rewrite) +- `Property`: interface to define a physical property, currently of which only sort properties exist +- `PropertySet`: interface to group together properties + +## Setup + +For each expression in the logical expression tree, a `GroupExpression` is created that initializes the `group_id_t_ group_id_` and `child_groups_`, a vector of the `group_id_t`'s of the child groups (child logical expressions), and generates an associated `Group` that initializes it's own `group_id_t id_`, `table_aliases_`, and the initial `GroupExpression` for this group in `logical_expressions_`. Effectively, each logical expression will be part of its own `Group`. + +Physical properties are additionally kept track of in `PropertySet *phys_properties`, along with output expressions in `std::vector> output_exprs`, both of which are later passed to the `ChooseBestPlan` phase in section 4. + +## Optimization Loop + +In `Optimizer::OptimizeLoop`, we operate on the memoized expressions from the `group_id_t root_group_id` and with a set of `required_props`. We begin by initializing an `OptimizationContext` which stores the current `OptimizerContext` structure as well as the required physical properties. Additionally, a stack is created to manage optimization tasks in a LIFO manner. The `Optimizer`'s task stack begins with (at the top of the stack) `BottomUpRewrite` tasks, followed by `TopDownRewrite`. Following completion of this stack, a new stack then continues to perform the statistics calculations and finally, the optimizer will explore logically equivalent expressions, compute costs, and choose the optimal plan (with lowest cost) for the execution engine. A task stack runs up to the `task_execution_timeout_` setting ***IF*** a costed expression satisfying the properties is chosen, or until all the tasks in the stack have been completed otherwise. + +### Bottom Up Rewrite + +The bottom up rewrite initially attempts to rewrite from the root group. For each group, the program will recursively push rewrite tasks for all sub-tree groups to be completed before re-attempting the rewrite of the current group. Upon completion of all child tasks, which is ensured by the LIFO stack ordering, and kept track of with the `has_optimized_child` flag in the `BottomUpRewrite` task, the task will attempt to check any rules defined to be in the `RuleSet` with a `RuleSetName` value matching that defined in `rule.cpp`. Rules are determined to be "valid" in `OptimizerTask::ConstructValidRules`, first checking whether the root pattern of the rule is satisfied for the group expression and the correct number of children exists (if the rule has not already been explored). + +For each valid rule, a binding iterator for the group expression is created with a call to the constructor for `GroupExprBindingIterator`. If a binding is found, the transformation from the before state of the group expression to the after state (if one exists) is applied. We expect only one output from the rewrite. If a new expression is made, we will replace the current logical expression of the group with the outputted logical expression created by the transformation rule (of base class `Rule::Transform`) in `OptimizerContext::ReplaceRewriteExpression`. If a transformation is successfully completed, the current Rewrite task is pushed back on the stack. If a binding is not found, no such action happens. In either case, the rule is marked as explored for the current group expression, `cur_group_expr`. + +This sub-section will now further explore the actions of `GroupExprBindingIterator`, as utilized above. + +The constructor for this class first repeats the valid rule checks from `OptimizerTask::ConstructValidRules` (as they may not have previously been performed on a child from the potential recursive call described below). If these checks fail, the function returns and `has_next_`, which identifies a valid binding exists on an ensuing call to `GroupExprBindingIterator::HasNext`, is never set to true. + +Otherwise, for each child group, this function creates a `GroupBindingIterator` to attempt to match with the child pattern expected by the rule, and will collect all the bindings for each child in the variable `child_bindings`. If all of the child groups can bind to the expected child patterns, the `has_next_` flag is set to true and a binding to an `OperatorNode` tracking the expression, children, and active transaction, is created. If any of the child groups do not have valid bindings, the binding attempt fails, and again, `has_next_` is never set to true. + +To complete a consideration of `GroupExprBindingIterator`, `GroupExprBindingIterator::HasNext` will first check whether both `has_next_` and `first_` are set, in which case, the function may return true, as there is a next binding that has not been encountered yet. However, if only `has_next_` is set to true, there are two cases. If all the bindings of all the children (from the last child to the first), have been tried, then there is no next binding. Otherwise, the children in the operator are replaced with the next appropriate child binding set per the positions in `children_bindings_pos_`. + +Finally, the current state of `has_next_` is returned. + +This sub-section will now further explore the actions of `GroupBindingIterator`, as utilized above. + +Each `GroupBindingIterator iterator`, on a call to `GroupBindingIterator::HasNext` will first check if the expected pattern is of `OpType::LEAF`, in which case `current_item_index_` must never have been incremented. Otherwise, if `current_iterator_` has already been set, we check whether there still exist bindings in the current item, and if not, increment the index and reset the variable. If `current_iterator_` had not previously been set, we attempt to find a matching pattern by recursively creating a new `GroupExprBindingIterator` for each logical expression in this child group. If no matching pattern is found for any of the logical expressions, (the boolean field `has_next_` is not set for any of the logical expressions, and thus no binding is made) the iterator will not be able to match the child pattern, reset its value to `nullptr`, and return `false` to its caller. However, if one is found, we change the `current_iterator_` to the new logical expression iterator and return. + +### Top Down Rewrite + +The top down rewrite phases similarly attempt to rewrite from the root group, first applying the `PREDICATE_PUSH_DOWN` rules, and then the `CLAUSE_PUSH_DOWN` rules defined in `rule.cpp` (the following process is run for each of the two rule sets). As before, rules are determined to be "valid" in `OptimizerTask::ConstructValidRules`, first checking whether the root pattern of the rule is satisfied for the group expression and the correct number of children exists (if the rule has not already been explored). + +For each valid rule, a binding iterator for the group expression is created with a call to the constructor for `GroupExprBindingIterator` (details on `GroupExprBindingIterator` are found in the Bottom Up Rewrite section). If a binding is found, the transformation from the before state of the group expression to the after state (if one exists) is applied. We expect only one output from the rewrite. If a new expression is made, we will replace the current logical expression of the group with the outputted logical expression created by the transformation rule (of base class `Rule::Transform`) in `OptimizerContext::ReplaceRewriteExpression`. If a transformation is successfully completed, the current Rewrite task is pushed back on the stack. If a binding is not found, no such actions happen. In either case, the rule is updated to explored for the current group expression, `cur_group_expr`. + +Finally, we push new tasks for all children groups of the current group to apply the same rule set executed on this pass. + +### Derive Stats + +With the rewriting phases complete, we will first go through the statistics derivation process for our logical plan. We start by pushing a `DeriveStats` task for the root group onto the task stack. + +Inside each DeriveStats task, first, we create a `ChildStatsDeriver` object that is then used to create a vector of expression sets for the appropriate number of children groups (and then call `OperatorVisitor::Visit(const NODETYPE *NODENAME)`, which thus far is unimplemented). Then, for each child group, if any the required stats are empty, or the first logical expression in any child does not have stats derived, we will push the current task back onto the stack, and then push tasks to derive the stats of each child group logical expression not yet completed. + +On the final call to the current `DeriveStats` task, we calculate the stats for the group expression, and set a flag in the expression indicating the stats have been derived. The implementations of the statistics calculation process with `StatsCalculator::Visit(arg)` function vary by operator type. However, we will describe a pair of examples to follow: + +We will describe a pair of such processes, taken from our running example: + +- `StatsCalculator::Visit(const LogicalGet *op)` In the case the `LogicalGet` corresponds to a dummy scan, no stats will be calculated and we return immediately. Otherwise, we consider the statistics of the table in question. If no `TableStats` exist in the optimizer's context, identified by database and table id pairs, then, for each of the required columns, default stats are initialized with a cast to a `ColumnValueExpression` from the `ExprSet` col before calling `CreateDefaultStats` on the result and returning. + + If `TableStats` does exist, then we may copy the `ColumnStats` from the existing `TableStats` object and otherwise, create default stats if the column does not exist, and store these computed results in `required_stats`, a map from the column name to the associated `ColumnStats` object. On the first instance of computing selectivity, for the predicates existing at this level, we will determine the base table statistics for each of the columns before then determining the row estimate (0 if no column stats exist, and filter-based estimations computed in `StatsCalculator::EstimateCardinalityForFilter` otherwise). + + Finally, for each column in the required columns, we add the stats for the columns into the group. + +- `StatsCalculator::Visit(const LogicalLimit *op)` this operation will first set the number of rows for the Limit's group to be the minimum of the limit and the number of rows of the child group. Then, for each of the required output columns, the stats are copied from the child group, and the number of rows determined above is set in the `ColumnStats` object as well, before being added to the current group. + +### Optimize Group + +After the determination of stats for the groups in our logical expression tree, we start to optimize the given groups, starting from the root. Optimizing a group attempts to choose a "best expression" for that group satisfying all required properties (i.e. a lowest cost group). In attempting to optimize a group, we first push tasks of the form `OptimizeExpression(logical_expr, context)` onto the task stack for all the logical expressions of a group, if the group has not previously been explored and can have its cost lowered . Then, for each physical expression in the group, we push an `OptimizeExpressionCostWithEnforcedProperty(physical_expr, context_)` task, which is executed in the task stack before optimizing logical expressions. Once done, this group will be fully explored as there are no cycles, and we will not revisit this group. + +Calls to `OptimizeExpression` proceed by attempting to apply rules from the `LOGICAL_TRANSFORMATION` and `PHYSICAL_IMPLEMENTATION` rulesets. As in previous section, rules are determined to be "valid" in `OptimizerTask::ConstructValidRules`, first checking whether the root pattern of the rule is satisfied for the group expression and the correct number of children exists (if the rule has not already been explored). For each valid rule `r`, a task of the form `ApplyRule(group_expr_, r.GetRule(), context_)` is pushed onto the task stack to be explored. Then, based on the rule's match pattern, if the current rule should have more non-leaf children, these child groups will be explored via an `ExploreGroup(group, context_)` with group the child group. + +For a call to `ApplyRule`, a binding iterator for the group expression is created with a call to the constructor for `GroupExprBindingIterator` (details on `GroupExprBindingIterator` are found in the Bottom Up Rewrite section). If a binding is found, the transformation updates the memo to track any new group expressions created. The newly created expressions are added both to `group_expressions_` in the memo, as well as the specific group the expression is added to in `groups_` (or a new group if no group is specified). Additionally, any new logical expressions push `DeriveStats`, `ExploreExpression`, and `OptimizeExpression` tasks for the newly constructed logical expression. New physical expressions only push an `OptimizeExpressionCostWithEnforcedProperty` task onto the stack. Upon completion of this process, the rule is marked as explored. + +- [ ] Add documentation to what `ExploreExpression` does + +Calls to `OptimizeExpressionCostWithEnforcedProperty` will first prune nodes that have excessive cost (currently not yet implemented) and determine a vector of output property, input property set pairs for the current expression. If the child group is already optimized, the best expression is determined, but if not, then we re-push the `OptimizeExpressionCostWithEnforcedProperty` onto the stack to return to later, update the upper bound of the cost exploration in a new optimization context, and optimize the child group with this new context by pushing an `OptimizeGroup` task for the child onto the stack, and return from the call. Otherwise, when we do determine a best expression for all children, we update the hash table for the group expression to map the property set to the determined input properties and lowest cost, and update the current group's expression cost similarly. + +- [ ] Add explanation on meeting `PropertyEnforcer` requirements + +We start by optimizing our example's root group by first optimizing the the `LogicalLimit` expression. In `OptimizeExpression` the only valid rule is the implementation rule `LogicalLimitToPhysicalLimit`, which creates a new physical operator for the limit node. As this rule expects only a leaf child, no further task is pushed for exploration. Following the rule transformation process described previously in this section, which is slightly different than the process for the rewrite rules, we generate a `PhysicalLimit` which tracks the contents of the original `LogicalLimit` and continue. + +In optimizing the `Limit` expression, we first determine the input and output properties of the node, and pass down sort properties to children. In particular, the `Limit` MUST satisfy any sort properties in the output from the node, but the child may also attempt to satisfy these properties ahead of time. We observe that at this point, the child group (`LogicalGet`) has not been optimized so push the `OptimizeGroup` for the child group onto the stack, repeat this process for this group (with different rules applied). + +We note that the query is amenable to the rules `LogicalGetToPhysicalTableFreeScan`, `LogicalGetToPhysicalSeqScan`, and `LogicalGetToPhysicalIndexScan`, which we push `ApplyRule` tasks onto the stack for. We only consider the process for the `LogicalGetToPhysicalIndexScan` as follows. We first `Check` that the specified rule is valid, in that we check that there is an index for the table specified in the `Catalog`. Then, in the `Transform` rule, we first determine whether predicates and sorts exist in the scan and determine the index scan type to pass into the `IndexScan`. We note that the property and predicate check pass in our case, so the limit is pushed down to the `IndexScan`. + +After this (and the other) rule constructions we return to the `OptimizeExpressionCostWithEnforcedProperty` task for the groups associated with the `Get` which now determines an optimal cost, that of the `IndexScan`. + +After the application completion of the above, we now return to the re-pushed `OptimizeExpressionCostWithEnforcedProperty` task for the group associated with the `Limit` and can now cost the group according to the observed cost of the child `Get` group (now determined to be an `IndexScan`. + +# 4. Choosing an Output Plan + +Upon completion of the optimization process, we choose the best plan by selecting the best expression for each group, starting from the root. We first derive all children plans with their own required properties, and then rebuild the overall plan from the recursive result. We note that each optimizer node is converted via the `PlanGenerator generator` into a `PlanNode`. In the case of this example, the `IndexScan` from the optimizer is converted to an `IndexScanPlanNode`, which becomes the child of the `OrderByPlanNode` generated from the `Limit` for our sort expressions. Finally, the `Limit` itself is converted to a `LimitPlanNode` with the `OrderByPlanNode` the new child of the `LimitPlanNode`. diff --git a/script/testing/junit/sql/index-select.sql b/script/testing/junit/sql/index-select.sql new file mode 100644 index 0000000000..1054d45290 --- /dev/null +++ b/script/testing/junit/sql/index-select.sql @@ -0,0 +1,50 @@ +CREATE TABLE foo (var1 INT, var2 INT, var3 INT); + +INSERT INTO foo VALUES (0,0,0), (0,0,1), (0,0,2), (0,1,0), (0,1,1), (0,1,2), (0,2,0), (0,2,1), (0,2,2), (1,0,0), (1,0,1), (1,0,2), (1,1,0), (1,1,1), (1,1,2), (1,2,0), (1,2,1), (1,2,2), (2,0,0), (2,0,1), (2,0,2), (2,1,0), (2,1,1), (2,1,2), (2,2,0), (2,2,1), (2,2,2); + + +CREATE INDEX ind ON foo (var1, var2, var3); + + +-- Predicates + Sort exist +-- At least one predicate satisfied and sort not satisfied +SELECT * FROM foo WHERE var1 >= 1 ORDER BY var3 LIMIT 17; + +DROP INDEX ind; +CREATE INDEX ind ON foo (var1, var3); + +-- No predicates satisfied +SELECT * FROM foo WHERE var2 >= 1 ORDER BY var2 LIMIT 17; + +-- At least one predicate satisfied and sort satisfied +-- Predicate must be checked +SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var1, var3 LIMIT 11; +SELECT * FROM foo WHERE var1 >= 1 AND var2 >= 1 ORDER BY var1, var3 LIMIT 11; +SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var2 LIMIT 11; +SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var3 LIMIT 11; + + +-- Sort exists +-- Sort not satisfied +SELECT * FROM foo ORDER BY var2 LIMIT 26; +SELECT * FROM foo ORDER BY var3 LIMIT 26; + +-- Sort satisfied +SELECT * FROM foo ORDER BY var1, var3 LIMIT 26; + + +-- Predicates exist +-- Predicates not satisfied +SELECT * FROM foo WHERE var2 >= 1 LIMIT 17; + +-- Predicates satisfied +-- Predicate must be checked +SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 LIMIT 11; + + +-- Neither predicates nor sort exist +SELECT * FROM foo LIMIT 26; + + +DROP INDEX ind; +DROP TABLE foo; diff --git a/script/testing/junit/traces/index-select.test b/script/testing/junit/traces/index-select.test new file mode 100644 index 0000000000..21bc61e0f1 --- /dev/null +++ b/script/testing/junit/traces/index-select.test @@ -0,0 +1,792 @@ +statement ok +CREATE TABLE foo (var1 INT, var2 INT, var3 INT); + +statement ok + + +statement ok +INSERT INTO foo VALUES (0,0,0), (0,0,1), (0,0,2), (0,1,0), (0,1,1), (0,1,2), (0,2,0), (0,2,1), (0,2,2), (1,0,0), (1,0,1), (1,0,2), (1,1,0), (1,1,1), (1,1,2), (1,2,0), (1,2,1), (1,2,2), (2,0,0), (2,0,1), (2,0,2), (2,1,0), (2,1,1), (2,1,2), (2,2,0), (2,2,1), (2,2,2); + +statement ok + + +statement ok + + +statement ok +CREATE INDEX ind ON foo (var1, var2, var3); + +statement ok + + +statement ok + + +statement ok +-- Predicates + Sort exist + +statement ok +-- At least one predicate satisfied and sort not satisfied + +query III nosort +SELECT * FROM foo WHERE var1 >= 1 ORDER BY var3 LIMIT 17; +---- +1 +0 +0 +1 +1 +0 +1 +2 +0 +2 +0 +0 +2 +1 +0 +2 +2 +0 +2 +2 +1 +1 +0 +1 +2 +0 +1 +2 +1 +1 +1 +1 +1 +1 +2 +1 +1 +1 +2 +1 +2 +2 +2 +2 +2 +2 +0 +2 +1 +0 +2 + +statement ok + + +statement ok +DROP INDEX ind; + +statement ok +CREATE INDEX ind ON foo (var1, var3); + +statement ok + + +statement ok +-- No predicates satisfied + +query III nosort +SELECT * FROM foo WHERE var2 >= 1 ORDER BY var2 LIMIT 17; +---- +0 +1 +2 +0 +1 +1 +1 +1 +0 +2 +1 +0 +2 +1 +1 +2 +1 +2 +1 +1 +1 +1 +1 +2 +0 +1 +0 +2 +2 +2 +0 +2 +0 +0 +2 +1 +0 +2 +2 +1 +2 +0 +1 +2 +1 +1 +2 +2 +2 +2 +0 + +statement ok + + +statement ok +-- At least one predicate satisfied and sort satisfied + +statement ok +-- Predicate must be checked + +query III nosort +SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var1, var3 LIMIT 11; +---- +1 +0 +1 +1 +1 +1 +1 +2 +1 +1 +0 +2 +1 +1 +2 +1 +2 +2 +2 +0 +1 +2 +1 +1 +2 +2 +1 +2 +1 +2 +2 +0 +2 + +query III nosort +SELECT * FROM foo WHERE var1 >= 1 AND var2 >= 1 ORDER BY var1, var3 LIMIT 11; +---- +1 +1 +0 +1 +2 +0 +1 +1 +1 +1 +2 +1 +1 +1 +2 +1 +2 +2 +2 +1 +0 +2 +2 +0 +2 +2 +1 +2 +1 +1 +2 +1 +2 + +query III nosort +SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var2 LIMIT 11; +---- +1 +0 +1 +1 +0 +2 +2 +0 +1 +2 +0 +2 +1 +1 +2 +2 +1 +1 +2 +1 +2 +1 +1 +1 +1 +2 +1 +1 +2 +2 +2 +2 +1 + +query III nosort +SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var3 LIMIT 11; +---- +1 +0 +1 +1 +1 +1 +1 +2 +1 +2 +0 +1 +2 +1 +1 +2 +2 +1 +2 +1 +2 +1 +0 +2 +2 +0 +2 +1 +1 +2 +2 +2 +2 + +statement ok + + +statement ok + + +statement ok +-- Sort exists + +statement ok +-- Sort not satisfied + +query III nosort +SELECT * FROM foo ORDER BY var2 LIMIT 26; +---- +1 +0 +0 +1 +0 +1 +1 +0 +2 +0 +0 +2 +0 +0 +0 +0 +0 +1 +2 +0 +2 +2 +0 +1 +2 +0 +0 +1 +1 +1 +0 +1 +0 +0 +1 +1 +0 +1 +2 +1 +1 +0 +1 +1 +2 +2 +1 +0 +2 +1 +1 +2 +1 +2 +0 +2 +2 +0 +2 +1 +0 +2 +0 +2 +2 +0 +2 +2 +1 +2 +2 +2 +1 +2 +0 +1 +2 +1 + +query III nosort +SELECT * FROM foo ORDER BY var3 LIMIT 26; +---- +1 +0 +0 +2 +2 +0 +1 +2 +0 +1 +1 +0 +0 +0 +0 +2 +1 +0 +0 +2 +0 +0 +1 +0 +2 +0 +0 +1 +1 +1 +0 +0 +1 +0 +1 +1 +0 +2 +1 +1 +0 +1 +1 +2 +1 +2 +0 +1 +2 +1 +1 +2 +2 +1 +0 +2 +2 +0 +0 +2 +2 +0 +2 +0 +1 +2 +2 +2 +2 +1 +1 +2 +1 +0 +2 +2 +1 +2 + +statement ok + + +statement ok +-- Sort satisfied + +query III nosort +SELECT * FROM foo ORDER BY var1, var3 LIMIT 26; +---- +0 +1 +0 +0 +0 +0 +0 +2 +0 +0 +1 +1 +0 +0 +1 +0 +2 +1 +0 +0 +2 +0 +2 +2 +0 +1 +2 +1 +2 +0 +1 +0 +0 +1 +1 +0 +1 +1 +1 +1 +0 +1 +1 +2 +1 +1 +0 +2 +1 +2 +2 +1 +1 +2 +2 +1 +0 +2 +0 +0 +2 +2 +0 +2 +2 +1 +2 +0 +1 +2 +1 +1 +2 +1 +2 +2 +2 +2 + +statement ok + + +statement ok + + +statement ok +-- Predicates exist + +statement ok +-- Predicates not satisfied + +query III rowsort +SELECT * FROM foo WHERE var2 >= 1 LIMIT 17; +---- +0 +1 +0 +0 +1 +1 +0 +1 +2 +0 +2 +0 +0 +2 +1 +0 +2 +2 +1 +1 +0 +1 +1 +1 +1 +1 +2 +1 +2 +0 +1 +2 +1 +1 +2 +2 +2 +1 +0 +2 +1 +1 +2 +1 +2 +2 +2 +0 +2 +2 +1 + +statement ok + + +statement ok +-- Predicates satisfied + +statement ok +-- Predicate must be checked + +query III rowsort +SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 LIMIT 11; +---- +1 +0 +1 +1 +0 +2 +1 +1 +1 +1 +1 +2 +1 +2 +1 +1 +2 +2 +2 +0 +1 +2 +0 +2 +2 +1 +1 +2 +1 +2 +2 +2 +1 + +statement ok + + +statement ok + + +statement ok +-- Neither predicates nor sort exist + +query III rowsort +SELECT * FROM foo LIMIT 26; +---- +0 +0 +0 +0 +0 +1 +0 +0 +2 +0 +1 +0 +0 +1 +1 +0 +1 +2 +0 +2 +0 +0 +2 +1 +0 +2 +2 +1 +0 +0 +1 +0 +1 +1 +0 +2 +1 +1 +0 +1 +1 +1 +1 +1 +2 +1 +2 +0 +1 +2 +1 +1 +2 +2 +2 +0 +0 +2 +0 +1 +2 +0 +2 +2 +1 +0 +2 +1 +1 +2 +1 +2 +2 +2 +0 +2 +2 +1 + +statement ok + + +statement ok + + +statement ok +DROP INDEX ind; + +statement ok +DROP TABLE foo; + diff --git a/src/include/catalog/catalog_defs.h b/src/include/catalog/catalog_defs.h index 21b6c8cb60..d67ac07fb0 100644 --- a/src/include/catalog/catalog_defs.h +++ b/src/include/catalog/catalog_defs.h @@ -7,6 +7,11 @@ namespace noisepage::catalog { +/** + * Ordering types + */ +enum class OrderByOrderingType { ASC, DESC }; + constexpr uint32_t NULL_OID = 0; // error return value constexpr uint32_t START_OID = 1001; diff --git a/src/include/execution/compiler/operator/index_join_translator.h b/src/include/execution/compiler/operator/index_join_translator.h index 8ac6712cb4..439d650eb5 100644 --- a/src/include/execution/compiler/operator/index_join_translator.h +++ b/src/include/execution/compiler/operator/index_join_translator.h @@ -63,8 +63,7 @@ class IndexJoinTranslator : public OperatorTranslator, public PipelineDriver { void DeclareIterator(FunctionBuilder *builder) const; void SetOids(FunctionBuilder *builder) const; void FillKey(WorkContext *context, FunctionBuilder *builder, ast::Identifier pr, - const std::unordered_map &index_exprs, - const bool flag = false) const; + const std::unordered_map &index_exprs) const; void FreeIterator(FunctionBuilder *builder) const; void DeclareIndexPR(FunctionBuilder *builder) const; void DeclareTablePR(FunctionBuilder *builder) const; From 00001041f679aa46319e9826a1f3f50d5b9f2fb7 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Mon, 28 Dec 2020 21:44:47 -0500 Subject: [PATCH 06/20] OrderBy refactor --- .../compiler/operator/sort_translator.cpp | 2 +- src/include/optimizer/index_util.h | 2 +- src/include/optimizer/logical_operators.h | 6 +-- src/include/optimizer/optimizer_defs.h | 5 --- src/include/optimizer/physical_operators.h | 6 +-- src/include/optimizer/properties.h | 6 +-- .../planner/plannodes/order_by_plan_node.h | 4 +- src/optimizer/child_property_deriver.cpp | 4 +- src/optimizer/logical_operators.cpp | 2 +- src/optimizer/physical_operators.cpp | 2 +- src/optimizer/properties.cpp | 2 +- .../query_to_operator_transformer.cpp | 6 +-- src/optimizer/rules/implementation_rules.cpp | 6 +-- src/planner/plannodes/order_by_plan_node.cpp | 4 +- src/traffic_cop/traffic_cop_util.cpp | 6 +-- test/execution/compiler_test.cpp | 10 ++--- test/optimizer/operator_transformer_test.cpp | 2 +- test/optimizer/tpcc_plan_index_scan.cpp | 2 +- test/optimizer/tpcc_plan_seq_scan.cpp | 4 +- test/planner/plan_node_json_test.cpp | 4 +- test/test_util/ssb/star_schema_query.cpp | 44 +++++++++---------- test/test_util/tpcc/tpcc_plan_test.cpp | 6 +-- test/test_util/tpch/tpch_query.cpp | 28 ++++++------ 23 files changed, 77 insertions(+), 86 deletions(-) diff --git a/src/execution/compiler/operator/sort_translator.cpp b/src/execution/compiler/operator/sort_translator.cpp index 080fb6a53a..acb63b765d 100644 --- a/src/execution/compiler/operator/sort_translator.cpp +++ b/src/execution/compiler/operator/sort_translator.cpp @@ -87,7 +87,7 @@ void SortTranslator::GenerateComparisonFunction(FunctionBuilder *function) { context.SetExpressionCacheEnable(false); int32_t ret_value; for (const auto &[expr, sort_order] : GetPlanAs().GetSortKeys()) { - if (sort_order == optimizer::OrderByOrderingType::ASC) { + if (sort_order == catalog::OrderByOrderingType::ASC) { ret_value = -1; } else { ret_value = 1; diff --git a/src/include/optimizer/index_util.h b/src/include/optimizer/index_util.h index 7968ca583d..79c8872a87 100644 --- a/src/include/optimizer/index_util.h +++ b/src/include/optimizer/index_util.h @@ -40,7 +40,7 @@ class IndexUtil { auto sort_col_size = prop->GetSortColumnSize(); for (size_t idx = 0; idx < sort_col_size; idx++) { // TODO(wz2): Consider descending when catalog/index support - auto is_asc = prop->GetSortAscending(static_cast(idx)) == optimizer::OrderByOrderingType::ASC; + auto is_asc = prop->GetSortAscending(static_cast(idx)) == catalog::OrderByOrderingType::ASC; auto is_base = IsBaseColumn(prop->GetSortColumn(static_cast(idx))); if (!is_asc || !is_base) { return false; diff --git a/src/include/optimizer/logical_operators.h b/src/include/optimizer/logical_operators.h index b80faab2a6..d5301613bf 100644 --- a/src/include/optimizer/logical_operators.h +++ b/src/include/optimizer/logical_operators.h @@ -887,7 +887,7 @@ class LogicalLimit : public OperatorNodeContents { */ static Operator Make(size_t offset, size_t limit, std::vector> &&sort_exprs, - std::vector &&sort_directions); + std::vector &&sort_directions); /** * Copy @@ -918,7 +918,7 @@ class LogicalLimit : public OperatorNodeContents { /** * @return inlined sort directions (can be empty) */ - const std::vector &GetSortDirections() const { return sort_directions_; } + const std::vector &GetSortDirections() const { return sort_directions_; } private: /** @@ -942,7 +942,7 @@ class LogicalLimit : public OperatorNodeContents { /** * The sort direction of sort expressions */ - std::vector sort_directions_; + std::vector sort_directions_; }; /** diff --git a/src/include/optimizer/optimizer_defs.h b/src/include/optimizer/optimizer_defs.h index ff50f6fb25..1f30b10522 100644 --- a/src/include/optimizer/optimizer_defs.h +++ b/src/include/optimizer/optimizer_defs.h @@ -30,11 +30,6 @@ const group_id_t UNDEFINED_GROUP = group_id_t(-1); */ enum class ExternalFileFormat { CSV }; -/** - * OrderBy ordering - */ -enum class OrderByOrderingType { ASC, DESC }; - /** * Operator type */ diff --git a/src/include/optimizer/physical_operators.h b/src/include/optimizer/physical_operators.h index e024b70266..b0f3ffaf60 100644 --- a/src/include/optimizer/physical_operators.h +++ b/src/include/optimizer/physical_operators.h @@ -417,7 +417,7 @@ class Limit : public OperatorNodeContents { */ static Operator Make(size_t offset, size_t limit, std::vector> &&sort_columns, - std::vector &&sort_directions); + std::vector &&sort_directions); /** * Copy @@ -448,7 +448,7 @@ class Limit : public OperatorNodeContents { /** * @return sorting orders (if ascending) */ - const std::vector &GetSortAscending() const { return sort_directions_; } + const std::vector &GetSortAscending() const { return sort_directions_; } private: /** @@ -476,7 +476,7 @@ class Limit : public OperatorNodeContents { /** * Sorting order */ - std::vector sort_directions_; + std::vector sort_directions_; }; /** diff --git a/src/include/optimizer/properties.h b/src/include/optimizer/properties.h index f8326f2234..265053f993 100644 --- a/src/include/optimizer/properties.h +++ b/src/include/optimizer/properties.h @@ -21,7 +21,7 @@ class PropertySort : public Property { * @param sort_ascending Whether each sort_column is ascending or descending */ PropertySort(std::vector> sort_columns, - std::vector sort_ascending) + std::vector sort_ascending) : sort_columns_(std::move(sort_columns)), sort_ascending_(std::move(sort_ascending)) {} /** @@ -53,7 +53,7 @@ class PropertySort : public Property { * @param idx Index of ascending flag to retrieve * @returns Whether sort column at index idx is sorted in ascending order */ - OrderByOrderingType GetSortAscending(int idx) const { return sort_ascending_[idx]; } + catalog::OrderByOrderingType GetSortAscending(int idx) const { return sort_ascending_[idx]; } /** * Hashes this PropertySort @@ -86,7 +86,7 @@ class PropertySort : public Property { /** * Direction of the sort */ - std::vector sort_ascending_; + std::vector sort_ascending_; }; } // namespace noisepage::optimizer diff --git a/src/include/planner/plannodes/order_by_plan_node.h b/src/include/planner/plannodes/order_by_plan_node.h index 42ad3bf530..89a5f8acee 100644 --- a/src/include/planner/plannodes/order_by_plan_node.h +++ b/src/include/planner/plannodes/order_by_plan_node.h @@ -12,7 +12,7 @@ namespace noisepage::planner { -using SortKey = std::pair, optimizer::OrderByOrderingType>; +using SortKey = std::pair, catalog::OrderByOrderingType>; /** * Plan node for order by operator @@ -42,7 +42,7 @@ class OrderByPlanNode : public AbstractPlanNode { * @return builder object */ Builder &AddSortKey(common::ManagedPointer key, - optimizer::OrderByOrderingType ordering) { + catalog::OrderByOrderingType ordering) { sort_keys_.emplace_back(key, ordering); return *this; } diff --git a/src/optimizer/child_property_deriver.cpp b/src/optimizer/child_property_deriver.cpp index f36f3fa61c..944cb7bb72 100644 --- a/src/optimizer/child_property_deriver.cpp +++ b/src/optimizer/child_property_deriver.cpp @@ -78,7 +78,7 @@ void ChildPropertyDeriver::Visit(UNUSED_ATTRIBUTE const HashGroupBy *op) { void ChildPropertyDeriver::Visit(UNUSED_ATTRIBUTE const SortGroupBy *op) { // Child must provide sort for Groupby columns - std::vector sort_ascending(op->GetColumns().size(), OrderByOrderingType::ASC); + std::vector sort_ascending(op->GetColumns().size(), catalog::OrderByOrderingType::ASC); auto sort_prop = new PropertySort(op->GetColumns(), std::move(sort_ascending)); auto prop_set = new PropertySet(std::vector{sort_prop}); @@ -95,7 +95,7 @@ void ChildPropertyDeriver::Visit(const Limit *op) { auto provided_prop = new PropertySet(); if (!op->GetSortExpressions().empty()) { const std::vector> &exprs = op->GetSortExpressions(); - const std::vector &sorts{op->GetSortAscending()}; + const std::vector &sorts{op->GetSortAscending()}; provided_prop->AddProperty(new PropertySort(exprs, sorts)); } diff --git a/src/optimizer/logical_operators.cpp b/src/optimizer/logical_operators.cpp index ee95466508..062bfe539b 100644 --- a/src/optimizer/logical_operators.cpp +++ b/src/optimizer/logical_operators.cpp @@ -288,7 +288,7 @@ BaseOperatorNodeContents *LogicalLimit::Copy() const { return new LogicalLimit(* Operator LogicalLimit::Make(size_t offset, size_t limit, std::vector> &&sort_exprs, - std::vector &&sort_directions) { + std::vector &&sort_directions) { NOISEPAGE_ASSERT(sort_exprs.size() == sort_directions.size(), "Mismatched ORDER BY expressions + directions"); auto *op = new LogicalLimit(); op->offset_ = offset; diff --git a/src/optimizer/physical_operators.cpp b/src/optimizer/physical_operators.cpp index a2269e4a39..0dfa3f08aa 100644 --- a/src/optimizer/physical_operators.cpp +++ b/src/optimizer/physical_operators.cpp @@ -246,7 +246,7 @@ BaseOperatorNodeContents *Limit::Copy() const { return new Limit(*this); } Operator Limit::Make(size_t offset, size_t limit, std::vector> &&sort_columns, - std::vector &&sort_directions) { + std::vector &&sort_directions) { auto *limit_op = new Limit(); limit_op->offset_ = offset; limit_op->limit_ = limit; diff --git a/src/optimizer/properties.cpp b/src/optimizer/properties.cpp index 755b2e6995..7af49488ad 100644 --- a/src/optimizer/properties.cpp +++ b/src/optimizer/properties.cpp @@ -53,7 +53,7 @@ common::hash_t PropertySort::Hash() const { for (size_t i = 0; i < num_sort_columns; ++i) { hash = common::HashUtil::CombineHashes(hash, sort_columns_[i]->Hash()); - auto asc_hash = common::HashUtil::Hash(sort_ascending_[i]); + auto asc_hash = common::HashUtil::Hash(sort_ascending_[i]); hash = common::HashUtil::CombineHashes(hash, asc_hash); } return hash; diff --git a/src/optimizer/query_to_operator_transformer.cpp b/src/optimizer/query_to_operator_transformer.cpp index 7e824c918a..47a4571a1e 100644 --- a/src/optimizer/query_to_operator_transformer.cpp +++ b/src/optimizer/query_to_operator_transformer.cpp @@ -126,7 +126,7 @@ void QueryToOperatorTransformer::Visit(common::ManagedPointerGetSelectLimit() != nullptr && op->GetSelectLimit()->GetLimit() != -1) { OPTIMIZER_LOG_DEBUG("Handling order by/limit/offset in SelectStatement ..."); std::vector> sort_exprs; - std::vector sort_direction; + std::vector sort_direction; if (op->GetSelectOrderBy() != nullptr) { const auto &order_info = op->GetSelectOrderBy(); @@ -135,9 +135,9 @@ void QueryToOperatorTransformer::Visit(common::ManagedPointerGetOrderByTypes()) { if (type == parser::kOrderAsc) - sort_direction.push_back(optimizer::OrderByOrderingType::ASC); + sort_direction.push_back(catalog::OrderByOrderingType::ASC); else - sort_direction.push_back(optimizer::OrderByOrderingType::DESC); + sort_direction.push_back(catalog::OrderByOrderingType::DESC); } } auto limit_expr = std::make_unique( diff --git a/src/optimizer/rules/implementation_rules.cpp b/src/optimizer/rules/implementation_rules.cpp index 9f97a6fda8..e16307e828 100644 --- a/src/optimizer/rules/implementation_rules.cpp +++ b/src/optimizer/rules/implementation_rules.cpp @@ -1,14 +1,11 @@ #include "optimizer/rules/implementation_rules.h" #include -#include #include -#include #include #include #include "catalog/catalog_accessor.h" -#include "loggers/optimizer_logger.h" #include "optimizer/group_expression.h" #include "optimizer/index_util.h" #include "optimizer/logical_operators.h" @@ -18,7 +15,6 @@ #include "optimizer/properties.h" #include "optimizer/util.h" #include "parser/expression_util.h" -#include "storage/storage_defs.h" namespace noisepage::optimizer { @@ -798,7 +794,7 @@ void LogicalLimitToPhysicalLimit::Transform(common::ManagedPointerGetChildren().size() == 1, "LogicalLimit should have 1 child"); std::vector> sorts = limit_op->GetSortExpressions(); - std::vector types = limit_op->GetSortDirections(); + std::vector types = limit_op->GetSortDirections(); std::vector> c; auto child = input->GetChildren()[0]->Copy(); c.emplace_back(std::move(child)); diff --git a/src/planner/plannodes/order_by_plan_node.cpp b/src/planner/plannodes/order_by_plan_node.cpp index 9cd38b8f80..22cfa9ccee 100644 --- a/src/planner/plannodes/order_by_plan_node.cpp +++ b/src/planner/plannodes/order_by_plan_node.cpp @@ -79,7 +79,7 @@ bool OrderByPlanNode::operator==(const AbstractPlanNode &rhs) const { nlohmann::json OrderByPlanNode::ToJson() const { nlohmann::json j = AbstractPlanNode::ToJson(); - std::vector> sort_keys; + std::vector> sort_keys; sort_keys.reserve(sort_keys_.size()); for (const auto &key : sort_keys_) { sort_keys.emplace_back(key.first->ToJson(), key.second); @@ -97,7 +97,7 @@ std::vector> OrderByPlanNode::FromJs exprs.insert(exprs.end(), std::make_move_iterator(e1.begin()), std::make_move_iterator(e1.end())); // Deserialize sort keys - auto sort_keys = j.at("sort_keys").get>>(); + auto sort_keys = j.at("sort_keys").get>>(); for (const auto &key_json : sort_keys) { auto deserialized = parser::DeserializeExpression(key_json.first); sort_keys_.emplace_back(common::ManagedPointer(deserialized.result_), key_json.second); diff --git a/src/traffic_cop/traffic_cop_util.cpp b/src/traffic_cop/traffic_cop_util.cpp index 0395c47166..dab7beb220 100644 --- a/src/traffic_cop/traffic_cop_util.cpp +++ b/src/traffic_cop/traffic_cop_util.cpp @@ -46,15 +46,15 @@ std::unique_ptr TrafficCopUtil::Optimize( // PropertySort if (sel_stmt->GetSelectOrderBy()) { - std::vector sort_dirs; + std::vector sort_dirs; std::vector> sort_exprs; auto order_by = sel_stmt->GetSelectOrderBy(); auto types = order_by->GetOrderByTypes(); auto exprs = order_by->GetOrderByExpressions(); for (size_t idx = 0; idx < order_by->GetOrderByExpressionsSize(); idx++) { sort_exprs.emplace_back(exprs[idx]); - sort_dirs.push_back(types[idx] == parser::OrderType::kOrderAsc ? optimizer::OrderByOrderingType::ASC - : optimizer::OrderByOrderingType::DESC); + sort_dirs.push_back(types[idx] == parser::OrderType::kOrderAsc ? catalog::OrderByOrderingType::ASC + : catalog::OrderByOrderingType::DESC); } auto sort_prop = new optimizer::PropertySort(sort_exprs, sort_dirs); diff --git a/test/execution/compiler_test.cpp b/test/execution/compiler_test.cpp index 7d998f2c02..93bd984012 100644 --- a/test/execution/compiler_test.cpp +++ b/test/execution/compiler_test.cpp @@ -1774,9 +1774,9 @@ TEST_F(CompilerTest, SimpleSortTest) { order_by_out.AddOutput("sum", sum); auto schema = order_by_out.MakeSchema(); // Order By Clause - planner::SortKey clause1{col2, optimizer::OrderByOrderingType::ASC}; + planner::SortKey clause1{col2, catalog::OrderByOrderingType::ASC}; auto diff = expr_maker.OpMin(col1, col2); - planner::SortKey clause2{diff, optimizer::OrderByOrderingType::DESC}; + planner::SortKey clause2{diff, catalog::OrderByOrderingType::DESC}; // Build planner::OrderByPlanNode::Builder builder; order_by = builder.SetOutputSchema(std::move(schema)) @@ -1891,9 +1891,9 @@ TEST_F(CompilerTest, SortWithLimitTest) { order_by_out.AddOutput("sum", sum); auto schema = order_by_out.MakeSchema(); // Order By Clause - planner::SortKey clause1{col2, optimizer::OrderByOrderingType::ASC}; + planner::SortKey clause1{col2, catalog::OrderByOrderingType::ASC}; auto diff = expr_maker.OpMin(col1, col2); - planner::SortKey clause2{diff, optimizer::OrderByOrderingType::DESC}; + planner::SortKey clause2{diff, catalog::OrderByOrderingType::DESC}; // Build planner::OrderByPlanNode::Builder builder; order_by = builder.SetOutputSchema(std::move(schema)) @@ -1986,7 +1986,7 @@ TEST_F(CompilerTest, SortWithLimitAndOffsetTest) { order_by_out.AddOutput("col1", col1); auto schema = order_by_out.MakeSchema(); // Order By Clause - planner::SortKey clause1{col1, optimizer::OrderByOrderingType::DESC}; + planner::SortKey clause1{col1, catalog::OrderByOrderingType::DESC}; // Build planner::OrderByPlanNode::Builder builder; order_by = builder.SetOutputSchema(std::move(schema)) diff --git a/test/optimizer/operator_transformer_test.cpp b/test/optimizer/operator_transformer_test.cpp index 0487bfc9aa..4a57c6720f 100644 --- a/test/optimizer/operator_transformer_test.cpp +++ b/test/optimizer/operator_transformer_test.cpp @@ -413,7 +413,7 @@ TEST_F(OperatorTransformerTest, SelectStatementOrderByTest) { auto logical_limit = operator_tree_->Contents()->GetContentsAs(); EXPECT_EQ(2, logical_limit->GetLimit()); EXPECT_EQ(1, logical_limit->GetOffset()); - EXPECT_EQ(optimizer::OrderByOrderingType::ASC, logical_limit->GetSortDirections()[0]); + EXPECT_EQ(catalog::OrderByOrderingType::ASC, logical_limit->GetSortDirections()[0]); EXPECT_EQ( "b2", logical_limit->GetSortExpressions()[0].CastManagedPointerTo()->GetColumnName()); diff --git a/test/optimizer/tpcc_plan_index_scan.cpp b/test/optimizer/tpcc_plan_index_scan.cpp index 8ed746a98e..cef139618c 100644 --- a/test/optimizer/tpcc_plan_index_scan.cpp +++ b/test/optimizer/tpcc_plan_index_scan.cpp @@ -186,7 +186,7 @@ TEST_F(TpccPlanIndexScanTests, IndexFulfillSortAndPredicateWithLimitOffset) { EXPECT_EQ(orderby->GetLimit(), sel_stmt->GetSelectLimit()->GetLimit()); EXPECT_EQ(orderby->GetOffset(), sel_stmt->GetSelectLimit()->GetOffset()); EXPECT_EQ(orderby->GetSortKeys().size(), 1); - EXPECT_EQ(orderby->GetSortKeys()[0].second, optimizer::OrderByOrderingType::ASC); + EXPECT_EQ(orderby->GetSortKeys()[0].second, catalog::OrderByOrderingType::ASC); auto sortkey = orderby->GetSortKeys()[0].first.CastManagedPointerTo(); EXPECT_TRUE(sortkey != nullptr); EXPECT_EQ(sortkey->GetExpressionType(), parser::ExpressionType::VALUE_TUPLE); diff --git a/test/optimizer/tpcc_plan_seq_scan.cpp b/test/optimizer/tpcc_plan_seq_scan.cpp index ce5f2ca005..a33ef3c8a0 100644 --- a/test/optimizer/tpcc_plan_seq_scan.cpp +++ b/test/optimizer/tpcc_plan_seq_scan.cpp @@ -90,7 +90,7 @@ TEST_F(TpccPlanSeqScanTests, SimpleSeqScanSelectWithPredicateOrderBy) { auto orderby = reinterpret_cast(planc); EXPECT_EQ(orderby->HasLimit(), false); EXPECT_EQ(orderby->GetSortKeys().size(), 1); - EXPECT_EQ(orderby->GetSortKeys()[0].second, optimizer::OrderByOrderingType::DESC); + EXPECT_EQ(orderby->GetSortKeys()[0].second, catalog::OrderByOrderingType::DESC); auto sortkey = orderby->GetSortKeys()[0].first.CastManagedPointerTo(); EXPECT_TRUE(sortkey != nullptr); EXPECT_EQ(sortkey->GetExpressionType(), parser::ExpressionType::VALUE_TUPLE); @@ -195,7 +195,7 @@ TEST_F(TpccPlanSeqScanTests, SimpleSeqScanSelectWithPredicateOrderByLimit) { EXPECT_EQ(orderby->GetLimit(), sel_stmt->GetSelectLimit()->GetLimit()); EXPECT_EQ(orderby->GetOffset(), sel_stmt->GetSelectLimit()->GetOffset()); EXPECT_EQ(orderby->GetSortKeys().size(), 1); - EXPECT_EQ(orderby->GetSortKeys()[0].second, optimizer::OrderByOrderingType::DESC); + EXPECT_EQ(orderby->GetSortKeys()[0].second, catalog::OrderByOrderingType::DESC); auto sortkey = orderby->GetSortKeys()[0].first.CastManagedPointerTo(); EXPECT_TRUE(sortkey != nullptr); EXPECT_EQ(sortkey->GetExpressionType(), parser::ExpressionType::VALUE_TUPLE); diff --git a/test/planner/plan_node_json_test.cpp b/test/planner/plan_node_json_test.cpp index ba8e8826b7..49e0c9055d 100644 --- a/test/planner/plan_node_json_test.cpp +++ b/test/planner/plan_node_json_test.cpp @@ -771,8 +771,8 @@ TEST(PlanNodeJsonTest, OrderByPlanNodeJsonTest) { parser::AbstractExpression *sortkey2 = new parser::DerivedValueExpression(type::TypeId::INTEGER, 0, 1); auto plan_node = builder.SetOutputSchema(PlanNodeJsonTest::BuildDummyOutputSchema()) - .AddSortKey(common::ManagedPointer(sortkey1), optimizer::OrderByOrderingType::ASC) - .AddSortKey(common::ManagedPointer(sortkey2), optimizer::OrderByOrderingType::DESC) + .AddSortKey(common::ManagedPointer(sortkey1), catalog::OrderByOrderingType::ASC) + .AddSortKey(common::ManagedPointer(sortkey2), catalog::OrderByOrderingType::DESC) .SetLimit(10) .SetOffset(10) .Build(); diff --git a/test/test_util/ssb/star_schema_query.cpp b/test/test_util/ssb/star_schema_query.cpp index 4cb5b8497b..3b03915aa1 100644 --- a/test/test_util/ssb/star_schema_query.cpp +++ b/test/test_util/ssb/star_schema_query.cpp @@ -591,8 +591,8 @@ SSBQuery::SSBMakeExecutableQ2Part1(const std::unique_ptr TpccPlanTest::Optimize(const std::str // Property Sort auto property_set = new optimizer::PropertySet(); - std::vector sort_dirs; + std::vector sort_dirs; std::vector> sort_exprs; if (sel_stmt->GetSelectOrderBy()) { auto order_by = sel_stmt->GetSelectOrderBy(); @@ -125,8 +125,8 @@ std::unique_ptr TpccPlanTest::Optimize(const std::str auto exprs = order_by->GetOrderByExpressions(); for (size_t idx = 0; idx < order_by->GetOrderByExpressionsSize(); idx++) { sort_exprs.emplace_back(exprs[idx]); - sort_dirs.push_back(types[idx] == parser::OrderType::kOrderAsc ? optimizer::OrderByOrderingType::ASC - : optimizer::OrderByOrderingType::DESC); + sort_dirs.push_back(types[idx] == parser::OrderType::kOrderAsc ? catalog::OrderByOrderingType::ASC + : catalog::OrderByOrderingType::DESC); } auto sort_prop = new optimizer::PropertySort(sort_exprs, sort_dirs); diff --git a/test/test_util/tpch/tpch_query.cpp b/test/test_util/tpch/tpch_query.cpp index 78cb4d251b..34e8a4a8e8 100644 --- a/test/test_util/tpch/tpch_query.cpp +++ b/test/test_util/tpch/tpch_query.cpp @@ -148,8 +148,8 @@ TPCHQuery::MakeExecutableQ1(const std::unique_ptr &acc order_by_out.AddOutput("count_order", count_order); auto schema = order_by_out.MakeSchema(); // Order By Clause - planner::SortKey clause1{l_returnflag, optimizer::OrderByOrderingType::ASC}; - planner::SortKey clause2{l_linestatus, optimizer::OrderByOrderingType::ASC}; + planner::SortKey clause1{l_returnflag, catalog::OrderByOrderingType::ASC}; + planner::SortKey clause2{l_linestatus, catalog::OrderByOrderingType::ASC}; // Build planner::OrderByPlanNode::Builder builder; order_by = builder.SetOutputSchema(std::move(schema)) @@ -286,7 +286,7 @@ TPCHQuery::MakeExecutableQ4(const std::unique_ptr &acc order_by_out.AddOutput("order_count", order_count); auto schema = order_by_out.MakeSchema(); // Order By Clause - planner::SortKey clause{o_orderpriority, optimizer::OrderByOrderingType::ASC}; + planner::SortKey clause{o_orderpriority, catalog::OrderByOrderingType::ASC}; // Build planner::OrderByPlanNode::Builder builder; order_by = builder.SetOutputSchema(std::move(schema)) @@ -658,7 +658,7 @@ TPCHQuery::MakeExecutableQ5(const std::unique_ptr &acc order_by_out.AddOutput("revenue", revenue); auto schema = order_by_out.MakeSchema(); // Order By Clause - planner::SortKey clause{revenue, optimizer::OrderByOrderingType::DESC}; + planner::SortKey clause{revenue, catalog::OrderByOrderingType::DESC}; // Build planner::OrderByPlanNode::Builder builder; order_by = builder.SetOutputSchema(std::move(schema)) @@ -1154,9 +1154,9 @@ TPCHQuery::MakeExecutableQ7(const std::unique_ptr &acc order_by_out.AddOutput("volume", volume); auto schema = order_by_out.MakeSchema(); // Order By Clause - planner::SortKey clause1{supp_nation, optimizer::OrderByOrderingType::ASC}; - planner::SortKey clause2{cust_nation, optimizer::OrderByOrderingType::ASC}; - planner::SortKey clause3{l_year, optimizer::OrderByOrderingType::ASC}; + planner::SortKey clause1{supp_nation, catalog::OrderByOrderingType::ASC}; + planner::SortKey clause2{cust_nation, catalog::OrderByOrderingType::ASC}; + planner::SortKey clause3{l_year, catalog::OrderByOrderingType::ASC}; // Build planner::OrderByPlanNode::Builder builder; order_by = builder.SetOutputSchema(std::move(schema)) @@ -1525,7 +1525,7 @@ TPCHQuery::MakeExecutableQ11(const std::unique_ptr &ac order_by_out.AddOutput("value_sum", value_sum); auto schema = order_by_out.MakeSchema(); // Order By Clause - planner::SortKey clause1{value_sum, optimizer::OrderByOrderingType::DESC}; + planner::SortKey clause1{value_sum, catalog::OrderByOrderingType::DESC}; // Build planner::OrderByPlanNode::Builder builder; order_by = builder.SetOutputSchema(std::move(schema)) @@ -1758,10 +1758,10 @@ TPCHQuery::MakeExecutableQ16(const std::unique_ptr &ac order_by_out.AddOutput("supplier_cnt", supplier_cnt); auto schema = order_by_out.MakeSchema(); // Order By Clause - planner::SortKey clause1{supplier_cnt, optimizer::OrderByOrderingType::DESC}; - planner::SortKey clause2{p_brand, optimizer::OrderByOrderingType::ASC}; - planner::SortKey clause3{p_type, optimizer::OrderByOrderingType::ASC}; - planner::SortKey clause4{p_size, optimizer::OrderByOrderingType::ASC}; + planner::SortKey clause1{supplier_cnt, catalog::OrderByOrderingType::DESC}; + planner::SortKey clause2{p_brand, catalog::OrderByOrderingType::ASC}; + planner::SortKey clause3{p_type, catalog::OrderByOrderingType::ASC}; + planner::SortKey clause4{p_size, catalog::OrderByOrderingType::ASC}; // Build planner::OrderByPlanNode::Builder builder; order_by = builder.SetOutputSchema(std::move(schema)) @@ -2070,8 +2070,8 @@ TPCHQuery::MakeExecutableQ18(const std::unique_ptr &ac order_by_out.AddOutput("sum_qty", sum_qty); auto schema = order_by_out.MakeSchema(); // Order By Clause - planner::SortKey clause1{o_totalprice, optimizer::OrderByOrderingType::DESC}; - planner::SortKey clause2{o_orderdate, optimizer::OrderByOrderingType::ASC}; + planner::SortKey clause1{o_totalprice, catalog::OrderByOrderingType::DESC}; + planner::SortKey clause2{o_orderdate, catalog::OrderByOrderingType::ASC}; // Build planner::OrderByPlanNode::Builder builder; order_by = builder.SetOutputSchema(std::move(schema)) From 1c22936320d8b27982d1141ec4af3f8cda1dee2a Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Mon, 28 Dec 2020 22:02:45 -0500 Subject: [PATCH 07/20] Improved index sorting check, check-lint compelted --- .../operator/index_join_translator.cpp | 1 - src/include/optimizer/index_util.h | 36 ++++++---- src/include/optimizer/physical_operators.h | 2 +- src/optimizer/index_util.cpp | 66 +++++++++++-------- src/optimizer/plan_generator.cpp | 12 ++-- src/optimizer/rules/implementation_rules.cpp | 5 +- src/optimizer/rules/transformation_rules.cpp | 6 +- test/optimizer/logical_operator_test.cpp | 2 +- test/optimizer/physical_operator_test.cpp | 2 +- 9 files changed, 79 insertions(+), 53 deletions(-) diff --git a/src/execution/compiler/operator/index_join_translator.cpp b/src/execution/compiler/operator/index_join_translator.cpp index 07f520994d..b0bbaed965 100644 --- a/src/execution/compiler/operator/index_join_translator.cpp +++ b/src/execution/compiler/operator/index_join_translator.cpp @@ -67,7 +67,6 @@ void IndexJoinTranslator::PerformPipelineWork(WorkContext *context, FunctionBuil DeclareIndexPR(function); // @prSet(lo_index_pr, ...) FillKey(context, function, lo_index_pr_, op.GetLoIndexColumns()); - //FillKey(context, function, lo_index_pr_, op.GetLoIndexColumns()); // @prSet(hi_index_pr, ...) FillKey(context, function, hi_index_pr_, op.GetHiIndexColumns()); diff --git a/src/include/optimizer/index_util.h b/src/include/optimizer/index_util.h index 79c8872a87..07eff1baec 100644 --- a/src/include/optimizer/index_util.h +++ b/src/include/optimizer/index_util.h @@ -53,19 +53,29 @@ class IndexUtil { /** * Checks whether a given index can be used to satisfy a property. * For an index to fulfill the sort property, the columns sorted - * on must be in the same order and in the same direction. + * on must be in the same order and in the same direction as the index columns. + * However, if an intermediate column is in a bound, then the index may be used. * * @param accessor CatalogAccessor * @param prop PropertySort to satisfy * @param tbl_oid OID of the table that the index is built on * @param idx_oid OID of index to use to satisfy + * @param bounds bounds for IndexScan * @returns TRUE if the specified index can fulfill sort property + * TODO(dpatra): We should combine this function and the following function + * to check both sorts and predicates at once. */ - static bool SatisfiesSortWithIndex(catalog::CatalogAccessor *accessor, const PropertySort *prop, - catalog::table_oid_t tbl_oid, catalog::index_oid_t idx_oid); + static bool SatisfiesSortWithIndex( + catalog::CatalogAccessor *accessor, const PropertySort *prop, + catalog::table_oid_t tbl_oid, catalog::index_oid_t idx_oid, + std::unordered_map> *bounds = nullptr); /** - * Checks whether a set of predicates can be satisfied with an index + * Checks whether a set of predicates can be satisfied with an index. + * For an index to satisfy a subset of the predicates, the predicates must be in + * the same order as the index columns and start from the first column in + * the index schema. + * * @param accessor CatalogAccessor * @param tbl_oid OID of the table * @param tbl_alias Name of the table @@ -75,6 +85,8 @@ class IndexUtil { * @param scan_type IndexScanType to utilize * @param bounds Relevant bounds for the index scan * @returns Whether index can be used + * TODO(dpatra): We should combine this function and the previous function + * to check both sorts and predicates at once. */ static bool SatisfiesPredicateWithIndex( catalog::CatalogAccessor *accessor, catalog::table_oid_t tbl_oid, const std::string &tbl_alias, @@ -85,7 +97,7 @@ class IndexUtil { private: /** * Check whether predicate can take part in index computation - * @param schema Index Schema + * @param index_schema Index Schema * @param tbl_oid Table OID * @param tbl_alias Table name * @param lookup map from col_oid_t to indexkeycol_oid_t @@ -97,7 +109,7 @@ class IndexUtil { * @returns Whether predicate can be utilized */ static bool CheckPredicates( - const catalog::IndexSchema &schema, catalog::table_oid_t tbl_oid, const std::string &tbl_alias, + const catalog::IndexSchema &index_schema, catalog::table_oid_t tbl_oid, const std::string &tbl_alias, const std::unordered_map &lookup, const std::unordered_set &mapped_cols, const std::vector &predicates, bool allow_cves, planner::IndexScanType *idx_scan_type, @@ -105,16 +117,16 @@ class IndexUtil { /** * Retrieves the catalog::col_oid_t equivalent for the index - * @requires SatisfiesBaseColumnRequirement(schema) + * @requires SatisfiesBaseColumnRequirement(index_schema) * @param accessor CatalogAccessor to use * @param tbl_oid Table the index belongs to - * @param schema Schema + * @param index_schema Schema * @param key_map Mapping from col_oid_t to indexkeycol_oid_t * @param col_oids Vector to place col_oid_t translations * @returns TRUE if conversion successful */ static bool ConvertIndexKeyOidToColOid(catalog::CatalogAccessor *accessor, catalog::table_oid_t tbl_oid, - const catalog::IndexSchema &schema, + const catalog::IndexSchema &index_schema, std::unordered_map *key_map, std::vector *col_oids); @@ -122,11 +134,11 @@ class IndexUtil { * Checks whether a Index satisfies the "base column" requirement. * The base column requirement (as defined from Peloton) is where * the index is built only on base table columns. - * @param schema IndexSchema to evaluate + * @param index_schema IndexSchema to evaluate * @returns TRUE if the "base column" requirement is met */ - static bool SatisfiesBaseColumnRequirement(const catalog::IndexSchema &schema) { - for (auto &column : schema.GetColumns()) { + static bool SatisfiesBaseColumnRequirement(const catalog::IndexSchema &index_schema) { + for (auto &column : index_schema.GetColumns()) { auto recast = const_cast(column.StoredExpression().Get()); if (!IsBaseColumn(common::ManagedPointer(recast))) { return false; diff --git a/src/include/optimizer/physical_operators.h b/src/include/optimizer/physical_operators.h index b0f3ffaf60..4cc20e402f 100644 --- a/src/include/optimizer/physical_operators.h +++ b/src/include/optimizer/physical_operators.h @@ -496,7 +496,7 @@ class InnerIndexJoin : public OperatorNodeContents { */ static Operator Make(catalog::table_oid_t tbl_oid, catalog::index_oid_t idx_oid, planner::IndexScanType scan_type, std::unordered_map> join_keys, - std::vector join_predicates,bool limit_exists = false, uint32_t limit = 0); + std::vector join_predicates, bool limit_exists = false, uint32_t limit = 0); /** * Copy diff --git a/src/optimizer/index_util.cpp b/src/optimizer/index_util.cpp index c1aea23fbb..7e83c88c8e 100644 --- a/src/optimizer/index_util.cpp +++ b/src/optimizer/index_util.cpp @@ -13,8 +13,10 @@ namespace noisepage::optimizer { -bool IndexUtil::SatisfiesSortWithIndex(catalog::CatalogAccessor *accessor, const PropertySort *prop, - catalog::table_oid_t tbl_oid, catalog::index_oid_t idx_oid) { +bool IndexUtil::SatisfiesSortWithIndex( + catalog::CatalogAccessor *accessor, const PropertySort *prop, + catalog::table_oid_t tbl_oid, catalog::index_oid_t idx_oid, + std::unordered_map> *bounds) { auto &index_schema = accessor->GetIndexSchema(idx_oid); if (!SatisfiesBaseColumnRequirement(index_schema)) { return false; @@ -34,17 +36,23 @@ bool IndexUtil::SatisfiesSortWithIndex(catalog::CatalogAccessor *accessor, const return false; } - for (size_t idx = 0; idx < sort_col_size; idx++) { - // Compare col_oid_t directly due to "Base Column" requirement - auto tv_expr = prop->GetSortColumn(idx).CastManagedPointerTo(); - - // Sort(a,b,c) cannot be fulfilled by Index(a,c,b) - auto col_match = tv_expr->GetColumnOid() == mapped_cols[idx]; - - // TODO(wz2): need catalog flag for column sort direction - // Sort(a ASC) cannot be fulfilled by Index(a DESC) - auto dir_match = true; - if (!col_match || !dir_match) { + // Index into sort property columns + size_t sort_ind = 0; + // Index into index column + size_t idx_ind = 0; + while (sort_ind < sort_col_size && idx_ind < mapped_cols.size()) { + auto tv_expr = prop->GetSortColumn(sort_ind).CastManagedPointerTo(); + auto tv_col_oid = tv_expr->GetColumnOid(); + + // Sort c,b can only be fulfilled on Index a,c,b if a is a bound + if (tv_col_oid == mapped_cols[idx_ind]) { + // Column is present in both sort and index so increment both + sort_ind++, idx_ind++; + } else if (bounds != nullptr && bounds->find(lookup[mapped_cols[idx_ind]]) != bounds->end()) { + // If column is found in bounds but not index, continue + idx_ind++; + } else { + // Column not found in index so cannot use this index return false; } } @@ -78,7 +86,7 @@ bool IndexUtil::SatisfiesPredicateWithIndex( } bool IndexUtil::CheckPredicates( - const catalog::IndexSchema &schema, catalog::table_oid_t tbl_oid, const std::string &tbl_alias, + const catalog::IndexSchema &index_schema, catalog::table_oid_t tbl_oid, const std::string &tbl_alias, const std::unordered_map &lookup, const std::unordered_set &mapped_cols, const std::vector &predicates, bool allow_cves, planner::IndexScanType *idx_scan_type, @@ -177,13 +185,13 @@ bool IndexUtil::CheckPredicates( // Check predicate open/close ordering planner::IndexScanType scan_type = planner::IndexScanType::Dummy; - if (open_highs.size() == open_lows.size() && open_highs.size() == schema.GetColumns().size()) { + if (open_highs.size() == open_lows.size() && open_highs.size() == index_schema.GetColumns().size()) { // Generally on multi-column indexes, exact would result in comparing against unspecified attribute. // Only try to do an exact key lookup if potentially all attributes are specified. scan_type = planner::IndexScanType::Exact; } - for (auto &col : schema.GetColumns()) { + for (auto &col : index_schema.GetColumns()) { auto oid = col.Oid(); if (open_highs.find(oid) == open_highs.end() && open_lows.find(oid) == open_lows.end()) { // Index predicate ordering is busted @@ -205,14 +213,14 @@ bool IndexUtil::CheckPredicates( scan_type = planner::IndexScanType::AscendingOpenHigh; } } else if (open_highs.find(oid) != open_highs.end()) { - if (scan_type == planner::IndexScanType::Exact || scan_type == planner::IndexScanType::Dummy) + if (scan_type == planner::IndexScanType::Exact || scan_type == planner::IndexScanType::Dummy) { scan_type = planner::IndexScanType::AscendingOpenHighLimit; - else if (scan_type == planner::IndexScanType::AscendingClosed || + } else if (scan_type == planner::IndexScanType::AscendingClosed || scan_type == planner::IndexScanType::AscendingClosedLimit || scan_type == planner::IndexScanType::AscendingOpenHigh || - scan_type == planner::IndexScanType::AscendingOpenHighLimit) + scan_type == planner::IndexScanType::AscendingOpenHighLimit) { scan_type = planner::IndexScanType::AscendingOpenHigh; - else { + } else { // OpenHigh scan is not compatible with an OpenLow scan // Revert to a sequential scan break; @@ -236,7 +244,7 @@ bool IndexUtil::CheckPredicates( } } - if (schema.Type() == storage::index::IndexType::HASHMAP && scan_type != planner::IndexScanType::Exact) { + if (index_schema.Type() == storage::index::IndexType::HASHMAP && scan_type != planner::IndexScanType::Exact) { // This is a range-based scan, but this is a hashmap so it cannot satisfy the predicate. // // TODO(John): Ideally this check should be based off of lookups in the catalog. However, we do not @@ -256,21 +264,21 @@ bool IndexUtil::CheckPredicates( } bool IndexUtil::ConvertIndexKeyOidToColOid(catalog::CatalogAccessor *accessor, catalog::table_oid_t tbl_oid, - const catalog::IndexSchema &schema, + const catalog::IndexSchema &index_schema, std::unordered_map *key_map, std::vector *col_oids) { - NOISEPAGE_ASSERT(SatisfiesBaseColumnRequirement(schema), "GetIndexColOid() pre-cond not satisfied"); + NOISEPAGE_ASSERT(SatisfiesBaseColumnRequirement(index_schema), "GetIndexColOid() pre-cond not satisfied"); auto &tbl_schema = accessor->GetSchema(tbl_oid); - if (tbl_schema.GetColumns().size() < schema.GetColumns().size()) { + if (tbl_schema.GetColumns().size() < index_schema.GetColumns().size()) { return false; } - std::unordered_map schema_col; + std::unordered_map tbl_col_to_oid_map; for (auto &column : tbl_schema.GetColumns()) { - schema_col[column.Name()] = column.Oid(); + tbl_col_to_oid_map[column.Name()] = column.Oid(); } - for (auto &column : schema.GetColumns()) { + for (auto &column : index_schema.GetColumns()) { if (column.StoredExpression()->GetExpressionType() == parser::ExpressionType::COLUMN_VALUE) { auto tv_expr = column.StoredExpression().CastManagedPointerTo(); if (tv_expr->GetColumnOid() != catalog::INVALID_COLUMN_OID) { @@ -280,8 +288,8 @@ bool IndexUtil::ConvertIndexKeyOidToColOid(catalog::CatalogAccessor *accessor, c continue; } - auto it = schema_col.find(tv_expr->GetColumnName()); - NOISEPAGE_ASSERT(it != schema_col.end(), "Inconsistency between IndexSchema and table schema"); + auto it = tbl_col_to_oid_map.find(tv_expr->GetColumnName()); + NOISEPAGE_ASSERT(it != tbl_col_to_oid_map.end(), "Inconsistency between IndexSchema and table schema"); col_oids->push_back(it->second); key_map->insert(std::make_pair(it->second, column.Oid())); } diff --git a/src/optimizer/plan_generator.cpp b/src/optimizer/plan_generator.cpp index e9fbedbe21..78643172dd 100644 --- a/src/optimizer/plan_generator.cpp +++ b/src/optimizer/plan_generator.cpp @@ -243,11 +243,13 @@ void PlanGenerator::Visit(const IndexScan *op) { if (type == planner::IndexScanType::Exact) { // Exact lookup builder.AddIndexColumn(bound.first, bound.second[0]); - } else if (type == planner::IndexScanType::AscendingClosed || type == planner::IndexScanType::AscendingClosedLimit) { + } else if (type == planner::IndexScanType::AscendingClosed || + type == planner::IndexScanType::AscendingClosedLimit) { // Range lookup, so use lo and hi builder.AddLoIndexColumn(bound.first, bound.second[0]); builder.AddHiIndexColumn(bound.first, bound.second[1]); - } else if (type == planner::IndexScanType::AscendingOpenHigh || type == planner::IndexScanType::AscendingOpenHighLimit) { + } else if (type == planner::IndexScanType::AscendingOpenHigh || + type == planner::IndexScanType::AscendingOpenHighLimit) { // Open high scan, so use only lo builder.AddLoIndexColumn(bound.first, bound.second[0]); } else if (type == planner::IndexScanType::AscendingOpenLow) { @@ -547,7 +549,8 @@ void PlanGenerator::Visit(const InnerIndexJoin *op) { builder.AddLoIndexColumn(bound.first, common::ManagedPointer(key)); builder.AddHiIndexColumn(bound.first, common::ManagedPointer(key)); - } else if (type == planner::IndexScanType::AscendingClosed || type == planner::IndexScanType::AscendingClosedLimit) { + } else if (type == planner::IndexScanType::AscendingClosed || + type == planner::IndexScanType::AscendingClosedLimit) { // Range lookup, so use lo and hi auto lkey = parser::ExpressionUtil::EvaluateExpression(children_expr_map_, bound.second[0]).release(); auto hkey = parser::ExpressionUtil::EvaluateExpression(children_expr_map_, bound.second[1]).release(); @@ -556,7 +559,8 @@ void PlanGenerator::Visit(const InnerIndexJoin *op) { builder.AddLoIndexColumn(bound.first, common::ManagedPointer(lkey)); builder.AddHiIndexColumn(bound.first, common::ManagedPointer(hkey)); - } else if (type == planner::IndexScanType::AscendingOpenHigh || type == planner::IndexScanType::AscendingOpenHighLimit) { + } else if (type == planner::IndexScanType::AscendingOpenHigh || + type == planner::IndexScanType::AscendingOpenHighLimit) { // Open high scan, so use only lo auto key = parser::ExpressionUtil::EvaluateExpression(children_expr_map_, bound.second[0]).release(); RegisterPointerCleanup(key, true, true); diff --git a/src/optimizer/rules/implementation_rules.cpp b/src/optimizer/rules/implementation_rules.cpp index e16307e828..40df849dc8 100644 --- a/src/optimizer/rules/implementation_rules.cpp +++ b/src/optimizer/rules/implementation_rules.cpp @@ -526,8 +526,9 @@ void LogicalInnerJoinToPhysicalInnerIndexJoin::Transform( auto child_get = LogicalGet::Make(r_child->GetDatabaseOid(), r_child->GetTableOid(), join_preds, r_child->GetTableAlias(), r_child->GetIsForUpdate()); if (limit_exists) child_get.GetContentsAs()->SetLimit(limit); - auto new_child = std::make_unique(child_get.RegisterWithTxnContext(context->GetOptimizerContext()->GetTxn()), - std::move(empty), context->GetOptimizerContext()->GetTxn()); + auto new_child = std::make_unique( + child_get.RegisterWithTxnContext(context->GetOptimizerContext()->GetTxn()), + std::move(empty), context->GetOptimizerContext()->GetTxn()); std::vector> transform; LogicalGetToPhysicalIndexScan idx_scan_transform; diff --git a/src/optimizer/rules/transformation_rules.cpp b/src/optimizer/rules/transformation_rules.cpp index 31b31c1059..b5dcb5ceb1 100644 --- a/src/optimizer/rules/transformation_rules.cpp +++ b/src/optimizer/rules/transformation_rules.cpp @@ -202,13 +202,15 @@ SetLimitInLogicalInnerJoin::SetLimitInLogicalInnerJoin() { match_pattern_->AddChild(child); } -bool SetLimitInLogicalInnerJoin::Check(common::ManagedPointer plan, OptimizationContext *context) const { +bool SetLimitInLogicalInnerJoin::Check(common::ManagedPointer plan, + OptimizationContext *context) const { (void)context; (void)plan; return true; } -RulePromise SetLimitInLogicalInnerJoin::Promise(GroupExpression *group_expr) const { return RulePromise::LOGICAL_PROMISE; } +RulePromise SetLimitInLogicalInnerJoin::Promise(GroupExpression *group_expr) const + { return RulePromise::LOGICAL_PROMISE; } void SetLimitInLogicalInnerJoin::Transform(common::ManagedPointer input, std::vector> *transformed, diff --git a/test/optimizer/logical_operator_test.cpp b/test/optimizer/logical_operator_test.cpp index 7b97e74062..679f87fe6a 100644 --- a/test/optimizer/logical_operator_test.cpp +++ b/test/optimizer/logical_operator_test.cpp @@ -144,7 +144,7 @@ TEST(OperatorTests, LogicalLimitTest) { parser::AbstractExpression *sort_expr_ori = new parser::ConstantValueExpression(type::TypeId::TINYINT, execution::sql::Integer(1)); auto sort_expr = common::ManagedPointer(sort_expr_ori); - OrderByOrderingType sort_dir = OrderByOrderingType::ASC; + catalog::OrderByOrderingType sort_dir = catalog::OrderByOrderingType::ASC; // Check that all of our GET methods work as expected Operator op1 = LogicalLimit::Make(offset, limit, {sort_expr}, {sort_dir}).RegisterWithTxnContext(txn_context); diff --git a/test/optimizer/physical_operator_test.cpp b/test/optimizer/physical_operator_test.cpp index 49fe1956d4..19b0b96783 100644 --- a/test/optimizer/physical_operator_test.cpp +++ b/test/optimizer/physical_operator_test.cpp @@ -421,7 +421,7 @@ TEST(OperatorTests, LimitTest) { size_t limit = 22; auto sort_expr_ori = new parser::ConstantValueExpression(type::TypeId::TINYINT, execution::sql::Integer(1)); auto sort_expr = common::ManagedPointer(sort_expr_ori); - OrderByOrderingType sort_dir = OrderByOrderingType::ASC; + catalog::OrderByOrderingType sort_dir = catalog::OrderByOrderingType::ASC; // Check that all of our GET methods work as expected Operator op1 = Limit::Make(offset, limit, {sort_expr}, {sort_dir}).RegisterWithTxnContext(txn_context); From 7845fd0d7ad61f36310461f84dd0a07ba793ab14 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Mon, 28 Dec 2020 22:29:16 -0500 Subject: [PATCH 08/20] Format + tests --- src/execution/sql/aggregation_hash_table.cpp | 4 +- src/execution/sql/join_hash_table.cpp | 4 +- src/execution/vm/vm.cpp | 26 +- src/include/common/container/concurrent_map.h | 2 +- src/include/execution/sql/memory_pool.h | 2 +- .../sql/vector_projection_iterator.h | 3 +- src/include/execution/util/chunked_vector.h | 2 +- src/include/execution/vm/module.h | 2 +- src/include/optimizer/index_util.h | 4 +- src/include/optimizer/physical_operators.h | 8 +- .../optimizer/rules/transformation_rules.h | 56 ++-- .../planner/plannodes/index_join_plan_node.h | 1 - .../planner/plannodes/order_by_plan_node.h | 3 +- src/include/settings/settings_param.h | 8 +- src/optimizer/index_util.cpp | 13 +- src/optimizer/physical_operators.cpp | 3 +- src/optimizer/rules/implementation_rules.cpp | 9 +- src/optimizer/rules/transformation_rules.cpp | 9 +- src/settings/settings_manager.cpp | 4 +- test/optimizer/tpcc_plan_index_scan.cpp | 292 ++++++++++++++++++ 20 files changed, 372 insertions(+), 83 deletions(-) diff --git a/src/execution/sql/aggregation_hash_table.cpp b/src/execution/sql/aggregation_hash_table.cpp index 5192496a57..3d34c8b2eb 100644 --- a/src/execution/sql/aggregation_hash_table.cpp +++ b/src/execution/sql/aggregation_hash_table.cpp @@ -319,8 +319,8 @@ void AggregationHashTable::LookupInitial() { // use SIMD gathers if hashes vector is full. For some reason it // isn't. Investigate why. UnaryOperationExecutor::Execute( - exec_settings_, *batch_state_->Hashes(), - batch_state_->Entries(), [&](const hash_t hash) noexcept { return hash_table_.FindChainHead(hash); }); + exec_settings_, *batch_state_->Hashes(), batch_state_->Entries(), + [&](const hash_t hash) noexcept { return hash_table_.FindChainHead(hash); }); // Find non-null entries whose keys must be checked and place them in the // key-not-equal list which is used during key equality checking. diff --git a/src/execution/sql/join_hash_table.cpp b/src/execution/sql/join_hash_table.cpp index 60df330154..ff602ad91e 100644 --- a/src/execution/sql/join_hash_table.cpp +++ b/src/execution/sql/join_hash_table.cpp @@ -519,8 +519,8 @@ void JoinHashTable::Build() { void JoinHashTable::LookupBatchInChainingHashTable(const Vector &hashes, Vector *results) const { UnaryOperationExecutor::Execute( - exec_settings_, hashes, - results, [&](const hash_t hash_val) noexcept { return chaining_hash_table_.FindChainHead(hash_val); }); + exec_settings_, hashes, results, + [&](const hash_t hash_val) noexcept { return chaining_hash_table_.FindChainHead(hash_val); }); } void JoinHashTable::LookupBatchInConciseHashTable(const Vector &hashes, Vector *results) const { diff --git a/src/execution/vm/vm.cpp b/src/execution/vm/vm.cpp index 169f8582c2..e82193f019 100644 --- a/src/execution/vm/vm.cpp +++ b/src/execution/vm/vm.cpp @@ -160,20 +160,20 @@ void VM::Interpret(const uint8_t *ip, Frame *frame) { // NOLINT // TODO(pmenon): Should these READ/PEEK macros take in a vm::OperandType so // that we can infer primitive types using traits? This minimizes number of // changes if the underlying offset/bytecode/register sizes changes? -#define PEEK_JMP_OFFSET() Peek(&ip) /* NOLINT */ -#define READ_IMM1() Read(&ip) /* NOLINT */ -#define READ_IMM2() Read(&ip) /* NOLINT */ -#define READ_IMM4() Read(&ip) /* NOLINT */ -#define READ_IMM8() Read(&ip) /* NOLINT */ -#define READ_IMM4F() Read(&ip) /* NOLINT */ -#define READ_IMM8F() Read(&ip) /* NOLINT */ -#define READ_UIMM2() Read(&ip) /* NOLINT */ -#define READ_UIMM4() Read(&ip) /* NOLINT */ -#define READ_JMP_OFFSET() READ_IMM4() /* NOLINT */ -#define READ_LOCAL_ID() Read(&ip) /* NOLINT */ -#define READ_STATIC_LOCAL_ID() Read(&ip) /* NOLINT */ +#define PEEK_JMP_OFFSET() Peek(&ip) /* NOLINT */ +#define READ_IMM1() Read(&ip) /* NOLINT */ +#define READ_IMM2() Read(&ip) /* NOLINT */ +#define READ_IMM4() Read(&ip) /* NOLINT */ +#define READ_IMM8() Read(&ip) /* NOLINT */ +#define READ_IMM4F() Read(&ip) /* NOLINT */ +#define READ_IMM8F() Read(&ip) /* NOLINT */ +#define READ_UIMM2() Read(&ip) /* NOLINT */ +#define READ_UIMM4() Read(&ip) /* NOLINT */ +#define READ_JMP_OFFSET() READ_IMM4() /* NOLINT */ +#define READ_LOCAL_ID() Read(&ip) /* NOLINT */ +#define READ_STATIC_LOCAL_ID() Read(&ip) /* NOLINT */ #define READ_OP() Read>(&ip) /* NOLINT */ -#define READ_FUNC_ID() READ_UIMM2() /* NOLINT */ +#define READ_FUNC_ID() READ_UIMM2() /* NOLINT */ #define OP(name) op_##name #define DISPATCH_NEXT() \ diff --git a/src/include/common/container/concurrent_map.h b/src/include/common/container/concurrent_map.h index 7d544da974..17b6bf2d5b 100644 --- a/src/include/common/container/concurrent_map.h +++ b/src/include/common/container/concurrent_map.h @@ -157,7 +157,7 @@ class ConcurrentMap { * True for Insertion, False for No Insertion. */ template - std::pair Emplace(Args &&... args) { + std::pair Emplace(Args &&...args) { auto result = map_.emplace(std::move(args)...); return {Iterator(result.first), result.second}; } diff --git a/src/include/execution/sql/memory_pool.h b/src/include/execution/sql/memory_pool.h index 688ef9f32f..cf8b7a7955 100644 --- a/src/include/execution/sql/memory_pool.h +++ b/src/include/execution/sql/memory_pool.h @@ -231,7 +231,7 @@ class EXPORT MemoryPool { * @return A construct object allocated from this pool. */ template - MemPoolPtr MakeObject(Args &&... args) { + MemPoolPtr MakeObject(Args &&...args) { auto *object_mem = Allocate(sizeof(T), true); auto *obj = new (object_mem) T(std::forward(args)...); return MemPoolPtr(obj); diff --git a/src/include/execution/sql/vector_projection_iterator.h b/src/include/execution/sql/vector_projection_iterator.h index 778a6136b6..db6d415546 100644 --- a/src/include/execution/sql/vector_projection_iterator.h +++ b/src/include/execution/sql/vector_projection_iterator.h @@ -78,8 +78,7 @@ class VectorProjectionIterator { SelectionVector ret{}; for (sel_t i = 0; i < common::Constants::K_DEFAULT_VECTOR_SIZE; i++) ret[i] = i; return ret; - } - (); + }(); public: /** diff --git a/src/include/execution/util/chunked_vector.h b/src/include/execution/util/chunked_vector.h index b125cc39cf..41098635ba 100644 --- a/src/include/execution/util/chunked_vector.h +++ b/src/include/execution/util/chunked_vector.h @@ -867,7 +867,7 @@ class ChunkedVectorT { * In-place construct an element using arguments @em args and append to the end of the vector. */ template - void emplace_back(Args &&... args) { // NOLINT + void emplace_back(Args &&...args) { // NOLINT T *space = reinterpret_cast(vec_.Append()); new (space) T(std::forward(args)...); } diff --git a/src/include/execution/vm/module.h b/src/include/execution/vm/module.h index 8273d3ca67..8654944cd2 100644 --- a/src/include/execution/vm/module.h +++ b/src/include/execution/vm/module.h @@ -182,7 +182,7 @@ namespace detail { inline void CopyAll(UNUSED_ATTRIBUTE uint8_t *buffer) {} template -inline void CopyAll(uint8_t *buffer, const HeadT &head, const RestT &... rest) { +inline void CopyAll(uint8_t *buffer, const HeadT &head, const RestT &...rest) { std::memcpy(buffer, reinterpret_cast(&head), sizeof(head)); CopyAll(buffer + sizeof(head), rest...); } diff --git a/src/include/optimizer/index_util.h b/src/include/optimizer/index_util.h index 07eff1baec..281a7ce95d 100644 --- a/src/include/optimizer/index_util.h +++ b/src/include/optimizer/index_util.h @@ -66,8 +66,8 @@ class IndexUtil { * to check both sorts and predicates at once. */ static bool SatisfiesSortWithIndex( - catalog::CatalogAccessor *accessor, const PropertySort *prop, - catalog::table_oid_t tbl_oid, catalog::index_oid_t idx_oid, + catalog::CatalogAccessor *accessor, const PropertySort *prop, catalog::table_oid_t tbl_oid, + catalog::index_oid_t idx_oid, std::unordered_map> *bounds = nullptr); /** diff --git a/src/include/optimizer/physical_operators.h b/src/include/optimizer/physical_operators.h index 4cc20e402f..495c4d0880 100644 --- a/src/include/optimizer/physical_operators.h +++ b/src/include/optimizer/physical_operators.h @@ -247,8 +247,8 @@ class IndexScan : public OperatorNodeContents { std::unordered_map> bounds_; /** - * Whether limit exists - */ + * Whether limit exists + */ bool limit_exists_; /** @@ -572,8 +572,8 @@ class InnerIndexJoin : public OperatorNodeContents { std::vector join_predicates_; /** -* Whether limit exists -*/ + * Whether limit exists + */ bool limit_exists_; /** diff --git a/src/include/optimizer/rules/transformation_rules.h b/src/include/optimizer/rules/transformation_rules.h index 138853f2ba..316d928fe8 100644 --- a/src/include/optimizer/rules/transformation_rules.h +++ b/src/include/optimizer/rules/transformation_rules.h @@ -78,26 +78,26 @@ class SetLimitInGet : public Rule { SetLimitInGet(); /** - * Gets the rule's promise to apply against a GroupExpression - * @param group_expr GroupExpression to compute promise from - * @returns The promise value of applying the rule for ordering - */ + * Gets the rule's promise to apply against a GroupExpression + * @param group_expr GroupExpression to compute promise from + * @returns The promise value of applying the rule for ordering + */ RulePromise Promise(GroupExpression *group_expr) const override; /** - * Checks whether the given rule can be applied - * @param plan AbstractOptimizerNode to check - * @param context Current OptimizationContext executing under - * @returns Whether the input AbstractOptimizerNode passes the check - */ + * Checks whether the given rule can be applied + * @param plan AbstractOptimizerNode to check + * @param context Current OptimizationContext executing under + * @returns Whether the input AbstractOptimizerNode passes the check + */ bool Check(common::ManagedPointer plan, OptimizationContext *context) const override; /** - * Transforms the input expression using the given rule - * @param input Input AbstractOptimizerNode to transform - * @param transformed Vector of transformed AbstractOptimizerNodes - * @param context Current OptimizationContext executing under - */ + * Transforms the input expression using the given rule + * @param input Input AbstractOptimizerNode to transform + * @param transformed Vector of transformed AbstractOptimizerNodes + * @param context Current OptimizationContext executing under + */ void Transform(common::ManagedPointer input, std::vector> *transformed, OptimizationContext *context) const override; @@ -115,26 +115,26 @@ class SetLimitInLogicalInnerJoin : public Rule { SetLimitInLogicalInnerJoin(); /** - * Gets the rule's promise to apply against a GroupExpression - * @param group_expr GroupExpression to compute promise from - * @returns The promise value of applying the rule for ordering - */ + * Gets the rule's promise to apply against a GroupExpression + * @param group_expr GroupExpression to compute promise from + * @returns The promise value of applying the rule for ordering + */ RulePromise Promise(GroupExpression *group_expr) const override; /** - * Checks whether the given rule can be applied - * @param plan AbstractOptimizerNode to check - * @param context Current OptimizationContext executing under - * @returns Whether the input AbstractOptimizerNode passes the check - */ + * Checks whether the given rule can be applied + * @param plan AbstractOptimizerNode to check + * @param context Current OptimizationContext executing under + * @returns Whether the input AbstractOptimizerNode passes the check + */ bool Check(common::ManagedPointer plan, OptimizationContext *context) const override; /** - * Transforms the input expression using the given rule - * @param input Input AbstractOptimizerNode to transform - * @param transformed Vector of transformed AbstractOptimizerNodes - * @param context Current OptimizationContext executing under - */ + * Transforms the input expression using the given rule + * @param input Input AbstractOptimizerNode to transform + * @param transformed Vector of transformed AbstractOptimizerNodes + * @param context Current OptimizationContext executing under + */ void Transform(common::ManagedPointer input, std::vector> *transformed, OptimizationContext *context) const override; diff --git a/src/include/planner/plannodes/index_join_plan_node.h b/src/include/planner/plannodes/index_join_plan_node.h index d343eb8d1b..28d3705388 100644 --- a/src/include/planner/plannodes/index_join_plan_node.h +++ b/src/include/planner/plannodes/index_join_plan_node.h @@ -119,7 +119,6 @@ class IndexJoinPlanNode : public AbstractJoinPlanNode { */ bool scan_has_limit_{false}; - IndexScanType scan_type_; std::unordered_map lo_index_cols_{}; std::unordered_map hi_index_cols_{}; diff --git a/src/include/planner/plannodes/order_by_plan_node.h b/src/include/planner/plannodes/order_by_plan_node.h index 89a5f8acee..9b8ef927d7 100644 --- a/src/include/planner/plannodes/order_by_plan_node.h +++ b/src/include/planner/plannodes/order_by_plan_node.h @@ -41,8 +41,7 @@ class OrderByPlanNode : public AbstractPlanNode { * @param ordering ordering (ASC or DESC) for key * @return builder object */ - Builder &AddSortKey(common::ManagedPointer key, - catalog::OrderByOrderingType ordering) { + Builder &AddSortKey(common::ManagedPointer key, catalog::OrderByOrderingType ordering) { sort_keys_.emplace_back(key, ordering); return *this; } diff --git a/src/include/settings/settings_param.h b/src/include/settings/settings_param.h index 139135d08f..b82d76653c 100644 --- a/src/include/settings/settings_param.h +++ b/src/include/settings/settings_param.h @@ -27,11 +27,11 @@ using callback_fn = void (*)(void *, void *, DBMain *, common::ManagedPointer> *bounds) { auto &index_schema = accessor->GetIndexSchema(idx_oid); if (!SatisfiesBaseColumnRequirement(index_schema)) { @@ -216,9 +216,9 @@ bool IndexUtil::CheckPredicates( if (scan_type == planner::IndexScanType::Exact || scan_type == planner::IndexScanType::Dummy) { scan_type = planner::IndexScanType::AscendingOpenHighLimit; } else if (scan_type == planner::IndexScanType::AscendingClosed || - scan_type == planner::IndexScanType::AscendingClosedLimit || - scan_type == planner::IndexScanType::AscendingOpenHigh || - scan_type == planner::IndexScanType::AscendingOpenHighLimit) { + scan_type == planner::IndexScanType::AscendingClosedLimit || + scan_type == planner::IndexScanType::AscendingOpenHigh || + scan_type == planner::IndexScanType::AscendingOpenHighLimit) { scan_type = planner::IndexScanType::AscendingOpenHigh; } else { // OpenHigh scan is not compatible with an OpenLow scan @@ -228,8 +228,7 @@ bool IndexUtil::CheckPredicates( bounds->insert(std::make_pair( oid, std::vector{open_highs[oid], planner::IndexExpression(nullptr)})); } else if (open_lows.find(oid) != open_lows.end()) { - if (scan_type == planner::IndexScanType::Exact || - scan_type == planner::IndexScanType::Dummy || + if (scan_type == planner::IndexScanType::Exact || scan_type == planner::IndexScanType::Dummy || scan_type == planner::IndexScanType::AscendingClosed || scan_type == planner::IndexScanType::AscendingClosedLimit || scan_type == planner::IndexScanType::AscendingOpenLow) { diff --git a/src/optimizer/physical_operators.cpp b/src/optimizer/physical_operators.cpp index 0dfa3f08aa..2027385dbe 100644 --- a/src/optimizer/physical_operators.cpp +++ b/src/optimizer/physical_operators.cpp @@ -282,8 +282,7 @@ BaseOperatorNodeContents *InnerIndexJoin::Copy() const { return new InnerIndexJo Operator InnerIndexJoin::Make( catalog::table_oid_t tbl_oid, catalog::index_oid_t idx_oid, planner::IndexScanType scan_type, std::unordered_map> join_keys, - std::vector join_predicates, - bool limit_exists, uint32_t limit) { + std::vector join_predicates, bool limit_exists, uint32_t limit) { auto *join = new InnerIndexJoin(); join->tbl_oid_ = tbl_oid; join->idx_oid_ = idx_oid; diff --git a/src/optimizer/rules/implementation_rules.cpp b/src/optimizer/rules/implementation_rules.cpp index 40df849dc8..0dfb410073 100644 --- a/src/optimizer/rules/implementation_rules.cpp +++ b/src/optimizer/rules/implementation_rules.cpp @@ -144,7 +144,8 @@ void LogicalGetToPhysicalIndexScan::Transform(common::ManagedPointerGetTableOid(), index)) { + if (sort_possible && + IndexUtil::SatisfiesSortWithIndex(accessor, sort_prop, get->GetTableOid(), index, &bounds)) { // Index also satisfies sort properties so can potentially push down limit and sort auto op = std::make_unique( IndexScan::Make(db_oid, get->GetTableOid(), index, std::move(preds), is_update, scan_type, @@ -526,9 +527,9 @@ void LogicalInnerJoinToPhysicalInnerIndexJoin::Transform( auto child_get = LogicalGet::Make(r_child->GetDatabaseOid(), r_child->GetTableOid(), join_preds, r_child->GetTableAlias(), r_child->GetIsForUpdate()); if (limit_exists) child_get.GetContentsAs()->SetLimit(limit); - auto new_child = std::make_unique( - child_get.RegisterWithTxnContext(context->GetOptimizerContext()->GetTxn()), - std::move(empty), context->GetOptimizerContext()->GetTxn()); + auto new_child = + std::make_unique(child_get.RegisterWithTxnContext(context->GetOptimizerContext()->GetTxn()), + std::move(empty), context->GetOptimizerContext()->GetTxn()); std::vector> transform; LogicalGetToPhysicalIndexScan idx_scan_transform; diff --git a/src/optimizer/rules/transformation_rules.cpp b/src/optimizer/rules/transformation_rules.cpp index b5dcb5ceb1..a674b09471 100644 --- a/src/optimizer/rules/transformation_rules.cpp +++ b/src/optimizer/rules/transformation_rules.cpp @@ -209,12 +209,13 @@ bool SetLimitInLogicalInnerJoin::Check(common::ManagedPointer input, - std::vector> *transformed, - UNUSED_ATTRIBUTE OptimizationContext *context) const { + std::vector> *transformed, + UNUSED_ATTRIBUTE OptimizationContext *context) const { auto join = input->GetChildren()[0]->Contents()->GetContentsAs(); size_t limit = input->Contents()->GetContentsAs()->GetLimit(); join->SetLimit(limit); diff --git a/src/settings/settings_manager.cpp b/src/settings/settings_manager.cpp index 0802e24a7a..c9611ace88 100644 --- a/src/settings/settings_manager.cpp +++ b/src/settings/settings_manager.cpp @@ -54,9 +54,9 @@ void SettingsManager::ValidateParams() { // execution::sql::Integer(1024)), parser::ConstantValueExpression(type::TypeID::INTEGER, // execution::sql::Integer(65535))); -#define __SETTING_VALIDATE__ // NOLINT +#define __SETTING_VALIDATE__ // NOLINT #include "settings/settings_defs.h" // NOLINT -#undef __SETTING_VALIDATE__ // NOLINT +#undef __SETTING_VALIDATE__ // NOLINT } void SettingsManager::ValidateSetting(Param param, const parser::ConstantValueExpression &min_value, diff --git a/test/optimizer/tpcc_plan_index_scan.cpp b/test/optimizer/tpcc_plan_index_scan.cpp index cef139618c..7f689efcc7 100644 --- a/test/optimizer/tpcc_plan_index_scan.cpp +++ b/test/optimizer/tpcc_plan_index_scan.cpp @@ -36,6 +36,7 @@ TEST_F(TpccPlanIndexScanTests, SimplePredicateIndexScan) { {schema.GetColumn("no_o_id").Oid(), schema.GetColumn("no_w_id").Oid()}); EXPECT_EQ(index_plan->IsForUpdate(), false); EXPECT_EQ(index_plan->GetDatabaseOid(), test->db_); + EXPECT_EQ(index_plan->GetScanHasLimit(), false); auto scan_pred = index_plan->GetScanPredicate(); EXPECT_TRUE(scan_pred != nullptr); @@ -54,6 +55,51 @@ TEST_F(TpccPlanIndexScanTests, SimplePredicateIndexScan) { OptimizeQuery(query, tbl_new_order_, check); } +// NOLINTNEXTLINE +TEST_F(TpccPlanIndexScanTests, SimplePredicateIndexScanWithLimit) { + auto check = [](TpccPlanTest *test, parser::SelectStatement *sel_stmt, catalog::table_oid_t tbl_oid, + std::unique_ptr plan) { + // Get Schema + auto &schema = test->accessor_->GetSchema(test->tbl_new_order_); + + // Limit + EXPECT_EQ(plan->GetChildrenSize(), 1); + EXPECT_EQ(plan->GetPlanNodeType(), planner::PlanNodeType::LIMIT); + auto limit_plan = reinterpret_cast(plan.get()); + EXPECT_EQ(limit_plan->GetLimit(), sel_stmt->GetSelectLimit()->GetLimit()); + + // Should use New Order Primary Key (NO_W_ID, NO_D_ID, NO_O_ID) + auto plani = plan->GetChild(0); + EXPECT_EQ(plani->GetPlanNodeType(), planner::PlanNodeType::INDEXSCAN); + EXPECT_EQ(plani->GetChildrenSize(), 0); + auto index_plan = reinterpret_cast(plani); + EXPECT_EQ(index_plan->GetIndexOid(), test->pk_new_order_); + EXPECT_EQ(index_plan->GetScanHasLimit(), true); + EXPECT_EQ(index_plan->GetScanLimit(), 15); + test->CheckOids(index_plan->GetColumnOids(), + {schema.GetColumn("no_o_id").Oid(), schema.GetColumn("no_w_id").Oid()}); + + EXPECT_EQ(index_plan->IsForUpdate(), false); + EXPECT_EQ(index_plan->GetDatabaseOid(), test->db_); + + // Check Index Scan Predicate + auto scan_pred = index_plan->GetScanPredicate(); + EXPECT_TRUE(scan_pred != nullptr); + EXPECT_EQ(scan_pred->GetExpressionType(), parser::ExpressionType::COMPARE_EQUAL); + EXPECT_EQ(scan_pred->GetChildrenSize(), 2); + EXPECT_EQ(scan_pred->GetChild(0)->GetExpressionType(), parser::ExpressionType::COLUMN_VALUE); + EXPECT_EQ(scan_pred->GetChild(1)->GetExpressionType(), parser::ExpressionType::VALUE_CONSTANT); + auto tve = scan_pred->GetChild(0).CastManagedPointerTo(); + auto cve = scan_pred->GetChild(1).CastManagedPointerTo(); + EXPECT_EQ(tve->GetColumnName(), "no_w_id"); + EXPECT_EQ(tve->GetColumnOid(), schema.GetColumn("no_w_id").Oid()); + EXPECT_EQ(cve->Peek(), 1); + }; + + std::string query = "SELECT NO_O_ID FROM \"NEW ORDER\" WHERE NO_W_ID = 1 LIMIT 15"; + OptimizeQuery(query, tbl_new_order_, check); +} + // NOLINTNEXTLINE TEST_F(TpccPlanIndexScanTests, SimplePredicateFlippedIndexScan) { auto check = [](TpccPlanTest *test, parser::SelectStatement *sel_stmt, catalog::table_oid_t tbl_oid, @@ -70,6 +116,7 @@ TEST_F(TpccPlanIndexScanTests, SimplePredicateFlippedIndexScan) { {schema.GetColumn("no_o_id").Oid(), schema.GetColumn("no_w_id").Oid()}); EXPECT_EQ(index_plan->IsForUpdate(), false); EXPECT_EQ(index_plan->GetDatabaseOid(), test->db_); + EXPECT_EQ(index_plan->GetScanHasLimit(), false); auto scan_pred = index_plan->GetScanPredicate(); EXPECT_TRUE(scan_pred != nullptr); @@ -104,6 +151,7 @@ TEST_F(TpccPlanIndexScanTests, IndexFulfillSort) { EXPECT_EQ(index_plan->IsForUpdate(), false); EXPECT_EQ(index_plan->GetDatabaseOid(), test->db_); EXPECT_EQ(index_plan->GetScanPredicate().Get(), nullptr); + EXPECT_EQ(index_plan->GetScanHasLimit(), false); }; std::string query = "SELECT NO_O_ID FROM \"NEW ORDER\" ORDER BY NO_W_ID"; @@ -141,6 +189,7 @@ TEST_F(TpccPlanIndexScanTests, IndexFulfillSortAndPredicate) { {schema.GetColumn("no_o_id").Oid(), schema.GetColumn("no_w_id").Oid()}); EXPECT_EQ(index_plan->IsForUpdate(), false); EXPECT_EQ(index_plan->GetDatabaseOid(), test->db_); + EXPECT_EQ(index_plan->GetScanHasLimit(), false); // Check Index Scan Predicate auto scan_pred = index_plan->GetScanPredicate(); @@ -160,6 +209,247 @@ TEST_F(TpccPlanIndexScanTests, IndexFulfillSortAndPredicate) { OptimizeQuery(query, tbl_new_order_, check); } +// NOLINTNEXTLINE +TEST_F(TpccPlanIndexScanTests, IndexFulfillSortWithLimit) { + auto check = [](TpccPlanTest *test, parser::SelectStatement *sel_stmt, catalog::table_oid_t tbl_oid, + std::unique_ptr plan) { + // Get Schema + auto &schema = test->accessor_->GetSchema(test->tbl_new_order_); + EXPECT_EQ(plan->GetChildrenSize(), 1); + EXPECT_EQ(plan->GetPlanNodeType(), planner::PlanNodeType::PROJECTION); + + // Limit + auto planl = plan->GetChild(0); + EXPECT_EQ(planl->GetChildrenSize(), 1); + EXPECT_EQ(planl->GetPlanNodeType(), planner::PlanNodeType::LIMIT); + auto limit_plan = reinterpret_cast(planl); + EXPECT_EQ(limit_plan->GetLimit(), sel_stmt->GetSelectLimit()->GetLimit()); + + // Order By + auto planc = planl->GetChild(0); + EXPECT_EQ(planc->GetChildrenSize(), 1); + EXPECT_EQ(planc->GetPlanNodeType(), planner::PlanNodeType::ORDERBY); + auto orderby = reinterpret_cast(planc); + EXPECT_EQ(orderby->HasLimit(), true); + EXPECT_EQ(orderby->GetLimit(), sel_stmt->GetSelectLimit()->GetLimit()); + EXPECT_EQ(orderby->GetSortKeys().size(), 1); + EXPECT_EQ(orderby->GetSortKeys()[0].second, catalog::OrderByOrderingType::ASC); + auto sortkey = orderby->GetSortKeys()[0].first.CastManagedPointerTo(); + EXPECT_TRUE(sortkey != nullptr); + EXPECT_EQ(sortkey->GetExpressionType(), parser::ExpressionType::VALUE_TUPLE); + EXPECT_EQ(sortkey->GetTupleIdx(), 0); + EXPECT_EQ(sortkey->GetValueIdx(), 0); + + // Should use New Order Primary Key (NO_W_ID, NO_D_ID, NO_O_ID) + auto plani = planc->GetChild(0); + EXPECT_EQ(plani->GetPlanNodeType(), planner::PlanNodeType::INDEXSCAN); + EXPECT_EQ(plani->GetChildrenSize(), 0); + auto index_plan = reinterpret_cast(plani); + EXPECT_EQ(index_plan->GetIndexOid(), test->pk_new_order_); + EXPECT_EQ(index_plan->GetScanHasLimit(), true); + EXPECT_EQ(index_plan->GetScanLimit(), 15); + test->CheckOids(index_plan->GetColumnOids(), + {schema.GetColumn("no_o_id").Oid(), schema.GetColumn("no_w_id").Oid()}); + + EXPECT_EQ(index_plan->IsForUpdate(), false); + EXPECT_EQ(index_plan->GetDatabaseOid(), test->db_); + }; + + std::string query = "SELECT NO_O_ID FROM \"NEW ORDER\" ORDER BY NO_W_ID LIMIT 15"; + OptimizeQuery(query, tbl_new_order_, check); +} + +// NOLINTNEXTLINE +TEST_F(TpccPlanIndexScanTests, IndexCannotFulfillSortWithPredicateWithLimit) { + auto check = [](TpccPlanTest *test, parser::SelectStatement *sel_stmt, catalog::table_oid_t tbl_oid, + std::unique_ptr plan) { + // Get Schema + auto &schema = test->accessor_->GetSchema(test->tbl_new_order_); + + // Limit + EXPECT_EQ(plan->GetChildrenSize(), 1); + EXPECT_EQ(plan->GetPlanNodeType(), planner::PlanNodeType::LIMIT); + auto limit_plan = reinterpret_cast(plan.get()); + EXPECT_EQ(limit_plan->GetLimit(), sel_stmt->GetSelectLimit()->GetLimit()); + + // Order By + auto planc = plan->GetChild(0); + EXPECT_EQ(planc->GetChildrenSize(), 1); + EXPECT_EQ(planc->GetPlanNodeType(), planner::PlanNodeType::ORDERBY); + auto orderby = reinterpret_cast(planc); + EXPECT_EQ(orderby->HasLimit(), true); + EXPECT_EQ(orderby->GetLimit(), sel_stmt->GetSelectLimit()->GetLimit()); + EXPECT_EQ(orderby->GetSortKeys().size(), 1); + EXPECT_EQ(orderby->GetSortKeys()[0].second, catalog::OrderByOrderingType::ASC); + auto sortkey = orderby->GetSortKeys()[0].first.CastManagedPointerTo(); + EXPECT_TRUE(sortkey != nullptr); + EXPECT_EQ(sortkey->GetExpressionType(), parser::ExpressionType::VALUE_TUPLE); + EXPECT_EQ(sortkey->GetTupleIdx(), 0); + EXPECT_EQ(sortkey->GetValueIdx(), 0); + + // Should use New Order Primary Key (NO_W_ID, NO_D_ID, NO_O_ID) + auto plani = planc->GetChild(0); + EXPECT_EQ(plani->GetPlanNodeType(), planner::PlanNodeType::INDEXSCAN); + EXPECT_EQ(plani->GetChildrenSize(), 0); + auto index_plan = reinterpret_cast(plani); + EXPECT_EQ(index_plan->GetIndexOid(), test->pk_new_order_); + EXPECT_EQ(index_plan->GetScanHasLimit(), false); + test->CheckOids(index_plan->GetColumnOids(), + {schema.GetColumn("no_o_id").Oid(), schema.GetColumn("no_w_id").Oid()}); + + EXPECT_EQ(index_plan->IsForUpdate(), false); + EXPECT_EQ(index_plan->GetDatabaseOid(), test->db_); + + // Check Index Scan Predicate + auto scan_pred = index_plan->GetScanPredicate(); + EXPECT_TRUE(scan_pred != nullptr); + EXPECT_EQ(scan_pred->GetExpressionType(), parser::ExpressionType::COMPARE_EQUAL); + EXPECT_EQ(scan_pred->GetChildrenSize(), 2); + EXPECT_EQ(scan_pred->GetChild(0)->GetExpressionType(), parser::ExpressionType::COLUMN_VALUE); + EXPECT_EQ(scan_pred->GetChild(1)->GetExpressionType(), parser::ExpressionType::VALUE_CONSTANT); + auto tve = scan_pred->GetChild(0).CastManagedPointerTo(); + auto cve = scan_pred->GetChild(1).CastManagedPointerTo(); + EXPECT_EQ(tve->GetColumnName(), "no_w_id"); + EXPECT_EQ(tve->GetColumnOid(), schema.GetColumn("no_w_id").Oid()); + EXPECT_EQ(cve->Peek(), 1); + }; + + std::string query = "SELECT NO_O_ID FROM \"NEW ORDER\" WHERE NO_W_ID = 1 ORDER BY NO_O_ID LIMIT 15"; + OptimizeQuery(query, tbl_new_order_, check); +} + +// NOLINTNEXTLINE +TEST_F(TpccPlanIndexScanTests, IndexFulfillSortAndRequiredPredicateWithLimit) { + auto check = [](TpccPlanTest *test, parser::SelectStatement *sel_stmt, catalog::table_oid_t tbl_oid, + std::unique_ptr plan) { + // Get Schema + auto &schema = test->accessor_->GetSchema(test->tbl_new_order_); + EXPECT_EQ(plan->GetChildrenSize(), 1); + EXPECT_EQ(plan->GetPlanNodeType(), planner::PlanNodeType::PROJECTION); + + // Limit + auto planl = plan->GetChild(0); + EXPECT_EQ(planl->GetChildrenSize(), 1); + EXPECT_EQ(planl->GetPlanNodeType(), planner::PlanNodeType::LIMIT); + auto limit_plan = reinterpret_cast(planl); + EXPECT_EQ(limit_plan->GetLimit(), sel_stmt->GetSelectLimit()->GetLimit()); + + // Order By + auto planc = planl->GetChild(0); + EXPECT_EQ(planc->GetChildrenSize(), 1); + EXPECT_EQ(planc->GetPlanNodeType(), planner::PlanNodeType::ORDERBY); + auto orderby = reinterpret_cast(planc); + EXPECT_EQ(orderby->HasLimit(), true); + EXPECT_EQ(orderby->GetLimit(), sel_stmt->GetSelectLimit()->GetLimit()); + EXPECT_EQ(orderby->GetSortKeys().size(), 1); + EXPECT_EQ(orderby->GetSortKeys()[0].second, catalog::OrderByOrderingType::ASC); + auto sortkey = orderby->GetSortKeys()[0].first.CastManagedPointerTo(); + EXPECT_TRUE(sortkey != nullptr); + EXPECT_EQ(sortkey->GetExpressionType(), parser::ExpressionType::VALUE_TUPLE); + EXPECT_EQ(sortkey->GetTupleIdx(), 0); + EXPECT_EQ(sortkey->GetValueIdx(), 0); + + // Should use New Order Primary Key (NO_W_ID, NO_D_ID, NO_O_ID) + auto plani = planc->GetChild(0); + EXPECT_EQ(plani->GetPlanNodeType(), planner::PlanNodeType::INDEXSCAN); + EXPECT_EQ(plani->GetChildrenSize(), 0); + auto index_plan = reinterpret_cast(plani); + EXPECT_EQ(index_plan->GetIndexOid(), test->pk_new_order_); + EXPECT_EQ(index_plan->GetScanHasLimit(), true); + EXPECT_EQ(index_plan->GetScanLimit(), 15); + test->CheckOids(index_plan->GetColumnOids(), {schema.GetColumn("no_o_id").Oid(), schema.GetColumn("no_w_id").Oid(), + schema.GetColumn("no_d_id").Oid()}); + + EXPECT_EQ(index_plan->IsForUpdate(), false); + EXPECT_EQ(index_plan->GetDatabaseOid(), test->db_); + + // Check Index Scan Predicate + auto scan_pred = index_plan->GetScanPredicate(); + EXPECT_TRUE(scan_pred != nullptr); + EXPECT_EQ(scan_pred->GetExpressionType(), parser::ExpressionType::COMPARE_EQUAL); + EXPECT_EQ(scan_pred->GetChildrenSize(), 2); + EXPECT_EQ(scan_pred->GetChild(0)->GetExpressionType(), parser::ExpressionType::COLUMN_VALUE); + EXPECT_EQ(scan_pred->GetChild(1)->GetExpressionType(), parser::ExpressionType::VALUE_CONSTANT); + auto tve = scan_pred->GetChild(0).CastManagedPointerTo(); + auto cve = scan_pred->GetChild(1).CastManagedPointerTo(); + EXPECT_EQ(tve->GetColumnName(), "no_w_id"); + EXPECT_EQ(tve->GetColumnOid(), schema.GetColumn("no_w_id").Oid()); + EXPECT_EQ(cve->Peek(), 1); + }; + + std::string query = "SELECT NO_O_ID FROM \"NEW ORDER\" WHERE NO_W_ID = 1 ORDER BY NO_D_ID LIMIT 15"; + OptimizeQuery(query, tbl_new_order_, check); +} + +// NOLINTNEXTLINE +TEST_F(TpccPlanIndexScanTests, IndexFulfillMultipleSortAndRequiredPredicateWithLimit) { + auto check = [](TpccPlanTest *test, parser::SelectStatement *sel_stmt, catalog::table_oid_t tbl_oid, + std::unique_ptr plan) { + // Get Schema + auto &schema = test->accessor_->GetSchema(test->tbl_new_order_); + EXPECT_EQ(plan->GetChildrenSize(), 1); + EXPECT_EQ(plan->GetPlanNodeType(), planner::PlanNodeType::PROJECTION); + + // Limit + auto planl = plan->GetChild(0); + EXPECT_EQ(planl->GetChildrenSize(), 1); + EXPECT_EQ(planl->GetPlanNodeType(), planner::PlanNodeType::LIMIT); + auto limit_plan = reinterpret_cast(planl); + EXPECT_EQ(limit_plan->GetLimit(), sel_stmt->GetSelectLimit()->GetLimit()); + + // Order By + auto planc = planl->GetChild(0); + EXPECT_EQ(planc->GetChildrenSize(), 1); + EXPECT_EQ(planc->GetPlanNodeType(), planner::PlanNodeType::ORDERBY); + auto orderby = reinterpret_cast(planc); + EXPECT_EQ(orderby->HasLimit(), true); + EXPECT_EQ(orderby->GetLimit(), sel_stmt->GetSelectLimit()->GetLimit()); + EXPECT_EQ(orderby->GetSortKeys().size(), 2); + EXPECT_EQ(orderby->GetSortKeys()[0].second, catalog::OrderByOrderingType::ASC); + EXPECT_EQ(orderby->GetSortKeys()[1].second, catalog::OrderByOrderingType::ASC); + auto sortkey = orderby->GetSortKeys()[0].first.CastManagedPointerTo(); + EXPECT_TRUE(sortkey != nullptr); + EXPECT_EQ(sortkey->GetExpressionType(), parser::ExpressionType::VALUE_TUPLE); + EXPECT_EQ(sortkey->GetTupleIdx(), 0); + EXPECT_EQ(sortkey->GetValueIdx(), 0); + sortkey = orderby->GetSortKeys()[1].first.CastManagedPointerTo(); + EXPECT_TRUE(sortkey != nullptr); + EXPECT_EQ(sortkey->GetExpressionType(), parser::ExpressionType::VALUE_TUPLE); + EXPECT_EQ(sortkey->GetTupleIdx(), 0); + EXPECT_EQ(sortkey->GetValueIdx(), 1); + + // Should use New Order Primary Key (NO_W_ID, NO_D_ID, NO_O_ID) + auto plani = planc->GetChild(0); + EXPECT_EQ(plani->GetPlanNodeType(), planner::PlanNodeType::INDEXSCAN); + EXPECT_EQ(plani->GetChildrenSize(), 0); + auto index_plan = reinterpret_cast(plani); + EXPECT_EQ(index_plan->GetIndexOid(), test->pk_new_order_); + EXPECT_EQ(index_plan->GetScanHasLimit(), true); + EXPECT_EQ(index_plan->GetScanLimit(), 15); + test->CheckOids(index_plan->GetColumnOids(), {schema.GetColumn("no_o_id").Oid(), schema.GetColumn("no_w_id").Oid(), + schema.GetColumn("no_d_id").Oid()}); + + EXPECT_EQ(index_plan->IsForUpdate(), false); + EXPECT_EQ(index_plan->GetDatabaseOid(), test->db_); + + // Check Index Scan Predicate + auto scan_pred = index_plan->GetScanPredicate(); + EXPECT_TRUE(scan_pred != nullptr); + EXPECT_EQ(scan_pred->GetExpressionType(), parser::ExpressionType::COMPARE_EQUAL); + EXPECT_EQ(scan_pred->GetChildrenSize(), 2); + EXPECT_EQ(scan_pred->GetChild(0)->GetExpressionType(), parser::ExpressionType::COLUMN_VALUE); + EXPECT_EQ(scan_pred->GetChild(1)->GetExpressionType(), parser::ExpressionType::VALUE_CONSTANT); + auto tve = scan_pred->GetChild(0).CastManagedPointerTo(); + auto cve = scan_pred->GetChild(1).CastManagedPointerTo(); + EXPECT_EQ(tve->GetColumnName(), "no_w_id"); + EXPECT_EQ(tve->GetColumnOid(), schema.GetColumn("no_w_id").Oid()); + EXPECT_EQ(cve->Peek(), 1); + }; + + std::string query = "SELECT NO_O_ID FROM \"NEW ORDER\" WHERE NO_W_ID = 1 ORDER BY NO_D_ID, NO_O_ID LIMIT 15"; + OptimizeQuery(query, tbl_new_order_, check); +} + // NOLINTNEXTLINE TEST_F(TpccPlanIndexScanTests, IndexFulfillSortAndPredicateWithLimitOffset) { auto check = [](TpccPlanTest *test, parser::SelectStatement *sel_stmt, catalog::table_oid_t tbl_oid, @@ -199,6 +489,8 @@ TEST_F(TpccPlanIndexScanTests, IndexFulfillSortAndPredicateWithLimitOffset) { EXPECT_EQ(plani->GetChildrenSize(), 0); auto index_plan = reinterpret_cast(plani); EXPECT_EQ(index_plan->GetIndexOid(), test->pk_new_order_); + EXPECT_EQ(index_plan->GetScanHasLimit(), true); + EXPECT_EQ(index_plan->GetScanLimit(), 15); test->CheckOids(index_plan->GetColumnOids(), {schema.GetColumn("no_o_id").Oid(), schema.GetColumn("no_w_id").Oid()}); From 8764e574a759e654311ab93654fa6550f89476d3 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Mon, 28 Dec 2020 23:02:42 -0500 Subject: [PATCH 09/20] Format --- src/execution/compiler/codegen.cpp | 4 --- src/execution/sql/aggregation_hash_table.cpp | 4 +-- src/execution/sql/join_hash_table.cpp | 4 +-- src/execution/vm/vm.cpp | 26 +++++++++---------- src/include/common/container/concurrent_map.h | 2 +- src/include/execution/sql/memory_pool.h | 2 +- .../sql/vector_projection_iterator.h | 3 ++- src/include/execution/util/chunked_vector.h | 2 +- src/include/execution/vm/module.h | 2 +- src/include/settings/settings_param.h | 8 +++--- src/settings/settings_manager.cpp | 4 +-- 11 files changed, 29 insertions(+), 32 deletions(-) diff --git a/src/execution/compiler/codegen.cpp b/src/execution/compiler/codegen.cpp index 0cabe1ff27..76846927eb 100644 --- a/src/execution/compiler/codegen.cpp +++ b/src/execution/compiler/codegen.cpp @@ -428,7 +428,6 @@ ast::Expr *CodeGen::IndexIteratorScan(ast::Identifier iter, planner::IndexScanTy ast::Builtin builtin; bool asc_scan = false; bool use_limit = false; - bool limit_unusable = true; storage::index::ScanType asc_type; switch (scan_type) { case planner::IndexScanType::Exact: @@ -442,9 +441,6 @@ ast::Expr *CodeGen::IndexIteratorScan(ast::Identifier iter, planner::IndexScanTy case planner::IndexScanType::AscendingOpenLow: case planner::IndexScanType::AscendingOpenBoth: use_limit = true; - if (scan_type == planner::IndexScanType::AscendingOpenHighLimit || - scan_type == planner::IndexScanType::AscendingClosedLimit) - limit_unusable = false; asc_scan = true; builtin = ast::Builtin::IndexIteratorScanAscending; if (scan_type == planner::IndexScanType::AscendingClosed || diff --git a/src/execution/sql/aggregation_hash_table.cpp b/src/execution/sql/aggregation_hash_table.cpp index 3d34c8b2eb..5192496a57 100644 --- a/src/execution/sql/aggregation_hash_table.cpp +++ b/src/execution/sql/aggregation_hash_table.cpp @@ -319,8 +319,8 @@ void AggregationHashTable::LookupInitial() { // use SIMD gathers if hashes vector is full. For some reason it // isn't. Investigate why. UnaryOperationExecutor::Execute( - exec_settings_, *batch_state_->Hashes(), batch_state_->Entries(), - [&](const hash_t hash) noexcept { return hash_table_.FindChainHead(hash); }); + exec_settings_, *batch_state_->Hashes(), + batch_state_->Entries(), [&](const hash_t hash) noexcept { return hash_table_.FindChainHead(hash); }); // Find non-null entries whose keys must be checked and place them in the // key-not-equal list which is used during key equality checking. diff --git a/src/execution/sql/join_hash_table.cpp b/src/execution/sql/join_hash_table.cpp index ff602ad91e..60df330154 100644 --- a/src/execution/sql/join_hash_table.cpp +++ b/src/execution/sql/join_hash_table.cpp @@ -519,8 +519,8 @@ void JoinHashTable::Build() { void JoinHashTable::LookupBatchInChainingHashTable(const Vector &hashes, Vector *results) const { UnaryOperationExecutor::Execute( - exec_settings_, hashes, results, - [&](const hash_t hash_val) noexcept { return chaining_hash_table_.FindChainHead(hash_val); }); + exec_settings_, hashes, + results, [&](const hash_t hash_val) noexcept { return chaining_hash_table_.FindChainHead(hash_val); }); } void JoinHashTable::LookupBatchInConciseHashTable(const Vector &hashes, Vector *results) const { diff --git a/src/execution/vm/vm.cpp b/src/execution/vm/vm.cpp index e82193f019..169f8582c2 100644 --- a/src/execution/vm/vm.cpp +++ b/src/execution/vm/vm.cpp @@ -160,20 +160,20 @@ void VM::Interpret(const uint8_t *ip, Frame *frame) { // NOLINT // TODO(pmenon): Should these READ/PEEK macros take in a vm::OperandType so // that we can infer primitive types using traits? This minimizes number of // changes if the underlying offset/bytecode/register sizes changes? -#define PEEK_JMP_OFFSET() Peek(&ip) /* NOLINT */ -#define READ_IMM1() Read(&ip) /* NOLINT */ -#define READ_IMM2() Read(&ip) /* NOLINT */ -#define READ_IMM4() Read(&ip) /* NOLINT */ -#define READ_IMM8() Read(&ip) /* NOLINT */ -#define READ_IMM4F() Read(&ip) /* NOLINT */ -#define READ_IMM8F() Read(&ip) /* NOLINT */ -#define READ_UIMM2() Read(&ip) /* NOLINT */ -#define READ_UIMM4() Read(&ip) /* NOLINT */ -#define READ_JMP_OFFSET() READ_IMM4() /* NOLINT */ -#define READ_LOCAL_ID() Read(&ip) /* NOLINT */ -#define READ_STATIC_LOCAL_ID() Read(&ip) /* NOLINT */ +#define PEEK_JMP_OFFSET() Peek(&ip) /* NOLINT */ +#define READ_IMM1() Read(&ip) /* NOLINT */ +#define READ_IMM2() Read(&ip) /* NOLINT */ +#define READ_IMM4() Read(&ip) /* NOLINT */ +#define READ_IMM8() Read(&ip) /* NOLINT */ +#define READ_IMM4F() Read(&ip) /* NOLINT */ +#define READ_IMM8F() Read(&ip) /* NOLINT */ +#define READ_UIMM2() Read(&ip) /* NOLINT */ +#define READ_UIMM4() Read(&ip) /* NOLINT */ +#define READ_JMP_OFFSET() READ_IMM4() /* NOLINT */ +#define READ_LOCAL_ID() Read(&ip) /* NOLINT */ +#define READ_STATIC_LOCAL_ID() Read(&ip) /* NOLINT */ #define READ_OP() Read>(&ip) /* NOLINT */ -#define READ_FUNC_ID() READ_UIMM2() /* NOLINT */ +#define READ_FUNC_ID() READ_UIMM2() /* NOLINT */ #define OP(name) op_##name #define DISPATCH_NEXT() \ diff --git a/src/include/common/container/concurrent_map.h b/src/include/common/container/concurrent_map.h index 17b6bf2d5b..7d544da974 100644 --- a/src/include/common/container/concurrent_map.h +++ b/src/include/common/container/concurrent_map.h @@ -157,7 +157,7 @@ class ConcurrentMap { * True for Insertion, False for No Insertion. */ template - std::pair Emplace(Args &&...args) { + std::pair Emplace(Args &&... args) { auto result = map_.emplace(std::move(args)...); return {Iterator(result.first), result.second}; } diff --git a/src/include/execution/sql/memory_pool.h b/src/include/execution/sql/memory_pool.h index cf8b7a7955..688ef9f32f 100644 --- a/src/include/execution/sql/memory_pool.h +++ b/src/include/execution/sql/memory_pool.h @@ -231,7 +231,7 @@ class EXPORT MemoryPool { * @return A construct object allocated from this pool. */ template - MemPoolPtr MakeObject(Args &&...args) { + MemPoolPtr MakeObject(Args &&... args) { auto *object_mem = Allocate(sizeof(T), true); auto *obj = new (object_mem) T(std::forward(args)...); return MemPoolPtr(obj); diff --git a/src/include/execution/sql/vector_projection_iterator.h b/src/include/execution/sql/vector_projection_iterator.h index db6d415546..778a6136b6 100644 --- a/src/include/execution/sql/vector_projection_iterator.h +++ b/src/include/execution/sql/vector_projection_iterator.h @@ -78,7 +78,8 @@ class VectorProjectionIterator { SelectionVector ret{}; for (sel_t i = 0; i < common::Constants::K_DEFAULT_VECTOR_SIZE; i++) ret[i] = i; return ret; - }(); + } + (); public: /** diff --git a/src/include/execution/util/chunked_vector.h b/src/include/execution/util/chunked_vector.h index 41098635ba..b125cc39cf 100644 --- a/src/include/execution/util/chunked_vector.h +++ b/src/include/execution/util/chunked_vector.h @@ -867,7 +867,7 @@ class ChunkedVectorT { * In-place construct an element using arguments @em args and append to the end of the vector. */ template - void emplace_back(Args &&...args) { // NOLINT + void emplace_back(Args &&... args) { // NOLINT T *space = reinterpret_cast(vec_.Append()); new (space) T(std::forward(args)...); } diff --git a/src/include/execution/vm/module.h b/src/include/execution/vm/module.h index 8654944cd2..8273d3ca67 100644 --- a/src/include/execution/vm/module.h +++ b/src/include/execution/vm/module.h @@ -182,7 +182,7 @@ namespace detail { inline void CopyAll(UNUSED_ATTRIBUTE uint8_t *buffer) {} template -inline void CopyAll(uint8_t *buffer, const HeadT &head, const RestT &...rest) { +inline void CopyAll(uint8_t *buffer, const HeadT &head, const RestT &... rest) { std::memcpy(buffer, reinterpret_cast(&head), sizeof(head)); CopyAll(buffer + sizeof(head), rest...); } diff --git a/src/include/settings/settings_param.h b/src/include/settings/settings_param.h index b82d76653c..139135d08f 100644 --- a/src/include/settings/settings_param.h +++ b/src/include/settings/settings_param.h @@ -27,11 +27,11 @@ using callback_fn = void (*)(void *, void *, DBMain *, common::ManagedPointer Date: Tue, 29 Dec 2020 05:32:26 -0500 Subject: [PATCH 10/20] Optional properties necessary for sort pushdowns --- src/include/optimizer/optimization_context.h | 27 ++++++++++++++---- src/include/optimizer/properties.h | 4 +-- src/include/optimizer/property.h | 24 ++++++++++++++++ src/optimizer/child_property_deriver.cpp | 7 +++++ src/optimizer/index_util.cpp | 2 +- src/optimizer/optimizer_task.cpp | 30 +++++++++++++++++--- src/optimizer/rules/implementation_rules.cpp | 16 +++++++---- 7 files changed, 93 insertions(+), 17 deletions(-) diff --git a/src/include/optimizer/optimization_context.h b/src/include/optimizer/optimization_context.h index 72b4827d5a..b76ffc0b71 100644 --- a/src/include/optimizer/optimization_context.h +++ b/src/include/optimizer/optimization_context.h @@ -22,14 +22,21 @@ class OptimizationContext { * @param required_prop Properties required to satisfy. acquires ownership * @param cost_upper_bound Upper cost bound */ - OptimizationContext(OptimizerContext *context, PropertySet *required_prop, + OptimizationContext(OptimizerContext *context, PropertySet *required_props, + PropertySet *optional_props = new PropertySet (), double cost_upper_bound = std::numeric_limits::max()) - : context_(context), required_prop_(required_prop), cost_upper_bound_(cost_upper_bound) {} + : context_(context), + required_props_(required_props), + optional_props_(optional_props), + cost_upper_bound_(cost_upper_bound) {} /** * Destructor */ - ~OptimizationContext() { delete required_prop_; } + ~OptimizationContext() { + delete required_props_; + delete optional_props_; + } /** * @returns OptimizerContext @@ -39,7 +46,12 @@ class OptimizationContext { /** * @returns Properties to satisfy, owned by this OptimizationContext */ - PropertySet *GetRequiredProperties() const { return required_prop_; } + PropertySet *GetRequiredProperties() const { return required_props_; } + + /** + * @returns Properties to attempt, owned by this OptimizationContext + */ + PropertySet *GetOptionalProperties() const { return optional_props_; } /** * @returns Current context's upper bound cost @@ -61,7 +73,12 @@ class OptimizationContext { /** * Required properties */ - PropertySet *required_prop_; + PropertySet *required_props_; + + /** + * Required properties + */ + PropertySet *optional_props_; /** * Cost Upper Bound (for pruning) diff --git a/src/include/optimizer/properties.h b/src/include/optimizer/properties.h index 265053f993..2f52f55b72 100644 --- a/src/include/optimizer/properties.h +++ b/src/include/optimizer/properties.h @@ -21,8 +21,8 @@ class PropertySort : public Property { * @param sort_ascending Whether each sort_column is ascending or descending */ PropertySort(std::vector> sort_columns, - std::vector sort_ascending) - : sort_columns_(std::move(sort_columns)), sort_ascending_(std::move(sort_ascending)) {} + std::vector sort_ascending, bool required = true) + : Property(required), sort_columns_(std::move(sort_columns)), sort_ascending_(std::move(sort_ascending)) {} /** * Returns the type of PropertySort diff --git a/src/include/optimizer/property.h b/src/include/optimizer/property.h index 8d65b4ba84..b3d8e630f7 100644 --- a/src/include/optimizer/property.h +++ b/src/include/optimizer/property.h @@ -28,6 +28,12 @@ enum class PropertyType { SORT }; */ class Property { public: + /** + * Constructor for property + * @param required whether the property is required + */ + explicit Property(bool required) : required_(required) {}; + /** * Trivial destructor for Property */ @@ -75,6 +81,24 @@ class Property { } return nullptr; } + + /** + * Sets the property's required status + */ + void SetRequired(bool required) { + required_ = required; + } + + /** + * @return whether the property is required + */ + auto GetRequired() const -> bool { return required_; } + + private: + /** + * Whether the property is required + */ + bool required_{true}; }; } // namespace noisepage::optimizer diff --git a/src/optimizer/child_property_deriver.cpp b/src/optimizer/child_property_deriver.cpp index 944cb7bb72..83b7cdb49f 100644 --- a/src/optimizer/child_property_deriver.cpp +++ b/src/optimizer/child_property_deriver.cpp @@ -38,6 +38,8 @@ void ChildPropertyDeriver::Visit(const IndexScan *op) { std::vector tbl_indexes = accessor_->GetIndexOids(tbl_id); auto *property_set = new PropertySet(); + + // Output properties of IndexScan should be required properties satisfied by index for (auto prop : requirements_->Properties()) { if (prop->Type() == PropertyType::SORT) { auto sort_prop = prop->As(); @@ -52,6 +54,7 @@ void ChildPropertyDeriver::Visit(const IndexScan *op) { } } + // Index scan children should have no properties output_.emplace_back(property_set, std::vector{}); } @@ -93,10 +96,14 @@ void ChildPropertyDeriver::Visit(const Limit *op) { // Limit fulfill the internal sort property std::vector child_input_properties{new PropertySet()}; auto provided_prop = new PropertySet(); + + // Limit must satisfy output sort properties but child can attempt to satisfy sort property optionally if (!op->GetSortExpressions().empty()) { const std::vector> &exprs = op->GetSortExpressions(); const std::vector &sorts{op->GetSortAscending()}; provided_prop->AddProperty(new PropertySort(exprs, sorts)); + + child_input_properties[0]->AddProperty(new PropertySort(exprs, sorts, false)); } output_.emplace_back(provided_prop, std::move(child_input_properties)); diff --git a/src/optimizer/index_util.cpp b/src/optimizer/index_util.cpp index f079e95594..3e5367bfd5 100644 --- a/src/optimizer/index_util.cpp +++ b/src/optimizer/index_util.cpp @@ -256,7 +256,7 @@ bool IndexUtil::CheckPredicates( // Note: if scan type could be exact, it would have already been set by this point // TODO(dpatra): It may be possible to get away with being more lenient here on the index scan type to push down // limits but needs further investigation - if (scan_type == planner::IndexScanType::Dummy) scan_type = planner::IndexScanType::AscendingClosed; + if (scan_type == planner::IndexScanType::Dummy) scan_type = planner::IndexScanType::AscendingClosedLimit; *idx_scan_type = scan_type; return !bounds->empty(); diff --git a/src/optimizer/optimizer_task.cpp b/src/optimizer/optimizer_task.cpp index e7f511a69a..e0f769771a 100644 --- a/src/optimizer/optimizer_task.cpp +++ b/src/optimizer/optimizer_task.cpp @@ -286,20 +286,42 @@ void OptimizeExpressionCostWithEnforcedProperty::Execute() { for (; cur_child_idx_ < static_cast(group_expr_->GetChildrenGroupsSize()); cur_child_idx_++) { auto &i_prop = input_props[cur_child_idx_]; + + // Fill input properties based on required properties and optional proper ties + auto *req_input_props = new PropertySet(); + auto optional_input_props = PropertySet(); + + for (auto *prop : i_prop->Properties()) { + if (prop->GetRequired()) { + // Required property + req_input_props->AddProperty(prop->Copy()); + } else { + // Optional property + optional_input_props.AddProperty(prop->Copy()); + } + } + + delete input_props[cur_child_idx_]; + // Only preserve required input properties, optional properties should be in child operator + input_props[cur_child_idx_] = req_input_props; + auto child_group = context_->GetOptimizerContext()->GetMemo().GetGroupByID(group_expr_->GetChildGroupId(cur_child_idx_)); - // Check whether the child group is already optimized for the prop - auto child_best_expr = child_group->GetBestExpression(i_prop); + // Check whether the child group is already optimized for the required input properties + auto child_best_expr = child_group->GetBestExpression(req_input_props); if (child_best_expr != nullptr) { // Directly get back the best expr if the child group is optimized - cur_total_cost_ += child_best_expr->GetCost(i_prop); + // Only cost on required properties + // TODO(dpatra): Update costing structure for completed optional properties + cur_total_cost_ += child_best_expr->GetCost(req_input_props); if (cur_total_cost_ > context_->GetCostUpperBound()) break; } else if (prev_child_idx_ != cur_child_idx_) { // We haven't optimized child group prev_child_idx_ = cur_child_idx_; PushTask(new OptimizeExpressionCostWithEnforcedProperty(this)); auto cost_high = context_->GetCostUpperBound() - cur_total_cost_; - auto ctx = new OptimizationContext(context_->GetOptimizerContext(), i_prop->Copy(), cost_high); + auto ctx = new OptimizationContext(context_->GetOptimizerContext(), req_input_props->Copy(), + optional_input_props.Copy(), cost_high); PushTask(new OptimizeGroup(child_group, ctx)); context_->GetOptimizerContext()->AddOptimizationContext(ctx); return; diff --git a/src/optimizer/rules/implementation_rules.cpp b/src/optimizer/rules/implementation_rules.cpp index 0dfb410073..2e130bdfe0 100644 --- a/src/optimizer/rules/implementation_rules.cpp +++ b/src/optimizer/rules/implementation_rules.cpp @@ -117,8 +117,14 @@ void LogicalGetToPhysicalIndexScan::Transform(common::ManagedPointerGetLimit(); auto indexes = accessor->GetIndexOids(get->GetTableOid()); - // Get sort property + // TODO(dpatra): This assumes there will only be ONE sort property -- may want to review this later + // Try setting sort property based on properties in context auto sort = context->GetRequiredProperties()->GetPropertyOfType(PropertyType::SORT); + // If no sort found in required properties, try optional properties + if (sort == nullptr) { + sort = context->GetOptionalProperties()->GetPropertyOfType(PropertyType::SORT); + } + bool sort_exists = sort != nullptr; auto sort_prop = sort_exists ? sort->As() : nullptr; bool sort_possible = sort_exists && IndexUtil::CheckSortProperty(sort_prop); @@ -136,10 +142,10 @@ void LogicalGetToPhysicalIndexScan::Transform(common::ManagedPointerGetPredicates(); // There is an index that satisfies predicates for at least one column From 10ac10b1f0f6e95fa84ba155a1d3977a72000181 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Tue, 29 Dec 2020 06:01:49 -0500 Subject: [PATCH 11/20] Doxygen --- src/include/optimizer/optimization_context.h | 3 ++- src/include/optimizer/properties.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/include/optimizer/optimization_context.h b/src/include/optimizer/optimization_context.h index b76ffc0b71..7aaf72941b 100644 --- a/src/include/optimizer/optimization_context.h +++ b/src/include/optimizer/optimization_context.h @@ -19,7 +19,8 @@ class OptimizationContext { /** * Constructor * @param context OptimizerContext containing optimization context - * @param required_prop Properties required to satisfy. acquires ownership + * @param required_props Properties required to satisfy. acquires ownership + * @param optional_props Set of properties expressions in group can optionally attempt satisfying * @param cost_upper_bound Upper cost bound */ OptimizationContext(OptimizerContext *context, PropertySet *required_props, diff --git a/src/include/optimizer/properties.h b/src/include/optimizer/properties.h index 2f52f55b72..c3fd62b5ec 100644 --- a/src/include/optimizer/properties.h +++ b/src/include/optimizer/properties.h @@ -19,6 +19,7 @@ class PropertySort : public Property { * Constructor for PropertySort * @param sort_columns vector of AbstractExpressions representing sort columns * @param sort_ascending Whether each sort_column is ascending or descending + * @param required Whether the sort property needs to be satisfied at the active level */ PropertySort(std::vector> sort_columns, std::vector sort_ascending, bool required = true) From 271ebeca7d783f42e41d1eab6092d6dd71c59fc7 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Tue, 29 Dec 2020 06:15:05 -0500 Subject: [PATCH 12/20] Format --- src/include/optimizer/optimization_context.h | 2 +- src/include/optimizer/property.h | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/include/optimizer/optimization_context.h b/src/include/optimizer/optimization_context.h index 7aaf72941b..702df13533 100644 --- a/src/include/optimizer/optimization_context.h +++ b/src/include/optimizer/optimization_context.h @@ -24,7 +24,7 @@ class OptimizationContext { * @param cost_upper_bound Upper cost bound */ OptimizationContext(OptimizerContext *context, PropertySet *required_props, - PropertySet *optional_props = new PropertySet (), + PropertySet *optional_props = new PropertySet(), double cost_upper_bound = std::numeric_limits::max()) : context_(context), required_props_(required_props), diff --git a/src/include/optimizer/property.h b/src/include/optimizer/property.h index b3d8e630f7..588b5242c7 100644 --- a/src/include/optimizer/property.h +++ b/src/include/optimizer/property.h @@ -32,7 +32,7 @@ class Property { * Constructor for property * @param required whether the property is required */ - explicit Property(bool required) : required_(required) {}; + explicit Property(bool required) : required_(required){}; /** * Trivial destructor for Property @@ -85,9 +85,7 @@ class Property { /** * Sets the property's required status */ - void SetRequired(bool required) { - required_ = required; - } + void SetRequired(bool required) { required_ = required; } /** * @return whether the property is required From 9c3eb1ed2b0846410303055f3f6b95b3c4d62ce5 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Tue, 29 Dec 2020 06:32:36 -0500 Subject: [PATCH 13/20] Lint --- src/include/optimizer/property.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/optimizer/property.h b/src/include/optimizer/property.h index 588b5242c7..9c87788f25 100644 --- a/src/include/optimizer/property.h +++ b/src/include/optimizer/property.h @@ -32,7 +32,7 @@ class Property { * Constructor for property * @param required whether the property is required */ - explicit Property(bool required) : required_(required){}; + explicit Property(bool required) : required_(required){} /** * Trivial destructor for Property From c75bab20013d5457177c96d38820414a81a57d9a Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Tue, 29 Dec 2020 13:38:19 -0500 Subject: [PATCH 14/20] Format --- src/include/optimizer/property.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/optimizer/property.h b/src/include/optimizer/property.h index 9c87788f25..84c730e7e2 100644 --- a/src/include/optimizer/property.h +++ b/src/include/optimizer/property.h @@ -32,7 +32,7 @@ class Property { * Constructor for property * @param required whether the property is required */ - explicit Property(bool required) : required_(required){} + explicit Property(bool required) : required_(required) {} /** * Trivial destructor for Property From ad10de932feaaa48d75de51f4e1f2cfdfa042057 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Tue, 29 Dec 2020 16:14:36 -0500 Subject: [PATCH 15/20] Fix bug --- src/optimizer/index_util.cpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/optimizer/index_util.cpp b/src/optimizer/index_util.cpp index 3e5367bfd5..a39984e0e5 100644 --- a/src/optimizer/index_util.cpp +++ b/src/optimizer/index_util.cpp @@ -252,14 +252,24 @@ bool IndexUtil::CheckPredicates( return false; } - // Lower scan type to valid type - // Note: if scan type could be exact, it would have already been set by this point - // TODO(dpatra): It may be possible to get away with being more lenient here on the index scan type to push down - // limits but needs further investigation - if (scan_type == planner::IndexScanType::Dummy) scan_type = planner::IndexScanType::AscendingClosedLimit; + if (bounds->empty()) return false; + + // Scan may be uninitialized if all bound columns are equality checks, but not all index columns are bound so is not exact + if (open_lows.size() == open_highs.size() && scan_type == planner::IndexScanType::Dummy) { + if (bounds->size() == open_lows.size()) scan_type = planner::IndexScanType::AscendingClosedLimit; + else scan_type = planner::IndexScanType::AscendingClosed; + } + + NOISEPAGE_ASSERT(scan_type != planner::IndexScanType::Dummy, "Dummy scan should never exist if any predicate is satisfied"); + + // Lower scan type allowance if not all predicates are satisfied + if (scan_type == planner::IndexScanType::AscendingClosedLimit && bounds->size() != predicates.size()) + scan_type = planner::IndexScanType::AscendingClosed; + else if (scan_type == planner::IndexScanType::AscendingOpenHighLimit && bounds->size() != predicates.size()) + scan_type = planner::IndexScanType::AscendingOpenHigh; *idx_scan_type = scan_type; - return !bounds->empty(); + return true; } bool IndexUtil::ConvertIndexKeyOidToColOid(catalog::CatalogAccessor *accessor, catalog::table_oid_t tbl_oid, From 6bb0ad118d31d942e8dd3dea811474e5b37d3b9d Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Tue, 29 Dec 2020 22:11:39 -0500 Subject: [PATCH 16/20] Format --- src/optimizer/index_util.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/optimizer/index_util.cpp b/src/optimizer/index_util.cpp index a39984e0e5..cae8657324 100644 --- a/src/optimizer/index_util.cpp +++ b/src/optimizer/index_util.cpp @@ -254,13 +254,17 @@ bool IndexUtil::CheckPredicates( if (bounds->empty()) return false; - // Scan may be uninitialized if all bound columns are equality checks, but not all index columns are bound so is not exact + // Scan may be uninitialized if all bound columns are equality checks, but not all index columns are bound so is not + // exact if (open_lows.size() == open_highs.size() && scan_type == planner::IndexScanType::Dummy) { - if (bounds->size() == open_lows.size()) scan_type = planner::IndexScanType::AscendingClosedLimit; - else scan_type = planner::IndexScanType::AscendingClosed; + if (bounds->size() == open_lows.size()) + scan_type = planner::IndexScanType::AscendingClosedLimit; + else + scan_type = planner::IndexScanType::AscendingClosed; } - NOISEPAGE_ASSERT(scan_type != planner::IndexScanType::Dummy, "Dummy scan should never exist if any predicate is satisfied"); + NOISEPAGE_ASSERT(scan_type != planner::IndexScanType::Dummy, + "Dummy scan should never exist if any predicate is satisfied"); // Lower scan type allowance if not all predicates are satisfied if (scan_type == planner::IndexScanType::AscendingClosedLimit && bounds->size() != predicates.size()) From f5622242c234aca3dd237cda18e0d3847e30bc77 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Wed, 30 Dec 2020 05:58:30 -0500 Subject: [PATCH 17/20] Fix bug + improve test documentation --- script/testing/junit/sql/index-select.sql | 32 ++- script/testing/junit/traces/index-select.test | 226 +++++++++++++----- src/optimizer/index_util.cpp | 5 +- 3 files changed, 192 insertions(+), 71 deletions(-) diff --git a/script/testing/junit/sql/index-select.sql b/script/testing/junit/sql/index-select.sql index 1054d45290..ed9f16034c 100644 --- a/script/testing/junit/sql/index-select.sql +++ b/script/testing/junit/sql/index-select.sql @@ -8,41 +8,59 @@ CREATE INDEX ind ON foo (var1, var2, var3); -- Predicates + Sort exist -- At least one predicate satisfied and sort not satisfied +-- Index should be chosen due to satisfied predicate, but limit should not be pushed down due to invalid order by SELECT * FROM foo WHERE var1 >= 1 ORDER BY var3 LIMIT 17; + DROP INDEX ind; CREATE INDEX ind ON foo (var1, var3); + +-- At least one predicate satisfied and sort not satisfied +-- Index should be chosen due to satisfied predicate, but limit should not be pushed down due to invalid order by +SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var2 LIMIT 11; +SELECT * FROM foo WHERE var1 = 1 AND var3 >= 1 ORDER BY var2 LIMIT 5; +SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var3 LIMIT 11; + + -- No predicates satisfied +-- Sequential scan chosen since no predicates or sort satisfied by index SELECT * FROM foo WHERE var2 >= 1 ORDER BY var2 LIMIT 17; + -- At least one predicate satisfied and sort satisfied --- Predicate must be checked +-- Index can be chosen due to bounds, but limit cannot be pushed down because of complex predicate check +-- TODO(dpatra): Once predicate can be pushed down to index iterator, limit can also be pushed down SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var1, var3 LIMIT 11; SELECT * FROM foo WHERE var1 >= 1 AND var2 >= 1 ORDER BY var1, var3 LIMIT 11; -SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var2 LIMIT 11; -SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var3 LIMIT 11; - +-- Index can be chosen due to bounds, and limit can be pushed down because of valid sort and enforced predicate check +SELECT * FROM foo WHERE var1 = 1 AND var3 >= 1 ORDER BY var3 LIMIT 5; -- Sort exists --- Sort not satisfied +-- Sequential scan chosen since sort not satisfied by index SELECT * FROM foo ORDER BY var2 LIMIT 26; SELECT * FROM foo ORDER BY var3 LIMIT 26; + -- Sort satisfied +-- Index can be chosen due to bounds and limit pushed down due to satisfied order by and lack of predicates SELECT * FROM foo ORDER BY var1, var3 LIMIT 26; -- Predicates exist --- Predicates not satisfied +-- Sequential scan chosen since predicate not satisfied by index SELECT * FROM foo WHERE var2 >= 1 LIMIT 17; -- Predicates satisfied --- Predicate must be checked +-- Index can be chosen due to bounds, but limit cannot be pushed down because of complex predicate check +-- TODO(dpatra): Once predicate can be pushed down to index iterator, limit can also be pushed down SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 LIMIT 11; +-- Index can be chosen due to bounds, and limit can be pushed down because of enforced predicate check +SELECT * FROM foo WHERE var1 = 1 AND var3 >= 1 LIMIT 5; -- Neither predicates nor sort exist +-- Sequential scan chosen since no predicates nor order by's exist SELECT * FROM foo LIMIT 26; diff --git a/script/testing/junit/traces/index-select.test b/script/testing/junit/traces/index-select.test index 21bc61e0f1..0012780f3e 100644 --- a/script/testing/junit/traces/index-select.test +++ b/script/testing/junit/traces/index-select.test @@ -28,6 +28,9 @@ statement ok statement ok -- At least one predicate satisfied and sort not satisfied +statement ok +-- Index should be chosen due to satisfied predicate, but limit should not be pushed down due to invalid order by + query III nosort SELECT * FROM foo WHERE var1 >= 1 ORDER BY var3 LIMIT 17; ---- @@ -86,6 +89,9 @@ SELECT * FROM foo WHERE var1 >= 1 ORDER BY var3 LIMIT 17; statement ok +statement ok + + statement ok DROP INDEX ind; @@ -96,74 +102,72 @@ statement ok statement ok --- No predicates satisfied + + +statement ok +-- At least one predicate satisfied and sort not satisfied + +statement ok +-- Index should be chosen due to satisfied predicate, but limit should not be pushed down due to invalid order by query III nosort -SELECT * FROM foo WHERE var2 >= 1 ORDER BY var2 LIMIT 17; +SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var2 LIMIT 11; ---- -0 1 -2 0 1 1 -1 -1 0 2 +2 +0 1 +2 0 2 1 1 2 +2 +1 1 2 1 +2 1 1 1 1 2 -0 1 -0 +1 2 2 2 -0 2 +1 + +query III nosort +SELECT * FROM foo WHERE var1 = 1 AND var3 >= 1 ORDER BY var2 LIMIT 5; +---- +1 0 -0 -2 +1 1 0 2 -2 1 -2 -0 1 -2 +1 1 1 2 +1 2 -2 -2 -0 - -statement ok - - -statement ok --- At least one predicate satisfied and sort satisfied - -statement ok --- Predicate must be checked +1 query III nosort -SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var1, var3 LIMIT 11; +SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var3 LIMIT 11; ---- 1 0 @@ -174,36 +178,46 @@ SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var1, var3 LIMIT 11; 1 2 1 -1 -0 2 -1 +0 1 2 1 +1 2 2 -2 -0 1 2 1 +2 1 +0 2 2 -1 +0 2 1 +1 2 2 -0 2 +2 + +statement ok + + +statement ok + + +statement ok +-- No predicates satisfied + +statement ok +-- Sequential scan chosen since no predicates or sort satisfied by index query III nosort -SELECT * FROM foo WHERE var1 >= 1 AND var2 >= 1 ORDER BY var1, var3 LIMIT 11; +SELECT * FROM foo WHERE var2 >= 1 ORDER BY var2 LIMIT 17; ---- -1 -1 0 1 2 @@ -212,90 +226,123 @@ SELECT * FROM foo WHERE var1 >= 1 AND var2 >= 1 ORDER BY var1, var3 LIMIT 11; 1 1 1 +0 2 1 +0 +2 1 1 2 1 2 +1 +1 +1 +1 +1 2 -2 +0 1 0 2 2 +2 0 2 +0 +0 2 1 +0 +2 2 1 +2 +0 1 2 1 +1 2 +2 +2 +2 +0 + +statement ok + + +statement ok + + +statement ok +-- At least one predicate satisfied and sort satisfied + +statement ok +-- Index can be chosen due to bounds, but limit cannot be pushed down because of complex predicate check + +statement ok +-- TODO(dpatra): Once predicate can be pushed down to index iterator, limit can also be pushed down query III nosort -SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var2 LIMIT 11; +SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var1, var3 LIMIT 11; ---- 1 0 1 1 -0 -2 -2 -0 1 -2 -0 -2 1 1 2 -2 1 1 +0 2 1 +1 2 1 +2 +2 +2 +0 1 +2 1 1 2 -1 +2 1 2 +1 2 2 +0 2 -1 query III nosort -SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var3 LIMIT 11; +SELECT * FROM foo WHERE var1 >= 1 AND var2 >= 1 ORDER BY var1, var3 LIMIT 11; ---- 1 +1 0 1 +2 +0 1 1 1 1 2 1 -2 -0 -1 -2 1 1 2 -2 1 2 -1 +2 2 1 0 @@ -303,15 +350,36 @@ SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 ORDER BY var3 LIMIT 11; 2 0 2 -1 -1 2 +1 2 +1 +1 2 +1 2 statement ok +-- Index can be chosen due to bounds, and limit can be pushed down because of valid sort and enforced predicate check +query III nosort +SELECT * FROM foo WHERE var1 = 1 AND var3 >= 1 ORDER BY var3 LIMIT 5; +---- +1 +0 +1 +1 +1 +1 +1 +2 +1 +1 +0 +2 +1 +1 +2 statement ok @@ -320,7 +388,7 @@ statement ok -- Sort exists statement ok --- Sort not satisfied +-- Sequential scan chosen since sort not satisfied by index query III nosort SELECT * FROM foo ORDER BY var2 LIMIT 26; @@ -489,9 +557,15 @@ SELECT * FROM foo ORDER BY var3 LIMIT 26; statement ok +statement ok + + statement ok -- Sort satisfied +statement ok +-- Index can be chosen due to bounds and limit pushed down due to satisfied order by and lack of predicates + query III nosort SELECT * FROM foo ORDER BY var1, var3 LIMIT 26; ---- @@ -584,7 +658,7 @@ statement ok -- Predicates exist statement ok --- Predicates not satisfied +-- Sequential scan chosen since predicate not satisfied by index query III rowsort SELECT * FROM foo WHERE var2 >= 1 LIMIT 17; @@ -648,7 +722,10 @@ statement ok -- Predicates satisfied statement ok --- Predicate must be checked +-- Index can be chosen due to bounds, but limit cannot be pushed down because of complex predicate check + +statement ok +-- TODO(dpatra): Once predicate can be pushed down to index iterator, limit can also be pushed down query III rowsort SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 LIMIT 11; @@ -687,6 +764,28 @@ SELECT * FROM foo WHERE var1 >= 1 AND var3 >= 1 LIMIT 11; 2 1 +statement ok +-- Index can be chosen due to bounds, and limit can be pushed down because of enforced predicate check + +query III rowsort +SELECT * FROM foo WHERE var1 = 1 AND var3 >= 1 LIMIT 5; +---- +1 +0 +1 +1 +0 +2 +1 +1 +1 +1 +1 +2 +1 +2 +1 + statement ok @@ -696,6 +795,9 @@ statement ok statement ok -- Neither predicates nor sort exist +statement ok +-- Sequential scan chosen since no predicates nor order by's exist + query III rowsort SELECT * FROM foo LIMIT 26; ---- diff --git a/src/optimizer/index_util.cpp b/src/optimizer/index_util.cpp index a39984e0e5..a3d11de6aa 100644 --- a/src/optimizer/index_util.cpp +++ b/src/optimizer/index_util.cpp @@ -49,7 +49,7 @@ bool IndexUtil::SatisfiesSortWithIndex( // Column is present in both sort and index so increment both sort_ind++, idx_ind++; } else if (bounds != nullptr && bounds->find(lookup[mapped_cols[idx_ind]]) != bounds->end()) { - // If column is found in bounds but not index, continue + // If column is found in bounds but not sort, continue idx_ind++; } else { // Column not found in index so cannot use this index @@ -57,7 +57,8 @@ bool IndexUtil::SatisfiesSortWithIndex( } } - return true; + // Ensure all of sort satisfied + return sort_ind == sort_col_size; } bool IndexUtil::SatisfiesPredicateWithIndex( From 79a2761522ce9d4385ffa7b09d91c4861a753218 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Mon, 4 Jan 2021 20:32:17 -0500 Subject: [PATCH 18/20] Fix sort and predicate correctness bugs and remove optional properties --- src/include/optimizer/index_util.h | 2 +- src/include/optimizer/optimization_context.h | 17 +----------- src/include/optimizer/properties.h | 5 ++-- src/include/optimizer/property.h | 22 ---------------- src/optimizer/child_property_deriver.cpp | 10 +++----- src/optimizer/index_util.cpp | 23 +++++++++-------- src/optimizer/optimizer_task.cpp | 27 +++----------------- src/optimizer/rules/implementation_rules.cpp | 6 +---- 8 files changed, 25 insertions(+), 87 deletions(-) diff --git a/src/include/optimizer/index_util.h b/src/include/optimizer/index_util.h index 281a7ce95d..8e955679be 100644 --- a/src/include/optimizer/index_util.h +++ b/src/include/optimizer/index_util.h @@ -68,7 +68,7 @@ class IndexUtil { static bool SatisfiesSortWithIndex( catalog::CatalogAccessor *accessor, const PropertySort *prop, catalog::table_oid_t tbl_oid, catalog::index_oid_t idx_oid, - std::unordered_map> *bounds = nullptr); + std::unordered_map> *bounds); /** * Checks whether a set of predicates can be satisfied with an index. diff --git a/src/include/optimizer/optimization_context.h b/src/include/optimizer/optimization_context.h index 702df13533..4b8a1574e4 100644 --- a/src/include/optimizer/optimization_context.h +++ b/src/include/optimizer/optimization_context.h @@ -24,20 +24,15 @@ class OptimizationContext { * @param cost_upper_bound Upper cost bound */ OptimizationContext(OptimizerContext *context, PropertySet *required_props, - PropertySet *optional_props = new PropertySet(), double cost_upper_bound = std::numeric_limits::max()) : context_(context), required_props_(required_props), - optional_props_(optional_props), cost_upper_bound_(cost_upper_bound) {} /** * Destructor */ - ~OptimizationContext() { - delete required_props_; - delete optional_props_; - } + ~OptimizationContext() { delete required_props_; } /** * @returns OptimizerContext @@ -49,11 +44,6 @@ class OptimizationContext { */ PropertySet *GetRequiredProperties() const { return required_props_; } - /** - * @returns Properties to attempt, owned by this OptimizationContext - */ - PropertySet *GetOptionalProperties() const { return optional_props_; } - /** * @returns Current context's upper bound cost */ @@ -76,11 +66,6 @@ class OptimizationContext { */ PropertySet *required_props_; - /** - * Required properties - */ - PropertySet *optional_props_; - /** * Cost Upper Bound (for pruning) */ diff --git a/src/include/optimizer/properties.h b/src/include/optimizer/properties.h index c3fd62b5ec..265053f993 100644 --- a/src/include/optimizer/properties.h +++ b/src/include/optimizer/properties.h @@ -19,11 +19,10 @@ class PropertySort : public Property { * Constructor for PropertySort * @param sort_columns vector of AbstractExpressions representing sort columns * @param sort_ascending Whether each sort_column is ascending or descending - * @param required Whether the sort property needs to be satisfied at the active level */ PropertySort(std::vector> sort_columns, - std::vector sort_ascending, bool required = true) - : Property(required), sort_columns_(std::move(sort_columns)), sort_ascending_(std::move(sort_ascending)) {} + std::vector sort_ascending) + : sort_columns_(std::move(sort_columns)), sort_ascending_(std::move(sort_ascending)) {} /** * Returns the type of PropertySort diff --git a/src/include/optimizer/property.h b/src/include/optimizer/property.h index 84c730e7e2..8d65b4ba84 100644 --- a/src/include/optimizer/property.h +++ b/src/include/optimizer/property.h @@ -28,12 +28,6 @@ enum class PropertyType { SORT }; */ class Property { public: - /** - * Constructor for property - * @param required whether the property is required - */ - explicit Property(bool required) : required_(required) {} - /** * Trivial destructor for Property */ @@ -81,22 +75,6 @@ class Property { } return nullptr; } - - /** - * Sets the property's required status - */ - void SetRequired(bool required) { required_ = required; } - - /** - * @return whether the property is required - */ - auto GetRequired() const -> bool { return required_; } - - private: - /** - * Whether the property is required - */ - bool required_{true}; }; } // namespace noisepage::optimizer diff --git a/src/optimizer/child_property_deriver.cpp b/src/optimizer/child_property_deriver.cpp index 83b7cdb49f..b61c13a89a 100644 --- a/src/optimizer/child_property_deriver.cpp +++ b/src/optimizer/child_property_deriver.cpp @@ -36,6 +36,7 @@ void ChildPropertyDeriver::Visit(const IndexScan *op) { // Use GetIndexOids() to get all indexes on table_alias auto tbl_id = op->GetTableOID(); std::vector tbl_indexes = accessor_->GetIndexOids(tbl_id); + auto bounds = op->GetBounds(); auto *property_set = new PropertySet(); @@ -48,7 +49,7 @@ void ChildPropertyDeriver::Visit(const IndexScan *op) { } auto idx_oid = op->GetIndexOID(); - if (IndexUtil::SatisfiesSortWithIndex(accessor_, sort_prop, tbl_id, idx_oid)) { + if (IndexUtil::SatisfiesSortWithIndex(accessor_, sort_prop, tbl_id, idx_oid, &bounds)) { property_set->AddProperty(prop->Copy()); } } @@ -95,18 +96,15 @@ void ChildPropertyDeriver::Visit(UNUSED_ATTRIBUTE const Aggregate *op) { void ChildPropertyDeriver::Visit(const Limit *op) { // Limit fulfill the internal sort property std::vector child_input_properties{new PropertySet()}; - auto provided_prop = new PropertySet(); // Limit must satisfy output sort properties but child can attempt to satisfy sort property optionally if (!op->GetSortExpressions().empty()) { const std::vector> &exprs = op->GetSortExpressions(); const std::vector &sorts{op->GetSortAscending()}; - provided_prop->AddProperty(new PropertySort(exprs, sorts)); - - child_input_properties[0]->AddProperty(new PropertySort(exprs, sorts, false)); + child_input_properties[0]->AddProperty(new PropertySort(exprs, sorts)); } - output_.emplace_back(provided_prop, std::move(child_input_properties)); + output_.emplace_back(new PropertySet(), std::move(child_input_properties)); } void ChildPropertyDeriver::Visit(UNUSED_ATTRIBUTE const OrderBy *op) {} diff --git a/src/optimizer/index_util.cpp b/src/optimizer/index_util.cpp index 57982b17fd..a34fb24657 100644 --- a/src/optimizer/index_util.cpp +++ b/src/optimizer/index_util.cpp @@ -48,9 +48,15 @@ bool IndexUtil::SatisfiesSortWithIndex( if (tv_col_oid == mapped_cols[idx_ind]) { // Column is present in both sort and index so increment both sort_ind++, idx_ind++; - } else if (bounds != nullptr && bounds->find(lookup[mapped_cols[idx_ind]]) != bounds->end()) { - // If column is found in bounds but not sort, continue - idx_ind++; + } else if (bounds != nullptr) { + auto bound_column = bounds->find(lookup[mapped_cols[idx_ind]]); + if (bound_column != bounds->end() && bound_column->second[0] == bound_column->second[1]) { + // If column is exactly bound but not in sort, continue + idx_ind++; + } else { + // Index column is not exactly bound so cannot use this index + return false; + } } else { // Column not found in index so cannot use this index return false; @@ -255,13 +261,10 @@ bool IndexUtil::CheckPredicates( if (bounds->empty()) return false; - // Scan may be uninitialized if all bound columns are equality checks, but not all index columns are bound so is not - // exact - if (open_lows.size() == open_highs.size() && scan_type == planner::IndexScanType::Dummy) { - if (bounds->size() == open_lows.size()) - scan_type = planner::IndexScanType::AscendingClosedLimit; - else - scan_type = planner::IndexScanType::AscendingClosed; + // Scan may be uninitialized if all bound columns are equality checks, but not all index columns are bound + if (scan_type == planner::IndexScanType::Dummy) { + // Cannot push down limit due to additional predicates not satisfied by index + scan_type = planner::IndexScanType::AscendingClosed; } NOISEPAGE_ASSERT(scan_type != planner::IndexScanType::Dummy, diff --git a/src/optimizer/optimizer_task.cpp b/src/optimizer/optimizer_task.cpp index e0f769771a..017ba9bed5 100644 --- a/src/optimizer/optimizer_task.cpp +++ b/src/optimizer/optimizer_task.cpp @@ -287,41 +287,20 @@ void OptimizeExpressionCostWithEnforcedProperty::Execute() { for (; cur_child_idx_ < static_cast(group_expr_->GetChildrenGroupsSize()); cur_child_idx_++) { auto &i_prop = input_props[cur_child_idx_]; - // Fill input properties based on required properties and optional proper ties - auto *req_input_props = new PropertySet(); - auto optional_input_props = PropertySet(); - - for (auto *prop : i_prop->Properties()) { - if (prop->GetRequired()) { - // Required property - req_input_props->AddProperty(prop->Copy()); - } else { - // Optional property - optional_input_props.AddProperty(prop->Copy()); - } - } - - delete input_props[cur_child_idx_]; - // Only preserve required input properties, optional properties should be in child operator - input_props[cur_child_idx_] = req_input_props; - auto child_group = context_->GetOptimizerContext()->GetMemo().GetGroupByID(group_expr_->GetChildGroupId(cur_child_idx_)); // Check whether the child group is already optimized for the required input properties - auto child_best_expr = child_group->GetBestExpression(req_input_props); + auto child_best_expr = child_group->GetBestExpression(i_prop); if (child_best_expr != nullptr) { // Directly get back the best expr if the child group is optimized - // Only cost on required properties - // TODO(dpatra): Update costing structure for completed optional properties - cur_total_cost_ += child_best_expr->GetCost(req_input_props); + cur_total_cost_ += child_best_expr->GetCost(i_prop); if (cur_total_cost_ > context_->GetCostUpperBound()) break; } else if (prev_child_idx_ != cur_child_idx_) { // We haven't optimized child group prev_child_idx_ = cur_child_idx_; PushTask(new OptimizeExpressionCostWithEnforcedProperty(this)); auto cost_high = context_->GetCostUpperBound() - cur_total_cost_; - auto ctx = new OptimizationContext(context_->GetOptimizerContext(), req_input_props->Copy(), - optional_input_props.Copy(), cost_high); + auto ctx = new OptimizationContext(context_->GetOptimizerContext(), i_prop->Copy(), cost_high); PushTask(new OptimizeGroup(child_group, ctx)); context_->GetOptimizerContext()->AddOptimizationContext(ctx); return; diff --git a/src/optimizer/rules/implementation_rules.cpp b/src/optimizer/rules/implementation_rules.cpp index 2e130bdfe0..d1e8c6cda9 100644 --- a/src/optimizer/rules/implementation_rules.cpp +++ b/src/optimizer/rules/implementation_rules.cpp @@ -120,10 +120,6 @@ void LogicalGetToPhysicalIndexScan::Transform(common::ManagedPointerGetRequiredProperties()->GetPropertyOfType(PropertyType::SORT); - // If no sort found in required properties, try optional properties - if (sort == nullptr) { - sort = context->GetOptionalProperties()->GetPropertyOfType(PropertyType::SORT); - } bool sort_exists = sort != nullptr; auto sort_prop = sort_exists ? sort->As() : nullptr; @@ -185,7 +181,7 @@ void LogicalGetToPhysicalIndexScan::Transform(common::ManagedPointerGetTableOid(), index)) { + if (sort_possible && IndexUtil::SatisfiesSortWithIndex(accessor, sort_prop, get->GetTableOid(), index, nullptr)) { std::vector preds = get->GetPredicates(); auto op = std::make_unique( IndexScan::Make(db_oid, get->GetTableOid(), index, std::move(preds), is_update, From 0e38cf1d7a5941e1259c4db4d472cecdc8c25e43 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Mon, 4 Jan 2021 22:43:12 -0500 Subject: [PATCH 19/20] Address some PR comments --- src/execution/compiler/codegen.cpp | 2 +- src/execution/compiler/operator/index_join_translator.cpp | 2 -- src/include/planner/plannodes/plan_node_defs.h | 2 +- src/optimizer/child_property_deriver.cpp | 3 +-- src/optimizer/rules/implementation_rules.cpp | 3 --- src/optimizer/rules/transformation_rules.cpp | 2 +- 6 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/execution/compiler/codegen.cpp b/src/execution/compiler/codegen.cpp index 76846927eb..e36b4f2b43 100644 --- a/src/execution/compiler/codegen.cpp +++ b/src/execution/compiler/codegen.cpp @@ -431,7 +431,7 @@ ast::Expr *CodeGen::IndexIteratorScan(ast::Identifier iter, planner::IndexScanTy storage::index::ScanType asc_type; switch (scan_type) { case planner::IndexScanType::Exact: - // Exact scan does not take advantage of limit now, but it can + // TODO(Deepayan): Exact scan builtin does not take advantage of limit now, but it can builtin = ast::Builtin::IndexIteratorScanKey; break; case planner::IndexScanType::AscendingClosedLimit: diff --git a/src/execution/compiler/operator/index_join_translator.cpp b/src/execution/compiler/operator/index_join_translator.cpp index b0bbaed965..73b0bac96f 100644 --- a/src/execution/compiler/operator/index_join_translator.cpp +++ b/src/execution/compiler/operator/index_join_translator.cpp @@ -202,7 +202,6 @@ void IndexJoinTranslator::FillKey( WorkContext *context, FunctionBuilder *builder, ast::Identifier pr, const std::unordered_map &index_exprs) const { // For each key attribute, - uint32_t count = 0; for (const auto &key : index_exprs) { // @prSet(pr, type, nullable, attr, expr, true) uint16_t attr_offset = index_pm_.at(key.first); @@ -211,7 +210,6 @@ void IndexJoinTranslator::FillKey( auto *set_key_call = GetCodeGen()->PRSet(GetCodeGen()->MakeExpr(pr), attr_type, nullable, attr_offset, context->DeriveValue(*key.second.Get(), this), true); builder->Append(GetCodeGen()->MakeStmt(set_key_call)); - count++; } } diff --git a/src/include/planner/plannodes/plan_node_defs.h b/src/include/planner/plannodes/plan_node_defs.h index 6d14a3d135..d5205a51ea 100644 --- a/src/include/planner/plannodes/plan_node_defs.h +++ b/src/include/planner/plannodes/plan_node_defs.h @@ -141,7 +141,7 @@ enum class SetOpType { INVALID = INVALID_TYPE_ID, INTERSECT = 1, INTERSECT_ALL = using IndexExpression = common::ManagedPointer; /** Type of index scan. */ enum class IndexScanType : uint8_t { - Dummy, + Dummy, // Dummy scans are placeholders for index scan selection, and should never be used as a scan type Exact, AscendingClosed, AscendingClosedLimit, diff --git a/src/optimizer/child_property_deriver.cpp b/src/optimizer/child_property_deriver.cpp index b61c13a89a..dc713c88ff 100644 --- a/src/optimizer/child_property_deriver.cpp +++ b/src/optimizer/child_property_deriver.cpp @@ -80,7 +80,7 @@ void ChildPropertyDeriver::Visit(UNUSED_ATTRIBUTE const HashGroupBy *op) { output_.emplace_back(new PropertySet(), std::vector{new PropertySet()}); } -void ChildPropertyDeriver::Visit(UNUSED_ATTRIBUTE const SortGroupBy *op) { +void ChildPropertyDeriver::Visit(const SortGroupBy *op) { // Child must provide sort for Groupby columns std::vector sort_ascending(op->GetColumns().size(), catalog::OrderByOrderingType::ASC); @@ -94,7 +94,6 @@ void ChildPropertyDeriver::Visit(UNUSED_ATTRIBUTE const Aggregate *op) { } void ChildPropertyDeriver::Visit(const Limit *op) { - // Limit fulfill the internal sort property std::vector child_input_properties{new PropertySet()}; // Limit must satisfy output sort properties but child can attempt to satisfy sort property optionally diff --git a/src/optimizer/rules/implementation_rules.cpp b/src/optimizer/rules/implementation_rules.cpp index d1e8c6cda9..8a95a15b83 100644 --- a/src/optimizer/rules/implementation_rules.cpp +++ b/src/optimizer/rules/implementation_rules.cpp @@ -136,13 +136,10 @@ void LogicalGetToPhysicalIndexScan::Transform(common::ManagedPointerGetTableOid(), get->GetTableAlias(), index, preds, allow_cves_, &scan_type, &bounds)) { // Limit can only be pushed down in the following cases - // TODO(dpatra): Limit can be pushed down in the case of an exact scan as well, but the index framework - // doesn't currently support it. limit_exists = limit_exists && (scan_type == planner::IndexScanType::Exact || scan_type == planner::IndexScanType::AscendingOpenHighLimit || scan_type == planner::IndexScanType::AscendingClosedLimit || scan_type == planner::IndexScanType::DescendingLimit); - preds = get->GetPredicates(); // There is an index that satisfies predicates for at least one column if (sort_exists) { diff --git a/src/optimizer/rules/transformation_rules.cpp b/src/optimizer/rules/transformation_rules.cpp index a674b09471..4d43a0076a 100644 --- a/src/optimizer/rules/transformation_rules.cpp +++ b/src/optimizer/rules/transformation_rules.cpp @@ -187,7 +187,7 @@ void SetLimitInGet::Transform(common::ManagedPointer inpu } /////////////////////////////////////////////////////////////////////////////// -/// SetLimitInGet +/// SetLimitInLogicalInnerJoin /////////////////////////////////////////////////////////////////////////////// SetLimitInLogicalInnerJoin::SetLimitInLogicalInnerJoin() { type_ = RuleType::SET_LIMIT_IN_LOGICAL_INNER_JOIN; From d355fe0ba7c1bfe2c290b2b65945366c9c467204 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Mon, 4 Jan 2021 22:46:47 -0500 Subject: [PATCH 20/20] Address PR property fix --- src/optimizer/child_property_deriver.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/optimizer/child_property_deriver.cpp b/src/optimizer/child_property_deriver.cpp index dc713c88ff..50d8c2e4aa 100644 --- a/src/optimizer/child_property_deriver.cpp +++ b/src/optimizer/child_property_deriver.cpp @@ -94,16 +94,19 @@ void ChildPropertyDeriver::Visit(UNUSED_ATTRIBUTE const Aggregate *op) { } void ChildPropertyDeriver::Visit(const Limit *op) { + // Limit pushes down and preserves the internal sort property std::vector child_input_properties{new PropertySet()}; + auto provided_prop = new PropertySet(); // Limit must satisfy output sort properties but child can attempt to satisfy sort property optionally if (!op->GetSortExpressions().empty()) { const std::vector> &exprs = op->GetSortExpressions(); const std::vector &sorts{op->GetSortAscending()}; + provided_prop->AddProperty(new PropertySort(exprs, sorts)); child_input_properties[0]->AddProperty(new PropertySort(exprs, sorts)); } - output_.emplace_back(new PropertySet(), std::move(child_input_properties)); + output_.emplace_back(provided_prop, std::move(child_input_properties)); } void ChildPropertyDeriver::Visit(UNUSED_ATTRIBUTE const OrderBy *op) {}