From c76a118f16f38bf034c4e2c40ef5071a19304a7d Mon Sep 17 00:00:00 2001
From: msm-cert <156842376+msm-cert@users.noreply.github.com>
Date: Tue, 1 Oct 2024 13:20:35 +0000
Subject: [PATCH] opt4: simplify_minof (#223)

---
 libursa/QueryOptimizer.cpp | 29 +++++++++++++++++++++++++++++
 libursa/Version.h.in       |  2 +-
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/libursa/QueryOptimizer.cpp b/libursa/QueryOptimizer.cpp
index 316d6dc..2583e0c 100644
--- a/libursa/QueryOptimizer.cpp
+++ b/libursa/QueryOptimizer.cpp
@@ -2,6 +2,13 @@
 
 #include <vector>
 
+// Returns a query that represents every element in the dataset.
+// No magic here: an AND() over zero subqueries just behaves like this.
+Query q_everything() {
+    std::vector<Query> queries;
+    return q_and(std::move(queries));
+}
+
 // Run the optimization pases on subqueries.
 // After this step, every subquery should be maximally optimized,
 // So I believe there's no need to run this in a loop.
@@ -76,6 +83,27 @@ Query deduplicate_primitives(Query &&q, bool *changed) {
     return std::move(Query(q.get_type(), std::move(newqueries)));
 }
 
+// Minof is the slowest operation, so replace it by others if possible.
+// This may also enable other optimizations to take place. Sets *changed
+// when rewritten; any other query is returned untouched. Tested in order:
+// MIN N OF (N subqueries) --> AND(...);  MIN 1 OF (...) --> OR(...);
+// MIN 0 OF (...) --> everything().
+Query simplify_minof(Query &&q, bool *changed) {
+    if (q.get_type() == QueryType::MIN_OF) {
+        if (q.as_count() == q.as_queries().size()) {
+            *changed = true;
+            return q_and(std::move(q.as_queries()));
+        } else if (q.as_count() == 1) {
+            *changed = true;
+            return q_or(std::move(q.as_queries()));
+        } else if (q.as_count() == 0) {
+            *changed = true;
+            return q_everything();
+        }
+    }
+    return std::move(q);
+}
+
 Query q_optimize(Query &&q) {
     if (q.get_type() == QueryType::PRIMITIVE) {
         // Nothing to improve here.
@@ -89,6 +117,7 @@ Query q_optimize(Query &&q) {
         q = flatten_trivial_operations(std::move(q), &changed);
         q = inline_suboperations(std::move(q), &changed);
         q = deduplicate_primitives(std::move(q), &changed);
+        q = simplify_minof(std::move(q), &changed);
     }
 
     return std::move(q);
diff --git a/libursa/Version.h.in b/libursa/Version.h.in
index 15d1ab5..ddc9415 100644
--- a/libursa/Version.h.in
+++ b/libursa/Version.h.in
@@ -9,5 +9,5 @@ constexpr std::string_view ursadb_format_version = "1.5.0";
 // Project version.
 // Consider updating the version tag when doing PRs.
 // clang-format off
-constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt3";
+constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt4";
 // clang-format on