diff --git a/libursa/QueryOptimizer.cpp b/libursa/QueryOptimizer.cpp
index 362c76f..316d6dc 100644
--- a/libursa/QueryOptimizer.cpp
+++ b/libursa/QueryOptimizer.cpp
@@ -53,6 +53,36 @@ Query inline_suboperations(Query &&q, bool *changed) {
     return std::move(Query(q.get_type(), std::move(newqueries)));
 }
 
+// This optimization gets rid of duplicated primitive queries.
+// AND(a, a, a, a, b, b) == AND(a, b)
+// This also applies to OR(), but it'll happen very rarely.
+//
+// Only direct PRIMITIVE children of an AND/OR node are deduplicated; every
+// other child is kept as-is and any other node type is returned untouched.
+// The first occurrence of each primitive is kept, so relative order is
+// preserved. Sets *changed to true when at least one duplicate was dropped
+// and never resets it to false.
+Query deduplicate_primitives(Query &&q, bool *changed) {
+    if (q.get_type() != QueryType::AND && q.get_type() != QueryType::OR) {
+        return std::move(q);
+    }
+
+    std::set<PrimitiveQuery> seen;
+    std::vector<Query> newqueries;
+    for (auto &&query : q.as_queries()) {
+        if (query.get_type() != QueryType::PRIMITIVE) {
+            newqueries.emplace_back(std::move(query));
+        } else if (seen.insert(query.as_ngram()).second) {
+            // Record the ngram before moving: `query` must not be read
+            // once it has been moved from.
+            newqueries.emplace_back(std::move(query));
+        } else {
+            *changed = true;
+        }
+    }
+    return Query(q.get_type(), std::move(newqueries));
+}
+
 Query q_optimize(Query &&q) {
     if (q.get_type() == QueryType::PRIMITIVE) {
         // Nothing to improve here.
@@ -65,6 +95,7 @@ Query q_optimize(Query &&q) {
         changed = false;
         q = flatten_trivial_operations(std::move(q), &changed);
         q = inline_suboperations(std::move(q), &changed);
+        q = deduplicate_primitives(std::move(q), &changed);
     }
 
     return std::move(q);
diff --git a/libursa/Version.h.in b/libursa/Version.h.in
index e85d3b8..15d1ab5 100644
--- a/libursa/Version.h.in
+++ b/libursa/Version.h.in
@@ -9,5 +9,5 @@ constexpr std::string_view ursadb_format_version = "1.5.0";
 // Project version.
 // Consider updating the version tag when doing PRs.
 // clang-format off
-constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt2";
+constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt3";
 // clang-format on