Skip to content

Commit

Permalink
opt3: deduplicate_primitives
Browse files Browse the repository at this point in the history
  • Loading branch information
msm-code committed Oct 1, 2024
1 parent 3ffacf7 commit 2d3ff4b
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
24 changes: 24 additions & 0 deletions libursa/QueryOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,29 @@ Query inline_suboperations(Query &&q, bool *changed) {
return std::move(Query(q.get_type(), std::move(newqueries)));
}

// This optimization gets rid of duplicated primitive queries.
// AND(a, a, a, a, b, b) == AND(a, b)
// This also applies to OR(), but it'll happen very rarely.
//
// Only the direct children of `q` are inspected. Non-primitive children are
// always kept, and for every distinct primitive the first occurrence is kept,
// so the relative order of the surviving children is unchanged.
//
// q       - query to optimize; consumed by this call. Queries that are
//           neither AND nor OR are returned untouched.
// changed - set to true when at least one duplicate was dropped. It is never
//           reset to false here, so the caller keeps any earlier "changed"
//           signal from other passes.
//
// Returns a query of the same type as `q` holding the deduplicated children.
Query deduplicate_primitives(Query &&q, bool *changed) {
    if (q.get_type() != QueryType::AND && q.get_type() != QueryType::OR) {
        return std::move(q);
    }

    std::set<PrimitiveQuery> seen;
    std::vector<Query> newqueries;
    for (auto &&query : q.as_queries()) {
        if (query.get_type() != QueryType::PRIMITIVE) {
            newqueries.emplace_back(std::move(query));
            continue;
        }
        // Record the ngram *before* moving the query out, so the moved-from
        // object is never read. insert() reports whether the ngram was new,
        // which also saves a separate count() lookup.
        const bool first_occurrence = seen.insert(query.as_ngram()).second;
        if (first_occurrence) {
            newqueries.emplace_back(std::move(query));
        } else {
            *changed = true;
        }
    }
    // Return the temporary directly: wrapping a prvalue in std::move would
    // only prevent copy elision.
    return Query(q.get_type(), std::move(newqueries));
}

Query q_optimize(Query &&q) {
if (q.get_type() == QueryType::PRIMITIVE) {
// Nothing to improve here.
Expand All @@ -65,6 +88,7 @@ Query q_optimize(Query &&q) {
changed = false;
q = flatten_trivial_operations(std::move(q), &changed);
q = inline_suboperations(std::move(q), &changed);
q = deduplicate_primitives(std::move(q), &changed);
}

return std::move(q);
Expand Down
2 changes: 1 addition & 1 deletion libursa/Version.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ constexpr std::string_view ursadb_format_version = "1.5.0";
// Project version.
// Consider updating the version tag when doing PRs.
// clang-format off
constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt2";
constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt3";
// clang-format on

0 comments on commit 2d3ff4b

Please sign in to comment.