Skip to content

Commit

Permalink
Short-term cache
Browse files Browse the repository at this point in the history
  • Loading branch information
msm-code committed Oct 17, 2024
1 parent c2ffdea commit 6e341c8
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 3 deletions.
2 changes: 2 additions & 0 deletions libursa/OnDiskDataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ std::string OnDiskDataset::get_file_name(FileId fid) const {
QueryResult OnDiskDataset::query(const Query &query,
QueryCounters *counters) const {
std::set<PrimitiveQuery> seen;
std::map<std::vector<PrimitiveQuery>, SortedRun> string_cache;
return query.run(
[this, &seen](PrimitiveQuery primitive, QueryCounters *counters) {
std::optional<QueryOperation> operation;
Expand All @@ -90,6 +91,7 @@ QueryResult OnDiskDataset::query(const Query &query,
}
}
},
&string_cache,
counters);
}

Expand Down
28 changes: 25 additions & 3 deletions libursa/Query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ void Query::prefetch(int from_index, int howmany, bool only_last,

QueryResult Query::run(const QueryPrimitive &primitive,
const PrefetchFunc &prefetcher,
std::map<std::vector<PrimitiveQuery>, SortedRun> *cache,
QueryCounters *counters) const {
// Case: primitive query - reduces to AND with tokens from query plan.
if (type == QueryType::PRIMITIVE) {
Expand All @@ -244,23 +245,33 @@ QueryResult Query::run(const QueryPrimitive &primitive,

// Case: and. Short circuits when result is already empty.
if (type == QueryType::AND) {
auto cache_key = get_cache_key();
auto cached_it = cache->find(cache_key);
if (cached_it != cache->end()) {
spdlog::info("found a cached string {}!", cached_it->second.size());
return QueryResult(cached_it->second.clone());
}
auto result = QueryResult::everything();
for (int i = 0; i < queries.size(); i++) {
prefetch(i + 1, PRETECTH_RANGE, true, prefetcher);
const auto &query = queries[i];
result.do_and(query.run(primitive, prefetcher, counters),
result.do_and(query.run(primitive, prefetcher, cache, counters),
&counters->ands());
if (result.is_empty()) {
break;
}
}
if (!cache_key.empty() && !result.is_everything()) {
spdlog::debug("caching (len: {})...", cache_key.size());
cache->emplace(std::move(cache_key), result.vector().clone());
}
return result;
}
// Case: or. Short circuits when result is already everything.
if (type == QueryType::OR) {
auto result = QueryResult::empty();
for (auto &query : queries) {
result.do_or(query.run(primitive, prefetcher, counters),
result.do_or(query.run(primitive, prefetcher, cache, counters),
&counters->ors());
if (result.is_everything()) {
break;
Expand All @@ -281,7 +292,7 @@ QueryResult Query::run(const QueryPrimitive &primitive,
int cutoff = count;
int nonempty_sources = queries.size();
for (const auto &query : queries) {
QueryResult next = query.run(primitive, prefetcher, counters);
QueryResult next = query.run(primitive, prefetcher, cache, counters);
if (next.is_everything()) {
cutoff -= 1;
if (cutoff <= 0) {
Expand All @@ -302,3 +313,14 @@ QueryResult Query::run(const QueryPrimitive &primitive,
}
throw std::runtime_error("Unexpected query type");
}

std::vector<PrimitiveQuery> Query::get_cache_key() const {
std::vector<PrimitiveQuery> result;
for (const auto &query : queries) {
if (query.get_type() != QueryType::PRIMITIVE) {
return {};
}
result.push_back(query.as_ngram());
}
return result;
}
3 changes: 3 additions & 0 deletions libursa/Query.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,12 @@ class Query {

QueryResult run(const QueryPrimitive &primitive,
const PrefetchFunc &prefetch,
std::map<std::vector<PrimitiveQuery>, SortedRun> *cache,
QueryCounters *counters) const;
Query plan(const std::unordered_set<IndexType> &types_to_query) const;

std::vector<PrimitiveQuery> get_cache_key() const;

private:
void prefetch(int from_index, int howmany, bool only_last,
const PrefetchFunc &prefetch) const;
Expand Down

0 comments on commit 6e341c8

Please sign in to comment.