Skip to content

Commit

Permalink
feat(py): expose granular caching via pyO3 API
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdoret committed Oct 22, 2024
1 parent 143abe7 commit c6547ff
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
18 changes: 16 additions & 2 deletions pyfuzon/python/pyfuzon/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,13 @@

from pathlib import Path

from .pyfuzon import get_cache_key as _get_cache_key
from .pyfuzon import get_cache_path as _get_cache_path
from .matcher import TermMatcher
from .pyfuzon import (
get_cache_key as _get_cache_key,
get_cache_path as _get_cache_path,
cache_by_source as _cache_by_source,
load_by_source as _load_by_source,
)

def get_cache_key(sources: list[str]) -> str:
"""Return a deterministic cache key based on a collection of source paths."""
Expand All @@ -26,3 +31,12 @@ def get_cache_key(sources: list[str]) -> str:
def get_cache_path(sources: list[str]) -> Path:
    """Resolve the cache location associated with a collection of source paths.

    Args:
        sources: Source paths the cache entry is derived from.

    Returns:
        The full platform-specific cache path, wrapped in a ``pathlib.Path``.
    """
    # The native binding hands back a plain string; wrap it for callers.
    raw_location = _get_cache_path(sources)
    return Path(raw_location)

def cache_by_source(sources: list[str]) -> None:
    """Save each source into an independent TermMatcher cache file.

    Thin wrapper that forwards ``sources`` unchanged to the native
    ``cache_by_source`` binding.

    Args:
        sources: Source paths to cache, one cache entry per source.

    Returns:
        None. The result of the native call is not propagated.
    """
    _cache_by_source(sources)

def load_by_source(sources: list[str]) -> TermMatcher:
    """Build one TermMatcher out of the per-source cache entries.

    Args:
        sources: Source paths whose single-source cache entries should be
            loaded and combined.

    Returns:
        A ``TermMatcher`` constructed from the terms returned by the native
        ``load_by_source`` binding.
    """
    return TermMatcher(_load_by_source(sources))
24 changes: 24 additions & 0 deletions pyfuzon/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,28 @@ pub fn get_cache_key(sources: Vec<String>) -> PyResult<String> {
Ok(cache::get_cache_key(&mut src_ref)?)
}

/// Write a dedicated TermMatcher cache file for every given source.
///
/// The owned `sources` strings are borrowed as `&str` and handed to
/// `cache::cache_by_source`; any error it reports is propagated to Python
/// through `?`.
#[pyfunction]
pub fn cache_by_source(sources: Vec<String>) -> PyResult<()> {
    // Borrow each owned string; the target collection type is inferred from
    // the signature of `cache::cache_by_source`.
    let borrowed = sources.iter().map(String::as_str).collect();
    cache::cache_by_source(borrowed)?;

    Ok(())
}

/// Read the per-source TermMatcher cache files and return their terms.
///
/// The owned `sources` strings are borrowed as `&str` and handed to
/// `cache::load_by_source`. Each term of the loaded matcher is then converted
/// into the Python-facing [`Term`] from its `uri` and `label` fields. Any
/// error reported by the cache layer is propagated to Python through `?`.
#[pyfunction]
pub fn load_by_source(sources: Vec<String>) -> PyResult<Vec<Term>> {
    let borrowed = sources.iter().map(String::as_str).collect();
    let matcher = cache::load_by_source(borrowed)?;

    // Re-wrap every cached term in the class exposed to Python.
    Ok(matcher
        .terms
        .into_iter()
        .map(|entry| Term::new(entry.uri, entry.label))
        .collect())
}

#[pymodule]
fn pyfuzon(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(score_terms, m)?)?;
Expand All @@ -118,6 +140,8 @@ fn pyfuzon(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(dump_terms, m)?)?;
m.add_function(wrap_pyfunction!(get_cache_key, m)?)?;
m.add_function(wrap_pyfunction!(get_cache_path, m)?)?;
m.add_function(wrap_pyfunction!(cache_by_source, m)?)?;
m.add_function(wrap_pyfunction!(load_by_source, m)?)?;
m.add_class::<Term>()?;

Ok(())
Expand Down

0 comments on commit c6547ff

Please sign in to comment.