feat(py): expose granular caching via pyO3 API

sdsc-ordes · Oct 22, 2024 · c6547ff · c6547ff
1 parent 143abe7
commit c6547ff
Show file tree

Hide file tree

Showing 2 changed files with 40 additions and 2 deletions.
diff --git a/pyfuzon/python/pyfuzon/cache.py b/pyfuzon/python/pyfuzon/cache.py
@@ -16,8 +16,13 @@
 
 from pathlib import Path
 
-from .pyfuzon import get_cache_key as _get_cache_key
-from .pyfuzon import get_cache_path as _get_cache_path
+from .matcher import TermMatcher
+from .pyfuzon import (
+    get_cache_key as _get_cache_key,
+    get_cache_path as _get_cache_path,
+    cache_by_source as _cache_by_source,
+    load_by_source as _load_by_source,
+)
 
 def get_cache_key(sources: list[str]) -> str:
     """Return a deterministic cache key based on a collection of source paths."""
@@ -26,3 +31,12 @@ def get_cache_key(sources: list[str]) -> str:
 def get_cache_path(sources: list[str]) -> Path:
     """Return a full platform-specific cache path based on a collection of source paths."""
     return Path(_get_cache_path(sources))
+
+def cache_by_source(sources: list[str]):
+    """Save each source in `sources` into an independent TermMatcher cache file; returns None."""
+    _cache_by_source(sources)  # delegate to the `cache_by_source` function of the compiled `.pyfuzon` module
+
+def load_by_source(sources: list[str]) -> TermMatcher:
+    """Load the single-source cache entries for `sources` and combine them into one TermMatcher."""
+    terms = _load_by_source(sources)  # terms gathered by the compiled `.pyfuzon` module for all sources
+    return TermMatcher(terms)  # wrap the combined terms in a single matcher
diff --git a/pyfuzon/src/lib.rs b/pyfuzon/src/lib.rs
@@ -110,6 +110,28 @@ pub fn get_cache_key(sources: Vec<String>) -> PyResult<String> {
     Ok(cache::get_cache_key(&mut src_ref)?)
 }
 
+/// Save each source in a dedicated TermMatcher cache file (Python-callable; failures are returned as `PyErr`).
+#[pyfunction]
+pub fn cache_by_source(sources: Vec<String>) -> PyResult<()> {
+    let src_ref = sources.iter().map(|s| s.as_str()).collect(); // borrow every owned String as &str
+    cache::cache_by_source(src_ref)?; // `?` hands any cache-layer error back to the Python caller
+
+    Ok(())
+}
+
+/// Load terms from individual TermMatcher cache files for each source and return them as a flat list of `Term`.
+#[pyfunction]
+pub fn load_by_source(sources: Vec<String>) -> PyResult<Vec<Term>> {
+    let src_ref = sources.iter().map(|s| s.as_str()).collect(); // borrow every owned String as &str
+    let terms = cache::load_by_source(src_ref)? // `?` hands any cache-layer error back to the Python caller
+        .terms // only the `terms` field of the loaded value is used
+        .into_iter()
+        .map(|t| Term::new(t.uri, t.label)) // rebuild each entry as this module's `Term` from its uri and label
+        .collect();
+
+    Ok(terms)
+}
+
 #[pymodule]
 fn pyfuzon(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(score_terms, m)?)?;
@@ -118,6 +140,8 @@ fn pyfuzon(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(dump_terms, m)?)?;
     m.add_function(wrap_pyfunction!(get_cache_key, m)?)?;
     m.add_function(wrap_pyfunction!(get_cache_path, m)?)?;
+    m.add_function(wrap_pyfunction!(cache_by_source, m)?)?;
+    m.add_function(wrap_pyfunction!(load_by_source, m)?)?;
     m.add_class::<Term>()?;
 
     Ok(())