Do not take OCR languages into account when reading the cache

This may fix #37
scambier · Jan 23, 2024 · 15feb4a · 15feb4a
1 parent 7baa897
commit 15feb4a
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 18 deletions.
diff --git a/lib/src/cache.ts b/lib/src/cache.ts
@@ -50,20 +50,13 @@ export function getCachePath(file: TFile): {
  * @param optLangs
  * @returns
  */
-export async function readCache(
-  file: TFile,
-  optLangs = ''
-): Promise<ExtractedText | null> {
+export async function readCache(file: TFile): Promise<ExtractedText | null> {
   const cachePath = getCachePath(file)
 
   // Get the text from the cache if it exists
   if (await app.vault.adapter.exists(cachePath.fullpath)) {
     const raw = await app.vault.adapter.read(cachePath.fullpath)
-    const cache = JSON.parse(raw) as ExtractedText
-    // Check that the languages list has not changed since the cache was created
-    if (cache.langs === optLangs) {
-      return cache
-    }
+    return JSON.parse(raw) as ExtractedText
   }
   return null
 }

diff --git a/lib/src/ocr/ocr-manager.ts b/lib/src/ocr/ocr-manager.ts
@@ -14,10 +14,10 @@ import type { ocrLangs } from './ocr-langs'
  * Concatenates an array of langs to a single string to be passed to Tesseract
  * e.g. ['fra', 'eng'] => 'eng+fra'
 * The langs are sorted alphabetically because it's also used as a cache key
- * @param langs 
- * @returns 
+ * @param langs
+ * @returns
  */
-function concatLangs (langs: Array<typeof ocrLangs[number]>): string {
+function concatLangs(langs: Array<(typeof ocrLangs)[number]>): string {
   return langs.sort().join('+')
 }
 
@@ -102,7 +102,11 @@ class OCRManager {
    */
   public async getImageText(file: TFile, options: OcrOptions): Promise<string> {
     try {
-      return await imagesProcessQueue.add(() => this.#getImageText(file, options)) ?? ''
+      return (
+        (await imagesProcessQueue.add(() =>
+          this.#getImageText(file, options)
+        )) ?? ''
+      )
     } catch (e) {
       console.warn(
         `Text Extractor - Error while extracting text from ${file.basename}`
@@ -113,9 +117,8 @@ class OCRManager {
   }
 
   async #getImageText(file: TFile, options: OcrOptions): Promise<string> {
-    const langs = concatLangs(options.langs)
     // Get the text from the cache if it exists
-    const cache = await readCache(file, langs)
+    const cache = await readCache(file)
     if (cache) {
       return cache.text ?? FAILED_TO_EXTRACT
     }
@@ -128,6 +131,7 @@ class OCRManager {
     const cachePath = getCachePath(file)
     const data = new Uint8ClampedArray(await app.vault.readBinary(file))
     const worker = OCRWorker.getWorker()
+    const langs = concatLangs(options.langs)
 
     return new Promise(async (resolve, reject) => {
       try {
@@ -144,12 +148,24 @@ class OCRManager {
           .trim()
 
         // Add it to the cache
-        await writeCache(cachePath.folder, cachePath.filename, text, file.path, langs)
+        await writeCache(
+          cachePath.folder,
+          cachePath.filename,
+          text,
+          file.path,
+          langs
+        )
         resolve(text)
       } catch (e) {
         // In case of error (unreadable PDF or timeout) just add
         // an empty string to the cache
-        await writeCache(cachePath.folder, cachePath.filename, '', file.path, langs)
+        await writeCache(
+          cachePath.folder,
+          cachePath.filename,
+          '',
+          file.path,
+          langs
+        )
         resolve('')
       }
     })

diff --git a/plugin/src/settings.ts b/plugin/src/settings.ts
@@ -43,7 +43,7 @@ export class TextExtractorSettingsTab extends PluginSettingTab {
     info.createDiv({
       cls: 'setting-item-description',
       text: `A list of languages to use for OCR. e.g. if your vault contains documents in English and French, you'd want to add 'eng' and 'fra' here.
-        This setting only applies to images, not PDFs.`,
+        This setting only applies to images, not PDFs. You may have to clear the cache after changing this setting.`,
     })
 
     new LangSelector({