Skip to content

Commit

Permalink
Merge pull request #14 from hadynz/aho-corasick
Browse files Browse the repository at this point in the history
Use Aho-Corasick algorithm for searching
  • Loading branch information
hadynz authored Feb 18, 2022
2 parents b6a2f9d + 09dd66e commit 7944372
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 61 deletions.
2 changes: 1 addition & 1 deletion manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"id": "obsidian-sidekick",
"name": "Sidekick",
"description": "A companion to identify hidden connections that match your tags and pages",
"version": "1.1.1",
"version": "1.2.0",
"minAppVersion": "0.13.8",
"author": "Hady Osman",
"authorUrl": "https://hady.geek.nz",
Expand Down
5 changes: 2 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "obsidian-sidekick",
"version": "1.1.1",
"version": "1.2.0",
"description": "A companion to identify hidden connections that match your tags and pages",
"main": "src/index.ts",
"repository": {
Expand Down Expand Up @@ -35,7 +35,6 @@
"@types/faker": "^5.5.8",
"@types/jest": "^26.0.22",
"@types/lodash": "^4.14.178",
"@types/lunr": "^2.3.4",
"@types/webpack": "^5.28.0",
"@typescript-eslint/eslint-plugin": "^4.22.0",
"@typescript-eslint/parser": "^4.22.0",
Expand Down Expand Up @@ -69,8 +68,8 @@
"*.{js,css,md}": "prettier --write"
},
"dependencies": {
"@tanishiking/aho-corasick": "^0.0.1",
"lodash": "^4.17.21",
"lunr": "^2.3.9",
"tippy.js": "^6.3.7"
}
}
9 changes: 0 additions & 9 deletions src/indexing/indexModels.ts
Original file line number Diff line number Diff line change
@@ -1,45 +1,36 @@
import { TFile } from 'obsidian';

import { stemmer } from '../utils/stemmer';

/**
 * Common shape of an entry in the search index.
 *
 * NOTE(review): this file is shown as a diff rendered without +/- markers; the
 * `stem` member looks like one of the nine deletions reported for this file —
 * confirm against the repository.
 */
export interface SearchIndex {
// Text that is substituted for a match (see the implementing classes).
replaceText: string;
// Plain text of the indexed term.
originalText: string;
// Set to `stemmer(originalText)` by every implementation in this file.
stem: string;
}

/**
 * Search index entry built from a tag.
 */
export class TagIndex implements SearchIndex {
public readonly originalText: string;
public readonly replaceText: string;
public readonly stem: string;

/**
 * @param tag - Tag text; presumably includes its leading `#` — confirm against the caller.
 */
constructor(tag: string) {
// The regex has no `g` flag, so only the first `#` found is removed.
this.originalText = tag.replace(/#/, '');
// The tag is kept exactly as passed in for replacement.
this.replaceText = tag;
this.stem = stemmer(this.originalText);
}
}

/**
 * Search index entry built from an alias word associated with a file.
 */
export class AliasIndex implements SearchIndex {
public readonly originalText: string;
public readonly replaceText: string;
public readonly stem: string;

/**
 * @param file - File whose `basename` becomes the first part of the replacement.
 * @param word - Alias text; stored unchanged as `originalText`.
 */
constructor(file: TFile, word: string) {
this.originalText = word;
// Replacement has the form `[[basename|word]]`.
this.replaceText = `[[${file.basename}|${word}]]`;
this.stem = stemmer(this.originalText);
}
}

/**
 * Search index entry built from a file's basename.
 */
export class PageIndex implements SearchIndex {
public readonly originalText: string;
public readonly replaceText: string;
public readonly stem: string;

/**
 * @param file - File whose `basename` is used as both the searched text and the replacement target.
 */
constructor(file: TFile) {
this.originalText = file.basename;
// Replacement has the form `[[basename]]`.
this.replaceText = `[[${file.basename}]]`;
this.stem = stemmer(this.originalText);
}
}
2 changes: 1 addition & 1 deletion src/indexing/indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ export class Indexer {
return this.indexAllTags(allFiles)
.concat(allFiles.map((file) => this.indexFile(file)).flat())
.reduce((acc: Index, index) => {
return { ...acc, [index.stem]: index };
return { ...acc, [index.originalText.toLowerCase()]: index };
}, {});
}

Expand Down
58 changes: 17 additions & 41 deletions src/search/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import lunr from 'lunr';
import { Trie, Emit } from '@tanishiking/aho-corasick';

import { Indexer, Index } from '../indexing/indexer';

Expand All @@ -8,58 +8,34 @@ type SearchResult = {
replaceText: string;
};

// Any arbitrary key to use for the search index
const DocumentKey = 'text';

export default class Search {
constructor(private indexer: Indexer) {}

/**
 * Searches `text` for the terms held by the indexer and returns the matches
 * as produced by `toSearchResults`.
 *
 * NOTE(review): this span comes from a diff rendered without +/- markers, so the
 * lunr-based statements and the Trie-based statements are interleaved. They look
 * like the before/after bodies of the same method — confirm against the repository.
 *
 * @param text - Text to search; it is passed through `redactText` before matching.
 * @returns The value returned by `toSearchResults` for the raw matches.
 */
public find(text: string): SearchResult[] {
// Redact text that we don't want to be searched
const redactedText = this.redactText(text);
const indices = this.indexer.getIndices();

// lunr variant: index the redacted text as a single document, with 'position' whitelisted as metadata.
const idx = lunr(function () {
this.metadataWhitelist = ['position'];
this.ref(DocumentKey);
this.field(DocumentKey);
this.add({ [DocumentKey]: redactedText });
// Trie variant: the keys of the index are the keywords handed to the Trie.
const trie = new Trie(Object.keys(indices), {
allowOverlaps: false,
onlyWholeWords: true,
caseInsensitive: true,
});

const indices = this.indexer.getIndices();
// Redact text that we don't want to be searched
const redactedText = this.redactText(text);

// lunr variant: add every index key as a query term.
const results = idx.query(function () {
Object.keys(indices).map((index) => {
this.term(index, {});
});
});
// Trie variant: run the Trie over the redacted text.
const results = trie.parseText(redactedText);

return this.toSearchResults(results, indices);
}

/**
 * Converts raw matches into `SearchResult` objects (`start`, `end`, `replaceText`).
 *
 * NOTE(review): this span comes from a diff rendered without +/- markers; the
 * `lunr.Index.Result[]` signature and the `Emit[]` signature look like the
 * before/after versions of the same method — confirm against the repository.
 *
 * @param results - Raw matches from the search backend.
 * @param indices - Index used to filter hits and to look up each hit's `replaceText`.
 * @returns One `SearchResult` per match whose key passes `existsInIndex`.
 */
private toSearchResults(results: lunr.Index.Result[], indices: Index): SearchResult[] {
if (results.length === 0) {
return [];
}

// There is only ever one result because only one document is indexed
const indexHits = results[0].matchData.metadata;

return Object.keys(indexHits)
// Keep only the hits for which `existsInIndex` returns true.
.filter((indexHit) => this.existsInIndex(indexHit, indices))
.reduce((acc: SearchResult[], indexHit) => {
const positions: number[][] = indexHits[indexHit][DocumentKey].position;

// Presumably each position is [start, length], hence end = start + length — confirm against lunr.
const searchResults = positions.map(
(position): SearchResult => ({
start: position[0],
end: position[0] + position[1],
replaceText: indices[indexHit].replaceText,
})
);

acc.push(...searchResults);
return acc;
}, [])
private toSearchResults(results: Emit[], indices: Index): SearchResult[] {
return results
// Keep only the matches whose keyword passes `existsInIndex`.
.filter((result) => this.existsInIndex(result.keyword, indices))
.map((result) => ({
start: result.start,
// Presumably `Emit.end` is inclusive, hence the + 1 — confirm against the library.
end: result.end + 1,
replaceText: indices[result.keyword].replaceText,
}))
.sort((a, b) => a.start - b.start); // Must sort by start position to prepare for highlighting
}

Expand Down
6 changes: 0 additions & 6 deletions src/utils/stemmer.ts

This file was deleted.

0 comments on commit 7944372

Please sign in to comment.