-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
17 changed files
with
110 additions
and
31 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import { type webcrypto } from 'node:crypto' | ||
import { type StoredDocument } from '../corpus' | ||
|
||
/**
 * A unique identifier for a document's content, derived only from the document's text.
 *
 * As produced by `docContentID`, this is the SHA-256 hash of the text rendered as a
 * hexadecimal string (two zero-padded hex digits per byte).
 */ | ||
export type DocContentID = string | ||
|
||
/**
 * Compute the {@link DocContentID} for a document's text: the SHA-256 digest of the
 * UTF-8 encoded text, rendered as 64 lowercase hexadecimal characters.
 *
 * @param text The document text to hash.
 * @returns The hex-encoded SHA-256 digest of `text`.
 */
export async function docContentID(text: string): Promise<DocContentID> {
    // Prefer the Web Crypto global (browsers and recent Node.js versions). Otherwise fall back
    // to Node's implementation through the *named* `webcrypto` export: unlike `.default`, it
    // is present both on an ES module namespace and on a plain CommonJS `require` result.
    const crypto: webcrypto.Crypto =
        (globalThis as { crypto?: webcrypto.Crypto }).crypto ?? (await import('node:crypto')).webcrypto

    // Calculate the SHA-256 hash of the UTF-8 bytes.
    const digest = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(text))

    // Two hex digits per byte, zero-padded so that every ID has the same length.
    return Array.from(new Uint8Array(digest), byte => byte.toString(16).padStart(2, '0')).join('')
}
|
||
/**
 * The shape of a document held in a `CorpusCache`: every field of `StoredDocument` except
 * `docID`.
 */
export interface CachedDocument extends Omit<StoredDocument, 'docID'> {} | ||
|
||
/**
 * An asynchronous store of {@link CachedDocument}s keyed by {@link DocContentID}.
 */
export interface CorpusCache { | ||
/**
 * Look up the cached document for `docContentID`. Resolves to the document or to `null`
 * (presumably `null` signals that nothing is cached for that ID; confirm per implementation).
 */
get(docContentID: DocContentID): Promise<CachedDocument | null> | ||
/** Store `doc` under `docContentID`. */
set(docContentID: DocContentID, doc: CachedDocument): Promise<void> | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import { mkdir, readFile, writeFile } from 'fs/promises' | ||
import path from 'path' | ||
import { type CachedDocument, type CorpusCache, type DocContentID } from './cache' | ||
|
||
/**
 * Create a {@link CorpusCache} that stores cache data in the file system.
 *
 * Each document is written as pretty-printed JSON to `<basePath>/doc-<docContentID>.json`.
 *
 * @param basePath Directory that holds the cache files. It does not need to exist up front;
 * `set` creates it (and any missing parents) before writing.
 * @returns A cache whose `get` resolves to `null` when no file exists for the given ID and
 * rethrows any other read or parse error.
 */
export function createFileSystemCorpusCache(basePath: string): CorpusCache {
    function docPath(docContentID: DocContentID): string {
        return path.join(basePath, `doc-${docContentID}.json`)
    }

    return {
        async get(docContentID) {
            try {
                const data = await readFile(docPath(docContentID), 'utf8')
                return JSON.parse(data) as CachedDocument
            } catch (error: unknown) {
                // A missing file is an ordinary cache miss, not a failure.
                if (error instanceof Error && 'code' in error && error.code === 'ENOENT') {
                    return null
                }
                throw error
            }
        },
        async set(docContentID, doc) {
            const filePath = docPath(docContentID)
            // Create the directory that will CONTAIN the file. (`path.basename` would instead
            // create a directory named after the file itself, relative to the working
            // directory, leaving the real cache directory missing.)
            await mkdir(path.dirname(filePath), { recursive: true, mode: 0o700 })
            await writeFile(filePath, JSON.stringify(doc, null, 2))
        },
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,12 @@ | ||
import { describe, expect, test } from 'vitest' | ||
import { createCorpus, DocID, Document } from './corpus' | ||
import { createCorpus, type DocID, type Document } from './corpus' | ||
|
||
export function doc(docID: DocID | number, text: string): Document { | ||
export function doc(docID: DocID, text: string): Document { | ||
return { docID, text } | ||
} | ||
|
||
describe('Corpus', () => { | ||
test('#length', () => { | ||
expect(createCorpus([doc(1, 'a'), doc(2, 'b')]).length).toEqual | ||
expect(createCorpus([doc(1, 'a'), doc(2, 'b')]).length).toBe(2) | ||
}) | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters