Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
sqs committed Dec 31, 2023
1 parent 275d274 commit 7d09628
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 12 deletions.
7 changes: 4 additions & 3 deletions provider/docs/src/corpus/search/embeddings.test.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import { describe, expect, test } from 'vitest'
import { indexCorpus, type CorpusSearchResult } from '..'
import { noopCache } from '../cache/cache'
import { corpusData } from '../data'
import { doc } from '../index.test'
import { embeddingsSearch, embedTextInThisScope, similarity } from './embeddings'

describe('embeddingsSearch', () => {
test('finds matches', async () => {
expect(await embeddingsSearch(await indexCorpus(corpusData([doc(1, 'a'), doc(2, 'b')])), 'b')).toEqual<
CorpusSearchResult[]
>([{ doc: 2, chunk: 0, score: 1, excerpt: 'b' }])
expect(
await embeddingsSearch(await indexCorpus(corpusData([doc(1, 'a'), doc(2, 'b')])), 'b', { cache: noopCache })
).toEqual<CorpusSearchResult[]>([{ doc: 2, chunk: 0, score: 1, excerpt: 'b' }])
})
})

Expand Down
9 changes: 6 additions & 3 deletions provider/docs/src/corpus/search/keyword.test.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import { describe, expect, test } from 'vitest'
import { indexCorpus, type CorpusSearchResult } from '..'
import { noopCache } from '../cache/cache'
import { corpusData } from '../data'
import { doc } from '../index.test'
import { keywordSearch } from './keyword'
import { calculateTFIDF } from './tfidf'

describe('keywordSearch', () => {
test('finds matches', async () => {
expect(keywordSearch(await indexCorpus(corpusData([doc(1, 'aaa'), doc(2, 'bbb')])), 'bbb')).toEqual<
CorpusSearchResult[]
>([
expect(
await keywordSearch(await indexCorpus(corpusData([doc(1, 'aaa'), doc(2, 'bbb')])), 'bbb', {
cache: noopCache,
})
).toEqual<CorpusSearchResult[]>([
{
doc: 2,
chunk: 0,
Expand Down
12 changes: 6 additions & 6 deletions provider/docs/src/corpus/search/tfidf.test.ts
Original file line number Diff line number Diff line change
@@ -1,35 +1,35 @@
import { describe, expect, test } from 'vitest'
import { indexCorpus } from '..'
import { corpusData } from '../data'
import { calculateTFIDF, createTFIDFIndex } from './tfidf'
import { calculateTFIDF, computeTFIDF, createTFIDFIndex } from './tfidf'

describe('createIndexForTFIDF', async () => {
describe('createTFIDFIndex', async () => {
const data = corpusData([
{ id: 1, text: 'a b c c c' },
{ id: 2, text: 'b c d' },
{ id: 3, text: 'c d e' },
])
const docIDs = data.docs.map(({ id }) => id)
const index = await indexCorpus(data)
const tfidf = createTFIDFIndex(index.docs)
const tfidfIndex = createTFIDFIndex(index.docs)

test('term in 1 doc', () => {
expect(docIDs.map(docID => tfidf('a', docID, 0))).toEqual([
expect(docIDs.map(docID => computeTFIDF('a', docID, 0, tfidfIndex))).toEqual([
calculateTFIDF({ termOccurrencesInChunk: 1, chunkTermLength: 5, totalChunks: 3, termChunkFrequency: 1 }),
0,
0,
])
})

test('term in all docs', () => {
expect(docIDs.map(docID => tfidf('c', docID, 0))).toEqual([
expect(docIDs.map(docID => computeTFIDF('c', docID, 0, tfidfIndex))).toEqual([
calculateTFIDF({ termOccurrencesInChunk: 3, chunkTermLength: 5, totalChunks: 3, termChunkFrequency: 3 }),
calculateTFIDF({ termOccurrencesInChunk: 1, chunkTermLength: 3, totalChunks: 3, termChunkFrequency: 3 }),
calculateTFIDF({ termOccurrencesInChunk: 1, chunkTermLength: 3, totalChunks: 3, termChunkFrequency: 3 }),
])
})

test('unknown term', () => {
expect(docIDs.map(docID => tfidf('x', docID, 0))).toEqual([0, 0, 0])
expect(docIDs.map(docID => computeTFIDF('x', docID, 0, tfidfIndex))).toEqual([0, 0, 0])
})
})

0 comments on commit 7d09628

Please sign in to comment.