Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
sqs committed Dec 24, 2023
1 parent 18d0a46 commit af95491
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 66 deletions.
51 changes: 0 additions & 51 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions provider/docs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
},
"dependencies": {
"@opencodegraph/provider": "workspace:*",
"@xenova/transformers": "^2.12.1",
"onnxruntime-node": "^1.16.3",
"onnxruntime-web": "^1.16.3"
"@xenova/transformers": "^2.12.1"
}
}
11 changes: 9 additions & 2 deletions provider/docs/src/corpus/search.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { describe, expect, test } from 'vitest'
import { createCorpus } from './corpus'
import { doc } from './corpus.test'
import { CorpusSearchResult, embedText } from './search'
import { CorpusSearchResult, embedText, similarity } from './search'

describe('Corpus#search', () => {
test('finds matches', async () => {
Expand All @@ -14,6 +14,13 @@ describe('Corpus#search', () => {
describe('embedText', () => {
test('embeds', async () => {
const s = await embedText('hello world')
expect(s).toBe('123')
expect(s).toBeInstanceOf(Float32Array)
})
})

describe('similarity', () => {
    test('works', async () => {
        // Each row: the two texts to compare and the expected score,
        // checked to 4 decimal digits.
        const cases: [string, string, number][] = [
            ['what is the current time', 'what time is it', 0.7217],
            ['hello world', 'seafood', 0.2025],
        ]
        for (const [left, right, expected] of cases) {
            expect(await similarity(left, right)).toBeCloseTo(expected, 4)
        }
    })
})
21 changes: 11 additions & 10 deletions provider/docs/src/corpus/search.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
import { InferenceSession, Tensor } from 'onnxruntime-node/lib/index'
import { cos_sim, pipeline } from '@xenova/transformers'

/**
 * Shape of a single corpus search result.
 *
 * NOTE(review): the field descriptions below are inferred from the field
 * names only; confirm them against the code that produces these results.
 */
export interface CorpusSearchResult {
/** Presumably the ID of the document that matched. */
docID: string
/** Presumably the relevance score of the match; range and ordering not visible here. */
score: number
/** Presumably a snippet of the matched document's text. */
excerpt: string
}

export async function embedText(text: string): Promise<string> {
const session = await InferenceSession.create(
'/Users/sqs/src/github.com/chroma-core/onnx-embedding/onnx/model.onnx',
{}
)
const inputTensor = new Tensor('float32', Float32Array.from(text.split(' ').map(x => x.charCodeAt(0))))
const outputMap = await session.run({ input: inputTensor })
const outputTensor = outputMap['output']
return outputTensor.data.buffer.slice(0, outputTensor.data.byteLength).toString()
/**
 * In-flight or completed load of the feature-extraction pipeline, shared by
 * all {@link embedText} calls so the pipeline is only constructed once.
 */
let embeddingPipeline: ReturnType<typeof loadEmbeddingPipeline> | undefined

/** Starts loading the 'Xenova/all-MiniLM-L6-v2' feature-extraction pipeline. */
function loadEmbeddingPipeline() {
    return pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2')
}

/** Returns the shared pipeline promise, starting the load on first use. */
function getEmbeddingPipeline(): ReturnType<typeof loadEmbeddingPipeline> {
    if (embeddingPipeline === undefined) {
        const loading = loadEmbeddingPipeline()
        // Do not keep a rejected promise around: forget a failed load so the
        // next call can retry instead of failing forever.
        loading.catch(() => {
            if (embeddingPipeline === loading) {
                embeddingPipeline = undefined
            }
        })
        embeddingPipeline = loading
    }
    return embeddingPipeline
}

/**
 * Embeds `text` using the 'Xenova/all-MiniLM-L6-v2' feature-extraction
 * pipeline, with mean pooling and normalization enabled.
 *
 * The pipeline is created lazily on the first call and reused afterwards,
 * rather than being rebuilt for every text.
 *
 * @param text The text to embed.
 * @returns The `data` of the pipeline output for `text`.
 * @throws If the pipeline fails to load or to run; a failed load is retried
 * on the next call.
 */
export async function embedText(text: string): Promise<Float32Array> {
    const pipe = await getEmbeddingPipeline()
    const out = await pipe(text, { pooling: 'mean', normalize: true })
    return out.data
}

/**
 * Scores how similar two texts are by embedding each with `embedText` and
 * passing the two embeddings to `cos_sim`.
 *
 * @param text1 The first text.
 * @param text2 The second text.
 * @returns The value returned by `cos_sim` for the two embeddings.
 */
export async function similarity(text1: string, text2: string): Promise<number> {
    // The awaits inside the array literal run one after another, in argument order.
    const [first, second] = [await embedText(text1), await embedText(text2)]
    return cos_sim(first, second)
}

0 comments on commit af95491

Please sign in to comment.