Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
sqs committed Dec 26, 2023
1 parent e187100 commit 3ca366e
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 14 deletions.
15 changes: 13 additions & 2 deletions client/web-playground/src/demo/settings.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
import { type ProviderSettings } from '@opencodegraph/client'

const USE_STORED_CORPUS = true

async function getProviders(): Promise<Record<string, ProviderSettings | boolean>> {
const providerSettings: Record<string, ProviderSettings | boolean> = {
'../../../../provider/hello-world/index.ts': false,
'../../../../provider/docs/src/provider/provider.ts': {
entryPage: 'http://localhost:5800/docs/start',
prefix: 'http://localhost:5800/docs',
corpus: USE_STORED_CORPUS
? {
url: new URL(
'tmp-ocg-provider-docs/sourcegraph-docs-old-web-corpus.json',
import.meta.url
).toString(),
}
: {
entryPage: 'http://localhost:5800/docs/start',
prefix: 'http://localhost:5800/docs',
},
} satisfies import('@opencodegraph/provider-docs').Settings,
'../../../../provider/links/index.ts': {
links: [
Expand Down
19 changes: 15 additions & 4 deletions client/web-playground/vite.config.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import { resolve } from 'path'
import react from '@vitejs/plugin-react'
import { defineConfig } from 'vite'
import { defineConfig, searchForWorkspaceRoot } from 'vite'

// TODO(sqs): un-hardcode
const docsProviderDataDir = resolve('/Users/sqs/tmp/ocg-provider-docs')

export default defineConfig(({ mode }) => ({
plugins: [react()],
resolve: {
alias:
mode === 'development'
alias: [
...(mode === 'development'
? [
// In dev mode, build from TypeScript sources so we don't need to run `tsc -b`
// in the background.
Expand All @@ -17,7 +20,12 @@ export default defineConfig(({ mode }) => ({
replacement: '$1/src/index',
},
]
: [],
: []),
{
find: 'tmp-ocg-provider-docs',
replacement: docsProviderDataDir,
},
],
},
define: {},
css: {
Expand All @@ -28,6 +36,9 @@ export default defineConfig(({ mode }) => ({
},
server: {
port: 5900,
fs: {
allow: [searchForWorkspaceRoot(process.cwd()), docsProviderDataDir],
},
},
build: {
emptyOutDir: false,
Expand Down
4 changes: 3 additions & 1 deletion provider/docs/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Docs context provider for OpenCodeGraph
# Docs context provider for OpenCodeGraph

This is a context provider for [OpenCodeGraph](https://opencodegraph.org) that adds contextual documentation to your code from an existing documentation corpus.

Expand Down Expand Up @@ -56,6 +56,8 @@ time p run -s docs-query 'making provider work in vscode' $(find ../../web/conte

TODOs:

- use IndexedDB for more storage: https://stackoverflow.com/questions/5663166/is-there-a-way-to-increase-the-size-of-localstorage-in-google-chrome-to-avoid-qu
- use a Web Worker for ONNX: https://huggingface.co/docs/transformers.js/tutorials/react#step-4-connecting-everything-together
- simplify cache interface
- deal with different content types (markdown/html) differently
- make it slurp up gdocs/confluence/markdown in repos
Expand Down
7 changes: 6 additions & 1 deletion provider/docs/src/corpus/cache/localStorage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,12 @@ export function createWebStorageCorpusCache(storage: Storage, keyPrefix: string)
}
},
async set(contentID, key, value) {
storage.setItem(storageKey(contentID, key), JSON.stringify(value))
const valueData = JSON.stringify(value)
try {
storage.setItem(storageKey(contentID, key), valueData)
} catch (error) {
// console.error(`failed to store data for ${contentID}:${key}`, error)
}
},
}
}
3 changes: 2 additions & 1 deletion provider/docs/src/corpus/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ export async function indexCorpus(
const indexedDocs: IndexedDoc[] = []

for (const doc of data.docs) {
const content = await cachedExtractContent(cache, contentExtractor, doc)
const USE_NOOP_CACHE = typeof window !== 'undefined' // TODO(sqs): dont cache in browser
const content = await cachedExtractContent(USE_NOOP_CACHE ? noopCache : cache, contentExtractor, doc)

const chunks = chunk(content?.content ?? doc.text, { isMarkdown: doc.text.includes('##') })

Expand Down
14 changes: 11 additions & 3 deletions provider/docs/src/corpus/search/embeddings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,21 @@ function cachedEmbedText(text: string, cache: CorpusCache): Promise<Float32Array
)
}

const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {})

/**
* Embed the text and return the vector.
*/
export async function embedText(text: string): Promise<Float32Array> {
const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {})
const out = await pipe(text, { pooling: 'mean', normalize: true })
return out.data
try {
console.time('embed')
const out = await pipe(text, { pooling: 'mean', normalize: true })
console.timeEnd('embed')
return out.data
} catch (error) {
console.log(error)
throw error
}
}

/**
Expand Down
3 changes: 3 additions & 0 deletions provider/docs/src/corpus/source/source.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ export function corpusDataURLSource(url: string): CorpusSource {
if (!resp.ok) {
throw new Error(`failed to fetch corpus data from ${url}: ${resp.status} ${resp.statusText}`)
}
if (!resp.headers.get('Content-Type')?.includes('json')) {
throw new Error(`corpus data from ${url} is not JSON`)
}
return resp.json()
})
)
Expand Down
8 changes: 6 additions & 2 deletions provider/docs/src/provider/provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,15 @@ export default multiplex<Settings>(async settings => {

const result: AnnotationsResult = { items: [], annotations: [] }
for (const [i, sr] of searchResults.entries()) {
const MAX_RESULTS = 4
if (i >= MAX_RESULTS) break

const doc = index.doc(sr.doc)
const item: OpenCodeGraphItem = {
id: i.toString(),
title: truncate(doc.content?.title ?? doc.doc.url ?? 'Untitled', 50),
detail: sr.excerpt,
title: truncate(doc.content?.title || doc.doc.url || 'Untitled', 50),
detail: truncate(doc.content?.textContent || sr.excerpt, 100),
url: doc.doc.url,
}
result.items.push(item)
result.annotations.push({
Expand Down

0 comments on commit 3ca366e

Please sign in to comment.