Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
sqs committed Dec 26, 2023
1 parent 4267153 commit d573fba
Show file tree
Hide file tree
Showing 11 changed files with 88 additions and 27 deletions.
6 changes: 4 additions & 2 deletions client/web-playground/src/demo/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ import { type ProviderSettings } from '@opencodegraph/client'
async function getProviders(): Promise<Record<string, ProviderSettings | boolean>> {
const providerSettings: Record<string, ProviderSettings | boolean> = {
'../../../../provider/hello-world/index.ts': false,
'../../../../provider/docs/src/provider/provider.ts':
{} satisfies import('@opencodegraph/provider-docs').Settings,
'../../../../provider/docs/src/provider/provider.ts': {
entryPage: 'http://localhost:5800/docs/start',
prefix: 'http://localhost:5800/docs',
} satisfies import('@opencodegraph/provider-docs').Settings,
'../../../../provider/links/index.ts': {
links: [
{
Expand Down
1 change: 1 addition & 0 deletions client/web-playground/vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export default defineConfig(({ mode }) => ({
]
: [],
},
define: {},
css: {
devSourcemap: true,
modules: {
Expand Down
1 change: 1 addition & 0 deletions lib/client/src/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ export function observeAnnotations<R extends OpenCodeGraphRange>(
emitPartial ? startWith(null) : tap(),
catchError(error => {
logger?.(`failed to get annotations: ${error}`)
console.error(error)
return of(null)
})
)
Expand Down
10 changes: 10 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion provider/docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ time p run -s docs-query 'making provider work in vscode' $(find ../../web/conte

TODOs:

- make it slurp up a base URL of docs
- deal with different content types (markdown/html) differently
- make it slurp up gdocs/confluence/markdown in repos
- show OCG annotations (but in a way that doesn't overlay lines in the file, is more passive?)
- show a demo of Cody working with this
1 change: 1 addition & 0 deletions provider/docs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"@mozilla/readability": "^0.5.0",
"@opencodegraph/provider": "workspace:*",
"@xenova/transformers": "^2.12.1",
"buffer": "^6.0.3",
"env-paths": "^3.0.0",
"jsdom": "^23.0.1",
"onnxruntime-web": "*"
Expand Down
4 changes: 2 additions & 2 deletions provider/docs/src/corpus/doc/contentExtractor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ import { describe, expect, test } from 'vitest'
import { Content, extractContentUsingMozillaReadability } from './contentExtractor'

describe('extractContentUsingMozillaReadability', () => {
test('extracts content', () =>
test('extracts content', async () =>
expect(
extractContentUsingMozillaReadability.extractContent({
await extractContentUsingMozillaReadability.extractContent({
id: 1,
text: '<html><head><title>Bar - MySite</title></head><body><aside><nav><h1><a href="/">MySite</a></h1> <a href="/foo">foo</a></nav></aside><main><h1>Bar</h1>\n<p>Baz</p></main></body>',
})
Expand Down
27 changes: 23 additions & 4 deletions provider/docs/src/corpus/doc/contentExtractor.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { Readability } from '@mozilla/readability'
import { JSDOM } from 'jsdom'
import { type Doc } from './doc'

export interface Content {
Expand All @@ -23,13 +22,33 @@ export interface Content {

export interface ContentExtractor {
id: string
extractContent(doc: Doc): Content | null
extractContent(doc: Doc): Promise<Content | null>
}

export const extractContentUsingMozillaReadability: ContentExtractor = {
id: 'mozillaReadability',
extractContent(doc) {
const info = new Readability(new JSDOM(doc.text, { url: doc.url }).window.document, {
async extractContent(doc) {
/** Turns an HTML string (plus optional document URL) into a DOM `Document`. */
type ParseDOM = (html: string, url: string | undefined) => Promise<Document>

/** Parses with jsdom, which is loaded lazily so it is only imported when this path runs. */
const parseWithJSDOM: ParseDOM = async (html, url) => {
    const jsdom = await import('jsdom')
    const dom = new jsdom.JSDOM(html, { url })
    return dom.window.document
}

/** Parses with the global `DOMParser`. */
const parseWithDOMParser: ParseDOM = (html, url) => {
    const parsed = new DOMParser().parseFromString(html, 'text/html')

    // `parseFromString` takes no URL argument, so when a URL is given and the document has no
    // <base> element in its <head>, add one whose href is that URL.
    if (url && parsed.head.querySelectorAll('base').length === 0) {
        const base = parsed.createElement('base')
        base.setAttribute('href', url)
        parsed.head.append(base)
    }

    return Promise.resolve(parsed)
}

// Use jsdom only when no global `DOMParser` is defined.
const parseDOM: ParseDOM = typeof DOMParser === 'undefined' ? parseWithJSDOM : parseWithDOMParser

const info = new Readability(await parseDOM(doc.text, doc.url), {
charThreshold: 500,
}).parse()
return info
Expand Down
4 changes: 1 addition & 3 deletions provider/docs/src/corpus/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,5 @@ function cachedExtractContent(
if (!extractor) {
return Promise.resolve(null)
}
return memo(cache, `${doc.url}:${doc.text}`, `extractContent:${extractor.id}`, () =>
Promise.resolve(extractor.extractContent(doc))
)
return memo(cache, `${doc.url}:${doc.text}`, `extractContent:${extractor.id}`, () => extractor.extractContent(doc))
}
26 changes: 23 additions & 3 deletions provider/docs/src/provider/multiplex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,30 @@ import { OpenCodeGraphProvider } from '@opencodegraph/provider'
* @template S The settings type.
*/
export function multiplex<S extends {}>(
createProvider: (settings: S) => OpenCodeGraphProvider<S>
createProvider: (settings: S) => Promise<OpenCodeGraphProvider<S>>
): OpenCodeGraphProvider<S> {
const providerCache = new Map<string, Promise<OpenCodeGraphProvider<S>>>()

/**
 * Returns the provider for `settings`, creating it with `createProvider` on first use and
 * reusing the cached one afterwards.
 *
 * The cache key is `JSON.stringify(settings)`, so settings objects that serialize to the same
 * string share a provider (note that `JSON.stringify` is sensitive to property order). The
 * cached value is the promise itself, so concurrent callers share one in-flight
 * `createProvider` call.
 *
 * The cache holds at most `MAX_SIZE` entries; the least recently used entry is evicted to make
 * room for a new one. A provider whose creation fails is removed from the cache so that a later
 * call retries instead of returning the same rejection forever.
 *
 * @param settings The settings that select (and are passed to) the provider.
 * @returns A promise for the provider for these settings.
 */
function getProvider(settings: S): Promise<OpenCodeGraphProvider<S>> {
    const key = JSON.stringify(settings)

    const cached = providerCache.get(key)
    if (cached) {
        // A Map iterates in insertion order, so re-inserting marks this entry as most recently used.
        providerCache.delete(key)
        providerCache.set(key, cached)
        return cached
    }

    // Prevent accidental memory leaks in case `settings` keeps changing: make room *before*
    // inserting, so the cache never exceeds MAX_SIZE.
    const MAX_SIZE = 10
    if (providerCache.size >= MAX_SIZE) {
        const oldestKey = providerCache.keys().next().value
        if (oldestKey !== undefined) {
            providerCache.delete(oldestKey)
        }
    }

    const provider = createProvider(settings)
    providerCache.set(key, provider)

    // Do not keep a failed creation in the cache. The identity check avoids deleting a newer
    // entry that may have replaced this one under the same key. Callers still observe the
    // rejection through the promise returned below.
    provider.catch(() => {
        if (providerCache.get(key) === provider) {
            providerCache.delete(key)
        }
    })

    return provider
}

return {
capabilities: (params, settings) => createProvider(settings).capabilities(params, settings),
annotations: (params, settings) => createProvider(settings).annotations(params, settings),
capabilities: (params, settings) => getProvider(settings).then(p => p.capabilities(params, settings)),
annotations: (params, settings) => getProvider(settings).then(p => p.annotations(params, settings)),
}
}
33 changes: 21 additions & 12 deletions provider/docs/src/provider/provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,18 @@ import {
type CapabilitiesParams,
type CapabilitiesResult,
} from '@opencodegraph/provider'
import { createCorpus } from '../corpus'
import { indexCorpus } from '../corpus'
import { createWebStorageCorpusCache } from '../corpus/cache/localStorage'
import { corpusData } from '../corpus/data'
import { extractContentUsingMozillaReadability } from '../corpus/doc/contentExtractor'
import { createWebCorpusSource } from '../corpus/source/web/webCorpusSource'
import { multiplex } from './multiplex'

/** Settings for the docs OpenCodeGraph provider. */
export interface Settings {}
export interface Settings {
/**
 * URL of the documentation entry page, for example `http://localhost:5800/docs/start`.
 * It is parsed with `new URL(...)` and passed to the web corpus source as `entryPage`, so it
 * must be a valid absolute URL.
 * NOTE(review): presumably the page where crawling starts; confirm against `createWebCorpusSource`.
 */
entryPage: string
/**
 * URL prefix of the documentation site, for example `http://localhost:5800/docs`.
 * It is parsed with `new URL(...)` and passed to the web corpus source as `prefix`, so it must
 * be a valid absolute URL.
 * NOTE(review): presumably only pages under this prefix are included; confirm against
 * `createWebCorpusSource`.
 */
prefix: string
}

const CORPUS_CACHE =
typeof localStorage !== 'undefined' ? createWebStorageCorpusCache(localStorage, 'ocg-provider-docs') : undefined
Expand All @@ -19,24 +25,27 @@ const CORPUS_CACHE =
* An [OpenCodeGraph](https://opencodegraph.org) provider that adds contextual documentation to your
* code from an existing documentation corpus.
*/
export default multiplex<Settings>(settings => {
const corpus = createCorpus(
[
{ id: 1, text: 'Signinpage is cool allowSignup authProviders' },
{ id: 2, text: 'Bazel build here is how to do it' },
],
{
cache: CORPUS_CACHE,
}
export default multiplex<Settings>(async settings => {
const data = corpusData(
await createWebCorpusSource({
entryPage: new URL(settings.entryPage),
prefix: new URL(settings.prefix),
logger: message => console.log(message),
}).documents()
)
const index = await indexCorpus(data, {
cache: CORPUS_CACHE,
contentExtractor: extractContentUsingMozillaReadability,
})

return {
capabilities(_params: CapabilitiesParams, settings: Settings): CapabilitiesResult {
return {}
},

async annotations(params: AnnotationsParams, settings: Settings): Promise<AnnotationsResult> {
console.time('search')
const searchResults = await corpus.search(params.content)
const searchResults = await index.search(params.content)
console.timeEnd('search')

const result: AnnotationsResult = { items: [], annotations: [] }
Expand Down

0 comments on commit d573fba

Please sign in to comment.