Skip to content

Commit

Permalink
Merge pull request #2723 from continuedev/pe/jb-codebase-transformers
Browse files Browse the repository at this point in the history
bugfix(jb): no embeddings w/ transformers.js
  • Loading branch information
Patrick-Erichsen authored Oct 29, 2024
2 parents 425af8e + 501848b commit c487d99
Show file tree
Hide file tree
Showing 11 changed files with 82 additions and 42 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,5 @@ extensions/intellij/.idea/**
**/.idea/workspace.xml
**/.idea/usage.statistics.xml
**/.idea/shelf/

extensions/intellij/bin
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
"**/core/vendor/**": true,
"**/gui/dist": true,
"**/extensions/vscode/gui/**": true,
"**/extensions/vscode/out/**": true
"**/extensions/vscode/out/**": true,
"**/extensions/intellij/build/**": true
},
"eslint.workingDirectories": ["./core"]
}
3 changes: 2 additions & 1 deletion core/context/retrieval/pipelines/BaseRetrievalPipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ export interface RetrievalPipelineOptions {
llm: ILLM;
config: ContinueConfig;
ide: IDE;

input: string;
nRetrieve: number;
nFinal: number;
tags: BranchAndDir[];
pathSep: string;
filterDirectory?: string;
includeEmbeddings?: boolean; // Used to handle JB w/o an embeddings model
}

export interface IRetrievalPipeline {
Expand All @@ -29,6 +29,7 @@ export interface IRetrievalPipeline {

export default class BaseRetrievalPipeline implements IRetrievalPipeline {
private lanceDbIndex: LanceDbIndex;

constructor(protected readonly options: RetrievalPipelineOptions) {
this.lanceDbIndex = new LanceDbIndex(
options.config.embeddingsProvider,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import BaseRetrievalPipeline from "./BaseRetrievalPipeline.js";

export default class NoRerankerRetrievalPipeline extends BaseRetrievalPipeline {
async run(): Promise<Chunk[]> {
const { input, nFinal, filterDirectory } = this.options;
const { input, nFinal, filterDirectory, includeEmbeddings } = this.options;

// We give 1/4 weight to recently edited files, 1/4 to full text search,
// and the remaining 1/2 to embeddings
Expand All @@ -17,10 +17,9 @@ export default class NoRerankerRetrievalPipeline extends BaseRetrievalPipeline {

const ftsChunks = await this.retrieveFts(input, ftsNFinal);

const embeddingsChunks = await this.retrieveEmbeddings(
input,
embeddingsNFinal,
);
const embeddingsChunks = includeEmbeddings
? await this.retrieveEmbeddings(input, embeddingsNFinal)
: [];

const recentlyEditedFilesChunks =
await this.retrieveAndChunkRecentlyEditedFiles(recentlyEditedNFinal);
Expand Down
7 changes: 5 additions & 2 deletions core/context/retrieval/pipelines/RerankerRetrievalPipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ import BaseRetrievalPipeline from "./BaseRetrievalPipeline.js";

export default class RerankerRetrievalPipeline extends BaseRetrievalPipeline {
private async _retrieveInitial(): Promise<Chunk[]> {
const { input, nRetrieve, filterDirectory } = this.options;
const { input, nRetrieve, filterDirectory, includeEmbeddings } =
this.options;

let retrievalResults: Chunk[] = [];

const ftsChunks = await this.retrieveFts(input, nRetrieve);
const embeddingsChunks = await this.retrieveEmbeddings(input, nRetrieve);
const embeddingsChunks = includeEmbeddings
? await this.retrieveEmbeddings(input, nRetrieve)
: [];
const recentlyEditedFilesChunks =
await this.retrieveAndChunkRecentlyEditedFiles(nRetrieve);

Expand Down
17 changes: 11 additions & 6 deletions core/context/retrieval/retrieval.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import path from "path";

import { BranchAndDir, ContextItem, ContextProviderExtras } from "../../";
import TransformersJsEmbeddingsProvider from "../../indexing/embeddings/TransformersJsEmbeddingsProvider";
import { resolveRelativePathInWorkspace } from "../../util/ideUtils";
import { INSTRUCTIONS_BASE_ITEM } from "../providers/utils";

import { RetrievalPipelineOptions } from "./pipelines/BaseRetrievalPipeline";
import NoRerankerRetrievalPipeline from "./pipelines/NoRerankerRetrievalPipeline";
import RerankerRetrievalPipeline from "./pipelines/RerankerRetrievalPipeline";
Expand All @@ -21,15 +23,17 @@ export async function retrieveContextItemsFromEmbeddings(
// transformers.js not supported in JetBrains IDEs right now

const isJetBrainsAndTransformersJs =
extras.embeddingsProvider.id === TransformersJsEmbeddingsProvider.model &&
extras.embeddingsProvider.providerName ===
TransformersJsEmbeddingsProvider.providerName &&
(await extras.ide.getIdeInfo()).ideType === "jetbrains";

if (isJetBrainsAndTransformersJs) {
throw new Error(
"The 'transformers.js' context provider is not currently supported in JetBrains. " +
"For now, you can use Ollama to set up local embeddings, or use our 'free-trial' " +
"embeddings provider. See here to learn more: " +
"https://docs.continue.dev/walkthroughs/codebase-embeddings#embeddings-providers",
void extras.ide.showToast(
"warning",
"Codebase retrieval is limited when `embeddingsProvider` is empty or set to `transformers.js` in JetBrains. " +
"You can use Ollama to set up local embeddings, use our 'free-trial', " +
"or configure your own. See here to learn more: " +
"https://docs.continue.dev/customize/model-types/embeddings",
);
}

Expand Down Expand Up @@ -85,6 +89,7 @@ export async function retrieveContextItemsFromEmbeddings(
input: extras.fullInput,
llm: extras.llm,
config: extras.config,
includeEmbeddings: !isJetBrainsAndTransformersJs,
};

const pipeline = new pipelineType(pipelineOptions);
Expand Down
6 changes: 5 additions & 1 deletion docs/docs/customize/deep-dives/codebase.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@ keywords: [talk, embeddings, reranker, codebase, experimental]

# @Codebase

Continue indexes your codebase so that it can later automatically pull in the most relevant context from throughout your workspace. This is done via a combination of embeddings-based retrieval and keyword search. By default, all embeddings are calculated locally with `all-MiniLM-L6-v2` and stored locally in `~/.continue/index`.
Continue indexes your codebase so that it can later automatically pull in the most relevant context from throughout your workspace. This is done via a combination of embeddings-based retrieval and keyword search. By default, all embeddings are calculated locally using `transformers.js` and stored locally in `~/.continue/index`.

:::info[`transformers.js` cannot be used in JetBrains]
Currently, `transformers.js` cannot be used in JetBrains IDEs. However, you can select a different embeddings model from [the list here](../model-types/embeddings.md).
:::

Currently, the codebase retrieval feature is available as the "codebase" and "folder" context providers. You can use them by typing `@Codebase` or `@Folder` in the input box, and then asking a question. The contents of the input box will be compared with the embeddings from the rest of the codebase (or folder) to determine relevant files.

Expand Down
8 changes: 6 additions & 2 deletions docs/docs/customize/model-types/embeddings.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ An "embeddings model" is trained to convert a piece of text into a vector, which

In Continue, embeddings are generated during indexing and then used by [@Codebase](../deep-dives/codebase.md) to perform similarity search over your codebase.

:::info[Default model (VS Code only)]
`transformers.js` is the default embeddings model in VS Code. In JetBrains, there is currently no default.
:::

## Recommended embedding models

If you have the ability to use any model, we recommend `voyage-code-2`, which is listed below along with the rest of the options for embeddings models.
Expand All @@ -35,7 +39,7 @@ See [here](../model-providers/top-level/ollama.md#embeddings-model) for instruct

### Transformers.js (currently VS Code only)

[Transformers.js](https://huggingface.co/docs/transformers.js/index) is a JavaScript port of the popular [Transformers](https://huggingface.co/transformers/) library. It allows embeddings to be calculated entirely locally. The model used is `all-MiniLM-L6-v2`, which is shipped alongside the Continue extension and used as the default when you have not explicitly configured an embeddings provider.
[Transformers.js](https://huggingface.co/docs/transformers.js/index) is a JavaScript port of the popular [Transformers](https://huggingface.co/transformers/) library. It allows embeddings to be calculated entirely locally. The model used is `all-MiniLM-L6-v2`, which is shipped alongside the Continue extension.

```json title="config.json"
{
Expand Down Expand Up @@ -72,4 +76,4 @@ See [here](../model-providers/top-level/gemini.md#embeddings-model) for instruct

### Vertex

See [here](../model-providers/more/vertex.md#embeddings-model) for instructions on how to use Vertex for embeddings.
See [here](../model-providers/more/vertex.md#embeddings-model) for instructions on how to use Vertex for embeddings.
4 changes: 2 additions & 2 deletions extensions/intellij/src/main/resources/config_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -2425,8 +2425,8 @@
},
"embeddingsProvider": {
"title": "Embeddings Provider",
"markdownDescription": "The method that will be used to generate codebase embeddings. The default is transformers.js, which will run locally in the browser. Learn about the other options [here](https://docs.continue.dev/features/codebase-embeddings#embeddings-providers).",
"x-intellij-html-description": "The method that will be used to generate codebase embeddings. The default is transformers.js, which will run locally in the browser. Learn about the other options <a href='https://docs.continue.dev/features/codebase-embeddings#embeddings-providers'>here</a>.",
"markdownDescription": "The method that will be used to generate codebase embeddings. The default is `transformers.js`, which will run locally in the browser. Learn about the other options [here](https://docs.continue.dev/features/codebase-embeddings#embeddings-providers).\n\n **Note**: `transformers.js` currently cannot be used in JetBrains.",
"x-intellij-html-description": "The method that will be used to generate codebase embeddings. The default is <code>transformers.js</code>, which will run locally in the browser. Learn about the other options <a href='https://docs.continue.dev/features/codebase-embeddings#embeddings-providers'>here</a>.<br><br><strong>Note</strong>: <code>transformers.js</code> currently cannot be used in JetBrains.",
"type": "object",
"properties": {
"provider": {
Expand Down
4 changes: 2 additions & 2 deletions extensions/vscode/config_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -2425,8 +2425,8 @@
},
"embeddingsProvider": {
"title": "Embeddings Provider",
"markdownDescription": "The method that will be used to generate codebase embeddings. The default is transformers.js, which will run locally in the browser. Learn about the other options [here](https://docs.continue.dev/features/codebase-embeddings#embeddings-providers).",
"x-intellij-html-description": "The method that will be used to generate codebase embeddings. The default is transformers.js, which will run locally in the browser. Learn about the other options <a href='https://docs.continue.dev/features/codebase-embeddings#embeddings-providers'>here</a>.",
"markdownDescription": "The method that will be used to generate codebase embeddings. The default is `transformers.js`, which will run locally in the browser. Learn about the other options [here](https://docs.continue.dev/features/codebase-embeddings#embeddings-providers).",
"x-intellij-html-description": "The method that will be used to generate codebase embeddings. The default is <code>transformers.js</code>, which will run locally in the browser. Learn about the other options <a href='https://docs.continue.dev/features/codebase-embeddings#embeddings-providers'>here</a>.<br><br><strong>Note</strong>: <code>transformers.js</code> currently cannot be used in JetBrains.",
"type": "object",
"properties": {
"provider": {
Expand Down
Loading

0 comments on commit c487d99

Please sign in to comment.