Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Autocomplete context improvement for codestral and qwen coder #3927

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 101 additions & 31 deletions core/autocomplete/templating/AutocompleteTemplate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ import {
AutocompleteSnippetType,
} from "../snippets/types.js";

/**
 * Autocomplete-specific completion options. They are accepted alongside the
 * regular `CompletionOptions` in an `AutocompleteTemplate`'s `completionOptions`.
 */
export interface AutocompleteCompletionOptions {
/**
 * Optional flag. According to the usage note on the Qwen Coder template, with
 * the ollama provider this makes sure a single prompt is sent, with the suffix
 * as part of the prompt rather than as a separate parameter.
 * NOTE(review): the effect on other providers is not visible here — confirm.
 */
promptOnly?: boolean;
}

export interface AutocompleteTemplate {
compilePrefixSuffix?: (
prefix: string,
Expand All @@ -21,17 +25,17 @@ export interface AutocompleteTemplate {
workspaceUris: string[],
) => [string, string];
template:
| string
| ((
prefix: string,
suffix: string,
filepath: string,
reponame: string,
language: string,
snippets: AutocompleteSnippet[],
workspaceUris: string[],
) => string);
completionOptions?: Partial<CompletionOptions>;
| string
| ((
prefix: string,
suffix: string,
filepath: string,
reponame: string,
language: string,
snippets: AutocompleteSnippet[],
workspaceUris: string[],
) => string);
completionOptions?: Partial<CompletionOptions> & Partial<AutocompleteCompletionOptions>;
}

// https://huggingface.co/stabilityai/stable-code-3b
Expand All @@ -52,8 +56,47 @@ const stableCodeFimTemplate: AutocompleteTemplate = {

// https://github.com/QwenLM/Qwen2.5-Coder?tab=readme-ov-file#3-file-level-code-completion-fill-in-the-middle
const qwenCoderFimTemplate: AutocompleteTemplate = {
template:
"<|fim_prefix|>{{{prefix}}}<|fim_suffix|>{{{suffix}}}<|fim_middle|>",
/**
 * Compiles the repo name, every context snippet and the fill-in-the-middle
 * markers into one Qwen prompt string.
 *
 * @param prefix - Text before the caret; placed after `<|fim_prefix|>`.
 * @param suffix - Text after the caret; placed after `<|fim_suffix|>`.
 * @param filepath - URI of the file being completed.
 * @param reponame - Repository name; placed after `<|repo_name|>`.
 * @param snippets - Context snippets, each emitted as a `<|file_sep|>` section.
 * @param workspaceUris - Workspace roots passed to the relative-path helper.
 * @returns `[prompt, ""]` — the whole prompt travels in the first tuple slot
 *   and the suffix slot is deliberately left empty.
 */
compilePrefixSuffix: (
  prefix: string,
  suffix: string,
  filepath: string,
  reponame: string,
  snippets: AutocompleteSnippet[],
  workspaceUris: string[],
): [string, string] => {
  // `file://` entries are labelled with their `uniquePath`; anything else keeps its full URI.
  const getFileName = (entry: { uri: string; uniquePath: string }): string =>
    entry.uri.startsWith("file://") ? entry.uniquePath : entry.uri;

  // Snippets without a `filepath` (checked with `in`) get a placeholder URI so
  // that indices in `relativePaths` stay aligned with `snippets`.
  const relativePaths = getShortestUniqueRelativeUriPaths(
    [
      ...snippets.map((snippet) =>
        "filepath" in snippet ? snippet.filepath : "file:///Untitled.txt",
      ),
      filepath,
    ],
    workspaceUris,
  );

  // One `<|file_sep|>` section per snippet. The content is used as-is for
  // every snippet type (the former Diff/non-Diff ternary had identical branches).
  const fileSections = snippets
    .map(
      (snippet, i) =>
        `\n<|file_sep|>${getFileName(relativePaths[i])}\n${snippet.content}`,
    )
    .join("");

  // NOTE(review): `relativePaths` also holds an entry for `filepath` (its last
  // element) that is never used. Qwen's repo-level format may expect a
  // `<|file_sep|>` header for the current file before `<|fim_prefix|>` — confirm.
  const prompt =
    `<|repo_name|>${reponame}` +
    fileSections +
    `<|fim_prefix|>${prefix}<|fim_suffix|>${suffix}<|fim_middle|>`;

  // Empty suffix will make the prefix be used as a single prompt
  return [prompt, ""];
},
template: "{{{prefix}}}", // output of compilePrefixSuffix already compiles everything into a single prompt
completionOptions: {
stop: [
"<|endoftext|>",
Expand All @@ -66,6 +109,7 @@ const qwenCoderFimTemplate: AutocompleteTemplate = {
"<|im_start|>",
"<|im_end|>",
],
promptOnly: true // with ollama provider this makes sure a single prompt is sent (with suffix as part of the prompt, not as a separate parameter)
},
};

Expand All @@ -85,16 +129,23 @@ const codestralMultifileFimTemplate: AutocompleteTemplate = {
snippets,
workspaceUris,
): [string, string] => {

// Chooses the label for a snippet's file: `uniquePath` for `file://` URIs,
// otherwise the URI itself, unchanged.
function getFileName(snippet: { uri: string; uniquePath: string }) {
  const isFileUri = snippet.uri.startsWith("file://");
  if (isFileUri) {
    return snippet.uniquePath;
  }
  return snippet.uri;
}

if (snippets.length === 0) {
if (suffix.trim().length === 0 && prefix.trim().length === 0) {
return [
`+++++ ${getLastNUriRelativePathParts(workspaceUris, filepath, 2)}\n${prefix}`,
suffix,
];
return [`+++++ ${getLastNUriRelativePathParts(workspaceUris, filepath, 2)}\n\n[PREFIX]\n${prefix}`, suffix];
}
return [prefix, suffix];
}

//snippets = snippets.filter((snippet) => "filepath" in snippet);

// reverse the snippets so that the most recent snippet is last
snippets = [...snippets].reverse();

const relativePaths = getShortestUniqueRelativeUriPaths(
[
...snippets.map((snippet) =>
Expand All @@ -111,22 +162,42 @@ const codestralMultifileFimTemplate: AutocompleteTemplate = {
return snippet.content;
}

return `+++++ ${relativePaths[i].uri} \n${snippet.content}`;
return `+++++ ${getFileName(relativePaths[i])} \n${snippet.content}`;
})
.join("\n\n");

return [
`${otherFiles}\n\n+++++ ${
relativePaths[relativePaths.length - 1].uri
}\n${prefix}`,
suffix,
`${otherFiles}\n\n+++++ ${getFileName(relativePaths[relativePaths.length - 1])}\n[PREFIX]${prefix}`,
`${suffix}`,
];
},
// Builds a single prompt string from `prefix` and `suffix` using the
// `[SUFFIX]` / `[PREFIX]` markers.
template: (prefix: string, suffix: string): string => {
// NOTE(review): this unconditional return comes before all of the logic below,
// so as written nothing after it runs. It looks like the previous
// implementation left over from the diff view — confirm and remove.
return `[SUFFIX]${suffix}[PREFIX]${prefix}`;
/*
This template is ignored with codestral provider, however theoretically it's possible that a provider
not supporting fim endpoint would have a model name matched with this template,
or the codestral implementation will be changed, so we provide a usable implementation.
*/

// Position of the last `[PREFIX]` marker already present in `prefix`, or -1 if there is none.
const prefixMarkerIndex = prefix.lastIndexOf('[PREFIX]');

// No marker in `prefix` yet: add the markers here, omitting `[SUFFIX]` when the suffix is empty.
if (prefixMarkerIndex === -1) {
return suffix ? `[SUFFIX]${suffix}[PREFIX]${prefix}` : `[PREFIX]${prefix}`;
}

if (!suffix) {
// [PREFIX] already in the prompt, but suffix is an empty string
return prefix;
}

// Insert [SUFFIX]${suffix} just before [PREFIX]
// Everything before the marker is kept in front; the marker and the text after it follow the suffix.
return (
prefix.substring(0, prefixMarkerIndex) +
'[SUFFIX]' + suffix +
prefix.substring(prefixMarkerIndex)
);
},
completionOptions: {
stop: ["[PREFIX]", "[SUFFIX]"],
stop: ["[PREFIX]", "[SUFFIX]", "\n+++++ "],
},
};

Expand Down Expand Up @@ -160,10 +231,10 @@ const starcoder2FimTemplate: AutocompleteTemplate = {
snippets.length === 0
? ""
: `<file_sep>${snippets
.map((snippet) => {
return snippet.content;
})
.join("<file_sep>")}<file_sep>`;
.map((snippet) => {
return snippet.content;
})
.join("<file_sep>")}<file_sep>`;

const prompt = `${otherFiles}<fim_prefix>${prefix}<fim_suffix>${suffix}<fim_middle>`;
return prompt;
Expand Down Expand Up @@ -218,9 +289,8 @@ const codegeexFimTemplate: AutocompleteTemplate = {
[...snippets.map((snippet) => snippet.filepath), filepath],
workspaceUris,
);
const baseTemplate = `###PATH:${
relativePaths[relativePaths.length - 1]
}\n###LANGUAGE:${language}\n###MODE:BLOCK\n<|code_suffix|>${suffix}<|code_prefix|>${prefix}<|code_middle|>`;
const baseTemplate = `###PATH:${relativePaths[relativePaths.length - 1]
}\n###LANGUAGE:${language}\n###MODE:BLOCK\n<|code_suffix|>${suffix}<|code_prefix|>${prefix}<|code_middle|>`;
if (snippets.length === 0) {
return `<|user|>\n${baseTemplate}<|assistant|>\n`;
}
Expand Down
104 changes: 90 additions & 14 deletions core/autocomplete/templating/filtering.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,24 +42,100 @@ export const getSnippets = (
helper: HelperVars,
payload: SnippetPayload,
): AutocompleteSnippet[] => {
const snippets = [
...payload.diffSnippets,
...payload.clipboardSnippets,
...payload.recentlyVisitedRangesSnippets,
...shuffleArray(
filterSnippetsAlreadyInCaretWindow(
[...payload.rootPathSnippets, ...payload.importDefinitionSnippets],
helper.prunedCaretWindow,
),
),
];

const finalSnippets = [];
const snippets = {
"clipboard": payload.clipboardSnippets,
"recentlyVisitedRanges": payload.recentlyVisitedRangesSnippets,
"recentlyEditedRanges": payload.recentlyEditedRangeSnippets,
"diff": payload.diffSnippets,
"base": shuffleArray(filterSnippetsAlreadyInCaretWindow(
[...payload.rootPathSnippets, ...payload.importDefinitionSnippets],
helper.prunedCaretWindow,
)),
}

// Define snippets with their priorities
const snippetConfigs: {
key: keyof typeof snippets;
enabledOrPriority: boolean | number;
defaultPriority: number;
snippets: AutocompleteSnippet[];
}[] = [
{
key: "clipboard",
enabledOrPriority: helper.options.experimental_includeClipboard,
defaultPriority: 1,
snippets: payload.clipboardSnippets,
},
{
key: "recentlyVisitedRanges",
enabledOrPriority: helper.options.experimental_includeRecentlyVisitedRanges,
defaultPriority: 2,
snippets: payload.recentlyVisitedRangesSnippets,
/* TODO: recentlyVisitedRanges also contain contents from other windows like terminal or output
if they are visible. We should handle them separately so that we can control their priority
and whether they should be included or not. */
},
{
key: "recentlyEditedRanges",
enabledOrPriority: helper.options.experimental_includeRecentlyEditedRanges,
defaultPriority: 3,
snippets: payload.recentlyEditedRangeSnippets,
},
{
key: "diff",
enabledOrPriority: helper.options.experimental_includeDiff,
defaultPriority: 4,
snippets: payload.diffSnippets,
// TODO: diff is commonly too large, thus anything lower in priority is not included.
},
{
key: "base",
enabledOrPriority: true,
defaultPriority: 99, // make sure it's the last one to be processed, but still possible to override
snippets: shuffleArray(filterSnippetsAlreadyInCaretWindow(
[...payload.rootPathSnippets, ...payload.importDefinitionSnippets],
helper.prunedCaretWindow,
)),
// TODO: Add this to the experimental config too, and maybe move it higher in the order, since it's
// almost never included due to the diff commonly being large
},
];

// Create a readable order of enabled snippets
const snippetOrder = snippetConfigs
.filter(({ enabledOrPriority }) => enabledOrPriority)
.map(({ key, enabledOrPriority, defaultPriority }) => ({
key,
priority: typeof enabledOrPriority === 'number' ? enabledOrPriority : defaultPriority,
}))
.sort((a, b) => a.priority - b.priority);

// Log the snippet order for debugging - uncomment if needed
/* console.log(
'Snippet processing order:',
snippetOrder
.map(({ key, priority }) => `${key} (priority: ${priority})`).join("\n")
); */

// Convert configs to prioritized snippets
let prioritizedSnippets = snippetOrder
.flatMap(({ key, priority }) =>
snippets[key].map(snippet => ({ snippet, priority }))
)
.sort((a, b) => a.priority - b.priority)
.map(({ snippet }) => snippet);

// Exclude Continue's own output as it makes it super-hard for users to test the autocomplete feature
// while looking at the prompts in the Continue's output
prioritizedSnippets = prioritizedSnippets.filter((snippet) =>
!(snippet as AutocompleteCodeSnippet).filepath?.startsWith("output:extension-output-Continue.continue"));

const finalSnippets = [];
let remainingTokenCount = getRemainingTokenCount(helper);

while (remainingTokenCount > 0 && snippets.length > 0) {
const snippet = snippets.shift();
while (remainingTokenCount > 0 && prioritizedSnippets.length > 0) {
const snippet = prioritizedSnippets.shift();
if (!snippet || !isValidSnippet(snippet)) {
continue;
}
Expand Down
Loading