Skip to content

Commit

Permalink
Merge pull request #2030 from continuedev/pe/char-stream-testing
Browse files Browse the repository at this point in the history
tests: charStream
  • Loading branch information
Patrick-Erichsen authored Aug 17, 2024
2 parents 3b28464 + d05d056 commit 20f0c87
Show file tree
Hide file tree
Showing 2 changed files with 170 additions and 0 deletions.
138 changes: 138 additions & 0 deletions core/autocomplete/charStream.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import { jest } from "@jest/globals";
import * as charStream from "./charStream";
import { Typescript } from "./languages";

/**
 * Unit tests for the async-generator stream filters exported by `./charStream`.
 *
 * Every test feeds a fixed list of string chunks through one filter, drains
 * the filter, and compares the concatenation of everything it yielded against
 * the expected text.
 */
describe("charStream", () => {
  // Fresh mock per test so call counts never leak between cases.
  let mockFullStop: jest.Mock;

  /** Builds an async generator that yields the given chunks in order. */
  async function* getCharGenerator(chunks: string[]): AsyncGenerator<string> {
    for (const chunk of chunks) {
      yield chunk;
    }
  }

  /** Drains `results` and returns every chunk it yielded, in order. */
  async function getFilteredChars(
    results: AsyncGenerator<string>,
  ): Promise<string[]> {
    const output: string[] = [];
    for await (const chunk of results) {
      output.push(chunk);
    }
    return output;
  }

  beforeEach(() => {
    mockFullStop = jest.fn();
  });

  describe("onlyWhitespaceAfterEndOfLine", () => {
    const endOfLineChar = Typescript.endOfLine[0];

    it("should stop at end of line if non-whitespace follows", async () => {
      const charGenerator = getCharGenerator([`Hello${endOfLineChar}World`]);

      const result = charStream.onlyWhitespaceAfterEndOfLine(
        charGenerator,
        [endOfLineChar],
        mockFullStop,
      );
      const filteredChars = await getFilteredChars(result);

      expect(filteredChars.join("")).toBe(`Hello${endOfLineChar}`);
      expect(mockFullStop).toHaveBeenCalledTimes(1);
    });

    it("should continue past end of line if only whitespace follows", async () => {
      const charGenerator = getCharGenerator([`Hello${endOfLineChar} World`]);

      const result = charStream.onlyWhitespaceAfterEndOfLine(
        charGenerator,
        [endOfLineChar],
        mockFullStop,
      );
      const filteredChars = await getFilteredChars(result);

      expect(filteredChars.join("")).toBe(`Hello${endOfLineChar} World`);
      expect(mockFullStop).not.toHaveBeenCalled();
    });

    it("should handle end of line at the end of chunk", async () => {
      // The end-of-line character is the last character of the first chunk and
      // the non-whitespace text only arrives in the following chunk.
      const charGenerator = getCharGenerator([
        `Hello${endOfLineChar}`,
        "World",
      ]);

      const result = charStream.onlyWhitespaceAfterEndOfLine(
        charGenerator,
        [endOfLineChar],
        mockFullStop,
      );
      const filteredChars = await getFilteredChars(result);

      expect(filteredChars.join("")).toBe(`Hello${endOfLineChar}`);
      expect(mockFullStop).toHaveBeenCalledTimes(1);
    });
  });

  describe("noFirstCharNewline", () => {
    it("should yield nothing if the first chunk starts with a newline", async () => {
      const charGenerator = getCharGenerator(["\nHello"]);

      const result = charStream.noFirstCharNewline(charGenerator);
      const filteredChars = await getFilteredChars(result);

      // The whole stream is dropped, not just the leading newline.
      expect(filteredChars.join("")).toBe("");
    });

    it("should keep content if no leading newline", async () => {
      const charGenerator = getCharGenerator(["Hello\nWorld"]);

      const result = charStream.noFirstCharNewline(charGenerator);
      const filteredChars = await getFilteredChars(result);

      expect(filteredChars.join("")).toBe("Hello\nWorld");
    });

    it("should yield nothing if the first chunk starts with a carriage return", async () => {
      const charGenerator = getCharGenerator(["\rHello"]);

      const result = charStream.noFirstCharNewline(charGenerator);
      const filteredChars = await getFilteredChars(result);

      // The whole stream is dropped, not just the leading carriage return.
      expect(filteredChars.join("")).toBe("");
    });
  });

  describe("stopAtStopTokens", () => {
    it("should stop at the first occurrence of a stop token", async () => {
      const charGenerator = getCharGenerator(["Hello<|endoftext|>World"]);

      const result = charStream.stopAtStopTokens(charGenerator, [
        "<|endoftext|>",
      ]);
      const filteredChars = await getFilteredChars(result);

      expect(filteredChars.join("")).toBe("Hello");
    });

    it("should return all content if no stop tokens are provided", async () => {
      const charGenerator = getCharGenerator(["Hello<|endoftext|>World"]);

      const result = charStream.stopAtStopTokens(charGenerator, []);
      const filteredChars = await getFilteredChars(result);

      expect(filteredChars.join("")).toBe("Hello<|endoftext|>World");
    });

    it("should handle stop tokens that span multiple chunks", async () => {
      // The stop token is split across the chunk boundary.
      const charGenerator = getCharGenerator(["Hello<|", "endoftext|>World"]);

      const result = charStream.stopAtStopTokens(charGenerator, [
        "<|endoftext|>",
      ]);
      const filteredChars = await getFilteredChars(result);

      expect(filteredChars.join("")).toBe("Hello");
    });

    it("should yield remaining characters in buffer if no stop token is found", async () => {
      const charGenerator = getCharGenerator(["Hello", "World"]);

      const result = charStream.stopAtStopTokens(charGenerator, [
        "<|endoftext|>",
      ]);
      const filteredChars = await getFilteredChars(result);

      expect(filteredChars.join("")).toBe("HelloWorld");
    });
  });
});
32 changes: 32 additions & 0 deletions core/autocomplete/charStream.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
/**
* Asynchronous generator that yields characters from the input stream until it encounters
* an end-of-line character followed by a non-whitespace character.
*
* @param {AsyncGenerator<string>} stream - The input stream of characters.
* @param {string[]} endOfLine - An array of characters considered as end-of-line markers.
* @param {() => void} fullStop - Callback invoked when the generator cuts the stream short; it is not called when the stream ends without being cut.
* @yields {string} Characters from the input stream.
* @returns {AsyncGenerator<string>} An async generator that yields characters.
*/
export async function* onlyWhitespaceAfterEndOfLine(
stream: AsyncGenerator<string>,
endOfLine: string[],
fullStop: () => void,
): AsyncGenerator<string> {
let pending = "";

for await (let chunk of stream) {
chunk = pending + chunk;
pending = "";

for (let i = 0; i < chunk.length - 1; i++) {
if (
endOfLine.includes(chunk[i]) &&
Expand All @@ -17,6 +29,7 @@ export async function* onlyWhitespaceAfterEndOfLine(
return;
}
}

if (endOfLine.includes(chunk[chunk.length - 1])) {
pending = chunk[chunk.length - 1];
yield chunk.slice(0, chunk.length - 1);
Expand All @@ -27,6 +40,11 @@ export async function* onlyWhitespaceAfterEndOfLine(
yield pending;
}

/**
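* Yields chunks from the stream unchanged, unless the first chunk starts with a newline ("\n") or carriage return ("\r"), in which case it stops immediately without yielding anything.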
* @param {AsyncGenerator<string>} stream - The input character stream.
* @yields {string} Characters from the stream.
*/
export async function* noFirstCharNewline(stream: AsyncGenerator<string>) {
let first = true;
for await (const char of stream) {
Expand All @@ -40,6 +58,20 @@ export async function* noFirstCharNewline(stream: AsyncGenerator<string>) {
}
}

/**
* Asynchronously yields characters from the input stream, stopping if a stop token is encountered.
*
* @param {AsyncGenerator<string>} stream - The input stream of characters.
* @param {string[]} stopTokens - Array of tokens that signal when to stop yielding.
* @yields {string} Characters from the input stream.
* @returns {AsyncGenerator<string>} An async generator that yields characters until a stop condition is met.
* @description
* 1. If no stop tokens are provided, yields all characters from the stream.
* 2. Otherwise, buffers incoming chunks and checks for stop tokens.
* 3. Yields characters one by one if no stop token is found at the start of the buffer.
* 4. Stops yielding and returns if a stop token is encountered.
* 5. After the stream ends, yields any remaining buffered characters.
*/
export async function* stopAtStopTokens(
stream: AsyncGenerator<string>,
stopTokens: string[],
Expand Down

0 comments on commit 20f0c87

Please sign in to comment.