diff --git a/README.md b/README.md
index 73aad8f..19399fe 100644
--- a/README.md
+++ b/README.md
@@ -107,16 +107,18 @@ Caveats:
 
 ### Tool streaming
 
-> This feature is not completed and unstable
+> This feature is experimental and uses simulation.
 
-Ollama tooling does not support it in streams, but this provider can detect tool responses.
+Tool streaming is not natively supported by Ollama tooling. Previously, the `experimentalStreamTools` option was used to handle tool responses in a simulated streaming mode. While this option is still enabled by default for backward compatibility, it has been deprecated in favor of the new `simulateStreaming` option.
 
-You can disable this experimental feature with `` setting:
+The `simulateStreaming` option provides a streaming-like experience by generating the complete response with the generate method and then sending it to the client in segments. This simulates real-time streaming while remaining compatible with models that do not natively support it.
+
+To enable simulated streaming, update your configuration as follows:
 
 ```ts
 ollama("model", {
-  experimentalStreamTools: false,
-})
+  simulateStreaming: true,
+});
 ```
 
 ### Intercepting Fetch Requests
diff --git a/examples/ai-core/src/stream-object/ollama-tool.ts b/examples/ai-core/src/stream-object/ollama-tool.ts
index da04b69..2e9ae09 100755
--- a/examples/ai-core/src/stream-object/ollama-tool.ts
+++ b/examples/ai-core/src/stream-object/ollama-tool.ts
@@ -14,7 +14,7 @@
 async function main(model: Parameters<typeof ollama>[0]) {
   const result = streamObject({
     maxTokens: 2000,
     mode: 'tool',
-    model: ollama(model),
+    model: ollama(model, { simulateStreaming: true, structuredOutputs: true }),
     prompt: 'Generate 3 character descriptions for a fantasy role playing game.',
     schema: z.object({
diff --git a/examples/weather-ollama/src/lib/ai/actions.tsx b/examples/weather-ollama/src/lib/ai/actions.tsx
index 1659a0b..0e17ebf 100644
--- a/examples/weather-ollama/src/lib/ai/actions.tsx
+++ b/examples/weather-ollama/src/lib/ai/actions.tsx
@@ -56,7 +56,7 @@ export async function submitUserMessage(content: string): Promise<{
       name: message.name,
       role: message.role,
     })),
-    model: ollama('llama3.1'),
+    model: ollama('llama3.1', { simulateStreaming: true }),
     system: PROMPT,
     // eslint-disable-next-line @typescript-eslint/no-shadow
     text: ({ content, delta, done }) => {
diff --git a/packages/ollama/src/ollama-chat-language-model.ts b/packages/ollama/src/ollama-chat-language-model.ts
index 140d25f..6a024c7 100644
--- a/packages/ollama/src/ollama-chat-language-model.ts
+++ b/packages/ollama/src/ollama-chat-language-model.ts
@@ -232,6 +232,52 @@ export class OllamaChatLanguageModel implements LanguageModelV1 {
   async doStream(
     options: Parameters<LanguageModelV1['doStream']>[0],
   ): Promise<Awaited<ReturnType<LanguageModelV1['doStream']>>> {
+    if (this.settings.simulateStreaming) {
+      const result = await this.doGenerate(options)
+
+      const simulatedStream = new ReadableStream<LanguageModelV1StreamPart>({
+        start(controller) {
+          controller.enqueue({ type: 'response-metadata', ...result.response })
+          if (result.text) {
+            controller.enqueue({
+              textDelta: result.text,
+              type: 'text-delta',
+            })
+          }
+          if (result.toolCalls) {
+            for (const toolCall of result.toolCalls) {
+              controller.enqueue({
+                argsTextDelta: toolCall.args,
+                toolCallId: toolCall.toolCallId,
+                toolCallType: 'function',
+                toolName: toolCall.toolName,
+                type: 'tool-call-delta',
+              })
+
+              controller.enqueue({
+                type: 'tool-call',
+                ...toolCall,
+              })
+            }
+          }
+          controller.enqueue({
+            finishReason: result.finishReason,
+            logprobs: result.logprobs,
+            providerMetadata: result.providerMetadata,
+            type: 'finish',
+            usage: result.usage,
+          })
+          controller.close()
+        },
+      })
+      return {
+        rawCall: result.rawCall,
+        rawResponse: result.rawResponse,
+        stream: simulatedStream,
+        warnings: result.warnings,
+      }
+    }
+
     const { args: body, type, warnings } = this.getArguments(options)
 
     const { responseHeaders, value: response } = await postJsonToApi({
diff --git a/packages/ollama/src/ollama-chat-settings.ts b/packages/ollama/src/ollama-chat-settings.ts
index 46dc590..384f68d 100644
--- a/packages/ollama/src/ollama-chat-settings.ts
+++ b/packages/ollama/src/ollama-chat-settings.ts
@@ -157,6 +157,8 @@
   /**
    * Until Ollama officially supports tool calling in streams, the provider can try to detect function calls. Enabled by
    * default to maintain backward compatibility, disable it if you encounter any issues.
+   *
+   * @deprecated Use `simulateStreaming` instead.
    */
   experimentalStreamTools?: boolean
 
@@ -248,6 +250,14 @@
    */
  repeatPenalty?: number
 
+  /**
+Simulates streaming by using a normal generate call and returning it as a stream.
+Enable this if the model that you are using does not support streaming.
+
+Defaults to `false`.
+   */
+  simulateStreaming?: boolean
+
   /**
    * Whether to use structured outputs. Defaults to false.
    *
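
For reviewers who want to exercise the new option end to end, here is a minimal usage sketch (not part of this diff). It assumes the Vercel AI SDK (`ai`) and this provider (`ollama-ai-provider`) are installed; the model name and prompt are illustrative:

```ts
// Minimal sketch: stream a response from a model that does not support
// native streaming by letting the provider simulate it.
import { streamText } from 'ai'
import { ollama } from 'ollama-ai-provider'

async function main() {
  const result = await streamText({
    // With simulateStreaming enabled, doStream() performs a single
    // doGenerate() call and replays the full response as stream parts.
    model: ollama('llama3.1', { simulateStreaming: true }),
    prompt: 'Explain simulated streaming in one sentence.',
  })

  for await (const textPart of result.textStream) {
    process.stdout.write(textPart)
  }
}

main().catch(console.error)
```

Note that text and tool calls arrive in the simulated stream only after the whole generation has finished, since the provider buffers the complete `doGenerate` result before replaying it as stream parts.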