feat: implement simulateStreaming setting
sgomez committed Jan 17, 2025
1 parent 1c6c05b commit da3c2fd
Showing 5 changed files with 65 additions and 7 deletions.
README.md (7 additions, 5 deletions)

````diff
@@ -107,16 +107,18 @@ Caveats:
 
 ### Tool streaming
 
-> This feature is not completed and unstable
+> This feature is experimental and uses simulation.
 
-Ollama tooling does not support it in streams, but this provider can detect tool responses.
+Tool streaming is not natively supported by Ollama tooling. Previously, the `experimentalStreamTools` option was used to handle tool responses in a simulated streaming mode. While this option is still active by default for backward compatibility, it has been deprecated in favor of the new `simulateStreaming` option.
 
-You can disable this experimental feature with the `experimentalStreamTools` setting:
+The `simulateStreaming` option provides a streaming-like experience by generating the complete response using the generate method and then sending it to the client in segments. This approach simulates real-time streaming while ensuring compatibility with models that do not natively support streaming.
+
+To enable simulated streaming, update your configuration as follows:
 
 ```ts
 ollama("model", {
-  experimentalStreamTools: false,
-})
+  simulateStreaming: true
+});
 ```
 
 ### Intercepting Fetch Requests
````
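For context, a minimal end-to-end sketch of the behavior the new README text describes, assuming the provider is consumed through the AI SDK's `streamText` helper; the import path, model name, and prompt are illustrative and not part of this commit:

```ts
import { streamText } from 'ai'
import { ollama } from 'ollama-ai-provider'

// With simulateStreaming enabled, the provider performs a single generate
// call and then replays the finished response through the stream interface.
const result = streamText({
  model: ollama('llama3.1', { simulateStreaming: true }),
  prompt: 'Explain simulated streaming in one sentence.',
})

for await (const delta of result.textStream) {
  process.stdout.write(delta)
}
```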
examples/ai-core/src/stream-object/ollama-tool.ts (1 addition, 1 deletion)

```diff
@@ -14,7 +14,7 @@ async function main(model: Parameters<typeof ollama>[0]) {
   const result = streamObject({
     maxTokens: 2000,
     mode: 'tool',
-    model: ollama(model),
+    model: ollama(model, { simulateStreaming: true, structuredOutputs: true }),
     prompt:
       'Generate 3 character descriptions for a fantasy role playing game.',
     schema: z.object({
```
examples/weather-ollama/src/lib/ai/actions.tsx (1 addition, 1 deletion)

```diff
@@ -56,7 +56,7 @@ export async function submitUserMessage(content: string): Promise<{
       name: message.name,
       role: message.role,
     })),
-    model: ollama('llama3.1'),
+    model: ollama('llama3.1', { simulateStreaming: true }),
     system: PROMPT,
     // eslint-disable-next-line @typescript-eslint/no-shadow
     text: ({ content, delta, done }) => {
```
packages/ollama/src/ollama-chat-language-model.ts (46 additions, 0 deletions)

```diff
@@ -232,6 +232,52 @@ export class OllamaChatLanguageModel implements LanguageModelV1 {
   async doStream(
     options: Parameters<LanguageModelV1['doStream']>[0],
   ): Promise<Awaited<ReturnType<LanguageModelV1['doStream']>>> {
+    if (this.settings.simulateStreaming) {
+      const result = await this.doGenerate(options)
+
+      const simulatedStream = new ReadableStream<LanguageModelV1StreamPart>({
+        start(controller) {
+          controller.enqueue({ type: 'response-metadata', ...result.response })
+          if (result.text) {
+            controller.enqueue({
+              textDelta: result.text,
+              type: 'text-delta',
+            })
+          }
+          if (result.toolCalls) {
+            for (const toolCall of result.toolCalls) {
+              controller.enqueue({
+                argsTextDelta: toolCall.args,
+                toolCallId: toolCall.toolCallId,
+                toolCallType: 'function',
+                toolName: toolCall.toolName,
+                type: 'tool-call-delta',
+              })
+
+              controller.enqueue({
+                type: 'tool-call',
+                ...toolCall,
+              })
+            }
+          }
+          controller.enqueue({
+            finishReason: result.finishReason,
+            logprobs: result.logprobs,
+            providerMetadata: result.providerMetadata,
+            type: 'finish',
+            usage: result.usage,
+          })
+          controller.close()
+        },
+      })
+      return {
+        rawCall: result.rawCall,
+        rawResponse: result.rawResponse,
+        stream: simulatedStream,
+        warnings: result.warnings,
+      }
+    }
+
     const { args: body, type, warnings } = this.getArguments(options)
 
     const { responseHeaders, value: response } = await postJsonToApi({
```
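As a reference for the code path above, a hypothetical direct consumer of `doStream`; the call options follow the `LanguageModelV1` interface, and the model name and prompt are placeholders:

```ts
import { ollama } from 'ollama-ai-provider'

const model = ollama('llama3.1', { simulateStreaming: true })

// doStream short-circuits into the simulated branch added by this commit.
const { stream } = await model.doStream({
  inputFormat: 'prompt',
  mode: { type: 'regular' },
  prompt: [{ content: [{ text: 'Hello!', type: 'text' }], role: 'user' }],
})

const reader = stream.getReader()
for (;;) {
  const { done, value } = await reader.read()
  if (done) break
  // The full generated text arrives as a single text-delta part,
  // followed by any tool-call parts and a final finish part.
  if (value.type === 'text-delta') process.stdout.write(value.textDelta)
}
```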
packages/ollama/src/ollama-chat-settings.ts (10 additions, 0 deletions)

```diff
@@ -157,6 +157,8 @@ export interface OllamaChatSettings {
   /**
    * Until Ollama officially supports tool calling in streams, the provider can try to detect function calls. Enabled by
    * default to maintain backward compatibility, disable it if you encounter any issues.
+   *
+   * @deprecated Use `simulateStreaming` instead.
    */
   experimentalStreamTools?: boolean
 
@@ -248,6 +250,14 @@ export interface OllamaChatSettings {
    */
   repeatPenalty?: number
 
+  /**
+   * Simulates streaming by using a normal generate call and returning it as a stream.
+   * Enable this if the model that you are using does not support streaming.
+   *
+   * Defaults to `false`.
+   */
+  simulateStreaming?: boolean
+
   /**
    * Whether to use structured outputs. Defaults to false.
    *
```
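Taken together, the settings changes imply this migration path; a sketch using the identifiers defined above, with an illustrative model name:

```ts
import { ollama } from 'ollama-ai-provider'

// Before: tool responses were detected in streams via the deprecated flag,
// which remains enabled by default for backward compatibility.
const legacyModel = ollama('llama3.1', { experimentalStreamTools: true })

// After: opt in to full simulated streaming instead.
const simulatedModel = ollama('llama3.1', { simulateStreaming: true })
```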
