feat: implement simulateStreaming setting
sgomez committed Jan 17, 2025
1 parent 1c6c05b commit da3c2fd
Showing 5 changed files with 65 additions and 7 deletions.
README.md (7 additions, 5 deletions)

````diff
@@ -107,16 +107,18 @@ Caveats:
 
 ### Tool streaming
 
-> This feature is not completed and unstable
+> This feature is experimental and uses simulation.
 
-Ollama tooling does not support it in streams, but this provider can detect tool responses.
+Tool streaming is not natively supported by Ollama tooling. Previously, the `experimentalStreamTools` option was used to handle tool responses in a simulated streaming mode. While this option is still active by default for backward compatibility, it has been deprecated in favor of the new `simulateStreaming` option.
 
-You can disable this experimental feature with the `experimentalStreamTools` setting:
+The `simulateStreaming` option provides a streaming-like experience by generating the complete response using the generate method and then sending it to the client in segments. This approach simulates real-time streaming while ensuring compatibility with models that do not natively support streaming.
+
+To enable simulated streaming, update your configuration as follows:
 
 ```ts
 ollama("model", {
-  experimentalStreamTools: false,
-})
+  simulateStreaming: true
+});
 ```
 
 ### Intercepting Fetch Requests
````
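For context, a minimal end-to-end sketch of the behavior the new README text describes, assuming the provider is consumed through the AI SDK's `streamText` helper; the import path, model name, and prompt are illustrative and not part of this commit:

```ts
import { streamText } from 'ai'
import { ollama } from 'ollama-ai-provider'

// With simulateStreaming enabled, the provider performs a single generate
// call and then replays the finished response through the stream interface.
const result = streamText({
  model: ollama('llama3.1', { simulateStreaming: true }),
  prompt: 'Explain simulated streaming in one sentence.',
})

for await (const delta of result.textStream) {
  process.stdout.write(delta)
}
```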
examples/ai-core/src/stream-object/ollama-tool.ts (1 addition, 1 deletion)

```diff
@@ -14,7 +14,7 @@ async function main(model: Parameters<typeof ollama>[0]) {
   const result = streamObject({
     maxTokens: 2000,
     mode: 'tool',
-    model: ollama(model),
+    model: ollama(model, { simulateStreaming: true, structuredOutputs: true }),
     prompt:
       'Generate 3 character descriptions for a fantasy role playing game.',
     schema: z.object({
```
examples/weather-ollama/src/lib/ai/actions.tsx (1 addition, 1 deletion)

```diff
@@ -56,7 +56,7 @@ export async function submitUserMessage(content: string): Promise<{
       name: message.name,
       role: message.role,
     })),
-    model: ollama('llama3.1'),
+    model: ollama('llama3.1', { simulateStreaming: true }),
     system: PROMPT,
     // eslint-disable-next-line @typescript-eslint/no-shadow
     text: ({ content, delta, done }) => {
```
packages/ollama/src/ollama-chat-language-model.ts (46 additions, 0 deletions)

```diff
@@ -232,6 +232,52 @@ export class OllamaChatLanguageModel implements LanguageModelV1 {
   async doStream(
     options: Parameters<LanguageModelV1['doStream']>[0],
   ): Promise<Awaited<ReturnType<LanguageModelV1['doStream']>>> {
+    if (this.settings.simulateStreaming) {
+      const result = await this.doGenerate(options)
+
+      const simulatedStream = new ReadableStream<LanguageModelV1StreamPart>({
+        start(controller) {
+          controller.enqueue({ type: 'response-metadata', ...result.response })
+          if (result.text) {
+            controller.enqueue({
+              textDelta: result.text,
+              type: 'text-delta',
+            })
+          }
+          if (result.toolCalls) {
+            for (const toolCall of result.toolCalls) {
+              controller.enqueue({
+                argsTextDelta: toolCall.args,
+                toolCallId: toolCall.toolCallId,
+                toolCallType: 'function',
+                toolName: toolCall.toolName,
+                type: 'tool-call-delta',
+              })
+
+              controller.enqueue({
+                type: 'tool-call',
+                ...toolCall,
+              })
+            }
+          }
+          controller.enqueue({
+            finishReason: result.finishReason,
+            logprobs: result.logprobs,
+            providerMetadata: result.providerMetadata,
+            type: 'finish',
+            usage: result.usage,
+          })
+          controller.close()
+        },
+      })
+      return {
+        rawCall: result.rawCall,
+        rawResponse: result.rawResponse,
+        stream: simulatedStream,
+        warnings: result.warnings,
+      }
+    }
+
     const { args: body, type, warnings } = this.getArguments(options)
 
     const { responseHeaders, value: response } = await postJsonToApi({
```
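As a reference for the code path above, a hypothetical direct consumer of `doStream`; the call options follow the `LanguageModelV1` interface, and the model name and prompt are placeholders:

```ts
import { ollama } from 'ollama-ai-provider'

const model = ollama('llama3.1', { simulateStreaming: true })

// doStream short-circuits into the simulated branch added by this commit.
const { stream } = await model.doStream({
  inputFormat: 'prompt',
  mode: { type: 'regular' },
  prompt: [{ content: [{ text: 'Hello!', type: 'text' }], role: 'user' }],
})

const reader = stream.getReader()
for (;;) {
  const { done, value } = await reader.read()
  if (done) break
  // The full generated text arrives as a single text-delta part,
  // followed by any tool-call parts and a final finish part.
  if (value.type === 'text-delta') process.stdout.write(value.textDelta)
}
```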
packages/ollama/src/ollama-chat-settings.ts (10 additions, 0 deletions)

```diff
@@ -157,6 +157,8 @@ export interface OllamaChatSettings {
   /**
    * Until Ollama officially supports tool calling in streams, the provider can try to detect function calls. Enabled by
    * default to maintain backward compatibility, disable it if you encounter any issues.
+   *
+   * @deprecated Use `simulateStreaming` instead.
    */
   experimentalStreamTools?: boolean
 
@@ -248,6 +250,14 @@ export interface OllamaChatSettings {
    */
   repeatPenalty?: number
 
+  /**
+   * Simulates streaming by using a normal generate call and returning it as a stream.
+   * Enable this if the model that you are using does not support streaming.
+   *
+   * Defaults to `false`.
+   */
+  simulateStreaming?: boolean
+
   /**
    * Whether to use structured outputs. Defaults to false.
    *
```
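Taken together, the settings changes imply this migration path; a sketch using the identifiers defined above, with an illustrative model name:

```ts
import { ollama } from 'ollama-ai-provider'

// Before: tool responses were detected in streams via the deprecated flag,
// which remains enabled by default for backward compatibility.
const legacyModel = ollama('llama3.1', { experimentalStreamTools: true })

// After: opt in to full simulated streaming instead.
const simulatedModel = ollama('llama3.1', { simulateStreaming: true })
```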
