diff --git a/package.json b/package.json
index 19ecbb9..256bbb6 100644
--- a/package.json
+++ b/package.json
@@ -43,6 +43,7 @@
   "dependencies": {
     "@anthropic-ai/sdk": "^0.19.1",
     "@inkjs/ui": "^1.0.0",
+    "@mistralai/mistralai": "^0.1.3",
     "chalk": "^4.1.2",
     "date-fns": "^3.3.1",
     "dotenv": "^16.3.1",
diff --git a/src/commands/chat/providers.tsx b/src/commands/chat/providers.tsx
index 8768d3f..1ebd11f 100644
--- a/src/commands/chat/providers.tsx
+++ b/src/commands/chat/providers.tsx
@@ -1,15 +1,17 @@
 import openAi from '../../engine/providers/openAi.js';
 import anthropic from '../../engine/providers/anthropic.js';
 import perplexity from '../../engine/providers/perplexity.js';
+import mistral from '../../engine/providers/mistral.js';
 import { getProvider, type Provider, type ProviderName } from '../../engine/providers/provider.js';
 import type { ConfigFile } from '../../config-file.js';
 
 export const providerOptionMapping: Record<string, Provider> = {
   openai: openAi,
-  anthropic: anthropic,
+  anthropic,
   anth: anthropic,
-  perplexity: perplexity,
+  perplexity,
   pplx: perplexity,
+  mistral,
 };
 
 export const providerOptions = Object.keys(providerOptionMapping);
diff --git a/src/config-file.ts b/src/config-file.ts
index 69dd5e4..c9fb911 100644
--- a/src/config-file.ts
+++ b/src/config-file.ts
@@ -15,8 +15,9 @@ const CommonProviderSchema = z.object({
 
 const ProvidersSchema = z.object({
   openAi: z.optional(CommonProviderSchema),
-  perplexity: z.optional(CommonProviderSchema),
   anthropic: z.optional(CommonProviderSchema),
+  perplexity: z.optional(CommonProviderSchema),
+  mistral: z.optional(CommonProviderSchema),
 });
 
 const ConfigFileSchema = z.object({
diff --git a/src/engine/providers/mistral.ts b/src/engine/providers/mistral.ts
new file mode 100644
index 0000000..eacb5df
--- /dev/null
+++ b/src/engine/providers/mistral.ts
@@ -0,0 +1,143 @@
+import MistralClient, { type ChatCompletionResponseChunk } from '@mistralai/mistralai';
+import { type Message, type ModelResponseUpdate } from '../inference.js';
+import { estimateInputTokens, estimateOutputTokens } from '../tokenizer.js';
+import { responseStyles, type ProviderConfig } from './config.js';
+import type { Provider } from './provider.js';
+
+// Mistral provides usage data in the last stream chunk.
+interface ChunkWithUsage extends ChatCompletionResponseChunk {
+  usage?: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
+}
+
+const Mistral: Provider = {
+  label: 'Mistral',
+  name: 'mistral',
+  apiKeyUrl: 'https://console.mistral.ai/api-keys/',
+
+  // Mistral models: https://docs.mistral.ai/platform/endpoints/
+  defaultModel: 'mistral-medium-latest',
+
+  // Price per 1M tokens [input, output].
+  // Source: https://docs.mistral.ai/platform/pricing/
+  modelPricing: {
+    'open-mistral-7b': { inputTokensCost: 0.25, outputTokensCost: 0.25 },
+    'open-mixtral-8x7b': { inputTokensCost: 0.7, outputTokensCost: 0.7 },
+    'mistral-small-latest': { inputTokensCost: 2, outputTokensCost: 6 },
+    'mistral-medium-latest': { inputTokensCost: 2.7, outputTokensCost: 8.1 },
+    'mistral-large-latest': { inputTokensCost: 8, outputTokensCost: 24 },
+  },
+
+  modelAliases: {
+    mistral: 'open-mistral-7b',
+    mixtral: 'open-mixtral-8x7b',
+    small: 'mistral-small-latest',
+    medium: 'mistral-medium-latest',
+    large: 'mistral-large-latest',
+  },
+
+  getChatCompletion: async (config: ProviderConfig, messages: Message[]) => {
+    const api = new MistralClient(config.apiKey);
+    const allMessages = getMessages(config, messages);
+
+    const startTime = performance.now();
+    const response = await api.chat({
+      messages: allMessages,
+      model: config.model,
+      ...getMistralResponseStyle(config),
+    });
+    const responseTime = performance.now() - startTime;
+
+    return {
+      message: {
+        role: 'assistant',
+        content: response.choices[0]?.message.content ?? '',
+      },
+      usage: {
+        inputTokens: response.usage?.prompt_tokens ?? 0,
+        outputTokens: response.usage?.completion_tokens ?? 0,
+        requests: 1,
+      },
+      responseTime,
+      responseModel: response.model,
+      data: response,
+    };
+  },
+
+  getChatCompletionStream: async function (
+    config: ProviderConfig,
+    messages: Message[],
+    onResponseUpdate: (update: ModelResponseUpdate) => void,
+  ) {
+    const api = new MistralClient(config.apiKey);
+    const allMessages = getMessages(config, messages);
+
+    const startTime = performance.now();
+    const stream = await api.chatStream({
+      messages: allMessages,
+      model: config.model,
+      ...getMistralResponseStyle(config),
+    });
+
+    let lastChunk: ChunkWithUsage | null = null;
+    let content = '';
+    for await (const chunk of stream) {
+      lastChunk = chunk;
+      content += chunk.choices[0]?.delta?.content || '';
+      onResponseUpdate({ content });
+    }
+
+    const responseTime = performance.now() - startTime;
+
+    return {
+      message: {
+        role: 'assistant',
+        content,
+      },
+      usage: {
+        inputTokens: lastChunk?.usage?.prompt_tokens ?? estimateInputTokens(allMessages),
+        outputTokens: lastChunk?.usage?.completion_tokens ?? estimateOutputTokens(content),
+        requests: 1,
+      },
+      responseTime,
+      responseModel: lastChunk?.model || 'unknown',
+      data: lastChunk,
+    };
+  },
+};
+
+function getMessages(config: ProviderConfig, messages: Message[]): Message[] {
+  if (!config.systemPrompt) {
+    return messages;
+  }
+
+  const systemMessage: Message = {
+    role: 'system',
+    content: config.systemPrompt,
+  };
+  return [systemMessage, ...messages];
+}
+
+interface MistralResponseStyle {
+  temperature?: number;
+  topP?: number;
+}
+
+function getMistralResponseStyle(config: ProviderConfig): MistralResponseStyle {
+  const style = responseStyles[config.responseStyle];
+
+  const result: MistralResponseStyle = {};
+  if ('temperature' in style) {
+    result.temperature = style.temperature;
+  }
+  if ('top_p' in style) {
+    result.topP = style.top_p;
+  }
+
+  return result;
+}
+
+export default Mistral;
diff --git a/src/engine/providers/provider.ts b/src/engine/providers/provider.ts
index 965b283..98bc360 100644
--- a/src/engine/providers/provider.ts
+++ b/src/engine/providers/provider.ts
@@ -3,8 +3,9 @@ import type { ProviderConfig } from './config.js';
 import openAi from './openAi.js';
 import perplexity from './perplexity.js';
 import anthropic from './anthropic.js';
+import mistral from './mistral.js';
 
-export const providerNames = ['openAi', 'anthropic', 'perplexity'] as const;
+export const providerNames = ['openAi', 'anthropic', 'perplexity', 'mistral'] as const;
 export type ProviderName = (typeof providerNames)[number];
 
 export interface Provider {
@@ -41,6 +42,7 @@ const providersMap: Record<ProviderName, Provider> = {
   openAi,
   anthropic,
   perplexity,
+  mistral,
 };
 
 export const providers = Object.values(providersMap);
diff --git a/src/engine/providers/utils/open-ai-api.ts b/src/engine/providers/utils/open-ai-api.ts
index 5b64c26..2225fe5 100644
--- a/src/engine/providers/utils/open-ai-api.ts
+++ b/src/engine/providers/utils/open-ai-api.ts
@@ -36,7 +36,7 @@ export async function getChatCompletion(
 }
 
 // Perplexity provides output data in the last chunk, while OpenAI does not.
-interface ChunkWithExtras extends ChatCompletionChunk {
+interface ChunkWithUsage extends ChatCompletionChunk {
   usage?: {
     prompt_tokens: number;
     completion_tokens: number;
@@ -60,7 +60,7 @@ export async function getChatCompletionStream(
     ...responseStyles[config.responseStyle],
   });
 
-  const chunks: ChunkWithExtras[] = [];
+  const chunks: ChunkWithUsage[] = [];
   let content = '';
 
   for await (const chunk of stream) {
@@ -70,7 +70,7 @@ export async function getChatCompletionStream(
   }
 
   const responseTime = performance.now() - startTime;
-  const lastChunk = chunks[chunks.length - 1] as ChunkWithExtras;
+  const lastChunk = chunks[chunks.length - 1] as ChunkWithUsage;
 
   return {
     message: {
diff --git a/website/docs/config-file.md b/website/docs/config-file.md
index f91995d..3c37b13 100644
--- a/website/docs/config-file.md
+++ b/website/docs/config-file.md
@@ -46,6 +46,17 @@ A minimal `~/.airc.json` file consists only of API key for selected AI inference
 }
 ```
 
+
+```json
+{
+  "providers": {
+    "mistral": {
+      "apiKey": "Your API key"
+    }
+  }
+}
+```
+
 ## Provider Options
 
@@ -57,6 +68,7 @@ Supported providers are currently:
 - `openAi`
 - `anthropic`
 - `perplexity`
+- `mistral`
 
 ### AI Model
 
@@ -99,6 +111,18 @@ Each of supported providers can be tuned with `model` option to select an exact
 }
 ```
 
+
+```json
+{
+  "providers": {
+    "mistral": {
+      // ...
+      "model": "open-mixtral-8x7b"
+    }
+  }
+}
+```
+
 Choosing proper model can have a huge impact on your AI assistant response quality, response time, as well as costs (although costs should be reasonable for manual, single-user interactions).
 
@@ -108,6 +132,7 @@ Available models:
 - [OpenAI](https://platform.openai.com/docs/models)
 - [Anthropic](https://docs.anthropic.com/claude/docs/models-overview)
 - [Perplexity](https://docs.perplexity.ai/docs/model-cards)
+- [Mistral](https://docs.mistral.ai/platform/endpoints/)
 
 ### System Prompt
 
@@ -150,6 +175,18 @@ You can specify system prompt for each of the supported providers:
 }
 ```
 
+
+```json
+{
+  "providers": {
+    "mistral": {
+      // ...
+      "systemPrompt": "You are a helpful AI assistant. Respond in a concise way."
+    }
+  }
+}
+```
+
 System prompt is an important part of AI model "personality" and should specify the key aspects you expect from AI. LLMs typically put great weight to the instructions given in the system prompt.
 
diff --git a/website/docs/getting-started.md b/website/docs/getting-started.md
index d044226..58f49f9 100644
--- a/website/docs/getting-started.md
+++ b/website/docs/getting-started.md
@@ -60,6 +60,7 @@ CLI options are passed when invoking the `ai` commend:
   - `openai`
   - `anthropic` (or `anth`)
   - `perplexity` (or `pplx`)
+  - `mistral`
 - `--model [name]` (or `-m [name]`): select a model to use. This should be a model available for the selected provider.
 - `--creative`: respond in a creative way
 - `--precise`: respond in a more accurate way
@@ -93,6 +94,15 @@ Using full model names can be tedious, so AI CLI supports shorthand model aliase
 | `mistral` | `mistral-7b-instruct`   |
 | `mixtral` | `mixtral-8x7b-instruct` |
 
+
+| Alias     | Model                   |
+| --------- | ----------------------- |
+| `mistral` | `open-mistral-7b`       |
+| `mixtral` | `open-mixtral-8x7b`     |
+| `small`   | `mistral-small-latest`  |
+| `medium`  | `mistral-medium-latest` |
+| `large`   | `mistral-large-latest`  |
+
 ## CLI commands
 
diff --git a/yarn.lock b/yarn.lock
index 83dc130..a613e8a 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -509,6 +509,7 @@ __metadata:
     "@commitlint/config-conventional": ^17.0.2
     "@evilmartians/lefthook": ^1.5.0
     "@inkjs/ui": ^1.0.0
+    "@mistralai/mistralai": ^0.1.3
     "@release-it/conventional-changelog": ^5.0.0
     "@types/jest": ^28.1.2
     "@types/mock-fs": ^4.13.4
@@ -1201,6 +1202,15 @@
   languageName: node
   linkType: hard
 
+"@mistralai/mistralai@npm:^0.1.3":
+  version: 0.1.3
+  resolution: "@mistralai/mistralai@npm:0.1.3"
+  dependencies:
+    node-fetch: ^2.6.7
+  checksum: 3f8299811b06027dfbdae4fd86564ccda1a48ec4276940e1dcace4fa447cc4f7e61808a38d71ef73116fc6cf90b74257e6537e1c47c96d50ff52bfb8f62d2947
+  languageName: node
+  linkType: hard
+
 "@nicolo-ribaudo/eslint-scope-5-internals@npm:5.1.1-v1":
   version: 5.1.1-v1
  resolution: "@nicolo-ribaudo/eslint-scope-5-internals@npm:5.1.1-v1"
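
For reviewers, here is a minimal sketch of how the new provider could be exercised directly, outside the CLI. It is illustrative only and not part of the patch: the `ProviderConfig` fields shown (`apiKey`, `model`, `systemPrompt`, `responseStyle`) are inferred from how `mistral.ts` reads them, while the `responseStyle: 'default'` key, the `MISTRAL_API_KEY` environment variable, and the import path are assumptions.

```ts
// Illustrative sketch only; not part of this diff.
// Assumes it sits next to the provider files in src/engine/providers/ and that
// ProviderConfig accepts the fields below ('default' as a responseStyles key is a guess).
import mistral from './mistral.js';

const response = await mistral.getChatCompletion(
  {
    apiKey: process.env.MISTRAL_API_KEY ?? '',
    model: 'mistral-medium-latest',
    systemPrompt: 'You are a helpful AI assistant. Respond in a concise way.',
    responseStyle: 'default',
  },
  [{ role: 'user', content: 'Hello!' }],
);

// The result shape comes from mistral.ts above: message plus token usage.
console.log(response.message.content);
console.log(`Tokens used: ${response.usage.inputTokens} in, ${response.usage.outputTokens} out`);
```

Streaming works the same way through `getChatCompletionStream`, with `onResponseUpdate` receiving the accumulated `content` after each chunk; both paths share `getMistralResponseStyle`, which maps the shared `top_p` style field to the SDK's camelCase `topP`.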