-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2d3f234
commit 4f2fe04
Showing
9 changed files
with
215 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
import MistralClient, { type ChatCompletionResponseChunk } from '@mistralai/mistralai'; | ||
import { type Message, type ModelResponseUpdate } from '../inference.js'; | ||
import { estimateInputTokens, estimateOutputTokens } from '../tokenizer.js'; | ||
import { responseStyles, type ProviderConfig } from './config.js'; | ||
import type { Provider } from './provider.js'; | ||
|
||
// Mistral provides output data in the last chunk.
// The SDK's chunk type does not declare it, so we extend the type with the
// optional snake_case `usage` payload as it appears on the wire.
interface ChunkWithUsage extends ChatCompletionResponseChunk {
  usage?: {
    // Tokens consumed by the prompt.
    prompt_tokens: number;
    // Tokens generated in the completion.
    completion_tokens: number;
    // prompt_tokens + completion_tokens.
    total_tokens: number;
  };
}
|
||
const Mistral: Provider = { | ||
label: 'Mistral', | ||
name: 'mistral', | ||
apiKeyUrl: 'https://console.mistral.ai/api-keys/', | ||
|
||
// OpenAI models: https://docs.mistral.ai/platform/endpoints/ | ||
defaultModel: 'mistral-medium-latest', | ||
|
||
// Price per 1k tokens [input, output]. | ||
// Source: https://docs.mistral.ai/platform/pricing/ | ||
modelPricing: { | ||
'open-mistral-7b': { inputTokensCost: 0.25 / 1000, outputTokensCost: 0.25 / 1000 }, | ||
'open-mixtral-8x7b': { inputTokensCost: 0.7 / 1000, outputTokensCost: 0.7 / 1000 }, | ||
'mistral-small-latest': { inputTokensCost: 2 / 1000, outputTokensCost: 6 / 1000 }, | ||
'mistral-medium-latest': { inputTokensCost: 2.7 / 1000, outputTokensCost: 8.1 / 1000 }, | ||
'mistral-large-latest': { inputTokensCost: 8 / 1000, outputTokensCost: 24 / 1000 }, | ||
}, | ||
|
||
modelAliases: { | ||
mistral: 'open-mistral-7b', | ||
mixtral: 'open-mixtral-8x7b', | ||
small: 'mistral-small-latest', | ||
medium: 'mistral-medium-latest', | ||
large: 'mistral-large-latest', | ||
}, | ||
|
||
getChatCompletion: async (config: ProviderConfig, messages: Message[]) => { | ||
const api = new MistralClient(config.apiKey); | ||
|
||
const allMessages = getMessages(config, messages); | ||
|
||
const startTime = performance.now(); | ||
const response = await api.chat({ | ||
messages: allMessages, | ||
model: config.model, | ||
...getMistralResponseStyle(config), | ||
}); | ||
const responseTime = performance.now() - startTime; | ||
|
||
return { | ||
message: { | ||
role: 'assistant', | ||
content: response.choices[0]?.message.content ?? '', | ||
}, | ||
usage: { | ||
inputTokens: response.usage?.prompt_tokens ?? 0, | ||
outputTokens: response.usage?.completion_tokens ?? 0, | ||
requests: 1, | ||
}, | ||
responseTime, | ||
responseModel: response.model, | ||
data: response, | ||
}; | ||
}, | ||
|
||
getChatCompletionStream: async function ( | ||
config: ProviderConfig, | ||
messages: Message[], | ||
onResponseUpdate: (update: ModelResponseUpdate) => void, | ||
) { | ||
const api = new MistralClient(config.apiKey); | ||
|
||
const allMessages = getMessages(config, messages); | ||
|
||
const startTime = performance.now(); | ||
const stream = await api.chatStream({ | ||
messages: allMessages, | ||
model: config.model, | ||
...getMistralResponseStyle(config), | ||
}); | ||
|
||
let lastChunk: ChunkWithUsage | null = null; | ||
let content = ''; | ||
for await (const chunk of stream) { | ||
lastChunk = chunk; | ||
content += chunk.choices[0]?.delta?.content || ''; | ||
onResponseUpdate({ content }); | ||
} | ||
|
||
const responseTime = performance.now() - startTime; | ||
|
||
return { | ||
message: { | ||
role: 'assistant', | ||
content, | ||
}, | ||
usage: { | ||
inputTokens: lastChunk?.usage?.prompt_tokens ?? estimateInputTokens(allMessages), | ||
outputTokens: lastChunk?.usage?.completion_tokens ?? estimateOutputTokens(content), | ||
requests: 1, | ||
}, | ||
responseTime, | ||
responseModel: lastChunk?.model || 'unknown', | ||
data: lastChunk, | ||
}; | ||
}, | ||
}; | ||
|
||
function getMessages(config: ProviderConfig, messages: Message[]): Message[] { | ||
if (!config.systemPrompt) { | ||
return messages; | ||
} | ||
|
||
const systemMessage: Message = { | ||
role: 'system', | ||
content: config.systemPrompt, | ||
}; | ||
return [systemMessage, ...messages]; | ||
} | ||
|
||
// Request-tuning options in the camelCase form the Mistral SDK expects
// (our shared response-style config uses snake_case `top_p`).
interface MistralResponseStyle {
  temperature?: number;
  topP?: number;
}
|
||
function getMistralResponseStyle(config: ProviderConfig): MistralResponseStyle { | ||
const style = responseStyles[config.responseStyle]; | ||
|
||
const result: MistralResponseStyle = {}; | ||
if ('temperature' in style) { | ||
result.temperature = style.temperature; | ||
} | ||
if ('top_p' in style) { | ||
result.topP = style.top_p; | ||
} | ||
|
||
return result; | ||
} | ||
|
||
// The provider object is this module's sole export.
export default Mistral;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters