
Commit

Merge remote-tracking branch 'upstream/main'
GOvEy1nw committed Aug 17, 2024
2 parents 5fd4f10 + a45b384 commit 70819c3
Showing 245 changed files with 7,606 additions and 4,190 deletions.
2 changes: 1 addition & 1 deletion .env.example
@@ -147,7 +147,7 @@ GOOGLE_KEY=user_provided
 #============#
 
 OPENAI_API_KEY=user_provided
-# OPENAI_MODELS=gpt-4o,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-0301,gpt-3.5-turbo,gpt-4,gpt-4-0613,gpt-4-vision-preview,gpt-3.5-turbo-0613,gpt-3.5-turbo-16k-0613,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview,gpt-3.5-turbo-1106,gpt-3.5-turbo-instruct,gpt-3.5-turbo-instruct-0914,gpt-3.5-turbo-16k
+# OPENAI_MODELS=gpt-4o,chatgpt-4o-latest,gpt-4o-mini,gpt-3.5-turbo-0125,gpt-3.5-turbo-0301,gpt-3.5-turbo,gpt-4,gpt-4-0613,gpt-4-vision-preview,gpt-3.5-turbo-0613,gpt-3.5-turbo-16k-0613,gpt-4-0125-preview,gpt-4-turbo-preview,gpt-4-1106-preview,gpt-3.5-turbo-1106,gpt-3.5-turbo-instruct,gpt-3.5-turbo-instruct-0914,gpt-3.5-turbo-16k
 
 DEBUG_OPENAI=false
11 changes: 10 additions & 1 deletion .github/workflows/a11y.yml
@@ -4,14 +4,23 @@ on:
   pull_request:
     paths:
       - 'client/src/**'
+  workflow_dispatch:
+    inputs:
+      run_workflow:
+        description: 'Set to true to run this workflow'
+        required: true
+        default: 'false'
 
 jobs:
   axe-linter:
     runs-on: ubuntu-latest
+    if: >
+      (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == 'danny-avila/LibreChat') ||
+      (github.event_name == 'workflow_dispatch' && github.event.inputs.run_workflow == 'true')
     steps:
       - uses: actions/checkout@v4
       - uses: dequelabs/axe-linter-action@v1
         with:
          api_key: ${{ secrets.AXE_LINTER_API_KEY }}
-          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
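With the new workflow_dispatch trigger, the accessibility lint can also be started by hand, for example with the GitHub CLI (the workflow file name is taken from the path above; the invocation itself is an assumption):

gh workflow run a11y.yml -f run_workflow=true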
41 changes: 41 additions & 0 deletions .github/workflows/deploy-dev.yml
@@ -0,0 +1,41 @@
+name: Update Test Server
+
+on:
+  workflow_run:
+    workflows: ["Docker Dev Images Build"]
+    types:
+      - completed
+  workflow_dispatch:
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    if: |
+      github.repository == 'danny-avila/LibreChat' &&
+      (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success')
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install SSH Key
+        uses: shimataro/ssh-key-action@v2
+        with:
+          key: ${{ secrets.DO_SSH_PRIVATE_KEY }}
+          known_hosts: ${{ secrets.DO_KNOWN_HOSTS }}
+
+      - name: Run update script on DigitalOcean Droplet
+        env:
+          DO_HOST: ${{ secrets.DO_HOST }}
+          DO_USER: ${{ secrets.DO_USER }}
+        run: |
+          ssh -o StrictHostKeyChecking=no ${DO_USER}@${DO_HOST} << EOF
+            sudo -i -u danny bash << EEOF
+              cd ~/LibreChat && \
+              git fetch origin main && \
+              npm run update:deployed && \
+              git checkout do-deploy && \
+              git rebase main && \
+              npm run start:deployed && \
+              echo "Update completed. Application should be running now."
+          EEOF
+          EOF
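In short: the deploy job runs only on the upstream danny-avila/LibreChat repository, and only after the "Docker Dev Images Build" workflow concludes successfully or when dispatched manually. The nested heredocs matter here: the outer EOF feeds the script to the SSH session, while the inner EEOF hands the update commands to a login shell for the danny user via sudo -i.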
16 changes: 16 additions & 0 deletions .vscode/launch.json
@@ -0,0 +1,16 @@
+{
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "type": "node",
+      "request": "launch",
+      "name": "Launch LibreChat (debug)",
+      "skipFiles": ["<node_internals>/**"],
+      "program": "${workspaceFolder}/api/server/index.js",
+      "env": {
+        "NODE_ENV": "production"
+      },
+      "console": "integratedTerminal"
+    }
+  ]
+}
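With this file in place, choosing "Launch LibreChat (debug)" from VS Code's Run and Debug panel (or pressing F5) starts api/server/index.js under the Node debugger in the integrated terminal, with NODE_ENV=production set and Node internals excluded from stepping.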
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
-# v0.7.3
+# v0.7.4
 
 # Base node image
 FROM node:20-alpine AS node
2 changes: 1 addition & 1 deletion Dockerfile.multi
@@ -1,4 +1,4 @@
-# v0.7.3
+# v0.7.4
 
 # Build API, Client and Data Provider
 FROM node:20-alpine AS base
2 changes: 1 addition & 1 deletion README.md
@@ -81,7 +81,7 @@ LibreChat brings together the future of assistant AIs with the revolutionary tec
 
 With LibreChat, you no longer need to opt for ChatGPT Plus and can instead use free or pay-per-call APIs. We welcome contributions, cloning, and forking to enhance the capabilities of this advanced chatbot platform.
 
-[![Watch the video](https://img.youtube.com/vi/bSVHEbVPNl4/maxresdefault.jpg)](https://www.youtube.com/watch?v=bSVHEbVPNl4)
+[![Watch the video](https://raw.githubusercontent.com/LibreChat-AI/librechat.ai/main/public/images/changelog/v0.7.4.png)](https://www.youtube.com/watch?v=cvosUxogdpI)
 Click on the thumbnail to open the video☝️
 
 ---
159 changes: 149 additions & 10 deletions api/app/clients/AnthropicClient.js
@@ -12,12 +12,13 @@ const { encodeAndFormat } = require('~/server/services/Files/images/encode');
 const {
   truncateText,
   formatMessage,
+  addCacheControl,
   titleFunctionPrompt,
   parseParamFromPrompt,
   createContextHandlers,
 } = require('./prompts');
-const spendTokens = require('~/models/spendTokens');
-const { getModelMaxTokens } = require('~/utils');
+const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
+const { getModelMaxTokens, matchModelName } = require('~/utils');
 const { sleep } = require('~/server/utils');
 const BaseClient = require('./BaseClient');
 const { logger } = require('~/config');
@@ -32,6 +33,7 @@ function delayBeforeRetry(attempts, baseDelay = 1000) {
   return new Promise((resolve) => setTimeout(resolve, baseDelay * attempts));
 }
 
+const tokenEventTypes = new Set(['message_start', 'message_delta']);
 const { legacy } = anthropicSettings;
 
 class AnthropicClient extends BaseClient {
@@ -44,6 +46,24 @@ class AnthropicClient extends BaseClient {
       ? options.contextStrategy.toLowerCase()
       : 'discard';
     this.setOptions(options);
+    /** @type {string | undefined} */
+    this.systemMessage;
+    /** @type {AnthropicMessageStartEvent | undefined} */
+    this.message_start;
+    /** @type {AnthropicMessageDeltaEvent | undefined} */
+    this.message_delta;
+    /** Whether the model is part of the Claude 3 Family
+     * @type {boolean} */
+    this.isClaude3;
+    /** Whether to use Messages API or Completions API
+     * @type {boolean} */
+    this.useMessages;
+    /** Whether or not the model is limited to the legacy amount of output tokens
+     * @type {boolean} */
+    this.isLegacyOutput;
+    /** Whether or not the model supports Prompt Caching
+     * @type {boolean} */
+    this.supportsCacheControl;
   }
 
   setOptions(options) {
@@ -69,8 +89,10 @@ class AnthropicClient extends BaseClient {
       model: modelOptions.model || anthropicSettings.model.default,
     };
 
-    this.isClaude3 = this.modelOptions.model.includes('claude-3');
-    this.isLegacyOutput = !this.modelOptions.model.includes('claude-3-5-sonnet');
+    const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic);
+    this.isClaude3 = modelMatch.startsWith('claude-3');
+    this.isLegacyOutput = !modelMatch.startsWith('claude-3-5-sonnet');
+    this.supportsCacheControl = this.checkPromptCacheSupport(modelMatch);
 
     if (
       this.isLegacyOutput &&
@@ -147,19 +169,74 @@ class AnthropicClient extends BaseClient {
       options.baseURL = this.options.reverseProxyUrl;
     }
 
-    if (requestOptions?.model && requestOptions.model.includes('claude-3-5-sonnet')) {
+    if (
+      this.supportsCacheControl &&
+      requestOptions?.model &&
+      requestOptions.model.includes('claude-3-5-sonnet')
+    ) {
       options.defaultHeaders = {
-        'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15',
+        'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15,prompt-caching-2024-07-31',
       };
+    } else if (this.supportsCacheControl) {
+      options.defaultHeaders = {
+        'anthropic-beta': 'prompt-caching-2024-07-31',
+      };
     }
 
     return new Anthropic(options);
   }
 
-  getTokenCountForResponse(response) {
+  /**
+   * Get stream usage as returned by this client's API response.
+   * @returns {AnthropicStreamUsage} The stream usage object.
+   */
+  getStreamUsage() {
+    const inputUsage = this.message_start?.message?.usage ?? {};
+    const outputUsage = this.message_delta?.usage ?? {};
+    return Object.assign({}, inputUsage, outputUsage);
+  }
+
+  /**
+   * Calculates the correct token count for the current message based on the token count map and API usage.
+   * Edge case: If the calculation results in a negative value, it returns the original estimate.
+   * If revisiting a conversation with a chat history entirely composed of token estimates,
+   * the cumulative token count going forward should become more accurate as the conversation progresses.
+   * @param {Object} params - The parameters for the calculation.
+   * @param {Record<string, number>} params.tokenCountMap - A map of message IDs to their token counts.
+   * @param {string} params.currentMessageId - The ID of the current message to calculate.
+   * @param {AnthropicStreamUsage} params.usage - The usage object returned by the API.
+   * @returns {number} The correct token count for the current message.
+   */
+  calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) {
+    const originalEstimate = tokenCountMap[currentMessageId] || 0;
+
+    if (!usage || typeof usage.input_tokens !== 'number') {
+      return originalEstimate;
+    }
+
+    tokenCountMap[currentMessageId] = 0;
+    const totalTokensFromMap = Object.values(tokenCountMap).reduce((sum, count) => {
+      const numCount = Number(count);
+      return sum + (isNaN(numCount) ? 0 : numCount);
+    }, 0);
+    const totalInputTokens =
+      (usage.input_tokens ?? 0) +
+      (usage.cache_creation_input_tokens ?? 0) +
+      (usage.cache_read_input_tokens ?? 0);
+
+    const currentMessageTokens = totalInputTokens - totalTokensFromMap;
+    return currentMessageTokens > 0 ? currentMessageTokens : originalEstimate;
+  }
+
+  /**
+   * Get Token Count for LibreChat Message
+   * @param {TMessage} responseMessage
+   * @returns {number}
+   */
+  getTokenCountForResponse(responseMessage) {
     return this.getTokenCountForMessage({
       role: 'assistant',
-      content: response.text,
+      content: responseMessage.text,
     });
   }
 
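The arithmetic in calculateCurrentTokenCount is easiest to follow with concrete numbers. A sketch with invented values:

// Estimates: two prior messages at 40 and 55 tokens, the current message at 30.
const tokenCountMap = { msg1: 40, msg2: 55, msg3: 30 };
// Usage reported by the API, mostly served from the prompt cache:
const usage = { input_tokens: 10, cache_creation_input_tokens: 0, cache_read_input_tokens: 100 };
// The method zeroes the current entry, sums the rest (40 + 55 = 95),
// totals the reported input (10 + 0 + 100 = 110), and attributes the
// difference to the current message: 110 - 95 = 15 tokens.
// Were the difference negative, the original estimate (30) would be returned instead.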
@@ -212,7 +289,38 @@
     return files;
   }
 
-  async recordTokenUsage({ promptTokens, completionTokens, model, context = 'message' }) {
+  /**
+   * @param {object} params
+   * @param {number} params.promptTokens
+   * @param {number} params.completionTokens
+   * @param {AnthropicStreamUsage} [params.usage]
+   * @param {string} [params.model]
+   * @param {string} [params.context='message']
+   * @returns {Promise<void>}
+   */
+  async recordTokenUsage({ promptTokens, completionTokens, usage, model, context = 'message' }) {
+    if (usage != null && usage?.input_tokens != null) {
+      const input = usage.input_tokens ?? 0;
+      const write = usage.cache_creation_input_tokens ?? 0;
+      const read = usage.cache_read_input_tokens ?? 0;
+
+      await spendStructuredTokens(
+        {
+          context,
+          user: this.user,
+          conversationId: this.conversationId,
+          model: model ?? this.modelOptions.model,
+          endpointTokenConfig: this.options.endpointTokenConfig,
+        },
+        {
+          promptTokens: { input, write, read },
+          completionTokens,
+        },
+      );
+
+      return;
+    }
+
     await spendTokens(
       {
         context,
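Anthropic prices cache writes and cache reads differently from plain input tokens, which is presumably why the spend is now recorded per bucket rather than as a single promptTokens number. A minimal sketch of the mapping (usage values invented; metadata fields as in the call above):

const usage = {
  input_tokens: 12, // uncached prompt tokens
  cache_creation_input_tokens: 1024, // tokens written to the cache this turn
  cache_read_input_tokens: 0, // tokens served from a prior cache write
};
const promptTokens = {
  input: usage.input_tokens ?? 0,
  write: usage.cache_creation_input_tokens ?? 0,
  read: usage.cache_read_input_tokens ?? 0,
};
// spendStructuredTokens({ context, user, conversationId, model, endpointTokenConfig }, { promptTokens, completionTokens });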
@@ -560,6 +668,18 @@
       : await client.completions.create(options);
   }
 
+  /**
+   * @param {string} modelName
+   * @returns {boolean}
+   */
+  checkPromptCacheSupport(modelName) {
+    const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
+    if (modelMatch === 'claude-3-5-sonnet' || modelMatch === 'claude-3-haiku') {
+      return true;
+    }
+    return false;
+  }
+
   async sendCompletion(payload, { onProgress, abortController }) {
     if (!abortController) {
       abortController = new AbortController();
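Dated releases still need to match, so the check leans on matchModelName to normalize a model string to its family key. A hedged sketch of the expected behavior (the exact normalization is an assumption):

// matchModelName('claude-3-5-sonnet-20240620', EModelEndpoint.anthropic) -> 'claude-3-5-sonnet' -> true
// matchModelName('claude-3-haiku-20240307', EModelEndpoint.anthropic)    -> 'claude-3-haiku'    -> true
// matchModelName('claude-3-opus-20240229', EModelEndpoint.anthropic)     -> 'claude-3-opus'     -> false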
@@ -606,10 +726,22 @@
       requestOptions.max_tokens_to_sample = maxOutputTokens || 1500;
     }
 
-    if (this.systemMessage) {
+    if (this.systemMessage && this.supportsCacheControl === true) {
+      requestOptions.system = [
+        {
+          type: 'text',
+          text: this.systemMessage,
+          cache_control: { type: 'ephemeral' },
+        },
+      ];
+    } else if (this.systemMessage) {
       requestOptions.system = this.systemMessage;
     }
 
+    if (this.supportsCacheControl === true && this.useMessages) {
+      requestOptions.messages = addCacheControl(requestOptions.messages);
+    }
+
     logger.debug('[AnthropicClient]', { ...requestOptions });
 
     const handleChunk = (currentChunk) => {
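With caching active, the request body carries cache_control markers on content blocks, per Anthropic's prompt-caching beta. An illustrative shape (the messages entry assumes addCacheControl tags message content the same way the system block is tagged):

const requestOptions = {
  model: 'claude-3-5-sonnet-20240620',
  system: [
    { type: 'text', text: 'You are a helpful assistant.', cache_control: { type: 'ephemeral' } },
  ],
  messages: [
    { role: 'user', content: [{ type: 'text', text: 'Hello!', cache_control: { type: 'ephemeral' } }] },
  ],
};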
@@ -639,6 +771,11 @@
 
     for await (const completion of response) {
       // Handle each completion as before
+      const type = completion?.type ?? '';
+      if (tokenEventTypes.has(type)) {
+        logger.debug(`[AnthropicClient] ${type}`, completion);
+        this[type] = completion;
+      }
       if (completion?.delta?.text) {
         handleChunk(completion.delta.text);
       } else if (completion.completion) {
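The two captured events carry the usage halves that getStreamUsage later merges. Their shapes, per Anthropic's streaming API (abridged):

// message_start -> { type: 'message_start', message: { usage: { input_tokens, cache_creation_input_tokens, cache_read_input_tokens, output_tokens } } }
// message_delta -> { type: 'message_delta', delta: { ... }, usage: { output_tokens } }
// Because tokenEventTypes holds exactly these two names, `this[type] = completion`
// stores them as this.message_start and this.message_delta.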
@@ -727,6 +864,8 @@
    */
   async titleConvo({ text, responseText = '' }) {
     let title = 'New Chat';
+    this.message_delta = undefined;
+    this.message_start = undefined;
     const convo = `<initial_message>
 ${truncateText(text)}
 </initial_message>
