🤖 feat: Custom Endpoint Agents (experimental) (#4627)

* wip: first pass, custom endpoint agents
* chore: imports
* chore: consolidate exports
* fix: imports
* feat: convert message.content array to strings for legacy format handling (deepseek/groq)
* refactor: normalize ollama endpoint name
* refactor: update mocking in isDomainAllowed.spec.js
* refactor: update deepseekModels in tokens.js and tokens.spec.js
danny-avila · Nov 4, 2024 · 2e519f9 · 2e519f9
1 parent 9437e95
commit 2e519f9
Show file tree

Hide file tree

Showing 23 changed files with 230 additions and 73 deletions.
diff --git a/api/app/clients/prompts/formatMessages.js b/api/app/clients/prompts/formatMessages.js
@@ -217,9 +217,41 @@ const formatAgentMessages = (payload) => {
   return messages;
 };
 
+/**
+ * Formats an array of messages for LangChain, making sure all content fields are strings.
+ * Array content is flattened in place: text parts are joined with newlines, all other part types are dropped.
+ * @param {Array<(HumanMessage|AIMessage|SystemMessage|ToolMessage)>} payload - The array of messages to format (mutated in place).
+ * @returns {Array<(HumanMessage|AIMessage|SystemMessage|ToolMessage)>} - The same `payload` array, with array content replaced by strings.
+ */
+const formatContentStrings = (payload) => {
+  for (const message of payload) {
+    if (typeof message.content === 'string') {
+      continue;
+    }
+
+    if (!Array.isArray(message.content)) {
+      continue;
+    }
+
+    // Reduce text types to a single string, ignore all other types
+    const content = message.content.reduce((acc, curr) => {
+      if (curr.type === ContentTypes.TEXT) {
+        return `${acc}${curr[ContentTypes.TEXT]}\n`;
+      }
+      return acc;
+    }, '');
+
+    message.content = content.trim();
+  }
+
+  // Return the formatted messages themselves (an always-empty array was returned before)
+  return payload;
+};
+
 module.exports = {
   formatMessage,
   formatFromLangChain,
   formatAgentMessages,
+  formatContentStrings,
   formatLangChainMessages,
 };
diff --git a/api/server/controllers/agents/client.js b/api/server/controllers/agents/client.js
@@ -13,6 +13,7 @@ const {
   VisionModes,
   openAISchema,
   EModelEndpoint,
+  KnownEndpoints,
   anthropicSchema,
   bedrockOutputParser,
   removeNullishValues,
@@ -25,6 +26,7 @@ const {
 const {
   formatMessage,
   formatAgentMessages,
+  formatContentStrings,
   createContextHandlers,
 } = require('~/app/clients/prompts');
 const { encodeAndFormat } = require('~/server/services/Files/images/encode');
@@ -44,6 +46,8 @@ const providerParsers = {
   [EModelEndpoint.bedrock]: bedrockOutputParser,
 };
 
+const legacyContentEndpoints = new Set([KnownEndpoints.groq, KnownEndpoints.deepseek]);
+
 class AgentClient extends BaseClient {
   constructor(options = {}) {
     super(null, options);
@@ -74,6 +78,7 @@ class AgentClient extends BaseClient {
     this.collectedUsage = collectedUsage;
     /** @type {ArtifactPromises} */
     this.artifactPromises = artifactPromises;
+    /** @type {AgentClientOptions} */
     this.options = Object.assign({ endpoint: options.endpoint }, clientOptions);
   }
 
@@ -478,6 +483,9 @@ class AgentClient extends BaseClient {
       this.run = run;
 
       const messages = formatAgentMessages(payload);
+      if (legacyContentEndpoints.has(this.options.agent.endpoint)) {
+        formatContentStrings(messages);
+      }
       await run.processStream({ messages }, config, {
         [Callback.TOOL_ERROR]: (graph, error, toolId) => {
           logger.error(

diff --git a/api/server/routes/files/multer.js b/api/server/routes/files/multer.js
@@ -3,7 +3,7 @@ const path = require('path');
 const crypto = require('crypto');
 const multer = require('multer');
 const { fileConfig: defaultFileConfig, mergeFileConfig } = require('librechat-data-provider');
-const getCustomConfig = require('~/server/services/Config/getCustomConfig');
+const { getCustomConfig } = require('~/server/services/Config');
 
 const storage = multer.diskStorage({
   destination: function (req, file, cb) {

diff --git a/api/server/services/Config/getCustomConfig.js b/api/server/services/Config/getCustomConfig.js
@@ -1,4 +1,4 @@
-const { CacheKeys } = require('librechat-data-provider');
+const { CacheKeys, EModelEndpoint } = require('librechat-data-provider');
 const loadCustomConfig = require('./loadCustomConfig');
 const getLogStores = require('~/cache/getLogStores');
 
@@ -22,4 +22,19 @@ async function getCustomConfig() {
   return customConfig;
 }
 
-module.exports = getCustomConfig;
+/**
+ * Finds the custom endpoint entry whose `name` strictly equals `endpoint`; throws if no custom config is loaded.
+ * @param {string | EModelEndpoint} endpoint - Name compared against each custom endpoint's `name`.
+ * @returns {Promise<Object | undefined>} The first matching entry, or `undefined` when none matches.
+ */
+const getCustomEndpointConfig = async (endpoint) => {
+  const loadedConfig = await getCustomConfig();
+  if (!loadedConfig) {
+    throw new Error(`Config not found for the ${endpoint} custom endpoint.`);
+  }
+  const { endpoints: endpointsByType = {} } = loadedConfig;
+  const matchesName = (entry) => entry.name === endpoint;
+  return (endpointsByType[EModelEndpoint.custom] ?? []).find(matchesName);
+};
+
+module.exports = { getCustomConfig, getCustomEndpointConfig };
diff --git a/api/server/services/Config/index.js b/api/server/services/Config/index.js
@@ -10,12 +10,12 @@ const loadDefaultEndpointsConfig = require('./loadDefaultEConfig');
 
 module.exports = {
   config,
-  getCustomConfig,
   loadCustomConfig,
   loadConfigModels,
   loadDefaultModels,
   loadOverrideConfig,
   loadAsyncEndpoints,
+  ...getCustomConfig,
   loadConfigEndpoints,
   loadDefaultEndpointsConfig,
 };
diff --git a/api/server/services/Config/loadConfigEndpoints.js b/api/server/services/Config/loadConfigEndpoints.js
@@ -1,6 +1,6 @@
 const { EModelEndpoint, extractEnvVariable } = require('librechat-data-provider');
+const { getCustomConfig } = require('./getCustomConfig');
 const { isUserProvided } = require('~/server/utils');
-const getCustomConfig = require('./getCustomConfig');
 
 /**
  * Load config endpoints from the cached configuration object

diff --git a/api/server/services/Config/loadConfigModels.js b/api/server/services/Config/loadConfigModels.js
@@ -1,7 +1,16 @@
+const { Providers } = require('@librechat/agents');
 const { EModelEndpoint, extractEnvVariable } = require('librechat-data-provider');
 const { fetchModels } = require('~/server/services/ModelService');
+const { getCustomConfig } = require('./getCustomConfig');
 const { isUserProvided } = require('~/server/utils');
-const getCustomConfig = require('./getCustomConfig');
+
+/**
+ * @param {string} name - Endpoint name as written in the custom config; defaults to an empty string.
+ * @returns {string} `Providers.OLLAMA` when the lowercased name equals it, otherwise `name` unchanged.
+ */
+function normalizeEndpointName(name = '') {
+  return name.toLowerCase() !== Providers.OLLAMA ? name : Providers.OLLAMA;
+}
 
 /**
  * Load config endpoints from the cached configuration object
@@ -61,7 +70,8 @@ async function loadConfigModels(req) {
 
   for (let i = 0; i < customEndpoints.length; i++) {
     const endpoint = customEndpoints[i];
-    const { models, name, baseURL, apiKey } = endpoint;
+    const { models, name: configName, baseURL, apiKey } = endpoint;
+    const name = normalizeEndpointName(configName);
     endpointsMap[name] = endpoint;
 
     const API_KEY = extractEnvVariable(apiKey);

diff --git a/api/server/services/Config/loadConfigModels.spec.js b/api/server/services/Config/loadConfigModels.spec.js
@@ -1,6 +1,6 @@
 const { fetchModels } = require('~/server/services/ModelService');
+const { getCustomConfig } = require('./getCustomConfig');
 const loadConfigModels = require('./loadConfigModels');
-const getCustomConfig = require('./getCustomConfig');
 
 jest.mock('~/server/services/ModelService');
 jest.mock('./getCustomConfig');
@@ -253,21 +253,21 @@ describe('loadConfigModels', () => {
       }),
     );
 
-    // For groq and Ollama, since the apiKey is "user_provided", models should not be fetched
+    // For groq and ollama, since the apiKey is "user_provided", models should not be fetched
     // Depending on your implementation's behavior regarding "default" models without fetching,
     // you may need to adjust the following assertions:
     expect(result.groq).toBe(exampleConfig.endpoints.custom[2].models.default);
-    expect(result.Ollama).toBe(exampleConfig.endpoints.custom[3].models.default);
+    expect(result.ollama).toBe(exampleConfig.endpoints.custom[3].models.default);
 
-    // Verifying fetchModels was not called for groq and Ollama
+    // Verifying fetchModels was not called for groq and ollama
     expect(fetchModels).not.toHaveBeenCalledWith(
       expect.objectContaining({
         name: 'groq',
       }),
     );
     expect(fetchModels).not.toHaveBeenCalledWith(
       expect.objectContaining({
-        name: 'Ollama',
+        name: 'ollama',
       }),
     );
   });
@@ -335,4 +335,68 @@ describe('loadConfigModels', () => {
 
     expect(result.FalsyFetchModel).toEqual(['defaultModel1', 'defaultModel2']);
   });
+
+  it('normalizes Ollama endpoint name to lowercase', async () => {
+    const testCases = [
+      {
+        name: 'Ollama',
+        apiKey: 'user_provided',
+        baseURL: 'http://localhost:11434/v1/',
+        models: {
+          default: ['mistral', 'llama2'],
+          fetch: false,
+        },
+      },
+      {
+        name: 'OLLAMA',
+        apiKey: 'user_provided',
+        baseURL: 'http://localhost:11434/v1/',
+        models: {
+          default: ['mixtral', 'codellama'],
+          fetch: false,
+        },
+      },
+      {
+        name: 'OLLaMA',
+        apiKey: 'user_provided',
+        baseURL: 'http://localhost:11434/v1/',
+        models: {
+          default: ['phi', 'neural-chat'],
+          fetch: false,
+        },
+      },
+    ];
+
+    getCustomConfig.mockResolvedValue({
+      endpoints: {
+        custom: testCases,
+      },
+    });
+
+    const result = await loadConfigModels(mockRequest);
+
+    // Every casing of "Ollama" should collapse onto the single lowercase "ollama" key,
+    // so only the last config in the array (['phi', 'neural-chat']) remains in the result
+    expect(result.Ollama).toBeUndefined();
+    expect(result.OLLAMA).toBeUndefined();
+    expect(result.OLLaMA).toBeUndefined();
+    expect(result.ollama).toEqual(['phi', 'neural-chat']);
+
+    // fetchModels must not be called under any original casing (apiKey is "user_provided", fetch is false)
+    expect(fetchModels).not.toHaveBeenCalledWith(
+      expect.objectContaining({
+        name: 'Ollama',
+      }),
+    );
+    expect(fetchModels).not.toHaveBeenCalledWith(
+      expect.objectContaining({
+        name: 'OLLAMA',
+      }),
+    );
+    expect(fetchModels).not.toHaveBeenCalledWith(
+      expect.objectContaining({
+        name: 'OLLaMA',
+      }),
+    );
+  });
 });
diff --git a/api/server/services/Endpoints/agents/initialize.js b/api/server/services/Endpoints/agents/initialize.js
@@ -1,16 +1,3 @@
-// const {
-//   ErrorTypes,
-//   EModelEndpoint,
-//   resolveHeaders,
-//   mapModelToAzureConfig,
-// } = require('librechat-data-provider');
-// const { getUserKeyValues, checkUserKeyExpiry } = require('~/server/services/UserService');
-// const { isEnabled, isUserProvided } = require('~/server/utils');
-// const { getAzureCredentials } = require('~/utils');
-// const { OpenAIClient } = require('~/app');
-
-const { z } = require('zod');
-const { tool } = require('@langchain/core/tools');
 const { createContentAggregator, Providers } = require('@librechat/agents');
 const {
   EModelEndpoint,
@@ -25,30 +12,11 @@ const initAnthropic = require('~/server/services/Endpoints/anthropic/initialize'
 const getBedrockOptions = require('~/server/services/Endpoints/bedrock/options');
 const initOpenAI = require('~/server/services/Endpoints/openAI/initialize');
 const initCustom = require('~/server/services/Endpoints/custom/initialize');
+const { getCustomEndpointConfig } = require('~/server/services/Config');
 const { loadAgentTools } = require('~/server/services/ToolService');
 const AgentClient = require('~/server/controllers/agents/client');
 const { getModelMaxTokens } = require('~/utils');
 
-/* For testing errors */
-const _getWeather = tool(
-  async ({ location }) => {
-    if (location === 'SAN FRANCISCO') {
-      return 'It\'s 60 degrees and foggy';
-    } else if (location.toLowerCase() === 'san francisco') {
-      throw new Error('Input queries must be all capitals');
-    } else {
-      throw new Error('Invalid input.');
-    }
-  },
-  {
-    name: 'get_weather',
-    description: 'Call to get the current weather',
-    schema: z.object({
-      location: z.string(),
-    }),
-  },
-);
-
 const providerConfigMap = {
   [EModelEndpoint.openAI]: initOpenAI,
   [EModelEndpoint.azureOpenAI]: initOpenAI,
@@ -85,18 +53,25 @@ const initializeClient = async ({ req, res, endpointOption }) => {
   if (!agent) {
     throw new Error('Agent not found');
   }
+
   const { tools, toolMap } = await loadAgentTools({
     req,
     tools: agent.tools,
     agent_id: agent.id,
     tool_resources: agent.tool_resources,
-    // openAIApiKey: process.env.OPENAI_API_KEY,
   });
 
+  const provider = agent.provider;
   let modelOptions = { model: agent.model };
-  let getOptions = providerConfigMap[agent.provider];
+  let getOptions = providerConfigMap[provider];
   if (!getOptions) {
-    throw new Error(`Provider ${agent.provider} not supported`);
+    const customEndpointConfig = await getCustomEndpointConfig(provider);
+    if (!customEndpointConfig) {
+      throw new Error(`Provider ${provider} not supported`);
+    }
+    getOptions = initCustom;
+    agent.provider = Providers.OPENAI;
+    agent.endpoint = provider.toLowerCase();
   }
 
   // TODO: pass-in override settings that are specific to current run
@@ -106,10 +81,14 @@ const initializeClient = async ({ req, res, endpointOption }) => {
     res,
     endpointOption,
     optionsOnly: true,
-    overrideEndpoint: agent.provider,
+    overrideEndpoint: provider,
     overrideModel: agent.model,
   });
+
   modelOptions = Object.assign(modelOptions, options.llmConfig);
+  if (options.configOptions) {
+    modelOptions.configuration = options.configOptions;
+  }
 
   const sender = getResponseSender({
     ...endpointOption,
@@ -128,11 +107,11 @@ const initializeClient = async ({ req, res, endpointOption }) => {
     collectedUsage,
     artifactPromises,
     endpoint: EModelEndpoint.agents,
-    configOptions: options.configOptions,
     attachments: endpointOption.attachments,
     maxContextTokens:
       agent.max_context_tokens ??
-      getModelMaxTokens(modelOptions.model, providerEndpointMap[agent.provider]),
+      getModelMaxTokens(modelOptions.model, providerEndpointMap[provider]) ??
+      4000,
   });
   return { client };
 };