Merge pull request #421 from devchat-ai/add_max_tokens_for_llm_api
Add max_tokens configuration for LLM API
kagami-l authored Nov 12, 2024
2 parents ed1a2ac + 8b6dd86 commit 1fb5b9a
Showing 1 changed file with 35 additions and 1 deletion.
36 changes: 35 additions & 1 deletion devchat/llm/openai.py
@@ -7,12 +7,15 @@
import json
import os
import re
from typing import Dict, List
from pathlib import Path
from typing import Any, Dict, List

import httpx
import openai
import oyaml as yaml

from devchat.ide import IDEService
from devchat.workflow.path import CHAT_CONFIG_FILENAME, CHAT_DIR

from .pipeline import (
    RetryException,  # Import RetryException class
@@ -42,6 +45,35 @@ def _try_remove_markdown_block_flag(content):
    return content


# Module-level variable used to cache the chat configuration
_chat_config: Dict[str, Any] = {}


def _load_chat_config() -> None:
    """Load the chat configuration into the module-level cache."""
    global _chat_config
    chat_config_path = Path(CHAT_DIR) / CHAT_CONFIG_FILENAME
    with open(chat_config_path, "r", encoding="utf-8") as file:
        _chat_config = yaml.safe_load(file)


def get_maxtokens_by_model(model: str) -> int:
    # Load the configuration if it has not been loaded yet
    if not _chat_config:
        _load_chat_config()

    # The default value is 1024
    default_max_tokens = 1024

    # Check whether the model is present in the configuration
    if model in _chat_config.get("models", {}):
        # The model exists: return its max_tokens, or the default if the key is missing
        return _chat_config["models"][model].get("max_tokens", default_max_tokens)
    else:
        # The model is not in the configuration: return the default
        return default_max_tokens


def chat_completion_stream_commit(
    messages: List[Dict],  # [{"role": "user", "content": "hello"}]
    llm_config: Dict,  # {"model": "...", ...}
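
For reference, a minimal sketch of how the new lookup resolves a value, assuming the chat config's models mapping optionally carries a max_tokens entry per model; the model names and numbers below are illustrative, not taken from this commit:

# Illustrative only: a config shaped the way get_maxtokens_by_model expects,
# treating the module-level cache as already populated.
_chat_config = {
    "models": {
        "gpt-4-example": {"max_tokens": 2048},  # explicit per-model limit
        "claude-example": {},                   # no max_tokens key
    }
}

get_maxtokens_by_model("gpt-4-example")   # -> 2048, taken from the config
get_maxtokens_by_model("claude-example")  # -> 1024, key missing, default used
get_maxtokens_by_model("other-model")     # -> 1024, model not configured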
@@ -62,6 +94,7 @@ def chat_completion_stream_commit(
    # Update llm_config dictionary
    llm_config["stream"] = True
    llm_config["timeout"] = 60
    llm_config["max_tokens"] = get_maxtokens_by_model(llm_config["model"])
    # Return chat completions
    return client.chat.completions.create(messages=messages, **llm_config)

@@ -83,6 +116,7 @@ def chat_completion_stream_raw(**kwargs):
    # Update kwargs dictionary
    kwargs["stream"] = True
    kwargs["timeout"] = 60
    kwargs["max_tokens"] = get_maxtokens_by_model(kwargs["model"])
    # Return chat completions
    return client.chat.completions.create(**kwargs)

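With both streaming entry points updated, every completion request now carries a per-model max_tokens. A rough end-to-end sketch of the resulting OpenAI call, assuming the openai v1 client; the API key, base URL, and model name are placeholders, not values from this commit:

import openai

from devchat.llm.openai import get_maxtokens_by_model

# Placeholders: supply a real key, endpoint, and a model present in the chat config.
client = openai.OpenAI(api_key="YOUR_KEY", base_url="https://api.example.com/v1")

llm_config = {"model": "example-model"}
llm_config["stream"] = True
llm_config["timeout"] = 60
llm_config["max_tokens"] = get_maxtokens_by_model(llm_config["model"])  # config value or 1024

response = client.chat.completions.create(
    messages=[{"role": "user", "content": "hello"}],
    **llm_config,
)
for chunk in response:  # stream=True yields incremental chunks
    print(chunk.choices[0].delta.content or "", end="")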
