Add support for accessing Volcengine online LLMs (#2165)
* use oai adaptive bridge function to handle vol engine

* add vol engine deepseek v3

---------

Co-authored-by: binary-husky <[email protected]>
littleolaf and binary-husky authored Mar 4, 2025
1 parent 4a79aa6 commit 72dbe85
Showing 4 changed files with 143 additions and 48 deletions.
7 changes: 6 additions & 1 deletion config.py
@@ -43,7 +43,8 @@
"gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
"gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-4v", "glm-3-turbo",
"gemini-1.5-pro", "chatglm3", "chatglm4",
"deepseek-chat", "deepseek-coder", "deepseek-reasoner"
"deepseek-chat", "deepseek-coder", "deepseek-reasoner",
"volcengine-deepseek-r1-250120", "volcengine-deepseek-v3-241226",
]

EMBEDDING_MODEL = "text-embedding-3-small"
@@ -267,6 +268,10 @@
YIMODEL_API_KEY = ""


# Volcengine online LLMs; get an api-key at https://console.volcengine.com/ark/region:ark+cn-beijing/endpoint
ARK_API_KEY = "00000000-0000-0000-0000-000000000000" # Volcengine API KEY


# Zidong Taichu LLM https://ai-maas.wair.ac.cn
TAICHU_API_KEY = ""

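Taken together, the two config.py hunks above are all that is needed to enable the new models; a minimal sketch (the key is the placeholder from the diff, not a real credential):

AVAIL_LLM_MODELS = [
    "volcengine-deepseek-r1-250120", "volcengine-deepseek-v3-241226",
    # a per-model override is also accepted: "volcengine-deepseek-r1-250120(max_token=6666)"
]
ARK_API_KEY = "00000000-0000-0000-0000-000000000000"  # from the Volcengine Ark console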
62 changes: 60 additions & 2 deletions request_llms/bridge_all.py
@@ -80,6 +80,7 @@ def decode(self, *args, **kwargs):
yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions"
deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions"
grok_model_endpoint = "https://api.x.ai/v1/chat/completions"
volcengine_endpoint = "https://ark.cn-beijing.volces.com/api/v3/chat/completions"

if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
@@ -102,6 +103,7 @@ def decode(self, *args, **kwargs):
if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint]
if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint]
if grok_model_endpoint in API_URL_REDIRECT: grok_model_endpoint = API_URL_REDIRECT[grok_model_endpoint]
if volcengine_endpoint in API_URL_REDIRECT: volcengine_endpoint = API_URL_REDIRECT[volcengine_endpoint]
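Like the other endpoints above, the Volcengine URL can be rerouted through API_URL_REDIRECT; a sketch of the config-side override (the mirror URL is hypothetical):

API_URL_REDIRECT = {
    "https://ark.cn-beijing.volces.com/api/v3/chat/completions":
        "https://my-mirror.example.com/api/v3/chat/completions",  # hypothetical mirror
}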

# Get the tokenizers
tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
@@ -954,7 +956,7 @@ def decode(self, *args, **kwargs):
try:
grok_beta_128k_noui, grok_beta_128k_ui = get_predict_function(
api_key_conf_name="GROK_API_KEY", max_output_token=8192, disable_proxy=False
)
)

model_info.update({
"grok-beta": {
@@ -1089,8 +1091,10 @@
})
except:
logger.error(trimmed_format_exc())

# -=-=-=-=-=-=- High-Flyer DeepSeek online API -=-=-=-=-=-=-
if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS or "deepseek-reasoner" in AVAIL_LLM_MODELS:
deepseek_models = ["deepseek-chat", "deepseek-coder", "deepseek-reasoner"]
if any(item in deepseek_models for item in AVAIL_LLM_MODELS):
try:
deepseekapi_noui, deepseekapi_ui = get_predict_function(
api_key_conf_name="DEEPSEEK_API_KEY", max_output_token=4096, disable_proxy=False
@@ -1127,6 +1131,60 @@
})
except:
logger.error(trimmed_format_exc())

# -=-=-=-=-=-=- Volcengine alignment support -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("volcengine-")]:
# This interface is designed for more flexible access to the Volcengine multi-model console,
# e.g. AVAIL_LLM_MODELS = ["volcengine-deepseek-r1-250120(max_token=6666)"]
# where
#     "volcengine-" is the prefix (required)
#     "deepseek-r1-250120" is the model name (required)
#     "(max_token=6666)" is the configuration (optional)
model_info_extend = model_info.copy()  # shallow copy, so the lookup entries below do not mutate the global model_info
model_info_extend.update({
"deepseek-r1-250120": {
"max_token": 16384,
"enable_reasoning": True,
"can_multi_thread": True,
"endpoint": volcengine_endpoint,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"deepseek-v3-241226": {
"max_token": 16384,
"enable_reasoning": False,
"can_multi_thread": True,
"endpoint": volcengine_endpoint,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
})
try:
origin_model_name, max_token_tmp = read_one_api_model_name(model)
# If this is a known model, try to fetch its info
original_model_info = model_info_extend.get(origin_model_name.replace("volcengine-", "", 1), None)
except:
logger.error(f"volcengine模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
continue

volcengine_noui, volcengine_ui = get_predict_function(api_key_conf_name="ARK_API_KEY", max_output_token=8192, disable_proxy=True, model_remove_prefix=["volcengine-"])

this_model_info = {
"fn_with_ui": volcengine_ui,
"fn_without_ui": volcengine_noui,
"endpoint": volcengine_endpoint,
"can_multi_thread": True,
"max_token": 64000,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}

# Sync extra attributes from the known-model entry
for attribute in ("has_multimodal_capacity", "enable_reasoning"):
    if original_model_info is not None and original_model_info.get(attribute) is not None:
        this_model_info[attribute] = original_model_info[attribute]
model_info.update({model: this_model_info})
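To make the prefix/config convention concrete, a sketch of how a flexible model string is decomposed, assuming read_one_api_model_name returns the full name plus the parsed max_token, as it does on the one-api path below:

model = "volcengine-deepseek-r1-250120(max_token=6666)"
origin_model_name, max_token_tmp = read_one_api_model_name(model)
# origin_model_name -> "volcengine-deepseek-r1-250120"; max_token_tmp -> 6666
base_name = origin_model_name.replace("volcengine-", "", 1)  # -> "deepseek-r1-250120"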

# -=-=-=-=-=-=- one-api alignment support -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
# This interface is designed for more flexible access to the one-api multi-model console, e.g. AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]
21 changes: 17 additions & 4 deletions request_llms/oai_std_model_template.py
@@ -57,7 +57,7 @@ def decode_chunk(chunk):
finish_reason = chunk["error"]["code"]
except:
finish_reason = "API_ERROR"
return response, reasoning_content, finish_reason
return response, reasoning_content, finish_reason, str(chunk)

try:
if chunk["choices"][0]["delta"]["content"] is not None:
@@ -122,7 +122,8 @@ def generate_message(input, model, key, history, max_output_token, system_prompt
def get_predict_function(
api_key_conf_name,
max_output_token,
disable_proxy = False
disable_proxy = False,
model_remove_prefix = [],
):
"""
Generate response functions for an OpenAI-style API. Parameters:
@@ -137,6 +138,16 @@

APIKEY = get_conf(api_key_conf_name)

def remove_prefix(model_name):
# Strip the model-name prefix: e.g. volcengine-deepseek-r1-250120 becomes deepseek-r1-250120
if not model_remove_prefix:
return model_name
model_without_prefix = model_name
for prefix in model_remove_prefix:
if model_without_prefix.startswith(prefix):
model_without_prefix = model_without_prefix[len(prefix):]
return model_without_prefix
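A quick illustration of the helper's behavior, assuming the function was created with model_remove_prefix=["volcengine-"]:

assert remove_prefix("volcengine-deepseek-r1-250120") == "deepseek-r1-250120"  # prefix stripped
assert remove_prefix("deepseek-chat") == "deepseek-chat"  # no matching prefix: unchanged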

def predict_no_ui_long_connection(
inputs,
llm_kwargs,
@@ -164,9 +175,11 @@ def predict_no_ui_long_connection(
raise RuntimeError(f"APIKEY is empty; please check {api_key_conf_name} in the config file")
if inputs == "":
inputs = "你好👋"


headers, payload = generate_message(
input=inputs,
model=llm_kwargs["llm_model"],
model=remove_prefix(llm_kwargs["llm_model"]),
key=APIKEY,
history=history,
max_output_token=max_output_token,
@@ -302,7 +315,7 @@

headers, payload = generate_message(
input=inputs,
model=llm_kwargs["llm_model"],
model=remove_prefix(llm_kwargs["llm_model"]),
key=APIKEY,
history=history,
max_output_token=max_output_token,
101 changes: 60 additions & 41 deletions tests/test_llms.py
@@ -11,46 +11,65 @@ def validate_path():


validate_path() # validate path so you can run from base directory
if __name__ == "__main__":
# from request_llms.bridge_taichu import predict_no_ui_long_connection
from request_llms.bridge_volcengine import predict_no_ui_long_connection
# from request_llms.bridge_cohere import predict_no_ui_long_connection
# from request_llms.bridge_spark import predict_no_ui_long_connection
# from request_llms.bridge_zhipu import predict_no_ui_long_connection
# from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
llm_kwargs = {
"llm_model": "volcengine",
"max_length": 4096,
"top_p": 1,
"temperature": 1,
}

if "在线模型":
if __name__ == "__main__":
from request_llms.bridge_taichu import predict_no_ui_long_connection
# from request_llms.bridge_cohere import predict_no_ui_long_connection
# from request_llms.bridge_spark import predict_no_ui_long_connection
# from request_llms.bridge_zhipu import predict_no_ui_long_connection
# from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
llm_kwargs = {
"llm_model": "taichu",
"max_length": 4096,
"top_p": 1,
"temperature": 1,
}

result = predict_no_ui_long_connection(
inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt="系统"
)
print("final result:", result)
print("final result:", result)


if "本地模型":
if __name__ == "__main__":
# from request_llms.bridge_newbingfree import predict_no_ui_long_connection
# from request_llms.bridge_moss import predict_no_ui_long_connection
# from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
# from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection
# from request_llms.bridge_claude import predict_no_ui_long_connection
# from request_llms.bridge_internlm import predict_no_ui_long_connection
# from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection
# from request_llms.bridge_qwen_7B import predict_no_ui_long_connection
# from request_llms.bridge_qwen_local import predict_no_ui_long_connection
llm_kwargs = {
"max_length": 4096,
"top_p": 1,
"temperature": 1,
}
result = predict_no_ui_long_connection(
inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt=""
)
print("final result:", result)
result = predict_no_ui_long_connection(
inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt="系统"
)
print("final result:", result)
print("final result:", result)
# if "在线模型":
# if __name__ == "__main__":
# # from request_llms.bridge_taichu import predict_no_ui_long_connection
# from request_llms.bridge_volcengine import predict_no_ui_long_connection
# # from request_llms.bridge_cohere import predict_no_ui_long_connection
# # from request_llms.bridge_spark import predict_no_ui_long_connection
# # from request_llms.bridge_zhipu import predict_no_ui_long_connection
# # from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
# llm_kwargs = {
# "llm_model": "ep-20250222011816-5cq8z",
# "max_length": 4096,
# "top_p": 1,
# "temperature": 1,
# }

# result = predict_no_ui_long_connection(
# inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt="系统"
# )
# print("final result:", result)
# print("final result:", result)


# if "本地模型":
# if __name__ == "__main__":
# # from request_llms.bridge_newbingfree import predict_no_ui_long_connection
# # from request_llms.bridge_moss import predict_no_ui_long_connection
# # from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
# # from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection
# # from request_llms.bridge_claude import predict_no_ui_long_connection
# # from request_llms.bridge_internlm import predict_no_ui_long_connection
# # from request_llms.bridge_deepseekcoder import predict_no_ui_long_connection
# # from request_llms.bridge_qwen_7B import predict_no_ui_long_connection
# # from request_llms.bridge_qwen_local import predict_no_ui_long_connection
# llm_kwargs = {
# "max_length": 4096,
# "top_p": 1,
# "temperature": 1,
# }
# result = predict_no_ui_long_connection(
# inputs="请问什么是质子?", llm_kwargs=llm_kwargs, history=["你好", "我好!"], sys_prompt=""
# )
# print("final result:", result)
