Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add siliconflow client #831

Merged
merged 1 commit into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Argcfile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ OPENAI_COMPATIBLE_PLATFORMS=( \
ollama,llama3.1:latest,http://localhost:11434/v1 \
perplexity,llama-3.1-8b-instruct,https://api.perplexity.ai \
qianwen,qwen-turbo,https://dashscope.aliyuncs.com/compatible-mode/v1 \
siliconflow,meta-llama/Meta-Llama-3.1-8B-Instruct,https://api.siliconflow.cn/v1 \
together,meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo,https://api.together.xyz/v1 \
zhipuai,glm-4-0520,https://open.bigmodel.cn/api/paas/v4 \
)
Expand Down
24 changes: 15 additions & 9 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ clients:
# See https://platform.openai.com/docs/quickstart
- type: openai
api_base: https://api.openai.com/v1 # Optional
api_key: sk-xxx
api_key: xxx
organization_id: org-xxx # Optional

# For any platform compatible with OpenAI's API
Expand Down Expand Up @@ -149,7 +149,7 @@ clients:
# See https://docs.anthropic.com/claude/reference/getting-started-with-the-api
- type: claude
api_base: https://api.anthropic.com/v1 # Optional
api_key: sk-ant-xxx
api_key: xxx

# See https://docs.mistral.ai/
- type: openai-compatible
Expand All @@ -172,13 +172,13 @@ clients:
- type: openai-compatible
name: perplexity
api_base: https://api.perplexity.ai
api_key: pplx-xxx
api_key: xxx

# See https://console.groq.com/docs/quickstart
- type: openai-compatible
name: groq
api_base: https://api.groq.com/openai/v1
api_key: gsk_xxx
api_key: xxx

# See https://github.com/jmorganca/ollama
- type: openai-compatible
Expand Down Expand Up @@ -233,7 +233,7 @@ clients:
- type: openai-compatible
name: huggingface
api_base: https://api-inference.huggingface.co/v1
api_key: hf_xxx
api_key: xxx

# See https://replicate.com/docs
- type: replicate
Expand All @@ -242,25 +242,25 @@ clients:
# See https://cloud.baidu.com/doc/WENXINWORKSHOP/index.html
- type: ernie
api_key: xxx
secret_key: xxxx
secret_key: xxx

# See https://help.aliyun.com/zh/dashscope/
- type: openai-compatible
name: qianwen
api_base: https://dashscope.aliyuncs.com/compatible-mode/v1
api_key: sk-xxx
api_key: xxx

# See https://platform.moonshot.cn/docs/intro
- type: openai-compatible
name: moonshot
api_base: https://api.moonshot.cn/v1
api_key: sk-xxx
api_key: xxx

# See https://platform.deepseek.com/api-docs/
- type: openai-compatible
name: deepseek
api_base: https://api.deepseek.com
api_key: sk-xxx
api_key: xxx

# See https://open.bigmodel.cn/dev/howuse/introduction
- type: openai-compatible
Expand Down Expand Up @@ -304,6 +304,12 @@ clients:
api_base: https://text.octoai.run/v1
api_key: xxx

# See https://docs.siliconflow.cn/docs/getting-started
- type: openai-compatible
name: siliconflow
api_base: https://api.siliconflow.cn/v1
api_key: xxx

# See https://docs.together.ai/docs/quickstart
- type: openai-compatible
name: together
Expand Down
63 changes: 62 additions & 1 deletion models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@
max_batch_size: 100

# Links:
# - https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&other=text-generation-inference&sort=trending
# - https://huggingface.co/models?other=text-generation-inference
# - https://huggingface.co/docs/text-generation-inference/en/reference/api_reference
- platform: huggingface
models:
Expand Down Expand Up @@ -1266,6 +1266,67 @@
default_chunk_size: 1000
max_batch_size: 100

# Links:
# - https://siliconflow.cn/zh-cn/models
# - https://siliconflow.cn/zh-cn/maaspricing
# - https://docs.siliconflow.cn/reference/chat-completions-3
- platform: siliconflow
models:
- name: Qwen/Qwen2-72B-Instruct
max_input_tokens: 32768
input_price: 0
output_price: 0
- name: meta-llama/Meta-Llama-3.1-405B-Instruct
max_input_tokens: 32768
input_price: 2.94
output_price: 2.94
- name: meta-llama/Meta-Llama-3.1-70B-Instruct
max_input_tokens: 32768
input_price: 0.578
output_price: 0.578
- name: meta-llama/Meta-Llama-3.1-8B-Instruct
max_input_tokens: 32768
input_price: 0
output_price: 0
- name: google/gemma-2-27b-it
max_input_tokens: 8192
input_price: 0.176
output_price: 0.176
- name: google/gemma-2-9b-it
max_input_tokens: 8192
input_price: 0
output_price: 0
- name: deepseek-ai/DeepSeek-V2-Chat
max_input_tokens: 32768
input_price: 0.186
output_price: 0.186
- name: deepseek-ai/DeepSeek-Coder-V2-Instruct
max_input_tokens: 32768
input_price: 0.186
output_price: 0.186
- name: BAAI/bge-large-en-v1.5
type: embedding
input_price: 0
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-large-zh-v1.5
type: embedding
input_price: 0
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-m3
type: embedding
input_price: 0
max_tokens_per_chunk: 8192
default_chunk_size: 2000
max_batch_size: 100
- name: BAAI/bge-reranker-v2-m3
type: reranker
max_input_tokens: 8192
input_price: 0

# Links:
# - https://docs.together.ai/docs/inference-models
# - https://docs.together.ai/docs/embedding-models
Expand Down
5 changes: 3 additions & 2 deletions src/client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ register_client!(
(ernie, "ernie", ErnieConfig, ErnieClient),
);

pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 20] = [
pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 21] = [
("ai21", "https://api.ai21.com/studio/v1"),
("cloudflare", ""),
("deepinfra", "https://api.deepinfra.com/v1/openai"),
Expand All @@ -51,12 +51,13 @@ pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 20] = [
("moonshot", "https://api.moonshot.cn/v1"),
("openrouter", "https://openrouter.ai/api/v1"),
("octoai", "https://text.octoai.run/v1"),
("ollama", "http://localhost:11434/v1"),
("ollama", ""),
("perplexity", "https://api.perplexity.ai"),
(
"qianwen",
"https://dashscope.aliyuncs.com/compatible-mode/v1",
),
("siliconflow", "https://api.siliconflow.cn/v1"),
("together", "https://api.together.xyz/v1"),
("zhipuai", "https://open.bigmodel.cn/api/paas/v4"),
// RAG-dedicated
Expand Down