diff --git a/Argcfile.sh b/Argcfile.sh
index 31d34bd0..f55f2c63 100755
--- a/Argcfile.sh
+++ b/Argcfile.sh
@@ -96,6 +96,7 @@ OPENAI_COMPATIBLE_PLATFORMS=( \
     ollama,llama3.1:latest,http://localhost:11434/v1 \
     perplexity,llama-3.1-8b-instruct,https://api.perplexity.ai \
     qianwen,qwen-turbo,https://dashscope.aliyuncs.com/compatible-mode/v1 \
+    siliconflow,meta-llama/Meta-Llama-3.1-8B-Instruct,https://api.siliconflow.cn/v1 \
     together,meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo,https://api.together.xyz/v1 \
     zhipuai,glm-4-0520,https://open.bigmodel.cn/api/paas/v4 \
 )
diff --git a/config.example.yaml b/config.example.yaml
index 67f2f443..99db5661 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -109,7 +109,7 @@ clients:
   # See https://platform.openai.com/docs/quickstart
   - type: openai
     api_base: https://api.openai.com/v1 # Optional
-    api_key: sk-xxx
+    api_key: xxx
     organization_id: org-xxx # Optional
 
   # For any platform compatible with OpenAI's API
@@ -149,7 +149,7 @@ clients:
   # See https://docs.anthropic.com/claude/reference/getting-started-with-the-api
   - type: claude
     api_base: https://api.anthropic.com/v1 # Optional
-    api_key: sk-ant-xxx
+    api_key: xxx
 
   # See https://docs.mistral.ai/
   - type: openai-compatible
@@ -172,13 +172,13 @@ clients:
   - type: openai-compatible
     name: perplexity
     api_base: https://api.perplexity.ai
-    api_key: pplx-xxx
+    api_key: xxx
 
   # See https://console.groq.com/docs/quickstart
   - type: openai-compatible
     name: groq
     api_base: https://api.groq.com/openai/v1
-    api_key: gsk_xxx
+    api_key: xxx
 
   # See https://github.com/jmorganca/ollama
   - type: openai-compatible
@@ -233,7 +233,7 @@ clients:
   - type: openai-compatible
     name: huggingface
     api_base: https://api-inference.huggingface.co/v1
-    api_key: hf_xxx
+    api_key: xxx
 
   # See https://replicate.com/docs
   - type: replicate
@@ -242,25 +242,25 @@ clients:
   # See https://cloud.baidu.com/doc/WENXINWORKSHOP/index.html
   - type: ernie
     api_key: xxx
-    secret_key: xxxx
+    secret_key: xxx
 
   # See https://help.aliyun.com/zh/dashscope/
   - type: openai-compatible
     name: qianwen
     api_base: https://dashscope.aliyuncs.com/compatible-mode/v1
-    api_key: sk-xxx
+    api_key: xxx
 
   # See https://platform.moonshot.cn/docs/intro
   - type: openai-compatible
     name: moonshot
     api_base: https://api.moonshot.cn/v1
-    api_key: sk-xxx
+    api_key: xxx
 
   # See https://platform.deepseek.com/api-docs/
   - type: openai-compatible
     name: deepseek
     api_base: https://api.deepseek.com
-    api_key: sk-xxx
+    api_key: xxx
 
   # See https://open.bigmodel.cn/dev/howuse/introduction
   - type: openai-compatible
@@ -304,6 +304,12 @@ clients:
     api_base: https://text.octoai.run/v1
     api_key: xxx
 
+  # See https://docs.siliconflow.cn/docs/getting-started
+  - type: openai-compatible
+    name: siliconflow
+    api_base: https://api.siliconflow.cn/v1
+    api_key: xxx
+
   # See https://docs.together.ai/docs/quickstart
   - type: openai-compatible
     name: together
diff --git a/models.yaml b/models.yaml
index 5a79454d..7f3b954a 100644
--- a/models.yaml
+++ b/models.yaml
@@ -546,7 +546,7 @@
       max_batch_size: 100
 
 # Links:
-# - https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&other=text-generation-inference&sort=trending
+# - https://huggingface.co/models?other=text-generation-inference
 # - https://huggingface.co/docs/text-generation-inference/en/reference/api_reference
 - platform: huggingface
   models:
@@ -1266,6 +1266,67 @@
       default_chunk_size: 1000
       max_batch_size: 100
 
+# Links
+# - https://siliconflow.cn/zh-cn/models
+# - https://siliconflow.cn/zh-cn/maaspricing
+# - https://docs.siliconflow.cn/reference/chat-completions-3
+- platform: siliconflow
+  models:
+    - name: Qwen/Qwen2-72B-Instruct
+      max_input_tokens: 32768
+      input_price: 0
+      output_price: 0
+    - name: meta-llama/Meta-Llama-3.1-405B-Instruct
+      max_input_tokens: 32768
+      input_price: 2.94
+      output_price: 2.94
+    - name: meta-llama/Meta-Llama-3.1-70B-Instruct
+      max_input_tokens: 32768
+      input_price: 0.578
+      output_price: 0.578
+    - name: meta-llama/Meta-Llama-3.1-8B-Instruct
+      max_input_tokens: 32768
+      input_price: 0
+      output_price: 0
+    - name: google/gemma-2-27b-it
+      max_input_tokens: 8192
+      input_price: 0.176
+      output_price: 0.176
+    - name: google/gemma-2-9b-it
+      max_input_tokens: 8192
+      input_price: 0
+      output_price: 0
+    - name: deepseek-ai/DeepSeek-V2-Chat
+      max_input_tokens: 32768
+      input_price: 0.186
+      output_price: 0.186
+    - name: deepseek-ai/DeepSeek-Coder-V2-Instruct
+      max_input_tokens: 32768
+      input_price: 0.186
+      output_price: 0.186
+    - name: BAAI/bge-large-en-v1.5
+      type: embedding
+      input_price: 0
+      max_tokens_per_chunk: 512
+      default_chunk_size: 1000
+      max_batch_size: 100
+    - name: BAAI/bge-large-zh-v1.5
+      type: embedding
+      input_price: 0
+      max_tokens_per_chunk: 512
+      default_chunk_size: 1000
+      max_batch_size: 100
+    - name: BAAI/bge-m3
+      type: embedding
+      input_price: 0
+      max_tokens_per_chunk: 8192
+      default_chunk_size: 2000
+      max_batch_size: 100
+    - name: BAAI/bge-reranker-v2-m3
+      type: reranker
+      max_input_tokens: 8192
+      input_price: 0
+
 # Links:
 # - https://docs.together.ai/docs/inference-models
 # - https://docs.together.ai/docs/embedding-models
diff --git a/src/client/mod.rs b/src/client/mod.rs
index eb9d3e9d..500d5abb 100644
--- a/src/client/mod.rs
+++ b/src/client/mod.rs
@@ -37,7 +37,7 @@ register_client!(
     (ernie, "ernie", ErnieConfig, ErnieClient),
 );
 
-pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 20] = [
+pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 21] = [
     ("ai21", "https://api.ai21.com/studio/v1"),
     ("cloudflare", ""),
     ("deepinfra", "https://api.deepinfra.com/v1/openai"),
@@ -51,12 +51,13 @@ pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 20] = [
     ("moonshot", "https://api.moonshot.cn/v1"),
     ("openrouter", "https://openrouter.ai/api/v1"),
     ("octoai", "https://text.octoai.run/v1"),
-    ("ollama", "http://localhost:11434/v1"),
+    ("ollama", ""),
     ("perplexity", "https://api.perplexity.ai"),
     (
         "qianwen",
         "https://dashscope.aliyuncs.com/compatible-mode/v1",
     ),
+    ("siliconflow", "https://api.siliconflow.cn/v1"),
     ("together", "https://api.together.xyz/v1"),
     ("zhipuai", "https://open.bigmodel.cn/api/paas/v4"),
     // RAG-dedicated
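
For reference, below is a minimal user-side sketch of how the new platform would be used once this patch lands. It mirrors the config.example.yaml entry added above; the top-level model selection and the choice of meta-llama/Meta-Llama-3.1-8B-Instruct are illustrative assumptions, not part of this diff, and any other model registered for the platform in models.yaml would work the same way.

# Hypothetical config.yaml sketch (not part of this patch)
model: siliconflow:meta-llama/Meta-Llama-3.1-8B-Instruct
clients:
  # Mirrors the config.example.yaml entry added in this diff
  - type: openai-compatible
    name: siliconflow
    api_base: https://api.siliconflow.cn/v1 # Optional; the default comes from OPENAI_COMPATIBLE_PLATFORMS
    api_key: xxx # Replace with a real SiliconFlow API key

With a config like this, chat, embedding, and reranker requests for the listed models go through the existing openai-compatible client, which is why the patch only needs to add registration data (base URL plus model metadata) rather than a new client implementation.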