From 777fc02e0f4e534705c3a0decffd3eebc26ff597 Mon Sep 17 00:00:00 2001 From: sigoden Date: Wed, 13 Nov 2024 07:58:18 +0800 Subject: [PATCH] chore: update models.yaml --- models.yaml | 117 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 80 insertions(+), 37 deletions(-) diff --git a/models.yaml b/models.yaml index 7c2b2526..9eefc2ca 100644 --- a/models.yaml +++ b/models.yaml @@ -3,7 +3,7 @@ # Links: # - https://platform.openai.com/docs/models -# - https://openai.com/pricing +# - https://openai.com/api/pricing/ # - https://platform.openai.com/docs/api-reference/chat - platform: openai models: @@ -375,18 +375,16 @@ - name: llama3.2 max_input_tokens: 128000 supports_function_calling: true + - name: llama3.2-vision + max_input_tokens: 128000 + supports_vision: true - name: gemma2 max_input_tokens: 8192 - name: qwen2.5 max_input_tokens: 128000 supports_function_calling: true - - name: phi3.5 - max_input_tokens: 128000 - - name: nemotron-mini - max_input_tokens: 128000 - supports_function_calling: true - - name: mistral-small - max_input_tokens: 128000 + - name: qwen2.5-coder + max_input_tokens: 32768 supports_function_calling: true - name: deepseek-coder-v2 max_input_tokens: 32768 @@ -732,9 +730,8 @@ input_price: 0.07 # Links: -# - https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction -# - https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing -# - https://help.aliyun.com/zh/dashscope/developer-reference/use-qwen-by-api +# - https://help.aliyun.com/zh/model-studio/getting-started/models +# - https://help.aliyun.com/zh/model-studio/developer-reference/use-qwen-by-calling-api - platform: qianwen models: - name: qwen-max-latest @@ -755,6 +752,12 @@ input_price: 0.042 output_price: 0.084 supports_function_calling: true + - name: qwen-coder-plus-latest + max_input_tokens: 129024 + max_output_tokens: 8192 + input_price: 0.49 + output_price: 0.98 + supports_function_calling: true - name: qwen-coder-turbo-latest max_input_tokens: 129024 max_output_tokens: 8192 @@ -765,14 +768,26 @@ max_input_tokens: 1000000 input_price: 0.07 output_price: 0.28 - - name: qwen-vl-max + - name: qwen-vl-max-latest input_price: 2.8 output_price: 2.8 supports_vision: true - - name: qwen-vl-plus + - name: qwen-vl-plus-latest input_price: 1.12 output_price: 1.12 supports_vision: true + - name: qwen2.5-72b-instruct + max_input_tokens: 129024 + max_output_tokens: 8192 + input_price: 0.56 + output_price: 1.68 + supports_function_calling: true + - name: qwen2.5-coder-32b-instruct + max_input_tokens: 129024 + max_output_tokens: 8192 + input_price: 0.49 + output_price: 0.98 + supports_function_calling: true - name: text-embedding-v3 type: embedding input_price: 0.1 @@ -808,6 +823,10 @@ max_input_tokens: 28000 max_output_tokens: 4096 supports_function_calling: true + - name: hunyuan-large-longcontext + max_input_tokens: 128000 + max_output_tokens: 6144 + supports_function_calling: true - name: hunyuan-standard max_input_tokens: 30000 max_output_tokens: 2048 @@ -874,7 +893,7 @@ - platform: deepseek models: - name: deepseek-chat - max_input_tokens: 32768 + max_input_tokens: 65536 max_output_tokens: 4096 input_price: 0.14 output_price: 0.28 @@ -928,6 +947,10 @@ # - https://platform.lingyiwanwu.com/docs/api-reference#create-chat-completion - platform: lingyiwanwu models: + - name: yi-lightning + max_input_tokens: 16384 + input_price: 0.14 + output_price: 0.14 - name: yi-large max_input_tokens: 32768 input_price: 2.8 @@ -941,18 +964,10 @@ max_input_tokens: 16384 input_price: 3.5 output_price: 3.5 - - name: yi-large-turbo - max_input_tokens: 16384 - input_price: 1.68 - output_price: 1.68 - name: yi-medium-200k max_input_tokens: 200000 input_price: 1.68 output_price: 1.68 - - name: yi-lightning - max_input_tokens: 16384 - input_price: 0.14 - output_price: 0.14 - name: yi-vision max_input_tokens: 16384 input_price: 0.84 @@ -1091,6 +1106,10 @@ input_price: 0.35 output_price: 0.40 supports_function_calling: true + - name: Qwen/Qwen2.5-Coder-32B-Instruct + max_input_tokens: 32768 + input_price: 0.18 + output_price: 0.18 - name: nvidia/Llama-3.1-Nemotron-70B-Instruct max_input_tokens: 128000 input_price: 0.35 @@ -1170,6 +1189,10 @@ max_input_tokens: 32768 input_price: 0.9 output_price: 0.9 + - name: accounts/fireworks/models/qwen2p5-coder-32b-instruct + max_input_tokens: 32768 + input_price: 0.9 + output_price: 0.9 - name: accounts/fireworks/models/phi-3-vision-128k-instruct max_input_tokens: 131072 input_price: 0.2 @@ -1205,7 +1228,7 @@ max_batch_size: 100 # Links: -# - https://openrouter.ai/docs#models +# - https://openrouter.ai/models - platform: openrouter models: - name: openai/gpt-4o @@ -1489,6 +1512,11 @@ max_input_tokens: 131072 input_price: 0.35 output_price: 0.4 + supports_function_calling: true + - name: qwen/qwen-2.5-coder-32b-instruct + max_input_tokens: 32768 + input_price: 0.18 + output_price: 0.18 - name: qwen/qwen-2-vl-72b-instruct max_input_tokens: 32768 input_price: 0.4 @@ -1525,11 +1553,20 @@ input_price: 0.578 output_price: 0.578 supports_function_calling: true + - name: Qwen/Qwen2.5-72B-Instruct-128K + max_input_tokens: 128000 + input_price: 0.578 + output_price: 0.578 + supports_function_calling: true - name: Qwen/Qwen2.5-7B-Instruct max_input_tokens: 32768 input_price: 0 output_price: 0 supports_function_calling: true + - name: Qwen/Qwen2.5-Coder-32B-Instruct + max_input_tokens: 32768 + input_price: 0.176 + output_price: 0.176 - name: Qwen/Qwen2.5-Coder-7B-Instruct max_input_tokens: 32768 input_price: 0 @@ -1551,6 +1588,10 @@ max_input_tokens: 32768 input_price: 0.578 output_price: 0.578 + - name: Tencent/Hunyuan-A52B-Instruct + max_input_tokens: 32768 + input_price: 2.94 + output_price: 2.94 - name: BAAI/bge-large-en-v1.5 type: embedding input_price: 0 @@ -1609,6 +1650,22 @@ max_input_tokens: 131072 input_price: 0.06 output_price: 0.06 + - name: google/gemma-2-27b-it + max_input_tokens: 8192 + input_price: 0.8 + output_price: 0.8 + - name: google/gemma-2-9b-it + max_input_tokens: 8192 + input_price: 0.3 + output_price: 0.3 + - name: Qwen/Qwen2.5-72B-Instruct-Turbo + max_input_tokens: 32768 + input_price: 1.2 + output_price: 1.2 + - name: Qwen/Qwen2.5-7B-Instruct-Turbo + max_input_tokens: 32768 + input_price: 0.3 + output_price: 0.3 - name: WhereIsAI/UAE-Large-V1 type: embedding input_price: 0.016 @@ -1678,20 +1735,6 @@ max_tokens_per_chunk: 32000 default_chunk_size: 1000 max_batch_size: 128 - - name: voyage-multilingual-2 - type: embedding - max_input_tokens: 120000 - input_price: 0.12 - max_tokens_per_chunk: 32000 - default_chunk_size: 2000 - max_batch_size: 128 - - name: voyage-code-2 - type: embedding - max_input_tokens: 120000 - input_price: 0.12 - max_tokens_per_chunk: 16000 - default_chunk_size: 2000 - max_batch_size: 128 - name: rerank-2 type: reranker max_input_tokens: 16000