From 777fc02e0f4e534705c3a0decffd3eebc26ff597 Mon Sep 17 00:00:00 2001
From: sigoden <sigoden@gmail.com>
Date: Wed, 13 Nov 2024 07:58:18 +0800
Subject: [PATCH] chore: update models.yaml

---
 models.yaml | 117 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 80 insertions(+), 37 deletions(-)

diff --git a/models.yaml b/models.yaml
index 7c2b2526..9eefc2ca 100644
--- a/models.yaml
+++ b/models.yaml
@@ -3,7 +3,7 @@
 
 # Links:
 #  - https://platform.openai.com/docs/models
-#  - https://openai.com/pricing
+#  - https://openai.com/api/pricing/
 #  - https://platform.openai.com/docs/api-reference/chat
 - platform: openai
   models:
@@ -375,18 +375,16 @@
     - name: llama3.2
       max_input_tokens: 128000
       supports_function_calling: true
+    - name: llama3.2-vision
+      max_input_tokens: 128000
+      supports_vision: true
     - name: gemma2
       max_input_tokens: 8192
     - name: qwen2.5
       max_input_tokens: 128000
       supports_function_calling: true
-    - name: phi3.5
-      max_input_tokens: 128000
-    - name: nemotron-mini
-      max_input_tokens: 128000
-      supports_function_calling: true
-    - name: mistral-small
-      max_input_tokens: 128000
+    - name: qwen2.5-coder
+      max_input_tokens: 32768
       supports_function_calling: true
     - name: deepseek-coder-v2
       max_input_tokens: 32768
@@ -732,9 +730,8 @@
       input_price: 0.07
 
 # Links:
-#  - https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
-#  - https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
-#  - https://help.aliyun.com/zh/dashscope/developer-reference/use-qwen-by-api
+#  - https://help.aliyun.com/zh/model-studio/getting-started/models
+#  - https://help.aliyun.com/zh/model-studio/developer-reference/use-qwen-by-calling-api
 - platform: qianwen
   models:
     - name: qwen-max-latest
@@ -755,6 +752,12 @@
       input_price: 0.042
       output_price: 0.084
       supports_function_calling: true
+    - name: qwen-coder-plus-latest
+      max_input_tokens: 129024
+      max_output_tokens: 8192
+      input_price: 0.49
+      output_price: 0.98
+      supports_function_calling: true
     - name: qwen-coder-turbo-latest
       max_input_tokens: 129024
       max_output_tokens: 8192
@@ -765,14 +768,26 @@
       max_input_tokens: 1000000
       input_price: 0.07
       output_price: 0.28
-    - name: qwen-vl-max
+    - name: qwen-vl-max-latest
       input_price: 2.8
       output_price: 2.8
       supports_vision: true
-    - name: qwen-vl-plus
+    - name: qwen-vl-plus-latest
       input_price: 1.12
       output_price: 1.12
       supports_vision: true
+    - name: qwen2.5-72b-instruct
+      max_input_tokens: 129024
+      max_output_tokens: 8192
+      input_price: 0.56
+      output_price: 1.68
+      supports_function_calling: true
+    - name: qwen2.5-coder-32b-instruct
+      max_input_tokens: 129024
+      max_output_tokens: 8192
+      input_price: 0.49
+      output_price: 0.98
+      supports_function_calling: true
     - name: text-embedding-v3
       type: embedding
       input_price: 0.1
@@ -808,6 +823,10 @@
       max_input_tokens: 28000
       max_output_tokens: 4096
       supports_function_calling: true
+    - name: hunyuan-large-longcontext
+      max_input_tokens: 128000
+      max_output_tokens: 6144
+      supports_function_calling: true
     - name: hunyuan-standard
       max_input_tokens: 30000
       max_output_tokens: 2048
@@ -874,7 +893,7 @@
 - platform: deepseek
   models:
     - name: deepseek-chat
-      max_input_tokens: 32768
+      max_input_tokens: 65536
       max_output_tokens: 4096
       input_price: 0.14
       output_price: 0.28
@@ -928,6 +947,10 @@
 #  - https://platform.lingyiwanwu.com/docs/api-reference#create-chat-completion
 - platform: lingyiwanwu
   models:
+    - name: yi-lightning
+      max_input_tokens: 16384
+      input_price: 0.14
+      output_price: 0.14
     - name: yi-large
       max_input_tokens: 32768
       input_price: 2.8
@@ -941,18 +964,10 @@
       max_input_tokens: 16384
       input_price: 3.5
       output_price: 3.5
-    - name: yi-large-turbo
-      max_input_tokens: 16384
-      input_price: 1.68
-      output_price: 1.68
     - name: yi-medium-200k
       max_input_tokens: 200000
       input_price: 1.68
       output_price: 1.68
-    - name: yi-lightning
-      max_input_tokens: 16384
-      input_price: 0.14
-      output_price: 0.14
     - name: yi-vision
       max_input_tokens: 16384
       input_price: 0.84
@@ -1091,6 +1106,10 @@
       input_price: 0.35
       output_price: 0.40
       supports_function_calling: true
+    - name: Qwen/Qwen2.5-Coder-32B-Instruct
+      max_input_tokens: 32768
+      input_price: 0.18
+      output_price: 0.18
     - name: nvidia/Llama-3.1-Nemotron-70B-Instruct
       max_input_tokens: 128000
       input_price: 0.35
@@ -1170,6 +1189,10 @@
       max_input_tokens: 32768
       input_price: 0.9
       output_price: 0.9
+    - name: accounts/fireworks/models/qwen2p5-coder-32b-instruct
+      max_input_tokens: 32768
+      input_price: 0.9
+      output_price: 0.9
     - name: accounts/fireworks/models/phi-3-vision-128k-instruct
       max_input_tokens: 131072
       input_price: 0.2
@@ -1205,7 +1228,7 @@
       max_batch_size: 100
 
 # Links:
-#  - https://openrouter.ai/docs#models
+#  - https://openrouter.ai/models
 - platform: openrouter
   models:
     - name: openai/gpt-4o
@@ -1489,6 +1512,11 @@
       max_input_tokens: 131072
       input_price: 0.35
       output_price: 0.4
+      supports_function_calling: true
+    - name: qwen/qwen-2.5-coder-32b-instruct
+      max_input_tokens: 32768
+      input_price: 0.18
+      output_price: 0.18
     - name: qwen/qwen-2-vl-72b-instruct
       max_input_tokens: 32768
       input_price: 0.4
@@ -1525,11 +1553,20 @@
       input_price: 0.578
       output_price: 0.578
       supports_function_calling: true
+    - name: Qwen/Qwen2.5-72B-Instruct-128K
+      max_input_tokens: 128000
+      input_price: 0.578
+      output_price: 0.578
+      supports_function_calling: true
     - name: Qwen/Qwen2.5-7B-Instruct
       max_input_tokens: 32768
       input_price: 0
       output_price: 0
       supports_function_calling: true
+    - name: Qwen/Qwen2.5-Coder-32B-Instruct
+      max_input_tokens: 32768
+      input_price: 0.176
+      output_price: 0.176
     - name: Qwen/Qwen2.5-Coder-7B-Instruct
       max_input_tokens: 32768
       input_price: 0
@@ -1551,6 +1588,10 @@
       max_input_tokens: 32768
       input_price: 0.578
       output_price: 0.578
+    - name: Tencent/Hunyuan-A52B-Instruct
+      max_input_tokens: 32768
+      input_price: 2.94
+      output_price: 2.94
     - name: BAAI/bge-large-en-v1.5
       type: embedding
       input_price: 0
@@ -1609,6 +1650,22 @@
       max_input_tokens: 131072
       input_price: 0.06
       output_price: 0.06
+    - name: google/gemma-2-27b-it
+      max_input_tokens: 8192
+      input_price: 0.8
+      output_price: 0.8
+    - name: google/gemma-2-9b-it
+      max_input_tokens: 8192
+      input_price: 0.3
+      output_price: 0.3
+    - name: Qwen/Qwen2.5-72B-Instruct-Turbo
+      max_input_tokens: 32768
+      input_price: 1.2
+      output_price: 1.2
+    - name: Qwen/Qwen2.5-7B-Instruct-Turbo
+      max_input_tokens: 32768
+      input_price: 0.3
+      output_price: 0.3
     - name: WhereIsAI/UAE-Large-V1
       type: embedding
       input_price: 0.016
@@ -1678,20 +1735,6 @@
       max_tokens_per_chunk: 32000
       default_chunk_size: 1000
       max_batch_size: 128
-    - name: voyage-multilingual-2
-      type: embedding
-      max_input_tokens: 120000
-      input_price: 0.12
-      max_tokens_per_chunk: 32000
-      default_chunk_size: 2000
-      max_batch_size: 128
-    - name: voyage-code-2
-      type: embedding
-      max_input_tokens: 120000
-      input_price: 0.12
-      max_tokens_per_chunk: 16000
-      default_chunk_size: 2000
-      max_batch_size: 128
     - name: rerank-2
       type: reranker
       max_input_tokens: 16000