Skip to content

Commit

Permalink
chore: update models.yaml
Browse files Browse the repository at this point in the history
  • Loading branch information
sigoden committed Nov 12, 2024
1 parent f720135 commit 777fc02
Showing 1 changed file with 80 additions and 37 deletions.
117 changes: 80 additions & 37 deletions models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

# Links:
# - https://platform.openai.com/docs/models
# - https://openai.com/pricing
# - https://openai.com/api/pricing/
# - https://platform.openai.com/docs/api-reference/chat
- platform: openai
models:
Expand Down Expand Up @@ -375,18 +375,16 @@
- name: llama3.2
max_input_tokens: 128000
supports_function_calling: true
- name: llama3.2-vision
max_input_tokens: 128000
supports_vision: true
- name: gemma2
max_input_tokens: 8192
- name: qwen2.5
max_input_tokens: 128000
supports_function_calling: true
- name: phi3.5
max_input_tokens: 128000
- name: nemotron-mini
max_input_tokens: 128000
supports_function_calling: true
- name: mistral-small
max_input_tokens: 128000
- name: qwen2.5-coder
max_input_tokens: 32768
supports_function_calling: true
- name: deepseek-coder-v2
max_input_tokens: 32768
Expand Down Expand Up @@ -732,9 +730,8 @@
input_price: 0.07

# Links:
# - https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
# - https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
# - https://help.aliyun.com/zh/dashscope/developer-reference/use-qwen-by-api
# - https://help.aliyun.com/zh/model-studio/getting-started/models
# - https://help.aliyun.com/zh/model-studio/developer-reference/use-qwen-by-calling-api
- platform: qianwen
models:
- name: qwen-max-latest
Expand All @@ -755,6 +752,12 @@
input_price: 0.042
output_price: 0.084
supports_function_calling: true
- name: qwen-coder-plus-latest
max_input_tokens: 129024
max_output_tokens: 8192
input_price: 0.49
output_price: 0.98
supports_function_calling: true
- name: qwen-coder-turbo-latest
max_input_tokens: 129024
max_output_tokens: 8192
Expand All @@ -765,14 +768,26 @@
max_input_tokens: 1000000
input_price: 0.07
output_price: 0.28
- name: qwen-vl-max
- name: qwen-vl-max-latest
input_price: 2.8
output_price: 2.8
supports_vision: true
- name: qwen-vl-plus
- name: qwen-vl-plus-latest
input_price: 1.12
output_price: 1.12
supports_vision: true
- name: qwen2.5-72b-instruct
max_input_tokens: 129024
max_output_tokens: 8192
input_price: 0.56
output_price: 1.68
supports_function_calling: true
- name: qwen2.5-coder-32b-instruct
max_input_tokens: 129024
max_output_tokens: 8192
input_price: 0.49
output_price: 0.98
supports_function_calling: true
- name: text-embedding-v3
type: embedding
input_price: 0.1
Expand Down Expand Up @@ -808,6 +823,10 @@
max_input_tokens: 28000
max_output_tokens: 4096
supports_function_calling: true
- name: hunyuan-large-longcontext
max_input_tokens: 128000
max_output_tokens: 6144
supports_function_calling: true
- name: hunyuan-standard
max_input_tokens: 30000
max_output_tokens: 2048
Expand Down Expand Up @@ -874,7 +893,7 @@
- platform: deepseek
models:
- name: deepseek-chat
max_input_tokens: 32768
max_input_tokens: 65536
max_output_tokens: 4096
input_price: 0.14
output_price: 0.28
Expand Down Expand Up @@ -928,6 +947,10 @@
# - https://platform.lingyiwanwu.com/docs/api-reference#create-chat-completion
- platform: lingyiwanwu
models:
- name: yi-lightning
max_input_tokens: 16384
input_price: 0.14
output_price: 0.14
- name: yi-large
max_input_tokens: 32768
input_price: 2.8
Expand All @@ -941,18 +964,10 @@
max_input_tokens: 16384
input_price: 3.5
output_price: 3.5
- name: yi-large-turbo
max_input_tokens: 16384
input_price: 1.68
output_price: 1.68
- name: yi-medium-200k
max_input_tokens: 200000
input_price: 1.68
output_price: 1.68
- name: yi-lightning
max_input_tokens: 16384
input_price: 0.14
output_price: 0.14
- name: yi-vision
max_input_tokens: 16384
input_price: 0.84
Expand Down Expand Up @@ -1091,6 +1106,10 @@
input_price: 0.35
output_price: 0.40
supports_function_calling: true
- name: Qwen/Qwen2.5-Coder-32B-Instruct
max_input_tokens: 32768
input_price: 0.18
output_price: 0.18
- name: nvidia/Llama-3.1-Nemotron-70B-Instruct
max_input_tokens: 128000
input_price: 0.35
Expand Down Expand Up @@ -1170,6 +1189,10 @@
max_input_tokens: 32768
input_price: 0.9
output_price: 0.9
- name: accounts/fireworks/models/qwen2p5-coder-32b-instruct
max_input_tokens: 32768
input_price: 0.9
output_price: 0.9
- name: accounts/fireworks/models/phi-3-vision-128k-instruct
max_input_tokens: 131072
input_price: 0.2
Expand Down Expand Up @@ -1205,7 +1228,7 @@
max_batch_size: 100

# Links:
# - https://openrouter.ai/docs#models
# - https://openrouter.ai/models
- platform: openrouter
models:
- name: openai/gpt-4o
Expand Down Expand Up @@ -1489,6 +1512,11 @@
max_input_tokens: 131072
input_price: 0.35
output_price: 0.4
supports_function_calling: true
- name: qwen/qwen-2.5-coder-32b-instruct
max_input_tokens: 32768
input_price: 0.18
output_price: 0.18
- name: qwen/qwen-2-vl-72b-instruct
max_input_tokens: 32768
input_price: 0.4
Expand Down Expand Up @@ -1525,11 +1553,20 @@
input_price: 0.578
output_price: 0.578
supports_function_calling: true
- name: Qwen/Qwen2.5-72B-Instruct-128K
max_input_tokens: 128000
input_price: 0.578
output_price: 0.578
supports_function_calling: true
- name: Qwen/Qwen2.5-7B-Instruct
max_input_tokens: 32768
input_price: 0
output_price: 0
supports_function_calling: true
- name: Qwen/Qwen2.5-Coder-32B-Instruct
max_input_tokens: 32768
input_price: 0.176
output_price: 0.176
- name: Qwen/Qwen2.5-Coder-7B-Instruct
max_input_tokens: 32768
input_price: 0
Expand All @@ -1551,6 +1588,10 @@
max_input_tokens: 32768
input_price: 0.578
output_price: 0.578
- name: Tencent/Hunyuan-A52B-Instruct
max_input_tokens: 32768
input_price: 2.94
output_price: 2.94
- name: BAAI/bge-large-en-v1.5
type: embedding
input_price: 0
Expand Down Expand Up @@ -1609,6 +1650,22 @@
max_input_tokens: 131072
input_price: 0.06
output_price: 0.06
- name: google/gemma-2-27b-it
max_input_tokens: 8192
input_price: 0.8
output_price: 0.8
- name: google/gemma-2-9b-it
max_input_tokens: 8192
input_price: 0.3
output_price: 0.3
- name: Qwen/Qwen2.5-72B-Instruct-Turbo
max_input_tokens: 32768
input_price: 1.2
output_price: 1.2
- name: Qwen/Qwen2.5-7B-Instruct-Turbo
max_input_tokens: 32768
input_price: 0.3
output_price: 0.3
- name: WhereIsAI/UAE-Large-V1
type: embedding
input_price: 0.016
Expand Down Expand Up @@ -1678,20 +1735,6 @@
max_tokens_per_chunk: 32000
default_chunk_size: 1000
max_batch_size: 128
- name: voyage-multilingual-2
type: embedding
max_input_tokens: 120000
input_price: 0.12
max_tokens_per_chunk: 32000
default_chunk_size: 2000
max_batch_size: 128
- name: voyage-code-2
type: embedding
max_input_tokens: 120000
input_price: 0.12
max_tokens_per_chunk: 16000
default_chunk_size: 2000
max_batch_size: 128
- name: rerank-2
type: reranker
max_input_tokens: 16000
Expand Down

0 comments on commit 777fc02

Please sign in to comment.