Skip to content

Commit

Permalink
add qwen 2.5 32b chat model
Browse files Browse the repository at this point in the history
  • Loading branch information
BBC-Esq authored Oct 21, 2024
1 parent 543fb2b commit 122d45e
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/chart_models_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def create_chat_models_comparison_plot():
"Qwen 2.5 - 14b",
"Mistral Small - 22b",
"Internlm2_5 - 20b",
"Qwen 2.5 - 32b",
],
"color": "#CD5C5C",
"label": "8k Context"
Expand Down
17 changes: 14 additions & 3 deletions src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
system_message = "You are a helpful person who clearly and directly answers questions in a succinct fashion based on contexts provided to you. If you cannot find the answer within the contexts simply tell me that the contexts do not provide an answer. However, if the contexts partially address my question I still want you to answer based on what the contexts say and then briefly summarize the parts of my question that the contexts didn't provide an answer."
rag_string = "Here are the contexts to base your answer on. However, I need to reiterate that I only want you to base your response on these contexts and do not use outside knowledge that you may have been trained with."

# to change the default of 8192 in module_chat.py
# changes the default of 8192 in module_chat.py
MODEL_MAX_TOKENS = {
'Qwen 2.5 - 1.5b': 4096,
'Qwen 2.5 Coder - 1.5b': 4096,
Expand All @@ -13,7 +13,7 @@
'Internlm2_5 - 1.8b': 4096
}

# to change the default of 1024 in module_chat.mpy
# changes the default of 1024 in module_chat.py
MODEL_MAX_NEW_TOKENS = {
'Qwen 2.5 - 1.5b': 512,
'Qwen 2.5 Coder - 1.5b': 512,
Expand Down Expand Up @@ -166,7 +166,7 @@
'repo_id': 'Qwen/Qwen2.5-14B-Instruct',
'cache_dir': 'Qwen--Qwen2.5-14B-Instruct',
'cps': 139.26,
'context_length': 4096,
'context_length': 8192,
'vram': 12599.22,
'function': 'Qwen_2_5_14b',
'precision': 'bfloat16',
Expand Down Expand Up @@ -205,6 +205,17 @@
'precision': 'bfloat16',
'gated': False,
},
'Qwen 2.5 - 32b': {
'model': 'Qwen 2.5 - 32b',
'repo_id': 'Qwen/Qwen2.5-32B-Instruct',
'cache_dir': 'Qwen--Qwen2.5-32B-Instruct',
'cps': 101.51,
'context_length': 8192,
'vram': 21128.30,
'function': 'Qwen_2_5_32b',
'precision': 'bfloat16',
'gated': False,
},
}

WHISPER_SPEECH_MODELS = {
Expand Down
14 changes: 14 additions & 0 deletions src/module_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,20 @@ def create_prompt(self, augmented_query):
"""


class Qwen_2_5_32b(BaseModel):
    """Chat wrapper for the Qwen 2.5 32B instruct model.

    Pulls its model metadata from CHAT_MODELS and initializes the shared
    BaseModel with bitsandbytes bfloat16 settings plus the caller-supplied
    generation settings.
    """

    def __init__(self, generation_settings):
        super().__init__(
            CHAT_MODELS['Qwen 2.5 - 32b'],
            bnb_bfloat16_settings,
            generation_settings,
        )

    def create_prompt(self, augmented_query):
        # ChatML-style prompt expected by Qwen instruct models:
        # system turn, user turn, then an open assistant turn for generation.
        return (
            f"<|im_start|>system\n{system_message}<|im_end|>\n"
            f"<|im_start|>user\n{augmented_query}<|im_end|>\n"
            f"<|im_start|>assistant\n"
        )


@torch.inference_mode()
def generate_response(model_instance, augmented_query):
prompt = model_instance.create_prompt(augmented_query)
Expand Down

0 comments on commit 122d45e

Please sign in to comment.