Skip to content

Commit

Permalink
add qwen 2.5 32b chat model
Browse files Browse the repository at this point in the history
  • Loading branch information
BBC-Esq authored Oct 21, 2024
1 parent 543fb2b commit 122d45e
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/chart_models_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def create_chat_models_comparison_plot():
"Qwen 2.5 - 14b",
"Mistral Small - 22b",
"Internlm2_5 - 20b",
"Qwen 2.5 - 32b",
],
"color": "#CD5C5C",
"label": "8k Context"
Expand Down
17 changes: 14 additions & 3 deletions src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
system_message = "You are a helpful person who clearly and directly answers questions in a succinct fashion based on contexts provided to you. If you cannot find the answer within the contexts simply tell me that the contexts do not provide an answer. However, if the contexts partially address my question I still want you to answer based on what the contexts say and then briefly summarize the parts of my question that the contexts didn't provide an answer."
rag_string = "Here are the contexts to base your answer on. However, I need to reiterate that I only want you to base your response on these contexts and do not use outside knowledge that you may have been trained with."

# to change the default of 8192 in module_chat.py
# changes the default of 8192 in module_chat.py
MODEL_MAX_TOKENS = {
'Qwen 2.5 - 1.5b': 4096,
'Qwen 2.5 Coder - 1.5b': 4096,
Expand All @@ -13,7 +13,7 @@
'Internlm2_5 - 1.8b': 4096
}

# to change the default of 1024 in module_chat.mpy
# changes the default of 1024 in module_chat.py
MODEL_MAX_NEW_TOKENS = {
'Qwen 2.5 - 1.5b': 512,
'Qwen 2.5 Coder - 1.5b': 512,
Expand Down Expand Up @@ -166,7 +166,7 @@
'repo_id': 'Qwen/Qwen2.5-14B-Instruct',
'cache_dir': 'Qwen--Qwen2.5-14B-Instruct',
'cps': 139.26,
'context_length': 4096,
'context_length': 8192,
'vram': 12599.22,
'function': 'Qwen_2_5_14b',
'precision': 'bfloat16',
Expand Down Expand Up @@ -205,6 +205,17 @@
'precision': 'bfloat16',
'gated': False,
},
'Qwen 2.5 - 32b': {
'model': 'Qwen 2.5 - 32b',
'repo_id': 'Qwen/Qwen2.5-32B-Instruct',
'cache_dir': 'Qwen--Qwen2.5-32B-Instruct',
'cps': 101.51,
'context_length': 8192,
'vram': 21128.30,
'function': 'Qwen_2_5_32b',
'precision': 'bfloat16',
'gated': False,
},
}

WHISPER_SPEECH_MODELS = {
Expand Down
14 changes: 14 additions & 0 deletions src/module_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,20 @@ def create_prompt(self, augmented_query):
"""


class Qwen_2_5_32b(BaseModel):
    """Chat wrapper for the Qwen 2.5 32B instruct model.

    Pulls its model metadata from CHAT_MODELS and initializes the shared
    BaseModel with bitsandbytes bfloat16 settings plus the caller-supplied
    generation settings.
    """

    def __init__(self, generation_settings):
        super().__init__(
            CHAT_MODELS['Qwen 2.5 - 32b'],
            bnb_bfloat16_settings,
            generation_settings,
        )

    def create_prompt(self, augmented_query):
        # ChatML-style prompt expected by Qwen instruct models:
        # system turn, user turn, then an open assistant turn for generation.
        return (
            f"<|im_start|>system\n{system_message}<|im_end|>\n"
            f"<|im_start|>user\n{augmented_query}<|im_end|>\n"
            f"<|im_start|>assistant\n"
        )


@torch.inference_mode()
def generate_response(model_instance, augmented_query):
prompt = model_instance.create_prompt(augmented_query)
Expand Down

0 comments on commit 122d45e

Please sign in to comment.