Run DeepSeek-R1-Distill-Qwen-32B (#258) (#259)

Run DeepSeek-R1-Distill-Qwen-32B without output validation, because validation would consume too many resources.

### Ticket
[Link to GitHub Issue](#258)

### Problem description
Add DeepSeek-R1-Distill-Qwen-32B and run it without validation.

### What's changed
Added tests/models/deepseek/test_deepseek_qwen.py
tenstorrent · Feb 4, 2025 · 71af6bc · 71af6bc
1 parent 6482dae
commit 71af6bc
Show file tree

Hide file tree

Showing 2 changed files with 56 additions and 0 deletions.
diff --git a/.github/workflows/run-op-by-op-model-tests.yml b/.github/workflows/run-op-by-op-model-tests.yml
@@ -95,6 +95,7 @@ jobs:
               tests/models/gpt_neo/test_gpt_neo.py::test_gpt_neo
               tests/models/falcon/test_falcon.py::test_falcon
               tests/models/mamba/test_mamba.py::test_mamba[state-spaces/mamba-790m-hf]
+              tests/models/deepseek/test_deepseek_qwen.py::test_deepseek_qwen
               "
           },
         ]

diff --git a/tests/models/deepseek/test_deepseek_qwen.py b/tests/models/deepseek/test_deepseek_qwen.py
@@ -0,0 +1,55 @@
+# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+import pytest
+from tests.utils import ModelTester
+from tt_torch.tools.utils import CompilerConfig, CompileDepth
+
+
+class ThisTester(ModelTester):
+    def _load_model(self):
+        return AutoModelForCausalLM.from_pretrained(
+            self.model_name, torch_dtype=torch.bfloat16
+        )
+
+    def _load_inputs(self):
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            self.model_name, torch_dtype=torch.bfloat16
+        )
+        prompt = "Who are you?"
+        messages = [{"role": "user", "content": prompt}]
+        self.text = self.tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        self.inputs = self.tokenizer(self.text, return_tensors="pt")
+        return self.inputs
+
+
+@pytest.mark.parametrize(
+    "mode",
+    ["eval", "train"],
+)
+@pytest.mark.parametrize(
+    "model_name",
+    [
+        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+    ],
+)
+@pytest.mark.parametrize("op_by_op", [True, False], ids=["op_by_op", "full"])
+def test_deepseek_qwen(record_property, model_name, mode, op_by_op):
+    if mode == "train":
+        pytest.skip()
+    record_property("model_name", model_name)
+    record_property("mode", mode)
+
+    cc = CompilerConfig()
+    if op_by_op:
+        cc.compile_depth = CompileDepth.EXECUTE_OP_BY_OP
+
+    tester = ThisTester(
+        model_name, mode, assert_on_output_mismatch=False, compiler_config=cc
+    )
+
+    results = tester.test_model()