Skip to content

Commit

Permalink
Run DeepSeek-R1-Distill-Qwen-32B (#258) (#259)
Browse files Browse the repository at this point in the history
Run DeepSeek-R1-Distill-Qwen-32B without validation, as that would
consume too many resources.

### Ticket
[Link to Github
Issue](#258)

### Problem description
Add DeepSeek-R1-Distill-Qwen-32B and run without validation.

### What's changed
Added tests/models/deepseek/test_deepseek_qwen.py
  • Loading branch information
ddilbazTT authored Feb 4, 2025
1 parent 6482dae commit 71af6bc
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/workflows/run-op-by-op-model-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ jobs:
tests/models/gpt_neo/test_gpt_neo.py::test_gpt_neo
tests/models/falcon/test_falcon.py::test_falcon
tests/models/mamba/test_mamba.py::test_mamba[state-spaces/mamba-790m-hf]
tests/models/deepseek/test_deepseek_qwen.py::test_deepseek_qwen
"
},
]
Expand Down
55 changes: 55 additions & 0 deletions tests/models/deepseek/test_deepseek_qwen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import pytest
from tests.utils import ModelTester
from tt_torch.tools.utils import CompilerConfig, CompileDepth


class ThisTester(ModelTester):
    """Model tester for DeepSeek-R1-Distill-Qwen causal language models.

    Loads the model in bfloat16 (a 32B model will not fit in fp32 on most
    hosts) and builds chat-templated inputs from a fixed prompt.
    """

    def _load_model(self):
        """Return the pretrained causal-LM, loaded in bfloat16."""
        return AutoModelForCausalLM.from_pretrained(
            self.model_name, torch_dtype=torch.bfloat16
        )

    def _load_inputs(self):
        """Tokenize a fixed chat prompt and return the tokenizer outputs.

        Also stashes the tokenizer, rendered chat text, and inputs on
        ``self`` for use by the base ``ModelTester``.
        """
        # NOTE: the original passed torch_dtype here too; that kwarg is
        # model-only and is ignored by tokenizers, so it is dropped.
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        prompt = "Who are you?"
        messages = [{"role": "user", "content": prompt}]
        # Render the chat template without tokenizing so the generation
        # prompt marker is appended before encoding.
        self.text = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        self.inputs = self.tokenizer(self.text, return_tensors="pt")
        return self.inputs


@pytest.mark.parametrize(
    "mode",
    ["eval", "train"],
)
@pytest.mark.parametrize(
    "model_name",
    [
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    ],
)
@pytest.mark.parametrize("op_by_op", [True, False], ids=["op_by_op", "full"])
def test_deepseek_qwen(record_property, model_name, mode, op_by_op):
    """Compile and run DeepSeek-R1-Distill-Qwen-32B.

    Output validation is disabled (``assert_on_output_mismatch=False``)
    because validating a 32B model would consume too many resources.
    """
    if mode == "train":
        # Give pytest an explicit reason instead of a bare skip so reports
        # explain why the train parametrization never runs.
        pytest.skip("Training is not supported for this model.")
    record_property("model_name", model_name)
    record_property("mode", mode)

    cc = CompilerConfig()
    if op_by_op:
        # Execute op-by-op to allow per-op triage without full compilation.
        cc.compile_depth = CompileDepth.EXECUTE_OP_BY_OP

    tester = ThisTester(
        model_name, mode, assert_on_output_mismatch=False, compiler_config=cc
    )

    # Results are intentionally not checked — see docstring.
    tester.test_model()

0 comments on commit 71af6bc

Please sign in to comment.