Skip to content

Commit

Permalink
add repeat tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dsikka committed Apr 16, 2024
1 parent 342d056 commit 3cb6ac6
Show file tree
Hide file tree
Showing 10 changed files with 383 additions and 226 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Nightly regression config for the GPU consecutive-run OBCQ test
# (consumed via parse_params / parameterized_class in test_consecutive_runs.py).
cadence: "nightly"
test_type: "regression"
# SparseZoo stub; loaded up front by TestConsecutiveRunsGPU.setUp with
# device_map set to `device` below
model: "zoo:llama2-7b-llama2_pretrain-base"
dataset: open_platypus
# recipe applied on the first oneshot pass
first_recipe: "tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml"
# recipe applied on the second pass, resuming from the first run's output
second_recipe: "tests/sparseml/transformers/obcq/recipes/additional_sparsity.yaml"
device: "auto"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Per-commit sanity config for the consecutive-run OBCQ test
# (consumed via parse_params / parameterized_class in test_consecutive_runs.py).
cadence: "commit"
test_type: "sanity"
# tiny stories model so the sanity test stays fast
model: "Xenova/llama2.c-stories15M"
dataset: open_platypus
# recipe applied on the first oneshot pass
first_recipe: "tests/sparseml/transformers/obcq/recipes/quant_and_sparse.yaml"
# recipe applied on the second pass, resuming from the first run's output
second_recipe: "tests/sparseml/transformers/obcq/recipes/additional_sparsity.yaml"
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Per-commit config for test_repeat_quant_fails.py: both recipes quantize,
# so the second oneshot run is expected to raise RuntimeError.
# NOTE: no comments may be placed inside the `|` block scalars below —
# they would become part of the recipe string.
cadence: "commit"
test_type: "sanity"
model: "Xenova/llama2.c-stories15M"
dataset: open_platypus
# inline recipe for the first (successful) quantization pass
first_recipe: |
 first_stage:
  quant_modifiers:
   QuantizationModifier:
    ignore:
     - LlamaRotaryEmbedding
     - LlamaRMSNorm
     - SiLU
    scheme_overrides:
     Embedding:
      input_activations: null
# inline recipe for the second pass; re-quantizing must fail
second_recipe: |
 second_stage:
  quant_modifiers:
   QuantizationModifier:
    ignore:
     - LlamaRotaryEmbedding
     - LlamaRMSNorm
     - SiLU
     - Embedding
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Per-commit config for test_repeat_quant_fails.py (variant with a larger
# ignore list on the second pass): both recipes quantize, so the second
# oneshot run is expected to raise RuntimeError.
# NOTE: no comments may be placed inside the `|` block scalars below —
# they would become part of the recipe string.
cadence: "commit"
test_type: "sanity"
model: "Xenova/llama2.c-stories15M"
dataset: open_platypus
# inline recipe for the first (successful) quantization pass
first_recipe: |
 first_stage:
  quant_modifiers:
   QuantizationModifier:
    ignore:
     - LlamaRotaryEmbedding
     - LlamaRMSNorm
     - SiLU
     - Linear
    scheme_overrides:
     Embedding:
      input_activations: null
# inline recipe for the second pass; re-quantizing must fail even though the
# MatMul inputs/outputs are excluded here
second_recipe: |
 second_stage:
  quant_modifiers:
   QuantizationModifier:
    ignore:
     - LlamaRotaryEmbedding
     - LlamaRMSNorm
     - SiLU
     - Embedding
     - MatMulLeftInput_QK
     - MatMulRightInput_QK
     - MatMulOutput_QK
     - MatMulLeftInput_PV
     - MatMulRightInput_PV
     - MatMulOutput_PV
     - QuantizableMatMul
145 changes: 145 additions & 0 deletions tests/sparseml/transformers/obcq/test_consecutive_runs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import shutil
import unittest
from pathlib import Path

import pytest
import yaml

from parameterized import parameterized_class
from tests.testing_utils import parse_params, requires_gpu, requires_torch


CONFIGS_DIRECTORY = "tests/sparseml/transformers/obcq/obcq_configs/consec_runs"
GPU_CONFIGS_DIRECTORY = "tests/sparseml/transformers/obcq/obcq_configs/consec_runs/gpu"


class TestConsecutiveRuns(unittest.TestCase):
    """Shared driver for consecutive one-shot compression runs.

    Subclasses provide ``model``, ``dataset``, ``first_recipe``,
    ``second_recipe``, and (in ``setUp``) ``device``/``output`` paths. The
    driver runs ``oneshot`` twice — the second run resuming from the saved
    output of the first — and verifies sparsity, quantization state, and
    recipe-stage accumulation after each pass.
    """

    def _test_consecutive_runs(
        self, tolerance: float, num_calibration_samples: int = 16
    ):
        """Run both recipes back to back and verify the combined result.

        :param tolerance: relative tolerance for the layer-sparsity checks
        :param num_calibration_samples: calibration samples per oneshot call
        """
        import math

        import sparseml.core.session as session_manager
        from sparseml.pytorch.model_load.helpers import get_session_model
        from sparseml.pytorch.utils.helpers import tensor_sparsity
        from sparseml.transformers import oneshot
        from sparseml.utils.pytorch import qat_active

        # first pass: recipe with 50% sparsity, quantization and smoothquant
        oneshot(
            model=self.model,
            dataset=self.dataset,
            num_calibration_samples=num_calibration_samples,
            recipe=self.first_recipe,
            output_dir=self.output_first,
            oneshot_device=self.device,
            clear_sparse_session=False,
        )
        first_tiny_model = get_session_model()
        layer_0_sparse = tensor_sparsity(
            first_tiny_model.model.layers[0].self_attn.k_proj.module.weight
        )
        assert math.isclose(layer_0_sparse.item(), 0.5, rel_tol=tolerance)
        assert qat_active(first_tiny_model)

        # exactly one recipe stage should be recorded after the first pass
        session = session_manager.active_session()
        session_recipe = session.lifecycle.recipe_container.compiled_recipe
        stages = [stage.group for stage in session_recipe.stages]
        self.assertEqual(len(stages), 1)
        session.reset()

        # second pass: reload the saved model and raise sparsity to 0.7
        oneshot(
            model=self.output_first,
            dataset=self.dataset,
            num_calibration_samples=num_calibration_samples,
            recipe=self.second_recipe,
            output_dir=self.output_second,
            oneshot_device=self.device,
            clear_sparse_session=False,
        )

        second_tiny_model = get_session_model()
        layer_0_sparse = tensor_sparsity(
            second_tiny_model.model.layers[0].self_attn.k_proj.module.weight
        )
        assert math.isclose(layer_0_sparse.item(), 0.7, rel_tol=tolerance)
        assert qat_active(second_tiny_model)

        # both stages should now be present in the compiled session recipe
        session = session_manager.active_session()
        session_recipe = session.lifecycle.recipe_container.compiled_recipe
        stages = [stage.group for stage in session_recipe.stages]
        self.assertEqual(len(stages), 2)

        # the saved recipe on disk must also contain both stages
        recipe_path = self.output_second / "recipe.yaml"
        recipe_data = yaml.safe_load(recipe_path.read_text())
        stage_keys = recipe_data.keys()
        self.assertEqual(len(stage_keys), 2)
        self.assertIn("test_stage_0", stage_keys)
        self.assertIn("test_stage_1", stage_keys)

    def tearDown(self):
        # ignore_errors: if the test failed before the output dir was created,
        # rmtree must not replace the real failure with a FileNotFoundError
        shutil.rmtree(self.output, ignore_errors=True)


@requires_torch
@pytest.mark.integration
@parameterized_class(parse_params(CONFIGS_DIRECTORY))
class TestConsecutiveRunsSmall(TestConsecutiveRuns):
    """CPU/GPU sanity variant of the consecutive-run test.

    The class attributes below are placeholders; ``parameterized_class``
    fills them in from the configs in ``CONFIGS_DIRECTORY``.
    """

    model = None
    first_recipe = None
    second_recipe = None
    dataset = None

    def setUp(self):
        import torch

        # prefer the first CUDA device when one is present
        if torch.cuda.is_available():
            self.device = "cuda:0"
        else:
            self.device = "cpu"

        self.output = "./oneshot_output"
        base_dir = Path(self.output)
        self.output_first = base_dir / "test_1"
        self.output_second = base_dir / "test_2"

    def test_consecutive_runs_small(self):
        self._test_consecutive_runs(tolerance=1e-3)


@requires_gpu
@requires_torch
@pytest.mark.integration
@parameterized_class(parse_params(GPU_CONFIGS_DIRECTORY))
class TestConsecutiveRunsGPU(TestConsecutiveRuns):
    """GPU variant of the consecutive-run test, driven by the configs in
    GPU_CONFIGS_DIRECTORY (which also supply ``device``)."""

    # Will be populated using the config files
    model = None
    first_recipe = None
    second_recipe = None
    dataset = None
    device = None

    def setUp(self):
        from sparseml.transformers import SparseAutoModelForCausalLM

        # SparseZoo stubs are materialized up front with device_map set to the
        # configured device; other identifiers are passed through to oneshot
        # as-is (presumably resolvable by oneshot itself — verify)
        if "zoo:" in self.model:
            self.model = SparseAutoModelForCausalLM.from_pretrained(
                self.model, device_map=self.device
            )

        self.output = "./oneshot_output"
        self.output_first = Path(self.output) / "test_1"
        self.output_second = Path(self.output) / "test_2"

    def test_consecutive_runs_gpu(self):
        # looser tolerance than the small-model sanity test
        self._test_consecutive_runs(tolerance=1e-0, num_calibration_samples=16)
4 changes: 2 additions & 2 deletions tests/sparseml/transformers/obcq/test_obcq_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ def test_oneshot_completion(self):
dataset=self.dataset,
oneshot_device=self.device,
recipe=self.recipe,
max_seq_length=128,
num_calibration_samples=32,
max_seq_length=512,
num_calibration_samples=512,
pad_to_max_length=False,
output_dir=self.output,
)
Expand Down
74 changes: 74 additions & 0 deletions tests/sparseml/transformers/obcq/test_repeat_quant_fails.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import shutil
import unittest
from pathlib import Path

import pytest

from parameterized import parameterized_class
from tests.testing_utils import parse_params, requires_torch


CONFIGS_DIRECTORY = "tests/sparseml/transformers/obcq/obcq_configs/repeat_quants"


@requires_torch
@pytest.mark.integration
@parameterized_class(parse_params(CONFIGS_DIRECTORY))
class TestRepeatQuants(unittest.TestCase):
    """Verify that applying a second quantization recipe on top of an
    already-quantized model raises instead of silently nesting quantization.

    ``model``, ``first_recipe``, ``second_recipe``, and ``dataset`` are
    populated from the configs in ``CONFIGS_DIRECTORY``.
    """

    model = None
    first_recipe = None
    second_recipe = None
    dataset = None

    def setUp(self):
        import torch

        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.output = "./oneshot_output"
        self.output_first = Path(self.output) / "test_1"
        # not used by this test (the second run is expected to fail before
        # writing output); kept for parity with the consecutive-run tests
        self.output_second = Path(self.output) / "test_2"

    def test_fail_on_repeated_quant(self):
        """First quantization succeeds; re-quantizing must raise."""
        import sparseml.core.session as session_manager
        from sparseml.transformers import oneshot

        oneshot(
            model=self.model,
            dataset=self.dataset,
            num_calibration_samples=4,
            oneshot_device=self.device,
            recipe=self.first_recipe,
            output_dir=self.output_first,
            clear_sparse_session=False,
        )

        session = session_manager.active_session()
        session.reset()

        # When trying to re-quantize with the second recipe, we should error out
        # to avoid nested quantizations
        with pytest.raises(RuntimeError):
            oneshot(
                model=self.output_first,
                dataset=self.dataset,
                num_calibration_samples=4,
                oneshot_device=self.device,
                recipe=self.second_recipe,
            )

    def tearDown(self):
        # ignore_errors: if oneshot failed before the output dir existed,
        # cleanup must not mask the real failure with a FileNotFoundError
        shutil.rmtree(self.output, ignore_errors=True)
Loading

0 comments on commit 3cb6ac6

Please sign in to comment.