Skip to content

Commit

Permalink
Add --all-layers argument to openvino CLI (#713)
Browse files Browse the repository at this point in the history
* Add --all-layers argument to CLI

* Update description
  • Loading branch information
nikita-savelyevv authored May 17, 2024
1 parent 60d5bf6 commit bc5051f
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 14 deletions.
11 changes: 11 additions & 0 deletions optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,15 @@ def parse_args_openvino(parser: "ArgumentParser"):
"or ['conceptual_captions','laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit'] for diffusion models."
),
)
# Register the --all-layers switch for 4-bit weight compression.
# default=None (instead of the usual False for store_true) keeps "flag not
# passed" distinguishable from an explicit choice; the visible code in run()
# tests `self.args.all_layers is None` before falling back to a default config.
optional_group.add_argument(
    "--all-layers",
    action="store_true",
    default=None,
    help=(
        "Whether embeddings and last MatMul layers should be compressed to INT4. If not provided and weight "
        "compression is applied, they are compressed to INT8."
    ),
)
optional_group.add_argument(
"--disable-stateful",
action="store_true",
Expand Down Expand Up @@ -198,6 +207,7 @@ def run(self):
and self.args.ratio is None
and self.args.group_size is None
and self.args.sym is None
and self.args.all_layers is None
and self.args.model in _DEFAULT_4BIT_CONFIGS
):
quantization_config = _DEFAULT_4BIT_CONFIGS[self.args.model]
Expand All @@ -207,6 +217,7 @@ def run(self):
"ratio": 1 if is_int8 else (self.args.ratio or 0.8),
"sym": self.args.sym or False,
"group_size": -1 if is_int8 else self.args.group_size,
"all_layers": None if is_int8 else self.args.all_layers,
}

if self.args.weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:
Expand Down
21 changes: 9 additions & 12 deletions tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

from parameterized import parameterized
from utils_tests import (
_ARCHITECTURES_TO_EXPECTED_INT4_INT8,
_ARCHITECTURES_TO_EXPECTED_INT8,
MODEL_NAMES,
get_num_quantized_nodes,
Expand Down Expand Up @@ -84,14 +83,13 @@ class OVCLIExportTestCase(unittest.TestCase):
("latent-consistency", 50, 135),
)

SUPPORTED_4BIT_ARCHITECTURES = (("text-generation-with-past", "opt125m"),)

SUPPORTED_4BIT_OPTIONS = ["int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"]

TEST_4BIT_CONFIGURATONS = []
for arch in SUPPORTED_4BIT_ARCHITECTURES:
for option in SUPPORTED_4BIT_OPTIONS:
TEST_4BIT_CONFIGURATONS.append([arch[0], arch[1], option])
TEST_4BIT_CONFIGURATONS = [
("text-generation-with-past", "opt125m", "int4_sym_g128", 62, 86),
("text-generation-with-past", "opt125m", "int4_asym_g128", 62, 86),
("text-generation-with-past", "opt125m", "int4_sym_g64", 62, 86),
("text-generation-with-past", "opt125m", "int4_asym_g64", 62, 86),
("text-generation-with-past", "llama_awq", "int4 --ratio 1.0 --sym --group-size 16 --all-layers", 0, 32),
]

def _openvino_export(
self, model_name: str, task: str, compression_option: str = None, compression_ratio: float = None
Expand Down Expand Up @@ -197,17 +195,16 @@ def test_exporters_cli_hybrid_quantization(self, model_type: str, exp_num_fq: in
self.assertEqual(exp_num_fq, num_fq)

@parameterized.expand(TEST_4BIT_CONFIGURATONS)
def test_exporters_cli_int4(self, task: str, model_type: str, option: str):
def test_exporters_cli_int4(self, task: str, model_type: str, option: str, expected_int8: int, expected_int4: int):
with TemporaryDirectory() as tmpdir:
subprocess.run(
f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --task {task} --weight-format {option} {tmpdir}",
f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --task {task} --weight-format {option} {tmpdir}",
shell=True,
check=True,
)
model_kwargs = {"use_cache": task.endswith("with-past")} if "generation" in task else {}
model = eval(_HEAD_TO_AUTOMODELS[task.replace("-with-past", "")]).from_pretrained(tmpdir, **model_kwargs)

expected_int8, expected_int4 = _ARCHITECTURES_TO_EXPECTED_INT4_INT8[model_type]
_, num_int8, num_int4 = get_num_quantized_nodes(model)
self.assertEqual(expected_int8, num_int8)
self.assertEqual(expected_int4, num_int4)
Expand Down
2 changes: 0 additions & 2 deletions tests/openvino/utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,6 @@
"stable-diffusion-xl-refiner": (366, 34, 42, 66),
}

_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (62, 86)}


def get_num_quantized_nodes(ov_model):
num_fake_quantize = 0
Expand Down

0 comments on commit bc5051f

Please sign in to comment.