Commit 0a8e3e7: Apply comments
KodiaqQ committed Jan 14, 2025
1 parent 022908a

Showing 2 changed files with 12 additions and 16 deletions.
20 changes: 8 additions & 12 deletions optimum/intel/openvino/configuration.py
@@ -658,6 +658,13 @@ def __init__(
         self.overflow_fix = overflow_fix
         self.smooth_quant_alpha = smooth_quant_alpha
         self.activation_format = activation_format
+
+        f8_formats = ["f8e4m3", "f8e5m2"]
+        if self.activation_format in f8_formats and self.weight_format in f8_formats:
+            logger.info(
+                f"{self.activation_format} for activations and {self.weight_format} weights were found. A symmetrical scheme will be used."
+            )
+            self.sym = True
         self.post_init()
 
     def post_init(self):
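
The added block auto-enables the symmetric scheme whenever both activations and weights use one of the f8 formats, replacing the hard ValueError that post_init used to raise for non-int8 asymmetric quantization (removed in the next hunk). A minimal sketch of the resulting behavior, assuming OVQuantizationConfig is importable from optimum.intel and accepts the activation_format and weight_format keyword arguments shown in the diff:

    # Sketch only, not an excerpt from the repository.
    from optimum.intel import OVQuantizationConfig

    config = OVQuantizationConfig(
        weight_format="f8e4m3",
        activation_format="f8e4m3",
    )
    # Both formats are in ["f8e4m3", "f8e5m2"], so __init__ logs an info
    # message and flips sym to True before post_init runs.
    assert config.sym is True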
@@ -674,16 +681,6 @@ def post_init(self):
                 f"SmoothQuant alpha parameter must be in range [0, 1], but found {self.smooth_quant_alpha}"
             )
 
-        if not self.sym:
-            if self.activation_format != "int8":
-                raise ValueError(
-                    f"Asymmetric quantization can not be performed in {self.activation_format} activation format."
-                )
-            if self.weight_format != "int8":
-                raise ValueError(
-                    f"Asymmetric quantization can not be performed in {self.weight_format} weight format."
-                )
 
 
 class OVConfig(BaseConfig):
     CONFIG_NAME = "openvino_config.json"
@@ -708,8 +705,7 @@ def __init__(
             "compression", None
         )  # A field for backward-compatability of training-time compression parameters
         if self.quantization_config is not None:
-            if isinstance(self.quantization_config, OVWeightQuantizationConfig):
-                self.dtype = self.quantization_config.weight_format
+            self.dtype = self.quantization_config.weight_format
         else:
             self.dtype = dtype
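
With the isinstance check dropped, OVConfig.dtype now mirrors weight_format for any quantization config, not only OVWeightQuantizationConfig; previously a full-quantization config left dtype unassigned on this path. A hedged illustration, with imports and keyword arguments assumed from the hunks above:

    from optimum.intel import OVConfig, OVQuantizationConfig

    quant_config = OVQuantizationConfig(weight_format="f8e4m3", activation_format="f8e4m3")
    ov_config = OVConfig(quantization_config=quant_config)
    print(ov_config.dtype)  # expected: "f8e4m3"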

8 changes: 4 additions & 4 deletions tests/openvino/test_exporters_cli.py
@@ -123,7 +123,7 @@ class OVCLIExportTestCase(unittest.TestCase):
             "text-generation",
             "llama",
             "f8e4m3",
-            "--dataset wikitext2 --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code --sym",
+            "--dataset wikitext2 --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code",
             (13,),
             (16,),
         ),
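
For context, this parametrization drives an optimum-cli export with the listed options; dropping --sym is safe because the f8e4m3/f8e4m3 combination now enables symmetric quantization implicitly (see the configuration change above). A rough reconstruction of the invocation the test builds, with a hypothetical model id and output path:

    import subprocess

    # Hypothetical reconstruction; the real test formats these same flags
    # into an optimum-cli call inside a temporary directory.
    subprocess.run(
        "optimum-cli export openvino --model hf-internal-testing/tiny-random-llama "
        "--task text-generation --quant-mode f8e4m3 "
        "--dataset wikitext2 --num-samples 1 --smooth-quant-alpha 0.9 "
        "--trust-remote-code /tmp/exported_model",
        shell=True,
        check=True,
    )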
@@ -418,7 +418,7 @@ def test_exporters_cli_full_quantization(
         model_type: str,
         quant_mode: str,
         option: str,
-        expected_num_fq_nodes_per_model: Tuple[int],
+        expected_num_f_nodes_per_model: Tuple[int],
         expected_num_weight_nodes_per_model: Tuple[int],
     ):
         with TemporaryDirectory() as tmpdir:
@@ -432,10 +432,10 @@ def test_exporters_cli_full_quantization(
             models = [model]
             if task == "automatic-speech-recognition":
                 models = [model.encoder, model.decoder, model.decoder_with_past]
-            self.assertEqual(len(expected_num_fq_nodes_per_model), len(models))
+            self.assertEqual(len(expected_num_f_nodes_per_model), len(models))
             for i, model in enumerate(models):
                 actual_num_f_nodes, actual_num_weight_nodes = get_num_quantized_nodes(model)
-                self.assertEqual(expected_num_fq_nodes_per_model[i], actual_num_f_nodes)
+                self.assertEqual(expected_num_f_nodes_per_model[i], actual_num_f_nodes)
                 self.assertEqual(expected_num_weight_nodes_per_model[i], actual_num_weight_nodes[quant_mode])
 
     def test_exporters_cli_int4_with_local_model_and_default_config(self):
