From ef558f9e28e2464000be4ab7d7b88986b7f84d54 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Sat, 16 Nov 2024 06:06:53 +0100 Subject: [PATCH] Add compression tests to internvl2 and phi3v (#999) * Fix NanoLLava quantization * Add internvl2 compression tests * Revert "Fix NanoLLava quantization" This reverts commit 3eba1de92dd29b9d259dfd31b8e5bb3b3dd74c29. * Add phi3 compression tests; fix phi3 preprocessors saving with optimum-cli quantization * Trigger Tests * Trigger Tests * Trigger Tests --- optimum/commands/export/openvino.py | 9 ++-- optimum/exporters/openvino/convert.py | 37 ++++++++++------ tests/openvino/test_exporters_cli.py | 20 +++++++++ tests/openvino/test_quantization.py | 64 ++++++++++++++++++++------- 4 files changed, 96 insertions(+), 34 deletions(-) diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py index 32c8c0dc16..3b6b4de69f 100644 --- a/optimum/commands/export/openvino.py +++ b/optimum/commands/export/openvino.py @@ -21,9 +21,10 @@ from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE from ...exporters import TasksManager +from ...exporters.openvino.convert import save_preprocessors from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available from ...intel.utils.modeling_utils import _infer_library_from_model_name_or_path -from ...utils.save_utils import maybe_load_preprocessors, maybe_save_preprocessors +from ...utils.save_utils import maybe_load_preprocessors from ..base import BaseOptimumCLICommand, CommandInfo @@ -350,11 +351,9 @@ def run(self): ) model.save_pretrained(self.args.output) - maybe_save_preprocessors(self.args.model, self.args.output, trust_remote_code=self.args.trust_remote_code) + preprocessors = maybe_load_preprocessors(self.args.model, trust_remote_code=self.args.trust_remote_code) + save_preprocessors(preprocessors, model.config, self.args.output, self.args.trust_remote_code) if not self.args.disable_convert_tokenizer: - preprocessors = maybe_load_preprocessors( - self.args.model, trust_remote_code=self.args.trust_remote_code - ) maybe_convert_tokenizers(library_name, self.args.output, preprocessors=preprocessors, task=task) else: # TODO : add input shapes diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index a84ecfabde..fdcfbecf53 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -21,6 +21,7 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union import onnx +from transformers import PretrainedConfig from transformers.generation import GenerationMixin from transformers.utils import is_tf_available, is_torch_available @@ -711,19 +712,7 @@ def export_from_model( f"The generation config will not be saved, saving failed with following error:\n{exception}" ) - model_name_or_path = model.config._name_or_path - if preprocessors is not None: - # phi3-vision processor does not have chat_template attribute that breaks Processor saving on disk - if is_transformers_version(">=", "4.45") and model_type == "phi3-v" and len(preprocessors) > 1: - if not hasattr(preprocessors[1], "chat_template"): - preprocessors[1].chat_template = getattr(preprocessors[0], "chat_template", None) - for processor in preprocessors: - try: - processor.save_pretrained(output) - except Exception as ex: - logger.error(f"Saving {type(processor)} failed with {ex}") - else: - maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code) + save_preprocessors(preprocessors, model.config, output, trust_remote_code) files_subpaths = ["openvino_" + model_name + ".xml" for model_name in models_and_export_configs.keys()] @@ -838,6 +827,28 @@ def export_tokenizer( save_model(model, output / file_name.format(suffix)) +def save_preprocessors( + preprocessors: List, config: PretrainedConfig, output: Union[str, Path], trust_remote_code: bool +): + model_name_or_path = config._name_or_path + if hasattr(config, "export_model_type"): + model_type = config.export_model_type.replace("_", "-") + else: + model_type = config.model_type.replace("_", "-") + if preprocessors is not None: + # phi3-vision processor does not have chat_template attribute that breaks Processor saving on disk + if is_transformers_version(">=", "4.45") and model_type == "phi3-v" and len(preprocessors) > 1: + if not hasattr(preprocessors[1], "chat_template"): + preprocessors[1].chat_template = getattr(preprocessors[0], "chat_template", None) + for processor in preprocessors: + try: + processor.save_pretrained(output) + except Exception as ex: + logger.error(f"Saving {type(processor)} failed with {ex}") + else: + maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code) + + def _add_runtime_options_to_rt_info(model: Model, options: Dict): """ Add runtime optinos diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py index f218fa05ba..783b994c1e 100644 --- a/tests/openvino/test_exporters_cli.py +++ b/tests/openvino/test_exporters_cli.py @@ -152,6 +152,26 @@ class OVCLIExportTestCase(unittest.TestCase): ] ) + if is_transformers_version(">=", "4.45.0"): + TEST_4BIT_CONFIGURATIONS.extend( + [ + ( + "image-text-to-text", + "internvl2", + 'int4 --group-size 4 --ratio 0.9 --sensitivity-metric "hessian_input_activation" ' + "--dataset contextual --num-samples 1 --trust-remote-code", + {"int8": 6, "int4": 24}, + ), + ( + "image-text-to-text", + "phi3_v", + 'int4 --group-size 4 --ratio 0.9 --sensitivity-metric "mean_activation_magnitude" ' + "--dataset contextual --num-samples 1 --trust-remote-code", + {"int8": 4, "int4": 14}, + ), + ] + ) + def _openvino_export(self, model_name: str, task: str): with TemporaryDirectory() as tmpdir: main_export( diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 7a415c3a3a..48a36f604e 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -347,23 +347,55 @@ class OVWeightCompressionTest(unittest.TestCase): ) if is_transformers_version(">=", "4.45.0"): - LOAD_IN_4_BITS_SCOPE.append( - ( - OVModelForVisualCausalLM, - "minicpmv", - True, - dict( - bits=4, - group_size=16, - dataset="contextual", - ratio=0.8, - sensitivity_metric="mean_activation_magnitude", - num_samples=1, - processor=MODEL_NAMES["minicpmv"], - trust_remote_code=True, + LOAD_IN_4_BITS_SCOPE.extend( + [ + ( + OVModelForVisualCausalLM, + "minicpmv", + True, + dict( + bits=4, + group_size=16, + dataset="contextual", + ratio=0.8, + sensitivity_metric="mean_activation_magnitude", + num_samples=1, + processor=MODEL_NAMES["minicpmv"], + trust_remote_code=True, + ), + {"int4": 22, "int8": 8}, ), - {"int4": 22, "int8": 8}, - ) + ( + OVModelForVisualCausalLM, + "internvl2", + True, + dict( + bits=4, + group_size=4, + dataset="contextual", + ratio=0.8, + sensitivity_metric="mean_activation_magnitude", + num_samples=1, + trust_remote_code=True, + ), + {"int4": 22, "int8": 8}, + ), + ( + OVModelForVisualCausalLM, + "phi3_v", + True, + dict( + bits=4, + group_size=16, + dataset="contextual", + ratio=0.8, + sensitivity_metric="mean_activation_magnitude", + num_samples=1, + trust_remote_code=True, + ), + {"int4": 14, "int8": 4}, + ), + ] ) SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION = [