From cfa1c49105fba44facf310fb8080373b3762b83e Mon Sep 17 00:00:00 2001
From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
Date: Fri, 7 Jun 2024 20:51:06 +0200
Subject: [PATCH] Fix compatibility with transformers < v4.39.0 release (#754)

* fix compatibility with transformers < v4.38.0

* add minimum supported transformers version in test

* update setup

* model id fixes

* fix model id

* fix mixtral patcher
---
 .github/workflows/test_openvino.yml         |  2 ++
 optimum/exporters/openvino/model_patcher.py | 10 +++++---
 optimum/intel/openvino/modeling_decoder.py  | 10 +++++---
 setup.py                                    |  2 +-
 tests/openvino/test_modeling.py             | 27 ++++++++++++---------
 tests/openvino/test_quantization.py         |  6 +++--
 tests/openvino/test_training.py             |  3 +++
 tests/openvino/utils_tests.py               |  2 +-
 8 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml
index a0bc916c75..37cf81fecc 100644
--- a/.github/workflows/test_openvino.yml
+++ b/.github/workflows/test_openvino.yml
@@ -21,6 +21,7 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ["3.8", "3.12"]
+        transformers-version: ["4.36.0", "4.41.*"]
         os: [ubuntu-latest]
 
     runs-on: ${{ matrix.os }}
@@ -35,6 +36,7 @@ jobs:
           python -m pip install --upgrade pip
           # install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU
           pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          pip install transformers==${{ matrix.transformers-version }}
          pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
       - name: Test with Pytest
         run: |
diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index 0265b3a5fc..9138bed02c 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -20,7 +20,6 @@
 
 import torch
 import torch.nn.functional as F
-from transformers.cache_utils import Cache, StaticCache
 from transformers.modeling_outputs import BaseModelOutputWithPast
 from transformers.utils import is_tf_available
 
@@ -36,6 +35,7 @@
 
 if TYPE_CHECKING:
+    from transformers.cache_utils import Cache
     from transformers.modeling_utils import PreTrainedModel
 
     from optimum.exporters.onnx.config import OnnxConfig
 
@@ -131,7 +131,10 @@ def _mixtral_sparse_moe_block_forward(self, hidden_states: torch.Tensor) -> torc
         # the current expert. We need to make sure to multiply the output hidden
         # states by `routing_weights` on the corresponding tokens (top-1 and top-2)
         current_state = hidden_states[None, top_x].reshape(-1, hidden_dim)
-        current_hidden_states = expert_layer(current_state) * routing_weights[top_x, idx, None]
+        if is_transformers_version("<", "4.37.0"):
+            current_hidden_states = expert_layer(current_state, routing_weights[top_x, idx, None])
+        else:
+            current_hidden_states = expert_layer(current_state) * routing_weights[top_x, idx, None]
 
         final_hidden_states.index_add_(0, top_x, current_hidden_states.to(hidden_states.dtype))
     final_hidden_states = final_hidden_states.reshape(batch_size, sequence_length, hidden_dim)
@@ -1656,9 +1659,10 @@ def _dbrx_update_causal_mask_latest(
     attention_mask: torch.Tensor,
     input_tensor: torch.Tensor,
     cache_position: torch.Tensor,
-    past_key_values: Cache,
+    past_key_values: "Cache",
     output_attentions: bool,
 ):
+    from transformers.cache_utils import StaticCache
     from transformers.modeling_attn_mask_utils import AttentionMaskConverter
 
     # TODO: As of torch==2.2.0, the `attention_mask` passed to the model in `generate` is 2D and of dynamic length even when the static
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index b504d6482c..d7f8929aa8 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -38,7 +38,7 @@
 
 from ...exporters.openvino import ensure_stateful_is_available, main_export, patch_stateful
 from ...exporters.openvino.stateful import model_has_state
-from ..utils.import_utils import is_nncf_available
+from ..utils.import_utils import is_nncf_available, is_transformers_version
 from ..utils.modeling_utils import MULTI_QUERY_ATTN_MODELS
 from .configuration import _DEFAULT_4BIT_CONFIGS, OVConfig, OVWeightQuantizationConfig, _check_default_4bit_configs
 from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel
@@ -633,8 +633,12 @@ def generate(
         negative_prompt_attention_mask: Optional[torch.Tensor] = None,
         **kwargs,
     ) -> Union[GenerateOutput, torch.LongTensor]:
-        _generation_config, _ = self._prepare_generation_config(generation_config, **kwargs)
-        generation_mode = _generation_config.get_generation_mode(assistant_model)
+        if is_transformers_version(">=", "4.39.0"):
+            _generation_config, _ = self._prepare_generation_config(generation_config, **kwargs)
+            generation_mode = _generation_config.get_generation_mode(assistant_model)
+        else:
+            _generation_config = generation_config or self.generation_config
+            generation_mode = self._get_generation_mode(_generation_config, assistant_model)
 
         is_beam_search = generation_mode in [
             GenerationMode.BEAM_SEARCH,
diff --git a/setup.py b/setup.py
index b8869f46ac..4eb89c2a3d 100644
--- a/setup.py
+++ b/setup.py
@@ -39,7 +39,7 @@
 
 TESTS_REQUIRE = [
     "accelerate",
-    "pytest<8.2",
+    "pytest>=7.2.0,<8.0.0",
     "parameterized",
     "Pillow",
     "evaluate",
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 0cb332276c..262a5396f5 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -81,7 +81,7 @@
 from optimum.intel.openvino.modeling_seq2seq import OVDecoder, OVEncoder
 from optimum.intel.openvino.modeling_timm import TimmImageProcessor
 from optimum.intel.openvino.utils import _print_compiled_model_properties
-from optimum.intel.utils.import_utils import is_openvino_version
+from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version
 from optimum.utils import (
     DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER,
     DIFFUSION_MODEL_UNET_SUBFOLDER,
@@ -528,8 +528,6 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "chatglm",
         "codegen",
         "codegen2",
-        # "data2vec-text", # TODO : enable when enabled in exporters
-        "gemma",
         "gpt2",
         "gpt_neo",
         "gpt_neox",
@@ -540,15 +538,10 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "mistral",
         "mixtral",
         "mpt",
-        "olmo",
         "opt",
         "pegasus",
         "qwen",
-        "qwen2",
-        "stablelm",
-        "starcoder2",
         "phi",
-        "phi3",
         "internlm2",
         "orion",
         "falcon",
@@ -556,15 +549,26 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "persimmon",
         "biogpt",
         "gpt_neox_japanese",
-        "cohere",
         "xglm",
         "aquila",
         "aquila2",
         "xverse",
         "internlm",
-        "dbrx",
-        "qwen2-moe",
     )
+
+    if is_transformers_version(">=", "4.40.0"):
+        SUPPORTED_ARCHITECTURES += (
+            "gemma",
+            "olmo",
+            "stablelm",
+            "starcoder2",
+            "dbrx",
+            "phi3",
+            "cohere",
+            "qwen2",
+            "qwen2-moe",
+        )
+
     GENERATION_LENGTH = 100
     REMOTE_CODE_MODELS = (
         "chatglm",
@@ -575,7 +579,6 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "qwen",
         "internlm2",
         "orion",
-        "phi3",
         "aquila",
         "aquila2",
         "xverse",
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index b7ed36d3e6..bae0ad772f 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -63,7 +63,7 @@
 
 from optimum.intel.openvino.configuration import OVQuantizationMethod, OVQuantizationConfigBase
 from optimum.intel.openvino.quantization import InferRequestWrapper
-from optimum.intel.utils.import_utils import is_openvino_version
+from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version
 from utils_tests import MODEL_NAMES, get_num_quantized_nodes, _ARCHITECTURES_TO_EXPECTED_INT8
 
 _TASK_TO_DATASET = {
@@ -89,6 +89,9 @@ def test_automodel_static_quantization(self, model_cls, model_name, expected_fak
         dataset_name, dataset_config_name, column_name = _TASK_TO_DATASET[task]
         file_name = "openvino_quantized_model.xml"
 
+        if model_name == "bert" and is_transformers_version("<", "4.41.0"):
+            expected_fake_quantize = 32
+
         def preprocess_function(examples, tokenizer):
             return tokenizer(examples[column_name], padding="max_length", max_length=128, truncation=True)
 
@@ -114,7 +117,6 @@ def preprocess_function(examples, tokenizer):
                 ov_config=ov_config,
             )
             model = model_cls.from_pretrained(tmp_dir, file_name=file_name)
-
             num_fake_quantize, num_int8, _ = get_num_quantized_nodes(model)
             self.assertEqual(expected_fake_quantize, num_fake_quantize)
             self.assertEqual(expected_int8, num_int8)
diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py
index 89d644319c..639a77b4a6 100644
--- a/tests/openvino/test_training.py
+++ b/tests/openvino/test_training.py
@@ -54,6 +54,7 @@
 )
 from optimum.intel.openvino.trainer import DEFAULT_QUANTIZATION_CONFIG, OVTrainer
 from optimum.intel.openvino.utils import OV_XML_FILE_NAME
+from optimum.intel.utils.import_utils import is_transformers_version
 
 
 F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"}
@@ -463,6 +464,7 @@ class OVTrainerTextClassificationTrainingTest(OVTrainerBaseTrainingTest):
     task = "sequence-classification"
 
     @parameterized.expand(OVTRAINER_TEXT_CLASSIFICATION_TEST_DESCRIPTORS.items())
+    @unittest.skipIf(is_transformers_version("<", "4.41.0"), reason="Mismatch in expected fake quantized op")
     def test_training(self, _, desc: OVTrainerTestDescriptor):
         self.run_ovtrainer_training_checks(desc)
 
@@ -611,6 +613,7 @@ class OVTrainerImageClassificationTrainingTest(OVTrainerBaseTrainingTest):
     task = "image-classification"
 
     @parameterized.expand(OVTRAINER_IMAGE_CLASSIFICATION_TEST_DESCRIPTORS.items())
+    @unittest.skipIf(is_transformers_version("<", "4.41.0"), reason="Mismatch in expected fake quantized op")
     def test_training(self, _, desc: OVTrainerTestDescriptor):
         self.run_ovtrainer_training_checks(desc)
 
diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index f19c248f77..a92af8c0a9 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -89,7 +89,7 @@
     "persimmon": "hf-internal-testing/tiny-random-PersimmonForCausalLM",
     "pix2struct": "fxmarty/pix2struct-tiny-random",
     "phi": "echarlaix/tiny-random-PhiForCausalLM",
-    "phi3": "katuni4ka/tiny-random-phi3",
+    "phi3": "Xenova/tiny-random-Phi3ForCausalLM",
     "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel",
     "qwen": "katuni4ka/tiny-random-qwen",
     "qwen2": "fxmarty/tiny-dummy-qwen2",
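
Editor's note: the recurring pattern in this patch is gating each fix at runtime on the installed transformers version via `is_transformers_version`. Below is a minimal sketch of how such a helper can be built with `packaging`; it is a hypothetical re-implementation for illustration, not the actual code of `optimum.intel.utils.import_utils.is_transformers_version`, which may differ in detail.

import operator

import transformers
from packaging import version

# Map comparison strings to the corresponding operator functions.
_OPS = {"<": operator.lt, "<=": operator.le, "==": operator.eq, ">=": operator.ge, ">": operator.gt}


def is_transformers_version(comparison: str, reference: str) -> bool:
    """Compare the installed transformers version against a reference string."""
    installed = version.parse(transformers.__version__)
    return _OPS[comparison](installed, version.parse(reference))


# Usage mirroring the modeling_decoder.py hunk: prefer the API introduced in
# transformers 4.39, fall back to the legacy method otherwise.
if is_transformers_version(">=", "4.39.0"):
    pass  # new path, e.g. self._prepare_generation_config(...)
else:
    pass  # legacy path, e.g. self._get_generation_mode(...)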
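The model_patcher.py hunks additionally rely on deferring imports of classes that only exist in newer transformers releases (`Cache`, `StaticCache`): the module-level import is replaced by a TYPE_CHECKING-only import plus a runtime import inside the function that needs the class. A sketch of that pattern with a hypothetical function name, not the module's actual code:

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen by type checkers only, never executed at runtime, so older
    # transformers releases without cache_utils still import this module.
    from transformers.cache_utils import Cache


def _uses_static_cache(past_key_values: "Cache") -> bool:
    # Deferred runtime import: evaluated only when this code path runs,
    # i.e. on transformers versions recent enough to ship StaticCache.
    from transformers.cache_utils import StaticCache

    return isinstance(past_key_values, StaticCache)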