intel · dvrogozh · Jan 13, 2025 · Feb 26, 2025
diff --git a/.github/scripts/check-transformers.py b/.github/scripts/check-transformers.py
@@ -7,13 +7,16 @@
 parser.add_argument('junitxml', nargs='+')
 args = parser.parse_args()
 
-benchmark_failures = {
-    'link': 'https://github.com/huggingface/transformers/pull/35620',
+layernorm_accuracy_failures = {
+    'link': 'https://github.com/pytorch/pytorch/issues/141642',
     'cuda': 'passed',
 }
 
-layernorm_accuracy_failures = {
-    'link': 'https://github.com/pytorch/pytorch/issues/141642',
+# These tests were enabled for non-CUDA backends in v4.49.0 (previously
+# they were skipped for XPU):
+# https://github.com/huggingface/transformers/commit/2fa876d2d824123b80ced9d689f75a153731769b
+test_cpu_offload_failures = {
+    'link': 'https://github.com/huggingface/accelerate/issues/3402',
     'cuda': 'passed',
 }
 
@@ -30,21 +33,6 @@
 #   }
 # Use None if no metadata is needed.
 failing_cases = {
-    'tests.benchmark.test_benchmark.BenchmarkTest': {
-        'test_inference_encoder_decoder_with_configs': benchmark_failures,
-        'test_inference_fp16': benchmark_failures,
-        'test_inference_no_configs': benchmark_failures,
-        'test_inference_no_configs_only_pretrain': benchmark_failures,
-        'test_inference_no_model_no_architectures': benchmark_failures,
-        'test_inference_torchscript': benchmark_failures,
-        'test_inference_with_configs': benchmark_failures,
-        'test_save_csv_files': benchmark_failures,
-        'test_trace_memory': benchmark_failures,
-        'test_train_encoder_decoder_with_configs': benchmark_failures,
-        'test_train_no_configs': benchmark_failures,
-        'test_train_no_configs_fp16': benchmark_failures,
-        'test_train_with_configs': benchmark_failures,
-    },
     'tests.generation.test_logits_process.LogitsProcessorTest': {
         'test_watermarking_processor': { 'cuda': 'passed', },
     },
@@ -54,10 +42,15 @@
         'test_assisted_generation_early_exit': { 'cuda': 'failed', },
         'test_custom_logits_processor': { 'cuda': 'failed', },
         'test_default_max_length_warning': { 'cuda': 'failed', },
+        # v4.49.0+ (regression)
+        # https://github.com/huggingface/transformers/commit/365fecb4d0b6c87f20b93561e11c3d4c77938012
+        'test_encoder_decoder_generate_attention_mask': { 'cuda': 'failed', },
         'test_eos_token_id_int_and_list_beam_search': { 'cuda': 'failed', },
         'test_eos_token_id_int_and_list_top_k_top_sampling': { 'cuda': 'failed', },
         'test_generate_compile_fullgraph_tiny': { 'cuda': 'failed', },
-        'test_generated_length_assisted_generation': { 'cuda': 'failed', },
+        # v4.49.0+ (regression)
+        # https://github.com/huggingface/transformers/commit/da334bcfa8ff7feb85138ce90ca7340e4fc6e704
+        'test_generate_input_features_as_encoder_kwarg': { 'cuda': 'failed' },
         'test_max_new_tokens_encoder_decoder': { 'cuda': 'failed', },
         'test_min_length_if_input_embeds': { 'cuda': 'passed' },
         'test_model_kwarg_assisted_decoding_decoder_only': { 'cuda': 'failed' },
@@ -66,6 +59,21 @@
         'test_prepare_inputs_for_generation_decoder_llm': { 'cuda': 'failed' },
         'test_stop_sequence_stopping_criteria': { 'cuda': 'failed' },
     },
+    'tests.models.blip.test_modeling_blip.BlipTextImageModelTest': {
+        'test_cpu_offload': test_cpu_offload_failures,
+        'test_disk_offload_bin': test_cpu_offload_failures,
+        'test_disk_offload_safetensors': test_cpu_offload_failures,
+    },
+    'tests.models.blip.test_modeling_blip.BlipVQAModelTest': {
+        'test_cpu_offload': test_cpu_offload_failures,
+        'test_disk_offload_bin': test_cpu_offload_failures,
+        'test_disk_offload_safetensors': test_cpu_offload_failures,
+    },
+    'tests.models.dab_detr.test_modeling_dab_detr.DabDetrModelTest': {
+        'test_cpu_offload': test_cpu_offload_failures,
+        'test_disk_offload_bin': test_cpu_offload_failures,
+        'test_disk_offload_safetensors': test_cpu_offload_failures,
+    },
     'tests.models.detr.test_image_processing_detr.DetrImageProcessingTest': {
         'test_fast_is_faster_than_slow': { 'flaky': True },
     },
@@ -75,28 +83,70 @@
     'tests.models.encoder_decoder.test_modeling_encoder_decoder.BartEncoderDecoderModelTest': {
         'test_save_and_load_from_pretrained': { 'flaky': True },
     },
-    'tests.models.fuyu.test_modeling_fuyu.FuyuModelTest': {
-        'test_prompt_lookup_decoding_matches_greedy_search': { 'flaky': True },
-    },
     'tests.models.git.test_modeling_git.GitModelTest': {
         'test_generate_continue_from_past_key_values': { 'flaky': True, 'cuda': 'passed' },
-        'test_inputs_embeds_matches_input_ids': { 'cuda': 'passed' },
     },
     'tests.models.hiera.test_modeling_hiera.HieraModelTest': {
         'test_torch_fx': layernorm_accuracy_failures,
         'test_torch_fx_output_loss': layernorm_accuracy_failures,
     },
+    'tests.models.llava.test_modeling_llava.LlavaForConditionalGenerationModelTest': {
+        # v4.49.0+ (regression)
+        # https://github.com/huggingface/transformers/commit/bcfc9d795e1330faaa8b39ffa18732f8b40fe7c0
+        'test_config': { 'cuda': 'failed' },
+    },
     'tests.models.mamba.test_modeling_mamba.MambaIntegrationTests': {
         'test_simple_generate_1_cpu': { 'cuda': 'passed' },
     },
+    # v4.49.0+ (new test)
+    # https://github.com/huggingface/transformers/commit/be2ac0916a7902e1683d708805270142257a254a
+    'tests.models.paligemma.test_modeling_paligemma.PaliGemmaForConditionalGenerationModelTest': {
+        'test_generate_compilation_all_outputs': { 'cuda': 'failed' },
+    },
+    # v4.49.0+ (new test)
+    # https://github.com/huggingface/transformers/commit/be2ac0916a7902e1683d708805270142257a254a
+    'tests.models.paligemma2.test_modeling_paligemma2.PaliGemma2ForConditionalGenerationModelTest': {
+        'test_generate_compilation_all_outputs': { 'cuda': 'failed' },
+    },
     'tests.models.pix2struct.test_modeling_pix2struct.Pix2StructModelTest': {
         'test_new_cache_format_0': { 'cuda': 'passed' },
         'test_new_cache_format_1': { 'cuda': 'passed' },
         'test_new_cache_format_2': { 'cuda': 'passed' },
     },
+    'tests.models.qwen2_5_vl.test_processor_qwen2_5_vl.Qwen2_5_VLProcessorTest': {
+        # v4.49.0+ (new test)
+        # https://github.com/huggingface/transformers/commit/15ec971b8ec999c6a511debe04ba32c115fb7413
+        'test_chat_template_video_custom_sampling': { 'cuda': 'failed' },
+        # v4.49.0+ (new test)
+        # https://github.com/huggingface/transformers/commit/15ec971b8ec999c6a511debe04ba32c115fb7413
+        'test_chat_template_video_special_processing': { 'cuda': 'failed' },
+    },
+    'tests.models.qwen2_vl.test_processor_qwen2_vl.Qwen2VLProcessorTest': {
+        'test_chat_template_video_custom_sampling': { 'cuda': 'failed' },
+        'test_chat_template_video_special_processing': { 'cuda': 'failed' },
+    },
+    # These fail with a different signature than the issue linked in 'test_cpu_offload_failures'
+    'tests.models.roberta.test_modeling_roberta.RobertaModelTest': {
+        'test_cpu_offload': { 'cuda': 'failed' },
+        'test_disk_offload_bin': { 'cuda': 'failed' },
+        'test_disk_offload_safetensors': { 'cuda': 'failed' },
+    },
+    'tests.models.rt_detr.test_image_processing_rt_detr.RtDetrImageProcessingTest': {
+        'test_fast_is_faster_than_slow': { 'flaky': True },
+    },
     'tests.models.speecht5.test_modeling_speecht5.SpeechT5ForTextToSpeechIntegrationTests': {
         'test_batch_generation': { 'cuda': 'passed' },
     },
+    'tests.models.vilt.test_modeling_vilt.ViltModelTest': {
+        'test_cpu_offload': test_cpu_offload_failures,
+        'test_disk_offload_bin': test_cpu_offload_failures,
+        'test_disk_offload_safetensors': test_cpu_offload_failures,
+    },
+    'tests.pipelines.test_pipelines_audio_classification.AudioClassificationPipelineTests': {
+        # v4.49.0+ (new test)
+        # https://github.com/huggingface/transformers/commit/f19135afc77053834f1b0cdf46d9a6bf7faf7cc3
+        'test_small_model_pt_fp16': { 'cuda': "failed", 'link': 'https://github.com/huggingface/transformers/issues/36340' },
+    },
     'tests.pipelines.test_pipelines_automatic_speech_recognition.AutomaticSpeechRecognitionPipelineTests': {
         'test_small_model_pt_seq2seq': { 'cuda': "failed" },
     },
@@ -113,19 +163,29 @@
         'test_small_model_pt': { 'cuda': "failed" },
     },
     'tests.pipelines.test_pipelines_text_generation.TextGenerationPipelineTests': {
+        # v4.49.0+ (new test)
+        # https://github.com/huggingface/transformers/commit/23d782ead2fceec3e197c57de70489ccfc3bd0ee
+        'test_return_dict_in_generate': { 'cuda': "failed" },
+        # v4.47.0+
         'test_small_model_pt': { 'cuda': "failed" },
+        # v4.49.0+ (generalized for non-cuda devices)
+        # https://github.com/huggingface/transformers/commit/2fa876d2d824123b80ced9d689f75a153731769b
+        'test_small_model_pt_bloom_accelerate': { 'cuda': "failed" },
+        # v4.47.0+
         'test_stop_sequence_stopping_criteria': { 'cuda': "failed" },
     },
-    'tests.pipelines.test_pipelines_video_classification.VideoClassificationPipelineTests': {
-        'test_small_model_pt': { 'cuda': "failed" },
-    },
     'tests.pipelines.test_pipelines_visual_question_answering.VisualQuestionAnsweringPipelineTests': {
         'test_small_model_pt_blip2': { 'cuda': "failed" },
     },
     'tests.pipelines.test_pipelines_zero_shot_image_classification.ZeroShotImageClassificationPipelineTests': {
         'test_small_model_pt': { 'cuda': "failed" },
         'test_small_model_pt_fp16': { 'cuda': "failed" },
     },
+    'tests.test_pipeline_mixin.AudioClassificationPipelineTests': {
+        # v4.49.0+ (new test)
+        # https://github.com/huggingface/transformers/commit/f19135afc77053834f1b0cdf46d9a6bf7faf7cc3
+        'test_small_model_pt_fp16': { 'cuda': "failed", 'link': 'https://github.com/huggingface/transformers/issues/36340' },
+    },
     'tests.test_pipeline_mixin.AutomaticSpeechRecognitionPipelineTests': {
         'test_small_model_pt_seq2seq': { 'cuda': "failed" },
     },
@@ -139,19 +199,29 @@
         'test_small_model_pt': { 'cuda': "failed" },
     },
     'tests.test_pipeline_mixin.TextGenerationPipelineTests': {
+        # v4.49.0+ (new test)
+        # https://github.com/huggingface/transformers/commit/23d782ead2fceec3e197c57de70489ccfc3bd0ee
+        'test_return_dict_in_generate': { 'cuda': "failed" },
+        # v4.47.0+
         'test_small_model_pt': { 'cuda': "failed" },
+        # v4.49.0+ (generalized for non-cuda devices)
+        # https://github.com/huggingface/transformers/commit/2fa876d2d824123b80ced9d689f75a153731769b
+        'test_small_model_pt_bloom_accelerate': { 'cuda': "failed" },
+        # v4.47.0+
         'test_stop_sequence_stopping_criteria': { 'cuda': "failed" },
     },
-    'tests.test_pipeline_mixin.VideoClassificationPipelineTests': {
-        'test_small_model_pt': { 'cuda': "failed" },
-    },
     'tests.test_pipeline_mixin.VisualQuestionAnsweringPipelineTests': {
         'test_small_model_pt_blip2': { 'cuda': "failed" },
     },
     'tests.test_pipeline_mixin.ZeroShotImageClassificationPipelineTests': {
         'test_small_model_pt': { 'cuda': "failed" },
         'test_small_model_pt_fp16': { 'cuda': "failed" },
     },
+    'tests.trainer.test_trainer.TrainerIntegrationPrerunTest': {
+        # v4.49.0+ (promoted from slow tests)
+        # https://github.com/huggingface/transformers/commit/1fae54c7216e144b426e753400abdc1299d4fc74
+        'test_gradient_accumulation_loss_alignment_with_model_loss': { 'cuda': "failed" },
+    },
 }
 
 new_failures = []

diff --git a/.github/workflows/_linux_transformers.yml b/.github/workflows/_linux_transformers.yml
@@ -41,7 +41,7 @@ on:
       transformers:
         required: false
         type: string
-        default: 'v4.47.0'
+        default: 'v4.49.0'
         description: Transformers version
 
 permissions: read-all
@@ -56,7 +56,7 @@ jobs:
       DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
       python: ${{ inputs.python != '' && inputs.python || '3.10' }}
       pytorch: ${{ inputs.pytorch != '' && inputs.pytorch || 'nightly' }}
-      transformers: ${{ inputs.transformers != '' && inputs.transformers || 'v4.47.0' }}
+      transformers: ${{ inputs.transformers != '' && inputs.transformers || 'v4.49.0' }}
       PYTORCH_DEBUG_XPU_FALLBACK: '1'
       TRANSFORMERS_TEST_DEVICE_SPEC: 'spec.py'
     steps:
@@ -144,22 +144,18 @@ jobs:
         run: |
           source activate $CONDA_ENV_NAME
           cd transformers
-          python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml -k backbone tests || \
-            (echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV)
+          # Excluding tests due to:
+          # * https://github.com/huggingface/transformers/issues/36267 (marian tests)
+          python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml \
+            --ignore=tests/models/marian/test_modeling_marian.py \
+            -k backbone tests || true
       - name: Run tests/*.py
         env:
           TEST_CASE: 'tests_py'
         run: |
           source activate $CONDA_ENV_NAME
           cd transformers
           python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml tests/*.py || true
-      - name: Run tests/benchmark
-        env:
-          TEST_CASE: 'tests_benchmark'
-        run: |
-          source activate $CONDA_ENV_NAME
-          cd transformers
-          python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml tests/benchmark || true
       - name: Run tests/generation
         env:
           TEST_CASE: 'tests_generation'
@@ -177,14 +173,14 @@ jobs:
           source activate $CONDA_ENV_NAME
           cd transformers
           # Excluding tests due to:
-          # * https://github.com/huggingface/transformers/issues/35252 (CUDA specific tests)
           # * https://github.com/pytorch/pytorch/issues/140965 (aten::_linalg_eigvals)
+          # * https://github.com/huggingface/transformers/issues/36267 (marian tests)
           pattern=" \
-            not test_model_parallelization and \
-            not test_model_parallel_equal_results and \
             not test_resize_embeddings_untied and \
             not test_resize_tokens_embeddings"
-          python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml tests/models -k "$pattern" || true
+          python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml \
+            -k "$pattern" --ignore=tests/models/marian/test_modeling_marian.py \
+            tests/models || true
       - name: Run tests/pipelines
         env:
           TEST_CASE: 'tests_pipelines'
@@ -209,7 +205,7 @@ jobs:
             not TestTrainerDistributedXPU and \
             not TestFSDPTrainer"
           python3 -m pytest -rsf --make-reports=$TEST_CASE tests/trainer --junit-xml=reports/$TEST_CASE.xml -k "$pattern" || \
-            (echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV)
+            true
       - name: Run tests/utils
         env:
           TEST_CASE: 'tests_utils'
@@ -219,13 +215,13 @@ jobs:
           # Excluding tests due to:
           # * Network proxy connection issue, reason unknown
           pattern="not test_load_img_url_timeout"
-          python3 -m pytest -rsf --make-reports=$TEST_CASE tests/utils --junit-xml=reports/$TEST_CASE.xml -k "$pattern" || \
-            (echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV)
+          # 'tests/utils/test_import_utils.py' invalidates the state of the test engine, causing
+          # subsequent tests to fail. See: https://github.com/huggingface/transformers/issues/36267
+          python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml \
+            -k "$pattern" --ignore=tests/utils/test_import_utils.py \
+            tests/utils || true
       - name: Check for errors in tests
         run: |
-          FAILED_CASES=$(echo $FAILED_CASES | sed 's/^,//')
-          echo "Failed cases: [$(echo $FAILED_CASES | sed 's/,/, /g')]"
-          test -z "$FAILED_CASES"
           source activate $CONDA_ENV_NAME
           python3 torch-xpu-ops/.github/scripts/check-transformers.py transformers/reports/*.xml
       - name: Clean HF home directory and cache