From 937501c2f50abd4b2b76c056b2fd8cdd80cc02c5 Mon Sep 17 00:00:00 2001 From: Dmitry Rogozhkin Date: Mon, 13 Jan 2025 16:41:43 +0000 Subject: [PATCH 1/2] ci: update Transformers to v4.49.0 Changes: * Benchmarking scripts were removed from Transformers as of v4.49.0 due to deprecation, so we don't need to test them anymore. * Some cuda-specific tests were generalized to cover non-cuda devices, which uncovered some issues. * Some new tests were added which fail for both cuda and xpu. * A few regressions were introduced by changes on the Transformers side. Fixed tests: * https://github.com/huggingface/transformers/commit/b912f5ee438a1644247da13d789166ec77bb2304 * `tests/models/git/test_modeling_git.py::GitModelTest::test_inputs_embeds_matches_input_ids` * https://github.com/huggingface/transformers/commit/b5aaf875090388e2bbdbf2d8641ed7967365f435 * `tests/pipelines/test_pipelines_video_classification.py::VideoClassificationPipelineTests::test_small_model_pt` * `tests/test_pipeline_mixin.py::VideoClassificationPipelineTests::test_small_model_pt` * https://github.com/huggingface/transformers/commit/42c8ccfd4c466eedabc5e30ac34f242e2c9f9455 * `tests/generation/test_utils.py::GenerationIntegrationTests::test_generated_length_assisted_generation` * https://github.com/huggingface/transformers/commit/9fd123ac31b8597c5004be7986547914d5302069 * `test_model_parallelization` * `test_model_parallel_equal_results` Commits that added new tests (or enabled previously skipped tests) that fail: * https://github.com/huggingface/transformers/commit/23d782ead2fceec3e197c57de70489ccfc3bd0ee * `tests/pipelines/test_pipelines_text_generation.py::TextGenerationPipelineTests::test_return_dict_in_generate` * `tests/test_pipeline_mixin.py::TextGenerationPipelineTests::test_return_dict_in_generate` * https://github.com/huggingface/transformers/commit/2fa876d2d824123b80ced9d689f75a153731769b * `test_cpu_offload` (some of) * `test_disk_offload_bin` (some of) * `test_disk_offload_safetensors` (some of) * 
`tests/pipelines/test_pipelines_text_generation.py::TextGenerationPipelineTests::test_small_model_pt_bloom_accelerate` * https://github.com/huggingface/transformers/commit/be2ac0916a7902e1683d708805270142257a254a * `tests/models/paligemma/test_modeling_paligemma.py::PaliGemmaForConditionalGenerationModelTest::test_generate_compilation_all_outputs` * `tests/models/paligemma2/test_modeling_paligemma2.py::PaliGemma2ForConditionalGenerationModelTest::test_generate_compilation_all_outputs` * https://github.com/huggingface/transformers/issues/36340 * `tests/pipelines/test_pipelines_audio_classification.py::AudioClassificationPipelineTests::test_small_model_pt_fp16` * https://github.com/huggingface/transformers/commit/1fae54c7216e144b426e753400abdc1299d4fc74 * `tests/trainer/test_trainer.py::TrainerIntegrationPrerunTest::test_gradient_accumulation_loss_alignment_with_model_loss` * https://github.com/huggingface/transformers/commit/15ec971b8ec999c6a511debe04ba32c115fb7413 * `tests/models/qwen2_5_vl/test_processor_qwen2_5_vl.py::Qwen2_5_VLProcessorTest::test_chat_template_video_custom_sampling` * `tests/models/qwen2_5_vl/test_processor_qwen2_5_vl.py::Qwen2_5_VLProcessorTest::test_chat_template_video_special_processing` Regressions: * https://github.com/huggingface/transformers/commit/365fecb4d0b6c87f20b93561e11c3d4c77938012 * `tests/generation/test_utils.py::GenerationIntegrationTests::test_encoder_decoder_generate_attention_mask` * https://github.com/huggingface/transformers/commit/da334bcfa8ff7feb85138ce90ca7340e4fc6e704 * `tests/generation/test_utils.py::GenerationIntegrationTests::test_generate_input_features_as_encoder_kwarg` * https://github.com/huggingface/transformers/commit/bcfc9d795e1330faaa8b39ffa18732f8b40fe7c0 * `tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_config` * https://github.com/huggingface/transformers/issues/36267 * `tests/utils/test_import_utils.py` * https://github.com/huggingface/transformers/issues/36267 * 
`tests/models/marian/test_modeling_marian.py` Signed-off-by: Dmitry Rogozhkin --- .github/scripts/check-transformers.py | 130 +++++++++++++++++----- .github/workflows/_linux_transformers.yml | 32 +++--- 2 files changed, 116 insertions(+), 46 deletions(-) diff --git a/.github/scripts/check-transformers.py b/.github/scripts/check-transformers.py index daf6f2056..ef76f4b36 100644 --- a/.github/scripts/check-transformers.py +++ b/.github/scripts/check-transformers.py @@ -7,13 +7,16 @@ parser.add_argument('junitxml', nargs='+') args = parser.parse_args() -benchmark_failures = { - 'link': 'https://github.com/huggingface/transformers/pull/35620', +layernorm_accuracy_failures = { + 'link': 'https://github.com/pytorch/pytorch/issues/141642', 'cuda': 'passed', } -layernorm_accuracy_failures = { - 'link': 'https://github.com/pytorch/pytorch/issues/141642', +# Tests were enabled for non-cuda backends by v4.49.0 (previously were +# skipped for xpu): +# https://github.com/huggingface/transformers/commit/2fa876d2d824123b80ced9d689f75a153731769b +test_cpu_offload_failures = { + 'link': 'https://github.com/huggingface/accelerate/issues/3402', 'cuda': 'passed', } @@ -30,21 +33,6 @@ # } # Use None if no metadata is needed. 
failing_cases = { - 'tests.benchmark.test_benchmark.BenchmarkTest': { - 'test_inference_encoder_decoder_with_configs': benchmark_failures, - 'test_inference_fp16': benchmark_failures, - 'test_inference_no_configs': benchmark_failures, - 'test_inference_no_configs_only_pretrain': benchmark_failures, - 'test_inference_no_model_no_architectures': benchmark_failures, - 'test_inference_torchscript': benchmark_failures, - 'test_inference_with_configs': benchmark_failures, - 'test_save_csv_files': benchmark_failures, - 'test_trace_memory': benchmark_failures, - 'test_train_encoder_decoder_with_configs': benchmark_failures, - 'test_train_no_configs': benchmark_failures, - 'test_train_no_configs_fp16': benchmark_failures, - 'test_train_with_configs': benchmark_failures, - }, 'tests.generation.test_logits_process.LogitsProcessorTest': { 'test_watermarking_processor': { 'cuda': 'passed', }, }, @@ -54,10 +42,15 @@ 'test_assisted_generation_early_exit': { 'cuda': 'failed', }, 'test_custom_logits_processor': { 'cuda': 'failed', }, 'test_default_max_length_warning': { 'cuda': 'failed', }, + # v4.49.0+ (regression) + # https://github.com/huggingface/transformers/commit/365fecb4d0b6c87f20b93561e11c3d4c77938012 + 'test_encoder_decoder_generate_attention_mask': { 'cuda': 'failed', }, 'test_eos_token_id_int_and_list_beam_search': { 'cuda': 'failed', }, 'test_eos_token_id_int_and_list_top_k_top_sampling': { 'cuda': 'failed', }, 'test_generate_compile_fullgraph_tiny': { 'cuda': 'failed', }, - 'test_generated_length_assisted_generation': { 'cuda': 'failed', }, + # v4.49.0+ (regression) + # https://github.com/huggingface/transformers/commit/da334bcfa8ff7feb85138ce90ca7340e4fc6e704 + 'test_generate_input_features_as_encoder_kwarg': { 'cuda': 'failed' }, 'test_max_new_tokens_encoder_decoder': { 'cuda': 'failed', }, 'test_min_length_if_input_embeds': { 'cuda': 'passed' }, 'test_model_kwarg_assisted_decoding_decoder_only': { 'cuda': 'failed' }, @@ -66,6 +59,21 @@ 
'test_prepare_inputs_for_generation_decoder_llm': { 'cuda': 'failed' }, 'test_stop_sequence_stopping_criteria': { 'cuda': 'failed' }, }, + 'tests.models.blip.test_modeling_blip.BlipTextImageModelTest': { + 'test_cpu_offload': test_cpu_offload_failures, + 'test_disk_offload_bin': test_cpu_offload_failures, + 'test_disk_offload_safetensors': test_cpu_offload_failures, + }, + 'tests.models.blip.test_modeling_blip.BlipVQAModelTest': { + 'test_cpu_offload': test_cpu_offload_failures, + 'test_disk_offload_bin': test_cpu_offload_failures, + 'test_disk_offload_safetensors': test_cpu_offload_failures, + }, + 'tests.models.dab_detr.test_modeling_dab_detr.DabDetrModelTest': { + 'test_cpu_offload': test_cpu_offload_failures, + 'test_disk_offload_bin': test_cpu_offload_failures, + 'test_disk_offload_safetensors': test_cpu_offload_failures, + }, 'tests.models.detr.test_image_processing_detr.DetrImageProcessingTest': { 'test_fast_is_faster_than_slow': { 'flaky': True }, }, @@ -75,28 +83,70 @@ 'tests.models.encoder_decoder.test_modeling_encoder_decoder.BartEncoderDecoderModelTest': { 'test_save_and_load_from_pretrained': { 'flaky': True }, }, - 'tests.models.fuyu.test_modeling_fuyu.FuyuModelTest': { - 'test_prompt_lookup_decoding_matches_greedy_search': { 'flaky': True }, - }, 'tests.models.git.test_modeling_git.GitModelTest': { 'test_generate_continue_from_past_key_values': { 'flaky': True, 'cuda': 'passed' }, - 'test_inputs_embeds_matches_input_ids': { 'cuda': 'passed' }, }, 'tests.models.hiera.test_modeling_hiera.HieraModelTest': { 'test_torch_fx': layernorm_accuracy_failures, 'test_torch_fx_output_loss': layernorm_accuracy_failures, }, + 'tests.models.llava.test_modeling_llava.LlavaForConditionalGenerationModelTest': { + # v4.49.0+ (regression) + # https://github.com/huggingface/transformers/commit/bcfc9d795e1330faaa8b39ffa18732f8b40fe7c0 + 'test_config': { 'cuda': 'failed' }, + }, 'tests.models.mamba.test_modeling_mamba.MambaIntegrationTests': { 'test_simple_generate_1_cpu': 
{ 'cuda': 'passed' }, }, + # v4.49.0 (new test) + # https://github.com/huggingface/transformers/commit/be2ac0916a7902e1683d708805270142257a254a + 'tests.models.paligemma.test_modeling_paligemma.PaliGemmaForConditionalGenerationModelTest': { + 'test_generate_compilation_all_outputs': { 'cuda': 'failed' }, + }, + # v4.49.0 (new test) + # https://github.com/huggingface/transformers/commit/be2ac0916a7902e1683d708805270142257a254a + 'tests.models.paligemma2.test_modeling_paligemma2.PaliGemma2ForConditionalGenerationModelTest': { + 'test_generate_compilation_all_outputs': { 'cuda': 'failed' }, + }, 'tests.models.pix2struct.test_modeling_pix2struct.Pix2StructModelTest': { 'test_new_cache_format_0': { 'cuda': 'passed' }, 'test_new_cache_format_1': { 'cuda': 'passed' }, 'test_new_cache_format_2': { 'cuda': 'passed' }, }, + 'tests.models.qwen2_5_vl.test_processor_qwen2_5_vl.Qwen2_5_VLProcessorTest': { + # v4.49.0+ (new test) + # https://github.com/huggingface/transformers/commit/15ec971b8ec999c6a511debe04ba32c115fb7413 + 'test_chat_template_video_custom_sampling': { 'cuda': 'failed' }, + # v4.49.0+ (new test) + # https://github.com/huggingface/transformers/commit/15ec971b8ec999c6a511debe04ba32c115fb7413 + 'test_chat_template_video_special_processing': { 'cuda': 'failed' }, + }, + 'tests.models.qwen2_vl.test_processor_qwen2_vl.Qwen2VLProcessorTest': { + 'test_chat_template_video_custom_sampling': { 'cuda': 'failed' }, + 'test_chat_template_video_special_processing': { 'cuda': 'failed' }, + }, + # different failure signature than described in 'test_cpu_offload_failures' + 'tests.models.roberta.test_modeling_roberta.RobertaModelTest': { + 'test_cpu_offload': { 'cuda': 'failed' }, + 'test_disk_offload_bin': { 'cuda': 'failed' }, + 'test_disk_offload_safetensors': { 'cuda': 'failed' }, + }, + 'tests.models.rt_detr.test_image_processing_rt_detr.RtDetrImageProcessingTest': { + 'test_fast_is_faster_than_slow': { 'flaky': True }, + }, 
'tests.models.speecht5.test_modeling_speecht5.SpeechT5ForTextToSpeechIntegrationTests': { 'test_batch_generation': { 'cuda': 'passed' }, }, + 'tests.models.vilt.test_modeling_vilt.ViltModelTest': { + 'test_cpu_offload': test_cpu_offload_failures, + 'test_disk_offload_bin': test_cpu_offload_failures, + 'test_disk_offload_safetensors': test_cpu_offload_failures, + }, + 'tests.pipelines.test_pipelines_audio_classification.AudioClassificationPipelineTests': { + # v4.49.0+ (new test) + # https://github.com/huggingface/transformers/commit/f19135afc77053834f1b0cdf46d9a6bf7faf7cc3 + 'test_small_model_pt_fp16': { 'cuda': "failed", 'link': 'https://github.com/huggingface/transformers/issues/36340' }, + }, 'tests.pipelines.test_pipelines_automatic_speech_recognition.AutomaticSpeechRecognitionPipelineTests': { 'test_small_model_pt_seq2seq': { 'cuda': "failed" }, }, @@ -113,12 +163,17 @@ 'test_small_model_pt': { 'cuda': "failed" }, }, 'tests.pipelines.test_pipelines_text_generation.TextGenerationPipelineTests': { + # v4.49.0+ (new test) + # https://github.com/huggingface/transformers/commit/23d782ead2fceec3e197c57de70489ccfc3bd0ee + 'test_return_dict_in_generate': { 'cuda': "failed" }, + # v4.47.0+ 'test_small_model_pt': { 'cuda': "failed" }, + # v4.49.0+ (generalized for non-cuda devices) + # https://github.com/huggingface/transformers/commit/2fa876d2d824123b80ced9d689f75a153731769b + 'test_small_model_pt_bloom_accelerate': { 'cuda': "failed" }, + # v4.47.0+ 'test_stop_sequence_stopping_criteria': { 'cuda': "failed" }, }, - 'tests.pipelines.test_pipelines_video_classification.VideoClassificationPipelineTests': { - 'test_small_model_pt': { 'cuda': "failed" }, - }, 'tests.pipelines.test_pipelines_visual_question_answering.VisualQuestionAnsweringPipelineTests': { 'test_small_model_pt_blip2': { 'cuda': "failed" }, }, @@ -126,6 +181,11 @@ 'test_small_model_pt': { 'cuda': "failed" }, 'test_small_model_pt_fp16': { 'cuda': "failed" }, }, + 
'tests.test_pipeline_mixin.AudioClassificationPipelineTests': { + # v4.49.0+ (new test) + # https://github.com/huggingface/transformers/commit/f19135afc77053834f1b0cdf46d9a6bf7faf7cc3 + 'test_small_model_pt_fp16': { 'cuda': "failed", 'link': 'https://github.com/huggingface/transformers/issues/36340' }, + }, 'tests.test_pipeline_mixin.AutomaticSpeechRecognitionPipelineTests': { 'test_small_model_pt_seq2seq': { 'cuda': "failed" }, }, @@ -139,12 +199,17 @@ 'test_small_model_pt': { 'cuda': "failed" }, }, 'tests.test_pipeline_mixin.TextGenerationPipelineTests': { + # v4.49.0+ (new test) + # https://github.com/huggingface/transformers/commit/23d782ead2fceec3e197c57de70489ccfc3bd0ee + 'test_return_dict_in_generate': { 'cuda': "failed" }, + # v4.47.0+ 'test_small_model_pt': { 'cuda': "failed" }, + # v4.49.0+ (generalized for non-cuda devices) + # https://github.com/huggingface/transformers/commit/2fa876d2d824123b80ced9d689f75a153731769b + 'test_small_model_pt_bloom_accelerate': { 'cuda': "failed" }, + # v4.47.0+ 'test_stop_sequence_stopping_criteria': { 'cuda': "failed" }, }, - 'tests.test_pipeline_mixin.VideoClassificationPipelineTests': { - 'test_small_model_pt': { 'cuda': "failed" }, - }, 'tests.test_pipeline_mixin.VisualQuestionAnsweringPipelineTests': { 'test_small_model_pt_blip2': { 'cuda': "failed" }, }, @@ -152,6 +217,11 @@ 'test_small_model_pt': { 'cuda': "failed" }, 'test_small_model_pt_fp16': { 'cuda': "failed" }, }, + 'tests.trainer.test_trainer.TrainerIntegrationPrerunTest': { + # v4.49.0+ (promoted from slow tests) + # https://github.com/huggingface/transformers/commit/1fae54c7216e144b426e753400abdc1299d4fc74 + 'test_gradient_accumulation_loss_alignment_with_model_loss': { 'cuda': "failed" }, + }, } new_failures = [] diff --git a/.github/workflows/_linux_transformers.yml b/.github/workflows/_linux_transformers.yml index 32e23d19b..462d7d965 100644 --- a/.github/workflows/_linux_transformers.yml +++ b/.github/workflows/_linux_transformers.yml @@ -41,7 +41,7 @@ 
on: transformers: required: false type: string - default: 'v4.47.0' + default: 'v4.49.0' description: Transformers version permissions: read-all @@ -56,7 +56,7 @@ jobs: DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }} python: ${{ inputs.python != '' && inputs.python || '3.10' }} pytorch: ${{ inputs.pytorch != '' && inputs.pytorch || 'nightly' }} - transformers: ${{ inputs.transformers != '' && inputs.transformers || 'v4.47.0' }} + transformers: ${{ inputs.transformers != '' && inputs.transformers || 'v4.49.0' }} PYTORCH_DEBUG_XPU_FALLBACK: '1' TRANSFORMERS_TEST_DEVICE_SPEC: 'spec.py' steps: @@ -144,7 +144,11 @@ jobs: run: | source activate $CONDA_ENV_NAME cd transformers - python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml -k backbone tests || \ + # Excluding tests due to: + # * https://github.com/huggingface/transformers/issues/36267 (marian tests) + python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml \ + --ignore=tests/models/marian/test_modeling_marian.py \ + -k backbone tests || \ (echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV) - name: Run tests/*.py env: @@ -153,13 +157,6 @@ jobs: source activate $CONDA_ENV_NAME cd transformers python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml tests/*.py || true - - name: Run tests/benchmark - env: - TEST_CASE: 'tests_benchmark' - run: | - source activate $CONDA_ENV_NAME - cd transformers - python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml tests/benchmark || true - name: Run tests/generation env: TEST_CASE: 'tests_generation' @@ -177,14 +174,14 @@ jobs: source activate $CONDA_ENV_NAME cd transformers # Excluding tests due to: - # * https://github.com/huggingface/transformers/issues/35252 (CUDA specific tests) # * https://github.com/pytorch/pytorch/issues/140965 (aten::_linalg_eigvals) + # * https://github.com/huggingface/transformers/issues/36267 (marian tests) 
pattern=" \ - not test_model_parallelization and \ - not test_model_parallel_equal_results and \ not test_resize_embeddings_untied and \ not test_resize_tokens_embeddings" - python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml tests/models -k "$pattern" || true + python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml \ + -k "$pattern" --ignore=tests/models/marian/test_modeling_marian.py \ + tests/models || true - name: Run tests/pipelines env: TEST_CASE: 'tests_pipelines' @@ -219,8 +216,11 @@ jobs: # Excluding tests due to: # * Network proxy connection issue, reason unknown pattern="not test_load_img_url_timeout" - python3 -m pytest -rsf --make-reports=$TEST_CASE tests/utils --junit-xml=reports/$TEST_CASE.xml -k "$pattern" || \ - (echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV) + # 'tests/utils/test_import_utils.py' invalidates state of the test engine causing + # next tests to fail. See: https://github.com/huggingface/transformers/issues/36267 + python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml \ + -k "$pattern" --ignore=tests/utils/test_import_utils.py \ + tests/utils || (echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV) - name: Check for errors in tests run: | FAILED_CASES=$(echo $FAILED_CASES | sed 's/^,//') From 93a13666d18df966a4b2ce2589f72ddead26ecce Mon Sep 17 00:00:00 2001 From: Dmitry Rogozhkin Date: Wed, 26 Feb 2025 00:26:19 +0000 Subject: [PATCH 2/2] ci: rely on check-transformers.py to test status Signed-off-by: Dmitry Rogozhkin --- .github/workflows/_linux_transformers.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/_linux_transformers.yml b/.github/workflows/_linux_transformers.yml index 462d7d965..e2c1f85b5 100644 --- a/.github/workflows/_linux_transformers.yml +++ b/.github/workflows/_linux_transformers.yml @@ -148,8 +148,7 @@ jobs: # * 
https://github.com/huggingface/transformers/issues/36267 (marian tests) python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml \ --ignore=tests/models/marian/test_modeling_marian.py \ - -k backbone tests || \ - (echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV) + -k backbone tests || true - name: Run tests/*.py env: TEST_CASE: 'tests_py' @@ -206,7 +205,7 @@ jobs: not TestTrainerDistributedXPU and \ not TestFSDPTrainer" python3 -m pytest -rsf --make-reports=$TEST_CASE tests/trainer --junit-xml=reports/$TEST_CASE.xml -k "$pattern" || \ - (echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV) + true - name: Run tests/utils env: TEST_CASE: 'tests_utils' @@ -220,12 +219,9 @@ jobs: # next tests to fail. See: https://github.com/huggingface/transformers/issues/36267 python3 -m pytest -rsf --make-reports=$TEST_CASE --junit-xml=reports/$TEST_CASE.xml \ -k "$pattern" --ignore=tests/utils/test_import_utils.py \ - tests/utils || (echo "FAILED_CASES=$FAILED_CASES,$TEST_CASE" >> $GITHUB_ENV) + tests/utils || true - name: Check for errors in tests run: | - FAILED_CASES=$(echo $FAILED_CASES | sed 's/^,//') - echo "Failed cases: [$(echo $FAILED_CASES | sed 's/,/, /g')]" - test -z "$FAILED_CASES" source activate $CONDA_ENV_NAME python3 torch-xpu-ops/.github/scripts/check-transformers.py transformers/reports/*.xml - name: Clean HF home directory and cache