Merge branch 'main' into ns/whisper-quantization

huggingface · Dec 5, 2024 · 19e0a4f · 19e0a4f
2 parents 72bd28c + 958eb6e
commit 19e0a4f
Show file tree

Hide file tree

Showing 23 changed files with 1,172 additions and 882 deletions.
diff --git a/.github/workflows/test_inc.yml b/.github/workflows/test_inc.yml
@@ -18,7 +18,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        torch-version: ["2.4.*", "2.5.0"]
+        torch-version: ["2.4.0", "2.5.*"]
 
     runs-on: ubuntu-22.04
 
@@ -35,7 +35,7 @@ jobs:
         run: |
           pip install --upgrade pip
           pip install torch==${{ matrix.torch-version }} torchaudio torchvision --index-url https://download.pytorch.org/whl/cpu
-          pip install .[neural-compressor,ipex,diffusers,peft,tests] transformers[testing] intel-extension-for-pytorch==${{ matrix.torch-version }}
+          pip install .[neural-compressor,diffusers,peft,tests] transformers[testing] intel-extension-for-pytorch==${{ matrix.torch-version }}
 
       - name: Assert versions
         run: |

diff --git a/.github/workflows/test_ipex.yml b/.github/workflows/test_ipex.yml
@@ -18,8 +18,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        torch-version: ["2.2.0", "2.3.*", "2.4.*"]
-        transformers-version: ["4.39.0", "4.44.*"]
+        transformers-version: ["4.46.0", "4.46.3"]
+        torch-version: ["2.4.0", "2.5.*"]
 
     runs-on: ubuntu-22.04
 
@@ -38,10 +38,6 @@ jobs:
           pip install torch==${{ matrix.torch-version }} torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
           pip install .[ipex,tests] transformers[testing]==${{ matrix.transformers-version }} intel_extension_for_pytorch==${{ matrix.torch-version }}
 
-      - if: ${{ matrix.torch-version == '2.2.0' }}
-        name: Downgrade Numpy
-        run: pip install numpy==1.*
-
       - name: Assert versions
         run: |
           python -c "import torch; print(torch.__version__); assert torch.__version__.startswith('${{ matrix.torch-version }}'.replace('.*', ''))"
@@ -50,4 +46,4 @@ jobs:
 
       - name: Test with Pytest
         run: |
-          pytest tests/ipex
+          pytest tests/ipex
diff --git a/.github/workflows/test_openvino_full.yml b/.github/workflows/test_openvino_full.yml
@@ -0,0 +1,88 @@
+name: OpenVINO - Full Test
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "41 3 * * *" # run every day at 03:41 UTC
+  push:
+    branches:
+      - v*-release
+  pull_request:
+    types: [opened, synchronize, reopened, labeled]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') ||  (github.event_name == 'push') || contains( github.event.pull_request.labels.*.name, 'openvino-test') }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - python-version: "3.9"
+            os: "ubuntu-22.04"
+            transformers-version: "latest"
+            openvino: "ov-stable"
+            nncf: "nncf-stable"
+          - python-version: "3.9"
+            os: "ubuntu-22.04"
+            transformers-version: "latest"
+            openvino: "ov-nightly"
+            nncf: "nncf-stable"
+          - python-version: "3.9"
+            os: "ubuntu-22.04"
+            transformers-version: "latest"
+            openvino: "ov-stable"
+            nncf: "nncf-develop"
+          - python-version: "3.9"
+            os: "ubuntu-22.04"
+            transformers-version: "latest"
+            openvino: "ov-nightly"
+            nncf: "nncf-develop"
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
+          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          pip install .[tests]
+
+      - name: Install openvino-nightly
+        if: ${{ matrix.openvino == 'ov-nightly' }}
+        run: pip install --pre -U openvino openvino-tokenizers --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
+
+      - name: Install openvino release
+        if: ${{ matrix.openvino == 'ov-stable' }}
+        run: pip install .[openvino]
+
+      - name: Install nncf develop
+        if: ${{ matrix.nncf == 'nncf-develop' }}
+        run: pip install git+https://github.com/openvinotoolkit/nncf.git
+
+      - name: Install nncf release
+        if: ${{ matrix.nncf == 'nncf-stable' }}
+        run: pip install .[nncf]
+
+      - name: Install the lowest compatible transformers version
+        if: ${{ matrix.transformers-version != 'latest' }}
+        run: pip install transformers==${{ matrix.transformers-version }}
+
+      - name: Pip freeze
+        run: pip freeze
+
+      - name: OpenVINO tests
+        run: pytest tests/openvino --durations=0
+        env:
+          RUN_SLOW: 1
+          HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
diff --git a/.github/workflows/test_openvino_slow.yml b/.github/workflows/test_openvino_slow.yml
@@ -25,9 +25,7 @@ jobs:
       fail-fast: false
       matrix:
         os: ["ubuntu-22.04", "windows-2019"]
-        openvino-version: ["stable", "nightly"]
         transformers-version: ["4.36.0", "latest"]
-        nncf: ["nncf", "git+https://github.com/openvinotoolkit/nncf.git"]
 
     runs-on: ${{ matrix.os }}
 
@@ -47,11 +45,6 @@ jobs:
           pip install .[openvino,tests] transformers[testing]
           pip uninstall -y nncf
 
-      - if: ${{ matrix.openvino-version == 'nightly' }}
-        name: Install nightly OpenVINO
-        run: |
-          pip install openvino openvino-tokenizers --pre --upgrade --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
-
       - if: ${{ matrix.transformers-version != 'latest' }}
         name: Downgrade Transformers and Accelerate
         run: pip install transformers==${{ matrix.transformers-version }} accelerate==0.*
@@ -65,7 +58,11 @@ jobs:
 
       - name: Install dependencies (slow)
         run: |
-          pip install ${{ matrix.nncf }}
+          pip install .[nncf]
+
+      - if: ${{ matrix.transformers-version != 'latest' }}
+        name: Downgrade Transformers and Accelerate
+        run: pip install transformers==${{ matrix.transformers-version }} accelerate==0.*
 
       - name: Test with Pytest (slow)
         run: |

diff --git a/docs/source/ipex/inference.mdx b/docs/source/ipex/inference.mdx
@@ -14,8 +14,8 @@ Optimum Intel can be used to load models from the [Hub](https://huggingface.co/m
 
 ## Loading
 
-You can load your model and apply IPEX optimizations (including weight prepacking and graph mode). For supported architectures like LLaMA, BERT and ViT, further optimizations will be applied by patching the model to use custom operators.
-For now, support is only enabled for CPUs and the original model will be exported via TorchScript. In the future `torch.compile` will be used and model exported via TorchScript will get deprecated.
+You can load your model and apply IPEX optimizations (`torch.compile` is applied for non-generation tasks). For supported architectures like LLaMA, BERT and ViT, further optimizations will be applied by patching the model to use custom operators.
+For now, support is enabled for Intel CPUs and GPUs. Models previously converted to TorchScript will be deprecated in v1.22.
 
 ```diff
   import torch
@@ -25,7 +25,7 @@ For now, support is only enabled for CPUs and the original model will be exporte
 
   model_id = "gpt2"
 - model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
-+ model = IPEXModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, export=True)
++ model = IPEXModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
   tokenizer = AutoTokenizer.from_pretrained(model_id)
   pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
   results = pipe("He's a dreadful magician and")