Skip to content

Commit

Permalink
Merge branch 'main' into ns/whisper-quantization
Browse files Browse the repository at this point in the history
  • Loading branch information
nikita-savelyevv committed Dec 5, 2024
2 parents 72bd28c + 958eb6e commit 19e0a4f
Show file tree
Hide file tree
Showing 23 changed files with 1,172 additions and 882 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test_inc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
strategy:
fail-fast: false
matrix:
torch-version: ["2.4.*", "2.5.0"]
torch-version: ["2.4.0", "2.5.*"]

runs-on: ubuntu-22.04

Expand All @@ -35,7 +35,7 @@ jobs:
run: |
pip install --upgrade pip
pip install torch==${{ matrix.torch-version }} torchaudio torchvision --index-url https://download.pytorch.org/whl/cpu
pip install .[neural-compressor,ipex,diffusers,peft,tests] transformers[testing] intel-extension-for-pytorch==${{ matrix.torch-version }}
pip install .[neural-compressor,diffusers,peft,tests] transformers[testing] intel-extension-for-pytorch==${{ matrix.torch-version }}
- name: Assert versions
run: |
Expand Down
10 changes: 3 additions & 7 deletions .github/workflows/test_ipex.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ jobs:
strategy:
fail-fast: false
matrix:
torch-version: ["2.2.0", "2.3.*", "2.4.*"]
transformers-version: ["4.39.0", "4.44.*"]
transformers-version: ["4.46.0", "4.46.3"]
torch-version: ["2.4.0", "2.5.*"]

runs-on: ubuntu-22.04

Expand All @@ -38,10 +38,6 @@ jobs:
pip install torch==${{ matrix.torch-version }} torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
pip install .[ipex,tests] transformers[testing]==${{ matrix.transformers-version }} intel_extension_for_pytorch==${{ matrix.torch-version }}
- if: ${{ matrix.torch-version == '2.2.0' }}
name: Downgrade Numpy
run: pip install numpy==1.*

- name: Assert versions
run: |
python -c "import torch; print(torch.__version__); assert torch.__version__.startswith('${{ matrix.torch-version }}'.replace('.*', ''))"
Expand All @@ -50,4 +46,4 @@ jobs:
- name: Test with Pytest
run: |
pytest tests/ipex
pytest tests/ipex
88 changes: 88 additions & 0 deletions .github/workflows/test_openvino_full.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
name: OpenVINO - Full Test

# Triggers: manual dispatch, a daily schedule, pushes to release branches,
# and pull-request activity (including label changes).
on:
  workflow_dispatch:
  schedule:
    # Every day at 03:41 (GitHub evaluates cron schedules in UTC).
    - cron: "41 3 * * *"
  push:
    branches:
      - "v*-release"
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - labeled

# Runs sharing a group cancel each other. Pull-request runs are grouped by
# workflow name + head branch; other events fall back to the unique run id.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  build:
    # Dispatch, schedule and push events always run the job; pull requests
    # run it only when they carry the 'openvino-test' label.
    if: >-
      github.event_name == 'workflow_dispatch' ||
      github.event_name == 'schedule' ||
      github.event_name == 'push' ||
      contains(github.event.pull_request.labels.*.name, 'openvino-test')
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        # One entry per OpenVINO (stable/nightly) x NNCF (stable/develop) pairing.
        include:
          - python-version: "3.9"
            os: "ubuntu-22.04"
            transformers-version: "latest"
            openvino: "ov-stable"
            nncf: "nncf-stable"
          - python-version: "3.9"
            os: "ubuntu-22.04"
            transformers-version: "latest"
            openvino: "ov-nightly"
            nncf: "nncf-stable"
          - python-version: "3.9"
            os: "ubuntu-22.04"
            transformers-version: "latest"
            openvino: "ov-stable"
            nncf: "nncf-develop"
          - python-version: "3.9"
            os: "ubuntu-22.04"
            transformers-version: "latest"
            openvino: "ov-nightly"
            nncf: "nncf-develop"

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
          pip install .[tests]

      # Exactly one of the two OpenVINO install steps runs per matrix entry.
      - name: Install openvino-nightly
        if: matrix.openvino == 'ov-nightly'
        run: pip install --pre -U openvino openvino-tokenizers --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly

      - name: Install openvino release
        if: matrix.openvino == 'ov-stable'
        run: pip install .[openvino]

      # Exactly one of the two NNCF install steps runs per matrix entry.
      - name: Install nncf develop
        if: matrix.nncf == 'nncf-develop'
        run: pip install git+https://github.com/openvinotoolkit/nncf.git

      - name: Install nncf release
        if: matrix.nncf == 'nncf-stable'
        run: pip install .[nncf]

      # Skipped by every current matrix entry (all use 'latest'); it only
      # takes effect once an entry pins a concrete transformers version.
      - name: Install the lowest compatible transformers version
        if: matrix.transformers-version != 'latest'
        run: pip install transformers==${{ matrix.transformers-version }}

      # Print the resolved package set to the job log.
      - name: Pip freeze
        run: pip freeze

      - name: OpenVINO tests
        env:
          RUN_SLOW: 1
          HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
        run: pytest tests/openvino --durations=0
13 changes: 5 additions & 8 deletions .github/workflows/test_openvino_slow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ jobs:
fail-fast: false
matrix:
os: ["ubuntu-22.04", "windows-2019"]
openvino-version: ["stable", "nightly"]
transformers-version: ["4.36.0", "latest"]
nncf: ["nncf", "git+https://github.com/openvinotoolkit/nncf.git"]

runs-on: ${{ matrix.os }}

Expand All @@ -47,11 +45,6 @@ jobs:
pip install .[openvino,tests] transformers[testing]
pip uninstall -y nncf
- if: ${{ matrix.openvino-version == 'nightly' }}
name: Install nightly OpenVINO
run: |
pip install openvino openvino-tokenizers --pre --upgrade --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
- if: ${{ matrix.transformers-version != 'latest' }}
name: Downgrade Transformers and Accelerate
run: pip install transformers==${{ matrix.transformers-version }} accelerate==0.*
Expand All @@ -65,7 +58,11 @@ jobs:
- name: Install dependencies (slow)
run: |
pip install ${{ matrix.nncf }}
pip install .[nncf]
- if: ${{ matrix.transformers-version != 'latest' }}
name: Downgrade Transformers and Accelerate
run: pip install transformers==${{ matrix.transformers-version }} accelerate==0.*

- name: Test with Pytest (slow)
run: |
Expand Down
6 changes: 3 additions & 3 deletions docs/source/ipex/inference.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ Optimum Intel can be used to load models from the [Hub](https://huggingface.co/m

## Loading

You can load your model and apply IPEX optimizations (including weight prepacking and graph mode). For supported architectures like LLaMA, BERT and ViT, further optimizations will be applied by patching the model to use custom operators.
For now, support is only enabled for CPUs and the original model will be exported via TorchScript. In the future `torch.compile` will be used and model exported via TorchScript will get deprecated.
You can load your model and apply IPEX optimizations (`torch.compile` is applied for non-generation tasks). For supported architectures like LLaMA, BERT and ViT, further optimizations will be applied by patching the model to use custom operators.
For now, support is enabled for Intel CPUs and GPUs. Models previously converted to TorchScript will be deprecated in v1.22.

```diff
import torch
Expand All @@ -25,7 +25,7 @@ For now, support is only enabled for CPUs and the original model will be exporte

model_id = "gpt2"
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+ model = IPEXModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, export=True)
+ model = IPEXModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
results = pipe("He's a dreadful magician and")
Expand Down
Loading

0 comments on commit 19e0a4f

Please sign in to comment.