-
Notifications
You must be signed in to change notification settings - Fork 123
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add IPEX inference_mode contextmanager to enable optimization on Inte…
…l platform. (#125) * Add IPEX dependency * Initial support for IPEX inference mode. * Added docker image * Force float32 for now with kernel selection. * Implement default fallback in case of Exception for optimized model. * Move IPEX to optional dependency * Simplify the usage of inference_mode by forcing usage of oneDNN * Enable the use of AMP for bfloat16 * Added documentation. * Style. * Making sure we are not importing ipex if not available.
- Loading branch information
1 parent
bcf3c33
commit d16ca55
Showing
5 changed files
with
180 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# syntax = docker/dockerfile:1
# based on https://github.com/pytorch/pytorch/blob/master/Dockerfile
#
# NOTE: To build this you will need a docker version >= 19.03 and DOCKER_BUILDKIT=1
#
# If you do not use buildkit you are not going to have a good time
#
# For reference:
# https://docs.docker.com/develop/develop-images/build_enhancements/

ARG BASE_IMAGE=ubuntu:22.04
FROM ${BASE_IMAGE} AS dev-base
# Build toolchain + libraries needed to compile IPEX from source.
# NOTE: the original listed pybind11-dev twice; deduplicated here.
RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
    ca-certificates \
    git \
    curl \
    vim \
    build-essential \
    ccache \
    libgoogle-perftools-dev \
    numactl \
    cmake \
    libjpeg-dev \
    libpng-dev \
    pybind11-dev \
    && rm -rf /var/lib/apt/lists/*
RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
ENV PATH /opt/conda/bin:$PATH

FROM dev-base as conda
ARG PYTHON_VERSION=3.10
# -f: fail on HTTP errors; the redundant -O flag (conflicting with -o) was dropped.
RUN curl -fsSL -v -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    chmod +x ~/miniconda.sh && \
    ~/miniconda.sh -b -p /opt/conda && \
    rm ~/miniconda.sh && \
    /opt/conda/bin/conda install -y python=${PYTHON_VERSION} conda-build pyyaml numpy ipython mkl mkl-include ninja cython typing pybind11 Pillow && \
    /opt/conda/bin/conda clean -ya

FROM dev-base AS build
ARG IPEX_VERSION=v1.13.0
ARG PYTORCH_VERSION=v1.13.0
# torch 1.13.0 pairs with torchvision 0.14.0 / torchaudio 0.13.0
# (the original 0.13.0+cpu torchvision pin requires torch 1.12 and
# would make pip's dependency resolution fail).
ARG TORCHVISION_VERSION=0.14.0+cpu
ARG TORCHAUDIO_VERSION=0.13.0+cpu
COPY --from=conda /opt/conda /opt/conda
# Build IPEX from the pinned tag, install the wheel, then drop the source tree.
RUN --mount=type=cache,target=/opt/ccache \
    python -m pip install --no-cache-dir torch==${PYTORCH_VERSION}+cpu torchvision==${TORCHVISION_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html && \
    git clone https://github.com/intel/intel-extension-for-pytorch && \
    cd intel-extension-for-pytorch && \
    git checkout ${IPEX_VERSION} && \
    git submodule sync && \
    git submodule update --init --recursive && \
    python -m pip install --no-cache-dir -r requirements.txt && \
    python setup.py bdist_wheel && \
    python -m pip install --no-cache-dir dist/*.whl && \
    cd .. && rm -rf intel-extension-for-pytorch

FROM dev-base as dev
COPY --from=build /opt/conda /opt/conda
# OpenMP / KMP tuning knobs commonly used for Intel CPU inference.
ARG OMP_NUM_THREADS=1
ENV OMP_NUM_THREADS ${OMP_NUM_THREADS}
ARG KMP_BLOCKTIME=1
ENV KMP_BLOCKTIME ${KMP_BLOCKTIME}
ARG KMP_HW_SUBSET=1T
ENV KMP_HW_SUBSET ${KMP_HW_SUBSET}
# Preload Intel OpenMP and tcmalloc for performance.
ENV LD_PRELOAD "/opt/conda/lib/libiomp5.so /usr/lib/x86_64-linux-gnu/libtcmalloc.so"
# Keep the python version segment in sync with PYTHON_VERSION above
# (the original hard-coded python3.8 while installing python 3.10).
ENV LD_LIBRARY_PATH "/opt/conda/lib/python3.10/site-packages/lib/"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .inference import inference_mode |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
from typing import Union | ||
|
||
import torch | ||
from torch import nn | ||
from transformers import add_start_docstrings | ||
from transformers.pipelines import Pipeline | ||
from transformers.utils import is_ipex_available | ||
|
||
|
||
# Error shown when `inference_mode` is requested but intel_extension_for_pytorch
# cannot be imported. The original implicit string concatenation produced
# "...was not found.please make sure..." (missing space); fixed here.
IPEX_NOT_AVAILABLE_ERROR_MSG = (
    "Intel PyTorch Extensions was not found. "
    "Please make sure you've installed the package or run "
    "pip install intel_extension_for_pytorch"
)
|
||
if is_ipex_available(): | ||
import intel_extension_for_pytorch as ipex | ||
|
||
|
||
class _ModelFallbackWrapper: | ||
|
||
__slots__ = ("_optimized", "_default") | ||
|
||
def __init__(self, optimized, default): | ||
self._optimized = optimized | ||
self._default = default | ||
|
||
def __call__(self, *args, **kwargs): | ||
try: | ||
return self._optimized(*args, **kwargs) | ||
except Exception: | ||
return self._default(*args, **kwargs) | ||
|
||
def __getattr__(self, item): | ||
if not item.startswith("__"): | ||
return getattr(self._default, item) | ||
else: | ||
return self.item | ||
|
||
|
||
@add_start_docstrings(
    """
    inference_mode is an Intel specific context-manager analogous to PyTorch's inference_mode to use for inference
    workload on Intel CPUs, especially Intel Xeon Scalable CPUs.
    """,
)
class inference_mode:
    # __slots__ avoids a per-instance __dict__; lists every attribute assigned in __init__.
    __slots__ = ("_model", "_dtype", "_graph_mode", "_verbose", "_original")

    def __init__(self, model: Union[nn.Module, Pipeline], dtype: torch.dtype = torch.float32, verbose: bool = False):
        """
        Args:
            model (`torch.nn.Module` or `transformers.Pipeline`):
                The model or pipeline instance to optimize.
            dtype (`torch.dtype = torch.float32`), *optional*):
                The data type used to do the computation.
                Acceptable types are `torch.float32` (default) and `torch.bfloat16`.
                Please note `torch.bfloat16` requires the `avx512_bf16` instruction set as present on
                4th Generation of Intel Xeon Scalable CPUs (Sapphire Rapids).
            verbose (`boolean = False`, *optional*):
                Enable IPEX verbose output to see the kernels and optimizations applied.
        """
        # Fail fast if intel_extension_for_pytorch is not importable — the
        # module-level `ipex` name only exists when is_ipex_available() is True.
        if not is_ipex_available():
            raise ImportError(IPEX_NOT_AVAILABLE_ERROR_MSG)

        self._model = model
        # Map the boolean flag onto IPEX's verbose constants.
        self._verbose = ipex.utils.verbose.VERBOSE_ON if verbose else ipex.utils.verbose.VERBOSE_OFF
        self._dtype = dtype
        self._graph_mode = False  # Let's keep for future use when it doesn't hang anymore
        # Holds the un-optimized model so __exit__ can restore it.
        self._original = None

    def __enter__(self):
        # NOTE(review): both `torch.inference_mode()` and the `autocast`
        # blocks below are exited as soon as __enter__ returns, so they do
        # not remain active while the caller uses the returned model —
        # confirm this is intentional (they may only matter during
        # ipex.optimize's warm-up/tracing here).
        with torch.inference_mode():
            with ipex.verbose(self._verbose):
                ipex.enable_onednn_fusion(True)
                if isinstance(self._model, Pipeline):
                    # Pipelines wrap the real model in `.model`; optimize the
                    # inner model and keep the original for fallback/restore.
                    self._original = self._model.model

                    model = ipex.optimize(
                        self._model.model,
                        dtype=self._dtype,
                        graph_mode=self._graph_mode,
                        level="O1",
                        auto_kernel_selection=True,
                    )

                    # Enable automatic mixed precision (AMP) if we are going to target `bfloat16`
                    with torch.cpu.amp.autocast(enabled=(self._dtype == torch.bfloat16)):
                        # Patching model with the new one
                        self._model.model = _ModelFallbackWrapper(model, self._original)
                        return self._model
                else:
                    # Plain nn.Module: optimize it directly; no fallback
                    # wrapper is installed in this branch.
                    self._original = self._model
                    model = ipex.optimize(
                        self._model,
                        dtype=self._dtype,
                        graph_mode=self._graph_mode,
                        level="O1",
                        auto_kernel_selection=True,
                    )

                    # Enable automatic mixed precision (AMP) if we are going to target `bfloat16`
                    with torch.cpu.amp.autocast(enabled=(self._dtype == torch.bfloat16)):
                        return model

    def __exit__(self, exc_type, exc_val, exc_tb):
        # NOTE(review): in the Pipeline branch this replaces self._model (the
        # pipeline) with the inner original model, and does not restore
        # `pipeline.model` — verify the intended restore is
        # `self._model.model = self._original` for pipelines.
        self._model = self._original
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters