Add IPEX inference_mode context manager to enable optimization on Intel platform. (#125)

* Add IPEX dependency

* Initial support for IPEX inference mode.

* Added docker image

* Force float32 for now with kernel selection.

* Implement default fallback in case of Exception for optimized model.

* Move IPEX to optional dependency

* Simplify the usage of inference_mode by forcing usage of oneDNN

* Enable the use of AMP for bfloat16

* Added documentation.

* Style.

* Making sure we are not importing ipex if not available.
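
A minimal usage sketch of the context manager introduced by this commit (the pipeline task and checkpoint below are illustrative, not part of the commit):

import torch
from transformers import pipeline
from optimum.intel import inference_mode

# Hypothetical pipeline; any transformers Pipeline can be handed to inference_mode.
pipe = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")

# Inside the block, the pipeline's underlying model is swapped for an
# IPEX-optimized version (falling back to the original on any Exception);
# the original model is restored when the block exits.
with inference_mode(pipe, dtype=torch.float32) as opt_pipe:
    print(opt_pipe("IPEX should speed this forward pass up on Xeon CPUs."))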
mfuntowicz authored Dec 5, 2022
1 parent bcf3c33 commit d16ca55
Showing 5 changed files with 180 additions and 0 deletions.
70 changes: 70 additions & 0 deletions docker/Dockerfile.intel
@@ -0,0 +1,70 @@
# syntax = docker/dockerfile:1
# based on https://github.com/pytorch/pytorch/blob/master/Dockerfile
#
# NOTE: To build this you will need a docker version >= 19.03 and DOCKER_BUILDKIT=1
#
# If you do not use buildkit you are not going to have a good time
#
# For reference:
# https://docs.docker.com/develop/develop-images/build_enhancements/

ARG BASE_IMAGE=ubuntu:22.04
FROM ${BASE_IMAGE} AS dev-base
RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
ca-certificates \
git \
curl \
vim \
build-essential \
ccache \
libgoogle-perftools-dev \
numactl \
cmake \
        libjpeg-dev \
        libpng-dev \
        pybind11-dev \
&& rm -rf /var/lib/apt/lists/*
RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
ENV PATH /opt/conda/bin:$PATH

FROM dev-base as conda
ARG PYTHON_VERSION=3.10
RUN curl -fsSL -v -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
chmod +x ~/miniconda.sh && \
~/miniconda.sh -b -p /opt/conda && \
rm ~/miniconda.sh && \
/opt/conda/bin/conda install -y python=${PYTHON_VERSION} conda-build pyyaml numpy ipython mkl mkl-include ninja cython typing pybind11 Pillow && \
/opt/conda/bin/conda clean -ya

FROM dev-base AS build
ARG IPEX_VERSION=v1.13.0
ARG PYTORCH_VERSION=v1.13.0
ARG TORCHVISION_VERSION=0.14.0+cpu
ARG TORCHAUDIO_VERSION=0.13.0+cpu
COPY --from=conda /opt/conda /opt/conda
RUN --mount=type=cache,target=/opt/ccache \
python -m pip install --no-cache-dir torch==${PYTORCH_VERSION}+cpu torchvision==${TORCHVISION_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html && \
git clone https://github.com/intel/intel-extension-for-pytorch && \
cd intel-extension-for-pytorch && \
git checkout ${IPEX_VERSION} && \
git submodule sync && \
git submodule update --init --recursive && \
python -m pip install --no-cache-dir -r requirements.txt && \
python setup.py bdist_wheel && \
python -m pip install --no-cache-dir dist/*.whl && \
cd .. && rm -rf intel-extension-for-pytorch

FROM dev-base as dev
COPY --from=build /opt/conda /opt/conda
ARG OMP_NUM_THREADS=1
ENV OMP_NUM_THREADS ${OMP_NUM_THREADS}
ARG KMP_BLOCKTIME=1
ENV KMP_BLOCKTIME ${KMP_BLOCKTIME}
ARG KMP_HW_SUBSET=1T
ENV KMP_HW_SUBSET ${KMP_HW_SUBSET}
ENV LD_PRELOAD "/opt/conda/lib/libiomp5.so /usr/lib/x86_64-linux-gnu/libtcmalloc.so"
ENV LD_LIBRARY_PATH "/opt/conda/lib/python3.10/site-packages/lib/"
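
As this Dockerfile's header notes, building it requires BuildKit; a typical invocation might look like the following (the image tag is illustrative):

DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.intel -t optimum-intel-ipex .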
1 change: 1 addition & 0 deletions optimum/intel/__init__.py
@@ -12,4 +12,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from .ipex import inference_mode
from .version import __version__
1 change: 1 addition & 0 deletions optimum/intel/ipex/__init__.py
@@ -0,0 +1 @@
from .inference import inference_mode
107 changes: 107 additions & 0 deletions optimum/intel/ipex/inference.py
@@ -0,0 +1,107 @@
from typing import Union

import torch
from torch import nn
from transformers import add_start_docstrings
from transformers.pipelines import Pipeline
from transformers.utils import is_ipex_available


IPEX_NOT_AVAILABLE_ERROR_MSG = (
    "Intel PyTorch Extensions was not found. "
    "Please make sure you've installed the package or run "
    "pip install intel_extension_for_pytorch"
)

if is_ipex_available():
import intel_extension_for_pytorch as ipex


class _ModelFallbackWrapper:
    """Calls the IPEX-optimized model first and falls back to the original model if the optimized call raises."""

    __slots__ = ("_optimized", "_default")

def __init__(self, optimized, default):
self._optimized = optimized
self._default = default

def __call__(self, *args, **kwargs):
try:
return self._optimized(*args, **kwargs)
except Exception:
return self._default(*args, **kwargs)

    def __getattr__(self, item):
        if not item.startswith("__"):
            # Forward regular attribute lookups to the original model.
            return getattr(self._default, item)
        else:
            # Dunder attributes that reach __getattr__ do not exist on this wrapper.
            raise AttributeError(item)


@add_start_docstrings(
"""
    inference_mode is an Intel-specific context manager, analogous to PyTorch's `torch.inference_mode`, for running
    inference workloads on Intel CPUs, especially Intel Xeon Scalable CPUs.
""",
)
class inference_mode:
__slots__ = ("_model", "_dtype", "_graph_mode", "_verbose", "_original")

def __init__(self, model: Union[nn.Module, Pipeline], dtype: torch.dtype = torch.float32, verbose: bool = False):
"""
Args:
model (`torch.nn.Module` or `transformers.Pipeline`):
The model or pipeline instance to optimize.
            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
                The data type used for the computation.
                Acceptable types are `torch.float32` (default) and `torch.bfloat16`.
                Please note that `torch.bfloat16` requires the `avx512_bf16` instruction set, present on
                4th Generation Intel Xeon Scalable CPUs (Sapphire Rapids).
            verbose (`bool`, *optional*, defaults to `False`):
                Enable IPEX verbose output to see the kernels and optimizations applied.
"""
if not is_ipex_available():
raise ImportError(IPEX_NOT_AVAILABLE_ERROR_MSG)

self._model = model
self._verbose = ipex.utils.verbose.VERBOSE_ON if verbose else ipex.utils.verbose.VERBOSE_OFF
self._dtype = dtype
self._graph_mode = False # Let's keep for future use when it doesn't hang anymore
self._original = None

def __enter__(self):
with torch.inference_mode():
with ipex.verbose(self._verbose):
ipex.enable_onednn_fusion(True)
if isinstance(self._model, Pipeline):
self._original = self._model.model

model = ipex.optimize(
self._model.model,
dtype=self._dtype,
graph_mode=self._graph_mode,
level="O1",
auto_kernel_selection=True,
)

# Enable automatic mixed precision (AMP) if we are going to target `bfloat16`
with torch.cpu.amp.autocast(enabled=(self._dtype == torch.bfloat16)):
# Patching model with the new one
self._model.model = _ModelFallbackWrapper(model, self._original)
return self._model
else:
self._original = self._model
model = ipex.optimize(
self._model,
dtype=self._dtype,
graph_mode=self._graph_mode,
level="O1",
auto_kernel_selection=True,
)

# Enable automatic mixed precision (AMP) if we are going to target `bfloat16`
with torch.cpu.amp.autocast(enabled=(self._dtype == torch.bfloat16)):
return model

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore the original, non-optimized model (un-patching the pipeline if one was wrapped).
        if isinstance(self._model, Pipeline):
            self._model.model = self._original
        else:
            self._model = self._original
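
A hedged sketch of the bfloat16 path on a plain nn.Module (the checkpoint and input are illustrative; bfloat16 needs a CPU with `avx512_bf16`):

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from optimum.intel import inference_mode

# Hypothetical checkpoint; any PyTorch transformers model should behave similarly.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")
inputs = tokenizer("bfloat16 inference sketch.", return_tensors="pt")

# For a plain nn.Module, __enter__ returns the IPEX-optimized module directly
# (no fallback wrapper); dtype=torch.bfloat16 requests bfloat16 computation
# with AMP, per this commit's message.
with inference_mode(model, dtype=torch.bfloat16) as optimized_model:
    logits = optimized_model(**inputs).logits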
1 change: 1 addition & 0 deletions setup.py
@@ -32,6 +32,7 @@
"neural-compressor": "neural-compressor>=1.13.0",
"openvino": ["openvino>=2022.2.0", "transformers>=4.20.0,<4.24.1"],
"nncf": ["nncf"],
"ipex": ["intel_extension_for_pytorch"],
"quality": QUALITY_REQUIRES,
"tests": TESTS_REQUIRE,
}
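
With this extra in place, the optional IPEX dependency can be pulled in at install time, e.g. pip install "optimum-intel[ipex]" (assuming optimum-intel is the distribution name declared by this setup.py).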
