Skip to content

Commit

Permalink
Add IPEX sentence transformers support (#1034)
Browse files Browse the repository at this point in the history
* add import

* add IPEX sentence transformers support

* style

* fix style

* fix for python < 3.10

* Update tests/ipex/utils_tests.py

* Update tests/ipex/test_modeling.py

---------

Co-authored-by: Ilyas Moutawwakil <[email protected]>
  • Loading branch information
echarlaix and IlyasMoutawwakil authored Dec 9, 2024
1 parent 5c73548 commit 965540f
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 19 deletions.
44 changes: 29 additions & 15 deletions optimum/intel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,14 @@
from .utils import dummy_ipex_objects

_import_structure["utils.dummy_ipex_objects"] = [
name for name in dir(dummy_ipex_objects) if not name.startswith("_")
"IPEXModelForCausalLM",
"IPEXModelForSequenceClassification",
"IPEXModelForMaskedLM",
"IPEXModelForTokenClassification",
"IPEXModelForQuestionAnswering",
"IPEXModelForImageClassification",
"IPEXModelForAudioClassification",
"IPEXModel",
]
else:
_import_structure["ipex"] = [
Expand All @@ -55,6 +62,15 @@
"IPEXModel",
]

try:
if not (is_ipex_available() and is_sentence_transformers_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
_import_structure["utils.dummy_ipex_objects"].extend(["IPEXSentenceTransformer"])
else:
_import_structure["ipex"].extend(["IPEXSentenceTransformer"])


try:
if not (is_openvino_available() and is_nncf_available()):
raise OptionalDependencyNotAvailable()
Expand Down Expand Up @@ -212,15 +228,9 @@
if not (is_openvino_available() and is_sentence_transformers_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
_import_structure["utils.dummy_openvino_and_sentence_transformers_objects"] = [
"OVSentenceTransformer",
]
_import_structure["utils.dummy_openvino_and_sentence_transformers_objects"] = ["OVSentenceTransformer"]
else:
_import_structure["openvino"].extend(
[
"OVSentenceTransformer",
]
)
_import_structure["openvino"].extend(["OVSentenceTransformer"])


if TYPE_CHECKING:
Expand All @@ -241,6 +251,14 @@
IPEXModelForTokenClassification,
)

try:
if not (is_ipex_available() and is_sentence_transformers_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from .utils.dummy_ipex_objects import IPEXSentenceTransformer
else:
from .ipex import IPEXSentenceTransformer

try:
if not (is_openvino_available() and is_nncf_available()):
raise OptionalDependencyNotAvailable()
Expand Down Expand Up @@ -372,13 +390,9 @@
if not (is_openvino_available() and is_sentence_transformers_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from .utils.dummy_openvino_and_sentence_transformers_objects import (
OVSentenceTransformer,
)
from .utils.dummy_openvino_and_sentence_transformers_objects import OVSentenceTransformer
else:
from .openvino import (
OVSentenceTransformer,
)
from .openvino import OVSentenceTransformer

else:
import sys
Expand Down
7 changes: 6 additions & 1 deletion optimum/intel/ipex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from optimum.intel.ipex.modeling_base import (
from ..utils.import_utils import is_sentence_transformers_available
from .modeling_base import (
IPEXModel,
IPEXModelForAudioClassification,
IPEXModelForCausalLM,
Expand All @@ -22,3 +23,7 @@
IPEXModelForSequenceClassification,
IPEXModelForTokenClassification,
)


if is_sentence_transformers_available():
from .modeling_sentence_transformers import IPEXSentenceTransformer
98 changes: 98 additions & 0 deletions optimum/intel/ipex/modeling_sentence_transformers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from pathlib import Path
from typing import Any, Dict, Optional

import torch
from sentence_transformers import SentenceTransformer
from sentence_transformers.models import Transformer
from sentence_transformers.models.Transformer import _save_pretrained_wrapper
from sentence_transformers.util import import_from_string
from transformers import MT5Config, T5Config
from transformers.dynamic_module_utils import get_class_from_dynamic_module

from .modeling_base import IPEXModel


class IPEXTransformer(Transformer):
    """Sentence-transformers ``Transformer`` module backed by an ``IPEXModel``.

    The ``backend`` attribute is set to ``"ipex"`` and model loading is routed
    through ``IPEXModel.from_pretrained`` instead of the parent's loader.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the parent module, then tag this instance as IPEX-backed."""
        super().__init__(*args, **kwargs)
        self.backend = "ipex"

    def _load_model(self, model_name_or_path, config, cache_dir, backend, **model_args) -> None:
        """Load the underlying model through IPEX.

        The ``backend`` argument is accepted for signature compatibility but is
        not forwarded: loading always goes through ``_load_ipex_model``.
        """
        self._load_ipex_model(model_name_or_path, config, cache_dir, **model_args)

    def _load_ipex_model(self, model_name_or_path, config, cache_dir, **model_args) -> None:
        """Instantiate ``self.auto_model`` as an ``IPEXModel``.

        Args:
            model_name_or_path: Model identifier or filesystem path.
            config: Model configuration; T5 / MT5 configurations are rejected.
            cache_dir: Cache directory forwarded to ``from_pretrained``.
            **model_args: Extra keyword arguments forwarded to
                ``IPEXModel.from_pretrained``. An optional ``export`` entry is
                consumed here; when absent (or ``None``) it defaults to the
                negation of ``config.torchscript`` (treated as ``False`` when
                the attribute is missing).

        Raises:
            ValueError: If ``config`` is a ``T5Config`` or ``MT5Config``.
        """
        if isinstance(config, (T5Config, MT5Config)):
            raise ValueError("T5 models are not yet supported by the IPEX backend.")

        requested_export = model_args.pop("export", None)
        export = (not getattr(config, "torchscript", False)) if requested_export is None else requested_export

        # Record whether the path exists on disk before loading; this is passed
        # to the save warning below.
        is_local = Path(model_name_or_path).exists()

        self.auto_model = IPEXModel.from_pretrained(
            model_name_or_path,
            config=config,
            cache_dir=cache_dir,
            export=export,
            **model_args,
        )

        # Wrap the save_pretrained method to save the model in the correct subfolder
        original_save = self.auto_model._save_pretrained
        self.auto_model._save_pretrained = _save_pretrained_wrapper(original_save, "ipex")

        # Warn the user to save the model if they haven't already
        if export:
            self._backend_warn_to_save(model_name_or_path, is_local, "IPEX")


class IPEXSentenceTransformer(SentenceTransformer):
    """``SentenceTransformer`` variant that swaps in ``IPEXTransformer`` modules."""

    def __init__(self, *args, **kwargs):
        """Initialise the parent model, then tag this instance as IPEX-backed."""
        super().__init__(*args, **kwargs)

        self.backend = "ipex"

    def _load_module_class_from_ref(
        self,
        class_ref: str,
        model_name_or_path: str,
        trust_remote_code: bool,
        revision: Optional[str] = None,
        model_kwargs: Optional[Dict[str, Any]] = None,
    ) -> torch.nn.Module:
        """Resolve a dotted module reference to the object it names.

        References to ``sentence_transformers.models.Transformer`` are redirected
        to ``IPEXTransformer``; any other ``sentence_transformers.*`` reference is
        imported unchanged. For other references, when ``trust_remote_code`` is
        set, the class is first looked up as a dynamic module of
        ``model_name_or_path``; an ``OSError`` from that lookup falls back to a
        plain import.

        Note: when ``model_kwargs`` is non-empty and the remote-code path is
        taken, its ``"code_revision"`` entry is popped from the dict passed in.
        """
        if class_ref.startswith("sentence_transformers."):
            resolved_ref = class_ref
            if resolved_ref == "sentence_transformers.models.Transformer":
                resolved_ref = "optimum.intel.ipex.modeling_sentence_transformers.IPEXTransformer"
            return import_from_string(resolved_ref)

        if trust_remote_code:
            code_revision = None
            if model_kwargs:
                code_revision = model_kwargs.pop("code_revision", None)
            try:
                return get_class_from_dynamic_module(
                    class_ref,
                    model_name_or_path,
                    revision=revision,
                    code_revision=code_revision,
                )
            except OSError:
                # Ignore the error if the file does not exist, and fall back to the default import
                pass

        return import_from_string(class_ref)
11 changes: 11 additions & 0 deletions optimum/intel/utils/dummy_ipex_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,14 @@ def __init__(self, *args, **kwargs):
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["ipex"])


class IPEXSentenceTransformer(metaclass=DummyObject):
    """Placeholder for ``IPEXSentenceTransformer``.

    Both entry points delegate to ``requires_backends`` with the ``ipex`` and
    ``sentence_transformers`` backends.
    """

    _backends = ["ipex", "sentence_transformers"]

    def __init__(self, *args, **kwargs):
        """Run the backend requirement check; all arguments are ignored."""
        requires_backends(self, ["ipex", "sentence_transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        """Run the backend requirement check; all arguments are ignored."""
        requires_backends(cls, ["ipex", "sentence_transformers"])
40 changes: 37 additions & 3 deletions tests/ipex/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import tempfile
import time
import unittest

import os
import numpy as np
import requests
import torch
Expand All @@ -33,7 +33,6 @@
pipeline,
set_seed,
)

from optimum.intel import (
IPEXModel,
IPEXModelForAudioClassification,
Expand All @@ -43,8 +42,13 @@
IPEXModelForQuestionAnswering,
IPEXModelForSequenceClassification,
IPEXModelForTokenClassification,
IPEXSentenceTransformer,
)
from optimum.utils.testing_utils import grid_parameters
from optimum.utils.testing_utils import grid_parameters, require_sentence_transformers
from optimum.intel.utils.import_utils import is_sentence_transformers_available

if is_sentence_transformers_available():
from sentence_transformers import SentenceTransformer
from utils_tests import MODEL_NAMES, IS_XPU_AVAILABLE


Expand Down Expand Up @@ -510,3 +514,33 @@ def test_patched_model(self):
transformers_outputs = transformers_model(**inputs)
outputs = ipex_model(**inputs)
self.assertTrue(torch.allclose(outputs.logits, transformers_outputs.logits, atol=1e-4))


class IPEXSTModel(unittest.TestCase):
    """Tests for ``IPEXSentenceTransformer`` on the supported test architectures."""

    SUPPORTED_ARCHITECTURES = ("st-bert", "st-mpnet")

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    @require_sentence_transformers
    def test_compare_to_original_model(self, model_arch):
        """IPEX embeddings must match the reference SentenceTransformer within 1e-4."""
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ipex_model = IPEXSentenceTransformer(model_id)
        reference_model = SentenceTransformer(model_id)

        sentences = ["This is an example sentence", "Each sentence is converted"]
        reference_embeddings = reference_model.encode(sentences)
        ipex_embeddings = ipex_model.encode(sentences)

        embeddings_match = np.allclose(ipex_embeddings, reference_embeddings, atol=1e-4)
        self.assertTrue(embeddings_match)

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    @require_sentence_transformers
    def test_sentence_transformers_save_and_infer(self, model_arch):
        """A saved model can be reloaded from its ``ipex`` subfolder and used to encode."""
        model_id = MODEL_NAMES[model_arch]
        ipex_model = IPEXSentenceTransformer(model_id)
        with tempfile.TemporaryDirectory() as save_dir:
            ipex_model.save_pretrained(save_dir)
            reloaded_model = IPEXSentenceTransformer(save_dir, model_kwargs={"subfolder": "ipex"})
            sentences = ["This is an example sentence", "Each sentence is converted"]
            # Smoke check only: encoding must run without raising.
            reloaded_model.encode(sentences)
2 changes: 2 additions & 0 deletions tests/ipex/utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
"resnet": "hf-internal-testing/tiny-random-resnet",
"roberta": "hf-internal-testing/tiny-random-roberta",
"roformer": "hf-internal-testing/tiny-random-roformer",
"st-bert": "sentence-transformers-testing/stsb-bert-tiny-safetensors",
"st-mpnet": "sentence-transformers/all-mpnet-base-v2",
"squeezebert": "hf-internal-testing/tiny-random-squeezebert",
"t5": "hf-internal-testing/tiny-random-t5",
"unispeech": "hf-internal-testing/tiny-random-unispeech",
Expand Down

0 comments on commit 965540f

Please sign in to comment.