Add langchain test (#1123)
* Add langchain test

* add ipex tests

* fix

* add embeddings

* remove langchain version restriction for ov tests

* fix style
echarlaix authored Jan 27, 2025
1 parent 833ab0d commit 479577a
Showing 4 changed files with 122 additions and 3 deletions.
9 changes: 9 additions & 0 deletions optimum/intel/utils/import_utils.py
@@ -183,6 +183,15 @@
        _sentence_transformers_available = False


_langchain_hf_available = importlib.util.find_spec("langchain_huggingface") is not None
_langchain_hf_version = "N/A"
if _langchain_hf_available:
    try:
        _langchain_hf_version = importlib.metadata.version("langchain_huggingface")
    except importlib.metadata.PackageNotFoundError:
        _langchain_hf_available = False


def is_transformers_available():
    return _transformers_available

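The new flags follow this module's existing availability-probe pattern: find_spec detects importability, metadata.version confirms an installed distribution. A minimal sketch of downstream use, gating on both presence and version (the 0.1.2 floor is copied from the IPEX tests below):

from packaging import version

from optimum.intel.utils.import_utils import (
    _langchain_hf_available,
    _langchain_hf_version,
)

# Import langchain integrations only when the package is present and
# recent enough, mirroring the skipIf conditions in the tests below.
if _langchain_hf_available and version.parse(_langchain_hf_version) > version.parse("0.1.2"):
    from langchain_huggingface import HuggingFaceEmbeddings
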
1 change: 1 addition & 0 deletions setup.py
@@ -58,6 +58,7 @@
"peft",
"datasets[audio]>=1.4.0",
"tbb",
"langchain-huggingface",
]

QUALITY_REQUIRE = ["black~=23.1", "ruff==0.4.4"]
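Judging by the neighboring entries (peft, datasets[audio], tbb), the list being extended is presumably the test-requirements extra, so langchain-huggingface is pulled in for CI runs rather than as a runtime dependency of the library.
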
60 changes: 59 additions & 1 deletion tests/ipex/test_modeling.py
@@ -21,6 +21,7 @@
import numpy as np
import requests
import torch
from typing import Generator
from parameterized import parameterized
from PIL import Image
from transformers import (
@@ -34,6 +35,7 @@
    pipeline,
    set_seed,
)
from packaging import version
from optimum.intel import (
    IPEXModel,
    IPEXModelForAudioClassification,
@@ -47,7 +49,12 @@
    IPEXSentenceTransformer,
)
from optimum.utils.testing_utils import grid_parameters, require_sentence_transformers
from optimum.intel.utils.import_utils import is_sentence_transformers_available, is_torch_version
from optimum.intel.utils.import_utils import (
    is_sentence_transformers_available,
    is_torch_version,
    _langchain_hf_available,
    _langchain_hf_version,
)

if is_sentence_transformers_available():
    from sentence_transformers import SentenceTransformer
@@ -707,3 +714,54 @@ def test_sentence_transformers_save_and_infer(self, model_arch):
            model = IPEXSentenceTransformer(tmpdirname, model_kwargs={"subfolder": "ipex"})
            sentences = ["This is an example sentence", "Each sentence is converted"]
            model.encode(sentences)

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    @require_sentence_transformers
    @unittest.skipIf(
        not _langchain_hf_available or version.parse(_langchain_hf_version) <= version.parse("0.1.2"),
        reason="Unsupported langchain version",
    )
    def test_langchain(self, model_arch):
        from langchain_huggingface import HuggingFaceEmbeddings

        model_id = MODEL_NAMES[model_arch]
        model_kwargs = {"device": "cpu", "backend": "ipex"}

        embedding = HuggingFaceEmbeddings(
            model_name=model_id,
            model_kwargs=model_kwargs,
        )
        output = embedding.embed_query("foo bar")
        self.assertTrue(len(output) > 0)
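
Outside the test harness, the same embedding flow reduces to the sketch below; the checkpoint name is an assumption (the test resolves real ids via MODEL_NAMES), and embed_documents is the standard langchain batch counterpart of embed_query:

from langchain_huggingface import HuggingFaceEmbeddings

# Hypothetical checkpoint; any sentence-embedding model works here.
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu", "backend": "ipex"},
)
vector = embedding.embed_query("foo bar")            # one query -> list[float]
vectors = embedding.embed_documents(["foo", "bar"])  # batch -> list[list[float]]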


class IPEXLangchainTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = ("gpt2",)

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    @unittest.skipIf(
        not _langchain_hf_available or version.parse(_langchain_hf_version) <= version.parse("0.1.2"),
        reason="Unsupported langchain version",
    )
    def test_huggingface_pipeline_streaming(self, model_arch):
        from langchain_huggingface import HuggingFacePipeline

        model_id = MODEL_NAMES[model_arch]

        hf_pipe = HuggingFacePipeline.from_model_id(
            model_id=model_id,
            task="text-generation",
            pipeline_kwargs={"max_new_tokens": 10},
            backend="ipex",
        )

        generator = hf_pipe.stream("Q: How do you say 'hello' in German? A:'", stop=["."])

        self.assertIsInstance(generator, Generator)

        stream_results_string = ""
        for chunk in generator:
            self.assertIsInstance(chunk, str)
            stream_results_string += chunk  # accumulate the streamed chunks

        self.assertTrue(len(stream_results_string.strip()) > 1)
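
For reference, the streaming call exercised here reduces to the following sketch (prompt, stop sequence, and pipeline kwargs are copied from the test; gpt2 matches SUPPORTED_ARCHITECTURES):

from langchain_huggingface import HuggingFacePipeline

hf_pipe = HuggingFacePipeline.from_model_id(
    model_id="gpt2",
    task="text-generation",
    pipeline_kwargs={"max_new_tokens": 10},
    backend="ipex",
)

# stream() yields newly generated text piece by piece until a stop
# sequence is produced or the token budget runs out.
for chunk in hf_pipe.stream("Q: How do you say 'hello' in German? A:'", stop=["."]):
    print(chunk, end="", flush=True)
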
55 changes: 53 additions & 2 deletions tests/openvino/test_modeling.py
@@ -20,7 +20,7 @@
import time
import unittest
from pathlib import Path
from typing import Dict
from typing import Dict, Generator

import numpy as np
import open_clip
@@ -107,7 +107,11 @@
    _print_compiled_model_properties,
)
from optimum.intel.pipelines import pipeline as optimum_pipeline
from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version
from optimum.intel.utils.import_utils import (
    _langchain_hf_available,
    is_openvino_version,
    is_transformers_version,
)
from optimum.intel.utils.modeling_utils import _find_files_matching_pattern
from optimum.utils import (
    DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER,
@@ -2796,3 +2800,50 @@ def test_sentence_transformers_save_and_infer(self, model_arch):
            sentences = ["This is an example sentence", "Each sentence is converted"]
            model.encode(sentences)
        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    @unittest.skipIf(not _langchain_hf_available, reason="langchain not installed")
    def test_langchain(self, model_arch):
        from langchain_huggingface import HuggingFaceEmbeddings

        model_id = MODEL_NAMES[model_arch]
        model_kwargs = {"device": "cpu", "backend": "openvino"}

        embedding = HuggingFaceEmbeddings(
            model_name=model_id,
            model_kwargs=model_kwargs,
        )
        output = embedding.embed_query("foo bar")
        self.assertTrue(len(output) > 0)


class OVLangchainTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = ("gpt2",)

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    @unittest.skipIf(not _langchain_hf_available, reason="langchain not installed")
    def test_huggingface_pipeline_streaming(self, model_arch):
        from langchain_huggingface import HuggingFacePipeline

        model_id = MODEL_NAMES[model_arch]

        hf_pipe = HuggingFacePipeline.from_model_id(
            model_id=model_id,
            task="text-generation",
            pipeline_kwargs={"max_new_tokens": 10},
            backend="openvino",
        )

        generator = hf_pipe.stream("Q: How do you say 'hello' in German? A:'", stop=["."])

        self.assertIsInstance(generator, Generator)

        stream_results_string = ""
        for chunk in generator:
            self.assertIsInstance(chunk, str)
            stream_results_string += chunk  # accumulate the streamed chunks

        self.assertTrue(len(stream_results_string.strip()) > 1)

        del hf_pipe
        gc.collect()
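
Note that, in line with the "remove langchain version restriction for ov tests" item in the commit message, the OpenVINO tests above skip only when langchain_huggingface is missing, without the 0.1.2 version floor applied to their IPEX counterparts.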
