From 58aec63e15c3700622c1f7d3eb2bd116b0a23b02 Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova Date: Wed, 8 Jan 2025 17:13:35 +0400 Subject: [PATCH] Add support FluxFill inpainting pipeline (#1095) * add support FluxFill inpainting pipeline * add tests * register dummy model class * enable cli export tests --- optimum/exporters/openvino/convert.py | 39 +++++++++++------ optimum/exporters/openvino/model_configs.py | 11 ++++- optimum/intel/__init__.py | 2 + optimum/intel/openvino/__init__.py | 1 + optimum/intel/openvino/modeling_diffusion.py | 19 ++++++++- optimum/intel/openvino/utils.py | 1 + .../dummy_openvino_and_diffusers_objects.py | 11 +++++ tests/openvino/test_diffusion.py | 42 ++++++++++++++----- tests/openvino/test_exporters_cli.py | 6 ++- tests/openvino/utils_tests.py | 2 + 10 files changed, 106 insertions(+), 28 deletions(-) diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 636bc00f05..04d4944e02 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -43,6 +43,7 @@ _torch_version, _transformers_version, compare_versions, + is_diffusers_version, is_openvino_tokenizers_version, is_tokenizers_version, is_transformers_version, @@ -988,24 +989,36 @@ def _get_submodels_and_export_configs( def get_diffusion_models_for_export_ext( pipeline: "DiffusionPipeline", int_dtype: str = "int64", float_dtype: str = "fp32", exporter: str = "openvino" ): - try: - from diffusers import ( - StableDiffusion3Img2ImgPipeline, - StableDiffusion3InpaintPipeline, - StableDiffusion3Pipeline, - ) + if is_diffusers_version(">=", "0.29.0"): + from diffusers import StableDiffusion3Img2ImgPipeline, StableDiffusion3Pipeline - is_sd3 = isinstance( - pipeline, (StableDiffusion3Pipeline, StableDiffusion3InpaintPipeline, StableDiffusion3Img2ImgPipeline) - ) - except ImportError: + sd3_pipes = [StableDiffusion3Pipeline, StableDiffusion3Img2ImgPipeline] + if is_diffusers_version(">=", "0.30.0"): + from diffusers import StableDiffusion3InpaintPipeline + + sd3_pipes.append(StableDiffusion3InpaintPipeline) + + is_sd3 = isinstance(pipeline, tuple(sd3_pipes)) + else: is_sd3 = False - try: + if is_diffusers_version(">=", "0.30.0"): from diffusers import FluxPipeline - is_flux = isinstance(pipeline, FluxPipeline) - except ImportError: + flux_pipes = [FluxPipeline] + + if is_diffusers_version(">=", "0.31.0"): + from diffusers import FluxImg2ImgPipeline, FluxInpaintPipeline + + flux_pipes.extend([FluxPipeline, FluxImg2ImgPipeline, FluxInpaintPipeline]) + + if is_diffusers_version(">=", "0.32.0"): + from diffusers import FluxFillPipeline + + flux_pipes.append(FluxFillPipeline) + + is_flux = isinstance(pipeline, tuple(flux_pipes)) + else: is_flux = False if not is_sd3 and not is_flux: diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index 2fafb65575..0cfb6c958b 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -56,7 +56,12 @@ ) from optimum.utils.normalized_config import NormalizedConfig, NormalizedTextConfig, NormalizedVisionConfig -from ...intel.utils.import_utils import _transformers_version, is_diffusers_version, is_transformers_version +from ...intel.utils.import_utils import ( + _transformers_version, + is_diffusers_available, + is_diffusers_version, + is_transformers_version, +) from .model_patcher import ( AquilaModelPatcher, ArcticModelPatcher, @@ -119,6 +124,10 @@ def init_model_configs(): "image-text-to-text" ] = TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS["text-generation"] + if is_diffusers_available() and "fill" not in TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS: + TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS["fill"] = "FluxFillPipeline" + TasksManager._DIFFUSERS_TASKS_TO_MODEL_MAPPINGS["fill"] = {"flux": "FluxFillPipeline"} + supported_model_types = [ "_SUPPORTED_MODEL_TYPE", "_DIFFUSERS_SUPPORTED_MODEL_TYPE", diff --git a/optimum/intel/__init__.py b/optimum/intel/__init__.py index ad9fdca078..91aaf57ae0 100644 --- a/optimum/intel/__init__.py +++ b/optimum/intel/__init__.py @@ -126,6 +126,7 @@ "OVFluxPipeline", "OVFluxImg2ImgPipeline", "OVFluxInpaintPipeline", + "OVFluxFillPipeline", "OVPipelineForImage2Image", "OVPipelineForText2Image", "OVPipelineForInpainting", @@ -148,6 +149,7 @@ "OVFluxPipeline", "OVFluxImg2ImgPipeline", "OVFluxInpaintPipeline", + "OVFluxFillPipeline", "OVPipelineForImage2Image", "OVPipelineForText2Image", "OVPipelineForInpainting", diff --git a/optimum/intel/openvino/__init__.py b/optimum/intel/openvino/__init__.py index 3737733329..8945dc6382 100644 --- a/optimum/intel/openvino/__init__.py +++ b/optimum/intel/openvino/__init__.py @@ -82,6 +82,7 @@ if is_diffusers_available(): from .modeling_diffusion import ( OVDiffusionPipeline, + OVFluxFillPipeline, OVFluxImg2ImgPipeline, OVFluxInpaintPipeline, OVFluxPipeline, diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index d1204ca78d..c059165417 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -101,6 +101,11 @@ FluxImg2ImgPipeline = object FluxInpaintPipeline = object +if is_diffusers_version(">=", "0.32.0"): + from diffusers import FluxFillPipeline +else: + FluxFillPipeline = object + DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER = "transformer" DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER = "text_encoder_3" @@ -1458,17 +1463,23 @@ class OVFluxPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxPip class OVFluxImg2ImgPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxImg2ImgPipeline): - main_input_name = "prompt" + main_input_name = "image" export_feature = "image-to-image" auto_model_class = FluxImg2ImgPipeline class OVFluxInpaintPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxInpaintPipeline): - main_input_name = "prompt" + main_input_name = "image" export_feature = "inpainting" auto_model_class = FluxInpaintPipeline +class OVFluxFillPipeline(OVDiffusionPipeline, OVTextualInversionLoaderMixin, FluxFillPipeline): + main_input_name = "image" + export_feature = "inpainting" + auto_model_class = FluxFillPipeline + + SUPPORTED_OV_PIPELINES = [ OVStableDiffusionPipeline, OVStableDiffusionImg2ImgPipeline, @@ -1537,6 +1548,10 @@ def _get_ov_class(pipeline_class_name: str, throw_error_if_not_exist: bool = Tru OV_INPAINT_PIPELINES_MAPPING["flux"] = OVFluxInpaintPipeline OV_IMAGE2IMAGE_PIPELINES_MAPPING["flux"] = OVFluxImg2ImgPipeline +if is_diffusers_version(">=", "0.32.0"): + OV_INPAINT_PIPELINES_MAPPING["flux-fill"] = OVFluxFillPipeline + SUPPORTED_OV_PIPELINES.append(OVFluxFillPipeline) + SUPPORTED_OV_PIPELINES_MAPPINGS = [ OV_TEXT2IMAGE_PIPELINES_MAPPING, OV_IMAGE2IMAGE_PIPELINES_MAPPING, diff --git a/optimum/intel/openvino/utils.py b/optimum/intel/openvino/utils.py index 9b7ca4387c..a1950e9268 100644 --- a/optimum/intel/openvino/utils.py +++ b/optimum/intel/openvino/utils.py @@ -126,6 +126,7 @@ "stable-diffusion-xl": "OVStableDiffusionXLPipeline", "stable-diffusion-3": "OVStableDiffusion3Pipeline", "flux": "OVFluxPipeline", + "flux-fill": "OVFluxFillPipeline", "pix2struct": "OVModelForPix2Struct", "latent-consistency": "OVLatentConsistencyModelPipeline", "open_clip_text": "OVModelOpenCLIPText", diff --git a/optimum/intel/utils/dummy_openvino_and_diffusers_objects.py b/optimum/intel/utils/dummy_openvino_and_diffusers_objects.py index a6a10651cf..2e669875fc 100644 --- a/optimum/intel/utils/dummy_openvino_and_diffusers_objects.py +++ b/optimum/intel/utils/dummy_openvino_and_diffusers_objects.py @@ -211,3 +211,14 @@ def __init__(self, *args, **kwargs): @classmethod def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["openvino", "diffusers"]) + + +class OVFluxFillPipeline(metaclass=DummyObject): + _backends = ["openvino", "diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["openvino", "diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["openvino", "diffusers"]) diff --git a/tests/openvino/test_diffusion.py b/tests/openvino/test_diffusion.py index bb808ecc00..477799345b 100644 --- a/tests/openvino/test_diffusion.py +++ b/tests/openvino/test_diffusion.py @@ -667,13 +667,14 @@ class OVPipelineForInpaintingTest(unittest.TestCase): if is_transformers_version(">=", "4.40.0"): SUPPORTED_ARCHITECTURES.append("stable-diffusion-3") SUPPORTED_ARCHITECTURES.append("flux") + SUPPORTED_ARCHITECTURES.append("flux-fill") AUTOMODEL_CLASS = AutoPipelineForInpainting OVMODEL_CLASS = OVPipelineForInpainting TASK = "inpainting" - def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_type="pil"): + def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_type="pil", model_arch=""): inputs = _generate_prompts(batch_size=batch_size) inputs["image"] = _generate_images( @@ -683,7 +684,8 @@ def generate_inputs(self, height=128, width=128, batch_size=1, channel=3, input_ height=height, width=width, batch_size=batch_size, channel=1, input_type=input_type ) - inputs["strength"] = 0.75 + if model_arch != "flux-fill": + inputs["strength"] = 0.75 inputs["height"] = height inputs["width"] = width @@ -699,7 +701,12 @@ def test_load_vanilla_model_which_is_not_supported(self): @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_ov_pipeline_class_dispatch(self, model_arch: str): - auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + if model_arch != "flux-fill": + auto_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + else: + from diffusers import FluxFillPipeline + + auto_pipeline = FluxFillPipeline.from_pretrained(MODEL_NAMES[model_arch]) ov_pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) self.assertEqual(ov_pipeline.auto_model_class, auto_pipeline.__class__) @@ -713,7 +720,9 @@ def test_num_images_per_prompt(self, model_arch: str): for height in [64, 128]: for width in [64, 128]: for num_images_per_prompt in [1, 3]: - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + inputs = self.generate_inputs( + height=height, width=width, batch_size=batch_size, model_arch=model_arch + ) outputs = pipeline(**inputs, num_images_per_prompt=num_images_per_prompt).images self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, height, width, 3)) @@ -752,7 +761,9 @@ def test_shape(self, model_arch: str): height, width, batch_size = 128, 64, 1 for input_type in ["pil", "np", "pt"]: - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, input_type=input_type) + inputs = self.generate_inputs( + height=height, width=width, batch_size=batch_size, input_type=input_type, model_arch=model_arch + ) for output_type in ["pil", "np", "pt", "latent"]: inputs["output_type"] = output_type @@ -764,7 +775,7 @@ def test_shape(self, model_arch: str): elif output_type == "pt": self.assertEqual(outputs.shape, (batch_size, 3, height, width)) else: - if model_arch != "flux": + if not model_arch.startswith("flux"): out_channels = ( pipeline.unet.config.out_channels if pipeline.unet is not None @@ -782,17 +793,26 @@ def test_shape(self, model_arch: str): else: packed_height = height // pipeline.vae_scale_factor // 2 packed_width = width // pipeline.vae_scale_factor // 2 - channels = pipeline.transformer.config.in_channels + channels = ( + pipeline.transformer.config.in_channels + if model_arch != "flux-fill" + else pipeline.transformer.out_channels + ) self.assertEqual(outputs.shape, (batch_size, packed_height * packed_width, channels)) @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers def test_compare_to_diffusers_pipeline(self, model_arch: str): ov_pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) - diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + if model_arch != "flux-fill": + diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) + else: + from diffusers import FluxFillPipeline + + diffusers_pipeline = FluxFillPipeline.from_pretrained(MODEL_NAMES[model_arch]) height, width, batch_size = 64, 64, 1 - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_arch=model_arch) for output_type in ["latent", "np", "pt"]: inputs["output_type"] = output_type @@ -804,7 +824,7 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): # test generation when input resolution nondevisible on 64 height, width, batch_size = 96, 96, 1 - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_arch=model_arch) for output_type in ["latent", "np", "pt"]: inputs["output_type"] = output_type @@ -820,7 +840,7 @@ def test_image_reproducibility(self, model_arch: str): pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) height, width, batch_size = 64, 64, 1 - inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size, model_arch=model_arch) for generator_framework in ["np", "pt"]: ov_outputs_1 = pipeline(**inputs, generator=get_generator(generator_framework, SEED)) diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py index f03b4fbc57..ab6b935a16 100644 --- a/tests/openvino/test_exporters_cli.py +++ b/tests/openvino/test_exporters_cli.py @@ -27,6 +27,7 @@ from optimum.exporters.openvino.__main__ import main_export from optimum.intel import ( # noqa + OVFluxFillPipeline, OVFluxPipeline, OVLatentConsistencyModelPipeline, OVModelForAudioClassification, @@ -82,7 +83,9 @@ class OVCLIExportTestCase(unittest.TestCase): ] if is_transformers_version(">=", "4.45"): - SUPPORTED_ARCHITECTURES.extend([("text-to-image", "stable-diffusion-3"), ("text-to-image", "flux")]) + SUPPORTED_ARCHITECTURES.extend( + [("text-to-image", "stable-diffusion-3"), ("text-to-image", "flux"), ("inpainting", "flux-fill")] + ) EXPECTED_NUMBER_OF_TOKENIZER_MODELS = { "gpt2": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0, "t5": 0, # no .model file in the repository @@ -97,6 +100,7 @@ class OVCLIExportTestCase(unittest.TestCase): "stable-diffusion-xl": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0, "stable-diffusion-3": 6 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 2, "flux": 4 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0, + "flux-fill": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0, "llava": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0, } diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index cf663f0638..59fe660d52 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -66,6 +66,7 @@ "falcon-40b": "katuni4ka/tiny-random-falcon-40b", "flaubert": "hf-internal-testing/tiny-random-flaubert", "flux": "katuni4ka/tiny-random-flux", + "flux-fill": "katuni4ka/tiny-random-flux-fill", "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", "gpt2": "hf-internal-testing/tiny-random-gpt2", "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel", @@ -193,6 +194,7 @@ "open-clip": (20, 28), "stable-diffusion-3": (66, 42, 58, 30), "flux": (56, 24, 28, 64), + "flux-fill": (56, 24, 28, 64), "llava": (30, 9, 1), "llava_next": (30, 9, 1), "minicpmv": (30, 26, 1, 6),