From 179522b5149c15a61610256b6c04f943b3254ca1 Mon Sep 17 00:00:00 2001
From: Carson Katri
Date: Fri, 28 Apr 2023 11:05:42 -0400
Subject: [PATCH 1/8] Add backend api

---
 __init__.py                                   |    8 +
 api/__init__.py                               |    4 +
 api/backend.py                                |   77 ++
 api/generation_result.py                      |   23 +
 api/model.py                                  |    7 +
 diffusers_backend.py                          |   14 +
 engine/nodes/pipeline_nodes.py                |    6 +-
 generator_process/actions/control_net.py      |  964 +++++++++----------
 generator_process/actions/depth_to_image.py   |  703 +++++++-------
 generator_process/actions/image_to_image.py   |  368 ++++---
 generator_process/actions/inpaint.py          |  463 ++++-----
 generator_process/actions/prompt_to_image.py  |  355 +++----
 generator_process/models/__init__.py          |    1 -
 generator_process/models/pipeline.py          |   72 --
 operators/dream_texture.py                    |    2 +-
 operators/project.py                          |    9 +-
 preferences.py                                |  108 +--
 property_groups/dream_prompt.py               |   78 +-
 property_groups/dream_prompt_validation.py    |  132 +--
 ui/panels/dream_texture.py                    |   26 +-
 ui/panels/render_properties.py                |   15 +-
 ui/panels/upscaling.py                        |    5 -
 22 files changed, 1655 insertions(+), 1785 deletions(-)
 create mode 100644 api/__init__.py
 create mode 100644 api/backend.py
 create mode 100644 api/generation_result.py
 create mode 100644 api/model.py
 create mode 100644 diffusers_backend.py
 delete mode 100644 generator_process/models/pipeline.py

diff --git a/__init__.py b/__init__.py
index b3a2ff15..5d24d69a 100644
--- a/__init__.py
+++ b/__init__.py
@@ -49,6 +49,8 @@ def clear_modules():
 
     from . import engine
 
+    from .diffusers_backend import DiffusersBackend
+
     requirements_path_items = (
         ('requirements/win-linux-cuda.txt', 'Linux/Windows (CUDA)', 'Linux or Windows with NVIDIA GPU'),
         ('requirements/mac-mps-cpu.txt', 'Apple Silicon', 'Apple M1/M2'),
@@ -127,6 +129,9 @@ def project_use_controlnet(self, context):
     register_render_pass()
 
     register_default_presets()
+
+    # Register the default backend.
+    bpy.utils.register_class(DiffusersBackend)
 
 def unregister():
     for cls in PREFERENCE_CLASSES:
@@ -143,4 +148,7 @@ def unregister():
 
     unregister_render_pass()
 
+    # Unregister the default backend.
+    bpy.utils.unregister_class(DiffusersBackend)
+
     kill_generator()
\ No newline at end of file
diff --git a/api/__init__.py b/api/__init__.py
new file mode 100644
index 00000000..21bc242a
--- /dev/null
+++ b/api/__init__.py
@@ -0,0 +1,4 @@
+from ..property_groups.dream_prompt import *
+from .generation_result import *
+from .model import *
+from .backend import *
\ No newline at end of file
diff --git a/api/backend.py b/api/backend.py
new file mode 100644
index 00000000..b1d325c0
--- /dev/null
+++ b/api/backend.py
@@ -0,0 +1,77 @@
+import bpy
+from ..property_groups.dream_prompt import DreamPrompt
+from typing import Callable, List
+from .generation_result import GenerationResult
+from .model import Model
+
+StepCallback = Callable[[GenerationResult], None]
+Callback = Callable[[List[GenerationResult] | Exception], None]
+
+class Backend(bpy.types.PropertyGroup):
+    """A backend for Dream Textures.
+
+    Provide the following methods to create a valid backend.
+
+    ```python
+    def list_models(self) -> List[Model]
+    def generate(self, prompt: DreamPrompt, step_callback: StepCallback, callback: Callback)
+    ```
+    """
+
+    def list_models(self) -> List[Model]:
+        """Provide a list of available models.
+
+        The `id` of the model selected by the user is provided to `generate` through the `DreamPrompt`.
+        """
+        ...
+
+    def draw_prompt(self, layout, context):
+        """Draw additional UI in the 'Prompt' panel"""
+        ...
+
+    def draw_speed_optimizations(self, layout, context):
+        """Draw additional UI in the 'Speed Optimizations' panel"""
+        ...
+
+    def draw_memory_optimizations(self, layout, context):
+        """Draw additional UI in the 'Memory Optimizations' panel"""
+        ...
+
+    def draw_extra(self, layout, context):
+        """Draw additional UI in the panel"""
+        ...
+
+    def generate(
+        self,
+        prompt: DreamPrompt,
+        step_callback: StepCallback,
+        callback: Callback
+    ):
+        """A request to generate an image.
+
+        Use the `DreamPrompt` to get all of the arguments for the generation.
+        Call `step_callback` at each step as needed.
+        Call `callback` when the generation is complete.
+
+        `DreamPrompt` has several helper functions to access generation options.
+
+        ```python
+        prompt.generate_prompt() # get the full prompt string
+        prompt.get_seed() # an `int` or `None` (in which case you should provide a random seed yourself).
+        prompt.get_optimizations() # creates an `Optimizations` object.
+        ```
+
+        After collecting the necessary arguments, generate an image in the background and call `step_callback` and `callback` with the results.
+
+        > Generation should happen on a separate thread or process, as this method is called from the main thread and doing the work here would block Blender's UI.
+
+        ```python
+        call_my_api(
+            prompt=prompt.generate_prompt(),
+            seed=prompt.get_seed(),
+            on_step=lambda res: step_callback(GenerationResult(res.image, res.seed)),
+            on_response=lambda res: callback([GenerationResult(res.image, res.seed)])
+        )
+        ```
+        """
+        ...
\ No newline at end of file
diff --git a/api/generation_result.py b/api/generation_result.py
new file mode 100644
index 00000000..748336bc
--- /dev/null
+++ b/api/generation_result.py
@@ -0,0 +1,23 @@
+import numpy as np
+from dataclasses import dataclass
+from typing import List
+
+@dataclass
+class GenerationResult:
+    """The output of a `Backend`.
+
+    Create a result with an `image` and a `seed`.
+
+    ```python
+    result = GenerationResult(
+        image=np.zeros((512, 512, 3)),
+        seed=42
+    )
+    ```
+    """
+    image: np.ndarray
+    """The generated image as a NumPy array.
+    The shape should be `(height, width, channels)`, where `channels` is 3 or 4.
+ """ + seed: int + """The seed used to generate the image.""" \ No newline at end of file diff --git a/api/model.py b/api/model.py new file mode 100644 index 00000000..7c5a69e9 --- /dev/null +++ b/api/model.py @@ -0,0 +1,7 @@ +from dataclasses import dataclass + +@dataclass +class Model: + name: str + description: str + id: str \ No newline at end of file diff --git a/diffusers_backend.py b/diffusers_backend.py new file mode 100644 index 00000000..2f7c6bfe --- /dev/null +++ b/diffusers_backend.py @@ -0,0 +1,14 @@ +import bpy +from .api import Backend, Model, DreamPrompt, StepCallback, Callback + +class DiffusersBackend(Backend): + name = "HuggingFace Diffusers" + description = "Local image generation inside of Blender" + + def list_models(self): + return [ + Model("Stable Diffusion v2.1", "The 2.1 revision of SD", "stabilityai/stable-diffusion-v2-1"), + ] + + def generate(self, prompt: DreamPrompt, step_callback: StepCallback, callback: Callback): + pass \ No newline at end of file diff --git a/engine/nodes/pipeline_nodes.py b/engine/nodes/pipeline_nodes.py index d195939f..d0a43039 100644 --- a/engine/nodes/pipeline_nodes.py +++ b/engine/nodes/pipeline_nodes.py @@ -96,7 +96,7 @@ def init(self, context): def draw_buttons(self, context, layout): layout.prop(self, "task") prompt = self.prompt - layout.prop(prompt, "pipeline", text="") + layout.prop(prompt, "backend", text="") layout.prop(prompt, "model", text="") layout.prop(prompt, "scheduler", text="") layout.prop(prompt, "seamless_axes", text="") @@ -119,7 +119,6 @@ def execute(self, context, prompt, negative_prompt, width, height, steps, seed, if not isinstance(controlnets, list): controlnets = [controlnets] future = Generator.shared().control_net( - pipeline=args['pipeline'], model=args['model'], scheduler=args['scheduler'], optimizations=shared_args['optimizations'], @@ -152,7 +151,6 @@ def execute(self, context, prompt, negative_prompt, width, height, steps, seed, match self.task: case 'prompt_to_image': future = Generator.shared().prompt_to_image( - pipeline=args['pipeline'], model=args['model'], scheduler=args['scheduler'], optimizations=shared_args['optimizations'], @@ -170,7 +168,6 @@ def execute(self, context, prompt, negative_prompt, width, height, steps, seed, ) case 'image_to_image': future = Generator.shared().image_to_image( - pipeline=args['pipeline'], model=args['model'], scheduler=args['scheduler'], optimizations=shared_args['optimizations'], @@ -193,7 +190,6 @@ def execute(self, context, prompt, negative_prompt, width, height, steps, seed, ) case 'depth_to_image': future = Generator.shared().depth_to_image( - pipeline=args['pipeline'], model=args['model'], scheduler=args['scheduler'], optimizations=shared_args['optimizations'], diff --git a/generator_process/actions/control_net.py b/generator_process/actions/control_net.py index 246bd898..b564065b 100644 --- a/generator_process/actions/control_net.py +++ b/generator_process/actions/control_net.py @@ -4,14 +4,11 @@ from numpy.typing import NDArray import numpy as np import random -from .prompt_to_image import Scheduler, Optimizations, StepPreviewMode, ImageGenerationResult, _configure_model_padding, model_snapshot_folder, load_pipe -from ..models import Pipeline +from .prompt_to_image import Scheduler, Optimizations, StepPreviewMode, ImageGenerationResult, _configure_model_padding, load_pipe from .detect_seamless import SeamlessAxes def control_net( self, - pipeline: Pipeline, - model: str, scheduler: Scheduler, @@ -47,496 +44,489 @@ def control_net( **kwargs ) -> 
Generator[NDArray, None, None]: - match pipeline: - case Pipeline.STABLE_DIFFUSION: - import diffusers - from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import MultiControlNetModel, ControlNetModel - from diffusers.utils import deprecate, randn_tensor - import torch - import PIL.Image - import PIL.ImageOps - - class GeneratorPipeline(diffusers.StableDiffusionControlNetPipeline): - # copied from diffusers.StableDiffusionImg2ImgPipeline - def get_timesteps(self, num_inference_steps, strength, device): - # get the original timestep using init_timestep - init_timestep = min(int(num_inference_steps * strength), num_inference_steps) - - t_start = max(num_inference_steps - init_timestep, 0) - timesteps = self.scheduler.timesteps[t_start:] - - return timesteps, num_inference_steps - t_start - - # copied from diffusers.StableDiffusionImg2ImgPipeline - def prepare_img2img_latents(self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None): - if not isinstance(image, (torch.Tensor, PIL.Image.Image, list)): - raise ValueError( - f"`image` has to be of type `torch.Tensor`, `PIL.Image.Image` or list but is {type(image)}" - ) - - image = image.to(device=device, dtype=dtype) - - batch_size = batch_size * num_images_per_prompt - if isinstance(generator, list) and len(generator) != batch_size: - raise ValueError( - f"You have passed a list of generators of length {len(generator)}, but requested an effective batch" - f" size of {batch_size}. Make sure the batch size matches the length of the generators." - ) - - if isinstance(generator, list): - init_latents = [ - self.vae.encode(image[i : i + 1]).latent_dist.sample(generator[i]) for i in range(batch_size) - ] - init_latents = torch.cat(init_latents, dim=0) - else: - init_latents = self.vae.encode(image).latent_dist.sample(generator) - - init_latents = self.vae.config.scaling_factor * init_latents - - if batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] == 0: - # expand init_latents for batch_size - deprecation_message = ( - f"You have passed {batch_size} text prompts (`prompt`), but only {init_latents.shape[0]} initial" - " images (`image`). Initial images are now duplicating to match the number of text prompts. Note" - " that this behavior is deprecated and will be removed in a version 1.0.0. Please make sure to update" - " your script to pass as many initial images as text prompts to suppress this warning." - ) - deprecate("len(prompt) != len(image)", "1.0.0", deprecation_message, standard_warn=False) - additional_image_per_prompt = batch_size // init_latents.shape[0] - init_latents = torch.cat([init_latents] * additional_image_per_prompt, dim=0) - elif batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] != 0: - raise ValueError( - f"Cannot duplicate `image` of batch size {init_latents.shape[0]} to {batch_size} text prompts." 
- ) - else: - init_latents = torch.cat([init_latents], dim=0) - - shape = init_latents.shape - noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype) - - # get latents - init_latents = self.scheduler.add_noise(init_latents, noise, timestep) - latents = init_latents - - return latents - - # copied from diffusers.StableDiffusionInpaintPipeline - def prepare_mask_latents( - self, mask, masked_image, batch_size, height, width, dtype, device, generator, do_classifier_free_guidance - ): - # resize the mask to latents shape as we concatenate the mask to the latents - # we do that before converting to dtype to avoid breaking in case we're using cpu_offload - # and half precision - mask = torch.nn.functional.interpolate( - mask, size=(height // self.vae_scale_factor, width // self.vae_scale_factor) + import diffusers + from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import MultiControlNetModel, ControlNetModel + from diffusers.utils import deprecate, randn_tensor + import torch + import PIL.Image + import PIL.ImageOps + + class GeneratorPipeline(diffusers.StableDiffusionControlNetPipeline): + # copied from diffusers.StableDiffusionImg2ImgPipeline + def get_timesteps(self, num_inference_steps, strength, device): + # get the original timestep using init_timestep + init_timestep = min(int(num_inference_steps * strength), num_inference_steps) + + t_start = max(num_inference_steps - init_timestep, 0) + timesteps = self.scheduler.timesteps[t_start:] + + return timesteps, num_inference_steps - t_start + + # copied from diffusers.StableDiffusionImg2ImgPipeline + def prepare_img2img_latents(self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None): + if not isinstance(image, (torch.Tensor, PIL.Image.Image, list)): + raise ValueError( + f"`image` has to be of type `torch.Tensor`, `PIL.Image.Image` or list but is {type(image)}" + ) + + image = image.to(device=device, dtype=dtype) + + batch_size = batch_size * num_images_per_prompt + if isinstance(generator, list) and len(generator) != batch_size: + raise ValueError( + f"You have passed a list of generators of length {len(generator)}, but requested an effective batch" + f" size of {batch_size}. Make sure the batch size matches the length of the generators." + ) + + if isinstance(generator, list): + init_latents = [ + self.vae.encode(image[i : i + 1]).latent_dist.sample(generator[i]) for i in range(batch_size) + ] + init_latents = torch.cat(init_latents, dim=0) + else: + init_latents = self.vae.encode(image).latent_dist.sample(generator) + + init_latents = self.vae.config.scaling_factor * init_latents + + if batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] == 0: + # expand init_latents for batch_size + deprecation_message = ( + f"You have passed {batch_size} text prompts (`prompt`), but only {init_latents.shape[0]} initial" + " images (`image`). Initial images are now duplicating to match the number of text prompts. Note" + " that this behavior is deprecated and will be removed in a version 1.0.0. Please make sure to update" + " your script to pass as many initial images as text prompts to suppress this warning." 
+ ) + deprecate("len(prompt) != len(image)", "1.0.0", deprecation_message, standard_warn=False) + additional_image_per_prompt = batch_size // init_latents.shape[0] + init_latents = torch.cat([init_latents] * additional_image_per_prompt, dim=0) + elif batch_size > init_latents.shape[0] and batch_size % init_latents.shape[0] != 0: + raise ValueError( + f"Cannot duplicate `image` of batch size {init_latents.shape[0]} to {batch_size} text prompts." + ) + else: + init_latents = torch.cat([init_latents], dim=0) + + shape = init_latents.shape + noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype) + + # get latents + init_latents = self.scheduler.add_noise(init_latents, noise, timestep) + latents = init_latents + + return latents + + # copied from diffusers.StableDiffusionInpaintPipeline + def prepare_mask_latents( + self, mask, masked_image, batch_size, height, width, dtype, device, generator, do_classifier_free_guidance + ): + # resize the mask to latents shape as we concatenate the mask to the latents + # we do that before converting to dtype to avoid breaking in case we're using cpu_offload + # and half precision + mask = torch.nn.functional.interpolate( + mask, size=(height // self.vae_scale_factor, width // self.vae_scale_factor) + ) + mask = mask.to(device=device, dtype=dtype) + + masked_image = masked_image.to(device=device, dtype=dtype) + + # encode the mask image into latents space so we can concatenate it to the latents + if isinstance(generator, list): + masked_image_latents = [ + self.vae.encode(masked_image[i : i + 1]).latent_dist.sample(generator=generator[i]) + for i in range(batch_size) + ] + masked_image_latents = torch.cat(masked_image_latents, dim=0) + else: + masked_image_latents = self.vae.encode(masked_image).latent_dist.sample(generator=generator) + masked_image_latents = self.vae.config.scaling_factor * masked_image_latents + + # duplicate mask and masked_image_latents for each generation per prompt, using mps friendly method + if mask.shape[0] < batch_size: + if not batch_size % mask.shape[0] == 0: + raise ValueError( + "The passed mask and the required batch size don't match. Masks are supposed to be duplicated to" + f" a total batch size of {batch_size}, but {mask.shape[0]} masks were passed. Make sure the number" + " of masks that you pass is divisible by the total requested batch size." ) - mask = mask.to(device=device, dtype=dtype) - - masked_image = masked_image.to(device=device, dtype=dtype) - - # encode the mask image into latents space so we can concatenate it to the latents - if isinstance(generator, list): - masked_image_latents = [ - self.vae.encode(masked_image[i : i + 1]).latent_dist.sample(generator=generator[i]) - for i in range(batch_size) - ] - masked_image_latents = torch.cat(masked_image_latents, dim=0) - else: - masked_image_latents = self.vae.encode(masked_image).latent_dist.sample(generator=generator) - masked_image_latents = self.vae.config.scaling_factor * masked_image_latents - - # duplicate mask and masked_image_latents for each generation per prompt, using mps friendly method - if mask.shape[0] < batch_size: - if not batch_size % mask.shape[0] == 0: - raise ValueError( - "The passed mask and the required batch size don't match. Masks are supposed to be duplicated to" - f" a total batch size of {batch_size}, but {mask.shape[0]} masks were passed. Make sure the number" - " of masks that you pass is divisible by the total requested batch size." 
- ) - mask = mask.repeat(batch_size // mask.shape[0], 1, 1, 1) - if masked_image_latents.shape[0] < batch_size: - if not batch_size % masked_image_latents.shape[0] == 0: - raise ValueError( - "The passed images and the required batch size don't match. Images are supposed to be duplicated" - f" to a total batch size of {batch_size}, but {masked_image_latents.shape[0]} images were passed." - " Make sure the number of images that you pass is divisible by the total requested batch size." - ) - masked_image_latents = masked_image_latents.repeat(batch_size // masked_image_latents.shape[0], 1, 1, 1) - - mask = torch.cat([mask] * 2) if do_classifier_free_guidance else mask - masked_image_latents = ( - torch.cat([masked_image_latents] * 2) if do_classifier_free_guidance else masked_image_latents + mask = mask.repeat(batch_size // mask.shape[0], 1, 1, 1) + if masked_image_latents.shape[0] < batch_size: + if not batch_size % masked_image_latents.shape[0] == 0: + raise ValueError( + "The passed images and the required batch size don't match. Images are supposed to be duplicated" + f" to a total batch size of {batch_size}, but {masked_image_latents.shape[0]} images were passed." + " Make sure the number of images that you pass is divisible by the total requested batch size." ) + masked_image_latents = masked_image_latents.repeat(batch_size // masked_image_latents.shape[0], 1, 1, 1) - # aligning device to prevent device errors when concating it with the latent model input - masked_image_latents = masked_image_latents.to(device=device, dtype=dtype) - return mask, masked_image_latents - - @torch.no_grad() - def __call__( - self, - prompt: Union[str, List[str]] = None, - image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]] = None, - - # NOTE: Modified to support initial image and inpaint. - init_image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]] = None, - strength: float = 1.0, - mask: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]] = None, - - height: Optional[int] = None, - width: Optional[int] = None, - num_inference_steps: int = 50, - guidance_scale: float = 7.5, - negative_prompt: Optional[Union[str, List[str]]] = None, - num_images_per_prompt: Optional[int] = 1, - eta: float = 0.0, - generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, - latents: Optional[torch.FloatTensor] = None, - prompt_embeds: Optional[torch.FloatTensor] = None, - negative_prompt_embeds: Optional[torch.FloatTensor] = None, - output_type: Optional[str] = "pil", - return_dict: bool = True, - callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, - callback_steps: int = 1, - cross_attention_kwargs: Optional[Dict[str, Any]] = None, - controlnet_conditioning_scale: Union[float, List[float]] = 1.0, - - **kwargs - ): - # 0. Default height and width to unet - height, width = self._default_height_width(height, width, image) - - # 1. Check inputs. Raise error if not correct - self.check_inputs( - prompt, - image, - height, - width, - callback_steps, - negative_prompt, - prompt_embeds, - negative_prompt_embeds, - controlnet_conditioning_scale - ) + mask = torch.cat([mask] * 2) if do_classifier_free_guidance else mask + masked_image_latents = ( + torch.cat([masked_image_latents] * 2) if do_classifier_free_guidance else masked_image_latents + ) - # 2. 
Define call parameters - if prompt is not None and isinstance(prompt, str): - batch_size = 1 - elif prompt is not None and isinstance(prompt, list): - batch_size = len(prompt) - else: - batch_size = prompt_embeds.shape[0] - - device = self._execution_device - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - # corresponds to doing no classifier free guidance. - do_classifier_free_guidance = guidance_scale > 1.0 - - if isinstance(self.controlnet, MultiControlNetModel) and isinstance(controlnet_conditioning_scale, float): - controlnet_conditioning_scale = [controlnet_conditioning_scale] * len(self.controlnet.nets) - - # 3. Encode input prompt - prompt_embeds = self._encode_prompt( - prompt, - device, - num_images_per_prompt, - do_classifier_free_guidance, - negative_prompt, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - ) + # aligning device to prevent device errors when concating it with the latent model input + masked_image_latents = masked_image_latents.to(device=device, dtype=dtype) + return mask, masked_image_latents - # 4. Prepare image - if isinstance(self.controlnet, ControlNetModel): - image = self.prepare_image( - image=image, - width=width, - height=height, - batch_size=batch_size * num_images_per_prompt, - num_images_per_prompt=num_images_per_prompt, - device=device, - dtype=self.controlnet.dtype, - do_classifier_free_guidance=do_classifier_free_guidance, - guess_mode=False - ) - elif isinstance(self.controlnet, MultiControlNetModel): - images = [] - - for image_ in image: - image_ = self.prepare_image( - image=image_, - width=width, - height=height, - batch_size=batch_size * num_images_per_prompt, - num_images_per_prompt=num_images_per_prompt, - device=device, - dtype=self.controlnet.dtype, - do_classifier_free_guidance=do_classifier_free_guidance, - guess_mode=False - ) - - images.append(image_) - - image = images - else: - assert False - - # 5. Prepare timesteps - # NOTE: Modified to support initial image - if init_image is not None and not inpaint: - self.scheduler.set_timesteps(num_inference_steps, device=device) - timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device) - latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) - else: - self.scheduler.set_timesteps(num_inference_steps, device=device) - timesteps = self.scheduler.timesteps - - # 6. 
Prepare latent variables - num_channels_latents = self.unet.in_channels - # NOTE: Modified to support initial image - if mask is not None: - num_channels_latents = self.vae.config.latent_channels - mask, masked_image = diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint.prepare_mask_and_masked_image(init_image, mask) - mask, masked_image_latents = self.prepare_mask_latents( - mask, - masked_image, - batch_size * num_images_per_prompt, - height, - width, - prompt_embeds.dtype, - device, - generator, - do_classifier_free_guidance, - ) - num_channels_mask = mask.shape[1] - num_channels_masked_image = masked_image_latents.shape[1] - if num_channels_latents + num_channels_mask + num_channels_masked_image != self.unet.config.in_channels: - raise ValueError( - f"Select an inpainting model, such as 'stabilityai/stable-diffusion-2-inpainting'" - ) - latents = self.prepare_latents( - batch_size * num_images_per_prompt, - num_channels_latents, - height, - width, - prompt_embeds.dtype, - device, - generator, - latents, - ) - elif init_image is not None: - init_image = diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess(init_image) - latents = self.prepare_img2img_latents( - init_image, - latent_timestep, - batch_size, - num_images_per_prompt, - prompt_embeds.dtype, - device, - generator - ) - else: - latents = self.prepare_latents( - batch_size * num_images_per_prompt, - num_channels_latents, - height, - width, - prompt_embeds.dtype, - device, - generator, - latents, - ) - - # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline - extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - - # 8. Denoising loop - num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order - with self.progress_bar(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - # NOTE: Modified to support disabling CFG - if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']: - do_classifier_free_guidance = False - prompt_embeds = prompt_embeds[prompt_embeds.size(0) // 2:] - image = [i[i.size(0) // 2:] for i in image] - if mask is not None: - mask = mask[mask.size(0) // 2:] - masked_image_latents = masked_image_latents[masked_image_latents.size(0) // 2:] - # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) - - # controlnet(s) inference - down_block_res_samples, mid_block_res_sample = self.controlnet( - latent_model_input, - t, - encoder_hidden_states=prompt_embeds, - controlnet_cond=image, - conditioning_scale=controlnet_conditioning_scale, - return_dict=False, - ) - - if mask is not None: - latent_model_input = torch.cat([latent_model_input, mask, masked_image_latents], dim=1) - - # predict the noise residual - noise_pred = self.unet( - latent_model_input, - t, - encoder_hidden_states=prompt_embeds, - cross_attention_kwargs=cross_attention_kwargs, - down_block_additional_residuals=down_block_res_samples, - mid_block_additional_residual=mid_block_res_sample, - ).sample - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - - # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents, 
**extra_step_kwargs).prev_sample - - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - progress_bar.update() - # NOTE: Modified to yield the latents instead of calling a callback. - yield ImageGenerationResult.step_preview(self, kwargs['step_preview_mode'], width, height, latents, generator, i) - - # If we do sequential model offloading, let's offload unet and controlnet - # manually for max memory savings - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.unet.to("cpu") - self.controlnet.to("cpu") - torch.cuda.empty_cache() - - if output_type == "latent": - image = latents - has_nsfw_concept = None - elif output_type == "pil": - # 8. Post-processing - image = self.decode_latents(latents) - - # NOTE: Add UI to enable this. - # 9. Run safety checker - # image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) - - # 10. Convert to PIL - image = self.numpy_to_pil(image) - else: - # 8. Post-processing - image = self.decode_latents(latents) - - # NOTE: Add UI to enable this. - # 9. Run safety checker - # image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) - - # Offload last model to CPU - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.final_offload_hook.offload() - - # NOTE: Modified to yield the decoded image as a numpy array. - yield ImageGenerationResult( - [np.asarray(PIL.ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255. - for i, image in enumerate(image)], - [gen.initial_seed() for gen in generator] if isinstance(generator, list) else [generator.initial_seed()], - num_inference_steps, - True - ) + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]] = None, + image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]] = None, - if optimizations.cpu_only: - device = "cpu" + # NOTE: Modified to support initial image and inpaint. + init_image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]] = None, + strength: float = 1.0, + mask: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]] = None, + + height: Optional[int] = None, + width: Optional[int] = None, + num_inference_steps: int = 50, + guidance_scale: float = 7.5, + negative_prompt: Optional[Union[str, List[str]]] = None, + num_images_per_prompt: Optional[int] = 1, + eta: float = 0.0, + generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, + latents: Optional[torch.FloatTensor] = None, + prompt_embeds: Optional[torch.FloatTensor] = None, + negative_prompt_embeds: Optional[torch.FloatTensor] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, + callback_steps: int = 1, + cross_attention_kwargs: Optional[Dict[str, Any]] = None, + controlnet_conditioning_scale: Union[float, List[float]] = 1.0, + + **kwargs + ): + # 0. Default height and width to unet + height, width = self._default_height_width(height, width, image) + + # 1. Check inputs. Raise error if not correct + self.check_inputs( + prompt, + image, + height, + width, + callback_steps, + negative_prompt, + prompt_embeds, + negative_prompt_embeds, + controlnet_conditioning_scale + ) + + # 2. 
Define call parameters + if prompt is not None and isinstance(prompt, str): + batch_size = 1 + elif prompt is not None and isinstance(prompt, list): + batch_size = len(prompt) else: - device = self.choose_device() - - # Load the ControlNet model - controlnet = [] - for controlnet_name in control_net: - controlnet.append(load_pipe(self, f"control_net_model-{controlnet_name}", diffusers.ControlNetModel, controlnet_name, optimizations, None, device)) - controlnet = MultiControlNetModel(controlnet) - - # StableDiffusionPipeline w/ caching - pipe = load_pipe(self, "control_net", GeneratorPipeline, model, optimizations, scheduler, device, controlnet=controlnet) - - # Optimizations - pipe = optimizations.apply(pipe, device) - - # RNG - batch_size = len(prompt) if isinstance(prompt, list) else 1 - generator = [] - for _ in range(batch_size): - gen = torch.Generator(device="cpu" if device in ("mps", "privateuseone") else device) # MPS and DML do not support the `Generator` API - generator.append(gen.manual_seed(random.randrange(0, np.iinfo(np.uint32).max) if seed is None else seed)) - if batch_size == 1: - # Some schedulers don't handle a list of generators: https://github.com/huggingface/diffusers/issues/1909 - generator = generator[0] - - # Init Image - # FIXME: The `unet.config.sample_size` of the depth model is `32`, not `64`. For now, this will be hardcoded to `512`. - height = height or 512 - width = width or 512 - rounded_size = ( - int(8 * (width // 8)), - int(8 * (height // 8)), + batch_size = prompt_embeds.shape[0] + + device = self._execution_device + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 + + if isinstance(self.controlnet, MultiControlNetModel) and isinstance(controlnet_conditioning_scale, float): + controlnet_conditioning_scale = [controlnet_conditioning_scale] * len(self.controlnet.nets) + + # 3. Encode input prompt + prompt_embeds = self._encode_prompt( + prompt, + device, + num_images_per_prompt, + do_classifier_free_guidance, + negative_prompt, + prompt_embeds=prompt_embeds, + negative_prompt_embeds=negative_prompt_embeds, ) - control_image = [PIL.Image.fromarray(np.uint8(c * 255)).convert('RGB').resize(rounded_size) for c in control] if control is not None else None - init_image = None if image is None else (PIL.Image.open(image) if isinstance(image, str) else PIL.Image.fromarray(image.astype(np.uint8))).resize(rounded_size) - if inpaint: - match inpaint_mask_src: - case 'alpha': - mask_image = PIL.ImageOps.invert(init_image.getchannel('A')) - case 'prompt': - from transformers import AutoProcessor, CLIPSegForImageSegmentation - - processor = AutoProcessor.from_pretrained("CIDAS/clipseg-rd64-refined") - clipseg = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined") - inputs = processor(text=[text_mask], images=[init_image.convert('RGB')], return_tensors="pt", padding=True) - outputs = clipseg(**inputs) - mask_image = PIL.Image.fromarray(np.uint8((1 - torch.sigmoid(outputs.logits).lt(text_mask_confidence).int().detach().numpy()) * 255), 'L').resize(init_image.size) + + # 4. 
Prepare image + if isinstance(self.controlnet, ControlNetModel): + image = self.prepare_image( + image=image, + width=width, + height=height, + batch_size=batch_size * num_images_per_prompt, + num_images_per_prompt=num_images_per_prompt, + device=device, + dtype=self.controlnet.dtype, + do_classifier_free_guidance=do_classifier_free_guidance, + guess_mode=False + ) + elif isinstance(self.controlnet, MultiControlNetModel): + images = [] + + for image_ in image: + image_ = self.prepare_image( + image=image_, + width=width, + height=height, + batch_size=batch_size * num_images_per_prompt, + num_images_per_prompt=num_images_per_prompt, + device=device, + dtype=self.controlnet.dtype, + do_classifier_free_guidance=do_classifier_free_guidance, + guess_mode=False + ) + + images.append(image_) + + image = images + else: + assert False + + # 5. Prepare timesteps + # NOTE: Modified to support initial image + if init_image is not None and not inpaint: + self.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device) + latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) + else: + self.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = self.scheduler.timesteps + + # 6. Prepare latent variables + num_channels_latents = self.unet.in_channels + # NOTE: Modified to support initial image + if mask is not None: + num_channels_latents = self.vae.config.latent_channels + mask, masked_image = diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint.prepare_mask_and_masked_image(init_image, mask) + mask, masked_image_latents = self.prepare_mask_latents( + mask, + masked_image, + batch_size * num_images_per_prompt, + height, + width, + prompt_embeds.dtype, + device, + generator, + do_classifier_free_guidance, + ) + num_channels_mask = mask.shape[1] + num_channels_masked_image = masked_image_latents.shape[1] + if num_channels_latents + num_channels_mask + num_channels_masked_image != self.unet.config.in_channels: + raise ValueError( + f"Select an inpainting model, such as 'stabilityai/stable-diffusion-2-inpainting'" + ) + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + prompt_embeds.dtype, + device, + generator, + latents, + ) + elif init_image is not None: + init_image = diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess(init_image) + latents = self.prepare_img2img_latents( + init_image, + latent_timestep, + batch_size, + num_images_per_prompt, + prompt_embeds.dtype, + device, + generator + ) else: - mask_image = None - - # Seamless - if seamless_axes == SeamlessAxes.AUTO: - init_sa = None if init_image is None else self.detect_seamless(np.array(init_image) / 255) - control_sa = None if control_image is None else self.detect_seamless(np.array(control_image[0]) / 255) - if init_sa is not None and control_sa is not None: - seamless_axes = SeamlessAxes((init_sa.x and control_sa.x, init_sa.y and control_sa.y)) - elif init_sa is not None: - seamless_axes = init_sa - elif control_sa is not None: - seamless_axes = control_sa - _configure_model_padding(pipe.unet, seamless_axes) - _configure_model_padding(pipe.vae, seamless_axes) - - # Inference - with (torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext()), \ - (torch.autocast(device) if optimizations.can_use("amp", device) else nullcontext()): - yield from pipe( - prompt=prompt, - 
image=control_image, - controlnet_conditioning_scale=controlnet_conditioning_scale, - init_image=init_image.convert('RGB') if init_image is not None else None, - mask=mask_image, - strength=strength, - width=rounded_size[0], - height=rounded_size[1], - num_inference_steps=steps, - guidance_scale=cfg_scale, - negative_prompt=negative_prompt if use_negative_prompt else None, - num_images_per_prompt=1, - eta=0.0, - generator=generator, - latents=None, - output_type="pil", - return_dict=True, - callback=None, - callback_steps=1, - step_preview_mode=step_preview_mode, - cfg_end=optimizations.cfg_end + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + prompt_embeds.dtype, + device, + generator, + latents, ) - case Pipeline.STABILITY_SDK: - import stability_sdk - raise NotImplementedError() - case _: - raise Exception(f"Unsupported pipeline {pipeline}.") \ No newline at end of file + + # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + + # 8. Denoising loop + num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order + with self.progress_bar(total=num_inference_steps) as progress_bar: + for i, t in enumerate(timesteps): + # NOTE: Modified to support disabling CFG + if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']: + do_classifier_free_guidance = False + prompt_embeds = prompt_embeds[prompt_embeds.size(0) // 2:] + image = [i[i.size(0) // 2:] for i in image] + if mask is not None: + mask = mask[mask.size(0) // 2:] + masked_image_latents = masked_image_latents[masked_image_latents.size(0) // 2:] + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # controlnet(s) inference + down_block_res_samples, mid_block_res_sample = self.controlnet( + latent_model_input, + t, + encoder_hidden_states=prompt_embeds, + controlnet_cond=image, + conditioning_scale=controlnet_conditioning_scale, + return_dict=False, + ) + + if mask is not None: + latent_model_input = torch.cat([latent_model_input, mask, masked_image_latents], dim=1) + + # predict the noise residual + noise_pred = self.unet( + latent_model_input, + t, + encoder_hidden_states=prompt_embeds, + cross_attention_kwargs=cross_attention_kwargs, + down_block_additional_residuals=down_block_res_samples, + mid_block_additional_residual=mid_block_res_sample, + ).sample + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample + + if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): + progress_bar.update() + # NOTE: Modified to yield the latents instead of calling a callback. 
+ yield ImageGenerationResult.step_preview(self, kwargs['step_preview_mode'], width, height, latents, generator, i) + + # If we do sequential model offloading, let's offload unet and controlnet + # manually for max memory savings + if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: + self.unet.to("cpu") + self.controlnet.to("cpu") + torch.cuda.empty_cache() + + if output_type == "latent": + image = latents + has_nsfw_concept = None + elif output_type == "pil": + # 8. Post-processing + image = self.decode_latents(latents) + + # NOTE: Add UI to enable this. + # 9. Run safety checker + # image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) + + # 10. Convert to PIL + image = self.numpy_to_pil(image) + else: + # 8. Post-processing + image = self.decode_latents(latents) + + # NOTE: Add UI to enable this. + # 9. Run safety checker + # image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype) + + # Offload last model to CPU + if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: + self.final_offload_hook.offload() + + # NOTE: Modified to yield the decoded image as a numpy array. + yield ImageGenerationResult( + [np.asarray(PIL.ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255. + for i, image in enumerate(image)], + [gen.initial_seed() for gen in generator] if isinstance(generator, list) else [generator.initial_seed()], + num_inference_steps, + True + ) + + if optimizations.cpu_only: + device = "cpu" + else: + device = self.choose_device() + + # Load the ControlNet model + controlnet = [] + for controlnet_name in control_net: + controlnet.append(load_pipe(self, f"control_net_model-{controlnet_name}", diffusers.ControlNetModel, controlnet_name, optimizations, None, device)) + controlnet = MultiControlNetModel(controlnet) + + # StableDiffusionPipeline w/ caching + pipe = load_pipe(self, "control_net", GeneratorPipeline, model, optimizations, scheduler, device, controlnet=controlnet) + + # Optimizations + pipe = optimizations.apply(pipe, device) + + # RNG + batch_size = len(prompt) if isinstance(prompt, list) else 1 + generator = [] + for _ in range(batch_size): + gen = torch.Generator(device="cpu" if device in ("mps", "privateuseone") else device) # MPS and DML do not support the `Generator` API + generator.append(gen.manual_seed(random.randrange(0, np.iinfo(np.uint32).max) if seed is None else seed)) + if batch_size == 1: + # Some schedulers don't handle a list of generators: https://github.com/huggingface/diffusers/issues/1909 + generator = generator[0] + + # Init Image + # FIXME: The `unet.config.sample_size` of the depth model is `32`, not `64`. For now, this will be hardcoded to `512`. 
+ height = height or 512 + width = width or 512 + rounded_size = ( + int(8 * (width // 8)), + int(8 * (height // 8)), + ) + control_image = [PIL.Image.fromarray(np.uint8(c * 255)).convert('RGB').resize(rounded_size) for c in control] if control is not None else None + init_image = None if image is None else (PIL.Image.open(image) if isinstance(image, str) else PIL.Image.fromarray(image.astype(np.uint8))).resize(rounded_size) + if inpaint: + match inpaint_mask_src: + case 'alpha': + mask_image = PIL.ImageOps.invert(init_image.getchannel('A')) + case 'prompt': + from transformers import AutoProcessor, CLIPSegForImageSegmentation + + processor = AutoProcessor.from_pretrained("CIDAS/clipseg-rd64-refined") + clipseg = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined") + inputs = processor(text=[text_mask], images=[init_image.convert('RGB')], return_tensors="pt", padding=True) + outputs = clipseg(**inputs) + mask_image = PIL.Image.fromarray(np.uint8((1 - torch.sigmoid(outputs.logits).lt(text_mask_confidence).int().detach().numpy()) * 255), 'L').resize(init_image.size) + else: + mask_image = None + + # Seamless + if seamless_axes == SeamlessAxes.AUTO: + init_sa = None if init_image is None else self.detect_seamless(np.array(init_image) / 255) + control_sa = None if control_image is None else self.detect_seamless(np.array(control_image[0]) / 255) + if init_sa is not None and control_sa is not None: + seamless_axes = SeamlessAxes((init_sa.x and control_sa.x, init_sa.y and control_sa.y)) + elif init_sa is not None: + seamless_axes = init_sa + elif control_sa is not None: + seamless_axes = control_sa + _configure_model_padding(pipe.unet, seamless_axes) + _configure_model_padding(pipe.vae, seamless_axes) + + # Inference + with (torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext()), \ + (torch.autocast(device) if optimizations.can_use("amp", device) else nullcontext()): + yield from pipe( + prompt=prompt, + image=control_image, + controlnet_conditioning_scale=controlnet_conditioning_scale, + init_image=init_image.convert('RGB') if init_image is not None else None, + mask=mask_image, + strength=strength, + width=rounded_size[0], + height=rounded_size[1], + num_inference_steps=steps, + guidance_scale=cfg_scale, + negative_prompt=negative_prompt if use_negative_prompt else None, + num_images_per_prompt=1, + eta=0.0, + generator=generator, + latents=None, + output_type="pil", + return_dict=True, + callback=None, + callback_steps=1, + step_preview_mode=step_preview_mode, + cfg_end=optimizations.cfg_end + ) \ No newline at end of file diff --git a/generator_process/actions/depth_to_image.py b/generator_process/actions/depth_to_image.py index 306b3550..548f1d86 100644 --- a/generator_process/actions/depth_to_image.py +++ b/generator_process/actions/depth_to_image.py @@ -6,13 +6,11 @@ import numpy as np import random from .prompt_to_image import Scheduler, Optimizations, StepPreviewMode, ImageGenerationResult, _configure_model_padding, model_snapshot_folder, load_pipe -from ..models import Pipeline from .detect_seamless import SeamlessAxes def depth_to_image( self, - pipeline: Pipeline, model: str, @@ -40,361 +38,354 @@ def depth_to_image( **kwargs ) -> Generator[NDArray, None, None]: - match pipeline: - case Pipeline.STABLE_DIFFUSION: - import diffusers - import torch - import PIL.Image - import PIL.ImageOps - - class GeneratorPipeline(diffusers.StableDiffusionInpaintPipeline): - def prepare_depth(self, depth, image, dtype, device): - device = 
torch.device('cpu' if device.type == 'mps' else device.type) - if depth is None: - from transformers import DPTFeatureExtractor, DPTForDepthEstimation - import contextlib - feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large") - depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-large") - depth_estimator = depth_estimator.to(device) - - pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values - pixel_values = pixel_values.to(device=device) - # The DPT-Hybrid model uses batch-norm layers which are not compatible with fp16. - # So we use `torch.autocast` here for half precision inference. - context_manger = torch.autocast("cuda", dtype=dtype) if device.type == "cuda" else contextlib.nullcontext() - with context_manger: - depth_map = depth_estimator(pixel_values).predicted_depth - depth_map = torch.nn.functional.interpolate( - depth_map.unsqueeze(1), - size=(height // self.vae_scale_factor, width // self.vae_scale_factor), - mode="bicubic", - align_corners=False, - ) - - depth_min = torch.amin(depth_map, dim=[1, 2, 3], keepdim=True) - depth_max = torch.amax(depth_map, dim=[1, 2, 3], keepdim=True) - depth_map = 2.0 * (depth_map - depth_min) / (depth_max - depth_min) - 1.0 - depth_map = depth_map.to(device) - return depth_map - else: - if isinstance(depth, PIL.Image.Image): - depth = np.array(depth.convert("L")) - depth = depth.astype(np.float32) / 255.0 - depth = depth[None, None] - depth = torch.from_numpy(depth) - return depth - - def prepare_depth_latents( - self, depth, batch_size, height, width, dtype, device, generator, do_classifier_free_guidance - ): - # resize the mask to latents shape as we concatenate the mask to the latents - # we do that before converting to dtype to avoid breaking in case we're using cpu_offload - # and half precision - depth = torch.nn.functional.interpolate( - depth, size=(height // self.vae_scale_factor, width // self.vae_scale_factor) - ) - depth = depth.to(device=device, dtype=dtype) - - # duplicate mask and masked_image_latents for each generation per prompt, using mps friendly method - depth = depth.repeat(batch_size, 1, 1, 1) - depth = torch.cat([depth] * 2) if do_classifier_free_guidance else depth - return depth - - def prepare_img2img_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None, image=None, timestep=None): - shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor) - if isinstance(generator, list) and len(generator) != batch_size: - raise ValueError( - f"You have passed a list of generators of length {len(generator)}, but requested an effective batch" - f" size of {batch_size}. Make sure the batch size matches the length of the generators." 
- ) - - if latents is None: - rand_device = "cpu" if device.type == "mps" else device - - if isinstance(generator, list): - shape = (1,) + shape[1:] - latents = [ - torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype) - for i in range(batch_size) - ] - latents = torch.cat(latents, dim=0).to(device) - else: - latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device) - else: - if latents.shape != shape: - raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}") - latents = latents.to(device) - - # scale the initial noise by the standard deviation required by the scheduler - latents = latents * self.scheduler.init_noise_sigma - - if image is not None: - image = image.to(device=device, dtype=dtype) - if isinstance(generator, list): - image_latents = [ - self.vae.encode(image[0:1]).latent_dist.sample(generator[i]) for i in range(batch_size) - ] - image_latents = torch.cat(image_latents, dim=0) - else: - image_latents = self.vae.encode(image).latent_dist.sample(generator) - image_latents = torch.nn.functional.interpolate( - image_latents, size=(height // self.vae_scale_factor, width // self.vae_scale_factor) - ) - image_latents = 0.18215 * image_latents - rand_device = "cpu" if device.type == "mps" else device - shape = image_latents.shape - if isinstance(generator, list): - shape = (1,) + shape[1:] - noise = [ - torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype) for i in - range(batch_size) - ] - noise = torch.cat(noise, dim=0).to(device) - else: - noise = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device) - latents = self.scheduler.add_noise(image_latents, noise, timestep) - - return latents - - - def get_timesteps(self, num_inference_steps, strength, device): - # get the original timestep using init_timestep - offset = self.scheduler.config.get("steps_offset", 0) - init_timestep = int(num_inference_steps * strength) + offset - init_timestep = min(init_timestep, num_inference_steps) - - t_start = max(num_inference_steps - init_timestep + offset, 0) - timesteps = self.scheduler.timesteps[t_start:] - - return timesteps, num_inference_steps - t_start - - @torch.no_grad() - def __call__( - self, - prompt: Union[str, List[str]], - depth_image: Union[torch.FloatTensor, PIL.Image.Image], - image: Optional[Union[torch.FloatTensor, PIL.Image.Image]] = None, - strength: float = 0.8, - height: Optional[int] = None, - width: Optional[int] = None, - num_inference_steps: int = 50, - guidance_scale: float = 7.5, - negative_prompt: Optional[Union[str, List[str]]] = None, - num_images_per_prompt: Optional[int] = 1, - eta: float = 0.0, - generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, - latents: Optional[torch.FloatTensor] = None, - output_type: Optional[str] = "pil", - return_dict: bool = True, - callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, - callback_steps: Optional[int] = 1, - **kwargs, - ): - - # 0. Default height and width to unet - height = height or self.unet.config.sample_size * self.vae_scale_factor - width = width or self.unet.config.sample_size * self.vae_scale_factor - - # 1. Check inputs - self.check_inputs(prompt, height, width, callback_steps) - - # 2. 
Define call parameters - batch_size = 1 if isinstance(prompt, str) else len(prompt) - device = self._execution_device - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - # corresponds to doing no classifier free guidance. - do_classifier_free_guidance = guidance_scale > 1.0 - - # 3. Encode input prompt - text_embeddings = self._encode_prompt( - prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt - ) - - # 4. Prepare the depth image - depth = self.prepare_depth(depth_image, image, text_embeddings.dtype, device) - - if image is not None and isinstance(image, PIL.Image.Image): - image = diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess(image) - - # 5. set timesteps - self.scheduler.set_timesteps(num_inference_steps, device=device) - timesteps = self.scheduler.timesteps - if image is not None: - timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device) - - # 6. Prepare latent variables - num_channels_latents = self.vae.config.latent_channels - if image is not None: - latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) - latents = self.prepare_img2img_latents( - batch_size * num_images_per_prompt, - num_channels_latents, - height, - width, - text_embeddings.dtype, - device, - generator, - latents, - image, - latent_timestep - ) - else: - latents = self.prepare_latents( - batch_size * num_images_per_prompt, - num_channels_latents, - height, - width, - text_embeddings.dtype, - device, - generator, - latents, - ) - - # 7. Prepare mask latent variables - depth = self.prepare_depth_latents( - depth, - batch_size * num_images_per_prompt, - height, - width, - text_embeddings.dtype, - device, - generator, - do_classifier_free_guidance, - ) - - # 8. Check that sizes of mask, masked image and latents match - num_channels_depth = depth.shape[1] - if num_channels_latents + num_channels_depth != self.unet.config.in_channels: - raise ValueError( - f"Select a depth model, such as 'stabilityai/stable-diffusion-2-depth'" - ) - - # 9. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline - extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - - # 10. 
Denoising loop - num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order - with self.progress_bar(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - # NOTE: Modified to support disabling CFG - if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']: - do_classifier_free_guidance = False - text_embeddings = text_embeddings[text_embeddings.size(0) // 2:] - depth = depth[depth.size(0) // 2:] - # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - - # concat latents, mask, masked_image_latents in the channel dimension - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) - latent_model_input = torch.cat([latent_model_input, depth], dim=1) - - # predict the noise residual - noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - - # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample - - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - progress_bar.update() - # NOTE: Modified to yield the latents instead of calling a callback. - yield ImageGenerationResult.step_preview(self, kwargs['step_preview_mode'], width, height, latents, generator, i) - - # 11. Post-processing - image = self.decode_latents(latents) - - # TODO: Add UI to enable this. - # 12. Run safety checker - # image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype) - - # Offload last model to CPU - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.final_offload_hook.offload() - - # NOTE: Modified to yield the decoded image as a numpy array. - yield ImageGenerationResult( - [np.asarray(PIL.ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255. - for i, image in enumerate(self.numpy_to_pil(image))], - [gen.initial_seed() for gen in generator] if isinstance(generator, list) else [generator.initial_seed()], - num_inference_steps, - True - ) + import diffusers + import torch + import PIL.Image + import PIL.ImageOps + + class GeneratorPipeline(diffusers.StableDiffusionInpaintPipeline): + def prepare_depth(self, depth, image, dtype, device): + device = torch.device('cpu' if device.type == 'mps' else device.type) + if depth is None: + from transformers import DPTFeatureExtractor, DPTForDepthEstimation + import contextlib + feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large") + depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-large") + depth_estimator = depth_estimator.to(device) + + pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values + pixel_values = pixel_values.to(device=device) + # The DPT-Hybrid model uses batch-norm layers which are not compatible with fp16. + # So we use `torch.autocast` here for half precision inference. 
+ context_manger = torch.autocast("cuda", dtype=dtype) if device.type == "cuda" else contextlib.nullcontext() + with context_manger: + depth_map = depth_estimator(pixel_values).predicted_depth + depth_map = torch.nn.functional.interpolate( + depth_map.unsqueeze(1), + size=(height // self.vae_scale_factor, width // self.vae_scale_factor), + mode="bicubic", + align_corners=False, + ) + + depth_min = torch.amin(depth_map, dim=[1, 2, 3], keepdim=True) + depth_max = torch.amax(depth_map, dim=[1, 2, 3], keepdim=True) + depth_map = 2.0 * (depth_map - depth_min) / (depth_max - depth_min) - 1.0 + depth_map = depth_map.to(device) + return depth_map + else: + if isinstance(depth, PIL.Image.Image): + depth = np.array(depth.convert("L")) + depth = depth.astype(np.float32) / 255.0 + depth = depth[None, None] + depth = torch.from_numpy(depth) + return depth + + def prepare_depth_latents( + self, depth, batch_size, height, width, dtype, device, generator, do_classifier_free_guidance + ): + # resize the mask to latents shape as we concatenate the mask to the latents + # we do that before converting to dtype to avoid breaking in case we're using cpu_offload + # and half precision + depth = torch.nn.functional.interpolate( + depth, size=(height // self.vae_scale_factor, width // self.vae_scale_factor) + ) + depth = depth.to(device=device, dtype=dtype) + + # duplicate mask and masked_image_latents for each generation per prompt, using mps friendly method + depth = depth.repeat(batch_size, 1, 1, 1) + depth = torch.cat([depth] * 2) if do_classifier_free_guidance else depth + return depth + + def prepare_img2img_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None, image=None, timestep=None): + shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor) + if isinstance(generator, list) and len(generator) != batch_size: + raise ValueError( + f"You have passed a list of generators of length {len(generator)}, but requested an effective batch" + f" size of {batch_size}. Make sure the batch size matches the length of the generators." 
+ ) + + if latents is None: + rand_device = "cpu" if device.type == "mps" else device + + if isinstance(generator, list): + shape = (1,) + shape[1:] + latents = [ + torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype) + for i in range(batch_size) + ] + latents = torch.cat(latents, dim=0).to(device) + else: + latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device) + else: + if latents.shape != shape: + raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}") + latents = latents.to(device) + + # scale the initial noise by the standard deviation required by the scheduler + latents = latents * self.scheduler.init_noise_sigma + + if image is not None: + image = image.to(device=device, dtype=dtype) + if isinstance(generator, list): + image_latents = [ + self.vae.encode(image[0:1]).latent_dist.sample(generator[i]) for i in range(batch_size) + ] + image_latents = torch.cat(image_latents, dim=0) + else: + image_latents = self.vae.encode(image).latent_dist.sample(generator) + image_latents = torch.nn.functional.interpolate( + image_latents, size=(height // self.vae_scale_factor, width // self.vae_scale_factor) + ) + image_latents = 0.18215 * image_latents + rand_device = "cpu" if device.type == "mps" else device + shape = image_latents.shape + if isinstance(generator, list): + shape = (1,) + shape[1:] + noise = [ + torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype) for i in + range(batch_size) + ] + noise = torch.cat(noise, dim=0).to(device) + else: + noise = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device) + latents = self.scheduler.add_noise(image_latents, noise, timestep) + + return latents + + + def get_timesteps(self, num_inference_steps, strength, device): + # get the original timestep using init_timestep + offset = self.scheduler.config.get("steps_offset", 0) + init_timestep = int(num_inference_steps * strength) + offset + init_timestep = min(init_timestep, num_inference_steps) + + t_start = max(num_inference_steps - init_timestep + offset, 0) + timesteps = self.scheduler.timesteps[t_start:] + + return timesteps, num_inference_steps - t_start + + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]], + depth_image: Union[torch.FloatTensor, PIL.Image.Image], + image: Optional[Union[torch.FloatTensor, PIL.Image.Image]] = None, + strength: float = 0.8, + height: Optional[int] = None, + width: Optional[int] = None, + num_inference_steps: int = 50, + guidance_scale: float = 7.5, + negative_prompt: Optional[Union[str, List[str]]] = None, + num_images_per_prompt: Optional[int] = 1, + eta: float = 0.0, + generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, + latents: Optional[torch.FloatTensor] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, + callback_steps: Optional[int] = 1, + **kwargs, + ): - if optimizations.cpu_only: - device = "cpu" + # 0. Default height and width to unet + height = height or self.unet.config.sample_size * self.vae_scale_factor + width = width or self.unet.config.sample_size * self.vae_scale_factor + + # 1. Check inputs + self.check_inputs(prompt, height, width, callback_steps) + + # 2. 
Define call parameters + batch_size = 1 if isinstance(prompt, str) else len(prompt) + device = self._execution_device + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 + + # 3. Encode input prompt + text_embeddings = self._encode_prompt( + prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) + + # 4. Prepare the depth image + depth = self.prepare_depth(depth_image, image, text_embeddings.dtype, device) + + if image is not None and isinstance(image, PIL.Image.Image): + image = diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess(image) + + # 5. set timesteps + self.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = self.scheduler.timesteps + if image is not None: + timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device) + + # 6. Prepare latent variables + num_channels_latents = self.vae.config.latent_channels + if image is not None: + latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) + latents = self.prepare_img2img_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + text_embeddings.dtype, + device, + generator, + latents, + image, + latent_timestep + ) else: - device = self.choose_device() - - # StableDiffusionPipeline w/ caching - pipe = load_pipe(self, "depth", GeneratorPipeline, model, optimizations, scheduler, device) - - # Optimizations - pipe = optimizations.apply(pipe, device) - - # RNG - batch_size = len(prompt) if isinstance(prompt, list) else 1 - generator = [] - for _ in range(batch_size): - gen = torch.Generator(device="cpu" if device in ("mps", "privateuseone") else device) # MPS and DML do not support the `Generator` API - generator.append(gen.manual_seed(random.randrange(0, np.iinfo(np.uint32).max) if seed is None else seed)) - if batch_size == 1: - # Some schedulers don't handle a list of generators: https://github.com/huggingface/diffusers/issues/1909 - generator = generator[0] - - # Init Image - # FIXME: The `unet.config.sample_size` of the depth model is `32`, not `64`. For now, this will be hardcoded to `512`. - height = height or 512 - width = width or 512 - rounded_size = ( - int(8 * (width // 8)), - int(8 * (height // 8)), + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + text_embeddings.dtype, + device, + generator, + latents, + ) + + # 7. 
Prepare mask latent variables + depth = self.prepare_depth_latents( + depth, + batch_size * num_images_per_prompt, + height, + width, + text_embeddings.dtype, + device, + generator, + do_classifier_free_guidance, ) - depth_image = PIL.ImageOps.flip(PIL.Image.fromarray(np.uint8(depth * 255)).convert('L')).resize(rounded_size) if depth is not None else None - init_image = None if image is None else (PIL.Image.open(image) if isinstance(image, str) else PIL.Image.fromarray(image.astype(np.uint8))).convert('RGB').resize(rounded_size) - - # Seamless - if seamless_axes == SeamlessAxes.AUTO: - init_sa = None if init_image is None else self.detect_seamless(np.array(init_image) / 255) - depth_sa = None if depth_image is None else self.detect_seamless(np.array(depth_image.convert('RGB')) / 255) - if init_sa is not None and depth_sa is not None: - seamless_axes = SeamlessAxes((init_sa.x and depth_sa.x, init_sa.y and depth_sa.y)) - elif init_sa is not None: - seamless_axes = init_sa - elif depth_sa is not None: - seamless_axes = depth_sa - _configure_model_padding(pipe.unet, seamless_axes) - _configure_model_padding(pipe.vae, seamless_axes) - - # Inference - with torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext(): - yield from pipe( - prompt=prompt, - depth_image=depth_image, - image=init_image, - strength=strength, - width=rounded_size[0], - height=rounded_size[1], - num_inference_steps=steps, - guidance_scale=cfg_scale, - negative_prompt=negative_prompt if use_negative_prompt else None, - num_images_per_prompt=1, - eta=0.0, - generator=generator, - latents=None, - output_type="pil", - return_dict=True, - callback=None, - callback_steps=1, - step_preview_mode=step_preview_mode, - cfg_end=optimizations.cfg_end + + # 8. Check that sizes of mask, masked image and latents match + num_channels_depth = depth.shape[1] + if num_channels_latents + num_channels_depth != self.unet.config.in_channels: + raise ValueError( + f"Select a depth model, such as 'stabilityai/stable-diffusion-2-depth'" ) - case Pipeline.STABILITY_SDK: - import stability_sdk - raise NotImplementedError() - case _: - raise Exception(f"Unsupported pipeline {pipeline}.") \ No newline at end of file + + # 9. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + + # 10. 
Denoising loop + num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order + with self.progress_bar(total=num_inference_steps) as progress_bar: + for i, t in enumerate(timesteps): + # NOTE: Modified to support disabling CFG + if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']: + do_classifier_free_guidance = False + text_embeddings = text_embeddings[text_embeddings.size(0) // 2:] + depth = depth[depth.size(0) // 2:] + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + + # concat latents, mask, masked_image_latents in the channel dimension + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + latent_model_input = torch.cat([latent_model_input, depth], dim=1) + + # predict the noise residual + noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample + + if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): + progress_bar.update() + # NOTE: Modified to yield the latents instead of calling a callback. + yield ImageGenerationResult.step_preview(self, kwargs['step_preview_mode'], width, height, latents, generator, i) + + # 11. Post-processing + image = self.decode_latents(latents) + + # TODO: Add UI to enable this. + # 12. Run safety checker + # image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype) + + # Offload last model to CPU + if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: + self.final_offload_hook.offload() + + # NOTE: Modified to yield the decoded image as a numpy array. + yield ImageGenerationResult( + [np.asarray(PIL.ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255. + for i, image in enumerate(self.numpy_to_pil(image))], + [gen.initial_seed() for gen in generator] if isinstance(generator, list) else [generator.initial_seed()], + num_inference_steps, + True + ) + + if optimizations.cpu_only: + device = "cpu" + else: + device = self.choose_device() + + # StableDiffusionPipeline w/ caching + pipe = load_pipe(self, "depth", GeneratorPipeline, model, optimizations, scheduler, device) + + # Optimizations + pipe = optimizations.apply(pipe, device) + + # RNG + batch_size = len(prompt) if isinstance(prompt, list) else 1 + generator = [] + for _ in range(batch_size): + gen = torch.Generator(device="cpu" if device in ("mps", "privateuseone") else device) # MPS and DML do not support the `Generator` API + generator.append(gen.manual_seed(random.randrange(0, np.iinfo(np.uint32).max) if seed is None else seed)) + if batch_size == 1: + # Some schedulers don't handle a list of generators: https://github.com/huggingface/diffusers/issues/1909 + generator = generator[0] + + # Init Image + # FIXME: The `unet.config.sample_size` of the depth model is `32`, not `64`. For now, this will be hardcoded to `512`. 
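+ # The size is also rounded down to the nearest multiple of 8 below, so the final dimensions divide evenly by the VAE's 8x latent downsampling.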
+ height = height or 512 + width = width or 512 + rounded_size = ( + int(8 * (width // 8)), + int(8 * (height // 8)), + ) + depth_image = PIL.ImageOps.flip(PIL.Image.fromarray(np.uint8(depth * 255)).convert('L')).resize(rounded_size) if depth is not None else None + init_image = None if image is None else (PIL.Image.open(image) if isinstance(image, str) else PIL.Image.fromarray(image.astype(np.uint8))).convert('RGB').resize(rounded_size) + + # Seamless + if seamless_axes == SeamlessAxes.AUTO: + init_sa = None if init_image is None else self.detect_seamless(np.array(init_image) / 255) + depth_sa = None if depth_image is None else self.detect_seamless(np.array(depth_image.convert('RGB')) / 255) + if init_sa is not None and depth_sa is not None: + seamless_axes = SeamlessAxes((init_sa.x and depth_sa.x, init_sa.y and depth_sa.y)) + elif init_sa is not None: + seamless_axes = init_sa + elif depth_sa is not None: + seamless_axes = depth_sa + _configure_model_padding(pipe.unet, seamless_axes) + _configure_model_padding(pipe.vae, seamless_axes) + + # Inference + with torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext(): + yield from pipe( + prompt=prompt, + depth_image=depth_image, + image=init_image, + strength=strength, + width=rounded_size[0], + height=rounded_size[1], + num_inference_steps=steps, + guidance_scale=cfg_scale, + negative_prompt=negative_prompt if use_negative_prompt else None, + num_images_per_prompt=1, + eta=0.0, + generator=generator, + latents=None, + output_type="pil", + return_dict=True, + callback=None, + callback_steps=1, + step_preview_mode=step_preview_mode, + cfg_end=optimizations.cfg_end + ) \ No newline at end of file diff --git a/generator_process/actions/image_to_image.py b/generator_process/actions/image_to_image.py index a2f15c7a..34960932 100644 --- a/generator_process/actions/image_to_image.py +++ b/generator_process/actions/image_to_image.py @@ -5,14 +5,12 @@ from numpy.typing import NDArray import numpy as np import random -from .prompt_to_image import Scheduler, Optimizations, StepPreviewMode, ImageGenerationResult, _configure_model_padding, model_snapshot_folder, load_pipe -from ..models import Pipeline +from .prompt_to_image import Scheduler, Optimizations, StepPreviewMode, ImageGenerationResult, _configure_model_padding, load_pipe from .detect_seamless import SeamlessAxes def image_to_image( self, - pipeline: Pipeline, model: str, @@ -42,207 +40,167 @@ def image_to_image( **kwargs ) -> Generator[ImageGenerationResult, None, None]: - match pipeline: - case Pipeline.STABLE_DIFFUSION: - import diffusers - import torch - from PIL import Image, ImageOps - import PIL.Image - - # Mostly copied from `diffusers.StableDiffusionImg2ImgPipeline`, with slight modifications to yield the latents at each step. - class GeneratorPipeline(diffusers.StableDiffusionImg2ImgPipeline): - @torch.no_grad() - def __call__( - self, - prompt: Union[str, List[str]], - image: Union[torch.FloatTensor, PIL.Image.Image], - strength: float = 0.8, - num_inference_steps: Optional[int] = 50, - guidance_scale: Optional[float] = 7.5, - negative_prompt: Optional[Union[str, List[str]]] = None, - num_images_per_prompt: Optional[int] = 1, - eta: Optional[float] = 0.0, - generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, - output_type: Optional[str] = "pil", - return_dict: bool = True, - callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, - callback_steps: Optional[int] = 1, - **kwargs, - ): - # 1. 
Check inputs - self.check_inputs(prompt, strength, callback_steps) - - # 2. Define call parameters - batch_size = 1 if isinstance(prompt, str) else len(prompt) - device = self._execution_device - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - # corresponds to doing no classifier free guidance. - do_classifier_free_guidance = guidance_scale > 1.0 - - # 3. Encode input prompt - text_embeddings = self._encode_prompt( - prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt - ) - - # 4. Preprocess image - image = diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess(image) - - # 5. set timesteps - self.scheduler.set_timesteps(num_inference_steps, device=device) - timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device) - latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) - - # 6. Prepare latent variables - latents = self.prepare_latents( - image, latent_timestep, batch_size, num_images_per_prompt, text_embeddings.dtype, device, generator - ) - - # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline - extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - - # 8. Denoising loop - num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order - with self.progress_bar(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - # NOTE: Modified to support disabling CFG - if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']: - do_classifier_free_guidance = False - text_embeddings = text_embeddings[text_embeddings.size(0) // 2:] - # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) - - # predict the noise residual - noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - - # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample - - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - progress_bar.update() - # NOTE: Modified to yield the latents instead of calling a callback. - yield ImageGenerationResult.step_preview(self, kwargs['step_preview_mode'], width, height, latents, generator, i) - - # 9. Post-processing - image = self.decode_latents(latents) - - # TODO: Add UI to enable this - # 10. Run safety checker - # image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype) - - image = self.image_processor.postprocess(image, output_type=output_type) - - # Offload last model to CPU - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.final_offload_hook.offload() - - # NOTE: Modified to yield the decoded image as a numpy array. - yield ImageGenerationResult( - [np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255. 
- for i, image in enumerate(image)], - [gen.initial_seed() for gen in generator] if isinstance(generator, list) else [generator.initial_seed()], - num_inference_steps, - True - ) - - if optimizations.cpu_only: - device = "cpu" - else: - device = self.choose_device() - - # StableDiffusionPipeline w/ caching - pipe = load_pipe(self, "modify", GeneratorPipeline, model, optimizations, scheduler, device) - - # Optimizations - pipe = optimizations.apply(pipe, device) - - # RNG - batch_size = len(prompt) if isinstance(prompt, list) else 1 - generator = [] - for _ in range(batch_size): - gen = torch.Generator(device="cpu" if device in ("mps", "privateuseone") else device) # MPS and DML do not support the `Generator` API - generator.append(gen.manual_seed(random.randrange(0, np.iinfo(np.uint32).max) if seed is None else seed)) - if batch_size == 1: - # Some schedulers don't handle a list of generators: https://github.com/huggingface/diffusers/issues/1909 - generator = generator[0] - - # Init Image - init_image = Image.fromarray(image).convert('RGB') - - if fit: - height = height or pipe.unet.config.sample_size * pipe.vae_scale_factor - width = width or pipe.unet.config.sample_size * pipe.vae_scale_factor - init_image = init_image.resize((width, height)) - else: - width = init_image.width - height = init_image.height + import diffusers + import torch + from PIL import Image, ImageOps + import PIL.Image + + # Mostly copied from `diffusers.StableDiffusionImg2ImgPipeline`, with slight modifications to yield the latents at each step. + class GeneratorPipeline(diffusers.StableDiffusionImg2ImgPipeline): + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]], + image: Union[torch.FloatTensor, PIL.Image.Image], + strength: float = 0.8, + num_inference_steps: Optional[int] = 50, + guidance_scale: Optional[float] = 7.5, + negative_prompt: Optional[Union[str, List[str]]] = None, + num_images_per_prompt: Optional[int] = 1, + eta: Optional[float] = 0.0, + generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, + callback_steps: Optional[int] = 1, + **kwargs, + ): + # 1. Check inputs + self.check_inputs(prompt, strength, callback_steps) + + # 2. Define call parameters + batch_size = 1 if isinstance(prompt, str) else len(prompt) + device = self._execution_device + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 + + # 3. Encode input prompt + text_embeddings = self._encode_prompt( + prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) + + # 4. Preprocess image + image = diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess(image) + + # 5. set timesteps + self.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength, device) + latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) + + # 6. Prepare latent variables + latents = self.prepare_latents( + image, latent_timestep, batch_size, num_images_per_prompt, text_embeddings.dtype, device, generator + ) + + # 7. Prepare extra step kwargs. 
TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + + # 8. Denoising loop + num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order + with self.progress_bar(total=num_inference_steps) as progress_bar: + for i, t in enumerate(timesteps): + # NOTE: Modified to support disabling CFG + if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']: + do_classifier_free_guidance = False + text_embeddings = text_embeddings[text_embeddings.size(0) // 2:] + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample + + if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): + progress_bar.update() + # NOTE: Modified to yield the latents instead of calling a callback. + yield ImageGenerationResult.step_preview(self, kwargs['step_preview_mode'], width, height, latents, generator, i) + + # 9. Post-processing + image = self.decode_latents(latents) + + # TODO: Add UI to enable this + # 10. Run safety checker + # image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype) - # Seamless - if seamless_axes == SeamlessAxes.AUTO: - seamless_axes = self.detect_seamless(np.array(init_image) / 255) - _configure_model_padding(pipe.unet, seamless_axes) - _configure_model_padding(pipe.vae, seamless_axes) - - # Inference - with torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext(): - yield from pipe( - prompt=prompt, - image=[init_image] * batch_size, - strength=strength, - num_inference_steps=steps, - guidance_scale=cfg_scale, - negative_prompt=negative_prompt if use_negative_prompt else None, - num_images_per_prompt=1, - eta=0.0, - generator=generator, - output_type="pil", - return_dict=True, - callback=None, - callback_steps=1, - step_preview_mode=step_preview_mode, - cfg_end=optimizations.cfg_end - ) - case Pipeline.STABILITY_SDK: - import stability_sdk.client - import stability_sdk.interfaces.gooseai.generation.generation_pb2 - from PIL import Image, ImageOps - import io - - if key is None: - raise ValueError("DreamStudio key not provided. 
Enter your key in the add-on preferences.") - client = stability_sdk.client.StabilityInference(key=key, engine=model) - - if seed is None: - seed = random.randrange(0, np.iinfo(np.uint32).max) - - answers = client.generate( - prompt=prompt, - width=width, - height=height, - cfg_scale=cfg_scale, - sampler=scheduler.stability_sdk(), - steps=steps, - seed=seed, - init_image=(Image.open(image) if isinstance(image, str) else Image.fromarray(image)).convert('RGB'), - start_schedule=strength, + image = self.image_processor.postprocess(image, output_type=output_type) + + # Offload last model to CPU + if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: + self.final_offload_hook.offload() + + # NOTE: Modified to yield the decoded image as a numpy array. + yield ImageGenerationResult( + [np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255. + for i, image in enumerate(image)], + [gen.initial_seed() for gen in generator] if isinstance(generator, list) else [generator.initial_seed()], + num_inference_steps, + True ) - for answer in answers: - for artifact in answer.artifacts: - if artifact.finish_reason == stability_sdk.interfaces.gooseai.generation.generation_pb2.FILTER: - raise ValueError("Your request activated DreamStudio's safety filter. Please modify your prompt and try again.") - if artifact.type == stability_sdk.interfaces.gooseai.generation.generation_pb2.ARTIFACT_IMAGE: - image = Image.open(io.BytesIO(artifact.binary)) - yield ImageGenerationResult( - [np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255.], - [seed], - steps, - True - ) - case _: - raise Exception(f"Unsupported pipeline {pipeline}.") \ No newline at end of file + + if optimizations.cpu_only: + device = "cpu" + else: + device = self.choose_device() + + # StableDiffusionPipeline w/ caching + pipe = load_pipe(self, "modify", GeneratorPipeline, model, optimizations, scheduler, device) + + # Optimizations + pipe = optimizations.apply(pipe, device) + + # RNG + batch_size = len(prompt) if isinstance(prompt, list) else 1 + generator = [] + for _ in range(batch_size): + gen = torch.Generator(device="cpu" if device in ("mps", "privateuseone") else device) # MPS and DML do not support the `Generator` API + generator.append(gen.manual_seed(random.randrange(0, np.iinfo(np.uint32).max) if seed is None else seed)) + if batch_size == 1: + # Some schedulers don't handle a list of generators: https://github.com/huggingface/diffusers/issues/1909 + generator = generator[0] + + # Init Image + init_image = Image.fromarray(image).convert('RGB') + + if fit: + height = height or pipe.unet.config.sample_size * pipe.vae_scale_factor + width = width or pipe.unet.config.sample_size * pipe.vae_scale_factor + init_image = init_image.resize((width, height)) + else: + width = init_image.width + height = init_image.height + + # Seamless + if seamless_axes == SeamlessAxes.AUTO: + seamless_axes = self.detect_seamless(np.array(init_image) / 255) + _configure_model_padding(pipe.unet, seamless_axes) + _configure_model_padding(pipe.vae, seamless_axes) + + # Inference + with torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext(): + yield from pipe( + prompt=prompt, + image=[init_image] * batch_size, + strength=strength, + num_inference_steps=steps, + guidance_scale=cfg_scale, + negative_prompt=negative_prompt if use_negative_prompt else None, + num_images_per_prompt=1, + eta=0.0, + generator=generator, + output_type="pil", + return_dict=True, + callback=None, + 
callback_steps=1, + step_preview_mode=step_preview_mode, + cfg_end=optimizations.cfg_end + ) \ No newline at end of file diff --git a/generator_process/actions/inpaint.py b/generator_process/actions/inpaint.py index 758a7356..b8bfd454 100644 --- a/generator_process/actions/inpaint.py +++ b/generator_process/actions/inpaint.py @@ -4,12 +4,11 @@ from numpy.typing import NDArray import numpy as np import random -from .prompt_to_image import Pipeline, Scheduler, Optimizations, StepPreviewMode, ImageGenerationResult, _configure_model_padding, model_snapshot_folder, load_pipe +from .prompt_to_image import Scheduler, Optimizations, StepPreviewMode, ImageGenerationResult, _configure_model_padding, model_snapshot_folder, load_pipe from .detect_seamless import SeamlessAxes def inpaint( self, - pipeline: Pipeline, model: str, @@ -45,257 +44,213 @@ def inpaint( **kwargs ) -> Generator[NDArray, None, None]: - match pipeline: - case Pipeline.STABLE_DIFFUSION: - import diffusers - import torch - from PIL import Image, ImageOps - import PIL.Image - - # Mostly copied from `diffusers.StableDiffusionInpaintPipeline`, with slight modifications to yield the latents at each step. - class GeneratorPipeline(diffusers.StableDiffusionInpaintPipeline): - @torch.no_grad() - def __call__( - self, - prompt: Union[str, List[str]], - image: Union[torch.FloatTensor, PIL.Image.Image], - mask_image: Union[torch.FloatTensor, PIL.Image.Image], - height: Optional[int] = None, - width: Optional[int] = None, - num_inference_steps: int = 50, - guidance_scale: float = 7.5, - negative_prompt: Optional[Union[str, List[str]]] = None, - num_images_per_prompt: Optional[int] = 1, - eta: float = 0.0, - generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, - latents: Optional[torch.FloatTensor] = None, - output_type: Optional[str] = "pil", - return_dict: bool = True, - callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, - callback_steps: Optional[int] = 1, - **kwargs, - ): - # 0. Default height and width to unet - height = height or self.unet.config.sample_size * self.vae_scale_factor - width = width or self.unet.config.sample_size * self.vae_scale_factor - - # 1. Check inputs - self.check_inputs(prompt, height, width, callback_steps) - - # 2. Define call parameters - batch_size = 1 if isinstance(prompt, str) else len(prompt) - device = self._execution_device - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - # corresponds to doing no classifier free guidance. - do_classifier_free_guidance = guidance_scale > 1.0 - - # 3. Encode input prompt - text_embeddings = self._encode_prompt( - prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt - ) - - # 4. Preprocess mask and image - mask, masked_image = diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint.prepare_mask_and_masked_image(image, mask_image) - - # 5. set timesteps - self.scheduler.set_timesteps(num_inference_steps, device=device) - timesteps = self.scheduler.timesteps - - # 6. Prepare latent variables - num_channels_latents = self.vae.config.latent_channels - latents = self.prepare_latents( - batch_size * num_images_per_prompt, - num_channels_latents, - height, - width, - text_embeddings.dtype, - device, - generator, - latents, - ) - - # 7. 
Prepare mask latent variables - mask, masked_image_latents = self.prepare_mask_latents( - mask, - masked_image, - batch_size * num_images_per_prompt, - height, - width, - text_embeddings.dtype, - device, - generator, - do_classifier_free_guidance, - ) - - # 8. Check that sizes of mask, masked image and latents match - num_channels_mask = mask.shape[1] - num_channels_masked_image = masked_image_latents.shape[1] - if num_channels_latents + num_channels_mask + num_channels_masked_image != self.unet.config.in_channels: - raise ValueError( - f"Select an inpainting model, such as 'stabilityai/stable-diffusion-2-inpainting'" - ) - - # 9. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline - extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - - # 10. Denoising loop - num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order - with self.progress_bar(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - # NOTE: Modified to support disabling CFG - if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']: - do_classifier_free_guidance = False - text_embeddings = text_embeddings[text_embeddings.size(0) // 2:] - mask = mask[mask.size(0) // 2:] - masked_image_latents = masked_image_latents[masked_image_latents.size(0) // 2:] - # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - - # concat latents, mask, masked_image_latents in the channel dimension - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) - latent_model_input = torch.cat([latent_model_input, mask, masked_image_latents], dim=1) - - # predict the noise residual - noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - - # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample - - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - progress_bar.update() - # NOTE: Modified to yield the latents instead of calling a callback. - yield ImageGenerationResult.step_preview(self, kwargs['step_preview_mode'], width, height, latents, generator, i) - - # 11. Post-processing - image = self.decode_latents(latents) - - # TODO: Add UI to enable this - # 10. Run safety checker - # image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype) - - # Offload last model to CPU - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.final_offload_hook.offload() - - # NOTE: Modified to yield the decoded image as a numpy array. - yield ImageGenerationResult( - [np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255. 
- for i, image in enumerate(self.numpy_to_pil(image))], - [gen.initial_seed() for gen in generator] if isinstance(generator, list) else [generator.initial_seed()], - num_inference_steps, - True - ) - - if optimizations.cpu_only: - device = "cpu" - else: - device = self.choose_device() - - # StableDiffusionPipeline w/ caching - pipe = load_pipe(self, "inpaint", GeneratorPipeline, model, optimizations, scheduler, device) - - # Optimizations - pipe = optimizations.apply(pipe, device) - - # RNG - batch_size = len(prompt) if isinstance(prompt, list) else 1 - generator = [] - for _ in range(batch_size): - gen = torch.Generator(device="cpu" if device in ("mps", "privateuseone") else device) # MPS and DML do not support the `Generator` API - generator.append(gen.manual_seed(random.randrange(0, np.iinfo(np.uint32).max) if seed is None else seed)) - if batch_size == 1: - # Some schedulers don't handle a list of generators: https://github.com/huggingface/diffusers/issues/1909 - generator = generator[0] - - # Init Image - init_image = Image.fromarray(image) - - # Seamless - if seamless_axes == SeamlessAxes.AUTO: - seamless_axes = self.detect_seamless(np.array(init_image) / 255) - _configure_model_padding(pipe.unet, seamless_axes) - _configure_model_padding(pipe.vae, seamless_axes) - - # Inference - with torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext(): - match inpaint_mask_src: - case 'alpha': - mask_image = ImageOps.invert(init_image.getchannel('A')) - case 'prompt': - from transformers import AutoProcessor, CLIPSegForImageSegmentation - - processor = AutoProcessor.from_pretrained("CIDAS/clipseg-rd64-refined") - clipseg = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined") - inputs = processor(text=[text_mask], images=[init_image.convert('RGB')], return_tensors="pt", padding=True) - outputs = clipseg(**inputs) - mask_image = Image.fromarray(np.uint8((1 - torch.sigmoid(outputs.logits).lt(text_mask_confidence).int().detach().numpy()) * 255), 'L').resize(init_image.size) - - yield from pipe( - prompt=prompt, - image=[init_image.convert('RGB')] * batch_size, - mask_image=[mask_image] * batch_size, - strength=strength, - height=init_image.size[1] if fit else height, - width=init_image.size[0] if fit else width, - num_inference_steps=steps, - guidance_scale=cfg_scale, - negative_prompt=negative_prompt if use_negative_prompt else None, - num_images_per_prompt=1, - eta=0.0, - generator=generator, - latents=None, - output_type="pil", - return_dict=True, - callback=None, - callback_steps=1, - step_preview_mode=step_preview_mode, - cfg_end=optimizations.cfg_end + import diffusers + import torch + from PIL import Image, ImageOps + import PIL.Image + + # Mostly copied from `diffusers.StableDiffusionInpaintPipeline`, with slight modifications to yield the latents at each step. 
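+ # The modifications: classifier-free guidance can be disabled partway through the run via the `cfg_end` kwarg, and step previews/final results are yielded as `ImageGenerationResult`s instead of being reported through `callback`.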
+ class GeneratorPipeline(diffusers.StableDiffusionInpaintPipeline): + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]], + image: Union[torch.FloatTensor, PIL.Image.Image], + mask_image: Union[torch.FloatTensor, PIL.Image.Image], + height: Optional[int] = None, + width: Optional[int] = None, + num_inference_steps: int = 50, + guidance_scale: float = 7.5, + negative_prompt: Optional[Union[str, List[str]]] = None, + num_images_per_prompt: Optional[int] = 1, + eta: float = 0.0, + generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, + latents: Optional[torch.FloatTensor] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, + callback_steps: Optional[int] = 1, + **kwargs, + ): + # 0. Default height and width to unet + height = height or self.unet.config.sample_size * self.vae_scale_factor + width = width or self.unet.config.sample_size * self.vae_scale_factor + + # 1. Check inputs + self.check_inputs(prompt, height, width, callback_steps) + + # 2. Define call parameters + batch_size = 1 if isinstance(prompt, str) else len(prompt) + device = self._execution_device + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 + + # 3. Encode input prompt + text_embeddings = self._encode_prompt( + prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) + + # 4. Preprocess mask and image + mask, masked_image = diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint.prepare_mask_and_masked_image(image, mask_image) + + # 5. set timesteps + self.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = self.scheduler.timesteps + + # 6. Prepare latent variables + num_channels_latents = self.vae.config.latent_channels + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + text_embeddings.dtype, + device, + generator, + latents, + ) + + # 7. Prepare mask latent variables + mask, masked_image_latents = self.prepare_mask_latents( + mask, + masked_image, + batch_size * num_images_per_prompt, + height, + width, + text_embeddings.dtype, + device, + generator, + do_classifier_free_guidance, + ) + + # 8. Check that sizes of mask, masked image and latents match + num_channels_mask = mask.shape[1] + num_channels_masked_image = masked_image_latents.shape[1] + if num_channels_latents + num_channels_mask + num_channels_masked_image != self.unet.config.in_channels: + raise ValueError( + f"Select an inpainting model, such as 'stabilityai/stable-diffusion-2-inpainting'" ) - case Pipeline.STABILITY_SDK: - import stability_sdk.client - import stability_sdk.interfaces.gooseai.generation.generation_pb2 - from PIL import Image, ImageOps - import io - - if key is None: - raise ValueError("DreamStudio key not provided. 
Enter your key in the add-on preferences.") - client = stability_sdk.client.StabilityInference(key=key, engine=model) - - if seed is None: - seed = random.randrange(0, np.iinfo(np.uint32).max) - - init_image = Image.open(image) if isinstance(image, str) else Image.fromarray(image) - - answers = client.generate( - prompt=prompt, - width=width or 512, - height=height or 512, - cfg_scale=cfg_scale, - sampler=scheduler.stability_sdk(), - steps=steps, - seed=seed, - init_image=init_image.convert('RGB'), - mask_image=init_image.getchannel('A'), - start_schedule=strength, + + # 9. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + + # 10. Denoising loop + num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order + with self.progress_bar(total=num_inference_steps) as progress_bar: + for i, t in enumerate(timesteps): + # NOTE: Modified to support disabling CFG + if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']: + do_classifier_free_guidance = False + text_embeddings = text_embeddings[text_embeddings.size(0) // 2:] + mask = mask[mask.size(0) // 2:] + masked_image_latents = masked_image_latents[masked_image_latents.size(0) // 2:] + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + + # concat latents, mask, masked_image_latents in the channel dimension + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + latent_model_input = torch.cat([latent_model_input, mask, masked_image_latents], dim=1) + + # predict the noise residual + noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample + + if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): + progress_bar.update() + # NOTE: Modified to yield the latents instead of calling a callback. + yield ImageGenerationResult.step_preview(self, kwargs['step_preview_mode'], width, height, latents, generator, i) + + # 11. Post-processing + image = self.decode_latents(latents) + + # TODO: Add UI to enable this + # 10. Run safety checker + # image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype) + + # Offload last model to CPU + if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: + self.final_offload_hook.offload() + + # NOTE: Modified to yield the decoded image as a numpy array. + yield ImageGenerationResult( + [np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255. + for i, image in enumerate(self.numpy_to_pil(image))], + [gen.initial_seed() for gen in generator] if isinstance(generator, list) else [generator.initial_seed()], + num_inference_steps, + True ) - for answer in answers: - for artifact in answer.artifacts: - if artifact.finish_reason == stability_sdk.interfaces.gooseai.generation.generation_pb2.FILTER: - raise ValueError("Your request activated DreamStudio's safety filter. 
Please modify your prompt and try again.") - if artifact.type == stability_sdk.interfaces.gooseai.generation.generation_pb2.ARTIFACT_IMAGE: - image = Image.open(io.BytesIO(artifact.binary)) - yield ImageGenerationResult( - [np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255.], - [seed], - steps, - True - ) - - case _: - raise Exception(f"Unsupported pipeline {pipeline}.") \ No newline at end of file + + if optimizations.cpu_only: + device = "cpu" + else: + device = self.choose_device() + + # StableDiffusionPipeline w/ caching + pipe = load_pipe(self, "inpaint", GeneratorPipeline, model, optimizations, scheduler, device) + + # Optimizations + pipe = optimizations.apply(pipe, device) + + # RNG + batch_size = len(prompt) if isinstance(prompt, list) else 1 + generator = [] + for _ in range(batch_size): + gen = torch.Generator(device="cpu" if device in ("mps", "privateuseone") else device) # MPS and DML do not support the `Generator` API + generator.append(gen.manual_seed(random.randrange(0, np.iinfo(np.uint32).max) if seed is None else seed)) + if batch_size == 1: + # Some schedulers don't handle a list of generators: https://github.com/huggingface/diffusers/issues/1909 + generator = generator[0] + + # Init Image + init_image = Image.fromarray(image) + + # Seamless + if seamless_axes == SeamlessAxes.AUTO: + seamless_axes = self.detect_seamless(np.array(init_image) / 255) + _configure_model_padding(pipe.unet, seamless_axes) + _configure_model_padding(pipe.vae, seamless_axes) + + # Inference + with torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext(): + match inpaint_mask_src: + case 'alpha': + mask_image = ImageOps.invert(init_image.getchannel('A')) + case 'prompt': + from transformers import AutoProcessor, CLIPSegForImageSegmentation + + processor = AutoProcessor.from_pretrained("CIDAS/clipseg-rd64-refined") + clipseg = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined") + inputs = processor(text=[text_mask], images=[init_image.convert('RGB')], return_tensors="pt", padding=True) + outputs = clipseg(**inputs) + mask_image = Image.fromarray(np.uint8((1 - torch.sigmoid(outputs.logits).lt(text_mask_confidence).int().detach().numpy()) * 255), 'L').resize(init_image.size) + + yield from pipe( + prompt=prompt, + image=[init_image.convert('RGB')] * batch_size, + mask_image=[mask_image] * batch_size, + strength=strength, + height=init_image.size[1] if fit else height, + width=init_image.size[0] if fit else width, + num_inference_steps=steps, + guidance_scale=cfg_scale, + negative_prompt=negative_prompt if use_negative_prompt else None, + num_images_per_prompt=1, + eta=0.0, + generator=generator, + latents=None, + output_type="pil", + return_dict=True, + callback=None, + callback_steps=1, + step_preview_mode=step_preview_mode, + cfg_end=optimizations.cfg_end + ) \ No newline at end of file diff --git a/generator_process/actions/prompt_to_image.py b/generator_process/actions/prompt_to_image.py index 8a9e845d..36d0c0da 100644 --- a/generator_process/actions/prompt_to_image.py +++ b/generator_process/actions/prompt_to_image.py @@ -13,8 +13,6 @@ from .detect_seamless import SeamlessAxes from ..models.upscale_tiler import tiled_decode_latents -from ..models import Pipeline - class CachedPipeline: """A pipeline that has been cached for subsequent runs.""" @@ -166,9 +164,10 @@ class Optimizations: @staticmethod def infer_device() -> str: + from ...absolute_path import absolute_path if sys.platform == "darwin": return "mps" - elif 
Pipeline.directml_available(): + elif os.path.exists(absolute_path(".python_dependencies/torch_directml")): return "privateuseone" else: return "cuda" @@ -373,11 +372,12 @@ def choose_device(self) -> str: Automatically select which PyTorch device to use. """ import torch + from ...absolute_path import absolute_path if torch.cuda.is_available(): return "cuda" elif torch.backends.mps.is_available(): return "mps" - if Pipeline.directml_available(): + if os.path.exists(absolute_path(".python_dependencies/torch_directml")): import torch_directml if torch_directml.is_available(): # can be named better when torch.utils.rename_privateuse1_backend() is released @@ -444,7 +444,6 @@ def model_snapshot_folder(model, preferred_revision: str | None = None): def prompt_to_image( self, - pipeline: Pipeline, model: str, @@ -473,199 +472,161 @@ def prompt_to_image( **kwargs ) -> Generator[ImageGenerationResult, None, None]: - match pipeline: - case Pipeline.STABLE_DIFFUSION: - import diffusers - import torch - from PIL import Image, ImageOps - - # Mostly copied from `diffusers.StableDiffusionPipeline`, with slight modifications to yield the latents at each step. - class GeneratorPipeline(diffusers.StableDiffusionPipeline): - @torch.no_grad() - def __call__( - self, - prompt: Union[str, List[str]], - height: Optional[int] = None, - width: Optional[int] = None, - num_inference_steps: int = 50, - guidance_scale: float = 7.5, - negative_prompt: Optional[Union[str, List[str]]] = None, - num_images_per_prompt: Optional[int] = 1, - eta: float = 0.0, - generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, - latents: Optional[torch.FloatTensor] = None, - output_type: Optional[str] = "pil", - return_dict: bool = True, - callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, - callback_steps: Optional[int] = 1, - **kwargs, - ): - # 0. Default height and width to unet - height = height or self.unet.config.sample_size * self.vae_scale_factor - width = width or self.unet.config.sample_size * self.vae_scale_factor - - # 1. Check inputs. Raise error if not correct - self.check_inputs(prompt, height, width, callback_steps) - - # 2. Define call parameters - batch_size = 1 if isinstance(prompt, str) else len(prompt) - device = self._execution_device - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - # corresponds to doing no classifier free guidance. - do_classifier_free_guidance = guidance_scale > 1.0 - - # 3. Encode input prompt - text_embeddings = self._encode_prompt( - prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt - ) - - # 4. Prepare timesteps - self.scheduler.set_timesteps(num_inference_steps, device=device) - timesteps = self.scheduler.timesteps - - # 5. Prepare latent variables - num_channels_latents = self.unet.in_channels - latents = self.prepare_latents( - batch_size * num_images_per_prompt, - num_channels_latents, - height, - width, - text_embeddings.dtype, - device, - generator, - latents, - ) - - # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline - extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - - # 7. 
Denoising loop - for i, t in enumerate(self.progress_bar(timesteps)): - # NOTE: Modified to support disabling CFG - if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']: - do_classifier_free_guidance = False - text_embeddings = text_embeddings[text_embeddings.size(0) // 2:] - - # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) - - # predict the noise residual - noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - - # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample - - # NOTE: Modified to yield the latents instead of calling a callback. - yield ImageGenerationResult.step_preview(self, kwargs['step_preview_mode'], width, height, latents, generator, i) - - # 8. Post-processing - image = self.decode_latents(latents) - - # TODO: Add UI to enable this. - # 9. Run safety checker - # image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype) - - # Offload last model to CPU - if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: - self.final_offload_hook.offload() - - # NOTE: Modified to yield the decoded image as a numpy array. - yield ImageGenerationResult( - [np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255. - for i, image in enumerate(self.numpy_to_pil(image))], - [gen.initial_seed() for gen in generator] if isinstance(generator, list) else [generator.initial_seed()], - num_inference_steps, - True - ) - - if optimizations.cpu_only: - device = "cpu" - else: - device = self.choose_device() - - # StableDiffusionPipeline w/ caching - pipe = load_pipe(self, "prompt", GeneratorPipeline, model, optimizations, scheduler, device) - - # Optimizations - pipe = optimizations.apply(pipe, device) - - # RNG - batch_size = len(prompt) if isinstance(prompt, list) else 1 - generator = [] - for _ in range(batch_size): - gen = torch.Generator(device="cpu" if device in ("mps", "privateuseone") else device) # MPS and DML do not support the `Generator` API - generator.append(gen.manual_seed(random.randrange(0, np.iinfo(np.uint32).max) if seed is None else seed)) - if batch_size == 1: - # Some schedulers don't handle a list of generators: https://github.com/huggingface/diffusers/issues/1909 - generator = generator[0] - - # Seamless - _configure_model_padding(pipe.unet, seamless_axes) - _configure_model_padding(pipe.vae, seamless_axes) - - # Inference - with torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext(): - yield from pipe( - prompt=prompt, - height=height, - width=width, - num_inference_steps=steps, - guidance_scale=cfg_scale, - negative_prompt=negative_prompt if use_negative_prompt else None, - num_images_per_prompt=1, - eta=0.0, - generator=generator, - latents=None, - output_type="pil", - return_dict=True, - callback=None, - callback_steps=1, - step_preview_mode=step_preview_mode, - cfg_end=optimizations.cfg_end - ) - case Pipeline.STABILITY_SDK: - import stability_sdk.client - import stability_sdk.interfaces.gooseai.generation.generation_pb2 - from PIL import Image, 
ImageOps - import io - - if key is None: - raise ValueError("DreamStudio key not provided. Enter your key in the add-on preferences.") - client = stability_sdk.client.StabilityInference(key=key, engine=model) - - if seed is None: - seed = random.randrange(0, np.iinfo(np.uint32).max) - - answers = client.generate( - prompt=prompt, - width=width or 512, - height=height or 512, - cfg_scale=cfg_scale, - sampler=scheduler.stability_sdk(), - steps=steps, - seed=seed + import diffusers + import torch + from PIL import Image, ImageOps + + # Mostly copied from `diffusers.StableDiffusionPipeline`, with slight modifications to yield the latents at each step. + class GeneratorPipeline(diffusers.StableDiffusionPipeline): + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]], + height: Optional[int] = None, + width: Optional[int] = None, + num_inference_steps: int = 50, + guidance_scale: float = 7.5, + negative_prompt: Optional[Union[str, List[str]]] = None, + num_images_per_prompt: Optional[int] = 1, + eta: float = 0.0, + generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, + latents: Optional[torch.FloatTensor] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, + callback_steps: Optional[int] = 1, + **kwargs, + ): + # 0. Default height and width to unet + height = height or self.unet.config.sample_size * self.vae_scale_factor + width = width or self.unet.config.sample_size * self.vae_scale_factor + + # 1. Check inputs. Raise error if not correct + self.check_inputs(prompt, height, width, callback_steps) + + # 2. Define call parameters + batch_size = 1 if isinstance(prompt, str) else len(prompt) + device = self._execution_device + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 + + # 3. Encode input prompt + text_embeddings = self._encode_prompt( + prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt + ) + + # 4. Prepare timesteps + self.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = self.scheduler.timesteps + + # 5. Prepare latent variables + num_channels_latents = self.unet.in_channels + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + text_embeddings.dtype, + device, + generator, + latents, + ) + + # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + + # 7. 
Denoising loop + for i, t in enumerate(self.progress_bar(timesteps)): + # NOTE: Modified to support disabling CFG + if do_classifier_free_guidance and (i / len(timesteps)) >= kwargs['cfg_end']: + do_classifier_free_guidance = False + text_embeddings = text_embeddings[text_embeddings.size(0) // 2:] + + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs).prev_sample + + # NOTE: Modified to yield the latents instead of calling a callback. + yield ImageGenerationResult.step_preview(self, kwargs['step_preview_mode'], width, height, latents, generator, i) + + # 8. Post-processing + image = self.decode_latents(latents) + + # TODO: Add UI to enable this. + # 9. Run safety checker + # image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype) + + # Offload last model to CPU + if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: + self.final_offload_hook.offload() + + # NOTE: Modified to yield the decoded image as a numpy array. + yield ImageGenerationResult( + [np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255. + for i, image in enumerate(self.numpy_to_pil(image))], + [gen.initial_seed() for gen in generator] if isinstance(generator, list) else [generator.initial_seed()], + num_inference_steps, + True ) - for answer in answers: - for artifact in answer.artifacts: - if artifact.finish_reason == stability_sdk.interfaces.gooseai.generation.generation_pb2.FILTER: - raise ValueError("Your request activated DreamStudio's safety filter. 
Please modify your prompt and try again.") - if artifact.type == stability_sdk.interfaces.gooseai.generation.generation_pb2.ARTIFACT_IMAGE: - image = Image.open(io.BytesIO(artifact.binary)) - yield ImageGenerationResult( - [np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255.], - [seed], - steps, - True - ) - case _: - raise Exception(f"Unsupported pipeline {pipeline}.") + + if optimizations.cpu_only: + device = "cpu" + else: + device = self.choose_device() + + # StableDiffusionPipeline w/ caching + pipe = load_pipe(self, "prompt", GeneratorPipeline, model, optimizations, scheduler, device) + + # Optimizations + pipe = optimizations.apply(pipe, device) + + # RNG + batch_size = len(prompt) if isinstance(prompt, list) else 1 + generator = [] + for _ in range(batch_size): + gen = torch.Generator(device="cpu" if device in ("mps", "privateuseone") else device) # MPS and DML do not support the `Generator` API + generator.append(gen.manual_seed(random.randrange(0, np.iinfo(np.uint32).max) if seed is None else seed)) + if batch_size == 1: + # Some schedulers don't handle a list of generators: https://github.com/huggingface/diffusers/issues/1909 + generator = generator[0] + + # Seamless + _configure_model_padding(pipe.unet, seamless_axes) + _configure_model_padding(pipe.vae, seamless_axes) + + # Inference + with torch.inference_mode() if device not in ('mps', "privateuseone") else nullcontext(): + yield from pipe( + prompt=prompt, + height=height, + width=width, + num_inference_steps=steps, + guidance_scale=cfg_scale, + negative_prompt=negative_prompt if use_negative_prompt else None, + num_images_per_prompt=1, + eta=0.0, + generator=generator, + latents=None, + output_type="pil", + return_dict=True, + callback=None, + callback_steps=1, + step_preview_mode=step_preview_mode, + cfg_end=optimizations.cfg_end + ) def _conv_forward_asymmetric(self, input, weight, bias): import torch.nn as nn diff --git a/generator_process/models/__init__.py b/generator_process/models/__init__.py index 36631dd9..6b9245f3 100644 --- a/generator_process/models/__init__.py +++ b/generator_process/models/__init__.py @@ -1,2 +1 @@ -from .pipeline import * from .fix_it_error import * \ No newline at end of file diff --git a/generator_process/models/pipeline.py b/generator_process/models/pipeline.py deleted file mode 100644 index 59474112..00000000 --- a/generator_process/models/pipeline.py +++ /dev/null @@ -1,72 +0,0 @@ -import enum -import os - -class Pipeline(enum.IntEnum): - STABLE_DIFFUSION = 0 - - STABILITY_SDK = 1 - - @staticmethod - def local_available(): - from ...absolute_path import absolute_path - return os.path.exists(absolute_path(".python_dependencies/diffusers")) - - @staticmethod - def directml_available(): - from ...absolute_path import absolute_path - return os.path.exists(absolute_path(".python_dependencies/torch_directml")) - - def __str__(self): - return self.name - - def model(self): - return True - - def init_img_actions(self): - match self: - case Pipeline.STABLE_DIFFUSION: - return ['modify', 'inpaint', 'outpaint'] - case Pipeline.STABILITY_SDK: - return ['modify', 'inpaint'] - - def inpaint_mask_sources(self): - match self: - case Pipeline.STABLE_DIFFUSION: - return ['alpha', 'prompt'] - case Pipeline.STABILITY_SDK: - return ['alpha'] - - def color_correction(self): - match self: - case Pipeline.STABLE_DIFFUSION: - return True - case Pipeline.STABILITY_SDK: - return False - - def negative_prompts(self): - match self: - case Pipeline.STABLE_DIFFUSION: - return True - case 
Pipeline.STABILITY_SDK: - return False - - def seamless(self): - match self: - case Pipeline.STABLE_DIFFUSION: - return True - case Pipeline.STABILITY_SDK: - return False - - def upscaling(self): - match self: - case Pipeline.STABLE_DIFFUSION: - return True - case Pipeline.STABILITY_SDK: - return False - - def depth(self): - match self: - case Pipeline.STABLE_DIFFUSION: - return True - case Pipeline.STABILITY_SDK: - return False \ No newline at end of file diff --git a/operators/dream_texture.py b/operators/dream_texture.py index 324e4fe5..db8caee3 100644 --- a/operators/dream_texture.py +++ b/operators/dream_texture.py @@ -8,7 +8,7 @@ from ..pil_to_image import * from ..prompt_engineering import * from ..generator_process import Generator -from ..generator_process.actions.prompt_to_image import ImageGenerationResult, Pipeline +from ..generator_process.actions.prompt_to_image import ImageGenerationResult from ..generator_process.actions.huggingface_hub import ModelType import time diff --git a/operators/project.py b/operators/project.py index d5a7292d..b22476b4 100644 --- a/operators/project.py +++ b/operators/project.py @@ -8,7 +8,7 @@ import numpy as np from .view_history import ImportPromptFile -from ..property_groups.dream_prompt import pipeline_options +from ..property_groups.dream_prompt import backend_options from .open_latest_version import OpenLatestVersion, is_force_show_download, new_version_available from ..ui.panels.dream_texture import advanced_panel, create_panel, prompt_panel, size_panel @@ -17,7 +17,7 @@ from ..preferences import StableDiffusionPreferences from ..generator_process import Generator -from ..generator_process.models import Pipeline, FixItError +from ..generator_process.models import FixItError from ..generator_process.actions.huggingface_hub import ModelType import tempfile @@ -96,9 +96,8 @@ def draw(self, context): elif new_version_available(): layout.operator(OpenLatestVersion.bl_idname, icon="IMPORT") - layout.prop(context.scene.dream_textures_project_prompt, "pipeline") - if Pipeline[context.scene.dream_textures_project_prompt.pipeline].model(): - layout.prop(context.scene.dream_textures_project_prompt, 'model') + layout.prop(context.scene.dream_textures_project_prompt, "backend") + layout.prop(context.scene.dream_textures_project_prompt, 'model') yield DREAM_PT_dream_panel_projection diff --git a/preferences.py b/preferences.py index 8529a490..dec468a6 100644 --- a/preferences.py +++ b/preferences.py @@ -11,7 +11,6 @@ from .operators.open_latest_version import OpenLatestVersion from .ui.presets import RestoreDefaultPresets, default_presets_missing from .generator_process import Generator -from .generator_process.actions.prompt_to_image import Pipeline from .generator_process.actions.huggingface_hub import DownloadStatus, ModelType from .generator_process.actions.convert_original_stable_diffusion_to_diffusers import ModelConfig @@ -185,10 +184,9 @@ class StableDiffusionPreferences(bpy.types.AddonPreferences): @staticmethod def register(): - if Pipeline.local_available(): - def on_done(future): - set_model_list('installed_models', future.result()) - Generator.shared().hf_list_installed_models().add_done_callback(on_done) + def on_done(future): + set_model_list('installed_models', future.result()) + Generator.shared().hf_list_installed_models().add_done_callback(on_done) def draw(self, context): layout = self.layout @@ -200,62 +198,52 @@ def draw(self, context): has_dependencies = len(os.listdir(absolute_path(".python_dependencies"))) > 2 if 
has_dependencies: - has_local = Pipeline.local_available() - - if has_local: - if not _template_model_download_progress(context, layout): - conflicting_packages = ["wandb", "k_diffusion"] - conflicting_package_specs = {} + if not _template_model_download_progress(context, layout): + conflicting_packages = ["wandb", "k_diffusion"] + conflicting_package_specs = {} + for package in conflicting_packages: + spec = importlib.util.find_spec(package) + if spec is not None: + conflicting_package_specs[package] = spec + if len(conflicting_package_specs) > 0: + conflicts_box = layout.box() + conflicts_box.label(text="WARNING", icon="ERROR") + conflicts_box.label(text=f"The following packages conflict with Dream Textures: {', '.join(conflicting_packages)}") + conflicts_box.label(text=f"You may need to run Blender as an administrator to remove these packages") + conflicts_box.operator(UninstallDependencies.bl_idname, text="Uninstall Conflicting Packages", icon="CANCEL").conflicts = ' '.join(conflicting_packages) + conflicts_box.label(text=f"If the button above fails, you can remove the following folders manually:") for package in conflicting_packages: - spec = importlib.util.find_spec(package) - if spec is not None: - conflicting_package_specs[package] = spec - if len(conflicting_package_specs) > 0: - conflicts_box = layout.box() - conflicts_box.label(text="WARNING", icon="ERROR") - conflicts_box.label(text=f"The following packages conflict with Dream Textures: {', '.join(conflicting_packages)}") - conflicts_box.label(text=f"You may need to run Blender as an administrator to remove these packages") - conflicts_box.operator(UninstallDependencies.bl_idname, text="Uninstall Conflicting Packages", icon="CANCEL").conflicts = ' '.join(conflicting_packages) - conflicts_box.label(text=f"If the button above fails, you can remove the following folders manually:") - for package in conflicting_packages: - if package not in conflicting_package_specs: - continue - location = conflicting_package_specs[package].submodule_search_locations[0] - conflicts_box.operator(OpenURL.bl_idname, text=f"Open '{location}'").url = f"file://{location}" - - if not weights_installed: - default_weights_box = layout.box() - default_weights_box.label(text="You need to download at least one model.") - install_model = default_weights_box.operator(InstallModel.bl_idname, text="Download Stable Diffusion v2.1 (Recommended)", icon="IMPORT") - install_model.model = "stabilityai/stable-diffusion-2-1" - install_model.prefer_fp16_revision = self.prefer_fp16_revision - - search_box = layout.box() - search_box.label(text="Find Models", icon="SETTINGS") - search_box.label(text="Search Hugging Face Hub for more compatible models.") - - search_box.prop(self, "model_query", text="", icon="VIEWZOOM") - - if len(self.model_results) > 0: - search_box.template_list(PREFERENCES_UL_ModelList.__name__, "dream_textures_model_results", self, "model_results", self, "active_model_result") - - search_box.label(text="Some models require authentication. 
Provide a token to download gated models.") - - auth_row = search_box.row() - auth_row.prop(self, "hf_token", text="Token") - auth_row.operator(OpenURL.bl_idname, text="Get Your Token", icon="KEYINGSET").url = "https://huggingface.co/settings/tokens" - - search_box.prop(self, "prefer_fp16_revision") - - layout.template_list(PREFERENCES_UL_ModelList.__name__, "dream_textures_installed_models", self, "installed_models", self, "active_installed_model") - layout.operator(ImportWeights.bl_idname, icon='IMPORT') - - dream_studio_box = layout.box() - dream_studio_box.label(text=f"DreamStudio{' (Optional)' if has_local else ''}", icon="HIDE_OFF") - dream_studio_box.label(text=f"Link to your DreamStudio account to run in the cloud{' instead of locally.' if has_local else '.'}") - key_row = dream_studio_box.row() - key_row.prop(self, "dream_studio_key", text="Key") - key_row.operator(OpenURL.bl_idname, text="Find Your Key", icon="KEYINGSET").url = "https://beta.dreamstudio.ai/membership?tab=apiKeys" + if package not in conflicting_package_specs: + continue + location = conflicting_package_specs[package].submodule_search_locations[0] + conflicts_box.operator(OpenURL.bl_idname, text=f"Open '{location}'").url = f"file://{location}" + + if not weights_installed: + default_weights_box = layout.box() + default_weights_box.label(text="You need to download at least one model.") + install_model = default_weights_box.operator(InstallModel.bl_idname, text="Download Stable Diffusion v2.1 (Recommended)", icon="IMPORT") + install_model.model = "stabilityai/stable-diffusion-2-1" + install_model.prefer_fp16_revision = self.prefer_fp16_revision + + search_box = layout.box() + search_box.label(text="Find Models", icon="SETTINGS") + search_box.label(text="Search Hugging Face Hub for more compatible models.") + + search_box.prop(self, "model_query", text="", icon="VIEWZOOM") + + if len(self.model_results) > 0: + search_box.template_list(PREFERENCES_UL_ModelList.__name__, "dream_textures_model_results", self, "model_results", self, "active_model_result") + + search_box.label(text="Some models require authentication. 
Provide a token to download gated models.") + + auth_row = search_box.row() + auth_row.prop(self, "hf_token", text="Token") + auth_row.operator(OpenURL.bl_idname, text="Get Your Token", icon="KEYINGSET").url = "https://huggingface.co/settings/tokens" + + search_box.prop(self, "prefer_fp16_revision") + + layout.template_list(PREFERENCES_UL_ModelList.__name__, "dream_textures_installed_models", self, "installed_models", self, "active_installed_model") + layout.operator(ImportWeights.bl_idname, icon='IMPORT') if weights_installed or len(self.dream_studio_key) > 0: complete_box = layout.box() diff --git a/property_groups/dream_prompt.py b/property_groups/dream_prompt.py index 60166d1f..d319255d 100644 --- a/property_groups/dream_prompt.py +++ b/property_groups/dream_prompt.py @@ -5,7 +5,7 @@ from typing import _AnnotatedAlias from ..generator_process.actions.detect_seamless import SeamlessAxes -from ..generator_process.actions.prompt_to_image import Optimizations, Scheduler, StepPreviewMode, Pipeline +from ..generator_process.actions.prompt_to_image import Optimizations, Scheduler, StepPreviewMode from ..generator_process.actions.huggingface_hub import ModelType from ..prompt_engineering import * from ..preferences import StableDiffusionPreferences @@ -39,7 +39,7 @@ ] def init_image_actions_filtered(self, context): - available = Pipeline[self.pipeline].init_img_actions() + available = ['modify', 'inpaint', 'outpaint'] return list(filter(lambda x: x[0] in available, init_image_actions)) inpaint_mask_sources = [ @@ -48,7 +48,7 @@ def init_image_actions_filtered(self, context): ] def inpaint_mask_sources_filtered(self, context): - available = Pipeline[self.pipeline].inpaint_mask_sources() + available = ['alpha', 'prompt'] return list(filter(lambda x: x[0] in available, inpaint_mask_sources)) seamless_axes = [ @@ -69,48 +69,35 @@ def modify_action_source_type(self, context): ] def model_options(self, context): - match Pipeline[self.pipeline]: - case Pipeline.STABLE_DIFFUSION: - def model_case(model, i): - return ( - model.model_base, - model.model_base.replace('models--', '').replace('--', '/'), - ModelType[model.model_type].name, - i - ) - models = {} - for i, model in enumerate(context.preferences.addons[StableDiffusionPreferences.bl_idname].preferences.installed_models): - if model.model_type in {ModelType.CONTROL_NET.name, ModelType.UNKNOWN.name}: - continue - if model.model_type not in models: - models[model.model_type] = [model_case(model, i)] - else: - models[model.model_type].append(model_case(model, i)) - return reduce( - lambda a, b: a + [None] + sorted(b, key=lambda m: m[0]), - [ - models[group] - for group in sorted(models.keys()) - ], - [] - ) - case Pipeline.STABILITY_SDK: - return [ - ("stable-diffusion-v1", "Stable Diffusion v1.4", ModelType.PROMPT_TO_IMAGE.name), - ("stable-diffusion-v1-5", "Stable Diffusion v1.5", ModelType.PROMPT_TO_IMAGE.name), - ("stable-diffusion-512-v2-0", "Stable Diffusion v2.0", ModelType.PROMPT_TO_IMAGE.name), - ("stable-diffusion-768-v2-0", "Stable Diffusion v2.0-768", ModelType.PROMPT_TO_IMAGE.name), - ("stable-diffusion-512-v2-1", "Stable Diffusion v2.1", ModelType.PROMPT_TO_IMAGE.name), - ("stable-diffusion-768-v2-1", "Stable Diffusion v2.1-768", ModelType.PROMPT_TO_IMAGE.name), - None, - ("stable-inpainting-v1-0", "Stable Inpainting v1.0", ModelType.INPAINTING.name), - ("stable-inpainting-512-v2-0", "Stable Inpainting v2.0", ModelType.INPAINTING.name), - ] - -def pipeline_options(self, context): + def model_case(model, i): + return ( + 
model.model_base, + model.model_base.replace('models--', '').replace('--', '/'), + ModelType[model.model_type].name, + i + ) + models = {} + for i, model in enumerate(context.preferences.addons[StableDiffusionPreferences.bl_idname].preferences.installed_models): + if model.model_type in {ModelType.CONTROL_NET.name, ModelType.UNKNOWN.name}: + continue + if model.model_type not in models: + models[model.model_type] = [model_case(model, i)] + else: + models[model.model_type].append(model_case(model, i)) + return reduce( + lambda a, b: a + [None] + sorted(b, key=lambda m: m[0]), + [ + models[group] + for group in sorted(models.keys()) + ], + [] + ) + +def backend_options(self, context): + from .. import api return [ - (Pipeline.STABLE_DIFFUSION.name, 'Stable Diffusion', 'Stable Diffusion on your own hardware', 1), - (Pipeline.STABILITY_SDK.name, 'DreamStudio', 'Cloud compute via DreamStudio', 2), + (f"{backend.__module__}.{backend.__name__}", backend.name if hasattr(backend, "name") else backend.__name__, backend.description if hasattr(backend, "description") else "") + for backend in api.Backend.__subclasses__() ] def seed_clamp(self, ctx): @@ -123,7 +110,7 @@ def seed_clamp(self, ctx): pass # will get hashed once generated attributes = { - "pipeline": EnumProperty(name="Pipeline", items=pipeline_options, default=1 if Pipeline.local_available() else 2, description="Specify which model and target should be used."), + "backend": EnumProperty(name="Backend", items=backend_options, default=1, description="Specify which generation backend to use"), "model": EnumProperty(name="Model", items=model_options, description="Specify which model to use for inference"), "control_nets": CollectionProperty(type=ControlNet), @@ -299,7 +286,6 @@ def generate_args(self): args['optimizations'] = self.get_optimizations() args['scheduler'] = Scheduler(args['scheduler']) args['step_preview_mode'] = StepPreviewMode(args['step_preview_mode']) - args['pipeline'] = Pipeline[args['pipeline']] args['outpaint_origin'] = (args['outpaint_origin'][0], args['outpaint_origin'][1]) args['key'] = bpy.context.preferences.addons[StableDiffusionPreferences.bl_idname].preferences.dream_studio_key args['seamless_axes'] = SeamlessAxes(args['seamless_axes']) diff --git a/property_groups/dream_prompt_validation.py b/property_groups/dream_prompt_validation.py index 15a5b633..69da4857 100644 --- a/property_groups/dream_prompt_validation.py +++ b/property_groups/dream_prompt_validation.py @@ -1,5 +1,5 @@ from ..preferences import StableDiffusionPreferences, _template_model_download_progress, InstallModel -from ..generator_process.models import Pipeline, FixItError +from ..generator_process.models import FixItError from ..generator_process.actions.huggingface_hub import ModelType from ..preferences import OpenURL @@ -19,73 +19,73 @@ def validate(self, context, task: ModelType | None = None) -> bool: task = ModelType.PROMPT_TO_IMAGE # Check if the pipeline supports the task. - pipeline = Pipeline[self.pipeline] - match task: - case ModelType.DEPTH: - if not pipeline.depth(): - raise FixItError( - f"""The selected pipeline does not support {task.name.replace('_', ' ').lower()} tasks. -Select a different pipeline below.""", - lambda _, layout: layout.prop(self, "pipeline") - ) +# pipeline = Pipeline[self.pipeline] +# match task: +# case ModelType.DEPTH: +# if not pipeline.depth(): +# raise FixItError( +# f"""The selected pipeline does not support {task.name.replace('_', ' ').lower()} tasks. 
+# Select a different pipeline below.""", +# lambda _, layout: layout.prop(self, "backend") +# ) - # Pipeline-specific checks - match pipeline: - case Pipeline.STABLE_DIFFUSION: - if not Pipeline.local_available(): - raise FixItError( - "Local generation is not available for the variant of the add-on you have installed. Choose a different Pipeline such as 'DreamStudio'", - lambda _, layout: layout.prop(self, "pipeline") - ) +# # Pipeline-specific checks +# match pipeline: +# case Pipeline.STABLE_DIFFUSION: +# if not Pipeline.local_available(): +# raise FixItError( +# "Local generation is not available for the variant of the add-on you have installed. Choose a different Pipeline such as 'DreamStudio'", +# lambda _, layout: layout.prop(self, "backend") +# ) - installed_models = context.preferences.addons[StableDiffusionPreferences.bl_idname].preferences.installed_models - model = next((m for m in installed_models if m.model_base == self.model), None) - if model is None: - raise FixItError("No model selected.", lambda _, layout: layout.prop(self, "model")) - else: - if model.model_type != task.name: - def fix_model(context, layout): - layout.prop(self, "model") - if not any(m.model_type == task.name for m in installed_models): - if not _template_model_download_progress(context, layout): - layout.label(text="You do not have any compatible models downloaded:") - install_model = layout.operator(InstallModel.bl_idname, text=f"Download {task.recommended_model()} (Recommended)", icon="IMPORT") - install_model.model = task.recommended_model() - install_model.prefer_fp16_revision = context.preferences.addons[StableDiffusionPreferences.bl_idname].preferences.prefer_fp16_revision - raise FixItError( - f"""Incorrect model type selected for {task.name.replace('_', ' ').lower()} tasks. -The selected model is for {model.model_type.replace('_', ' ').lower()}. -Select a different model below.""", - fix_model - ) - case Pipeline.STABILITY_SDK: - if len(context.preferences.addons[StableDiffusionPreferences.bl_idname].preferences.dream_studio_key) <= 0: - raise FixItError( - f"""No DreamStudio key entered. -Enter your API key below{', or change the pipeline' if Pipeline.local_available() else ''}.""", - lambda ctx, layout: layout.prop(ctx.preferences.addons[StableDiffusionPreferences.bl_idname].preferences, "dream_studio_key") - ) +# installed_models = context.preferences.addons[StableDiffusionPreferences.bl_idname].preferences.installed_models +# model = next((m for m in installed_models if m.model_base == self.model), None) +# if model is None: +# raise FixItError("No model selected.", lambda _, layout: layout.prop(self, "model")) +# else: +# if model.model_type != task.name: +# def fix_model(context, layout): +# layout.prop(self, "model") +# if not any(m.model_type == task.name for m in installed_models): +# if not _template_model_download_progress(context, layout): +# layout.label(text="You do not have any compatible models downloaded:") +# install_model = layout.operator(InstallModel.bl_idname, text=f"Download {task.recommended_model()} (Recommended)", icon="IMPORT") +# install_model.model = task.recommended_model() +# install_model.prefer_fp16_revision = context.preferences.addons[StableDiffusionPreferences.bl_idname].preferences.prefer_fp16_revision +# raise FixItError( +# f"""Incorrect model type selected for {task.name.replace('_', ' ').lower()} tasks. +# The selected model is for {model.model_type.replace('_', ' ').lower()}. 
+# Select a different model below.""", +# fix_model +# ) +# case Pipeline.STABILITY_SDK: +# if len(context.preferences.addons[StableDiffusionPreferences.bl_idname].preferences.dream_studio_key) <= 0: +# raise FixItError( +# f"""No DreamStudio key entered. +# Enter your API key below{', or change the pipeline' if Pipeline.local_available() else ''}.""", +# lambda ctx, layout: layout.prop(ctx.preferences.addons[StableDiffusionPreferences.bl_idname].preferences, "dream_studio_key") +# ) - init_image = None - if self.use_init_img: - match self.init_img_src: - case 'file': - init_image = context.scene.init_img - case 'open_editor': - for area in context.screen.areas: - if area.type == 'IMAGE_EDITOR': - if area.spaces.active.image is not None: - init_image = area.spaces.active.image - if init_image is not None and init_image.type == 'RENDER_RESULT': - def fix_init_img(ctx, layout): - layout.prop(self, "init_img_src", expand=True) - if self.init_img_src == 'file': - layout.template_ID(context.scene, "init_img", open="image.open") - layout.label(text="Or, enable the render pass to generate after each render.") - layout.operator(OpenURL.bl_idname, text="Learn More", icon="QUESTION").url = "https://github.com/carson-katri/dream-textures/blob/main/docs/RENDER_PASS.md" - raise FixItError("""'Render Result' cannot be used as a source image. -Save the image then open the file to use it as a source image.""", - fix_init_img - ) +# init_image = None +# if self.use_init_img: +# match self.init_img_src: +# case 'file': +# init_image = context.scene.init_img +# case 'open_editor': +# for area in context.screen.areas: +# if area.type == 'IMAGE_EDITOR': +# if area.spaces.active.image is not None: +# init_image = area.spaces.active.image +# if init_image is not None and init_image.type == 'RENDER_RESULT': +# def fix_init_img(ctx, layout): +# layout.prop(self, "init_img_src", expand=True) +# if self.init_img_src == 'file': +# layout.template_ID(context.scene, "init_img", open="image.open") +# layout.label(text="Or, enable the render pass to generate after each render.") +# layout.operator(OpenURL.bl_idname, text="Learn More", icon="QUESTION").url = "https://github.com/carson-katri/dream-textures/blob/main/docs/RENDER_PASS.md" +# raise FixItError("""'Render Result' cannot be used as a source image. 
+# Save the image then open the file to use it as a source image.""", +# fix_init_img +# ) return True \ No newline at end of file diff --git a/ui/panels/dream_texture.py b/ui/panels/dream_texture.py index 25ed24ab..13d6d3aa 100644 --- a/ui/panels/dream_texture.py +++ b/ui/panels/dream_texture.py @@ -13,10 +13,10 @@ from ...operators.open_latest_version import OpenLatestVersion, is_force_show_download, new_version_available from ...operators.view_history import ImportPromptFile from ..space_types import SPACE_TYPES -from ...property_groups.dream_prompt import DreamPrompt, pipeline_options +from ...property_groups.dream_prompt import DreamPrompt, backend_options from ...generator_process.actions.prompt_to_image import Optimizations from ...generator_process.actions.detect_seamless import SeamlessAxes -from ...generator_process.models import Pipeline, FixItError +from ...generator_process.models import FixItError def dream_texture_panels(): for space_type in SPACE_TYPES: @@ -50,9 +50,8 @@ def draw(self, context): elif new_version_available(): layout.operator(OpenLatestVersion.bl_idname, icon="IMPORT") - layout.prop(context.scene.dream_textures_prompt, "pipeline") - if Pipeline[context.scene.dream_textures_prompt.pipeline].model(): - layout.prop(context.scene.dream_textures_prompt, 'model') + layout.prop(context.scene.dream_textures_prompt, "backend") + layout.prop(context.scene.dream_textures_prompt, 'model') DreamTexturePanel.__name__ = f"DREAM_PT_dream_panel_{space_type}" yield DreamTexturePanel @@ -127,12 +126,12 @@ def draw(self, context): segment_row.prop(prompt, enum_prop, icon_only=is_custom) if prompt.prompt_structure == file_batch_structure.id: layout.template_ID(context.scene, "dream_textures_prompt_file", open="text.open") - if Pipeline[prompt.pipeline].seamless(): - layout.prop(prompt, "seamless_axes") - if prompt.seamless_axes == SeamlessAxes.AUTO and get_seamless_result is not None: - auto_row = self.layout.row() - auto_row.enabled = False - auto_row.prop(get_seamless_result(context, prompt), "result") + + layout.prop(prompt, "seamless_axes") + if prompt.seamless_axes == SeamlessAxes.AUTO and get_seamless_result is not None: + auto_row = self.layout.row() + auto_row.enabled = False + auto_row.prop(get_seamless_result(context, prompt), "result") yield PromptPanel @@ -144,7 +143,7 @@ class NegativePromptPanel(sub_panel): @classmethod def poll(cls, context): - return get_prompt(context).prompt_structure != file_batch_structure.id and Pipeline[get_prompt(context).pipeline].negative_prompts() + return get_prompt(context).prompt_structure != file_batch_structure.id def draw_header(self, context): layout = self.layout @@ -229,8 +228,7 @@ def _outpaint_warning_box(warning): elif prompt.init_img_action == 'modify': layout.prop(prompt, "fit") layout.prop(prompt, "strength") - if Pipeline[prompt.pipeline].color_correction(): - layout.prop(prompt, "use_init_img_color") + layout.prop(prompt, "use_init_img_color") if prompt.init_img_action == 'modify': layout.prop(prompt, "modify_action_source_type") if prompt.modify_action_source_type == 'depth_map': diff --git a/ui/panels/render_properties.py b/ui/panels/render_properties.py index 731f9b69..1826fd20 100644 --- a/ui/panels/render_properties.py +++ b/ui/panels/render_properties.py @@ -1,7 +1,6 @@ import bpy from .dream_texture import create_panel, prompt_panel, advanced_panel -from ...property_groups.dream_prompt import pipeline_options -from ...generator_process.actions.prompt_to_image import Pipeline +from ...property_groups.dream_prompt 
import backend_options from ...generator_process.actions.huggingface_hub import ModelType from ...preferences import StableDiffusionPreferences @@ -27,10 +26,9 @@ def draw(self, context): layout.use_property_decorate = False layout.active = context.scene.dream_textures_render_properties_enabled - if len(pipeline_options(self, context)) > 1: - layout.prop(context.scene.dream_textures_render_properties_prompt, "pipeline") - if Pipeline[context.scene.dream_textures_render_properties_prompt.pipeline].model(): - layout.prop(context.scene.dream_textures_render_properties_prompt, 'model') + if len(backend_options(self, context)) > 1: + layout.prop(context.scene.dream_textures_render_properties_prompt, "backend") + layout.prop(context.scene.dream_textures_render_properties_prompt, 'model') layout.prop(context.scene.dream_textures_render_properties_prompt, "strength") layout.prop(context.scene, "dream_textures_render_properties_pass_inputs") if context.scene.dream_textures_render_properties_pass_inputs != 'color': @@ -40,11 +38,6 @@ def draw(self, context): box.label(text="Enable the Z pass to use depth pass inputs") box.use_property_split = False box.prop(context.view_layer, "use_pass_z") - - if not Pipeline[context.scene.dream_textures_render_properties_prompt.pipeline].depth(): - box = layout.box() - box.label(text="Unsupported pipeline", icon="ERROR") - box.label(text="The selected pipeline does not support depth to image.") models = list(filter( lambda m: m.model_base == context.scene.dream_textures_render_properties_prompt.model, diff --git a/ui/panels/upscaling.py b/ui/panels/upscaling.py index b9231ab5..5e8d3038 100644 --- a/ui/panels/upscaling.py +++ b/ui/panels/upscaling.py @@ -4,7 +4,6 @@ from ...operators.upscale import Upscale from ...operators.dream_texture import CancelGenerator, ReleaseGenerator from ...generator_process.actions.detect_seamless import SeamlessAxes -from ...generator_process.actions.prompt_to_image import Pipeline from .dream_texture import create_panel, advanced_panel from ..space_types import SPACE_TYPES @@ -21,8 +20,6 @@ class UpscalingPanel(Panel): @classmethod def poll(cls, context): - if not Pipeline[context.scene.dream_textures_prompt.pipeline].upscaling(): - return False if cls.bl_space_type == 'NODE_EDITOR': return context.area.ui_type == "ShaderNodeTree" or context.area.ui_type == "CompositorNodeTree" else: @@ -75,8 +72,6 @@ class ActionsPanel(Panel): @classmethod def poll(cls, context): - if not Pipeline[context.scene.dream_textures_prompt.pipeline].upscaling(): - return False if cls.bl_space_type == 'NODE_EDITOR': return context.area.ui_type == "ShaderNodeTree" or context.area.ui_type == "CompositorNodeTree" else: From 91947cdf43c27af3acbc85b35220c3287463bbcf Mon Sep 17 00:00:00 2001 From: Carson Katri Date: Fri, 28 Apr 2023 13:34:09 -0400 Subject: [PATCH 2/8] Implement Diffusers backend and move optimization UI --- api/__init__.py | 4 +- api/backend.py | 77 ---- api/backend/__init__.py | 1 + api/backend/backend.py | 107 ++++++ api/models/__init__.py | 6 + api/{ => models}/generation_result.py | 5 +- api/{ => models}/model.py | 0 api/models/prompt.py | 6 + api/models/seamless_axes.py | 75 ++++ api/models/step_preview_mode.py | 8 + api/models/task.py | 68 ++++ diffusers_backend.py | 210 ++++++++++- generator_process/actions/control_net.py | 2 +- generator_process/actions/depth_to_image.py | 2 +- .../actions/detect_seamless/__init__.py | 76 +--- generator_process/actions/image_to_image.py | 2 +- generator_process/actions/inpaint.py | 2 +- 
generator_process/actions/prompt_to_image.py | 328 +----------------- generator_process/actions/upscale.py | 2 +- generator_process/models/__init__.py | 6 +- .../models/image_generation_result.py | 106 ++++++ generator_process/models/optimizations.py | 146 ++++++++ generator_process/models/scheduler.py | 48 +++ property_groups/dream_prompt.py | 98 +----- ui/panels/dream_texture.py | 38 +- 25 files changed, 816 insertions(+), 607 deletions(-) delete mode 100644 api/backend.py create mode 100644 api/backend/__init__.py create mode 100644 api/backend/backend.py create mode 100644 api/models/__init__.py rename api/{ => models}/generation_result.py (87%) rename api/{ => models}/model.py (100%) create mode 100644 api/models/prompt.py create mode 100644 api/models/seamless_axes.py create mode 100644 api/models/step_preview_mode.py create mode 100644 api/models/task.py create mode 100644 generator_process/models/image_generation_result.py create mode 100644 generator_process/models/optimizations.py create mode 100644 generator_process/models/scheduler.py diff --git a/api/__init__.py b/api/__init__.py index 21bc242a..36654ceb 100644 --- a/api/__init__.py +++ b/api/__init__.py @@ -1,4 +1,2 @@ -from ..property_groups.dream_prompt import * -from .generation_result import * -from .model import * +from .models import * from .backend import * \ No newline at end of file diff --git a/api/backend.py b/api/backend.py deleted file mode 100644 index b1d325c0..00000000 --- a/api/backend.py +++ /dev/null @@ -1,77 +0,0 @@ -import bpy -from ..property_groups.dream_prompt import DreamPrompt -from typing import Callable, List -from .generation_result import GenerationResult -from .model import Model - -StepCallback = Callable[[], GenerationResult] -Callback = Callable[[], List[GenerationResult] | Exception] - -class Backend(bpy.types.PropertyGroup): - """A backend for Dream Textures. - - Provide the following methods to create a valid backend. - - ```python - def list_models(self) -> List[Model] - def generate(self, prompt: DreamPrompt, step_callback: StepCallback, callback: Callback) - ``` - """ - - def list_models(self) -> List[Model]: - """Provide a list of available models. - - The `id` of the model will be provided - """ - ... - - def draw_prompt(self, layout, context): - """Draw additional UI in the 'Prompt' panel""" - ... - - def draw_speed_optimzations(self, layout, context): - """Draw additional UI in the 'Speed Optimizations' panel""" - ... - - def draw_memory_optimzations(self, layout, context): - """Draw additional UI in the 'Memory Optimizations' panel""" - ... - - def draw_extra(self, layout, context): - """Draw additional UI in the panel""" - ... - - def generate( - self, - prompt: DreamPrompt, - step_callback: StepCallback, - callback: Callback - ): - """A request to generate an image. - - Use the `DreamPrompt` to get all of the arguments for the generation. - Call `step_callback` at each step as needed. - Call `callback` when the generation is complete. - - `DreamPrompt` has several helper functions to access generation options. - - ```python - prompt.generate_prompt() # get the full prompt string - prompt.get_seed() # an `int` or `None` (in which case you should provide a random seed yourself). - prompt.get_optimizations() # creates an `Optimization` type. - ``` - - After collecting the necessary arguments, generate an image in the background and call `step_callback` and `callback` with the results. 
- - > Generation should happen on a separate thread or process, as this method is called from the main thread and will block Blender's UI. - - ```python - call_my_api( - prompt=prompt.generate_prompt(), - seed=prompt.get_seed(), - on_step=lambda res: callback(GenerationResult(res.image, res.seed)), - on_response=lambda res: callback([GenerationResult(res.image, res.seed)]) - ) - ``` - """ - ... \ No newline at end of file diff --git a/api/backend/__init__.py b/api/backend/__init__.py new file mode 100644 index 00000000..3583fd62 --- /dev/null +++ b/api/backend/__init__.py @@ -0,0 +1 @@ +from .backend import * \ No newline at end of file diff --git a/api/backend/backend.py b/api/backend/backend.py new file mode 100644 index 00000000..b7a594ab --- /dev/null +++ b/api/backend/backend.py @@ -0,0 +1,107 @@ +try: + import bpy + from typing import Callable, List, Tuple + from ..models.generation_result import GenerationResult + from ..models.task import Task + from ..models.model import Model + from ..models.prompt import Prompt + from ..models.seamless_axes import SeamlessAxes + from ..models.step_preview_mode import StepPreviewMode + + StepCallback = Callable[[GenerationResult], None] + Callback = Callable[[List[GenerationResult] | Exception], None] + + class Backend(bpy.types.PropertyGroup): + """A backend for Dream Textures. + + Provide the following methods to create a valid backend. + + ```python + def list_models(self) -> List[Model] + def generate( + self, + task: Task, + model: Model, + prompt: Prompt, + size: Tuple[int, int] | None, + seamless_axes: SeamlessAxes, + + step_callback: StepCallback, + callback: Callback + ) + ``` + """ + + @classmethod + def register(cls): + from ...property_groups.dream_prompt import DreamPrompt + setattr(DreamPrompt, cls._attribute(), bpy.props.PointerProperty(type=cls)) + + @classmethod + def _id(cls) -> str: + return f"{cls.__module__}.{cls.__name__}" + + @classmethod + def _attribute(cls) -> str: + return cls._id().replace('.', '_') + + @classmethod + def _lookup(cls, id): + return next(backend for backend in cls._list_backends() if backend._id() == id) + + @classmethod + def _list_backends(cls): + return cls.__subclasses__() + + def list_models(self, context) -> List[Model]: + """Provide a list of available models. + + The `id` of the model will be provided + """ + ... + + def list_schedulers(self, context) -> List[str]: + """Provide a list of available schedulers.""" + ... + + def draw_prompt(self, layout, context): + """Draw additional UI in the 'Prompt' panel""" + ... + + def draw_advanced(self, layout, context): + """Draw additional UI in the 'Advanced' panel""" + ... + + def draw_speed_optimizations(self, layout, context): + """Draw additional UI in the 'Speed Optimizations' panel""" + ... + + def draw_memory_optimizations(self, layout, context): + """Draw additional UI in the 'Memory Optimizations' panel""" + ... + + def draw_extra(self, layout, context): + """Draw additional UI in the panel""" + ... + + def generate( + self, + task: Task, + model: Model, + prompt: Prompt, + size: Tuple[int, int] | None, + seed: int, + steps: int, + guidance_scale: float, + scheduler: str, + seamless_axes: SeamlessAxes, + step_preview_mode: StepPreviewMode, + iterations: int, + + step_callback: StepCallback, + callback: Callback + ): + """A request to generate an image.""" + ... 
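To make the `Backend` contract above concrete, here is a minimal, illustrative sketch of a third-party backend built on this base class. `MyCloudBackend`, `call_my_api`, and the `dream_textures.api` import path are hypothetical placeholders rather than code from this patch; as the removed docstring above notes, the actual work should run off Blender's main thread.

```python
# Illustrative sketch only (not part of this patch): a custom backend implementing
# the new Backend interface. `call_my_api` is a hypothetical remote service client.
from typing import List, Tuple
from dream_textures.api import (  # import path is illustrative
    Backend, Model, Prompt, Task, GenerationResult, SeamlessAxes, StepPreviewMode,
)

class MyCloudBackend(Backend):
    name = "My Cloud Backend"
    description = "Example backend that forwards generation requests to a remote API"

    def list_models(self, context) -> List[Model]:
        return [Model(name="Example Model v1", description="PROMPT_TO_IMAGE", id="example/model-v1")]

    def list_schedulers(self, context) -> List[str]:
        return ["DDIM", "Euler A"]

    def generate(self, task: Task, model: Model, prompt: Prompt, size: Tuple[int, int] | None,
                 seed: int, steps: int, guidance_scale: float, scheduler: str,
                 seamless_axes: SeamlessAxes, step_preview_mode: StepPreviewMode,
                 iterations: int, step_callback, callback):
        try:
            # Kick off the request in the background; this method is called on Blender's main thread.
            images = call_my_api(
                prompt=prompt.positive,
                negative_prompt=prompt.negative,
                size=size, seed=seed, steps=steps, cfg_scale=guidance_scale,
                on_step=lambda image: step_callback(GenerationResult(image=image, seed=seed)),
            )
            callback([GenerationResult(image=image, seed=seed) for image in images])
        except Exception as error:
            callback(error)
```

Registering such a subclass with `bpy.utils.register_class` runs the `register` classmethod defined above, which attaches the backend's settings to `DreamPrompt` as a `PointerProperty`; the backend then shows up in the `backend` enum that `backend_options` builds from `Backend.__subclasses__()`.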
+except: + pass \ No newline at end of file diff --git a/api/models/__init__.py b/api/models/__init__.py new file mode 100644 index 00000000..5041e364 --- /dev/null +++ b/api/models/__init__.py @@ -0,0 +1,6 @@ +from .generation_result import * +from .model import * +from .prompt import * +from .seamless_axes import * +from .step_preview_mode import * +from .task import * \ No newline at end of file diff --git a/api/generation_result.py b/api/models/generation_result.py similarity index 87% rename from api/generation_result.py rename to api/models/generation_result.py index 748336bc..4b11a462 100644 --- a/api/generation_result.py +++ b/api/models/generation_result.py @@ -1,6 +1,5 @@ -import numpy as np from dataclasses import dataclass -from typing import List +from numpy.typing import NDArray @dataclass class GenerationResult: @@ -15,7 +14,7 @@ class GenerationResult: ) ``` """ - image: np.ndarray + image: NDArray """The generated image as a Numpy array. The shape should be `(height, width, channels)`, where `channels` is 3 or 4. """ diff --git a/api/model.py b/api/models/model.py similarity index 100% rename from api/model.py rename to api/models/model.py diff --git a/api/models/prompt.py b/api/models/prompt.py new file mode 100644 index 00000000..f2fe2c45 --- /dev/null +++ b/api/models/prompt.py @@ -0,0 +1,6 @@ +from dataclasses import dataclass + +@dataclass +class Prompt: + positive: str + negative: str | None \ No newline at end of file diff --git a/api/models/seamless_axes.py b/api/models/seamless_axes.py new file mode 100644 index 00000000..739be2f4 --- /dev/null +++ b/api/models/seamless_axes.py @@ -0,0 +1,75 @@ +from enum import Enum + +class SeamlessAxes(Enum): + """Unified handling of seamless axes. + Can be converted from str (id or text) or bool tuple/list (x, y). + Each enum is equal to their respective convertible values. 
+ Special cases: + AUTO: None + OFF: False, empty str + BOTH: True + """ + + AUTO = 'auto', 'Auto-detect', None, None + OFF = 'off', 'Off', False, False + HORIZONTAL = 'x', 'X', True, False + VERTICAL = 'y', 'Y', False, True + BOTH = 'xy', 'Both', True, True + + def __init__(self, id, text, x, y): + self.id = id + self.text = text + self.x = x + self.y = y + + def __eq__(self, other): + if isinstance(other, type(self)): + return self is other + if isinstance(other, str): + return self.id == other or self.text == other or (other == '' and self is self.OFF) + if isinstance(other, (tuple, list)) and len(other) == 2: + return self.x == other[0] and self.y == other[1] + if other is True and self is self.BOTH: + return True + if other is False and self is self.OFF: + return True + if other is None and self is self.AUTO: + return True + return False + + def __and__(self, other): + return SeamlessAxes((self.x and other.x, self.y and other.y)) + + def __or__(self, other): + return SeamlessAxes((self.x or other.x, self.y or other.y)) + + def __xor__(self, other): + return SeamlessAxes((self.x != other.x, self.y != other.y)) + + def __invert__(self): + return SeamlessAxes((not self.x, not self.y)) + + @classmethod + def _missing_(cls, value): + if isinstance(value, str): + if value == '': + return cls.OFF + for e in cls: + if e.id == value or e.text == value: + return e + raise ValueError(f'no {cls.__name__} with id {repr(id)}') + elif isinstance(value, (tuple, list)) and len(value) == 2: + for e in cls: + if e.x == value[0] and e.y == value[1]: + return e + raise ValueError(f'no {cls.__name__} with x {value[0]} and y {value[1]}') + elif value is True: + return cls.BOTH + elif value is False: + return cls.OFF + elif value is None: + return cls.AUTO + raise TypeError(f'expected str, bool, tuple[bool, bool], or None, got {repr(value)}') + + def bpy_enum(self, *args): + return self.id, self.text, *args \ No newline at end of file diff --git a/api/models/step_preview_mode.py b/api/models/step_preview_mode.py new file mode 100644 index 00000000..2ae2441d --- /dev/null +++ b/api/models/step_preview_mode.py @@ -0,0 +1,8 @@ +import enum + +class StepPreviewMode(enum.Enum): + NONE = "None" + FAST = "Fast" + FAST_BATCH = "Fast (Batch Tiled)" + ACCURATE = "Accurate" + ACCURATE_BATCH = "Accurate (Batch Tiled)" \ No newline at end of file diff --git a/api/models/task.py b/api/models/task.py new file mode 100644 index 00000000..1ff11bcc --- /dev/null +++ b/api/models/task.py @@ -0,0 +1,68 @@ +from dataclasses import dataclass +from typing import Tuple +from numpy.typing import NDArray +from enum import IntEnum + +class Task: + """A specific task type. + + Access the properties of the task using dot notation. + + ```python + # Task.ImageToImage + task.image + task.strength + task.fit + ``` + + Switch over the task to perform the correct actions. + + ```python + match type(task): + case PromptToImage: + ... + case ImageToImage: + ... + case Inpaint: + ... + case DepthToImage: + ... + case Outpaint: + ... 
+ ``` + """ + pass + +@dataclass +class PromptToImage(Task): + pass + +@dataclass +class ImageToImage(Task): + image: NDArray + strength: float + fit: bool + +@dataclass +class Inpaint(Task): + class MaskSource(IntEnum): + ALPHA = 0 + PROMPT = 1 + + image: NDArray + strength: float + fit: bool + mask_source: MaskSource + mask_prompt: str + confidence: float + +@dataclass +class DepthToImage(Task): + depth: NDArray | None + image: NDArray | None + strength: float + +@dataclass +class Outpaint(Task): + image: NDArray + origin: Tuple[int, int] \ No newline at end of file diff --git a/diffusers_backend.py b/diffusers_backend.py index 2f7c6bfe..c4841152 100644 --- a/diffusers_backend.py +++ b/diffusers_backend.py @@ -1,14 +1,208 @@ -import bpy -from .api import Backend, Model, DreamPrompt, StepCallback, Callback +from bpy.props import FloatProperty, IntProperty, EnumProperty, BoolProperty +from typing import List, Tuple + +from .api import Backend, StepCallback, Callback +from .api.models import Model, Task, Prompt, SeamlessAxes, GenerationResult, StepPreviewMode +from .api.models.task import PromptToImage, ImageToImage, Inpaint, DepthToImage, Outpaint + +from .generator_process import Generator +from .generator_process.actions.prompt_to_image import ImageGenerationResult +from .generator_process.future import Future +from .generator_process.models import Optimizations, Scheduler +from .generator_process.actions.huggingface_hub import ModelType +from .preferences import StableDiffusionPreferences + +from functools import reduce class DiffusersBackend(Backend): name = "HuggingFace Diffusers" description = "Local image generation inside of Blender" - def list_models(self): - return [ - Model("Stable Diffusion v2.1", "The 2.1 revision of SD", "stabilityai/stable-diffusion-v2-1"), - ] + attention_slicing: BoolProperty(name="Attention Slicing", default=True, description="Computes attention in several steps. Saves some memory in exchange for a small speed decrease") + attention_slice_size_src: EnumProperty( + name="Attention Slice Size", + items=( + ("auto", "Automatic", "Computes attention in two steps", 1), + ("manual", "Manual", "Computes attention in `attention_head_dim // size` steps. 
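A usage note on the `Task` hierarchy just added: matching on the task instance with class patterns (`case PromptToImage():`) is what lets a pattern bind the dataclass fields, and it is the form `DiffusersBackend.generate` uses further below. A minimal sketch, with hypothetical `run_*` helpers standing in for backend-specific work:

```python
# Illustrative sketch only: dispatching on the Task subclasses defined above.
from dream_textures.api.models.task import (  # import path is illustrative
    Task, PromptToImage, ImageToImage, Inpaint, DepthToImage, Outpaint,
)

def dispatch(task: Task):
    match task:
        case PromptToImage():
            return run_prompt_to_image()  # hypothetical helpers
        case ImageToImage(image=image, strength=strength, fit=fit):
            return run_image_to_image(image, strength, fit)
        case Inpaint(image=image, mask_source=Inpaint.MaskSource.ALPHA):
            return run_inpaint_from_alpha(image)
        case DepthToImage(depth=depth, image=image, strength=strength):
            return run_depth_to_image(depth, image, strength)
        case Outpaint(image=image, origin=(x, y)):
            return run_outpaint(image, x, y)
        case _:
            raise NotImplementedError(f"Unsupported task: {type(task)}")
```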
A smaller `size` saves more memory.\n" + "`attention_head_dim` must be a multiple of `size`, otherwise the image won't generate properly.\n" + "`attention_head_dim` can be found within the model snapshot's unet/config.json file", 2) + ), + default=1 + ) + attention_slice_size: IntProperty(name="Attention Slice Size", default=1, min=1) + cudnn_benchmark: BoolProperty(name="cuDNN Benchmark", description="Allows cuDNN to benchmark multiple convolution algorithms and select the fastest", default=False) + tf32: BoolProperty(name="TF32", description="Utilizes tensor cores on Ampere (RTX 30xx) or newer GPUs for matrix multiplications.\nHas no effect if half precision is enabled", default=False) + half_precision: BoolProperty(name="Half Precision", description="Reduces memory usage and increases speed in exchange for a slight loss in image quality.\nHas no effect if CPU only is enabled or using a GTX 16xx GPU", default=True) + cpu_offload: EnumProperty( + name="CPU Offload", + items=( + ("off", "Off", "", 0), + ("model", "Model", "Some memory savings with minimal speed penalty", 1), + ("submodule", "Submodule", "Better memory savings with large speed penalty", 2) + ), + default=0, + description="Dynamically moves models in and out of device memory for reduced memory usage with reduced speed" + ) + channels_last_memory_format: BoolProperty(name="Channels Last Memory Format", description="An alternative way of ordering NCHW tensors that may be faster or slower depending on the device", default=False) + sdp_attention: BoolProperty( + name="SDP Attention", + description="Scaled dot product attention requires less memory and often comes with a good speed increase.\n" + "Prompt recall may not produce the exact same image, but usually only minor noise differences.\n" + "Overrides attention slicing", + default=True + ) + batch_size: IntProperty(name="Batch Size", default=1, min=1, description="Improves speed when using iterations or upscaling in exchange for higher memory usage.\nHighly recommended to use with VAE slicing enabled") + vae_slicing: BoolProperty(name="VAE Slicing", description="Reduces memory usage of batched VAE decoding. Has no effect if batch size is 1.\nMay have a small performance improvement with large batches", default=True) + vae_tiling: EnumProperty( + name="VAE Tiling", + items=( + ("off", "Off", "", 0), + ("half", "Half", "Uses tiles of half the selected model's default size. Likely to cause noticeably inaccurate colors", 1), + ("full", "Full", "Uses tiles of the selected model's default size, intended for use where image size is manually set higher. May cause slightly inaccurate colors", 2), + ("manual", "Manual", "", 3) + ), + default=0, + description="Decodes generated images in tiled regions to reduce memory usage in exchange for longer decode time and less accurate colors.\nCan allow for generating larger images that would otherwise run out of memory on the final step" + ) + vae_tile_size: IntProperty(name="VAE Tile Size", min=1, default=512, description="Width and height measurement of tiles. 
Smaller sizes are more likely to cause inaccurate colors and other undesired artifacts") + vae_tile_blend: IntProperty(name="VAE Tile Blend", min=0, default=64, description="Minimum amount of how much each edge of a tile will intersect its adjacent tile") + cfg_end: FloatProperty(name="CFG End", min=0, max=1, default=1, description="The percentage of steps to complete before disabling classifier-free guidance") + cpu_only: BoolProperty(name="CPU Only", default=False, description="Disables GPU acceleration and is extremely slow") + + def list_models(self, context): + def model_case(model, i): + return Model( + name=model.model_base.replace('models--', '').replace('--', '/'), + description=ModelType[model.model_type].name, + id=model.model_base + ) + models = {} + for i, model in enumerate(context.preferences.addons[StableDiffusionPreferences.bl_idname].preferences.installed_models): + if model.model_type in {ModelType.CONTROL_NET.name, ModelType.UNKNOWN.name}: + continue + if model.model_type not in models: + models[model.model_type] = [model_case(model, i)] + else: + models[model.model_type].append(model_case(model, i)) + return reduce( + lambda a, b: a + [None] + sorted(b, key=lambda m: m.id), + [ + models[group] + for group in sorted(models.keys()) + ], + [] + ) + + def list_schedulers(self, context) -> List[str]: + return [scheduler.value for scheduler in Scheduler] + + def optimizations(self) -> Optimizations: + optimizations = Optimizations() + for prop in dir(self): + if hasattr(optimizations, prop): + setattr(optimizations, prop, getattr(self, prop)) + if self.attention_slice_size_src == 'auto': + optimizations.attention_slice_size = 'auto' + return optimizations + + def generate(self, task: Task, model: Model, prompt: Prompt, size: Tuple[int, int] | None, seed: int, steps: int, guidance_scale: float, scheduler: str, seamless_axes: SeamlessAxes, step_preview_mode: StepPreviewMode, iterations: int, step_callback: StepCallback, callback: Callback): + gen = Generator.shared() + common_kwargs = { + 'model': model.id, + 'scheduler': Scheduler[scheduler], + 'optimizations': self.optimizations(), + 'prompt': prompt.positive, + 'steps': steps, + 'width': size[0] if size is not None else None, + 'height': size[1] if size is not None else None, + 'seed': seed, + 'cfg_scale': guidance_scale, + 'use_negative_prompt': prompt.negative is not None, + 'negative_prompt': prompt.negative or "", + 'seamless_axes': seamless_axes, + 'iterations': 1, + 'step_preview_mode': step_preview_mode, + } + future: Future + match task: + case PromptToImage(): + future = gen.prompt_to_image(**common_kwargs) + case ImageToImage(image=image, strength=strength, fit=fit): + future = gen.image_to_image(image=image, fit=fit, strength=strength, **common_kwargs) + case Inpaint(image=image, fit=fit, strength=strength, mask_source=mask_source, mask_prompt=mask_prompt, confidence=confidence): + future = gen.inpaint( + image=image, + fit=fit, + strength=strength, + inpaint_mask_src='alpha' if mask_source == Inpaint.MaskSource.ALPHA else 'prompt', + text_mask=mask_prompt, + text_mask_confidence=confidence, + **common_kwargs + ) + case DepthToImage(depth=depth, image=image, strength=strength): + future = gen.depth_to_image( + depth=depth, + image=image, + strength=strength, + **common_kwargs + ) + case Outpaint(image=image, origin=origin): + future = gen.outpaint( + image=image, + width=size[0] if size is not None else None, + height=size[1] if size is not None else None, + outpaint_origin=origin, + **common_kwargs + ) + case _: 
+ raise NotImplementedError() + def on_step(_, step_image: ImageGenerationResult): + step_callback(GenerationResult(image=step_image.images[-1], seed=step_image.seeds[-1])) + def on_done(future: Future): + result: ImageGenerationResult = future.result(last_only=True) + callback([ + GenerationResult(image=result.images[i], seed=result.seeds[i]) + for i in range(len(result.images)) + ]) + def on_exception(_, exception): + callback(exception) + future.add_response_callback(on_step) + future.add_exception_callback(on_exception) + future.add_done_callback(on_done) + + def draw_speed_optimizations(self, layout, context): + inferred_device = Optimizations.infer_device() + if self.cpu_only: + inferred_device = "cpu" + def optimization(prop): + if Optimizations.device_supports(prop, inferred_device): + layout.prop(self, prop) + + optimization("cudnn_benchmark") + optimization("tf32") + optimization("half_precision") + optimization("channels_last_memory_format") + optimization("batch_size") + optimization("cfg_end") + + def draw_memory_optimizations(self, layout, context): + inferred_device = Optimizations.infer_device() + if self.cpu_only: + inferred_device = "cpu" + def optimization(prop): + if Optimizations.device_supports(prop, inferred_device): + layout.prop(self, prop) - def generate(self, prompt: DreamPrompt, step_callback: StepCallback, callback: Callback): - pass \ No newline at end of file + optimization("sdp_attention") + optimization("attention_slicing") + slice_size_row = layout.row() + slice_size_row.prop(self, "attention_slice_size_src") + if self.attention_slice_size_src == 'manual': + slice_size_row.prop(self, "attention_slice_size", text="Size") + optimization("cpu_offload") + optimization("cpu_only") + optimization("vae_slicing") + optimization("vae_tiling") + if self.vae_tiling == "manual": + optimization("vae_tile_size") + optimization("vae_tile_blend") \ No newline at end of file diff --git a/generator_process/actions/control_net.py b/generator_process/actions/control_net.py index b564065b..95f6de18 100644 --- a/generator_process/actions/control_net.py +++ b/generator_process/actions/control_net.py @@ -5,7 +5,7 @@ import numpy as np import random from .prompt_to_image import Scheduler, Optimizations, StepPreviewMode, ImageGenerationResult, _configure_model_padding, load_pipe -from .detect_seamless import SeamlessAxes +from ...api.models.seamless_axes import SeamlessAxes def control_net( self, diff --git a/generator_process/actions/depth_to_image.py b/generator_process/actions/depth_to_image.py index 548f1d86..ce47e847 100644 --- a/generator_process/actions/depth_to_image.py +++ b/generator_process/actions/depth_to_image.py @@ -6,7 +6,7 @@ import numpy as np import random from .prompt_to_image import Scheduler, Optimizations, StepPreviewMode, ImageGenerationResult, _configure_model_padding, model_snapshot_folder, load_pipe -from .detect_seamless import SeamlessAxes +from ...api.models.seamless_axes import SeamlessAxes def depth_to_image( diff --git a/generator_process/actions/detect_seamless/__init__.py b/generator_process/actions/detect_seamless/__init__.py index 221e55d5..48cba657 100644 --- a/generator_process/actions/detect_seamless/__init__.py +++ b/generator_process/actions/detect_seamless/__init__.py @@ -3,81 +3,7 @@ import numpy as np from numpy.typing import NDArray - -class SeamlessAxes(Enum): - """Unified handling of seamless axes. - Can be converted from str (id or text) or bool tuple/list (x, y). - Each enum is equal to their respective convertible values. 
- Special cases: - AUTO: None - OFF: False, empty str - BOTH: True - """ - - AUTO = 'auto', 'Auto-detect', None, None - OFF = 'off', 'Off', False, False - HORIZONTAL = 'x', 'X', True, False - VERTICAL = 'y', 'Y', False, True - BOTH = 'xy', 'Both', True, True - - def __init__(self, id, text, x, y): - self.id = id - self.text = text - self.x = x - self.y = y - - def __eq__(self, other): - if isinstance(other, type(self)): - return self is other - if isinstance(other, str): - return self.id == other or self.text == other or (other == '' and self is self.OFF) - if isinstance(other, (tuple, list)) and len(other) == 2: - return self.x == other[0] and self.y == other[1] - if other is True and self is self.BOTH: - return True - if other is False and self is self.OFF: - return True - if other is None and self is self.AUTO: - return True - return False - - def __and__(self, other): - return SeamlessAxes((self.x and other.x, self.y and other.y)) - - def __or__(self, other): - return SeamlessAxes((self.x or other.x, self.y or other.y)) - - def __xor__(self, other): - return SeamlessAxes((self.x != other.x, self.y != other.y)) - - def __invert__(self): - return SeamlessAxes((not self.x, not self.y)) - - @classmethod - def _missing_(cls, value): - if isinstance(value, str): - if value == '': - return cls.OFF - for e in cls: - if e.id == value or e.text == value: - return e - raise ValueError(f'no {cls.__name__} with id {repr(id)}') - elif isinstance(value, (tuple, list)) and len(value) == 2: - for e in cls: - if e.x == value[0] and e.y == value[1]: - return e - raise ValueError(f'no {cls.__name__} with x {value[0]} and y {value[1]}') - elif value is True: - return cls.BOTH - elif value is False: - return cls.OFF - elif value is None: - return cls.AUTO - raise TypeError(f'expected str, bool, tuple[bool, bool], or None, got {repr(value)}') - - def bpy_enum(self, *args): - return self.id, self.text, *args - +from ....api.models.seamless_axes import SeamlessAxes def detect_seamless(self, image: NDArray) -> SeamlessAxes: import os diff --git a/generator_process/actions/image_to_image.py b/generator_process/actions/image_to_image.py index 34960932..6f16c5a5 100644 --- a/generator_process/actions/image_to_image.py +++ b/generator_process/actions/image_to_image.py @@ -6,7 +6,7 @@ import numpy as np import random from .prompt_to_image import Scheduler, Optimizations, StepPreviewMode, ImageGenerationResult, _configure_model_padding, load_pipe -from .detect_seamless import SeamlessAxes +from ...api.models.seamless_axes import SeamlessAxes def image_to_image( diff --git a/generator_process/actions/inpaint.py b/generator_process/actions/inpaint.py index b8bfd454..eaa77324 100644 --- a/generator_process/actions/inpaint.py +++ b/generator_process/actions/inpaint.py @@ -5,7 +5,7 @@ import numpy as np import random from .prompt_to_image import Scheduler, Optimizations, StepPreviewMode, ImageGenerationResult, _configure_model_padding, model_snapshot_folder, load_pipe -from .detect_seamless import SeamlessAxes +from ...api.models.seamless_axes import SeamlessAxes def inpaint( self, diff --git a/generator_process/actions/prompt_to_image.py b/generator_process/actions/prompt_to_image.py index 36d0c0da..4298cf4f 100644 --- a/generator_process/actions/prompt_to_image.py +++ b/generator_process/actions/prompt_to_image.py @@ -1,17 +1,14 @@ -from typing import Annotated, Union, _AnnotatedAlias, Generator, Callable, List, Optional, Any -import enum -import functools -import math +from typing import Union, Generator, Callable, 
List, Optional, Any import os -import sys -from dataclasses import dataclass from contextlib import nullcontext -from numpy.typing import NDArray import numpy as np import random -from .detect_seamless import SeamlessAxes -from ..models.upscale_tiler import tiled_decode_latents +from ...api.models.seamless_axes import SeamlessAxes +from ...api.models.step_preview_mode import StepPreviewMode +from ..models.scheduler import Scheduler +from ..models.optimizations import Optimizations +from ..models.image_generation_result import ImageGenerationResult class CachedPipeline: """A pipeline that has been cached for subsequent runs.""" @@ -79,294 +76,6 @@ def load_pipe(self, action, generator_pipeline, model, optimizations, scheduler, pipe.scheduler = scheduler.create(pipe, None) return pipe -class Scheduler(enum.Enum): - DDIM = "DDIM" - DDPM = "DDPM" - DEIS_MULTISTEP = "DEIS Multistep" - DPM_SOLVER_MULTISTEP = "DPM Solver Multistep" - DPM_SOLVER_SINGLESTEP = "DPM Solver Singlestep" - EULER_DISCRETE = "Euler Discrete" - EULER_ANCESTRAL_DISCRETE = "Euler Ancestral Discrete" - HEUN_DISCRETE = "Heun Discrete" - KDPM2_DISCRETE = "KDPM2 Discrete" # Non-functional on mps - KDPM2_ANCESTRAL_DISCRETE = "KDPM2 Ancestral Discrete" - LMS_DISCRETE = "LMS Discrete" - PNDM = "PNDM" - - def create(self, pipeline, pretrained): - import diffusers - def scheduler_class(): - match self: - case Scheduler.DDIM: - return diffusers.schedulers.DDIMScheduler - case Scheduler.DDPM: - return diffusers.schedulers.DDPMScheduler - case Scheduler.DEIS_MULTISTEP: - return diffusers.schedulers.DEISMultistepScheduler - case Scheduler.DPM_SOLVER_MULTISTEP: - return diffusers.schedulers.DPMSolverMultistepScheduler - case Scheduler.DPM_SOLVER_SINGLESTEP: - return diffusers.schedulers.DPMSolverSinglestepScheduler - case Scheduler.EULER_DISCRETE: - return diffusers.schedulers.EulerDiscreteScheduler - case Scheduler.EULER_ANCESTRAL_DISCRETE: - return diffusers.schedulers.EulerAncestralDiscreteScheduler - case Scheduler.HEUN_DISCRETE: - return diffusers.schedulers.HeunDiscreteScheduler - case Scheduler.KDPM2_DISCRETE: - return diffusers.schedulers.KDPM2DiscreteScheduler - case Scheduler.KDPM2_ANCESTRAL_DISCRETE: - return diffusers.schedulers.KDPM2AncestralDiscreteScheduler - case Scheduler.LMS_DISCRETE: - return diffusers.schedulers.LMSDiscreteScheduler - case Scheduler.PNDM: - return diffusers.schedulers.PNDMScheduler - if pretrained is not None: - return scheduler_class().from_pretrained(pretrained['model_path'], subfolder=pretrained['subfolder']) - else: - return scheduler_class().from_config(pipeline.scheduler.config) - - def stability_sdk(self): - import stability_sdk.interfaces.gooseai.generation.generation_pb2 - match self: - case Scheduler.LMS_DISCRETE: - return stability_sdk.interfaces.gooseai.generation.generation_pb2.SAMPLER_K_LMS - case Scheduler.DDIM: - return stability_sdk.interfaces.gooseai.generation.generation_pb2.SAMPLER_DDIM - case Scheduler.DDPM: - return stability_sdk.interfaces.gooseai.generation.generation_pb2.SAMPLER_DDPM - case Scheduler.EULER_DISCRETE: - return stability_sdk.interfaces.gooseai.generation.generation_pb2.SAMPLER_K_EULER - case Scheduler.EULER_ANCESTRAL_DISCRETE: - return stability_sdk.interfaces.gooseai.generation.generation_pb2.SAMPLER_K_EULER_ANCESTRAL - case _: - raise ValueError(f"{self} cannot be used with DreamStudio.") - -@dataclass(eq=True) -class Optimizations: - attention_slicing: bool = True - attention_slice_size: Union[str, int] = "auto" - cudnn_benchmark: Annotated[bool, "cuda"] = 
False - tf32: Annotated[bool, "cuda"] = False - amp: Annotated[bool, "cuda"] = False - half_precision: Annotated[bool, {"cuda", "privateuseone"}] = True - cpu_offload: Annotated[str, {"cuda", "privateuseone"}] = "off" - channels_last_memory_format: bool = False - sdp_attention: Annotated[bool, {"cpu", "cuda", "mps"}] = True - batch_size: int = 1 - vae_slicing: bool = True - vae_tiling: str = "off" - vae_tile_size: int = 512 - vae_tile_blend: int = 64 - cfg_end: float = 1.0 - - cpu_only: bool = False - - @staticmethod - def infer_device() -> str: - from ...absolute_path import absolute_path - if sys.platform == "darwin": - return "mps" - elif os.path.exists(absolute_path(".python_dependencies/torch_directml")): - return "privateuseone" - else: - return "cuda" - - @classmethod - def device_supports(cls, property, device) -> bool: - annotation = cls.__annotations__.get(property, None) - if isinstance(annotation, _AnnotatedAlias): - opt_dev = annotation.__metadata__[0] - if isinstance(opt_dev, str): - return opt_dev == device - return device in opt_dev - return annotation is not None - - def can_use(self, property, device) -> bool: - return self.device_supports(property, device) and getattr(self, property) - - def can_use_half(self, device): - if self.half_precision and device == "cuda": - import torch - name = torch.cuda.get_device_name() - return not ("GTX 1650" in name or "GTX 1660" in name) - return self.can_use("half_precision", device) - - def can_use_cpu_offload(self, device): - return self.cpu_offload if self.device_supports("cpu_offload", device) else "off" - - def apply(self, pipeline, device): - """ - Apply the optimizations to a diffusers pipeline. - - All exceptions are ignored to make this more general purpose across different pipelines. - """ - import torch - - torch.backends.cudnn.benchmark = self.can_use("cudnn_benchmark", device) - torch.backends.cuda.matmul.allow_tf32 = self.can_use("tf32", device) - - try: - if self.can_use("sdp_attention", device): - from diffusers.models.cross_attention import AttnProcessor2_0 - pipeline.unet.set_attn_processor(AttnProcessor2_0()) - elif self.can_use("attention_slicing", device): - pipeline.enable_attention_slicing(self.attention_slice_size) - else: - pipeline.disable_attention_slicing() # will also disable AttnProcessor2_0 - except: pass - - try: - if pipeline.device != pipeline._execution_device: - pass # pipeline is already offloaded, offloading again can cause `pipeline._execution_device` to be incorrect - elif self.can_use_cpu_offload(device) == "model": - # adapted from diffusers.StableDiffusionPipeline.enable_model_cpu_offload() to allow DirectML device and unimplemented pipelines - from accelerate import cpu_offload_with_hook - - hook = None - models = [pipeline.text_encoder, pipeline.unet, pipeline.vae] - if hasattr(pipeline, "controlnet"): - models.append(pipeline.controlnet) - for cpu_offloaded_model in models: - _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook) - - # FIXME: due to the safety checker not running it prevents the VAE from being offloaded, uncomment when safety checker is enabled - # if pipeline.safety_checker is not None: - # _, hook = cpu_offload_with_hook(pipeline.safety_checker, device, prev_module_hook=hook) - - # We'll offload the last model manually. 
- pipeline.final_offload_hook = hook - elif self.can_use_cpu_offload(device) == "submodule": - # adapted from diffusers.StableDiffusionPipeline.enable_sequential_cpu_offload() to allow DirectML device and unimplemented pipelines - from accelerate import cpu_offload - - models = [pipeline.text_encoder, pipeline.unet, pipeline.vae] - if hasattr(pipeline, "controlnet"): - models.append(pipeline.controlnet) - for cpu_offloaded_model in models: - cpu_offload(cpu_offloaded_model, device) - - if pipeline.safety_checker is not None: - cpu_offload(pipeline.safety_checker.vision_model, device, offload_buffers=True) - except: pass - - try: - if self.can_use("channels_last_memory_format", device): - pipeline.unet.to(memory_format=torch.channels_last) - else: - pipeline.unet.to(memory_format=torch.contiguous_format) - except: pass - - try: - if self.can_use("vae_slicing", device): - # Not many pipelines implement the enable_vae_slicing()/disable_vae_slicing() - # methods but all they do is forward their call to the vae anyway. - pipeline.vae.enable_slicing() - else: - pipeline.vae.disable_slicing() - except: pass - - try: - if self.vae_tiling != "off": - if not isinstance(pipeline.decode_latents, functools.partial): - pipeline.decode_latents = functools.partial(tiled_decode_latents.__get__(pipeline), pre_patch=pipeline.decode_latents) - pipeline.decode_latents.keywords['optimizations'] = self - elif self.vae_tiling == "off" and isinstance(pipeline.decode_latents, functools.partial): - pipeline.decode_latents = pipeline.decode_latents.keywords["pre_patch"] - except: pass - - from .. import directml_patches - if device == "privateuseone": - directml_patches.enable(pipeline) - else: - directml_patches.disable(pipeline) - - return pipeline - -class StepPreviewMode(enum.Enum): - NONE = "None" - FAST = "Fast" - FAST_BATCH = "Fast (Batch Tiled)" - ACCURATE = "Accurate" - ACCURATE_BATCH = "Accurate (Batch Tiled)" - -@dataclass -class ImageGenerationResult: - images: List[NDArray] - seeds: List[int] - step: int - final: bool - - @staticmethod - def step_preview(pipe, mode, width, height, latents, generator, iteration): - from PIL import Image, ImageOps - seeds = [gen.initial_seed() for gen in generator] if isinstance(generator, list) else [generator.initial_seed()] - match mode: - case StepPreviewMode.FAST: - return ImageGenerationResult( - [np.asarray(ImageOps.flip(Image.fromarray(approximate_decoded_latents(latents[-1:]))).resize((width, height), Image.Resampling.NEAREST).convert('RGBA'), dtype=np.float32) / 255.], - seeds[-1:], - iteration, - False - ) - case StepPreviewMode.FAST_BATCH: - return ImageGenerationResult( - [ - np.asarray(ImageOps.flip(Image.fromarray(approximate_decoded_latents(latents[i:i + 1]))).resize((width, height), Image.Resampling.NEAREST).convert('RGBA'), - dtype=np.float32) / 255. - for i in range(latents.size(0)) - ], - seeds, - iteration, - False - ) - case StepPreviewMode.ACCURATE: - return ImageGenerationResult( - [np.asarray(ImageOps.flip(pipe.numpy_to_pil(pipe.decode_latents(latents[-1:]))[0]).convert('RGBA'), - dtype=np.float32) / 255.], - seeds[-1:], - iteration, - False - ) - case StepPreviewMode.ACCURATE_BATCH: - return ImageGenerationResult( - [ - np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255. 
- for image in pipe.numpy_to_pil(pipe.decode_latents(latents)) - ], - seeds, - iteration, - False - ) - return ImageGenerationResult( - [], - seeds, - iteration, - False - ) - - def tile_images(self): - images = self.images - if len(images) == 0: - return None - elif len(images) == 1: - return images[0] - width = images[0].shape[1] - height = images[0].shape[0] - tiles_x = math.ceil(math.sqrt(len(images))) - tiles_y = math.ceil(len(images) / tiles_x) - tiles = np.zeros((height * tiles_y, width * tiles_x, 4), dtype=np.float32) - bottom_offset = (tiles_x*tiles_y-len(images)) * width // 2 - for i, image in enumerate(images): - x = i % tiles_x - y = tiles_y - 1 - int((i - x) / tiles_x) - x *= width - y *= height - if y == 0: - x += bottom_offset - tiles[y: y + height, x: x + width] = image - return tiles - def choose_device(self) -> str: """ Automatically select which PyTorch device to use. @@ -384,31 +93,6 @@ def choose_device(self) -> str: return "privateuseone" return "cpu" -def approximate_decoded_latents(latents): - """ - Approximate the decoded latents without using the VAE. - """ - import torch - # origingally adapted from code by @erucipe and @keturn here: - # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7 - - # these updated numbers for v1.5 are from @torridgristle - v1_5_latent_rgb_factors = torch.tensor([ - # R G B - [ 0.3444, 0.1385, 0.0670], # L1 - [ 0.1247, 0.4027, 0.1494], # L2 - [-0.3192, 0.2513, 0.2103], # L3 - [-0.1307, -0.1874, -0.7445] # L4 - ], dtype=latents.dtype, device=latents.device) - - latent_image = latents[0].permute(1, 2, 0) @ v1_5_latent_rgb_factors - latents_ubyte = (((latent_image + 1) / 2) - .clamp(0, 1) # change scale from -1..1 to 0..1 - .mul(0xFF) # to 0..255 - .byte()).cpu() - - return latents_ubyte.numpy() - def model_snapshot_folder(model, preferred_revision: str | None = None): """ Try to find the preferred revision, but fallback to another revision if necessary. 
""" import diffusers diff --git a/generator_process/actions/upscale.py b/generator_process/actions/upscale.py index e44f1d36..372b198f 100644 --- a/generator_process/actions/upscale.py +++ b/generator_process/actions/upscale.py @@ -1,6 +1,6 @@ import numpy as np from .prompt_to_image import Optimizations, Scheduler, StepPreviewMode, _configure_model_padding -from .detect_seamless import SeamlessAxes +from ...api.models.seamless_axes import SeamlessAxes import random from dataclasses import dataclass from numpy.typing import NDArray diff --git a/generator_process/models/__init__.py b/generator_process/models/__init__.py index 6b9245f3..cf1a0b83 100644 --- a/generator_process/models/__init__.py +++ b/generator_process/models/__init__.py @@ -1 +1,5 @@ -from .fix_it_error import * \ No newline at end of file +from .fix_it_error import * +from .image_generation_result import * +from .optimizations import * +from .scheduler import * +from .upscale_tiler import * \ No newline at end of file diff --git a/generator_process/models/image_generation_result.py b/generator_process/models/image_generation_result.py new file mode 100644 index 00000000..ae5edf8c --- /dev/null +++ b/generator_process/models/image_generation_result.py @@ -0,0 +1,106 @@ +from typing import List +import math +from dataclasses import dataclass +from numpy.typing import NDArray + +@dataclass +class ImageGenerationResult: + images: List[NDArray] + seeds: List[int] + step: int + final: bool + + @staticmethod + def step_preview(pipe, mode, width, height, latents, generator, iteration): + from PIL import Image, ImageOps + seeds = [gen.initial_seed() for gen in generator] if isinstance(generator, list) else [generator.initial_seed()] + match mode: + case StepPreviewMode.FAST: + return ImageGenerationResult( + [np.asarray(ImageOps.flip(Image.fromarray(approximate_decoded_latents(latents[-1:]))).resize((width, height), Image.Resampling.NEAREST).convert('RGBA'), dtype=np.float32) / 255.], + seeds[-1:], + iteration, + False + ) + case StepPreviewMode.FAST_BATCH: + return ImageGenerationResult( + [ + np.asarray(ImageOps.flip(Image.fromarray(approximate_decoded_latents(latents[i:i + 1]))).resize((width, height), Image.Resampling.NEAREST).convert('RGBA'), + dtype=np.float32) / 255. + for i in range(latents.size(0)) + ], + seeds, + iteration, + False + ) + case StepPreviewMode.ACCURATE: + return ImageGenerationResult( + [np.asarray(ImageOps.flip(pipe.numpy_to_pil(pipe.decode_latents(latents[-1:]))[0]).convert('RGBA'), + dtype=np.float32) / 255.], + seeds[-1:], + iteration, + False + ) + case StepPreviewMode.ACCURATE_BATCH: + return ImageGenerationResult( + [ + np.asarray(ImageOps.flip(image).convert('RGBA'), dtype=np.float32) / 255. 
+                        for image in pipe.numpy_to_pil(pipe.decode_latents(latents))
+                    ],
+                    seeds,
+                    iteration,
+                    False
+                )
+        return ImageGenerationResult(
+            [],
+            seeds,
+            iteration,
+            False
+        )
+
+    def tile_images(self):
+        images = self.images
+        if len(images) == 0:
+            return None
+        elif len(images) == 1:
+            return images[0]
+        width = images[0].shape[1]
+        height = images[0].shape[0]
+        tiles_x = math.ceil(math.sqrt(len(images)))
+        tiles_y = math.ceil(len(images) / tiles_x)
+        tiles = np.zeros((height * tiles_y, width * tiles_x, 4), dtype=np.float32)
+        bottom_offset = (tiles_x*tiles_y-len(images)) * width // 2
+        for i, image in enumerate(images):
+            x = i % tiles_x
+            y = tiles_y - 1 - int((i - x) / tiles_x)
+            x *= width
+            y *= height
+            if y == 0:
+                x += bottom_offset
+            tiles[y: y + height, x: x + width] = image
+        return tiles
+
+def approximate_decoded_latents(latents):
+    """
+    Approximate the decoded latents without using the VAE.
+    """
+    import torch
+    # originally adapted from code by @erucipe and @keturn here:
+    # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7
+
+    # these updated numbers for v1.5 are from @torridgristle
+    v1_5_latent_rgb_factors = torch.tensor([
+        #  R        G        B
+        [ 0.3444,  0.1385,  0.0670], # L1
+        [ 0.1247,  0.4027,  0.1494], # L2
+        [-0.3192,  0.2513,  0.2103], # L3
+        [-0.1307, -0.1874, -0.7445]  # L4
+    ], dtype=latents.dtype, device=latents.device)
+
+    latent_image = latents[0].permute(1, 2, 0) @ v1_5_latent_rgb_factors
+    latents_ubyte = (((latent_image + 1) / 2)
+                     .clamp(0, 1)  # change scale from -1..1 to 0..1
+                     .mul(0xFF)    # to 0..255
+                     .byte()).cpu()
+
+    return latents_ubyte.numpy()
\ No newline at end of file
diff --git a/generator_process/models/optimizations.py b/generator_process/models/optimizations.py
new file mode 100644
index 00000000..55ae645b
--- /dev/null
+++ b/generator_process/models/optimizations.py
@@ -0,0 +1,146 @@
+from typing import Annotated, Union, _AnnotatedAlias
+import functools
+import os
+import sys
+from dataclasses import dataclass
+
+@dataclass(eq=True)
+class Optimizations:
+    attention_slicing: bool = True
+    attention_slice_size: Union[str, int] = "auto"
+    cudnn_benchmark: Annotated[bool, "cuda"] = False
+    tf32: Annotated[bool, "cuda"] = False
+    amp: Annotated[bool, "cuda"] = False
+    half_precision: Annotated[bool, {"cuda", "privateuseone"}] = True
+    cpu_offload: Annotated[str, {"cuda", "privateuseone"}] = "off"
+    channels_last_memory_format: bool = False
+    sdp_attention: Annotated[bool, {"cpu", "cuda", "mps"}] = True
+    batch_size: int = 1
+    vae_slicing: bool = True
+    vae_tiling: str = "off"
+    vae_tile_size: int = 512
+    vae_tile_blend: int = 64
+    cfg_end: float = 1.0
+
+    cpu_only: bool = False
+
+    @staticmethod
+    def infer_device() -> str:
+        from ...absolute_path import absolute_path
+        if sys.platform == "darwin":
+            return "mps"
+        elif os.path.exists(absolute_path(".python_dependencies/torch_directml")):
+            return "privateuseone"
+        else:
+            return "cuda"
+
+    @classmethod
+    def device_supports(cls, property, device) -> bool:
+        annotation = cls.__annotations__.get(property, None)
+        if isinstance(annotation, _AnnotatedAlias):
+            opt_dev = annotation.__metadata__[0]
+            if isinstance(opt_dev, str):
+                return opt_dev == device
+            return device in opt_dev
+        return annotation is not None
+
+    def can_use(self, property, device) -> bool:
+        return self.device_supports(property, device) and getattr(self, property)
+
+    def can_use_half(self, device):
+        if self.half_precision and device == "cuda":
+            import torch
+            name = torch.cuda.get_device_name()
+            return 
not ("GTX 1650" in name or "GTX 1660" in name) + return self.can_use("half_precision", device) + + def can_use_cpu_offload(self, device): + return self.cpu_offload if self.device_supports("cpu_offload", device) else "off" + + def apply(self, pipeline, device): + """ + Apply the optimizations to a diffusers pipeline. + + All exceptions are ignored to make this more general purpose across different pipelines. + """ + import torch + + torch.backends.cudnn.benchmark = self.can_use("cudnn_benchmark", device) + torch.backends.cuda.matmul.allow_tf32 = self.can_use("tf32", device) + + try: + if self.can_use("sdp_attention", device): + from diffusers.models.cross_attention import AttnProcessor2_0 + pipeline.unet.set_attn_processor(AttnProcessor2_0()) + elif self.can_use("attention_slicing", device): + pipeline.enable_attention_slicing(self.attention_slice_size) + else: + pipeline.disable_attention_slicing() # will also disable AttnProcessor2_0 + except: pass + + try: + if pipeline.device != pipeline._execution_device: + pass # pipeline is already offloaded, offloading again can cause `pipeline._execution_device` to be incorrect + elif self.can_use_cpu_offload(device) == "model": + # adapted from diffusers.StableDiffusionPipeline.enable_model_cpu_offload() to allow DirectML device and unimplemented pipelines + from accelerate import cpu_offload_with_hook + + hook = None + models = [pipeline.text_encoder, pipeline.unet, pipeline.vae] + if hasattr(pipeline, "controlnet"): + models.append(pipeline.controlnet) + for cpu_offloaded_model in models: + _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook) + + # FIXME: due to the safety checker not running it prevents the VAE from being offloaded, uncomment when safety checker is enabled + # if pipeline.safety_checker is not None: + # _, hook = cpu_offload_with_hook(pipeline.safety_checker, device, prev_module_hook=hook) + + # We'll offload the last model manually. + pipeline.final_offload_hook = hook + elif self.can_use_cpu_offload(device) == "submodule": + # adapted from diffusers.StableDiffusionPipeline.enable_sequential_cpu_offload() to allow DirectML device and unimplemented pipelines + from accelerate import cpu_offload + + models = [pipeline.text_encoder, pipeline.unet, pipeline.vae] + if hasattr(pipeline, "controlnet"): + models.append(pipeline.controlnet) + for cpu_offloaded_model in models: + cpu_offload(cpu_offloaded_model, device) + + if pipeline.safety_checker is not None: + cpu_offload(pipeline.safety_checker.vision_model, device, offload_buffers=True) + except: pass + + try: + if self.can_use("channels_last_memory_format", device): + pipeline.unet.to(memory_format=torch.channels_last) + else: + pipeline.unet.to(memory_format=torch.contiguous_format) + except: pass + + try: + if self.can_use("vae_slicing", device): + # Not many pipelines implement the enable_vae_slicing()/disable_vae_slicing() + # methods but all they do is forward their call to the vae anyway. 
+ pipeline.vae.enable_slicing() + else: + pipeline.vae.disable_slicing() + except: pass + + try: + if self.vae_tiling != "off": + if not isinstance(pipeline.decode_latents, functools.partial): + pipeline.decode_latents = functools.partial(tiled_decode_latents.__get__(pipeline), pre_patch=pipeline.decode_latents) + pipeline.decode_latents.keywords['optimizations'] = self + elif self.vae_tiling == "off" and isinstance(pipeline.decode_latents, functools.partial): + pipeline.decode_latents = pipeline.decode_latents.keywords["pre_patch"] + except: pass + + from .. import directml_patches + if device == "privateuseone": + directml_patches.enable(pipeline) + else: + directml_patches.disable(pipeline) + + return pipeline \ No newline at end of file diff --git a/generator_process/models/scheduler.py b/generator_process/models/scheduler.py new file mode 100644 index 00000000..73120826 --- /dev/null +++ b/generator_process/models/scheduler.py @@ -0,0 +1,48 @@ +import enum + +class Scheduler(enum.Enum): + DDIM = "DDIM" + DDPM = "DDPM" + DEIS_MULTISTEP = "DEIS Multistep" + DPM_SOLVER_MULTISTEP = "DPM Solver Multistep" + DPM_SOLVER_SINGLESTEP = "DPM Solver Singlestep" + EULER_DISCRETE = "Euler Discrete" + EULER_ANCESTRAL_DISCRETE = "Euler Ancestral Discrete" + HEUN_DISCRETE = "Heun Discrete" + KDPM2_DISCRETE = "KDPM2 Discrete" # Non-functional on mps + KDPM2_ANCESTRAL_DISCRETE = "KDPM2 Ancestral Discrete" + LMS_DISCRETE = "LMS Discrete" + PNDM = "PNDM" + + def create(self, pipeline, pretrained): + import diffusers + def scheduler_class(): + match self: + case Scheduler.DDIM: + return diffusers.schedulers.DDIMScheduler + case Scheduler.DDPM: + return diffusers.schedulers.DDPMScheduler + case Scheduler.DEIS_MULTISTEP: + return diffusers.schedulers.DEISMultistepScheduler + case Scheduler.DPM_SOLVER_MULTISTEP: + return diffusers.schedulers.DPMSolverMultistepScheduler + case Scheduler.DPM_SOLVER_SINGLESTEP: + return diffusers.schedulers.DPMSolverSinglestepScheduler + case Scheduler.EULER_DISCRETE: + return diffusers.schedulers.EulerDiscreteScheduler + case Scheduler.EULER_ANCESTRAL_DISCRETE: + return diffusers.schedulers.EulerAncestralDiscreteScheduler + case Scheduler.HEUN_DISCRETE: + return diffusers.schedulers.HeunDiscreteScheduler + case Scheduler.KDPM2_DISCRETE: + return diffusers.schedulers.KDPM2DiscreteScheduler + case Scheduler.KDPM2_ANCESTRAL_DISCRETE: + return diffusers.schedulers.KDPM2AncestralDiscreteScheduler + case Scheduler.LMS_DISCRETE: + return diffusers.schedulers.LMSDiscreteScheduler + case Scheduler.PNDM: + return diffusers.schedulers.PNDMScheduler + if pretrained is not None: + return scheduler_class().from_pretrained(pretrained['model_path'], subfolder=pretrained['subfolder']) + else: + return scheduler_class().from_config(pipeline.scheduler.config) \ No newline at end of file diff --git a/property_groups/dream_prompt.py b/property_groups/dream_prompt.py index d319255d..8485a34c 100644 --- a/property_groups/dream_prompt.py +++ b/property_groups/dream_prompt.py @@ -16,7 +16,13 @@ from functools import reduce -scheduler_options = [(scheduler.value, scheduler.value, '') for scheduler in Scheduler] +from .. 
import api + +def scheduler_options(self, context): + return [ + (scheduler, scheduler, '') + for scheduler in self.get_backend().list_schedulers(context) + ] step_preview_mode_options = [(mode.value, mode.value, '') for mode in StepPreviewMode] @@ -69,35 +75,15 @@ def modify_action_source_type(self, context): ] def model_options(self, context): - def model_case(model, i): - return ( - model.model_base, - model.model_base.replace('models--', '').replace('--', '/'), - ModelType[model.model_type].name, - i - ) - models = {} - for i, model in enumerate(context.preferences.addons[StableDiffusionPreferences.bl_idname].preferences.installed_models): - if model.model_type in {ModelType.CONTROL_NET.name, ModelType.UNKNOWN.name}: - continue - if model.model_type not in models: - models[model.model_type] = [model_case(model, i)] - else: - models[model.model_type].append(model_case(model, i)) - return reduce( - lambda a, b: a + [None] + sorted(b, key=lambda m: m[0]), - [ - models[group] - for group in sorted(models.keys()) - ], - [] - ) + return [ + None if model is None else (model.id, model.name, model.description) + for model in self.get_backend().list_models(context) + ] def backend_options(self, context): - from .. import api return [ - (f"{backend.__module__}.{backend.__name__}", backend.name if hasattr(backend, "name") else backend.__name__, backend.description if hasattr(backend, "description") else "") - for backend in api.Backend.__subclasses__() + (backend._id(), backend.name if hasattr(backend, "name") else backend.__name__, backend.description if hasattr(backend, "description") else "") + for backend in api.Backend._list_backends() ] def seed_clamp(self, ctx): @@ -161,58 +147,6 @@ def seed_clamp(self, ctx): "hash": StringProperty(name="Image Hash"), } -default_optimizations = Optimizations() -def optimization(optim, property=None, **kwargs): - if "name" not in kwargs: - kwargs["name"] = optim.replace('_', ' ').title() - if "default" not in kwargs: - kwargs["default"] = getattr(default_optimizations, optim) - if property is None: - match kwargs["default"]: - case bool(): - property = BoolProperty - case int(): - property = IntProperty - case float(): - property = FloatProperty - case _: - raise TypeError(f"{optim} cannot infer optimization property from {type(kwargs['default'])}") - attributes[f"optimizations_{optim}"] = property(**kwargs) - -optimization("attention_slicing", description="Computes attention in several steps. Saves some memory in exchange for a small speed decrease") -optimization("attention_slice_size_src", property=EnumProperty, items=( - ("auto", "Automatic", "Computes attention in two steps", 1), - ("manual", "Manual", "Computes attention in `attention_head_dim // size` steps. 
A smaller `size` saves more memory.\n" - "`attention_head_dim` must be a multiple of `size`, otherwise the image won't generate properly.\n" - "`attention_head_dim` can be found within the model snapshot's unet/config.json file", 2), -), default=1, name="Attention Slice Size") -optimization("attention_slice_size", default=1, min=1) -optimization("cudnn_benchmark", name="cuDNN Benchmark", description="Allows cuDNN to benchmark multiple convolution algorithms and select the fastest") -optimization("tf32", name="TF32", description="Utilizes tensor cores on Ampere (RTX 30xx) or newer GPUs for matrix multiplications.\nHas no effect if half precision is enabled") -optimization("half_precision", description="Reduces memory usage and increases speed in exchange for a slight loss in image quality.\nHas no effect if CPU only is enabled or using a GTX 16xx GPU") -optimization("cpu_offload", property=EnumProperty, items=( - ("off", "Off", "", 0), - ("model", "Model", "Some memory savings with minimal speed penalty", 1), - ("submodule", "Submodule", "Better memory savings with large speed penalty", 2) -), default=0, name="CPU Offload", description="Dynamically moves models in and out of device memory for reduced memory usage with reduced speed") -optimization("channels_last_memory_format", description="An alternative way of ordering NCHW tensors that may be faster or slower depending on the device") -optimization("sdp_attention", name="SDP Attention", - description="Scaled dot product attention requires less memory and often comes with a good speed increase.\n" - "Prompt recall may not produce the exact same image, but usually only minor noise differences.\n" - "Overrides attention slicing") -optimization("batch_size", default=1, min=1, description="Improves speed when using iterations or upscaling in exchange for higher memory usage.\nHighly recommended to use with VAE slicing enabled") -optimization("vae_slicing", name="VAE Slicing", description="Reduces memory usage of batched VAE decoding. Has no effect if batch size is 1.\nMay have a small performance improvement with large batches") -optimization("vae_tiling", property=EnumProperty, items=( - ("off", "Off", "", 0), - ("half", "Half", "Uses tiles of half the selected model's default size. Likely to cause noticeably inaccurate colors", 1), - ("full", "Full", "Uses tiles of the selected model's default size, intended for use where image size is manually set higher. May cause slightly inaccurate colors", 2), - ("manual", "Manual", "", 3) -), default=0, name="VAE Tiling", description="Decodes generated images in tiled regions to reduce memory usage in exchange for longer decode time and less accurate colors.\nCan allow for generating larger images that would otherwise run out of memory on the final step") -optimization("vae_tile_size", min=1, name="VAE Tile Size", description="Width and height measurement of tiles. 
Smaller sizes are more likely to cause inaccurate colors and other undesired artifacts") -optimization("vae_tile_blend", min=0, name="VAE Tile Blend", description="Minimum amount of how much each edge of a tile will intersect its adjacent tile") -optimization("cfg_end", name="CFG End", min=0, max=1, description="The percentage of steps to complete before disabling classifier-free guidance") -optimization("cpu_only", name="CPU Only", description="Disables GPU acceleration and is extremely slow") - def map_structure_token_items(value): return (value[0], value[1], '') for structure in prompt_structures: @@ -305,9 +239,13 @@ def generate_args(self): del args['control_nets'] return args +def get_backend(self) -> api.Backend: + return getattr(self, api.Backend._lookup(self.backend)._attribute()) + DreamPrompt.generate_prompt = generate_prompt DreamPrompt.get_prompt_subject = get_prompt_subject DreamPrompt.get_seed = get_seed DreamPrompt.get_optimizations = get_optimizations DreamPrompt.generate_args = generate_args -DreamPrompt.validate = validate \ No newline at end of file +DreamPrompt.validate = validate +DreamPrompt.get_backend = get_backend \ No newline at end of file diff --git a/ui/panels/dream_texture.py b/ui/panels/dream_texture.py index 13d6d3aa..5d4dc464 100644 --- a/ui/panels/dream_texture.py +++ b/ui/panels/dream_texture.py @@ -17,6 +17,7 @@ from ...generator_process.actions.prompt_to_image import Optimizations from ...generator_process.actions.detect_seamless import SeamlessAxes from ...generator_process.models import FixItError +from ... import api def dream_texture_panels(): for space_type in SPACE_TYPES: @@ -294,19 +295,8 @@ def draw(self, context): layout.use_property_split = True prompt = get_prompt(context) - inferred_device = Optimizations.infer_device() - if prompt.optimizations_cpu_only: - inferred_device = "cpu" - def optimization(prop): - if Optimizations.device_supports(prop, inferred_device): - layout.prop(prompt, f"optimizations_{prop}") - - optimization("cudnn_benchmark") - optimization("tf32") - optimization("half_precision") - optimization("channels_last_memory_format") - optimization("batch_size") - optimization("cfg_end") + backend: api.Backend = prompt.get_backend() + backend.draw_speed_optimizations(layout, context) yield SpeedOptimizationPanel class MemoryOptimizationPanel(sub_panel): @@ -321,26 +311,8 @@ def draw(self, context): layout.use_property_split = True prompt = get_prompt(context) - inferred_device = Optimizations.infer_device() - if prompt.optimizations_cpu_only: - inferred_device = "cpu" - def optimization(prop): - if Optimizations.device_supports(prop, inferred_device): - layout.prop(prompt, f"optimizations_{prop}") - - optimization("attention_slicing") - slice_size_row = layout.row() - slice_size_row.prop(prompt, "optimizations_attention_slice_size_src") - if prompt.optimizations_attention_slice_size_src == 'manual': - slice_size_row.prop(prompt, "optimizations_attention_slice_size", text="Size") - optimization("sdp_attention") - optimization("cpu_offload") - optimization("cpu_only") - optimization("vae_slicing") - optimization("vae_tiling") - if prompt.optimizations_vae_tiling == "manual": - optimization("vae_tile_size") - optimization("vae_tile_blend") + backend: api.Backend = prompt.get_backend() + backend.draw_memory_optimizations(layout, context) yield MemoryOptimizationPanel def actions_panel(sub_panel, space_type, get_prompt): From 867755ead0b6d79e7c8225448c778b554cdd6bd8 Mon Sep 17 00:00:00 2001 From: Carson Katri Date: Fri, 28 Apr 2023 
14:25:15 -0400 Subject: [PATCH 3/8] Functional text to image --- api/models/prompt.py | 5 +- diffusers_backend.py | 8 +- .../models/image_generation_result.py | 2 + operators/dream_texture.py | 248 +++++------------- operators/view_history.py | 2 +- property_groups/dream_prompt.py | 119 +++++++-- 6 files changed, 175 insertions(+), 209 deletions(-) diff --git a/api/models/prompt.py b/api/models/prompt.py index f2fe2c45..3affcf00 100644 --- a/api/models/prompt.py +++ b/api/models/prompt.py @@ -1,6 +1,7 @@ from dataclasses import dataclass +from typing import List @dataclass class Prompt: - positive: str - negative: str | None \ No newline at end of file + positive: str | List[str] + negative: str | List[str] | None \ No newline at end of file diff --git a/diffusers_backend.py b/diffusers_backend.py index c4841152..1200f17a 100644 --- a/diffusers_backend.py +++ b/diffusers_backend.py @@ -109,7 +109,7 @@ def generate(self, task: Task, model: Model, prompt: Prompt, size: Tuple[int, in gen = Generator.shared() common_kwargs = { 'model': model.id, - 'scheduler': Scheduler[scheduler], + 'scheduler': Scheduler(scheduler), 'optimizations': self.optimizations(), 'prompt': prompt.positive, 'steps': steps, @@ -120,12 +120,16 @@ def generate(self, task: Task, model: Model, prompt: Prompt, size: Tuple[int, in 'use_negative_prompt': prompt.negative is not None, 'negative_prompt': prompt.negative or "", 'seamless_axes': seamless_axes, - 'iterations': 1, + 'iterations': iterations, 'step_preview_mode': step_preview_mode, } future: Future match task: case PromptToImage(): + print(common_kwargs) + import pickle + del common_kwargs['optimizations'].__annotations__ + print(pickle.dumps(common_kwargs)) future = gen.prompt_to_image(**common_kwargs) case ImageToImage(image=image, strength=strength, fit=fit): future = gen.image_to_image(image=image, fit=fit, strength=strength, **common_kwargs) diff --git a/generator_process/models/image_generation_result.py b/generator_process/models/image_generation_result.py index ae5edf8c..a9690c65 100644 --- a/generator_process/models/image_generation_result.py +++ b/generator_process/models/image_generation_result.py @@ -2,6 +2,8 @@ import math from dataclasses import dataclass from numpy.typing import NDArray +import numpy as np +from ...api.models.step_preview_mode import StepPreviewMode @dataclass class ImageGenerationResult: diff --git a/operators/dream_texture.py b/operators/dream_texture.py index db8caee3..8b7de597 100644 --- a/operators/dream_texture.py +++ b/operators/dream_texture.py @@ -1,15 +1,13 @@ import bpy import hashlib import numpy as np -import math +from typing import List from .notify_result import NotifyResult -from ..preferences import StableDiffusionPreferences from ..pil_to_image import * from ..prompt_engineering import * from ..generator_process import Generator -from ..generator_process.actions.prompt_to_image import ImageGenerationResult -from ..generator_process.actions.huggingface_hub import ModelType +from .. 
import api import time def bpy_image(name, width, height, pixels, existing_image): @@ -41,44 +39,27 @@ def poll(cls, context): return Generator.shared().can_use() def execute(self, context): + screen = context.screen + scene = context.scene + prompt = scene.dream_textures_prompt + backend: api.Backend = prompt.get_backend() + history_template = {prop: getattr(context.scene.dream_textures_prompt, prop) for prop in context.scene.dream_textures_prompt.__annotations__.keys()} history_template["iterations"] = 1 history_template["random_seed"] = False + is_file_batch = context.scene.dream_textures_prompt.prompt_structure == file_batch_structure.id file_batch_lines = [] - file_batch_lines_negative = [] if is_file_batch: context.scene.dream_textures_prompt.iterations = 1 file_batch_lines = [line.body for line in context.scene.dream_textures_prompt_file.lines if len(line.body.strip()) > 0] - file_batch_lines_negative = [""] * len(file_batch_lines) history_template["prompt_structure"] = custom_structure.id node_tree = context.material.node_tree if hasattr(context, 'material') and hasattr(context.material, 'node_tree') else None node_tree_center = np.array(node_tree.view_center) if node_tree is not None else None - screen = context.screen - scene = context.scene - - generated_args = scene.dream_textures_prompt.generate_args() - context.scene.seamless_result.update_args(generated_args) + context.scene.seamless_result.update_args(history_template, as_id=True) - init_image = None - if generated_args['use_init_img']: - match generated_args['init_img_src']: - case 'file': - init_image = scene.init_img - case 'open_editor': - for area in screen.areas: - if area.type == 'IMAGE_EDITOR': - if area.spaces.active.image is not None: - init_image = area.spaces.active.image - if init_image is not None: - init_image = np.flipud( - (np.array(init_image.pixels) * 255) - .astype(np.uint8) - .reshape((init_image.size[1], init_image.size[0], init_image.channels)) - ) - # Setup the progress indicator def step_progress_update(self, context): if hasattr(context.area, "regions"): @@ -86,166 +67,77 @@ def step_progress_update(self, context): if region.type == "UI": region.tag_redraw() return None - bpy.types.Scene.dream_textures_progress = bpy.props.IntProperty(name="", default=0, min=0, max=generated_args['steps'], update=step_progress_update) + bpy.types.Scene.dream_textures_progress = bpy.props.IntProperty(name="", default=0, min=0, max=prompt.steps, update=step_progress_update) scene.dream_textures_info = "Starting..." 
last_data_block = None execution_start = time.time() - def step_callback(_, step_image: ImageGenerationResult): + def step_callback(progress: api.GenerationResult): nonlocal last_data_block scene.dream_textures_last_execution_time = f"{time.time() - execution_start:.2f} seconds" - if step_image.final: - return - scene.dream_textures_progress = step_image.step - if len(step_image.images) > 0: - image = step_image.tile_images() - last_data_block = bpy_image(f"Step {step_image.step}/{generated_args['steps']}", image.shape[1], image.shape[0], image.ravel(), last_data_block) - for area in screen.areas: - if area.type == 'IMAGE_EDITOR': - area.spaces.active.image = last_data_block - - iteration = 0 - iteration_limit = len(file_batch_lines) if is_file_batch else generated_args['iterations'] - iteration_square = math.ceil(math.sqrt(iteration_limit)) - def done_callback(future): - nonlocal last_data_block - nonlocal iteration - if hasattr(gen, '_active_generation_future'): - del gen._active_generation_future - result: ImageGenerationResult = future.result(last_only=True) - for i, result_image in enumerate(result.images): - seed = result.seeds[i] - prompt_string = context.scene.dream_textures_prompt.prompt_structure_token_subject - seed_str_length = len(str(seed)) - trim_aware_name = (prompt_string[:54 - seed_str_length] + '..') if len(prompt_string) > 54 else prompt_string - name_with_trimmed_prompt = f"{trim_aware_name} ({seed})" - image = bpy_image(name_with_trimmed_prompt, result_image.shape[1], result_image.shape[0], result_image.ravel(), last_data_block) - last_data_block = None - if node_tree is not None: - nodes = node_tree.nodes - texture_node = nodes.new("ShaderNodeTexImage") - texture_node.image = image - texture_node.location = node_tree_center + ((iteration % iteration_square) * 260, -(iteration // iteration_square) * 297) - nodes.active = texture_node - for area in screen.areas: - if area.type == 'IMAGE_EDITOR': - area.spaces.active.image = image - scene.dream_textures_prompt.seed = str(seed) # update property in case seed was sourced randomly or from hash - # create a hash from the Blender image datablock to use as unique ID of said image and store it in the prompt history - # and as custom property of the image. 
Needs to be a string because the int from the hash function is too large - image_hash = hashlib.sha256((np.array(image.pixels) * 255).tobytes()).hexdigest() - image['dream_textures_hash'] = image_hash - scene.dream_textures_prompt.hash = image_hash - history_entry = context.scene.dream_textures_history.add() - for key, value in history_template.items(): - match key: - case 'control_nets': - for net in value: - n = history_entry.control_nets.add() - for prop in n.__annotations__.keys(): - setattr(n, prop, getattr(net, prop)) - case _: - setattr(history_entry, key, value) - history_entry.seed = str(seed) - history_entry.hash = image_hash - if is_file_batch: - history_entry.prompt_structure_token_subject = file_batch_lines[iteration] - iteration += 1 - if iteration < iteration_limit and not future.cancelled: - generate_next() + scene.dream_textures_progress = scene.dream_textures_progress + 1 + last_data_block = bpy_image(f"Step {scene.dream_textures_progress}/{prompt.steps}", progress.image.shape[1], progress.image.shape[0], progress.image.ravel(), last_data_block) + for area in screen.areas: + if area.type == 'IMAGE_EDITOR': + area.spaces.active.image = last_data_block + + def callback(result: List[api.GenerationResult] | Exception): + if isinstance(result, Exception): + scene.dream_textures_info = "" + scene.dream_textures_progress = 0 + eval('bpy.ops.' + NotifyResult.bl_idname)('INVOKE_DEFAULT', exception=repr(result)) + raise result else: + nonlocal last_data_block + for i, generation in enumerate(result): + # Create a trimmed image name + prompt_string = context.scene.dream_textures_prompt.prompt_structure_token_subject + seed_str_length = len(str(generation.seed)) + trim_aware_name = (prompt_string[:54 - seed_str_length] + '..') if len(prompt_string) > 54 else prompt_string + name_with_trimmed_prompt = f"{trim_aware_name} ({generation.seed})" + + # Create the image datablock + image = bpy_image(name_with_trimmed_prompt, generation.image.shape[1], generation.image.shape[0], generation.image.ravel(), last_data_block) + last_data_block = None + + # Add Image Texture node + if node_tree is not None: + nodes = node_tree.nodes + texture_node = nodes.new("ShaderNodeTexImage") + texture_node.image = image + texture_node.location = node_tree_center + (i * 260, -i * 297) + nodes.active = texture_node + + # Open the image in any active image editors + for area in screen.areas: + if area.type == 'IMAGE_EDITOR': + area.spaces.active.image = image + scene.dream_textures_prompt.seed = str(generation.seed) # update property in case seed was sourced randomly or from hash + + # create a hash from the Blender image datablock to use as unique ID of said image and store it in the prompt history + # and as custom property of the image. 
Needs to be a string because the int from the hash function is too large + image_hash = hashlib.sha256((np.array(image.pixels) * 255).tobytes()).hexdigest() + image['dream_textures_hash'] = image_hash + scene.dream_textures_prompt.hash = image_hash + history_entry = context.scene.dream_textures_history.add() + for key, value in history_template.items(): + match key: + case 'control_nets': + for net in value: + n = history_entry.control_nets.add() + for prop in n.__annotations__.keys(): + setattr(n, prop, getattr(net, prop)) + case _: + setattr(history_entry, key, value) + history_entry.seed = str(generation.seed) + history_entry.hash = image_hash + if is_file_batch: + history_entry.prompt_structure_token_subject = file_batch_lines[i] scene.dream_textures_info = "" scene.dream_textures_progress = 0 + + backend.generate(**prompt.generate_args(context), step_callback=step_callback, callback=callback) - def exception_callback(_, exception): - scene.dream_textures_info = "" - scene.dream_textures_progress = 0 - if hasattr(gen, '_active_generation_future'): - del gen._active_generation_future - eval('bpy.ops.' + NotifyResult.bl_idname)('INVOKE_DEFAULT', exception=repr(exception)) - raise exception - - original_prompt = generated_args["prompt"] - original_negative_prompt = generated_args["negative_prompt"] - gen = Generator.shared() - def generate_next(): - batch_size = min(generated_args["optimizations"].batch_size, iteration_limit-iteration) - if generated_args['pipeline'] == Pipeline.STABILITY_SDK: - # Stability SDK is able to accept a list of prompts, but I can - # only ever get it to generate multiple of the first one. - batch_size = 1 - if is_file_batch: - generated_args["prompt"] = file_batch_lines[iteration: iteration+batch_size] - generated_args["negative_prompt"] = file_batch_lines_negative[iteration: iteration+batch_size] - else: - generated_args["prompt"] = [original_prompt] * batch_size - generated_args["negative_prompt"] = [original_negative_prompt] * batch_size - if len(generated_args['control_net']) > 0: - f = gen.control_net( - image=init_image, - inpaint=generated_args['init_img_action'] == 'inpaint', - **generated_args - ) - elif init_image is not None: - match generated_args['init_img_action']: - case 'modify': - models = list(filter( - lambda m: m.model_base == generated_args['model'], - context.preferences.addons[StableDiffusionPreferences.bl_idname].preferences.installed_models - )) - supports_depth = generated_args['pipeline'].depth() and len(models) > 0 and ModelType[models[0].model_type] == ModelType.DEPTH - def require_depth(): - if not supports_depth: - raise ValueError("Selected pipeline and model do not support depth conditioning. 
Please select a different model, such as 'stable-diffusion-2-depth' or change the 'Image Type' to 'Color'.") - match generated_args['modify_action_source_type']: - case 'color': - f = gen.image_to_image( - image=init_image, - **generated_args - ) - case 'depth_generated': - require_depth() - f = gen.depth_to_image( - image=init_image, - depth=None, - **generated_args, - ) - case 'depth_map': - require_depth() - f = gen.depth_to_image( - image=init_image, - depth=np.array(scene.init_depth.pixels) - .astype(np.float32) - .reshape((scene.init_depth.size[1], scene.init_depth.size[0], scene.init_depth.channels)), - **generated_args, - ) - case 'depth': - require_depth() - f = gen.depth_to_image( - image=None, - depth=np.flipud(init_image.astype(np.float32) / 255.), - **generated_args, - ) - case 'inpaint': - f = gen.inpaint( - image=init_image, - **generated_args - ) - case 'outpaint': - f = gen.outpaint( - image=init_image, - **generated_args - ) - else: - f = gen.prompt_to_image( - **generated_args, - ) - gen._active_generation_future = f - f.call_done_on_exception = False - f.add_response_callback(step_callback) - f.add_exception_callback(exception_callback) - f.add_done_callback(done_callback) - generate_next() return {"FINISHED"} def kill_generator(context=bpy.context): diff --git a/operators/view_history.py b/operators/view_history.py index 45440c03..ea5e4025 100644 --- a/operators/view_history.py +++ b/operators/view_history.py @@ -12,7 +12,7 @@ def draw_item(self, context, layout, data, item, icon, active_data, active_propn layout.label(text=f"{item.seed}", translate=False) layout.label(text=f"{item.width}x{item.height}", translate=False) layout.label(text=f"{item.steps} steps", translate=False) - layout.label(text=next(x for x in scheduler_options if x[0] == item.scheduler)[1], translate=False) + layout.label(text=item.scheduler, translate=False) elif self.layout_type == 'GRID': layout.alignment = 'CENTER' layout.label(text="", icon_value=icon) diff --git a/property_groups/dream_prompt.py b/property_groups/dream_prompt.py index 8485a34c..70af9580 100644 --- a/property_groups/dream_prompt.py +++ b/property_groups/dream_prompt.py @@ -211,32 +211,99 @@ def get_optimizations(self: DreamPrompt): optimizations.attention_slice_size = 'auto' return optimizations -def generate_args(self): - args = { key: getattr(self, key) for key in DreamPrompt.__annotations__ } - if not args['use_negative_prompt']: - args['negative_prompt'] = None - args['prompt'] = self.generate_prompt() - args['seed'] = self.get_seed() - args['optimizations'] = self.get_optimizations() - args['scheduler'] = Scheduler(args['scheduler']) - args['step_preview_mode'] = StepPreviewMode(args['step_preview_mode']) - args['outpaint_origin'] = (args['outpaint_origin'][0], args['outpaint_origin'][1]) - args['key'] = bpy.context.preferences.addons[StableDiffusionPreferences.bl_idname].preferences.dream_studio_key - args['seamless_axes'] = SeamlessAxes(args['seamless_axes']) - args['width'] = args['width'] if args['use_size'] else None - args['height'] = args['height'] if args['use_size'] else None - - args['control_net'] = [net.control_net for net in args['control_nets']] - args['controlnet_conditioning_scale'] = [net.conditioning_scale for net in args['control_nets']] - args['control'] = [ - np.flipud( - np.array(net.control_image.pixels) - .reshape((net.control_image.size[1], net.control_image.size[0], net.control_image.channels)) - ) - for net in args['control_nets'] - if net.control_image is not None - ] - del args['control_nets'] 
+def generate_args(self, context, iteration=0): + is_file_batch = self.prompt_structure == file_batch_structure.id + file_batch_lines = [] + file_batch_lines_negative = [] + if is_file_batch: + file_batch_lines = [line.body for line in context.scene.dream_textures_prompt_file.lines if len(line.body.strip()) > 0] + file_batch_lines_negative = [""] * len(file_batch_lines) + + task: api.Task = api.PromptToImage() + if self.use_init_img: + init_image = None + match self.init_img_src: + case 'file': + init_image = context.scene.init_img + case 'open_editor': + for area in context.screen.areas: + if area.type == 'IMAGE_EDITOR': + if area.spaces.active.image is not None: + init_image = area.spaces.active.image + if init_image is not None: + init_image = np.flipud( + (np.array(init_image.pixels) * 255) + .astype(np.uint8) + .reshape((init_image.size[1], init_image.size[0], init_image.channels)) + ) + match self.init_img_action: + case 'modify': + match self.modify_action_source_type: + case 'color': + task = api.ImageToImage( + image=init_image, + strength=self.strength, + fit=self.fit + ) + case 'depth_generated': + task = api.DepthToImage( + depth=None, + image=init_image, + strength=self.strength + ) + case 'depth_map': + task = api.DepthToImage( + depth=np.array(context.scene.init_depth.pixels) + .astype(np.float32) + .reshape((scene.init_depth.size[1], scene.init_depth.size[0], scene.init_depth.channels)), + image=init_image, + strength=self.strength + ) + case 'depth': + task = api.DepthToImage( + image=None, + depth=np.flipud(init_image.astype(np.float32) / 255.), + strength=self.strength + ) + case 'inpaint': + task = api.Inpaint( + image=init_image, + strength=self.strength, + fit=self.fit, + mask_source=api.Inpaint.MaskSource.ALPHA if self.inpaint_mask_src == 'alpha' else api.Inpaint.MaskSource.PROMPT, + mask_prompt=self.text_mask, + confidence=self.text_mask_confidence + ) + case 'outpaint': + task = api.Outpaint( + image=init_image, + origin=(self.outpaint_origin[0], self.outpaint_origin[1]) + ) + + args = { + 'task': task, + 'model': next(model for model in self.get_backend().list_models(context) if model is not None and model.id == self.model), + 'prompt': api.Prompt( + file_batch_lines if is_file_batch else self.generate_prompt(), + file_batch_lines_negative if is_file_batch else (self.negative_prompt if self.use_negative_prompt else None) + ), + 'size': (self.width, self.height) if self.use_size else None, + 'seed': self.get_seed(), + 'steps': self.steps, + 'guidance_scale': self.cfg_scale, + 'scheduler': self.scheduler, + 'seamless_axes': SeamlessAxes(self.seamless_axes), + 'step_preview_mode': StepPreviewMode(self.step_preview_mode), + 'iterations': self.iterations + } + # args['control'] = [ + # np.flipud( + # np.array(net.control_image.pixels) + # .reshape((net.control_image.size[1], net.control_image.size[0], net.control_image.channels)) + # ) + # for net in args['control_nets'] + # if net.control_image is not None + # ] return args def get_backend(self) -> api.Backend: From a11bc18158a1433de1c817022ec1cd37cb6a97b4 Mon Sep 17 00:00:00 2001 From: Carson Katri Date: Fri, 28 Apr 2023 14:42:21 -0400 Subject: [PATCH 4/8] Add test backend addon --- api/backend/backend.py | 5 +++++ community_backends/test.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 community_backends/test.py diff --git a/api/backend/backend.py b/api/backend/backend.py index b7a594ab..3beb5285 100644 --- a/api/backend/backend.py +++ b/api/backend/backend.py @@ -36,6 
+36,11 @@ def generate( def register(cls): from ...property_groups.dream_prompt import DreamPrompt setattr(DreamPrompt, cls._attribute(), bpy.props.PointerProperty(type=cls)) + + @classmethod + def unregister(cls): + from ...property_groups.dream_prompt import DreamPrompt + delattr(DreamPrompt, cls._attribute()) @classmethod def _id(cls) -> str: diff --git a/community_backends/test.py b/community_backends/test.py new file mode 100644 index 00000000..663ada05 --- /dev/null +++ b/community_backends/test.py @@ -0,0 +1,33 @@ +bl_info = { + "name": "Test Backend", + "blender": (3, 1, 0), + "category": "Paint", +} + +import bpy +from typing import List, Tuple +from dream_textures.api import * + +class TestBackend(Backend): + name = "Test" + description = "A short description of this backend" + + custom_optimization: bpy.props.BoolProperty(name="My Custom Optimization") + + def list_models(self, context) -> List[Model]: + return [] + + def list_schedulers(self, context) -> List[str]: + return [] + + def generate(self, task: Task, model: Model, prompt: Prompt, size: Tuple[int, int] | None, seed: int, steps: int, guidance_scale: float, scheduler: str, seamless_axes: SeamlessAxes, step_preview_mode: StepPreviewMode, iterations: int, step_callback: StepCallback, callback: Callback): + raise NotImplementedError() + + def draw_speed_optimizations(self, layout, context): + layout.prop(self, "custom_optimization") + +def register(): + bpy.utils.register_class(TestBackend) + +def unregister(): + bpy.utils.unregister_class(TestBackend) From db77f56eeb100e1aa5a1071bce9634ac9bcb1de9 Mon Sep 17 00:00:00 2001 From: Carson Katri Date: Sat, 29 Apr 2023 15:08:41 -0400 Subject: [PATCH 5/8] Fix optimization eval --- diffusers_backend.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/diffusers_backend.py b/diffusers_backend.py index 1200f17a..6ee93b23 100644 --- a/diffusers_backend.py +++ b/diffusers_backend.py @@ -99,7 +99,7 @@ def list_schedulers(self, context) -> List[str]: def optimizations(self) -> Optimizations: optimizations = Optimizations() for prop in dir(self): - if hasattr(optimizations, prop): + if hasattr(optimizations, prop) and not prop.startswith('__'): setattr(optimizations, prop, getattr(self, prop)) if self.attention_slice_size_src == 'auto': optimizations.attention_slice_size = 'auto' @@ -126,10 +126,6 @@ def generate(self, task: Task, model: Model, prompt: Prompt, size: Tuple[int, in future: Future match task: case PromptToImage(): - print(common_kwargs) - import pickle - del common_kwargs['optimizations'].__annotations__ - print(pickle.dumps(common_kwargs)) future = gen.prompt_to_image(**common_kwargs) case ImageToImage(image=image, strength=strength, fit=fit): future = gen.image_to_image(image=image, fit=fit, strength=strength, **common_kwargs) From e8113d04591b1077be05de90a1e8e6a22c3a4d86 Mon Sep 17 00:00:00 2001 From: Carson Katri Date: Fri, 5 May 2023 17:21:30 -0400 Subject: [PATCH 6/8] Add support for showing messages during generation --- api/models/generation_result.py | 28 ++++++++++++++++++++++++++-- diffusers_backend.py | 4 ++-- operators/dream_texture.py | 17 +++++++++++------ 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/api/models/generation_result.py b/api/models/generation_result.py index 4b11a462..9edfb297 100644 --- a/api/models/generation_result.py +++ b/api/models/generation_result.py @@ -9,14 +9,38 @@ class GenerationResult: ```python result = GenerationResult( + progress=3, + total=5, image=np.zeros((512, 512, 3)), seed=42 ) ``` + 
+ Alternatively, create a result with just a `title` and progress values. + + ```python + result = GenerationResult( + progress=3, + total=5, + title="Loading model" + ) + ``` """ - image: NDArray + + progress: int + """The amount out of `total` that has been completed""" + + total: int + """The number of steps to complete""" + + title: str | None = None + """The name of the currently executing task""" + + image: NDArray | None = None """The generated image as a Numpy array. + The shape should be `(height, width, channels)`, where `channels` is 3 or 4. """ - seed: int + + seed: int | None = None """The seed used to generate the image.""" \ No newline at end of file diff --git a/diffusers_backend.py b/diffusers_backend.py index 6ee93b23..1ab23cb2 100644 --- a/diffusers_backend.py +++ b/diffusers_backend.py @@ -157,11 +157,11 @@ def generate(self, task: Task, model: Model, prompt: Prompt, size: Tuple[int, in case _: raise NotImplementedError() def on_step(_, step_image: ImageGenerationResult): - step_callback(GenerationResult(image=step_image.images[-1], seed=step_image.seeds[-1])) + step_callback(GenerationResult(progress=step_image.step, total=steps, image=step_image.images[-1], seed=step_image.seeds[-1])) def on_done(future: Future): result: ImageGenerationResult = future.result(last_only=True) callback([ - GenerationResult(image=result.images[i], seed=result.seeds[i]) + GenerationResult(progress=result.step, total=steps, image=result.images[i], seed=result.seeds[i]) for i in range(len(result.images)) ]) def on_exception(_, exception): diff --git a/operators/dream_texture.py b/operators/dream_texture.py index 8b7de597..16b558f7 100644 --- a/operators/dream_texture.py +++ b/operators/dream_texture.py @@ -74,12 +74,14 @@ def step_progress_update(self, context): execution_start = time.time() def step_callback(progress: api.GenerationResult): nonlocal last_data_block - scene.dream_textures_last_execution_time = f"{time.time() - execution_start:.2f} seconds" - scene.dream_textures_progress = scene.dream_textures_progress + 1 - last_data_block = bpy_image(f"Step {scene.dream_textures_progress}/{prompt.steps}", progress.image.shape[1], progress.image.shape[0], progress.image.ravel(), last_data_block) - for area in screen.areas: - if area.type == 'IMAGE_EDITOR': - area.spaces.active.image = last_data_block + scene.dream_textures_last_execution_time = progress.title or f"{time.time() - execution_start:.2f} seconds" + bpy.types.Scene.dream_textures_progress = bpy.props.IntProperty(name="", min=0, max=progress.total, update=step_progress_update) + scene.dream_textures_progress = progress.progress + if progress.image is not None: + last_data_block = bpy_image(f"Step {scene.dream_textures_progress}/{prompt.steps}", progress.image.shape[1], progress.image.shape[0], progress.image.ravel(), last_data_block) + for area in screen.areas: + if area.type == 'IMAGE_EDITOR': + area.spaces.active.image = last_data_block def callback(result: List[api.GenerationResult] | Exception): if isinstance(result, Exception): @@ -90,6 +92,9 @@ def callback(result: List[api.GenerationResult] | Exception): else: nonlocal last_data_block for i, generation in enumerate(result): + if generation.image is None or generation.seed is None: + continue + # Create a trimmed image name prompt_string = context.scene.dream_textures_prompt.prompt_structure_token_subject seed_str_length = len(str(generation.seed)) From a729b5673bc8ddfaec8afc43b6f1a0c4bc0be1f6 Mon Sep 17 00:00:00 2001 From: Carson Katri Date: Fri, 5 May 2023 16:55:01 -0400 
Subject: [PATCH 7/8] Add CoreML backend --- .gitignore | 1 + community_backends/coreml/README.md | 4 + community_backends/coreml/__init__.py | 80 ++++++++++++++++ community_backends/coreml/actor.py | 103 +++++++++++++++++++++ community_backends/coreml/preferences.py | 7 ++ community_backends/coreml/requirements.txt | 3 + scripts/package_backend.py | 34 +++++++ 7 files changed, 232 insertions(+) create mode 100644 community_backends/coreml/README.md create mode 100644 community_backends/coreml/__init__.py create mode 100644 community_backends/coreml/actor.py create mode 100644 community_backends/coreml/preferences.py create mode 100644 community_backends/coreml/requirements.txt create mode 100644 scripts/package_backend.py diff --git a/.gitignore b/.gitignore index 98fd9f00..7ee64e06 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .DS_Store +__packaged__ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/community_backends/coreml/README.md b/community_backends/coreml/README.md new file mode 100644 index 00000000..9e2a7cb1 --- /dev/null +++ b/community_backends/coreml/README.md @@ -0,0 +1,4 @@ +# CoreML Backend +Faster inference on Apple Silicon with [apple/ml-stable-diffusion](https://github.com/apple/ml-stable-diffusion). + +Converted mlpackages are stored in the directory specified by `DREAM_TEXTURES_COREML_HOME`, or `~/.cache/dream_textures_coreml` by default. \ No newline at end of file diff --git a/community_backends/coreml/__init__.py b/community_backends/coreml/__init__.py new file mode 100644 index 00000000..d7a3d2db --- /dev/null +++ b/community_backends/coreml/__init__.py @@ -0,0 +1,80 @@ +bl_info = { + "name": "CoreML Backend", + "blender": (3, 1, 0), + "category": "Paint", +} + +from multiprocessing import current_process +import site +import sys +import os + +def _load_dependencies(): + site.addsitedir(os.path.join(os.path.dirname(os.path.realpath(__file__)), ".python_dependencies")) + deps = sys.path.pop(-1) + sys.path.insert(0, deps) + +if current_process().name == "__actor__": + _load_dependencies() +else: + import bpy + from typing import Tuple + from dream_textures.api import Backend, Task, Model, Prompt, SeamlessAxes, StepPreviewMode, StepCallback, Callback + from dream_textures.diffusers_backend import DiffusersBackend + from .actor import CoreMLActor + + class CoreMLBackend(Backend): + name = "CoreML" + description = "CPU/GPU/NE accelerated generation on Apple Silicon" + + compute_unit: bpy.props.EnumProperty( + name="Compute Unit", + items=( + ('ALL', 'All', 'Use all compute units available, including the neural engine'), + ('CPU_ONLY', 'CPU', 'Limit the model to only use the CPU'), + ('CPU_AND_GPU', 'CPU and GPU', 'Use both the CPU and GPU, but not the neural engine'), + ('CPU_AND_NE', 'CPU and NE', 'Use both the CPU and neural engine, but not the GPU'), + ) + ) + + def list_models(self, context): + return DiffusersBackend.list_models(self, context) + + def list_schedulers(self, context): + return [ + "DDIM", + "DPM Solver Multistep", + "Euler Ancestral Discrete", + "Euler Discrete", + "LMS Discrete", + "PNDM" + ] + + def draw_speed_optimizations(self, layout, context): + layout.prop(self, "compute_unit") + + def generate(self, task: Task, model: Model, prompt: Prompt, size: Tuple[int, int] | None, seed: int, steps: int, guidance_scale: float, scheduler: str, seamless_axes: SeamlessAxes, step_preview_mode: StepPreviewMode, iterations: int, step_callback: StepCallback, callback: Callback): + gen: CoreMLActor = CoreMLActor.shared() + 
gen.generate( + model=model.id.replace('models--', '').replace('--', '/'), + prompt=prompt.positive, + negative_prompt=prompt.negative, + size=size, + seed=seed, + steps=steps, + guidance_scale=guidance_scale, + scheduler=scheduler, + seamless_axes=seamless_axes, + step_preview_mode=step_preview_mode, + iterations=iterations, + compute_unit=self.compute_unit, + controlnet=None, + controlnet_inputs=[] + ) + raise NotImplementedError() + + def register(): + bpy.utils.register_class(CoreMLBackend) + + def unregister(): + bpy.utils.unregister_class(CoreMLBackend) diff --git a/community_backends/coreml/actor.py b/community_backends/coreml/actor.py new file mode 100644 index 00000000..4268d92c --- /dev/null +++ b/community_backends/coreml/actor.py @@ -0,0 +1,103 @@ +import numpy as np +from numpy.typing import NDArray +from dream_textures.generator_process import Actor +import os + +class CoreMLActor(Actor): + def generate( + self, + model: str, + prompt: str, + negative_prompt: str | None, + size: tuple[int, int], + seed: int, + steps: int, + guidance_scale: float, + scheduler: str, + seamless_axes: str, + step_preview_mode: str, + iterations: int, + + compute_unit: str, + controlnet: list[str] | None, + controlnet_inputs: list[str] + ) -> NDArray: + from python_coreml_stable_diffusion import pipeline + from python_coreml_stable_diffusion import torch2coreml, unet + + np.random.seed(seed) + + # Initializing PyTorch pipe for reference configuration + from diffusers import StableDiffusionPipeline + pytorch_pipe = StableDiffusionPipeline.from_pretrained(model, + use_auth_token=True) + # There is currently no UI for this, so remove it. + # This avoids wasting time converting and loading it. + pytorch_pipe.safety_checker = None + + mlpackage_cache = os.path.expanduser( + os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "dream_textures_coreml")) + ) + mlpackage_dir = os.path.join(mlpackage_cache, model.replace('/', '_')) + + if not os.path.exists(mlpackage_dir): + os.makedirs(mlpackage_dir, exist_ok=True) + class ConversionArgs: + model_version = model + compute_unit = 'ALL' + latent_h = None + latent_w = None + attention_implementation = unet.ATTENTION_IMPLEMENTATION_IN_EFFECT.name + o = mlpackage_dir + check_output_correctness = False + chunk_unet = False + quantize_weights_to_8bits = False + unet_support_controlnet = False + text_encoder_vocabulary_url = "https://huggingface.co/openai/clip-vit-base-patch32/resolve/main/vocab.json" + text_encoder_merges_url = "https://huggingface.co/openai/clip-vit-base-patch32/resolve/main/merges.txt" + conversion_args = ConversionArgs() + torch2coreml.convert_vae_decoder(pytorch_pipe, conversion_args) + print("VAE decoder converted") + torch2coreml.convert_vae_encoder(pytorch_pipe, conversion_args) + print("VAE encoder converted") + torch2coreml.convert_unet(pytorch_pipe, conversion_args) + print("U-Net converted") + torch2coreml.convert_text_encoder(pytorch_pipe, conversion_args) + print("Text encoder converted") + + user_specified_scheduler = None + if scheduler is not None: + user_specified_scheduler = pipeline.SCHEDULER_MAP[ + scheduler.replace(' ', '')].from_config(pytorch_pipe.scheduler.config) + + coreml_pipe = pipeline.get_coreml_pipe( + pytorch_pipe=pytorch_pipe, + mlpackages_dir=mlpackage_dir, + model_version=model, + compute_unit=compute_unit, + scheduler_override=user_specified_scheduler, + controlnet_models=controlnet + ) + + if controlnet: + controlnet_cond = [] + for i, _ in enumerate(controlnet): + image_path = 
controlnet_inputs[i] + image = pipeline.prepare_controlnet_cond(image_path, coreml_pipe.height, coreml_pipe.width) + controlnet_cond.append(image) + else: + controlnet_cond = None + + # Beginning image generation. + image = coreml_pipe( + prompt=prompt, + height=coreml_pipe.height, + width=coreml_pipe.width, + num_inference_steps=steps, + guidance_scale=guidance_scale, + controlnet_cond=controlnet_cond, + negative_prompt=negative_prompt, + ) + + image["images"][0].save('test.png') + return image["images"][0] \ No newline at end of file diff --git a/community_backends/coreml/preferences.py b/community_backends/coreml/preferences.py new file mode 100644 index 00000000..a6c5d247 --- /dev/null +++ b/community_backends/coreml/preferences.py @@ -0,0 +1,7 @@ +import bpy + +class CoreMLBackendPreferences(bpy.types.AddonPreferences): + bl_idname = __package__ + + def draw(self, context): + layout = self.layout \ No newline at end of file diff --git a/community_backends/coreml/requirements.txt b/community_backends/coreml/requirements.txt new file mode 100644 index 00000000..289327f1 --- /dev/null +++ b/community_backends/coreml/requirements.txt @@ -0,0 +1,3 @@ +coremltools +git+https://github.com/apple/ml-stable-diffusion@main#egg=python_coreml_stable_diffusion +scipy \ No newline at end of file diff --git a/scripts/package_backend.py b/scripts/package_backend.py new file mode 100644 index 00000000..fe7c0289 --- /dev/null +++ b/scripts/package_backend.py @@ -0,0 +1,34 @@ +import argparse +import subprocess +from pathlib import Path +import shutil +import sys + +parser = argparse.ArgumentParser() +parser.add_argument("--backend", type=lambda p: Path(p).absolute()) +parser.add_argument("--output", type=lambda p: Path(p).absolute()) +parser.add_argument("--no-deps", action="store_true") +parser.add_argument("--install", action="store_true") + +def main(): + args = parser.parse_args() + + # Copy the files into the packaged addon + shutil.copytree(args.backend, args.output / args.backend.name, dirs_exist_ok=True) + + if args.install: + # Install the dependencies into the package. 
+ subprocess.run( + [ + sys.executable, "-m", "pip", "install", + "-r", args.backend / "requirements.txt", + "--upgrade", + "--no-cache-dir", + "--target", args.output / args.backend.name / ".python_dependencies", + ] + (["--no-deps"] if args.no_deps else []), + check=True, + cwd=args.output + ) + +if __name__ == '__main__': + main() \ No newline at end of file From dfd064fe101ed49481cc085c85416b78ab48d906 Mon Sep 17 00:00:00 2001 From: Carson Katri Date: Sat, 6 May 2023 13:52:48 -0400 Subject: [PATCH 8/8] Improve progress indicators --- community_backends/coreml/__init__.py | 13 +- community_backends/coreml/actor.py | 252 +++++++++++++++++++------- 2 files changed, 202 insertions(+), 63 deletions(-) diff --git a/community_backends/coreml/__init__.py b/community_backends/coreml/__init__.py index d7a3d2db..16156f98 100644 --- a/community_backends/coreml/__init__.py +++ b/community_backends/coreml/__init__.py @@ -55,7 +55,7 @@ def draw_speed_optimizations(self, layout, context): def generate(self, task: Task, model: Model, prompt: Prompt, size: Tuple[int, int] | None, seed: int, steps: int, guidance_scale: float, scheduler: str, seamless_axes: SeamlessAxes, step_preview_mode: StepPreviewMode, iterations: int, step_callback: StepCallback, callback: Callback): gen: CoreMLActor = CoreMLActor.shared() - gen.generate( + future = gen.generate( model=model.id.replace('models--', '').replace('--', '/'), prompt=prompt.positive, negative_prompt=prompt.negative, @@ -71,7 +71,16 @@ def generate(self, task: Task, model: Model, prompt: Prompt, size: Tuple[int, in controlnet=None, controlnet_inputs=[] ) - raise NotImplementedError() + def on_step(_, result): + step_callback(result) + def on_done(future): + result = future.result(last_only=True) + callback([result]) + def on_exception(_, exception): + callback(exception) + future.add_response_callback(on_step) + future.add_exception_callback(on_exception) + future.add_done_callback(on_done) def register(): bpy.utils.register_class(CoreMLBackend) diff --git a/community_backends/coreml/actor.py b/community_backends/coreml/actor.py index 4268d92c..999f6f59 100644 --- a/community_backends/coreml/actor.py +++ b/community_backends/coreml/actor.py @@ -1,19 +1,28 @@ import numpy as np from numpy.typing import NDArray from dream_textures.generator_process import Actor +from dream_textures.generator_process.future import Future +from dream_textures.generator_process.models import ImageGenerationResult +from dream_textures.api import GenerationResult import os +import random +import gc class CoreMLActor(Actor): + invalidation_args = None + cached_pipe = None + def generate( self, model: str, prompt: str, negative_prompt: str | None, - size: tuple[int, int], - seed: int, + size: tuple[int, int] | None, + seed: int | None, steps: int, guidance_scale: float, scheduler: str, + seamless_axes: str, step_preview_mode: str, iterations: int, @@ -21,83 +30,204 @@ def generate( compute_unit: str, controlnet: list[str] | None, controlnet_inputs: list[str] - ) -> NDArray: + ): + future = Future() + yield future + + import diffusers from python_coreml_stable_diffusion import pipeline from python_coreml_stable_diffusion import torch2coreml, unet - + import torch + from PIL import ImageOps + + seed = random.randrange(0, np.iinfo(np.uint32).max) if seed is None else seed np.random.seed(seed) - # Initializing PyTorch pipe for reference configuration - from diffusers import StableDiffusionPipeline - pytorch_pipe = StableDiffusionPipeline.from_pretrained(model, - use_auth_token=True) - # 
There is currently no UI for this, so remove it. - # This avoids wasting time converting and loading it. - pytorch_pipe.safety_checker = None - - mlpackage_cache = os.path.expanduser( - os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "dream_textures_coreml")) - ) - mlpackage_dir = os.path.join(mlpackage_cache, model.replace('/', '_')) - - if not os.path.exists(mlpackage_dir): - os.makedirs(mlpackage_dir, exist_ok=True) - class ConversionArgs: - model_version = model - compute_unit = 'ALL' - latent_h = None - latent_w = None - attention_implementation = unet.ATTENTION_IMPLEMENTATION_IN_EFFECT.name - o = mlpackage_dir - check_output_correctness = False - chunk_unet = False - quantize_weights_to_8bits = False - unet_support_controlnet = False - text_encoder_vocabulary_url = "https://huggingface.co/openai/clip-vit-base-patch32/resolve/main/vocab.json" - text_encoder_merges_url = "https://huggingface.co/openai/clip-vit-base-patch32/resolve/main/merges.txt" - conversion_args = ConversionArgs() - torch2coreml.convert_vae_decoder(pytorch_pipe, conversion_args) - print("VAE decoder converted") - torch2coreml.convert_vae_encoder(pytorch_pipe, conversion_args) - print("VAE encoder converted") - torch2coreml.convert_unet(pytorch_pipe, conversion_args) - print("U-Net converted") - torch2coreml.convert_text_encoder(pytorch_pipe, conversion_args) - print("Text encoder converted") - - user_specified_scheduler = None - if scheduler is not None: - user_specified_scheduler = pipeline.SCHEDULER_MAP[ - scheduler.replace(' ', '')].from_config(pytorch_pipe.scheduler.config) - - coreml_pipe = pipeline.get_coreml_pipe( - pytorch_pipe=pytorch_pipe, - mlpackages_dir=mlpackage_dir, - model_version=model, - compute_unit=compute_unit, - scheduler_override=user_specified_scheduler, - controlnet_models=controlnet - ) + new_invalidation_args = (model, scheduler, controlnet) + if self.cached_pipe is None or new_invalidation_args != self.invalidation_args: + self.invalidation_args = new_invalidation_args + + + future.add_response(GenerationResult(progress=1, total=1, title="Loading reference pipeline")) + + # Initializing PyTorch pipe for reference configuration + from diffusers import StableDiffusionPipeline + pytorch_pipe = StableDiffusionPipeline.from_pretrained(model, + use_auth_token=True) + # There is currently no UI for this, so remove it. + # This avoids wasting time converting and loading it. 
+ pytorch_pipe.safety_checker = None + + mlpackage_cache = os.path.expanduser( + os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "dream_textures_coreml")) + ) + mlpackage_dir = os.path.join(mlpackage_cache, model.replace('/', '_')) + + if not os.path.exists(mlpackage_dir): + def step_title(i, model_type): + future.add_response(GenerationResult(progress=i, total=4, title=f"Converting model to CoreML ({model_type})")) + os.makedirs(mlpackage_dir, exist_ok=True) + class ConversionArgs: + model_version = model + compute_unit = 'ALL' + latent_h = None + latent_w = None + attention_implementation = unet.ATTENTION_IMPLEMENTATION_IN_EFFECT.name + o = mlpackage_dir + check_output_correctness = False + chunk_unet = False + quantize_weights_to_8bits = False + unet_support_controlnet = False + text_encoder_vocabulary_url = "https://huggingface.co/openai/clip-vit-base-patch32/resolve/main/vocab.json" + text_encoder_merges_url = "https://huggingface.co/openai/clip-vit-base-patch32/resolve/main/merges.txt" + conversion_args = ConversionArgs() + + step_title(1, "VAE decoder") + torch2coreml.convert_vae_decoder(pytorch_pipe, conversion_args) + + step_title(2, "VAE encoder") + torch2coreml.convert_vae_encoder(pytorch_pipe, conversion_args) + + step_title(3, "U-Net") + torch2coreml.convert_unet(pytorch_pipe, conversion_args) + + step_title(4, "text encoder") + torch2coreml.convert_text_encoder(pytorch_pipe, conversion_args) + + future.add_response(GenerationResult(progress=0, total=1, title=f"Loading converted CoreML pipeline")) + + user_specified_scheduler = None + if scheduler is not None: + user_specified_scheduler = pipeline.SCHEDULER_MAP[ + scheduler.replace(' ', '')].from_config(pytorch_pipe.scheduler.config) + + # NOTE: Modified to have a `callback` parameter. 
+ def get_coreml_pipe(pytorch_pipe, + mlpackages_dir, + model_version, + compute_unit, + delete_original_pipe=True, + scheduler_override=None, + controlnet_models=None, + callback=lambda model_name: None): + """ Initializes and returns a `CoreMLStableDiffusionPipeline` from an original + diffusers PyTorch pipeline + """ + # Ensure `scheduler_override` object is of correct type if specified + if scheduler_override is not None: + assert isinstance(scheduler_override, diffusers.SchedulerMixin) + pipeline.logger.warning( + "Overriding scheduler in pipeline: " + f"Default={pytorch_pipe.scheduler}, Override={scheduler_override}") + + # Gather configured tokenizer and scheduler attributes from the original pipe + coreml_pipe_kwargs = { + "tokenizer": pytorch_pipe.tokenizer, + "scheduler": pytorch_pipe.scheduler if scheduler_override is None else scheduler_override, + "feature_extractor": pytorch_pipe.feature_extractor, + } + + model_names_to_load = ["text_encoder", "unet", "vae_decoder"] + if getattr(pytorch_pipe, "safety_checker", None) is not None: + model_names_to_load.append("safety_checker") + else: + pipeline.logger.warning( + f"Original diffusers pipeline for {model_version} does not have a safety_checker, " + "Core ML pipeline will mirror this behavior.") + coreml_pipe_kwargs["safety_checker"] = None + + if delete_original_pipe: + del pytorch_pipe + gc.collect() + pipeline.logger.info("Removed PyTorch pipe to reduce peak memory consumption") + + if controlnet_models: + model_names_to_load.remove("unet") + callback("control-unet") + coreml_pipe_kwargs["unet"] = pipeline._load_mlpackage( + "control-unet", + mlpackages_dir, + model_version, + compute_unit, + ) + coreml_pipe_kwargs["controlnet"] = [] + for i, model_version in enumerate(controlnet_models): + callback(f"controlnet-{i}") + coreml_pipe_kwargs["controlnet"].append( + pipeline._load_mlpackage_controlnet( + mlpackages_dir, + model_version, + compute_unit, + ) + ) + else: + coreml_pipe_kwargs["controlnet"] = None + + # Load Core ML models + pipeline.logger.info(f"Loading Core ML models in memory from {mlpackages_dir}") + def load_package_with_callback(model_name): + callback(model_name) + return pipeline._load_mlpackage( + model_name, + mlpackages_dir, + model_version, + compute_unit, + ) + coreml_pipe_kwargs.update({ + model_name: load_package_with_callback(model_name) + for model_name in model_names_to_load + }) + pipeline.logger.info("Done.") + + pipeline.logger.info("Initializing Core ML pipe for image generation") + coreml_pipe = pipeline.CoreMLStableDiffusionPipeline(**coreml_pipe_kwargs) + pipeline.logger.info("Done.") + + return coreml_pipe + + model_i = 1 + def load_callback(model_name): + nonlocal model_i + future.add_response(GenerationResult(progress=model_i, total=3 + len(controlnet_inputs), title=f"Loading {model_name} mlpackage (this can take a while)")) + model_i += 1 + self.cached_pipe = get_coreml_pipe( + pytorch_pipe=pytorch_pipe, + mlpackages_dir=mlpackage_dir, + model_version=model, + compute_unit=compute_unit, + scheduler_override=user_specified_scheduler, + controlnet_models=controlnet, + callback=load_callback + ) + + height = self.cached_pipe.height if size is None else size[1] + width = self.cached_pipe.width if size is None else size[0] if controlnet: controlnet_cond = [] for i, _ in enumerate(controlnet): image_path = controlnet_inputs[i] - image = pipeline.prepare_controlnet_cond(image_path, coreml_pipe.height, coreml_pipe.width) + image = pipeline.prepare_controlnet_cond(image_path, height, width) 
controlnet_cond.append(image) else: controlnet_cond = None # Beginning image generation. - image = coreml_pipe( + generator = torch.Generator(device="cpu").manual_seed(seed) + def callback(i, t, latents): + preview = ImageGenerationResult.step_preview(self, step_preview_mode, width, height, torch.from_numpy(latents), generator, i) + image = next(iter(preview.images), None) + future.add_response(GenerationResult(progress=i, total=steps, image=image, seed=seed)) + image = self.cached_pipe( prompt=prompt, - height=coreml_pipe.height, - width=coreml_pipe.width, + height=height, + width=width, num_inference_steps=steps, guidance_scale=guidance_scale, controlnet_cond=controlnet_cond, negative_prompt=negative_prompt, + callback=callback ) - image["images"][0].save('test.png') - return image["images"][0] \ No newline at end of file + future.add_response(GenerationResult(progress=steps, total=steps, image=np.asarray(ImageOps.flip(image["images"][0]).convert('RGBA'), dtype=np.float32) / 255., seed=seed)) + future.set_done() \ No newline at end of file
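
Taken together, these patches let a community backend live in a single class. The sketch below is illustrative only and is not part of the patch series: it assumes the `dream_textures.api` names used by `community_backends/test.py` and `diffusers_backend.py` (`Backend`, `Model`, `Prompt`, `GenerationResult`, and the `Task` variants such as `PromptToImage` and `ImageToImage`), while `my_engine_generate` is a hypothetical placeholder for whatever actually produces images.

```python
bl_info = {
    "name": "Example Backend",
    "blender": (3, 1, 0),
    "category": "Paint",
}

import bpy
import numpy as np
from typing import List, Tuple
from dream_textures.api import *

def my_engine_generate(positive, negative, size, seed, steps, on_step):
    """Hypothetical stand-in for a real generator: calls `on_step` each step, returns a final image."""
    rng = np.random.default_rng(seed)
    image = rng.random((size[1], size[0], 3), dtype=np.float32)
    for i in range(steps):
        on_step(i, image)
    return image

class ExampleBackend(Backend):
    name = "Example"
    description = "Minimal backend against the api.Backend interface"

    def list_models(self, context) -> List[Model]:
        # The `id` of whichever model the user picks comes back through `model` in generate().
        return []

    def list_schedulers(self, context) -> List[str]:
        return ["DDIM"]

    def generate(self, task: Task, model: Model, prompt: Prompt, size: Tuple[int, int] | None,
                 seed: int, steps: int, guidance_scale: float, scheduler: str,
                 seamless_axes: SeamlessAxes, step_preview_mode: StepPreviewMode,
                 iterations: int, step_callback: StepCallback, callback: Callback):
        # Dispatch on the task variants that DreamPrompt.generate_args constructs.
        match task:
            case PromptToImage():
                pass
            case ImageToImage():
                pass  # a real backend would read task.image, task.strength and task.fit here
            case _:
                callback(NotImplementedError(f"Unsupported task: {task}"))
                return

        # Title-only results (no image yet) still drive the progress bar and status text.
        step_callback(GenerationResult(progress=0, total=steps, title="Loading model"))

        def on_step(i, preview):
            # Per-step previews; the operator shows the latest image in the Image Editor.
            step_callback(GenerationResult(progress=i + 1, total=steps, image=preview, seed=seed))

        # A real backend should do this work off the main thread (or in another process),
        # since generate() is called from Blender's UI thread.
        try:
            final = my_engine_generate(prompt.positive, prompt.negative,
                                       size or (512, 512), seed, steps, on_step)
            callback([GenerationResult(progress=steps, total=steps, image=final, seed=seed)])
        except Exception as e:
            callback(e)

def register():
    bpy.utils.register_class(ExampleBackend)

def unregister():
    bpy.utils.unregister_class(ExampleBackend)
```

The shape mirrors the CoreML backend above: emit title-only `GenerationResult`s while loading or converting models, per-step results carrying an `image` during denoising, and a final list of results through `callback` (or an exception on failure).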
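
One small detail in the CoreML backend: its `list_models` defers to `DiffusersBackend`, so the ids it receives back in `generate` are Hugging Face cache directory names, which it un-mangles into repo ids before downloading and converting. A quick illustration (the repo name is only an example):

```python
# huggingface_hub caches repositories under directories named "models--{owner}--{name}".
# The CoreML backend reverses that mangling to recover a repo id it can pass to
# StableDiffusionPipeline.from_pretrained.
model_id = "models--runwayml--stable-diffusion-v1-5".replace('models--', '').replace('--', '/')
assert model_id == "runwayml/stable-diffusion-v1-5"
```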
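
Finally, `scripts/package_backend.py` is what turns a backend folder into something distributable: it copies the folder into the output directory and, when `--install` is passed, pip-installs the backend's `requirements.txt` into a bundled `.python_dependencies` directory, which the CoreML add-on then moves to the front of `sys.path` inside its actor subprocess. A plausible invocation, given the layout above, would be `python scripts/package_backend.py --backend community_backends/coreml --output __packaged__ --install` (`__packaged__` being the directory ignored in `.gitignore`); the packaged folder can then be zipped and installed like any other Blender add-on.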