Merge pull request #3518 from vladmandic/dev
refresh master from dev
vladmandic authored Oct 24, 2024
2 parents 0d332ca + ea4df70 commit 3ba9ebc
Showing 25 changed files with 248 additions and 129 deletions.
20 changes: 20 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,25 @@
# Change Log for SD.Next

## Update for 2024-10-24

Improvements:
- SD3 loader enhancements
- report when loading incomplete model
- handle missing model components
- handle component preloading
- OpenVINO: add accuracy option
- ZLUDA: guess GPU arch

Fixes:
- fix send-to-control
- fix k-diffusion
- fix sd3 img2img and hires
- fix ipadapter supported model detection
- fix t2iadapter auto-download
- fix omnigen dynamic attention
- handle a1111 prompt scheduling
- handle omnigen image placeholder in prompt

## Update for 2024-10-23

### Highlights for 2024-10-23
2 changes: 1 addition & 1 deletion modules/control/run.py
@@ -219,7 +219,7 @@ def control_run(state: str = '',
p_extra_args = {}

if shared.sd_model is None:
shared.log.warning('Model not loaded')
shared.log.warning('Aborted: op=control model not loaded')
return [], '', '', 'Error: model not loaded'

unit_type = unit_type.strip().lower() if unit_type is not None else ''
37 changes: 19 additions & 18 deletions modules/control/units/t2iadapter.py
@@ -11,17 +11,17 @@
debug = log.trace if os.environ.get('SD_CONTROL_DEBUG', None) is not None else lambda *args, **kwargs: None
debug('Trace: CONTROL')
predefined_sd15 = {
'Segment': 'TencentARC/t2iadapter_seg_sd14v1',
'Zoe Depth': 'TencentARC/t2iadapter_zoedepth_sd15v1',
'OpenPose': 'TencentARC/t2iadapter_openpose_sd14v1',
'KeyPose': 'TencentARC/t2iadapter_keypose_sd14v1',
'Color': 'TencentARC/t2iadapter_color_sd14v1',
'Depth v1': 'TencentARC/t2iadapter_depth_sd14v1',
'Depth v2': 'TencentARC/t2iadapter_depth_sd15v2',
'Canny v1': 'TencentARC/t2iadapter_canny_sd14v1',
'Canny v2': 'TencentARC/t2iadapter_canny_sd15v2',
'Sketch v1': 'TencentARC/t2iadapter_sketch_sd14v1',
'Sketch v2': 'TencentARC/t2iadapter_sketch_sd15v2',
'Segment': ('TencentARC/t2iadapter_seg_sd14v1', {}),
'Zoe Depth': ('TencentARC/t2iadapter_zoedepth_sd15v1', {}),
'OpenPose': ('TencentARC/t2iadapter_openpose_sd14v1', {}),
'KeyPose': ('TencentARC/t2iadapter_keypose_sd14v1', {}),
'Color': ('TencentARC/t2iadapter_color_sd14v1', {}),
'Depth v1': ('TencentARC/t2iadapter_depth_sd14v1', {}),
'Depth v2': ('TencentARC/t2iadapter_depth_sd15v2', {}),
'Canny v1': ('TencentARC/t2iadapter_canny_sd14v1', {}),
'Canny v2': ('TencentARC/t2iadapter_canny_sd15v2', {}),
'Sketch v1': ('TencentARC/t2iadapter_sketch_sd14v1', {}),
'Sketch v2': ('TencentARC/t2iadapter_sketch_sd15v2', {}),
# 'Coadapter Canny': 'TencentARC/T2I-Adapter/models/coadapter-canny-sd15v1.pth',
# 'Coadapter Color': 'TencentARC/T2I-Adapter/models/coadapter-color-sd15v1.pth',
# 'Coadapter Depth': 'TencentARC/T2I-Adapter/models/coadapter-depth-sd15v1.pth',
@@ -30,12 +30,12 @@
# 'Coadapter Style': 'TencentARC/T2I-Adapter/models/coadapter-style-sd15v1.pth',
}
predefined_sdxl = {
'Canny XL': 'TencentARC/t2i-adapter-canny-sdxl-1.0',
'LineArt XL': 'TencentARC/t2i-adapter-lineart-sdxl-1.0',
'Sketch XL': 'TencentARC/t2i-adapter-sketch-sdxl-1.0',
'Zoe Depth XL': 'TencentARC/t2i-adapter-depth-zoe-sdxl-1.0',
'OpenPose XL': 'TencentARC/t2i-adapter-openpose-sdxl-1.0',
'Midas Depth XL': 'TencentARC/t2i-adapter-depth-midas-sdxl-1.0',
'Canny XL': ('TencentARC/t2i-adapter-canny-sdxl-1.0', { 'use_safetensors': True, 'variant': 'fp16' }),
'LineArt XL': ('TencentARC/t2i-adapter-lineart-sdxl-1.0', { 'use_safetensors': True, 'variant': 'fp16' }),
'Sketch XL': ('TencentARC/t2i-adapter-sketch-sdxl-1.0', { 'use_safetensors': True, 'variant': 'fp16' }),
'Zoe Depth XL': ('TencentARC/t2i-adapter-depth-zoe-sdxl-1.0', { 'use_safetensors': True, 'variant': 'fp16' }),
'OpenPose XL': ('TencentARC/t2i-adapter-openpose-sdxl-1.0', { 'use_safetensors': True }),
'Midas Depth XL': ('TencentARC/t2i-adapter-depth-midas-sdxl-1.0', { 'use_safetensors': True, 'variant': 'fp16' }),
}

models = {}
@@ -96,7 +96,8 @@ def load(self, model_id: str = None, force: bool = True) -> str:
if model_id not in all_models:
log.error(f'Control {what} unknown model: id="{model_id}" available={list(all_models)}')
return
model_path = all_models[model_id]
model_path, model_args = all_models[model_id]
self.load_config.update(model_args)
if model_path is None:
log.error(f'Control {what} model load failed: id="{model_id}" error=unknown model id')
return
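For context: the registry entries above now pair each repo id with per-model loader kwargs, which load() merges into the shared load config. A minimal standalone sketch of that flow, assuming the merged kwargs are ultimately passed to diffusers' T2IAdapter.from_pretrained (load_adapter and its load_config argument are illustrative names, not part of this diff):

```python
from diffusers import T2IAdapter

# registry entries pair a repo id with per-model loader kwargs, as above
predefined_sdxl = {
    'Canny XL': ('TencentARC/t2i-adapter-canny-sdxl-1.0', {'use_safetensors': True, 'variant': 'fp16'}),
}

def load_adapter(model_id: str, load_config: dict) -> T2IAdapter:
    model_path, model_args = predefined_sdxl[model_id]  # unpack (repo_id, kwargs)
    config = dict(load_config)  # start from the shared defaults
    config.update(model_args)   # per-model kwargs take precedence
    return T2IAdapter.from_pretrained(model_path, **config)

# e.g. adapter = load_adapter('Canny XL', {'cache_dir': 'models/adapters'})
```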
33 changes: 29 additions & 4 deletions modules/devices.py
@@ -46,6 +46,16 @@ def has_xpu() -> bool:
return bool(hasattr(torch, 'xpu') and torch.xpu.is_available())


def has_zluda() -> bool:
if not cuda_ok:
return False
try:
device = torch.device("cuda")
return torch.cuda.get_device_name(device).endswith("[ZLUDA]")
except Exception:
return False


def get_backend(shared_cmd_opts):
global args # pylint: disable=global-statement
args = shared_cmd_opts
@@ -55,6 +65,8 @@ def get_backend(shared_cmd_opts):
name = 'directml'
elif has_xpu():
name = 'ipex'
elif has_zluda():
name = 'zluda'
elif torch.cuda.is_available() and torch.version.cuda:
name = 'cuda'
elif torch.cuda.is_available() and torch.version.hip:
@@ -109,7 +121,7 @@ def get_package_version(pkg: str):
'device': f'{torch.xpu.get_device_name(torch.xpu.current_device())} n={torch.xpu.device_count()}',
'ipex': get_package_version('intel-extension-for-pytorch'),
}
elif backend == 'cuda':
elif backend == 'cuda' or backend == 'zluda':
return {
'device': f'{torch.cuda.get_device_name(torch.cuda.current_device())} n={torch.cuda.device_count()} arch={torch.cuda.get_arch_list()[-1]} capability={torch.cuda.get_device_capability(device)}',
'cuda': torch.version.cuda,
@@ -267,9 +279,22 @@ def test_bf16():
global bf16_ok # pylint: disable=global-statement
if bf16_ok is not None:
return bf16_ok
if sys.platform == "darwin" or backend == 'openvino' or backend == 'directml': # override
bf16_ok = False
return bf16_ok
if opts.cuda_dtype != 'BF16': # don't override if the user sets it
if sys.platform == "darwin" or backend == 'openvino' or backend == 'directml': # override
bf16_ok = False
return bf16_ok
elif backend == 'zluda':
device_name = torch.cuda.get_device_name(device)
if device_name.startswith("AMD Radeon RX "): # only force AMD
device_name = device_name.replace("AMD Radeon RX ", "").split(" ", maxsplit=1)[0]
if len(device_name) == 4 and device_name[0] in {"5", "6"}: # RDNA 1 and 2
bf16_ok = False
return bf16_ok
elif backend == 'rocm':
gcn_arch = getattr(torch.cuda.get_device_properties(device), "gcnArchName", "gfx0000")[3:7]
if len(gcn_arch) == 4 and gcn_arch[0:2] == "10": # RDNA 1 and 2
bf16_ok = False
return bf16_ok
try:
import torch.nn.functional as F
image = torch.randn(1, 4, 32, 32).to(device=device, dtype=torch.bfloat16)
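The new ZLUDA and ROCm branches above disable bf16 on RDNA 1 and 2 hardware unless the user explicitly forces BF16. The same detection logic, pulled out as a standalone helper for clarity (is_rdna1_or_2 is an illustrative name):

```python
import torch

def is_rdna1_or_2(backend: str, device) -> bool:
    if backend == 'zluda':
        # ZLUDA reports a marketing name such as "AMD Radeon RX 6800 XT";
        # RX 5xxx and RX 6xxx are RDNA 1 and 2, which lack reliable bf16
        name = torch.cuda.get_device_name(device)
        if name.startswith("AMD Radeon RX "):
            model = name.replace("AMD Radeon RX ", "").split(" ", maxsplit=1)[0]
            return len(model) == 4 and model[0] in {"5", "6"}
    elif backend == 'rocm':
        # ROCm exposes an LLVM target such as "gfx1030"; gfx10xx is RDNA 1 and 2
        arch = getattr(torch.cuda.get_device_properties(device), "gcnArchName", "gfx0000")[3:7]
        return len(arch) == 4 and arch[0:2] == "10"
    return False
```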
4 changes: 2 additions & 2 deletions modules/extra_networks.py
@@ -91,7 +91,7 @@ def activate(p, extra_network_data, step=0):
try:
extra_network.activate(p, extra_network_args, step=step)
except Exception as e:
errors.display(e, f"activating extra network: name={extra_network_name} args:{extra_network_args}")
errors.display(e, f"Activating network: type={extra_network_name} args:{extra_network_args}")

for extra_network_name, extra_network in extra_network_registry.items():
args = extra_network_data.get(extra_network_name, None)
@@ -100,7 +100,7 @@ def activate(p, extra_network_data, step=0):
try:
extra_network.activate(p, [])
except Exception as e:
errors.display(e, f"activating extra network: name={extra_network_name}")
errors.display(e, f"Activating network: type={extra_network_name}")

if stepwise:
p.extra_network_data = extra_network_data
2 changes: 1 addition & 1 deletion modules/generation_parameters_copypaste.py
@@ -145,7 +145,7 @@ def connect_paste_params_buttons():
if binding.source_text_component is not None and fields is not None:
connect_paste(binding.paste_button, fields, binding.source_text_component, override_settings_component, binding.tabname)
if binding.source_tabname is not None and fields is not None and binding.source_tabname in paste_fields:
paste_field_names = ['Prompt', 'Negative prompt', 'Steps', 'Face restoration'] + (["Seed"] if shared.opts.send_seed else []) + binding.paste_field_names
paste_field_names = ['Prompt', 'Negative prompt', 'Steps'] + (["Seed"] if shared.opts.send_seed else []) + binding.paste_field_names
if "fields" in paste_fields[binding.source_tabname] and paste_fields[binding.source_tabname]["fields"] is not None:
binding.paste_button.click(
fn=lambda *x: x,
2 changes: 1 addition & 1 deletion modules/img2img.py
@@ -141,7 +141,7 @@ def img2img(id_task: str, state: str, mode: int,
*args): # pylint: disable=unused-argument

if shared.sd_model is None:
shared.log.warning('Model not loaded')
shared.log.warning('Aborted: op=img model not loaded')
return [], '', '', 'Error: model not loaded'

debug(f'img2img: id_task={id_task}|mode={mode}|prompt={prompt}|negative_prompt={negative_prompt}|prompt_styles={prompt_styles}|init_img={init_img}|sketch={sketch}|init_img_with_mask={init_img_with_mask}|inpaint_color_sketch={inpaint_color_sketch}|inpaint_color_sketch_orig={inpaint_color_sketch_orig}|init_img_inpaint={init_img_inpaint}|init_mask_inpaint={init_mask_inpaint}|steps={steps}|sampler_index={sampler_index}||mask_blur={mask_blur}|mask_alpha={mask_alpha}|inpainting_fill={inpainting_fill}|full_quality={full_quality}|detailer={detailer}|tiling={tiling}|hidiffusion={hidiffusion}|n_iter={n_iter}|batch_size={batch_size}|cfg_scale={cfg_scale}|image_cfg_scale={image_cfg_scale}|clip_skip={clip_skip}|denoising_strength={denoising_strength}|seed={seed}|subseed{subseed}|subseed_strength={subseed_strength}|seed_resize_from_h={seed_resize_from_h}|seed_resize_from_w={seed_resize_from_w}|selected_scale_tab={selected_scale_tab}|height={height}|width={width}|scale_by={scale_by}|resize_mode={resize_mode}|resize_name={resize_name}|resize_context={resize_context}|inpaint_full_res={inpaint_full_res}|inpaint_full_res_padding={inpaint_full_res_padding}|inpainting_mask_invert={inpainting_mask_invert}|img2img_batch_files={img2img_batch_files}|img2img_batch_input_dir={img2img_batch_input_dir}|img2img_batch_output_dir={img2img_batch_output_dir}|img2img_batch_inpaint_mask_dir={img2img_batch_inpaint_mask_dir}|override_settings_texts={override_settings_texts}')
5 changes: 5 additions & 0 deletions modules/intel/ipex/attention.py
@@ -136,6 +136,11 @@ def scaled_dot_product_attention_32_bit(query, key, value, attn_mask=None, dropo
if do_split:
batch_size_attention, query_tokens, shape_three = query.shape[0], query.shape[1], query.shape[2]
hidden_states = torch.zeros(query.shape, device=query.device, dtype=query.dtype)
if attn_mask is not None and attn_mask.shape != query.shape:
if len(query.shape) == 4:
attn_mask = attn_mask.repeat((batch_size_attention // attn_mask.shape[0], query_tokens // attn_mask.shape[1], shape_three // attn_mask.shape[2], 1))
else:
attn_mask = attn_mask.repeat((batch_size_attention // attn_mask.shape[0], query_tokens // attn_mask.shape[1], shape_three // attn_mask.shape[2]))
for i in range(batch_size_attention // split_slice_size):
start_idx = i * split_slice_size
end_idx = (i + 1) * split_slice_size
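The added block tiles attn_mask so that each slice taken in the loop below indexes a mask of matching shape. A standalone restatement of that logic (tile_attn_mask is an illustrative name):

```python
import torch

def tile_attn_mask(attn_mask, query):
    # repeat the mask along its leading dims so slicing query and mask
    # in lockstep along the batch dimension stays shape-consistent
    if attn_mask is None or attn_mask.shape == query.shape:
        return attn_mask
    reps = (query.shape[0] // attn_mask.shape[0],
            query.shape[1] // attn_mask.shape[1],
            query.shape[2] // attn_mask.shape[2])
    if len(query.shape) == 4:
        return attn_mask.repeat((*reps, 1))  # 4D input: keep the last dim as-is
    return attn_mask.repeat(reps)
```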
18 changes: 14 additions & 4 deletions modules/intel/openvino/__init__.py
@@ -7,6 +7,7 @@
from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder
from openvino.frontend.pytorch.torchdynamo.partition import Partitioner
from openvino.runtime import Core, Type, PartialShape, serialize
from openvino.properties import hint as ov_hints

from torch._dynamo.backends.common import fake_tensor_unsupported
from torch._dynamo.backends.registry import register_backend
@@ -156,7 +157,6 @@ def openvino_compile(gm: GraphModule, *example_inputs, model_hash_str: str = Non
core = Core()

device = get_device()
cache_root = shared.opts.openvino_cache_path
global dont_use_4bit_nncf
global dont_use_nncf
global dont_use_quant
@@ -233,9 +233,14 @@ def openvino_compile(gm: GraphModule, *example_inputs, model_hash_str: str = Non
else:
om = nncf.compress_weights(om, mode=getattr(nncf.CompressWeightsMode, shared.opts.nncf_compress_weights_mode), group_size=8, ratio=shared.opts.nncf_compress_weights_raito)


hints = {}
if shared.opts.openvino_accuracy == "performance":
hints[ov_hints.execution_mode] = ov_hints.ExecutionMode.PERFORMANCE
elif shared.opts.openvino_accuracy == "accuracy":
hints[ov_hints.execution_mode] = ov_hints.ExecutionMode.ACCURACY
if model_hash_str is not None:
core.set_property({'CACHE_DIR': cache_root + '/blob'})
hints['CACHE_DIR'] = shared.opts.openvino_cache_path + '/blob'
core.set_property(hints)
dont_use_nncf = False
dont_use_quant = False
dont_use_4bit_nncf = False
@@ -286,7 +291,12 @@ def openvino_compile_cached_model(cached_model_path, *example_inputs):
else:
om = nncf.compress_weights(om, mode=getattr(nncf.CompressWeightsMode, shared.opts.nncf_compress_weights_mode), group_size=8, ratio=shared.opts.nncf_compress_weights_raito)

core.set_property({'CACHE_DIR': shared.opts.openvino_cache_path + '/blob'})
hints = {'CACHE_DIR': shared.opts.openvino_cache_path + '/blob'}
if shared.opts.openvino_accuracy == "performance":
hints[ov_hints.execution_mode] = ov_hints.ExecutionMode.PERFORMANCE
elif shared.opts.openvino_accuracy == "accuracy":
hints[ov_hints.execution_mode] = ov_hints.ExecutionMode.ACCURACY
core.set_property(hints)
dont_use_nncf = False
dont_use_quant = False
dont_use_4bit_nncf = False
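Both compile paths now funnel the cache directory and the new accuracy option through a single core.set_property(hints) call. A minimal sketch of that wiring (apply_openvino_hints is an illustrative name; execution_mode and ExecutionMode are the openvino.properties.hint symbols used in the diff):

```python
from openvino.runtime import Core
from openvino.properties import hint as ov_hints

def apply_openvino_hints(core: Core, accuracy: str, cache_path: str = None) -> None:
    hints = {}
    if accuracy == "performance":
        hints[ov_hints.execution_mode] = ov_hints.ExecutionMode.PERFORMANCE  # allow relaxed numerics
    elif accuracy == "accuracy":
        hints[ov_hints.execution_mode] = ov_hints.ExecutionMode.ACCURACY  # keep strict numerics
    if cache_path:
        hints['CACHE_DIR'] = cache_path + '/blob'  # reuse compiled blobs across runs
    core.set_property(hints)
```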
6 changes: 3 additions & 3 deletions modules/ipadapter.py
@@ -113,6 +113,9 @@ def unapply(pipe): # pylint: disable=arguments-differ

def apply(pipe, p: processing.StableDiffusionProcessing, adapter_names=[], adapter_scales=[1.0], adapter_crops=[False], adapter_starts=[0.0], adapter_ends=[1.0], adapter_images=[]):
global clip_loaded # pylint: disable=global-statement
if shared.sd_model_type != 'sd' and shared.sd_model_type != 'sdxl':
shared.log.error(f'IP adapter: model={shared.sd_model_type} class={pipe.__class__.__name__} not supported')
return False
# overrides
if hasattr(p, 'ip_adapter_names'):
if isinstance(p.ip_adapter_names, str):
@@ -183,9 +186,6 @@ def apply(pipe, p: processing.StableDiffusionProcessing, adapter_names=[], adapt
if not hasattr(pipe, 'load_ip_adapter'):
shared.log.error(f'IP adapter: pipeline not supported: {pipe.__class__.__name__}')
return False
if shared.sd_model_type != 'sd' and shared.sd_model_type != 'sdxl':
shared.log.error(f'IP adapter: unsupported model type: {shared.sd_model_type}')
return False

for adapter_name in adapter_names:
# which clip to use
16 changes: 8 additions & 8 deletions modules/model_flux.py
@@ -41,7 +41,7 @@ def load_flux_quanto(checkpoint_info):
except Exception:
shared.log.error(f"Load model: type=FLUX Failed to cast transformer to {devices.dtype}, set dtype to {transformer.dtype}")
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load Quanto transformer: {e}")
shared.log.error(f"Load model: type=FLUX failed to load Quanto transformer: {e}")
if debug:
from modules import errors
errors.display(e, 'FLUX Quanto:')
@@ -68,7 +68,7 @@ def load_flux_quanto(checkpoint_info):
except Exception:
shared.log.error(f"Load model: type=FLUX Failed to cast text encoder to {devices.dtype}, set dtype to {text_encoder_2.dtype}")
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load Quanto text encoder: {e}")
shared.log.error(f"Load model: type=FLUX failed to load Quanto text encoder: {e}")
if debug:
from modules import errors
errors.display(e, 'FLUX Quanto:')
@@ -100,7 +100,7 @@ def load_flux_bnb(checkpoint_info, diffusers_load_config): # pylint: disable=unu
else:
transformer = diffusers.FluxTransformer2DModel.from_single_file(repo_path, **diffusers_load_config)
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load BnB transformer: {e}")
shared.log.error(f"Load model: type=FLUX failed to load BnB transformer: {e}")
transformer, text_encoder_2 = None, None
if debug:
from modules import errors
@@ -222,7 +222,7 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
shared.opts.sd_unet = 'None'
sd_unet.failed_unet.append(shared.opts.sd_unet)
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load UNet: {e}")
shared.log.error(f"Load model: type=FLUX failed to load UNet: {e}")
shared.opts.sd_unet = 'None'
if debug:
from modules import errors
@@ -236,7 +236,7 @@
else:
text_encoder_2 = load_t5(name=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir)
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load T5: {e}")
shared.log.error(f"Load model: type=FLUX failed to load T5: {e}")
shared.opts.sd_text_encoder = 'None'
if debug:
from modules import errors
@@ -251,7 +251,7 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
vae_config = os.path.join('configs', 'flux', 'vae', 'config.json')
vae = diffusers.AutoencoderKL.from_single_file(vae_file, config=vae_config, **diffusers_load_config)
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load VAE: {e}")
shared.log.error(f"Load model: type=FLUX failed to load VAE: {e}")
shared.opts.sd_vae = 'None'
if debug:
from modules import errors
@@ -267,7 +267,7 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
if _text_encoder is not None:
text_encoder_2 = _text_encoder
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load NF4 components: {e}")
shared.log.error(f"Load model: type=FLUX failed to load NF4 components: {e}")
if debug:
from modules import errors
errors.display(e, 'FLUX NF4:')
@@ -279,7 +279,7 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
if _text_encoder is not None:
text_encoder_2 = _text_encoder
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load Quanto components: {e}")
shared.log.error(f"Load model: type=FLUX failed to load Quanto components: {e}")
if debug:
from modules import errors
errors.display(e, 'FLUX Quanto:')
2 changes: 1 addition & 1 deletion modules/model_flux_nf4.py
@@ -200,7 +200,7 @@ def load_flux_nf4(checkpoint_info):
create_quantized_param(transformer, param, param_name, target_device=0, state_dict=original_state_dict, pre_quantized=True)
except Exception as e:
transformer, text_encoder_2 = None, None
shared.log.error(f"Load model: type=FLUX Failed to load UNET: {e}")
shared.log.error(f"Load model: type=FLUX failed to load UNET: {e}")
if debug:
from modules import errors
errors.display(e, 'FLUX:')
