Merge pull request #3518 from vladmandic/dev
refresh master from dev
vladmandic authored Oct 24, 2024
2 parents 0d332ca + ea4df70 commit 3ba9ebc
Showing 25 changed files with 248 additions and 129 deletions.
20 changes: 20 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,25 @@
# Change Log for SD.Next

## Update for 2024-10-24

Improvements:
- SD3 loader enhancements
- report when loading incomplete model
- handle missing model components
- handle component preloading
- OpenVINO: add accuracy option
- ZLUDA: guess GPU arch

Fixes:
- fix send-to-control
- fix k-diffusion
- fix sd3 img2img and hires
- fix ipadapter supported model detection
- fix t2iadapter auto-download
- fix omnigen dynamic attention
- handle a1111 prompt scheduling
- handle omnigen image placeholder in prompt

## Update for 2024-10-23

### Highlights for 2024-10-23
2 changes: 1 addition & 1 deletion modules/control/run.py
@@ -219,7 +219,7 @@ def control_run(state: str = '',
p_extra_args = {}

if shared.sd_model is None:
shared.log.warning('Model not loaded')
shared.log.warning('Aborted: op=control model not loaded')
return [], '', '', 'Error: model not loaded'

unit_type = unit_type.strip().lower() if unit_type is not None else ''
37 changes: 19 additions & 18 deletions modules/control/units/t2iadapter.py
@@ -11,17 +11,17 @@
debug = log.trace if os.environ.get('SD_CONTROL_DEBUG', None) is not None else lambda *args, **kwargs: None
debug('Trace: CONTROL')
predefined_sd15 = {
'Segment': 'TencentARC/t2iadapter_seg_sd14v1',
'Zoe Depth': 'TencentARC/t2iadapter_zoedepth_sd15v1',
'OpenPose': 'TencentARC/t2iadapter_openpose_sd14v1',
'KeyPose': 'TencentARC/t2iadapter_keypose_sd14v1',
'Color': 'TencentARC/t2iadapter_color_sd14v1',
'Depth v1': 'TencentARC/t2iadapter_depth_sd14v1',
'Depth v2': 'TencentARC/t2iadapter_depth_sd15v2',
'Canny v1': 'TencentARC/t2iadapter_canny_sd14v1',
'Canny v2': 'TencentARC/t2iadapter_canny_sd15v2',
'Sketch v1': 'TencentARC/t2iadapter_sketch_sd14v1',
'Sketch v2': 'TencentARC/t2iadapter_sketch_sd15v2',
'Segment': ('TencentARC/t2iadapter_seg_sd14v1', {}),
'Zoe Depth': ('TencentARC/t2iadapter_zoedepth_sd15v1', {}),
'OpenPose': ('TencentARC/t2iadapter_openpose_sd14v1', {}),
'KeyPose': ('TencentARC/t2iadapter_keypose_sd14v1', {}),
'Color': ('TencentARC/t2iadapter_color_sd14v1', {}),
'Depth v1': ('TencentARC/t2iadapter_depth_sd14v1', {}),
'Depth v2': ('TencentARC/t2iadapter_depth_sd15v2', {}),
'Canny v1': ('TencentARC/t2iadapter_canny_sd14v1', {}),
'Canny v2': ('TencentARC/t2iadapter_canny_sd15v2', {}),
'Sketch v1': ('TencentARC/t2iadapter_sketch_sd14v1', {}),
'Sketch v2': ('TencentARC/t2iadapter_sketch_sd15v2', {}),
# 'Coadapter Canny': 'TencentARC/T2I-Adapter/models/coadapter-canny-sd15v1.pth',
# 'Coadapter Color': 'TencentARC/T2I-Adapter/models/coadapter-color-sd15v1.pth',
# 'Coadapter Depth': 'TencentARC/T2I-Adapter/models/coadapter-depth-sd15v1.pth',
@@ -30,12 +30,12 @@
# 'Coadapter Style': 'TencentARC/T2I-Adapter/models/coadapter-style-sd15v1.pth',
}
predefined_sdxl = {
'Canny XL': 'TencentARC/t2i-adapter-canny-sdxl-1.0',
'LineArt XL': 'TencentARC/t2i-adapter-lineart-sdxl-1.0',
'Sketch XL': 'TencentARC/t2i-adapter-sketch-sdxl-1.0',
'Zoe Depth XL': 'TencentARC/t2i-adapter-depth-zoe-sdxl-1.0',
'OpenPose XL': 'TencentARC/t2i-adapter-openpose-sdxl-1.0',
'Midas Depth XL': 'TencentARC/t2i-adapter-depth-midas-sdxl-1.0',
'Canny XL': ('TencentARC/t2i-adapter-canny-sdxl-1.0', { 'use_safetensors': True, 'variant': 'fp16' }),
'LineArt XL': ('TencentARC/t2i-adapter-lineart-sdxl-1.0', { 'use_safetensors': True, 'variant': 'fp16' }),
'Sketch XL': ('TencentARC/t2i-adapter-sketch-sdxl-1.0', { 'use_safetensors': True, 'variant': 'fp16' }),
'Zoe Depth XL': ('TencentARC/t2i-adapter-depth-zoe-sdxl-1.0', { 'use_safetensors': True, 'variant': 'fp16' }),
'OpenPose XL': ('TencentARC/t2i-adapter-openpose-sdxl-1.0', { 'use_safetensors': True }),
'Midas Depth XL': ('TencentARC/t2i-adapter-depth-midas-sdxl-1.0', { 'use_safetensors': True, 'variant': 'fp16' }),
}

models = {}
@@ -96,7 +96,8 @@ def load(self, model_id: str = None, force: bool = True) -> str:
if model_id not in all_models:
log.error(f'Control {what} unknown model: id="{model_id}" available={list(all_models)}')
return
model_path = all_models[model_id]
model_path, model_args = all_models[model_id]
self.load_config.update(model_args)
if model_path is None:
log.error(f'Control {what} model load failed: id="{model_id}" error=unknown model id')
return
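For context: the registry entries above now pair each repo id with per-model loader kwargs, which load() merges into the shared load config. A minimal standalone sketch of that flow, assuming the merged kwargs are ultimately passed to diffusers' T2IAdapter.from_pretrained (load_adapter and its load_config argument are illustrative names, not part of this diff):

```python
from diffusers import T2IAdapter

# registry entries pair a repo id with per-model loader kwargs, as above
predefined_sdxl = {
    'Canny XL': ('TencentARC/t2i-adapter-canny-sdxl-1.0', {'use_safetensors': True, 'variant': 'fp16'}),
}

def load_adapter(model_id: str, load_config: dict) -> T2IAdapter:
    model_path, model_args = predefined_sdxl[model_id]  # unpack (repo_id, kwargs)
    config = dict(load_config)  # start from the shared defaults
    config.update(model_args)   # per-model kwargs take precedence
    return T2IAdapter.from_pretrained(model_path, **config)

# e.g. adapter = load_adapter('Canny XL', {'cache_dir': 'models/adapters'})
```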
33 changes: 29 additions & 4 deletions modules/devices.py
@@ -46,6 +46,16 @@ def has_xpu() -> bool:
return bool(hasattr(torch, 'xpu') and torch.xpu.is_available())


def has_zluda() -> bool:
if not cuda_ok:
return False
try:
device = torch.device("cuda")
return torch.cuda.get_device_name(device).endswith("[ZLUDA]")
except Exception:
return False


def get_backend(shared_cmd_opts):
global args # pylint: disable=global-statement
args = shared_cmd_opts
@@ -55,6 +65,8 @@ def get_backend(shared_cmd_opts):
name = 'directml'
elif has_xpu():
name = 'ipex'
elif has_zluda():
name = 'zluda'
elif torch.cuda.is_available() and torch.version.cuda:
name = 'cuda'
elif torch.cuda.is_available() and torch.version.hip:
@@ -109,7 +121,7 @@ def get_package_version(pkg: str):
'device': f'{torch.xpu.get_device_name(torch.xpu.current_device())} n={torch.xpu.device_count()}',
'ipex': get_package_version('intel-extension-for-pytorch'),
}
elif backend == 'cuda':
elif backend == 'cuda' or backend == 'zluda':
return {
'device': f'{torch.cuda.get_device_name(torch.cuda.current_device())} n={torch.cuda.device_count()} arch={torch.cuda.get_arch_list()[-1]} capability={torch.cuda.get_device_capability(device)}',
'cuda': torch.version.cuda,
@@ -267,9 +279,22 @@ def test_bf16():
global bf16_ok # pylint: disable=global-statement
if bf16_ok is not None:
return bf16_ok
if sys.platform == "darwin" or backend == 'openvino' or backend == 'directml': # override
bf16_ok = False
return bf16_ok
if opts.cuda_dtype != 'BF16': # don't override if the user sets it
if sys.platform == "darwin" or backend == 'openvino' or backend == 'directml': # override
bf16_ok = False
return bf16_ok
elif backend == 'zluda':
device_name = torch.cuda.get_device_name(device)
if device_name.startswith("AMD Radeon RX "): # only force AMD
device_name = device_name.replace("AMD Radeon RX ", "").split(" ", maxsplit=1)[0]
if len(device_name) == 4 and device_name[0] in {"5", "6"}: # RDNA 1 and 2
bf16_ok = False
return bf16_ok
elif backend == 'rocm':
gcn_arch = getattr(torch.cuda.get_device_properties(device), "gcnArchName", "gfx0000")[3:7]
if len(gcn_arch) == 4 and gcn_arch[0:2] == "10": # RDNA 1 and 2
bf16_ok = False
return bf16_ok
try:
import torch.nn.functional as F
image = torch.randn(1, 4, 32, 32).to(device=device, dtype=torch.bfloat16)
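The new ZLUDA and ROCm branches above disable bf16 on RDNA 1 and 2 hardware unless the user explicitly forces BF16. The same detection logic, pulled out as a standalone helper for clarity (is_rdna1_or_2 is an illustrative name):

```python
import torch

def is_rdna1_or_2(backend: str, device) -> bool:
    if backend == 'zluda':
        # ZLUDA reports a marketing name such as "AMD Radeon RX 6800 XT";
        # RX 5xxx and RX 6xxx are RDNA 1 and 2, which lack reliable bf16
        name = torch.cuda.get_device_name(device)
        if name.startswith("AMD Radeon RX "):
            model = name.replace("AMD Radeon RX ", "").split(" ", maxsplit=1)[0]
            return len(model) == 4 and model[0] in {"5", "6"}
    elif backend == 'rocm':
        # ROCm exposes an LLVM target such as "gfx1030"; gfx10xx is RDNA 1 and 2
        arch = getattr(torch.cuda.get_device_properties(device), "gcnArchName", "gfx0000")[3:7]
        return len(arch) == 4 and arch[0:2] == "10"
    return False
```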
4 changes: 2 additions & 2 deletions modules/extra_networks.py
@@ -91,7 +91,7 @@ def activate(p, extra_network_data, step=0):
try:
extra_network.activate(p, extra_network_args, step=step)
except Exception as e:
errors.display(e, f"activating extra network: name={extra_network_name} args:{extra_network_args}")
errors.display(e, f"Activating network: type={extra_network_name} args:{extra_network_args}")

for extra_network_name, extra_network in extra_network_registry.items():
args = extra_network_data.get(extra_network_name, None)
@@ -100,7 +100,7 @@ def activate(p, extra_network_data, step=0):
try:
extra_network.activate(p, [])
except Exception as e:
errors.display(e, f"activating extra network: name={extra_network_name}")
errors.display(e, f"Activating network: type={extra_network_name}")

if stepwise:
p.extra_network_data = extra_network_data
2 changes: 1 addition & 1 deletion modules/generation_parameters_copypaste.py
@@ -145,7 +145,7 @@ def connect_paste_params_buttons():
if binding.source_text_component is not None and fields is not None:
connect_paste(binding.paste_button, fields, binding.source_text_component, override_settings_component, binding.tabname)
if binding.source_tabname is not None and fields is not None and binding.source_tabname in paste_fields:
paste_field_names = ['Prompt', 'Negative prompt', 'Steps', 'Face restoration'] + (["Seed"] if shared.opts.send_seed else []) + binding.paste_field_names
paste_field_names = ['Prompt', 'Negative prompt', 'Steps'] + (["Seed"] if shared.opts.send_seed else []) + binding.paste_field_names
if "fields" in paste_fields[binding.source_tabname] and paste_fields[binding.source_tabname]["fields"] is not None:
binding.paste_button.click(
fn=lambda *x: x,
2 changes: 1 addition & 1 deletion modules/img2img.py
@@ -141,7 +141,7 @@ def img2img(id_task: str, state: str, mode: int,
*args): # pylint: disable=unused-argument

if shared.sd_model is None:
shared.log.warning('Model not loaded')
shared.log.warning('Aborted: op=img model not loaded')
return [], '', '', 'Error: model not loaded'

debug(f'img2img: id_task={id_task}|mode={mode}|prompt={prompt}|negative_prompt={negative_prompt}|prompt_styles={prompt_styles}|init_img={init_img}|sketch={sketch}|init_img_with_mask={init_img_with_mask}|inpaint_color_sketch={inpaint_color_sketch}|inpaint_color_sketch_orig={inpaint_color_sketch_orig}|init_img_inpaint={init_img_inpaint}|init_mask_inpaint={init_mask_inpaint}|steps={steps}|sampler_index={sampler_index}||mask_blur={mask_blur}|mask_alpha={mask_alpha}|inpainting_fill={inpainting_fill}|full_quality={full_quality}|detailer={detailer}|tiling={tiling}|hidiffusion={hidiffusion}|n_iter={n_iter}|batch_size={batch_size}|cfg_scale={cfg_scale}|image_cfg_scale={image_cfg_scale}|clip_skip={clip_skip}|denoising_strength={denoising_strength}|seed={seed}|subseed{subseed}|subseed_strength={subseed_strength}|seed_resize_from_h={seed_resize_from_h}|seed_resize_from_w={seed_resize_from_w}|selected_scale_tab={selected_scale_tab}|height={height}|width={width}|scale_by={scale_by}|resize_mode={resize_mode}|resize_name={resize_name}|resize_context={resize_context}|inpaint_full_res={inpaint_full_res}|inpaint_full_res_padding={inpaint_full_res_padding}|inpainting_mask_invert={inpainting_mask_invert}|img2img_batch_files={img2img_batch_files}|img2img_batch_input_dir={img2img_batch_input_dir}|img2img_batch_output_dir={img2img_batch_output_dir}|img2img_batch_inpaint_mask_dir={img2img_batch_inpaint_mask_dir}|override_settings_texts={override_settings_texts}')
5 changes: 5 additions & 0 deletions modules/intel/ipex/attention.py
@@ -136,6 +136,11 @@ def scaled_dot_product_attention_32_bit(query, key, value, attn_mask=None, dropo
if do_split:
batch_size_attention, query_tokens, shape_three = query.shape[0], query.shape[1], query.shape[2]
hidden_states = torch.zeros(query.shape, device=query.device, dtype=query.dtype)
if attn_mask is not None and attn_mask.shape != query.shape:
if len(query.shape) == 4:
attn_mask = attn_mask.repeat((batch_size_attention // attn_mask.shape[0], query_tokens // attn_mask.shape[1], shape_three // attn_mask.shape[2], 1))
else:
attn_mask = attn_mask.repeat((batch_size_attention // attn_mask.shape[0], query_tokens // attn_mask.shape[1], shape_three // attn_mask.shape[2]))
for i in range(batch_size_attention // split_slice_size):
start_idx = i * split_slice_size
end_idx = (i + 1) * split_slice_size
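The added block tiles attn_mask so that each slice taken in the loop below indexes a mask of matching shape. A standalone restatement of that logic (tile_attn_mask is an illustrative name):

```python
import torch

def tile_attn_mask(attn_mask, query):
    # repeat the mask along its leading dims so slicing query and mask
    # in lockstep along the batch dimension stays shape-consistent
    if attn_mask is None or attn_mask.shape == query.shape:
        return attn_mask
    reps = (query.shape[0] // attn_mask.shape[0],
            query.shape[1] // attn_mask.shape[1],
            query.shape[2] // attn_mask.shape[2])
    if len(query.shape) == 4:
        return attn_mask.repeat((*reps, 1))  # 4D input: keep the last dim as-is
    return attn_mask.repeat(reps)
```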
18 changes: 14 additions & 4 deletions modules/intel/openvino/__init__.py
@@ -7,6 +7,7 @@
from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder
from openvino.frontend.pytorch.torchdynamo.partition import Partitioner
from openvino.runtime import Core, Type, PartialShape, serialize
from openvino.properties import hint as ov_hints

from torch._dynamo.backends.common import fake_tensor_unsupported
from torch._dynamo.backends.registry import register_backend
@@ -156,7 +157,6 @@ def openvino_compile(gm: GraphModule, *example_inputs, model_hash_str: str = Non
core = Core()

device = get_device()
cache_root = shared.opts.openvino_cache_path
global dont_use_4bit_nncf
global dont_use_nncf
global dont_use_quant
@@ -233,9 +233,14 @@ def openvino_compile(gm: GraphModule, *example_inputs, model_hash_str: str = Non
else:
om = nncf.compress_weights(om, mode=getattr(nncf.CompressWeightsMode, shared.opts.nncf_compress_weights_mode), group_size=8, ratio=shared.opts.nncf_compress_weights_raito)


hints = {}
if shared.opts.openvino_accuracy == "performance":
hints[ov_hints.execution_mode] = ov_hints.ExecutionMode.PERFORMANCE
elif shared.opts.openvino_accuracy == "accuracy":
hints[ov_hints.execution_mode] = ov_hints.ExecutionMode.ACCURACY
if model_hash_str is not None:
core.set_property({'CACHE_DIR': cache_root + '/blob'})
hints['CACHE_DIR'] = shared.opts.openvino_cache_path + '/blob'
core.set_property(hints)
dont_use_nncf = False
dont_use_quant = False
dont_use_4bit_nncf = False
@@ -286,7 +291,12 @@ def openvino_compile_cached_model(cached_model_path, *example_inputs):
else:
om = nncf.compress_weights(om, mode=getattr(nncf.CompressWeightsMode, shared.opts.nncf_compress_weights_mode), group_size=8, ratio=shared.opts.nncf_compress_weights_raito)

core.set_property({'CACHE_DIR': shared.opts.openvino_cache_path + '/blob'})
hints = {'CACHE_DIR': shared.opts.openvino_cache_path + '/blob'}
if shared.opts.openvino_accuracy == "performance":
hints[ov_hints.execution_mode] = ov_hints.ExecutionMode.PERFORMANCE
elif shared.opts.openvino_accuracy == "accuracy":
hints[ov_hints.execution_mode] = ov_hints.ExecutionMode.ACCURACY
core.set_property(hints)
dont_use_nncf = False
dont_use_quant = False
dont_use_4bit_nncf = False
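Both compile paths now funnel the cache directory and the new accuracy option through a single core.set_property(hints) call. A minimal sketch of that wiring (apply_openvino_hints is an illustrative name; execution_mode and ExecutionMode are the openvino.properties.hint symbols used in the diff):

```python
from openvino.runtime import Core
from openvino.properties import hint as ov_hints

def apply_openvino_hints(core: Core, accuracy: str, cache_path: str = None) -> None:
    hints = {}
    if accuracy == "performance":
        hints[ov_hints.execution_mode] = ov_hints.ExecutionMode.PERFORMANCE  # allow relaxed numerics
    elif accuracy == "accuracy":
        hints[ov_hints.execution_mode] = ov_hints.ExecutionMode.ACCURACY  # keep strict numerics
    if cache_path:
        hints['CACHE_DIR'] = cache_path + '/blob'  # reuse compiled blobs across runs
    core.set_property(hints)
```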
6 changes: 3 additions & 3 deletions modules/ipadapter.py
@@ -113,6 +113,9 @@ def unapply(pipe): # pylint: disable=arguments-differ

def apply(pipe, p: processing.StableDiffusionProcessing, adapter_names=[], adapter_scales=[1.0], adapter_crops=[False], adapter_starts=[0.0], adapter_ends=[1.0], adapter_images=[]):
global clip_loaded # pylint: disable=global-statement
if shared.sd_model_type != 'sd' and shared.sd_model_type != 'sdxl':
shared.log.error(f'IP adapter: model={shared.sd_model_type} class={pipe.__class__.__name__} not supported')
return False
# overrides
if hasattr(p, 'ip_adapter_names'):
if isinstance(p.ip_adapter_names, str):
@@ -183,9 +186,6 @@ def apply(pipe, p: processing.StableDiffusionProcessing, adapter_names=[], adapt
if not hasattr(pipe, 'load_ip_adapter'):
shared.log.error(f'IP adapter: pipeline not supported: {pipe.__class__.__name__}')
return False
if shared.sd_model_type != 'sd' and shared.sd_model_type != 'sdxl':
shared.log.error(f'IP adapter: unsupported model type: {shared.sd_model_type}')
return False

for adapter_name in adapter_names:
# which clip to use
16 changes: 8 additions & 8 deletions modules/model_flux.py
@@ -41,7 +41,7 @@ def load_flux_quanto(checkpoint_info):
except Exception:
shared.log.error(f"Load model: type=FLUX Failed to cast transformer to {devices.dtype}, set dtype to {transformer.dtype}")
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load Quanto transformer: {e}")
shared.log.error(f"Load model: type=FLUX failed to load Quanto transformer: {e}")
if debug:
from modules import errors
errors.display(e, 'FLUX Quanto:')
@@ -68,7 +68,7 @@ def load_flux_quanto(checkpoint_info):
except Exception:
shared.log.error(f"Load model: type=FLUX Failed to cast text encoder to {devices.dtype}, set dtype to {text_encoder_2.dtype}")
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load Quanto text encoder: {e}")
shared.log.error(f"Load model: type=FLUX failed to load Quanto text encoder: {e}")
if debug:
from modules import errors
errors.display(e, 'FLUX Quanto:')
@@ -100,7 +100,7 @@ def load_flux_bnb(checkpoint_info, diffusers_load_config): # pylint: disable=unu
else:
transformer = diffusers.FluxTransformer2DModel.from_single_file(repo_path, **diffusers_load_config)
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load BnB transformer: {e}")
shared.log.error(f"Load model: type=FLUX failed to load BnB transformer: {e}")
transformer, text_encoder_2 = None, None
if debug:
from modules import errors
@@ -222,7 +222,7 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
shared.opts.sd_unet = 'None'
sd_unet.failed_unet.append(shared.opts.sd_unet)
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load UNet: {e}")
shared.log.error(f"Load model: type=FLUX failed to load UNet: {e}")
shared.opts.sd_unet = 'None'
if debug:
from modules import errors
@@ -236,7 +236,7 @@
else:
text_encoder_2 = load_t5(name=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir)
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load T5: {e}")
shared.log.error(f"Load model: type=FLUX failed to load T5: {e}")
shared.opts.sd_text_encoder = 'None'
if debug:
from modules import errors
@@ -251,7 +251,7 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
vae_config = os.path.join('configs', 'flux', 'vae', 'config.json')
vae = diffusers.AutoencoderKL.from_single_file(vae_file, config=vae_config, **diffusers_load_config)
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load VAE: {e}")
shared.log.error(f"Load model: type=FLUX failed to load VAE: {e}")
shared.opts.sd_vae = 'None'
if debug:
from modules import errors
@@ -267,7 +267,7 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
if _text_encoder is not None:
text_encoder_2 = _text_encoder
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load NF4 components: {e}")
shared.log.error(f"Load model: type=FLUX failed to load NF4 components: {e}")
if debug:
from modules import errors
errors.display(e, 'FLUX NF4:')
@@ -279,7 +279,7 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
if _text_encoder is not None:
text_encoder_2 = _text_encoder
except Exception as e:
shared.log.error(f"Load model: type=FLUX Failed to load Quanto components: {e}")
shared.log.error(f"Load model: type=FLUX failed to load Quanto components: {e}")
if debug:
from modules import errors
errors.display(e, 'FLUX Quanto:')
2 changes: 1 addition & 1 deletion modules/model_flux_nf4.py
@@ -200,7 +200,7 @@ def load_flux_nf4(checkpoint_info):
create_quantized_param(transformer, param, param_name, target_device=0, state_dict=original_state_dict, pre_quantized=True)
except Exception as e:
transformer, text_encoder_2 = None, None
shared.log.error(f"Load model: type=FLUX Failed to load UNET: {e}")
shared.log.error(f"Load model: type=FLUX failed to load UNET: {e}")
if debug:
from modules import errors
errors.display(e, 'FLUX:')
