diff --git a/.gitignore b/.gitignore index 3e5d7807e..2bc819860 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ venv .history cache **/.DS_Store +tunableop_results*.csv # all models and temp files *.log diff --git a/.pylintrc b/.pylintrc index ceb7c1e94..00a59df29 100644 --- a/.pylintrc +++ b/.pylintrc @@ -23,7 +23,9 @@ ignore-paths=/usr/lib/.*$, modules/todo, modules/unipc, modules/xadapter, + modules/dcsolver, repositories, + modules/prompt_parser_xhinker.py, extensions-builtin/sd-webui-agent-scheduler, extensions-builtin/sd-extension-chainner/nodes, extensions-builtin/sdnext-modernui/node_modules, @@ -135,6 +137,7 @@ disable=bad-inline-option, consider-using-get, consider-using-in, consider-using-min-builtin, + consider-using-max-builtin, consider-using-sys-exit, dangerous-default-value, deprecated-pragma, diff --git a/.ruff.toml b/.ruff.toml index ea91ba5a5..52499859d 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -13,11 +13,14 @@ exclude = [ "modules/todo", "modules/unipc", "modules/xadapter", + "modules/dcsolver", "modules/intel/openvino", "modules/intel/ipex", "modules/segmoe", "modules/control/proc", "modules/control/units", + "modules/prompt_parser_xhinker.py", + "modules/postprocess/aurasr_arch.py", "repositories", "extensions-builtin/sd-extension-chainner/nodes", "extensions-builtin/sd-webui-agent-scheduler", @@ -80,4 +83,4 @@ line-ending = "auto" docstring-code-format = false [lint.mccabe] -max-complexity = 99 +max-complexity = 150 diff --git a/.vscode/settings.json b/.vscode/settings.json index 428fc2335..690477350 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,5 +8,6 @@ "./repositories/taming" ], "python.analysis.typeCheckingMode": "off", - "editor.formatOnSave": false + "editor.formatOnSave": false, + "python.REPL.enableREPLSmartSend": false } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index b8444c168..188f7d2ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,150 @@ # Change Log for SD.Next +## Update for 2024-09-13 + +### Highlights for 2024-09-13 + +Major refactor of [FLUX.1](https://blackforestlabs.ai/announcing-black-forest-labs/) support: +- Full **ControlNet** support, better **LoRA** support, full **prompt attention** implementation +- Faster execution, more flexible loading, additional quantization options (see the sketch below), and more... +- Added **image-to-image**, **inpaint**, **outpaint**, **hires** modes +- Added a workflow where FLUX can be used as a **refiner** for other models +- Since both *Optimum-Quanto* and *BitsAndBytes* libraries are limited in their platform support matrix, + try enabling **NNCF** for quantization/compression on-the-fly! + +A few image-related goodies... +- **Context-aware** resize that allows for *img2img/inpaint* even at massively different aspect ratios without distortions! +- **LUT Color grading**: apply professional color grading to your images using industry-standard *.cube* LUTs! +- Auto **HDR** image creation for SD and SDXL with both 16-ch true-HDR and 8-ch HDR-effect images ;) + +And a few video-related goodies... +- [CogVideoX](https://huggingface.co/THUDM/CogVideoX-5b) **2b** and **5b** variants + with support for *text-to-video* and *video-to-video*! +- [AnimateDiff](https://github.com/guoyww/animatediff/) **prompt travel** and **long context windows**! + create videos that travel between different prompts, even at long video lengths! + +Plus tons of other items and fixes - see [changelog](https://github.com/vladmandic/automatic/blob/master/CHANGELOG.md) for details!
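The quantization options mentioned in the highlights (nf4/fp8/fp4 and quanto qint8/qint4) are applied per FLUX.1 component. Purely as an illustration of what on-the-fly component quantization looks like, here is a minimal standalone sketch using plain `diffusers` plus `optimum-quanto`; it is not SD.Next's internal code path, and the model id, dtype, and choice of `qint8` are assumptions:

```python
# Minimal sketch: quantize the two largest FLUX.1 components (transformer and T5 encoder)
# on the fly with optimum-quanto, then run a short schnell-style generation.
import torch
from diffusers import FluxPipeline
from optimum.quanto import quantize, freeze, qint8

pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16)
quantize(pipe.transformer, weights=qint8)     # quantize transformer weights in place
freeze(pipe.transformer)                      # replace float weights with frozen quantized tensors
quantize(pipe.text_encoder_2, weights=qint8)  # the T5 text encoder is the other large component
freeze(pipe.text_encoder_2)
pipe.enable_model_cpu_offload()               # keep peak VRAM low, comparable to model offload

image = pipe("a red fox in fresh snow", num_inference_steps=4, guidance_scale=0.0).images[0]
image.save("flux-quantized.png")
```

SD.Next exposes the equivalent behavior through its model/settings UI (quantization type selected per component), so the snippet is only meant to clarify what "quantization on-the-fly" of individual transformer/text-encoder components means.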
+Examples: +- Built-in prompt-enhancer, TAESD optimizations, new DC-Solver scheduler, global XYZ grid management, etc. +- Updates to ZLUDA, IPEX, OpenVINO... + +### Details for 2024-09-13 + +**Major refactor of FLUX.1 support:** +- allow configuration of individual FLUX.1 model components: *transformer, text-encoder, vae* + model load will load selected components first and then initialize model using pre-loaded components + components that were not pre-loaded will be downloaded and initialized as needed + as usual, components can also be loaded after initial model load + *note*: use of transformer/unet is recommended as those are flux.1 finetunes + *note*: manually selecting vae and text-encoder is not recommended + *note*: mix-and-match of different quantizations for different components can lead to unexpected errors + - transformer/unet is list of manually downloaded safetensors + - vae is list of manually downloaded safetensors + - text-encoder is list of predefined and manually downloaded text-encoders +- **controlnet** support: + support for **InstantX/Shakker-Labs** models including [Union-Pro](https://huggingface.co/InstantX/FLUX.1-dev-Controlnet-Union) + note that flux controlnet models are large, up to 6.6GB on top of the already large base model! + as such, you may need to use offloading:sequential which is not as fast, but uses far less memory + when using union model, you must also select control mode in the control unit + flux does not yet support *img2img* so to use controlnet, you need to set controlnet input via control unit override +- model support loading **all-in-one** safetensors + not recommended due to massive duplication of components, but added due to popular demand + each such model is 20-32GB in size vs ~11GB for typical unet fine-tune +- improve logging, warn when attempting to load unet as base model +- **refiner** support + FLUX.1 can be used as refiner for other models such as sd/sdxl + simply load sd/sdxl model as base and flux model as refiner and use as usual refiner workflow +- **img2img**, **inpaint** and **outpaint** support + *note*: flux may require higher denoising strength than typical sd/sdxl models + *note*: img2img is not yet supported with controlnet +- transformer/unet support *fp8/fp4* quantization + this brings supported quants to: *nf4/fp8/fp4/qint8/qint4* +- vae support *fp16* +- **lora** support additional training tools +- **face-hires** support +- support **fuse-qkv** projections + can speed up generation + enable via *settings -> compute -> fused projections* + +**Other improvements & Fixes:** +- [CogVideoX](https://huggingface.co/THUDM/CogVideoX-5b) + - support for both **2B** and **5B** variations + - support for both **text2video** and **video2video** modes + - simply select in *scripts -> cogvideox* + - as with all video modules, includes additional frame interpolation using RIFE + - if init video is used, it will be automatically resized and interpolated to the desired number of frames +- **AnimateDiff**: + - **prompt travel** + create a video that travels between different prompts at different steps! + example prompt: + > 0: dog + > 5: cat + > 10: bird + - support for **v3** model (finally) + - support for **LCM** model + - support for **free-noise** rolling context window + allow for creation of much longer videos, automatically enabled if frames > 16 +- **Context-aware** image resize, thanks @AI-Casanova! + based on [seam-carving](https://github.com/li-plus/seam-carving) + allows for *img2img/inpaint* even at massively different aspect ratios without distortions!
+  simply select as resize method when using *img2img* or *control* tabs +- **HDR** high-dynamic-range image creation for SD and SDXL + create hdr images from multiple exposures via latent-space modifications during generation + use via *scripts -> hdr* + option *save hdr images* creates images in standard 8bit/channel (hdr-effect) *and* 16bit/channel (full-hdr) PNG format + ui result is always 8bit/channel hdr-effect image plus grid of original images used to create hdr + grid image can be disabled via settings -> user interface -> show grid + actual full-hdr image is not displayed in ui, only optionally saved to disk +- new scheduler: [DC Solver](https://github.com/wl-zhao/DC-Solver) +- **color grading** apply professional color grading to your images + using industry-standard *.cube* LUTs! + enable via *scripts -> color-grading* +- **hires** workflow now allows for full resize options + not just limited width/height/scale +- **xyz grid** is now available as both local and global script! +- **prompt enhance**: improve quality and/or verbosity of your prompts + simply select in *scripts -> prompt enhance* + uses [gokaygokay/Flux-Prompt-Enhance](https://huggingface.co/gokaygokay/Flux-Prompt-Enhance) model +- **taesd** configurable number of layers + can be used to speed up taesd decoding by reducing number of ops + e.g. if generating 1024px image, reducing layers by 1 will result in preview being 512px + set via *settings -> live preview -> taesd decode layers* +- **xhinker** prompt parser handle offloaded models +- **control** better handle offloading +- **upscale** will use resize-to if set to non-zero values over resize-by + applies to any upscale options, including refine workflow +- **networks** add option to choose if mouse-over on network should attempt to fetch additional info + option `extra_networks_fetch` to enable/disable in *settings -> networks* +- speed up some garbage collection ops +- sampler settings add **dynamic shift** + used by flow-matching samplers to adjust between structure and details +- sampler settings force base shift + improves quality of the flow-matching samplers +- **t5** support manually downloaded models + applies to all models that use t5 transformer +- **modern-ui** add override field +- full **lint** updates +- use `diffusers` from main branch, no longer tied to release +- improve diffusers/transformers/huggingface_hub progress reporting +- use unique identifiers for all ui components +- **visual query** (a.k.a. vqa or vlm) added support for several models + - [MiaoshouAI PromptGen 1.5 Base](https://huggingface.co/MiaoshouAI/Florence-2-base-PromptGen-v1.5) + - [MiaoshouAI PromptGen 1.5 Large](https://huggingface.co/MiaoshouAI/Florence-2-large-PromptGen-v1.5) + - [CogFlorence 2.2 Large](https://huggingface.co/thwri/CogFlorence-2.2-Large) +- **modernui** update +- **zluda** update to 3.8.4, thanks @lshqqytiger! +- **ipex** update to 2.3.110+xpu on linux, thanks @Disty0! +- **openvino** update to 2024.3.0, thanks @Disty0!
+- update `requirements` +- fix **AuraFlow** +- fix handling of model configs if offline config is not available +- fix vae decode in backend original +- fix model path typos +- fix guidance end handler +- fix script sorting +- fix vae dtype during load +- fix all ui labels are unique + ## Update for 2024-08-31 ### Highlights for 2024-08-31 diff --git a/TODO.md b/TODO.md index 61dc038d5..5726e67da 100644 --- a/TODO.md +++ b/TODO.md @@ -4,13 +4,9 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma ## Future Candidates -- cogvideo-x: -- animatediff prompt-travel: - async lowvram: - fp8: - ipadapter-negative: https://github.com/huggingface/diffusers/discussions/7167 -- hd-painter: https://github.com/huggingface/diffusers/blob/main/examples/community/README.md#hd-painter -- init latents: variations, img2img - include reference styles ### Missing diff --git a/cli/image-grid.py b/cli/image-grid.py index 8a48aecd9..f6f8ed755 100755 --- a/cli/image-grid.py +++ b/cli/image-grid.py @@ -52,7 +52,8 @@ def grid(images, labels = None, width = 0, height = 0, border = 0, square = Fals h = round(height / rows) size = tuple(size) image = Image.new('RGB', size = size, color = 'black') # pylint: disable=redefined-outer-name - font = ImageFont.truetype('DejaVuSansMono', round(w / 40)) + font_size = round(w / 40) if params.font == 0 else params.font + font = ImageFont.truetype('DejaVuSansMono', font_size) for i, img in enumerate(images): # pylint: disable=redefined-outer-name x = (i % cols * w) + (i % cols * border) y = (i // cols * h) + (i // cols * border) @@ -76,6 +77,7 @@ def grid(images, labels = None, width = 0, height = 0, border = 0, square = Fals parser.add_argument("--width", type = int, default = 0, required = False, help = "fixed grid width") parser.add_argument("--height", type = int, default = 0, required = False, help = "fixed grid height") parser.add_argument("--border", type = int, default = 0, required = False, help = "image border") + parser.add_argument("--font", type = int, default = 0, required = False, help = "font text size") parser.add_argument('--nolabels', default = False, action='store_true', help = "do not print image labels") parser.add_argument('--debug', default = False, action='store_true', help = "print extra debug information") parser.add_argument('output', type = str) diff --git a/extensions-builtin/Lora/lora_patches.py b/extensions-builtin/Lora/lora_patches.py index 7b0916e3a..532782c80 100644 --- a/extensions-builtin/Lora/lora_patches.py +++ b/extensions-builtin/Lora/lora_patches.py @@ -22,8 +22,8 @@ def apply(self): return if "Model" in shared.opts.optimum_quanto_weights or "Text Encoder" in shared.opts.optimum_quanto_weights: from optimum import quanto - self.QLinear_forward = patches.patch(__name__, quanto.nn.QLinear, 'forward', networks.network_QLinear_forward) - self.QConv2d_forward = patches.patch(__name__, quanto.nn.QConv2d, 'forward', networks.network_QConv2d_forward) + self.QLinear_forward = patches.patch(__name__, quanto.nn.QLinear, 'forward', networks.network_QLinear_forward) # pylint: disable=attribute-defined-outside-init + self.QConv2d_forward = patches.patch(__name__, quanto.nn.QConv2d, 'forward', networks.network_QConv2d_forward) # pylint: disable=attribute-defined-outside-init self.Linear_forward = patches.patch(__name__, torch.nn.Linear, 'forward', networks.network_Linear_forward) self.Linear_load_state_dict = patches.patch(__name__, torch.nn.Linear, '_load_from_state_dict', networks.network_Linear_load_state_dict) 
self.Conv2d_forward = patches.patch(__name__, torch.nn.Conv2d, 'forward', networks.network_Conv2d_forward) @@ -44,8 +44,8 @@ def undo(self): return if "Model" in shared.opts.optimum_quanto_weights or "Text Encoder" in shared.opts.optimum_quanto_weights: from optimum import quanto - self.QLinear_forward = patches.undo(__name__, quanto.nn.QLinear, 'forward') # pylint: disable=E1128 - self.QConv2d_forward = patches.undo(__name__, quanto.nn.QConv2d, 'forward') # pylint: disable=E1128 + self.QLinear_forward = patches.undo(__name__, quanto.nn.QLinear, 'forward') # pylint: disable=E1128, attribute-defined-outside-init + self.QConv2d_forward = patches.undo(__name__, quanto.nn.QConv2d, 'forward') # pylint: disable=E1128, attribute-defined-outside-init self.Linear_forward = patches.undo(__name__, torch.nn.Linear, 'forward') # pylint: disable=E1128 self.Linear_load_state_dict = patches.undo(__name__, torch.nn.Linear, '_load_from_state_dict') # pylint: disable=E1128 self.Conv2d_forward = patches.undo(__name__, torch.nn.Conv2d, 'forward') # pylint: disable=E1128 diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 847d46632..1bc7a74f7 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -97,10 +97,17 @@ def load_diffusers(name, network_on_disk, lora_scale=1.0) -> network.Network: try: shared.sd_model.load_lora_weights(network_on_disk.filename, adapter_name=name) except Exception as e: - errors.display(e, "LoRA") - return None - diffuser_loaded.append(name) - diffuser_scales.append(lora_scale) + if 'already in use' in str(e): + # shared.log.warning(f"LoRA load failed: file={network_on_disk.filename} {e}") + pass + else: + shared.log.error(f"LoRA load failed: file={network_on_disk.filename} {e}") + if debug: + errors.display(e, "LoRA") + return None + if name not in diffuser_loaded: + diffuser_loaded.append(name) + diffuser_scales.append(lora_scale) net = network.Network(name, network_on_disk) net.mtime = os.path.getmtime(network_on_disk.filename) # lora_cache[name] = net @@ -199,7 +206,7 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No if recompile_model: backup_cuda_compile = shared.opts.cuda_compile sd_models.unload_model_weights(op='model') - shared.opts.cuda_compile = False + shared.opts.cuda_compile = [] sd_models.reload_model_weights(op='model') shared.opts.cuda_compile = backup_cuda_compile @@ -254,7 +261,7 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No if recompile_model: shared.log.info("LoRA recompiling model") backup_lora_model = shared.compiled_model_state.lora_model - if shared.opts.cuda_compile: + if 'Model' in shared.opts.cuda_compile: shared.sd_model = sd_models_compile.compile_diffusers(shared.sd_model) shared.compiled_model_state.lora_model = backup_lora_model diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui index c84d677e0..2c95d480d 160000 --- a/extensions-builtin/sdnext-modernui +++ b/extensions-builtin/sdnext-modernui @@ -1 +1 @@ -Subproject commit c84d677e0c2df4aabe556dc3b40d5fed024e4cc1 +Subproject commit 2c95d480d63d46232122ddbd4161b73cba8c258a diff --git a/installer.py b/installer.py index cff0fd110..f932f04a8 100644 --- a/installer.py +++ b/installer.py @@ -25,6 +25,7 @@ class Dot(dict): # dot notation access to dictionary attributes version = None current_branch = None log = logging.getLogger("sd") +console = None debug = log.debug if os.environ.get('SD_INSTALL_DEBUG', None) is not None 
else lambda *args, **kwargs: None pip_log = '--log pip.log ' if os.environ.get('SD_PIP_DEBUG', None) is not None else '' log_file = os.path.join(os.path.dirname(__file__), 'sdnext.log') @@ -55,6 +56,7 @@ class Dot(dict): # dot notation access to dictionary attributes 'uv': False, }) git_commit = "unknown" +diffusers_commit = "unknown" submodules_commit = { 'sd-webui-controlnet': 'ecd33eb', # 'stable-diffusion-webui-images-browser': '27fe4a7', @@ -109,6 +111,7 @@ def get(self): level = logging.DEBUG if args.debug else logging.INFO log.setLevel(logging.DEBUG) # log to file is always at level debug for facility `sd` + global console # pylint: disable=global-statement console = Console(log_time=True, log_time_format='%H:%M:%S-%f', theme=Theme({ "traceback.border": "black", "traceback.border.syntax_error": "black", @@ -435,19 +438,24 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None): # check diffusers version def check_diffusers(): - pass # noop for now, can be used to force specific version based on conditions + sha = '5e1427a7da6e878b958fd5a2422c7763a94ff02b' + pkg = pkg_resources.working_set.by_key.get('diffusers', None) + minor = int(pkg.version.split('.')[1] if pkg is not None else 0) + cur = opts.get('diffusers_version', '') if minor > 0 else '' + if (minor == 0) or (cur != sha): + log.debug(f'Diffusers {"install" if minor == 0 else "upgrade"}: current={pkg}@{cur} target={sha}') + if minor > 0: + pip('uninstall --yes diffusers', ignore=True, quiet=True, uv=False) + pip(f'install --upgrade git+https://github.com/huggingface/diffusers@{sha}', ignore=False, quiet=True, uv=False) + global diffusers_commit # pylint: disable=global-statement + diffusers_commit = sha # check onnx version def check_onnx(): if not installed('onnx', quiet=True): install('onnx', 'onnx', ignore=True) - if not installed('onnxruntime', quiet=True) and not ( - installed('onnxruntime-gpu', quiet=True) or - installed('onnxruntime-openvino', quiet=True) or - installed('onnxruntime-training', quiet=True) - ): # allow either - + if not installed('onnxruntime', quiet=True) and not (installed('onnxruntime-gpu', quiet=True) or installed('onnxruntime-openvino', quiet=True) or installed('onnxruntime-training', quiet=True)): # allow either install('onnxruntime', 'onnxruntime', ignore=True) @@ -492,7 +500,7 @@ def install_rocm_zluda(): break log.info(f'ROCm version detected: {rocm.version}') - + torch_command = '' if sys.platform == "win32": #if args.use_zluda: log.warning("ZLUDA support: experimental") @@ -510,6 +518,7 @@ def install_rocm_zluda(): if error is None: if args.device_id is not None: os.environ['HIP_VISIBLE_DEVICES'] = args.device_id + del args.device_id try: zluda_installer.load(zluda_path) torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.3.0 torchvision --index-url https://download.pytorch.org/whl/cu118') @@ -521,7 +530,7 @@ def install_rocm_zluda(): log.info('Using CPU-only torch') torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision') #else: - # TODO TBD after ROCm for Windows is released + # TODO after ROCm for Windows is released else: if rocm.version is None or float(rocm.version) > 6.1: # assume the latest if version check fails torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision --index-url https://download.pytorch.org/whl/rocm6.1') @@ -532,9 +541,13 @@ def install_rocm_zluda(): else: torch_command = os.environ.get('TORCH_COMMAND', f'torch torchvision --index-url https://download.pytorch.org/whl/rocm{rocm.version}') - ort_version = 
os.environ.get('ONNXRUNTIME_VERSION', None) - ort_package = os.environ.get('ONNXRUNTIME_PACKAGE', f"--pre onnxruntime-training{'' if ort_version is None else ('==' + ort_version)} --index-url https://pypi.lsh.sh/{rocm.version[0]}{rocm.version[2]} --extra-index-url https://pypi.org/simple") - install(ort_package, 'onnxruntime-training') + if sys.version_info < (3, 11): + ort_version = os.environ.get('ONNXRUNTIME_VERSION', None) + if rocm.version is None or float(rocm.version) > 6.0: + ort_package = os.environ.get('ONNXRUNTIME_PACKAGE', f"--pre onnxruntime-training{'' if ort_version is None else ('==' + ort_version)} --index-url https://pypi.lsh.sh/60 --extra-index-url https://pypi.org/simple") + else: + ort_package = os.environ.get('ONNXRUNTIME_PACKAGE', f"--pre onnxruntime-training{'' if ort_version is None else ('==' + ort_version)} --index-url https://pypi.lsh.sh/{rocm.version[0]}{rocm.version[2]} --extra-index-url https://pypi.org/simple") + install(ort_package, 'onnxruntime-training') if hip_default_device is not None and rocm.version != "6.2" and rocm.version == rocm.version_torch and rocm.get_blaslt_enabled(): log.debug(f'hipBLASLt arch={hip_default_device.name} available={hip_default_device.blaslt_supported}') @@ -551,13 +564,8 @@ def install_ipex(torch_command): if os.environ.get("ClDeviceGlobalMemSizeAvailablePercent", None) is None: os.environ.setdefault('ClDeviceGlobalMemSizeAvailablePercent', '100') if "linux" in sys.platform: - torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.1.0.post3 torchvision==0.16.0.post3 intel-extension-for-pytorch==2.1.40+xpu oneccl_bind_pt==2.1.400+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/') - # os.environ.setdefault('TENSORFLOW_PACKAGE', 'tensorflow==2.15.0 intel-extension-for-tensorflow[xpu]==2.15.0.0') - if os.environ.get('DISABLE_VENV_LIBS', None) is None: - install(os.environ.get('MKL_PACKAGE', 'mkl==2024.2.0'), 'mkl') - install(os.environ.get('DPCPP_PACKAGE', 'mkl-dpcpp==2024.2.0'), 'mkl-dpcpp') - install(os.environ.get('ONECCL_PACKAGE', 'oneccl-devel==2021.13.0'), 'oneccl-devel') - install(os.environ.get('MPI_PACKAGE', 'impi-devel==2021.13.0'), 'impi-devel') + torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.3.1+cxx11.abi torchvision==0.18.1+cxx11.abi intel-extension-for-pytorch==2.3.110+xpu oneccl_bind_pt==2.3.100+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/') + # os.environ.setdefault('TENSORFLOW_PACKAGE', 'tensorflow==2.15.1 intel-extension-for-tensorflow[xpu]==2.15.0.1') else: if sys.version_info.minor == 11: pytorch_pip = 'https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.1.10%2Bxpu/torch-2.1.0a0+cxx11.abi-cp311-cp311-win_amd64.whl' @@ -570,26 +578,26 @@ def install_ipex(torch_command): ipex_pip = 'https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.1.10%2Bxpu/intel_extension_for_pytorch-2.1.10+xpu-cp310-cp310-win_amd64.whl' torch_command = os.environ.get('TORCH_COMMAND', f'{pytorch_pip} {torchvision_pip} {ipex_pip}') else: - torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.1.0.post0 torchvision==0.16.0.post0 intel-extension-for-pytorch==2.1.20+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/') + torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.1.0.post3 torchvision==0.16.0.post3 intel-extension-for-pytorch==2.1.40+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/') if os.environ.get('DISABLE_VENV_LIBS', 
None) is None: - install(os.environ.get('MKL_PACKAGE', 'mkl==2024.1.0'), 'mkl') - install(os.environ.get('DPCPP_PACKAGE', 'mkl-dpcpp==2024.1.0'), 'mkl-dpcpp') - install(os.environ.get('ONECCL_PACKAGE', 'oneccl-devel==2021.12.0'), 'oneccl-devel') - install(os.environ.get('MPI_PACKAGE', 'impi-devel==2021.12.0'), 'impi-devel') + install(os.environ.get('MKL_PACKAGE', 'mkl==2024.2.0'), 'mkl') + install(os.environ.get('DPCPP_PACKAGE', 'mkl-dpcpp==2024.2.0'), 'mkl-dpcpp') + install(os.environ.get('ONECCL_PACKAGE', 'oneccl-devel==2021.13.0'), 'oneccl-devel') + install(os.environ.get('MPI_PACKAGE', 'impi-devel==2021.13.0'), 'impi-devel') torch_command = os.environ.get('TORCH_COMMAND', f'{pytorch_pip} {torchvision_pip} {ipex_pip}') - install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2023.3.0'), 'openvino', ignore=True) + install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.3.0'), 'openvino', ignore=True) install('nncf==2.7.0', 'nncf', ignore=True) install(os.environ.get('ONNXRUNTIME_PACKAGE', 'onnxruntime-openvino'), 'onnxruntime-openvino', ignore=True) return torch_command def install_openvino(torch_command): - check_python(supported_minors=[9, 10, 11], reason='OpenVINO backend requires Python 3.9, 3.10 or 3.11') + check_python(supported_minors=[8, 9, 10, 11, 12], reason='OpenVINO backend requires Python 3.9, 3.10 or 3.11') log.info('Using OpenVINO') - torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.2.0 torchvision==0.17.0 --index-url https://download.pytorch.org/whl/cpu') - install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2023.3.0'), 'openvino') + torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.3.1 torchvision==0.18.1 --index-url https://download.pytorch.org/whl/cpu') + install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.3.0'), 'openvino') install(os.environ.get('ONNXRUNTIME_PACKAGE', 'onnxruntime-openvino'), 'onnxruntime-openvino', ignore=True) - install('nncf==2.8.1', 'nncf') + install('nncf==2.12.0', 'nncf') os.environ.setdefault('PYTORCH_TRACING_MODE', 'TORCHFX') if os.environ.get("NEOReadDebugKeys", None) is None: os.environ.setdefault('NEOReadDebugKeys', '1') diff --git a/javascript/extraNetworks.js b/javascript/extraNetworks.js index e0294b4f4..ab818a8d1 100644 --- a/javascript/extraNetworks.js +++ b/javascript/extraNetworks.js @@ -421,21 +421,23 @@ function setupExtraNetworksForTab(tabname) { // card hover let hoverTimer = null; let previousCard = null; - gradioApp().getElementById(`${tabname}_extra_tabs`).onmouseover = (e) => { - const el = e.target.closest('.card'); // bubble-up to card - if (!el || (el.title === previousCard)) return; - if (!hoverTimer) { - hoverTimer = setTimeout(() => { - readCardDescription(el.dataset.page, el.dataset.name); - readCardTags(el, el.dataset.tags); - previousCard = el.title; - }, 300); - } - el.onmouseout = () => { - clearTimeout(hoverTimer); - hoverTimer = null; + if (window.opts.extra_networks_fetch) { + gradioApp().getElementById(`${tabname}_extra_tabs`).onmouseover = async (e) => { + const el = e.target.closest('.card'); // bubble-up to card + if (!el || (el.title === previousCard)) return; + if (!hoverTimer) { + hoverTimer = setTimeout(() => { + readCardDescription(el.dataset.page, el.dataset.name); + readCardTags(el, el.dataset.tags); + previousCard = el.title; + }, 300); + } + el.onmouseout = () => { + clearTimeout(hoverTimer); + hoverTimer = null; + }; }; - }; + } // en style if (!en) return; diff --git a/javascript/sdnext.css b/javascript/sdnext.css index 6f6cfc200..d2b59771b 100644 --- 
a/javascript/sdnext.css +++ b/javascript/sdnext.css @@ -285,11 +285,12 @@ table.settings-value-table td { padding: 0.4em; border: 1px solid #ccc; max-widt #control_script_container { display: block; margin-top: 1em; border-width: 2px 0 0 0; border-style: solid; border-color: var(--highlight-color); } .control-button { min-height: 42px; max-height: 42px; line-height: 1em; } .control-tabs > .tab-nav { margin-bottom: 0; margin-top: 0; } -.control-unit { max-width: 1200px; padding: 0 !important; margin-top: -10px !important; } +.control-unit { padding: 0 !important; margin-top: -10px !important; } .control-unit > .label-wrap { margin-bottom: 0 !important; } .control-settings { border-width: var(--block-border-width) !important; border-top: var(--button-primary-border-color) !important; border-style: solid !important; margin-top: 1em !important; } .processor-settings { padding: 0 !important; max-width: 300px; } .processor-group > div { flex-flow: wrap;gap: 1em; } +.control-unit .gradio-button.tool { align-self: baseline; margin-top: 2rem; } /* main info */ .main-info { font-weight: var(--section-header-text-weight); color: var(--body-text-color-subdued); padding: 1em !important; margin-top: 2em !important; line-height: var(--line-lg) !important; } diff --git a/javascript/startup.js b/javascript/startup.js index f1a44faf5..245a3eae8 100644 --- a/javascript/startup.js +++ b/javascript/startup.js @@ -17,7 +17,6 @@ async function initStartup() { initImageViewer(); initGallery(); setupControlUI(); - setupExtraNetworks(); // reconnect server session await reconnectUI(); @@ -25,6 +24,7 @@ async function initStartup() { // make sure all of the ui is ready and options are loaded while (Object.keys(window.opts).length === 0) await sleep(50); executeCallbacks(uiReadyCallbacks); + setupExtraNetworks(); // optinally wait for modern ui if (window.waitForUiReady) await waitForUiReady(); diff --git a/modules/api/control.py b/modules/api/control.py index 9c93b6bc6..cf8916095 100644 --- a/modules/api/control.py +++ b/modules/api/control.py @@ -103,6 +103,7 @@ def prepare_ip_adapter(self, request): args['ip_adapter_scales'].append(ipadapter.scale) args['ip_adapter_starts'].append(ipadapter.start) args['ip_adapter_ends'].append(ipadapter.end) + args['ip_adapter_crops'].append(ipadapter.crop) args['ip_adapter_images'].append([helpers.decode_base64_to_image(x) for x in ipadapter.images]) if ipadapter.masks: args['ip_adapter_masks'].append([helpers.decode_base64_to_image(x) for x in ipadapter.masks]) diff --git a/modules/api/models.py b/modules/api/models.py index 4aba4fb01..3c5fd6146 100644 --- a/modules/api/models.py +++ b/modules/api/models.py @@ -149,12 +149,13 @@ class ItemEmbedding(BaseModel): vectors: int = Field(title="Vectors", description="The number of vectors in the embedding") class ItemIPAdapter(BaseModel): - adapter: str = Field(title="Adapter", default="Base", description="") - images: List[str] = Field(title="Image", default=[], description="") - masks: Optional[List[str]] = Field(title="Mask", default=[], description="") - scale: float = Field(title="Scale", default=0.5, ge=0, le=1, description="") - start: float = Field(title="Start", default=0.0, ge=0, le=1, description="") - end: float = Field(title="End", default=1.0, gt=0, le=1, description="") + adapter: str = Field(title="Adapter", default="Base", description="IP adapter name") + images: List[str] = Field(title="Image", default=[], description="IP adapter input images") + masks: Optional[List[str]] = Field(title="Mask", default=[],
description="IP adapter mask images") + scale: float = Field(title="Scale", default=0.5, ge=0, le=1, description="IP adapter scale") + start: float = Field(title="Start", default=0.0, ge=0, le=1, description="IP adapter start step") + end: float = Field(title="End", default=1.0, gt=0, le=1, description="IP adapter end step") + crop: bool = Field(title="Crop", default=False, description="IP adapter crop face from input") class ItemFace(BaseModel): mode: str = Field(title="Mode", default="FaceID", description="The mode to use (available values: FaceID, FaceSwap, PhotoMaker, InstantID).") diff --git a/modules/control/run.py b/modules/control/run.py index 41204fa49..5b8a4bf36 100644 --- a/modules/control/run.py +++ b/modules/control/run.py @@ -62,11 +62,11 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini full_quality: bool = True, restore_faces: bool = False, tiling: bool = False, hidiffusion: bool = False, hdr_mode: int = 0, hdr_brightness: float = 0, hdr_color: float = 0, hdr_sharpen: float = 0, hdr_clamp: bool = False, hdr_boundary: float = 4.0, hdr_threshold: float = 0.95, hdr_maximize: bool = False, hdr_max_center: float = 0.6, hdr_max_boundry: float = 1.0, hdr_color_picker: str = None, hdr_tint_ratio: float = 0, - resize_mode_before: int = 0, resize_name_before: str = 'None', width_before: int = 512, height_before: int = 512, scale_by_before: float = 1.0, selected_scale_tab_before: int = 0, - resize_mode_after: int = 0, resize_name_after: str = 'None', width_after: int = 0, height_after: int = 0, scale_by_after: float = 1.0, selected_scale_tab_after: int = 0, - resize_mode_mask: int = 0, resize_name_mask: str = 'None', width_mask: int = 0, height_mask: int = 0, scale_by_mask: float = 1.0, selected_scale_tab_mask: int = 0, + resize_mode_before: int = 0, resize_name_before: str = 'None', resize_context_before: str = 'None', width_before: int = 512, height_before: int = 512, scale_by_before: float = 1.0, selected_scale_tab_before: int = 0, + resize_mode_after: int = 0, resize_name_after: str = 'None', resize_context_after: str = 'None', width_after: int = 0, height_after: int = 0, scale_by_after: float = 1.0, selected_scale_tab_after: int = 0, + resize_mode_mask: int = 0, resize_name_mask: str = 'None', resize_context_mask: str = 'None', width_mask: int = 0, height_mask: int = 0, scale_by_mask: float = 1.0, selected_scale_tab_mask: int = 0, denoising_strength: float = 0, batch_count: int = 1, batch_size: int = 1, - enable_hr: bool = False, hr_sampler_index: int = None, hr_denoising_strength: float = 0.3, hr_upscaler: str = None, hr_force: bool = False, hr_second_pass_steps: int = 20, + enable_hr: bool = False, hr_sampler_index: int = None, hr_denoising_strength: float = 0.3, hr_resize_mode: int = 0, hr_resize_context: str = 'None', hr_upscaler: str = None, hr_force: bool = False, hr_second_pass_steps: int = 20, hr_scale: float = 1.0, hr_resize_x: int = 0, hr_resize_y: int = 0, refiner_steps: int = 5, refiner_start: float = 0.0, refiner_prompt: str = '', refiner_negative: str = '', video_skip_frames: int = 0, video_type: str = 'None', video_duration: float = 2.0, video_loop: bool = False, video_pad: int = 0, video_interpolate: int = 0, *input_script_args @@ -180,6 +180,8 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini p.enable_hr = enable_hr p.hr_sampler_name = processing.get_sampler_name(hr_sampler_index) p.hr_denoising_strength = hr_denoising_strength + p.hr_resize_mode = hr_resize_mode + p.hr_resize_context = 
hr_resize_context p.hr_upscaler = hr_upscaler p.hr_force = hr_force p.hr_second_pass_steps = hr_second_pass_steps @@ -217,10 +219,9 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.cpu}') sd_models.move_model(u.controlnet.model, devices.cpu) continue - else: - if u.controlnet is not None and u.controlnet.model is not None: - debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.device}') - sd_models.move_model(u.controlnet.model, devices.device) + if u.controlnet is not None and u.controlnet.model is not None: + debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.device}') + sd_models.move_model(u.controlnet.model, devices.device) if unit_type == 't2i adapter' and u.adapter.model is not None: active_process.append(u.process) active_model.append(u.adapter) @@ -234,7 +235,8 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini active_start.append(float(u.start)) active_end.append(float(u.end)) p.guess_mode = u.guess - shared.log.debug(f'Control ControlNet unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end}') + p.control_mode = u.mode + shared.log.debug(f'Control ControlNet unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end} mode={u.mode}') elif unit_type == 'xs' and u.controlnet.model is not None: active_process.append(u.process) active_model.append(u.controlnet) @@ -388,7 +390,7 @@ def set_pipe(): video = cv2.VideoCapture(inputs) if not video.isOpened(): if is_generator: - yield terminate(f'Control: video open failed: path={inputs}') + yield terminate(f'Video open failed: path={inputs}') return [], '', '', 'Error: video open failed' frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) fps = int(video.get(cv2.CAP_PROP_FPS)) @@ -401,7 +403,7 @@ def set_pipe(): shared.log.debug(f'Control: input video: path={inputs} frames={frames} fps={fps} size={w}x{h} codec={codec}') except Exception as e: if is_generator: - yield terminate(f'Control: video open failed: path={inputs} {e}') + yield terminate(f'Video open failed: path={inputs} {e}') return [], '', '', 'Error: video open failed' while status: @@ -419,7 +421,7 @@ def set_pipe(): if shared.state.interrupted: shared.state.interrupted = False if is_generator: - yield terminate('Control interrupted') + yield terminate('Interrupted') return [], '', '', 'Interrupted' # get input if isinstance(input_image, str): @@ -456,8 +458,8 @@ def set_pipe(): width_before, height_before = int(input_image.width * scale_by_before), int(input_image.height * scale_by_before) if input_image is not None: p.extra_generation_params["Control resize"] = f'{resize_name_before}' - debug(f'Control resize: op=before image={input_image} width={width_before} height={height_before} mode={resize_mode_before} name={resize_name_before}') - input_image = images.resize_image(resize_mode_before, input_image, width_before, height_before, resize_name_before) + debug(f'Control resize: op=before image={input_image} width={width_before} height={height_before} mode={resize_mode_before} name={resize_name_before} context="{resize_context_before}"') + input_image = images.resize_image(resize_mode_before, input_image, width_before, height_before, resize_name_before, context=resize_context_before) if input_image is not 
None and init_image is not None and init_image.size != input_image.size: debug(f'Control resize init: image={init_image} target={input_image}') init_image = images.resize_image(resize_mode=1, im=init_image, width=input_image.width, height=input_image.height) @@ -508,7 +510,7 @@ def set_pipe(): pass if any(img is None for img in processed_images): if is_generator: - yield terminate('Control: attempting process but output is none') + yield terminate('Attempting process but output is none') return [], '', '', 'Error: output is none' if len(processed_images) > 1 and len(active_process) != len(active_model): processed_image = [np.array(i) for i in processed_images] @@ -527,7 +529,7 @@ def set_pipe(): p.init_images = processed_images elif isinstance(selected_models, list) and len(processed_images) != len(selected_models): if is_generator: - yield terminate(f'Control: number of inputs does not match: input={len(processed_images)} models={len(selected_models)}') + yield terminate(f'Number of inputs does not match: input={len(processed_images)} models={len(selected_models)}') return [], '', '', 'Error: number of inputs does not match' elif selected_models is not None: p.init_images = processed_image @@ -542,19 +544,24 @@ def set_pipe(): debug(f'Control: process=None image={p.ref_image}') if p.ref_image is None: if is_generator: - yield terminate('Control: attempting reference mode but image is none') + yield terminate('Attempting reference mode but image is none') return [], '', '', 'Reference mode without image' - elif unit_type == 'controlnet' and input_type == 1 and has_models: # Init image same as control - p.task_args['control_image'] = p.init_images # switch image and control_image - p.task_args['strength'] = p.denoising_strength - p.init_images = [p.override or input_image] * len(active_model) - elif unit_type == 'controlnet' and input_type == 2 and has_models: # Separate init image - if init_image is None: - shared.log.warning('Control: separate init image not provided') - init_image = input_image - p.task_args['control_image'] = p.init_images # switch image and control_image - p.task_args['strength'] = p.denoising_strength - p.init_images = [init_image] * len(active_model) + elif unit_type == 'controlnet' and has_models: + if input_type == 0: # Control only + if shared.sd_model_type == 'f1': + p.task_args['control_image'] = p.init_images # flux controlnet mandates this + p.task_args['strength'] = p.denoising_strength + elif input_type == 1: # Init image same as control + p.task_args['control_image'] = p.init_images # switch image and control_image + p.task_args['strength'] = p.denoising_strength + p.init_images = [p.override or input_image] * len(active_model) + elif input_type == 2: # Separate init image + if init_image is None: + shared.log.warning('Control: separate init image not provided') + init_image = input_image + p.task_args['control_image'] = p.init_images # switch image and control_image + p.task_args['strength'] = p.denoising_strength + p.init_images = [init_image] * len(active_model) if is_generator: image_txt = f'{blended_image.width}x{blended_image.height}' if blended_image is not None else 'None' @@ -596,6 +603,8 @@ def set_pipe(): if unit_type == 'lite': p.init_image = [input_image] instance.apply(selected_models, processed_image, control_conditioning) + if p.control_mode is not None: + p.task_args['control_mode'] = p.control_mode if hasattr(p, 'init_images') and p.init_images is None: # delete empty del p.init_images @@ -603,7 +612,7 @@ def set_pipe(): if has_models: if 
unit_type in ['controlnet', 't2i adapter', 'lite', 'xs'] and p.task_args.get('image', None) is None and getattr(p, 'init_images', None) is None: if is_generator: - yield terminate(f'Control: mode={p.extra_generation_params.get("Control mode", None)} input image is none') + yield terminate(f'Mode={p.extra_generation_params.get("Control mode", None)} input image is none') return [], '', '', 'Error: Input image is none' # resize mask @@ -611,7 +620,7 @@ def set_pipe(): if selected_scale_tab_mask == 1: width_mask, height_mask = int(input_image.width * scale_by_mask), int(input_image.height * scale_by_mask) p.width, p.height = width_mask, height_mask - debug(f'Control resize: op=mask image={mask} width={width_mask} height={height_mask} mode={resize_mode_mask} name={resize_name_mask}') + debug(f'Control resize: op=mask image={mask} width={width_mask} height={height_mask} mode={resize_mode_mask} name={resize_name_mask} context="{resize_context_mask}"') # pipeline output = None @@ -638,6 +647,7 @@ def set_pipe(): processed: processing.Processed = processing.process_images(p) # run actual pipeline else: script_run = True + processed = p.scripts.after(p, processed, *p.script_args) output = None if processed is not None: output = processed.images @@ -659,8 +669,8 @@ def set_pipe(): width_after = int(output_image.width * scale_by_after) height_after = int(output_image.height * scale_by_after) if resize_mode_after != 0 and resize_name_after != 'None' and not is_grid: - debug(f'Control resize: op=after image={output_image} width={width_after} height={height_after} mode={resize_mode_after} name={resize_name_after}') - output_image = images.resize_image(resize_mode_after, output_image, width_after, height_after, resize_name_after) + debug(f'Control resize: op=after image={output_image} width={width_after} height={height_after} mode={resize_mode_after} name={resize_name_after} context="{resize_context_after}"') + output_image = images.resize_image(resize_mode_after, output_image, width_after, height_after, resize_name_after, context=resize_context_after) output_images.append(output_image) if shared.opts.include_mask and not script_run: diff --git a/modules/control/unit.py b/modules/control/unit.py index b2b102b9b..51101493d 100644 --- a/modules/control/unit.py +++ b/modules/control/unit.py @@ -36,6 +36,7 @@ def __init__(self, image_preview = None, control_start = None, control_end = None, + control_mode = None, result_txt = None, extra_controls: list = [], ): @@ -46,6 +47,7 @@ def __init__(self, self.end = end or 1 self.start = min(self.start, self.end) self.end = max(self.start, self.end) + self.mode = None # processor always exists, adapter and controlnet are optional self.process: processors.Processor = processors.Processor() self.adapter: t2iadapter.Adapter = None @@ -83,6 +85,12 @@ def control_change(start, end): self.start = min(start, end) self.end = max(start, end) + def control_mode_change(mode): + self.mode = mode - 1 if mode > 0 else None + + def control_mode_show(model_id): + return gr.update(visible='union' in model_id.lower()) + def adapter_extra(c1): self.factor = c1 @@ -156,6 +164,7 @@ def set_image(image): self.controlnet.load(model_id) else: model_id.change(fn=self.controlnet.load, inputs=[model_id], outputs=[result_txt], show_progress=True) + model_id.change(fn=control_mode_show, inputs=[model_id], outputs=[control_mode], show_progress=False) if extra_controls is not None and len(extra_controls) > 0: extra_controls[0].change(fn=controlnet_extra, inputs=extra_controls) elif self.type == 
'xs': @@ -202,3 +211,5 @@ def set_image(image): if control_start is not None and control_end is not None: control_start.change(fn=control_change, inputs=[control_start, control_end]) control_end.change(fn=control_change, inputs=[control_start, control_end]) + if control_mode is not None: + control_mode.change(fn=control_mode_change, inputs=[control_mode]) diff --git a/modules/control/units/controlnet.py b/modules/control/units/controlnet.py index 419e119fc..a200f59a2 100644 --- a/modules/control/units/controlnet.py +++ b/modules/control/units/controlnet.py @@ -1,10 +1,10 @@ import os import time from typing import Union -from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline, ControlNetModel, StableDiffusionControlNetPipeline, StableDiffusionXLControlNetPipeline +from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline, FluxPipeline, ControlNetModel from modules.control.units import detect from modules.shared import log, opts, listdir -from modules import errors, sd_models +from modules import errors, sd_models, devices what = 'ControlNet' @@ -80,6 +80,7 @@ all_models = {} all_models.update(predefined_sd15) all_models.update(predefined_sdxl) +all_models.update(predefined_f1) cache_dir = 'models/control/controlnet' @@ -139,6 +140,19 @@ def reset(self): self.model = None self.model_id = None + def get_class(self): + import modules.shared + if modules.shared.sd_model_type == 'sd': + from diffusers import ControlNetModel as model_class # pylint: disable=reimported + elif modules.shared.sd_model_type == 'sdxl': + from diffusers import ControlNetModel as model_class # pylint: disable=reimported # sdxl shares same model class + elif modules.shared.sd_model_type == 'f1': + from diffusers import FluxControlNetModel as model_class + else: + log.error(f'Control {what}: type={modules.shared.sd_model_type} unsupported model') + return None + return model_class + def load_safetensors(self, model_path): name = os.path.splitext(model_path)[0] config_path = None @@ -164,7 +178,8 @@ def load_safetensors(self, model_path): config_path = f'{name}.json' if config_path is not None: self.load_config['original_config_file '] = config_path - self.model = ControlNetModel.from_single_file(model_path, **self.load_config) + cls = self.get_class() + self.model = cls.from_single_file(model_path, **self.load_config) def load(self, model_id: str = None) -> str: try: @@ -189,7 +204,8 @@ def load(self, model_id: str = None) -> str: if '/bin' in model_path: model_path = model_path.replace('/bin', '') self.load_config['use_safetensors'] = False - self.model = ControlNetModel.from_pretrained(model_path, **self.load_config) + cls = self.get_class() + self.model = cls.from_pretrained(model_path, **self.load_config) if self.dtype is not None: self.model.to(self.dtype) if "ControlNet" in opts.nncf_compress_weights: @@ -223,7 +239,7 @@ def load(self, model_id: str = None) -> str: class ControlNetPipeline(): - def __init__(self, controlnet: Union[ControlNetModel, list[ControlNetModel]], pipeline: Union[StableDiffusionXLPipeline, StableDiffusionPipeline], dtype = None): + def __init__(self, controlnet: Union[ControlNetModel, list[ControlNetModel]], pipeline: Union[StableDiffusionXLPipeline, StableDiffusionPipeline, FluxPipeline], dtype = None): t0 = time.time() self.orig_pipeline = pipeline self.pipeline = None @@ -231,6 +247,7 @@ def __init__(self, controlnet: Union[ControlNetModel, list[ControlNetModel]], pi log.error('Control model pipeline: model not loaded') return elif detect.is_sdxl(pipeline): 
+ from diffusers import StableDiffusionXLControlNetPipeline self.pipeline = StableDiffusionXLControlNetPipeline( vae=pipeline.vae, text_encoder=pipeline.text_encoder, @@ -242,8 +259,8 @@ def __init__(self, controlnet: Union[ControlNetModel, list[ControlNetModel]], pi feature_extractor=getattr(pipeline, 'feature_extractor', None), controlnet=controlnet, # can be a list ) - sd_models.move_model(self.pipeline, pipeline.device) elif detect.is_sd15(pipeline): + from diffusers import StableDiffusionControlNetPipeline self.pipeline = StableDiffusionControlNetPipeline( vae=pipeline.vae, text_encoder=pipeline.text_encoder, @@ -257,17 +274,33 @@ def __init__(self, controlnet: Union[ControlNetModel, list[ControlNetModel]], pi ) sd_models.move_model(self.pipeline, pipeline.device) elif detect.is_f1(pipeline): - log.warning('Control model pipeline: class=FluxPipeline unsupported model type') + from diffusers import FluxControlNetPipeline + self.pipeline = FluxControlNetPipeline( + vae=pipeline.vae, + text_encoder=pipeline.text_encoder, + text_encoder_2=pipeline.text_encoder_2, + tokenizer=pipeline.tokenizer, + tokenizer_2=pipeline.tokenizer_2, + transformer=pipeline.transformer, + scheduler=pipeline.scheduler, + controlnet=controlnet, # can be a list + ) else: log.error(f'Control {what} pipeline: class={pipeline.__class__.__name__} unsupported model type') return - if dtype is not None and self.pipeline is not None: + + if self.pipeline is None: + log.error(f'Control {what} pipeline: not initialized') + return + if dtype is not None: self.pipeline = self.pipeline.to(dtype) + if opts.diffusers_offload_mode == 'none': + sd_models.move_model(self.pipeline, devices.device) + from modules.sd_models import set_diffuser_offload + set_diffuser_offload(self.pipeline, 'model') + t1 = time.time() - if self.pipeline is not None: - log.debug(f'Control {what} pipeline: class={self.pipeline.__class__.__name__} time={t1-t0:.2f}') - else: - log.error(f'Control {what} pipeline: not initialized') + log.debug(f'Control {what} pipeline: class={self.pipeline.__class__.__name__} time={t1-t0:.2f}') def restore(self): self.pipeline = None diff --git a/modules/dcsolver/__init__.py b/modules/dcsolver/__init__.py new file mode 100644 index 000000000..a1ccfbeba --- /dev/null +++ b/modules/dcsolver/__init__.py @@ -0,0 +1,1096 @@ +# Copyright 2023 TSAIL Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# DISCLAIMER: check https://arxiv.org/abs/2302.04867 and https://github.com/wl-zhao/UniPC for more info +# The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py + +import math +from typing import List, Optional, Tuple, Union + +import numpy as np +import torch + +# from ..configuration_utils import ConfigMixin, register_to_config +# from ..utils import deprecate +# from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput +from diffusers.configuration_utils import ConfigMixin, register_to_config +from diffusers.utils import deprecate +from diffusers.schedulers.scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps, + max_beta=0.999, + alpha_transform_type="cosine", +): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + + Args: + num_diffusion_timesteps (`int`): the number of betas to produce. + max_beta (`float`): the maximum beta to use; use values lower than 1 to + prevent singularities. + alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. + Choose from `cosine` or `exp` + + Returns: + betas (`np.ndarray`): the betas used by the scheduler to step the model outputs + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class DCSolverMultistepScheduler(SchedulerMixin, ConfigMixin): + """ + `UniPCMultistepScheduler` is a training-free framework designed for the fast sampling of diffusion models. + + Dynamic Extropolation + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`str`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + solver_order (`int`, default `2`): + The UniPC order which can be any positive integer. The effective order of accuracy is `solver_order + 1` + due to the UniC. It is recommended to use `solver_order=2` for guided sampling, and `solver_order=3` for + unconditional sampling. 
+ prediction_type (`str`, defaults to `epsilon`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://imagen.research.google/video/paper.pdf) paper). + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to 0.995): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to 1.0): + The threshold value for dynamic thresholding. Valid only when `thresholding=True` and `predict_x0=True`. + predict_x0 (`bool`, defaults to `True`): + Whether to use the updating algorithm on the predicted x0. + solver_type (`str`, default `bh2`): + Solver type for UniPC. It is recommended to use `bh1` for unconditional sampling when steps < 10, and `bh2` + otherwise. + lower_order_final (`bool`, default `True`): + Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. This can + stabilize the sampling of DPMSolver for steps < 15, especially for steps <= 10. + disable_corrector (`list`, default `[]`): + Decides which step to disable the corrector to mitigate the misalignment between `epsilon_theta(x_t, c)` + and `epsilon_theta(x_t^c, c)` which can influence convergence for a large guidance scale. Corrector is + usually disabled during the first few steps. + solver_p (`SchedulerMixin`, default `None`): + Any other scheduler that if specified, the algorithm becomes `solver_p + UniC`. + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + timestep_spacing (`str`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps. You can use a combination of `offset=1` and + `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable + Diffusion. 
+ """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: Optional[Union[np.ndarray, List[float]]] = None, + solver_order: int = 2, + dc_order: int = 2, + prediction_type: str = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + predict_x0: bool = True, + solver_type: str = "bh2", + lower_order_final: bool = True, + disable_corrector: List[int] = [], + solver_p: SchedulerMixin = None, + use_karras_sigmas: Optional[bool] = False, + timestep_spacing: str = "linspace", + steps_offset: int = 0, + # ddim_gt_path: str = None, + ddim_gt=None, + num_iters=20, + bound_func='none', + ): + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + # Currently we only support VP-type noise schedule + self.alpha_t = torch.sqrt(self.alphas_cumprod) + self.sigma_t = torch.sqrt(1 - self.alphas_cumprod) + self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t) + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + if solver_type not in ["bh1", "bh2"]: + if solver_type in ["midpoint", "heun", "logrho"]: + self.register_to_config(solver_type="bh2") + else: + raise NotImplementedError(f"{solver_type} does is not implemented for {self.__class__}") + + self.predict_x0 = predict_x0 + # setable values + self.num_inference_steps = None + timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=np.float32)[::-1].copy() + self.timesteps = torch.from_numpy(timesteps) + self.buffer_size = max(solver_order, dc_order + 1) + self.num_iters = num_iters + self.model_outputs = [None] * self.buffer_size + self.timestep_list = [None] * self.buffer_size + self.lower_order_nums = 0 + self.disable_corrector = disable_corrector + self.solver_p = solver_p + self.last_sample = None + self._step_index = None + + if ddim_gt is not None: + self.ddim_gt = dict( + ts=ddim_gt['ts'].cpu().numpy(), + intermediates=ddim_gt['intermediates'].cpu().numpy(), + ) + else: + self.ddim_gt = None + self.bound_func = bound_func + self.dc_order = dc_order + + @property + def step_index(self): + """ + The index counter for current timestep. It will increae 1 after each scheduler step. + """ + return self._step_index + + def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. 
+ device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = ( + np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1) + .round()[::-1][:-1] + .copy() + .astype(np.int64) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // (num_inference_steps + 1) + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps + 1) * step_ratio).round()[::-1][:-1].copy().astype(np.int64) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.arange(self.config.num_train_timesteps, 0, -step_ratio).round().copy().astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + if self.config.use_karras_sigmas: + log_sigmas = np.log(sigmas) + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round() + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + else: + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5 + sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) + + self.sigmas = torch.from_numpy(sigmas) + self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.int64) + + self.num_inference_steps = len(timesteps) + + self.model_outputs = [None] * self.buffer_size + self.timestep_list = [None] * self.buffer_size + + self.lower_order_nums = 0 + self.last_sample = None + if self.solver_p: + self.solver_p.set_timesteps(self.num_inference_steps, device=device) + + # add an index counter for schedulers that allow duplicated timesteps + self._step_index = None + # also init the ratios + self.dc_ratios = [] + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor: + """ + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." 
+ + https://arxiv.org/abs/2205.11487 + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma, log_sigmas): + # get log sigma + log_sigma = np.log(sigma) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._sigma_to_alpha_sigma_t + def _sigma_to_alpha_sigma_t(self, sigma): + alpha_t = 1 / ((sigma**2 + 1) ** 0.5) + sigma_t = sigma * alpha_t + + return alpha_t, sigma_t + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor: + """Constructs the noise schedule of Karras et al. (2022).""" + + sigma_min: float = in_sigmas[-1].item() + sigma_max: float = in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + def convert_model_output( + self, + model_output: torch.FloatTensor, + *args, + sample: torch.FloatTensor = None, + **kwargs, + ) -> torch.FloatTensor: + r""" + Convert the model output to the corresponding type the UniPC algorithm needs. + + Args: + model_output (`torch.FloatTensor`): + The direct output from the learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.FloatTensor`: + The converted model output. 
+ """ + timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError("missing `sample` as a required keyward argument") + if timestep is not None: + deprecate( + "timesteps", + "1.0.0", + "Passing `timesteps` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + + if self.predict_x0: + if self.config.prediction_type == "epsilon": + x0_pred = (sample - sigma_t * model_output) / alpha_t + elif self.config.prediction_type == "sample": + x0_pred = model_output + elif self.config.prediction_type == "v_prediction": + x0_pred = alpha_t * sample - sigma_t * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction` for the UniPCMultistepScheduler." + ) + + if self.config.thresholding: + x0_pred = self._threshold_sample(x0_pred) + + return x0_pred + else: + if self.config.prediction_type == "epsilon": + return model_output + elif self.config.prediction_type == "sample": + epsilon = (sample - alpha_t * model_output) / sigma_t + return epsilon + elif self.config.prediction_type == "v_prediction": + epsilon = alpha_t * model_output + sigma_t * sample + return epsilon + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction` for the UniPCMultistepScheduler." + ) + + + def multistep_uni_p_bh_update( + self, + model_output: torch.FloatTensor = None, + *args, + sample: torch.FloatTensor = None, + order: int = None, + **kwargs, + ) -> torch.FloatTensor: + """ + One step for the UniP (B(h) version). Alternatively, `self.solver_p` is used if is specified. + + Args: + model_output (`torch.FloatTensor`): + The direct output from the learned diffusion model at the current timestep. + prev_timestep (`int`): + The previous discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + A current instance of a sample created by the diffusion process. + order (`int`): + The order of UniP at this timestep (corresponds to the *p* in UniPC-p). + + Returns: + `torch.FloatTensor`: + The sample tensor at the previous timestep. 
+ """ + prev_timestep = args[0] if len(args) > 0 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError(" missing `sample` as a required keyward argument") + if order is None: + if len(args) > 2: + order = args[2] + else: + raise ValueError(" missing `order` as a required keyward argument") + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + model_output_list = self.model_outputs + + s0 = self.timestep_list[-1] + m0 = model_output_list[-1] + assert m0 is not None + x = sample + + if self.solver_p: + raise NotImplementedError() + + sigma_t, sigma_s0 = self.sigmas[self.step_index + 1], self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + + h = lambda_t - lambda_s0 + device = sample.device + + rks = [] + D1s = [] + for i in range(1, order): + si = self.step_index - i + mi = model_output_list[-(i + 1)] + alpha_si, sigma_si = self._sigma_to_alpha_sigma_t(self.sigmas[si]) + lambda_si = torch.log(alpha_si) - torch.log(sigma_si) + rk = (lambda_si - lambda_s0) / h + rks.append(rk) + D1s.append((mi - m0) / rk) + + rks.append(1.0) + rks = torch.tensor(rks, device=device) + + R = [] + b = [] + + hh = -h if self.predict_x0 else h + h_phi_1 = torch.expm1(hh) # h\phi_1(h) = e^h - 1 + h_phi_k = h_phi_1 / hh - 1 + + factorial_i = 1 + + if self.config.solver_type == "bh1": + B_h = hh + elif self.config.solver_type == "bh2": + B_h = torch.expm1(hh) + else: + raise NotImplementedError() + + for i in range(1, order + 1): + R.append(torch.pow(rks, i - 1)) + b.append(h_phi_k * factorial_i / B_h) + factorial_i *= i + 1 + h_phi_k = h_phi_k / hh - 1 / factorial_i + + R = torch.stack(R) + b = torch.tensor(b, device=device) + + if len(D1s) > 0: + D1s = torch.stack(D1s, dim=1) # (B, K) + # for order 2, we use a simplified version + if order == 2: + rhos_p = torch.tensor([0.5], dtype=x.dtype, device=device) + else: + rhos_p = torch.linalg.solve(R[:-1, :-1], b[:-1]) + else: + D1s = None + + if self.predict_x0: + x_t_ = sigma_t / sigma_s0 * x - alpha_t * h_phi_1 * m0 + if D1s is not None: + pred_res = torch.einsum("k,bkc...->bc...", rhos_p, D1s) + else: + pred_res = 0 + x_t = x_t_ - alpha_t * B_h * pred_res + else: + x_t_ = alpha_t / alpha_s0 * x - sigma_t * h_phi_1 * m0 + if D1s is not None: + pred_res = torch.einsum("k,bkc...->bc...", rhos_p, D1s) + else: + pred_res = 0 + x_t = x_t_ - sigma_t * B_h * pred_res + + x_t = x_t.to(x.dtype) + return x_t + + def multistep_uni_c_bh_update( + self, + this_model_output: torch.FloatTensor, + *args, + last_sample: torch.FloatTensor = None, + this_sample: torch.FloatTensor = None, + order: int = None, + **kwargs, + ) -> torch.FloatTensor: + """ + One step for the UniC (B(h) version). + + Args: + this_model_output (`torch.FloatTensor`): + The model outputs at `x_t`. + this_timestep (`int`): + The current timestep `t`. + last_sample (`torch.FloatTensor`): + The generated sample before the last predictor `x_{t-1}`. + this_sample (`torch.FloatTensor`): + The generated sample after the last predictor `x_{t}`. + order (`int`): + The `p` of UniC-p at this step. The effective order of accuracy should be `order + 1`. 
+
+        Returns:
+            `torch.FloatTensor`:
+                The corrected sample tensor at the current timestep.
+        """
+        this_timestep = args[0] if len(args) > 0 else kwargs.pop("this_timestep", None)
+        if last_sample is None:
+            if len(args) > 1:
+                last_sample = args[1]
+            else:
+                raise ValueError("missing `last_sample` as a required keyword argument")
+        if this_sample is None:
+            if len(args) > 2:
+                this_sample = args[2]
+            else:
+                raise ValueError("missing `this_sample` as a required keyword argument")
+        if order is None:
+            if len(args) > 3:
+                order = args[3]
+            else:
+                raise ValueError("missing `order` as a required keyword argument")
+        if this_timestep is not None:
+            deprecate(
+                "this_timestep",
+                "1.0.0",
+                "Passing `this_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`",
+            )
+
+        model_output_list = self.model_outputs
+
+        m0 = model_output_list[-1]
+        x = last_sample
+        x_t = this_sample
+        model_t = this_model_output
+
+        sigma_t, sigma_s0 = self.sigmas[self.step_index], self.sigmas[self.step_index - 1]
+        alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t)
+        alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0)
+
+        lambda_t = torch.log(alpha_t) - torch.log(sigma_t)
+        lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0)
+
+        h = lambda_t - lambda_s0
+        device = this_sample.device
+
+        rks = []
+        D1s = []
+        for i in range(1, order):
+            si = self.step_index - (i + 1)
+            mi = model_output_list[-(i + 1)]
+            alpha_si, sigma_si = self._sigma_to_alpha_sigma_t(self.sigmas[si])
+            lambda_si = torch.log(alpha_si) - torch.log(sigma_si)
+            rk = (lambda_si - lambda_s0) / h
+            rks.append(rk)
+            D1s.append((mi - m0) / rk)
+
+        rks.append(1.0)
+        rks = torch.tensor(rks, device=device)
+
+        R = []
+        b = []
+
+        hh = -h if self.predict_x0 else h
+        h_phi_1 = torch.expm1(hh)  # h\phi_1(h) = e^h - 1
+        h_phi_k = h_phi_1 / hh - 1
+
+        factorial_i = 1
+
+        if self.config.solver_type == "bh1":
+            B_h = hh
+        elif self.config.solver_type == "bh2":
+            B_h = torch.expm1(hh)
+        else:
+            raise NotImplementedError()
+
+        for i in range(1, order + 1):
+            R.append(torch.pow(rks, i - 1))
+            b.append(h_phi_k * factorial_i / B_h)
+            factorial_i *= i + 1
+            h_phi_k = h_phi_k / hh - 1 / factorial_i
+
+        R = torch.stack(R)
+        b = torch.tensor(b, device=device)
+
+        if len(D1s) > 0:
+            D1s = torch.stack(D1s, dim=1)
+        else:
+            D1s = None
+
+        # for order 1, we use a simplified version
+        if order == 1:
+            rhos_c = torch.tensor([0.5], dtype=x.dtype, device=device)
+        else:
+            rhos_c = torch.linalg.solve(R, b)
+
+        if self.predict_x0:
+            x_t_ = sigma_t / sigma_s0 * x - alpha_t * h_phi_1 * m0
+            if D1s is not None:
+                corr_res = torch.einsum("k,bkc...->bc...", rhos_c[:-1], D1s)
+            else:
+                corr_res = 0
+            D1_t = model_t - m0
+            x_t = x_t_ - alpha_t * B_h * (corr_res + rhos_c[-1] * D1_t)
+        else:
+            x_t_ = alpha_t / alpha_s0 * x - sigma_t * h_phi_1 * m0
+            if D1s is not None:
+                corr_res = torch.einsum("k,bkc...->bc...", rhos_c[:-1], D1s)
+            else:
+                corr_res = 0
+            D1_t = model_t - m0
+            x_t = x_t_ - sigma_t * B_h * (corr_res + rhos_c[-1] * D1_t)
+        x_t = x_t.to(x.dtype)
+        return x_t
+
+    def _init_step_index(self, timestep):
+        if isinstance(timestep, torch.Tensor):
+            timestep = timestep.to(self.timesteps.device)
+
+        index_candidates = (self.timesteps == timestep).nonzero()
+
+        if len(index_candidates) == 0:
+            step_index = len(self.timesteps) - 1
+        # The sigma index that is taken for the **very** first `step`
+        # is always the second index (or the last index if
there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + elif len(index_candidates) > 1: + step_index = index_candidates[1].item() + else: + step_index = index_candidates[0].item() + + self._step_index = step_index + + def dynamic_compensation(self, model_prev_list, t_prev_list, ratio): + len_buffer = len([t for t in t_prev_list if t is not None]) + if len_buffer < 2: + return None + + t_ = ratio * (t_prev_list[-1] - t_prev_list[-2]) + t_prev_list[-2] + + inter_order = min(self.dc_order + 1, 4) + + if inter_order is not None: + model_t_dc = torch.zeros_like(model_prev_list[-1]) + for i in range(inter_order): + term = model_prev_list[-(i + 1)] + for j in range(inter_order): + if i != j: + para = (t_ - t_prev_list[-(j + 1)]) / (t_prev_list[-(i + 1)] - t_prev_list[-(j + 1)]) + term = term * para + model_t_dc = model_t_dc + term + else: + model_t_dc = None + return model_t_dc + + def find_optim_ratio(self, sample, ratio_initial=1.0): + if self.bound_func == 'tanh': + bound_func = lambda x: torch.nn.functional.tanh(x) * 0.5 + ratio_initial + param_initial = 0. + else: + bound_func = lambda x: x + param_initial = ratio_initial + + # step 1: define the parameters + if self.step_index < len(self.timesteps) - 2: + scalar_t = self.timesteps[self.step_index + 1].item() + else: + scalar_t = 0 + ratio_param = torch.nn.Parameter(torch.tensor([param_initial], device=sample.device), requires_grad=True) + + sample_clone = sample.clone() + + index = np.where(self.ddim_gt['ts'] >= scalar_t)[0].max() + batch_size = sample.shape[0] + + x_t_gt = torch.from_numpy(self.ddim_gt['intermediates'][:batch_size, index]).to(sample.device) # suppose the first batch + + model_t_bak = self.model_outputs[-1] + def closure(ratio_param): + ratio_bound = bound_func(ratio_param) + # torch.nn.functional.tanh(ratio_param) * 0.5 + ratio_initial + sample = sample_clone.clone() + model_t_dc = self.dynamic_compensation(self.model_outputs, self.timestep_list, ratio=ratio_bound) + if model_t_dc is not None: + self.model_outputs[-1] = model_t_dc + self.last_sample = sample + # run predictor + sample = self.multistep_uni_p_bh_update( + sample=sample, + order=self.this_order, + ) + # run the next corrector + self._step_index += 1 + use_corrector = ( + self.step_index > 0 and self.step_index - 1 not in self.disable_corrector \ + and self.last_sample is not None \ + and self.step_index < len(self.timesteps) + ) + if use_corrector: + model_output = self.model_wrapper(sample, self.timesteps[self.step_index]) + model_output_convert = self.convert_model_output(model_output, sample=sample) + sample = self.multistep_uni_c_bh_update( + this_model_output=model_output_convert, + last_sample=self.last_sample, + this_sample=sample, + order=self.this_order, + ) + x_t_pred = sample + loss = torch.nn.functional.mse_loss(x_t_pred, x_t_gt) + # rewind + self._step_index -= 1 + self.model_outputs[-1] = model_t_bak + return loss + + optimizer = torch.optim.AdamW([ratio_param], lr=0.1) + for iter_ in range(self.num_iters): + optimizer.zero_grad() + loss = closure(ratio_param) + loss.backward() + optimizer.step() + ratio_bound = bound_func(ratio_param) + print(f'iter [{iter_}]', ratio_bound.item(), loss.item()) + + torch.cuda.empty_cache() + return ratio_bound.data.detach().item() + + def cascade_polynomial_regression(self, test_CFG, test_NFE, cpr_path): + def f1(x, a, b, c): + return a * x ** 2 + b * x + c # np.log(np.abs(x - c)) + b + + def f2(x, a, 
b, c): + return a * x ** 2 + b * x + c # a * np.exp(-b * x) + c + + def predict(xs, *coeffs): + CFG, NFE, x = xs[0], xs[1], xs[2] + CFG = CFG / 12 + x = x / NFE + NFE = NFE / 50 + NFE = NFE.reshape(-1, 1, 1) + CFG = CFG.reshape(-1, 1) + coeffs = np.array(coeffs).reshape(-1, 3, 3) + coeffs1 = f2(NFE, coeffs[..., 0], coeffs[..., 1], coeffs[..., 2]) + coeffs2 = f1(CFG, coeffs1[..., 0], coeffs1[..., 1], coeffs1[..., 2]) + + x_pow = 1 + result = 0 + for i in range(coeffs2.shape[-1]): + result = result + coeffs2[:, i] * x_pow + x_pow = x_pow * x + return result + + cpr_coeffs = np.load(cpr_path) + ratios = [] + steps = list(range(1, test_NFE + 1)) + for step in steps: + if step < 3: + ratio = 1 + else: + infer_x = np.array([test_CFG, test_NFE, step]).reshape(3, -1) + ratio = predict(infer_x, *cpr_coeffs).item() + ratios.append(ratio) + return ratios + + + def step(self, *args, **kwargs): + if self.ddim_gt is None: + return self._step(*args, **kwargs) + else: + return self._step_search(*args, **kwargs) + + @torch.no_grad() + def _step_search( + self, + model_output: torch.FloatTensor, + timestep: int, + sample: torch.FloatTensor, + return_dict: bool = True, + ) -> Union[SchedulerOutput, Tuple]: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the multistep UniPC. + + Args: + model_output (`torch.FloatTensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. 
+ + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + use_corrector = ( + self.step_index > 0 and self.step_index - 1 not in self.disable_corrector and self.last_sample is not None + ) + + model_output_convert = self.convert_model_output(model_output, sample=sample) + if use_corrector: + sample = self.multistep_uni_c_bh_update( + this_model_output=model_output_convert, + last_sample=self.last_sample, + this_sample=sample, + order=self.this_order, + ) + + for i in range(self.buffer_size - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.timestep_list[i] = self.timestep_list[i + 1] + + self.model_outputs[-1] = model_output_convert + self.timestep_list[-1] = timestep + + if self.config.lower_order_final: + this_order = min(self.config.solver_order, len(self.timesteps) - self.step_index) + else: + this_order = self.config.solver_order + + self.this_order = min(this_order, self.lower_order_nums + 1) # warmup for multistep + assert self.this_order > 0 + + # here we will use dynamic extrapolation to update the model_output + with torch.enable_grad(): + if self.step_index > 1: + ratio_optim = self.find_optim_ratio(sample, ratio_initial=1.0) + else: + ratio_optim = 1.0 + self.dc_ratios.append(ratio_optim) + + # now update by dynamic compensation + if ratio_optim != 1.0: + self.model_outputs[-1] = self.dynamic_compensation(self.model_outputs, self.timestep_list, ratio=ratio_optim) + + prev_sample = self.multistep_uni_p_bh_update( + # model_output=model_output, # pass the original non-converted model output, in case solver-p is used + sample=sample, + order=self.this_order, + ) + self.last_sample = sample + if self.lower_order_nums < self.config.solver_order: + self.lower_order_nums += 1 + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + def _step( + self, + model_output: torch.FloatTensor, + timestep: int, + sample: torch.FloatTensor, + return_dict: bool = True, + ) -> Union[SchedulerOutput, Tuple]: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the multistep UniPC. + + Args: + model_output (`torch.FloatTensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. 
+ + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + use_corrector = ( + self.step_index > 0 and self.step_index - 1 not in self.disable_corrector and self.last_sample is not None + ) + + model_output_convert = self.convert_model_output(model_output, sample=sample) + if use_corrector: + sample = self.multistep_uni_c_bh_update( + this_model_output=model_output_convert, + last_sample=self.last_sample, + this_sample=sample, + order=self.this_order, + ) + + for i in range(self.buffer_size - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.timestep_list[i] = self.timestep_list[i + 1] + + self.model_outputs[-1] = model_output_convert + self.timestep_list[-1] = timestep + + if self.config.lower_order_final: + this_order = min(self.config.solver_order, len(self.timesteps) - self.step_index) + else: + this_order = self.config.solver_order + + self.this_order = min(this_order, self.lower_order_nums + 1) # warmup for multistep + assert self.this_order > 0 + + self.last_sample = sample + + # here we will use dynamic compensation to update the model_output + # dc_ratio = self.dc_ratios[self.step_index] + # if dc_ratio != 1.0: + # self.model_outputs[-1] = self.dynamic_compensation(self.model_outputs, self.timestep_list, dc_ratio) + + prev_sample = self.multistep_uni_p_bh_update( + model_output=model_output, # pass the original non-converted model output, in case solver-p is used + sample=sample, + order=self.this_order, + ) + + if self.lower_order_nums < self.config.solver_order: + self.lower_order_nums += 1 + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + + def scale_model_input(self, sample: torch.FloatTensor, *args, **kwargs) -> torch.FloatTensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.FloatTensor`): + The input sample. + + Returns: + `torch.FloatTensor`: + A scaled input sample. 
+ """ + return sample + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.add_noise + def add_noise( + self, + original_samples: torch.FloatTensor, + noise: torch.FloatTensor, + timesteps: torch.IntTensor, + ) -> torch.FloatTensor: + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + noisy_samples = alpha_t * original_samples + sigma_t * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/devices.py b/modules/devices.py index 1d6ae3703..a006e1ca3 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -128,7 +128,7 @@ def get_device_for(task): return get_optimal_device() -def torch_gc(force=False): +def torch_gc(force=False, fast=False): t0 = time.time() mem = memstats.memory_stats() gpu = mem.get('gpu', {}) @@ -151,7 +151,7 @@ def torch_gc(force=False): return # actual gc - collected = gc.collect() # python gc + collected = gc.collect() if not fast else 0 # python gc if cuda_ok: try: with torch.cuda.device(get_cuda_device_string()): @@ -228,6 +228,11 @@ def test_bf16(): def set_cuda_params(): if debug: log.debug(f'Verifying Torch settings: cuda={cuda_ok}') + if backend == "ipex": + try: + torch.xpu.set_fp32_math_mode(mode=torch.xpu.FP32MathMode.TF32) + except Exception: + pass if cuda_ok: try: torch.backends.cuda.matmul.allow_tf32 = True diff --git a/modules/face/__init__.py b/modules/face/__init__.py index d1ded8c37..d5d51e647 100644 --- a/modules/face/__init__.py +++ b/modules/face/__init__.py @@ -72,7 +72,7 @@ def ui(self, _is_img2img): id_strength = gr.Slider(label='Strength', minimum=0.0, maximum=2.0, step=0.01, value=1.0) id_conditioning = gr.Slider(label='Control', minimum=0.0, maximum=2.0, step=0.01, value=0.5) with gr.Row(visible=True): - id_cache = gr.Checkbox(label='Cache model', value=False) + id_cache = gr.Checkbox(label='Cache model', value=True) with gr.Group(visible=False) as cfg_photomaker: with gr.Row(): gr.HTML('  Tenecent ARC Lab PhotoMaker
') diff --git a/modules/gr_tempdir.py b/modules/gr_tempdir.py index 90a8f7376..0ee15b314 100644 --- a/modules/gr_tempdir.py +++ b/modules/gr_tempdir.py @@ -70,7 +70,7 @@ def pil_to_temp_file(self, img: Image, dir: str, format="png") -> str: # pylint: img.save(name, pnginfo=(metadata if use_metadata else None)) img.already_saved_as = name size = os.path.getsize(name) - shared.log.debug(f'Save temp: image="{name}" resolution={img.width}x{img.height} size={size}') + shared.log.debug(f'Save temp: image="{name}" width={img.width} height={img.height} size={size}') params = ', '.join([f'{k}: {v}' for k, v in img.info.items()]) params = params[12:] if params.startswith('parameters: ') else params with open(os.path.join(paths.data_path, "params.txt"), "w", encoding="utf8") as file: diff --git a/modules/images.py b/modules/images.py index bb194dfe2..6882e97f5 100644 --- a/modules/images.py +++ b/modules/images.py @@ -5,6 +5,7 @@ import math import json import uuid +import time import queue import string import random @@ -214,15 +215,13 @@ def draw_prompt_matrix(im, width, height, all_prompts, margin=0): return draw_grid_annotations(im, width, height, hor_texts, ver_texts, margin) -def resize_image(resize_mode, im, width, height, upscaler_name=None, output_type='image'): - if im.width == width and im.height == height: - shared.log.debug(f'Image resize: input={im} target={width}x{height} mode={shared.resize_modes[resize_mode]} upscaler="{upscaler_name}" fn={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access +def resize_image(resize_mode, im, width, height, upscaler_name=None, output_type='image', context=None): upscaler_name = upscaler_name or shared.opts.upscaler_for_img2img def latent(im, w, h, upscaler): from modules.processing_vae import vae_encode, vae_decode import torch - latents = vae_encode(im, shared.sd_model, full_quality=False) # TODO enable full VAE mode + latents = vae_encode(im, shared.sd_model, full_quality=False) # TODO enable full VAE mode for resize-latent latents = torch.nn.functional.interpolate(latents, size=(int(h // 8), int(w // 8)), mode=upscaler["mode"], antialias=upscaler["antialias"]) im = vae_decode(latents, shared.sd_model, output_type='pil', full_quality=False)[0] return im @@ -288,9 +287,40 @@ def fill(im, color=None): res.paste(im, box=((width - im.width)//2, (height - im.height)//2)) return res + def context_aware(im, width, height, context): + import seam_carving # https://github.com/li-plus/seam-carving + if 'forward' in context: + energy_mode = "forward" + elif 'backward' in context: + energy_mode = "backward" + else: + return im + if 'Add' in context: + src_ratio = min(width / im.width, height / im.height) + src_w = int(im.width * src_ratio) + src_h = int(im.height * src_ratio) + src_image = resize(im, src_w, src_h) + elif 'Remove' in context: + ratio = width / height + src_ratio = im.width / im.height + src_w = width if ratio > src_ratio else im.width * height // im.height + src_h = height if ratio <= src_ratio else im.height * width // im.width + src_image = resize(im, src_w, src_h) + else: + return im + res = Image.fromarray(seam_carving.resize( + src_image, # source image (rgb or gray) + size=(width, height), # target size + energy_mode=energy_mode, # choose from {backward, forward} + order="width-first", # choose from {width-first, height-first} + keep_mask=None, # object mask to protect from removal + )) + return res + + t0 = time.time() if resize_mode is None: resize_mode = 0 - if resize_mode == 0 or (im.width == width and im.height == 
height): # none + if resize_mode == 0 or (im.width == width and im.height == height) or (width == 0 and height == 0): # none res = im.copy() elif resize_mode == 1: # fixed res = resize(im, width, height) @@ -302,12 +332,14 @@ def fill(im, color=None): from modules import masking res = fill(im, color=0) res, _mask = masking.outpaint(res) + elif resize_mode == 5: # context-aware + res = context_aware(im, width, height, context) else: res = im.copy() shared.log.error(f'Invalid resize mode: {resize_mode}') - if output_type == 'np': - return np.array(res) - return res + t1 = time.time() + shared.log.debug(f'Image resize: input={im} width={width} height={height} mode="{shared.resize_modes[resize_mode]}" upscaler="{upscaler_name}" context="{context}" type={output_type} result={res} time={t1-t0:.2f} fn={sys._getframe(1).f_code.co_filename}:{sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access + return np.array(res) if output_type == 'np' else res re_nonletters = re.compile(r'[\s' + string.punctuation + ']+') @@ -596,7 +628,7 @@ def atomically_save_image(): shared.log.error(f'Save failed: file="{fn}" format={image_format} args={save_args} {e}') errors.display(e, 'Image save') size = os.path.getsize(fn) if os.path.exists(fn) else 0 - shared.log.info(f'Save: image="{fn}" type={image_format} resolution={image.width}x{image.height} size={size}') + shared.log.info(f'Save: image="{fn}" type={image_format} width={image.width} height={image.height} size={size}') if shared.opts.save_log_fn != '' and len(exifinfo) > 0: fn = os.path.join(paths.data_path, shared.opts.save_log_fn) if not fn.endswith('.json'): @@ -621,9 +653,9 @@ def save_image(image, path, basename='', seed=None, prompt=None, extension=share debug(f'Save: fn={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access if image is None: shared.log.warning('Image is none') - return None, None + return None, None, None if not check_grid_size([image]): - return None, None + return None, None, None if path is None or path == '': # set default path to avoid errors when functions are triggered manually or via api and param is not set path = shared.opts.outdir_save namegen = FilenameGenerator(p, seed, prompt, image, grid=grid) @@ -668,7 +700,7 @@ def save_image(image, path, basename='', seed=None, prompt=None, extension=share debug(f'Image marked: "{params.filename}"') params.image.already_saved_as = params.filename script_callbacks.image_saved_callback(params) - return params.filename, filename_txt + return params.filename, filename_txt, exifinfo def save_video_atomic(images, filename, video_type: str = 'none', duration: float = 2.0, loop: bool = False, interpolate: int = 0, scale: float = 1.0, pad: int = 1, change: float = 0.3): @@ -719,7 +751,9 @@ def save_video(p, images, filename = None, video_type: str = 'none', duration: f return None image = images[0] if p is not None: - namegen = FilenameGenerator(p, seed=p.all_seeds[0], prompt=p.all_prompts[0], image=image) + seed = p.all_seeds[0] if getattr(p, 'all_seeds', None) is not None else p.seed + prompt = p.all_prompts[0] if getattr(p, 'all_prompts', None) is not None else p.prompt + namegen = FilenameGenerator(p, seed=seed, prompt=prompt, image=image) else: namegen = FilenameGenerator(None, seed=0, prompt='', image=image) if filename is None and p is not None: diff --git a/modules/img2img.py b/modules/img2img.py index 319d02171..4bf057d4a 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -103,6 +103,7 @@ def process_batch(p, input_files, input_dir, output_dir, 
inpaint_mask_dir, args) for k, v in items.items(): image.info[k] = v images.save_image(image, path=output_dir, basename=basename, seed=None, prompt=None, extension=ext, info=geninfo, short_filename=True, no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=image.info, forced_filename=forced_filename) + proc = modules.scripts.scripts_img2img.after(p, proc, *args) shared.log.debug(f'Processed: images={len(batch_image_files)} memory={memory_stats()} batch') @@ -132,7 +133,7 @@ def img2img(id_task: str, mode: int, selected_scale_tab, height, width, scale_by, - resize_mode, resize_name, + resize_mode, resize_name, resize_context, inpaint_full_res, inpaint_full_res_padding, inpainting_mask_invert, img2img_batch_files, img2img_batch_input_dir, img2img_batch_output_dir, img2img_batch_inpaint_mask_dir, hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio, @@ -143,7 +144,7 @@ def img2img(id_task: str, mode: int, shared.log.warning('Model not loaded') return [], '', '', 'Error: model not loaded' - debug(f'img2img: id_task={id_task}|mode={mode}|prompt={prompt}|negative_prompt={negative_prompt}|prompt_styles={prompt_styles}|init_img={init_img}|sketch={sketch}|init_img_with_mask={init_img_with_mask}|inpaint_color_sketch={inpaint_color_sketch}|inpaint_color_sketch_orig={inpaint_color_sketch_orig}|init_img_inpaint={init_img_inpaint}|init_mask_inpaint={init_mask_inpaint}|steps={steps}|sampler_index={sampler_index}||mask_blur={mask_blur}|mask_alpha={mask_alpha}|inpainting_fill={inpainting_fill}|full_quality={full_quality}|restore_faces={restore_faces}|tiling={tiling}|hidiffusion={hidiffusion}|n_iter={n_iter}|batch_size={batch_size}|cfg_scale={cfg_scale}|image_cfg_scale={image_cfg_scale}|clip_skip={clip_skip}|denoising_strength={denoising_strength}|seed={seed}|subseed{subseed}|subseed_strength={subseed_strength}|seed_resize_from_h={seed_resize_from_h}|seed_resize_from_w={seed_resize_from_w}|selected_scale_tab={selected_scale_tab}|height={height}|width={width}|scale_by={scale_by}|resize_mode={resize_mode}|resize_name={resize_name}|inpaint_full_res={inpaint_full_res}|inpaint_full_res_padding={inpaint_full_res_padding}|inpainting_mask_invert={inpainting_mask_invert}|img2img_batch_files={img2img_batch_files}|img2img_batch_input_dir={img2img_batch_input_dir}|img2img_batch_output_dir={img2img_batch_output_dir}|img2img_batch_inpaint_mask_dir={img2img_batch_inpaint_mask_dir}|override_settings_texts={override_settings_texts}') + debug(f'img2img: 
id_task={id_task}|mode={mode}|prompt={prompt}|negative_prompt={negative_prompt}|prompt_styles={prompt_styles}|init_img={init_img}|sketch={sketch}|init_img_with_mask={init_img_with_mask}|inpaint_color_sketch={inpaint_color_sketch}|inpaint_color_sketch_orig={inpaint_color_sketch_orig}|init_img_inpaint={init_img_inpaint}|init_mask_inpaint={init_mask_inpaint}|steps={steps}|sampler_index={sampler_index}||mask_blur={mask_blur}|mask_alpha={mask_alpha}|inpainting_fill={inpainting_fill}|full_quality={full_quality}|restore_faces={restore_faces}|tiling={tiling}|hidiffusion={hidiffusion}|n_iter={n_iter}|batch_size={batch_size}|cfg_scale={cfg_scale}|image_cfg_scale={image_cfg_scale}|clip_skip={clip_skip}|denoising_strength={denoising_strength}|seed={seed}|subseed{subseed}|subseed_strength={subseed_strength}|seed_resize_from_h={seed_resize_from_h}|seed_resize_from_w={seed_resize_from_w}|selected_scale_tab={selected_scale_tab}|height={height}|width={width}|scale_by={scale_by}|resize_mode={resize_mode}|resize_name={resize_name}|resize_context={resize_context}|inpaint_full_res={inpaint_full_res}|inpaint_full_res_padding={inpaint_full_res_padding}|inpainting_mask_invert={inpainting_mask_invert}|img2img_batch_files={img2img_batch_files}|img2img_batch_input_dir={img2img_batch_input_dir}|img2img_batch_output_dir={img2img_batch_output_dir}|img2img_batch_inpaint_mask_dir={img2img_batch_inpaint_mask_dir}|override_settings_texts={override_settings_texts}') if mode == 5: if img2img_batch_files is None or len(img2img_batch_files) == 0: @@ -233,6 +234,8 @@ def img2img(id_task: str, mode: int, inpainting_fill=inpainting_fill, resize_mode=resize_mode, resize_name=resize_name, + resize_context=resize_context, + scale_by=scale_by, denoising_strength=denoising_strength, image_cfg_scale=image_cfg_scale, diffusers_guidance_rescale=diffusers_guidance_rescale, @@ -246,8 +249,6 @@ def img2img(id_task: str, mode: int, hdr_boundary=hdr_boundary, hdr_threshold=hdr_threshold, hdr_maximize=hdr_maximize, hdr_max_center=hdr_max_center, hdr_max_boundry=hdr_max_boundry, hdr_color_picker=hdr_color_picker, hdr_tint_ratio=hdr_tint_ratio, override_settings=override_settings, ) - if selected_scale_tab == 1 and resize_mode != 0: - p.scale_by = scale_by p.scripts = modules.scripts.scripts_img2img p.script_args = args if mask: diff --git a/modules/intel/ipex/__init__.py b/modules/intel/ipex/__init__.py index 38fc81c07..189dd07d0 100644 --- a/modules/intel/ipex/__init__.py +++ b/modules/intel/ipex/__init__.py @@ -66,45 +66,47 @@ def ipex_init(): # pylint: disable=too-many-statements torch.cuda.__file__ = torch.xpu.__file__ # torch.cuda.is_current_stream_capturing = torch.xpu.is_current_stream_capturing - if legacy: + if legacy: torch.cuda.os = torch.xpu.os torch.cuda.Device = torch.xpu.Device torch.cuda.warnings = torch.xpu.warnings torch.cuda.classproperty = torch.xpu.classproperty - torch.cuda._initialization_lock = torch.xpu.lazy_init._initialization_lock - torch.cuda._initialized = torch.xpu.lazy_init._initialized - torch.cuda._is_in_bad_fork = torch.xpu.lazy_init._is_in_bad_fork - torch.cuda._lazy_seed_tracker = torch.xpu.lazy_init._lazy_seed_tracker - torch.cuda._queued_calls = torch.xpu.lazy_init._queued_calls - torch.cuda._tls = torch.xpu.lazy_init._tls - torch.cuda.threading = torch.xpu.lazy_init.threading - torch.cuda.traceback = torch.xpu.lazy_init.traceback - torch.cuda._lazy_new = torch.xpu._lazy_new - torch.UntypedStorage.cuda = torch.UntypedStorage.xpu - torch.cuda.FloatTensor = torch.xpu.FloatTensor - torch.cuda.FloatStorage = 
torch.xpu.FloatStorage - torch.cuda.BFloat16Tensor = torch.xpu.BFloat16Tensor - torch.cuda.BFloat16Storage = torch.xpu.BFloat16Storage - torch.cuda.HalfTensor = torch.xpu.HalfTensor - torch.cuda.HalfStorage = torch.xpu.HalfStorage - torch.cuda.ByteTensor = torch.xpu.ByteTensor - torch.cuda.ByteStorage = torch.xpu.ByteStorage - torch.cuda.DoubleTensor = torch.xpu.DoubleTensor - torch.cuda.DoubleStorage = torch.xpu.DoubleStorage - torch.cuda.ShortTensor = torch.xpu.ShortTensor - torch.cuda.ShortStorage = torch.xpu.ShortStorage - torch.cuda.LongTensor = torch.xpu.LongTensor - torch.cuda.LongStorage = torch.xpu.LongStorage - torch.cuda.IntTensor = torch.xpu.IntTensor - torch.cuda.IntStorage = torch.xpu.IntStorage - torch.cuda.CharTensor = torch.xpu.CharTensor - torch.cuda.CharStorage = torch.xpu.CharStorage - torch.cuda.BoolTensor = torch.xpu.BoolTensor - torch.cuda.BoolStorage = torch.xpu.BoolStorage - torch.cuda.ComplexFloatStorage = torch.xpu.ComplexFloatStorage - torch.cuda.ComplexDoubleStorage = torch.xpu.ComplexDoubleStorage - else: + if not ipex.__version__.startswith("2.3"): + torch.cuda._initialization_lock = torch.xpu.lazy_init._initialization_lock + torch.cuda._initialized = torch.xpu.lazy_init._initialized + torch.cuda._is_in_bad_fork = torch.xpu.lazy_init._is_in_bad_fork + torch.cuda._lazy_seed_tracker = torch.xpu.lazy_init._lazy_seed_tracker + torch.cuda._queued_calls = torch.xpu.lazy_init._queued_calls + torch.cuda._tls = torch.xpu.lazy_init._tls + torch.cuda.threading = torch.xpu.lazy_init.threading + torch.cuda.traceback = torch.xpu.lazy_init.traceback + torch.cuda._lazy_new = torch.xpu._lazy_new + + torch.cuda.FloatTensor = torch.xpu.FloatTensor + torch.cuda.FloatStorage = torch.xpu.FloatStorage + torch.cuda.BFloat16Tensor = torch.xpu.BFloat16Tensor + torch.cuda.BFloat16Storage = torch.xpu.BFloat16Storage + torch.cuda.HalfTensor = torch.xpu.HalfTensor + torch.cuda.HalfStorage = torch.xpu.HalfStorage + torch.cuda.ByteTensor = torch.xpu.ByteTensor + torch.cuda.ByteStorage = torch.xpu.ByteStorage + torch.cuda.DoubleTensor = torch.xpu.DoubleTensor + torch.cuda.DoubleStorage = torch.xpu.DoubleStorage + torch.cuda.ShortTensor = torch.xpu.ShortTensor + torch.cuda.ShortStorage = torch.xpu.ShortStorage + torch.cuda.LongTensor = torch.xpu.LongTensor + torch.cuda.LongStorage = torch.xpu.LongStorage + torch.cuda.IntTensor = torch.xpu.IntTensor + torch.cuda.IntStorage = torch.xpu.IntStorage + torch.cuda.CharTensor = torch.xpu.CharTensor + torch.cuda.CharStorage = torch.xpu.CharStorage + torch.cuda.BoolTensor = torch.xpu.BoolTensor + torch.cuda.BoolStorage = torch.xpu.BoolStorage + torch.cuda.ComplexFloatStorage = torch.xpu.ComplexFloatStorage + torch.cuda.ComplexDoubleStorage = torch.xpu.ComplexDoubleStorage + + if not legacy or ipex.__version__.startswith("2.3"): torch.cuda._initialization_lock = torch.xpu._initialization_lock torch.cuda._initialized = torch.xpu._initialized torch.cuda._is_in_bad_fork = torch.xpu._is_in_bad_fork @@ -152,8 +154,9 @@ def ipex_init(): # pylint: disable=too-many-statements torch.xpu.amp.custom_fwd = torch.cuda.amp.custom_fwd torch.xpu.amp.custom_bwd = torch.cuda.amp.custom_bwd torch.cuda.amp = torch.xpu.amp - torch.is_autocast_enabled = torch.xpu.is_autocast_xpu_enabled - torch.get_autocast_gpu_dtype = torch.xpu.get_autocast_xpu_dtype + if not ipex.__version__.startswith("2.3"): + torch.is_autocast_enabled = torch.xpu.is_autocast_xpu_enabled + torch.get_autocast_gpu_dtype = torch.xpu.get_autocast_xpu_dtype if not hasattr(torch.cuda.amp, "common"): 
torch.cuda.amp.common = contextlib.nullcontext() @@ -170,11 +173,16 @@ def ipex_init(): # pylint: disable=too-many-statements torch.cuda.amp.GradScaler = ipex.cpu.autocast._grad_scaler.GradScaler # C - if legacy: + if legacy and not ipex.__version__.startswith("2.3"): torch._C._cuda_getCurrentRawStream = ipex._C._getCurrentRawStream ipex._C._DeviceProperties.multi_processor_count = ipex._C._DeviceProperties.gpu_subslice_count - ipex._C._DeviceProperties.major = 2024 + ipex._C._DeviceProperties.major = 12 ipex._C._DeviceProperties.minor = 1 + else: + torch._C._cuda_getCurrentRawStream = torch._C._xpu_getCurrentRawStream + torch._C._XpuDeviceProperties.multi_processor_count = torch._C._XpuDeviceProperties.gpu_subslice_count + torch._C._XpuDeviceProperties.major = 12 + torch._C._XpuDeviceProperties.minor = 1 # Fix functions with ipex: torch.cuda.mem_get_info = lambda device=None: [(torch.xpu.get_device_properties(device).total_memory - torch.xpu.memory_reserved(device)), torch.xpu.get_device_properties(device).total_memory] diff --git a/modules/intel/ipex/hijacks.py b/modules/intel/ipex/hijacks.py index d8ee00e28..e5a71f5e2 100644 --- a/modules/intel/ipex/hijacks.py +++ b/modules/intel/ipex/hijacks.py @@ -26,7 +26,7 @@ def check_device(device): return bool((isinstance(device, torch.device) and device.type == "cuda") or (isinstance(device, str) and "cuda" in device) or isinstance(device, int)) def return_xpu(device): - return f"xpu:{device.split(':')[-1]}" if isinstance(device, str) and ":" in device else f"xpu:{device}" if isinstance(device, int) else torch.device(devices.device) if isinstance(device, torch.device) else devices.device + return f"xpu:{device.split(':')[-1]}" if isinstance(device, str) and ":" in device else f"xpu:{device}" if isinstance(device, int) else torch.device(f"xpu:{device.index}" if device.index is not None else "xpu") if isinstance(device, torch.device) else "xpu" # Autocast diff --git a/modules/intel/openvino/__init__.py b/modules/intel/openvino/__init__.py index c2af24637..975a97672 100644 --- a/modules/intel/openvino/__init__.py +++ b/modules/intel/openvino/__init__.py @@ -12,6 +12,7 @@ from torch._dynamo.backends.registry import register_backend from torch.fx.experimental.proxy_tensor import make_fx from torch.fx import GraphModule +from torch._subclasses.fake_tensor import FakeTensorMode from torch.utils._pytree import tree_flatten from types import MappingProxyType @@ -21,26 +22,6 @@ from modules import shared, devices, sd_models -def BUILD_MAP_UNPACK(self, inst): - items = self.popn(inst.argval) - # ensure everything is a dict - items = [BuiltinVariable(dict).call_function(self, [x], {}) for x in items] # noqa: F821 - result = dict() - for x in items: - assert isinstance(x, ConstDictVariable) # noqa: F821 - result.update(x.items) - self.push( - ConstDictVariable( # noqa: F821 - result, - dict, - mutable_local=MutableLocal(), # noqa: F821 - **VariableTracker.propagate(items), # noqa: F821 - ) - ) -tmp_torch = sys.modules["torch"] -tmp_torch.BUILD_MAP_UNPACK_WITH_CALL = BUILD_MAP_UNPACK -max_openvino_partitions = 0 - DEFAULT_OPENVINO_PYTHON_CONFIG = MappingProxyType( { "use_python_fusion_cache": True, @@ -48,6 +29,7 @@ def BUILD_MAP_UNPACK(self, inst): }, ) + class OpenVINOGraphModule(torch.nn.Module): def __init__(self, gm, partition_id, use_python_fusion_cache, model_hash_str: str = None, file_name=""): super().__init__() @@ -61,10 +43,12 @@ def __call__(self, *args): result = openvino_execute(self.gm, *args, executor_parameters=self.executor_parameters, 
partition_id=self.partition_id, file_name=self.file_name) return result + def get_device_list(): core = Core() return core.available_devices + def get_device(): if hasattr(shared, "opts") and len(shared.opts.openvino_devices) == 1: return shared.opts.openvino_devices[0] @@ -96,6 +80,7 @@ def get_device(): shared.log.warning(f"OpenVINO: No compatible GPU detected! Using {device}") return device + def get_openvino_device(): core = Core() try: @@ -103,6 +88,7 @@ def get_openvino_device(): except Exception: return f"OpenVINO {get_device()}" + def cached_model_name(model_hash_str, device, args, cache_root, reversed = False): if model_hash_str is None: return None @@ -120,9 +106,14 @@ def cached_model_name(model_hash_str, device, args, cache_root, reversed = False for input_data in args: if isinstance(input_data, torch.SymInt): if reversed: - inputs_str = "_" + "torch.SymInt" + inputs_str + inputs_str = "_" + "torch.SymInt1" + inputs_str else: inputs_str += "_" + "torch.SymInt1" + elif isinstance(input_data, int): + if reversed: + inputs_str = "_" + "int" + inputs_str + else: + inputs_str += "_" + "int" else: if reversed: inputs_str = "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "") + inputs_str @@ -133,18 +124,6 @@ def cached_model_name(model_hash_str, device, args, cache_root, reversed = False return file_name -def check_fully_supported(self, graph_module: GraphModule) -> bool: - num_fused = 0 - for node in graph_module.graph.nodes: - if node.op == "call_module" and "fused_" in node.name: - num_fused += 1 - elif node.op != "placeholder" and node.op != "output": - return False - if num_fused == 1: - return True - return False - -Partitioner.check_fully_supported = functools.partial(check_fully_supported, Partitioner) def execute( gm, @@ -161,6 +140,7 @@ def execute( msg = "Received unexpected value for 'executor': {0}. 
Allowed values are: openvino, strictly_openvino.".format(executor) raise ValueError(msg) + def execute_cached(compiled_model, *args): flat_args, _ = tree_flatten(args) ov_inputs = [a.detach().cpu().numpy() for a in flat_args] @@ -192,18 +172,21 @@ def openvino_compile(gm: GraphModule, *example_inputs, model_hash_str: str = Non for input_data in example_inputs: if isinstance(input_data, torch.SymInt): input_types.append(torch.SymInt) - input_shapes.append(1) + input_shapes.append(torch.Size([1])) + elif isinstance(input_data, int): + input_types.append(torch.int64) + input_shapes.append(torch.Size([1])) else: input_types.append(input_data.type()) input_shapes.append(input_data.size()) - decoder = TorchFXPythonDecoder(gm, gm, input_shapes=input_shapes, input_types=input_types) + decoder = TorchFXPythonDecoder(gm, input_shapes=input_shapes, input_types=input_types) im = fe.load(decoder) om = fe.convert(im) - if (file_name is not None): + if file_name is not None: serialize(om, file_name + ".xml", file_name + ".bin") if (shared.compiled_model_state.cn_model != []): f = open(file_name + ".txt", "w") @@ -224,8 +207,12 @@ def openvino_compile(gm: GraphModule, *example_inputs, model_hash_str: str = Non } for idx, input_data in enumerate(example_inputs): - om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype]) - om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape))) + if isinstance(input_data, int): + om.inputs[idx].get_node().set_element_type(dtype_mapping[torch.int64]) + om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([1])))) + else: + om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype]) + om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape))) om.validate_nodes_and_infer_types() if shared.opts.nncf_quantize and not dont_use_quant: @@ -256,6 +243,7 @@ def openvino_compile(gm: GraphModule, *example_inputs, model_hash_str: str = Non compiled_model = core.compile_model(om, device) return compiled_model + def openvino_compile_cached_model(cached_model_path, *example_inputs): core = Core() om = core.read_model(cached_model_path + ".xml") @@ -306,6 +294,7 @@ def openvino_compile_cached_model(cached_model_path, *example_inputs): compiled_model = core.compile_model(om, get_device()) return compiled_model + def openvino_execute(gm: GraphModule, *args, executor_parameters=None, partition_id, file_name=""): executor_parameters = executor_parameters or DEFAULT_OPENVINO_PYTHON_CONFIG @@ -320,6 +309,7 @@ def openvino_execute(gm: GraphModule, *args, executor_parameters=None, partition if use_cache and (partition_id in shared.compiled_model_state.compiled_cache): compiled = shared.compiled_model_state.compiled_cache[partition_id] + req = shared.compiled_model_state.req_cache[partition_id] else: if (shared.compiled_model_state.cn_model != [] and file_name is not None and os.path.isfile(file_name + ".xml") and os.path.isfile(file_name + ".bin")): @@ -327,17 +317,22 @@ def openvino_execute(gm: GraphModule, *args, executor_parameters=None, partition else: compiled = openvino_compile(gm, *args, model_hash_str=model_hash_str, file_name=file_name) shared.compiled_model_state.compiled_cache[partition_id] = compiled + req = compiled.create_infer_request() + shared.compiled_model_state.req_cache[partition_id] = req flat_args, _ = tree_flatten(args) - ov_inputs = [a.detach().cpu().numpy() for a in flat_args] + ov_inputs = [] + for arg in flat_args: + ov_inputs.append((arg if isinstance(arg, int) else 
arg.detach().cpu().numpy())) - res = compiled(ov_inputs) + res = req.infer(ov_inputs, share_inputs=True, share_outputs=True) results1 = [torch.from_numpy(res[out]) for out in compiled.outputs] if len(results1) == 1: return results1[0] return results1 + def openvino_execute_partitioned(gm: GraphModule, *args, executor_parameters=None, file_name=""): executor_parameters = executor_parameters or DEFAULT_OPENVINO_PYTHON_CONFIG @@ -360,8 +355,8 @@ def openvino_execute_partitioned(gm: GraphModule, *args, executor_parameters=Non return shared.compiled_model_state.partitioned_modules[signature](*args) + def partition_graph(gm: GraphModule, use_python_fusion_cache: bool, model_hash_str: str = None, file_name=""): - global max_openvino_partitions for node in gm.graph.nodes: if node.op == "call_module" and "fused_" in node.name: openvino_submodule = getattr(gm, node.name) @@ -375,16 +370,19 @@ def partition_graph(gm: GraphModule, use_python_fusion_cache: bool, model_hash_s return gm + def generate_subgraph_str(tensor): if hasattr(tensor, "weight"): shared.compiled_model_state.model_hash_str = shared.compiled_model_state.model_hash_str + sha256(str(tensor.weight).encode('utf-8')).hexdigest() return tensor + def get_subgraph_type(tensor): global subgraph_type subgraph_type.append(type(tensor)) return tensor + @register_backend @fake_tensor_unsupported def openvino_fx(subgraph, example_inputs): @@ -483,14 +481,15 @@ def _call(*args): if inputs_reversed: example_inputs.reverse() - model = make_fx(subgraph)(*example_inputs) + with FakeTensorMode(allow_non_fake_inputs=True): + model = make_fx(subgraph)(*example_inputs) for node in model.graph.nodes: if node.target == torch.ops.aten.mul_.Tensor: node.target = torch.ops.aten.mul.Tensor with devices.inference_context(): model.eval() - partitioner = Partitioner() - compiled_model = partitioner.make_partitions(model) + partitioner = Partitioner(options=None) + compiled_model = partitioner.make_partitions(model, options=None) if executor_parameters is not None and 'model_hash_str' in executor_parameters: # Check if the model is fully supported. 
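# --- Illustrative sketch, not part of the patch above ---
# The openvino_execute() change caches a persistent InferRequest per partition
# (compiled_cache / req_cache) and calls req.infer(...) instead of invoking the
# compiled model directly, passing ints through unchanged. A minimal standalone
# version of that caching pattern might look like this; the helper name and the
# two module-level dict caches are assumptions for illustration only.
from openvino.runtime import Core

compiled_cache: dict = {}
req_cache: dict = {}

def run_partition(partition_id, ov_model, device, inputs):
    # compile once per partition, then reuse both the compiled model and its infer request
    if partition_id not in compiled_cache:
        compiled = Core().compile_model(ov_model, device)
        compiled_cache[partition_id] = compiled
        req_cache[partition_id] = compiled.create_infer_request()
    compiled = compiled_cache[partition_id]
    req = req_cache[partition_id]
    # ints are passed through as-is, tensors are converted to numpy (mirrors the patch)
    ov_inputs = [a if isinstance(a, int) else a.detach().cpu().numpy() for a in inputs]
    res = req.infer(ov_inputs, share_inputs=True, share_outputs=True)
    return [res[out] for out in compiled.outputs]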
@@ -499,7 +498,6 @@ def _call(*args): executor_parameters["model_hash_str"] += "_fs" def _call(*args): - res = execute(compiled_model, *args, executor="openvino", - executor_parameters=executor_parameters, file_name=maybe_fs_cached_name) + res = execute(compiled_model, *args, executor="openvino", executor_parameters=executor_parameters, file_name=maybe_fs_cached_name) return res return _call diff --git a/modules/loader.py b/modules/loader.py index e7cb03339..0aad4d76b 100644 --- a/modules/loader.py +++ b/modules/loader.py @@ -27,6 +27,10 @@ warnings.filterwarnings(action="ignore", category=DeprecationWarning) warnings.filterwarnings(action="ignore", category=FutureWarning) warnings.filterwarnings(action="ignore", category=UserWarning, module="torchvision") +try: + torch._logging.set_logs(all=logging.ERROR, bytecode=False, aot_graphs=False, aot_joint_graph=False, ddp_graphs=False, graph=False, graph_code=False, graph_breaks=False, graph_sizes=False, guards=False, recompiles=False, recompiles_verbose=False, trace_source=False, trace_call=False, trace_bytecode=False, output_code=False, kernel_code=False, schedule=False, perf_hints=False, post_grad_graphs=False, onnx_diagnostics=False, fusion=False, overlap=False, export=None, modules=None, cudagraphs=False, sym_node=False, compiled_autograd_verbose=False) # pylint: disable=protected-access +except Exception: + pass if ".dev" in torch.__version__ or "+git" in torch.__version__: torch.__long_version__ = torch.__version__ torch.__version__ = re.search(r'[\d.]+[\d]', torch.__version__).group(0) @@ -49,10 +53,33 @@ import diffusers # pylint: disable=W0611,C0411 import diffusers.loaders.single_file # pylint: disable=W0611,C0411 +import huggingface_hub # pylint: disable=W0611,C0411 logging.getLogger("diffusers.loaders.single_file").setLevel(logging.ERROR) +timer.startup.record("diffusers") + + +# patch different progress bars +import tqdm as tqdm_lib # pylint: disable=C0411 from tqdm.rich import tqdm # pylint: disable=W0611,C0411 diffusers.loaders.single_file.logging.tqdm = partial(tqdm, unit='C') -timer.startup.record("diffusers") + +class _tqdm_cls(): + def __call__(self, *args, **kwargs): + bar_format = 'Diffusers {rate_fmt}{postfix} {bar} {percentage:3.0f}% {n_fmt}/{total_fmt} {elapsed} {remaining} ' + '\x1b[38;5;71m' + '{desc}' + '\x1b[0m' + return tqdm_lib.tqdm(*args, bar_format=bar_format, ncols=80, colour='#327fba', **kwargs) + +class _tqdm_old(tqdm_lib.tqdm): + def __init__(self, *args, **kwargs): + kwargs.pop("name", None) + kwargs['bar_format'] = 'Diffusers {rate_fmt}{postfix} {bar} {percentage:3.0f}% {n_fmt}/{total_fmt} {elapsed} {remaining} ' + '\x1b[38;5;71m' + '{desc}' + '\x1b[0m' + kwargs['ncols'] = 80 + super().__init__(*args, **kwargs) + + +transformers.utils.logging.tqdm = _tqdm_cls() +diffusers.pipelines.pipeline_utils.logging.tqdm = _tqdm_cls() +huggingface_hub._snapshot_download.hf_tqdm = _tqdm_old # pylint: disable=protected-access + def get_packages(): return { diff --git a/modules/model_auraflow.py b/modules/model_auraflow.py index 344e6558a..6f18bf13c 100644 --- a/modules/model_auraflow.py +++ b/modules/model_auraflow.py @@ -1,15 +1,17 @@ +import os import torch import diffusers +from modules import shared, sd_models, devices -repo_id = 'fal/AuraFlow' +debug = shared.log.trace if os.environ.get('SD_LOAD_DEBUG', None) is not None else lambda *args, **kwargs: None -def load_auraflow(_checkpoint_info, diffusers_load_config={}): - from modules import shared, devices +def load_auraflow(checkpoint_info, diffusers_load_config={}): 
+ repo_id = sd_models.path_to_repo(checkpoint_info.name) if 'torch_dtype' not in diffusers_load_config: diffusers_load_config['torch_dtype'] = torch.float16 - + debug(f'Loading AuraFlow: repo="{repo_id}" config={diffusers_load_config}') pipe = diffusers.AuraFlowPipeline.from_pretrained( repo_id, cache_dir = shared.opts.diffusers_dir, diff --git a/modules/model_flux.py b/modules/model_flux.py index 4940a659e..0fd4e39ca 100644 --- a/modules/model_flux.py +++ b/modules/model_flux.py @@ -5,109 +5,246 @@ import transformers from safetensors.torch import load_file from huggingface_hub import hf_hub_download -from modules import shared, devices - - -def load_quanto_transformer(checkpoint_info): - from optimum.quanto import requantize # pylint: disable=no-name-in-module - repo_path = checkpoint_info.path - quantization_map = os.path.join(repo_path, "transformer", "quantization_map.json") - if not os.path.exists(quantization_map): - repo_id = checkpoint_info.name.replace('Diffusers/', '') - quantization_map = hf_hub_download(repo_id, subfolder='transformer', filename='quantization_map.json', cache_dir=shared.opts.diffusers_dir) - with open(quantization_map, "r", encoding='utf8') as f: - quantization_map = json.load(f) - state_dict = load_file(os.path.join(repo_path, "transformer", "diffusion_pytorch_model.safetensors")) - dtype = state_dict['context_embedder.bias'].dtype - with torch.device("meta"): - transformer = diffusers.FluxTransformer2DModel.from_config(os.path.join(repo_path, "transformer", "config.json")).to(dtype=dtype) - requantize(transformer, state_dict, quantization_map, device=torch.device("cpu")) - transformer.eval() - return transformer +from modules import shared, devices, modelloader, sd_models + + +debug = shared.log.trace if os.environ.get('SD_LOAD_DEBUG', None) is not None else lambda *args, **kwargs: None + + +def get_quant(file_path): + if "qint8" in file_path.lower(): + return 'qint8' + if "qint4" in file_path.lower(): + return 'qint4' + if "fp8" in file_path.lower(): + return 'fp8' + if "fp4" in file_path.lower(): + return 'fp4' + if "nf4" in file_path.lower(): + return 'nf4' + return 'none' + + +def load_flux_quanto(checkpoint_info): + transformer, text_encoder_2 = None, None + from installer import install + install('optimum-quanto', quiet=True) + try: + from optimum import quanto # pylint: disable=no-name-in-module + from optimum.quanto import requantize # pylint: disable=no-name-in-module + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to import optimum-quanto: {e}") + raise + quanto.tensor.qbits.QBitsTensor.create = lambda *args, **kwargs: quanto.tensor.qbits.QBitsTensor(*args, **kwargs) + + if isinstance(checkpoint_info, str): + repo_path = checkpoint_info + else: + repo_path = checkpoint_info.path + + try: + quantization_map = os.path.join(repo_path, "transformer", "quantization_map.json") + debug(f'Loading FLUX: quantization map="{quantization_map}" repo="{checkpoint_info.name}" component="transformer"') + if not os.path.exists(quantization_map): + repo_id = sd_models.path_to_repo(checkpoint_info.name) + quantization_map = hf_hub_download(repo_id, subfolder='transformer', filename='quantization_map.json', cache_dir=shared.opts.diffusers_dir) + with open(quantization_map, "r", encoding='utf8') as f: + quantization_map = json.load(f) + state_dict = load_file(os.path.join(repo_path, "transformer", "diffusion_pytorch_model.safetensors")) + dtype = state_dict['context_embedder.bias'].dtype + with torch.device("meta"): + transformer = 
diffusers.FluxTransformer2DModel.from_config(os.path.join(repo_path, "transformer", "config.json")).to(dtype=dtype) + requantize(transformer, state_dict, quantization_map, device=torch.device("cpu")) + transformer.eval() + if transformer.dtype != devices.dtype: + try: + transformer = transformer.to(dtype=devices.dtype) + except Exception: + shared.log.error(f"Loading FLUX: Failed to cast transformer to {devices.dtype}, set dtype to {transformer.dtype}") + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to load Quanto transformer: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX Quanto:') + + try: + quantization_map = os.path.join(repo_path, "text_encoder_2", "quantization_map.json") + debug(f'Loading FLUX: quantization map="{quantization_map}" repo="{checkpoint_info.name}" component="text_encoder_2"') + if not os.path.exists(quantization_map): + repo_id = sd_models.path_to_repo(checkpoint_info.name) + quantization_map = hf_hub_download(repo_id, subfolder='text_encoder_2', filename='quantization_map.json', cache_dir=shared.opts.diffusers_dir) + with open(quantization_map, "r", encoding='utf8') as f: + quantization_map = json.load(f) + with open(os.path.join(repo_path, "text_encoder_2", "config.json"), encoding='utf8') as f: + t5_config = transformers.T5Config(**json.load(f)) + state_dict = load_file(os.path.join(repo_path, "text_encoder_2", "model.safetensors")) + dtype = state_dict['encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight'].dtype + with torch.device("meta"): + text_encoder_2 = transformers.T5EncoderModel(t5_config).to(dtype=dtype) + requantize(text_encoder_2, state_dict, quantization_map, device=torch.device("cpu")) + text_encoder_2.eval() + if text_encoder_2.dtype != devices.dtype: + try: + text_encoder_2 = text_encoder_2.to(dtype=devices.dtype) + except Exception: + shared.log.error(f"Loading FLUX: Failed to cast text encoder to {devices.dtype}, set dtype to {text_encoder_2.dtype}") + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to load Quanto text encoder: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX Quanto:') + + return transformer, text_encoder_2 -def load_quanto_text_encoder_2(checkpoint_info): - from optimum.quanto import requantize # pylint: disable=no-name-in-module - repo_path = checkpoint_info.path - quantization_map = os.path.join(repo_path, "text_encoder_2", "quantization_map.json") - if not os.path.exists(quantization_map): - repo_id = checkpoint_info.name.replace('Diffusers/', '') - quantization_map = hf_hub_download(repo_id, subfolder='text_encoder_2', filename='quantization_map.json', cache_dir=shared.opts.diffusers_dir) - with open(quantization_map, "r", encoding='utf8') as f: - quantization_map = json.load(f) - with open(os.path.join(repo_path, "text_encoder_2", "config.json"), encoding='utf8') as f: - t5_config = transformers.T5Config(**json.load(f)) - state_dict = load_file(os.path.join(repo_path, "text_encoder_2", "model.safetensors")) - dtype = state_dict['encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight'].dtype - with torch.device("meta"): - text_encoder_2 = transformers.T5EncoderModel(t5_config).to(dtype=dtype) - requantize(text_encoder_2, state_dict, quantization_map, device=torch.device("cpu")) - text_encoder_2.eval() - return text_encoder_2 - - -def load_transformer(file_path): +def load_flux_bnb(checkpoint_info, diffusers_load_config): # pylint: disable=unused-argument + transformer, text_encoder_2 = None, None + if 
isinstance(checkpoint_info, str): + repo_path = checkpoint_info + else: + repo_path = checkpoint_info.path + from installer import install + install('bitsandbytes', quiet=True) + from diffusers import FluxTransformer2DModel + quant = get_quant(repo_path) + if quant == 'fp8': + quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True) + if transformer is None: + transformer = FluxTransformer2DModel.from_single_file(repo_path, **diffusers_load_config, quantization_config=quantization_config) + elif quant == 'fp4': + quantization_config = transformers.BitsAndBytesConfig(load_in_4bit=True) + if transformer is None: + transformer = FluxTransformer2DModel.from_single_file(repo_path, **diffusers_load_config, quantization_config=quantization_config) + else: + if transformer is None: + transformer = FluxTransformer2DModel.from_single_file(repo_path, **diffusers_load_config) + return transformer, text_encoder_2 + + +def load_transformer(file_path): # triggered by opts.sd_unet change + transformer = None + quant = get_quant(file_path) diffusers_load_config = { "low_cpu_mem_usage": True, "torch_dtype": devices.dtype, "cache_dir": shared.opts.hfcache_dir, } - from diffusers import FluxTransformer2DModel - transformer = FluxTransformer2DModel.from_single_file(file_path, **diffusers_load_config) + shared.log.info(f'Loading UNet: type=FLUX file="{file_path}" offload={shared.opts.diffusers_offload_mode} quant={quant} dtype={devices.dtype}') + if 'nf4' in quant: + from modules.model_flux_nf4 import load_flux_nf4 + _transformer, _text_encoder_2 = load_flux_nf4(file_path) + if _transformer is not None: + transformer = _transformer + elif quant == 'qint8' or quant == 'qint4': + _transformer, _text_encoder_2 = load_flux_quanto(file_path) + if _transformer is not None: + transformer = _transformer + elif quant == 'fp8' or quant == 'fp4': + _transformer, _text_encoder_2 = load_flux_bnb(file_path, diffusers_load_config) + if _transformer is not None: + transformer = _transformer + else: + from diffusers import FluxTransformer2DModel + transformer = FluxTransformer2DModel.from_single_file(file_path, **diffusers_load_config) if transformer is None: shared.log.error('Failed to load UNet model') return transformer -def load_flux(checkpoint_info, diffusers_load_config): - if "qint8" in checkpoint_info.path.lower(): - quant = 'qint8' - elif "qint4" in checkpoint_info.path.lower(): - quant = 'qint4' - elif "nf4" in checkpoint_info.path.lower(): - quant = 'nf4' - else: - quant = None - shared.log.debug(f'Loading FLUX: model="{checkpoint_info.name}" quant={quant}') +def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_checkpoint change + quant = get_quant(checkpoint_info.path) + repo_id = sd_models.path_to_repo(checkpoint_info.name) + shared.log.debug(f'Loading FLUX: model="{checkpoint_info.name}" repo="{repo_id}" unet="{shared.opts.sd_unet}" t5="{shared.opts.sd_text_encoder}" vae="{shared.opts.sd_vae}" quant={quant} offload={shared.opts.diffusers_offload_mode} dtype={devices.dtype}') + debug(f'Loading FLUX: config={diffusers_load_config}') + modelloader.hf_login() + + transformer = None + text_encoder_2 = None + vae = None + + # load overrides if any + if shared.opts.sd_unet != 'None': + try: + debug(f'Loading FLUX: unet="{shared.opts.sd_unet}"') + from modules import sd_unet + _transformer = load_transformer(sd_unet.unet_dict[shared.opts.sd_unet]) + if _transformer is not None: + transformer = _transformer + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to load 
UNet: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX UNet:') + if shared.opts.sd_text_encoder != 'None': + try: + debug(f'Loading FLUX: t5="{shared.opts.sd_text_encoder}"') + from modules.model_t5 import load_t5 + _text_encoder_2 = load_t5(t5=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir) + if _text_encoder_2 is not None: + text_encoder_2 = _text_encoder_2 + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to load T5: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX T5:') + if shared.opts.sd_vae != 'None' and shared.opts.sd_vae != 'Automatic': + try: + debug(f'Loading FLUX: vae="{shared.opts.sd_vae}"') + from modules import sd_vae + # vae = sd_vae.load_vae_diffusers(None, sd_vae.vae_dict[shared.opts.sd_vae], 'override') + vae_file = sd_vae.vae_dict[shared.opts.sd_vae] + if os.path.exists(vae_file): + vae_config = os.path.join('configs', 'flux', 'vae', 'config.json') + vae = diffusers.AutoencoderKL.from_single_file(vae_file, config=vae_config, **diffusers_load_config) + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to load VAE: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX VAE:') + + # load quantized components if any if quant == 'nf4': - from installer import install - install('bitsandbytes', quiet=True) try: - import bitsandbytes # pylint: disable=unused-import + from modules.model_flux_nf4 import load_flux_nf4 + _transformer, _text_encoder = load_flux_nf4(checkpoint_info) + if _transformer is not None: + transformer = _transformer + if _text_encoder is not None: + text_encoder_2 = _text_encoder except Exception as e: - shared.log.error(f"FLUX: Failed to import bitsandbytes: {e}") - raise - from modules.model_flux_nf4 import load_flux_nf4 - pipe = load_flux_nf4(checkpoint_info, diffusers_load_config) - elif quant == 'qint8' or quant == 'qint4': - from installer import install - install('optimum-quanto', quiet=True) + shared.log.error(f"Loading FLUX: Failed to load NF4 components: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX NF4:') + if quant == 'qint8' or quant == 'qint4': try: - from optimum import quanto # pylint: disable=no-name-in-module + _transformer, _text_encoder = load_flux_quanto(checkpoint_info) + if _transformer is not None: + transformer = _transformer + if _text_encoder is not None: + text_encoder_2 = _text_encoder except Exception as e: - shared.log.error(f"FLUX: Failed to import optimum-quanto: {e}") - raise - quanto.tensor.qbits.QBitsTensor.create = lambda *args, **kwargs: quanto.tensor.qbits.QBitsTensor(*args, **kwargs) - pipe = diffusers.FluxPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, transformer=None, text_encoder_2=None, **diffusers_load_config) - pipe.transformer = load_quanto_transformer(checkpoint_info) - pipe.text_encoder_2 = load_quanto_text_encoder_2(checkpoint_info) - if pipe.transformer.dtype != devices.dtype: - try: - pipe.transformer = pipe.transformer.to(dtype=devices.dtype) - except Exception: - shared.log.error(f"FLUX: Failed to cast transformer to {devices.dtype}, set dtype to {pipe.transformer.dtype}") - raise - if pipe.text_encoder_2.dtype != devices.dtype: - try: - pipe.text_encoder_2 = pipe.text_encoder_2.to(dtype=devices.dtype) - except Exception: - shared.log.error(f"FLUX: Failed to cast text encoder to {devices.dtype}, set dtype to {pipe.text_encoder_2.dtype}") - raise - else: - pipe = diffusers.FluxPipeline.from_pretrained(checkpoint_info.path, 
cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) - if devices.dtype == torch.float16 and not shared.opts.no_half_vae: - shared.log.warning("FLUX: does not support FP16 VAE, enabling no-half-vae") - shared.opts.no_half_vae = True - # from accelerate.utils import compute_module_sizes - # shared.log.debug(f'FLUX computed size: {round(compute_module_sizes(pipe.transformer)[""] / 1024 / 1204)}') + shared.log.error(f"Loading FLUX: Failed to load Quanto components: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX Quanto:') + + # initialize pipeline with pre-loaded components + components = {} + if transformer is not None: + components['transformer'] = transformer + if text_encoder_2 is not None: + components['text_encoder_2'] = text_encoder_2 + if vae is not None: + components['vae'] = vae + shared.log.debug(f'Loading FLUX: preloaded={list(components)}') + if repo_id == 'sayakpaul/flux.1-dev-nf4': + repo_id = 'black-forest-labs/FLUX.1-dev' # workaround since sayakpaul model is missing model_index.json + pipe = diffusers.FluxPipeline.from_pretrained(repo_id, cache_dir=shared.opts.diffusers_dir, **components, **diffusers_load_config) + try: + diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING["flux"] = diffusers.FluxPipeline + diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["flux"] = diffusers.FluxImg2ImgPipeline + diffusers.pipelines.auto_pipeline.AUTO_INPAINT_PIPELINES_MAPPING["flux"] = diffusers.FluxInpaintPipeline + except Exception: + pass return pipe diff --git a/modules/model_flux_nf4.py b/modules/model_flux_nf4.py index a28dc3526..1644a667a 100644 --- a/modules/model_flux_nf4.py +++ b/modules/model_flux_nf4.py @@ -5,17 +5,31 @@ import os import torch import torch.nn as nn -import bitsandbytes as bnb from transformers.quantizers.quantizers_utils import get_module_from_name from huggingface_hub import hf_hub_download from accelerate import init_empty_weights from accelerate.utils import set_module_tensor_to_device -from diffusers import FluxTransformer2DModel, FluxPipeline from diffusers.loaders.single_file_utils import convert_flux_transformer_checkpoint_to_diffusers import safetensors.torch from modules import shared, devices +bnb = None +debug = os.environ.get('SD_LOAD_DEBUG', None) is not None + + +def load_bnb(): + from installer import install + install('bitsandbytes', quiet=True) + try: + import bitsandbytes + global bnb # pylint: disable=global-statement + bnb = bitsandbytes + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to import bitsandbytes: {e}") + raise + + def _replace_with_bnb_linear( model, method="nf4", @@ -148,32 +162,41 @@ def create_quantized_param( module._parameters[tensor_name] = new_value # pylint: disable=protected-access -def load_flux_nf4(checkpoint_info, diffusers_load_config): - repo_path = checkpoint_info.path +def load_flux_nf4(checkpoint_info): + load_bnb() + transformer = None + text_encoder_2 = None + if isinstance(checkpoint_info, str): + repo_path = checkpoint_info + else: + repo_path = checkpoint_info.path if os.path.exists(repo_path) and os.path.isfile(repo_path): ckpt_path = repo_path - if os.path.exists(repo_path) and os.path.isdir(repo_path) and os.path.exists(os.path.join(repo_path, "diffusion_pytorch_model.safetensors")): + elif os.path.exists(repo_path) and os.path.isdir(repo_path) and os.path.exists(os.path.join(repo_path, "diffusion_pytorch_model.safetensors")): ckpt_path = os.path.join(repo_path, "diffusion_pytorch_model.safetensors") else: ckpt_path = 
hf_hub_download(repo_path, filename="diffusion_pytorch_model.safetensors", cache_dir=shared.opts.diffusers_dir) original_state_dict = safetensors.torch.load_file(ckpt_path) - if 'sayakpaul' in checkpoint_info.path: + if 'sayakpaul' in repo_path: converted_state_dict = original_state_dict # already converted else: try: converted_state_dict = convert_flux_transformer_checkpoint_to_diffusers(original_state_dict) except Exception as e: - from modules import errors - errors.display(e, 'FLUX convert:') - raise + shared.log.error(f"Loading FLUX: Failed to convert UNET: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX convert:') + converted_state_dict = original_state_dict with init_empty_weights(): - config = FluxTransformer2DModel.load_config("black-forest-labs/flux.1-dev", subfolder="transformer") - model = FluxTransformer2DModel.from_config(config).to(devices.dtype) - expected_state_dict_keys = list(model.state_dict().keys()) + from diffusers import FluxTransformer2DModel + config = FluxTransformer2DModel.load_config(os.path.join('configs', 'flux'), subfolder="transformer") + transformer = FluxTransformer2DModel.from_config(config).to(devices.dtype) + expected_state_dict_keys = list(transformer.state_dict().keys()) - _replace_with_bnb_linear(model, "nf4") + _replace_with_bnb_linear(transformer, "nf4") for param_name, param in converted_state_dict.items(): if param_name not in expected_state_dict_keys: @@ -181,12 +204,11 @@ def load_flux_nf4(checkpoint_info, diffusers_load_config): is_param_float8_e4m3fn = hasattr(torch, "float8_e4m3fn") and param.dtype == torch.float8_e4m3fn if torch.is_floating_point(param) and not is_param_float8_e4m3fn: param = param.to(devices.dtype) - if not check_quantized_param(model, param_name): - set_module_tensor_to_device(model, param_name, device=0, value=param) + if not check_quantized_param(transformer, param_name): + set_module_tensor_to_device(transformer, param_name, device=0, value=param) else: - create_quantized_param(model, param, param_name, target_device=0, state_dict=original_state_dict, pre_quantized=True) + create_quantized_param(transformer, param, param_name, target_device=0, state_dict=original_state_dict, pre_quantized=True) del original_state_dict - pipe = FluxPipeline.from_pretrained("black-forest-labs/flux.1-dev", transformer=model, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) devices.torch_gc(force=True) - return pipe + return transformer, text_encoder_2 diff --git a/modules/model_t5.py b/modules/model_t5.py index 2e1ff06c9..4d082d2d4 100644 --- a/modules/model_t5.py +++ b/modules/model_t5.py @@ -1,67 +1,60 @@ +import os +import json import torch import transformers +from safetensors.torch import load_file +from modules import shared, devices, files_cache +from installer import install + + +t5_dict = {} def load_t5(t5=None, cache_dir=None): - from modules import devices, modelloader + from modules import modelloader + modelloader.hf_login() repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers' - if 'fp16' in t5.lower(): - modelloader.hf_login() - t5 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - # torch_dtype=dtype, - cache_dir=cache_dir, - torch_dtype=devices.dtype, - ) + fn = t5_dict.get(t5) if t5 in t5_dict else None + if fn is not None: + from accelerate.utils import set_module_tensor_to_device + with open(os.path.join('configs', 'flux', 'text_encoder_2', 'config.json'), encoding='utf8') as f: + t5_config = transformers.T5Config(**json.load(f)) + 
state_dict = load_file(fn) + dtype = state_dict['encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight'].dtype + with torch.device("meta"): + t5 = transformers.T5EncoderModel(t5_config).to(dtype=dtype) + for param_name, param in state_dict.items(): + is_param_float8_e4m3fn = hasattr(torch, "float8_e4m3fn") and param.dtype == torch.float8_e4m3fn + if torch.is_floating_point(param) and not is_param_float8_e4m3fn: + param = param.to(devices.dtype) + set_module_tensor_to_device(t5, param_name, device=0, value=param) + t5.eval() + if t5.dtype != devices.dtype: + try: + t5 = t5.to(dtype=devices.dtype) + except Exception: + shared.log.error(f"FLUX: Failed to cast text encoder to {devices.dtype}, set dtype to {t5.dtype}") + raise + elif 'fp16' in t5.lower(): + t5 = transformers.T5EncoderModel.from_pretrained(repo_id, subfolder='text_encoder_3', cache_dir=cache_dir, torch_dtype=devices.dtype) elif 'fp4' in t5.lower(): - modelloader.hf_login() - from installer import install install('bitsandbytes', quiet=True) quantization_config = transformers.BitsAndBytesConfig(load_in_4bit=True) - t5 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - quantization_config=quantization_config, - cache_dir=cache_dir, - torch_dtype=devices.dtype, - ) + t5 = transformers.T5EncoderModel.from_pretrained(repo_id, subfolder='text_encoder_3', quantization_config=quantization_config, cache_dir=cache_dir, torch_dtype=devices.dtype) elif 'fp8' in t5.lower(): - modelloader.hf_login() - from installer import install install('bitsandbytes', quiet=True) quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True) - t5 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - quantization_config=quantization_config, - cache_dir=cache_dir, - torch_dtype=devices.dtype, - ) + t5 = transformers.T5EncoderModel.from_pretrained(repo_id, subfolder='text_encoder_3', quantization_config=quantization_config, cache_dir=cache_dir, torch_dtype=devices.dtype) elif 'qint8' in t5.lower(): - modelloader.hf_login() - from installer import install install('optimum-quanto', quiet=True) from modules.sd_models_compile import optimum_quanto_model - t5 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - cache_dir=cache_dir, - torch_dtype=devices.dtype, - ) + t5 = transformers.T5EncoderModel.from_pretrained(repo_id, subfolder='text_encoder_3', cache_dir=cache_dir, torch_dtype=devices.dtype) t5 = optimum_quanto_model(t5, weights="qint8", activations="none") elif 'int8' in t5.lower(): - modelloader.hf_login() - from installer import install install('nncf==2.7.0', quiet=True) from modules.sd_models_compile import nncf_compress_model from modules.sd_hijack import NNCF_T5DenseGatedActDense - t5 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - cache_dir=cache_dir, - torch_dtype=devices.dtype, - ) + t5 = transformers.T5EncoderModel.from_pretrained(repo_id, subfolder='text_encoder_3', cache_dir=cache_dir, torch_dtype=devices.dtype) for i in range(len(t5.encoder.block)): t5.encoder.block[i].layer[1].DenseReluDense = NNCF_T5DenseGatedActDense( t5.encoder.block[i].layer[1].DenseReluDense, @@ -74,10 +67,11 @@ def load_t5(t5=None, cache_dir=None): def set_t5(pipe, module, t5=None, cache_dir=None): - from modules import devices, shared if pipe is None or not hasattr(pipe, module): return pipe t5 = load_t5(t5=t5, cache_dir=cache_dir) + if module == "text_encoder_2" and t5 is None: # do not unload 
te2 + return None setattr(pipe, module, t5) if shared.opts.diffusers_offload_mode == "sequential": from accelerate import cpu_offload @@ -90,3 +84,11 @@ def set_t5(pipe, module, t5=None, cache_dir=None): pipe.maybe_free_model_hooks() devices.torch_gc() return pipe + + +def refresh_t5_list(): + t5_dict.clear() + for file in files_cache.list_files(shared.opts.t5_dir, ext_filter=[".safetensors"]): + name = os.path.splitext(os.path.basename(file))[0] + t5_dict[name] = file + shared.log.debug(f'Available T5s: path="{shared.opts.t5_dir}" items={len(t5_dict)}') diff --git a/modules/modeldata.py b/modules/modeldata.py index f002ca7bd..52895857d 100644 --- a/modules/modeldata.py +++ b/modules/modeldata.py @@ -103,8 +103,10 @@ def sd_model_type(self): model_type = 'sc' elif "AuraFlow" in self.sd_model.__class__.__name__: model_type = 'auraflow' - elif "FluxPipeline" in self.sd_model.__class__.__name__: + elif "Flux" in self.sd_model.__class__.__name__: model_type = 'f1' + elif "CogVideo" in self.sd_model.__class__.__name__: + model_type = 'cogvideox' else: model_type = self.sd_model.__class__.__name__ except Exception: diff --git a/modules/paths.py b/modules/paths.py index 763eddd31..e71e8e917 100644 --- a/modules/paths.py +++ b/modules/paths.py @@ -101,6 +101,7 @@ def fix_path(folder): create_path(fix_path('diffusers_dir')) create_path(fix_path('vae_dir')) create_path(fix_path('unet_dir')) + create_path(fix_path('t5_dir')) create_path(fix_path('lora_dir')) create_path(fix_path('embeddings_dir')) create_path(fix_path('hypernetwork_dir')) diff --git a/modules/processing.py b/modules/processing.py index 5c7390903..9cca323a4 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -8,6 +8,7 @@ from modules.sd_hijack_hypertile import context_hypertile_vae, context_hypertile_unet from modules.processing_class import StableDiffusionProcessing, StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, StableDiffusionProcessingControl # pylint: disable=unused-import from modules.processing_info import create_infotext +from modules.modeldata import model_data from modules import pag @@ -35,23 +36,23 @@ class Processed: def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None, comments=""): self.images = images_list - self.prompt = p.prompt - self.negative_prompt = p.negative_prompt - self.seed = seed + self.prompt = p.prompt or '' + self.negative_prompt = p.negative_prompt or '' + self.seed = seed if seed != -1 else p.seed self.subseed = subseed self.subseed_strength = p.subseed_strength self.info = info - self.comments = comments + self.comments = comments or '' self.width = p.width if hasattr(p, 'width') else (self.images[0].width if len(self.images) > 0 else 0) self.height = p.height if hasattr(p, 'height') else (self.images[0].height if len(self.images) > 0 else 0) - self.sampler_name = p.sampler_name - self.cfg_scale = p.cfg_scale - self.image_cfg_scale = p.image_cfg_scale - self.steps = p.steps - self.batch_size = p.batch_size - self.restore_faces = p.restore_faces + self.sampler_name = p.sampler_name or '' + self.cfg_scale = p.cfg_scale or 0 + self.image_cfg_scale = p.image_cfg_scale or 0 + self.steps = p.steps or 0 + self.batch_size = max(1, p.batch_size) + self.restore_faces = p.restore_faces or False self.face_restoration_model = shared.opts.face_restoration_model if p.restore_faces else None - self.sd_model_hash = 
getattr(shared.sd_model, 'sd_model_hash', '') + self.sd_model_hash = getattr(shared.sd_model, 'sd_model_hash', '') if model_data.sd_model is not None else '' self.seed_resize_from_w = p.seed_resize_from_w self.seed_resize_from_h = p.seed_resize_from_h self.denoising_strength = p.denoising_strength @@ -114,7 +115,6 @@ def infotext(self, p: StableDiffusionProcessing, index): return create_infotext(p, self.all_prompts, self.all_seeds, self.all_subseeds, comments=[], position_in_batch=index % self.batch_size, iteration=index // self.batch_size) - def process_images(p: StableDiffusionProcessing) -> Processed: debug(f'Process images: {vars(p)}') if not hasattr(p.sd_model, 'sd_checkpoint_info'): diff --git a/modules/processing_args.py b/modules/processing_args.py index d08783256..ef7e9f30f 100644 --- a/modules/processing_args.py +++ b/modules/processing_args.py @@ -34,6 +34,10 @@ def task_specific_kwargs(p, model): 'image': p.init_images, 'strength': p.denoising_strength, } + if model.__class__.__name__ == 'FluxImg2ImgPipeline': # needs explicit width/height + p.width = 8 * math.ceil(p.init_images[0].width / 8) + p.height = 8 * math.ceil(p.init_images[0].height / 8) + task_args['width'], task_args['height'] = p.width, p.height elif sd_models.get_diffusers_task(model) == sd_models.DiffusersTaskType.INSTRUCT and len(getattr(p, 'init_images', [])) > 0: p.ops.append('instruct') task_args = { @@ -229,6 +233,15 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2 args['cross_attention_kwargs'] = {} args['cross_attention_kwargs'][k] = v + # handle missing resolution + if args.get('image', None) is not None and ('width' not in args or 'height' not in args): + if isinstance(args['image'], torch.Tensor) or isinstance(args['image'], np.ndarray): + args['width'] = 8 * args['image'].shape[-1] + args['height'] = 8 * args['image'].shape[-2] + else: + args['width'] = 8 * math.ceil(args['image'][0].width / 8) + args['height'] = 8 * math.ceil(args['image'][0].height / 8) + # handle implicit controlnet if 'control_image' in possible and 'control_image' not in args and 'image' in args: debug('Diffusers: set control image') diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py index a9c7ab5e8..33416f103 100644 --- a/modules/processing_callbacks.py +++ b/modules/processing_callbacks.py @@ -81,11 +81,21 @@ def diffusers_callback(pipe, step: int, timestep: int, kwargs: dict): else: pipe._guidance_scale = 0.0 # pylint: disable=protected-access for key in {"prompt_embeds", "negative_prompt_embeds", "add_text_embeds", "add_time_ids"} & set(kwargs): - kwargs[key] = kwargs[key].chunk(2)[-1] - if hasattr(pipe, "_unpack_latents") and hasattr(pipe, "vae_scale_factor"): # FLUX - shared.state.current_latent = pipe._unpack_latents(kwargs['latents'], p.height, p.width, pipe.vae_scale_factor) # pylint: disable=protected-access - else: - shared.state.current_latent = kwargs['latents'] + if kwargs[key] is not None: + kwargs[key] = kwargs[key].chunk(2)[-1] + try: + if hasattr(pipe, "_unpack_latents") and hasattr(pipe, "vae_scale_factor"): # FLUX + if p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5) and p.is_hr_pass: + width = max(getattr(p, 'width', 0), getattr(p, 'hr_upscale_to_x', 0)) + height = max(getattr(p, 'height', 0), getattr(p, 'hr_upscale_to_y', 0)) + else: + width = getattr(p, 'width', 0) + height = getattr(p, 'height', 0) + shared.state.current_latent = pipe._unpack_latents(kwargs['latents'], height, width, pipe.vae_scale_factor) # pylint: 
disable=protected-access + else: + shared.state.current_latent = kwargs['latents'] + except Exception as e: + shared.log.error(f'Callback: {e}') if shared.cmd_opts.profile and shared.profiler is not None: shared.profiler.step() return kwargs diff --git a/modules/processing_class.py b/modules/processing_class.py index ad083ec5a..43cb3407b 100644 --- a/modules/processing_class.py +++ b/modules/processing_class.py @@ -20,7 +20,7 @@ class StableDiffusionProcessing: """ The first set of paramaters: sd_models -> do_not_reload_embeddings represent the minimum required to create a StableDiffusionProcessing """ - def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, hr_sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, image_cfg_scale: float = None, clip_skip: int = 1, width: int = 512, height: int = 512, full_quality: bool = True, restore_faces: bool = False, tiling: bool = False, hidiffusion: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, diffusers_guidance_rescale: float = 0.7, pag_scale: float = 0.0, pag_adaptive: float = 0.5, cfg_end: float = 1, resize_mode: int = 0, resize_name: str = 'None', scale_by: float = 0, selected_scale_tab: int = 0, hdr_mode: int = 0, hdr_brightness: float = 0, hdr_color: float = 0, hdr_sharpen: float = 0, hdr_clamp: bool = False, hdr_boundary: float = 4.0, hdr_threshold: float = 0.95, hdr_maximize: bool = False, hdr_max_center: float = 0.6, hdr_max_boundry: float = 1.0, hdr_color_picker: str = None, hdr_tint_ratio: float = 0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None, script_args: list = None): # pylint: disable=unused-argument + def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, hr_sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, image_cfg_scale: float = None, clip_skip: int = 1, width: int = 512, height: int = 512, full_quality: bool = True, restore_faces: bool = False, tiling: bool = False, hidiffusion: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, diffusers_guidance_rescale: float = 0.7, pag_scale: float = 0.0, pag_adaptive: float = 0.5, cfg_end: float = 1, resize_mode: int = 0, resize_name: str = 'None', resize_context: str = 'None', scale_by: float = 0, selected_scale_tab: int = 0, hdr_mode: int = 0, hdr_brightness: float = 0, hdr_color: float = 0, hdr_sharpen: float = 0, hdr_clamp: bool = False, hdr_boundary: float = 4.0, hdr_threshold: float = 0.95, hdr_maximize: bool = False, hdr_max_center: float = 0.6, hdr_max_boundry: float = 
1.0, hdr_color_picker: str = None, hdr_tint_ratio: float = 0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None, script_args: list = None): # pylint: disable=unused-argument self.outpath_samples: str = outpath_samples self.outpath_grids: str = outpath_grids self.prompt: str = prompt @@ -81,6 +81,8 @@ def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prom self.enable_hr = None self.hr_scale = None self.hr_upscaler = None + self.hr_resize_mode = 0 + self.hr_resize_context = 'None' self.hr_resize_x = 0 self.hr_resize_y = 0 self.hr_upscale_to_x = 0 @@ -95,6 +97,7 @@ def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prom self.ops = [] self.resize_mode: int = resize_mode self.resize_name: str = resize_name + self.resize_context: str = resize_context self.ddim_discretize = shared.opts.ddim_discretize self.s_min_uncond = shared.opts.s_min_uncond self.s_churn = shared.opts.s_churn @@ -169,7 +172,7 @@ def script_args(self, value): def setup_scripts(self): self.scripts_setup_complete = True - self.scripts.setup_scrips(self, is_ui=not self.is_api) + self.scripts.setup_scripts() def comment(self, text): self.comments[text] = 1 @@ -186,13 +189,15 @@ def close(self): class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): - def __init__(self, enable_hr: bool = False, denoising_strength: float = 0.75, firstphase_width: int = 0, firstphase_height: int = 0, hr_scale: float = 2.0, hr_force: bool = False, hr_upscaler: str = None, hr_second_pass_steps: int = 0, hr_resize_x: int = 0, hr_resize_y: int = 0, refiner_steps: int = 5, refiner_start: float = 0, refiner_prompt: str = '', refiner_negative: str = '', **kwargs): + def __init__(self, enable_hr: bool = False, denoising_strength: float = 0.75, firstphase_width: int = 0, firstphase_height: int = 0, hr_scale: float = 2.0, hr_force: bool = False, hr_resize_mode: int = 0, hr_resize_context: str = 'None', hr_upscaler: str = None, hr_second_pass_steps: int = 0, hr_resize_x: int = 0, hr_resize_y: int = 0, refiner_steps: int = 5, refiner_start: float = 0, refiner_prompt: str = '', refiner_negative: str = '', **kwargs): super().__init__(**kwargs) self.enable_hr = enable_hr self.denoising_strength = denoising_strength self.hr_scale = hr_scale self.hr_upscaler = hr_upscaler + self.hr_resize_mode = hr_resize_mode + self.hr_resize_context = hr_resize_context self.hr_force = hr_force self.hr_second_pass_steps = hr_second_pass_steps self.hr_resize_x = hr_resize_x @@ -240,6 +245,9 @@ def init_hr(self, scale = None, upscaler = None, force = False): # pylint: disab elif self.hr_resize_x == 0: self.hr_upscale_to_x = self.hr_resize_y * self.width // self.height self.hr_upscale_to_y = self.hr_resize_y + elif self.hr_resize_x > 0 and self.hr_resize_y > 0 and shared.native: + self.hr_upscale_to_x = self.hr_resize_x + self.hr_upscale_to_y = self.hr_resize_y else: target_w = self.hr_resize_x target_h = self.hr_resize_y @@ -260,7 +268,7 @@ def init_hr(self, scale = None, upscaler = None, force = False): # pylint: disab self.is_hr_pass = True hypertile_set(self, hr=True) shared.state.job_count = 2 * self.n_iter - shared.log.debug(f'Init hires: upscaler="{self.hr_upscaler}" sampler="{self.hr_sampler_name}" resize={self.hr_resize_x}x{self.hr_resize_y} upscale={self.hr_upscale_to_x}x{self.hr_upscale_to_y}') + shared.log.debug(f'Init hires: upscaler="{self.hr_upscaler}" sampler="{self.hr_sampler_name}" resize={self.hr_resize_x}x{self.hr_resize_y} 
upscale={self.hr_upscale_to_x}x{self.hr_upscale_to_y}') def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts): from modules import processing_original @@ -269,11 +277,12 @@ def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subs class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): - def __init__(self, init_images: list = None, resize_mode: int = 0, resize_name: str = 'None', denoising_strength: float = 0.3, image_cfg_scale: float = None, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = False, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: float = None, scale_by: float = 1, refiner_steps: int = 5, refiner_start: float = 0, refiner_prompt: str = '', refiner_negative: str = '', **kwargs): + def __init__(self, init_images: list = None, resize_mode: int = 0, resize_name: str = 'None', resize_context: str = 'None', denoising_strength: float = 0.3, image_cfg_scale: float = None, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = False, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: float = None, scale_by: float = 1, refiner_steps: int = 5, refiner_start: float = 0, refiner_prompt: str = '', refiner_negative: str = '', **kwargs): super().__init__(**kwargs) self.init_images = init_images self.resize_mode: int = resize_mode self.resize_name: str = resize_name + self.resize_context: str = resize_context self.denoising_strength: float = denoising_strength self.hr_denoising_strength: float = denoising_strength self.image_cfg_scale: float = image_cfg_scale @@ -304,6 +313,11 @@ def __init__(self, init_images: list = None, resize_mode: int = 0, resize_name: self.script_args = [] def init(self, all_prompts=None, all_seeds=None, all_subseeds=None): + if hasattr(self, 'init_images') and self.init_images is not None and len(self.init_images) > 0: + if self.width is None or self.width == 0: + self.width = int(8 * (self.init_images[0].width * self.scale_by // 8)) + if self.height is None or self.height == 0: + self.height = int(8 * (self.init_images[0].height * self.scale_by // 8)) if shared.native and getattr(self, 'image_mask', None) is not None: shared.sd_model = sd_models.set_diffuser_pipe(self.sd_model, sd_models.DiffusersTaskType.INPAINTING) elif shared.native and getattr(self, 'init_images', None) is not None: @@ -383,10 +397,8 @@ def init(self, all_prompts=None, all_seeds=None, all_subseeds=None): if shared.opts.save_init_img: images.save_image(img, path=shared.opts.outdir_init_images, basename=None, forced_filename=self.init_img_hash, suffix="-init-image") image = images.flatten(img, shared.opts.img2img_background_color) - if self.width is None or self.height is None: - self.width, self.height = image.width, image.height if crop_region is None and self.resize_mode > 0: - image = images.resize_image(self.resize_mode, image, self.width, self.height, self.resize_name) + image = images.resize_image(self.resize_mode, image, self.width, self.height, upscaler_name=self.resize_name, context=self.resize_context) self.width = image.width self.height = image.height if self.image_mask is not None and shared.opts.mask_apply_overlay: @@ -456,6 +468,7 @@ def __init__(self, **kwargs): self.controlnet_conditioning_scale = None self.control_guidance_start = None self.control_guidance_end = None + self.control_mode = None self.reference_attn = None 
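# --- Illustrative note, not part of the patch above ---
# The img2img init() change earlier in this file derives width/height from the
# first init image when they are unset, snapping both to multiples of 8.
# A quick check of that rounding with assumed numbers (an 800x600 init image,
# scale_by=1.5):
init_w, init_h, scale_by = 800, 600, 1.5
width = int(8 * (init_w * scale_by // 8))   # -> 1200
height = int(8 * (init_h * scale_by // 8))  # -> 896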
self.reference_adain = None self.attention_auto_machine_weight = None @@ -495,15 +508,19 @@ def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subs def init_hr(self, scale = None, upscaler = None, force = False): scale = scale or self.scale_by upscaler = upscaler or self.resize_name - if upscaler == 'None' or scale == 1.0: + use_scale = self.hr_resize_x == 0 or self.hr_resize_y == 0 + if upscaler == 'None' or (use_scale and scale == 1.0): return self.is_hr_pass = True self.hr_force = force self.hr_upscaler = upscaler - self.hr_upscale_to_x, self.hr_upscale_to_y = 8 * int(self.width * scale / 8), 8 * int(self.height * scale / 8) + if use_scale: + self.hr_upscale_to_x, self.hr_upscale_to_y = 8 * int(self.width * scale / 8), 8 * int(self.height * scale / 8) + else: + self.hr_upscale_to_x, self.hr_upscale_to_y = self.hr_resize_x, self.hr_resize_y # hypertile_set(self, hr=True) shared.state.job_count = 2 * self.n_iter - shared.log.debug(f'Control hires: upscaler="{self.hr_upscaler}" upscale={scale} size={self.hr_upscale_to_x}x{self.hr_upscale_to_y}') + shared.log.debug(f'Control hires: upscaler="{self.hr_upscaler}" scale={scale} fixed={not use_scale} size={self.hr_upscale_to_x}x{self.hr_upscale_to_y}') def switch_class(p: StableDiffusionProcessing, new_class: type, dct: dict = None): diff --git a/modules/processing_correction.py b/modules/processing_correction.py index 6afaaa8d3..c52f30ab3 100644 --- a/modules/processing_correction.py +++ b/modules/processing_correction.py @@ -85,8 +85,7 @@ def correction(p, timestep, latent): if timestep > 950 and p.hdr_clamp: p.extra_generation_params["HDR clamp"] = f'{p.hdr_threshold}/{p.hdr_boundary}' latent = soft_clamp_tensor(latent, threshold=p.hdr_threshold, boundary=p.hdr_boundary) - if 500 < timestep < 800 and (p.hdr_brightness != 0 or p.hdr_color != 0 or p.hdr_tint_ratio != 0): - p.extra_generation_params["HDR center"] = f'{p.hdr_color}/{p.hdr_brightness}' + if 600 < timestep < 900 and (p.hdr_color != 0 or p.hdr_tint_ratio != 0): if p.hdr_brightness != 0: latent[0:1] = center_tensor(latent[0:1], full_shift=float(p.hdr_mode), offset=2*p.hdr_brightness) # Brightness p.extra_generation_params["HDR brightness"] = f'{p.hdr_brightness}' @@ -98,6 +97,11 @@ def correction(p, timestep, latent): if p.hdr_tint_ratio != 0: latent = color_adjust(latent, p.hdr_color_picker, p.hdr_tint_ratio) p.hdr_tint_ratio = 0 + if timestep < 200 and (p.hdr_brightness != 0): # do it late so it doesn't change the composition + if p.hdr_brightness != 0: + latent[0:1] = center_tensor(latent[0:1], full_shift=float(p.hdr_mode), offset=2*p.hdr_brightness) # Brightness + p.extra_generation_params["HDR brightness"] = f'{p.hdr_brightness}' + p.hdr_brightness = 0 if timestep < 350 and p.hdr_sharpen != 0: p.extra_generation_params["HDR sharpen"] = f'{p.hdr_sharpen}' per_step_ratio = 2 ** (timestep / 250) * p.hdr_sharpen / 16 diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py index a5714e775..2233a0798 100644 --- a/modules/processing_diffusers.py +++ b/modules/processing_diffusers.py @@ -26,20 +26,6 @@ def is_refiner_enabled(): return p.enable_hr and p.refiner_steps > 0 and p.refiner_start > 0 and p.refiner_start < 1 and shared.sd_refiner is not None def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): - """ - import diffusers - if p.sag_scale > 0 and is_txt2img(): - update_sampler(shared.sd_model) - supported = ['DDIMScheduler', 'PNDMScheduler', 'DDPMScheduler', 'DEISMultistepScheduler', 'UniPCMultistepScheduler', 
'DPMSolverMultistepScheduler', 'DPMSolverSinlgestepScheduler'] - if hasattr(sd_model, 'sfast'): - shared.log.warning(f'SAG incompatible compile mode: backend={shared.opts.cuda_compile_backend}') - elif sd_model.scheduler.__class__.__name__ in supported: - sd_model = sd_models.switch_pipe(diffusers.StableDiffusionSAGPipeline, sd_model) - p.extra_generation_params["SAG scale"] = p.sag_scale - p.task_args['sag_scale'] = p.sag_scale - else: - shared.log.warning(f'SAG incompatible scheduler: current={sd_model.scheduler.__class__.__name__} supported={supported}') - """ if sd_models.get_diffusers_task(sd_model) == sd_models.DiffusersTaskType.INPAINTING and getattr(p, 'image_mask', None) is None and p.task_args.get('image_mask', None) is None and getattr(p, 'mask', None) is None: shared.log.warning('Processing: mode=inpaint mask=None') sd_model = sd_models.set_diffuser_pipe(sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE) @@ -167,8 +153,8 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): shared.sd_model.restore_pipeline() # upscale - if hasattr(p, 'height') and hasattr(p, 'width') and p.hr_upscaler is not None and p.hr_upscaler != 'None': - shared.log.info(f'Upscale: upscaler="{p.hr_upscaler}" resize={p.hr_resize_x}x{p.hr_resize_y} upscale={p.hr_upscale_to_x}x{p.hr_upscale_to_y}') + if hasattr(p, 'height') and hasattr(p, 'width') and p.hr_resize_mode >0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5): + shared.log.info(f'Upscale: mode={p.hr_resize_mode} upscaler="{p.hr_upscaler}" context="{p.hr_resize_context}" resize={p.hr_resize_x}x{p.hr_resize_y} upscale={p.hr_upscale_to_x}x{p.hr_upscale_to_y}') p.ops.append('upscale') if shared.opts.save and not p.do_not_save_samples and shared.opts.save_images_before_highres_fix and hasattr(shared.sd_model, 'vae'): save_intermediate(p, latents=output.images, suffix="-before-hires") @@ -185,13 +171,19 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): p.hr_force = True # hires + p.denoising_strength = getattr(p, 'hr_denoising_strength', p.denoising_strength) + if p.hr_force and p.denoising_strength == 0: + shared.log.warning('HiRes skip: denoising=0') + p.hr_force = False if p.hr_force: shared.state.job_count = 2 * p.n_iter shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE) shared.log.info(f'HiRes: class={shared.sd_model.__class__.__name__} sampler="{p.hr_sampler_name}"') + if 'Upscale' in shared.sd_model.__class__.__name__ or 'Flux' in shared.sd_model.__class__.__name__: + output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height) if p.is_control and hasattr(p, 'task_args') and p.task_args.get('image', None) is not None: if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0: - output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil') # controlnet cannnot deal with latent input + output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y) # controlnet cannnot deal with latent input p.task_args['image'] = output.images # replace so hires uses new output sd_models.move_model(shared.sd_model, devices.device) orig_denoise = p.denoising_strength @@ -256,8 +248,8 @@ def update_pipeline(sd_model, p: 
processing.StableDiffusionProcessing): image = output.images[i] noise_level = round(350 * p.denoising_strength) output_type='latent' if hasattr(shared.sd_refiner, 'vae') else 'np' - if shared.sd_refiner.__class__.__name__ == 'StableDiffusionUpscalePipeline': - image = processing_vae.vae_decode(latents=image, model=shared.sd_model, full_quality=p.full_quality, output_type='pil') + if 'Upscale' in shared.sd_refiner.__class__.__name__ or 'Flux' in shared.sd_refiner.__class__.__name__: + image = processing_vae.vae_decode(latents=image, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height) p.extra_generation_params['Noise level'] = noise_level output_type = 'np' if hasattr(p, 'task_args') and p.task_args.get('image', None) is not None and output is not None: # replace input with output so it can be used by hires/refine @@ -294,7 +286,7 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): shared.log.info(e) if not shared.state.interrupted and not shared.state.skipped: - refiner_images = processing_vae.vae_decode(latents=refiner_output.images, model=shared.sd_refiner, full_quality=True) + refiner_images = processing_vae.vae_decode(latents=refiner_output.images, model=shared.sd_refiner, full_quality=True, width=max(p.width, p.hr_upscale_to_x), height=max(p.height, p.hr_upscale_to_y)) for refiner_image in refiner_images: results.append(refiner_image) @@ -313,12 +305,14 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): if not hasattr(output, 'images') and hasattr(output, 'frames'): shared.log.debug(f'Generated: frames={len(output.frames[0])}') output.images = output.frames[0] - if hasattr(shared.sd_model, "_unpack_latents") and hasattr(shared.sd_model, "vae_scale_factor"): # FLUX - output.images = shared.sd_model._unpack_latents(output.images, p.height, p.width, shared.sd_model.vae_scale_factor) # pylint: disable=protected-access - if torch.is_tensor(output.images) and len(output.images) > 0 and any(s >= 512 for s in output.images.shape): - results = output.images.float().cpu().numpy() - elif hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0: - results = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality) + if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0: + if p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5): + width = max(getattr(p, 'width', 0), getattr(p, 'hr_upscale_to_x', 0)) + height = max(getattr(p, 'height', 0), getattr(p, 'hr_upscale_to_y', 0)) + else: + width = getattr(p, 'width', 0) + height = getattr(p, 'height', 0) + results = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, width=width, height=height) elif hasattr(output, 'images'): results = output.images else: diff --git a/modules/processing_helpers.py b/modules/processing_helpers.py index 47839ed43..82f68ca35 100644 --- a/modules/processing_helpers.py +++ b/modules/processing_helpers.py @@ -196,7 +196,8 @@ def decode_first_stage(model, x, full_quality=True): try: if full_quality: if hasattr(model, 'decode_first_stage'): - x_sample = model.decode_first_stage(x) * 0.5 + 0.5 + # x_sample = model.decode_first_stage(x) * 0.5 + 0.5 + x_sample = model.decode_first_stage(x) elif hasattr(model, 'vae'): x_sample = processing_vae.vae_decode(latents=x, model=model, output_type='np', full_quality=full_quality) else: @@ -382,20 +383,20 @@ def 
resize_init_images(p): def resize_hires(p, latents): # input=latents output=pil if not latent_upscaler else latent if not torch.is_tensor(latents): shared.log.warning('Hires: input is not tensor') - first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil') + first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height) return first_pass_images latent_upscaler = shared.latent_upscale_modes.get(p.hr_upscaler, None) # shared.log.info(f'Hires: upscaler={p.hr_upscaler} width={p.hr_upscale_to_x} height={p.hr_upscale_to_y} images={latents.shape[0]}') if latent_upscaler is not None: return torch.nn.functional.interpolate(latents, size=(p.hr_upscale_to_y // 8, p.hr_upscale_to_x // 8), mode=latent_upscaler["mode"], antialias=latent_upscaler["antialias"]) - first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil') + first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height) if p.hr_upscale_to_x == 0 or p.hr_upscale_to_y == 0 and hasattr(p, 'init_hr'): shared.log.error('Hires: missing upscaling dimensions') return first_pass_images resized_images = [] for img in first_pass_images: if latent_upscaler is None: - resized_image = images.resize_image(1, img, p.hr_upscale_to_x, p.hr_upscale_to_y, upscaler_name=p.hr_upscaler) + resized_image = images.resize_image(p.hr_resize_mode, img, p.hr_upscale_to_x, p.hr_upscale_to_y, upscaler_name=p.hr_upscaler, context=p.hr_resize_context) else: resized_image = img resized_images.append(resized_image) @@ -530,7 +531,7 @@ def save_intermediate(p, latents, suffix): for i in range(len(latents)): from modules.processing import create_infotext info=create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, [], iteration=p.iteration, position_in_batch=i) - decoded = processing_vae.vae_decode(latents=latents, model=shared.sd_model, output_type='pil', full_quality=p.full_quality) + decoded = processing_vae.vae_decode(latents=latents, model=shared.sd_model, output_type='pil', full_quality=p.full_quality, width=p.width, height=p.height) for j in range(len(decoded)): images.save_image(decoded[j], path=p.outpath_samples, basename="", seed=p.seeds[i], prompt=p.prompts[i], extension=shared.opts.samples_format, info=info, p=p, suffix=suffix) diff --git a/modules/processing_info.py b/modules/processing_info.py index 95e573f21..ac05aa58d 100644 --- a/modules/processing_info.py +++ b/modules/processing_info.py @@ -71,13 +71,16 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No args["Variation seed"] = all_subseeds[index] if p.subseed_strength > 0 else None args["Variation strength"] = p.subseed_strength if p.subseed_strength > 0 else None if 'hires' in p.ops or 'upscale' in p.ops: + is_resize = p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5) args["Second pass"] = p.enable_hr args["Hires force"] = p.hr_force args["Hires steps"] = p.hr_second_pass_steps - args["Hires upscaler"] = p.hr_upscaler if p.hr_upscaler is not None and p.hr_upscaler != 'None' else None - args["Hires upscale"] = p.hr_scale if p.hr_upscaler is not None and p.hr_upscaler != 'None' else None - args["Hires resize"] = f"{p.hr_resize_x}x{p.hr_resize_y}" if p.hr_upscaler is not None and p.hr_upscaler 
!= 'None' else None - args["Hires size"] = f"{p.hr_upscale_to_x}x{p.hr_upscale_to_y}" if p.hr_upscaler is not None and p.hr_upscaler != 'None' else None + args["HiRes resize mode"] = p.hr_resize_mode if is_resize else None + args["HiRes resize context"] = p.hr_resize_context if p.hr_resize_mode == 5 else None + args["Hires upscaler"] = p.hr_upscaler if is_resize else None + args["Hires scale"] = p.hr_scale if is_resize else None + args["Hires resize"] = f"{p.hr_resize_x}x{p.hr_resize_y}" if is_resize else None + args["Hires size"] = f"{p.hr_upscale_to_x}x{p.hr_upscale_to_y}" if is_resize else None args["Denoising strength"] = p.denoising_strength args["Hires sampler"] = p.hr_sampler_name args["Image CFG scale"] = p.image_cfg_scale diff --git a/modules/processing_vae.py b/modules/processing_vae.py index e5108f0d3..9ab4acad9 100644 --- a/modules/processing_vae.py +++ b/modules/processing_vae.py @@ -5,8 +5,9 @@ from modules import shared, devices, sd_models, sd_vae, sd_vae_taesd -debug = shared.log.trace if os.environ.get('SD_VAE_DEBUG', None) is not None else lambda *args, **kwargs: None -debug('Trace: VAE') +debug = os.environ.get('SD_VAE_DEBUG', None) is not None +log_debug = shared.log.trace if debug else lambda *args, **kwargs: None +log_debug('Trace: VAE') def create_latents(image, p, dtype=None, device=None): @@ -33,6 +34,10 @@ def create_latents(image, p, dtype=None, device=None): def full_vae_decode(latents, model): t0 = time.time() + if debug: + devices.torch_gc(force=True) + shared.mem_mon.reset() + base_device = None if shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False): base_device = sd_models.move_base(model, devices.cpu) if shared.opts.diffusers_offload_mode == "balanced": @@ -66,7 +71,7 @@ def full_vae_decode(latents, model): decoded = model.vae.decode(latents, return_dict=False)[0] # delete vae after OpenVINO compile - if shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx" and shared.compiled_model_state.first_pass_vae: + if 'VAE' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx" and shared.compiled_model_state.first_pass_vae: shared.compiled_model_state.first_pass_vae = False if not shared.opts.openvino_disable_memory_cleanup and hasattr(shared.sd_model, "vae"): model.vae.apply(sd_models.convert_to_faketensors) @@ -77,14 +82,16 @@ def full_vae_decode(latents, model): elif shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False) and base_device is not None: sd_models.move_base(model, base_device) t1 = time.time() - debug(f'VAE decode: name={sd_vae.loaded_vae_file if sd_vae.loaded_vae_file is not None else "baked"} dtype={model.vae.dtype} upcast={upcast} images={latents.shape[0]} latents={latents.shape} time={round(t1-t0, 3)}') + if debug: + log_debug(f'VAE memory: {shared.mem_mon.read()}') + log_debug(f'VAE decode: name={sd_vae.loaded_vae_file if sd_vae.loaded_vae_file is not None else "baked"} dtype={model.vae.dtype} upcast={upcast} slicing={getattr(model.vae, "use_slicing", None)} tiling={getattr(model.vae, "use_tiling", None)} images={latents.shape[0]} latents={latents.shape} time={round(t1-t0, 3)}') return decoded def full_vae_encode(image, model): - debug(f'VAE encode: name={sd_vae.loaded_vae_file if sd_vae.loaded_vae_file is not None else "baked"} dtype={model.vae.dtype} upcast={model.vae.config.get("force_upcast", None)}') + log_debug(f'VAE encode: name={sd_vae.loaded_vae_file if sd_vae.loaded_vae_file is not None else "baked"} dtype={model.vae.dtype} 
upcast={model.vae.config.get("force_upcast", None)}') if shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False) and hasattr(model, 'unet'): - debug('Moving to CPU: model=UNet') + log_debug('Moving to CPU: model=UNet') unet_device = model.unet.device sd_models.move_model(model.unet, devices.cpu) if not shared.opts.diffusers_offload_mode == "sequential" and hasattr(model, 'vae'): @@ -96,10 +103,10 @@ def full_vae_encode(image, model): def taesd_vae_decode(latents): - debug(f'VAE decode: name=TAESD images={len(latents)} latents={latents.shape} slicing={shared.opts.diffusers_vae_slicing}') + log_debug(f'VAE decode: name=TAESD images={len(latents)} latents={latents.shape} slicing={shared.opts.diffusers_vae_slicing}') if len(latents) == 0: return [] - if shared.opts.diffusers_vae_slicing: + if shared.opts.diffusers_vae_slicing and len(latents) > 1: decoded = torch.zeros((len(latents), 3, latents.shape[2] * 8, latents.shape[3] * 8), dtype=devices.dtype_vae, device=devices.device) for i in range(latents.shape[0]): decoded[i] = sd_vae_taesd.decode(latents[i]) @@ -109,15 +116,16 @@ def taesd_vae_decode(latents): def taesd_vae_encode(image): - debug(f'VAE encode: name=TAESD image={image.shape}') + log_debug(f'VAE encode: name=TAESD image={image.shape}') encoded = sd_vae_taesd.encode(image) return encoded -def vae_decode(latents, model, output_type='np', full_quality=True): +def vae_decode(latents, model, output_type='np', full_quality=True, width=None, height=None): t0 = time.time() prev_job = shared.state.job shared.state.job = 'VAE' + decoded = None if not torch.is_tensor(latents): # already decoded return latents if latents.shape[0] == 0: @@ -128,24 +136,30 @@ def vae_decode(latents, model, output_type='np', full_quality=True): if not hasattr(model, 'vae'): shared.log.error('VAE not found in model') return [] + + if hasattr(model, "_unpack_latents") and hasattr(model, "vae_scale_factor") and width is not None and height is not None: # FLUX + latents = model._unpack_latents(latents, height, width, model.vae_scale_factor) # pylint: disable=protected-access if len(latents.shape) == 3: # lost a batch dim in hires latents = latents.unsqueeze(0) if latents.shape[0] == 4 and latents.shape[1] != 4: # likely animatediff latent latents = latents.permute(1, 0, 2, 3) - if full_quality: + + if any(s >= 512 for s in latents.shape): # not a latent, likely an image + decoded = latents.float().cpu().numpy() + elif full_quality and hasattr(shared.sd_model, "vae"): decoded = full_vae_decode(latents=latents, model=shared.sd_model) else: decoded = taesd_vae_decode(latents=latents) - # TODO validate decoded sample diffusers - # decoded = validate_sample(decoded) + if hasattr(model, 'image_processor'): imgs = model.image_processor.postprocess(decoded, output_type=output_type) else: import diffusers - image_processor = diffusers.image_processor.VaeImageProcessor() - imgs = image_processor.postprocess(decoded, output_type=output_type) + model.image_processor = diffusers.image_processor.VaeImageProcessor() + imgs = model.image_processor.postprocess(decoded, output_type=output_type) + shared.state.job = prev_job - if shared.cmd_opts.profile: + if shared.cmd_opts.profile or debug: t1 = time.time() shared.log.debug(f'Profile: VAE decode: {t1-t0:.2f}') devices.torch_gc() diff --git a/modules/progress.py b/modules/progress.py index aeef195b4..abd6d906d 100644 --- a/modules/progress.py +++ b/modules/progress.py @@ -74,7 +74,6 @@ def progressapi(req: ProgressRequest): predicted = elapsed / progress if 
progress > 0 else None eta = predicted - elapsed if predicted is not None else None # shared.log.debug(f'Progress: step={step_x}:{step_y} batch={batch_x}:{batch_y} current={current} total={total} progress={progress} elapsed={elapsed} eta={eta}') - id_live_preview = req.id_live_preview live_preview = None shared.state.set_current_image() diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index 09c8c1899..31cb7c68f 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -463,13 +463,13 @@ def get_xhinker_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", cl te1_device, te2_device, te3_device = None, None, None if hasattr(pipe, "text_encoder") and pipe.text_encoder.device != devices.device: te1_device = pipe.text_encoder.device - pipe.text_encoder = pipe.text_encoder.to(devices.device) + sd_models.move_model(pipe.text_encoder, devices.device) if hasattr(pipe, "text_encoder_2") and pipe.text_encoder_2.device != devices.device: te2_device = pipe.text_encoder_2.device - pipe.text_encoder_2 = pipe.text_encoder_2.to(devices.device) + sd_models.move_model(pipe.text_encoder_2, devices.device) if hasattr(pipe, "text_encoder_3") and pipe.text_encoder_3.device != devices.device: te3_device = pipe.text_encoder_3.device - pipe.text_encoder_3 = pipe.text_encoder_3.to(devices.device) + sd_models.move_model(pipe.text_encoder_3, devices.device) if is_sd3: prompt_embed, negative_embed, positive_pooled, negative_pooled = get_weighted_text_embeddings_sd3(pipe=pipe, prompt=prompt, neg_prompt=neg_prompt, use_t5_encoder=bool(pipe.text_encoder_3)) @@ -481,10 +481,10 @@ def get_xhinker_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", cl prompt_embed, negative_embed = get_weighted_text_embeddings_sd15(pipe=pipe, prompt=prompt, neg_prompt=neg_prompt, clip_skip=clip_skip) if te1_device is not None: - pipe.text_encoder = pipe.text_encoder.to(te1_device) + sd_models.move_model(pipe.text_encoder, te1_device) if te2_device is not None: - pipe.text_encoder_2 = pipe.text_encoder_2.to(te2_device) + sd_models.move_model(pipe.text_encoder_2, te2_device) if te3_device is not None: - pipe.text_encoder_3 = pipe.text_encoder_3.to(te3_device) + sd_models.move_model(pipe.text_encoder_3, te3_device) return prompt_embed, positive_pooled, negative_embed, negative_pooled diff --git a/modules/prompt_parser_xhinker.py b/modules/prompt_parser_xhinker.py index 6e43c8860..6a8acf8c6 100644 --- a/modules/prompt_parser_xhinker.py +++ b/modules/prompt_parser_xhinker.py @@ -269,12 +269,12 @@ def get_weighted_text_embeddings_sd15( # get positive prompt embeddings with weights token_tensor = torch.tensor( [prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) weight_tensor = torch.tensor( prompt_weight_groups[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder.device ) token_embedding = pipe.text_encoder(token_tensor)[0].squeeze(0) @@ -286,12 +286,12 @@ def get_weighted_text_embeddings_sd15( # get negative prompt embeddings with weights neg_token_tensor = torch.tensor( [neg_prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) neg_weight_tensor = torch.tensor( neg_prompt_weight_groups[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder.device ) neg_token_embedding = pipe.text_encoder(neg_token_tensor)[0].squeeze(0) for z in range(len(neg_weight_tensor)): @@ -449,36 +449,36 @@ 
def get_weighted_text_embeddings_sdxl( # get positive prompt embeddings with weights token_tensor = torch.tensor( [prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) weight_tensor = torch.tensor( prompt_weight_groups[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder.device ) token_tensor_2 = torch.tensor( [prompt_token_groups_2[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder_2.device ) # use first text encoder prompt_embeds_1 = pipe.text_encoder( - token_tensor.to(pipe.device) + token_tensor.to(pipe.text_encoder.device) , output_hidden_states=True ) prompt_embeds_1_hidden_states = prompt_embeds_1.hidden_states[-2] # use second text encoder prompt_embeds_2 = pipe.text_encoder_2( - token_tensor_2.to(pipe.device) + token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) prompt_embeds_2_hidden_states = prompt_embeds_2.hidden_states[-2] pooled_prompt_embeds = prompt_embeds_2[0] prompt_embeds_list = [prompt_embeds_1_hidden_states, prompt_embeds_2_hidden_states] - token_embedding = torch.concat(prompt_embeds_list, dim=-1).squeeze(0).to(pipe.device) + token_embedding = torch.concat(prompt_embeds_list, dim=-1).squeeze(0).to(pipe.text_encoder.device) for j in range(len(weight_tensor)): if weight_tensor[j] != 1.0: @@ -509,35 +509,35 @@ def get_weighted_text_embeddings_sdxl( # get negative prompt embeddings with weights neg_token_tensor = torch.tensor( [neg_prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) neg_token_tensor_2 = torch.tensor( [neg_prompt_token_groups_2[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder_2.device ) neg_weight_tensor = torch.tensor( neg_prompt_weight_groups[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder.device ) # use first text encoder neg_prompt_embeds_1 = pipe.text_encoder( - neg_token_tensor.to(pipe.device) + neg_token_tensor.to(pipe.text_encoder.device) , output_hidden_states=True ) neg_prompt_embeds_1_hidden_states = neg_prompt_embeds_1.hidden_states[-2] # use second text encoder neg_prompt_embeds_2 = pipe.text_encoder_2( - neg_token_tensor_2.to(pipe.device) + neg_token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) neg_prompt_embeds_2_hidden_states = neg_prompt_embeds_2.hidden_states[-2] negative_pooled_prompt_embeds = neg_prompt_embeds_2[0] neg_prompt_embeds_list = [neg_prompt_embeds_1_hidden_states, neg_prompt_embeds_2_hidden_states] - neg_token_embedding = torch.concat(neg_prompt_embeds_list, dim=-1).squeeze(0).to(pipe.device) + neg_token_embedding = torch.concat(neg_prompt_embeds_list, dim=-1).squeeze(0).to(pipe.text_encoder.device) for z in range(len(neg_weight_tensor)): if neg_weight_tensor[z] != 1.0: @@ -657,18 +657,18 @@ def get_weighted_text_embeddings_sdxl_refiner( # get positive prompt embeddings with weights token_tensor_2 = torch.tensor( [prompt_token_groups_2[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder_2.device ) weight_tensor_2 = torch.tensor( prompt_weight_groups_2[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder_2.device ) # use second text encoder prompt_embeds_2 = pipe.text_encoder_2( - token_tensor_2.to(pipe.device) + token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) prompt_embeds_2_hidden_states = 
prompt_embeds_2.hidden_states[-2] @@ -679,12 +679,12 @@ def get_weighted_text_embeddings_sdxl_refiner( for j in range(len(weight_tensor_2)): if weight_tensor_2[j] != 1.0: - ow = weight_tensor_2[j] - 1 + # ow = weight_tensor_2[j] - 1 # optional process # To map number of (0,1) to (-1,1) - tanh_weight = (math.exp(ow) / (math.exp(ow) + 1) - 0.5) * 2 - weight = 1 + tanh_weight + # tanh_weight = (math.exp(ow) / (math.exp(ow) + 1) - 0.5) * 2 + # weight = 1 + tanh_weight # add weight method 1: # token_embedding[j] = token_embedding[j] * weight @@ -703,17 +703,17 @@ def get_weighted_text_embeddings_sdxl_refiner( # get negative prompt embeddings with weights neg_token_tensor_2 = torch.tensor( [neg_prompt_token_groups_2[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder_2.device ) neg_weight_tensor_2 = torch.tensor( neg_prompt_weight_groups_2[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder_2.device ) # use second text encoder neg_prompt_embeds_2 = pipe.text_encoder_2( - neg_token_tensor_2.to(pipe.device) + neg_token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) neg_prompt_embeds_2_hidden_states = neg_prompt_embeds_2.hidden_states[-2] @@ -787,8 +787,6 @@ def get_weighted_text_embeddings_sdxl_2p( """ prompt_2 = prompt_2 or prompt neg_prompt_2 = neg_prompt_2 or neg_prompt - - import math eos = pipe.tokenizer.eos_token_id # tokenizer 1 @@ -907,33 +905,33 @@ def get_weighted_text_embeddings_sdxl_2p( # get positive prompt embeddings with weights token_tensor = torch.tensor( [prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) weight_tensor = torch.tensor( prompt_weight_groups[i] - , device=pipe.device + , device=pipe.text_encoder.device ) token_tensor_2 = torch.tensor( [prompt_token_groups_2[i]] - , device=pipe.device + , device=pipe.text_encoder_2.device ) weight_tensor_2 = torch.tensor( prompt_weight_groups_2[i] - , device=pipe.device + , device=pipe.text_encoder_2.device ) # use first text encoder prompt_embeds_1 = pipe.text_encoder( - token_tensor.to(pipe.device) + token_tensor.to(pipe.text_encoder.device) , output_hidden_states=True ) prompt_embeds_1_hidden_states = prompt_embeds_1.hidden_states[-2] # use second text encoder prompt_embeds_2 = pipe.text_encoder_2( - token_tensor_2.to(pipe.device) + token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) prompt_embeds_2_hidden_states = prompt_embeds_2.hidden_states[-2] @@ -966,31 +964,31 @@ def get_weighted_text_embeddings_sdxl_2p( # get negative prompt embeddings with weights neg_token_tensor = torch.tensor( [neg_prompt_token_groups[i]] - , device=pipe.device + , device=pipe.text_encoder.device ) neg_token_tensor_2 = torch.tensor( [neg_prompt_token_groups_2[i]] - , device=pipe.device + , device=pipe.text_encoder_2.device ) neg_weight_tensor = torch.tensor( neg_prompt_weight_groups[i] - , device=pipe.device + , device=pipe.text_encoder.device ) neg_weight_tensor_2 = torch.tensor( neg_prompt_weight_groups_2[i] - , device=pipe.device + , device=pipe.text_encoder_2.device ) # use first text encoder neg_prompt_embeds_1 = pipe.text_encoder( - neg_token_tensor.to(pipe.device) + neg_token_tensor.to(pipe.text_encoder.device) , output_hidden_states=True ) neg_prompt_embeds_1_hidden_states = neg_prompt_embeds_1.hidden_states[-2] # use second text encoder neg_prompt_embeds_2 = pipe.text_encoder_2( - neg_token_tensor_2.to(pipe.device) + 
neg_token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) neg_prompt_embeds_2_hidden_states = neg_prompt_embeds_2.hidden_states[-2] @@ -1049,7 +1047,6 @@ def get_weighted_text_embeddings_sd3( pooled_prompt_embeds (torch.Tensor) negative_pooled_prompt_embeds (torch.Tensor) """ - import math eos = pipe.tokenizer.eos_token_id # tokenizer 1 @@ -1161,22 +1158,22 @@ def get_weighted_text_embeddings_sd3( # get positive prompt embeddings with weights token_tensor = torch.tensor( [prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) weight_tensor = torch.tensor( prompt_weight_groups[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder.device ) token_tensor_2 = torch.tensor( [prompt_token_groups_2[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder_2.device ) # use first text encoder prompt_embeds_1 = pipe.text_encoder( - token_tensor.to(pipe.device) + token_tensor.to(pipe.text_encoder.device) , output_hidden_states=True ) prompt_embeds_1_hidden_states = prompt_embeds_1.hidden_states[-2] @@ -1184,14 +1181,14 @@ def get_weighted_text_embeddings_sd3( # use second text encoder prompt_embeds_2 = pipe.text_encoder_2( - token_tensor_2.to(pipe.device) + token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) prompt_embeds_2_hidden_states = prompt_embeds_2.hidden_states[-2] pooled_prompt_embeds_2 = prompt_embeds_2[0] prompt_embeds_list = [prompt_embeds_1_hidden_states, prompt_embeds_2_hidden_states] - token_embedding = torch.concat(prompt_embeds_list, dim=-1).squeeze(0).to(pipe.device) + token_embedding = torch.concat(prompt_embeds_list, dim=-1).squeeze(0).to(pipe.text_encoder.device) for j in range(len(weight_tensor)): if weight_tensor[j] != 1.0: @@ -1222,21 +1219,21 @@ def get_weighted_text_embeddings_sd3( # get negative prompt embeddings with weights neg_token_tensor = torch.tensor( [neg_prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) neg_token_tensor_2 = torch.tensor( [neg_prompt_token_groups_2[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder_2.device ) neg_weight_tensor = torch.tensor( neg_prompt_weight_groups[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder.device ) # use first text encoder neg_prompt_embeds_1 = pipe.text_encoder( - neg_token_tensor.to(pipe.device) + neg_token_tensor.to(pipe.text_encoder.device) , output_hidden_states=True ) neg_prompt_embeds_1_hidden_states = neg_prompt_embeds_1.hidden_states[-2] @@ -1244,14 +1241,14 @@ def get_weighted_text_embeddings_sd3( # use second text encoder neg_prompt_embeds_2 = pipe.text_encoder_2( - neg_token_tensor_2.to(pipe.device) + neg_token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) neg_prompt_embeds_2_hidden_states = neg_prompt_embeds_2.hidden_states[-2] negative_pooled_prompt_embeds_2 = neg_prompt_embeds_2[0] neg_prompt_embeds_list = [neg_prompt_embeds_1_hidden_states, neg_prompt_embeds_2_hidden_states] - neg_token_embedding = torch.concat(neg_prompt_embeds_list, dim=-1).squeeze(0).to(pipe.device) + neg_token_embedding = torch.concat(neg_prompt_embeds_list, dim=-1).squeeze(0).to(pipe.text_encoder.device) for z in range(len(neg_weight_tensor)): if neg_weight_tensor[z] != 1.0: @@ -1286,8 +1283,8 @@ def get_weighted_text_embeddings_sd3( # ----------------- generate positive t5 embeddings -------------------- 
prompt_tokens_3 = torch.tensor([prompt_tokens_3], dtype=torch.long) - t5_prompt_embeds = pipe.text_encoder_3(prompt_tokens_3.to(pipe.device))[0].squeeze(0) - t5_prompt_embeds = t5_prompt_embeds.to(device=pipe.device) + t5_prompt_embeds = pipe.text_encoder_3(prompt_tokens_3.to(pipe.text_encoder_3.device))[0].squeeze(0) + t5_prompt_embeds = t5_prompt_embeds.to(device=pipe.text_encoder_3.device) # add weight to t5 prompt for z in range(len(prompt_weights_3)): @@ -1296,7 +1293,7 @@ def get_weighted_text_embeddings_sd3( t5_prompt_embeds = t5_prompt_embeds.unsqueeze(0) else: t5_prompt_embeds = torch.zeros(1, 4096, dtype=prompt_embeds.dtype).unsqueeze(0) - t5_prompt_embeds = t5_prompt_embeds.to(device=pipe.device) + t5_prompt_embeds = t5_prompt_embeds.to(device=pipe.text_encoder_3.device) # merge with the clip embedding 1 and clip embedding 2 clip_prompt_embeds = torch.nn.functional.pad( @@ -1308,8 +1305,8 @@ def get_weighted_text_embeddings_sd3( # ---------------------- get neg t5 embeddings ------------------------- neg_prompt_tokens_3 = torch.tensor([neg_prompt_tokens_3], dtype=torch.long) - t5_neg_prompt_embeds = pipe.text_encoder_3(neg_prompt_tokens_3.to(pipe.device))[0].squeeze(0) - t5_neg_prompt_embeds = t5_neg_prompt_embeds.to(device=pipe.device) + t5_neg_prompt_embeds = pipe.text_encoder_3(neg_prompt_tokens_3.to(pipe.text_encoder_3.device))[0].squeeze(0) + t5_neg_prompt_embeds = t5_neg_prompt_embeds.to(device=pipe.text_encoder_3.device) # add weight to neg t5 embeddings for z in range(len(neg_prompt_weights_3)): @@ -1318,7 +1315,7 @@ def get_weighted_text_embeddings_sd3( t5_neg_prompt_embeds = t5_neg_prompt_embeds.unsqueeze(0) else: t5_neg_prompt_embeds = torch.zeros(1, 4096, dtype=prompt_embeds.dtype).unsqueeze(0) - t5_neg_prompt_embeds = t5_prompt_embeds.to(device=pipe.device) + t5_neg_prompt_embeds = t5_prompt_embeds.to(device=pipe.text_encoder_3.device) clip_neg_prompt_embeds = torch.nn.functional.pad( negative_prompt_embeds, (0, t5_neg_prompt_embeds.shape[-1] - negative_prompt_embeds.shape[-1]) @@ -1359,7 +1356,7 @@ def get_weighted_text_embeddings_flux1( """ prompt2 = prompt if prompt2 is None else prompt2 if device is None: - device = pipe.device + device = pipe.text_encoder.device # tokenizer 1 - openai/clip-vit-large-patch14 prompt_tokens, prompt_weights = get_prompts_tokens_with_weights( diff --git a/modules/rife/__init__.py b/modules/rife/__init__.py index 7e40735e7..f74f3d984 100644 --- a/modules/rife/__init__.py +++ b/modules/rife/__init__.py @@ -113,5 +113,5 @@ def f_pad(img): while not buffer.empty(): time.sleep(0.1) t1 = time.time() - shared.log.info(f'RIFE interpolate: input={len(images)} frames={len(interpolated)} resolution={w}x{h} interpolate={count} scale={scale} pad={pad} change={change} time={round(t1 - t0, 2)}') + shared.log.info(f'RIFE interpolate: input={len(images)} frames={len(interpolated)} width={w} height={h} interpolate={count} scale={scale} pad={pad} change={change} time={round(t1 - t0, 2)}') return interpolated diff --git a/modules/scripts.py b/modules/scripts.py index ef24e359a..53452fe6b 100644 --- a/modules/scripts.py +++ b/modules/scripts.py @@ -256,6 +256,7 @@ def load_scripts(): postprocessing_scripts_data.clear() script_callbacks.clear_callbacks() scripts_list = list_scripts('scripts', '.py') + list_scripts(os.path.join('modules', 'face'), '.py') + scripts_list = sorted(scripts_list, key=lambda item: item.priority + item.path.lower(), reverse=False) syspath = sys.path def register_scripts_from_module(module, scriptfile): @@ -488,7 +489,27 @@ 
def run(self, p, *args): if not hasattr(p, 'init_images') and p.task_args.get('image', None) is not None: p.init_images = p.task_args['image'] parsed = p.per_script_args.get(script.title(), args[script.args_from:script.args_to]) - processed = script.run(p, *parsed) + if hasattr(script, 'run'): + processed = script.run(p, *parsed) + else: + processed = None + errors.log.error(f'Script: file="{script.filename}" no run function defined') + s.record(script.title()) + s.report() + return processed + + def after(self, p, processed, *args): + s = ScriptSummary('after') + script_index = args[0] if len(args) > 0 else 0 + if script_index == 0: + return processed + script = self.selectable_scripts[script_index-1] + if script is None or not hasattr(script, 'after'): + return processed + parsed = p.per_script_args.get(script.title(), args[script.args_from:script.args_to]) + after_processed = script.after(p, processed, *parsed) + if after_processed is not None: + processed = after_processed s.record(script.title()) s.report() return processed @@ -524,7 +545,9 @@ def process_images(self, p, **kwargs): try: if (script.args_to > 0) and (script.args_to >= script.args_from): args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) - processed = script.process_images(p, *args, **kwargs) + _processed = script.process_images(p, *args, **kwargs) + if _processed is not None: + processed = _processed except Exception as e: errors.display(e, f'Running script process images: {script.filename}') s.record(script.title()) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index dc07dea81..2f707d098 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -265,7 +265,6 @@ def __init__(self, wrapped, embeddings): def forward(self, input_ids): batch_fixes = self.embeddings.fixes self.embeddings.fixes = None - inputs_embeds = self.wrapped(input_ids) if batch_fixes is None or len(batch_fixes) == 0 or max([len(x) for x in batch_fixes]) == 0: diff --git a/modules/sd_hijack_dynamic_atten.py b/modules/sd_hijack_dynamic_atten.py index fb2befc18..1c17e024c 100644 --- a/modules/sd_hijack_dynamic_atten.py +++ b/modules/sd_hijack_dynamic_atten.py @@ -1,7 +1,7 @@ from functools import cache, wraps import torch -from diffusers.utils import USE_PEFT_BACKEND +from diffusers.utils import USE_PEFT_BACKEND # pylint: disable=unused-import from modules import shared, devices @@ -107,8 +107,7 @@ class DynamicAttnProcessorBMM: based on AttnProcessor V1 """ - def __call__(self, attn, hidden_states: torch.Tensor, encoder_hidden_states=None, attention_mask=None, - temb=None, *args, **kwargs) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches + def __call__(self, attn, hidden_states: torch.Tensor, encoder_hidden_states=None, attention_mask=None, temb=None, *args, **kwargs) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches, keyword-arg-before-vararg residual = hidden_states diff --git a/modules/sd_models.py b/modules/sd_models.py index 529924976..aeb680b2a 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -566,7 +566,7 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): # guess by size if os.path.isfile(f) and f.endswith('.safetensors'): size = round(os.path.getsize(f) / 1024 / 1024) - if size < 128: + if (size > 0 and size < 128): warn(f'Model size smaller than expected: {f} size={size} MB') elif (size >= 316 and size <= 324) or (size >= 156 and size <= 164): # 320 or 160 
warn(f'Model detected as VAE model, but attempting to load as model: {op}={f} size={size} MB') @@ -591,6 +591,8 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): guess = 'Stable Diffusion XL' elif (size > 5692 and size < 5698) or (size > 4134 and size < 4138) or (size > 10362 and size < 10366) or (size > 15028 and size < 15228): guess = 'Stable Diffusion 3' + elif (size > 20000 and size < 40000): + guess = 'FLUX' # guess by name """ if 'LCM_' in f.upper() or 'LCM-' in f.upper() or '_LCM' in f.upper() or '-LCM' in f.upper(): @@ -620,8 +622,10 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): guess = 'Kolors' if 'auraflow' in f.lower(): guess = 'AuraFlow' - if 'flux.1' in f.lower() or 'flux1' in f.lower(): + if 'flux' in f.lower(): guess = 'FLUX' + if size > 11000 and size < 20000: + warn(f'Model detected as FLUX UNET model, but attempting to load a base model: {op}={f} size={size} MB') # switch for specific variant if guess == 'Stable Diffusion' and 'inpaint' in f.lower(): guess = 'Stable Diffusion Inpaint' @@ -654,8 +658,8 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): def copy_diffuser_options(new_pipe, orig_pipe): - new_pipe.sd_checkpoint_info = orig_pipe.sd_checkpoint_info - new_pipe.sd_model_checkpoint = orig_pipe.sd_model_checkpoint + new_pipe.sd_checkpoint_info = getattr(orig_pipe, 'sd_checkpoint_info', None) + new_pipe.sd_model_checkpoint = getattr(orig_pipe, 'sd_model_checkpoint', None) new_pipe.embedding_db = getattr(orig_pipe, 'embedding_db', None) new_pipe.sd_model_hash = getattr(orig_pipe, 'sd_model_hash', None) new_pipe.has_accelerate = getattr(orig_pipe, 'has_accelerate', False) @@ -678,27 +682,28 @@ def set_diffuser_options(sd_model, vae = None, op: str = 'model', offload=True): if hasattr(sd_model, "vae"): if vae is not None: sd_model.vae = vae - shared.log.debug(f'Setting {op} VAE: name={sd_vae.loaded_vae_file}') + shared.log.debug(f'Setting {op} VAE: name="{sd_vae.loaded_vae_file}"') if shared.opts.diffusers_vae_upcast != 'default': sd_model.vae.config.force_upcast = True if shared.opts.diffusers_vae_upcast == 'true' else False shared.log.debug(f'Setting {op} VAE: upcast={sd_model.vae.config.force_upcast}') if shared.opts.no_half_vae: devices.dtype_vae = torch.float32 sd_model.vae.to(devices.dtype_vae) - shared.log.debug(f'Setting {op} VAE: no-half') + shared.log.debug(f'Setting {op} VAE: no-half=True') if hasattr(sd_model, "enable_vae_slicing"): if shared.opts.diffusers_vae_slicing: - shared.log.debug(f'Setting {op}: enable VAE slicing') + shared.log.debug(f'Setting {op}: slicing=True') sd_model.enable_vae_slicing() else: sd_model.disable_vae_slicing() if hasattr(sd_model, "enable_vae_tiling"): if shared.opts.diffusers_vae_tiling: - shared.log.debug(f'Setting {op}: enable VAE tiling') + shared.log.debug(f'Setting {op}: tiling=True') sd_model.enable_vae_tiling() else: sd_model.disable_vae_tiling() if hasattr(sd_model, "vqvae"): + shared.log.debug(f'Setting {op} VQVAE: upcast=True') sd_model.vqvae.to(torch.float32) # vqvae is producing nans in fp16 set_diffusers_attention(sd_model) @@ -706,7 +711,13 @@ def set_diffuser_options(sd_model, vae = None, op: str = 'model', offload=True): if shared.opts.diffusers_fuse_projections and hasattr(sd_model, 'fuse_qkv_projections'): try: sd_model.fuse_qkv_projections() - shared.log.debug(f'Setting {op}: enable fused projections') + shared.log.debug(f'Setting {op}: fused-qkv=True') + except Exception as e: + shared.log.error(f'Error enabling fused 
projections: {e}') + if shared.opts.diffusers_fuse_projections and hasattr(sd_model, 'transformer') and hasattr(sd_model.transformer, 'fuse_qkv_projections'): + try: + sd_model.transformer.fuse_qkv_projections() + shared.log.debug(f'Setting {op}: fused-qkv=True') except Exception as e: shared.log.error(f'Error enabling fused projections: {e}') if shared.opts.diffusers_eval: @@ -720,13 +731,16 @@ def eval_model(model, op=None, sd_model=None): # pylint: disable=unused-argument sd_model = sd_models_compile.dynamic_quantization(sd_model) if shared.opts.opt_channelslast and hasattr(sd_model, 'unet'): - shared.log.debug(f'Setting {op}: enable channels last') + shared.log.debug(f'Setting {op}: channels-last=True') sd_model.unet.to(memory_format=torch.channels_last) if offload: set_diffuser_offload(sd_model, op) def set_diffuser_offload(sd_model, op: str = 'model'): + if not shared.native: + shared.log.warning('Attempting to use offload with backend=original') + return if sd_model is None: shared.log.warning(f'{op} is not loaded') return @@ -735,7 +749,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'): if hasattr(sd_model, "enable_model_cpu_offload"): if shared.opts.diffusers_offload_mode == "model": try: - shared.log.debug(f'Setting {op}: enable model CPU offload') + shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}') if shared.opts.diffusers_move_base or shared.opts.diffusers_move_unet or shared.opts.diffusers_move_refiner: shared.opts.diffusers_move_base = False shared.opts.diffusers_move_unet = False @@ -751,7 +765,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'): if hasattr(sd_model, "enable_sequential_cpu_offload"): if shared.opts.diffusers_offload_mode == "sequential": try: - shared.log.debug(f'Setting {op}: enable sequential CPU offload') + shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}') if shared.opts.diffusers_move_base or shared.opts.diffusers_move_unet or shared.opts.diffusers_move_refiner: shared.opts.diffusers_move_base = False shared.opts.diffusers_move_unet = False @@ -771,6 +785,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'): shared.log.error(f'Model offload error: mode={shared.opts.diffusers_offload_mode} {e}') if shared.opts.diffusers_offload_mode == "balanced": try: + shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}') sd_model = apply_balanced_offload(sd_model) except Exception as e: shared.log.error(f'Model offload error: mode={shared.opts.diffusers_offload_mode} {e}') @@ -824,7 +839,7 @@ def apply_balanced_offload_to_module(pipe): module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access except Exception as e: shared.log.error(f'Balanced offload: module={module_name} {e}') - devices.torch_gc() + devices.torch_gc(fast=True) apply_balanced_offload_to_module(sd_model) if hasattr(sd_model, "prior_pipe"): @@ -845,9 +860,23 @@ def normalize_device(device): return torch.device(str(device) + ":0") return torch.device(device) + def move_model(model, device=None, force=False): if model is None or device is None: return + + if not shared.native: + if type(model).__name__ == 'LatentDiffusion': + model = model.to(device) + if hasattr(model, 'model'): + model.model = model.model.to(device) + if hasattr(model, 'first_stage_model'): + model.first_stage_model = model.first_stage_model.to(device) + if hasattr(model, 'cond_stage_model'): + model.cond_stage_model = model.cond_stage_model.to(device) + devices.torch_gc() + return + if 
getattr(model, 'vae', None) is not None and get_diffusers_task(model) != DiffusersTaskType.TEXT_2_IMAGE: if device == devices.device and model.vae.device.type != "meta": # force vae back to gpu if not in txt2img mode model.vae.to(device) @@ -878,7 +907,7 @@ def move_model(model, device=None, force=False): if hasattr(model, "prior_pipe"): model.prior_pipe.to(device) except Exception as e0: - if 'Cannot copy out of meta tensor' in str(e0): + if 'Cannot copy out of meta tensor' in str(e0) or 'must be Tensor, not NoneType' in str(e0): if hasattr(model, "components"): for _name, component in model.components.items(): if hasattr(component, 'modules'): @@ -1042,16 +1071,17 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No unload_model_weights(op=op) return + shared.log.debug(f'Diffusers loading: path="{checkpoint_info.path}"') + pipeline, model_type = detect_pipeline(checkpoint_info.path, op) + vae = None sd_vae.loaded_vae_file = None - if op == 'model' or op == 'refiner': + if model_type.startswith('Stable Diffusion') and (op == 'model' or op == 'refiner'): # preload vae for sd models vae_file, vae_source = sd_vae.resolve_vae(checkpoint_info.filename) vae = sd_vae.load_vae_diffusers(checkpoint_info.path, vae_file, vae_source) if vae is not None: diffusers_load_config["vae"] = vae - shared.log.debug(f'Diffusers loading: path="{checkpoint_info.path}"') - pipeline, model_type = detect_pipeline(checkpoint_info.path, op) if os.path.isdir(checkpoint_info.path) or checkpoint_info.type == 'huggingface' or checkpoint_info.type == 'transformer': files = shared.walk_files(checkpoint_info.path, ['.safetensors', '.bin', '.ckpt']) if 'variant' not in diffusers_load_config and any('diffusion_pytorch_model.fp16' in f for f in files): # deal with diffusers lack of variant fallback when loading @@ -1201,10 +1231,12 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No if model_type.startswith('Stable Diffusion'): if shared.opts.diffusers_force_zeros: diffusers_load_config['force_zeros_for_empty_prompt '] = shared.opts.diffusers_force_zeros - if diffusers_version < 28: - diffusers_load_config['original_config_file'] = get_load_config(checkpoint_info.path, model_type, config_type='yaml') else: - diffusers_load_config['config'] = get_load_config(checkpoint_info.path, model_type, config_type='json') + model_config = get_load_config(checkpoint_info.path, model_type, config_type='json') + if model_config is not None: + if debug_load: + shared.log.debug(f'Model config: path="{model_config}"') + diffusers_load_config['config'] = model_config if model_type.startswith('Stable Diffusion 3'): from modules.model_sd3 import load_sd3 sd_model = load_sd3(fn=checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, config=diffusers_load_config.get('config', None)) @@ -1227,6 +1259,8 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No else: shared.log.error(f'Diffusers {op} cannot load safetensor model: {checkpoint_info.path} {shared.opts.diffusers_pipeline}') return + if shared.opts.diffusers_vae_upcast != 'default' and model_type in ['Stable Diffusion', 'Stable Diffusion XL']: + diffusers_load_config['force_upcast'] = True if shared.opts.diffusers_vae_upcast == 'true' else False if debug_load: shared.log.debug(f'Model args: {diffusers_load_config}') if sd_model is not None: @@ -1286,7 +1320,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No insert_parser_highjack(sd_model.__class__.__name__) 
set_diffuser_options(sd_model, vae, op, offload=False) - if shared.opts.nncf_compress_weights and not (shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): + if shared.opts.nncf_compress_weights and not ('Model' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): sd_model = sd_models_compile.nncf_compress_weights(sd_model) # run this before move model so it can be compressed in CPU if shared.opts.optimum_quanto_weights: sd_model = sd_models_compile.optimum_quanto_weights(sd_model) # run this before move model so it can be compressed in CPU @@ -1307,7 +1341,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No if shared.opts.ipex_optimize: sd_model = sd_models_compile.ipex_optimize(sd_model) - if (shared.opts.cuda_compile and shared.opts.cuda_compile_backend != 'none'): + if ('Model' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend != 'none'): sd_model = sd_models_compile.compile_diffusers(sd_model) timer.record("compile") @@ -1458,7 +1492,7 @@ def set_diffuser_pipe(pipe, new_pipe_type): return pipe # skip specific pipelines - if n in ['StableDiffusionReferencePipeline', 'StableDiffusionAdapterPipeline', 'AnimateDiffPipeline', 'AnimateDiffSDXLPipeline']: + if n in ['StableDiffusionReferencePipeline', 'StableDiffusionAdapterPipeline', 'AnimateDiffPipeline', 'AnimateDiffSDXLPipeline', 'FluxControlNetPipeline']: return pipe if 'Onnx' in pipe.__class__.__name__: return pipe @@ -1528,11 +1562,20 @@ def set_attn(pipe, attention): for module in modules: if module.__class__.__name__ in ['SD3Transformer2DModel']: module.set_attn_processor(p.JointAttnProcessor2_0()) - elif module.__class__.__name__ in ['HunyuanDiT2DModel', 'FluxTransformer2DModel']: - pass + elif module.__class__.__name__ in ['FluxTransformer2DModel']: + module.set_attn_processor(p.FluxAttnProcessor2_0()) + elif module.__class__.__name__ in ['HunyuanDiT2DModel']: + module.set_attn_processor(p.HunyuanAttnProcessor2_0()) + elif module.__class__.__name__ in ['AuraFlowTransformer2DModel']: + module.set_attn_processor(p.AuraFlowAttnProcessor2_0()) + elif 'Transformer' in module.__class__.__name__: + pass # unknown transformer so probably dont want to force attention processor else: module.set_attn_processor(attention) + if 'ControlNet' in pipe.__class__.__name__: # do not replace attention in ControlNet pipelines + return + shared.log.debug(f"Setting model: attention={shared.opts.cross_attention_optimization}") if shared.opts.cross_attention_optimization == "Disabled": pass # do nothing elif shared.opts.cross_attention_optimization == "Scaled-Dot-Product": # The default set by Diffusers @@ -1790,6 +1833,7 @@ def disable_offload(sd_model): def unload_model_weights(op='model'): if shared.compiled_model_state is not None: shared.compiled_model_state.compiled_cache.clear() + shared.compiled_model_state.req_cache.clear() shared.compiled_model_state.partitioned_modules.clear() if op == 'model' or op == 'dict': if model_data.sd_model: @@ -1797,7 +1841,7 @@ def unload_model_weights(op='model'): from modules import sd_hijack move_model(model_data.sd_model, devices.cpu) sd_hijack.model_hijack.undo_hijack(model_data.sd_model) - elif not (shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): + elif not ('Model' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): disable_offload(model_data.sd_model) move_model(model_data.sd_model, 'meta') model_data.sd_model = None @@ -1891,3 +1935,11 @@ 
def remove_token_merging(sd_model): sd_model.applied_todo = 0 except Exception: pass + + +def path_to_repo(fn: str = ''): + repo_id = fn + repo_id = repo_id.replace('Diffusers/', '').replace('Diffusers\\', '') + repo_id = repo_id.replace('diffusers/', '').replace('diffusers\\', '') + repo_id = repo_id.replace('models--', '').replace('--', '/') + return repo_id diff --git a/modules/sd_models_compile.py b/modules/sd_models_compile.py index 2ec8b26a8..9b3eb90af 100644 --- a/modules/sd_models_compile.py +++ b/modules/sd_models_compile.py @@ -3,7 +3,7 @@ import logging import torch from modules import shared, devices, sd_models -from installer import setup_logging +from installer import install, setup_logging #Used by OpenVINO, can be used with TensorRT or Olive @@ -21,6 +21,7 @@ def __init__(self): self.cn_model = [] self.lora_model = [] self.compiled_cache = {} + self.req_cache = {} self.partitioned_modules = {} @@ -79,13 +80,29 @@ def apply_compile_to_model(sd_model, function, options, op=None): sd_model.prior_pipe.text_encoder = function(sd_model.prior_pipe.text_encoder, op="prior_pipe.text_encoder", sd_model=sd_model) if "VAE" in options: if hasattr(sd_model, 'vae') and hasattr(sd_model.vae, 'decode'): - sd_model.vae = function(sd_model.vae, op="vae", sd_model=sd_model) + if op == "compile": + sd_model.vae.decode = function(sd_model.vae.decode, op="vae_decode", sd_model=sd_model) + sd_model.vae.encode = function(sd_model.vae.encode, op="vae_encode", sd_model=sd_model) + else: + sd_model.vae = function(sd_model.vae, op="vae", sd_model=sd_model) if hasattr(sd_model, 'movq') and hasattr(sd_model.movq, 'decode'): - sd_model.movq = function(sd_model.movq, op="movq", sd_model=sd_model) + if op == "compile": + sd_model.movq.decode = function(sd_model.movq.decode, op="movq_decode", sd_model=sd_model) + sd_model.movq.encode = function(sd_model.movq.encode, op="movq_encode", sd_model=sd_model) + else: + sd_model.movq = function(sd_model.movq, op="movq", sd_model=sd_model) if hasattr(sd_model, 'vqgan') and hasattr(sd_model.vqgan, 'decode'): - sd_model.vqgan = function(sd_model.vqgan, op="vqgan", sd_model=sd_model) + if op == "compile": + sd_model.vqgan.decode = function(sd_model.vqgan.decode, op="vqgan_decode", sd_model=sd_model) + sd_model.vqgan.encode = function(sd_model.vqgan.encode, op="vqgan_encode", sd_model=sd_model) + else: + sd_model.vqgan = function(sd_model.vqgan, op="vqgan", sd_model=sd_model) if hasattr(sd_model, 'decoder_pipe') and hasattr(sd_model.decoder_pipe, 'vqgan'): - sd_model.decoder_pipe.vqgan = sd_model.vqgan + if op == "compile": + sd_model.decoder_pipe.vqgan.decode = function(sd_model.decoder_pipe.vqgan.decode, op="vqgan_decode", sd_model=sd_model) + sd_model.decoder_pipe.vqgan.encode = function(sd_model.decoder_pipe.vqgan.encode, op="vqgan_encode", sd_model=sd_model) + else: + sd_model.decoder_pipe.vqgan = sd_model.vqgan if hasattr(sd_model, 'image_encoder') and hasattr(sd_model.image_encoder, 'config'): sd_model.image_encoder = function(sd_model.image_encoder, op="image_encoder", sd_model=sd_model) @@ -165,7 +182,6 @@ def nncf_compress_weights(sd_model): t0 = time.time() shared.log.info(f"NNCF Compress Weights: {shared.opts.nncf_compress_weights}") global quant_last_model_name, quant_last_model_device # pylint: disable=global-statement - from installer import install install('nncf==2.7.0', quiet=True) sd_model = apply_compile_to_model(sd_model, nncf_compress_model, shared.opts.nncf_compress_weights, op="nncf") @@ -233,7 +249,6 @@ def optimum_quanto_weights(sd_model): t0 = 
time.time() shared.log.info(f"Optimum Quanto Weights: {shared.opts.optimum_quanto_weights}") global quant_last_model_name, quant_last_model_device # pylint: disable=global-statement - from installer import install install('optimum-quanto', quiet=True) from optimum import quanto # pylint: disable=no-name-in-module quanto.tensor.qbits.QBitsTensor.create = lambda *args, **kwargs: quanto.tensor.qbits.QBitsTensor(*args, **kwargs) @@ -291,6 +306,7 @@ def optimize_openvino(sd_model): torch._dynamo.eval_frame.check_if_dynamo_supported = lambda: True # pylint: disable=protected-access if shared.compiled_model_state is not None: shared.compiled_model_state.compiled_cache.clear() + shared.compiled_model_state.req_cache.clear() shared.compiled_model_state.partitioned_modules.clear() shared.compiled_model_state = CompiledModelState() shared.compiled_model_state.is_compiled = True @@ -383,7 +399,7 @@ def compile_torch(sd_model): shared.log.debug(f"Model compile available backends: {torch._dynamo.list_backends()}") # pylint: disable=protected-access def torch_compile_model(model, op=None, sd_model=None): # pylint: disable=unused-argument - if model.device.type != "meta": + if hasattr(model, "device") and model.device.type != "meta": return_device = model.device model = torch.compile(model.to(devices.device), mode=shared.opts.cuda_compile_mode, @@ -423,7 +439,7 @@ def torch_compile_model(model, op=None, sd_model=None): # pylint: disable=unused except Exception as e: shared.log.error(f"Torch inductor config error: {e}") - sd_model = apply_compile_to_model(sd_model, torch_compile_model, shared.opts.cuda_compile, op="compile") + sd_model = apply_compile_to_model(sd_model, function=torch_compile_model, options=shared.opts.cuda_compile, op="compile") setup_logging() # compile messes with logging so reset is needed if shared.opts.cuda_compile_precompile: @@ -464,7 +480,7 @@ def compile_deepcache(sd_model): def compile_diffusers(sd_model): - if not shared.opts.cuda_compile: + if 'Model' not in shared.opts.cuda_compile: return sd_model if shared.opts.cuda_compile_backend == 'none': shared.log.warning('Model compile enabled but no backend specified') @@ -484,11 +500,14 @@ def compile_diffusers(sd_model): def dynamic_quantization(sd_model): try: - from torchao.quantization import quant_api + install('torchao', quiet=True) + from torchao.quantization import autoquant except Exception as e: shared.log.error(f"Model dynamic quantization not supported: {e}") return sd_model + """ + from torchao.quantization import quant_api def dynamic_quant_filter_fn(mod, *args): # pylint: disable=unused-argument return (isinstance(mod, torch.nn.Linear) and mod.in_features > 16 and (mod.in_features, mod.out_features) not in [(1280, 640), (1920, 1280), (1920, 640), (2048, 1280), (2048, 2560), (2560, 1280), (256, 128), (2816, 1280), (320, 640), (512, 1536), (512, 256), (512, 512), (640, 1280), (640, 1920), (640, 320), (640, 5120), (640, 640), (960, 320), (960, 640)]) @@ -496,19 +515,28 @@ def dynamic_quant_filter_fn(mod, *args): # pylint: disable=unused-argument def conv_filter_fn(mod, *args): # pylint: disable=unused-argument return (isinstance(mod, torch.nn.Conv2d) and mod.kernel_size == (1, 1) and 128 in [mod.in_channels, mod.out_channels]) + quant_api.swap_conv2d_1x1_to_linear(sd_model.unet, conv_filter_fn) + quant_api.swap_conv2d_1x1_to_linear(sd_model.vae, conv_filter_fn) + quant_api.apply_dynamic_quant(sd_model.unet, dynamic_quant_filter_fn) + quant_api.apply_dynamic_quant(sd_model.vae, dynamic_quant_filter_fn) + """ + 
shared.log.info(f"Model dynamic quantization: pipeline={sd_model.__class__.__name__}") try: - quant_api.swap_conv2d_1x1_to_linear(sd_model.unet, conv_filter_fn) - quant_api.swap_conv2d_1x1_to_linear(sd_model.vae, conv_filter_fn) - quant_api.apply_dynamic_quant(sd_model.unet, dynamic_quant_filter_fn) - quant_api.apply_dynamic_quant(sd_model.vae, dynamic_quant_filter_fn) + if shared.sd_model_type == 'sd' or shared.sd_model_type == 'sdxl': + sd_model.unet = sd_model.unet.to(devices.device) + sd_model.unet = autoquant(sd_model.unet, error_on_unseen=False) + elif shared.sd_model_type == 'f1': + sd_model.transformer = autoquant(sd_model.transformer, error_on_unseen=False) + else: + shared.log.error(f"Model dynamic quantization not supported: {shared.sd_model_type}") except Exception as e: - shared.log.error(f"Model dynamic quantization error: {e}") + shared.log.error(f"Model dynamic quantization: {e}") return sd_model def openvino_recompile_model(p, hires=False, refiner=False): # recompile if a parameter changes - if shared.opts.cuda_compile and shared.opts.cuda_compile_backend != 'none': + if 'Model' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend != 'none': if shared.opts.cuda_compile_backend == "openvino_fx": compile_height = p.height if not hires and hasattr(p, 'height') else p.hr_upscale_to_y compile_width = p.width if not hires and hasattr(p, 'width') else p.hr_upscale_to_x @@ -531,7 +559,7 @@ def openvino_recompile_model(p, hires=False, refiner=False): # recompile if a pa def openvino_post_compile(op="base"): # delete unet after OpenVINO compile - if shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx": + if 'Model' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx": if shared.compiled_model_state.first_pass and op == "base": shared.compiled_model_state.first_pass = False if not shared.opts.openvino_disable_memory_cleanup and hasattr(shared.sd_model, "unet"): diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index 29034d63f..b7c90e603 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -69,9 +69,10 @@ def create_sampler(name, model): return sampler elif shared.native: sampler = config.constructor(model) - if shared.sd_model_type == 'f1': + if 'Flux' in model.__class__.__name__: if 'base_image_seq_len' not in sampler.sampler.config or 'max_image_seq_len' not in sampler.sampler.config or 'base_shift' not in sampler.sampler.config or 'max_shift' not in sampler.sampler.config: - shared.log.warning('FLUX sampler: attempting to use a non compatible scheduler') + shared.log.warning(f'FLUX: sampler="{name}" unsupported') + # sampler.sampler.register_to_config(base_image_seq_len=256, max_image_seq_len=4096, base_shift=0.5, max_shift=1.15) return None if not hasattr(model, 'scheduler_config'): model.scheduler_config = sampler.sampler.config.copy() diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index b49e981f1..1d4db96cb 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -61,7 +61,7 @@ def single_sample_to_image(sample, approximation=None): if approximation == 2: # TAESD x_sample = sd_vae_taesd.decode(sample) x_sample = (1.0 + x_sample) / 2.0 # preview requires smaller range - elif sd_cascade and not approximation == 3: + elif sd_cascade and approximation != 3: x_sample = sd_vae_stablecascade.decode(sample) elif approximation == 0: # Simple x_sample = sd_vae_approx.cheap_approximation(sample) * 0.5 + 0.5 diff --git 
a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py index 24f3d033e..b13f91ec5 100644 --- a/modules/sd_samplers_diffusers.py +++ b/modules/sd_samplers_diffusers.py @@ -5,6 +5,7 @@ from modules import shared from modules import sd_samplers_common from modules.tcd import TCDScheduler +from modules.dcsolver import DCSolverMultistepScheduler #https://github.com/wl-zhao/DC-Solver debug = shared.log.trace if os.environ.get('SD_SAMPLER_DEBUG', None) is not None else lambda *args, **kwargs: None @@ -62,14 +63,15 @@ 'LMSD': { 'use_karras_sigmas': False, 'timestep_spacing': 'linspace', 'steps_offset': 0 }, 'PNDM': { 'skip_prk_steps': False, 'set_alpha_to_one': False, 'steps_offset': 0, 'timestep_spacing': 'linspace' }, 'SA Solver': {'predictor_order': 2, 'corrector_order': 2, 'thresholding': False, 'lower_order_final': True, 'use_karras_sigmas': False, 'timestep_spacing': 'linspace'}, + 'DC Solver': { 'beta_start': 0.0001, 'beta_end': 0.02, 'solver_order': 2, 'prediction_type': "epsilon", 'thresholding': False, 'solver_type': 'bh2', 'lower_order_final': True, 'dc_order': 2, 'disable_corrector': [0] }, 'LCM': { 'beta_start': 0.00085, 'beta_end': 0.012, 'beta_schedule': "scaled_linear", 'set_alpha_to_one': True, 'rescale_betas_zero_snr': False, 'thresholding': False, 'timestep_spacing': 'linspace' }, 'TCD': { 'set_alpha_to_one': True, 'rescale_betas_zero_snr': False, 'beta_schedule': 'scaled_linear' }, 'Euler SGM': { 'timestep_spacing': "trailing", 'prediction_type': "sample" }, 'Euler EDM': { }, 'DPM++ 2M EDM': { 'solver_order': 2, 'solver_type': 'midpoint', 'final_sigmas_type': 'zero', 'algorithm_type': 'dpmsolver++' }, 'CMSI': { }, #{ 'sigma_min': 0.002, 'sigma_max': 80.0, 'sigma_data': 0.5, 's_noise': 1.0, 'rho': 7.0, 'clip_denoised': True }, - 'Euler FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1, }, - 'Heun FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1, }, + 'Euler FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1, 'use_dynamic_shifting': False }, + 'Heun FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1 }, 'IPNDM': { }, } @@ -79,6 +81,7 @@ sd_samplers_common.SamplerData('UniPC', lambda model: DiffusionSampler('UniPC', UniPCMultistepScheduler, model), [], {}), sd_samplers_common.SamplerData('DEIS', lambda model: DiffusionSampler('DEIS', DEISMultistepScheduler, model), [], {}), sd_samplers_common.SamplerData('SA Solver', lambda model: DiffusionSampler('SA Solver', SASolverScheduler, model), [], {}), + sd_samplers_common.SamplerData('DC Solver', lambda model: DiffusionSampler('DC Solver', DCSolverMultistepScheduler, model), [], {}), sd_samplers_common.SamplerData('DDIM', lambda model: DiffusionSampler('DDIM', DDIMScheduler, model), [], {}), sd_samplers_common.SamplerData('Heun', lambda model: DiffusionSampler('Heun', HeunDiscreteScheduler, model), [], {}), sd_samplers_common.SamplerData('Euler', lambda model: DiffusionSampler('Euler', EulerDiscreteScheduler, model), [], {}), @@ -157,8 +160,10 @@ def __init__(self, name, constructor, model, **kwargs): self.config['beta_start'] = shared.opts.schedulers_beta_start if 'beta_end' in self.config and shared.opts.schedulers_beta_end > 0: self.config['beta_end'] = shared.opts.schedulers_beta_end - if 'shift' in self.config and shared.opts.schedulers_shift != 1: + if 'shift' in self.config: self.config['shift'] = shared.opts.schedulers_shift + if 'use_dynamic_shifting' in self.config: + self.config['use_dynamic_shifting'] = shared.opts.schedulers_dynamic_shift if 'rescale_betas_zero_snr' in self.config: 
self.config['rescale_betas_zero_snr'] = shared.opts.schedulers_rescale_betas if 'timestep_spacing' in self.config and shared.opts.schedulers_timestep_spacing != 'default' and shared.opts.schedulers_timestep_spacing is not None: @@ -192,5 +197,9 @@ def __init__(self, name, constructor, model, **kwargs): debug(f'Sampler: signature={possible}') # shared.log.debug(f'Sampler: sampler="{name}" config={self.config}') self.sampler = constructor(**self.config) + if name == 'DC Solver': + if not hasattr(self.sampler, 'dc_ratios'): + pass + # self.sampler.dc_ratios = self.sampler.cascade_polynomial_regression(test_CFG=6.0, test_NFE=10, cpr_path='tmp/sd2.1.npy') # shared.log.debug(f'Sampler: class="{self.sampler.__class__.__name__}" config={self.sampler.config}') self.sampler.name = name diff --git a/modules/sd_unet.py b/modules/sd_unet.py index c948c223b..16d942a22 100644 --- a/modules/sd_unet.py +++ b/modules/sd_unet.py @@ -1,8 +1,9 @@ import os -from modules import shared, devices, files_cache +from modules import shared, devices, files_cache, sd_models unet_dict = {} +debug = os.environ.get('SD_LOAD_DEBUG', None) is not None def load_unet(model): @@ -28,15 +29,13 @@ def load_unet(model): model.prior_pipe.text_encoder = None # Prevent OOM model.prior_pipe.text_encoder = prior_text_encoder.to(devices.device, dtype=devices.dtype) if "Flux" in model.__class__.__name__: - shared.log.info(f'Loading UNet: name="{shared.opts.sd_unet}" file="{unet_dict[shared.opts.sd_unet]}" offload={shared.opts.diffusers_offload_mode}') from modules.model_flux import load_transformer transformer = load_transformer(unet_dict[shared.opts.sd_unet]) if transformer is not None: model.transformer = None if shared.opts.diffusers_offload_mode == 'none': - model.transformer = transformer.to(devices.device, devices.dtype) - else: - model.transformer = transformer + sd_models.move_model(transformer, devices.device) + model.transformer = transformer from modules.sd_models import set_diffuser_offload set_diffuser_offload(model, 'model') else: @@ -52,6 +51,9 @@ def load_unet(model): model.unet = unet.to(devices.device, devices.dtype_unet) except Exception as e: shared.log.error(f'Failed to load UNet model: {e}') + if debug: + from modules import errors + errors.display(e, 'UNet load:') return devices.torch_gc() diff --git a/modules/sd_vae.py b/modules/sd_vae.py index 0b5993256..bfe9807ad 100644 --- a/modules/sd_vae.py +++ b/modules/sd_vae.py @@ -192,7 +192,6 @@ def load_vae_diffusers(model_file, vae_file=None, vae_source="unknown-source"): if not os.path.exists(vae_file): shared.log.error(f'VAE not found: model{vae_file}') return None - shared.log.info(f"Loading VAE: model={vae_file} source={vae_source}") diffusers_load_config = { "low_cpu_mem_usage": False, "torch_dtype": devices.dtype_vae, @@ -207,14 +206,14 @@ def load_vae_diffusers(model_file, vae_file=None, vae_source="unknown-source"): diffusers_load_config['variant'] = shared.opts.diffusers_vae_load_variant if shared.opts.diffusers_vae_upcast != 'default': diffusers_load_config['force_upcast'] = True if shared.opts.diffusers_vae_upcast == 'true' else False - shared.log.debug(f'Diffusers VAE load config: {diffusers_load_config}') + _pipeline, model_type = sd_models.detect_pipeline(model_file, 'vae') + vae_config = sd_models.get_load_config(model_file, model_type, config_type='json') + if vae_config is not None: + diffusers_load_config['config'] = os.path.join(vae_config, 'vae') + shared.log.info(f'Load VAE: model="{vae_file}" source={vae_source} config={diffusers_load_config}') 
try: import diffusers if os.path.isfile(vae_file): - _pipeline, model_type = sd_models.detect_pipeline(model_file, 'vae') - diffusers_load_config = { - "config": os.path.join(sd_models.get_load_config(model_file, model_type, config_type='json'), 'vae'), - } if os.path.getsize(vae_file) > 1310944880: # 1.3GB vae = diffusers.ConsistencyDecoderVAE.from_pretrained('openai/consistency-decoder', **diffusers_load_config) # consistency decoder does not have from single file, so we'll just download it once more elif os.path.getsize(vae_file) < 10000000: # 10MB @@ -233,6 +232,8 @@ def load_vae_diffusers(model_file, vae_file=None, vae_source="unknown-source"): global loaded_vae_file # pylint: disable=global-statement loaded_vae_file = os.path.basename(vae_file) # shared.log.debug(f'Diffusers VAE config: {vae.config}') + if shared.opts.diffusers_offload_mode == 'none': + sd_models.move_model(vae, devices.device) return vae except Exception as e: shared.log.error(f"Loading VAE failed: model={vae_file} {e}") diff --git a/modules/sd_vae_natten.py b/modules/sd_vae_natten.py new file mode 100644 index 000000000..478e9b654 --- /dev/null +++ b/modules/sd_vae_natten.py @@ -0,0 +1,90 @@ +# copied from https://github.com/Birch-san/sdxl-play/blob/main/src/attn/natten_attn_processor.py + +import os +from typing import Optional +from diffusers.models.attention import Attention +import torch +from torch.nn import Linear +from einops import rearrange +from installer import install, log + + +def init(): + try: + os.environ['NATTEN_CUDA_ARCH'] = '8.0;8.6' + install('natten') + import natten + return natten + except Exception as e: + log.error(f'Init natten: {e}') + return None + + +def fuse_qkv(attn: Attention) -> None: + has_bias = attn.to_q.bias is not None + qkv = Linear(in_features=attn.to_q.in_features, out_features=attn.to_q.out_features*3, bias=has_bias, dtype=attn.to_q.weight.dtype, device=attn.to_q.weight.device) + qkv.weight.data.copy_(torch.cat([attn.to_q.weight.data * attn.scale, attn.to_k.weight.data, attn.to_v.weight.data])) + if has_bias: + qkv.bias.data.copy_(torch.cat([attn.to_q.bias.data * attn.scale, attn.to_k.bias.data, attn.to_v.bias.data])) + setattr(attn, 'qkv', qkv) # noqa: B010 + del attn.to_q, attn.to_k, attn.to_v + + +def fuse_vae_qkv(vae) -> None: + for attn in [*vae.encoder.mid_block.attentions, *vae.decoder.mid_block.attentions]: + fuse_qkv(attn) + + +class NattenAttnProcessor: + kernel_size: int + + def __init__(self, kernel_size: int): + self.kernel_size = kernel_size + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: Optional[torch.FloatTensor] = None, + attention_mask: Optional[torch.BoolTensor] = None, + temb: Optional[torch.FloatTensor] = None, + ): + import natten + assert hasattr(attn, 'qkv'), "Did not find property qkv on attn. Expected you to fuse its q_proj, k_proj, v_proj weights and biases beforehand, and multiply attn.scale into the q weights and bias." + residual = hidden_states + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + # assumes MHA (as opposed to GQA) + inner_dim: int = attn.qkv.out_features // 3 + if attention_mask is not None: + raise ValueError("No mask customization for neighbourhood attention; the mask is already complicated enough as it is") + if encoder_hidden_states is not None: + raise ValueError("NATTEN cannot be used for cross-attention. 
I think.") + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states) + hidden_states = rearrange(hidden_states, '... c h w -> ... h w c') + qkv = attn.qkv(hidden_states) + # assumes MHA (as opposed to GQA) + q, k, v = rearrange(qkv, "n h w (t nh e) -> t n nh h w e", t=3, e=inner_dim) + qk = natten.functional.na2d_qk(q, k, self.kernel_size, 1) # natten2dqk + a = torch.softmax(qk, dim=-1) + hidden_states = natten.functional.na2d_av(a, v, self.kernel_size, 1) # natten2dav + hidden_states = rearrange(hidden_states, "n nh h w e -> n h w (nh e)") + linear_proj, dropout = attn.to_out + hidden_states = linear_proj(hidden_states) + hidden_states = dropout(hidden_states) + hidden_states = rearrange(hidden_states, '... h w c -> ... c h w') + if attn.residual_connection: + hidden_states = hidden_states + residual + return hidden_states + + +def enable_natten(pipe): + if not hasattr(pipe, 'vae'): + return + natten = init() + kernel_size = 17 + if natten is not None: + log.info(f'VAE natten: version={natten.__version__} kernel={kernel_size}') + fuse_vae_qkv(pipe.vae) + pipe.vae.set_attn_processor(NattenAttnProcessor(kernel_size=kernel_size)) diff --git a/modules/sd_vae_taesd.py b/modules/sd_vae_taesd.py index 3400f05f9..5cd7fab7c 100644 --- a/modules/sd_vae_taesd.py +++ b/modules/sd_vae_taesd.py @@ -55,6 +55,8 @@ def Decoder(latent_channels=4): return nn.Sequential( Clamp(), conv(latent_channels, 64), nn.ReLU(), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), + Block(64, 64), Block(64, 64), Block(64, 64), nn.Identity(), conv(64, 64, bias=False), + Block(64, 64), Block(64, 64), Block(64, 64), nn.Identity(), conv(64, 64, bias=False), Block(64, 64), conv(64, 3), ) elif shared.opts.live_preview_taesd_layers == 2: @@ -62,6 +64,7 @@ def Decoder(latent_channels=4): Clamp(), conv(latent_channels, 64), nn.ReLU(), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), + Block(64, 64), Block(64, 64), Block(64, 64), nn.Identity(), conv(64, 64, bias=False), Block(64, 64), conv(64, 3), ) else: @@ -86,9 +89,9 @@ def __init__(self, encoder_path="taesd_encoder.pth", decoder_path="taesd_decoder self.encoder = Encoder(latent_channels) self.decoder = Decoder(latent_channels) if encoder_path is not None: - self.encoder.load_state_dict(torch.load(encoder_path, map_location="cpu")) + self.encoder.load_state_dict(torch.load(encoder_path, map_location="cpu"), strict=False) if decoder_path is not None: - self.decoder.load_state_dict(torch.load(decoder_path, map_location="cpu")) + self.decoder.load_state_dict(torch.load(decoder_path, map_location="cpu"), strict=False) def guess_latent_channels(self, decoder_path, encoder_path): """guess latent channel count based on encoder filename""" diff --git a/modules/shared.py b/modules/shared.py index e1aafca8a..e675ed39b 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -76,7 +76,7 @@ "outdir_save", "outdir_init_images" } -resize_modes = ["None", "Fixed", "Crop", "Fill", "Outpaint"] +resize_modes = ["None", "Fixed", "Crop", "Fill", "Outpaint", "Context aware"] compatibility_opts = ['clip_skip', 'uni_pc_lower_order_final', 'uni_pc_order'] console = Console(log_time=True, log_time_format='%H:%M:%S-%f') dir_timestamps = {} @@ -406,7 +406,8 @@ def temp_disable_extensions(): "sd_model_refiner": OptionInfo('None', "Refiner model", gr.Dropdown, lambda: {"choices": 
['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_vae": OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list), "sd_unet": OptionInfo("None", "UNET model", gr.Dropdown, lambda: {"choices": shared_items.sd_unet_items()}, refresh=shared_items.refresh_unet_list), - "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": ['None', 'T5 FP4', 'T5 FP8', 'T5 INT8', 'T5 QINT8', 'T5 FP16']}), + # "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": ['None', 'T5 FP4', 'T5 FP8', 'T5 INT8', 'T5 QINT8', 'T5 FP16']}), + "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": shared_items.sd_t5_items()}, refresh=shared_items.refresh_t5_list), "sd_model_dict": OptionInfo('None', "Use separate base dict", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"), "sd_textencoder_cache": OptionInfo(True, "Cache text encoder results"), @@ -418,6 +419,7 @@ def temp_disable_extensions(): "sd_checkpoint_cache": OptionInfo(0, "Cached models", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1, "visible": not native }), "sd_vae_checkpoint_cache": OptionInfo(0, "Cached VAEs", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1, "visible": False}), "sd_disable_ckpt": OptionInfo(False, "Disallow models in ckpt format", gr.Checkbox, {"visible": False}), + "diffusers_version": OptionInfo("", "Diffusers version", gr.Textbox, {"visible": False}), })) options_templates.update(options_section(('cuda', "Compute Settings"), { @@ -575,6 +577,7 @@ def temp_disable_extensions(): "hfcache_dir": OptionInfo(os.path.join(os.path.expanduser('~'), '.cache', 'huggingface', 'hub'), "Folder for Huggingface cache", folder=True), "vae_dir": OptionInfo(os.path.join(paths.models_path, 'VAE'), "Folder with VAE files", folder=True), "unet_dir": OptionInfo(os.path.join(paths.models_path, 'UNET'), "Folder with UNET files", folder=True), + "t5_dir": OptionInfo(os.path.join(paths.models_path, 'T5'), "Folder with T5 files", folder=True), "sd_lora": OptionInfo("", "Add LoRA to prompt", gr.Textbox, {"visible": False}), "lora_dir": OptionInfo(os.path.join(paths.models_path, 'Lora'), "Folder with LoRA network(s)", folder=True), "lyco_dir": OptionInfo(os.path.join(paths.models_path, 'LyCORIS'), "Folder with LyCORIS network(s)", gr.Text, {"visible": False}), @@ -740,6 +743,7 @@ def temp_disable_extensions(): 'schedulers_timesteps': OptionInfo('', "Timesteps"), "schedulers_rescale_betas": OptionInfo(False, "Rescale betas with zero terminal SNR", gr.Checkbox), 'schedulers_shift': OptionInfo(1, "Sampler shift", gr.Slider, {"minimum": 0.1, "maximum": 10, "step": 0.1}), + 'schedulers_dynamic_shift': OptionInfo(True, "Sampler dynamic shift"), # managed from ui.py for backend original k-diffusion "schedulers_sep_kdiffusers": OptionInfo("
K-Diffusion specific config
", "", gr.HTML), @@ -839,6 +843,7 @@ def temp_disable_extensions(): "extra_networks_sidebar_width": OptionInfo(35, "UI sidebar width (%)", gr.Slider, {"minimum": 10, "maximum": 80, "step": 1}), "extra_networks_card_size": OptionInfo(160, "UI card size (px)", gr.Slider, {"minimum": 20, "maximum": 2000, "step": 1}), "extra_networks_card_square": OptionInfo(True, "UI disable variable aspect ratio"), + "extra_networks_fetch": OptionInfo(True, "UI fetch network info on mouse-over"), "extra_networks_card_fit": OptionInfo("cover", "UI image contain method", gr.Radio, {"choices": ["contain", "cover", "fill"], "visible": False}), "extra_networks_sep2": OptionInfo("
Extra networks general
", "", gr.HTML), "extra_network_reference": OptionInfo(False, "Use reference values when available", gr.Checkbox), diff --git a/modules/shared_items.py b/modules/shared_items.py index 1b0077ef8..9f110f413 100644 --- a/modules/shared_items.py +++ b/modules/shared_items.py @@ -23,6 +23,17 @@ def refresh_unet_list(): modules.sd_unet.refresh_unet_list() +def sd_t5_items(): + import modules.model_t5 + predefined = ['None', 'T5 FP4', 'T5 FP8', 'T5 INT8', 'T5 QINT8', 'T5 FP16'] + return predefined + list(modules.model_t5.t5_dict) + + +def refresh_t5_list(): + import modules.model_t5 + modules.model_t5.refresh_t5_list() + + def list_crossattention(diffusers=False): if diffusers: return [ diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index ed8acceca..7ad0166e7 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -23,12 +23,12 @@ def open_embeddings(filename): """ Load Embedding files from drive. Image embeddings not currently supported. """ + embeddings = [] + skipped = [] if filename is None: - return + return embeddings, skipped filenames = list(filename) exts = [".SAFETENSORS", '.BIN', '.PT'] - embeddings = [] - skipped = [] for _filename in filenames: # debug(f'Embedding check: {filename}') fullname = _filename @@ -274,15 +274,15 @@ def load_diffusers_embedding(self, filename: Union[str, List[str]] = None, data: """ overwrite = bool(data) if not shared.sd_loaded: - return 0 + return embeddings, skipped = open_embeddings(filename) or convert_bundled(data) for skip in skipped: self.skipped_embeddings[skip.name] = skipped if not embeddings: - return 0 + return text_encoders, tokenizers, hiddensizes = get_text_encoders() if not all([text_encoders, tokenizers, hiddensizes]): - return 0 + return for embedding in embeddings: try: embedding.vector_sizes = [v.shape[-1] for v in embedding.vec] @@ -320,20 +320,20 @@ def load_from_file(self, path, filename): if ext in ['.PNG', '.WEBP', '.JXL', '.AVIF']: if '.preview' in filename.lower(): - return None + return embed_image = Image.open(path) if hasattr(embed_image, 'text') and 'sd-ti-embedding' in embed_image.text: data = embedding_from_b64(embed_image.text['sd-ti-embedding']) else: data = extract_image_data_embed(embed_image) if not data: # if data is None, means this is not an embeding, just a preview image - return None + return elif ext in ['.BIN', '.PT']: data = torch.load(path, map_location="cpu") elif ext in ['.SAFETENSORS']: data = safetensors.torch.load_file(path, device="cpu") else: - return None + return # textual inversion embeddings if 'string_to_param' in data: @@ -345,7 +345,7 @@ def load_from_file(self, path, filename): elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor: if len(data.keys()) != 1: self.skipped_embeddings[name] = Embedding(None, name=name, filename=path) - return None + return emb = next(iter(data.values())) if len(emb.shape) == 1: emb = emb.unsqueeze(0) @@ -353,7 +353,7 @@ def load_from_file(self, path, filename): raise RuntimeError(f"Couldn't identify {filename} as textual inversion embedding") if shared.native: - return emb + return vec = emb.detach().to(devices.device, dtype=torch.float32) # name = data.get('name', name) diff --git a/modules/txt2img.py b/modules/txt2img.py index 76b0a7c45..e438c2e47 100644 --- a/modules/txt2img.py +++ b/modules/txt2img.py @@ -18,13 +18,13 @@ def txt2img(id_task, seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, height, 
width, enable_hr, denoising_strength, - hr_scale, hr_upscaler, hr_force, hr_second_pass_steps, hr_resize_x, hr_resize_y, + hr_scale, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative, hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio, override_settings_texts, *args): - debug(f'txt2img: id_task={id_task}|prompt={prompt}|negative={negative_prompt}|styles={prompt_styles}|steps={steps}|sampler_index={sampler_index}|hr_sampler_index={hr_sampler_index}|full_quality={full_quality}|restore_faces={restore_faces}|tiling={tiling}|hidiffusion={hidiffusion}|batch_count={n_iter}|batch_size={batch_size}|cfg_scale={cfg_scale}|clip_skip={clip_skip}|seed={seed}|subseed={subseed}|subseed_strength={subseed_strength}|seed_resize_from_h={seed_resize_from_h}|seed_resize_from_w={seed_resize_from_w}|height={height}|width={width}|enable_hr={enable_hr}|denoising_strength={denoising_strength}|hr_scale={hr_scale}|hr_upscaler={hr_upscaler}|hr_force={hr_force}|hr_second_pass_steps={hr_second_pass_steps}|hr_resize_x={hr_resize_x}|hr_resize_y={hr_resize_y}|image_cfg_scale={image_cfg_scale}|diffusers_guidance_rescale={diffusers_guidance_rescale}|refiner_steps={refiner_steps}|refiner_start={refiner_start}|refiner_prompt={refiner_prompt}|refiner_negative={refiner_negative}|override_settings={override_settings_texts}') + debug(f'txt2img: id_task={id_task}|prompt={prompt}|negative={negative_prompt}|styles={prompt_styles}|steps={steps}|sampler_index={sampler_index}|hr_sampler_index={hr_sampler_index}|full_quality={full_quality}|restore_faces={restore_faces}|tiling={tiling}|hidiffusion={hidiffusion}|batch_count={n_iter}|batch_size={batch_size}|cfg_scale={cfg_scale}|clip_skip={clip_skip}|seed={seed}|subseed={subseed}|subseed_strength={subseed_strength}|seed_resize_from_h={seed_resize_from_h}|seed_resize_from_w={seed_resize_from_w}|height={height}|width={width}|enable_hr={enable_hr}|denoising_strength={denoising_strength}|hr_resize_mode={hr_resize_mode}|hr_resize_context={hr_resize_context}|hr_scale={hr_scale}|hr_upscaler={hr_upscaler}|hr_force={hr_force}|hr_second_pass_steps={hr_second_pass_steps}|hr_resize_x={hr_resize_x}|hr_resize_y={hr_resize_y}|image_cfg_scale={image_cfg_scale}|diffusers_guidance_rescale={diffusers_guidance_rescale}|refiner_steps={refiner_steps}|refiner_start={refiner_start}|refiner_prompt={refiner_prompt}|refiner_negative={refiner_negative}|override_settings={override_settings_texts}') if shared.sd_model is None: shared.log.warning('Model not loaded') @@ -71,6 +71,8 @@ def txt2img(id_task, enable_hr=enable_hr, denoising_strength=denoising_strength, hr_scale=hr_scale, + hr_resize_mode=hr_resize_mode, + hr_resize_context=hr_resize_context, hr_upscaler=hr_upscaler, hr_force=hr_force, hr_second_pass_steps=hr_second_pass_steps, @@ -89,6 +91,7 @@ def txt2img(id_task, processed = scripts.scripts_txt2img.run(p, *args) if processed is None: processed = processing.process_images(p) + processed = scripts.scripts_txt2img.after(p, processed, *args) p.close() if processed is None: return [], '', '', 'Error: processing failed' diff --git a/modules/ui.py b/modules/ui.py index 70128e0f9..41ef6c6c1 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -243,9 +243,12 @@ def run_settings(*args): if cmd_opts.use_directml: directml_override_opts() if cmd_opts.use_openvino: - if not 
shared.opts.cuda_compile: - shared.log.warning("OpenVINO: Enabling Torch Compile") - shared.opts.cuda_compile = True + if "Model" not in shared.opts.cuda_compile: + shared.log.warning("OpenVINO: Enabling Torch Compile Model") + shared.opts.cuda_compile.append("Model") + if "VAE" not in shared.opts.cuda_compile: + shared.log.warning("OpenVINO: Enabling Torch Compile VAE") + shared.opts.cuda_compile.append("VAE") if shared.opts.cuda_compile_backend != "openvino_fx": shared.log.warning("OpenVINO: Setting Torch Compiler backend to OpenVINO FX") shared.opts.cuda_compile_backend = "openvino_fx" diff --git a/modules/ui_common.py b/modules/ui_common.py index f83e8594f..e168e6981 100644 --- a/modules/ui_common.py +++ b/modules/ui_common.py @@ -42,7 +42,7 @@ def infotext_to_html(text): negative = res.get('Negative prompt', '') res.pop('Prompt', None) res.pop('Negative prompt', None) - params = [f'{k}: {v}' for k, v in res.items() if v is not None] + params = [f'{k}: {v}' for k, v in res.items() if v is not None and 'size-' not in k.lower()] params = '| '.join(params) if len(params) > 0 else '' code = '' if len(prompt) > 0: @@ -146,9 +146,13 @@ def __init__(self, d=None): destination = os.path.join(destination, dirname) destination = namegen.sanitize(destination) os.makedirs(destination, exist_ok = True) - shutil.copy(fullfn, destination) - shared.log.info(f'Copying image: file="{fullfn}" folder="{destination}"') tgt_filename = os.path.join(destination, os.path.basename(fullfn)) + if not os.path.exists(tgt_filename): + try: + shutil.copy(fullfn, destination) + shared.log.info(f'Copying image: file="{fullfn}" folder="{destination}"') + except Exception as e: + shared.log.error(f'Copying image: {fullfn} {e}') if shared.opts.save_txt: try: from PIL import Image @@ -171,7 +175,7 @@ def __init__(self, d=None): geninfo, _ = images.read_info_from_image(image) items = infotext.parse(geninfo) p = PObject(items) - fullfn, txt_fullfn = images.save_image(image, shared.opts.outdir_save, "", seed=p.all_seeds[i], prompt=p.all_prompts[i], info=info, extension=shared.opts.samples_format, grid=is_grid, p=p) + fullfn, txt_fullfn, _exif = images.save_image(image, shared.opts.outdir_save, "", seed=p.all_seeds[i], prompt=p.all_prompts[i], info=info, extension=shared.opts.samples_format, grid=is_grid, p=p) if fullfn is None: continue filename = os.path.relpath(fullfn, shared.opts.outdir_save) diff --git a/modules/ui_control.py b/modules/ui_control.py index c4aa25bd3..8904c0f5a 100644 --- a/modules/ui_control.py +++ b/modules/ui_control.py @@ -97,11 +97,11 @@ def create_ui(_blocks: gr.Blocks=None): with gr.Accordion(open=False, label="Size", elem_id="control_size", elem_classes=["small-accordion"]): with gr.Tabs(): with gr.Tab('Before'): - resize_mode_before, resize_name_before, width_before, height_before, scale_by_before, selected_scale_tab_before = ui_sections.create_resize_inputs('control_before', [], accordion=False, latent=True) + resize_mode_before, resize_name_before, resize_context_before, width_before, height_before, scale_by_before, selected_scale_tab_before = ui_sections.create_resize_inputs('control_before', [], accordion=False, latent=True, prefix='before') with gr.Tab('After'): - resize_mode_after, resize_name_after, width_after, height_after, scale_by_after, selected_scale_tab_after = ui_sections.create_resize_inputs('control_after', [], accordion=False, latent=False) + resize_mode_after, resize_name_after, resize_context_after, width_after, height_after, scale_by_after, selected_scale_tab_after = 
ui_sections.create_resize_inputs('control_after', [], accordion=False, latent=False, prefix='after') with gr.Tab('Mask'): - resize_mode_mask, resize_name_mask, width_mask, height_mask, scale_by_mask, selected_scale_tab_mask = ui_sections.create_resize_inputs('control_mask', [], accordion=False, latent=False) + resize_mode_mask, resize_name_mask, resize_context_mask, width_mask, height_mask, scale_by_mask, selected_scale_tab_mask = ui_sections.create_resize_inputs('control_mask', [], accordion=False, latent=False, prefix='mask') with gr.Accordion(open=False, label="Sampler", elem_id="control_sampler", elem_classes=["small-accordion"]): steps, sampler_index = ui_sections.create_sampler_and_steps_selection(None, "control") @@ -128,7 +128,7 @@ def create_ui(_blocks: gr.Blocks=None): video_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False, elem_id="control_video_interpolate") video_type.change(fn=helpers.video_type_change, inputs=[video_type], outputs=[video_duration, video_loop, video_pad, video_interpolate]) - enable_hr, hr_sampler_index, hr_denoising_strength, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('control') + enable_hr, hr_sampler_index, hr_denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('control') with gr.Row(): override_settings = ui_common.create_override_inputs('control') @@ -136,7 +136,7 @@ def create_ui(_blocks: gr.Blocks=None): with gr.Row(variant='compact', elem_id="control_extra_networks", visible=False) as extra_networks_ui: from modules import timer, ui_extra_networks extra_networks_ui = ui_extra_networks.create_ui(extra_networks_ui, btn_extra, 'control', skip_indexing=shared.opts.extra_network_skip_indexing) - timer.startup.record('ui-en') + timer.startup.record('ui-networks') with gr.Row(elem_id='control-inputs'): with gr.Column(scale=9, elem_id='control-input-column', visible=True) as _column_input: @@ -201,9 +201,10 @@ def create_ui(_blocks: gr.Blocks=None): process_id = gr.Dropdown(label="Processor", choices=processors.list_models(), value='None') model_id = gr.Dropdown(label="ControlNet", choices=controlnet.list_models(), value='None') ui_common.create_refresh_button(model_id, controlnet.list_models, lambda: {"choices": controlnet.list_models(refresh=True)}, f'refresh_controlnet_models_{i}') - model_strength = gr.Slider(label="Strength", minimum=0.01, maximum=2.0, step=0.01, value=1.0-i/10) + model_strength = gr.Slider(label="CN Strength", minimum=0.01, maximum=2.0, step=0.01, value=1.0) control_start = gr.Slider(label="Start", minimum=0.0, maximum=1.0, step=0.05, value=0) control_end = gr.Slider(label="End", minimum=0.0, maximum=1.0, step=0.05, value=1.0) + control_mode = gr.Dropdown(label="CN Mode", choices=['', 'Canny', 'Tile', 'Depth', 'Blur', 'Pose', 'Gray', 'LQ'], value=0, type='index', visible=False) reset_btn = ui_components.ToolButton(value=ui_symbols.reset) image_upload = gr.UploadButton(label=ui_symbols.upload, file_types=['image'], elem_classes=['form', 'gradio-button', 'tool']) image_reuse= ui_components.ToolButton(value=ui_symbols.reuse) @@ -226,6 +227,7 @@ def create_ui(_blocks: gr.Blocks=None): image_preview = image_preview, control_start = control_start, control_end = control_end, + 
control_mode = control_mode, extra_controls = extra_controls, ) ) @@ -249,7 +251,7 @@ def create_ui(_blocks: gr.Blocks=None): process_id = gr.Dropdown(label="Processor", choices=processors.list_models(), value='None') model_id = gr.Dropdown(label="Adapter", choices=t2iadapter.list_models(), value='None') ui_common.create_refresh_button(model_id, t2iadapter.list_models, lambda: {"choices": t2iadapter.list_models(refresh=True)}, f'refresh_adapter_models_{i}') - model_strength = gr.Slider(label="Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0-i/10) + model_strength = gr.Slider(label="T2I Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0) reset_btn = ui_components.ToolButton(value=ui_symbols.reset) image_upload = gr.UploadButton(label=ui_symbols.upload, file_types=['image'], elem_classes=['form', 'gradio-button', 'tool']) image_reuse= ui_components.ToolButton(value=ui_symbols.reuse) @@ -293,7 +295,7 @@ def create_ui(_blocks: gr.Blocks=None): process_id = gr.Dropdown(label="Processor", choices=processors.list_models(), value='None') model_id = gr.Dropdown(label="ControlNet-XS", choices=xs.list_models(), value='None') ui_common.create_refresh_button(model_id, xs.list_models, lambda: {"choices": xs.list_models(refresh=True)}, f'refresh_xs_models_{i}') - model_strength = gr.Slider(label="Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0-i/10) + model_strength = gr.Slider(label="CN Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0) control_start = gr.Slider(label="Start", minimum=0.0, maximum=1.0, step=0.05, value=0) control_end = gr.Slider(label="End", minimum=0.0, maximum=1.0, step=0.05, value=1.0) reset_btn = ui_components.ToolButton(value=ui_symbols.reset) @@ -340,7 +342,7 @@ def create_ui(_blocks: gr.Blocks=None): process_id = gr.Dropdown(label="Processor", choices=processors.list_models(), value='None') model_id = gr.Dropdown(label="Model", choices=lite.list_models(), value='None') ui_common.create_refresh_button(model_id, lite.list_models, lambda: {"choices": lite.list_models(refresh=True)}, f'refresh_lite_models_{i}') - model_strength = gr.Slider(label="Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0-i/10) + model_strength = gr.Slider(label="CN Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0) reset_btn = ui_components.ToolButton(value=ui_symbols.reset) image_upload = gr.UploadButton(label=ui_symbols.upload, file_types=['image'], elem_classes=['form', 'gradio-button', 'tool']) image_reuse= ui_components.ToolButton(value=ui_symbols.reuse) @@ -383,7 +385,7 @@ def create_ui(_blocks: gr.Blocks=None): with gr.Row(): enabled_cb = gr.Checkbox(enabled, label='', container=False, show_label=False) model_id = gr.Dropdown(label="Reference", choices=reference.list_models(), value='Reference', visible=False) - model_strength = gr.Slider(label="Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0, visible=False) + model_strength = gr.Slider(label="CN Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0, visible=False) reset_btn = ui_components.ToolButton(value=ui_symbols.reset) image_upload = gr.UploadButton(label=ui_symbols.upload, file_types=['image'], elem_classes=['form', 'gradio-button', 'tool']) image_reuse= ui_components.ToolButton(value=ui_symbols.reuse) @@ -432,21 +434,21 @@ def create_ui(_blocks: gr.Blocks=None): with gr.Accordion('Leres Depth', open=True, elem_classes=['processor-settings']): settings.append(gr.Checkbox(label="Boost", value=False)) settings.append(gr.Slider(label="Near threshold", minimum=0.0, maximum=1.0, 
step=0.01, value=0.0)) - settings.append(gr.Slider(label="Background threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.0)) + settings.append(gr.Slider(label="Depth threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.0)) with gr.Accordion('MediaPipe Face', open=True, elem_classes=['processor-settings']): settings.append(gr.Slider(label="Max faces", minimum=1, maximum=10, step=1, value=1)) - settings.append(gr.Slider(label="Min confidence", minimum=0.0, maximum=1.0, step=0.01, value=0.5)) + settings.append(gr.Slider(label="Face confidence", minimum=0.0, maximum=1.0, step=0.01, value=0.5)) with gr.Accordion('Canny', open=True, elem_classes=['processor-settings']): settings.append(gr.Slider(label="Low threshold", minimum=0, maximum=1000, step=1, value=100)) settings.append(gr.Slider(label="High threshold", minimum=0, maximum=1000, step=1, value=200)) with gr.Accordion('DWPose', open=True, elem_classes=['processor-settings']): - settings.append(gr.Radio(label="Model", choices=['Tiny', 'Medium', 'Large'], value='Tiny')) - settings.append(gr.Slider(label="Min confidence", minimum=0.0, maximum=1.0, step=0.01, value=0.3)) + settings.append(gr.Radio(label="Pose Model", choices=['Tiny', 'Medium', 'Large'], value='Tiny')) + settings.append(gr.Slider(label="Pose confidence", minimum=0.0, maximum=1.0, step=0.01, value=0.3)) with gr.Accordion('SegmentAnything', open=True, elem_classes=['processor-settings']): - settings.append(gr.Radio(label="Model", choices=['Base', 'Large'], value='Base')) + settings.append(gr.Radio(label="Segment Model", choices=['Base', 'Large'], value='Base')) with gr.Accordion('Edge', open=True, elem_classes=['processor-settings']): settings.append(gr.Checkbox(label="Parameter free", value=True)) - settings.append(gr.Radio(label="Mode", choices=['edge', 'gradient'], value='edge')) + settings.append(gr.Radio(label="Edge mode", choices=['edge', 'gradient'], value='edge')) with gr.Accordion('Zoe Depth', open=True, elem_classes=['processor-settings']): settings.append(gr.Checkbox(label="Gamma corrected", value=False)) with gr.Accordion('Marigold Depth', open=True, elem_classes=['processor-settings']): @@ -454,7 +456,7 @@ def create_ui(_blocks: gr.Blocks=None): settings.append(gr.Slider(label="Denoising steps", minimum=1, maximum=99, step=1, value=10)) settings.append(gr.Slider(label="Ensemble size", minimum=1, maximum=99, step=1, value=10)) with gr.Accordion('Depth Anything', open=True, elem_classes=['processor-settings']): - settings.append(gr.Dropdown(label="Color map", choices=['none'] + masking.COLORMAP, value='inferno')) + settings.append(gr.Dropdown(label="Depth map", choices=['none'] + masking.COLORMAP, value='inferno')) for setting in settings: setting.change(fn=processors.update_settings, inputs=settings, outputs=[]) @@ -502,11 +504,11 @@ def create_ui(_blocks: gr.Blocks=None): seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end, full_quality, restore_faces, tiling, hidiffusion, hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio, - resize_mode_before, resize_name_before, width_before, height_before, scale_by_before, selected_scale_tab_before, - resize_mode_after, resize_name_after, width_after, height_after, scale_by_after, selected_scale_tab_after, - resize_mode_mask, resize_name_mask, width_mask, height_mask, scale_by_mask, 
selected_scale_tab_mask, + resize_mode_before, resize_name_before, resize_context_before, width_before, height_before, scale_by_before, selected_scale_tab_before, + resize_mode_after, resize_name_after, resize_context_after, width_after, height_after, scale_by_after, selected_scale_tab_after, + resize_mode_mask, resize_name_mask, resize_context_mask, width_mask, height_mask, scale_by_mask, selected_scale_tab_mask, denoising_strength, batch_count, batch_size, - enable_hr, hr_sampler_index, hr_denoising_strength, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, + enable_hr, hr_sampler_index, hr_denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative, video_skip_frames, video_type, video_duration, video_loop, video_pad, video_interpolate, ] diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py index 505a62145..1ac571d41 100644 --- a/modules/ui_extra_networks.py +++ b/modules/ui_extra_networks.py @@ -590,9 +590,9 @@ def toggle_visibility(is_visible): ui.details_components.append(meta) with gr.Group(elem_id=f"{tabname}_extra_details_text", elem_classes=["extra-details-text"], visible=False) as ui.details_text: description = gr.Textbox(label='Description', lines=1, placeholder="Style description...") - prompt = gr.Textbox(label='Prompt', lines=2, placeholder="Prompt...") - negative = gr.Textbox(label='Negative prompt', lines=2, placeholder="Negative prompt...") - extra = gr.Textbox(label='Parameters', lines=2, placeholder="Generation parameters overrides...") + prompt = gr.Textbox(label='Network prompt', lines=2, placeholder="Prompt...") + negative = gr.Textbox(label='Network negative prompt', lines=2, placeholder="Negative prompt...") + extra = gr.Textbox(label='Network parameters', lines=2, placeholder="Generation parameters overrides...") wildcards = gr.Textbox(label='Wildcards', lines=2, placeholder="Wildcard prompt replacements...") ui.details_components += [description, prompt, negative, extra, wildcards] with gr.Row(): diff --git a/modules/ui_img2img.py b/modules/ui_img2img.py index 1da835be6..5e8aa464e 100644 --- a/modules/ui_img2img.py +++ b/modules/ui_img2img.py @@ -44,7 +44,7 @@ def create_ui(): with gr.Row(variant='compact', elem_id="img2img_extra_networks", visible=False) as extra_networks_ui: from modules import ui_extra_networks extra_networks_ui_img2img = ui_extra_networks.create_ui(extra_networks_ui, img2img_extra_networks_button, 'img2img', skip_indexing=shared.opts.extra_network_skip_indexing) - timer.startup.record('ui-en') + timer.startup.record('ui-networks') with gr.Row(elem_id="img2img_interface", equal_height=False): with gr.Column(variant='compact', elem_id="img2img_settings"): @@ -119,7 +119,7 @@ def update_orig(image, state): with gr.Accordion(open=False, label="Sampler", elem_classes=["small-accordion"], elem_id="img2img_sampler_group"): steps, sampler_index = ui_sections.create_sampler_and_steps_selection(None, "img2img") ui_sections.create_sampler_options('img2img') - resize_mode, resize_name, width, height, scale_by, selected_scale_tab = ui_sections.create_resize_inputs('img2img', [init_img, sketch], latent=True) + resize_mode, resize_name, resize_context, width, height, scale_by, selected_scale_tab = ui_sections.create_resize_inputs('img2img', [init_img, sketch], latent=True, non_zero=False) batch_count, batch_size = 
ui_sections.create_batch_inputs('img2img', accordion=True) seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w = ui_sections.create_seed_inputs('img2img') @@ -139,7 +139,7 @@ def update_orig(image, state): inpaint_full_res_padding = gr.Slider(label='Padding', minimum=0, maximum=256, step=4, value=32, elem_id="img2img_inpaint_full_res_padding") mask_alpha = gr.Slider(label="Alpha", minimum=0.0, maximum=1.0, step=0.05, value=1.0, elem_id="img2img_mask_alpha") with gr.Row(): - inpainting_mask_invert = gr.Radio(label='Mode', choices=['masked', 'invert'], value='masked', type="index", elem_id="img2img_mask_mode") + inpainting_mask_invert = gr.Radio(label='Inpaint Mode', choices=['masked', 'invert'], value='masked', type="index", elem_id="img2img_mask_mode") inpaint_full_res = gr.Radio(label="Inpaint area", choices=["full", "masked"], type="index", value="full", elem_id="img2img_inpaint_full_res") inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'noise', 'nothing'], value='original', type="index", elem_id="img2img_inpainting_fill", visible=not shared.native) @@ -187,7 +187,7 @@ def select_img2img_tab(tab): selected_scale_tab, height, width, scale_by, - resize_mode, resize_name, + resize_mode, resize_name, resize_context, inpaint_full_res, inpaint_full_res_padding, inpainting_mask_invert, img2img_batch_files, img2img_batch_input_dir, img2img_batch_output_dir, img2img_batch_inpaint_mask_dir, hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio, @@ -241,6 +241,7 @@ def select_img2img_tab(tab): (steps, "Steps"), # resize (resize_mode, "Resize mode"), + (resize_name, "Resize name"), (width, "Size-1"), (height, "Size-2"), (scale_by, "Resize scale"), diff --git a/modules/ui_loadsave.py b/modules/ui_loadsave.py index 69d79f879..6e398603a 100644 --- a/modules/ui_loadsave.py +++ b/modules/ui_loadsave.py @@ -4,6 +4,9 @@ from modules.ui_components import ToolButton +debug = os.environ.get('SD_UI_DEBUG', None) + + class UiLoadsave: """allows saving and restorig default values for gradio components""" @@ -37,10 +40,14 @@ def apply_field(obj, field, condition=None, init_field=None): pass elif condition and not condition(saved_value): pass + # elif getattr(obj, 'type', '') == 'index': + # pass # may need special handling else: setattr(obj, field, saved_value) if init_field is not None: init_field(saved_value) + if debug and key in self.component_mapping and not key.startswith('customscript'): + errors.log.warning(f'UI duplicate: key="{key}" id={getattr(obj, "elem_id", None)} class={getattr(obj, "elem_classes", None)}') if field == 'value' and key not in self.component_mapping: self.component_mapping[key] = x if field == 'open' and key not in self.component_mapping: @@ -125,6 +132,8 @@ def dump_defaults(self): def iter_changes(self, values): for i, name in enumerate(self.component_mapping): + # if '__init__' in name: + # continue component = self.component_mapping[name] choices = getattr(component, 'choices', None) if type(choices) is list and len(choices) > 0: # fix gradio radio button choices being tuples @@ -190,12 +199,11 @@ def ui_view(self, *values): return text def ui_apply(self, *values): - from modules.shared import log num_changed = 0 current_ui_settings = self.read_from_file() for name, old_value, new_value, default_value in self.iter_changes(values): component = self.component_mapping[name] - 
log.debug(f'Settings: name={name} component={component} old={old_value} default={default_value} new={new_value}') + errors.log.debug(f'Settings: name={name} component={component} old={old_value} default={default_value} new={new_value}') num_changed += 1 current_ui_settings[name] = new_value if num_changed == 0: @@ -224,11 +232,10 @@ def ui_submenu_apply(self, items): text += f"{k}{'open' if opened else 'closed'}" text += "" - from modules.shared import log num_changed = 0 current_ui_settings = self.read_from_file() for name, _old_value, new_value, default_value in self.iter_menus(): - log.debug(f'Settings: name={name} default={default_value} new={new_value}') + errors.log.debug(f'Settings: name={name} default={default_value} new={new_value}') num_changed += 1 current_ui_settings[name] = new_value if num_changed == 0: diff --git a/modules/ui_postprocessing.py b/modules/ui_postprocessing.py index b1948a27b..aa5140da2 100644 --- a/modules/ui_postprocessing.py +++ b/modules/ui_postprocessing.py @@ -45,7 +45,7 @@ def create_ui(): trending = gr.Label(elem_id="interrogate_label_trending", label="Trending", num_top_classes=5) flavor = gr.Label(elem_id="interrogate_label_flavor", label="Flavor", num_top_classes=5) with gr.Row(): - clip_model = gr.Dropdown([], value='ViT-L-14/openai', label='CLIP Model') + clip_model = gr.Dropdown([], value='ViT-L-14/openai', label='CLiP Model') ui_common.create_refresh_button(clip_model, interrogate.get_clip_models, lambda: {"choices": interrogate.get_clip_models()}, 'refresh_interrogate_models') mode = gr.Radio(['best', 'fast', 'classic', 'caption', 'negative'], label='Mode', value='best') with gr.Row(elem_id='interrogate_buttons_image'): @@ -67,7 +67,7 @@ def create_ui(): with gr.Row(): batch = gr.Text(label="Prompts", lines=10) with gr.Row(): - clip_model = gr.Dropdown([], value='ViT-L-14/openai', label='CLIP Model') + clip_model = gr.Dropdown([], value='ViT-L-14/openai', label='CLiP Batch Model') ui_common.create_refresh_button(clip_model, interrogate.get_clip_models, lambda: {"choices": interrogate.get_clip_models()}, 'refresh_interrogate_models') with gr.Row(elem_id='interrogate_buttons_batch'): btn_interrogate_batch = gr.Button("Interrogate", elem_id="interrogate_btn_interrogate", variant='primary') diff --git a/modules/ui_sections.py b/modules/ui_sections.py index e1be99edc..9c8d1d0fc 100644 --- a/modules/ui_sections.py +++ b/modules/ui_sections.py @@ -179,7 +179,7 @@ def create_correction_inputs(tab): with gr.Accordion(open=False, label="Corrections", elem_id=f"{tab}_corrections", elem_classes=["small-accordion"], visible=shared.native): with gr.Group(visible=shared.native): with gr.Row(elem_id=f"{tab}_hdr_mode_row"): - hdr_mode = gr.Dropdown(label="Mode", choices=["Relative values", "Absolute values"], type="index", value="Relative values", elem_id=f"{tab}_hdr_mode", show_label=False) + hdr_mode = gr.Dropdown(label="Correction mode", choices=["Relative values", "Absolute values"], type="index", value="Relative values", elem_id=f"{tab}_hdr_mode", show_label=False) gr.HTML('
') with gr.Row(elem_id=f"{tab}_correction_row"): hdr_brightness = gr.Slider(minimum=-1.0, maximum=1.0, step=0.1, value=0, label='Brightness', elem_id=f"{tab}_hdr_brightness") @@ -280,12 +280,15 @@ def create_hires_inputs(tab): with gr.Group(): with gr.Row(elem_id=f"{tab}_hires_row1"): enable_hr = gr.Checkbox(label='Enable second pass', value=False, elem_id=f"{tab}_enable_hr") + """ with gr.Row(elem_id=f"{tab}_hires_fix_row1", variant="compact"): hr_upscaler = gr.Dropdown(label="Upscaler", elem_id=f"{tab}_hr_upscaler", choices=[*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]], value=shared.latent_upscale_default_mode) hr_scale = gr.Slider(minimum=0.1, maximum=8.0, step=0.05, label="Rescale by", value=2.0, elem_id=f"{tab}_hr_scale") with gr.Row(elem_id=f"{tab}_hires_fix_row3", variant="compact"): hr_resize_x = gr.Slider(minimum=0, maximum=4096, step=8, label="Width resize", value=0, elem_id=f"{tab}_hr_resize_x") hr_resize_y = gr.Slider(minimum=0, maximum=4096, step=8, label="Height resize", value=0, elem_id=f"{tab}_hr_resize_y") + """ + hr_resize_mode, hr_upscaler, hr_resize_context, hr_resize_x, hr_resize_y, hr_scale, _selected_scale_tab = create_resize_inputs(tab, None, accordion=False, latent=True, non_zero=False) with gr.Row(elem_id=f"{tab}_hires_fix_row2", variant="compact"): hr_force = gr.Checkbox(label='Force HiRes', value=False, elem_id=f"{tab}_hr_force") hr_sampler_index = gr.Dropdown(label='Secondary sampler', elem_id=f"{tab}_sampling_alt", choices=[x.name for x in sd_samplers.samplers], value='Same as primary', type="index") @@ -300,43 +303,52 @@ def create_hires_inputs(tab): refiner_prompt = gr.Textbox(value='', label='Secondary prompt', elem_id=f"{tab}_refiner_prompt") with gr.Row(elem_id="txt2img_refiner_row4", variant="compact"): refiner_negative = gr.Textbox(value='', label='Secondary negative prompt', elem_id=f"{tab}_refiner_neg_prompt") - return enable_hr, hr_sampler_index, denoising_strength, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative + return enable_hr, hr_sampler_index, denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative -def create_resize_inputs(tab, images, accordion=True, latent=False): +def create_resize_inputs(tab, images, accordion=True, latent=False, non_zero=True, prefix=''): dummy_component = gr.Number(visible=False, value=0) + if len(prefix) > 0 and not prefix.startswith(' '): + prefix = f' {prefix}' with gr.Accordion(open=False, label="Resize", elem_classes=["small-accordion"], elem_id=f"{tab}_resize_group") if accordion else gr.Group(): - # with gr.Row(): - # resize_mode = gr.Radio(label="Mode", elem_id=f"{tab}_resize_mode", choices=shared.resize_modes, type="index", value='Fixed') with gr.Row(): - resize_mode = gr.Dropdown(label="Mode", elem_id=f"{tab}_resize_mode", choices=shared.resize_modes, type="index", value='Fixed') - resize_name = gr.Dropdown(label="Method", elem_id=f"{tab}_resize_name", choices=([] if not latent else list(shared.latent_upscale_modes)) + [x.name for x in shared.sd_upscalers], value=shared.latent_upscale_default_mode) + resize_mode = gr.Dropdown(label=f"Mode{prefix}" if non_zero else "Resize mode", elem_id=f"{tab}_resize_mode", choices=shared.resize_modes, type="index", value='Fixed') + resize_name = gr.Dropdown(label=f"Method{prefix}", elem_id=f"{tab}_resize_name", 
choices=([] if not latent else list(shared.latent_upscale_modes)) + [x.name for x in shared.sd_upscalers], value=shared.latent_upscale_default_mode, visible=True) + resize_context_choices = ["Add with forward", "Remove with forward", "Add with backward", "Remove with backward"] + resize_context = gr.Dropdown(label=f"Context{prefix}", elem_id=f"{tab}_resize_context", choices=resize_context_choices, value=resize_context_choices[0], visible=False) ui_common.create_refresh_button(resize_name, modelloader.load_upscalers, lambda: {"choices": modelloader.load_upscalers()}, 'refresh_upscalers') + def resize_mode_change(mode): + if mode is None or mode == 0: + return gr.update(visible=False), gr.update(visible=False) + return gr.update(visible=mode != 5), gr.update(visible=mode == 5) + resize_mode.change(fn=resize_mode_change, inputs=[resize_mode], outputs=[resize_name, resize_context]) + with gr.Row(visible=True) as _resize_group: with gr.Column(elem_id=f"{tab}_column_size"): selected_scale_tab = gr.State(value=0) # pylint: disable=abstract-class-instantiated - with gr.Tabs(elem_id=f"{tab}_scale_tabs"): - with gr.Tab(label="Fixed", elem_id=f"{tab}_scale_tab_fixed") as tab_scale_to: + with gr.Tabs(elem_id=f"{tab}_scale_tabs", selected=0 if non_zero else 1): + with gr.Tab(label="Fixed", id=0, elem_id=f"{tab}_scale_tab_fixed") as tab_scale_to: with gr.Row(): with gr.Column(elem_id=f"{tab}_column_size_fixed"): with gr.Row(): - width = gr.Slider(minimum=64, maximum=8192, step=8, label="Width", value=512, elem_id=f"{tab}_width") - height = gr.Slider(minimum=64, maximum=8192, step=8, label="Height", value=512, elem_id=f"{tab}_height") + width = gr.Slider(minimum=64 if non_zero else 0, maximum=8192, step=8, label=f"Width {prefix}" if non_zero else "Resize width", value=1024 if non_zero else 0, elem_id=f"{tab}_resize_width") + height = gr.Slider(minimum=64 if non_zero else 0, maximum=8192, step=8, label=f"Height {prefix}" if non_zero else "Resize height", value=1024 if non_zero else 0, elem_id=f"{tab}_resize_height") ar_list = ['AR'] + [x.strip() for x in shared.opts.aspect_ratios.split(',') if x.strip() != ''] - ar_dropdown = gr.Dropdown(show_label=False, interactive=True, choices=ar_list, value=ar_list[0], elem_id=f"{tab}_ar", elem_classes=["ar-dropdown"]) + ar_dropdown = gr.Dropdown(show_label=False, interactive=True, choices=ar_list, value=ar_list[0], elem_id=f"{tab}_resize_ar", elem_classes=["ar-dropdown"]) for c in [ar_dropdown, width, height]: c.change(fn=ar_change, inputs=[ar_dropdown, width, height], outputs=[width, height], show_progress=False) - res_switch_btn = ToolButton(value=ui_symbols.switch, elem_id=f"{tab}_res_switch_btn") + res_switch_btn = ToolButton(value=ui_symbols.switch, elem_id=f"{tab}_resize_switch_size_btn") res_switch_btn.click(lambda w, h: (h, w), inputs=[width, height], outputs=[width, height], show_progress=False) - detect_image_size_btn = ToolButton(value=ui_symbols.detect, elem_id=f"{tab}_detect_image_size_btn") + detect_image_size_btn = ToolButton(value=ui_symbols.detect, elem_id=f"{tab}_resize_detect_size_btn") el = tab.split('_')[0] detect_image_size_btn.click(fn=lambda w, h, _: (w or gr.update(), h or gr.update()), _js=f'currentImageResolution{el}', inputs=[dummy_component, dummy_component, dummy_component], outputs=[width, height], show_progress=False) - with gr.Tab(label="Scale", elem_id=f"{tab}_scale_tab_scale") as tab_scale_by: - scale_by = gr.Slider(minimum=0.05, maximum=8.0, step=0.05, label="Scale", value=1.0, elem_id=f"{tab}_scale") - for component in images: - 
component.change(fn=lambda: None, _js="updateImg2imgResizeToTextAfterChangingImage", inputs=[], outputs=[], show_progress=False) + with gr.Tab(label="Scale", id=1, elem_id=f"{tab}_scale_tab_scale") as tab_scale_by: + scale_by = gr.Slider(minimum=0.05, maximum=8.0, step=0.05, label=f"Scale {prefix}" if non_zero else "Resize scale", value=1.0, elem_id=f"{tab}_scale") + if images is not None: + for component in images: + component.change(fn=lambda: None, _js="updateImg2imgResizeToTextAfterChangingImage", inputs=[], outputs=[], show_progress=False) tab_scale_to.select(fn=lambda: 0, inputs=[], outputs=[selected_scale_tab]) tab_scale_by.select(fn=lambda: 1, inputs=[], outputs=[selected_scale_tab]) # resize_mode.change(fn=lambda x: gr.update(visible=x != 0), inputs=[resize_mode], outputs=[_resize_group]) - return resize_mode, resize_name, width, height, scale_by, selected_scale_tab + return resize_mode, resize_name, resize_context, width, height, scale_by, selected_scale_tab diff --git a/modules/ui_txt2img.py b/modules/ui_txt2img.py index 705444ccf..03e773e04 100644 --- a/modules/ui_txt2img.py +++ b/modules/ui_txt2img.py @@ -29,7 +29,7 @@ def create_ui(): with gr.Row(variant='compact', elem_id="txt2img_extra_networks", visible=False) as extra_networks_ui: from modules import ui_extra_networks extra_networks_ui = ui_extra_networks.create_ui(extra_networks_ui, txt2img_extra_networks_button, 'txt2img', skip_indexing=shared.opts.extra_network_skip_indexing) - timer.startup.record('ui-en') + timer.startup.record('ui-networks') with gr.Row(elem_id="txt2img_interface", equal_height=False): with gr.Column(variant='compact', elem_id="txt2img_settings"): @@ -48,7 +48,7 @@ def create_ui(): seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w = ui_sections.create_seed_inputs('txt2img') _cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, _cfg_end = ui_sections.create_advanced_inputs('txt2img', base=False) hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('txt2img') - enable_hr, hr_sampler_index, denoising_strength, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img') + enable_hr, hr_sampler_index, denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img') override_settings = ui_common.create_override_inputs('txt2img') with gr.Group(elem_id="txt2img_script_container"): @@ -70,7 +70,7 @@ def create_ui(): seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, height, width, enable_hr, denoising_strength, - hr_scale, hr_upscaler, hr_force, hr_second_pass_steps, hr_resize_x, hr_resize_y, + hr_scale, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative, hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio, override_settings, @@ -118,14 +118,19 @@ def create_ui(): (hidiffusion, "HiDiffusion"), # second pass 
(enable_hr, "Second pass"), - (hr_sampler_index, "Hires sampler"), (denoising_strength, "Denoising strength"), + (hr_sampler_index, "Hires sampler"), + (hr_resize_mode, "Hires resize mode"), + (hr_resize_context, "Hires resize context"), (hr_upscaler, "Hires upscaler"), (hr_force, "Hires force"), (hr_second_pass_steps, "Hires steps"), (hr_scale, "Hires upscale"), + (hr_scale, "Hires scale"), (hr_resize_x, "Hires resize-1"), (hr_resize_y, "Hires resize-2"), + (hr_resize_x, "Hires size-1"), + (hr_resize_y, "Hires size-2"), # refiner (refiner_start, "Refiner start"), (refiner_steps, "Refiner steps"), diff --git a/modules/vqa.py b/modules/vqa.py index 87ea8431c..044ee252a 100644 --- a/modules/vqa.py +++ b/modules/vqa.py @@ -12,7 +12,10 @@ MODELS = { "MS Florence 2 Base": "microsoft/Florence-2-base", # 0.5GB "MS Florence 2 Large": "microsoft/Florence-2-large", # 1.5GB - "CogFlorence 2 Large": "thwri/CogFlorence-2-Large-Freeze", # 1.6GB + "MiaoshouAI PromptGen 1.5 Base": "MiaoshouAI/Florence-2-base-PromptGen-v1.5", # 1.1GB + "MiaoshouAI PromptGen 1.5 Large": "MiaoshouAI/Florence-2-large-PromptGen-v1.5", # 3.3GB + "CogFlorence 2.0 Large": "thwri/CogFlorence-2-Large-Freeze", # 1.6GB + "CogFlorence 2.2 Large": "thwri/CogFlorence-2.2-Large", # 1.6GB "Moondream 2": "vikhyatk/moondream2", # 3.7GB "GIT TextCaps Base": "microsoft/git-base-textcaps", # 0.7GB "GIT VQA Base": "microsoft/git-base-vqav2", # 0.7GB diff --git a/modules/zluda_installer.py b/modules/zluda_installer.py index 30bfe48a7..cd8d5798c 100644 --- a/modules/zluda_installer.py +++ b/modules/zluda_installer.py @@ -24,7 +24,7 @@ def install(zluda_path: os.PathLike) -> None: if os.path.exists(zluda_path): return - urllib.request.urlretrieve(f'https://github.com/lshqqytiger/ZLUDA/releases/download/rel.{os.environ.get("ZLUDA_HASH", "1c238a959f2aafdb9900f6801b61d9c0318040a2")}/ZLUDA-windows-rocm{rocm.version[0]}-amd64.zip', '_zluda') + urllib.request.urlretrieve(f'https://github.com/lshqqytiger/ZLUDA/releases/download/rel.{os.environ.get("ZLUDA_HASH", "c0804ca624963aab420cb418412b1c7fbae3454b")}/ZLUDA-windows-rocm{rocm.version[0]}-amd64.zip', '_zluda') with zipfile.ZipFile('_zluda', 'r') as archive: infos = archive.infolist() for info in infos: @@ -49,6 +49,8 @@ def make_copy(zluda_path: os.PathLike) -> None: def load(zluda_path: os.PathLike) -> None: + os.environ["ZLUDA_COMGR_LOG_LEVEL"] = "1" + for v in HIPSDK_TARGETS: ctypes.windll.LoadLibrary(os.path.join(rocm.path, 'bin', v)) for v in ZLUDA_TARGETS: diff --git a/requirements.txt b/requirements.txt index abecc4a57..a35a6d3f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,8 +27,7 @@ fasteners orjson invisible-watermark pi-heif -diffusers==0.30.2 -safetensors==0.4.4 +safetensors==0.4.5 tensordict==0.1.2 peft==0.11.1 httpx==0.24.1 @@ -39,7 +38,7 @@ clip-interrogator==0.6.0 antlr4-python3-runtime==4.9.3 requests==2.31.0 tqdm==4.66.4 -accelerate==0.33.0 +accelerate==0.34.2 opencv-contrib-python-headless==4.9.0.80 einops==0.4.1 gradio==3.43.2 @@ -63,3 +62,4 @@ typing-extensions==4.11.0 torchdiffeq dctorch scikit-image +seam-carving diff --git a/scripts/animatediff.py b/scripts/animatediff.py index 86177cf17..3e8eff3c5 100644 --- a/scripts/animatediff.py +++ b/scripts/animatediff.py @@ -1,15 +1,3 @@ -""" -Lightweight AnimateDiff implementation in Diffusers -Docs: -TODO animatediff items: -- SDXL -- Custom models -- Custom LORAs -- Enable second pass -- TemporalDiff: https://huggingface.co/CiaraRowles/TemporalDiff/tree/main -- AnimateFace: 
https://huggingface.co/nlper2022/animatediff_face_512/tree/main -""" - import os import gradio as gr import diffusers @@ -20,7 +8,7 @@ # config ADAPTERS = { 'None': None, - 'Motion 1.5 v3' :'vladmandic/animatediff-v3', + 'Motion 1.5 v3' :'diffusers/animatediff-motion-adapter-v1-5-3', 'Motion 1.5 v2' :'guoyww/animatediff-motion-adapter-v1-5-2', 'Motion 1.5 v1': 'guoyww/animatediff-motion-adapter-v1-5', 'Motion 1.4': 'guoyww/animatediff-motion-adapter-v1-4', @@ -28,6 +16,7 @@ 'AnimateFace': 'vladmandic/animateface', 'Lightning': 'ByteDance/AnimateDiff-Lightning/animatediff_lightning_4step_diffusers.safetensors', 'SDXL Beta': 'a-r-r-o-w/animatediff-motion-adapter-sdxl-beta', + 'LCM': 'wangfuyun/AnimateLCM', # 'SDXL Beta': 'guoyww/animatediff-motion-adapter-sdxl-beta', # 'LongAnimateDiff 32': 'vladmandic/longanimatediff-32', # 'LongAnimateDiff 64': 'vladmandic/longanimatediff-64', @@ -42,6 +31,7 @@ 'Tilt-down': 'guoyww/animatediff-motion-lora-tilt-down', 'Roll-left': 'guoyww/animatediff-motion-lora-rolling-anticlockwise', 'Roll-right': 'guoyww/animatediff-motion-lora-rolling-clockwise', + 'LCM': 'wangfuyun/AnimateLCM/AnimateLCM_sd15_t2v_lora.safetensors' } # state @@ -70,7 +60,7 @@ def set_adapter(adapter_name: str = 'None'): shared.log.warning(f'AnimateDiff: unsupported model type: {shared.sd_model.__class__.__name__}') return if motion_adapter is not None and loaded_adapter == adapter_name and (shared.sd_model.__class__.__name__ == 'AnimateDiffPipeline' or shared.sd_model.__class__.__name__ == 'AnimateDiffSDXLPipeline'): - shared.log.debug(f'AnimateDiff cache: adapter="{adapter_name}"') + shared.log.debug(f'AnimateDiff: adapter="{adapter_name}" cached') return if getattr(shared.sd_model, 'image_encoder', None) is not None: shared.log.debug('AnimateDiff: unloading IP adapter') @@ -130,19 +120,80 @@ def set_adapter(adapter_name: str = 'None'): sd_models.copy_diffuser_options(new_pipe, orig_pipe) sd_models.set_diffuser_options(shared.sd_model, vae=None, op='model') sd_models.move_model(shared.sd_model.unet, devices.device) # move pipeline to device - shared.log.debug(f'AnimateDiff create: pipeline="{shared.sd_model.__class__}" adapter="{loaded_adapter}"') + shared.log.debug(f'AnimateDiff: adapter="{loaded_adapter}"') except Exception as e: motion_adapter = None loaded_adapter = None shared.log.error(f'AnimateDiff load error: adapter="{adapter_name}" {e}') +def set_scheduler(p, model, override: bool = False): + if override: + p.sampler_name = 'Default' + if 'LCM' in model: + shared.sd_model.scheduler = diffusers.LCMScheduler.from_config(shared.sd_model.scheduler.config) + else: + shared.sd_model.scheduler = diffusers.DDIMScheduler.from_config(shared.sd_model.scheduler.config) + shared.log.debug(f'AnimateDiff: scheduler={shared.sd_model.scheduler.__class__.__name__}') + + +def set_prompt(p): + p.prompt = shared.prompt_styles.apply_styles_to_prompt(p.prompt, p.styles) + p.negative_prompt = shared.prompt_styles.apply_negative_styles_to_prompt(p.negative_prompt, p.styles) + prompts = p.prompt.split('\n') + try: + prompt = {} + for line in prompts: + k, v = line.split(':') + prompt[int(k.strip())] = v.strip() + except Exception: + prompt = p.prompt + shared.log.debug(f'AnimateDiff prompt: {prompt}') + p.task_args['prompt'] = prompt + p.task_args['negative_prompt'] = p.negative_prompt + + +def set_lora(p, lora, strength): + if lora is not None and lora != 'None': + shared.log.debug(f'AnimateDiff: lora="{lora}" strength={strength}') + if lora.endswith('.safetensors'): + fn = os.path.basename(lora) + 
lora = lora.replace(f'/{fn}', '') + shared.sd_model.load_lora_weights(lora, weight_name=fn, adapter_name=lora) + else: + shared.sd_model.load_lora_weights(lora, adapter_name=lora) + shared.sd_model.set_adapters([lora], adapter_weights=[strength]) + p.extra_generation_params['AnimateDiff Lora'] = f'{lora}:{strength}' + + +def set_free_init(method, iters, order, spatial, temporal): + if hasattr(shared.sd_model, 'enable_free_init') and method != 'none': + shared.log.debug(f'AnimateDiff free init: method={method} iters={iters} order={order} spatial={spatial} temporal={temporal}') + shared.sd_model.enable_free_init( + num_iters=iters, + use_fast_sampling=False, + method=method, + order=order, + spatial_stop_frequency=spatial, + temporal_stop_frequency=temporal, + ) + + +def set_free_noise(frames): + context_length = 16 + context_stride = 4 + if frames >= context_length: + shared.log.debug(f'AnimateDiff free noise: frames={frames} context={context_length} stride={context_stride}') + shared.sd_model.enable_free_noise(context_length=context_length, context_stride=context_stride) + + class Script(scripts.Script): def title(self): return 'AnimateDiff' - def show(self, _is_img2img): - return scripts.AlwaysVisible if shared.native else False + def show(self, is_img2img): + # return scripts.AlwaysVisible if shared.native else False + return not is_img2img def ui(self, _is_img2img): @@ -154,81 +205,68 @@ def video_type_change(video_type): gr.update(visible=video_type == 'MP4'), ] - with gr.Accordion('AnimateDiff', open=False, elem_id='animatediff'): - with gr.Row(): - adapter_index = gr.Dropdown(label='Adapter', choices=list(ADAPTERS), value='None') - frames = gr.Slider(label='Frames', minimum=1, maximum=64, step=1, value=16) - with gr.Row(): - override_scheduler = gr.Checkbox(label='Override sampler', value=True) + with gr.Row(): + gr.HTML("  AnimateDiff
") + with gr.Row(): + adapter_index = gr.Dropdown(label='Adapter', choices=list(ADAPTERS), value='None') + frames = gr.Slider(label='Frames', minimum=1, maximum=256, step=1, value=16) + with gr.Row(): + override_scheduler = gr.Checkbox(label='Override sampler', value=True) + with gr.Row(): + lora_index = gr.Dropdown(label='Lora', choices=list(LORAS), value='None') + strength = gr.Slider(label='Strength', minimum=0.0, maximum=2.0, step=0.05, value=1.0) + with gr.Row(): + latent_mode = gr.Checkbox(label='Latent mode', value=True, visible=False) + with gr.Row(): + video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') + duration = gr.Slider(label='Duration', minimum=0.25, maximum=10, step=0.25, value=2, visible=False) + with gr.Accordion('FreeInit', open=False): with gr.Row(): - lora_index = gr.Dropdown(label='Lora', choices=list(LORAS), value='None') - strength = gr.Slider(label='Strength', minimum=0.0, maximum=2.0, step=0.05, value=1.0) + fi_method = gr.Dropdown(label='Method', choices=['none', 'butterworth', 'ideal', 'gaussian'], value='none') with gr.Row(): - latent_mode = gr.Checkbox(label='Latent mode', value=True, visible=False) + # fi_fast = gr.Checkbox(label='Fast sampling', value=False) + fi_iters = gr.Slider(label='Iterations', minimum=1, maximum=10, step=1, value=3) + fi_order = gr.Slider(label='Order', minimum=1, maximum=10, step=1, value=4) with gr.Row(): - video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') - duration = gr.Slider(label='Duration', minimum=0.25, maximum=10, step=0.25, value=2, visible=False) - with gr.Accordion('FreeInit', open=False): - with gr.Row(): - fi_method = gr.Dropdown(label='Method', choices=['none', 'butterworth', 'ideal', 'gaussian'], value='none') - with gr.Row(): - # fi_fast = gr.Checkbox(label='Fast sampling', value=False) - fi_iters = gr.Slider(label='Iterations', minimum=1, maximum=10, step=1, value=3) - fi_order = gr.Slider(label='Order', minimum=1, maximum=10, step=1, value=4) - with gr.Row(): - fi_spatial = gr.Slider(label='Spatial frequency', minimum=0.0, maximum=1.0, step=0.05, value=0.25) - fi_temporal = gr.Slider(label='Temporal frequency', minimum=0.0, maximum=1.0, step=0.05, value=0.25) - with gr.Row(): - gif_loop = gr.Checkbox(label='Loop', value=True, visible=False) - mp4_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) - mp4_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) - video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, gif_loop, mp4_pad, mp4_interpolate]) + fi_spatial = gr.Slider(label='Spatial frequency', minimum=0.0, maximum=1.0, step=0.05, value=0.25) + fi_temporal = gr.Slider(label='Temporal frequency', minimum=0.0, maximum=1.0, step=0.05, value=0.25) + with gr.Row(): + gif_loop = gr.Checkbox(label='Loop', value=True, visible=False) + mp4_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) + mp4_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) + video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, gif_loop, mp4_pad, mp4_interpolate]) return [adapter_index, frames, lora_index, strength, latent_mode, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, override_scheduler, fi_method, fi_iters, fi_order, fi_spatial, fi_temporal] - def process(self, p: 
processing.StableDiffusionProcessing, adapter_index, frames, lora_index, strength, latent_mode, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, override_scheduler, fi_method, fi_iters, fi_order, fi_spatial, fi_temporal): # pylint: disable=arguments-differ, unused-argument + def run(self, p: processing.StableDiffusionProcessing, adapter_index, frames, lora_index, strength, latent_mode, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, override_scheduler, fi_method, fi_iters, fi_order, fi_spatial, fi_temporal): # pylint: disable=arguments-differ, unused-argument adapter = ADAPTERS[adapter_index] lora = LORAS[lora_index] set_adapter(adapter) if motion_adapter is None: return - if override_scheduler: - p.sampler_name = 'Default' - shared.sd_model.scheduler = diffusers.DDIMScheduler( - beta_start=0.00085, - beta_end=0.012, - beta_schedule="linear", - clip_sample=False, - num_train_timesteps=1000, - rescale_betas_zero_snr=False, - set_alpha_to_one=True, - steps_offset=0, - timestep_spacing="linspace", - trained_betas=None, - ) - shared.log.debug(f'AnimateDiff: adapter="{adapter}" lora="{lora}" strength={strength} video={video_type} scheduler={shared.sd_model.scheduler.__class__.__name__ if override_scheduler else p.sampler_name}') - if lora is not None and lora != 'None': - shared.sd_model.load_lora_weights(lora, adapter_name=lora) - shared.sd_model.set_adapters([lora], adapter_weights=[strength]) - p.extra_generation_params['AnimateDiff Lora'] = f'{lora}:{strength}' - if hasattr(shared.sd_model, 'enable_free_init') and fi_method != 'none': - shared.sd_model.enable_free_init( - num_iters=fi_iters, - use_fast_sampling=False, - method=fi_method, - order=fi_order, - spatial_stop_frequency=fi_spatial, - temporal_stop_frequency=fi_temporal, - ) + set_scheduler(p, adapter, override_scheduler) + set_lora(p, lora, strength) + set_free_init(fi_method, fi_iters, fi_order, fi_spatial, fi_temporal) + set_free_noise(frames) + processing.fix_seed(p) p.extra_generation_params['AnimateDiff'] = loaded_adapter p.do_not_save_grid = True - if 'animatediff' not in p.ops: - p.ops.append('animatediff') + p.ops.append('animatediff') + p.task_args['generator'] = None p.task_args['num_frames'] = frames p.task_args['num_inference_steps'] = p.steps - if not latent_mode: - p.task_args['output_type'] = 'np' + p.task_args['output_type'] = 'np' + shared.log.debug(f'AnimateDiff args: {p.task_args}') + set_prompt(p) + orig_prompt_attention = shared.opts.data['prompt_attention'] + shared.opts.data['prompt_attention'] = 'Fixed attention' + processed: processing.Processed = processing.process_images(p) # runs processing using main loop + shared.opts.data['prompt_attention'] = orig_prompt_attention + devices.torch_gc() + return processed + - def postprocess(self, p: processing.StableDiffusionProcessing, processed: processing.Processed, adapter_index, frames, lora_index, strength, latent_mode, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, override_scheduler, fi_method, fi_iters, fi_order, fi_spatial, fi_temporal): # pylint: disable=arguments-differ, unused-argument + def after(self, p: processing.StableDiffusionProcessing, processed: processing.Processed, adapter_index, frames, lora_index, strength, latent_mode, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, override_scheduler, fi_method, fi_iters, fi_order, fi_spatial, fi_temporal): # pylint: disable=arguments-differ, unused-argument from modules.images import save_video if video_type != 'None': + shared.log.debug(f'AnimateDiff video: 
type={video_type} duration={duration} loop={gif_loop} pad={mp4_pad} interpolate={mp4_interpolate}') save_video(p, filename=None, images=processed.images, video_type=video_type, duration=duration, loop=gif_loop, pad=mp4_pad, interpolate=mp4_interpolate) diff --git a/scripts/cogvideo.py b/scripts/cogvideo.py new file mode 100644 index 000000000..6cbb71467 --- /dev/null +++ b/scripts/cogvideo.py @@ -0,0 +1,228 @@ +""" +models: https://huggingface.co/THUDM/CogVideoX-2b https://huggingface.co/THUDM/CogVideoX-5b +source: https://github.com/THUDM/CogVideo +quanto: https://gist.github.com/a-r-r-o-w/31be62828b00a9292821b85c1017effa +torchao: https://gist.github.com/a-r-r-o-w/4d9732d17412888c885480c6521a9897 +venhancer: https://github.com/THUDM/CogVideo/blob/dcb82ae30b454ab898aeced0633172d75dbd55b8/tools/venhancer/README.md +""" +import os +import time +import cv2 +import gradio as gr +import torch +from torchvision import transforms +import diffusers +import numpy as np +from modules import scripts, shared, devices, errors, sd_models, processing +from modules.processing_callbacks import diffusers_callback, set_callbacks_p + + +debug = (os.environ.get('SD_LOAD_DEBUG', None) is not None) or (os.environ.get('SD_PROCESS_DEBUG', None) is not None) + + +class Script(scripts.Script): + def title(self): + return 'CogVideoX' + + def show(self, is_img2img): + return shared.native + + + def ui(self, _is_img2img): + def video_type_change(video_type): + return [ + gr.update(visible=video_type != 'None'), + gr.update(visible=video_type == 'GIF' or video_type == 'PNG'), + gr.update(visible=video_type == 'MP4'), + gr.update(visible=video_type == 'MP4'), + ] + + with gr.Row(): + gr.HTML("  CogVideoX
") + with gr.Row(): + model = gr.Dropdown(label='Model', choices=['None', 'THUDM/CogVideoX-2b', 'THUDM/CogVideoX-5b'], value='THUDM/CogVideoX-2b') + sampler = gr.Dropdown(label='Sampler', choices=['DDIM', 'DPM'], value='DDIM') + with gr.Row(): + frames = gr.Slider(label='Frames', minimum=1, maximum=100, step=1, value=49) + guidance = gr.Slider(label='Guidance', minimum=0.0, maximum=14.0, step=0.5, value=6.0) + with gr.Row(): + offload = gr.Dropdown(label='Offload', choices=['none', 'balanced', 'model', 'sequential'], value='balanced') + override = gr.Checkbox(label='Override resolution', value=True) + with gr.Row(): + video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') + duration = gr.Slider(label='Duration', minimum=0.25, maximum=30, step=0.25, value=8, visible=False) + with gr.Accordion('Optional init video', open=False): + with gr.Row(): + image = gr.Image(value=None, label='Image', type='pil', source='upload', width=256, height=256, visible=False) + video = gr.Video(value=None, label='Video', source='upload', width=256, height=256, visible=True) + with gr.Row(): + loop = gr.Checkbox(label='Loop', value=True, visible=False) + pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) + interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) + video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, loop, pad, interpolate]) + return [model, sampler, frames, guidance, offload, override, video_type, duration, loop, pad, interpolate, image, video] + + def load(self, model, txt): + if (shared.sd_model_type != 'cogvideox' or shared.sd_model.sd_model_checkpoint != model) and model != 'None': + sd_models.unload_model_weights('model') + shared.log.info(f'CogVideoX load: model="{model}"') + try: + shared.sd_model = None + shared.sd_model = diffusers.CogVideoXPipeline.from_pretrained(model, torch_dtype=devices.dtype, cache_dir=shared.opts.diffusers_dir) + shared.sd_model.sd_checkpoint_info = sd_models.CheckpointInfo(model) + shared.sd_model.sd_model_hash = '' + shared.sd_model.sd_model_checkpoint = model + except Exception as e: + shared.log.error(f'Loading CogVideoX: {e}') + if debug: + errors.display(e, 'CogVideoX') + if shared.sd_model_type == 'cogvideox' and model != 'None': + shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXPipeline if txt else diffusers.CogVideoXVideoToVideoPipeline, shared.sd_model) + shared.sd_model.set_progress_bar_config(bar_format='Progress {rate_fmt}{postfix} {bar} {percentage:3.0f}% {n_fmt}/{total_fmt} {elapsed} {remaining} ' + '\x1b[38;5;71m', ncols=80, colour='#327fba') + shared.log.debug(f'CogVideoX load: class="{shared.sd_model.__class__.__name__}"') + if shared.sd_model is not None and model == 'None': + shared.log.info(f'CogVideoX unload: model={model}') + shared.sd_model = None + devices.torch_gc(force=True) + devices.torch_gc() + + def offload(self, offload): + if shared.sd_model_type != 'cogvideox': + return + if offload == 'none': + sd_models.move_model(shared.sd_model, devices.device) + shared.log.debug(f'CogVideoX: offload={offload}') + if offload == 'balanced': + sd_models.apply_balanced_offload(shared.sd_model) + if offload == 'model': + shared.sd_model.enable_model_cpu_offload() + if offload == 'sequential': + shared.sd_model.enable_model_cpu_offload() + shared.sd_model.enable_sequential_cpu_offload() + shared.sd_model.vae.enable_slicing() + shared.sd_model.vae.enable_tiling() + + def 
video(self, p, fn): + frames = [] + try: + from modules.control.util import decode_fourcc + video = cv2.VideoCapture(fn) + if not video.isOpened(): + shared.log.error(f'Video: file="{fn}" open failed') + return frames + frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + fps = int(video.get(cv2.CAP_PROP_FPS)) + w, h = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + codec = decode_fourcc(video.get(cv2.CAP_PROP_FOURCC)) + shared.log.debug(f'CogVideoX input: video="{fn}" fps={fps} width={w} height={h} codec={codec} frames={frame_count} target={len(frames)}') + frames = [] + while True: + ok, frame = video.read() + if not ok: + break + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + frame = cv2.resize(frame, (p.width, p.height)) + frames.append(frame) + video.release() + if len(frames) > p.frames: + frames = np.asarray(frames) + indices = np.linspace(0, len(frames) - 1, p.frames).astype(int) # reduce array from n_frames to p_frames + frames = frames[indices] + shared.log.debug(f'CogVideoX input reduce: source={len(frames)} target={p.frames}') + frames = [transforms.ToTensor()(frame) for frame in frames] + except Exception as e: + shared.log.error(f'Video: file="{fn}" {e}') + if debug: + errors.display(e, 'CogVideoX') + return frames + + """ + def image(self, p, img): + shared.log.debug(f'CogVideoX input: image={img}') + img = img.resize((p.width, p.height)) + frames = [np.array(img)] + frames = [transforms.ToTensor()(frame) for frame in frames] + return frames + """ + + def generate(self, p: processing.StableDiffusionProcessing): + if shared.sd_model_type != 'cogvideox': + return [] + shared.log.info(f'CogVideoX: sampler={p.sampler} steps={p.steps} frames={p.frames} width={p.width} height={p.height} seed={p.seed} guidance={p.guidance}') + if p.sampler == 'DDIM': + shared.sd_model.scheduler = diffusers.CogVideoXDDIMScheduler.from_config(shared.sd_model.scheduler.config, timestep_spacing="trailing") + if p.sampler == 'DPM': + shared.sd_model.scheduler = diffusers.CogVideoXDPMScheduler.from_config(shared.sd_model.scheduler.config, timestep_spacing="trailing") + t0 = time.time() + frames = [] + set_callbacks_p(p) + shared.state.job_count = 1 + shared.state.sampling_steps = p.steps - 1 + try: + args = dict( + prompt=p.prompt, + negative_prompt=p.negative_prompt, + height=p.height, + width=p.width, + num_videos_per_prompt=1, + num_inference_steps=p.steps, + guidance_scale=p.guidance, + generator=torch.Generator(device=devices.device).manual_seed(p.seed), + callback_on_step_end=diffusers_callback, + callback_on_step_end_tensor_inputs=['latents'], + ) + if getattr(p, 'image', False): + raise ValueError('CogVideoX: image not supported') # TODO image2video + # args['latents'] = self.image(p, p.image) + elif getattr(p, 'video', False): + args['video'] = self.video(p, p.video) + else: + args['num_frames'] = p.frames # only txt2vid has num_frames + if debug: + shared.log.debug(f'CogVideoX args: {args}') + frames = shared.sd_model(**args).frames[0] + except AssertionError as e: + shared.log.info(f'CogVideoX: {e}') + except Exception as e: + shared.log.error(f'CogVideoX: {e}') + if debug: + errors.display(e, 'CogVideoX') + t1 = time.time() + its = (len(frames) * p.steps) / (t1 - t0) + shared.log.info(f'CogVideoX: frames={len(frames)} its={its:.2f} time={t1 - t0:.2f}') + return frames + + # auto-executed by the script-callback + def run(self, p: processing.StableDiffusionProcessing, model, sampler, frames, guidance, offload, override, video_type, duration, loop, 
pad, interpolate, image, video): # pylint: disable=arguments-differ, unused-argument
+        shared.state.begin('CogVideoX')
+        processing.fix_seed(p)
+        p.extra_generation_params['CogVideoX'] = model
+        p.do_not_save_grid = True
+        if 'cogvideox' not in p.ops:
+            p.ops.append('cogvideox')
+        if override:
+            p.width = 720
+            p.height = 480
+        p.sampler = sampler
+        p.guidance = guidance
+        p.frames = frames
+        p.use_dynamic_cfg = sampler == 'DPM'
+        p.prompt = shared.prompt_styles.apply_styles_to_prompt(p.prompt, p.styles)
+        p.negative_prompt = shared.prompt_styles.apply_negative_styles_to_prompt(p.negative_prompt, p.styles)
+        p.image = image
+        p.video = video
+        txt = image is None and video is None
+        self.load(model, txt)
+        self.offload(offload)
+        frames = self.generate(p)
+        devices.torch_gc()
+        processed = processing.Processed(p, images_list=frames)
+        shared.state.end()
+        return processed
+
+    # auto-executed by the script-callback
+    def after(self, p: processing.StableDiffusionProcessing, processed: processing.Processed, model, sampler, frames, guidance, offload, override, video_type, duration, loop, pad, interpolate, image, video): # pylint: disable=arguments-differ, unused-argument
+        if video_type != 'None' and processed is not None and len(processed.images) > 0:
+            from modules.images import save_video
+            shared.log.info(f'CogVideoX video: type={video_type} duration={duration} loop={loop} pad={pad} interpolate={interpolate}')
+            save_video(p, filename=None, images=processed.images, video_type=video_type, duration=duration, loop=loop, pad=pad, interpolate=interpolate)
diff --git a/scripts/face_details.py b/scripts/face_details.py
index 004c775f6..7aca8a528 100644
--- a/scripts/face_details.py
+++ b/scripts/face_details.py
@@ -161,7 +161,7 @@ def restore(self, np_image, p: processing.StableDiffusionProcessing = None):
             p.negative_prompt = orig_p.get('all_negative_prompts', [''])[0]
         report = [{'score': f.score, 'size': f'{f.width}x{f.height}' } for f in faces]
-        shared.log.debug(f'Face HiRes: faces={report} args={faces[0].args} denoise={p.denoising_strength} blur={p.mask_blur} resolution={p.width}x{p.height} padding={p.inpaint_full_res_padding}')
+        shared.log.debug(f'Face HiRes: faces={report} args={faces[0].args} denoise={p.denoising_strength} blur={p.mask_blur} width={p.width} height={p.height} padding={p.inpaint_full_res_padding}')
         mask_all = []
         for face in faces:
diff --git a/scripts/hdr.py b/scripts/hdr.py
new file mode 100644
index 000000000..788c0add2
--- /dev/null
+++ b/scripts/hdr.py
@@ -0,0 +1,100 @@
+import os
+import cv2
+import numpy as np
+import gradio as gr
+from PIL import Image
+import modules.scripts as scripts
+from modules import images, processing, shared
+from modules.processing import Processed
+from modules.shared import opts, state
+
+
+class Script(scripts.Script):
+    def title(self):
+        return "HDR"
+
+    def show(self, is_img2img):
+        return True
+
+    def ui(self, is_img2img):
+        with gr.Row():
+            gr.HTML("  High Dynamic Range
") + with gr.Row(): + save_hdr = gr.Checkbox(label="Save HDR image", value=True) + hdr_range = gr.Slider(minimum=0, maximum=1, step=0.05, value=0.65, label='HDR range') + with gr.Row(): + is_tonemap = gr.Checkbox(label="Enable tonemap", value=False) + gamma = gr.Slider(minimum=0, maximum=2, step=0.05, value=1.0, label='Gamma', visible=False) + with gr.Row(): + scale = gr.Slider(minimum=0, maximum=2, step=0.05, value=1.0, label='Scale', visible=False) + saturation = gr.Slider(minimum=0, maximum=2, step=0.05, value=1.0, label='Saturation', visible=False) + is_tonemap.change(fn=self.change_tonemap, inputs=[is_tonemap], outputs=[gamma, scale, saturation]) + return [hdr_range, save_hdr, is_tonemap, gamma, scale, saturation] + + def change_tonemap(self, is_tonemap): + return [gr.update(visible=is_tonemap), gr.update(visible=is_tonemap), gr.update(visible=is_tonemap)] + + def merge(self, imgs: list, is_tonemap: bool, gamma, scale, saturation): + shared.log.info(f'HDR: merge images={len(imgs)} tonemap={is_tonemap} sgamma={gamma} scale={scale} saturation={saturation}') + imgs_np = [np.asarray(img).astype(np.uint8) for img in imgs] + + align = cv2.createAlignMTB() + align.process(imgs_np, imgs_np) + + # cv2.createMergeRobertson() + # cv2.createMergeDebevec() + merge = cv2.createMergeMertens() + hdr = merge.process(imgs_np) + + # cv2.createTonemapDrago() + # cv2.createTonemapReinhard() + if is_tonemap: + tonemap = cv2.createTonemapMantiuk(gamma, scale, saturation) + hdr = tonemap.process(hdr) + + ldr = np.clip(hdr * 255, 0, 255).astype(np.uint8) + hdr = np.clip(hdr * 65535, 0, 65535).astype(np.uint16) + hdr = cv2.cvtColor(hdr, cv2.COLOR_BGR2RGB) + return hdr, ldr + + def run(self, p, hdr_range, save_hdr, is_tonemap, gamma, scale, saturation): # pylint: disable=arguments-differ + if shared.sd_model_type != 'sd' and shared.sd_model_type != 'sdxl': + shared.log.error(f'HDR: incorrect base model: {shared.sd_model.__class__.__name__}') + return + p.extra_generation_params = { + "HDR range": hdr_range, + } + shared.log.info(f'HDR: range={hdr_range}') + processing.fix_seed(p) + imgs = [] + info = '' + for i in range(3): + p.n_iter = 1 + p.batch_size = 1 + p.do_not_save_grid = True + p.hdr_brightness = (i - 1) * (2.0 * hdr_range) + p.hdr_mode = 0 + p.task_args['seed'] = p.seed + processed: processing.Processed = processing.process_images(p) + imgs += processed.images + if i == 1: + info = processed.info + if state.interrupted: + break + + if len(imgs) > 1: + hdr, ldr = self.merge(imgs, is_tonemap, gamma, scale, saturation) + img = Image.fromarray(ldr) + if save_hdr: + saved_fn, _txt, _exif = images.save_image(img, shared.opts.outdir_save, "", p.seed, p.prompt, opts.grid_format, info=processed.info, p=p) + fn = os.path.splitext(saved_fn)[0] + '-hdr.png' + # cv2.imwrite(fn, hdr, [cv2.IMWRITE_PNG_COMPRESSION, 6, cv2.IMWRITE_PNG_STRATEGY, cv2.IMWRITE_PNG_STRATEGY_HUFFMAN_ONLY, cv2.IMWRITE_HDR_COMPRESSION, cv2.IMWRITE_HDR_COMPRESSION_RLE]) + cv2.imwrite(fn, hdr) + shared.log.debug(f'Save: image="{fn}" type=PNG mode=HDR channels=16 size={os.path.getsize(fn)}') + # if opts.grid_save: + # images.save_image(grid, p.outpath_grids, "grid", p.seed, p.prompt, opts.grid_format, info=processed.info, grid=True, p=p) + grid = [images.image_grid(imgs, rows=1)] if opts.return_grid else [] + imgs = [img] + grid + + processed = Processed(p, images_list=imgs, seed=p.seed, info=info) + return processed diff --git a/scripts/lut.py b/scripts/lut.py new file mode 100644 index 000000000..3d240f291 --- /dev/null +++ b/scripts/lut.py @@ 
-0,0 +1,67 @@ +""" +downloads: https://luts.iwltbap.com/ +lib: https://github.com/homm/pillow-lut-tools +""" +import os +import gradio as gr +from installer import install +from modules import scripts, shared, processing + + +class Script(scripts.Script): + def title(self): + return 'LUT Color grading' + + def show(self, is_img2img): + return shared.native + + def ui(self, _is_img2img): + with gr.Row(): + gr.HTML("  Color grading
") + with gr.Row(): + original = gr.Checkbox(label='Include original image', value=True) + with gr.Row(): + cube_file = gr.File(label='LUT .cube file', type='file', help='Download LUTs from https://luts.iwltbap.com/') + with gr.Row(): + gr.HTML("
Enhance LUT") + with gr.Row(): + cube_scale = gr.Slider(label='Amplify LUT', minimum=0.0, maximum=5.0, step=0.05, value=1.0) + brightness = gr.Slider(label='Brightness', minimum=-1, maximum=1, step=0.05, value=0) + exposure = gr.Slider(label='Exposure', minimum=-5, maximum=5, step=0.05, value=0) + contrast = gr.Slider(label='Contrast', minimum=-1, maximum=1, step=0.05, value=0) + warmth = gr.Slider(label='Warmth', minimum=-1, maximum=1, step=0.05, value=0) + saturation = gr.Slider(label='Saturation', minimum=-1, maximum=5, step=0.05, value=0) + vibrance = gr.Slider(label='Vibrance', minimum=-1, maximum=5, step=0.05, value=0) + hue = gr.Slider(label='Hue', minimum=0, maximum=1, step=0.05, value=0) + gamma = gr.Slider(label='Gamma', minimum=0, maximum=10.0, step=0.1, value=1.0) + return [original, cube_file, cube_scale, brightness, exposure, contrast, warmth, saturation, vibrance, hue, gamma] + + # auto-executed by the script-callback + def after(self, p: processing.StableDiffusionProcessing, processed: processing.Processed, original, cube_file, cube_scale, brightness, exposure, contrast, warmth, saturation, vibrance, hue, gamma): # pylint: disable=arguments-differ, unused-argument + install('pillow_lut', quiet=True) + import pillow_lut + + cube = None + name = os.path.splitext(os.path.basename(cube_file.name))[0] if cube_file is not None else None + shared.log.info(f'Color grading: cube="{name}" scale={cube_scale} brightness={brightness} exposure={exposure} contrast={contrast} warmth={warmth} saturation={saturation} vibrance={vibrance} hue={hue} gamma={gamma}') + if cube_file is not None: + try: + cube = pillow_lut.load_cube_file(cube_file.name) + cube = pillow_lut.amplify_lut(cube, cube_scale) + cube = pillow_lut.rgb_color_enhance(source=cube, brightness=brightness, exposure=exposure, contrast=contrast, warmth=warmth, saturation=saturation, vibrance=vibrance, hue=hue, gamma=gamma) + except Exception as e: + shared.log.error(f'Color grading: {e}') + + images = [] + if processed is not None and len(processed.images) > 0: + for image in processed.images: + info = image.info.get('parameters', '') + if original: + images.append(image) + if cube is not None: + filtered = image.filter(cube) + filtered.info['parameters'] = f'{info}, LUT: {name}' + images.append(filtered) + processed.images = images + + return processed diff --git a/scripts/postprocessing_upscale.py b/scripts/postprocessing_upscale.py index 066aa30d6..e19d577ee 100644 --- a/scripts/postprocessing_upscale.py +++ b/scripts/postprocessing_upscale.py @@ -21,8 +21,8 @@ def ui(self): with gr.TabItem('Scale to', elem_id="extras_scale_to_tab") as tab_scale_to: with gr.Row(): with gr.Row(elem_id="upscaling_column_size"): - upscaling_resize_w = gr.Slider(minimum=64, maximum=4096, step=8, label="Width", value=512, elem_id="extras_upscaling_resize_w") - upscaling_resize_h = gr.Slider(minimum=64, maximum=4096, step=8, label="Height", value=512, elem_id="extras_upscaling_resize_h") + upscaling_resize_w = gr.Slider(minimum=64, maximum=4096, step=8, label="Width", value=1024, elem_id="extras_upscaling_resize_w") + upscaling_resize_h = gr.Slider(minimum=64, maximum=4096, step=8, label="Height", value=1024, elem_id="extras_upscaling_resize_h") upscaling_res_switch_btn = ToolButton(value=symbols.switch, elem_id="upscaling_res_switch_btn") upscaling_crop = gr.Checkbox(label='Crop to fit', value=True, elem_id="extras_upscaling_crop") diff --git a/scripts/prompt_enhance.py b/scripts/prompt_enhance.py new file mode 100644 index 000000000..2fb513e76 --- 
/dev/null +++ b/scripts/prompt_enhance.py @@ -0,0 +1,96 @@ +# repo: https://huggingface.co/gokaygokay/Flux-Prompt-Enhance + +import time +import random +from transformers import AutoTokenizer, AutoModelForSeq2SeqLM +import gradio as gr +from modules import shared, scripts, devices, processing + + +repo_id = "gokaygokay/Flux-Prompt-Enhance" +num_return_sequences = 5 + + +class Script(scripts.Script): + prompts = [['']] + tokenizer: AutoTokenizer = None + model: AutoModelForSeq2SeqLM = None + prefix: str = "enhance prompt: " + button: gr.Button = None + auto_apply: gr.Checkbox = None + max_length: gr.Slider = None + temperature: gr.Slider = None + repetition_penalty: gr.Slider = None + table: gr.DataFrame = None + prompt: gr.Textbox = None + + def title(self): + return 'Prompt enhance' + + def show(self, is_img2img): + return shared.native + + def load(self): + if self.tokenizer is None: + self.tokenizer = AutoTokenizer.from_pretrained('gokaygokay/Flux-Prompt-Enhance', cache_dir=shared.opts.diffusers_dir) + if self.model is None: + shared.log.info(f'Prompt enhance: model="{repo_id}"') + self.model = AutoModelForSeq2SeqLM.from_pretrained('gokaygokay/Flux-Prompt-Enhance', cache_dir=shared.opts.diffusers_dir).to(device=devices.cpu, dtype=devices.dtype) + + def enhance(self, prompt, auto_apply: bool = False, temperature: float = 0.7, repetition_penalty: float = 1.2, max_length: int = 128): + self.load() + t0 = time.time() + input_text = self.prefix + prompt + input_ids = self.tokenizer(input_text, return_tensors="pt").input_ids.to(devices.device) + self.model = self.model.to(devices.device) + kwargs = { + 'max_length': int(max_length), + 'num_return_sequences': int(num_return_sequences), + 'do_sample': True, + 'temperature': float(temperature), + 'repetition_penalty': float(repetition_penalty), + } + try: + outputs = self.model.generate(input_ids, **kwargs) + except Exception as e: + shared.log.error(f'Prompt enhance: error="{e}"') + return [['']] + self.model = self.model.to(devices.cpu) + prompts = self.tokenizer.batch_decode(outputs, skip_special_tokens=True) + prompts = [[p] for p in prompts] + t1 = time.time() + shared.log.info(f'Prompt enhance: temperature={temperature} repetition={repetition_penalty} length={max_length} sequences={num_return_sequences} apply={auto_apply} time={t1-t0:.2f}s') + return prompts + + def select(self, cell: gr.SelectData, _table): + prompt = cell.value if hasattr(cell, 'value') else cell + shared.log.info(f'Prompt enhance: prompt="{prompt}"') + return prompt + + def ui(self, _is_img2img): + with gr.Row(): + self.button = gr.Button(value='Enhance prompt') + self.auto_apply = gr.Checkbox(label='Auto apply', default=False) + with gr.Row(): + self.max_length = gr.Slider(label='Length', minimum=64, maximum=512, step=1, value=128) + self.temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=2.0, step=0.05, value=0.7) + self.repetition_penalty = gr.Slider(label='Penalty', minimum=0.1, maximum=2.0, step=0.05, value=1.2) + with gr.Row(): + self.table = gr.DataFrame(self.prompts, label='', show_label=False, interactive=False, wrap=True, datatype="str", col_count=1, max_rows=num_return_sequences, headers=['Prompts']) + + if self.prompt is not None: + self.button.click(fn=self.enhance, inputs=[self.prompt, self.auto_apply, self.temperature, self.repetition_penalty, self.max_length], outputs=[self.table]) + self.table.select(fn=self.select, inputs=[self.table], outputs=[self.prompt]) + return [self.auto_apply, self.temperature, self.repetition_penalty, 
self.max_length] + + def run(self, p: processing.StableDiffusionProcessing, auto_apply, temperature, repetition_penalty, max_length): # pylint: disable=arguments-differ + if auto_apply: + p.prompt = shared.prompt_styles.apply_styles_to_prompt(p.prompt, p.styles) + shared.log.debug(f'Prompt enhance: source="{p.prompt}"') + prompts = self.enhance(p.prompt, auto_apply, temperature, repetition_penalty, max_length) + p.prompt = random.choice(prompts)[0] + shared.log.debug(f'Prompt enhance: prompt="{p.prompt}"') + + def after_component(self, component, **kwargs): # searching for actual ui prompt components + if getattr(component, 'elem_id', '') in ['txt2img_prompt', 'img2img_prompt', 'control_prompt']: + self.prompt = component diff --git a/scripts/prompt_matrix.py b/scripts/prompt_matrix.py index 39c0fe993..5babf20e7 100644 --- a/scripts/prompt_matrix.py +++ b/scripts/prompt_matrix.py @@ -39,7 +39,7 @@ def draw_xy_grid(xs, ys, x_label, y_label, cell): class Script(scripts.Script): def title(self): - return "Prompt Matrix" + return "Prompt matrix" def ui(self, is_img2img): with gr.Row(): diff --git a/scripts/prompts_from_file.py b/scripts/prompts_from_file.py index 3e9745f6b..465a44974 100644 --- a/scripts/prompts_from_file.py +++ b/scripts/prompts_from_file.py @@ -92,7 +92,7 @@ def load_prompt_file(file): class Script(scripts.Script): def title(self): - return "Prompts from File" + return "Prompts from file" def ui(self, is_img2img): with gr.Row(): diff --git a/scripts/x_adapter.py b/scripts/x_adapter.py index 58d6fb9eb..7c1341701 100644 --- a/scripts/x_adapter.py +++ b/scripts/x_adapter.py @@ -25,8 +25,8 @@ def ui(self, _is_img2img): model = gr.Dropdown(label='Adapter model', choices=['None'] + sd_models.checkpoint_tiles(), value='None') sampler = gr.Dropdown(label='Adapter sampler', choices=[s.name for s in sd_samplers.samplers], value='Default') with gr.Row(): - width = gr.Slider(label='Adapter width', minimum=64, maximum=2048, step=8, value=512) - height = gr.Slider(label='Adapter height', minimum=64, maximum=2048, step=8, value=512) + width = gr.Slider(label='Adapter width', minimum=64, maximum=2048, step=8, value=1024) + height = gr.Slider(label='Adapter height', minimum=64, maximum=2048, step=8, value=1024) with gr.Row(): start = gr.Slider(label='Adapter start', minimum=0.0, maximum=1.0, step=0.01, value=0.5) scale = gr.Slider(label='Adapter scale', minimum=0.0, maximum=1.0, step=0.01, value=1.0) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index 75b4760bb..2800a3722 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -18,18 +18,21 @@ def apply_field(field): def fun(p, x, xs): + shared.log.debug(f'XYZ grid apply field: {field}={x}') setattr(p, field, x) return fun def apply_task_args(field): def fun(p, x, xs): + shared.log.debug(f'XYZ grid apply task-arg: {field}={x}') p.task_args[field] = x return fun def apply_setting(field): def fun(p, x, xs): + shared.log.debug(f'XYZ grid apply setting: {field}={x}') shared.opts.data[field] = x return fun @@ -40,6 +43,7 @@ def apply_prompt(p, x, xs): else: p.prompt = p.prompt.replace(xs[0], x) p.negative_prompt = p.negative_prompt.replace(xs[0], x) + shared.log.debug(f'XYZ grid apply prompt: "{xs[0]}"="{x}"') def apply_order(p, x, xs): @@ -65,6 +69,7 @@ def apply_sampler(p, x, xs): shared.log.warning(f"XYZ grid: unknown sampler: {x}") else: p.sampler_name = sampler_name + shared.log.debug(f'XYZ grid apply sampler: "{x}"') def apply_hr_sampler_name(p, x, xs): @@ -73,6 +78,7 @@ def apply_hr_sampler_name(p, x, xs): 
shared.log.warning(f"XYZ grid: unknown sampler: {x}") else: p.hr_sampler_name = hr_sampler_name + shared.log.debug(f'XYZ grid apply HR sampler: "{x}"') def confirm_samplers(p, xs): @@ -90,6 +96,7 @@ def apply_checkpoint(p, x, xs): else: sd_models.reload_model_weights(shared.sd_model, info) p.override_settings['sd_model_checkpoint'] = info.name + shared.log.debug(f'XYZ grid apply checkpoint: "{x}"') def apply_refiner(p, x, xs): @@ -103,6 +110,7 @@ def apply_refiner(p, x, xs): else: sd_models.reload_model_weights(shared.sd_refiner, info) p.override_settings['sd_model_refiner'] = info.name + shared.log.debug(f'XYZ grid apply refiner: "{x}"') def apply_dict(p, x, xs): @@ -116,11 +124,13 @@ def apply_dict(p, x, xs): shared.opts.sd_model_dict = info_dict.name # this will trigger reload_model_weights via onchange handler p.override_settings['sd_model_checkpoint'] = info_ckpt.name p.override_settings['sd_model_dict'] = info_dict.name + shared.log.debug(f'XYZ grid apply model dict: "{x}"') def apply_clip_skip(p, x, xs): p.clip_skip = x shared.opts.data["clip_skip"] = x + shared.log.debug(f'XYZ grid apply clip-skip: "{x}"') def find_vae(name: str): @@ -139,6 +149,7 @@ def find_vae(name: str): def apply_vae(p, x, xs): sd_vae.reload_vae_weights(shared.sd_model, vae_file=find_vae(x)) + shared.log.debug(f'XYZ grid apply VAE: "{x}"') def list_lora(): @@ -153,15 +164,18 @@ def apply_lora(p, x, xs): return x = os.path.basename(x) p.prompt = p.prompt + f" " + shared.log.debug(f'XYZ grid apply LoRA: "{x}"') def apply_te(p, x, xs): shared.opts.data["sd_text_encoder"] = x sd_models.reload_text_encoder() + shared.log.debug(f'XYZ grid apply text-encoder: "{x}"') def apply_styles(p: processing.StableDiffusionProcessingTxt2Img, x: str, _): p.styles.extend(x.split(',')) + shared.log.debug(f'XYZ grid apply style: "{x}"') def apply_upscaler(p: processing.StableDiffusionProcessingTxt2Img, opt, x): @@ -169,6 +183,13 @@ def apply_upscaler(p: processing.StableDiffusionProcessingTxt2Img, opt, x): p.hr_force = True p.denoising_strength = 0.0 p.hr_upscaler = opt + shared.log.debug(f'XYZ grid apply upscaler: "{x}"') + + +def apply_context(p: processing.StableDiffusionProcessingTxt2Img, opt, x): + p.resize_mode = 5 + p.resize_context = opt + shared.log.debug(f'XYZ grid apply resize-context: "{x}"') def apply_face_restore(p, opt, x): @@ -182,11 +203,13 @@ def apply_face_restore(p, opt, x): else: is_active = opt in ('true', 'yes', 'y', '1') p.restore_faces = is_active + shared.log.debug(f'XYZ grid apply face-restore: "{x}"') def apply_override(field): def fun(p, x, xs): p.override_settings[field] = x + shared.log.debug(f'XYZ grid apply override: "{field}"="{x}"') return fun @@ -289,6 +312,7 @@ def __init__(self, *args, **kwargs): AxisOption("[Refiner] Refiner start", float, apply_field("refiner_start")), AxisOption("[Refiner] Refiner steps", float, apply_field("refiner_steps")), AxisOption("[Postprocess] Upscaler", str, apply_upscaler, choices=lambda: [x.name for x in shared.sd_upscalers][1:]), + AxisOption("[Postprocess] Context", str, apply_context, choices=lambda: ["Add with forward", "Remove with forward", "Add with backward", "Remove with backward"]), AxisOption("[Postprocess] Face restore", str, apply_face_restore, fmt=format_value), AxisOption("[HDR] Mode", int, apply_field("hdr_mode")), AxisOption("[HDR] Brightness", float, apply_field("hdr_brightness")), diff --git a/scripts/xyz_grid_on.py b/scripts/xyz_grid_on.py new file mode 100644 index 000000000..ac362ba06 --- /dev/null +++ b/scripts/xyz_grid_on.py @@ -0,0 +1,832 
@@ +# pylint: disable=unused-argument + +import os +import re +import csv +import random +from collections import namedtuple +from copy import copy +from itertools import permutations, chain +from io import StringIO +from PIL import Image +import numpy as np +import gradio as gr +from modules import shared, errors, scripts, images, sd_samplers, processing, sd_models, sd_vae, ipadapter +from modules.ui_components import ToolButton +import modules.ui_symbols as symbols + + +active = False +cache = None + + +def apply_field(field): + def fun(p, x, xs): + shared.log.debug(f'XYZ grid apply field: {field}={x}') + setattr(p, field, x) + return fun + + +def apply_task_args(field): + def fun(p, x, xs): + shared.log.debug(f'XYZ grid apply task-arg: {field}={x}') + p.task_args[field] = x + return fun + + +def apply_setting(field): + def fun(p, x, xs): + shared.log.debug(f'XYZ grid apply setting: {field}={x}') + shared.opts.data[field] = x + return fun + + +def apply_prompt(p, x, xs): + if xs[0] not in p.prompt and xs[0] not in p.negative_prompt: + shared.log.warning(f"XYZ grid: prompt S/R did not find {xs[0]} in prompt or negative prompt.") + else: + p.prompt = p.prompt.replace(xs[0], x) + p.all_prompts = p.batch_size * [p.prompt] + p.negative_prompt = p.negative_prompt.replace(xs[0], x) + p.all_negative_prompts = p.batch_size * [p.negative_prompt] + shared.log.debug(f'XYZ grid apply prompt: "{xs[0]}"="{x}"') + + +def apply_order(p, x, xs): + token_order = [] + for token in x: + token_order.append((p.prompt.find(token), token)) + token_order.sort(key=lambda t: t[0]) + prompt_parts = [] + for _, token in token_order: + n = p.prompt.find(token) + prompt_parts.append(p.prompt[0:n]) + p.prompt = p.prompt[n + len(token):] + prompt_tmp = "" + for idx, part in enumerate(prompt_parts): + prompt_tmp += part + prompt_tmp += x[idx] + p.prompt = prompt_tmp + p.prompt + + +def apply_sampler(p, x, xs): + sampler_name = sd_samplers.samplers_map.get(x.lower(), None) + if sampler_name is None: + shared.log.warning(f"XYZ grid: unknown sampler: {x}") + else: + p.sampler_name = sampler_name + shared.log.debug(f'XYZ grid apply sampler: "{x}"') + + +def apply_hr_sampler_name(p, x, xs): + hr_sampler_name = sd_samplers.samplers_map.get(x.lower(), None) + if hr_sampler_name is None: + shared.log.warning(f"XYZ grid: unknown sampler: {x}") + else: + p.hr_sampler_name = hr_sampler_name + shared.log.debug(f'XYZ grid apply HR sampler: "{x}"') + + +def confirm_samplers(p, xs): + for x in xs: + if x.lower() not in sd_samplers.samplers_map: + shared.log.warning(f"XYZ grid: unknown sampler: {x}") + + +def apply_checkpoint(p, x, xs): + if x == shared.opts.sd_model_checkpoint: + return + info = sd_models.get_closet_checkpoint_match(x) + if info is None: + shared.log.warning(f"XYZ grid: apply checkpoint unknown checkpoint: {x}") + else: + sd_models.reload_model_weights(shared.sd_model, info) + p.override_settings['sd_model_checkpoint'] = info.name + shared.log.debug(f'XYZ grid apply checkpoint: "{x}"') + + +def apply_refiner(p, x, xs): + if x == shared.opts.sd_model_refiner: + return + if x == 'None': + return + info = sd_models.get_closet_checkpoint_match(x) + if info is None: + shared.log.warning(f"XYZ grid: apply refiner unknown checkpoint: {x}") + else: + sd_models.reload_model_weights(shared.sd_refiner, info) + p.override_settings['sd_model_refiner'] = info.name + shared.log.debug(f'XYZ grid apply refiner: "{x}"') + + +def apply_dict(p, x, xs): + if x == shared.opts.sd_model_dict: + return + info_dict = 
sd_models.get_closet_checkpoint_match(x) + info_ckpt = sd_models.get_closet_checkpoint_match(shared.opts.sd_model_checkpoint) + if info_dict is None or info_ckpt is None: + shared.log.warning(f"XYZ grid: apply dict unknown checkpoint: {x}") + else: + shared.opts.sd_model_dict = info_dict.name # this will trigger reload_model_weights via onchange handler + p.override_settings['sd_model_checkpoint'] = info_ckpt.name + p.override_settings['sd_model_dict'] = info_dict.name + shared.log.debug(f'XYZ grid apply model dict: "{x}"') + + +def apply_clip_skip(p, x, xs): + p.clip_skip = x + shared.opts.data["clip_skip"] = x + shared.log.debug(f'XYZ grid apply clip-skip: "{x}"') + + +def find_vae(name: str): + if name.lower() in ['auto', 'automatic']: + return sd_vae.unspecified + if name.lower() == 'none': + return None + else: + choices = [x for x in sorted(sd_vae.vae_dict, key=lambda x: len(x)) if name.lower().strip() in x.lower()] + if len(choices) == 0: + shared.log.warning(f"No VAE found for {name}; using automatic") + return sd_vae.unspecified + else: + return sd_vae.vae_dict[choices[0]] + + +def apply_vae(p, x, xs): + sd_vae.reload_vae_weights(shared.sd_model, vae_file=find_vae(x)) + shared.log.debug(f'XYZ grid apply VAE: "{x}"') + + +def list_lora(): + import sys + lora = [v for k, v in sys.modules.items() if k == 'networks'][0] + loras = [v.fullname for v in lora.available_networks.values()] + return ['None'] + loras + + +def apply_lora(p, x, xs): + if x == 'None': + return + x = os.path.basename(x) + p.prompt = p.prompt + f" " + shared.log.debug(f'XYZ grid apply LoRA: "{x}"') + + +def apply_te(p, x, xs): + shared.opts.data["sd_text_encoder"] = x + sd_models.reload_text_encoder() + shared.log.debug(f'XYZ grid apply text-encoder: "{x}"') + + +def apply_styles(p: processing.StableDiffusionProcessingTxt2Img, x: str, _): + p.styles.extend(x.split(',')) + shared.log.debug(f'XYZ grid apply style: "{x}"') + + +def apply_upscaler(p: processing.StableDiffusionProcessingTxt2Img, opt, x): + p.enable_hr = True + p.hr_force = True + p.denoising_strength = 0.0 + p.hr_upscaler = opt + shared.log.debug(f'XYZ grid apply upscaler: "{x}"') + + +def apply_context(p: processing.StableDiffusionProcessingTxt2Img, opt, x): + p.resize_mode = 5 + p.resize_context = opt + shared.log.debug(f'XYZ grid apply resize-context: "{x}"') + + +def apply_face_restore(p, opt, x): + opt = opt.lower() + if opt == 'codeformer': + is_active = True + p.face_restoration_model = 'CodeFormer' + elif opt == 'gfpgan': + is_active = True + p.face_restoration_model = 'GFPGAN' + else: + is_active = opt in ('true', 'yes', 'y', '1') + p.restore_faces = is_active + shared.log.debug(f'XYZ grid apply face-restore: "{x}"') + + +def apply_override(field): + def fun(p, x, xs): + p.override_settings[field] = x + shared.log.debug(f'XYZ grid apply override: "{field}"="{x}"') + return fun + + +def format_value_add_label(p, opt, x): + if type(x) == float: + x = round(x, 8) + return f"{opt.label}: {x}" + + +def format_value(p, opt, x): + if type(x) == float: + x = round(x, 8) + return x + + +def format_value_join_list(p, opt, x): + return ", ".join(x) + + +def do_nothing(p, x, xs): + pass + + +def format_nothing(p, opt, x): + return "" + + +def str_permutations(x): + """dummy function for specifying it in AxisOption's type when you want to get a list of permutations""" + return x + + +def list_to_csv_string(data_list): + with StringIO() as o: + csv.writer(o).writerow(data_list) + return o.getvalue().strip() + + +class AxisOption: + def __init__(self, label, 
tipe, apply, fmt=format_value_add_label, confirm=None, cost=0.0, choices=None): + self.label = label + self.type = tipe + self.apply = apply + self.format_value = fmt + self.confirm = confirm + self.cost = cost + self.choices = choices + + +class AxisOptionImg2Img(AxisOption): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.is_img2img = True + +class AxisOptionTxt2Img(AxisOption): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.is_img2img = False + + +axis_options = [ + AxisOption("Nothing", str, do_nothing, fmt=format_nothing), + AxisOption("Prompt S/R", str, apply_prompt, fmt=format_value), + AxisOption("Model", str, apply_checkpoint, fmt=format_value, cost=1.0, choices=lambda: sorted(sd_models.checkpoints_list)), + AxisOption("VAE", str, apply_vae, cost=0.7, choices=lambda: ['None'] + list(sd_vae.vae_dict)), + AxisOption("LoRA", str, apply_lora, cost=0.5, choices=list_lora), + AxisOption("LoRA strength", float, apply_setting('extra_networks_default_multiplier')), + AxisOption("Text encoder", str, apply_te, cost=0.7, choices=lambda: ['None', 'T5 FP4', 'T5 FP8', 'T5 FP16']), + AxisOption("Styles", str, apply_styles, choices=lambda: [s.name for s in shared.prompt_styles.styles.values()]), + AxisOption("Seed", int, apply_field("seed")), + AxisOption("Steps", int, apply_field("steps")), + AxisOption("CFG scale", float, apply_field("cfg_scale")), + AxisOption("Guidance end", float, apply_field("cfg_end")), + AxisOption("Variation seed", int, apply_field("subseed")), + AxisOption("Variation strength", float, apply_field("subseed_strength")), + AxisOption("Clip skip", float, apply_clip_skip), + AxisOption("Denoising strength", float, apply_field("denoising_strength")), + AxisOption("Prompt order", str_permutations, apply_order, fmt=format_value_join_list), + AxisOption("Model dictionary", str, apply_dict, fmt=format_value, cost=1.0, choices=lambda: ['None'] + list(sd_models.checkpoints_list)), + AxisOptionImg2Img("Image mask weight", float, apply_field("inpainting_mask_weight")), + AxisOptionTxt2Img("[Sampler] Name", str, apply_sampler, fmt=format_value, confirm=confirm_samplers, choices=lambda: [x.name for x in sd_samplers.samplers]), + AxisOptionImg2Img("[Sampler] Name", str, apply_sampler, fmt=format_value, confirm=confirm_samplers, choices=lambda: [x.name for x in sd_samplers.samplers_for_img2img]), + AxisOption("[Sampler] Timestep spacing", str, apply_setting("schedulers_timestep_spacing"), choices=lambda: ['default', 'linspace', 'leading', 'trailing']), + AxisOption("[Sampler] Sigma min", float, apply_field("s_min")), + AxisOption("[Sampler] Sigma max", float, apply_field("s_max")), + AxisOption("[Sampler] Sigma tmin", float, apply_field("s_tmin")), + AxisOption("[Sampler] Sigma tmax", float, apply_field("s_tmax")), + AxisOption("[Sampler] Sigma churn", float, apply_field("s_churn")), + AxisOption("[Sampler] Sigma noise", float, apply_field("s_noise")), + AxisOption("[Sampler] Shift", float, apply_setting("schedulers_shift")), + AxisOption("[Sampler] ETA", float, apply_setting("scheduler_eta")), + AxisOption("[Sampler] Solver order", int, apply_setting("schedulers_solver_order")), + AxisOption("[Second pass] Upscaler", str, apply_field("hr_upscaler"), choices=lambda: [*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]]), + AxisOption("[Second pass] Sampler", str, apply_hr_sampler_name, fmt=format_value, confirm=confirm_samplers, choices=lambda: [x.name for x in sd_samplers.samplers]), + 
AxisOption("[Second pass] Denoising strength", float, apply_field("denoising_strength")), + AxisOption("[Second pass] Hires steps", int, apply_field("hr_second_pass_steps")), + AxisOption("[Second pass] CFG scale", float, apply_field("image_cfg_scale")), + AxisOption("[Second pass] Guidance rescale", float, apply_field("diffusers_guidance_rescale")), + AxisOption("[Refiner] Model", str, apply_refiner, fmt=format_value, cost=1.0, choices=lambda: ['None'] + sorted(sd_models.checkpoints_list)), + AxisOption("[Refiner] Refiner start", float, apply_field("refiner_start")), + AxisOption("[Refiner] Refiner steps", float, apply_field("refiner_steps")), + AxisOption("[Postprocess] Upscaler", str, apply_upscaler, choices=lambda: [x.name for x in shared.sd_upscalers][1:]), + AxisOption("[Postprocess] Context", str, apply_context, choices=lambda: ["Add with forward", "Remove with forward", "Add with backward", "Remove with backward"]), + AxisOption("[Postprocess] Face restore", str, apply_face_restore, fmt=format_value), + AxisOption("[HDR] Mode", int, apply_field("hdr_mode")), + AxisOption("[HDR] Brightness", float, apply_field("hdr_brightness")), + AxisOption("[HDR] Color", float, apply_field("hdr_color")), + AxisOption("[HDR] Sharpen", float, apply_field("hdr_sharpen")), + AxisOption("[HDR] Clamp boundary", float, apply_field("hdr_boundary")), + AxisOption("[HDR] Clamp threshold", float, apply_field("hdr_threshold")), + AxisOption("[HDR] Maximize center shift", float, apply_field("hdr_max_center")), + AxisOption("[HDR] Maximize boundary", float, apply_field("hdr_max_boundry")), + AxisOption("[HDR] Tint color hex", str, apply_field("hdr_color_picker")), + AxisOption("[HDR] Tint ratio", float, apply_field("hdr_tint_ratio")), + AxisOption("[Token Merging] ToMe ratio", float, apply_setting('tome_ratio')), + AxisOption("[Token Merging] ToDo ratio", float, apply_setting('todo_ratio')), + AxisOption("[FreeU] 1st stage backbone factor", float, apply_setting('freeu_b1')), + AxisOption("[FreeU] 2nd stage backbone factor", float, apply_setting('freeu_b2')), + AxisOption("[FreeU] 1st stage skip factor", float, apply_setting('freeu_s1')), + AxisOption("[FreeU] 2nd stage skip factor", float, apply_setting('freeu_s2')), + AxisOption("[IP adapter] Name", str, apply_field('ip_adapter_names'), cost=1.0, choices=lambda: list(ipadapter.ADAPTERS)), + AxisOption("[IP adapter] Scale", float, apply_field('ip_adapter_scales')), + AxisOption("[IP adapter] Starts", float, apply_field('ip_adapter_starts')), + AxisOption("[IP adapter] Ends", float, apply_field('ip_adapter_ends')), + AxisOption("[HiDiffusion] T1", float, apply_override('hidiffusion_t1')), + AxisOption("[HiDiffusion] T2", float, apply_override('hidiffusion_t2')), + AxisOption("[HiDiffusion] Agression step", float, apply_field('hidiffusion_steps')), + AxisOption("[PAG] Attention scale", float, apply_field('pag_scale')), + AxisOption("[PAG] Adaptive scaling", float, apply_field('pag_adaptive')), + AxisOption("[PAG] Applied layers", str, apply_setting('pag_apply_layers')), +] + + +def draw_xyz_grid(p, xs, ys, zs, x_labels, y_labels, z_labels, cell, draw_legend, include_lone_images, include_sub_grids, first_axes_processed, second_axes_processed, margin_size, no_grid): + hor_texts = [[images.GridAnnotation(x)] for x in x_labels] + ver_texts = [[images.GridAnnotation(y)] for y in y_labels] + title_texts = [[images.GridAnnotation(z)] for z in z_labels] + list_size = (len(xs) * len(ys) * len(zs)) + processed_result = None + shared.state.job_count = list_size * p.n_iter 
+
+    def process_cell(x, y, z, ix, iy, iz):
+        nonlocal processed_result
+
+        def index(ix, iy, iz):
+            return ix + iy * len(xs) + iz * len(xs) * len(ys)
+
+        shared.state.job = 'grid'
+        processed: processing.Processed = cell(x, y, z, ix, iy, iz)
+        if processed_result is None:
+            processed_result = copy(processed)
+            if processed_result is None:
+                shared.log.error('XYZ grid: no processing results')
+                return processing.Processed(p, [])
+            processed_result.images = [None] * list_size
+            processed_result.all_prompts = [None] * list_size
+            processed_result.all_seeds = [None] * list_size
+            processed_result.infotexts = [None] * list_size
+            processed_result.index_of_first_image = 1
+        idx = index(ix, iy, iz)
+        if processed is not None and processed.images:
+            processed_result.images[idx] = processed.images[0]
+            processed_result.all_prompts[idx] = processed.prompt
+            processed_result.all_seeds[idx] = processed.seed
+            processed_result.infotexts[idx] = processed.infotexts[0]
+        else:
+            cell_mode = "P"
+            cell_size = (processed_result.width, processed_result.height)
+            if processed_result.images[0] is not None:
+                cell_mode = processed_result.images[0].mode
+                cell_size = processed_result.images[0].size
+            processed_result.images[idx] = Image.new(cell_mode, cell_size)
+
+    if first_axes_processed == 'x':
+        for ix, x in enumerate(xs):
+            if second_axes_processed == 'y':
+                for iy, y in enumerate(ys):
+                    for iz, z in enumerate(zs):
+                        process_cell(x, y, z, ix, iy, iz)
+            else:
+                for iz, z in enumerate(zs):
+                    for iy, y in enumerate(ys):
+                        process_cell(x, y, z, ix, iy, iz)
+    elif first_axes_processed == 'y':
+        for iy, y in enumerate(ys):
+            if second_axes_processed == 'x':
+                for ix, x in enumerate(xs):
+                    for iz, z in enumerate(zs):
+                        process_cell(x, y, z, ix, iy, iz)
+            else:
+                for iz, z in enumerate(zs):
+                    for ix, x in enumerate(xs):
+                        process_cell(x, y, z, ix, iy, iz)
+    elif first_axes_processed == 'z':
+        for iz, z in enumerate(zs):
+            if second_axes_processed == 'x':
+                for ix, x in enumerate(xs):
+                    for iy, y in enumerate(ys):
+                        process_cell(x, y, z, ix, iy, iz)
+            else:
+                for iy, y in enumerate(ys):
+                    for ix, x in enumerate(xs):
+                        process_cell(x, y, z, ix, iy, iz)
+
+    if not processed_result:
+        shared.log.error("XYZ grid: Failed to initialize processing")
+        return processing.Processed(p, [])
+    elif not any(processed_result.images):
+        shared.log.error("XYZ grid: Failed to return processed image")
+        return processing.Processed(p, [])
+
+    z_count = len(zs)
+    for i in range(z_count):
+        start_index = (i * len(xs) * len(ys)) + i
+        end_index = start_index + len(xs) * len(ys)
+        if (not no_grid or include_sub_grids) and images.check_grid_size(processed_result.images[start_index:end_index]):
+            grid = images.image_grid(processed_result.images[start_index:end_index], rows=len(ys))
+            if draw_legend:
+                grid = images.draw_grid_annotations(grid, processed_result.images[start_index].size[0], processed_result.images[start_index].size[1], hor_texts, ver_texts, margin_size, title=title_texts[i])
+            processed_result.images.insert(i, grid)
+            processed_result.all_prompts.insert(i, processed_result.all_prompts[start_index])
+            processed_result.all_seeds.insert(i, processed_result.all_seeds[start_index])
+            processed_result.infotexts.insert(i, processed_result.infotexts[start_index])
+    sub_grid_size = processed_result.images[0].size
+    if not no_grid and images.check_grid_size(processed_result.images[:z_count]):
+        z_grid = images.image_grid(processed_result.images[:z_count], rows=1)
+        if draw_legend:
+            z_grid = images.draw_grid_annotations(z_grid, sub_grid_size[0], sub_grid_size[1], [[images.GridAnnotation()] for _ in z_labels], [[images.GridAnnotation()]])
+        processed_result.images.insert(0, z_grid)
+        #processed_result.all_prompts.insert(0, processed_result.all_prompts[0])
+        #processed_result.all_seeds.insert(0, processed_result.all_seeds[0])
+        processed_result.infotexts.insert(0, processed_result.infotexts[0])
+    return processed_result
+
+
+class SharedSettingsStackHelper(object):
+    vae = None
+    schedulers_solver_order = None
+    tome_ratio = None
+    todo_ratio = None
+    sd_model_checkpoint = None
+    sd_model_dict = None
+    sd_vae_checkpoint = None
+
+    def __enter__(self):
+        #Save overridden settings so they can be restored later.
+        self.vae = shared.opts.sd_vae
+        self.schedulers_solver_order = shared.opts.schedulers_solver_order
+        self.tome_ratio = shared.opts.tome_ratio
+        self.todo_ratio = shared.opts.todo_ratio
+        self.sd_model_checkpoint = shared.opts.sd_model_checkpoint
+        self.sd_model_dict = shared.opts.sd_model_dict
+        self.sd_vae_checkpoint = shared.opts.sd_vae
+
+    def __exit__(self, exc_type, exc_value, tb):
+        #Restore overridden settings after plot generation.
+        shared.opts.data["sd_vae"] = self.vae
+        shared.opts.data["schedulers_solver_order"] = self.schedulers_solver_order
+        shared.opts.data["tome_ratio"] = self.tome_ratio
+        shared.opts.data["todo_ratio"] = self.todo_ratio
+        if self.sd_model_dict != shared.opts.sd_model_dict:
+            shared.opts.data["sd_model_dict"] = self.sd_model_dict
+        if self.sd_model_checkpoint != shared.opts.sd_model_checkpoint:
+            shared.opts.data["sd_model_checkpoint"] = self.sd_model_checkpoint
+            sd_models.reload_model_weights()
+        if self.sd_vae_checkpoint != shared.opts.sd_vae:
+            shared.opts.data["sd_vae"] = self.sd_vae_checkpoint
+            sd_vae.reload_vae_weights()
+
+
+re_range = re.compile(r'([-+]?[0-9]*\.?[0-9]+)-([-+]?[0-9]*\.?[0-9]+):?([0-9]+)?')
+
+class Script(scripts.Script):
+    current_axis_options = []
+
+    def show(self, is_img2img):
+        return scripts.AlwaysVisible
+
+    def title(self):
+        return "XYZ Grid"
+
+    def ui(self, is_img2img):
+        self.current_axis_options = [x for x in axis_options if type(x) == AxisOption or x.is_img2img == is_img2img]
+
+        with gr.Accordion('XYZ Grid', open = False, elem_id='xyz_grid'):
+            with gr.Row():
+                enabled = gr.Checkbox(label = 'Enabled', value = False)
+            with gr.Row():
+                with gr.Column():
+                    with gr.Row(variant='compact'):
+                        x_type = gr.Dropdown(label="X type", container=True, choices=[x.label for x in self.current_axis_options], value=self.current_axis_options[0].label, type="index", elem_id=self.elem_id("x_type"))
+                        x_values = gr.Textbox(label="X values", container=True, lines=1, elem_id=self.elem_id("x_values"))
+                        x_values_dropdown = gr.Dropdown(label="X values", container=True, visible=False, multiselect=True, interactive=True)
+                        fill_x_button = ToolButton(value=symbols.fill, elem_id="xyz_grid_fill_x_tool_button", visible=False)
+                    with gr.Row(variant='compact'):
+                        y_type = gr.Dropdown(label="Y type", container=True, choices=[x.label for x in self.current_axis_options], value=self.current_axis_options[0].label, type="index", elem_id=self.elem_id("y_type"))
+                        y_values = gr.Textbox(label="Y values", container=True, lines=1, elem_id=self.elem_id("y_values"))
+                        y_values_dropdown = gr.Dropdown(label="Y values", container=True, visible=False, multiselect=True, interactive=True)
+                        fill_y_button = ToolButton(value=symbols.fill, elem_id="xyz_grid_fill_y_tool_button", visible=False)
+                    with gr.Row(variant='compact'):
+                        z_type = gr.Dropdown(label="Z type", container=True, choices=[x.label for x in self.current_axis_options], value=self.current_axis_options[0].label, type="index", elem_id=self.elem_id("z_type"))
+                        z_values = gr.Textbox(label="Z values", container=True, lines=1, elem_id=self.elem_id("z_values"))
+                        z_values_dropdown = gr.Dropdown(label="Z values", container=True, visible=False, multiselect=True, interactive=True)
+                        fill_z_button = ToolButton(value=symbols.fill, elem_id="xyz_grid_fill_z_tool_button", visible=False)
+            with gr.Row():
+                with gr.Column():
+                    csv_mode = gr.Checkbox(label='Text inputs', value=False, elem_id=self.elem_id("csv_mode"), container=False)
+                    draw_legend = gr.Checkbox(label='Legend', value=True, elem_id=self.elem_id("draw_legend"), container=False)
+                    no_fixed_seeds = gr.Checkbox(label='Random seeds', value=False, elem_id=self.elem_id("no_fixed_seeds"), container=False)
+                with gr.Column():
+                    no_grid = gr.Checkbox(label='Skip grid', value=False, elem_id=self.elem_id("no_xyz_grid"), container=False)
+                    include_lone_images = gr.Checkbox(label='Sub-images', value=False, elem_id=self.elem_id("include_lone_images"), container=False)
+                    include_sub_grids = gr.Checkbox(label='Sub-grids', value=False, elem_id=self.elem_id("include_sub_grids"), container=False)
+            with gr.Row():
+                margin_size = gr.Slider(label="Grid margins", minimum=0, maximum=500, value=0, step=2, elem_id=self.elem_id("margin_size"))
+            with gr.Row():
+                swap_xy_axes_button = gr.Button(value="Swap X/Y", elem_id="xy_grid_swap_axes_button", variant="secondary")
+                swap_yz_axes_button = gr.Button(value="Swap Y/Z", elem_id="yz_grid_swap_axes_button", variant="secondary")
+                swap_xz_axes_button = gr.Button(value="Swap X/Z", elem_id="xz_grid_swap_axes_button", variant="secondary")
+
+        def swap_axes(axis1_type, axis1_values, axis1_values_dropdown, axis2_type, axis2_values, axis2_values_dropdown):
+            return self.current_axis_options[axis2_type].label, axis2_values, axis2_values_dropdown, self.current_axis_options[axis1_type].label, axis1_values, axis1_values_dropdown
+
+        xy_swap_args = [x_type, x_values, x_values_dropdown, y_type, y_values, y_values_dropdown]
+        swap_xy_axes_button.click(swap_axes, inputs=xy_swap_args, outputs=xy_swap_args)
+        yz_swap_args = [y_type, y_values, y_values_dropdown, z_type, z_values, z_values_dropdown]
+        swap_yz_axes_button.click(swap_axes, inputs=yz_swap_args, outputs=yz_swap_args)
+        xz_swap_args = [x_type, x_values, x_values_dropdown, z_type, z_values, z_values_dropdown]
+        swap_xz_axes_button.click(swap_axes, inputs=xz_swap_args, outputs=xz_swap_args)
+
+        def fill(axis_type, csv_mode):
+            axis = self.current_axis_options[axis_type]
+            if axis.choices:
+                if csv_mode:
+                    return list_to_csv_string(axis.choices()), gr.update()
+                else:
+                    return gr.update(), axis.choices()
+            else:
+                return gr.update(), gr.update()
+
+        fill_x_button.click(fn=fill, inputs=[x_type, csv_mode], outputs=[x_values, x_values_dropdown])
+        fill_y_button.click(fn=fill, inputs=[y_type, csv_mode], outputs=[y_values, y_values_dropdown])
+        fill_z_button.click(fn=fill, inputs=[z_type, csv_mode], outputs=[z_values, z_values_dropdown])
+
+        def select_axis(axis_type, axis_values, axis_values_dropdown, csv_mode):
+            choices = self.current_axis_options[axis_type].choices
+            has_choices = choices is not None
+            current_values = axis_values
+            current_dropdown_values = axis_values_dropdown
+            if has_choices:
+                choices = choices()
+                if csv_mode:
+                    current_dropdown_values = list(filter(lambda x: x in choices, current_dropdown_values))
+                    current_values = list_to_csv_string(current_dropdown_values)
+                else:
+                    current_dropdown_values = [x.strip() for x in chain.from_iterable(csv.reader(StringIO(axis_values)))]
+                    current_dropdown_values = list(filter(lambda x: x in choices, current_dropdown_values))
+
+            return (gr.Button.update(visible=has_choices), gr.Textbox.update(visible=not has_choices or csv_mode, value=current_values),
+                    gr.update(choices=choices if has_choices else None, visible=has_choices and not csv_mode, value=current_dropdown_values))
+
+        x_type.change(fn=select_axis, inputs=[x_type, x_values, x_values_dropdown, csv_mode], outputs=[fill_x_button, x_values, x_values_dropdown])
+        y_type.change(fn=select_axis, inputs=[y_type, y_values, y_values_dropdown, csv_mode], outputs=[fill_y_button, y_values, y_values_dropdown])
+        z_type.change(fn=select_axis, inputs=[z_type, z_values, z_values_dropdown, csv_mode], outputs=[fill_z_button, z_values, z_values_dropdown])
+
+        def change_choice_mode(csv_mode, x_type, x_values, x_values_dropdown, y_type, y_values, y_values_dropdown, z_type, z_values, z_values_dropdown):
+            _fill_x_button, _x_values, _x_values_dropdown = select_axis(x_type, x_values, x_values_dropdown, csv_mode)
+            _fill_y_button, _y_values, _y_values_dropdown = select_axis(y_type, y_values, y_values_dropdown, csv_mode)
+            _fill_z_button, _z_values, _z_values_dropdown = select_axis(z_type, z_values, z_values_dropdown, csv_mode)
+            return _fill_x_button, _x_values, _x_values_dropdown, _fill_y_button, _y_values, _y_values_dropdown, _fill_z_button, _z_values, _z_values_dropdown
+
+        csv_mode.change(fn=change_choice_mode, inputs=[csv_mode, x_type, x_values, x_values_dropdown, y_type, y_values, y_values_dropdown, z_type, z_values, z_values_dropdown], outputs=[fill_x_button, x_values, x_values_dropdown, fill_y_button, y_values, y_values_dropdown, fill_z_button, z_values, z_values_dropdown])
+
+        def get_dropdown_update_from_params(axis,params):
+            val_key = f"{axis} Values"
+            vals = params.get(val_key,"")
+            valslist = [x.strip() for x in chain.from_iterable(csv.reader(StringIO(vals))) if x]
+            return gr.update(value = valslist)
+
+        self.infotext_fields = (
+            (x_type, "X Type"),
+            (x_values, "X Values"),
+            (x_values_dropdown, lambda params:get_dropdown_update_from_params("X",params)),
+            (y_type, "Y Type"),
+            (y_values, "Y Values"),
+            (y_values_dropdown, lambda params:get_dropdown_update_from_params("Y",params)),
+            (z_type, "Z Type"),
+            (z_values, "Z Values"),
+            (z_values_dropdown, lambda params:get_dropdown_update_from_params("Z",params)),
+        )
+
+        return [enabled, x_type, x_values, x_values_dropdown, y_type, y_values, y_values_dropdown, z_type, z_values, z_values_dropdown, csv_mode, draw_legend, no_fixed_seeds, no_grid, include_lone_images, include_sub_grids, margin_size]
+
+    def process(self, p, enabled, x_type, x_values, x_values_dropdown, y_type, y_values, y_values_dropdown, z_type, z_values, z_values_dropdown, csv_mode, draw_legend, no_fixed_seeds, no_grid, include_lone_images, include_sub_grids, margin_size): # pylint: disable=W0221
+        global active, cache # pylint: disable=W0603
+        if not enabled or active:
+            return
+        active = True
+        shared.log.debug(f'xyzgrid: x_type={x_type}|x_values={x_values}|x_values_dropdown={x_values_dropdown}|y_type={y_type}|y_values={y_values}|y_values_dropdown={y_values_dropdown}|z_type={z_type}|z_values={z_values}|z_values_dropdown={z_values_dropdown}|draw_legend={draw_legend}|include_lone_images={include_lone_images}|include_sub_grids={include_sub_grids}|no_grid={no_grid}|margin_size={margin_size}')
+        if not no_fixed_seeds:
+            processing.fix_seed(p)
+        if not shared.opts.return_grid:
+            p.batch_size = 1
+
+        def process_axis(opt, vals, vals_dropdown):
+            if opt.label == 'Nothing':
+                return [0]
+            if opt.choices is not None and not csv_mode:
+                valslist = vals_dropdown
+            else:
+                valslist = [x.strip() for x in chain.from_iterable(csv.reader(StringIO(vals))) if x]
+            if opt.type == int:
+                valslist_ext = []
+                for val in valslist:
+                    m = re_range.fullmatch(val)
+                    if m is not None:
+                        start_val = int(m.group(1)) if m.group(1) is not None else val
+                        end_val = int(m.group(2)) if m.group(2) is not None else val
+                        num = int(m.group(3)) if m.group(3) is not None else int(end_val-start_val)
+                        valslist_ext += [int(x) for x in np.linspace(start=start_val, stop=end_val, num=max(2, num)).tolist()]
+                        shared.log.debug(f'XYZ grid range: start={start_val} end={end_val} num={max(2, num)} list={valslist}')
+                    else:
+                        valslist_ext.append(int(val))
+                valslist.clear()
+                valslist = [x for x in valslist_ext if x not in valslist]
+            elif opt.type == float:
+                valslist_ext = []
+                for val in valslist:
+                    m = re_range.fullmatch(val)
+                    if m is not None:
+                        start_val = float(m.group(1)) if m.group(1) is not None else val
+                        end_val = float(m.group(2)) if m.group(2) is not None else val
+                        num = int(m.group(3)) if m.group(3) is not None else int(end_val-start_val)
+                        valslist_ext += [round(float(x), 2) for x in np.linspace(start=start_val, stop=end_val, num=max(2, num)).tolist()]
+                        shared.log.debug(f'XYZ grid range: start={start_val} end={end_val} num={max(2, num)} list={valslist}')
+                    else:
+                        valslist_ext.append(float(val))
+                valslist.clear()
+                valslist = [x for x in valslist_ext if x not in valslist]
+            elif opt.type == str_permutations: # pylint: disable=comparison-with-callable
+                valslist = list(permutations(valslist))
+            valslist = [opt.type(x) for x in valslist]
+            # Confirm options are valid before starting
+            if opt.confirm:
+                opt.confirm(p, valslist)
+            return valslist
+
+        x_opt = self.current_axis_options[x_type]
+        if x_opt.choices is not None and not csv_mode:
+            x_values = list_to_csv_string(x_values_dropdown)
+        xs = process_axis(x_opt, x_values, x_values_dropdown)
+        y_opt = self.current_axis_options[y_type]
+        if y_opt.choices is not None and not csv_mode:
+            y_values = list_to_csv_string(y_values_dropdown)
+        ys = process_axis(y_opt, y_values, y_values_dropdown)
+        z_opt = self.current_axis_options[z_type]
+        if z_opt.choices is not None and not csv_mode:
+            z_values = list_to_csv_string(z_values_dropdown)
+        zs = process_axis(z_opt, z_values, z_values_dropdown)
+        Image.MAX_IMAGE_PIXELS = None # disable check in Pillow and rely on check below to allow large custom image sizes
+
+        def fix_axis_seeds(axis_opt, axis_list):
+            if axis_opt.label in ['Seed', 'Var. seed']:
+                return [int(random.randrange(4294967294)) if val is None or val == '' or val == -1 else val for val in axis_list]
+            else:
+                return axis_list
+
+        if not no_fixed_seeds:
+            xs = fix_axis_seeds(x_opt, xs)
+            ys = fix_axis_seeds(y_opt, ys)
+            zs = fix_axis_seeds(z_opt, zs)
+
+        if x_opt.label == 'Steps':
+            total_steps = sum(xs) * len(ys) * len(zs)
+        elif y_opt.label == 'Steps':
+            total_steps = sum(ys) * len(xs) * len(zs)
+        elif z_opt.label == 'Steps':
+            total_steps = sum(zs) * len(xs) * len(ys)
+        else:
+            total_steps = p.steps * len(xs) * len(ys) * len(zs)
+        if isinstance(p, processing.StableDiffusionProcessingTxt2Img) and p.enable_hr:
+            if x_opt.label == "Hires steps":
+                total_steps += sum(xs) * len(ys) * len(zs)
+            elif y_opt.label == "Hires steps":
+                total_steps += sum(ys) * len(xs) * len(zs)
+            elif z_opt.label == "Hires steps":
+                total_steps += sum(zs) * len(xs) * len(ys)
+            elif p.hr_second_pass_steps:
+                total_steps += p.hr_second_pass_steps * len(xs) * len(ys) * len(zs)
+            else:
+                total_steps *= 2
+        total_steps *= p.n_iter
+        image_cell_count = p.n_iter * p.batch_size
+        shared.log.info(f"XYZ grid: images={len(xs)*len(ys)*len(zs)*image_cell_count} grid={len(zs)} {len(xs)}x{len(ys)} cells={len(zs)} steps={total_steps}")
+        AxisInfo = namedtuple('AxisInfo', ['axis', 'values'])
+        shared.state.xyz_plot_x = AxisInfo(x_opt, xs)
+        shared.state.xyz_plot_y = AxisInfo(y_opt, ys)
+        shared.state.xyz_plot_z = AxisInfo(z_opt, zs)
+        # If one of the axes is very slow to change between (like SD model checkpoint), then make sure it is in the outer iteration of the nested `for` loop.
+        first_axes_processed = 'z'
+        second_axes_processed = 'y'
+        if x_opt.cost > y_opt.cost and x_opt.cost > z_opt.cost:
+            first_axes_processed = 'x'
+            if y_opt.cost > z_opt.cost:
+                second_axes_processed = 'y'
+            else:
+                second_axes_processed = 'z'
+        elif y_opt.cost > x_opt.cost and y_opt.cost > z_opt.cost:
+            first_axes_processed = 'y'
+            if x_opt.cost > z_opt.cost:
+                second_axes_processed = 'x'
+            else:
+                second_axes_processed = 'z'
+        elif z_opt.cost > x_opt.cost and z_opt.cost > y_opt.cost:
+            first_axes_processed = 'z'
+            if x_opt.cost > y_opt.cost:
+                second_axes_processed = 'x'
+            else:
+                second_axes_processed = 'y'
+        grid_infotext = [None] * (1 + len(zs))
+
+        def cell(x, y, z, ix, iy, iz):
+            if shared.state.interrupted:
+                return processing.Processed(p, [], p.seed, "")
+            pc = copy(p)
+            pc.override_settings_restore_afterwards = False
+            pc.styles = pc.styles[:]
+            x_opt.apply(pc, x, xs)
+            y_opt.apply(pc, y, ys)
+            z_opt.apply(pc, z, zs)
+            try:
+                res = processing.process_images(pc)
+            except Exception as e:
+                shared.log.error(f"XYZ grid: Failed to process image: {e}")
+                errors.display(e, 'XYZ grid')
+                res = None
+            subgrid_index = 1 + iz # Sets subgrid infotexts
+            if grid_infotext[subgrid_index] is None and ix == 0 and iy == 0:
+                pc.extra_generation_params = copy(pc.extra_generation_params)
+                pc.extra_generation_params['Script'] = self.title()
+                if x_opt.label != 'Nothing':
+                    pc.extra_generation_params["X Type"] = x_opt.label
+                    pc.extra_generation_params["X Values"] = x_values
+                    if x_opt.label in ["Seed", "Var. seed"] and not no_fixed_seeds:
+                        pc.extra_generation_params["Fixed X Values"] = ", ".join([str(x) for x in xs])
+                if y_opt.label != 'Nothing':
+                    pc.extra_generation_params["Y Type"] = y_opt.label
+                    pc.extra_generation_params["Y Values"] = y_values
+                    if y_opt.label in ["Seed", "Var. seed"] and not no_fixed_seeds:
+                        pc.extra_generation_params["Fixed Y Values"] = ", ".join([str(y) for y in ys])
+                grid_infotext[subgrid_index] = processing.create_infotext(pc, pc.all_prompts, pc.all_seeds, pc.all_subseeds)
+            if grid_infotext[0] is None and ix == 0 and iy == 0 and iz == 0: # Sets main grid infotext
+                pc.extra_generation_params = copy(pc.extra_generation_params)
+                if z_opt.label != 'Nothing':
+                    pc.extra_generation_params["Z Type"] = z_opt.label
+                    pc.extra_generation_params["Z Values"] = z_values
+                    if z_opt.label in ["Seed", "Var. seed"] and not no_fixed_seeds:
+                        pc.extra_generation_params["Fixed Z Values"] = ", ".join([str(z) for z in zs])
+                grid_infotext[0] = processing.create_infotext(pc, pc.all_prompts, pc.all_seeds, pc.all_subseeds)
+            return res
+
+        with SharedSettingsStackHelper():
+            processed = draw_xyz_grid(
+                p,
+                xs=xs,
+                ys=ys,
+                zs=zs,
+                x_labels=[x_opt.format_value(p, x_opt, x) for x in xs],
+                y_labels=[y_opt.format_value(p, y_opt, y) for y in ys],
+                z_labels=[z_opt.format_value(p, z_opt, z) for z in zs],
+                cell=cell,
+                draw_legend=draw_legend,
+                include_lone_images=include_lone_images,
+                include_sub_grids=include_sub_grids,
+                first_axes_processed=first_axes_processed,
+                second_axes_processed=second_axes_processed,
+                margin_size=margin_size,
+                no_grid=no_grid,
+            )
+
+        if not processed.images:
+            active = False
+            return processed # It broke, no further handling needed.
+        z_count = len(zs)
+        processed.infotexts[:1+z_count] = grid_infotext[:1+z_count] # Set the grid infotexts to the real ones with extra_generation_params (1 main grid + z_count sub-grids)
+        if not include_lone_images:
+            if no_grid and include_sub_grids:
+                processed.images = processed.images[:z_count] # we don't have the main grid image, and need zero additional sub-images
+            else:
+                processed.images = processed.images[:z_count+1] # we either have the main grid image, or need one sub-image
+        if shared.opts.grid_save: # Auto-save main and sub-grids:
+            grid_count = z_count + ( 1 if not no_grid and z_count > 1 else 0 )
+            for g in range(grid_count):
+                adj_g = g-1 if g > 0 else g
+                images.save_image(processed.images[g], p.outpath_grids, "xyz_grid", info=processed.infotexts[g], extension=shared.opts.grid_format, prompt=processed.all_prompts[adj_g], seed=processed.all_seeds[adj_g], grid=True, p=processed)
+        if not include_sub_grids: # Done with sub-grids, drop all related information:
+            for _sg in range(z_count):
+                del processed.images[1]
+                del processed.all_prompts[1]
+                del processed.all_seeds[1]
+                del processed.infotexts[1]
+        elif no_grid:
+            del processed.infotexts[0]
+        active = False
+        cache = processed
+        return processed
+
+    def process_images(self, p, enabled, x_type, x_values, x_values_dropdown, y_type, y_values, y_values_dropdown, z_type, z_values, z_values_dropdown, csv_mode, draw_legend, no_fixed_seeds, no_grid, include_lone_images, include_sub_grids, margin_size): # pylint: disable=W0221
+        global cache # pylint: disable=W0603
+        if cache is not None and hasattr(cache, 'images'):
+            samples = cache.images.copy()
+            cache = None
+            return samples
+        return None
diff --git a/webui.py b/webui.py
index c9af90e75..6013aafd6 100644
--- a/webui.py
+++ b/webui.py
@@ -24,6 +24,7 @@
 import modules.sd_models
 import modules.sd_vae
 import modules.sd_unet
+import modules.model_t5
 import modules.progress
 import modules.ui
 import modules.txt2img
@@ -33,7 +34,7 @@
 import modules.hypernetworks.hypernetwork
 import modules.script_callbacks
 from modules.api.middleware import setup_middleware
-from modules.shared import cmd_opts, opts
+from modules.shared import cmd_opts, opts # pylint: disable=unused-import
 
 sys.excepthook = custom_excepthook
@@ -63,6 +64,7 @@
 timer.startup.record("ldm")
 modules.loader.initialized = True
+
 
 def check_rollback_vae():
     if shared.cmd_opts.rollback_vae:
         if not torch.cuda.is_available():
@@ -76,7 +78,6 @@ def check_rollback_vae():
             shared.cmd_opts.rollback_vae = False
 
-
 def initialize():
     log.debug('Initializing')
     check_rollback_vae()
@@ -90,6 +91,9 @@ def initialize():
     modules.sd_unet.refresh_unet_list()
     timer.startup.record("unet")
+    modules.model_t5.refresh_t5_list()
+    timer.startup.record("unet")
+
     extensions.list_extensions()
     timer.startup.record("extensions")
@@ -98,10 +102,10 @@ def initialize():
     timer.startup.record("models")
 
     import modules.postprocess.codeformer_model as codeformer
-    codeformer.setup_model(opts.codeformer_models_path)
+    codeformer.setup_model(shared.opts.codeformer_models_path)
     sys.modules["modules.codeformer_model"] = codeformer
     import modules.postprocess.gfpgan_model as gfpgan
-    gfpgan.setup_model(opts.gfpgan_models_path)
+    gfpgan.setup_model(shared.opts.gfpgan_models_path)
     timer.startup.record("face-restore")
 
     log.debug('Load extensions')
@@ -155,7 +159,7 @@ def sigint_handler(_sig, _frame):
 
 def load_model():
     modules.devices.set_cuda_params()
-    if not opts.sd_checkpoint_autoload or (shared.cmd_opts.ckpt is not None and shared.cmd_opts.ckpt.lower() != 'none'):
+    if not shared.opts.sd_checkpoint_autoload or (shared.cmd_opts.ckpt is not None and shared.cmd_opts.ckpt.lower() != 'none'):
         log.debug('Model auto load disabled')
     else:
         shared.state.begin('Load')
@@ -213,9 +217,15 @@ def start_common():
     log.info(f'Using data path: {shared.cmd_opts.data_dir}')
     if shared.cmd_opts.models_dir is not None and len(shared.cmd_opts.models_dir) > 0 and shared.cmd_opts.models_dir != 'models':
         log.info(f'Using models path: {shared.cmd_opts.models_dir}')
-    create_paths(opts)
+    create_paths(shared.opts)
     async_policy()
     initialize()
+    try:
+        from installer import diffusers_commit
+        if diffusers_commit != 'unknown':
+            shared.opts.diffusers_version = diffusers_commit # update installed diffusers version
+    except Exception:
+        pass
     if shared.opts.clean_temp_dir_at_start:
         gr_tempdir.cleanup_tmpdr()
     timer.startup.record("cleanup")
diff --git a/wiki b/wiki
index cf1e6bbb5..87bd5adaa 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit cf1e6bbb5b476007081b6a29f9d65f185b49b629
+Subproject commit 87bd5adaae34ebe5c880f64e56322ff47d0a315a