diff --git a/.gitignore b/.gitignore index 3e5d7807e..2bc819860 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ venv .history cache **/.DS_Store +tunableop_results*.csv # all models and temp files *.log diff --git a/.pylintrc b/.pylintrc index ceb7c1e94..00a59df29 100644 --- a/.pylintrc +++ b/.pylintrc @@ -23,7 +23,9 @@ ignore-paths=/usr/lib/.*$, modules/todo, modules/unipc, modules/xadapter, + modules/dcsolver, repositories, + modules/prompt_parser_xhinker.py, extensions-builtin/sd-webui-agent-scheduler, extensions-builtin/sd-extension-chainner/nodes, extensions-builtin/sdnext-modernui/node_modules, @@ -135,6 +137,7 @@ disable=bad-inline-option, consider-using-get, consider-using-in, consider-using-min-builtin, + consider-using-max-builtin, consider-using-sys-exit, dangerous-default-value, deprecated-pragma, diff --git a/.ruff.toml b/.ruff.toml index ea91ba5a5..52499859d 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -13,11 +13,14 @@ exclude = [ "modules/todo", "modules/unipc", "modules/xadapter", + "modules/dcsolver", "modules/intel/openvino", "modules/intel/ipex", "modules/segmoe", "modules/control/proc", "modules/control/units", + "modules/prompt_parser_xhinker.py", + "modules/postprocess/aurasr_arch.py", "repositories", "extensions-builtin/sd-extension-chainner/nodes", "extensions-builtin/sd-webui-agent-scheduler", @@ -80,4 +83,4 @@ line-ending = "auto" docstring-code-format = false [lint.mccabe] -max-complexity = 99 +max-complexity = 150 diff --git a/.vscode/settings.json b/.vscode/settings.json index 428fc2335..690477350 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,5 +8,6 @@ "./repositories/taming" ], "python.analysis.typeCheckingMode": "off", - "editor.formatOnSave": false + "editor.formatOnSave": false, + "python.REPL.enableREPLSmartSend": false } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index b8444c168..188f7d2ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,150 @@ # Change Log for SD.Next +## Update for 2024-09-13 + +### Highlights for 2024-09-13 + +Major refactor of [FLUX.1](https://blackforestlabs.ai/announcing-black-forest-labs/) support: +- Full **ControlNet** support, better **LoRA** support, full **prompt attention** implementation +- Faster execution, more flexible loading, additional quantization options (see the sketch below), and more... +- Added **image-to-image**, **inpaint**, **outpaint**, **hires** modes +- Added a workflow where FLUX can be used as a **refiner** for other models +- Since both *Optimum-Quanto* and *BitsAndBytes* libraries are limited in their platform support matrix, + try enabling **NNCF** for quantization/compression on-the-fly! + +A few image-related goodies... +- **Context-aware** resize that allows for *img2img/inpaint* even at massively different aspect ratios without distortions! +- **LUT Color grading**: apply professional color grading to your images using industry-standard *.cube* LUTs! +- Auto **HDR** image creation for SD and SDXL with both 16-ch true-HDR and 8-ch HDR-effect images ;) + +And a few video-related goodies... +- [CogVideoX](https://huggingface.co/THUDM/CogVideoX-5b) **2b** and **5b** variants + with support for *text-to-video* and *video-to-video*! +- [AnimateDiff](https://github.com/guoyww/animatediff/) **prompt travel** and **long context windows**! + create videos that travel between different prompts, even at long video lengths! + +Plus tons of other items and fixes - see [changelog](https://github.com/vladmandic/automatic/blob/master/CHANGELOG.md) for details!
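The quantization options mentioned in the highlights (nf4/fp8/fp4 and quanto qint8/qint4) are applied per FLUX.1 component. Purely as an illustration of what on-the-fly component quantization looks like, here is a minimal standalone sketch using plain `diffusers` plus `optimum-quanto`; it is not SD.Next's internal code path, and the model id, dtype, and choice of `qint8` are assumptions:

```python
# Minimal sketch: quantize the two largest FLUX.1 components (transformer and T5 encoder)
# on the fly with optimum-quanto, then run a short schnell-style generation.
import torch
from diffusers import FluxPipeline
from optimum.quanto import quantize, freeze, qint8

pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16)
quantize(pipe.transformer, weights=qint8)     # quantize transformer weights in place
freeze(pipe.transformer)                      # replace float weights with frozen quantized tensors
quantize(pipe.text_encoder_2, weights=qint8)  # the T5 text encoder is the other large component
freeze(pipe.text_encoder_2)
pipe.enable_model_cpu_offload()               # keep peak VRAM low, comparable to model offload

image = pipe("a red fox in fresh snow", num_inference_steps=4, guidance_scale=0.0).images[0]
image.save("flux-quantized.png")
```

SD.Next exposes the equivalent behavior through its model/settings UI (quantization type selected per component), so the snippet is only meant to clarify what "quantization on-the-fly" of individual transformer/text-encoder components means.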
+Examples: +- Built-in prompt-enhancer, TAESD optimizations, new DC-Solver scheduler, global XYZ grid management, etc. +- Updates to ZLUDA, IPEX, OpenVINO... + +### Details for 2024-09-13 + +**Major refactor of FLUX.1 support:** +- allow configuration of individual FLUX.1 model components: *transformer, text-encoder, vae* + model load will load selected components first and then initialize model using pre-loaded components + components that were not pre-loaded will be downloaded and initialized as needed + as usual, components can also be loaded after initial model load + *note*: use of transformer/unet is recommended as those are flux.1 finetunes + *note*: manually selecting vae and text-encoder is not recommended + *note*: mix-and-match of different quantizations for different components can lead to unexpected errors + - transformer/unet is list of manually downloaded safetensors + - vae is list of manually downloaded safetensors + - text-encoder is list of predefined and manually downloaded text-encoders +- **controlnet** support: + support for **InstantX/Shakker-Labs** models including [Union-Pro](https://huggingface.co/InstantX/FLUX.1-dev-Controlnet-Union) + note that flux controlnet models are large, up to 6.6GB on top of the already large base model! + as such, you may need to use offloading:sequential which is not as fast, but uses far less memory + when using union model, you must also select control mode in the control unit + flux does not yet support *img2img* so to use controlnet, you need to set controlnet input via control unit override +- model support loading **all-in-one** safetensors + not recommended due to massive duplication of components, but added due to popular demand + each such model is 20-32GB in size vs ~11GB for typical unet fine-tune +- improve logging, warn when attempting to load unet as base model +- **refiner** support + FLUX.1 can be used as refiner for other models such as sd/sdxl + simply load sd/sdxl model as base and flux model as refiner and use as usual refiner workflow +- **img2img**, **inpaint** and **outpaint** support + *note*: flux may require higher denoising strength than typical sd/sdxl models + *note*: img2img is not yet supported with controlnet +- transformer/unet support *fp8/fp4* quantization + this brings supported quants to: *nf4/fp8/fp4/qint8/qint4* +- vae support *fp16* +- **lora** support additional training tools +- **face-hires** support +- support **fuse-qkv** projections + can speed up generation + enable via *settings -> compute -> fused projections* + +**Other improvements & Fixes:** +- [CogVideoX](https://huggingface.co/THUDM/CogVideoX-5b) + - support for both **2B** and **5B** variations + - support for both **text2video** and **video2video** modes + - simply select in *scripts -> cogvideox* + - as with all video modules, includes additional frame interpolation using RIFE + - if init video is used, it will be automatically resized and interpolated to the desired number of frames +- **AnimateDiff**: + - **prompt travel** + create a video that travels between different prompts at different steps! + example prompt: + > 0: dog + > 5: cat + > 10: bird + - support for **v3** model (finally) + - support for **LCM** model + - support for **free-noise** rolling context window + allow for creation of much longer videos, automatically enabled if frames > 16 +- **Context-aware** image resize, thanks @AI-Casanova! + based on [seam-carving](https://github.com/li-plus/seam-carving) + allows for *img2img/inpaint* even at massively different aspect ratios without distortions!
+  simply select as resize method when using *img2img* or *control* tabs +- **HDR** high-dynamic-range image creation for SD and SDXL + create hdr images from multiple exposures via latent-space modifications during generation + use via *scripts -> hdr* + option *save hdr images* creates images in standard 8bit/channel (hdr-effect) *and* 16bit/channel (full-hdr) PNG format + ui result is always 8bit/channel hdr-effect image plus grid of original images used to create hdr + grid image can be disabled via settings -> user interface -> show grid + actual full-hdr image is not displayed in ui, only optionally saved to disk +- new scheduler: [DC Solver](https://github.com/wl-zhao/DC-Solver) +- **color grading** apply professional color grading to your images + using industry-standard *.cube* LUTs! + enable via *scripts -> color-grading* +- **hires** workflow now allows for full resize options + not just limited width/height/scale +- **xyz grid** is now available as both local and global script! +- **prompt enhance**: improve quality and/or verbosity of your prompts + simply select in *scripts -> prompt enhance* + uses [gokaygokay/Flux-Prompt-Enhance](https://huggingface.co/gokaygokay/Flux-Prompt-Enhance) model +- **taesd** configurable number of layers + can be used to speed up taesd decoding by reducing number of ops + e.g. if generating 1024px image, reducing layers by 1 will result in preview being 512px + set via *settings -> live preview -> taesd decode layers* +- **xhinker** prompt parser handle offloaded models +- **control** better handle offloading +- **upscale** will use resize-to if set to non-zero values over resize-by + applies to any upscale options, including refine workflow +- **networks** add option to choose if mouse-over on network should attempt to fetch additional info + option `extra_networks_fetch` to enable/disable in *settings -> networks* +- speed up some garbage collection ops +- sampler settings add **dynamic shift** + used by flow-matching samplers to adjust between structure and details +- sampler settings force base shift + improves quality of the flow-matching samplers +- **t5** support manually downloaded models + applies to all models that use t5 transformer +- **modern-ui** add override field +- full **lint** updates +- use `diffusers` from main branch, no longer tied to release +- improve diffusers/transformers/huggingface_hub progress reporting +- use unique identifiers for all ui components +- **visual query** (a.k.a. vqa or vlm) added support for several models + - [MiaoshouAI PromptGen 1.5 Base](https://huggingface.co/MiaoshouAI/Florence-2-base-PromptGen-v1.5) + - [MiaoshouAI PromptGen 1.5 Large](https://huggingface.co/MiaoshouAI/Florence-2-large-PromptGen-v1.5) + - [CogFlorence 2.2 Large](https://huggingface.co/thwri/CogFlorence-2.2-Large) +- **modernui** update +- **zluda** update to 3.8.4, thanks @lshqqytiger! +- **ipex** update to 2.3.110+xpu on linux, thanks @Disty0! +- **openvino** update to 2024.3.0, thanks @Disty0!
+- update `requirements` +- fix **AuraFlow** +- fix handling of model configs if offline config is not available +- fix vae decode in backend original +- fix model path typos +- fix guidance end handler +- fix script sorting +- fix vae dtype during load +- fix all ui labels are unique + ## Update for 2024-08-31 ### Highlights for 2024-08-31 diff --git a/TODO.md b/TODO.md index 61dc038d5..5726e67da 100644 --- a/TODO.md +++ b/TODO.md @@ -4,13 +4,9 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma ## Future Candidates -- cogvideo-x: -- animatediff prompt-travel: - async lowvram: - fp8: - ipadapter-negative: https://github.com/huggingface/diffusers/discussions/7167 -- hd-painter: https://github.com/huggingface/diffusers/blob/main/examples/community/README.md#hd-painter -- init latents: variations, img2img - include reference styles ### Missing diff --git a/cli/image-grid.py b/cli/image-grid.py index 8a48aecd9..f6f8ed755 100755 --- a/cli/image-grid.py +++ b/cli/image-grid.py @@ -52,7 +52,8 @@ def grid(images, labels = None, width = 0, height = 0, border = 0, square = Fals h = round(height / rows) size = tuple(size) image = Image.new('RGB', size = size, color = 'black') # pylint: disable=redefined-outer-name - font = ImageFont.truetype('DejaVuSansMono', round(w / 40)) + font_size = round(w / 40) if params.font == 0 else params.font + font = ImageFont.truetype('DejaVuSansMono', font_size) for i, img in enumerate(images): # pylint: disable=redefined-outer-name x = (i % cols * w) + (i % cols * border) y = (i // cols * h) + (i // cols * border) @@ -76,6 +77,7 @@ def grid(images, labels = None, width = 0, height = 0, border = 0, square = Fals parser.add_argument("--width", type = int, default = 0, required = False, help = "fixed grid width") parser.add_argument("--height", type = int, default = 0, required = False, help = "fixed grid height") parser.add_argument("--border", type = int, default = 0, required = False, help = "image border") + parser.add_argument("--font", type = int, default = 0, required = False, help = "font text size") parser.add_argument('--nolabels', default = False, action='store_true', help = "do not print image labels") parser.add_argument('--debug', default = False, action='store_true', help = "print extra debug information") parser.add_argument('output', type = str) diff --git a/extensions-builtin/Lora/lora_patches.py b/extensions-builtin/Lora/lora_patches.py index 7b0916e3a..532782c80 100644 --- a/extensions-builtin/Lora/lora_patches.py +++ b/extensions-builtin/Lora/lora_patches.py @@ -22,8 +22,8 @@ def apply(self): return if "Model" in shared.opts.optimum_quanto_weights or "Text Encoder" in shared.opts.optimum_quanto_weights: from optimum import quanto - self.QLinear_forward = patches.patch(__name__, quanto.nn.QLinear, 'forward', networks.network_QLinear_forward) - self.QConv2d_forward = patches.patch(__name__, quanto.nn.QConv2d, 'forward', networks.network_QConv2d_forward) + self.QLinear_forward = patches.patch(__name__, quanto.nn.QLinear, 'forward', networks.network_QLinear_forward) # pylint: disable=attribute-defined-outside-init + self.QConv2d_forward = patches.patch(__name__, quanto.nn.QConv2d, 'forward', networks.network_QConv2d_forward) # pylint: disable=attribute-defined-outside-init self.Linear_forward = patches.patch(__name__, torch.nn.Linear, 'forward', networks.network_Linear_forward) self.Linear_load_state_dict = patches.patch(__name__, torch.nn.Linear, '_load_from_state_dict', networks.network_Linear_load_state_dict) 
self.Conv2d_forward = patches.patch(__name__, torch.nn.Conv2d, 'forward', networks.network_Conv2d_forward) @@ -44,8 +44,8 @@ def undo(self): return if "Model" in shared.opts.optimum_quanto_weights or "Text Encoder" in shared.opts.optimum_quanto_weights: from optimum import quanto - self.QLinear_forward = patches.undo(__name__, quanto.nn.QLinear, 'forward') # pylint: disable=E1128 - self.QConv2d_forward = patches.undo(__name__, quanto.nn.QConv2d, 'forward') # pylint: disable=E1128 + self.QLinear_forward = patches.undo(__name__, quanto.nn.QLinear, 'forward') # pylint: disable=E1128, attribute-defined-outside-init + self.QConv2d_forward = patches.undo(__name__, quanto.nn.QConv2d, 'forward') # pylint: disable=E1128, attribute-defined-outside-init self.Linear_forward = patches.undo(__name__, torch.nn.Linear, 'forward') # pylint: disable=E1128 self.Linear_load_state_dict = patches.undo(__name__, torch.nn.Linear, '_load_from_state_dict') # pylint: disable=E1128 self.Conv2d_forward = patches.undo(__name__, torch.nn.Conv2d, 'forward') # pylint: disable=E1128 diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 847d46632..1bc7a74f7 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -97,10 +97,17 @@ def load_diffusers(name, network_on_disk, lora_scale=1.0) -> network.Network: try: shared.sd_model.load_lora_weights(network_on_disk.filename, adapter_name=name) except Exception as e: - errors.display(e, "LoRA") - return None - diffuser_loaded.append(name) - diffuser_scales.append(lora_scale) + if 'already in use' in str(e): + # shared.log.warning(f"LoRA load failed: file={network_on_disk.filename} {e}") + pass + else: + shared.log.error(f"LoRA load failed: file={network_on_disk.filename} {e}") + if debug: + errors.display(e, "LoRA") + return None + if name not in diffuser_loaded: + diffuser_loaded.append(name) + diffuser_scales.append(lora_scale) net = network.Network(name, network_on_disk) net.mtime = os.path.getmtime(network_on_disk.filename) # lora_cache[name] = net @@ -199,7 +206,7 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No if recompile_model: backup_cuda_compile = shared.opts.cuda_compile sd_models.unload_model_weights(op='model') - shared.opts.cuda_compile = False + shared.opts.cuda_compile = [] sd_models.reload_model_weights(op='model') shared.opts.cuda_compile = backup_cuda_compile @@ -254,7 +261,7 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No if recompile_model: shared.log.info("LoRA recompiling model") backup_lora_model = shared.compiled_model_state.lora_model - if shared.opts.cuda_compile: + if 'Model' in shared.opts.cuda_compile: shared.sd_model = sd_models_compile.compile_diffusers(shared.sd_model) shared.compiled_model_state.lora_model = backup_lora_model diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui index c84d677e0..2c95d480d 160000 --- a/extensions-builtin/sdnext-modernui +++ b/extensions-builtin/sdnext-modernui @@ -1 +1 @@ -Subproject commit c84d677e0c2df4aabe556dc3b40d5fed024e4cc1 +Subproject commit 2c95d480d63d46232122ddbd4161b73cba8c258a diff --git a/installer.py b/installer.py index cff0fd110..f932f04a8 100644 --- a/installer.py +++ b/installer.py @@ -25,6 +25,7 @@ class Dot(dict): # dot notation access to dictionary attributes version = None current_branch = None log = logging.getLogger("sd") +console = None debug = log.debug if os.environ.get('SD_INSTALL_DEBUG', None) is not None 
else lambda *args, **kwargs: None pip_log = '--log pip.log ' if os.environ.get('SD_PIP_DEBUG', None) is not None else '' log_file = os.path.join(os.path.dirname(__file__), 'sdnext.log') @@ -55,6 +56,7 @@ class Dot(dict): # dot notation access to dictionary attributes 'uv': False, }) git_commit = "unknown" +diffusers_commit = "unknown" submodules_commit = { 'sd-webui-controlnet': 'ecd33eb', # 'stable-diffusion-webui-images-browser': '27fe4a7', @@ -109,6 +111,7 @@ def get(self): level = logging.DEBUG if args.debug else logging.INFO log.setLevel(logging.DEBUG) # log to file is always at level debug for facility `sd` + global console # pylint: disable=global-statement console = Console(log_time=True, log_time_format='%H:%M:%S-%f', theme=Theme({ "traceback.border": "black", "traceback.border.syntax_error": "black", @@ -435,19 +438,24 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None): # check diffusers version def check_diffusers(): - pass # noop for now, can be used to force specific version based on conditions + sha = '5e1427a7da6e878b958fd5a2422c7763a94ff02b' + pkg = pkg_resources.working_set.by_key.get('diffusers', None) + minor = int(pkg.version.split('.')[1] if pkg is not None else 0) + cur = opts.get('diffusers_version', '') if minor > 0 else '' + if (minor == 0) or (cur != sha): + log.debug(f'Diffusers {"install" if minor == 0 else "upgrade"}: current={pkg}@{cur} target={sha}') + if minor > 0: + pip('uninstall --yes diffusers', ignore=True, quiet=True, uv=False) + pip(f'install --upgrade git+https://github.com/huggingface/diffusers@{sha}', ignore=False, quiet=True, uv=False) + global diffusers_commit # pylint: disable=global-statement + diffusers_commit = sha # check onnx version def check_onnx(): if not installed('onnx', quiet=True): install('onnx', 'onnx', ignore=True) - if not installed('onnxruntime', quiet=True) and not ( - installed('onnxruntime-gpu', quiet=True) or - installed('onnxruntime-openvino', quiet=True) or - installed('onnxruntime-training', quiet=True) - ): # allow either - + if not installed('onnxruntime', quiet=True) and not (installed('onnxruntime-gpu', quiet=True) or installed('onnxruntime-openvino', quiet=True) or installed('onnxruntime-training', quiet=True)): # allow either install('onnxruntime', 'onnxruntime', ignore=True) @@ -492,7 +500,7 @@ def install_rocm_zluda(): break log.info(f'ROCm version detected: {rocm.version}') - + torch_command = '' if sys.platform == "win32": #if args.use_zluda: log.warning("ZLUDA support: experimental") @@ -510,6 +518,7 @@ def install_rocm_zluda(): if error is None: if args.device_id is not None: os.environ['HIP_VISIBLE_DEVICES'] = args.device_id + del args.device_id try: zluda_installer.load(zluda_path) torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.3.0 torchvision --index-url https://download.pytorch.org/whl/cu118') @@ -521,7 +530,7 @@ def install_rocm_zluda(): log.info('Using CPU-only torch') torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision') #else: - # TODO TBD after ROCm for Windows is released + # TODO after ROCm for Windows is released else: if rocm.version is None or float(rocm.version) > 6.1: # assume the latest if version check fails torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision --index-url https://download.pytorch.org/whl/rocm6.1') @@ -532,9 +541,13 @@ def install_rocm_zluda(): else: torch_command = os.environ.get('TORCH_COMMAND', f'torch torchvision --index-url https://download.pytorch.org/whl/rocm{rocm.version}') - ort_version = 
os.environ.get('ONNXRUNTIME_VERSION', None) - ort_package = os.environ.get('ONNXRUNTIME_PACKAGE', f"--pre onnxruntime-training{'' if ort_version is None else ('==' + ort_version)} --index-url https://pypi.lsh.sh/{rocm.version[0]}{rocm.version[2]} --extra-index-url https://pypi.org/simple") - install(ort_package, 'onnxruntime-training') + if sys.version_info < (3, 11): + ort_version = os.environ.get('ONNXRUNTIME_VERSION', None) + if rocm.version is None or float(rocm.version) > 6.0: + ort_package = os.environ.get('ONNXRUNTIME_PACKAGE', f"--pre onnxruntime-training{'' if ort_version is None else ('==' + ort_version)} --index-url https://pypi.lsh.sh/60 --extra-index-url https://pypi.org/simple") + else: + ort_package = os.environ.get('ONNXRUNTIME_PACKAGE', f"--pre onnxruntime-training{'' if ort_version is None else ('==' + ort_version)} --index-url https://pypi.lsh.sh/{rocm.version[0]}{rocm.version[2]} --extra-index-url https://pypi.org/simple") + install(ort_package, 'onnxruntime-training') if hip_default_device is not None and rocm.version != "6.2" and rocm.version == rocm.version_torch and rocm.get_blaslt_enabled(): log.debug(f'hipBLASLt arch={hip_default_device.name} available={hip_default_device.blaslt_supported}') @@ -551,13 +564,8 @@ def install_ipex(torch_command): if os.environ.get("ClDeviceGlobalMemSizeAvailablePercent", None) is None: os.environ.setdefault('ClDeviceGlobalMemSizeAvailablePercent', '100') if "linux" in sys.platform: - torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.1.0.post3 torchvision==0.16.0.post3 intel-extension-for-pytorch==2.1.40+xpu oneccl_bind_pt==2.1.400+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/') - # os.environ.setdefault('TENSORFLOW_PACKAGE', 'tensorflow==2.15.0 intel-extension-for-tensorflow[xpu]==2.15.0.0') - if os.environ.get('DISABLE_VENV_LIBS', None) is None: - install(os.environ.get('MKL_PACKAGE', 'mkl==2024.2.0'), 'mkl') - install(os.environ.get('DPCPP_PACKAGE', 'mkl-dpcpp==2024.2.0'), 'mkl-dpcpp') - install(os.environ.get('ONECCL_PACKAGE', 'oneccl-devel==2021.13.0'), 'oneccl-devel') - install(os.environ.get('MPI_PACKAGE', 'impi-devel==2021.13.0'), 'impi-devel') + torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.3.1+cxx11.abi torchvision==0.18.1+cxx11.abi intel-extension-for-pytorch==2.3.110+xpu oneccl_bind_pt==2.3.100+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/') + # os.environ.setdefault('TENSORFLOW_PACKAGE', 'tensorflow==2.15.1 intel-extension-for-tensorflow[xpu]==2.15.0.1') else: if sys.version_info.minor == 11: pytorch_pip = 'https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.1.10%2Bxpu/torch-2.1.0a0+cxx11.abi-cp311-cp311-win_amd64.whl' @@ -570,26 +578,26 @@ def install_ipex(torch_command): ipex_pip = 'https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.1.10%2Bxpu/intel_extension_for_pytorch-2.1.10+xpu-cp310-cp310-win_amd64.whl' torch_command = os.environ.get('TORCH_COMMAND', f'{pytorch_pip} {torchvision_pip} {ipex_pip}') else: - torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.1.0.post0 torchvision==0.16.0.post0 intel-extension-for-pytorch==2.1.20+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/') + torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.1.0.post3 torchvision==0.16.0.post3 intel-extension-for-pytorch==2.1.40+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/') if os.environ.get('DISABLE_VENV_LIBS', 
None) is None: - install(os.environ.get('MKL_PACKAGE', 'mkl==2024.1.0'), 'mkl') - install(os.environ.get('DPCPP_PACKAGE', 'mkl-dpcpp==2024.1.0'), 'mkl-dpcpp') - install(os.environ.get('ONECCL_PACKAGE', 'oneccl-devel==2021.12.0'), 'oneccl-devel') - install(os.environ.get('MPI_PACKAGE', 'impi-devel==2021.12.0'), 'impi-devel') + install(os.environ.get('MKL_PACKAGE', 'mkl==2024.2.0'), 'mkl') + install(os.environ.get('DPCPP_PACKAGE', 'mkl-dpcpp==2024.2.0'), 'mkl-dpcpp') + install(os.environ.get('ONECCL_PACKAGE', 'oneccl-devel==2021.13.0'), 'oneccl-devel') + install(os.environ.get('MPI_PACKAGE', 'impi-devel==2021.13.0'), 'impi-devel') torch_command = os.environ.get('TORCH_COMMAND', f'{pytorch_pip} {torchvision_pip} {ipex_pip}') - install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2023.3.0'), 'openvino', ignore=True) + install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.3.0'), 'openvino', ignore=True) install('nncf==2.7.0', 'nncf', ignore=True) install(os.environ.get('ONNXRUNTIME_PACKAGE', 'onnxruntime-openvino'), 'onnxruntime-openvino', ignore=True) return torch_command def install_openvino(torch_command): - check_python(supported_minors=[9, 10, 11], reason='OpenVINO backend requires Python 3.9, 3.10 or 3.11') + check_python(supported_minors=[8, 9, 10, 11, 12], reason='OpenVINO backend requires Python 3.9, 3.10 or 3.11') log.info('Using OpenVINO') - torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.2.0 torchvision==0.17.0 --index-url https://download.pytorch.org/whl/cpu') - install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2023.3.0'), 'openvino') + torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.3.1 torchvision==0.18.1 --index-url https://download.pytorch.org/whl/cpu') + install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.3.0'), 'openvino') install(os.environ.get('ONNXRUNTIME_PACKAGE', 'onnxruntime-openvino'), 'onnxruntime-openvino', ignore=True) - install('nncf==2.8.1', 'nncf') + install('nncf==2.12.0', 'nncf') os.environ.setdefault('PYTORCH_TRACING_MODE', 'TORCHFX') if os.environ.get("NEOReadDebugKeys", None) is None: os.environ.setdefault('NEOReadDebugKeys', '1') diff --git a/javascript/extraNetworks.js b/javascript/extraNetworks.js index e0294b4f4..ab818a8d1 100644 --- a/javascript/extraNetworks.js +++ b/javascript/extraNetworks.js @@ -421,21 +421,23 @@ function setupExtraNetworksForTab(tabname) { // card hover let hoverTimer = null; let previousCard = null; - gradioApp().getElementById(`${tabname}_extra_tabs`).onmouseover = (e) => { - const el = e.target.closest('.card'); // bubble-up to card - if (!el || (el.title === previousCard)) return; - if (!hoverTimer) { - hoverTimer = setTimeout(() => { - readCardDescription(el.dataset.page, el.dataset.name); - readCardTags(el, el.dataset.tags); - previousCard = el.title; - }, 300); - } - el.onmouseout = () => { - clearTimeout(hoverTimer); - hoverTimer = null; + if (window.opts.extra_networks_fetch) { + gradioApp().getElementById(`${tabname}_extra_tabs`).onmouseover = async (e) => { + const el = e.target.closest('.card'); // bubble-up to card + if (!el || (el.title === previousCard)) return; + if (!hoverTimer) { + hoverTimer = setTimeout(() => { + readCardDescription(el.dataset.page, el.dataset.name); + readCardTags(el, el.dataset.tags); + previousCard = el.title; + }, 300); + } + el.onmouseout = () => { + clearTimeout(hoverTimer); + hoverTimer = null; + }; }; - }; + } // en style if (!en) return; diff --git a/javascript/sdnext.css b/javascript/sdnext.css index 6f6cfc200..d2b59771b 100644 --- 
a/javascript/sdnext.css +++ b/javascript/sdnext.css @@ -285,11 +285,12 @@ table.settings-value-table td { padding: 0.4em; border: 1px solid #ccc; max-widt #control_script_container { display: block; margin-top: 1em; border-width: 2px 0 0 0; border-style: solid; border-color: var(--highlight-color); } .control-button { min-height: 42px; max-height: 42px; line-height: 1em; } .control-tabs > .tab-nav { margin-bottom: 0; margin-top: 0; } -.control-unit { max-width: 1200px; padding: 0 !important; margin-top: -10px !important; } +.control-unit { padding: 0 !important; margin-top: -10px !important; } .control-unit > .label-wrap { margin-bottom: 0 !important; } .control-settings { border-width: var(--block-border-width) !important; border-top: var(--button-primary-border-color) !important; border-style: solid !important; margin-top: 1em !important; } .processor-settings { padding: 0 !important; max-width: 300px; } .processor-group > div { flex-flow: wrap;gap: 1em; } +.control-unit .gradio-button.tool { align-self: baseline; margin-top: 2rem; } /* main info */ .main-info { font-weight: var(--section-header-text-weight); color: var(--body-text-color-subdued); padding: 1em !important; margin-top: 2em !important; line-height: var(--line-lg) !important; } diff --git a/javascript/startup.js b/javascript/startup.js index f1a44faf5..245a3eae8 100644 --- a/javascript/startup.js +++ b/javascript/startup.js @@ -17,7 +17,6 @@ async function initStartup() { initImageViewer(); initGallery(); setupControlUI(); - setupExtraNetworks(); // reconnect server session await reconnectUI(); @@ -25,6 +24,7 @@ async function initStartup() { // make sure all of the ui is ready and options are loaded while (Object.keys(window.opts).length === 0) await sleep(50); executeCallbacks(uiReadyCallbacks); + setupExtraNetworks(); // optinally wait for modern ui if (window.waitForUiReady) await waitForUiReady(); diff --git a/modules/api/control.py b/modules/api/control.py index 9c93b6bc6..cf8916095 100644 --- a/modules/api/control.py +++ b/modules/api/control.py @@ -103,6 +103,7 @@ def prepare_ip_adapter(self, request): args['ip_adapter_scales'].append(ipadapter.scale) args['ip_adapter_starts'].append(ipadapter.start) args['ip_adapter_ends'].append(ipadapter.end) + args['ip_adapter_crops'].append(ipadapter.crop) args['ip_adapter_images'].append([helpers.decode_base64_to_image(x) for x in ipadapter.images]) if ipadapter.masks: args['ip_adapter_masks'].append([helpers.decode_base64_to_image(x) for x in ipadapter.masks]) diff --git a/modules/api/models.py b/modules/api/models.py index 4aba4fb01..3c5fd6146 100644 --- a/modules/api/models.py +++ b/modules/api/models.py @@ -149,12 +149,13 @@ class ItemEmbedding(BaseModel): vectors: int = Field(title="Vectors", description="The number of vectors in the embedding") class ItemIPAdapter(BaseModel): - adapter: str = Field(title="Adapter", default="Base", description="") - images: List[str] = Field(title="Image", default=[], description="") - masks: Optional[List[str]] = Field(title="Mask", default=[], description="") - scale: float = Field(title="Scale", default=0.5, ge=0, le=1, description="") - start: float = Field(title="Start", default=0.0, ge=0, le=1, description="") - end: float = Field(title="End", default=1.0, gt=0, le=1, description="") + adapter: str = Field(title="Adapter", default="Base", description="IP adapter name") + images: List[str] = Field(title="Image", default=[], description="IP adapter input images") + masks: Optional[List[str]] = Field(title="Mask", default=[],
description="IP adapter mask images") + scale: float = Field(title="Scale", default=0.5, ge=0, le=1, description="IP adapter scale") + start: float = Field(title="Start", default=0.0, ge=0, le=1, description="IP adapter start step") + end: float = Field(title="End", default=1.0, gt=0, le=1, description="IP adapter end step") + crop: bool = Field(title="Crop", default=False, description="IP adapter crop face from input") class ItemFace(BaseModel): mode: str = Field(title="Mode", default="FaceID", description="The mode to use (available values: FaceID, FaceSwap, PhotoMaker, InstantID).") diff --git a/modules/control/run.py b/modules/control/run.py index 41204fa49..5b8a4bf36 100644 --- a/modules/control/run.py +++ b/modules/control/run.py @@ -62,11 +62,11 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini full_quality: bool = True, restore_faces: bool = False, tiling: bool = False, hidiffusion: bool = False, hdr_mode: int = 0, hdr_brightness: float = 0, hdr_color: float = 0, hdr_sharpen: float = 0, hdr_clamp: bool = False, hdr_boundary: float = 4.0, hdr_threshold: float = 0.95, hdr_maximize: bool = False, hdr_max_center: float = 0.6, hdr_max_boundry: float = 1.0, hdr_color_picker: str = None, hdr_tint_ratio: float = 0, - resize_mode_before: int = 0, resize_name_before: str = 'None', width_before: int = 512, height_before: int = 512, scale_by_before: float = 1.0, selected_scale_tab_before: int = 0, - resize_mode_after: int = 0, resize_name_after: str = 'None', width_after: int = 0, height_after: int = 0, scale_by_after: float = 1.0, selected_scale_tab_after: int = 0, - resize_mode_mask: int = 0, resize_name_mask: str = 'None', width_mask: int = 0, height_mask: int = 0, scale_by_mask: float = 1.0, selected_scale_tab_mask: int = 0, + resize_mode_before: int = 0, resize_name_before: str = 'None', resize_context_before: str = 'None', width_before: int = 512, height_before: int = 512, scale_by_before: float = 1.0, selected_scale_tab_before: int = 0, + resize_mode_after: int = 0, resize_name_after: str = 'None', resize_context_after: str = 'None', width_after: int = 0, height_after: int = 0, scale_by_after: float = 1.0, selected_scale_tab_after: int = 0, + resize_mode_mask: int = 0, resize_name_mask: str = 'None', resize_context_mask: str = 'None', width_mask: int = 0, height_mask: int = 0, scale_by_mask: float = 1.0, selected_scale_tab_mask: int = 0, denoising_strength: float = 0, batch_count: int = 1, batch_size: int = 1, - enable_hr: bool = False, hr_sampler_index: int = None, hr_denoising_strength: float = 0.3, hr_upscaler: str = None, hr_force: bool = False, hr_second_pass_steps: int = 20, + enable_hr: bool = False, hr_sampler_index: int = None, hr_denoising_strength: float = 0.3, hr_resize_mode: int = 0, hr_resize_context: str = 'None', hr_upscaler: str = None, hr_force: bool = False, hr_second_pass_steps: int = 20, hr_scale: float = 1.0, hr_resize_x: int = 0, hr_resize_y: int = 0, refiner_steps: int = 5, refiner_start: float = 0.0, refiner_prompt: str = '', refiner_negative: str = '', video_skip_frames: int = 0, video_type: str = 'None', video_duration: float = 2.0, video_loop: bool = False, video_pad: int = 0, video_interpolate: int = 0, *input_script_args @@ -180,6 +180,8 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini p.enable_hr = enable_hr p.hr_sampler_name = processing.get_sampler_name(hr_sampler_index) p.hr_denoising_strength = hr_denoising_strength + p.hr_resize_mode = hr_resize_mode + p.hr_resize_context = 
hr_resize_context p.hr_upscaler = hr_upscaler p.hr_force = hr_force p.hr_second_pass_steps = hr_second_pass_steps @@ -217,10 +219,9 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.cpu}') sd_models.move_model(u.controlnet.model, devices.cpu) continue - else: - if u.controlnet is not None and u.controlnet.model is not None: - debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.device}') - sd_models.move_model(u.controlnet.model, devices.device) + if u.controlnet is not None and u.controlnet.model is not None: + debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.device}') + sd_models.move_model(u.controlnet.model, devices.device) if unit_type == 't2i adapter' and u.adapter.model is not None: active_process.append(u.process) active_model.append(u.adapter) @@ -234,7 +235,8 @@ def control_run(units: List[unit.Unit] = [], inputs: List[Image.Image] = [], ini active_start.append(float(u.start)) active_end.append(float(u.end)) p.guess_mode = u.guess - shared.log.debug(f'Control ControlNet unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end}') + p.control_mode = u.mode + shared.log.debug(f'Control ControlNet unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end} mode={u.mode}') elif unit_type == 'xs' and u.controlnet.model is not None: active_process.append(u.process) active_model.append(u.controlnet) @@ -388,7 +390,7 @@ def set_pipe(): video = cv2.VideoCapture(inputs) if not video.isOpened(): if is_generator: - yield terminate(f'Control: video open failed: path={inputs}') + yield terminate(f'Video open failed: path={inputs}') return [], '', '', 'Error: video open failed' frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) fps = int(video.get(cv2.CAP_PROP_FPS)) @@ -401,7 +403,7 @@ def set_pipe(): shared.log.debug(f'Control: input video: path={inputs} frames={frames} fps={fps} size={w}x{h} codec={codec}') except Exception as e: if is_generator: - yield terminate(f'Control: video open failed: path={inputs} {e}') + yield terminate(f'Video open failed: path={inputs} {e}') return [], '', '', 'Error: video open failed' while status: @@ -419,7 +421,7 @@ def set_pipe(): if shared.state.interrupted: shared.state.interrupted = False if is_generator: - yield terminate('Control interrupted') + yield terminate('Interrupted') return [], '', '', 'Interrupted' # get input if isinstance(input_image, str): @@ -456,8 +458,8 @@ def set_pipe(): width_before, height_before = int(input_image.width * scale_by_before), int(input_image.height * scale_by_before) if input_image is not None: p.extra_generation_params["Control resize"] = f'{resize_name_before}' - debug(f'Control resize: op=before image={input_image} width={width_before} height={height_before} mode={resize_mode_before} name={resize_name_before}') - input_image = images.resize_image(resize_mode_before, input_image, width_before, height_before, resize_name_before) + debug(f'Control resize: op=before image={input_image} width={width_before} height={height_before} mode={resize_mode_before} name={resize_name_before} context="{resize_context_before}"') + input_image = images.resize_image(resize_mode_before, input_image, width_before, height_before, resize_name_before, context=resize_context_before) if input_image is not 
None and init_image is not None and init_image.size != input_image.size: debug(f'Control resize init: image={init_image} target={input_image}') init_image = images.resize_image(resize_mode=1, im=init_image, width=input_image.width, height=input_image.height) @@ -508,7 +510,7 @@ def set_pipe(): pass if any(img is None for img in processed_images): if is_generator: - yield terminate('Control: attempting process but output is none') + yield terminate('Attempting process but output is none') return [], '', '', 'Error: output is none' if len(processed_images) > 1 and len(active_process) != len(active_model): processed_image = [np.array(i) for i in processed_images] @@ -527,7 +529,7 @@ def set_pipe(): p.init_images = processed_images elif isinstance(selected_models, list) and len(processed_images) != len(selected_models): if is_generator: - yield terminate(f'Control: number of inputs does not match: input={len(processed_images)} models={len(selected_models)}') + yield terminate(f'Number of inputs does not match: input={len(processed_images)} models={len(selected_models)}') return [], '', '', 'Error: number of inputs does not match' elif selected_models is not None: p.init_images = processed_image @@ -542,19 +544,24 @@ def set_pipe(): debug(f'Control: process=None image={p.ref_image}') if p.ref_image is None: if is_generator: - yield terminate('Control: attempting reference mode but image is none') + yield terminate('Attempting reference mode but image is none') return [], '', '', 'Reference mode without image' - elif unit_type == 'controlnet' and input_type == 1 and has_models: # Init image same as control - p.task_args['control_image'] = p.init_images # switch image and control_image - p.task_args['strength'] = p.denoising_strength - p.init_images = [p.override or input_image] * len(active_model) - elif unit_type == 'controlnet' and input_type == 2 and has_models: # Separate init image - if init_image is None: - shared.log.warning('Control: separate init image not provided') - init_image = input_image - p.task_args['control_image'] = p.init_images # switch image and control_image - p.task_args['strength'] = p.denoising_strength - p.init_images = [init_image] * len(active_model) + elif unit_type == 'controlnet' and has_models: + if input_type == 0: # Control only + if shared.sd_model_type == 'f1': + p.task_args['control_image'] = p.init_images # flux controlnet mandates this + p.task_args['strength'] = p.denoising_strength + elif input_type == 1: # Init image same as control + p.task_args['control_image'] = p.init_images # switch image and control_image + p.task_args['strength'] = p.denoising_strength + p.init_images = [p.override or input_image] * len(active_model) + elif input_type == 2: # Separate init image + if init_image is None: + shared.log.warning('Control: separate init image not provided') + init_image = input_image + p.task_args['control_image'] = p.init_images # switch image and control_image + p.task_args['strength'] = p.denoising_strength + p.init_images = [init_image] * len(active_model) if is_generator: image_txt = f'{blended_image.width}x{blended_image.height}' if blended_image is not None else 'None' @@ -596,6 +603,8 @@ def set_pipe(): if unit_type == 'lite': p.init_image = [input_image] instance.apply(selected_models, processed_image, control_conditioning) + if p.control_mode is not None: + p.task_args['control_mode'] = p.control_mode if hasattr(p, 'init_images') and p.init_images is None: # delete empty del p.init_images @@ -603,7 +612,7 @@ def set_pipe(): if has_models: if 
unit_type in ['controlnet', 't2i adapter', 'lite', 'xs'] and p.task_args.get('image', None) is None and getattr(p, 'init_images', None) is None: if is_generator: - yield terminate(f'Control: mode={p.extra_generation_params.get("Control mode", None)} input image is none') + yield terminate(f'Mode={p.extra_generation_params.get("Control mode", None)} input image is none') return [], '', '', 'Error: Input image is none' # resize mask @@ -611,7 +620,7 @@ def set_pipe(): if selected_scale_tab_mask == 1: width_mask, height_mask = int(input_image.width * scale_by_mask), int(input_image.height * scale_by_mask) p.width, p.height = width_mask, height_mask - debug(f'Control resize: op=mask image={mask} width={width_mask} height={height_mask} mode={resize_mode_mask} name={resize_name_mask}') + debug(f'Control resize: op=mask image={mask} width={width_mask} height={height_mask} mode={resize_mode_mask} name={resize_name_mask} context="{resize_context_mask}"') # pipeline output = None @@ -638,6 +647,7 @@ def set_pipe(): processed: processing.Processed = processing.process_images(p) # run actual pipeline else: script_run = True + processed = p.scripts.after(p, processed, *p.script_args) output = None if processed is not None: output = processed.images @@ -659,8 +669,8 @@ def set_pipe(): width_after = int(output_image.width * scale_by_after) height_after = int(output_image.height * scale_by_after) if resize_mode_after != 0 and resize_name_after != 'None' and not is_grid: - debug(f'Control resize: op=after image={output_image} width={width_after} height={height_after} mode={resize_mode_after} name={resize_name_after}') - output_image = images.resize_image(resize_mode_after, output_image, width_after, height_after, resize_name_after) + debug(f'Control resize: op=after image={output_image} width={width_after} height={height_after} mode={resize_mode_after} name={resize_name_after} context="{resize_context_after}"') + output_image = images.resize_image(resize_mode_after, output_image, width_after, height_after, resize_name_after, context=resize_context_after) output_images.append(output_image) if shared.opts.include_mask and not script_run: diff --git a/modules/control/unit.py b/modules/control/unit.py index b2b102b9b..51101493d 100644 --- a/modules/control/unit.py +++ b/modules/control/unit.py @@ -36,6 +36,7 @@ def __init__(self, image_preview = None, control_start = None, control_end = None, + control_mode = None, result_txt = None, extra_controls: list = [], ): @@ -46,6 +47,7 @@ def __init__(self, self.end = end or 1 self.start = min(self.start, self.end) self.end = max(self.start, self.end) + self.mode = None # processor always exists, adapter and controlnet are optional self.process: processors.Processor = processors.Processor() self.adapter: t2iadapter.Adapter = None @@ -83,6 +85,12 @@ def control_change(start, end): self.start = min(start, end) self.end = max(start, end) + def control_mode_change(mode): + self.mode = mode - 1 if mode > 0 else None + + def control_mode_show(model_id): + return gr.update(visible='union' in model_id.lower()) + def adapter_extra(c1): self.factor = c1 @@ -156,6 +164,7 @@ def set_image(image): self.controlnet.load(model_id) else: model_id.change(fn=self.controlnet.load, inputs=[model_id], outputs=[result_txt], show_progress=True) + model_id.change(fn=control_mode_show, inputs=[model_id], outputs=[control_mode], show_progress=False) if extra_controls is not None and len(extra_controls) > 0: extra_controls[0].change(fn=controlnet_extra, inputs=extra_controls) elif self.type == 
'xs': @@ -202,3 +211,5 @@ def set_image(image): if control_start is not None and control_end is not None: control_start.change(fn=control_change, inputs=[control_start, control_end]) control_end.change(fn=control_change, inputs=[control_start, control_end]) + if control_mode is not None: + control_mode.change(fn=control_mode_change, inputs=[control_mode]) diff --git a/modules/control/units/controlnet.py b/modules/control/units/controlnet.py index 419e119fc..a200f59a2 100644 --- a/modules/control/units/controlnet.py +++ b/modules/control/units/controlnet.py @@ -1,10 +1,10 @@ import os import time from typing import Union -from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline, ControlNetModel, StableDiffusionControlNetPipeline, StableDiffusionXLControlNetPipeline +from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline, FluxPipeline, ControlNetModel from modules.control.units import detect from modules.shared import log, opts, listdir -from modules import errors, sd_models +from modules import errors, sd_models, devices what = 'ControlNet' @@ -80,6 +80,7 @@ all_models = {} all_models.update(predefined_sd15) all_models.update(predefined_sdxl) +all_models.update(predefined_f1) cache_dir = 'models/control/controlnet' @@ -139,6 +140,19 @@ def reset(self): self.model = None self.model_id = None + def get_class(self): + import modules.shared + if modules.shared.sd_model_type == 'sd': + from diffusers import ControlNetModel as model_class # pylint: disable=reimported + elif modules.shared.sd_model_type == 'sdxl': + from diffusers import ControlNetModel as model_class # pylint: disable=reimported # sdxl shares same model class + elif modules.shared.sd_model_type == 'f1': + from diffusers import FluxControlNetModel as model_class + else: + log.error(f'Control {what}: type={modules.shared.sd_model_type} unsupported model') + return None + return model_class + def load_safetensors(self, model_path): name = os.path.splitext(model_path)[0] config_path = None @@ -164,7 +178,8 @@ def load_safetensors(self, model_path): config_path = f'{name}.json' if config_path is not None: self.load_config['original_config_file '] = config_path - self.model = ControlNetModel.from_single_file(model_path, **self.load_config) + cls = self.get_class() + self.model = cls.from_single_file(model_path, **self.load_config) def load(self, model_id: str = None) -> str: try: @@ -189,7 +204,8 @@ def load(self, model_id: str = None) -> str: if '/bin' in model_path: model_path = model_path.replace('/bin', '') self.load_config['use_safetensors'] = False - self.model = ControlNetModel.from_pretrained(model_path, **self.load_config) + cls = self.get_class() + self.model = cls.from_pretrained(model_path, **self.load_config) if self.dtype is not None: self.model.to(self.dtype) if "ControlNet" in opts.nncf_compress_weights: @@ -223,7 +239,7 @@ def load(self, model_id: str = None) -> str: class ControlNetPipeline(): - def __init__(self, controlnet: Union[ControlNetModel, list[ControlNetModel]], pipeline: Union[StableDiffusionXLPipeline, StableDiffusionPipeline], dtype = None): + def __init__(self, controlnet: Union[ControlNetModel, list[ControlNetModel]], pipeline: Union[StableDiffusionXLPipeline, StableDiffusionPipeline, FluxPipeline], dtype = None): t0 = time.time() self.orig_pipeline = pipeline self.pipeline = None @@ -231,6 +247,7 @@ def __init__(self, controlnet: Union[ControlNetModel, list[ControlNetModel]], pi log.error('Control model pipeline: model not loaded') return elif detect.is_sdxl(pipeline): 
+ from diffusers import StableDiffusionXLControlNetPipeline self.pipeline = StableDiffusionXLControlNetPipeline( vae=pipeline.vae, text_encoder=pipeline.text_encoder, @@ -242,8 +259,8 @@ def __init__(self, controlnet: Union[ControlNetModel, list[ControlNetModel]], pi feature_extractor=getattr(pipeline, 'feature_extractor', None), controlnet=controlnet, # can be a list ) - sd_models.move_model(self.pipeline, pipeline.device) elif detect.is_sd15(pipeline): + from diffusers import StableDiffusionControlNetPipeline self.pipeline = StableDiffusionControlNetPipeline( vae=pipeline.vae, text_encoder=pipeline.text_encoder, @@ -257,17 +274,33 @@ def __init__(self, controlnet: Union[ControlNetModel, list[ControlNetModel]], pi ) sd_models.move_model(self.pipeline, pipeline.device) elif detect.is_f1(pipeline): - log.warning('Control model pipeline: class=FluxPipeline unsupported model type') + from diffusers import FluxControlNetPipeline + self.pipeline = FluxControlNetPipeline( + vae=pipeline.vae, + text_encoder=pipeline.text_encoder, + text_encoder_2=pipeline.text_encoder_2, + tokenizer=pipeline.tokenizer, + tokenizer_2=pipeline.tokenizer_2, + transformer=pipeline.transformer, + scheduler=pipeline.scheduler, + controlnet=controlnet, # can be a list + ) else: log.error(f'Control {what} pipeline: class={pipeline.__class__.__name__} unsupported model type') return - if dtype is not None and self.pipeline is not None: + + if self.pipeline is None: + log.error(f'Control {what} pipeline: not initialized') + return + if dtype is not None: self.pipeline = self.pipeline.to(dtype) + if opts.diffusers_offload_mode == 'none': + sd_models.move_model(self.pipeline, devices.device) + from modules.sd_models import set_diffuser_offload + set_diffuser_offload(self.pipeline, 'model') + t1 = time.time() - if self.pipeline is not None: - log.debug(f'Control {what} pipeline: class={self.pipeline.__class__.__name__} time={t1-t0:.2f}') - else: - log.error(f'Control {what} pipeline: not initialized') + log.debug(f'Control {what} pipeline: class={self.pipeline.__class__.__name__} time={t1-t0:.2f}') def restore(self): self.pipeline = None diff --git a/modules/dcsolver/__init__.py b/modules/dcsolver/__init__.py new file mode 100644 index 000000000..a1ccfbeba --- /dev/null +++ b/modules/dcsolver/__init__.py @@ -0,0 +1,1096 @@ +# Copyright 2023 TSAIL Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# DISCLAIMER: check https://arxiv.org/abs/2302.04867 and https://github.com/wl-zhao/UniPC for more info +# The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py + +import math +from typing import List, Optional, Tuple, Union + +import numpy as np +import torch + +# from ..configuration_utils import ConfigMixin, register_to_config +# from ..utils import deprecate +# from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput +from diffusers.configuration_utils import ConfigMixin, register_to_config +from diffusers.utils import deprecate +from diffusers.schedulers.scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput + + +# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar +def betas_for_alpha_bar( + num_diffusion_timesteps, + max_beta=0.999, + alpha_transform_type="cosine", +): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up + to that part of the diffusion process. + + + Args: + num_diffusion_timesteps (`int`): the number of betas to produce. + max_beta (`float`): the maximum beta to use; use values lower than 1 to + prevent singularities. + alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. + Choose from `cosine` or `exp` + + Returns: + betas (`np.ndarray`): the betas used by the scheduler to step the model outputs + """ + if alpha_transform_type == "cosine": + + def alpha_bar_fn(t): + return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2 + + elif alpha_transform_type == "exp": + + def alpha_bar_fn(t): + return math.exp(t * -12.0) + + else: + raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}") + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta)) + return torch.tensor(betas, dtype=torch.float32) + + +class DCSolverMultistepScheduler(SchedulerMixin, ConfigMixin): + """ + `UniPCMultistepScheduler` is a training-free framework designed for the fast sampling of diffusion models. + + Dynamic Extropolation + + This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic + methods the library implements for all schedulers such as loading and saving. + + Args: + num_train_timesteps (`int`, defaults to 1000): + The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.0001): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.02): + The final `beta` value. + beta_schedule (`str`, defaults to `"linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from + `linear`, `scaled_linear`, or `squaredcos_cap_v2`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. + solver_order (`int`, default `2`): + The UniPC order which can be any positive integer. The effective order of accuracy is `solver_order + 1` + due to the UniC. It is recommended to use `solver_order=2` for guided sampling, and `solver_order=3` for + unconditional sampling. 
+ prediction_type (`str`, defaults to `epsilon`, *optional*): + Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process), + `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen + Video](https://imagen.research.google/video/paper.pdf) paper). + thresholding (`bool`, defaults to `False`): + Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such + as Stable Diffusion. + dynamic_thresholding_ratio (`float`, defaults to 0.995): + The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. + sample_max_value (`float`, defaults to 1.0): + The threshold value for dynamic thresholding. Valid only when `thresholding=True` and `predict_x0=True`. + predict_x0 (`bool`, defaults to `True`): + Whether to use the updating algorithm on the predicted x0. + solver_type (`str`, default `bh2`): + Solver type for UniPC. It is recommended to use `bh1` for unconditional sampling when steps < 10, and `bh2` + otherwise. + lower_order_final (`bool`, default `True`): + Whether to use lower-order solvers in the final steps. Only valid for < 15 inference steps. This can + stabilize the sampling of DPMSolver for steps < 15, especially for steps <= 10. + disable_corrector (`list`, default `[]`): + Decides which step to disable the corrector to mitigate the misalignment between `epsilon_theta(x_t, c)` + and `epsilon_theta(x_t^c, c)` which can influence convergence for a large guidance scale. Corrector is + usually disabled during the first few steps. + solver_p (`SchedulerMixin`, default `None`): + Any other scheduler that if specified, the algorithm becomes `solver_p + UniC`. + use_karras_sigmas (`bool`, *optional*, defaults to `False`): + Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`, + the sigmas are determined according to a sequence of noise levels {σi}. + timestep_spacing (`str`, defaults to `"linspace"`): + The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information. + steps_offset (`int`, defaults to 0): + An offset added to the inference steps. You can use a combination of `offset=1` and + `set_alpha_to_one=False` to make the last step use step 0 for the previous alpha product like in Stable + Diffusion. 
+ """ + + _compatibles = [e.name for e in KarrasDiffusionSchedulers] + order = 1 + + @register_to_config + def __init__( + self, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = "linear", + trained_betas: Optional[Union[np.ndarray, List[float]]] = None, + solver_order: int = 2, + dc_order: int = 2, + prediction_type: str = "epsilon", + thresholding: bool = False, + dynamic_thresholding_ratio: float = 0.995, + sample_max_value: float = 1.0, + predict_x0: bool = True, + solver_type: str = "bh2", + lower_order_final: bool = True, + disable_corrector: List[int] = [], + solver_p: SchedulerMixin = None, + use_karras_sigmas: Optional[bool] = False, + timestep_spacing: str = "linspace", + steps_offset: int = 0, + # ddim_gt_path: str = None, + ddim_gt=None, + num_iters=20, + bound_func='none', + ): + if trained_betas is not None: + self.betas = torch.tensor(trained_betas, dtype=torch.float32) + elif beta_schedule == "linear": + self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = ( + torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 + ) + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) + # Currently we only support VP-type noise schedule + self.alpha_t = torch.sqrt(self.alphas_cumprod) + self.sigma_t = torch.sqrt(1 - self.alphas_cumprod) + self.lambda_t = torch.log(self.alpha_t) - torch.log(self.sigma_t) + + # standard deviation of the initial noise distribution + self.init_noise_sigma = 1.0 + + if solver_type not in ["bh1", "bh2"]: + if solver_type in ["midpoint", "heun", "logrho"]: + self.register_to_config(solver_type="bh2") + else: + raise NotImplementedError(f"{solver_type} does is not implemented for {self.__class__}") + + self.predict_x0 = predict_x0 + # setable values + self.num_inference_steps = None + timesteps = np.linspace(0, num_train_timesteps - 1, num_train_timesteps, dtype=np.float32)[::-1].copy() + self.timesteps = torch.from_numpy(timesteps) + self.buffer_size = max(solver_order, dc_order + 1) + self.num_iters = num_iters + self.model_outputs = [None] * self.buffer_size + self.timestep_list = [None] * self.buffer_size + self.lower_order_nums = 0 + self.disable_corrector = disable_corrector + self.solver_p = solver_p + self.last_sample = None + self._step_index = None + + if ddim_gt is not None: + self.ddim_gt = dict( + ts=ddim_gt['ts'].cpu().numpy(), + intermediates=ddim_gt['intermediates'].cpu().numpy(), + ) + else: + self.ddim_gt = None + self.bound_func = bound_func + self.dc_order = dc_order + + @property + def step_index(self): + """ + The index counter for current timestep. It will increae 1 after each scheduler step. + """ + return self._step_index + + def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None): + """ + Sets the discrete timesteps used for the diffusion chain (to be run before inference). + + Args: + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. 
+ device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + """ + # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891 + if self.config.timestep_spacing == "linspace": + timesteps = ( + np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1) + .round()[::-1][:-1] + .copy() + .astype(np.int64) + ) + elif self.config.timestep_spacing == "leading": + step_ratio = self.config.num_train_timesteps // (num_inference_steps + 1) + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = (np.arange(0, num_inference_steps + 1) * step_ratio).round()[::-1][:-1].copy().astype(np.int64) + timesteps += self.config.steps_offset + elif self.config.timestep_spacing == "trailing": + step_ratio = self.config.num_train_timesteps / num_inference_steps + # creates integer timesteps by multiplying by ratio + # casting to int to avoid issues when num_inference_step is power of 3 + timesteps = np.arange(self.config.num_train_timesteps, 0, -step_ratio).round().copy().astype(np.int64) + timesteps -= 1 + else: + raise ValueError( + f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'linspace', 'leading' or 'trailing'." + ) + + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) + if self.config.use_karras_sigmas: + log_sigmas = np.log(sigmas) + sigmas = np.flip(sigmas).copy() + sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps) + timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round() + sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32) + else: + sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas) + sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5 + sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32) + + self.sigmas = torch.from_numpy(sigmas) + self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.int64) + + self.num_inference_steps = len(timesteps) + + self.model_outputs = [None] * self.buffer_size + self.timestep_list = [None] * self.buffer_size + + self.lower_order_nums = 0 + self.last_sample = None + if self.solver_p: + self.solver_p.set_timesteps(self.num_inference_steps, device=device) + + # add an index counter for schedulers that allow duplicated timesteps + self._step_index = None + # also init the ratios + self.dc_ratios = [] + + # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample + def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor: + """ + "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the + prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by + s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing + pixels from saturation at each step. We find that dynamic thresholding results in significantly better + photorealism as well as better image-text alignment, especially when using very large guidance weights." 
+ + https://arxiv.org/abs/2205.11487 + """ + dtype = sample.dtype + batch_size, channels, *remaining_dims = sample.shape + + if dtype not in (torch.float32, torch.float64): + sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half + + # Flatten sample for doing quantile calculation along each image + sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) + + abs_sample = sample.abs() # "a certain percentile absolute pixel value" + + s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) + s = torch.clamp( + s, min=1, max=self.config.sample_max_value + ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] + s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 + sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" + + sample = sample.reshape(batch_size, channels, *remaining_dims) + sample = sample.to(dtype) + + return sample + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t + def _sigma_to_t(self, sigma, log_sigmas): + # get log sigma + log_sigma = np.log(sigma) + + # get distribution + dists = log_sigma - log_sigmas[:, np.newaxis] + + # get sigmas range + low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2) + high_idx = low_idx + 1 + + low = log_sigmas[low_idx] + high = log_sigmas[high_idx] + + # interpolate sigmas + w = (low - log_sigma) / (low - high) + w = np.clip(w, 0, 1) + + # transform interpolation to time range + t = (1 - w) * low_idx + w * high_idx + t = t.reshape(sigma.shape) + return t + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler._sigma_to_alpha_sigma_t + def _sigma_to_alpha_sigma_t(self, sigma): + alpha_t = 1 / ((sigma**2 + 1) ** 0.5) + sigma_t = sigma * alpha_t + + return alpha_t, sigma_t + + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras + def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor: + """Constructs the noise schedule of Karras et al. (2022).""" + + sigma_min: float = in_sigmas[-1].item() + sigma_max: float = in_sigmas[0].item() + + rho = 7.0 # 7.0 is the value used in the paper + ramp = np.linspace(0, 1, num_inference_steps) + min_inv_rho = sigma_min ** (1 / rho) + max_inv_rho = sigma_max ** (1 / rho) + sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho + return sigmas + + def convert_model_output( + self, + model_output: torch.FloatTensor, + *args, + sample: torch.FloatTensor = None, + **kwargs, + ) -> torch.FloatTensor: + r""" + Convert the model output to the corresponding type the UniPC algorithm needs. + + Args: + model_output (`torch.FloatTensor`): + The direct output from the learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + A current instance of a sample created by the diffusion process. + + Returns: + `torch.FloatTensor`: + The converted model output. 
+ """ + timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError("missing `sample` as a required keyward argument") + if timestep is not None: + deprecate( + "timesteps", + "1.0.0", + "Passing `timesteps` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + + sigma = self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + + if self.predict_x0: + if self.config.prediction_type == "epsilon": + x0_pred = (sample - sigma_t * model_output) / alpha_t + elif self.config.prediction_type == "sample": + x0_pred = model_output + elif self.config.prediction_type == "v_prediction": + x0_pred = alpha_t * sample - sigma_t * model_output + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction` for the UniPCMultistepScheduler." + ) + + if self.config.thresholding: + x0_pred = self._threshold_sample(x0_pred) + + return x0_pred + else: + if self.config.prediction_type == "epsilon": + return model_output + elif self.config.prediction_type == "sample": + epsilon = (sample - alpha_t * model_output) / sigma_t + return epsilon + elif self.config.prediction_type == "v_prediction": + epsilon = alpha_t * model_output + sigma_t * sample + return epsilon + else: + raise ValueError( + f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or" + " `v_prediction` for the UniPCMultistepScheduler." + ) + + + def multistep_uni_p_bh_update( + self, + model_output: torch.FloatTensor = None, + *args, + sample: torch.FloatTensor = None, + order: int = None, + **kwargs, + ) -> torch.FloatTensor: + """ + One step for the UniP (B(h) version). Alternatively, `self.solver_p` is used if is specified. + + Args: + model_output (`torch.FloatTensor`): + The direct output from the learned diffusion model at the current timestep. + prev_timestep (`int`): + The previous discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + A current instance of a sample created by the diffusion process. + order (`int`): + The order of UniP at this timestep (corresponds to the *p* in UniPC-p). + + Returns: + `torch.FloatTensor`: + The sample tensor at the previous timestep. 
+ """ + prev_timestep = args[0] if len(args) > 0 else kwargs.pop("prev_timestep", None) + if sample is None: + if len(args) > 1: + sample = args[1] + else: + raise ValueError(" missing `sample` as a required keyward argument") + if order is None: + if len(args) > 2: + order = args[2] + else: + raise ValueError(" missing `order` as a required keyward argument") + if prev_timestep is not None: + deprecate( + "prev_timestep", + "1.0.0", + "Passing `prev_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`", + ) + model_output_list = self.model_outputs + + s0 = self.timestep_list[-1] + m0 = model_output_list[-1] + assert m0 is not None + x = sample + + if self.solver_p: + raise NotImplementedError() + + sigma_t, sigma_s0 = self.sigmas[self.step_index + 1], self.sigmas[self.step_index] + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t) + alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0) + + lambda_t = torch.log(alpha_t) - torch.log(sigma_t) + lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0) + + h = lambda_t - lambda_s0 + device = sample.device + + rks = [] + D1s = [] + for i in range(1, order): + si = self.step_index - i + mi = model_output_list[-(i + 1)] + alpha_si, sigma_si = self._sigma_to_alpha_sigma_t(self.sigmas[si]) + lambda_si = torch.log(alpha_si) - torch.log(sigma_si) + rk = (lambda_si - lambda_s0) / h + rks.append(rk) + D1s.append((mi - m0) / rk) + + rks.append(1.0) + rks = torch.tensor(rks, device=device) + + R = [] + b = [] + + hh = -h if self.predict_x0 else h + h_phi_1 = torch.expm1(hh) # h\phi_1(h) = e^h - 1 + h_phi_k = h_phi_1 / hh - 1 + + factorial_i = 1 + + if self.config.solver_type == "bh1": + B_h = hh + elif self.config.solver_type == "bh2": + B_h = torch.expm1(hh) + else: + raise NotImplementedError() + + for i in range(1, order + 1): + R.append(torch.pow(rks, i - 1)) + b.append(h_phi_k * factorial_i / B_h) + factorial_i *= i + 1 + h_phi_k = h_phi_k / hh - 1 / factorial_i + + R = torch.stack(R) + b = torch.tensor(b, device=device) + + if len(D1s) > 0: + D1s = torch.stack(D1s, dim=1) # (B, K) + # for order 2, we use a simplified version + if order == 2: + rhos_p = torch.tensor([0.5], dtype=x.dtype, device=device) + else: + rhos_p = torch.linalg.solve(R[:-1, :-1], b[:-1]) + else: + D1s = None + + if self.predict_x0: + x_t_ = sigma_t / sigma_s0 * x - alpha_t * h_phi_1 * m0 + if D1s is not None: + pred_res = torch.einsum("k,bkc...->bc...", rhos_p, D1s) + else: + pred_res = 0 + x_t = x_t_ - alpha_t * B_h * pred_res + else: + x_t_ = alpha_t / alpha_s0 * x - sigma_t * h_phi_1 * m0 + if D1s is not None: + pred_res = torch.einsum("k,bkc...->bc...", rhos_p, D1s) + else: + pred_res = 0 + x_t = x_t_ - sigma_t * B_h * pred_res + + x_t = x_t.to(x.dtype) + return x_t + + def multistep_uni_c_bh_update( + self, + this_model_output: torch.FloatTensor, + *args, + last_sample: torch.FloatTensor = None, + this_sample: torch.FloatTensor = None, + order: int = None, + **kwargs, + ) -> torch.FloatTensor: + """ + One step for the UniC (B(h) version). + + Args: + this_model_output (`torch.FloatTensor`): + The model outputs at `x_t`. + this_timestep (`int`): + The current timestep `t`. + last_sample (`torch.FloatTensor`): + The generated sample before the last predictor `x_{t-1}`. + this_sample (`torch.FloatTensor`): + The generated sample after the last predictor `x_{t}`. + order (`int`): + The `p` of UniC-p at this step. The effective order of accuracy should be `order + 1`. 
+
+        Returns:
+            `torch.FloatTensor`:
+                The corrected sample tensor at the current timestep.
+        """
+        this_timestep = args[0] if len(args) > 0 else kwargs.pop("this_timestep", None)
+        if last_sample is None:
+            if len(args) > 1:
+                last_sample = args[1]
+            else:
+                raise ValueError("missing `last_sample` as a required keyword argument")
+        if this_sample is None:
+            if len(args) > 2:
+                this_sample = args[2]
+            else:
+                raise ValueError("missing `this_sample` as a required keyword argument")
+        if order is None:
+            if len(args) > 3:
+                order = args[3]
+            else:
+                raise ValueError("missing `order` as a required keyword argument")
+        if this_timestep is not None:
+            deprecate(
+                "this_timestep",
+                "1.0.0",
+                "Passing `this_timestep` is deprecated and has no effect as model output conversion is now handled via an internal counter `self.step_index`",
+            )
+
+        model_output_list = self.model_outputs
+
+        m0 = model_output_list[-1]
+        x = last_sample
+        x_t = this_sample
+        model_t = this_model_output
+
+        sigma_t, sigma_s0 = self.sigmas[self.step_index], self.sigmas[self.step_index - 1]
+        alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma_t)
+        alpha_s0, sigma_s0 = self._sigma_to_alpha_sigma_t(sigma_s0)
+
+        lambda_t = torch.log(alpha_t) - torch.log(sigma_t)
+        lambda_s0 = torch.log(alpha_s0) - torch.log(sigma_s0)
+
+        h = lambda_t - lambda_s0
+        device = this_sample.device
+
+        rks = []
+        D1s = []
+        for i in range(1, order):
+            si = self.step_index - (i + 1)
+            mi = model_output_list[-(i + 1)]
+            alpha_si, sigma_si = self._sigma_to_alpha_sigma_t(self.sigmas[si])
+            lambda_si = torch.log(alpha_si) - torch.log(sigma_si)
+            rk = (lambda_si - lambda_s0) / h
+            rks.append(rk)
+            D1s.append((mi - m0) / rk)
+
+        rks.append(1.0)
+        rks = torch.tensor(rks, device=device)
+
+        R = []
+        b = []
+
+        hh = -h if self.predict_x0 else h
+        h_phi_1 = torch.expm1(hh)  # h\phi_1(h) = e^h - 1
+        h_phi_k = h_phi_1 / hh - 1
+
+        factorial_i = 1
+
+        if self.config.solver_type == "bh1":
+            B_h = hh
+        elif self.config.solver_type == "bh2":
+            B_h = torch.expm1(hh)
+        else:
+            raise NotImplementedError()
+
+        for i in range(1, order + 1):
+            R.append(torch.pow(rks, i - 1))
+            b.append(h_phi_k * factorial_i / B_h)
+            factorial_i *= i + 1
+            h_phi_k = h_phi_k / hh - 1 / factorial_i
+
+        R = torch.stack(R)
+        b = torch.tensor(b, device=device)
+
+        if len(D1s) > 0:
+            D1s = torch.stack(D1s, dim=1)
+        else:
+            D1s = None
+
+        # for order 1, we use a simplified version
+        if order == 1:
+            rhos_c = torch.tensor([0.5], dtype=x.dtype, device=device)
+        else:
+            rhos_c = torch.linalg.solve(R, b)
+
+        if self.predict_x0:
+            x_t_ = sigma_t / sigma_s0 * x - alpha_t * h_phi_1 * m0
+            if D1s is not None:
+                corr_res = torch.einsum("k,bkc...->bc...", rhos_c[:-1], D1s)
+            else:
+                corr_res = 0
+            D1_t = model_t - m0
+            x_t = x_t_ - alpha_t * B_h * (corr_res + rhos_c[-1] * D1_t)
+        else:
+            x_t_ = alpha_t / alpha_s0 * x - sigma_t * h_phi_1 * m0
+            if D1s is not None:
+                corr_res = torch.einsum("k,bkc...->bc...", rhos_c[:-1], D1s)
+            else:
+                corr_res = 0
+            D1_t = model_t - m0
+            x_t = x_t_ - sigma_t * B_h * (corr_res + rhos_c[-1] * D1_t)
+        x_t = x_t.to(x.dtype)
+        return x_t
+
+    def _init_step_index(self, timestep):
+        if isinstance(timestep, torch.Tensor):
+            timestep = timestep.to(self.timesteps.device)
+
+        index_candidates = (self.timesteps == timestep).nonzero()
+
+        if len(index_candidates) == 0:
+            step_index = len(self.timesteps) - 1
+        # The sigma index that is taken for the **very** first `step`
+        # is always the second index (or the last index if
there is only 1) + # This way we can ensure we don't accidentally skip a sigma in + # case we start in the middle of the denoising schedule (e.g. for image-to-image) + elif len(index_candidates) > 1: + step_index = index_candidates[1].item() + else: + step_index = index_candidates[0].item() + + self._step_index = step_index + + def dynamic_compensation(self, model_prev_list, t_prev_list, ratio): + len_buffer = len([t for t in t_prev_list if t is not None]) + if len_buffer < 2: + return None + + t_ = ratio * (t_prev_list[-1] - t_prev_list[-2]) + t_prev_list[-2] + + inter_order = min(self.dc_order + 1, 4) + + if inter_order is not None: + model_t_dc = torch.zeros_like(model_prev_list[-1]) + for i in range(inter_order): + term = model_prev_list[-(i + 1)] + for j in range(inter_order): + if i != j: + para = (t_ - t_prev_list[-(j + 1)]) / (t_prev_list[-(i + 1)] - t_prev_list[-(j + 1)]) + term = term * para + model_t_dc = model_t_dc + term + else: + model_t_dc = None + return model_t_dc + + def find_optim_ratio(self, sample, ratio_initial=1.0): + if self.bound_func == 'tanh': + bound_func = lambda x: torch.nn.functional.tanh(x) * 0.5 + ratio_initial + param_initial = 0. + else: + bound_func = lambda x: x + param_initial = ratio_initial + + # step 1: define the parameters + if self.step_index < len(self.timesteps) - 2: + scalar_t = self.timesteps[self.step_index + 1].item() + else: + scalar_t = 0 + ratio_param = torch.nn.Parameter(torch.tensor([param_initial], device=sample.device), requires_grad=True) + + sample_clone = sample.clone() + + index = np.where(self.ddim_gt['ts'] >= scalar_t)[0].max() + batch_size = sample.shape[0] + + x_t_gt = torch.from_numpy(self.ddim_gt['intermediates'][:batch_size, index]).to(sample.device) # suppose the first batch + + model_t_bak = self.model_outputs[-1] + def closure(ratio_param): + ratio_bound = bound_func(ratio_param) + # torch.nn.functional.tanh(ratio_param) * 0.5 + ratio_initial + sample = sample_clone.clone() + model_t_dc = self.dynamic_compensation(self.model_outputs, self.timestep_list, ratio=ratio_bound) + if model_t_dc is not None: + self.model_outputs[-1] = model_t_dc + self.last_sample = sample + # run predictor + sample = self.multistep_uni_p_bh_update( + sample=sample, + order=self.this_order, + ) + # run the next corrector + self._step_index += 1 + use_corrector = ( + self.step_index > 0 and self.step_index - 1 not in self.disable_corrector \ + and self.last_sample is not None \ + and self.step_index < len(self.timesteps) + ) + if use_corrector: + model_output = self.model_wrapper(sample, self.timesteps[self.step_index]) + model_output_convert = self.convert_model_output(model_output, sample=sample) + sample = self.multistep_uni_c_bh_update( + this_model_output=model_output_convert, + last_sample=self.last_sample, + this_sample=sample, + order=self.this_order, + ) + x_t_pred = sample + loss = torch.nn.functional.mse_loss(x_t_pred, x_t_gt) + # rewind + self._step_index -= 1 + self.model_outputs[-1] = model_t_bak + return loss + + optimizer = torch.optim.AdamW([ratio_param], lr=0.1) + for iter_ in range(self.num_iters): + optimizer.zero_grad() + loss = closure(ratio_param) + loss.backward() + optimizer.step() + ratio_bound = bound_func(ratio_param) + print(f'iter [{iter_}]', ratio_bound.item(), loss.item()) + + torch.cuda.empty_cache() + return ratio_bound.data.detach().item() + + def cascade_polynomial_regression(self, test_CFG, test_NFE, cpr_path): + def f1(x, a, b, c): + return a * x ** 2 + b * x + c # np.log(np.abs(x - c)) + b + + def f2(x, a, 
b, c): + return a * x ** 2 + b * x + c # a * np.exp(-b * x) + c + + def predict(xs, *coeffs): + CFG, NFE, x = xs[0], xs[1], xs[2] + CFG = CFG / 12 + x = x / NFE + NFE = NFE / 50 + NFE = NFE.reshape(-1, 1, 1) + CFG = CFG.reshape(-1, 1) + coeffs = np.array(coeffs).reshape(-1, 3, 3) + coeffs1 = f2(NFE, coeffs[..., 0], coeffs[..., 1], coeffs[..., 2]) + coeffs2 = f1(CFG, coeffs1[..., 0], coeffs1[..., 1], coeffs1[..., 2]) + + x_pow = 1 + result = 0 + for i in range(coeffs2.shape[-1]): + result = result + coeffs2[:, i] * x_pow + x_pow = x_pow * x + return result + + cpr_coeffs = np.load(cpr_path) + ratios = [] + steps = list(range(1, test_NFE + 1)) + for step in steps: + if step < 3: + ratio = 1 + else: + infer_x = np.array([test_CFG, test_NFE, step]).reshape(3, -1) + ratio = predict(infer_x, *cpr_coeffs).item() + ratios.append(ratio) + return ratios + + + def step(self, *args, **kwargs): + if self.ddim_gt is None: + return self._step(*args, **kwargs) + else: + return self._step_search(*args, **kwargs) + + @torch.no_grad() + def _step_search( + self, + model_output: torch.FloatTensor, + timestep: int, + sample: torch.FloatTensor, + return_dict: bool = True, + ) -> Union[SchedulerOutput, Tuple]: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the multistep UniPC. + + Args: + model_output (`torch.FloatTensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. 
+ + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + use_corrector = ( + self.step_index > 0 and self.step_index - 1 not in self.disable_corrector and self.last_sample is not None + ) + + model_output_convert = self.convert_model_output(model_output, sample=sample) + if use_corrector: + sample = self.multistep_uni_c_bh_update( + this_model_output=model_output_convert, + last_sample=self.last_sample, + this_sample=sample, + order=self.this_order, + ) + + for i in range(self.buffer_size - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.timestep_list[i] = self.timestep_list[i + 1] + + self.model_outputs[-1] = model_output_convert + self.timestep_list[-1] = timestep + + if self.config.lower_order_final: + this_order = min(self.config.solver_order, len(self.timesteps) - self.step_index) + else: + this_order = self.config.solver_order + + self.this_order = min(this_order, self.lower_order_nums + 1) # warmup for multistep + assert self.this_order > 0 + + # here we will use dynamic extrapolation to update the model_output + with torch.enable_grad(): + if self.step_index > 1: + ratio_optim = self.find_optim_ratio(sample, ratio_initial=1.0) + else: + ratio_optim = 1.0 + self.dc_ratios.append(ratio_optim) + + # now update by dynamic compensation + if ratio_optim != 1.0: + self.model_outputs[-1] = self.dynamic_compensation(self.model_outputs, self.timestep_list, ratio=ratio_optim) + + prev_sample = self.multistep_uni_p_bh_update( + # model_output=model_output, # pass the original non-converted model output, in case solver-p is used + sample=sample, + order=self.this_order, + ) + self.last_sample = sample + if self.lower_order_nums < self.config.solver_order: + self.lower_order_nums += 1 + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + def _step( + self, + model_output: torch.FloatTensor, + timestep: int, + sample: torch.FloatTensor, + return_dict: bool = True, + ) -> Union[SchedulerOutput, Tuple]: + """ + Predict the sample from the previous timestep by reversing the SDE. This function propagates the sample with + the multistep UniPC. + + Args: + model_output (`torch.FloatTensor`): + The direct output from learned diffusion model. + timestep (`int`): + The current discrete timestep in the diffusion chain. + sample (`torch.FloatTensor`): + A current instance of a sample created by the diffusion process. + return_dict (`bool`): + Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`. + + Returns: + [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: + If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a + tuple is returned where the first element is the sample tensor. 
+ + """ + if self.num_inference_steps is None: + raise ValueError( + "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler" + ) + + if self.step_index is None: + self._init_step_index(timestep) + + use_corrector = ( + self.step_index > 0 and self.step_index - 1 not in self.disable_corrector and self.last_sample is not None + ) + + model_output_convert = self.convert_model_output(model_output, sample=sample) + if use_corrector: + sample = self.multistep_uni_c_bh_update( + this_model_output=model_output_convert, + last_sample=self.last_sample, + this_sample=sample, + order=self.this_order, + ) + + for i in range(self.buffer_size - 1): + self.model_outputs[i] = self.model_outputs[i + 1] + self.timestep_list[i] = self.timestep_list[i + 1] + + self.model_outputs[-1] = model_output_convert + self.timestep_list[-1] = timestep + + if self.config.lower_order_final: + this_order = min(self.config.solver_order, len(self.timesteps) - self.step_index) + else: + this_order = self.config.solver_order + + self.this_order = min(this_order, self.lower_order_nums + 1) # warmup for multistep + assert self.this_order > 0 + + self.last_sample = sample + + # here we will use dynamic compensation to update the model_output + # dc_ratio = self.dc_ratios[self.step_index] + # if dc_ratio != 1.0: + # self.model_outputs[-1] = self.dynamic_compensation(self.model_outputs, self.timestep_list, dc_ratio) + + prev_sample = self.multistep_uni_p_bh_update( + model_output=model_output, # pass the original non-converted model output, in case solver-p is used + sample=sample, + order=self.this_order, + ) + + if self.lower_order_nums < self.config.solver_order: + self.lower_order_nums += 1 + + # upon completion increase step index by one + self._step_index += 1 + + if not return_dict: + return (prev_sample,) + + return SchedulerOutput(prev_sample=prev_sample) + + + def scale_model_input(self, sample: torch.FloatTensor, *args, **kwargs) -> torch.FloatTensor: + """ + Ensures interchangeability with schedulers that need to scale the denoising model input depending on the + current timestep. + + Args: + sample (`torch.FloatTensor`): + The input sample. + + Returns: + `torch.FloatTensor`: + A scaled input sample. 
+ """ + return sample + + # Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.add_noise + def add_noise( + self, + original_samples: torch.FloatTensor, + noise: torch.FloatTensor, + timesteps: torch.IntTensor, + ) -> torch.FloatTensor: + # Make sure sigmas and timesteps have the same device and dtype as original_samples + sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype) + if original_samples.device.type == "mps" and torch.is_floating_point(timesteps): + # mps does not support float64 + schedule_timesteps = self.timesteps.to(original_samples.device, dtype=torch.float32) + timesteps = timesteps.to(original_samples.device, dtype=torch.float32) + else: + schedule_timesteps = self.timesteps.to(original_samples.device) + timesteps = timesteps.to(original_samples.device) + + step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < len(original_samples.shape): + sigma = sigma.unsqueeze(-1) + + alpha_t, sigma_t = self._sigma_to_alpha_sigma_t(sigma) + noisy_samples = alpha_t * original_samples + sigma_t * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/devices.py b/modules/devices.py index 1d6ae3703..a006e1ca3 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -128,7 +128,7 @@ def get_device_for(task): return get_optimal_device() -def torch_gc(force=False): +def torch_gc(force=False, fast=False): t0 = time.time() mem = memstats.memory_stats() gpu = mem.get('gpu', {}) @@ -151,7 +151,7 @@ def torch_gc(force=False): return # actual gc - collected = gc.collect() # python gc + collected = gc.collect() if not fast else 0 # python gc if cuda_ok: try: with torch.cuda.device(get_cuda_device_string()): @@ -228,6 +228,11 @@ def test_bf16(): def set_cuda_params(): if debug: log.debug(f'Verifying Torch settings: cuda={cuda_ok}') + if backend == "ipex": + try: + torch.xpu.set_fp32_math_mode(mode=torch.xpu.FP32MathMode.TF32) + except Exception: + pass if cuda_ok: try: torch.backends.cuda.matmul.allow_tf32 = True diff --git a/modules/face/__init__.py b/modules/face/__init__.py index d1ded8c37..d5d51e647 100644 --- a/modules/face/__init__.py +++ b/modules/face/__init__.py @@ -72,7 +72,7 @@ def ui(self, _is_img2img): id_strength = gr.Slider(label='Strength', minimum=0.0, maximum=2.0, step=0.01, value=1.0) id_conditioning = gr.Slider(label='Control', minimum=0.0, maximum=2.0, step=0.01, value=0.5) with gr.Row(visible=True): - id_cache = gr.Checkbox(label='Cache model', value=False) + id_cache = gr.Checkbox(label='Cache model', value=True) with gr.Group(visible=False) as cfg_photomaker: with gr.Row(): gr.HTML('  Tenecent ARC Lab PhotoMaker
') diff --git a/modules/gr_tempdir.py b/modules/gr_tempdir.py index 90a8f7376..0ee15b314 100644 --- a/modules/gr_tempdir.py +++ b/modules/gr_tempdir.py @@ -70,7 +70,7 @@ def pil_to_temp_file(self, img: Image, dir: str, format="png") -> str: # pylint: img.save(name, pnginfo=(metadata if use_metadata else None)) img.already_saved_as = name size = os.path.getsize(name) - shared.log.debug(f'Save temp: image="{name}" resolution={img.width}x{img.height} size={size}') + shared.log.debug(f'Save temp: image="{name}" width={img.width} height={img.height} size={size}') params = ', '.join([f'{k}: {v}' for k, v in img.info.items()]) params = params[12:] if params.startswith('parameters: ') else params with open(os.path.join(paths.data_path, "params.txt"), "w", encoding="utf8") as file: diff --git a/modules/images.py b/modules/images.py index bb194dfe2..6882e97f5 100644 --- a/modules/images.py +++ b/modules/images.py @@ -5,6 +5,7 @@ import math import json import uuid +import time import queue import string import random @@ -214,15 +215,13 @@ def draw_prompt_matrix(im, width, height, all_prompts, margin=0): return draw_grid_annotations(im, width, height, hor_texts, ver_texts, margin) -def resize_image(resize_mode, im, width, height, upscaler_name=None, output_type='image'): - if im.width == width and im.height == height: - shared.log.debug(f'Image resize: input={im} target={width}x{height} mode={shared.resize_modes[resize_mode]} upscaler="{upscaler_name}" fn={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access +def resize_image(resize_mode, im, width, height, upscaler_name=None, output_type='image', context=None): upscaler_name = upscaler_name or shared.opts.upscaler_for_img2img def latent(im, w, h, upscaler): from modules.processing_vae import vae_encode, vae_decode import torch - latents = vae_encode(im, shared.sd_model, full_quality=False) # TODO enable full VAE mode + latents = vae_encode(im, shared.sd_model, full_quality=False) # TODO enable full VAE mode for resize-latent latents = torch.nn.functional.interpolate(latents, size=(int(h // 8), int(w // 8)), mode=upscaler["mode"], antialias=upscaler["antialias"]) im = vae_decode(latents, shared.sd_model, output_type='pil', full_quality=False)[0] return im @@ -288,9 +287,40 @@ def fill(im, color=None): res.paste(im, box=((width - im.width)//2, (height - im.height)//2)) return res + def context_aware(im, width, height, context): + import seam_carving # https://github.com/li-plus/seam-carving + if 'forward' in context: + energy_mode = "forward" + elif 'backward' in context: + energy_mode = "backward" + else: + return im + if 'Add' in context: + src_ratio = min(width / im.width, height / im.height) + src_w = int(im.width * src_ratio) + src_h = int(im.height * src_ratio) + src_image = resize(im, src_w, src_h) + elif 'Remove' in context: + ratio = width / height + src_ratio = im.width / im.height + src_w = width if ratio > src_ratio else im.width * height // im.height + src_h = height if ratio <= src_ratio else im.height * width // im.width + src_image = resize(im, src_w, src_h) + else: + return im + res = Image.fromarray(seam_carving.resize( + src_image, # source image (rgb or gray) + size=(width, height), # target size + energy_mode=energy_mode, # choose from {backward, forward} + order="width-first", # choose from {width-first, height-first} + keep_mask=None, # object mask to protect from removal + )) + return res + + t0 = time.time() if resize_mode is None: resize_mode = 0 - if resize_mode == 0 or (im.width == width and im.height == 
height): # none + if resize_mode == 0 or (im.width == width and im.height == height) or (width == 0 and height == 0): # none res = im.copy() elif resize_mode == 1: # fixed res = resize(im, width, height) @@ -302,12 +332,14 @@ def fill(im, color=None): from modules import masking res = fill(im, color=0) res, _mask = masking.outpaint(res) + elif resize_mode == 5: # context-aware + res = context_aware(im, width, height, context) else: res = im.copy() shared.log.error(f'Invalid resize mode: {resize_mode}') - if output_type == 'np': - return np.array(res) - return res + t1 = time.time() + shared.log.debug(f'Image resize: input={im} width={width} height={height} mode="{shared.resize_modes[resize_mode]}" upscaler="{upscaler_name}" context="{context}" type={output_type} result={res} time={t1-t0:.2f} fn={sys._getframe(1).f_code.co_filename}:{sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access + return np.array(res) if output_type == 'np' else res re_nonletters = re.compile(r'[\s' + string.punctuation + ']+') @@ -596,7 +628,7 @@ def atomically_save_image(): shared.log.error(f'Save failed: file="{fn}" format={image_format} args={save_args} {e}') errors.display(e, 'Image save') size = os.path.getsize(fn) if os.path.exists(fn) else 0 - shared.log.info(f'Save: image="{fn}" type={image_format} resolution={image.width}x{image.height} size={size}') + shared.log.info(f'Save: image="{fn}" type={image_format} width={image.width} height={image.height} size={size}') if shared.opts.save_log_fn != '' and len(exifinfo) > 0: fn = os.path.join(paths.data_path, shared.opts.save_log_fn) if not fn.endswith('.json'): @@ -621,9 +653,9 @@ def save_image(image, path, basename='', seed=None, prompt=None, extension=share debug(f'Save: fn={sys._getframe(1).f_code.co_name}') # pylint: disable=protected-access if image is None: shared.log.warning('Image is none') - return None, None + return None, None, None if not check_grid_size([image]): - return None, None + return None, None, None if path is None or path == '': # set default path to avoid errors when functions are triggered manually or via api and param is not set path = shared.opts.outdir_save namegen = FilenameGenerator(p, seed, prompt, image, grid=grid) @@ -668,7 +700,7 @@ def save_image(image, path, basename='', seed=None, prompt=None, extension=share debug(f'Image marked: "{params.filename}"') params.image.already_saved_as = params.filename script_callbacks.image_saved_callback(params) - return params.filename, filename_txt + return params.filename, filename_txt, exifinfo def save_video_atomic(images, filename, video_type: str = 'none', duration: float = 2.0, loop: bool = False, interpolate: int = 0, scale: float = 1.0, pad: int = 1, change: float = 0.3): @@ -719,7 +751,9 @@ def save_video(p, images, filename = None, video_type: str = 'none', duration: f return None image = images[0] if p is not None: - namegen = FilenameGenerator(p, seed=p.all_seeds[0], prompt=p.all_prompts[0], image=image) + seed = p.all_seeds[0] if getattr(p, 'all_seeds', None) is not None else p.seed + prompt = p.all_prompts[0] if getattr(p, 'all_prompts', None) is not None else p.prompt + namegen = FilenameGenerator(p, seed=seed, prompt=prompt, image=image) else: namegen = FilenameGenerator(None, seed=0, prompt='', image=image) if filename is None and p is not None: diff --git a/modules/img2img.py b/modules/img2img.py index 319d02171..4bf057d4a 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -103,6 +103,7 @@ def process_batch(p, input_files, input_dir, output_dir, 
inpaint_mask_dir, args) for k, v in items.items(): image.info[k] = v images.save_image(image, path=output_dir, basename=basename, seed=None, prompt=None, extension=ext, info=geninfo, short_filename=True, no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=image.info, forced_filename=forced_filename) + proc = modules.scripts.scripts_img2img.after(p, proc, *args) shared.log.debug(f'Processed: images={len(batch_image_files)} memory={memory_stats()} batch') @@ -132,7 +133,7 @@ def img2img(id_task: str, mode: int, selected_scale_tab, height, width, scale_by, - resize_mode, resize_name, + resize_mode, resize_name, resize_context, inpaint_full_res, inpaint_full_res_padding, inpainting_mask_invert, img2img_batch_files, img2img_batch_input_dir, img2img_batch_output_dir, img2img_batch_inpaint_mask_dir, hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio, @@ -143,7 +144,7 @@ def img2img(id_task: str, mode: int, shared.log.warning('Model not loaded') return [], '', '', 'Error: model not loaded' - debug(f'img2img: id_task={id_task}|mode={mode}|prompt={prompt}|negative_prompt={negative_prompt}|prompt_styles={prompt_styles}|init_img={init_img}|sketch={sketch}|init_img_with_mask={init_img_with_mask}|inpaint_color_sketch={inpaint_color_sketch}|inpaint_color_sketch_orig={inpaint_color_sketch_orig}|init_img_inpaint={init_img_inpaint}|init_mask_inpaint={init_mask_inpaint}|steps={steps}|sampler_index={sampler_index}||mask_blur={mask_blur}|mask_alpha={mask_alpha}|inpainting_fill={inpainting_fill}|full_quality={full_quality}|restore_faces={restore_faces}|tiling={tiling}|hidiffusion={hidiffusion}|n_iter={n_iter}|batch_size={batch_size}|cfg_scale={cfg_scale}|image_cfg_scale={image_cfg_scale}|clip_skip={clip_skip}|denoising_strength={denoising_strength}|seed={seed}|subseed{subseed}|subseed_strength={subseed_strength}|seed_resize_from_h={seed_resize_from_h}|seed_resize_from_w={seed_resize_from_w}|selected_scale_tab={selected_scale_tab}|height={height}|width={width}|scale_by={scale_by}|resize_mode={resize_mode}|resize_name={resize_name}|inpaint_full_res={inpaint_full_res}|inpaint_full_res_padding={inpaint_full_res_padding}|inpainting_mask_invert={inpainting_mask_invert}|img2img_batch_files={img2img_batch_files}|img2img_batch_input_dir={img2img_batch_input_dir}|img2img_batch_output_dir={img2img_batch_output_dir}|img2img_batch_inpaint_mask_dir={img2img_batch_inpaint_mask_dir}|override_settings_texts={override_settings_texts}') + debug(f'img2img: 
id_task={id_task}|mode={mode}|prompt={prompt}|negative_prompt={negative_prompt}|prompt_styles={prompt_styles}|init_img={init_img}|sketch={sketch}|init_img_with_mask={init_img_with_mask}|inpaint_color_sketch={inpaint_color_sketch}|inpaint_color_sketch_orig={inpaint_color_sketch_orig}|init_img_inpaint={init_img_inpaint}|init_mask_inpaint={init_mask_inpaint}|steps={steps}|sampler_index={sampler_index}||mask_blur={mask_blur}|mask_alpha={mask_alpha}|inpainting_fill={inpainting_fill}|full_quality={full_quality}|restore_faces={restore_faces}|tiling={tiling}|hidiffusion={hidiffusion}|n_iter={n_iter}|batch_size={batch_size}|cfg_scale={cfg_scale}|image_cfg_scale={image_cfg_scale}|clip_skip={clip_skip}|denoising_strength={denoising_strength}|seed={seed}|subseed{subseed}|subseed_strength={subseed_strength}|seed_resize_from_h={seed_resize_from_h}|seed_resize_from_w={seed_resize_from_w}|selected_scale_tab={selected_scale_tab}|height={height}|width={width}|scale_by={scale_by}|resize_mode={resize_mode}|resize_name={resize_name}|resize_context={resize_context}|inpaint_full_res={inpaint_full_res}|inpaint_full_res_padding={inpaint_full_res_padding}|inpainting_mask_invert={inpainting_mask_invert}|img2img_batch_files={img2img_batch_files}|img2img_batch_input_dir={img2img_batch_input_dir}|img2img_batch_output_dir={img2img_batch_output_dir}|img2img_batch_inpaint_mask_dir={img2img_batch_inpaint_mask_dir}|override_settings_texts={override_settings_texts}') if mode == 5: if img2img_batch_files is None or len(img2img_batch_files) == 0: @@ -233,6 +234,8 @@ def img2img(id_task: str, mode: int, inpainting_fill=inpainting_fill, resize_mode=resize_mode, resize_name=resize_name, + resize_context=resize_context, + scale_by=scale_by, denoising_strength=denoising_strength, image_cfg_scale=image_cfg_scale, diffusers_guidance_rescale=diffusers_guidance_rescale, @@ -246,8 +249,6 @@ def img2img(id_task: str, mode: int, hdr_boundary=hdr_boundary, hdr_threshold=hdr_threshold, hdr_maximize=hdr_maximize, hdr_max_center=hdr_max_center, hdr_max_boundry=hdr_max_boundry, hdr_color_picker=hdr_color_picker, hdr_tint_ratio=hdr_tint_ratio, override_settings=override_settings, ) - if selected_scale_tab == 1 and resize_mode != 0: - p.scale_by = scale_by p.scripts = modules.scripts.scripts_img2img p.script_args = args if mask: diff --git a/modules/intel/ipex/__init__.py b/modules/intel/ipex/__init__.py index 38fc81c07..189dd07d0 100644 --- a/modules/intel/ipex/__init__.py +++ b/modules/intel/ipex/__init__.py @@ -66,45 +66,47 @@ def ipex_init(): # pylint: disable=too-many-statements torch.cuda.__file__ = torch.xpu.__file__ # torch.cuda.is_current_stream_capturing = torch.xpu.is_current_stream_capturing - if legacy: + if legacy: torch.cuda.os = torch.xpu.os torch.cuda.Device = torch.xpu.Device torch.cuda.warnings = torch.xpu.warnings torch.cuda.classproperty = torch.xpu.classproperty - torch.cuda._initialization_lock = torch.xpu.lazy_init._initialization_lock - torch.cuda._initialized = torch.xpu.lazy_init._initialized - torch.cuda._is_in_bad_fork = torch.xpu.lazy_init._is_in_bad_fork - torch.cuda._lazy_seed_tracker = torch.xpu.lazy_init._lazy_seed_tracker - torch.cuda._queued_calls = torch.xpu.lazy_init._queued_calls - torch.cuda._tls = torch.xpu.lazy_init._tls - torch.cuda.threading = torch.xpu.lazy_init.threading - torch.cuda.traceback = torch.xpu.lazy_init.traceback - torch.cuda._lazy_new = torch.xpu._lazy_new - torch.UntypedStorage.cuda = torch.UntypedStorage.xpu - torch.cuda.FloatTensor = torch.xpu.FloatTensor - torch.cuda.FloatStorage = 
torch.xpu.FloatStorage - torch.cuda.BFloat16Tensor = torch.xpu.BFloat16Tensor - torch.cuda.BFloat16Storage = torch.xpu.BFloat16Storage - torch.cuda.HalfTensor = torch.xpu.HalfTensor - torch.cuda.HalfStorage = torch.xpu.HalfStorage - torch.cuda.ByteTensor = torch.xpu.ByteTensor - torch.cuda.ByteStorage = torch.xpu.ByteStorage - torch.cuda.DoubleTensor = torch.xpu.DoubleTensor - torch.cuda.DoubleStorage = torch.xpu.DoubleStorage - torch.cuda.ShortTensor = torch.xpu.ShortTensor - torch.cuda.ShortStorage = torch.xpu.ShortStorage - torch.cuda.LongTensor = torch.xpu.LongTensor - torch.cuda.LongStorage = torch.xpu.LongStorage - torch.cuda.IntTensor = torch.xpu.IntTensor - torch.cuda.IntStorage = torch.xpu.IntStorage - torch.cuda.CharTensor = torch.xpu.CharTensor - torch.cuda.CharStorage = torch.xpu.CharStorage - torch.cuda.BoolTensor = torch.xpu.BoolTensor - torch.cuda.BoolStorage = torch.xpu.BoolStorage - torch.cuda.ComplexFloatStorage = torch.xpu.ComplexFloatStorage - torch.cuda.ComplexDoubleStorage = torch.xpu.ComplexDoubleStorage - else: + if not ipex.__version__.startswith("2.3"): + torch.cuda._initialization_lock = torch.xpu.lazy_init._initialization_lock + torch.cuda._initialized = torch.xpu.lazy_init._initialized + torch.cuda._is_in_bad_fork = torch.xpu.lazy_init._is_in_bad_fork + torch.cuda._lazy_seed_tracker = torch.xpu.lazy_init._lazy_seed_tracker + torch.cuda._queued_calls = torch.xpu.lazy_init._queued_calls + torch.cuda._tls = torch.xpu.lazy_init._tls + torch.cuda.threading = torch.xpu.lazy_init.threading + torch.cuda.traceback = torch.xpu.lazy_init.traceback + torch.cuda._lazy_new = torch.xpu._lazy_new + + torch.cuda.FloatTensor = torch.xpu.FloatTensor + torch.cuda.FloatStorage = torch.xpu.FloatStorage + torch.cuda.BFloat16Tensor = torch.xpu.BFloat16Tensor + torch.cuda.BFloat16Storage = torch.xpu.BFloat16Storage + torch.cuda.HalfTensor = torch.xpu.HalfTensor + torch.cuda.HalfStorage = torch.xpu.HalfStorage + torch.cuda.ByteTensor = torch.xpu.ByteTensor + torch.cuda.ByteStorage = torch.xpu.ByteStorage + torch.cuda.DoubleTensor = torch.xpu.DoubleTensor + torch.cuda.DoubleStorage = torch.xpu.DoubleStorage + torch.cuda.ShortTensor = torch.xpu.ShortTensor + torch.cuda.ShortStorage = torch.xpu.ShortStorage + torch.cuda.LongTensor = torch.xpu.LongTensor + torch.cuda.LongStorage = torch.xpu.LongStorage + torch.cuda.IntTensor = torch.xpu.IntTensor + torch.cuda.IntStorage = torch.xpu.IntStorage + torch.cuda.CharTensor = torch.xpu.CharTensor + torch.cuda.CharStorage = torch.xpu.CharStorage + torch.cuda.BoolTensor = torch.xpu.BoolTensor + torch.cuda.BoolStorage = torch.xpu.BoolStorage + torch.cuda.ComplexFloatStorage = torch.xpu.ComplexFloatStorage + torch.cuda.ComplexDoubleStorage = torch.xpu.ComplexDoubleStorage + + if not legacy or ipex.__version__.startswith("2.3"): torch.cuda._initialization_lock = torch.xpu._initialization_lock torch.cuda._initialized = torch.xpu._initialized torch.cuda._is_in_bad_fork = torch.xpu._is_in_bad_fork @@ -152,8 +154,9 @@ def ipex_init(): # pylint: disable=too-many-statements torch.xpu.amp.custom_fwd = torch.cuda.amp.custom_fwd torch.xpu.amp.custom_bwd = torch.cuda.amp.custom_bwd torch.cuda.amp = torch.xpu.amp - torch.is_autocast_enabled = torch.xpu.is_autocast_xpu_enabled - torch.get_autocast_gpu_dtype = torch.xpu.get_autocast_xpu_dtype + if not ipex.__version__.startswith("2.3"): + torch.is_autocast_enabled = torch.xpu.is_autocast_xpu_enabled + torch.get_autocast_gpu_dtype = torch.xpu.get_autocast_xpu_dtype if not hasattr(torch.cuda.amp, "common"): 
torch.cuda.amp.common = contextlib.nullcontext() @@ -170,11 +173,16 @@ def ipex_init(): # pylint: disable=too-many-statements torch.cuda.amp.GradScaler = ipex.cpu.autocast._grad_scaler.GradScaler # C - if legacy: + if legacy and not ipex.__version__.startswith("2.3"): torch._C._cuda_getCurrentRawStream = ipex._C._getCurrentRawStream ipex._C._DeviceProperties.multi_processor_count = ipex._C._DeviceProperties.gpu_subslice_count - ipex._C._DeviceProperties.major = 2024 + ipex._C._DeviceProperties.major = 12 ipex._C._DeviceProperties.minor = 1 + else: + torch._C._cuda_getCurrentRawStream = torch._C._xpu_getCurrentRawStream + torch._C._XpuDeviceProperties.multi_processor_count = torch._C._XpuDeviceProperties.gpu_subslice_count + torch._C._XpuDeviceProperties.major = 12 + torch._C._XpuDeviceProperties.minor = 1 # Fix functions with ipex: torch.cuda.mem_get_info = lambda device=None: [(torch.xpu.get_device_properties(device).total_memory - torch.xpu.memory_reserved(device)), torch.xpu.get_device_properties(device).total_memory] diff --git a/modules/intel/ipex/hijacks.py b/modules/intel/ipex/hijacks.py index d8ee00e28..e5a71f5e2 100644 --- a/modules/intel/ipex/hijacks.py +++ b/modules/intel/ipex/hijacks.py @@ -26,7 +26,7 @@ def check_device(device): return bool((isinstance(device, torch.device) and device.type == "cuda") or (isinstance(device, str) and "cuda" in device) or isinstance(device, int)) def return_xpu(device): - return f"xpu:{device.split(':')[-1]}" if isinstance(device, str) and ":" in device else f"xpu:{device}" if isinstance(device, int) else torch.device(devices.device) if isinstance(device, torch.device) else devices.device + return f"xpu:{device.split(':')[-1]}" if isinstance(device, str) and ":" in device else f"xpu:{device}" if isinstance(device, int) else torch.device(f"xpu:{device.index}" if device.index is not None else "xpu") if isinstance(device, torch.device) else "xpu" # Autocast diff --git a/modules/intel/openvino/__init__.py b/modules/intel/openvino/__init__.py index c2af24637..975a97672 100644 --- a/modules/intel/openvino/__init__.py +++ b/modules/intel/openvino/__init__.py @@ -12,6 +12,7 @@ from torch._dynamo.backends.registry import register_backend from torch.fx.experimental.proxy_tensor import make_fx from torch.fx import GraphModule +from torch._subclasses.fake_tensor import FakeTensorMode from torch.utils._pytree import tree_flatten from types import MappingProxyType @@ -21,26 +22,6 @@ from modules import shared, devices, sd_models -def BUILD_MAP_UNPACK(self, inst): - items = self.popn(inst.argval) - # ensure everything is a dict - items = [BuiltinVariable(dict).call_function(self, [x], {}) for x in items] # noqa: F821 - result = dict() - for x in items: - assert isinstance(x, ConstDictVariable) # noqa: F821 - result.update(x.items) - self.push( - ConstDictVariable( # noqa: F821 - result, - dict, - mutable_local=MutableLocal(), # noqa: F821 - **VariableTracker.propagate(items), # noqa: F821 - ) - ) -tmp_torch = sys.modules["torch"] -tmp_torch.BUILD_MAP_UNPACK_WITH_CALL = BUILD_MAP_UNPACK -max_openvino_partitions = 0 - DEFAULT_OPENVINO_PYTHON_CONFIG = MappingProxyType( { "use_python_fusion_cache": True, @@ -48,6 +29,7 @@ def BUILD_MAP_UNPACK(self, inst): }, ) + class OpenVINOGraphModule(torch.nn.Module): def __init__(self, gm, partition_id, use_python_fusion_cache, model_hash_str: str = None, file_name=""): super().__init__() @@ -61,10 +43,12 @@ def __call__(self, *args): result = openvino_execute(self.gm, *args, executor_parameters=self.executor_parameters, 
partition_id=self.partition_id, file_name=self.file_name) return result + def get_device_list(): core = Core() return core.available_devices + def get_device(): if hasattr(shared, "opts") and len(shared.opts.openvino_devices) == 1: return shared.opts.openvino_devices[0] @@ -96,6 +80,7 @@ def get_device(): shared.log.warning(f"OpenVINO: No compatible GPU detected! Using {device}") return device + def get_openvino_device(): core = Core() try: @@ -103,6 +88,7 @@ def get_openvino_device(): except Exception: return f"OpenVINO {get_device()}" + def cached_model_name(model_hash_str, device, args, cache_root, reversed = False): if model_hash_str is None: return None @@ -120,9 +106,14 @@ def cached_model_name(model_hash_str, device, args, cache_root, reversed = False for input_data in args: if isinstance(input_data, torch.SymInt): if reversed: - inputs_str = "_" + "torch.SymInt" + inputs_str + inputs_str = "_" + "torch.SymInt1" + inputs_str else: inputs_str += "_" + "torch.SymInt1" + elif isinstance(input_data, int): + if reversed: + inputs_str = "_" + "int" + inputs_str + else: + inputs_str += "_" + "int" else: if reversed: inputs_str = "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "") + inputs_str @@ -133,18 +124,6 @@ def cached_model_name(model_hash_str, device, args, cache_root, reversed = False return file_name -def check_fully_supported(self, graph_module: GraphModule) -> bool: - num_fused = 0 - for node in graph_module.graph.nodes: - if node.op == "call_module" and "fused_" in node.name: - num_fused += 1 - elif node.op != "placeholder" and node.op != "output": - return False - if num_fused == 1: - return True - return False - -Partitioner.check_fully_supported = functools.partial(check_fully_supported, Partitioner) def execute( gm, @@ -161,6 +140,7 @@ def execute( msg = "Received unexpected value for 'executor': {0}. 
Allowed values are: openvino, strictly_openvino.".format(executor) raise ValueError(msg) + def execute_cached(compiled_model, *args): flat_args, _ = tree_flatten(args) ov_inputs = [a.detach().cpu().numpy() for a in flat_args] @@ -192,18 +172,21 @@ def openvino_compile(gm: GraphModule, *example_inputs, model_hash_str: str = Non for input_data in example_inputs: if isinstance(input_data, torch.SymInt): input_types.append(torch.SymInt) - input_shapes.append(1) + input_shapes.append(torch.Size([1])) + elif isinstance(input_data, int): + input_types.append(torch.int64) + input_shapes.append(torch.Size([1])) else: input_types.append(input_data.type()) input_shapes.append(input_data.size()) - decoder = TorchFXPythonDecoder(gm, gm, input_shapes=input_shapes, input_types=input_types) + decoder = TorchFXPythonDecoder(gm, input_shapes=input_shapes, input_types=input_types) im = fe.load(decoder) om = fe.convert(im) - if (file_name is not None): + if file_name is not None: serialize(om, file_name + ".xml", file_name + ".bin") if (shared.compiled_model_state.cn_model != []): f = open(file_name + ".txt", "w") @@ -224,8 +207,12 @@ def openvino_compile(gm: GraphModule, *example_inputs, model_hash_str: str = Non } for idx, input_data in enumerate(example_inputs): - om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype]) - om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape))) + if isinstance(input_data, int): + om.inputs[idx].get_node().set_element_type(dtype_mapping[torch.int64]) + om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([1])))) + else: + om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype]) + om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape))) om.validate_nodes_and_infer_types() if shared.opts.nncf_quantize and not dont_use_quant: @@ -256,6 +243,7 @@ def openvino_compile(gm: GraphModule, *example_inputs, model_hash_str: str = Non compiled_model = core.compile_model(om, device) return compiled_model + def openvino_compile_cached_model(cached_model_path, *example_inputs): core = Core() om = core.read_model(cached_model_path + ".xml") @@ -306,6 +294,7 @@ def openvino_compile_cached_model(cached_model_path, *example_inputs): compiled_model = core.compile_model(om, get_device()) return compiled_model + def openvino_execute(gm: GraphModule, *args, executor_parameters=None, partition_id, file_name=""): executor_parameters = executor_parameters or DEFAULT_OPENVINO_PYTHON_CONFIG @@ -320,6 +309,7 @@ def openvino_execute(gm: GraphModule, *args, executor_parameters=None, partition if use_cache and (partition_id in shared.compiled_model_state.compiled_cache): compiled = shared.compiled_model_state.compiled_cache[partition_id] + req = shared.compiled_model_state.req_cache[partition_id] else: if (shared.compiled_model_state.cn_model != [] and file_name is not None and os.path.isfile(file_name + ".xml") and os.path.isfile(file_name + ".bin")): @@ -327,17 +317,22 @@ def openvino_execute(gm: GraphModule, *args, executor_parameters=None, partition else: compiled = openvino_compile(gm, *args, model_hash_str=model_hash_str, file_name=file_name) shared.compiled_model_state.compiled_cache[partition_id] = compiled + req = compiled.create_infer_request() + shared.compiled_model_state.req_cache[partition_id] = req flat_args, _ = tree_flatten(args) - ov_inputs = [a.detach().cpu().numpy() for a in flat_args] + ov_inputs = [] + for arg in flat_args: + ov_inputs.append((arg if isinstance(arg, int) else 
arg.detach().cpu().numpy())) - res = compiled(ov_inputs) + res = req.infer(ov_inputs, share_inputs=True, share_outputs=True) results1 = [torch.from_numpy(res[out]) for out in compiled.outputs] if len(results1) == 1: return results1[0] return results1 + def openvino_execute_partitioned(gm: GraphModule, *args, executor_parameters=None, file_name=""): executor_parameters = executor_parameters or DEFAULT_OPENVINO_PYTHON_CONFIG @@ -360,8 +355,8 @@ def openvino_execute_partitioned(gm: GraphModule, *args, executor_parameters=Non return shared.compiled_model_state.partitioned_modules[signature](*args) + def partition_graph(gm: GraphModule, use_python_fusion_cache: bool, model_hash_str: str = None, file_name=""): - global max_openvino_partitions for node in gm.graph.nodes: if node.op == "call_module" and "fused_" in node.name: openvino_submodule = getattr(gm, node.name) @@ -375,16 +370,19 @@ def partition_graph(gm: GraphModule, use_python_fusion_cache: bool, model_hash_s return gm + def generate_subgraph_str(tensor): if hasattr(tensor, "weight"): shared.compiled_model_state.model_hash_str = shared.compiled_model_state.model_hash_str + sha256(str(tensor.weight).encode('utf-8')).hexdigest() return tensor + def get_subgraph_type(tensor): global subgraph_type subgraph_type.append(type(tensor)) return tensor + @register_backend @fake_tensor_unsupported def openvino_fx(subgraph, example_inputs): @@ -483,14 +481,15 @@ def _call(*args): if inputs_reversed: example_inputs.reverse() - model = make_fx(subgraph)(*example_inputs) + with FakeTensorMode(allow_non_fake_inputs=True): + model = make_fx(subgraph)(*example_inputs) for node in model.graph.nodes: if node.target == torch.ops.aten.mul_.Tensor: node.target = torch.ops.aten.mul.Tensor with devices.inference_context(): model.eval() - partitioner = Partitioner() - compiled_model = partitioner.make_partitions(model) + partitioner = Partitioner(options=None) + compiled_model = partitioner.make_partitions(model, options=None) if executor_parameters is not None and 'model_hash_str' in executor_parameters: # Check if the model is fully supported. 
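# --- Illustrative sketch, not part of the patch above ---
# The openvino_execute() change caches a persistent InferRequest per partition
# (compiled_cache / req_cache) and calls req.infer(...) instead of invoking the
# compiled model directly, passing ints through unchanged. A minimal standalone
# version of that caching pattern might look like this; the helper name and the
# two module-level dict caches are assumptions for illustration only.
from openvino.runtime import Core

compiled_cache: dict = {}
req_cache: dict = {}

def run_partition(partition_id, ov_model, device, inputs):
    # compile once per partition, then reuse both the compiled model and its infer request
    if partition_id not in compiled_cache:
        compiled = Core().compile_model(ov_model, device)
        compiled_cache[partition_id] = compiled
        req_cache[partition_id] = compiled.create_infer_request()
    compiled = compiled_cache[partition_id]
    req = req_cache[partition_id]
    # ints are passed through as-is, tensors are converted to numpy (mirrors the patch)
    ov_inputs = [a if isinstance(a, int) else a.detach().cpu().numpy() for a in inputs]
    res = req.infer(ov_inputs, share_inputs=True, share_outputs=True)
    return [res[out] for out in compiled.outputs]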
@@ -499,7 +498,6 @@ def _call(*args): executor_parameters["model_hash_str"] += "_fs" def _call(*args): - res = execute(compiled_model, *args, executor="openvino", - executor_parameters=executor_parameters, file_name=maybe_fs_cached_name) + res = execute(compiled_model, *args, executor="openvino", executor_parameters=executor_parameters, file_name=maybe_fs_cached_name) return res return _call diff --git a/modules/loader.py b/modules/loader.py index e7cb03339..0aad4d76b 100644 --- a/modules/loader.py +++ b/modules/loader.py @@ -27,6 +27,10 @@ warnings.filterwarnings(action="ignore", category=DeprecationWarning) warnings.filterwarnings(action="ignore", category=FutureWarning) warnings.filterwarnings(action="ignore", category=UserWarning, module="torchvision") +try: + torch._logging.set_logs(all=logging.ERROR, bytecode=False, aot_graphs=False, aot_joint_graph=False, ddp_graphs=False, graph=False, graph_code=False, graph_breaks=False, graph_sizes=False, guards=False, recompiles=False, recompiles_verbose=False, trace_source=False, trace_call=False, trace_bytecode=False, output_code=False, kernel_code=False, schedule=False, perf_hints=False, post_grad_graphs=False, onnx_diagnostics=False, fusion=False, overlap=False, export=None, modules=None, cudagraphs=False, sym_node=False, compiled_autograd_verbose=False) # pylint: disable=protected-access +except Exception: + pass if ".dev" in torch.__version__ or "+git" in torch.__version__: torch.__long_version__ = torch.__version__ torch.__version__ = re.search(r'[\d.]+[\d]', torch.__version__).group(0) @@ -49,10 +53,33 @@ import diffusers # pylint: disable=W0611,C0411 import diffusers.loaders.single_file # pylint: disable=W0611,C0411 +import huggingface_hub # pylint: disable=W0611,C0411 logging.getLogger("diffusers.loaders.single_file").setLevel(logging.ERROR) +timer.startup.record("diffusers") + + +# patch different progress bars +import tqdm as tqdm_lib # pylint: disable=C0411 from tqdm.rich import tqdm # pylint: disable=W0611,C0411 diffusers.loaders.single_file.logging.tqdm = partial(tqdm, unit='C') -timer.startup.record("diffusers") + +class _tqdm_cls(): + def __call__(self, *args, **kwargs): + bar_format = 'Diffusers {rate_fmt}{postfix} {bar} {percentage:3.0f}% {n_fmt}/{total_fmt} {elapsed} {remaining} ' + '\x1b[38;5;71m' + '{desc}' + '\x1b[0m' + return tqdm_lib.tqdm(*args, bar_format=bar_format, ncols=80, colour='#327fba', **kwargs) + +class _tqdm_old(tqdm_lib.tqdm): + def __init__(self, *args, **kwargs): + kwargs.pop("name", None) + kwargs['bar_format'] = 'Diffusers {rate_fmt}{postfix} {bar} {percentage:3.0f}% {n_fmt}/{total_fmt} {elapsed} {remaining} ' + '\x1b[38;5;71m' + '{desc}' + '\x1b[0m' + kwargs['ncols'] = 80 + super().__init__(*args, **kwargs) + + +transformers.utils.logging.tqdm = _tqdm_cls() +diffusers.pipelines.pipeline_utils.logging.tqdm = _tqdm_cls() +huggingface_hub._snapshot_download.hf_tqdm = _tqdm_old # pylint: disable=protected-access + def get_packages(): return { diff --git a/modules/model_auraflow.py b/modules/model_auraflow.py index 344e6558a..6f18bf13c 100644 --- a/modules/model_auraflow.py +++ b/modules/model_auraflow.py @@ -1,15 +1,17 @@ +import os import torch import diffusers +from modules import shared, sd_models, devices -repo_id = 'fal/AuraFlow' +debug = shared.log.trace if os.environ.get('SD_LOAD_DEBUG', None) is not None else lambda *args, **kwargs: None -def load_auraflow(_checkpoint_info, diffusers_load_config={}): - from modules import shared, devices +def load_auraflow(checkpoint_info, diffusers_load_config={}): 
+ repo_id = sd_models.path_to_repo(checkpoint_info.name) if 'torch_dtype' not in diffusers_load_config: diffusers_load_config['torch_dtype'] = torch.float16 - + debug(f'Loading AuraFlow: repo="{repo_id}" config={diffusers_load_config}') pipe = diffusers.AuraFlowPipeline.from_pretrained( repo_id, cache_dir = shared.opts.diffusers_dir, diff --git a/modules/model_flux.py b/modules/model_flux.py index 4940a659e..0fd4e39ca 100644 --- a/modules/model_flux.py +++ b/modules/model_flux.py @@ -5,109 +5,246 @@ import transformers from safetensors.torch import load_file from huggingface_hub import hf_hub_download -from modules import shared, devices - - -def load_quanto_transformer(checkpoint_info): - from optimum.quanto import requantize # pylint: disable=no-name-in-module - repo_path = checkpoint_info.path - quantization_map = os.path.join(repo_path, "transformer", "quantization_map.json") - if not os.path.exists(quantization_map): - repo_id = checkpoint_info.name.replace('Diffusers/', '') - quantization_map = hf_hub_download(repo_id, subfolder='transformer', filename='quantization_map.json', cache_dir=shared.opts.diffusers_dir) - with open(quantization_map, "r", encoding='utf8') as f: - quantization_map = json.load(f) - state_dict = load_file(os.path.join(repo_path, "transformer", "diffusion_pytorch_model.safetensors")) - dtype = state_dict['context_embedder.bias'].dtype - with torch.device("meta"): - transformer = diffusers.FluxTransformer2DModel.from_config(os.path.join(repo_path, "transformer", "config.json")).to(dtype=dtype) - requantize(transformer, state_dict, quantization_map, device=torch.device("cpu")) - transformer.eval() - return transformer +from modules import shared, devices, modelloader, sd_models + + +debug = shared.log.trace if os.environ.get('SD_LOAD_DEBUG', None) is not None else lambda *args, **kwargs: None + + +def get_quant(file_path): + if "qint8" in file_path.lower(): + return 'qint8' + if "qint4" in file_path.lower(): + return 'qint4' + if "fp8" in file_path.lower(): + return 'fp8' + if "fp4" in file_path.lower(): + return 'fp4' + if "nf4" in file_path.lower(): + return 'nf4' + return 'none' + + +def load_flux_quanto(checkpoint_info): + transformer, text_encoder_2 = None, None + from installer import install + install('optimum-quanto', quiet=True) + try: + from optimum import quanto # pylint: disable=no-name-in-module + from optimum.quanto import requantize # pylint: disable=no-name-in-module + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to import optimum-quanto: {e}") + raise + quanto.tensor.qbits.QBitsTensor.create = lambda *args, **kwargs: quanto.tensor.qbits.QBitsTensor(*args, **kwargs) + + if isinstance(checkpoint_info, str): + repo_path = checkpoint_info + else: + repo_path = checkpoint_info.path + + try: + quantization_map = os.path.join(repo_path, "transformer", "quantization_map.json") + debug(f'Loading FLUX: quantization map="{quantization_map}" repo="{checkpoint_info.name}" component="transformer"') + if not os.path.exists(quantization_map): + repo_id = sd_models.path_to_repo(checkpoint_info.name) + quantization_map = hf_hub_download(repo_id, subfolder='transformer', filename='quantization_map.json', cache_dir=shared.opts.diffusers_dir) + with open(quantization_map, "r", encoding='utf8') as f: + quantization_map = json.load(f) + state_dict = load_file(os.path.join(repo_path, "transformer", "diffusion_pytorch_model.safetensors")) + dtype = state_dict['context_embedder.bias'].dtype + with torch.device("meta"): + transformer = 
diffusers.FluxTransformer2DModel.from_config(os.path.join(repo_path, "transformer", "config.json")).to(dtype=dtype) + requantize(transformer, state_dict, quantization_map, device=torch.device("cpu")) + transformer.eval() + if transformer.dtype != devices.dtype: + try: + transformer = transformer.to(dtype=devices.dtype) + except Exception: + shared.log.error(f"Loading FLUX: Failed to cast transformer to {devices.dtype}, set dtype to {transformer.dtype}") + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to load Quanto transformer: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX Quanto:') + + try: + quantization_map = os.path.join(repo_path, "text_encoder_2", "quantization_map.json") + debug(f'Loading FLUX: quantization map="{quantization_map}" repo="{checkpoint_info.name}" component="text_encoder_2"') + if not os.path.exists(quantization_map): + repo_id = sd_models.path_to_repo(checkpoint_info.name) + quantization_map = hf_hub_download(repo_id, subfolder='text_encoder_2', filename='quantization_map.json', cache_dir=shared.opts.diffusers_dir) + with open(quantization_map, "r", encoding='utf8') as f: + quantization_map = json.load(f) + with open(os.path.join(repo_path, "text_encoder_2", "config.json"), encoding='utf8') as f: + t5_config = transformers.T5Config(**json.load(f)) + state_dict = load_file(os.path.join(repo_path, "text_encoder_2", "model.safetensors")) + dtype = state_dict['encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight'].dtype + with torch.device("meta"): + text_encoder_2 = transformers.T5EncoderModel(t5_config).to(dtype=dtype) + requantize(text_encoder_2, state_dict, quantization_map, device=torch.device("cpu")) + text_encoder_2.eval() + if text_encoder_2.dtype != devices.dtype: + try: + text_encoder_2 = text_encoder_2.to(dtype=devices.dtype) + except Exception: + shared.log.error(f"Loading FLUX: Failed to cast text encoder to {devices.dtype}, set dtype to {text_encoder_2.dtype}") + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to load Quanto text encoder: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX Quanto:') + + return transformer, text_encoder_2 -def load_quanto_text_encoder_2(checkpoint_info): - from optimum.quanto import requantize # pylint: disable=no-name-in-module - repo_path = checkpoint_info.path - quantization_map = os.path.join(repo_path, "text_encoder_2", "quantization_map.json") - if not os.path.exists(quantization_map): - repo_id = checkpoint_info.name.replace('Diffusers/', '') - quantization_map = hf_hub_download(repo_id, subfolder='text_encoder_2', filename='quantization_map.json', cache_dir=shared.opts.diffusers_dir) - with open(quantization_map, "r", encoding='utf8') as f: - quantization_map = json.load(f) - with open(os.path.join(repo_path, "text_encoder_2", "config.json"), encoding='utf8') as f: - t5_config = transformers.T5Config(**json.load(f)) - state_dict = load_file(os.path.join(repo_path, "text_encoder_2", "model.safetensors")) - dtype = state_dict['encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight'].dtype - with torch.device("meta"): - text_encoder_2 = transformers.T5EncoderModel(t5_config).to(dtype=dtype) - requantize(text_encoder_2, state_dict, quantization_map, device=torch.device("cpu")) - text_encoder_2.eval() - return text_encoder_2 - - -def load_transformer(file_path): +def load_flux_bnb(checkpoint_info, diffusers_load_config): # pylint: disable=unused-argument + transformer, text_encoder_2 = None, None + if 
isinstance(checkpoint_info, str): + repo_path = checkpoint_info + else: + repo_path = checkpoint_info.path + from installer import install + install('bitsandbytes', quiet=True) + from diffusers import FluxTransformer2DModel + quant = get_quant(repo_path) + if quant == 'fp8': + quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True) + if transformer is None: + transformer = FluxTransformer2DModel.from_single_file(repo_path, **diffusers_load_config, quantization_config=quantization_config) + elif quant == 'fp4': + quantization_config = transformers.BitsAndBytesConfig(load_in_4bit=True) + if transformer is None: + transformer = FluxTransformer2DModel.from_single_file(repo_path, **diffusers_load_config, quantization_config=quantization_config) + else: + if transformer is None: + transformer = FluxTransformer2DModel.from_single_file(repo_path, **diffusers_load_config) + return transformer, text_encoder_2 + + +def load_transformer(file_path): # triggered by opts.sd_unet change + transformer = None + quant = get_quant(file_path) diffusers_load_config = { "low_cpu_mem_usage": True, "torch_dtype": devices.dtype, "cache_dir": shared.opts.hfcache_dir, } - from diffusers import FluxTransformer2DModel - transformer = FluxTransformer2DModel.from_single_file(file_path, **diffusers_load_config) + shared.log.info(f'Loading UNet: type=FLUX file="{file_path}" offload={shared.opts.diffusers_offload_mode} quant={quant} dtype={devices.dtype}') + if 'nf4' in quant: + from modules.model_flux_nf4 import load_flux_nf4 + _transformer, _text_encoder_2 = load_flux_nf4(file_path) + if _transformer is not None: + transformer = _transformer + elif quant == 'qint8' or quant == 'qint4': + _transformer, _text_encoder_2 = load_flux_quanto(file_path) + if _transformer is not None: + transformer = _transformer + elif quant == 'fp8' or quant == 'fp4': + _transformer, _text_encoder_2 = load_flux_bnb(file_path, diffusers_load_config) + if _transformer is not None: + transformer = _transformer + else: + from diffusers import FluxTransformer2DModel + transformer = FluxTransformer2DModel.from_single_file(file_path, **diffusers_load_config) if transformer is None: shared.log.error('Failed to load UNet model') return transformer -def load_flux(checkpoint_info, diffusers_load_config): - if "qint8" in checkpoint_info.path.lower(): - quant = 'qint8' - elif "qint4" in checkpoint_info.path.lower(): - quant = 'qint4' - elif "nf4" in checkpoint_info.path.lower(): - quant = 'nf4' - else: - quant = None - shared.log.debug(f'Loading FLUX: model="{checkpoint_info.name}" quant={quant}') +def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_checkpoint change + quant = get_quant(checkpoint_info.path) + repo_id = sd_models.path_to_repo(checkpoint_info.name) + shared.log.debug(f'Loading FLUX: model="{checkpoint_info.name}" repo="{repo_id}" unet="{shared.opts.sd_unet}" t5="{shared.opts.sd_text_encoder}" vae="{shared.opts.sd_vae}" quant={quant} offload={shared.opts.diffusers_offload_mode} dtype={devices.dtype}') + debug(f'Loading FLUX: config={diffusers_load_config}') + modelloader.hf_login() + + transformer = None + text_encoder_2 = None + vae = None + + # load overrides if any + if shared.opts.sd_unet != 'None': + try: + debug(f'Loading FLUX: unet="{shared.opts.sd_unet}"') + from modules import sd_unet + _transformer = load_transformer(sd_unet.unet_dict[shared.opts.sd_unet]) + if _transformer is not None: + transformer = _transformer + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to load 
UNet: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX UNet:') + if shared.opts.sd_text_encoder != 'None': + try: + debug(f'Loading FLUX: t5="{shared.opts.sd_text_encoder}"') + from modules.model_t5 import load_t5 + _text_encoder_2 = load_t5(t5=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir) + if _text_encoder_2 is not None: + text_encoder_2 = _text_encoder_2 + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to load T5: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX T5:') + if shared.opts.sd_vae != 'None' and shared.opts.sd_vae != 'Automatic': + try: + debug(f'Loading FLUX: vae="{shared.opts.sd_vae}"') + from modules import sd_vae + # vae = sd_vae.load_vae_diffusers(None, sd_vae.vae_dict[shared.opts.sd_vae], 'override') + vae_file = sd_vae.vae_dict[shared.opts.sd_vae] + if os.path.exists(vae_file): + vae_config = os.path.join('configs', 'flux', 'vae', 'config.json') + vae = diffusers.AutoencoderKL.from_single_file(vae_file, config=vae_config, **diffusers_load_config) + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to load VAE: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX VAE:') + + # load quantized components if any if quant == 'nf4': - from installer import install - install('bitsandbytes', quiet=True) try: - import bitsandbytes # pylint: disable=unused-import + from modules.model_flux_nf4 import load_flux_nf4 + _transformer, _text_encoder = load_flux_nf4(checkpoint_info) + if _transformer is not None: + transformer = _transformer + if _text_encoder is not None: + text_encoder_2 = _text_encoder except Exception as e: - shared.log.error(f"FLUX: Failed to import bitsandbytes: {e}") - raise - from modules.model_flux_nf4 import load_flux_nf4 - pipe = load_flux_nf4(checkpoint_info, diffusers_load_config) - elif quant == 'qint8' or quant == 'qint4': - from installer import install - install('optimum-quanto', quiet=True) + shared.log.error(f"Loading FLUX: Failed to load NF4 components: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX NF4:') + if quant == 'qint8' or quant == 'qint4': try: - from optimum import quanto # pylint: disable=no-name-in-module + _transformer, _text_encoder = load_flux_quanto(checkpoint_info) + if _transformer is not None: + transformer = _transformer + if _text_encoder is not None: + text_encoder_2 = _text_encoder except Exception as e: - shared.log.error(f"FLUX: Failed to import optimum-quanto: {e}") - raise - quanto.tensor.qbits.QBitsTensor.create = lambda *args, **kwargs: quanto.tensor.qbits.QBitsTensor(*args, **kwargs) - pipe = diffusers.FluxPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, transformer=None, text_encoder_2=None, **diffusers_load_config) - pipe.transformer = load_quanto_transformer(checkpoint_info) - pipe.text_encoder_2 = load_quanto_text_encoder_2(checkpoint_info) - if pipe.transformer.dtype != devices.dtype: - try: - pipe.transformer = pipe.transformer.to(dtype=devices.dtype) - except Exception: - shared.log.error(f"FLUX: Failed to cast transformer to {devices.dtype}, set dtype to {pipe.transformer.dtype}") - raise - if pipe.text_encoder_2.dtype != devices.dtype: - try: - pipe.text_encoder_2 = pipe.text_encoder_2.to(dtype=devices.dtype) - except Exception: - shared.log.error(f"FLUX: Failed to cast text encoder to {devices.dtype}, set dtype to {pipe.text_encoder_2.dtype}") - raise - else: - pipe = diffusers.FluxPipeline.from_pretrained(checkpoint_info.path, 
cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) - if devices.dtype == torch.float16 and not shared.opts.no_half_vae: - shared.log.warning("FLUX: does not support FP16 VAE, enabling no-half-vae") - shared.opts.no_half_vae = True - # from accelerate.utils import compute_module_sizes - # shared.log.debug(f'FLUX computed size: {round(compute_module_sizes(pipe.transformer)[""] / 1024 / 1204)}') + shared.log.error(f"Loading FLUX: Failed to load Quanto components: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX Quanto:') + + # initialize pipeline with pre-loaded components + components = {} + if transformer is not None: + components['transformer'] = transformer + if text_encoder_2 is not None: + components['text_encoder_2'] = text_encoder_2 + if vae is not None: + components['vae'] = vae + shared.log.debug(f'Loading FLUX: preloaded={list(components)}') + if repo_id == 'sayakpaul/flux.1-dev-nf4': + repo_id = 'black-forest-labs/FLUX.1-dev' # workaround since sayakpaul model is missing model_index.json + pipe = diffusers.FluxPipeline.from_pretrained(repo_id, cache_dir=shared.opts.diffusers_dir, **components, **diffusers_load_config) + try: + diffusers.pipelines.auto_pipeline.AUTO_TEXT2IMAGE_PIPELINES_MAPPING["flux"] = diffusers.FluxPipeline + diffusers.pipelines.auto_pipeline.AUTO_IMAGE2IMAGE_PIPELINES_MAPPING["flux"] = diffusers.FluxImg2ImgPipeline + diffusers.pipelines.auto_pipeline.AUTO_INPAINT_PIPELINES_MAPPING["flux"] = diffusers.FluxInpaintPipeline + except Exception: + pass return pipe diff --git a/modules/model_flux_nf4.py b/modules/model_flux_nf4.py index a28dc3526..1644a667a 100644 --- a/modules/model_flux_nf4.py +++ b/modules/model_flux_nf4.py @@ -5,17 +5,31 @@ import os import torch import torch.nn as nn -import bitsandbytes as bnb from transformers.quantizers.quantizers_utils import get_module_from_name from huggingface_hub import hf_hub_download from accelerate import init_empty_weights from accelerate.utils import set_module_tensor_to_device -from diffusers import FluxTransformer2DModel, FluxPipeline from diffusers.loaders.single_file_utils import convert_flux_transformer_checkpoint_to_diffusers import safetensors.torch from modules import shared, devices +bnb = None +debug = os.environ.get('SD_LOAD_DEBUG', None) is not None + + +def load_bnb(): + from installer import install + install('bitsandbytes', quiet=True) + try: + import bitsandbytes + global bnb # pylint: disable=global-statement + bnb = bitsandbytes + except Exception as e: + shared.log.error(f"Loading FLUX: Failed to import bitsandbytes: {e}") + raise + + def _replace_with_bnb_linear( model, method="nf4", @@ -148,32 +162,41 @@ def create_quantized_param( module._parameters[tensor_name] = new_value # pylint: disable=protected-access -def load_flux_nf4(checkpoint_info, diffusers_load_config): - repo_path = checkpoint_info.path +def load_flux_nf4(checkpoint_info): + load_bnb() + transformer = None + text_encoder_2 = None + if isinstance(checkpoint_info, str): + repo_path = checkpoint_info + else: + repo_path = checkpoint_info.path if os.path.exists(repo_path) and os.path.isfile(repo_path): ckpt_path = repo_path - if os.path.exists(repo_path) and os.path.isdir(repo_path) and os.path.exists(os.path.join(repo_path, "diffusion_pytorch_model.safetensors")): + elif os.path.exists(repo_path) and os.path.isdir(repo_path) and os.path.exists(os.path.join(repo_path, "diffusion_pytorch_model.safetensors")): ckpt_path = os.path.join(repo_path, "diffusion_pytorch_model.safetensors") else: ckpt_path = 
hf_hub_download(repo_path, filename="diffusion_pytorch_model.safetensors", cache_dir=shared.opts.diffusers_dir) original_state_dict = safetensors.torch.load_file(ckpt_path) - if 'sayakpaul' in checkpoint_info.path: + if 'sayakpaul' in repo_path: converted_state_dict = original_state_dict # already converted else: try: converted_state_dict = convert_flux_transformer_checkpoint_to_diffusers(original_state_dict) except Exception as e: - from modules import errors - errors.display(e, 'FLUX convert:') - raise + shared.log.error(f"Loading FLUX: Failed to convert UNET: {e}") + if debug: + from modules import errors + errors.display(e, 'FLUX convert:') + converted_state_dict = original_state_dict with init_empty_weights(): - config = FluxTransformer2DModel.load_config("black-forest-labs/flux.1-dev", subfolder="transformer") - model = FluxTransformer2DModel.from_config(config).to(devices.dtype) - expected_state_dict_keys = list(model.state_dict().keys()) + from diffusers import FluxTransformer2DModel + config = FluxTransformer2DModel.load_config(os.path.join('configs', 'flux'), subfolder="transformer") + transformer = FluxTransformer2DModel.from_config(config).to(devices.dtype) + expected_state_dict_keys = list(transformer.state_dict().keys()) - _replace_with_bnb_linear(model, "nf4") + _replace_with_bnb_linear(transformer, "nf4") for param_name, param in converted_state_dict.items(): if param_name not in expected_state_dict_keys: @@ -181,12 +204,11 @@ def load_flux_nf4(checkpoint_info, diffusers_load_config): is_param_float8_e4m3fn = hasattr(torch, "float8_e4m3fn") and param.dtype == torch.float8_e4m3fn if torch.is_floating_point(param) and not is_param_float8_e4m3fn: param = param.to(devices.dtype) - if not check_quantized_param(model, param_name): - set_module_tensor_to_device(model, param_name, device=0, value=param) + if not check_quantized_param(transformer, param_name): + set_module_tensor_to_device(transformer, param_name, device=0, value=param) else: - create_quantized_param(model, param, param_name, target_device=0, state_dict=original_state_dict, pre_quantized=True) + create_quantized_param(transformer, param, param_name, target_device=0, state_dict=original_state_dict, pre_quantized=True) del original_state_dict - pipe = FluxPipeline.from_pretrained("black-forest-labs/flux.1-dev", transformer=model, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config) devices.torch_gc(force=True) - return pipe + return transformer, text_encoder_2 diff --git a/modules/model_t5.py b/modules/model_t5.py index 2e1ff06c9..4d082d2d4 100644 --- a/modules/model_t5.py +++ b/modules/model_t5.py @@ -1,67 +1,60 @@ +import os +import json import torch import transformers +from safetensors.torch import load_file +from modules import shared, devices, files_cache +from installer import install + + +t5_dict = {} def load_t5(t5=None, cache_dir=None): - from modules import devices, modelloader + from modules import modelloader + modelloader.hf_login() repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers' - if 'fp16' in t5.lower(): - modelloader.hf_login() - t5 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - # torch_dtype=dtype, - cache_dir=cache_dir, - torch_dtype=devices.dtype, - ) + fn = t5_dict.get(t5) if t5 in t5_dict else None + if fn is not None: + from accelerate.utils import set_module_tensor_to_device + with open(os.path.join('configs', 'flux', 'text_encoder_2', 'config.json'), encoding='utf8') as f: + t5_config = transformers.T5Config(**json.load(f)) + 
state_dict = load_file(fn) + dtype = state_dict['encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight'].dtype + with torch.device("meta"): + t5 = transformers.T5EncoderModel(t5_config).to(dtype=dtype) + for param_name, param in state_dict.items(): + is_param_float8_e4m3fn = hasattr(torch, "float8_e4m3fn") and param.dtype == torch.float8_e4m3fn + if torch.is_floating_point(param) and not is_param_float8_e4m3fn: + param = param.to(devices.dtype) + set_module_tensor_to_device(t5, param_name, device=0, value=param) + t5.eval() + if t5.dtype != devices.dtype: + try: + t5 = t5.to(dtype=devices.dtype) + except Exception: + shared.log.error(f"FLUX: Failed to cast text encoder to {devices.dtype}, set dtype to {t5.dtype}") + raise + elif 'fp16' in t5.lower(): + t5 = transformers.T5EncoderModel.from_pretrained(repo_id, subfolder='text_encoder_3', cache_dir=cache_dir, torch_dtype=devices.dtype) elif 'fp4' in t5.lower(): - modelloader.hf_login() - from installer import install install('bitsandbytes', quiet=True) quantization_config = transformers.BitsAndBytesConfig(load_in_4bit=True) - t5 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - quantization_config=quantization_config, - cache_dir=cache_dir, - torch_dtype=devices.dtype, - ) + t5 = transformers.T5EncoderModel.from_pretrained(repo_id, subfolder='text_encoder_3', quantization_config=quantization_config, cache_dir=cache_dir, torch_dtype=devices.dtype) elif 'fp8' in t5.lower(): - modelloader.hf_login() - from installer import install install('bitsandbytes', quiet=True) quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True) - t5 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - quantization_config=quantization_config, - cache_dir=cache_dir, - torch_dtype=devices.dtype, - ) + t5 = transformers.T5EncoderModel.from_pretrained(repo_id, subfolder='text_encoder_3', quantization_config=quantization_config, cache_dir=cache_dir, torch_dtype=devices.dtype) elif 'qint8' in t5.lower(): - modelloader.hf_login() - from installer import install install('optimum-quanto', quiet=True) from modules.sd_models_compile import optimum_quanto_model - t5 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - cache_dir=cache_dir, - torch_dtype=devices.dtype, - ) + t5 = transformers.T5EncoderModel.from_pretrained(repo_id, subfolder='text_encoder_3', cache_dir=cache_dir, torch_dtype=devices.dtype) t5 = optimum_quanto_model(t5, weights="qint8", activations="none") elif 'int8' in t5.lower(): - modelloader.hf_login() - from installer import install install('nncf==2.7.0', quiet=True) from modules.sd_models_compile import nncf_compress_model from modules.sd_hijack import NNCF_T5DenseGatedActDense - t5 = transformers.T5EncoderModel.from_pretrained( - repo_id, - subfolder='text_encoder_3', - cache_dir=cache_dir, - torch_dtype=devices.dtype, - ) + t5 = transformers.T5EncoderModel.from_pretrained(repo_id, subfolder='text_encoder_3', cache_dir=cache_dir, torch_dtype=devices.dtype) for i in range(len(t5.encoder.block)): t5.encoder.block[i].layer[1].DenseReluDense = NNCF_T5DenseGatedActDense( t5.encoder.block[i].layer[1].DenseReluDense, @@ -74,10 +67,11 @@ def load_t5(t5=None, cache_dir=None): def set_t5(pipe, module, t5=None, cache_dir=None): - from modules import devices, shared if pipe is None or not hasattr(pipe, module): return pipe t5 = load_t5(t5=t5, cache_dir=cache_dir) + if module == "text_encoder_2" and t5 is None: # do not unload 
te2 + return None setattr(pipe, module, t5) if shared.opts.diffusers_offload_mode == "sequential": from accelerate import cpu_offload @@ -90,3 +84,11 @@ def set_t5(pipe, module, t5=None, cache_dir=None): pipe.maybe_free_model_hooks() devices.torch_gc() return pipe + + +def refresh_t5_list(): + t5_dict.clear() + for file in files_cache.list_files(shared.opts.t5_dir, ext_filter=[".safetensors"]): + name = os.path.splitext(os.path.basename(file))[0] + t5_dict[name] = file + shared.log.debug(f'Available T5s: path="{shared.opts.t5_dir}" items={len(t5_dict)}') diff --git a/modules/modeldata.py b/modules/modeldata.py index f002ca7bd..52895857d 100644 --- a/modules/modeldata.py +++ b/modules/modeldata.py @@ -103,8 +103,10 @@ def sd_model_type(self): model_type = 'sc' elif "AuraFlow" in self.sd_model.__class__.__name__: model_type = 'auraflow' - elif "FluxPipeline" in self.sd_model.__class__.__name__: + elif "Flux" in self.sd_model.__class__.__name__: model_type = 'f1' + elif "CogVideo" in self.sd_model.__class__.__name__: + model_type = 'cogvideox' else: model_type = self.sd_model.__class__.__name__ except Exception: diff --git a/modules/paths.py b/modules/paths.py index 763eddd31..e71e8e917 100644 --- a/modules/paths.py +++ b/modules/paths.py @@ -101,6 +101,7 @@ def fix_path(folder): create_path(fix_path('diffusers_dir')) create_path(fix_path('vae_dir')) create_path(fix_path('unet_dir')) + create_path(fix_path('t5_dir')) create_path(fix_path('lora_dir')) create_path(fix_path('embeddings_dir')) create_path(fix_path('hypernetwork_dir')) diff --git a/modules/processing.py b/modules/processing.py index 5c7390903..9cca323a4 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -8,6 +8,7 @@ from modules.sd_hijack_hypertile import context_hypertile_vae, context_hypertile_unet from modules.processing_class import StableDiffusionProcessing, StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, StableDiffusionProcessingControl # pylint: disable=unused-import from modules.processing_info import create_infotext +from modules.modeldata import model_data from modules import pag @@ -35,23 +36,23 @@ class Processed: def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None, comments=""): self.images = images_list - self.prompt = p.prompt - self.negative_prompt = p.negative_prompt - self.seed = seed + self.prompt = p.prompt or '' + self.negative_prompt = p.negative_prompt or '' + self.seed = seed if seed != -1 else p.seed self.subseed = subseed self.subseed_strength = p.subseed_strength self.info = info - self.comments = comments + self.comments = comments or '' self.width = p.width if hasattr(p, 'width') else (self.images[0].width if len(self.images) > 0 else 0) self.height = p.height if hasattr(p, 'height') else (self.images[0].height if len(self.images) > 0 else 0) - self.sampler_name = p.sampler_name - self.cfg_scale = p.cfg_scale - self.image_cfg_scale = p.image_cfg_scale - self.steps = p.steps - self.batch_size = p.batch_size - self.restore_faces = p.restore_faces + self.sampler_name = p.sampler_name or '' + self.cfg_scale = p.cfg_scale or 0 + self.image_cfg_scale = p.image_cfg_scale or 0 + self.steps = p.steps or 0 + self.batch_size = max(1, p.batch_size) + self.restore_faces = p.restore_faces or False self.face_restoration_model = shared.opts.face_restoration_model if p.restore_faces else None - self.sd_model_hash = 
getattr(shared.sd_model, 'sd_model_hash', '') + self.sd_model_hash = getattr(shared.sd_model, 'sd_model_hash', '') if model_data.sd_model is not None else '' self.seed_resize_from_w = p.seed_resize_from_w self.seed_resize_from_h = p.seed_resize_from_h self.denoising_strength = p.denoising_strength @@ -114,7 +115,6 @@ def infotext(self, p: StableDiffusionProcessing, index): return create_infotext(p, self.all_prompts, self.all_seeds, self.all_subseeds, comments=[], position_in_batch=index % self.batch_size, iteration=index // self.batch_size) - def process_images(p: StableDiffusionProcessing) -> Processed: debug(f'Process images: {vars(p)}') if not hasattr(p.sd_model, 'sd_checkpoint_info'): diff --git a/modules/processing_args.py b/modules/processing_args.py index d08783256..ef7e9f30f 100644 --- a/modules/processing_args.py +++ b/modules/processing_args.py @@ -34,6 +34,10 @@ def task_specific_kwargs(p, model): 'image': p.init_images, 'strength': p.denoising_strength, } + if model.__class__.__name__ == 'FluxImg2ImgPipeline': # needs explicit width/height + p.width = 8 * math.ceil(p.init_images[0].width / 8) + p.height = 8 * math.ceil(p.init_images[0].height / 8) + task_args['width'], task_args['height'] = p.width, p.height elif sd_models.get_diffusers_task(model) == sd_models.DiffusersTaskType.INSTRUCT and len(getattr(p, 'init_images', [])) > 0: p.ops.append('instruct') task_args = { @@ -229,6 +233,15 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2 args['cross_attention_kwargs'] = {} args['cross_attention_kwargs'][k] = v + # handle missing resolution + if args.get('image', None) is not None and ('width' not in args or 'height' not in args): + if isinstance(args['image'], torch.Tensor) or isinstance(args['image'], np.ndarray): + args['width'] = 8 * args['image'].shape[-1] + args['height'] = 8 * args['image'].shape[-2] + else: + args['width'] = 8 * math.ceil(args['image'][0].width / 8) + args['height'] = 8 * math.ceil(args['image'][0].height / 8) + # handle implicit controlnet if 'control_image' in possible and 'control_image' not in args and 'image' in args: debug('Diffusers: set control image') diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py index a9c7ab5e8..33416f103 100644 --- a/modules/processing_callbacks.py +++ b/modules/processing_callbacks.py @@ -81,11 +81,21 @@ def diffusers_callback(pipe, step: int, timestep: int, kwargs: dict): else: pipe._guidance_scale = 0.0 # pylint: disable=protected-access for key in {"prompt_embeds", "negative_prompt_embeds", "add_text_embeds", "add_time_ids"} & set(kwargs): - kwargs[key] = kwargs[key].chunk(2)[-1] - if hasattr(pipe, "_unpack_latents") and hasattr(pipe, "vae_scale_factor"): # FLUX - shared.state.current_latent = pipe._unpack_latents(kwargs['latents'], p.height, p.width, pipe.vae_scale_factor) # pylint: disable=protected-access - else: - shared.state.current_latent = kwargs['latents'] + if kwargs[key] is not None: + kwargs[key] = kwargs[key].chunk(2)[-1] + try: + if hasattr(pipe, "_unpack_latents") and hasattr(pipe, "vae_scale_factor"): # FLUX + if p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5) and p.is_hr_pass: + width = max(getattr(p, 'width', 0), getattr(p, 'hr_upscale_to_x', 0)) + height = max(getattr(p, 'height', 0), getattr(p, 'hr_upscale_to_y', 0)) + else: + width = getattr(p, 'width', 0) + height = getattr(p, 'height', 0) + shared.state.current_latent = pipe._unpack_latents(kwargs['latents'], height, width, pipe.vae_scale_factor) # pylint: 
disable=protected-access + else: + shared.state.current_latent = kwargs['latents'] + except Exception as e: + shared.log.error(f'Callback: {e}') if shared.cmd_opts.profile and shared.profiler is not None: shared.profiler.step() return kwargs diff --git a/modules/processing_class.py b/modules/processing_class.py index ad083ec5a..43cb3407b 100644 --- a/modules/processing_class.py +++ b/modules/processing_class.py @@ -20,7 +20,7 @@ class StableDiffusionProcessing: """ The first set of paramaters: sd_models -> do_not_reload_embeddings represent the minimum required to create a StableDiffusionProcessing """ - def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, hr_sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, image_cfg_scale: float = None, clip_skip: int = 1, width: int = 512, height: int = 512, full_quality: bool = True, restore_faces: bool = False, tiling: bool = False, hidiffusion: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, diffusers_guidance_rescale: float = 0.7, pag_scale: float = 0.0, pag_adaptive: float = 0.5, cfg_end: float = 1, resize_mode: int = 0, resize_name: str = 'None', scale_by: float = 0, selected_scale_tab: int = 0, hdr_mode: int = 0, hdr_brightness: float = 0, hdr_color: float = 0, hdr_sharpen: float = 0, hdr_clamp: bool = False, hdr_boundary: float = 4.0, hdr_threshold: float = 0.95, hdr_maximize: bool = False, hdr_max_center: float = 0.6, hdr_max_boundry: float = 1.0, hdr_color_picker: str = None, hdr_tint_ratio: float = 0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None, script_args: list = None): # pylint: disable=unused-argument + def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, hr_sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, image_cfg_scale: float = None, clip_skip: int = 1, width: int = 512, height: int = 512, full_quality: bool = True, restore_faces: bool = False, tiling: bool = False, hidiffusion: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, diffusers_guidance_rescale: float = 0.7, pag_scale: float = 0.0, pag_adaptive: float = 0.5, cfg_end: float = 1, resize_mode: int = 0, resize_name: str = 'None', resize_context: str = 'None', scale_by: float = 0, selected_scale_tab: int = 0, hdr_mode: int = 0, hdr_brightness: float = 0, hdr_color: float = 0, hdr_sharpen: float = 0, hdr_clamp: bool = False, hdr_boundary: float = 4.0, hdr_threshold: float = 0.95, hdr_maximize: bool = False, hdr_max_center: float = 0.6, hdr_max_boundry: float = 
1.0, hdr_color_picker: str = None, hdr_tint_ratio: float = 0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None, script_args: list = None): # pylint: disable=unused-argument self.outpath_samples: str = outpath_samples self.outpath_grids: str = outpath_grids self.prompt: str = prompt @@ -81,6 +81,8 @@ def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prom self.enable_hr = None self.hr_scale = None self.hr_upscaler = None + self.hr_resize_mode = 0 + self.hr_resize_context = 'None' self.hr_resize_x = 0 self.hr_resize_y = 0 self.hr_upscale_to_x = 0 @@ -95,6 +97,7 @@ def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prom self.ops = [] self.resize_mode: int = resize_mode self.resize_name: str = resize_name + self.resize_context: str = resize_context self.ddim_discretize = shared.opts.ddim_discretize self.s_min_uncond = shared.opts.s_min_uncond self.s_churn = shared.opts.s_churn @@ -169,7 +172,7 @@ def script_args(self, value): def setup_scripts(self): self.scripts_setup_complete = True - self.scripts.setup_scrips(self, is_ui=not self.is_api) + self.scripts.setup_scripts() def comment(self, text): self.comments[text] = 1 @@ -186,13 +189,15 @@ def close(self): class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): - def __init__(self, enable_hr: bool = False, denoising_strength: float = 0.75, firstphase_width: int = 0, firstphase_height: int = 0, hr_scale: float = 2.0, hr_force: bool = False, hr_upscaler: str = None, hr_second_pass_steps: int = 0, hr_resize_x: int = 0, hr_resize_y: int = 0, refiner_steps: int = 5, refiner_start: float = 0, refiner_prompt: str = '', refiner_negative: str = '', **kwargs): + def __init__(self, enable_hr: bool = False, denoising_strength: float = 0.75, firstphase_width: int = 0, firstphase_height: int = 0, hr_scale: float = 2.0, hr_force: bool = False, hr_resize_mode: int = 0, hr_resize_context: str = 'None', hr_upscaler: str = None, hr_second_pass_steps: int = 0, hr_resize_x: int = 0, hr_resize_y: int = 0, refiner_steps: int = 5, refiner_start: float = 0, refiner_prompt: str = '', refiner_negative: str = '', **kwargs): super().__init__(**kwargs) self.enable_hr = enable_hr self.denoising_strength = denoising_strength self.hr_scale = hr_scale self.hr_upscaler = hr_upscaler + self.hr_resize_mode = hr_resize_mode + self.hr_resize_context = hr_resize_context self.hr_force = hr_force self.hr_second_pass_steps = hr_second_pass_steps self.hr_resize_x = hr_resize_x @@ -240,6 +245,9 @@ def init_hr(self, scale = None, upscaler = None, force = False): # pylint: disab elif self.hr_resize_x == 0: self.hr_upscale_to_x = self.hr_resize_y * self.width // self.height self.hr_upscale_to_y = self.hr_resize_y + elif self.hr_resize_x > 0 and self.hr_resize_y > 0 and shared.native: + self.hr_upscale_to_x = self.hr_resize_x + self.hr_upscale_to_y = self.hr_resize_y else: target_w = self.hr_resize_x target_h = self.hr_resize_y @@ -260,7 +268,7 @@ def init_hr(self, scale = None, upscaler = None, force = False): # pylint: disab self.is_hr_pass = True hypertile_set(self, hr=True) shared.state.job_count = 2 * self.n_iter - shared.log.debug(f'Init hires: upscaler="{self.hr_upscaler}" sampler="{self.hr_sampler_name}" resize={self.hr_resize_x}x{self.hr_resize_y} upscale={self.hr_upscale_to_x}x{self.hr_upscale_to_y}') + shared.log.debug(f'Init hires: upscaler="{self.hr_upscaler}" sampler="{self.hr_sampler_name}" resize={self.hr_resize_x}x{self.hr_resize_y} 
upscale={self.hr_upscale_to_x}x{self.hr_upscale_to_y}') def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts): from modules import processing_original @@ -269,11 +277,12 @@ def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subs class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): - def __init__(self, init_images: list = None, resize_mode: int = 0, resize_name: str = 'None', denoising_strength: float = 0.3, image_cfg_scale: float = None, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = False, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: float = None, scale_by: float = 1, refiner_steps: int = 5, refiner_start: float = 0, refiner_prompt: str = '', refiner_negative: str = '', **kwargs): + def __init__(self, init_images: list = None, resize_mode: int = 0, resize_name: str = 'None', resize_context: str = 'None', denoising_strength: float = 0.3, image_cfg_scale: float = None, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = False, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: float = None, scale_by: float = 1, refiner_steps: int = 5, refiner_start: float = 0, refiner_prompt: str = '', refiner_negative: str = '', **kwargs): super().__init__(**kwargs) self.init_images = init_images self.resize_mode: int = resize_mode self.resize_name: str = resize_name + self.resize_context: str = resize_context self.denoising_strength: float = denoising_strength self.hr_denoising_strength: float = denoising_strength self.image_cfg_scale: float = image_cfg_scale @@ -304,6 +313,11 @@ def __init__(self, init_images: list = None, resize_mode: int = 0, resize_name: self.script_args = [] def init(self, all_prompts=None, all_seeds=None, all_subseeds=None): + if hasattr(self, 'init_images') and self.init_images is not None and len(self.init_images) > 0: + if self.width is None or self.width == 0: + self.width = int(8 * (self.init_images[0].width * self.scale_by // 8)) + if self.height is None or self.height == 0: + self.height = int(8 * (self.init_images[0].height * self.scale_by // 8)) if shared.native and getattr(self, 'image_mask', None) is not None: shared.sd_model = sd_models.set_diffuser_pipe(self.sd_model, sd_models.DiffusersTaskType.INPAINTING) elif shared.native and getattr(self, 'init_images', None) is not None: @@ -383,10 +397,8 @@ def init(self, all_prompts=None, all_seeds=None, all_subseeds=None): if shared.opts.save_init_img: images.save_image(img, path=shared.opts.outdir_init_images, basename=None, forced_filename=self.init_img_hash, suffix="-init-image") image = images.flatten(img, shared.opts.img2img_background_color) - if self.width is None or self.height is None: - self.width, self.height = image.width, image.height if crop_region is None and self.resize_mode > 0: - image = images.resize_image(self.resize_mode, image, self.width, self.height, self.resize_name) + image = images.resize_image(self.resize_mode, image, self.width, self.height, upscaler_name=self.resize_name, context=self.resize_context) self.width = image.width self.height = image.height if self.image_mask is not None and shared.opts.mask_apply_overlay: @@ -456,6 +468,7 @@ def __init__(self, **kwargs): self.controlnet_conditioning_scale = None self.control_guidance_start = None self.control_guidance_end = None + self.control_mode = None self.reference_attn = None 
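# --- Illustrative note, not part of the patch above ---
# The img2img init() change earlier in this file derives width/height from the
# first init image when they are unset, snapping both to multiples of 8.
# A quick check of that rounding with assumed numbers (an 800x600 init image,
# scale_by=1.5):
init_w, init_h, scale_by = 800, 600, 1.5
width = int(8 * (init_w * scale_by // 8))   # -> 1200
height = int(8 * (init_h * scale_by // 8))  # -> 896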
self.reference_adain = None self.attention_auto_machine_weight = None @@ -495,15 +508,19 @@ def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subs def init_hr(self, scale = None, upscaler = None, force = False): scale = scale or self.scale_by upscaler = upscaler or self.resize_name - if upscaler == 'None' or scale == 1.0: + use_scale = self.hr_resize_x == 0 or self.hr_resize_y == 0 + if upscaler == 'None' or (use_scale and scale == 1.0): return self.is_hr_pass = True self.hr_force = force self.hr_upscaler = upscaler - self.hr_upscale_to_x, self.hr_upscale_to_y = 8 * int(self.width * scale / 8), 8 * int(self.height * scale / 8) + if use_scale: + self.hr_upscale_to_x, self.hr_upscale_to_y = 8 * int(self.width * scale / 8), 8 * int(self.height * scale / 8) + else: + self.hr_upscale_to_x, self.hr_upscale_to_y = self.hr_resize_x, self.hr_resize_y # hypertile_set(self, hr=True) shared.state.job_count = 2 * self.n_iter - shared.log.debug(f'Control hires: upscaler="{self.hr_upscaler}" upscale={scale} size={self.hr_upscale_to_x}x{self.hr_upscale_to_y}') + shared.log.debug(f'Control hires: upscaler="{self.hr_upscaler}" scale={scale} fixed={not use_scale} size={self.hr_upscale_to_x}x{self.hr_upscale_to_y}') def switch_class(p: StableDiffusionProcessing, new_class: type, dct: dict = None): diff --git a/modules/processing_correction.py b/modules/processing_correction.py index 6afaaa8d3..c52f30ab3 100644 --- a/modules/processing_correction.py +++ b/modules/processing_correction.py @@ -85,8 +85,7 @@ def correction(p, timestep, latent): if timestep > 950 and p.hdr_clamp: p.extra_generation_params["HDR clamp"] = f'{p.hdr_threshold}/{p.hdr_boundary}' latent = soft_clamp_tensor(latent, threshold=p.hdr_threshold, boundary=p.hdr_boundary) - if 500 < timestep < 800 and (p.hdr_brightness != 0 or p.hdr_color != 0 or p.hdr_tint_ratio != 0): - p.extra_generation_params["HDR center"] = f'{p.hdr_color}/{p.hdr_brightness}' + if 600 < timestep < 900 and (p.hdr_color != 0 or p.hdr_tint_ratio != 0): if p.hdr_brightness != 0: latent[0:1] = center_tensor(latent[0:1], full_shift=float(p.hdr_mode), offset=2*p.hdr_brightness) # Brightness p.extra_generation_params["HDR brightness"] = f'{p.hdr_brightness}' @@ -98,6 +97,11 @@ def correction(p, timestep, latent): if p.hdr_tint_ratio != 0: latent = color_adjust(latent, p.hdr_color_picker, p.hdr_tint_ratio) p.hdr_tint_ratio = 0 + if timestep < 200 and (p.hdr_brightness != 0): # do it late so it doesn't change the composition + if p.hdr_brightness != 0: + latent[0:1] = center_tensor(latent[0:1], full_shift=float(p.hdr_mode), offset=2*p.hdr_brightness) # Brightness + p.extra_generation_params["HDR brightness"] = f'{p.hdr_brightness}' + p.hdr_brightness = 0 if timestep < 350 and p.hdr_sharpen != 0: p.extra_generation_params["HDR sharpen"] = f'{p.hdr_sharpen}' per_step_ratio = 2 ** (timestep / 250) * p.hdr_sharpen / 16 diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py index a5714e775..2233a0798 100644 --- a/modules/processing_diffusers.py +++ b/modules/processing_diffusers.py @@ -26,20 +26,6 @@ def is_refiner_enabled(): return p.enable_hr and p.refiner_steps > 0 and p.refiner_start > 0 and p.refiner_start < 1 and shared.sd_refiner is not None def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): - """ - import diffusers - if p.sag_scale > 0 and is_txt2img(): - update_sampler(shared.sd_model) - supported = ['DDIMScheduler', 'PNDMScheduler', 'DDPMScheduler', 'DEISMultistepScheduler', 'UniPCMultistepScheduler', 
'DPMSolverMultistepScheduler', 'DPMSolverSinlgestepScheduler'] - if hasattr(sd_model, 'sfast'): - shared.log.warning(f'SAG incompatible compile mode: backend={shared.opts.cuda_compile_backend}') - elif sd_model.scheduler.__class__.__name__ in supported: - sd_model = sd_models.switch_pipe(diffusers.StableDiffusionSAGPipeline, sd_model) - p.extra_generation_params["SAG scale"] = p.sag_scale - p.task_args['sag_scale'] = p.sag_scale - else: - shared.log.warning(f'SAG incompatible scheduler: current={sd_model.scheduler.__class__.__name__} supported={supported}') - """ if sd_models.get_diffusers_task(sd_model) == sd_models.DiffusersTaskType.INPAINTING and getattr(p, 'image_mask', None) is None and p.task_args.get('image_mask', None) is None and getattr(p, 'mask', None) is None: shared.log.warning('Processing: mode=inpaint mask=None') sd_model = sd_models.set_diffuser_pipe(sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE) @@ -167,8 +153,8 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): shared.sd_model.restore_pipeline() # upscale - if hasattr(p, 'height') and hasattr(p, 'width') and p.hr_upscaler is not None and p.hr_upscaler != 'None': - shared.log.info(f'Upscale: upscaler="{p.hr_upscaler}" resize={p.hr_resize_x}x{p.hr_resize_y} upscale={p.hr_upscale_to_x}x{p.hr_upscale_to_y}') + if hasattr(p, 'height') and hasattr(p, 'width') and p.hr_resize_mode >0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5): + shared.log.info(f'Upscale: mode={p.hr_resize_mode} upscaler="{p.hr_upscaler}" context="{p.hr_resize_context}" resize={p.hr_resize_x}x{p.hr_resize_y} upscale={p.hr_upscale_to_x}x{p.hr_upscale_to_y}') p.ops.append('upscale') if shared.opts.save and not p.do_not_save_samples and shared.opts.save_images_before_highres_fix and hasattr(shared.sd_model, 'vae'): save_intermediate(p, latents=output.images, suffix="-before-hires") @@ -185,13 +171,19 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): p.hr_force = True # hires + p.denoising_strength = getattr(p, 'hr_denoising_strength', p.denoising_strength) + if p.hr_force and p.denoising_strength == 0: + shared.log.warning('HiRes skip: denoising=0') + p.hr_force = False if p.hr_force: shared.state.job_count = 2 * p.n_iter shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE) shared.log.info(f'HiRes: class={shared.sd_model.__class__.__name__} sampler="{p.hr_sampler_name}"') + if 'Upscale' in shared.sd_model.__class__.__name__ or 'Flux' in shared.sd_model.__class__.__name__: + output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height) if p.is_control and hasattr(p, 'task_args') and p.task_args.get('image', None) is not None: if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0: - output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil') # controlnet cannnot deal with latent input + output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y) # controlnet cannnot deal with latent input p.task_args['image'] = output.images # replace so hires uses new output sd_models.move_model(shared.sd_model, devices.device) orig_denoise = p.denoising_strength @@ -256,8 +248,8 @@ def update_pipeline(sd_model, p: 
processing.StableDiffusionProcessing): image = output.images[i] noise_level = round(350 * p.denoising_strength) output_type='latent' if hasattr(shared.sd_refiner, 'vae') else 'np' - if shared.sd_refiner.__class__.__name__ == 'StableDiffusionUpscalePipeline': - image = processing_vae.vae_decode(latents=image, model=shared.sd_model, full_quality=p.full_quality, output_type='pil') + if 'Upscale' in shared.sd_refiner.__class__.__name__ or 'Flux' in shared.sd_refiner.__class__.__name__: + image = processing_vae.vae_decode(latents=image, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height) p.extra_generation_params['Noise level'] = noise_level output_type = 'np' if hasattr(p, 'task_args') and p.task_args.get('image', None) is not None and output is not None: # replace input with output so it can be used by hires/refine @@ -294,7 +286,7 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): shared.log.info(e) if not shared.state.interrupted and not shared.state.skipped: - refiner_images = processing_vae.vae_decode(latents=refiner_output.images, model=shared.sd_refiner, full_quality=True) + refiner_images = processing_vae.vae_decode(latents=refiner_output.images, model=shared.sd_refiner, full_quality=True, width=max(p.width, p.hr_upscale_to_x), height=max(p.height, p.hr_upscale_to_y)) for refiner_image in refiner_images: results.append(refiner_image) @@ -313,12 +305,14 @@ def update_pipeline(sd_model, p: processing.StableDiffusionProcessing): if not hasattr(output, 'images') and hasattr(output, 'frames'): shared.log.debug(f'Generated: frames={len(output.frames[0])}') output.images = output.frames[0] - if hasattr(shared.sd_model, "_unpack_latents") and hasattr(shared.sd_model, "vae_scale_factor"): # FLUX - output.images = shared.sd_model._unpack_latents(output.images, p.height, p.width, shared.sd_model.vae_scale_factor) # pylint: disable=protected-access - if torch.is_tensor(output.images) and len(output.images) > 0 and any(s >= 512 for s in output.images.shape): - results = output.images.float().cpu().numpy() - elif hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0: - results = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality) + if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0: + if p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5): + width = max(getattr(p, 'width', 0), getattr(p, 'hr_upscale_to_x', 0)) + height = max(getattr(p, 'height', 0), getattr(p, 'hr_upscale_to_y', 0)) + else: + width = getattr(p, 'width', 0) + height = getattr(p, 'height', 0) + results = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, width=width, height=height) elif hasattr(output, 'images'): results = output.images else: diff --git a/modules/processing_helpers.py b/modules/processing_helpers.py index 47839ed43..82f68ca35 100644 --- a/modules/processing_helpers.py +++ b/modules/processing_helpers.py @@ -196,7 +196,8 @@ def decode_first_stage(model, x, full_quality=True): try: if full_quality: if hasattr(model, 'decode_first_stage'): - x_sample = model.decode_first_stage(x) * 0.5 + 0.5 + # x_sample = model.decode_first_stage(x) * 0.5 + 0.5 + x_sample = model.decode_first_stage(x) elif hasattr(model, 'vae'): x_sample = processing_vae.vae_decode(latents=x, model=model, output_type='np', full_quality=full_quality) else: @@ -382,20 +383,20 @@ def 
resize_init_images(p): def resize_hires(p, latents): # input=latents output=pil if not latent_upscaler else latent if not torch.is_tensor(latents): shared.log.warning('Hires: input is not tensor') - first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil') + first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height) return first_pass_images latent_upscaler = shared.latent_upscale_modes.get(p.hr_upscaler, None) # shared.log.info(f'Hires: upscaler={p.hr_upscaler} width={p.hr_upscale_to_x} height={p.hr_upscale_to_y} images={latents.shape[0]}') if latent_upscaler is not None: return torch.nn.functional.interpolate(latents, size=(p.hr_upscale_to_y // 8, p.hr_upscale_to_x // 8), mode=latent_upscaler["mode"], antialias=latent_upscaler["antialias"]) - first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil') + first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height) if p.hr_upscale_to_x == 0 or p.hr_upscale_to_y == 0 and hasattr(p, 'init_hr'): shared.log.error('Hires: missing upscaling dimensions') return first_pass_images resized_images = [] for img in first_pass_images: if latent_upscaler is None: - resized_image = images.resize_image(1, img, p.hr_upscale_to_x, p.hr_upscale_to_y, upscaler_name=p.hr_upscaler) + resized_image = images.resize_image(p.hr_resize_mode, img, p.hr_upscale_to_x, p.hr_upscale_to_y, upscaler_name=p.hr_upscaler, context=p.hr_resize_context) else: resized_image = img resized_images.append(resized_image) @@ -530,7 +531,7 @@ def save_intermediate(p, latents, suffix): for i in range(len(latents)): from modules.processing import create_infotext info=create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, [], iteration=p.iteration, position_in_batch=i) - decoded = processing_vae.vae_decode(latents=latents, model=shared.sd_model, output_type='pil', full_quality=p.full_quality) + decoded = processing_vae.vae_decode(latents=latents, model=shared.sd_model, output_type='pil', full_quality=p.full_quality, width=p.width, height=p.height) for j in range(len(decoded)): images.save_image(decoded[j], path=p.outpath_samples, basename="", seed=p.seeds[i], prompt=p.prompts[i], extension=shared.opts.samples_format, info=info, p=p, suffix=suffix) diff --git a/modules/processing_info.py b/modules/processing_info.py index 95e573f21..ac05aa58d 100644 --- a/modules/processing_info.py +++ b/modules/processing_info.py @@ -71,13 +71,16 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No args["Variation seed"] = all_subseeds[index] if p.subseed_strength > 0 else None args["Variation strength"] = p.subseed_strength if p.subseed_strength > 0 else None if 'hires' in p.ops or 'upscale' in p.ops: + is_resize = p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5) args["Second pass"] = p.enable_hr args["Hires force"] = p.hr_force args["Hires steps"] = p.hr_second_pass_steps - args["Hires upscaler"] = p.hr_upscaler if p.hr_upscaler is not None and p.hr_upscaler != 'None' else None - args["Hires upscale"] = p.hr_scale if p.hr_upscaler is not None and p.hr_upscaler != 'None' else None - args["Hires resize"] = f"{p.hr_resize_x}x{p.hr_resize_y}" if p.hr_upscaler is not None and p.hr_upscaler 
!= 'None' else None - args["Hires size"] = f"{p.hr_upscale_to_x}x{p.hr_upscale_to_y}" if p.hr_upscaler is not None and p.hr_upscaler != 'None' else None + args["HiRes resize mode"] = p.hr_resize_mode if is_resize else None + args["HiRes resize context"] = p.hr_resize_context if p.hr_resize_mode == 5 else None + args["Hires upscaler"] = p.hr_upscaler if is_resize else None + args["Hires scale"] = p.hr_scale if is_resize else None + args["Hires resize"] = f"{p.hr_resize_x}x{p.hr_resize_y}" if is_resize else None + args["Hires size"] = f"{p.hr_upscale_to_x}x{p.hr_upscale_to_y}" if is_resize else None args["Denoising strength"] = p.denoising_strength args["Hires sampler"] = p.hr_sampler_name args["Image CFG scale"] = p.image_cfg_scale diff --git a/modules/processing_vae.py b/modules/processing_vae.py index e5108f0d3..9ab4acad9 100644 --- a/modules/processing_vae.py +++ b/modules/processing_vae.py @@ -5,8 +5,9 @@ from modules import shared, devices, sd_models, sd_vae, sd_vae_taesd -debug = shared.log.trace if os.environ.get('SD_VAE_DEBUG', None) is not None else lambda *args, **kwargs: None -debug('Trace: VAE') +debug = os.environ.get('SD_VAE_DEBUG', None) is not None +log_debug = shared.log.trace if debug else lambda *args, **kwargs: None +log_debug('Trace: VAE') def create_latents(image, p, dtype=None, device=None): @@ -33,6 +34,10 @@ def create_latents(image, p, dtype=None, device=None): def full_vae_decode(latents, model): t0 = time.time() + if debug: + devices.torch_gc(force=True) + shared.mem_mon.reset() + base_device = None if shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False): base_device = sd_models.move_base(model, devices.cpu) if shared.opts.diffusers_offload_mode == "balanced": @@ -66,7 +71,7 @@ def full_vae_decode(latents, model): decoded = model.vae.decode(latents, return_dict=False)[0] # delete vae after OpenVINO compile - if shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx" and shared.compiled_model_state.first_pass_vae: + if 'VAE' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx" and shared.compiled_model_state.first_pass_vae: shared.compiled_model_state.first_pass_vae = False if not shared.opts.openvino_disable_memory_cleanup and hasattr(shared.sd_model, "vae"): model.vae.apply(sd_models.convert_to_faketensors) @@ -77,14 +82,16 @@ def full_vae_decode(latents, model): elif shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False) and base_device is not None: sd_models.move_base(model, base_device) t1 = time.time() - debug(f'VAE decode: name={sd_vae.loaded_vae_file if sd_vae.loaded_vae_file is not None else "baked"} dtype={model.vae.dtype} upcast={upcast} images={latents.shape[0]} latents={latents.shape} time={round(t1-t0, 3)}') + if debug: + log_debug(f'VAE memory: {shared.mem_mon.read()}') + log_debug(f'VAE decode: name={sd_vae.loaded_vae_file if sd_vae.loaded_vae_file is not None else "baked"} dtype={model.vae.dtype} upcast={upcast} slicing={getattr(model.vae, "use_slicing", None)} tiling={getattr(model.vae, "use_tiling", None)} images={latents.shape[0]} latents={latents.shape} time={round(t1-t0, 3)}') return decoded def full_vae_encode(image, model): - debug(f'VAE encode: name={sd_vae.loaded_vae_file if sd_vae.loaded_vae_file is not None else "baked"} dtype={model.vae.dtype} upcast={model.vae.config.get("force_upcast", None)}') + log_debug(f'VAE encode: name={sd_vae.loaded_vae_file if sd_vae.loaded_vae_file is not None else "baked"} dtype={model.vae.dtype} 
upcast={model.vae.config.get("force_upcast", None)}') if shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False) and hasattr(model, 'unet'): - debug('Moving to CPU: model=UNet') + log_debug('Moving to CPU: model=UNet') unet_device = model.unet.device sd_models.move_model(model.unet, devices.cpu) if not shared.opts.diffusers_offload_mode == "sequential" and hasattr(model, 'vae'): @@ -96,10 +103,10 @@ def full_vae_encode(image, model): def taesd_vae_decode(latents): - debug(f'VAE decode: name=TAESD images={len(latents)} latents={latents.shape} slicing={shared.opts.diffusers_vae_slicing}') + log_debug(f'VAE decode: name=TAESD images={len(latents)} latents={latents.shape} slicing={shared.opts.diffusers_vae_slicing}') if len(latents) == 0: return [] - if shared.opts.diffusers_vae_slicing: + if shared.opts.diffusers_vae_slicing and len(latents) > 1: decoded = torch.zeros((len(latents), 3, latents.shape[2] * 8, latents.shape[3] * 8), dtype=devices.dtype_vae, device=devices.device) for i in range(latents.shape[0]): decoded[i] = sd_vae_taesd.decode(latents[i]) @@ -109,15 +116,16 @@ def taesd_vae_decode(latents): def taesd_vae_encode(image): - debug(f'VAE encode: name=TAESD image={image.shape}') + log_debug(f'VAE encode: name=TAESD image={image.shape}') encoded = sd_vae_taesd.encode(image) return encoded -def vae_decode(latents, model, output_type='np', full_quality=True): +def vae_decode(latents, model, output_type='np', full_quality=True, width=None, height=None): t0 = time.time() prev_job = shared.state.job shared.state.job = 'VAE' + decoded = None if not torch.is_tensor(latents): # already decoded return latents if latents.shape[0] == 0: @@ -128,24 +136,30 @@ def vae_decode(latents, model, output_type='np', full_quality=True): if not hasattr(model, 'vae'): shared.log.error('VAE not found in model') return [] + + if hasattr(model, "_unpack_latents") and hasattr(model, "vae_scale_factor") and width is not None and height is not None: # FLUX + latents = model._unpack_latents(latents, height, width, model.vae_scale_factor) # pylint: disable=protected-access if len(latents.shape) == 3: # lost a batch dim in hires latents = latents.unsqueeze(0) if latents.shape[0] == 4 and latents.shape[1] != 4: # likely animatediff latent latents = latents.permute(1, 0, 2, 3) - if full_quality: + + if any(s >= 512 for s in latents.shape): # not a latent, likely an image + decoded = latents.float().cpu().numpy() + elif full_quality and hasattr(shared.sd_model, "vae"): decoded = full_vae_decode(latents=latents, model=shared.sd_model) else: decoded = taesd_vae_decode(latents=latents) - # TODO validate decoded sample diffusers - # decoded = validate_sample(decoded) + if hasattr(model, 'image_processor'): imgs = model.image_processor.postprocess(decoded, output_type=output_type) else: import diffusers - image_processor = diffusers.image_processor.VaeImageProcessor() - imgs = image_processor.postprocess(decoded, output_type=output_type) + model.image_processor = diffusers.image_processor.VaeImageProcessor() + imgs = model.image_processor.postprocess(decoded, output_type=output_type) + shared.state.job = prev_job - if shared.cmd_opts.profile: + if shared.cmd_opts.profile or debug: t1 = time.time() shared.log.debug(f'Profile: VAE decode: {t1-t0:.2f}') devices.torch_gc() diff --git a/modules/progress.py b/modules/progress.py index aeef195b4..abd6d906d 100644 --- a/modules/progress.py +++ b/modules/progress.py @@ -74,7 +74,6 @@ def progressapi(req: ProgressRequest): predicted = elapsed / progress if 
progress > 0 else None eta = predicted - elapsed if predicted is not None else None # shared.log.debug(f'Progress: step={step_x}:{step_y} batch={batch_x}:{batch_y} current={current} total={total} progress={progress} elapsed={elapsed} eta={eta}') - id_live_preview = req.id_live_preview live_preview = None shared.state.set_current_image() diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py index 09c8c1899..31cb7c68f 100644 --- a/modules/prompt_parser_diffusers.py +++ b/modules/prompt_parser_diffusers.py @@ -463,13 +463,13 @@ def get_xhinker_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", cl te1_device, te2_device, te3_device = None, None, None if hasattr(pipe, "text_encoder") and pipe.text_encoder.device != devices.device: te1_device = pipe.text_encoder.device - pipe.text_encoder = pipe.text_encoder.to(devices.device) + sd_models.move_model(pipe.text_encoder, devices.device) if hasattr(pipe, "text_encoder_2") and pipe.text_encoder_2.device != devices.device: te2_device = pipe.text_encoder_2.device - pipe.text_encoder_2 = pipe.text_encoder_2.to(devices.device) + sd_models.move_model(pipe.text_encoder_2, devices.device) if hasattr(pipe, "text_encoder_3") and pipe.text_encoder_3.device != devices.device: te3_device = pipe.text_encoder_3.device - pipe.text_encoder_3 = pipe.text_encoder_3.to(devices.device) + sd_models.move_model(pipe.text_encoder_3, devices.device) if is_sd3: prompt_embed, negative_embed, positive_pooled, negative_pooled = get_weighted_text_embeddings_sd3(pipe=pipe, prompt=prompt, neg_prompt=neg_prompt, use_t5_encoder=bool(pipe.text_encoder_3)) @@ -481,10 +481,10 @@ def get_xhinker_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", cl prompt_embed, negative_embed = get_weighted_text_embeddings_sd15(pipe=pipe, prompt=prompt, neg_prompt=neg_prompt, clip_skip=clip_skip) if te1_device is not None: - pipe.text_encoder = pipe.text_encoder.to(te1_device) + sd_models.move_model(pipe.text_encoder, te1_device) if te2_device is not None: - pipe.text_encoder_2 = pipe.text_encoder_2.to(te2_device) + sd_models.move_model(pipe.text_encoder_2, te2_device) if te3_device is not None: - pipe.text_encoder_3 = pipe.text_encoder_3.to(te3_device) + sd_models.move_model(pipe.text_encoder_3, te3_device) return prompt_embed, positive_pooled, negative_embed, negative_pooled diff --git a/modules/prompt_parser_xhinker.py b/modules/prompt_parser_xhinker.py index 6e43c8860..6a8acf8c6 100644 --- a/modules/prompt_parser_xhinker.py +++ b/modules/prompt_parser_xhinker.py @@ -269,12 +269,12 @@ def get_weighted_text_embeddings_sd15( # get positive prompt embeddings with weights token_tensor = torch.tensor( [prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) weight_tensor = torch.tensor( prompt_weight_groups[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder.device ) token_embedding = pipe.text_encoder(token_tensor)[0].squeeze(0) @@ -286,12 +286,12 @@ def get_weighted_text_embeddings_sd15( # get negative prompt embeddings with weights neg_token_tensor = torch.tensor( [neg_prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) neg_weight_tensor = torch.tensor( neg_prompt_weight_groups[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder.device ) neg_token_embedding = pipe.text_encoder(neg_token_tensor)[0].squeeze(0) for z in range(len(neg_weight_tensor)): @@ -449,36 +449,36 @@ 
def get_weighted_text_embeddings_sdxl( # get positive prompt embeddings with weights token_tensor = torch.tensor( [prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) weight_tensor = torch.tensor( prompt_weight_groups[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder.device ) token_tensor_2 = torch.tensor( [prompt_token_groups_2[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder_2.device ) # use first text encoder prompt_embeds_1 = pipe.text_encoder( - token_tensor.to(pipe.device) + token_tensor.to(pipe.text_encoder.device) , output_hidden_states=True ) prompt_embeds_1_hidden_states = prompt_embeds_1.hidden_states[-2] # use second text encoder prompt_embeds_2 = pipe.text_encoder_2( - token_tensor_2.to(pipe.device) + token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) prompt_embeds_2_hidden_states = prompt_embeds_2.hidden_states[-2] pooled_prompt_embeds = prompt_embeds_2[0] prompt_embeds_list = [prompt_embeds_1_hidden_states, prompt_embeds_2_hidden_states] - token_embedding = torch.concat(prompt_embeds_list, dim=-1).squeeze(0).to(pipe.device) + token_embedding = torch.concat(prompt_embeds_list, dim=-1).squeeze(0).to(pipe.text_encoder.device) for j in range(len(weight_tensor)): if weight_tensor[j] != 1.0: @@ -509,35 +509,35 @@ def get_weighted_text_embeddings_sdxl( # get negative prompt embeddings with weights neg_token_tensor = torch.tensor( [neg_prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) neg_token_tensor_2 = torch.tensor( [neg_prompt_token_groups_2[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder_2.device ) neg_weight_tensor = torch.tensor( neg_prompt_weight_groups[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder.device ) # use first text encoder neg_prompt_embeds_1 = pipe.text_encoder( - neg_token_tensor.to(pipe.device) + neg_token_tensor.to(pipe.text_encoder.device) , output_hidden_states=True ) neg_prompt_embeds_1_hidden_states = neg_prompt_embeds_1.hidden_states[-2] # use second text encoder neg_prompt_embeds_2 = pipe.text_encoder_2( - neg_token_tensor_2.to(pipe.device) + neg_token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) neg_prompt_embeds_2_hidden_states = neg_prompt_embeds_2.hidden_states[-2] negative_pooled_prompt_embeds = neg_prompt_embeds_2[0] neg_prompt_embeds_list = [neg_prompt_embeds_1_hidden_states, neg_prompt_embeds_2_hidden_states] - neg_token_embedding = torch.concat(neg_prompt_embeds_list, dim=-1).squeeze(0).to(pipe.device) + neg_token_embedding = torch.concat(neg_prompt_embeds_list, dim=-1).squeeze(0).to(pipe.text_encoder.device) for z in range(len(neg_weight_tensor)): if neg_weight_tensor[z] != 1.0: @@ -657,18 +657,18 @@ def get_weighted_text_embeddings_sdxl_refiner( # get positive prompt embeddings with weights token_tensor_2 = torch.tensor( [prompt_token_groups_2[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder_2.device ) weight_tensor_2 = torch.tensor( prompt_weight_groups_2[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder_2.device ) # use second text encoder prompt_embeds_2 = pipe.text_encoder_2( - token_tensor_2.to(pipe.device) + token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) prompt_embeds_2_hidden_states = 
prompt_embeds_2.hidden_states[-2] @@ -679,12 +679,12 @@ def get_weighted_text_embeddings_sdxl_refiner( for j in range(len(weight_tensor_2)): if weight_tensor_2[j] != 1.0: - ow = weight_tensor_2[j] - 1 + # ow = weight_tensor_2[j] - 1 # optional process # To map number of (0,1) to (-1,1) - tanh_weight = (math.exp(ow) / (math.exp(ow) + 1) - 0.5) * 2 - weight = 1 + tanh_weight + # tanh_weight = (math.exp(ow) / (math.exp(ow) + 1) - 0.5) * 2 + # weight = 1 + tanh_weight # add weight method 1: # token_embedding[j] = token_embedding[j] * weight @@ -703,17 +703,17 @@ def get_weighted_text_embeddings_sdxl_refiner( # get negative prompt embeddings with weights neg_token_tensor_2 = torch.tensor( [neg_prompt_token_groups_2[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder_2.device ) neg_weight_tensor_2 = torch.tensor( neg_prompt_weight_groups_2[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder_2.device ) # use second text encoder neg_prompt_embeds_2 = pipe.text_encoder_2( - neg_token_tensor_2.to(pipe.device) + neg_token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) neg_prompt_embeds_2_hidden_states = neg_prompt_embeds_2.hidden_states[-2] @@ -787,8 +787,6 @@ def get_weighted_text_embeddings_sdxl_2p( """ prompt_2 = prompt_2 or prompt neg_prompt_2 = neg_prompt_2 or neg_prompt - - import math eos = pipe.tokenizer.eos_token_id # tokenizer 1 @@ -907,33 +905,33 @@ def get_weighted_text_embeddings_sdxl_2p( # get positive prompt embeddings with weights token_tensor = torch.tensor( [prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) weight_tensor = torch.tensor( prompt_weight_groups[i] - , device=pipe.device + , device=pipe.text_encoder.device ) token_tensor_2 = torch.tensor( [prompt_token_groups_2[i]] - , device=pipe.device + , device=pipe.text_encoder_2.device ) weight_tensor_2 = torch.tensor( prompt_weight_groups_2[i] - , device=pipe.device + , device=pipe.text_encoder_2.device ) # use first text encoder prompt_embeds_1 = pipe.text_encoder( - token_tensor.to(pipe.device) + token_tensor.to(pipe.text_encoder.device) , output_hidden_states=True ) prompt_embeds_1_hidden_states = prompt_embeds_1.hidden_states[-2] # use second text encoder prompt_embeds_2 = pipe.text_encoder_2( - token_tensor_2.to(pipe.device) + token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) prompt_embeds_2_hidden_states = prompt_embeds_2.hidden_states[-2] @@ -966,31 +964,31 @@ def get_weighted_text_embeddings_sdxl_2p( # get negative prompt embeddings with weights neg_token_tensor = torch.tensor( [neg_prompt_token_groups[i]] - , device=pipe.device + , device=pipe.text_encoder.device ) neg_token_tensor_2 = torch.tensor( [neg_prompt_token_groups_2[i]] - , device=pipe.device + , device=pipe.text_encoder_2.device ) neg_weight_tensor = torch.tensor( neg_prompt_weight_groups[i] - , device=pipe.device + , device=pipe.text_encoder.device ) neg_weight_tensor_2 = torch.tensor( neg_prompt_weight_groups_2[i] - , device=pipe.device + , device=pipe.text_encoder_2.device ) # use first text encoder neg_prompt_embeds_1 = pipe.text_encoder( - neg_token_tensor.to(pipe.device) + neg_token_tensor.to(pipe.text_encoder.device) , output_hidden_states=True ) neg_prompt_embeds_1_hidden_states = neg_prompt_embeds_1.hidden_states[-2] # use second text encoder neg_prompt_embeds_2 = pipe.text_encoder_2( - neg_token_tensor_2.to(pipe.device) + 
neg_token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) neg_prompt_embeds_2_hidden_states = neg_prompt_embeds_2.hidden_states[-2] @@ -1049,7 +1047,6 @@ def get_weighted_text_embeddings_sd3( pooled_prompt_embeds (torch.Tensor) negative_pooled_prompt_embeds (torch.Tensor) """ - import math eos = pipe.tokenizer.eos_token_id # tokenizer 1 @@ -1161,22 +1158,22 @@ def get_weighted_text_embeddings_sd3( # get positive prompt embeddings with weights token_tensor = torch.tensor( [prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) weight_tensor = torch.tensor( prompt_weight_groups[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder.device ) token_tensor_2 = torch.tensor( [prompt_token_groups_2[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder_2.device ) # use first text encoder prompt_embeds_1 = pipe.text_encoder( - token_tensor.to(pipe.device) + token_tensor.to(pipe.text_encoder.device) , output_hidden_states=True ) prompt_embeds_1_hidden_states = prompt_embeds_1.hidden_states[-2] @@ -1184,14 +1181,14 @@ def get_weighted_text_embeddings_sd3( # use second text encoder prompt_embeds_2 = pipe.text_encoder_2( - token_tensor_2.to(pipe.device) + token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) prompt_embeds_2_hidden_states = prompt_embeds_2.hidden_states[-2] pooled_prompt_embeds_2 = prompt_embeds_2[0] prompt_embeds_list = [prompt_embeds_1_hidden_states, prompt_embeds_2_hidden_states] - token_embedding = torch.concat(prompt_embeds_list, dim=-1).squeeze(0).to(pipe.device) + token_embedding = torch.concat(prompt_embeds_list, dim=-1).squeeze(0).to(pipe.text_encoder.device) for j in range(len(weight_tensor)): if weight_tensor[j] != 1.0: @@ -1222,21 +1219,21 @@ def get_weighted_text_embeddings_sd3( # get negative prompt embeddings with weights neg_token_tensor = torch.tensor( [neg_prompt_token_groups[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder.device ) neg_token_tensor_2 = torch.tensor( [neg_prompt_token_groups_2[i]] - , dtype=torch.long, device=pipe.device + , dtype=torch.long, device=pipe.text_encoder_2.device ) neg_weight_tensor = torch.tensor( neg_prompt_weight_groups[i] , dtype=torch.float16 - , device=pipe.device + , device=pipe.text_encoder.device ) # use first text encoder neg_prompt_embeds_1 = pipe.text_encoder( - neg_token_tensor.to(pipe.device) + neg_token_tensor.to(pipe.text_encoder.device) , output_hidden_states=True ) neg_prompt_embeds_1_hidden_states = neg_prompt_embeds_1.hidden_states[-2] @@ -1244,14 +1241,14 @@ def get_weighted_text_embeddings_sd3( # use second text encoder neg_prompt_embeds_2 = pipe.text_encoder_2( - neg_token_tensor_2.to(pipe.device) + neg_token_tensor_2.to(pipe.text_encoder_2.device) , output_hidden_states=True ) neg_prompt_embeds_2_hidden_states = neg_prompt_embeds_2.hidden_states[-2] negative_pooled_prompt_embeds_2 = neg_prompt_embeds_2[0] neg_prompt_embeds_list = [neg_prompt_embeds_1_hidden_states, neg_prompt_embeds_2_hidden_states] - neg_token_embedding = torch.concat(neg_prompt_embeds_list, dim=-1).squeeze(0).to(pipe.device) + neg_token_embedding = torch.concat(neg_prompt_embeds_list, dim=-1).squeeze(0).to(pipe.text_encoder.device) for z in range(len(neg_weight_tensor)): if neg_weight_tensor[z] != 1.0: @@ -1286,8 +1283,8 @@ def get_weighted_text_embeddings_sd3( # ----------------- generate positive t5 embeddings -------------------- 
prompt_tokens_3 = torch.tensor([prompt_tokens_3], dtype=torch.long) - t5_prompt_embeds = pipe.text_encoder_3(prompt_tokens_3.to(pipe.device))[0].squeeze(0) - t5_prompt_embeds = t5_prompt_embeds.to(device=pipe.device) + t5_prompt_embeds = pipe.text_encoder_3(prompt_tokens_3.to(pipe.text_encoder_3.device))[0].squeeze(0) + t5_prompt_embeds = t5_prompt_embeds.to(device=pipe.text_encoder_3.device) # add weight to t5 prompt for z in range(len(prompt_weights_3)): @@ -1296,7 +1293,7 @@ def get_weighted_text_embeddings_sd3( t5_prompt_embeds = t5_prompt_embeds.unsqueeze(0) else: t5_prompt_embeds = torch.zeros(1, 4096, dtype=prompt_embeds.dtype).unsqueeze(0) - t5_prompt_embeds = t5_prompt_embeds.to(device=pipe.device) + t5_prompt_embeds = t5_prompt_embeds.to(device=pipe.text_encoder_3.device) # merge with the clip embedding 1 and clip embedding 2 clip_prompt_embeds = torch.nn.functional.pad( @@ -1308,8 +1305,8 @@ def get_weighted_text_embeddings_sd3( # ---------------------- get neg t5 embeddings ------------------------- neg_prompt_tokens_3 = torch.tensor([neg_prompt_tokens_3], dtype=torch.long) - t5_neg_prompt_embeds = pipe.text_encoder_3(neg_prompt_tokens_3.to(pipe.device))[0].squeeze(0) - t5_neg_prompt_embeds = t5_neg_prompt_embeds.to(device=pipe.device) + t5_neg_prompt_embeds = pipe.text_encoder_3(neg_prompt_tokens_3.to(pipe.text_encoder_3.device))[0].squeeze(0) + t5_neg_prompt_embeds = t5_neg_prompt_embeds.to(device=pipe.text_encoder_3.device) # add weight to neg t5 embeddings for z in range(len(neg_prompt_weights_3)): @@ -1318,7 +1315,7 @@ def get_weighted_text_embeddings_sd3( t5_neg_prompt_embeds = t5_neg_prompt_embeds.unsqueeze(0) else: t5_neg_prompt_embeds = torch.zeros(1, 4096, dtype=prompt_embeds.dtype).unsqueeze(0) - t5_neg_prompt_embeds = t5_prompt_embeds.to(device=pipe.device) + t5_neg_prompt_embeds = t5_prompt_embeds.to(device=pipe.text_encoder_3.device) clip_neg_prompt_embeds = torch.nn.functional.pad( negative_prompt_embeds, (0, t5_neg_prompt_embeds.shape[-1] - negative_prompt_embeds.shape[-1]) @@ -1359,7 +1356,7 @@ def get_weighted_text_embeddings_flux1( """ prompt2 = prompt if prompt2 is None else prompt2 if device is None: - device = pipe.device + device = pipe.text_encoder.device # tokenizer 1 - openai/clip-vit-large-patch14 prompt_tokens, prompt_weights = get_prompts_tokens_with_weights( diff --git a/modules/rife/__init__.py b/modules/rife/__init__.py index 7e40735e7..f74f3d984 100644 --- a/modules/rife/__init__.py +++ b/modules/rife/__init__.py @@ -113,5 +113,5 @@ def f_pad(img): while not buffer.empty(): time.sleep(0.1) t1 = time.time() - shared.log.info(f'RIFE interpolate: input={len(images)} frames={len(interpolated)} resolution={w}x{h} interpolate={count} scale={scale} pad={pad} change={change} time={round(t1 - t0, 2)}') + shared.log.info(f'RIFE interpolate: input={len(images)} frames={len(interpolated)} width={w} height={h} interpolate={count} scale={scale} pad={pad} change={change} time={round(t1 - t0, 2)}') return interpolated diff --git a/modules/scripts.py b/modules/scripts.py index ef24e359a..53452fe6b 100644 --- a/modules/scripts.py +++ b/modules/scripts.py @@ -256,6 +256,7 @@ def load_scripts(): postprocessing_scripts_data.clear() script_callbacks.clear_callbacks() scripts_list = list_scripts('scripts', '.py') + list_scripts(os.path.join('modules', 'face'), '.py') + scripts_list = sorted(scripts_list, key=lambda item: item.priority + item.path.lower(), reverse=False) syspath = sys.path def register_scripts_from_module(module, scriptfile): @@ -488,7 +489,27 @@ 
def run(self, p, *args): if not hasattr(p, 'init_images') and p.task_args.get('image', None) is not None: p.init_images = p.task_args['image'] parsed = p.per_script_args.get(script.title(), args[script.args_from:script.args_to]) - processed = script.run(p, *parsed) + if hasattr(script, 'run'): + processed = script.run(p, *parsed) + else: + processed = None + errors.log.error(f'Script: file="{script.filename}" no run function defined') + s.record(script.title()) + s.report() + return processed + + def after(self, p, processed, *args): + s = ScriptSummary('after') + script_index = args[0] if len(args) > 0 else 0 + if script_index == 0: + return processed + script = self.selectable_scripts[script_index-1] + if script is None or not hasattr(script, 'after'): + return processed + parsed = p.per_script_args.get(script.title(), args[script.args_from:script.args_to]) + after_processed = script.after(p, processed, *parsed) + if after_processed is not None: + processed = after_processed s.record(script.title()) s.report() return processed @@ -524,7 +545,9 @@ def process_images(self, p, **kwargs): try: if (script.args_to > 0) and (script.args_to >= script.args_from): args = p.per_script_args.get(script.title(), p.script_args[script.args_from:script.args_to]) - processed = script.process_images(p, *args, **kwargs) + _processed = script.process_images(p, *args, **kwargs) + if _processed is not None: + processed = _processed except Exception as e: errors.display(e, f'Running script process images: {script.filename}') s.record(script.title()) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index dc07dea81..2f707d098 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -265,7 +265,6 @@ def __init__(self, wrapped, embeddings): def forward(self, input_ids): batch_fixes = self.embeddings.fixes self.embeddings.fixes = None - inputs_embeds = self.wrapped(input_ids) if batch_fixes is None or len(batch_fixes) == 0 or max([len(x) for x in batch_fixes]) == 0: diff --git a/modules/sd_hijack_dynamic_atten.py b/modules/sd_hijack_dynamic_atten.py index fb2befc18..1c17e024c 100644 --- a/modules/sd_hijack_dynamic_atten.py +++ b/modules/sd_hijack_dynamic_atten.py @@ -1,7 +1,7 @@ from functools import cache, wraps import torch -from diffusers.utils import USE_PEFT_BACKEND +from diffusers.utils import USE_PEFT_BACKEND # pylint: disable=unused-import from modules import shared, devices @@ -107,8 +107,7 @@ class DynamicAttnProcessorBMM: based on AttnProcessor V1 """ - def __call__(self, attn, hidden_states: torch.Tensor, encoder_hidden_states=None, attention_mask=None, - temb=None, *args, **kwargs) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches + def __call__(self, attn, hidden_states: torch.Tensor, encoder_hidden_states=None, attention_mask=None, temb=None, *args, **kwargs) -> torch.Tensor: # pylint: disable=too-many-statements, too-many-locals, too-many-branches, keyword-arg-before-vararg residual = hidden_states diff --git a/modules/sd_models.py b/modules/sd_models.py index 529924976..aeb680b2a 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -566,7 +566,7 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): # guess by size if os.path.isfile(f) and f.endswith('.safetensors'): size = round(os.path.getsize(f) / 1024 / 1024) - if size < 128: + if (size > 0 and size < 128): warn(f'Model size smaller than expected: {f} size={size} MB') elif (size >= 316 and size <= 324) or (size >= 156 and size <= 164): # 320 or 160 
warn(f'Model detected as VAE model, but attempting to load as model: {op}={f} size={size} MB') @@ -591,6 +591,8 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): guess = 'Stable Diffusion XL' elif (size > 5692 and size < 5698) or (size > 4134 and size < 4138) or (size > 10362 and size < 10366) or (size > 15028 and size < 15228): guess = 'Stable Diffusion 3' + elif (size > 20000 and size < 40000): + guess = 'FLUX' # guess by name """ if 'LCM_' in f.upper() or 'LCM-' in f.upper() or '_LCM' in f.upper() or '-LCM' in f.upper(): @@ -620,8 +622,10 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): guess = 'Kolors' if 'auraflow' in f.lower(): guess = 'AuraFlow' - if 'flux.1' in f.lower() or 'flux1' in f.lower(): + if 'flux' in f.lower(): guess = 'FLUX' + if size > 11000 and size < 20000: + warn(f'Model detected as FLUX UNET model, but attempting to load a base model: {op}={f} size={size} MB') # switch for specific variant if guess == 'Stable Diffusion' and 'inpaint' in f.lower(): guess = 'Stable Diffusion Inpaint' @@ -654,8 +658,8 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): def copy_diffuser_options(new_pipe, orig_pipe): - new_pipe.sd_checkpoint_info = orig_pipe.sd_checkpoint_info - new_pipe.sd_model_checkpoint = orig_pipe.sd_model_checkpoint + new_pipe.sd_checkpoint_info = getattr(orig_pipe, 'sd_checkpoint_info', None) + new_pipe.sd_model_checkpoint = getattr(orig_pipe, 'sd_model_checkpoint', None) new_pipe.embedding_db = getattr(orig_pipe, 'embedding_db', None) new_pipe.sd_model_hash = getattr(orig_pipe, 'sd_model_hash', None) new_pipe.has_accelerate = getattr(orig_pipe, 'has_accelerate', False) @@ -678,27 +682,28 @@ def set_diffuser_options(sd_model, vae = None, op: str = 'model', offload=True): if hasattr(sd_model, "vae"): if vae is not None: sd_model.vae = vae - shared.log.debug(f'Setting {op} VAE: name={sd_vae.loaded_vae_file}') + shared.log.debug(f'Setting {op} VAE: name="{sd_vae.loaded_vae_file}"') if shared.opts.diffusers_vae_upcast != 'default': sd_model.vae.config.force_upcast = True if shared.opts.diffusers_vae_upcast == 'true' else False shared.log.debug(f'Setting {op} VAE: upcast={sd_model.vae.config.force_upcast}') if shared.opts.no_half_vae: devices.dtype_vae = torch.float32 sd_model.vae.to(devices.dtype_vae) - shared.log.debug(f'Setting {op} VAE: no-half') + shared.log.debug(f'Setting {op} VAE: no-half=True') if hasattr(sd_model, "enable_vae_slicing"): if shared.opts.diffusers_vae_slicing: - shared.log.debug(f'Setting {op}: enable VAE slicing') + shared.log.debug(f'Setting {op}: slicing=True') sd_model.enable_vae_slicing() else: sd_model.disable_vae_slicing() if hasattr(sd_model, "enable_vae_tiling"): if shared.opts.diffusers_vae_tiling: - shared.log.debug(f'Setting {op}: enable VAE tiling') + shared.log.debug(f'Setting {op}: tiling=True') sd_model.enable_vae_tiling() else: sd_model.disable_vae_tiling() if hasattr(sd_model, "vqvae"): + shared.log.debug(f'Setting {op} VQVAE: upcast=True') sd_model.vqvae.to(torch.float32) # vqvae is producing nans in fp16 set_diffusers_attention(sd_model) @@ -706,7 +711,13 @@ def set_diffuser_options(sd_model, vae = None, op: str = 'model', offload=True): if shared.opts.diffusers_fuse_projections and hasattr(sd_model, 'fuse_qkv_projections'): try: sd_model.fuse_qkv_projections() - shared.log.debug(f'Setting {op}: enable fused projections') + shared.log.debug(f'Setting {op}: fused-qkv=True') + except Exception as e: + shared.log.error(f'Error enabling fused 
projections: {e}') + if shared.opts.diffusers_fuse_projections and hasattr(sd_model, 'transformer') and hasattr(sd_model.transformer, 'fuse_qkv_projections'): + try: + sd_model.transformer.fuse_qkv_projections() + shared.log.debug(f'Setting {op}: fused-qkv=True') except Exception as e: shared.log.error(f'Error enabling fused projections: {e}') if shared.opts.diffusers_eval: @@ -720,13 +731,16 @@ def eval_model(model, op=None, sd_model=None): # pylint: disable=unused-argument sd_model = sd_models_compile.dynamic_quantization(sd_model) if shared.opts.opt_channelslast and hasattr(sd_model, 'unet'): - shared.log.debug(f'Setting {op}: enable channels last') + shared.log.debug(f'Setting {op}: channels-last=True') sd_model.unet.to(memory_format=torch.channels_last) if offload: set_diffuser_offload(sd_model, op) def set_diffuser_offload(sd_model, op: str = 'model'): + if not shared.native: + shared.log.warning('Attempting to use offload with backend=original') + return if sd_model is None: shared.log.warning(f'{op} is not loaded') return @@ -735,7 +749,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'): if hasattr(sd_model, "enable_model_cpu_offload"): if shared.opts.diffusers_offload_mode == "model": try: - shared.log.debug(f'Setting {op}: enable model CPU offload') + shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}') if shared.opts.diffusers_move_base or shared.opts.diffusers_move_unet or shared.opts.diffusers_move_refiner: shared.opts.diffusers_move_base = False shared.opts.diffusers_move_unet = False @@ -751,7 +765,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'): if hasattr(sd_model, "enable_sequential_cpu_offload"): if shared.opts.diffusers_offload_mode == "sequential": try: - shared.log.debug(f'Setting {op}: enable sequential CPU offload') + shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}') if shared.opts.diffusers_move_base or shared.opts.diffusers_move_unet or shared.opts.diffusers_move_refiner: shared.opts.diffusers_move_base = False shared.opts.diffusers_move_unet = False @@ -771,6 +785,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'): shared.log.error(f'Model offload error: mode={shared.opts.diffusers_offload_mode} {e}') if shared.opts.diffusers_offload_mode == "balanced": try: + shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}') sd_model = apply_balanced_offload(sd_model) except Exception as e: shared.log.error(f'Model offload error: mode={shared.opts.diffusers_offload_mode} {e}') @@ -824,7 +839,7 @@ def apply_balanced_offload_to_module(pipe): module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access except Exception as e: shared.log.error(f'Balanced offload: module={module_name} {e}') - devices.torch_gc() + devices.torch_gc(fast=True) apply_balanced_offload_to_module(sd_model) if hasattr(sd_model, "prior_pipe"): @@ -845,9 +860,23 @@ def normalize_device(device): return torch.device(str(device) + ":0") return torch.device(device) + def move_model(model, device=None, force=False): if model is None or device is None: return + + if not shared.native: + if type(model).__name__ == 'LatentDiffusion': + model = model.to(device) + if hasattr(model, 'model'): + model.model = model.model.to(device) + if hasattr(model, 'first_stage_model'): + model.first_stage_model = model.first_stage_model.to(device) + if hasattr(model, 'cond_stage_model'): + model.cond_stage_model = model.cond_stage_model.to(device) + devices.torch_gc() + return + if 
getattr(model, 'vae', None) is not None and get_diffusers_task(model) != DiffusersTaskType.TEXT_2_IMAGE: if device == devices.device and model.vae.device.type != "meta": # force vae back to gpu if not in txt2img mode model.vae.to(device) @@ -878,7 +907,7 @@ def move_model(model, device=None, force=False): if hasattr(model, "prior_pipe"): model.prior_pipe.to(device) except Exception as e0: - if 'Cannot copy out of meta tensor' in str(e0): + if 'Cannot copy out of meta tensor' in str(e0) or 'must be Tensor, not NoneType' in str(e0): if hasattr(model, "components"): for _name, component in model.components.items(): if hasattr(component, 'modules'): @@ -1042,16 +1071,17 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No unload_model_weights(op=op) return + shared.log.debug(f'Diffusers loading: path="{checkpoint_info.path}"') + pipeline, model_type = detect_pipeline(checkpoint_info.path, op) + vae = None sd_vae.loaded_vae_file = None - if op == 'model' or op == 'refiner': + if model_type.startswith('Stable Diffusion') and (op == 'model' or op == 'refiner'): # preload vae for sd models vae_file, vae_source = sd_vae.resolve_vae(checkpoint_info.filename) vae = sd_vae.load_vae_diffusers(checkpoint_info.path, vae_file, vae_source) if vae is not None: diffusers_load_config["vae"] = vae - shared.log.debug(f'Diffusers loading: path="{checkpoint_info.path}"') - pipeline, model_type = detect_pipeline(checkpoint_info.path, op) if os.path.isdir(checkpoint_info.path) or checkpoint_info.type == 'huggingface' or checkpoint_info.type == 'transformer': files = shared.walk_files(checkpoint_info.path, ['.safetensors', '.bin', '.ckpt']) if 'variant' not in diffusers_load_config and any('diffusion_pytorch_model.fp16' in f for f in files): # deal with diffusers lack of variant fallback when loading @@ -1201,10 +1231,12 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No if model_type.startswith('Stable Diffusion'): if shared.opts.diffusers_force_zeros: diffusers_load_config['force_zeros_for_empty_prompt '] = shared.opts.diffusers_force_zeros - if diffusers_version < 28: - diffusers_load_config['original_config_file'] = get_load_config(checkpoint_info.path, model_type, config_type='yaml') else: - diffusers_load_config['config'] = get_load_config(checkpoint_info.path, model_type, config_type='json') + model_config = get_load_config(checkpoint_info.path, model_type, config_type='json') + if model_config is not None: + if debug_load: + shared.log.debug(f'Model config: path="{model_config}"') + diffusers_load_config['config'] = model_config if model_type.startswith('Stable Diffusion 3'): from modules.model_sd3 import load_sd3 sd_model = load_sd3(fn=checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, config=diffusers_load_config.get('config', None)) @@ -1227,6 +1259,8 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No else: shared.log.error(f'Diffusers {op} cannot load safetensor model: {checkpoint_info.path} {shared.opts.diffusers_pipeline}') return + if shared.opts.diffusers_vae_upcast != 'default' and model_type in ['Stable Diffusion', 'Stable Diffusion XL']: + diffusers_load_config['force_upcast'] = True if shared.opts.diffusers_vae_upcast == 'true' else False if debug_load: shared.log.debug(f'Model args: {diffusers_load_config}') if sd_model is not None: @@ -1286,7 +1320,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No insert_parser_highjack(sd_model.__class__.__name__) 
set_diffuser_options(sd_model, vae, op, offload=False) - if shared.opts.nncf_compress_weights and not (shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): + if shared.opts.nncf_compress_weights and not ('Model' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): sd_model = sd_models_compile.nncf_compress_weights(sd_model) # run this before move model so it can be compressed in CPU if shared.opts.optimum_quanto_weights: sd_model = sd_models_compile.optimum_quanto_weights(sd_model) # run this before move model so it can be compressed in CPU @@ -1307,7 +1341,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No if shared.opts.ipex_optimize: sd_model = sd_models_compile.ipex_optimize(sd_model) - if (shared.opts.cuda_compile and shared.opts.cuda_compile_backend != 'none'): + if ('Model' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend != 'none'): sd_model = sd_models_compile.compile_diffusers(sd_model) timer.record("compile") @@ -1458,7 +1492,7 @@ def set_diffuser_pipe(pipe, new_pipe_type): return pipe # skip specific pipelines - if n in ['StableDiffusionReferencePipeline', 'StableDiffusionAdapterPipeline', 'AnimateDiffPipeline', 'AnimateDiffSDXLPipeline']: + if n in ['StableDiffusionReferencePipeline', 'StableDiffusionAdapterPipeline', 'AnimateDiffPipeline', 'AnimateDiffSDXLPipeline', 'FluxControlNetPipeline']: return pipe if 'Onnx' in pipe.__class__.__name__: return pipe @@ -1528,11 +1562,20 @@ def set_attn(pipe, attention): for module in modules: if module.__class__.__name__ in ['SD3Transformer2DModel']: module.set_attn_processor(p.JointAttnProcessor2_0()) - elif module.__class__.__name__ in ['HunyuanDiT2DModel', 'FluxTransformer2DModel']: - pass + elif module.__class__.__name__ in ['FluxTransformer2DModel']: + module.set_attn_processor(p.FluxAttnProcessor2_0()) + elif module.__class__.__name__ in ['HunyuanDiT2DModel']: + module.set_attn_processor(p.HunyuanAttnProcessor2_0()) + elif module.__class__.__name__ in ['AuraFlowTransformer2DModel']: + module.set_attn_processor(p.AuraFlowAttnProcessor2_0()) + elif 'Transformer' in module.__class__.__name__: + pass # unknown transformer so probably dont want to force attention processor else: module.set_attn_processor(attention) + if 'ControlNet' in pipe.__class__.__name__: # do not replace attention in ControlNet pipelines + return + shared.log.debug(f"Setting model: attention={shared.opts.cross_attention_optimization}") if shared.opts.cross_attention_optimization == "Disabled": pass # do nothing elif shared.opts.cross_attention_optimization == "Scaled-Dot-Product": # The default set by Diffusers @@ -1790,6 +1833,7 @@ def disable_offload(sd_model): def unload_model_weights(op='model'): if shared.compiled_model_state is not None: shared.compiled_model_state.compiled_cache.clear() + shared.compiled_model_state.req_cache.clear() shared.compiled_model_state.partitioned_modules.clear() if op == 'model' or op == 'dict': if model_data.sd_model: @@ -1797,7 +1841,7 @@ def unload_model_weights(op='model'): from modules import sd_hijack move_model(model_data.sd_model, devices.cpu) sd_hijack.model_hijack.undo_hijack(model_data.sd_model) - elif not (shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): + elif not ('Model' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx"): disable_offload(model_data.sd_model) move_model(model_data.sd_model, 'meta') model_data.sd_model = None @@ -1891,3 +1935,11 @@ 
def remove_token_merging(sd_model): sd_model.applied_todo = 0 except Exception: pass + + +def path_to_repo(fn: str = ''): + repo_id = fn + repo_id = repo_id.replace('Diffusers/', '').replace('Diffusers\\', '') + repo_id = repo_id.replace('diffusers/', '').replace('diffusers\\', '') + repo_id = repo_id.replace('models--', '').replace('--', '/') + return repo_id diff --git a/modules/sd_models_compile.py b/modules/sd_models_compile.py index 2ec8b26a8..9b3eb90af 100644 --- a/modules/sd_models_compile.py +++ b/modules/sd_models_compile.py @@ -3,7 +3,7 @@ import logging import torch from modules import shared, devices, sd_models -from installer import setup_logging +from installer import install, setup_logging #Used by OpenVINO, can be used with TensorRT or Olive @@ -21,6 +21,7 @@ def __init__(self): self.cn_model = [] self.lora_model = [] self.compiled_cache = {} + self.req_cache = {} self.partitioned_modules = {} @@ -79,13 +80,29 @@ def apply_compile_to_model(sd_model, function, options, op=None): sd_model.prior_pipe.text_encoder = function(sd_model.prior_pipe.text_encoder, op="prior_pipe.text_encoder", sd_model=sd_model) if "VAE" in options: if hasattr(sd_model, 'vae') and hasattr(sd_model.vae, 'decode'): - sd_model.vae = function(sd_model.vae, op="vae", sd_model=sd_model) + if op == "compile": + sd_model.vae.decode = function(sd_model.vae.decode, op="vae_decode", sd_model=sd_model) + sd_model.vae.encode = function(sd_model.vae.encode, op="vae_encode", sd_model=sd_model) + else: + sd_model.vae = function(sd_model.vae, op="vae", sd_model=sd_model) if hasattr(sd_model, 'movq') and hasattr(sd_model.movq, 'decode'): - sd_model.movq = function(sd_model.movq, op="movq", sd_model=sd_model) + if op == "compile": + sd_model.movq.decode = function(sd_model.movq.decode, op="movq_decode", sd_model=sd_model) + sd_model.movq.encode = function(sd_model.movq.encode, op="movq_encode", sd_model=sd_model) + else: + sd_model.movq = function(sd_model.movq, op="movq", sd_model=sd_model) if hasattr(sd_model, 'vqgan') and hasattr(sd_model.vqgan, 'decode'): - sd_model.vqgan = function(sd_model.vqgan, op="vqgan", sd_model=sd_model) + if op == "compile": + sd_model.vqgan.decode = function(sd_model.vqgan.decode, op="vqgan_decode", sd_model=sd_model) + sd_model.vqgan.encode = function(sd_model.vqgan.encode, op="vqgan_encode", sd_model=sd_model) + else: + sd_model.vqgan = function(sd_model.vqgan, op="vqgan", sd_model=sd_model) if hasattr(sd_model, 'decoder_pipe') and hasattr(sd_model.decoder_pipe, 'vqgan'): - sd_model.decoder_pipe.vqgan = sd_model.vqgan + if op == "compile": + sd_model.decoder_pipe.vqgan.decode = function(sd_model.decoder_pipe.vqgan.decode, op="vqgan_decode", sd_model=sd_model) + sd_model.decoder_pipe.vqgan.encode = function(sd_model.decoder_pipe.vqgan.encode, op="vqgan_encode", sd_model=sd_model) + else: + sd_model.decoder_pipe.vqgan = sd_model.vqgan if hasattr(sd_model, 'image_encoder') and hasattr(sd_model.image_encoder, 'config'): sd_model.image_encoder = function(sd_model.image_encoder, op="image_encoder", sd_model=sd_model) @@ -165,7 +182,6 @@ def nncf_compress_weights(sd_model): t0 = time.time() shared.log.info(f"NNCF Compress Weights: {shared.opts.nncf_compress_weights}") global quant_last_model_name, quant_last_model_device # pylint: disable=global-statement - from installer import install install('nncf==2.7.0', quiet=True) sd_model = apply_compile_to_model(sd_model, nncf_compress_model, shared.opts.nncf_compress_weights, op="nncf") @@ -233,7 +249,6 @@ def optimum_quanto_weights(sd_model): t0 = 
time.time() shared.log.info(f"Optimum Quanto Weights: {shared.opts.optimum_quanto_weights}") global quant_last_model_name, quant_last_model_device # pylint: disable=global-statement - from installer import install install('optimum-quanto', quiet=True) from optimum import quanto # pylint: disable=no-name-in-module quanto.tensor.qbits.QBitsTensor.create = lambda *args, **kwargs: quanto.tensor.qbits.QBitsTensor(*args, **kwargs) @@ -291,6 +306,7 @@ def optimize_openvino(sd_model): torch._dynamo.eval_frame.check_if_dynamo_supported = lambda: True # pylint: disable=protected-access if shared.compiled_model_state is not None: shared.compiled_model_state.compiled_cache.clear() + shared.compiled_model_state.req_cache.clear() shared.compiled_model_state.partitioned_modules.clear() shared.compiled_model_state = CompiledModelState() shared.compiled_model_state.is_compiled = True @@ -383,7 +399,7 @@ def compile_torch(sd_model): shared.log.debug(f"Model compile available backends: {torch._dynamo.list_backends()}") # pylint: disable=protected-access def torch_compile_model(model, op=None, sd_model=None): # pylint: disable=unused-argument - if model.device.type != "meta": + if hasattr(model, "device") and model.device.type != "meta": return_device = model.device model = torch.compile(model.to(devices.device), mode=shared.opts.cuda_compile_mode, @@ -423,7 +439,7 @@ def torch_compile_model(model, op=None, sd_model=None): # pylint: disable=unused except Exception as e: shared.log.error(f"Torch inductor config error: {e}") - sd_model = apply_compile_to_model(sd_model, torch_compile_model, shared.opts.cuda_compile, op="compile") + sd_model = apply_compile_to_model(sd_model, function=torch_compile_model, options=shared.opts.cuda_compile, op="compile") setup_logging() # compile messes with logging so reset is needed if shared.opts.cuda_compile_precompile: @@ -464,7 +480,7 @@ def compile_deepcache(sd_model): def compile_diffusers(sd_model): - if not shared.opts.cuda_compile: + if 'Model' not in shared.opts.cuda_compile: return sd_model if shared.opts.cuda_compile_backend == 'none': shared.log.warning('Model compile enabled but no backend specified') @@ -484,11 +500,14 @@ def compile_diffusers(sd_model): def dynamic_quantization(sd_model): try: - from torchao.quantization import quant_api + install('torchao', quiet=True) + from torchao.quantization import autoquant except Exception as e: shared.log.error(f"Model dynamic quantization not supported: {e}") return sd_model + """ + from torchao.quantization import quant_api def dynamic_quant_filter_fn(mod, *args): # pylint: disable=unused-argument return (isinstance(mod, torch.nn.Linear) and mod.in_features > 16 and (mod.in_features, mod.out_features) not in [(1280, 640), (1920, 1280), (1920, 640), (2048, 1280), (2048, 2560), (2560, 1280), (256, 128), (2816, 1280), (320, 640), (512, 1536), (512, 256), (512, 512), (640, 1280), (640, 1920), (640, 320), (640, 5120), (640, 640), (960, 320), (960, 640)]) @@ -496,19 +515,28 @@ def dynamic_quant_filter_fn(mod, *args): # pylint: disable=unused-argument def conv_filter_fn(mod, *args): # pylint: disable=unused-argument return (isinstance(mod, torch.nn.Conv2d) and mod.kernel_size == (1, 1) and 128 in [mod.in_channels, mod.out_channels]) + quant_api.swap_conv2d_1x1_to_linear(sd_model.unet, conv_filter_fn) + quant_api.swap_conv2d_1x1_to_linear(sd_model.vae, conv_filter_fn) + quant_api.apply_dynamic_quant(sd_model.unet, dynamic_quant_filter_fn) + quant_api.apply_dynamic_quant(sd_model.vae, dynamic_quant_filter_fn) + """ + 
shared.log.info(f"Model dynamic quantization: pipeline={sd_model.__class__.__name__}") try: - quant_api.swap_conv2d_1x1_to_linear(sd_model.unet, conv_filter_fn) - quant_api.swap_conv2d_1x1_to_linear(sd_model.vae, conv_filter_fn) - quant_api.apply_dynamic_quant(sd_model.unet, dynamic_quant_filter_fn) - quant_api.apply_dynamic_quant(sd_model.vae, dynamic_quant_filter_fn) + if shared.sd_model_type == 'sd' or shared.sd_model_type == 'sdxl': + sd_model.unet = sd_model.unet.to(devices.device) + sd_model.unet = autoquant(sd_model.unet, error_on_unseen=False) + elif shared.sd_model_type == 'f1': + sd_model.transformer = autoquant(sd_model.transformer, error_on_unseen=False) + else: + shared.log.error(f"Model dynamic quantization not supported: {shared.sd_model_type}") except Exception as e: - shared.log.error(f"Model dynamic quantization error: {e}") + shared.log.error(f"Model dynamic quantization: {e}") return sd_model def openvino_recompile_model(p, hires=False, refiner=False): # recompile if a parameter changes - if shared.opts.cuda_compile and shared.opts.cuda_compile_backend != 'none': + if 'Model' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend != 'none': if shared.opts.cuda_compile_backend == "openvino_fx": compile_height = p.height if not hires and hasattr(p, 'height') else p.hr_upscale_to_y compile_width = p.width if not hires and hasattr(p, 'width') else p.hr_upscale_to_x @@ -531,7 +559,7 @@ def openvino_recompile_model(p, hires=False, refiner=False): # recompile if a pa def openvino_post_compile(op="base"): # delete unet after OpenVINO compile - if shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx": + if 'Model' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx": if shared.compiled_model_state.first_pass and op == "base": shared.compiled_model_state.first_pass = False if not shared.opts.openvino_disable_memory_cleanup and hasattr(shared.sd_model, "unet"): diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index 29034d63f..b7c90e603 100644 --- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -69,9 +69,10 @@ def create_sampler(name, model): return sampler elif shared.native: sampler = config.constructor(model) - if shared.sd_model_type == 'f1': + if 'Flux' in model.__class__.__name__: if 'base_image_seq_len' not in sampler.sampler.config or 'max_image_seq_len' not in sampler.sampler.config or 'base_shift' not in sampler.sampler.config or 'max_shift' not in sampler.sampler.config: - shared.log.warning('FLUX sampler: attempting to use a non compatible scheduler') + shared.log.warning(f'FLUX: sampler="{name}" unsupported') + # sampler.sampler.register_to_config(base_image_seq_len=256, max_image_seq_len=4096, base_shift=0.5, max_shift=1.15) return None if not hasattr(model, 'scheduler_config'): model.scheduler_config = sampler.sampler.config.copy() diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index b49e981f1..1d4db96cb 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -61,7 +61,7 @@ def single_sample_to_image(sample, approximation=None): if approximation == 2: # TAESD x_sample = sd_vae_taesd.decode(sample) x_sample = (1.0 + x_sample) / 2.0 # preview requires smaller range - elif sd_cascade and not approximation == 3: + elif sd_cascade and approximation != 3: x_sample = sd_vae_stablecascade.decode(sample) elif approximation == 0: # Simple x_sample = sd_vae_approx.cheap_approximation(sample) * 0.5 + 0.5 diff --git 
a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py index 24f3d033e..b13f91ec5 100644 --- a/modules/sd_samplers_diffusers.py +++ b/modules/sd_samplers_diffusers.py @@ -5,6 +5,7 @@ from modules import shared from modules import sd_samplers_common from modules.tcd import TCDScheduler +from modules.dcsolver import DCSolverMultistepScheduler #https://github.com/wl-zhao/DC-Solver debug = shared.log.trace if os.environ.get('SD_SAMPLER_DEBUG', None) is not None else lambda *args, **kwargs: None @@ -62,14 +63,15 @@ 'LMSD': { 'use_karras_sigmas': False, 'timestep_spacing': 'linspace', 'steps_offset': 0 }, 'PNDM': { 'skip_prk_steps': False, 'set_alpha_to_one': False, 'steps_offset': 0, 'timestep_spacing': 'linspace' }, 'SA Solver': {'predictor_order': 2, 'corrector_order': 2, 'thresholding': False, 'lower_order_final': True, 'use_karras_sigmas': False, 'timestep_spacing': 'linspace'}, + 'DC Solver': { 'beta_start': 0.0001, 'beta_end': 0.02, 'solver_order': 2, 'prediction_type': "epsilon", 'thresholding': False, 'solver_type': 'bh2', 'lower_order_final': True, 'dc_order': 2, 'disable_corrector': [0] }, 'LCM': { 'beta_start': 0.00085, 'beta_end': 0.012, 'beta_schedule': "scaled_linear", 'set_alpha_to_one': True, 'rescale_betas_zero_snr': False, 'thresholding': False, 'timestep_spacing': 'linspace' }, 'TCD': { 'set_alpha_to_one': True, 'rescale_betas_zero_snr': False, 'beta_schedule': 'scaled_linear' }, 'Euler SGM': { 'timestep_spacing': "trailing", 'prediction_type': "sample" }, 'Euler EDM': { }, 'DPM++ 2M EDM': { 'solver_order': 2, 'solver_type': 'midpoint', 'final_sigmas_type': 'zero', 'algorithm_type': 'dpmsolver++' }, 'CMSI': { }, #{ 'sigma_min': 0.002, 'sigma_max': 80.0, 'sigma_data': 0.5, 's_noise': 1.0, 'rho': 7.0, 'clip_denoised': True }, - 'Euler FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1, }, - 'Heun FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1, }, + 'Euler FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1, 'use_dynamic_shifting': False }, + 'Heun FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1 }, 'IPNDM': { }, } @@ -79,6 +81,7 @@ sd_samplers_common.SamplerData('UniPC', lambda model: DiffusionSampler('UniPC', UniPCMultistepScheduler, model), [], {}), sd_samplers_common.SamplerData('DEIS', lambda model: DiffusionSampler('DEIS', DEISMultistepScheduler, model), [], {}), sd_samplers_common.SamplerData('SA Solver', lambda model: DiffusionSampler('SA Solver', SASolverScheduler, model), [], {}), + sd_samplers_common.SamplerData('DC Solver', lambda model: DiffusionSampler('DC Solver', DCSolverMultistepScheduler, model), [], {}), sd_samplers_common.SamplerData('DDIM', lambda model: DiffusionSampler('DDIM', DDIMScheduler, model), [], {}), sd_samplers_common.SamplerData('Heun', lambda model: DiffusionSampler('Heun', HeunDiscreteScheduler, model), [], {}), sd_samplers_common.SamplerData('Euler', lambda model: DiffusionSampler('Euler', EulerDiscreteScheduler, model), [], {}), @@ -157,8 +160,10 @@ def __init__(self, name, constructor, model, **kwargs): self.config['beta_start'] = shared.opts.schedulers_beta_start if 'beta_end' in self.config and shared.opts.schedulers_beta_end > 0: self.config['beta_end'] = shared.opts.schedulers_beta_end - if 'shift' in self.config and shared.opts.schedulers_shift != 1: + if 'shift' in self.config: self.config['shift'] = shared.opts.schedulers_shift + if 'use_dynamic_shifting' in self.config: + self.config['use_dynamic_shifting'] = shared.opts.schedulers_dynamic_shift if 'rescale_betas_zero_snr' in self.config: 
self.config['rescale_betas_zero_snr'] = shared.opts.schedulers_rescale_betas if 'timestep_spacing' in self.config and shared.opts.schedulers_timestep_spacing != 'default' and shared.opts.schedulers_timestep_spacing is not None: @@ -192,5 +197,9 @@ def __init__(self, name, constructor, model, **kwargs): debug(f'Sampler: signature={possible}') # shared.log.debug(f'Sampler: sampler="{name}" config={self.config}') self.sampler = constructor(**self.config) + if name == 'DC Solver': + if not hasattr(self.sampler, 'dc_ratios'): + pass + # self.sampler.dc_ratios = self.sampler.cascade_polynomial_regression(test_CFG=6.0, test_NFE=10, cpr_path='tmp/sd2.1.npy') # shared.log.debug(f'Sampler: class="{self.sampler.__class__.__name__}" config={self.sampler.config}') self.sampler.name = name diff --git a/modules/sd_unet.py b/modules/sd_unet.py index c948c223b..16d942a22 100644 --- a/modules/sd_unet.py +++ b/modules/sd_unet.py @@ -1,8 +1,9 @@ import os -from modules import shared, devices, files_cache +from modules import shared, devices, files_cache, sd_models unet_dict = {} +debug = os.environ.get('SD_LOAD_DEBUG', None) is not None def load_unet(model): @@ -28,15 +29,13 @@ def load_unet(model): model.prior_pipe.text_encoder = None # Prevent OOM model.prior_pipe.text_encoder = prior_text_encoder.to(devices.device, dtype=devices.dtype) if "Flux" in model.__class__.__name__: - shared.log.info(f'Loading UNet: name="{shared.opts.sd_unet}" file="{unet_dict[shared.opts.sd_unet]}" offload={shared.opts.diffusers_offload_mode}') from modules.model_flux import load_transformer transformer = load_transformer(unet_dict[shared.opts.sd_unet]) if transformer is not None: model.transformer = None if shared.opts.diffusers_offload_mode == 'none': - model.transformer = transformer.to(devices.device, devices.dtype) - else: - model.transformer = transformer + sd_models.move_model(transformer, devices.device) + model.transformer = transformer from modules.sd_models import set_diffuser_offload set_diffuser_offload(model, 'model') else: @@ -52,6 +51,9 @@ def load_unet(model): model.unet = unet.to(devices.device, devices.dtype_unet) except Exception as e: shared.log.error(f'Failed to load UNet model: {e}') + if debug: + from modules import errors + errors.display(e, 'UNet load:') return devices.torch_gc() diff --git a/modules/sd_vae.py b/modules/sd_vae.py index 0b5993256..bfe9807ad 100644 --- a/modules/sd_vae.py +++ b/modules/sd_vae.py @@ -192,7 +192,6 @@ def load_vae_diffusers(model_file, vae_file=None, vae_source="unknown-source"): if not os.path.exists(vae_file): shared.log.error(f'VAE not found: model{vae_file}') return None - shared.log.info(f"Loading VAE: model={vae_file} source={vae_source}") diffusers_load_config = { "low_cpu_mem_usage": False, "torch_dtype": devices.dtype_vae, @@ -207,14 +206,14 @@ def load_vae_diffusers(model_file, vae_file=None, vae_source="unknown-source"): diffusers_load_config['variant'] = shared.opts.diffusers_vae_load_variant if shared.opts.diffusers_vae_upcast != 'default': diffusers_load_config['force_upcast'] = True if shared.opts.diffusers_vae_upcast == 'true' else False - shared.log.debug(f'Diffusers VAE load config: {diffusers_load_config}') + _pipeline, model_type = sd_models.detect_pipeline(model_file, 'vae') + vae_config = sd_models.get_load_config(model_file, model_type, config_type='json') + if vae_config is not None: + diffusers_load_config['config'] = os.path.join(vae_config, 'vae') + shared.log.info(f'Load VAE: model="{vae_file}" source={vae_source} config={diffusers_load_config}') 
try: import diffusers if os.path.isfile(vae_file): - _pipeline, model_type = sd_models.detect_pipeline(model_file, 'vae') - diffusers_load_config = { - "config": os.path.join(sd_models.get_load_config(model_file, model_type, config_type='json'), 'vae'), - } if os.path.getsize(vae_file) > 1310944880: # 1.3GB vae = diffusers.ConsistencyDecoderVAE.from_pretrained('openai/consistency-decoder', **diffusers_load_config) # consistency decoder does not have from single file, so we'll just download it once more elif os.path.getsize(vae_file) < 10000000: # 10MB @@ -233,6 +232,8 @@ def load_vae_diffusers(model_file, vae_file=None, vae_source="unknown-source"): global loaded_vae_file # pylint: disable=global-statement loaded_vae_file = os.path.basename(vae_file) # shared.log.debug(f'Diffusers VAE config: {vae.config}') + if shared.opts.diffusers_offload_mode == 'none': + sd_models.move_model(vae, devices.device) return vae except Exception as e: shared.log.error(f"Loading VAE failed: model={vae_file} {e}") diff --git a/modules/sd_vae_natten.py b/modules/sd_vae_natten.py new file mode 100644 index 000000000..478e9b654 --- /dev/null +++ b/modules/sd_vae_natten.py @@ -0,0 +1,90 @@ +# copied from https://github.com/Birch-san/sdxl-play/blob/main/src/attn/natten_attn_processor.py + +import os +from typing import Optional +from diffusers.models.attention import Attention +import torch +from torch.nn import Linear +from einops import rearrange +from installer import install, log + + +def init(): + try: + os.environ['NATTEN_CUDA_ARCH'] = '8.0;8.6' + install('natten') + import natten + return natten + except Exception as e: + log.error(f'Init natten: {e}') + return None + + +def fuse_qkv(attn: Attention) -> None: + has_bias = attn.to_q.bias is not None + qkv = Linear(in_features=attn.to_q.in_features, out_features=attn.to_q.out_features*3, bias=has_bias, dtype=attn.to_q.weight.dtype, device=attn.to_q.weight.device) + qkv.weight.data.copy_(torch.cat([attn.to_q.weight.data * attn.scale, attn.to_k.weight.data, attn.to_v.weight.data])) + if has_bias: + qkv.bias.data.copy_(torch.cat([attn.to_q.bias.data * attn.scale, attn.to_k.bias.data, attn.to_v.bias.data])) + setattr(attn, 'qkv', qkv) # noqa: B010 + del attn.to_q, attn.to_k, attn.to_v + + +def fuse_vae_qkv(vae) -> None: + for attn in [*vae.encoder.mid_block.attentions, *vae.decoder.mid_block.attentions]: + fuse_qkv(attn) + + +class NattenAttnProcessor: + kernel_size: int + + def __init__(self, kernel_size: int): + self.kernel_size = kernel_size + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: Optional[torch.FloatTensor] = None, + attention_mask: Optional[torch.BoolTensor] = None, + temb: Optional[torch.FloatTensor] = None, + ): + import natten + assert hasattr(attn, 'qkv'), "Did not find property qkv on attn. Expected you to fuse its q_proj, k_proj, v_proj weights and biases beforehand, and multiply attn.scale into the q weights and bias." + residual = hidden_states + if attn.spatial_norm is not None: + hidden_states = attn.spatial_norm(hidden_states, temb) + # assumes MHA (as opposed to GQA) + inner_dim: int = attn.qkv.out_features // 3 + if attention_mask is not None: + raise ValueError("No mask customization for neighbourhood attention; the mask is already complicated enough as it is") + if encoder_hidden_states is not None: + raise ValueError("NATTEN cannot be used for cross-attention. 
I think.") + if attn.group_norm is not None: + hidden_states = attn.group_norm(hidden_states) + hidden_states = rearrange(hidden_states, '... c h w -> ... h w c') + qkv = attn.qkv(hidden_states) + # assumes MHA (as opposed to GQA) + q, k, v = rearrange(qkv, "n h w (t nh e) -> t n nh h w e", t=3, e=inner_dim) + qk = natten.functional.na2d_qk(q, k, self.kernel_size, 1) # natten2dqk + a = torch.softmax(qk, dim=-1) + hidden_states = natten.functional.na2d_av(a, v, self.kernel_size, 1) # natten2dav + hidden_states = rearrange(hidden_states, "n nh h w e -> n h w (nh e)") + linear_proj, dropout = attn.to_out + hidden_states = linear_proj(hidden_states) + hidden_states = dropout(hidden_states) + hidden_states = rearrange(hidden_states, '... h w c -> ... c h w') + if attn.residual_connection: + hidden_states = hidden_states + residual + return hidden_states + + +def enable_natten(pipe): + if not hasattr(pipe, 'vae'): + return + natten = init() + kernel_size = 17 + if natten is not None: + log.info(f'VAE natten: version={natten.__version__} kernel={kernel_size}') + fuse_vae_qkv(pipe.vae) + pipe.vae.set_attn_processor(NattenAttnProcessor(kernel_size=kernel_size)) diff --git a/modules/sd_vae_taesd.py b/modules/sd_vae_taesd.py index 3400f05f9..5cd7fab7c 100644 --- a/modules/sd_vae_taesd.py +++ b/modules/sd_vae_taesd.py @@ -55,6 +55,8 @@ def Decoder(latent_channels=4): return nn.Sequential( Clamp(), conv(latent_channels, 64), nn.ReLU(), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), + Block(64, 64), Block(64, 64), Block(64, 64), nn.Identity(), conv(64, 64, bias=False), + Block(64, 64), Block(64, 64), Block(64, 64), nn.Identity(), conv(64, 64, bias=False), Block(64, 64), conv(64, 3), ) elif shared.opts.live_preview_taesd_layers == 2: @@ -62,6 +64,7 @@ def Decoder(latent_channels=4): Clamp(), conv(latent_channels, 64), nn.ReLU(), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), + Block(64, 64), Block(64, 64), Block(64, 64), nn.Identity(), conv(64, 64, bias=False), Block(64, 64), conv(64, 3), ) else: @@ -86,9 +89,9 @@ def __init__(self, encoder_path="taesd_encoder.pth", decoder_path="taesd_decoder self.encoder = Encoder(latent_channels) self.decoder = Decoder(latent_channels) if encoder_path is not None: - self.encoder.load_state_dict(torch.load(encoder_path, map_location="cpu")) + self.encoder.load_state_dict(torch.load(encoder_path, map_location="cpu"), strict=False) if decoder_path is not None: - self.decoder.load_state_dict(torch.load(decoder_path, map_location="cpu")) + self.decoder.load_state_dict(torch.load(decoder_path, map_location="cpu"), strict=False) def guess_latent_channels(self, decoder_path, encoder_path): """guess latent channel count based on encoder filename""" diff --git a/modules/shared.py b/modules/shared.py index e1aafca8a..e675ed39b 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -76,7 +76,7 @@ "outdir_save", "outdir_init_images" } -resize_modes = ["None", "Fixed", "Crop", "Fill", "Outpaint"] +resize_modes = ["None", "Fixed", "Crop", "Fill", "Outpaint", "Context aware"] compatibility_opts = ['clip_skip', 'uni_pc_lower_order_final', 'uni_pc_order'] console = Console(log_time=True, log_time_format='%H:%M:%S-%f') dir_timestamps = {} @@ -406,7 +406,8 @@ def temp_disable_extensions(): "sd_model_refiner": OptionInfo('None', "Refiner model", gr.Dropdown, lambda: {"choices": 
['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_vae": OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list), "sd_unet": OptionInfo("None", "UNET model", gr.Dropdown, lambda: {"choices": shared_items.sd_unet_items()}, refresh=shared_items.refresh_unet_list), - "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": ['None', 'T5 FP4', 'T5 FP8', 'T5 INT8', 'T5 QINT8', 'T5 FP16']}), + # "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": ['None', 'T5 FP4', 'T5 FP8', 'T5 INT8', 'T5 QINT8', 'T5 FP16']}), + "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": shared_items.sd_t5_items()}, refresh=shared_items.refresh_t5_list), "sd_model_dict": OptionInfo('None', "Use separate base dict", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"), "sd_textencoder_cache": OptionInfo(True, "Cache text encoder results"), @@ -418,6 +419,7 @@ def temp_disable_extensions(): "sd_checkpoint_cache": OptionInfo(0, "Cached models", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1, "visible": not native }), "sd_vae_checkpoint_cache": OptionInfo(0, "Cached VAEs", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1, "visible": False}), "sd_disable_ckpt": OptionInfo(False, "Disallow models in ckpt format", gr.Checkbox, {"visible": False}), + "diffusers_version": OptionInfo("", "Diffusers version", gr.Textbox, {"visible": False}), })) options_templates.update(options_section(('cuda', "Compute Settings"), { @@ -575,6 +577,7 @@ def temp_disable_extensions(): "hfcache_dir": OptionInfo(os.path.join(os.path.expanduser('~'), '.cache', 'huggingface', 'hub'), "Folder for Huggingface cache", folder=True), "vae_dir": OptionInfo(os.path.join(paths.models_path, 'VAE'), "Folder with VAE files", folder=True), "unet_dir": OptionInfo(os.path.join(paths.models_path, 'UNET'), "Folder with UNET files", folder=True), + "t5_dir": OptionInfo(os.path.join(paths.models_path, 'T5'), "Folder with T5 files", folder=True), "sd_lora": OptionInfo("", "Add LoRA to prompt", gr.Textbox, {"visible": False}), "lora_dir": OptionInfo(os.path.join(paths.models_path, 'Lora'), "Folder with LoRA network(s)", folder=True), "lyco_dir": OptionInfo(os.path.join(paths.models_path, 'LyCORIS'), "Folder with LyCORIS network(s)", gr.Text, {"visible": False}), @@ -740,6 +743,7 @@ def temp_disable_extensions(): 'schedulers_timesteps': OptionInfo('', "Timesteps"), "schedulers_rescale_betas": OptionInfo(False, "Rescale betas with zero terminal SNR", gr.Checkbox), 'schedulers_shift': OptionInfo(1, "Sampler shift", gr.Slider, {"minimum": 0.1, "maximum": 10, "step": 0.1}), + 'schedulers_dynamic_shift': OptionInfo(True, "Sampler dynamic shift"), # managed from ui.py for backend original k-diffusion "schedulers_sep_kdiffusers": OptionInfo("
K-Diffusion specific config
", "", gr.HTML), @@ -839,6 +843,7 @@ def temp_disable_extensions(): "extra_networks_sidebar_width": OptionInfo(35, "UI sidebar width (%)", gr.Slider, {"minimum": 10, "maximum": 80, "step": 1}), "extra_networks_card_size": OptionInfo(160, "UI card size (px)", gr.Slider, {"minimum": 20, "maximum": 2000, "step": 1}), "extra_networks_card_square": OptionInfo(True, "UI disable variable aspect ratio"), + "extra_networks_fetch": OptionInfo(True, "UI fetch network info on mouse-over"), "extra_networks_card_fit": OptionInfo("cover", "UI image contain method", gr.Radio, {"choices": ["contain", "cover", "fill"], "visible": False}), "extra_networks_sep2": OptionInfo("
Extra networks general
", "", gr.HTML), "extra_network_reference": OptionInfo(False, "Use reference values when available", gr.Checkbox), diff --git a/modules/shared_items.py b/modules/shared_items.py index 1b0077ef8..9f110f413 100644 --- a/modules/shared_items.py +++ b/modules/shared_items.py @@ -23,6 +23,17 @@ def refresh_unet_list(): modules.sd_unet.refresh_unet_list() +def sd_t5_items(): + import modules.model_t5 + predefined = ['None', 'T5 FP4', 'T5 FP8', 'T5 INT8', 'T5 QINT8', 'T5 FP16'] + return predefined + list(modules.model_t5.t5_dict) + + +def refresh_t5_list(): + import modules.model_t5 + modules.model_t5.refresh_t5_list() + + def list_crossattention(diffusers=False): if diffusers: return [ diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index ed8acceca..7ad0166e7 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -23,12 +23,12 @@ def open_embeddings(filename): """ Load Embedding files from drive. Image embeddings not currently supported. """ + embeddings = [] + skipped = [] if filename is None: - return + return embeddings, skipped filenames = list(filename) exts = [".SAFETENSORS", '.BIN', '.PT'] - embeddings = [] - skipped = [] for _filename in filenames: # debug(f'Embedding check: {filename}') fullname = _filename @@ -274,15 +274,15 @@ def load_diffusers_embedding(self, filename: Union[str, List[str]] = None, data: """ overwrite = bool(data) if not shared.sd_loaded: - return 0 + return embeddings, skipped = open_embeddings(filename) or convert_bundled(data) for skip in skipped: self.skipped_embeddings[skip.name] = skipped if not embeddings: - return 0 + return text_encoders, tokenizers, hiddensizes = get_text_encoders() if not all([text_encoders, tokenizers, hiddensizes]): - return 0 + return for embedding in embeddings: try: embedding.vector_sizes = [v.shape[-1] for v in embedding.vec] @@ -320,20 +320,20 @@ def load_from_file(self, path, filename): if ext in ['.PNG', '.WEBP', '.JXL', '.AVIF']: if '.preview' in filename.lower(): - return None + return embed_image = Image.open(path) if hasattr(embed_image, 'text') and 'sd-ti-embedding' in embed_image.text: data = embedding_from_b64(embed_image.text['sd-ti-embedding']) else: data = extract_image_data_embed(embed_image) if not data: # if data is None, means this is not an embeding, just a preview image - return None + return elif ext in ['.BIN', '.PT']: data = torch.load(path, map_location="cpu") elif ext in ['.SAFETENSORS']: data = safetensors.torch.load_file(path, device="cpu") else: - return None + return # textual inversion embeddings if 'string_to_param' in data: @@ -345,7 +345,7 @@ def load_from_file(self, path, filename): elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor: if len(data.keys()) != 1: self.skipped_embeddings[name] = Embedding(None, name=name, filename=path) - return None + return emb = next(iter(data.values())) if len(emb.shape) == 1: emb = emb.unsqueeze(0) @@ -353,7 +353,7 @@ def load_from_file(self, path, filename): raise RuntimeError(f"Couldn't identify {filename} as textual inversion embedding") if shared.native: - return emb + return vec = emb.detach().to(devices.device, dtype=torch.float32) # name = data.get('name', name) diff --git a/modules/txt2img.py b/modules/txt2img.py index 76b0a7c45..e438c2e47 100644 --- a/modules/txt2img.py +++ b/modules/txt2img.py @@ -18,13 +18,13 @@ def txt2img(id_task, seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, height, 
width, enable_hr, denoising_strength, - hr_scale, hr_upscaler, hr_force, hr_second_pass_steps, hr_resize_x, hr_resize_y, + hr_scale, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative, hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio, override_settings_texts, *args): - debug(f'txt2img: id_task={id_task}|prompt={prompt}|negative={negative_prompt}|styles={prompt_styles}|steps={steps}|sampler_index={sampler_index}|hr_sampler_index={hr_sampler_index}|full_quality={full_quality}|restore_faces={restore_faces}|tiling={tiling}|hidiffusion={hidiffusion}|batch_count={n_iter}|batch_size={batch_size}|cfg_scale={cfg_scale}|clip_skip={clip_skip}|seed={seed}|subseed={subseed}|subseed_strength={subseed_strength}|seed_resize_from_h={seed_resize_from_h}|seed_resize_from_w={seed_resize_from_w}|height={height}|width={width}|enable_hr={enable_hr}|denoising_strength={denoising_strength}|hr_scale={hr_scale}|hr_upscaler={hr_upscaler}|hr_force={hr_force}|hr_second_pass_steps={hr_second_pass_steps}|hr_resize_x={hr_resize_x}|hr_resize_y={hr_resize_y}|image_cfg_scale={image_cfg_scale}|diffusers_guidance_rescale={diffusers_guidance_rescale}|refiner_steps={refiner_steps}|refiner_start={refiner_start}|refiner_prompt={refiner_prompt}|refiner_negative={refiner_negative}|override_settings={override_settings_texts}') + debug(f'txt2img: id_task={id_task}|prompt={prompt}|negative={negative_prompt}|styles={prompt_styles}|steps={steps}|sampler_index={sampler_index}|hr_sampler_index={hr_sampler_index}|full_quality={full_quality}|restore_faces={restore_faces}|tiling={tiling}|hidiffusion={hidiffusion}|batch_count={n_iter}|batch_size={batch_size}|cfg_scale={cfg_scale}|clip_skip={clip_skip}|seed={seed}|subseed={subseed}|subseed_strength={subseed_strength}|seed_resize_from_h={seed_resize_from_h}|seed_resize_from_w={seed_resize_from_w}|height={height}|width={width}|enable_hr={enable_hr}|denoising_strength={denoising_strength}|hr_resize_mode={hr_resize_mode}|hr_resize_context={hr_resize_context}|hr_scale={hr_scale}|hr_upscaler={hr_upscaler}|hr_force={hr_force}|hr_second_pass_steps={hr_second_pass_steps}|hr_resize_x={hr_resize_x}|hr_resize_y={hr_resize_y}|image_cfg_scale={image_cfg_scale}|diffusers_guidance_rescale={diffusers_guidance_rescale}|refiner_steps={refiner_steps}|refiner_start={refiner_start}|refiner_prompt={refiner_prompt}|refiner_negative={refiner_negative}|override_settings={override_settings_texts}') if shared.sd_model is None: shared.log.warning('Model not loaded') @@ -71,6 +71,8 @@ def txt2img(id_task, enable_hr=enable_hr, denoising_strength=denoising_strength, hr_scale=hr_scale, + hr_resize_mode=hr_resize_mode, + hr_resize_context=hr_resize_context, hr_upscaler=hr_upscaler, hr_force=hr_force, hr_second_pass_steps=hr_second_pass_steps, @@ -89,6 +91,7 @@ def txt2img(id_task, processed = scripts.scripts_txt2img.run(p, *args) if processed is None: processed = processing.process_images(p) + processed = scripts.scripts_txt2img.after(p, processed, *args) p.close() if processed is None: return [], '', '', 'Error: processing failed' diff --git a/modules/ui.py b/modules/ui.py index 70128e0f9..41ef6c6c1 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -243,9 +243,12 @@ def run_settings(*args): if cmd_opts.use_directml: directml_override_opts() if cmd_opts.use_openvino: - if not 
shared.opts.cuda_compile: - shared.log.warning("OpenVINO: Enabling Torch Compile") - shared.opts.cuda_compile = True + if "Model" not in shared.opts.cuda_compile: + shared.log.warning("OpenVINO: Enabling Torch Compile Model") + shared.opts.cuda_compile.append("Model") + if "VAE" not in shared.opts.cuda_compile: + shared.log.warning("OpenVINO: Enabling Torch Compile VAE") + shared.opts.cuda_compile.append("VAE") if shared.opts.cuda_compile_backend != "openvino_fx": shared.log.warning("OpenVINO: Setting Torch Compiler backend to OpenVINO FX") shared.opts.cuda_compile_backend = "openvino_fx" diff --git a/modules/ui_common.py b/modules/ui_common.py index f83e8594f..e168e6981 100644 --- a/modules/ui_common.py +++ b/modules/ui_common.py @@ -42,7 +42,7 @@ def infotext_to_html(text): negative = res.get('Negative prompt', '') res.pop('Prompt', None) res.pop('Negative prompt', None) - params = [f'{k}: {v}' for k, v in res.items() if v is not None] + params = [f'{k}: {v}' for k, v in res.items() if v is not None and 'size-' not in k.lower()] params = '| '.join(params) if len(params) > 0 else '' code = '' if len(prompt) > 0: @@ -146,9 +146,13 @@ def __init__(self, d=None): destination = os.path.join(destination, dirname) destination = namegen.sanitize(destination) os.makedirs(destination, exist_ok = True) - shutil.copy(fullfn, destination) - shared.log.info(f'Copying image: file="{fullfn}" folder="{destination}"') tgt_filename = os.path.join(destination, os.path.basename(fullfn)) + if not os.path.exists(tgt_filename): + try: + shutil.copy(fullfn, destination) + shared.log.info(f'Copying image: file="{fullfn}" folder="{destination}"') + except Exception as e: + shared.log.error(f'Copying image: {fullfn} {e}') if shared.opts.save_txt: try: from PIL import Image @@ -171,7 +175,7 @@ def __init__(self, d=None): geninfo, _ = images.read_info_from_image(image) items = infotext.parse(geninfo) p = PObject(items) - fullfn, txt_fullfn = images.save_image(image, shared.opts.outdir_save, "", seed=p.all_seeds[i], prompt=p.all_prompts[i], info=info, extension=shared.opts.samples_format, grid=is_grid, p=p) + fullfn, txt_fullfn, _exif = images.save_image(image, shared.opts.outdir_save, "", seed=p.all_seeds[i], prompt=p.all_prompts[i], info=info, extension=shared.opts.samples_format, grid=is_grid, p=p) if fullfn is None: continue filename = os.path.relpath(fullfn, shared.opts.outdir_save) diff --git a/modules/ui_control.py b/modules/ui_control.py index c4aa25bd3..8904c0f5a 100644 --- a/modules/ui_control.py +++ b/modules/ui_control.py @@ -97,11 +97,11 @@ def create_ui(_blocks: gr.Blocks=None): with gr.Accordion(open=False, label="Size", elem_id="control_size", elem_classes=["small-accordion"]): with gr.Tabs(): with gr.Tab('Before'): - resize_mode_before, resize_name_before, width_before, height_before, scale_by_before, selected_scale_tab_before = ui_sections.create_resize_inputs('control_before', [], accordion=False, latent=True) + resize_mode_before, resize_name_before, resize_context_before, width_before, height_before, scale_by_before, selected_scale_tab_before = ui_sections.create_resize_inputs('control_before', [], accordion=False, latent=True, prefix='before') with gr.Tab('After'): - resize_mode_after, resize_name_after, width_after, height_after, scale_by_after, selected_scale_tab_after = ui_sections.create_resize_inputs('control_after', [], accordion=False, latent=False) + resize_mode_after, resize_name_after, resize_context_after, width_after, height_after, scale_by_after, selected_scale_tab_after = 
ui_sections.create_resize_inputs('control_after', [], accordion=False, latent=False, prefix='after') with gr.Tab('Mask'): - resize_mode_mask, resize_name_mask, width_mask, height_mask, scale_by_mask, selected_scale_tab_mask = ui_sections.create_resize_inputs('control_mask', [], accordion=False, latent=False) + resize_mode_mask, resize_name_mask, resize_context_mask, width_mask, height_mask, scale_by_mask, selected_scale_tab_mask = ui_sections.create_resize_inputs('control_mask', [], accordion=False, latent=False, prefix='mask') with gr.Accordion(open=False, label="Sampler", elem_id="control_sampler", elem_classes=["small-accordion"]): steps, sampler_index = ui_sections.create_sampler_and_steps_selection(None, "control") @@ -128,7 +128,7 @@ def create_ui(_blocks: gr.Blocks=None): video_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False, elem_id="control_video_interpolate") video_type.change(fn=helpers.video_type_change, inputs=[video_type], outputs=[video_duration, video_loop, video_pad, video_interpolate]) - enable_hr, hr_sampler_index, hr_denoising_strength, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('control') + enable_hr, hr_sampler_index, hr_denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('control') with gr.Row(): override_settings = ui_common.create_override_inputs('control') @@ -136,7 +136,7 @@ def create_ui(_blocks: gr.Blocks=None): with gr.Row(variant='compact', elem_id="control_extra_networks", visible=False) as extra_networks_ui: from modules import timer, ui_extra_networks extra_networks_ui = ui_extra_networks.create_ui(extra_networks_ui, btn_extra, 'control', skip_indexing=shared.opts.extra_network_skip_indexing) - timer.startup.record('ui-en') + timer.startup.record('ui-networks') with gr.Row(elem_id='control-inputs'): with gr.Column(scale=9, elem_id='control-input-column', visible=True) as _column_input: @@ -201,9 +201,10 @@ def create_ui(_blocks: gr.Blocks=None): process_id = gr.Dropdown(label="Processor", choices=processors.list_models(), value='None') model_id = gr.Dropdown(label="ControlNet", choices=controlnet.list_models(), value='None') ui_common.create_refresh_button(model_id, controlnet.list_models, lambda: {"choices": controlnet.list_models(refresh=True)}, f'refresh_controlnet_models_{i}') - model_strength = gr.Slider(label="Strength", minimum=0.01, maximum=2.0, step=0.01, value=1.0-i/10) + model_strength = gr.Slider(label="CN Strength", minimum=0.01, maximum=2.0, step=0.01, value=1.0) control_start = gr.Slider(label="Start", minimum=0.0, maximum=1.0, step=0.05, value=0) control_end = gr.Slider(label="End", minimum=0.0, maximum=1.0, step=0.05, value=1.0) + control_mode = gr.Dropdown(label="CN Mode", choices=['', 'Canny', 'Tile', 'Depth', 'Blur', 'Pose', 'Gray', 'LQ'], value=0, type='index', visible=False) reset_btn = ui_components.ToolButton(value=ui_symbols.reset) image_upload = gr.UploadButton(label=ui_symbols.upload, file_types=['image'], elem_classes=['form', 'gradio-button', 'tool']) image_reuse= ui_components.ToolButton(value=ui_symbols.reuse) @@ -226,6 +227,7 @@ def create_ui(_blocks: gr.Blocks=None): image_preview = image_preview, control_start = control_start, control_end = control_end, + 
control_mode = control_mode, extra_controls = extra_controls, ) ) @@ -249,7 +251,7 @@ def create_ui(_blocks: gr.Blocks=None): process_id = gr.Dropdown(label="Processor", choices=processors.list_models(), value='None') model_id = gr.Dropdown(label="Adapter", choices=t2iadapter.list_models(), value='None') ui_common.create_refresh_button(model_id, t2iadapter.list_models, lambda: {"choices": t2iadapter.list_models(refresh=True)}, f'refresh_adapter_models_{i}') - model_strength = gr.Slider(label="Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0-i/10) + model_strength = gr.Slider(label="T2I Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0) reset_btn = ui_components.ToolButton(value=ui_symbols.reset) image_upload = gr.UploadButton(label=ui_symbols.upload, file_types=['image'], elem_classes=['form', 'gradio-button', 'tool']) image_reuse= ui_components.ToolButton(value=ui_symbols.reuse) @@ -293,7 +295,7 @@ def create_ui(_blocks: gr.Blocks=None): process_id = gr.Dropdown(label="Processor", choices=processors.list_models(), value='None') model_id = gr.Dropdown(label="ControlNet-XS", choices=xs.list_models(), value='None') ui_common.create_refresh_button(model_id, xs.list_models, lambda: {"choices": xs.list_models(refresh=True)}, f'refresh_xs_models_{i}') - model_strength = gr.Slider(label="Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0-i/10) + model_strength = gr.Slider(label="CN Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0) control_start = gr.Slider(label="Start", minimum=0.0, maximum=1.0, step=0.05, value=0) control_end = gr.Slider(label="End", minimum=0.0, maximum=1.0, step=0.05, value=1.0) reset_btn = ui_components.ToolButton(value=ui_symbols.reset) @@ -340,7 +342,7 @@ def create_ui(_blocks: gr.Blocks=None): process_id = gr.Dropdown(label="Processor", choices=processors.list_models(), value='None') model_id = gr.Dropdown(label="Model", choices=lite.list_models(), value='None') ui_common.create_refresh_button(model_id, lite.list_models, lambda: {"choices": lite.list_models(refresh=True)}, f'refresh_lite_models_{i}') - model_strength = gr.Slider(label="Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0-i/10) + model_strength = gr.Slider(label="CN Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0) reset_btn = ui_components.ToolButton(value=ui_symbols.reset) image_upload = gr.UploadButton(label=ui_symbols.upload, file_types=['image'], elem_classes=['form', 'gradio-button', 'tool']) image_reuse= ui_components.ToolButton(value=ui_symbols.reuse) @@ -383,7 +385,7 @@ def create_ui(_blocks: gr.Blocks=None): with gr.Row(): enabled_cb = gr.Checkbox(enabled, label='', container=False, show_label=False) model_id = gr.Dropdown(label="Reference", choices=reference.list_models(), value='Reference', visible=False) - model_strength = gr.Slider(label="Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0, visible=False) + model_strength = gr.Slider(label="CN Strength", minimum=0.01, maximum=1.0, step=0.01, value=1.0, visible=False) reset_btn = ui_components.ToolButton(value=ui_symbols.reset) image_upload = gr.UploadButton(label=ui_symbols.upload, file_types=['image'], elem_classes=['form', 'gradio-button', 'tool']) image_reuse= ui_components.ToolButton(value=ui_symbols.reuse) @@ -432,21 +434,21 @@ def create_ui(_blocks: gr.Blocks=None): with gr.Accordion('Leres Depth', open=True, elem_classes=['processor-settings']): settings.append(gr.Checkbox(label="Boost", value=False)) settings.append(gr.Slider(label="Near threshold", minimum=0.0, maximum=1.0, 
step=0.01, value=0.0)) - settings.append(gr.Slider(label="Background threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.0)) + settings.append(gr.Slider(label="Depth threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.0)) with gr.Accordion('MediaPipe Face', open=True, elem_classes=['processor-settings']): settings.append(gr.Slider(label="Max faces", minimum=1, maximum=10, step=1, value=1)) - settings.append(gr.Slider(label="Min confidence", minimum=0.0, maximum=1.0, step=0.01, value=0.5)) + settings.append(gr.Slider(label="Face confidence", minimum=0.0, maximum=1.0, step=0.01, value=0.5)) with gr.Accordion('Canny', open=True, elem_classes=['processor-settings']): settings.append(gr.Slider(label="Low threshold", minimum=0, maximum=1000, step=1, value=100)) settings.append(gr.Slider(label="High threshold", minimum=0, maximum=1000, step=1, value=200)) with gr.Accordion('DWPose', open=True, elem_classes=['processor-settings']): - settings.append(gr.Radio(label="Model", choices=['Tiny', 'Medium', 'Large'], value='Tiny')) - settings.append(gr.Slider(label="Min confidence", minimum=0.0, maximum=1.0, step=0.01, value=0.3)) + settings.append(gr.Radio(label="Pose Model", choices=['Tiny', 'Medium', 'Large'], value='Tiny')) + settings.append(gr.Slider(label="Pose confidence", minimum=0.0, maximum=1.0, step=0.01, value=0.3)) with gr.Accordion('SegmentAnything', open=True, elem_classes=['processor-settings']): - settings.append(gr.Radio(label="Model", choices=['Base', 'Large'], value='Base')) + settings.append(gr.Radio(label="Segment Model", choices=['Base', 'Large'], value='Base')) with gr.Accordion('Edge', open=True, elem_classes=['processor-settings']): settings.append(gr.Checkbox(label="Parameter free", value=True)) - settings.append(gr.Radio(label="Mode", choices=['edge', 'gradient'], value='edge')) + settings.append(gr.Radio(label="Edge mode", choices=['edge', 'gradient'], value='edge')) with gr.Accordion('Zoe Depth', open=True, elem_classes=['processor-settings']): settings.append(gr.Checkbox(label="Gamma corrected", value=False)) with gr.Accordion('Marigold Depth', open=True, elem_classes=['processor-settings']): @@ -454,7 +456,7 @@ def create_ui(_blocks: gr.Blocks=None): settings.append(gr.Slider(label="Denoising steps", minimum=1, maximum=99, step=1, value=10)) settings.append(gr.Slider(label="Ensemble size", minimum=1, maximum=99, step=1, value=10)) with gr.Accordion('Depth Anything', open=True, elem_classes=['processor-settings']): - settings.append(gr.Dropdown(label="Color map", choices=['none'] + masking.COLORMAP, value='inferno')) + settings.append(gr.Dropdown(label="Depth map", choices=['none'] + masking.COLORMAP, value='inferno')) for setting in settings: setting.change(fn=processors.update_settings, inputs=settings, outputs=[]) @@ -502,11 +504,11 @@ def create_ui(_blocks: gr.Blocks=None): seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end, full_quality, restore_faces, tiling, hidiffusion, hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio, - resize_mode_before, resize_name_before, width_before, height_before, scale_by_before, selected_scale_tab_before, - resize_mode_after, resize_name_after, width_after, height_after, scale_by_after, selected_scale_tab_after, - resize_mode_mask, resize_name_mask, width_mask, height_mask, scale_by_mask, 
selected_scale_tab_mask, + resize_mode_before, resize_name_before, resize_context_before, width_before, height_before, scale_by_before, selected_scale_tab_before, + resize_mode_after, resize_name_after, resize_context_after, width_after, height_after, scale_by_after, selected_scale_tab_after, + resize_mode_mask, resize_name_mask, resize_context_mask, width_mask, height_mask, scale_by_mask, selected_scale_tab_mask, denoising_strength, batch_count, batch_size, - enable_hr, hr_sampler_index, hr_denoising_strength, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, + enable_hr, hr_sampler_index, hr_denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative, video_skip_frames, video_type, video_duration, video_loop, video_pad, video_interpolate, ] diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py index 505a62145..1ac571d41 100644 --- a/modules/ui_extra_networks.py +++ b/modules/ui_extra_networks.py @@ -590,9 +590,9 @@ def toggle_visibility(is_visible): ui.details_components.append(meta) with gr.Group(elem_id=f"{tabname}_extra_details_text", elem_classes=["extra-details-text"], visible=False) as ui.details_text: description = gr.Textbox(label='Description', lines=1, placeholder="Style description...") - prompt = gr.Textbox(label='Prompt', lines=2, placeholder="Prompt...") - negative = gr.Textbox(label='Negative prompt', lines=2, placeholder="Negative prompt...") - extra = gr.Textbox(label='Parameters', lines=2, placeholder="Generation parameters overrides...") + prompt = gr.Textbox(label='Network prompt', lines=2, placeholder="Prompt...") + negative = gr.Textbox(label='Network negative prompt', lines=2, placeholder="Negative prompt...") + extra = gr.Textbox(label='Network parameters', lines=2, placeholder="Generation parameters overrides...") wildcards = gr.Textbox(label='Wildcards', lines=2, placeholder="Wildcard prompt replacements...") ui.details_components += [description, prompt, negative, extra, wildcards] with gr.Row(): diff --git a/modules/ui_img2img.py b/modules/ui_img2img.py index 1da835be6..5e8aa464e 100644 --- a/modules/ui_img2img.py +++ b/modules/ui_img2img.py @@ -44,7 +44,7 @@ def create_ui(): with gr.Row(variant='compact', elem_id="img2img_extra_networks", visible=False) as extra_networks_ui: from modules import ui_extra_networks extra_networks_ui_img2img = ui_extra_networks.create_ui(extra_networks_ui, img2img_extra_networks_button, 'img2img', skip_indexing=shared.opts.extra_network_skip_indexing) - timer.startup.record('ui-en') + timer.startup.record('ui-networks') with gr.Row(elem_id="img2img_interface", equal_height=False): with gr.Column(variant='compact', elem_id="img2img_settings"): @@ -119,7 +119,7 @@ def update_orig(image, state): with gr.Accordion(open=False, label="Sampler", elem_classes=["small-accordion"], elem_id="img2img_sampler_group"): steps, sampler_index = ui_sections.create_sampler_and_steps_selection(None, "img2img") ui_sections.create_sampler_options('img2img') - resize_mode, resize_name, width, height, scale_by, selected_scale_tab = ui_sections.create_resize_inputs('img2img', [init_img, sketch], latent=True) + resize_mode, resize_name, resize_context, width, height, scale_by, selected_scale_tab = ui_sections.create_resize_inputs('img2img', [init_img, sketch], latent=True, non_zero=False) batch_count, batch_size = 
ui_sections.create_batch_inputs('img2img', accordion=True) seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w = ui_sections.create_seed_inputs('img2img') @@ -139,7 +139,7 @@ def update_orig(image, state): inpaint_full_res_padding = gr.Slider(label='Padding', minimum=0, maximum=256, step=4, value=32, elem_id="img2img_inpaint_full_res_padding") mask_alpha = gr.Slider(label="Alpha", minimum=0.0, maximum=1.0, step=0.05, value=1.0, elem_id="img2img_mask_alpha") with gr.Row(): - inpainting_mask_invert = gr.Radio(label='Mode', choices=['masked', 'invert'], value='masked', type="index", elem_id="img2img_mask_mode") + inpainting_mask_invert = gr.Radio(label='Inpaint Mode', choices=['masked', 'invert'], value='masked', type="index", elem_id="img2img_mask_mode") inpaint_full_res = gr.Radio(label="Inpaint area", choices=["full", "masked"], type="index", value="full", elem_id="img2img_inpaint_full_res") inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'noise', 'nothing'], value='original', type="index", elem_id="img2img_inpainting_fill", visible=not shared.native) @@ -187,7 +187,7 @@ def select_img2img_tab(tab): selected_scale_tab, height, width, scale_by, - resize_mode, resize_name, + resize_mode, resize_name, resize_context, inpaint_full_res, inpaint_full_res_padding, inpainting_mask_invert, img2img_batch_files, img2img_batch_input_dir, img2img_batch_output_dir, img2img_batch_inpaint_mask_dir, hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio, @@ -241,6 +241,7 @@ def select_img2img_tab(tab): (steps, "Steps"), # resize (resize_mode, "Resize mode"), + (resize_name, "Resize name"), (width, "Size-1"), (height, "Size-2"), (scale_by, "Resize scale"), diff --git a/modules/ui_loadsave.py b/modules/ui_loadsave.py index 69d79f879..6e398603a 100644 --- a/modules/ui_loadsave.py +++ b/modules/ui_loadsave.py @@ -4,6 +4,9 @@ from modules.ui_components import ToolButton +debug = os.environ.get('SD_UI_DEBUG', None) + + class UiLoadsave: """allows saving and restorig default values for gradio components""" @@ -37,10 +40,14 @@ def apply_field(obj, field, condition=None, init_field=None): pass elif condition and not condition(saved_value): pass + # elif getattr(obj, 'type', '') == 'index': + # pass # may need special handling else: setattr(obj, field, saved_value) if init_field is not None: init_field(saved_value) + if debug and key in self.component_mapping and not key.startswith('customscript'): + errors.log.warning(f'UI duplicate: key="{key}" id={getattr(obj, "elem_id", None)} class={getattr(obj, "elem_classes", None)}') if field == 'value' and key not in self.component_mapping: self.component_mapping[key] = x if field == 'open' and key not in self.component_mapping: @@ -125,6 +132,8 @@ def dump_defaults(self): def iter_changes(self, values): for i, name in enumerate(self.component_mapping): + # if '__init__' in name: + # continue component = self.component_mapping[name] choices = getattr(component, 'choices', None) if type(choices) is list and len(choices) > 0: # fix gradio radio button choices being tuples @@ -190,12 +199,11 @@ def ui_view(self, *values): return text def ui_apply(self, *values): - from modules.shared import log num_changed = 0 current_ui_settings = self.read_from_file() for name, old_value, new_value, default_value in self.iter_changes(values): component = self.component_mapping[name] - 
log.debug(f'Settings: name={name} component={component} old={old_value} default={default_value} new={new_value}') + errors.log.debug(f'Settings: name={name} component={component} old={old_value} default={default_value} new={new_value}') num_changed += 1 current_ui_settings[name] = new_value if num_changed == 0: @@ -224,11 +232,10 @@ def ui_submenu_apply(self, items): text += f"{k}{'open' if opened else 'closed'}" text += "" - from modules.shared import log num_changed = 0 current_ui_settings = self.read_from_file() for name, _old_value, new_value, default_value in self.iter_menus(): - log.debug(f'Settings: name={name} default={default_value} new={new_value}') + errors.log.debug(f'Settings: name={name} default={default_value} new={new_value}') num_changed += 1 current_ui_settings[name] = new_value if num_changed == 0: diff --git a/modules/ui_postprocessing.py b/modules/ui_postprocessing.py index b1948a27b..aa5140da2 100644 --- a/modules/ui_postprocessing.py +++ b/modules/ui_postprocessing.py @@ -45,7 +45,7 @@ def create_ui(): trending = gr.Label(elem_id="interrogate_label_trending", label="Trending", num_top_classes=5) flavor = gr.Label(elem_id="interrogate_label_flavor", label="Flavor", num_top_classes=5) with gr.Row(): - clip_model = gr.Dropdown([], value='ViT-L-14/openai', label='CLIP Model') + clip_model = gr.Dropdown([], value='ViT-L-14/openai', label='CLiP Model') ui_common.create_refresh_button(clip_model, interrogate.get_clip_models, lambda: {"choices": interrogate.get_clip_models()}, 'refresh_interrogate_models') mode = gr.Radio(['best', 'fast', 'classic', 'caption', 'negative'], label='Mode', value='best') with gr.Row(elem_id='interrogate_buttons_image'): @@ -67,7 +67,7 @@ def create_ui(): with gr.Row(): batch = gr.Text(label="Prompts", lines=10) with gr.Row(): - clip_model = gr.Dropdown([], value='ViT-L-14/openai', label='CLIP Model') + clip_model = gr.Dropdown([], value='ViT-L-14/openai', label='CLiP Batch Model') ui_common.create_refresh_button(clip_model, interrogate.get_clip_models, lambda: {"choices": interrogate.get_clip_models()}, 'refresh_interrogate_models') with gr.Row(elem_id='interrogate_buttons_batch'): btn_interrogate_batch = gr.Button("Interrogate", elem_id="interrogate_btn_interrogate", variant='primary') diff --git a/modules/ui_sections.py b/modules/ui_sections.py index e1be99edc..9c8d1d0fc 100644 --- a/modules/ui_sections.py +++ b/modules/ui_sections.py @@ -179,7 +179,7 @@ def create_correction_inputs(tab): with gr.Accordion(open=False, label="Corrections", elem_id=f"{tab}_corrections", elem_classes=["small-accordion"], visible=shared.native): with gr.Group(visible=shared.native): with gr.Row(elem_id=f"{tab}_hdr_mode_row"): - hdr_mode = gr.Dropdown(label="Mode", choices=["Relative values", "Absolute values"], type="index", value="Relative values", elem_id=f"{tab}_hdr_mode", show_label=False) + hdr_mode = gr.Dropdown(label="Correction mode", choices=["Relative values", "Absolute values"], type="index", value="Relative values", elem_id=f"{tab}_hdr_mode", show_label=False) gr.HTML('
') with gr.Row(elem_id=f"{tab}_correction_row"): hdr_brightness = gr.Slider(minimum=-1.0, maximum=1.0, step=0.1, value=0, label='Brightness', elem_id=f"{tab}_hdr_brightness") @@ -280,12 +280,15 @@ def create_hires_inputs(tab): with gr.Group(): with gr.Row(elem_id=f"{tab}_hires_row1"): enable_hr = gr.Checkbox(label='Enable second pass', value=False, elem_id=f"{tab}_enable_hr") + """ with gr.Row(elem_id=f"{tab}_hires_fix_row1", variant="compact"): hr_upscaler = gr.Dropdown(label="Upscaler", elem_id=f"{tab}_hr_upscaler", choices=[*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]], value=shared.latent_upscale_default_mode) hr_scale = gr.Slider(minimum=0.1, maximum=8.0, step=0.05, label="Rescale by", value=2.0, elem_id=f"{tab}_hr_scale") with gr.Row(elem_id=f"{tab}_hires_fix_row3", variant="compact"): hr_resize_x = gr.Slider(minimum=0, maximum=4096, step=8, label="Width resize", value=0, elem_id=f"{tab}_hr_resize_x") hr_resize_y = gr.Slider(minimum=0, maximum=4096, step=8, label="Height resize", value=0, elem_id=f"{tab}_hr_resize_y") + """ + hr_resize_mode, hr_upscaler, hr_resize_context, hr_resize_x, hr_resize_y, hr_scale, _selected_scale_tab = create_resize_inputs(tab, None, accordion=False, latent=True, non_zero=False) with gr.Row(elem_id=f"{tab}_hires_fix_row2", variant="compact"): hr_force = gr.Checkbox(label='Force HiRes', value=False, elem_id=f"{tab}_hr_force") hr_sampler_index = gr.Dropdown(label='Secondary sampler', elem_id=f"{tab}_sampling_alt", choices=[x.name for x in sd_samplers.samplers], value='Same as primary', type="index") @@ -300,43 +303,52 @@ def create_hires_inputs(tab): refiner_prompt = gr.Textbox(value='', label='Secondary prompt', elem_id=f"{tab}_refiner_prompt") with gr.Row(elem_id="txt2img_refiner_row4", variant="compact"): refiner_negative = gr.Textbox(value='', label='Secondary negative prompt', elem_id=f"{tab}_refiner_neg_prompt") - return enable_hr, hr_sampler_index, denoising_strength, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative + return enable_hr, hr_sampler_index, denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative -def create_resize_inputs(tab, images, accordion=True, latent=False): +def create_resize_inputs(tab, images, accordion=True, latent=False, non_zero=True, prefix=''): dummy_component = gr.Number(visible=False, value=0) + if len(prefix) > 0 and not prefix.startswith(' '): + prefix = f' {prefix}' with gr.Accordion(open=False, label="Resize", elem_classes=["small-accordion"], elem_id=f"{tab}_resize_group") if accordion else gr.Group(): - # with gr.Row(): - # resize_mode = gr.Radio(label="Mode", elem_id=f"{tab}_resize_mode", choices=shared.resize_modes, type="index", value='Fixed') with gr.Row(): - resize_mode = gr.Dropdown(label="Mode", elem_id=f"{tab}_resize_mode", choices=shared.resize_modes, type="index", value='Fixed') - resize_name = gr.Dropdown(label="Method", elem_id=f"{tab}_resize_name", choices=([] if not latent else list(shared.latent_upscale_modes)) + [x.name for x in shared.sd_upscalers], value=shared.latent_upscale_default_mode) + resize_mode = gr.Dropdown(label=f"Mode{prefix}" if non_zero else "Resize mode", elem_id=f"{tab}_resize_mode", choices=shared.resize_modes, type="index", value='Fixed') + resize_name = gr.Dropdown(label=f"Method{prefix}", elem_id=f"{tab}_resize_name", 
choices=([] if not latent else list(shared.latent_upscale_modes)) + [x.name for x in shared.sd_upscalers], value=shared.latent_upscale_default_mode, visible=True) + resize_context_choices = ["Add with forward", "Remove with forward", "Add with backward", "Remove with backward"] + resize_context = gr.Dropdown(label=f"Context{prefix}", elem_id=f"{tab}_resize_context", choices=resize_context_choices, value=resize_context_choices[0], visible=False) ui_common.create_refresh_button(resize_name, modelloader.load_upscalers, lambda: {"choices": modelloader.load_upscalers()}, 'refresh_upscalers') + def resize_mode_change(mode): + if mode is None or mode == 0: + return gr.update(visible=False), gr.update(visible=False) + return gr.update(visible=mode != 5), gr.update(visible=mode == 5) + resize_mode.change(fn=resize_mode_change, inputs=[resize_mode], outputs=[resize_name, resize_context]) + with gr.Row(visible=True) as _resize_group: with gr.Column(elem_id=f"{tab}_column_size"): selected_scale_tab = gr.State(value=0) # pylint: disable=abstract-class-instantiated - with gr.Tabs(elem_id=f"{tab}_scale_tabs"): - with gr.Tab(label="Fixed", elem_id=f"{tab}_scale_tab_fixed") as tab_scale_to: + with gr.Tabs(elem_id=f"{tab}_scale_tabs", selected=0 if non_zero else 1): + with gr.Tab(label="Fixed", id=0, elem_id=f"{tab}_scale_tab_fixed") as tab_scale_to: with gr.Row(): with gr.Column(elem_id=f"{tab}_column_size_fixed"): with gr.Row(): - width = gr.Slider(minimum=64, maximum=8192, step=8, label="Width", value=512, elem_id=f"{tab}_width") - height = gr.Slider(minimum=64, maximum=8192, step=8, label="Height", value=512, elem_id=f"{tab}_height") + width = gr.Slider(minimum=64 if non_zero else 0, maximum=8192, step=8, label=f"Width {prefix}" if non_zero else "Resize width", value=1024 if non_zero else 0, elem_id=f"{tab}_resize_width") + height = gr.Slider(minimum=64 if non_zero else 0, maximum=8192, step=8, label=f"Height {prefix}" if non_zero else "Resize height", value=1024 if non_zero else 0, elem_id=f"{tab}_resize_height") ar_list = ['AR'] + [x.strip() for x in shared.opts.aspect_ratios.split(',') if x.strip() != ''] - ar_dropdown = gr.Dropdown(show_label=False, interactive=True, choices=ar_list, value=ar_list[0], elem_id=f"{tab}_ar", elem_classes=["ar-dropdown"]) + ar_dropdown = gr.Dropdown(show_label=False, interactive=True, choices=ar_list, value=ar_list[0], elem_id=f"{tab}_resize_ar", elem_classes=["ar-dropdown"]) for c in [ar_dropdown, width, height]: c.change(fn=ar_change, inputs=[ar_dropdown, width, height], outputs=[width, height], show_progress=False) - res_switch_btn = ToolButton(value=ui_symbols.switch, elem_id=f"{tab}_res_switch_btn") + res_switch_btn = ToolButton(value=ui_symbols.switch, elem_id=f"{tab}_resize_switch_size_btn") res_switch_btn.click(lambda w, h: (h, w), inputs=[width, height], outputs=[width, height], show_progress=False) - detect_image_size_btn = ToolButton(value=ui_symbols.detect, elem_id=f"{tab}_detect_image_size_btn") + detect_image_size_btn = ToolButton(value=ui_symbols.detect, elem_id=f"{tab}_resize_detect_size_btn") el = tab.split('_')[0] detect_image_size_btn.click(fn=lambda w, h, _: (w or gr.update(), h or gr.update()), _js=f'currentImageResolution{el}', inputs=[dummy_component, dummy_component, dummy_component], outputs=[width, height], show_progress=False) - with gr.Tab(label="Scale", elem_id=f"{tab}_scale_tab_scale") as tab_scale_by: - scale_by = gr.Slider(minimum=0.05, maximum=8.0, step=0.05, label="Scale", value=1.0, elem_id=f"{tab}_scale") - for component in images: - 
component.change(fn=lambda: None, _js="updateImg2imgResizeToTextAfterChangingImage", inputs=[], outputs=[], show_progress=False) + with gr.Tab(label="Scale", id=1, elem_id=f"{tab}_scale_tab_scale") as tab_scale_by: + scale_by = gr.Slider(minimum=0.05, maximum=8.0, step=0.05, label=f"Scale {prefix}" if non_zero else "Resize scale", value=1.0, elem_id=f"{tab}_scale") + if images is not None: + for component in images: + component.change(fn=lambda: None, _js="updateImg2imgResizeToTextAfterChangingImage", inputs=[], outputs=[], show_progress=False) tab_scale_to.select(fn=lambda: 0, inputs=[], outputs=[selected_scale_tab]) tab_scale_by.select(fn=lambda: 1, inputs=[], outputs=[selected_scale_tab]) # resize_mode.change(fn=lambda x: gr.update(visible=x != 0), inputs=[resize_mode], outputs=[_resize_group]) - return resize_mode, resize_name, width, height, scale_by, selected_scale_tab + return resize_mode, resize_name, resize_context, width, height, scale_by, selected_scale_tab diff --git a/modules/ui_txt2img.py b/modules/ui_txt2img.py index 705444ccf..03e773e04 100644 --- a/modules/ui_txt2img.py +++ b/modules/ui_txt2img.py @@ -29,7 +29,7 @@ def create_ui(): with gr.Row(variant='compact', elem_id="txt2img_extra_networks", visible=False) as extra_networks_ui: from modules import ui_extra_networks extra_networks_ui = ui_extra_networks.create_ui(extra_networks_ui, txt2img_extra_networks_button, 'txt2img', skip_indexing=shared.opts.extra_network_skip_indexing) - timer.startup.record('ui-en') + timer.startup.record('ui-networks') with gr.Row(elem_id="txt2img_interface", equal_height=False): with gr.Column(variant='compact', elem_id="txt2img_settings"): @@ -48,7 +48,7 @@ def create_ui(): seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w = ui_sections.create_seed_inputs('txt2img') _cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, _cfg_end = ui_sections.create_advanced_inputs('txt2img', base=False) hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('txt2img') - enable_hr, hr_sampler_index, denoising_strength, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img') + enable_hr, hr_sampler_index, denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img') override_settings = ui_common.create_override_inputs('txt2img') with gr.Group(elem_id="txt2img_script_container"): @@ -70,7 +70,7 @@ def create_ui(): seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, height, width, enable_hr, denoising_strength, - hr_scale, hr_upscaler, hr_force, hr_second_pass_steps, hr_resize_x, hr_resize_y, + hr_scale, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative, hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio, override_settings, @@ -118,14 +118,19 @@ def create_ui(): (hidiffusion, "HiDiffusion"), # second pass 
(enable_hr, "Second pass"), - (hr_sampler_index, "Hires sampler"), (denoising_strength, "Denoising strength"), + (hr_sampler_index, "Hires sampler"), + (hr_resize_mode, "Hires resize mode"), + (hr_resize_context, "Hires resize context"), (hr_upscaler, "Hires upscaler"), (hr_force, "Hires force"), (hr_second_pass_steps, "Hires steps"), (hr_scale, "Hires upscale"), + (hr_scale, "Hires scale"), (hr_resize_x, "Hires resize-1"), (hr_resize_y, "Hires resize-2"), + (hr_resize_x, "Hires size-1"), + (hr_resize_y, "Hires size-2"), # refiner (refiner_start, "Refiner start"), (refiner_steps, "Refiner steps"), diff --git a/modules/vqa.py b/modules/vqa.py index 87ea8431c..044ee252a 100644 --- a/modules/vqa.py +++ b/modules/vqa.py @@ -12,7 +12,10 @@ MODELS = { "MS Florence 2 Base": "microsoft/Florence-2-base", # 0.5GB "MS Florence 2 Large": "microsoft/Florence-2-large", # 1.5GB - "CogFlorence 2 Large": "thwri/CogFlorence-2-Large-Freeze", # 1.6GB + "MiaoshouAI PromptGen 1.5 Base": "MiaoshouAI/Florence-2-base-PromptGen-v1.5", # 1.1GB + "MiaoshouAI PromptGen 1.5 Large": "MiaoshouAI/Florence-2-large-PromptGen-v1.5", # 3.3GB + "CogFlorence 2.0 Large": "thwri/CogFlorence-2-Large-Freeze", # 1.6GB + "CogFlorence 2.2 Large": "thwri/CogFlorence-2.2-Large", # 1.6GB "Moondream 2": "vikhyatk/moondream2", # 3.7GB "GIT TextCaps Base": "microsoft/git-base-textcaps", # 0.7GB "GIT VQA Base": "microsoft/git-base-vqav2", # 0.7GB diff --git a/modules/zluda_installer.py b/modules/zluda_installer.py index 30bfe48a7..cd8d5798c 100644 --- a/modules/zluda_installer.py +++ b/modules/zluda_installer.py @@ -24,7 +24,7 @@ def install(zluda_path: os.PathLike) -> None: if os.path.exists(zluda_path): return - urllib.request.urlretrieve(f'https://github.com/lshqqytiger/ZLUDA/releases/download/rel.{os.environ.get("ZLUDA_HASH", "1c238a959f2aafdb9900f6801b61d9c0318040a2")}/ZLUDA-windows-rocm{rocm.version[0]}-amd64.zip', '_zluda') + urllib.request.urlretrieve(f'https://github.com/lshqqytiger/ZLUDA/releases/download/rel.{os.environ.get("ZLUDA_HASH", "c0804ca624963aab420cb418412b1c7fbae3454b")}/ZLUDA-windows-rocm{rocm.version[0]}-amd64.zip', '_zluda') with zipfile.ZipFile('_zluda', 'r') as archive: infos = archive.infolist() for info in infos: @@ -49,6 +49,8 @@ def make_copy(zluda_path: os.PathLike) -> None: def load(zluda_path: os.PathLike) -> None: + os.environ["ZLUDA_COMGR_LOG_LEVEL"] = "1" + for v in HIPSDK_TARGETS: ctypes.windll.LoadLibrary(os.path.join(rocm.path, 'bin', v)) for v in ZLUDA_TARGETS: diff --git a/requirements.txt b/requirements.txt index abecc4a57..a35a6d3f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,8 +27,7 @@ fasteners orjson invisible-watermark pi-heif -diffusers==0.30.2 -safetensors==0.4.4 +safetensors==0.4.5 tensordict==0.1.2 peft==0.11.1 httpx==0.24.1 @@ -39,7 +38,7 @@ clip-interrogator==0.6.0 antlr4-python3-runtime==4.9.3 requests==2.31.0 tqdm==4.66.4 -accelerate==0.33.0 +accelerate==0.34.2 opencv-contrib-python-headless==4.9.0.80 einops==0.4.1 gradio==3.43.2 @@ -63,3 +62,4 @@ typing-extensions==4.11.0 torchdiffeq dctorch scikit-image +seam-carving diff --git a/scripts/animatediff.py b/scripts/animatediff.py index 86177cf17..3e8eff3c5 100644 --- a/scripts/animatediff.py +++ b/scripts/animatediff.py @@ -1,15 +1,3 @@ -""" -Lightweight AnimateDiff implementation in Diffusers -Docs: -TODO animatediff items: -- SDXL -- Custom models -- Custom LORAs -- Enable second pass -- TemporalDiff: https://huggingface.co/CiaraRowles/TemporalDiff/tree/main -- AnimateFace: 
https://huggingface.co/nlper2022/animatediff_face_512/tree/main -""" - import os import gradio as gr import diffusers @@ -20,7 +8,7 @@ # config ADAPTERS = { 'None': None, - 'Motion 1.5 v3' :'vladmandic/animatediff-v3', + 'Motion 1.5 v3' :'diffusers/animatediff-motion-adapter-v1-5-3', 'Motion 1.5 v2' :'guoyww/animatediff-motion-adapter-v1-5-2', 'Motion 1.5 v1': 'guoyww/animatediff-motion-adapter-v1-5', 'Motion 1.4': 'guoyww/animatediff-motion-adapter-v1-4', @@ -28,6 +16,7 @@ 'AnimateFace': 'vladmandic/animateface', 'Lightning': 'ByteDance/AnimateDiff-Lightning/animatediff_lightning_4step_diffusers.safetensors', 'SDXL Beta': 'a-r-r-o-w/animatediff-motion-adapter-sdxl-beta', + 'LCM': 'wangfuyun/AnimateLCM', # 'SDXL Beta': 'guoyww/animatediff-motion-adapter-sdxl-beta', # 'LongAnimateDiff 32': 'vladmandic/longanimatediff-32', # 'LongAnimateDiff 64': 'vladmandic/longanimatediff-64', @@ -42,6 +31,7 @@ 'Tilt-down': 'guoyww/animatediff-motion-lora-tilt-down', 'Roll-left': 'guoyww/animatediff-motion-lora-rolling-anticlockwise', 'Roll-right': 'guoyww/animatediff-motion-lora-rolling-clockwise', + 'LCM': 'wangfuyun/AnimateLCM/AnimateLCM_sd15_t2v_lora.safetensors' } # state @@ -70,7 +60,7 @@ def set_adapter(adapter_name: str = 'None'): shared.log.warning(f'AnimateDiff: unsupported model type: {shared.sd_model.__class__.__name__}') return if motion_adapter is not None and loaded_adapter == adapter_name and (shared.sd_model.__class__.__name__ == 'AnimateDiffPipeline' or shared.sd_model.__class__.__name__ == 'AnimateDiffSDXLPipeline'): - shared.log.debug(f'AnimateDiff cache: adapter="{adapter_name}"') + shared.log.debug(f'AnimateDiff: adapter="{adapter_name}" cached') return if getattr(shared.sd_model, 'image_encoder', None) is not None: shared.log.debug('AnimateDiff: unloading IP adapter') @@ -130,19 +120,80 @@ def set_adapter(adapter_name: str = 'None'): sd_models.copy_diffuser_options(new_pipe, orig_pipe) sd_models.set_diffuser_options(shared.sd_model, vae=None, op='model') sd_models.move_model(shared.sd_model.unet, devices.device) # move pipeline to device - shared.log.debug(f'AnimateDiff create: pipeline="{shared.sd_model.__class__}" adapter="{loaded_adapter}"') + shared.log.debug(f'AnimateDiff: adapter="{loaded_adapter}"') except Exception as e: motion_adapter = None loaded_adapter = None shared.log.error(f'AnimateDiff load error: adapter="{adapter_name}" {e}') +def set_scheduler(p, model, override: bool = False): + if override: + p.sampler_name = 'Default' + if 'LCM' in model: + shared.sd_model.scheduler = diffusers.LCMScheduler.from_config(shared.sd_model.scheduler.config) + else: + shared.sd_model.scheduler = diffusers.DDIMScheduler.from_config(shared.sd_model.scheduler.config) + shared.log.debug(f'AnimateDiff: scheduler={shared.sd_model.scheduler.__class__.__name__}') + + +def set_prompt(p): + p.prompt = shared.prompt_styles.apply_styles_to_prompt(p.prompt, p.styles) + p.negative_prompt = shared.prompt_styles.apply_negative_styles_to_prompt(p.negative_prompt, p.styles) + prompts = p.prompt.split('\n') + try: + prompt = {} + for line in prompts: + k, v = line.split(':') + prompt[int(k.strip())] = v.strip() + except Exception: + prompt = p.prompt + shared.log.debug(f'AnimateDiff prompt: {prompt}') + p.task_args['prompt'] = prompt + p.task_args['negative_prompt'] = p.negative_prompt + + +def set_lora(p, lora, strength): + if lora is not None and lora != 'None': + shared.log.debug(f'AnimateDiff: lora="{lora}" strength={strength}') + if lora.endswith('.safetensors'): + fn = os.path.basename(lora) + 
lora = lora.replace(f'/{fn}', '') + shared.sd_model.load_lora_weights(lora, weight_name=fn, adapter_name=lora) + else: + shared.sd_model.load_lora_weights(lora, adapter_name=lora) + shared.sd_model.set_adapters([lora], adapter_weights=[strength]) + p.extra_generation_params['AnimateDiff Lora'] = f'{lora}:{strength}' + + +def set_free_init(method, iters, order, spatial, temporal): + if hasattr(shared.sd_model, 'enable_free_init') and method != 'none': + shared.log.debug(f'AnimateDiff free init: method={method} iters={iters} order={order} spatial={spatial} temporal={temporal}') + shared.sd_model.enable_free_init( + num_iters=iters, + use_fast_sampling=False, + method=method, + order=order, + spatial_stop_frequency=spatial, + temporal_stop_frequency=temporal, + ) + + +def set_free_noise(frames): + context_length = 16 + context_stride = 4 + if frames >= context_length: + shared.log.debug(f'AnimateDiff free noise: frames={frames} context={context_length} stride={context_stride}') + shared.sd_model.enable_free_noise(context_length=context_length, context_stride=context_stride) + + class Script(scripts.Script): def title(self): return 'AnimateDiff' - def show(self, _is_img2img): - return scripts.AlwaysVisible if shared.native else False + def show(self, is_img2img): + # return scripts.AlwaysVisible if shared.native else False + return not is_img2img def ui(self, _is_img2img): @@ -154,81 +205,68 @@ def video_type_change(video_type): gr.update(visible=video_type == 'MP4'), ] - with gr.Accordion('AnimateDiff', open=False, elem_id='animatediff'): - with gr.Row(): - adapter_index = gr.Dropdown(label='Adapter', choices=list(ADAPTERS), value='None') - frames = gr.Slider(label='Frames', minimum=1, maximum=64, step=1, value=16) - with gr.Row(): - override_scheduler = gr.Checkbox(label='Override sampler', value=True) + with gr.Row(): + gr.HTML("  AnimateDiff
") + with gr.Row(): + adapter_index = gr.Dropdown(label='Adapter', choices=list(ADAPTERS), value='None') + frames = gr.Slider(label='Frames', minimum=1, maximum=256, step=1, value=16) + with gr.Row(): + override_scheduler = gr.Checkbox(label='Override sampler', value=True) + with gr.Row(): + lora_index = gr.Dropdown(label='Lora', choices=list(LORAS), value='None') + strength = gr.Slider(label='Strength', minimum=0.0, maximum=2.0, step=0.05, value=1.0) + with gr.Row(): + latent_mode = gr.Checkbox(label='Latent mode', value=True, visible=False) + with gr.Row(): + video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') + duration = gr.Slider(label='Duration', minimum=0.25, maximum=10, step=0.25, value=2, visible=False) + with gr.Accordion('FreeInit', open=False): with gr.Row(): - lora_index = gr.Dropdown(label='Lora', choices=list(LORAS), value='None') - strength = gr.Slider(label='Strength', minimum=0.0, maximum=2.0, step=0.05, value=1.0) + fi_method = gr.Dropdown(label='Method', choices=['none', 'butterworth', 'ideal', 'gaussian'], value='none') with gr.Row(): - latent_mode = gr.Checkbox(label='Latent mode', value=True, visible=False) + # fi_fast = gr.Checkbox(label='Fast sampling', value=False) + fi_iters = gr.Slider(label='Iterations', minimum=1, maximum=10, step=1, value=3) + fi_order = gr.Slider(label='Order', minimum=1, maximum=10, step=1, value=4) with gr.Row(): - video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') - duration = gr.Slider(label='Duration', minimum=0.25, maximum=10, step=0.25, value=2, visible=False) - with gr.Accordion('FreeInit', open=False): - with gr.Row(): - fi_method = gr.Dropdown(label='Method', choices=['none', 'butterworth', 'ideal', 'gaussian'], value='none') - with gr.Row(): - # fi_fast = gr.Checkbox(label='Fast sampling', value=False) - fi_iters = gr.Slider(label='Iterations', minimum=1, maximum=10, step=1, value=3) - fi_order = gr.Slider(label='Order', minimum=1, maximum=10, step=1, value=4) - with gr.Row(): - fi_spatial = gr.Slider(label='Spatial frequency', minimum=0.0, maximum=1.0, step=0.05, value=0.25) - fi_temporal = gr.Slider(label='Temporal frequency', minimum=0.0, maximum=1.0, step=0.05, value=0.25) - with gr.Row(): - gif_loop = gr.Checkbox(label='Loop', value=True, visible=False) - mp4_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) - mp4_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) - video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, gif_loop, mp4_pad, mp4_interpolate]) + fi_spatial = gr.Slider(label='Spatial frequency', minimum=0.0, maximum=1.0, step=0.05, value=0.25) + fi_temporal = gr.Slider(label='Temporal frequency', minimum=0.0, maximum=1.0, step=0.05, value=0.25) + with gr.Row(): + gif_loop = gr.Checkbox(label='Loop', value=True, visible=False) + mp4_pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) + mp4_interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) + video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, gif_loop, mp4_pad, mp4_interpolate]) return [adapter_index, frames, lora_index, strength, latent_mode, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, override_scheduler, fi_method, fi_iters, fi_order, fi_spatial, fi_temporal] - def process(self, p: 
processing.StableDiffusionProcessing, adapter_index, frames, lora_index, strength, latent_mode, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, override_scheduler, fi_method, fi_iters, fi_order, fi_spatial, fi_temporal): # pylint: disable=arguments-differ, unused-argument + def run(self, p: processing.StableDiffusionProcessing, adapter_index, frames, lora_index, strength, latent_mode, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, override_scheduler, fi_method, fi_iters, fi_order, fi_spatial, fi_temporal): # pylint: disable=arguments-differ, unused-argument adapter = ADAPTERS[adapter_index] lora = LORAS[lora_index] set_adapter(adapter) if motion_adapter is None: return - if override_scheduler: - p.sampler_name = 'Default' - shared.sd_model.scheduler = diffusers.DDIMScheduler( - beta_start=0.00085, - beta_end=0.012, - beta_schedule="linear", - clip_sample=False, - num_train_timesteps=1000, - rescale_betas_zero_snr=False, - set_alpha_to_one=True, - steps_offset=0, - timestep_spacing="linspace", - trained_betas=None, - ) - shared.log.debug(f'AnimateDiff: adapter="{adapter}" lora="{lora}" strength={strength} video={video_type} scheduler={shared.sd_model.scheduler.__class__.__name__ if override_scheduler else p.sampler_name}') - if lora is not None and lora != 'None': - shared.sd_model.load_lora_weights(lora, adapter_name=lora) - shared.sd_model.set_adapters([lora], adapter_weights=[strength]) - p.extra_generation_params['AnimateDiff Lora'] = f'{lora}:{strength}' - if hasattr(shared.sd_model, 'enable_free_init') and fi_method != 'none': - shared.sd_model.enable_free_init( - num_iters=fi_iters, - use_fast_sampling=False, - method=fi_method, - order=fi_order, - spatial_stop_frequency=fi_spatial, - temporal_stop_frequency=fi_temporal, - ) + set_scheduler(p, adapter, override_scheduler) + set_lora(p, lora, strength) + set_free_init(fi_method, fi_iters, fi_order, fi_spatial, fi_temporal) + set_free_noise(frames) + processing.fix_seed(p) p.extra_generation_params['AnimateDiff'] = loaded_adapter p.do_not_save_grid = True - if 'animatediff' not in p.ops: - p.ops.append('animatediff') + p.ops.append('animatediff') + p.task_args['generator'] = None p.task_args['num_frames'] = frames p.task_args['num_inference_steps'] = p.steps - if not latent_mode: - p.task_args['output_type'] = 'np' + p.task_args['output_type'] = 'np' + shared.log.debug(f'AnimateDiff args: {p.task_args}') + set_prompt(p) + orig_prompt_attention = shared.opts.data['prompt_attention'] + shared.opts.data['prompt_attention'] = 'Fixed attention' + processed: processing.Processed = processing.process_images(p) # runs processing using main loop + shared.opts.data['prompt_attention'] = orig_prompt_attention + devices.torch_gc() + return processed + - def postprocess(self, p: processing.StableDiffusionProcessing, processed: processing.Processed, adapter_index, frames, lora_index, strength, latent_mode, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, override_scheduler, fi_method, fi_iters, fi_order, fi_spatial, fi_temporal): # pylint: disable=arguments-differ, unused-argument + def after(self, p: processing.StableDiffusionProcessing, processed: processing.Processed, adapter_index, frames, lora_index, strength, latent_mode, video_type, duration, gif_loop, mp4_pad, mp4_interpolate, override_scheduler, fi_method, fi_iters, fi_order, fi_spatial, fi_temporal): # pylint: disable=arguments-differ, unused-argument from modules.images import save_video if video_type != 'None': + shared.log.debug(f'AnimateDiff video: 
type={video_type} duration={duration} loop={gif_loop} pad={mp4_pad} interpolate={mp4_interpolate}') save_video(p, filename=None, images=processed.images, video_type=video_type, duration=duration, loop=gif_loop, pad=mp4_pad, interpolate=mp4_interpolate) diff --git a/scripts/cogvideo.py b/scripts/cogvideo.py new file mode 100644 index 000000000..6cbb71467 --- /dev/null +++ b/scripts/cogvideo.py @@ -0,0 +1,228 @@ +""" +models: https://huggingface.co/THUDM/CogVideoX-2b https://huggingface.co/THUDM/CogVideoX-5b +source: https://github.com/THUDM/CogVideo +quanto: https://gist.github.com/a-r-r-o-w/31be62828b00a9292821b85c1017effa +torchao: https://gist.github.com/a-r-r-o-w/4d9732d17412888c885480c6521a9897 +venhancer: https://github.com/THUDM/CogVideo/blob/dcb82ae30b454ab898aeced0633172d75dbd55b8/tools/venhancer/README.md +""" +import os +import time +import cv2 +import gradio as gr +import torch +from torchvision import transforms +import diffusers +import numpy as np +from modules import scripts, shared, devices, errors, sd_models, processing +from modules.processing_callbacks import diffusers_callback, set_callbacks_p + + +debug = (os.environ.get('SD_LOAD_DEBUG', None) is not None) or (os.environ.get('SD_PROCESS_DEBUG', None) is not None) + + +class Script(scripts.Script): + def title(self): + return 'CogVideoX' + + def show(self, is_img2img): + return shared.native + + + def ui(self, _is_img2img): + def video_type_change(video_type): + return [ + gr.update(visible=video_type != 'None'), + gr.update(visible=video_type == 'GIF' or video_type == 'PNG'), + gr.update(visible=video_type == 'MP4'), + gr.update(visible=video_type == 'MP4'), + ] + + with gr.Row(): + gr.HTML("  CogVideoX
") + with gr.Row(): + model = gr.Dropdown(label='Model', choices=['None', 'THUDM/CogVideoX-2b', 'THUDM/CogVideoX-5b'], value='THUDM/CogVideoX-2b') + sampler = gr.Dropdown(label='Sampler', choices=['DDIM', 'DPM'], value='DDIM') + with gr.Row(): + frames = gr.Slider(label='Frames', minimum=1, maximum=100, step=1, value=49) + guidance = gr.Slider(label='Guidance', minimum=0.0, maximum=14.0, step=0.5, value=6.0) + with gr.Row(): + offload = gr.Dropdown(label='Offload', choices=['none', 'balanced', 'model', 'sequential'], value='balanced') + override = gr.Checkbox(label='Override resolution', value=True) + with gr.Row(): + video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None') + duration = gr.Slider(label='Duration', minimum=0.25, maximum=30, step=0.25, value=8, visible=False) + with gr.Accordion('Optional init video', open=False): + with gr.Row(): + image = gr.Image(value=None, label='Image', type='pil', source='upload', width=256, height=256, visible=False) + video = gr.Video(value=None, label='Video', source='upload', width=256, height=256, visible=True) + with gr.Row(): + loop = gr.Checkbox(label='Loop', value=True, visible=False) + pad = gr.Slider(label='Pad frames', minimum=0, maximum=24, step=1, value=1, visible=False) + interpolate = gr.Slider(label='Interpolate frames', minimum=0, maximum=24, step=1, value=0, visible=False) + video_type.change(fn=video_type_change, inputs=[video_type], outputs=[duration, loop, pad, interpolate]) + return [model, sampler, frames, guidance, offload, override, video_type, duration, loop, pad, interpolate, image, video] + + def load(self, model, txt): + if (shared.sd_model_type != 'cogvideox' or shared.sd_model.sd_model_checkpoint != model) and model != 'None': + sd_models.unload_model_weights('model') + shared.log.info(f'CogVideoX load: model="{model}"') + try: + shared.sd_model = None + shared.sd_model = diffusers.CogVideoXPipeline.from_pretrained(model, torch_dtype=devices.dtype, cache_dir=shared.opts.diffusers_dir) + shared.sd_model.sd_checkpoint_info = sd_models.CheckpointInfo(model) + shared.sd_model.sd_model_hash = '' + shared.sd_model.sd_model_checkpoint = model + except Exception as e: + shared.log.error(f'Loading CogVideoX: {e}') + if debug: + errors.display(e, 'CogVideoX') + if shared.sd_model_type == 'cogvideox' and model != 'None': + shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXPipeline if txt else diffusers.CogVideoXVideoToVideoPipeline, shared.sd_model) + shared.sd_model.set_progress_bar_config(bar_format='Progress {rate_fmt}{postfix} {bar} {percentage:3.0f}% {n_fmt}/{total_fmt} {elapsed} {remaining} ' + '\x1b[38;5;71m', ncols=80, colour='#327fba') + shared.log.debug(f'CogVideoX load: class="{shared.sd_model.__class__.__name__}"') + if shared.sd_model is not None and model == 'None': + shared.log.info(f'CogVideoX unload: model={model}') + shared.sd_model = None + devices.torch_gc(force=True) + devices.torch_gc() + + def offload(self, offload): + if shared.sd_model_type != 'cogvideox': + return + if offload == 'none': + sd_models.move_model(shared.sd_model, devices.device) + shared.log.debug(f'CogVideoX: offload={offload}') + if offload == 'balanced': + sd_models.apply_balanced_offload(shared.sd_model) + if offload == 'model': + shared.sd_model.enable_model_cpu_offload() + if offload == 'sequential': + shared.sd_model.enable_model_cpu_offload() + shared.sd_model.enable_sequential_cpu_offload() + shared.sd_model.vae.enable_slicing() + shared.sd_model.vae.enable_tiling() + + def 
video(self, p, fn): + frames = [] + try: + from modules.control.util import decode_fourcc + video = cv2.VideoCapture(fn) + if not video.isOpened(): + shared.log.error(f'Video: file="{fn}" open failed') + return frames + frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + fps = int(video.get(cv2.CAP_PROP_FPS)) + w, h = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + codec = decode_fourcc(video.get(cv2.CAP_PROP_FOURCC)) + shared.log.debug(f'CogVideoX input: video="{fn}" fps={fps} width={w} height={h} codec={codec} frames={frame_count} target={len(frames)}') + frames = [] + while True: + ok, frame = video.read() + if not ok: + break + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + frame = cv2.resize(frame, (p.width, p.height)) + frames.append(frame) + video.release() + if len(frames) > p.frames: + frames = np.asarray(frames) + indices = np.linspace(0, len(frames) - 1, p.frames).astype(int) # reduce array from n_frames to p_frames + frames = frames[indices] + shared.log.debug(f'CogVideoX input reduce: source={len(frames)} target={p.frames}') + frames = [transforms.ToTensor()(frame) for frame in frames] + except Exception as e: + shared.log.error(f'Video: file="{fn}" {e}') + if debug: + errors.display(e, 'CogVideoX') + return frames + + """ + def image(self, p, img): + shared.log.debug(f'CogVideoX input: image={img}') + img = img.resize((p.width, p.height)) + frames = [np.array(img)] + frames = [transforms.ToTensor()(frame) for frame in frames] + return frames + """ + + def generate(self, p: processing.StableDiffusionProcessing): + if shared.sd_model_type != 'cogvideox': + return [] + shared.log.info(f'CogVideoX: sampler={p.sampler} steps={p.steps} frames={p.frames} width={p.width} height={p.height} seed={p.seed} guidance={p.guidance}') + if p.sampler == 'DDIM': + shared.sd_model.scheduler = diffusers.CogVideoXDDIMScheduler.from_config(shared.sd_model.scheduler.config, timestep_spacing="trailing") + if p.sampler == 'DPM': + shared.sd_model.scheduler = diffusers.CogVideoXDPMScheduler.from_config(shared.sd_model.scheduler.config, timestep_spacing="trailing") + t0 = time.time() + frames = [] + set_callbacks_p(p) + shared.state.job_count = 1 + shared.state.sampling_steps = p.steps - 1 + try: + args = dict( + prompt=p.prompt, + negative_prompt=p.negative_prompt, + height=p.height, + width=p.width, + num_videos_per_prompt=1, + num_inference_steps=p.steps, + guidance_scale=p.guidance, + generator=torch.Generator(device=devices.device).manual_seed(p.seed), + callback_on_step_end=diffusers_callback, + callback_on_step_end_tensor_inputs=['latents'], + ) + if getattr(p, 'image', False): + raise ValueError('CogVideoX: image not supported') # TODO image2video + # args['latents'] = self.image(p, p.image) + elif getattr(p, 'video', False): + args['video'] = self.video(p, p.video) + else: + args['num_frames'] = p.frames # only txt2vid has num_frames + if debug: + shared.log.debug(f'CogVideoX args: {args}') + frames = shared.sd_model(**args).frames[0] + except AssertionError as e: + shared.log.info(f'CogVideoX: {e}') + except Exception as e: + shared.log.error(f'CogVideoX: {e}') + if debug: + errors.display(e, 'CogVideoX') + t1 = time.time() + its = (len(frames) * p.steps) / (t1 - t0) + shared.log.info(f'CogVideoX: frames={len(frames)} its={its:.2f} time={t1 - t0:.2f}') + return frames + + # auto-executed by the script-callback + def run(self, p: processing.StableDiffusionProcessing, model, sampler, frames, guidance, offload, override, video_type, duration, loop, 
pad, interpolate, image, video): # pylint: disable=arguments-differ, unused-argument
+        shared.state.begin('CogVideoX')
+        processing.fix_seed(p)
+        p.extra_generation_params['CogVideoX'] = model
+        p.do_not_save_grid = True
+        if 'cogvideox' not in p.ops:
+            p.ops.append('cogvideox')
+        if override:
+            p.width = 720
+            p.height = 480
+        p.sampler = sampler
+        p.guidance = guidance
+        p.frames = frames
+        p.use_dynamic_cfg = sampler == 'DPM'
+        p.prompt = shared.prompt_styles.apply_styles_to_prompt(p.prompt, p.styles)
+        p.negative_prompt = shared.prompt_styles.apply_negative_styles_to_prompt(p.negative_prompt, p.styles)
+        p.image = image
+        p.video = video
+        txt = image is None and video is None
+        self.load(model, txt)
+        self.offload(offload)
+        frames = self.generate(p)
+        devices.torch_gc()
+        processed = processing.Processed(p, images_list=frames)
+        shared.state.end()
+        return processed
+
+    # auto-executed by the script-callback
+    def after(self, p: processing.StableDiffusionProcessing, processed: processing.Processed, model, sampler, frames, guidance, offload, override, video_type, duration, loop, pad, interpolate, image, video): # pylint: disable=arguments-differ, unused-argument
+        if video_type != 'None' and processed is not None and len(processed.images) > 0:
+            from modules.images import save_video
+            shared.log.info(f'CogVideoX video: type={video_type} duration={duration} loop={loop} pad={pad} interpolate={interpolate}')
+            save_video(p, filename=None, images=processed.images, video_type=video_type, duration=duration, loop=loop, pad=pad, interpolate=interpolate)
diff --git a/scripts/face_details.py b/scripts/face_details.py
index 004c775f6..7aca8a528 100644
--- a/scripts/face_details.py
+++ b/scripts/face_details.py
@@ -161,7 +161,7 @@ def restore(self, np_image, p: processing.StableDiffusionProcessing = None):
             p.negative_prompt = orig_p.get('all_negative_prompts', [''])[0]
         report = [{'score': f.score, 'size': f'{f.width}x{f.height}' } for f in faces]
-        shared.log.debug(f'Face HiRes: faces={report} args={faces[0].args} denoise={p.denoising_strength} blur={p.mask_blur} resolution={p.width}x{p.height} padding={p.inpaint_full_res_padding}')
+        shared.log.debug(f'Face HiRes: faces={report} args={faces[0].args} denoise={p.denoising_strength} blur={p.mask_blur} width={p.width} height={p.height} padding={p.inpaint_full_res_padding}')
         mask_all = []
         for face in faces:
diff --git a/scripts/hdr.py b/scripts/hdr.py
new file mode 100644
index 000000000..788c0add2
--- /dev/null
+++ b/scripts/hdr.py
@@ -0,0 +1,100 @@
+import os
+import cv2
+import numpy as np
+import gradio as gr
+from PIL import Image
+import modules.scripts as scripts
+from modules import images, processing, shared
+from modules.processing import Processed
+from modules.shared import opts, state
+
+
+class Script(scripts.Script):
+    def title(self):
+        return "HDR"
+
+    def show(self, is_img2img):
+        return True
+
+    def ui(self, is_img2img):
+        with gr.Row():
+            gr.HTML("  High Dynamic Range
") + with gr.Row(): + save_hdr = gr.Checkbox(label="Save HDR image", value=True) + hdr_range = gr.Slider(minimum=0, maximum=1, step=0.05, value=0.65, label='HDR range') + with gr.Row(): + is_tonemap = gr.Checkbox(label="Enable tonemap", value=False) + gamma = gr.Slider(minimum=0, maximum=2, step=0.05, value=1.0, label='Gamma', visible=False) + with gr.Row(): + scale = gr.Slider(minimum=0, maximum=2, step=0.05, value=1.0, label='Scale', visible=False) + saturation = gr.Slider(minimum=0, maximum=2, step=0.05, value=1.0, label='Saturation', visible=False) + is_tonemap.change(fn=self.change_tonemap, inputs=[is_tonemap], outputs=[gamma, scale, saturation]) + return [hdr_range, save_hdr, is_tonemap, gamma, scale, saturation] + + def change_tonemap(self, is_tonemap): + return [gr.update(visible=is_tonemap), gr.update(visible=is_tonemap), gr.update(visible=is_tonemap)] + + def merge(self, imgs: list, is_tonemap: bool, gamma, scale, saturation): + shared.log.info(f'HDR: merge images={len(imgs)} tonemap={is_tonemap} sgamma={gamma} scale={scale} saturation={saturation}') + imgs_np = [np.asarray(img).astype(np.uint8) for img in imgs] + + align = cv2.createAlignMTB() + align.process(imgs_np, imgs_np) + + # cv2.createMergeRobertson() + # cv2.createMergeDebevec() + merge = cv2.createMergeMertens() + hdr = merge.process(imgs_np) + + # cv2.createTonemapDrago() + # cv2.createTonemapReinhard() + if is_tonemap: + tonemap = cv2.createTonemapMantiuk(gamma, scale, saturation) + hdr = tonemap.process(hdr) + + ldr = np.clip(hdr * 255, 0, 255).astype(np.uint8) + hdr = np.clip(hdr * 65535, 0, 65535).astype(np.uint16) + hdr = cv2.cvtColor(hdr, cv2.COLOR_BGR2RGB) + return hdr, ldr + + def run(self, p, hdr_range, save_hdr, is_tonemap, gamma, scale, saturation): # pylint: disable=arguments-differ + if shared.sd_model_type != 'sd' and shared.sd_model_type != 'sdxl': + shared.log.error(f'HDR: incorrect base model: {shared.sd_model.__class__.__name__}') + return + p.extra_generation_params = { + "HDR range": hdr_range, + } + shared.log.info(f'HDR: range={hdr_range}') + processing.fix_seed(p) + imgs = [] + info = '' + for i in range(3): + p.n_iter = 1 + p.batch_size = 1 + p.do_not_save_grid = True + p.hdr_brightness = (i - 1) * (2.0 * hdr_range) + p.hdr_mode = 0 + p.task_args['seed'] = p.seed + processed: processing.Processed = processing.process_images(p) + imgs += processed.images + if i == 1: + info = processed.info + if state.interrupted: + break + + if len(imgs) > 1: + hdr, ldr = self.merge(imgs, is_tonemap, gamma, scale, saturation) + img = Image.fromarray(ldr) + if save_hdr: + saved_fn, _txt, _exif = images.save_image(img, shared.opts.outdir_save, "", p.seed, p.prompt, opts.grid_format, info=processed.info, p=p) + fn = os.path.splitext(saved_fn)[0] + '-hdr.png' + # cv2.imwrite(fn, hdr, [cv2.IMWRITE_PNG_COMPRESSION, 6, cv2.IMWRITE_PNG_STRATEGY, cv2.IMWRITE_PNG_STRATEGY_HUFFMAN_ONLY, cv2.IMWRITE_HDR_COMPRESSION, cv2.IMWRITE_HDR_COMPRESSION_RLE]) + cv2.imwrite(fn, hdr) + shared.log.debug(f'Save: image="{fn}" type=PNG mode=HDR channels=16 size={os.path.getsize(fn)}') + # if opts.grid_save: + # images.save_image(grid, p.outpath_grids, "grid", p.seed, p.prompt, opts.grid_format, info=processed.info, grid=True, p=p) + grid = [images.image_grid(imgs, rows=1)] if opts.return_grid else [] + imgs = [img] + grid + + processed = Processed(p, images_list=imgs, seed=p.seed, info=info) + return processed diff --git a/scripts/lut.py b/scripts/lut.py new file mode 100644 index 000000000..3d240f291 --- /dev/null +++ b/scripts/lut.py @@ 
-0,0 +1,67 @@ +""" +downloads: https://luts.iwltbap.com/ +lib: https://github.com/homm/pillow-lut-tools +""" +import os +import gradio as gr +from installer import install +from modules import scripts, shared, processing + + +class Script(scripts.Script): + def title(self): + return 'LUT Color grading' + + def show(self, is_img2img): + return shared.native + + def ui(self, _is_img2img): + with gr.Row(): + gr.HTML("  Color grading
") + with gr.Row(): + original = gr.Checkbox(label='Include original image', value=True) + with gr.Row(): + cube_file = gr.File(label='LUT .cube file', type='file', help='Download LUTs from https://luts.iwltbap.com/') + with gr.Row(): + gr.HTML("
Enhance LUT") + with gr.Row(): + cube_scale = gr.Slider(label='Amplify LUT', minimum=0.0, maximum=5.0, step=0.05, value=1.0) + brightness = gr.Slider(label='Brightness', minimum=-1, maximum=1, step=0.05, value=0) + exposure = gr.Slider(label='Exposure', minimum=-5, maximum=5, step=0.05, value=0) + contrast = gr.Slider(label='Contrast', minimum=-1, maximum=1, step=0.05, value=0) + warmth = gr.Slider(label='Warmth', minimum=-1, maximum=1, step=0.05, value=0) + saturation = gr.Slider(label='Saturation', minimum=-1, maximum=5, step=0.05, value=0) + vibrance = gr.Slider(label='Vibrance', minimum=-1, maximum=5, step=0.05, value=0) + hue = gr.Slider(label='Hue', minimum=0, maximum=1, step=0.05, value=0) + gamma = gr.Slider(label='Gamma', minimum=0, maximum=10.0, step=0.1, value=1.0) + return [original, cube_file, cube_scale, brightness, exposure, contrast, warmth, saturation, vibrance, hue, gamma] + + # auto-executed by the script-callback + def after(self, p: processing.StableDiffusionProcessing, processed: processing.Processed, original, cube_file, cube_scale, brightness, exposure, contrast, warmth, saturation, vibrance, hue, gamma): # pylint: disable=arguments-differ, unused-argument + install('pillow_lut', quiet=True) + import pillow_lut + + cube = None + name = os.path.splitext(os.path.basename(cube_file.name))[0] if cube_file is not None else None + shared.log.info(f'Color grading: cube="{name}" scale={cube_scale} brightness={brightness} exposure={exposure} contrast={contrast} warmth={warmth} saturation={saturation} vibrance={vibrance} hue={hue} gamma={gamma}') + if cube_file is not None: + try: + cube = pillow_lut.load_cube_file(cube_file.name) + cube = pillow_lut.amplify_lut(cube, cube_scale) + cube = pillow_lut.rgb_color_enhance(source=cube, brightness=brightness, exposure=exposure, contrast=contrast, warmth=warmth, saturation=saturation, vibrance=vibrance, hue=hue, gamma=gamma) + except Exception as e: + shared.log.error(f'Color grading: {e}') + + images = [] + if processed is not None and len(processed.images) > 0: + for image in processed.images: + info = image.info.get('parameters', '') + if original: + images.append(image) + if cube is not None: + filtered = image.filter(cube) + filtered.info['parameters'] = f'{info}, LUT: {name}' + images.append(filtered) + processed.images = images + + return processed diff --git a/scripts/postprocessing_upscale.py b/scripts/postprocessing_upscale.py index 066aa30d6..e19d577ee 100644 --- a/scripts/postprocessing_upscale.py +++ b/scripts/postprocessing_upscale.py @@ -21,8 +21,8 @@ def ui(self): with gr.TabItem('Scale to', elem_id="extras_scale_to_tab") as tab_scale_to: with gr.Row(): with gr.Row(elem_id="upscaling_column_size"): - upscaling_resize_w = gr.Slider(minimum=64, maximum=4096, step=8, label="Width", value=512, elem_id="extras_upscaling_resize_w") - upscaling_resize_h = gr.Slider(minimum=64, maximum=4096, step=8, label="Height", value=512, elem_id="extras_upscaling_resize_h") + upscaling_resize_w = gr.Slider(minimum=64, maximum=4096, step=8, label="Width", value=1024, elem_id="extras_upscaling_resize_w") + upscaling_resize_h = gr.Slider(minimum=64, maximum=4096, step=8, label="Height", value=1024, elem_id="extras_upscaling_resize_h") upscaling_res_switch_btn = ToolButton(value=symbols.switch, elem_id="upscaling_res_switch_btn") upscaling_crop = gr.Checkbox(label='Crop to fit', value=True, elem_id="extras_upscaling_crop") diff --git a/scripts/prompt_enhance.py b/scripts/prompt_enhance.py new file mode 100644 index 000000000..2fb513e76 --- 
/dev/null +++ b/scripts/prompt_enhance.py @@ -0,0 +1,96 @@ +# repo: https://huggingface.co/gokaygokay/Flux-Prompt-Enhance + +import time +import random +from transformers import AutoTokenizer, AutoModelForSeq2SeqLM +import gradio as gr +from modules import shared, scripts, devices, processing + + +repo_id = "gokaygokay/Flux-Prompt-Enhance" +num_return_sequences = 5 + + +class Script(scripts.Script): + prompts = [['']] + tokenizer: AutoTokenizer = None + model: AutoModelForSeq2SeqLM = None + prefix: str = "enhance prompt: " + button: gr.Button = None + auto_apply: gr.Checkbox = None + max_length: gr.Slider = None + temperature: gr.Slider = None + repetition_penalty: gr.Slider = None + table: gr.DataFrame = None + prompt: gr.Textbox = None + + def title(self): + return 'Prompt enhance' + + def show(self, is_img2img): + return shared.native + + def load(self): + if self.tokenizer is None: + self.tokenizer = AutoTokenizer.from_pretrained('gokaygokay/Flux-Prompt-Enhance', cache_dir=shared.opts.diffusers_dir) + if self.model is None: + shared.log.info(f'Prompt enhance: model="{repo_id}"') + self.model = AutoModelForSeq2SeqLM.from_pretrained('gokaygokay/Flux-Prompt-Enhance', cache_dir=shared.opts.diffusers_dir).to(device=devices.cpu, dtype=devices.dtype) + + def enhance(self, prompt, auto_apply: bool = False, temperature: float = 0.7, repetition_penalty: float = 1.2, max_length: int = 128): + self.load() + t0 = time.time() + input_text = self.prefix + prompt + input_ids = self.tokenizer(input_text, return_tensors="pt").input_ids.to(devices.device) + self.model = self.model.to(devices.device) + kwargs = { + 'max_length': int(max_length), + 'num_return_sequences': int(num_return_sequences), + 'do_sample': True, + 'temperature': float(temperature), + 'repetition_penalty': float(repetition_penalty), + } + try: + outputs = self.model.generate(input_ids, **kwargs) + except Exception as e: + shared.log.error(f'Prompt enhance: error="{e}"') + return [['']] + self.model = self.model.to(devices.cpu) + prompts = self.tokenizer.batch_decode(outputs, skip_special_tokens=True) + prompts = [[p] for p in prompts] + t1 = time.time() + shared.log.info(f'Prompt enhance: temperature={temperature} repetition={repetition_penalty} length={max_length} sequences={num_return_sequences} apply={auto_apply} time={t1-t0:.2f}s') + return prompts + + def select(self, cell: gr.SelectData, _table): + prompt = cell.value if hasattr(cell, 'value') else cell + shared.log.info(f'Prompt enhance: prompt="{prompt}"') + return prompt + + def ui(self, _is_img2img): + with gr.Row(): + self.button = gr.Button(value='Enhance prompt') + self.auto_apply = gr.Checkbox(label='Auto apply', default=False) + with gr.Row(): + self.max_length = gr.Slider(label='Length', minimum=64, maximum=512, step=1, value=128) + self.temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=2.0, step=0.05, value=0.7) + self.repetition_penalty = gr.Slider(label='Penalty', minimum=0.1, maximum=2.0, step=0.05, value=1.2) + with gr.Row(): + self.table = gr.DataFrame(self.prompts, label='', show_label=False, interactive=False, wrap=True, datatype="str", col_count=1, max_rows=num_return_sequences, headers=['Prompts']) + + if self.prompt is not None: + self.button.click(fn=self.enhance, inputs=[self.prompt, self.auto_apply, self.temperature, self.repetition_penalty, self.max_length], outputs=[self.table]) + self.table.select(fn=self.select, inputs=[self.table], outputs=[self.prompt]) + return [self.auto_apply, self.temperature, self.repetition_penalty, 
self.max_length] + + def run(self, p: processing.StableDiffusionProcessing, auto_apply, temperature, repetition_penalty, max_length): # pylint: disable=arguments-differ + if auto_apply: + p.prompt = shared.prompt_styles.apply_styles_to_prompt(p.prompt, p.styles) + shared.log.debug(f'Prompt enhance: source="{p.prompt}"') + prompts = self.enhance(p.prompt, auto_apply, temperature, repetition_penalty, max_length) + p.prompt = random.choice(prompts)[0] + shared.log.debug(f'Prompt enhance: prompt="{p.prompt}"') + + def after_component(self, component, **kwargs): # searching for actual ui prompt components + if getattr(component, 'elem_id', '') in ['txt2img_prompt', 'img2img_prompt', 'control_prompt']: + self.prompt = component diff --git a/scripts/prompt_matrix.py b/scripts/prompt_matrix.py index 39c0fe993..5babf20e7 100644 --- a/scripts/prompt_matrix.py +++ b/scripts/prompt_matrix.py @@ -39,7 +39,7 @@ def draw_xy_grid(xs, ys, x_label, y_label, cell): class Script(scripts.Script): def title(self): - return "Prompt Matrix" + return "Prompt matrix" def ui(self, is_img2img): with gr.Row(): diff --git a/scripts/prompts_from_file.py b/scripts/prompts_from_file.py index 3e9745f6b..465a44974 100644 --- a/scripts/prompts_from_file.py +++ b/scripts/prompts_from_file.py @@ -92,7 +92,7 @@ def load_prompt_file(file): class Script(scripts.Script): def title(self): - return "Prompts from File" + return "Prompts from file" def ui(self, is_img2img): with gr.Row(): diff --git a/scripts/x_adapter.py b/scripts/x_adapter.py index 58d6fb9eb..7c1341701 100644 --- a/scripts/x_adapter.py +++ b/scripts/x_adapter.py @@ -25,8 +25,8 @@ def ui(self, _is_img2img): model = gr.Dropdown(label='Adapter model', choices=['None'] + sd_models.checkpoint_tiles(), value='None') sampler = gr.Dropdown(label='Adapter sampler', choices=[s.name for s in sd_samplers.samplers], value='Default') with gr.Row(): - width = gr.Slider(label='Adapter width', minimum=64, maximum=2048, step=8, value=512) - height = gr.Slider(label='Adapter height', minimum=64, maximum=2048, step=8, value=512) + width = gr.Slider(label='Adapter width', minimum=64, maximum=2048, step=8, value=1024) + height = gr.Slider(label='Adapter height', minimum=64, maximum=2048, step=8, value=1024) with gr.Row(): start = gr.Slider(label='Adapter start', minimum=0.0, maximum=1.0, step=0.01, value=0.5) scale = gr.Slider(label='Adapter scale', minimum=0.0, maximum=1.0, step=0.01, value=1.0) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index 75b4760bb..2800a3722 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -18,18 +18,21 @@ def apply_field(field): def fun(p, x, xs): + shared.log.debug(f'XYZ grid apply field: {field}={x}') setattr(p, field, x) return fun def apply_task_args(field): def fun(p, x, xs): + shared.log.debug(f'XYZ grid apply task-arg: {field}={x}') p.task_args[field] = x return fun def apply_setting(field): def fun(p, x, xs): + shared.log.debug(f'XYZ grid apply setting: {field}={x}') shared.opts.data[field] = x return fun @@ -40,6 +43,7 @@ def apply_prompt(p, x, xs): else: p.prompt = p.prompt.replace(xs[0], x) p.negative_prompt = p.negative_prompt.replace(xs[0], x) + shared.log.debug(f'XYZ grid apply prompt: "{xs[0]}"="{x}"') def apply_order(p, x, xs): @@ -65,6 +69,7 @@ def apply_sampler(p, x, xs): shared.log.warning(f"XYZ grid: unknown sampler: {x}") else: p.sampler_name = sampler_name + shared.log.debug(f'XYZ grid apply sampler: "{x}"') def apply_hr_sampler_name(p, x, xs): @@ -73,6 +78,7 @@ def apply_hr_sampler_name(p, x, xs): 
shared.log.warning(f"XYZ grid: unknown sampler: {x}") else: p.hr_sampler_name = hr_sampler_name + shared.log.debug(f'XYZ grid apply HR sampler: "{x}"') def confirm_samplers(p, xs): @@ -90,6 +96,7 @@ def apply_checkpoint(p, x, xs): else: sd_models.reload_model_weights(shared.sd_model, info) p.override_settings['sd_model_checkpoint'] = info.name + shared.log.debug(f'XYZ grid apply checkpoint: "{x}"') def apply_refiner(p, x, xs): @@ -103,6 +110,7 @@ def apply_refiner(p, x, xs): else: sd_models.reload_model_weights(shared.sd_refiner, info) p.override_settings['sd_model_refiner'] = info.name + shared.log.debug(f'XYZ grid apply refiner: "{x}"') def apply_dict(p, x, xs): @@ -116,11 +124,13 @@ def apply_dict(p, x, xs): shared.opts.sd_model_dict = info_dict.name # this will trigger reload_model_weights via onchange handler p.override_settings['sd_model_checkpoint'] = info_ckpt.name p.override_settings['sd_model_dict'] = info_dict.name + shared.log.debug(f'XYZ grid apply model dict: "{x}"') def apply_clip_skip(p, x, xs): p.clip_skip = x shared.opts.data["clip_skip"] = x + shared.log.debug(f'XYZ grid apply clip-skip: "{x}"') def find_vae(name: str): @@ -139,6 +149,7 @@ def find_vae(name: str): def apply_vae(p, x, xs): sd_vae.reload_vae_weights(shared.sd_model, vae_file=find_vae(x)) + shared.log.debug(f'XYZ grid apply VAE: "{x}"') def list_lora(): @@ -153,15 +164,18 @@ def apply_lora(p, x, xs): return x = os.path.basename(x) p.prompt = p.prompt + f" " + shared.log.debug(f'XYZ grid apply LoRA: "{x}"') def apply_te(p, x, xs): shared.opts.data["sd_text_encoder"] = x sd_models.reload_text_encoder() + shared.log.debug(f'XYZ grid apply text-encoder: "{x}"') def apply_styles(p: processing.StableDiffusionProcessingTxt2Img, x: str, _): p.styles.extend(x.split(',')) + shared.log.debug(f'XYZ grid apply style: "{x}"') def apply_upscaler(p: processing.StableDiffusionProcessingTxt2Img, opt, x): @@ -169,6 +183,13 @@ def apply_upscaler(p: processing.StableDiffusionProcessingTxt2Img, opt, x): p.hr_force = True p.denoising_strength = 0.0 p.hr_upscaler = opt + shared.log.debug(f'XYZ grid apply upscaler: "{x}"') + + +def apply_context(p: processing.StableDiffusionProcessingTxt2Img, opt, x): + p.resize_mode = 5 + p.resize_context = opt + shared.log.debug(f'XYZ grid apply resize-context: "{x}"') def apply_face_restore(p, opt, x): @@ -182,11 +203,13 @@ def apply_face_restore(p, opt, x): else: is_active = opt in ('true', 'yes', 'y', '1') p.restore_faces = is_active + shared.log.debug(f'XYZ grid apply face-restore: "{x}"') def apply_override(field): def fun(p, x, xs): p.override_settings[field] = x + shared.log.debug(f'XYZ grid apply override: "{field}"="{x}"') return fun @@ -289,6 +312,7 @@ def __init__(self, *args, **kwargs): AxisOption("[Refiner] Refiner start", float, apply_field("refiner_start")), AxisOption("[Refiner] Refiner steps", float, apply_field("refiner_steps")), AxisOption("[Postprocess] Upscaler", str, apply_upscaler, choices=lambda: [x.name for x in shared.sd_upscalers][1:]), + AxisOption("[Postprocess] Context", str, apply_context, choices=lambda: ["Add with forward", "Remove with forward", "Add with backward", "Remove with backward"]), AxisOption("[Postprocess] Face restore", str, apply_face_restore, fmt=format_value), AxisOption("[HDR] Mode", int, apply_field("hdr_mode")), AxisOption("[HDR] Brightness", float, apply_field("hdr_brightness")), diff --git a/scripts/xyz_grid_on.py b/scripts/xyz_grid_on.py new file mode 100644 index 000000000..ac362ba06 --- /dev/null +++ b/scripts/xyz_grid_on.py @@ -0,0 +1,832 
@@ +# pylint: disable=unused-argument + +import os +import re +import csv +import random +from collections import namedtuple +from copy import copy +from itertools import permutations, chain +from io import StringIO +from PIL import Image +import numpy as np +import gradio as gr +from modules import shared, errors, scripts, images, sd_samplers, processing, sd_models, sd_vae, ipadapter +from modules.ui_components import ToolButton +import modules.ui_symbols as symbols + + +active = False +cache = None + + +def apply_field(field): + def fun(p, x, xs): + shared.log.debug(f'XYZ grid apply field: {field}={x}') + setattr(p, field, x) + return fun + + +def apply_task_args(field): + def fun(p, x, xs): + shared.log.debug(f'XYZ grid apply task-arg: {field}={x}') + p.task_args[field] = x + return fun + + +def apply_setting(field): + def fun(p, x, xs): + shared.log.debug(f'XYZ grid apply setting: {field}={x}') + shared.opts.data[field] = x + return fun + + +def apply_prompt(p, x, xs): + if xs[0] not in p.prompt and xs[0] not in p.negative_prompt: + shared.log.warning(f"XYZ grid: prompt S/R did not find {xs[0]} in prompt or negative prompt.") + else: + p.prompt = p.prompt.replace(xs[0], x) + p.all_prompts = p.batch_size * [p.prompt] + p.negative_prompt = p.negative_prompt.replace(xs[0], x) + p.all_negative_prompts = p.batch_size * [p.negative_prompt] + shared.log.debug(f'XYZ grid apply prompt: "{xs[0]}"="{x}"') + + +def apply_order(p, x, xs): + token_order = [] + for token in x: + token_order.append((p.prompt.find(token), token)) + token_order.sort(key=lambda t: t[0]) + prompt_parts = [] + for _, token in token_order: + n = p.prompt.find(token) + prompt_parts.append(p.prompt[0:n]) + p.prompt = p.prompt[n + len(token):] + prompt_tmp = "" + for idx, part in enumerate(prompt_parts): + prompt_tmp += part + prompt_tmp += x[idx] + p.prompt = prompt_tmp + p.prompt + + +def apply_sampler(p, x, xs): + sampler_name = sd_samplers.samplers_map.get(x.lower(), None) + if sampler_name is None: + shared.log.warning(f"XYZ grid: unknown sampler: {x}") + else: + p.sampler_name = sampler_name + shared.log.debug(f'XYZ grid apply sampler: "{x}"') + + +def apply_hr_sampler_name(p, x, xs): + hr_sampler_name = sd_samplers.samplers_map.get(x.lower(), None) + if hr_sampler_name is None: + shared.log.warning(f"XYZ grid: unknown sampler: {x}") + else: + p.hr_sampler_name = hr_sampler_name + shared.log.debug(f'XYZ grid apply HR sampler: "{x}"') + + +def confirm_samplers(p, xs): + for x in xs: + if x.lower() not in sd_samplers.samplers_map: + shared.log.warning(f"XYZ grid: unknown sampler: {x}") + + +def apply_checkpoint(p, x, xs): + if x == shared.opts.sd_model_checkpoint: + return + info = sd_models.get_closet_checkpoint_match(x) + if info is None: + shared.log.warning(f"XYZ grid: apply checkpoint unknown checkpoint: {x}") + else: + sd_models.reload_model_weights(shared.sd_model, info) + p.override_settings['sd_model_checkpoint'] = info.name + shared.log.debug(f'XYZ grid apply checkpoint: "{x}"') + + +def apply_refiner(p, x, xs): + if x == shared.opts.sd_model_refiner: + return + if x == 'None': + return + info = sd_models.get_closet_checkpoint_match(x) + if info is None: + shared.log.warning(f"XYZ grid: apply refiner unknown checkpoint: {x}") + else: + sd_models.reload_model_weights(shared.sd_refiner, info) + p.override_settings['sd_model_refiner'] = info.name + shared.log.debug(f'XYZ grid apply refiner: "{x}"') + + +def apply_dict(p, x, xs): + if x == shared.opts.sd_model_dict: + return + info_dict = 
sd_models.get_closet_checkpoint_match(x) + info_ckpt = sd_models.get_closet_checkpoint_match(shared.opts.sd_model_checkpoint) + if info_dict is None or info_ckpt is None: + shared.log.warning(f"XYZ grid: apply dict unknown checkpoint: {x}") + else: + shared.opts.sd_model_dict = info_dict.name # this will trigger reload_model_weights via onchange handler + p.override_settings['sd_model_checkpoint'] = info_ckpt.name + p.override_settings['sd_model_dict'] = info_dict.name + shared.log.debug(f'XYZ grid apply model dict: "{x}"') + + +def apply_clip_skip(p, x, xs): + p.clip_skip = x + shared.opts.data["clip_skip"] = x + shared.log.debug(f'XYZ grid apply clip-skip: "{x}"') + + +def find_vae(name: str): + if name.lower() in ['auto', 'automatic']: + return sd_vae.unspecified + if name.lower() == 'none': + return None + else: + choices = [x for x in sorted(sd_vae.vae_dict, key=lambda x: len(x)) if name.lower().strip() in x.lower()] + if len(choices) == 0: + shared.log.warning(f"No VAE found for {name}; using automatic") + return sd_vae.unspecified + else: + return sd_vae.vae_dict[choices[0]] + + +def apply_vae(p, x, xs): + sd_vae.reload_vae_weights(shared.sd_model, vae_file=find_vae(x)) + shared.log.debug(f'XYZ grid apply VAE: "{x}"') + + +def list_lora(): + import sys + lora = [v for k, v in sys.modules.items() if k == 'networks'][0] + loras = [v.fullname for v in lora.available_networks.values()] + return ['None'] + loras + + +def apply_lora(p, x, xs): + if x == 'None': + return + x = os.path.basename(x) + p.prompt = p.prompt + f" " + shared.log.debug(f'XYZ grid apply LoRA: "{x}"') + + +def apply_te(p, x, xs): + shared.opts.data["sd_text_encoder"] = x + sd_models.reload_text_encoder() + shared.log.debug(f'XYZ grid apply text-encoder: "{x}"') + + +def apply_styles(p: processing.StableDiffusionProcessingTxt2Img, x: str, _): + p.styles.extend(x.split(',')) + shared.log.debug(f'XYZ grid apply style: "{x}"') + + +def apply_upscaler(p: processing.StableDiffusionProcessingTxt2Img, opt, x): + p.enable_hr = True + p.hr_force = True + p.denoising_strength = 0.0 + p.hr_upscaler = opt + shared.log.debug(f'XYZ grid apply upscaler: "{x}"') + + +def apply_context(p: processing.StableDiffusionProcessingTxt2Img, opt, x): + p.resize_mode = 5 + p.resize_context = opt + shared.log.debug(f'XYZ grid apply resize-context: "{x}"') + + +def apply_face_restore(p, opt, x): + opt = opt.lower() + if opt == 'codeformer': + is_active = True + p.face_restoration_model = 'CodeFormer' + elif opt == 'gfpgan': + is_active = True + p.face_restoration_model = 'GFPGAN' + else: + is_active = opt in ('true', 'yes', 'y', '1') + p.restore_faces = is_active + shared.log.debug(f'XYZ grid apply face-restore: "{x}"') + + +def apply_override(field): + def fun(p, x, xs): + p.override_settings[field] = x + shared.log.debug(f'XYZ grid apply override: "{field}"="{x}"') + return fun + + +def format_value_add_label(p, opt, x): + if type(x) == float: + x = round(x, 8) + return f"{opt.label}: {x}" + + +def format_value(p, opt, x): + if type(x) == float: + x = round(x, 8) + return x + + +def format_value_join_list(p, opt, x): + return ", ".join(x) + + +def do_nothing(p, x, xs): + pass + + +def format_nothing(p, opt, x): + return "" + + +def str_permutations(x): + """dummy function for specifying it in AxisOption's type when you want to get a list of permutations""" + return x + + +def list_to_csv_string(data_list): + with StringIO() as o: + csv.writer(o).writerow(data_list) + return o.getvalue().strip() + + +class AxisOption: + def __init__(self, label, 
tipe, apply, fmt=format_value_add_label, confirm=None, cost=0.0, choices=None): + self.label = label + self.type = tipe + self.apply = apply + self.format_value = fmt + self.confirm = confirm + self.cost = cost + self.choices = choices + + +class AxisOptionImg2Img(AxisOption): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.is_img2img = True + +class AxisOptionTxt2Img(AxisOption): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.is_img2img = False + + +axis_options = [ + AxisOption("Nothing", str, do_nothing, fmt=format_nothing), + AxisOption("Prompt S/R", str, apply_prompt, fmt=format_value), + AxisOption("Model", str, apply_checkpoint, fmt=format_value, cost=1.0, choices=lambda: sorted(sd_models.checkpoints_list)), + AxisOption("VAE", str, apply_vae, cost=0.7, choices=lambda: ['None'] + list(sd_vae.vae_dict)), + AxisOption("LoRA", str, apply_lora, cost=0.5, choices=list_lora), + AxisOption("LoRA strength", float, apply_setting('extra_networks_default_multiplier')), + AxisOption("Text encoder", str, apply_te, cost=0.7, choices=lambda: ['None', 'T5 FP4', 'T5 FP8', 'T5 FP16']), + AxisOption("Styles", str, apply_styles, choices=lambda: [s.name for s in shared.prompt_styles.styles.values()]), + AxisOption("Seed", int, apply_field("seed")), + AxisOption("Steps", int, apply_field("steps")), + AxisOption("CFG scale", float, apply_field("cfg_scale")), + AxisOption("Guidance end", float, apply_field("cfg_end")), + AxisOption("Variation seed", int, apply_field("subseed")), + AxisOption("Variation strength", float, apply_field("subseed_strength")), + AxisOption("Clip skip", float, apply_clip_skip), + AxisOption("Denoising strength", float, apply_field("denoising_strength")), + AxisOption("Prompt order", str_permutations, apply_order, fmt=format_value_join_list), + AxisOption("Model dictionary", str, apply_dict, fmt=format_value, cost=1.0, choices=lambda: ['None'] + list(sd_models.checkpoints_list)), + AxisOptionImg2Img("Image mask weight", float, apply_field("inpainting_mask_weight")), + AxisOptionTxt2Img("[Sampler] Name", str, apply_sampler, fmt=format_value, confirm=confirm_samplers, choices=lambda: [x.name for x in sd_samplers.samplers]), + AxisOptionImg2Img("[Sampler] Name", str, apply_sampler, fmt=format_value, confirm=confirm_samplers, choices=lambda: [x.name for x in sd_samplers.samplers_for_img2img]), + AxisOption("[Sampler] Timestep spacing", str, apply_setting("schedulers_timestep_spacing"), choices=lambda: ['default', 'linspace', 'leading', 'trailing']), + AxisOption("[Sampler] Sigma min", float, apply_field("s_min")), + AxisOption("[Sampler] Sigma max", float, apply_field("s_max")), + AxisOption("[Sampler] Sigma tmin", float, apply_field("s_tmin")), + AxisOption("[Sampler] Sigma tmax", float, apply_field("s_tmax")), + AxisOption("[Sampler] Sigma churn", float, apply_field("s_churn")), + AxisOption("[Sampler] Sigma noise", float, apply_field("s_noise")), + AxisOption("[Sampler] Shift", float, apply_setting("schedulers_shift")), + AxisOption("[Sampler] ETA", float, apply_setting("scheduler_eta")), + AxisOption("[Sampler] Solver order", int, apply_setting("schedulers_solver_order")), + AxisOption("[Second pass] Upscaler", str, apply_field("hr_upscaler"), choices=lambda: [*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]]), + AxisOption("[Second pass] Sampler", str, apply_hr_sampler_name, fmt=format_value, confirm=confirm_samplers, choices=lambda: [x.name for x in sd_samplers.samplers]), + 
AxisOption("[Second pass] Denoising strength", float, apply_field("denoising_strength")), + AxisOption("[Second pass] Hires steps", int, apply_field("hr_second_pass_steps")), + AxisOption("[Second pass] CFG scale", float, apply_field("image_cfg_scale")), + AxisOption("[Second pass] Guidance rescale", float, apply_field("diffusers_guidance_rescale")), + AxisOption("[Refiner] Model", str, apply_refiner, fmt=format_value, cost=1.0, choices=lambda: ['None'] + sorted(sd_models.checkpoints_list)), + AxisOption("[Refiner] Refiner start", float, apply_field("refiner_start")), + AxisOption("[Refiner] Refiner steps", float, apply_field("refiner_steps")), + AxisOption("[Postprocess] Upscaler", str, apply_upscaler, choices=lambda: [x.name for x in shared.sd_upscalers][1:]), + AxisOption("[Postprocess] Context", str, apply_context, choices=lambda: ["Add with forward", "Remove with forward", "Add with backward", "Remove with backward"]), + AxisOption("[Postprocess] Face restore", str, apply_face_restore, fmt=format_value), + AxisOption("[HDR] Mode", int, apply_field("hdr_mode")), + AxisOption("[HDR] Brightness", float, apply_field("hdr_brightness")), + AxisOption("[HDR] Color", float, apply_field("hdr_color")), + AxisOption("[HDR] Sharpen", float, apply_field("hdr_sharpen")), + AxisOption("[HDR] Clamp boundary", float, apply_field("hdr_boundary")), + AxisOption("[HDR] Clamp threshold", float, apply_field("hdr_threshold")), + AxisOption("[HDR] Maximize center shift", float, apply_field("hdr_max_center")), + AxisOption("[HDR] Maximize boundary", float, apply_field("hdr_max_boundry")), + AxisOption("[HDR] Tint color hex", str, apply_field("hdr_color_picker")), + AxisOption("[HDR] Tint ratio", float, apply_field("hdr_tint_ratio")), + AxisOption("[Token Merging] ToMe ratio", float, apply_setting('tome_ratio')), + AxisOption("[Token Merging] ToDo ratio", float, apply_setting('todo_ratio')), + AxisOption("[FreeU] 1st stage backbone factor", float, apply_setting('freeu_b1')), + AxisOption("[FreeU] 2nd stage backbone factor", float, apply_setting('freeu_b2')), + AxisOption("[FreeU] 1st stage skip factor", float, apply_setting('freeu_s1')), + AxisOption("[FreeU] 2nd stage skip factor", float, apply_setting('freeu_s2')), + AxisOption("[IP adapter] Name", str, apply_field('ip_adapter_names'), cost=1.0, choices=lambda: list(ipadapter.ADAPTERS)), + AxisOption("[IP adapter] Scale", float, apply_field('ip_adapter_scales')), + AxisOption("[IP adapter] Starts", float, apply_field('ip_adapter_starts')), + AxisOption("[IP adapter] Ends", float, apply_field('ip_adapter_ends')), + AxisOption("[HiDiffusion] T1", float, apply_override('hidiffusion_t1')), + AxisOption("[HiDiffusion] T2", float, apply_override('hidiffusion_t2')), + AxisOption("[HiDiffusion] Agression step", float, apply_field('hidiffusion_steps')), + AxisOption("[PAG] Attention scale", float, apply_field('pag_scale')), + AxisOption("[PAG] Adaptive scaling", float, apply_field('pag_adaptive')), + AxisOption("[PAG] Applied layers", str, apply_setting('pag_apply_layers')), +] + + +def draw_xyz_grid(p, xs, ys, zs, x_labels, y_labels, z_labels, cell, draw_legend, include_lone_images, include_sub_grids, first_axes_processed, second_axes_processed, margin_size, no_grid): + hor_texts = [[images.GridAnnotation(x)] for x in x_labels] + ver_texts = [[images.GridAnnotation(y)] for y in y_labels] + title_texts = [[images.GridAnnotation(z)] for z in z_labels] + list_size = (len(xs) * len(ys) * len(zs)) + processed_result = None + shared.state.job_count = list_size * p.n_iter 
+
+    def process_cell(x, y, z, ix, iy, iz):
+        nonlocal processed_result
+
+        def index(ix, iy, iz):
+            return ix + iy * len(xs) + iz * len(xs) * len(ys)
+
+        shared.state.job = 'grid'
+        processed: processing.Processed = cell(x, y, z, ix, iy, iz)
+        if processed_result is None:
+            processed_result = copy(processed)
+            if processed_result is None:
+                shared.log.error('XYZ grid: no processing results')
+                return processing.Processed(p, [])
+            processed_result.images = [None] * list_size
+            processed_result.all_prompts = [None] * list_size
+            processed_result.all_seeds = [None] * list_size
+            processed_result.infotexts = [None] * list_size
+            processed_result.index_of_first_image = 1
+        idx = index(ix, iy, iz)
+        if processed is not None and processed.images:
+            processed_result.images[idx] = processed.images[0]
+            processed_result.all_prompts[idx] = processed.prompt
+            processed_result.all_seeds[idx] = processed.seed
+            processed_result.infotexts[idx] = processed.infotexts[0]
+        else:
+            cell_mode = "P"
+            cell_size = (processed_result.width, processed_result.height)
+            if processed_result.images[0] is not None:
+                cell_mode = processed_result.images[0].mode
+                cell_size = processed_result.images[0].size
+            processed_result.images[idx] = Image.new(cell_mode, cell_size)
+
+    if first_axes_processed == 'x':
+        for ix, x in enumerate(xs):
+            if second_axes_processed == 'y':
+                for iy, y in enumerate(ys):
+                    for iz, z in enumerate(zs):
+                        process_cell(x, y, z, ix, iy, iz)
+            else:
+                for iz, z in enumerate(zs):
+                    for iy, y in enumerate(ys):
+                        process_cell(x, y, z, ix, iy, iz)
+    elif first_axes_processed == 'y':
+        for iy, y in enumerate(ys):
+            if second_axes_processed == 'x':
+                for ix, x in enumerate(xs):
+                    for iz, z in enumerate(zs):
+                        process_cell(x, y, z, ix, iy, iz)
+            else:
+                for iz, z in enumerate(zs):
+                    for ix, x in enumerate(xs):
+                        process_cell(x, y, z, ix, iy, iz)
+    elif first_axes_processed == 'z':
+        for iz, z in enumerate(zs):
+            if second_axes_processed == 'x':
+                for ix, x in enumerate(xs):
+                    for iy, y in enumerate(ys):
+                        process_cell(x, y, z, ix, iy, iz)
+            else:
+                for iy, y in enumerate(ys):
+                    for ix, x in enumerate(xs):
+                        process_cell(x, y, z, ix, iy, iz)
+
+    if not processed_result:
+        shared.log.error("XYZ grid: Failed to initialize processing")
+        return processing.Processed(p, [])
+    elif not any(processed_result.images):
+        shared.log.error("XYZ grid: Failed to return processed image")
+        return processing.Processed(p, [])
+
+    z_count = len(zs)
+    for i in range(z_count):
+        start_index = (i * len(xs) * len(ys)) + i
+        end_index = start_index + len(xs) * len(ys)
+        if (not no_grid or include_sub_grids) and images.check_grid_size(processed_result.images[start_index:end_index]):
+            grid = images.image_grid(processed_result.images[start_index:end_index], rows=len(ys))
+            if draw_legend:
+                grid = images.draw_grid_annotations(grid, processed_result.images[start_index].size[0], processed_result.images[start_index].size[1], hor_texts, ver_texts, margin_size, title=title_texts[i])
+            processed_result.images.insert(i, grid)
+            processed_result.all_prompts.insert(i, processed_result.all_prompts[start_index])
+            processed_result.all_seeds.insert(i, processed_result.all_seeds[start_index])
+            processed_result.infotexts.insert(i, processed_result.infotexts[start_index])
+    sub_grid_size = processed_result.images[0].size
+    if not no_grid and images.check_grid_size(processed_result.images[:z_count]):
+        z_grid = images.image_grid(processed_result.images[:z_count], rows=1)
+        if draw_legend:
+            z_grid = images.draw_grid_annotations(z_grid, sub_grid_size[0], sub_grid_size[1], [[images.GridAnnotation()] for _ in z_labels], [[images.GridAnnotation()]])
+        processed_result.images.insert(0, z_grid)
+        #processed_result.all_prompts.insert(0, processed_result.all_prompts[0])
+        #processed_result.all_seeds.insert(0, processed_result.all_seeds[0])
+        processed_result.infotexts.insert(0, processed_result.infotexts[0])
+    return processed_result
+
+
+class SharedSettingsStackHelper(object):
+    vae = None
+    schedulers_solver_order = None
+    tome_ratio = None
+    todo_ratio = None
+    sd_model_checkpoint = None
+    sd_model_dict = None
+    sd_vae_checkpoint = None
+
+    def __enter__(self):
+        #Save overridden settings so they can be restored later.
+        self.vae = shared.opts.sd_vae
+        self.schedulers_solver_order = shared.opts.schedulers_solver_order
+        self.tome_ratio = shared.opts.tome_ratio
+        self.todo_ratio = shared.opts.todo_ratio
+        self.sd_model_checkpoint = shared.opts.sd_model_checkpoint
+        self.sd_model_dict = shared.opts.sd_model_dict
+        self.sd_vae_checkpoint = shared.opts.sd_vae
+
+    def __exit__(self, exc_type, exc_value, tb):
+        #Restore overridden settings after plot generation.
+        shared.opts.data["sd_vae"] = self.vae
+        shared.opts.data["schedulers_solver_order"] = self.schedulers_solver_order
+        shared.opts.data["tome_ratio"] = self.tome_ratio
+        shared.opts.data["todo_ratio"] = self.todo_ratio
+        if self.sd_model_dict != shared.opts.sd_model_dict:
+            shared.opts.data["sd_model_dict"] = self.sd_model_dict
+        if self.sd_model_checkpoint != shared.opts.sd_model_checkpoint:
+            shared.opts.data["sd_model_checkpoint"] = self.sd_model_checkpoint
+            sd_models.reload_model_weights()
+        if self.sd_vae_checkpoint != shared.opts.sd_vae:
+            shared.opts.data["sd_vae"] = self.sd_vae_checkpoint
+            sd_vae.reload_vae_weights()
+
+
+re_range = re.compile(r'([-+]?[0-9]*\.?[0-9]+)-([-+]?[0-9]*\.?[0-9]+):?([0-9]+)?')
+
+class Script(scripts.Script):
+    current_axis_options = []
+
+    def show(self, is_img2img):
+        return scripts.AlwaysVisible
+
+    def title(self):
+        return "XYZ Grid"
+
+    def ui(self, is_img2img):
+        self.current_axis_options = [x for x in axis_options if type(x) == AxisOption or x.is_img2img == is_img2img]
+
+        with gr.Accordion('XYZ Grid', open = False, elem_id='xyz_grid'):
+            with gr.Row():
+                enabled = gr.Checkbox(label = 'Enabled', value = False)
+            with gr.Row():
+                with gr.Column():
+                    with gr.Row(variant='compact'):
+                        x_type = gr.Dropdown(label="X type", container=True, choices=[x.label for x in self.current_axis_options], value=self.current_axis_options[0].label, type="index", elem_id=self.elem_id("x_type"))
+                        x_values = gr.Textbox(label="X values", container=True, lines=1, elem_id=self.elem_id("x_values"))
+                        x_values_dropdown = gr.Dropdown(label="X values", container=True, visible=False, multiselect=True, interactive=True)
+                        fill_x_button = ToolButton(value=symbols.fill, elem_id="xyz_grid_fill_x_tool_button", visible=False)
+                    with gr.Row(variant='compact'):
+                        y_type = gr.Dropdown(label="Y type", container=True, choices=[x.label for x in self.current_axis_options], value=self.current_axis_options[0].label, type="index", elem_id=self.elem_id("y_type"))
+                        y_values = gr.Textbox(label="Y values", container=True, lines=1, elem_id=self.elem_id("y_values"))
+                        y_values_dropdown = gr.Dropdown(label="Y values", container=True, visible=False, multiselect=True, interactive=True)
+                        fill_y_button = ToolButton(value=symbols.fill, elem_id="xyz_grid_fill_y_tool_button", visible=False)
+                    with gr.Row(variant='compact'):
+                        z_type = gr.Dropdown(label="Z type", container=True, choices=[x.label for x in self.current_axis_options], value=self.current_axis_options[0].label, type="index", elem_id=self.elem_id("z_type"))
+                        z_values = gr.Textbox(label="Z values", container=True, lines=1, elem_id=self.elem_id("z_values"))
+                        z_values_dropdown = gr.Dropdown(label="Z values", container=True, visible=False, multiselect=True, interactive=True)
+                        fill_z_button = ToolButton(value=symbols.fill, elem_id="xyz_grid_fill_z_tool_button", visible=False)
+            with gr.Row():
+                with gr.Column():
+                    csv_mode = gr.Checkbox(label='Text inputs', value=False, elem_id=self.elem_id("csv_mode"), container=False)
+                    draw_legend = gr.Checkbox(label='Legend', value=True, elem_id=self.elem_id("draw_legend"), container=False)
+                    no_fixed_seeds = gr.Checkbox(label='Random seeds', value=False, elem_id=self.elem_id("no_fixed_seeds"), container=False)
+                with gr.Column():
+                    no_grid = gr.Checkbox(label='Skip grid', value=False, elem_id=self.elem_id("no_xyz_grid"), container=False)
+                    include_lone_images = gr.Checkbox(label='Sub-images', value=False, elem_id=self.elem_id("include_lone_images"), container=False)
+                    include_sub_grids = gr.Checkbox(label='Sub-grids', value=False, elem_id=self.elem_id("include_sub_grids"), container=False)
+            with gr.Row():
+                margin_size = gr.Slider(label="Grid margins", minimum=0, maximum=500, value=0, step=2, elem_id=self.elem_id("margin_size"))
+            with gr.Row():
+                swap_xy_axes_button = gr.Button(value="Swap X/Y", elem_id="xy_grid_swap_axes_button", variant="secondary")
+                swap_yz_axes_button = gr.Button(value="Swap Y/Z", elem_id="yz_grid_swap_axes_button", variant="secondary")
+                swap_xz_axes_button = gr.Button(value="Swap X/Z", elem_id="xz_grid_swap_axes_button", variant="secondary")
+
+        def swap_axes(axis1_type, axis1_values, axis1_values_dropdown, axis2_type, axis2_values, axis2_values_dropdown):
+            return self.current_axis_options[axis2_type].label, axis2_values, axis2_values_dropdown, self.current_axis_options[axis1_type].label, axis1_values, axis1_values_dropdown
+
+        xy_swap_args = [x_type, x_values, x_values_dropdown, y_type, y_values, y_values_dropdown]
+        swap_xy_axes_button.click(swap_axes, inputs=xy_swap_args, outputs=xy_swap_args)
+        yz_swap_args = [y_type, y_values, y_values_dropdown, z_type, z_values, z_values_dropdown]
+        swap_yz_axes_button.click(swap_axes, inputs=yz_swap_args, outputs=yz_swap_args)
+        xz_swap_args = [x_type, x_values, x_values_dropdown, z_type, z_values, z_values_dropdown]
+        swap_xz_axes_button.click(swap_axes, inputs=xz_swap_args, outputs=xz_swap_args)
+
+        def fill(axis_type, csv_mode):
+            axis = self.current_axis_options[axis_type]
+            if axis.choices:
+                if csv_mode:
+                    return list_to_csv_string(axis.choices()), gr.update()
+                else:
+                    return gr.update(), axis.choices()
+            else:
+                return gr.update(), gr.update()
+
+        fill_x_button.click(fn=fill, inputs=[x_type, csv_mode], outputs=[x_values, x_values_dropdown])
+        fill_y_button.click(fn=fill, inputs=[y_type, csv_mode], outputs=[y_values, y_values_dropdown])
+        fill_z_button.click(fn=fill, inputs=[z_type, csv_mode], outputs=[z_values, z_values_dropdown])
+
+        def select_axis(axis_type, axis_values, axis_values_dropdown, csv_mode):
+            choices = self.current_axis_options[axis_type].choices
+            has_choices = choices is not None
+            current_values = axis_values
+            current_dropdown_values = axis_values_dropdown
+            if has_choices:
+                choices = choices()
+                if csv_mode:
+                    current_dropdown_values = list(filter(lambda x: x in choices, current_dropdown_values))
+                    current_values = list_to_csv_string(current_dropdown_values)
+                else:
+                    current_dropdown_values = [x.strip() for x in chain.from_iterable(csv.reader(StringIO(axis_values)))]
+                    current_dropdown_values = list(filter(lambda x: x in choices, current_dropdown_values))
+
+            return (gr.Button.update(visible=has_choices), gr.Textbox.update(visible=not has_choices or csv_mode, value=current_values),
+                    gr.update(choices=choices if has_choices else None, visible=has_choices and not csv_mode, value=current_dropdown_values))
+
+        x_type.change(fn=select_axis, inputs=[x_type, x_values, x_values_dropdown, csv_mode], outputs=[fill_x_button, x_values, x_values_dropdown])
+        y_type.change(fn=select_axis, inputs=[y_type, y_values, y_values_dropdown, csv_mode], outputs=[fill_y_button, y_values, y_values_dropdown])
+        z_type.change(fn=select_axis, inputs=[z_type, z_values, z_values_dropdown, csv_mode], outputs=[fill_z_button, z_values, z_values_dropdown])
+
+        def change_choice_mode(csv_mode, x_type, x_values, x_values_dropdown, y_type, y_values, y_values_dropdown, z_type, z_values, z_values_dropdown):
+            _fill_x_button, _x_values, _x_values_dropdown = select_axis(x_type, x_values, x_values_dropdown, csv_mode)
+            _fill_y_button, _y_values, _y_values_dropdown = select_axis(y_type, y_values, y_values_dropdown, csv_mode)
+            _fill_z_button, _z_values, _z_values_dropdown = select_axis(z_type, z_values, z_values_dropdown, csv_mode)
+            return _fill_x_button, _x_values, _x_values_dropdown, _fill_y_button, _y_values, _y_values_dropdown, _fill_z_button, _z_values, _z_values_dropdown
+
+        csv_mode.change(fn=change_choice_mode, inputs=[csv_mode, x_type, x_values, x_values_dropdown, y_type, y_values, y_values_dropdown, z_type, z_values, z_values_dropdown], outputs=[fill_x_button, x_values, x_values_dropdown, fill_y_button, y_values, y_values_dropdown, fill_z_button, z_values, z_values_dropdown])
+
+        def get_dropdown_update_from_params(axis,params):
+            val_key = f"{axis} Values"
+            vals = params.get(val_key,"")
+            valslist = [x.strip() for x in chain.from_iterable(csv.reader(StringIO(vals))) if x]
+            return gr.update(value = valslist)
+
+        self.infotext_fields = (
+            (x_type, "X Type"),
+            (x_values, "X Values"),
+            (x_values_dropdown, lambda params:get_dropdown_update_from_params("X",params)),
+            (y_type, "Y Type"),
+            (y_values, "Y Values"),
+            (y_values_dropdown, lambda params:get_dropdown_update_from_params("Y",params)),
+            (z_type, "Z Type"),
+            (z_values, "Z Values"),
+            (z_values_dropdown, lambda params:get_dropdown_update_from_params("Z",params)),
+        )
+
+        return [enabled, x_type, x_values, x_values_dropdown, y_type, y_values, y_values_dropdown, z_type, z_values, z_values_dropdown, csv_mode, draw_legend, no_fixed_seeds, no_grid, include_lone_images, include_sub_grids, margin_size]
+
+    def process(self, p, enabled, x_type, x_values, x_values_dropdown, y_type, y_values, y_values_dropdown, z_type, z_values, z_values_dropdown, csv_mode, draw_legend, no_fixed_seeds, no_grid, include_lone_images, include_sub_grids, margin_size): # pylint: disable=W0221
+        global active, cache # pylint: disable=W0603
+        if not enabled or active:
+            return
+        active = True
+        shared.log.debug(f'xyzgrid: x_type={x_type}|x_values={x_values}|x_values_dropdown={x_values_dropdown}|y_type={y_type}|y_values={y_values}|y_values_dropdown={y_values_dropdown}|z_type={z_type}|z_values={z_values}|z_values_dropdown={z_values_dropdown}|draw_legend={draw_legend}|include_lone_images={include_lone_images}|include_sub_grids={include_sub_grids}|no_grid={no_grid}|margin_size={margin_size}')
+        if not no_fixed_seeds:
+            processing.fix_seed(p)
+        if not shared.opts.return_grid:
+            p.batch_size = 1
+
+        def process_axis(opt, vals, vals_dropdown):
+            if opt.label == 'Nothing':
+                return [0]
+            if opt.choices is not None and not csv_mode:
+                valslist = vals_dropdown
+            else:
+                valslist = [x.strip() for x in chain.from_iterable(csv.reader(StringIO(vals))) if x]
+            if opt.type == int:
+                valslist_ext = []
+                for val in valslist:
+                    m = re_range.fullmatch(val)
+                    if m is not None:
+                        start_val = int(m.group(1)) if m.group(1) is not None else val
+                        end_val = int(m.group(2)) if m.group(2) is not None else val
+                        num = int(m.group(3)) if m.group(3) is not None else int(end_val-start_val)
+                        valslist_ext += [int(x) for x in np.linspace(start=start_val, stop=end_val, num=max(2, num)).tolist()]
+                        shared.log.debug(f'XYZ grid range: start={start_val} end={end_val} num={max(2, num)} list={valslist}')
+                    else:
+                        valslist_ext.append(int(val))
+                valslist.clear()
+                valslist = [x for x in valslist_ext if x not in valslist]
+            elif opt.type == float:
+                valslist_ext = []
+                for val in valslist:
+                    m = re_range.fullmatch(val)
+                    if m is not None:
+                        start_val = float(m.group(1)) if m.group(1) is not None else val
+                        end_val = float(m.group(2)) if m.group(2) is not None else val
+                        num = int(m.group(3)) if m.group(3) is not None else int(end_val-start_val)
+                        valslist_ext += [round(float(x), 2) for x in np.linspace(start=start_val, stop=end_val, num=max(2, num)).tolist()]
+                        shared.log.debug(f'XYZ grid range: start={start_val} end={end_val} num={max(2, num)} list={valslist}')
+                    else:
+                        valslist_ext.append(float(val))
+                valslist.clear()
+                valslist = [x for x in valslist_ext if x not in valslist]
+            elif opt.type == str_permutations: # pylint: disable=comparison-with-callable
+                valslist = list(permutations(valslist))
+            valslist = [opt.type(x) for x in valslist]
+            # Confirm options are valid before starting
+            if opt.confirm:
+                opt.confirm(p, valslist)
+            return valslist
+
+        x_opt = self.current_axis_options[x_type]
+        if x_opt.choices is not None and not csv_mode:
+            x_values = list_to_csv_string(x_values_dropdown)
+        xs = process_axis(x_opt, x_values, x_values_dropdown)
+        y_opt = self.current_axis_options[y_type]
+        if y_opt.choices is not None and not csv_mode:
+            y_values = list_to_csv_string(y_values_dropdown)
+        ys = process_axis(y_opt, y_values, y_values_dropdown)
+        z_opt = self.current_axis_options[z_type]
+        if z_opt.choices is not None and not csv_mode:
+            z_values = list_to_csv_string(z_values_dropdown)
+        zs = process_axis(z_opt, z_values, z_values_dropdown)
+        Image.MAX_IMAGE_PIXELS = None # disable check in Pillow and rely on check below to allow large custom image sizes
+
+        def fix_axis_seeds(axis_opt, axis_list):
+            if axis_opt.label in ['Seed', 'Var. seed']:
+                return [int(random.randrange(4294967294)) if val is None or val == '' or val == -1 else val for val in axis_list]
+            else:
+                return axis_list
+
+        if not no_fixed_seeds:
+            xs = fix_axis_seeds(x_opt, xs)
+            ys = fix_axis_seeds(y_opt, ys)
+            zs = fix_axis_seeds(z_opt, zs)
+
+        if x_opt.label == 'Steps':
+            total_steps = sum(xs) * len(ys) * len(zs)
+        elif y_opt.label == 'Steps':
+            total_steps = sum(ys) * len(xs) * len(zs)
+        elif z_opt.label == 'Steps':
+            total_steps = sum(zs) * len(xs) * len(ys)
+        else:
+            total_steps = p.steps * len(xs) * len(ys) * len(zs)
+        if isinstance(p, processing.StableDiffusionProcessingTxt2Img) and p.enable_hr:
+            if x_opt.label == "Hires steps":
+                total_steps += sum(xs) * len(ys) * len(zs)
+            elif y_opt.label == "Hires steps":
+                total_steps += sum(ys) * len(xs) * len(zs)
+            elif z_opt.label == "Hires steps":
+                total_steps += sum(zs) * len(xs) * len(ys)
+            elif p.hr_second_pass_steps:
+                total_steps += p.hr_second_pass_steps * len(xs) * len(ys) * len(zs)
+            else:
+                total_steps *= 2
+        total_steps *= p.n_iter
+        image_cell_count = p.n_iter * p.batch_size
+        shared.log.info(f"XYZ grid: images={len(xs)*len(ys)*len(zs)*image_cell_count} grid={len(zs)} {len(xs)}x{len(ys)} cells={len(zs)} steps={total_steps}")
+        AxisInfo = namedtuple('AxisInfo', ['axis', 'values'])
+        shared.state.xyz_plot_x = AxisInfo(x_opt, xs)
+        shared.state.xyz_plot_y = AxisInfo(y_opt, ys)
+        shared.state.xyz_plot_z = AxisInfo(z_opt, zs)
+        # If one of the axes is very slow to change between (like SD model checkpoint), then make sure it is in the outer iteration of the nested `for` loop.
+        first_axes_processed = 'z'
+        second_axes_processed = 'y'
+        if x_opt.cost > y_opt.cost and x_opt.cost > z_opt.cost:
+            first_axes_processed = 'x'
+            if y_opt.cost > z_opt.cost:
+                second_axes_processed = 'y'
+            else:
+                second_axes_processed = 'z'
+        elif y_opt.cost > x_opt.cost and y_opt.cost > z_opt.cost:
+            first_axes_processed = 'y'
+            if x_opt.cost > z_opt.cost:
+                second_axes_processed = 'x'
+            else:
+                second_axes_processed = 'z'
+        elif z_opt.cost > x_opt.cost and z_opt.cost > y_opt.cost:
+            first_axes_processed = 'z'
+            if x_opt.cost > y_opt.cost:
+                second_axes_processed = 'x'
+            else:
+                second_axes_processed = 'y'
+        grid_infotext = [None] * (1 + len(zs))
+
+        def cell(x, y, z, ix, iy, iz):
+            if shared.state.interrupted:
+                return processing.Processed(p, [], p.seed, "")
+            pc = copy(p)
+            pc.override_settings_restore_afterwards = False
+            pc.styles = pc.styles[:]
+            x_opt.apply(pc, x, xs)
+            y_opt.apply(pc, y, ys)
+            z_opt.apply(pc, z, zs)
+            try:
+                res = processing.process_images(pc)
+            except Exception as e:
+                shared.log.error(f"XYZ grid: Failed to process image: {e}")
+                errors.display(e, 'XYZ grid')
+                res = None
+            subgrid_index = 1 + iz # Sets subgrid infotexts
+            if grid_infotext[subgrid_index] is None and ix == 0 and iy == 0:
+                pc.extra_generation_params = copy(pc.extra_generation_params)
+                pc.extra_generation_params['Script'] = self.title()
+                if x_opt.label != 'Nothing':
+                    pc.extra_generation_params["X Type"] = x_opt.label
+                    pc.extra_generation_params["X Values"] = x_values
+                    if x_opt.label in ["Seed", "Var. seed"] and not no_fixed_seeds:
+                        pc.extra_generation_params["Fixed X Values"] = ", ".join([str(x) for x in xs])
+                if y_opt.label != 'Nothing':
+                    pc.extra_generation_params["Y Type"] = y_opt.label
+                    pc.extra_generation_params["Y Values"] = y_values
+                    if y_opt.label in ["Seed", "Var. seed"] and not no_fixed_seeds:
+                        pc.extra_generation_params["Fixed Y Values"] = ", ".join([str(y) for y in ys])
+                grid_infotext[subgrid_index] = processing.create_infotext(pc, pc.all_prompts, pc.all_seeds, pc.all_subseeds)
+            if grid_infotext[0] is None and ix == 0 and iy == 0 and iz == 0: # Sets main grid infotext
+                pc.extra_generation_params = copy(pc.extra_generation_params)
+                if z_opt.label != 'Nothing':
+                    pc.extra_generation_params["Z Type"] = z_opt.label
+                    pc.extra_generation_params["Z Values"] = z_values
+                    if z_opt.label in ["Seed", "Var. seed"] and not no_fixed_seeds:
+                        pc.extra_generation_params["Fixed Z Values"] = ", ".join([str(z) for z in zs])
+                grid_infotext[0] = processing.create_infotext(pc, pc.all_prompts, pc.all_seeds, pc.all_subseeds)
+            return res
+
+        with SharedSettingsStackHelper():
+            processed = draw_xyz_grid(
+                p,
+                xs=xs,
+                ys=ys,
+                zs=zs,
+                x_labels=[x_opt.format_value(p, x_opt, x) for x in xs],
+                y_labels=[y_opt.format_value(p, y_opt, y) for y in ys],
+                z_labels=[z_opt.format_value(p, z_opt, z) for z in zs],
+                cell=cell,
+                draw_legend=draw_legend,
+                include_lone_images=include_lone_images,
+                include_sub_grids=include_sub_grids,
+                first_axes_processed=first_axes_processed,
+                second_axes_processed=second_axes_processed,
+                margin_size=margin_size,
+                no_grid=no_grid,
+            )
+
+        if not processed.images:
+            active = False
+            return processed # It broke, no further handling needed.
+        z_count = len(zs)
+        processed.infotexts[:1+z_count] = grid_infotext[:1+z_count] # Set the grid infotexts to the real ones with extra_generation_params (1 main grid + z_count sub-grids)
+        if not include_lone_images:
+            if no_grid and include_sub_grids:
+                processed.images = processed.images[:z_count] # we don't have the main grid image, and need zero additional sub-images
+            else:
+                processed.images = processed.images[:z_count+1] # we either have the main grid image, or need one sub-image
+        if shared.opts.grid_save: # Auto-save main and sub-grids:
+            grid_count = z_count + ( 1 if not no_grid and z_count > 1 else 0 )
+            for g in range(grid_count):
+                adj_g = g-1 if g > 0 else g
+                images.save_image(processed.images[g], p.outpath_grids, "xyz_grid", info=processed.infotexts[g], extension=shared.opts.grid_format, prompt=processed.all_prompts[adj_g], seed=processed.all_seeds[adj_g], grid=True, p=processed)
+        if not include_sub_grids: # Done with sub-grids, drop all related information:
+            for _sg in range(z_count):
+                del processed.images[1]
+                del processed.all_prompts[1]
+                del processed.all_seeds[1]
+                del processed.infotexts[1]
+        elif no_grid:
+            del processed.infotexts[0]
+        active = False
+        cache = processed
+        return processed
+
+    def process_images(self, p, enabled, x_type, x_values, x_values_dropdown, y_type, y_values, y_values_dropdown, z_type, z_values, z_values_dropdown, csv_mode, draw_legend, no_fixed_seeds, no_grid, include_lone_images, include_sub_grids, margin_size): # pylint: disable=W0221
+        global cache # pylint: disable=W0603
+        if cache is not None and hasattr(cache, 'images'):
+            samples = cache.images.copy()
+            cache = None
+            return samples
+        return None
diff --git a/webui.py b/webui.py
index c9af90e75..6013aafd6 100644
--- a/webui.py
+++ b/webui.py
@@ -24,6 +24,7 @@
 import modules.sd_models
 import modules.sd_vae
 import modules.sd_unet
+import modules.model_t5
 import modules.progress
 import modules.ui
 import modules.txt2img
@@ -33,7 +34,7 @@
 import modules.hypernetworks.hypernetwork
 import modules.script_callbacks
 from modules.api.middleware import setup_middleware
-from modules.shared import cmd_opts, opts
+from modules.shared import cmd_opts, opts # pylint: disable=unused-import
 
 sys.excepthook = custom_excepthook
@@ -63,6 +64,7 @@
 timer.startup.record("ldm")
 modules.loader.initialized = True
+
 
 def check_rollback_vae():
     if shared.cmd_opts.rollback_vae:
         if not torch.cuda.is_available():
@@ -76,7 +78,6 @@ def check_rollback_vae():
             shared.cmd_opts.rollback_vae = False
 
-
 def initialize():
     log.debug('Initializing')
     check_rollback_vae()
@@ -90,6 +91,9 @@ def initialize():
     modules.sd_unet.refresh_unet_list()
     timer.startup.record("unet")
+    modules.model_t5.refresh_t5_list()
+    timer.startup.record("unet")
+
     extensions.list_extensions()
     timer.startup.record("extensions")
@@ -98,10 +102,10 @@ def initialize():
     timer.startup.record("models")
 
     import modules.postprocess.codeformer_model as codeformer
-    codeformer.setup_model(opts.codeformer_models_path)
+    codeformer.setup_model(shared.opts.codeformer_models_path)
     sys.modules["modules.codeformer_model"] = codeformer
     import modules.postprocess.gfpgan_model as gfpgan
-    gfpgan.setup_model(opts.gfpgan_models_path)
+    gfpgan.setup_model(shared.opts.gfpgan_models_path)
     timer.startup.record("face-restore")
 
     log.debug('Load extensions')
@@ -155,7 +159,7 @@ def sigint_handler(_sig, _frame):
 
 def load_model():
     modules.devices.set_cuda_params()
-    if not opts.sd_checkpoint_autoload or (shared.cmd_opts.ckpt is not None and shared.cmd_opts.ckpt.lower() != 'none'):
+    if not shared.opts.sd_checkpoint_autoload or (shared.cmd_opts.ckpt is not None and shared.cmd_opts.ckpt.lower() != 'none'):
         log.debug('Model auto load disabled')
     else:
         shared.state.begin('Load')
@@ -213,9 +217,15 @@ def start_common():
     log.info(f'Using data path: {shared.cmd_opts.data_dir}')
     if shared.cmd_opts.models_dir is not None and len(shared.cmd_opts.models_dir) > 0 and shared.cmd_opts.models_dir != 'models':
         log.info(f'Using models path: {shared.cmd_opts.models_dir}')
-    create_paths(opts)
+    create_paths(shared.opts)
     async_policy()
     initialize()
+    try:
+        from installer import diffusers_commit
+        if diffusers_commit != 'unknown':
+            shared.opts.diffusers_version = diffusers_commit # update installed diffusers version
+    except Exception:
+        pass
     if shared.opts.clean_temp_dir_at_start:
         gr_tempdir.cleanup_tmpdr()
     timer.startup.record("cleanup")
diff --git a/wiki b/wiki
index cf1e6bbb5..87bd5adaa 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit cf1e6bbb5b476007081b6a29f9d65f185b49b629
+Subproject commit 87bd5adaae34ebe5c880f64e56322ff47d0a315a