
Commit

Merge branch 'dev' into temp
vladmandic authored Oct 31, 2023
2 parents 2336ffc + 324cd65 commit 0d7807a
Showing 310 changed files with 43,427 additions and 423 deletions.
2 changes: 0 additions & 2 deletions .gitignore
@@ -43,7 +43,6 @@ cache
 !package.json
 
 # all dynamic stuff
-/repositories/**/*
 /extensions/**/*
 /outputs/**/*
 /embeddings/**/*
@@ -60,6 +59,5 @@ cache
 /localizations
 
 # unexcluded so folders get created
-!/repositories/.placeholder
 !/models/VAE-approx
 !/models/VAE-approx/model.pt
4 changes: 4 additions & 0 deletions .gitmodules
@@ -32,3 +32,7 @@
 path = extensions-builtin/sd-extension-chainner
 url = https://github.com/vladmandic/sd-extension-chainner
 ignore = dirty
+[submodule "modules/k-diffusion"]
+path = modules/k-diffusion
+url = https://github.com/crowsonkb/k-diffusion
+ignore = dirty
1 change: 1 addition & 0 deletions .pylintrc
@@ -151,6 +151,7 @@ disable=bad-inline-option,
 missing-function-docstring,
 missing-module-docstring,
 no-else-return,
+not-callable,
 pointless-string-statement,
 raw-checker-failed,
 simplifiable-if-expression,
82 changes: 47 additions & 35 deletions CHANGELOG.md
@@ -1,67 +1,79 @@
# Change Log for SD.Next

## Update for 2023-10-30

*Note*: Pending release of `diffusers==0.22.0`

Another pretty big release, this time with focus on new models, new backends, optimizations, and tons of fixes

Also, [Wiki](https://github.com/vladmandic/automatic/wiki) has been updated with new content, so check it out!
Some highlights: [OpenVINO](https://github.com/vladmandic/automatic/wiki/OpenVINO), [IntelArc](https://github.com/vladmandic/automatic/wiki/Intel-ARC), [DirectML](https://github.com/vladmandic/automatic/wiki/DirectML), [ONNX/Olive](https://github.com/vladmandic/automatic/wiki/ONNX-Runtime)

- **Diffusers**
  - new model type: [Segmind SSD-1B](https://huggingface.co/segmind/SSD-1B)
    it's a *distilled* model, this time a 50% smaller and faster version of SD-XL!
    (and quality does not suffer, it's just more optimized)
    tests show batch-size 4 with 1k images using less than 6.5GB of VRAM
    download using built-in **Huggingface** downloader: `segmind/SSD-1B`
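    as a rough sketch (not SD.Next code), loading it directly with `diffusers` could look like this; device and precision here are assumptions:

    ```python
    import torch
    from diffusers import StableDiffusionXLPipeline  # SSD-1B is SD-XL compatible

    # half precision keeps VRAM usage low, in line with the numbers above
    pipe = StableDiffusionXLPipeline.from_pretrained("segmind/SSD-1B", torch_dtype=torch.float16)
    pipe = pipe.to("cuda")
    image = pipe("an astronaut riding a horse", num_inference_steps=30).images[0]
    image.save("ssd-1b.png")
    ```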
  - new model type: [LCM: Latent Consistency Models](https://github.com/openai/consistency_models)
    near-instant generation in as little as 3 steps!
    combined with OpenVINO, generation on CPU takes less than 10 seconds: <https://www.youtube.com/watch?v=b90ESUTLsRo>
    download using built-in **Huggingface** downloader: `SimianLuo/LCM_Dreamshaper_v7`
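    a minimal sketch of direct usage via `diffusers` (assuming the pending `diffusers==0.22.0`, which bundles the LCM pipeline; prompt and step count are illustrative):

    ```python
    import torch
    from diffusers import DiffusionPipeline

    pipe = DiffusionPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7", torch_dtype=torch.float32)
    pipe.to("cpu")  # usable even on CPU thanks to the tiny step count
    # 4 steps instead of the usual 20-50
    image = pipe("portrait photo, studio lighting", num_inference_steps=4, guidance_scale=8.0).images[0]
    image.save("lcm.png")
    ```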
  - support for **Custom pipelines**, thanks @disty0
    download using built-in **Huggingface** downloader
    think of them as plugins for diffusers, not unlike the original extensions that modify the behavior of the `ldm` backend
    list of community pipelines: <https://github.com/huggingface/diffusers/blob/main/examples/community/README.md>
  - new custom pipeline: `Disty0/zero123plus-pipeline`
    generates 4 output images with different camera positions: front, side, top, back!
    for more details, see <https://github.com/vladmandic/automatic/discussions/2421>
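    outside SD.Next, `diffusers` loads such pipelines through the `custom_pipeline` argument; a hedged sketch using a known community pipeline (base checkpoint chosen only for illustration):

    ```python
    from diffusers import DiffusionPipeline

    # community pipelines are resolved by name from the diffusers examples repo,
    # or by a Hub repo id that contains a pipeline implementation
    pipe = DiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        custom_pipeline="lpw_stable_diffusion",  # long-prompt-weighting community pipeline
    )
    ```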
  - new backend: **ONNX/Olive** (experimental)
    for details, see the [Wiki](https://github.com/vladmandic/automatic/wiki/ONNX-Runtime)
  - extend support for [Free-U](https://github.com/ChenyangSi/FreeU)
    improves generation quality at no cost (other than finding params that work for you)
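    in plain `diffusers`, FreeU is toggled on an existing pipeline; the values below are commonly cited SD-XL starting points from the FreeU repo, not SD.Next defaults:

    ```python
    import torch
    from diffusers import StableDiffusionXLPipeline

    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    ).to("cuda")
    # FreeU re-weights UNet backbone (b1, b2) and skip (s1, s2) features at no extra cost
    pipe.enable_freeu(s1=0.9, s2=0.2, b1=1.3, b2=1.4)
    image = pipe("a castle above the clouds").images[0]
    ```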
- **General**
  - add **Lora OFT** support, thanks @antis0007 and @ai-casanova
  - **Upscalers**
    - **compile** option, thanks @disty0
    - **chaiNNer** add high quality models from [Helaman](https://openmodeldb.info/users/helaman)
  - redesigned **Progress bar** with full details on current operation
  - **Extra networks** sort by name, size, date, etc.
  - new option: *settings -> images -> keep incomplete*
    can be used to skip vae decode on aborted/skipped/interrupted image generations
  - new option: *settings -> system paths -> models*
    can be used to set a custom base path for *all* models (previously only available as a cli option)
  - remove external clone of items in `/repositories`
  - switch core font in default theme to **noto-sans**
    previously the default font was simply *system-ui*, which led to too much variation between browsers and platforms
- **Fixes**
  - fix **freeu** for backend original and add it to xyz grid
  - fix loading diffuser models in huggingface format from non-standard location
  - fix default styles looking in wrong location
  - fix missing upscaler folder on initial startup
  - fix handling of relative path for models
  - fix simple live preview device mismatch
  - fix batch img2img
  - fix diffusers samplers: dpm++ 2m, dpm++ 1s, deis
  - fix new style filename template
  - fix image name template using model name
  - fix image name sequence
  - fix model path using relative path
  - fix `torch-rocm` and `tensorflow-rocm` version detection, thanks @xangelix
  - fix **chainner** upscalers color clipping
  - fix for base+refiner workflow in diffusers mode: number of steps, diffuser pipe mode
  - fix for prompt encoder with refiner in diffusers mode
  - fix prompts-from-file saving incorrect metadata
  - fix before-hires step
  - fix diffusers switch from invalid model
  - **directml** and **ipex** updates
  - force second requirements check on startup
  - remove **lyco**, multiple_tqdm
  - enhance extension compatibility for extensions directly importing codeformers
  - enhance extension compatibility for extensions directly accessing processing params
  - **css** fixes
  - clearly mark external themes in ui
  - update `openvino`, thanks @disty0
  - update `typing-extensions`

## Update for 2023-10-17

22 changes: 12 additions & 10 deletions README.md
@@ -56,21 +56,23 @@ Additional models will be added as they become available and there is public int
- [Stable Diffusion](https://github.com/Stability-AI/stablediffusion/) 1.x and 2.x *(all variants)*
- [Stable Diffusion XL](https://github.com/Stability-AI/generative-models)
- [Kandinsky](https://github.com/ai-forever/Kandinsky-2) 2.1 and 2.2
- [SD-Distilled](https://huggingface.co/blog/sd_distillation) *(all variants)*
- [Wuerstchen](https://huggingface.co/blog/wuertschen)
- [Segmind SSD-1B](https://huggingface.co/segmind/SSD-1B)
- [UniDiffusion](https://github.com/thu-ml/unidiffuser)
- [DeepFloyd IF](https://github.com/deep-floyd/IF)

## Platform support

- *nVidia* GPUs using **CUDA** libraries on both *Windows and Linux*
- *AMD* GPUs using **ROCm** libraries on *Linux*
  Support will be extended to *Windows* once AMD releases ROCm for Windows
- *Intel Arc* GPUs using **OneAPI** with *IPEX XPU* libraries on both *Windows and Linux*
- Any GPU compatible with *DirectX* on *Windows* using **DirectML** libraries
  This includes support for AMD GPUs that are not supported by native ROCm libraries
- Any GPU or device compatible with **OpenVINO** libraries on both *Windows and Linux*
- *Apple M1/M2* on *OSX* using built-in support in Torch with **MPS** optimizations
- *ONNX/Olive* (experimental)

## Install & Run

80 changes: 80 additions & 0 deletions configs/v2-1-stable-unclip-h-inference.yaml
@@ -0,0 +1,80 @@
model:
  base_learning_rate: 1.0e-04
  target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion
  params:
    embedding_dropout: 0.25
    parameterization: "v"
    linear_start: 0.00085
    linear_end: 0.0120
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    image_size: 96
    channels: 4
    cond_stage_trainable: false
    conditioning_key: crossattn-adm
    scale_factor: 0.18215
    monitor: val/loss_simple_ema
    use_ema: False

    embedder_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder

    noise_aug_config:
      target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation
      params:
        timestep_dim: 1024
        noise_schedule_config:
          timesteps: 1000
          beta_schedule: squaredcos_cap_v2

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        num_classes: "sequential"
        adm_in_channels: 2048
        use_checkpoint: True
        image_size: 32 # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_head_channels: 64 # need to fix for flash-attn
        use_spatial_transformer: True
        use_linear_in_transformer: True
        transformer_depth: 1
        context_dim: 1024
        legacy: False

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          attn_type: "vanilla-xformers"
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 2
            - 4
            - 4
          num_res_blocks: 2
          attn_resolutions: [ ]
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
      params:
        freeze: True
        layer: "penultimate"
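For reference, ldm-style configs like this one are typically consumed with `instantiate_from_config`; a minimal sketch, assuming the Stability-AI/stablediffusion codebase is importable (checkpoint weights still need to be loaded separately):

```python
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config  # helper from the Stability-AI/stablediffusion repo

config = OmegaConf.load("configs/v2-1-stable-unclip-h-inference.yaml")
# builds the class named in `target` and passes it everything under `params`
model = instantiate_from_config(config.model)
model.eval()
```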
83 changes: 83 additions & 0 deletions configs/v2-1-stable-unclip-l-inference.yaml
@@ -0,0 +1,83 @@
model:
  base_learning_rate: 1.0e-04
  target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion
  params:
    embedding_dropout: 0.25
    parameterization: "v"
    linear_start: 0.00085
    linear_end: 0.0120
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    image_size: 96
    channels: 4
    cond_stage_trainable: false
    conditioning_key: crossattn-adm
    scale_factor: 0.18215
    monitor: val/loss_simple_ema
    use_ema: False

    embedder_config:
      target: ldm.modules.encoders.modules.ClipImageEmbedder
      params:
        model: "ViT-L/14"

    noise_aug_config:
      target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation
      params:
        clip_stats_path: "checkpoints/karlo_models/ViT-L-14_stats.th"
        timestep_dim: 768
        noise_schedule_config:
          timesteps: 1000
          beta_schedule: squaredcos_cap_v2

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        num_classes: "sequential"
        adm_in_channels: 1536
        use_checkpoint: True
        image_size: 32 # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_head_channels: 64 # need to fix for flash-attn
        use_spatial_transformer: True
        use_linear_in_transformer: True
        transformer_depth: 1
        context_dim: 1024
        legacy: False

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          attn_type: "vanilla-xformers"
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 2
            - 4
            - 4
          num_res_blocks: 2
          attn_resolutions: [ ]
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
      params:
        freeze: True
        layer: "penultimate"