Revert "Merge pull request #2411 from vladmandic/master"
This reverts commit 64cce8a, reversing
changes made to 597fc18.
vladmandic committed Oct 26, 2023
1 parent 886af2e commit 267905e
Showing 282 changed files with 43,168 additions and 288 deletions.
2 changes: 0 additions & 2 deletions .gitignore
@@ -43,7 +43,6 @@ cache
!package.json

# all dynamic stuff
-/repositories/**/*
/extensions/**/*
/outputs/**/*
/embeddings/**/*
@@ -59,6 +58,5 @@ cache
/localizations

# unexcluded so folders get created
-!/repositories/.placeholder
!/models/VAE-approx
!/models/VAE-approx/model.pt
4 changes: 4 additions & 0 deletions .gitmodules
@@ -32,3 +32,7 @@
path = extensions-builtin/sd-extension-chainner
url = https://github.com/vladmandic/sd-extension-chainner
ignore = dirty
+[submodule "modules/k-diffusion"]
+	path = modules/k-diffusion
+	url = https://github.com/crowsonkb/k-diffusion
+	ignore = dirty
1 change: 1 addition & 0 deletions .pylintrc
@@ -151,6 +151,7 @@ disable=bad-inline-option,
missing-function-docstring,
missing-module-docstring,
no-else-return,
+        not-callable,
pointless-string-statement,
raw-checker-failed,
simplifiable-if-expression,
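The only .pylintrc change is the new not-callable entry in the disable list. One plausible motivation, noted here as an assumption since the commit does not document it, is pylint's well-known E1102 false positives on dynamically typed torch callables:

# Illustration only (not repository code): pylint's not-callable check (E1102)
# has historically raised false positives on some torch callables whose types
# pylint cannot infer, which is the usual reason projects add it to the disable list.
import torch

def make_batch(values):
    return torch.tensor(values)  # older pylint releases flagged this call as E1102

print(make_batch([1.0, 2.0, 3.0]))  # tensor([1., 2., 3.])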
80 changes: 80 additions & 0 deletions configs/v2-1-stable-unclip-h-inference.yaml
@@ -0,0 +1,80 @@
model:
base_learning_rate: 1.0e-04
target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion
params:
embedding_dropout: 0.25
parameterization: "v"
linear_start: 0.00085
linear_end: 0.0120
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 96
channels: 4
cond_stage_trainable: false
conditioning_key: crossattn-adm
scale_factor: 0.18215
monitor: val/loss_simple_ema
use_ema: False

embedder_config:
target: ldm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder

noise_aug_config:
target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation
params:
timestep_dim: 1024
noise_schedule_config:
timesteps: 1000
beta_schedule: squaredcos_cap_v2

unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
num_classes: "sequential"
adm_in_channels: 2048
use_checkpoint: True
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_head_channels: 64 # need to fix for flash-attn
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 1
context_dim: 1024
legacy: False

first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
attn_type: "vanilla-xformers"
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: [ ]
dropout: 0.0
lossconfig:
target: torch.nn.Identity

cond_stage_config:
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
params:
freeze: True
layer: "penultimate"
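The stable-unCLIP config above follows the standard ldm layout: each component block names a target class by dotted import path plus its params. The sketch below shows how such a config is typically materialized; it mirrors the common instantiate_from_config pattern, assumes omegaconf and the stable-diffusion ldm package are installed, and is not the loader this repository ships.

# Sketch (assumption, not repository code): resolve each "target" dotted path
# and construct it with the accompanying "params", as ldm-style loaders do.
import importlib
from omegaconf import OmegaConf

def instantiate_from_config(config):
    module_path, class_name = config["target"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_path), class_name)
    return cls(**config.get("params", {}))

config = OmegaConf.load("configs/v2-1-stable-unclip-h-inference.yaml")
model = instantiate_from_config(config.model)  # ImageEmbeddingConditionedLatentDiffusion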
83 changes: 83 additions & 0 deletions configs/v2-1-stable-unclip-l-inference.yaml
@@ -0,0 +1,83 @@
model:
base_learning_rate: 1.0e-04
target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion
params:
embedding_dropout: 0.25
parameterization: "v"
linear_start: 0.00085
linear_end: 0.0120
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 96
channels: 4
cond_stage_trainable: false
conditioning_key: crossattn-adm
scale_factor: 0.18215
monitor: val/loss_simple_ema
use_ema: False

embedder_config:
target: ldm.modules.encoders.modules.ClipImageEmbedder
params:
model: "ViT-L/14"

noise_aug_config:
target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation
params:
clip_stats_path: "checkpoints/karlo_models/ViT-L-14_stats.th"
timestep_dim: 768
noise_schedule_config:
timesteps: 1000
beta_schedule: squaredcos_cap_v2

unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
num_classes: "sequential"
adm_in_channels: 1536
use_checkpoint: True
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_head_channels: 64 # need to fix for flash-attn
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 1
context_dim: 1024
legacy: False

first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
attn_type: "vanilla-xformers"
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: [ ]
dropout: 0.0
lossconfig:
target: torch.nn.Identity

cond_stage_config:
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
params:
freeze: True
layer: "penultimate"
74 changes: 74 additions & 0 deletions configs/v2-midas-inference.yaml
@@ -0,0 +1,74 @@
model:
base_learning_rate: 5.0e-07
target: ldm.models.diffusion.ddpm.LatentDepth2ImageDiffusion
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 64
channels: 4
cond_stage_trainable: false
conditioning_key: hybrid
scale_factor: 0.18215
monitor: val/loss_simple_ema
finetune_keys: null
use_ema: False

depth_stage_config:
target: ldm.modules.midas.api.MiDaSInference
params:
model_type: "dpt_hybrid"

unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
use_checkpoint: True
image_size: 32 # unused
in_channels: 5
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_head_channels: 64 # need to fix for flash-attn
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 1
context_dim: 1024
legacy: False

first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
#attn_type: "vanilla-xformers"
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: [ ]
dropout: 0.0
lossconfig:
target: torch.nn.Identity

cond_stage_config:
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
params:
freeze: True
layer: "penultimate"


16 changes: 8 additions & 8 deletions extensions-builtin/Lora/lora_convert.py
@@ -112,11 +112,12 @@ def __init__(self):
         self.converter = self.diffusers
         self.is_sdxl = True if shared.sd_model_type == "sdxl" else False
         self.UNET_CONVERSION_MAP = make_unet_conversion_map() if self.is_sdxl else None
-        self.LORA_PREFIX_UNET = "lora_unet"
-        self.LORA_PREFIX_TEXT_ENCODER = "lora_te"
+        self.LORA_PREFIX_UNET = "lora_unet_"
+        self.LORA_PREFIX_TEXT_ENCODER = "lora_te_"
+        self.OFT_PREFIX_UNET = "oft_unet_"
         # SDXL: must starts with LORA_PREFIX_TEXT_ENCODER
-        self.LORA_PREFIX_TEXT_ENCODER1 = "lora_te1"
-        self.LORA_PREFIX_TEXT_ENCODER2 = "lora_te2"
+        self.LORA_PREFIX_TEXT_ENCODER1 = "lora_te1_"
+        self.LORA_PREFIX_TEXT_ENCODER2 = "lora_te2_"

def original(self, key):
key = convert_diffusers_name_to_compvis(key, self.is_sd2)
@@ -142,13 +143,12 @@ def diffusers(self, key):
         if self.is_sdxl:
             map_keys = list(self.UNET_CONVERSION_MAP.keys()) # prefix of U-Net modules
             map_keys.sort()
-            search_key = key.replace(self.LORA_PREFIX_UNET + "_", "").replace(self.LORA_PREFIX_TEXT_ENCODER1 + "_",
-                                                                              "").replace(
-                self.LORA_PREFIX_TEXT_ENCODER2 + "_", "")
+            search_key = key.replace(self.LORA_PREFIX_UNET, "").replace(self.OFT_PREFIX_UNET, "").replace(self.LORA_PREFIX_TEXT_ENCODER1, "").replace(self.LORA_PREFIX_TEXT_ENCODER2, "")

             position = bisect.bisect_right(map_keys, search_key)
             map_key = map_keys[position - 1]
             if search_key.startswith(map_key):
-                key = key.replace(map_key, self.UNET_CONVERSION_MAP[map_key]) # pylint: disable=unsubscriptable-object
+                key = key.replace(map_key, self.UNET_CONVERSION_MAP[map_key]).replace("oft","lora") # pylint: disable=unsubscriptable-object
         sd_module = shared.sd_model.network_layer_mapping.get(key, None)
         return key, sd_module
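For context on the rewritten diffusers() lookup above: once the prefixes (now including the new oft_unet_) are stripped, the sorted conversion-map keys are searched with bisect_right, and the entry just before the insertion point is the candidate prefix. Below is a self-contained toy version of that lookup; the map contents are invented for illustration and are not the real SDXL conversion map.

# Toy illustration (not repository code) of the bisect-based prefix lookup:
# bisect_right finds where the stripped key would insert into the sorted map
# keys, and the entry just before that point is the longest candidate prefix.
import bisect

UNET_CONVERSION_MAP = {  # hypothetical subset, for illustration only
    "input_blocks_1_": "down_blocks_0_resnets_0_",
    "input_blocks_2_": "down_blocks_0_resnets_1_",
}
map_keys = sorted(UNET_CONVERSION_MAP.keys())

search_key = "lora_unet_input_blocks_1_conv1".replace("lora_unet_", "")
position = bisect.bisect_right(map_keys, search_key)
map_key = map_keys[position - 1]
if search_key.startswith(map_key):
    print(search_key.replace(map_key, UNET_CONVERSION_MAP[map_key]))
    # -> down_blocks_0_resnets_0_conv1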
49 changes: 49 additions & 0 deletions extensions-builtin/Lora/network_oft.py
@@ -0,0 +1,49 @@
import torch
import diffusers.models.lora as diffusers_lora
import network
from modules import devices

class ModuleTypeOFT(network.ModuleType):
def create_module(self, net: network.Network, weights: network.NetworkWeights):
"""
weights.w.items()
alpha : tensor(0.0010, dtype=torch.bfloat16)
oft_blocks : tensor([[[ 0.0000e+00, 1.4400e-04, 1.7319e-03, ..., -8.8882e-04,
5.7373e-03, -4.4250e-03],
[-1.4400e-04, 0.0000e+00, 8.6594e-04, ..., 1.5945e-03,
-8.5449e-04, 1.9684e-03], ...etc...
, dtype=torch.bfloat16)"""

if "oft_blocks" in weights.w.keys():
module = NetworkModuleOFT(net, weights)
return module
else:
return None


class NetworkModuleOFT(network.NetworkModule):
def __init__(self, net: network.Network, weights: network.NetworkWeights):
super().__init__(net, weights)

self.weights = weights.w.get("oft_blocks").to(device=devices.device)
self.dim = self.weights.shape[0] # num blocks
self.alpha = self.multiplier()
self.block_size = self.weights.shape[-1]

def get_weight(self):
block_Q = self.weights - self.weights.transpose(1, 2)
I = torch.eye(self.block_size, device=devices.device).unsqueeze(0).repeat(self.dim, 1, 1)
block_R = torch.matmul(I + block_Q, (I - block_Q).inverse())
block_R_weighted = self.alpha * block_R + (1 - self.alpha) * I
R = torch.block_diag(*block_R_weighted)
return R

def calc_updown(self, orig_weight):
R = self.get_weight().to(device=devices.device, dtype=orig_weight.dtype)
if orig_weight.dim() == 4:
updown = torch.einsum("oihw, op -> pihw", orig_weight, R) * self.calc_scale()
else:
updown = torch.einsum("oi, op -> pi", orig_weight, R) * self.calc_scale()

return self.finalize_updown(updown, orig_weight, orig_weight.shape)
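The core of NetworkModuleOFT.get_weight() above is the Cayley transform: for a skew-symmetric block Q, R = (I + Q)(I - Q)^-1 is orthogonal, and blending it with I by alpha before torch.block_diag gives the block-diagonal matrix applied in calc_updown. A small standalone check of that property (illustrative, not part of the extension):

# Standalone check (illustration only): for skew-symmetric Q, the Cayley
# transform R = (I + Q)(I - Q)^-1 satisfies R @ R.T ~= I, so applying R to the
# original weight acts as a pure rotation/reflection of its rows.
import torch

block_size = 4
W = torch.randn(block_size, block_size)
Q = W - W.T                                   # skew-symmetric: Q.T == -Q
I = torch.eye(block_size)
R = (I + Q) @ torch.linalg.inv(I - Q)         # Cayley transform

print(torch.allclose(R @ R.T, I, atol=1e-5))  # True: R is orthogonal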
2 changes: 2 additions & 0 deletions extensions-builtin/Lora/networks.py
@@ -7,6 +7,7 @@
import network_lora
import network_hada
import network_ia3
+import network_oft
import network_lokr
import network_full
import network_norm
@@ -32,6 +33,7 @@
network_lora.ModuleTypeLora(),
network_hada.ModuleTypeHada(),
network_ia3.ModuleTypeIa3(),
+    network_oft.ModuleTypeOFT(),
network_lokr.ModuleTypeLokr(),
network_full.ModuleTypeFull(),
network_norm.ModuleTypeNorm(),
2 changes: 2 additions & 0 deletions installer.py
@@ -591,6 +591,7 @@ def install_packages():

# clone required repositories
def install_repositories():
"""
if args.profile:
pr = cProfile.Profile()
pr.enable()
@@ -615,6 +616,7 @@ def d(name):
clone(blip_repo, d('BLIP'), blip_commit)
if args.profile:
print_profile(pr, 'Repositories')
"""


# run extension installer