Skip to content

Commit

Permalink
direct import of external repos and remove clone from installer
Browse files Browse the repository at this point in the history
  • Loading branch information
vladmandic committed Oct 21, 2023
1 parent 8eb5e43 commit 443dde9
Show file tree
Hide file tree
Showing 193 changed files with 35,857 additions and 19 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ cache
!package.json

# all dynamic stuff
/repositories/**/*
/extensions/**/*
/outputs/**/*
/embeddings/**/*
Expand Down
1 change: 1 addition & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@
[submodule "modules/k-diffusion"]
path = modules/k-diffusion
url = https://github.com/crowsonkb/k-diffusion
ignore = dirty
80 changes: 80 additions & 0 deletions configs/v2-1-stable-unclip-h-inference.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
model:
base_learning_rate: 1.0e-04
target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion
params:
embedding_dropout: 0.25
parameterization: "v"
linear_start: 0.00085
linear_end: 0.0120
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 96
channels: 4
cond_stage_trainable: false
conditioning_key: crossattn-adm
scale_factor: 0.18215
monitor: val/loss_simple_ema
use_ema: False

embedder_config:
target: ldm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder

noise_aug_config:
target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation
params:
timestep_dim: 1024
noise_schedule_config:
timesteps: 1000
beta_schedule: squaredcos_cap_v2

unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
num_classes: "sequential"
adm_in_channels: 2048
use_checkpoint: True
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_head_channels: 64 # need to fix for flash-attn
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 1
context_dim: 1024
legacy: False

first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
attn_type: "vanilla-xformers"
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: [ ]
dropout: 0.0
lossconfig:
target: torch.nn.Identity

cond_stage_config:
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
params:
freeze: True
layer: "penultimate"
83 changes: 83 additions & 0 deletions configs/v2-1-stable-unclip-l-inference.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
model:
base_learning_rate: 1.0e-04
target: ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion
params:
embedding_dropout: 0.25
parameterization: "v"
linear_start: 0.00085
linear_end: 0.0120
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 96
channels: 4
cond_stage_trainable: false
conditioning_key: crossattn-adm
scale_factor: 0.18215
monitor: val/loss_simple_ema
use_ema: False

embedder_config:
target: ldm.modules.encoders.modules.ClipImageEmbedder
params:
model: "ViT-L/14"

noise_aug_config:
target: ldm.modules.encoders.modules.CLIPEmbeddingNoiseAugmentation
params:
clip_stats_path: "checkpoints/karlo_models/ViT-L-14_stats.th"
timestep_dim: 768
noise_schedule_config:
timesteps: 1000
beta_schedule: squaredcos_cap_v2

unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
num_classes: "sequential"
adm_in_channels: 1536
use_checkpoint: True
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_head_channels: 64 # need to fix for flash-attn
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 1
context_dim: 1024
legacy: False

first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
attn_type: "vanilla-xformers"
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: [ ]
dropout: 0.0
lossconfig:
target: torch.nn.Identity

cond_stage_config:
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
params:
freeze: True
layer: "penultimate"
74 changes: 74 additions & 0 deletions configs/v2-midas-inference.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
model:
base_learning_rate: 5.0e-07
target: ldm.models.diffusion.ddpm.LatentDepth2ImageDiffusion
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 64
channels: 4
cond_stage_trainable: false
conditioning_key: hybrid
scale_factor: 0.18215
monitor: val/loss_simple_ema
finetune_keys: null
use_ema: False

depth_stage_config:
target: ldm.modules.midas.api.MiDaSInference
params:
model_type: "dpt_hybrid"

unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
use_checkpoint: True
image_size: 32 # unused
in_channels: 5
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_head_channels: 64 # need to fix for flash-attn
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 1
context_dim: 1024
legacy: False

first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
#attn_type: "vanilla-xformers"
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: [ ]
dropout: 0.0
lossconfig:
target: torch.nn.Identity

cond_stage_config:
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
params:
freeze: True
layer: "penultimate"


2 changes: 2 additions & 0 deletions installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,7 @@ def install_packages():

# clone required repositories
def install_repositories():
"""
if args.profile:
pr = cProfile.Profile()
pr.enable()
Expand All @@ -615,6 +616,7 @@ def d(name):
clone(blip_repo, d('BLIP'), blip_commit)
if args.profile:
print_profile(pr, 'Repositories')
"""


# run extension installer
Expand Down
21 changes: 6 additions & 15 deletions modules/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,13 @@
# data_path = cmd_opts_pre.data
sys.path.insert(0, script_path)

# search for directory of stable diffusion in following places
sd_path = None
possible_sd_paths = [os.path.join(script_path, 'repositories/stable-diffusion-stability-ai'), '.', os.path.dirname(script_path)]
for possible_sd_path in possible_sd_paths:
if os.path.exists(os.path.join(possible_sd_path, 'ldm/models/diffusion/ddpm.py')):
sd_path = os.path.abspath(possible_sd_path)
break

assert sd_path is not None, f"Couldn't find Stable Diffusion in any of: {possible_sd_paths}"

sd_path = os.path.join(script_path, 'repositories')
path_dirs = [
(sd_path, 'ldm', 'Stable Diffusion', []),
(os.path.join(sd_path, '../taming-transformers'), 'taming', 'Taming Transformers', []),
(os.path.join(sd_path, '../CodeFormer'), 'inference_codeformer.py', 'CodeFormer', []),
(os.path.join(sd_path, '../BLIP'), 'models/blip.py', 'BLIP', []),
(os.path.join(sd_path, '../k-diffusion'), 'k_diffusion/sampling.py', 'k_diffusion', ["atstart"]),
(sd_path, 'ldm', 'ldm', []),
(sd_path, 'taming', 'Taming Transformers', []),
(os.path.join(sd_path, 'blip'), 'models/blip.py', 'BLIP', []),
(os.path.join(sd_path, 'codeformer'), 'inference_codeformer.py', 'CodeFormer', []),
(os.path.join('modules', 'k-diffusion'), 'k_diffusion/sampling.py', 'k_diffusion', ["atstart"]),
]

paths = {}
Expand Down
6 changes: 3 additions & 3 deletions modules/sd_models_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

from modules import paths, sd_disable_initialization, devices

sd_repo_configs_path = os.path.join(paths.paths['Stable Diffusion'], "configs", "stable-diffusion")
sd_repo_configs_path = 'configs'
config_default = paths.sd_default_config
config_sd2 = os.path.join(sd_repo_configs_path, "v2-inference.yaml")
config_sd2v = os.path.join(sd_repo_configs_path, "v2-inference-v.yaml")
config_sd2 = os.path.join(sd_repo_configs_path, "v2-inference-512-base.yaml")
config_sd2v = os.path.join(sd_repo_configs_path, "v2-inference-768-v.yaml")
config_sd2_inpainting = os.path.join(sd_repo_configs_path, "v2-inpainting-inference.yaml")
config_depth_model = os.path.join(sd_repo_configs_path, "v2-midas-inference.yaml")
config_unclip = os.path.join(sd_repo_configs_path, "v2-1-stable-unclip-l-inference.yaml")
Expand Down
1 change: 1 addition & 0 deletions repositories/blip
Submodule blip added at 3a29b7
Loading

0 comments on commit 443dde9

Please sign in to comment.