Skip to content

Commit

Permalink
Removed hooks in favor of a PhotoMakerLoraLoaderPlus node.
Browse files Browse the repository at this point in the history
Resolves #37
  • Loading branch information
shiimizu committed Sep 1, 2024
1 parent 3f77c15 commit 13987cf
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 296 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,20 @@ PhotoMaker implementation that follows the ComfyUI way of doing things. The code
git clone https://github.com/shiimizu/ComfyUI-PhotoMaker-Plus.git
```
4. Download the model(s) from Hugging Face ([V1](https://huggingface.co/TencentARC/PhotoMaker), [V2](https://huggingface.co/TencentARC/PhotoMaker-V2)) and place it in a `photomaker` folder in your `models` folder such as `ComfyUI/models/photomaker`.
5. Load the LoRA within the model using the `LoraLoaderModelOnly` node.
5. Check out the [example workflows](https://github.com/shiimizu/ComfyUI-PhotoMaker-Plus/tree/main/examples).

## Features of this `Plus` version

* Better face resemblance by using `CLIPImageProcessor` like in the original code.
* Automatic PhotoMaker LoRA detection & loading via the LoraLoader nodes.
* Customizable trigger word
* Allows multiple trigger words in the prompt
* Extra nodes such as `PhotoMakerStyles` and `PrepImagesForClipVisionFromPath`

## Important news

**2024-09-01**
* A `PhotoMakerLoraLoaderPlus` node was added. Use that to load the LoRA.

**2024-07-26**
* Support for PhotoMaker V2. This uses InsightFace, so make sure to use the new `PhotoMakerLoaderPlus` and `PhotoMakerInsightFaceLoader` nodes.

Expand Down
140 changes: 95 additions & 45 deletions photomaker.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,102 @@
import torch
import hashlib
import os
import logging
import numpy as np
import comfy.clip_vision
import comfy.clip_model
import comfy.model_management
import comfy.utils
import comfy.sd
import folder_paths
import torchvision.transforms.v2 as T
from comfy.sd import CLIP
from typing import Union
from collections import Counter
from torch import Tensor
from transformers import CLIPImageProcessor
from transformers.image_utils import PILImageResampling
from collections import Counter
import folder_paths
import torch
import os
from .insightface_package import analyze_faces, insightface_loader
from .model import PhotoMakerIDEncoder
from .model_v2 import PhotoMakerIDEncoder_CLIPInsightfaceExtendtoken
from .utils import LoadImageCustom, load_image, prepImage, crop_image_pil, tokenize_with_trigger_word
from folder_paths import folder_names_and_paths, models_dir, supported_pt_extensions, add_model_folder_path
from torch import Tensor
import hashlib
from typing import Union
from .insightface_package import analyze_faces, insightface_loader
import numpy as np
import torchvision.transforms.v2 as T

# Directory where InsightFace model files are expected (ComfyUI/models/insightface).
INSIGHTFACE_DIR = os.path.join(models_dir, "insightface")

# Register a "photomaker" model folder accepting the standard checkpoint extensions,
# and also add that same directory to the "loras" search path so PhotoMaker
# checkpoints (which bundle LoRA weights) are discoverable by LoRA loaders.
folder_names_and_paths["photomaker"] = ([os.path.join(models_dir, "photomaker")], supported_pt_extensions)
add_model_folder_path("loras", folder_names_and_paths["photomaker"][0][0])
from .style_template import styles

class PhotoMakerLoaderPlus:
    """Node that loads a PhotoMaker ID-encoder checkpoint, caching its tensors."""
    def __init__(self):
        # Caches of the last loaded checkpoint, stored as (path, weights) tuples.
        # Populated by load_data(); None until a checkpoint has been read.
        self.loaded_lora = None
        self.loaded_clipvision = None

@classmethod
def INPUT_TYPES(s):
return {"required": { "photomaker_model_name": (folder_paths.get_filename_list("photomaker"), ),
}}
RETURN_TYPES = ("PHOTOMAKER",)
return {"required": {
"photomaker_model_name": (folder_paths.get_filename_list("photomaker"), ),
},
}
RETURN_TYPES = ("PHOTOMAKER", )
FUNCTION = "load_photomaker_model"

CATEGORY = "PhotoMaker"

def load_photomaker_model(self, photomaker_model_name):
photomaker_model_path = folder_paths.get_full_path("photomaker", photomaker_model_name)
if 'v1' in photomaker_model_name:
photomaker_model = PhotoMakerIDEncoder()
else:
self.load_data(None, None, photomaker_model_name, 0, 0)[0]
if 'qformer_perceiver.token_norm.weight' in self.loaded_clipvision[1].keys():
photomaker_model = PhotoMakerIDEncoder_CLIPInsightfaceExtendtoken()
data = comfy.utils.load_torch_file(photomaker_model_path, safe_load=True)
if "id_encoder" in data:
data = data["id_encoder"]
photomaker_model.load_state_dict(data)
else:
photomaker_model = PhotoMakerIDEncoder()
photomaker_model.load_state_dict(self.loaded_clipvision[1])
photomaker_model.loader = self
photomaker_model.filename = photomaker_model_name
return (photomaker_model,)

def load_data(self, model, clip, name, strength_model, strength_clip):
    """Extract LoRA and id_encoder weights from a PhotoMaker checkpoint, with caching.

    Args:
        model: Diffusion model to patch, or None to only populate the caches.
        clip: CLIP model to patch alongside the diffusion model, or None.
        name: Checkpoint filename within the "photomaker" models folder.
        strength_model: LoRA strength applied to the diffusion model.
        strength_clip: LoRA strength applied to the CLIP model.

    Returns:
        (model_lora, clip_lora): patched models when a patch was applied,
        otherwise the inputs passed through unchanged.
    """
    model_lora, clip_lora = model, clip

    path = folder_paths.get_full_path("photomaker", name)
    lora = None
    if self.loaded_lora is not None:
        if self.loaded_lora[0] == path:
            # Cache hit: reuse the LoRA weights extracted on a previous call.
            lora = self.loaded_lora[1]
        else:
            # Different checkpoint requested: drop both caches before reloading.
            temp = self.loaded_lora
            self.loaded_lora = None
            del temp
            temp = self.loaded_clipvision
            self.loaded_clipvision = None
            del temp

    if lora is None:
        # PhotoMaker checkpoints bundle the id_encoder and LoRA weights in one file.
        data = comfy.utils.load_torch_file(path, safe_load=True)
        clipvision = data.get("id_encoder", None)
        lora = data.get("lora_weights", None)
        self.loaded_lora = (path, lora)
        self.loaded_clipvision = (path, clipvision)

    # Only patch when there is a model to patch and at least one nonzero strength.
    if model is not None and (strength_model > 0 or strength_clip > 0):
        model_lora, clip_lora = comfy.sd.load_lora_for_models(model, clip, lora, strength_model, strength_clip)
    return (model_lora, clip_lora)

class PhotoMakerLoraLoaderPlus:
    """Node that applies the LoRA weights bundled in a PhotoMaker checkpoint to a model.

    Delegates the actual weight extraction and patching to the loader instance
    attached to the PHOTOMAKER object by PhotoMakerLoaderPlus.
    """

    def __init__(self):
        # Cache slots kept for parity with PhotoMakerLoaderPlus; unused here.
        self.loaded_lora = None
        self.loaded_clipvision = None

    @classmethod
    def INPUT_TYPES(cls):
        strength_spec = ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01})
        required = {
            "model": ("MODEL",),
            "photomaker": ("PHOTOMAKER",),
            "lora_strength": strength_spec,
        }
        return {"required": required}

    RETURN_TYPES = ("MODEL", )
    FUNCTION = "load_photomaker_lora"

    CATEGORY = "PhotoMaker"

    def load_photomaker_lora(self, model, photomaker, lora_strength):
        """Return the model patched with the checkpoint's LoRA at the given strength."""
        patched = photomaker.loader.load_data(model, None, photomaker.filename, lora_strength, 0)
        return (patched[0],)

class PhotoMakerInsightFaceLoader:
@classmethod
def INPUT_TYPES(s):
Expand Down Expand Up @@ -86,26 +136,33 @@ def INPUT_TYPES(s):
def apply_photomaker(self, clip: CLIP, photomaker: Union[PhotoMakerIDEncoder, PhotoMakerIDEncoder_CLIPInsightfaceExtendtoken], image: Tensor, trigger_word: str, text: str, insightface_opt=None):
if (num_images := len(image)) == 0:
raise ValueError("No image provided or found.")
trigger_word=trigger_word.strip()
tokens = clip.tokenize(text)
class_tokens_mask = {}
out_tokens = {}
num_tokens = getattr(photomaker, 'num_tokens', 2)
num_tokens = 1
for key, val in tokens.items():
clip_tokenizer = getattr(clip.tokenizer, f'clip_{key}', clip.tokenizer)
img_token = clip_tokenizer.tokenizer(trigger_word.strip(), truncation=False, add_special_tokens=False)["input_ids"][0] # only get the first token
img_token = clip_tokenizer.tokenizer(trigger_word, truncation=False, add_special_tokens=False)["input_ids"][0] # only get the first token
_tokens = torch.tensor([[tpy[0] for tpy in tpy_] for tpy_ in val ] , dtype=torch.int32)
_weights = torch.tensor([[tpy[1] for tpy in tpy_] for tpy_ in val] , dtype=torch.float32)
start_token = clip_tokenizer.start_token
end_token = clip_tokenizer.end_token
pad_token = clip_tokenizer.pad_token

tokens_mask = tokenize_with_trigger_word(_tokens, _weights, num_images,img_token,start_token, end_token, pad_token, return_mask=True)[0]
tokens_new, weights_new, num_trigger_tokens_processed = tokenize_with_trigger_word(_tokens, _weights, num_images,img_token,start_token, end_token, pad_token)
tokens_mask = tokenize_with_trigger_word(_tokens, _weights, num_images, num_tokens, img_token,start_token, end_token, pad_token, return_mask=True)[0]
tokens_new, weights_new, num_trigger_tokens_processed = tokenize_with_trigger_word(_tokens, _weights, num_images, num_tokens, img_token,start_token, end_token, pad_token)
token_weight_pairs = [[(tt,ww) for tt,ww in zip(x.tolist(), y.tolist())] for x,y in zip(tokens_new, weights_new)]
mask = (tokens_mask == -1).tolist()
class_tokens_mask[key] = mask
out_tokens[key] = token_weight_pairs

cond, pooled = clip.encode_from_tokens(out_tokens, return_pooled=True)
if num_trigger_tokens_processed == 0 or not trigger_word:
logging.warning("\033[33mWarning:\033[0m No trigger token found.")
return ([[cond, {"pooled_output": pooled}]],)

prompt_embeds = cond
device_orig = prompt_embeds.device
first_key = next(iter(tokens.keys()))
Expand All @@ -129,8 +186,9 @@ def apply_photomaker(self, clip: CLIP, photomaker: Union[PhotoMakerIDEncoder, Ph
pixel_values = comfy.clip_vision.clip_preprocess(image.to(photomaker.load_device)).float()

if photomaker.__class__.__name__ == 'PhotoMakerIDEncoder':
cond = photomaker(id_pixel_values=pixel_values.unsqueeze(0), prompt_embeds=cond.to(photomaker.load_device),
class_tokens_mask=torch.tensor(class_tokens_mask, dtype=torch.bool, device=photomaker.load_device))
cond = photomaker(id_pixel_values=pixel_values.unsqueeze(0),
prompt_embeds=cond.to(photomaker.load_device),
class_tokens_mask=torch.tensor(class_tokens_mask, dtype=torch.bool, device=photomaker.load_device).unsqueeze(0))
else:
if insightface_opt is None:
raise ValueError(f"InsightFace is required for PhotoMaker V2")
Expand Down Expand Up @@ -171,9 +229,6 @@ def tensor_to_pil_np(_img):

return ([[cond, {"pooled_output": pooled}]],)


from .style_template import styles

class PhotoMakerStyles:
@classmethod
def INPUT_TYPES(s):
Expand Down Expand Up @@ -262,8 +317,8 @@ def prep_images_for_clip_vision_from_path(self, path:str, interpolation:str, cro
clip_preprocess = CLIPImageProcessor(resample=resample, do_normalize=False, do_resize=do_resize)
id_pixel_values = clip_preprocess(input_id_images, return_tensors="pt").pixel_values.movedim(1,-1)
except TypeError as err:
print('[PhotoMaker]:', err)
print('[PhotoMaker]: You may need to update transformers.')
logging.warning('[PhotoMaker]:', err)
logging.warning('[PhotoMaker]: You may need to update transformers.')
input_id_images = [self.image_loader.load_image(image_path)[0] for image_path in image_path_list]
do_resize = not all(img.shape[-3:-3+2] == size for img in input_id_images)
if do_resize:
Expand All @@ -272,26 +327,21 @@ def prep_images_for_clip_vision_from_path(self, path:str, interpolation:str, cro
id_pixel_values = torch.cat(input_id_images)
return (id_pixel_values,)

# supported = False
# try:
# from comfy_extras.nodes_photomaker import PhotoMakerLoader as _PhotoMakerLoader
# supported = True
# except Exception: ...

# Registry mapping node identifiers to their implementing classes for ComfyUI.
# (Removed dead commented-out entry that referenced an undefined `supported` flag.)
NODE_CLASS_MAPPINGS = {
    "PhotoMakerLoaderPlus": PhotoMakerLoaderPlus,
    "PhotoMakerEncodePlus": PhotoMakerEncodePlus,
    "PhotoMakerStyles": PhotoMakerStyles,
    "PhotoMakerLoraLoaderPlus": PhotoMakerLoraLoaderPlus,
    "PrepImagesForClipVisionFromPath": PrepImagesForClipVisionFromPath,
    "PhotoMakerInsightFaceLoader": PhotoMakerInsightFaceLoader,
}

# Human-readable display names shown in the ComfyUI node menu.
# Fixes: removed a stray duplicate closing brace (syntax error) and a dead
# commented-out entry referencing an undefined `supported` flag.
NODE_DISPLAY_NAME_MAPPINGS = {
    "PhotoMakerLoaderPlus": "PhotoMaker Loader Plus",
    "PhotoMakerEncodePlus": "PhotoMaker Encode Plus",
    "PhotoMakerStyles": "Apply PhotoMaker Style",
    "PhotoMakerLoraLoaderPlus": "PhotoMaker LoRA Loader Plus",
    "PrepImagesForClipVisionFromPath": "Prepare Images For CLIP Vision From Path",
    "PhotoMakerInsightFaceLoader": "PhotoMaker InsightFace Loader",
}
Loading

0 comments on commit 13987cf

Please sign in to comment.