From 6435691bb11c5a35703720bfd2a875f24c066f86 Mon Sep 17 00:00:00 2001 From: Justin Maier Date: Sun, 9 Oct 2022 19:26:52 -0600 Subject: [PATCH 01/56] Add "Scale to" option to Extras --- javascript/ui.js | 3 ++- modules/extras.py | 28 +++++++++++++++++++++++----- modules/ui.py | 38 +++++++++++++++++++++++++------------- 3 files changed, 50 insertions(+), 19 deletions(-) diff --git a/javascript/ui.js b/javascript/ui.js index b1053201..4100944e 100644 --- a/javascript/ui.js +++ b/javascript/ui.js @@ -101,7 +101,8 @@ function create_tab_index_args(tabId, args){ } function get_extras_tab_index(){ - return create_tab_index_args('mode_extras', arguments) + const [,,...args] = [...arguments] + return [get_tab_index('mode_extras'), get_tab_index('extras_resize_mode'), ...args] } function create_submit_args(args){ diff --git a/modules/extras.py b/modules/extras.py index 41e8612c..83ca7049 100644 --- a/modules/extras.py +++ b/modules/extras.py @@ -1,3 +1,4 @@ +import math import os import numpy as np @@ -19,7 +20,7 @@ import gradio as gr cached_images = {} -def run_extras(extras_mode, image, image_folder, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility): +def run_extras(extras_mode, resize_mode, image, image_folder, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, upscaling_resize_w, upscaling_resize_h, upscaling_crop, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility): devices.torch_gc() imageArr = [] @@ -67,8 +68,23 @@ def run_extras(extras_mode, image, image_folder, gfpgan_visibility, codeformer_v info += f"CodeFormer w: {round(codeformer_weight, 2)}, CodeFormer visibility:{round(codeformer_visibility, 2)}\n" image = res + if resize_mode == 1: + upscaling_resize = max(upscaling_resize_w/image.width, upscaling_resize_h/image.height) + crop_info = " (crop)" if upscaling_crop else "" + info += f"Resize to: {upscaling_resize_w:g}x{upscaling_resize_h:g}{crop_info}\n" + + def crop_upscaled_center(image, resize_w, resize_h): + left = int(math.ceil((image.width - resize_w) / 2)) + right = image.width - int(math.floor((image.width - resize_w) / 2)) + top = int(math.ceil((image.height - resize_h) / 2)) + bottom = image.height - int(math.floor((image.height - resize_h) / 2)) + + image = image.crop((left, top, right, bottom)) + return image + + if upscaling_resize != 1.0: - def upscale(image, scaler_index, resize): + def upscale(image, scaler_index, resize, mode, resize_w, resize_h, crop): small = image.crop((image.width // 2, image.height // 2, image.width // 2 + 10, image.height // 2 + 10)) pixels = tuple(np.array(small).flatten().tolist()) key = (resize, scaler_index, image.width, image.height, gfpgan_visibility, codeformer_visibility, codeformer_weight) + pixels @@ -77,15 +93,17 @@ def run_extras(extras_mode, image, image_folder, gfpgan_visibility, codeformer_v if c is None: upscaler = shared.sd_upscalers[scaler_index] c = upscaler.scaler.upscale(image, resize, upscaler.data_path) + if mode == 1 and crop: + c = crop_upscaled_center(c, resize_w, resize_h) cached_images[key] = c return c info += f"Upscale: {round(upscaling_resize, 3)}, model:{shared.sd_upscalers[extras_upscaler_1].name}\n" - res = upscale(image, extras_upscaler_1, upscaling_resize) + res = upscale(image, extras_upscaler_1, upscaling_resize, resize_mode, upscaling_resize_w, upscaling_resize_h, upscaling_crop) if extras_upscaler_2 != 0 and extras_upscaler_2_visibility > 0: - res2 = upscale(image, extras_upscaler_2, upscaling_resize) + res2 = upscale(image, extras_upscaler_2, upscaling_resize, resize_mode, upscaling_resize_w, upscaling_resize_h, upscaling_crop) info += f"Upscale: {round(upscaling_resize, 3)}, visibility: {round(extras_upscaler_2_visibility, 3)}, model:{shared.sd_upscalers[extras_upscaler_2].name}\n" res = Image.blend(res, res2, extras_upscaler_2_visibility) @@ -190,7 +208,7 @@ def run_modelmerger(primary_model_name, secondary_model_name, interp_method, int theta_0[key] = theta_func(theta_0[key], theta_1[key], (float(1.0) - interp_amount)) # Need to reverse the interp_amount to match the desired mix ration in the merged checkpoint if save_as_half: theta_0[key] = theta_0[key].half() - + for key in theta_1.keys(): if 'model' in key and key not in theta_0: theta_0[key] = theta_1[key] diff --git a/modules/ui.py b/modules/ui.py index 2231a8ed..4bb2892b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -101,7 +101,7 @@ def send_gradio_gallery_to_image(x): def save_files(js_data, images, do_make_zip, index): - import csv + import csv filenames = [] fullfns = [] @@ -551,7 +551,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row(): do_make_zip = gr.Checkbox(label="Make Zip when Save?", value=False) - + with gr.Row(): download_files = gr.File(None, file_count="multiple", interactive=False, show_label=False, visible=False) @@ -739,7 +739,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Row(): do_make_zip = gr.Checkbox(label="Make Zip when Save?", value=False) - + with gr.Row(): download_files = gr.File(None, file_count="multiple", interactive=False, show_label=False, visible=False) @@ -903,7 +903,15 @@ def create_ui(wrap_gradio_gpu_call): with gr.TabItem('Batch Process'): image_batch = gr.File(label="Batch Process", file_count="multiple", interactive=True, type="file") - upscaling_resize = gr.Slider(minimum=1.0, maximum=4.0, step=0.05, label="Resize", value=2) + with gr.Tabs(elem_id="extras_resize_mode"): + with gr.TabItem('Scale by'): + upscaling_resize = gr.Slider(minimum=1.0, maximum=4.0, step=0.05, label="Resize", value=2) + with gr.TabItem('Scale to'): + with gr.Group(): + with gr.Row(): + upscaling_resize_w = gr.Number(label="Width", value=512) + upscaling_resize_h = gr.Number(label="Height", value=512) + upscaling_crop = gr.Checkbox(label='Crop to fit', value=True) with gr.Group(): extras_upscaler_1 = gr.Radio(label='Upscaler 1', choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index") @@ -934,6 +942,7 @@ def create_ui(wrap_gradio_gpu_call): fn=wrap_gradio_gpu_call(modules.extras.run_extras), _js="get_extras_tab_index", inputs=[ + dummy_component, dummy_component, extras_image, image_batch, @@ -941,6 +950,9 @@ def create_ui(wrap_gradio_gpu_call): codeformer_visibility, codeformer_weight, upscaling_resize, + upscaling_resize_w, + upscaling_resize_h, + upscaling_crop, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility, @@ -951,14 +963,14 @@ def create_ui(wrap_gradio_gpu_call): html_info, ] ) - + extras_send_to_img2img.click( fn=lambda x: image_from_url_text(x), _js="extract_image_from_gallery_img2img", inputs=[result_images], outputs=[init_img], ) - + extras_send_to_inpaint.click( fn=lambda x: image_from_url_text(x), _js="extract_image_from_gallery_img2img", @@ -1286,7 +1298,7 @@ Requested path was: {f} outputs=[], _js='function(){restart_reload()}' ) - + if column is not None: column.__exit__() @@ -1318,12 +1330,12 @@ Requested path was: {f} component_dict[k] = component settings_interface.gradio_ref = demo - + with gr.Tabs() as tabs: for interface, label, ifid in interfaces: with gr.TabItem(label, id=ifid): interface.render() - + if os.path.exists(os.path.join(script_path, "notification.mp3")): audio_notification = gr.Audio(interactive=False, value=os.path.join(script_path, "notification.mp3"), elem_id="audio_notification", visible=False) @@ -1456,10 +1468,10 @@ Requested path was: {f} if getattr(obj,'custom_script_source',None) is not None: key = 'customscript/' + obj.custom_script_source + '/' + key - + if getattr(obj, 'do_not_save_to_config', False): return - + saved_value = ui_settings.get(key, None) if saved_value is None: ui_settings[key] = getattr(obj, field) @@ -1483,10 +1495,10 @@ Requested path was: {f} if type(x) == gr.Textbox: apply_field(x, 'value') - + if type(x) == gr.Number: apply_field(x, 'value') - + visit(txt2img_interface, loadsave, "txt2img") visit(img2img_interface, loadsave, "img2img") visit(extras_interface, loadsave, "extras") From 1f92336be768d235c18a82acb2195b7135101ae7 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Sun, 9 Oct 2022 23:58:18 -0500 Subject: [PATCH 02/56] refactored the deepbooru module to improve speed on running multiple interogations in a row. Added the option to generate deepbooru tags for textual inversion preproccessing. --- modules/deepbooru.py | 84 +++++++++++++++++++------ modules/textual_inversion/preprocess.py | 22 ++++++- modules/ui.py | 52 ++++++++++----- 3 files changed, 122 insertions(+), 36 deletions(-) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 7e3c0618..cee4a3b4 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -1,21 +1,74 @@ import os.path from concurrent.futures import ProcessPoolExecutor -from multiprocessing import get_context +import multiprocessing -def _load_tf_and_return_tags(pil_image, threshold): +def get_deepbooru_tags(pil_image, threshold=0.5): + """ + This method is for running only one image at a time for simple use. Used to the img2img interrogate. + """ + from modules import shared # prevents circular reference + create_deepbooru_process(threshold) + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process_queue.put(pil_image) + while shared.deepbooru_process_return["value"] == -1: + time.sleep(0.2) + release_process() + return ret + + +def deepbooru_process(queue, deepbooru_process_return, threshold): + model, tags = get_deepbooru_tags_model() + while True: # while process is running, keep monitoring queue for new image + pil_image = queue.get() + if pil_image == "QUIT": + break + else: + deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold) + + +def create_deepbooru_process(threshold=0.5): + """ + Creates deepbooru process. A queue is created to send images into the process. This enables multiple images + to be processed in a row without reloading the model or creating a new process. To return the data, a shared + dictionary is created to hold the tags created. To wait for tags to be returned, a value of -1 is assigned + to the dictionary and the method adding the image to the queue should wait for this value to be updated with + the tags. + """ + from modules import shared # prevents circular reference + shared.deepbooru_process_manager = multiprocessing.Manager() + shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue() + shared.deepbooru_process_return = shared.deepbooru_process_manager.dict() + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold)) + shared.deepbooru_process.start() + + +def release_process(): + """ + Stops the deepbooru process to return used memory + """ + from modules import shared # prevents circular reference + shared.deepbooru_process_queue.put("QUIT") + shared.deepbooru_process.join() + shared.deepbooru_process_queue = None + shared.deepbooru_process = None + shared.deepbooru_process_return = None + shared.deepbooru_process_manager = None + +def get_deepbooru_tags_model(): import deepdanbooru as dd import tensorflow as tf import numpy as np - this_folder = os.path.dirname(__file__) model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru')) if not os.path.exists(os.path.join(model_path, 'project.json')): # there is no point importing these every time import zipfile from basicsr.utils.download_util import load_file_from_url - load_file_from_url(r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip", - model_path) + load_file_from_url( + r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip", + model_path) with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref: zip_ref.extractall(model_path) os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip")) @@ -24,7 +77,13 @@ def _load_tf_and_return_tags(pil_image, threshold): model = dd.project.load_model_from_project( model_path, compile_model=True ) + return model, tags + +def get_deepbooru_tags_from_model(model, tags, pil_image, threshold=0.5): + import deepdanbooru as dd + import tensorflow as tf + import numpy as np width = model.input_shape[2] height = model.input_shape[1] image = np.array(pil_image) @@ -57,17 +116,4 @@ def _load_tf_and_return_tags(pil_image, threshold): print('\n'.join(sorted(result_tags_print, reverse=True))) - return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') - - -def subprocess_init_no_cuda(): - import os - os.environ["CUDA_VISIBLE_DEVICES"] = "-1" - - -def get_deepbooru_tags(pil_image, threshold=0.5): - context = get_context('spawn') - with ProcessPoolExecutor(initializer=subprocess_init_no_cuda, mp_context=context) as executor: - f = executor.submit(_load_tf_and_return_tags, pil_image, threshold, ) - ret = f.result() # will rethrow any exceptions - return ret \ No newline at end of file + return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') \ No newline at end of file diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index f1c002a2..9f63c9a4 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -3,11 +3,14 @@ from PIL import Image, ImageOps import platform import sys import tqdm +import time from modules import shared, images +from modules.shared import opts, cmd_opts +if cmd_opts.deepdanbooru: + import modules.deepbooru as deepbooru - -def preprocess(process_src, process_dst, process_flip, process_split, process_caption): +def preprocess(process_src, process_dst, process_flip, process_split, process_caption, process_caption_deepbooru=False): size = 512 src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) @@ -24,10 +27,21 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca if process_caption: shared.interrogator.load() + if process_caption_deepbooru: + deepbooru.create_deepbooru_process() + def save_pic_with_caption(image, index): if process_caption: caption = "-" + shared.interrogator.generate_caption(image) caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") + elif process_caption_deepbooru: + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process_queue.put(image) + while shared.deepbooru_process_return["value"] == -1: + time.sleep(0.2) + caption = "-" + shared.deepbooru_process_return["value"] + caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") + shared.deepbooru_process_return["value"] = -1 else: caption = filename caption = os.path.splitext(caption)[0] @@ -79,6 +93,10 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca if process_caption: shared.interrogator.send_blip_to_ram() + if process_caption_deepbooru: + deepbooru.release_process() + + def sanitize_caption(base_path, original_caption, suffix): operating_system = platform.system().lower() if (operating_system == "windows"): diff --git a/modules/ui.py b/modules/ui.py index 2231a8ed..179e3a83 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1034,6 +1034,9 @@ def create_ui(wrap_gradio_gpu_call): process_flip = gr.Checkbox(label='Create flipped copies') process_split = gr.Checkbox(label='Split oversized images into two') process_caption = gr.Checkbox(label='Use BLIP caption as filename') + if cmd_opts.deepdanbooru: + process_caption_deepbooru = gr.Checkbox(label='Use deepbooru caption as filename') + with gr.Row(): with gr.Column(scale=3): @@ -1086,21 +1089,40 @@ def create_ui(wrap_gradio_gpu_call): ] ) - run_preprocess.click( - fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), - _js="start_training_textual_inversion", - inputs=[ - process_src, - process_dst, - process_flip, - process_split, - process_caption, - ], - outputs=[ - ti_output, - ti_outcome, - ], - ) + if cmd_opts.deepdanbooru: + # if process_caption_deepbooru is None, it will cause an error, as a result only include it if it is enabled + run_preprocess.click( + fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), + _js="start_training_textual_inversion", + inputs=[ + process_src, + process_dst, + process_flip, + process_split, + process_caption, + process_caption_deepbooru, + ], + outputs=[ + ti_output, + ti_outcome, + ], + ) + else: + run_preprocess.click( + fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), + _js="start_training_textual_inversion", + inputs=[ + process_src, + process_dst, + process_flip, + process_split, + process_caption, + ], + outputs=[ + ti_output, + ti_outcome, + ], + ) train_embedding.click( fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.train_embedding, extra_outputs=[gr.update()]), From 3110f895b2718a3a25aae419fdf5c87c177ec9f4 Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Mon, 10 Oct 2022 17:07:46 +0900 Subject: [PATCH 03/56] Textual Inversion: Added custom training image size and number of repeats per input image in a single epoch --- modules/textual_inversion/dataset.py | 6 +++--- modules/textual_inversion/preprocess.py | 4 ++-- modules/textual_inversion/textual_inversion.py | 15 ++++++++++++--- modules/ui.py | 8 +++++++- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index 7c44ea5b..acc4ce59 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -15,13 +15,13 @@ re_tag = re.compile(r"[a-zA-Z][_\w\d()]+") class PersonalizedBase(Dataset): - def __init__(self, data_root, size=None, repeats=100, flip_p=0.5, placeholder_token="*", width=512, height=512, model=None, device=None, template_file=None): + def __init__(self, data_root, size, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None): self.placeholder_token = placeholder_token self.size = size - self.width = width - self.height = height + self.width = size + self.height = size self.flip = transforms.RandomHorizontalFlip(p=flip_p) self.dataset = [] diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index f1c002a2..b3de6fd7 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -7,8 +7,8 @@ import tqdm from modules import shared, images -def preprocess(process_src, process_dst, process_flip, process_split, process_caption): - size = 512 +def preprocess(process_src, process_dst, process_size, process_flip, process_split, process_caption): + size = process_size src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index cd9f3498..e34dc2e8 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -6,6 +6,7 @@ import torch import tqdm import html import datetime +import math from modules import shared, devices, sd_hijack, processing, sd_models @@ -156,7 +157,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn -def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, create_image_every, save_embedding_every, template_file): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_size, steps, num_repeats, create_image_every, save_embedding_every, template_file): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -182,7 +183,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." with torch.autocast("cuda"): - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, size=512, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, size=training_size, repeats=num_repeats, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) hijack = sd_hijack.model_hijack @@ -200,6 +201,9 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, if ititial_step > steps: return embedding, filename + tr_img_len = len([os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]) + epoch_len = (tr_img_len * num_repeats) + tr_img_len + pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) for i, (x, text) in pbar: embedding.step = i + ititial_step @@ -223,7 +227,10 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, loss.backward() optimizer.step() - pbar.set_description(f"loss: {losses.mean():.7f}") + epoch_num = math.floor(embedding.step / epoch_len) + epoch_step = embedding.step - (epoch_num * epoch_len) + + pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{epoch_len}]loss: {losses.mean():.7f}") if embedding.step > 0 and embedding_dir is not None and embedding.step % save_embedding_every == 0: last_saved_file = os.path.join(embedding_dir, f'{embedding_name}-{embedding.step}.pt') @@ -236,6 +243,8 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, sd_model=shared.sd_model, prompt=text, steps=20, + height=training_size, + width=training_size, do_not_save_grid=True, do_not_save_samples=True, ) diff --git a/modules/ui.py b/modules/ui.py index 2231a8ed..f821fd8d 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1029,6 +1029,7 @@ def create_ui(wrap_gradio_gpu_call): process_src = gr.Textbox(label='Source directory') process_dst = gr.Textbox(label='Destination directory') + process_size = gr.Slider(minimum=64, maximum=2048, step=64, label="Size (width and height)", value=512) with gr.Row(): process_flip = gr.Checkbox(label='Create flipped copies') @@ -1043,13 +1044,15 @@ def create_ui(wrap_gradio_gpu_call): run_preprocess = gr.Button(value="Preprocess", variant='primary') with gr.Group(): - gr.HTML(value="

Train an embedding; must specify a directory with a set of 512x512 images

") + gr.HTML(value="

Train an embedding; must specify a directory with a set of 1:1 ratio images

") train_embedding_name = gr.Dropdown(label='Embedding', choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())) learn_rate = gr.Number(label='Learning rate', value=5.0e-03) dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images") log_directory = gr.Textbox(label='Log directory', placeholder="Path to directory where to write outputs", value="textual_inversion") template_file = gr.Textbox(label='Prompt template file', value=os.path.join(script_path, "textual_inversion_templates", "style_filewords.txt")) + training_size = gr.Slider(minimum=64, maximum=2048, step=64, label="Size (width and height)", value=512) steps = gr.Number(label='Max steps', value=100000, precision=0) + num_repeats = gr.Number(label='Number of repeats for a single input image per epoch', value=100, precision=0) create_image_every = gr.Number(label='Save an image to log directory every N steps, 0 to disable', value=500, precision=0) save_embedding_every = gr.Number(label='Save a copy of embedding to log directory every N steps, 0 to disable', value=500, precision=0) @@ -1092,6 +1095,7 @@ def create_ui(wrap_gradio_gpu_call): inputs=[ process_src, process_dst, + process_size, process_flip, process_split, process_caption, @@ -1110,7 +1114,9 @@ def create_ui(wrap_gradio_gpu_call): learn_rate, dataset_directory, log_directory, + training_size, steps, + num_repeats, create_image_every, save_embedding_every, template_file, From 8ec069e64df48f8f202f8b93a08e91b69448eb39 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 03:23:24 -0500 Subject: [PATCH 04/56] removed duplicate run_preprocess.click by creating run_preprocess_inputs list and appending deepbooru variable to input list if in scope --- modules/ui.py | 49 +++++++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/modules/ui.py b/modules/ui.py index 179e3a83..22ca74c2 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1089,40 +1089,25 @@ def create_ui(wrap_gradio_gpu_call): ] ) + run_preprocess_inputs = [ + process_src, + process_dst, + process_flip, + process_split, + process_caption, + ] if cmd_opts.deepdanbooru: # if process_caption_deepbooru is None, it will cause an error, as a result only include it if it is enabled - run_preprocess.click( - fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), - _js="start_training_textual_inversion", - inputs=[ - process_src, - process_dst, - process_flip, - process_split, - process_caption, - process_caption_deepbooru, - ], - outputs=[ - ti_output, - ti_outcome, - ], - ) - else: - run_preprocess.click( - fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), - _js="start_training_textual_inversion", - inputs=[ - process_src, - process_dst, - process_flip, - process_split, - process_caption, - ], - outputs=[ - ti_output, - ti_outcome, - ], - ) + run_preprocess_inputs.append(process_caption_deepbooru) + run_preprocess.click( + fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), + _js="start_training_textual_inversion", + inputs=run_preprocess_inputs, + outputs=[ + ti_output, + ti_outcome, + ], + ) train_embedding.click( fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.train_embedding, extra_outputs=[gr.update()]), From 4ee7519fc2e459ce8eff1f61f1655afba393357c Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Mon, 10 Oct 2022 17:31:33 +0900 Subject: [PATCH 05/56] Fixed progress bar output for epoch --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index e34dc2e8..769682ea 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -228,7 +228,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini optimizer.step() epoch_num = math.floor(embedding.step / epoch_len) - epoch_step = embedding.step - (epoch_num * epoch_len) + epoch_step = embedding.step - (epoch_num * epoch_len) + 1 pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{epoch_len}]loss: {losses.mean():.7f}") From 2f94331df2cb1181439adecc28cfd758049f6501 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 03:34:00 -0500 Subject: [PATCH 06/56] removed change in last commit, simplified to adding the visible argument to process_caption_deepbooru and it set to False if deepdanbooru argument is not set --- modules/ui.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/modules/ui.py b/modules/ui.py index 22ca74c2..f8adafb3 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1036,7 +1036,8 @@ def create_ui(wrap_gradio_gpu_call): process_caption = gr.Checkbox(label='Use BLIP caption as filename') if cmd_opts.deepdanbooru: process_caption_deepbooru = gr.Checkbox(label='Use deepbooru caption as filename') - + else: + process_caption_deepbooru = gr.Checkbox(label='Use deepbooru caption as filename', visible=False) with gr.Row(): with gr.Column(scale=3): @@ -1089,20 +1090,17 @@ def create_ui(wrap_gradio_gpu_call): ] ) - run_preprocess_inputs = [ - process_src, - process_dst, - process_flip, - process_split, - process_caption, - ] - if cmd_opts.deepdanbooru: - # if process_caption_deepbooru is None, it will cause an error, as a result only include it if it is enabled - run_preprocess_inputs.append(process_caption_deepbooru) run_preprocess.click( fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]), _js="start_training_textual_inversion", - inputs=run_preprocess_inputs, + inputs=[ + process_src, + process_dst, + process_flip, + process_split, + process_caption, + process_caption_deepbooru + ], outputs=[ ti_output, ti_outcome, From 04c745ea4f81518999927fee5f78500560c25e29 Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Mon, 10 Oct 2022 22:35:35 +0900 Subject: [PATCH 07/56] Custom Width and Height --- modules/textual_inversion/dataset.py | 7 +++---- modules/textual_inversion/preprocess.py | 19 ++++++++++--------- .../textual_inversion/textual_inversion.py | 11 +++++------ modules/ui.py | 12 ++++++++---- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index acc4ce59..bcf772d2 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -15,13 +15,12 @@ re_tag = re.compile(r"[a-zA-Z][_\w\d()]+") class PersonalizedBase(Dataset): - def __init__(self, data_root, size, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None): + def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None): self.placeholder_token = placeholder_token - self.size = size - self.width = size - self.height = size + self.width = width + self.height = height self.flip = transforms.RandomHorizontalFlip(p=flip_p) self.dataset = [] diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index b3de6fd7..d7efdef2 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -7,8 +7,9 @@ import tqdm from modules import shared, images -def preprocess(process_src, process_dst, process_size, process_flip, process_split, process_caption): - size = process_size +def preprocess(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption): + width = process_width + height = process_height src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) @@ -55,23 +56,23 @@ def preprocess(process_src, process_dst, process_size, process_flip, process_spl is_wide = ratio < 1 / 1.35 if process_split and is_tall: - img = img.resize((size, size * img.height // img.width)) + img = img.resize((width, height * img.height // img.width)) - top = img.crop((0, 0, size, size)) + top = img.crop((0, 0, width, height)) save_pic(top, index) - bot = img.crop((0, img.height - size, size, img.height)) + bot = img.crop((0, img.height - height, width, img.height)) save_pic(bot, index) elif process_split and is_wide: - img = img.resize((size * img.width // img.height, size)) + img = img.resize((width * img.width // img.height, height)) - left = img.crop((0, 0, size, size)) + left = img.crop((0, 0, width, height)) save_pic(left, index) - right = img.crop((img.width - size, 0, img.width, size)) + right = img.crop((img.width - width, 0, img.width, height)) save_pic(right, index) else: - img = images.resize_image(1, img, size, size) + img = images.resize_image(1, img, width, height) save_pic(img, index) shared.state.nextjob() diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 769682ea..5965c5a0 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -6,7 +6,6 @@ import torch import tqdm import html import datetime -import math from modules import shared, devices, sd_hijack, processing, sd_models @@ -157,7 +156,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn -def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_size, steps, num_repeats, create_image_every, save_embedding_every, template_file): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -183,7 +182,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." with torch.autocast("cuda"): - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, size=training_size, repeats=num_repeats, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=num_repeats, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) hijack = sd_hijack.model_hijack @@ -227,7 +226,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini loss.backward() optimizer.step() - epoch_num = math.floor(embedding.step / epoch_len) + epoch_num = embedding.step // epoch_len epoch_step = embedding.step - (epoch_num * epoch_len) + 1 pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{epoch_len}]loss: {losses.mean():.7f}") @@ -243,8 +242,8 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini sd_model=shared.sd_model, prompt=text, steps=20, - height=training_size, - width=training_size, + height=training_height, + width=training_width, do_not_save_grid=True, do_not_save_samples=True, ) diff --git a/modules/ui.py b/modules/ui.py index f821fd8d..8c06ad7c 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1029,7 +1029,8 @@ def create_ui(wrap_gradio_gpu_call): process_src = gr.Textbox(label='Source directory') process_dst = gr.Textbox(label='Destination directory') - process_size = gr.Slider(minimum=64, maximum=2048, step=64, label="Size (width and height)", value=512) + process_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) + process_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) with gr.Row(): process_flip = gr.Checkbox(label='Create flipped copies') @@ -1050,7 +1051,8 @@ def create_ui(wrap_gradio_gpu_call): dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images") log_directory = gr.Textbox(label='Log directory', placeholder="Path to directory where to write outputs", value="textual_inversion") template_file = gr.Textbox(label='Prompt template file', value=os.path.join(script_path, "textual_inversion_templates", "style_filewords.txt")) - training_size = gr.Slider(minimum=64, maximum=2048, step=64, label="Size (width and height)", value=512) + training_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) + training_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512) steps = gr.Number(label='Max steps', value=100000, precision=0) num_repeats = gr.Number(label='Number of repeats for a single input image per epoch', value=100, precision=0) create_image_every = gr.Number(label='Save an image to log directory every N steps, 0 to disable', value=500, precision=0) @@ -1095,7 +1097,8 @@ def create_ui(wrap_gradio_gpu_call): inputs=[ process_src, process_dst, - process_size, + process_width, + process_height, process_flip, process_split, process_caption, @@ -1114,7 +1117,8 @@ def create_ui(wrap_gradio_gpu_call): learn_rate, dataset_directory, log_directory, - training_size, + training_width, + training_height, steps, num_repeats, create_image_every, From 1d64976dbc5a0f3124567b91fadd5014a9d93c5f Mon Sep 17 00:00:00 2001 From: Justin Maier Date: Mon, 10 Oct 2022 12:04:21 -0600 Subject: [PATCH 08/56] Simplify crop logic --- modules/extras.py | 14 +++----------- modules/ui.py | 4 ++-- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/modules/extras.py b/modules/extras.py index 83ca7049..b24d7de3 100644 --- a/modules/extras.py +++ b/modules/extras.py @@ -73,16 +73,6 @@ def run_extras(extras_mode, resize_mode, image, image_folder, gfpgan_visibility, crop_info = " (crop)" if upscaling_crop else "" info += f"Resize to: {upscaling_resize_w:g}x{upscaling_resize_h:g}{crop_info}\n" - def crop_upscaled_center(image, resize_w, resize_h): - left = int(math.ceil((image.width - resize_w) / 2)) - right = image.width - int(math.floor((image.width - resize_w) / 2)) - top = int(math.ceil((image.height - resize_h) / 2)) - bottom = image.height - int(math.floor((image.height - resize_h) / 2)) - - image = image.crop((left, top, right, bottom)) - return image - - if upscaling_resize != 1.0: def upscale(image, scaler_index, resize, mode, resize_w, resize_h, crop): small = image.crop((image.width // 2, image.height // 2, image.width // 2 + 10, image.height // 2 + 10)) @@ -94,7 +84,9 @@ def run_extras(extras_mode, resize_mode, image, image_folder, gfpgan_visibility, upscaler = shared.sd_upscalers[scaler_index] c = upscaler.scaler.upscale(image, resize, upscaler.data_path) if mode == 1 and crop: - c = crop_upscaled_center(c, resize_w, resize_h) + cropped = Image.new("RGB", (resize_w, resize_h)) + cropped.paste(c, box=(resize_w // 2 - c.width // 2, resize_h // 2 - c.height // 2)) + c = cropped cached_images[key] = c return c diff --git a/modules/ui.py b/modules/ui.py index 4bb2892b..1aabe18d 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -909,8 +909,8 @@ def create_ui(wrap_gradio_gpu_call): with gr.TabItem('Scale to'): with gr.Group(): with gr.Row(): - upscaling_resize_w = gr.Number(label="Width", value=512) - upscaling_resize_h = gr.Number(label="Height", value=512) + upscaling_resize_w = gr.Number(label="Width", value=512, precision=0) + upscaling_resize_h = gr.Number(label="Height", value=512, precision=0) upscaling_crop = gr.Checkbox(label='Crop to fit', value=True) with gr.Group(): From bc3e183b739913e7be91213a256f038b10eb71e9 Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Tue, 11 Oct 2022 04:30:13 +0900 Subject: [PATCH 09/56] Textual Inversion: Preprocess and Training will only pick-up image files --- modules/textual_inversion/dataset.py | 3 ++- modules/textual_inversion/preprocess.py | 3 ++- modules/textual_inversion/textual_inversion.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index bcf772d2..d4baf066 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -22,6 +22,7 @@ class PersonalizedBase(Dataset): self.width = width self.height = height self.flip = transforms.RandomHorizontalFlip(p=flip_p) + self.extns = [".jpg",".jpeg",".png"] self.dataset = [] @@ -32,7 +33,7 @@ class PersonalizedBase(Dataset): assert data_root, 'dataset directory not specified' - self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root)] + self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root) if os.path.splitext(file_path.casefold())[1] in self.extns] print("Preparing dataset...") for path in tqdm.tqdm(self.image_paths): image = Image.open(path) diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index d7efdef2..b6c78cf8 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -12,12 +12,13 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ height = process_height src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) + extns = [".jpg",".jpeg",".png"] assert src != dst, 'same directory specified as source and destination' os.makedirs(dst, exist_ok=True) - files = os.listdir(src) + files = [i for i in os.listdir(src) if os.path.splitext(i.casefold())[1] in extns] shared.state.textinfo = "Preprocessing..." shared.state.job_count = len(files) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 5965c5a0..45397be9 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -161,6 +161,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.textinfo = "Initializing textual inversion training..." shared.state.job_count = steps + extns = [".jpg",".jpeg",".png"] filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt') @@ -200,7 +201,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if ititial_step > steps: return embedding, filename - tr_img_len = len([os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]) + tr_img_len = len([os.path.join(data_root, file_path) for file_path in os.listdir(data_root) if os.path.splitext(file_path.casefold())[1] in extns]) epoch_len = (tr_img_len * num_repeats) + tr_img_len pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) From 2536ecbb1790da2af0d61b6a26f38732cba665cd Mon Sep 17 00:00:00 2001 From: Fampai <> Date: Mon, 10 Oct 2022 17:10:29 -0400 Subject: [PATCH 10/56] Refactored learning rate code --- .../textual_inversion/textual_inversion.py | 51 +++++++++++++++++-- modules/ui.py | 2 +- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 5965c5a0..c64a4598 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -189,8 +189,6 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini embedding = hijack.embedding_db.word_embeddings[embedding_name] embedding.vec.requires_grad = True - optimizer = torch.optim.AdamW([embedding.vec], lr=learn_rate) - losses = torch.zeros((32,)) last_saved_file = "" @@ -203,12 +201,24 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini tr_img_len = len([os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]) epoch_len = (tr_img_len * num_repeats) + tr_img_len + scheduleIter = iter(LearnSchedule(learn_rate, steps, ititial_step)) + (learn_rate, end_step) = next(scheduleIter) + print(f'Training at rate of {learn_rate} until step {end_step}') + + optimizer = torch.optim.AdamW([embedding.vec], lr=learn_rate) + pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) for i, (x, text) in pbar: embedding.step = i + ititial_step - if embedding.step > steps: - break + if embedding.step > end_step: + try: + (learn_rate, end_step) = next(scheduleIter) + except: + break + tqdm.tqdm.write(f'Training at rate of {learn_rate} until step {end_step}') + for pg in optimizer.param_groups: + pg['lr'] = learn_rate if shared.state.interrupted: break @@ -277,3 +287,36 @@ Last saved image: {html.escape(last_saved_image)}
return embedding, filename +class LearnSchedule: + def __init__(self, learn_rate, max_steps, cur_step=0): + pairs = learn_rate.split(',') + self.rates = [] + self.it = 0 + self.maxit = 0 + for i, pair in enumerate(pairs): + tmp = pair.split(':') + if len(tmp) == 2: + step = int(tmp[1]) + if step > cur_step: + self.rates.append((float(tmp[0]), min(step, max_steps))) + self.maxit += 1 + if step > max_steps: + return + elif step == -1: + self.rates.append((float(tmp[0]), max_steps)) + self.maxit += 1 + return + else: + self.rates.append((float(tmp[0]), max_steps)) + self.maxit += 1 + return + + def __iter__(self): + return self + + def __next__(self): + if self.it < self.maxit: + self.it += 1 + return self.rates[self.it - 1] + else: + raise StopIteration diff --git a/modules/ui.py b/modules/ui.py index 8c06ad7c..c9e8355b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1047,7 +1047,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Group(): gr.HTML(value="

Train an embedding; must specify a directory with a set of 1:1 ratio images

") train_embedding_name = gr.Dropdown(label='Embedding', choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())) - learn_rate = gr.Number(label='Learning rate', value=5.0e-03) + learn_rate = gr.Textbox(label='Learning rate', placeholder="Learning rate", value = "5.0e-03") dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images") log_directory = gr.Textbox(label='Log directory', placeholder="Path to directory where to write outputs", value="textual_inversion") template_file = gr.Textbox(label='Prompt template file', value=os.path.join(script_path, "textual_inversion_templates", "style_filewords.txt")) From 907a88b2d0be320575c2129d8d6a1d4f3a68f9eb Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Tue, 11 Oct 2022 06:33:08 +0900 Subject: [PATCH 11/56] Added .webp .bmp --- modules/textual_inversion/dataset.py | 2 +- modules/textual_inversion/preprocess.py | 2 +- modules/textual_inversion/textual_inversion.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index d4baf066..0dc54fb7 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -22,7 +22,7 @@ class PersonalizedBase(Dataset): self.width = width self.height = height self.flip = transforms.RandomHorizontalFlip(p=flip_p) - self.extns = [".jpg",".jpeg",".png"] + self.extns = [".jpg",".jpeg",".png",".webp",".bmp"] self.dataset = [] diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index b6c78cf8..8290abe8 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -12,7 +12,7 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ height = process_height src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) - extns = [".jpg",".jpeg",".png"] + extns = [".jpg",".jpeg",".png",".webp",".bmp"] assert src != dst, 'same directory specified as source and destination' diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index a03b299c..33c923d1 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -161,7 +161,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.textinfo = "Initializing textual inversion training..." shared.state.job_count = steps - extns = [".jpg",".jpeg",".png"] + extns = [".jpg",".jpeg",".png",".webp",".bmp"] filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt') From a1a05ad2d13d0b995dbf8ecead6315f17837ef81 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 16:47:58 -0500 Subject: [PATCH 12/56] import time missing, added to deepbooru fixxing error on get_deepbooru_tags --- modules/deepbooru.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index cee4a3b4..12555b2e 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -1,6 +1,7 @@ import os.path from concurrent.futures import ProcessPoolExecutor import multiprocessing +import time def get_deepbooru_tags(pil_image, threshold=0.5): From b980e7188c671fc55b26557f097076fb5c976ba0 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 16:52:54 -0500 Subject: [PATCH 13/56] corrected tag return in get_deepbooru_tags --- modules/deepbooru.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 12555b2e..ebdba5e0 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -15,7 +15,6 @@ def get_deepbooru_tags(pil_image, threshold=0.5): while shared.deepbooru_process_return["value"] == -1: time.sleep(0.2) release_process() - return ret def deepbooru_process(queue, deepbooru_process_return, threshold): From 76ef3d75f61253516c024553335d9083d9660a8a Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 18:01:49 -0500 Subject: [PATCH 14/56] added deepbooru settings (threshold and sort by alpha or likelyhood) --- modules/deepbooru.py | 36 +++++++++++++++++++++++++----------- modules/shared.py | 6 ++++++ 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index ebdba5e0..e31e92c0 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -3,31 +3,32 @@ from concurrent.futures import ProcessPoolExecutor import multiprocessing import time - -def get_deepbooru_tags(pil_image, threshold=0.5): +def get_deepbooru_tags(pil_image): """ This method is for running only one image at a time for simple use. Used to the img2img interrogate. """ from modules import shared # prevents circular reference - create_deepbooru_process(threshold) + create_deepbooru_process(shared.opts.deepbooru_threshold, shared.opts.deepbooru_sort_alpha) shared.deepbooru_process_return["value"] = -1 shared.deepbooru_process_queue.put(pil_image) while shared.deepbooru_process_return["value"] == -1: time.sleep(0.2) + tags = shared.deepbooru_process_return["value"] release_process() + return tags -def deepbooru_process(queue, deepbooru_process_return, threshold): +def deepbooru_process(queue, deepbooru_process_return, threshold, alpha_sort): model, tags = get_deepbooru_tags_model() while True: # while process is running, keep monitoring queue for new image pil_image = queue.get() if pil_image == "QUIT": break else: - deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold) + deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort) -def create_deepbooru_process(threshold=0.5): +def create_deepbooru_process(threshold, alpha_sort): """ Creates deepbooru process. A queue is created to send images into the process. This enables multiple images to be processed in a row without reloading the model or creating a new process. To return the data, a shared @@ -40,7 +41,7 @@ def create_deepbooru_process(threshold=0.5): shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue() shared.deepbooru_process_return = shared.deepbooru_process_manager.dict() shared.deepbooru_process_return["value"] = -1 - shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold)) + shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, alpha_sort)) shared.deepbooru_process.start() @@ -80,7 +81,7 @@ def get_deepbooru_tags_model(): return model, tags -def get_deepbooru_tags_from_model(model, tags, pil_image, threshold=0.5): +def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort): import deepdanbooru as dd import tensorflow as tf import numpy as np @@ -105,15 +106,28 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold=0.5): for i, tag in enumerate(tags): result_dict[tag] = y[i] - result_tags_out = [] + + unsorted_tags_in_theshold = [] result_tags_print = [] for tag in tags: if result_dict[tag] >= threshold: if tag.startswith("rating:"): continue - result_tags_out.append(tag) + unsorted_tags_in_theshold.append((result_dict[tag], tag)) result_tags_print.append(f'{result_dict[tag]} {tag}') + # sort tags + result_tags_out = [] + sort_ndx = 0 + print(alpha_sort) + if alpha_sort: + sort_ndx = 1 + + # sort by reverse by likelihood and normal for alpha + unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort)) + for weight, tag in unsorted_tags_in_theshold: + result_tags_out.append(tag) + print('\n'.join(sorted(result_tags_print, reverse=True))) - return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') \ No newline at end of file + return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ') diff --git a/modules/shared.py b/modules/shared.py index 1995a99a..2e307809 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -261,6 +261,12 @@ options_templates.update(options_section(('sampler-params', "Sampler parameters" 's_noise': OptionInfo(1.0, "sigma noise", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), })) +if cmd_opts.deepdanbooru: + options_templates.update(options_section(('deepbooru-params', "DeepBooru parameters"), { + "deepbooru_sort_alpha": OptionInfo(True, "Sort Alphabetical", gr.Checkbox), + 'deepbooru_threshold': OptionInfo(0.5, "Threshold", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), + })) + class Options: data = None From 70b50b1dfcb0ce0f87998c994f4855014bc7e26b Mon Sep 17 00:00:00 2001 From: ClashSAN <98228077+ClashSAN@users.noreply.github.com> Date: Mon, 10 Oct 2022 23:23:12 +0000 Subject: [PATCH 15/56] add features, credit for Composable Diffusion to readme https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/2171 --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 561eb03d..0e938768 100644 --- a/README.md +++ b/README.md @@ -28,10 +28,12 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web - CodeFormer, face restoration tool as an alternative to GFPGAN - RealESRGAN, neural network upscaler - ESRGAN, neural network upscaler with a lot of third party models - - SwinIR, neural network upscaler + - SwinIR and Swin2SR([see here](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/2092)), neural network upscalers - LDSR, Latent diffusion super resolution upscaling - Resizing aspect ratio options - Sampling method selection + - Adjust sampler eta values (noise multiplier) + - More advanced noise setting options - Interrupt processing at any time - 4GB video card support (also reports of 2GB working) - Correct seeds for batches @@ -67,6 +69,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web - also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2` - No token limit for prompts (original stable diffusion lets you use up to 75 tokens) - DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args) +- [xformers](https://github.com/mv-lab/swin2sr), major speed increase for select cards: (add --xformers to commandline args) ## Installation and Running Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs. @@ -116,6 +119,7 @@ The documentation was moved from this README over to the project's [wiki](https: - CodeFormer - https://github.com/sczhou/CodeFormer - ESRGAN - https://github.com/xinntao/ESRGAN - SwinIR - https://github.com/JingyunLiang/SwinIR +- Swin2SR - https://github.com/mv-lab/swin2sr - LDSR - https://github.com/Hafiidz/latent-diffusion - Ideas for optimizations - https://github.com/basujindal/stable-diffusion - Doggettx - Cross Attention layer optimization - https://github.com/Doggettx/stable-diffusion, original idea for prompt editing. @@ -123,6 +127,8 @@ The documentation was moved from this README over to the project's [wiki](https: - Idea for SD upscale - https://github.com/jquesnelle/txt2imghd - Noise generation for outpainting mk2 - https://github.com/parlance-zz/g-diffuser-bot - CLIP interrogator idea and borrowing some code - https://github.com/pharmapsychotic/clip-interrogator +- Idea for Composable Diffusion - https://github.com/energy-based-model/Compositional-Visual-Generation-with-Composable-Diffusion-Models-PyTorch +- xformers - https://github.com/facebookresearch/xformers +- DeepDanbooru - interrogator for anime diffusers https://github.com/KichangKim/DeepDanbooru - Initial Gradio script - posted on 4chan by an Anonymous user. Thank you Anonymous user. -- DeepDanbooru - interrogator for anime diffusors https://github.com/KichangKim/DeepDanbooru - (You) From bb932dbf9faf43ba918daa4791873078797b2a48 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 18:37:52 -0500 Subject: [PATCH 16/56] added alpha sort and threshold variables to create process method in preprocessing --- modules/textual_inversion/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 4a2194da..c0af729b 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -29,7 +29,7 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ shared.interrogator.load() if process_caption_deepbooru: - deepbooru.create_deepbooru_process() + deepbooru.create_deepbooru_process(opts.deepbooru_threshold, opts.deepbooru_sort_alpha) def save_pic_with_caption(image, index): if process_caption: From 1add3cff84b7e2436d69b1e97ae689281e4a7c33 Mon Sep 17 00:00:00 2001 From: papuSpartan Date: Mon, 10 Oct 2022 19:57:43 -0500 Subject: [PATCH 17/56] Refresh list of models/ckpts upon hitting restart gradio in the settings pane --- modules/ui.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/ui.py b/modules/ui.py index e8039d76..06ff118f 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -39,6 +39,7 @@ import modules.generation_parameters_copypaste from modules import prompt_parser from modules.images import save_image import modules.textual_inversion.ui +from modules.sd_models import list_models # this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the browser will not show any UI mimetypes.init() @@ -1290,6 +1291,9 @@ Requested path was: {f} shared.state.interrupt() settings_interface.gradio_ref.do_restart = True + # refresh models so that new models/.ckpt's show up on reload + list_models() + restart_gradio.click( fn=request_restart, inputs=[], From b2368a3bce663f19a7209d9cb38617e635ca6e3c Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Tue, 11 Oct 2022 17:32:46 +0900 Subject: [PATCH 18/56] Switched to exception handling --- modules/textual_inversion/dataset.py | 10 +++++----- modules/textual_inversion/preprocess.py | 8 +++++--- modules/textual_inversion/textual_inversion.py | 18 ++++++++---------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index 0dc54fb7..4d006366 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -22,7 +22,6 @@ class PersonalizedBase(Dataset): self.width = width self.height = height self.flip = transforms.RandomHorizontalFlip(p=flip_p) - self.extns = [".jpg",".jpeg",".png",".webp",".bmp"] self.dataset = [] @@ -33,12 +32,13 @@ class PersonalizedBase(Dataset): assert data_root, 'dataset directory not specified' - self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root) if os.path.splitext(file_path.casefold())[1] in self.extns] + self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root)] print("Preparing dataset...") for path in tqdm.tqdm(self.image_paths): - image = Image.open(path) - image = image.convert('RGB') - image = image.resize((self.width, self.height), PIL.Image.BICUBIC) + try: + image = Image.open(path).convert('RGB').resize((self.width, self.height), PIL.Image.BICUBIC) + except Exception: + continue filename = os.path.basename(path) filename_tokens = os.path.splitext(filename)[0] diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 8290abe8..1a672725 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -12,13 +12,12 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ height = process_height src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) - extns = [".jpg",".jpeg",".png",".webp",".bmp"] assert src != dst, 'same directory specified as source and destination' os.makedirs(dst, exist_ok=True) - files = [i for i in os.listdir(src) if os.path.splitext(i.casefold())[1] in extns] + files = os.listdir(src) shared.state.textinfo = "Preprocessing..." shared.state.job_count = len(files) @@ -47,7 +46,10 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ for index, imagefile in enumerate(tqdm.tqdm(files)): subindex = [0] filename = os.path.join(src, imagefile) - img = Image.open(filename).convert("RGB") + try: + img = Image.open(filename).convert("RGB") + except Exception: + continue if shared.state.interrupted: break diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 33c923d1..91cde04b 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -161,7 +161,6 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.textinfo = "Initializing textual inversion training..." shared.state.job_count = steps - extns = [".jpg",".jpeg",".png",".webp",".bmp"] filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt') @@ -201,10 +200,6 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if ititial_step > steps: return embedding, filename - tr_img_len = len([os.path.join(data_root, file_path) for file_path in os.listdir(data_root) if os.path.splitext(file_path.casefold())[1] in extns]) - - epoch_len = (tr_img_len * num_repeats) + tr_img_len - pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) for i, (x, text) in pbar: embedding.step = i + ititial_step @@ -228,10 +223,10 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini loss.backward() optimizer.step() - epoch_num = embedding.step // epoch_len - epoch_step = embedding.step - (epoch_num * epoch_len) + 1 + epoch_num = embedding.step // len(ds) + epoch_step = embedding.step - (epoch_num * len(ds)) + 1 - pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{epoch_len}]loss: {losses.mean():.7f}") + pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{len(ds)}]loss: {losses.mean():.7f}") if embedding.step > 0 and embedding_dir is not None and embedding.step % save_embedding_every == 0: last_saved_file = os.path.join(embedding_dir, f'{embedding_name}-{embedding.step}.pt') @@ -243,9 +238,12 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini p = processing.StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, prompt=text, - steps=20, - height=training_height, + steps=28, + height=768, width=training_width, + negative_prompt="lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts,signature, watermark, username, blurry, artist name", + cfg_scale=7.0, + sampler_index=0, do_not_save_grid=True, do_not_save_samples=True, ) From 8bacbca0a1ab9aabcb0ad0cbf070e0006991e98a Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Tue, 11 Oct 2022 17:35:09 +0900 Subject: [PATCH 19/56] Removed my local edits to checkpoint image generation --- modules/textual_inversion/textual_inversion.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 91cde04b..e9ff80c2 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -238,12 +238,9 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini p = processing.StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, prompt=text, - steps=28, - height=768, + steps=20, + height=training_height, width=training_width, - negative_prompt="lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts,signature, watermark, username, blurry, artist name", - cfg_scale=7.0, - sampler_index=0, do_not_save_grid=True, do_not_save_samples=True, ) From 5766ce21abc1986c94d8bd3279b6f4d5205ba984 Mon Sep 17 00:00:00 2001 From: ClashSAN <98228077+ClashSAN@users.noreply.github.com> Date: Tue, 11 Oct 2022 13:20:03 +0000 Subject: [PATCH 20/56] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0e938768..a10faa01 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web - also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2` - No token limit for prompts (original stable diffusion lets you use up to 75 tokens) - DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args) -- [xformers](https://github.com/mv-lab/swin2sr), major speed increase for select cards: (add --xformers to commandline args) +- [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards: (add --xformers to commandline args) ## Installation and Running Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs. From d01a2d01560b31937df1f3433d210c18f97d32fa Mon Sep 17 00:00:00 2001 From: papuSpartan Date: Tue, 11 Oct 2022 08:03:31 -0500 Subject: [PATCH 21/56] move list refresh to webui.py and add stdout indicating it's doing so --- modules/ui.py | 3 --- webui.py | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/ui.py b/modules/ui.py index 06ff118f..ae9317a3 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -39,7 +39,6 @@ import modules.generation_parameters_copypaste from modules import prompt_parser from modules.images import save_image import modules.textual_inversion.ui -from modules.sd_models import list_models # this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the browser will not show any UI mimetypes.init() @@ -1291,8 +1290,6 @@ Requested path was: {f} shared.state.interrupt() settings_interface.gradio_ref.do_restart = True - # refresh models so that new models/.ckpt's show up on reload - list_models() restart_gradio.click( fn=request_restart, diff --git a/webui.py b/webui.py index 270584f7..94098c4c 100644 --- a/webui.py +++ b/webui.py @@ -124,6 +124,8 @@ def webui(): modules.scripts.reload_scripts(os.path.join(script_path, "scripts")) print('Reloading modules: modules.ui') importlib.reload(modules.ui) + print('Refreshing Model List') + modules.sd_models.list_models() print('Restarting Gradio') From 66b7d7584f0b44ce1316425808c27ca7df38293c Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 17:03:00 +0300 Subject: [PATCH 22/56] become even stricter with pickles no pickle shall pass thank you again, RyotaK --- modules/safe.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/modules/safe.py b/modules/safe.py index 05917463..20be16a5 100644 --- a/modules/safe.py +++ b/modules/safe.py @@ -10,6 +10,7 @@ import torch import numpy import _codecs import zipfile +import re # PyTorch 1.13 and later have _TypedStorage renamed to TypedStorage @@ -54,11 +55,27 @@ class RestrictedUnpickler(pickle.Unpickler): raise pickle.UnpicklingError(f"global '{module}/{name}' is forbidden") +allowed_zip_names = ["archive/data.pkl", "archive/version"] +allowed_zip_names_re = re.compile(r"^archive/data/\d+$") + + +def check_zip_filenames(filename, names): + for name in names: + if name in allowed_zip_names: + continue + if allowed_zip_names_re.match(name): + continue + + raise Exception(f"bad file inside {filename}: {name}") + + def check_pt(filename): try: # new pytorch format is a zip file with zipfile.ZipFile(filename) as z: + check_zip_filenames(filename, z.namelist()) + with z.open('archive/data.pkl') as file: unpickler = RestrictedUnpickler(file) unpickler.load() From e0ee5bf703996b33e6d97aa36e0973ceedc88503 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 17:08:03 +0300 Subject: [PATCH 23/56] add codeowners file so stop the great guys who are collaborating on the project from merging in PRs. --- CODEOWNERS | 1 + 1 file changed, 1 insertion(+) create mode 100644 CODEOWNERS diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 00000000..935fedcf --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1 @@ +* @AUTOMATIC1111 From c0484f1b986ce7acb0e3596f6089a191279f5442 Mon Sep 17 00:00:00 2001 From: brkirch Date: Mon, 10 Oct 2022 22:48:54 -0400 Subject: [PATCH 24/56] Add cross-attention optimization from InvokeAI * Add cross-attention optimization from InvokeAI (~30% speed improvement on MPS) * Add command line option for it * Make it default when CUDA is unavailable --- modules/sd_hijack.py | 5 +- modules/sd_hijack_optimizations.py | 79 ++++++++++++++++++++++++++++++ modules/shared.py | 5 +- 3 files changed, 86 insertions(+), 3 deletions(-) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index f07ec041..5a1b167f 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -30,8 +30,11 @@ def apply_optimizations(): elif cmd_opts.opt_split_attention_v1: print("Applying v1 cross attention optimization.") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1 + elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention_invokeai or not torch.cuda.is_available()): + print("Applying cross attention optimization (InvokeAI).") + ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_invokeAI elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention or torch.cuda.is_available()): - print("Applying cross attention optimization.") + print("Applying cross attention optimization (Doggettx).") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.cross_attention_attnblock_forward diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index 3349b9c3..870226c5 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -1,6 +1,7 @@ import math import sys import traceback +import psutil import torch from torch import einsum @@ -116,6 +117,84 @@ def split_cross_attention_forward(self, x, context=None, mask=None): return self.to_out(r2) +# -- From https://github.com/invoke-ai/InvokeAI/blob/main/ldm/modules/attention.py (with hypernetworks support added) -- + +mem_total_gb = psutil.virtual_memory().total // (1 << 30) + +def einsum_op_compvis(q, k, v): + s = einsum('b i d, b j d -> b i j', q, k) + s = s.softmax(dim=-1, dtype=s.dtype) + return einsum('b i j, b j d -> b i d', s, v) + +def einsum_op_slice_0(q, k, v, slice_size): + r = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) + for i in range(0, q.shape[0], slice_size): + end = i + slice_size + r[i:end] = einsum_op_compvis(q[i:end], k[i:end], v[i:end]) + return r + +def einsum_op_slice_1(q, k, v, slice_size): + r = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) + for i in range(0, q.shape[1], slice_size): + end = i + slice_size + r[:, i:end] = einsum_op_compvis(q[:, i:end], k, v) + return r + +def einsum_op_mps_v1(q, k, v): + if q.shape[1] <= 4096: # (512x512) max q.shape[1]: 4096 + return einsum_op_compvis(q, k, v) + else: + slice_size = math.floor(2**30 / (q.shape[0] * q.shape[1])) + return einsum_op_slice_1(q, k, v, slice_size) + +def einsum_op_mps_v2(q, k, v): + if mem_total_gb > 8 and q.shape[1] <= 4096: + return einsum_op_compvis(q, k, v) + else: + return einsum_op_slice_0(q, k, v, 1) + +def einsum_op_tensor_mem(q, k, v, max_tensor_mb): + size_mb = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size() // (1 << 20) + if size_mb <= max_tensor_mb: + return einsum_op_compvis(q, k, v) + div = 1 << int((size_mb - 1) / max_tensor_mb).bit_length() + if div <= q.shape[0]: + return einsum_op_slice_0(q, k, v, q.shape[0] // div) + return einsum_op_slice_1(q, k, v, max(q.shape[1] // div, 1)) + +def einsum_op(q, k, v): + if q.device.type == 'mps': + if mem_total_gb >= 32: + return einsum_op_mps_v1(q, k, v) + return einsum_op_mps_v2(q, k, v) + + # Smaller slices are faster due to L2/L3/SLC caches. + # Tested on i7 with 8MB L3 cache. + return einsum_op_tensor_mem(q, k, v, 32) + +def split_cross_attention_forward_invokeAI(self, x, context=None, mask=None): + h = self.heads + + q = self.to_q(x) + context = default(context, x) + + hypernetwork = shared.loaded_hypernetwork + hypernetwork_layers = (hypernetwork.layers if hypernetwork is not None else {}).get(context.shape[2], None) + + if hypernetwork_layers is not None: + k = self.to_k(hypernetwork_layers[0](context)) * self.scale + v = self.to_v(hypernetwork_layers[1](context)) + else: + k = self.to_k(context) * self.scale + v = self.to_v(context) + del context, x + + q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v)) + r = einsum_op(q, k, v) + return self.to_out(rearrange(r, '(b h) n d -> b n (h d)', h=h)) + +# -- End of code from https://github.com/invoke-ai/InvokeAI/blob/main/ldm/modules/attention.py -- + def xformers_attention_forward(self, x, context=None, mask=None): h = self.heads q_in = self.to_q(x) diff --git a/modules/shared.py b/modules/shared.py index 1dc2ccf2..20b45f23 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -50,9 +50,10 @@ parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with parser.add_argument("--xformers", action='store_true', help="enable xformers for cross attention layers") parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work") parser.add_argument("--deepdanbooru", action='store_true', help="enable deepdanbooru interrogator") -parser.add_argument("--opt-split-attention", action='store_true', help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.") -parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization") +parser.add_argument("--opt-split-attention", action='store_true', help="force-enables Doggettx's cross-attention layer optimization. By default, it's on for torch cuda.") +parser.add_argument("--opt-split-attention-invokeai", action='store_true', help="force-enables InvokeAI's cross-attention layer optimization. By default, it's on when cuda is unavailable.") parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find") +parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization") parser.add_argument("--use-cpu", nargs='+',choices=['SD', 'GFPGAN', 'BSRGAN', 'ESRGAN', 'SCUNet', 'CodeFormer'], help="use CPU as torch device for specified modules", default=[]) parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests") parser.add_argument("--port", type=int, help="launch gradio with given server port, you need root/admin rights for ports < 1024, defaults to 7860 if available", default=None) From 98fd5cde72d5bda1620ab78416c7828fdc3dc10b Mon Sep 17 00:00:00 2001 From: brkirch Date: Mon, 10 Oct 2022 23:55:48 -0400 Subject: [PATCH 25/56] Add check for psutil --- modules/sd_hijack.py | 10 ++++++++-- modules/sd_hijack_optimizations.py | 19 +++++++++++++++---- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 5a1b167f..ac70f876 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -10,6 +10,7 @@ from torch.nn.functional import silu import modules.textual_inversion.textual_inversion from modules import prompt_parser, devices, sd_hijack_optimizations, shared from modules.shared import opts, device, cmd_opts +from modules.sd_hijack_optimizations import invokeAI_mps_available import ldm.modules.attention import ldm.modules.diffusionmodules.model @@ -31,8 +32,13 @@ def apply_optimizations(): print("Applying v1 cross attention optimization.") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1 elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention_invokeai or not torch.cuda.is_available()): - print("Applying cross attention optimization (InvokeAI).") - ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_invokeAI + if not invokeAI_mps_available and shared.device.type == 'mps': + print("The InvokeAI cross attention optimization for MPS requires the psutil package which is not installed.") + print("Applying v1 cross attention optimization.") + ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1 + else: + print("Applying cross attention optimization (InvokeAI).") + ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_invokeAI elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention or torch.cuda.is_available()): print("Applying cross attention optimization (Doggettx).") ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index 870226c5..2a4ac7e0 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -1,7 +1,7 @@ import math import sys import traceback -import psutil +import importlib import torch from torch import einsum @@ -117,9 +117,20 @@ def split_cross_attention_forward(self, x, context=None, mask=None): return self.to_out(r2) -# -- From https://github.com/invoke-ai/InvokeAI/blob/main/ldm/modules/attention.py (with hypernetworks support added) -- -mem_total_gb = psutil.virtual_memory().total // (1 << 30) +def check_for_psutil(): + try: + spec = importlib.util.find_spec('psutil') + return spec is not None + except ModuleNotFoundError: + return False + +invokeAI_mps_available = check_for_psutil() + +# -- Taken from https://github.com/invoke-ai/InvokeAI -- +if invokeAI_mps_available: + import psutil + mem_total_gb = psutil.virtual_memory().total // (1 << 30) def einsum_op_compvis(q, k, v): s = einsum('b i d, b j d -> b i j', q, k) @@ -193,7 +204,7 @@ def split_cross_attention_forward_invokeAI(self, x, context=None, mask=None): r = einsum_op(q, k, v) return self.to_out(rearrange(r, '(b h) n d -> b n (h d)', h=h)) -# -- End of code from https://github.com/invoke-ai/InvokeAI/blob/main/ldm/modules/attention.py -- +# -- End of code from https://github.com/invoke-ai/InvokeAI -- def xformers_attention_forward(self, x, context=None, mask=None): h = self.heads From 574c8e554a5371eca2cbf344764cb241c6ec4efc Mon Sep 17 00:00:00 2001 From: brkirch Date: Tue, 11 Oct 2022 03:32:11 -0400 Subject: [PATCH 26/56] Add InvokeAI and lstein to credits, add back CUDA support --- README.md | 1 + modules/sd_hijack_optimizations.py | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/README.md b/README.md index a10faa01..859a91b6 100644 --- a/README.md +++ b/README.md @@ -123,6 +123,7 @@ The documentation was moved from this README over to the project's [wiki](https: - LDSR - https://github.com/Hafiidz/latent-diffusion - Ideas for optimizations - https://github.com/basujindal/stable-diffusion - Doggettx - Cross Attention layer optimization - https://github.com/Doggettx/stable-diffusion, original idea for prompt editing. +- InvokeAI, lstein - Cross Attention layer optimization - https://github.com/invoke-ai/InvokeAI (originally http://github.com/lstein/stable-diffusion) - Rinon Gal - Textual Inversion - https://github.com/rinongal/textual_inversion (we're not using his code, but we are using his ideas). - Idea for SD upscale - https://github.com/jquesnelle/txt2imghd - Noise generation for outpainting mk2 - https://github.com/parlance-zz/g-diffuser-bot diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index 2a4ac7e0..f006427f 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -173,7 +173,20 @@ def einsum_op_tensor_mem(q, k, v, max_tensor_mb): return einsum_op_slice_0(q, k, v, q.shape[0] // div) return einsum_op_slice_1(q, k, v, max(q.shape[1] // div, 1)) +def einsum_op_cuda(q, k, v): + stats = torch.cuda.memory_stats(q.device) + mem_active = stats['active_bytes.all.current'] + mem_reserved = stats['reserved_bytes.all.current'] + mem_free_cuda, _ = torch.cuda.mem_get_info(q.device) + mem_free_torch = mem_reserved - mem_active + mem_free_total = mem_free_cuda + mem_free_torch + # Divide factor of safety as there's copying and fragmentation + return self.einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20)) + def einsum_op(q, k, v): + if q.device.type == 'cuda': + return einsum_op_cuda(q, k, v) + if q.device.type == 'mps': if mem_total_gb >= 32: return einsum_op_mps_v1(q, k, v) From 861db783c7acfcb93cf0b5191db3d50f9a9bc531 Mon Sep 17 00:00:00 2001 From: brkirch Date: Tue, 11 Oct 2022 05:13:17 -0400 Subject: [PATCH 27/56] Use apply_hypernetwork function --- modules/sd_hijack_optimizations.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index f006427f..79405525 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -202,16 +202,10 @@ def split_cross_attention_forward_invokeAI(self, x, context=None, mask=None): q = self.to_q(x) context = default(context, x) - hypernetwork = shared.loaded_hypernetwork - hypernetwork_layers = (hypernetwork.layers if hypernetwork is not None else {}).get(context.shape[2], None) - - if hypernetwork_layers is not None: - k = self.to_k(hypernetwork_layers[0](context)) * self.scale - v = self.to_v(hypernetwork_layers[1](context)) - else: - k = self.to_k(context) * self.scale - v = self.to_v(context) - del context, x + context_k, context_v = hypernetwork.apply_hypernetwork(shared.loaded_hypernetwork, context) + k = self.to_k(context_k) * self.scale + v = self.to_v(context_v) + del context, context_k, context_v, x q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v)) r = einsum_op(q, k, v) From 5ba23cb41f28f5856a7f64cb0d95e1e94dce90af Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 17:28:17 +0300 Subject: [PATCH 28/56] change default for XY plot's Y to Nothing. --- scripts/xy_grid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/xy_grid.py b/scripts/xy_grid.py index cddb192a..ef431105 100644 --- a/scripts/xy_grid.py +++ b/scripts/xy_grid.py @@ -197,7 +197,7 @@ class Script(scripts.Script): x_values = gr.Textbox(label="X values", visible=False, lines=1) with gr.Row(): - y_type = gr.Dropdown(label="Y type", choices=[x.label for x in current_axis_options], value=current_axis_options[4].label, visible=False, type="index", elem_id="y_type") + y_type = gr.Dropdown(label="Y type", choices=[x.label for x in current_axis_options], value=current_axis_options[0].label, visible=False, type="index", elem_id="y_type") y_values = gr.Textbox(label="Y values", visible=False, lines=1) draw_legend = gr.Checkbox(label='Draw legend', value=True) From d682444ecc99319fbd2b142a12727501e2884ba7 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 18:04:47 +0300 Subject: [PATCH 29/56] add option to select hypernetwork modules when creating --- modules/hypernetworks/hypernetwork.py | 4 ++-- modules/hypernetworks/ui.py | 4 ++-- modules/ui.py | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index aa701bda..b081f14e 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -42,7 +42,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None): + def __init__(self, name=None, enable_sizes=None): self.filename = None self.name = name self.layers = {} @@ -50,7 +50,7 @@ class Hypernetwork: self.sd_checkpoint = None self.sd_checkpoint_name = None - for size in [320, 640, 768, 1280]: + for size in enable_sizes or [320, 640, 768, 1280]: self.layers[size] = (HypernetworkModule(size), HypernetworkModule(size)) def weights(self): diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index e7540f41..cdddcce1 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -9,11 +9,11 @@ from modules import sd_hijack, shared from modules.hypernetworks import hypernetwork -def create_hypernetwork(name): +def create_hypernetwork(name, enable_sizes): fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt") assert not os.path.exists(fn), f"file {fn} already exists" - hypernet = modules.hypernetworks.hypernetwork.Hypernetwork(name=name) + hypernet = modules.hypernetworks.hypernetwork.Hypernetwork(name=name, enable_sizes=[int(x) for x in enable_sizes]) hypernet.save(fn) shared.reload_hypernetworks() diff --git a/modules/ui.py b/modules/ui.py index f2d16b12..14b87b92 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1037,6 +1037,7 @@ def create_ui(wrap_gradio_gpu_call): gr.HTML(value="

Create a new hypernetwork

") new_hypernetwork_name = gr.Textbox(label="Name") + new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"]) with gr.Row(): with gr.Column(scale=3): @@ -1114,6 +1115,7 @@ def create_ui(wrap_gradio_gpu_call): fn=modules.hypernetworks.ui.create_hypernetwork, inputs=[ new_hypernetwork_name, + new_hypernetwork_sizes, ], outputs=[ train_hypernetwork_name, From ff4ef13dd591ec52f196f344f47537695df95364 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Tue, 11 Oct 2022 10:24:27 -0500 Subject: [PATCH 30/56] removed unneeded print --- modules/deepbooru.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index e31e92c0..89dcac3c 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -119,7 +119,6 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, alpha_sort) # sort tags result_tags_out = [] sort_ndx = 0 - print(alpha_sort) if alpha_sort: sort_ndx = 1 From 6d09b8d1df3a96e1380bb1650f5961781630af96 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 18:33:57 +0300 Subject: [PATCH 31/56] produce error when training with medvram/lowvram enabled --- modules/hypernetworks/ui.py | 2 ++ modules/textual_inversion/ui.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index cdddcce1..3541a388 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -25,6 +25,8 @@ def train_hypernetwork(*args): initial_hypernetwork = shared.loaded_hypernetwork + assert not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram, 'Training models with lowvram or medvram is not possible' + try: sd_hijack.undo_optimizations() diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py index c57de1f9..70f47343 100644 --- a/modules/textual_inversion/ui.py +++ b/modules/textual_inversion/ui.py @@ -22,6 +22,9 @@ def preprocess(*args): def train_embedding(*args): + + assert not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram, 'Training models with lowvram or medvram is not possible' + try: sd_hijack.undo_optimizations() From d4ea5f4d8631f778d11efcde397e4a5b8801d43b Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 19:03:08 +0300 Subject: [PATCH 32/56] add an option to unload models during hypernetwork training to save VRAM --- modules/hypernetworks/hypernetwork.py | 25 +++++++++++----- modules/hypernetworks/ui.py | 4 ++- modules/shared.py | 4 +++ modules/textual_inversion/dataset.py | 29 +++++++++++++------ .../textual_inversion/textual_inversion.py | 2 +- 5 files changed, 46 insertions(+), 18 deletions(-) diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index b081f14e..4700e1ec 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -175,6 +175,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt') log_directory = os.path.join(log_directory, datetime.datetime.now().strftime("%Y-%m-%d"), hypernetwork_name) + unload = shared.opts.unload_models_when_training if save_hypernetwork_every > 0: hypernetwork_dir = os.path.join(log_directory, "hypernetworks") @@ -188,11 +189,13 @@ def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, else: images_dir = None - cond_model = shared.sd_model.cond_stage_model - shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." with torch.autocast("cuda"): - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=512, height=512, repeats=1, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file) + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=512, height=512, repeats=1, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True) + + if unload: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) hypernetwork = shared.loaded_hypernetwork weights = hypernetwork.weights() @@ -211,7 +214,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, return hypernetwork, filename pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) - for i, (x, text) in pbar: + for i, (x, text, cond) in pbar: hypernetwork.step = i + ititial_step if hypernetwork.step > steps: @@ -221,11 +224,11 @@ def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, break with torch.autocast("cuda"): - c = cond_model([text]) - + cond = cond.to(devices.device) x = x.to(devices.device) - loss = shared.sd_model(x.unsqueeze(0), c)[0] + loss = shared.sd_model(x.unsqueeze(0), cond)[0] del x + del cond losses[hypernetwork.step % losses.shape[0]] = loss.item() @@ -244,6 +247,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, preview_text = text if preview_image_prompt == "" else preview_image_prompt + optimizer.zero_grad() + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + p = processing.StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, prompt=preview_text, @@ -255,6 +262,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, processed = processing.process_images(p) image = processed.images[0] + if unload: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) + shared.state.current_image = image image.save(last_saved_image) diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index 3541a388..c67facbb 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -5,7 +5,7 @@ import gradio as gr import modules.textual_inversion.textual_inversion import modules.textual_inversion.preprocess -from modules import sd_hijack, shared +from modules import sd_hijack, shared, devices from modules.hypernetworks import hypernetwork @@ -41,5 +41,7 @@ Hypernetwork saved to {html.escape(filename)} raise finally: shared.loaded_hypernetwork = initial_hypernetwork + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) sd_hijack.apply_optimizations() diff --git a/modules/shared.py b/modules/shared.py index 20b45f23..c1092ff7 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -228,6 +228,10 @@ options_templates.update(options_section(('system', "System"), { "multiple_tqdm": OptionInfo(True, "Add a second progress bar to the console that shows progress for an entire job."), })) +options_templates.update(options_section(('training', "Training"), { + "unload_models_when_training": OptionInfo(False, "Unload VAE and CLIP form VRAM when training"), +})) + options_templates.update(options_section(('sd', "Stable Diffusion"), { "sd_model_checkpoint": OptionInfo(None, "Stable Diffusion checkpoint", gr.Dropdown, lambda: {"choices": modules.sd_models.checkpoint_tiles()}, show_on_main_page=True), "sd_hypernetwork": OptionInfo("None", "Stable Diffusion finetune hypernetwork", gr.Dropdown, lambda: {"choices": ["None"] + [x for x in hypernetworks.keys()]}), diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index 4d006366..f61f40d3 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -8,14 +8,14 @@ from torchvision import transforms import random import tqdm -from modules import devices +from modules import devices, shared import re re_tag = re.compile(r"[a-zA-Z][_\w\d()]+") class PersonalizedBase(Dataset): - def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None): + def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None, include_cond=False): self.placeholder_token = placeholder_token @@ -32,6 +32,8 @@ class PersonalizedBase(Dataset): assert data_root, 'dataset directory not specified' + cond_model = shared.sd_model.cond_stage_model + self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root)] print("Preparing dataset...") for path in tqdm.tqdm(self.image_paths): @@ -53,7 +55,13 @@ class PersonalizedBase(Dataset): init_latent = model.get_first_stage_encoding(model.encode_first_stage(torchdata.unsqueeze(dim=0))).squeeze() init_latent = init_latent.to(devices.cpu) - self.dataset.append((init_latent, filename_tokens)) + if include_cond: + text = self.create_text(filename_tokens) + cond = cond_model([text]).to(devices.cpu) + else: + cond = None + + self.dataset.append((init_latent, filename_tokens, cond)) self.length = len(self.dataset) * repeats @@ -64,6 +72,12 @@ class PersonalizedBase(Dataset): def shuffle(self): self.indexes = self.initial_indexes[torch.randperm(self.initial_indexes.shape[0])] + def create_text(self, filename_tokens): + text = random.choice(self.lines) + text = text.replace("[name]", self.placeholder_token) + text = text.replace("[filewords]", ' '.join(filename_tokens)) + return text + def __len__(self): return self.length @@ -72,10 +86,7 @@ class PersonalizedBase(Dataset): self.shuffle() index = self.indexes[i % len(self.indexes)] - x, filename_tokens = self.dataset[index] + x, filename_tokens, cond = self.dataset[index] - text = random.choice(self.lines) - text = text.replace("[name]", self.placeholder_token) - text = text.replace("[filewords]", ' '.join(filename_tokens)) - - return x, text + text = self.create_text(filename_tokens) + return x, text, cond diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index bb05cdc6..35f4bd9e 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -201,7 +201,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini return embedding, filename pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) - for i, (x, text) in pbar: + for i, (x, text, _) in pbar: embedding.step = i + ititial_step if embedding.step > steps: From 6a9ea5b41cf92cd9e980349bb5034439f4e7a58b Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 19:22:30 +0300 Subject: [PATCH 33/56] prevent extra modules from being saved/loaded with hypernet --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 4700e1ec..5608e799 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -50,7 +50,7 @@ class Hypernetwork: self.sd_checkpoint = None self.sd_checkpoint_name = None - for size in enable_sizes or [320, 640, 768, 1280]: + for size in enable_sizes or []: self.layers[size] = (HypernetworkModule(size), HypernetworkModule(size)) def weights(self): From d7474a5185df2af84a93a12bc7e140d24e0fc516 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 21:10:55 +0300 Subject: [PATCH 34/56] bump gradio to 3.4.1 --- requirements.txt | 2 +- requirements_versions.txt | 2 +- style.css | 11 +++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 631fe616..a0d985ce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ fairscale==0.4.4 fonts font-roboto gfpgan -gradio==3.4b3 +gradio==3.4.1 invisible-watermark numpy omegaconf diff --git a/requirements_versions.txt b/requirements_versions.txt index fdff2687..2bbea40b 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -2,7 +2,7 @@ transformers==4.19.2 diffusers==0.3.0 basicsr==1.4.2 gfpgan==1.3.8 -gradio==3.4b3 +gradio==3.4.1 numpy==1.23.3 Pillow==9.2.0 realesrgan==0.3.0 diff --git a/style.css b/style.css index ecb51bb0..e6fa10b4 100644 --- a/style.css +++ b/style.css @@ -240,6 +240,7 @@ fieldset span.text-gray-500, .gr-block.gr-box span.text-gray-500, label.block s #settings fieldset span.text-gray-500, #settings .gr-block.gr-box span.text-gray-500, #settings label.block span{ position: relative; border: none; + margin-right: 8em; } .gr-panel div.flex-col div.justify-between label span{ @@ -495,3 +496,13 @@ canvas[key="mask"] { mix-blend-mode: multiply; pointer-events: none; } + + +/* gradio 3.4.1 stuff for editable scrollbar values */ +.gr-box > div > div > input.gr-text-input{ + position: absolute; + right: 0.5em; + top: -0.6em; + z-index: 200; + width: 8em; +} From 9e5f6b558072f6cdfa0f7010fa819662952fcaf1 Mon Sep 17 00:00:00 2001 From: nai-degen <92774204+nai-degen@users.noreply.github.com> Date: Sun, 9 Oct 2022 19:37:35 -0500 Subject: [PATCH 35/56] triggers 'input' event when using arrow keys to edit attention --- javascript/edit-attention.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/javascript/edit-attention.js b/javascript/edit-attention.js index 0280c603..79566a2e 100644 --- a/javascript/edit-attention.js +++ b/javascript/edit-attention.js @@ -38,4 +38,7 @@ addEventListener('keydown', (event) => { target.selectionStart = selectionStart; target.selectionEnd = selectionEnd; } + // Since we've modified a Gradio Textbox component manually, we need to simulate an `input` DOM event to ensure its + // internal Svelte data binding remains in sync. + target.dispatchEvent(new Event("input", { bubbles: true })); }); From d6fcc6b87bc00fcdecea276fe5b7c7945f7a8b14 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 22:03:05 +0300 Subject: [PATCH 36/56] apply lr schedule to hypernets --- modules/hypernetworks/hypernetwork.py | 19 ++++++-- modules/textual_inversion/learn_schedule.py | 34 ++++++++++++++ .../textual_inversion/textual_inversion.py | 44 ++----------------- modules/ui.py | 2 +- 4 files changed, 54 insertions(+), 45 deletions(-) create mode 100644 modules/textual_inversion/learn_schedule.py diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 5608e799..470659df 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -14,6 +14,7 @@ import torch from torch import einsum from einops import rearrange, repeat import modules.textual_inversion.dataset +from modules.textual_inversion.learn_schedule import LearnSchedule class HypernetworkModule(torch.nn.Module): @@ -202,8 +203,6 @@ def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, for weight in weights: weight.requires_grad = True - optimizer = torch.optim.AdamW(weights, lr=learn_rate) - losses = torch.zeros((32,)) last_saved_file = "" @@ -213,12 +212,24 @@ def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, if ititial_step > steps: return hypernetwork, filename + schedules = iter(LearnSchedule(learn_rate, steps, ititial_step)) + (learn_rate, end_step) = next(schedules) + print(f'Training at rate of {learn_rate} until step {end_step}') + + optimizer = torch.optim.AdamW(weights, lr=learn_rate) + pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) for i, (x, text, cond) in pbar: hypernetwork.step = i + ititial_step - if hypernetwork.step > steps: - break + if hypernetwork.step > end_step: + try: + (learn_rate, end_step) = next(schedules) + except Exception: + break + tqdm.tqdm.write(f'Training at rate of {learn_rate} until step {end_step}') + for pg in optimizer.param_groups: + pg['lr'] = learn_rate if shared.state.interrupted: break diff --git a/modules/textual_inversion/learn_schedule.py b/modules/textual_inversion/learn_schedule.py new file mode 100644 index 00000000..db720271 --- /dev/null +++ b/modules/textual_inversion/learn_schedule.py @@ -0,0 +1,34 @@ + +class LearnSchedule: + def __init__(self, learn_rate, max_steps, cur_step=0): + pairs = learn_rate.split(',') + self.rates = [] + self.it = 0 + self.maxit = 0 + for i, pair in enumerate(pairs): + tmp = pair.split(':') + if len(tmp) == 2: + step = int(tmp[1]) + if step > cur_step: + self.rates.append((float(tmp[0]), min(step, max_steps))) + self.maxit += 1 + if step > max_steps: + return + elif step == -1: + self.rates.append((float(tmp[0]), max_steps)) + self.maxit += 1 + return + else: + self.rates.append((float(tmp[0]), max_steps)) + self.maxit += 1 + return + + def __iter__(self): + return self + + def __next__(self): + if self.it < self.maxit: + self.it += 1 + return self.rates[self.it - 1] + else: + raise StopIteration diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 47a27faf..7717837d 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -10,6 +10,7 @@ import datetime from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset +from modules.textual_inversion.learn_schedule import LearnSchedule class Embedding: @@ -198,11 +199,8 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if ititial_step > steps: return embedding, filename - tr_img_len = len([os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]) - epoch_len = (tr_img_len * num_repeats) + tr_img_len - - scheduleIter = iter(LearnSchedule(learn_rate, steps, ititial_step)) - (learn_rate, end_step) = next(scheduleIter) + schedules = iter(LearnSchedule(learn_rate, steps, ititial_step)) + (learn_rate, end_step) = next(schedules) print(f'Training at rate of {learn_rate} until step {end_step}') optimizer = torch.optim.AdamW([embedding.vec], lr=learn_rate) @@ -213,7 +211,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if embedding.step > end_step: try: - (learn_rate, end_step) = next(scheduleIter) + (learn_rate, end_step) = next(schedules) except: break tqdm.tqdm.write(f'Training at rate of {learn_rate} until step {end_step}') @@ -288,37 +286,3 @@ Last saved image: {html.escape(last_saved_image)}
embedding.save(filename) return embedding, filename - -class LearnSchedule: - def __init__(self, learn_rate, max_steps, cur_step=0): - pairs = learn_rate.split(',') - self.rates = [] - self.it = 0 - self.maxit = 0 - for i, pair in enumerate(pairs): - tmp = pair.split(':') - if len(tmp) == 2: - step = int(tmp[1]) - if step > cur_step: - self.rates.append((float(tmp[0]), min(step, max_steps))) - self.maxit += 1 - if step > max_steps: - return - elif step == -1: - self.rates.append((float(tmp[0]), max_steps)) - self.maxit += 1 - return - else: - self.rates.append((float(tmp[0]), max_steps)) - self.maxit += 1 - return - - def __iter__(self): - return self - - def __next__(self): - if self.it < self.maxit: - self.it += 1 - return self.rates[self.it - 1] - else: - raise StopIteration diff --git a/modules/ui.py b/modules/ui.py index 2b688e32..1204eef7 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1070,7 +1070,7 @@ def create_ui(wrap_gradio_gpu_call): gr.HTML(value="

Train an embedding; must specify a directory with a set of 1:1 ratio images

") train_embedding_name = gr.Dropdown(label='Embedding', choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())) train_hypernetwork_name = gr.Dropdown(label='Hypernetwork', choices=[x for x in shared.hypernetworks.keys()]) - learn_rate = gr.Textbox(label='Learning rate', placeholder="Learning rate", value = "5.0e-03") + learn_rate = gr.Textbox(label='Learning rate', placeholder="Learning rate", value="0.005") dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images") log_directory = gr.Textbox(label='Log directory', placeholder="Path to directory where to write outputs", value="textual_inversion") template_file = gr.Textbox(label='Prompt template file', value=os.path.join(script_path, "textual_inversion_templates", "style_filewords.txt")) From 6be32b31d181e42c639dad3451229aa7b9cfd1cf Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 23:07:09 +0300 Subject: [PATCH 37/56] reports that training with medvram is possible. --- modules/hypernetworks/ui.py | 2 +- modules/textual_inversion/ui.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index c67facbb..dfa599af 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -25,7 +25,7 @@ def train_hypernetwork(*args): initial_hypernetwork = shared.loaded_hypernetwork - assert not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram, 'Training models with lowvram or medvram is not possible' + assert not shared.cmd_opts.lowvram, 'Training models with lowvram is not possible' try: sd_hijack.undo_optimizations() diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py index 70f47343..36881e7a 100644 --- a/modules/textual_inversion/ui.py +++ b/modules/textual_inversion/ui.py @@ -23,7 +23,7 @@ def preprocess(*args): def train_embedding(*args): - assert not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram, 'Training models with lowvram or medvram is not possible' + assert not shared.cmd_opts.lowvram, 'Training models with lowvram not possible' try: sd_hijack.undo_optimizations() From f53f703aebc801c4204182d52bb1e0bef9808e1f Mon Sep 17 00:00:00 2001 From: JC_Array Date: Tue, 11 Oct 2022 18:12:12 -0500 Subject: [PATCH 38/56] resolved conflicts, moved settings under interrogate section, settings only show if deepbooru flag is enabled --- modules/deepbooru.py | 2 +- modules/shared.py | 19 +++++++++---------- modules/textual_inversion/preprocess.py | 2 +- modules/ui.py | 2 +- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/modules/deepbooru.py b/modules/deepbooru.py index 89dcac3c..29529949 100644 --- a/modules/deepbooru.py +++ b/modules/deepbooru.py @@ -8,7 +8,7 @@ def get_deepbooru_tags(pil_image): This method is for running only one image at a time for simple use. Used to the img2img interrogate. """ from modules import shared # prevents circular reference - create_deepbooru_process(shared.opts.deepbooru_threshold, shared.opts.deepbooru_sort_alpha) + create_deepbooru_process(shared.opts.interrogate_deepbooru_score_threshold, shared.opts.deepbooru_sort_alpha) shared.deepbooru_process_return["value"] = -1 shared.deepbooru_process_queue.put(pil_image) while shared.deepbooru_process_return["value"] == -1: diff --git a/modules/shared.py b/modules/shared.py index 817203f8..5456c477 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -248,15 +248,20 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { "random_artist_categories": OptionInfo([], "Allowed categories for random artists selection when using the Roll button", gr.CheckboxGroup, {"choices": artist_db.categories()}), })) -options_templates.update(options_section(('interrogate', "Interrogate Options"), { +interrogate_option_dictionary = { "interrogate_keep_models_in_memory": OptionInfo(False, "Interrogate: keep models in VRAM"), "interrogate_use_builtin_artists": OptionInfo(True, "Interrogate: use artists from artists.csv"), "interrogate_clip_num_beams": OptionInfo(1, "Interrogate: num_beams for BLIP", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1}), "interrogate_clip_min_length": OptionInfo(24, "Interrogate: minimum description length (excluding artists, etc..)", gr.Slider, {"minimum": 1, "maximum": 128, "step": 1}), "interrogate_clip_max_length": OptionInfo(48, "Interrogate: maximum description length", gr.Slider, {"minimum": 1, "maximum": 256, "step": 1}), - "interrogate_clip_dict_limit": OptionInfo(1500, "Interrogate: maximum number of lines in text file (0 = No limit)"), - "interrogate_deepbooru_score_threshold": OptionInfo(0.5, "Interrogate: deepbooru score threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), -})) + "interrogate_clip_dict_limit": OptionInfo(1500, "Interrogate: maximum number of lines in text file (0 = No limit)") +} + +if cmd_opts.deepdanbooru: + interrogate_option_dictionary["interrogate_deepbooru_score_threshold"] = OptionInfo(0.5, "Interrogate: deepbooru score threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}) + interrogate_option_dictionary["deepbooru_sort_alpha"] = OptionInfo(True, "Interrogate: deepbooru sort alphabetically", gr.Checkbox) + +options_templates.update(options_section(('interrogate', "Interrogate Options"), interrogate_option_dictionary)) options_templates.update(options_section(('ui', "User interface"), { "show_progressbar": OptionInfo(True, "Show progressbar"), @@ -282,12 +287,6 @@ options_templates.update(options_section(('sampler-params', "Sampler parameters" 'eta_noise_seed_delta': OptionInfo(0, "Eta noise seed delta", gr.Number, {"precision": 0}), })) -if cmd_opts.deepdanbooru: - options_templates.update(options_section(('deepbooru-params', "DeepBooru parameters"), { - "deepbooru_sort_alpha": OptionInfo(True, "Sort Alphabetical", gr.Checkbox), - 'deepbooru_threshold': OptionInfo(0.5, "Threshold", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), - })) - class Options: data = None diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index a96388d6..113cecf1 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -29,7 +29,7 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ shared.interrogator.load() if process_caption_deepbooru: - deepbooru.create_deepbooru_process(opts.deepbooru_threshold, opts.deepbooru_sort_alpha) + deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, opts.deepbooru_sort_alpha) def save_pic_with_caption(image, index): if process_caption: diff --git a/modules/ui.py b/modules/ui.py index 2891fc8c..fa45edca 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -317,7 +317,7 @@ def interrogate(image): def interrogate_deepbooru(image): - prompt = get_deepbooru_tags(image, opts.interrogate_deepbooru_score_threshold) + prompt = get_deepbooru_tags(image) return gr_show(True) if prompt is None else prompt From 6d408c06c634cc96480f055941754dcc43f781d9 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Wed, 12 Oct 2022 00:19:28 +0100 Subject: [PATCH 39/56] Prevent nans from failed float parsing from overwriting weights --- javascript/edit-attention.js | 1 + 1 file changed, 1 insertion(+) diff --git a/javascript/edit-attention.js b/javascript/edit-attention.js index 79566a2e..3f1d2fbb 100644 --- a/javascript/edit-attention.js +++ b/javascript/edit-attention.js @@ -25,6 +25,7 @@ addEventListener('keydown', (event) => { } else { end = target.value.slice(selectionEnd + 1).indexOf(")") + 1; weight = parseFloat(target.value.slice(selectionEnd + 1, selectionEnd + 1 + end)); + if (isNaN(weight)) return; if (event.key == minus) weight -= 0.1; if (event.key == plus) weight += 0.1; From 65b973ac4e547a325f30a05f852b161421af2041 Mon Sep 17 00:00:00 2001 From: supersteve3d <39339941+supersteve3d@users.noreply.github.com> Date: Wed, 12 Oct 2022 08:21:52 +0800 Subject: [PATCH 40/56] Update shared.py Correct typo to "Unload VAE and CLIP from VRAM when training" in settings tab. --- modules/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/shared.py b/modules/shared.py index c1092ff7..46bc740c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -229,7 +229,7 @@ options_templates.update(options_section(('system', "System"), { })) options_templates.update(options_section(('training', "Training"), { - "unload_models_when_training": OptionInfo(False, "Unload VAE and CLIP form VRAM when training"), + "unload_models_when_training": OptionInfo(False, "Unload VAE and CLIP from VRAM when training"), })) options_templates.update(options_section(('sd', "Stable Diffusion"), { From 6ac2ec2b78bc5fabd09cb866dd9a71061d669269 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 07:01:20 +0300 Subject: [PATCH 41/56] create dir for hypernetworks --- modules/shared.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/shared.py b/modules/shared.py index c1092ff7..e65e77f8 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -86,6 +86,7 @@ parallel_processing_allowed = not cmd_opts.lowvram and not cmd_opts.medvram xformers_available = False config_filename = cmd_opts.ui_settings_file +os.makedirs(cmd_opts.hypernetwork_dir, exist_ok=True) hypernetworks = hypernetwork.list_hypernetworks(cmd_opts.hypernetwork_dir) loaded_hypernetwork = None From fec2221eeaafb50afd26ba3e109bf6f928011e69 Mon Sep 17 00:00:00 2001 From: Greg Fuller Date: Tue, 11 Oct 2022 19:29:38 -0700 Subject: [PATCH 42/56] Truncate error text to fix service lockup / stall What: * Update wrap_gradio_call to add a limit to the maximum amount of text output Why: * wrap_gradio_call currently prints out a list of the arguments provided to the failing function. * if that function is save_image, this causes the entire image to be printed to stderr * If the image is large, this can cause the service to lock up while attempting to print all the text * It is easy to generate large images using the x/y plot script * it is easy to encounter image save exceptions, including if the output directory does not exist / cannot be written to, or if the file is too big * The huge amount of log spam is confusing and not particularly helpful --- modules/ui.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/ui.py b/modules/ui.py index 1204eef7..33a49d3b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -181,8 +181,15 @@ def wrap_gradio_call(func, extra_outputs=None): try: res = list(func(*args, **kwargs)) except Exception as e: + # When printing out our debug argument list, do not print out more than a MB of text + max_debug_str_len = 131072 # (1024*1024)/8 + print("Error completing request", file=sys.stderr) - print("Arguments:", args, kwargs, file=sys.stderr) + argStr = f"Arguments: {str(args)} {str(kwargs)}" + print(argStr[:max_debug_str_len], file=sys.stderr) + if len(argStr) > max_debug_str_len: + print(f"(Argument list truncated at {max_debug_str_len}/{len(argStr)} characters)", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) shared.state.job = "" From 336bd8703c7b4d71f2f096f303599925a30b8167 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 09:00:07 +0300 Subject: [PATCH 43/56] just add the deepdanbooru settings unconditionally --- modules/shared.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index f150e024..42e99741 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -249,20 +249,15 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { "random_artist_categories": OptionInfo([], "Allowed categories for random artists selection when using the Roll button", gr.CheckboxGroup, {"choices": artist_db.categories()}), })) -interrogate_option_dictionary = { +options_templates.update(options_section(('interrogate', "Interrogate Options"), { "interrogate_keep_models_in_memory": OptionInfo(False, "Interrogate: keep models in VRAM"), "interrogate_use_builtin_artists": OptionInfo(True, "Interrogate: use artists from artists.csv"), "interrogate_clip_num_beams": OptionInfo(1, "Interrogate: num_beams for BLIP", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1}), "interrogate_clip_min_length": OptionInfo(24, "Interrogate: minimum description length (excluding artists, etc..)", gr.Slider, {"minimum": 1, "maximum": 128, "step": 1}), "interrogate_clip_max_length": OptionInfo(48, "Interrogate: maximum description length", gr.Slider, {"minimum": 1, "maximum": 256, "step": 1}), - "interrogate_clip_dict_limit": OptionInfo(1500, "Interrogate: maximum number of lines in text file (0 = No limit)") -} - -if cmd_opts.deepdanbooru: - interrogate_option_dictionary["interrogate_deepbooru_score_threshold"] = OptionInfo(0.5, "Interrogate: deepbooru score threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}) - interrogate_option_dictionary["deepbooru_sort_alpha"] = OptionInfo(True, "Interrogate: deepbooru sort alphabetically", gr.Checkbox) - -options_templates.update(options_section(('interrogate', "Interrogate Options"), interrogate_option_dictionary)) + "interrogate_deepbooru_score_threshold": OptionInfo(0.5, "Interrogate: deepbooru score threshold", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), + "deepbooru_sort_alpha": OptionInfo(True, "Interrogate: deepbooru sort alphabetically"), +})) options_templates.update(options_section(('ui', "User interface"), { "show_progressbar": OptionInfo(True, "Show progressbar"), From fd07b103aeb70a80e3641068e483475e32c9750c Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 09:00:39 +0300 Subject: [PATCH 44/56] prevent SD model from loading when running in deepdanbooru process --- webui.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/webui.py b/webui.py index ca278e94..32bcdb06 100644 --- a/webui.py +++ b/webui.py @@ -31,12 +31,7 @@ from modules.paths import script_path from modules.shared import cmd_opts import modules.hypernetworks.hypernetwork -modelloader.cleanup_models() -modules.sd_models.setup_model() -codeformer.setup_model(cmd_opts.codeformer_models_path) -gfpgan.setup_model(cmd_opts.gfpgan_models_path) -shared.face_restorers.append(modules.face_restoration.FaceRestoration()) -modelloader.load_upscalers() + queue_lock = threading.Lock() @@ -78,12 +73,19 @@ def wrap_gradio_gpu_call(func, extra_outputs=None): return modules.ui.wrap_gradio_call(f, extra_outputs=extra_outputs) -modules.scripts.load_scripts(os.path.join(script_path, "scripts")) +def initialize(): + modelloader.cleanup_models() + modules.sd_models.setup_model() + codeformer.setup_model(cmd_opts.codeformer_models_path) + gfpgan.setup_model(cmd_opts.gfpgan_models_path) + shared.face_restorers.append(modules.face_restoration.FaceRestoration()) + modelloader.load_upscalers() -shared.sd_model = modules.sd_models.load_model() -shared.opts.onchange("sd_model_checkpoint", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(shared.sd_model))) + modules.scripts.load_scripts(os.path.join(script_path, "scripts")) -shared.opts.onchange("sd_hypernetwork", wrap_queued_call(lambda: modules.hypernetworks.hypernetwork.load_hypernetwork(shared.opts.sd_hypernetwork))) + shared.sd_model = modules.sd_models.load_model() + shared.opts.onchange("sd_model_checkpoint", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(shared.sd_model))) + shared.opts.onchange("sd_hypernetwork", wrap_queued_call(lambda: modules.hypernetworks.hypernetwork.load_hypernetwork(shared.opts.sd_hypernetwork))) def webui(): @@ -98,7 +100,7 @@ def webui(): demo = modules.ui.create_ui(wrap_gradio_gpu_call=wrap_gradio_gpu_call) - app,local_url,share_url = demo.launch( + app, local_url, share_url = demo.launch( share=cmd_opts.share, server_name="0.0.0.0" if cmd_opts.listen else None, server_port=cmd_opts.port, @@ -129,6 +131,6 @@ def webui(): print('Restarting Gradio') - if __name__ == "__main__": + initialize() webui() From 7edd58d90dd08f68fab5ff84d26dedd0eb85cae3 Mon Sep 17 00:00:00 2001 From: James Noeckel Date: Tue, 11 Oct 2022 17:48:24 -0700 Subject: [PATCH 45/56] update environment-wsl2.yaml --- environment-wsl2.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/environment-wsl2.yaml b/environment-wsl2.yaml index c9ce11df..f8872750 100644 --- a/environment-wsl2.yaml +++ b/environment-wsl2.yaml @@ -3,9 +3,9 @@ channels: - pytorch - defaults dependencies: - - python=3.8.5 - - pip=20.3 + - python=3.10 + - pip=22.2.2 - cudatoolkit=11.3 - - pytorch=1.11.0 - - torchvision=0.12.0 - - numpy=1.19.2 + - pytorch=1.12.1 + - torchvision=0.13.1 + - numpy=1.23.1 \ No newline at end of file From 8aead63f1ac9fec0e5198bd626ec2c5bcbeff4d8 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 09:32:14 +0300 Subject: [PATCH 46/56] emergency fix --- webui.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/webui.py b/webui.py index 32bcdb06..33ba7905 100644 --- a/webui.py +++ b/webui.py @@ -89,6 +89,8 @@ def initialize(): def webui(): + initialize() + # make the program just exit at ctrl+c without waiting for anything def sigint_handler(sig, frame): print(f'Interrupted with signal {sig} in {frame}') @@ -132,5 +134,4 @@ def webui(): if __name__ == "__main__": - initialize() webui() From 57e03cdd244eee4e33ccab7554b3594563a3d0cd Mon Sep 17 00:00:00 2001 From: brkirch Date: Wed, 12 Oct 2022 00:54:24 -0400 Subject: [PATCH 47/56] Ensure the directory exists before saving to it The directory for the images saved with the Save button may still not exist, so it needs to be created prior to opening the log.csv file. --- modules/ui.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/ui.py b/modules/ui.py index 00bf09ae..cd67b84b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -131,6 +131,8 @@ def save_files(js_data, images, do_make_zip, index): images = [images[index]] start_index = index + os.makedirs(opts.outdir_save, exist_ok=True) + with open(os.path.join(opts.outdir_save, "log.csv"), "a", encoding="utf8", newline='') as file: at_start = file.tell() == 0 writer = csv.writer(file) From f421f2af2df41a86af1aea1e82b4c32a2d143385 Mon Sep 17 00:00:00 2001 From: aoirusann Date: Wed, 12 Oct 2022 13:02:28 +0800 Subject: [PATCH 48/56] [img2imgalt] Fix seed & Allow batch. --- scripts/img2imgalt.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/img2imgalt.py b/scripts/img2imgalt.py index f9894cb0..313a55d2 100644 --- a/scripts/img2imgalt.py +++ b/scripts/img2imgalt.py @@ -129,8 +129,6 @@ class Script(scripts.Script): return [original_prompt, original_negative_prompt, cfg, st, randomness, sigma_adjustment] def run(self, p, original_prompt, original_negative_prompt, cfg, st, randomness, sigma_adjustment): - p.batch_size = 1 - p.batch_count = 1 def sample_extra(conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength): @@ -154,7 +152,7 @@ class Script(scripts.Script): rec_noise = find_noise_for_image(p, cond, uncond, cfg, st) self.cache = Cached(rec_noise, cfg, st, lat, original_prompt, original_negative_prompt, sigma_adjustment) - rand_noise = processing.create_random_tensors(p.init_latent.shape[1:], [p.seed + x + 1 for x in range(p.init_latent.shape[0])]) + rand_noise = processing.create_random_tensors(p.init_latent.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength, seed_resize_from_h=p.seed_resize_from_h, seed_resize_from_w=p.seed_resize_from_w, p=p) combined_noise = ((1 - randomness) * rec_noise + randomness * rand_noise) / ((randomness**2 + (1-randomness)**2) ** 0.5) From ca5efc316b9431746ff886d259275310f63f95fb Mon Sep 17 00:00:00 2001 From: LunixWasTaken Date: Tue, 11 Oct 2022 22:04:56 +0200 Subject: [PATCH 49/56] Typo fix in watermark hint. --- javascript/hints.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/hints.js b/javascript/hints.js index 045f2d3c..b81c181b 100644 --- a/javascript/hints.js +++ b/javascript/hints.js @@ -80,7 +80,7 @@ titles = { "Scale latent": "Uscale the image in latent space. Alternative is to produce the full image from latent representation, upscale that, and then move it back to latent space.", "Eta noise seed delta": "If this values is non-zero, it will be added to seed and used to initialize RNG for noises when using samplers with Eta. You can use this to produce even more variation of images, or you can use this to match images of other software if you know what you are doing.", - "Do not add watermark to images": "If this option is enabled, watermark will not be added to created images. Warning: if you do not add watermark, you may be bevaing in an unethical manner.", + "Do not add watermark to images": "If this option is enabled, watermark will not be added to created images. Warning: if you do not add watermark, you may be behaving in an unethical manner.", } From 2d006ce16cd95d587533656c3ac4991495e96f23 Mon Sep 17 00:00:00 2001 From: Milly Date: Mon, 10 Oct 2022 00:56:36 +0900 Subject: [PATCH 50/56] xy_grid: Find hypernetwork by closest name --- modules/hypernetworks/hypernetwork.py | 11 +++++++++++ scripts/xy_grid.py | 6 +++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 470659df..8f2192e2 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -120,6 +120,17 @@ def load_hypernetwork(filename): shared.loaded_hypernetwork = None +def find_closest_hypernetwork_name(search: str): + if not search: + return None + search = search.lower() + applicable = [name for name in shared.hypernetworks if search in name.lower()] + if not applicable: + return None + applicable = sorted(applicable, key=lambda name: len(name)) + return applicable[0] + + def apply_hypernetwork(hypernetwork, context, layer=None): hypernetwork_layers = (hypernetwork.layers if hypernetwork is not None else {}).get(context.shape[2], None) diff --git a/scripts/xy_grid.py b/scripts/xy_grid.py index ef431105..6f4217ec 100644 --- a/scripts/xy_grid.py +++ b/scripts/xy_grid.py @@ -84,7 +84,11 @@ def apply_checkpoint(p, x, xs): def apply_hypernetwork(p, x, xs): - hypernetwork.load_hypernetwork(x) + if x.lower() in ["", "none"]: + name = None + else: + name = hypernetwork.find_closest_hypernetwork_name(x) + hypernetwork.load_hypernetwork(name) def apply_clip_skip(p, x, xs): From 7dba1c07cb337114507d9c256f9b843162c187d6 Mon Sep 17 00:00:00 2001 From: Milly Date: Mon, 10 Oct 2022 01:37:09 +0900 Subject: [PATCH 51/56] xy_grid: Confirm that hypernetwork options are valid before starting --- scripts/xy_grid.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/xy_grid.py b/scripts/xy_grid.py index 6f4217ec..b2239d0a 100644 --- a/scripts/xy_grid.py +++ b/scripts/xy_grid.py @@ -88,9 +88,19 @@ def apply_hypernetwork(p, x, xs): name = None else: name = hypernetwork.find_closest_hypernetwork_name(x) + if not name: + raise RuntimeError(f"Unknown hypernetwork: {x}") hypernetwork.load_hypernetwork(name) +def confirm_hypernetworks(xs): + for x in xs: + if x.lower() in ["", "none"]: + continue + if not hypernetwork.find_closest_hypernetwork_name(x): + raise RuntimeError(f"Unknown hypernetwork: {x}") + + def apply_clip_skip(p, x, xs): opts.data["CLIP_stop_at_last_layers"] = x @@ -284,6 +294,8 @@ class Script(scripts.Script): for ckpt_val in valslist: if modules.sd_models.get_closet_checkpoint_match(ckpt_val) is None: raise RuntimeError(f"Checkpoint for {ckpt_val} not found") + elif opt.label == "Hypernetwork": + confirm_hypernetworks(valslist) return valslist From 2fffd4bddce12b2c98a5bae5a2cc6d64450d65a0 Mon Sep 17 00:00:00 2001 From: Milly Date: Mon, 10 Oct 2022 02:20:35 +0900 Subject: [PATCH 52/56] xy_grid: Refactor confirm functions --- scripts/xy_grid.py | 73 +++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/scripts/xy_grid.py b/scripts/xy_grid.py index b2239d0a..3bb080bf 100644 --- a/scripts/xy_grid.py +++ b/scripts/xy_grid.py @@ -77,12 +77,26 @@ def apply_sampler(p, x, xs): p.sampler_index = sampler_index +def confirm_samplers(p, xs): + samplers_dict = build_samplers_dict(p) + for x in xs: + if x.lower() not in samplers_dict.keys(): + raise RuntimeError(f"Unknown sampler: {x}") + + def apply_checkpoint(p, x, xs): info = modules.sd_models.get_closet_checkpoint_match(x) - assert info is not None, f'Checkpoint for {x} not found' + if info is None: + raise RuntimeError(f"Unknown checkpoint: {x}") modules.sd_models.reload_model_weights(shared.sd_model, info) +def confirm_checkpoints(p, xs): + for x in xs: + if modules.sd_models.get_closet_checkpoint_match(x) is None: + raise RuntimeError(f"Unknown checkpoint: {x}") + + def apply_hypernetwork(p, x, xs): if x.lower() in ["", "none"]: name = None @@ -93,7 +107,7 @@ def apply_hypernetwork(p, x, xs): hypernetwork.load_hypernetwork(name) -def confirm_hypernetworks(xs): +def confirm_hypernetworks(p, xs): for x in xs: if x.lower() in ["", "none"]: continue @@ -135,29 +149,29 @@ def str_permutations(x): return x -AxisOption = namedtuple("AxisOption", ["label", "type", "apply", "format_value"]) -AxisOptionImg2Img = namedtuple("AxisOptionImg2Img", ["label", "type", "apply", "format_value"]) +AxisOption = namedtuple("AxisOption", ["label", "type", "apply", "format_value", "confirm"]) +AxisOptionImg2Img = namedtuple("AxisOptionImg2Img", ["label", "type", "apply", "format_value", "confirm"]) axis_options = [ - AxisOption("Nothing", str, do_nothing, format_nothing), - AxisOption("Seed", int, apply_field("seed"), format_value_add_label), - AxisOption("Var. seed", int, apply_field("subseed"), format_value_add_label), - AxisOption("Var. strength", float, apply_field("subseed_strength"), format_value_add_label), - AxisOption("Steps", int, apply_field("steps"), format_value_add_label), - AxisOption("CFG Scale", float, apply_field("cfg_scale"), format_value_add_label), - AxisOption("Prompt S/R", str, apply_prompt, format_value), - AxisOption("Prompt order", str_permutations, apply_order, format_value_join_list), - AxisOption("Sampler", str, apply_sampler, format_value), - AxisOption("Checkpoint name", str, apply_checkpoint, format_value), - AxisOption("Hypernetwork", str, apply_hypernetwork, format_value), - AxisOption("Sigma Churn", float, apply_field("s_churn"), format_value_add_label), - AxisOption("Sigma min", float, apply_field("s_tmin"), format_value_add_label), - AxisOption("Sigma max", float, apply_field("s_tmax"), format_value_add_label), - AxisOption("Sigma noise", float, apply_field("s_noise"), format_value_add_label), - AxisOption("Eta", float, apply_field("eta"), format_value_add_label), - AxisOption("Clip skip", int, apply_clip_skip, format_value_add_label), - AxisOptionImg2Img("Denoising", float, apply_field("denoising_strength"), format_value_add_label), # as it is now all AxisOptionImg2Img items must go after AxisOption ones + AxisOption("Nothing", str, do_nothing, format_nothing, None), + AxisOption("Seed", int, apply_field("seed"), format_value_add_label, None), + AxisOption("Var. seed", int, apply_field("subseed"), format_value_add_label, None), + AxisOption("Var. strength", float, apply_field("subseed_strength"), format_value_add_label, None), + AxisOption("Steps", int, apply_field("steps"), format_value_add_label, None), + AxisOption("CFG Scale", float, apply_field("cfg_scale"), format_value_add_label, None), + AxisOption("Prompt S/R", str, apply_prompt, format_value, None), + AxisOption("Prompt order", str_permutations, apply_order, format_value_join_list, None), + AxisOption("Sampler", str, apply_sampler, format_value, confirm_samplers), + AxisOption("Checkpoint name", str, apply_checkpoint, format_value, confirm_checkpoints), + AxisOption("Hypernetwork", str, apply_hypernetwork, format_value, confirm_hypernetworks), + AxisOption("Sigma Churn", float, apply_field("s_churn"), format_value_add_label, None), + AxisOption("Sigma min", float, apply_field("s_tmin"), format_value_add_label, None), + AxisOption("Sigma max", float, apply_field("s_tmax"), format_value_add_label, None), + AxisOption("Sigma noise", float, apply_field("s_noise"), format_value_add_label, None), + AxisOption("Eta", float, apply_field("eta"), format_value_add_label, None), + AxisOption("Clip skip", int, apply_clip_skip, format_value_add_label, None), + AxisOptionImg2Img("Denoising", float, apply_field("denoising_strength"), format_value_add_label, None), # as it is now all AxisOptionImg2Img items must go after AxisOption ones ] @@ -283,19 +297,10 @@ class Script(scripts.Script): valslist = list(permutations(valslist)) valslist = [opt.type(x) for x in valslist] - + # Confirm options are valid before starting - if opt.label == "Sampler": - samplers_dict = build_samplers_dict(p) - for sampler_val in valslist: - if sampler_val.lower() not in samplers_dict.keys(): - raise RuntimeError(f"Unknown sampler: {sampler_val}") - elif opt.label == "Checkpoint name": - for ckpt_val in valslist: - if modules.sd_models.get_closet_checkpoint_match(ckpt_val) is None: - raise RuntimeError(f"Checkpoint for {ckpt_val} not found") - elif opt.label == "Hypernetwork": - confirm_hypernetworks(valslist) + if opt.confirm: + opt.confirm(p, valslist) return valslist From ee015a1af66a94a75c914659fa0d321e702a0a87 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 11:05:57 +0300 Subject: [PATCH 53/56] change textual inversion tab to train remake train interface to use tabs --- modules/hypernetworks/hypernetwork.py | 2 +- modules/ui.py | 22 +++++++++------------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 8f2192e2..8314450a 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -175,7 +175,7 @@ def attention_CrossAttention_forward(self, x, context=None, mask=None): def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, steps, create_image_every, save_hypernetwork_every, template_file, preview_image_prompt): - assert hypernetwork_name, 'embedding not selected' + assert hypernetwork_name, 'hypernetwork not selected' path = shared.hypernetworks.get(hypernetwork_name, None) shared.loaded_hypernetwork = Hypernetwork() diff --git a/modules/ui.py b/modules/ui.py index 4bfdd275..86a2da6c 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1035,14 +1035,14 @@ def create_ui(wrap_gradio_gpu_call): sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings() - with gr.Blocks() as textual_inversion_interface: + with gr.Blocks() as train_interface: with gr.Row().style(equal_height=False): - with gr.Column(): - with gr.Group(): - gr.HTML(value="

See wiki for detailed explanation.

") + gr.HTML(value="

See wiki for detailed explanation.

") - gr.HTML(value="

Create a new embedding

") + with gr.Row().style(equal_height=False): + with gr.Tabs(elem_id="train_tabs"): + with gr.Tab(label="Create embedding"): new_embedding_name = gr.Textbox(label="Name") initialization_text = gr.Textbox(label="Initialization text", value="*") nvpt = gr.Slider(label="Number of vectors per token", minimum=1, maximum=75, step=1, value=1) @@ -1054,9 +1054,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Column(): create_embedding = gr.Button(value="Create embedding", variant='primary') - with gr.Group(): - gr.HTML(value="

Create a new hypernetwork

") - + with gr.Tab(label="Create hypernetwork"): new_hypernetwork_name = gr.Textbox(label="Name") new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"]) @@ -1067,9 +1065,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Column(): create_hypernetwork = gr.Button(value="Create hypernetwork", variant='primary') - with gr.Group(): - gr.HTML(value="

Preprocess images

") - + with gr.Tab(label="Preprocess images"): process_src = gr.Textbox(label='Source directory') process_dst = gr.Textbox(label='Destination directory') process_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512) @@ -1091,7 +1087,7 @@ def create_ui(wrap_gradio_gpu_call): with gr.Column(): run_preprocess = gr.Button(value="Preprocess", variant='primary') - with gr.Group(): + with gr.Tab(label="Train"): gr.HTML(value="

Train an embedding; must specify a directory with a set of 1:1 ratio images

") train_embedding_name = gr.Dropdown(label='Embedding', choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())) train_hypernetwork_name = gr.Dropdown(label='Hypernetwork', choices=[x for x in shared.hypernetworks.keys()]) @@ -1388,7 +1384,7 @@ Requested path was: {f} (extras_interface, "Extras", "extras"), (pnginfo_interface, "PNG Info", "pnginfo"), (modelmerger_interface, "Checkpoint Merger", "modelmerger"), - (textual_inversion_interface, "Textual inversion", "ti"), + (train_interface, "Train", "ti"), (settings_interface, "Settings", "settings"), ] From 80f3cf2bb2ce3f00d801cae2c3a8c20a8d4167d8 Mon Sep 17 00:00:00 2001 From: hentailord85ez <112723046+hentailord85ez@users.noreply.github.com> Date: Tue, 11 Oct 2022 19:48:53 +0100 Subject: [PATCH 54/56] Account when lines are mismatched --- modules/sd_hijack.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index ac70f876..2753d4fa 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -321,7 +321,17 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): fixes.append(fix[1]) self.hijack.fixes.append(fixes) - z1 = self.process_tokens([x[:75] for x in remade_batch_tokens], [x[:75] for x in batch_multipliers]) + tokens = [] + multipliers = [] + for i in range(len(remade_batch_tokens)): + if len(remade_batch_tokens[i]) > 0: + tokens.append(remade_batch_tokens[i][:75]) + multipliers.append(batch_multipliers[i][:75]) + else: + tokens.append([self.wrapped.tokenizer.eos_token_id] * 75) + multipliers.append([1.0] * 75) + + z1 = self.process_tokens(tokens, multipliers) z = z1 if z is None else torch.cat((z, z1), axis=-2) remade_batch_tokens = rem_tokens From 8561d5762b98bf7cfb764128ebf11633d8bb4405 Mon Sep 17 00:00:00 2001 From: Kalle Date: Wed, 12 Oct 2022 12:43:11 +0300 Subject: [PATCH 55/56] Remove duplicate artist from file --- artists.csv | 1 - 1 file changed, 1 deletion(-) diff --git a/artists.csv b/artists.csv index 14ba2022..99cdbdc6 100644 --- a/artists.csv +++ b/artists.csv @@ -1045,7 +1045,6 @@ Bakemono Zukushi,0.67051035,anime Lucy Madox Brown,0.67032814,fineart Paul Wonner,0.6700563,scribbles Guido Borelli Da Caluso,0.66966087,digipa-high-impact -Guido Borelli da Caluso,0.66966087,digipa-high-impact Emil Alzamora,0.5844039,nudity Heinrich Brocksieper,0.64469147,fineart Dan Smith,0.669563,digipa-high-impact From 429442f4a6aab7301efb89d27bef524fe827e81a Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 13:38:03 +0300 Subject: [PATCH 56/56] fix iterator bug for #2295 --- modules/sd_hijack.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 2753d4fa..c81722a0 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -323,10 +323,10 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module): tokens = [] multipliers = [] - for i in range(len(remade_batch_tokens)): - if len(remade_batch_tokens[i]) > 0: - tokens.append(remade_batch_tokens[i][:75]) - multipliers.append(batch_multipliers[i][:75]) + for j in range(len(remade_batch_tokens)): + if len(remade_batch_tokens[j]) > 0: + tokens.append(remade_batch_tokens[j][:75]) + multipliers.append(batch_multipliers[j][:75]) else: tokens.append([self.wrapped.tokenizer.eos_token_id] * 75) multipliers.append([1.0] * 75)