Merge branch 'master' into dev/deepdanbooru

This commit is contained in:
Greendayle 2022-10-07 18:31:49 +02:00
commit 537da7a304
21 changed files with 615 additions and 187 deletions

View file

@ -16,6 +16,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
- Attention, specify parts of text that the model should pay more attention to
- a man in a ((tuxedo)) - will pay more attention to tuxedo
- a man in a (tuxedo:1.21) - alternative syntax
- select text and press ctrl+up or ctrl+down to automatically adjust attention to selected text (see the weighting sketch after this list)
- Loopback, run img2img processing multiple times
- X/Y plot, a way to draw a 2 dimensional plot of images with different parameters
- Textual Inversion
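The emphasis weights above compose multiplicatively: each unweighted `(` scales attention by 1.1, each `[` by 1/1.1, and `(text:w)` sets an explicit factor (implemented by `parse_prompt_attention` further down this diff). A minimal sketch of the arithmetic only, not the real parser:

```python
BASE = 1.1  # per-paren multiplier used by parse_prompt_attention

def emphasis_weight(parens=0, brackets=0, explicit=1.0):
    """Effective attention weight: the explicit factor times 1.1 per enclosing
    unweighted '(' and divided by 1.1 per enclosing '['."""
    return explicit * BASE ** parens / BASE ** brackets

print(emphasis_weight(parens=2))                # ((tuxedo))     -> 1.21...
print(emphasis_weight(explicit=1.21))           # (tuxedo:1.21)  -> 1.21
print(emphasis_weight(parens=2, explicit=1.3))  # (((house:1.3)) -> 1.573, matching the doctest below
```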
@ -61,6 +62,9 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
- Reloading checkpoints on the fly
- Checkpoint Merger, a tab that allows you to merge two checkpoints into one
- [Custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Custom-Scripts) with many extensions from community
- [Composable-Diffusion](https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/), a way to use multiple prompts at once
- separate prompts using uppercase `AND`
- also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2`
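For reference, this is how the `AND` splitting behaves; the patterns mirror `re_AND` and `re_weight` from modules/prompt_parser.py later in this diff:

```python
import re

re_AND = re.compile(r"\bAND\b")
re_weight = re.compile(r"^(.*?)(?:\s*:\s*([-+]?(?:\d+\.?|\d*\.\d+)))?\s*$")

prompt = "a cat :1.2 AND a dog AND a penguin :2.2"
for subprompt in re_AND.split(prompt):
    text, weight = re_weight.search(subprompt).groups()
    print(repr(text.strip()), float(weight) if weight is not None else 1.0)
# 'a cat' 1.2
# 'a dog' 1.0
# 'a penguin' 2.2
```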
## Installation and Running
Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs.

View file

@ -0,0 +1,41 @@
// Pressing ArrowUp / ArrowDown with text selected in a prompt box adjusts its attention weight.
addEventListener('keydown', (event) => {
    // event.originalTarget is Firefox-only; fall back to the standard composed path
    let target = event.originalTarget || event.composedPath()[0];
    if (!target.hasAttribute("placeholder")) return;
    if (!target.placeholder.toLowerCase().includes("prompt")) return;

    let plus = "ArrowUp";
    let minus = "ArrowDown";
    if (event.key != plus && event.key != minus) return;

    let selectionStart = target.selectionStart;
    let selectionEnd = target.selectionEnd;
    if (selectionStart == selectionEnd) return;

    event.preventDefault();

    if (selectionStart == 0 || target.value[selectionStart - 1] != "(") {
        // selection is not yet weighted: wrap it as "(text:1.0)"
        target.value = target.value.slice(0, selectionStart) +
            "(" + target.value.slice(selectionStart, selectionEnd) + ":1.0)" +
            target.value.slice(selectionEnd);

        target.focus();
        target.selectionStart = selectionStart + 1;
        target.selectionEnd = selectionEnd + 1;
    } else {
        // selection already sits inside "(text:weight)": nudge the weight by 0.1
        let end = target.value.slice(selectionEnd + 1).indexOf(")") + 1;
        let weight = parseFloat(target.value.slice(selectionEnd + 1, selectionEnd + 1 + end));
        if (event.key == minus) weight -= 0.1;
        if (event.key == plus) weight += 0.1;

        weight = parseFloat(weight.toPrecision(12));

        target.value = target.value.slice(0, selectionEnd + 1) +
            weight +
            target.value.slice(selectionEnd + 1 + end - 1);

        target.focus();
        target.selectionStart = selectionStart;
        target.selectionEnd = selectionEnd;
    }
});

View file

@ -4,6 +4,21 @@ global_progressbars = {}
// Mirror generation progress into the page title when the option is enabled.
function check_progressbar(id_part, id_progressbar, id_progressbar_span, id_interrupt, id_preview, id_gallery){
    var progressbar = gradioApp().getElementById(id_progressbar)
    var interrupt = gradioApp().getElementById(id_interrupt)

    if(opts.show_progress_in_title && progressbar && progressbar.offsetParent){
        if(progressbar.innerText){
            let newtitle = 'Stable Diffusion - ' + progressbar.innerText
            if(document.title != newtitle){
                document.title = newtitle;
            }
        }else{
            let newtitle = 'Stable Diffusion'
            if(document.title != newtitle){
                document.title = newtitle;
            }
        }
    }

    if(progressbar != null && progressbar != global_progressbars[id_progressbar]){
        global_progressbars[id_progressbar] = progressbar

View file

@ -19,7 +19,7 @@ clip_package = os.environ.get('CLIP_PACKAGE', "git+https://github.com/openai/CLI
stable_diffusion_commit_hash = os.environ.get('STABLE_DIFFUSION_COMMIT_HASH', "69ae4b35e0a0f6ee1af8bb9a5d0016ccb27e36dc")
taming_transformers_commit_hash = os.environ.get('TAMING_TRANSFORMERS_COMMIT_HASH', "24268930bf1dce879235a7fddd0b2355b84d7ea6")
k_diffusion_commit_hash = os.environ.get('K_DIFFUSION_COMMIT_HASH', "a7ec1974d4ccb394c2dca275f42cd97490618924")
k_diffusion_commit_hash = os.environ.get('K_DIFFUSION_COMMIT_HASH', "f4e99857772fc3a126ba886aadf795a332774878")
codeformer_commit_hash = os.environ.get('CODEFORMER_COMMIT_HASH', "c5b4593074ba6214284d6acd5f1719b6c5d739af")
blip_commit_hash = os.environ.get('BLIP_COMMIT_HASH', "48211a1594f1321b00f14c9f7a5b4813144b2fb9")
@ -86,6 +86,15 @@ def git_clone(url, dir, name, commithash=None):
    # TODO clone into temporary dir and move if successful

    if os.path.exists(dir):
        if commithash is None:
            return

        current_hash = run(f'"{git}" -C {dir} rev-parse HEAD', None, f"Couldn't determine {name}'s hash: {commithash}").strip()
        if current_hash == commithash:
            return

        run(f'"{git}" -C {dir} fetch', f"Fetching updates for {name}...", f"Couldn't fetch {name}")
        run(f'"{git}" -C {dir} checkout {commithash}', f"Checking out commit for {name} with hash: {commithash}...", f"Couldn't checkout commit {commithash} for {name}")
        return

    run(f'"{git}" clone "{url}" "{dir}"', f"Cloning {name} into {dir}...", f"Couldn't clone {name}")

View file

@ -100,6 +100,8 @@ def run_extras(extras_mode, image, image_folder, gfpgan_visibility, codeformer_v
outputs.append(image)
devices.torch_gc()
return outputs, plaintext_to_html(info), ''

modules/hypernetwork.py (new file, 88 lines)
View file

@ -0,0 +1,88 @@
import glob
import os
import sys
import traceback

import torch
from torch import einsum
from einops import rearrange, repeat

from ldm.util import default
from modules import devices, shared


class HypernetworkModule(torch.nn.Module):
    """Residual two-layer MLP applied to the attention context."""
    def __init__(self, dim, state_dict):
        super().__init__()

        self.linear1 = torch.nn.Linear(dim, dim * 2)
        self.linear2 = torch.nn.Linear(dim * 2, dim)

        self.load_state_dict(state_dict, strict=True)
        self.to(devices.device)

    def forward(self, x):
        return x + self.linear2(self.linear1(x))


class Hypernetwork:
    filename = None
    name = None

    def __init__(self, filename):
        self.filename = filename
        self.name = os.path.splitext(os.path.basename(filename))[0]
        self.layers = {}

        state_dict = torch.load(filename, map_location='cpu')

        # the checkpoint maps context width -> pair of (k, v) module state dicts
        for size, sd in state_dict.items():
            self.layers[size] = (HypernetworkModule(size, sd[0]), HypernetworkModule(size, sd[1]))


def load_hypernetworks(path):
    res = {}

    for filename in glob.iglob(os.path.join(path, '**/*.pt'), recursive=True):
        try:
            hn = Hypernetwork(filename)
            res[hn.name] = hn
        except Exception:
            print(f"Error loading hypernetwork {filename}", file=sys.stderr)
            print(traceback.format_exc(), file=sys.stderr)

    return res


def attention_CrossAttention_forward(self, x, context=None, mask=None):
    h = self.heads

    q = self.to_q(x)
    context = default(context, x)

    # route k/v through the hypernetwork modules matching the context width, if any
    hypernetwork = shared.selected_hypernetwork()
    hypernetwork_layers = (hypernetwork.layers if hypernetwork is not None else {}).get(context.shape[2], None)

    if hypernetwork_layers is not None:
        k = self.to_k(hypernetwork_layers[0](context))
        v = self.to_v(hypernetwork_layers[1](context))
    else:
        k = self.to_k(context)
        v = self.to_v(context)

    q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v))

    sim = einsum('b i d, b j d -> b i j', q, k) * self.scale

    if mask is not None:
        mask = rearrange(mask, 'b ... -> b (...)')
        max_neg_value = -torch.finfo(sim.dtype).max
        mask = repeat(mask, 'b j -> (b h) () j', h=h)
        sim.masked_fill_(~mask, max_neg_value)

    # attention, what we cannot get enough of
    attn = sim.softmax(dim=-1)

    out = einsum('b i j, b j d -> b i d', attn, v)
    out = rearrange(out, '(b h) n d -> b n (h d)', h=h)
    return self.to_out(out)
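From the loader above, a hypernetwork checkpoint is a dict keyed by context width, each value indexable as a pair of state dicts for the k- and v-side modules. A minimal sketch that fabricates one for testing; the file name, zero weights, and the 768 width are illustrative, and this assumes the module above is importable:

```python
import torch

def make_module_sd(dim):
    # Matches HypernetworkModule's layers: Linear(dim, dim*2) then Linear(dim*2, dim)
    return {
        "linear1.weight": torch.zeros(dim * 2, dim),
        "linear1.bias": torch.zeros(dim * 2),
        "linear2.weight": torch.zeros(dim, dim * 2),
        "linear2.bias": torch.zeros(dim),
    }

dim = 768  # CLIP embedding width for SD v1 models
torch.save({dim: (make_module_sd(dim), make_module_sd(dim))}, "test.pt")

hn = Hypernetwork("test.pt")
print(hn.name, list(hn.layers))  # test [768]
```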

View file

@ -292,18 +292,13 @@ def apply_filename_pattern(x, p, seed, prompt):
x = x.replace("[cfg]", str(p.cfg_scale))
x = x.replace("[width]", str(p.width))
x = x.replace("[height]", str(p.height))
#currently disabled if using the save button, will work otherwise
# if enabled it will cause a bug because styles is not included in the save_files data dictionary
if hasattr(p, "styles"):
x = x.replace("[styles]", sanitize_filename_part(", ".join([x for x in p.styles if not x == "None"]) or "None", replace_spaces=False))
x = x.replace("[styles]", sanitize_filename_part(", ".join([x for x in p.styles if not x == "None"]) or "None", replace_spaces=False))
x = x.replace("[sampler]", sanitize_filename_part(sd_samplers.samplers[p.sampler_index].name, replace_spaces=False))
x = x.replace("[model_hash]", shared.sd_model.sd_model_hash)
x = x.replace("[model_hash]", getattr(p, "sd_model_hash", shared.sd_model.sd_model_hash))
x = x.replace("[date]", datetime.date.today().isoformat())
x = x.replace("[datetime]", datetime.datetime.now().strftime("%Y%m%d%H%M%S"))
x = x.replace("[job_timestamp]", shared.state.job_timestamp)
x = x.replace("[job_timestamp]", getattr(p, "job_timestamp", shared.state.job_timestamp))
# Apply [prompt] last, because it may contain any replacement word.
if prompt is not None:
@ -353,7 +348,7 @@ def get_next_sequence_number(path, basename):
return result + 1
def save_image(image, path, basename, seed=None, prompt=None, extension='png', info=None, short_filename=False, no_prompt=False, grid=False, pnginfo_section_name='parameters', p=None, existing_info=None, forced_filename=None, suffix=""):
def save_image(image, path, basename, seed=None, prompt=None, extension='png', info=None, short_filename=False, no_prompt=False, grid=False, pnginfo_section_name='parameters', p=None, existing_info=None, forced_filename=None, suffix="", save_to_dirs=None):
if short_filename or prompt is None or seed is None:
file_decoration = ""
elif opts.save_to_dirs:
@ -377,7 +372,8 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i
else:
pnginfo = None
save_to_dirs = (grid and opts.grid_save_to_dirs) or (not grid and opts.save_to_dirs and not no_prompt)
if save_to_dirs is None:
save_to_dirs = (grid and opts.grid_save_to_dirs) or (not grid and opts.save_to_dirs and not no_prompt)
if save_to_dirs:
dirname = apply_filename_pattern(opts.directories_filename_pattern or "[prompt_words]", p, seed, prompt).strip('\\ /')
@ -431,4 +427,4 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i
with open(f"{fullfn_without_extension}.txt", "w", encoding="utf8") as file:
file.write(info + "\n")
return fullfn

View file

@ -11,9 +11,8 @@ import cv2
from skimage import exposure
import modules.sd_hijack
from modules import devices, prompt_parser, masking
from modules import devices, prompt_parser, masking, sd_samplers, lowvram
from modules.sd_hijack import model_hijack
from modules.sd_samplers import samplers, samplers_for_img2img
from modules.shared import opts, cmd_opts, state
import modules.shared as shared
import modules.face_restoration
@ -84,7 +83,7 @@ class StableDiffusionProcessing:
self.s_tmin = opts.s_tmin
self.s_tmax = float('inf') # not representable as a standard ui option
self.s_noise = opts.s_noise
if not seed_enable_extras:
self.subseed = -1
self.subseed_strength = 0
@ -110,7 +109,7 @@ class Processed:
self.width = p.width
self.height = p.height
self.sampler_index = p.sampler_index
self.sampler = samplers[p.sampler_index].name
self.sampler = sd_samplers.samplers[p.sampler_index].name
self.cfg_scale = p.cfg_scale
self.steps = p.steps
self.batch_size = p.batch_size
@ -122,6 +121,8 @@ class Processed:
self.denoising_strength = getattr(p, 'denoising_strength', None)
self.extra_generation_params = p.extra_generation_params
self.index_of_first_image = index_of_first_image
self.styles = p.styles
self.job_timestamp = state.job_timestamp
self.eta = p.eta
self.ddim_discretize = p.ddim_discretize
@ -166,6 +167,8 @@ class Processed:
"extra_generation_params": self.extra_generation_params,
"index_of_first_image": self.index_of_first_image,
"infotexts": self.infotexts,
"styles": self.styles,
"job_timestamp": self.job_timestamp,
}
return json.dumps(obj)
@ -265,7 +268,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration
generation_params = {
"Steps": p.steps,
"Sampler": samplers[p.sampler_index].name,
"Sampler": sd_samplers.samplers[p.sampler_index].name,
"CFG scale": p.cfg_scale,
"Seed": all_seeds[index],
"Face restoration": (opts.face_restoration_model if p.restore_faces else None),
@ -296,7 +299,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
assert(len(p.prompt) > 0)
else:
assert p.prompt is not None
devices.torch_gc()
seed = get_fixed_seed(p.seed)
@ -359,8 +362,8 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
#uc = p.sd_model.get_learned_conditioning(len(prompts) * [p.negative_prompt])
#c = p.sd_model.get_learned_conditioning(prompts)
with devices.autocast():
uc = prompt_parser.get_learned_conditioning(len(prompts) * [p.negative_prompt], p.steps)
c = prompt_parser.get_learned_conditioning(prompts, p.steps)
uc = prompt_parser.get_learned_conditioning(shared.sd_model, len(prompts) * [p.negative_prompt], p.steps)
c = prompt_parser.get_multicond_learned_conditioning(shared.sd_model, prompts, p.steps)
if len(model_hijack.comments) > 0:
for comment in model_hijack.comments:
@ -383,6 +386,13 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
x_samples_ddim = p.sd_model.decode_first_stage(samples_ddim)
x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
del samples_ddim
if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
lowvram.send_everything_to_cpu()
devices.torch_gc()
if opts.filter_nsfw:
import modules.safety as safety
x_samples_ddim = modules.safety.censor_batch(x_samples_ddim)
@ -424,9 +434,15 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
if opts.samples_save and not p.do_not_save_samples:
images.save_image(image, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p)
infotexts.append(infotext(n, i))
text = infotext(n, i)
infotexts.append(text)
image.info["parameters"] = text
output_images.append(image)
del x_samples_ddim
devices.torch_gc()
state.nextjob()
p.color_corrections = None
@ -437,7 +453,9 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
grid = images.image_grid(output_images, p.batch_size)
if opts.return_grid:
infotexts.insert(0, infotext())
text = infotext()
infotexts.insert(0, text)
grid.info["parameters"] = text
output_images.insert(0, grid)
index_of_first_image = 1
@ -478,7 +496,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
self.firstphase_height_truncated = int(scale * self.height)
def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength):
self.sampler = samplers[self.sampler_index].constructor(self.sd_model)
self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model)
if not self.enable_hr:
x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
@ -521,13 +539,14 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
shared.state.nextjob()
self.sampler = samplers[self.sampler_index].constructor(self.sd_model)
self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model)
noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
# GC now before running the next img2img to prevent running out of memory
x = None
devices.torch_gc()
samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps)
return samples
@ -556,7 +575,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
self.nmask = None
def init(self, all_prompts, all_seeds, all_subseeds):
self.sampler = samplers_for_img2img[self.sampler_index].constructor(self.sd_model)
self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers_for_img2img, self.sampler_index, self.sd_model)
crop_region = None
if self.image_mask is not None:
@ -663,4 +682,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
if self.mask is not None:
samples = samples * self.nmask + self.init_latent * self.mask
del x
devices.torch_gc()
return samples

View file

@ -1,10 +1,7 @@
import re
from collections import namedtuple
import torch
from lark import Lark, Transformer, Visitor
import functools
import modules.shared as shared
from typing import List
import lark
# a prompt like this: "fantasy landscape with a [mountain:lake:0.25] and [an oak:a christmas tree:0.75][ in foreground::0.6][ in background:0.25] [shoddy:masterful:0.5]"
# will be represented with prompt_schedule like this (assuming steps=100):
@ -14,25 +11,48 @@ import modules.shared as shared
# [75, 'fantasy landscape with a lake and an oak in background masterful']
# [100, 'fantasy landscape with a lake and a christmas tree in background masterful']
schedule_parser = lark.Lark(r"""
!start: (prompt | /[][():]/+)*
prompt: (emphasized | scheduled | plain | WHITESPACE)*
!emphasized: "(" prompt ")"
| "(" prompt ":" prompt ")"
| "[" prompt "]"
scheduled: "[" [prompt ":"] prompt ":" [WHITESPACE] NUMBER "]"
WHITESPACE: /\s+/
plain: /([^\\\[\]():]|\\.)+/
%import common.SIGNED_NUMBER -> NUMBER
""")
def get_learned_conditioning_prompt_schedules(prompts, steps):
grammar = r"""
start: prompt
prompt: (emphasized | scheduled | weighted | plain)*
!emphasized: "(" prompt ")"
| "(" prompt ":" prompt ")"
| "[" prompt "]"
scheduled: "[" (prompt ":")? prompt ":" NUMBER "]"
!weighted: "{" weighted_item ("|" weighted_item)* "}"
!weighted_item: prompt (":" prompt)?
plain: /([^\\\[\](){}:|]|\\.)+/
%import common.SIGNED_NUMBER -> NUMBER
"""
parser = Lark(grammar, parser='lalr')
>>> g = lambda p: get_learned_conditioning_prompt_schedules([p], 10)[0]
>>> g("test")
[[10, 'test']]
>>> g("a [b:3]")
[[3, 'a '], [10, 'a b']]
>>> g("a [b: 3]")
[[3, 'a '], [10, 'a b']]
>>> g("a [[[b]]:2]")
[[2, 'a '], [10, 'a [[b]]']]
>>> g("[(a:2):3]")
[[3, ''], [10, '(a:2)']]
>>> g("a [b : c : 1] d")
[[1, 'a b d'], [10, 'a c d']]
>>> g("a[b:[c:d:2]:1]e")
[[1, 'abe'], [2, 'ace'], [10, 'ade']]
>>> g("a [unbalanced")
[[10, 'a [unbalanced']]
>>> g("a [b:.5] c")
[[5, 'a c'], [10, 'a b c']]
>>> g("a [{b|d{:.5] c") # not handling this right now
[[5, 'a c'], [10, 'a {b|d{ c']]
>>> g("((a][:b:c [d:3]")
[[3, '((a][:b:c '], [10, '((a][:b:c d']]
"""
def collect_steps(steps, tree):
l = [steps]
class CollectSteps(Visitor):
class CollectSteps(lark.Visitor):
def scheduled(self, tree):
tree.children[-1] = float(tree.children[-1])
if tree.children[-1] < 1:
@ -43,13 +63,10 @@ def get_learned_conditioning_prompt_schedules(prompts, steps):
return sorted(set(l))
def at_step(step, tree):
class AtStep(Transformer):
class AtStep(lark.Transformer):
def scheduled(self, args):
if len(args) == 2:
before, after, when = (), *args
else:
before, after, when = args
yield before if step <= when else after
before, after, _, when = args
yield before or () if step <= when else after
def start(self, args):
def flatten(x):
if type(x) == str:
@ -57,16 +74,22 @@ def get_learned_conditioning_prompt_schedules(prompts, steps):
else:
for gen in x:
yield from flatten(gen)
return ''.join(flatten(args[0]))
return ''.join(flatten(args))
def plain(self, args):
yield args[0].value
def __default__(self, data, children, meta):
for child in children:
yield from child
return AtStep().transform(tree)
def get_schedule(prompt):
tree = parser.parse(prompt)
try:
tree = schedule_parser.parse(prompt)
except lark.exceptions.LarkError as e:
if 0:
import traceback
traceback.print_exc()
return [[steps, prompt]]
return [[t, at_step(t, tree)] for t in collect_steps(steps, tree)]
promptdict = {prompt: get_schedule(prompt) for prompt in set(prompts)}
@ -74,11 +97,26 @@ def get_learned_conditioning_prompt_schedules(prompts, steps):
ScheduledPromptConditioning = namedtuple("ScheduledPromptConditioning", ["end_at_step", "cond"])
ScheduledPromptBatch = namedtuple("ScheduledPromptBatch", ["shape", "schedules"])
def get_learned_conditioning(prompts, steps):
def get_learned_conditioning(model, prompts, steps):
"""converts a list of prompts into a list of prompt schedules - each schedule is a list of ScheduledPromptConditioning, specifying the comdition (cond),
and the sampling step at which this condition is to be replaced by the next one.
Input:
(model, ['a red crown', 'a [blue:green:5] jeweled crown'], 20)
Output:
[
[
ScheduledPromptConditioning(end_at_step=20, cond=tensor([[-0.3886, 0.0229, -0.0523, ..., -0.4901, -0.3066, 0.0674], ..., [ 0.3317, -0.5102, -0.4066, ..., 0.4119, -0.7647, -1.0160]], device='cuda:0'))
],
[
ScheduledPromptConditioning(end_at_step=5, cond=tensor([[-0.3886, 0.0229, -0.0522, ..., -0.4901, -0.3067, 0.0673], ..., [-0.0192, 0.3867, -0.4644, ..., 0.1135, -0.3696, -0.4625]], device='cuda:0')),
ScheduledPromptConditioning(end_at_step=20, cond=tensor([[-0.3886, 0.0229, -0.0522, ..., -0.4901, -0.3067, 0.0673], ..., [-0.7352, -0.4356, -0.7888, ..., 0.6994, -0.4312, -1.2593]], device='cuda:0'))
]
]
"""
res = []
prompt_schedules = get_learned_conditioning_prompt_schedules(prompts, steps)
@ -92,7 +130,7 @@ def get_learned_conditioning(prompts, steps):
continue
texts = [x[1] for x in prompt_schedule]
conds = shared.sd_model.get_learned_conditioning(texts)
conds = model.get_learned_conditioning(texts)
cond_schedule = []
for i, (end_at_step, text) in enumerate(prompt_schedule):
@ -101,22 +139,109 @@ def get_learned_conditioning(prompts, steps):
cache[prompt] = cond_schedule
res.append(cond_schedule)
return ScheduledPromptBatch((len(prompts),) + res[0][0].cond.shape, res)
return res
def reconstruct_cond_batch(c: ScheduledPromptBatch, current_step):
res = torch.zeros(c.shape, device=shared.device, dtype=next(shared.sd_model.parameters()).dtype)
for i, cond_schedule in enumerate(c.schedules):
re_AND = re.compile(r"\bAND\b")
re_weight = re.compile(r"^(.*?)(?:\s*:\s*([-+]?(?:\d+\.?|\d*\.\d+)))?\s*$")
def get_multicond_prompt_list(prompts):
    res_indexes = []

    prompt_flat_list = []
    prompt_indexes = {}

    for prompt in prompts:
        subprompts = re_AND.split(prompt)

        indexes = []
        for subprompt in subprompts:
            match = re_weight.search(subprompt)

            text, weight = match.groups() if match is not None else (subprompt, 1.0)
            weight = float(weight) if weight is not None else 1.0

            index = prompt_indexes.get(text, None)
            if index is None:
                index = len(prompt_flat_list)
                prompt_flat_list.append(text)
                prompt_indexes[text] = index

            indexes.append((index, weight))

        res_indexes.append(indexes)

    return res_indexes, prompt_flat_list, prompt_indexes
class ComposableScheduledPromptConditioning:
    def __init__(self, schedules, weight=1.0):
        self.schedules: List[ScheduledPromptConditioning] = schedules
        self.weight: float = weight


class MulticondLearnedConditioning:
    def __init__(self, shape, batch):
        self.shape: tuple = shape  # the shape field is needed to send this object to DDIM/PLMS
        self.batch: List[List[ComposableScheduledPromptConditioning]] = batch
def get_multicond_learned_conditioning(model, prompts, steps) -> MulticondLearnedConditioning:
    """same as get_learned_conditioning, but returns a list of ScheduledPromptConditioning along with the weight objects for each prompt.
    For each prompt, the list is obtained by splitting the prompt using the AND separator.

    https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/
    """

    res_indexes, prompt_flat_list, prompt_indexes = get_multicond_prompt_list(prompts)

    learned_conditioning = get_learned_conditioning(model, prompt_flat_list, steps)

    res = []
    for indexes in res_indexes:
        res.append([ComposableScheduledPromptConditioning(learned_conditioning[i], weight) for i, weight in indexes])

    return MulticondLearnedConditioning(shape=(len(prompts),), batch=res)
def reconstruct_cond_batch(c: List[List[ScheduledPromptConditioning]], current_step):
param = c[0][0].cond
res = torch.zeros((len(c),) + param.shape, device=param.device, dtype=param.dtype)
for i, cond_schedule in enumerate(c):
target_index = 0
for curret_index, (end_at, cond) in enumerate(cond_schedule):
for current, (end_at, cond) in enumerate(cond_schedule):
if current_step <= end_at:
target_index = curret_index
target_index = current
break
res[i] = cond_schedule[target_index].cond
return res
def reconstruct_multicond_batch(c: MulticondLearnedConditioning, current_step):
    param = c.batch[0][0].schedules[0].cond

    tensors = []
    conds_list = []

    for batch_no, composable_prompts in enumerate(c.batch):
        conds_for_batch = []

        for cond_index, composable_prompt in enumerate(composable_prompts):
            target_index = 0
            for current, (end_at, cond) in enumerate(composable_prompt.schedules):
                if current_step <= end_at:
                    target_index = current
                    break

            conds_for_batch.append((len(tensors), composable_prompt.weight))
            tensors.append(composable_prompt.schedules[target_index].cond)

        conds_list.append(conds_for_batch)

    return conds_list, torch.stack(tensors).to(device=param.device, dtype=param.dtype)
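Illustratively, for a batch of one image with prompt `a cat AND a dog :2.2`, the returned pair looks like this; the tensor shape assumes SD v1's 77x768 text conditioning:

```python
# conds_list: per image, (index into the stacked tensor, weight) for each subprompt
# conds_list -> [[(0, 1.0), (1, 2.2)]]
# tensor     -> torch.Size([2, 77, 768])
```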
re_attention = re.compile(r"""
\\\(|
\\\)|
@ -148,23 +273,26 @@ def parse_prompt_attention(text):
\\ - literal character '\'
anything else - just text
Example:
'a (((house:1.3)) [on] a (hill:0.5), sun, (((sky))).'
produces:
[
['a ', 1.0],
['house', 1.5730000000000004],
[' ', 1.1],
['on', 1.0],
[' a ', 1.1],
['hill', 0.55],
[', sun, ', 1.1],
['sky', 1.4641000000000006],
['.', 1.1]
]
>>> parse_prompt_attention('normal text')
[['normal text', 1.0]]
>>> parse_prompt_attention('an (important) word')
[['an ', 1.0], ['important', 1.1], [' word', 1.0]]
>>> parse_prompt_attention('(unbalanced')
[['unbalanced', 1.1]]
>>> parse_prompt_attention('\(literal\]')
[['(literal]', 1.0]]
>>> parse_prompt_attention('(unnecessary)(parens)')
[['unnecessaryparens', 1.1]]
>>> parse_prompt_attention('a (((house:1.3)) [on] a (hill:0.5), sun, (((sky))).')
[['a ', 1.0],
['house', 1.5730000000000004],
[' ', 1.1],
['on', 1.0],
[' a ', 1.1],
['hill', 0.55],
[', sun, ', 1.1],
['sky', 1.4641000000000006],
['.', 1.1]]
"""
res = []
@ -206,4 +334,19 @@ def parse_prompt_attention(text):
if len(res) == 0:
res = [["", 1.0]]
# merge runs of identical weights
i = 0
while i + 1 < len(res):
if res[i][1] == res[i + 1][1]:
res[i][0] += res[i + 1][0]
res.pop(i + 1)
else:
i += 1
return res
if __name__ == "__main__":
import doctest
doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
else:
import torch # doctest faster

View file

@ -5,9 +5,10 @@ import traceback
import torch
import numpy as np
from torch import einsum
from torch.nn.functional import silu
import modules.textual_inversion.textual_inversion
from modules import prompt_parser, devices, sd_hijack_optimizations, shared
from modules import prompt_parser, devices, sd_hijack_optimizations, shared, hypernetwork
from modules.shared import opts, device, cmd_opts
import ldm.modules.attention
@ -19,16 +20,19 @@ diffusionmodules_model_AttnBlock_forward = ldm.modules.diffusionmodules.model.At
def apply_optimizations():
undo_optimizations()
ldm.modules.diffusionmodules.model.nonlinearity = silu
if cmd_opts.opt_split_attention_v1:
ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1
elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention or torch.cuda.is_available()):
ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward
ldm.modules.diffusionmodules.model.nonlinearity = sd_hijack_optimizations.nonlinearity_hijack
ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.cross_attention_attnblock_forward
def undo_optimizations():
ldm.modules.attention.CrossAttention.forward = attention_CrossAttention_forward
ldm.modules.attention.CrossAttention.forward = hypernetwork.attention_CrossAttention_forward
ldm.modules.diffusionmodules.model.nonlinearity = diffusionmodules_model_nonlinearity
ldm.modules.diffusionmodules.model.AttnBlock.forward = diffusionmodules_model_AttnBlock_forward

View file

@ -5,6 +5,8 @@ from torch import einsum
from ldm.util import default
from einops import rearrange
from modules import shared
# see https://github.com/basujindal/stable-diffusion/pull/117 for discussion
def split_cross_attention_forward_v1(self, x, context=None, mask=None):
@ -42,8 +44,19 @@ def split_cross_attention_forward(self, x, context=None, mask=None):
q_in = self.to_q(x)
context = default(context, x)
k_in = self.to_k(context) * self.scale
v_in = self.to_v(context)
hypernetwork = shared.selected_hypernetwork()
hypernetwork_layers = (hypernetwork.layers if hypernetwork is not None else {}).get(context.shape[2], None)
if hypernetwork_layers is not None:
k_in = self.to_k(hypernetwork_layers[0](context))
v_in = self.to_v(hypernetwork_layers[1](context))
else:
k_in = self.to_k(context)
v_in = self.to_v(context)
k_in *= self.scale
del context, x
q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q_in, k_in, v_in))
@ -92,14 +105,6 @@ def split_cross_attention_forward(self, x, context=None, mask=None):
return self.to_out(r2)
def nonlinearity_hijack(x):
# swish
t = torch.sigmoid(x)
x *= t
del t
return x
def cross_attention_attnblock_forward(self, x):
h_ = x
h_ = self.norm(h_)

View file

@ -134,6 +134,14 @@ def load_model_weights(model, checkpoint_file, sd_model_hash):
    devices.dtype = torch.float32 if shared.cmd_opts.no_half else torch.float16

    vae_file = os.path.splitext(checkpoint_file)[0] + ".vae.pt"
    if os.path.exists(vae_file):
        print(f"Loading VAE weights from: {vae_file}")
        vae_ckpt = torch.load(vae_file, map_location="cpu")
        vae_dict = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss"}
        model.first_stage_model.load_state_dict(vae_dict)

    model.sd_model_hash = sd_model_hash
    model.sd_model_checkpoint = checkpoint_file
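The naming convention implied by the lookup above pairs the VAE with the checkpoint file; the directory shown here is illustrative:

```python
# models/Stable-diffusion/mymodel.ckpt    -> checkpoint passed to load_model_weights
# models/Stable-diffusion/mymodel.vae.pt  -> picked up automatically, replacing
#                                            first_stage_model's weights
```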

View file

@ -13,31 +13,57 @@ from modules.shared import opts, cmd_opts, state
import modules.shared as shared
SamplerData = namedtuple('SamplerData', ['name', 'constructor', 'aliases'])
SamplerData = namedtuple('SamplerData', ['name', 'constructor', 'aliases', 'options'])
samplers_k_diffusion = [
('Euler a', 'sample_euler_ancestral', ['k_euler_a']),
('Euler', 'sample_euler', ['k_euler']),
('LMS', 'sample_lms', ['k_lms']),
('Heun', 'sample_heun', ['k_heun']),
('DPM2', 'sample_dpm_2', ['k_dpm_2']),
('DPM2 a', 'sample_dpm_2_ancestral', ['k_dpm_2_a']),
('DPM fast', 'sample_dpm_fast', ['k_dpm_fast']),
('DPM adaptive', 'sample_dpm_adaptive', ['k_dpm_ad']),
('Euler a', 'sample_euler_ancestral', ['k_euler_a'], {}),
('Euler', 'sample_euler', ['k_euler'], {}),
('LMS', 'sample_lms', ['k_lms'], {}),
('Heun', 'sample_heun', ['k_heun'], {}),
('DPM2', 'sample_dpm_2', ['k_dpm_2'], {}),
('DPM2 a', 'sample_dpm_2_ancestral', ['k_dpm_2_a'], {}),
('DPM fast', 'sample_dpm_fast', ['k_dpm_fast'], {}),
('DPM adaptive', 'sample_dpm_adaptive', ['k_dpm_ad'], {}),
('LMS Karras', 'sample_lms', ['k_lms_ka'], {'scheduler': 'karras'}),
('DPM2 Karras', 'sample_dpm_2', ['k_dpm_2_ka'], {'scheduler': 'karras'}),
('DPM2 a Karras', 'sample_dpm_2_ancestral', ['k_dpm_2_a_ka'], {'scheduler': 'karras'}),
]
samplers_data_k_diffusion = [
SamplerData(label, lambda model, funcname=funcname: KDiffusionSampler(funcname, model), aliases)
for label, funcname, aliases in samplers_k_diffusion
SamplerData(label, lambda model, funcname=funcname: KDiffusionSampler(funcname, model), aliases, options)
for label, funcname, aliases, options in samplers_k_diffusion
if hasattr(k_diffusion.sampling, funcname)
]
samplers = [
all_samplers = [
*samplers_data_k_diffusion,
SamplerData('DDIM', lambda model: VanillaStableDiffusionSampler(ldm.models.diffusion.ddim.DDIMSampler, model), []),
SamplerData('PLMS', lambda model: VanillaStableDiffusionSampler(ldm.models.diffusion.plms.PLMSSampler, model), []),
SamplerData('DDIM', lambda model: VanillaStableDiffusionSampler(ldm.models.diffusion.ddim.DDIMSampler, model), [], {}),
SamplerData('PLMS', lambda model: VanillaStableDiffusionSampler(ldm.models.diffusion.plms.PLMSSampler, model), [], {}),
]
samplers_for_img2img = [x for x in samplers if x.name not in ['PLMS', 'DPM fast', 'DPM adaptive']]
samplers = []
samplers_for_img2img = []


def create_sampler_with_index(list_of_configs, index, model):
    config = list_of_configs[index]
    sampler = config.constructor(model)
    sampler.config = config

    return sampler


def set_samplers():
    global samplers, samplers_for_img2img

    hidden = set(opts.hide_samplers)
    hidden_img2img = set(opts.hide_samplers + ['PLMS', 'DPM fast', 'DPM adaptive'])

    samplers = [x for x in all_samplers if x.name not in hidden]
    samplers_for_img2img = [x for x in all_samplers if x.name not in hidden_img2img]


set_samplers()
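A quick sketch of the new option in action; this assumes the webui's `opts` is already loaded. After changing `hide_samplers` the lists must be rebuilt by `set_samplers()`, which webui.py now calls on restart:

```python
opts.hide_samplers = ['PLMS', 'DPM fast']      # normally set via the settings UI
set_samplers()
print([x.name for x in samplers])              # all samplers except the hidden two
print([x.name for x in samplers_for_img2img])  # additionally drops DPM adaptive
```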
sampler_extra_params = {
'sample_euler': ['s_churn', 's_tmin', 's_tmax', 's_noise'],
@ -104,14 +130,18 @@ class VanillaStableDiffusionSampler:
self.step = 0
self.eta = None
self.default_eta = 0.0
self.config = None
def number_of_needed_noises(self, p):
return 0
def p_sample_ddim_hook(self, x_dec, cond, ts, unconditional_conditioning, *args, **kwargs):
cond = prompt_parser.reconstruct_cond_batch(cond, self.step)
conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step)
unconditional_conditioning = prompt_parser.reconstruct_cond_batch(unconditional_conditioning, self.step)
assert all([len(conds) == 1 for conds in conds_list]), 'composition via AND is not supported for DDIM/PLMS samplers'
cond = tensor
if self.mask is not None:
img_orig = self.sampler.model.q_sample(self.init_latent, ts)
x_dec = img_orig * self.mask + self.nmask * x_dec
@ -183,19 +213,31 @@ class CFGDenoiser(torch.nn.Module):
self.step = 0
def forward(self, x, sigma, uncond, cond, cond_scale):
cond = prompt_parser.reconstruct_cond_batch(cond, self.step)
conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step)
uncond = prompt_parser.reconstruct_cond_batch(uncond, self.step)
batch_size = len(conds_list)
repeats = [len(conds_list[i]) for i in range(batch_size)]
x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x])
sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma])
cond_in = torch.cat([tensor, uncond])
if shared.batch_cond_uncond:
x_in = torch.cat([x] * 2)
sigma_in = torch.cat([sigma] * 2)
cond_in = torch.cat([uncond, cond])
uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2)
denoised = uncond + (cond - uncond) * cond_scale
x_out = self.inner_model(x_in, sigma_in, cond=cond_in)
else:
uncond = self.inner_model(x, sigma, cond=uncond)
cond = self.inner_model(x, sigma, cond=cond)
denoised = uncond + (cond - uncond) * cond_scale
x_out = torch.zeros_like(x_in)
for batch_offset in range(0, x_out.shape[0], batch_size):
a = batch_offset
b = a + batch_size
x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=cond_in[a:b])
denoised_uncond = x_out[-batch_size:]
denoised = torch.clone(denoised_uncond)
for i, conds in enumerate(conds_list):
for cond_index, weight in conds:
denoised[i] += (x_out[cond_index] - denoised_uncond[i]) * (weight * cond_scale)
if self.mask is not None:
denoised = self.init_latent * self.mask + self.nmask * denoised
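In equation form, the weighting loop above combines the denoiser outputs as a generalization of classifier-free guidance; a sketch of what the code computes:

```python
# denoised[i] = uncond_out[i] + cfg_scale * sum_k w_k * (cond_out[k] - uncond_out[i])
# where k runs over image i's AND-subprompts with weights w_k; with a single
# subprompt of weight 1 this reduces to classic CFG:
#   denoised = uncond + cfg_scale * (cond - uncond)
```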
@ -250,6 +292,7 @@ class KDiffusionSampler:
self.stop_at = None
self.eta = None
self.default_eta = 1.0
self.config = None
def callback_state(self, d):
store_latent(d["denoised"])
@ -295,9 +338,11 @@ class KDiffusionSampler:
steps, t_enc = setup_img2img_steps(p, steps)
if p.sampler_noise_scheduler_override:
sigmas = p.sampler_noise_scheduler_override(steps)
sigmas = p.sampler_noise_scheduler_override(steps)
elif self.config is not None and self.config.options.get('scheduler', None) == 'karras':
sigmas = k_diffusion.sampling.get_sigmas_karras(n=steps, sigma_min=0.1, sigma_max=10, device=shared.device)
else:
sigmas = self.model_wrap.get_sigmas(steps)
sigmas = self.model_wrap.get_sigmas(steps)
noise = noise * sigmas[steps - t_enc - 1]
xi = x + noise
@ -314,9 +359,12 @@ class KDiffusionSampler:
steps = steps or p.steps
if p.sampler_noise_scheduler_override:
sigmas = p.sampler_noise_scheduler_override(steps)
sigmas = p.sampler_noise_scheduler_override(steps)
elif self.config is not None and self.config.options.get('scheduler', None) == 'karras':
sigmas = k_diffusion.sampling.get_sigmas_karras(n=steps, sigma_min=0.1, sigma_max=10, device=shared.device)
else:
sigmas = self.model_wrap.get_sigmas(steps)
sigmas = self.model_wrap.get_sigmas(steps)
x = x * sigmas[0]
extra_params_kwargs = self.initialize(p)
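`get_sigmas_karras` is k-diffusion's noise schedule from Karras et al. (2022); a small sketch of inspecting it with the parameters used above, assuming k_diffusion is installed:

```python
import k_diffusion

sigmas = k_diffusion.sampling.get_sigmas_karras(n=20, sigma_min=0.1, sigma_max=10, device='cpu')
# n + 1 values, since k-diffusion appends a terminal zero sigma
print(len(sigmas), float(sigmas[0]), float(sigmas[-1]))  # 21 10.0 0.0
```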

View file

@ -13,11 +13,11 @@ import modules.memmon
import modules.sd_models
import modules.styles
import modules.devices as devices
from modules.paths import script_path, sd_path
from modules import sd_samplers, hypernetwork
from modules.paths import models_path, script_path, sd_path
sd_model_file = os.path.join(script_path, 'model.ckpt')
default_sd_model_file = sd_model_file
model_path = os.path.join(script_path, 'models')
parser = argparse.ArgumentParser()
parser.add_argument("--config", type=str, default=os.path.join(sd_path, "configs/stable-diffusion/v1-inference.yaml"), help="path to config which constructs model",)
parser.add_argument("--ckpt", type=str, default=sd_model_file, help="path to checkpoint of stable diffusion model; if specified, this checkpoint will be added to the list of checkpoints and loaded",)
@ -35,14 +35,14 @@ parser.add_argument("--always-batch-cond-uncond", action='store_true', help="dis
parser.add_argument("--unload-gfpgan", action='store_true', help="does not do anything.")
parser.add_argument("--precision", type=str, help="evaluate at this precision", choices=["full", "autocast"], default="autocast")
parser.add_argument("--share", action='store_true', help="use share=True for gradio and make the UI accessible through their site (doesn't work for me but you might have better luck)")
parser.add_argument("--codeformer-models-path", type=str, help="Path to directory with codeformer model file(s).", default=os.path.join(model_path, 'Codeformer'))
parser.add_argument("--gfpgan-models-path", type=str, help="Path to directory with GFPGAN model file(s).", default=os.path.join(model_path, 'GFPGAN'))
parser.add_argument("--esrgan-models-path", type=str, help="Path to directory with ESRGAN model file(s).", default=os.path.join(model_path, 'ESRGAN'))
parser.add_argument("--bsrgan-models-path", type=str, help="Path to directory with BSRGAN model file(s).", default=os.path.join(model_path, 'BSRGAN'))
parser.add_argument("--realesrgan-models-path", type=str, help="Path to directory with RealESRGAN model file(s).", default=os.path.join(model_path, 'RealESRGAN'))
parser.add_argument("--scunet-models-path", type=str, help="Path to directory with ScuNET model file(s).", default=os.path.join(model_path, 'ScuNET'))
parser.add_argument("--swinir-models-path", type=str, help="Path to directory with SwinIR model file(s).", default=os.path.join(model_path, 'SwinIR'))
parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(model_path, 'LDSR'))
parser.add_argument("--codeformer-models-path", type=str, help="Path to directory with codeformer model file(s).", default=os.path.join(models_path, 'Codeformer'))
parser.add_argument("--gfpgan-models-path", type=str, help="Path to directory with GFPGAN model file(s).", default=os.path.join(models_path, 'GFPGAN'))
parser.add_argument("--esrgan-models-path", type=str, help="Path to directory with ESRGAN model file(s).", default=os.path.join(models_path, 'ESRGAN'))
parser.add_argument("--bsrgan-models-path", type=str, help="Path to directory with BSRGAN model file(s).", default=os.path.join(models_path, 'BSRGAN'))
parser.add_argument("--realesrgan-models-path", type=str, help="Path to directory with RealESRGAN model file(s).", default=os.path.join(models_path, 'RealESRGAN'))
parser.add_argument("--scunet-models-path", type=str, help="Path to directory with ScuNET model file(s).", default=os.path.join(models_path, 'ScuNET'))
parser.add_argument("--swinir-models-path", type=str, help="Path to directory with SwinIR model file(s).", default=os.path.join(models_path, 'SwinIR'))
parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(models_path, 'LDSR'))
parser.add_argument("--opt-split-attention", action='store_true', help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.")
parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization")
parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find")
@ -55,6 +55,7 @@ parser.add_argument("--hide-ui-dir-config", action='store_true', help="hide dire
parser.add_argument("--ui-settings-file", type=str, help="filename to use for ui settings", default=os.path.join(script_path, 'config.json'))
parser.add_argument("--gradio-debug", action='store_true', help="launch gradio with --debug option")
parser.add_argument("--gradio-auth", type=str, help='set gradio authentication like "username:password"; or comma-delimit multiple like "u1:p1,u2:p2,u3:p3"', default=None)
parser.add_argument("--gradio-img2img-tool", type=str, help='gradio image uploader tool: can be either editor for ctopping, or color-sketch for drawing', choices=["color-sketch", "editor"], default="editor")
parser.add_argument("--opt-channelslast", action='store_true', help="change memory type for stable diffusion to channels last")
parser.add_argument("--styles-file", type=str, help="filename to use for styles", default=os.path.join(script_path, 'styles.csv'))
parser.add_argument("--autolaunch", action='store_true', help="open the webui URL in the system's default browser upon launch", default=False)
@ -75,6 +76,12 @@ parallel_processing_allowed = not cmd_opts.lowvram and not cmd_opts.medvram
config_filename = cmd_opts.ui_settings_file
hypernetworks = hypernetwork.load_hypernetworks(os.path.join(models_path, 'hypernetworks'))
def selected_hypernetwork():
return hypernetworks.get(opts.sd_hypernetwork, None)
class State:
interrupted = False
@ -205,6 +212,7 @@ options_templates.update(options_section(('system', "System"), {
options_templates.update(options_section(('sd', "Stable Diffusion"), {
"sd_model_checkpoint": OptionInfo(None, "Stable Diffusion checkpoint", gr.Dropdown, lambda: {"choices": modules.sd_models.checkpoint_tiles()}),
"sd_hypernetwork": OptionInfo("None", "Stable Diffusion finetune hypernetwork", gr.Dropdown, lambda: {"choices": ["None"] + [x for x in hypernetworks.keys()]}),
"img2img_color_correction": OptionInfo(False, "Apply color correction to img2img results to match original colors."),
"save_images_before_color_correction": OptionInfo(False, "Save a copy of image before applying color correction to img2img results"),
"img2img_fix_steps": OptionInfo(False, "With img2img, do exactly the amount of steps the slider specifies (normally you'd do less with less denoising)."),
@ -234,17 +242,20 @@ options_templates.update(options_section(('ui', "User interface"), {
"font": OptionInfo("", "Font for image grids that have text"),
"js_modal_lightbox": OptionInfo(True, "Enable full page image viewer"),
"js_modal_lightbox_initialy_zoomed": OptionInfo(True, "Show images zoomed in by default in full page image viewer"),
"show_progress_in_title": OptionInfo(True, "Show generation progress in window title."),
}))
options_templates.update(options_section(('sampler-params', "Sampler parameters"), {
"eta_ddim": OptionInfo(0.0, "eta (noise multiplier) for DDIM", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
"eta_ancestral": OptionInfo(1.0, "eta (noise multiplier) for ancestral samplers", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
"ddim_discretize": OptionInfo('uniform', "img2img DDIM discretize", gr.Radio, {"choices": ['uniform', 'quad']}),
's_churn': OptionInfo(0.0, "sigma churn", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
's_tmin': OptionInfo(0.0, "sigma tmin", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
's_noise': OptionInfo(1.0, "sigma noise", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
"hide_samplers": OptionInfo([], "Hide samplers in user interface (requires restart)", gr.CheckboxGroup, lambda: {"choices": [x.name for x in sd_samplers.all_samplers]}),
"eta_ddim": OptionInfo(0.0, "eta (noise multiplier) for DDIM", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
"eta_ancestral": OptionInfo(1.0, "eta (noise multiplier) for ancestral samplers", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
"ddim_discretize": OptionInfo('uniform', "img2img DDIM discretize", gr.Radio, {"choices": ['uniform', 'quad']}),
's_churn': OptionInfo(0.0, "sigma churn", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
's_tmin': OptionInfo(0.0, "sigma tmin", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
's_noise': OptionInfo(1.0, "sigma noise", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
}))
class Options:
data = None
data_labels = options_templates

View file

@ -1,5 +1,7 @@
import os
from PIL import Image, ImageOps
import platform
import sys
import tqdm
from modules import shared, images
@ -10,7 +12,7 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca
src = os.path.abspath(process_src)
dst = os.path.abspath(process_dst)
assert src != dst, 'same directory specified as source and desitnation'
assert src != dst, 'same directory specified as source and destination'
os.makedirs(dst, exist_ok=True)
@ -25,6 +27,7 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca
def save_pic_with_caption(image, index):
if process_caption:
caption = "-" + shared.interrogator.generate_caption(image)
caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png")
else:
caption = filename
caption = os.path.splitext(caption)[0]
@ -75,3 +78,27 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca
if process_caption:
shared.interrogator.send_blip_to_ram()
def sanitize_caption(base_path, original_caption, suffix):
    operating_system = platform.system().lower()
    if operating_system == "windows":
        invalid_path_characters = "\\/:*?\"<>|"
        max_path_length = 259
    else:
        invalid_path_characters = "/"  # linux/macos
        max_path_length = 1023

    caption = original_caption
    for invalid_character in invalid_path_characters:
        caption = caption.replace(invalid_character, "")

    fixed_path_length = len(base_path) + len(suffix)
    if fixed_path_length + len(caption) <= max_path_length:
        return caption

    caption_tokens = caption.split()
    new_caption = ""
    for token in caption_tokens:
        last_caption = new_caption
        new_caption = new_caption + token + " "
        if len(new_caption) + fixed_path_length - 1 > max_path_length:
            break

    print(f"\nPath will be too long. Truncated caption: {original_caption}\nto: {last_caption}", file=sys.stderr)
    return last_caption.strip()
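Usage sketch; the values are invented and the output shown is for the Windows branch:

```python
print(sanitize_caption(r"C:\out\00001-0", 'a "cat": sitting', ".png"))
# -> 'a cat sitting'  (the quote and colon are invalid path characters on Windows)
```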

View file

@ -35,8 +35,8 @@ import modules.gfpgan_model
import modules.codeformer_model
import modules.styles
import modules.generation_parameters_copypaste
from modules.prompt_parser import get_learned_conditioning_prompt_schedules
from modules.images import apply_filename_pattern, get_next_sequence_number
from modules import prompt_parser
from modules.images import save_image
import modules.textual_inversion.ui
# this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the browser will not show any UI
@ -115,20 +115,13 @@ def save_files(js_data, images, index):
p = MyObject(data)
path = opts.outdir_save
save_to_dirs = opts.use_save_to_dirs_for_ui
if save_to_dirs:
dirname = apply_filename_pattern(opts.directories_filename_pattern or "[prompt_words]", p, p.seed, p.prompt)
path = os.path.join(opts.outdir_save, dirname)
os.makedirs(path, exist_ok=True)
extension: str = opts.samples_format
start_index = 0
if index > -1 and opts.save_selected_only and (index >= data["index_of_first_image"]): # ensures we are looking at a specific non-grid picture, and we have save_selected_only
images = [images[index]]
infotexts = [data["infotexts"][index]]
else:
infotexts = data["infotexts"]
start_index = index
with open(os.path.join(opts.outdir_save, "log.csv"), "a", encoding="utf8", newline='') as file:
at_start = file.tell() == 0
@ -136,37 +129,18 @@ def save_files(js_data, images, index):
if at_start:
writer.writerow(["prompt", "seed", "width", "height", "sampler", "cfgs", "steps", "filename", "negative_prompt"])
file_decoration = opts.samples_filename_pattern or "[seed]-[prompt_spaces]"
if file_decoration != "":
file_decoration = "-" + file_decoration.lower()
file_decoration = apply_filename_pattern(file_decoration, p, p.seed, p.prompt)
truncated = (file_decoration[:240] + '..') if len(file_decoration) > 240 else file_decoration
filename_base = truncated
extension = opts.samples_format.lower()
basecount = get_next_sequence_number(path, "")
for i, filedata in enumerate(images):
file_number = f"{basecount+i:05}"
filename = file_number + filename_base + f".{extension}"
filepath = os.path.join(path, filename)
for image_index, filedata in enumerate(images, start_index):
if filedata.startswith("data:image/png;base64,"):
filedata = filedata[len("data:image/png;base64,"):]
image = Image.open(io.BytesIO(base64.decodebytes(filedata.encode('utf-8'))))
if opts.enable_pnginfo and extension == 'png':
pnginfo = PngImagePlugin.PngInfo()
pnginfo.add_text('parameters', infotexts[i])
image.save(filepath, pnginfo=pnginfo)
else:
image.save(filepath, quality=opts.jpeg_quality)
if opts.enable_pnginfo and extension in ("jpg", "jpeg", "webp"):
piexif.insert(piexif.dump({"Exif": {
piexif.ExifIFD.UserComment: piexif.helper.UserComment.dump(infotexts[i], encoding="unicode")
}}), filepath)
is_grid = image_index < p.index_of_first_image
i = 0 if is_grid else (image_index - p.index_of_first_image)
fullfn = save_image(image, path, "", seed=p.all_seeds[i], prompt=p.all_prompts[i], extension=extension, info=p.infotexts[image_index], grid=is_grid, p=p, save_to_dirs=save_to_dirs)
filename = os.path.relpath(fullfn, path)
filenames.append(filename)
writer.writerow([data["prompt"], data["seed"], data["width"], data["height"], data["sampler"], data["cfg_scale"], data["steps"], filenames[0], data["negative_prompt"]])
@ -197,6 +171,11 @@ def wrap_gradio_call(func, extra_outputs=None):
res = extra_outputs_array + [f"<div class='error'>{plaintext_to_html(type(e).__name__+': '+str(e))}</div>"]
elapsed = time.perf_counter() - t
elapsed_m = int(elapsed // 60)
elapsed_s = elapsed % 60
elapsed_text = f"{elapsed_s:.2f}s"
if (elapsed_m > 0):
elapsed_text = f"{elapsed_m}m "+elapsed_text
if run_memmon:
mem_stats = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.stop().items()}
@ -211,7 +190,7 @@ def wrap_gradio_call(func, extra_outputs=None):
vram_html = ''
# last item is always HTML
res[-1] += f"<div class='performance'><p class='time'>Time taken: <wbr>{elapsed:.2f}s</p>{vram_html}</div>"
res[-1] += f"<div class='performance'><p class='time'>Time taken: <wbr>{elapsed_text}</p>{vram_html}</div>"
shared.state.interrupted = False
shared.state.job_count = 0
@ -395,7 +374,9 @@ def connect_reuse_seed(seed: gr.Number, reuse_seed: gr.Button, generation_info:
def update_token_counter(text, steps):
try:
prompt_schedules = get_learned_conditioning_prompt_schedules([text], steps)
_, prompt_flat_list, _ = prompt_parser.get_multicond_prompt_list([text])
prompt_schedules = prompt_parser.get_learned_conditioning_prompt_schedules(prompt_flat_list, steps)
except Exception:
# a parsing error can happen here during typing, and we don't want to bother the user with
# messages related to it in console
@ -652,7 +633,7 @@ def create_ui(wrap_gradio_gpu_call):
with gr.Tabs(elem_id="mode_img2img") as tabs_img2img_mode:
with gr.TabItem('img2img', id='img2img'):
init_img = gr.Image(label="Image for img2img", show_label=False, source="upload", interactive=True, type="pil")
init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool=cmd_opts.gradio_img2img_tool)
with gr.TabItem('Inpaint', id='inpaint'):
init_img_with_mask = gr.Image(label="Image for inpainting with mask", show_label=False, elem_id="img2maskimg", source="upload", interactive=True, type="pil", tool="sketch", image_mode="RGBA")
@ -1219,6 +1200,7 @@ def create_ui(wrap_gradio_gpu_call):
)
def request_restart():
shared.state.interrupt()
settings_interface.gradio_ref.do_restart = True
restart_gradio.click(

View file

@ -8,7 +8,6 @@ import gradio as gr
from modules import processing, shared, sd_samplers, prompt_parser
from modules.processing import Processed
from modules.sd_samplers import samplers
from modules.shared import opts, cmd_opts, state
import torch
@ -159,7 +158,7 @@ class Script(scripts.Script):
combined_noise = ((1 - randomness) * rec_noise + randomness * rand_noise) / ((randomness**2 + (1-randomness)**2) ** 0.5)
sampler = samplers[p.sampler_index].constructor(p.sd_model)
sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, p.sampler_index, p.sd_model)
sigmas = sampler.model_wrap.get_sigmas(p.steps)

View file

@ -85,8 +85,11 @@ def get_matched_noise(_np_src_image, np_mask_rgb, noise_q=1, color_variation=0.0
src_dist = np.absolute(src_fft)
src_phase = src_fft / src_dist
# create a generator with a static seed to make outpainting deterministic / only follow global seed
rng = np.random.default_rng(0)
noise_window = _get_gaussian_window(width, height, mode=1) # start with simple gaussian noise
noise_rgb = np.random.random_sample((width, height, num_channels))
noise_rgb = rng.random((width, height, num_channels))
noise_grey = (np.sum(noise_rgb, axis=2) / 3.)
noise_rgb *= color_variation # the colorfulness of the starting noise is blended to greyscale with a parameter
for c in range(num_channels):
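The switch above from the global `np.random` state to a fixed-seed `default_rng(0)` makes the outpainting noise reproducible regardless of other consumers of the global RNG; a quick check of that property:

```python
import numpy as np

a = np.random.default_rng(0).random((4, 4, 3))
b = np.random.default_rng(0).random((4, 4, 3))
assert np.allclose(a, b)  # same seed, same noise, independent of np.random's state
```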

View file

@ -1,8 +1,9 @@
from collections import namedtuple
from copy import copy
from itertools import permutations
from itertools import permutations, chain
import random
import csv
from io import StringIO
from PIL import Image
import numpy as np
@ -76,6 +77,11 @@ def apply_checkpoint(p, x, xs):
modules.sd_models.reload_model_weights(shared.sd_model, info)
def apply_hypernetwork(p, x, xs):
hn = shared.hypernetworks.get(x, None)
opts.data["sd_hypernetwork"] = hn.name if hn is not None else 'None'
def format_value_add_label(p, opt, x):
if type(x) == float:
x = round(x, 8)
@ -121,6 +127,7 @@ axis_options = [
AxisOption("Prompt order", str_permutations, apply_order, format_value_join_list),
AxisOption("Sampler", str, apply_sampler, format_value),
AxisOption("Checkpoint name", str, apply_checkpoint, format_value),
AxisOption("Hypernetwork", str, apply_hypernetwork, format_value),
AxisOption("Sigma Churn", float, apply_field("s_churn"), format_value_add_label),
AxisOption("Sigma min", float, apply_field("s_tmin"), format_value_add_label),
AxisOption("Sigma max", float, apply_field("s_tmax"), format_value_add_label),
@ -168,7 +175,6 @@ re_range_float = re.compile(r"\s*([+-]?\s*\d+(?:.\d*)?)\s*-\s*([+-]?\s*\d+(?:.\d
re_range_count = re.compile(r"\s*([+-]?\s*\d+)\s*-\s*([+-]?\s*\d+)(?:\s*\[(\d+)\s*\])?\s*")
re_range_count_float = re.compile(r"\s*([+-]?\s*\d+(?:.\d*)?)\s*-\s*([+-]?\s*\d+(?:.\d*)?)(?:\s*\[(\d+(?:.\d*)?)\s*\])?\s*")
class Script(scripts.Script):
def title(self):
return "X/Y plot"
@ -193,11 +199,13 @@ class Script(scripts.Script):
modules.processing.fix_seed(p)
p.batch_size = 1
initial_hn = opts.sd_hypernetwork
def process_axis(opt, vals):
if opt.label == 'Nothing':
return [0]
valslist = [x.strip() for x in vals.split(",")]
valslist = [x.strip() for x in chain.from_iterable(csv.reader(StringIO(vals)))]
if opt.type == int:
valslist_ext = []
@ -300,4 +308,6 @@ class Script(scripts.Script):
# restore checkpoint in case it was changed by axes
modules.sd_models.reload_model_weights(shared.sd_model)
opts.data["sd_hypernetwork"] = initial_hn
return processed
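The switch to `csv.reader` in process_axis above means quoted axis values may now contain commas; a quick sketch:

```python
import csv
from io import StringIO
from itertools import chain

vals = '512, 640, "a prompt, with a comma"'
print([x.strip() for x in chain.from_iterable(csv.reader(StringIO(vals)))])
# ['512', '640', 'a prompt, with a comma']
```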

View file

@ -408,3 +408,11 @@ input[type="range"]{
.red {
color: red;
}
.gallery-item {
--tw-bg-opacity: 0 !important;
}
#img2img_image div.h-60{
height: 480px;
}

View file

@ -2,11 +2,12 @@ import os
import threading
import time
import importlib
from modules import devices
from modules.paths import script_path
import signal
import threading
from modules.paths import script_path
from modules import devices, sd_samplers
import modules.codeformer_model as codeformer
import modules.extras
import modules.face_restoration
@ -109,6 +110,8 @@ def webui():
time.sleep(0.5)
break
sd_samplers.set_samplers()
print('Reloading Custom Scripts')
modules.scripts.reload_scripts(os.path.join(script_path, "scripts"))
print('Reloading modules: modules.ui')