Merge branch 'master' into varsize

This commit is contained in:
AUTOMATIC1111 2023-01-09 22:45:39 +03:00 committed by GitHub
commit 18c001792a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 294 additions and 119 deletions

View file

@ -1,7 +1,7 @@
name: Feature request name: Feature request
description: Suggest an idea for this project description: Suggest an idea for this project
title: "[Feature Request]: " title: "[Feature Request]: "
labels: ["suggestion"] labels: ["enhancement"]
body: body:
- type: checkboxes - type: checkboxes

View file

@ -19,22 +19,19 @@ jobs:
- name: Checkout Code - name: Checkout Code
uses: actions/checkout@v3 uses: actions/checkout@v3
- name: Set up Python 3.10 - name: Set up Python 3.10
uses: actions/setup-python@v3 uses: actions/setup-python@v4
with: with:
python-version: 3.10.6 python-version: 3.10.6
- uses: actions/cache@v2 cache: pip
with: cache-dependency-path: |
path: ~/.cache/pip **/requirements*txt
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Install PyLint - name: Install PyLint
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
pip install pylint pip install pylint
# This lets PyLint check to see if it can resolve imports # This lets PyLint check to see if it can resolve imports
- name: Install dependencies - name: Install dependencies
run : | run: |
export COMMANDLINE_ARGS="--skip-torch-cuda-test --exit" export COMMANDLINE_ARGS="--skip-torch-cuda-test --exit"
python launch.py python launch.py
- name: Analysing the code with pylint - name: Analysing the code with pylint

View file

@ -14,11 +14,9 @@ jobs:
uses: actions/setup-python@v4 uses: actions/setup-python@v4
with: with:
python-version: 3.10.6 python-version: 3.10.6
- uses: actions/cache@v3 cache: pip
with: cache-dependency-path: |
path: ~/.cache/pip **/requirements*txt
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: ${{ runner.os }}-pip-
- name: Run tests - name: Run tests
run: python launch.py --tests basic_features --no-half --disable-opt-split-attention --use-cpu all --skip-torch-cuda-test run: python launch.py --tests basic_features --no-half --disable-opt-split-attention --use-cpu all --skip-torch-cuda-test
- name: Upload main app stdout-stderr - name: Upload main app stdout-stderr

View file

@ -4,7 +4,7 @@ titles = {
"Sampling steps": "How many times to improve the generated image iteratively; higher values take longer; very low values can produce bad results", "Sampling steps": "How many times to improve the generated image iteratively; higher values take longer; very low values can produce bad results",
"Sampling method": "Which algorithm to use to produce the image", "Sampling method": "Which algorithm to use to produce the image",
"GFPGAN": "Restore low quality faces using GFPGAN neural network", "GFPGAN": "Restore low quality faces using GFPGAN neural network",
"Euler a": "Euler Ancestral - very creative, each can get a completely different picture depending on step count, setting steps to higher than 30-40 does not help", "Euler a": "Euler Ancestral - very creative, each can get a completely different picture depending on step count, setting steps higher than 30-40 does not help",
"DDIM": "Denoising Diffusion Implicit Models - best at inpainting", "DDIM": "Denoising Diffusion Implicit Models - best at inpainting",
"DPM adaptive": "Ignores step count - uses a number of steps determined by the CFG and resolution", "DPM adaptive": "Ignores step count - uses a number of steps determined by the CFG and resolution",
@ -74,7 +74,7 @@ titles = {
"Style 1": "Style to apply; styles have components for both positive and negative prompts and apply to both", "Style 1": "Style to apply; styles have components for both positive and negative prompts and apply to both",
"Style 2": "Style to apply; styles have components for both positive and negative prompts and apply to both", "Style 2": "Style to apply; styles have components for both positive and negative prompts and apply to both",
"Apply style": "Insert selected styles into prompt fields", "Apply style": "Insert selected styles into prompt fields",
"Create style": "Save current prompts as a style. If you add the token {prompt} to the text, the style use that as placeholder for your prompt when you use the style in the future.", "Create style": "Save current prompts as a style. If you add the token {prompt} to the text, the style uses that as a placeholder for your prompt when you use the style in the future.",
"Checkpoint name": "Loads weights from checkpoint before making images. You can either use hash or a part of filename (as seen in settings) for checkpoint name. Recommended to use with Y axis for less switching.", "Checkpoint name": "Loads weights from checkpoint before making images. You can either use hash or a part of filename (as seen in settings) for checkpoint name. Recommended to use with Y axis for less switching.",
"Inpainting conditioning mask strength": "Only applies to inpainting models. Determines how strongly to mask off the original image for inpainting and img2img. 1.0 means fully masked, which is the default behaviour. 0.0 means a fully unmasked conditioning. Lower values will help preserve the overall composition of the image, but will struggle with large changes.", "Inpainting conditioning mask strength": "Only applies to inpainting models. Determines how strongly to mask off the original image for inpainting and img2img. 1.0 means fully masked, which is the default behaviour. 0.0 means a fully unmasked conditioning. Lower values will help preserve the overall composition of the image, but will struggle with large changes.",
@ -92,12 +92,12 @@ titles = {
"Weighted sum": "Result = A * (1 - M) + B * M", "Weighted sum": "Result = A * (1 - M) + B * M",
"Add difference": "Result = A + (B - C) * M", "Add difference": "Result = A + (B - C) * M",
"Learning rate": "how fast should the training go. Low values will take longer to train, high values may fail to converge (not generate accurate results) and/or may break the embedding (This has happened if you see Loss: nan in the training info textbox. If this happens, you need to manually restore your embedding from an older not-broken backup).\n\nYou can set a single numeric value, or multiple learning rates using the syntax:\n\n rate_1:max_steps_1, rate_2:max_steps_2, ...\n\nEG: 0.005:100, 1e-3:1000, 1e-5\n\nWill train with rate of 0.005 for first 100 steps, then 1e-3 until 1000 steps, then 1e-5 for all remaining steps.", "Learning rate": "How fast should training go. Low values will take longer to train, high values may fail to converge (not generate accurate results) and/or may break the embedding (This has happened if you see Loss: nan in the training info textbox. If this happens, you need to manually restore your embedding from an older not-broken backup).\n\nYou can set a single numeric value, or multiple learning rates using the syntax:\n\n rate_1:max_steps_1, rate_2:max_steps_2, ...\n\nEG: 0.005:100, 1e-3:1000, 1e-5\n\nWill train with rate of 0.005 for first 100 steps, then 1e-3 until 1000 steps, then 1e-5 for all remaining steps.",
"Clip skip": "Early stopping parameter for CLIP model; 1 is stop at last layer as usual, 2 is stop at penultimate layer, etc.", "Clip skip": "Early stopping parameter for CLIP model; 1 is stop at last layer as usual, 2 is stop at penultimate layer, etc.",
"Approx NN": "Cheap neural network approximation. Very fast compared to VAE, but produces pictures with 4 times smaller horizontal/vertical resoluton and lower quality.", "Approx NN": "Cheap neural network approximation. Very fast compared to VAE, but produces pictures with 4 times smaller horizontal/vertical resolution and lower quality.",
"Approx cheap": "Very cheap approximation. Very fast compared to VAE, but produces pictures with 8 times smaller horizontal/vertical resoluton and extremely low quality.", "Approx cheap": "Very cheap approximation. Very fast compared to VAE, but produces pictures with 8 times smaller horizontal/vertical resolution and extremely low quality.",
"Hires. fix": "Use a two step process to partially create an image at smaller resolution, upscale, and then improve details in it without changing composition", "Hires. fix": "Use a two step process to partially create an image at smaller resolution, upscale, and then improve details in it without changing composition",
"Hires steps": "Number of sampling steps for upscaled picture. If 0, uses same as for original.", "Hires steps": "Number of sampling steps for upscaled picture. If 0, uses same as for original.",

25
javascript/hires_fix.js Normal file
View file

@ -0,0 +1,25 @@
function setInactive(elem, inactive){
console.log(elem)
if(inactive){
elem.classList.add('inactive')
} else{
elem.classList.remove('inactive')
}
}
function onCalcResolutionHires(enable, width, height, hr_scale, hr_resize_x, hr_resize_y){
console.log(enable, width, height, hr_scale, hr_resize_x, hr_resize_y)
hrUpscaleBy = gradioApp().getElementById('txt2img_hr_scale')
hrResizeX = gradioApp().getElementById('txt2img_hr_resize_x')
hrResizeY = gradioApp().getElementById('txt2img_hr_resize_y')
gradioApp().getElementById('txt2img_hires_fix_row2').style.display = opts.use_old_hires_fix_width_height ? "none" : ""
setInactive(hrUpscaleBy, opts.use_old_hires_fix_width_height || hr_resize_x > 0 || hr_resize_y > 0)
setInactive(hrResizeX, opts.use_old_hires_fix_width_height || hr_resize_x == 0)
setInactive(hrResizeY, opts.use_old_hires_fix_width_height || hr_resize_y == 0)
return [enable, width, height, hr_scale, hr_resize_x, hr_resize_y]
}

View file

@ -11,7 +11,7 @@ from fastapi.security import HTTPBasic, HTTPBasicCredentials
from secrets import compare_digest from secrets import compare_digest
import modules.shared as shared import modules.shared as shared
from modules import sd_samplers, deepbooru, sd_hijack, images from modules import sd_samplers, deepbooru, sd_hijack, images, scripts, ui
from modules.api.models import * from modules.api.models import *
from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images
from modules.extras import run_extras from modules.extras import run_extras
@ -28,8 +28,13 @@ def upscaler_to_index(name: str):
try: try:
return [x.name.lower() for x in shared.sd_upscalers].index(name.lower()) return [x.name.lower() for x in shared.sd_upscalers].index(name.lower())
except: except:
raise HTTPException(status_code=400, detail=f"Invalid upscaler, needs to be on of these: {' , '.join([x.name for x in sd_upscalers])}") raise HTTPException(status_code=400, detail=f"Invalid upscaler, needs to be one of these: {' , '.join([x.name for x in sd_upscalers])}")
def script_name_to_index(name, scripts):
try:
return [script.title().lower() for script in scripts].index(name.lower())
except:
raise HTTPException(status_code=422, detail=f"Script '{name}' not found")
def validate_sampler_name(name): def validate_sampler_name(name):
config = sd_samplers.all_samplers_map.get(name, None) config = sd_samplers.all_samplers_map.get(name, None)
@ -143,7 +148,21 @@ class Api:
raise HTTPException(status_code=401, detail="Incorrect username or password", headers={"WWW-Authenticate": "Basic"}) raise HTTPException(status_code=401, detail="Incorrect username or password", headers={"WWW-Authenticate": "Basic"})
def get_script(self, script_name, script_runner):
if script_name is None:
return None, None
if not script_runner.scripts:
script_runner.initialize_scripts(False)
ui.create_ui()
script_idx = script_name_to_index(script_name, script_runner.selectable_scripts)
script = script_runner.selectable_scripts[script_idx]
return script, script_idx
def text2imgapi(self, txt2imgreq: StableDiffusionTxt2ImgProcessingAPI): def text2imgapi(self, txt2imgreq: StableDiffusionTxt2ImgProcessingAPI):
script, script_idx = self.get_script(txt2imgreq.script_name, scripts.scripts_txt2img)
populate = txt2imgreq.copy(update={ # Override __init__ params populate = txt2imgreq.copy(update={ # Override __init__ params
"sampler_name": validate_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index), "sampler_name": validate_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index),
"do_not_save_samples": True, "do_not_save_samples": True,
@ -153,14 +172,22 @@ class Api:
if populate.sampler_name: if populate.sampler_name:
populate.sampler_index = None # prevent a warning later on populate.sampler_index = None # prevent a warning later on
args = vars(populate)
args.pop('script_name', None)
with self.queue_lock: with self.queue_lock:
p = StableDiffusionProcessingTxt2Img(sd_model=shared.sd_model, **vars(populate)) p = StableDiffusionProcessingTxt2Img(sd_model=shared.sd_model, **args)
shared.state.begin() shared.state.begin()
processed = process_images(p) if script is not None:
p.outpath_grids = opts.outdir_txt2img_grids
p.outpath_samples = opts.outdir_txt2img_samples
p.script_args = [script_idx + 1] + [None] * (script.args_from - 1) + p.script_args
processed = scripts.scripts_txt2img.run(p, *p.script_args)
else:
processed = process_images(p)
shared.state.end() shared.state.end()
b64images = list(map(encode_pil_to_base64, processed.images)) b64images = list(map(encode_pil_to_base64, processed.images))
return TextToImageResponse(images=b64images, parameters=vars(txt2imgreq), info=processed.js()) return TextToImageResponse(images=b64images, parameters=vars(txt2imgreq), info=processed.js())
@ -170,6 +197,8 @@ class Api:
if init_images is None: if init_images is None:
raise HTTPException(status_code=404, detail="Init image not found") raise HTTPException(status_code=404, detail="Init image not found")
script, script_idx = self.get_script(img2imgreq.script_name, scripts.scripts_img2img)
mask = img2imgreq.mask mask = img2imgreq.mask
if mask: if mask:
mask = decode_base64_to_image(mask) mask = decode_base64_to_image(mask)
@ -186,13 +215,20 @@ class Api:
args = vars(populate) args = vars(populate)
args.pop('include_init_images', None) # this is meant to be done by "exclude": True in model, but it's for a reason that I cannot determine. args.pop('include_init_images', None) # this is meant to be done by "exclude": True in model, but it's for a reason that I cannot determine.
args.pop('script_name', None)
with self.queue_lock: with self.queue_lock:
p = StableDiffusionProcessingImg2Img(sd_model=shared.sd_model, **args) p = StableDiffusionProcessingImg2Img(sd_model=shared.sd_model, **args)
p.init_images = [decode_base64_to_image(x) for x in init_images] p.init_images = [decode_base64_to_image(x) for x in init_images]
shared.state.begin() shared.state.begin()
processed = process_images(p) if script is not None:
p.outpath_grids = opts.outdir_img2img_grids
p.outpath_samples = opts.outdir_img2img_samples
p.script_args = [script_idx + 1] + [None] * (script.args_from - 1) + p.script_args
processed = scripts.scripts_img2img.run(p, *p.script_args)
else:
processed = process_images(p)
shared.state.end() shared.state.end()
b64images = list(map(encode_pil_to_base64, processed.images)) b64images = list(map(encode_pil_to_base64, processed.images))

View file

@ -100,13 +100,13 @@ class PydanticModelGenerator:
StableDiffusionTxt2ImgProcessingAPI = PydanticModelGenerator( StableDiffusionTxt2ImgProcessingAPI = PydanticModelGenerator(
"StableDiffusionProcessingTxt2Img", "StableDiffusionProcessingTxt2Img",
StableDiffusionProcessingTxt2Img, StableDiffusionProcessingTxt2Img,
[{"key": "sampler_index", "type": str, "default": "Euler"}] [{"key": "sampler_index", "type": str, "default": "Euler"}, {"key": "script_name", "type": str, "default": None}, {"key": "script_args", "type": list, "default": []}]
).generate_model() ).generate_model()
StableDiffusionImg2ImgProcessingAPI = PydanticModelGenerator( StableDiffusionImg2ImgProcessingAPI = PydanticModelGenerator(
"StableDiffusionProcessingImg2Img", "StableDiffusionProcessingImg2Img",
StableDiffusionProcessingImg2Img, StableDiffusionProcessingImg2Img,
[{"key": "sampler_index", "type": str, "default": "Euler"}, {"key": "init_images", "type": list, "default": None}, {"key": "denoising_strength", "type": float, "default": 0.75}, {"key": "mask", "type": str, "default": None}, {"key": "include_init_images", "type": bool, "default": False, "exclude" : True}] [{"key": "sampler_index", "type": str, "default": "Euler"}, {"key": "init_images", "type": list, "default": None}, {"key": "denoising_strength", "type": float, "default": 0.75}, {"key": "mask", "type": str, "default": None}, {"key": "include_init_images", "type": bool, "default": False, "exclude" : True}, {"key": "script_name", "type": str, "default": None}, {"key": "script_args", "type": list, "default": []}]
).generate_model() ).generate_model()
class TextToImageResponse(BaseModel): class TextToImageResponse(BaseModel):

View file

@ -197,6 +197,15 @@ def restore_old_hires_fix_params(res):
firstpass_width = res.get('First pass size-1', None) firstpass_width = res.get('First pass size-1', None)
firstpass_height = res.get('First pass size-2', None) firstpass_height = res.get('First pass size-2', None)
if shared.opts.use_old_hires_fix_width_height:
hires_width = int(res.get("Hires resize-1", None))
hires_height = int(res.get("Hires resize-2", None))
if hires_width is not None and hires_height is not None:
res['Size-1'] = hires_width
res['Size-2'] = hires_height
return
if firstpass_width is None or firstpass_height is None: if firstpass_width is None or firstpass_height is None:
return return
@ -205,12 +214,8 @@ def restore_old_hires_fix_params(res):
height = int(res.get("Size-2", 512)) height = int(res.get("Size-2", 512))
if firstpass_width == 0 or firstpass_height == 0: if firstpass_width == 0 or firstpass_height == 0:
# old algorithm for auto-calculating first pass size from modules import processing
desired_pixel_count = 512 * 512 firstpass_width, firstpass_height = processing.old_hires_fix_first_pass_dimensions(width, height)
actual_pixel_count = width * height
scale = math.sqrt(desired_pixel_count / actual_pixel_count)
firstpass_width = math.ceil(scale * width / 64) * 64
firstpass_height = math.ceil(scale * height / 64) * 64
res['Size-1'] = firstpass_width res['Size-1'] = firstpass_width
res['Size-2'] = firstpass_height res['Size-2'] = firstpass_height

View file

@ -98,7 +98,7 @@ class StableDiffusionProcessing():
""" """
The first set of paramaters: sd_models -> do_not_reload_embeddings represent the minimum required to create a StableDiffusionProcessing The first set of paramaters: sd_models -> do_not_reload_embeddings represent the minimum required to create a StableDiffusionProcessing
""" """
def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = None, s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None): def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = None, s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None, script_args: list = None):
if sampler_index is not None: if sampler_index is not None:
print("sampler_index argument for StableDiffusionProcessing does not do anything; use sampler_name", file=sys.stderr) print("sampler_index argument for StableDiffusionProcessing does not do anything; use sampler_name", file=sys.stderr)
@ -149,7 +149,7 @@ class StableDiffusionProcessing():
self.seed_resize_from_w = 0 self.seed_resize_from_w = 0
self.scripts = None self.scripts = None
self.script_args = None self.script_args = script_args
self.all_prompts = None self.all_prompts = None
self.all_negative_prompts = None self.all_negative_prompts = None
self.all_seeds = None self.all_seeds = None
@ -687,6 +687,18 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
return res return res
def old_hires_fix_first_pass_dimensions(width, height):
"""old algorithm for auto-calculating first pass size"""
desired_pixel_count = 512 * 512
actual_pixel_count = width * height
scale = math.sqrt(desired_pixel_count / actual_pixel_count)
width = math.ceil(scale * width / 64) * 64
height = math.ceil(scale * height / 64) * 64
return width, height
class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
sampler = None sampler = None
@ -703,16 +715,26 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
self.hr_upscale_to_y = hr_resize_y self.hr_upscale_to_y = hr_resize_y
if firstphase_width != 0 or firstphase_height != 0: if firstphase_width != 0 or firstphase_height != 0:
print("firstphase_width/firstphase_height no longer supported; use hr_scale", file=sys.stderr) self.hr_upscale_to_x = self.width
self.hr_scale = self.width / firstphase_width self.hr_upscale_to_y = self.height
self.width = firstphase_width self.width = firstphase_width
self.height = firstphase_height self.height = firstphase_height
self.truncate_x = 0 self.truncate_x = 0
self.truncate_y = 0 self.truncate_y = 0
self.applied_old_hires_behavior_to = None
def init(self, all_prompts, all_seeds, all_subseeds): def init(self, all_prompts, all_seeds, all_subseeds):
if self.enable_hr: if self.enable_hr:
if opts.use_old_hires_fix_width_height and self.applied_old_hires_behavior_to != (self.width, self.height):
self.hr_resize_x = self.width
self.hr_resize_y = self.height
self.hr_upscale_to_x = self.width
self.hr_upscale_to_y = self.height
self.width, self.height = old_hires_fix_first_pass_dimensions(self.width, self.height)
self.applied_old_hires_behavior_to = (self.width, self.height)
if self.hr_resize_x == 0 and self.hr_resize_y == 0: if self.hr_resize_x == 0 and self.hr_resize_y == 0:
self.extra_generation_params["Hires upscale"] = self.hr_scale self.extra_generation_params["Hires upscale"] = self.hr_scale
self.hr_upscale_to_x = int(self.width * self.hr_scale) self.hr_upscale_to_x = int(self.width * self.hr_scale)

View file

@ -83,10 +83,12 @@ class StableDiffusionModelHijack:
clip = None clip = None
optimization_method = None optimization_method = None
embedding_db = modules.textual_inversion.textual_inversion.EmbeddingDatabase(cmd_opts.embeddings_dir) embedding_db = modules.textual_inversion.textual_inversion.EmbeddingDatabase()
def __init__(self):
self.embedding_db.add_embedding_dir(cmd_opts.embeddings_dir)
def hijack(self, m): def hijack(self, m):
if type(m.cond_stage_model) == xlmr.BertSeriesModelWithTransformation: if type(m.cond_stage_model) == xlmr.BertSeriesModelWithTransformation:
model_embeddings = m.cond_stage_model.roberta.embeddings model_embeddings = m.cond_stage_model.roberta.embeddings
model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.word_embeddings, self) model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.word_embeddings, self)
@ -117,7 +119,6 @@ class StableDiffusionModelHijack:
self.layers = flatten(m) self.layers = flatten(m)
def undo_hijack(self, m): def undo_hijack(self, m):
if type(m.cond_stage_model) == xlmr.BertSeriesModelWithTransformation: if type(m.cond_stage_model) == xlmr.BertSeriesModelWithTransformation:
m.cond_stage_model = m.cond_stage_model.wrapped m.cond_stage_model = m.cond_stage_model.wrapped

View file

@ -247,9 +247,9 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
# restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise
batch_multipliers = torch.asarray(batch_multipliers).to(devices.device) batch_multipliers = torch.asarray(batch_multipliers).to(devices.device)
original_mean = z.mean() original_mean = z.mean()
z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape) z = z * batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)
new_mean = z.mean() new_mean = z.mean()
z *= original_mean / new_mean z = z * (original_mean / new_mean)
return z return z

View file

@ -1,8 +1,9 @@
import torch import torch
import safetensors.torch
import os import os
import collections import collections
from collections import namedtuple from collections import namedtuple
from modules import shared, devices, script_callbacks from modules import shared, devices, script_callbacks, sd_models
from modules.paths import models_path from modules.paths import models_path
import glob import glob
from copy import deepcopy from copy import deepcopy
@ -72,8 +73,10 @@ def refresh_vae_list(vae_path=vae_path, model_path=model_path):
candidates = [ candidates = [
*glob.iglob(os.path.join(model_path, '**/*.vae.ckpt'), recursive=True), *glob.iglob(os.path.join(model_path, '**/*.vae.ckpt'), recursive=True),
*glob.iglob(os.path.join(model_path, '**/*.vae.pt'), recursive=True), *glob.iglob(os.path.join(model_path, '**/*.vae.pt'), recursive=True),
*glob.iglob(os.path.join(model_path, '**/*.vae.safetensors'), recursive=True),
*glob.iglob(os.path.join(vae_path, '**/*.ckpt'), recursive=True), *glob.iglob(os.path.join(vae_path, '**/*.ckpt'), recursive=True),
*glob.iglob(os.path.join(vae_path, '**/*.pt'), recursive=True) *glob.iglob(os.path.join(vae_path, '**/*.pt'), recursive=True),
*glob.iglob(os.path.join(vae_path, '**/*.safetensors'), recursive=True),
] ]
if shared.cmd_opts.vae_path is not None and os.path.isfile(shared.cmd_opts.vae_path): if shared.cmd_opts.vae_path is not None and os.path.isfile(shared.cmd_opts.vae_path):
candidates.append(shared.cmd_opts.vae_path) candidates.append(shared.cmd_opts.vae_path)
@ -137,6 +140,12 @@ def resolve_vae(checkpoint_file=None, vae_file="auto"):
if os.path.isfile(vae_file_try): if os.path.isfile(vae_file_try):
vae_file = vae_file_try vae_file = vae_file_try
print(f"Using VAE found similar to selected model: {vae_file}") print(f"Using VAE found similar to selected model: {vae_file}")
# if still not found, try look for ".vae.safetensors" beside model
if vae_file == "auto":
vae_file_try = model_path + ".vae.safetensors"
if os.path.isfile(vae_file_try):
vae_file = vae_file_try
print(f"Using VAE found similar to selected model: {vae_file}")
# No more fallbacks for auto # No more fallbacks for auto
if vae_file == "auto": if vae_file == "auto":
vae_file = None vae_file = None
@ -163,8 +172,9 @@ def load_vae(model, vae_file=None):
assert os.path.isfile(vae_file), f"VAE file doesn't exist: {vae_file}" assert os.path.isfile(vae_file), f"VAE file doesn't exist: {vae_file}"
print(f"Loading VAE weights from: {vae_file}") print(f"Loading VAE weights from: {vae_file}")
store_base_vae(model) store_base_vae(model)
vae_ckpt = torch.load(vae_file, map_location=shared.weight_load_location)
vae_dict_1 = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss" and k not in vae_ignore_keys} vae_ckpt = sd_models.read_state_dict(vae_file, map_location=shared.weight_load_location)
vae_dict_1 = {k: v for k, v in vae_ckpt.items() if k[0:4] != "loss" and k not in vae_ignore_keys}
_load_vae_dict(model, vae_dict_1) _load_vae_dict(model, vae_dict_1)
if cache_enabled: if cache_enabled:
@ -195,10 +205,12 @@ def _load_vae_dict(model, vae_dict_1):
model.first_stage_model.load_state_dict(vae_dict_1) model.first_stage_model.load_state_dict(vae_dict_1)
model.first_stage_model.to(devices.dtype_vae) model.first_stage_model.to(devices.dtype_vae)
def clear_loaded_vae(): def clear_loaded_vae():
global loaded_vae_file global loaded_vae_file
loaded_vae_file = None loaded_vae_file = None
def reload_vae_weights(sd_model=None, vae_file="auto"): def reload_vae_weights(sd_model=None, vae_file="auto"):
from modules import lowvram, devices, sd_hijack from modules import lowvram, devices, sd_hijack

View file

@ -398,6 +398,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
options_templates.update(options_section(('compatibility', "Compatibility"), { options_templates.update(options_section(('compatibility', "Compatibility"), {
"use_old_emphasis_implementation": OptionInfo(False, "Use old emphasis implementation. Can be useful to reproduce old seeds."), "use_old_emphasis_implementation": OptionInfo(False, "Use old emphasis implementation. Can be useful to reproduce old seeds."),
"use_old_karras_scheduler_sigmas": OptionInfo(False, "Use old karras scheduler sigmas (0.1 to 10)."), "use_old_karras_scheduler_sigmas": OptionInfo(False, "Use old karras scheduler sigmas (0.1 to 10)."),
"use_old_hires_fix_width_height": OptionInfo(False, "For hires fix, use width/height sliders to set final resolution rather than first pass (disables Upscale by, Resize width/height to)."),
})) }))
options_templates.update(options_section(('interrogate', "Interrogate Options"), { options_templates.update(options_section(('interrogate', "Interrogate Options"), {

View file

@ -15,7 +15,8 @@ import torch
from torch import Tensor from torch import Tensor
from torch.utils.checkpoint import checkpoint from torch.utils.checkpoint import checkpoint
import math import math
from typing import Optional, NamedTuple, Protocol, List from typing import Optional, NamedTuple, List
def narrow_trunc( def narrow_trunc(
input: Tensor, input: Tensor,
@ -25,12 +26,14 @@ def narrow_trunc(
) -> Tensor: ) -> Tensor:
return torch.narrow(input, dim, start, length if input.shape[dim] >= start + length else input.shape[dim] - start) return torch.narrow(input, dim, start, length if input.shape[dim] >= start + length else input.shape[dim] - start)
class AttnChunk(NamedTuple): class AttnChunk(NamedTuple):
exp_values: Tensor exp_values: Tensor
exp_weights_sum: Tensor exp_weights_sum: Tensor
max_score: Tensor max_score: Tensor
class SummarizeChunk(Protocol):
class SummarizeChunk:
@staticmethod @staticmethod
def __call__( def __call__(
query: Tensor, query: Tensor,
@ -38,7 +41,8 @@ class SummarizeChunk(Protocol):
value: Tensor, value: Tensor,
) -> AttnChunk: ... ) -> AttnChunk: ...
class ComputeQueryChunkAttn(Protocol):
class ComputeQueryChunkAttn:
@staticmethod @staticmethod
def __call__( def __call__(
query: Tensor, query: Tensor,
@ -46,6 +50,7 @@ class ComputeQueryChunkAttn(Protocol):
value: Tensor, value: Tensor,
) -> Tensor: ... ) -> Tensor: ...
def _summarize_chunk( def _summarize_chunk(
query: Tensor, query: Tensor,
key: Tensor, key: Tensor,
@ -66,6 +71,7 @@ def _summarize_chunk(
max_score = max_score.squeeze(-1) max_score = max_score.squeeze(-1)
return AttnChunk(exp_values, exp_weights.sum(dim=-1), max_score) return AttnChunk(exp_values, exp_weights.sum(dim=-1), max_score)
def _query_chunk_attention( def _query_chunk_attention(
query: Tensor, query: Tensor,
key: Tensor, key: Tensor,
@ -106,6 +112,7 @@ def _query_chunk_attention(
all_weights = torch.unsqueeze(chunk_weights, -1).sum(dim=0) all_weights = torch.unsqueeze(chunk_weights, -1).sum(dim=0)
return all_values / all_weights return all_values / all_weights
# TODO: refactor CrossAttention#get_attention_scores to share code with this # TODO: refactor CrossAttention#get_attention_scores to share code with this
def _get_attention_scores_no_kv_chunking( def _get_attention_scores_no_kv_chunking(
query: Tensor, query: Tensor,
@ -125,10 +132,12 @@ def _get_attention_scores_no_kv_chunking(
hidden_states_slice = torch.bmm(attn_probs, value) hidden_states_slice = torch.bmm(attn_probs, value)
return hidden_states_slice return hidden_states_slice
class ScannedChunk(NamedTuple): class ScannedChunk(NamedTuple):
chunk_idx: int chunk_idx: int
attn_chunk: AttnChunk attn_chunk: AttnChunk
def efficient_dot_product_attention( def efficient_dot_product_attention(
query: Tensor, query: Tensor,
key: Tensor, key: Tensor,

View file

@ -66,17 +66,41 @@ class Embedding:
return self.cached_checksum return self.cached_checksum
class DirWithTextualInversionEmbeddings:
def __init__(self, path):
self.path = path
self.mtime = None
def has_changed(self):
if not os.path.isdir(self.path):
return False
mt = os.path.getmtime(self.path)
if self.mtime is None or mt > self.mtime:
return True
def update(self):
if not os.path.isdir(self.path):
return
self.mtime = os.path.getmtime(self.path)
class EmbeddingDatabase: class EmbeddingDatabase:
def __init__(self, embeddings_dir): def __init__(self):
self.ids_lookup = {} self.ids_lookup = {}
self.word_embeddings = {} self.word_embeddings = {}
self.skipped_embeddings = {} self.skipped_embeddings = {}
self.dir_mtime = None
self.embeddings_dir = embeddings_dir
self.expected_shape = -1 self.expected_shape = -1
self.embedding_dirs = {}
def add_embedding_dir(self, path):
self.embedding_dirs[path] = DirWithTextualInversionEmbeddings(path)
def clear_embedding_dirs(self):
self.embedding_dirs.clear()
def register_embedding(self, embedding, model): def register_embedding(self, embedding, model):
self.word_embeddings[embedding.name] = embedding self.word_embeddings[embedding.name] = embedding
ids = model.cond_stage_model.tokenize([embedding.name])[0] ids = model.cond_stage_model.tokenize([embedding.name])[0]
@ -93,65 +117,62 @@ class EmbeddingDatabase:
vec = shared.sd_model.cond_stage_model.encode_embedding_init_text(",", 1) vec = shared.sd_model.cond_stage_model.encode_embedding_init_text(",", 1)
return vec.shape[1] return vec.shape[1]
def load_textual_inversion_embeddings(self, force_reload = False): def load_from_file(self, path, filename):
mt = os.path.getmtime(self.embeddings_dir) name, ext = os.path.splitext(filename)
if not force_reload and self.dir_mtime is not None and mt <= self.dir_mtime: ext = ext.upper()
return
self.dir_mtime = mt if ext in ['.PNG', '.WEBP', '.JXL', '.AVIF']:
self.ids_lookup.clear() _, second_ext = os.path.splitext(name)
self.word_embeddings.clear() if second_ext.upper() == '.PREVIEW':
self.skipped_embeddings.clear()
self.expected_shape = self.get_expected_shape()
def process_file(path, filename):
name, ext = os.path.splitext(filename)
ext = ext.upper()
if ext in ['.PNG', '.WEBP', '.JXL', '.AVIF']:
embed_image = Image.open(path)
if hasattr(embed_image, 'text') and 'sd-ti-embedding' in embed_image.text:
data = embedding_from_b64(embed_image.text['sd-ti-embedding'])
name = data.get('name', name)
else:
data = extract_image_data_embed(embed_image)
name = data.get('name', name)
elif ext in ['.BIN', '.PT']:
data = torch.load(path, map_location="cpu")
else:
return return
# textual inversion embeddings embed_image = Image.open(path)
if 'string_to_param' in data: if hasattr(embed_image, 'text') and 'sd-ti-embedding' in embed_image.text:
param_dict = data['string_to_param'] data = embedding_from_b64(embed_image.text['sd-ti-embedding'])
if hasattr(param_dict, '_parameters'): name = data.get('name', name)
param_dict = getattr(param_dict, '_parameters') # fix for torch 1.12.1 loading saved file from torch 1.11
assert len(param_dict) == 1, 'embedding file has multiple terms in it'
emb = next(iter(param_dict.items()))[1]
# diffuser concepts
elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor:
assert len(data.keys()) == 1, 'embedding file has multiple terms in it'
emb = next(iter(data.values()))
if len(emb.shape) == 1:
emb = emb.unsqueeze(0)
else: else:
raise Exception(f"Couldn't identify {filename} as neither textual inversion embedding nor diffuser concept.") data = extract_image_data_embed(embed_image)
name = data.get('name', name)
elif ext in ['.BIN', '.PT']:
data = torch.load(path, map_location="cpu")
else:
return
vec = emb.detach().to(devices.device, dtype=torch.float32) # textual inversion embeddings
embedding = Embedding(vec, name) if 'string_to_param' in data:
embedding.step = data.get('step', None) param_dict = data['string_to_param']
embedding.sd_checkpoint = data.get('sd_checkpoint', None) if hasattr(param_dict, '_parameters'):
embedding.sd_checkpoint_name = data.get('sd_checkpoint_name', None) param_dict = getattr(param_dict, '_parameters') # fix for torch 1.12.1 loading saved file from torch 1.11
embedding.vectors = vec.shape[0] assert len(param_dict) == 1, 'embedding file has multiple terms in it'
embedding.shape = vec.shape[-1] emb = next(iter(param_dict.items()))[1]
# diffuser concepts
elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor:
assert len(data.keys()) == 1, 'embedding file has multiple terms in it'
if self.expected_shape == -1 or self.expected_shape == embedding.shape: emb = next(iter(data.values()))
self.register_embedding(embedding, shared.sd_model) if len(emb.shape) == 1:
else: emb = emb.unsqueeze(0)
self.skipped_embeddings[name] = embedding else:
raise Exception(f"Couldn't identify {filename} as neither textual inversion embedding nor diffuser concept.")
for root, dirs, fns in os.walk(self.embeddings_dir): vec = emb.detach().to(devices.device, dtype=torch.float32)
embedding = Embedding(vec, name)
embedding.step = data.get('step', None)
embedding.sd_checkpoint = data.get('sd_checkpoint', None)
embedding.sd_checkpoint_name = data.get('sd_checkpoint_name', None)
embedding.vectors = vec.shape[0]
embedding.shape = vec.shape[-1]
if self.expected_shape == -1 or self.expected_shape == embedding.shape:
self.register_embedding(embedding, shared.sd_model)
else:
self.skipped_embeddings[name] = embedding
def load_from_dir(self, embdir):
if not os.path.isdir(embdir.path):
return
for root, dirs, fns in os.walk(embdir.path):
for fn in fns: for fn in fns:
try: try:
fullfn = os.path.join(root, fn) fullfn = os.path.join(root, fn)
@ -159,12 +180,32 @@ class EmbeddingDatabase:
if os.stat(fullfn).st_size == 0: if os.stat(fullfn).st_size == 0:
continue continue
process_file(fullfn, fn) self.load_from_file(fullfn, fn)
except Exception: except Exception:
print(f"Error loading embedding {fn}:", file=sys.stderr) print(f"Error loading embedding {fn}:", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr) print(traceback.format_exc(), file=sys.stderr)
continue continue
def load_textual_inversion_embeddings(self, force_reload=False):
if not force_reload:
need_reload = False
for path, embdir in self.embedding_dirs.items():
if embdir.has_changed():
need_reload = True
break
if not need_reload:
return
self.ids_lookup.clear()
self.word_embeddings.clear()
self.skipped_embeddings.clear()
self.expected_shape = self.get_expected_shape()
for path, embdir in self.embedding_dirs.items():
self.load_from_dir(embdir)
embdir.update()
print(f"Textual inversion embeddings loaded({len(self.word_embeddings)}): {', '.join(self.word_embeddings.keys())}") print(f"Textual inversion embeddings loaded({len(self.word_embeddings)}): {', '.join(self.word_embeddings.keys())}")
if len(self.skipped_embeddings) > 0: if len(self.skipped_embeddings) > 0:
print(f"Textual inversion embeddings skipped({len(self.skipped_embeddings)}): {', '.join(self.skipped_embeddings.keys())}") print(f"Textual inversion embeddings skipped({len(self.skipped_embeddings)}): {', '.join(self.skipped_embeddings.keys())}")
@ -247,11 +288,11 @@ def validate_train_inputs(model_name, learn_rate, batch_size, gradient_step, dat
assert os.path.isfile(template_file), "Prompt template file doesn't exist" assert os.path.isfile(template_file), "Prompt template file doesn't exist"
assert steps, "Max steps is empty or 0" assert steps, "Max steps is empty or 0"
assert isinstance(steps, int), "Max steps must be integer" assert isinstance(steps, int), "Max steps must be integer"
assert steps > 0 , "Max steps must be positive" assert steps > 0, "Max steps must be positive"
assert isinstance(save_model_every, int), "Save {name} must be integer" assert isinstance(save_model_every, int), "Save {name} must be integer"
assert save_model_every >= 0 , "Save {name} must be positive or 0" assert save_model_every >= 0, "Save {name} must be positive or 0"
assert isinstance(create_image_every, int), "Create image must be integer" assert isinstance(create_image_every, int), "Create image must be integer"
assert create_image_every >= 0 , "Create image must be positive or 0" assert create_image_every >= 0, "Create image must be positive or 0"
if save_model_every or create_image_every: if save_model_every or create_image_every:
assert log_directory, "Log directory is empty" assert log_directory, "Log directory is empty"

View file

@ -267,7 +267,7 @@ def calc_resolution_hires(enable, width, height, hr_scale, hr_resize_x, hr_resiz
with devices.autocast(): with devices.autocast():
p.init([""], [0], [0]) p.init([""], [0], [0])
return f"resize: from <span class='resolution'>{width}x{height}</span> to <span class='resolution'>{p.hr_upscale_to_x}x{p.hr_upscale_to_y}</span>" return f"resize: from <span class='resolution'>{p.width}x{p.height}</span> to <span class='resolution'>{p.hr_resize_x or p.hr_upscale_to_x}x{p.hr_resize_y or p.hr_upscale_to_y}</span>"
def apply_styles(prompt, prompt_neg, style1_name, style2_name): def apply_styles(prompt, prompt_neg, style1_name, style2_name):
@ -745,15 +745,20 @@ def create_ui():
custom_inputs = modules.scripts.scripts_txt2img.setup_ui() custom_inputs = modules.scripts.scripts_txt2img.setup_ui()
hr_resolution_preview_inputs = [enable_hr, width, height, hr_scale, hr_resize_x, hr_resize_y] hr_resolution_preview_inputs = [enable_hr, width, height, hr_scale, hr_resize_x, hr_resize_y]
hr_resolution_preview_args = dict(
fn=calc_resolution_hires,
inputs=hr_resolution_preview_inputs,
outputs=[hr_final_resolution],
show_progress=False
)
for input in hr_resolution_preview_inputs: for input in hr_resolution_preview_inputs:
input.change(**hr_resolution_preview_args) input.change(
fn=calc_resolution_hires,
inputs=hr_resolution_preview_inputs,
outputs=[hr_final_resolution],
show_progress=False,
)
input.change(
None,
_js="onCalcResolutionHires",
inputs=hr_resolution_preview_inputs,
outputs=[],
show_progress=False,
)
txt2img_gallery, generation_info, html_info, html_log = create_output_panel("txt2img", opts.outdir_txt2img_samples) txt2img_gallery, generation_info, html_info, html_log = create_output_panel("txt2img", opts.outdir_txt2img_samples)
parameters_copypaste.bind_buttons({"txt2img": txt2img_paste}, None, txt2img_prompt) parameters_copypaste.bind_buttons({"txt2img": txt2img_paste}, None, txt2img_prompt)

View file

@ -25,6 +25,8 @@ class Script(scripts.Script):
return [info, overlap, upscaler_index, scale_factor] return [info, overlap, upscaler_index, scale_factor]
def run(self, p, _, overlap, upscaler_index, scale_factor): def run(self, p, _, overlap, upscaler_index, scale_factor):
if isinstance(upscaler_index, str):
upscaler_index = [x.name.lower() for x in shared.sd_upscalers].index(upscaler_index.lower())
processing.fix_seed(p) processing.fix_seed(p)
upscaler = shared.sd_upscalers[upscaler_index] upscaler = shared.sd_upscalers[upscaler_index]

View file

@ -512,7 +512,7 @@ input[type="range"]{
border: none; border: none;
background: none; background: none;
flex: unset; flex: unset;
gap: 0.5em; gap: 1em;
} }
#quicksettings > div > div{ #quicksettings > div > div{
@ -521,6 +521,17 @@ input[type="range"]{
padding: 0; padding: 0;
} }
#quicksettings > div > div > div > div > label > span {
position: relative;
margin-right: 9em;
margin-bottom: -1em;
}
#quicksettings > div > div > label > span {
position: relative;
margin-bottom: -1em;
}
canvas[key="mask"] { canvas[key="mask"] {
z-index: 12 !important; z-index: 12 !important;
filter: invert(); filter: invert();
@ -659,6 +670,10 @@ footer {
min-width: auto; min-width: auto;
} }
.inactive{
opacity: 0.5;
}
/* The following handles localization for right-to-left (RTL) languages like Arabic. /* The following handles localization for right-to-left (RTL) languages like Arabic.
The rtl media type will only be activated by the logic in javascript/localization.js. The rtl media type will only be activated by the logic in javascript/localization.js.
If you change anything above, you need to make sure it is RTL compliant by just running If you change anything above, you need to make sure it is RTL compliant by just running

View file

@ -50,6 +50,12 @@ class TestImg2ImgWorking(unittest.TestCase):
self.simple_img2img["mask"] = encode_pil_to_base64(Image.open(r"test/test_files/mask_basic.png")) self.simple_img2img["mask"] = encode_pil_to_base64(Image.open(r"test/test_files/mask_basic.png"))
self.assertEqual(requests.post(self.url_img2img, json=self.simple_img2img).status_code, 200) self.assertEqual(requests.post(self.url_img2img, json=self.simple_img2img).status_code, 200)
def test_img2img_sd_upscale_performed(self):
self.simple_img2img["script_name"] = "sd upscale"
self.simple_img2img["script_args"] = ["", 8, "Lanczos", 2.0]
self.assertEqual(requests.post(self.url_img2img, json=self.simple_img2img).status_code, 200)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()