Merge branch 'master' into master

AUTOMATIC1111, 2022-09-12 13:23:58 +03:00, committed by GitHub
commit c094f00e10
7 changed files with 155 additions and 11 deletions

README.md

@@ -283,6 +283,16 @@ wget https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pt
 After that follow the instructions in the `Manual instructions` section starting at step `:: clone repositories for Stable Diffusion and (optionally) CodeFormer`.
+### img2img alternative test
+- find it in the scripts section
+- put a description of the input image into the Original prompt field
+- use the Euler sampler only
+- recommended: 50 steps, low cfg scale between 1 and 2
+- denoising and seed don't matter
+- decode cfg scale between 0 and 1
+- decode steps 50
+- the original blue-haired woman example reproduces almost exactly with cfg scale=1.8
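A minimal sketch (not part of this commit) of what the decode settings in the list above control: the script walks the img2img latent backwards along the sampler's sigma schedule under the Original prompt, recovering a noise tensor that ordinary Euler sampling then turns back into an image with the edited prompt. `denoise` below is a stand-in for the CFG-combined model call; the actual implementation is `find_noise_for_image()` in `scripts/img2imgalt.py`, added later in this commit.

```python
def reverse_euler(x, sigmas, denoise, cond, uncond, decode_cfg_scale=1.0):
    # sigmas arrive flipped: from the lowest noise level (the finished image)
    # up to the highest (pure noise), the opposite of normal sampling
    for i in range(1, len(sigmas)):
        d_uncond = denoise(x, sigmas[i], uncond)
        d_cond = denoise(x, sigmas[i], cond)
        # classifier-free guidance at the low "decode cfg scale" recommended above
        denoised = d_uncond + (d_cond - d_uncond) * decode_cfg_scale
        d = (x - denoised) / sigmas[i]             # Euler derivative
        x = x + d * (sigmas[i] - sigmas[i - 1])    # Euler step, taken in reverse
    return x / x.std()                             # normalise to unit variance, as the script does
```

Because the reconstruction only inverts the Euler update, the forward pass has to use Euler as well, which is why the sampler is pinned in the list above.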
 ## Credits
 - Stable Diffusion - https://github.com/CompVis/stable-diffusion, https://github.com/CompVis/taming-transformers
 - k-diffusion - https://github.com/crowsonkb/k-diffusion.git

modules/interrogate.py

@@ -11,7 +11,7 @@ from torchvision import transforms
 from torchvision.transforms.functional import InterpolationMode
 import modules.shared as shared
-from modules import devices, paths
+from modules import devices, paths, lowvram

 blip_image_eval_size = 384
 blip_model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'
@@ -75,19 +75,28 @@ class InterrogateModels:
         self.dtype = next(self.clip_model.parameters()).dtype

-    def unload(self):
+    def send_clip_to_ram(self):
         if not shared.opts.interrogate_keep_models_in_memory:
             if self.clip_model is not None:
                 self.clip_model = self.clip_model.to(devices.cpu)

+    def send_blip_to_ram(self):
+        if not shared.opts.interrogate_keep_models_in_memory:
             if self.blip_model is not None:
                 self.blip_model = self.blip_model.to(devices.cpu)

-            devices.torch_gc()
+    def unload(self):
+        self.send_clip_to_ram()
+        self.send_blip_to_ram()
+
+        devices.torch_gc()

     def rank(self, image_features, text_array, top_count=1):
         import clip

+        if shared.opts.interrogate_clip_dict_limit != 0:
+            text_array = text_array[0:int(shared.opts.interrogate_clip_dict_limit)]
+
         top_count = min(top_count, len(text_array))
         text_tokens = clip.tokenize([text for text in text_array]).to(shared.device)
         text_features = self.clip_model.encode_text(text_tokens).type(self.dtype)
@@ -117,16 +126,24 @@ class InterrogateModels:
         res = None

         try:
+            if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
+                lowvram.send_everything_to_cpu()
+                devices.torch_gc()
+
             self.load()

             caption = self.generate_caption(pil_image)
+            self.send_blip_to_ram()
+            devices.torch_gc()
+
             res = caption

-            images = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(shared.device)
+            clip_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(shared.device)

             precision_scope = torch.autocast if shared.cmd_opts.precision == "autocast" else contextlib.nullcontext
             with torch.no_grad(), precision_scope("cuda"):
-                image_features = self.clip_model.encode_image(images).type(self.dtype)
+                image_features = self.clip_model.encode_image(clip_image).type(self.dtype)

                 image_features /= image_features.norm(dim=-1, keepdim=True)
@@ -143,6 +160,7 @@ class InterrogateModels:
         except Exception:
             print(f"Error interrogating", file=sys.stderr)
             print(traceback.format_exc(), file=sys.stderr)
+            res += "<error>"

         self.unload()

modules/lowvram.py

@@ -5,6 +5,16 @@ module_in_gpu = None
 cpu = torch.device("cpu")
 device = gpu = get_optimal_device()

+def send_everything_to_cpu():
+    global module_in_gpu
+
+    if module_in_gpu is not None:
+        module_in_gpu.to(cpu)
+
+    module_in_gpu = None
+
+
 def setup_for_low_vram(sd_model, use_medvram):
     parents = {}

modules/shared.py

@@ -13,8 +13,6 @@ from modules.devices import get_optimal_device
 import modules.styles
 import modules.interrogate

-config_filename = "config.json"

 sd_model_file = os.path.join(script_path, 'model.ckpt')
 if not os.path.exists(sd_model_file):
     sd_model_file = "models/ldm/stable-diffusion-v1/model.ckpt"
@@ -43,6 +41,8 @@ parser.add_argument("--port", type=int, help="launch gradio with given server po
 parser.add_argument("--show-negative-prompt", action='store_true', help="does not do anything", default=False)
 parser.add_argument("--ui-config-file", type=str, help="filename to use for ui configuration", default=os.path.join(script_path, 'ui-config.json'))
 parser.add_argument("--hide-ui-dir-config", action='store_true', help="hide directory configuration from webui", default=False)
+parser.add_argument("--ui-settings-file", type=str, help="filename to use for ui settings", default=os.path.join(script_path, 'config.json'))
+parser.add_argument("--gradio-debug", action='store_true', help="launch gradio with --debug option")

 cmd_opts = parser.parse_args()
@@ -51,6 +51,7 @@ device = get_optimal_device()
 batch_cond_uncond = cmd_opts.always_batch_cond_uncond or not (cmd_opts.lowvram or cmd_opts.medvram)
 parallel_processing_allowed = not cmd_opts.lowvram and not cmd_opts.medvram

+config_filename = cmd_opts.ui_settings_file

 class State:
     interrupted = False
@@ -129,11 +130,12 @@ class Options:
         "multiple_tqdm": OptionInfo(True, "Add a second progress bar to the console that shows progress for an entire job. Broken in PyCharm console."),
         "face_restoration_model": OptionInfo(None, "Face restoration model", gr.Radio, lambda: {"choices": [x.name() for x in face_restorers]}),
         "code_former_weight": OptionInfo(0.5, "CodeFormer weight parameter; 0 = maximum effect; 1 = minimum effect", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}),
-        "interrogate_keep_models_in_memory": OptionInfo(True, "Interrogate: keep models in VRAM"),
+        "interrogate_keep_models_in_memory": OptionInfo(False, "Interrogate: keep models in VRAM"),
         "interrogate_use_builtin_artists": OptionInfo(True, "Interrogate: use artists from artists.csv"),
         "interrogate_clip_num_beams": OptionInfo(1, "Interrogate: num_beams for BLIP", gr.Slider, {"minimum": 1, "maximum": 16, "step": 1}),
         "interrogate_clip_min_length": OptionInfo(24, "Interrogate: minimum description length (excluding artists, etc..)", gr.Slider, {"minimum": 1, "maximum": 128, "step": 1}),
         "interrogate_clip_max_length": OptionInfo(48, "Interrogate: maximum description length", gr.Slider, {"minimum": 1, "maximum": 256, "step": 1}),
+        "interrogate_clip_dict_limit": OptionInfo(1500, "Interrogate: maximum number of lines in text file (0 = No limit)"),
     }

     def __init__(self):

modules/ui.py

@@ -270,7 +270,7 @@ def create_ui(txt2img, img2img, run_extras, run_pnginfo):
                     batch_count = gr.Slider(minimum=1, maximum=cmd_opts.max_batch_count, step=1, label='Batch count', value=1)
                     batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1)

-                cfg_scale = gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='CFG Scale', value=7.0)
+                cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0)

                 with gr.Group():
                     height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512)
@@ -413,7 +413,7 @@ def create_ui(txt2img, img2img, run_extras, run_pnginfo):
                     batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1)

                 with gr.Group():
-                    cfg_scale = gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='CFG Scale', value=7.0)
+                    cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0)
                     denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.75)
                     denoising_strength_change_factor = gr.Slider(minimum=0.9, maximum=1.1, step=0.01, label='Denoising strength change factor', value=1, visible=False)

scripts/img2imgalt.py (new file, 104 lines)

@@ -0,0 +1,104 @@
+import numpy as np
+from tqdm import trange
+
+import modules.scripts as scripts
+import gradio as gr
+
+from modules import processing, shared, sd_samplers
+from modules.processing import Processed
+from modules.sd_samplers import samplers
+from modules.shared import opts, cmd_opts, state
+
+import torch
+import k_diffusion as K
+
+from PIL import Image
+from torch import autocast
+from einops import rearrange, repeat
+
+
+def find_noise_for_image(p, cond, uncond, cfg_scale, steps):
+    x = p.init_latent
+
+    s_in = x.new_ones([x.shape[0]])
+    dnw = K.external.CompVisDenoiser(shared.sd_model)
+    sigmas = dnw.get_sigmas(steps).flip(0)
+
+    shared.state.sampling_steps = steps
+
+    for i in trange(1, len(sigmas)):
+        shared.state.sampling_step += 1
+
+        x_in = torch.cat([x] * 2)
+        sigma_in = torch.cat([sigmas[i] * s_in] * 2)
+        cond_in = torch.cat([uncond, cond])
+
+        c_out, c_in = [K.utils.append_dims(k, x_in.ndim) for k in dnw.get_scalings(sigma_in)]
+        t = dnw.sigma_to_t(sigma_in)
+
+        eps = shared.sd_model.apply_model(x_in * c_in, t, cond=cond_in)
+        denoised_uncond, denoised_cond = (x_in + eps * c_out).chunk(2)
+
+        denoised = denoised_uncond + (denoised_cond - denoised_uncond) * cfg_scale
+
+        d = (x - denoised) / sigmas[i]
+        dt = sigmas[i] - sigmas[i - 1]
+
+        x = x + d * dt
+
+        sd_samplers.store_latent(x)
+
+        # This shouldn't be necessary, but solved some VRAM issues
+        del x_in, sigma_in, cond_in, c_out, c_in, t,
+        del eps, denoised_uncond, denoised_cond, denoised, d, dt
+
+    shared.state.nextjob()
+
+    return x / x.std()
+
+
+cache = [None, None, None, None, None]
+
+
+class Script(scripts.Script):
+    def title(self):
+        return "img2img alternative test"
+
+    def show(self, is_img2img):
+        return is_img2img
+
+    def ui(self, is_img2img):
+        original_prompt = gr.Textbox(label="Original prompt", lines=1)
+        cfg = gr.Slider(label="Decode CFG scale", minimum=0.1, maximum=3.0, step=0.1, value=1.0)
+        st = gr.Slider(label="Decode steps", minimum=1, maximum=150, step=1, value=50)
+        return [original_prompt, cfg, st]
+
+    def run(self, p, original_prompt, cfg, st):
+        p.batch_size = 1
+        p.batch_count = 1
+
+        def sample_extra(x, conditioning, unconditional_conditioning):
+            lat = tuple([int(x*10) for x in p.init_latent.cpu().numpy().flatten().tolist()])
+
+            if cache[0] is not None and cache[1] == cfg and cache[2] == st and len(cache[3]) == len(lat) and sum(np.array(cache[3])-np.array(lat)) < 100 and cache[4] == original_prompt:
+                noise = cache[0]
+            else:
+                shared.state.job_count += 1
+
+                cond = p.sd_model.get_learned_conditioning(p.batch_size * [original_prompt])
+                noise = find_noise_for_image(p, cond, unconditional_conditioning, cfg, st)
+
+                cache[0] = noise
+                cache[1] = cfg
+                cache[2] = st
+                cache[3] = lat
+                cache[4] = original_prompt
+
+            sampler = samplers[p.sampler_index].constructor(p.sd_model)
+
+            samples_ddim = sampler.sample(p, noise, conditioning, unconditional_conditioning)
+
+            return samples_ddim
+
+        p.sample = sample_extra
+
+        processed = processing.process_images(p)
+
+        return processed

webui.py

@@ -115,7 +115,7 @@ def webui():
         run_pnginfo=modules.extras.run_pnginfo
     )

-    demo.launch(share=cmd_opts.share, server_name="0.0.0.0" if cmd_opts.listen else None, server_port=cmd_opts.port)
+    demo.launch(share=cmd_opts.share, server_name="0.0.0.0" if cmd_opts.listen else None, server_port=cmd_opts.port, debug=cmd_opts.gradio_debug)

 if __name__ == "__main__":