Add latent upscale option to img2img

Recently, the option to do latent upscale was added to the txt2img
highres fix. This feature works by scaling the latent sample of the
image, and then running a second pass of img2img.

However, in that version of highres fix, the image and parameters cannot
be changed between the first pass and the second pass. We might want to
do a fixup in img2img before doing the second pass, or might want to run
the second pass at a different resolution.

This change adds the option for img2img to perform its upscale in latent
space, rather than image space, giving very similar results to highres
fix with latent upscale. The result is not exactly the same because
this workflow involves an additional latent -> decoder -> image ->
encoder -> latent round trip that does not happen in highres fix, but
this conversion has relatively small losses.
This commit is contained in:
Andrew Ryan 2022-12-08 07:09:09 +00:00
parent 44c46f0ed3
commit 358a8628f6
2 changed files with 6 additions and 2 deletions

View file

@ -795,7 +795,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
for img in self.init_images:
image = img.convert("RGB")
if crop_region is None:
if crop_region is None and self.resize_mode != 3:
image = images.resize_image(self.resize_mode, image, self.width, self.height)
if image_mask is not None:
@ -804,6 +804,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
self.overlay_images.append(image_masked.convert('RGBA'))
# crop_region is not None if and only if we are doing inpaint full res
if crop_region is not None:
image = image.crop(crop_region)
image = images.resize_image(2, image, self.width, self.height)
@ -840,6 +841,9 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image))
if self.resize_mode == 3:
self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")
if image_mask is not None:
init_mask = latent_mask
latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))

View file

@ -829,7 +829,7 @@ def create_ui():
img2img_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs)
with gr.Row():
resize_mode = gr.Radio(label="Resize mode", elem_id="resize_mode", show_label=False, choices=["Just resize", "Crop and resize", "Resize and fill"], type="index", value="Just resize")
resize_mode = gr.Radio(label="Resize mode", elem_id="resize_mode", show_label=False, choices=["Just resize", "Crop and resize", "Resize and fill", "Upscale Latent Space"], type="index", value="Just resize")
steps = gr.Slider(minimum=1, maximum=150, step=1, label="Sampling Steps", value=20)
sampler_index = gr.Radio(label='Sampling method', choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index")