Merge pull request #3246 from Milly/fix/train-preprocess-keep-ratio

Preprocess: fixed keep ratio and changed split behavior
This commit is contained in:
AUTOMATIC1111 2022-10-21 18:36:55 +03:00 committed by GitHub
commit 7464f367c3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 52 additions and 24 deletions

View file

@ -1,5 +1,6 @@
import os import os
from PIL import Image, ImageOps from PIL import Image, ImageOps
import math
import platform import platform
import sys import sys
import tqdm import tqdm
@ -11,7 +12,7 @@ if cmd_opts.deepdanbooru:
import modules.deepbooru as deepbooru import modules.deepbooru as deepbooru
def preprocess(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False): def preprocess(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2):
try: try:
if process_caption: if process_caption:
shared.interrogator.load() shared.interrogator.load()
@ -21,7 +22,7 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce
db_opts[deepbooru.OPT_INCLUDE_RANKS] = False db_opts[deepbooru.OPT_INCLUDE_RANKS] = False
deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts) deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts)
preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru) preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio)
finally: finally:
@ -33,11 +34,13 @@ def preprocess(process_src, process_dst, process_width, process_height, preproce
def preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False): def preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2):
width = process_width width = process_width
height = process_height height = process_height
src = os.path.abspath(process_src) src = os.path.abspath(process_src)
dst = os.path.abspath(process_dst) dst = os.path.abspath(process_dst)
split_threshold = max(0.0, min(1.0, split_threshold))
overlap_ratio = max(0.0, min(0.9, overlap_ratio))
assert src != dst, 'same directory specified as source and destination' assert src != dst, 'same directory specified as source and destination'
@ -87,6 +90,29 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre
if process_flip: if process_flip:
save_pic_with_caption(ImageOps.mirror(image), index, existing_caption=existing_caption) save_pic_with_caption(ImageOps.mirror(image), index, existing_caption=existing_caption)
def split_pic(image, inverse_xy):
if inverse_xy:
from_w, from_h = image.height, image.width
to_w, to_h = height, width
else:
from_w, from_h = image.width, image.height
to_w, to_h = width, height
h = from_h * to_w // from_w
if inverse_xy:
image = image.resize((h, to_w))
else:
image = image.resize((to_w, h))
split_count = math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio)))
y_step = (h - to_h) / (split_count - 1)
for i in range(split_count):
y = int(y_step * i)
if inverse_xy:
splitted = image.crop((y, 0, y + to_h, to_w))
else:
splitted = image.crop((0, y, to_w, y + to_h))
yield splitted
for index, imagefile in enumerate(tqdm.tqdm(files)): for index, imagefile in enumerate(tqdm.tqdm(files)):
subindex = [0] subindex = [0]
filename = os.path.join(src, imagefile) filename = os.path.join(src, imagefile)
@ -105,26 +131,16 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre
if shared.state.interrupted: if shared.state.interrupted:
break break
ratio = img.height / img.width if img.height > img.width:
is_tall = ratio > 1.35 ratio = (img.width * height) / (img.height * width)
is_wide = ratio < 1 / 1.35 inverse_xy = False
else:
ratio = (img.height * width) / (img.width * height)
inverse_xy = True
if process_split and is_tall: if process_split and ratio < 1.0 and ratio <= split_threshold:
img = img.resize((width, height * img.height // img.width)) for splitted in split_pic(img, inverse_xy):
save_pic(splitted, index, existing_caption=existing_caption)
top = img.crop((0, 0, width, height))
save_pic(top, index, existing_caption=existing_caption)
bot = img.crop((0, img.height - height, width, img.height))
save_pic(bot, index, existing_caption=existing_caption)
elif process_split and is_wide:
img = img.resize((width * img.width // img.height, height))
left = img.crop((0, 0, width, height))
save_pic(left, index, existing_caption=existing_caption)
right = img.crop((img.width - width, 0, img.width, height))
save_pic(right, index, existing_caption=existing_caption)
else: else:
img = images.resize_image(1, img, width, height) img = images.resize_image(1, img, width, height)
save_pic(img, index, existing_caption=existing_caption) save_pic(img, index, existing_caption=existing_caption)

View file

@ -1300,10 +1300,14 @@ def create_ui(wrap_gradio_gpu_call):
with gr.Row(): with gr.Row():
process_flip = gr.Checkbox(label='Create flipped copies') process_flip = gr.Checkbox(label='Create flipped copies')
process_split = gr.Checkbox(label='Split oversized images into two') process_split = gr.Checkbox(label='Split oversized images')
process_caption = gr.Checkbox(label='Use BLIP for caption') process_caption = gr.Checkbox(label='Use BLIP for caption')
process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True if cmd_opts.deepdanbooru else False) process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True if cmd_opts.deepdanbooru else False)
with gr.Row(visible=False) as process_split_extra_row:
process_split_threshold = gr.Slider(label='Split image threshold', value=0.5, minimum=0.0, maximum=1.0, step=0.05)
process_overlap_ratio = gr.Slider(label='Split image overlap ratio', value=0.2, minimum=0.0, maximum=0.9, step=0.05)
with gr.Row(): with gr.Row():
with gr.Column(scale=3): with gr.Column(scale=3):
gr.HTML(value="") gr.HTML(value="")
@ -1311,6 +1315,12 @@ def create_ui(wrap_gradio_gpu_call):
with gr.Column(): with gr.Column():
run_preprocess = gr.Button(value="Preprocess", variant='primary') run_preprocess = gr.Button(value="Preprocess", variant='primary')
process_split.change(
fn=lambda show: gr_show(show),
inputs=[process_split],
outputs=[process_split_extra_row],
)
with gr.Tab(label="Train"): with gr.Tab(label="Train"):
gr.HTML(value="<p style='margin-bottom: 0.7em'>Train an embedding or Hypernetwork; you must specify a directory with a set of 1:1 ratio images <a href=\"https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Textual-Inversion\" style=\"font-weight:bold;\">[wiki]</a></p>") gr.HTML(value="<p style='margin-bottom: 0.7em'>Train an embedding or Hypernetwork; you must specify a directory with a set of 1:1 ratio images <a href=\"https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Textual-Inversion\" style=\"font-weight:bold;\">[wiki]</a></p>")
with gr.Row(): with gr.Row():
@ -1409,7 +1419,9 @@ def create_ui(wrap_gradio_gpu_call):
process_flip, process_flip,
process_split, process_split,
process_caption, process_caption,
process_caption_deepbooru process_caption_deepbooru,
process_split_threshold,
process_overlap_ratio,
], ],
outputs=[ outputs=[
ti_output, ti_output,