Merge pull request #1752 from Greendayle/dev/deepdanbooru
Added DeepDanbooru interrogator
This commit is contained in:
commit
e00b4df7c6
7 changed files with 110 additions and 6 deletions
|
@ -66,6 +66,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
|
||||||
- separate prompts using uppercase `AND`
|
- separate prompts using uppercase `AND`
|
||||||
- also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2`
|
- also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2`
|
||||||
- No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
|
- No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
|
||||||
|
- DeepDanbooru integration, creates Danbooru-style tags for anime prompts (add `--deepdanbooru` to the command-line args)
|
||||||
|
|
||||||
## Installation and Running
|
## Installation and Running
|
||||||
Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs.
|
Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs.
|
||||||
|
@ -123,4 +124,5 @@ The documentation was moved from this README over to the project's [wiki](https:
|
||||||
- Noise generation for outpainting mk2 - https://github.com/parlance-zz/g-diffuser-bot
|
- Noise generation for outpainting mk2 - https://github.com/parlance-zz/g-diffuser-bot
|
||||||
- CLIP interrogator idea and borrowing some code - https://github.com/pharmapsychotic/clip-interrogator
|
- CLIP interrogator idea and borrowing some code - https://github.com/pharmapsychotic/clip-interrogator
|
||||||
- Initial Gradio script - posted on 4chan by an Anonymous user. Thank you Anonymous user.
|
- Initial Gradio script - posted on 4chan by an Anonymous user. Thank you Anonymous user.
|
||||||
|
- DeepDanbooru - interrogator for anime diffusers - https://github.com/KichangKim/DeepDanbooru
|
||||||
- (You)
|
- (You)
|
||||||
|
|
|
@ -33,6 +33,7 @@ def extract_arg(args, name):
|
||||||
|
|
||||||
args, skip_torch_cuda_test = extract_arg(args, '--skip-torch-cuda-test')
|
args, skip_torch_cuda_test = extract_arg(args, '--skip-torch-cuda-test')
|
||||||
xformers = '--xformers' in args
|
xformers = '--xformers' in args
|
||||||
|
deepdanbooru = '--deepdanbooru' in args
|
||||||
|
|
||||||
|
|
||||||
def repo_dir(name):
|
def repo_dir(name):
|
||||||
|
@ -132,6 +133,9 @@ if not is_installed("xformers") and xformers and platform.python_version().start
|
||||||
elif platform.system() == "Linux":
|
elif platform.system() == "Linux":
|
||||||
run_pip("install xformers", "xformers")
|
run_pip("install xformers", "xformers")
|
||||||
|
|
||||||
|
if not is_installed("deepdanbooru") and deepdanbooru:
|
||||||
|
run_pip("install git+https://github.com/KichangKim/DeepDanbooru.git@edf73df4cdaeea2cf00e9ac08bd8a9026b7a7b26#egg=deepdanbooru[tensorflow] tensorflow==2.10.0 tensorflow-io==0.27.0", "deepdanbooru")
|
||||||
|
|
||||||
os.makedirs(dir_repos, exist_ok=True)
|
os.makedirs(dir_repos, exist_ok=True)
|
||||||
|
|
||||||
git_clone("https://github.com/CompVis/stable-diffusion.git", repo_dir('stable-diffusion'), "Stable Diffusion", stable_diffusion_commit_hash)
|
git_clone("https://github.com/CompVis/stable-diffusion.git", repo_dir('stable-diffusion'), "Stable Diffusion", stable_diffusion_commit_hash)
|
||||||
|
|
73
modules/deepbooru.py
Normal file
73
modules/deepbooru.py
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
import os.path
|
||||||
|
from concurrent.futures import ProcessPoolExecutor
|
||||||
|
from multiprocessing import get_context
|
||||||
|
|
||||||
|
|
||||||
|
def _load_tf_and_return_tags(pil_image, threshold):
    """Tag an image with the DeepDanbooru model and return the tags as a prompt.

    DeepDanbooru, TensorFlow and NumPy are imported inside the function, so
    they are only loaded by the process that actually executes it
    (``get_deepbooru_tags`` submits this function to a spawned worker).

    If ``models/deepbooru/project.json`` (resolved relative to the parent of
    this module's directory) does not exist, the model archive is downloaded
    into that directory, extracted there, and the archive is deleted.

    Args:
        pil_image: Image to tag. Expected to be a PIL image; it is converted
            to RGB before being turned into an array. Objects without a
            ``convert`` method are passed to ``np.array`` unchanged.
        threshold: Minimum score a tag must reach to be reported.

    Returns:
        A ``', '``-joined string of every tag whose score is ``>= threshold``,
        in the project's tag order, excluding ``rating:*`` tags, with ``_``
        and ``:`` replaced by spaces.

    Side effects:
        Prints the kept tags with their scores, highest score first.
    """
    import deepdanbooru as dd
    import tensorflow as tf
    import numpy as np

    this_folder = os.path.dirname(__file__)
    model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru'))
    if not os.path.exists(os.path.join(model_path, 'project.json')):
        # Only needed for the one-off download, so not imported on every call.
        import zipfile
        from basicsr.utils.download_util import load_file_from_url
        load_file_from_url(r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
                           model_path)
        archive_path = os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip")
        with zipfile.ZipFile(archive_path, "r") as zip_ref:
            zip_ref.extractall(model_path)
        os.remove(archive_path)

    tags = dd.project.load_tags_from_project(model_path)
    model = dd.project.load_model_from_project(
        model_path, compile_model=True
    )

    width = model.input_shape[2]
    height = model.input_shape[1]

    # RGBA / grayscale / palette images would otherwise yield an array with
    # the wrong channel count or rank for the resize and predict steps below.
    # NOTE(review): assumes the model takes 3-channel input - confirm.
    if hasattr(pil_image, "convert"):
        pil_image = pil_image.convert("RGB")

    image = np.array(pil_image)
    image = tf.image.resize(
        image,
        size=(height, width),
        method=tf.image.ResizeMethod.AREA,
        preserve_aspect_ratio=True,
    )
    image = image.numpy()  # EagerTensor to np.array
    image = dd.image.transform_and_pad_image(image, width, height)
    image = image / 255.0
    # Add the leading batch dimension expected by model.predict.
    image = image.reshape((1,) + tuple(image.shape))

    scores = model.predict(image)[0]

    # (score, tag) pairs that pass the threshold, kept in project tag order.
    kept = [
        (score, tag)
        for tag, score in zip(tags, scores)
        if score >= threshold and not tag.startswith("rating:")
    ]

    # Sort on the numeric score; sorting the formatted strings would order
    # them lexicographically and misplace e.g. scientific-notation values.
    by_score = sorted(kept, key=lambda pair: pair[0], reverse=True)
    print('\n'.join(f'{score} {tag}' for score, tag in by_score))

    return ', '.join(tag for _, tag in kept).replace('_', ' ').replace(':', ' ')
|
||||||
|
|
||||||
|
|
||||||
|
def subprocess_init_no_cuda():
    """Set ``CUDA_VISIBLE_DEVICES`` to ``"-1"`` for the current process.

    Used as the worker initializer of the process pool created in
    ``get_deepbooru_tags``, so it runs in the worker before the tagging
    function is executed there.
    """
    from os import environ

    environ["CUDA_VISIBLE_DEVICES"] = "-1"
|
||||||
|
|
||||||
|
|
||||||
|
def get_deepbooru_tags(pil_image, threshold=0.5):
    """Run DeepDanbooru tagging in a freshly spawned worker process.

    A new ``ProcessPoolExecutor`` using the ``'spawn'`` start method is
    created for each call, with ``subprocess_init_no_cuda`` as its worker
    initializer. ``_load_tf_and_return_tags`` is submitted to it and the pool
    is shut down before this function returns.

    Args:
        pil_image: Image forwarded unchanged to the worker.
        threshold: Minimum tag score forwarded to the worker.

    Returns:
        Whatever ``_load_tf_and_return_tags`` returns.

    Raises:
        Any exception raised in the worker is re-raised here by
        ``Future.result()``.
    """
    spawn_ctx = get_context('spawn')
    pool = ProcessPoolExecutor(initializer=subprocess_init_no_cuda, mp_context=spawn_ctx)
    with pool:
        future = pool.submit(_load_tf_and_return_tags, pil_image, threshold)
        # result() blocks until the worker is done and rethrows its exceptions;
        # leaving the ``with`` block then shuts the pool down.
        return future.result()
|
|
@ -45,6 +45,7 @@ parser.add_argument("--swinir-models-path", type=str, help="Path to directory wi
|
||||||
parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(models_path, 'LDSR'))
|
parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(models_path, 'LDSR'))
|
||||||
parser.add_argument("--xformers", action='store_true', help="enable xformers for cross attention layers")
|
parser.add_argument("--xformers", action='store_true', help="enable xformers for cross attention layers")
|
||||||
parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work")
|
parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work")
|
||||||
|
parser.add_argument("--deepdanbooru", action='store_true', help="enable deepdanbooru interrogator")
|
||||||
parser.add_argument("--opt-split-attention", action='store_true', help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.")
|
parser.add_argument("--opt-split-attention", action='store_true', help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.")
|
||||||
parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization")
|
parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization")
|
||||||
parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find")
|
parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find")
|
||||||
|
|
|
@ -25,6 +25,8 @@ import gradio.routes
|
||||||
from modules import sd_hijack
|
from modules import sd_hijack
|
||||||
from modules.paths import script_path
|
from modules.paths import script_path
|
||||||
from modules.shared import opts, cmd_opts
|
from modules.shared import opts, cmd_opts
|
||||||
|
if cmd_opts.deepdanbooru:
|
||||||
|
from modules.deepbooru import get_deepbooru_tags
|
||||||
import modules.shared as shared
|
import modules.shared as shared
|
||||||
from modules.sd_samplers import samplers, samplers_for_img2img
|
from modules.sd_samplers import samplers, samplers_for_img2img
|
||||||
from modules.sd_hijack import model_hijack
|
from modules.sd_hijack import model_hijack
|
||||||
|
@ -308,6 +310,11 @@ def interrogate(image):
|
||||||
return gr_show(True) if prompt is None else prompt
|
return gr_show(True) if prompt is None else prompt
|
||||||
|
|
||||||
|
|
||||||
|
def interrogate_deepbooru(image):
    """Return DeepDanbooru tags for ``image``, or ``gr_show(True)`` if there are none.

    ``get_deepbooru_tags`` is only imported in this module when
    ``cmd_opts.deepdanbooru`` is set, so this function must only be called
    when that option is enabled.
    """
    tags = get_deepbooru_tags(image)
    if tags is not None:
        return tags
    return gr_show(True)
|
||||||
|
|
||||||
|
|
||||||
def create_seed_inputs():
|
def create_seed_inputs():
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Box():
|
with gr.Box():
|
||||||
|
@ -444,15 +451,20 @@ def create_toprow(is_img2img):
|
||||||
outputs=[],
|
outputs=[],
|
||||||
)
|
)
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row(scale=1):
|
||||||
if is_img2img:
|
if is_img2img:
|
||||||
interrogate = gr.Button('Interrogate', elem_id="interrogate")
|
interrogate = gr.Button('Interrogate\nCLIP', elem_id="interrogate")
|
||||||
|
if cmd_opts.deepdanbooru:
|
||||||
|
deepbooru = gr.Button('Interrogate\nDeepBooru', elem_id="deepbooru")
|
||||||
|
else:
|
||||||
|
deepbooru = None
|
||||||
else:
|
else:
|
||||||
interrogate = None
|
interrogate = None
|
||||||
|
deepbooru = None
|
||||||
prompt_style_apply = gr.Button('Apply style', elem_id="style_apply")
|
prompt_style_apply = gr.Button('Apply style', elem_id="style_apply")
|
||||||
save_style = gr.Button('Create style', elem_id="style_create")
|
save_style = gr.Button('Create style', elem_id="style_create")
|
||||||
|
|
||||||
return prompt, roll, prompt_style, negative_prompt, prompt_style2, submit, interrogate, prompt_style_apply, save_style, paste, token_counter, token_button
|
return prompt, roll, prompt_style, negative_prompt, prompt_style2, submit, interrogate, deepbooru, prompt_style_apply, save_style, paste, token_counter, token_button
|
||||||
|
|
||||||
|
|
||||||
def setup_progressbar(progressbar, preview, id_part, textinfo=None):
|
def setup_progressbar(progressbar, preview, id_part, textinfo=None):
|
||||||
|
@ -481,7 +493,7 @@ def create_ui(wrap_gradio_gpu_call):
|
||||||
import modules.txt2img
|
import modules.txt2img
|
||||||
|
|
||||||
with gr.Blocks(analytics_enabled=False) as txt2img_interface:
|
with gr.Blocks(analytics_enabled=False) as txt2img_interface:
|
||||||
txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, txt2img_prompt_style_apply, txt2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=False)
|
txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, _, txt2img_prompt_style_apply, txt2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=False)
|
||||||
dummy_component = gr.Label(visible=False)
|
dummy_component = gr.Label(visible=False)
|
||||||
|
|
||||||
with gr.Row(elem_id='txt2img_progress_row'):
|
with gr.Row(elem_id='txt2img_progress_row'):
|
||||||
|
@ -641,7 +653,7 @@ def create_ui(wrap_gradio_gpu_call):
|
||||||
token_button.click(fn=update_token_counter, inputs=[txt2img_prompt, steps], outputs=[token_counter])
|
token_button.click(fn=update_token_counter, inputs=[txt2img_prompt, steps], outputs=[token_counter])
|
||||||
|
|
||||||
with gr.Blocks(analytics_enabled=False) as img2img_interface:
|
with gr.Blocks(analytics_enabled=False) as img2img_interface:
|
||||||
img2img_prompt, roll, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_prompt_style_apply, img2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=True)
|
img2img_prompt, roll, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_deepbooru, img2img_prompt_style_apply, img2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=True)
|
||||||
|
|
||||||
with gr.Row(elem_id='img2img_progress_row'):
|
with gr.Row(elem_id='img2img_progress_row'):
|
||||||
with gr.Column(scale=1):
|
with gr.Column(scale=1):
|
||||||
|
@ -804,6 +816,13 @@ def create_ui(wrap_gradio_gpu_call):
|
||||||
outputs=[img2img_prompt],
|
outputs=[img2img_prompt],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if cmd_opts.deepdanbooru:
|
||||||
|
img2img_deepbooru.click(
|
||||||
|
fn=interrogate_deepbooru,
|
||||||
|
inputs=[init_img],
|
||||||
|
outputs=[img2img_prompt],
|
||||||
|
)
|
||||||
|
|
||||||
save.click(
|
save.click(
|
||||||
fn=wrap_gradio_call(save_files),
|
fn=wrap_gradio_call(save_files),
|
||||||
_js="(x, y, z, w) => [x, y, z, selected_gallery_index()]",
|
_js="(x, y, z, w) => [x, y, z, selected_gallery_index()]",
|
||||||
|
|
|
@ -103,7 +103,12 @@
|
||||||
|
|
||||||
#style_apply, #style_create, #interrogate{
|
#style_apply, #style_create, #interrogate{
|
||||||
margin: 0.75em 0.25em 0.25em 0.25em;
|
margin: 0.75em 0.25em 0.25em 0.25em;
|
||||||
min-width: 3em;
|
min-width: 5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
#style_apply, #style_create, #deepbooru{
|
||||||
|
margin: 0.75em 0.25em 0.25em 0.25em;
|
||||||
|
min-width: 5em;
|
||||||
}
|
}
|
||||||
|
|
||||||
#style_pos_col, #style_neg_col{
|
#style_pos_col, #style_neg_col{
|
||||||
|
|
Loading…
Reference in a new issue