Fixes race condition in training when VAE is unloaded
set_current_image can attempt to use the VAE when it is unloaded to the CPU while training
This commit is contained in:
parent
f2b69709ea
commit
39541d7725
2 changed files with 9 additions and 0 deletions
|
@ -390,7 +390,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
|
||||||
with torch.autocast("cuda"):
|
with torch.autocast("cuda"):
|
||||||
ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size)
|
ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size)
|
||||||
|
|
||||||
|
old_parallel_processing_allowed = shared.parallel_processing_allowed
|
||||||
|
|
||||||
if unload:
|
if unload:
|
||||||
|
shared.parallel_processing_allowed = False
|
||||||
shared.sd_model.cond_stage_model.to(devices.cpu)
|
shared.sd_model.cond_stage_model.to(devices.cpu)
|
||||||
shared.sd_model.first_stage_model.to(devices.cpu)
|
shared.sd_model.first_stage_model.to(devices.cpu)
|
||||||
|
|
||||||
|
@ -531,6 +534,7 @@ Last saved image: {html.escape(last_saved_image)}<br/>
|
||||||
|
|
||||||
filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt')
|
filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt')
|
||||||
save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename)
|
save_hypernetwork(hypernetwork, checkpoint, hypernetwork_name, filename)
|
||||||
|
shared.parallel_processing_allowed = old_parallel_processing_allowed
|
||||||
|
|
||||||
return hypernetwork, filename
|
return hypernetwork, filename
|
||||||
|
|
||||||
|
|
|
@ -273,7 +273,11 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc
|
||||||
shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
|
shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
|
||||||
with torch.autocast("cuda"):
|
with torch.autocast("cuda"):
|
||||||
ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file, batch_size=batch_size)
|
ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file, batch_size=batch_size)
|
||||||
|
|
||||||
|
old_parallel_processing_allowed = shared.parallel_processing_allowed
|
||||||
|
|
||||||
if unload:
|
if unload:
|
||||||
|
shared.parallel_processing_allowed = False
|
||||||
shared.sd_model.first_stage_model.to(devices.cpu)
|
shared.sd_model.first_stage_model.to(devices.cpu)
|
||||||
|
|
||||||
embedding.vec.requires_grad = True
|
embedding.vec.requires_grad = True
|
||||||
|
@ -410,6 +414,7 @@ Last saved image: {html.escape(last_saved_image)}<br/>
|
||||||
filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt')
|
filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt')
|
||||||
save_embedding(embedding, checkpoint, embedding_name, filename, remove_cached_checksum=True)
|
save_embedding(embedding, checkpoint, embedding_name, filename, remove_cached_checksum=True)
|
||||||
shared.sd_model.first_stage_model.to(devices.device)
|
shared.sd_model.first_stage_model.to(devices.device)
|
||||||
|
shared.parallel_processing_allowed = old_parallel_processing_allowed
|
||||||
|
|
||||||
return embedding, filename
|
return embedding, filename
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue