Call weighted_forward during training

2023-01-12 15:34:11 +01:00 · 2023-01-12 15:34:11 +01:00 · bc50936745
commit bc50936745
parent 21642000b3
2 changed files with 4 additions and 2 deletions
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -640,13 +640,14 @@ def train_hypernetwork(id_task, hypernetwork_name, learn_rate, batch_size, gradi
                
                with devices.autocast():
                    x = batch.latent_sample.to(devices.device, non_blocking=pin_memory)
+                    w = batch.weight.to(devices.device, non_blocking=pin_memory)
                    if tag_drop_out != 0 or shuffle_tags:
                        shared.sd_model.cond_stage_model.to(devices.device)
                        c = shared.sd_model.cond_stage_model(batch.cond_text).to(devices.device, non_blocking=pin_memory)
                        shared.sd_model.cond_stage_model.to(devices.cpu)
                    else:
                        c = stack_conds(batch.cond).to(devices.device, non_blocking=pin_memory)
-                    loss = shared.sd_model(x, c)[0] / gradient_step
+                    loss = shared.sd_model.weighted_forward(x, c, w)[0] / gradient_step
                    del x
                    del c

--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -480,6 +480,7 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st
            
                with devices.autocast():
                    x = batch.latent_sample.to(devices.device, non_blocking=pin_memory)
+                    w = batch.weight.to(devices.device, non_blocking=pin_memory)
                    c = shared.sd_model.cond_stage_model(batch.cond_text)

                    if is_training_inpainting_model:
@@ -490,7 +491,7 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st
                    else:
                        cond = c

-                    loss = shared.sd_model(x, cond)[0] / gradient_step
+                    loss = shared.sd_model.weighted_forward(x, cond, w)[0] / gradient_step
                    del x

                    _loss_step += loss.item()