From 254d9946439be9b6266b671db68086eb68ef1e63 Mon Sep 17 00:00:00 2001 From: FNSpd <125805478+FNSpd@users.noreply.github.com> Date: Tue, 21 Mar 2023 14:45:39 +0400 Subject: [PATCH 1/7] Update devices.py --- modules/devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/devices.py b/modules/devices.py index 52c3e7cd..6c6c7233 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -110,7 +110,7 @@ def autocast(disable=False): if disable: return contextlib.nullcontext() - if dtype == torch.float32 or shared.cmd_opts.precision == "full": + if dtype == torch.float32 or shared.cmd_opts.precision == "full" or shared.cmd_opts.upcast_sampling: return contextlib.nullcontext() return torch.autocast("cuda") From 91cfa9718cead1c9834d5fe46a3af54abeacc8e2 Mon Sep 17 00:00:00 2001 From: FNSpd <125805478+FNSpd@users.noreply.github.com> Date: Tue, 21 Mar 2023 14:47:43 +0400 Subject: [PATCH 2/7] Update sd_hijack_unet.py --- modules/sd_hijack_unet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py index 843ab66c..d37ec316 100644 --- a/modules/sd_hijack_unet.py +++ b/modules/sd_hijack_unet.py @@ -67,7 +67,7 @@ def hijack_ddpm_edit(): unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model, unet_needs_upcast) CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast) -if version.parse(torch.__version__) <= version.parse("1.13.1"): +if version.parse(torch.__version__) <= version.parse("1.13.2"): CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast) CondFunc('ldm.modules.attention.GEGLU.forward', lambda orig_func, self, x: orig_func(self.float(), x.float()).to(devices.dtype_unet), unet_needs_upcast) CondFunc('open_clip.transformer.ResidualAttentionBlock.__init__', lambda orig_func, *args, **kwargs: kwargs.update({'act_layer': GELUHijack}) and False or orig_func(*args, **kwargs), lambda _, *args, **kwargs: kwargs.get('act_layer') is None or kwargs['act_layer'] == torch.nn.GELU) From c84c9df73799e173fcfafdc9548dbd043ba28682 Mon Sep 17 00:00:00 2001 From: FNSpd <125805478+FNSpd@users.noreply.github.com> Date: Tue, 21 Mar 2023 14:50:22 +0400 Subject: [PATCH 3/7] Update sd_hijack_optimizations.py --- modules/sd_hijack_optimizations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index 2e307b5d..eaff12f0 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -337,7 +337,7 @@ def xformers_attention_forward(self, x, context=None, mask=None): dtype = q.dtype if shared.opts.upcast_attn: - q, k = q.float(), k.float() + q, k, v = q.float(), k.float(), v.float() out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=None, op=get_xformers_flash_attention_op(q, k, v)) From 2f0181405f25e1448a55697081e380020fe8c68d Mon Sep 17 00:00:00 2001 From: FNSpd <125805478+FNSpd@users.noreply.github.com> Date: Tue, 21 Mar 2023 14:53:51 +0400 Subject: [PATCH 4/7] Update lora.py --- extensions-builtin/Lora/lora.py | 1 + 1 file changed, 1 insertion(+) diff --git a/extensions-builtin/Lora/lora.py b/extensions-builtin/Lora/lora.py index 8937b585..7c371deb 100644 --- a/extensions-builtin/Lora/lora.py +++ b/extensions-builtin/Lora/lora.py @@ -178,6 +178,7 @@ def load_loras(names, multipliers=None): def lora_forward(module, input, res): + input = devices.cond_cast_unet(input) if len(loaded_loras) == 0: return res From beb7dda5d6d5baa1570721fd7ca18e236fa02521 Mon Sep 17 00:00:00 2001 From: FNSpd <125805478+FNSpd@users.noreply.github.com> Date: Fri, 24 Mar 2023 16:25:42 +0400 Subject: [PATCH 5/7] Update sd_hijack_unet.py --- modules/sd_hijack_unet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py index d37ec316..15858263 100644 --- a/modules/sd_hijack_unet.py +++ b/modules/sd_hijack_unet.py @@ -67,7 +67,7 @@ def hijack_ddpm_edit(): unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model, unet_needs_upcast) CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast) -if version.parse(torch.__version__) <= version.parse("1.13.2"): +if version.parse(torch.__version__) <= version.parse("1.13.2") or torch.cuda.is_available(): CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast) CondFunc('ldm.modules.attention.GEGLU.forward', lambda orig_func, self, x: orig_func(self.float(), x.float()).to(devices.dtype_unet), unet_needs_upcast) CondFunc('open_clip.transformer.ResidualAttentionBlock.__init__', lambda orig_func, *args, **kwargs: kwargs.update({'act_layer': GELUHijack}) and False or orig_func(*args, **kwargs), lambda _, *args, **kwargs: kwargs.get('act_layer') is None or kwargs['act_layer'] == torch.nn.GELU) From 280ed8f00fde0ece026339acdd42888ac4dc3167 Mon Sep 17 00:00:00 2001 From: FNSpd <125805478+FNSpd@users.noreply.github.com> Date: Fri, 24 Mar 2023 16:29:16 +0400 Subject: [PATCH 6/7] Update sd_hijack_optimizations.py --- modules/sd_hijack_optimizations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index eaff12f0..372555ff 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -372,7 +372,7 @@ def scaled_dot_product_attention_forward(self, x, context=None, mask=None): dtype = q.dtype if shared.opts.upcast_attn: - q, k = q.float(), k.float() + q, k, v = q.float(), k.float(), v.float() # the output of sdp = (batch, num_heads, seq_len, head_dim) hidden_states = torch.nn.functional.scaled_dot_product_attention( From a9eab236d7e8afa4d6205127904a385b2c43bb24 Mon Sep 17 00:00:00 2001 From: FNSpd <125805478+FNSpd@users.noreply.github.com> Date: Fri, 24 Mar 2023 23:08:30 +0400 Subject: [PATCH 7/7] Update devices.py --- modules/devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/devices.py b/modules/devices.py index 6c6c7233..52c3e7cd 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -110,7 +110,7 @@ def autocast(disable=False): if disable: return contextlib.nullcontext() - if dtype == torch.float32 or shared.cmd_opts.precision == "full" or shared.cmd_opts.upcast_sampling: + if dtype == torch.float32 or shared.cmd_opts.precision == "full": return contextlib.nullcontext() return torch.autocast("cuda")