Move scale multiplication to the front

This commit is contained in:
C43H66N12O12S2 2022-09-18 01:05:31 +03:00 committed by GitHub
parent 8ff6f09320
commit d63dbb3acc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -20,7 +20,7 @@ def split_cross_attention_forward_v1(self, x, context=None, mask=None):
     q = self.to_q(x)
     context = default(context, x)
-    k = self.to_k(context)
+    k = self.to_k(context) * self.scale
     v = self.to_v(context)
     del context, x
@@ -85,7 +85,7 @@ def split_cross_attention_forward(self, x, context=None, mask=None):
     slice_size = q.shape[1] // steps if (q.shape[1] % steps) == 0 else q.shape[1]
     for i in range(0, q.shape[1], slice_size):
         end = i + slice_size
-        s1 = einsum('b i d, b j d -> b i j', q[:, i:end], k) * self.scale
+        s1 = einsum('b i d, b j d -> b i j', q[:, i:end], k)
         s2 = s1.softmax(dim=-1, dtype=q.dtype)
         del s1