diff --git a/assignments/hwk03/2a.png b/assignments/hwk03/2a.png
index e09e6e5..de9fe94 100644
Binary files a/assignments/hwk03/2a.png and b/assignments/hwk03/2a.png differ
diff --git a/assignments/hwk03/2c.png b/assignments/hwk03/2c.png
new file mode 100644
index 0000000..adec780
Binary files /dev/null and b/assignments/hwk03/2c.png differ
diff --git a/assignments/hwk03/2e.png b/assignments/hwk03/2e.png
new file mode 100644
index 0000000..972ce48
Binary files /dev/null and b/assignments/hwk03/2e.png differ
diff --git a/assignments/hwk03/EMG.m b/assignments/hwk03/EMG.m
index 26b072b..de66559 100644
--- a/assignments/hwk03/EMG.m
+++ b/assignments/hwk03/EMG.m
@@ -24,15 +24,13 @@ function [h, m, Q] = EMG(x, k, epochs, flag)
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     % TODO: Initialise cluster means using k-means
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    [~, ~, ~, D] = kmeans(x, k);
+    [idx, m] = kmeans(x, k);
 
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     % TODO: Determine the b values for all data points
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     for i = 1:num_data
-        row = D(i,:);
-        minIdx = row == min(row);
-        b(i,minIdx) = 1;
+        b(i, idx(i)) = 1;
     end
 
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -40,17 +38,17 @@ function [h, m, Q] = EMG(x, k, epochs, flag)
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     pi = zeros(k, 1);
     for i = 1:k
-        pi(i) = sum(b(:, i));
+        pi(i) = sum(b(:, i)) / num_data;
     end
 
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     % TODO: Initialize the covariance matrix estimate
     % further modifications will need to be made when doing 2(d)
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    m = zeros(k, dim);
+    % m = zeros(k, dim);
     for i = 1:k
         data = x(b(:, i) == 1, :);
-        m(i, :) = mean(data);
+        % m(i, :) = mean(data);
         S(:, :, i) = cov(data);
     end
 
@@ -65,7 +63,7 @@ function [h, m, Q] = EMG(x, k, epochs, flag)
         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
         % TODO: Store the value of the complete log-likelihood function
         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-        Q(2*n - 1) = Q_step(x, m, S, k, pi, h);
+        Q(2*n - 1) = Q_step(x, m, S, k, pi, h, flag);
 
         %%%%%%%%%%%%%%%%
@@ -77,7 +75,7 @@ function [h, m, Q] = EMG(x, k, epochs, flag)
         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
         % TODO: Store the value of the complete log-likelihood function
         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-        Q(2*n) = Q_step(x, m, S, k, pi, h);
+        Q(2*n) = Q_step(x, m, S, k, pi, h, flag);
 
     end
 
diff --git a/assignments/hwk03/E_step.m b/assignments/hwk03/E_step.m
index b64f19e..e452345 100644
--- a/assignments/hwk03/E_step.m
+++ b/assignments/hwk03/E_step.m
@@ -27,4 +27,22 @@ function [h] = E_step(x, h, pi, m, S, k)
 
         h(j, :) = parts(j, :) ./ s;
     end
+
+    % parts = zeros(k);
+    %
+    % denom = 0;
+    % for i = 1:k
+    %     N = mvnpdf(x, m(i, :), S(:, :, i));
+    %     for j = 1:num_data
+    %         parts(i) = parts(i) + pi(i) * N(j);
+    %     end
+    %     denom = denom + parts(i);
+    % end
+    %
+    % for i = 1:k
+    %     h(:, i) = parts(i) ./ denom;
+    % end
+
+
 end
\ No newline at end of file
diff --git a/assignments/hwk03/M_step.m b/assignments/hwk03/M_step.m
index ddea6f6..a4d8908 100644
--- a/assignments/hwk03/M_step.m
+++ b/assignments/hwk03/M_step.m
@@ -47,9 +47,9 @@ function [S, m, pi] = M_step(x, h, S, k, flag)
     % Calculate the covariance matrix estimate
     % further modifications will need to be made when doing 2(d)
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    S = zeros(dim, dim, k) + eps;
+    S = zeros(dim, dim, k);
     for i = 1:k
-        s = zeros(dim, dim);
+        s = zeros(dim, dim) + eye(dim) * eps;
         for j = 1:num_data
             s = s + h(j, i) * (x(j, :) - m(i, :))' * (x(j, :) - m(i, :));
         end
@@ -59,12 +59,18 @@ function [S, m, pi] = M_step(x, h, S, k, flag)
         % % MAKE IT SYMMETRIC https://stackoverflow.com/a/38730499
         % S(:, :, i) = (s + s') / 2;
         % https://www.mathworks.com/matlabcentral/answers/366140-eig-gives-a-negative-eigenvalue-for-a-positive-semi-definite-matrix#answer_290270
-        s = (s + s') / 2;
+        % s = (s + s') / 2;
         % https://www.mathworks.com/matlabcentral/answers/57411-matlab-sometimes-produce-a-covariance-matrix-error-with-non-postive-semidefinite#answer_69524
-        [V, D] = eig(s);
-        s = V * max(D, eps) / V;
+        % [V, D] = eig(s);
+        % s = V * max(D, eps) / V;
         S(:, :, i) = s;
     end
+
+    if flag
+        for i = 1:k
+            S(:, :, i) = S(:, :, i) + lambda * eye(dim) / 2;
+        end
+    end
+
 end
\ No newline at end of file
diff --git a/assignments/hwk03/Makefile b/assignments/hwk03/Makefile
new file mode 100644
index 0000000..64a0263
--- /dev/null
+++ b/assignments/hwk03/Makefile
@@ -0,0 +1,11 @@
+
+HANDIN_PDF := hw3_sol.pdf
+HANDIN_ZIP := hw3_code.zip
+
+all: $(HANDIN_PDF) $(HANDIN_ZIP)
+
+$(HANDIN_PDF): hw3_sol.typ
+	typst compile $< $@
+
+$(HANDIN_ZIP): E_step.m EMG.m M_step.m Problem2.m Q_step.m goldy.jpg stadium.jpg
+	zip $(HANDIN_ZIP) $^
\ No newline at end of file
diff --git a/assignments/hwk03/Problem2.m b/assignments/hwk03/Problem2.m
index 039da8b..6367e80 100644
--- a/assignments/hwk03/Problem2.m
+++ b/assignments/hwk03/Problem2.m
@@ -22,32 +22,32 @@ function [] = Problem2()
     %%%%%%%%%%
     % 2(a,b) %
     %%%%%%%%%%
-    index = 1;
-    figure();
-    for k = 4:4:12
-        fprintf("k=%d\n", k);
+    % index = 1;
+    % figure();
+    % for k = 4:4:12
+    %     fprintf("k=%d\n", k);
 
-        % call EM on data
-        [h, m, Q] = EMG(stadium_x, k, epochs, false);
+    %     % call EM on data
+    %     [h, m, Q] = EMG(stadium_x, k, epochs, false);
 
-        % get compressed version of image
-        [~,class_index] = max(h,[],2);
-        compress = m(class_index,:);
+    %     % get compressed version of image
+    %     [~,class_index] = max(h,[],2);
+    %     compress = m(class_index,:);
 
-        % 2(a), plot compressed image
-        subplot(3,2,index)
-        imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
-        index = index + 1;
+    %     % 2(a), plot compressed image
+    %     subplot(3,2,index)
+    %     imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
+    %     index = index + 1;
 
-        % 2(b), plot complete data likelihood curves
-        subplot(3,2,index)
-        x = 1:size(Q);
-        c = repmat([1 0 0; 0 1 0], length(x)/2, 1);
-        scatter(x,Q,20,c);
-        index = index + 1;
-        pause;
-    end
-    shg
+    %     % 2(b), plot complete data likelihood curves
+    %     subplot(3,2,index)
+    %     x = 1:size(Q);
+    %     c = repmat([1 0 0; 0 1 0], length(x)/2, 1);
+    %     scatter(x,Q,20,c);
+    %     index = index + 1;
+    %     pause;
+    % end
+    % shg
 
     %%%%%%%%
     % 2(c) %
     %%%%%%%%
@@ -63,14 +63,18 @@ function [] = Problem2()
     [~,class_index] = max(h,[],2);
     compress = m(class_index,:);
     figure();
-    subplot(2,1,1)
+    subplot(3,1,1)
     imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
 
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     % TODO: plot goldy image after using clusters from k-means
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     % begin code here
-    [~, ~, ~, D] = kmeans(goldy_x, k);
+    [idx, m] = kmeans(goldy_x, k);
+    compress = m(idx,:);
+    subplot(3,1,2)
+    imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]));
+    pause;
     % end code here
     shg
 
@@ -84,7 +88,7 @@ function [] = Problem2()
     % plot goldy image using clusters from improved EM
     [~,class_index] = max(h,[],2);
     compress = m(class_index,:);
-    figure();
+    subplot(1,1,1)
     imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
     shg
 end
\ No newline at end of file
diff --git a/assignments/hwk03/Q_step.m b/assignments/hwk03/Q_step.m
index 037a84d..09807b0 100644
--- a/assignments/hwk03/Q_step.m
+++ b/assignments/hwk03/Q_step.m
@@ -1,4 +1,4 @@
-function [LL] = Q_step(x, m, S, k, pi, h)
+function [LL] = Q_step(x, m, S, k, pi, h, flag)
     [num_data, ~] = size(x);
     LL = 0;
     for i = 1:k
diff --git a/assignments/hwk03/hw3_sol.typ b/assignments/hwk03/hw3_sol.typ
index 11bd0bc..5f69da5 100644
--- a/assignments/hwk03/hw3_sol.typ
+++ b/assignments/hwk03/hw3_sol.typ
@@ -29,20 +29,20 @@ Solved as:
 
 - $ frac(diff E, diff w_(1,j)) &= - sum_t frac(diff E, diff y^t) frac(diff y^t, diff z^t_h) frac(diff z^t_h, diff w_(1,j)) \
-  &= - sum_t (frac(r^t, y^t) - frac(1-r^t, 1-y^t)) (y^t (1-y^t) v_h) (x_h cases(0 "if" ww_1 dot xx <0, 1 "otherwise")) \
-  &= - sum_t (r^t - y^t) v_h x_h cases(0 "if" ww_1 dot xx <0, 1 "otherwise") \
+  &= - sum_t (frac(r^t, y^t) - frac(1-r^t, 1-y^t)) (y^t (1-y^t) v_1) (x_j cases(0 "if" ww_1 dot xx <0, 1 "otherwise")) \
+  &= - sum_t (r^t - y^t) v_1 x_j cases(0 "if" ww_1 dot xx <0, 1 "otherwise") \
 $
 
 - $ frac(diff E, diff w_(2,j)) &= - sum_t frac(diff E, diff y^t) frac(diff y^t, diff z^t_h) frac(diff z^t_h, diff w_(2,j)) \
-  &= - sum_t (r^t - y^t) v_h x_h (1-tanh^2(ww_2 dot xx)) \
+  &= - sum_t (r^t - y^t) v_2 x_j (1-tanh^2(ww_2 dot xx)) \
 $
 
 Updates:
 
 - $Delta v_h = eta sum_t (r^t-y^t) z^t_h$
-- $Delta w_(1,j) = eta sum_t (r^t - y^t) v_h x_h cases(0 "if" ww_1 dot xx <0, 1 "otherwise")$
-- $Delta w_(2,j) = eta sum_t (r^t - y^t) v_h x_h (1-tanh^2(ww_2 dot xx))$
+- $Delta w_(1,j) = eta sum_t (r^t - y^t) v_1 x_j cases(0 "if" ww_1 dot xx <0, 1 "otherwise")$
+- $Delta w_(2,j) = eta sum_t (r^t - y^t) v_2 x_j (1-tanh^2(ww_2 dot xx))$
 
 = Problem 1b
@@ -72,10 +72,51 @@ Updates:
 
 = Problem 2a + 2b
 
+For this problem I see a gentle increase in the likelihood value after each of
+the E and M steps. There is an issue with the first step, but I am not sure
+what causes it.
+
+In general, the higher $k$ was, the more colors were available, and the better
+the resulting color mapping. The last run, $k = 12$, had the best "resolution"
+(not true resolution, since the pixel density didn't change, but shapes are
+rendered in more detail).
+
 #image("2a.png")
 
+#pagebreak()
+
 = Problem 2c
 
+For this version, k-means performed a lot better than my initial EM step, even
+with $k = 7$. I suspect that between the EM steps the classification of the
+data shifts, spreading out inaccurate values, while k-means always operates on
+the original data.
+
+#image("2c.png")
+
+#pagebreak()
+
 = Problem 2d
 
-MLE of $Sigma_i$
\ No newline at end of file
+For the $Sigma$ update step, I added this change:
+
+#let rtext(t) = {
+  set text(red)
+  t
+}
+
+$
+  Sigma_i &= frac(1, N_i) sum_(t=1)^N gamma(z^t_i) (x^t - m_i) (x^t - m_i)^T
+  rtext(- frac(lambda, 2) sum_(i=1)^k sum_(j=1)^d (Sigma^(-1)_i)_("jj"))
+$
+
+The overall maximum-likelihood solution could not be derived in closed form
+because of the difficulty of taking the logarithm of a sum.
+
+= Problem 2e
+
+After implementing this, the result was a lot better. I believe the
+regularization term helps because it inflates the $Sigma$s, which makes the
+algorithm converge faster.
+
+#image("2e.png")
\ No newline at end of file
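
Note on the EMG.m initialization hunks above: a minimal standalone sketch of how the pieces fit together, assuming x, k, num_data, and dim are in scope as in the skeleton. It shows why switching from the distance matrix D to the assignment vector idx simplifies building the one-hot matrix b: kmeans already returns the index of the closest centroid for every point, so no min-over-rows pass is needed.

% Sketch (not the graded file): initialize GMM parameters from k-means,
% mirroring the EMG.m hunks above. kmeans returns per-point cluster
% indices (idx) and the centroids (m) directly.
[idx, m] = kmeans(x, k);
b = zeros(num_data, k);
for i = 1:num_data
    b(i, idx(i)) = 1;                  % one-hot hard assignment per point
end
pi = zeros(k, 1);
for i = 1:k
    pi(i) = sum(b(:, i)) / num_data;   % mixing proportion, now normalized
    data = x(b(:, i) == 1, :);         % points hard-assigned to cluster i
    S(:, :, i) = cov(data);            % initial covariance estimate
end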
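
Note on the M_step.m and 2(d) hunks: a sketch of the regularized covariance update under two assumptions that are not visible in the hunk context: that lambda is defined somewhere in M_step.m, and that each scatter matrix is normalized by the soft count sum(h(:, i)) in the lines between the two hunks. The names mirror the diff; this is an illustration, not the submitted code.

% Sketch: responsibility-weighted covariance M-step with the 2(d)
% diagonal regularizer toggled by flag.
for i = 1:k
    % Seed with eye(dim) * eps so the estimate stays full rank even for
    % clusters whose responsibilities are nearly all zero.
    s = zeros(dim, dim) + eye(dim) * eps;
    for j = 1:num_data
        s = s + h(j, i) * (x(j, :) - m(i, :))' * (x(j, :) - m(i, :));
    end
    S(:, :, i) = s / sum(h(:, i));     % assumed normalization by N_i
end
if flag
    for i = 1:k
        % 2(d): add lambda/2 along the diagonal, the term the penalty
        % -(lambda/2) * trace(inv(Sigma_i)) contributes to the update
        % in the write-up above.
        S(:, :, i) = S(:, :, i) + lambda * eye(dim) / 2;
    end
end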