diff --git a/assignments/hwk03/EMG.m b/assignments/hwk03/EMG.m
index 2b96290..bc91dae 100644
--- a/assignments/hwk03/EMG.m
+++ b/assignments/hwk03/EMG.m
@@ -10,55 +10,57 @@
 % Q - vector of values of the complete data log-likelihood function
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 function [h, m, Q] = EMG(x, k, epochs, flag)
+
+    % variables
+    num_clusters = k;                    % number of clusters
+    eps = 1e-15;                         % small value that can be used to avoid obtaining 0's
+    lambda = 1e-3;                       % value for improved version of EM
+    [num_data, dim] = size(x);
+    h = zeros(num_data, num_clusters);   % expectation of data point being part of a cluster
+    S = zeros(dim, dim, num_clusters);   % covariance matrix for each cluster
+    b = zeros(num_data, num_clusters);   % cluster assignments, only used for initialization of pi and S
+    Q = zeros(epochs*2, 1);              % vector that can hold complete data log-likelihood after each E and M step
+
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % TODO: Initialise cluster means using k-means
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    [~, m] = kmeans(x, k);   % kmeans returns [assignments, centroids]; the centroids are the initial means
+
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % TODO: Determine the b values for all data points
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % TODO: Initialize pi's (mixing coefficients)
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % TODO: Initialize the covariance matrix estimate
+    % further modifications will need to be made when doing 2(d)
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+    % Main EM loop
+    for n=1:epochs
+        %%%%%%%%%%%%%%%%
+        % E-step
+        %%%%%%%%%%%%%%%%
+        fprintf('E-step, epoch #%d\n', n);
+        [Q, h] = E_step(x, Q, h, pi, m, S, k);
-    % variables
-    num_clusters = k;                    % number of clusters
-    eps = 1e-15;                         % small value that can be used to avoid obtaining 0's
-    lambda = 1e-3;                       % value for improved version of EM
-    [num_data, dim] = size(x);
-    h = zeros(num_data, num_clusters);   % expectation of data point being part of a cluster
-    S = zeros(dim, dim, num_clusters);   % covariance matrix for each cluster
-    b = zeros(num_data,num_clusters);    % cluster assignments, only used for intialization of pi and S
-    Q = zeros(epochs*2,1);               % vector that can hold complete data log-likelihood after each E and M step
-
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: Initialise cluster means using k-means
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: Determine the b values for all data points
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: Initialize pi's (mixing coefficients)
+        % TODO: Store the value of the complete log-likelihood function
         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: Initialize the covariance matrix estimate
-    % further modifications will need to be made when doing 2(d)
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+        %%%%%%%%%%%%%%%%
+        % M-step
+        %%%%%%%%%%%%%%%%
+        fprintf('M-step, epoch #%d\n', n);
+        [Q, S, m] = M_step(x, Q, h, S, k);
-    % Main EM loop
-    for n=1:epochs
-        %%%%%%%%%%%%%%%%
-        % E-step
-        %%%%%%%%%%%%%%%%
-        fprintf('E-step, epoch #%d\n', n);
-        [Q, h] = E_step(x, Q, h, pi, m, S, k);
-
-        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-        % TODO: Store the value of the complete log-likelihood function
-        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-        %%%%%%%%%%%%%%%%
-        % M-step
-        %%%%%%%%%%%%%%%%
-        fprintf('M-step, epoch #%d\n', n);
-        [Q, S, m] = M_step(x, Q, h, S, k);
-
-        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-        % TODO: Store the value of the complete log-likelihood function
-        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+        % TODO: Store the value of the complete log-likelihood function
+        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    end
 end
\ No newline at end of file
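Not part of the patch: a minimal sketch of one way the initialization TODOs in EMG.m above could be filled in, assuming the hard k-means assignments are reused for b and that pi and S are then estimated from those assignments. It follows the skeleton's variable names (x, num_data, num_clusters, eps, m, b, S); idx is a name introduced only for this sketch, and it keeps the assignment indices that the committed kmeans call discards.

% Sketch: initialize means, b, pi, and S from a k-means pass
% (kmeans is from the Statistics and Machine Learning Toolbox).
[idx, m] = kmeans(x, num_clusters);            % idx: hard assignment of each point, m: k x dim centroids
b = zeros(num_data, num_clusters);
b(sub2ind(size(b), (1:num_data)', idx)) = 1;   % one-hot encoding of the k-means assignments
pi = sum(b, 1) / num_data;                     % mixing coefficients from assignment counts
for j = 1:num_clusters
    d = x(b(:,j) == 1, :) - m(j,:);            % deviations of cluster j's points from its mean
    S(:,:,j) = (d' * d) / max(sum(b(:,j)), eps);   % per-cluster sample covariance
end

For the flag-controlled "improved version", one guess (only a guess, based on the lambda comment in the skeleton) is that each S(:,:,j) is meant to be regularized, e.g. by adding lambda*eye(dim).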
diff --git a/assignments/hwk03/E_step.m b/assignments/hwk03/E_step.m
index 460270f..2d7c0a6 100644
--- a/assignments/hwk03/E_step.m
+++ b/assignments/hwk03/E_step.m
@@ -12,10 +12,11 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 function [Q, h] = E_step(x, Q, h, pi, m, S, k)
-    [num_data, ~] = size(x);
-
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: perform E-step of EM algorithm
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    [num_data, ~] = size(x);
+
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % TODO: perform E-step of EM algorithm
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    z = 1 + 1;   % placeholder with no effect; remove once the E-step is implemented
 end
\ No newline at end of file
diff --git a/assignments/hwk03/M_step.m b/assignments/hwk03/M_step.m
index 4505b1a..381db95 100644
--- a/assignments/hwk03/M_step.m
+++ b/assignments/hwk03/M_step.m
@@ -11,24 +11,24 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 function [Q, S, m] = M_step(x, Q, h, S, k)
-    % get size of data
-    [num_data, dim] = size(x);
-    eps = 1e-15;
-    lambda = 1e-3;   % value for improved version of EM
+    % get size of data
+    [num_data, dim] = size(x);
+    eps = 1e-15;
+    lambda = 1e-3;   % value for improved version of EM
 
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: update mixing coefficients
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % TODO: update mixing coefficients
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: update cluster means
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % TODO: update cluster means
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: Calculate the covariance matrix estimate
-    % further modifications will need to be made when doing 2(d)
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % TODO: Calculate the covariance matrix estimate
+    % further modifications will need to be made when doing 2(d)
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 end
\ No newline at end of file
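Also not part of the patch: a sketch of what the two TODO bodies above might look like for a Gaussian mixture, written against the skeletons' names (x, h, pi, m, S, k, num_data, dim, eps, lambda) and assuming mvnpdf from the Statistics and Machine Learning Toolbox is available. Where the resulting value is written into Q (the per-half-step index) is left to the caller, and the lambda regularization line is an assumption about what the "improved version" in 2(d)/2(e) refers to.

% E-step sketch: responsibilities and expected complete-data log-likelihood
eps = 1e-15;                                        % same guard value as in the skeleton
p = zeros(num_data, k);
for j = 1:k
    p(:,j) = pi(j) * mvnpdf(x, m(j,:), S(:,:,j));   % pi_j * N(x_t | m_j, S_j) for every point
end
h = p ./ max(sum(p, 2), eps);                       % normalize rows so each point's responsibilities sum to 1
Q_val = sum(sum(h .* log(max(p, eps))));            % sum_t sum_j h(t,j) * log(pi_j * N(x_t | m_j, S_j))

% M-step sketch: re-estimate pi, m, and S from the responsibilities
Nj = sum(h, 1);                                     % effective number of points per cluster (1 x k)
pi = Nj / num_data;
m = (h' * x) ./ max(Nj', eps);                      % k x dim weighted means
for j = 1:k
    d = x - m(j,:);
    S(:,:,j) = (d' * (d .* h(:,j))) / max(Nj(j), eps);
    % assumed improved version (flag set): S(:,:,j) = S(:,:,j) + lambda * eye(dim);
end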
diff --git a/assignments/hwk03/Problem2.m b/assignments/hwk03/Problem2.m
index 447304f..72d674a 100644
--- a/assignments/hwk03/Problem2.m
+++ b/assignments/hwk03/Problem2.m
@@ -1,87 +1,87 @@
 function [] = Problem2()
-    % file names
-    stadium_fn = "stadium.jpg";
-    goldy_fn = "goldy.jpg";
+    % file names
+    stadium_fn = "stadium.jpg";
+    goldy_fn = "goldy.jpg";
 
-    % load image and preprocess it
-    goldy_img = double(imread(goldy_fn))/255;
-    stadium_img = double(imread(stadium_fn))/255;
-
-    % convert RGB images
-    goldy_x = reshape(permute(goldy_img, [2 1 3]), [], 3);     % convert img from NxMx3 to N*Mx3
-    stadium_x = reshape(permute(stadium_img, [2 1 3]), [], 3);
+    % load image and preprocess it
+    goldy_img = double(imread(goldy_fn))/255;
+    stadium_img = double(imread(stadium_fn))/255;
+
+    % convert RGB images
+    goldy_x = reshape(permute(goldy_img, [2 1 3]), [], 3);     % convert img from NxMx3 to N*Mx3
+    stadium_x = reshape(permute(stadium_img, [2 1 3]), [], 3);
 
-    % get dimensionality of stadium image
-    [height, width, depth] = size(stadium_img);
+    % get dimensionality of stadium image
+    [height, width, depth] = size(stadium_img);
 
-    % set epochs (number of iterations to run algorithm for)
-    epochs = 10;
+    % set epochs (number of iterations to run algorithm for)
+    epochs = 10;
 
-    %%%%%%%%%%
-    % 2(a,b) %
-    %%%%%%%%%%
-    index = 1;
-    figure();
-    for k = 4:4:12
-        fprintf("k=%d\n", k);
-
-        % call EM on data
-        [h, m, Q] = EMG(stadium_x, k, epochs, false);
-
-        % get compressed version of image
-        [~,class_index] = max(h,[],2);
-        compress = m(class_index,:);
-
-        % 2(a), plot compressed image
-        subplot(3,2,index)
-        imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
-        index = index + 1;
-
-        % 2(b), plot complete data likelihood curves
-        subplot(3,2,index)
-        x = 1:size(Q);
-        c = repmat([1 0 0; 0 1 0], length(x)/2, 1);
-        scatter(x,Q,20,c);
-        index = index + 1;
-    end
-    shg
+    %%%%%%%%%%
+    % 2(a,b) %
+    %%%%%%%%%%
+    index = 1;
+    figure();
+    for k = 4:4:12
+        fprintf("k=%d\n", k);
 
-    %%%%%%%%
-    % 2(c) %
-    %%%%%%%%
-    % get dimensionality of goldy image, and set k=7
-    [height, width, depth] = size(goldy_img);
-    k = 7;
+        % call EM on data
+        [h, m, Q] = EMG(stadium_x, k, epochs, false);
 
-    % run EM on goldy image
-    [h, m, Q] = EMG(goldy_x, k, epochs, false);
-
-    % plot goldy image using clusters from EM
+        % get compressed version of image
         [~,class_index] = max(h,[],2);
         compress = m(class_index,:);
-    figure();
-    subplot(2,1,1)
+
+        % 2(a), plot compressed image
+        subplot(3,2,index)
         imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
+        index = index + 1;
 
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: plot goldy image after using clusters from k-means
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % begin code here
+        % 2(b), plot complete data likelihood curves
+        subplot(3,2,index)
+        x = 1:numel(Q);   % numel gives the intended scalar length; size(Q) is a 2-element vector
+        c = repmat([1 0 0; 0 1 0], length(x)/2, 1);
+        scatter(x,Q,20,c);
+        index = index + 1;
+    end
+    shg
 
-    % end code here
-    shg
-
-    %%%%%%%%
-    % 2(e) %
-    %%%%%%%%
-    % run improved version of EM on goldy image
-    [h, m, Q] = EMG(goldy_x, k, epochs, true);
+    %%%%%%%%
+    % 2(c) %
+    %%%%%%%%
+    % get dimensionality of goldy image, and set k=7
+    [height, width, depth] = size(goldy_img);
+    k = 7;
 
-    % plot goldy image using clusters from improved EM
-    [~,class_index] = max(h,[],2);
-    compress = m(class_index,:);
-    figure();
-    imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
-    shg
+    % run EM on goldy image
+    [h, m, Q] = EMG(goldy_x, k, epochs, false);
+
+    % plot goldy image using clusters from EM
+    [~,class_index] = max(h,[],2);
+    compress = m(class_index,:);
+    figure();
+    subplot(2,1,1)
+    imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
+
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % TODO: plot goldy image after using clusters from k-means
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % begin code here
+
+    % end code here
+    shg
+
+    %%%%%%%%
+    % 2(e) %
+    %%%%%%%%
+    % run improved version of EM on goldy image
+    [h, m, Q] = EMG(goldy_x, k, epochs, true);
+
+    % plot goldy image using clusters from improved EM
+    [~,class_index] = max(h,[],2);
+    compress = m(class_index,:);
+    figure();
+    imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
+    shg
 end
\ No newline at end of file
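Not part of the patch: the 2(c) TODO above ("plot goldy image after using clusters from k-means") could be filled in roughly as below, reusing the reshape/permute pattern already used for the EM plot and the existing goldy_x, k, height, width, and depth variables. kidx, kmeans_m, and kmeans_compress are names made up for this sketch.

% Sketch: compress the goldy image with plain k-means for comparison
[kidx, kmeans_m] = kmeans(goldy_x, k);            % hard assignments and k x 3 centroid colors
kmeans_compress = kmeans_m(kidx, :);              % replace each pixel by its centroid color
subplot(2,1,2)                                    % second panel, below the EM result
imagesc(permute(reshape(kmeans_compress, [width, height, depth]), [2 1 3]))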
diff --git a/assignments/hwk03/hw3_sol.typ b/assignments/hwk03/hw3_sol.typ
index c944f40..04507d4 100644
--- a/assignments/hwk03/hw3_sol.typ
+++ b/assignments/hwk03/hw3_sol.typ
@@ -47,7 +47,7 @@ Updates:
 
 = Problem 1b
 - $E(ww,vv|XX) = - sum_t r^t log y^t + (1 - r^t) log (1 - y^t)$
-- $y^t = "sigmoid"(v_2 z_2 + v_1 z_1 + v_0)$
+- $y^t = "sigmoid"(v_2 z^t_2 + v_1 z^t_1 + v_0)$
 - $z^t_1 = "ReLU"(w_2 x^t_2 + w_1 x^t_1 + w_0)$
 - $z^t_2 = tanh(w_2 x^t_2 + w_1 x^t_1 + w_0)$
 
@@ -62,4 +62,8 @@
 &= - sum_t frac(diff E, diff y^t) (frac(diff y^t, diff z^t_1) frac(diff z^t_1, diff w_j) + frac(diff y^t, diff z^t_2) frac(diff z^t_2, diff w_j)) \
 &= - sum_t (frac(r^t, y^t) - frac(1-r^t, 1-y^t)) (frac(diff y^t, diff z^t_1) frac(diff z^t_1, diff w_j) + frac(diff y^t, diff z^t_2) frac(diff z^t_2, diff w_j)) \
 &= - sum_t (frac(r^t-y^t, y^t (1-y^t))) (frac(diff y^t, diff z^t_1) frac(diff z^t_1, diff w_j) + frac(diff y^t, diff z^t_2) frac(diff z^t_2, diff w_j)) \
+&= - sum_t (frac(r^t-y^t, y^t (1-y^t))) (y^t (1-y^t) v_1 frac(diff z^t_1, diff w_j) + y^t (1-y^t) v_2 frac(diff z^t_2, diff w_j)) \
+&= - sum_t (r^t-y^t) (v_1 frac(diff z^t_1, diff w_j) + v_2 frac(diff z^t_2, diff w_j)) \
+&= - sum_t (r^t-y^t) (x^t_j v_1 cases(0 "if" ww dot xx^t < 0, 1 "otherwise") + x^t_j v_2 (1 - tanh^2 (ww dot xx^t))) \
+&= - sum_t (r^t-y^t) x^t_j (v_1 cases(0 "if" ww dot xx^t < 0, 1 "otherwise") + v_2 (1 - tanh^2 (ww dot xx^t))) \
 $
\ No newline at end of file
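Not part of the patch: a quick finite-difference check of the final gradient expression in the Problem 1b derivation above, run on toy data. Everything here (T, X, r, w, v, cross_entropy) is invented for the check; v is ordered [v_0; v_1; v_2] and the first column of X is the bias input x_0 = 1. Save it as a script so the local function at the end is picked up.

% Compare the derived gradient dE/dw_j with a central finite difference.
rng(0);
T = 50;
X = [ones(T,1), randn(T,2)];                 % columns: x_0 (bias), x_1, x_2
r = double(rand(T,1) > 0.5);                 % binary targets
w = randn(3,1);
v = randn(3,1);                              % v = [v_0; v_1; v_2]

a  = X * w;                                  % w . x^t for every t
y  = 1 ./ (1 + exp(-(v(1) + v(2)*max(a,0) + v(3)*tanh(a))));

% analytic gradient: -sum_t (r^t - y^t) x^t_j (v_1 [w.x^t >= 0] + v_2 (1 - tanh^2(w.x^t)))
g = -X' * ((r - y) .* (v(2)*(a >= 0) + v(3)*(1 - tanh(a).^2)));

% numerical gradient
gfd = zeros(3,1);
delta = 1e-6;
for j = 1:3
    e = zeros(3,1);
    e(j) = delta;
    gfd(j) = (cross_entropy(w+e, v, X, r) - cross_entropy(w-e, v, X, r)) / (2*delta);
end
fprintf('max |analytic - numerical| = %.3g\n', max(abs(g - gfd)));

function val = cross_entropy(w, v, X, r)
    a   = X * w;
    y   = 1 ./ (1 + exp(-(v(1) + v(2)*max(a,0) + v(3)*tanh(a))));
    val = -sum(r .* log(y) + (1 - r) .* log(1 - y));
end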