This commit is contained in:
Michael Zhang 2023-11-12 11:42:19 -06:00
parent 9eee507d73
commit 9da52cfbfa
5 changed files with 143 additions and 136 deletions

View file

@ -11,54 +11,56 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [h, m, Q] = EMG(x, k, epochs, flag) function [h, m, Q] = EMG(x, k, epochs, flag)
% EMG  EM clustering driver: preallocates h, S, b and Q, then alternates
%      E_step and M_step for `epochs` iterations.
% NOTE(review): this text is a side-by-side commit diff; most rows carry the old
%      and the new column of the same line, unpaired rows appear only once.
% Inputs (as used below):
%   x      - data matrix; size(x) is read as [num_data, dim]
%   k      - number of clusters
%   epochs - number of E/M iterations; Q is preallocated with 2*epochs entries
%   flag   - not read anywhere in the visible body; Problem2 passes false/true,
%            presumably to select the "improved" EM variant - TODO confirm
% Outputs:
%   h - num_data x k matrix, zero-initialised here and passed through E_step
%   m - third output of M_step (the only place it is assigned in this function)
%   Q - (2*epochs) x 1 vector, zero-initialised here and threaded through both steps
% variables % variables
num_clusters = k; % number of clusters num_clusters = k; % number of clusters
eps = 1e-15; % small value that can be used to avoid obtaining 0's eps = 1e-15; % small value that can be used to avoid obtaining 0's
lambda = 1e-3; % value for improved version of EM lambda = 1e-3; % value for improved version of EM
[num_data, dim] = size(x); [num_data, dim] = size(x);
h = zeros(num_data, num_clusters); % expectation of data point being part of a cluster h = zeros(num_data, num_clusters); % expectation of data point being part of a cluster
S = zeros(dim, dim, num_clusters); % covariance matrix for each cluster S = zeros(dim, dim, num_clusters); % covariance matrix for each cluster
b = zeros(num_data,num_clusters); % cluster assignments, only used for intialization of pi and S b = zeros(num_data,num_clusters); % cluster assignments, only used for intialization of pi and S
Q = zeros(epochs*2,1); % vector that can hold complete data log-likelihood after each E and M step Q = zeros(epochs*2,1); % vector that can hold complete data log-likelihood after each E and M step
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Initialise cluster means using k-means % TODO: Initialise cluster means using k-means
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% NOTE(review): the result is stored in `means`, which is never read again in
% this function, while `m` (passed to E_step below) is not assigned before the
% first E_step call. If this is the toolbox kmeans, a single output is
% presumably the cluster-index vector and the centroids are the second output
% - confirm and capture the centroids into `m`.
means = kmeans(x, k);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Determine the b values for all data points
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Initialize pi's (mixing coefficients)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Initialize the covariance matrix estimate
% further modifications will need to be made when doing 2(d)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Main EM loop
for n=1:epochs
%%%%%%%%%%%%%%%%
% E-step
%%%%%%%%%%%%%%%%
fprintf('E-step, epoch #%d\n', n);
% NOTE(review): `pi` is not assigned anywhere above, so until the pi TODO is
% filled in it presumably resolves to MATLAB's built-in constant - verify.
[Q, h] = E_step(x, Q, h, pi, m, S, k);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Determine the b values for all data points % TODO: Store the value of the complete log-likelihood function
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%
% M-step
%%%%%%%%%%%%%%%%
fprintf('M-step, epoch #%d\n', n);
[Q, S, m] = M_step(x, Q, h, S, k);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Initialize pi's (mixing coefficients) % TODO: Store the value of the complete log-likelihood function
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end
% NOTE(review): the rows from here to the closing `end end` repeat the
% covariance TODO and the main loop verbatim; they appear to be the unpaired
% rows of the other diff column rather than a second loop in the real file
% - confirm against the actual EMG.m.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Initialize the covariance matrix estimate
% further modifications will need to be made when doing 2(d)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Main EM loop
for n=1:epochs
%%%%%%%%%%%%%%%%
% E-step
%%%%%%%%%%%%%%%%
fprintf('E-step, epoch #%d\n', n);
[Q, h] = E_step(x, Q, h, pi, m, S, k);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Store the value of the complete log-likelihood function
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%
% M-step
%%%%%%%%%%%%%%%%
fprintf('M-step, epoch #%d\n', n);
[Q, S, m] = M_step(x, Q, h, S, k);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Store the value of the complete log-likelihood function
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end end

View file

@ -12,10 +12,11 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Q, h] = E_step(x, Q, h, pi, m, S, k) function [Q, h] = E_step(x, Q, h, pi, m, S, k)
% E_step  Placeholder for the expectation step of EM. In the visible rows
%         neither Q nor h is modified, so both are returned as passed in.
%   x                 - data matrix; only its row count is read here
%   Q, h, pi, m, S, k - accepted but not yet used by the visible body
% NOTE(review): the parameter named `pi` shadows MATLAB's built-in constant
%         inside this function.
[num_data, ~] = size(x); [num_data, ~] = size(x);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: perform E-step of EM algorithm % TODO: perform E-step of EM algorithm
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% NOTE(review): stub statement with no effect on the outputs; without a
% trailing semicolon MATLAB echoes z to the console on every call.
z = 1 + 1
end end

View file

@ -11,24 +11,24 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Q, S, m] = M_step(x, Q, h, S, k) function [Q, S, m] = M_step(x, Q, h, S, k)
% M_step  Placeholder for the maximization step of EM. The visible rows only
%         read the data size and set two local constants; the three update
%         sections below are still TODOs.
%   x       - data matrix; size(x) is read as [num_data, dim]
%   Q, S    - passed in and returned unchanged by the visible body
%   h, k    - accepted but not yet used
% NOTE(review): output `m` is never assigned in the visible body, so a call
%         that requests all three outputs (as EMG does) will presumably fail
%         until the means update is written - confirm.
% get size of data % get size of data
[num_data, dim] = size(x); [num_data, dim] = size(x);
eps = 1e-15; eps = 1e-15;
lambda = 1e-3; % value for improved version of EM lambda = 1e-3; % value for improved version of EM
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: update mixing coefficients % TODO: update mixing coefficients
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: update cluster means % TODO: update cluster means
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Calculate the covariance matrix estimate % TODO: Calculate the covariance matrix estimate
% further modifications will need to be made when doing 2(d) % further modifications will need to be made when doing 2(d)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end end

View file

@ -1,87 +1,87 @@
function [] = Problem2() function [] = Problem2()
% Problem2  Driver for the image-compression experiments. Loads stadium.jpg and
%   goldy.jpg, flattens each image to an (N*M)x3 matrix, runs EMG on the
%   stadium image for k = 4, 8, 12 (plotting the compressed image and the Q
%   values for each k), then runs EMG on the goldy image with k = 7, once with
%   flag=false and once with flag=true. Takes no inputs and returns nothing.
% NOTE(review): this text is a side-by-side commit diff; from the first
%   `[~,class_index]` row onward many rows fuse an old-column line with an
%   unrelated new-column line, so read each column separately.
% file names % file names
stadium_fn = "stadium.jpg"; stadium_fn = "stadium.jpg";
goldy_fn = "goldy.jpg"; goldy_fn = "goldy.jpg";
% load image and preprocess it % load image and preprocess it
goldy_img = double(imread(goldy_fn))/255; goldy_img = double(imread(goldy_fn))/255;
stadium_img = double(imread(stadium_fn))/255; stadium_img = double(imread(stadium_fn))/255;
% convert RGB images % convert RGB images
goldy_x = reshape(permute(goldy_img, [2 1 3]), [], 3); % convert img from NxMx3 to N*Mx3 goldy_x = reshape(permute(goldy_img, [2 1 3]), [], 3); % convert img from NxMx3 to N*Mx3
stadium_x = reshape(permute(stadium_img, [2 1 3]), [], 3); stadium_x = reshape(permute(stadium_img, [2 1 3]), [], 3);
% get dimensionality of stadium image % get dimensionality of stadium image
[height, width, depth] = size(stadium_img); [height, width, depth] = size(stadium_img);
% set epochs (number of iterations to run algorithm for) % set epochs (number of iterations to run algorithm for)
epochs = 10; epochs = 10;
%%%%%%%%%% %%%%%%%%%%
% 2(a,b) % % 2(a,b) %
%%%%%%%%%% %%%%%%%%%%
index = 1; index = 1;
figure(); figure();
for k = 4:4:12 for k = 4:4:12
fprintf("k=%d\n", k); fprintf("k=%d\n", k);
% call EM on data % call EM on data
[h, m, Q] = EMG(stadium_x, k, epochs, false); [h, m, Q] = EMG(stadium_x, k, epochs, false);
% get compressed version of image % get compressed version of image
% each pixel row is replaced by the mean (row of m) of the cluster with the largest h
[~,class_index] = max(h,[],2);
compress = m(class_index,:);
% 2(a), plot compressed image
subplot(3,2,index)
% reshape to [width, height, depth] then permute [2 1 3]: the inverse of the
% permute+reshape used to build stadium_x above
imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
index = index + 1;
% 2(b), plot complete data likelihood curves
subplot(3,2,index)
% NOTE(review): size(Q) returns a vector; the colon operator presumably uses
% only its first element here - numel(Q) would state the intent directly.
x = 1:size(Q);
% two RGB rows (red, green) repeated length(x)/2 times, so consecutive Q
% entries alternate colour - presumably E-step vs M-step values, TODO confirm
c = repmat([1 0 0; 0 1 0], length(x)/2, 1);
scatter(x,Q,20,c);
index = index + 1;
end
shg
%%%%%%%%
% 2(c) %
%%%%%%%%
% get dimensionality of goldy image, and set k=7
% (height, width and depth are overwritten with the goldy image's sizes)
[height, width, depth] = size(goldy_img);
k = 7;
% run EM on goldy image
[h, m, Q] = EMG(goldy_x, k, epochs, false);
% plot goldy image using clusters from EM
[~,class_index] = max(h,[],2); [~,class_index] = max(h,[],2);
compress = m(class_index,:); compress = m(class_index,:);
figure();
subplot(2,1,1) % 2(a), plot compressed image
subplot(3,2,index)
imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3])) imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
index = index + 1;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 2(b), plot complete data likelihood curves
% TODO: plot goldy image after using clusters from k-means subplot(3,2,index)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% x = 1:size(Q);
% begin code here c = repmat([1 0 0; 0 1 0], length(x)/2, 1);
scatter(x,Q,20,c);
index = index + 1;
end
shg
% end code here %%%%%%%%
shg % 2(c) %
%%%%%%%%
% get dimensionality of goldy image, and set k=7
[height, width, depth] = size(goldy_img);
k = 7;
%%%%%%%% % run EM on goldy image
% 2(e) % [h, m, Q] = EMG(goldy_x, k, epochs, false);
%%%%%%%%
% run improved version of EM on goldy image
[h, m, Q] = EMG(goldy_x, k, epochs, true);
% plot goldy image using clusters from improved EM % plot goldy image using clusters from EM
[~,class_index] = max(h,[],2); [~,class_index] = max(h,[],2);
compress = m(class_index,:); compress = m(class_index,:);
figure(); figure();
imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3])) subplot(2,1,1)
shg imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: plot goldy image after using clusters from k-means
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% begin code here
% end code here
shg
%%%%%%%%
% 2(e) %
%%%%%%%%
% run improved version of EM on goldy image
[h, m, Q] = EMG(goldy_x, k, epochs, true);
% plot goldy image using clusters from improved EM
[~,class_index] = max(h,[],2);
compress = m(class_index,:);
figure();
imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
shg
end end

View file

@ -47,7 +47,7 @@ Updates:
= Problem 1b = Problem 1b
- $E(ww,vv|XX) = - sum_t (r^t log y^t + (1 - r^t) log (1 - y^t))$ - $E(ww,vv|XX) = - sum_t (r^t log y^t + (1 - r^t) log (1 - y^t))$
- $y^t = "sigmoid"(v_2 z_2 + v_1 z_1 + v_0)$ - $y^t = "sigmoid"(v_2 z^t_2 + v_1 z^t_1 + v_0)$
- $z^t_1 = "ReLU"(w_2 x^t_2 + w_1 x^t_1 + w_0)$ - $z^t_1 = "ReLU"(w_2 x^t_2 + w_1 x^t_1 + w_0)$
- $z^t_2 = tanh(w_2 x^t_2 + w_1 x^t_1 + w_0)$ - $z^t_2 = tanh(w_2 x^t_2 + w_1 x^t_1 + w_0)$
@ -62,4 +62,8 @@ Updates:
&= - sum_t frac(diff E, diff y^t) (frac(diff y^t, diff z^t_1) frac(diff z^t_1, diff w_j) + frac(diff y^t, diff z^t_2) frac(diff z^t_2, diff w_j)) \ &= - sum_t frac(diff E, diff y^t) (frac(diff y^t, diff z^t_1) frac(diff z^t_1, diff w_j) + frac(diff y^t, diff z^t_2) frac(diff z^t_2, diff w_j)) \
&= - sum_t (frac(r^t, y^t) - frac(1-r^t, 1-y^t)) (frac(diff y^t, diff z^t_1) frac(diff z^t_1, diff w_j) + frac(diff y^t, diff z^t_2) frac(diff z^t_2, diff w_j)) \ &= - sum_t (frac(r^t, y^t) - frac(1-r^t, 1-y^t)) (frac(diff y^t, diff z^t_1) frac(diff z^t_1, diff w_j) + frac(diff y^t, diff z^t_2) frac(diff z^t_2, diff w_j)) \
&= - sum_t (frac(r^t-y^t, y^t (1-y^t))) (frac(diff y^t, diff z^t_1) frac(diff z^t_1, diff w_j) + frac(diff y^t, diff z^t_2) frac(diff z^t_2, diff w_j)) \ &= - sum_t (frac(r^t-y^t, y^t (1-y^t))) (frac(diff y^t, diff z^t_1) frac(diff z^t_1, diff w_j) + frac(diff y^t, diff z^t_2) frac(diff z^t_2, diff w_j)) \
&= - sum_t (frac(r^t-y^t, y^t (1-y^t))) (y^t (1-y^t) v_1 frac(diff z^t_1, diff w_j) + y^t (1-y^t) v_2 frac(diff z^t_2, diff w_j)) \
&= - sum_t (r^t-y^t) (v_1 frac(diff z^t_1, diff w_j) + v_2 frac(diff z^t_2, diff w_j)) \
&= - sum_t (r^t-y^t) (x^t_j v_1 cases(0 "if" ww dot xx < 0, 1 "otherwise") + x^t_j v_2 (1 - tanh^2 (ww dot xx))) \
&= - sum_t (r^t-y^t) x^t_j (v_1 cases(0 "if" ww dot xx < 0, 1 "otherwise") + v_2 (1 - tanh^2 (ww dot xx))) \
$ $