1b

2023-11-12 11:42:19 -06:00 · 2023-11-12 11:42:19 -06:00 · 9da52cfbfa
commit 9da52cfbfa
parent 9eee507d73
5 changed files with 143 additions and 136 deletions
--- a/assignments/hwk03/EMG.m
+++ b/assignments/hwk03/EMG.m
@@ -10,55 +10,57 @@
 %         Q - vector of values of the complete data log-likelihood function
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 function [h, m, Q] = EMG(x, k, epochs, flag)
+
+  % variables
+  num_clusters = k; % number of clusters
+  eps = 1e-15; % small value that can be used to avoid obtaining 0's
+  lambda = 1e-3; % value for improved version of EM
+  [num_data, dim] = size(x);
+  h = zeros(num_data, num_clusters); % expectation of data point being part of a cluster
+  S = zeros(dim, dim, num_clusters); % covariance matrix for each cluster
+  b = zeros(num_data,num_clusters); % cluster assignments, only used for initialization of pi and S 
+  Q = zeros(epochs*2,1); % vector that can hold complete data log-likelihood after each E and M step
+
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  % TODO: Initialise cluster means using k-means
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  means = kmeans(x, k);
+ 
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  % TODO: Determine the b values for all data points
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ 
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  % TODO: Initialize pi's (mixing coefficients)
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  % TODO: Initialize the covariance matrix estimate
+  %       further modifications will need to be made when doing 2(d)
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  
+  % Main EM loop
+  for n=1:epochs
+    %%%%%%%%%%%%%%%% 
+    % E-step
+    %%%%%%%%%%%%%%%%
+    fprintf('E-step, epoch #%d\n', n);
+    [Q, h] =  E_step(x, Q, h, pi, m, S, k);
    
-    % variables
-    num_clusters = k; % number of clusters
-    eps = 1e-15; % small value that can be used to avoid obtaining 0's
-    lambda = 1e-3; % value for improved version of EM
-    [num_data, dim] = size(x);
-    h = zeros(num_data, num_clusters); % expectation of data point being part of a cluster
-    S = zeros(dim, dim, num_clusters); % covariance matrix for each cluster
-    b = zeros(num_data,num_clusters); % cluster assignments, only used for intialization of pi and S 
-    Q = zeros(epochs*2,1); % vector that can hold complete data log-likelihood after each E and M step
-    
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: Initialise cluster means using k-means
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-   
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: Determine the b values for all data points
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-   
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: Initialize pi's (mixing coefficients)
+    % TODO: Store the value of the complete log-likelihood function
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: Initialize the covariance matrix estimate
-    %       further modifications will need to be made when doing 2(d)
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    %%%%%%%%%%%%%%%%
+    % M-step
+    %%%%%%%%%%%%%%%%
+    fprintf('M-step, epoch #%d\n', n);
+    [Q, S, m] = M_step(x, Q, h, S, k);              
    
-    % Main EM loop
-    for n=1:epochs
-        %%%%%%%%%%%%%%%% 
-        % E-step
-        %%%%%%%%%%%%%%%%
-        fprintf('E-step, epoch #%d\n', n);
-        [Q, h] =  E_step(x, Q, h, pi, m, S, k);
-        
-        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-        % TODO: Store the value of the complete log-likelihood function
-        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-        %%%%%%%%%%%%%%%%
-        % M-step
-        %%%%%%%%%%%%%%%%
-        fprintf('M-step, epoch #%d\n', n);
-        [Q, S, m] = M_step(x, Q, h, S, k);              
-        
-        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-        % TODO: Store the value of the complete log-likelihood function
-        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % TODO: Store the value of the complete log-likelihood function
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  end


 end
--- a/assignments/hwk03/E_step.m
+++ b/assignments/hwk03/E_step.m
@@ -12,10 +12,11 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 function [Q, h] = E_step(x, Q, h, pi, m, S, k)

-    [num_data, ~] = size(x);
-    
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: perform E-step of EM algorithm
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  [num_data, ~] = size(x);
+
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  % TODO: perform E-step of EM algorithm
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  z = 1 + 1

 end
--- a/assignments/hwk03/M_step.m
+++ b/assignments/hwk03/M_step.m
@@ -11,24 +11,24 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 function [Q, S, m] = M_step(x, Q, h, S, k)
     
-    % get size of data
-    [num_data, dim] = size(x);
-    eps = 1e-15;
-    lambda = 1e-3; % value for improved version of EM 
+  % get size of data
+  [num_data, dim] = size(x);
+  eps = 1e-15;
+  lambda = 1e-3; % value for improved version of EM 

-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: update mixing coefficients
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  % TODO: update mixing coefficients
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: update cluster means
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  % TODO: update cluster means
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: Calculate the covariance matrix estimate 
-    %       further modifications will need to be made when doing 2(d)
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  % TODO: Calculate the covariance matrix estimate 
+  %       further modifications will need to be made when doing 2(d)
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    
 end
--- a/assignments/hwk03/Problem2.m
+++ b/assignments/hwk03/Problem2.m
@@ -1,87 +1,87 @@
 function [] = Problem2()

-    % file names
-    stadium_fn = "stadium.jpg";
-    goldy_fn = "goldy.jpg";
+  % file names
+  stadium_fn = "stadium.jpg";
+  goldy_fn = "goldy.jpg";

-    % load image and preprocess it
-    goldy_img = double(imread(goldy_fn))/255;
-    stadium_img = double(imread(stadium_fn))/255;
-    
-    % convert RGB images
-    goldy_x = reshape(permute(goldy_img, [2 1 3]), [], 3); % convert img from NxMx3 to N*Mx3
-    stadium_x = reshape(permute(stadium_img, [2 1 3]), [], 3);
+  % load image and preprocess it
+  goldy_img = double(imread(goldy_fn))/255;
+  stadium_img = double(imread(stadium_fn))/255;
+  
+  % convert RGB images
+  goldy_x = reshape(permute(goldy_img, [2 1 3]), [], 3); % convert img from NxMx3 to N*Mx3
+  stadium_x = reshape(permute(stadium_img, [2 1 3]), [], 3);

-    % get dimensionality of stadium image
-    [height, width, depth] = size(stadium_img);
+  % get dimensionality of stadium image
+  [height, width, depth] = size(stadium_img);

-    % set epochs (number of iterations to run algorithm for)
-    epochs = 10;
+  % set epochs (number of iterations to run algorithm for)
+  epochs = 10;

-    %%%%%%%%%%
-    % 2(a,b) %
-    %%%%%%%%%%
-    index = 1;
-    figure();
-    for k = 4:4:12 
-         fprintf("k=%d\n", k);
-    
-          % call EM on data
-         [h, m, Q] = EMG(stadium_x, k, epochs, false);
-     
-         % get compressed version of image
-         [~,class_index] = max(h,[],2);
-         compress = m(class_index,:);
-     
-         % 2(a), plot compressed image
-         subplot(3,2,index)
-         imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
-         index = index + 1;
-     
-         % 2(b), plot complete data likelihood curves
-         subplot(3,2,index)
-         x = 1:size(Q);
-         c = repmat([1 0 0; 0 1 0], length(x)/2, 1);
-         scatter(x,Q,20,c); 
-         index = index + 1;
-     end
-     shg
+  %%%%%%%%%%
+  % 2(a,b) %
+  %%%%%%%%%%
+  index = 1;
+  figure();
+  for k = 4:4:12 
+    fprintf("k=%d\n", k);

-    %%%%%%%%
-    % 2(c) %
-    %%%%%%%%
-    % get dimensionality of goldy image, and set k=7
-    [height, width, depth] = size(goldy_img);
-    k = 7;
+    % call EM on data
+    [h, m, Q] = EMG(stadium_x, k, epochs, false);

-    % run EM on goldy image
-    [h, m, Q] = EMG(goldy_x, k, epochs, false);
-
-    % plot goldy image using clusters from EM
+    % get compressed version of image
    [~,class_index] = max(h,[],2);
    compress = m(class_index,:);
-    figure();
-    subplot(2,1,1)
+
+    % 2(a), plot compressed image
+    subplot(3,2,index)
    imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
+    index = index + 1;

-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: plot goldy image after using clusters from k-means
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % begin code here
+    % 2(b), plot complete data likelihood curves
+    subplot(3,2,index)
+    x = 1:size(Q);
+    c = repmat([1 0 0; 0 1 0], length(x)/2, 1);
+    scatter(x,Q,20,c); 
+    index = index + 1;
+  end
+  shg

-    % end code here 
-    shg 
-    
-    %%%%%%%%
-    % 2(e) %
-    %%%%%%%%
-    % run improved version of EM on goldy image 
-    [h, m, Q] = EMG(goldy_x, k, epochs, true);
+  %%%%%%%%
+  % 2(c) %
+  %%%%%%%%
+  % get dimensionality of goldy image, and set k=7
+  [height, width, depth] = size(goldy_img);
+  k = 7;

-    % plot goldy image using clusters from improved EM
-    [~,class_index] = max(h,[],2);
-    compress = m(class_index,:);
-    figure();
-    imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
-    shg
+  % run EM on goldy image
+  [h, m, Q] = EMG(goldy_x, k, epochs, false);
+
+  % plot goldy image using clusters from EM
+  [~,class_index] = max(h,[],2);
+  compress = m(class_index,:);
+  figure();
+  subplot(2,1,1)
+  imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
+
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  % TODO: plot goldy image after using clusters from k-means
+  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+  % begin code here
+
+  % end code here 
+  shg 
+  
+  %%%%%%%%
+  % 2(e) %
+  %%%%%%%%
+  % run improved version of EM on goldy image 
+  [h, m, Q] = EMG(goldy_x, k, epochs, true);
+
+  % plot goldy image using clusters from improved EM
+  [~,class_index] = max(h,[],2);
+  compress = m(class_index,:);
+  figure();
+  imagesc(permute(reshape(compress, [width, height, depth]),[2 1 3]))
+  shg
 end
--- a/assignments/hwk03/hw3_sol.typ
+++ b/assignments/hwk03/hw3_sol.typ
@@ -47,7 +47,7 @@ Updates:
 = Problem 1b

 - $E(ww,vv|XX) = - sum_t r^t log y^t + (1 - r^t) log (1 - y^t)$
- $y^t = "sigmoid"(v_2 z_2 + v_1 z_1 + v_0)$
+- $y^t = "sigmoid"(v_2 z^t_2 + v_1 z^t_1 + v_0)$
 - $z^t_1 = "ReLU"(w_2 x^t_2 + w_1 x^t_1 + w_0)$
 - $z^t_2 = tanh(w_2 x^t_2 + w_1 x^t_1 + w_0)$

@@ -62,4 +62,8 @@ Updates:
    &= - sum_t frac(diff E, diff y^t) (frac(diff y^t, diff z^t_1) frac(diff z^t_1, diff w_j) + frac(diff y^t, diff z^t_2) frac(diff z^t_2, diff w_j))  \
    &= - sum_t (frac(r^t, y^t) - frac(1-r^t, 1-y^t)) (frac(diff y^t, diff z^t_1) frac(diff z^t_1, diff w_j) + frac(diff y^t, diff z^t_2) frac(diff z^t_2, diff w_j))  \
    &= - sum_t (frac(r^t-y^t, y^t (1-y^t))) (frac(diff y^t, diff z^t_1) frac(diff z^t_1, diff w_j) + frac(diff y^t, diff z^t_2) frac(diff z^t_2, diff w_j))  \
+    &= - sum_t (frac(r^t-y^t, y^t (1-y^t))) (y^t (1-y^t) v_1 frac(diff z^t_1, diff w_j) + y^t (1-y^t) v_2 frac(diff z^t_2, diff w_j))  \
+    &= - sum_t (r^t-y^t) (v_1 frac(diff z^t_1, diff w_j) + v_2 frac(diff z^t_2, diff w_j))  \
+    &= - sum_t (r^t-y^t) (x^t_j v_1 cases(0 "if" ww dot xx < 0, 1 "otherwise") + x^t_j v_2 (1 - tanh^2 (ww dot xx)))  \
+    &= - sum_t (r^t-y^t) x^t_j (v_1 cases(0 "if" ww dot xx < 0, 1 "otherwise") + v_2 (1 - tanh^2 (ww dot xx)))  \
  $