commit fa84edb84c (parent 9da52cfbfa)

    upd

4 changed files with 52 additions and 18 deletions
EMG.m
@@ -24,20 +24,35 @@ function [h, m, Q] = EMG(x, k, epochs, flag)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % TODO: Initialise cluster means using k-means
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-   means = kmeans(x, k);
+   [~, ~, ~, D] = kmeans(x, k);  % D holds the point-to-centroid distances

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % TODO: Determine the b values for all data points
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % hard-assign each point to its nearest k-means centroid
    for i = 1:num_data
        row = D(i, :);
        minIdx = row == min(row);
        b(i, minIdx) = 1;
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % TODO: Initialize pi's (mixing coefficients)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    pi = zeros(k, 1);  % note: this vector shadows MATLAB's built-in constant pi
    for i = 1:k
        pi(i) = sum(b(:, i)) / num_data;  % normalise so the coefficients sum to 1
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % TODO: Initialize the covariance matrix estimate
    % further modifications will need to be made when doing 2(d)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    m = zeros(k, dim);
    for i = 1:k
        data = x(b(:, i) == 1, :);  % points hard-assigned to cluster i
        m(i, :) = mean(data);
        S(:, :, i) = cov(data);
    end

    % Main EM loop
    for n = 1:epochs
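The hard-assignment initialisation above can also be written without the per-point loop. The following is only a standalone sketch with placeholder data (x, k, b, m, S mirror the variables in EMG.m, but nothing here claims to be the assignment's required solution):

% Standalone sketch: vectorised hard-assignment initialisation from k-means.
x = rand(500, 3);                        % placeholder data, one row per point
k = 4;
[num_data, dim] = size(x);
[~, ~, ~, D] = kmeans(x, k);             % D: num_data-by-k point-to-centroid distances

[~, idx] = min(D, [], 2);                % nearest centroid for each point
b = zeros(num_data, k);
b(sub2ind([num_data, k], (1:num_data)', idx)) = 1;   % one-hot assignment matrix

mix = sum(b, 1)' / num_data;             % mixing coefficients (they sum to 1; avoids shadowing pi)
m = zeros(k, dim);
S = zeros(dim, dim, k);
for j = 1:k
    pts = x(b(:, j) == 1, :);            % points assigned to cluster j
    m(j, :) = mean(pts, 1);
    S(:, :, j) = cov(pts);
end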
@@ -45,17 +60,24 @@ function [h, m, Q] = EMG(x, k, epochs, flag)
        % E-step
        %%%%%%%%%%%%%%%%
        fprintf('E-step, epoch #%d\n', n);
-       [Q, h] = E_step(x, Q, h, pi, m, S, k);
+       [h] = E_step(x, h, pi, m, S, k);

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % TODO: Store the value of the complete log-likelihood function
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        L = 0;
        for j = 1:k
            dens = mvnpdf(x, m(j, :), S(:, :, j));  % component-j density at every point
            for i = 1:num_data
                L = L + h(i, j) * (log(pi(j)) + log(dens(i)));
            end
        end
        Q(end+1) = L;  % store the complete-data log-likelihood for this epoch

        %%%%%%%%%%%%%%%%
        % M-step
        %%%%%%%%%%%%%%%%
        fprintf('M-step, epoch #%d\n', n);
-       [Q, S, m] = M_step(x, Q, h, S, k);
+       [S, m, pi] = M_step(x, h, S, k, flag);  % call updated to the new M_step signature

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % TODO: Store the value of the complete log-likelihood function
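The double loop above can be collapsed into matrix operations. This is a sketch only, with placeholder inputs standing in for x, m, S, the mixing coefficients and the responsibilities h used in EMG.m:

% Sketch: complete-data log-likelihood, sum_i sum_j h(i,j)*(log pi_j + log N(x_i | m_j, S_j)).
n = 200; d = 3; k = 4;                   % placeholder sizes
x = rand(n, d); m = rand(k, d);
S = repmat(eye(d), 1, 1, k);
mix = ones(k, 1) / k;                    % mixing coefficients
h = ones(n, k) / k;                      % responsibilities

logdens = zeros(n, k);
for j = 1:k
    logdens(:, j) = log(mvnpdf(x, m(j, :), S(:, :, j)));
end
L = sum(sum(h .* (log(mix)' + logdens)));   % requires implicit expansion (R2016b+)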
E_step.m
@@ -3,20 +3,28 @@
% Input: x - an nxd matrix (nx3 if using RGB)
-%        Q - vector of values from the complete data log-likelihood function
%        h - an nxk matrix, the expectation of the hidden variable z given the data set and distribution params
%        pi - vector of mixing coefficients
%        m - cluster means
%        S - cluster covariance matrices
%        k - the number of clusters
-% Output: Q - vector of values of the complete data log-likelihood function
-%         h - an nxk matrix, the expectation of the hidden variable z given the data set and distribution params
+% Output: h - an nxk matrix, the expectation of the hidden variable z given the data set and distribution params
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-function [Q, h] = E_step(x, Q, h, pi, m, S, k)
+function [h] = E_step(x, h, pi, m, S, k)

    [num_data, ~] = size(x);

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % TODO: perform E-step of EM algorithm
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % unnormalised responsibilities: pi(j) * N(x | m_j, S_j) for each component
    parts = zeros(num_data, k);
    for j = 1:k
        parts(:, j) = pi(j) * mvnpdf(x, m(j, :), S(:, :, j));
    end

    % normalise each row so a point's responsibilities sum to 1
    s = sum(parts, 2);
    for i = 1:num_data
        h(i, :) = parts(i, :) / s(i);
    end

end
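The row normalisation in E_step can equally be done with implicit expansion and a guard against a zero denominator. Again a standalone sketch with placeholder parameters rather than the assignment's API:

% Sketch: vectorised E-step (responsibilities h from the current parameters).
n = 200; d = 3; k = 4;
x = rand(n, d); m = rand(k, d);
S = repmat(eye(d), 1, 1, k);
mix = ones(k, 1) / k;

parts = zeros(n, k);
for j = 1:k
    parts(:, j) = mix(j) * mvnpdf(x, m(j, :), S(:, :, j));
end
h = parts ./ max(sum(parts, 2), realmin);   % row-normalise; realmin guards against 0/0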
M_step.m
@@ -1,34 +1,37 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% Name: E_step.m
+% Name: M_step.m
% Input: x - an nxd matrix (nx3 if using RGB)
-%        Q - vector of values from the complete data log-likelihood function
%        h - an nxk matrix, the expectation of the hidden variable z given the data set and distribution params
%        S - cluster covariance matrices
%        k - the number of clusters
-% Output: Q - vector of values of the complete data log-likelihood function
+%        flag - flag to use improved EM to avoid a singular covariance matrix
+% Output: S - cluster covariance matrices
%         m - cluster means
+%         pi - mixing coefficients
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-function [Q, S, m] = M_step(x, Q, h, S, k)
+function [S, m, pi] = M_step(x, h, S, k, flag)

    % get size of data
    [num_data, dim] = size(x);
    eps = 1e-15;    % note: shadows MATLAB's built-in eps
    lambda = 1e-3;  % regularisation value for the improved version of EM

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % TODO: update mixing coefficients
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % TODO: update cluster means
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % TODO: Calculate the covariance matrix estimate
    % further modifications will need to be made when doing 2(d)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


end
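The three TODO blocks in M_step are still empty. Below is a standalone sketch of the standard GMM M-step updates (mixing coefficients, weighted means, weighted covariances, with a lambda ridge when the regularisation flag is set); the variable names are placeholders and this is not presented as the assignment's intended solution:

% Standalone sketch of the standard GMM M-step, given responsibilities h.
n = 200; d = 3; k = 4; flag = true; lambda = 1e-3;
x = rand(n, d);
h = rand(n, k); h = h ./ sum(h, 2);      % placeholder responsibilities, rows sum to 1

Nk  = sum(h, 1)';                        % effective number of points per component (k-by-1)
mix = Nk / n;                            % updated mixing coefficients
m   = (h' * x) ./ Nk;                    % updated k-by-d means (responsibility-weighted)
S   = zeros(d, d, k);
for j = 1:k
    xc = x - m(j, :);                    % centre the data on component j's mean
    S(:, :, j) = (xc' * (h(:, j) .* xc)) / Nk(j);
    if flag
        S(:, :, j) = S(:, :, j) + lambda * eye(d);   % ridge keeps S from going singular
    end
end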
Problem2.m
@@ -1,4 +1,5 @@
function [] = Problem2()
+   rng(1, "twister");  % fix the random seed so the k-means initialisation is reproducible

    % file names
    stadium_fn = "stadium.jpg";
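For context, one plausible way for Problem2 to feed an image into EMG, assuming x is the image reshaped to an n-by-3 matrix of RGB values; the choice of k, epochs and flag below is purely illustrative:

% Sketch only: segment an RGB image with the EMG mixture model.
img = im2double(imread("stadium.jpg"));
x = reshape(img, [], 3);                 % one row per pixel; columns are R, G, B
k = 4; epochs = 10; flag = 0;            % illustrative settings, not from the assignment
[h, m, Q] = EMG(x, k, epochs, flag);
[~, labels] = max(h, [], 2);             % most responsible cluster for each pixel
seg = reshape(m(labels, :), size(img));  % recolour each pixel with its cluster mean
imshow(seg);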