From fa84edb84c6b25ab03a45cfe9faa186d6145cf85 Mon Sep 17 00:00:00 2001
From: Michael Zhang
Date: Wed, 15 Nov 2023 09:53:18 -0600
Subject: [PATCH] upd

---
Review notes (this section is ignored by `git am` / `git apply`):
- E_step.m: responsibilities are normalised per data point, i.e. each row of
  h is divided by its own sum over the k clusters.
- EMG.m: the initial mixing coefficients are cluster fractions, the complete
  log-likelihood indexes pi by cluster, and M_step is called with the
  signature declared in M_step.m by this same patch.
- M_step.m is still a stub: S, m and pi are not assigned yet, so EMG cannot
  run end-to-end until the TODO sections are filled in.
- The header comments of E_step.m and M_step.m still list Q as an input even
  though both signatures drop it.
- L is computed in EMG.m but is not yet stored into Q.
- Removed/added pairs with identical text are whitespace-only changes.

 assignments/hwk03/EMG.m      | 30 ++++++++++++++++++++++++++----
 assignments/hwk03/E_step.m   | 18 +++++++++++++-----
 assignments/hwk03/M_step.m   | 21 ++++++++++++---------
 assignments/hwk03/Problem2.m |  1 +
 4 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/assignments/hwk03/EMG.m b/assignments/hwk03/EMG.m
index bc91dae..c4fe80c 100644
--- a/assignments/hwk03/EMG.m
+++ b/assignments/hwk03/EMG.m
@@ -24,20 +24,35 @@ function [h, m, Q] = EMG(x, k, epochs, flag)
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     % TODO: Initialise cluster means using k-means
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    means = kmeans(x, k);
+    [~, ~, ~, D] = kmeans(x, k);  % D(i, j): distance from point i to centroid j
 
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     % TODO: Determine the b values for all data points
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
+    for i = 1:num_data
+        row = D(i,:);
+        [~, minIdx] = min(row);  % first minimum only: a tie must not mark two clusters
+        b(i,minIdx) = 1;
+    end
+
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     % TODO: Initialize pi's (mixing coefficients)
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    pi = zeros(k, 1);
+    for i = 1:k
+        pi(i) = sum(b(:, i)) / num_data;  % fraction of points assigned to cluster i
+    end
 
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     % TODO: Initialize the covariance matrix estimate
     % further modifications will need to be made when doing 2(d)
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    m = zeros(k, dim);
+    for i = 1:k
+        data = x(b(:, i) == 1, :);  % rows of x assigned to cluster i
+        m(i, :) = mean(data, 1);  % explicit dim keeps a 1 x dim result for a single-row cluster
+        S(:, :, i) = cov(data);
+    end
 
     % Main EM loop
     for n=1:epochs
@@ -45,17 +60,24 @@ function [h, m, Q] = EMG(x, k, epochs, flag)
         % E-step
         %%%%%%%%%%%%%%%%
         fprintf('E-step, epoch #%d\n', n);
-        [Q, h] = E_step(x, Q, h, pi, m, S, k);
+        [h] = E_step(x, h, pi, m, S, k);
 
         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
         % TODO: Store the value of the complete log-likelihood function
         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+        L = 0;
+        for j = 1:k
+            density = mvnpdf(x, m(j, :), S(:, :, j));  % evaluated once per cluster, for all points
+            for i = 1:num_data
+                L = L + h(i, j) * (log(pi(j)) + log(density(i)));  % pi is indexed by cluster j
+            end
+        end
 
         %%%%%%%%%%%%%%%%
         % M-step
         %%%%%%%%%%%%%%%%
         fprintf('M-step, epoch #%d\n', n);
-        [Q, S, m] = M_step(x, Q, h, S, k);
+        [S, m, pi] = M_step(x, h, S, k, flag);
 
         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
         % TODO: Store the value of the complete log-likelihood function
diff --git a/assignments/hwk03/E_step.m b/assignments/hwk03/E_step.m
index 2d7c0a6..a7a6198 100644
--- a/assignments/hwk03/E_step.m
+++ b/assignments/hwk03/E_step.m
@@ -3,20 +3,28 @@
 % Input: x - a nxd matrix (nx3 if using RGB)
 %        Q - vector of values from the complete data log-likelihood function
 %        h - a nxk matrix, the expectation of the hidden variable z given the data set and distribution params
-%        pi - vector of mixing coefficients
+%        pi - vector of mixing coefficients
 %        m - cluster means
 %        S - cluster covariance matrices
 %        k - the number of clusters
-% Output: Q - vector of values of the complete data log-likelihood function
-%         h - a nxk matrix, the expectation of the hidden variable z given the data set and distribution params
+% Output: h - a nxk matrix, the expectation of the hidden variable z given the data set and distribution params
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-function [Q, h] = E_step(x, Q, h, pi, m, S, k)
+function [h] = E_step(x, h, pi, m, S, k)
 
     [num_data, ~] = size(x);
 
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     % TODO: perform E-step of EM algorithm
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    z = 1 + 1
+    parts = zeros(num_data, k);  % parts(i, j) = pi(j) * N(x_i | m_j, S_j)
+
+    for j = 1:k
+        parts(:, j) = pi(j) * mvnpdf(x, m(j, :), S(:, :, j));
+    end
+
+    s = sum(parts, 2);  % per-point total over the k clusters (num_data x 1)
+    for i = 1:num_data
+        h(i, :) = parts(i, :) ./ s(i);  % each row of h sums to 1
+    end
 
 end
\ No newline at end of file
diff --git a/assignments/hwk03/M_step.m b/assignments/hwk03/M_step.m
index 381db95..97105e8 100644
--- a/assignments/hwk03/M_step.m
+++ b/assignments/hwk03/M_step.m
@@ -1,34 +1,37 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% Name: E_step.m
+% Name: M_step.m
 % Input: x - a nxd matrix (nx3 if using RGB)
 %        Q - vector of values from the complete data log-likelihood function
 %        h - a nxk matrix, the expectation of the hidden variable z given the data set and distribution params
 %        S - cluster covariance matrices
 %        k - the number of clusters
-% Output: Q - vector of values of the complete data log-likelihood function
-%         S - cluster covariance matrices
+%        flag - flag to use improved EM to avoid singular covariance matrix
+% Output: S - cluster covariance matrices
 %         m - cluster means
+%         pi - mixing coefficients
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-function [Q, S, m] = M_step(x, Q, h, S, k)
-
+function [S, m, pi] = M_step(x, h, S, k, flag)
+
     % get size of data
     [num_data, dim] = size(x);
     eps = 1e-15;
-    lambda = 1e-3; % value for improved version of EM
+    lambda = 1e-3; % value for improved version of EM
 
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     % TODO: update mixing coefficients
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
 
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     % TODO: update cluster means
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
+
 
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % TODO: Calculate the covariance matrix estimate
+    % TODO: Calculate the covariance matrix estimate
     % further modifications will need to be made when doing 2(d)
     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
+
 
 end
\ No newline at end of file
diff --git a/assignments/hwk03/Problem2.m b/assignments/hwk03/Problem2.m
index 72d674a..86f1842 100644
--- a/assignments/hwk03/Problem2.m
+++ b/assignments/hwk03/Problem2.m
@@ -1,4 +1,5 @@
 function [] = Problem2()
+    rng(1, "twister");
 
     % file names
     stadium_fn = "stadium.jpg";