This commit is contained in:
Michael Zhang 2023-11-15 09:53:18 -06:00
parent 9da52cfbfa
commit fa84edb84c
4 changed files with 52 additions and 18 deletions

View file

@ -24,20 +24,35 @@ function [h, m, Q] = EMG(x, k, epochs, flag)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Initialise cluster means using k-means % TODO: Initialise cluster means using k-means
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
means = kmeans(x, k); [~, ~, ~, D] = kmeans(x, k);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Determine the b values for all data points % TODO: Determine the b values for all data points
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for i = 1:num_data
row = D(i,:);
minIdx = row == min(row);
b(i,minIdx) = 1;
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Initialize pi's (mixing coefficients) % TODO: Initialize pi's (mixing coefficients)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
pi = zeros(k, 1);
for i = 1:k
pi(i) = sum(b(:, i));
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Initialize the covariance matrix estimate % TODO: Initialize the covariance matrix estimate
% further modifications will need to be made when doing 2(d) % further modifications will need to be made when doing 2(d)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
m = zeros(k, dim);
for i = 1:k
data = x(b(:, i) == 1, :);
m(i, :) = mean(data);
S(:, :, i) = cov(data);
end
% Main EM loop % Main EM loop
for n=1:epochs for n=1:epochs
@ -45,17 +60,24 @@ function [h, m, Q] = EMG(x, k, epochs, flag)
% E-step % E-step
%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%
fprintf('E-step, epoch #%d\n', n); fprintf('E-step, epoch #%d\n', n);
[Q, h] = E_step(x, Q, h, pi, m, S, k); [h] = E_step(x, h, pi, m, S, k);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Store the value of the complete log-likelihood function % TODO: Store the value of the complete log-likelihood function
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
L = 0;
for i = 1:num_data
for j = 1:k
prior = mvnpdf(x, m(j, :), S(:, :, j));
L = L + h(i, j) * (log(pi(i)) + log(prior(i)));
end
end
%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%
% M-step % M-step
%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%
fprintf('M-step, epoch #%d\n', n); fprintf('M-step, epoch #%d\n', n);
[Q, S, m] = M_step(x, Q, h, S, k); [Q, S, m] = M_step(x, Q, h, S, k);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Store the value of the complete log-likelihood function % TODO: Store the value of the complete log-likelihood function

View file

@ -3,20 +3,28 @@
% Input: x - a nxd matrix (nx3 if using RGB) % Input: x - a nxd matrix (nx3 if using RGB)
% Q - vector of values from the complete data log-likelihood function % Q - vector of values from the complete data log-likelihood function
% h - a nxk matrix, the expectation of the hidden variable z given the data set and distribution params % h - a nxk matrix, the expectation of the hidden variable z given the data set and distribution params
% pi - vector of mixing coefficients % pi - vector of mixing coefficients
% m - cluster means % m - cluster means
% S - cluster covariance matrices % S - cluster covariance matrices
% k - the number of clusters % k - the number of clusters
% Output: Q - vector of values of the complete data log-likelihood function % Output: h - a nxk matrix, the expectation of the hidden variable z given the data set and distribution params
% h - a nxk matrix, the expectation of the hidden variable z given the data set and distribution params
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [h] = E_step(x, h, pi, m, S, k)
    % E_STEP  Expectation step of EM for a Gaussian mixture model.
    %
    %   x  - n-by-d data matrix, one observation per row
    %   h  - n-by-k responsibilities from the previous iteration; accepted
    %        for interface compatibility and fully overwritten here
    %   pi - mixing coefficients, indexed pi(j) for cluster j
    %   m  - cluster means, row j is the mean of cluster j
    %   S  - covariance matrices, S(:, :, j) belongs to cluster j
    %   k  - number of clusters
    %
    %   Returns h, where h(i, j) is the posterior probability that
    %   observation i was generated by cluster j.
    [num_data, ~] = size(x);

    % weighted(i, j) = pi(j) * N(x(i, :) | m(j, :), S(:, :, j))
    weighted = zeros(num_data, k);
    for j = 1:k
        weighted(:, j) = pi(j) * mvnpdf(x, m(j, :), S(:, :, j));
    end

    % Normalise across clusters (dimension 2) so that every row of h sums
    % to one. A plain sum(weighted) would add down the columns and return
    % per-cluster totals, which is not the normalising constant of a point.
    rowTotals = sum(weighted, 2);
    h = bsxfun(@rdivide, weighted, rowTotals);
end

View file

@ -1,34 +1,37 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Name: E_step.m % Name: M_step.m
% Input: x - a nxd matrix (nx3 if using RGB) % Input: x - a nxd matrix (nx3 if using RGB)
% Q - vector of values from the complete data log-likelihood function % Q - vector of values from the complete data log-likelihood function
% h - a nxk matrix, the expectation of the hidden variable z given the data set and distribution params % h - a nxk matrix, the expectation of the hidden variable z given the data set and distribution params
% S - cluster covariance matrices % S - cluster covariance matrices
% k - the number of clusters % k - the number of clusters
% Output: Q - vector of values of the complete data log-likelihood function % flag - flag to use improved EM to avoid singular covariance matrix
% S - cluster covariance matrices % Output: S - cluster covariance matrices
% m - cluster means % m - cluster means
% pi - mixing coefficients
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [S, m, pi] = M_step(x, h, S, k, flag)
    % M_STEP  Maximisation step of EM for a Gaussian mixture model.
    %
    %   x    - n-by-d data matrix, one observation per row
    %   h    - n-by-k responsibilities produced by the E-step
    %   S    - d-by-d-by-k covariance matrices; every slice is overwritten
    %   k    - number of clusters
    %   flag - optional; when true, lambda * I is added to each covariance
    %          to keep it away from singularity (defaults to false)
    %
    %   Returns
    %   S  - updated covariance matrices, S(:, :, j) for cluster j
    %   m  - k-by-d matrix of updated cluster means
    %   pi - k-by-1 vector of updated mixing coefficients

    if nargin < 5
        flag = false;
    end

    % get size of data
    [num_data, dim] = size(x);
    tiny = 1e-15;    % guards the divisions below against an empty cluster
    lambda = 1e-3;   % value for improved version of EM

    % Effective number of observations assigned to each cluster (1-by-k).
    clusterWeight = sum(h, 1);

    % Update mixing coefficients: fraction of the data owned by each cluster.
    pi = (clusterWeight / num_data)';

    m = zeros(k, dim);
    for j = 1:k
        resp = h(:, j);
        denom = clusterWeight(j) + tiny;

        % Update cluster mean: responsibility-weighted average of the rows.
        m(j, :) = (resp' * x) / denom;

        % Update covariance: responsibility-weighted scatter about the mean.
        centered = bsxfun(@minus, x, m(j, :));
        S(:, :, j) = (centered' * bsxfun(@times, resp, centered)) / denom;

        % NOTE(review): the exact regulariser required for 2(d) is not
        % visible here; a ridge of lambda on the diagonal is assumed.
        if flag
            S(:, :, j) = S(:, :, j) + lambda * eye(dim);
        end
    end
end

View file

@ -1,4 +1,5 @@
function [] = Problem2() function [] = Problem2()
rng(1, "twister");
% file names % file names
stadium_fn = "stadium.jpg"; stadium_fn = "stadium.jpg";