%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Name: EMG.m
% Input: x - an nxd matrix (nx3 if using RGB)
% k - the number of clusters
% epochs - number of iterations (epochs) to run the algorithm for
% flag - flag to use improved EM to avoid singular covariance matrix
% Output: h - an nxk matrix, the expectation of the hidden variable z given the data set and distribution params
% m - a kxd matrix, the maximum likelihood estimate of the mean
% Q - vector of values of the complete data log-likelihood function
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [h, m, Q] = EMG(x, k, epochs, flag)
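% Example usage (illustrative only; the image variable and parameter values
% below are assumptions, not part of the assignment):
%   pixels = reshape(double(img), [], 3);   % flatten an RGB image to nx3
%   [h, m, Q] = EMG(pixels, 4, 20, 0);      % standard EM with k = 4
%   [h, m, Q] = EMG(pixels, 4, 20, 1);      % improved EM (guards against singular covariances)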
% variables
num_clusters = k; % number of clusters
eps = 1e-15; % small value that can be used to avoid obtaining 0's
lambda = 1e-3; % value for improved version of EM
[num_data, dim] = size(x);
h = zeros(num_data, num_clusters); % expectation of data point being part of a cluster
S = zeros(dim, dim, num_clusters); % covariance matrix for each cluster
b = zeros(num_data, num_clusters); % cluster assignments, only used for initialization of pi and S
Q = zeros(epochs*2,1); % vector that can hold complete data log-likelihood after each E and M step
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Initialize cluster means using k-means
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% the 4th output D is the nxk matrix of distances from each point to every centroid
[~, ~, ~, D] = kmeans(x, k);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Determine the b values for all data points
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for i = 1:num_data
    % hard-assign each point to its nearest k-means centroid
    [~, minIdx] = min(D(i, :));
    b(i, minIdx) = 1;
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Initialize pi's (mixing coefficients)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
pi = zeros(k, 1);
for i = 1:k
    % mixing coefficient = fraction of points initially assigned to cluster i
    pi(i) = sum(b(:, i)) / num_data;
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TODO: Initialize the covariance matrix estimate
% further modifications will need to be made when doing 2(d)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
m = zeros(k, dim);
for i = 1:k
    data = x(b(:, i) == 1, :);          % points hard-assigned to cluster i
    m(i, :) = mean(data);
    S(:, :, i) = cov(data);
    if flag == 1
        % 2(d) improved EM (assumed use of lambda): regularize so S cannot be singular
        S(:, :, i) = S(:, :, i) + lambda * eye(dim);
    end
end
% Main EM loop
for n = 1:epochs
    %%%%%%%%%%%%%%%%
    % E-step
    %%%%%%%%%%%%%%%%
    fprintf('E-step, epoch #%d\n', n);

    [h] = E_step(x, h, pi, m, S, k);

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % TODO: Store the value of the complete log-likelihood function
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Q after the E-step is stored in the odd entries of Q
    Q(2*n - 1) = Q_step(x, m, S, k, pi, h);

    %%%%%%%%%%%%%%%%
    % M-step
    %%%%%%%%%%%%%%%%
    fprintf('M-step, epoch #%d\n', n);

    [S, m, pi] = M_step(x, h, S, k, flag);

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % TODO: Store the value of the complete log-likelihood function
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Q after the M-step is stored in the even entries of Q
    Q(2*n) = Q_step(x, m, S, k, pi, h);
end
end
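
% ---------------------------------------------------------------------------
% The helpers called above (E_step, M_step, Q_step) are assumed to live in
% their own files (E_step.m, M_step.m, Q_step.m), which are not part of this
% file. The local functions below are only minimal sketches of what each
% sub-step computes, written with mvnpdf for brevity; they are illustrative,
% not the graded implementations, and if the separate files exist these local
% versions would shadow them inside EMG.m.
% ---------------------------------------------------------------------------

% E-step sketch: responsibilities h(i,j) = p(z_i = j | x_i, pi, m, S).
function h = E_step(x, h, pi, m, S, k)
    for j = 1:k
        % weighted Gaussian density of every point under component j
        h(:, j) = pi(j) * mvnpdf(x, m(j, :), S(:, :, j));
    end
    % normalize each row so the responsibilities sum to one
    % (a small constant like the eps defined in EMG can guard all-zero rows)
    h = h ./ sum(h, 2);
end

% Q sketch: expected complete-data log-likelihood
%   Q = sum_i sum_j h(i,j) * ( log pi_j + log N(x_i | m_j, S_j) ).
function Q = Q_step(x, m, S, k, pi, h)
    logp = zeros(size(x, 1), k);
    for j = 1:k
        logp(:, j) = log(pi(j)) + log(mvnpdf(x, m(j, :), S(:, :, j)));
    end
    Q = sum(sum(h .* logp));
end

% M-step sketch: re-estimate mixing coefficients, means, and covariances from
% the current responsibilities. The flag == 1 branch assumes "improved EM"
% means adding a small ridge (lambda*I) to each covariance so it stays
% non-singular; lambda mirrors the value defined in EMG.
function [S, m, pi] = M_step(x, h, S, k, flag)
    [num_data, dim] = size(x);
    lambda = 1e-3;                       % assumed regularizer, as in EMG
    Nk = sum(h, 1);                      % effective number of points per cluster
    pi = (Nk / num_data)';               % kx1 mixing coefficients
    m = (h' * x) ./ Nk';                 % kxdim weighted means
    for j = 1:k
        xc = x - m(j, :);                % center the data at component j's mean
        S(:, :, j) = (xc' * (h(:, j) .* xc)) / Nk(j);
        if flag == 1
            S(:, :, j) = S(:, :, j) + lambda * eye(dim);
        end
    end
end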