88 lines
No EOL
3.3 KiB
Matlab
88 lines
No EOL
3.3 KiB
Matlab
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% Name: EMG.m
|
|
% Input: x - a nxd matrix (nx3 if using RGB)
|
|
% k - the number of clusters
|
|
% epochs - number of iterations (epochs) to run the algorithm for
|
|
% flag - flag to use improved EM to avoid singular covariance matrix
|
|
% Output: h - a nxk matrix, the expectation of the hidden variable z given the data set and distribution params
|
|
% m - a kxd matrix, the maximum likelihood estimate of the mean
|
|
% Q - vector of values of the complete data log-likelihood function
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
function [h, m, Q] = EMG(x, k, epochs, flag)
% EMG  Run the EM algorithm for a Gaussian mixture model.
%
% Inputs:
%   x      - n x d data matrix (n x 3 if using RGB)
%   k      - number of clusters
%   epochs - number of EM iterations (epochs) to run
%   flag   - nonzero to use improved EM: regularize each covariance with
%            lambda*eye(d) to avoid a singular covariance matrix
%
% Outputs:
%   h - n x k matrix, expectation of the hidden variable z given the data
%       set and the current distribution parameters
%   m - k x d matrix, maximum likelihood estimate of the cluster means
%   Q - vector of complete-data log-likelihood values (one slot per E and
%       M step, hence length epochs*2)

% variables
num_clusters = k;          % number of clusters
eps = 1e-15;               % small value used to avoid taking log(0)
lambda = 1e-3;             % regularization strength for improved EM
[num_data, dim] = size(x);
h = zeros(num_data, num_clusters);   % responsibilities (expectations of z)
S = zeros(dim, dim, num_clusters);   % covariance matrix for each cluster
b = zeros(num_data, num_clusters);   % hard assignments, only used to initialize pi, m and S
Q = zeros(epochs*2, 1);              % complete-data log-likelihood after each E and M step

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialise cluster means using k-means
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% D is the n x k matrix of point-to-centroid distances (4th kmeans output)
[~, ~, ~, D] = kmeans(x, k);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Determine the b values for all data points
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Use the index of the single nearest centroid so that a distance tie
% cannot assign one point to several clusters simultaneously.
for i = 1:num_data
    [~, minIdx] = min(D(i, :));
    b(i, minIdx) = 1;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize pi's (mixing coefficients)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Mixing coefficients are probabilities: normalize the cluster counts by
% num_data so they sum to 1 (the raw counts alone are not valid pi's).
pi = sum(b, 1)' / num_data;          % k x 1 column vector

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize the mean and covariance matrix estimates
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
m = zeros(k, dim);
for i = 1:k
    data = x(b(:, i) == 1, :);
    m(i, :) = mean(data, 1);
    S(:, :, i) = cov(data);
    if flag
        % Improved EM: add lambda*I so the covariance cannot be singular
        S(:, :, i) = S(:, :, i) + lambda * eye(dim);
    end
end

% Main EM loop
for n = 1:epochs
    %%%%%%%%%%%%%%%%
    % E-step
    %%%%%%%%%%%%%%%%
    fprintf('E-step, epoch #%d\n', n);
    [h] = E_step(x, h, pi, m, S, k);

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Store the value of the complete log-likelihood function
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Q(z,theta) = sum_i sum_j h(i,j) * (log pi_j + log N(x_i | m_j, S_j)).
    % mvnpdf is vectorized over the rows of x, so evaluate it once per
    % cluster instead of once per data point (the original recomputed it
    % inside the per-point loop, and indexed pi by the data point i
    % instead of the cluster j). eps guards against log(0).
    L = 0;
    for j = 1:k
        p = mvnpdf(x, m(j, :), S(:, :, j));
        L = L + sum(h(:, j) .* (log(pi(j) + eps) + log(p + eps)));
    end
    Q(2*n - 1) = L;    % odd slots hold the value after each E-step

    %%%%%%%%%%%%%%%%
    % M-step
    %%%%%%%%%%%%%%%%
    fprintf('M-step, epoch #%d\n', n);
    % NOTE(review): M_step receives Q and returns it, so it is presumably
    % responsible for storing the post-M-step log-likelihood — confirm.
    [Q, S, m] = M_step(x, Q, h, S, k);
end

end