diff --git a/assignments/hwk02/AllProblem1.m b/assignments/hwk02/AllProblem1.m
new file mode 100644
index 0000000..dc0b7e3
--- /dev/null
+++ b/assignments/hwk02/AllProblem1.m
@@ -0,0 +1,8 @@
+function AllProblem1()
+    fprintf("Dataset 1:\n");
+    Problem1('training_data1.txt', 'test_data1.txt')
+    fprintf("\nDataset 2:\n");
+    Problem1('training_data2.txt', 'test_data2.txt')
+    fprintf("\nDataset 3:\n");
+    Problem1('training_data3.txt', 'test_data3.txt')
+end
\ No newline at end of file
diff --git a/assignments/hwk02/Back_Project.m b/assignments/hwk02/Back_Project.m
index 6d54e93..868aa10 100644
--- a/assignments/hwk02/Back_Project.m
+++ b/assignments/hwk02/Back_Project.m
@@ -7,17 +7,24 @@ function [] = Back_Project(training_data, test_data, n_components)
     data = vertcat(training_data, test_data);
 
     % perform PCA
-    coeff = pca(data);
+    [coeff, score] = pca(data);
 
     % for each number of principal components
     for n_idx = 1:length(n_components)
         n = n_components(n_idx);
 
-        % TODO: perform the back projection algorithm using the first n_components(n) principal components
+        % perform the back projection algorithm using the first n_components(n) principal components
         W = coeff(:,1:n);
+        z = score(:,1:n);
+        sample_mean = mean(data);
+        reconstruction = W * z' + sample_mean';
 
-        % TODO: plot first 5 images back projected using the first
+        % plot first 5 images back projected using the first
         % n_components(n) principal components
+        for i = 1:5
+            subplot(3,2,i)
+            imagesc(reshape(reconstruction(:,i), 32, 30)');
+        end
 
     end
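A quick way to validate the reconstruction formula above, outside the graded scripts: with every principal component retained, `W * z' + mean(data)'` must reproduce the input up to round-off, so any visible blurring at small `n` comes from truncation alone. A minimal sketch, assuming random stand-in data and R2016b+ implicit expansion:

```matlab
% Full-rank back-projection check (random stand-in for the face matrix).
X = rand(100, 960);
[coeff, score] = pca(X);                % as in Back_Project.m
W = coeff;                              % keep every principal component
z = score;
reconstruction = W * z' + mean(X)';     % d-by-N, same formula as above (R2016b+)
fprintf('full-rank reconstruction error: %g\n', norm(reconstruction' - X, 'fro'));
```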
diff --git a/assignments/hwk02/Classify.m b/assignments/hwk02/Classify.m
index f510904..17ac13f 100644
--- a/assignments/hwk02/Classify.m
+++ b/assignments/hwk02/Classify.m
@@ -5,27 +5,25 @@ function [predictions] = Classify(data, m1, m2, S1, S2, pc1, pc2)
     % calculate P(x|C) * P(C) for both classes
-    pxC1 = mvnpdf(data, m1, S1);
-    pxC2 = mvnpdf(data, m2, S2);
+    d = size(data, 2);
+    diff1 = data - m1;
+    diff2 = data - m2;
+    pxC1 = 1/((2*pi)^(d/2) * sqrt(det(S1))) * exp(-1/2 * sum((diff1 / S1) .* diff1, 2));
+    pxC2 = 1/((2*pi)^(d/2) * sqrt(det(S2))) * exp(-1/2 * sum((diff2 / S2) .* diff2, 2));
 
-    g1 = log(pxC1 * pc1);
-    g2 = log(pxC2 * pc2);
+    pC1x = pxC1 * pc1;
+    pC2x = pxC2 * pc2;
 
     % TODO: calculate log odds, if > 0 then data(i) belongs to class c1, else, c2
-    log_odds = g1 - g2;
-    % for i = 1:length(data)
-    %     if g1 > g2
-    %         predictions(i) = 1;
-    %     else
-    %         predictions(i) = 2;
-    %     end
-    % end
+    log_odds = log(pC1x ./ pC2x);
 
     % TODO: get predictions from log odds calculation
     [num_rows, ~] = size(data);
     predictions = zeros(num_rows,1);
     for i = 1:num_rows
-        predictions(i) = log_odds(i) > 0;
+        if log_odds(i) > 0
+            predictions(i) = 1;
+        else
+            predictions(i) = 2;
+        end
     end
 end % Function end
\ No newline at end of file
diff --git a/assignments/hwk02/Error_Rate.m b/assignments/hwk02/Error_Rate.m
index 62212fb..1a0cf7d 100644
--- a/assignments/hwk02/Error_Rate.m
+++ b/assignments/hwk02/Error_Rate.m
@@ -7,11 +7,11 @@ function [] = Error_Rate(predictions, labels)
     [total_rows, ~] = size(predictions);
 
     for i = 1:total_rows
-      if predictions(i) == labels(i)
-        c = c + 1;
-      end
+        if predictions(i) == labels(i)
+            c = c + 1;
+        end
     end
 
-    fprintf('Rate: %.1f%% (%d / %d)\n', 100 * c / total_rows, c, total_rows);
+    fprintf('%.1f%% ', 100 * (total_rows - c) / total_rows);
 end % Function end
\ No newline at end of file
diff --git a/assignments/hwk02/HW2.typ b/assignments/hwk02/HW2.typ
index 8a513d5..c0da4c1 100644
--- a/assignments/hwk02/HW2.typ
+++ b/assignments/hwk02/HW2.typ
@@ -21,12 +21,25 @@ a.
 #c[*(30 points)* Implement all three models and test your program on the three pairs of training data and test data. The main script, Problem1(training data file, test data file), is given and should not be modified. There are 3 scripts that need to be completed for Problem 1 (`Error_Rate.m`, `Param_Est.m`, `Classify.m`). The _TODO:_ comment headers in all 3 of these files must be filled in; they describe exactly what code needs to be written to obtain full credit. The script `Error_Rate.m` calculates the error rate, `Param_Est.m` estimates the parameters of each multivariate Gaussian distribution under the 3 different models, and `Classify.m` classifies the test data using the learned models. For each test dataset, the script calls these functions and prints the training error rate and test error rate of each model to the MATLAB command window.]
+
+  ```
+  >> Problem1('training_data1.txt', 'test_data1.txt')
+  Model 1: (train err = 28.0% ), (test error = 19.0% )
+  Model 2: (train err = 27.0% ), (test error = 25.0% )
+  Model 3: (train err = 29.0% ), (test error = 26.0% )
+  >> Problem1('training_data2.txt', 'test_data2.txt')
+  Model 1: (train err = 29.0% ), (test error = 19.0% )
+  Model 2: (train err = 15.0% ), (test error = 15.0% )
+  Model 3: (train err = 27.0% ), (test error = 22.0% )
+  >> Problem1('training_data3.txt', 'test_data3.txt')
+  Model 1: (train err = 30.0% ), (test error = 21.0% )
+  Model 2: (train err = 0.0% ), (test error = 0.0% )
+  Model 3: (train err = 30.0% ), (test error = 28.0% )
+  ```
 
 b. #c[*(5 points)* State which model works best on each test data set and explain why you believe this is the case. Discuss your observations.]
 
 c. #c[*(15 points)* Write the log likelihood function and derive $S_1$ and $S_2$ by maximum likelihood estimation of model 2. Note that since $S_1$ and $S_2$ are shared as $S$, you need to add the log likelihood functions of the two classes and maximize the sum to derive $S$.]
-
-
 2. #c[*(50 points)* In this problem, you will work on dimension reduction and classification on a Faces dataset from the UCI repository. We provide the processed files `face_train_data_960.txt` and `face_test_data_960.txt` with 500 and 124 images, respectively. Each image is of size 30 #sym.times 32, with the pixel values flattened into a row in the files; the last column gives the label of the image: 1 (sunglasses) or 0 (open). You can visualize the $i$th image with the following MATLAB command line:]
 
 ```matlab
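The hand-rolled class-conditional density in `Classify.m` can be cross-checked against the Statistics Toolbox's `mvnpdf`, which the original stub called. A minimal sketch with stand-in data and an assumed positive-definite covariance (illustrative only, not part of the submission):

```matlab
% Cross-check of the vectorized Gaussian density against mvnpdf
% (stand-in data; m1 and S1 are assumed values, not estimates).
rng(0);
data = randn(20, 8);                    % 20 rows of 8-dimensional features
m1 = ones(1, 8);                        % row-vector mean, as mean() returns
A  = randn(8);
S1 = A * A' + 8 * eye(8);               % symmetric positive definite covariance
d  = size(data, 2);
diff1 = data - m1;                      % needs R2016b+ implicit expansion
pxC1 = 1/((2*pi)^(d/2) * sqrt(det(S1))) * exp(-1/2 * sum((diff1 / S1) .* diff1, 2));
max(abs(pxC1 - mvnpdf(data, m1, S1)))   % should be at round-off level
```

Note that the row-wise quadratic form `sum((diff1 / S1) .* diff1, 2)` is what keeps `pxC1` an N-by-1 vector; forming `(data-m1) * inv(S1) * (data-m1)'` instead would produce an N-by-N matrix and break the per-row log odds.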
diff --git a/assignments/hwk02/Param_Est.m b/assignments/hwk02/Param_Est.m
index b6bd501..e7994e3 100644
--- a/assignments/hwk02/Param_Est.m
+++ b/assignments/hwk02/Param_Est.m
@@ -11,28 +11,29 @@ function [m1, m2, S1, S2] = Param_Est(training_data, training_labels, part)
     m1 = mean(class1_data);
     m2 = mean(class2_data);
 
-    S1 = cov(class1_data);
-    S2 = cov(class2_data);
+    S1 = cov(class1_data, 1);
+    S2 = cov(class2_data, 1);
 
+    % Model 1.
+    % Assume independent 𝑆1 and 𝑆2 (the discriminant function is as equation (5.17) in the textbook).
+    if (strcmp(part, '1'))
+
     % Model 3.
     % Assume 𝑆1 and 𝑆2 are diagonal (the Naive Bayes model in equation (5.24)).
-    if(strcmp(part, '3'))
+    elseif (strcmp(part, '3'))
         S1 = diag(diag(S1));
         S2 = diag(diag(S2));
 
     % Model 2.
-    % Assume 𝑆1 = 𝑆2. In other words, shared S between two classes (the discriminant function is as equation (5.21) and (5.22) in the textbook).
-    elseif(strcmp(part, '2'))
+    % Assume 𝑆1 = 𝑆2. In other words, shared S between two classes
+    % (the discriminant function is as equation (5.21) and (5.22) in the textbook).
+    elseif (strcmp(part, '2'))
         P_C1 = length(class1_data) / num_rows;
         P_C2 = length(class2_data) / num_rows;
         S = P_C1 * S1 + P_C2 * S2;
         S1 = S;
         S2 = S;
-
-    % Model 1.
-    % Assume independent 𝑆1 and 𝑆2 (the discriminant function is as equation (5.17) in the textbook).
-    elseif(strcmp(part, '1'))
     end
 end % Function end
diff --git a/assignments/hwk02/Problem1.m b/assignments/hwk02/Problem1.m
index b381a53..d62cfeb 100644
--- a/assignments/hwk02/Problem1.m
+++ b/assignments/hwk02/Problem1.m
@@ -18,19 +18,19 @@ function [] = Problem1(training_file, test_file)
     pc2 = 1-pc1;
 
     for i = 1:length(part)
-        fprintf('Model %s\n', part{i});
+        fprintf('Model %s: ', part{i});
 
         % Training for Multivariate Gaussian
         [m1, m2, S1, S2] = Param_Est(training_data, training_labels, part(i));
         [predictions] = Classify(training_data, m1, m2, S1, S2, pc1, pc2);
-        fprintf('training error\n');
+        fprintf('(train err = ');
         Error_Rate(predictions, training_labels);
+        fprintf('), ');
 
         % Testing for Multivariate Gaussian
         [predictions] = Classify(test_data, m1, m2, S1, S2, pc1, pc2);
-        fprintf('test error\n');
+        fprintf('(test error = ');
         Error_Rate(predictions, test_labels);
-
-        fprintf('\n\n');
+        fprintf(')\n');
     end
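For part (c) of Problem 1, maximizing the summed log likelihood of the two classes yields the prior-weighted pooled covariance S = P(C1)*S1 + P(C2)*S2, which is exactly what the Model 2 branch of `Param_Est.m` computes. A minimal sketch with stand-in class samples (illustrative only, not part of the graded scripts):

```matlab
% Model 2 shared covariance (X1, X2 are stand-in two-class samples).
X1 = randn(60, 8) + 1;                  % assumed class 1 data
X2 = randn(40, 8);                      % assumed class 2 data
N1 = size(X1, 1); N2 = size(X2, 1); N = N1 + N2;
S1 = cov(X1, 1);                        % cov(..., 1) normalizes by N: the MLE, as in Param_Est.m
S2 = cov(X2, 1);
S  = (N1/N) * S1 + (N2/N) * S2;         % prior-weighted pooled estimate, shared by both classes
```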