From 0fc9d8a01f06e044d0973fe9db370a10911fdf4d Mon Sep 17 00:00:00 2001
From: Michael Zhang
Date: Wed, 25 Oct 2023 08:32:53 -0500
Subject: [PATCH] classify correctly

---
 assignments/hwk02/Back_Project.m |  2 ++
 assignments/hwk02/Classify.m     | 25 ++++++++++++++++++-------
 assignments/hwk02/Error_Rate.m   |  2 +-
 assignments/hwk02/HW2.typ        |  8 +++++++-
 assignments/hwk02/Param_Est.m    | 20 +++++++++++---------
 5 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/assignments/hwk02/Back_Project.m b/assignments/hwk02/Back_Project.m
index 868aa10..abb9b9e 100644
--- a/assignments/hwk02/Back_Project.m
+++ b/assignments/hwk02/Back_Project.m
@@ -26,6 +26,8 @@ function [] = Back_Project(training_data, test_data, n_components)
             imagesc(reshape(reconstruction(:,i),32,30)');
         end
 
+        pause;
+
     end
 
 end % Function end
\ No newline at end of file
diff --git a/assignments/hwk02/Classify.m b/assignments/hwk02/Classify.m
index 17ac13f..aee9134 100644
--- a/assignments/hwk02/Classify.m
+++ b/assignments/hwk02/Classify.m
@@ -3,20 +3,31 @@
 % these posterior probabilities are compared using the log odds.
 
 function [predictions] = Classify(data, m1, m2, S1, S2, pc1, pc2)
+    [num_rows, d] = size(data);
 
     % calculate P(x|C) * P(C) for both classes
-    d = 8;
-    pxC1 = 1/(power(2*pi, d/2) * power(det(S1), 1/2)) * exp(-1/2 * (data-m1) * inv(S1) * (data-m1)');
-    pxC2 = 1/(power(2*pi, d/2) * power(det(S2), 1/2)) * exp(-1/2 * (data-m2) * inv(S2) * (data-m2)');
+    % pxC1 = 1/(power(2*pi, d/2) * power(det(S1), 1/2)) * exp(-1/2 * (data-m1) * inv(S1) * (data-m1)');
+    % pxC2 = 1/(power(2*pi, d/2) * power(det(S2), 1/2)) * exp(-1/2 * (data-m2) * inv(S2) * (data-m2)');
+
+    pxC1 = zeros(num_rows,1);
+    pxC2 = zeros(num_rows,1);
+    for i = 1:num_rows
+        x = data(i,:);
+        pxC1(i) = 1/(power(2*pi, d/2) * power(det(S1), 1/2)) * exp(-1/2 * (x-m1) * inv(S1) * (x-m1)');
+        pxC2(i) = 1/(power(2*pi, d/2) * power(det(S2), 1/2)) * exp(-1/2 * (x-m2) * inv(S2) * (x-m2)');
+    end
+    % pxC1 = mvnpdf(data, m1, S1);
+    % pxC2 = mvnpdf(data, m2, S2);
+
+    % P(C|x) = (P(x|C) * P(C)) / common factor
     pC1x = pxC1 * pc1;
     pC2x = pxC2 * pc2;
 
-    % TODO: calculate log odds, if > 0 then data(i) belongs to class c1, else, c2
-    log_odds = log(pC1x / pC2x);
+    % calculate log odds; if > 0, data(i) belongs to class c1, otherwise to c2
+    log_odds = log(pC1x) - log(pC2x);
 
-    % TODO: get predictions from log odds calculation
-    [num_rows, ~] = size(data);
+    % get predictions from log odds calculation
     predictions = zeros(num_rows,1);
     for i = 1:num_rows
         if log_odds(i) > 0
diff --git a/assignments/hwk02/Error_Rate.m b/assignments/hwk02/Error_Rate.m
index 1a0cf7d..b71794f 100644
--- a/assignments/hwk02/Error_Rate.m
+++ b/assignments/hwk02/Error_Rate.m
@@ -7,7 +7,7 @@ function [] = Error_Rate(predictions, labels)
     [total_rows, ~] = size(predictions);
 
     for i = 1:total_rows
-        if predictions(i) == labels(i)
+        if predictions(i) ~= labels(i)
             c = c + 1;
         end
     end
diff --git a/assignments/hwk02/HW2.typ b/assignments/hwk02/HW2.typ
index c0da4c1..8799187 100644
--- a/assignments/hwk02/HW2.typ
+++ b/assignments/hwk02/HW2.typ
@@ -40,6 +40,8 @@
 
   c. #c[*(15 points)* Write the log likelihood function and derive $S_1$ and $S_2$ by maximum likelihood estimation of model 2. Note that since $S_1$ and $S_2$ are shared as $S$, you need to add the log likelihood function of the two classes to maximizing for deriving $S$.]
 
+  The maximum likelihood of a single
+
 2. #c[*(50 points)* In this problem, you will work on dimension reduction and classification on a Faces dataset from the UCI repository.
We provided the processed files `face_train_data_960.txt` and `face_test_data_960.txt` with 500 and 124 images, respectively. Each image is of size 30 #sym.times 32 with the pixel values in a row in the files and the last column identifies the labels: 1 (sunglasses), and 0 (open) of the image. You can visualize the $i$th image with the following matlab command line:]
 
 ```matlab
@@ -58,4 +60,8 @@
 
   I used $K = 41$.
 
-  c. #c[*(20 points)* Use the first $K = {10, 50, 100}$ principle components to approximate the first five images of the training set (first row of the data matrix) by projecting the centered data using the first $K$ principal components then "back project" (weighted sum of the components) to the original space and add the mean. For each $K$, plot the reconstructed image. This can be accomplished by completing the _TODO_ comment headers in the `Back_Project.m` script. Explain your observations in the report.]
\ No newline at end of file
+  c. #c[*(20 points)* Use the first $K = {10, 50, 100}$ principal components to approximate the first five images of the training set (first row of the data matrix) by projecting the centered data using the first $K$ principal components, then "back project" (weighted sum of the components) to the original space and add the mean. For each $K$, plot the reconstructed image. This can be accomplished by completing the _TODO_ comment headers in the `Back_Project.m` script. Explain your observations in the report.]
+
+  The "back-projection" looks like a low-resolution version of the image; with lower $K$ in particular, it keeps only the features along which the training data varies most.
+
+  This is why for $K = 10$ most of the faces were not really visible, only vaguely face-shaped blobs along with the shirt. With higher $K$, more detail was recovered in the other features and in the background.
\ No newline at end of file
diff --git a/assignments/hwk02/Param_Est.m b/assignments/hwk02/Param_Est.m
index e7994e3..8fb0a18 100644
--- a/assignments/hwk02/Param_Est.m
+++ b/assignments/hwk02/Param_Est.m
@@ -11,18 +11,13 @@ function [m1, m2, S1, S2] = Param_Est(training_data, training_labels, part)
     m1 = mean(class1_data);
     m2 = mean(class2_data);
 
-    S1 = cov(class1_data, 1);
-    S2 = cov(class2_data, 1);
+    S1 = cov(class1_data);
+    S2 = cov(class2_data);
 
     % Model 1.
     % Assume independent 𝑆1 and 𝑆2 (the discriminant function is as equation (5.17) in the textbook).
     if (strcmp(part, '1'))
-
-    % Model 3.
-    % Assume 𝑆1 and 𝑆2 are diagonal (the Naive Bayes model in equation (5.24)).
-    elseif (strcmp(part, '3'))
-        S1 = diag(diag(S1));
-        S2 = diag(diag(S2));
+        % Already calculated above, so nothing to do here
 
     % Model 2.
     % Assume 𝑆1 = 𝑆2. In other words, shared S between two classes
@@ -31,9 +26,16 @@ function [m1, m2, S1, S2] = Param_Est(training_data, training_labels, part)
         P_C1 = length(class1_data) / num_rows;
         P_C2 = length(class2_data) / num_rows;
 
-        S = P_C1 * S1 + P_C2 + S2;
+        S = P_C1 * S1 + P_C2 * S2;
         S1 = S;
        S2 = S;
+
+    % Model 3.
+    % Assume 𝑆1 and 𝑆2 are diagonal (the Naive Bayes model in equation (5.24)).
+    elseif (strcmp(part, '3'))
+        % pull the diagonal out as a vector, then rebuild it as a diagonal matrix
+        S1 = diag(diag(S1));
+        S2 = diag(diag(S2));
     end

 end % Function end
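A note on `Classify.m`: multiplying out `exp(-1/2 * (x-m) * inv(S) * (x-m)')` can underflow to zero when the quadratic form is large, which makes the subsequent `log` return `-Inf`. A minimal sketch of a log-domain alternative follows, assuming the same inputs as `Classify.m`; the file name `Classify_Log.m` and the class label values 1 and 2 are assumptions, not taken from the patch, which does not show the label assignments.

```matlab
% Classify_Log.m -- sketch only, not the patch's implementation.
% Evaluates the Gaussian discriminant in the log domain:
%   g_c(x) = -d/2*log(2*pi) - 1/2*log|S_c| - 1/2*(x-m_c)*inv(S_c)*(x-m_c)' + log P(C_c)
function [predictions] = Classify_Log(data, m1, m2, S1, S2, pc1, pc2)
    [num_rows, d] = size(data);
    predictions = zeros(num_rows, 1);
    for i = 1:num_rows
        x = data(i, :);
        % S \ v solves S*y = v directly instead of forming inv(S)
        g1 = -d/2*log(2*pi) - log(det(S1))/2 - (x - m1) * (S1 \ (x - m1)') / 2 + log(pc1);
        g2 = -d/2*log(2*pi) - log(det(S2))/2 - (x - m2) * (S2 \ (x - m2)') / 2 + log(pc2);
        if g1 - g2 > 0                % log odds > 0 -> class 1 (label values assumed)
            predictions(i) = 1;
        else
            predictions(i) = 2;
        end
    end
end
```

The log odds computed this way equal `log(pC1x) - log(pC2x)` from the patch whenever the densities are representable, so the two versions agree wherever `Classify.m` is numerically safe.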
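For the back-projection discussed in 2(c), a minimal sketch of the projection/reconstruction pipeline, assuming the 500 training images sit in the rows of `face_train_data_960.txt` with the label in the last column as described above; the variable names and the 961-column layout are assumptions, and this is not the contents of `Back_Project.m`, which the patch only touches partially.

```matlab
% Sketch only: reconstruct the first five training images from the
% first K principal components (K = 10, 50, or 100 per the assignment).
train_data = load('face_train_data_960.txt');   % assumed 500 x 961, last column = label
X = train_data(:, 1:960);                       % drop the label column
K = 10;
mu = mean(X);                                   % 1 x 960 mean image
Xc = X - mu;                                    % center the data (implicit expansion)
[V, D] = eig(cov(Xc));                          % eigenvectors of the covariance
[~, idx] = sort(diag(D), 'descend');            % order components by variance
W = V(:, idx(1:K));                             % first K principal components
Z = Xc(1:5, :) * W;                             % project the first five images
Xhat = Z * W' + mu;                             % back-project, then add the mean
for i = 1:5
    subplot(1, 5, i);
    imagesc(reshape(Xhat(i, :), 32, 30)');      % same reshape as Back_Project.m
end
```

Sorting the eigenvectors by eigenvalue is what makes "first $K$" meaningful here; MATLAB's `eig` does not guarantee a descending order on its own.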