updates

2023-10-21 23:32:00 -05:00 · 2023-10-21 23:32:00 -05:00 · 5e820c4227
commit 5e820c4227
parent ac35e842d8
8 changed files with 91 additions and 27 deletions
--- a/assignments/hwk02/Back_Project.m
+++ b/assignments/hwk02/Back_Project.m
@ -4,17 +4,19 @@
 function [] = Back_Project(training_data, test_data, n_components)
  % stack data 
-   data = vertcat(training_data, test_data);
+  data = vertcat(training_data, test_data);
  % TODO: perform PCA
  % for each number of principal components
  for n = 1:length(n_components)
-      % TODO: perform the back projection algorithm using the first n_components(n) principal components
+    % TODO: perform the back projection algorithm using the first n_components(n) principal components
-      % TODO: plot first 5 images back projected using the first
+    % TODO: plot first 5 images back projected using the first
-      % n_components(n) principal components
+    % n_components(n) principal components
  end
--- a/assignments/hwk02/Classify.m
+++ b/assignments/hwk02/Classify.m
@ -3,21 +3,29 @@
 % these posterior probabilities are compared using the log odds.
 function [predictions] = Classify(data, m1, m2, S1, S2, pc1, pc2)
-  d = 8;
+  % calculate P(x|C) * P(C) for both classes
  % TODO: calculate P(x|C) * P(C) for both classes
-  pxC1 = exp(-1/2*(data-m1)./S1*(data-m1)') / (power(2*pi,d/2) * sqrt(det(S1)));
+  pxC1 = mvnpdf(data, m1, S1);
-  pxC2 = exp(-1/2*(data-m2)*(S2\(data-m2).'));
+  pxC2 = mvnpdf(data, m2, S2);
-  g1 = pxC1 * pc1;
+  g1 = log(pxC1 * pc1);
-  g2 = pxC2 * pc2;
+  g2 = log(pxC2 * pc2);
  % TODO: calculate log odds, if > 0 then data(i) belongs to class c1, else, c2
-  for i = 1:length(data)
+  log_odds = g1 - g2;
-      data(i)
+  % for i = 1:length(data)
-  end
+  %     if g1 > g2
  %         predictions(i) = 1;
  %     else
  %         predictions(i) = 2;
  %     end
  % end
  % TODO: get predictions from log odds calculation
  [num_rows, ~] = size(data);
  predictions = zeros(num_rows,1);
  for i = 1:num_rows
      predictions(i) = log_odds(i) > 0;
  end
 end % Function end
--- a/assignments/hwk02/Eigenfaces.m
+++ b/assignments/hwk02/Eigenfaces.m
@ -2,12 +2,17 @@
 % 5 eigenvectors
 function [] = Eigenfaces(training_data, test_data)
-   % stack data 
+  % stack data 
-   data = vertcat(training_data, test_data);
+  data = vertcat(training_data, test_data);
-   % TODO: perform PCA
+  % perform PCA
  coeff = pca(data);
-   % TODO: show the first 5 eigenvectors (see homework for example)
+  % show the first 5 eigenvectors (see homework for example)
-   imagesc(reshape(faces_data(i,1:end-1),32,30)')
+  for i = 1:5
    subplot(3,2,i)
    imagesc(reshape(coeff(:,i),32,30)');
  end
  % pause;
 end % Function end
--- a/assignments/hwk02/Error_Rate.m
+++ b/assignments/hwk02/Error_Rate.m
@ -2,6 +2,16 @@
 % predicted labels that are incorrect.
 function [] = Error_Rate(predictions, labels)
-    % TODO: compute error rate and print it out
+  % compute error rate and print it out
  c = 0;
  [total_rows, ~] = size(predictions);
  for i = 1:total_rows
      if predictions(i) == labels(i)
          c = c + 1;
      end
  end
  fprintf('Rate: %.1f%% (%d / %d)\n', 100 * c / total_rows, c, total_rows);
 end % Function end
--- a/assignments/hwk02/KNN.m
+++ b/assignments/hwk02/KNN.m
@ -6,18 +6,46 @@ function [test_err] = KNN(k, training_data, test_data, training_labels, test_lab
  n = length(test_data(:,1)); % get number of rows in test data
  preds = zeros(length(test_labels),1); % predict labels for each test point
-  % TODO: compute pairwise euclidean distance between the test data and the
+  % compute pairwise euclidean distance between the test data and the training data
-  % training data
+  pairwise_distance = pdist2(training_data, test_data);
  unique_classes = unique(training_labels);
  % for each data point (row) in the test data
  for t = 1:n
    % TODO: compute k-nearest neighbors for data point
    distances = pairwise_distance(:,t);
    [~, smallest_indexes] = sort(distances, 'ascend');
    smallest_k_indexes = smallest_indexes(1:k);
    distances_by_class = zeros(max(unique_classes), 2);
    for i = 1:length(unique_classes)
      class = unique_classes(i);
      this_class_distances = distances(training_labels == class,:);
      distances_by_class(i,1) = class;
      distances_by_class(i,2) = mean(this_class_distances);
    end
    distances_by_class_table = array2table(distances_by_class);
    % TODO: classify test point using majority rule. Include tie-breaking
    % using whichever class is closer by distance. Fill in preds with the
    % predicted label.
    smallest_k_labels = training_labels(smallest_k_indexes);
    labels_by_count = tabulate(smallest_k_labels);
    labels_by_count_sorted = sortrows(labels_by_count, 2);
    most_frequent_label = labels_by_count_sorted(1,:);
    most_frequent_label_count = most_frequent_label(2);
    labels_that_have_most_frequent_count = labels_by_count_sorted(labels_by_count_sorted(:,2) == most_frequent_label_count,1);
    if length(labels_that_have_most_frequent_count) > 1
      common_indexes = find(ismember(distances_by_class, labels_that_have_most_frequent_count));
      common_distances = distances_by_class(common_indexes,:);
      sorted_distances = sortrows(common_distances,2);
      preds(t) = sorted_distances(1,1);
    else
      winning_label = mode(smallest_k_labels);
      preds(t) = winning_label;
    end
  end
--- a/assignments/hwk02/KNN_Error.m
+++ b/assignments/hwk02/KNN_Error.m
@ -2,13 +2,19 @@
 % errors for k-nearest neighbors using different values of k.
 function [] = KNN_Error(neigenvectors, ks, training_data, test_data, training_labels, test_labels)
-  % TODO: perform PCA
+  % perform PCA
  % TODO: project data using the number of eigenvectors defined by neigenvectors
  % TODO: compute test error for kNN with different k's. Fill in
  % test_errors with the results for each k in ks.
  test_errors = zeros(1,length(ks));
  for i = 1:length(ks)
    k = ks(i);
    test_errors(i) = KNN(k, training_data, test_data, training_labels, test_labels);
  end
  % print error table
  fprintf("-----------------------------\n");
--- a/assignments/hwk02/Param_Est.m
+++ b/assignments/hwk02/Param_Est.m
@ -4,7 +4,7 @@
 % S2: learned covariance matrix for features of class 2)
 function [m1, m2, S1, S2] = Param_Est(training_data, training_labels, part)
-  [num_rows, num_cols] = size(training_data);
+  [num_rows, ~] = size(training_data);
  class1_data = training_data(training_labels==1,:);
  class2_data = training_data(training_labels==2,:);
@ -14,11 +14,14 @@ function [m1, m2, S1, S2] = Param_Est(training_data, training_labels, part)
  S1 = cov(class1_data);
  S2 = cov(class2_data);
-  % Parameter estimation for 3 different models described in homework
+  % Model 3.
  % Assume 𝑆1 and 𝑆2 are diagonal (the Naive Bayes model in equation (5.24)).
  if(strcmp(part, '3'))
    S1 = diag(diag(S1));
    S2 = diag(diag(S2));
  % Model 2.
  % Assume 𝑆1 = 𝑆2, i.e. a single covariance matrix S shared by both classes (the discriminant function is given by equations (5.21) and (5.22) in the textbook).
  elseif(strcmp(part, '2'))
    P_C1 = length(class1_data) / num_rows;
    P_C2 = length(class2_data) / num_rows;
@ -27,6 +30,8 @@ function [m1, m2, S1, S2] = Param_Est(training_data, training_labels, part)
    S1 = S;
    S2 = S;
  % Model 1.
  % Assume independent 𝑆1 and 𝑆2 (the discriminant function is given by equation (5.17) in the textbook).
  elseif(strcmp(part, '1'))
  end
--- a/assignments/hwk02/Problem1.m
+++ b/assignments/hwk02/Problem1.m
@ -21,7 +21,7 @@ function [] = Problem1(training_file, test_file)
    fprintf('Model %s\n', part{i});
    % Training for Multivariate Gaussian 
-    [m1 m2 S1 S2] = Param_Est(training_data, training_labels, part(i));
+    [m1, m2, S1, S2] = Param_Est(training_data, training_labels, part(i));
    [predictions] = Classify(training_data, m1, m2, S1, S2, pc1, pc2);
    fprintf('training error\n');
    Error_Rate(predictions, training_labels);