From bfcb5764d60fb29de637766846af875dcc6eb930 Mon Sep 17 00:00:00 2001
From: Michael Zhang
Date: Mon, 9 Oct 2023 04:30:22 -0500
Subject: [PATCH] update

---
 assignments/01/Makefile    | 2 +-
 assignments/01/lc_openmp.c | 3 ++-
 assignments/01/report.md   | 8 +++++---
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/assignments/01/Makefile b/assignments/01/Makefile
index e123136..3f57a2d 100644
--- a/assignments/01/Makefile
+++ b/assignments/01/Makefile
@@ -7,7 +7,7 @@ CFLAGS := -std=c11 -fopenmp \
 LDFLAGS := -std=c11 -fopenmp -L/opt/homebrew/opt/libomp/lib -O3
 RUST_SOURCES := $(shell find . -name "*.rs")
 
-all: lc_openmp lc_pthreads handin
+all: lc_openmp lc_pthreads
 
 handin: zhan4854.tar.gz
 
diff --git a/assignments/01/lc_openmp.c b/assignments/01/lc_openmp.c
index 914c0f2..85e8c80 100644
--- a/assignments/01/lc_openmp.c
+++ b/assignments/01/lc_openmp.c
@@ -38,7 +38,7 @@ int main(int argc, char **argv) {
 
 #pragma omp parallel for default(shared)
   for (int i = 0; i < data->dimensions; i++) {
-    // #pragma omp parallel for default(shared)
+#pragma omp parallel for default(shared) if (thread_count > data->dimensions)
     for (int j = 0; j < data->rows; j++) {
       FLOAT x_ni_w_ni = 0;
 
@@ -56,6 +56,7 @@ int main(int argc, char **argv) {
 
       FLOAT numer = 0, denom = 0;
       // #pragma omp parallel for default(shared) reduction(+ : numer, denom)
+      // if(thread_count > data->dimensions)
       for (int j = 0; j < data->rows; j++) {
         FLOAT xij = data->buf[data->rows * i + j];
         numer += xij * inner_calc[data->rows * i + j];
diff --git a/assignments/01/report.md b/assignments/01/report.md
index 8c7ae64..de964c2 100644
--- a/assignments/01/report.md
+++ b/assignments/01/report.md
@@ -63,8 +63,10 @@ author: |
 
 This data was generated using the `run_benchmark.sh > out.txt` script.
 
-Small note: There's a part in the end of the program that performs validation on the trained model by using a train/test data set split. I didn't count this towards execution time but felt that it was important enough to keep since it ensured that my program was still behaving correctly.
+## NOTES
 
-```
+I noticed that the loss sometimes fluctuates rather wildly. I think this is because there's no fixed learning rate, so instead of taking incremental steps, we take each dimension's minimizer independently and combine them all at once. In Wikipedia's description[^1] of the algorithm, they pick the particular $w_i$ that results in the minimal loss _by itself_, and then update only that $w_i$ in that outer iteration. I wonder whether that would converge better, but I stuck to implementing the algorithm described in the PDF for the sake of the assignment, since I'm guessing the effectiveness of the machine learning model isn't the important thing here.
 
-```
+[^1]: https://en.wikipedia.org/wiki/Coordinate_descent
+
+Also, there's a part at the end of the program that performs validation on the trained model using a train/test data set split. I didn't count this towards execution time, but felt it was important enough to keep since it ensured that my program was still behaving correctly.
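
A note on what the new pragma in `lc_openmp.c` does: the `if` clause makes the inner region go parallel only when there are more threads than outer loop iterations, so nested parallelism kicks in only when the outer loop alone cannot occupy every thread. Below is a minimal, self-contained sketch of that pattern, assuming `thread_count` comes from `omp_get_max_threads()` and using made-up sizes `N_DIM`/`N_ROW` rather than the assignment's data structures; note that nested regions must also be enabled explicitly, or the inner pragma runs serially regardless of the `if` clause:

```c
#include <omp.h>
#include <stdio.h>

#define N_DIM 4     // few outer iterations
#define N_ROW 100000 // many inner iterations

int main(void) {
  int thread_count = omp_get_max_threads();
  // Nested parallel regions are disabled by default; without this the
  // inner pragma would always run on one thread.
  omp_set_max_active_levels(2);

  double sum[N_DIM] = {0};

#pragma omp parallel for default(shared)
  for (int i = 0; i < N_DIM; i++) {
    double acc = 0;
    // The if() clause makes this region parallel only when there are
    // more threads than outer iterations, so we avoid nested fork/join
    // overhead when the outer loop already saturates the threads.
#pragma omp parallel for default(shared) if (thread_count > N_DIM) \
    reduction(+ : acc)
    for (int j = 0; j < N_ROW; j++)
      acc += (double)(i + 1) * j;
    sum[i] = acc;
  }

  for (int i = 0; i < N_DIM; i++)
    printf("sum[%d] = %.0f\n", i, sum[i]);
  return 0;
}
```

The commented-out `reduction(+ : numer, denom)` in the second hunk would take the same shape as the `reduction(+ : acc)` clause here, combining each thread's partial sums at the end of the inner loop.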
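On the convergence note in `report.md`: the Wikipedia variant commits only the single coordinate whose 1-D minimizer lowers the loss the most in each outer iteration, rather than updating every coordinate from the same stale $w$. The following is a toy sketch of that greedy variant on least squares $f(w) = \lVert Xw - y \rVert^2$, with entirely made-up data and helper names (`loss`, `argmin_coord`), just to make the difference concrete; it is not the assignment's algorithm:

```c
#include <stdio.h>

#define ROWS 3
#define DIMS 2

static const double X[ROWS][DIMS] = {{1, 2}, {3, 4}, {5, 6}};
static const double y[ROWS] = {5, 11, 17}; // consistent with w = (1, 2)

// Sum of squared residuals ||Xw - y||^2.
static double loss(const double w[DIMS]) {
  double s = 0;
  for (int r = 0; r < ROWS; r++) {
    double pred = 0;
    for (int d = 0; d < DIMS; d++) pred += X[r][d] * w[d];
    s += (pred - y[r]) * (pred - y[r]);
  }
  return s;
}

// Exact minimizer of the loss in coordinate i with all other
// coordinates held fixed: w_i = x_i . (y - X_{-i} w_{-i}) / (x_i . x_i).
static double argmin_coord(const double w[DIMS], int i) {
  double numer = 0, denom = 0;
  for (int r = 0; r < ROWS; r++) {
    double rest = 0;
    for (int d = 0; d < DIMS; d++)
      if (d != i) rest += X[r][d] * w[d];
    numer += X[r][i] * (y[r] - rest);
    denom += X[r][i] * X[r][i];
  }
  return numer / denom;
}

int main(void) {
  double w[DIMS] = {0, 0};
  for (int iter = 0; iter < 20; iter++) {
    // Greedy step: try each coordinate's 1-D minimizer in isolation,
    // then commit only the single update that lowers the loss the most.
    int best_d = -1;
    double best_w = 0, best_l = loss(w);
    for (int d = 0; d < DIMS; d++) {
      double old = w[d];
      w[d] = argmin_coord(w, d);
      double l = loss(w);
      if (l < best_l) { best_l = l; best_d = d; best_w = w[d]; }
      w[d] = old; // restore before trying the next coordinate
    }
    if (best_d < 0) break; // no single-coordinate improvement left
    w[best_d] = best_w;
    printf("iter %d: loss = %g\n", iter, best_l);
  }
  return 0;
}
```

By construction the loss is non-increasing under this rule, which is exactly the property whose absence the report observes as fluctuation when all coordinates are updated at once.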