From e0d966087dace69f431c56f946da584d8ca3c81a Mon Sep 17 00:00:00 2001 From: Michael Zhang Date: Mon, 9 Oct 2023 04:17:14 -0500 Subject: [PATCH] polish --- Dockerfile | 4 ++ assignments/01/.gitignore | 3 +- assignments/01/Makefile | 10 ++++- assignments/01/lc_pthreads.c | 5 +++ assignments/01/report.md | 70 +++++++++++++++++++++++++++++++++ assignments/01/run_benchmark.sh | 18 +++++++++ 6 files changed, 107 insertions(+), 3 deletions(-) create mode 100644 assignments/01/report.md create mode 100755 assignments/01/run_benchmark.sh diff --git a/Dockerfile b/Dockerfile index 41bca45..1484784 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,4 @@ FROM ubuntu:22.04 +ARG DEBIAN_FRONTEND=noninteractive ENV PATH="/root/.cargo/bin:${PATH}" @@ -9,8 +10,11 @@ RUN apt update -y && apt install -y --no-install-recommends \ direnv \ git \ libomp-dev \ + pandoc \ python3 \ python3-pip \ + texlive-latex-base \ + texlive-latex-extra \ valgrind \ ; RUN pip install poetry diff --git a/assignments/01/.gitignore b/assignments/01/.gitignore index 618dcec..41faada 100644 --- a/assignments/01/.gitignore +++ b/assignments/01/.gitignore @@ -4,4 +4,5 @@ lc_pthreads *.tar.gz dataset -out.txt \ No newline at end of file +out.txt +report.pdf \ No newline at end of file diff --git a/assignments/01/Makefile b/assignments/01/Makefile index 1487240..e123136 100644 --- a/assignments/01/Makefile +++ b/assignments/01/Makefile @@ -1,4 +1,4 @@ -.PHONY: all handin watch-openmp clean +.PHONY: all handin watch-openmp watch-report rust clean CFLAGS := -std=c11 -fopenmp \ -I/opt/homebrew/opt/libomp/include \ @@ -18,12 +18,15 @@ clean: dataset/small \ *.o -zhan4854.tar.gz: common.c common.h lc_openmp.c lc_pthreads.c Makefile +zhan4854.tar.gz: common.c common.h lc_openmp.c lc_pthreads.c Makefile ASSIGNMENT.md report.pdf run_benchmark.sh mkdir -p zhan4854 cp $^ zhan4854 tar -czvf $@ zhan4854 rm -r zhan4854 +report.pdf: report.md + pandoc -o $@ $^ + lc_openmp: lc_openmp.o common.o $(CC) $(CFLAGS) $(LDFLAGS) -o 
$@ $^ -lm @@ -41,6 +44,9 @@ dataset/small/%.txt: generate_test_data.py dataset/mnist/%.txt: generate_test_data.py python generate_test_data.py dataset/MNIST_data.csv dataset/MNIST_label.csv dataset/mnist +watch-report: + watchexec -c clear -e md 'make report.pdf' + watch-openmp: watchexec -c clear -e Makefile,c,h 'make lc_openmp && ./lc_openmp ./dataset/small_data.csv ./dataset/small_label.csv 10 2' diff --git a/assignments/01/lc_pthreads.c b/assignments/01/lc_pthreads.c index 689e989..be8f31e 100644 --- a/assignments/01/lc_pthreads.c +++ b/assignments/01/lc_pthreads.c @@ -88,6 +88,11 @@ int main(int argc, char **argv) { printf("Loss: %0.04f\n", loss); } + double program_end_time = monotonic_seconds(); + printf("Program time (compute): %0.04fs\n", total_compute_time); + printf("Program time (total): %0.04fs\n", + program_end_time - program_start_time); + free(inner_calc); free(new_w); free(data->buf); diff --git a/assignments/01/report.md b/assignments/01/report.md new file mode 100644 index 0000000..8c7ae64 --- /dev/null +++ b/assignments/01/report.md @@ -0,0 +1,70 @@ +--- +geometry: margin=2cm +output: pdf_document +title: CSCI 5451 Assignment 1 +date: \today + +author: | + | Michael Zhang \ $\cdot$ ID: 5289259 +--- + +1. _A short description of how you went about parallelizing the classification algorithm. You should include how you decomposed the problem and why, i.e., what were the tasks being parallelized._ + + The parallelization I used was incredibly simple, just parallelizing outer iterations. I used this same trick for both the OpenMP and the pthreads implementations. + + The reason I didn't go further was that further breaking down of the for loops incurred more overhead from managing the parallelization than was actually gained. I have run this several times and the gains were either negligible, or it actually ran slower than the serial version. 
+ + This also had to do with the fact that I had already inlined most of the calculations to require as few loops as possible, moved all allocations to the top level, and arranged my data buffer in column-major order instead since the iteration pattern was by dimension rather than by row. + +2. _Timing results for 1, 2, 4, 8, and 16 threads for the classification. You should include results with outer iterations set to 10._ + + ``` + ./lc_pthreads ./dataset/small_data.csv ./dataset/small_data.csv 10 1 + Program time (compute): 0.0069s + ./lc_pthreads ./dataset/small_data.csv ./dataset/small_data.csv 10 2 + Program time (compute): 0.0027s + ./lc_pthreads ./dataset/small_data.csv ./dataset/small_data.csv 10 4 + Program time (compute): 0.0027s + ./lc_pthreads ./dataset/small_data.csv ./dataset/small_data.csv 10 8 + Program time (compute): 0.0033s + ./lc_pthreads ./dataset/small_data.csv ./dataset/small_data.csv 10 16 + Program time (compute): 0.0031s + ./lc_pthreads ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 1 + Program time (compute): 21.5287s + ./lc_pthreads ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 2 + Program time (compute): 10.6175s + ./lc_pthreads ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 4 + Program time (compute): 5.2198s + ./lc_pthreads ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 8 + Program time (compute): 4.5690s + ./lc_pthreads ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 16 + Program time (compute): 3.6433s + ./lc_openmp ./dataset/small_data.csv ./dataset/small_data.csv 10 1 + Program time (compute): 0.0033s + ./lc_openmp ./dataset/small_data.csv ./dataset/small_data.csv 10 2 + Program time (compute): 0.0017s + ./lc_openmp ./dataset/small_data.csv ./dataset/small_data.csv 10 4 + Program time (compute): 0.0011s + ./lc_openmp ./dataset/small_data.csv ./dataset/small_data.csv 10 8 + Program time (compute): 0.0020s + ./lc_openmp ./dataset/small_data.csv ./dataset/small_data.csv 10 16 + Program time 
(compute): 0.0032s + ./lc_openmp ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 1 + Program time (compute): 21.7196s + ./lc_openmp ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 2 + Program time (compute): 10.4035s + ./lc_openmp ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 4 + Program time (compute): 5.2449s + ./lc_openmp ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 8 + Program time (compute): 4.1550s + ./lc_openmp ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 16 + Program time (compute): 3.5328s + ``` + + This data was generated using the `run_benchmark.sh > out.txt` script. + +Small note: There's a part in the end of the program that performs validation on the trained model by using a train/test data set split. I didn't count this towards execution time but felt that it was important enough to keep since it ensured that my program was still behaving correctly. + +``` + +``` diff --git a/assignments/01/run_benchmark.sh b/assignments/01/run_benchmark.sh new file mode 100755 index 0000000..80ab399 --- /dev/null +++ b/assignments/01/run_benchmark.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +LC_OPENMP=${LC_OPENMP:-./lc_openmp} +LC_PTHREADS=${LC_PTHREADS:-./lc_pthreads} + +SMALL_DATA=${SMALL_DATA:-./dataset/small_data.csv} +SMALL_LABEL=${SMALL_LABEL:-./dataset/small_label.csv} + +MNIST_DATA=${MNIST_DATA:-./dataset/MNIST_data.csv} +MNIST_LABEL=${MNIST_LABEL:-./dataset/MNIST_label.csv} + +for impl in ${LC_PTHREADS} ${LC_OPENMP}; do + for dataset in "${SMALL_DATA} ${SMALL_LABEL}" "${MNIST_DATA} ${MNIST_LABEL}"; do + for t in 1 2 4 8 16; do + echo $impl $dataset 10 "$t" + $impl $dataset 10 "$t" | grep -F "Program time (compute)" + done + done +done \ No newline at end of file