From e0d966087dace69f431c56f946da584d8ca3c81a Mon Sep 17 00:00:00 2001 From: Michael Zhang Date: Mon, 9 Oct 2023 04:17:14 -0500 Subject: [PATCH] polish --- Dockerfile | 4 ++ assignments/01/.gitignore | 3 +- assignments/01/Makefile | 10 ++++- assignments/01/lc_pthreads.c | 5 +++ assignments/01/report.md | 70 +++++++++++++++++++++++++++++++++ assignments/01/run_benchmark.sh | 18 +++++++++ 6 files changed, 107 insertions(+), 3 deletions(-) create mode 100644 assignments/01/report.md create mode 100755 assignments/01/run_benchmark.sh diff --git a/Dockerfile b/Dockerfile index 41bca45..1484784 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,4 @@ FROM ubuntu:22.04 +ARG DEBIAN_FRONTEND=noninteractive ENV PATH="/root/.cargo/bin:${PATH}" @@ -9,8 +10,11 @@ RUN apt update -y && apt install -y --no-install-recommends \ direnv \ git \ libomp-dev \ + pandoc \ python3 \ python3-pip \ + texlive-latex-base \ + texlive-latex-extra \ valgrind \ ; RUN pip install poetry diff --git a/assignments/01/.gitignore b/assignments/01/.gitignore index 618dcec..41faada 100644 --- a/assignments/01/.gitignore +++ b/assignments/01/.gitignore @@ -4,4 +4,5 @@ lc_pthreads *.tar.gz dataset -out.txt \ No newline at end of file +out.txt +report.pdf \ No newline at end of file diff --git a/assignments/01/Makefile b/assignments/01/Makefile index 1487240..e123136 100644 --- a/assignments/01/Makefile +++ b/assignments/01/Makefile @@ -1,4 +1,4 @@ -.PHONY: all handin watch-openmp clean +.PHONY: all handin watch-openmp watch-report rust clean CFLAGS := -std=c11 -fopenmp \ -I/opt/homebrew/opt/libomp/include \ @@ -18,12 +18,15 @@ clean: dataset/small \ *.o -zhan4854.tar.gz: common.c common.h lc_openmp.c lc_pthreads.c Makefile +zhan4854.tar.gz: common.c common.h lc_openmp.c lc_pthreads.c Makefile ASSIGNMENT.md report.pdf run_benchmark.sh mkdir -p zhan4854 cp $^ zhan4854 tar -czvf $@ zhan4854 rm -r zhan4854 +report.pdf: report.md + pandoc -o $@ $^ + lc_openmp: lc_openmp.o common.o $(CC) $(CFLAGS) $(LDFLAGS) -o 
$@ $^ -lm @@ -41,6 +44,9 @@ dataset/small/%.txt: generate_test_data.py dataset/mnist/%.txt: generate_test_data.py python generate_test_data.py dataset/MNIST_data.csv dataset/MNIST_label.csv dataset/mnist +watch-report: + watchexec -c clear -e md 'make report.pdf' + watch-openmp: watchexec -c clear -e Makefile,c,h 'make lc_openmp && ./lc_openmp ./dataset/small_data.csv ./dataset/small_label.csv 10 2' diff --git a/assignments/01/lc_pthreads.c b/assignments/01/lc_pthreads.c index 689e989..be8f31e 100644 --- a/assignments/01/lc_pthreads.c +++ b/assignments/01/lc_pthreads.c @@ -88,6 +88,11 @@ int main(int argc, char **argv) { printf("Loss: %0.04f\n", loss); } + double program_end_time = monotonic_seconds(); + printf("Program time (compute): %0.04fs\n", total_compute_time); + printf("Program time (total): %0.04fs\n", + program_end_time - program_start_time); + free(inner_calc); free(new_w); free(data->buf); diff --git a/assignments/01/report.md b/assignments/01/report.md new file mode 100644 index 0000000..8c7ae64 --- /dev/null +++ b/assignments/01/report.md @@ -0,0 +1,70 @@ +--- +geometry: margin=2cm +output: pdf_document +title: CSCI 5451 Assignment 1 +date: \today + +author: | + | Michael Zhang \ $\cdot$ ID: 5289259 +--- + +1. _A short description of how you went about parallelizing the classification algorithm. You should include how you decomposed the problem and why, i.e., what were the tasks being parallelized._ + + The parallelization I used was incredibly simple, just parallelizing outer iterations. I used this same trick for both the OpenMP and the pthreads implementations. + + The reason I didn't go further was that further breaking down of the for loops incurred more overhead from managing the parallelization than was actually gained. I have run this several times and the gains were either negligible, or it actually ran slower than the serial version. 
+ + This also had to do with the fact that I had already inlined most of the calculations to require as few loops as possible, moved all allocations to the top level, and arranged my data buffer in column-major order instead since the iteration pattern was by dimension rather than by row. + +2. _Timing results for 1, 2, 4, 8, and 16 threads for the classification. You should include results with outer iterations set to 10._ + + ``` + ./lc_pthreads ./dataset/small_data.csv ./dataset/small_data.csv 10 1 + Program time (compute): 0.0069s + ./lc_pthreads ./dataset/small_data.csv ./dataset/small_data.csv 10 2 + Program time (compute): 0.0027s + ./lc_pthreads ./dataset/small_data.csv ./dataset/small_data.csv 10 4 + Program time (compute): 0.0027s + ./lc_pthreads ./dataset/small_data.csv ./dataset/small_data.csv 10 8 + Program time (compute): 0.0033s + ./lc_pthreads ./dataset/small_data.csv ./dataset/small_data.csv 10 16 + Program time (compute): 0.0031s + ./lc_pthreads ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 1 + Program time (compute): 21.5287s + ./lc_pthreads ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 2 + Program time (compute): 10.6175s + ./lc_pthreads ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 4 + Program time (compute): 5.2198s + ./lc_pthreads ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 8 + Program time (compute): 4.5690s + ./lc_pthreads ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 16 + Program time (compute): 3.6433s + ./lc_openmp ./dataset/small_data.csv ./dataset/small_data.csv 10 1 + Program time (compute): 0.0033s + ./lc_openmp ./dataset/small_data.csv ./dataset/small_data.csv 10 2 + Program time (compute): 0.0017s + ./lc_openmp ./dataset/small_data.csv ./dataset/small_data.csv 10 4 + Program time (compute): 0.0011s + ./lc_openmp ./dataset/small_data.csv ./dataset/small_data.csv 10 8 + Program time (compute): 0.0020s + ./lc_openmp ./dataset/small_data.csv ./dataset/small_data.csv 10 16 + Program time 
(compute): 0.0032s + ./lc_openmp ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 1 + Program time (compute): 21.7196s + ./lc_openmp ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 2 + Program time (compute): 10.4035s + ./lc_openmp ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 4 + Program time (compute): 5.2449s + ./lc_openmp ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 8 + Program time (compute): 4.1550s + ./lc_openmp ./dataset/MNIST_data.csv ./dataset/MNIST_label.csv 10 16 + Program time (compute): 3.5328s + ``` + + This data was generated using the `run_benchmark.sh > out.txt` script. + +Small note: There's a part in the end of the program that performs validation on the trained model by using a train/test data set split. I didn't count this towards execution time but felt that it was important enough to keep since it ensured that my program was still behaving correctly. + +``` + +``` diff --git a/assignments/01/run_benchmark.sh b/assignments/01/run_benchmark.sh new file mode 100755 index 0000000..80ab399 --- /dev/null +++ b/assignments/01/run_benchmark.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +LC_OPENMP=${LC_OPENMP:-./lc_openmp} +LC_PTHREADS=${LC_PTHREADS:-./lc_pthreads} + +SMALL_DATA=${SMALL_DATA:-./dataset/small_data.csv} +SMALL_LABEL=${SMALL_LABEL:-./dataset/small_label.csv} + +MNIST_DATA=${MNIST_DATA:-./dataset/MNIST_data.csv} +MNIST_LABEL=${MNIST_LABEL:-./dataset/MNIST_label.csv} + +for impl in ${LC_PTHREADS} ${LC_OPENMP}; do + for dataset in "${SMALL_DATA} ${SMALL_LABEL}" "${MNIST_DATA} ${MNIST_LABEL}"; do + for t in 1 2 4 8 16; do + echo $impl $dataset 10 "$t" + $impl $dataset 10 "$t" | grep -F "Program time (compute)" + done + done +done \ No newline at end of file