From 9c72bf8dbe17e4fed45f911ce76e737838b7d6a8 Mon Sep 17 00:00:00 2001 From: Michael Zhang Date: Mon, 9 Oct 2023 03:49:21 -0500 Subject: [PATCH] implement pthreads --- assignments/01/Makefile | 16 +++- assignments/01/lc_openmp.c | 30 +++---- assignments/01/lc_pthreads.c | 149 ++++++++++++++++++++++++++++++++++- 3 files changed, 171 insertions(+), 24 deletions(-) diff --git a/assignments/01/Makefile b/assignments/01/Makefile index 89bcfa6..1487240 100644 --- a/assignments/01/Makefile +++ b/assignments/01/Makefile @@ -1,4 +1,4 @@ -.PHONY: all watch-openmp clean +.PHONY: all handin watch-openmp clean CFLAGS := -std=c11 -fopenmp \ -I/opt/homebrew/opt/libomp/include \ @@ -7,7 +7,9 @@ CFLAGS := -std=c11 -fopenmp \ LDFLAGS := -std=c11 -fopenmp -L/opt/homebrew/opt/libomp/lib -O3 RUST_SOURCES := $(shell find . -name "*.rs") -all: lc_openmp lc_pthreads +all: lc_openmp lc_pthreads handin + +handin: zhan4854.tar.gz clean: rm -rf \ @@ -18,7 +20,7 @@ clean: zhan4854.tar.gz: common.c common.h lc_openmp.c lc_pthreads.c Makefile mkdir -p zhan4854 - mv $^ zhan4854 + cp $^ zhan4854 tar -czvf $@ zhan4854 rm -r zhan4854 @@ -26,7 +28,7 @@ lc_openmp: lc_openmp.o common.o $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ -lm lc_pthreads: lc_pthreads.o common.o - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ -lm %.o: %.c $(CC) $(CFLAGS) -o $@ -c $< @@ -42,6 +44,12 @@ dataset/mnist/%.txt: generate_test_data.py watch-openmp: watchexec -c clear -e Makefile,c,h 'make lc_openmp && ./lc_openmp ./dataset/small_data.csv ./dataset/small_label.csv 10 2' +run-pthreads-small: lc_pthreads dataset/small/train_data.txt + ./lc_pthreads dataset/small/train_data.txt dataset/small/train_label.txt 10 2 dataset/small/test_data.txt dataset/small/test_label.txt + +run-pthreads-mnist: lc_pthreads dataset/mnist/train_data.txt + ./lc_pthreads dataset/mnist/train_data.txt dataset/mnist/train_label.txt 10 8 dataset/mnist/test_data.txt dataset/mnist/test_label.txt + run-openmp-small: lc_openmp dataset/small/train_data.txt ./lc_openmp dataset/small/train_data.txt dataset/small/train_label.txt 10 2 dataset/small/test_data.txt dataset/small/test_label.txt diff --git a/assignments/01/lc_openmp.c b/assignments/01/lc_openmp.c index 35e9371..914c0f2 100644 --- a/assignments/01/lc_openmp.c +++ b/assignments/01/lc_openmp.c @@ -21,23 +21,16 @@ int main(int argc, char **argv) { omp_set_num_threads(thread_count); struct data *data = read_data(data_file_name); - struct labels *label = read_labels(label_file_name); + struct labels *labels = read_labels(label_file_name); - // NAN CHECK - for (int i = 0; i < data->dimensions * data->rows; i++) { - if (isnan(data->buf[i])) - printf("failed at index %d\n", i); - } + FLOAT *w = calloc(data->dimensions, sizeof(FLOAT)); + FLOAT *new_w = calloc(data->dimensions, sizeof(FLOAT)); + FLOAT *inner_calc = calloc(data->dimensions * data->rows, sizeof(FLOAT)); printf("Running %d iteration(s) with %d thread(s).\n", outer_iterations, thread_count); double program_start_time = monotonic_seconds(); - FLOAT *w = calloc(data->dimensions, sizeof(FLOAT)); - FLOAT *new_w = calloc(data->dimensions, sizeof(FLOAT)); - FLOAT *ouais = calloc(data->dimensions * data->rows, sizeof(FLOAT)); - // FLOAT *loss_matrix = calloc(data->rows, sizeof(FLOAT)); - double total_compute_time = 0; for (int iter = 0; iter < outer_iterations; iter++) { double iter_start_time = monotonic_seconds(); @@ -57,7 +50,7 @@ int main(int argc, char **argv) { x_ni_w_ni = data->buf[data->rows * i2 + j] * w[i2]; } - ouais[data->rows * i + j] = 
label->buf[j] - x_ni_w_ni;
+      inner_calc[data->rows * i + j] = labels->buf[j] - x_ni_w_ni;
     }
 
     FLOAT numer = 0, denom = 0;
@@ -65,7 +58,7 @@
     // #pragma omp parallel for default(shared) reduction(+ : numer, denom)
     for (int j = 0; j < data->rows; j++) {
       FLOAT xij = data->buf[data->rows * i + j];
-      numer += xij * ouais[data->rows * i + j];
+      numer += xij * inner_calc[data->rows * i + j];
       denom += xij * xij;
     }
 
@@ -97,7 +90,7 @@
         loss_value += data->buf[data->rows * i + j] * w[i];
       }
 
-      loss_value -= label->buf[j];
+      loss_value -= labels->buf[j];
       loss_sum += loss_value * loss_value;
     }
     FLOAT loss = sqrt(loss_sum);
@@ -112,15 +105,15 @@
          program_end_time - program_start_time);
 
   // free(loss_matrix);
-  free(ouais);
+  free(inner_calc);
   free(new_w);
   free(data->buf);
-  free(label->buf);
+  free(labels->buf);
   free(data);
-  free(label);
+  free(labels);
 
   // NOTE: NOT PART OF THE ASSIGNMENT
-  // Perform testing to ensure that the training actually works
+  // Perform validation to see how well the model performs on training data
 
   if (argc >= 7) {
     struct data *test_data = read_data(argv[5]);
@@ -133,7 +126,6 @@
         output += test_data->buf[test_data->rows * i + j] * w[i];
       }
 
-      // printf("expected: %f, actual: %f\n", test_label->buf[j], output);
       FLOAT correct_answer = test_label->buf[j];
       FLOAT incorrect_answer = -correct_answer;
 
diff --git a/assignments/01/lc_pthreads.c b/assignments/01/lc_pthreads.c
index 4bbee20..689e989 100644
--- a/assignments/01/lc_pthreads.c
+++ b/assignments/01/lc_pthreads.c
@@ -1,7 +1,23 @@
+#include <math.h>
+#include <pthread.h>
+#include <stdio.h>
 #include <stdlib.h>
 
 #include "common.h"
 
+struct data *data;
+struct labels *labels;
+FLOAT *w, *new_w, *inner_calc;
+int thread_count;
+
+struct thread_ctx {
+  int start, end;
+};
+
+void *each_thread(void *);
+
 int main(int argc, char **argv) {
   if (argc < 5) {
     fprintf(stderr,
@@ -12,7 +28,138 @@ int main(int argc, char **argv) {
 
   char *data_file_name = argv[1], *label_file_name = argv[2];
   int outer_iterations = atoi(argv[3]);
-  int thread_count = atoi(argv[4]);
+  thread_count = atoi(argv[4]);
+
+  data = read_data(data_file_name);
+  labels = read_labels(label_file_name);
+
+  if (data->dimensions < thread_count)
+    thread_count = data->dimensions;
+
+  w = calloc(data->dimensions, sizeof(FLOAT));
+  new_w = calloc(data->dimensions, sizeof(FLOAT));
+  inner_calc = calloc(data->dimensions * data->rows, sizeof(FLOAT));
+
+  printf("Running %d iteration(s) with %d thread(s).\n", outer_iterations,
+         thread_count);
+  double program_start_time = monotonic_seconds();
+
+  double total_compute_time = 0;
+  for (int iter = 0; iter < outer_iterations; iter++) {
+    double iter_start_time = monotonic_seconds();
+
+    // Spawn N threads, each owning a contiguous block of coordinates
+    pthread_t *thread_pool = malloc(thread_count * sizeof(pthread_t));
+    int *thread_nums = malloc(thread_count * sizeof(int));
+    for (int t = 0; t < thread_count; ++t) {
+      thread_nums[t] = t;
+      pthread_create(&thread_pool[t], NULL, each_thread, &thread_nums[t]);
+    }
+
+    for (int t = 0; t < thread_count; ++t) {
+      pthread_join(thread_pool[t], NULL);
+    }
+
+    // Release the per-iteration thread bookkeeping
+    free(thread_pool);
+    free(thread_nums);
+
+    double iter_end_time = monotonic_seconds();
+    total_compute_time += iter_end_time - iter_start_time;
+    printf("Iter duration (no print): %0.04fs\n",
+           iter_end_time - iter_start_time);
+
+    // Update w
+    // printf("w = [");
+    for (int idx = 0; idx < data->dimensions; idx++) {
+      w[idx] = new_w[idx];
+      // printf("%.3f ", w[idx]);
+    }
+    // printf("]\n");
+
+    // Compute loss
+    FLOAT loss_sum = 0;
+    for (int j = 
0; j < data->rows; j++) {
+      FLOAT loss_value = 0;
+      for (int i = 0; i < data->dimensions; i++) {
+        loss_value += data->buf[data->rows * i + j] * w[i];
+      }
+
+      loss_value -= labels->buf[j];
+      loss_sum += loss_value * loss_value;
+    }
+    FLOAT loss = sqrt(loss_sum);
+    printf("Loss: %0.04f\n", loss);
+  }
+
+  free(inner_calc);
+  free(new_w);
+  free(data->buf);
+  free(labels->buf);
+  free(data);
+  free(labels);
+
+  // NOTE: NOT PART OF THE ASSIGNMENT
+  // Perform validation to see how well the model performs on training data
+
+  if (argc >= 7) {
+    struct data *test_data = read_data(argv[5]);
+    struct labels *test_label = read_labels(argv[6]);
+
+    int num_correct = 0;
+    for (int j = 0; j < test_data->rows; j++) {
+      FLOAT output = 0;
+      for (int i = 0; i < test_data->dimensions; i++) {
+        output += test_data->buf[test_data->rows * i + j] * w[i];
+      }
+
+      FLOAT correct_answer = test_label->buf[j];
+      FLOAT incorrect_answer = -correct_answer;
+
+      if (fabs(output - correct_answer) < fabs(output - incorrect_answer))
+        num_correct += 1;
+    }
+
+    printf("num correct: %d, out of %d (%.2f%%)\n", num_correct,
+           test_data->rows, (100.0 * num_correct) / test_data->rows);
+  }
+
+  free(w);
 
   return 0;
 }
+
+void *each_thread(void *thread_num_void) {
+  int thread_num = *(int *)thread_num_void;
+  int num_iterations = data->dimensions / thread_count;
+
+  int start = num_iterations * thread_num;
+  int end = (thread_num == thread_count - 1)
+                ? data->dimensions
+                : num_iterations * (thread_num + 1);
+  for (int i = start; i < end; i++) {
+    for (int j = 0; j < data->rows; j++) {
+      FLOAT x_ni_w_ni = 0;
+
+      // #pragma omp parallel for default(shared) reduction(+ : x_ni_w_ni)
+      for (int i2 = 0; i2 < data->dimensions; i2++) {
+        if (i2 == i)
+          continue;
+
+        // Accumulate the dot product over every coordinate except i
+        x_ni_w_ni += data->buf[data->rows * i2 + j] * w[i2];
+      }
+
+      inner_calc[data->rows * i + j] = labels->buf[j] - x_ni_w_ni;
+    }
+
+    FLOAT numer = 0, denom = 0;
+
+    // #pragma omp parallel for default(shared) reduction(+ : numer, denom)
+    for (int j = 0; j < data->rows; j++) {
+      FLOAT xij = data->buf[data->rows * i + j];
+      numer += xij * inner_calc[data->rows * i + j];
+      denom += xij * xij;
+    }
+
+    if (denom == 0)
+      new_w[i] = 0;
+    else
+      new_w[i] = numer / denom;
+  }
+
+  return NULL;
+}
\ No newline at end of file
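
NOTE (not part of the patch): lc_pthreads.c declares struct thread_ctx { int start, end; } but never uses it; each_thread instead receives a bare thread index and re-derives its [start, end) block of coordinates. The standalone sketch below is only an illustration of how that struct could carry the per-thread range directly; the names worker, NDIMS, NTHREADS and col_result are invented for this example and are not part of the assignment code. Build with e.g. cc -pthread sketch.c.

/*
 * Hypothetical sketch: block-partition NDIMS coordinates across NTHREADS
 * pthreads, passing each thread its half-open range through thread_ctx.
 */
#include <pthread.h>
#include <stdio.h>

#define NDIMS 10   /* stand-in for data->dimensions */
#define NTHREADS 4 /* stand-in for thread_count */

struct thread_ctx {
  int start, end; /* this thread owns coordinates [start, end) */
};

static double col_result[NDIMS];

static void *worker(void *arg) {
  struct thread_ctx *ctx = arg;
  for (int i = ctx->start; i < ctx->end; i++) {
    /* placeholder for the per-coordinate update that fills new_w[i] */
    col_result[i] = (double)i;
  }
  return NULL;
}

int main(void) {
  pthread_t threads[NTHREADS];
  struct thread_ctx ctx[NTHREADS];
  int block = NDIMS / NTHREADS;

  for (int t = 0; t < NTHREADS; t++) {
    ctx[t].start = t * block;
    /* the last thread absorbs the remainder, as each_thread already does */
    ctx[t].end = (t == NTHREADS - 1) ? NDIMS : (t + 1) * block;
    pthread_create(&threads[t], NULL, worker, &ctx[t]);
  }
  for (int t = 0; t < NTHREADS; t++)
    pthread_join(threads[t], NULL);

  for (int i = 0; i < NDIMS; i++)
    printf("%g ", col_result[i]);
  printf("\n");
  return 0;
}

Passing the range through the struct keeps the partitioning logic in main, and the context array could be reused across outer iterations instead of re-allocating thread bookkeeping on every pass.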