implement pthreads
This commit is contained in:
parent
550ed02ded
commit
9c72bf8dbe
3 changed files with 171 additions and 24 deletions
|
@ -1,4 +1,4 @@
|
||||||
.PHONY: all watch-openmp clean
|
.PHONY: all handin watch-openmp clean
|
||||||
|
|
||||||
CFLAGS := -std=c11 -fopenmp \
|
CFLAGS := -std=c11 -fopenmp \
|
||||||
-I/opt/homebrew/opt/libomp/include \
|
-I/opt/homebrew/opt/libomp/include \
|
||||||
|
@ -7,7 +7,9 @@ CFLAGS := -std=c11 -fopenmp \
|
||||||
LDFLAGS := -std=c11 -fopenmp -L/opt/homebrew/opt/libomp/lib -O3
|
LDFLAGS := -std=c11 -fopenmp -L/opt/homebrew/opt/libomp/lib -O3
|
||||||
RUST_SOURCES := $(shell find . -name "*.rs")
|
RUST_SOURCES := $(shell find . -name "*.rs")
|
||||||
|
|
||||||
all: lc_openmp lc_pthreads
|
all: lc_openmp lc_pthreads handin
|
||||||
|
|
||||||
|
handin: zhan4854.tar.gz
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf \
|
rm -rf \
|
||||||
|
@ -18,7 +20,7 @@ clean:
|
||||||
|
|
||||||
zhan4854.tar.gz: common.c common.h lc_openmp.c lc_pthreads.c Makefile
|
zhan4854.tar.gz: common.c common.h lc_openmp.c lc_pthreads.c Makefile
|
||||||
mkdir -p zhan4854
|
mkdir -p zhan4854
|
||||||
mv $^ zhan4854
|
cp $^ zhan4854
|
||||||
tar -czvf $@ zhan4854
|
tar -czvf $@ zhan4854
|
||||||
rm -r zhan4854
|
rm -r zhan4854
|
||||||
|
|
||||||
|
@ -26,7 +28,7 @@ lc_openmp: lc_openmp.o common.o
|
||||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ -lm
|
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ -lm
|
||||||
|
|
||||||
lc_pthreads: lc_pthreads.o common.o
|
lc_pthreads: lc_pthreads.o common.o
|
||||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
|
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ -lm
|
||||||
|
|
||||||
%.o: %.c
|
%.o: %.c
|
||||||
$(CC) $(CFLAGS) -o $@ -c $<
|
$(CC) $(CFLAGS) -o $@ -c $<
|
||||||
|
@ -42,6 +44,12 @@ dataset/mnist/%.txt: generate_test_data.py
|
||||||
watch-openmp:
|
watch-openmp:
|
||||||
watchexec -c clear -e Makefile,c,h 'make lc_openmp && ./lc_openmp ./dataset/small_data.csv ./dataset/small_label.csv 10 2'
|
watchexec -c clear -e Makefile,c,h 'make lc_openmp && ./lc_openmp ./dataset/small_data.csv ./dataset/small_label.csv 10 2'
|
||||||
|
|
||||||
|
run-pthreads-small: lc_pthreads dataset/small/train_data.txt
|
||||||
|
./lc_pthreads dataset/small/train_data.txt dataset/small/train_label.txt 10 2 dataset/small/test_data.txt dataset/small/test_label.txt
|
||||||
|
|
||||||
|
run-pthreads-mnist: lc_pthreads dataset/mnist/train_data.txt
|
||||||
|
./lc_pthreads dataset/mnist/train_data.txt dataset/mnist/train_label.txt 10 8 dataset/mnist/test_data.txt dataset/mnist/test_label.txt
|
||||||
|
|
||||||
run-openmp-small: lc_openmp dataset/small/train_data.txt
|
run-openmp-small: lc_openmp dataset/small/train_data.txt
|
||||||
./lc_openmp dataset/small/train_data.txt dataset/small/train_label.txt 10 2 dataset/small/test_data.txt dataset/small/test_label.txt
|
./lc_openmp dataset/small/train_data.txt dataset/small/train_label.txt 10 2 dataset/small/test_data.txt dataset/small/test_label.txt
|
||||||
|
|
||||||
|
|
|
@ -21,23 +21,16 @@ int main(int argc, char **argv) {
|
||||||
omp_set_num_threads(thread_count);
|
omp_set_num_threads(thread_count);
|
||||||
|
|
||||||
struct data *data = read_data(data_file_name);
|
struct data *data = read_data(data_file_name);
|
||||||
struct labels *label = read_labels(label_file_name);
|
struct labels *labels = read_labels(label_file_name);
|
||||||
|
|
||||||
// NAN CHECK
|
FLOAT *w = calloc(data->dimensions, sizeof(FLOAT));
|
||||||
for (int i = 0; i < data->dimensions * data->rows; i++) {
|
FLOAT *new_w = calloc(data->dimensions, sizeof(FLOAT));
|
||||||
if (isnan(data->buf[i]))
|
FLOAT *inner_calc = calloc(data->dimensions * data->rows, sizeof(FLOAT));
|
||||||
printf("failed at index %d\n", i);
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("Running %d iteration(s) with %d thread(s).\n", outer_iterations,
|
printf("Running %d iteration(s) with %d thread(s).\n", outer_iterations,
|
||||||
thread_count);
|
thread_count);
|
||||||
double program_start_time = monotonic_seconds();
|
double program_start_time = monotonic_seconds();
|
||||||
|
|
||||||
FLOAT *w = calloc(data->dimensions, sizeof(FLOAT));
|
|
||||||
FLOAT *new_w = calloc(data->dimensions, sizeof(FLOAT));
|
|
||||||
FLOAT *ouais = calloc(data->dimensions * data->rows, sizeof(FLOAT));
|
|
||||||
// FLOAT *loss_matrix = calloc(data->rows, sizeof(FLOAT));
|
|
||||||
|
|
||||||
double total_compute_time = 0;
|
double total_compute_time = 0;
|
||||||
for (int iter = 0; iter < outer_iterations; iter++) {
|
for (int iter = 0; iter < outer_iterations; iter++) {
|
||||||
double iter_start_time = monotonic_seconds();
|
double iter_start_time = monotonic_seconds();
|
||||||
|
@ -57,7 +50,7 @@ int main(int argc, char **argv) {
|
||||||
x_ni_w_ni = data->buf[data->rows * i2 + j] * w[i2];
|
x_ni_w_ni = data->buf[data->rows * i2 + j] * w[i2];
|
||||||
}
|
}
|
||||||
|
|
||||||
ouais[data->rows * i + j] = label->buf[j] - x_ni_w_ni;
|
inner_calc[data->rows * i + j] = labels->buf[j] - x_ni_w_ni;
|
||||||
}
|
}
|
||||||
|
|
||||||
FLOAT numer = 0, denom = 0;
|
FLOAT numer = 0, denom = 0;
|
||||||
|
@ -65,7 +58,7 @@ int main(int argc, char **argv) {
|
||||||
// #pragma omp parallel for default(shared) reduction(+ : numer, denom)
|
// #pragma omp parallel for default(shared) reduction(+ : numer, denom)
|
||||||
for (int j = 0; j < data->rows; j++) {
|
for (int j = 0; j < data->rows; j++) {
|
||||||
FLOAT xij = data->buf[data->rows * i + j];
|
FLOAT xij = data->buf[data->rows * i + j];
|
||||||
numer += xij * ouais[data->rows * i + j];
|
numer += xij * inner_calc[data->rows * i + j];
|
||||||
denom += xij * xij;
|
denom += xij * xij;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -97,7 +90,7 @@ int main(int argc, char **argv) {
|
||||||
loss_value += data->buf[data->rows * i + j] * w[i];
|
loss_value += data->buf[data->rows * i + j] * w[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
loss_value -= label->buf[j];
|
loss_value -= labels->buf[j];
|
||||||
loss_sum += loss_value * loss_value;
|
loss_sum += loss_value * loss_value;
|
||||||
}
|
}
|
||||||
FLOAT loss = sqrt(loss_sum);
|
FLOAT loss = sqrt(loss_sum);
|
||||||
|
@ -112,15 +105,15 @@ int main(int argc, char **argv) {
|
||||||
program_end_time - program_start_time);
|
program_end_time - program_start_time);
|
||||||
|
|
||||||
// free(loss_matrix);
|
// free(loss_matrix);
|
||||||
free(ouais);
|
free(inner_calc);
|
||||||
free(new_w);
|
free(new_w);
|
||||||
free(data->buf);
|
free(data->buf);
|
||||||
free(label->buf);
|
free(labels->buf);
|
||||||
free(data);
|
free(data);
|
||||||
free(label);
|
free(labels);
|
||||||
|
|
||||||
// NOTE: NOT PART OF THE ASSIGNMENT
|
// NOTE: NOT PART OF THE ASSIGNMENT
|
||||||
// Perform testing to ensure that the training actually works
|
// Perform validation to see how well the model performs on training data
|
||||||
|
|
||||||
if (argc >= 7) {
|
if (argc >= 7) {
|
||||||
struct data *test_data = read_data(argv[5]);
|
struct data *test_data = read_data(argv[5]);
|
||||||
|
@ -133,7 +126,6 @@ int main(int argc, char **argv) {
|
||||||
output += test_data->buf[test_data->rows * i + j] * w[i];
|
output += test_data->buf[test_data->rows * i + j] * w[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
// printf("expected: %f, actual: %f\n", test_label->buf[j], output);
|
|
||||||
FLOAT correct_answer = test_label->buf[j];
|
FLOAT correct_answer = test_label->buf[j];
|
||||||
FLOAT incorrect_answer = -correct_answer;
|
FLOAT incorrect_answer = -correct_answer;
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,23 @@
|
||||||
|
#include <bits/pthreadtypes.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <pthread.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
struct data *data;
|
||||||
|
struct labels *labels;
|
||||||
|
FLOAT *w, *new_w, *inner_calc;
|
||||||
|
int thread_count;
|
||||||
|
|
||||||
|
struct thread_ctx {
|
||||||
|
int start, end;
|
||||||
|
};
|
||||||
|
|
||||||
|
void *each_thread(void *);
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
if (argc < 5) {
|
if (argc < 5) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
|
@ -12,7 +28,138 @@ int main(int argc, char **argv) {
|
||||||
|
|
||||||
char *data_file_name = argv[1], *label_file_name = argv[2];
|
char *data_file_name = argv[1], *label_file_name = argv[2];
|
||||||
int outer_iterations = atoi(argv[3]);
|
int outer_iterations = atoi(argv[3]);
|
||||||
int thread_count = atoi(argv[4]);
|
thread_count = atoi(argv[4]);
|
||||||
|
|
||||||
|
data = read_data(data_file_name);
|
||||||
|
labels = read_labels(label_file_name);
|
||||||
|
|
||||||
|
if (data->dimensions < thread_count)
|
||||||
|
thread_count = data->dimensions;
|
||||||
|
|
||||||
|
w = calloc(data->dimensions, sizeof(FLOAT));
|
||||||
|
new_w = calloc(data->dimensions, sizeof(FLOAT));
|
||||||
|
inner_calc = calloc(data->dimensions * data->rows, sizeof(FLOAT));
|
||||||
|
|
||||||
|
printf("Running %d iteration(s) with %d thread(s).\n", outer_iterations,
|
||||||
|
thread_count);
|
||||||
|
double program_start_time = monotonic_seconds();
|
||||||
|
|
||||||
|
double total_compute_time = 0;
|
||||||
|
for (int iter = 0; iter < outer_iterations; iter++) {
|
||||||
|
double iter_start_time = monotonic_seconds();
|
||||||
|
|
||||||
|
// Spawn N threads
|
||||||
|
pthread_t *thread_pool = malloc(thread_count * sizeof(pthread_t));
|
||||||
|
int *wtf = malloc(thread_count * sizeof(int));
|
||||||
|
for (int t = 0; t < thread_count; ++t) {
|
||||||
|
wtf[t] = t;
|
||||||
|
pthread_create(&thread_pool[t], NULL, each_thread, &wtf[t]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int t = 0; t < thread_count; ++t) {
|
||||||
|
pthread_join(thread_pool[t], NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
double iter_end_time = monotonic_seconds();
|
||||||
|
total_compute_time += iter_end_time - iter_start_time;
|
||||||
|
printf("Iter duration (no print): %0.04fs\n",
|
||||||
|
iter_end_time - iter_start_time);
|
||||||
|
|
||||||
|
// Update w
|
||||||
|
// printf("w = [");
|
||||||
|
for (int idx = 0; idx < data->dimensions; idx++) {
|
||||||
|
w[idx] = new_w[idx];
|
||||||
|
// printf("%.3f ", w[idx]);
|
||||||
|
}
|
||||||
|
// printf("]\n");
|
||||||
|
|
||||||
|
// Compute loss
|
||||||
|
FLOAT loss_sum = 0;
|
||||||
|
for (int j = 0; j < data->rows; j++) {
|
||||||
|
FLOAT loss_value = 0;
|
||||||
|
for (int i = 0; i < data->dimensions; i++) {
|
||||||
|
loss_value += data->buf[data->rows * i + j] * w[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
loss_value -= labels->buf[j];
|
||||||
|
loss_sum += loss_value * loss_value;
|
||||||
|
}
|
||||||
|
FLOAT loss = sqrt(loss_sum);
|
||||||
|
printf("Loss: %0.04f\n", loss);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(inner_calc);
|
||||||
|
free(new_w);
|
||||||
|
free(data->buf);
|
||||||
|
free(labels->buf);
|
||||||
|
free(data);
|
||||||
|
free(labels);
|
||||||
|
|
||||||
|
// NOTE: NOT PART OF THE ASSIGNMENT
|
||||||
|
// Perform validation to see how well the model performs on training data
|
||||||
|
|
||||||
|
if (argc >= 7) {
|
||||||
|
struct data *test_data = read_data(argv[5]);
|
||||||
|
struct labels *test_label = read_labels(argv[6]);
|
||||||
|
|
||||||
|
int num_correct = 0;
|
||||||
|
for (int j = 0; j < test_data->rows; j++) {
|
||||||
|
FLOAT output = 0;
|
||||||
|
for (int i = 0; i < test_data->dimensions; i++) {
|
||||||
|
output += test_data->buf[test_data->rows * i + j] * w[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
FLOAT correct_answer = test_label->buf[j];
|
||||||
|
FLOAT incorrect_answer = -correct_answer;
|
||||||
|
|
||||||
|
if (fabs(output - correct_answer) < fabs(output - incorrect_answer))
|
||||||
|
num_correct += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("num correct: %d, out of %d (%.2f%%)\n", num_correct,
|
||||||
|
test_data->rows, (100.0 * num_correct) / test_data->rows);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(w);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void *each_thread(void *thread_num_void) {
|
||||||
|
int thread_num = *(int *)thread_num_void;
|
||||||
|
int num_iterations = data->dimensions / thread_count;
|
||||||
|
|
||||||
|
int start = num_iterations * thread_num;
|
||||||
|
int end = (thread_num == thread_count - 1)
|
||||||
|
? data->dimensions
|
||||||
|
: num_iterations * (thread_num + 1);
|
||||||
|
for (int i = start; i < end; i++) {
|
||||||
|
for (int j = 0; j < data->rows; j++) {
|
||||||
|
FLOAT x_ni_w_ni = 0;
|
||||||
|
|
||||||
|
// #pragma omp parallel for default(shared) reduction(+ : x_ni_w_ni)
|
||||||
|
for (int i2 = 0; i2 < data->dimensions; i2++) {
|
||||||
|
if (i2 == i)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
x_ni_w_ni = data->buf[data->rows * i2 + j] * w[i2];
|
||||||
|
}
|
||||||
|
|
||||||
|
inner_calc[data->rows * i + j] = labels->buf[j] - x_ni_w_ni;
|
||||||
|
}
|
||||||
|
|
||||||
|
FLOAT numer = 0, denom = 0;
|
||||||
|
|
||||||
|
// #pragma omp parallel for default(shared) reduction(+ : numer, denom)
|
||||||
|
for (int j = 0; j < data->rows; j++) {
|
||||||
|
FLOAT xij = data->buf[data->rows * i + j];
|
||||||
|
numer += xij * inner_calc[data->rows * i + j];
|
||||||
|
denom += xij * xij;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (denom == 0)
|
||||||
|
new_w[i] = 0;
|
||||||
|
else
|
||||||
|
new_w[i] = numer / denom;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue