#include #include #include #include #include #include #include "common.h" struct data *data; struct labels *labels; FLOAT *w, *new_w, *inner_calc; int thread_count; struct thread_ctx { int start, end; }; void *each_thread(void *); int main(int argc, char **argv) { if (argc < 5) { fprintf(stderr, "USAGE: %s data_file label_file outer_iterations thread_count", argv[0]); exit(1); } char *data_file_name = argv[1], *label_file_name = argv[2]; int outer_iterations = atoi(argv[3]); thread_count = atoi(argv[4]); data = read_data(data_file_name); labels = read_labels(label_file_name); if (data->dimensions < thread_count) thread_count = data->dimensions; pthread_t *thread_pool = malloc(thread_count * sizeof(pthread_t)); int *wtf = malloc(thread_count * sizeof(int)); w = calloc(data->dimensions, sizeof(FLOAT)); new_w = calloc(data->dimensions, sizeof(FLOAT)); inner_calc = calloc(data->dimensions * data->rows, sizeof(FLOAT)); printf("Running %d iteration(s) with %d thread(s).\n", outer_iterations, thread_count); double program_start_time = monotonic_seconds(); double total_compute_time = 0; for (int iter = 0; iter < outer_iterations; iter++) { double iter_start_time = monotonic_seconds(); // Spawn N threads for (int t = 0; t < thread_count; ++t) { wtf[t] = t; pthread_create(&thread_pool[t], NULL, each_thread, &wtf[t]); } for (int t = 0; t < thread_count; ++t) { pthread_join(thread_pool[t], NULL); } double iter_end_time = monotonic_seconds(); total_compute_time += iter_end_time - iter_start_time; printf("Iter duration (no print): %0.04fs\n", iter_end_time - iter_start_time); // Update w // printf("w = ["); for (int idx = 0; idx < data->dimensions; idx++) { w[idx] = new_w[idx]; // printf("%.3f ", w[idx]); } // printf("]\n"); // Compute loss FLOAT loss_sum = 0; for (int j = 0; j < data->rows; j++) { FLOAT loss_value = 0; for (int i = 0; i < data->dimensions; i++) { loss_value += data->buf[data->rows * i + j] * w[i]; } loss_value -= labels->buf[j]; loss_sum += loss_value * loss_value; } FLOAT loss = sqrt(loss_sum); printf("Loss: %0.04f\n", loss); } double program_end_time = monotonic_seconds(); printf("Program time (compute): %0.04fs\n", total_compute_time); printf("Program time (total): %0.04fs\n", program_end_time - program_start_time); free(inner_calc); free(new_w); free(data->buf); free(labels->buf); free(data); free(labels); free(thread_pool); free(wtf); // NOTE: NOT PART OF THE ASSIGNMENT // Perform validation to see how well the model performs on training data if (argc >= 7) { struct data *test_data = read_data(argv[5]); struct labels *test_label = read_labels(argv[6]); int num_correct = 0; for (int j = 0; j < test_data->rows; j++) { FLOAT output = 0; for (int i = 0; i < test_data->dimensions; i++) { output += test_data->buf[test_data->rows * i + j] * w[i]; } FLOAT correct_answer = test_label->buf[j]; FLOAT incorrect_answer = -correct_answer; if (fabs(output - correct_answer) < fabs(output - incorrect_answer)) num_correct += 1; } printf("num correct: %d, out of %d (%.2f%%)\n", num_correct, test_data->rows, (100.0 * num_correct) / test_data->rows); free(test_data->buf); free(test_label->buf); free(test_data); free(test_label); } free(w); return 0; } void *each_thread(void *thread_num_void) { int thread_num = *(int *)thread_num_void; int num_iterations = data->dimensions / thread_count; int start = num_iterations * thread_num; int end = (thread_num == thread_count - 1) ? data->dimensions : num_iterations * (thread_num + 1); for (int i = start; i < end; i++) { for (int j = 0; j < data->rows; j++) { FLOAT x_ni_w_ni = 0; // #pragma omp parallel for default(shared) reduction(+ : x_ni_w_ni) for (int i2 = 0; i2 < data->dimensions; i2++) { if (i2 == i) continue; x_ni_w_ni = data->buf[data->rows * i2 + j] * w[i2]; } inner_calc[data->rows * i + j] = labels->buf[j] - x_ni_w_ni; } FLOAT numer = 0, denom = 0; // #pragma omp parallel for default(shared) reduction(+ : numer, denom) for (int j = 0; j < data->rows; j++) { FLOAT xij = data->buf[data->rows * i + j]; numer += xij * inner_calc[data->rows * i + j]; denom += xij * xij; } if (denom == 0) new_w[i] = 0; else new_w[i] = numer / denom; } }