This commit is contained in:
Michael Zhang 2023-10-07 19:49:58 -05:00
parent 2aebe9b4c2
commit 4cbd2b4a17
11 changed files with 160 additions and 33 deletions

5
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,5 @@
{
"files.associations": {
"common.h": "c"
}
}

2
assignments/01/.clangd Normal file
View file

@ -0,0 +1,2 @@
CompileFlags:
Add: -I/opt/homebrew/opt/libomp/include

View file

@ -1 +1,2 @@
export BASE_PATH=$PWD
export BASE_PATH=$PWD
export CC=clang-17

View file

@ -2,4 +2,5 @@ lc_openmp
lc_pthreads
*.o
dataset
dataset
out.txt

View file

@ -1,14 +1,22 @@
.PHONY: all
CFLAGS :=
CFLAGS := -std=c11 -fopenmp=libomp -I/opt/homebrew/opt/libomp/include
LDFLAGS := -std=c11 -fopenmp=libomp -L/opt/homebrew/opt/libomp/lib
RUST_SOURCES := $(shell find . -name "*.rs")
all: lc_openmp lc_pthreads
watch-openmp:
watchexec -c clear -e Makefile,c,h 'make lc_openmp && ./lc_openmp ./dataset/small_data.csv ./dataset/small_label.csv 10 2'
lc_openmp: lc_openmp.o common.o
gcc -o $@ $^
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
lc_pthreads: lc_pthreads.o common.o
gcc -o $@ $^
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
%.o: %.c
gcc -o $@ -c $<
$(CC) $(CFLAGS) -o $@ -c $<
rust: $(RUST_SOURCES)
cargo run -- ${BASE_PATH}/dataset/{small_data.csv,small_label.csv} 10 2

View file

@ -1,16 +1,10 @@
#define _POSIX_C_SOURCE 199309L
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "common.h"
double monotonic_seconds() {
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return ts.tv_sec + ts.tv_nsec * 1e-9;
}
/**
* @brief Output the seconds elapsed while execution.
*
@ -34,11 +28,13 @@ struct data *read_data(char *path) {
fscanf(file, "%u", &result->dimensions);
// Allocate
result->buf = malloc(result->dimensions * result->rows * sizeof(double));
result->buf = malloc(result->dimensions * result->rows * sizeof(FLOAT));
// Read into buffer
for (uint32_t i = 0; i < result->dimensions; i++) {
fscanf(file, "%lf", &result->buf[i]);
for (uint32_t j = 0; j < result->rows; j++) {
for (uint32_t i = 0; i < result->dimensions; i++) {
fscanf(file, FLOAT_FORMAT, &result->buf[result->rows * i + j]);
}
}
fclose(file);
@ -60,11 +56,11 @@ struct labels *read_labels(char *path) {
fscanf(file, "%u", &result->rows);
// Allocate
result->buf = malloc(result->rows * sizeof(double));
result->buf = malloc(result->rows * sizeof(FLOAT));
// Read into buffer
for (uint32_t i = 0; i < result->rows; i++) {
fscanf(file, "%lf", &result->buf[i]);
fscanf(file, FLOAT_FORMAT, &result->buf[i]);
}
fclose(file);

View file

@ -2,6 +2,10 @@
#define COMMON_H_
#include <stdint.h>
#include <time.h>
#define FLOAT float
#define FLOAT_FORMAT "%f"
/**
* @brief Output the seconds elapsed while execution.
@ -17,16 +21,20 @@ void print_time(double const seconds);
*
* @return The number of seconds.
*/
/*
 * Defined `static inline` because this lives in a header: a plain `inline`
 * definition provides no external definition in C99/C11, so any call the
 * compiler decides not to inline (e.g. at -O0) would fail to link.
 *
 * NOTE(review): under a strict -std=c11 build some libcs only declare
 * clock_gettime/CLOCK_MONOTONIC when _POSIX_C_SOURCE >= 199309L is defined
 * before the first system include -- confirm for the target toolchain.
 */
static inline double monotonic_seconds(void) {
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  /* Combine whole seconds and nanoseconds into fractional seconds. */
  return (double)ts.tv_sec + (double)ts.tv_nsec * 1e-9;
}
struct data {
uint32_t rows, dimensions;
double *buf;
FLOAT *buf;
};
struct labels {
uint32_t rows;
double *buf;
FLOAT *buf;
};
struct data *read_data(char *path);

View file

@ -1,13 +1,74 @@
#include <math.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include "common.h"
/**
 * @brief Run the OpenMP coordinate-update solver and print the weights.
 *
 * Usage: lc_openmp <data file> <label file> <outer iterations> <threads>
 *
 * The data buffer is addressed as buf[rows * i + j] (dimension i, row j).
 * On every outer iteration each weight is recomputed from the previous
 * iteration's weights `w` as
 *
 *     new_w[i] = sum_j x_ij * (y_j - sum_{k != i} x_kj * w_k) / sum_j x_ij^2
 *
 * and the new weights plus the elapsed time are printed.
 *
 * @return 0 on success, 1 on bad arguments, bad input or allocation failure.
 */
int main(int argc, char **argv) {
  if (argc < 5) {
    fprintf(stderr,
            "usage: %s <data file> <label file> <outer iterations> <threads>\n",
            argc > 0 ? argv[0] : "lc_openmp");
    return 1;
  }

  char *data_file_name = argv[1], *label_file_name = argv[2];
  int outer_iterations = atoi(argv[3]);
  int thread_count = atoi(argv[4]);
  if (thread_count < 1) {
    fprintf(stderr, "thread count must be a positive integer\n");
    return 1;
  }

  /* Honour the requested thread count instead of a hard-coded value. */
  omp_set_num_threads(thread_count);

  struct data *data = read_data(data_file_name);
  struct labels *label = read_labels(label_file_name);
  if (data == NULL || label == NULL) {
    fprintf(stderr, "failed to read input files\n");
    return 1;
  }
  if (label->rows != data->rows) {
    /* label->buf is indexed with data row indices below. */
    fprintf(stderr, "data has %u rows but labels have %u\n", data->rows,
            label->rows);
    return 1;
  }
  printf("Read.\n");

  printf("Running %d iteration(s) with %d thread(s).\n", outer_iterations,
         thread_count);

  const uint32_t dimensions = data->dimensions;
  const uint32_t rows = data->rows;

  FLOAT *w = calloc(dimensions, sizeof(FLOAT));
  if (w == NULL) {
    fprintf(stderr, "out of memory\n");
    return 1;
  }

  for (int iter = 0; iter < outer_iterations; iter++) {
    double start_time = monotonic_seconds();

    FLOAT *new_w = calloc(dimensions, sizeof(FLOAT));
    if (new_w == NULL) {
      fprintf(stderr, "out of memory\n");
      free(w);
      return 1;
    }

    /*
     * One parallel region over the coordinates. Each new_w[i] depends only
     * on the previous weights `w`, so iterations are independent, and the
     * accumulators are private to the iteration, so no reduction clause or
     * nested parallel region is needed.
     */
#pragma omp parallel for default(shared) schedule(static)
    for (uint32_t i = 0; i < dimensions; i++) {
      FLOAT numer = 0, denom = 0;

      for (uint32_t j = 0; j < rows; j++) {
        /* Prediction for row j using every coordinate except i. */
        FLOAT x_ni_w_ni = 0;
        for (uint32_t i2 = 0; i2 < dimensions; i2++) {
          if (i2 == i)
            continue;
          x_ni_w_ni += data->buf[(size_t)rows * i2 + j] * w[i2];
        }

        /* Same buf[rows * i + j] layout as the inner product above. */
        FLOAT xij = data->buf[(size_t)rows * i + j];
        numer += xij * (label->buf[j] - x_ni_w_ni);
        denom += xij * xij;
      }

      /* An all-zero column carries no information; avoid dividing by 0. */
      new_w[i] = (denom != 0) ? numer / denom : 0;
    }

    /* Stop the clock before printing so console I/O is not timed. */
    double end_time = monotonic_seconds();

    printf("Done.\n");
    for (uint32_t idx = 0; idx < dimensions; idx++) {
      printf("%.3f ", new_w[idx]);
    }
    printf("\n");

    free(w);
    w = new_w;

    print_time(end_time - start_time);
  }

  free(w);
  return 0;
}

34
assignments/01/main.py Normal file
View file

@ -0,0 +1,34 @@
# Reference NumPy implementation of the coordinate-update solver, used to
# check the output of the C and Rust programs on the small dataset.
# import numexpr as ne
import numpy as np

# The data file starts with a "<rows> <dimensions>" header line, followed by
# the matrix itself.
with open("dataset/small_data.csv", "r") as f:
    desc = f.readline().strip()
    rows, dimensions = map(int, desc.split())
    data = np.loadtxt(f)
print("loaded data")

# The label file starts with a "<rows>" header line, followed by the labels.
with open("dataset/small_label.csv", "r") as f:
    desc = f.readline().strip()
    rows = int(desc)
    labels = np.loadtxt(f)
print("loaded labels")

print(data.shape)
print(labels.shape)

# Start from all-zero weights; np.empty would leave them uninitialised.
w = np.zeros((dimensions, 1))

for _ in range(10):
    # Fresh buffer for every sweep so that `w` keeps the previous sweep's
    # weights while the new ones are being computed (no aliasing).
    new_w = np.empty(w.shape)
    for i in range(dimensions):
        # Drop column i from the data and entry i from the weights.
        data_ni = np.delete(data, i, axis=1)
        w_ni = np.delete(w, i)

        x_i = data[:, i]
        numer = x_i @ (labels - data_ni @ w_ni)
        denom = x_i @ x_i

        new_w[i] = numer / denom
    w = new_w
    # NOTE(review): printed once per sweep, as the C program does -- confirm
    # this matches the intended placement.
    print("w", new_w)

View file

@ -1,7 +1,13 @@
use std::{marker::PhantomData, ops::Index};
pub struct Span<T, F, U = usize>(Vec<T>, F, PhantomData<U>);
pub struct Span<'a, T, F, U = usize>(&'a [T], F, PhantomData<U>);
impl<T, F, U> Index<U> for Span<T, F, U>
impl<'a, T, F, U> Span<'a, T, F, U> {
    /// Creates a `Span` over `slice` that uses `func` as its index-mapping
    /// function.
    ///
    /// The slice is borrowed for the span's own lifetime `'a`; the parameter
    /// must carry that lifetime explicitly, otherwise the borrow cannot be
    /// stored in the returned value.
    pub fn new(slice: &'a [T], func: F) -> Self {
        Span(slice, func, PhantomData)
    }
}
impl<'a, T, F, U> Index<U> for Span<'a, T, F, U>
where
F: Fn(U) -> usize,
{

View file

@ -43,16 +43,23 @@ fn main() -> Result<()> {
let mut w = (0..data.dimensions).map(|_| 0.0).collect::<Vec<_>>();
for _ in 0..opt.outer_iterations {
let w1 = w.clone();
let new_w = w1
.into_par_iter()
.enumerate()
.map(|(i, w_i)| {
let new_w = (0..data.dimensions)
.par_bridge()
.map(|i| {
let x_i_start = data.rows * i;
let x_i_end = data.rows * (i + 1);
let x_i = &data.buf[x_i_start..x_i_end];
let missing_i = i;
let data_ni: Span<f64, _, (usize, usize)> =
Span::new(&data.buf, |(j2, i2): (usize, usize)| {
data.buf
[data.rows * (if i2 >= missing_i { i2 + 1 } else { i2 }) + j2]
});
let w_ni: Span<f64, _, usize> = Span::new(&w, |i2: usize| {
w[if i2 >= missing_i { i2 + 1 } else { i2 }]
});
// X = n x m
// y = n x 1
// w = 1 x m
@ -61,15 +68,13 @@ fn main() -> Result<()> {
// w_ni = (m - 1) x 1
// X_ni_w_ni = n x 1
let missing_i = i;
let x_ni_w_ni = (0..data.rows)
.par_bridge()
.map(|j| {
(0..data.dimensions)
.filter(|i| *i != missing_i)
.par_bridge()
.map(|i| data.buf[data.rows * i + j] * w[i])
.map(|i| data_ni[(j, i)] * w_ni[i])
.reduce(|| 0.0, |a, b| a + b)
})
.collect::<Vec<_>>();