openmp
This commit is contained in:
parent
2aebe9b4c2
commit
4cbd2b4a17
11 changed files with 160 additions and 33 deletions
5
.vscode/settings.json
vendored
Normal file
5
.vscode/settings.json
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"files.associations": {
|
||||
"common.h": "c"
|
||||
}
|
||||
}
|
2
assignments/01/.clangd
Normal file
2
assignments/01/.clangd
Normal file
|
@ -0,0 +1,2 @@
|
|||
CompileFlags:
|
||||
Add: -I/opt/homebrew/opt/libomp/include
|
|
@ -1 +1,2 @@
|
|||
export BASE_PATH=$PWD
|
||||
export CC=clang-17
|
1
assignments/01/.gitignore
vendored
1
assignments/01/.gitignore
vendored
|
@ -3,3 +3,4 @@ lc_pthreads
|
|||
*.o
|
||||
|
||||
dataset
|
||||
out.txt
|
|
@ -1,14 +1,22 @@
|
|||
# Targets that do not produce a file of the same name; declaring them phony
# keeps a stray file or directory called `rust`/`watch-openmp` from
# suppressing the recipe.
.PHONY: all watch-openmp rust

# Compile flags: C11 plus OpenMP via libomp from the Homebrew prefix.
CFLAGS := -std=c11 -fopenmp=libomp -I/opt/homebrew/opt/libomp/include
# Link flags: only linker-relevant options (-std is a language flag and
# belongs in CFLAGS).
LDFLAGS := -fopenmp=libomp -L/opt/homebrew/opt/libomp/lib
RUST_SOURCES := $(shell find . -name "*.rs")

all: lc_openmp lc_pthreads

# Rebuild and rerun the OpenMP binary on the small dataset whenever the
# Makefile or a C source/header changes.
watch-openmp:
	watchexec -c clear -e Makefile,c,h 'make lc_openmp && ./lc_openmp ./dataset/small_data.csv ./dataset/small_label.csv 10 2'

lc_openmp: lc_openmp.o common.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^

lc_pthreads: lc_pthreads.o common.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^

# common.h carries macros and an inline function, so every object must be
# rebuilt when it changes.
%.o: %.c common.h
	$(CC) $(CFLAGS) -o $@ -c $<

# Paths are spelled out instead of using {a,b} brace expansion, which a
# POSIX /bin/sh (make's default shell) does not perform.
rust: $(RUST_SOURCES)
	cargo run -- ${BASE_PATH}/dataset/small_data.csv ${BASE_PATH}/dataset/small_label.csv 10 2
|
||||
|
|
|
@ -1,16 +1,10 @@
|
|||
#define _POSIX_C_SOURCE 199309L
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
double monotonic_seconds() {
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return ts.tv_sec + ts.tv_nsec * 1e-9;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Output the seconds elapsed while execution.
|
||||
*
|
||||
|
@ -34,11 +28,13 @@ struct data *read_data(char *path) {
|
|||
fscanf(file, "%u", &result->dimensions);
|
||||
|
||||
// Allocate
|
||||
result->buf = malloc(result->dimensions * result->rows * sizeof(double));
|
||||
result->buf = malloc(result->dimensions * result->rows * sizeof(FLOAT));
|
||||
|
||||
// Read into buffer
|
||||
for (uint32_t i = 0; i < result->dimensions; i++) {
|
||||
fscanf(file, "%lf", &result->buf[i]);
|
||||
for (uint32_t j = 0; j < result->rows; j++) {
|
||||
for (uint32_t i = 0; i < result->dimensions; i++) {
|
||||
fscanf(file, FLOAT_FORMAT, &result->buf[result->rows * i + j]);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
|
@ -60,11 +56,11 @@ struct labels *read_labels(char *path) {
|
|||
fscanf(file, "%u", &result->rows);
|
||||
|
||||
// Allocate
|
||||
result->buf = malloc(result->rows * sizeof(double));
|
||||
result->buf = malloc(result->rows * sizeof(FLOAT));
|
||||
|
||||
// Read into buffer
|
||||
for (uint32_t i = 0; i < result->rows; i++) {
|
||||
fscanf(file, "%lf", &result->buf[i]);
|
||||
fscanf(file, FLOAT_FORMAT, &result->buf[i]);
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
|
|
|
@ -2,6 +2,10 @@
|
|||
#define COMMON_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <time.h>
|
||||
|
||||
#define FLOAT float
|
||||
#define FLOAT_FORMAT "%f"
|
||||
|
||||
/**
|
||||
* @brief Output the seconds elapsed while execution.
|
||||
|
@ -17,16 +21,20 @@ void print_time(double const seconds);
|
|||
*
|
||||
* @return The number of seconds.
|
||||
*/
|
||||
inline double monotonic_seconds();
|
||||
inline double monotonic_seconds() {
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return ts.tv_sec + ts.tv_nsec * 1e-9;
|
||||
}
|
||||
|
||||
struct data {
|
||||
uint32_t rows, dimensions;
|
||||
double *buf;
|
||||
FLOAT *buf;
|
||||
};
|
||||
|
||||
struct labels {
|
||||
uint32_t rows;
|
||||
double *buf;
|
||||
FLOAT *buf;
|
||||
};
|
||||
|
||||
struct data *read_data(char *path);
|
||||
|
|
|
@ -1,13 +1,74 @@
|
|||
#include <math.h>
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
char *data_file_name = argv[1], *label_file_name = argv[2];
|
||||
int outer_iterations = atoi(argv[3]);
|
||||
int thread_count = atoi(argv[4]);
|
||||
|
||||
omp_set_num_threads(8);
|
||||
|
||||
struct data *data = read_data(data_file_name);
|
||||
struct labels *label = read_labels(label_file_name);
|
||||
printf("Read.\n");
|
||||
printf("Running %d iteration(s) with %d thread(s).\n", outer_iterations,
|
||||
thread_count);
|
||||
|
||||
FLOAT *w = calloc(data->dimensions, sizeof(FLOAT));
|
||||
|
||||
for (int iter = 0; iter < outer_iterations; iter++) {
|
||||
double start_time = monotonic_seconds();
|
||||
FLOAT *new_w = calloc(data->dimensions, sizeof(FLOAT));
|
||||
|
||||
#pragma omp parallel for default(shared)
|
||||
for (int i = 0; i < data->dimensions; i++) {
|
||||
FLOAT *ouais = calloc(data->rows, sizeof(FLOAT));
|
||||
|
||||
#pragma omp parallel for default(shared)
|
||||
for (int j = 0; j < data->rows; j++) {
|
||||
FLOAT x_ni_w_ni = 0;
|
||||
|
||||
#pragma omp parallel for default(shared) reduction(+ : x_ni_w_ni)
|
||||
for (int i2 = 0; i2 < data->dimensions; i2++) {
|
||||
if (i2 == i)
|
||||
continue;
|
||||
|
||||
x_ni_w_ni = data->buf[data->rows * i2 + j] * w[i2];
|
||||
}
|
||||
|
||||
ouais[j] = label->buf[j] - x_ni_w_ni;
|
||||
}
|
||||
|
||||
FLOAT numer = 0, denom = 0;
|
||||
|
||||
#pragma omp parallel for default(shared) reduction(+ : numer, denom)
|
||||
for (int j = 0; j < data->rows; j++) {
|
||||
FLOAT xij = data->buf[data->dimensions * i + j];
|
||||
numer = xij * ouais[j];
|
||||
denom = xij * xij;
|
||||
}
|
||||
|
||||
free(ouais);
|
||||
new_w[i] = numer / denom;
|
||||
}
|
||||
|
||||
printf("Done.\n");
|
||||
for (int idx = 0; idx < data->dimensions; idx++) {
|
||||
printf("%.3f ", new_w[idx]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
free(w);
|
||||
w = new_w;
|
||||
|
||||
double end_time = monotonic_seconds();
|
||||
print_time(end_time - start_time);
|
||||
}
|
||||
|
||||
free(w);
|
||||
|
||||
return 0;
|
||||
}
|
34
assignments/01/main.py
Normal file
34
assignments/01/main.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
# import numexpr as ne
import numpy as np


def load_table(path, ndmin=1):
    """Read a dataset file: one header line followed by whitespace-separated numbers.

    Returns a tuple ``(header, values)`` where ``header`` is the list of
    integers found on the first line and ``values`` is the array parsed by
    ``np.loadtxt`` from the remaining lines.
    """
    with open(path, "r") as f:
        header = [int(tok) for tok in f.readline().split()]
        values = np.loadtxt(f, ndmin=ndmin)
    return header, values


def fit_weights(data, labels, iterations=10):
    """Run ``iterations`` sweeps of the per-coordinate weight update.

    For each column ``i`` the new weight is
    ``x_i . (labels - X_without_i @ w_without_i) / (x_i . x_i)``,
    always computed from the weights of the *previous* sweep (a fresh array
    is filled on every sweep, so updates within a sweep do not see each
    other). Weights start at zero.

    Returns an array of shape ``(dimensions, 1)``.
    """
    dimensions = data.shape[1]
    w = np.zeros((dimensions, 1))
    for _ in range(iterations):
        # A fresh array per sweep keeps `w` (read) and `new_w` (written)
        # from aliasing each other.
        new_w = np.zeros_like(w)
        for i in range(dimensions):
            data_ni = np.delete(data, i, axis=1)
            w_ni = np.delete(w, i)

            x_i = data[:, i]
            numer = x_i @ (labels - data_ni @ w_ni)
            denom = x_i @ x_i

            # An all-zero column would give 0/0; leave its weight at 0.
            new_w[i] = numer / denom if denom != 0 else 0.0
        w = new_w
    return w


def main():
    """Load the small dataset, fit the weights and print them."""
    # ndmin=2 keeps a single-column dataset two-dimensional.
    _, data = load_table("dataset/small_data.csv", ndmin=2)
    print("loaded data")

    _, labels = load_table("dataset/small_label.csv")
    print("loaded labels")

    print(data.shape)
    print(labels.shape)

    w = fit_weights(data, labels, 10)
    print("w", w)


if __name__ == "__main__":
    main()
|
|
@ -1,7 +1,13 @@
|
|||
use std::{marker::PhantomData, ops::Index};
|
||||
/// A borrowed slice of `T` bundled with a function value `F`.
///
/// `U` is carried only as a marker (`PhantomData`) and defaults to `usize`;
/// the `Index<U>` implementation for this type requires `F: Fn(U) -> usize`.
pub struct Span<'a, T, F, U = usize>(&'a [T], F, PhantomData<U>);

impl<'a, T, F, U> Span<'a, T, F, U> {
    /// Creates a `Span` that borrows `slice` for `'a` and stores `func`.
    ///
    /// The slice parameter carries the struct's lifetime `'a` so the
    /// returned value is tied to the borrowed data.
    pub fn new(slice: &'a [T], func: F) -> Self {
        Span(slice, func, PhantomData)
    }
}
|
||||
|
||||
impl<'a, T, F, U> Index<U> for Span<'a, T, F, U>
|
||||
where
|
||||
F: Fn(U) -> usize,
|
||||
{
|
||||
|
|
|
@ -43,16 +43,23 @@ fn main() -> Result<()> {
|
|||
let mut w = (0..data.dimensions).map(|_| 0.0).collect::<Vec<_>>();
|
||||
|
||||
for _ in 0..opt.outer_iterations {
|
||||
let w1 = w.clone();
|
||||
|
||||
let new_w = w1
|
||||
.into_par_iter()
|
||||
.enumerate()
|
||||
.map(|(i, w_i)| {
|
||||
let new_w = (0..data.dimensions)
|
||||
.par_bridge()
|
||||
.map(|i| {
|
||||
let x_i_start = data.rows * i;
|
||||
let x_i_end = data.rows * (i + 1);
|
||||
let x_i = &data.buf[x_i_start..x_i_end];
|
||||
|
||||
let missing_i = i;
|
||||
let data_ni: Span<f64, _, (usize, usize)> =
|
||||
Span::new(&data.buf, |(j2, i2): (usize, usize)| {
|
||||
data.buf
|
||||
[data.rows * (if i2 >= missing_i { i2 + 1 } else { i2 }) + j2]
|
||||
});
|
||||
let w_ni: Span<f64, _, usize> = Span::new(&w, |i2: usize| {
|
||||
w[if i2 >= missing_i { i2 + 1 } else { i2 }]
|
||||
});
|
||||
|
||||
// X = n x m
|
||||
// y = n x 1
|
||||
// w = 1 x m
|
||||
|
@ -61,15 +68,13 @@ fn main() -> Result<()> {
|
|||
// w_ni = (m - 1) x 1
|
||||
// X_ni_w_ni = n x 1
|
||||
|
||||
let missing_i = i;
|
||||
|
||||
let x_ni_w_ni = (0..data.rows)
|
||||
.par_bridge()
|
||||
.map(|j| {
|
||||
(0..data.dimensions)
|
||||
.filter(|i| *i != missing_i)
|
||||
.par_bridge()
|
||||
.map(|i| data.buf[data.rows * i + j] * w[i])
|
||||
.map(|i| data_ni[(j, i)] * w_ni[i])
|
||||
.reduce(|| 0.0, |a, b| a + b)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
|
Loading…
Reference in a new issue