csci5451/assignments/02/qs_mpi.c
2023-10-31 01:43:43 +00:00

490 lines
16 KiB
C

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
// Direction constants. NOTE(review): neither is referenced in the code visible
// in this file — presumably left over from an earlier design; confirm before
// removing.
#define ORDER_FORWARDS 1
#define ORDER_BACKWARDS 2
// Number of ints in one control record exchanged between ranks. The records
// built in recursive_quicksort hold, in order: element count, sender's global
// start index, receiver-local start index, sender-local start index.
#define CTL_SIZE 4
// Untyped max/min. Each argument may be evaluated twice, so do not pass
// expressions with side effects.
#define GENERIC_MAX(x, y) ((x) > (y) ? (x) : (y))
#define GENERIC_MIN(x, y) ((x) < (y) ? (x) : (y))
// Compile-time type guards: the _Generic selection has a single association
// and no default, so passing an expression of any other type fails to compile.
#define ENSURE_int(i) _Generic((i), int : (i))
#define ENSURE_float(f) _Generic((f), float : (f))
// Type-checked max/min: MAX(int, a, b) requires both a and b to be exactly
// `int` (likewise `float`); `type` selects which ENSURE_* guard is applied and
// the type the result is cast to.
#define MAX(type, x, y) (type) GENERIC_MAX(ENSURE_##type(x), ENSURE_##type(y))
#define MIN(type, x, y) (type) GENERIC_MIN(ENSURE_##type(x), ENSURE_##type(y))
// Fills `len` control records (len * CTL_SIZE ints) with -1.
void init_ctl(int *ctl, int len);
// Sorts arr[lo, hi) in place in ascending order; `hi` is exclusive.
void local_quicksort(int *arr, int lo, int hi);
// Returns a heap-allocated, space-separated rendering of arr[0, len).
char *string_of_list(int *arr, int len);
// One distributed partition step over `comm`; `n` is the global element count
// and `root` the rank that gathers pivot candidates.
void recursive_quicksort(int *integers, int n, int root, MPI_Comm comm);
// Entry point.
//
// Usage: qs_mpi <n> <output-file>
//
// Every rank generates n / p pseudo-random integers in [0, 100], the ranks run
// recursive_quicksort over MPI_COMM_WORLD, and rank 0 gathers every rank's
// slice and prints one line per rank to stdout.
//
// Returns 0 on success and EXIT_FAILURE on bad arguments or if the output file
// cannot be opened.
int main(int argc, char **argv) {
  int rank, p;
  int exit_code = 0;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &p);

  // Both positional arguments are dereferenced below, so reject a short
  // command line up front instead of reading past argv.
  if (argc < 3) {
    if (rank == 0) {
      fprintf(stderr, "usage: %s <n> <output-file>\n",
              argc > 0 ? argv[0] : "qs_mpi");
    }
    MPI_Finalize();
    return EXIT_FAILURE;
  }

  int n = atoi(argv[1]);
  int n_over_p = n / p;

  // Each rank needs at least one element: the pivot selection in
  // recursive_quicksort computes rand() % n_over_p.
  if (n_over_p < 1) {
    if (rank == 0) {
      fprintf(stderr, "n must be at least the number of processes (%d)\n", p);
    }
    MPI_Finalize();
    return EXIT_FAILURE;
  }

  // Only n_over_p elements exist per rank, so this — not n — is the real
  // global element count when n is not a multiple of p.
  int total = n_over_p * p;

  // Heap allocation: n is user-controlled and may be far too large for the
  // stack. The gather buffer is only significant on the root rank.
  int *integers = malloc((size_t)n_over_p * sizeof *integers);
  int *recvbuf = NULL;
  if (rank == 0) {
    recvbuf = malloc((size_t)total * sizeof *recvbuf);
  }
  if (integers == NULL || (rank == 0 && recvbuf == NULL)) {
    fprintf(stderr, "[%d] out of memory\n", rank);
    MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
  }

  // Generate integers
  // Minor implementation detail: srand(0) is specially handled by glibc to
  // behave as if it was called with srand(1). To get around this, I'm seeding
  // with rank + 1
  //
  // See more: https://stackoverflow.com/a/27386563
  srand(rank + 1);
  for (int i = 0; i < n_over_p; ++i) {
    // TODO: For readability during debugging, I'm capping this
    integers[i] = rand() % 101;
  }

  recursive_quicksort(integers, total, 0, MPI_COMM_WORLD);

  // The first node is responsible for collecting all the data.
  MPI_Gather(integers, n_over_p, MPI_INT, recvbuf, n_over_p, MPI_INT, 0,
             MPI_COMM_WORLD);

  if (rank == 0) {
    // NOTE(review): the file is created/truncated but nothing is written to
    // it yet; the results currently go to stdout only.
    FILE *f = fopen(argv[2], "w");
    if (f == NULL) {
      perror(argv[2]);
      exit_code = EXIT_FAILURE;
    }

    printf("[%d] ==== FINAL ====\n", rank);
    for (int i = 0; i < p; i += 1) {
      char *line = string_of_list(&recvbuf[i * n_over_p], n_over_p);
      printf("[%d] %s\n", rank, line ? line : "");
      free(line);
    }

    if (f != NULL) {
      fclose(f);
    }
  }

  free(recvbuf);
  free(integers);
  MPI_Finalize();
  printf("Done.\n");
  return exit_code;
}
// Sorts arr[lo, hi) in place in ascending order (hi is not inclusive).
//
// Does nothing when the range is empty, when lo is negative, or when arr is
// NULL.
//
// Uses a three-way partition around the middle element, so runs of values
// equal to the pivot are finished in a single pass instead of being pushed
// into the next level one element at a time. Only the smaller side is handled
// recursively and the larger side is processed by the loop, which keeps the
// recursion depth logarithmic in the range size.
void local_quicksort(int *arr, int lo, int hi) {
  if (arr == NULL || lo < 0)
    return;

  while (hi > lo && hi - lo > 1) {
    int pivot = arr[lo + (hi - lo) / 2];

    // Invariant while scanning:
    //   arr[lo, lt)   < pivot
    //   arr[lt, cur) == pivot
    //   arr[gt, hi)   > pivot
    int lt = lo;
    int cur = lo;
    int gt = hi;
    while (cur < gt) {
      int value = arr[cur];
      if (value < pivot) {
        arr[cur] = arr[lt];
        arr[lt] = value;
        ++lt;
        ++cur;
      } else if (value > pivot) {
        --gt;
        arr[cur] = arr[gt];
        arr[gt] = value;
        // cur is not advanced: the element swapped in is still unclassified.
      } else {
        ++cur;
      }
    }

    // arr[lt, gt) is in its final position. Recurse into the smaller side and
    // let the loop continue with the larger one.
    if (lt - lo < hi - gt) {
      local_quicksort(arr, lo, lt);
      lo = gt;
    } else {
      local_quicksort(arr, gt, hi);
      hi = lt;
    }
  }
}
// Renders arr[0, len) as decimal numbers separated by single spaces.
//
// Returns a newly allocated, NUL-terminated string that the caller must
// free(). An empty string is returned when len <= 0. Returns NULL if the
// buffer cannot be allocated.
char *string_of_list(int *arr, int len) {
  // Upper bound on the characters one element can need: an int has at most
  // 3 * sizeof(int) decimal digits (for 8-bit bytes), plus a sign and a
  // separator.
  const size_t per_item = 3 * sizeof(int) + 2;
  size_t count = len > 0 ? (size_t)len : 0;
  size_t capacity = count * per_item + 1;

  char *buffer = calloc(capacity, sizeof(char));
  if (buffer == NULL)
    return NULL;

  size_t offset = 0; // Current write position in the buffer
  for (size_t i = 0; i < count; i++) {
    int written = snprintf(buffer + offset, capacity - offset, "%d", arr[i]);
    if (written < 0 || (size_t)written >= capacity - offset)
      break; // Cannot happen with the bound above; never write past the end.
    offset += (size_t)written;

    // Separator between elements, but not after the last one
    if (i + 1 < count && offset + 1 < capacity) {
      buffer[offset++] = ' ';
      buffer[offset] = '\0';
    }
  }
  return buffer;
}
void recursive_quicksort(int *integers, int n, int root, MPI_Comm comm) {
int rank, p;
MPI_Comm_size(comm, &p);
MPI_Comm_rank(comm, &rank);
if (p == 1) {
// Recursion base case: just sort it serially
local_quicksort(integers, 0, n);
printf("Quicksorted: %s\n", string_of_list(integers, n));
return;
}
sleep(1);
printf("\n\n");
int n_over_p_max = (n + p - 1) / p;
int n_over_p = n / p;
if (rank == root)
n_over_p += n - p * n_over_p;
// printf(
// "[%d] :::::::::::::::::::::::::::: RECURSIVE QUICKSORT (n=%d,
// n/p=%d)\n", rank, n, n_over_p);
// Locally sort
// printf("[%d] Numbers before: %s\n", rank,
// string_of_list(integers, n_over_p));
local_quicksort(integers, 0, n_over_p);
printf("[%d] Numbers after first sort: %s\n", rank,
string_of_list(integers, n_over_p));
// Select a pivot.
// This pivot is broadcasted to all nodes
int pivot;
{
// First, select a random element
int rand_el = integers[rand() % n_over_p];
// Gather it
int rand_els[p];
MPI_Gather(&rand_el, 1, MPI_INT, rand_els, 1, MPI_INT, root, comm);
// Get the median
if (rank == root) {
// Sort
local_quicksort(rand_els, 0, p);
// Get the middle element
pivot = rand_els[p / 2];
}
MPI_Bcast(&pivot, 1, MPI_INT, root, comm);
}
printf("[%d] Broadcasted pivot: %d\n", rank, pivot);
// Determine where the boundary between S (lower) and L (higher) lies
int boundary;
for (int i = 0; i < n_over_p; ++i) {
if (integers[i] >= pivot) {
boundary = i;
break;
}
}
int S_lo = 0, S_hi = boundary;
int L_lo = boundary, L_hi = n_over_p;
int S_size = S_hi - S_lo, L_size = L_hi - L_lo;
// printf("[%d] S: [%d - %d] (%d), L: [%d - %d] (%d)\n", rank, S_lo, S_hi,
// S_size, L_lo, L_hi, L_size);
// Perform global arrangement
int S_global_end, L_reverse_end, S_global_max_end;
MPI_Scan(&S_size, &S_global_end, 1, MPI_INT, MPI_SUM, comm);
MPI_Scan(&L_size, &L_reverse_end, 1, MPI_INT, MPI_SUM, comm);
// printf("[%d] bruh %d\n", rank, S_global_end);
// Get the boundary element between S and L
MPI_Allreduce(&S_global_end, &S_global_max_end, 1, MPI_INT, MPI_MAX, comm);
int S_global_start = S_global_end - S_size,
L_reverse_start = L_reverse_end - L_size,
L_global_start = n - L_reverse_end, L_global_end = n - L_reverse_start;
// printf("[%d] Prefixed S: [%d - %d], Prefixed L: [%d - %d]\n", rank,
// S_global_start, S_global_end - 1, L_global_start, L_global_end - 1);
int S_starting_process = S_global_start / n_over_p,
L_starting_process = L_global_start / n_over_p;
int S_offset = S_global_start % n_over_p,
L_offset = L_global_start % n_over_p;
int *integers_recv_buf = calloc(sizeof(int), n);
int S_ctl[p * CTL_SIZE];
int L_ctl[p * CTL_SIZE];
int S_send_ctl[p * CTL_SIZE];
int L_send_ctl[p * CTL_SIZE];
int recvpart[n_over_p];
int ctl_send_counts[p];
int ctl_send_displs[p];
int send_counts[p];
int send_displs[p];
int recv_counts[p];
int recv_displs[p];
init_ctl(S_ctl, p);
init_ctl(L_ctl, p);
init_ctl(S_send_ctl, p);
init_ctl(L_send_ctl, p);
for (int i = 0; i < p; ++i) {
send_counts[i] = n_over_p;
send_displs[i] = i * n_over_p;
ctl_send_counts[i] = CTL_SIZE;
ctl_send_displs[i] = i * CTL_SIZE;
recv_counts[i] = CTL_SIZE;
recv_displs[i] = i * CTL_SIZE;
}
// Send S to the correct target
{
for (int i = S_lo, dest_pos = S_global_start,
processor = S_starting_process;
i < S_hi;) {
int next_break = MIN(int, S_global_end,
MIN(int, dest_pos + (S_hi - S_lo),
(dest_pos / n_over_p) * n_over_p + n_over_p));
int count = next_break - dest_pos;
int from_local_start = i, from_local_end = i + count;
int from_global_start = rank * n_over_p + from_local_start,
from_global_end = from_global_start + count;
int to_global_start = dest_pos, to_global_end = dest_pos + count;
int to_local_start = to_global_start - processor * n_over_p,
to_local_end = to_global_end - processor * n_over_p;
// printf("[%d] S ->> (count=%d), from local [%d..%d] {%d..%d} -to-> "
// "p#%d [%d..%d] {%d..%d}\n",
// rank, count, from_local_start, from_local_end,
// from_global_start, from_global_end, processor, to_local_start,
// to_local_end, to_global_start, to_global_end);
S_send_ctl[processor * CTL_SIZE] = count;
S_send_ctl[processor * CTL_SIZE + 1] = from_global_start;
S_send_ctl[processor * CTL_SIZE + 2] = to_local_start;
S_send_ctl[processor * CTL_SIZE + 3] = from_local_start;
i += count;
dest_pos += count;
processor += 1;
}
MPI_Alltoallv(S_send_ctl, ctl_send_counts, ctl_send_displs, MPI_INT, S_ctl,
recv_counts, recv_displs, MPI_INT, comm);
}
// Send L to the correct target
{
for (int i = L_lo, dest_pos = L_global_start,
processor = L_starting_process;
i < L_hi;) {
int next_break = MIN(int, L_global_end,
MIN(int, dest_pos + (L_hi - L_lo),
(dest_pos / n_over_p) * n_over_p + n_over_p));
int count = next_break - dest_pos;
int from_local_start = i, from_local_end = i + count;
int from_global_start = rank * n_over_p + from_local_start,
from_global_end = from_global_start + count;
int to_global_start = dest_pos, to_global_end = dest_pos + count;
int to_local_start = to_global_start - processor * n_over_p,
to_local_end = to_global_end - processor * n_over_p;
// printf("[%d] L ->> (count=%d), from local [%d..%d] {%d..%d} -to-> "
// "p#%d [%d..%d] {%d..%d}\n",
// rank, count, from_local_start, from_local_end,
// from_global_start, from_global_end, processor, to_local_start,
// to_local_end, to_global_start, to_global_end);
L_send_ctl[processor * CTL_SIZE] = count;
L_send_ctl[processor * CTL_SIZE + 1] = from_global_start;
L_send_ctl[processor * CTL_SIZE + 2] = to_local_start;
L_send_ctl[processor * CTL_SIZE + 3] = from_local_start;
i += count;
dest_pos += count;
processor += 1;
}
MPI_Alltoallv(L_send_ctl, ctl_send_counts, ctl_send_displs, MPI_INT, L_ctl,
recv_counts, recv_displs, MPI_INT, comm);
}
// After sending S and L information
for (int i = 0; i < p; ++i) {
recv_counts[i] = n_over_p;
recv_displs[i] = i * n_over_p;
}
// MPI_Alltoallv(integers, send_counts, send_displs, MPI_INT,
// integers_recv_buf,
// recv_counts, recv_displs, MPI_INT, MPI_COMM_WORLD);
// MPI_Allgather(integers, n_over_p, MPI_INT, integers_recv_buf, n_over_p,
// MPI_INT, comm);
// printf("[%d] ints: %s\n", rank, string_of_list(integers_recv_buf, n));
// Scheme for all send
int integers_recv_2[n_over_p];
int integers_recv_3[n_over_p];
for (int i = 0; i < n_over_p; ++i) {
integers_recv_2[i] = -1;
integers_recv_3[i] = integers[i];
}
for (int host_p = 0; host_p < p; ++host_p) {
if (rank == host_p) {
// Your {S,L}_ctl is a mapping from source_processor -> ctl
// Everyone already knows who needs to send to who now
for (int sender_p = 0; sender_p < p; ++sender_p) {
int S_count = S_ctl[sender_p * CTL_SIZE];
if (S_count > 0) {
int to_local_start = S_ctl[sender_p * CTL_SIZE + 2];
int from_local_start = S_ctl[sender_p * CTL_SIZE + 3];
if (sender_p == host_p) {
for (int k = 0; k < S_count; ++k) {
integers_recv_3[to_local_start + k] =
integers[from_local_start + k];
}
continue;
}
// printf("[%d] - S inbound from host %d to [%d..%d] (%d)\n", rank,
// sender_p, to_local_start, to_local_start + S_count,
// S_count);
MPI_Recv(&integers_recv_2[to_local_start], S_count, MPI_INT, sender_p,
124, comm, MPI_STATUS_IGNORE);
for (int k = 0; k < S_count; ++k) {
integers_recv_3[to_local_start + k] =
integers_recv_2[to_local_start + k];
}
}
}
} else {
// Your {S,L}_send_ctl contains a mapping from dest_processor -> ctl
for (int dest_p = 0; dest_p < p; ++dest_p) {
int S_count = S_send_ctl[dest_p * CTL_SIZE];
if (S_count > 0 && dest_p == host_p) {
int from_local_start = S_send_ctl[dest_p * CTL_SIZE + 3];
// printf("[%d] - S outbound to host %d from [%d..%d] (%d)\n", rank,
// dest_p, from_local_start, from_local_start + S_count,
// S_count);
MPI_Send(&integers[from_local_start], S_count, MPI_INT, dest_p, 124,
comm);
}
}
}
}
for (int host_p = 0; host_p < p; ++host_p) {
if (rank == host_p) {
// Your {S,L}_ctl is a mapping from source_processor -> ctl
// Everyone already knows who needs to send to who now
for (int sender_p = 0; sender_p < p; ++sender_p) {
int L_count = L_ctl[sender_p * CTL_SIZE];
if (L_count > 0) {
int to_local_start = L_ctl[sender_p * CTL_SIZE + 2];
int from_local_start = L_ctl[sender_p * CTL_SIZE + 3];
if (sender_p == host_p) {
for (int k = 0; k < L_count; ++k) {
integers_recv_3[to_local_start + k] =
integers[from_local_start + k];
}
continue;
}
// printf("[%d] - L inbound from host %d to [%d..%d] (%d)\n", rank,
// sender_p, to_local_start, to_local_start + L_count,
// L_count);
MPI_Recv(&integers_recv_2[to_local_start], L_count, MPI_INT, sender_p,
125, comm, MPI_STATUS_IGNORE);
for (int k = 0; k < L_count; ++k) {
integers_recv_3[to_local_start + k] =
integers_recv_2[to_local_start + k];
}
}
}
} else {
// Your {S,L}_send_ctl contains a mapping from dest_processor -> ctl
for (int dest_p = 0; dest_p < p; ++dest_p) {
int L_count = L_send_ctl[dest_p * CTL_SIZE];
if (L_count > 0 && dest_p == host_p) {
int from_local_start = L_send_ctl[dest_p * CTL_SIZE + 3];
// printf("[%d] - L outbound to host %d from [%d..%d] (%d)\n", rank,
// dest_p, from_local_start, from_local_start + L_count,
// L_count);
MPI_Send(&integers[from_local_start], L_count, MPI_INT, dest_p, 125,
comm);
}
}
}
}
printf("[%d] after: %s\n", rank, string_of_list(integers_recv_3, n_over_p));
for (int i = 0; i < n_over_p; ++i) {
integers[i] = integers_recv_3[i];
}
// Now, determine which processes should be responsible for taking the S and L
// arrays
// Specifically, the part where it's split, break the tie to see if it goes
// down or up
int colors[p];
int p_of_split = S_global_max_end / n_over_p;
int split_point = S_global_max_end % n_over_p;
// printf("[%d] p_of_split = %d / %d = %d\n", rank, S_global_max_end,
// n_over_p,
// p_of_split);
int S_split_add = split_point, L_split_sub = n_over_p - split_point;
int lo_start = 0, lo_end;
int hi_start, hi_end = p;
if (split_point > n_over_p / 2) {
// Belongs to the lower group
lo_end = hi_start = p_of_split + 1;
} else {
// Belongs to the higher group
lo_end = hi_start = p_of_split;
}
int child_root = -1;
for (int i = 0; i < p; ++i) {
if (i < lo_end)
colors[i] = 100;
else {
colors[i] = 200;
if (child_root == -1)
child_root = i;
}
}
// MPI_Comm child;
// MPI_Comm_split(comm, colors[rank], rank, &child);
// printf("[%d] Recursing...\n", rank);
// int child_size;
// MPI_Comm_size(child, &child_size);
// int start_at = 0, new_n = child_size * n_over_p;
// if (colors[rank] == 100) {
// new_n += S_split_add;
// } else {
// new_n -= L_split_sub;
// if (rank == p_of_split)
// start_at = split_point;
// }
// recursive_quicksort(integers, n, child_root, child);
// printf("[%d] Done recursing.\n", rank);
// MPI_Comm_free(&child);
}
// Marks `len` control records as empty by setting each of their CTL_SIZE
// fields to -1. `ctl` must have room for len * CTL_SIZE ints.
void init_ctl(int *ctl, int len) {
  int total_fields = len * CTL_SIZE;
  for (int field = 0; field < total_fields; ++field) {
    ctl[field] = -1;
  }
}