From fa08589cc771569ee894fad049583de1ca10dc7d Mon Sep 17 00:00:00 2001 From: Michael Zhang Date: Mon, 30 Oct 2023 09:09:03 +0000 Subject: [PATCH] holy SHIT it works --- assignments/02/Makefile | 4 +- assignments/02/process_output.py | 24 ++++ assignments/02/qs_mpi.c | 208 ++++++++++++++++++++++++------- 3 files changed, 188 insertions(+), 48 deletions(-) create mode 100644 assignments/02/process_output.py diff --git a/assignments/02/Makefile b/assignments/02/Makefile index d6919cd..cc44f17 100644 --- a/assignments/02/Makefile +++ b/assignments/02/Makefile @@ -2,8 +2,8 @@ CC := cc -CFLAGS := -LDFLAGS := +CFLAGS := -g +LDFLAGS := -g CFLAGS += $(shell pkg-config --cflags mpi) LDFLAGS += $(shell pkg-config --libs mpi) diff --git a/assignments/02/process_output.py b/assignments/02/process_output.py new file mode 100644 index 0000000..1164ae4 --- /dev/null +++ b/assignments/02/process_output.py @@ -0,0 +1,24 @@ +import sys +import re + +pat = re.compile(r"\[(\d+)\] (.*)") + +outputs = {} + +for line in sys.stdin.readlines(): + m = pat.match(line) + if not m: + # print(line) + continue + p = int(m.group(1)) + rest = m.group(2) + + if p not in outputs: outputs[p] = [] + outputs[p].append(rest) + +for p in sorted(outputs.keys()): + lines = outputs[p] + print(f"---- {p} ----") + for line in lines: + print(line) + print() \ No newline at end of file diff --git a/assignments/02/qs_mpi.c b/assignments/02/qs_mpi.c index ce84d06..1ee0bb6 100644 --- a/assignments/02/qs_mpi.c +++ b/assignments/02/qs_mpi.c @@ -1,9 +1,11 @@ #include #include #include +#include #define ORDER_FORWARDS 1 #define ORDER_BACKWARDS 2 +#define CTL_SIZE 3 #define GENERIC_MAX(x, y) ((x) > (y) ? (x) : (y)) #define GENERIC_MIN(x, y) ((x) < (y) ? (x) : (y)) @@ -46,8 +48,8 @@ int main(int argc, char **argv) { int group_root = 0; // Locally sort - printf("[%d] Numbers before: %s\n", rank, - string_of_list(integers, n_over_p)); + // printf("[%d] Numbers before: %s\n", rank, + // string_of_list(integers, n_over_p)); local_quicksort(integers, 0, n_over_p); printf("[%d] Numbers after first sort: %s\n", rank, string_of_list(integers, n_over_p)); @@ -60,8 +62,7 @@ int main(int argc, char **argv) { // Not the real median though, need an existing element of the array pivot = integers[n_over_p / 2]; MPI_Bcast(&pivot, 1, MPI_INT, 0, MPI_COMM_WORLD); - - printf("Median: %d\n", pivot); + printf("--- Broadcasted pivot: %d ---\n", pivot); // Determine where the boundary between S (lower) and L (higher) lies int boundary; @@ -71,9 +72,9 @@ int main(int argc, char **argv) { break; } } - int S_lo = 0, S_hi = boundary - 1; - int L_lo = boundary, L_hi = n_over_p - 1; - int S_size = S_hi - S_lo + 1, L_size = L_hi - L_lo + 1; + int S_lo = 0, S_hi = boundary; + int L_lo = boundary, L_hi = n_over_p; + int S_size = S_hi - S_lo, L_size = L_hi - L_lo; printf("[%d] S: [%d - %d] (%d), L: [%d - %d] (%d)\n", rank, S_lo, S_hi, S_size, L_lo, L_hi, L_size); @@ -85,64 +86,179 @@ int main(int argc, char **argv) { int S_global_start = S_global_end - S_size, L_reverse_start = L_reverse_end - L_size, L_global_start = n - L_reverse_end, L_global_end = n - L_reverse_start; - printf("[%d] S: [%d - %d], L: [%d - %d]\n", rank, S_global_start, - S_global_end - 1, L_global_start, L_global_end - 1); + printf("[%d] Prefixed S: [%d - %d], Prefixed L: [%d - %d]\n", rank, + S_global_start, S_global_end - 1, L_global_start, L_global_end - 1); + int S_starting_process = S_global_start / n_over_p, + L_starting_process = L_global_start / n_over_p; + int S_offset = S_global_start % n_over_p, + L_offset = L_global_start % n_over_p; + + int *integers_recv_buf = calloc(sizeof(int), n); + int S_ctl[p * CTL_SIZE]; + int L_ctl[p * CTL_SIZE]; + int recvpart[n_over_p]; + int send_ctl[p * CTL_SIZE]; + int ctl_send_counts[p]; + int ctl_send_displs[p]; + + int send_counts[p]; + int send_displs[p]; + int recv_counts[p]; + int recv_displs[p]; + for (int i = 0; i < p; ++i) { + send_counts[i] = n_over_p; + send_displs[i] = i * n_over_p; + + S_ctl[i * CTL_SIZE] = 0; + S_ctl[i * CTL_SIZE + 1] = -1; + S_ctl[i * CTL_SIZE + 2] = -1; + L_ctl[i * CTL_SIZE] = 0; + L_ctl[i * CTL_SIZE + 1] = -1; + L_ctl[i * CTL_SIZE + 2] = -1; + + ctl_send_counts[i] = CTL_SIZE; + ctl_send_displs[i] = i * CTL_SIZE; + recv_counts[i] = CTL_SIZE; + recv_displs[i] = i * CTL_SIZE; + } + + // Send S to the correct target { - int recvpart[n_over_p]; - int send_counts[p]; - int send_displs[p]; for (int i = 0; i < p; ++i) { - send_counts[i] = 0; - send_displs[i] = 0; + send_ctl[i * CTL_SIZE] = 0; + send_ctl[i * CTL_SIZE + 1] = -1; + send_ctl[i * CTL_SIZE + 2] = -1; } - // Send it to the correct target - int S_starting_process = S_global_start / n_over_p, - L_starting_process = L_global_start / n_over_p; - int S_offset = S_global_start % n_over_p, - L_offset = L_global_start % n_over_p; - for (int i = S_lo, dest_pos = S_global_start, processor = S_starting_process; i < S_hi;) { - int next_break = MIN(int, dest_pos + (S_hi - S_lo), - (dest_pos / n_over_p) * n_over_p + n_over_p); + int next_break = MIN(int, S_global_end, + MIN(int, dest_pos + (S_hi - S_lo), + (dest_pos / n_over_p) * n_over_p + n_over_p)); int count = next_break - dest_pos; - int local_start = i, local_end = i + count; - int dest_start = dest_pos, dest_end = dest_pos + count; + int from_local_start = i, from_local_end = i + count; + int from_global_start = rank * n_over_p + from_local_start, + from_global_end = from_global_start + count; - printf("[%d] copying from S, local[%d..%d] to dest #%d [%d..%d]\n", rank, - local_start, local_end, processor, dest_start, dest_end); - send_counts[processor] = count; + int to_global_start = dest_pos, to_global_end = dest_pos + count; + int to_local_start = to_global_start - processor * n_over_p, + to_local_end = to_global_end - processor * n_over_p; - // int recvbuf[count]; - // MPI_Sendrecv(&integers[local_start], count, MPI_INT, processor, 123, - // recvbuf, count, MPI_INT, rank, 123, MPI_COMM_WORLD, - // MPI_STATUS_IGNORE); + printf("[%d] S ->> (count=%d), from local [%d..%d] {%d..%d} -to-> " + "p#%d [%d..%d] {%d..%d}\n", + rank, count, from_local_start, from_local_end, from_global_start, + from_global_end, processor, to_local_start, to_local_end, + to_global_start, to_global_end); + send_ctl[processor * CTL_SIZE] = count; + send_ctl[processor * CTL_SIZE + 1] = from_global_start; + send_ctl[processor * CTL_SIZE + 2] = to_local_start; i += count; dest_pos += count; processor += 1; } - for (int i = 1; i < p; ++i) { - send_displs[i] = send_displs[i - 1] + send_counts[i - 1]; - } - - // MPI_Alltoallv(integers, send_counts, send_displs, MPI_INT, recvpart, - // const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, - // MPI_Comm comm) - - printf("[%d] send_counts: %s\n", rank, string_of_list(send_counts, p)); - printf("[%d] send_displs: %s\n", rank, string_of_list(send_displs, p)); - printf("[%d] after: %s\n", rank, string_of_list(recvpart, n_over_p)); + MPI_Alltoallv(send_ctl, ctl_send_counts, ctl_send_displs, MPI_INT, S_ctl, + recv_counts, recv_displs, MPI_INT, MPI_COMM_WORLD); } - // The first node is responsible for collecting all the data and then printing - // it out to the file - // MPI_Gather(const void *sendbuf, int sendcount, MPI_INT, void *recvbuf, + // Send L to the correct target + { + for (int i = 0; i < p; ++i) { + send_ctl[i * CTL_SIZE] = 0; + send_ctl[i * CTL_SIZE + 1] = -1; + send_ctl[i * CTL_SIZE + 2] = -1; + } + + for (int i = L_lo, dest_pos = L_global_start, + processor = L_starting_process; + i < L_hi;) { + int next_break = MIN(int, L_global_end, + MIN(int, dest_pos + (L_hi - L_lo), + (dest_pos / n_over_p) * n_over_p + n_over_p)); + int count = next_break - dest_pos; + + int from_local_start = i, from_local_end = i + count; + int from_global_start = rank * n_over_p + from_local_start, + from_global_end = from_global_start + count; + + int to_global_start = dest_pos, to_global_end = dest_pos + count; + int to_local_start = to_global_start - processor * n_over_p, + to_local_end = to_global_end - processor * n_over_p; + + printf("[%d] L ->> (count=%d), from local [%d..%d] {%d..%d} -to-> " + "p#%d [%d..%d] {%d..%d}\n", + rank, count, from_local_start, from_local_end, from_global_start, + from_global_end, processor, to_local_start, to_local_end, + to_global_start, to_global_end); + send_ctl[processor * CTL_SIZE] = count; + send_ctl[processor * CTL_SIZE + 1] = from_global_start; + send_ctl[processor * CTL_SIZE + 2] = to_local_start; + + i += count; + dest_pos += count; + processor += 1; + } + + MPI_Alltoallv(send_ctl, ctl_send_counts, ctl_send_displs, MPI_INT, L_ctl, + recv_counts, recv_displs, MPI_INT, MPI_COMM_WORLD); + } + + // After sending S and L information + + for (int i = 0; i < p; ++i) { + recv_counts[i] = n_over_p; + recv_displs[i] = i * n_over_p; + } + + // MPI_Alltoallv(integers, send_counts, send_displs, MPI_INT, + // integers_recv_buf, + // recv_counts, recv_displs, MPI_INT, MPI_COMM_WORLD); + MPI_Allgather(integers, n_over_p, MPI_INT, integers_recv_buf, n_over_p, + MPI_INT, MPI_COMM_WORLD); + // printf("[%d] ints: %s\n", rank, string_of_list(integers_recv_buf, n)); + + for (int i = 0; i < p; ++i) { + int count = S_ctl[i * CTL_SIZE]; + int from_global_start = S_ctl[i * CTL_SIZE + 1]; + int to_local_start = S_ctl[i * CTL_SIZE + 2]; + + if (count > 0) { + printf( + "[%d] <<- S received (%d) from processor %d {%d..%d} to [%d..%d]\n", + rank, count, i, from_global_start, from_global_start + count, + to_local_start, to_local_start + count); + for (int j = 0; j < count; ++j) { + integers[to_local_start + j] = integers_recv_buf[from_global_start + j]; + } + } + } + + for (int i = 0; i < p; ++i) { + int count = L_ctl[i * CTL_SIZE]; + int from_global_start = L_ctl[i * CTL_SIZE + 1]; + int to_local_start = L_ctl[i * CTL_SIZE + 2]; + + if (count > 0) { + printf( + "[%d] <<- S received (%d) from processor %d {%d..%d} to [%d..%d]\n", + rank, count, i, from_global_start, from_global_start + count, + to_local_start, to_local_start + count); + for (int j = 0; j < count; ++j) { + integers[to_local_start + j] = integers_recv_buf[from_global_start + j]; + } + } + } + + sleep(1); + printf("[%d] after: %s\n", rank, string_of_list(integers, n_over_p)); + + // The first node is responsible for collecting all the data and then + // printing it out to the file MPI_Gather(const void *sendbuf, int + // sendcount, MPI_INT, void *recvbuf, // int recvcount, MPI_INT, 0, MPI_COMM_WORLD); if (rank == 0) { FILE *f = fopen(argv[2], "w"); @@ -184,7 +300,7 @@ void local_quicksort(int *arr, int lo, int hi) { } char *string_of_list(int *arr, int len) { - char *buffer = malloc(1000); + char *buffer = calloc(sizeof(char), 1000); int offset = 0; // Keep track of the current position in the buffer for (int i = 0; i < len; i++) { offset += sprintf(buffer + offset, "%d", arr[i]);