#include #include #include #include #define ORDER_FORWARDS 1 #define ORDER_BACKWARDS 2 #define CTL_SIZE 3 #define GENERIC_MAX(x, y) ((x) > (y) ? (x) : (y)) #define GENERIC_MIN(x, y) ((x) < (y) ? (x) : (y)) #define ENSURE_int(i) _Generic((i), int : (i)) #define ENSURE_float(f) _Generic((f), float : (f)) #define MAX(type, x, y) (type) GENERIC_MAX(ENSURE_##type(x), ENSURE_##type(y)) #define MIN(type, x, y) (type) GENERIC_MIN(ENSURE_##type(x), ENSURE_##type(y)) void local_quicksort(int *arr, int lo, int hi); char *string_of_list(int *arr, int len); void recursive_quicksort(int *integers, int n, MPI_Comm comm); int main(int argc, char **argv) { int rank, p; MPI_Init(&argc, &argv); int n = atoi(argv[1]); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &p); // Generate integers int n_over_p = n / p; int integers[n_over_p]; // Important implementation detail: srand(0) is specially handled by glibc to // behave as if it was called with srand(1). To get around this, I'm seeding // with rank + 1 // // See more: https://stackoverflow.com/a/27386563 srand(rank + 1); for (int i = 0; i < n_over_p; ++i) { // TODO: For readability during debugging, I'm capping this integers[i] = rand() % 101; // printf(" - %d\n", integers[i]); } recursive_quicksort(integers, n, MPI_COMM_WORLD); sleep(1); printf("[%d] after: %s\n", rank, string_of_list(integers, n_over_p)); // The first node is responsible for collecting all the data and then // printing it out to the file MPI_Gather(const void *sendbuf, int // sendcount, MPI_INT, void *recvbuf, // int recvcount, MPI_INT, 0, MPI_COMM_WORLD); if (rank == 0) { FILE *f = fopen(argv[2], "w"); fclose(f); } MPI_Finalize(); printf("Done.\n"); return 0; } // hi not inclusive void local_quicksort(int *arr, int lo, int hi) { int temp; if (lo >= hi || lo < 0) return; int pivot = arr[hi - 1]; int pivot_idx = lo - 1; for (int j = lo; j < hi; ++j) { if (arr[j] < pivot) { pivot_idx += 1; temp = arr[j]; arr[j] = arr[pivot_idx]; arr[pivot_idx] = temp; } } pivot_idx += 1; temp = arr[hi - 1]; arr[hi - 1] = arr[pivot_idx]; arr[pivot_idx] = temp; // Recursive call local_quicksort(arr, lo, pivot_idx); local_quicksort(arr, pivot_idx + 1, hi); } char *string_of_list(int *arr, int len) { char *buffer = calloc(sizeof(char), 1000); int offset = 0; // Keep track of the current position in the buffer for (int i = 0; i < len; i++) { offset += sprintf(buffer + offset, "%d", arr[i]); if (i < len - 1) { // Add a separator (e.g., comma or space) if it's not the last element offset += sprintf(buffer + offset, " "); } } return buffer; } void recursive_quicksort(int *integers, int n, MPI_Comm comm) { int rank, p; MPI_Comm_size(comm, &p); MPI_Comm_rank(comm, &rank); if (p == 1) { // Recursion base case: just sort it serially local_quicksort(integers, 0, n); return; } int n_over_p = n / p; // Locally sort // printf("[%d] Numbers before: %s\n", rank, // string_of_list(integers, n_over_p)); local_quicksort(integers, 0, n_over_p); printf("[%d] Numbers after first sort: %s\n", rank, string_of_list(integers, n_over_p)); // Select a pivot. // This pivot is broadcasted to all nodes int pivot; // The pivot is selected as the median (see chp. 9.4.4) // Not the real median though, need an existing element of the array pivot = integers[n_over_p / 2]; MPI_Bcast(&pivot, 1, MPI_INT, 0, MPI_COMM_WORLD); // printf("--- Broadcasted pivot: %d ---\n", pivot); // Determine where the boundary between S (lower) and L (higher) lies int boundary; for (int i = 0; i < n_over_p; ++i) { if (integers[i] >= pivot) { boundary = i; break; } } int S_lo = 0, S_hi = boundary; int L_lo = boundary, L_hi = n_over_p; int S_size = S_hi - S_lo, L_size = L_hi - L_lo; printf("[%d] S: [%d - %d] (%d), L: [%d - %d] (%d)\n", rank, S_lo, S_hi, S_size, L_lo, L_hi, L_size); // Perform global arrangement int S_global_end, L_reverse_end, S_global_max_end; MPI_Scan(&S_size, &S_global_end, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); MPI_Scan(&L_size, &L_reverse_end, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); // printf("[%d] bruh %d\n", rank, S_global_end); MPI_Reduce(&S_global_end, &S_global_max_end, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); int S_global_start = S_global_end - S_size, L_reverse_start = L_reverse_end - L_size, L_global_start = n - L_reverse_end, L_global_end = n - L_reverse_start; printf("[%d] Prefixed S: [%d - %d], Prefixed L: [%d - %d]\n", rank, S_global_start, S_global_end - 1, L_global_start, L_global_end - 1); int S_starting_process = S_global_start / n_over_p, L_starting_process = L_global_start / n_over_p; int S_offset = S_global_start % n_over_p, L_offset = L_global_start % n_over_p; int *integers_recv_buf = calloc(sizeof(int), n); int S_ctl[p * CTL_SIZE]; int L_ctl[p * CTL_SIZE]; int recvpart[n_over_p]; int send_ctl[p * CTL_SIZE]; int ctl_send_counts[p]; int ctl_send_displs[p]; int send_counts[p]; int send_displs[p]; int recv_counts[p]; int recv_displs[p]; for (int i = 0; i < p; ++i) { send_counts[i] = n_over_p; send_displs[i] = i * n_over_p; S_ctl[i * CTL_SIZE] = 0; S_ctl[i * CTL_SIZE + 1] = -1; S_ctl[i * CTL_SIZE + 2] = -1; L_ctl[i * CTL_SIZE] = 0; L_ctl[i * CTL_SIZE + 1] = -1; L_ctl[i * CTL_SIZE + 2] = -1; ctl_send_counts[i] = CTL_SIZE; ctl_send_displs[i] = i * CTL_SIZE; recv_counts[i] = CTL_SIZE; recv_displs[i] = i * CTL_SIZE; } // Send S to the correct target { for (int i = 0; i < p; ++i) { send_ctl[i * CTL_SIZE] = 0; send_ctl[i * CTL_SIZE + 1] = -1; send_ctl[i * CTL_SIZE + 2] = -1; } for (int i = S_lo, dest_pos = S_global_start, processor = S_starting_process; i < S_hi;) { int next_break = MIN(int, S_global_end, MIN(int, dest_pos + (S_hi - S_lo), (dest_pos / n_over_p) * n_over_p + n_over_p)); int count = next_break - dest_pos; int from_local_start = i, from_local_end = i + count; int from_global_start = rank * n_over_p + from_local_start, from_global_end = from_global_start + count; int to_global_start = dest_pos, to_global_end = dest_pos + count; int to_local_start = to_global_start - processor * n_over_p, to_local_end = to_global_end - processor * n_over_p; printf("[%d] S ->> (count=%d), from local [%d..%d] {%d..%d} -to-> " "p#%d [%d..%d] {%d..%d}\n", rank, count, from_local_start, from_local_end, from_global_start, from_global_end, processor, to_local_start, to_local_end, to_global_start, to_global_end); send_ctl[processor * CTL_SIZE] = count; send_ctl[processor * CTL_SIZE + 1] = from_global_start; send_ctl[processor * CTL_SIZE + 2] = to_local_start; i += count; dest_pos += count; processor += 1; } MPI_Alltoallv(send_ctl, ctl_send_counts, ctl_send_displs, MPI_INT, S_ctl, recv_counts, recv_displs, MPI_INT, MPI_COMM_WORLD); } // Send L to the correct target { for (int i = 0; i < p; ++i) { send_ctl[i * CTL_SIZE] = 0; send_ctl[i * CTL_SIZE + 1] = -1; send_ctl[i * CTL_SIZE + 2] = -1; } for (int i = L_lo, dest_pos = L_global_start, processor = L_starting_process; i < L_hi;) { int next_break = MIN(int, L_global_end, MIN(int, dest_pos + (L_hi - L_lo), (dest_pos / n_over_p) * n_over_p + n_over_p)); int count = next_break - dest_pos; int from_local_start = i, from_local_end = i + count; int from_global_start = rank * n_over_p + from_local_start, from_global_end = from_global_start + count; int to_global_start = dest_pos, to_global_end = dest_pos + count; int to_local_start = to_global_start - processor * n_over_p, to_local_end = to_global_end - processor * n_over_p; printf("[%d] L ->> (count=%d), from local [%d..%d] {%d..%d} -to-> " "p#%d [%d..%d] {%d..%d}\n", rank, count, from_local_start, from_local_end, from_global_start, from_global_end, processor, to_local_start, to_local_end, to_global_start, to_global_end); send_ctl[processor * CTL_SIZE] = count; send_ctl[processor * CTL_SIZE + 1] = from_global_start; send_ctl[processor * CTL_SIZE + 2] = to_local_start; i += count; dest_pos += count; processor += 1; } MPI_Alltoallv(send_ctl, ctl_send_counts, ctl_send_displs, MPI_INT, L_ctl, recv_counts, recv_displs, MPI_INT, MPI_COMM_WORLD); } // After sending S and L information for (int i = 0; i < p; ++i) { recv_counts[i] = n_over_p; recv_displs[i] = i * n_over_p; } // MPI_Alltoallv(integers, send_counts, send_displs, MPI_INT, // integers_recv_buf, // recv_counts, recv_displs, MPI_INT, MPI_COMM_WORLD); MPI_Allgather(integers, n_over_p, MPI_INT, integers_recv_buf, n_over_p, MPI_INT, MPI_COMM_WORLD); // printf("[%d] ints: %s\n", rank, string_of_list(integers_recv_buf, n)); for (int i = 0; i < p; ++i) { int count = S_ctl[i * CTL_SIZE]; int from_global_start = S_ctl[i * CTL_SIZE + 1]; int to_local_start = S_ctl[i * CTL_SIZE + 2]; if (count > 0) { printf( "[%d] <<- S received (%d) from processor %d {%d..%d} to [%d..%d]\n", rank, count, i, from_global_start, from_global_start + count, to_local_start, to_local_start + count); for (int j = 0; j < count; ++j) { integers[to_local_start + j] = integers_recv_buf[from_global_start + j]; } } } for (int i = 0; i < p; ++i) { int count = L_ctl[i * CTL_SIZE]; int from_global_start = L_ctl[i * CTL_SIZE + 1]; int to_local_start = L_ctl[i * CTL_SIZE + 2]; if (count > 0) { printf( "[%d] <<- S received (%d) from processor %d {%d..%d} to [%d..%d]\n", rank, count, i, from_global_start, from_global_start + count, to_local_start, to_local_start + count); for (int j = 0; j < count; ++j) { integers[to_local_start + j] = integers_recv_buf[from_global_start + j]; } } } // Now, determine which processes should be responsible for taking the S and L // arrays // Specifically, the part where it's split, break the tie to see if it goes // down or up int colors[p]; if (rank == 0) { int p_of_split = S_global_max_end / n_over_p; int split_point = S_global_max_end % n_over_p; printf("[%d] shiet %d\n", rank, p_of_split); int lo_start = 0, lo_end; int hi_start, hi_end = p; if (split_point > n_over_p / 2) { // Belongs to the lower group lo_end = hi_start = p_of_split + 1; } else { // Belongs to the higher group lo_end = hi_start = p_of_split; } for (int i = 0; i < p; ++i) { if (i < lo_end) colors[i] = 100; else colors[i] = 200; } } MPI_Comm child; MPI_Comm_split(comm, colors[rank], rank, &child); printf("[%d] Recursing...\n", rank); MPI_Comm_free(&child); }