#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define ORDER_FORWARDS 1
#define ORDER_BACKWARDS 2

#define GENERIC_MAX(x, y) ((x) > (y) ? (x) : (y))
#define GENERIC_MIN(x, y) ((x) < (y) ? (x) : (y))

#define ENSURE_int(i) _Generic((i), int : (i))
#define ENSURE_float(f) _Generic((f), float : (f))

#define MAX(type, x, y) (type) GENERIC_MAX(ENSURE_##type(x), ENSURE_##type(y))
#define MIN(type, x, y) (type) GENERIC_MIN(ENSURE_##type(x), ENSURE_##type(y))
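
// Note on the macros above: the ENSURE_* helpers use C11 _Generic so that
// MIN(type, x, y) / MAX(type, x, y) only compile when both arguments really
// have the named type. For illustration, MIN(int, a + 1, b) is accepted when
// a and b are ints, while MIN(int, 0.5f, b) is a compile-time error because
// 0.5f has no `int` association in _Generic.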

void local_quicksort(int *arr, int lo, int hi);
char *string_of_list(int *arr, int len);

int main(int argc, char **argv) {
  int rank, p;
  MPI_Init(&argc, &argv);

  int n = atoi(argv[1]);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &p);

  // Generate integers
  int n_over_p = n / p;
  int integers[n_over_p];

  // Important implementation detail: srand(0) is specially handled by glibc to
  // behave as if it was called with srand(1). To get around this, I'm seeding
  // with rank + 1.
  //
  // See more: https://stackoverflow.com/a/27386563
  srand(rank + 1);

  for (int i = 0; i < n_over_p; ++i) {
    // TODO: For readability during debugging, values are capped at 100
    integers[i] = rand() % 101;
    // printf(" - %d\n", integers[i]);
  }

  int group_root = 0;

  // Locally sort
  printf("[%d] Numbers before: %s\n", rank,
         string_of_list(integers, n_over_p));
  local_quicksort(integers, 0, n_over_p);
  printf("[%d] Numbers after first sort: %s\n", rank,
         string_of_list(integers, n_over_p));

  // Select a pivot.
  // This pivot is broadcast to all nodes.
  int pivot;

  // The pivot is selected as the median (see chp. 9.4.4).
  // Not the true median, though: it has to be an existing element of the
  // (locally sorted) array, so take the middle element of this rank's chunk.
  pivot = integers[n_over_p / 2];
  MPI_Bcast(&pivot, 1, MPI_INT, 0, MPI_COMM_WORLD);

  printf("Median: %d\n", pivot);
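
  // Note: every rank computes a candidate pivot from its own chunk, but the
  // MPI_Bcast above uses root 0, so after the call every rank holds rank 0's
  // local median; the other candidates are simply overwritten.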

  // Determine where the boundary between S (lower) and L (higher) lies.
  // If every local element is below the pivot, L is empty and the boundary
  // is n_over_p, so start from that default (otherwise `boundary` would be
  // read uninitialized).
  int boundary = n_over_p;
  for (int i = 0; i < n_over_p; ++i) {
    if (integers[i] >= pivot) {
      boundary = i;
      break;
    }
  }
  int S_lo = 0, S_hi = boundary - 1;
  int L_lo = boundary, L_hi = n_over_p - 1;
  int S_size = S_hi - S_lo + 1, L_size = L_hi - L_lo + 1;
  printf("[%d] S: [%d - %d] (%d), L: [%d - %d] (%d)\n", rank, S_lo, S_hi,
         S_size, L_lo, L_hi, L_size);
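
  // Example: with n_over_p = 4, a locally sorted chunk {12, 40, 57, 93} and
  // pivot 45 gives boundary = 2, so S = [0 - 1] (size 2) and L = [2 - 3]
  // (size 2). A chunk whose values are all below the pivot keeps
  // boundary = 4, giving S = [0 - 3] and an empty L (L_size = 0).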

  // Perform global arrangement
  int S_global_end, L_reverse_end;
  MPI_Scan(&S_size, &S_global_end, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  MPI_Scan(&L_size, &L_reverse_end, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  int S_global_start = S_global_end - S_size,
      L_reverse_start = L_reverse_end - L_size,
      L_global_start = n - L_reverse_end, L_global_end = n - L_reverse_start;
  printf("[%d] S: [%d - %d], L: [%d - %d]\n", rank, S_global_start,
         S_global_end - 1, L_global_start, L_global_end - 1);
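
  // MPI_Scan with MPI_SUM is an inclusive prefix sum, so each rank learns
  // where its S block (and, mirrored from the top, its L block) ends in the
  // global order. Example with p = 3 and S_size = {3, 1, 2} on ranks 0..2:
  // S_global_end = {3, 4, 6}, hence S_global_start = {0, 3, 4}, i.e. the S
  // blocks are packed left-to-right at the front of the n slots. The L blocks
  // are packed from the back of the array toward the front (rank 0's L block
  // ends at index n - 1) via L_global_start = n - L_reverse_end.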

  // TODO: This block doesn't work yet; a slower method is substituted below
  // until it's figured out.
  if (0) {
    // Send it to the correct target
    int S_starting_process = S_global_start / n_over_p,
        L_starting_process = L_global_start / n_over_p;
    int S_offset = S_global_start % n_over_p,
        L_offset = L_global_start % n_over_p;

    for (int i = S_lo, dest_pos = S_global_start,
             processor = S_starting_process;
         i < S_hi;) {
      int next_break = MIN(int, dest_pos + (S_hi - S_lo),
                           (dest_pos / n_over_p) * n_over_p + n_over_p);
      int count = next_break - dest_pos;

      int local_start = i, local_end = i + count;
      int dest_start = dest_pos, dest_end = dest_pos + count;

      printf("[%d] copying from S, local[%d..%d] to dest #%d [%d..%d]\n", rank,
             local_start, local_end, processor, dest_start, dest_end);

      int recvbuf[count];
      MPI_Sendrecv(&integers[local_start], count, MPI_INT, processor, 123,
                   recvbuf, count, MPI_INT, rank, 123, MPI_COMM_WORLD,
                   MPI_STATUS_IGNORE);

      i += count;
      dest_pos += count;
      processor += 1;
    }

    for (int i = L_lo, dest_pos = L_global_start,
             processor = L_starting_process;
         i < L_hi;) {
      int next_break = MIN(int, L_global_end,
                           MIN(int, dest_pos + (L_hi - L_lo),
                               (dest_pos / n_over_p) * n_over_p + n_over_p));
      int count = next_break - dest_pos;

      int local_start = i, local_end = i + count;
      int dest_start = dest_pos, dest_end = dest_pos + count;

      printf("[%d] copying from L, local[%d..%d] to dest #%d [%d..%d]\n", rank,
             local_start, local_end, processor, dest_start, dest_end);

      i += count;
      dest_pos += count;
      processor += 1;
    }
  }

  if (0) {
    int *fucked = calloc(n, sizeof(int));

    // Send it to the correct target
    int S_starting_process = S_global_start / n_over_p,
        L_starting_process = L_global_start / n_over_p;
    int S_offset = S_global_start % n_over_p,
        L_offset = L_global_start % n_over_p;

    for (int i = S_lo, dest_pos = S_global_start,
             processor = S_starting_process;
         i < S_hi;) {
      int next_break = MIN(int, dest_pos + (S_hi - S_lo),
                           (dest_pos / n_over_p) * n_over_p + n_over_p);
      int count = next_break - dest_pos;

      int local_start = i, local_end = i + count;
      int dest_start = dest_pos, dest_end = dest_pos + count;

      printf("[%d] copying from S, local[%d..%d] to dest #%d [%d..%d]\n", rank,
             local_start, local_end, processor, dest_start, dest_end);

      int recvbuf[count];
      // MPI_Sendrecv(&integers[local_start], count, MPI_INT, processor, 123,
      //              recvbuf, count, MPI_INT, rank, 123, MPI_COMM_WORLD,
      //              MPI_STATUS_IGNORE);
      for (int j = 0; j < count; ++j) {
        fucked[j] = integers[local_start + j];
      }
      // MPI_Scatter(&integers[local_start], count, MPI_INT,
      //             &fucked[dest_start],
      //             count, MPI_INT, rank, MPI_COMM_WORLD);
      // MPI_Bcast(&dest_start, 1, MPI_INT, rank, MPI_COMM_WORLD);
      // printf("dest start is %d %d\n", dest_start, count);
      // MPI_Bcast(&fucked[dest_start], count, MPI_INT, rank, MPI_COMM_WORLD);
      // MPI_Alltoall(&fucked[dest_start], count, MPI_INT, &fucked[dest_start],
      //              count, MPI_INT, MPI_COMM_WORLD);
      // MPI_Allgather(&fucked[dest_start], count, MPI_INT, &fucked[dest_start],
      //               count, MPI_INT, MPI_COMM_WORLD);
      int recvs[p];
      int displs[p];
      MPI_Allgather(&count, 1, MPI_INT, &recvs, 1, MPI_INT, MPI_COMM_WORLD);
      displs[0] = 0;
      for (int j = 1; j < p; j++) {
        displs[j] = displs[j - 1] + recvs[j - 1];
      }
      MPI_Allgatherv(&fucked[dest_start], count, MPI_INT, &fucked[dest_start],
                     &recvs[0], &displs[0], MPI_INT, MPI_COMM_WORLD);
      // MPI_Alltoall(&fucked[local_start], count, MPI_INT,
      //              &fucked[local_start], count, MPI_INT,
      //              MPI_COMM_WORLD);

      i += count;
      dest_pos += count;
      processor += 1;

      for (int i = 0; i < n; ++i) {
        printf("%d, ", fucked[i]);
      }
      printf("\n");
    }

    /*
    for (int i = L_lo, dest_pos = L_global_start,
             processor = L_starting_process;
         i < L_hi;) {
      int next_break = MIN(int, L_global_end,
                           MIN(int, dest_pos + (L_hi - L_lo),
                               (dest_pos / n_over_p) * n_over_p + n_over_p));
      int count = next_break - dest_pos;

      int local_start = i, local_end = i + count;
      int dest_start = dest_pos, dest_end = dest_pos + count;

      printf("[%d] copying from L, local[%d..%d] to dest #%d [%d..%d]\n",
             rank, local_start, local_end, processor, dest_start, dest_end);

      i += count;
      dest_pos += count;
      processor += 1;
    }
    */
  }
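
  // A minimal sketch of one possible global rearrangement (not the author's
  // method): every rank contributes its S block to the front region and its
  // L block to the back region of a scratch array via two MPI_Allgatherv
  // calls, using the same offsets as the MPI_Scan math above. `rearranged`,
  // `S_sizes`, `L_sizes`, `S_displs` and `L_displs` are hypothetical names;
  // left disabled like the attempts above.
  if (0) {
    int S_sizes[p], L_sizes[p], S_displs[p], L_displs[p];
    MPI_Allgather(&S_size, 1, MPI_INT, S_sizes, 1, MPI_INT, MPI_COMM_WORLD);
    MPI_Allgather(&L_size, 1, MPI_INT, L_sizes, 1, MPI_INT, MPI_COMM_WORLD);

    // S blocks are packed left-to-right from index 0, in rank order.
    int acc = 0;
    for (int r = 0; r < p; ++r) {
      S_displs[r] = acc;
      acc += S_sizes[r];
    }
    // L blocks are packed from the back, rank 0's block sitting highest,
    // mirroring L_global_start = n - L_reverse_end above.
    acc = 0;
    for (int r = 0; r < p; ++r) {
      acc += L_sizes[r];
      L_displs[r] = n - acc;
    }

    int *rearranged = malloc(n * sizeof(int));
    MPI_Allgatherv(&integers[S_lo], S_size, MPI_INT, rearranged, S_sizes,
                   S_displs, MPI_INT, MPI_COMM_WORLD);
    MPI_Allgatherv(&integers[L_lo], L_size, MPI_INT, rearranged, L_sizes,
                   L_displs, MPI_INT, MPI_COMM_WORLD);
    free(rearranged);
  }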

  // The first node is responsible for collecting all the data and then
  // printing it out to the file.
  // MPI_Gather(const void *sendbuf, int sendcount, MPI_INT, void *recvbuf,
  //            int recvcount, MPI_INT, 0, MPI_COMM_WORLD);
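
  // A minimal sketch (not wired in) of what that collection step might look
  // like, assuming each rank simply contributes its current n_over_p chunk;
  // `gathered` is a hypothetical buffer name. Left disabled because the
  // redistribution above is still unfinished.
  if (0) {
    int *gathered = NULL;
    if (rank == 0) {
      gathered = malloc(n * sizeof(int));
    }
    MPI_Gather(integers, n_over_p, MPI_INT, gathered, n_over_p, MPI_INT, 0,
               MPI_COMM_WORLD);
    if (rank == 0) {
      FILE *out = fopen(argv[2], "w");
      fprintf(out, "%s\n", string_of_list(gathered, n));
      fclose(out);
      free(gathered);
    }
  }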

  if (rank == 0) {
    FILE *f = fopen(argv[2], "w");
    fclose(f);
  }

  MPI_Finalize();
  printf("Done.\n");
  return 0;
}

// Sequential quicksort of arr[lo..hi); hi is not inclusive.
// Lomuto partition with the last element as the pivot.
void local_quicksort(int *arr, int lo, int hi) {
  int temp;

  if (lo >= hi || lo < 0)
    return;

  int pivot = arr[hi - 1];
  int pivot_idx = lo - 1;
  for (int j = lo; j < hi; ++j) {
    if (arr[j] < pivot) {
      pivot_idx += 1;

      temp = arr[j];
      arr[j] = arr[pivot_idx];
      arr[pivot_idx] = temp;
    }
  }

  // Move the pivot into its final position.
  pivot_idx += 1;
  temp = arr[hi - 1];
  arr[hi - 1] = arr[pivot_idx];
  arr[pivot_idx] = temp;

  // Recurse on the two halves, excluding the pivot itself.
  local_quicksort(arr, lo, pivot_idx);
  local_quicksort(arr, pivot_idx + 1, hi);
}

// Render arr[0..len) as a space-separated string. The caller is responsible
// for freeing the returned buffer.
char *string_of_list(int *arr, int len) {
  // Worst case per element: 11 characters for an int plus one separator.
  char *buffer = malloc(len * 12 + 1);
  buffer[0] = '\0';
  int offset = 0; // Keep track of the current position in the buffer
  for (int i = 0; i < len; i++) {
    offset += sprintf(buffer + offset, "%d", arr[i]);
    if (i < len - 1) {
      // Add a separator if it's not the last element
      offset += sprintf(buffer + offset, " ");
    }
  }

  return buffer;
}
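
// Build/run sketch (assumes an MPI toolchain; the file and binary names are
// placeholders): the program takes the total element count and an output
// file path, e.g.
//   mpicc -o psort psort.c
//   mpirun -np 4 ./psort 16 out.txt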