consistent results now

Michael Zhang 2023-11-25 09:18:56 +00:00
parent 0619d79caa
commit 5fe38262c5
5 changed files with 94 additions and 34 deletions

View file

@@ -3,3 +3,4 @@ compile_commands.json
 .cache
 report.pdf
 *.tar.gz
+out.txt

View file

@@ -1,7 +1,7 @@
 .PHONY: run clean
-# CFLAGS += -O3
-CFLAGS += -DFMT_HEADER_ONLY -g
+CFLAGS += -O3
+# CFLAGS += -DFMT_HEADER_ONLY -g
 # LDFLAGS += $(shell pkg-config --libs fmt)
 lpa: lpa.cpp Makefile

assignments/03/bench.sh Executable file
View file

@@ -0,0 +1,8 @@
for dataset in $(echo "1000.txt" "10000.txt" "1000000.txt" "1000000.txt"); do
  for processors in $(echo 1 2 4 8 16 | tr ' ' '\n'); do
    # file="dataset/both_$dataset"
    file="/export/scratch/CSCI5451_F23/assignment-3/dataset/$dataset"
    echo $processors $file;
    mpirun -n $processors ./lpa $file >> out.txt
  done
done

View file

@@ -97,6 +97,7 @@ int main(int argc, char **argv) {
   int num_my_edges;
   pair *my_edges;
   int counts[p], displs[p];
+
   if (rank == 0) {
     line = NULL;
     // pair all_edges[total_num_edges];
@@ -140,10 +141,16 @@ int main(int argc, char **argv) {
       // We have to send the last one again here, since it didn't get caught in
       // the loop above
+      if (current_process == 0) {
+        num_my_edges = edge_counter;
+        my_edges = (pair *)calloc(num_my_edges, sizeof(pair));
+        memcpy(my_edges, all_edges.ptr, edge_counter * sizeof(pair));
+      } else {
       MPI_Send(&edge_counter, 1, MPI_INT, current_process, TAG_SEND_NUM_EDGES,
                MPI_COMM_WORLD);
       MPI_Send(all_edges.ptr, edge_counter, IntPairType, current_process,
                TAG_SEND_EDGES, MPI_COMM_WORLD);
+      }

     free(all_edges.ptr);
   } else {
@@ -161,6 +168,10 @@ int main(int argc, char **argv) {
   }
 #pragma endregion

+  if (rank == 0)
+    printf("Params: p=%d, |V|=%d, |E|=%d\n", p, total_num_nodes,
+           total_num_edges);
+
   // STEP 2 TIMER STARTS HERE
   MPI_Barrier(MPI_COMM_WORLD);
   double step_2_start_time;
@@ -228,7 +239,6 @@ int main(int argc, char **argv) {
   double step_5_start_time;
   if (rank == 0) {
     step_5_start_time = MPI_Wtime();
-    printf("STARTING STEP 5: %0.04fs\n", step_5_start_time - step_2_start_time);
   }

   // The processes perform the transfers of non-local labels and updates of
@@ -241,6 +251,10 @@ int main(int argc, char **argv) {
     std::vector<int> send_displs;
     std::vector<int> recv_counts;
     std::vector<int> recv_displs;
+    std::vector<int> recvbuf;
+    std::map<int, int> remote_labels;
+
+    if (p > 1) {
       int recv_total;
       {
@@ -266,11 +280,23 @@ int main(int argc, char **argv) {
         recv_total = offset;
       }

-      std::vector<int> recvbuf(recv_total, 0);
+      recvbuf = std::vector<int>(recv_total, 0);
       MPI_Alltoallv(sendbuf.data(), send_counts.data(), send_displs.data(),
                     MPI_INT, recvbuf.data(), recv_counts.data(),
                     recv_displs.data(), MPI_INT, MPI_COMM_WORLD);

+      // Cache efficiently
+      for (int i = 0; i < p; ++i) {
+        std::vector<int> processor_nodes(recv_map[i].begin(),
+                                         recv_map[i].end());
+        for (int j = 0; j < recv_counts[i]; ++j) {
+          int remote_node = processor_nodes[j];
+          int remote_value = recvbuf[recv_displs[i] + j];
+          remote_labels[remote_node] = remote_value;
+        }
+      }
+    }

     // For each local node, determine the minimum label out of its neighbors
     std::map<int, int> new_labels;
     for (int i = 0; i < num_my_nodes; ++i) {
@@ -281,10 +307,18 @@ int main(int argc, char **argv) {
       int min = current_value;
       for (auto neighbor : adj[node]) {
-        int neighbor_value = lookup_assignment(
-            node_label_assignment_vec, my_node_range, recv_map, recvbuf.data(),
-            recv_counts.data(), recv_displs.data(), each_num_nodes, rank,
-            neighbor);
+        int neighbor_value;
+        if (my_node_range.fst <= neighbor && neighbor < my_node_range.snd) {
+          neighbor_value =
+              node_label_assignment_vec[neighbor - my_node_range.fst];
+        } else {
+          neighbor_value = remote_labels[neighbor];
+        }
+        // = lookup_assignment(
+        //     node_label_assignment_vec, my_node_range, recv_map,
+        //     recvbuf.data(), recv_counts.data(), recv_displs.data(),
+        //     each_num_nodes, rank, neighbor);
         min = MIN(min, neighbor_value);
       }

View file

@@ -0,0 +1,17 @@
== Steps 2-4

For steps 2-4, I calculated each process's outgoing nodes, sorted them, and
used each node's position in the sorted order to identify which nodes are
being sent. This saves an extra communication and lets me index the same
items on every iteration of the loop.
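
As a rough illustration of that scheme (a minimal sketch with hypothetical names, not the actual code in lpa.cpp): because both the sender and the receiver hold the shared node ids in sorted order, the k-th slot of the exchanged buffer always refers to the k-th smallest of those ids, so no extra message is needed to say which node each value belongs to.

#include <map>
#include <set>
#include <vector>

// Hypothetical sketch: pack the labels of the outgoing nodes in sorted order.
// A std::set iterates in sorted order, so the position of each label in the
// buffer doubles as the identity of the node it belongs to -- the receiver
// walks its own sorted copy of the same ids to unpack.
std::vector<int> pack_labels(const std::set<int> &outgoing_nodes,
                             const std::map<int, int> &labels) {
  std::vector<int> sendbuf;
  sendbuf.reserve(outgoing_nodes.size());
  for (int node : outgoing_nodes)
    sendbuf.push_back(labels.at(node));
  return sendbuf;
}
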
== Step 5

I exchanged data using the unstructured communication approach, doing an
all-to-all transfer.
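
Roughly, the transfer looks like the sketch below (illustrative names only; it assumes the per-process send and receive counts are already known, and builds the displacement arrays from them before the single collective call):

#include <mpi.h>
#include <vector>

// Sketch of an all-to-all personalized exchange: each process sends
// send_counts[i] ints to process i and receives recv_counts[i] ints from it,
// all in one MPI_Alltoallv call.
std::vector<int> exchange_labels(std::vector<int> sendbuf,
                                 std::vector<int> send_counts,
                                 std::vector<int> recv_counts) {
  int p = static_cast<int>(send_counts.size());
  std::vector<int> send_displs(p, 0), recv_displs(p, 0);
  for (int i = 1; i < p; ++i) {
    send_displs[i] = send_displs[i - 1] + send_counts[i - 1];
    recv_displs[i] = recv_displs[i - 1] + recv_counts[i - 1];
  }
  std::vector<int> recvbuf(recv_displs[p - 1] + recv_counts[p - 1]);
  MPI_Alltoallv(sendbuf.data(), send_counts.data(), send_displs.data(), MPI_INT,
                recvbuf.data(), recv_counts.data(), recv_displs.data(), MPI_INT,
                MPI_COMM_WORLD);
  return recvbuf;
}
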
To read the results efficiently, I first tried the approach given in the
slides. However, that was taking a long time (up to 45 seconds for the
10,000 case), so I switched to STL's `std::map`. This proved to be orders
of magnitude faster.
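
Condensed, the per-neighbor lookup that replaces lookup_assignment amounts to the following (the range struct here is a stand-in for the pair type used in lpa.cpp): labels of locally owned nodes are read straight out of the local assignment vector, and every other label is a logarithmic-time lookup in the map built from the receive buffer.

#include <map>
#include <vector>

// Stand-in for the node-range pair used in lpa.cpp.
struct range { int fst, snd; };

// Labels of nodes owned by this rank come from the local assignment vector;
// everything else comes from the std::map filled from the MPI_Alltoallv
// receive buffer.
int label_of(int node, range my_node_range,
             const std::vector<int> &node_label_assignment_vec,
             std::map<int, int> &remote_labels) {
  if (my_node_range.fst <= node && node < my_node_range.snd)
    return node_label_assignment_vec[node - my_node_range.fst];
  return remote_labels[node];
}
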