assignment 3

2023-12-10 16:58:28 -06:00 · 2023-12-10 16:58:28 -06:00 · 442638205c
commit 442638205c
parent d75da8de6d
9 changed files with 261 additions and 219 deletions
--- a/1
+++ b/1
@ -22,6 +22,7 @@ RUN apt update -y && apt install -y --no-install-recommends \
    pkg-config \
    python3 \
    python3-pip \
+    python3-venv \
    texlive-latex-base \
    texlive-latex-extra \
    valgrind \
--- a/assignments/03/.envrc
+++ b/assignments/03/.envrc
@ -0,0 +1,3 @@
+layout python3
+export OMPI_ALLOW_RUN_AS_ROOT=1
+export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
--- a/assignments/03/.gitignore
+++ b/assignments/03/.gitignore
@ -3,4 +3,7 @@ compile_commands.json
 .cache
 report.pdf
 *.tar.gz
-out.txt
+out.txt
+
+dataset/gen_*.txt
+.direnv
--- a/assignments/03/Makefile
+++ b/assignments/03/Makefile
@ -16,7 +16,7 @@ run:
 report.pdf: report.typ
 	typst compile $< $@

-zhan4854.tar.gz: Makefile ASSIGNMENT.md lpa.cpp report.pdf
+zhan4854.tar.gz: Makefile ASSIGNMENT.md lpa.cpp report.pdf dataset/gen2.py
 	mkdir -p zhan4854
 	cp $^ zhan4854
 	tar -czvf $@ zhan4854
--- a/assignments/03/bench.sh
+++ b/assignments/03/bench.sh
@ -2,7 +2,6 @@ for dataset in $(echo "1000.txt" "10000.txt" "100000.txt" "1000000.txt"); do
    for processors in $(echo 1 2 4 8 16 | tr ' ' '\n'); do
        # file="dataset/both_$dataset"
        file="/export/scratch/CSCI5451_F23/assignment-3/dataset/$dataset"
-        echo $processors $file;
-        mpirun -n $processors ./lpa $file graphout.txt >> stdout.txt
+        mpirun -n $processors ./lpa $file "graph_out/$dataset-$processors.txt" >> "stdout_out/$dataset-$processors.txt"
    done
 done
--- a/assignments/03/dataset/gen2.py
+++ b/assignments/03/dataset/gen2.py
@ -0,0 +1,25 @@
+"""Generate a random undirected test graph for the label-propagation assignment.
+
+Usage: python dataset/gen2.py [N]   (run from the directory containing dataset/)
+
+Prints the number of weakly connected components, then writes
+dataset/gen_<N>.txt: a "<N> <num_edges>" header followed by one "<src> <dst>"
+line per directed edge, with every undirected edge listed in both directions
+and the lines sorted by (src, dst).
+"""
+import igraph as ig
+import random
+import sys
+
+# Node count comes from the first CLI argument; fall back to 1000 only when
+# it is missing or not an integer (a bare `except:` would also swallow
+# KeyboardInterrupt/SystemExit).
+try:
+    N = int(sys.argv[1])
+except (IndexError, ValueError):
+    N = 1000
+
+# Fixed seed for reproducible output.
+# NOTE(review): relies on igraph drawing from Python's `random` module -- confirm.
+random.seed(0)
+g = ig.Graph.Growing_Random(N, 5)
+components = g.connected_components(mode='weak')
+print(len(components))
+
+# Emit each edge in both directions so the file describes an undirected graph.
+both_edges = []
+for edge in g.es:
+    both_edges.append((edge.source, edge.target))
+    both_edges.append((edge.target, edge.source))
+both_edges.sort()
+
+with open(f"dataset/gen_{N}.txt", "w") as f:
+    f.write(f"{N} {len(both_edges)}\n")
+    f.writelines(f"{v1} {v2}\n" for v1, v2 in both_edges)
--- a/assignments/03/lpa.cpp
+++ b/assignments/03/lpa.cpp
@ -1,3 +1,8 @@
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include <functional>
+#include <limits>
 #include <map>
 #include <set>
 #include <vector>
@ -7,23 +12,15 @@
 #include <stdlib.h>
 #include <time.h>
 #include <unistd.h>
+#include <utility>

-#ifdef FMT_HEADER_ONLY
-#include <fmt/format.h>
-#include <fmt/ranges.h>
-#endif
+// #include <fmt/format.h>
+// #include <fmt/ranges.h>

 #define TAG_SEND_NUM_EDGES 1001
 #define TAG_SEND_EDGES 1002
 #define TAG_SEND_FINAL_RESULT 1003

-#define MIN(a, b)                                                              \
-  ({                                                                           \
-    __typeof__(a) _a = (a);                                                    \
-    __typeof__(b) _b = (b);                                                    \
-    _a < _b ? _a : _b;                                                         \
-  })
-
 typedef struct {
  int fst;
  int snd;
@ -39,18 +36,9 @@ void pair_vector_init(struct pair_vector *);
 void pair_vector_clear(struct pair_vector *);
 void pair_vector_push(struct pair_vector *v, int fst, int snd);

-pair compute_node_range(int p, int total_num_nodes, int each_num_nodes,
-                        int process);
-int lookup_assignment(int *base_node_assignment, pair my_node_range,
-                      std::map<int, std::set<int>> recv_map, int *recvbuf,
-                      int *recv_counts, int *recv_displs, int each_num_nodes,
-                      int rank, int node_number);
-
 int main(int argc, char **argv) {
-  MPI_Init(&argc, &argv);
-  int rank, p;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-  MPI_Comm_size(MPI_COMM_WORLD, &p);
+  MPI::Init(argc, argv);
+  int rank = MPI::COMM_WORLD.Get_rank(), p = MPI::COMM_WORLD.Get_size();

  MPI_Datatype IntPairType;
  init_pair_type(&IntPairType);
@ -65,15 +53,14 @@ int main(int argc, char **argv) {
  pair params;

  if (rank == 0) {
+    printf("Processors: %d, file: %s\n", p, argv[1]);
    fp = fopen(argv[1], "r");
-
-    // Read the first line
-    if (getline(&line, &len, fp) != -1)
+    if ((read = getline(&line, &len, fp)) != -1)
      sscanf(line, "%d %d", &params.fst, &params.snd);
  }

  // Send the params
-  MPI_Bcast(&params, 1, IntPairType, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&params, 1, IntPairType, 0, MPI::COMM_WORLD);
  int total_num_nodes = params.fst;
  int total_num_edges = params.snd;
  int each_num_nodes = total_num_nodes / p;
@ -83,15 +70,17 @@ int main(int argc, char **argv) {
      rank == p - 1 ? total_num_nodes - rank * each_num_nodes : each_num_nodes;
  int my_nodes[num_my_nodes];

-  pair node_ranges[p];
-  for (int i = 0; i < p; ++i)
-    node_ranges[i] = compute_node_range(p, total_num_nodes, each_num_nodes, i);
+  std::function<std::pair<int, int>(int)> node_range =
+      [p, total_num_nodes, each_num_nodes](int process) {
+        int start = process * each_num_nodes;
+        int end = process == p - 1 ? total_num_nodes : start + each_num_nodes;
+        return std::make_pair(start, end);
+      };

  // Read the edges
  int num_my_edges;
  pair *my_edges;
  int counts[p], displs[p];
-
  if (rank == 0) {
    line = NULL;
    // pair all_edges[total_num_edges];
@ -100,31 +89,30 @@ int main(int argc, char **argv) {

    // For the current process, what's the last node we're expecting to see?
    int current_process = 0;
-    pair current_node_range = node_ranges[current_process];
+    std::pair<int, int> current_node_range = node_range(current_process);
    int edge_counter = 0;

    for (int i = 0; i < total_num_edges; ++i) {
-      if (getline(&line, &len, fp) == -1)
-        break;
+      getline(&line, &len, fp);

      int fst, snd;
      sscanf(line, "%d %d", &fst, &snd);

-      if (fst >= current_node_range.snd) {
+      if (fst >= current_node_range.second) {
        if (current_process == 0) {
          num_my_edges = edge_counter;
          my_edges = (pair *)calloc(num_my_edges, sizeof(pair));
          memcpy(my_edges, all_edges.ptr, edge_counter * sizeof(pair));
        } else {
          MPI_Send(&edge_counter, 1, MPI_INT, current_process,
-                   TAG_SEND_NUM_EDGES, MPI_COMM_WORLD);
+                   TAG_SEND_NUM_EDGES, MPI::COMM_WORLD);
          MPI_Send(all_edges.ptr, edge_counter, IntPairType, current_process,
-                   TAG_SEND_EDGES, MPI_COMM_WORLD);
+                   TAG_SEND_EDGES, MPI::COMM_WORLD);
        }

        // We're starting on the next process
        current_process += 1;
-        current_node_range = node_ranges[current_process];
+        current_node_range = node_range(current_process);
        edge_counter = 0;
        pair_vector_clear(&all_edges);
      }
@ -141,18 +129,27 @@ int main(int argc, char **argv) {
      memcpy(my_edges, all_edges.ptr, edge_counter * sizeof(pair));
    } else {
      MPI_Send(&edge_counter, 1, MPI_INT, current_process, TAG_SEND_NUM_EDGES,
-               MPI_COMM_WORLD);
+               MPI::COMM_WORLD);
      MPI_Send(all_edges.ptr, edge_counter, IntPairType, current_process,
-               TAG_SEND_EDGES, MPI_COMM_WORLD);
+               TAG_SEND_EDGES, MPI::COMM_WORLD);
    }
-
-    free(all_edges.ptr);
  } else {
-    MPI_Recv(&num_my_edges, 1, MPI_INT, 0, TAG_SEND_NUM_EDGES, MPI_COMM_WORLD,
+    MPI_Recv(&num_my_edges, 1, MPI_INT, 0, TAG_SEND_NUM_EDGES, MPI::COMM_WORLD,
             NULL);
    my_edges = (pair *)calloc(num_my_edges, sizeof(pair));
    MPI_Recv(my_edges, num_my_edges, IntPairType, 0, TAG_SEND_EDGES,
-             MPI_COMM_WORLD, NULL);
+             MPI::COMM_WORLD, NULL);
+  }
+
+  // Format up to the first five local edges as "(a, b) (c, d) ..." into buf.
+  // NOTE(review): buf is neither read nor freed in the lines visible here --
+  // confirm it is consumed/released further down.
+  const int buf_size = 1000;
+  char *buf = (char *)calloc(buf_size, sizeof(char));
+  int offset = 0; // Keep track of the current position in the buffer
+  const int num_shown = std::min(num_my_edges, 5);
+  for (int i = 0; i < num_shown; i++) {
+    offset += snprintf(buf + offset, buf_size - offset, "(%d, %d)",
+                       my_edges[i].fst, my_edges[i].snd);
+    // `len` is getline()'s buffer capacity, not the number of entries, so
+    // compare against the entry count to leave off the trailing separator.
+    if (i < num_shown - 1) {
+      // Add a separator (e.g., comma or space) if it's not the last
+      offset += snprintf(buf + offset, buf_size - offset, " ");
+    }
+  }

  if (rank == 0) {
@ -162,25 +159,19 @@ int main(int argc, char **argv) {
  }
 #pragma endregion

-  if (rank == 0)
-    printf("Params: p=%d, |E|=%d, |V|=%d\n", p, total_num_nodes,
-           total_num_edges);
-
  // STEP 2 TIMER STARTS HERE
-  MPI_Barrier(MPI_COMM_WORLD);
-  double step_2_start_time;
-  if (rank == 0)
-    step_2_start_time = MPI_Wtime();
+  MPI::COMM_WORLD.Barrier();
+  double step_2_start_time = MPI::Wtime();

 // Each process analyzes the non-local edges that are contained in its portion
 // of the graph.
 #pragma region
-  int node_label_assignment_vec[num_my_nodes];
-  pair my_node_range = node_ranges[rank];
+  std::map<int, int> node_label_assignment;
+  std::pair<int, int> my_node_range = node_range(rank);

  // Initial node assignment
-  for (int idx = 0; idx < num_my_nodes; ++idx) {
-    node_label_assignment_vec[idx] = my_node_range.fst + idx;
+  for (int i = my_node_range.first; i < my_node_range.second; ++i) {
+    node_label_assignment[i] = i;
  }

  std::map<int, std::set<int>> adj;
@ -191,12 +182,12 @@ int main(int argc, char **argv) {
    pair edge = my_edges[i];
    adj[edge.fst].insert(edge.snd);

-    if (!(my_node_range.fst <= edge.fst && edge.fst < my_node_range.snd)) {
+    if (!(my_node_range.first <= edge.fst && edge.fst < my_node_range.second)) {
      non_local_nodes.insert(edge.fst);
      non_local_edges.insert(std::make_pair(edge.snd, edge.fst));
    }

-    if (!(my_node_range.fst <= edge.snd && edge.snd < my_node_range.snd)) {
+    if (!(my_node_range.first <= edge.snd && edge.snd < my_node_range.second)) {
      non_local_nodes.insert(edge.snd);
      non_local_edges.insert(std::make_pair(edge.fst, edge.snd));
    }
@ -212,105 +203,87 @@ int main(int argc, char **argv) {
  for (auto entry : non_local_edges) {
    int local_node = entry.first, remote_node = entry.second;

-    int remote_process = remote_node / each_num_nodes;
+    int corresponding_process = remote_node / each_num_nodes;
    // The last process gets some extra nodes
-    if (remote_process >= p)
-      remote_process = p - 1;
+    if (corresponding_process >= p)
+      corresponding_process = p - 1;

-    send_map[remote_process].insert(local_node);
-    recv_map[remote_process].insert(remote_node);
+    send_map[corresponding_process].insert(local_node);
+    recv_map[corresponding_process].insert(remote_node);
  }
 #pragma endregion

 // All the processes are communicating to figure out which process needs to
 // send what data to the other processes.
 #pragma region
+  // Nothing needs to be done here, I'm using the fact that everything is sent
+  // in sorted order to ensure that both sides are referring to the same thing
 #pragma endregion

  // STEP 5 TIMER STARTS HERE
-  MPI_Barrier(MPI_COMM_WORLD);
-  double step_5_start_time;
-  if (rank == 0) {
-    step_5_start_time = MPI_Wtime();
-  }
+  MPI::COMM_WORLD.Barrier();
+  double step_5_start_time = MPI::Wtime();

 // The processes perform the transfers of non-local labels and  updates of
 // local labels until convergence.
 #pragma region
  while (true) {
    // First, exchange the data that needs to be exchanged
-    int sendbuf[num_my_nodes];
-    int send_counts[p];
-    int send_displs[p];
-    int recv_counts[p];
-    int recv_displs[p];
-    std::map<int, int> remote_labels;
+    std::vector<int> sendbuf;
+    std::vector<int> send_counts;
+    std::vector<int> send_displs;
+    std::vector<int> recv_counts;
+    std::vector<int> recv_displs;

-    if (p > 1) {
-
-      int recv_total;
-      {
-        int offset = 0;
-        for (int i = 0; i < p; ++i) {
-          int count = send_map[i].size();
-          for (auto local_node : send_map[i]) {
-            sendbuf[offset + local_node - my_node_range.fst] =
-                node_label_assignment_vec[local_node - my_node_range.fst];
-          }
-          send_counts[i] = count;
-          send_displs[i] = offset;
-          offset += count;
+    int recv_total;
+    {
+      int offset = 0;
+      for (int i = 0; i < p; ++i) {
+        int count = send_map[i].size();
+        // std::sort(send_map[i].begin(), send_map[i].end());
+        for (auto k : send_map[i]) {
+          sendbuf.push_back(node_label_assignment[k]);
        }
-
-        offset = 0;
-        for (int i = 0; i < p; ++i) {
-          int count = recv_map[i].size();
-          recv_counts[i] = count;
-          recv_displs[i] = offset;
-          offset += count;
-        }
-        recv_total = offset;
+        send_counts.push_back(count);
+        send_displs.push_back(offset);
+        offset += count;
      }

-      int recvbuf[recv_total];
-      MPI_Alltoallv(sendbuf, send_counts, send_displs, MPI_INT, recvbuf,
-                    recv_counts, recv_displs, MPI_INT, MPI_COMM_WORLD);
-
-      // Cache efficiently
+      offset = 0;
      for (int i = 0; i < p; ++i) {
-        std::vector<int> processor_nodes(recv_map[i].begin(),
-                                         recv_map[i].end());
-        for (int j = 0; j < recv_counts[i]; ++j) {
-          int remote_node = processor_nodes[j];
-          int remote_value = recvbuf[recv_displs[i] + j];
-          remote_labels[remote_node] = remote_value;
-        }
+        int count = recv_map[i].size();
+        // std::sort(recv_map[i].begin(), recv_map[i].end());
+        recv_counts.push_back(count);
+        recv_displs.push_back(offset);
+        offset += count;
+      }
+      recv_total = offset;
+    }
+
+    std::vector<int> recvbuf(recv_total, 0);
+    MPI::COMM_WORLD.Alltoallv(sendbuf.data(), send_counts.data(),
+                              send_displs.data(), MPI_INT, recvbuf.data(),
+                              recv_counts.data(), recv_displs.data(), MPI_INT);
+
+    std::map<int, int> total_node_label_assignment(node_label_assignment);
+    for (int i = 0; i < p; ++i) {
+      std::vector<int> ouais(recv_map[i].begin(), recv_map[i].end());
+      for (int j = 0; j < recv_counts[i]; ++j) {
+        int remote_node = ouais[j];
+        int remote_value = recvbuf[recv_displs[i] + j];
+        total_node_label_assignment[remote_node] = remote_value;
      }
    }

    // For each local node, determine the minimum label out of its neighbors
    std::map<int, int> new_labels;
-    for (int i = 0; i < num_my_nodes; ++i) {
-      int node = my_node_range.fst + i;
-
-      // int current_value = total_node_label_assignment[i];
-      int current_value = node_label_assignment_vec[i];
+    for (int i = my_node_range.first; i < my_node_range.second; ++i) {
+      int current_value = total_node_label_assignment[i];
      int min = current_value;

-      for (auto neighbor : adj[node]) {
-        int neighbor_value;
-        if (my_node_range.fst <= neighbor && neighbor < my_node_range.snd) {
-          neighbor_value =
-              node_label_assignment_vec[neighbor - my_node_range.fst];
-        } else {
-          neighbor_value = remote_labels[neighbor];
-        }
-
-        //  = lookup_assignment(
-        //     node_label_assignment_vec, my_node_range, recv_map,
-        //     recvbuf.data(), recv_counts.data(), recv_displs.data(),
-        //     each_num_nodes, rank, neighbor);
-        min = MIN(min, neighbor_value);
+      for (auto neighbor : adj[i]) {
+        if (total_node_label_assignment[neighbor] < min)
+          min = total_node_label_assignment[neighbor];
      }

      if (min < current_value) {
@ -321,8 +294,8 @@ int main(int argc, char **argv) {
    // Have there been any changes in the labels?
    int num_changes = new_labels.size();
    int total_changes;
-    MPI_Allreduce(&num_changes, &total_changes, 1, MPI_INT, MPI_SUM,
-                  MPI_COMM_WORLD);
+    MPI::COMM_WORLD.Allreduce(&num_changes, &total_changes, 1, MPI_INT,
+                              MPI::SUM);

    if (total_changes == 0) {
      break;
@ -330,19 +303,14 @@ int main(int argc, char **argv) {

    // Update the original node assignment
    for (auto entry : new_labels) {
-      node_label_assignment_vec[entry.first] = entry.second;
+      node_label_assignment[entry.first] = entry.second;
    }
-
-    if (rank == 0)
-      printf("total changes: %d\n", total_changes);
  }
 #pragma endregion

  // END TIMERS
-  MPI_Barrier(MPI_COMM_WORLD);
-  double end_time;
-  if (rank == 0)
-    end_time = MPI_Wtime();
+  MPI::COMM_WORLD.Barrier();
+  double end_time = MPI::Wtime();

  if (rank == 0) {
    printf("2-5 Time: %0.04fs\n", end_time - step_2_start_time);
@ -353,36 +321,39 @@ int main(int argc, char **argv) {
 // disk.
 #pragma region
  if (rank == 0) {
-    FILE *fp = fopen(argv[2], "w");
-    std::map<int, int> label_count;
-    for (int process_idx = 0; process_idx < p; ++process_idx) {
-      pair this_node_range = node_ranges[process_idx];
-      int count = this_node_range.snd - this_node_range.fst;
-      if (process_idx == 0) {
+    std::vector<int> all_assignments(total_num_nodes);
+    // std::map<int, int> label_count;
+    int ctr = 0;
+    for (int i = 0; i < p; ++i) {
+      std::pair<int, int> this_node_range = node_range(i);
+      int count = this_node_range.second - this_node_range.first;
+      if (i == 0) {
        for (int j = 0; j < count; ++j) {
-          fprintf(fp, "%d\n", node_label_assignment_vec[j]);
-          label_count[node_label_assignment_vec[j]]++;
+          all_assignments[this_node_range.first + j] =
+              node_label_assignment[this_node_range.first + j];
+          // label_count[all_assignments[this_node_range.first + j]]++;
        }
      } else {
-        int recvbuf[count];
-        MPI_Recv(&recvbuf, count, MPI_INT, process_idx, TAG_SEND_FINAL_RESULT,
-                 MPI_COMM_WORLD, NULL);
-        for (int j = 0; j < count; ++j) {
-          fprintf(fp, "%d\n", recvbuf[j]);
-          label_count[recvbuf[j]]++;
-        }
+        MPI::COMM_WORLD.Recv(&all_assignments[this_node_range.first], count,
+                             MPI::INT, i, TAG_SEND_FINAL_RESULT);
+        // for (int j = 0; j < count; ++j) {
+        //   label_count[all_assignments[this_node_range.first + j]]++;
+        // }
      }
    }

-    printf("%d\n", label_count.size());
-
+    // std::cout << "Done! " << label_count.size() << std::endl;
  } else {
-    MPI_Send(node_label_assignment_vec, num_my_nodes, MPI_INT, 0,
-             TAG_SEND_FINAL_RESULT, MPI_COMM_WORLD);
+    std::vector<int> flat_assignments;
+    for (int i = my_node_range.first; i < my_node_range.second; ++i) {
+      flat_assignments.push_back(node_label_assignment[i]);
+    }
+    MPI::COMM_WORLD.Send(flat_assignments.data(), flat_assignments.size(),
+                         MPI::INT, 0, TAG_SEND_FINAL_RESULT);
  }
 #pragma endregion

-  MPI_Finalize();
+  MPI::Finalize();
  return 0;
 }

@ -422,59 +393,4 @@ void pair_vector_push(struct pair_vector *v, int fst, int snd) {
  v->ptr[v->len].fst = fst;
  v->ptr[v->len].snd = snd;
  v->len++;
-}
-
-pair compute_node_range(int p, int total_num_nodes, int each_num_nodes,
-                        int process) {
-  int start = process * each_num_nodes;
-  int end = process == p - 1 ? total_num_nodes : start + each_num_nodes;
-  return {.fst = start, .snd = end};
-}
-
-int lookup_assignment(int *base_node_assignment, pair my_node_range,
-                      std::map<int, std::set<int>> recv_map, int *recvbuf,
-                      int *recv_counts, int *recv_displs, int each_num_nodes,
-                      int rank, int node_number) {
-  int process_from = node_number / each_num_nodes;
-
-  // Just return from local if local
-  if (process_from == rank)
-    return base_node_assignment[node_number - my_node_range.fst];
-
-  int count = recv_counts[process_from];
-  int displs = recv_displs[process_from];
-
-  // Determine what index this node is
-  int index = -1, ctr = 0;
-  std::vector<int> inner(recv_map[process_from].begin(),
-                         recv_map[process_from].end());
-
-  {
-    // Use binary search...
-    int lo = 0, hi = count;
-    while (lo < hi) {
-      int mid = (lo + hi) / 2;
-      int midk = inner[mid];
-      if (node_number < midk)
-        hi = mid;
-      else if (node_number > midk)
-        lo = mid;
-      else {
-        index = mid;
-        break;
-      }
-    }
-  }
-
-  // for (int i = 0; i < count; ++i) {
-  //   int remote_node = inner[i];
-  //   if (node_number == remote_node) {
-  //     index = ctr;
-  //     break;
-  //   }
-  //   ctr++;
-  // }
-
-  // Pull the corresponding value from the map
-  return recvbuf[recv_displs[process_from] + index];
-}
+}
--- a/assignments/03/process.py
+++ b/assignments/03/process.py
@ -0,0 +1,33 @@
+"""Turn lpa benchmark logs into a Typst timing table.
+
+Usage: python process.py [LOG ...]   (defaults to "stdout.txt")
+
+Each run in a log is three lines: a header naming the processor count and the
+dataset file, followed by two "<label>: <time>" lines.  The first timing seen
+for each (dataset size, processor count) pair is kept.
+"""
+import re
+import sys
+
+# Header line: "<anything>: <processors>,<anything>dataset/<size>.txt".
+WTF = re.compile(r".*: (\d+),.*dataset/(\d+)\.txt")
+
+# bench.sh writes one log per run under stdout_out/, so accept any number of
+# log paths; with no arguments fall back to the original single "stdout.txt".
+log_paths = sys.argv[1:] or ["stdout.txt"]
+
+by_size = dict()
+for log_path in log_paths:
+    with open(log_path) as f:
+        while True:
+            line1 = f.readline().strip()
+            if not line1: break
+            m = WTF.match(line1)
+            if m is None:
+                # Fail with the offending line instead of an AttributeError.
+                raise ValueError(f"{log_path}: unrecognized header line {line1!r}")
+            processors = int(m.group(1))
+            size = int(m.group(2))
+
+            if size not in by_size: by_size[size] = dict()
+
+            line2 = f.readline().strip()
+            line3 = f.readline().strip()
+
+            # Keep only the value after "<label>: " on each timing line.
+            time2 = line2.split(": ")[1]
+            time5 = line3.split(": ")[1]
+
+            if processors not in by_size[size]: by_size[size][processors] = (time2, time5)
+
+# Emit the Typst table: one header row of processor counts, then one row per
+# dataset size with both timings stacked in each cell.
+print("#table(")
+print("  columns: (auto, auto, auto, auto, auto, auto),")
+columns = [1, 2, 4, 8, 16]
+print("  [], ", ", ".join(map(lambda c: f"[{c}]", columns)), ",")
+for size, entries in sorted(by_size.items()):
+    print(f"  [{size}],")
+    for processors, (time2, time5) in sorted(entries.items()):
+        print(f"  [{time2} #linebreak() {time5}],")
+    print()
+print(")")
--- a/assignments/03/report.typ
+++ b/assignments/03/report.typ
@ -13,5 +13,67 @@ I exchanged data using the unstructured communication approach, doing an
 all-to-all transfer.

 To read the result efficiently, I tried using the approach given in the slides.
-However, this was taking a long time (up to 45 seconds for the 10,000 case) and
-I tried using STL's `std::map`. This proved to be orders of magnitude faster
+I also tried to use binary search since this would yield $O(log n)$ lookup time.
+However, this was taking a long time (up to 45 seconds for the 10,000 case), and
+it was the bottleneck.  Using STL's `std::map` proved to be orders of magnitude
+faster.
+
+== Other remarks
+
+On the original example dataset, it performs poorly when using larger numbers
+of processors. I have an explanation for this after looking at the performance
+characteristics of the run: it completes in one iteration where every single
+edge is assigned. The data distribution also indicates that almost everything is
+connected into the first node, which isn't balanced.
+
+I've written a generation script in Python using the `igraph` library.
+
+- 1,000: 93 components
+- 10,000: 947 components
+- 100,000: 9,423 components
+- 1,000,000: 92,880 components
+
+Using this data, I was able to achieve much better speedup. I didn't attach the
+actual data files but they can be generated with the same script (seeded for
+reproducibility).
+
+*NOTE:* I noticed afterwards that the example data was changed again, this time
+to a more balanced graph, so the numbers below will not reflect the poorer
+performance described above.
+
+== Timing on example dataset
+
+This experiment was performed on CSELabs by using my bench script, and the table
+was generated with another script.
+
+#table(
+  columns: (auto, auto, auto, auto, auto, auto),
+  [],  [1], [2], [4], [8], [16] ,
+  [1000],
+  [0.0249s #linebreak() 0.0151s],
+  [0.0234s #linebreak() 0.0122s],
+  [0.0206s #linebreak() 0.0099s],
+  [0.0491s #linebreak() 0.0248s],
+  [0.0177s #linebreak() 0.0106s],
+
+  [10000],
+  [0.2929s #linebreak() 0.1830s],
+  [0.2933s #linebreak() 0.1540s],
+  [0.2457s #linebreak() 0.1178s],
+  [0.3793s #linebreak() 0.1328s],
+  [0.2473s #linebreak() 0.1197s],
+
+  [100000],
+  [3.7888s #linebreak() 2.4881s],
+  [3.7592s #linebreak() 2.0212s],
+  [3.3819s #linebreak() 1.6036s],
+  [2.9485s #linebreak() 1.3954s],
+  [2.8593s #linebreak() 1.3107s],
+
+  [1000000],
+  [46.7895s #linebreak() 31.9648s],
+  [45.2284s #linebreak() 24.8540s],
+  [40.3994s #linebreak() 20.2851s],
+  [36.9628s #linebreak() 17.6794s],
+  [35.7110s #linebreak() 16.6276s],
+
+)