assignment 3
This commit is contained in:
parent
d75da8de6d
commit
442638205c
9 changed files with 261 additions and 219 deletions
|
@ -22,6 +22,7 @@ RUN apt update -y && apt install -y --no-install-recommends \
|
||||||
pkg-config \
|
pkg-config \
|
||||||
python3 \
|
python3 \
|
||||||
python3-pip \
|
python3-pip \
|
||||||
|
python3-venv \
|
||||||
texlive-latex-base \
|
texlive-latex-base \
|
||||||
texlive-latex-extra \
|
texlive-latex-extra \
|
||||||
valgrind \
|
valgrind \
|
||||||
|
|
3
assignments/03/.envrc
Normal file
3
assignments/03/.envrc
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
layout python3
|
||||||
|
export OMPI_ALLOW_RUN_AS_ROOT=1
|
||||||
|
export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
|
3
assignments/03/.gitignore
vendored
3
assignments/03/.gitignore
vendored
|
@ -4,3 +4,6 @@ compile_commands.json
|
||||||
report.pdf
|
report.pdf
|
||||||
*.tar.gz
|
*.tar.gz
|
||||||
out.txt
|
out.txt
|
||||||
|
|
||||||
|
dataset/gen_*.txt
|
||||||
|
.direnv
|
|
@ -16,7 +16,7 @@ run:
|
||||||
report.pdf: report.typ
|
report.pdf: report.typ
|
||||||
typst compile $< $@
|
typst compile $< $@
|
||||||
|
|
||||||
zhan4854.tar.gz: Makefile ASSIGNMENT.md lpa.cpp report.pdf
|
zhan4854.tar.gz: Makefile ASSIGNMENT.md lpa.cpp report.pdf dataset/gen2.py
|
||||||
mkdir -p zhan4854
|
mkdir -p zhan4854
|
||||||
cp $^ zhan4854
|
cp $^ zhan4854
|
||||||
tar -czvf $@ zhan4854
|
tar -czvf $@ zhan4854
|
||||||
|
|
|
@ -2,7 +2,6 @@ for dataset in $(echo "1000.txt" "10000.txt" "100000.txt" "1000000.txt"); do
|
||||||
for processors in $(echo 1 2 4 8 16 | tr ' ' '\n'); do
|
for processors in $(echo 1 2 4 8 16 | tr ' ' '\n'); do
|
||||||
# file="dataset/both_$dataset"
|
# file="dataset/both_$dataset"
|
||||||
file="/export/scratch/CSCI5451_F23/assignment-3/dataset/$dataset"
|
file="/export/scratch/CSCI5451_F23/assignment-3/dataset/$dataset"
|
||||||
echo $processors $file;
|
mpirun -n $processors ./lpa $file "graph_out/$dataset-$processors.txt" >> "stdout_out/$dataset-$processors.txt"
|
||||||
mpirun -n $processors ./lpa $file graphout.txt >> stdout.txt
|
|
||||||
done
|
done
|
||||||
done
|
done
|
25
assignments/03/dataset/gen2.py
Normal file
25
assignments/03/dataset/gen2.py
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
"""Generate a reproducible random graph as a sorted, symmetric edge list.

Usage: python dataset/gen2.py [N]

Writes ``dataset/gen_<N>.txt`` (path is relative to the current working
directory).  The first line is ``<N> <number of edge lines>``; every following
line is ``<source> <target>``.  Each undirected edge is written in both
directions and the lines are sorted.  The number of weakly connected
components is printed to stdout.
"""
import random
import sys

# Node count used when no (or an unparsable) count is given on the command line.
DEFAULT_NUM_NODES = 1000


def parse_num_nodes(argv):
    """Return the node count from ``argv[1]``, or the default if absent/invalid.

    Only the two failures that argument parsing can actually produce are
    caught, so unrelated errors (and Ctrl-C) are no longer swallowed.
    """
    try:
        return int(argv[1])
    except (IndexError, ValueError):
        return DEFAULT_NUM_NODES


def symmetric_edge_list(edges):
    """Return every ``(source, target)`` pair in both directions, sorted."""
    both_edges = []
    for source, target in edges:
        both_edges.append((source, target))
        both_edges.append((target, source))
    return sorted(both_edges)


def main(argv):
    """Build the graph, report its component count and write the edge file."""
    # Imported here so the pure helpers above stay usable without igraph.
    import igraph as ig

    N = parse_num_nodes(argv)

    # NOTE(review): python-igraph is documented to draw from Python's `random`
    # module by default, which is what makes this seed effective -- confirm
    # for the installed igraph version.
    random.seed(0)
    g = ig.Graph.Growing_Random(N, 5)
    components = g.connected_components(mode='weak')
    print(len(components))

    both_edges = symmetric_edge_list((edge.source, edge.target) for edge in g.es)
    num_edges = len(both_edges)

    with open(f"dataset/gen_{N}.txt", "w") as f:
        f.write(f"{N} {num_edges}\n")
        for v1, v2 in both_edges:
            f.write(f"{v1} {v2}\n")


if __name__ == "__main__":
    main(sys.argv)
|
|
@ -1,3 +1,8 @@
|
||||||
|
#include <algorithm>
|
||||||
|
#include <array>
|
||||||
|
#include <cstring>
|
||||||
|
#include <functional>
|
||||||
|
#include <limits>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -7,23 +12,15 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#ifdef FMT_HEADER_ONLY
|
// #include <fmt/format.h>
|
||||||
#include <fmt/format.h>
|
// #include <fmt/ranges.h>
|
||||||
#include <fmt/ranges.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define TAG_SEND_NUM_EDGES 1001
|
#define TAG_SEND_NUM_EDGES 1001
|
||||||
#define TAG_SEND_EDGES 1002
|
#define TAG_SEND_EDGES 1002
|
||||||
#define TAG_SEND_FINAL_RESULT 1003
|
#define TAG_SEND_FINAL_RESULT 1003
|
||||||
|
|
||||||
#define MIN(a, b) \
|
|
||||||
({ \
|
|
||||||
__typeof__(a) _a = (a); \
|
|
||||||
__typeof__(b) _b = (b); \
|
|
||||||
_a < _b ? _a : _b; \
|
|
||||||
})
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int fst;
|
int fst;
|
||||||
int snd;
|
int snd;
|
||||||
|
@ -39,18 +36,9 @@ void pair_vector_init(struct pair_vector *);
|
||||||
void pair_vector_clear(struct pair_vector *);
|
void pair_vector_clear(struct pair_vector *);
|
||||||
void pair_vector_push(struct pair_vector *v, int fst, int snd);
|
void pair_vector_push(struct pair_vector *v, int fst, int snd);
|
||||||
|
|
||||||
pair compute_node_range(int p, int total_num_nodes, int each_num_nodes,
|
|
||||||
int process);
|
|
||||||
int lookup_assignment(int *base_node_assignment, pair my_node_range,
|
|
||||||
std::map<int, std::set<int>> recv_map, int *recvbuf,
|
|
||||||
int *recv_counts, int *recv_displs, int each_num_nodes,
|
|
||||||
int rank, int node_number);
|
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
MPI_Init(&argc, &argv);
|
MPI::Init(argc, argv);
|
||||||
int rank, p;
|
int rank = MPI::COMM_WORLD.Get_rank(), p = MPI::COMM_WORLD.Get_size();
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &p);
|
|
||||||
|
|
||||||
MPI_Datatype IntPairType;
|
MPI_Datatype IntPairType;
|
||||||
init_pair_type(&IntPairType);
|
init_pair_type(&IntPairType);
|
||||||
|
@ -65,15 +53,14 @@ int main(int argc, char **argv) {
|
||||||
pair params;
|
pair params;
|
||||||
|
|
||||||
if (rank == 0) {
|
if (rank == 0) {
|
||||||
|
printf("Processors: %d, file: %s\n", p, argv[1]);
|
||||||
fp = fopen(argv[1], "r");
|
fp = fopen(argv[1], "r");
|
||||||
|
if ((read = getline(&line, &len, fp)) != -1)
|
||||||
// Read the first line
|
|
||||||
if (getline(&line, &len, fp) != -1)
|
|
||||||
sscanf(line, "%d %d", ¶ms.fst, ¶ms.snd);
|
sscanf(line, "%d %d", ¶ms.fst, ¶ms.snd);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send the params
|
// Send the params
|
||||||
MPI_Bcast(¶ms, 1, IntPairType, 0, MPI_COMM_WORLD);
|
MPI_Bcast(¶ms, 1, IntPairType, 0, MPI::COMM_WORLD);
|
||||||
int total_num_nodes = params.fst;
|
int total_num_nodes = params.fst;
|
||||||
int total_num_edges = params.snd;
|
int total_num_edges = params.snd;
|
||||||
int each_num_nodes = total_num_nodes / p;
|
int each_num_nodes = total_num_nodes / p;
|
||||||
|
@ -83,15 +70,17 @@ int main(int argc, char **argv) {
|
||||||
rank == p - 1 ? total_num_nodes - rank * each_num_nodes : each_num_nodes;
|
rank == p - 1 ? total_num_nodes - rank * each_num_nodes : each_num_nodes;
|
||||||
int my_nodes[num_my_nodes];
|
int my_nodes[num_my_nodes];
|
||||||
|
|
||||||
pair node_ranges[p];
|
std::function<std::pair<int, int>(int)> node_range =
|
||||||
for (int i = 0; i < p; ++i)
|
[p, total_num_nodes, each_num_nodes](int process) {
|
||||||
node_ranges[i] = compute_node_range(p, total_num_nodes, each_num_nodes, i);
|
int start = process * each_num_nodes;
|
||||||
|
int end = process == p - 1 ? total_num_nodes : start + each_num_nodes;
|
||||||
|
return std::make_pair(start, end);
|
||||||
|
};
|
||||||
|
|
||||||
// Read the edges
|
// Read the edges
|
||||||
int num_my_edges;
|
int num_my_edges;
|
||||||
pair *my_edges;
|
pair *my_edges;
|
||||||
int counts[p], displs[p];
|
int counts[p], displs[p];
|
||||||
|
|
||||||
if (rank == 0) {
|
if (rank == 0) {
|
||||||
line = NULL;
|
line = NULL;
|
||||||
// pair all_edges[total_num_edges];
|
// pair all_edges[total_num_edges];
|
||||||
|
@ -100,31 +89,30 @@ int main(int argc, char **argv) {
|
||||||
|
|
||||||
// For the current process, what's the last node we're expecting to see?
|
// For the current process, what's the last node we're expecting to see?
|
||||||
int current_process = 0;
|
int current_process = 0;
|
||||||
pair current_node_range = node_ranges[current_process];
|
std::pair<int, int> current_node_range = node_range(current_process);
|
||||||
int edge_counter = 0;
|
int edge_counter = 0;
|
||||||
|
|
||||||
for (int i = 0; i < total_num_edges; ++i) {
|
for (int i = 0; i < total_num_edges; ++i) {
|
||||||
if (getline(&line, &len, fp) == -1)
|
getline(&line, &len, fp);
|
||||||
break;
|
|
||||||
|
|
||||||
int fst, snd;
|
int fst, snd;
|
||||||
sscanf(line, "%d %d", &fst, &snd);
|
sscanf(line, "%d %d", &fst, &snd);
|
||||||
|
|
||||||
if (fst >= current_node_range.snd) {
|
if (fst >= current_node_range.second) {
|
||||||
if (current_process == 0) {
|
if (current_process == 0) {
|
||||||
num_my_edges = edge_counter;
|
num_my_edges = edge_counter;
|
||||||
my_edges = (pair *)calloc(num_my_edges, sizeof(pair));
|
my_edges = (pair *)calloc(num_my_edges, sizeof(pair));
|
||||||
memcpy(my_edges, all_edges.ptr, edge_counter * sizeof(pair));
|
memcpy(my_edges, all_edges.ptr, edge_counter * sizeof(pair));
|
||||||
} else {
|
} else {
|
||||||
MPI_Send(&edge_counter, 1, MPI_INT, current_process,
|
MPI_Send(&edge_counter, 1, MPI_INT, current_process,
|
||||||
TAG_SEND_NUM_EDGES, MPI_COMM_WORLD);
|
TAG_SEND_NUM_EDGES, MPI::COMM_WORLD);
|
||||||
MPI_Send(all_edges.ptr, edge_counter, IntPairType, current_process,
|
MPI_Send(all_edges.ptr, edge_counter, IntPairType, current_process,
|
||||||
TAG_SEND_EDGES, MPI_COMM_WORLD);
|
TAG_SEND_EDGES, MPI::COMM_WORLD);
|
||||||
}
|
}
|
||||||
|
|
||||||
// We're starting on the next process
|
// We're starting on the next process
|
||||||
current_process += 1;
|
current_process += 1;
|
||||||
current_node_range = node_ranges[current_process];
|
current_node_range = node_range(current_process);
|
||||||
edge_counter = 0;
|
edge_counter = 0;
|
||||||
pair_vector_clear(&all_edges);
|
pair_vector_clear(&all_edges);
|
||||||
}
|
}
|
||||||
|
@ -141,18 +129,27 @@ int main(int argc, char **argv) {
|
||||||
memcpy(my_edges, all_edges.ptr, edge_counter * sizeof(pair));
|
memcpy(my_edges, all_edges.ptr, edge_counter * sizeof(pair));
|
||||||
} else {
|
} else {
|
||||||
MPI_Send(&edge_counter, 1, MPI_INT, current_process, TAG_SEND_NUM_EDGES,
|
MPI_Send(&edge_counter, 1, MPI_INT, current_process, TAG_SEND_NUM_EDGES,
|
||||||
MPI_COMM_WORLD);
|
MPI::COMM_WORLD);
|
||||||
MPI_Send(all_edges.ptr, edge_counter, IntPairType, current_process,
|
MPI_Send(all_edges.ptr, edge_counter, IntPairType, current_process,
|
||||||
TAG_SEND_EDGES, MPI_COMM_WORLD);
|
TAG_SEND_EDGES, MPI::COMM_WORLD);
|
||||||
}
|
}
|
||||||
|
|
||||||
free(all_edges.ptr);
|
|
||||||
} else {
|
} else {
|
||||||
MPI_Recv(&num_my_edges, 1, MPI_INT, 0, TAG_SEND_NUM_EDGES, MPI_COMM_WORLD,
|
MPI_Recv(&num_my_edges, 1, MPI_INT, 0, TAG_SEND_NUM_EDGES, MPI::COMM_WORLD,
|
||||||
NULL);
|
NULL);
|
||||||
my_edges = (pair *)calloc(num_my_edges, sizeof(pair));
|
my_edges = (pair *)calloc(num_my_edges, sizeof(pair));
|
||||||
MPI_Recv(my_edges, num_my_edges, IntPairType, 0, TAG_SEND_EDGES,
|
MPI_Recv(my_edges, num_my_edges, IntPairType, 0, TAG_SEND_EDGES,
|
||||||
MPI_COMM_WORLD, NULL);
|
MPI::COMM_WORLD, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
char *buf = (char *)calloc(sizeof(char), 1000);
|
||||||
|
int offset = 0; // Keep track of the current position in the buffer
|
||||||
|
for (int i = 0; i < std::min(num_my_edges, 5); i++) {
|
||||||
|
offset +=
|
||||||
|
sprintf(buf + offset, "(%d, %d)", my_edges[i].fst, my_edges[i].snd);
|
||||||
|
if (i < len - 1) {
|
||||||
|
// Add a separator (e.g., comma or space) if it's not the last
|
||||||
|
offset += sprintf(buf + offset, " ");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rank == 0) {
|
if (rank == 0) {
|
||||||
|
@ -162,25 +159,19 @@ int main(int argc, char **argv) {
|
||||||
}
|
}
|
||||||
#pragma endregion
|
#pragma endregion
|
||||||
|
|
||||||
if (rank == 0)
|
|
||||||
printf("Params: p=%d, |E|=%d, |V|=%d\n", p, total_num_nodes,
|
|
||||||
total_num_edges);
|
|
||||||
|
|
||||||
// STEP 2 TIMER STARTS HERE
|
// STEP 2 TIMER STARTS HERE
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
MPI::COMM_WORLD.Barrier();
|
||||||
double step_2_start_time;
|
double step_2_start_time = MPI::Wtime();
|
||||||
if (rank == 0)
|
|
||||||
step_2_start_time = MPI_Wtime();
|
|
||||||
|
|
||||||
// Each process analyzes the non-local edges that are contained in its portion
|
// Each process analyzes the non-local edges that are contained in its portion
|
||||||
// of the graph.
|
// of the graph.
|
||||||
#pragma region
|
#pragma region
|
||||||
int node_label_assignment_vec[num_my_nodes];
|
std::map<int, int> node_label_assignment;
|
||||||
pair my_node_range = node_ranges[rank];
|
std::pair<int, int> my_node_range = node_range(rank);
|
||||||
|
|
||||||
// Initial node assignment
|
// Initial node assignment
|
||||||
for (int idx = 0; idx < num_my_nodes; ++idx) {
|
for (int i = my_node_range.first; i < my_node_range.second; ++i) {
|
||||||
node_label_assignment_vec[idx] = my_node_range.fst + idx;
|
node_label_assignment[i] = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::map<int, std::set<int>> adj;
|
std::map<int, std::set<int>> adj;
|
||||||
|
@ -191,12 +182,12 @@ int main(int argc, char **argv) {
|
||||||
pair edge = my_edges[i];
|
pair edge = my_edges[i];
|
||||||
adj[edge.fst].insert(edge.snd);
|
adj[edge.fst].insert(edge.snd);
|
||||||
|
|
||||||
if (!(my_node_range.fst <= edge.fst && edge.fst < my_node_range.snd)) {
|
if (!(my_node_range.first <= edge.fst && edge.fst < my_node_range.second)) {
|
||||||
non_local_nodes.insert(edge.fst);
|
non_local_nodes.insert(edge.fst);
|
||||||
non_local_edges.insert(std::make_pair(edge.snd, edge.fst));
|
non_local_edges.insert(std::make_pair(edge.snd, edge.fst));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(my_node_range.fst <= edge.snd && edge.snd < my_node_range.snd)) {
|
if (!(my_node_range.first <= edge.snd && edge.snd < my_node_range.second)) {
|
||||||
non_local_nodes.insert(edge.snd);
|
non_local_nodes.insert(edge.snd);
|
||||||
non_local_edges.insert(std::make_pair(edge.fst, edge.snd));
|
non_local_edges.insert(std::make_pair(edge.fst, edge.snd));
|
||||||
}
|
}
|
||||||
|
@ -212,105 +203,87 @@ int main(int argc, char **argv) {
|
||||||
for (auto entry : non_local_edges) {
|
for (auto entry : non_local_edges) {
|
||||||
int local_node = entry.first, remote_node = entry.second;
|
int local_node = entry.first, remote_node = entry.second;
|
||||||
|
|
||||||
int remote_process = remote_node / each_num_nodes;
|
int corresponding_process = remote_node / each_num_nodes;
|
||||||
// The last process gets some extra nodes
|
// The last process gets some extra nodes
|
||||||
if (remote_process >= p)
|
if (corresponding_process >= p)
|
||||||
remote_process = p - 1;
|
corresponding_process = p - 1;
|
||||||
|
|
||||||
send_map[remote_process].insert(local_node);
|
send_map[corresponding_process].insert(local_node);
|
||||||
recv_map[remote_process].insert(remote_node);
|
recv_map[corresponding_process].insert(remote_node);
|
||||||
}
|
}
|
||||||
#pragma endregion
|
#pragma endregion
|
||||||
|
|
||||||
// All the processes are communicating to figure out which process needs to
|
// All the processes are communicating to figure out which process needs to
|
||||||
// send what data to the other processes.
|
// send what data to the other processes.
|
||||||
#pragma region
|
#pragma region
|
||||||
|
// Nothing needs to be done here, I'm using the fact that everything is sent
|
||||||
|
// in sorted order to ensure that both sides are referring to the same thing
|
||||||
#pragma endregion
|
#pragma endregion
|
||||||
|
|
||||||
// STEP 5 TIMER STARTS HERE
|
// STEP 5 TIMER STARTS HERE
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
MPI::COMM_WORLD.Barrier();
|
||||||
double step_5_start_time;
|
double step_5_start_time = MPI::Wtime();
|
||||||
if (rank == 0) {
|
|
||||||
step_5_start_time = MPI_Wtime();
|
|
||||||
}
|
|
||||||
|
|
||||||
// The processes perform the transfers of non-local labels and updates of
|
// The processes perform the transfers of non-local labels and updates of
|
||||||
// local labels until convergence.
|
// local labels until convergence.
|
||||||
#pragma region
|
#pragma region
|
||||||
while (true) {
|
while (true) {
|
||||||
// First, exchange the data that needs to be exchanged
|
// First, exchange the data that needs to be exchanged
|
||||||
int sendbuf[num_my_nodes];
|
std::vector<int> sendbuf;
|
||||||
int send_counts[p];
|
std::vector<int> send_counts;
|
||||||
int send_displs[p];
|
std::vector<int> send_displs;
|
||||||
int recv_counts[p];
|
std::vector<int> recv_counts;
|
||||||
int recv_displs[p];
|
std::vector<int> recv_displs;
|
||||||
std::map<int, int> remote_labels;
|
|
||||||
|
|
||||||
if (p > 1) {
|
|
||||||
|
|
||||||
int recv_total;
|
int recv_total;
|
||||||
{
|
{
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
for (int i = 0; i < p; ++i) {
|
for (int i = 0; i < p; ++i) {
|
||||||
int count = send_map[i].size();
|
int count = send_map[i].size();
|
||||||
for (auto local_node : send_map[i]) {
|
// std::sort(send_map[i].begin(), send_map[i].end());
|
||||||
sendbuf[offset + local_node - my_node_range.fst] =
|
for (auto k : send_map[i]) {
|
||||||
node_label_assignment_vec[local_node - my_node_range.fst];
|
sendbuf.push_back(node_label_assignment[k]);
|
||||||
}
|
}
|
||||||
send_counts[i] = count;
|
send_counts.push_back(count);
|
||||||
send_displs[i] = offset;
|
send_displs.push_back(offset);
|
||||||
offset += count;
|
offset += count;
|
||||||
}
|
}
|
||||||
|
|
||||||
offset = 0;
|
offset = 0;
|
||||||
for (int i = 0; i < p; ++i) {
|
for (int i = 0; i < p; ++i) {
|
||||||
int count = recv_map[i].size();
|
int count = recv_map[i].size();
|
||||||
recv_counts[i] = count;
|
// std::sort(recv_map[i].begin(), recv_map[i].end());
|
||||||
recv_displs[i] = offset;
|
recv_counts.push_back(count);
|
||||||
|
recv_displs.push_back(offset);
|
||||||
offset += count;
|
offset += count;
|
||||||
}
|
}
|
||||||
recv_total = offset;
|
recv_total = offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
int recvbuf[recv_total];
|
std::vector<int> recvbuf(recv_total, 0);
|
||||||
MPI_Alltoallv(sendbuf, send_counts, send_displs, MPI_INT, recvbuf,
|
MPI::COMM_WORLD.Alltoallv(sendbuf.data(), send_counts.data(),
|
||||||
recv_counts, recv_displs, MPI_INT, MPI_COMM_WORLD);
|
send_displs.data(), MPI_INT, recvbuf.data(),
|
||||||
|
recv_counts.data(), recv_displs.data(), MPI_INT);
|
||||||
|
|
||||||
// Cache efficiently
|
std::map<int, int> total_node_label_assignment(node_label_assignment);
|
||||||
for (int i = 0; i < p; ++i) {
|
for (int i = 0; i < p; ++i) {
|
||||||
std::vector<int> processor_nodes(recv_map[i].begin(),
|
std::vector<int> ouais(recv_map[i].begin(), recv_map[i].end());
|
||||||
recv_map[i].end());
|
|
||||||
for (int j = 0; j < recv_counts[i]; ++j) {
|
for (int j = 0; j < recv_counts[i]; ++j) {
|
||||||
int remote_node = processor_nodes[j];
|
int remote_node = ouais[j];
|
||||||
int remote_value = recvbuf[recv_displs[i] + j];
|
int remote_value = recvbuf[recv_displs[i] + j];
|
||||||
remote_labels[remote_node] = remote_value;
|
total_node_label_assignment[remote_node] = remote_value;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// For each local node, determine the minimum label out of its neighbors
|
// For each local node, determine the minimum label out of its neighbors
|
||||||
std::map<int, int> new_labels;
|
std::map<int, int> new_labels;
|
||||||
for (int i = 0; i < num_my_nodes; ++i) {
|
for (int i = my_node_range.first; i < my_node_range.second; ++i) {
|
||||||
int node = my_node_range.fst + i;
|
int current_value = total_node_label_assignment[i];
|
||||||
|
|
||||||
// int current_value = total_node_label_assignment[i];
|
|
||||||
int current_value = node_label_assignment_vec[i];
|
|
||||||
int min = current_value;
|
int min = current_value;
|
||||||
|
|
||||||
for (auto neighbor : adj[node]) {
|
for (auto neighbor : adj[i]) {
|
||||||
int neighbor_value;
|
if (total_node_label_assignment[neighbor] < min)
|
||||||
if (my_node_range.fst <= neighbor && neighbor < my_node_range.snd) {
|
min = total_node_label_assignment[neighbor];
|
||||||
neighbor_value =
|
|
||||||
node_label_assignment_vec[neighbor - my_node_range.fst];
|
|
||||||
} else {
|
|
||||||
neighbor_value = remote_labels[neighbor];
|
|
||||||
}
|
|
||||||
|
|
||||||
// = lookup_assignment(
|
|
||||||
// node_label_assignment_vec, my_node_range, recv_map,
|
|
||||||
// recvbuf.data(), recv_counts.data(), recv_displs.data(),
|
|
||||||
// each_num_nodes, rank, neighbor);
|
|
||||||
min = MIN(min, neighbor_value);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (min < current_value) {
|
if (min < current_value) {
|
||||||
|
@ -321,8 +294,8 @@ int main(int argc, char **argv) {
|
||||||
// Have there been any changes in the labels?
|
// Have there been any changes in the labels?
|
||||||
int num_changes = new_labels.size();
|
int num_changes = new_labels.size();
|
||||||
int total_changes;
|
int total_changes;
|
||||||
MPI_Allreduce(&num_changes, &total_changes, 1, MPI_INT, MPI_SUM,
|
MPI::COMM_WORLD.Allreduce(&num_changes, &total_changes, 1, MPI_INT,
|
||||||
MPI_COMM_WORLD);
|
MPI::SUM);
|
||||||
|
|
||||||
if (total_changes == 0) {
|
if (total_changes == 0) {
|
||||||
break;
|
break;
|
||||||
|
@ -330,19 +303,14 @@ int main(int argc, char **argv) {
|
||||||
|
|
||||||
// Update the original node assignment
|
// Update the original node assignment
|
||||||
for (auto entry : new_labels) {
|
for (auto entry : new_labels) {
|
||||||
node_label_assignment_vec[entry.first] = entry.second;
|
node_label_assignment[entry.first] = entry.second;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rank == 0)
|
|
||||||
printf("total changes: %d\n", total_changes);
|
|
||||||
}
|
}
|
||||||
#pragma endregion
|
#pragma endregion
|
||||||
|
|
||||||
// END TIMERS
|
// END TIMERS
|
||||||
MPI_Barrier(MPI_COMM_WORLD);
|
MPI::COMM_WORLD.Barrier();
|
||||||
double end_time;
|
double end_time = MPI::Wtime();
|
||||||
if (rank == 0)
|
|
||||||
end_time = MPI_Wtime();
|
|
||||||
|
|
||||||
if (rank == 0) {
|
if (rank == 0) {
|
||||||
printf("2-5 Time: %0.04fs\n", end_time - step_2_start_time);
|
printf("2-5 Time: %0.04fs\n", end_time - step_2_start_time);
|
||||||
|
@ -353,36 +321,39 @@ int main(int argc, char **argv) {
|
||||||
// disk.
|
// disk.
|
||||||
#pragma region
|
#pragma region
|
||||||
if (rank == 0) {
|
if (rank == 0) {
|
||||||
FILE *fp = fopen(argv[2], "w");
|
std::vector<int> all_assignments(total_num_nodes);
|
||||||
std::map<int, int> label_count;
|
// std::map<int, int> label_count;
|
||||||
for (int process_idx = 0; process_idx < p; ++process_idx) {
|
int ctr = 0;
|
||||||
pair this_node_range = node_ranges[process_idx];
|
for (int i = 0; i < p; ++i) {
|
||||||
int count = this_node_range.snd - this_node_range.fst;
|
std::pair<int, int> this_node_range = node_range(i);
|
||||||
if (process_idx == 0) {
|
int count = this_node_range.second - this_node_range.first;
|
||||||
|
if (i == 0) {
|
||||||
for (int j = 0; j < count; ++j) {
|
for (int j = 0; j < count; ++j) {
|
||||||
fprintf(fp, "%d\n", node_label_assignment_vec[j]);
|
all_assignments[this_node_range.first + j] =
|
||||||
label_count[node_label_assignment_vec[j]]++;
|
node_label_assignment[this_node_range.first + j];
|
||||||
|
// label_count[all_assignments[this_node_range.first + j]]++;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int recvbuf[count];
|
MPI::COMM_WORLD.Recv(&all_assignments[this_node_range.first], count,
|
||||||
MPI_Recv(&recvbuf, count, MPI_INT, process_idx, TAG_SEND_FINAL_RESULT,
|
MPI::INT, i, TAG_SEND_FINAL_RESULT);
|
||||||
MPI_COMM_WORLD, NULL);
|
// for (int j = 0; j < count; ++j) {
|
||||||
for (int j = 0; j < count; ++j) {
|
// label_count[all_assignments[this_node_range.first + j]]++;
|
||||||
fprintf(fp, "%d\n", recvbuf[j]);
|
// }
|
||||||
label_count[recvbuf[j]]++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("%d\n", label_count.size());
|
// std::cout << "Done! " << label_count.size() << std::endl;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
MPI_Send(node_label_assignment_vec, num_my_nodes, MPI_INT, 0,
|
std::vector<int> flat_assignments;
|
||||||
TAG_SEND_FINAL_RESULT, MPI_COMM_WORLD);
|
for (int i = my_node_range.first; i < my_node_range.second; ++i) {
|
||||||
|
flat_assignments.push_back(node_label_assignment[i]);
|
||||||
|
}
|
||||||
|
MPI::COMM_WORLD.Send(flat_assignments.data(), flat_assignments.size(),
|
||||||
|
MPI::INT, 0, TAG_SEND_FINAL_RESULT);
|
||||||
}
|
}
|
||||||
#pragma endregion
|
#pragma endregion
|
||||||
|
|
||||||
MPI_Finalize();
|
MPI::Finalize();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -423,58 +394,3 @@ void pair_vector_push(struct pair_vector *v, int fst, int snd) {
|
||||||
v->ptr[v->len].snd = snd;
|
v->ptr[v->len].snd = snd;
|
||||||
v->len++;
|
v->len++;
|
||||||
}
|
}
|
||||||
|
|
||||||
pair compute_node_range(int p, int total_num_nodes, int each_num_nodes,
|
|
||||||
int process) {
|
|
||||||
int start = process * each_num_nodes;
|
|
||||||
int end = process == p - 1 ? total_num_nodes : start + each_num_nodes;
|
|
||||||
return {.fst = start, .snd = end};
|
|
||||||
}
|
|
||||||
|
|
||||||
int lookup_assignment(int *base_node_assignment, pair my_node_range,
|
|
||||||
std::map<int, std::set<int>> recv_map, int *recvbuf,
|
|
||||||
int *recv_counts, int *recv_displs, int each_num_nodes,
|
|
||||||
int rank, int node_number) {
|
|
||||||
int process_from = node_number / each_num_nodes;
|
|
||||||
|
|
||||||
// Just return from local if local
|
|
||||||
if (process_from == rank)
|
|
||||||
return base_node_assignment[node_number - my_node_range.fst];
|
|
||||||
|
|
||||||
int count = recv_counts[process_from];
|
|
||||||
int displs = recv_displs[process_from];
|
|
||||||
|
|
||||||
// Determine what index this node is
|
|
||||||
int index = -1, ctr = 0;
|
|
||||||
std::vector<int> inner(recv_map[process_from].begin(),
|
|
||||||
recv_map[process_from].end());
|
|
||||||
|
|
||||||
{
|
|
||||||
// Use binary search...
|
|
||||||
int lo = 0, hi = count;
|
|
||||||
while (lo < hi) {
|
|
||||||
int mid = (lo + hi) / 2;
|
|
||||||
int midk = inner[mid];
|
|
||||||
if (node_number < midk)
|
|
||||||
hi = mid;
|
|
||||||
else if (node_number > midk)
|
|
||||||
lo = mid;
|
|
||||||
else {
|
|
||||||
index = mid;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// for (int i = 0; i < count; ++i) {
|
|
||||||
// int remote_node = inner[i];
|
|
||||||
// if (node_number == remote_node) {
|
|
||||||
// index = ctr;
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
// ctr++;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Pull the corresponding value from the map
|
|
||||||
return recvbuf[recv_displs[process_from] + index];
|
|
||||||
}
|
|
||||||
|
|
33
assignments/03/process.py
Normal file
33
assignments/03/process.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
"""Turn the benchmark log (``stdout.txt``) into a Typst ``#table``.

Each run in the log is expected to be three consecutive lines: a header
matching ``WTF`` (processor count and dataset size), then two timing lines of
the form ``<label>: <value>``.  The generated table has one row group per
dataset size and one cell per processor count.
"""
import re
import sys

# Header line of a run: captures the processor count and the numeric dataset
# size taken from the ".../dataset/<size>.txt" path.
WTF = re.compile(r".*: (\d+),.*dataset/(\d+).txt")

# Processor counts shown in the table's header row.
COLUMNS = [1, 2, 4, 8, 16]


def parse_runs(lines):
    """Parse log lines into ``{size: {processors: (time2, time5)}}``.

    Blank lines are skipped instead of ending the parse.  Lines that are not a
    recognizable run header, and runs whose timing lines are missing or
    malformed, are reported on stderr and skipped instead of raising.  If the
    same (size, processors) pair appears more than once, the first run wins.
    """
    by_size = dict()
    it = iter(lines)
    for raw in it:
        line1 = raw.strip()
        if not line1:
            continue

        m = WTF.match(line1)
        if m is None:
            print(f"skipping unrecognized line: {line1!r}", file=sys.stderr)
            continue
        processors = int(m.group(1))
        size = int(m.group(2))

        line2 = next(it, "").strip()
        line3 = next(it, "").strip()
        try:
            time2 = line2.split(": ")[1]
            time5 = line3.split(": ")[1]
        except IndexError:
            print(f"skipping incomplete run after: {line1!r}", file=sys.stderr)
            continue

        by_size.setdefault(size, dict()).setdefault(processors, (time2, time5))
    return by_size


def render_table(by_size):
    """Return the Typst table as a list of output lines (no trailing newlines)."""
    header = ", ".join(map(lambda c: f"[{c}]", COLUMNS))
    out = [
        "#table(",
        " columns: (auto, auto, auto, auto, auto, auto),",
        # Same text that print(" [], ", header, ",") would emit.
        " ".join((" [], ", header, ",")),
    ]
    for size, entries in sorted(by_size.items()):
        out.append(f" [{size}],")
        for processors, (time2, time5) in sorted(entries.items()):
            out.append(f" [{time2} #linebreak() {time5}],")
        out.append("")  # blank line between size groups
    out.append(")")
    return out


def main():
    """Read ``stdout.txt`` and print the table to stdout."""
    with open("stdout.txt") as f:
        by_size = parse_runs(f)
    for line in render_table(by_size):
        print(line)


if __name__ == "__main__":
    main()
|
|
@ -13,5 +13,67 @@ I exchanged data using the unstructured communication approach, doing an
|
||||||
all-to-all transfer.
|
all-to-all transfer.
|
||||||
|
|
||||||
To read the result efficiently, I tried using the approach given in the slides.
|
To read the result efficiently, I tried using the approach given in the slides.
|
||||||
However, this was taking a long time (up to 45 seconds for the 10,000 case) and
|
I also tried to use binary search since this would yield $O(log(n))$ lookup time.
|
||||||
I tried using STL's `std::map`. This proved to be orders of magnitude faster
|
However, this was taking a long time (up to 45 seconds for the 10,000 case), and
|
||||||
|
it was the bottleneck. Using STL's `std::map` proved to be orders of magnitude
|
||||||
|
faster.
|
||||||
|
|
||||||
|
== Other remarks
|
||||||
|
|
||||||
|
On the original example dataset, it performs poorly with larger numbers of processors. I have an
|
||||||
|
explanation for this after looking at the performance characteristics of the
|
||||||
|
run: it completes in one iteration where every single edge is assigned. The data
|
||||||
|
distribution also indicates that almost everything is connected into the first
|
||||||
|
node, which isn't balanced.
|
||||||
|
|
||||||
|
I've written a generation script in Python using the `igraph` library.
|
||||||
|
|
||||||
|
- 1,000: 93 components
|
||||||
|
- 10,000: 947 components
|
||||||
|
- 100,000: 9,423 components
|
||||||
|
- 1,000,000: 92,880 components
|
||||||
|
|
||||||
|
Using this data, I was able to achieve much better speedup. I didn't attach the
|
||||||
|
actual data files but they can be generated with the same script (seeded for
|
||||||
|
reproducibility).
|
||||||
|
|
||||||
|
*NOTE:* I noticed that afterwards, the data was changed again, with a more balanced graph this time.
|
||||||
|
So the numbers will not reflect the poorer performance.
|
||||||
|
|
||||||
|
== Timing on example dataset
|
||||||
|
|
||||||
|
This experiment was performed on CSELabs by using my bench script, and the table
|
||||||
|
was generated with another script.
|
||||||
|
|
||||||
|
#table(
|
||||||
|
columns: (auto, auto, auto, auto, auto, auto),
|
||||||
|
[], [1], [2], [4], [8], [16] ,
|
||||||
|
[1000],
|
||||||
|
[0.0249s #linebreak() 0.0151s],
|
||||||
|
[0.0234s #linebreak() 0.0122s],
|
||||||
|
[0.0206s #linebreak() 0.0099s],
|
||||||
|
[0.0491s #linebreak() 0.0248s],
|
||||||
|
[0.0177s #linebreak() 0.0106s],
|
||||||
|
|
||||||
|
[10000],
|
||||||
|
[0.2929s #linebreak() 0.1830s],
|
||||||
|
[0.2933s #linebreak() 0.1540s],
|
||||||
|
[0.2457s #linebreak() 0.1178s],
|
||||||
|
[0.3793s #linebreak() 0.1328s],
|
||||||
|
[0.2473s #linebreak() 0.1197s],
|
||||||
|
|
||||||
|
[100000],
|
||||||
|
[3.7888s #linebreak() 2.4881s],
|
||||||
|
[3.7592s #linebreak() 2.0212s],
|
||||||
|
[3.3819s #linebreak() 1.6036s],
|
||||||
|
[2.9485s #linebreak() 1.3954s],
|
||||||
|
[2.8593s #linebreak() 1.3107s],
|
||||||
|
|
||||||
|
[1000000],
|
||||||
|
[46.7895s #linebreak() 31.9648s],
|
||||||
|
[45.2284s #linebreak() 24.8540s],
|
||||||
|
[40.3994s #linebreak() 20.2851s],
|
||||||
|
[36.9628s #linebreak() 17.6794s],
|
||||||
|
[35.7110s #linebreak() 16.6276s],
|
||||||
|
|
||||||
|
)
|
Loading…
Reference in a new issue