consistent results now

parent 0619d79caa
commit 5fe38262c5

5 changed files with 94 additions and 34 deletions
assignments/03/.gitignore (vendored, 3 changed lines)

@@ -2,4 +2,5 @@ lpa
 compile_commands.json
 .cache
 report.pdf
-*.tar.gz
+*.tar.gz
+out.txt
assignments/03/Makefile

@@ -1,7 +1,7 @@
 .PHONY: run clean

-# CFLAGS += -O3
-CFLAGS += -DFMT_HEADER_ONLY -g
+CFLAGS += -O3
+# CFLAGS += -DFMT_HEADER_ONLY -g
 # LDFLAGS += $(shell pkg-config --libs fmt)

 lpa: lpa.cpp Makefile
assignments/03/bench.sh (new executable file, 8 changed lines)

@@ -0,0 +1,8 @@
+for dataset in $(echo "1000.txt" "10000.txt" "1000000.txt" "1000000.txt"); do
+  for processors in $(echo 1 2 4 8 16 | tr ' ' '\n'); do
+    # file="dataset/both_$dataset"
+    file="/export/scratch/CSCI5451_F23/assignment-3/dataset/$dataset"
+    echo $processors $file;
+    mpirun -n $processors ./lpa $file >> out.txt
+  done
+done
assignments/03/lpa.cpp

@@ -97,6 +97,7 @@ int main(int argc, char **argv) {
   int num_my_edges;
   pair *my_edges;
+  int counts[p], displs[p];

   if (rank == 0) {
     line = NULL;
     // pair all_edges[total_num_edges];
@@ -140,10 +141,16 @@ int main(int argc, char **argv) {

     // We have to send the last one again here, since it didn't get caught in
     // the loop above
-    MPI_Send(&edge_counter, 1, MPI_INT, current_process, TAG_SEND_NUM_EDGES,
-             MPI_COMM_WORLD);
-    MPI_Send(all_edges.ptr, edge_counter, IntPairType, current_process,
-             TAG_SEND_EDGES, MPI_COMM_WORLD);
+    if (current_process == 0) {
+      num_my_edges = edge_counter;
+      my_edges = (pair *)calloc(num_my_edges, sizeof(pair));
+      memcpy(my_edges, all_edges.ptr, edge_counter * sizeof(pair));
+    } else {
+      MPI_Send(&edge_counter, 1, MPI_INT, current_process, TAG_SEND_NUM_EDGES,
+               MPI_COMM_WORLD);
+      MPI_Send(all_edges.ptr, edge_counter, IntPairType, current_process,
+               TAG_SEND_EDGES, MPI_COMM_WORLD);
+    }

     free(all_edges.ptr);
   } else {
@@ -161,6 +168,10 @@ int main(int argc, char **argv) {
   }
 #pragma endregion

+  if (rank == 0)
+    printf("Params: p=%d, |E|=%d, |V|=%d\n", p, total_num_nodes,
+           total_num_edges);
+
   // STEP 2 TIMER STARTS HERE
   MPI_Barrier(MPI_COMM_WORLD);
   double step_2_start_time;
@@ -228,7 +239,6 @@ int main(int argc, char **argv) {
   double step_5_start_time;
   if (rank == 0) {
     step_5_start_time = MPI_Wtime();
-    printf("STARTING STEP 5: %0.04fs\n", step_5_start_time - step_2_start_time);
   }

   // The processes perform the transfers of non-local labels and updates of
@@ -241,36 +251,52 @@ int main(int argc, char **argv) {
   std::vector<int> send_displs;
   std::vector<int> recv_counts;
   std::vector<int> recv_displs;
+  std::vector<int> recvbuf;
+  std::map<int, int> remote_labels;

-  int recv_total;
-  {
-    int offset = 0;
-    for (int i = 0; i < p; ++i) {
-      int count = send_map[i].size();
-      for (auto local_node : send_map[i]) {
-        sendbuf.push_back(
-            node_label_assignment_vec[local_node - my_node_range.fst]);
-      }
-      send_counts.push_back(count);
-      send_displs.push_back(offset);
-      offset += count;
-    }
-
-    offset = 0;
-    for (int i = 0; i < p; ++i) {
-      int count = recv_map[i].size();
-      recv_counts.push_back(count);
-      recv_displs.push_back(offset);
-      offset += count;
-    }
-    recv_total = offset;
-  }
-
-  std::vector<int> recvbuf(recv_total, 0);
-  MPI_Alltoallv(sendbuf.data(), send_counts.data(), send_displs.data(),
-                MPI_INT, recvbuf.data(), recv_counts.data(),
-                recv_displs.data(), MPI_INT, MPI_COMM_WORLD);
+  if (p > 1) {
+
+    int recv_total;
+    {
+      int offset = 0;
+      for (int i = 0; i < p; ++i) {
+        int count = send_map[i].size();
+        for (auto local_node : send_map[i]) {
+          sendbuf.push_back(
+              node_label_assignment_vec[local_node - my_node_range.fst]);
+        }
+        send_counts.push_back(count);
+        send_displs.push_back(offset);
+        offset += count;
+      }
+
+      offset = 0;
+      for (int i = 0; i < p; ++i) {
+        int count = recv_map[i].size();
+        recv_counts.push_back(count);
+        recv_displs.push_back(offset);
+        offset += count;
+      }
+      recv_total = offset;
+    }
+
+    recvbuf = std::vector<int>(recv_total, 0);
+    MPI_Alltoallv(sendbuf.data(), send_counts.data(), send_displs.data(),
+                  MPI_INT, recvbuf.data(), recv_counts.data(),
+                  recv_displs.data(), MPI_INT, MPI_COMM_WORLD);
+
+    // Cache efficiently
+    for (int i = 0; i < p; ++i) {
+      std::vector<int> processor_nodes(recv_map[i].begin(),
+                                       recv_map[i].end());
+      for (int j = 0; j < recv_counts[i]; ++j) {
+        int remote_node = processor_nodes[j];
+        int remote_value = recvbuf[recv_displs[i] + j];
+        remote_labels[remote_node] = remote_value;
+      }
+    }
+  }

   // For each local node, determine the minimum label out of its neighbors
   std::map<int, int> new_labels;
   for (int i = 0; i < num_my_nodes; ++i) {
@@ -281,10 +307,18 @@ int main(int argc, char **argv) {
     int min = current_value;

     for (auto neighbor : adj[node]) {
-      int neighbor_value = lookup_assignment(
-          node_label_assignment_vec, my_node_range, recv_map, recvbuf.data(),
-          recv_counts.data(), recv_displs.data(), each_num_nodes, rank,
-          neighbor);
+      int neighbor_value;
+      if (my_node_range.fst <= neighbor && neighbor < my_node_range.snd) {
+        neighbor_value =
+            node_label_assignment_vec[neighbor - my_node_range.fst];
+      } else {
+        neighbor_value = remote_labels[neighbor];
+      }
+
+      // = lookup_assignment(
+      //     node_label_assignment_vec, my_node_range, recv_map,
+      //     recvbuf.data(), recv_counts.data(), recv_displs.data(),
+      //     each_num_nodes, rank, neighbor);
       min = MIN(min, neighbor_value);
     }

@@ -0,0 +1,17 @@
+== Step 2-4
+
+For steps 2-4, I calculated all of each process's outgoing nodes, sorted
+them, and used each node's position in the sorted order to identify which
+nodes are being sent.
+
+This saves an extra communication and lets me index the same items on each
+loop iteration.
+
+== Step 5
+
+I exchanged data using the unstructured communication approach, doing an
+all-to-all transfer.
+
+To read the results efficiently, I first tried the approach given in the
+slides, but it took up to 45 seconds on the 10,000-node case. Switching to
+STL's `std::map` proved orders of magnitude faster
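The Step 2-4 scheme in the report above works because both endpoints can derive the same ordering independently: a `std::set<int>` iterates in ascending order, so if sender and receiver hold the same node set, position j means the same node on both sides and only label values need to cross the wire. A minimal sketch of that idea; `sorted_positions` and `pack_labels` are hypothetical names for illustration, not functions from lpa.cpp:

// Sketch only: positional identification of exchanged nodes (Step 2-4 idea).
#include <cstdio>
#include <set>
#include <vector>

// Both ranks compute the same std::set<int> of shared boundary nodes;
// iterating the set yields ascending order, so index j identifies a node
// without ever transmitting node IDs.
std::vector<int> sorted_positions(const std::set<int> &boundary_nodes) {
  return std::vector<int>(boundary_nodes.begin(), boundary_nodes.end());
}

// Sender packs labels positionally: slot j holds the label of sorted node j.
// `base` is the first node ID owned by this rank (cf. my_node_range.fst).
std::vector<int> pack_labels(const std::vector<int> &sorted_nodes,
                             const std::vector<int> &local_labels, int base) {
  std::vector<int> buf;
  buf.reserve(sorted_nodes.size());
  for (int node : sorted_nodes)
    buf.push_back(local_labels[node - base]);
  return buf;
}

int main() {
  // Both sides derive the same order from the same set: {1, 3, 4}.
  std::set<int> shared = {4, 1, 3};
  std::vector<int> order = sorted_positions(shared);
  std::vector<int> labels = {10, 11, 12, 13, 14};  // labels of nodes 0..4
  std::vector<int> buf = pack_labels(order, labels, /*base=*/0);
  // buf = {11, 13, 14}; position j identifies node order[j] on the receiver.
  std::printf("packed %zu labels\n", buf.size());
  return 0;
}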
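For the Step 5 all-to-all transfer, the committed code builds per-rank counts and prefix-sum displacements from send_map/recv_map and hands them to MPI_Alltoallv. A self-contained sketch of that counts/displacements pattern, reduced to a fixed one-int-per-rank payload rather than the assignment's variable-sized one:

// Sketch: the MPI_Alltoallv plumbing used in Step 5, one int per rank.
// Build with mpic++, run with e.g. mpirun -n 4 ./a.out.
#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank, p;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &p);

  // Each rank sends its own rank number to every rank; displacements are
  // prefix sums of the counts, exactly as lpa.cpp computes with `offset`.
  std::vector<int> sendbuf(p, rank), recvbuf(p, 0);
  std::vector<int> counts(p, 1), displs(p);
  for (int i = 0; i < p; ++i)
    displs[i] = i;

  MPI_Alltoallv(sendbuf.data(), counts.data(), displs.data(), MPI_INT,
                recvbuf.data(), counts.data(), displs.data(), MPI_INT,
                MPI_COMM_WORLD);

  // recvbuf[i] now holds i, the value contributed by rank i.
  printf("rank %d: received one int from each of %d ranks\n", rank, p);
  MPI_Finalize();
  return 0;
}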
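The `std::map` speedup the report describes corresponds to the `remote_labels` cache in the lpa.cpp diff: received labels are inserted once per round, after which each neighbor lookup is either an O(1) local array index or an O(log R) map probe, instead of re-deriving a position from the recv count/displacement arrays on every lookup. A sketch of that lookup path, where `Range` stands in for the pair type behind my_node_range:

// Sketch of the post-exchange lookup from the diff: local nodes index the
// dense label vector; remote nodes hit the std::map cache built from recvbuf.
#include <cstdio>
#include <map>
#include <vector>

struct Range { int fst, snd; };  // this rank owns node IDs in [fst, snd)

int lookup_label(int node, const Range &range,
                 const std::vector<int> &local_labels,
                 const std::map<int, int> &remote_labels) {
  if (range.fst <= node && node < range.snd)
    return local_labels[node - range.fst];  // local: O(1) array index
  return remote_labels.at(node);            // remote: cached, O(log R)
}

int main() {
  Range range{100, 105};                     // nodes 100..104 are local
  std::vector<int> local = {1, 1, 2, 2, 3};  // labels of nodes 100..104
  std::map<int, int> remote = {{7, 0}, {999, 4}};  // filled after Alltoallv
  std::printf("%d %d\n", lookup_label(102, range, local, remote),
              lookup_label(999, range, local, remote));  // prints "2 4"
  return 0;
}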