diff --git a/assignments/04/km_cuda.cu b/assignments/04/km_cuda.cu
index 938e9f7..2cd0c23 100644
--- a/assignments/04/km_cuda.cu
+++ b/assignments/04/km_cuda.cu
@@ -112,7 +112,7 @@ int main(int argc, char **argv) {
   // Allocate memory on the GPU
   CUDACHECK(
       cudaMalloc((void **)&centroids, num_clusters * dim * sizeof(float)));
-  CUDACHECK(cudaMalloc((void **)&clusterMap, N * sizeof(int)));
+  CUDACHECK(cudaMallocManaged((void **)&clusterMap, N * sizeof(int)));
   CUDACHECK(cudaMallocManaged((void **)&clusterCount,
                               num_clusters * sizeof(unsigned int)));
   CUDACHECK(cudaMalloc((void **)&data, N * dim * sizeof(float)));
@@ -170,7 +170,7 @@ int main(int argc, char **argv) {
   printf("Is dirty: %d\n", *dirtyBit);
 #pragma endregion
 
-#pragma region
+#pragma region Iteration
   int it = 0;
   while (*dirtyBit) {
     printf("Iteration %d (dirty=%d)\n", it, *dirtyBit);
@@ -205,5 +205,51 @@ int main(int argc, char **argv) {
   }
 #pragma endregion
 
+#pragma region Output
+  // Host reads of managed memory (clusterMap) are only valid once all queued
+  // GPU work has finished, so synchronize before writing the results.
+  CUDACHECK(cudaDeviceSynchronize());
+
+  // clusters.txt: the N entries of clusterMap, one per line.
+  {
+    FILE *fp = fopen("clusters.txt", "w");
+    if (fp == NULL) {
+      perror("clusters.txt");
+      return 1;
+    }
+    for (int i = 0; i < N; ++i)
+      fprintf(fp, "%d\n", clusterMap[i]);
+    fclose(fp);
+  }
+
+  // centroids.txt: a "num_clusters dim" header, then one centroid per line.
+  {
+    FILE *fp = fopen("centroids.txt", "w");
+    if (fp == NULL) {
+      perror("centroids.txt");
+      return 1;
+    }
+    fprintf(fp, "%d %d\n", num_clusters, dim);
+    // Host staging buffer for one centroid (centroids lives in device memory).
+    float *line = (float *)malloc(dim * sizeof(float));
+    if (line == NULL) {
+      perror("malloc");
+      fclose(fp);
+      return 1;
+    }
+    for (int i = 0; i < num_clusters; ++i) {
+      CUDACHECK(cudaMemcpy(line, &centroids[i * dim], dim * sizeof(float),
+                           cudaMemcpyDeviceToHost));
+      for (int d = 0; d < dim; ++d)
+        fprintf(fp, "%.3f ", line[d]);
+      fprintf(fp, "\n");
+    }
+    free(line);
+    fclose(fp);
+  }
+
+  printf("Done.\n");
+#pragma endregion
+
   return 0;
 }