// #define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>

/*
 * Wraps a CUDA runtime call: forwards its cudaError_t result, together with
 * the call site's file and line, to cuda_check().
 */
#define CUDACHECK(err)                     \
  do {                                     \
    cuda_check((err), __FILE__, __LINE__); \
  } while (false)

/*
 * Terminates the process if `error_code` is not cudaSuccess.
 *
 * On failure, prints the numeric code, the runtime's description of it and
 * the originating file/line to stderr, then exits with the error code as the
 * process status. Does nothing on cudaSuccess.
 */
inline void cuda_check(cudaError_t error_code, const char *file, int line) {
  if (error_code != cudaSuccess) {
    fprintf(stderr, "CUDA Error %d: %s. In file '%s' on line %d\n", error_code,
            cudaGetErrorString(error_code), file, line);
    fflush(stderr);
    exit(error_code);
  }
}

/*
 * Placeholder kernel: every thread of a block stores that block's index into
 * centroidDistances[blockIdx.x]. `data` is currently not read.
 *
 * Launch layout: 1D grid; threads within a block all write the same value to
 * the same element. No shared memory is used.
 * Precondition: centroidDistances must hold at least gridDim.x floats (the
 * kernel has no size parameter, so the caller must guarantee this).
 */
__global__ void findDistanceToCentroid(float *centroidDistances, float *data) {
  centroidDistances[blockIdx.x] = blockIdx.x;
}

/*
 * Reads the whole data file into a freshly malloc'd row-major host buffer.
 *
 * Expected layout (as parsed here): the first line holds two integers, the
 * number of points and the number of dimensions; each following line holds
 * at least `dim` whitespace-separated numbers for one point.
 *
 * On success returns the buffer (caller frees it) and stores the header
 * values in *num_points / *num_dims. On any failure (file cannot be opened,
 * malformed header, short or unparsable row, out of memory) prints a message
 * to stderr and returns NULL.
 */
static float *read_data_file(const char *path, int *num_points, int *num_dims) {
  FILE *fp = fopen(path, "r");
  if (fp == NULL) {
    perror(path);
    return NULL;
  }

  char *line = NULL;
  size_t capacity = 0;
  float *values = NULL;
  bool ok = false;

  // getline() reports EOF/error with -1, which is truthy, so compare
  // against -1 explicitly instead of testing with '!'.
  if (getline(&line, &capacity, fp) != -1 &&
      sscanf(line, "%d %d", num_points, num_dims) == 2 && *num_points > 0 &&
      *num_dims > 0) {
    // Widen before multiplying so large inputs do not overflow int.
    const size_t rows = (size_t)*num_points;
    const size_t cols = (size_t)*num_dims;

    values = (float *)malloc(rows * cols * sizeof(float));
    ok = (values != NULL);

    for (size_t i = 0; ok && i < rows; ++i) {
      if (getline(&line, &capacity, fp) == -1) {
        ok = false;
        break;
      }
      // Walk the line token by token; strtof() hands back where it stopped,
      // so each column is parsed from the end of the previous one.
      char *cursor = line;
      for (size_t j = 0; j < cols; ++j) {
        char *end = NULL;
        const float value = strtof(cursor, &end);
        if (end == cursor) {  // nothing parsed: row is too short or malformed
          ok = false;
          break;
        }
        values[i * cols + j] = value;
        cursor = end;
      }
    }
  }

  free(line);
  fclose(fp);

  if (!ok) {
    fprintf(stderr, "Failed to read data from '%s'\n", path);
    free(values);
    return NULL;
  }
  return values;
}

/*
 * Entry point.
 *
 * Usage: <program> <data_file> <num_clusters> <num_thread_blocks>
 *                  <num_threads_per_block>
 *
 * Loads the data set onto the GPU, seeds the centroids with the first
 * num_clusters points, runs the placeholder distance kernel and prints the
 * first few values it produced. Returns 0 on success and -1 on bad
 * arguments or unreadable input; CUDA failures exit via CUDACHECK.
 */
int main(int argc, char **argv) {
  if (argc < 5) {
    fprintf(stderr,
            "Usage: %s <data_file> <num_clusters> <num_thread_blocks> "
            "<num_threads_per_block>\n",
            argc > 0 ? argv[0] : "program");
    return -1;
  }

  int runtimeVersion, driverVersion;
  CUDACHECK(cudaRuntimeGetVersion(&runtimeVersion));
  CUDACHECK(cudaDriverGetVersion(&driverVersion));
  printf("Runtime Version: %d, Driver Version: %d\n", runtimeVersion,
         driverVersion);

  char *data_file = argv[1];
  int num_clusters = atoi(argv[2]);
  int num_thread_blocks = atoi(argv[3]);
  int num_threads_per_block = atoi(argv[4]);
  // Parsed to keep the command line stable; the placeholder launch below
  // does not use them yet.
  (void)num_thread_blocks;
  (void)num_threads_per_block;

  int N, dim;
  float *centroids, *data, *centroidDistances;
  int *clusterMap;

#pragma region Read in data
  {
    float *host_data = read_data_file(data_file, &N, &dim);
    if (host_data == NULL) return -1;

    // The initial centroids are copied from the first num_clusters points,
    // so there must be at least one cluster and no more clusters than points.
    if (num_clusters <= 0 || num_clusters > N) {
      fprintf(stderr, "num_clusters must be between 1 and %d (got %d)\n", N,
              num_clusters);
      free(host_data);
      return -1;
    }

    const size_t points = (size_t)N;
    const size_t dims = (size_t)dim;
    const size_t clusters = (size_t)num_clusters;

    // Allocate memory on the GPU
    CUDACHECK(cudaMalloc((void **)&centroids, clusters * dims * sizeof(float)));
    CUDACHECK(cudaMalloc((void **)&clusterMap, points * sizeof(int)));
    CUDACHECK(cudaMalloc((void **)&data, points * dims * sizeof(float)));
    CUDACHECK(cudaMalloc((void **)&centroidDistances,
                         points * clusters * sizeof(float)));

    // One bulk transfer instead of one cudaMemcpy per row.
    CUDACHECK(cudaMemcpy(data, host_data, points * dims * sizeof(float),
                         cudaMemcpyHostToDevice));
    free(host_data);
    printf("Done copying.\n");
  }
#pragma endregion

#pragma region Select the initial K centroids
  {
    CUDACHECK(cudaMemcpy(centroids, data,
                         (size_t)num_clusters * (size_t)dim * sizeof(float),
                         cudaMemcpyDeviceToDevice));
  }
#pragma endregion

#pragma region Assign each data point to the closest centroid, measured via Euclidean distance.
  {
    // The kernel writes one float per block, so never launch (or read back)
    // more blocks than centroidDistances has elements.
    const size_t distance_count = (size_t)N * (size_t)num_clusters;
    const int probe = distance_count < 10 ? (int)distance_count : 10;

    findDistanceToCentroid<<<probe, 10>>>(centroidDistances, data);
    CUDACHECK(cudaGetLastError());       // launch-configuration errors
    CUDACHECK(cudaDeviceSynchronize());  // errors raised while executing
    printf("Shiet\n");

    float wtf[10];
    CUDACHECK(cudaMemcpy(wtf, centroidDistances, (size_t)probe * sizeof(float),
                         cudaMemcpyDeviceToHost));
    for (int i = 0; i < probe; ++i) {
      printf("asdf %d %f\n", i, wtf[i]);
    }
  }
#pragma endregion

  CUDACHECK(cudaFree(centroidDistances));
  CUDACHECK(cudaFree(data));
  CUDACHECK(cudaFree(clusterMap));
  CUDACHECK(cudaFree(centroids));
  return 0;
}