csci5451/assignments/04/km_cuda.cu

106 lines
2.9 KiB
Text
Raw Normal View History

2023-12-10 21:40:31 +00:00
// #define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
// Passes the status produced by a CUDA runtime call, together with the file
// and line of the call site, to cuda_check(). The do/while wrapper makes the
// expansion behave as a single statement.
#define CUDACHECK(err)                                                         \
  do {                                                                         \
    cuda_check((err), __FILE__, __LINE__);                                     \
  } while (0)
/**
 * Terminates the process when a CUDA runtime call did not succeed.
 *
 * @param error_code status returned by the CUDA call being checked
 * @param file       source file of the call site (used in the message)
 * @param line       source line of the call site (used in the message)
 *
 * On any status other than cudaSuccess, a diagnostic is written to stderr and
 * the process exits using the numeric status as its exit code. On success the
 * function returns without doing anything.
 */
inline void cuda_check(cudaError_t error_code, const char *file, int line) {
  // Successful calls need no reporting.
  if (error_code == cudaSuccess) {
    return;
  }

  const char *description = cudaGetErrorString(error_code);
  fprintf(stderr, "CUDA Error %d: %s. In file '%s' on line %d\n", error_code,
          description, file, line);
  // Flush explicitly so the message is emitted before the process exits.
  fflush(stderr);
  exit(error_code);
}
/**
 * Kernel that currently only records block indices; no distance is computed.
 *
 * Every thread of a block stores that block's x-index (converted to float)
 * into centroidDistances at the same index, so all threads of one block write
 * the same value to the same slot.
 *
 * @param centroidDistances output array; must hold at least gridDim.x floats
 * @param data              accepted but not read by the current body
 */
__global__ void findDistanceToCentroid(float *centroidDistances, float *data) {
  const unsigned int block = blockIdx.x;
  centroidDistances[block] = static_cast<float>(block);
}
/**
 * Parses `dim` floating-point values from one text line.
 *
 * Values are expected to be separated by whitespace (strtof skips leading
 * whitespace before each number). The cursor is advanced after every value so
 * that each column reads its own field.
 *
 * @param line NUL-terminated input line
 * @param row  output buffer with room for `dim` floats
 * @param dim  number of values to read
 * @return 0 on success, -1 if fewer than `dim` numbers could be parsed
 */
static int parse_row(const char *line, float *row, int dim) {
  const char *cursor = line;
  for (int j = 0; j < dim; ++j) {
    char *end = NULL;
    row[j] = strtof(cursor, &end);
    if (end == cursor) {
      // strtof consumed nothing: the line ran out of numbers.
      return -1;
    }
    cursor = end;
  }
  return 0;
}

/**
 * Reads a point file into a freshly malloc'd, row-major host buffer.
 *
 * The first line must contain two integers, "N dim"; it is followed by N
 * lines with `dim` numbers each.
 *
 * @param path       file to read
 * @param num_points receives N
 * @param num_dims   receives dim
 * @param points_out receives the buffer of N * dim floats; the caller frees it
 * @return 0 on success, -1 on any I/O, format, or allocation failure (a
 *         message is printed to stderr and the outputs are left untouched)
 */
static int read_points(const char *path, int *num_points, int *num_dims,
                       float **points_out) {
  FILE *fp = fopen(path, "r");
  if (fp == NULL) {
    perror(path);
    return -1;
  }

  char *line = NULL;
  size_t capacity = 0;
  float *points = NULL;
  int n = 0, d = 0;

  // getline() signals EOF/error with -1 (never 0), so compare against 0.
  bool ok = getline(&line, &capacity, fp) >= 0 &&
            sscanf(line, "%d %d", &n, &d) == 2 && n > 0 && d > 0;
  if (ok) {
    points = (float *)malloc((size_t)n * d * sizeof(float));
    ok = points != NULL;
  }
  // getline() reuses and grows `line`, so a single free() afterwards suffices.
  for (int i = 0; ok && i < n; ++i) {
    ok = getline(&line, &capacity, fp) >= 0 &&
         parse_row(line, points + (size_t)i * d, d) == 0;
  }

  free(line);
  fclose(fp);

  if (!ok) {
    fprintf(stderr, "%s: malformed or truncated input\n", path);
    free(points);
    return -1;
  }

  *num_points = n;
  *num_dims = d;
  *points_out = points;
  return 0;
}

/**
 * Entry point.
 *
 * Usage: <prog> <data_file> <num_clusters> <num_thread_blocks>
 *               <num_threads_per_block>
 *
 * Prints the CUDA runtime/driver versions, loads the data file, uploads it to
 * the GPU, seeds the centroids with the first `num_clusters` points, then
 * launches findDistanceToCentroid and prints the first entries it wrote.
 *
 * @return 0 on success, -1 on bad arguments or unreadable input. CUDA failures
 *         terminate the process inside CUDACHECK.
 */
int main(int argc, char **argv) {
  if (argc < 5) {
    fprintf(stderr,
            "Usage: %s <data_file> <num_clusters> <num_thread_blocks> "
            "<num_threads_per_block>\n",
            argc > 0 ? argv[0] : "km_cuda");
    return -1;
  }

  int runtimeVersion, driverVersion;
  CUDACHECK(cudaRuntimeGetVersion(&runtimeVersion));
  CUDACHECK(cudaDriverGetVersion(&driverVersion));
  printf("Runtime Version: %d, Driver Version: %d\n", runtimeVersion,
         driverVersion);

  char *data_file = argv[1];
  int num_clusters = atoi(argv[2]);
  int num_thread_blocks = atoi(argv[3]);
  int num_threads_per_block = atoi(argv[4]);
  // Still parsed so the command line stays the same; the launch below does not
  // use them yet.
  (void)num_thread_blocks;
  (void)num_threads_per_block;

  if (num_clusters <= 0) {
    fprintf(stderr, "num_clusters must be a positive integer\n");
    return -1;
  }

  int N, dim;
  float *centroids, *data, *centroidDistances;
  int *clusterMap;

#pragma region Read in data
  float *hostData = NULL;
  if (read_points(data_file, &N, &dim, &hostData) != 0)
    return -1;

  if (num_clusters > N) {
    // The centroids are seeded from the first num_clusters points.
    fprintf(stderr, "num_clusters (%d) exceeds the number of points (%d)\n",
            num_clusters, N);
    free(hostData);
    return -1;
  }

  // Compute sizes in size_t so large inputs do not overflow int arithmetic.
  const size_t dataBytes = (size_t)N * dim * sizeof(float);
  const size_t centroidBytes = (size_t)num_clusters * dim * sizeof(float);
  const size_t distanceCount = (size_t)N * num_clusters;

  // Allocate memory on the GPU
  CUDACHECK(cudaMalloc((void **)&centroids, centroidBytes));
  CUDACHECK(cudaMalloc((void **)&clusterMap, (size_t)N * sizeof(int)));
  CUDACHECK(cudaMalloc((void **)&data, dataBytes));
  CUDACHECK(cudaMalloc((void **)&centroidDistances,
                       distanceCount * sizeof(float)));

  // One bulk transfer instead of one cudaMemcpy per row.
  CUDACHECK(cudaMemcpy(data, hostData, dataBytes, cudaMemcpyHostToDevice));
  free(hostData);
  printf("Done copying.\n");
#pragma endregion

#pragma region Select the initial K centroids
  CUDACHECK(
      cudaMemcpy(centroids, data, centroidBytes, cudaMemcpyDeviceToDevice));
#pragma endregion

#pragma region Assign each data point to the closest centroid
  {
    const int kDebugSlots = 10;
    // Never touch more entries than centroidDistances actually holds.
    const int debugCount =
        distanceCount < (size_t)kDebugSlots ? (int)distanceCount : kDebugSlots;

    findDistanceToCentroid<<<debugCount, 10>>>(centroidDistances, data);
    CUDACHECK(cudaGetLastError());      // launch-configuration errors
    CUDACHECK(cudaDeviceSynchronize()); // errors raised while the kernel ran
    printf("Shiet\n");

    float wtf[kDebugSlots];
    CUDACHECK(cudaMemcpy(wtf, centroidDistances, debugCount * sizeof(float),
                         cudaMemcpyDeviceToHost));
    for (int i = 0; i < debugCount; ++i) {
      printf("asdf %d %f\n", i, wtf[i]);
    }
  }
#pragma endregion

  CUDACHECK(cudaFree(centroidDistances));
  CUDACHECK(cudaFree(data));
  CUDACHECK(cudaFree(clusterMap));
  CUDACHECK(cudaFree(centroids));
  return 0;
}