106 lines
2.9 KiB
Text
106 lines
2.9 KiB
Text
|
// #define _POSIX_C_SOURCE 200809L
|
||
|
#include <stdio.h>
#include <stdlib.h>
|
||
|
|
||
|
// Forwards the result of a CUDA runtime call to cuda_check() together with
// the source location of the call site. The do/while wrapper makes the macro
// expand to a single statement, so it is safe inside unbraced if/else bodies.
#define CUDACHECK(err)                       \
  do {                                       \
    cuda_check((err), __FILE__, __LINE__);   \
  } while (0)
|
||
|
// Terminates the process when a CUDA runtime call did not succeed.
//
// error_code: status returned by the CUDA call being checked.
// file, line: source location to report (normally supplied by CUDACHECK).
//
// On cudaSuccess this is a no-op. Otherwise the numeric code, its
// cudaGetErrorString() description and the location are written to stderr,
// stderr is flushed, and the process exits with the error code as its status.
inline void cuda_check(cudaError_t error_code, const char *file, int line) {
  if (error_code == cudaSuccess) {
    return;
  }

  const char *description = cudaGetErrorString(error_code);
  fprintf(stderr, "CUDA Error %d: %s. In file '%s' on line %d\n", error_code,
          description, file, line);
  fflush(stderr);
  exit(error_code);
}
|
||
|
|
||
|
// Placeholder kernel: each thread stores its block index (converted to float)
// into centroidDistances[blockIdx.x]. All threads of a block write the same
// value to the same slot.
//
// centroidDistances: output array; it is indexed by blockIdx.x, so it must
//                    hold at least gridDim.x floats.
// data:              currently unused.
__global__ void findDistanceToCentroid(float *centroidDistances, float *data) {
  const unsigned int block = blockIdx.x;
  centroidDistances[block] = block;
}
|
||
|
|
||
|
// Parses a point set from an already-open text stream.
//
// The first line must contain two positive integers "N dim"; each of the
// following N lines must contain at least `dim` whitespace-separated numbers.
//
// fp:    stream positioned at the start of the file.
// count: receives N.
// dim:   receives the number of values per point.
//
// Returns a malloc'd row-major array of N * dim floats that the caller must
// free(), or NULL if the header or any row cannot be read or parsed.
static float *read_points(FILE *fp, int *count, int *dim) {
  char *line = NULL;
  size_t capacity = 0;
  float *points = NULL;
  bool ok = false;

  // getline() reports failure/EOF with -1, never with 0.
  if (getline(&line, &capacity, fp) != -1 &&
      sscanf(line, "%d %d", count, dim) == 2 && *count > 0 && *dim > 0) {
    points = (float *)malloc((size_t)*count * (size_t)*dim * sizeof(float));
  }

  if (points != NULL) {
    ok = true;
    for (int i = 0; ok && i < *count; ++i) {
      if (getline(&line, &capacity, fp) == -1) {
        ok = false;
        break;
      }

      // Walk the line token by token; `cursor` advances past each number so
      // that every column gets its own value.
      const char *cursor = line;
      for (int j = 0; j < *dim; ++j) {
        char *end = NULL;
        const float value = strtof(cursor, &end);
        if (end == cursor) {  // no number found where one was expected
          ok = false;
          break;
        }
        points[(size_t)i * (size_t)*dim + (size_t)j] = value;
        cursor = end;
      }
    }
  }

  free(line);
  if (!ok) {
    free(points);
    points = NULL;
  }
  return points;
}

// Entry point.
//
// Usage: <program> <data_file> <num_clusters> <num_thread_blocks>
//                  <num_threads_per_block>
//
// Prints the CUDA runtime/driver versions, loads the point set from
// <data_file>, uploads it to the GPU, seeds the centroids with the first
// <num_clusters> points, runs the placeholder findDistanceToCentroid kernel
// and prints the first few values it produced.
//
// Returns 0 on success and -1 on a usage, file or input error. Any failing
// CUDA call terminates the process through CUDACHECK.
int main(int argc, char **argv) {
  int runtimeVersion = 0, driverVersion = 0;
  CUDACHECK(cudaRuntimeGetVersion(&runtimeVersion));
  CUDACHECK(cudaDriverGetVersion(&driverVersion));
  printf("Runtime Version: %d, Driver Version: %d\n", runtimeVersion,
         driverVersion);

  if (argc < 5) {
    fprintf(stderr,
            "Usage: %s <data_file> <num_clusters> <num_thread_blocks> "
            "<num_threads_per_block>\n",
            argc > 0 ? argv[0] : "program");
    return -1;
  }

  const char *data_file = argv[1];
  const int num_clusters = atoi(argv[2]);
  const int num_thread_blocks = atoi(argv[3]);
  const int num_threads_per_block = atoi(argv[4]);
  // The launch configuration arguments are accepted but the placeholder
  // kernel launch below does not use them yet.
  (void)num_thread_blocks;
  (void)num_threads_per_block;

  if (num_clusters <= 0) {
    fprintf(stderr, "num_clusters must be a positive integer\n");
    return -1;
  }

  int N = 0, dim = 0;
  float *centroids = NULL, *data = NULL, *centroidDistances = NULL;
  int *clusterMap = NULL;

#pragma region Read in data
  {
    FILE *fp = fopen(data_file, "r");
    if (fp == NULL) {
      perror(data_file);
      return -1;
    }
    float *host_points = read_points(fp, &N, &dim);
    fclose(fp);

    if (host_points == NULL) {
      fprintf(stderr, "Failed to read point data from '%s'\n", data_file);
      return -1;
    }
    if (num_clusters > N) {
      // The centroids are seeded from the first num_clusters points, so there
      // must be at least that many points.
      fprintf(stderr, "num_clusters (%d) exceeds the number of points (%d)\n",
              num_clusters, N);
      free(host_points);
      return -1;
    }

    // Sizes are computed in size_t so that large inputs do not overflow int.
    const size_t point_bytes = (size_t)N * (size_t)dim * sizeof(float);
    const size_t centroid_bytes =
        (size_t)num_clusters * (size_t)dim * sizeof(float);
    const size_t distance_bytes =
        (size_t)N * (size_t)num_clusters * sizeof(float);

    // Allocate memory on the GPU
    CUDACHECK(cudaMalloc((void **)&centroids, centroid_bytes));
    CUDACHECK(cudaMalloc((void **)&clusterMap, (size_t)N * sizeof(int)));
    CUDACHECK(cudaMalloc((void **)&data, point_bytes));
    CUDACHECK(cudaMalloc((void **)&centroidDistances, distance_bytes));

    // Upload the whole point set with a single transfer.
    CUDACHECK(
        cudaMemcpy(data, host_points, point_bytes, cudaMemcpyHostToDevice));
    free(host_points);

    printf("Done copying.\n");
  }
#pragma endregion

#pragma region Select the initial K centroids
  {
    CUDACHECK(cudaMemcpy(centroids, data,
                         (size_t)num_clusters * (size_t)dim * sizeof(float),
                         cudaMemcpyDeviceToDevice));
  }
#pragma endregion

#pragma region Assign each data point to the closest centroid, measured via Euclidean distance.
  {
    // The kernel writes one float per block, so never launch (or read back)
    // more entries than centroidDistances actually holds.
    const int kPreviewMax = 10;
    const size_t distance_count = (size_t)N * (size_t)num_clusters;
    const int preview =
        distance_count < (size_t)kPreviewMax ? (int)distance_count : kPreviewMax;

    findDistanceToCentroid<<<preview, 10>>>(centroidDistances, data);
    CUDACHECK(cudaGetLastError());       // launch-configuration errors
    CUDACHECK(cudaDeviceSynchronize());  // errors raised while the kernel ran
    printf("Shiet\n");

    float wtf[10];
    CUDACHECK(cudaMemcpy(wtf, centroidDistances,
                         (size_t)preview * sizeof(float),
                         cudaMemcpyDeviceToHost));
    for (int i = 0; i < preview; ++i) {
      printf("asdf %d %f\n", i, wtf[i]);
    }
  }
#pragma endregion

  CUDACHECK(cudaFree(centroidDistances));
  CUDACHECK(cudaFree(data));
  CUDACHECK(cudaFree(clusterMap));
  CUDACHECK(cudaFree(centroids));

  return 0;
}
|