Question: Hello, I am new to Parallel Programming. I am trying to implement pthreads for a sequential program in C. The program is Kmeans and I

Hello,

I am new to Parallel Programming. I am trying to implement pthreads for a sequential program in C. The program is Kmeans and I am trying to know the parts, which can be parallelized, The Sequentioacode is below.

#include

#include

#include

#include

#include

#define MAX_POINTS 4096

#define MAX_CLUSTERS 32

typedef struct point

{

float x; // The x-coordinate of the point

float y; // The y-coordinate of the point

int cluster; // The cluster that the point belongs to

} point;

int N; // number of entries in the data

int k; // number of centroids

point data[MAX_POINTS]; // Data coordinates

point cluster[MAX_CLUSTERS]; // The coordinates of each cluster center (also called centroid)

void read_data()

{

N = 1797;

k = 9;

FILE* fp = fopen("kmeans-data.txt", "r");

if (fp == NULL) {

perror("Cannot open the file");

exit(EXIT_FAILURE);

}

// Initialize points from the data file

float temp;

for (int i = 0; i < N; i++)

{

fscanf(fp, "%f %f", &data[i].x, &data[i].y);

data[i].cluster = -1; // Initialize the cluster number to -1

}

printf("Read the problem data! ");

// Initialize centroids randomly

srand(0); // Setting 0 as the random number generation seed

for (int i = 0; i < k; i++)

{

int r = rand() % N;

cluster[i].x = data[r].x;

cluster[i].y = data[r].y;

}

fclose(fp);

}

int get_closest_centroid(int i, int k)

{

/* find the nearest centroid */

int nearest_cluster = -1;

double xdist, ydist, dist, min_dist;

min_dist = dist = INT_MAX;

for (int c = 0; c < k; c++)

{ // For each centroid

// Calculate the square of the Euclidean distance between that centroid and the point

xdist = data[i].x - cluster[c].x;

ydist = data[i].y - cluster[c].y;

dist = xdist * xdist + ydist * ydist; // The square of Euclidean distance

//printf("%.2lf ", dist);

if (dist <= min_dist)

{

min_dist = dist;

nearest_cluster = c;

}

}

//printf("----------- ");

return nearest_cluster;

}

bool assign_clusters_to_points()

{

bool something_changed = false;

int old_cluster = -1, new_cluster = -1;

for (int i = 0; i < N; i++)

{ // For each data point

old_cluster = data[i].cluster;

new_cluster = get_closest_centroid(i, k);

data[i].cluster = new_cluster; // Assign a cluster to the point i

if (old_cluster != new_cluster)

{

something_changed = true;

}

}

return something_changed;

}

void update_cluster_centers()

{

/* Update the cluster centers */

int c;

int count[MAX_CLUSTERS] = { 0 }; // Array to keep track of the number of points in each cluster

point temp[MAX_CLUSTERS] = { 0.0 };

for (int i = 0; i < N; i++)

{

c = data[i].cluster;

count[c]++;

temp[c].x += data[i].x;

temp[c].y += data[i].y;

}

for (int i = 0; i < k; i++)

{

cluster[i].x = temp[i].x / count[i];

cluster[i].y = temp[i].y / count[i];

}

}

int kmeans(int k)

{

bool somechange;

int iter = 0;

do {

iter++; // Keep track of number of iterations

somechange = assign_clusters_to_points();

update_cluster_centers();

} while (somechange);

printf("Number of iterations taken = %d ", iter);

printf("Computed cluster numbers successfully! ");

}

void write_results()

{

FILE* fp = fopen("kmeans-results.txt", "w");

if (fp == NULL) {

perror("Cannot open the file");

exit(EXIT_FAILURE);

}

else

{

for (int i = 0; i < N; i++)

{

fprintf(fp, "%.2f %.2f %d ", data[i].x, data[i].y, data[i].cluster);

}

}

printf("Wrote the results to a file! ");

}

int main()

{

read_data();

kmeans(k);

write_results();

}

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!