Question: Complete the program and also include ouput of the progam: #!/usr/bin/python # This program attend to read data from a csv file, # and apply

Complete the program and also include ouput of the progam:

#!/usr/bin/python

# This program attend to read data from a csv file,

# and apply kmean, then output the result.

from pylab import plot,show

from numpy import vstack,array

from numpy.random import rand

from scipy.cluster.vq import kmeans, vq, whiten

import csv

if __name__ == "__main__":

# clusters

K = 3

data_arr = []

clust_name_arr = []

with open('clustering.csv', 'rb') as f:

reader = csv.reader(f)

for row in reader:

data_arr.append([float(x) for x in row[1:]])

clust_name_arr.append([row[0]])

data = vstack( data_arr )

clust_name = vstack(clust_name_arr)

# normalization

data = whiten(data)

# computing K-Means with K (clusters)

centroids, distortion = kmeans(data,3)

print "distortion = " + str(distortion)

# assign each sample to a cluster

idx,_ = vq(data,centroids)

# some plotting using numpy's logical indexing

plot(data[idx==0,0], data[idx==0,1],'ob',

data[idx==1,0], data[idx==1,1],'or',

data[idx==2,0], data[idx==2,1],'og')

print clust_name

print data

for i in range(K):

result_names = clust_name[idx==i, 0]

print "================================="

print "Cluster " + str(i+1)

for name in result_names:

print name

plot(centroids[:,0],

centroids[:,1],

'sg',markersize=8)

show()

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!