Question: Below is my code and I'm using Visual Studio Code (Python 3.9.13) to program and for some reason it won't read in my breast_cancer_wisconsin.data file

Below is my code. I'm using Visual Studio Code (Python 3.9.13), and for some reason it no longer reads in my breast_cancer_wisconsin.data file. It worked previously and generated random centroid outputs, but now it doesn't recognize the data file. Below is the error message I get when I run my code.

FileNotFoundError: [Errno 2] No such file or directory: 'breast_cancer_wisconsin.data'

I deleted and re-saved the data file, I even tried renaming the data file, I tried removing other folders in the workspace, and restarted VSCode and I still get the same "FileNotFoundError" message in my terminal.

Any help appreciated, thank you!

 

import matplotlib.pyplot as plt

import pandas as pd

import numpy as np

import math

 

def distance(u, point):
    """Return the Euclidean distance between *u* and *point*, rounded to 6 decimals.

    The two sequences are paired element-wise with ``zip``, so any trailing
    elements of the longer one are ignored.
    """
    squared_total = 0
    for centroid_coord, point_coord in zip(u, point):
        squared_total += (centroid_coord - point_coord) ** 2
    return round(math.sqrt(squared_total), 6)

 

def _resolve_data_path(filename):
    """Return a path to *filename*, checking the CWD first and then this script's folder.

    A bare relative filename is resolved against the current working
    directory, which an IDE may set to the workspace folder rather than the
    folder containing the script. Falling back to the script's own folder
    makes the lookup independent of where the interpreter was launched.

    Raises:
        FileNotFoundError: if the file is in neither location.
    """
    from pathlib import Path

    tried = [Path(filename)]
    script_file = globals().get('__file__')  # absent in some interactive sessions
    if script_file is not None:
        tried.append(Path(script_file).resolve().parent / filename)

    for candidate in tried:
        if candidate.exists():
            return candidate

    searched = ', '.join(str(path.resolve()) for path in tried)
    raise FileNotFoundError(
        f"No such file or directory: '{filename}' (looked in: {searched})"
    )


def main():
    """Cluster the data file into two groups with k-means and save the labels.

    Reads 'breast_cancer_wisconsin.data', fills missing 'A7' values with the
    column mean, picks two distinct random rows as the initial centroids
    (labelled 2 and 4), alternates assign/recompute steps until the centroids
    stop changing or the iteration cap is reached, prints the centroids, and
    writes the 'SCN', 'Class' and 'Predicted_Class' columns to 'output.csv'.

    Raises:
        FileNotFoundError: if the data file cannot be located.
    """
    col = ['SCN', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', 'A10', 'Class']  # dataframe column names
    data_path = _resolve_data_path('breast_cancer_wisconsin.data')
    df = pd.read_csv(data_path, na_values='?', names=col)  # read in data file

    # Fill NaN values with the column mean. Assign the result back instead of
    # calling fillna(inplace=True) on a column selection, which may operate on
    # a temporary copy and leave df unchanged.
    df['A7'] = df['A7'].fillna(df['A7'].mean())

    # Feature-only frame (columns A2..A10); cluster labels are kept separately
    # so the features are never mixed with the predicted class.
    new_df = df.iloc[:, 1:10].astype('float64')
    nrows = new_df.shape[0]
    points = new_df.to_numpy()

    # Two random centroids generated, labels are u2 & u4.
    # replace=False guarantees two different rows; sampling from nrows (not
    # nrows - 1) lets the last row be chosen as well.
    u2_idx, u4_idx = np.random.choice(nrows, 2, replace=False)
    u2 = new_df.iloc[u2_idx]
    u4 = new_df.iloc[u4_idx]

    # INITIAL STEP
    # print initial centroids generated
    print('Randomly selected row', u2_idx, 'for centroid mu_2.\n')
    print('Initial centroid mu_2:')
    print(u2)
    print()
    print('Randomly selected row', u4_idx, 'for centroid mu_4.\n')
    print('Initial centroid mu_4:')
    print(u4)

    df['Predicted_Class'] = -1

    # NOTE(review): the original comment said "loop 50 times" but the code has
    # always looped 10 times; the code's value is kept -- confirm which is intended.
    max_iterations = 10
    itr = 0
    labels = np.full(nrows, -1)

    for itr in range(1, max_iterations + 1):
        # ASSIGN STEP
        # Each point joins the nearer centroid; ties go to cluster 2.
        labels = np.array(
            [2 if distance(u2, point) <= distance(u4, point) else 4 for point in points]
        )
        in_cluster_2 = labels == 2
        in_cluster_4 = ~in_cluster_2

        # RECOMPUTE STEP
        # Column-wise means of each cluster. axis=0 is explicit because
        # np.mean(DataFrame) collapses to a single scalar on pandas >= 2.0.
        # An empty cluster keeps its previous centroid instead of becoming NaN.
        mu_2 = new_df.loc[in_cluster_2].mean(axis=0) if in_cluster_2.any() else u2
        mu_4 = new_df.loc[in_cluster_4].mean(axis=0) if in_cluster_4.any() else u4

        if mu_2.equals(u2) and mu_4.equals(u4):  # stop looping when centroids are equal
            break
        u2, u4 = mu_2, mu_4

    print('Program ended after', itr, 'iterations.\n')

    # update values & print final results
    print('Final centroid mu_2:')
    for name, value in u2.items():
        print(name, '     ', value)
    print('')
    print('Final centroid mu_4:')
    for name, value in u4.items():
        print(name, '     ', value)

    # Include first 20 rows in report, run program multiple times and submit best results
    df['Predicted_Class'] = labels.astype('int')
    print('\nFinal cluster assignment:\n')
    print(df[['SCN', 'Class', 'Predicted_Class']].head(20))

    # Save output to CSV
    df[['SCN', 'Class', 'Predicted_Class']].to_csv('output.csv')


 

# Run the clustering only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock

The error message you're encountering, "FileNotFoundError: [Errno 2] No such file or directory: 'breast_cancer_wisconsin.data'", ... View full answer

blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!