Question: I need help with task 1, can anyone help me solve it? Task 1: Use PCA from previous assignment to convert data from 4D to 2D.
I need help with task 1, can anyone help me solve it?
Task 1: Use PCA from previous assignment to convert data from 4D to 2D. If you did not manage to implement PCA, you can simply use first 2 dimensions of the database. (4 points if PCA is used, otherwise 1 point)
Task 2a: Implement kNN classification algorithm and apply it to classify iris database. (2 points)
Task 2b: Try different values of k = {1, 2, 3, 4, 5} and print prediction accuracy for validation set. Please select optimal value of k and justify your selection. (3 points)
Task 3: Implement random forest classification and apply it to classify iris database. Use any reasonable parameters you want. (3 points)
Task 4: Visualize the results of kNN and random forests. (4 points)
# Load packages as usual
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import random
import matplotlib.cm as cm
import numpy.matlib
from matplotlib.colors import ListedColormap
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
# Manipulating figure sizes
import matplotlib
# Global plotting defaults applied to every figure created afterwards:
# figure size (15, 7) and size 15 for both the base font and axes titles.
matplotlib.rcParams.update({
    'figure.figsize': (15, 7),
    'font.size': 15,
    'axes.titlesize': 15,
})
def __read(fileName, pTrainSamples=0.6, pValidSamples=0.2):
    """Load a CSV file and split its rows into training/validation/test sets.

    The split is positional (rows are not shuffled): the first
    ``pTrainSamples`` fraction of the rows forms the training set, the next
    ``pValidSamples`` fraction forms the validation set, and all remaining
    rows form the test set. In every row the last column is taken as the
    class label and all preceding columns as features.

    Parameters
    ----------
    fileName : str or path-like
        CSV file passed to ``pandas.read_csv``.
    pTrainSamples : float
        Fraction of rows used for training.
    pValidSamples : float
        Fraction of rows used for validation.

    Returns
    -------
    tuple
        ``(trainingFeatures, trainingLabels, validationFeatures,
        validationLabels, testingFeatures, testingLabels)``; feature arrays
        are float, label arrays are int.
    """
    # NOTE(review): read_csv uses the first row as a header by default; if
    # the file has no header line, the first sample is consumed as column
    # names -- confirm against the actual data file.
    # Builtin float/int replace the np.float/np.int aliases, which were
    # removed from NumPy 1.24 and raise AttributeError there.
    values = pd.read_csv(fileName).values.astype(float)

    nTrainSamples = int(values.shape[0] * pTrainSamples)
    nValidSamples = int(values.shape[0] * pValidSamples)
    validEnd = nTrainSamples + nValidSamples

    trainingRows = values[:nTrainSamples]
    validationRows = values[nTrainSamples:validEnd]
    testingRows = values[validEnd:]

    return (
        trainingRows[:, :-1], trainingRows[:, -1].astype(int),
        validationRows[:, :-1], validationRows[:, -1].astype(int),
        testingRows[:, :-1], testingRows[:, -1].astype(int),
    )
# Load the dataset and unpack the six arrays returned by __read
# (features and labels for the training, validation and testing splits).
(trainingFeatures, trainingLabels,
 validationFeatures, validationLabels,
 testingFeatures, testingLabels) = __read('C:/Users/me/iris_new.csv')
# Converting data to two dimensions using PCA
# The conversion from 4D input to 2D is needed to simplify visualization of the results. Please fit PCA on the training set, and then convert the training, validation and testing data from 4D to 2D.
def __PCA(data):
    """Compute the principal components of a data matrix.

    Parameters
    ----------
    data : array-like of shape (d, N)
        One sample per COLUMN: ``d`` dimensions (rows) by ``N`` samples
        (columns). A samples-by-features matrix must be transposed by the
        caller before being passed in.

    Returns
    -------
    evals : ndarray of shape (d,)
        Eigenvalues of the covariance matrix, largest first.
    evecs : ndarray of shape (d, d)
        Matching eigenvectors as columns, in the same order as ``evals``.

    Raises
    ------
    ValueError
        If ``data`` is not two-dimensional.
    """
    data = np.asarray(data, dtype=float)
    if data.ndim != 2:
        raise ValueError("data must be a 2-D array of shape (d, N)")

    # Center every dimension (row) on its mean; broadcasting replaces the
    # explicit numpy.matlib.repmat tiling.
    data_cent = data - data.mean(axis=1, keepdims=True)

    # Covariance between dimensions (np.cov treats rows as variables) and
    # its eigen-decomposition.
    Sigma = np.cov(data_cent)
    evals, evecs = np.linalg.eigh(Sigma)

    # eigh returns eigenvalues in ascending order; flip both outputs so the
    # leading components come first.
    return np.flip(evals, 0), np.flip(evecs, 1)
def __transformData(features, PCevecs, nComponents=2):
    """Project samples onto the leading principal components.

    Parameters
    ----------
    features : array-like of shape (N, d)
        One sample per row.
    PCevecs : ndarray of shape (d, d)
        Eigenvectors as columns, leading component first.
    nComponents : int, optional
        Number of leading components to keep (default 2, the previously
        hard-coded value).

    Returns
    -------
    ndarray of shape (N, nComponents)
        The dot product of ``features`` with the first ``nComponents``
        eigenvector columns. The features are projected as given; no mean
        is subtracted here.
    """
    return np.dot(features, PCevecs[:, :nComponents])
# __PCA unpacks its input shape as (d, N), i.e. one sample per column, while
# the feature matrices hold one sample per row. The training matrix is
# therefore transposed; without it the covariance would be N x N and the
# projections below would fail with a shape mismatch.
PCevals, PCevecs = __PCA(trainingFeatures.T)

# Project all three splits with the components fitted on the training set.
trainingFeatures2D = __transformData(trainingFeatures, PCevecs)
validationFeatures2D = __transformData(validationFeatures, PCevecs)
testingFeatures2D = __transformData(testingFeatures, PCevecs)

print('shape training = ', trainingFeatures2D.shape)
print('shape validation = ', validationFeatures2D.shape)
print('shape testing = ', testingFeatures2D.shape)
# ----------------- Data (iris_new.csv):
| 6.012273103359637716e+00,2.921568019750800715e+00,5.144015868688889270e+00,1.970088604348821493e+00,2.000000000000000000e+00 | |||||||||||||
| 6.206367020846028204e+00,3.228161981733847608e+00,5.003777773209909796e+00,2.070722282333296693e+00,2.000000000000000000e+00 | |||||||||||||
| 5.625798030443882602e+00,3.904787165603675891e+00,1.357799778276990610e+00,4.936702532950165145e-01,0.000000000000000000e+00 | |||||||||||||
| 6.808195662123572411e+00,3.178650674586387392e+00,5.100970180963188660e+00,1.889159208617503083e+00,1.000000000000000000e+00 | |||||||||||||
| 6.581056019090129716e+00,2.913359774022858506e+00,4.587302757255009134e+00,1.544190829702803081e+00,1.000000000000000000e+00 | |||||||||||||
| 4.922743626456581367e+00,3.535691449125819119e+00,2.132672532444197522e+00,4.239278473932364122e-01,0.000000000000000000e+00 | |||||||||||||
| 5.802146320330803242e+00,2.946526640063509728e+00,4.959872418259350546e+00,2.021122281946927668e+00,2.000000000000000000e+00 | |||||||||||||
| 6.375128323431418664e+00,2.666462047101149313e+00,5.821189071787672198e+00,1.536890095459532590e+00,2.000000000000000000e+00 | |||||||||||||
| 7.533823868138515678e+00,3.132375225578859279e+00,6.572352764606229414e+00,1.905853718958894216e+00,2.000000000000000000e+00 | |||||||||||||
| 6.829638868899861492e+00,3.331713498791738726e+00,4.813943469329544200e+00,1.586665861661761712e+00,1.000000000000000000e+00 | |||||||||||||
| 5.950381506190458580e+00,2.832591299094304738e+00,5.124514747648298396e+00,2.075933042607615686e+00,2.000000000000000000e+00 | |||||||||||||
| 6.698675459825690126e+00,3.013641660238998377e+00,5.871907459071210589e+00,2.316142751115663589e+00,2.000000000000000000e+00 | |||||||||||||
| 5.404459128494982778e+00,3.457856717182044459e+00,1.539995605002054235e+00,3.332571255378059538e-01,0.000000000000000000e+00 | |||||||||||||
| 4.495654321360945893e+00,3.305647532933800203e+00,1.584864478338738003e+00,3.858567310400071726e-01,0.000000000000000000e+00 | |||||||||||||
| 5.339075543530506174e+00,3.973787538327554270e+00,1.634324363275604819e+00,5.430132463146704058e-01,0.000000000000000000e+00 | |||||||||||||
| 7.601062852092417721e+00,3.002185731093820920e+00,6.343795144436067623e+00,2.139230252694762413e+00,2.000000000000000000e+00 | |||||||||||||
| 6.351704787787541662e+00,3.429505864743360721e+00,6.014392461776881582e+00,2.566410057648921050e+00,2.000000000000000000e+00 | |||||||||||||
| 6.379738060276616274e+00,2.632994102455576968e+00,5.275529757863798075e+00,2.047544577918903652e+00,2.000000000000000000e+00 | |||||||||||||
| 5.741775363214967953e+00,4.513852190532458231e+00,1.690940065951078264e+00,5.135774649003405079e-01,0.000000000000000000e+00 | |||||||||||||
| 5.505925961819941428e+00,3.122705091202445438e+00,4.543438336381168341e+00,1.664080033291209793e+00,1.000000000000000000e+00 | |||||||||||||
| 7.801400975875588450e+00,3.168540458555272110e+00,6.116126874927260815e+00,2.426846823877313764e+00,2.000000000000000000e+00 | |||||||||||||
| 5.357132049992936018e+00,3.983135840533841066e+00,1.662844363455381558e+00,3.625930788722653286e-01,0.000000000000000000e+00 | |||||||||||||
| 7.019590355323099828e+00,3.122505046361530923e+00,5.029930044598924788e+00,1.547275107884078960e+00,1.000000000000000000e+00 | |||||||||||||
| 6.911969636692916197e+00,3.392803025461516242e+00,6.155437090960832691e+00,2.541774303108266242e+00,2.000000000000000000e+00 |
Step by Step Solution
There are 3 Steps involved in it
Get step-by-step solutions from verified subject matter experts
