Question: Start with iris7_explore_7models.py, and perform the following: 1. For the decision tree classifier: a. Visualize the decision tree with maximum depth of 2,

Start with iris7_explore_7models.py, and perform the following:

1. For the decision tree classifier:
a. Visualize the decision tree with maximum depth of 2, 4, 8 and unrestricted. For each tree depth generate DOT/gv and PNG files. In addition, for each tree depth show the training time and prediction time.
b. Find the optimal maximum depth hyperparameter that produces the highest precision.
c. Show the code running under Python 3 along with correct output.
d. Show program output with no command line options and the output for each of the following command line options: --help, --version, --sample, --eval and --summaryonly
e. Compare the results between the original code and the modified code.
**iris7_explore_7models code below**

# iris7_explore_7models.py

# Load system libraries

import sys

import datetime

import random

# Load ML libraries

import pandas

from pandas.plotting import scatter_matrix

from matplotlib import pyplot

from sklearn import model_selection

from sklearn.metrics import classification_report

from sklearn.metrics import confusion_matrix

from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression

from sklearn.neighbors import KNeighborsClassifier

from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import RandomForestClassifier

from sklearn.svm import SVC as SVMClassifier

from scipy.spatial import distance

def _main():
    """Run the iris exploration end to end.

    Steps, in order: honour --help / --version, download the iris data set,
    optionally show samples and plots (--sample), split the data 80/20,
    cross-validate the scikit-learn models, optionally draw the comparison
    box plot (--eval), then train and score every classifier on the test
    split and print the accuracy summary.  Progress messages and the
    cross-validation step are skipped when --summaryonly is given.
    """
    if _showingHelp():
        _showHelp()
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and is missing under "python -S" or in frozen apps.
        sys.exit(0)

    if _showingVersions():
        _showVersions()

    # load dataset
    if not _showingSummaryOnly():
        print(datetime.datetime.now(), "explore_iris_7: Loading data")
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    # Alternative source kept from the original script for offline runs:
    # url = "file:////iris.csv"
    column_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
    dataset = pandas.read_csv(url, names=column_names)

    # NOTE(review): the pasted source lost its indentation; the plots are
    # assumed to belong to the --sample branch together with the data dump.
    if _showingSamples():
        _sampleData(dataset)
        _visualizeData(dataset, pyplot, scatter_matrix)

    # split data into train/test datasets
    if not _showingSummaryOnly():
        print(datetime.datetime.now(), "explore_iris_7: Splitting data into training and test sets")
    array = dataset.values
    X = array[:, 0:4]   # the four measurement columns
    Y = array[:, 4]     # the class label column
    test_size = 0.20
    seed = 7
    X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
        X, Y, test_size=test_size, random_state=seed)

    # test options and evaluation metric
    scoring = 'accuracy'
    cv_models = [
        ('Logistic Regression', LogisticRegression()),
        ('KNN Neighbors', KNeighborsClassifier()),
        ('Support Vector', SVMClassifier()),
        ('DecisionTree', DecisionTreeClassifier()),
        ('Random Forest', RandomForestClassifier()),
    ]

    if not _showingSummaryOnly():
        # evaluate models
        results = []
        model_names = []
        msg = ""
        # KFold without shuffling ignores random_state, and scikit-learn >= 0.24
        # raises ValueError when random_state is passed with shuffle=False.
        # Dropping the argument keeps the same contiguous folds and works on
        # every version.  The splitter is stateless, so build it once.
        kfold = model_selection.KFold(n_splits=10)
        for name, model in cv_models:
            print(datetime.datetime.now(), "explore_iris_7: Evaluate model %s" % name)
            cv_results = model_selection.cross_val_score(
                model, X_train, Y_train, cv=kfold, scoring=scoring)
            results.append(cv_results)
            model_names.append(name)
            # NOTE(review): entries are separated by a single space, so all
            # models end up on one output line - confirm a newline was not intended.
            msg += "%20s: \t\t%f\t(%f)" % (name, cv_results.mean(), cv_results.std()) + " "
        print(msg)
        print()

        # NOTE(review): nesting reconstructed; `results` only exists inside
        # this branch and --eval is already disabled by --summaryonly.
        if _showingEval():
            # compare algorithms
            fig = pyplot.figure()
            fig.suptitle('Compare algorithms')
            ax = fig.add_subplot(111)
            pyplot.boxplot(results)
            ax.set_xticklabels(model_names)
            pyplot.show()

    # make predictions on test dataset
    # Each entry: (label used in training/prediction logs,
    #              label used in the summary table, fresh estimator).
    classifiers = [
        ("Logistic Regression", 'Logistic Regression', LogisticRegression()),
        ("KNeighbors Classifier", 'KNN Neighbors', KNeighborsClassifier()),
        ("Support Vector", 'Support Vector', SVMClassifier()),
        ("Decision Tree Classifier", 'DecisionTree', DecisionTreeClassifier()),
        ("Random Forest", 'Random Forest', RandomForestClassifier()),
        ("My Random", 'My Random', myRNDClassifier()),
        ("My KNN", 'My KNN', myKNNClassifier()),
    ]
    trained_models = []
    for log_label, summary_label, classifier in classifiers:
        _train(classifier, log_label, X_train, Y_train, X_test, Y_test)
        _predict(classifier, log_label, X_train, Y_train, X_test, Y_test)
        trained_models.append((summary_label, classifier))

    _predictionAccuracySummary(trained_models, X_train, Y_train, X_test, Y_test)

#####################################################

# My random classifier

class myRNDClassifier:
    """Baseline classifier that answers every query with a random training label."""

    def fit(self, X_train, y_train):
        """Remember the training features and labels; nothing is learned."""
        self.X_train, self.y_train = X_train, y_train

    def predict(self, X_test):
        """Return one label per row of X_test, each drawn with random.choice from y_train."""
        return [random.choice(self.y_train) for _ in X_test]

#####################################################

# My KNN (K = 1) classifier

class myKNNClassifier:
    """Nearest-neighbour (K = 1) classifier based on Euclidean distance."""

    def fit(self, X_train, y_train):
        """Keep the training features and labels for lookup during prediction."""
        self.X_train, self.y_train = X_train, y_train

    def predict(self, X_test):
        """Return a list with the label of the closest training row for each test row."""
        return [self.closest(sample) for sample in X_test]

    def closest(self, row):
        """Return the label of the training row nearest to `row`.

        On equal distances the earliest training row wins, because a later
        candidate only replaces the current best when it is strictly closer.
        """
        nearest_index = 0
        nearest_distance = distance.euclidean(row, self.X_train[0])
        for index, candidate in enumerate(self.X_train[1:], start=1):
            candidate_distance = distance.euclidean(row, candidate)
            if candidate_distance < nearest_distance:
                nearest_index, nearest_distance = index, candidate_distance
        return self.y_train[nearest_index]

def euc(a, b):
    """Return the Euclidean distance between points a and b (via scipy)."""
    separation = distance.euclidean(a, b)
    return separation

#####################################################

# training and prediction functions

def _train(alg, algName, X_train, Y_train, X_test, Y_test):
    """Fit `alg` on the training split, logging start/end timestamps.

    The timestamps are suppressed when --summaryonly is active.  X_test and
    Y_test are accepted but not used here.
    """
    def announce(phase):
        # The flag is re-read on every call, as in the original inline checks.
        if not _showingSummaryOnly():
            print(datetime.datetime.now(), phase, algName)

    announce("Begin training: ")
    alg.fit(X_train, Y_train)
    announce("End training: ")

def _predict(alg, algName, X_train, Y_train, X_test, Y_test):
    """Predict labels for X_test with `alg` and report the quality metrics.

    Unless --summaryonly is active this prints begin/end timestamps, the
    accuracy score, the confusion matrix and the classification report.
    Nothing is returned.  X_train and Y_train are accepted but not used.
    """
    if not _showingSummaryOnly():
        print(datetime.datetime.now(), "Begin prediction: ", algName)

    predictions = alg.predict(X_test)

    # Everything after the prediction itself is reporting only.
    if _showingSummaryOnly():
        return
    print(datetime.datetime.now(), "End prediction: ", algName)
    print("%s: accuracy_score=%0.2f" % (algName, accuracy_score(Y_test, predictions)))
    print(confusion_matrix(Y_test, predictions))
    print(classification_report(Y_test, predictions))

def _predictionAccuracySummary(models, X_train, Y_train, X_test, Y_test):
    """Print a table with the test-set accuracy of every (name, model) pair.

    Each model is asked to predict X_test again and is scored against
    Y_test.  X_train and Y_train are accepted but not used.
    """
    print("Algorithm\t\tAccuracy Score")
    for label, estimator in models:
        score = accuracy_score(Y_test, estimator.predict(X_test))
        print("%20s\t\t%0.2f" % (label, score))

#####################################################

# data sampling and visualization functions

def _sampleData(dataset):
    """Print a quick textual overview of `dataset`.

    Output, in order: the frame's shape, its first 20 rows, the describe()
    statistics, then the row counts grouped by 'class' and by 'sepal-length'.
    """
    # shape, leading rows and descriptive statistics
    print(dataset.shape)
    print(dataset.head(20))
    print(dataset.describe())

    # distribution of rows per class and per sepal-length value
    for column in ('class', 'sepal-length'):
        print(dataset.groupby(column).size())

def _visualizeData(dataset, pyplot, scatter_matrix):
    """Show three figures for `dataset`, one after another.

    The figures are box-and-whisker plots (2x2 grid of subplots),
    histograms, and a scatter-plot matrix.  `pyplot.show()` is called after
    each figure is prepared.
    """
    box_options = dict(kind='box', subplots=True, layout=(2, 2), sharex=False, sharey=False)
    renderers = (
        lambda: dataset.plot(**box_options),   # box and whisker plots
        dataset.hist,                          # histograms
        lambda: scatter_matrix(dataset),       # scatter plot matrix
    )
    for render in renderers:
        render()
        pyplot.show()

#####################################################

# helper functions

def _parseArgumets(arg):
    """Return 1 when `arg` is one of the command-line options, otherwise None.

    Only sys.argv[1:] is inspected, so the script name itself never matches.
    """
    if arg in sys.argv[1:]:
        return 1
    return None

def _showingHelp():
    """Return a truthy value when --help was passed on the command line."""
    help_requested = _parseArgumets("--help")
    return help_requested

def _showingSummaryOnly():
    """Return a truthy value when --summaryonly was passed on the command line."""
    summary_only = _parseArgumets("--summaryonly")
    return summary_only

def _showingVersions():
    """Tell whether version info should be shown.

    --summaryonly overrides --version: False is returned when it is present,
    otherwise the result of looking up --version.
    """
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--version")

def _showingSamples():
    """Tell whether sample data should be shown.

    --summaryonly overrides --sample: False is returned when it is present,
    otherwise the result of looking up --sample.
    """
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--sample")

def _showingEval():
    """Tell whether the algorithm evaluation plot should be shown.

    --summaryonly overrides --eval: False is returned when it is present,
    otherwise the result of looking up --eval.
    """
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--eval")

def _showHelp():
    """Print the command-line syntax followed by one line per option."""
    help_lines = (
        "iris7_explore_7models: syntax iris7_explore_7models --version --sample --eval --summaryonly",
        "--help: show this help message",
        "--version: show version info for Python runtime and ML libraries",
        "--sample: show sample data",
        "--eval: show evaluation of algorithms",
        "--summaryonly: show only a summary of algorithms and their accuracy scores",
    )
    for line in help_lines:
        print(line)

def _showVersions():
    """Print the version of the Python runtime and of each ML library.

    Libraries are imported one at a time, immediately before their line is
    printed, in the order scipy, numpy, matplotlib, pandas, sklearn.
    """
    import sys
    print('Python: {}'.format(sys.version))

    # (module to import, output template) in reporting order
    library_reports = (
        ('scipy', 'scipy: {}'),
        ('numpy', 'numpy: {}'),
        ('matplotlib', 'matplotlib: {}'),
        ('pandas', 'pandas: {}'),
        ('sklearn', 'sklearn: {}'),
    )
    for module_name, template in library_reports:
        library = __import__(module_name)
        print(template.format(library.__version__))

# Script entry point: _main() is called unconditionally at module level
# (there is no __main__ guard), so it also runs if this file is imported.
_main()

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!