Question: Start with iris7_explore_7models.py, and perform the following: 1. For the decision tree classifier: a. Visualize the decision tree with maximum depth of 2,

Start with iris7_explore_7models.py, and perform the following:

1. For the decision tree classifier:

a. Visualize the decision tree with maximum depth of 2, 4, 8 and unrestricted. For each tree depth generate DOT/gv and PNG files. In addition, for each tree depth show the training time and prediction time.

b. Find the optimal maximum depth hyperparameter that produces the highest precision.

c. Show the code running under Python 3 along with correct output.

d. Show program output with no command line options and the output for each of the following command line options: --help, --version, --sample, --eval and --summaryonly

e. Compare the results between the original code and the modified code.

**iris7_explore_7models.py code below**

# iris7_explore_7models.py

# Load system libraries

import sys

import datetime

import random

# Load ML libraries

import pandas

from pandas.plotting import scatter_matrix

from matplotlib import pyplot

from sklearn import model_selection

from sklearn.metrics import classification_report

from sklearn.metrics import confusion_matrix

from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression

from sklearn.neighbors import KNeighborsClassifier

from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import RandomForestClassifier

from sklearn.svm import SVC as SVMClassifier

from scipy.spatial import distance

def _main():
    """Run the iris exploration pipeline from data load to accuracy summary.

    Steps, in order:
      1. ``--help`` prints the usage text and exits with status 0.
      2. ``--version`` prints runtime/library versions.
      3. The iris data set is downloaded and split 80/20 into train/test.
      4. ``--sample`` prints and plots the raw data.
      5. Unless ``--summaryonly`` is given, the five scikit-learn models are
         scored with 10-fold cross-validation (``--eval`` adds a box plot).
      6. All seven classifiers are trained, used to predict the test split,
         and listed in a final accuracy table.
    """
    if _showingHelp():
        _showHelp()
        # sys.exit is always available; the bare ``exit`` builtin is injected
        # by the ``site`` module and is absent under ``python -S``.
        sys.exit(0)

    if _showingVersions():
        _showVersions()

    # The command line does not change while running, so look this up once.
    verbose = not _showingSummaryOnly()

    # load dataset
    if verbose:
        print(datetime.datetime.now(), "explore_iris_7: Loading data")
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    # url = "file:////iris.csv"
    names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
    dataset = pandas.read_csv(url, names=names)

    # NOTE(review): the pasted source lost its indentation; plotting is
    # assumed to belong to the --sample branch together with the text dump.
    if _showingSamples():
        _sampleData(dataset)
        _visualizeData(dataset, pyplot, scatter_matrix)

    # split data into train/test datasets
    if verbose:
        print(datetime.datetime.now(), "explore_iris_7: Splitting data into training and test sets")
    array = dataset.values
    X = array[:, 0:4]
    Y = array[:, 4]
    test_size = 0.20
    seed = 7
    X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
        X, Y, test_size=test_size, random_state=seed)

    # test options and evaluation metric
    scoring = 'accuracy'
    models = [
        ('Logistic Regression', LogisticRegression()),
        ('KNN Neighbors', KNeighborsClassifier()),
        ('Support Vector', SVMClassifier()),
        ('DecisionTree', DecisionTreeClassifier()),
        ('Random Forest', RandomForestClassifier()),
    ]

    if verbose:
        # evaluate models
        results = []
        model_names = []
        summary_rows = []
        # random_state only has an effect when shuffling; scikit-learn 0.24+
        # raises ValueError if it is set while shuffle is left at False.
        kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
        for name, model in models:
            print(datetime.datetime.now(), "explore_iris_7: Evaluate model %s" % name)
            cv_results = model_selection.cross_val_score(
                model, X_train, Y_train, cv=kfold, scoring=scoring)
            results.append(cv_results)
            model_names.append(name)
            summary_rows.append(
                "%20s: \t\t%f\t(%f)" % (name, cv_results.mean(), cv_results.std()))
        # One row per model: the fixed-width %20s column only lines up when
        # every model is printed on its own line.
        print("\n".join(summary_rows))
        print()

        if _showingEval():
            # compare algorithms
            fig = pyplot.figure()
            fig.suptitle('Compare algorithms')
            ax = fig.add_subplot(111)
            pyplot.boxplot(results)
            ax.set_xticklabels(model_names)
            pyplot.show()

    # make predictions on test dataset
    # Each entry: (label used in the summary table, label used in the
    # progress log, estimator instance).
    classifiers = [
        ('Logistic Regression', "Logistic Regression", LogisticRegression()),
        ('KNN Neighbors', "KNeighbors Classifier", KNeighborsClassifier()),
        ('Support Vector', "Support Vector", SVMClassifier()),
        ('DecisionTree', "Decision Tree Classifier", DecisionTreeClassifier()),
        ('Random Forest', "Random Forest", RandomForestClassifier()),
        ('My Random', "My Random", myRNDClassifier()),
        ('My KNN', "My KNN", myKNNClassifier()),
    ]
    for _, log_label, classifier in classifiers:
        _train(classifier, log_label, X_train, Y_train, X_test, Y_test)
        _predict(classifier, log_label, X_train, Y_train, X_test, Y_test)

    fitted = [(summary_label, classifier) for summary_label, _, classifier in classifiers]
    _predictionAccuracySummary(fitted, X_train, Y_train, X_test, Y_test)

#####################################################

# My random classifier

class myRNDClassifier:
    """Baseline classifier that answers every query with a random training label.

    ``fit`` only memorises its arguments; ``predict`` ignores the features of
    each query row and picks a label from the stored training labels with
    ``random.choice``.
    """

    def fit(self, X_train, y_train):
        """Keep references to the training features and labels."""
        self.X_train, self.y_train = X_train, y_train

    def predict(self, X_test):
        """Return a list with one randomly picked training label per row of X_test."""
        return [random.choice(self.y_train) for _ in X_test]

#####################################################

# My KNN K =1 classifier

class myKNNClassifier:
    """Hand-written nearest-neighbour classifier with K fixed at 1."""

    def fit(self, X_train, y_train):
        """Keep references to the training features and labels; nothing is computed."""
        self.X_train, self.y_train = X_train, y_train

    def predict(self, X_test):
        """Return a list holding, for each row of X_test, the label of its closest training row."""
        return [self.closest(sample) for sample in X_test]

    def closest(self, row):
        """Return the label of the training row nearest to ``row`` (distance via ``euc``).

        The scan starts from training row 0 and only switches on a strictly
        smaller distance, so ties keep the earliest training row.
        """
        winner = 0
        shortest = euc(row, self.X_train[0])
        for position, candidate in enumerate(self.X_train[1:], start=1):
            gap = euc(row, candidate)
            if gap < shortest:
                winner, shortest = position, gap
        return self.y_train[winner]

def euc(a, b):
    """Return the Euclidean distance between points ``a`` and ``b``.

    Thin wrapper around ``scipy.spatial.distance.euclidean``.
    """
    separation = distance.euclidean(a, b)
    return separation

#####################################################

# training and prediction functions

def _train(alg, algName, X_train, Y_train, X_test, Y_test):
    """Fit ``alg`` on the training split, logging a timestamp before and after.

    ``X_test`` and ``Y_test`` are accepted but not used here. The two log
    lines are suppressed when ``--summaryonly`` is active.
    """
    quiet = _showingSummaryOnly()
    if not quiet:
        print(datetime.datetime.now(), "Begin training: ", algName)
    alg.fit(X_train, Y_train)
    if not quiet:
        print(datetime.datetime.now(), "End training: ", algName)

def _predict(alg, algName, X_train, Y_train, X_test, Y_test):
    """Predict the test split with ``alg`` and report how well it did.

    The prediction always runs. Unless ``--summaryonly`` is active, the
    function also prints begin/end timestamps, the accuracy score, the
    confusion matrix and the classification report. ``X_train`` and
    ``Y_train`` are accepted but not used here.
    """
    verbose = not _showingSummaryOnly()
    if verbose:
        print(datetime.datetime.now(), "Begin prediction: ", algName)
    predictions = alg.predict(X_test)
    if not verbose:
        return
    print(datetime.datetime.now(), "End prediction: ", algName)
    print("%s: accuracy_score=%0.2f" % (algName, accuracy_score(Y_test, predictions)))
    print(confusion_matrix(Y_test, predictions))
    print(classification_report(Y_test, predictions))

def _predictionAccuracySummary(models, X_train, Y_train, X_test, Y_test):
    """Print a two-column table of model name and test-set accuracy.

    ``models`` is an iterable of ``(name, fitted_model)`` pairs; each model is
    asked to predict ``X_test`` and is scored against ``Y_test``. The training
    arguments are accepted but not used here.
    """
    print("Algorithm\t\tAccuracy Score")
    for label, estimator in models:
        score = accuracy_score(Y_test, estimator.predict(X_test))
        print("%20s\t\t%0.2f" % (label, score))

#####################################################

# data sampling and visualization functions

def _sampleData(dataset):
    """Print a textual overview of ``dataset``.

    Output, in order: the frame's shape, its first 20 rows, the
    ``describe()`` statistics, then row counts grouped by 'class' and by
    'sepal-length'.
    """
    # shape, leading rows and summary statistics
    print(dataset.shape)
    leading_rows = dataset.head(20)
    print(leading_rows)
    statistics = dataset.describe()
    print(statistics)

    # how many rows fall into each class and each sepal-length value
    per_class = dataset.groupby('class').size()
    print(per_class)
    per_sepal_length = dataset.groupby('sepal-length').size()
    print(per_sepal_length)

def _visualizeData(dataset, pyplot, scatter_matrix):
    """Show three figures for ``dataset``, one after another.

    Draws box-and-whisker plots, then histograms, then a scatter-plot
    matrix, calling ``pyplot.show()`` after each one. ``pyplot`` and
    ``scatter_matrix`` are passed in by the caller.
    """
    # box and whisker plots, one subplot per column in a 2x2 grid
    box_options = dict(kind='box', subplots=True, layout=(2, 2), sharex=False, sharey=False)
    dataset.plot(**box_options)
    pyplot.show()

    # histograms
    dataset.hist()
    pyplot.show()

    # scatter plot matrix
    scatter_matrix(dataset)
    pyplot.show()

#####################################################

# helper functions

def _parseArgumets(arg):
    """Report whether the switch ``arg`` was given on the command line.

    Args:
        arg: exact switch text to look for, for example "--help".

    Returns:
        True when any entry of ``sys.argv`` after the program name equals
        ``arg``, otherwise False.
    """
    # sys.argv[0] is the program name, so only the remaining entries count.
    return arg in sys.argv[1:]

def _showingHelp():
    """Tell whether ``--help`` was passed on the command line."""
    help_requested = _parseArgumets("--help")
    return help_requested

def _showingSummaryOnly():
    """Tell whether ``--summaryonly`` was passed on the command line."""
    summary_only = _parseArgumets("--summaryonly")
    return summary_only

def _showingVersions():
    """Tell whether version info should be printed.

    ``--summaryonly`` takes precedence: when it is present the answer is
    False regardless of ``--version``.
    """
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--version")

def _showingSamples():
    """Tell whether sample data should be shown.

    ``--summaryonly`` takes precedence: when it is present the answer is
    False regardless of ``--sample``.
    """
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--sample")

def _showingEval():
    """Tell whether the algorithm comparison plot should be shown.

    ``--summaryonly`` takes precedence: when it is present the answer is
    False regardless of ``--eval``.
    """
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--eval")

def _showHelp():
    """Print the usage line followed by one line describing each switch."""
    usage_lines = (
        "iris7_explore_7models: syntax iris7_explore_7models --version --sample --eval --summaryonly",
        "--help: show this help message",
        "--version: show version info for Python runtime and ML libraries",
        "--sample: show sample data",
        "--eval: show evaluation of algorithms",
        "--summaryonly: show only a summary of algorithms and their accuracy scores",
    )
    for line in usage_lines:
        print(line)

def _showVersions():
    """Print the version of the Python runtime and of each ML library.

    Each library is imported immediately before its version line is printed,
    so earlier lines still appear if a later library fails to import.
    """
    import sys
    print('Python: {}'.format(sys.version))

    # (importable package name, output template) in reporting order
    libraries = (
        ('scipy', 'scipy: {}'),
        ('numpy', 'numpy: {}'),
        ('matplotlib', 'matplotlib: {}'),
        ('pandas', 'pandas: {}'),
        ('sklearn', 'sklearn: {}'),
    )
    for package, template in libraries:
        module = __import__(package)
        print(template.format(module.__version__))

# Run the pipeline only when this file is executed as a script, so that
# importing it as a module does not start the download and training run.
if __name__ == "__main__":
    _main()

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer

Step: 1 Unlock blur-text-image

Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock

Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!

Start with iris7_explore_7models.py, and perform the following: 1. For the decision trees classifier: a. Visualize the decision tree with maximum depth of 2, 4, 8 and unrestricted. For each tree...

Start with iris7_explore_7models.py , and perform the following : 1. For the decision trees classifier: a. Visualize the decision tree with maximum depth of 2, 4, 8 and unrestricted. For each tree...

***Machine Learning Problem*** Start with iris7_explore_7models.py, and perform the following: 1. For the decision trees classifier: a. Visualize the decision tree with maximum depth of 2, 4, 8 and...

How do i go about solving this problem.... Start with iris7_explore_7models.py, and perform the following: 1. For the decision trees classifier: a. Visualize the decision tree with maximum depth of...

Part B [ 5 points ] : Build a decision tree classifier using the sklearn toolbox. Then compute metrics for performance like precision and recall. This is a binary classification problem, therefore we...

I would like some help on the following parts. I have started the code for Part 2 and I need help for that part and Part 3. Thank you for your time and effort. Part 2: KNN Classifier without...

Q1 Learning a Tree 2 Points Select one option. Consider the following dataset. x[1] x[2] x[3] - - +1 0 - 0 -1 0 -1 0 0 - +1 If we use the decision tree algorithm to learn a decision tree from this...

Chapter 1 Operations and Productivity Section 1 What is Operations Management? 4) Operations management is the set of activities that creates value in the form of goods and services by transforming...

data mining subject 1- summary the artical 2-what is data size 3- recoreds applied 4-what techqinecs is used 5- explain resualts EMPIRICAL STUDY ON SELECTION OF TEAM MEMBERS FOR SOFTWARE PROJECTS -...

A4,A5 and A6 Table 1: Data Description Field Description STU ID Student ID index). Honor Class Output Column. Honor class of the student at the end of his/her undergrad- uate studies. It has values...

Design a bias circuit so that the current that flows in M1 and M2 is 1A. What are the small signal resistances looking into the drains of M2 and M1 ? what is the minimum voltage across M1 and M2 for...

Why does boring assure concentricity between the hole axis and the axis of rotation of the workpiece (for boring tool), whereas drilling does not?

0 If a company's average collection period is higher ( takes longer to collect ) than the industry's average then the company may be: Enlorcing creds condisions upon its customers that are too...

CT Corp Comprehensive Question Canadian Tire Corporation, Limited ( Canadian Tire ) is a family of companies that includes a retail segment and a financial services division, among others. The retail...

3. How serious a problem is the new digital divide? Explain your answer. There may be only 11 players on the pitch during a match, but the Blackburn Rovers Football Club in the UK employs more than...

2. Why is access to technology insufficient to eliminate the digital divide? There may be only 11 players on the pitch during a match, but the Blackburn Rovers Football Club in the UK employs more...

3-16 Is there anything else Barnes & Noble and the book publishers should be doing to stimulate more business? Borders Group (including many former Waldenbooks rebranded as Borders Express)...