Question: How do I go about solving this problem? Start with iris7_explore_7models.py, and perform the following: 1. For the decision trees classifier: a. Visualize the decision

How do I go about solving this problem?

Start with iris7_explore_7models.py, and perform the following:

1. For the decision trees classifier:

a. Visualize the decision tree with maximum depths of 2, 4, 8, and unrestricted. For each tree depth, generate DOT/gv and PNG files. In addition, for each tree depth, show the training time and prediction time.

_______________iris7_explore_7models.py is Below____________________

# iris7_explore_7models.py

# Load system libraries import sys import datetime import random

# Load ML libraries import pandas

from pandas.plotting import scatter_matrix from matplotlib import pyplot

from sklearn import model_selection from sklearn.metrics import classification_report from sklearn.metrics import confusion_matrix from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression from sklearn.neighbors import KNeighborsClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.ensemble import RandomForestClassifier from sklearn.svm import SVC as SVMClassifier

from scipy.spatial import distance


def _main():
    """Load the iris data, compare classifiers and print their accuracy.

    Steps, in order:

    1. Honour ``--help`` (print usage and exit) and ``--version``.
    2. Read the iris CSV into a DataFrame; with ``--sample`` also print
       sample statistics and show the exploratory plots.
    3. Hold out 20% of the rows as a test set (fixed seed).
    4. Unless ``--summaryonly`` is given, run 10-fold cross-validation over
       the scikit-learn models and print mean/std accuracy; with ``--eval``
       also show a box plot of the fold scores.
    5. Fit every classifier (including the two hand-written ones), report
       its test-set predictions, and print the accuracy summary table.
    """
    if _showingHelp():
        _showHelp()
        # sys.exit is always available; the bare exit() builtin is only
        # injected by the site module.
        sys.exit(0)
    if _showingVersions():
        _showVersions()

    verbose = not _showingSummaryOnly()

    # load dataset
    if verbose:
        print(datetime.datetime.now(), "explore_iris_7: Loading data")
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    # url = "file:////iris.csv"
    columns = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
    dataset = pandas.read_csv(url, names=columns)

    if _showingSamples():
        _sampleData(dataset)
        _visualizeData(dataset, pyplot, scatter_matrix)

    # split data into train/test datasets
    if verbose:
        print(datetime.datetime.now(), "explore_iris_7: Splitting data into training and test sets")
    array = dataset.values
    X = array[:, 0:4]   # the four measurement columns
    Y = array[:, 4]     # the class label column
    test_size = 0.20
    seed = 7
    X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
        X, Y, test_size=test_size, random_state=seed)

    # test options and evaluation metric
    scoring = 'accuracy'
    models = [
        ('Logistic Regression', LogisticRegression()),
        ('KNN Neighbors', KNeighborsClassifier()),
        ('Support Vector', SVMClassifier()),
        ('DecisionTree', DecisionTreeClassifier()),
        ('Random Forest', RandomForestClassifier()),
    ]
    if verbose:
        # evaluate models with 10-fold cross-validation on the training set
        results = []
        model_names = []
        summary_parts = []
        for name, model in models:
            print(datetime.datetime.now(), "explore_iris_7: Evaluate model %s" % name)
            # random_state only has an effect when shuffle=True; recent
            # scikit-learn releases raise ValueError if it is set without it.
            kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
            cv_results = model_selection.cross_val_score(
                model, X_train, Y_train, cv=kfold, scoring=scoring)
            results.append(cv_results)
            model_names.append(name)
            summary_parts.append(
                "%20s: \t\t%f\t(%f)" % (name, cv_results.mean(), cv_results.std()) + " ")
        print("".join(summary_parts))
        print()
        if _showingEval():
            # compare algorithms
            fig = pyplot.figure()
            fig.suptitle('Compare algorithms')
            ax = fig.add_subplot(111)
            pyplot.boxplot(results)
            ax.set_xticklabels(model_names)
            pyplot.show()

    # make predictions on test dataset
    # (summary-table label, progress-log label, fresh estimator)
    classifiers = [
        ('Logistic Regression', "Logistic Regression", LogisticRegression()),
        ('KNN Neighbors', "KNeighbors Classifier", KNeighborsClassifier()),
        ('Support Vector', "Support Vector", SVMClassifier()),
        ('DecisionTree', "Decision Tree Classifier", DecisionTreeClassifier()),
        ('Random Forest', "Random Forest", RandomForestClassifier()),
        ('My Random', "My Random", myRNDClassifier()),
        ('My KNN', "My KNN", myKNNClassifier()),
    ]
    models.clear()
    for summary_label, log_label, classifier in classifiers:
        _train(classifier, log_label, X_train, Y_train, X_test, Y_test)
        _predict(classifier, log_label, X_train, Y_train, X_test, Y_test)
        models.append((summary_label, classifier))
    _predictionAccuracySummary(models, X_train, Y_train, X_test, Y_test)

#####################################################
# My random classifier
class myRNDClassifier:
    """Baseline classifier that answers with a randomly chosen training label."""

    def fit(self, X_train, y_train):
        """Store the training samples and labels; nothing is learned."""
        self.X_train, self.y_train = X_train, y_train

    def predict(self, X_test):
        """Return a list with one label drawn from ``y_train`` per test row."""
        # the row contents are ignored; only the number of rows matters
        return [random.choice(self.y_train) for _ in X_test]

#####################################################
# My KNN K =1 classifier
class myKNNClassifier:
    """Nearest-neighbour classifier with k = 1, using ``euc`` as the metric."""

    def fit(self, X_train, y_train):
        """Keep the training samples and labels for later look-ups."""
        self.X_train, self.y_train = X_train, y_train

    def predict(self, X_test):
        """Return a list holding the nearest training label for each row."""
        return [self.closest(sample) for sample in X_test]

    def closest(self, row):
        """Return the label of the training sample nearest to ``row``.

        On equal distances the earliest training sample is kept, because a
        later one only replaces it when strictly closer.
        """
        nearest = 0
        shortest = euc(row, self.X_train[0])
        for candidate in range(1, len(self.X_train)):
            separation = euc(row, self.X_train[candidate])
            if separation < shortest:
                shortest, nearest = separation, candidate
        return self.y_train[nearest]

def euc(a, b):
    """Return the Euclidean distance between points ``a`` and ``b``."""
    separation = distance.euclidean(a, b)
    return separation


#####################################################
# training and prediction functions
def _train(alg, algName, X_train, Y_train, X_test, Y_test):
    """Fit ``alg`` on the training data, logging timestamps around the fit.

    The begin/end messages are suppressed in summary-only mode. ``X_test``
    and ``Y_test`` are accepted but not used here.
    """
    def announce(stage):
        # the mode is re-checked on every call
        if not _showingSummaryOnly():
            print(datetime.datetime.now(), stage, algName)

    announce("Begin training: ")
    alg.fit(X_train, Y_train)
    announce("End training: ")

def _predict(alg, algName, X_train, Y_train, X_test, Y_test):
    """Predict the test set with ``alg`` and print its evaluation report.

    In summary-only mode the prediction still runs but nothing is printed.
    Otherwise the output is: timestamped begin/end messages, the accuracy
    score, the confusion matrix and the classification report.
    ``X_train`` and ``Y_train`` are accepted but not used here.
    """
    if not _showingSummaryOnly():
        print(datetime.datetime.now(), "Begin prediction: ", algName)
    predictions = alg.predict(X_test)

    # nothing more to report when only the summary is wanted
    if _showingSummaryOnly():
        return

    print(datetime.datetime.now(), "End prediction: ", algName)
    score = accuracy_score(Y_test, predictions)
    print("%s: accuracy_score=%0.2f" % (algName, score))
    print(confusion_matrix(Y_test, predictions))
    print(classification_report(Y_test, predictions))

def _predictionAccuracySummary(models, X_train, Y_train, X_test, Y_test):
    """Print a two-column table of test-set accuracy per model.

    ``models`` is an iterable of ``(name, fitted_model)`` pairs. ``X_train``
    and ``Y_train`` are accepted but not used here.
    """
    print("Algorithm\t\tAccuracy Score")
    for label, fitted in models:
        score = accuracy_score(Y_test, fitted.predict(X_test))
        print("%20s\t\t%0.2f" % (label, score))


#####################################################
# data sampling and visualization functions
def _sampleData(dataset):
    """Print an overview of ``dataset``: shape, head, statistics, group sizes."""
    # frame shape, the first 20 records, then the descriptive statistics
    shape = dataset.shape
    print(shape)
    preview = dataset.head(20)
    print(preview)
    statistics = dataset.describe()
    print(statistics)

    # row counts per class, then per distinct sepal-length value
    for column in ('class', 'sepal-length'):
        print(dataset.groupby(column).size())

def _visualizeData(dataset, pyplot, scatter_matrix):
    """Show box plots, histograms and a scatter matrix for ``dataset``.

    ``pyplot`` and ``scatter_matrix`` are passed in by the caller;
    ``pyplot.show()`` is called after each of the three figures.
    """
    # box-and-whisker plots on a 2x2 grid, each with its own axes
    box_options = dict(kind='box', subplots=True, layout=(2, 2), sharex=False, sharey=False)
    dataset.plot(**box_options)
    pyplot.show()

    # histograms, then the scatter-plot matrix
    dataset.hist()
    pyplot.show()
    scatter_matrix(dataset)
    pyplot.show()


#####################################################
# helper functions
def _parseArgumets(arg):
    """Return 1 if ``arg`` is among the command-line arguments, else ``None``.

    Only ``sys.argv[1:]`` is searched, so the program name never matches.
    """
    if arg in sys.argv[1:]:
        return 1
    return None

def _showingHelp():
    """Return the result of looking up ``--help`` on the command line."""
    help_requested = _parseArgumets("--help")
    return help_requested

def _showingSummaryOnly():
    """Return the result of looking up ``--summaryonly`` on the command line."""
    summary_only = _parseArgumets("--summaryonly")
    return summary_only

def _showingVersions():
    """Tell whether version info should be printed.

    ``--summaryonly`` takes precedence: when it is present the answer is
    ``False``; otherwise the result of the ``--version`` look-up is returned.
    """
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--version")

def _showingSamples():
    """Tell whether sample data should be shown.

    ``--summaryonly`` takes precedence: when it is present the answer is
    ``False``; otherwise the result of the ``--sample`` look-up is returned.
    """
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--sample")

def _showingEval():
    """Tell whether the algorithm comparison plot should be shown.

    ``--summaryonly`` takes precedence: when it is present the answer is
    ``False``; otherwise the result of the ``--eval`` look-up is returned.
    """
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--eval")

def _showHelp():
    """Print the usage line followed by one line per command-line option."""
    usage_lines = (
        "iris7_explore_7models: syntax iris7_explore_7models --version --sample --eval --summaryonly",
        "--help: show this help message",
        "--version: show version info for Python runtime and ML libraries",
        "--sample: show sample data",
        "--eval: show evaluation of algorithms",
        "--summaryonly: show only a summary of algorithms and their accuracy scores",
    )
    for line in usage_lines:
        print(line)

def _showVersions():
    """Print the version of the Python runtime and of each ML library.

    Each library is imported immediately before its line is printed, so a
    missing library fails only after the preceding lines are shown.
    """
    # check versions of Python runtime and ML libraries
    import sys
    print('Python: {}'.format(sys.version))

    libraries = (
        ("scipy", 'scipy: {}'),
        ("numpy", 'numpy: {}'),
        ("matplotlib", 'matplotlib: {}'),
        ("pandas", 'pandas: {}'),
        ("sklearn", 'sklearn: {}'),
    )
    for module_name, template in libraries:
        module = __import__(module_name)
        print(template.format(module.__version__))


# run the script
_main()

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!