Question: How do i go about solving this problem.... Start with iris7_explore_7models.py, and perform the following: 1. For the decision trees classifier: a. Visualize the decision

How do I go about solving this problem?

Start with iris7_explore_7models.py, and perform the following:

1. For the decision trees classifier:

a. Visualize the decision tree with maximum depth of 2, 4, 8 and unrestricted. For each tree depth generate DOT/gv and PNG files. In addition, for each tree depth show the training time and prediction time.

_______________iris7_explore_7models.py is Below____________________

# iris7_explore_7models.py

# Load system libraries import sys import datetime import random

# Load ML libraries import pandas

from pandas.plotting import scatter_matrix from matplotlib import pyplot

from sklearn import model_selection from sklearn.metrics import classification_report from sklearn.metrics import confusion_matrix from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegression from sklearn.neighbors import KNeighborsClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.ensemble import RandomForestClassifier from sklearn.svm import SVC as SVMClassifier

from scipy.spatial import distance


def _main():
    """Run the iris exploration: load data, evaluate models, report accuracy.

    Behaviour is driven by the command-line flags read through the
    ``_showing*`` helpers: ``--help`` prints usage and exits, ``--version``
    prints library versions, ``--sample`` shows and plots the data,
    ``--eval`` draws the cross-validation box plot, and ``--summaryonly``
    suppresses everything except the final accuracy table.
    """
    if _showingHelp():
        _showHelp()
        # sys.exit is always available; the bare ``exit`` helper is only
        # installed by the ``site`` module for interactive use.
        sys.exit(0)
    if _showingVersions():
        _showVersions()

    verbose = not _showingSummaryOnly()

    # load dataset
    if verbose:
        print(datetime.datetime.now(), "explore_iris_7: Loading data")
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    # url = "file:////iris.csv"
    names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
    dataset = pandas.read_csv(url, names=names)

    if _showingSamples():
        _sampleData(dataset)
        _visualizeData(dataset, pyplot, scatter_matrix)

    # split data into train/test datasets
    if verbose:
        print(datetime.datetime.now(), "explore_iris_7: Splitting data into training and test sets")
    array = dataset.values
    X = array[:, 0:4]
    Y = array[:, 4]
    test_size = 0.20
    seed = 7
    X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
        X, Y, test_size=test_size, random_state=seed)

    # test options and evaluation metric
    scoring = 'accuracy'
    models = [
        ('Logistic Regression', LogisticRegression()),
        ('KNN Neighbors', KNeighborsClassifier()),
        ('Support Vector', SVMClassifier()),
        ('DecisionTree', DecisionTreeClassifier()),
        ('Random Forest', RandomForestClassifier()),
    ]
    if verbose:
        # evaluate models with 10-fold cross-validation on the training set
        results = []
        model_names = []
        msg = ""
        for name, model in models:
            print(datetime.datetime.now(), "explore_iris_7: Evaluate model %s" % name)
            # random_state is only accepted together with shuffle=True;
            # recent scikit-learn raises ValueError otherwise.
            kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
            cv_results = model_selection.cross_val_score(
                model, X_train, Y_train, cv=kfold, scoring=scoring)
            results.append(cv_results)
            model_names.append(name)
            msg += "%20s: \t\t%f\t(%f)" % (name, cv_results.mean(), cv_results.std()) + " "
        print(msg)
        print()
        if _showingEval():
            # compare algorithms
            fig = pyplot.figure()
            fig.suptitle('Compare algorithms')
            ax = fig.add_subplot(111)
            pyplot.boxplot(results)
            ax.set_xticklabels(model_names)
            pyplot.show()

    # make predictions on test dataset
    # Each row: (name used in the summary table, name used in the progress
    # log, estimator). The two names differ for some models, so both are kept.
    classifiers = [
        ('Logistic Regression', "Logistic Regression", LogisticRegression()),
        ('KNN Neighbors', "KNeighbors Classifier", KNeighborsClassifier()),
        ('Support Vector', "Support Vector", SVMClassifier()),
        ('DecisionTree', "Decision Tree Classifier", DecisionTreeClassifier()),
        ('Random Forest', "Random Forest", RandomForestClassifier()),
        ('My Random', "My Random", myRNDClassifier()),
        ('My KNN', "My KNN", myKNNClassifier()),
    ]
    for _, log_name, classifier in classifiers:
        _train(classifier, log_name, X_train, Y_train, X_test, Y_test)
        _predict(classifier, log_name, X_train, Y_train, X_test, Y_test)

    trained = [(summary_name, classifier) for summary_name, _, classifier in classifiers]
    _predictionAccuracySummary(trained, X_train, Y_train, X_test, Y_test)

#####################################################
# My random classifier
class myRNDClassifier:
    """Baseline classifier that answers with randomly drawn training labels.

    The features are ignored: every prediction is one element picked from
    the labels given to ``fit``.
    """

    def __init__(self, seed=None):
        """Create the classifier.

        seed: optional seed for a private random generator, which makes the
            predictions reproducible. With the default ``None`` the shared
            ``random`` module generator is used.
        """
        self._rng = random if seed is None else random.Random(seed)

    def fit(self, X_train, y_train):
        """Store the training data; nothing is actually learned."""
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Return a list holding one randomly chosen training label per row of X_test."""
        return [self._rng.choice(self.y_train) for _ in X_test]

#####################################################
# My KNN K =1 classifier
class myKNNClassifier:
    """Nearest-neighbour classifier (k = 1) using Euclidean distance.

    ``fit`` only stores the training data; ``predict`` labels each row with
    the label of the closest stored sample.
    """

    def fit(self, X_train, y_train):
        """Store the training samples and their labels."""
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Return a list with the label of the nearest training sample for each row."""
        return [self.closest(row) for row in X_test]

    def closest(self, row):
        """Return the label of the training sample nearest to row.

        Raises ValueError when there are no training samples.
        """
        sample_count = len(self.X_train)
        if sample_count == 0:
            raise ValueError("myKNNClassifier has no training samples to compare against")
        # min() keeps the first of several equally close samples, which is
        # the same tie-break as a strict "<" scan from index 0.
        best_index = min(range(sample_count), key=lambda i: euc(row, self.X_train[i]))
        return self.y_train[best_index]

def euc(a, b):
    """Return the Euclidean distance between the points a and b."""
    return distance.euclidean(a, b)


#####################################################
# training and prediction functions
def _train(alg, algName, X_train, Y_train, X_test, Y_test):
    """Fit alg on the training data, printing timestamps around the fit.

    alg: object with a ``fit(X, Y)`` method.
    algName: label shown in the progress lines.
    X_test and Y_test are accepted but not used by this function.
    The progress lines are skipped when --summaryonly is active.
    """
    def announce(stage):
        # The flag is consulted on every call, before the timestamp is taken.
        if not _showingSummaryOnly():
            print(datetime.datetime.now(), stage, algName)

    announce("Begin training: ")
    alg.fit(X_train, Y_train)
    announce("End training: ")

def _predict(alg, algName, X_train, Y_train, X_test, Y_test):
    """Predict on X_test with alg and print the evaluation reports.

    alg: object with a ``predict(X)`` method.
    algName: label shown in the printed lines.
    X_train and Y_train are accepted but not used by this function.
    The prediction always runs; with --summaryonly nothing is printed.
    """
    if not _showingSummaryOnly():
        print(datetime.datetime.now(), "Begin prediction: ", algName)

    predictions = alg.predict(X_test)

    if _showingSummaryOnly():
        return

    print(datetime.datetime.now(), "End prediction: ", algName)
    print("%s: accuracy_score=%0.2f" % (algName, accuracy_score(Y_test, predictions)))
    print(confusion_matrix(Y_test, predictions))
    print(classification_report(Y_test, predictions))

def _predictionAccuracySummary(models, X_train, Y_train, X_test, Y_test):
    """Print a two-column table of test-set accuracy per model.

    models: iterable of (name, model) pairs; each model's ``predict`` is
        called on X_test and scored against Y_test.
    X_train and Y_train are accepted but not used by this function.
    """
    print("Algorithm\t\tAccuracy Score")
    for label, estimator in models:
        score = accuracy_score(Y_test, estimator.predict(X_test))
        print("%20s\t\t%0.2f" % (label, score))


#####################################################
# data sampling and visualization functions
def _sampleData(dataset):
    """Print an overview of dataset: shape, leading rows, statistics, group sizes."""
    # shape, first 20 records, and the summary statistics of the frame
    print(dataset.shape)
    print(dataset.head(20))
    print(dataset.describe())

    # number of rows per class, then per distinct sepal-length value
    for column in ('class', 'sepal-length'):
        print(dataset.groupby(column).size())

def _visualizeData(dataset, pyplot, scatter_matrix):
    """Display box plots, histograms and a scatter-plot matrix of dataset.

    pyplot and scatter_matrix are supplied by the caller; ``pyplot.show()``
    is called after each of the three plots.
    """
    # visualize data and draw box and whisker plots
    dataset.plot(kind='box', subplots=True, layout=(2, 2), sharex=False, sharey=False)
    pyplot.show()

    # show histograms and scatter plot matrix
    dataset.hist()
    pyplot.show()
    scatter_matrix(dataset)
    pyplot.show()


#####################################################
# helper functions
def _parseArgumets(arg):
    """Return True when arg is one of the command-line arguments, else False.

    ``sys.argv[0]`` (the program name) is not considered. The result is an
    explicit bool, so callers relying on truthiness keep working.
    """
    return arg in sys.argv[1:]

def _showingHelp():
    """Report whether --help was given on the command line (truthy if so)."""
    help_requested = _parseArgumets("--help")
    return help_requested

def _showingSummaryOnly():
    """Report whether --summaryonly was given on the command line (truthy if so)."""
    summary_only = _parseArgumets("--summaryonly")
    return summary_only

def _showingVersions():
    """Report whether --version applies; --summaryonly overrides it."""
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--version")

def _showingSamples():
    """Report whether --sample applies; --summaryonly overrides it."""
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--sample")

def _showingEval():
    """Report whether --eval applies; --summaryonly overrides it."""
    if _parseArgumets("--summaryonly"):
        return False
    return _parseArgumets("--eval")

def _showHelp():
    """Print the command-line syntax followed by one line per option."""
    usage_lines = (
        "iris7_explore_7models: syntax iris7_explore_7models --version --sample --eval --summaryonly",
        "--help: show this help message",
        "--version: show version info for Python runtime and ML libraries",
        "--sample: show sample data",
        "--eval: show evaluation of algorithms",
        "--summaryonly: show only a summary of algorithms and their accuracy scores",
    )
    for line in usage_lines:
        print(line)

def _showVersions():
    """Print the version of the Python runtime and of each ML library.

    Every library is imported inside this function immediately before its
    version line is printed.
    """
    # Python runtime
    import sys
    print('Python: {}'.format(sys.version))

    # numeric stack
    import scipy
    print('scipy: {}'.format(scipy.__version__))

    import numpy
    print('numpy: {}'.format(numpy.__version__))

    # plotting
    import matplotlib
    print('matplotlib: {}'.format(matplotlib.__version__))

    # data handling and machine learning
    import pandas
    print('pandas: {}'.format(pandas.__version__))

    import sklearn
    print('sklearn: {}'.format(sklearn.__version__))


# Script entry point: runs as soon as the definitions above are loaded.
_main()

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer

Step: 1 Unlock blur-text-image

Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock

Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!

***Machine Learning Problem*** Start with iris7_explore_7models.py, and perform the following: 1. For the decision trees classifier: a. Visualize the decision tree with maximum depth of 2, 4, 8 and...

Start with iris7_explore_7models.py, and perform the following: 1. For the decision trees classifier: a. Visualize the decision tree with maximum depth of 2, 4, 8 and unrestricted. For each tree...

Start with iris7_explore_7models.py , and perform the following : 1. For the decision trees classifier: a. Visualize the decision tree with maximum depth of 2, 4, 8 and unrestricted. For each tree...

make a project on topic priority queue using binary heap help with the research paper giving above to implementation code in python data structure do it in one day S.L. Graham, R.L. Rivest Editors...

Follow the steps given in Machine Learning With R , Chapter 5, section "Example Identifying Risky Bank Loans Using C5.0 Decision Trees." download the credit. csv file from Packt Publishing's website...

Portray in words what transforms you would have to make to your execution to some degree (a) to accomplish this and remark on the benefits and detriments of this thought.You are approached to compose...

Data mining subject: 1- summarize the article 2- what is the data size 3- records applied 4- what techniques are used 5- explain the results. EMPIRICAL STUDY ON SELECTION OF TEAM MEMBERS FOR SOFTWARE PROJECTS -...

Prolog You are approached to compose a Prolog program to work with twofold trees. Your code shouldn't depend on any library predicates and you ought to expect that the mediator is running without...

Briefly describe ASCII and Unicode and draw attention to any relationship between them. [3 marks] (b) Briefly explain what a Reader is in the context of reading characters from data. [3 marks] A...

Solve the following differential equations by integrating both side of the equation w.r.t the given unknown: d?y 2.5 = x? 4; y(0) = 2; y'(0) = 1 %3D %3D dx2

Predict the phenotypic consequences of each of the following mutations: A. Apetala1 defective B. Pistillata defective C. Apetala1 and pistillata defective

Match the long - term trading techniques with their descriptions. Automatically invest cash dividends to purchase more shares Purchasing an equal dollar amount of a stock at equal intervals Purchase...

Report - Using Excel To Graph Data Using Excel to Graph Data To complete this experiment, you need to request a data set to work with. Generate data set (5pts) Graphing Data Set Pi (n) is the ratio...

What tends to skew and distort Average Salaries in most Gender Pay Equity Studies?

The FedScope employment database has a number of Dimension Tables and a Single Fact Table, as shown in Table 7.1. Which columns/data elements in the Fact Table would be most useful in Pay Equity...

After Defining and Building a Multidimensional OLAP Cube, what is stored in the Cube?