Question: #Data Visualization import seaborn as sns import matplotlib.pyplot as plt sns . barplot ( x = class, y = data [ class ]
#Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt
# Bar plot of the target label against the row index, using the "mako" palette.
# NOTE(review): barplot aggregates y per x category (mean by default), so the bar
# heights are mean row indices, not class counts; sns.countplot(x="class", ...)
# would show counts -- confirm which was intended.
sns.barplot(x="class", y=data["class"].index, palette="mako", data=mushroomdata)
#The number of poisonous mushrooms is almost twice the number of normal mushrooms, so the data set has a class-imbalance problem.
#We will be using Matplotlib pyplot and Seaborn to plot our data.
#
from sklearn import preprocessing
#Label encoding is used to convert categorical features to numerical values.
def labelencodefit(mushroomdata, columns):
    """Label-encode the given columns of a DataFrame.

    Label encoding converts categorical features to numerical values.

    Args:
        mushroomdata: DataFrame holding the columns to encode. It is copied,
            so the caller's frame is left untouched.
        columns: Iterable of column names to encode.

    Returns:
        A ``(result, encoders)`` tuple: the encoded copy of the frame and a
        dict mapping each column name to the ``LabelEncoder`` fitted on it.
    """
    result = mushroomdata.copy()
    encoders = {}
    for column in columns:
        # One encoder per column so each mapping can be inverted later.
        encoder = preprocessing.LabelEncoder()
        result[column] = encoder.fit_transform(result[column])
        encoders[column] = encoder
    return result, encoders
#
# Encode every column of the data set and keep the fitted encoders.
data, encoders = labelencodefit(data, data.columns)
data.head()
#
def correlationmap(mushroomdata, method, figsize=(16, 12)):
    """Show an annotated correlation heatmap ordered by correlation with "class".

    Args:
        mushroomdata: Numeric DataFrame that contains a "class" column.
        method: Correlation method passed to ``DataFrame.corr``.
        figsize: Figure size handed to ``plt.subplots``.
            NOTE(review): the original size literal was lost in the extracted
            source; this default is a placeholder -- confirm.
    """
    corr = mushroomdata.corr(method=method)
    # Order the columns from most to least correlated with the target.
    ix = corr.sort_values("class", ascending=False).index
    dfsortedbycorrelation = mushroomdata.loc[:, ix]
    corr = dfsortedbycorrelation.corr(method=method)
    plt.subplots(figsize=figsize)
    with sns.axes_style("white"):
        # display a correlation heatmap
        sns.heatmap(corr, annot=True)
    plt.show()
#
correlationmap(data, method="spearman")
#gill-size has the highest correlation with class. It should be included in the model.
#There are some highly correlated variables, such as gill-color & ring-type, gill-color & bruises, and bruises & stalk-surface-below-ring. These highly correlated variables should be discarded from the model to obtain more accurate results.
#
y = data["class"]  # contains only the target variable, "class".
# Contains the independent variables.
# NOTE(review): the original used data.iloc[...] with slice bounds that were lost
# in the extracted source; dropping the target column matches the stated intent.
X = data.drop(columns="class")
#
from sklearn.feature_selection import SelectKBest
import numpy as np


def SelectKBestCustomized(mushroomdata, k, scorefunc, target="class"):
    """Return the names of the ``k`` best feature columns.

    Args:
        mushroomdata: DataFrame containing the features and the target column.
        k: Number of features to keep (forwarded to ``SelectKBest``).
        scorefunc: Scoring function forwarded to ``SelectKBest`` as
            ``score_func``.
        target: Name of the target column; every other column is a candidate.

    Returns:
        List of the selected column names, in the frame's column order.
    """
    X = mushroomdata.drop(columns=target)
    y = mushroomdata[target]
    # Seed the global NumPy RNG so mutual-information scoring is repeatable.
    # NOTE(review): the original seed literal was lost in the extracted source;
    # 0 is a placeholder -- confirm.
    np.random.seed(0)
    fs = SelectKBest(score_func=scorefunc, k=k)
    fs.fit(X, y)
    mask = fs.get_support()
    # The mask is aligned with X.columns; keep the names flagged True.
    return [feature for keep, feature in zip(mask, X.columns) if keep]
#
from sklearn.feature_selection import mutual_info_classif
# Mutual information between each feature and the target.
# NOTE(review): the random_state literal was lost in the extracted source;
# 0 is a placeholder -- confirm.
mutual_info_classif(X, y, random_state=0)
#
# NOTE(review): the k literal was lost in the extracted source; 9 matches the
# number of feature columns hard-coded later in this script -- confirm.
mutualinfoselection = SelectKBestCustomized(data, 9, mutual_info_classif)
#
mutualinfoselection
#
# Feature columns kept for modelling.
# NOTE(review): hyphens in the column names were stripped in the extracted
# source and are restored here -- confirm against the DataFrame's columns.
selectedcolumns = [
    "odor",
    "gill-size",
    "gill-color",
    "stalk-surface-above-ring",
    "stalk-surface-below-ring",
    "stalk-color-above-ring",
    "stalk-color-below-ring",
    "ring-type",
    "spore-print-color",
]
Xnew = X[selectedcolumns]
#
# Same features plus the target column, used for plotting.
dataselectedfeatures = data[selectedcolumns + ["class"]]
#
# One subplot per column of dataselectedfeatures, laid out on an a-by-b grid.
# NOTE(review): the grid and figure-size literals were lost in the extracted
# source; they are derived from the column count here -- confirm.
b = 2  # number of columns
a = (len(dataselectedfeatures.columns) + b - 1) // b  # number of rows
c = 1  # initialize plot counter
fig = plt.figure(figsize=(7 * b, 4 * a))
for i in dataselectedfeatures:
    plt.subplot(a, b, c)
    # plt.title("{}, subplot: {}{}{}".format(i, a, b, c))
    plt.xlabel(i)
    # NOTE(review): the palette name reads 'Setr' in the extracted source;
    # "Set1_r" is the presumed original -- confirm.
    sns.barplot(
        x=i,
        y=dataselectedfeatures[i].index,
        palette="Set1_r",
        hue="class",
        data=dataselectedfeatures,
    )
    c += 1
plt.show()
# THE PYTHON CODE GIVEN ABOVE IS RELATED TO RANDOM FOREST CLASSIFICATION IN THE DATA SCIENCE COURSE.
PLEASE INTERPRET THIS CODE AND PREPARE A REPORT ACCORDING TO THE SUBJECTS AND CODES.
Step by Step Solution
There are 3 Steps involved in it
1 Expert Approved Answer
Step: 1 Unlock
Question Has Been Solved by an Expert!
Get step-by-step solutions from verified subject matter experts
Step: 2 Unlock
Step: 3 Unlock
