Question: I am working on this code for my project, but the accuracy is 0.9516013654843938. I need to improve the accuracy by using feature selection and pre-processing.

I am working on this code for my project, and its accuracy is currently 0.9516013654843938. I need to improve the accuracy by using feature selection and pre-processing to get a higher result. Could you please modify the code so that I can achieve a better outcome?

#Importing Libraries (ANN)

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

import sklearn

from keras.models import Sequential

from keras.layers import Dense, Dropout, Activation, Flatten

from keras.layers import Conv2D, MaxPooling2D

from sklearn.feature_selection import SelectFromModel

from sklearn.ensemble import ExtraTreesClassifier

from sklearn.metrics import confusion_matrix

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier

from sklearn.datasets import make_classification

from sklearn.metrics import accuracy_score

from sklearn.metrics import f1_score

from sklearn.metrics import precision_score

from sklearn.metrics import recall_score

from sklearn.metrics import cohen_kappa_score

from sklearn.metrics import roc_auc_score

import warnings

# Load the pipe-delimited malware dataset from the mounted Drive folder.
malData = pd.read_csv(
    "/content/drive/MyDrive/Colab Notebooks/MalwareData.csv",
    sep='|',
)

# Quick-look expressions: they only render output when evaluated as the last
# statement of a notebook cell; in a plain script they are no-ops.
malData.head()
malData.shape
malData.describe()

# Histogram of the 'legitimate' column with 20 bins, drawn on an axes that
# spans the whole figure area.
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])
ax.hist(malData['legitimate'], bins=20)
plt.show()

#Data Cleaning
# Set the target column aside, then remove it together with the 'Name' and
# 'md5' columns from the feature frame in a single drop call.
y = malData['legitimate']
malData = malData.drop(columns=['legitimate', 'Name', 'md5'])
print("The Name and md5 variables are reomved successfully")

#Splitting the dataset into test and train (80% / 20%, fixed seed for
# reproducibility), followed by feature standardisation.
from sklearn.preprocessing import StandardScaler

x_train, x_test, y_train, y_test = train_test_split(
    malData, y, test_size=0.2, random_state=4
)

# Pre-processing: rescale every feature to zero mean and unit variance.
# Gradient-trained networks are sensitive to the raw magnitude of their
# inputs, so unscaled columns can dominate the weight updates.
# The scaler is fitted on the training split ONLY and then applied to the
# test split, so no statistics from the test data leak into training.
# NOTE(review): assumes every remaining column is numeric — confirm.
scaler = StandardScaler()
x_train = pd.DataFrame(
    scaler.fit_transform(x_train),
    columns=x_train.columns,
    index=x_train.index,
)
x_test = pd.DataFrame(
    scaler.transform(x_test),
    columns=x_test.columns,
    index=x_test.index,
)

# Notebook-style inspection of the training matrix dimensions.
x_train.shape

# Build, compile and train the feed-forward binary classifier.
# The input width is read from the training matrix instead of being
# hard-coded, so the model keeps working when columns are added, removed
# or selected upstream.
n_features = x_train.shape[1]

NNmodel = Sequential()
NNmodel.add(Dense(32, input_dim=n_features, activation="relu"))
NNmodel.add(Dense(16, activation="relu"))
NNmodel.add(Dense(8, activation="sigmoid"))
NNmodel.add(Dense(4, activation="relu"))
# Single sigmoid unit: the output is read as a score in [0, 1] and is
# thresholded at 0.5 by the evaluation code.
NNmodel.add(Dense(1, activation="sigmoid"))

NNmodel.summary()

# Binary cross-entropy matches the single sigmoid output unit.
NNmodel.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])

#fit Model
NNmodel.fit(x_train, y_train, epochs=5, batch_size=32)

# Accuracy on the training dataset
# Score the training rows, then turn each sigmoid output into a hard 0/1
# label with a 0.5 cut-off (scores equal to 0.5 map to 1).
trainPred = NNmodel.predict(x_train)
trainPred = (np.asarray(trainPred) >= 0.5).astype(int).ravel().tolist()

print(accuracy_score(y_train, trainPred))

# Accuracy of the test dataset
# Keep the raw sigmoid scores: ROC AUC is a ranking metric and must be fed
# continuous scores, not labels that were already thresholded.
y_probability = np.asarray(NNmodel.predict(x_test)).ravel()

# Hard 0/1 labels (0.5 cut-off) for the threshold-based metrics below.
y_prediction = [1 if score >= 0.5 else 0 for score in y_probability]

# precision: tp / (tp + fp)
precision = precision_score(y_test, y_prediction)
print('Precision: %f' % precision)

# recall: tp / (tp + fn)
recall = recall_score(y_test, y_prediction)
print('Recall: %f' % recall)

# f1: 2 tp / (2 tp + fp + fn)
f1 = f1_score(y_test, y_prediction)
print('F1 score: %f' % f1)

# kappa
kappa = cohen_kappa_score(y_test, y_prediction)
print('Cohens kappa: %f' % kappa)

# ROC AUC, computed from the predicted scores rather than the hard labels
auc = roc_auc_score(y_test, y_probability)
print('ROC AUC: %f' % auc)

# confusion matrix
matrix = confusion_matrix(y_test, y_prediction)
print(matrix)

print(accuracy_score(y_test, y_prediction))

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!