hey guys this is my code import pandas as pd import numpy as np from sklearn.neighbors
Question:
Hey guys, this is my code:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, recall_score, precision_recall_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns
# Load the dataset from the CSV file into a DataFrame.
dataaa_path = r'C:\Users\john3\Desktop\cyber security analytics sit 384\10.1HD\creditcard.csv'
hahadata = pd.read_csv(dataaa_path)

# Preprocess the dataset: standardise 'Amount' and 'Time' into new columns.
# The single scaler is refitted for each column in turn, so once this block
# finishes it holds the statistics of the last column fitted ('Time').
amirscaler = StandardScaler()
for raw_column, scaled_column in (('Amount', 'scaled_amount'), ('Time', 'scaled_time')):
    # StandardScaler expects 2-D input, hence the reshape to a single column.
    column_2d = hahadata[raw_column].to_numpy().reshape(-1, 1)
    hahadata[scaled_column] = amirscaler.fit_transform(column_2d)

# The raw columns are superseded by their scaled counterparts.
hahadata = hahadata.drop(columns=['Time', 'Amount'])
# Split the dataset into train and test sets (70% / 30%).
# train_test_split returns (X_train, X_test, y_train, y_test) in that order,
# so J_trainn / P_trainn are the train / test features and
# K_trainn / o_trainn are the train / test labels.
M = hahadata.drop('Class', axis=1)
N = hahadata['Class']
J_trainn, P_trainn, K_trainn, o_trainn = train_test_split(M, N, test_size=0.3, random_state=0)

# Create the undersampled dataset: keep every Class == 1 row and draw an
# equally sized random sample (without replacement) of Class == 0 rows.
hackingfraudd_incidens = hahadata.index[hahadata['Class'] == 1].to_numpy()
regular_accident = hahadata.index[hahadata['Class'] == 0].to_numpy()
undersample_size = len(hackingfraudd_incidens)

# Seeded so the undersample is reproducible, matching the random_state=0
# used for every split and classifier in this script.
undersample_rng = np.random.default_rng(0)
random_regular_accident = undersample_rng.choice(regular_accident, undersample_size, replace=False)

undersampled_indices = np.concatenate([hackingfraudd_incidens, random_regular_accident])
# The collected values are index *labels*, so select with .loc rather than the
# positional .iloc (the two only coincide for a default 0..n-1 index).
und3rs4mple_d4t4 = hahadata.loc[undersampled_indices, :]
M_undersampled = und3rs4mple_d4t4.drop('Class', axis=1)
N_undersampled = und3rs4mple_d4t4['Class']

# Split the undersampled dataset into train and test sets (70% / 30%).
# Order is again (X_train, X_test, y_train, y_test).
J_tra3n_und3rs4ample, P_trainn_und3rs4mple, N_tra1n_und3rs4mple, L_t3st_und3rs4mple = train_test_split(
    M_undersampled, N_undersampled, test_size=0.3, random_state=0
)
def writedown_gr1dsearc_sc0res(clf, param, J_trainn, y_train):
    """Grid-search ``param`` for ``clf`` and print the best result.

    A ``GridSearchCV`` with 5-fold cross-validation, scored on recall, is
    fitted on the given training data. The best parameter combination and
    its score are printed.

    Args:
        clf: Estimator handed to ``GridSearchCV``.
        param: Parameter grid handed to ``GridSearchCV``.
        J_trainn: Training features.
        y_train: Training labels.

    Returns:
        The ``best_params_`` found by the search.
    """
    search = GridSearchCV(estimator=clf, param_grid=param, scoring='recall', cv=5)
    search.fit(J_trainn, y_train)

    winning_params = search.best_params_
    print(f"greatest parameterrs: {winning_params}")
    print(f"greatest sc0re: {search.best_score_}")
    return winning_params
def pl0t_c0nfusion_matr1xx(cm, title):
    """Draw ``cm`` as an annotated heatmap on the current axes.

    Args:
        cm: Confusion matrix of integer counts (cells are formatted with 'd').
        title: Title placed above the heatmap.
    """
    heatmap_options = {"annot": True, "cmap": "YlGnBu", "fmt": 'd', "linewidths": .5}
    sns.heatmap(cm, **heatmap_options)

    # Label the axes the heatmap was just drawn on.
    current_axes = plt.gca()
    current_axes.set_title(title)
    current_axes.set_xlabel("Predicted")
    current_axes.set_ylabel("True")
def predict_plot_test(clf, J_trainn, y_train, P_trainn, y_test):
    """Fit ``clf``, predict the test set and plot the confusion matrix.

    Args:
        clf: Classifier exposing ``fit`` and ``predict``; it is (re)fitted here.
        J_trainn: Training features.
        y_train: Training labels.
        P_trainn: Test features (despite the name).
        y_test: True test labels.
    """
    clf.fit(J_trainn, y_train)
    O_prek = clf.predict(P_trainn)
    centreme = confusion_matrix(y_test, O_prek)
    # ``clf.class.name`` is not valid Python (``class`` is a keyword); the
    # classifier's class name is obtained via ``type(clf).__name__``.
    pl0t_c0nfusion_matr1xx(centreme, f"Confusion Matrix for {type(clf).__name__}")
def plot_recall_for_threshold(clf, J_trainn, y_train, P_trainn, y_test, thresholds):
    """Fit ``clf`` and plot test-set recall against decision thresholds.

    For each threshold ``t``, a sample is predicted as class 1 when its
    ``predict_proba`` value in column index 1 is ``>= t``; the recall of
    those predictions against ``y_test`` is plotted versus ``t``.

    Args:
        clf: Classifier exposing ``fit`` and ``predict_proba``; it is
            (re)fitted here.
        J_trainn: Training features.
        y_train: Training labels.
        P_trainn: Test features (despite the name).
        y_test: True test labels.
        thresholds: Iterable of probability cut-offs to evaluate.
    """
    clf.fit(J_trainn, y_train)
    O_prek_proba = clf.predict_proba(P_trainn)[:, 1]

    recalls = [
        recall_score(y_test, (O_prek_proba >= t).astype(int))
        for t in thresholds
    ]

    plt.plot(thresholds, recalls)
    plt.xlabel("Threshold")
    plt.ylabel("Recall")
    # ``clf.class.name`` is not valid Python (``class`` is a keyword); the
    # classifier's class name is obtained via ``type(clf).__name__``.
    plt.title(f"Recall for different thresholds for {type(clf).__name__}")
def pl0t_precision_recall(clf, J_trainn, y_train, P_trainn, y_test):
    """Fit ``clf`` and plot its precision-recall curve on the test set.

    The curve is built from the ``predict_proba`` values in column index 1,
    and the area computed by ``auc(recall, precision)`` is shown in the title.

    Args:
        clf: Classifier exposing ``fit`` and ``predict_proba``; it is
            (re)fitted here.
        J_trainn: Training features.
        y_train: Training labels.
        P_trainn: Test features (despite the name).
        y_test: True test labels.
    """
    clf.fit(J_trainn, y_train)
    O_prek_proba = clf.predict_proba(P_trainn)[:, 1]

    precisi0n, recall, _ = precision_recall_curve(y_test, O_prek_proba)
    p4_rYOUC = auc(recall, precisi0n)

    plt.plot(recall, precisi0n)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    # ``clf.class.name`` is not valid Python (``class`` is a keyword); the
    # classifier's class name is obtained via ``type(clf).__name__``.
    plt.title(f"Precision-Recall curve for {type(clf).__name__} (AUC = {p4_rYOUC:0.2f})")
# Hyper-parameter grids explored by the grid search, one per classifier.
knn_paramms = {'n_neighbors': list(range(1, 6))}
dt_params = {'max_leaf_nodes': list(range(10, 31, 5))}
rf_params = {'n_estimators': [5, 10, 20, 50]}
svc_params = {
    'gamma': [0.001, 0.01, 0.1, 1, 10],
    'C': [0.01, 0.1, 1, 10, 100],
}

# Classifiers under evaluation; SVC needs probability=True so that
# predict_proba is available for the threshold and precision-recall plots.
knnn = KNeighborsClassifier()
dtt = DecisionTreeClassifier(random_state=0)
rff = RandomForestClassifier(random_state=0)
ssvvcc = SVC(random_state=0, probability=True)

# Pair each classifier with its parameter grid.
classifiers = list(zip(
    (knnn, dtt, rff, ssvvcc),
    (knn_paramms, dt_params, rf_params, svc_params),
))

# Probability cut-offs 0.1, 0.2, ..., 0.9 for the recall-vs-threshold plot.
thresholds = [tenth / 10 for tenth in range(1, 10)]

# Undersampled split in the argument order the plotting helpers expect:
# (train features, train labels, test features, test labels).
undersampled_split = (
    J_tra3n_und3rs4ample,
    N_tra1n_und3rs4mple,
    P_trainn_und3rs4mple,
    L_t3st_und3rs4mple,
)

# For every classifier: tune on the undersampled training data, then draw the
# confusion matrix, the recall-vs-threshold plot and the precision-recall
# curve, each in its own figure.
for clf, params in classifiers:
    best_params = writedown_gr1dsearc_sc0res(clf, params, J_tra3n_und3rs4ample, N_tra1n_und3rs4mple)
    clf.set_params(**best_params)

    plt.figure()
    predict_plot_test(clf, *undersampled_split)

    plt.figure()
    plot_recall_for_threshold(clf, *undersampled_split, thresholds)

    plt.figure()
    pl0t_precision_recall(clf, *undersampled_split)

# Display all figures once every classifier has been processed.
plt.show()
And this is what my tutor is asking for:
"I checked your submission and it still doesn't plot for different thresholds, as required by the task sheet:
- for each model, plot recall matrices for different thresholds for the undersampled dataset
- for each model, plot the precision-recall curve for the undersampled dataset"
And this is the task sheet.
Here is the task sheet link:
https://mega.nz/file/L5BTEArA#xDzMVcEcvpFKP4yczCUCZoKZHR7rP854EdafE1gGlBE