Question:

Hey guys, this is my code:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, recall_score, precision_recall_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
dataaa_path = r'C:\Users\john3\Desktop\cyber security analytics sit 384\10.1HD\creditcard.csv'
hahadata = pd.read_csv(dataaa_path)

# Preprocess the dataset
amirscaler = StandardScaler()
hahadata['scaled_amount'] = amirscaler.fit_transform(hahadata['Amount'].values.reshape(-1, 1))
hahadata['scaled_time'] = amirscaler.fit_transform(hahadata['Time'].values.reshape(-1, 1))
hahadata.drop(['Time', 'Amount'], axis=1, inplace=True)

# Split the full dataset into train and test sets
# (J_trainn = X_train, P_trainn = X_test, K_trainn = y_train, o_trainn = y_test)
M = hahadata.drop('Class', axis=1)
N = hahadata['Class']
J_trainn, P_trainn, K_trainn, o_trainn = train_test_split(M, N, test_size=0.3, random_state=0)

# Create the undersampled dataset: keep every fraud case and sample an equal
# number of normal transactions without replacement
hackingfraudd_incidens = np.array(hahadata[hahadata.Class == 1].index)
regular_accident = np.array(hahadata[hahadata.Class == 0].index)
undersample_size = len(hackingfraudd_incidens)
random_regular_accident = np.random.choice(regular_accident, undersample_size, replace=False)
undersampled_indices = np.concatenate([hackingfraudd_incidens, random_regular_accident])

und3rs4mple_d4t4 = hahadata.iloc[undersampled_indices, :]
M_undersampled = und3rs4mple_d4t4.drop('Class', axis=1)
N_undersampled = und3rs4mple_d4t4['Class']

# Split the undersampled dataset into train and test sets
# (J_tra3n_und3rs4ample = X_train, P_trainn_und3rs4mple = X_test,
#  N_tra1n_und3rs4mple = y_train, L_t3st_und3rs4mple = y_test)
J_tra3n_und3rs4ample, P_trainn_und3rs4mple, N_tra1n_und3rs4mple, L_t3st_und3rs4mple = train_test_split(M_undersampled, N_undersampled, test_size=0.3, random_state=0)

def writedown_gr1dsearc_sc0res(clf, param, J_trainn, y_train):
    # Grid search on recall, report and return the best parameter setting
    grid_clf = GridSearchCV(clf, param, scoring='recall', cv=5)
    grid_clf.fit(J_trainn, y_train)

    print(f"Best parameters: {grid_clf.best_params_}")
    print(f"Best recall score: {grid_clf.best_score_}")
    return grid_clf.best_params_

def pl0t_c0nfusion_matr1xx(cm, title):
    sns.heatmap(cm, annot=True, cmap="YlGnBu", fmt='d', linewidths=.5)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title(title)

def predict_plot_test(clf, J_trainn, y_train, P_trainn, y_test):
    clf.fit(J_trainn, y_train)
    O_prek = clf.predict(P_trainn)

    centreme = confusion_matrix(y_test, O_prek)
    pl0t_c0nfusion_matr1xx(centreme, f"Confusion Matrix for {clf.__class__.__name__}")

def plot_recall_for_threshold(clf, J_trainn, y_train, P_trainn, y_test, thresholds):
    clf.fit(J_trainn, y_train)
    O_prek_proba = clf.predict_proba(P_trainn)[:, 1]

    recalls = []
    for t in thresholds:
        O_prek = (O_prek_proba >= t).astype(int)
        recalls.append(recall_score(y_test, O_prek))

    plt.plot(thresholds, recalls)
    plt.xlabel("Threshold")
    plt.ylabel("Recall")
    plt.title(f"Recall for different thresholds for {clf.__class__.__name__}")

def pl0t_precision_recall(clf, J_trainn, y_train, P_trainn, y_test):
    clf.fit(J_trainn, y_train)
    O_prek_proba = clf.predict_proba(P_trainn)[:, 1]
    precisi0n, recall, _ = precision_recall_curve(y_test, O_prek_proba)
    p4_rYOUC = auc(recall, precisi0n)

    plt.plot(recall, precisi0n)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"Precision-Recall curve for {clf.__class__.__name__} (AUC = {p4_rYOUC:0.2f})")

# Parameters for classifiers
knn_paramms = {'n_neighbors': [1, 2, 3, 4, 5]}
dt_params = {'max_leaf_nodes': [10, 15, 20, 25, 30]}
rf_params = {'n_estimators': [5, 10, 20, 50]}
svc_params = {'gamma': [0.001, 0.01, 0.1, 1, 10], 'C': [0.01, 0.1, 1, 10, 100]}

# Initialize classifiers
knnn = KNeighborsClassifier()
dtt = DecisionTreeClassifier(random_state=0)
rff = RandomForestClassifier(random_state=0)
ssvvcc = SVC(random_state=0, probability=True)

# Perform the tasks for each classifier
classifiers = [
    (knnn, knn_paramms),
    (dtt, dt_params),
    (rff, rf_params),
    (ssvvcc, svc_params)
]

thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

for clf, params in classifiers:
    best_params = writedown_gr1dsearc_sc0res(clf, params, J_tra3n_und3rs4ample, N_tra1n_und3rs4mple)
    clf.set_params(**best_params)

    plt.figure()
    predict_plot_test(clf, J_tra3n_und3rs4ample, N_tra1n_und3rs4mple, P_trainn_und3rs4mple, L_t3st_und3rs4mple)

    plt.figure()
    plot_recall_for_threshold(clf, J_tra3n_und3rs4ample, N_tra1n_und3rs4mple, P_trainn_und3rs4mple, L_t3st_und3rs4mple, thresholds)

    plt.figure()
    pl0t_precision_recall(clf, J_tra3n_und3rs4ample, N_tra1n_und3rs4mple, P_trainn_und3rs4mple, L_t3st_und3rs4mple)

plt.show()

And this is what my tutor is asking:

I checked your submission and it still didn't plot for different thresholds as required by the task sheet:

for each model, plot recall matrices for different thresholds for the undersampled dataset

for each model, plot the precision-recall curve for the undersampled dataset

And this is the task sheet (one way to produce the per-threshold plots is sketched below the link).

Here is the task sheet link:
https://mega.nz/file/L5BTEArA#xDzMVcEcvpFKP4yczCUCZoKZHR7rP854EdafE1gGlBE
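
In case it helps, here is a minimal sketch of one way to read the tutor's first point, assuming "recall matrices for different thresholds" means a confusion matrix (with its recall) drawn at each threshold on the undersampled test set. The function name plot_confusion_matrices_per_threshold and the 3x3 subplot layout are my own choices, not taken from the task sheet; the sketch reuses the variable names and helper style from the code above.

from sklearn.metrics import confusion_matrix, recall_score
import matplotlib.pyplot as plt
import seaborn as sns

def plot_confusion_matrices_per_threshold(clf, X_train, y_train, X_test, y_test, thresholds):
    # Fit the classifier, then draw one confusion matrix per decision threshold
    clf.fit(X_train, y_train)
    proba = clf.predict_proba(X_test)[:, 1]

    fig, axes = plt.subplots(3, 3, figsize=(12, 10))
    for ax, t in zip(axes.ravel(), thresholds):
        y_pred = (proba >= t).astype(int)   # apply the threshold to the fraud probability
        cm = confusion_matrix(y_test, y_pred)
        rec = recall_score(y_test, y_pred)
        sns.heatmap(cm, annot=True, fmt='d', cmap="YlGnBu", cbar=False, ax=ax)
        ax.set_title(f"threshold = {t:.1f}, recall = {rec:.2f}")
        ax.set_xlabel("Predicted")
        ax.set_ylabel("True")
    fig.suptitle(f"Confusion matrices per threshold for {clf.__class__.__name__}")
    fig.tight_layout()

# Example call inside the existing classifier loop (replacing or alongside plot_recall_for_threshold):
# plot_confusion_matrices_per_threshold(clf, J_tra3n_und3rs4ample, N_tra1n_und3rs4mple,
#                                       P_trainn_und3rs4mple, L_t3st_und3rs4mple, thresholds)

This produces one figure of nine matrices per model; separate plt.figure() calls per threshold would also match the wording, so check the task sheet for the exact layout expected.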

 
