Question: Could someone help me change all the names and function names and make this code very unique, since I get a high plagiarism score? import pandas

Could someone help me change all the names and function names and make this code very unique, since I get a high plagiarism score?

 

import pandas as pd

import numpy as np

from sklearn.neighbors import KNeighborsClassifier

from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import RandomForestClassifier

from sklearn.svm import SVC

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.metrics import confusion_matrix, recall_score, precision_recall_curve, auc

import matplotlib.pyplot as plt

import seaborn as sns


 

# Read creditcard.csv into a DataFrame.
data_path = r'C:\Users\john3\Desktop\cyber security analytics sit 384\10.1HD\creditcard.csv'
data = pd.read_csv(data_path)

# Standardise 'Amount' and 'Time' into new columns, then discard the raw ones.
# The same scaler object is re-fitted for each column, so once this step is
# done it holds the statistics of 'Time' only.
scaler = StandardScaler()
for raw_column, scaled_column in (('Amount', 'scaled_amount'), ('Time', 'scaled_time')):
    column_values = data[raw_column].values.reshape(-1, 1)
    data[scaled_column] = scaler.fit_transform(column_values)
data = data.drop(columns=['Time', 'Amount'])


 

# Separate the 'Class' label from the feature columns, then hold out 30% of
# the rows as a test set (fixed seed).
y = data['Class']
X = data.drop(columns='Class')
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)


 

# Create the undersampled dataset: keep every row with Class == 1 and draw an
# equally sized random sample (without replacement) of rows with Class == 0.
fraud_indices = data.index[data['Class'] == 1].to_numpy()
normal_indices = data.index[data['Class'] == 0].to_numpy()
undersample_size = len(fraud_indices)

# Use a seeded generator so the sampled rows are the same on every run,
# consistent with the random_state=0 used for the splits and classifiers.
undersample_rng = np.random.default_rng(0)
random_normal_indices = undersample_rng.choice(
    normal_indices, size=undersample_size, replace=False
)
undersampled_indices = np.concatenate([fraud_indices, random_normal_indices])

# The arrays above hold index *labels* (taken from data.index), so select with
# label-based .loc; positional .iloc is only equivalent for a default
# RangeIndex and would pick the wrong rows otherwise.
undersampled_data = data.loc[undersampled_indices, :]
X_undersampled = undersampled_data.drop('Class', axis=1)
y_undersampled = undersampled_data['Class']

# Split the undersampled dataset into train and test sets (30% held out).
X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = train_test_split(
    X_undersampled, y_undersampled, test_size=0.3, random_state=0
)

def print_gridsearch_scores(clf, param, x_train, y_train):
    """Grid-search ``param`` for ``clf`` and report the winning setting.

    A 5-fold cross-validated ``GridSearchCV`` scored on recall is fitted on
    ``x_train`` / ``y_train``. The best parameters and best score are
    printed, and the best-parameter dict is returned.
    """
    search = GridSearchCV(estimator=clf, param_grid=param, scoring='recall', cv=5)
    search.fit(x_train, y_train)

    winner = search.best_params_
    print(f"Best parameters: {winner}")
    print(f"Best score: {search.best_score_}")
    return winner


 

def plot_confusion_matrix(cm, title):
    """Draw ``cm`` as an annotated seaborn heatmap on the current axes.

    The x axis is labelled "Predicted", the y axis "True", and ``title`` is
    used as the plot title. Nothing is returned.
    """
    heatmap_axes = sns.heatmap(cm, annot=True, cmap="YlGnBu", fmt='d', linewidths=.5)
    heatmap_axes.set_xlabel("Predicted")
    heatmap_axes.set_ylabel("True")
    heatmap_axes.set_title(title)


 

def predict_plot_test(clf, x_train, y_train, x_test, y_test):
    """Fit ``clf`` on the training data and plot its test confusion matrix.

    The classifier is fitted in place, used to predict ``x_test``, and the
    confusion matrix of ``y_test`` against those predictions is handed to
    ``plot_confusion_matrix`` with a title naming the classifier class.
    """
    clf.fit(x_train, y_train)
    predicted_labels = clf.predict(x_test)

    heading = f"Confusion Matrix for {clf.__class__.__name__}"
    plot_confusion_matrix(confusion_matrix(y_test, predicted_labels), heading)


 

def plot_recall_for_threshold(clf, x_train, y_train, x_test, y_test, thresholds):
    """Fit ``clf`` and plot test recall against each decision threshold.

    After fitting, the second column of ``predict_proba(x_test)`` is compared
    with every value in ``thresholds``; scores at or above the threshold are
    labelled 1, the rest 0, and the recall versus ``y_test`` is plotted.
    """
    clf.fit(x_train, y_train)
    positive_scores = clf.predict_proba(x_test)[:, 1]

    # One recall value per threshold, in the order the thresholds were given.
    recalls = [
        recall_score(y_test, (positive_scores >= cutoff).astype(int))
        for cutoff in thresholds
    ]

    plt.plot(thresholds, recalls)
    plt.xlabel("Threshold")
    plt.ylabel("Recall")
    plt.title(f"Recall for different thresholds for {clf.__class__.__name__}")


 

def plot_precision_recall(clf, x_train, y_train, x_test, y_test):
    """Fit ``clf`` and plot its precision-recall curve on the test data.

    The curve is computed from the second column of ``predict_proba(x_test)``
    and the area under it (recall on x, precision on y) is shown in the
    title, formatted to two decimals.
    """
    clf.fit(x_train, y_train)
    positive_scores = clf.predict_proba(x_test)[:, 1]

    curve_precision, curve_recall, _ = precision_recall_curve(y_test, positive_scores)
    pr_auc = auc(curve_recall, curve_precision)

    plt.plot(curve_recall, curve_precision)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title(f"Precision-Recall curve for {clf.__class__.__name__} (AUC = {pr_auc:0.2f})")


 

# Hyper-parameter grids searched for each classifier.
knn_params = {'n_neighbors': list(range(1, 6))}
dt_params = {'max_leaf_nodes': list(range(10, 31, 5))}
rf_params = {'n_estimators': [5, 10, 20, 50]}
svc_params = {
    'gamma': [0.001, 0.01, 0.1, 1, 10],
    'C': [0.01, 0.1, 1, 10, 100],
}

# Classifier instances; those that accept a seed use random_state=0.
# SVC needs probability=True so that predict_proba is available.
knn = KNeighborsClassifier()
dt = DecisionTreeClassifier(random_state=0)
rf = RandomForestClassifier(random_state=0)
svc = SVC(random_state=0, probability=True)

# (classifier, parameter grid) pairs, processed in this order.
classifiers = list(zip(
    (knn, dt, rf, svc),
    (knn_params, dt_params, rf_params, svc_params),
))

# Decision thresholds 0.1, 0.2, ..., 0.9 used for the recall plot.
thresholds = [tenth / 10 for tenth in range(1, 10)]


 

# Undersampled train/test pieces in the argument order the helpers expect:
# (x_train, y_train, x_test, y_test).
undersampled_split = (
    X_train_undersample,
    y_train_undersample,
    X_test_undersample,
    y_test_undersample,
)

# For every classifier: tune it on the undersampled training data, apply the
# best parameters, then draw its three diagnostic plots, each on a new figure.
for clf, params in classifiers:
    best_params = print_gridsearch_scores(clf, params, *undersampled_split[:2])
    clf.set_params(**best_params)

    plt.figure()
    predict_plot_test(clf, *undersampled_split)

    plt.figure()
    plot_recall_for_threshold(clf, *undersampled_split, thresholds)

    plt.figure()
    plot_precision_recall(clf, *undersampled_split)

# Display all figures once every classifier has been processed.
plt.show()

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!