Question: Hyperparameter Tuning section of the code not working import pandas as pd import seaborn as sns import matplotlib.pyplot as plt # Import numpy and give

Hyperparameter Tuning section of the code not working
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Import numpy and give it the alias np
import numpy as np
# Load dataset
data = pd.read_csv('/content/drive/MyDrive/Cancer_Data.csv')
# Display basic information
print(data.info())
print(data.describe())
# Visualize class distribution
sns.countplot(x='diagnosis', data=data)
plt.title('Distribution of Malignant and Benign Tumors')
plt.show()
# Handle missing values
# Exclude non-numeric columns from mean calculation
numeric_data = data.select_dtypes(include=np.number)
# Replace infinite values with NaN
numeric_data.replace([np.inf, -np.inf], np.nan, inplace=True)
# Calculate mean without infinite values
# Check if there are any columns with all values as NaN after replacing inf
for col in numeric_data.columns:
if numeric_data[col].isnull().all():
# Handle columns with all NaN values - here, we drop the column
numeric_data.drop(col, axis=1, inplace=True)
data.drop(col, axis=1, inplace=True)
else:
data[col]= numeric_data[col].fillna(numeric_data[col].mean())
# Encode categorical variables
data['diagnosis']= data['diagnosis'].map({'M': 1,'B': 0}) # M = malignant, B = benign
#Normalize numerical features to ensure they contribute equally to distance calculations in SVC and Random Forest.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
features = data.drop('diagnosis', axis=1)
#Splitting the Data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(features, data['diagnosis'], test_size=0.2, random_state=42)
#Fit and transform the scaler on the training data only
X_train_scaled = scaler.fit_transform(X_train)
#Transform the test data using the scaler fit on the training data
X_test_scaled = scaler.transform(X_test)
#Hyperparameter Tuning
from sklearn.model_selection import GridSearchCV
# Define parameter grids
param_grid_svc ={'C': [0.1,1,10], 'kernel': ['linear','rbf']}
param_grid_rf ={'n_estimators': [50,100], 'max_depth': [None,10]}
# Create GridSearchCV objects
grid_svc = GridSearchCV(SVC(), param_grid_svc, cv=5)
grid_rf = GridSearchCV(RandomForestClassifier(), param_grid_rf, cv=5)
# Fit models with grid search
grid_svc.fit(X_train, y_train)
grid_rf.fit(X_train, y_train)
#Model Evaluation
#Import the SVC class
from sklearn.svm import SVC
#Create an SVC model
svc_model = SVC()
#Train the model
svc_model.fit(X_train_scaled, y_train)
# Import the RandomForestClassifier class
from sklearn.ensemble import RandomForestClassifier
# Create a RandomForestClassifier model
rf_model = RandomForestClassifier(random_state=42) # Add a random state for reproducibility
# Train the model
rf_model.fit(X_train_scaled, y_train)
from sklearn.metrics import accuracy_score, classification_report
# Predictions from each model
svc_pred = svc_model.predict(X_test_scaled) # Predict on the scaled test data
rf_pred = rf_model.predict(X_test_scaled) # Predict on the scaled test data
# Evaluate models
print("SVC Classification Report:
", classification_report(y_test, svc_pred))
print("Random Forest Classification Report:
", classification_report(y_test, rf_pred))

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!