Question: ## Preprocessing Pipeline: define preprocessing for numerical and categorical features, numeric_features = ['age', 'trestbps', 'chol', 'thalch', 'oldpeak'] ...

# ## Preprocessing Pipeline
# Two column groups, each with its own imputation + transformation chain.
numeric_features = ["age", "trestbps", "chol", "thalch", "oldpeak"]
categorical_features = [
    "sex",
    "cp",
    "fbs",
    "restecg",
    "slope",
    "ca",
    "thal",
]

# Numeric columns: fill gaps with the column median, then standardize.
numeric_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)

# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode, dropping the first level of every feature.
categorical_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(drop="first")),
    ]
)

# Route each column group through its pipeline; columns listed in neither
# group are not passed through explicitly here.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_pipeline, numeric_features),
        ("cat", categorical_pipeline, categorical_features),
    ]
)
# ## Split the Data
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# ## Logistic Regression
# Preprocessing and classifier are chained so the transformers are fitted on
# the training data only when the pipeline is fitted.
log_reg = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(max_iter=1000)),
])
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)

# Evaluation on the held-out test set.
# The "\n" escapes replace literal line breaks inside the string literals,
# which were a SyntaxError (unterminated string).
print("Logistic Regression")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
# ## Decision Tree
# random_state is fixed (same seed as the train/test split) so the fitted
# tree, and therefore the reported metrics, are reproducible between runs.
decision_tree = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', DecisionTreeClassifier(random_state=42)),
])
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)

# Evaluation on the held-out test set.
# The "\n" escapes replace literal line breaks inside the string literals,
# which were a SyntaxError (unterminated string).
print("Decision Tree")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
# ## Random Forest
#
# In[42]:
# random_state is fixed (same seed as the train/test split) so bootstrap
# sampling and feature selection, and therefore the metrics, are reproducible.
random_forest = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42)),
])
random_forest.fit(X_train, y_train)
y_pred = random_forest.predict(X_test)

# Evaluation on the held-out test set.
# The "\n" escapes replace literal line breaks inside the string literals,
# which were a SyntaxError (unterminated string).
print("Random Forest")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
# ## Support Vector Machine (SVM)
# In[43]:
# NOTE(review): probability=True is only needed when predict_proba is called;
# this section only calls predict. Kept in case later cells rely on it.
svm = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', SVC(probability=True)),
])
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)

# Evaluation on the held-out test set.
# The "\n" escapes replace literal line breaks inside the string literals,
# which were a SyntaxError (unterminated string).
print("Support Vector Machine")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
# ## K-Nearest Neighbors (KNN)
#
# In[44]:
# KNN with the library's default settings, fed by the shared preprocessor.
knn = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', KNeighborsClassifier()),
])
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Evaluation on the held-out test set.
# The "\n" escapes replace literal line breaks inside the string literals,
# which were a SyntaxError (unterminated string).
print("K-Nearest Neighbors")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
# ## Hyperparameter Tuning for Each Model
# ## Logistic Regression
#
# In[47]:
# Search space: 3 values of C x 2 solvers = 6 candidates. The
# "classifier__" prefix targets the pipeline step named 'classifier'.
param_grid_log_reg = {
    'classifier__C': [0.1, 1, 10],
    'classifier__solver': ['liblinear', 'saga'],
}

# 5-fold cross-validated grid search over the full pipeline, scored by
# accuracy.
grid_log_reg = GridSearchCV(
    Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', LogisticRegression(max_iter=1000)),
    ]),
    param_grid_log_reg,
    cv=5,
    scoring='accuracy',
)
grid_log_reg.fit(X_train, y_train)

# Best parameters and mean cross-validated score
print("Best Parameters for Logistic Regression:", grid_log_reg.best_params_)
print("Best Score for Logistic Regression:", grid_log_reg.best_score_)

# Evaluate the search's selected model on the held-out test set.
# The "\n" escapes replace literal line breaks inside the string literals,
# which were a SyntaxError (unterminated string).
y_pred = grid_log_reg.predict(X_test)
print("Logistic Regression Test Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
# ## Decision Tree
# In[48]:
# Search space: 4 depths x 3 split thresholds x 2 criteria = 24 candidates.
# The "classifier__" prefix targets the pipeline step named 'classifier'.
param_grid_dec_tree = {
    'classifier__max_depth': [None, 10, 20, 30],
    'classifier__min_samples_split': [2, 5, 10],
    'classifier__criterion': ['gini', 'entropy'],
}

# 5-fold cross-validated grid search over the full pipeline, scored by
# accuracy. The tree is seeded (same seed as the train/test split) so the
# selected parameters and best score are reproducible between runs.
grid_dec_tree = GridSearchCV(
    Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', DecisionTreeClassifier(random_state=42)),
    ]),
    param_grid_dec_tree,
    cv=5,
    scoring='accuracy',
)
grid_dec_tree.fit(X_train, y_train)

# Best parameters and mean cross-validated score
print("Best Parameters for Decision Tree:", grid_dec_tree.best_params_)
print("Best Score for Decision Tree:", grid_dec_tree.best_score_)
Explain this code.

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!