Question: import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.model_selection import train_test_split ...

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score, roc_curve
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
# Load dataset
df = pd.read_csv('heart_disease_uci.csv')
# Check the first few rows of the dataset.
# Printed explicitly: the value of a bare expression is discarded when this
# file runs as a script (only an interactive cell would echo it).
print(df.head())
# # Get basic information and summary statistics
# df.info() writes its report to stdout itself, so it is called once and is
# not wrapped in print().
df.info()
print(df.describe())
# # Check Data Types and Missing Values
# Number of missing values per column (the dtypes are part of the df.info()
# report above).
print(df.isnull().sum())
# # Distribution of Age
# Histogram of the 'age' column (20 bins) with a KDE curve drawn on top.
_, age_ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['age'], bins=20, kde=True, color='blue', ax=age_ax)
age_ax.set_title('Age Distribution')
age_ax.set_xlabel('Age')
age_ax.set_ylabel('Count')
plt.show()
# # Heart Disease by Sex
# Count of records per value of 'sex', split by heart-disease status.
plt.figure(figsize=(8, 5))
# Derive an explicit No/Yes status from 'num' (0 -> 'No', anything above 0 ->
# 'Yes') so the legend text comes from the data itself. Overriding the legend
# labels positionally would silently mislabel the bars if 'num' had more than
# two levels or appeared in a different order.
sex_disease_status = (df['num'] > 0).map({False: 'No', True: 'Yes'})
sex_ax = sns.countplot(
    x='sex',
    hue='heart_disease',
    hue_order=['No', 'Yes'],
    data=df.assign(heart_disease=sex_disease_status),
)
plt.title('Heart Disease by Gender')
# The axis shows the raw values of the 'sex' column at this point (the column
# is only mapped to 0/1 later, in the correlation section), so the label does
# not claim a 0/1 coding.
plt.xlabel('Sex')
plt.ylabel('Count')
sex_legend = sex_ax.get_legend()
if sex_legend is not None:
    sex_legend.set_title('Heart Disease')
plt.show()
# # Chest Pain Type (cp) Distribution
# Bar chart with the number of records for each value of the 'cp' column.
# NOTE(review): 'cp' is only converted to integer codes later (correlation
# section), so the ticks drawn here may not be 0-3 as the axis label says --
# confirm against the raw data.
_, cp_ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=df, x='cp', ax=cp_ax)
cp_ax.set_title('Chest Pain Type Distribution')
cp_ax.set_xlabel('Chest Pain Type (0-3)')
cp_ax.set_ylabel('Count')
plt.show()
# # Correlation Heatmap
# Re-encodes the categorical columns as numbers, keeps only numeric columns
# and plots the pairwise correlation matrix. `df` is rebound to the encoded,
# numeric-only frame, and the sections that follow use that frame.
# (pandas, numpy, seaborn and matplotlib are already imported at the top of
# the file, so they are not imported again here.)
# Load dataset (fresh copy, so the encoding always starts from the raw file)
df = pd.read_csv('heart_disease_uci.csv')
# Convert categorical columns to numeric.
# Values missing from a mapping (including NaN) become NaN.
df['sex'] = df['sex'].map({'Male': 1, 'Female': 0})
df['fbs'] = df['fbs'].map({True: 1, False: 0})
# Integer-code the remaining categorical columns. pd.Categorical assigns the
# code -1 to missing values; left as-is, that -1 would enter the correlation
# (and any later model) as if it were a real category, so it is turned back
# into NaN here.
for column in ['cp', 'thal', 'dataset', 'restecg', 'slope']:
    codes = pd.Series(
        pd.Categorical(df[column]).codes, index=df.index, dtype='float64'
    )
    df[column] = codes.where(codes >= 0)
# Drop non-numeric columns if any remain (any column that was not encoded
# above and is not numeric is removed by this step).
df = df.select_dtypes(include=[np.number])
# Create the correlation matrix (DataFrame.corr ignores NaN pairwise)
plt.figure(figsize=(12, 8))
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Heatmap')
plt.show()
# # Resting Blood Pressure (trestbps) Distribution
# Histogram of the 'trestbps' column (20 bins) with a KDE curve drawn on top.
_, bp_ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['trestbps'], bins=20, kde=True, color='green', ax=bp_ax)
bp_ax.set_title('Resting Blood Pressure Distribution')
bp_ax.set_xlabel('Resting Blood Pressure (mm Hg)')
bp_ax.set_ylabel('Count')
plt.show()
# # Cholesterol Levels Distribution (chol)
# Histogram of the 'chol' column (20 bins) with a KDE curve drawn on top.
_, chol_ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['chol'], bins=20, kde=True, color='red', ax=chol_ax)
chol_ax.set_title('Cholesterol Levels Distribution')
chol_ax.set_xlabel('Cholesterol (mg/dl)')
chol_ax.set_ylabel('Count')
plt.show()
# # Oldpeak Distribution by Heart Disease
# Box plot of 'oldpeak', with one box per distinct value of the 'num' column.
_, oldpeak_ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df, x='num', y='oldpeak', ax=oldpeak_ax)
oldpeak_ax.set_title('Oldpeak Distribution by Heart Disease')
oldpeak_ax.set_xlabel('Heart Disease (0= No,1= Yes)')
oldpeak_ax.set_ylabel('Oldpeak')
plt.show()
# # Thalach (Maximum Heart Rate Achieved) vs. Age
# Scatter plot of 'thalch' against 'age', coloured by heart-disease status.
plt.figure(figsize=(10, 6))
# Build an explicit No/Yes status from 'num' (0 -> 'No', anything above 0 ->
# 'Yes'). Colouring by this categorical column ties each legend entry to the
# data; overriding the legend labels positionally on a numeric hue can attach
# 'No'/'Yes' to the wrong entries.
thalach_disease_status = (df['num'] > 0).map({False: 'No', True: 'Yes'})
thalach_ax = sns.scatterplot(
    x='age',
    y='thalch',
    hue='heart_disease',
    hue_order=['No', 'Yes'],
    data=df.assign(heart_disease=thalach_disease_status),
    palette='coolwarm',
)
plt.title('Age vs. Maximum Heart Rate Achieved (Thalach)')
plt.xlabel('Age')
plt.ylabel('Maximum Heart Rate Achieved')
thalach_legend = thalach_ax.get_legend()
if thalach_legend is not None:
    thalach_legend.set_title('Heart Disease')
plt.show()
# # Number of Major Vessels (ca) vs. Heart Disease
# Count of records per value of 'ca', split by heart-disease status.
plt.figure(figsize=(8, 6))
# Build an explicit No/Yes status from 'num' (0 -> 'No', anything above 0 ->
# 'Yes') so the legend text comes from the data instead of being assigned to
# the legend entries by position.
ca_disease_status = (df['num'] > 0).map({False: 'No', True: 'Yes'})
ca_ax = sns.countplot(
    x='ca',
    hue='heart_disease',
    hue_order=['No', 'Yes'],
    data=df.assign(heart_disease=ca_disease_status),
    palette='Set2',
)
plt.title('Number of Major Vessels (ca) vs. Heart Disease')
plt.xlabel('Number of Major Vessels')
plt.ylabel('Count')
ca_legend = ca_ax.get_legend()
if ca_legend is not None:
    ca_legend.set_title('Heart Disease')
plt.show()
# # Data Preprocessing
# # Identify Features and Target Variable
# Target: the 'num' column. Features: every other column of df.
y = df['num']
X = df.drop(columns=['num'])
Kindly explain this code.

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!