Question: Please help me solve this KeyError raised while building a decision tree (full traceback and code below).

Please solve this ERROR!!!!!!!
KeyError Traceback (most recent call last)
in ()
90
91 # Build the decision tree
--->92 decision_tree = build_decision_tree(df)
93
94 # Print the decision tree
1 frames
in build_decision_tree(data, tree)
78 # Set the outcome for the current branch
79 if len(counts)==1:
--->80 tree[best_split_attribute][value]= outcomes[0]
81 else:
82 tree[best_split_attribute][value]= outcomes[index]
KeyError: 'House Type'
I checked my CSV file, and the 'House Type' column is definitely present in it.
============================
import pandas as pd
import numpy as np
# Load dataset into a pandas dataframe
# NOTE(review): expects 'dataset.csv' in the working directory with an
# 'Outcome' target column plus categorical attribute columns — confirm.
df = pd.read_csv('dataset.csv')
# Define a function to calculate entropy
def entropy(target_col):
elements, counts = np.unique(target_col, return_counts=True)
probs = counts/len(target_col)
entropy = np.sum(-probs * np.log2(probs))
return entropy
# Define a function to calculate information gain
def info_gain(data, split_attribute_name, target_name="Outcome"):
# Calculate the entropy of the entire dataset
total_entropy = entropy(data[target_name])
# Calculate the values and corresponding counts for the split attribute
vals, counts = np.unique(data[split_attribute_name], return_counts=True)
# Calculate the weighted entropy of the split data
weighted_entropy = np.sum([(counts[i]/np.sum(counts))* entropy(data.where(data[split_attribute_name]==vals[i]).dropna()[target_name]) for i in range(len(vals))])
# Calculate the information gain
info_gain = total_entropy - weighted_entropy
return info_gain
# Define a function to get the best split attribute
def get_best_split(data):
# Get the list of column names
columns = list(data.columns)
# Remove the target column name
columns.remove('Outcome')
# Calculate the information gain for each column
info_gains =[info_gain(data, column) for column in columns]
# Get the index of the column with the highest information gain
best_column_index = np.argmax(info_gains)
# Return the name of the best split attribute
return columns[best_column_index]
# Define the decision tree building function
def build_decision_tree(data, tree=None):
# Get the best split attribute
best_split_attribute = get_best_split(data)
# Get the unique values for the best split attribute
values = np.unique(data[best_split_attribute])
# Create a new tree node with the best split attribute
if tree is None:
tree ={}
tree[best_split_attribute]={}
# For each value of the best split attribute, create a new branch
for value in values:
# Create a new branch for the current value
sub_data = data.where(data[best_split_attribute]== value).dropna()
# Get the most common outcome for the current branch
outcomes, counts = np.unique(sub_data['Outcome'], return_counts=True)
index = np.argmax(counts)
# Set the outcome for the current branch
if len(counts)==1:
tree[best_split_attribute][value]= outcomes[0]
else:
tree[best_split_attribute][value]= outcomes[index]
# Recursively build the subtree for the current branch
if len(sub_data.drop(columns=[best_split_attribute, 'Outcome']))>0:
subtree = build_decision_tree(sub_data.drop(columns=[best_split_attribute]),{})
tree[best_split_attribute][value]= subtree
# Return the tree
return tree
# Build the decision tree from the full training dataframe.
decision_tree = build_decision_tree(df)
# Print the decision tree — in a notebook the bare expression below
# displays the nested dict as the cell's output.
decision_tree

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!