Question: Python code for a full Bayes algorithm without using libraries. I have been working on coding a Bayes classifier. I managed to produce a working model of a naive Bayes classifier.

Python code for full bayes algorithm without using libraries.

I have been working on coding a Bayes classifier, and I managed to produce a working model of a naive Bayes classifier. Any suggestions on how to convert it to a full Bayes classifier? I have written various functions for printing the means, the covariance matrix, and the class probabilities.

The current model works well, and I am able to train on and predict values from CSV files. However, I need to convert it to a full Bayes classifier to better fit my needs.

Code :

# Standard library
import csv
import math
import random

# Third-party
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the data file. It has no header row, so columns are addressed by
# integer position: 0-3 are used as features below and 4 as the class label.
df = pd.read_csv('iris.txt.shuffled', sep=',', header=None)

# Per-class feature means, computed while column 4 still holds the raw labels.
means = df.groupby([4]).mean()[[0, 1, 2, 3]]
means.columns = ['Feature 1', 'Feature 2', 'Feature 3', 'Feature 4']
print('Means: ', means, ' ')

# Replace the labels in column 4 with LabelEncoder's integer codes.
le = LabelEncoder()
df[4] = le.fit_transform(df[4])
dataset = df.values.tolist()

# Covariance of the four feature columns over the whole data set (not split
# by class). np.cov treats each row as one variable, hence the transpose.
print('Covariance matrix: ', np.cov(df[[0, 1, 2, 3]].T), ' ')

def loadCsv(filename):
    """Read the data file, print the class frequencies and return the rows.

    NOTE(review): ``filename`` is accepted but never used -- the path
    'iris.txt.shuffled' is hard-coded below. Confirm what callers pass
    before wiring the argument through.

    Returns:
        A list of rows (one list per record) in which column 4, the class
        label, has been replaced by its LabelEncoder integer code.
    """
    frame = pd.read_csv('iris.txt.shuffled', sep=',', header=None)

    # Relative frequency of each raw label, printed before encoding.
    print('Class probabilities: ', frame[4].value_counts()/frame.shape[0], ' ')

    encoder = LabelEncoder()
    frame[4] = encoder.fit_transform(frame[4])
    return frame.values.tolist()

def splitDataset(dataset, splitRatio):
    """Randomly partition ``dataset`` into a training list and a test list.

    Args:
        dataset: Sequence of rows; it is copied, never modified.
        splitRatio: Fraction of rows to place in the training set.

    Returns:
        ``[trainSet, testSet]``. ``trainSet`` holds
        ``int(len(dataset) * splitRatio)`` rows drawn at random without
        replacement; ``testSet`` holds the remaining rows in their
        original relative order.
    """
    remaining = list(dataset)
    # Clamp the target size: a ratio above 1.0 would otherwise keep drawing
    # after the pool is empty and make random.randrange(0) raise ValueError.
    trainSize = min(int(len(remaining) * splitRatio), len(remaining))
    trainSet = []
    while len(trainSet) < trainSize:
        index = random.randrange(len(remaining))
        trainSet.append(remaining.pop(index))
    return [trainSet, remaining]

def separateByClass(dataset):
    """Group rows by their class label.

    Args:
        dataset: Iterable of rows whose last element is the class label.

    Returns:
        A dict mapping each label to the list of complete rows (label
        included) that carry it, in the order the rows were encountered.
    """
    separated = {}
    for vector in dataset:
        separated.setdefault(vector[-1], []).append(vector)
    return separated

def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    ``numbers`` must support ``len()``; an empty sequence raises
    ZeroDivisionError.
    """
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    """Return the sample standard deviation of ``numbers``.

    The variance uses the ``n - 1`` denominator, so a sequence with a
    single element raises ZeroDivisionError.
    """
    avg = mean(numbers)
    squared_error = sum((x - avg) ** 2 for x in numbers)
    variance = squared_error / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    """Return ``(mean, stdev)`` for every feature column of ``dataset``.

    Args:
        dataset: Rows of equal length whose last element is the class label.

    Returns:
        A list with one ``(mean, stdev)`` tuple per column, excluding the
        last (label) column. An empty ``dataset`` yields an empty list.
    """
    # zip(*rows) transposes rows into columns.
    columns = list(zip(*dataset))
    # Drop the label column *before* computing statistics: its summary was
    # always discarded, and computing it fails for non-numeric labels.
    return [(mean(column), stdev(column)) for column in columns[:-1]]

def summarizeByClass(dataset):
    """Compute per-class feature summaries.

    Args:
        dataset: Rows whose last element is the class label.

    Returns:
        A dict mapping each class label to the list of ``(mean, stdev)``
        tuples produced by ``summarize`` for the rows of that class.
    """
    return {
        classValue: summarize(instances)
        for classValue, instances in separateByClass(dataset).items()
    }

def calculateProbability(x, mean, stdev):
    """Return the Gaussian density of ``x`` for the given mean and stdev.

    Args:
        x: Value at which to evaluate the density.
        mean: Mean of the distribution.
        stdev: Standard deviation of the distribution.

    Returns:
        ``exp(-(x - mean)^2 / (2 * stdev^2)) / (sqrt(2 * pi) * stdev)``.
        When ``stdev`` is 0 the formula is undefined; the feature is then
        treated as a point mass: 1.0 if ``x`` equals ``mean``, else 0.0.
    """
    if stdev == 0:
        # A constant feature would otherwise divide by zero. This is a
        # convention, not a true density value.
        return 1.0 if x == mean else 0.0
    exponent = math.exp(-((x - mean) ** 2) / (2 * stdev ** 2))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateClassProbabilities(summaries, inputVector):
    """Score ``inputVector`` against every class.

    Args:
        summaries: Dict mapping class label to a list of ``(mean, stdev)``
            tuples, one per feature.
        inputVector: Sequence whose i-th element is the value of feature i;
            elements beyond the number of summarised features are ignored.

    Returns:
        A dict mapping each class label to the product of the per-feature
        values returned by ``calculateProbability``. No class prior is
        applied. A class with no feature summaries scores 1.
    """
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        likelihood = 1
        # mu/sigma avoid shadowing the module-level mean() and stdev().
        for i, (mu, sigma) in enumerate(classSummaries):
            likelihood *= calculateProbability(inputVector[i], mu, sigma)
        probabilities[classValue] = likelihood
    return probabilities

def predict(summaries, inputVector):
    """Return the class label with the highest score for ``inputVector``.

    Args:
        summaries: Per-class feature summaries, in the form accepted by
            ``calculateClassProbabilities``.
        inputVector: Feature values of the row to classify.

    Returns:
        The label whose score is largest; on ties the label encountered
        first wins. Returns None when ``summaries`` contains no classes.
    """
    probabilities = calculateClassProbabilities(summaries, inputVector)
    if not probabilities:
        return None
    # max() keeps the first maximal key, matching a strict '>' comparison.
    return max(probabilities, key=probabilities.get)

def getPredictions(summaries, testSet):
    """Return the predicted label for every row of ``testSet``, in order.

    Args:
        summaries: Per-class feature summaries passed through to ``predict``.
        testSet: Iterable of rows to classify.
    """
    return [predict(summaries, row) for row in testSet]

def getAccuracy(testSet, predictions):
    """Return the percentage of rows whose label matches its prediction.

    Args:
        testSet: Sequence of rows whose last element is the true label.
        predictions: Sequence with one predicted label per row of
            ``testSet``, in the same order.

    Returns:
        A float between 0.0 and 100.0. An empty ``testSet`` yields 0.0
        instead of dividing by zero.
    """
    if not testSet:
        return 0.0
    # Index into predictions (rather than zip) so that a too-short
    # predictions list still raises IndexError instead of being truncated.
    correct = sum(
        1 for i, row in enumerate(testSet) if row[-1] == predictions[i]
    )
    return (correct / float(len(testSet))) * 100.0

def main():
    """Load the data, train on a random 80% split and print test accuracy."""
    # Name the file that is actually read; the previous value 'iris' did
    # not match the path hard-coded in the loader.
    filename = 'iris.txt.shuffled'
    splitRatio = 0.80

    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {} rows into train={} and test={} rows'.format(len(dataset), len(trainingSet), len(testSet)))

    # prepare model
    summaries = summarizeByClass(trainingSet)

    # test model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: {}%'.format(accuracy))

# Script entry point: runs the pipeline as soon as the module is loaded
# (there is no __main__ guard).
main()

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!