Question: import pylab import math import random class Runner(object): def _init_(self,gender,age,time): sefl.featureVec = (age, time) self.label = gender def featureDist(self, other): dist = 0.0 for i

import pylab import math import random

class Runner(object): def _init_(self,gender,age,time): sefl.featureVec = (age, time) self.label = gender def featureDist(self, other): dist = 0.0 for i in range(len(self.featureVec)): dist += abs(self.featureVex[i] - other.featureVec[i])**2 return dist**0.5 def getTime(self): return self.featureVec[1] def getAge(self): return self.featureVec[0] def getlabel(self): return self.label def getFeatures(self): return self.featureVec def _str_(self): return str(self.getAge()) + ', ' + str(self.getTime()) + ', ' + self.label

def buildMrathonExamples(fileName): data = getBMData(fileName) examples = [] for i in range(len(data['age'])): a = Runner(data['gender'][i], data['age'][i], data['time'][i]) examples.append(a) return examples

def divide80_20(examples): sampleIndices = random.sample(range(len(examples)), len(examples)//5) trainingSet, testSet = [], [] for i in range(len(examples)): if i in sampleIndices: testSet.append(examples[i]) else: trainingSet.append(examples[i]) return trainingSet, testSet

def findKNearest(examples,exampleSet, k): kNearest, distances = [], [] for i in range(k): kNearest.append(exampleSet[i]) distances.append(example.featureDist(exampleSet[i])) maxDist = max(distances) for e in exampleSet[k:]: dist = example.featureDist(e) if dist < maxDist: maxIndex = distances.index(maxDist) knearest[maxIndex] = e distances[maxIndex] = dist maxDist = max(distances) return kNearest, distances

def KNearestClassify(training, testSet, label, k): truePos, falsPos, trueNeg, falseNeg = 0, 0, 0, 0 for e in testSet: nearest, distances = findKNearest(e, training, k) numMatch = 0 for i in range(len(nearest)): if nearest[i].getLabel() == label: numMatch += 1 if numMatch > k//2: if e.getLabel() == label: truePos += 1 else: falseNeg += 1 else: if e.getLabel() != label: trueNeg += 1 else: falseNeg +=1 return truePos, falsPos, trueNeg, falseNeg

def prevalenceClassify(training, testSet, label): numWithLabel = 0 for e in trianing: if e.getLabel() == label: numWithLabel += 1 probLabel = numWithLabel/len(training) truePos, falsPos, trueNeg, falseNeg =0, 0, 0, 0 for e in testSet: if random.random() < probLabel: if e.getLabel() == label: truePos += 1 else: falsePose += 1 else: if e.getLabel() != label: trueNeg += 1 else: falseNeg += 1 return truePos, falsPos, trueNeg, falseNeg

def findK(training, minK, maxK, numFolds, label): for k in range(minK, maxK + 1, 2): score = 0.0 for i in range(numFolds): fold = random.sample(training, min(5000, len(training))) examples, testSet = divide80_20(fold) truePos, falsPos, trueNeg, falseNeg = KNearestClassify(examples, testSet, label, k) score += accuracy(truePos, falsPos, trueNeg, falseNeg) accuracies.append(score/numFolds) pylab.plot(range(minK, maxK +1, 2), accuracies) pylab.title('Average Accuracy vs k (' + str(numFolds) + ' folds)') pylab.xlabel('k') pylab.ylabel('Accuracy')

findK(training, 1, 21, 1, 'M')

producing an error:

--> 115 prevalenceClassify(training, testSet, 'M') 116 NameError: name 'training' is not defined

PLEASE HELP FIX THIS SO IT WILL RUN. THANKS! ....I am not sure what data is missing

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!