Question: void calculateProbabilities(const std::vector<Record>& train, double probabilities[MAX_CLASS][MAX_COLS - 1][3], double ...

void calculateProbabilities(const std::vector& train, double probabilities[MAX_CLASS][MAX_COLS -1][3], double classPriors[MAX_CLASS], double lambda){
int classCounts[MAX_CLASS]={0};
for (int c =0; c < MAX_CLASS; ++c){
for (int i =0; i < MAX_COLS -1; ++i){
for (int v =0; v <3; ++v){
probabilities[c][i][v]= lambda;
}
}
}
for (const auto& record : train){
classCounts[record.classLabel]++;
for (size_t i =0; i < record.attributes.size(); ++i){
if (record.attributes[i]!= MISSING){
probabilities[record.classLabel][i][record.attributes[i]]++;
}
}
}
for (int c =0; c < MAX_CLASS; ++c){
classPriors[c]= static_cast(classCounts[c])/ train.size();
for (int i =0; i < MAX_COLS -1; ++i){
double total = classCounts[c]+3* lambda;
for (int v =0; v <3; ++v){
probabilities[c][i][v]/= total;
}
}
}
}
/**
 * Predicts the class of a single record using log-space Naive Bayes scores.
 *
 * For each class the score is log(prior) plus the sum of
 * log(P(attribute value | class)) over the record's non-missing attributes.
 * Working with logarithms avoids underflow from multiplying many small
 * probabilities.
 *
 * @param record        Record to classify; its attributes list is read.
 * @param probabilities Conditional probability table indexed
 *                      [class][attribute][value].
 * @param classPriors   Prior probability of each class.
 * @return DEMOCRAT if its score is strictly greater than REPUBLICAN's,
 *         otherwise REPUBLICAN (ties go to REPUBLICAN).
 */
ClassLabel naiveBayesPredict(const Record& record, const double probabilities[MAX_CLASS][MAX_COLS - 1][3], const double classPriors[MAX_CLASS]) {
    const int kNumValues = 3;
    const size_t kNumAttributes = MAX_COLS - 1;

    // Fill scores by class index so that logProbs[label] always pairs with
    // classPriors[label], whatever numeric values the labels have.
    double logProbs[MAX_CLASS];
    for (int c = 0; c < MAX_CLASS; ++c) {
        logProbs[c] = std::log(classPriors[c]);
    }

    // Never read past the fixed-size probability table.
    size_t usable = record.attributes.size();
    if (usable > kNumAttributes) {
        usable = kNumAttributes;
    }

    for (size_t i = 0; i < usable; ++i) {
        const auto value = record.attributes[i];
        if (value == MISSING) {
            continue;  // Missing attributes contribute nothing to the score.
        }
        if (value < 0 || value >= kNumValues) {
            continue;  // No table cell exists for this value.
        }
        for (int c = 0; c < MAX_CLASS; ++c) {
            logProbs[c] += std::log(probabilities[c][i][value]);
        }
    }

    return (logProbs[DEMOCRAT] > logProbs[REPUBLICAN]) ? DEMOCRAT : REPUBLICAN;
}
void evaluateModel(const std::vector& data, const double probabilities[MAX_CLASS][MAX_COLS -1][3], const double classPriors[MAX_CLASS], double& accuracy){
int correct =0;
for (const auto& record : data){
if (naiveBayesPredict(record, probabilities, classPriors)== record.classLabel){
correct++;
}
}
accuracy = static_cast(correct)/ data.size();
}
void crossValidate(const std::vector& records, double lambda, double& averageAccuracy, double& stdDev, std::vector& foldAccuracies){
size_t foldSize = records.size()/10;
foldAccuracies.clear();
for (int i =0; i <10; ++i){
std::vector train, test;
for (size_t j =0; j < records.size(); ++j){
if (j >= i * foldSize && j <(i +1)* foldSize){
test.push_back(records[j]);
}
else {
train.push_back(records[j]);
}
}
double probabilities[MAX_CLASS][MAX_COLS -1][3]={};
double classPriors[MAX_CLASS]={};
calculateProbabilities(train, probabilities, classPriors, lambda);
double foldAccuracy;
evaluateModel(test, probabilities, classPriors, foldAccuracy);
foldAccuracies.push_back(foldAccuracy);
}
double sum =0.0;
for (size_t i =0; i < foldAccuracies.size(); ++i){
sum += foldAccuracies[i];
}
double mean = sum / foldAccuracies.size();
double sqSum =0.0;
for (size_t i =0; i < foldAccuracies.size(); ++i){
sqSum +=(foldAccuracies[i]- mean)*(foldAccuracies[i]- mean);
}
stdDev = std::sqrt(sqSum / foldAccuracies.size());
averageAccuracy = mean;
}
Optimize this C++ code for Naive Bayes classification of the Republicans-and-Democrats dataset. Can you make it clearer, easier to understand, and better connected? This code implements Naive Bayes with Laplace smoothing and logarithms (log-probabilities) to handle unexpected errors and deviations.

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!