Question: #include pch . h #include MarkovChain.h #include #include #include #include #include #include #include #include / / For std::min using namespace std; string

#include "pch.h"
#include "MarkovChain.h"
#include
#include
#include
#include
#include
#include
#include
#include // For std::min
using namespace std;
/**
 * Picks one word at random, weighted by its count.
 *
 * Each entry contributes `second` "tickets" to a lottery; a ticket number is
 * drawn with rand() and the entry whose cumulative ticket range contains that
 * number wins.
 *
 * @param wordsAndCounts  collection of (word, count) entries to draw from.
 * @return the `first` member of the winning entry.
 * @throws runtime_error  if the counts sum to zero, or if no entry's range
 *                        contains the drawn ticket.
 */
string pickRandomWord(const wordsAndCounts_t& wordsAndCounts){
    // Size of the lottery: the sum of every entry's count.
    unsigned int ticketPool = 0;
    for (const auto& entry : wordsAndCounts){
        ticketPool += entry.second;
    }

    if (ticketPool == 0){
        throw runtime_error("No words to pick from.");
    }

    const unsigned int drawnTicket = rand() % ticketPool;

    // Walk the entries, tracking the exclusive upper bound of each entry's
    // ticket range; the first range that contains the drawn ticket wins.
    unsigned int rangeEnd = 0;
    for (const auto& entry : wordsAndCounts){
        rangeEnd += entry.second;
        if (drawnTicket < rangeEnd){
            return entry.first;
        }
    }

    throw runtime_error("Failed to pick a random word.");
}
void MarkovChain::initializeChains(const vector& words){
size_t end =(order ==0)? words.size() : words.size()- order;
for (size_t i =0; i < end; ++i){
vector key;
// Create N-grams if order isn't 0
if (order >0){
key.assign(words.begin()+ i, words.begin()+ i + order);
}
// Determine the next word and increment the chain
string nextWord =(order ==0)? words[i] : words[i + order];
chains[key][nextWord]++;
}
// For order 0, the key should be an empty vector
if (order ==0){
vector emptyKey;
for (const auto& word : words){
chains[emptyKey][word]++;
}
}
}
string MarkovChain::generateText(const unsigned int length) const {
if (chains.empty()){
throw EmptyMarkovChainException(); // Throw exception if chains are not initialized
}
vector generatedText;
unsigned int maxLength = min(length,100U);
if (order ==0){
// Directly generate random words from the source text for 0-grams
const auto& wordMap = chains.at({});
if (wordMap.empty()){
throw runtime_error("No words available for zero-order generation.");
}
vector allWords;
for (const auto& pair : wordMap){
for (unsigned int i =0; i < pair.second; ++i){
allWords.push_back(pair.first);
}
}
for (unsigned int i =0; i < maxLength; ++i){
string nextWord = allWords[rand()% allWords.size()];
generatedText.push_back(nextWord);
}
}
else {
// Initialize with a random n-gram
auto it = chains.begin();
advance(it, rand()% chains.size());
vector currentNgram = it->first;
generatedText.insert(generatedText.end(), currentNgram.begin(), currentNgram.end());
while (generatedText.size()< maxLength){
auto nextWordsIt = chains.find(currentNgram);
if (nextWordsIt == chains.end()|| nextWordsIt->second.empty()){
break; // No possible next words
}
string nextWord = pickRandomWord(nextWordsIt->second);
generatedText.push_back(nextWord);
// Cycle to the next n-gram
currentNgram.erase(currentNgram.begin());
currentNgram.push_back(nextWord);
}
}
// Join generated words into a single string
stringstream result;
for (size_t i =0; i < generatedText.size(); ++i){
if (i !=0){
result <<"";
}
result << generatedText[i];
}
return result.str();
}
/**
 * Reads a text file and splits it into whitespace-separated words.
 *
 * @param fileName  path of the corpus file.
 * @return the words in the order they appear in the file.
 * @throws runtime_error  if the file cannot be opened (a message is also
 *                        written to cerr).
 */
vector<string> readCorpus(const string fileName){
    ifstream file(fileName);
    if (!file.is_open()){
        cerr << "Unable to open file" << endl;
        throw runtime_error("Could not open file");
    }

    vector<string> words;
    string word;
    while (file >> word){
        words.push_back(word);
    }
    // The ifstream destructor closes the file when `file` goes out of scope.
    return words;
}

// Expects generateText to throw OverfittingException, with the exact message
// below, for an order-10 chain built from corpus_small.txt.
TEST(GenerationTest, RejectsOverfitting){
    srand(0);
    MarkovChain mc(10);
    const vector<string> words = readCorpus("corpus_small.txt");
    mc.initializeChains(words);
    ASSERT_THROW({
        try {
            mc.generateText(100);
        } catch (const OverfittingException& e){
            // and this tests that it has the correct message
            ASSERT_STREQ("Order was too high! Overfitting occured. Only one candidate was available at each step.", e.what());
            throw; // rethrow so ASSERT_THROW still observes the exception
        }
    }, OverfittingException);
}
The code is failing these two tests:
TEST(GenerationTest, Generate0Grams){
srand(0);
MarkovChain mc(0);
const vector words = readCorpus("corpus_small.txt");
mc.initializeChains(words);
const auto& chains = mc.getChains();
ASSERT_EQ(chains.size(),1)<< "Expected 0-grams chains to have a single key.";
const auto numUniqueWords = set(words.begin(), words.end()).size();
const auto onlyValueSize = chains.at({}).size();
ASSERT_EQ(numUniqueWords, onlyValueSize)<<"0-grams chain empty vector value should be the size of the words, which is "<< numU

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!