Question: help make my code more efficient for long speeches 2. identify unique word-pairs and calculate their frequencies. bool wordpairMapping( vector & sentences, map < pair
help make my code more efficient for long speeches
2. identify unique word-pairs and calculate their frequencies.
bool wordpairMapping( vector& sentences, map< pair , int> &wordpairFreq_map);
Given a list of sentences stored in the first argument sentences, this function identifies all the all the unique word-pairs and each word-pair's frequency. The identified (word-pair, frequency)'s will be stored into wordpariFreq_map, which is a map of (key, value) pairs. The key of this map a word-pair and the value is the frequency of this word-pair. This function will return true if the mapping is successful; false otherwise.
Note that
Tokens are case insensitive. We will consider lower case in this project. Whitespaces will be the token delimiter.
The two words in a word-pair are different. For example, event though the first sentence above contains two the, you are not going to construct a word pair
Order does not matter between two words in a word-pair. For example, the word-pair
Suggestions:
Use istringstream to tokenize a sentence.
Use set to store all the unique tokens identified in a sentence.
Assume sentences consists of the following 3 sentences:
The first story is about connecting the dots. The first story is about connecting the dots. The first story is about connecting the dots.
This function is going to identify a total of 21 word-pairs as follows:
: 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3 : 3
My code:
bool wordpairMapping( vector<string>& sentences, map< pair<string,string>, int> &wordpairFreq_map){
bool found = false;
int count =0;
vector<string> vec;
vector<string> vect;
vector<string> tokens;
for(unsigned int i = 0; i < sentences.size(); i ++){
string str = sentences[i];
char *cstr = new char[str.length() + 1];
strcpy(cstr, str.c_str());
char * pch;
pch = strtok (cstr,".");
while (pch != NULL)
{
tokens.push_back(pch);
pch = strtok (NULL, ".");
}
}
map < pair<string, string>, int > :: iterator it;
string tempo;
string line ="";
for(unsigned int i =0; i < tokens.size(); i++){
line = tokens[i];
for (int j =0; j < line.length(); j++){
if(line[j] == '-'){
line.erase(line.begin()+j);
}
}
vect.push_back(line);
}
int temp =0;
for(unsigned int i =0; i < vect.size(); i++){
vector<string> str;
string line = vect[i];
for(int i =0; i < line.length(); i ++){
if(!isspace(line[i])){
temp =i;
while(!isspace(line[i]))
i++;
string word = line.substr(temp, i-temp);
transform(word.begin(), word.end(), word.begin(), ::tolower);
str.push_back(word);
}
}
for(unsigned int i =0; i < str.size(); i++){
for(int j =i+1; j < str.size(); j++){
if((str[j] < str[i]) && (str[i] != str[j])){
tempo = str[i];
str[i] = str[j];
str[j] = tempo;
}
}
}
for(unsigned int i =0; i < str.size(); i++){
for(unsigned int j =i+1; j < str.size(); j++){
if(str[i] == str[j]){
str.erase(str.begin()+j);
}
}
}
for( unsigned int k =0; k < str.size(); k ++){
for(unsigned int l =k+1; l < str.size(); l++){
if(str[k] != str[l]){
bool check = true;
pair<string, string> p;
p.first = str[k];
p.second = str[l];
if(!wordpairFreq_map.empty()){
for(it = wordpairFreq_map.begin(); it != wordpairFreq_map.end(); it++){
if((it->first.first == p.first && it->first.second == p.second )|| (it->first.second == p.first &&it->first.first == p.second) ){
it->second++;
check = false;
count ++;
}
}
if(check) {
wordpairFreq_map.insert(make_pair(make_pair(p.first, p.second), 1));
count++;
}
}
if(wordpairFreq_map.empty()) {
wordpairFreq_map.insert(make_pair(make_pair(p.first, p.second), 1));
count++;
}
}
}
}
found = true;
}
return found;
}
Step by Step Solution
There are 3 Steps involved in it
Get step-by-step solutions from verified subject matter experts
