Question: https://www.cs.rutgers.edu/courses/112/classes/fall_2017_venugopal/progs/prog4/prog4.html package lse; import java.io.*; import java.util.*; /** * This class builds an index of keywords. Each keyword maps to a set of pages in
https://www.cs.rutgers.edu/courses/112/classes/fall_2017_venugopal/progs/prog4/prog4.html
package lse;
import java.io.*;
import java.util.*;
/**
* This class builds an index of keywords. Each keyword maps to a set of pages in
* which it occurs, with frequency of occurrence in each page.
*
*/
public class LittleSearchEngine {
/**
* This is a hash table of all keywords. The key is the actual keyword, and the associated value is
* an array list of all occurrences of the keyword in documents. The array list is maintained in
* DESCENDING order of frequencies.
*/
HashMap> keywordsIndex;
/**
* The hash set of all noise words.
*/
HashSet noiseWords;
/**
* Creates the keyWordsIndex and noiseWords hash tables.
*/
public LittleSearchEngine() {
keywordsIndex = new HashMap>(1000,2.0f);
noiseWords = new HashSet(100,2.0f);
}
/**
* Scans a document, and loads all keywords found into a hash table of keyword occurrences
* in the document. Uses the getKeyWord method to separate keywords from other words.
*
* @param docFile Name of the document file to be scanned and loaded
* @return Hash table of keywords in the given document, each associated with an Occurrence object
* @throws FileNotFoundException If the document file is not found on disk
*/
public HashMap loadKeywordsFromDocument(String docFile)
throws FileNotFoundException {
/** COMPLETE THIS METHOD **/
// following line is a placeholder to make the program compile
// you should modify it as needed when you write your code
return null;
}
/**
* Merges the keywords for a single document into the master keywordsIndex
* hash table. For each keyword, its Occurrence in the current document
* must be inserted in the correct place (according to descending order of
* frequency) in the same keyword's Occurrence list in the master hash table.
* This is done by calling the insertLastOccurrence method.
*
* @param kws Keywords hash table for a document
*/
public void mergeKeywords(HashMap kws) {
/** COMPLETE THIS METHOD **/
}
/**
* Given a word, returns it as a keyword if it passes the keyword test,
* otherwise returns null. A keyword is any word that, after being stripped of any
* trailing punctuation, consists only of alphabetic letters, and is not
* a noise word. All words are treated in a case-INsensitive manner.
*
* Punctuation characters are the following: '.', ',', '?', ':', ';' and '!'
*
* @param word Candidate word
* @return Keyword (word without trailing punctuation, LOWER CASE)
*/
public String getKeyword(String word) {
/** COMPLETE THIS METHOD **/
// following line is a placeholder to make the program compile
// you should modify it as needed when you write your code
return null;
}
/**
* Inserts the last occurrence in the parameter list in the correct position in the
* list, based on ordering occurrences on descending frequencies. The elements
* 0..n-2 in the list are already in the correct order. Insertion is done by
* first finding the correct spot using binary search, then inserting at that spot.
*
* @param occs List of Occurrences
* @return Sequence of mid point indexes in the input list checked by the binary search process,
* null if the size of the input list is 1. This returned array list is only used to test
* your code - it is not used elsewhere in the program.
*/
public ArrayList insertLastOccurrence(ArrayList occs) {
/** COMPLETE THIS METHOD **/
// following line is a placeholder to make the program compile
// you should modify it as needed when you write your code
return null;
}
/**
* This method indexes all keywords found in all the input documents. When this
* method is done, the keywordsIndex hash table will be filled with all keywords,
* each of which is associated with an array list of Occurrence objects, arranged
* in decreasing frequencies of occurrence.
*
* @param docsFile Name of file that has a list of all the document file names, one name per line
* @param noiseWordsFile Name of file that has a list of noise words, one noise word per line
* @throws FileNotFoundException If there is a problem locating any of the input files on disk
*/
public void makeIndex(String docsFile, String noiseWordsFile)
throws FileNotFoundException {
// load noise words to hash table
Scanner sc = new Scanner(new File(noiseWordsFile));
while (sc.hasNext()) {
String word = sc.next();
noiseWords.add(word);
}
// index all keywords
sc = new Scanner(new File(docsFile));
while (sc.hasNext()) {
String docFile = sc.next();
HashMap kws = loadKeywordsFromDocument(docFile);
mergeKeywords(kws);
}
sc.close();
}
/**
* Search result for "kw1 or kw2". A document is in the result set if kw1 or kw2 occurs in that
* document. Result set is arranged in descending order of document frequencies. (Note that a
* matching document will only appear once in the result.) Ties in frequency values are broken
* in favor of the first keyword. (That is, if kw1 is in doc1 with frequency f1, and kw2 is in doc2
* also with the same frequency f1, then doc1 will take precedence over doc2 in the result.
* The result set is limited to 5 entries. If there are no matches at all, result is null.
*
* @param kw1 First keyword
* @param kw1 Second keyword
* @return List of documents in which either kw1 or kw2 occurs, arranged in descending order of
* frequencies. The result size is limited to 5 documents. If there are no matches, returns null.
*/
public ArrayList top5search(String kw1, String kw2) {
/** COMPLETE THIS METHOD **/
// following line is a placeholder to make the program compile
// you should modify it as needed when you write your code
return null;
}
}
package lse;
/**
 * Encapsulates a single occurrence record of a keyword in a document: the name of the
 * document together with the number of times the keyword appears in it. Occurrence
 * objects are the values associated with keywords in an index hash table.
 *
 * @author RU-NB-CS112
 *
 */
public class Occurrence {

    /**
     * Name of the document in which the keyword occurs.
     */
    String document;

    /**
     * How many times the keyword occurs in {@link #document}.
     */
    int frequency;

    /**
     * Builds an occurrence from a (document, frequency) pair.
     *
     * @param doc Document name
     * @param freq Frequency of the keyword in that document
     */
    public Occurrence(String doc, int freq) {
        this.frequency = freq;
        this.document = doc;
    }

    /**
     * Renders this occurrence as {@code (document,frequency)}.
     *
     * @return Text form of this occurrence
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("(");
        text.append(document).append(",").append(frequency).append(")");
        return text.toString();
    }
}
// Docs.txt file //
AliceCh1.txt WowCh1.txt
// AliceCh1.txt file //
ALICE'S ADVENTURES IN WONDERLAND
Lewis Carroll
THE MILLENNIUM FULCRUM EDITION 3.0
CHAPTER I. Down the Rabbit-Hole
Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversation?'So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her. There was nothing so VERY remarkable in that; nor did Alice think it so VERY much out of the way to hear the Rabbit say to itself, 'Oh dear! Oh dear! I shall be late!' (when she thought it over afterwards, it occurred to her that she ought to have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually TOOK A WATCH OUT OF ITS WAISTCOAT-POCKET, and looked at it, and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it, and burning with curiosity, she ran across the field after it, and fortunately was just in time to see it pop down a large rabbit-hole under the hedge. In another moment down went Alice after it, never once considering how in the world she was to get out again.
She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it.
// noisewords.txt file //
about after all also an and another any are as at be because been before being between both but by came can come could did do does each else for from get got has had he have her here him himself his how if in into is it its just like make many me might more most much must my never now of on only or other our out over re said same see she should since so some still such take than that the their them then there these they this those through to too under up use very want was way we well were what when where which while who will with would you your a b c d e f g h i j k l m n o p q r s t u v w x y z
// WowCh1.txt file //
The War of the Worlds
by H. G. Wells [1898]
But who shall dwell in these worlds if they be inhabited? . . . Are we or they Lords of the World? . . . And how are all things made for man?-- KEPLER (quoted in The Anatomy of Melancholy)
BOOK ONE
THE COMING OF THE MARTIANS
CHAPTER ONE
THE EVE OF THE WAR
The planet Mars, I scarcely need remind the reader, revolves about the
sun at a mean distance of 140,000,000 miles, and the light and heat it receives from the sun is barely half of that received by this world. It must be, if the nebular hypothesis has any truth, older than our world; and long before this earth ceased to be molten, life upon its surface must have begun its course. The fact that it is scarcely one seventh of the volume of the earth must have accelerated its cooling to the temperature at which life could begin. It has air and water and all that is necessary for the support of animated existence.
Step by Step Solution
There are 3 Steps involved in it
Get step-by-step solutions from verified subject matter experts
