Question: package index; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import comparators.TfIdfComparator; import documents.DocumentId; /** * A
package index;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import comparators.TfIdfComparator;
import documents.DocumentId;
/**
* A simplified document indexer and search engine.
*
* Documents are added to the engine one-by-one, and uniquely identified by a DocumentId.
*
* Documents are internally represented as "terms", which are lowercased versions of each word
* in the document.
*
* Queries for terms are also made on the lowercased version of the term. Terms are
* therefore case-insensitive.
*
* Lookups for documents can be done by term, and the most relevant document(s) to a specific term
* (as computed by tf-idf) can also be retrieved.
*
* See:
* -
* -
* -
*
* @author Marc Liberatore
*
*/
public class SearchEngine {
public Map
/**
* Inserts a document into the search engine for later analysis and retrieval.
*
* The document is uniquely identified by a documentId; attempts to re-insert the same
* document are ignored.
*
* The document is supplied as a Reader; this method stores the document contents for
* later analysis and retrieval.
*
* @param documentId
* @param reader
* @throws IOException iff the reader throws an exception
*/
public void addDocument(DocumentId documentId, Reader reader) throws IOException {
String Document = "";
int x = reader.read();
while(x != -1){
Document += (char)x;
x = reader.read();
}
Document = Document.toLowerCase();
DocumentId p = search.get(documentId);
if(p ==null){
search.put(Document, documentId);
}
}
/**
* Returns the set of DocumentIds contained within the search engine that contain a given term.
*
* @param term
* @return the set of DocumentIds that contain a given term
*/
public Set
Set
String k = term.toLowerCase();
for(String doc: search.keySet()){
if(doc.contains(k)){
t.add(search.get(doc));
}
}
return t;
}
/**
* Returns the term frequency of a term in a particular document.
*
* The term frequency is number of times the term appears in a document.
*
* See
* @param documentId
* @param term
* @return the term frequency of a term in a particular document
* @throws IllegalArgumentException if the documentId has not been added to the engine
*/
public int termFrequency(DocumentId documentId, String term) throws IllegalArgumentException {
if(!search.containsValue(documentId)){
throw new IllegalArgumentException();
}
term.toLowerCase();
String Document = "";
for(String i: search.keySet()){
if(search.get(i).equals(documentId)){
Document = i;
break;
}
}
int count = 0;
int i = Document.indexOf(term);
while(i !=-1){
count++;
Document = Document.substring(i+term.length());
i = Document.indexOf(term);
}
return count;
}
/**
* Returns the inverse document frequency of a term across all documents in the index.
*
* For our purposes, IDF is defined as log ((1 + N) / (1 + M)) where
* N is the number of documents in total, and M
* is the number of documents where the term appears.
*
* @param term
* @return the inverse document frequency of term
*/
public double inverseDocumentFrequency(String term) {
double searchSize = search.size(), mTotal = 0;
term.toLowerCase();
for(String Document: search.keySet()){
if(Document.indexOf(term) != -1){
mTotal++;
}
}
double value = (double)Math.log((double)(1+searchSize) / (double)(1+ mTotal));
return (double)value;
}
/**
* Returns the tfidf score of a particular term for a particular document.
*
* tfidf is the product of term frequency and inverse document frequency for the given term and document.
*
* @param documentId
* @param term
* @return the tfidf of the the term/document
* @throws IllegalArgumentException if the documentId has not been added to the engine
*/
public double tfIdf(DocumentId documentId, String term) throws IllegalArgumentException {
if(!search.containsValue(documentId)){
throw new IllegalArgumentException();
}
return termFrequency(documentId, term) * inverseDocumentFrequency(term);
}
/**
* Returns a sorted list of documents, most relevant to least relevant, for the given term.
*
* A document with a larger tfidf score is more relevant than a document with a lower tfidf score.
*
* Each document in the returned list must contain the term.
*
* @param term
* @return a list of documents sorted in descending order by tfidf
*/
public List
List
Set
TfIdfComparator comparekek = new TfIdfComparator(this, term);
for(DocumentId h: dc){
Documents.add(h);
for(int i = 0; i< Documents.size(); i++){
if(comparekek.compare(h, Documents.get(i)) <= 0){
Documents.add(i, h);
break;
}
if(i == Documents.size()-1)
Documents.add(h);
if(Documents.size() == 0){
Documents.add(h);
}
}
}
Documents.sort(comparekek);
return Documents;
}
}
-------------------------------------------------------------------------------------------------------------------------------------
package comparators;
import documents.DocumentId;
import index.SearchEngine;
import java.util.Comparator;
/**
* Compare two documents in a search engine by tf-idf using a given term.
*
* Using this comparator, the *larger* item should "come before" a smaller one so
* that sort returns the list in descending (largest-to-smallest) order.
*
* It breaks ties by using the lexicographic ordering of the document IDs (that is, by using
* o1.id.compareTo(o2.id)).
*
* @author liberato
*
*/
public class TfIdfComparator implements Comparator
private final SearchEngine searchEngine;
private final String term;
public TfIdfComparator(SearchEngine searchEngine, String term) {
this.searchEngine = searchEngine;
this.term = term;
}
@Override
public int compare(DocumentId o1, DocumentId o2) {
return 0;
}
}
--------------------------------
I cannot figure out how to implement the compare method in the second class, so that it works with the SearchEngine class
Step by Step Solution
There are 3 Steps involved in it
Get step-by-step solutions from verified subject matter experts
