Question: package index; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import comparators.TfIdfComparator; import documents.DocumentId; /** * A

package index;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import comparators.TfIdfComparator;
import documents.DocumentId;

/**

* A simplified document indexer and search engine.

*

* Documents are added to the engine one-by-one, and uniquely identified by a DocumentId.

*

* Documents are internally represented as "terms", which are lowercased versions of each word

* in the document.

*

* Queries for terms are also made on the lowercased version of the term. Terms are

* therefore case-insensitive.

*

* Lookups for documents can be done by term, and the most relevant document(s) to a specific term

* (as computed by tf-idf) can also be retrieved.

*

* See:

* -

* -

* -

*

* @author Marc Liberatore

*

*/

public class SearchEngine {

    /**
     * Per-document term counts: documentId -> (lowercased term -> number of occurrences).
     *
     * Keying by DocumentId (rather than by document text) is what makes the
     * "re-insertion is ignored" rule enforceable and keeps two documents with
     * identical text distinct.
     */
    private final Map<DocumentId, Map<String, Integer>> termCountsByDocument = new HashMap<>();

    /**
     * Inserts a document into the search engine for later analysis and retrieval.
     *
     * The document is uniquely identified by a documentId; attempts to re-insert the same
     * documentId are ignored (the reader is not consumed in that case).
     *
     * The document is supplied as a Reader. Its full contents are read, split into terms on
     * runs of non-word characters, lowercased, and stored as per-term counts. The reader is
     * not closed here; it remains the caller's responsibility.
     *
     * @param documentId the unique identifier of the document
     * @param reader the source of the document's text
     * @throws IOException iff the reader throws an exception
     */
    public void addDocument(DocumentId documentId, Reader reader) throws IOException {
        if (termCountsByDocument.containsKey(documentId)) {
            return;
        }

        // Read in chunks into a StringBuilder; per-character String concatenation is quadratic.
        StringBuilder contents = new StringBuilder();
        char[] buffer = new char[4096];
        int charsRead;
        while ((charsRead = reader.read(buffer)) != -1) {
            contents.append(buffer, 0, charsRead);
        }

        Map<String, Integer> counts = new HashMap<>();
        for (String word : contents.toString().split("\\W+")) {
            // split() yields a leading empty string when the text starts with a separator.
            if (!word.isEmpty()) {
                counts.merge(normalize(word), 1, Integer::sum);
            }
        }
        termCountsByDocument.put(documentId, counts);
    }

    /**
     * Returns the set of DocumentIds contained within the search engine that contain a given term.
     *
     * The comparison is case-insensitive and matches whole terms only, not substrings.
     *
     * @param term the term to look up
     * @return the set of DocumentIds that contain the term; empty if there are none
     */
    public Set<DocumentId> indexLookup(String term) {
        String normalized = normalize(term);
        Set<DocumentId> matches = new HashSet<>();
        for (Map.Entry<DocumentId, Map<String, Integer>> entry : termCountsByDocument.entrySet()) {
            if (entry.getValue().containsKey(normalized)) {
                matches.add(entry.getKey());
            }
        }
        return matches;
    }

    /**
     * Returns the term frequency of a term in a particular document.
     *
     * The term frequency is the number of times the term appears in the document.
     *
     * @param documentId the document to inspect
     * @param term the term to count (case-insensitive)
     * @return the term frequency of the term in the document; 0 if it does not appear
     * @throws IllegalArgumentException if the documentId has not been added to the engine
     */
    public int termFrequency(DocumentId documentId, String term) throws IllegalArgumentException {
        Map<String, Integer> counts = termCountsByDocument.get(documentId);
        if (counts == null) {
            throw new IllegalArgumentException("unknown document: " + documentId);
        }
        return counts.getOrDefault(normalize(term), 0);
    }

    /**
     * Returns the inverse document frequency of a term across all documents in the index.
     *
     * For our purposes, IDF is defined as log ((1 + N) / (1 + M)) where
     * N is the number of documents in total, and M
     * is the number of documents where the term appears.
     *
     * @param term the term to score (case-insensitive)
     * @return the inverse document frequency of term
     */
    public double inverseDocumentFrequency(String term) {
        // Held as doubles so the division below is floating-point, not integer, division.
        double totalDocuments = termCountsByDocument.size();
        double documentsWithTerm = indexLookup(term).size();
        return Math.log((1 + totalDocuments) / (1 + documentsWithTerm));
    }

    /**
     * Returns the tfidf score of a particular term for a particular document.
     *
     * tfidf is the product of term frequency and inverse document frequency for the given
     * term and document.
     *
     * @param documentId the document to score
     * @param term the term to score (case-insensitive)
     * @return the tfidf of the term/document
     * @throws IllegalArgumentException if the documentId has not been added to the engine
     */
    public double tfIdf(DocumentId documentId, String term) throws IllegalArgumentException {
        // termFrequency is evaluated first and performs the unknown-document check.
        return termFrequency(documentId, term) * inverseDocumentFrequency(term);
    }

    /**
     * Returns a sorted list of documents, most relevant to least relevant, for the given term.
     *
     * A document with a larger tfidf score is more relevant than a document with a lower
     * tfidf score. Ordering (including tie-breaking) is delegated to TfIdfComparator.
     *
     * Each document in the returned list contains the term, and appears exactly once.
     *
     * @param term the term to rank documents by (case-insensitive)
     * @return a list of documents sorted in descending order by tfidf
     */
    public List<DocumentId> relevanceLookup(String term) {
        List<DocumentId> documents = new ArrayList<>(indexLookup(term));
        documents.sort(new TfIdfComparator(this, term));
        return documents;
    }

    /** Converts a word or query to its canonical (lowercased, locale-independent) term form. */
    private static String normalize(String term) {
        return term.toLowerCase(Locale.ROOT);
    }
}

-------------------------------------------------------------------------------------------------------------------------------------

package comparators;

import documents.DocumentId;

import index.SearchEngine;

import java.util.Comparator;

/**

* Compare two documents in a search engine by tf-idf using a given term.

*

* Using this comparator, the *larger* item should "come before" a smaller one so

* that sort returns the list in descending (largest-to-smallest) order.

*

* It breaks ties by using the lexicographic ordering of the document IDs (that is, by using

* o1.id.compareTo(o2.id)).

*

* @author liberato

*

*/

public class TfIdfComparator implements Comparator<DocumentId> {

    private final SearchEngine searchEngine;

    private final String term;

    /**
     * Creates a comparator that ranks documents by their tf-idf score for one term.
     *
     * @param searchEngine the engine used to compute tf-idf scores
     * @param term the term whose tf-idf score determines the ordering
     */
    public TfIdfComparator(SearchEngine searchEngine, String term) {
        this.searchEngine = searchEngine;
        this.term = term;
    }

    /**
     * Orders two documents so that the one with the larger tf-idf score comes first.
     *
     * Ties are broken by the lexicographic ordering of the document ids
     * (o1.id.compareTo(o2.id)), so the ordering is deterministic.
     *
     * @param o1 the first document
     * @param o2 the second document
     * @return a negative value if o1 sorts before o2, a positive value if it sorts after,
     *         and zero only if both the scores and the ids are equal
     * @throws IllegalArgumentException if either document has not been added to the engine
     *         (propagated from SearchEngine.tfIdf)
     */
    @Override
    public int compare(DocumentId o1, DocumentId o2) {
        double score1 = searchEngine.tfIdf(o1, term);
        double score2 = searchEngine.tfIdf(o2, term);

        // Arguments are swapped on purpose: a larger score must compare as "smaller"
        // so that sorting yields descending order. Double.compare avoids the precision
        // loss of casting a subtraction to int.
        int byScoreDescending = Double.compare(score2, score1);
        if (byScoreDescending != 0) {
            return byScoreDescending;
        }
        return o1.id.compareTo(o2.id);
    }
}

--------------------------------

I cannot figure out how to implement the compare method in the second class (TfIdfComparator) so that it works with the SearchEngine class.

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!