Question: Implement the stubbed methods of the two Java classes below, TfIdfComparator and SearchEngine, so that each one behaves as its documentation comment describes.

import documents.DocumentId;

import index.SearchEngine;

import java.util.Comparator;

/**
 * Compares two documents in a search engine by their tf-idf score for a given term.
 *
 * <p>Using this comparator, the document with the <em>larger</em> score "comes before" the
 * document with the smaller score, so sorting with it yields most-relevant-first order.
 *
 * <p>Ties are broken by the lexicographic ordering of the document IDs (that is, by using
 * {@code o1.id.compareTo(o2.id)}).
 */
public class TfIdfComparator implements Comparator<DocumentId> {

    /** Engine that is asked for the tf-idf score of each document being compared. */
    private final SearchEngine searchEngine;

    /** Term whose tf-idf score determines the ordering. */
    private final String term;

    /**
     * Creates a comparator that ranks documents by their tf-idf score for {@code term}.
     *
     * @param searchEngine the engine used to compute tf-idf scores
     * @param term the term the documents are scored against
     */
    public TfIdfComparator(SearchEngine searchEngine, String term) {
        this.searchEngine = searchEngine;
        this.term = term;
    }

    /**
     * Orders {@code o1} before {@code o2} when {@code o1} has the larger tf-idf score.
     *
     * @param o1 the first document id
     * @param o2 the second document id
     * @return a negative value if {@code o1} should come first, a positive value if
     *         {@code o2} should come first, or the result of comparing the ids when the
     *         scores are equal
     * @throws IllegalArgumentException if the engine rejects either id (the engine documents
     *         this for ids that have not been added to it)
     */
    @Override
    public int compare(DocumentId o1, DocumentId o2) {
        double firstScore = searchEngine.tfIdf(o1, term);
        double secondScore = searchEngine.tfIdf(o2, term);

        // Operands are deliberately swapped: a larger score must sort earlier (descending).
        // Double.compare is used instead of subtraction so tiny differences are not
        // truncated to 0 when converted to int.
        int byScore = Double.compare(secondScore, firstScore);
        if (byScore != 0) {
            return byScore;
        }

        // Tie-break as specified by the class contract.
        // NOTE(review): assumes DocumentId exposes a Comparable field named `id`
        // (e.g. a String) -- confirm against documents.DocumentId.
        return o1.id.compareTo(o2.id);
    }
}

=========================================================================

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import comparators.TfIdfComparator;
import documents.DocumentId;

/**
 * A simplified document indexer and search engine.
 *
 * <p>Documents are added to the engine one-by-one, and uniquely identified by a DocumentId.
 *
 * <p>Documents are internally represented as "terms", which are lowercased versions of each
 * word in the document.
 *
 * <p>Queries for terms are also made on the lowercased version of the term. Terms are
 * therefore case-insensitive.
 *
 * <p>Lookups for documents can be done by term, and the most relevant document(s) to a
 * specific term (as computed by tf-idf) can also be retrieved.
 *
 * <p>NOTE(review): DocumentId is used as a hash key here, so it is assumed to implement
 * {@code equals}/{@code hashCode} consistently -- confirm against documents.DocumentId.
 */
public class SearchEngine {

    /** Inverted index: lowercased term -> ids of the documents that contain it. */
    private final Map<String, Set<DocumentId>> index = new HashMap<>();

    /** Per-document counts: document id -> (lowercased term -> number of occurrences). */
    private final Map<DocumentId, Map<String, Integer>> termCounts = new HashMap<>();

    /**
     * Inserts a document into the search engine for later analysis and retrieval.
     *
     * <p>The document is uniquely identified by a documentId; attempts to re-insert the same
     * document are ignored (the reader is not consumed in that case).
     *
     * <p>The document is supplied as a Reader; this method reads it to the end and records
     * its terms. The reader is not closed here; that is left to the caller.
     *
     * @param documentId the unique id of the document
     * @param reader the source of the document's text
     * @throws IOException iff the reader throws an exception
     */
    public void addDocument(DocumentId documentId, Reader reader) throws IOException {
        if (termCounts.containsKey(documentId)) {
            return;
        }

        // Read everything before touching the maps, so a failing reader leaves the
        // engine unchanged.
        StringBuilder contents = new StringBuilder();
        char[] buffer = new char[4096];
        int charsRead;
        while ((charsRead = reader.read(buffer)) != -1) {
            contents.append(buffer, 0, charsRead);
        }

        // NOTE(review): "word" is taken to mean a maximal run of word characters, i.e. the
        // text is split on \W+ -- confirm this matches the intended tokenisation.
        Map<String, Integer> counts = new HashMap<>();
        for (String word : contents.toString().split("\\W+")) {
            if (word.isEmpty()) {
                // split() yields a leading empty token when the text starts with a delimiter.
                continue;
            }
            String term = normalize(word);
            counts.merge(term, 1, Integer::sum);
            index.computeIfAbsent(term, key -> new HashSet<>()).add(documentId);
        }
        termCounts.put(documentId, counts);
    }

    /**
     * Returns the set of DocumentIds contained within the search engine that contain a given
     * term. (Backed internally by a {@code Map<String, Set<DocumentId>>}.)
     *
     * @param term the term to look up; matched case-insensitively
     * @return the set of DocumentIds that contain the term; empty (never null) when no
     *         document contains it. The returned set is a copy and may be modified freely.
     */
    public Set<DocumentId> indexLookup(String term) {
        Set<DocumentId> matches = index.get(normalize(term));
        return matches == null ? new HashSet<>() : new HashSet<>(matches);
    }

    /**
     * Returns the term frequency of a term in a particular document.
     *
     * <p>The term frequency is the number of times the term appears in a document.
     * (Backed internally by a {@code Map<DocumentId, Map<String, Integer>>}.)
     *
     * @param documentId the document to inspect
     * @param term the term to count; matched case-insensitively
     * @return the term frequency of a term in a particular document (0 if it does not occur)
     * @throws IllegalArgumentException if the documentId has not been added to the engine
     */
    public int termFrequency(DocumentId documentId, String term) throws IllegalArgumentException {
        Map<String, Integer> counts = termCounts.get(documentId);
        if (counts == null) {
            throw new IllegalArgumentException("unknown document: " + documentId);
        }
        return counts.getOrDefault(normalize(term), 0);
    }

    /**
     * Returns the inverse document frequency of a term across all documents in the index.
     *
     * <p>For our purposes, IDF is defined as log ((1 + N) / (1 + M)) where N is the number of
     * documents in total, and M is the number of documents where the term appears. The
     * logarithm is the natural logarithm ({@link Math#log(double)}).
     *
     * @param term the term to score; matched case-insensitively
     * @return the inverse document frequency of term
     */
    public double inverseDocumentFrequency(String term) {
        int totalDocuments = termCounts.size();
        Set<DocumentId> matches = index.get(normalize(term));
        int documentsWithTerm = matches == null ? 0 : matches.size();
        // 1.0 forces floating-point division; integer division would truncate the ratio.
        return Math.log((1.0 + totalDocuments) / (1.0 + documentsWithTerm));
    }

    /**
     * Returns the tfidf score of a particular term for a particular document.
     *
     * <p>tfidf is the product of term frequency and inverse document frequency for the given
     * term and document.
     *
     * @param documentId the document to score
     * @param term the term to score; matched case-insensitively
     * @return the tfidf of the term/document
     * @throws IllegalArgumentException if the documentId has not been added to the engine
     */
    public double tfIdf(DocumentId documentId, String term) throws IllegalArgumentException {
        return termFrequency(documentId, term) * inverseDocumentFrequency(term);
    }

    /**
     * Returns a sorted list of documents, most relevant to least relevant, for the given term.
     *
     * <p>A document with a larger tfidf score is more relevant than a document with a lower
     * tfidf score. Each document in the returned list contains the term. The ordering itself
     * is delegated to {@link TfIdfComparator}.
     *
     * @param term the term to rank by; matched case-insensitively
     * @return a list of documents sorted in descending order by tfidf; empty (never null)
     *         when no document contains the term
     */
    public List<DocumentId> relevanceLookup(String term) {
        List<DocumentId> ranked = new ArrayList<>(indexLookup(term));
        ranked.sort(new TfIdfComparator(this, term));
        return ranked;
    }

    /** Canonical (lowercased, locale-independent) form under which terms are stored and queried. */
    private static String normalize(String term) {
        return term.toLowerCase(Locale.ROOT);
    }
}

Translate written descriptions of behavior into code. Thank you!

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!