Question: package index; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import comparators.TfIdfComparator; import documents.DocumentId; /** * A

package index;

import java.io.IOException;

import java.io.Reader;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.HashSet;

import java.util.List;

import java.util.Map;

import java.util.Set;

import comparators.TfIdfComparator;

import documents.DocumentId;

/**

* A simplified document indexer and search engine.

* Documents are added to the engine one-by-one, and uniquely identified by a DocumentId.

* Documents are internally represented as "terms", which are lowercased versions of each word

* in the document.

* Queries for terms are also made on the lowercased version of the term. Terms are

* therefore case-insensitive.

* Lookups for documents can be done by term, and the most relevant document(s) to a specific term

* (as computed by tf-idf) can also be retrieved.

* See:

* -

* @author Marc Liberatore

public class SearchEngine {

public Map search = new HashMap();

/**

* Inserts a document into the search engine for later analysis and retrieval.

* The document is uniquely identified by a documentId; attempts to re-insert the same

* document are ignored.

* The document is supplied as a Reader; this method stores the document contents for

* later analysis and retrieval.

* @param documentId

* @param reader

* @throws IOException iff the reader throws an exception

public void addDocument(DocumentId documentId, Reader reader) throws IOException {

String Document = "";

int x = reader.read();

while(x != -1){

Document += (char)x;

x = reader.read();

}

Document = Document.toLowerCase();

DocumentId p = search.get(documentId);

if(p ==null){

search.put(Document, documentId);

}

/**

* Returns the set of DocumentIds contained within the search engine that contain a given term.

* @param term

* @return the set of DocumentIds that contain a given term

public Set indexLookup(String term) {

Set t = new HashSet();

String k = term.toLowerCase();

for(String doc: search.keySet()){

if(doc.contains(k)){

t.add(search.get(doc));

}

return t;

}

/**

* Returns the term frequency of a term in a particular document.

* The term frequency is number of times the term appears in a document.

* See

* @param documentId

* @param term

* @return the term frequency of a term in a particular document

* @throws IllegalArgumentException if the documentId has not been added to the engine

public int termFrequency(DocumentId documentId, String term) throws IllegalArgumentException {

if(!search.containsValue(documentId)){

throw new IllegalArgumentException();

}

term.toLowerCase();

String Document = "";

for(String i: search.keySet()){

if(search.get(i).equals(documentId)){

Document = i;

break;

}

int count = 0;

int i = Document.indexOf(term);

while(i !=-1){

count++;

Document = Document.substring(i+term.length());

i = Document.indexOf(term);

}

return count;

}

/**

* Returns the inverse document frequency of a term across all documents in the index.

* For our purposes, IDF is defined as log ((1 + N) / (1 + M)) where

* N is the number of documents in total, and M

* is the number of documents where the term appears.

* @param term

* @return the inverse document frequency of term

public double inverseDocumentFrequency(String term) {

double searchSize = search.size(), mTotal = 0;

term.toLowerCase();

for(String Document: search.keySet()){

if(Document.indexOf(term) != -1){

mTotal++;

}

double value = (double)Math.log((double)(1+searchSize) / (double)(1+ mTotal));

return (double)value;

}

/**

* Returns the tfidf score of a particular term for a particular document.

* tfidf is the product of term frequency and inverse document frequency for the given term and document.

* @param documentId

* @param term

* @return the tfidf of the the term/document

* @throws IllegalArgumentException if the documentId has not been added to the engine

public double tfIdf(DocumentId documentId, String term) throws IllegalArgumentException {

if(!search.containsValue(documentId)){

throw new IllegalArgumentException();

}

return termFrequency(documentId, term) * inverseDocumentFrequency(term);

}

/**

* Returns a sorted list of documents, most relevant to least relevant, for the given term.

* A document with a larger tfidf score is more relevant than a document with a lower tfidf score.

* Each document in the returned list must contain the term.

* @param term

* @return a list of documents sorted in descending order by tfidf

public List relevanceLookup(String term) {

List Documents = new ArrayList();

Set dc = indexLookup(term);

TfIdfComparator comparekek = new TfIdfComparator(this, term);

for(DocumentId h: dc){

Documents.add(h);

for(int i = 0; i< Documents.size(); i++){

if(comparekek.compare(h, Documents.get(i)) <= 0){

Documents.add(i, h);

break;

}

if(i == Documents.size()-1)

Documents.add(h);

if(Documents.size() == 0){

Documents.add(h);

}

Documents.sort(comparekek);

return Documents;

}

-------------------------------------------------------------------------------------------------------------------------------------

package comparators;

import documents.DocumentId;

import index.SearchEngine;

import java.util.Comparator;

/**

* Compare two documents in a search engine by tf-idf using a given term.

* Using this comparator, the *larger* item should "come before" a smaller one so

* that sort returns the list in descending (largest-to-smallest) order.

* It breaks ties by using the lexicographic ordering of the document IDs (that is, by using

* o1.id.compareTo(o2.id)).

* @author liberato

public class TfIdfComparator implements Comparator {

private final SearchEngine searchEngine;

private final String term;

public TfIdfComparator(SearchEngine searchEngine, String term) {

this.searchEngine = searchEngine;

this.term = term;

}

@Override

public int compare(DocumentId o1, DocumentId o2) {

return 0;

}

--------------------------------

I cannot figure out how to implement the compare method in the second class so that it works with the SearchEngine class.

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer

Step: 1 Unlock blur-text-image

Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock

Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!

public List getLikes(String user) This will take a String representing a user (like Mike) and return a unique List containing all of the users that have liked the user Mike. public List...

package index; import java.io.IOException; import java.io.Reader; import java.util.List; import java.util.Set; import comparators.TfIdfComparator; import documents.DocumentId; /** * A simplified...

For the programming portion of the assignment, please submit only your .java files and your README.txt. Your code should be well commented. In addition please include a detailed README.txt file which...

import documents.DocumentId; import index.SearchEngine; import java.util.Comparator; /** * Compare two documents in a search engine by tf-idf using a given term. * * Using this comparator, the...

I need help adding two-game implementations and a menu selection option to a Java Server-Client Homework. I was given an attached code to this question that needs to be worked on, which is listed...

import documents.DocumentId; import index.SearchEngine; import java.util.Comparator; /** * Compare two documents in a search engine by tf-idf using a given term. * * Using this comparator, the...

//NEED TO DEAL WITH stronglyConnectedComponent method find the strongly connected component with the vertex with the key. package graphs; import java.util.ArrayList; import java.util.HashMap; import...

The number of typhoons hitting an area is a Poisson process with rate = 2.5 hurricanes per year. (a) Find the probability that the area will be affected by at least three hurricanes in space (i) of...

Waters Landscaping, Inc., completed the following transactions during its first month of operations for January 2012: a. Gary Waters invested $7,500 cash and a truck valued at $1 5,000 to start...

What advantage does PI offer over NPV in capital rationing? It's better for comparing projects of different sizes. It is simpler than NPV. PI gives a po...

CT Corp Comprehensive Question Canadian Tire Corporation, Limited ( Canadian Tire ) is a family of companies that includes a retail segment and a financial services division, among others. The retail...

1. Do you think that chief executives should still receive an enormous bonus even if the organisation that they have led has performed badly? List the arguments for and against.

explain the role of various people and groups in health and safety and wellbeing at work

explain the key points in the main legislation relating to health, safety and wellbeing at work