Question: Using the Java language: The Code import java.io.File; import java.util.Scanner; import java.util.Map.Entry; import java.util.AbstractMap; import java.util.Arrays; import java.util.Comparator; public class WordCountSort { public static void

Using the Java language:

Using the Java language: The Code import java.io.File; import java.util.Scanner; import java.util.Map.Entry;import java.util.AbstractMap; import java.util.Arrays; import java.util.Comparator; public class WordCountSort { public static

The Code

import java.io.File;

import java.util.Scanner;

import java.util.Map.Entry;

import java.util.AbstractMap;

import java.util.Arrays;

import java.util.Comparator;

public class WordCountSort {

public static void selectionSort(String[] data) {

int n = data.length;

for (int i = 0; i

int minIndex = i;

for (int j = i + 1; j

if (data[minIndex].compareTo(data[j])

minIndex = j;

}

}

if (i != minIndex)

swap(data, minIndex, i);

}

}

public static void insertionSort(String[] data) {

int n = data.length;

for (int k = 1; k

String cur = data[k];

int j = k;

while (j > 0 && data[j - 1].compareTo(cur) > 0) {

data[j] = data[j - 1];

j--;

}

data[j] = cur;

}

}

/** Merge two strings. See the textbook for explanation. **/

public static void merge(String[] S1, String[] S2, String[] S) {

int i = 0, j = 0;

while (i + j

if (j == S2.length || (i S1[i].compareTo(S2[j])

S[i + j] = S1[i++];

else

S[i + j] = S2[j++];

}

}

public static void mergeSort(String[] S) {

int n = S.length;

if (n

return;

int mid = n / 2;

// partition the string into two strings

String[] S1 = Arrays.copyOfRange(S, 0, mid);

String[] S2 = Arrays.copyOfRange(S, mid, n);

mergeSort(S1);

mergeSort(S2);

merge(S1, S2, S);

}

private static void swap(String[] array, int i, int j) {

String tmp = array[i];

array[i] = array[j];

array[j] = tmp;

}

public static EntryString, Integer> count_ARRAY_SORT(String[] tokens, String sortMethod) {

int CAPACITY = 1000000;

String[] words = new String[CAPACITY];

int[] counts = new int[CAPACITY];

if (sortMethod.equals("SELECT"))

selectionSort(tokens);

else if (sortMethod.equals("INSERT"))

insertionSort(tokens);

else if (sortMethod.equals("MERGE"))

mergeSort(tokens);

else if (sortMethod.equals("JAVA"))

Arrays.sort(tokens);

else if (sortMethod.equals("QUICK"))

quickSortInPlace(tokens); //This method needs to be created

else

System.out.println(sortMethod + " sorting method does not exist.");

int j = 0, k = 0;

int len = tokens.length;

while (j

int duplicates = 1;

while (j

j++;

duplicates++;

}

words[k] = tokens[j];

counts[k] = duplicates;

j++;

k++;

}

/** get the max count **/

int maxCount = 0;

String maxWord = "";

for (int i = 0; i

if (counts[i] > maxCount) {

maxWord = words[i];

maxCount = counts[i];

}

}

return new AbstractMap.SimpleEntryString, Integer>(maxWord, maxCount);

}

static String[] readText(String PATH) throws Exception {

Scanner doc = new Scanner(new File(PATH)).useDelimiter("[^a-zA-Z]+");

// tokenize text. any characters other than English letters(a-z and A-Z

// ) are delimiters.

int length = 0;

while (doc.hasNext()) {

doc.next();

length++;

}

String[] tokens = new String[length];

Scanner s = new Scanner(new File(PATH)).useDelimiter("[^a-zA-Z]+");

length = 0;

while (s.hasNext()) {

tokens[length] = s.next().toLowerCase();

length++;

}

doc.close();

return tokens;

}

public static void main(String[] args) throws Exception {

String PATH = "/PATH/dblp";

String[] METHODS = { "MERGE", "INSERT", "SELECT" };

String[] DATASETS = { "200", "500", "1k", "5k", "10k", "100k", "1m", "" }; //Files containing that many lines of text

String[] tokens;

// run the experiments on different data sets

for (int j = 0; j

// run the experiments using different methods

System.out.println("Data is " + DATASETS[j]);

for (int i = 0; i

tokens = readText(PATH + DATASETS[j] + ".txt");

long startTime = System.currentTimeMillis();

EntryString, Integer> entry = count_ARRAY_SORT(tokens, METHODS[i]);

long endTime = System.currentTimeMillis();

String time = String.format("%12d", endTime - startTime);

System.out.println(METHODS[i] + " method\t time=" + time + ". Most popular word is " + entry.getKey()

+ ":" + entry.getValue());

}

}

}

}

The dataset

https://file.io/hmhr0K

Task 1 (2 marks). Demonstrate the performance of the algorithms You are required to run these three a gorithms, and observe their performance differences. We prepared a set of test datasets of different sizes. You can click the following data sets to download them. Try to run the data sets in increasing order up to the point when it is unbearably long. 10k lines Link to ablplok 200 lines Link to dblp200 500 lines Link to dblp500 lk lines Link to dblplk 5k lines Link to dblp5k 100k lines Link to dblpl00k 1m lines Link to dblplm 3.6m lines Link to dblp Task 1 (2 marks). Demonstrate the performance of the algorithms You are required to run these three a gorithms, and observe their performance differences. We prepared a set of test datasets of different sizes. You can click the following data sets to download them. Try to run the data sets in increasing order up to the point when it is unbearably long. 10k lines Link to ablplok 200 lines Link to dblp200 500 lines Link to dblp500 lk lines Link to dblplk 5k lines Link to dblp5k 100k lines Link to dblpl00k 1m lines Link to dblplm 3.6m lines Link to dblp

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!