Question: help me run the code
Help me run the code below. It builds a decision tree from a comma-separated data file and uses weka.core.Utils.
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Scanner;

import weka.core.Utils;
public class Driver {
    //Attribute class to store discrete attribute values and the information gain of an attribute
    class Attribute {
        double infoGain;
        Map<String, List<Integer>> attributeValues;

        public void setInfoGain(double infoGain) {
            this.infoGain = infoGain;
        }

        public void setAttributeValues(Map<String, List<Integer>> attributeValues) {
            this.attributeValues = attributeValues;
        }

        public double getInfoGain() {
            return infoGain;
        }

        public Map<String, List<Integer>> getAttributeValues() {
            return attributeValues;
        }
    }

    int attributesCount;
    double trainingSamplesPercentage, modelAccuracy;
    String inputDataFileName;
    //Maps to store the dataset, keyed by a unique id per line
    Map<Integer, List<String>> dataMap, testDataMap;
    LinkedList<DataNode> treeNodes = new LinkedList<>(); //Acts as a queue to process each node in the tree
    DataNode root;
    private static Scanner inputScanner;

    //Constructor to initialize variables
    public Driver(double trainingSamplesPercentage, String fileName) {
        attributesCount = 0;
        this.trainingSamplesPercentage = trainingSamplesPercentage;
        modelAccuracy = 0.0;
        inputDataFileName = fileName;
        dataMap = new HashMap<>();
        testDataMap = new HashMap<>();
        treeNodes = new LinkedList<>();
    }

    //Function to read the input file and fill the data map
    public void fileRead() {
        try (BufferedReader br = new BufferedReader(new FileReader(inputDataFileName))) {
            String dataLine;
            int lineCount = 0;
            while ((dataLine = br.readLine()) != null) {
                List<String> dataLineList = Arrays.asList(dataLine.split(",")); //Converting each line into a list of Strings
                if (lineCount == 0)
                    attributesCount = dataLineList.size();
                else
                    dataMap.put(lineCount, dataLineList);
                lineCount++;
            }
            //Separating the data into training and test data sets
            lineCount -= 1;
            int trainingSamplesCount = (int) Math.round(trainingSamplesPercentage * lineCount / 100);
            for (int i = trainingSamplesCount + 1; i <= lineCount; i++) {
                testDataMap.put(i, dataMap.get(i));
                dataMap.remove(i);
            }
        } catch (FileNotFoundException e1) {
            e1.printStackTrace();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    }

    //Function to create the root node - starting the tree
    public void createRoot() {
        List<Integer> ids = new ArrayList<>();
        List<Integer> attributes = new ArrayList<>();
        ids.addAll(dataMap.keySet());
        //Filling the attributes list (the last column is the class label)
        for (int i = 1; i <= attributesCount - 1; i++) {
            attributes.add(i);
        }
        root = new DataNode(false, ids, attributes); //Setting data and valid attributes for the root node
        treeNodes.add(root); //Adding the root of the tree to the queue
    }

    //Function to count discrete class values in a given node
    private List<Integer> getClassCounts(DataNode node) {
        List<Integer> classCounts = new ArrayList<>();
        //Counting class values
        Map<String, List<Integer>> classMap = getAttributeValueCount(node.getNodeData(), attributesCount - 1);
        for (Map.Entry<String, List<Integer>> pair : classMap.entrySet()) {
            classCounts.add(pair.getValue().size());
        }
        return classCounts;
    }

    //Function to return the count of each attribute value of the current node
    public Map<String, List<Integer>> getAttributeValueCount(List<Integer> nodeData, int whichAttribute) {
        List<Integer> valuesList;
        Map<String, List<Integer>> classMap = new HashMap<>();
        //Finding which data ids support the corresponding attribute value
        for (Integer id : nodeData) {
            List<String> data = dataMap.get(id);
            String value = data.get(whichAttribute);
            if (classMap.containsKey(value)) {
                valuesList = classMap.get(value);
                valuesList.add(id);
            } else {
                valuesList = new ArrayList<>();
                valuesList.add(id);
            }
            classMap.put(value, valuesList);
        }
        return classMap;
    }

    //Function to calculate the entropy of a given node
    public double calculateEntropy(int numInstances, List<Integer> classCounts) {
        double entropy = 0.0;
        //Calculating entropy of the given node: log2(N) - (1/N) * sum(count * log2(count))
        for (Integer count : classCounts) {
            entropy -= count * Utils.log2(count);
        }
        entropy /= (double) numInstances;
        return entropy + Utils.log2(numInstances);
    }

    //Function to calculate the Information Gain of an attribute of a particular node
    public Attribute calculateInfoGain(DataNode node, int whichAttribute) {
        Attribute thisAttribute = new Attribute();
        double infoGain = node.getEntropy();
        Map<String, List<Integer>> valuesMap = getAttributeValueCount(node.getNodeData(), whichAttribute);
        thisAttribute.setAttributeValues(valuesMap);
        for (Map.Entry<String, List<Integer>> pair : valuesMap.entrySet()) {
            List<Integer> localClassCounts = new ArrayList<>();
            //Counting each class value of a particular attribute value
            Map<String, List<Integer>> localClassMap = getAttributeValueCount(pair.getValue(), attributesCount - 1);
            for (Map.Entry<String, List<Integer>> localPair : localClassMap.entrySet()) {
                localClassCounts.add(localPair.getValue().size());
            }
            infoGain -= ((double) pair.getValue().size() / (double) node.getNumInstances())
                    * calculateEntropy(pair.getValue().size(), localClassCounts);
        }
        thisAttribute.setInfoGain(infoGain);
        return thisAttribute;
    }

    public void buildTree(DataNode node) {
        List<Integer> classCounts = new ArrayList<>();
        List<Integer> validAttributes = node.getValidAttributes();
        Map<Integer, Attribute> attributeMap = new HashMap<>();
        classCounts = getClassCounts(node);
        node.setEntropy(calculateEntropy(node.getNumInstances(), classCounts));
        //Calculating the Information Gain for all valid attributes of the node and selecting the attribute with maximum gain
        System.out.println("Information Gain of all attributes in the current node:");
        double infoGain = 0;
        for (int attribute = 0; attribute < validAttributes.size(); attribute++) {
            int currentAttribute = validAttributes.get(attribute);
            attributeMap.put(currentAttribute, calculateInfoGain(node, currentAttribute - 1));
            double attrInfoGain = attributeMap.get(currentAttribute).infoGain;
            System.out.println(attrInfoGain);
            if (attrInfoGain > infoGain || infoGain == 0) {
                //Setting the splitting attribute of the node to the attribute with maximum info gain
                node.setSplittingAttribute(currentAttribute);
                infoGain = attrInfoGain;
            }
        }
        //Creating child nodes - n child nodes if there are n different values for the splitting attribute
        int splittingAttribute = node.getSplittingAttribute();
        Map<String, List<Integer>> attributeValues = attributeMap.get(splittingAttribute).getAttributeValues();
        for (Map.Entry<String, List<Integer>> attributeValue : attributeValues.entrySet()) {
            DataNode childNode = new DataNode(); //Creating a new child node
            //Loading data into the child node
            List<Integer> childNodedata = attributeValue.getValue();
            childNode.setNodeData(childNodedata);
            List<Integer> childNodeValidAttributes = new ArrayList<>();
            //Loading all attributes that the child node can use to split the data
            childNodeValidAttributes.addAll(validAttributes);
            //Removing the attribute that the parent node used to split the data
            childNodeValidAttributes.remove(validAttributes.indexOf(splittingAttribute));
            childNode.setValidAttributes(childNodeValidAttributes);
            //Adding the child node to the queue only if it is not a leaf node - pruning steps can be applied here
            if (getAttributeValueCount(childNodedata, attributesCount - 1).size() > 1 && !childNodeValidAttributes.isEmpty()) {
                childNode.setLeaf(false);
                treeNodes.add(childNode);
            } else {
                childNode.setLeaf(true);
            }
            node.setChildNode(attributeValue.getKey(), childNode); //Setting a link from the parent to the child node
        }
        System.out.println("Splitting attribute based on the Information Gain: " + splittingAttribute);
        System.out.println("Number of children for the current node: " + attributeValues.size());
        for (Map.Entry<String, List<Integer>> attributeValue : attributeValues.entrySet()) {
            System.out.println("Data IDs in the child node on edge " + attributeValue.getKey());
            DataNode child = node.getChildNode(attributeValue.getKey());
            System.out.println(child.getNodeData().toString());
            System.out.println("This child is leaf: " + child.isLeaf());
        }
        System.out.println(" ");
    }

    //Helps iterating through the tree
    public void initiateTree() {
        while (!treeNodes.isEmpty()) {
            buildTree(treeNodes.pop());
        }
    }

    //Function to calculate the accuracy of the given node for a given test sample
    private double calculateAccuracy(DataNode node, List<String> testSample) {
        double testSampleAccuracy = 0.0;
        if (node.isLeaf) {
            Map<String, List<Integer>> classMap = getAttributeValueCount(node.getNodeData(), attributesCount - 1);
            int nodeClassCount = classMap.size();
            //Setting accuracy to 0 if the leaf node does not have the corresponding class label
            if (!classMap.containsKey(testSample.get(attributesCount - 1))) {
                testSampleAccuracy = 0.0;
            } else {
                //Setting accuracy to 100% if the node contains only 1 class label
                if (nodeClassCount == 1) {
                    testSampleAccuracy = 100.0;
                } else {
                    //Calculating accuracy if there are multiple classes
                    testSampleAccuracy = 100.0 / (double) nodeClassCount;
                }
            }
        } else {
            int splittingAttribute = node.getSplittingAttribute();
            String testSampleSplittingAttributeValue = testSample.get(splittingAttribute - 1);
            DataNode nextChildNode = node.getChildNode(testSampleSplittingAttributeValue);
            if (nextChildNode == null) {
                //Attribute value never seen during training - no matching branch, count the sample as misclassified
                return 0.0;
            }
            testSampleAccuracy = calculateAccuracy(nextChildNode, testSample);
        }
        return testSampleAccuracy;
    }

    public double startTree() {
        fileRead();
        createRoot();
        initiateTree();
        int testSamplesCount = 0;
        for (Map.Entry<Integer, List<String>> testData : testDataMap.entrySet()) {
            modelAccuracy += calculateAccuracy(root, testData.getValue());
            testSamplesCount++;
        }
        modelAccuracy /= testSamplesCount;
        return Math.round(modelAccuracy * 100) / 100.0; //100.0 avoids integer division when rounding to two decimals
    }
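    //NOTE (assumption): the original post has no main method, so there is nothing to launch.
    //A minimal, hypothetical entry point that reads the file name and training split from standard input:
    public static void main(String[] args) {
        inputScanner = new Scanner(System.in);
        System.out.println("Enter the comma-separated data file name:");
        String fileName = inputScanner.nextLine();
        System.out.println("Enter the training sample percentage (e.g. 70):");
        double trainingPercentage = Double.parseDouble(inputScanner.nextLine());
        Driver driver = new Driver(trainingPercentage, fileName);
        System.out.println("Model accuracy on the test split: " + driver.startTree() + "%");
    }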
}
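The Driver class above also references a DataNode class that is not included in the post, so it will not compile on its own. Below is a minimal sketch of what DataNode presumably looks like, inferred only from the calls Driver makes on it; the field names and the internal children map are assumptions, not the original class.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

//Hypothetical reconstruction of the missing DataNode class (one node of the decision tree)
public class DataNode {
    boolean isLeaf;                      //accessed directly as node.isLeaf in Driver, so kept package-visible
    double entropy;
    int splittingAttribute;
    List<Integer> nodeData;              //ids of the data rows that reached this node
    List<Integer> validAttributes;       //attributes still available for splitting
    Map<String, DataNode> children = new HashMap<>(); //one child per attribute value (assumed representation)

    public DataNode() { }

    public DataNode(boolean isLeaf, List<Integer> nodeData, List<Integer> validAttributes) {
        this.isLeaf = isLeaf;
        this.nodeData = nodeData;
        this.validAttributes = validAttributes;
    }

    public boolean isLeaf() { return isLeaf; }
    public void setLeaf(boolean isLeaf) { this.isLeaf = isLeaf; }
    public double getEntropy() { return entropy; }
    public void setEntropy(double entropy) { this.entropy = entropy; }
    public int getSplittingAttribute() { return splittingAttribute; }
    public void setSplittingAttribute(int splittingAttribute) { this.splittingAttribute = splittingAttribute; }
    public List<Integer> getNodeData() { return nodeData; }
    public void setNodeData(List<Integer> nodeData) { this.nodeData = nodeData; }
    public List<Integer> getValidAttributes() { return validAttributes; }
    public void setValidAttributes(List<Integer> validAttributes) { this.validAttributes = validAttributes; }
    public int getNumInstances() { return nodeData == null ? 0 : nodeData.size(); } //assumed: number of rows in this node
    public DataNode getChildNode(String attributeValue) { return children.get(attributeValue); }
    public void setChildNode(String attributeValue, DataNode child) { children.put(attributeValue, child); }
}

With Driver.java and DataNode.java in the same directory, and assuming the Weka jar (called weka.jar here) sits next to them so that weka.core.Utils resolves, compiling and running would look roughly like this on Linux/macOS (use ; instead of : as the classpath separator on Windows):

javac -cp .:weka.jar Driver.java DataNode.java
java -cp .:weka.jar Driver

If pulling in Weka only for Utils.log2 feels heavy, those calls could instead be replaced by a small local helper such as Math.log(x) / Math.log(2.0).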