http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/FIMTDDNumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/FIMTDDNumericAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/FIMTDDNumericAttributeClassObserver.java index 2434652..dac0b7d 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/FIMTDDNumericAttributeClassObserver.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/FIMTDDNumericAttributeClassObserver.java @@ -1,4 +1,3 @@ - /* Project Knowledge Discovery from Data Streams, FCT LIAAD-INESC TEC, * * Contact: [email protected] @@ -35,206 +34,220 @@ import com.yahoo.labs.samoa.moa.core.DoubleVector; import com.yahoo.labs.samoa.moa.core.ObjectRepository; import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; -public class FIMTDDNumericAttributeClassObserver extends BinaryTreeNumericAttributeClassObserver implements NumericAttributeClassObserver { - - private static final long serialVersionUID = 1L; - - protected class Node implements Serializable { - - private static final long serialVersionUID = 1L; +public class FIMTDDNumericAttributeClassObserver extends BinaryTreeNumericAttributeClassObserver implements + NumericAttributeClassObserver { - // The split point to use - public double cut_point; + private static final long serialVersionUID = 1L; - // E-BST statistics - public DoubleVector leftStatistics = new DoubleVector(); - public DoubleVector rightStatistics = new DoubleVector(); + protected class Node implements Serializable { - // Child nodes - public Node left; - public Node right; - - public Node(double val, double label, double weight) { - this.cut_point = val; - this.leftStatistics.addToValue(0, 1); - this.leftStatistics.addToValue(1, label); - this.leftStatistics.addToValue(2, label * label); - } + private static final long serialVersionUID = 1L; - /** - * Insert a new value into the tree, updating both the sum of values and - * sum of squared values arrays - */ - public void insertValue(double val, double label, double weight) { - - // If the new value equals the value stored in a node, update - // the left (<=) node information - if (val == this.cut_point) { - this.leftStatistics.addToValue(0, 1); - this.leftStatistics.addToValue(1, label); - this.leftStatistics.addToValue(2, label * label); - } // If the new value is less than the value in a node, update the - // left distribution and send the value down to the left child node. - // If no left child exists, create one - else if (val <= this.cut_point) { - this.leftStatistics.addToValue(0, 1); - this.leftStatistics.addToValue(1, label); - this.leftStatistics.addToValue(2, label * label); - if (this.left == null) { - this.left = new Node(val, label, weight); - } else { - this.left.insertValue(val, label, weight); - } - } // If the new value is greater than the value in a node, update the - // right (>) distribution and send the value down to the right child node. - // If no right child exists, create one - else { // val > cut_point - this.rightStatistics.addToValue(0, 1); - this.rightStatistics.addToValue(1, label); - this.rightStatistics.addToValue(2, label * label); - if (this.right == null) { - this.right = new Node(val, label, weight); - } else { - this.right.insertValue(val, label, weight); - } - } - } - } + // The split point to use + public double cut_point; - // Root node of the E-BST structure for this attribute - public Node root = null; - - // Global variables for use in the FindBestSplit algorithm - double sumTotalLeft; - double sumTotalRight; - double sumSqTotalLeft; - double sumSqTotalRight; - double countRightTotal; - double countLeftTotal; - - public void observeAttributeClass(double attVal, double classVal, double weight) { - if (!Double.isNaN(attVal)) { - if (this.root == null) { - this.root = new Node(attVal, classVal, weight); - } else { - this.root.insertValue(attVal, classVal, weight); - } - } - } + // E-BST statistics + public DoubleVector leftStatistics = new DoubleVector(); + public DoubleVector rightStatistics = new DoubleVector(); - @Override - public double probabilityOfAttributeValueGivenClass(double attVal, - int classVal) { - // TODO: NaiveBayes broken until implemented - return 0.0; - } + // Child nodes + public Node left; + public Node right; - @Override - public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion(SplitCriterion criterion, double[] preSplitDist, int attIndex, boolean binaryOnly) { - - // Initialise global variables - sumTotalLeft = 0; - sumTotalRight = preSplitDist[1]; - sumSqTotalLeft = 0; - sumSqTotalRight = preSplitDist[2]; - countLeftTotal = 0; - countRightTotal = preSplitDist[0]; - return searchForBestSplitOption(this.root, null, criterion, attIndex); + public Node(double val, double label, double weight) { + this.cut_point = val; + this.leftStatistics.addToValue(0, 1); + this.leftStatistics.addToValue(1, label); + this.leftStatistics.addToValue(2, label * label); } /** - * Implementation of the FindBestSplit algorithm from E.Ikonomovska et al. + * Insert a new value into the tree, updating both the sum of values and sum + * of squared values arrays */ - protected AttributeSplitSuggestion searchForBestSplitOption(Node currentNode, AttributeSplitSuggestion currentBestOption, SplitCriterion criterion, int attIndex) { - // Return null if the current node is null or we have finished looking through all the possible splits - if (currentNode == null || countRightTotal == 0.0) { - return currentBestOption; - } - - if (currentNode.left != null) { - currentBestOption = searchForBestSplitOption(currentNode.left, currentBestOption, criterion, attIndex); + public void insertValue(double val, double label, double weight) { + + // If the new value equals the value stored in a node, update + // the left (<=) node information + if (val == this.cut_point) { + this.leftStatistics.addToValue(0, 1); + this.leftStatistics.addToValue(1, label); + this.leftStatistics.addToValue(2, label * label); + } // If the new value is less than the value in a node, update the + // left distribution and send the value down to the left child node. + // If no left child exists, create one + else if (val <= this.cut_point) { + this.leftStatistics.addToValue(0, 1); + this.leftStatistics.addToValue(1, label); + this.leftStatistics.addToValue(2, label * label); + if (this.left == null) { + this.left = new Node(val, label, weight); + } else { + this.left.insertValue(val, label, weight); } - - sumTotalLeft += currentNode.leftStatistics.getValue(1); - sumTotalRight -= currentNode.leftStatistics.getValue(1); - sumSqTotalLeft += currentNode.leftStatistics.getValue(2); - sumSqTotalRight -= currentNode.leftStatistics.getValue(2); - countLeftTotal += currentNode.leftStatistics.getValue(0); - countRightTotal -= currentNode.leftStatistics.getValue(0); - - double[][] postSplitDists = new double[][]{{countLeftTotal, sumTotalLeft, sumSqTotalLeft}, {countRightTotal, sumTotalRight, sumSqTotalRight}}; - double[] preSplitDist = new double[]{(countLeftTotal + countRightTotal), (sumTotalLeft + sumTotalRight), (sumSqTotalLeft + sumSqTotalRight)}; - double merit = criterion.getMeritOfSplit(preSplitDist, postSplitDists); - - if ((currentBestOption == null) || (merit > currentBestOption.merit)) { - currentBestOption = new AttributeSplitSuggestion( - new NumericAttributeBinaryTest(attIndex, - currentNode.cut_point, true), postSplitDists, merit); - + } // If the new value is greater than the value in a node, update the + // right (>) distribution and send the value down to the right child node. + // If no right child exists, create one + else { // val > cut_point + this.rightStatistics.addToValue(0, 1); + this.rightStatistics.addToValue(1, label); + this.rightStatistics.addToValue(2, label * label); + if (this.right == null) { + this.right = new Node(val, label, weight); + } else { + this.right.insertValue(val, label, weight); } - - if (currentNode.right != null) { - currentBestOption = searchForBestSplitOption(currentNode.right, currentBestOption, criterion, attIndex); - } - sumTotalLeft -= currentNode.leftStatistics.getValue(1); - sumTotalRight += currentNode.leftStatistics.getValue(1); - sumSqTotalLeft -= currentNode.leftStatistics.getValue(2); - sumSqTotalRight += currentNode.leftStatistics.getValue(2); - countLeftTotal -= currentNode.leftStatistics.getValue(0); - countRightTotal += currentNode.leftStatistics.getValue(0); - - return currentBestOption; + } + } + } + + // Root node of the E-BST structure for this attribute + public Node root = null; + + // Global variables for use in the FindBestSplit algorithm + double sumTotalLeft; + double sumTotalRight; + double sumSqTotalLeft; + double sumSqTotalRight; + double countRightTotal; + double countLeftTotal; + + public void observeAttributeClass(double attVal, double classVal, double weight) { + if (!Double.isNaN(attVal)) { + if (this.root == null) { + this.root = new Node(attVal, classVal, weight); + } else { + this.root.insertValue(attVal, classVal, weight); + } + } + } + + @Override + public double probabilityOfAttributeValueGivenClass(double attVal, + int classVal) { + // TODO: NaiveBayes broken until implemented + return 0.0; + } + + @Override + public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion(SplitCriterion criterion, double[] preSplitDist, + int attIndex, boolean binaryOnly) { + + // Initialise global variables + sumTotalLeft = 0; + sumTotalRight = preSplitDist[1]; + sumSqTotalLeft = 0; + sumSqTotalRight = preSplitDist[2]; + countLeftTotal = 0; + countRightTotal = preSplitDist[0]; + return searchForBestSplitOption(this.root, null, criterion, attIndex); + } + + /** + * Implementation of the FindBestSplit algorithm from E.Ikonomovska et al. + */ + protected AttributeSplitSuggestion searchForBestSplitOption(Node currentNode, + AttributeSplitSuggestion currentBestOption, SplitCriterion criterion, int attIndex) { + // Return null if the current node is null or we have finished looking + // through all the possible splits + if (currentNode == null || countRightTotal == 0.0) { + return currentBestOption; } - /** - * A method to remove all nodes in the E-BST in which it and all it's - * children represent 'bad' split points - */ - public void removeBadSplits(SplitCriterion criterion, double lastCheckRatio, double lastCheckSDR, double lastCheckE) { - removeBadSplitNodes(criterion, this.root, lastCheckRatio, lastCheckSDR, lastCheckE); + if (currentNode.left != null) { + currentBestOption = searchForBestSplitOption(currentNode.left, currentBestOption, criterion, attIndex); } - /** - * Recursive method that first checks all of a node's children before - * deciding if it is 'bad' and may be removed - */ - private boolean removeBadSplitNodes(SplitCriterion criterion, Node currentNode, double lastCheckRatio, double lastCheckSDR, double lastCheckE) { - boolean isBad = false; + sumTotalLeft += currentNode.leftStatistics.getValue(1); + sumTotalRight -= currentNode.leftStatistics.getValue(1); + sumSqTotalLeft += currentNode.leftStatistics.getValue(2); + sumSqTotalRight -= currentNode.leftStatistics.getValue(2); + countLeftTotal += currentNode.leftStatistics.getValue(0); + countRightTotal -= currentNode.leftStatistics.getValue(0); - if (currentNode == null) { - return true; - } + double[][] postSplitDists = new double[][] { { countLeftTotal, sumTotalLeft, sumSqTotalLeft }, + { countRightTotal, sumTotalRight, sumSqTotalRight } }; + double[] preSplitDist = new double[] { (countLeftTotal + countRightTotal), (sumTotalLeft + sumTotalRight), + (sumSqTotalLeft + sumSqTotalRight) }; + double merit = criterion.getMeritOfSplit(preSplitDist, postSplitDists); - if (currentNode.left != null) { - isBad = removeBadSplitNodes(criterion, currentNode.left, lastCheckRatio, lastCheckSDR, lastCheckE); - } + if ((currentBestOption == null) || (merit > currentBestOption.merit)) { + currentBestOption = new AttributeSplitSuggestion( + new NumericAttributeBinaryTest(attIndex, + currentNode.cut_point, true), postSplitDists, merit); - if (currentNode.right != null && isBad) { - isBad = removeBadSplitNodes(criterion, currentNode.left, lastCheckRatio, lastCheckSDR, lastCheckE); - } - - if (isBad) { - - double[][] postSplitDists = new double[][]{{currentNode.leftStatistics.getValue(0), currentNode.leftStatistics.getValue(1), currentNode.leftStatistics.getValue(2)}, {currentNode.rightStatistics.getValue(0), currentNode.rightStatistics.getValue(1), currentNode.rightStatistics.getValue(2)}}; - double[] preSplitDist = new double[]{(currentNode.leftStatistics.getValue(0) + currentNode.rightStatistics.getValue(0)), (currentNode.leftStatistics.getValue(1) + currentNode.rightStatistics.getValue(1)), (currentNode.leftStatistics.getValue(2) + currentNode.rightStatistics.getValue(2))}; - double merit = criterion.getMeritOfSplit(preSplitDist, postSplitDists); + } - if ((merit / lastCheckSDR) < (lastCheckRatio - (2 * lastCheckE))) { - currentNode = null; - return true; - } - } + if (currentNode.right != null) { + currentBestOption = searchForBestSplitOption(currentNode.right, currentBestOption, criterion, attIndex); + } + sumTotalLeft -= currentNode.leftStatistics.getValue(1); + sumTotalRight += currentNode.leftStatistics.getValue(1); + sumSqTotalLeft -= currentNode.leftStatistics.getValue(2); + sumSqTotalRight += currentNode.leftStatistics.getValue(2); + countLeftTotal -= currentNode.leftStatistics.getValue(0); + countRightTotal += currentNode.leftStatistics.getValue(0); + + return currentBestOption; + } + + /** + * A method to remove all nodes in the E-BST in which it and all it's children + * represent 'bad' split points + */ + public void removeBadSplits(SplitCriterion criterion, double lastCheckRatio, double lastCheckSDR, double lastCheckE) { + removeBadSplitNodes(criterion, this.root, lastCheckRatio, lastCheckSDR, lastCheckE); + } + + /** + * Recursive method that first checks all of a node's children before deciding + * if it is 'bad' and may be removed + */ + private boolean removeBadSplitNodes(SplitCriterion criterion, Node currentNode, double lastCheckRatio, + double lastCheckSDR, double lastCheckE) { + boolean isBad = false; + + if (currentNode == null) { + return true; + } - return false; + if (currentNode.left != null) { + isBad = removeBadSplitNodes(criterion, currentNode.left, lastCheckRatio, lastCheckSDR, lastCheckE); } - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub + if (currentNode.right != null && isBad) { + isBad = removeBadSplitNodes(criterion, currentNode.left, lastCheckRatio, lastCheckSDR, lastCheckE); } - @Override - protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - // TODO Auto-generated method stub + if (isBad) { + + double[][] postSplitDists = new double[][] { + { currentNode.leftStatistics.getValue(0), currentNode.leftStatistics.getValue(1), + currentNode.leftStatistics.getValue(2) }, + { currentNode.rightStatistics.getValue(0), currentNode.rightStatistics.getValue(1), + currentNode.rightStatistics.getValue(2) } }; + double[] preSplitDist = new double[] { + (currentNode.leftStatistics.getValue(0) + currentNode.rightStatistics.getValue(0)), + (currentNode.leftStatistics.getValue(1) + currentNode.rightStatistics.getValue(1)), + (currentNode.leftStatistics.getValue(2) + currentNode.rightStatistics.getValue(2)) }; + double merit = criterion.getMeritOfSplit(preSplitDist, postSplitDists); + + if ((merit / lastCheckSDR) < (lastCheckRatio - (2 * lastCheckE))) { + currentNode = null; + return true; + } } + + return false; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + // TODO Auto-generated method stub + } }
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GaussianNumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GaussianNumericAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GaussianNumericAttributeClassObserver.java index 21f58b1..107fa29 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GaussianNumericAttributeClassObserver.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GaussianNumericAttributeClassObserver.java @@ -37,146 +37,147 @@ import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; import com.github.javacliparser.IntOption; /** - * Class for observing the class data distribution for a numeric attribute using gaussian estimators. - * This observer monitors the class distribution of a given attribute. - * Used in naive Bayes and decision trees to monitor data statistics on leaves. - * + * Class for observing the class data distribution for a numeric attribute using + * gaussian estimators. This observer monitors the class distribution of a given + * attribute. Used in naive Bayes and decision trees to monitor data statistics + * on leaves. + * * @author Richard Kirkby ([email protected]) * @version $Revision: 7 $ */ public class GaussianNumericAttributeClassObserver extends AbstractOptionHandler - implements NumericAttributeClassObserver { - - private static final long serialVersionUID = 1L; - - protected DoubleVector minValueObservedPerClass = new DoubleVector(); - - protected DoubleVector maxValueObservedPerClass = new DoubleVector(); - - protected AutoExpandVector<GaussianEstimator> attValDistPerClass = new AutoExpandVector<>(); - - /** - * @param classVal - * @return The requested Estimator if it exists, or null if not present. - */ - public GaussianEstimator getEstimator(int classVal) { - return this.attValDistPerClass.get(classVal); - } - - public IntOption numBinsOption = new IntOption("numBins", 'n', - "The number of bins.", 10, 1, Integer.MAX_VALUE); - - @Override - public void observeAttributeClass(double attVal, int classVal, double weight) { - if (!Utils.isMissingValue(attVal)) { - GaussianEstimator valDist = this.attValDistPerClass.get(classVal); - if (valDist == null) { - valDist = new GaussianEstimator(); - this.attValDistPerClass.set(classVal, valDist); - this.minValueObservedPerClass.setValue(classVal, attVal); - this.maxValueObservedPerClass.setValue(classVal, attVal); - } else { - if (attVal < this.minValueObservedPerClass.getValue(classVal)) { - this.minValueObservedPerClass.setValue(classVal, attVal); - } - if (attVal > this.maxValueObservedPerClass.getValue(classVal)) { - this.maxValueObservedPerClass.setValue(classVal, attVal); - } - } - valDist.addObservation(attVal, weight); + implements NumericAttributeClassObserver { + + private static final long serialVersionUID = 1L; + + protected DoubleVector minValueObservedPerClass = new DoubleVector(); + + protected DoubleVector maxValueObservedPerClass = new DoubleVector(); + + protected AutoExpandVector<GaussianEstimator> attValDistPerClass = new AutoExpandVector<>(); + + /** + * @param classVal + * @return The requested Estimator if it exists, or null if not present. + */ + public GaussianEstimator getEstimator(int classVal) { + return this.attValDistPerClass.get(classVal); + } + + public IntOption numBinsOption = new IntOption("numBins", 'n', + "The number of bins.", 10, 1, Integer.MAX_VALUE); + + @Override + public void observeAttributeClass(double attVal, int classVal, double weight) { + if (!Utils.isMissingValue(attVal)) { + GaussianEstimator valDist = this.attValDistPerClass.get(classVal); + if (valDist == null) { + valDist = new GaussianEstimator(); + this.attValDistPerClass.set(classVal, valDist); + this.minValueObservedPerClass.setValue(classVal, attVal); + this.maxValueObservedPerClass.setValue(classVal, attVal); + } else { + if (attVal < this.minValueObservedPerClass.getValue(classVal)) { + this.minValueObservedPerClass.setValue(classVal, attVal); } - } - - @Override - public double probabilityOfAttributeValueGivenClass(double attVal, - int classVal) { - GaussianEstimator obs = this.attValDistPerClass.get(classVal); - return obs != null ? obs.probabilityDensity(attVal) : 0.0; - } - - @Override - public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( - SplitCriterion criterion, double[] preSplitDist, int attIndex, - boolean binaryOnly) { - AttributeSplitSuggestion bestSuggestion = null; - double[] suggestedSplitValues = getSplitPointSuggestions(); - for (double splitValue : suggestedSplitValues) { - double[][] postSplitDists = getClassDistsResultingFromBinarySplit(splitValue); - double merit = criterion.getMeritOfSplit(preSplitDist, - postSplitDists); - if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) { - bestSuggestion = new AttributeSplitSuggestion( - new NumericAttributeBinaryTest(attIndex, splitValue, - true), postSplitDists, merit); - } + if (attVal > this.maxValueObservedPerClass.getValue(classVal)) { + this.maxValueObservedPerClass.setValue(classVal, attVal); } - return bestSuggestion; + } + valDist.addObservation(attVal, weight); } - - public double[] getSplitPointSuggestions() { - Set<Double> suggestedSplitValues = new TreeSet<>(); - double minValue = Double.POSITIVE_INFINITY; - double maxValue = Double.NEGATIVE_INFINITY; - for (int i = 0; i < this.attValDistPerClass.size(); i++) { - GaussianEstimator estimator = this.attValDistPerClass.get(i); - if (estimator != null) { - if (this.minValueObservedPerClass.getValue(i) < minValue) { - minValue = this.minValueObservedPerClass.getValue(i); - } - if (this.maxValueObservedPerClass.getValue(i) > maxValue) { - maxValue = this.maxValueObservedPerClass.getValue(i); - } - } + } + + @Override + public double probabilityOfAttributeValueGivenClass(double attVal, + int classVal) { + GaussianEstimator obs = this.attValDistPerClass.get(classVal); + return obs != null ? obs.probabilityDensity(attVal) : 0.0; + } + + @Override + public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( + SplitCriterion criterion, double[] preSplitDist, int attIndex, + boolean binaryOnly) { + AttributeSplitSuggestion bestSuggestion = null; + double[] suggestedSplitValues = getSplitPointSuggestions(); + for (double splitValue : suggestedSplitValues) { + double[][] postSplitDists = getClassDistsResultingFromBinarySplit(splitValue); + double merit = criterion.getMeritOfSplit(preSplitDist, + postSplitDists); + if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) { + bestSuggestion = new AttributeSplitSuggestion( + new NumericAttributeBinaryTest(attIndex, splitValue, + true), postSplitDists, merit); + } + } + return bestSuggestion; + } + + public double[] getSplitPointSuggestions() { + Set<Double> suggestedSplitValues = new TreeSet<>(); + double minValue = Double.POSITIVE_INFINITY; + double maxValue = Double.NEGATIVE_INFINITY; + for (int i = 0; i < this.attValDistPerClass.size(); i++) { + GaussianEstimator estimator = this.attValDistPerClass.get(i); + if (estimator != null) { + if (this.minValueObservedPerClass.getValue(i) < minValue) { + minValue = this.minValueObservedPerClass.getValue(i); } - if (minValue < Double.POSITIVE_INFINITY) { - double range = maxValue - minValue; - for (int i = 0; i < this.numBinsOption.getValue(); i++) { - double splitValue = range / (this.numBinsOption.getValue() + 1.0) * (i + 1) - + minValue; - if ((splitValue > minValue) && (splitValue < maxValue)) { - suggestedSplitValues.add(splitValue); - } - } + if (this.maxValueObservedPerClass.getValue(i) > maxValue) { + maxValue = this.maxValueObservedPerClass.getValue(i); } - double[] suggestions = new double[suggestedSplitValues.size()]; - int i = 0; - for (double suggestion : suggestedSplitValues) { - suggestions[i++] = suggestion; + } + } + if (minValue < Double.POSITIVE_INFINITY) { + double range = maxValue - minValue; + for (int i = 0; i < this.numBinsOption.getValue(); i++) { + double splitValue = range / (this.numBinsOption.getValue() + 1.0) * (i + 1) + + minValue; + if ((splitValue > minValue) && (splitValue < maxValue)) { + suggestedSplitValues.add(splitValue); } - return suggestions; + } } - - // assume all values equal to splitValue go to lhs - public double[][] getClassDistsResultingFromBinarySplit(double splitValue) { - DoubleVector lhsDist = new DoubleVector(); - DoubleVector rhsDist = new DoubleVector(); - for (int i = 0; i < this.attValDistPerClass.size(); i++) { - GaussianEstimator estimator = this.attValDistPerClass.get(i); - if (estimator != null) { - if (splitValue < this.minValueObservedPerClass.getValue(i)) { - rhsDist.addToValue(i, estimator.getTotalWeightObserved()); - } else if (splitValue >= this.maxValueObservedPerClass.getValue(i)) { - lhsDist.addToValue(i, estimator.getTotalWeightObserved()); - } else { - double[] weightDist = estimator.estimatedWeight_LessThan_EqualTo_GreaterThan_Value(splitValue); - lhsDist.addToValue(i, weightDist[0] + weightDist[1]); - rhsDist.addToValue(i, weightDist[2]); - } - } + double[] suggestions = new double[suggestedSplitValues.size()]; + int i = 0; + for (double suggestion : suggestedSplitValues) { + suggestions[i++] = suggestion; + } + return suggestions; + } + + // assume all values equal to splitValue go to lhs + public double[][] getClassDistsResultingFromBinarySplit(double splitValue) { + DoubleVector lhsDist = new DoubleVector(); + DoubleVector rhsDist = new DoubleVector(); + for (int i = 0; i < this.attValDistPerClass.size(); i++) { + GaussianEstimator estimator = this.attValDistPerClass.get(i); + if (estimator != null) { + if (splitValue < this.minValueObservedPerClass.getValue(i)) { + rhsDist.addToValue(i, estimator.getTotalWeightObserved()); + } else if (splitValue >= this.maxValueObservedPerClass.getValue(i)) { + lhsDist.addToValue(i, estimator.getTotalWeightObserved()); + } else { + double[] weightDist = estimator.estimatedWeight_LessThan_EqualTo_GreaterThan_Value(splitValue); + lhsDist.addToValue(i, weightDist[0] + weightDist[1]); + rhsDist.addToValue(i, weightDist[2]); } - return new double[][]{lhsDist.getArrayRef(), rhsDist.getArrayRef()}; + } } + return new double[][] { lhsDist.getArrayRef(), rhsDist.getArrayRef() }; + } - @Override - public void getDescription(StringBuilder sb, int indent) { - } + @Override + public void getDescription(StringBuilder sb, int indent) { + } - @Override - protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - } + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + } - @Override - public void observeAttributeTarget(double attVal, double target) { - throw new UnsupportedOperationException("Not supported yet."); - } + @Override + public void observeAttributeTarget(double attVal, double target) { + throw new UnsupportedOperationException("Not supported yet."); + } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GreenwaldKhannaNumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GreenwaldKhannaNumericAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GreenwaldKhannaNumericAttributeClassObserver.java index 3de1146..9aaf0b8 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GreenwaldKhannaNumericAttributeClassObserver.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GreenwaldKhannaNumericAttributeClassObserver.java @@ -34,93 +34,95 @@ import com.github.javacliparser.IntOption; import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; /** - * Class for observing the class data distribution for a numeric attribute using Greenwald and Khanna methodology. - * This observer monitors the class distribution of a given attribute. - * Used in naive Bayes and decision trees to monitor data statistics on leaves. - * + * Class for observing the class data distribution for a numeric attribute using + * Greenwald and Khanna methodology. This observer monitors the class + * distribution of a given attribute. Used in naive Bayes and decision trees to + * monitor data statistics on leaves. + * * @author Richard Kirkby ([email protected]) * @version $Revision: 7 $ */ -public class GreenwaldKhannaNumericAttributeClassObserver extends AbstractOptionHandler implements NumericAttributeClassObserver { +public class GreenwaldKhannaNumericAttributeClassObserver extends AbstractOptionHandler implements + NumericAttributeClassObserver { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - protected AutoExpandVector<GreenwaldKhannaQuantileSummary> attValDistPerClass = new AutoExpandVector<>(); + protected AutoExpandVector<GreenwaldKhannaQuantileSummary> attValDistPerClass = new AutoExpandVector<>(); - public IntOption numTuplesOption = new IntOption("numTuples", 'n', - "The number of tuples.", 10, 1, Integer.MAX_VALUE); + public IntOption numTuplesOption = new IntOption("numTuples", 'n', + "The number of tuples.", 10, 1, Integer.MAX_VALUE); - @Override - public void observeAttributeClass(double attVal, int classVal, double weight) { - if (!Utils.isMissingValue(attVal)) { - GreenwaldKhannaQuantileSummary valDist = this.attValDistPerClass.get(classVal); - if (valDist == null) { - valDist = new GreenwaldKhannaQuantileSummary(this.numTuplesOption.getValue()); - this.attValDistPerClass.set(classVal, valDist); - } - // TODO: not taking weight into account - valDist.insert(attVal); - } + @Override + public void observeAttributeClass(double attVal, int classVal, double weight) { + if (!Utils.isMissingValue(attVal)) { + GreenwaldKhannaQuantileSummary valDist = this.attValDistPerClass.get(classVal); + if (valDist == null) { + valDist = new GreenwaldKhannaQuantileSummary(this.numTuplesOption.getValue()); + this.attValDistPerClass.set(classVal, valDist); + } + // TODO: not taking weight into account + valDist.insert(attVal); } + } - @Override - public double probabilityOfAttributeValueGivenClass(double attVal, - int classVal) { - // TODO: NaiveBayes broken until implemented - return 0.0; - } + @Override + public double probabilityOfAttributeValueGivenClass(double attVal, + int classVal) { + // TODO: NaiveBayes broken until implemented + return 0.0; + } - @Override - public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( - SplitCriterion criterion, double[] preSplitDist, int attIndex, - boolean binaryOnly) { - AttributeSplitSuggestion bestSuggestion = null; - for (GreenwaldKhannaQuantileSummary qs : this.attValDistPerClass) { - if (qs != null) { - double[] cutpoints = qs.getSuggestedCutpoints(); - for (double cutpoint : cutpoints) { - double[][] postSplitDists = getClassDistsResultingFromBinarySplit(cutpoint); - double merit = criterion.getMeritOfSplit(preSplitDist, - postSplitDists); - if ((bestSuggestion == null) - || (merit > bestSuggestion.merit)) { - bestSuggestion = new AttributeSplitSuggestion( - new NumericAttributeBinaryTest(attIndex, - cutpoint, true), postSplitDists, merit); - } - } - } + @Override + public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( + SplitCriterion criterion, double[] preSplitDist, int attIndex, + boolean binaryOnly) { + AttributeSplitSuggestion bestSuggestion = null; + for (GreenwaldKhannaQuantileSummary qs : this.attValDistPerClass) { + if (qs != null) { + double[] cutpoints = qs.getSuggestedCutpoints(); + for (double cutpoint : cutpoints) { + double[][] postSplitDists = getClassDistsResultingFromBinarySplit(cutpoint); + double merit = criterion.getMeritOfSplit(preSplitDist, + postSplitDists); + if ((bestSuggestion == null) + || (merit > bestSuggestion.merit)) { + bestSuggestion = new AttributeSplitSuggestion( + new NumericAttributeBinaryTest(attIndex, + cutpoint, true), postSplitDists, merit); + } } - return bestSuggestion; + } } + return bestSuggestion; + } - // assume all values equal to splitValue go to lhs - public double[][] getClassDistsResultingFromBinarySplit(double splitValue) { - DoubleVector lhsDist = new DoubleVector(); - DoubleVector rhsDist = new DoubleVector(); - for (int i = 0; i < this.attValDistPerClass.size(); i++) { - GreenwaldKhannaQuantileSummary estimator = this.attValDistPerClass.get(i); - if (estimator != null) { - long countBelow = estimator.getCountBelow(splitValue); - lhsDist.addToValue(i, countBelow); - rhsDist.addToValue(i, estimator.getTotalCount() - countBelow); - } - } - return new double[][]{lhsDist.getArrayRef(), rhsDist.getArrayRef()}; + // assume all values equal to splitValue go to lhs + public double[][] getClassDistsResultingFromBinarySplit(double splitValue) { + DoubleVector lhsDist = new DoubleVector(); + DoubleVector rhsDist = new DoubleVector(); + for (int i = 0; i < this.attValDistPerClass.size(); i++) { + GreenwaldKhannaQuantileSummary estimator = this.attValDistPerClass.get(i); + if (estimator != null) { + long countBelow = estimator.getCountBelow(splitValue); + lhsDist.addToValue(i, countBelow); + rhsDist.addToValue(i, estimator.getTotalCount() - countBelow); + } } + return new double[][] { lhsDist.getArrayRef(), rhsDist.getArrayRef() }; + } - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } - @Override - protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - // TODO Auto-generated method stub - } + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + // TODO Auto-generated method stub + } - @Override - public void observeAttributeTarget(double attVal, double target) { - throw new UnsupportedOperationException("Not supported yet."); - } + @Override + public void observeAttributeTarget(double attVal, double target) { + throw new UnsupportedOperationException("Not supported yet."); + } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NominalAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NominalAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NominalAttributeClassObserver.java index d605e84..7b34fa4 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NominalAttributeClassObserver.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NominalAttributeClassObserver.java @@ -33,146 +33,146 @@ import com.yahoo.labs.samoa.moa.core.DoubleVector; import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; /** - * Class for observing the class data distribution for a nominal attribute. - * This observer monitors the class distribution of a given attribute. - * Used in naive Bayes and decision trees to monitor data statistics on leaves. - * + * Class for observing the class data distribution for a nominal attribute. This + * observer monitors the class distribution of a given attribute. Used in naive + * Bayes and decision trees to monitor data statistics on leaves. + * * @author Richard Kirkby ([email protected]) * @version $Revision: 7 $ */ public class NominalAttributeClassObserver extends AbstractOptionHandler implements DiscreteAttributeClassObserver { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - protected double totalWeightObserved = 0.0; + protected double totalWeightObserved = 0.0; - protected double missingWeightObserved = 0.0; + protected double missingWeightObserved = 0.0; - public AutoExpandVector<DoubleVector> attValDistPerClass = new AutoExpandVector<>(); + public AutoExpandVector<DoubleVector> attValDistPerClass = new AutoExpandVector<>(); - @Override - public void observeAttributeClass(double attVal, int classVal, double weight) { - if (Utils.isMissingValue(attVal)) { - this.missingWeightObserved += weight; - } else { - int attValInt = (int) attVal; - DoubleVector valDist = this.attValDistPerClass.get(classVal); - if (valDist == null) { - valDist = new DoubleVector(); - this.attValDistPerClass.set(classVal, valDist); - } - valDist.addToValue(attValInt, weight); - } - this.totalWeightObserved += weight; + @Override + public void observeAttributeClass(double attVal, int classVal, double weight) { + if (Utils.isMissingValue(attVal)) { + this.missingWeightObserved += weight; + } else { + int attValInt = (int) attVal; + DoubleVector valDist = this.attValDistPerClass.get(classVal); + if (valDist == null) { + valDist = new DoubleVector(); + this.attValDistPerClass.set(classVal, valDist); + } + valDist.addToValue(attValInt, weight); } - - @Override - public double probabilityOfAttributeValueGivenClass(double attVal, - int classVal) { - DoubleVector obs = this.attValDistPerClass.get(classVal); - return obs != null ? (obs.getValue((int) attVal) + 1.0) - / (obs.sumOfValues() + obs.numValues()) : 0.0; + this.totalWeightObserved += weight; + } + + @Override + public double probabilityOfAttributeValueGivenClass(double attVal, + int classVal) { + DoubleVector obs = this.attValDistPerClass.get(classVal); + return obs != null ? (obs.getValue((int) attVal) + 1.0) + / (obs.sumOfValues() + obs.numValues()) : 0.0; + } + + public double totalWeightOfClassObservations() { + return this.totalWeightObserved; + } + + public double weightOfObservedMissingValues() { + return this.missingWeightObserved; + } + + @Override + public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( + SplitCriterion criterion, double[] preSplitDist, int attIndex, + boolean binaryOnly) { + AttributeSplitSuggestion bestSuggestion = null; + int maxAttValsObserved = getMaxAttValsObserved(); + if (!binaryOnly) { + double[][] postSplitDists = getClassDistsResultingFromMultiwaySplit(maxAttValsObserved); + double merit = criterion.getMeritOfSplit(preSplitDist, + postSplitDists); + bestSuggestion = new AttributeSplitSuggestion( + new NominalAttributeMultiwayTest(attIndex), postSplitDists, + merit); } - - public double totalWeightOfClassObservations() { - return this.totalWeightObserved; + for (int valIndex = 0; valIndex < maxAttValsObserved; valIndex++) { + double[][] postSplitDists = getClassDistsResultingFromBinarySplit(valIndex); + double merit = criterion.getMeritOfSplit(preSplitDist, + postSplitDists); + if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) { + bestSuggestion = new AttributeSplitSuggestion( + new NominalAttributeBinaryTest(attIndex, valIndex), + postSplitDists, merit); + } } - - public double weightOfObservedMissingValues() { - return this.missingWeightObserved; + return bestSuggestion; + } + + public int getMaxAttValsObserved() { + int maxAttValsObserved = 0; + for (DoubleVector attValDist : this.attValDistPerClass) { + if ((attValDist != null) + && (attValDist.numValues() > maxAttValsObserved)) { + maxAttValsObserved = attValDist.numValues(); + } } - - @Override - public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( - SplitCriterion criterion, double[] preSplitDist, int attIndex, - boolean binaryOnly) { - AttributeSplitSuggestion bestSuggestion = null; - int maxAttValsObserved = getMaxAttValsObserved(); - if (!binaryOnly) { - double[][] postSplitDists = getClassDistsResultingFromMultiwaySplit(maxAttValsObserved); - double merit = criterion.getMeritOfSplit(preSplitDist, - postSplitDists); - bestSuggestion = new AttributeSplitSuggestion( - new NominalAttributeMultiwayTest(attIndex), postSplitDists, - merit); - } - for (int valIndex = 0; valIndex < maxAttValsObserved; valIndex++) { - double[][] postSplitDists = getClassDistsResultingFromBinarySplit(valIndex); - double merit = criterion.getMeritOfSplit(preSplitDist, - postSplitDists); - if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) { - bestSuggestion = new AttributeSplitSuggestion( - new NominalAttributeBinaryTest(attIndex, valIndex), - postSplitDists, merit); - } - } - return bestSuggestion; + return maxAttValsObserved; + } + + public double[][] getClassDistsResultingFromMultiwaySplit( + int maxAttValsObserved) { + DoubleVector[] resultingDists = new DoubleVector[maxAttValsObserved]; + for (int i = 0; i < resultingDists.length; i++) { + resultingDists[i] = new DoubleVector(); } - - public int getMaxAttValsObserved() { - int maxAttValsObserved = 0; - for (DoubleVector attValDist : this.attValDistPerClass) { - if ((attValDist != null) - && (attValDist.numValues() > maxAttValsObserved)) { - maxAttValsObserved = attValDist.numValues(); - } + for (int i = 0; i < this.attValDistPerClass.size(); i++) { + DoubleVector attValDist = this.attValDistPerClass.get(i); + if (attValDist != null) { + for (int j = 0; j < attValDist.numValues(); j++) { + resultingDists[j].addToValue(i, attValDist.getValue(j)); } - return maxAttValsObserved; + } } - - public double[][] getClassDistsResultingFromMultiwaySplit( - int maxAttValsObserved) { - DoubleVector[] resultingDists = new DoubleVector[maxAttValsObserved]; - for (int i = 0; i < resultingDists.length; i++) { - resultingDists[i] = new DoubleVector(); - } - for (int i = 0; i < this.attValDistPerClass.size(); i++) { - DoubleVector attValDist = this.attValDistPerClass.get(i); - if (attValDist != null) { - for (int j = 0; j < attValDist.numValues(); j++) { - resultingDists[j].addToValue(i, attValDist.getValue(j)); - } - } - } - double[][] distributions = new double[maxAttValsObserved][]; - for (int i = 0; i < distributions.length; i++) { - distributions[i] = resultingDists[i].getArrayRef(); - } - return distributions; + double[][] distributions = new double[maxAttValsObserved][]; + for (int i = 0; i < distributions.length; i++) { + distributions[i] = resultingDists[i].getArrayRef(); } - - public double[][] getClassDistsResultingFromBinarySplit(int valIndex) { - DoubleVector equalsDist = new DoubleVector(); - DoubleVector notEqualDist = new DoubleVector(); - for (int i = 0; i < this.attValDistPerClass.size(); i++) { - DoubleVector attValDist = this.attValDistPerClass.get(i); - if (attValDist != null) { - for (int j = 0; j < attValDist.numValues(); j++) { - if (j == valIndex) { - equalsDist.addToValue(i, attValDist.getValue(j)); - } else { - notEqualDist.addToValue(i, attValDist.getValue(j)); - } - } - } + return distributions; + } + + public double[][] getClassDistsResultingFromBinarySplit(int valIndex) { + DoubleVector equalsDist = new DoubleVector(); + DoubleVector notEqualDist = new DoubleVector(); + for (int i = 0; i < this.attValDistPerClass.size(); i++) { + DoubleVector attValDist = this.attValDistPerClass.get(i); + if (attValDist != null) { + for (int j = 0; j < attValDist.numValues(); j++) { + if (j == valIndex) { + equalsDist.addToValue(i, attValDist.getValue(j)); + } else { + notEqualDist.addToValue(i, attValDist.getValue(j)); + } } - return new double[][]{equalsDist.getArrayRef(), - notEqualDist.getArrayRef()}; - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } - - @Override - protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - // TODO Auto-generated method stub - } - - @Override - public void observeAttributeTarget(double attVal, double target) { - throw new UnsupportedOperationException("Not supported yet."); + } } + return new double[][] { equalsDist.getArrayRef(), + notEqualDist.getArrayRef() }; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + // TODO Auto-generated method stub + } + + @Override + public void observeAttributeTarget(double attVal, double target) { + throw new UnsupportedOperationException("Not supported yet."); + } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NullAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NullAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NullAttributeClassObserver.java index def0666..ab14d97 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NullAttributeClassObserver.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NullAttributeClassObserver.java @@ -27,54 +27,54 @@ import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; /** - * Class for observing the class data distribution for a null attribute. - * This method is used to disable the observation for an attribute. - * Used in decision trees to monitor data statistics on leaves. - * + * Class for observing the class data distribution for a null attribute. This + * method is used to disable the observation for an attribute. Used in decision + * trees to monitor data statistics on leaves. + * * @author Richard Kirkby ([email protected]) * @version $Revision: 7 $ */ public class NullAttributeClassObserver extends AbstractOptionHandler implements AttributeClassObserver { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - @Override - public void observeAttributeClass(double attVal, int classVal, double weight) { - } + @Override + public void observeAttributeClass(double attVal, int classVal, double weight) { + } - @Override - public double probabilityOfAttributeValueGivenClass(double attVal, - int classVal) { - return 0.0; - } + @Override + public double probabilityOfAttributeValueGivenClass(double attVal, + int classVal) { + return 0.0; + } - public double totalWeightOfClassObservations() { - return 0.0; - } + public double totalWeightOfClassObservations() { + return 0.0; + } - public double weightOfObservedMissingValues() { - return 0.0; - } + public double weightOfObservedMissingValues() { + return 0.0; + } - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } - @Override - public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( - SplitCriterion criterion, double[] preSplitDist, int attIndex, - boolean binaryOnly) { - return null; - } + @Override + public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( + SplitCriterion criterion, double[] preSplitDist, int attIndex, + boolean binaryOnly) { + return null; + } - @Override - protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - // TODO Auto-generated method stub - } + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + // TODO Auto-generated method stub + } - @Override - public void observeAttributeTarget(double attVal, double target) { - throw new UnsupportedOperationException("Not supported yet."); - } + @Override + public void observeAttributeTarget(double attVal, double target) { + throw new UnsupportedOperationException("Not supported yet."); + } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java index 1660d5f..ca7a50a 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java @@ -22,13 +22,12 @@ package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; /** * Interface for observing the class data distribution for a numeric attribute. - * This observer monitors the class distribution of a given attribute. - * Used in naive Bayes and decision trees to monitor data statistics on leaves. - * + * This observer monitors the class distribution of a given attribute. Used in + * naive Bayes and decision trees to monitor data statistics on leaves. + * * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ + * @version $Revision: 7 $ */ public interface NumericAttributeClassObserver extends AttributeClassObserver { - } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java index c7f3d7b..9650532 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java @@ -36,188 +36,188 @@ import com.github.javacliparser.IntOption; import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; /** - * Class for observing the class data distribution for a numeric attribute as in VFML. - * Used in naive Bayes and decision trees to monitor data statistics on leaves. - * + * Class for observing the class data distribution for a numeric attribute as in + * VFML. Used in naive Bayes and decision trees to monitor data statistics on + * leaves. + * * @author Richard Kirkby ([email protected]) * @version $Revision: 7 $ */ public class VFMLNumericAttributeClassObserver extends AbstractOptionHandler implements NumericAttributeClassObserver { - private static final long serialVersionUID = 1L; - - @Override - public void observeAttributeTarget(double attVal, double target) { - throw new UnsupportedOperationException("Not supported yet."); - } - - protected class Bin implements Serializable { - - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - public double lowerBound, upperBound; + @Override + public void observeAttributeTarget(double attVal, double target) { + throw new UnsupportedOperationException("Not supported yet."); + } - public DoubleVector classWeights = new DoubleVector(); + protected class Bin implements Serializable { - public int boundaryClass; - - public double boundaryWeight; - } + private static final long serialVersionUID = 1L; - protected List<Bin> binList = new ArrayList<>(); - - public IntOption numBinsOption = new IntOption("numBins", 'n', - "The number of bins.", 10, 1, Integer.MAX_VALUE); - - - @Override - public void observeAttributeClass(double attVal, int classVal, double weight) { - if (!Utils.isMissingValue(attVal)) { - if (this.binList.size() < 1) { - // create the first bin - Bin newBin = new Bin(); - newBin.classWeights.addToValue(classVal, weight); - newBin.boundaryClass = classVal; - newBin.boundaryWeight = weight; - newBin.upperBound = attVal; - newBin.lowerBound = attVal; - this.binList.add(newBin); - } else { - // find bin containing new example with binary search - int index = 0; - boolean found = false; - int min = 0; - int max = this.binList.size() - 1; - while ((min <= max) && !found) { - int i = (min + max) / 2; - Bin bin = this.binList.get(i); - if (((attVal >= bin.lowerBound) && (attVal < bin.upperBound)) - || ((i == this.binList.size() - 1) - && (attVal >= bin.lowerBound) && (attVal <= bin.upperBound))) { - found = true; - index = i; - } else if (attVal < bin.lowerBound) { - max = i - 1; - } else { - min = i + 1; - } - } - boolean first = false; - boolean last = false; - if (!found) { - // determine if it is before or after the existing range - Bin bin = this.binList.get(0); - if (bin.lowerBound > attVal) { - // go before the first bin - index = 0; - first = true; - } else { - // if we haven't found it yet value must be > last bins - // upperBound - index = this.binList.size() - 1; - last = true; - } - } - Bin bin = this.binList.get(index); // VLIndex(ct->bins, index); - if ((bin.lowerBound == attVal) - || (this.binList.size() >= this.numBinsOption.getValue())) {// Option.getValue()) - // {//1000) - // { - // if this is the exact same boundary and class as the bin - // boundary or we aren't adding new bins any more then - // increment - // boundary counts - bin.classWeights.addToValue(classVal, weight); - if ((bin.boundaryClass == classVal) - && (bin.lowerBound == attVal)) { - // if it is also the same class then special case it - bin.boundaryWeight += weight; - } - } else { - // create a new bin - Bin newBin = new Bin(); - newBin.classWeights.addToValue(classVal, weight); - newBin.boundaryWeight = weight; - newBin.boundaryClass = classVal; - newBin.upperBound = bin.upperBound; - newBin.lowerBound = attVal; - - double percent = 0.0; - // estimate initial counts with a linear interpolation - if (!((bin.upperBound - bin.lowerBound == 0) || last || first)) { - percent = 1.0 - ((attVal - bin.lowerBound) / (bin.upperBound - bin.lowerBound)); - } - - // take out the boundry points, they stay with the old bin - bin.classWeights.addToValue(bin.boundaryClass, - -bin.boundaryWeight); - DoubleVector weightToShift = new DoubleVector( - bin.classWeights); - weightToShift.scaleValues(percent); - newBin.classWeights.addValues(weightToShift); - bin.classWeights.subtractValues(weightToShift); - // put the boundry examples back in - bin.classWeights.addToValue(bin.boundaryClass, - bin.boundaryWeight); - - // insert the new bin in the right place - if (last) { - bin.upperBound = attVal; - newBin.upperBound = attVal; - this.binList.add(newBin); - } else if (first) { - newBin.upperBound = bin.lowerBound; - this.binList.add(0, newBin); - } else { - newBin.upperBound = bin.upperBound; - bin.upperBound = attVal; - this.binList.add(index + 1, newBin); - } - } - } + public double lowerBound, upperBound; + + public DoubleVector classWeights = new DoubleVector(); + + public int boundaryClass; + + public double boundaryWeight; + } + + protected List<Bin> binList = new ArrayList<>(); + + public IntOption numBinsOption = new IntOption("numBins", 'n', + "The number of bins.", 10, 1, Integer.MAX_VALUE); + + @Override + public void observeAttributeClass(double attVal, int classVal, double weight) { + if (!Utils.isMissingValue(attVal)) { + if (this.binList.size() < 1) { + // create the first bin + Bin newBin = new Bin(); + newBin.classWeights.addToValue(classVal, weight); + newBin.boundaryClass = classVal; + newBin.boundaryWeight = weight; + newBin.upperBound = attVal; + newBin.lowerBound = attVal; + this.binList.add(newBin); + } else { + // find bin containing new example with binary search + int index = 0; + boolean found = false; + int min = 0; + int max = this.binList.size() - 1; + while ((min <= max) && !found) { + int i = (min + max) / 2; + Bin bin = this.binList.get(i); + if (((attVal >= bin.lowerBound) && (attVal < bin.upperBound)) + || ((i == this.binList.size() - 1) + && (attVal >= bin.lowerBound) && (attVal <= bin.upperBound))) { + found = true; + index = i; + } else if (attVal < bin.lowerBound) { + max = i - 1; + } else { + min = i + 1; + } } - } - - @Override - public double probabilityOfAttributeValueGivenClass(double attVal, - int classVal) { - // TODO: NaiveBayes broken until implemented - return 0.0; - } - - @Override - public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( - SplitCriterion criterion, double[] preSplitDist, int attIndex, - boolean binaryOnly) { - AttributeSplitSuggestion bestSuggestion = null; - DoubleVector rightDist = new DoubleVector(); - for (Bin bin : this.binList) { - rightDist.addValues(bin.classWeights); + boolean first = false; + boolean last = false; + if (!found) { + // determine if it is before or after the existing range + Bin bin = this.binList.get(0); + if (bin.lowerBound > attVal) { + // go before the first bin + index = 0; + first = true; + } else { + // if we haven't found it yet value must be > last bins + // upperBound + index = this.binList.size() - 1; + last = true; + } } - DoubleVector leftDist = new DoubleVector(); - for (Bin bin : this.binList) { - leftDist.addValues(bin.classWeights); - rightDist.subtractValues(bin.classWeights); - double[][] postSplitDists = new double[][]{ - leftDist.getArrayCopy(), rightDist.getArrayCopy()}; - double merit = criterion.getMeritOfSplit(preSplitDist, - postSplitDists); - if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) { - bestSuggestion = new AttributeSplitSuggestion( - new NumericAttributeBinaryTest(attIndex, - bin.upperBound, false), postSplitDists, merit); - } + Bin bin = this.binList.get(index); // VLIndex(ct->bins, index); + if ((bin.lowerBound == attVal) + || (this.binList.size() >= this.numBinsOption.getValue())) {// Option.getValue()) + // {//1000) + // { + // if this is the exact same boundary and class as the bin + // boundary or we aren't adding new bins any more then + // increment + // boundary counts + bin.classWeights.addToValue(classVal, weight); + if ((bin.boundaryClass == classVal) + && (bin.lowerBound == attVal)) { + // if it is also the same class then special case it + bin.boundaryWeight += weight; + } + } else { + // create a new bin + Bin newBin = new Bin(); + newBin.classWeights.addToValue(classVal, weight); + newBin.boundaryWeight = weight; + newBin.boundaryClass = classVal; + newBin.upperBound = bin.upperBound; + newBin.lowerBound = attVal; + + double percent = 0.0; + // estimate initial counts with a linear interpolation + if (!((bin.upperBound - bin.lowerBound == 0) || last || first)) { + percent = 1.0 - ((attVal - bin.lowerBound) / (bin.upperBound - bin.lowerBound)); + } + + // take out the boundry points, they stay with the old bin + bin.classWeights.addToValue(bin.boundaryClass, + -bin.boundaryWeight); + DoubleVector weightToShift = new DoubleVector( + bin.classWeights); + weightToShift.scaleValues(percent); + newBin.classWeights.addValues(weightToShift); + bin.classWeights.subtractValues(weightToShift); + // put the boundry examples back in + bin.classWeights.addToValue(bin.boundaryClass, + bin.boundaryWeight); + + // insert the new bin in the right place + if (last) { + bin.upperBound = attVal; + newBin.upperBound = attVal; + this.binList.add(newBin); + } else if (first) { + newBin.upperBound = bin.lowerBound; + this.binList.add(0, newBin); + } else { + newBin.upperBound = bin.upperBound; + bin.upperBound = attVal; + this.binList.add(index + 1, newBin); + } } - return bestSuggestion; + } } - - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub + } + + @Override + public double probabilityOfAttributeValueGivenClass(double attVal, + int classVal) { + // TODO: NaiveBayes broken until implemented + return 0.0; + } + + @Override + public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( + SplitCriterion criterion, double[] preSplitDist, int attIndex, + boolean binaryOnly) { + AttributeSplitSuggestion bestSuggestion = null; + DoubleVector rightDist = new DoubleVector(); + for (Bin bin : this.binList) { + rightDist.addValues(bin.classWeights); } - - @Override - protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - // TODO Auto-generated method stub + DoubleVector leftDist = new DoubleVector(); + for (Bin bin : this.binList) { + leftDist.addValues(bin.classWeights); + rightDist.subtractValues(bin.classWeights); + double[][] postSplitDists = new double[][] { + leftDist.getArrayCopy(), rightDist.getArrayCopy() }; + double merit = criterion.getMeritOfSplit(preSplitDist, + postSplitDists); + if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) { + bestSuggestion = new AttributeSplitSuggestion( + new NumericAttributeBinaryTest(attIndex, + bin.upperBound, false), postSplitDists, merit); + } } + return bestSuggestion; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + // TODO Auto-generated method stub + } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java index d1267a7..b273c33 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java @@ -21,15 +21,16 @@ package com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests; */ /** - * Abstract binary conditional test for instances to use to split nodes in Hoeffding trees. - * + * Abstract binary conditional test for instances to use to split nodes in + * Hoeffding trees. + * * @author Richard Kirkby ([email protected]) * @version $Revision: 7 $ */ public abstract class InstanceConditionalBinaryTest extends InstanceConditionalTest { - @Override - public int maxBranches() { - return 2; - } + @Override + public int maxBranches() { + return 2; + } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java index 4d1b955..b893e06 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java @@ -25,52 +25,57 @@ import com.yahoo.labs.samoa.instances.InstancesHeader; import com.yahoo.labs.samoa.instances.Instance; /** - * Abstract conditional test for instances to use to split nodes in Hoeffding trees. - * + * Abstract conditional test for instances to use to split nodes in Hoeffding + * trees. + * * @author Richard Kirkby ([email protected]) * @version $Revision: 7 $ */ public abstract class InstanceConditionalTest extends AbstractMOAObject { - /** - * Returns the number of the branch for an instance, -1 if unknown. - * - * @param inst the instance to be used - * @return the number of the branch for an instance, -1 if unknown. - */ - public abstract int branchForInstance(Instance inst); + /** + * Returns the number of the branch for an instance, -1 if unknown. + * + * @param inst + * the instance to be used + * @return the number of the branch for an instance, -1 if unknown. + */ + public abstract int branchForInstance(Instance inst); - /** - * Gets whether the number of the branch for an instance is known. - * - * @param inst - * @return true if the number of the branch for an instance is known - */ - public boolean resultKnownForInstance(Instance inst) { - return branchForInstance(inst) >= 0; - } + /** + * Gets whether the number of the branch for an instance is known. + * + * @param inst + * @return true if the number of the branch for an instance is known + */ + public boolean resultKnownForInstance(Instance inst) { + return branchForInstance(inst) >= 0; + } - /** - * Gets the number of maximum branches, -1 if unknown. - * - * @return the number of maximum branches, -1 if unknown.. - */ - public abstract int maxBranches(); + /** + * Gets the number of maximum branches, -1 if unknown. + * + * @return the number of maximum branches, -1 if unknown.. + */ + public abstract int maxBranches(); - /** - * Gets the text that describes the condition of a branch. It is used to describe the branch. - * - * @param branch the number of the branch to describe - * @param context the context or header of the data stream - * @return the text that describes the condition of the branch - */ - public abstract String describeConditionForBranch(int branch, - InstancesHeader context); + /** + * Gets the text that describes the condition of a branch. It is used to + * describe the branch. + * + * @param branch + * the number of the branch to describe + * @param context + * the context or header of the data stream + * @return the text that describes the condition of the branch + */ + public abstract String describeConditionForBranch(int branch, + InstancesHeader context); - /** - * Returns an array with the attributes that the test depends on. - * - * @return an array with the attributes that the test depends on - */ - public abstract int[] getAttsTestDependsOn(); + /** + * Returns an array with the attributes that the test depends on. + * + * @return an array with the attributes that the test depends on + */ + public abstract int[] getAttsTestDependsOn(); } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java index da3c717..5056737 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java @@ -24,50 +24,51 @@ import com.yahoo.labs.samoa.instances.InstancesHeader; import com.yahoo.labs.samoa.instances.Instance; /** - * Nominal binary conditional test for instances to use to split nodes in Hoeffding trees. - * + * Nominal binary conditional test for instances to use to split nodes in + * Hoeffding trees. + * * @author Richard Kirkby ([email protected]) * @version $Revision: 7 $ */ public class NominalAttributeBinaryTest extends InstanceConditionalBinaryTest { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - protected int attIndex; + protected int attIndex; - protected int attValue; + protected int attValue; - public NominalAttributeBinaryTest(int attIndex, int attValue) { - this.attIndex = attIndex; - this.attValue = attValue; - } + public NominalAttributeBinaryTest(int attIndex, int attValue) { + this.attIndex = attIndex; + this.attValue = attValue; + } - @Override - public int branchForInstance(Instance inst) { - int instAttIndex = this.attIndex < inst.classIndex() ? this.attIndex - : this.attIndex + 1; - return inst.isMissing(instAttIndex) ? -1 : ((int) inst.value(instAttIndex) == this.attValue ? 0 : 1); - } + @Override + public int branchForInstance(Instance inst) { + int instAttIndex = this.attIndex < inst.classIndex() ? this.attIndex + : this.attIndex + 1; + return inst.isMissing(instAttIndex) ? -1 : ((int) inst.value(instAttIndex) == this.attValue ? 0 : 1); + } - @Override - public String describeConditionForBranch(int branch, InstancesHeader context) { - if ((branch == 0) || (branch == 1)) { - return InstancesHeader.getAttributeNameString(context, - this.attIndex) - + (branch == 0 ? " = " : " != ") - + InstancesHeader.getNominalValueString(context, - this.attIndex, this.attValue); - } - throw new IndexOutOfBoundsException(); + @Override + public String describeConditionForBranch(int branch, InstancesHeader context) { + if ((branch == 0) || (branch == 1)) { + return InstancesHeader.getAttributeNameString(context, + this.attIndex) + + (branch == 0 ? " = " : " != ") + + InstancesHeader.getNominalValueString(context, + this.attIndex, this.attValue); } + throw new IndexOutOfBoundsException(); + } - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } - @Override - public int[] getAttsTestDependsOn() { - return new int[]{this.attIndex}; - } + @Override + public int[] getAttsTestDependsOn() { + return new int[] { this.attIndex }; + } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java index 82c91d3..5c64070 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java @@ -24,48 +24,49 @@ import com.yahoo.labs.samoa.instances.InstancesHeader; import com.yahoo.labs.samoa.instances.Instance; /** - * Nominal multi way conditional test for instances to use to split nodes in Hoeffding trees. - * + * Nominal multi way conditional test for instances to use to split nodes in + * Hoeffding trees. + * * @author Richard Kirkby ([email protected]) * @version $Revision: 7 $ */ public class NominalAttributeMultiwayTest extends InstanceConditionalTest { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - protected int attIndex; + protected int attIndex; - public NominalAttributeMultiwayTest(int attIndex) { - this.attIndex = attIndex; - } + public NominalAttributeMultiwayTest(int attIndex) { + this.attIndex = attIndex; + } - @Override - public int branchForInstance(Instance inst) { - int instAttIndex = this.attIndex ; //< inst.classIndex() ? this.attIndex - //: this.attIndex + 1; - return inst.isMissing(instAttIndex) ? -1 : (int) inst.value(instAttIndex); - } + @Override + public int branchForInstance(Instance inst) { + int instAttIndex = this.attIndex; // < inst.classIndex() ? this.attIndex + // : this.attIndex + 1; + return inst.isMissing(instAttIndex) ? -1 : (int) inst.value(instAttIndex); + } - @Override - public String describeConditionForBranch(int branch, InstancesHeader context) { - return InstancesHeader.getAttributeNameString(context, this.attIndex) - + " = " - + InstancesHeader.getNominalValueString(context, this.attIndex, - branch); - } + @Override + public String describeConditionForBranch(int branch, InstancesHeader context) { + return InstancesHeader.getAttributeNameString(context, this.attIndex) + + " = " + + InstancesHeader.getNominalValueString(context, this.attIndex, + branch); + } - @Override - public int maxBranches() { - return -1; - } + @Override + public int maxBranches() { + return -1; + } - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } - @Override - public int[] getAttsTestDependsOn() { - return new int[]{this.attIndex}; - } + @Override + public int[] getAttsTestDependsOn() { + return new int[] { this.attIndex }; + } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java index 82c7395..0a05742 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java @@ -24,69 +24,70 @@ import com.yahoo.labs.samoa.instances.InstancesHeader; import com.yahoo.labs.samoa.instances.Instance; /** - * Numeric binary conditional test for instances to use to split nodes in Hoeffding trees. - * + * Numeric binary conditional test for instances to use to split nodes in + * Hoeffding trees. + * * @author Richard Kirkby ([email protected]) * @version $Revision: 7 $ */ public class NumericAttributeBinaryTest extends InstanceConditionalBinaryTest { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - protected int attIndex; + protected int attIndex; - protected double attValue; + protected double attValue; - protected boolean equalsPassesTest; + protected boolean equalsPassesTest; - public NumericAttributeBinaryTest(int attIndex, double attValue, - boolean equalsPassesTest) { - this.attIndex = attIndex; - this.attValue = attValue; - this.equalsPassesTest = equalsPassesTest; - } + public NumericAttributeBinaryTest(int attIndex, double attValue, + boolean equalsPassesTest) { + this.attIndex = attIndex; + this.attValue = attValue; + this.equalsPassesTest = equalsPassesTest; + } - @Override - public int branchForInstance(Instance inst) { - int instAttIndex = this.attIndex ; // < inst.classIndex() ? this.attIndex - // : this.attIndex + 1; - if (inst.isMissing(instAttIndex)) { - return -1; - } - double v = inst.value(instAttIndex); - if (v == this.attValue) { - return this.equalsPassesTest ? 0 : 1; - } - return v < this.attValue ? 0 : 1; + @Override + public int branchForInstance(Instance inst) { + int instAttIndex = this.attIndex; // < inst.classIndex() ? this.attIndex + // : this.attIndex + 1; + if (inst.isMissing(instAttIndex)) { + return -1; } - - @Override - public String describeConditionForBranch(int branch, InstancesHeader context) { - if ((branch == 0) || (branch == 1)) { - char compareChar = branch == 0 ? '<' : '>'; - int equalsBranch = this.equalsPassesTest ? 0 : 1; - return InstancesHeader.getAttributeNameString(context, - this.attIndex) - + ' ' - + compareChar - + (branch == equalsBranch ? "= " : " ") - + InstancesHeader.getNumericValueString(context, - this.attIndex, this.attValue); - } - throw new IndexOutOfBoundsException(); + double v = inst.value(instAttIndex); + if (v == this.attValue) { + return this.equalsPassesTest ? 0 : 1; } + return v < this.attValue ? 0 : 1; + } - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub + @Override + public String describeConditionForBranch(int branch, InstancesHeader context) { + if ((branch == 0) || (branch == 1)) { + char compareChar = branch == 0 ? '<' : '>'; + int equalsBranch = this.equalsPassesTest ? 0 : 1; + return InstancesHeader.getAttributeNameString(context, + this.attIndex) + + ' ' + + compareChar + + (branch == equalsBranch ? "= " : " ") + + InstancesHeader.getNumericValueString(context, + this.attIndex, this.attValue); } + throw new IndexOutOfBoundsException(); + } - @Override - public int[] getAttsTestDependsOn() { - return new int[]{this.attIndex}; - } + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } - public double getSplitValue() { - return this.attValue; - } + @Override + public int[] getAttsTestDependsOn() { + return new int[] { this.attIndex }; + } + + public double getSplitValue() { + return this.attValue; + } }
