http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/AbstractClassifier.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/AbstractClassifier.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/AbstractClassifier.java deleted file mode 100644 index 295ae49..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/AbstractClassifier.java +++ /dev/null @@ -1,378 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -import java.util.Arrays; -import java.util.LinkedList; -import java.util.List; -import java.util.Random; - -import com.github.javacliparser.IntOption; -import com.yahoo.labs.samoa.instances.Instance; -import com.yahoo.labs.samoa.instances.InstancesHeader; -import com.yahoo.labs.samoa.moa.MOAObject; -import com.yahoo.labs.samoa.moa.core.Example; -import com.yahoo.labs.samoa.moa.core.Measurement; -import com.yahoo.labs.samoa.moa.core.ObjectRepository; -import com.yahoo.labs.samoa.moa.core.StringUtils; -import com.yahoo.labs.samoa.moa.core.Utils; -import com.yahoo.labs.samoa.moa.learners.Learner; -import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; -import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; - -public abstract class AbstractClassifier extends AbstractOptionHandler implements Classifier { - - @Override - public String getPurposeString() { - return "MOA Classifier: " + getClass().getCanonicalName(); - } - - /** Header of the instances of the data stream */ - protected InstancesHeader modelContext; - - /** Sum of the weights of the instances trained by this model */ - protected double trainingWeightSeenByModel = 0.0; - - /** Random seed used in randomizable learners */ - protected int randomSeed = 1; - - /** Option for randomizable learners to change the random seed */ - protected IntOption randomSeedOption; - - /** Random Generator used in randomizable learners */ - public Random classifierRandom; - - /** - * Creates an classifier and setups the random seed option if the classifier is randomizable. 
- */ - public AbstractClassifier() { - if (isRandomizable()) { - this.randomSeedOption = new IntOption("randomSeed", 'r', - "Seed for random behaviour of the classifier.", 1); - } - } - - @Override - public void prepareForUseImpl(TaskMonitor monitor, - ObjectRepository repository) { - if (this.randomSeedOption != null) { - this.randomSeed = this.randomSeedOption.getValue(); - } - if (!trainingHasStarted()) { - resetLearning(); - } - } - - @Override - public double[] getVotesForInstance(Example<Instance> example) { - return getVotesForInstance(example.getData()); - } - - @Override - public abstract double[] getVotesForInstance(Instance inst); - - @Override - public void setModelContext(InstancesHeader ih) { - if ((ih != null) && (ih.classIndex() < 0)) { - throw new IllegalArgumentException( - "Context for a classifier must include a class to learn"); - } - if (trainingHasStarted() - && (this.modelContext != null) - && ((ih == null) || !contextIsCompatible(this.modelContext, ih))) { - throw new IllegalArgumentException( - "New context is not compatible with existing model"); - } - this.modelContext = ih; - } - - @Override - public InstancesHeader getModelContext() { - return this.modelContext; - } - - @Override - public void setRandomSeed(int s) { - this.randomSeed = s; - if (this.randomSeedOption != null) { - // keep option consistent - this.randomSeedOption.setValue(s); - } - } - - @Override - public boolean trainingHasStarted() { - return this.trainingWeightSeenByModel > 0.0; - } - - @Override - public double trainingWeightSeenByModel() { - return this.trainingWeightSeenByModel; - } - - @Override - public void resetLearning() { - this.trainingWeightSeenByModel = 0.0; - if (isRandomizable()) { - this.classifierRandom = new Random(this.randomSeed); - } - resetLearningImpl(); - } - - @Override - public void trainOnInstance(Instance inst) { - if (inst.weight() > 0.0) { - this.trainingWeightSeenByModel += inst.weight(); - trainOnInstanceImpl(inst); - } - } - - 
@Override - public Measurement[] getModelMeasurements() { - List<Measurement> measurementList = new LinkedList<>(); - measurementList.add(new Measurement("model training instances", - trainingWeightSeenByModel())); - measurementList.add(new Measurement("model serialized size (bytes)", - measureByteSize())); - Measurement[] modelMeasurements = getModelMeasurementsImpl(); - if (modelMeasurements != null) { - measurementList.addAll(Arrays.asList(modelMeasurements)); - } - // add average of sub-model measurements - Learner[] subModels = getSublearners(); - if ((subModels != null) && (subModels.length > 0)) { - List<Measurement[]> subMeasurements = new LinkedList<>(); - for (Learner subModel : subModels) { - if (subModel != null) { - subMeasurements.add(subModel.getModelMeasurements()); - } - } - Measurement[] avgMeasurements = Measurement.averageMeasurements(subMeasurements - .toArray(new Measurement[subMeasurements.size()][])); - measurementList.addAll(Arrays.asList(avgMeasurements)); - } - return measurementList.toArray(new Measurement[measurementList.size()]); - } - - @Override - public void getDescription(StringBuilder out, int indent) { - StringUtils.appendIndented(out, indent, "Model type: "); - out.append(this.getClass().getName()); - StringUtils.appendNewline(out); - Measurement.getMeasurementsDescription(getModelMeasurements(), out, - indent); - StringUtils.appendNewlineIndented(out, indent, "Model description:"); - StringUtils.appendNewline(out); - if (trainingHasStarted()) { - getModelDescription(out, indent); - } else { - StringUtils.appendIndented(out, indent, - "Model has not been trained."); - } - } - - @Override - public Learner[] getSublearners() { - return null; - } - - @Override - public Classifier[] getSubClassifiers() { - return null; - } - - @Override - public Classifier copy() { - return (Classifier) super.copy(); - } - - @Override - public MOAObject getModel() { - return this; - } - - @Override - public void trainOnInstance(Example<Instance> 
example) { - trainOnInstance(example.getData()); - } - - @Override - public boolean correctlyClassifies(Instance inst) { - return Utils.maxIndex(getVotesForInstance(inst)) == (int) inst.classValue(); - } - - /** - * Gets the name of the attribute of the class from the header. - * - * @return the string with name of the attribute of the class - */ - public String getClassNameString() { - return InstancesHeader.getClassNameString(this.modelContext); - } - - /** - * Gets the name of a label of the class from the header. - * - * @param classLabelIndex - * the label index - * @return the name of the label of the class - */ - public String getClassLabelString(int classLabelIndex) { - return InstancesHeader.getClassLabelString(this.modelContext, - classLabelIndex); - } - - /** - * Gets the name of an attribute from the header. - * - * @param attIndex - * the attribute index - * @return the name of the attribute - */ - public String getAttributeNameString(int attIndex) { - return InstancesHeader.getAttributeNameString(this.modelContext, attIndex); - } - - /** - * Gets the name of a value of an attribute from the header. 
- * - * @param attIndex - * the attribute index - * @param valIndex - * the value of the attribute - * @return the name of the value of the attribute - */ - public String getNominalValueString(int attIndex, int valIndex) { - return InstancesHeader.getNominalValueString(this.modelContext, attIndex, valIndex); - } - - /** - * Returns if two contexts or headers of instances are compatible.<br> - * <br> - * - * Two contexts are compatible if they follow the following rules:<br> - * Rule 1: num classes can increase but never decrease<br> - * Rule 2: num attributes can increase but never decrease<br> - * Rule 3: num nominal attribute values can increase but never decrease<br> - * Rule 4: attribute types must stay in the same order (although class can move; is always skipped over)<br> - * <br> - * - * Attribute names are free to change, but should always still represent the original attributes. - * - * @param originalContext - * the first context to compare - * @param newContext - * the second context to compare - * @return true if the two contexts are compatible. 
- */ - public static boolean contextIsCompatible(InstancesHeader originalContext, - InstancesHeader newContext) { - - if (newContext.numClasses() < originalContext.numClasses()) { - return false; // rule 1 - } - if (newContext.numAttributes() < originalContext.numAttributes()) { - return false; // rule 2 - } - int oPos = 0; - int nPos = 0; - while (oPos < originalContext.numAttributes()) { - if (oPos == originalContext.classIndex()) { - oPos++; - if (!(oPos < originalContext.numAttributes())) { - break; - } - } - if (nPos == newContext.classIndex()) { - nPos++; - } - if (originalContext.attribute(oPos).isNominal()) { - if (!newContext.attribute(nPos).isNominal()) { - return false; // rule 4 - } - if (newContext.attribute(nPos).numValues() < originalContext.attribute(oPos).numValues()) { - return false; // rule 3 - } - } else { - assert (originalContext.attribute(oPos).isNumeric()); - if (!newContext.attribute(nPos).isNumeric()) { - return false; // rule 4 - } - } - oPos++; - nPos++; - } - return true; // all checks clear - } - - /** - * Resets this classifier. It must be similar to starting a new classifier from scratch. <br> - * <br> - * - * The reason for ...Impl methods: ease programmer burden by not requiring them to remember calls to super in - * overridden methods. Note that this will produce compiler errors if not overridden. - */ - public abstract void resetLearningImpl(); - - /** - * Trains this classifier incrementally using the given instance.<br> - * <br> - * - * The reason for ...Impl methods: ease programmer burden by not requiring them to remember calls to super in - * overridden methods. Note that this will produce compiler errors if not overridden. 
- * - * @param inst - * the instance to be used for training - */ - public abstract void trainOnInstanceImpl(Instance inst); - - /** - * Gets the current measurements of this classifier.<br> - * <br> - * - * The reason for ...Impl methods: ease programmer burden by not requiring them to remember calls to super in - * overridden methods. Note that this will produce compiler errors if not overridden. - * - * @return an array of measurements to be used in evaluation tasks - */ - protected abstract Measurement[] getModelMeasurementsImpl(); - - /** - * Returns a string representation of the model. - * - * @param out - * the stringbuilder to add the description - * @param indent - * the number of characters to indent - */ - public abstract void getModelDescription(StringBuilder out, int indent); - - /** - * Gets the index of the attribute in the instance, given the index of the attribute in the learner. - * - * @param index - * the index of the attribute in the learner - * @return the index in the instance - */ - protected static int modelAttIndexToInstanceAttIndex(int index) { - return index; // inst.classIndex() > index ? index : index + 1; - } -}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/Classifier.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/Classifier.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/Classifier.java deleted file mode 100644 index d537e6b..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/Classifier.java +++ /dev/null @@ -1,77 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.instances.Instance; -import com.yahoo.labs.samoa.moa.core.Example; -import com.yahoo.labs.samoa.moa.learners.Learner; - -/** - * Classifier interface for incremental classification models. - * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public interface Classifier extends Learner<Example<Instance>> { - - /** - * Gets the classifiers of this ensemble. Returns null if this learner is a single learner. - * - * @return an array of the learners of the ensemble - */ - public Classifier[] getSubClassifiers(); - - /** - * Produces a copy of this learner. 
- * - * @return the copy of this learner - */ - public Classifier copy(); - - /** - * Gets whether this classifier correctly classifies an instance. Uses getVotesForInstance to obtain the prediction - * and the instance to obtain its true class. - * - * - * @param inst - * the instance to be classified - * @return true if the instance is correctly classified - */ - public boolean correctlyClassifies(Instance inst); - - /** - * Trains this learner incrementally using the given example. - * - * @param inst - * the instance to be used for training - */ - public void trainOnInstance(Instance inst); - - /** - * Predicts the class memberships for a given instance. If an instance is unclassified, the returned array elements - * must be all zero. - * - * @param inst - * the instance to be classified - * @return an array containing the estimated membership probabilities of the test instance in each class - */ - public double[] getVotesForInstance(Instance inst); -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/Regressor.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/Regressor.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/Regressor.java deleted file mode 100644 index 656a60c..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/Regressor.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -/** - * Regressor interface for incremental regression models. It is used only in the GUI Regression Tab. - * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public interface Regressor { - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/AttributeSplitSuggestion.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/AttributeSplitSuggestion.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/AttributeSplitSuggestion.java deleted file mode 100644 index 3178c40..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/AttributeSplitSuggestion.java +++ /dev/null @@ -1,69 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.core; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -import com.yahoo.labs.samoa.moa.AbstractMOAObject; -import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.InstanceConditionalTest; - -/** - * Class for computing attribute split suggestions given a split test. - * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public class AttributeSplitSuggestion extends AbstractMOAObject implements Comparable<AttributeSplitSuggestion> { - - private static final long serialVersionUID = 1L; - - public InstanceConditionalTest splitTest; - - public double[][] resultingClassDistributions; - - public double merit; - - public AttributeSplitSuggestion() { - } - - public AttributeSplitSuggestion(InstanceConditionalTest splitTest, - double[][] resultingClassDistributions, double merit) { - this.splitTest = splitTest; - this.resultingClassDistributions = resultingClassDistributions.clone(); - this.merit = merit; - } - - public int numSplits() { - return this.resultingClassDistributions.length; - } - - public double[] resultingClassDistributionFromSplit(int splitIndex) { - return this.resultingClassDistributions[splitIndex].clone(); - } - - @Override - public int compareTo(AttributeSplitSuggestion comp) { - return Double.compare(this.merit, comp.merit); - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - // do nothing - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/AttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/AttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/AttributeClassObserver.java deleted file mode 100644 index 6566461..0000000 --- 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/AttributeClassObserver.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.moa.classifiers.core.AttributeSplitSuggestion; -import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; -import com.yahoo.labs.samoa.moa.options.OptionHandler; - -/** - * Interface for observing the class data distribution for an attribute. This observer monitors the class distribution - * of a given attribute. Used in naive Bayes and decision trees to monitor data statistics on leaves. 
- * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public interface AttributeClassObserver extends OptionHandler { - - /** - * Updates statistics of this observer given an attribute value, a class and the weight of the instance observed - * - * @param attVal - * the value of the attribute - * @param classVal - * the class - * @param weight - * the weight of the instance - */ - public void observeAttributeClass(double attVal, int classVal, double weight); - - /** - * Gets the probability for an attribute value given a class - * - * @param attVal - * the attribute value - * @param classVal - * the class - * @return probability for an attribute value given a class - */ - public double probabilityOfAttributeValueGivenClass(double attVal, - int classVal); - - /** - * Gets the best split suggestion given a criterion and a class distribution - * - * @param criterion - * the split criterion to use - * @param preSplitDist - * the class distribution before the split - * @param attIndex - * the attribute index - * @param binaryOnly - * true to use binary splits - * @return suggestion of best attribute split - */ - public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( - SplitCriterion criterion, double[] preSplitDist, int attIndex, - boolean binaryOnly); - - public void observeAttributeTarget(double attVal, double target); - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/BinaryTreeNumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/BinaryTreeNumericAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/BinaryTreeNumericAttributeClassObserver.java deleted file mode 100644 index 1e55aef..0000000 --- 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/BinaryTreeNumericAttributeClassObserver.java +++ /dev/null @@ -1,183 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.io.Serializable; -import com.yahoo.labs.samoa.moa.classifiers.core.AttributeSplitSuggestion; -import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest; -import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; -import com.yahoo.labs.samoa.moa.core.DoubleVector; -import com.yahoo.labs.samoa.moa.core.ObjectRepository; -import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; -import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; - -/** - * Class for observing the class data distribution for a numeric attribute using a binary tree. This observer monitors - * the class distribution of a given attribute. Used in naive Bayes and decision trees to monitor data statistics on - * leaves. 
- * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public class BinaryTreeNumericAttributeClassObserver extends AbstractOptionHandler - implements NumericAttributeClassObserver { - - private static final long serialVersionUID = 1L; - - public class Node implements Serializable { - - private static final long serialVersionUID = 1L; - - public double cut_point; - - public DoubleVector classCountsLeft = new DoubleVector(); - - public DoubleVector classCountsRight = new DoubleVector(); - - public Node left; - - public Node right; - - public Node(double val, int label, double weight) { - this.cut_point = val; - this.classCountsLeft.addToValue(label, weight); - } - - public void insertValue(double val, int label, double weight) { - if (val == this.cut_point) { - this.classCountsLeft.addToValue(label, weight); - } else if (val <= this.cut_point) { - this.classCountsLeft.addToValue(label, weight); - if (this.left == null) { - this.left = new Node(val, label, weight); - } else { - this.left.insertValue(val, label, weight); - } - } else { // val > cut_point - this.classCountsRight.addToValue(label, weight); - if (this.right == null) { - this.right = new Node(val, label, weight); - } else { - this.right.insertValue(val, label, weight); - } - } - } - } - - public Node root = null; - - @Override - public void observeAttributeClass(double attVal, int classVal, double weight) { - if (Double.isNaN(attVal)) { // Instance.isMissingValue(attVal) - } else { - if (this.root == null) { - this.root = new Node(attVal, classVal, weight); - } else { - this.root.insertValue(attVal, classVal, weight); - } - } - } - - @Override - public double probabilityOfAttributeValueGivenClass(double attVal, - int classVal) { - // TODO: NaiveBayes broken until implemented - return 0.0; - } - - @Override - public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( - SplitCriterion criterion, double[] preSplitDist, int attIndex, - boolean binaryOnly) { - return 
searchForBestSplitOption(this.root, null, null, null, null, false, - criterion, preSplitDist, attIndex); - } - - protected AttributeSplitSuggestion searchForBestSplitOption( - Node currentNode, AttributeSplitSuggestion currentBestOption, - double[] actualParentLeft, - double[] parentLeft, double[] parentRight, boolean leftChild, - SplitCriterion criterion, double[] preSplitDist, int attIndex) { - if (currentNode == null) { - return currentBestOption; - } - DoubleVector leftDist = new DoubleVector(); - DoubleVector rightDist = new DoubleVector(); - if (parentLeft == null) { - leftDist.addValues(currentNode.classCountsLeft); - rightDist.addValues(currentNode.classCountsRight); - } else { - leftDist.addValues(parentLeft); - rightDist.addValues(parentRight); - if (leftChild) { - // get the exact statistics of the parent value - DoubleVector exactParentDist = new DoubleVector(); - exactParentDist.addValues(actualParentLeft); - exactParentDist.subtractValues(currentNode.classCountsLeft); - exactParentDist.subtractValues(currentNode.classCountsRight); - - // move the subtrees - leftDist.subtractValues(currentNode.classCountsRight); - rightDist.addValues(currentNode.classCountsRight); - - // move the exact value from the parent - rightDist.addValues(exactParentDist); - leftDist.subtractValues(exactParentDist); - - } else { - leftDist.addValues(currentNode.classCountsLeft); - rightDist.subtractValues(currentNode.classCountsLeft); - } - } - double[][] postSplitDists = new double[][] { leftDist.getArrayRef(), - rightDist.getArrayRef() }; - double merit = criterion.getMeritOfSplit(preSplitDist, postSplitDists); - if ((currentBestOption == null) || (merit > currentBestOption.merit)) { - currentBestOption = new AttributeSplitSuggestion( - new NumericAttributeBinaryTest(attIndex, - currentNode.cut_point, true), postSplitDists, merit); - - } - currentBestOption = searchForBestSplitOption(currentNode.left, - currentBestOption, currentNode.classCountsLeft.getArrayRef(), 
postSplitDists[0], postSplitDists[1], true, - criterion, preSplitDist, attIndex); - currentBestOption = searchForBestSplitOption(currentNode.right, - currentBestOption, currentNode.classCountsLeft.getArrayRef(), postSplitDists[0], postSplitDists[1], false, - criterion, preSplitDist, attIndex); - return currentBestOption; - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } - - @Override - protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - // TODO Auto-generated method stub - } - - @Override - public void observeAttributeTarget(double attVal, double target) { - throw new UnsupportedOperationException("Not supported yet."); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/BinaryTreeNumericAttributeClassObserverRegression.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/BinaryTreeNumericAttributeClassObserverRegression.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/BinaryTreeNumericAttributeClassObserverRegression.java deleted file mode 100644 index 5f75038..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/BinaryTreeNumericAttributeClassObserverRegression.java +++ /dev/null @@ -1,148 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.io.Serializable; -import com.yahoo.labs.samoa.moa.classifiers.core.AttributeSplitSuggestion; -import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; -import com.yahoo.labs.samoa.moa.core.ObjectRepository; -import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; -import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; - -/** - * Class for observing the class data distribution for a numeric attribute using a binary tree. This observer monitors - * the class distribution of a given attribute. - * - * <p> - * Learning Adaptive Model Rules from High-Speed Data Streams, ECML 2013, E. Almeida, C. Ferreira, P. Kosina and J. - * Gama; - * </p> - * - * @author E. Almeida, J. Gama - * @version $Revision: 2$ - */ -public class BinaryTreeNumericAttributeClassObserverRegression extends AbstractOptionHandler - implements NumericAttributeClassObserver { - - public static final long serialVersionUID = 1L; - - public class Node implements Serializable { - - private static final long serialVersionUID = 1L; - - public double cut_point; - - public double[] lessThan; // This array maintains statistics for the instance reaching the node with attribute values less than or iqual to the cutpoint. - - public double[] greaterThan; // This array maintains statistics for the instance reaching the node with attribute values greater than to the cutpoint. 
- - public Node left; - - public Node right; - - public Node(double val, double target) { - this.cut_point = val; - this.lessThan = new double[3]; - this.greaterThan = new double[3]; - this.lessThan[0] = target; // The sum of their target attribute values. - this.lessThan[1] = target * target; // The sum of the squared target attribute values. - this.lessThan[2] = 1.0; // A counter of the number of instances that have reached the node. - this.greaterThan[0] = 0.0; - this.greaterThan[1] = 0.0; - this.greaterThan[2] = 0.0; - } - - public void insertValue(double val, double target) { - if (val == this.cut_point) { - this.lessThan[0] = this.lessThan[0] + target; - this.lessThan[1] = this.lessThan[1] + (target * target); - this.lessThan[2] = this.lessThan[2] + 1; - } else if (val <= this.cut_point) { - this.lessThan[0] = this.lessThan[0] + target; - this.lessThan[1] = this.lessThan[1] + (target * target); - this.lessThan[2] = this.lessThan[2] + 1; - if (this.left == null) { - this.left = new Node(val, target); - } else { - this.left.insertValue(val, target); - } - } else { - this.greaterThan[0] = this.greaterThan[0] + target; - this.greaterThan[1] = this.greaterThan[1] + (target * target); - this.greaterThan[2] = this.greaterThan[2] + 1; - if (this.right == null) { - - this.right = new Node(val, target); - } else { - this.right.insertValue(val, target); - } - } - } - } - - public Node root1 = null; - - public void observeAttributeTarget(double attVal, double target) { - if (!Double.isNaN(attVal)) { - if (this.root1 == null) { - this.root1 = new Node(attVal, target); - } else { - this.root1.insertValue(attVal, target); - } - } - } - - @Override - public void observeAttributeClass(double attVal, int classVal, double weight) { - - } - - @Override - public double probabilityOfAttributeValueGivenClass(double attVal, - int classVal) { - return 0.0; - } - - @Override - public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( - SplitCriterion criterion, double[] 
preSplitDist, int attIndex, - boolean binaryOnly) { - return searchForBestSplitOption(this.root1, null, null, null, null, false, - criterion, preSplitDist, attIndex); - } - - protected AttributeSplitSuggestion searchForBestSplitOption( - Node currentNode, AttributeSplitSuggestion currentBestOption, - double[] actualParentLeft, - double[] parentLeft, double[] parentRight, boolean leftChild, - SplitCriterion criterion, double[] preSplitDist, int attIndex) { - - return currentBestOption; - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - } - - @Override - protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/DiscreteAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/DiscreteAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/DiscreteAttributeClassObserver.java deleted file mode 100644 index a7d0af7..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/DiscreteAttributeClassObserver.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -/** - * Interface for observing the class data distribution for a discrete (nominal) attribute. This observer monitors the - * class distribution of a given attribute. Used in naive Bayes and decision trees to monitor data statistics on leaves. - * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public interface DiscreteAttributeClassObserver extends AttributeClassObserver { - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/FIMTDDNumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/FIMTDDNumericAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/FIMTDDNumericAttributeClassObserver.java deleted file mode 100644 index aa745ce..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/FIMTDDNumericAttributeClassObserver.java +++ /dev/null @@ -1,250 +0,0 @@ -/* Project Knowledge Discovery from Data Streams, FCT LIAAD-INESC TEC, - * - * Contact: [email protected] - */ - -package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this 
file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.io.Serializable; - -import com.yahoo.labs.samoa.moa.classifiers.core.AttributeSplitSuggestion; -import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest; -import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; -import com.yahoo.labs.samoa.moa.core.DoubleVector; -import com.yahoo.labs.samoa.moa.core.ObjectRepository; -import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; - -public class FIMTDDNumericAttributeClassObserver extends BinaryTreeNumericAttributeClassObserver implements - NumericAttributeClassObserver { - - private static final long serialVersionUID = 1L; - - protected class Node implements Serializable { - - private static final long serialVersionUID = 1L; - - // The split point to use - public double cut_point; - - // E-BST statistics - public DoubleVector leftStatistics = new DoubleVector(); - public DoubleVector rightStatistics = new DoubleVector(); - - // Child nodes - public Node left; - public Node right; - - public Node(double val, double label, double weight) { - this.cut_point = val; - this.leftStatistics.addToValue(0, 1); - this.leftStatistics.addToValue(1, label); - this.leftStatistics.addToValue(2, label * label); - } - - /** - * Insert a new value into the tree, updating both the sum of values and sum of squared values arrays - */ - public void insertValue(double val, double label, double weight) { - - // If the new value equals the value stored in a node, update - // the left (<=) node information 
- if (val == this.cut_point) { - this.leftStatistics.addToValue(0, 1); - this.leftStatistics.addToValue(1, label); - this.leftStatistics.addToValue(2, label * label); - } // If the new value is less than the value in a node, update the - // left distribution and send the value down to the left child node. - // If no left child exists, create one - else if (val <= this.cut_point) { - this.leftStatistics.addToValue(0, 1); - this.leftStatistics.addToValue(1, label); - this.leftStatistics.addToValue(2, label * label); - if (this.left == null) { - this.left = new Node(val, label, weight); - } else { - this.left.insertValue(val, label, weight); - } - } // If the new value is greater than the value in a node, update the - // right (>) distribution and send the value down to the right child node. - // If no right child exists, create one - else { // val > cut_point - this.rightStatistics.addToValue(0, 1); - this.rightStatistics.addToValue(1, label); - this.rightStatistics.addToValue(2, label * label); - if (this.right == null) { - this.right = new Node(val, label, weight); - } else { - this.right.insertValue(val, label, weight); - } - } - } - } - - // Root node of the E-BST structure for this attribute - public Node root = null; - - // Global variables for use in the FindBestSplit algorithm - double sumTotalLeft; - double sumTotalRight; - double sumSqTotalLeft; - double sumSqTotalRight; - double countRightTotal; - double countLeftTotal; - - public void observeAttributeClass(double attVal, double classVal, double weight) { - if (!Double.isNaN(attVal)) { - if (this.root == null) { - this.root = new Node(attVal, classVal, weight); - } else { - this.root.insertValue(attVal, classVal, weight); - } - } - } - - @Override - public double probabilityOfAttributeValueGivenClass(double attVal, - int classVal) { - // TODO: NaiveBayes broken until implemented - return 0.0; - } - - @Override - public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion(SplitCriterion criterion, 
double[] preSplitDist, - int attIndex, boolean binaryOnly) { - - // Initialise global variables - sumTotalLeft = 0; - sumTotalRight = preSplitDist[1]; - sumSqTotalLeft = 0; - sumSqTotalRight = preSplitDist[2]; - countLeftTotal = 0; - countRightTotal = preSplitDist[0]; - return searchForBestSplitOption(this.root, null, criterion, attIndex); - } - - /** - * Implementation of the FindBestSplit algorithm from E.Ikonomovska et al. - */ - protected AttributeSplitSuggestion searchForBestSplitOption(Node currentNode, - AttributeSplitSuggestion currentBestOption, SplitCriterion criterion, int attIndex) { - // Return null if the current node is null or we have finished looking - // through all the possible splits - if (currentNode == null || countRightTotal == 0.0) { - return currentBestOption; - } - - if (currentNode.left != null) { - currentBestOption = searchForBestSplitOption(currentNode.left, currentBestOption, criterion, attIndex); - } - - sumTotalLeft += currentNode.leftStatistics.getValue(1); - sumTotalRight -= currentNode.leftStatistics.getValue(1); - sumSqTotalLeft += currentNode.leftStatistics.getValue(2); - sumSqTotalRight -= currentNode.leftStatistics.getValue(2); - countLeftTotal += currentNode.leftStatistics.getValue(0); - countRightTotal -= currentNode.leftStatistics.getValue(0); - - double[][] postSplitDists = new double[][] { { countLeftTotal, sumTotalLeft, sumSqTotalLeft }, - { countRightTotal, sumTotalRight, sumSqTotalRight } }; - double[] preSplitDist = new double[] { (countLeftTotal + countRightTotal), (sumTotalLeft + sumTotalRight), - (sumSqTotalLeft + sumSqTotalRight) }; - double merit = criterion.getMeritOfSplit(preSplitDist, postSplitDists); - - if ((currentBestOption == null) || (merit > currentBestOption.merit)) { - currentBestOption = new AttributeSplitSuggestion( - new NumericAttributeBinaryTest(attIndex, - currentNode.cut_point, true), postSplitDists, merit); - - } - - if (currentNode.right != null) { - currentBestOption = 
searchForBestSplitOption(currentNode.right, currentBestOption, criterion, attIndex); - } - sumTotalLeft -= currentNode.leftStatistics.getValue(1); - sumTotalRight += currentNode.leftStatistics.getValue(1); - sumSqTotalLeft -= currentNode.leftStatistics.getValue(2); - sumSqTotalRight += currentNode.leftStatistics.getValue(2); - countLeftTotal -= currentNode.leftStatistics.getValue(0); - countRightTotal += currentNode.leftStatistics.getValue(0); - - return currentBestOption; - } - - /** - * A method to remove all nodes in the E-BST in which it and all it's children represent 'bad' split points - */ - public void removeBadSplits(SplitCriterion criterion, double lastCheckRatio, double lastCheckSDR, double lastCheckE) { - removeBadSplitNodes(criterion, this.root, lastCheckRatio, lastCheckSDR, lastCheckE); - } - - /** - * Recursive method that first checks all of a node's children before deciding if it is 'bad' and may be removed - */ - private boolean removeBadSplitNodes(SplitCriterion criterion, Node currentNode, double lastCheckRatio, - double lastCheckSDR, double lastCheckE) { - boolean isBad = false; - - if (currentNode == null) { - return true; - } - - if (currentNode.left != null) { - isBad = removeBadSplitNodes(criterion, currentNode.left, lastCheckRatio, lastCheckSDR, lastCheckE); - } - - if (currentNode.right != null && isBad) { - isBad = removeBadSplitNodes(criterion, currentNode.left, lastCheckRatio, lastCheckSDR, lastCheckE); - } - - if (isBad) { - - double[][] postSplitDists = new double[][] { - { currentNode.leftStatistics.getValue(0), currentNode.leftStatistics.getValue(1), - currentNode.leftStatistics.getValue(2) }, - { currentNode.rightStatistics.getValue(0), currentNode.rightStatistics.getValue(1), - currentNode.rightStatistics.getValue(2) } }; - double[] preSplitDist = new double[] { - (currentNode.leftStatistics.getValue(0) + currentNode.rightStatistics.getValue(0)), - (currentNode.leftStatistics.getValue(1) + 
currentNode.rightStatistics.getValue(1)), - (currentNode.leftStatistics.getValue(2) + currentNode.rightStatistics.getValue(2)) }; - double merit = criterion.getMeritOfSplit(preSplitDist, postSplitDists); - - if ((merit / lastCheckSDR) < (lastCheckRatio - (2 * lastCheckE))) { - currentNode = null; - return true; - } - } - - return false; - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } - - @Override - protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - // TODO Auto-generated method stub - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GaussianNumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GaussianNumericAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GaussianNumericAttributeClassObserver.java deleted file mode 100644 index 83ecbe0..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GaussianNumericAttributeClassObserver.java +++ /dev/null @@ -1,182 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.moa.core.ObjectRepository; -import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; -import com.yahoo.labs.samoa.moa.core.Utils; - -import java.util.Set; -import java.util.TreeSet; -import com.yahoo.labs.samoa.moa.classifiers.core.AttributeSplitSuggestion; -import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest; -import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; - -import com.yahoo.labs.samoa.moa.core.AutoExpandVector; -import com.yahoo.labs.samoa.moa.core.DoubleVector; -import com.yahoo.labs.samoa.moa.core.GaussianEstimator; -import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; -import com.github.javacliparser.IntOption; - -/** - * Class for observing the class data distribution for a numeric attribute using gaussian estimators. This observer - * monitors the class distribution of a given attribute. Used in naive Bayes and decision trees to monitor data - * statistics on leaves. 
- * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public class GaussianNumericAttributeClassObserver extends AbstractOptionHandler - implements NumericAttributeClassObserver { - - private static final long serialVersionUID = 1L; - - protected DoubleVector minValueObservedPerClass = new DoubleVector(); - - protected DoubleVector maxValueObservedPerClass = new DoubleVector(); - - protected AutoExpandVector<GaussianEstimator> attValDistPerClass = new AutoExpandVector<>(); - - /** - * @param classVal - * @return The requested Estimator if it exists, or null if not present. - */ - public GaussianEstimator getEstimator(int classVal) { - return this.attValDistPerClass.get(classVal); - } - - public IntOption numBinsOption = new IntOption("numBins", 'n', - "The number of bins.", 10, 1, Integer.MAX_VALUE); - - @Override - public void observeAttributeClass(double attVal, int classVal, double weight) { - if (!Utils.isMissingValue(attVal)) { - GaussianEstimator valDist = this.attValDistPerClass.get(classVal); - if (valDist == null) { - valDist = new GaussianEstimator(); - this.attValDistPerClass.set(classVal, valDist); - this.minValueObservedPerClass.setValue(classVal, attVal); - this.maxValueObservedPerClass.setValue(classVal, attVal); - } else { - if (attVal < this.minValueObservedPerClass.getValue(classVal)) { - this.minValueObservedPerClass.setValue(classVal, attVal); - } - if (attVal > this.maxValueObservedPerClass.getValue(classVal)) { - this.maxValueObservedPerClass.setValue(classVal, attVal); - } - } - valDist.addObservation(attVal, weight); - } - } - - @Override - public double probabilityOfAttributeValueGivenClass(double attVal, - int classVal) { - GaussianEstimator obs = this.attValDistPerClass.get(classVal); - return obs != null ? 
obs.probabilityDensity(attVal) : 0.0; - } - - @Override - public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( - SplitCriterion criterion, double[] preSplitDist, int attIndex, - boolean binaryOnly) { - AttributeSplitSuggestion bestSuggestion = null; - double[] suggestedSplitValues = getSplitPointSuggestions(); - for (double splitValue : suggestedSplitValues) { - double[][] postSplitDists = getClassDistsResultingFromBinarySplit(splitValue); - double merit = criterion.getMeritOfSplit(preSplitDist, - postSplitDists); - if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) { - bestSuggestion = new AttributeSplitSuggestion( - new NumericAttributeBinaryTest(attIndex, splitValue, - true), postSplitDists, merit); - } - } - return bestSuggestion; - } - - public double[] getSplitPointSuggestions() { - Set<Double> suggestedSplitValues = new TreeSet<>(); - double minValue = Double.POSITIVE_INFINITY; - double maxValue = Double.NEGATIVE_INFINITY; - for (int i = 0; i < this.attValDistPerClass.size(); i++) { - GaussianEstimator estimator = this.attValDistPerClass.get(i); - if (estimator != null) { - if (this.minValueObservedPerClass.getValue(i) < minValue) { - minValue = this.minValueObservedPerClass.getValue(i); - } - if (this.maxValueObservedPerClass.getValue(i) > maxValue) { - maxValue = this.maxValueObservedPerClass.getValue(i); - } - } - } - if (minValue < Double.POSITIVE_INFINITY) { - double range = maxValue - minValue; - for (int i = 0; i < this.numBinsOption.getValue(); i++) { - double splitValue = range / (this.numBinsOption.getValue() + 1.0) * (i + 1) - + minValue; - if ((splitValue > minValue) && (splitValue < maxValue)) { - suggestedSplitValues.add(splitValue); - } - } - } - double[] suggestions = new double[suggestedSplitValues.size()]; - int i = 0; - for (double suggestion : suggestedSplitValues) { - suggestions[i++] = suggestion; - } - return suggestions; - } - - // assume all values equal to splitValue go to lhs - public double[][] 
getClassDistsResultingFromBinarySplit(double splitValue) { - DoubleVector lhsDist = new DoubleVector(); - DoubleVector rhsDist = new DoubleVector(); - for (int i = 0; i < this.attValDistPerClass.size(); i++) { - GaussianEstimator estimator = this.attValDistPerClass.get(i); - if (estimator != null) { - if (splitValue < this.minValueObservedPerClass.getValue(i)) { - rhsDist.addToValue(i, estimator.getTotalWeightObserved()); - } else if (splitValue >= this.maxValueObservedPerClass.getValue(i)) { - lhsDist.addToValue(i, estimator.getTotalWeightObserved()); - } else { - double[] weightDist = estimator.estimatedWeight_LessThan_EqualTo_GreaterThan_Value(splitValue); - lhsDist.addToValue(i, weightDist[0] + weightDist[1]); - rhsDist.addToValue(i, weightDist[2]); - } - } - } - return new double[][] { lhsDist.getArrayRef(), rhsDist.getArrayRef() }; - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - } - - @Override - protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - } - - @Override - public void observeAttributeTarget(double attVal, double target) { - throw new UnsupportedOperationException("Not supported yet."); - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GreenwaldKhannaNumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GreenwaldKhannaNumericAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GreenwaldKhannaNumericAttributeClassObserver.java deleted file mode 100644 index 04d1232..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/GreenwaldKhannaNumericAttributeClassObserver.java +++ /dev/null @@ -1,127 +0,0 @@ 
-package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.moa.classifiers.core.AttributeSplitSuggestion; -import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest; -import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; -import com.yahoo.labs.samoa.moa.core.Utils; - -import com.yahoo.labs.samoa.moa.core.AutoExpandVector; -import com.yahoo.labs.samoa.moa.core.DoubleVector; -import com.yahoo.labs.samoa.moa.core.GreenwaldKhannaQuantileSummary; -import com.yahoo.labs.samoa.moa.core.ObjectRepository; -import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; -import com.github.javacliparser.IntOption; -import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; - -/** - * Class for observing the class data distribution for a numeric attribute using Greenwald and Khanna methodology. This - * observer monitors the class distribution of a given attribute. Used in naive Bayes and decision trees to monitor data - * statistics on leaves. 
- * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public class GreenwaldKhannaNumericAttributeClassObserver extends AbstractOptionHandler implements - NumericAttributeClassObserver { - - private static final long serialVersionUID = 1L; - - protected AutoExpandVector<GreenwaldKhannaQuantileSummary> attValDistPerClass = new AutoExpandVector<>(); - - public IntOption numTuplesOption = new IntOption("numTuples", 'n', - "The number of tuples.", 10, 1, Integer.MAX_VALUE); - - @Override - public void observeAttributeClass(double attVal, int classVal, double weight) { - if (!Utils.isMissingValue(attVal)) { - GreenwaldKhannaQuantileSummary valDist = this.attValDistPerClass.get(classVal); - if (valDist == null) { - valDist = new GreenwaldKhannaQuantileSummary(this.numTuplesOption.getValue()); - this.attValDistPerClass.set(classVal, valDist); - } - // TODO: not taking weight into account - valDist.insert(attVal); - } - } - - @Override - public double probabilityOfAttributeValueGivenClass(double attVal, - int classVal) { - // TODO: NaiveBayes broken until implemented - return 0.0; - } - - @Override - public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( - SplitCriterion criterion, double[] preSplitDist, int attIndex, - boolean binaryOnly) { - AttributeSplitSuggestion bestSuggestion = null; - for (GreenwaldKhannaQuantileSummary qs : this.attValDistPerClass) { - if (qs != null) { - double[] cutpoints = qs.getSuggestedCutpoints(); - for (double cutpoint : cutpoints) { - double[][] postSplitDists = getClassDistsResultingFromBinarySplit(cutpoint); - double merit = criterion.getMeritOfSplit(preSplitDist, - postSplitDists); - if ((bestSuggestion == null) - || (merit > bestSuggestion.merit)) { - bestSuggestion = new AttributeSplitSuggestion( - new NumericAttributeBinaryTest(attIndex, - cutpoint, true), postSplitDists, merit); - } - } - } - } - return bestSuggestion; - } - - // assume all values equal to splitValue go to lhs - public 
double[][] getClassDistsResultingFromBinarySplit(double splitValue) { - DoubleVector lhsDist = new DoubleVector(); - DoubleVector rhsDist = new DoubleVector(); - for (int i = 0; i < this.attValDistPerClass.size(); i++) { - GreenwaldKhannaQuantileSummary estimator = this.attValDistPerClass.get(i); - if (estimator != null) { - long countBelow = estimator.getCountBelow(splitValue); - lhsDist.addToValue(i, countBelow); - rhsDist.addToValue(i, estimator.getTotalCount() - countBelow); - } - } - return new double[][] { lhsDist.getArrayRef(), rhsDist.getArrayRef() }; - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } - - @Override - protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - // TODO Auto-generated method stub - } - - @Override - public void observeAttributeTarget(double attVal, double target) { - throw new UnsupportedOperationException("Not supported yet."); - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NominalAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NominalAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NominalAttributeClassObserver.java deleted file mode 100644 index 159f4dc..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NominalAttributeClassObserver.java +++ /dev/null @@ -1,177 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except 
in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.moa.classifiers.core.AttributeSplitSuggestion; -import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.NominalAttributeBinaryTest; -import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.NominalAttributeMultiwayTest; -import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; -import com.yahoo.labs.samoa.moa.core.ObjectRepository; -import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; -import com.yahoo.labs.samoa.moa.core.Utils; - -import com.yahoo.labs.samoa.moa.core.AutoExpandVector; -import com.yahoo.labs.samoa.moa.core.DoubleVector; -import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; - -/** - * Class for observing the class data distribution for a nominal attribute. This observer monitors the class - * distribution of a given attribute. Used in naive Bayes and decision trees to monitor data statistics on leaves. 
- * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public class NominalAttributeClassObserver extends AbstractOptionHandler implements DiscreteAttributeClassObserver { - - private static final long serialVersionUID = 1L; - - protected double totalWeightObserved = 0.0; - - protected double missingWeightObserved = 0.0; - - public AutoExpandVector<DoubleVector> attValDistPerClass = new AutoExpandVector<>(); - - @Override - public void observeAttributeClass(double attVal, int classVal, double weight) { - if (Utils.isMissingValue(attVal)) { - this.missingWeightObserved += weight; - } else { - int attValInt = (int) attVal; - DoubleVector valDist = this.attValDistPerClass.get(classVal); - if (valDist == null) { - valDist = new DoubleVector(); - this.attValDistPerClass.set(classVal, valDist); - } - valDist.addToValue(attValInt, weight); - } - this.totalWeightObserved += weight; - } - - @Override - public double probabilityOfAttributeValueGivenClass(double attVal, - int classVal) { - DoubleVector obs = this.attValDistPerClass.get(classVal); - return obs != null ? 
(obs.getValue((int) attVal) + 1.0) - / (obs.sumOfValues() + obs.numValues()) : 0.0; - } - - public double totalWeightOfClassObservations() { - return this.totalWeightObserved; - } - - public double weightOfObservedMissingValues() { - return this.missingWeightObserved; - } - - @Override - public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( - SplitCriterion criterion, double[] preSplitDist, int attIndex, - boolean binaryOnly) { - AttributeSplitSuggestion bestSuggestion = null; - int maxAttValsObserved = getMaxAttValsObserved(); - if (!binaryOnly) { - double[][] postSplitDists = getClassDistsResultingFromMultiwaySplit(maxAttValsObserved); - double merit = criterion.getMeritOfSplit(preSplitDist, - postSplitDists); - bestSuggestion = new AttributeSplitSuggestion( - new NominalAttributeMultiwayTest(attIndex), postSplitDists, - merit); - } - for (int valIndex = 0; valIndex < maxAttValsObserved; valIndex++) { - double[][] postSplitDists = getClassDistsResultingFromBinarySplit(valIndex); - double merit = criterion.getMeritOfSplit(preSplitDist, - postSplitDists); - if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) { - bestSuggestion = new AttributeSplitSuggestion( - new NominalAttributeBinaryTest(attIndex, valIndex), - postSplitDists, merit); - } - } - return bestSuggestion; - } - - public int getMaxAttValsObserved() { - int maxAttValsObserved = 0; - for (DoubleVector attValDist : this.attValDistPerClass) { - if ((attValDist != null) - && (attValDist.numValues() > maxAttValsObserved)) { - maxAttValsObserved = attValDist.numValues(); - } - } - return maxAttValsObserved; - } - - public double[][] getClassDistsResultingFromMultiwaySplit( - int maxAttValsObserved) { - DoubleVector[] resultingDists = new DoubleVector[maxAttValsObserved]; - for (int i = 0; i < resultingDists.length; i++) { - resultingDists[i] = new DoubleVector(); - } - for (int i = 0; i < this.attValDistPerClass.size(); i++) { - DoubleVector attValDist = this.attValDistPerClass.get(i); 
- if (attValDist != null) { - for (int j = 0; j < attValDist.numValues(); j++) { - resultingDists[j].addToValue(i, attValDist.getValue(j)); - } - } - } - double[][] distributions = new double[maxAttValsObserved][]; - for (int i = 0; i < distributions.length; i++) { - distributions[i] = resultingDists[i].getArrayRef(); - } - return distributions; - } - - public double[][] getClassDistsResultingFromBinarySplit(int valIndex) { - DoubleVector equalsDist = new DoubleVector(); - DoubleVector notEqualDist = new DoubleVector(); - for (int i = 0; i < this.attValDistPerClass.size(); i++) { - DoubleVector attValDist = this.attValDistPerClass.get(i); - if (attValDist != null) { - for (int j = 0; j < attValDist.numValues(); j++) { - if (j == valIndex) { - equalsDist.addToValue(i, attValDist.getValue(j)); - } else { - notEqualDist.addToValue(i, attValDist.getValue(j)); - } - } - } - } - return new double[][] { equalsDist.getArrayRef(), - notEqualDist.getArrayRef() }; - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } - - @Override - protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - // TODO Auto-generated method stub - } - - @Override - public void observeAttributeTarget(double attVal, double target) { - throw new UnsupportedOperationException("Not supported yet."); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NullAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NullAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NullAttributeClassObserver.java deleted file mode 100644 index ec5f83f..0000000 --- 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NullAttributeClassObserver.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.moa.classifiers.core.AttributeSplitSuggestion; -import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; -import com.yahoo.labs.samoa.moa.core.ObjectRepository; -import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; -import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; - -/** - * Class for observing the class data distribution for a null attribute. This method is used to disable the observation - * for an attribute. Used in decision trees to monitor data statistics on leaves. 
- * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */
public class NullAttributeClassObserver extends AbstractOptionHandler implements AttributeClassObserver {

  private static final long serialVersionUID = 1L;

  /** Discards the observation; nothing is recorded. */
  @Override
  public void observeAttributeClass(double attVal, int classVal, double weight) {
    // intentionally empty
  }

  /**
   * Observing a numeric target is not supported here.
   *
   * @throws UnsupportedOperationException
   *           always
   */
  @Override
  public void observeAttributeTarget(double attVal, double target) {
    throw new UnsupportedOperationException("Not supported yet.");
  }

  /**
   * @return always 0.0, for every attribute value and class
   */
  @Override
  public double probabilityOfAttributeValueGivenClass(double attVal,
      int classVal) {
    return 0.0;
  }

  /**
   * @return always 0.0
   */
  public double totalWeightOfClassObservations() {
    return 0.0;
  }

  /**
   * @return always 0.0
   */
  public double weightOfObservedMissingValues() {
    return 0.0;
  }

  /**
   * Never proposes a split.
   *
   * @return always null
   */
  @Override
  public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion(
      SplitCriterion criterion, double[] preSplitDist, int attIndex,
      boolean binaryOnly) {
    return null;
  }

  /** Not implemented: appends nothing to {@code sb}. */
  @Override
  public void getDescription(StringBuilder sb, int indent) {
    // TODO Auto-generated method stub
  }

  /** Not implemented: performs no preparation. */
  @Override
  protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
    // TODO Auto-generated method stub
  }
}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java deleted file mode 100644 index 17f8bd5..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java +++ /dev/null @@ -1,32 +0,0 @@ -package
com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */

/**
 * Observer of the class data distribution for a numeric attribute. Used in naive Bayes and
 * decision trees to monitor data statistics on leaves.
 *
 * <p>
 * This is a marker interface: it declares no members of its own beyond what it inherits from
 * {@link AttributeClassObserver}.
 *
 * @author Richard Kirkby ([email protected])
 * @version $Revision: 7 $
 */
public interface NumericAttributeClassObserver extends AttributeClassObserver {

}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java deleted file mode 100644 index c34e90d..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java +++ /dev/null @@ -1,222 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; - -/* - * #%L - * 
SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.moa.core.Utils; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import com.yahoo.labs.samoa.moa.classifiers.core.AttributeSplitSuggestion; -import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest; -import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; - -import com.yahoo.labs.samoa.moa.core.DoubleVector; -import com.yahoo.labs.samoa.moa.core.ObjectRepository; -import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; -import com.github.javacliparser.IntOption; -import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; - -/** - * Class for observing the class data distribution for a numeric attribute as in VFML. Used in naive Bayes and decision - * trees to monitor data statistics on leaves. 
- * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */
public class VFMLNumericAttributeClassObserver extends AbstractOptionHandler implements NumericAttributeClassObserver {

  private static final long serialVersionUID = 1L;

  /**
   * Observing a numeric target is not supported by this observer.
   *
   * @throws UnsupportedOperationException
   *           always
   */
  @Override
  public void observeAttributeTarget(double attVal, double target) {
    throw new UnsupportedOperationException("Not supported yet.");
  }

  /**
   * One histogram bin. Lookup treats every bin as the half-open range
   * {@code [lowerBound, upperBound)}, except the last bin of the list, whose upper bound is
   * inclusive.
   */
  protected class Bin implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Range covered by this bin. */
    public double lowerBound, upperBound;

    /** Weight accumulated in this bin, indexed by class. */
    public DoubleVector classWeights = new DoubleVector();

    /** Class of the example that created this bin, i.e. that sits exactly on {@code lowerBound}. */
    public int boundaryClass;

    /** Weight of the {@code boundaryClass} examples observed exactly on {@code lowerBound}. */
    public double boundaryWeight;
  }

  /** Bins ordered by ascending bounds; new bins are inserted so that this order is kept. */
  protected List<Bin> binList = new ArrayList<>();

  public IntOption numBinsOption = new IntOption("numBins", 'n',
      "The number of bins.", 10, 1, Integer.MAX_VALUE);

  /**
   * Adds one weighted observation to the histogram.
   *
   * <p>
   * Values reported as missing by {@code Utils.isMissingValue} are ignored. Otherwise the bin
   * containing {@code attVal} is looked up (falling back to the first or last bin when the value
   * is outside the covered range). If the value coincides with that bin's lower bound, or the
   * configured number of bins has been reached, the weight is simply added to the bin. Otherwise
   * a new bin starting at {@code attVal} is created and, when the value fell inside an existing
   * bin of non-zero width, a linearly interpolated share of that bin's non-boundary weight is
   * moved into the new bin.
   *
   * @param attVal
   *          the observed attribute value
   * @param classVal
   *          the class index of the observed example
   * @param weight
   *          the weight of the observed example
   */
  @Override
  public void observeAttributeClass(double attVal, int classVal, double weight) {
    if (Utils.isMissingValue(attVal)) {
      return;
    }
    if (this.binList.isEmpty()) {
      // create the first bin; it spans the single point attVal
      this.binList.add(createBin(attVal, classVal, weight));
      return;
    }

    int index = findContainingBin(attVal);
    boolean first = false;
    boolean last = false;
    if (index < 0) {
      // no bin contains the value: decide whether it goes before or after the existing range
      if (this.binList.get(0).lowerBound > attVal) {
        index = 0;
        first = true;
      } else {
        // value must be > the last bin's upperBound
        index = this.binList.size() - 1;
        last = true;
      }
    }

    Bin bin = this.binList.get(index);
    boolean onLowerBoundary = bin.lowerBound == attVal;
    if (onLowerBoundary
        || (this.binList.size() >= this.numBinsOption.getValue())) {
      // same boundary as an existing bin, or no more bins may be added: just count it
      bin.classWeights.addToValue(classVal, weight);
      if (onLowerBoundary && (bin.boundaryClass == classVal)) {
        // same boundary and same class as the example that created the bin
        bin.boundaryWeight += weight;
      }
      return;
    }

    // Create a new bin starting at attVal; its upper bound is fixed when it is inserted below.
    Bin newBin = createBin(attVal, classVal, weight);

    // estimate initial counts with a linear interpolation
    double percent = 0.0;
    double width = bin.upperBound - bin.lowerBound;
    if (!((width == 0) || last || first)) {
      percent = 1.0 - ((attVal - bin.lowerBound) / width);
    }

    // take out the boundary points, they stay with the old bin
    bin.classWeights.addToValue(bin.boundaryClass, -bin.boundaryWeight);
    DoubleVector weightToShift = new DoubleVector(bin.classWeights);
    weightToShift.scaleValues(percent);
    newBin.classWeights.addValues(weightToShift);
    bin.classWeights.subtractValues(weightToShift);
    // put the boundary examples back in
    bin.classWeights.addToValue(bin.boundaryClass, bin.boundaryWeight);

    // insert the new bin in the right place
    if (last) {
      // the old last bin is stretched up to attVal; the new bin stays the point [attVal, attVal]
      bin.upperBound = attVal;
      this.binList.add(newBin);
    } else if (first) {
      newBin.upperBound = bin.lowerBound;
      this.binList.add(0, newBin);
    } else {
      newBin.upperBound = bin.upperBound;
      bin.upperBound = attVal;
      this.binList.add(index + 1, newBin);
    }
  }

  /**
   * Creates a bin that spans the single point {@code bound} and holds one observation, which is
   * also recorded as the bin's boundary example.
   */
  private Bin createBin(double bound, int classVal, double weight) {
    Bin created = new Bin();
    created.classWeights.addToValue(classVal, weight);
    created.boundaryClass = classVal;
    created.boundaryWeight = weight;
    created.lowerBound = bound;
    created.upperBound = bound;
    return created;
  }

  /**
   * Binary search for the bin whose range contains {@code attVal}.
   *
   * @return the index of the containing bin, or -1 if no bin contains the value
   */
  private int findContainingBin(double attVal) {
    final int lastIndex = this.binList.size() - 1;
    int min = 0;
    int max = lastIndex;
    while (min <= max) {
      // unsigned shift keeps the midpoint correct even if min + max overflows int
      int mid = (min + max) >>> 1;
      Bin bin = this.binList.get(mid);
      boolean inHalfOpenRange = (attVal >= bin.lowerBound) && (attVal < bin.upperBound);
      boolean onClosedLastBin = (mid == lastIndex)
          && (attVal >= bin.lowerBound) && (attVal <= bin.upperBound);
      if (inHalfOpenRange || onClosedLastBin) {
        return mid;
      }
      if (attVal < bin.lowerBound) {
        max = mid - 1;
      } else {
        min = mid + 1;
      }
    }
    return -1;
  }

  /**
   * Not implemented.
   *
   * @return always 0.0
   */
  @Override
  public double probabilityOfAttributeValueGivenClass(double attVal,
      int classVal) {
    // TODO: NaiveBayes broken until implemented
    return 0.0;
  }

  /**
   * Evaluates a binary split at the upper bound of every bin and keeps the one with the highest
   * merit. For each candidate the left distribution is the sum of the class weights of all bins
   * up to and including the current one, and the right distribution is the remainder. A later
   * candidate replaces the current best only when its merit is strictly greater.
   *
   * @param criterion
   *          the criterion used to score each candidate
   * @param preSplitDist
   *          the class distribution before splitting
   * @param attIndex
   *          the index of the attribute the produced test refers to
   * @param binaryOnly
   *          not consulted by this implementation; every candidate is a binary test
   * @return the best suggestion found, or null if no bins exist
   */
  @Override
  public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion(
      SplitCriterion criterion, double[] preSplitDist, int attIndex,
      boolean binaryOnly) {
    AttributeSplitSuggestion bestSuggestion = null;

    // start with everything on the right-hand side
    DoubleVector rightDist = new DoubleVector();
    for (Bin bin : this.binList) {
      rightDist.addValues(bin.classWeights);
    }

    // sweep the bins left to right, moving each bin's weight from right to left
    DoubleVector leftDist = new DoubleVector();
    for (Bin bin : this.binList) {
      leftDist.addValues(bin.classWeights);
      rightDist.subtractValues(bin.classWeights);
      double[][] postSplitDists = new double[][] {
          leftDist.getArrayCopy(), rightDist.getArrayCopy() };
      double merit = criterion.getMeritOfSplit(preSplitDist, postSplitDists);
      if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) {
        bestSuggestion = new AttributeSplitSuggestion(
            new NumericAttributeBinaryTest(attIndex, bin.upperBound, false),
            postSplitDists, merit);
      }
    }
    return bestSuggestion;
  }

  /** Not implemented: appends nothing to {@code sb}. */
  @Override
  public void getDescription(StringBuilder sb, int indent) {
    // TODO Auto-generated method stub
  }

  /** Not implemented: performs no preparation. */
  @Override
  protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
    // TODO Auto-generated method stub
  }
}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java 
deleted file mode 100644 index 5eee6af..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -/** - * Abstract binary conditional test for instances to use to split nodes in Hoeffding trees. 
- * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */
public abstract class InstanceConditionalBinaryTest extends InstanceConditionalTest {

  /**
   * A binary test always has exactly two branches.
   *
   * @return 2
   */
  @Override
  public int maxBranches() {
    return 2;
  }
}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java deleted file mode 100644 index cd83e50..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.moa.AbstractMOAObject; -import com.yahoo.labs.samoa.instances.InstancesHeader; -import com.yahoo.labs.samoa.instances.Instance; - -/** - * Abstract conditional test for instances to use to split nodes in Hoeffding trees. 
- * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */
public abstract class InstanceConditionalTest extends AbstractMOAObject {

  /**
   * Determines which branch an instance follows.
   *
   * @param inst
   *          the instance to be used
   * @return the number of the branch for the instance, -1 if unknown
   */
  public abstract int branchForInstance(Instance inst);

  /**
   * Tells whether the branch for an instance can be determined, i.e. whether
   * {@link #branchForInstance(Instance)} yields a non-negative branch number.
   *
   * @param inst
   *          the instance to be used
   * @return true if the number of the branch for the instance is known
   */
  public boolean resultKnownForInstance(Instance inst) {
    final int branch = branchForInstance(inst);
    return branch >= 0;
  }

  /**
   * Gets the maximum number of branches of this test.
   *
   * @return the maximum number of branches, -1 if unknown
   */
  public abstract int maxBranches();

  /**
   * Produces the text that describes the condition leading to a branch.
   *
   * @param branch
   *          the number of the branch to describe
   * @param context
   *          the context or header of the data stream
   * @return the text that describes the condition of the branch
   */
  public abstract String describeConditionForBranch(int branch,
      InstancesHeader context);

  /**
   * Lists the attributes this test reads.
   *
   * @return an array with the attributes that the test depends on
   */
  public abstract int[] getAttsTestDependsOn();
}
