http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/GreenwaldKhannaNumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/GreenwaldKhannaNumericAttributeClassObserver.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/GreenwaldKhannaNumericAttributeClassObserver.java new file mode 100644 index 0000000..e47d6c4 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/GreenwaldKhannaNumericAttributeClassObserver.java @@ -0,0 +1,127 @@ +package org.apache.samoa.moa.classifiers.core.attributeclassobservers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.classifiers.core.AttributeSplitSuggestion; +import org.apache.samoa.moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest; +import org.apache.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; +import org.apache.samoa.moa.core.AutoExpandVector; +import org.apache.samoa.moa.core.DoubleVector; +import org.apache.samoa.moa.core.GreenwaldKhannaQuantileSummary; +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.core.Utils; +import org.apache.samoa.moa.options.AbstractOptionHandler; +import org.apache.samoa.moa.tasks.TaskMonitor; + +import com.github.javacliparser.IntOption; + +/** + * Class for observing the class data distribution for a numeric attribute using Greenwald and Khanna methodology. This + * observer monitors the class distribution of a given attribute. Used in naive Bayes and decision trees to monitor data + * statistics on leaves. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class GreenwaldKhannaNumericAttributeClassObserver extends AbstractOptionHandler implements + NumericAttributeClassObserver { + + private static final long serialVersionUID = 1L; + + protected AutoExpandVector<GreenwaldKhannaQuantileSummary> attValDistPerClass = new AutoExpandVector<>(); + + public IntOption numTuplesOption = new IntOption("numTuples", 'n', + "The number of tuples.", 10, 1, Integer.MAX_VALUE); + + @Override + public void observeAttributeClass(double attVal, int classVal, double weight) { + if (!Utils.isMissingValue(attVal)) { + GreenwaldKhannaQuantileSummary valDist = this.attValDistPerClass.get(classVal); + if (valDist == null) { + valDist = new GreenwaldKhannaQuantileSummary(this.numTuplesOption.getValue()); + this.attValDistPerClass.set(classVal, valDist); + } + // TODO: not taking weight into account + valDist.insert(attVal); + } + } + + @Override + public double probabilityOfAttributeValueGivenClass(double attVal, + int classVal) { + // TODO: NaiveBayes broken until implemented + return 0.0; + } + + @Override + public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( + SplitCriterion criterion, double[] preSplitDist, int attIndex, + boolean binaryOnly) { + AttributeSplitSuggestion bestSuggestion = null; + for (GreenwaldKhannaQuantileSummary qs : this.attValDistPerClass) { + if (qs != null) { + double[] cutpoints = qs.getSuggestedCutpoints(); + for (double cutpoint : cutpoints) { + double[][] postSplitDists = getClassDistsResultingFromBinarySplit(cutpoint); + double merit = criterion.getMeritOfSplit(preSplitDist, + postSplitDists); + if ((bestSuggestion == null) + || (merit > bestSuggestion.merit)) { + bestSuggestion = new AttributeSplitSuggestion( + new NumericAttributeBinaryTest(attIndex, + cutpoint, true), postSplitDists, merit); + } + } + } + } + return bestSuggestion; + } + + // assume all values equal to splitValue go to lhs + public double[][] getClassDistsResultingFromBinarySplit(double splitValue) { + DoubleVector lhsDist = new DoubleVector(); + DoubleVector rhsDist = new DoubleVector(); + for (int i = 0; i < this.attValDistPerClass.size(); i++) { + GreenwaldKhannaQuantileSummary estimator = this.attValDistPerClass.get(i); + if (estimator != null) { + long countBelow = estimator.getCountBelow(splitValue); + lhsDist.addToValue(i, countBelow); + rhsDist.addToValue(i, estimator.getTotalCount() - countBelow); + } + } + return new double[][] { lhsDist.getArrayRef(), rhsDist.getArrayRef() }; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + // TODO Auto-generated method stub + } + + @Override + public void observeAttributeTarget(double attVal, double target) { + throw new UnsupportedOperationException("Not supported yet."); + } +}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/NominalAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/NominalAttributeClassObserver.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/NominalAttributeClassObserver.java new file mode 100644 index 0000000..40044d0 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/NominalAttributeClassObserver.java @@ -0,0 +1,176 @@ +package org.apache.samoa.moa.classifiers.core.attributeclassobservers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.classifiers.core.AttributeSplitSuggestion; +import org.apache.samoa.moa.classifiers.core.conditionaltests.NominalAttributeBinaryTest; +import org.apache.samoa.moa.classifiers.core.conditionaltests.NominalAttributeMultiwayTest; +import org.apache.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; +import org.apache.samoa.moa.core.AutoExpandVector; +import org.apache.samoa.moa.core.DoubleVector; +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.core.Utils; +import org.apache.samoa.moa.options.AbstractOptionHandler; +import org.apache.samoa.moa.tasks.TaskMonitor; + +/** + * Class for observing the class data distribution for a nominal attribute. This observer monitors the class + * distribution of a given attribute. Used in naive Bayes and decision trees to monitor data statistics on leaves. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class NominalAttributeClassObserver extends AbstractOptionHandler implements DiscreteAttributeClassObserver { + + private static final long serialVersionUID = 1L; + + protected double totalWeightObserved = 0.0; + + protected double missingWeightObserved = 0.0; + + public AutoExpandVector<DoubleVector> attValDistPerClass = new AutoExpandVector<>(); + + @Override + public void observeAttributeClass(double attVal, int classVal, double weight) { + if (Utils.isMissingValue(attVal)) { + this.missingWeightObserved += weight; + } else { + int attValInt = (int) attVal; + DoubleVector valDist = this.attValDistPerClass.get(classVal); + if (valDist == null) { + valDist = new DoubleVector(); + this.attValDistPerClass.set(classVal, valDist); + } + valDist.addToValue(attValInt, weight); + } + this.totalWeightObserved += weight; + } + + @Override + public double probabilityOfAttributeValueGivenClass(double attVal, + int classVal) { + DoubleVector obs = this.attValDistPerClass.get(classVal); + return obs != null ? (obs.getValue((int) attVal) + 1.0) + / (obs.sumOfValues() + obs.numValues()) : 0.0; + } + + public double totalWeightOfClassObservations() { + return this.totalWeightObserved; + } + + public double weightOfObservedMissingValues() { + return this.missingWeightObserved; + } + + @Override + public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( + SplitCriterion criterion, double[] preSplitDist, int attIndex, + boolean binaryOnly) { + AttributeSplitSuggestion bestSuggestion = null; + int maxAttValsObserved = getMaxAttValsObserved(); + if (!binaryOnly) { + double[][] postSplitDists = getClassDistsResultingFromMultiwaySplit(maxAttValsObserved); + double merit = criterion.getMeritOfSplit(preSplitDist, + postSplitDists); + bestSuggestion = new AttributeSplitSuggestion( + new NominalAttributeMultiwayTest(attIndex), postSplitDists, + merit); + } + for (int valIndex = 0; valIndex < maxAttValsObserved; valIndex++) { + double[][] postSplitDists = getClassDistsResultingFromBinarySplit(valIndex); + double merit = criterion.getMeritOfSplit(preSplitDist, + postSplitDists); + if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) { + bestSuggestion = new AttributeSplitSuggestion( + new NominalAttributeBinaryTest(attIndex, valIndex), + postSplitDists, merit); + } + } + return bestSuggestion; + } + + public int getMaxAttValsObserved() { + int maxAttValsObserved = 0; + for (DoubleVector attValDist : this.attValDistPerClass) { + if ((attValDist != null) + && (attValDist.numValues() > maxAttValsObserved)) { + maxAttValsObserved = attValDist.numValues(); + } + } + return maxAttValsObserved; + } + + public double[][] getClassDistsResultingFromMultiwaySplit( + int maxAttValsObserved) { + DoubleVector[] resultingDists = new DoubleVector[maxAttValsObserved]; + for (int i = 0; i < resultingDists.length; i++) { + resultingDists[i] = new DoubleVector(); + } + for (int i = 0; i < this.attValDistPerClass.size(); i++) { + DoubleVector attValDist = this.attValDistPerClass.get(i); + if (attValDist != null) { + for (int j = 0; j < attValDist.numValues(); j++) { + resultingDists[j].addToValue(i, attValDist.getValue(j)); + } + } + } + double[][] distributions = new double[maxAttValsObserved][]; + for (int i = 0; i < distributions.length; i++) { + distributions[i] = resultingDists[i].getArrayRef(); + } + return distributions; + } + + public double[][] getClassDistsResultingFromBinarySplit(int valIndex) { + DoubleVector equalsDist = new DoubleVector(); + DoubleVector notEqualDist = new DoubleVector(); + for (int i = 0; i < this.attValDistPerClass.size(); i++) { + DoubleVector attValDist = this.attValDistPerClass.get(i); + if (attValDist != null) { + for (int j = 0; j < attValDist.numValues(); j++) { + if (j == valIndex) { + equalsDist.addToValue(i, attValDist.getValue(j)); + } else { + notEqualDist.addToValue(i, attValDist.getValue(j)); + } + } + } + } + return new double[][] { equalsDist.getArrayRef(), + notEqualDist.getArrayRef() }; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + // TODO Auto-generated method stub + } + + @Override + public void observeAttributeTarget(double attVal, double target) { + throw new UnsupportedOperationException("Not supported yet."); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/NullAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/NullAttributeClassObserver.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/NullAttributeClassObserver.java new file mode 100644 index 0000000..7889a73 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/NullAttributeClassObserver.java @@ -0,0 +1,79 @@ +package org.apache.samoa.moa.classifiers.core.attributeclassobservers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.classifiers.core.AttributeSplitSuggestion; +import org.apache.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.options.AbstractOptionHandler; +import org.apache.samoa.moa.tasks.TaskMonitor; + +/** + * Class for observing the class data distribution for a null attribute. This method is used to disable the observation + * for an attribute. Used in decision trees to monitor data statistics on leaves. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class NullAttributeClassObserver extends AbstractOptionHandler implements AttributeClassObserver { + + private static final long serialVersionUID = 1L; + + @Override + public void observeAttributeClass(double attVal, int classVal, double weight) { + } + + @Override + public double probabilityOfAttributeValueGivenClass(double attVal, + int classVal) { + return 0.0; + } + + public double totalWeightOfClassObservations() { + return 0.0; + } + + public double weightOfObservedMissingValues() { + return 0.0; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( + SplitCriterion criterion, double[] preSplitDist, int attIndex, + boolean binaryOnly) { + return null; + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + // TODO Auto-generated method stub + } + + @Override + public void observeAttributeTarget(double attVal, double target) { + throw new UnsupportedOperationException("Not supported yet."); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java new file mode 100644 index 0000000..0557bf1 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java @@ -0,0 +1,32 @@ +package org.apache.samoa.moa.classifiers.core.attributeclassobservers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** + * Interface for observing the class data distribution for a numeric attribute. This observer monitors the class + * distribution of a given attribute. Used in naive Bayes and decision trees to monitor data statistics on leaves. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public interface NumericAttributeClassObserver extends AttributeClassObserver { + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java new file mode 100644 index 0000000..1a62a6f --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java @@ -0,0 +1,222 @@ +package org.apache.samoa.moa.classifiers.core.attributeclassobservers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +import org.apache.samoa.moa.classifiers.core.AttributeSplitSuggestion; +import org.apache.samoa.moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest; +import org.apache.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; +import org.apache.samoa.moa.core.DoubleVector; +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.core.Utils; +import org.apache.samoa.moa.options.AbstractOptionHandler; +import org.apache.samoa.moa.tasks.TaskMonitor; + +import com.github.javacliparser.IntOption; + +/** + * Class for observing the class data distribution for a numeric attribute as in VFML. Used in naive Bayes and decision + * trees to monitor data statistics on leaves. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class VFMLNumericAttributeClassObserver extends AbstractOptionHandler implements NumericAttributeClassObserver { + + private static final long serialVersionUID = 1L; + + @Override + public void observeAttributeTarget(double attVal, double target) { + throw new UnsupportedOperationException("Not supported yet."); + } + + protected class Bin implements Serializable { + + private static final long serialVersionUID = 1L; + + public double lowerBound, upperBound; + + public DoubleVector classWeights = new DoubleVector(); + + public int boundaryClass; + + public double boundaryWeight; + } + + protected List<Bin> binList = new ArrayList<>(); + + public IntOption numBinsOption = new IntOption("numBins", 'n', + "The number of bins.", 10, 1, Integer.MAX_VALUE); + + @Override + public void observeAttributeClass(double attVal, int classVal, double weight) { + if (!Utils.isMissingValue(attVal)) { + if (this.binList.size() < 1) { + // create the first bin + Bin newBin = new Bin(); + newBin.classWeights.addToValue(classVal, weight); + newBin.boundaryClass = classVal; + newBin.boundaryWeight = weight; + newBin.upperBound = attVal; + newBin.lowerBound = attVal; + this.binList.add(newBin); + } else { + // find bin containing new example with binary search + int index = 0; + boolean found = false; + int min = 0; + int max = this.binList.size() - 1; + while ((min <= max) && !found) { + int i = (min + max) / 2; + Bin bin = this.binList.get(i); + if (((attVal >= bin.lowerBound) && (attVal < bin.upperBound)) + || ((i == this.binList.size() - 1) + && (attVal >= bin.lowerBound) && (attVal <= bin.upperBound))) { + found = true; + index = i; + } else if (attVal < bin.lowerBound) { + max = i - 1; + } else { + min = i + 1; + } + } + boolean first = false; + boolean last = false; + if (!found) { + // determine if it is before or after the existing range + Bin bin = this.binList.get(0); + if (bin.lowerBound > attVal) { + // go before the first bin + index = 0; + first = true; + } else { + // if we haven't found it yet value must be > last bins + // upperBound + index = this.binList.size() - 1; + last = true; + } + } + Bin bin = this.binList.get(index); // VLIndex(ct->bins, index); + if ((bin.lowerBound == attVal) + || (this.binList.size() >= this.numBinsOption.getValue())) {// Option.getValue()) + // {//1000) + // { + // if this is the exact same boundary and class as the bin + // boundary or we aren't adding new bins any more then + // increment + // boundary counts + bin.classWeights.addToValue(classVal, weight); + if ((bin.boundaryClass == classVal) + && (bin.lowerBound == attVal)) { + // if it is also the same class then special case it + bin.boundaryWeight += weight; + } + } else { + // create a new bin + Bin newBin = new Bin(); + newBin.classWeights.addToValue(classVal, weight); + newBin.boundaryWeight = weight; + newBin.boundaryClass = classVal; + newBin.upperBound = bin.upperBound; + newBin.lowerBound = attVal; + + double percent = 0.0; + // estimate initial counts with a linear interpolation + if (!((bin.upperBound - bin.lowerBound == 0) || last || first)) { + percent = 1.0 - ((attVal - bin.lowerBound) / (bin.upperBound - bin.lowerBound)); + } + + // take out the boundry points, they stay with the old bin + bin.classWeights.addToValue(bin.boundaryClass, + -bin.boundaryWeight); + DoubleVector weightToShift = new DoubleVector( + bin.classWeights); + weightToShift.scaleValues(percent); + newBin.classWeights.addValues(weightToShift); + bin.classWeights.subtractValues(weightToShift); + // put the boundry examples back in + bin.classWeights.addToValue(bin.boundaryClass, + bin.boundaryWeight); + + // insert the new bin in the right place + if (last) { + bin.upperBound = attVal; + newBin.upperBound = attVal; + this.binList.add(newBin); + } else if (first) { + newBin.upperBound = bin.lowerBound; + this.binList.add(0, newBin); + } else { + newBin.upperBound = bin.upperBound; + bin.upperBound = attVal; + this.binList.add(index + 1, newBin); + } + } + } + } + } + + @Override + public double probabilityOfAttributeValueGivenClass(double attVal, + int classVal) { + // TODO: NaiveBayes broken until implemented + return 0.0; + } + + @Override + public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( + SplitCriterion criterion, double[] preSplitDist, int attIndex, + boolean binaryOnly) { + AttributeSplitSuggestion bestSuggestion = null; + DoubleVector rightDist = new DoubleVector(); + for (Bin bin : this.binList) { + rightDist.addValues(bin.classWeights); + } + DoubleVector leftDist = new DoubleVector(); + for (Bin bin : this.binList) { + leftDist.addValues(bin.classWeights); + rightDist.subtractValues(bin.classWeights); + double[][] postSplitDists = new double[][] { + leftDist.getArrayCopy(), rightDist.getArrayCopy() }; + double merit = criterion.getMeritOfSplit(preSplitDist, + postSplitDists); + if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) { + bestSuggestion = new AttributeSplitSuggestion( + new NumericAttributeBinaryTest(attIndex, + bin.upperBound, false), postSplitDists, merit); + } + } + return bestSuggestion; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + // TODO Auto-generated method stub + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java new file mode 100644 index 0000000..5857ae2 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java @@ -0,0 +1,35 @@ +package org.apache.samoa.moa.classifiers.core.conditionaltests; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** + * Abstract binary conditional test for instances to use to split nodes in Hoeffding trees. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public abstract class InstanceConditionalBinaryTest extends InstanceConditionalTest { + + @Override + public int maxBranches() { + return 2; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java new file mode 100644 index 0000000..1d228e0 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java @@ -0,0 +1,79 @@ +package org.apache.samoa.moa.classifiers.core.conditionaltests; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.moa.AbstractMOAObject; + +/** + * Abstract conditional test for instances to use to split nodes in Hoeffding trees. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public abstract class InstanceConditionalTest extends AbstractMOAObject { + + /** + * Returns the number of the branch for an instance, -1 if unknown. + * + * @param inst + * the instance to be used + * @return the number of the branch for an instance, -1 if unknown. + */ + public abstract int branchForInstance(Instance inst); + + /** + * Gets whether the number of the branch for an instance is known. + * + * @param inst + * @return true if the number of the branch for an instance is known + */ + public boolean resultKnownForInstance(Instance inst) { + return branchForInstance(inst) >= 0; + } + + /** + * Gets the number of maximum branches, -1 if unknown. + * + * @return the number of maximum branches, -1 if unknown.. + */ + public abstract int maxBranches(); + + /** + * Gets the text that describes the condition of a branch. It is used to describe the branch. + * + * @param branch + * the number of the branch to describe + * @param context + * the context or header of the data stream + * @return the text that describes the condition of the branch + */ + public abstract String describeConditionForBranch(int branch, + InstancesHeader context); + + /** + * Returns an array with the attributes that the test depends on. + * + * @return an array with the attributes that the test depends on + */ + public abstract int[] getAttsTestDependsOn(); +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java new file mode 100644 index 0000000..1190aaa --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java @@ -0,0 +1,73 @@ +package org.apache.samoa.moa.classifiers.core.conditionaltests; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.InstancesHeader; + +/** + * Nominal binary conditional test for instances to use to split nodes in Hoeffding trees. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class NominalAttributeBinaryTest extends InstanceConditionalBinaryTest { + + private static final long serialVersionUID = 1L; + + protected int attIndex; + + protected int attValue; + + public NominalAttributeBinaryTest(int attIndex, int attValue) { + this.attIndex = attIndex; + this.attValue = attValue; + } + + @Override + public int branchForInstance(Instance inst) { + int instAttIndex = this.attIndex < inst.classIndex() ? this.attIndex + : this.attIndex + 1; + return inst.isMissing(instAttIndex) ? -1 : ((int) inst.value(instAttIndex) == this.attValue ? 0 : 1); + } + + @Override + public String describeConditionForBranch(int branch, InstancesHeader context) { + if ((branch == 0) || (branch == 1)) { + return InstancesHeader.getAttributeNameString(context, + this.attIndex) + + (branch == 0 ? " = " : " != ") + + InstancesHeader.getNominalValueString(context, + this.attIndex, this.attValue); + } + throw new IndexOutOfBoundsException(); + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + public int[] getAttsTestDependsOn() { + return new int[] { this.attIndex }; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java new file mode 100644 index 0000000..f4816a1 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java @@ -0,0 +1,71 @@ +package org.apache.samoa.moa.classifiers.core.conditionaltests; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.InstancesHeader; + +/** + * Nominal multi way conditional test for instances to use to split nodes in Hoeffding trees. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class NominalAttributeMultiwayTest extends InstanceConditionalTest { + + private static final long serialVersionUID = 1L; + + protected int attIndex; + + public NominalAttributeMultiwayTest(int attIndex) { + this.attIndex = attIndex; + } + + @Override + public int branchForInstance(Instance inst) { + int instAttIndex = this.attIndex; // < inst.classIndex() ? this.attIndex + // : this.attIndex + 1; + return inst.isMissing(instAttIndex) ? -1 : (int) inst.value(instAttIndex); + } + + @Override + public String describeConditionForBranch(int branch, InstancesHeader context) { + return InstancesHeader.getAttributeNameString(context, this.attIndex) + + " = " + + InstancesHeader.getNominalValueString(context, this.attIndex, + branch); + } + + @Override + public int maxBranches() { + return -1; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + public int[] getAttsTestDependsOn() { + return new int[] { this.attIndex }; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java new file mode 100644 index 0000000..3552869 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java @@ -0,0 +1,92 @@ +package org.apache.samoa.moa.classifiers.core.conditionaltests; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.InstancesHeader; + +/** + * Numeric binary conditional test for instances to use to split nodes in Hoeffding trees. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class NumericAttributeBinaryTest extends InstanceConditionalBinaryTest { + + private static final long serialVersionUID = 1L; + + protected int attIndex; + + protected double attValue; + + protected boolean equalsPassesTest; + + public NumericAttributeBinaryTest(int attIndex, double attValue, + boolean equalsPassesTest) { + this.attIndex = attIndex; + this.attValue = attValue; + this.equalsPassesTest = equalsPassesTest; + } + + @Override + public int branchForInstance(Instance inst) { + int instAttIndex = this.attIndex; // < inst.classIndex() ? this.attIndex + // : this.attIndex + 1; + if (inst.isMissing(instAttIndex)) { + return -1; + } + double v = inst.value(instAttIndex); + if (v == this.attValue) { + return this.equalsPassesTest ? 0 : 1; + } + return v < this.attValue ? 0 : 1; + } + + @Override + public String describeConditionForBranch(int branch, InstancesHeader context) { + if ((branch == 0) || (branch == 1)) { + char compareChar = branch == 0 ? '<' : '>'; + int equalsBranch = this.equalsPassesTest ? 0 : 1; + return InstancesHeader.getAttributeNameString(context, + this.attIndex) + + ' ' + + compareChar + + (branch == equalsBranch ? "= " : " ") + + InstancesHeader.getNumericValueString(context, + this.attIndex, this.attValue); + } + throw new IndexOutOfBoundsException(); + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + public int[] getAttsTestDependsOn() { + return new int[] { this.attIndex }; + } + + public double getSplitValue() { + return this.attValue; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/ADWIN.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/ADWIN.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/ADWIN.java new file mode 100644 index 0000000..38b774b --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/ADWIN.java @@ -0,0 +1,590 @@ +package org.apache.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.AbstractMOAObject; + +/** + * ADaptive sliding WINdow method. This method is a change detector and estimator. It keeps a variable-length window of + * recently seen items, with the property that the window has the maximal length statistically consistent with the + * hypothesis "there has been no change in the average value inside the window". + * + * + * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz) + * @version $Revision: 7 $ + */ +public class ADWIN extends AbstractMOAObject { + + private class List extends AbstractMOAObject { + + protected int count; + + protected ListItem head; + + protected ListItem tail; + + public List() { + // post: initializes the list to be empty. + clear(); + addToHead(); + } + + /* Interface Store Methods */ + public int size() { + // post: returns the number of elements in the list. + return this.count; + } + + public ListItem head() { + // post: returns the number of elements in the list. + return this.head; + } + + public ListItem tail() { + // post: returns the number of elements in the list. + return this.tail; + } + + public boolean isEmpty() { + // post: returns the true iff store is empty. + return (this.size() == 0); + } + + public void clear() { + // post: clears the list so that it contains no elements. + this.head = null; + this.tail = null; + this.count = 0; + } + + /* Interface List Methods */ + public void addToHead() { + // pre: anObject is non-null + // post: the object is added to the beginning of the list + this.head = new ListItem(this.head, null); + if (this.tail == null) { + this.tail = this.head; + } + this.count++; + } + + public void removeFromHead() { + // pre: list is not empty + // post: removes and returns first object from the list + // ListItem temp; + // temp = this.head; + this.head = this.head.next(); + if (this.head != null) { + this.head.setPrevious(null); + } else { + this.tail = null; + } + this.count--; + } + + public void addToTail() { + // pre: anObject is non-null + // post: the object is added at the end of the list + this.tail = new ListItem(null, this.tail); + if (this.head == null) { + this.head = this.tail; + } + this.count++; + } + + public void removeFromTail() { + // pre: list is not empty + // post: the last object in the list is removed and returned + // ListItem temp; + // temp = this.tail; + this.tail = this.tail.previous(); + if (this.tail == null) { + this.head = null; + } else { + this.tail.setNext(null); + } + this.count--; + // temp=null; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + } + } + + private class ListItem extends AbstractMOAObject { + protected ListItem next; + + protected ListItem previous; + + protected int bucketSizeRow = 0; + + protected int MAXBUCKETS = ADWIN.MAXBUCKETS; + + protected double bucketTotal[] = new double[MAXBUCKETS + 1]; + + protected double bucketVariance[] = new double[MAXBUCKETS + 1]; + + public ListItem() { + // post: initializes the node to be a tail node + // containing the given value. + this(null, null); + } + + public void clear() { + bucketSizeRow = 0; + for (int k = 0; k <= MAXBUCKETS; k++) { + clearBucket(k); + } + } + + private void clearBucket(int k) { + setTotal(0, k); + setVariance(0, k); + } + + public ListItem(ListItem nextNode, ListItem previousNode) { + // post: initializes the node to contain the given + // object and link to the given next node. + // this.data = element; + this.next = nextNode; + this.previous = previousNode; + if (nextNode != null) { + nextNode.previous = this; + } + if (previousNode != null) { + previousNode.next = this; + } + clear(); + } + + public void insertBucket(double Value, double Variance) { + // insert a Bucket at the end + int k = bucketSizeRow; + bucketSizeRow++; + // Insert new bucket + setTotal(Value, k); + setVariance(Variance, k); + } + + public void RemoveBucket() { + // Removes the first Buvket + compressBucketsRow(1); + } + + public void compressBucketsRow(int NumberItemsDeleted) { + // Delete first elements + for (int k = NumberItemsDeleted; k <= MAXBUCKETS; k++) { + bucketTotal[k - NumberItemsDeleted] = bucketTotal[k]; + bucketVariance[k - NumberItemsDeleted] = bucketVariance[k]; + } + for (int k = 1; k <= NumberItemsDeleted; k++) { + clearBucket(MAXBUCKETS - k + 1); + } + bucketSizeRow -= NumberItemsDeleted; + // BucketNumber-=NumberItemsDeleted; + } + + public ListItem previous() { + // post: returns the previous node. + return this.previous; + } + + public void setPrevious(ListItem previous) { + // post: sets the previous node to be the given node + this.previous = previous; + } + + public ListItem next() { + // post: returns the next node. + return this.next; + } + + public void setNext(ListItem next) { + // post: sets the next node to be the given node + this.next = next; + } + + public double Total(int k) { + // post: returns the element in this node + return bucketTotal[k]; + } + + public double Variance(int k) { + // post: returns the element in this node + return bucketVariance[k]; + } + + public void setTotal(double value, int k) { + // post: sets the element in this node to the given + // object. + bucketTotal[k] = value; + } + + public void setVariance(double value, int k) { + // post: sets the element in this node to the given + // object. + bucketVariance[k] = value; + } + + /* + * public ListItem(Object element, ListItem nextNode){ // post: initializes + * the node to contain the given // object and link to the given next node. + * this.data = element; this.next = nextNode; } public ListItem(Object + * element) { // post: initializes the node to be a tail node // containing + * the given value. this(element, null); } + * + * + * public Object value() { // post: returns the element in this node return + * this.data; } public void setValue(Object anObject) { // post: sets the + * element in this node to the given // object. this.data = anObject; } + */ + + @Override + public void getDescription(StringBuilder sb, int indent) { + } + } + + public static final double DELTA = .002; // .1; + + private static final int mintMinimLongitudWindow = 10; // 10 + + private double mdbldelta = .002; // .1; + + private int mintTime = 0; + + private int mintClock = 32; + + private double mdblWidth = 0; // Mean of Width = mdblWidth/Number of items + // BUCKET + + public static final int MAXBUCKETS = 5; + + private int lastBucketRow = 0; + + private double TOTAL = 0; + + private double VARIANCE = 0; + + private int WIDTH = 0; + + private int BucketNumber = 0; + + private int Detect = 0; + + private int numberDetections = 0; + + private int DetectTwice = 0; + + private boolean blnBucketDeleted = false; + + private int BucketNumberMAX = 0; + + private int mintMinWinLength = 5; + + private List listRowBuckets; + + public boolean getChange() { + return blnBucketDeleted; + } + + public void resetChange() { + blnBucketDeleted = false; + } + + public int getBucketsUsed() { + return BucketNumberMAX; + } + + public int getWidth() { + return WIDTH; + } + + public void setClock(int intClock) { + mintClock = intClock; + } + + public int getClock() { + return mintClock; + } + + public boolean getWarning() { + return false; + } + + public boolean getDetect() { + return (Detect == mintTime); + } + + public int getNumberDetections() { + return numberDetections; + } + + public double getTotal() { + return TOTAL; + } + + public double getEstimation() { + return TOTAL / WIDTH; + } + + public double getVariance() { + return VARIANCE / WIDTH; + } + + public double getWidthT() { + return mdblWidth; + } + + private void initBuckets() { + // Init buckets + listRowBuckets = new List(); + lastBucketRow = 0; + TOTAL = 0; + VARIANCE = 0; + WIDTH = 0; + BucketNumber = 0; + } + + private void insertElement(double Value) { + WIDTH++; + insertElementBucket(0, Value, listRowBuckets.head()); + double incVariance = 0; + if (WIDTH > 1) { + incVariance = (WIDTH - 1) * (Value - TOTAL / (WIDTH - 1)) * (Value - TOTAL / (WIDTH - 1)) / WIDTH; + } + VARIANCE += incVariance; + TOTAL += Value; + compressBuckets(); + } + + private void insertElementBucket(double Variance, double Value, ListItem Node) { + // Insert new bucket + Node.insertBucket(Value, Variance); + BucketNumber++; + if (BucketNumber > BucketNumberMAX) { + BucketNumberMAX = BucketNumber; + } + } + + private int bucketSize(int Row) { + return (int) Math.pow(2, Row); + } + + public int deleteElement() { + // LIST + // Update statistics + ListItem Node; + Node = listRowBuckets.tail(); + int n1 = bucketSize(lastBucketRow); + WIDTH -= n1; + TOTAL -= Node.Total(0); + double u1 = Node.Total(0) / n1; + double incVariance = Node.Variance(0) + n1 * WIDTH * (u1 - TOTAL / WIDTH) * (u1 - TOTAL / WIDTH) / (n1 + WIDTH); + VARIANCE -= incVariance; + + // Delete Bucket + Node.RemoveBucket(); + BucketNumber--; + if (Node.bucketSizeRow == 0) { + listRowBuckets.removeFromTail(); + lastBucketRow--; + } + return n1; + } + + public void compressBuckets() { + // Traverse the list of buckets in increasing order + int n1, n2; + double u2, u1, incVariance; + ListItem cursor; + ListItem nextNode; + cursor = listRowBuckets.head(); + int i = 0; + do { + // Find the number of buckets in a row + int k = cursor.bucketSizeRow; + // If the row is full, merge buckets + if (k == MAXBUCKETS + 1) { + nextNode = cursor.next(); + if (nextNode == null) { + listRowBuckets.addToTail(); + nextNode = cursor.next(); + lastBucketRow++; + } + n1 = bucketSize(i); + n2 = bucketSize(i); + u1 = cursor.Total(0) / n1; + u2 = cursor.Total(1) / n2; + incVariance = n1 * n2 * (u1 - u2) * (u1 - u2) / (n1 + n2); + + nextNode.insertBucket(cursor.Total(0) + cursor.Total(1), cursor.Variance(0) + cursor.Variance(1) + incVariance); + BucketNumber++; + cursor.compressBucketsRow(2); + if (nextNode.bucketSizeRow <= MAXBUCKETS) { + break; + } + } else { + break; + } + cursor = cursor.next(); + i++; + } while (cursor != null); + } + + public boolean setInput(double intEntrada) { + return setInput(intEntrada, mdbldelta); + } + + public boolean setInput(double intEntrada, double delta) { + boolean blnChange = false; + boolean blnExit; + ListItem cursor; + mintTime++; + + // 1,2)Increment window in one element + insertElement(intEntrada); + blnBucketDeleted = false; + // 3)Reduce window + if (mintTime % mintClock == 0 && getWidth() > mintMinimLongitudWindow) { + boolean blnReduceWidth = true; // Diference + + while (blnReduceWidth) // Diference + { + blnReduceWidth = false; // Diference + blnExit = false; + int n0 = 0; + int n1 = WIDTH; + double u0 = 0; + double u1 = getTotal(); + double v0 = 0; + double v1 = VARIANCE; + double n2; + double u2; + + cursor = listRowBuckets.tail(); + int i = lastBucketRow; + do { + for (int k = 0; k <= (cursor.bucketSizeRow - 1); k++) { + n2 = bucketSize(i); + u2 = cursor.Total(k); + if (n0 > 0) { + v0 += cursor.Variance(k) + (double) n0 * n2 * (u0 / n0 - u2 / n2) * (u0 / n0 - u2 / n2) / (n0 + n2); + } + if (n1 > 0) { + v1 -= cursor.Variance(k) + (double) n1 * n2 * (u1 / n1 - u2 / n2) * (u1 / n1 - u2 / n2) / (n1 + n2); + } + + n0 += bucketSize(i); + n1 -= bucketSize(i); + u0 += cursor.Total(k); + u1 -= cursor.Total(k); + + if (i == 0 && k == cursor.bucketSizeRow - 1) { + blnExit = true; + break; + } + double absvalue = (u0 / n0) - (u1 / n1); // n1<WIDTH-mintMinWinLength-1 + if ((n1 > mintMinWinLength + 1 && n0 > mintMinWinLength + 1) && // Diference NEGATIVE if( + blnCutexpression(n0, n1, u0, u1, v0, v1, absvalue, delta)) { + blnBucketDeleted = true; + Detect = mintTime; + + if (Detect == 0) { + Detect = mintTime; + // blnFirst=true; + // blnWarning=true; + } else if (DetectTwice == 0) { + DetectTwice = mintTime; + // blnDetect=true; + } + blnReduceWidth = true; // Diference + blnChange = true; + if (getWidth() > 0) { // Reduce width of the window + // while (n0>0) // Diference NEGATIVE + n0 -= deleteElement(); + blnExit = true; + break; + } + } // End if + }// Next k + cursor = cursor.previous(); + i--; + } while (((!blnExit && cursor != null))); + }// End While // Diference + }// End if + + mdblWidth += getWidth(); + if (blnChange) { + numberDetections++; + } + return blnChange; + } + + private boolean blnCutexpression(int n0, int n1, double u0, double u1, double v0, double v1, double absvalue, + double delta) { + int n = getWidth(); + double dd = Math.log(2 * Math.log(n) / delta); // -- ull perque el ln n va al numerador. + // Formula Gener 2008 + double v = getVariance(); + double m = ((double) 1 / ((n0 - mintMinWinLength + 1))) + ((double) 1 / ((n1 - mintMinWinLength + 1))); + double epsilon = Math.sqrt(2 * m * v * dd) + (double) 2 / 3 * dd * m; + + return (Math.abs(absvalue) > epsilon); + } + + public ADWIN() { + mdbldelta = DELTA; + initBuckets(); + Detect = 0; + numberDetections = 0; + DetectTwice = 0; + + } + + public ADWIN(double d) { + mdbldelta = d; + initBuckets(); + Detect = 0; + numberDetections = 0; + DetectTwice = 0; + } + + public ADWIN(int cl) { + mdbldelta = DELTA; + initBuckets(); + Detect = 0; + numberDetections = 0; + DetectTwice = 0; + mintClock = cl; + } + + public String getEstimatorInfo() { + return "ADWIN;;"; + } + + public void setW(int W0) { + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/ADWINChangeDetector.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/ADWINChangeDetector.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/ADWINChangeDetector.java new file mode 100644 index 0000000..8410e6f --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/ADWINChangeDetector.java @@ -0,0 +1,70 @@ +package org.apache.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.tasks.TaskMonitor; + +import com.github.javacliparser.FloatOption; + +/** + * Drift detection method based in ADWIN. ADaptive sliding WINdow is a change detector and estimator. It keeps a + * variable-length window of recently seen items, with the property that the window has the maximal length statistically + * consistent with the hypothesis "there has been no change in the average value inside the window". + * + * + * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz) + * @version $Revision: 7 $ + */ +public class ADWINChangeDetector extends AbstractChangeDetector { + + protected ADWIN adwin; + + public FloatOption deltaAdwinOption = new FloatOption("deltaAdwin", 'a', + "Delta of Adwin change detection", 0.002, 0.0, 1.0); + + @Override + public void input(double inputValue) { + if (this.adwin == null) { + resetLearning(); + } + this.isChangeDetected = adwin.setInput(inputValue); + this.isWarningZone = false; + this.delay = 0.0; + this.estimation = adwin.getEstimation(); + } + + @Override + public void resetLearning() { + adwin = new ADWIN(this.deltaAdwinOption.getValue()); + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/AbstractChangeDetector.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/AbstractChangeDetector.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/AbstractChangeDetector.java new file mode 100644 index 0000000..da29c50 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/AbstractChangeDetector.java @@ -0,0 +1,141 @@ +package org.apache.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.options.AbstractOptionHandler; + +/** + * Abstract Change Detector. All change detectors in MOA extend this class. + * + * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz) + * @version $Revision: 7 $ + */ +public abstract class AbstractChangeDetector extends AbstractOptionHandler + implements ChangeDetector { + + /** + * Change was detected + */ + protected boolean isChangeDetected; + + /** + * Warning Zone: after a warning and before a change + */ + protected boolean isWarningZone; + + /** + * Prediction for the next value based in previous seen values + */ + protected double estimation; + + /** + * Delay in detecting change + */ + protected double delay; + + /** + * Resets this change detector. It must be similar to starting a new change detector from scratch. + * + */ + public void resetLearning() { + this.isChangeDetected = false; + this.isWarningZone = false; + this.estimation = 0.0; + this.delay = 0.0; + } + + /** + * Adding a numeric value to the change detector<br> + * <br> + * + * The output of the change detector is modified after the insertion of a new item inside. + * + * @param inputValue + * the number to insert into the change detector + */ + public abstract void input(double inputValue); + + /** + * Gets whether there is change detected. + * + * @return true if there is change + */ + public boolean getChange() { + return this.isChangeDetected; + } + + /** + * Gets whether the change detector is in the warning zone, after a warning alert and before a change alert. + * + * @return true if the change detector is in the warning zone + */ + public boolean getWarningZone() { + return this.isWarningZone; + } + + /** + * Gets the prediction of next values. + * + * @return a prediction of the next value + */ + public double getEstimation() { + return this.estimation; + } + + /** + * Gets the length of the delay in the change detected. + * + * @return he length of the delay in the change detected + */ + public double getDelay() { + return this.delay; + } + + /** + * Gets the output state of the change detection. + * + * @return an array with the number of change detections, number of warnings, delay, and estimation. + */ + public double[] getOutput() { + return new double[] { this.isChangeDetected ? 1 : 0, this.isWarningZone ? 1 : 0, this.delay, this.estimation }; + } + + /** + * Returns a string representation of the model. + * + * @param sb + * the stringbuilder to add the description + * @param indent + * the number of characters to indent + */ + @Override + public abstract void getDescription(StringBuilder sb, int indent); + + /** + * Produces a copy of this change detector method + * + * @return the copy of this change detector method + */ + @Override + public ChangeDetector copy() { + return (ChangeDetector) super.copy(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/ChangeDetector.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/ChangeDetector.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/ChangeDetector.java new file mode 100644 index 0000000..74660a5 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/ChangeDetector.java @@ -0,0 +1,103 @@ +package org.apache.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.options.OptionHandler; + +/** + * Change Detector interface to implement methods that detects change. + * + * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz) + * @version $Revision: 7 $ + */ +public interface ChangeDetector extends OptionHandler { + + /** + * Resets this change detector. It must be similar to starting a new change detector from scratch. + * + */ + public void resetLearning(); + + /** + * Adding a numeric value to the change detector<br> + * <br> + * + * The output of the change detector is modified after the insertion of a new item inside. + * + * @param inputValue + * the number to insert into the change detector + */ + public void input(double inputValue); + + /** + * Gets whether there is change detected. + * + * @return true if there is change + */ + public boolean getChange(); + + /** + * Gets whether the change detector is in the warning zone, after a warning alert and before a change alert. + * + * @return true if the change detector is in the warning zone + */ + public boolean getWarningZone(); + + /** + * Gets the prediction of next values. + * + * @return a prediction of the next value + */ + public double getEstimation(); + + /** + * Gets the length of the delay in the change detected. + * + * @return he length of the delay in the change detected + */ + public double getDelay(); + + /** + * Gets the output state of the change detection. + * + * @return an array with the number of change detections, number of warnings, delay, and estimation. + */ + public double[] getOutput(); + + /** + * Returns a string representation of the model. + * + * @param out + * the stringbuilder to add the description + * @param indent + * the number of characters to indent + */ + @Override + public void getDescription(StringBuilder sb, int indent); + + /** + * Produces a copy of this drift detection method + * + * @return the copy of this drift detection method + */ + @Override + public ChangeDetector copy(); +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/CusumDM.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/CusumDM.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/CusumDM.java new file mode 100644 index 0000000..051f74e --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/CusumDM.java @@ -0,0 +1,111 @@ +package org.apache.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.tasks.TaskMonitor; + +import com.github.javacliparser.FloatOption; +import com.github.javacliparser.IntOption; + +/** + * Drift detection method based in Cusum + * + * + * @author Manuel Baena ([email protected]) + * @version $Revision: 7 $ + */ +public class CusumDM extends AbstractChangeDetector { + + private static final long serialVersionUID = -3518369648142099719L; + + public IntOption minNumInstancesOption = new IntOption( + "minNumInstances", + 'n', + "The minimum number of instances before permitting detecting change.", + 30, 0, Integer.MAX_VALUE); + + public FloatOption deltaOption = new FloatOption("delta", 'd', + "Delta parameter of the Cusum Test", 0.005, 0.0, 1.0); + + public FloatOption lambdaOption = new FloatOption("lambda", 'l', + "Threshold parameter of the Cusum Test", 50, 0.0, Float.MAX_VALUE); + + private int m_n; + + private double sum; + + private double x_mean; + + private double delta; + + private double lambda; + + public CusumDM() { + resetLearning(); + } + + @Override + public void resetLearning() { + m_n = 1; + x_mean = 0.0; + sum = 0.0; + delta = this.deltaOption.getValue(); + lambda = this.lambdaOption.getValue(); + } + + @Override + public void input(double x) { + // It monitors the error rate + if (this.isChangeDetected) { + resetLearning(); + } + + x_mean = x_mean + (x - x_mean) / (double) m_n; + sum = Math.max(0, sum + x - x_mean - this.delta); + m_n++; + + // System.out.print(prediction + " " + m_n + " " + (m_p+m_s) + " "); + this.estimation = x_mean; + this.isChangeDetected = false; + this.isWarningZone = false; + this.delay = 0; + + if (m_n < this.minNumInstancesOption.getValue()) { + return; + } + + if (sum > this.lambda) { + this.isChangeDetected = true; + } + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/DDM.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/DDM.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/DDM.java new file mode 100644 index 0000000..9515657 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/DDM.java @@ -0,0 +1,119 @@ +package org.apache.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.tasks.TaskMonitor; + +import com.github.javacliparser.IntOption; + +/** + * Drift detection method based in DDM method of Joao Gama SBIA 2004. + * + * <p> + * João Gama, Pedro Medas, Gladys Castillo, Pedro Pereira Rodrigues: Learning with Drift Detection. SBIA 2004: 286-295 + * </p> + * + * @author Manuel Baena ([email protected]) + * @version $Revision: 7 $ + */ +public class DDM extends AbstractChangeDetector { + + private static final long serialVersionUID = -3518369648142099719L; + + // private static final int DDM_MINNUMINST = 30; + public IntOption minNumInstancesOption = new IntOption( + "minNumInstances", + 'n', + "The minimum number of instances before permitting detecting change.", + 30, 0, Integer.MAX_VALUE); + private int m_n; + + private double m_p; + + private double m_s; + + private double m_psmin; + + private double m_pmin; + + private double m_smin; + + public DDM() { + resetLearning(); + } + + @Override + public void resetLearning() { + m_n = 1; + m_p = 1; + m_s = 0; + m_psmin = Double.MAX_VALUE; + m_pmin = Double.MAX_VALUE; + m_smin = Double.MAX_VALUE; + } + + @Override + public void input(double prediction) { + // prediction must be 1 or 0 + // It monitors the error rate + if (this.isChangeDetected) { + resetLearning(); + } + m_p = m_p + (prediction - m_p) / (double) m_n; + m_s = Math.sqrt(m_p * (1 - m_p) / (double) m_n); + + m_n++; + + // System.out.print(prediction + " " + m_n + " " + (m_p+m_s) + " "); + this.estimation = m_p; + this.isChangeDetected = false; + this.isWarningZone = false; + this.delay = 0; + + if (m_n < this.minNumInstancesOption.getValue()) { + return; + } + + if (m_p + m_s <= m_psmin) { + m_pmin = m_p; + m_smin = m_s; + m_psmin = m_p + m_s; + } + + if (m_n > this.minNumInstancesOption.getValue() && m_p + m_s > m_pmin + 3 * m_smin) { + this.isChangeDetected = true; + // resetLearning(); + } else + this.isWarningZone = m_p + m_s > m_pmin + 2 * m_smin; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/EDDM.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/EDDM.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/EDDM.java new file mode 100644 index 0000000..736948c --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/EDDM.java @@ -0,0 +1,139 @@ +package org.apache.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.tasks.TaskMonitor; + +/** + * Drift detection method based in EDDM method of Manuel Baena et al. + * + * <p> + * Early Drift Detection Method. Manuel Baena-Garcia, Jose Del Campo-Avila, Raúl Fidalgo, Albert Bifet, Ricard Gavalda, + * Rafael Morales-Bueno. In Fourth International Workshop on Knowledge Discovery from Data Streams, 2006. + * </p> + * + * @author Manuel Baena ([email protected]) + * @version $Revision: 7 $ + */ +public class EDDM extends AbstractChangeDetector { + + /** + * + */ + private static final long serialVersionUID = 140980267062162000L; + + private static final double FDDM_OUTCONTROL = 0.9; + + private static final double FDDM_WARNING = 0.95; + + private static final double FDDM_MINNUMINSTANCES = 30; + + private double m_numErrors; + + private int m_minNumErrors = 30; + + private int m_n; + + private int m_d; + + private int m_lastd; + + private double m_mean; + + private double m_stdTemp; + + private double m_m2smax; + + public EDDM() { + resetLearning(); + } + + @Override + public void resetLearning() { + m_n = 1; + m_numErrors = 0; + m_d = 0; + m_lastd = 0; + m_mean = 0.0; + m_stdTemp = 0.0; + m_m2smax = 0.0; + this.estimation = 0.0; + } + + @Override + public void input(double prediction) { + // prediction must be 1 or 0 + // It monitors the error rate + // System.out.print(prediction + " " + m_n + " " + probability + " "); + if (this.isChangeDetected) { + resetLearning(); + } + this.isChangeDetected = false; + + m_n++; + if (prediction == 1.0) { + this.isWarningZone = false; + this.delay = 0; + m_numErrors += 1; + m_lastd = m_d; + m_d = m_n - 1; + int distance = m_d - m_lastd; + double oldmean = m_mean; + m_mean = m_mean + ((double) distance - m_mean) / m_numErrors; + this.estimation = m_mean; + m_stdTemp = m_stdTemp + (distance - m_mean) * (distance - oldmean); + double std = Math.sqrt(m_stdTemp / m_numErrors); + double m2s = m_mean + 2 * std; + + if (m2s > m_m2smax) { + if (m_n > FDDM_MINNUMINSTANCES) { + m_m2smax = m2s; + } + // m_lastLevel = DDM_INCONTROL_LEVEL; + // System.out.print(1 + " "); + } else { + double p = m2s / m_m2smax; + // System.out.print(p + " "); + if (m_n > FDDM_MINNUMINSTANCES && m_numErrors > m_minNumErrors + && p < FDDM_OUTCONTROL) { + // System.out.println(m_mean + ",D"); + this.isChangeDetected = true; + // resetLearning(); + } else { + this.isWarningZone = m_n > FDDM_MINNUMINSTANCES + && m_numErrors > m_minNumErrors && p < FDDM_WARNING; + } + } + } + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/EWMAChartDM.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/EWMAChartDM.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/EWMAChartDM.java new file mode 100644 index 0000000..05ab344 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/EWMAChartDM.java @@ -0,0 +1,122 @@ +package org.apache.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.tasks.TaskMonitor; + +import com.github.javacliparser.FloatOption; +import com.github.javacliparser.IntOption; + +/** + * Drift detection method based in EWMA Charts of Ross, Adams, Tasoulis and Hand 2012 + * + * + * @author Manuel Baena ([email protected]) + * @version $Revision: 7 $ + */ +public class EWMAChartDM extends AbstractChangeDetector { + + private static final long serialVersionUID = -3518369648142099719L; + + // private static final int DDM_MIN_NUM_INST = 30; + public IntOption minNumInstancesOption = new IntOption( + "minNumInstances", + 'n', + "The minimum number of instances before permitting detecting change.", + 30, 0, Integer.MAX_VALUE); + + public FloatOption lambdaOption = new FloatOption("lambda", 'l', + "Lambda parameter of the EWMA Chart Method", 0.2, 0.0, Float.MAX_VALUE); + + private double m_n; + + private double m_sum; + + private double m_p; + + private double m_s; + + private double lambda; + + private double z_t; + + public EWMAChartDM() { + resetLearning(); + } + + @Override + public void resetLearning() { + m_n = 1.0; + m_sum = 0.0; + m_p = 0.0; + m_s = 0.0; + z_t = 0.0; + lambda = this.lambdaOption.getValue(); + } + + @Override + public void input(double prediction) { + // prediction must be 1 or 0 + // It monitors the error rate + if (this.isChangeDetected) { + resetLearning(); + } + + m_sum += prediction; + + m_p = m_sum / m_n; // m_p + (prediction - m_p) / (double) (m_n+1); + + m_s = Math.sqrt(m_p * (1.0 - m_p) * lambda * (1.0 - Math.pow(1.0 - lambda, 2.0 * m_n)) / (2.0 - lambda)); + + m_n++; + + z_t += lambda * (prediction - z_t); + + double L_t = 3.97 - 6.56 * m_p + 48.73 * Math.pow(m_p, 3) - 330.13 * Math.pow(m_p, 5) + 848.18 * Math.pow(m_p, 7); // %1 FP + this.estimation = m_p; + this.isChangeDetected = false; + this.isWarningZone = false; + this.delay = 0; + + if (m_n < this.minNumInstancesOption.getValue()) { + return; + } + + if (m_n > this.minNumInstancesOption.getValue() && z_t > m_p + L_t * m_s) { + this.isChangeDetected = true; + // resetLearning(); + } else { + this.isWarningZone = z_t > m_p + 0.5 * L_t * m_s; + } + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/GeometricMovingAverageDM.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/GeometricMovingAverageDM.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/GeometricMovingAverageDM.java new file mode 100644 index 0000000..d197d97 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/GeometricMovingAverageDM.java @@ -0,0 +1,109 @@ +package org.apache.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.tasks.TaskMonitor; + +import com.github.javacliparser.FloatOption; +import com.github.javacliparser.IntOption; + +/** + * Drift detection method based in Geometric Moving Average Test + * + * + * @author Manuel Baena ([email protected]) + * @version $Revision: 7 $ + */ +public class GeometricMovingAverageDM extends AbstractChangeDetector { + + private static final long serialVersionUID = -3518369648142099719L; + + public IntOption minNumInstancesOption = new IntOption( + "minNumInstances", + 'n', + "The minimum number of instances before permitting detecting change.", + 30, 0, Integer.MAX_VALUE); + + public FloatOption lambdaOption = new FloatOption("lambda", 'l', + "Threshold parameter of the Geometric Moving Average Test", 1, 0.0, Float.MAX_VALUE); + + public FloatOption alphaOption = new FloatOption("alpha", 'a', + "Alpha parameter of the Geometric Moving Average Test", .99, 0.0, 1.0); + + private double m_n; + + private double sum; + + private double x_mean; + + private double alpha; + + private double lambda; + + public GeometricMovingAverageDM() { + resetLearning(); + } + + @Override + public void resetLearning() { + m_n = 1.0; + x_mean = 0.0; + sum = 0.0; + alpha = this.alphaOption.getValue(); + lambda = this.lambdaOption.getValue(); + } + + @Override + public void input(double x) { + // It monitors the error rate + if (this.isChangeDetected) { + resetLearning(); + } + + x_mean = x_mean + (x - x_mean) / m_n; + sum = alpha * sum + (1.0 - alpha) * (x - x_mean); + m_n++; + this.estimation = x_mean; + this.isChangeDetected = false; + this.isWarningZone = false; + this.delay = 0; + + if (m_n < this.minNumInstancesOption.getValue()) { + return; + } + + if (sum > this.lambda) { + this.isChangeDetected = true; + } + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +}
