http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java new file mode 100644 index 0000000..c7f3d7b --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/VFMLNumericAttributeClassObserver.java @@ -0,0 +1,223 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.core.Utils; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import com.yahoo.labs.samoa.moa.classifiers.core.AttributeSplitSuggestion; +import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest; +import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; + +import com.yahoo.labs.samoa.moa.core.DoubleVector; +import com.yahoo.labs.samoa.moa.core.ObjectRepository; +import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; +import com.github.javacliparser.IntOption; +import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; + +/** + * Class for observing the class data distribution for a numeric attribute as in VFML. + * Used in naive Bayes and decision trees to monitor data statistics on leaves. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class VFMLNumericAttributeClassObserver extends AbstractOptionHandler implements NumericAttributeClassObserver { + + private static final long serialVersionUID = 1L; + + @Override + public void observeAttributeTarget(double attVal, double target) { + throw new UnsupportedOperationException("Not supported yet."); + } + + protected class Bin implements Serializable { + + private static final long serialVersionUID = 1L; + + public double lowerBound, upperBound; + + public DoubleVector classWeights = new DoubleVector(); + + public int boundaryClass; + + public double boundaryWeight; + } + + protected List<Bin> binList = new ArrayList<>(); + + public IntOption numBinsOption = new IntOption("numBins", 'n', + "The number of bins.", 10, 1, Integer.MAX_VALUE); + + + @Override + public void observeAttributeClass(double attVal, int classVal, double weight) { + if (!Utils.isMissingValue(attVal)) { + if (this.binList.size() < 1) { + // create the first bin + Bin newBin = new Bin(); + newBin.classWeights.addToValue(classVal, weight); + newBin.boundaryClass = classVal; + newBin.boundaryWeight = weight; + newBin.upperBound = attVal; + newBin.lowerBound = attVal; + this.binList.add(newBin); + } else { + // find bin containing new example with binary search + int index = 0; + boolean found = false; + int min = 0; + int max = this.binList.size() - 1; + while ((min <= max) && !found) { + int i = (min + max) / 2; + Bin bin = this.binList.get(i); + if (((attVal >= bin.lowerBound) && (attVal < bin.upperBound)) + || ((i == this.binList.size() - 1) + && (attVal >= bin.lowerBound) && (attVal <= bin.upperBound))) { + found = true; + index = i; + } else if (attVal < bin.lowerBound) { + max = i - 1; + } else { + min = i + 1; + } + } + boolean first = false; + boolean last = false; + if (!found) { + // determine if it is before or after the existing range + Bin bin = this.binList.get(0); + if (bin.lowerBound > attVal) { + // go before the first bin + index = 0; + first = true; + } else { + // if we haven't found it yet value must be > last bins + // upperBound + index = this.binList.size() - 1; + last = true; + } + } + Bin bin = this.binList.get(index); // VLIndex(ct->bins, index); + if ((bin.lowerBound == attVal) + || (this.binList.size() >= this.numBinsOption.getValue())) {// Option.getValue()) + // {//1000) + // { + // if this is the exact same boundary and class as the bin + // boundary or we aren't adding new bins any more then + // increment + // boundary counts + bin.classWeights.addToValue(classVal, weight); + if ((bin.boundaryClass == classVal) + && (bin.lowerBound == attVal)) { + // if it is also the same class then special case it + bin.boundaryWeight += weight; + } + } else { + // create a new bin + Bin newBin = new Bin(); + newBin.classWeights.addToValue(classVal, weight); + newBin.boundaryWeight = weight; + newBin.boundaryClass = classVal; + newBin.upperBound = bin.upperBound; + newBin.lowerBound = attVal; + + double percent = 0.0; + // estimate initial counts with a linear interpolation + if (!((bin.upperBound - bin.lowerBound == 0) || last || first)) { + percent = 1.0 - ((attVal - bin.lowerBound) / (bin.upperBound - bin.lowerBound)); + } + + // take out the boundry points, they stay with the old bin + bin.classWeights.addToValue(bin.boundaryClass, + -bin.boundaryWeight); + DoubleVector weightToShift = new DoubleVector( + bin.classWeights); + weightToShift.scaleValues(percent); + newBin.classWeights.addValues(weightToShift); + bin.classWeights.subtractValues(weightToShift); + // put the boundry examples back in + bin.classWeights.addToValue(bin.boundaryClass, + bin.boundaryWeight); + + // insert the new bin in the right place + if (last) { + bin.upperBound = attVal; + newBin.upperBound = attVal; + this.binList.add(newBin); + } else if (first) { + newBin.upperBound = bin.lowerBound; + this.binList.add(0, newBin); + } else { + newBin.upperBound = bin.upperBound; + bin.upperBound = attVal; + this.binList.add(index + 1, newBin); + } + } + } + } + } + + @Override + public double probabilityOfAttributeValueGivenClass(double attVal, + int classVal) { + // TODO: NaiveBayes broken until implemented + return 0.0; + } + + @Override + public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( + SplitCriterion criterion, double[] preSplitDist, int attIndex, + boolean binaryOnly) { + AttributeSplitSuggestion bestSuggestion = null; + DoubleVector rightDist = new DoubleVector(); + for (Bin bin : this.binList) { + rightDist.addValues(bin.classWeights); + } + DoubleVector leftDist = new DoubleVector(); + for (Bin bin : this.binList) { + leftDist.addValues(bin.classWeights); + rightDist.subtractValues(bin.classWeights); + double[][] postSplitDists = new double[][]{ + leftDist.getArrayCopy(), rightDist.getArrayCopy()}; + double merit = criterion.getMeritOfSplit(preSplitDist, + postSplitDists); + if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) { + bestSuggestion = new AttributeSplitSuggestion( + new NumericAttributeBinaryTest(attIndex, + bin.upperBound, false), postSplitDists, merit); + } + } + return bestSuggestion; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + // TODO Auto-generated method stub + } +}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java new file mode 100644 index 0000000..d1267a7 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java @@ -0,0 +1,35 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** + * Abstract binary conditional test for instances to use to split nodes in Hoeffding trees. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public abstract class InstanceConditionalBinaryTest extends InstanceConditionalTest { + + @Override + public int maxBranches() { + return 2; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java new file mode 100644 index 0000000..4d1b955 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalTest.java @@ -0,0 +1,76 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.AbstractMOAObject; +import com.yahoo.labs.samoa.instances.InstancesHeader; +import com.yahoo.labs.samoa.instances.Instance; + +/** + * Abstract conditional test for instances to use to split nodes in Hoeffding trees. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public abstract class InstanceConditionalTest extends AbstractMOAObject { + + /** + * Returns the number of the branch for an instance, -1 if unknown. + * + * @param inst the instance to be used + * @return the number of the branch for an instance, -1 if unknown. + */ + public abstract int branchForInstance(Instance inst); + + /** + * Gets whether the number of the branch for an instance is known. + * + * @param inst + * @return true if the number of the branch for an instance is known + */ + public boolean resultKnownForInstance(Instance inst) { + return branchForInstance(inst) >= 0; + } + + /** + * Gets the number of maximum branches, -1 if unknown. + * + * @return the number of maximum branches, -1 if unknown.. + */ + public abstract int maxBranches(); + + /** + * Gets the text that describes the condition of a branch. It is used to describe the branch. + * + * @param branch the number of the branch to describe + * @param context the context or header of the data stream + * @return the text that describes the condition of the branch + */ + public abstract String describeConditionForBranch(int branch, + InstancesHeader context); + + /** + * Returns an array with the attributes that the test depends on. + * + * @return an array with the attributes that the test depends on + */ + public abstract int[] getAttsTestDependsOn(); +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java new file mode 100644 index 0000000..da3c717 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeBinaryTest.java @@ -0,0 +1,73 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.instances.InstancesHeader; +import com.yahoo.labs.samoa.instances.Instance; + +/** + * Nominal binary conditional test for instances to use to split nodes in Hoeffding trees. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class NominalAttributeBinaryTest extends InstanceConditionalBinaryTest { + + private static final long serialVersionUID = 1L; + + protected int attIndex; + + protected int attValue; + + public NominalAttributeBinaryTest(int attIndex, int attValue) { + this.attIndex = attIndex; + this.attValue = attValue; + } + + @Override + public int branchForInstance(Instance inst) { + int instAttIndex = this.attIndex < inst.classIndex() ? this.attIndex + : this.attIndex + 1; + return inst.isMissing(instAttIndex) ? -1 : ((int) inst.value(instAttIndex) == this.attValue ? 0 : 1); + } + + @Override + public String describeConditionForBranch(int branch, InstancesHeader context) { + if ((branch == 0) || (branch == 1)) { + return InstancesHeader.getAttributeNameString(context, + this.attIndex) + + (branch == 0 ? " = " : " != ") + + InstancesHeader.getNominalValueString(context, + this.attIndex, this.attValue); + } + throw new IndexOutOfBoundsException(); + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + public int[] getAttsTestDependsOn() { + return new int[]{this.attIndex}; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java new file mode 100644 index 0000000..82c91d3 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java @@ -0,0 +1,71 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.instances.InstancesHeader; +import com.yahoo.labs.samoa.instances.Instance; + +/** + * Nominal multi way conditional test for instances to use to split nodes in Hoeffding trees. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class NominalAttributeMultiwayTest extends InstanceConditionalTest { + + private static final long serialVersionUID = 1L; + + protected int attIndex; + + public NominalAttributeMultiwayTest(int attIndex) { + this.attIndex = attIndex; + } + + @Override + public int branchForInstance(Instance inst) { + int instAttIndex = this.attIndex ; //< inst.classIndex() ? this.attIndex + //: this.attIndex + 1; + return inst.isMissing(instAttIndex) ? -1 : (int) inst.value(instAttIndex); + } + + @Override + public String describeConditionForBranch(int branch, InstancesHeader context) { + return InstancesHeader.getAttributeNameString(context, this.attIndex) + + " = " + + InstancesHeader.getNominalValueString(context, this.attIndex, + branch); + } + + @Override + public int maxBranches() { + return -1; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + public int[] getAttsTestDependsOn() { + return new int[]{this.attIndex}; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java new file mode 100644 index 0000000..82c7395 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NumericAttributeBinaryTest.java @@ -0,0 +1,92 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.instances.InstancesHeader; +import com.yahoo.labs.samoa.instances.Instance; + +/** + * Numeric binary conditional test for instances to use to split nodes in Hoeffding trees. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class NumericAttributeBinaryTest extends InstanceConditionalBinaryTest { + + private static final long serialVersionUID = 1L; + + protected int attIndex; + + protected double attValue; + + protected boolean equalsPassesTest; + + public NumericAttributeBinaryTest(int attIndex, double attValue, + boolean equalsPassesTest) { + this.attIndex = attIndex; + this.attValue = attValue; + this.equalsPassesTest = equalsPassesTest; + } + + @Override + public int branchForInstance(Instance inst) { + int instAttIndex = this.attIndex ; // < inst.classIndex() ? this.attIndex + // : this.attIndex + 1; + if (inst.isMissing(instAttIndex)) { + return -1; + } + double v = inst.value(instAttIndex); + if (v == this.attValue) { + return this.equalsPassesTest ? 0 : 1; + } + return v < this.attValue ? 0 : 1; + } + + @Override + public String describeConditionForBranch(int branch, InstancesHeader context) { + if ((branch == 0) || (branch == 1)) { + char compareChar = branch == 0 ? '<' : '>'; + int equalsBranch = this.equalsPassesTest ? 0 : 1; + return InstancesHeader.getAttributeNameString(context, + this.attIndex) + + ' ' + + compareChar + + (branch == equalsBranch ? "= " : " ") + + InstancesHeader.getNumericValueString(context, + this.attIndex, this.attValue); + } + throw new IndexOutOfBoundsException(); + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + public int[] getAttsTestDependsOn() { + return new int[]{this.attIndex}; + } + + public double getSplitValue() { + return this.attValue; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/ADWIN.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/ADWIN.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/ADWIN.java new file mode 100644 index 0000000..b05a302 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/ADWIN.java @@ -0,0 +1,604 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.AbstractMOAObject; + +/** + * ADaptive sliding WINdow method. This method is a change detector and estimator. + * It keeps a variable-length window of recently seen + * items, with the property that the window has the maximal length statistically + * consistent with the hypothesis "there has been no change in the average value + * inside the window". + * + * + * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz) + * @version $Revision: 7 $ + */ +public class ADWIN extends AbstractMOAObject { + + private class List extends AbstractMOAObject { + + protected int count; + + protected ListItem head; + + protected ListItem tail; + + public List() { +// post: initializes the list to be empty. + clear(); + addToHead(); + } + + /* Interface Store Methods */ + public int size() { + // post: returns the number of elements in the list. + return this.count; + } + + public ListItem head() { + // post: returns the number of elements in the list. + return this.head; + } + + public ListItem tail() { + // post: returns the number of elements in the list. + return this.tail; + } + + public boolean isEmpty() { + // post: returns the true iff store is empty. + return (this.size() == 0); + } + + public void clear() { + // post: clears the list so that it contains no elements. + this.head = null; + this.tail = null; + this.count = 0; + } + + /* Interface List Methods */ + public void addToHead() { + // pre: anObject is non-null + // post: the object is added to the beginning of the list + this.head = new ListItem(this.head, null); + if (this.tail == null) { + this.tail = this.head; + } + this.count++; + } + + public void removeFromHead() { + // pre: list is not empty + // post: removes and returns first object from the list +// ListItem temp; +// temp = this.head; + this.head = this.head.next(); + if (this.head != null) { + this.head.setPrevious(null); + } else { + this.tail = null; + } + this.count--; + } + + public void addToTail() { +// pre: anObject is non-null +// post: the object is added at the end of the list + this.tail = new ListItem(null, this.tail); + if (this.head == null) { + this.head = this.tail; + } + this.count++; + } + + public void removeFromTail() { +// pre: list is not empty +// post: the last object in the list is removed and returned +// ListItem temp; +// temp = this.tail; + this.tail = this.tail.previous(); + if (this.tail == null) { + this.head = null; + } else { + this.tail.setNext(null); + } + this.count--; + //temp=null; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + } + } + + private class ListItem extends AbstractMOAObject { + protected ListItem next; + + protected ListItem previous; + + protected int bucketSizeRow = 0; + + protected int MAXBUCKETS = ADWIN.MAXBUCKETS; + + protected double bucketTotal[] = new double[MAXBUCKETS + 1]; + + protected double bucketVariance[] = new double[MAXBUCKETS + 1]; + + public ListItem() { +// post: initializes the node to be a tail node +// containing the given value. + this(null, null); + } + + public void clear() { + bucketSizeRow = 0; + for (int k = 0; k <= MAXBUCKETS; k++) { + clearBucket(k); + } + } + + private void clearBucket(int k) { + setTotal(0, k); + setVariance(0, k); + } + + public ListItem(ListItem nextNode, ListItem previousNode) { +// post: initializes the node to contain the given +// object and link to the given next node. + //this.data = element; + this.next = nextNode; + this.previous = previousNode; + if (nextNode != null) { + nextNode.previous = this; + } + if (previousNode != null) { + previousNode.next = this; + } + clear(); + } + + public void insertBucket(double Value, double Variance) { +// insert a Bucket at the end + int k = bucketSizeRow; + bucketSizeRow++; + //Insert new bucket + setTotal(Value, k); + setVariance(Variance, k); + } + + public void RemoveBucket() { +// Removes the first Buvket + compressBucketsRow(1); + } + + public void compressBucketsRow(int NumberItemsDeleted) { + //Delete first elements + for (int k = NumberItemsDeleted; k <= MAXBUCKETS; k++) { + bucketTotal[k - NumberItemsDeleted] = bucketTotal[k]; + bucketVariance[k - NumberItemsDeleted] = bucketVariance[k]; + } + for (int k = 1; k <= NumberItemsDeleted; k++) { + clearBucket(MAXBUCKETS - k + 1); + } + bucketSizeRow -= NumberItemsDeleted; + //BucketNumber-=NumberItemsDeleted; + } + + public ListItem previous() { +// post: returns the previous node. + return this.previous; + } + + public void setPrevious(ListItem previous) { +// post: sets the previous node to be the given node + this.previous = previous; + } + + public ListItem next() { +// post: returns the next node. + return this.next; + } + + public void setNext(ListItem next) { +// post: sets the next node to be the given node + this.next = next; + } + + public double Total(int k) { +// post: returns the element in this node + return bucketTotal[k]; + } + + public double Variance(int k) { +// post: returns the element in this node + return bucketVariance[k]; + } + + public void setTotal(double value, int k) { +// post: sets the element in this node to the given +// object. + bucketTotal[k] = value; + } + + public void setVariance(double value, int k) { +// post: sets the element in this node to the given +// object. + bucketVariance[k] = value; + } + /* + public ListItem(Object element, + ListItem nextNode){ + // post: initializes the node to contain the given + // object and link to the given next node. + this.data = element; + this.next = nextNode; + } + public ListItem(Object element) { + // post: initializes the node to be a tail node + // containing the given value. + this(element, null); + } + + + public Object value() { + // post: returns the element in this node + return this.data; + } + public void setValue(Object anObject) { + // post: sets the element in this node to the given + // object. + this.data = anObject; + } + */ + + @Override + public void getDescription(StringBuilder sb, int indent) { + } + } + + public static final double DELTA = .002; //.1; + + private static final int mintMinimLongitudWindow = 10; //10 + + private double mdbldelta = .002; //.1; + + private int mintTime = 0; + + private int mintClock = 32; + + private double mdblWidth = 0; // Mean of Width = mdblWidth/Number of items + //BUCKET + + public static final int MAXBUCKETS = 5; + + private int lastBucketRow = 0; + + private double TOTAL = 0; + + private double VARIANCE = 0; + + private int WIDTH = 0; + + private int BucketNumber = 0; + + private int Detect = 0; + + private int numberDetections = 0; + + private int DetectTwice = 0; + + private boolean blnBucketDeleted = false; + + private int BucketNumberMAX = 0; + + private int mintMinWinLength = 5; + + private List listRowBuckets; + + public boolean getChange() { + return blnBucketDeleted; + } + + public void resetChange() { + blnBucketDeleted = false; + } + + public int getBucketsUsed() { + return BucketNumberMAX; + } + + public int getWidth() { + return WIDTH; + } + + public void setClock(int intClock) { + mintClock = intClock; + } + + public int getClock() { + return mintClock; + } + + public boolean getWarning() { + return false; + } + + public boolean getDetect() { + return (Detect == mintTime); + } + + public int getNumberDetections() { + return numberDetections; + } + + public double getTotal() { + return TOTAL; + } + + public double getEstimation() { + return TOTAL / WIDTH; + } + + public double getVariance() { + return VARIANCE / WIDTH; + } + + public double getWidthT() { + return mdblWidth; + } + + private void initBuckets() { + //Init buckets + listRowBuckets = new List(); + lastBucketRow = 0; + TOTAL = 0; + VARIANCE = 0; + WIDTH = 0; + BucketNumber = 0; + } + + private void insertElement(double Value) { + WIDTH++; + insertElementBucket(0, Value, listRowBuckets.head()); + double incVariance = 0; + if (WIDTH > 1) { + incVariance = (WIDTH - 1) * (Value - TOTAL / (WIDTH - 1)) * (Value - TOTAL / (WIDTH - 1)) / WIDTH; + } + VARIANCE += incVariance; + TOTAL += Value; + compressBuckets(); + } + + private void insertElementBucket(double Variance, double Value, ListItem Node) { + //Insert new bucket + Node.insertBucket(Value, Variance); + BucketNumber++; + if (BucketNumber > BucketNumberMAX) { + BucketNumberMAX = BucketNumber; + } + } + + private int bucketSize(int Row) { + return (int) Math.pow(2, Row); + } + + public int deleteElement() { + //LIST + //Update statistics + ListItem Node; + Node = listRowBuckets.tail(); + int n1 = bucketSize(lastBucketRow); + WIDTH -= n1; + TOTAL -= Node.Total(0); + double u1 = Node.Total(0) / n1; + double incVariance = Node.Variance(0) + n1 * WIDTH * (u1 - TOTAL / WIDTH) * (u1 - TOTAL / WIDTH) / (n1 + WIDTH); + VARIANCE -= incVariance; + + //Delete Bucket + Node.RemoveBucket(); + BucketNumber--; + if (Node.bucketSizeRow == 0) { + listRowBuckets.removeFromTail(); + lastBucketRow--; + } + return n1; + } + + public void compressBuckets() { + //Traverse the list of buckets in increasing order + int n1, n2; + double u2, u1, incVariance; + ListItem cursor; + ListItem nextNode; + cursor = listRowBuckets.head(); + int i = 0; + do { + //Find the number of buckets in a row + int k = cursor.bucketSizeRow; + //If the row is full, merge buckets + if (k == MAXBUCKETS + 1) { + nextNode = cursor.next(); + if (nextNode == null) { + listRowBuckets.addToTail(); + nextNode = cursor.next(); + lastBucketRow++; + } + n1 = bucketSize(i); + n2 = bucketSize(i); + u1 = cursor.Total(0) / n1; + u2 = cursor.Total(1) / n2; + incVariance = n1 * n2 * (u1 - u2) * (u1 - u2) / (n1 + n2); + + nextNode.insertBucket(cursor.Total(0) + cursor.Total(1), cursor.Variance(0) + cursor.Variance(1) + incVariance); + BucketNumber++; + cursor.compressBucketsRow(2); + if (nextNode.bucketSizeRow <= MAXBUCKETS) { + break; + } + } else { + break; + } + cursor = cursor.next(); + i++; + } while (cursor != null); + } + + public boolean setInput(double intEntrada) { + return setInput(intEntrada, mdbldelta); + } + + public boolean setInput(double intEntrada, double delta) { + boolean blnChange = false; + boolean blnExit; + ListItem cursor; + mintTime++; + + //1,2)Increment window in one element + insertElement(intEntrada); + blnBucketDeleted = false; + //3)Reduce window + if (mintTime % mintClock == 0 && getWidth() > mintMinimLongitudWindow) { + boolean blnReduceWidth = true; // Diference + + while (blnReduceWidth) // Diference + { + blnReduceWidth = false; // Diference + blnExit = false; + int n0 = 0; + int n1 = WIDTH; + double u0 = 0; + double u1 = getTotal(); + double v0 = 0; + double v1 = VARIANCE; + double n2; + double u2; + + cursor = listRowBuckets.tail(); + int i = lastBucketRow; + do { + for (int k = 0; k <= (cursor.bucketSizeRow - 1); k++) { + n2 = bucketSize(i); + u2 = cursor.Total(k); + if (n0 > 0) { + v0 += cursor.Variance(k) + (double) n0 * n2 * (u0 / n0 - u2 / n2) * (u0 / n0 - u2 / n2) / (n0 + n2); + } + if (n1 > 0) { + v1 -= cursor.Variance(k) + (double) n1 * n2 * (u1 / n1 - u2 / n2) * (u1 / n1 - u2 / n2) / (n1 + n2); + } + + n0 += bucketSize(i); + n1 -= bucketSize(i); + u0 += cursor.Total(k); + u1 -= cursor.Total(k); + + if (i == 0 && k == cursor.bucketSizeRow - 1) { + blnExit = true; + break; + } + double absvalue = (u0 / n0) - (u1 / n1); //n1<WIDTH-mintMinWinLength-1 + if ((n1 > mintMinWinLength + 1 && n0 > mintMinWinLength + 1) && // Diference NEGATIVE + //if( + blnCutexpression(n0, n1, u0, u1, v0, v1, absvalue, delta)) { + blnBucketDeleted = true; + Detect = mintTime; + + if (Detect == 0) { + Detect = mintTime; + //blnFirst=true; + //blnWarning=true; + } else if (DetectTwice == 0) { + DetectTwice = mintTime; + //blnDetect=true; + } + blnReduceWidth = true; // Diference + blnChange = true; + if (getWidth() > 0) { //Reduce width of the window + //while (n0>0) // Diference NEGATIVE + n0 -= deleteElement(); + blnExit = true; + break; + } + } //End if + }//Next k + cursor = cursor.previous(); + i--; + } while (((!blnExit && cursor != null))); + }//End While // Diference + }//End if + + mdblWidth += getWidth(); + if (blnChange) { + numberDetections++; + } + return blnChange; + } + + private boolean blnCutexpression(int n0, int n1, double u0, double u1, double v0, double v1, double absvalue, double delta) { + int n = getWidth(); + double dd = Math.log(2 * Math.log(n) / delta); // -- ull perque el ln n va al numerador. + // Formula Gener 2008 + double v = getVariance(); + double m = ((double) 1 / ((n0 - mintMinWinLength + 1))) + ((double) 1 / ((n1 - mintMinWinLength + 1))); + double epsilon = Math.sqrt(2 * m * v * dd) + (double) 2 / 3 * dd * m; + + return (Math.abs(absvalue) > epsilon); + } + + public ADWIN() { + mdbldelta = DELTA; + initBuckets(); + Detect = 0; + numberDetections = 0; + DetectTwice = 0; + + } + + public ADWIN(double d) { + mdbldelta = d; + initBuckets(); + Detect = 0; + numberDetections = 0; + DetectTwice = 0; + } + + public ADWIN(int cl) { + mdbldelta = DELTA; + initBuckets(); + Detect = 0; + numberDetections = 0; + DetectTwice = 0; + mintClock = cl; + } + + public String getEstimatorInfo() { + return "ADWIN;;"; + } + + public void setW(int W0) { + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/ADWINChangeDetector.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/ADWINChangeDetector.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/ADWINChangeDetector.java new file mode 100644 index 0000000..b582351 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/ADWINChangeDetector.java @@ -0,0 +1,73 @@ + +package com.yahoo.labs.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.github.javacliparser.FloatOption; +import com.yahoo.labs.samoa.moa.core.ObjectRepository; +import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; + + +/** + * Drift detection method based in ADWIN. ADaptive sliding WINdow is a change + * detector and estimator. It keeps a variable-length window of recently seen + * items, with the property that the window has the maximal length statistically + * consistent with the hypothesis "there has been no change in the average value + * inside the window". + * + * + * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz) + * @version $Revision: 7 $ + */ +public class ADWINChangeDetector extends AbstractChangeDetector { + + protected ADWIN adwin; + + public FloatOption deltaAdwinOption = new FloatOption("deltaAdwin", 'a', + "Delta of Adwin change detection", 0.002, 0.0, 1.0); + + @Override + public void input(double inputValue) { + if (this.adwin == null) { + resetLearning(); + } + this.isChangeDetected = adwin.setInput(inputValue); + this.isWarningZone = false; + this.delay = 0.0; + this.estimation = adwin.getEstimation(); + } + + @Override + public void resetLearning() { + adwin = new ADWIN(this.deltaAdwinOption.getValue()); + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/AbstractChangeDetector.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/AbstractChangeDetector.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/AbstractChangeDetector.java new file mode 100644 index 0000000..ff591ab --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/AbstractChangeDetector.java @@ -0,0 +1,143 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; + +/** + * Abstract Change Detector. All change detectors in MOA extend this class. + * + * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz) + * @version $Revision: 7 $ + */ +public abstract class AbstractChangeDetector extends AbstractOptionHandler + implements ChangeDetector { + + + + /** + * Change was detected + */ + protected boolean isChangeDetected; + + /** + * Warning Zone: after a warning and before a change + */ + protected boolean isWarningZone; + + /** + * Prediction for the next value based in previous seen values + */ + protected double estimation; + + /** + * Delay in detecting change + */ + protected double delay; + + /** + * Resets this change detector. It must be similar to starting a new change + * detector from scratch. + * + */ + public void resetLearning() { + this.isChangeDetected = false; + this.isWarningZone = false; + this.estimation = 0.0; + this.delay = 0.0; + } + + /** + * Adding a numeric value to the change detector<br><br> + * + * The output of the change detector is modified after the insertion of a + * new item inside. + * + * @param inputValue the number to insert into the change detector + */ + public abstract void input(double inputValue); + + /** + * Gets whether there is change detected. + * + * @return true if there is change + */ + public boolean getChange() { + return this.isChangeDetected; + } + + /** + * Gets whether the change detector is in the warning zone, after a warning + * alert and before a change alert. + * + * @return true if the change detector is in the warning zone + */ + public boolean getWarningZone() { + return this.isWarningZone; + } + + /** + * Gets the prediction of next values. + * + * @return a prediction of the next value + */ + public double getEstimation() { + return this.estimation; + } + + /** + * Gets the length of the delay in the change detected. + * + * @return he length of the delay in the change detected + */ + public double getDelay() { + return this.delay; + } + + /** + * Gets the output state of the change detection. + * + * @return an array with the number of change detections, number of + * warnings, delay, and estimation. + */ + public double[] getOutput() { + return new double[]{this.isChangeDetected ? 1 : 0, this.isWarningZone ? 1 : 0, this.delay, this.estimation}; + } + + /** + * Returns a string representation of the model. + * + * @param sb the stringbuilder to add the description + * @param indent the number of characters to indent + */ + @Override + public abstract void getDescription(StringBuilder sb, int indent); + + /** + * Produces a copy of this change detector method + * + * @return the copy of this change detector method + */ + @Override + public ChangeDetector copy() { + return (ChangeDetector) super.copy(); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/ChangeDetector.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/ChangeDetector.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/ChangeDetector.java new file mode 100644 index 0000000..47dc203 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/ChangeDetector.java @@ -0,0 +1,102 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.options.OptionHandler; + +/** + * Change Detector interface to implement methods that detects change. + * + * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz) + * @version $Revision: 7 $ + */ +public interface ChangeDetector extends OptionHandler { + + /** + * Resets this change detector. It must be similar to starting a new change + * detector from scratch. + * + */ + public void resetLearning(); + + /** + * Adding a numeric value to the change detector<br><br> + * + * The output of the change detector is modified after the insertion of a + * new item inside. + * + * @param inputValue the number to insert into the change detector + */ + public void input(double inputValue); + + /** + * Gets whether there is change detected. + * + * @return true if there is change + */ + public boolean getChange(); + + /** + * Gets whether the change detector is in the warning zone, after a warning alert and before a change alert. + * + * @return true if the change detector is in the warning zone + */ + public boolean getWarningZone(); + + /** + * Gets the prediction of next values. + * + * @return a prediction of the next value + */ + public double getEstimation(); + + /** + * Gets the length of the delay in the change detected. + * + * @return he length of the delay in the change detected + */ + public double getDelay(); + + /** + * Gets the output state of the change detection. + * + * @return an array with the number of change detections, number of + * warnings, delay, and estimation. + */ + public double[] getOutput(); + + /** + * Returns a string representation of the model. + * + * @param out the stringbuilder to add the description + * @param indent the number of characters to indent + */ + @Override + public void getDescription(StringBuilder sb, int indent); + + /** + * Produces a copy of this drift detection method + * + * @return the copy of this drift detection method + */ + @Override + public ChangeDetector copy(); +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/CusumDM.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/CusumDM.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/CusumDM.java new file mode 100644 index 0000000..e372ae8 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/CusumDM.java @@ -0,0 +1,110 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.github.javacliparser.FloatOption; +import com.github.javacliparser.IntOption; +import com.yahoo.labs.samoa.moa.core.ObjectRepository; +import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; + +/** + * Drift detection method based in Cusum + * + * + * @author Manuel Baena ([email protected]) + * @version $Revision: 7 $ + */ +public class CusumDM extends AbstractChangeDetector { + + private static final long serialVersionUID = -3518369648142099719L; + + public IntOption minNumInstancesOption = new IntOption( + "minNumInstances", + 'n', + "The minimum number of instances before permitting detecting change.", + 30, 0, Integer.MAX_VALUE); + + public FloatOption deltaOption = new FloatOption("delta", 'd', + "Delta parameter of the Cusum Test", 0.005, 0.0, 1.0); + + public FloatOption lambdaOption = new FloatOption("lambda", 'l', + "Threshold parameter of the Cusum Test", 50, 0.0, Float.MAX_VALUE); + + private int m_n; + + private double sum; + + private double x_mean; + + private double delta; + + private double lambda; + + public CusumDM() { + resetLearning(); + } + + @Override + public void resetLearning() { + m_n = 1; + x_mean = 0.0; + sum = 0.0; + delta = this.deltaOption.getValue(); + lambda = this.lambdaOption.getValue(); + } + + @Override + public void input(double x) { + // It monitors the error rate + if (this.isChangeDetected) { + resetLearning(); + } + + x_mean = x_mean + (x - x_mean) / (double) m_n; + sum = Math.max(0, sum + x - x_mean - this.delta); + m_n++; + + // System.out.print(prediction + " " + m_n + " " + (m_p+m_s) + " "); + this.estimation = x_mean; + this.isChangeDetected = false; + this.isWarningZone = false; + this.delay = 0; + + if (m_n < this.minNumInstancesOption.getValue()) { + return; + } + + if (sum > this.lambda) { + this.isChangeDetected = true; + } + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/DDM.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/DDM.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/DDM.java new file mode 100644 index 0000000..af5dd52 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/DDM.java @@ -0,0 +1,117 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.github.javacliparser.IntOption; +import com.yahoo.labs.samoa.moa.core.ObjectRepository; +import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; + +/** + * Drift detection method based in DDM method of Joao Gama SBIA 2004. + * + * <p>João Gama, Pedro Medas, Gladys Castillo, Pedro Pereira Rodrigues: Learning + * with Drift Detection. SBIA 2004: 286-295 </p> + * + * @author Manuel Baena ([email protected]) + * @version $Revision: 7 $ + */ +public class DDM extends AbstractChangeDetector { + + private static final long serialVersionUID = -3518369648142099719L; + + //private static final int DDM_MINNUMINST = 30; + public IntOption minNumInstancesOption = new IntOption( + "minNumInstances", + 'n', + "The minimum number of instances before permitting detecting change.", + 30, 0, Integer.MAX_VALUE); + private int m_n; + + private double m_p; + + private double m_s; + + private double m_psmin; + + private double m_pmin; + + private double m_smin; + + public DDM() { + resetLearning(); + } + + @Override + public void resetLearning() { + m_n = 1; + m_p = 1; + m_s = 0; + m_psmin = Double.MAX_VALUE; + m_pmin = Double.MAX_VALUE; + m_smin = Double.MAX_VALUE; + } + + @Override + public void input(double prediction) { + // prediction must be 1 or 0 + // It monitors the error rate + if (this.isChangeDetected) { + resetLearning(); + } + m_p = m_p + (prediction - m_p) / (double) m_n; + m_s = Math.sqrt(m_p * (1 - m_p) / (double) m_n); + + m_n++; + + // System.out.print(prediction + " " + m_n + " " + (m_p+m_s) + " "); + this.estimation = m_p; + this.isChangeDetected = false; + this.isWarningZone = false; + this.delay = 0; + + if (m_n < this.minNumInstancesOption.getValue()) { + return; + } + + if (m_p + m_s <= m_psmin) { + m_pmin = m_p; + m_smin = m_s; + m_psmin = m_p + m_s; + } + + if (m_n > this.minNumInstancesOption.getValue() && m_p + m_s > m_pmin + 3 * m_smin) { + this.isChangeDetected = true; + //resetLearning(); + } else + this.isWarningZone = m_p + m_s > m_pmin + 2 * m_smin; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/EDDM.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/EDDM.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/EDDM.java new file mode 100644 index 0000000..2ab1022 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/EDDM.java @@ -0,0 +1,138 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.core.ObjectRepository; +import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; + +/** + * Drift detection method based in EDDM method of Manuel Baena et al. + * + * <p>Early Drift Detection Method. Manuel Baena-Garcia, Jose Del Campo-Avila, + * Raúl Fidalgo, Albert Bifet, Ricard Gavalda, Rafael Morales-Bueno. In Fourth + * International Workshop on Knowledge Discovery from Data Streams, 2006.</p> + * + * @author Manuel Baena ([email protected]) + * @version $Revision: 7 $ + */ +public class EDDM extends AbstractChangeDetector { + + /** + * + */ + private static final long serialVersionUID = 140980267062162000L; + + private static final double FDDM_OUTCONTROL = 0.9; + + private static final double FDDM_WARNING = 0.95; + + private static final double FDDM_MINNUMINSTANCES = 30; + + private double m_numErrors; + + private int m_minNumErrors = 30; + + private int m_n; + + private int m_d; + + private int m_lastd; + + private double m_mean; + + private double m_stdTemp; + + private double m_m2smax; + + public EDDM() { + resetLearning(); + } + + @Override + public void resetLearning() { + m_n = 1; + m_numErrors = 0; + m_d = 0; + m_lastd = 0; + m_mean = 0.0; + m_stdTemp = 0.0; + m_m2smax = 0.0; + this.estimation = 0.0; + } + + @Override + public void input(double prediction) { + // prediction must be 1 or 0 + // It monitors the error rate + // System.out.print(prediction + " " + m_n + " " + probability + " "); + if (this.isChangeDetected) { + resetLearning(); + } + this.isChangeDetected = false; + + m_n++; + if (prediction == 1.0) { + this.isWarningZone = false; + this.delay = 0; + m_numErrors += 1; + m_lastd = m_d; + m_d = m_n - 1; + int distance = m_d - m_lastd; + double oldmean = m_mean; + m_mean = m_mean + ((double) distance - m_mean) / m_numErrors; + this.estimation = m_mean; + m_stdTemp = m_stdTemp + (distance - m_mean) * (distance - oldmean); + double std = Math.sqrt(m_stdTemp / m_numErrors); + double m2s = m_mean + 2 * std; + + if (m2s > m_m2smax) { + if (m_n > FDDM_MINNUMINSTANCES) { + m_m2smax = m2s; + } + //m_lastLevel = DDM_INCONTROL_LEVEL; + // System.out.print(1 + " "); + } else { + double p = m2s / m_m2smax; + // System.out.print(p + " "); + if (m_n > FDDM_MINNUMINSTANCES && m_numErrors > m_minNumErrors + && p < FDDM_OUTCONTROL) { + //System.out.println(m_mean + ",D"); + this.isChangeDetected = true; + //resetLearning(); + } else { + this.isWarningZone = m_n > FDDM_MINNUMINSTANCES + && m_numErrors > m_minNumErrors && p < FDDM_WARNING; + } + } + } + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/EWMAChartDM.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/EWMAChartDM.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/EWMAChartDM.java new file mode 100644 index 0000000..7f98099 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/EWMAChartDM.java @@ -0,0 +1,122 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.github.javacliparser.FloatOption; +import com.github.javacliparser.IntOption; +import com.yahoo.labs.samoa.moa.core.ObjectRepository; +import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; + +/** + * Drift detection method based in EWMA Charts of Ross, Adams, Tasoulis and Hand + * 2012 + * + * + * @author Manuel Baena ([email protected]) + * @version $Revision: 7 $ + */ +public class EWMAChartDM extends AbstractChangeDetector { + + private static final long serialVersionUID = -3518369648142099719L; + + //private static final int DDM_MIN_NUM_INST = 30; + public IntOption minNumInstancesOption = new IntOption( + "minNumInstances", + 'n', + "The minimum number of instances before permitting detecting change.", + 30, 0, Integer.MAX_VALUE); + + public FloatOption lambdaOption = new FloatOption("lambda", 'l', + "Lambda parameter of the EWMA Chart Method", 0.2, 0.0, Float.MAX_VALUE); + + private double m_n; + + private double m_sum; + + private double m_p; + + private double m_s; + + private double lambda; + + private double z_t; + + public EWMAChartDM() { + resetLearning(); + } + + @Override + public void resetLearning() { + m_n = 1.0; + m_sum = 0.0; + m_p = 0.0; + m_s = 0.0; + z_t = 0.0; + lambda = this.lambdaOption.getValue(); + } + + @Override + public void input(double prediction) { + // prediction must be 1 or 0 + // It monitors the error rate + if (this.isChangeDetected) { + resetLearning(); + } + + m_sum += prediction; + + m_p = m_sum/m_n; // m_p + (prediction - m_p) / (double) (m_n+1); + + m_s = Math.sqrt( m_p * (1.0 - m_p)* lambda * (1.0 - Math.pow(1.0 - lambda, 2.0 * m_n)) / (2.0 - lambda)); + + m_n++; + + z_t += lambda * (prediction - z_t); + + double L_t = 3.97 - 6.56 * m_p + 48.73 * Math.pow(m_p, 3) - 330.13 * Math.pow(m_p, 5) + 848.18 * Math.pow(m_p, 7); //%1 FP + this.estimation = m_p; + this.isChangeDetected = false; + this.isWarningZone = false; + this.delay = 0; + + if (m_n < this.minNumInstancesOption.getValue()) { + return; + } + + if (m_n > this.minNumInstancesOption.getValue() && z_t > m_p + L_t * m_s) { + this.isChangeDetected = true; + //resetLearning(); + } else { + this.isWarningZone = z_t > m_p + 0.5 * L_t * m_s; + } + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/GeometricMovingAverageDM.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/GeometricMovingAverageDM.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/GeometricMovingAverageDM.java new file mode 100644 index 0000000..5ed44e5 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/GeometricMovingAverageDM.java @@ -0,0 +1,108 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.github.javacliparser.FloatOption; +import com.github.javacliparser.IntOption; +import com.yahoo.labs.samoa.moa.core.ObjectRepository; +import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; + +/** + * Drift detection method based in Geometric Moving Average Test + * + * + * @author Manuel Baena ([email protected]) + * @version $Revision: 7 $ + */ +public class GeometricMovingAverageDM extends AbstractChangeDetector { + + private static final long serialVersionUID = -3518369648142099719L; + + public IntOption minNumInstancesOption = new IntOption( + "minNumInstances", + 'n', + "The minimum number of instances before permitting detecting change.", + 30, 0, Integer.MAX_VALUE); + + public FloatOption lambdaOption = new FloatOption("lambda", 'l', + "Threshold parameter of the Geometric Moving Average Test", 1, 0.0, Float.MAX_VALUE); + + public FloatOption alphaOption = new FloatOption("alpha", 'a', + "Alpha parameter of the Geometric Moving Average Test", .99, 0.0, 1.0); + + private double m_n; + + private double sum; + + private double x_mean; + + private double alpha; + + private double lambda; + + public GeometricMovingAverageDM() { + resetLearning(); + } + + @Override + public void resetLearning() { + m_n = 1.0; + x_mean = 0.0; + sum = 0.0; + alpha = this.alphaOption.getValue(); + lambda = this.lambdaOption.getValue(); + } + + @Override + public void input(double x) { + // It monitors the error rate + if (this.isChangeDetected) { + resetLearning(); + } + + x_mean = x_mean + (x - x_mean) / m_n; + sum = alpha * sum + (1.0- alpha) * (x - x_mean); + m_n++; + this.estimation = x_mean; + this.isChangeDetected = false; + this.isWarningZone = false; + this.delay = 0; + + if (m_n < this.minNumInstancesOption.getValue()) { + return; + } + + if (sum > this.lambda) { + this.isChangeDetected = true; + } + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/PageHinkleyDM.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/PageHinkleyDM.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/PageHinkleyDM.java new file mode 100644 index 0000000..7a94a17 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/driftdetection/PageHinkleyDM.java @@ -0,0 +1,114 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.github.javacliparser.FloatOption; +import com.github.javacliparser.IntOption; +import com.yahoo.labs.samoa.moa.core.ObjectRepository; +import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; + +/** + * Drift detection method based in Page Hinkley Test. + * + * + * @author Manuel Baena ([email protected]) + * @version $Revision: 7 $ + */ +public class PageHinkleyDM extends AbstractChangeDetector { + + private static final long serialVersionUID = -3518369648142099719L; + + public IntOption minNumInstancesOption = new IntOption( + "minNumInstances", + 'n', + "The minimum number of instances before permitting detecting change.", + 30, 0, Integer.MAX_VALUE); + + public FloatOption deltaOption = new FloatOption("delta", 'd', + "Delta parameter of the Page Hinkley Test", 0.005, 0.0, 1.0); + + public FloatOption lambdaOption = new FloatOption("lambda", 'l', + "Lambda parameter of the Page Hinkley Test", 50, 0.0, Float.MAX_VALUE); + + public FloatOption alphaOption = new FloatOption("alpha", 'a', + "Alpha parameter of the Page Hinkley Test", 1 - 0.0001, 0.0, 1.0); + + private int m_n; + + private double sum; + + private double x_mean; + + private double alpha; + + private double delta; + + private double lambda; + + public PageHinkleyDM() { + resetLearning(); + } + + @Override + public void resetLearning() { + m_n = 1; + x_mean = 0.0; + sum = 0.0; + delta = this.deltaOption.getValue(); + alpha = this.alphaOption.getValue(); + lambda = this.lambdaOption.getValue(); + } + + @Override + public void input(double x) { + // It monitors the error rate + if (this.isChangeDetected) { + resetLearning(); + } + + x_mean = x_mean + (x - x_mean) / (double) m_n; + sum = this.alpha * sum + (x - x_mean - this.delta); + m_n++; + this.estimation = x_mean; + this.isChangeDetected = false; + this.isWarningZone = false; + this.delay = 0; + + if (m_n < this.minNumInstancesOption.getValue()) { + return; + } + + if (sum > this.lambda) { + this.isChangeDetected = true; + } + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/GiniSplitCriterion.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/GiniSplitCriterion.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/GiniSplitCriterion.java new file mode 100644 index 0000000..185b12f --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/GiniSplitCriterion.java @@ -0,0 +1,86 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.core.ObjectRepository; +import com.yahoo.labs.samoa.moa.core.Utils; +import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; +import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; + +/** + * Class for computing splitting criteria using Gini + * with respect to distributions of class values. + * The split criterion is used as a parameter on + * decision trees and decision stumps. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class GiniSplitCriterion extends AbstractOptionHandler implements + SplitCriterion { + + private static final long serialVersionUID = 1L; + + @Override + public double getMeritOfSplit(double[] preSplitDist, double[][] postSplitDists) { + double totalWeight = 0.0; + double[] distWeights = new double[postSplitDists.length]; + for (int i = 0; i < postSplitDists.length; i++) { + distWeights[i] = Utils.sum(postSplitDists[i]); + totalWeight += distWeights[i]; + } + double gini = 0.0; + for (int i = 0; i < postSplitDists.length; i++) { + gini += (distWeights[i] / totalWeight) + * computeGini(postSplitDists[i], distWeights[i]); + } + return 1.0 - gini; + } + + @Override + public double getRangeOfMerit(double[] preSplitDist) { + return 1.0; + } + + public static double computeGini(double[] dist, double distSumOfWeights) { + double gini = 1.0; + for (double aDist : dist) { + double relFreq = aDist / distSumOfWeights; + gini -= relFreq * relFreq; + } + return gini; + } + + public static double computeGini(double[] dist) { + return computeGini(dist, Utils.sum(dist)); + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterion.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterion.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterion.java new file mode 100644 index 0000000..f136996 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterion.java @@ -0,0 +1,118 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.github.javacliparser.FloatOption; +import com.yahoo.labs.samoa.moa.core.ObjectRepository; +import com.yahoo.labs.samoa.moa.core.Utils; +import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; +import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; + +/** + * Class for computing splitting criteria using information gain + * with respect to distributions of class values. + * The split criterion is used as a parameter on + * decision trees and decision stumps. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class InfoGainSplitCriterion extends AbstractOptionHandler implements + SplitCriterion { + + private static final long serialVersionUID = 1L; + + public FloatOption minBranchFracOption = new FloatOption("minBranchFrac", + 'f', + "Minimum fraction of weight required down at least two branches.", + 0.01, 0.0, 0.5); + + @Override + public double getMeritOfSplit(double[] preSplitDist, + double[][] postSplitDists) { + if (numSubsetsGreaterThanFrac(postSplitDists, this.minBranchFracOption.getValue()) < 2) { + return Double.NEGATIVE_INFINITY; + } + return computeEntropy(preSplitDist) - computeEntropy(postSplitDists); + } + + @Override + public double getRangeOfMerit(double[] preSplitDist) { + int numClasses = preSplitDist.length > 2 ? preSplitDist.length : 2; + return Utils.log2(numClasses); + } + + public static double computeEntropy(double[] dist) { + double entropy = 0.0; + double sum = 0.0; + for (double d : dist) { + if (d > 0.0) { // TODO: how small can d be before log2 overflows? + entropy -= d * Utils.log2(d); + sum += d; + } + } + return sum > 0.0 ? (entropy + sum * Utils.log2(sum)) / sum : 0.0; + } + + public static double computeEntropy(double[][] dists) { + double totalWeight = 0.0; + double[] distWeights = new double[dists.length]; + for (int i = 0; i < dists.length; i++) { + distWeights[i] = Utils.sum(dists[i]); + totalWeight += distWeights[i]; + } + double entropy = 0.0; + for (int i = 0; i < dists.length; i++) { + entropy += distWeights[i] * computeEntropy(dists[i]); + } + return entropy / totalWeight; + } + + public static int numSubsetsGreaterThanFrac(double[][] distributions, double minFrac) { + double totalWeight = 0.0; + double[] distSums = new double[distributions.length]; + for (int i = 0; i < distSums.length; i++) { + for (int j = 0; j < distributions[i].length; j++) { + distSums[i] += distributions[i][j]; + } + totalWeight += distSums[i]; + } + int numGreater = 0; + for (double d : distSums) { + double frac = d / totalWeight; + if (frac > minFrac) { + numGreater++; + } + } + return numGreater; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +}
