http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterionMultilabel.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterionMultilabel.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterionMultilabel.java new file mode 100644 index 0000000..60e1e1c --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterionMultilabel.java @@ -0,0 +1,54 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2012 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.core.Utils; + +/** + * Class for computing splitting criteria using information gain with respect to + * distributions of class values for Multilabel data. The split criterion is + * used as a parameter on decision trees and decision stumps. + * + * @author Richard Kirkby ([email protected]) + * @author Jesse Read ([email protected]) + * @version $Revision: 1 $ + */ +public class InfoGainSplitCriterionMultilabel extends InfoGainSplitCriterion { + + private static final long serialVersionUID = 1L; + + public static double computeEntropy(double[] dist) { + double entropy = 0.0; + double sum = 0.0; + for (double d : dist) { + sum += d; + } + if (sum > 0.0) { + for (double num : dist) { + double d = num / sum; + if (d > 0.0) { // TODO: how small can d be before log2 overflows? + entropy -= d * Utils.log2(d) + (1 - d) * Utils.log2(1 - d); //Extension to Multilabel + } + } + } + return sum > 0.0 ? entropy : 0.0; + } +}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/SDRSplitCriterion.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/SDRSplitCriterion.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/SDRSplitCriterion.java new file mode 100644 index 0000000..a23c93b --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/SDRSplitCriterion.java @@ -0,0 +1,33 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2012 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +public class SDRSplitCriterion extends VarianceReductionSplitCriterion { + private static final long serialVersionUID = 1L; + + public static double computeSD(double[] dist) { + int N = (int)dist[0]; + double sum = dist[1]; + double sumSq = dist[2]; + return Math.sqrt((sumSq - ((sum * sum)/N))/N); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/SplitCriterion.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/SplitCriterion.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/SplitCriterion.java new file mode 100644 index 0000000..eba390e --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/SplitCriterion.java @@ -0,0 +1,56 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.options.OptionHandler; + +/** + * Interface for computing splitting criteria. + * with respect to distributions of class values. + * The split criterion is used as a parameter on + * decision trees and decision stumps. + * The two split criteria most used are + * Information Gain and Gini. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public interface SplitCriterion extends OptionHandler { + + /** + * Computes the merit of splitting for a given + * ditribution before the split and after it. + * + * @param preSplitDist the class distribution before the split + * @param postSplitDists the class distribution after the split + * @return value of the merit of splitting + */ + public double getMeritOfSplit(double[] preSplitDist, + double[][] postSplitDists); + + /** + * Computes the range of splitting merit + * + * @param preSplitDist the class distribution before the split + * @return value of the range of splitting merit + */ + public double getRangeOfMerit(double[] preSplitDist); +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/VarianceReductionSplitCriterion.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/VarianceReductionSplitCriterion.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/VarianceReductionSplitCriterion.java new file mode 100644 index 0000000..c5ca348 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/VarianceReductionSplitCriterion.java @@ -0,0 +1,99 @@ +package com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 - 2014 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.core.ObjectRepository; +import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; +import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; + +public class VarianceReductionSplitCriterion extends AbstractOptionHandler implements SplitCriterion { + + private static final long serialVersionUID = 1L; + +/* @Override + public double getMeritOfSplit(double[] preSplitDist, double[][] postSplitDists) { + + double N = preSplitDist[0]; + double SDR = computeSD(preSplitDist); + + // System.out.print("postSplitDists.length"+postSplitDists.length+"\n"); + for(int i = 0; i < postSplitDists.length; i++) + { + double Ni = postSplitDists[i][0]; + SDR -= (Ni/N)*computeSD(postSplitDists[i]); + } + + return SDR; + }*/ + + @Override + public double getMeritOfSplit(double[] preSplitDist, double[][] postSplitDists) { + double SDR=0.0; + double N = preSplitDist[0]; + int count = 0; + + for (int i1 = 0; i1 < postSplitDists.length; i1++) { + double[] postSplitDist = postSplitDists[i1]; + double Ni = postSplitDist[0]; + if (Ni >= 5.0) { + count = count + 1; + } + } + + if(count == postSplitDists.length){ + SDR = computeSD(preSplitDist); + for(int i = 0; i < postSplitDists.length; i++) + { + double Ni = postSplitDists[i][0]; + SDR -= (Ni/N)*computeSD(postSplitDists[i]); + } + } + return SDR; + } + + + + @Override + public double getRangeOfMerit(double[] preSplitDist) { + return 1; + } + + public static double computeSD(double[] dist) { + + int N = (int)dist[0]; + double sum = dist[1]; + double sumSq = dist[2]; + // return Math.sqrt((sumSq - ((sum * sum)/N))/N); + return (sumSq - ((sum * sum)/N))/N; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/functions/MajorityClass.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/functions/MajorityClass.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/functions/MajorityClass.java new file mode 100644 index 0000000..6c2c807 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/functions/MajorityClass.java @@ -0,0 +1,84 @@ +package com.yahoo.labs.samoa.moa.classifiers.functions; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.classifiers.AbstractClassifier; +import com.yahoo.labs.samoa.moa.core.DoubleVector; +import com.yahoo.labs.samoa.moa.core.Measurement; +import com.yahoo.labs.samoa.moa.core.StringUtils; +import com.yahoo.labs.samoa.instances.Instance; + +/** + * Majority class learner. This is the simplest classifier. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class MajorityClass extends AbstractClassifier { + + private static final long serialVersionUID = 1L; + + @Override + public String getPurposeString() { + return "Majority class classifier: always predicts the class that has been observed most frequently the in the training data."; + } + + protected DoubleVector observedClassDistribution; + + @Override + public void resetLearningImpl() { + this.observedClassDistribution = new DoubleVector(); + } + + @Override + public void trainOnInstanceImpl(Instance inst) { + this.observedClassDistribution.addToValue((int) inst.classValue(), inst.weight()); + } + + public double[] getVotesForInstance(Instance i) { + return this.observedClassDistribution.getArrayCopy(); + } + + @Override + protected Measurement[] getModelMeasurementsImpl() { + return null; + } + + @Override + public void getModelDescription(StringBuilder out, int indent) { + StringUtils.appendIndented(out, indent, "Predicted majority "); + out.append(getClassNameString()); + out.append(" = "); + out.append(getClassLabelString(this.observedClassDistribution.maxIndex())); + StringUtils.appendNewline(out); + for (int i = 0; i < this.observedClassDistribution.numValues(); i++) { + StringUtils.appendIndented(out, indent, "Observed weight of "); + out.append(getClassLabelString(i)); + out.append(": "); + out.append(this.observedClassDistribution.getValue(i)); + StringUtils.appendNewline(out); + } + } + + public boolean isRandomizable() { + return false; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/Predicate.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/Predicate.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/Predicate.java new file mode 100644 index 0000000..d6bdcaa --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/Predicate.java @@ -0,0 +1,33 @@ +package com.yahoo.labs.samoa.moa.classifiers.rules.core; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 - 2014 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.instances.Instance; + +/** + * Interface for a predicate (a feature) in rules. + * + */ +public interface Predicate { + + public boolean evaluate(Instance instance); + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/attributeclassobservers/FIMTDDNumericAttributeClassLimitObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/attributeclassobservers/FIMTDDNumericAttributeClassLimitObserver.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/attributeclassobservers/FIMTDDNumericAttributeClassLimitObserver.java new file mode 100644 index 0000000..8b4c332 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/attributeclassobservers/FIMTDDNumericAttributeClassLimitObserver.java @@ -0,0 +1,126 @@ +package com.yahoo.labs.samoa.moa.classifiers.rules.core.attributeclassobservers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 - 2014 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.github.javacliparser.IntOption; +import com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers.FIMTDDNumericAttributeClassObserver; + + +public class FIMTDDNumericAttributeClassLimitObserver extends FIMTDDNumericAttributeClassObserver { + + /** + * + */ + private static final long serialVersionUID = 1L; + protected int maxNodes; + //public IntOption maxNodesOption = new IntOption("maxNodesOption", 'z', "Maximum number of nodes", 50, 0, Integer.MAX_VALUE); + + + protected int numNodes; + + public int getMaxNodes() { + return this.maxNodes; + } + + public void setMaxNodes(int maxNodes) { + this.maxNodes = maxNodes; + } + + @Override + public void observeAttributeClass(double attVal, double classVal, double weight) { + if (Double.isNaN(attVal)) { //Instance.isMissingValue(attVal) + } else { + if (this.root == null) { + //maxNodes=maxNodesOption.getValue(); + maxNodes = 50; + this.root = new FIMTDDNumericAttributeClassLimitObserver.Node(attVal, classVal, weight); + } else { + this.root.insertValue(attVal, classVal, weight); + } + } + } + + protected class Node extends FIMTDDNumericAttributeClassObserver.Node { + /** + * + */ + private static final long serialVersionUID = -4484141636424708465L; + + + + public Node(double val, double label, double weight) { + super(val, label, weight); + } + + protected Node root = null; + + + + /** + * Insert a new value into the tree, updating both the sum of values and + * sum of squared values arrays + */ + @Override + public void insertValue(double val, double label, double weight) { + + // If the new value equals the value stored in a node, update + // the left (<=) node information + if (val == this.cut_point) + { + this.leftStatistics.addToValue(0,1); + this.leftStatistics.addToValue(1,label); + this.leftStatistics.addToValue(2,label*label); + } + // If the new value is less than the value in a node, update the + // left distribution and send the value down to the left child node. + // If no left child exists, create one + else if (val <= this.cut_point) { + this.leftStatistics.addToValue(0,1); + this.leftStatistics.addToValue(1,label); + this.leftStatistics.addToValue(2,label*label); + if (this.left == null) { + if(numNodes<maxNodes){ + this.left = new Node(val, label, weight); + ++numNodes; + } + } else { + this.left.insertValue(val, label, weight); + } + } + // If the new value is greater than the value in a node, update the + // right (>) distribution and send the value down to the right child node. + // If no right child exists, create one + else { // val > cut_point + this.rightStatistics.addToValue(0,1); + this.rightStatistics.addToValue(1,label); + this.rightStatistics.addToValue(2,label*label); + if (this.right == null) { + if(numNodes<maxNodes){ + this.right = new Node(val, label, weight); + ++numNodes; + } + } else { + this.right.insertValue(val, label, weight); + } + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java new file mode 100644 index 0000000..5f27c40 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java @@ -0,0 +1,180 @@ +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 - 2014 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/* + * NumericAttributeBinaryRulePredicate.java + * Copyright (C) 2013 University of Porto, Portugal + * @author E. Almeida, A. Carvalho, J. Gama + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * + */ +package com.yahoo.labs.samoa.moa.classifiers.rules.core.conditionaltests; + +import com.yahoo.labs.samoa.instances.Instance; +import com.yahoo.labs.samoa.instances.InstancesHeader; +import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.InstanceConditionalBinaryTest; +import com.yahoo.labs.samoa.moa.classifiers.rules.core.Predicate; + +/** + * Numeric binary conditional test for instances to use to split nodes in + * AMRules. + * + * @version $Revision: 1 $ + */ +public class NumericAttributeBinaryRulePredicate extends InstanceConditionalBinaryTest implements Predicate { + + private static final long serialVersionUID = 1L; + + protected int attIndex; + + protected double attValue; + + protected int operator; // 0 =, 1<=, 2> + + public NumericAttributeBinaryRulePredicate() { + this(0,0,0); + } + public NumericAttributeBinaryRulePredicate(int attIndex, double attValue, + int operator) { + this.attIndex = attIndex; + this.attValue = attValue; + this.operator = operator; + } + + public NumericAttributeBinaryRulePredicate(NumericAttributeBinaryRulePredicate oldTest) { + this(oldTest.attIndex, oldTest.attValue, oldTest.operator); + } + + @Override + public int branchForInstance(Instance inst) { + int instAttIndex = this.attIndex < inst.classIndex() ? this.attIndex + : this.attIndex + 1; + if (inst.isMissing(instAttIndex)) { + return -1; + } + double v = inst.value(instAttIndex); + int ret = 0; + switch (this.operator) { + case 0: + ret = (v == this.attValue) ? 0 : 1; + break; + case 1: + ret = (v <= this.attValue) ? 0 : 1; + break; + case 2: + ret = (v > this.attValue) ? 0 : 1; + } + return ret; + } + + /** + * + */ + @Override + public String describeConditionForBranch(int branch, InstancesHeader context) { + if ((branch >= 0) && (branch <= 2)) { + String compareChar = (branch == 0) ? "=" : (branch == 1) ? "<=" : ">"; + return InstancesHeader.getAttributeNameString(context, + this.attIndex) + + ' ' + + compareChar + + InstancesHeader.getNumericValueString(context, + this.attIndex, this.attValue); + } + throw new IndexOutOfBoundsException(); + } + + /** + * + */ + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + public int[] getAttsTestDependsOn() { + return new int[]{this.attIndex}; + } + + public double getSplitValue() { + return this.attValue; + } + + @Override + public boolean evaluate(Instance inst) { + return (branchForInstance(inst) == 0); + } + + @Override + public String toString() { + if ((operator >= 0) && (operator <= 2)) { + String compareChar = (operator == 0) ? "=" : (operator == 1) ? "<=" : ">"; + //int equalsBranch = this.equalsPassesTest ? 0 : 1; + return "x" + this.attIndex + + ' ' + + compareChar + + ' ' + + this.attValue; + } + throw new IndexOutOfBoundsException(); + } + + public boolean isEqual(NumericAttributeBinaryRulePredicate predicate) { + return (this.attIndex == predicate.attIndex + && this.attValue == predicate.attValue + && this.operator == predicate.operator); + } + + public boolean isUsingSameAttribute(NumericAttributeBinaryRulePredicate predicate) { + return (this.attIndex == predicate.attIndex + && this.operator == predicate.operator); + } + + public boolean isIncludedInRuleNode( + NumericAttributeBinaryRulePredicate predicate) { + boolean ret; + if (this.operator == 1) { // <= + ret = (predicate.attValue <= this.attValue); + } else { // > + ret = (predicate.attValue > this.attValue); + } + + return ret; + } + + public void setAttributeValue( + NumericAttributeBinaryRulePredicate ruleSplitNodeTest) { + this.attValue = ruleSplitNodeTest.attValue; + + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/splitcriteria/SDRSplitCriterionAMRules.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/splitcriteria/SDRSplitCriterionAMRules.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/splitcriteria/SDRSplitCriterionAMRules.java new file mode 100644 index 0000000..58d1e2f --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/splitcriteria/SDRSplitCriterionAMRules.java @@ -0,0 +1,105 @@ +package com.yahoo.labs.samoa.moa.classifiers.rules.core.splitcriteria; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 - 2014 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/* + * SDRSplitCriterionAMRules.java + * Copyright (C) 2014 University of Porto, Portugal + * @author A. Bifet, J. Duarte, J. Gama + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * + */ + +import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SDRSplitCriterion; +import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; + + +public class SDRSplitCriterionAMRules extends SDRSplitCriterion implements SplitCriterion { + + private static final long serialVersionUID = 1L; + + + @Override + public double getMeritOfSplit(double[] preSplitDist, double[][] postSplitDists) { + double SDR=0.0; + double N = preSplitDist[0]; + int count = 0; + + for(int i = 0; i < postSplitDists.length; i++) + { + double Ni = postSplitDists[i][0]; + if(Ni >=0.05*preSplitDist[0]){ + count = count +1; + } + } + if(count == postSplitDists.length){ + SDR = computeSD(preSplitDist); + + for(int i = 0; i < postSplitDists.length; i++) + { + double Ni = postSplitDists[i][0]; + SDR -= (Ni/N)*computeSD(postSplitDists[i]); + + } + } + return SDR; + } + + + + @Override + public double getRangeOfMerit(double[] preSplitDist) { + return 1; + } + + public static double[] computeBranchSplitMerits(double[][] postSplitDists) { + double[] SDR = new double[postSplitDists.length]; + double N = 0; + + for(int i = 0; i < postSplitDists.length; i++) + { + double Ni = postSplitDists[i][0]; + N += Ni; + } + for(int i = 0; i < postSplitDists.length; i++) + { + double Ni = postSplitDists[i][0]; + SDR[i] = (Ni/N)*computeSD(postSplitDists[i]); + } + return SDR; + + } + + +} + http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/AbstractErrorWeightedVote.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/AbstractErrorWeightedVote.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/AbstractErrorWeightedVote.java new file mode 100644 index 0000000..4e93b03 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/AbstractErrorWeightedVote.java @@ -0,0 +1,103 @@ +package com.yahoo.labs.samoa.moa.classifiers.rules.core.voting; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 - 2014 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.List; + +import com.yahoo.labs.samoa.moa.AbstractMOAObject; + +/** + * AbstractErrorWeightedVote class for weighted votes based on estimates of errors. + * + * @author Joao Duarte ([email protected]) + * @version $Revision: 1 $ + */ +public abstract class AbstractErrorWeightedVote extends AbstractMOAObject implements ErrorWeightedVote{ + /** + * + */ + private static final long serialVersionUID = -7340491298217227675L; + protected List<double[]> votes; + protected List<Double> errors; + protected double[] weights; + + + + public AbstractErrorWeightedVote() { + super(); + votes = new ArrayList<double[]>(); + errors = new ArrayList<Double>(); + } + + public AbstractErrorWeightedVote(AbstractErrorWeightedVote aewv) { + super(); + votes = new ArrayList<double[]>(); + for (double[] vote:aewv.votes) { + double[] v = new double[vote.length]; + for (int i=0; i<vote.length; i++) v[i] = vote[i]; + votes.add(v); + } + errors = new ArrayList<Double>(); + for (Double db:aewv.errors) { + errors.add(db.doubleValue()); + } + if (aewv.weights != null) { + weights = new double[aewv.weights.length]; + for (int i = 0; i<aewv.weights.length; i++) + weights[i] = aewv.weights[i]; + } + } + + + @Override + public void addVote(double [] vote, double error) { + votes.add(vote); + errors.add(error); + } + + @Override + abstract public double[] computeWeightedVote(); + + @Override + public double getWeightedError() + { + double weightedError=0; + if (weights!=null && weights.length==errors.size()) + { + for (int i=0; i<weights.length; ++i) + weightedError+=errors.get(i)*weights[i]; + } + else + weightedError=-1; + return weightedError; + } + + @Override + public double [] getWeights() { + return weights; + } + + @Override + public int getNumberVotes() { + return votes.size(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/ErrorWeightedVote.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/ErrorWeightedVote.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/ErrorWeightedVote.java new file mode 100644 index 0000000..943dd9d --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/ErrorWeightedVote.java @@ -0,0 +1,81 @@ +package com.yahoo.labs.samoa.moa.classifiers.rules.core.voting; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 - 2014 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.MOAObject; + +/** + * ErrorWeightedVote interface for weighted votes based on estimates of errors. + * + * @author Joao Duarte ([email protected]) + * @version $Revision: 1 $ + */ +public interface ErrorWeightedVote { + + /** + * Adds a vote and the corresponding error for the computation of the weighted vote and respective weighted error. + * + * @param vote a vote returned by a classifier + * @param error the error associated to the vote + */ + public void addVote(double [] vote, double error); + + /** + * Computes the weighted vote. + * Also updates the weights of the votes. + * + * @return the weighted vote + */ + public double [] computeWeightedVote(); + + /** + * Returns the weighted error. + * + * @pre computeWeightedVote() + * @return the weighted error + */ + public double getWeightedError(); + + /** + * Return the weights error. + * + * @pre computeWeightedVote() + * @return the weights + */ + public double [] getWeights(); + + + /** + * The number of votes added so far. + * + * @return the number of votes + */ + public int getNumberVotes(); + + /** + * Creates a copy of the object + * + * @return copy of the object + */ + public MOAObject copy(); + + public ErrorWeightedVote getACopy(); +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/InverseErrorWeightedVote.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/InverseErrorWeightedVote.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/InverseErrorWeightedVote.java new file mode 100644 index 0000000..401dc58 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/InverseErrorWeightedVote.java @@ -0,0 +1,99 @@ +package com.yahoo.labs.samoa.moa.classifiers.rules.core.voting; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 - 2014 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** + * InverseErrorWeightedVote class for weighted votes based on estimates of errors. + * + * @author Joao Duarte ([email protected]) + * @version $Revision: 1 $ + */ +public class InverseErrorWeightedVote extends AbstractErrorWeightedVote { + + /** + * + */ + private static final double EPS = 0.000000001; //just to prevent divide by 0 in 1/X -> 1/(x+EPS) + private static final long serialVersionUID = 6359349250620616482L; + + public InverseErrorWeightedVote() { + super(); + } + + public InverseErrorWeightedVote(AbstractErrorWeightedVote aewv) { + super(aewv); + } + + @Override + public double[] computeWeightedVote() { + int n=votes.size(); + weights=new double[n]; + double [] weightedVote=null; + if (n>0){ + int d=votes.get(0).length; + weightedVote=new double[d]; + double sumError=0; + //weights are 1/(error+eps) + for (int i=0; i<n; ++i){ + if(errors.get(i)<Double.MAX_VALUE){ + weights[i]=1.0/(errors.get(i)+EPS); + sumError+=weights[i]; + } + else + weights[i]=0; + + } + + if(sumError>0) + for (int i=0; i<n; ++i) + { + //normalize so that weights sum 1 + weights[i]/=sumError; + //compute weighted vote + for(int j=0; j<d; j++) + weightedVote[j]+=votes.get(i)[j]*weights[i]; + } + //Only occurs if all errors=Double.MAX_VALUE + else + { + //compute arithmetic vote + for (int i=0; i<n; ++i) + { + for(int j=0; j<d; j++) + weightedVote[j]+=votes.get(i)[j]/n; + } + } + } + return weightedVote; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + + } + + @Override + public InverseErrorWeightedVote getACopy() { + return new InverseErrorWeightedVote(this); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java new file mode 100644 index 0000000..ce7a74f --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java @@ -0,0 +1,73 @@ +package com.yahoo.labs.samoa.moa.classifiers.rules.core.voting; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 - 2014 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + + +/** + * UniformWeightedVote class for weighted votes based on estimates of errors. + * + * @author Joao Duarte ([email protected]) + * @version $Revision: 1 $ + */ +public class UniformWeightedVote extends AbstractErrorWeightedVote { + + + private static final long serialVersionUID = 6359349250620616482L; + + public UniformWeightedVote() { + super(); + } + + public UniformWeightedVote(AbstractErrorWeightedVote aewv) { + super(aewv); + } + + @Override + public double[] computeWeightedVote() { + int n=votes.size(); + weights=new double[n]; + double [] weightedVote=null; + if (n>0){ + int d=votes.get(0).length; + weightedVote=new double[d]; + for (int i=0; i<n; i++) + { + weights[i]=1.0/n; + for(int j=0; j<d; j++) + weightedVote[j]+=(votes.get(i)[j]*weights[i]); + } + + } + return weightedVote; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + + } + + @Override + public UniformWeightedVote getACopy() { + return new UniformWeightedVote(this); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyFading.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyFading.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyFading.java new file mode 100644 index 0000000..133c755 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyFading.java @@ -0,0 +1,83 @@ +package com.yahoo.labs.samoa.moa.classifiers.rules.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 - 2014 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** + * Page-Hinkley Test with more weight for recent instances. + * + */ + +public class PageHinkleyFading extends PageHinkleyTest { + /** + * + */ + private static final long serialVersionUID = 7110953184708812339L; + private double fadingFactor=0.99; + + public PageHinkleyFading() { + super(); + } + + public PageHinkleyFading(double threshold, double alpha) { + super(threshold, alpha); + } + protected double instancesSeen; + + @Override + public void reset() { + + super.reset(); + this.instancesSeen=0; + + } + + @Override + public boolean update(double error) { + this.instancesSeen=1+fadingFactor*this.instancesSeen; + double absolutError = Math.abs(error); + + this.sumAbsolutError = fadingFactor*this.sumAbsolutError + absolutError; + if (this.instancesSeen > 30) { + double mT = absolutError - (this.sumAbsolutError / this.instancesSeen) - this.alpha; + this.cumulativeSum = this.cumulativeSum + mT; // Update the cumulative mT sum + if (this.cumulativeSum < this.minimumValue) { // Update the minimum mT value if the new mT is smaller than the current minimum + this.minimumValue = this.cumulativeSum; + } + return (((this.cumulativeSum - this.minimumValue) > this.threshold)); + } + return false; + } + + @Override + public PageHinkleyTest getACopy() { + PageHinkleyFading newTest = new PageHinkleyFading(this.threshold, this.alpha); + this.copyFields(newTest); + return newTest; + } + + @Override + protected void copyFields(PageHinkleyTest newTest) { + super.copyFields(newTest); + PageHinkleyFading newFading = (PageHinkleyFading) newTest; + newFading.fadingFactor = this.fadingFactor; + newFading.instancesSeen = this.instancesSeen; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyTest.java new file mode 100644 index 0000000..c313224 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyTest.java @@ -0,0 +1,96 @@ +package com.yahoo.labs.samoa.moa.classifiers.rules.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 - 2014 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.io.Serializable; + +/** + * Page-Hinkley Test with equal weights for all instances. + * + */ +public class PageHinkleyTest implements Serializable { + + private static final long serialVersionUID = 1L; + protected double cumulativeSum; + + public double getCumulativeSum() { + return cumulativeSum; + } + + public double getMinimumValue() { + return minimumValue; + } + + + protected double minimumValue; + protected double sumAbsolutError; + protected long phinstancesSeen; + protected double threshold; + protected double alpha; + + public PageHinkleyTest() { + this(0,0); + } + + public PageHinkleyTest(double threshold, double alpha) { + this.threshold = threshold; + this.alpha = alpha; + this.reset(); + } + + public void reset() { + this.cumulativeSum = 0.0; + this.minimumValue = Double.MAX_VALUE; + this.sumAbsolutError = 0.0; + this.phinstancesSeen = 0; + } + + //Compute Page-Hinkley test + public boolean update(double error) { + + this.phinstancesSeen++; + double absolutError = Math.abs(error); + this.sumAbsolutError = this.sumAbsolutError + absolutError; + if (this.phinstancesSeen > 30) { + double mT = absolutError - (this.sumAbsolutError / this.phinstancesSeen) - this.alpha; + this.cumulativeSum = this.cumulativeSum + mT; // Update the cumulative mT sum + if (this.cumulativeSum < this.minimumValue) { // Update the minimum mT value if the new mT is smaller than the current minimum + this.minimumValue = this.cumulativeSum; + } + return (((this.cumulativeSum - this.minimumValue) > this.threshold)); + } + return false; + } + + public PageHinkleyTest getACopy() { + PageHinkleyTest newTest = new PageHinkleyTest(this.threshold, this.alpha); + this.copyFields(newTest); + return newTest; + } + + protected void copyFields(PageHinkleyTest newTest) { + newTest.cumulativeSum = this.cumulativeSum; + newTest.minimumValue = this.minimumValue; + newTest.sumAbsolutError = this.sumAbsolutError; + newTest.phinstancesSeen = this.phinstancesSeen; + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/CFCluster.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/CFCluster.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/CFCluster.java new file mode 100644 index 0000000..47e1379 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/CFCluster.java @@ -0,0 +1,173 @@ + +package com.yahoo.labs.samoa.moa.cluster; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2010 RWTH Aachen University, Germany + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +import java.util.Arrays; +import com.yahoo.labs.samoa.instances.Instance; + +/* micro cluster, as defined by Aggarwal et al, On Clustering Massive Data Streams: A Summarization Praradigm + * in the book Data streams : models and algorithms, by Charu C Aggarwal + * @article{ + title = {Data Streams: Models and Algorithms}, + author = {Aggarwal, Charu C.}, + year = {2007}, + publisher = {Springer Science+Business Media, LLC}, + url = {http://ebooks.ulb.tu-darmstadt.de/11157/}, + institution = {eBooks [http://ebooks.ulb.tu-darmstadt.de/perl/oai2] (Germany)}, +} + +DEFINITION A micro-clusterfor a set of d-dimensionalpoints Xi,. .Xi, +with t i m e s t a m p s ~. . .T,, is the (2-d+3)tuple (CF2", CFlX CF2t, CFlt, n), +wherein CF2" and CFlX each correspond to a vector of d entries. The definition of each of these entries is as follows: + +o For each dimension, the sum of the squares of the data values is maintained +in CF2". Thus, CF2" contains d values. The p-th entry of CF2" is equal to +\sum_j=1^n(x_i_j)^2 + +o For each dimension, the sum of the data values is maintained in C F l X . +Thus, CFIX contains d values. The p-th entry of CFIX is equal to +\sum_j=1^n x_i_j + +o The sum of the squares of the time stamps Ti,. .Tin maintained in CF2t + +o The sum of the time stamps Ti, . . .Tin maintained in CFlt. + +o The number of data points is maintained in n. + + */ +public abstract class CFCluster extends SphereCluster { + + private static final long serialVersionUID = 1L; + + protected double radiusFactor = 1.8; + + /** + * Number of points in the cluster. + */ + protected double N; + /** + * Linear sum of all the points added to the cluster. + */ + public double[] LS; + /** + * Squared sum of all the points added to the cluster. + */ + public double[] SS; + + /** + * Instantiates an empty kernel with the given dimensionality. + * @param dimensions The number of dimensions of the points that can be in + * this kernel. + */ + public CFCluster(Instance instance, int dimensions) { + this(instance.toDoubleArray(), dimensions); + } + + protected CFCluster(int dimensions) { + this.N = 0; + this.LS = new double[dimensions]; + this.SS = new double[dimensions]; + Arrays.fill(this.LS, 0.0); + Arrays.fill(this.SS, 0.0); + } + + public CFCluster(double [] center, int dimensions) { + this.N = 1; + this.LS = center; + this.SS = new double[dimensions]; + for (int i = 0; i < SS.length; i++) { + SS[i]=Math.pow(center[i], 2); + } + } + + public CFCluster(CFCluster cluster) { + this.N = cluster.N; + this.LS = Arrays.copyOf(cluster.LS, cluster.LS.length); + this.SS = Arrays.copyOf(cluster.SS, cluster.SS.length); + } + + public void add(CFCluster cluster ) { + this.N += cluster.N; + addVectors( this.LS, cluster.LS ); + addVectors( this.SS, cluster.SS ); + } + + public abstract CFCluster getCF(); + + /** + * @return this kernels' center + */ + @Override + public double[] getCenter() { + assert (this.N>0); + double res[] = new double[this.LS.length]; + for ( int i = 0; i < res.length; i++ ) { + res[i] = this.LS[i] / N; + } + return res; + } + + + @Override + public abstract double getInclusionProbability(Instance instance); + + /** + * See interface <code>Cluster</code> + * @return The radius of the cluster. + */ + @Override + public abstract double getRadius(); + + /** + * See interface <code>Cluster</code> + * @return The weight. + * @see Cluster#getWeight() + */ + @Override + public double getWeight() { + return N; + } + + public void setN(double N){ + this.N = N; + } + + public double getN() { + return N; + } + + /** + * Adds the second array to the first array element by element. The arrays + * must have the same length. + * @param a1 Vector to which the second vector is added. + * @param a2 Vector to be added. This vector does not change. + */ + public static void addVectors(double[] a1, double[] a2) { + assert (a1 != null); + assert (a2 != null); + assert (a1.length == a2.length) : "Adding two arrays of different " + + "length"; + + for (int i = 0; i < a1.length; i++) { + a1[i] += a2[i]; + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Cluster.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Cluster.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Cluster.java new file mode 100644 index 0000000..a9380a8 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Cluster.java @@ -0,0 +1,172 @@ + +package com.yahoo.labs.samoa.moa.cluster; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2010 RWTH Aachen University, Germany + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import com.yahoo.labs.samoa.instances.Instance; +import com.yahoo.labs.samoa.moa.AbstractMOAObject; + +public abstract class Cluster extends AbstractMOAObject { + + private static final long serialVersionUID = 1L; + + private double id = -1; + private double gtLabel = -1; + + private Map<String, String> measure_values; + + + public Cluster(){ + this.measure_values = new HashMap<>(); + } + /** + * @return the center of the cluster + */ + public abstract double[] getCenter(); + + /** + * Returns the weight of this cluster, not neccessarily normalized. + * It could, for instance, simply return the number of points contined + * in this cluster. + * @return the weight + */ + public abstract double getWeight(); + + /** + * Returns the probability of the given point belonging to + * this cluster. + * + * @param instance + * @return a value between 0 and 1 + */ + public abstract double getInclusionProbability(Instance instance); + + + //TODO: for non sphere cluster sample points, find out MIN MAX neighbours within cluster + //and return the relative distance + //public abstract double getRelativeHullDistance(Instance instance); + + @Override + public void getDescription(StringBuilder sb, int i) { + sb.append("Cluster Object"); + } + + public void setId(double id) { + this.id = id; + } + + public double getId() { + return id; + } + + public boolean isGroundTruth(){ + return gtLabel != -1; + } + + public void setGroundTruth(double truth){ + gtLabel = truth; + } + + public double getGroundTruth(){ + return gtLabel; + } + + + /** + * Samples this cluster by returning a point from inside it. + * @param random a random number source + * @return an Instance that lies inside this cluster + */ + public abstract Instance sample(Random random); + + + public void setMeasureValue(String measureKey, String value){ + measure_values.put(measureKey, value); + } + + public void setMeasureValue(String measureKey, double value){ + measure_values.put(measureKey, Double.toString(value)); + } + + + public String getMeasureValue(String measureKey){ + if(measure_values.containsKey(measureKey)) + return measure_values.get(measureKey); + else + return ""; + } + + + protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue){ + infoTitle.add("ClusterID"); + infoValue.add(Integer.toString((int)getId())); + + infoTitle.add("Type"); + infoValue.add(getClass().getSimpleName()); + + double c[] = getCenter(); + if(c!=null) + for (int i = 0; i < c.length; i++) { + infoTitle.add("Dim"+i); + infoValue.add(Double.toString(c[i])); + } + + infoTitle.add("Weight"); + infoValue.add(Double.toString(getWeight())); + + } + + public String getInfo() { + List<String> infoTitle = new ArrayList<>(); + List<String> infoValue = new ArrayList<>(); + getClusterSpecificInfo(infoTitle, infoValue); + + StringBuilder sb = new StringBuilder(); + + //Cluster properties + sb.append("<html>"); + sb.append("<table>"); + int i = 0; + while(i < infoTitle.size() && i < infoValue.size()){ + sb.append("<tr><td>"+infoTitle.get(i)+"</td><td>"+infoValue.get(i)+"</td></tr>"); + i++; + } + sb.append("</table>"); + + //Evaluation info + sb.append("<br>"); + sb.append("<b>Evaluation</b><br>"); + sb.append("<table>"); + for (Object o : measure_values.entrySet()) { + Map.Entry e = (Map.Entry) o; + sb.append("<tr><td>" + e.getKey() + "</td><td>" + e.getValue() + "</td></tr>"); + } + sb.append("</table>"); + sb.append("</html>"); + return sb.toString(); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Clustering.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Clustering.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Clustering.java new file mode 100644 index 0000000..5f76ba5 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Clustering.java @@ -0,0 +1,276 @@ +package com.yahoo.labs.samoa.moa.cluster; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 RWTH Aachen University, Germany + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import com.yahoo.labs.samoa.moa.AbstractMOAObject; +import com.yahoo.labs.samoa.moa.core.AutoExpandVector; +import com.yahoo.labs.samoa.moa.core.DataPoint; +import com.yahoo.labs.samoa.instances.Attribute; +import com.yahoo.labs.samoa.instances.Instance; + +public class Clustering extends AbstractMOAObject{ + + private AutoExpandVector<Cluster> clusters; + + public Clustering() { + this.clusters = new AutoExpandVector<Cluster>(); + } + + public Clustering( Cluster[] clusters ) { + this.clusters = new AutoExpandVector<Cluster>(); + for (int i = 0; i < clusters.length; i++) { + this.clusters.add(clusters[i]); + } + } + + public Clustering(List<? extends Instance> points){ + HashMap<Integer, Integer> labelMap = classValues(points); + int dim = points.get(0).dataset().numAttributes()-1; + + int numClasses = labelMap.size(); + int noiseLabel; + + Attribute classLabel = points.get(0).dataset().classAttribute(); + int lastLabelIndex = classLabel.numValues() - 1; + if (classLabel.value(lastLabelIndex) == "noise") { + noiseLabel = lastLabelIndex; + } else { + noiseLabel = -1; + } + + ArrayList<Instance>[] sorted_points = (ArrayList<Instance>[]) new ArrayList[numClasses]; + for (int i = 0; i < numClasses; i++) { + sorted_points[i] = new ArrayList<Instance>(); + } + for (Instance point : points) { + int clusterid = (int)point.classValue(); + if(clusterid == noiseLabel) continue; + sorted_points[labelMap.get(clusterid)].add((Instance)point); + } + this.clusters = new AutoExpandVector<Cluster>(); + for (int i = 0; i < numClasses; i++) { + if(sorted_points[i].size()>0){ + SphereCluster s = new SphereCluster(sorted_points[i],dim); + s.setId(sorted_points[i].get(0).classValue()); + s.setGroundTruth(sorted_points[i].get(0).classValue()); + clusters.add(s); + } + } + } + + public Clustering(ArrayList<DataPoint> points, double overlapThreshold, int initMinPoints){ + HashMap<Integer, Integer> labelMap = Clustering.classValues(points); + int dim = points.get(0).dataset().numAttributes()-1; + + int numClasses = labelMap.size(); + int num = 0; + + ArrayList<DataPoint>[] sorted_points = (ArrayList<DataPoint>[]) new ArrayList[numClasses]; + for (int i = 0; i < numClasses; i++) { + sorted_points[i] = new ArrayList<DataPoint>(); + } + for (DataPoint point : points) { + int clusterid = (int)point.classValue(); + if(clusterid == -1) continue; + sorted_points[labelMap.get(clusterid)].add(point); + num++; + } + + clusters = new AutoExpandVector<Cluster>(); + int microID = 0; + for (int i = 0; i < numClasses; i++) { + ArrayList<SphereCluster> microByClass = new ArrayList<SphereCluster>(); + ArrayList<DataPoint> pointInCluster = new ArrayList<DataPoint>(); + ArrayList<ArrayList<Instance>> pointInMicroClusters = new ArrayList(); + + pointInCluster.addAll(sorted_points[i]); + while(pointInCluster.size()>0){ + ArrayList<Instance> micro_points = new ArrayList<Instance>(); + for (int j = 0; j < initMinPoints && !pointInCluster.isEmpty(); j++) { + micro_points.add((Instance)pointInCluster.get(0)); + pointInCluster.remove(0); + } + if(micro_points.size() > 0){ + SphereCluster s = new SphereCluster(micro_points,dim); + for (int c = 0; c < microByClass.size(); c++) { + if(((SphereCluster)microByClass.get(c)).overlapRadiusDegree(s) > overlapThreshold ){ + micro_points.addAll(pointInMicroClusters.get(c)); + s = new SphereCluster(micro_points,dim); + pointInMicroClusters.remove(c); + microByClass.remove(c); + //System.out.println("Removing redundant cluster based on radius overlap"+c); + } + } + for (int j = 0; j < pointInCluster.size(); j++) { + Instance instance = pointInCluster.get(j); + if(s.getInclusionProbability(instance) > 0.8){ + pointInCluster.remove(j); + micro_points.add(instance); + } + } + s.setWeight(micro_points.size()); + microByClass.add(s); + pointInMicroClusters.add(micro_points); + microID++; + } + } + // + boolean changed = true; + while(changed){ + changed = false; + for(int c = 0; c < microByClass.size(); c++) { + for(int c1 = c+1; c1 < microByClass.size(); c1++) { + double overlap = microByClass.get(c).overlapRadiusDegree(microByClass.get(c1)); +// System.out.println("Overlap C"+(clustering.size()+c)+" ->C"+(clustering.size()+c1)+": "+overlap); + if(overlap > overlapThreshold){ + pointInMicroClusters.get(c).addAll(pointInMicroClusters.get(c1)); + SphereCluster s = new SphereCluster(pointInMicroClusters.get(c),dim); + microByClass.set(c, s); + pointInMicroClusters.remove(c1); + microByClass.remove(c1); + changed = true; + break; + } + } + } + } + for (int j = 0; j < microByClass.size(); j++) { + microByClass.get(j).setGroundTruth(sorted_points[i].get(0).classValue()); + clusters.add(microByClass.get(j)); + } + + } + for (int j = 0; j < clusters.size(); j++) { + clusters.get(j).setId(j); + } + + } + + /** + * @param points + * @return an array with the min and max class label value + */ + public static HashMap<Integer, Integer> classValues(List<? extends Instance> points){ + HashMap<Integer,Integer> classes = new HashMap<Integer, Integer>(); + int workcluster = 0; + boolean hasnoise = false; + for (int i = 0; i < points.size(); i++) { + int label = (int) points.get(i).classValue(); + if(label == -1){ + hasnoise = true; + } + else{ + if(!classes.containsKey(label)){ + classes.put(label,workcluster); + workcluster++; + } + } + } + if(hasnoise) + classes.put(-1,workcluster); + return classes; + } + + public Clustering(AutoExpandVector<Cluster> clusters) { + this.clusters = clusters; + } + + + /** + * add a cluster to the clustering + */ + public void add(Cluster cluster){ + clusters.add(cluster); + } + + /** + * remove a cluster from the clustering + */ + public void remove(int index){ + if(index < clusters.size()){ + clusters.remove(index); + } + } + + /** + * remove a cluster from the clustering + */ + public Cluster get(int index){ + if(index < clusters.size()){ + return clusters.get(index); + } + return null; + } + + /** + * @return the <code>Clustering</code> as an AutoExpandVector + */ + public AutoExpandVector<Cluster> getClustering() { + return clusters; + } + + /** + * @return A deepcopy of the <code>Clustering</code> as an AutoExpandVector + */ + public AutoExpandVector<Cluster> getClusteringCopy() { + return (AutoExpandVector<Cluster>)clusters.copy(); + } + + + /** + * @return the number of clusters + */ + public int size() { + return clusters.size(); + } + + /** + * @return the number of dimensions of this clustering + */ + public int dimension() { + assert (clusters.size() != 0); + return clusters.get(0).getCenter().length; + } + + @Override + public void getDescription(StringBuilder sb, int i) { + sb.append("Clustering Object"); + } + + + + public double getMaxInclusionProbability(Instance point) { + double maxInclusion = 0.0; + for (int i = 0; i < clusters.size(); i++) { + maxInclusion = Math.max(clusters.get(i).getInclusionProbability(point), + maxInclusion); + } + return maxInclusion; + } + + + + + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Miniball.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Miniball.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Miniball.java new file mode 100644 index 0000000..45c947e --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Miniball.java @@ -0,0 +1,84 @@ +package com.yahoo.labs.samoa.moa.cluster; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.dreizak.miniball.model.ArrayPointSet; +import com.dreizak.miniball.model.PointSet; +import java.util.ArrayList; +import java.util.List; + +public class Miniball { + + private int dimension; + private com.dreizak.miniball.highdim.Miniball mb; + private PointStorage pointSet; + + public Miniball(int dimension) { + this.dimension = dimension; + } + + void clear() { + this.pointSet = new PointStorage(this.dimension); + } + + void check_in(double[] array) { + this.pointSet.add(array); + } + + double[] center() { + return this.mb.center(); + } + + double radius() { + return this.mb.radius(); + } + + void build() { + this.mb = new com.dreizak.miniball.highdim.Miniball(this.pointSet); + } + + public class PointStorage implements PointSet { + + protected int dimension; + protected List<double[]> L; + + public PointStorage(int dimension) { + this.dimension = dimension; + this.L = new ArrayList<double[]>(); + } + + public void add(double[] array) { + this.L.add(array); + } + + public int size() { + return L.size(); + } + + public int dimension() { + return dimension; + } + + public double coord(int point, int coordinate) { + return L.get(point)[coordinate]; + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/SphereCluster.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/SphereCluster.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/SphereCluster.java new file mode 100644 index 0000000..558b0f1 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/SphereCluster.java @@ -0,0 +1,373 @@ + +package com.yahoo.labs.samoa.moa.cluster; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2010 RWTH Aachen University, Germany + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.instances.DenseInstance; +import com.yahoo.labs.samoa.instances.Instance; +import java.util.List; +import java.util.Random; + +/** + * A simple implementation of the <code>Cluster</code> interface representing + * spherical clusters. The inclusion probability is one inside the sphere and zero + * everywhere else. + * + */ +public class SphereCluster extends Cluster { + + private static final long serialVersionUID = 1L; + + private double[] center; + private double radius; + private double weight; + + + public SphereCluster(double[] center, double radius) { + this( center, radius, 1.0 ); + } + + public SphereCluster() { + } + + public SphereCluster( double[] center, double radius, double weightedSize) { + this(); + this.center = center; + this.radius = radius; + this.weight = weightedSize; + } + + public SphereCluster(int dimensions, double radius, Random random) { + this(); + this.center = new double[dimensions]; + this.radius = radius; + + // Position randomly but keep hypersphere inside the boundaries + double interval = 1.0 - 2 * radius; + for (int i = 0; i < center.length; i++) { + this.center[i] = (random.nextDouble() * interval) + radius; + } + this.weight = 0.0; + } + + + public SphereCluster(List<?extends Instance> instances, int dimension){ + this(); + if(instances == null || instances.size() <= 0) + return; + + weight = instances.size(); + + Miniball mb = new Miniball(dimension); + mb.clear(); + + for (Instance instance : instances) { + mb.check_in(instance.toDoubleArray()); + } + + mb.build(); + center = mb.center(); + radius = mb.radius(); + mb.clear(); + } + + + /** + * Checks whether two <code>SphereCluster</code> overlap based on radius + * NOTE: overlapRadiusDegree only calculates the overlap based + * on the centers and the radi, so not the real overlap + * + * TODO: should we do this by MC to get the real overlap??? + * + * @param other + * @return + */ + + public double overlapRadiusDegree(SphereCluster other) { + + + double[] center0 = getCenter(); + double radius0 = getRadius(); + + double[] center1 = other.getCenter(); + double radius1 = other.getRadius(); + + double radiusBig; + double radiusSmall; + if(radius0 < radius1){ + radiusBig = radius1; + radiusSmall = radius0; + } + else{ + radiusBig = radius0; + radiusSmall = radius1; + } + + double dist = 0; + for (int i = 0; i < center0.length; i++) { + double delta = center0[i] - center1[i]; + dist += delta * delta; + } + dist = Math.sqrt(dist); + + if(dist > radiusSmall + radiusBig) + return 0; + if(dist + radiusSmall <= radiusBig){ + //one lies within the other + return 1; + } + else{ + return (radiusSmall+radiusBig-dist)/(2*radiusSmall); + } + } + + public void combine(SphereCluster cluster) { + double[] center = getCenter(); + double[] newcenter = new double[center.length]; + double[] other_center = cluster.getCenter(); + double other_weight = cluster.getWeight(); + double other_radius = cluster.getRadius(); + + for (int i = 0; i < center.length; i++) { + newcenter[i] = (center[i]*getWeight()+other_center[i]*other_weight)/(getWeight()+other_weight); + } + + center = newcenter; + double r_0 = getRadius() + Math.abs(distance(center, newcenter)); + double r_1 = other_radius + Math.abs(distance(other_center, newcenter)); + radius = Math.max(r_0, r_1); + weight+= other_weight; + } + + public void merge(SphereCluster cluster) { + double[] c0 = getCenter(); + double w0 = getWeight(); + double r0 = getRadius(); + + double[] c1 = cluster.getCenter(); + double w1 = cluster.getWeight(); + double r1 = cluster.getRadius(); + + //vector + double[] v = new double[c0.length]; + //center distance + double d = 0; + + for (int i = 0; i < c0.length; i++) { + v[i] = c0[i] - c1[i]; + d += v[i] * v[i]; + } + d = Math.sqrt(d); + + + + double r; + double[] c = new double[c0.length]; + + //one lays within the others + if(d + r0 <= r1 || d + r1 <= r0){ + if(d + r0 <= r1){ + r = r1; + c = c1; + } + else{ + r = r0; + c = c0; + } + } + else{ + r = (r0 + r1 + d)/2.0; + for (int i = 0; i < c.length; i++) { + c[i] = c1[i] - v[i]/d * (r1-r); + } + } + + setCenter(c); + setRadius(r); + setWeight(w0+w1); + + } + + @Override + public double[] getCenter() { + double[] copy = new double[center.length]; + System.arraycopy(center, 0, copy, 0, center.length); + return copy; + } + + public void setCenter(double[] center) { + this.center = center; + } + + public double getRadius() { + return radius; + } + + public void setRadius( double radius ) { + this.radius = radius; + } + + @Override + public double getWeight() { + return weight; + } + + public void setWeight( double weight ) { + this.weight = weight; + } + + @Override + public double getInclusionProbability(Instance instance) { + if (getCenterDistance(instance) <= getRadius()) { + return 1.0; + } + return 0.0; + } + + public double getCenterDistance(Instance instance) { + double distance = 0.0; + //get the center through getCenter so subclass have a chance + double[] center = getCenter(); + for (int i = 0; i < center.length; i++) { + double d = center[i] - instance.value(i); + distance += d * d; + } + return Math.sqrt(distance); + } + + public double getCenterDistance(SphereCluster other) { + return distance(getCenter(), other.getCenter()); + } + + /* + * the minimal distance between the surface of two clusters. + * is negative if the two clusters overlap + */ + public double getHullDistance(SphereCluster other) { + double distance; + //get the center through getCenter so subclass have a chance + double[] center0 = getCenter(); + double[] center1 = other.getCenter(); + distance = distance(center0, center1); + + distance = distance - getRadius() - other.getRadius(); + return distance; + } + + /* + */ + /** + * When a clusters looses points the new minimal bounding sphere can be + * partly outside of the originating cluster. If a another cluster is + * right next to the original cluster (without overlapping), the new + * cluster can be overlapping with this second cluster. OverlapSave + * will tell you if the current cluster can degenerate so much that it + * overlaps with cluster 'other' + * + * @param other the potentially overlapping cluster + * @return true if cluster can potentially overlap + */ + public boolean overlapSave(SphereCluster other){ + //use basic geometry to figure out the maximal degenerated cluster + //comes down to Max(radius *(sin alpha + cos alpha)) which is + double minDist = Math.sqrt(2)*(getRadius() + other.getRadius()); + double diff = getCenterDistance(other) - minDist; + + return diff > 0; + } + + private double distance(double[] v1, double[] v2){ + double distance = 0.0; + double[] center = getCenter(); + for (int i = 0; i < center.length; i++) { + double d = v1[i] - v2[i]; + distance += d * d; + } + return Math.sqrt(distance); + } + + public double[] getDistanceVector(Instance instance){ + return distanceVector(getCenter(), instance.toDoubleArray()); + } + + public double[] getDistanceVector(SphereCluster other){ + return distanceVector(getCenter(), other.getCenter()); + } + + private double[] distanceVector(double[] v1, double[] v2){ + double[] v = new double[v1.length]; + for (int i = 0; i < v1.length; i++) { + v[i] = v2[i] - v1[i]; + } + return v; + } + + + /** + * Samples this cluster by returning a point from inside it. + * @param random a random number source + * @return a point that lies inside this cluster + */ + public Instance sample(Random random) { + // Create sample in hypersphere coordinates + //get the center through getCenter so subclass have a chance + double[] center = getCenter(); + + final int dimensions = center.length; + + final double sin[] = new double[dimensions - 1]; + final double cos[] = new double[dimensions - 1]; + final double length = random.nextDouble() * getRadius(); + + double lastValue = 1.0; + for (int i = 0; i < dimensions-1; i++) { + double angle = random.nextDouble() * 2 * Math.PI; + sin[i] = lastValue * Math.sin( angle ); // Store cumulative values + cos[i] = Math.cos( angle ); + lastValue = sin[i]; + } + + // Calculate cartesian coordinates + double res[] = new double[dimensions]; + + // First value uses only cosines + res[0] = center[0] + length*cos[0]; + + // Loop through 'middle' coordinates which use cosines and sines + for (int i = 1; i < dimensions-1; i++) { + res[i] = center[i] + length*sin[i-1]*cos[i]; + } + + // Last value uses only sines + res[dimensions-1] = center[dimensions-1] + length*sin[dimensions-2]; + + return new DenseInstance(1.0, res); + } + + @Override + protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue) { + super.getClusterSpecificInfo(infoTitle, infoValue); + infoTitle.add("Radius"); + infoValue.add(Double.toString(getRadius())); + } + + +}
