http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java deleted file mode 100644 index c761808..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -/* - * NumericAttributeBinaryRulePredicate.java - * Copyright (C) 2013 University of Porto, Portugal - * @author E. Almeida, A. Carvalho, J. Gama - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * - */ -package com.yahoo.labs.samoa.moa.classifiers.rules.core.conditionaltests; - -import com.yahoo.labs.samoa.instances.Instance; -import com.yahoo.labs.samoa.instances.InstancesHeader; -import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.InstanceConditionalBinaryTest; -import com.yahoo.labs.samoa.moa.classifiers.rules.core.Predicate; - -/** - * Numeric binary conditional test for instances to use to split nodes in AMRules. - * - * @version $Revision: 1 $ - */ -public class NumericAttributeBinaryRulePredicate extends InstanceConditionalBinaryTest implements Predicate { - - private static final long serialVersionUID = 1L; - - protected int attIndex; - - protected double attValue; - - protected int operator; // 0 =, 1<=, 2> - - public NumericAttributeBinaryRulePredicate() { - this(0, 0, 0); - } - - public NumericAttributeBinaryRulePredicate(int attIndex, double attValue, - int operator) { - this.attIndex = attIndex; - this.attValue = attValue; - this.operator = operator; - } - - public NumericAttributeBinaryRulePredicate(NumericAttributeBinaryRulePredicate oldTest) { - this(oldTest.attIndex, oldTest.attValue, oldTest.operator); - } - - @Override - public int branchForInstance(Instance inst) { - int instAttIndex = this.attIndex < inst.classIndex() ? this.attIndex - : this.attIndex + 1; - if (inst.isMissing(instAttIndex)) { - return -1; - } - double v = inst.value(instAttIndex); - int ret = 0; - switch (this.operator) { - case 0: - ret = (v == this.attValue) ? 0 : 1; - break; - case 1: - ret = (v <= this.attValue) ? 0 : 1; - break; - case 2: - ret = (v > this.attValue) ? 0 : 1; - } - return ret; - } - - /** - * - */ - @Override - public String describeConditionForBranch(int branch, InstancesHeader context) { - if ((branch >= 0) && (branch <= 2)) { - String compareChar = (branch == 0) ? "=" : (branch == 1) ? "<=" : ">"; - return InstancesHeader.getAttributeNameString(context, - this.attIndex) - + ' ' - + compareChar - + InstancesHeader.getNumericValueString(context, - this.attIndex, this.attValue); - } - throw new IndexOutOfBoundsException(); - } - - /** - * - */ - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } - - @Override - public int[] getAttsTestDependsOn() { - return new int[] { this.attIndex }; - } - - public double getSplitValue() { - return this.attValue; - } - - @Override - public boolean evaluate(Instance inst) { - return (branchForInstance(inst) == 0); - } - - @Override - public String toString() { - if ((operator >= 0) && (operator <= 2)) { - String compareChar = (operator == 0) ? "=" : (operator == 1) ? "<=" : ">"; - // int equalsBranch = this.equalsPassesTest ? 0 : 1; - return "x" + this.attIndex - + ' ' - + compareChar - + ' ' - + this.attValue; - } - throw new IndexOutOfBoundsException(); - } - - public boolean isEqual(NumericAttributeBinaryRulePredicate predicate) { - return (this.attIndex == predicate.attIndex - && this.attValue == predicate.attValue - && this.operator == predicate.operator); - } - - public boolean isUsingSameAttribute(NumericAttributeBinaryRulePredicate predicate) { - return (this.attIndex == predicate.attIndex - && this.operator == predicate.operator); - } - - public boolean isIncludedInRuleNode( - NumericAttributeBinaryRulePredicate predicate) { - boolean ret; - if (this.operator == 1) { // <= - ret = (predicate.attValue <= this.attValue); - } else { // > - ret = (predicate.attValue > this.attValue); - } - - return ret; - } - - public void setAttributeValue( - NumericAttributeBinaryRulePredicate ruleSplitNodeTest) { - this.attValue = ruleSplitNodeTest.attValue; - - } -}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/splitcriteria/SDRSplitCriterionAMRules.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/splitcriteria/SDRSplitCriterionAMRules.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/splitcriteria/SDRSplitCriterionAMRules.java deleted file mode 100644 index 6feec6a..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/splitcriteria/SDRSplitCriterionAMRules.java +++ /dev/null @@ -1,99 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.rules.core.splitcriteria; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -/* - * SDRSplitCriterionAMRules.java - * Copyright (C) 2014 University of Porto, Portugal - * @author A. Bifet, J. Duarte, J. Gama - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * - */ - -import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SDRSplitCriterion; -import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; - -public class SDRSplitCriterionAMRules extends SDRSplitCriterion implements SplitCriterion { - - private static final long serialVersionUID = 1L; - - @Override - public double getMeritOfSplit(double[] preSplitDist, double[][] postSplitDists) { - double SDR = 0.0; - double N = preSplitDist[0]; - int count = 0; - - for (int i = 0; i < postSplitDists.length; i++) - { - double Ni = postSplitDists[i][0]; - if (Ni >= 0.05 * preSplitDist[0]) { - count = count + 1; - } - } - if (count == postSplitDists.length) { - SDR = computeSD(preSplitDist); - - for (int i = 0; i < postSplitDists.length; i++) - { - double Ni = postSplitDists[i][0]; - SDR -= (Ni / N) * computeSD(postSplitDists[i]); - - } - } - return SDR; - } - - @Override - public double getRangeOfMerit(double[] preSplitDist) { - return 1; - } - - public static double[] computeBranchSplitMerits(double[][] postSplitDists) { - double[] SDR = new double[postSplitDists.length]; - double N = 0; - - for (int i = 0; i < postSplitDists.length; i++) - { - double Ni = postSplitDists[i][0]; - N += Ni; - } - for (int i = 0; i < postSplitDists.length; i++) - { - double Ni = postSplitDists[i][0]; - SDR[i] = (Ni / N) * computeSD(postSplitDists[i]); - } - return SDR; - - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/AbstractErrorWeightedVote.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/AbstractErrorWeightedVote.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/AbstractErrorWeightedVote.java deleted file mode 100644 index a8bfafc..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/AbstractErrorWeightedVote.java +++ /dev/null @@ -1,101 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.rules.core.voting; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.util.ArrayList; -import java.util.List; - -import com.yahoo.labs.samoa.moa.AbstractMOAObject; - -/** - * AbstractErrorWeightedVote class for weighted votes based on estimates of errors. - * - * @author Joao Duarte ([email protected]) - * @version $Revision: 1 $ - */ -public abstract class AbstractErrorWeightedVote extends AbstractMOAObject implements ErrorWeightedVote { - /** - * - */ - private static final long serialVersionUID = -7340491298217227675L; - protected List<double[]> votes; - protected List<Double> errors; - protected double[] weights; - - public AbstractErrorWeightedVote() { - super(); - votes = new ArrayList<double[]>(); - errors = new ArrayList<Double>(); - } - - public AbstractErrorWeightedVote(AbstractErrorWeightedVote aewv) { - super(); - votes = new ArrayList<double[]>(); - for (double[] vote : aewv.votes) { - double[] v = new double[vote.length]; - for (int i = 0; i < vote.length; i++) - v[i] = vote[i]; - votes.add(v); - } - errors = new ArrayList<Double>(); - for (Double db : aewv.errors) { - errors.add(db.doubleValue()); - } - if (aewv.weights != null) { - weights = new double[aewv.weights.length]; - for (int i = 0; i < aewv.weights.length; i++) - weights[i] = aewv.weights[i]; - } - } - - @Override - public void addVote(double[] vote, double error) { - votes.add(vote); - errors.add(error); - } - - @Override - abstract public double[] computeWeightedVote(); - - @Override - public double getWeightedError() - { - double weightedError = 0; - if (weights != null && weights.length == errors.size()) - { - for (int i = 0; i < weights.length; ++i) - weightedError += errors.get(i) * weights[i]; - } - else - weightedError = -1; - return weightedError; - } - - @Override - public double[] getWeights() { - return weights; - } - - @Override - public int getNumberVotes() { - return votes.size(); - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/ErrorWeightedVote.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/ErrorWeightedVote.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/ErrorWeightedVote.java deleted file mode 100644 index c905d65..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/ErrorWeightedVote.java +++ /dev/null @@ -1,81 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.rules.core.voting; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.moa.MOAObject; - -/** - * ErrorWeightedVote interface for weighted votes based on estimates of errors. - * - * @author Joao Duarte ([email protected]) - * @version $Revision: 1 $ - */ -public interface ErrorWeightedVote { - - /** - * Adds a vote and the corresponding error for the computation of the weighted vote and respective weighted error. - * - * @param vote - * a vote returned by a classifier - * @param error - * the error associated to the vote - */ - public void addVote(double[] vote, double error); - - /** - * Computes the weighted vote. Also updates the weights of the votes. - * - * @return the weighted vote - */ - public double[] computeWeightedVote(); - - /** - * Returns the weighted error. - * - * @pre computeWeightedVote() - * @return the weighted error - */ - public double getWeightedError(); - - /** - * Return the weights error. - * - * @pre computeWeightedVote() - * @return the weights - */ - public double[] getWeights(); - - /** - * The number of votes added so far. - * - * @return the number of votes - */ - public int getNumberVotes(); - - /** - * Creates a copy of the object - * - * @return copy of the object - */ - public MOAObject copy(); - - public ErrorWeightedVote getACopy(); -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/InverseErrorWeightedVote.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/InverseErrorWeightedVote.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/InverseErrorWeightedVote.java deleted file mode 100644 index 8f9c3b0..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/InverseErrorWeightedVote.java +++ /dev/null @@ -1,99 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.rules.core.voting; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -/** - * InverseErrorWeightedVote class for weighted votes based on estimates of errors. - * - * @author Joao Duarte ([email protected]) - * @version $Revision: 1 $ - */ -public class InverseErrorWeightedVote extends AbstractErrorWeightedVote { - - /** - * - */ - private static final double EPS = 0.000000001; // just to prevent divide by 0 in 1/X -> 1/(x+EPS) - private static final long serialVersionUID = 6359349250620616482L; - - public InverseErrorWeightedVote() { - super(); - } - - public InverseErrorWeightedVote(AbstractErrorWeightedVote aewv) { - super(aewv); - } - - @Override - public double[] computeWeightedVote() { - int n = votes.size(); - weights = new double[n]; - double[] weightedVote = null; - if (n > 0) { - int d = votes.get(0).length; - weightedVote = new double[d]; - double sumError = 0; - // weights are 1/(error+eps) - for (int i = 0; i < n; ++i) { - if (errors.get(i) < Double.MAX_VALUE) { - weights[i] = 1.0 / (errors.get(i) + EPS); - sumError += weights[i]; - } - else - weights[i] = 0; - - } - - if (sumError > 0) - for (int i = 0; i < n; ++i) - { - // normalize so that weights sum 1 - weights[i] /= sumError; - // compute weighted vote - for (int j = 0; j < d; j++) - weightedVote[j] += votes.get(i)[j] * weights[i]; - } - // Only occurs if all errors=Double.MAX_VALUE - else - { - // compute arithmetic vote - for (int i = 0; i < n; ++i) - { - for (int j = 0; j < d; j++) - weightedVote[j] += votes.get(i)[j] / n; - } - } - } - return weightedVote; - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - - } - - @Override - public InverseErrorWeightedVote getACopy() { - return new InverseErrorWeightedVote(this); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java deleted file mode 100644 index a1cb0c2..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java +++ /dev/null @@ -1,71 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.rules.core.voting; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -/** - * UniformWeightedVote class for weighted votes based on estimates of errors. - * - * @author Joao Duarte ([email protected]) - * @version $Revision: 1 $ - */ -public class UniformWeightedVote extends AbstractErrorWeightedVote { - - private static final long serialVersionUID = 6359349250620616482L; - - public UniformWeightedVote() { - super(); - } - - public UniformWeightedVote(AbstractErrorWeightedVote aewv) { - super(aewv); - } - - @Override - public double[] computeWeightedVote() { - int n = votes.size(); - weights = new double[n]; - double[] weightedVote = null; - if (n > 0) { - int d = votes.get(0).length; - weightedVote = new double[d]; - for (int i = 0; i < n; i++) - { - weights[i] = 1.0 / n; - for (int j = 0; j < d; j++) - weightedVote[j] += (votes.get(i)[j] * weights[i]); - } - - } - return weightedVote; - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - - } - - @Override - public UniformWeightedVote getACopy() { - return new UniformWeightedVote(this); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyFading.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyFading.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyFading.java deleted file mode 100644 index ab9b498..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyFading.java +++ /dev/null @@ -1,84 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.rules.driftdetection; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -/** - * Page-Hinkley Test with more weight for recent instances. - * - */ - -public class PageHinkleyFading extends PageHinkleyTest { - /** - * - */ - private static final long serialVersionUID = 7110953184708812339L; - private double fadingFactor = 0.99; - - public PageHinkleyFading() { - super(); - } - - public PageHinkleyFading(double threshold, double alpha) { - super(threshold, alpha); - } - - protected double instancesSeen; - - @Override - public void reset() { - - super.reset(); - this.instancesSeen = 0; - - } - - @Override - public boolean update(double error) { - this.instancesSeen = 1 + fadingFactor * this.instancesSeen; - double absolutError = Math.abs(error); - - this.sumAbsolutError = fadingFactor * this.sumAbsolutError + absolutError; - if (this.instancesSeen > 30) { - double mT = absolutError - (this.sumAbsolutError / this.instancesSeen) - this.alpha; - this.cumulativeSum = this.cumulativeSum + mT; // Update the cumulative mT sum - if (this.cumulativeSum < this.minimumValue) { // Update the minimum mT value if the new mT is smaller than the current minimum - this.minimumValue = this.cumulativeSum; - } - return (((this.cumulativeSum - this.minimumValue) > this.threshold)); - } - return false; - } - - @Override - public PageHinkleyTest getACopy() { - PageHinkleyFading newTest = new PageHinkleyFading(this.threshold, this.alpha); - this.copyFields(newTest); - return newTest; - } - - @Override - protected void copyFields(PageHinkleyTest newTest) { - super.copyFields(newTest); - PageHinkleyFading newFading = (PageHinkleyFading) newTest; - newFading.fadingFactor = this.fadingFactor; - newFading.instancesSeen = this.instancesSeen; - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyTest.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyTest.java deleted file mode 100644 index 8037c94..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/driftdetection/PageHinkleyTest.java +++ /dev/null @@ -1,95 +0,0 @@ -package com.yahoo.labs.samoa.moa.classifiers.rules.driftdetection; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.io.Serializable; - -/** - * Page-Hinkley Test with equal weights for all instances. - * - */ -public class PageHinkleyTest implements Serializable { - - private static final long serialVersionUID = 1L; - protected double cumulativeSum; - - public double getCumulativeSum() { - return cumulativeSum; - } - - public double getMinimumValue() { - return minimumValue; - } - - protected double minimumValue; - protected double sumAbsolutError; - protected long phinstancesSeen; - protected double threshold; - protected double alpha; - - public PageHinkleyTest() { - this(0, 0); - } - - public PageHinkleyTest(double threshold, double alpha) { - this.threshold = threshold; - this.alpha = alpha; - this.reset(); - } - - public void reset() { - this.cumulativeSum = 0.0; - this.minimumValue = Double.MAX_VALUE; - this.sumAbsolutError = 0.0; - this.phinstancesSeen = 0; - } - - // Compute Page-Hinkley test - public boolean update(double error) { - - this.phinstancesSeen++; - double absolutError = Math.abs(error); - this.sumAbsolutError = this.sumAbsolutError + absolutError; - if (this.phinstancesSeen > 30) { - double mT = absolutError - (this.sumAbsolutError / this.phinstancesSeen) - this.alpha; - this.cumulativeSum = this.cumulativeSum + mT; // Update the cumulative mT sum - if (this.cumulativeSum < this.minimumValue) { // Update the minimum mT value if the new mT is smaller than the current minimum - this.minimumValue = this.cumulativeSum; - } - return (((this.cumulativeSum - this.minimumValue) > this.threshold)); - } - return false; - } - - public PageHinkleyTest getACopy() { - PageHinkleyTest newTest = new PageHinkleyTest(this.threshold, this.alpha); - this.copyFields(newTest); - return newTest; - } - - protected void copyFields(PageHinkleyTest newTest) { - newTest.cumulativeSum = this.cumulativeSum; - newTest.minimumValue = this.minimumValue; - newTest.sumAbsolutError = this.sumAbsolutError; - newTest.phinstancesSeen = this.phinstancesSeen; - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/CFCluster.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/CFCluster.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/CFCluster.java deleted file mode 100644 index bf7cdd6..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/CFCluster.java +++ /dev/null @@ -1,176 +0,0 @@ -package com.yahoo.labs.samoa.moa.cluster; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ -import java.util.Arrays; -import com.yahoo.labs.samoa.instances.Instance; - -/* micro cluster, as defined by Aggarwal et al, On Clustering Massive Data Streams: A Summarization Praradigm - * in the book Data streams : models and algorithms, by Charu C Aggarwal - * @article{ - title = {Data Streams: Models and Algorithms}, - author = {Aggarwal, Charu C.}, - year = {2007}, - publisher = {Springer Science+Business Media, LLC}, - url = {http://ebooks.ulb.tu-darmstadt.de/11157/}, - institution = {eBooks [http://ebooks.ulb.tu-darmstadt.de/perl/oai2] (Germany)}, - } - - DEFINITION A micro-clusterfor a set of d-dimensionalpoints Xi,. .Xi, - with t i m e s t a m p s ~. . .T,, is the (2-d+3)tuple (CF2", CFlX CF2t, CFlt, n), - wherein CF2" and CFlX each correspond to a vector of d entries. The definition of each of these entries is as follows: - - o For each dimension, the sum of the squares of the data values is maintained - in CF2". Thus, CF2" contains d values. The p-th entry of CF2" is equal to - \sum_j=1^n(x_i_j)^2 - - o For each dimension, the sum of the data values is maintained in C F l X . - Thus, CFIX contains d values. The p-th entry of CFIX is equal to - \sum_j=1^n x_i_j - - o The sum of the squares of the time stamps Ti,. .Tin maintained in CF2t - - o The sum of the time stamps Ti, . . .Tin maintained in CFlt. - - o The number of data points is maintained in n. - - */ -public abstract class CFCluster extends SphereCluster { - - private static final long serialVersionUID = 1L; - - protected double radiusFactor = 1.8; - - /** - * Number of points in the cluster. - */ - protected double N; - /** - * Linear sum of all the points added to the cluster. - */ - public double[] LS; - /** - * Squared sum of all the points added to the cluster. - */ - public double[] SS; - - /** - * Instantiates an empty kernel with the given dimensionality. - * - * @param dimensions - * The number of dimensions of the points that can be in this kernel. - */ - public CFCluster(Instance instance, int dimensions) { - this(instance.toDoubleArray(), dimensions); - } - - protected CFCluster(int dimensions) { - this.N = 0; - this.LS = new double[dimensions]; - this.SS = new double[dimensions]; - Arrays.fill(this.LS, 0.0); - Arrays.fill(this.SS, 0.0); - } - - public CFCluster(double[] center, int dimensions) { - this.N = 1; - this.LS = center; - this.SS = new double[dimensions]; - for (int i = 0; i < SS.length; i++) { - SS[i] = Math.pow(center[i], 2); - } - } - - public CFCluster(CFCluster cluster) { - this.N = cluster.N; - this.LS = Arrays.copyOf(cluster.LS, cluster.LS.length); - this.SS = Arrays.copyOf(cluster.SS, cluster.SS.length); - } - - public void add(CFCluster cluster) { - this.N += cluster.N; - addVectors(this.LS, cluster.LS); - addVectors(this.SS, cluster.SS); - } - - public abstract CFCluster getCF(); - - /** - * @return this kernels' center - */ - @Override - public double[] getCenter() { - assert (this.N > 0); - double res[] = new double[this.LS.length]; - for (int i = 0; i < res.length; i++) { - res[i] = this.LS[i] / N; - } - return res; - } - - @Override - public abstract double getInclusionProbability(Instance instance); - - /** - * See interface <code>Cluster</code> - * - * @return The radius of the cluster. - */ - @Override - public abstract double getRadius(); - - /** - * See interface <code>Cluster</code> - * - * @return The weight. - * @see Cluster#getWeight() - */ - @Override - public double getWeight() { - return N; - } - - public void setN(double N) { - this.N = N; - } - - public double getN() { - return N; - } - - /** - * Adds the second array to the first array element by element. The arrays must have the same length. - * - * @param a1 - * Vector to which the second vector is added. - * @param a2 - * Vector to be added. This vector does not change. - */ - public static void addVectors(double[] a1, double[] a2) { - assert (a1 != null); - assert (a2 != null); - assert (a1.length == a2.length) : "Adding two arrays of different " - + "length"; - - for (int i = 0; i < a1.length; i++) { - a1[i] += a2[i]; - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Cluster.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Cluster.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Cluster.java deleted file mode 100644 index 9f77c42..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Cluster.java +++ /dev/null @@ -1,168 +0,0 @@ -package com.yahoo.labs.samoa.moa.cluster; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Random; - -import com.yahoo.labs.samoa.instances.Instance; -import com.yahoo.labs.samoa.moa.AbstractMOAObject; - -public abstract class Cluster extends AbstractMOAObject { - - private static final long serialVersionUID = 1L; - - private double id = -1; - private double gtLabel = -1; - - private Map<String, String> measure_values; - - public Cluster() { - this.measure_values = new HashMap<>(); - } - - /** - * @return the center of the cluster - */ - public abstract double[] getCenter(); - - /** - * Returns the weight of this cluster, not neccessarily normalized. It could, for instance, simply return the number - * of points contined in this cluster. - * - * @return the weight - */ - public abstract double getWeight(); - - /** - * Returns the probability of the given point belonging to this cluster. - * - * @param instance - * @return a value between 0 and 1 - */ - public abstract double getInclusionProbability(Instance instance); - - // TODO: for non sphere cluster sample points, find out MIN MAX neighbours - // within cluster - // and return the relative distance - // public abstract double getRelativeHullDistance(Instance instance); - - @Override - public void getDescription(StringBuilder sb, int i) { - sb.append("Cluster Object"); - } - - public void setId(double id) { - this.id = id; - } - - public double getId() { - return id; - } - - public boolean isGroundTruth() { - return gtLabel != -1; - } - - public void setGroundTruth(double truth) { - gtLabel = truth; - } - - public double getGroundTruth() { - return gtLabel; - } - - /** - * Samples this cluster by returning a point from inside it. - * - * @param random - * a random number source - * @return an Instance that lies inside this cluster - */ - public abstract Instance sample(Random random); - - public void setMeasureValue(String measureKey, String value) { - measure_values.put(measureKey, value); - } - - public void setMeasureValue(String measureKey, double value) { - measure_values.put(measureKey, Double.toString(value)); - } - - public String getMeasureValue(String measureKey) { - if (measure_values.containsKey(measureKey)) - return measure_values.get(measureKey); - else - return ""; - } - - protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue) { - infoTitle.add("ClusterID"); - infoValue.add(Integer.toString((int) getId())); - - infoTitle.add("Type"); - infoValue.add(getClass().getSimpleName()); - - double c[] = getCenter(); - if (c != null) - for (int i = 0; i < c.length; i++) { - infoTitle.add("Dim" + i); - infoValue.add(Double.toString(c[i])); - } - - infoTitle.add("Weight"); - infoValue.add(Double.toString(getWeight())); - - } - - public String getInfo() { - List<String> infoTitle = new ArrayList<>(); - List<String> infoValue = new ArrayList<>(); - getClusterSpecificInfo(infoTitle, infoValue); - - StringBuilder sb = new StringBuilder(); - - // Cluster properties - sb.append("<html>"); - sb.append("<table>"); - int i = 0; - while (i < infoTitle.size() && i < infoValue.size()) { - sb.append("<tr><td>" + infoTitle.get(i) + "</td><td>" + infoValue.get(i) + "</td></tr>"); - i++; - } - sb.append("</table>"); - - // Evaluation info - sb.append("<br>"); - sb.append("<b>Evaluation</b><br>"); - sb.append("<table>"); - for (Object o : measure_values.entrySet()) { - Map.Entry e = (Map.Entry) o; - sb.append("<tr><td>" + e.getKey() + "</td><td>" + e.getValue() + "</td></tr>"); - } - sb.append("</table>"); - sb.append("</html>"); - return sb.toString(); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Clustering.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Clustering.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Clustering.java deleted file mode 100644 index 70a5e79..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Clustering.java +++ /dev/null @@ -1,269 +0,0 @@ -package com.yahoo.labs.samoa.moa.cluster; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; - -import com.yahoo.labs.samoa.moa.AbstractMOAObject; -import com.yahoo.labs.samoa.moa.core.AutoExpandVector; -import com.yahoo.labs.samoa.moa.core.DataPoint; -import com.yahoo.labs.samoa.instances.Attribute; -import com.yahoo.labs.samoa.instances.Instance; - -public class Clustering extends AbstractMOAObject { - - private AutoExpandVector<Cluster> clusters; - - public Clustering() { - this.clusters = new AutoExpandVector<>(); - } - - public Clustering(Cluster[] clusters) { - this.clusters = new AutoExpandVector<>(); - Collections.addAll(this.clusters, clusters); - } - - public Clustering(List<? extends Instance> points) { - HashMap<Integer, Integer> labelMap = classValues(points); - int dim = points.get(0).dataset().numAttributes() - 1; - - int numClasses = labelMap.size(); - int noiseLabel; - - Attribute classLabel = points.get(0).dataset().classAttribute(); - int lastLabelIndex = classLabel.numValues() - 1; - if ("noise".equalsIgnoreCase(classLabel.value(lastLabelIndex))) { - noiseLabel = lastLabelIndex; - } else { - noiseLabel = -1; - } - - ArrayList<Instance>[] sorted_points = (ArrayList<Instance>[]) new ArrayList[numClasses]; - for (int i = 0; i < numClasses; i++) { - sorted_points[i] = new ArrayList<>(); - } - - for (Instance point : points) { - int clusterId = (int) point.classValue(); - if (clusterId != noiseLabel) { - sorted_points[labelMap.get(clusterId)].add(point); - } - } - - this.clusters = new AutoExpandVector<>(); - for (int i = 0; i < numClasses; i++) { - if (sorted_points[i].size() > 0) { - SphereCluster s = new SphereCluster(sorted_points[i], dim); - s.setId(sorted_points[i].get(0).classValue()); - s.setGroundTruth(sorted_points[i].get(0).classValue()); - clusters.add(s); - } - } - } - - public Clustering(ArrayList<DataPoint> points, double overlapThreshold, int initMinPoints) { - HashMap<Integer, Integer> labelMap = Clustering.classValues(points); - int dim = points.get(0).dataset().numAttributes() - 1; - - int numClasses = labelMap.size(); - - ArrayList<DataPoint>[] sorted_points = (ArrayList<DataPoint>[]) new ArrayList[numClasses]; - for (int i = 0; i < numClasses; i++) { - sorted_points[i] = new ArrayList<>(); - } - - for (DataPoint point : points) { - int clusterId = (int) point.classValue(); - if (clusterId != -1) { - sorted_points[labelMap.get(clusterId)].add(point); - } - } - - clusters = new AutoExpandVector<>(); - for (int i = 0; i < numClasses; i++) { - ArrayList<SphereCluster> microByClass = new ArrayList<>(); - ArrayList<DataPoint> pointInCluster = new ArrayList<>(); - ArrayList<ArrayList<Instance>> pointInMicroClusters = new ArrayList<>(); - - pointInCluster.addAll(sorted_points[i]); - while (pointInCluster.size() > 0) { - ArrayList<Instance> micro_points = new ArrayList<>(); - for (int j = 0; j < initMinPoints && !pointInCluster.isEmpty(); j++) { - micro_points.add(pointInCluster.get(0)); - pointInCluster.remove(0); - } - if (micro_points.size() > 0) { - SphereCluster s = new SphereCluster(micro_points, dim); - for (int c = 0; c < microByClass.size(); c++) { - if ((microByClass.get(c)).overlapRadiusDegree(s) > overlapThreshold) { - micro_points.addAll(pointInMicroClusters.get(c)); - s = new SphereCluster(micro_points, dim); - pointInMicroClusters.remove(c); - microByClass.remove(c); - } - } - - for (int j = 0; j < pointInCluster.size(); j++) { - Instance instance = pointInCluster.get(j); - if (s.getInclusionProbability(instance) > 0.8) { - pointInCluster.remove(j); - micro_points.add(instance); - } - } - s.setWeight(micro_points.size()); - microByClass.add(s); - pointInMicroClusters.add(micro_points); - } - } - // - boolean changed = true; - while (changed) { - changed = false; - for (int c = 0; c < microByClass.size(); c++) { - for (int c1 = c + 1; c1 < microByClass.size(); c1++) { - double overlap = microByClass.get(c).overlapRadiusDegree(microByClass.get(c1)); - if (overlap > overlapThreshold) { - pointInMicroClusters.get(c).addAll(pointInMicroClusters.get(c1)); - SphereCluster s = new SphereCluster(pointInMicroClusters.get(c), dim); - microByClass.set(c, s); - pointInMicroClusters.remove(c1); - microByClass.remove(c1); - changed = true; - break; - } - } - } - } - - for (SphereCluster microByClas : microByClass) { - microByClas.setGroundTruth(sorted_points[i].get(0).classValue()); - clusters.add(microByClas); - } - } - - for (int j = 0; j < clusters.size(); j++) { - clusters.get(j).setId(j); - } - - } - - /** - * @param points - points to be clustered - * @return an array with the min and max class label value - */ - public static HashMap<Integer, Integer> classValues(List<? extends Instance> points) { - HashMap<Integer, Integer> classes = new HashMap<>(); - int workCluster = 0; - boolean hasNoise = false; - for (Instance point : points) { - int label = (int) point.classValue(); - if (label == -1) { - hasNoise = true; - } else { - if (!classes.containsKey(label)) { - classes.put(label, workCluster); - workCluster++; - } - } - } - - if (hasNoise) { - classes.put(-1, workCluster); - } - return classes; - } - - public Clustering(AutoExpandVector<Cluster> clusters) { - this.clusters = clusters; - } - - /** - * add a cluster to the clustering - */ - public void add(Cluster cluster) { - clusters.add(cluster); - } - - /** - * remove a cluster from the clustering - */ - public void remove(int index) { - if (index < clusters.size()) { - clusters.remove(index); - } - } - - /** - * remove a cluster from the clustering - */ - public Cluster get(int index) { - if (index < clusters.size()) { - return clusters.get(index); - } - return null; - } - - /** - * @return the <code>Clustering</code> as an AutoExpandVector - */ - public AutoExpandVector<Cluster> getClustering() { - return clusters; - } - - /** - * @return A deepcopy of the <code>Clustering</code> as an AutoExpandVector - */ - public AutoExpandVector<Cluster> getClusteringCopy() { - return (AutoExpandVector<Cluster>) clusters.copy(); - } - - /** - * @return the number of clusters - */ - public int size() { - return clusters.size(); - } - - /** - * @return the number of dimensions of this clustering - */ - public int dimension() { - assert (clusters.size() != 0); - return clusters.get(0).getCenter().length; - } - - @Override - public void getDescription(StringBuilder sb, int i) { - sb.append("Clustering Object"); - } - - public double getMaxInclusionProbability(Instance point) { - double maxInclusion = 0.0; - for (Cluster cluster : clusters) { - maxInclusion = Math.max(cluster.getInclusionProbability(point), maxInclusion); - } - return maxInclusion; - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Miniball.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Miniball.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Miniball.java deleted file mode 100644 index 1cfcf52..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Miniball.java +++ /dev/null @@ -1,84 +0,0 @@ -package com.yahoo.labs.samoa.moa.cluster; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.dreizak.miniball.model.ArrayPointSet; -import com.dreizak.miniball.model.PointSet; -import java.util.ArrayList; -import java.util.List; - -public class Miniball { - - private int dimension; - private com.dreizak.miniball.highdim.Miniball mb; - private PointStorage pointSet; - - public Miniball(int dimension) { - this.dimension = dimension; - } - - void clear() { - this.pointSet = new PointStorage(this.dimension); - } - - void check_in(double[] array) { - this.pointSet.add(array); - } - - double[] center() { - return this.mb.center(); - } - - double radius() { - return this.mb.radius(); - } - - void build() { - this.mb = new com.dreizak.miniball.highdim.Miniball(this.pointSet); - } - - public class PointStorage implements PointSet { - - protected int dimension; - protected List<double[]> L; - - public PointStorage(int dimension) { - this.dimension = dimension; - this.L = new ArrayList<double[]>(); - } - - public void add(double[] array) { - this.L.add(array); - } - - public int size() { - return L.size(); - } - - public int dimension() { - return dimension; - } - - public double coord(int point, int coordinate) { - return L.get(point)[coordinate]; - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/SphereCluster.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/SphereCluster.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/SphereCluster.java deleted file mode 100644 index 55c8fdd..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/SphereCluster.java +++ /dev/null @@ -1,363 +0,0 @@ -package com.yahoo.labs.samoa.moa.cluster; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.instances.DenseInstance; -import com.yahoo.labs.samoa.instances.Instance; -import java.util.List; -import java.util.Random; - -/** - * A simple implementation of the <code>Cluster</code> interface representing spherical clusters. The inclusion - * probability is one inside the sphere and zero everywhere else. - * - */ -public class SphereCluster extends Cluster { - - private static final long serialVersionUID = 1L; - - private double[] center; - private double radius; - private double weight; - - public SphereCluster(double[] center, double radius) { - this(center, radius, 1.0); - } - - public SphereCluster() { - } - - public SphereCluster(double[] center, double radius, double weightedSize) { - this(); - this.center = center; - this.radius = radius; - this.weight = weightedSize; - } - - public SphereCluster(int dimensions, double radius, Random random) { - this(); - this.center = new double[dimensions]; - this.radius = radius; - - // Position randomly but keep hypersphere inside the boundaries - double interval = 1.0 - 2 * radius; - for (int i = 0; i < center.length; i++) { - this.center[i] = (random.nextDouble() * interval) + radius; - } - this.weight = 0.0; - } - - public SphereCluster(List<? extends Instance> instances, int dimension) { - this(); - if (instances == null || instances.size() <= 0) - return; - - weight = instances.size(); - - Miniball mb = new Miniball(dimension); - mb.clear(); - - for (Instance instance : instances) { - mb.check_in(instance.toDoubleArray()); - } - - mb.build(); - center = mb.center(); - radius = mb.radius(); - mb.clear(); - } - - /** - * Checks whether two <code>SphereCluster</code> overlap based on radius NOTE: overlapRadiusDegree only calculates the - * overlap based on the centers and the radi, so not the real overlap - * - * TODO: should we do this by MC to get the real overlap??? - * - * @param other - * @return - */ - - public double overlapRadiusDegree(SphereCluster other) { - - double[] center0 = getCenter(); - double radius0 = getRadius(); - - double[] center1 = other.getCenter(); - double radius1 = other.getRadius(); - - double radiusBig; - double radiusSmall; - if (radius0 < radius1) { - radiusBig = radius1; - radiusSmall = radius0; - } - else { - radiusBig = radius0; - radiusSmall = radius1; - } - - double dist = 0; - for (int i = 0; i < center0.length; i++) { - double delta = center0[i] - center1[i]; - dist += delta * delta; - } - dist = Math.sqrt(dist); - - if (dist > radiusSmall + radiusBig) - return 0; - if (dist + radiusSmall <= radiusBig) { - // one lies within the other - return 1; - } - else { - return (radiusSmall + radiusBig - dist) / (2 * radiusSmall); - } - } - - public void combine(SphereCluster cluster) { - double[] center = getCenter(); - double[] newcenter = new double[center.length]; - double[] other_center = cluster.getCenter(); - double other_weight = cluster.getWeight(); - double other_radius = cluster.getRadius(); - - for (int i = 0; i < center.length; i++) { - newcenter[i] = (center[i] * getWeight() + other_center[i] * other_weight) / (getWeight() + other_weight); - } - - center = newcenter; - double r_0 = getRadius() + Math.abs(distance(center, newcenter)); - double r_1 = other_radius + Math.abs(distance(other_center, newcenter)); - radius = Math.max(r_0, r_1); - weight += other_weight; - } - - public void merge(SphereCluster cluster) { - double[] c0 = getCenter(); - double w0 = getWeight(); - double r0 = getRadius(); - - double[] c1 = cluster.getCenter(); - double w1 = cluster.getWeight(); - double r1 = cluster.getRadius(); - - // vector - double[] v = new double[c0.length]; - // center distance - double d = 0; - - for (int i = 0; i < c0.length; i++) { - v[i] = c0[i] - c1[i]; - d += v[i] * v[i]; - } - d = Math.sqrt(d); - - double r; - double[] c = new double[c0.length]; - - // one lays within the others - if (d + r0 <= r1 || d + r1 <= r0) { - if (d + r0 <= r1) { - r = r1; - c = c1; - } - else { - r = r0; - c = c0; - } - } - else { - r = (r0 + r1 + d) / 2.0; - for (int i = 0; i < c.length; i++) { - c[i] = c1[i] - v[i] / d * (r1 - r); - } - } - - setCenter(c); - setRadius(r); - setWeight(w0 + w1); - - } - - @Override - public double[] getCenter() { - double[] copy = new double[center.length]; - System.arraycopy(center, 0, copy, 0, center.length); - return copy; - } - - public void setCenter(double[] center) { - this.center = center; - } - - public double getRadius() { - return radius; - } - - public void setRadius(double radius) { - this.radius = radius; - } - - @Override - public double getWeight() { - return weight; - } - - public void setWeight(double weight) { - this.weight = weight; - } - - @Override - public double getInclusionProbability(Instance instance) { - if (getCenterDistance(instance) <= getRadius()) { - return 1.0; - } - return 0.0; - } - - public double getCenterDistance(Instance instance) { - double distance = 0.0; - // get the center through getCenter so subclass have a chance - double[] center = getCenter(); - for (int i = 0; i < center.length; i++) { - double d = center[i] - instance.value(i); - distance += d * d; - } - return Math.sqrt(distance); - } - - public double getCenterDistance(SphereCluster other) { - return distance(getCenter(), other.getCenter()); - } - - /* - * the minimal distance between the surface of two clusters. is negative if - * the two clusters overlap - */ - public double getHullDistance(SphereCluster other) { - double distance; - // get the center through getCenter so subclass have a chance - double[] center0 = getCenter(); - double[] center1 = other.getCenter(); - distance = distance(center0, center1); - - distance = distance - getRadius() - other.getRadius(); - return distance; - } - - /* - */ - /** - * When a clusters looses points the new minimal bounding sphere can be partly outside of the originating cluster. If - * a another cluster is right next to the original cluster (without overlapping), the new cluster can be overlapping - * with this second cluster. OverlapSave will tell you if the current cluster can degenerate so much that it overlaps - * with cluster 'other' - * - * @param other - * the potentially overlapping cluster - * @return true if cluster can potentially overlap - */ - public boolean overlapSave(SphereCluster other) { - // use basic geometry to figure out the maximal degenerated cluster - // comes down to Max(radius *(sin alpha + cos alpha)) which is - double minDist = Math.sqrt(2) * (getRadius() + other.getRadius()); - double diff = getCenterDistance(other) - minDist; - - return diff > 0; - } - - private double distance(double[] v1, double[] v2) { - double distance = 0.0; - double[] center = getCenter(); - for (int i = 0; i < center.length; i++) { - double d = v1[i] - v2[i]; - distance += d * d; - } - return Math.sqrt(distance); - } - - public double[] getDistanceVector(Instance instance) { - return distanceVector(getCenter(), instance.toDoubleArray()); - } - - public double[] getDistanceVector(SphereCluster other) { - return distanceVector(getCenter(), other.getCenter()); - } - - private double[] distanceVector(double[] v1, double[] v2) { - double[] v = new double[v1.length]; - for (int i = 0; i < v1.length; i++) { - v[i] = v2[i] - v1[i]; - } - return v; - } - - /** - * Samples this cluster by returning a point from inside it. - * - * @param random - * a random number source - * @return a point that lies inside this cluster - */ - public Instance sample(Random random) { - // Create sample in hypersphere coordinates - // get the center through getCenter so subclass have a chance - double[] center = getCenter(); - - final int dimensions = center.length; - - final double sin[] = new double[dimensions - 1]; - final double cos[] = new double[dimensions - 1]; - final double length = random.nextDouble() * getRadius(); - - double lastValue = 1.0; - for (int i = 0; i < dimensions - 1; i++) { - double angle = random.nextDouble() * 2 * Math.PI; - sin[i] = lastValue * Math.sin(angle); // Store cumulative values - cos[i] = Math.cos(angle); - lastValue = sin[i]; - } - - // Calculate cartesian coordinates - double res[] = new double[dimensions]; - - // First value uses only cosines - res[0] = center[0] + length * cos[0]; - - // Loop through 'middle' coordinates which use cosines and sines - for (int i = 1; i < dimensions - 1; i++) { - res[i] = center[i] + length * sin[i - 1] * cos[i]; - } - - // Last value uses only sines - res[dimensions - 1] = center[dimensions - 1] + length * sin[dimensions - 2]; - - return new DenseInstance(1.0, res); - } - - @Override - protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue) { - super.getClusterSpecificInfo(infoTitle, infoValue); - infoTitle.add("Radius"); - infoValue.add(Double.toString(getRadius())); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/AbstractClusterer.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/AbstractClusterer.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/AbstractClusterer.java deleted file mode 100644 index 8fb386b..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/AbstractClusterer.java +++ /dev/null @@ -1,298 +0,0 @@ -package com.yahoo.labs.samoa.moa.clusterers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.util.LinkedList; -import java.util.List; -import java.util.Random; -import com.yahoo.labs.samoa.moa.cluster.Clustering; - -import com.yahoo.labs.samoa.instances.InstancesHeader; -import com.yahoo.labs.samoa.moa.core.Measurement; -import com.yahoo.labs.samoa.moa.core.ObjectRepository; -import com.yahoo.labs.samoa.moa.core.StringUtils; -import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; -import com.github.javacliparser.FlagOption; -import com.github.javacliparser.IntOption; -import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; -import com.yahoo.labs.samoa.instances.Instance; -import com.yahoo.labs.samoa.instances.Instances; - -public abstract class AbstractClusterer extends AbstractOptionHandler - implements Clusterer { - - @Override - public String getPurposeString() { - return "MOA Clusterer: " + getClass().getCanonicalName(); - } - - protected InstancesHeader modelContext; - - protected double trainingWeightSeenByModel = 0.0; - - protected int randomSeed = 1; - - protected IntOption randomSeedOption; - - public FlagOption evaluateMicroClusteringOption; - - protected Random clustererRandom; - - protected Clustering clustering; - - public AbstractClusterer() { - if (isRandomizable()) { - this.randomSeedOption = new IntOption("randomSeed", 'r', - "Seed for random behaviour of the Clusterer.", 1); - } - - if (implementsMicroClusterer()) { - this.evaluateMicroClusteringOption = - new FlagOption("evaluateMicroClustering", 'M', - "Evaluate the underlying microclustering instead of the macro clustering"); - } - } - - @Override - public void prepareForUseImpl(TaskMonitor monitor, - ObjectRepository repository) { - if (this.randomSeedOption != null) { - this.randomSeed = this.randomSeedOption.getValue(); - } - if (!trainingHasStarted()) { - resetLearning(); - } - clustering = new Clustering(); - } - - public void setModelContext(InstancesHeader ih) { - if ((ih != null) && (ih.classIndex() < 0)) { - throw new IllegalArgumentException( - "Context for a Clusterer must include a class to learn"); - } - if (trainingHasStarted() - && (this.modelContext != null) - && ((ih == null) || !contextIsCompatible(this.modelContext, ih))) { - throw new IllegalArgumentException( - "New context is not compatible with existing model"); - } - this.modelContext = ih; - } - - public InstancesHeader getModelContext() { - return this.modelContext; - } - - public void setRandomSeed(int s) { - this.randomSeed = s; - if (this.randomSeedOption != null) { - // keep option consistent - this.randomSeedOption.setValue(s); - } - } - - public boolean trainingHasStarted() { - return this.trainingWeightSeenByModel > 0.0; - } - - public double trainingWeightSeenByModel() { - return this.trainingWeightSeenByModel; - } - - public void resetLearning() { - this.trainingWeightSeenByModel = 0.0; - if (isRandomizable()) { - this.clustererRandom = new Random(this.randomSeed); - } - resetLearningImpl(); - } - - public void trainOnInstance(Instance inst) { - if (inst.weight() > 0.0) { - this.trainingWeightSeenByModel += inst.weight(); - trainOnInstanceImpl(inst); - } - } - - public Measurement[] getModelMeasurements() { - List<Measurement> measurementList = new LinkedList<Measurement>(); - measurementList.add(new Measurement("model training instances", - trainingWeightSeenByModel())); - measurementList.add(new Measurement("model serialized size (bytes)", - measureByteSize())); - Measurement[] modelMeasurements = getModelMeasurementsImpl(); - if (modelMeasurements != null) { - for (Measurement measurement : modelMeasurements) { - measurementList.add(measurement); - } - } - // add average of sub-model measurements - Clusterer[] subModels = getSubClusterers(); - if ((subModels != null) && (subModels.length > 0)) { - List<Measurement[]> subMeasurements = new LinkedList<Measurement[]>(); - for (Clusterer subModel : subModels) { - if (subModel != null) { - subMeasurements.add(subModel.getModelMeasurements()); - } - } - Measurement[] avgMeasurements = Measurement - .averageMeasurements(subMeasurements - .toArray(new Measurement[subMeasurements.size()][])); - for (Measurement measurement : avgMeasurements) { - measurementList.add(measurement); - } - } - return measurementList.toArray(new Measurement[measurementList.size()]); - } - - public void getDescription(StringBuilder out, int indent) { - StringUtils.appendIndented(out, indent, "Model type: "); - out.append(this.getClass().getName()); - StringUtils.appendNewline(out); - Measurement.getMeasurementsDescription(getModelMeasurements(), out, - indent); - StringUtils.appendNewlineIndented(out, indent, "Model description:"); - StringUtils.appendNewline(out); - if (trainingHasStarted()) { - getModelDescription(out, indent); - } else { - StringUtils.appendIndented(out, indent, - "Model has not been trained."); - } - } - - public Clusterer[] getSubClusterers() { - return null; - } - - @Override - public Clusterer copy() { - return (Clusterer) super.copy(); - } - - // public boolean correctlyClassifies(Instance inst) { - // return Utils.maxIndex(getVotesForInstance(inst)) == (int) inst - // .classValue(); - // } - - public String getClassNameString() { - return InstancesHeader.getClassNameString(this.modelContext); - } - - public String getClassLabelString(int classLabelIndex) { - return InstancesHeader.getClassLabelString(this.modelContext, - classLabelIndex); - } - - public String getAttributeNameString(int attIndex) { - return InstancesHeader.getAttributeNameString(this.modelContext, - attIndex); - } - - public String getNominalValueString(int attIndex, int valIndex) { - return InstancesHeader.getNominalValueString(this.modelContext, - attIndex, valIndex); - } - - // originalContext notnull - // newContext notnull - public static boolean contextIsCompatible(InstancesHeader originalContext, - InstancesHeader newContext) { - // rule 1: num classes can increase but never decrease - // rule 2: num attributes can increase but never decrease - // rule 3: num nominal attribute values can increase but never decrease - // rule 4: attribute types must stay in the same order (although class - // can - // move; is always skipped over) - // attribute names are free to change, but should always still represent - // the original attributes - if (newContext.numClasses() < originalContext.numClasses()) { - return false; // rule 1 - } - if (newContext.numAttributes() < originalContext.numAttributes()) { - return false; // rule 2 - } - int oPos = 0; - int nPos = 0; - while (oPos < originalContext.numAttributes()) { - if (oPos == originalContext.classIndex()) { - oPos++; - if (!(oPos < originalContext.numAttributes())) { - break; - } - } - if (nPos == newContext.classIndex()) { - nPos++; - } - if (originalContext.attribute(oPos).isNominal()) { - if (!newContext.attribute(nPos).isNominal()) { - return false; // rule 4 - } - if (newContext.attribute(nPos).numValues() < originalContext - .attribute(oPos).numValues()) { - return false; // rule 3 - } - } else { - assert (originalContext.attribute(oPos).isNumeric()); - if (!newContext.attribute(nPos).isNumeric()) { - return false; // rule 4 - } - } - oPos++; - nPos++; - } - return true; // all checks clear - } - - // reason for ...Impl methods: - // ease programmer burden by not requiring them to remember calls to super - // in overridden methods & will produce compiler errors if not overridden - - public abstract void resetLearningImpl(); - - public abstract void trainOnInstanceImpl(Instance inst); - - protected abstract Measurement[] getModelMeasurementsImpl(); - - public abstract void getModelDescription(StringBuilder out, int indent); - - protected static int modelAttIndexToInstanceAttIndex(int index, - Instance inst) { - return inst.classIndex() > index ? index : index + 1; - } - - protected static int modelAttIndexToInstanceAttIndex(int index, - Instances insts) { - return insts.classIndex() > index ? index : index + 1; - } - - public boolean implementsMicroClusterer() { - return false; - } - - public boolean keepClassLabel() { - return false; - } - - public Clustering getMicroClusteringResult() { - return null; - }; -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/ClusterGenerator.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/ClusterGenerator.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/ClusterGenerator.java deleted file mode 100644 index 2ddb770..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/ClusterGenerator.java +++ /dev/null @@ -1,356 +0,0 @@ -package com.yahoo.labs.samoa.moa.clusterers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Random; -import com.github.javacliparser.FloatOption; -import com.github.javacliparser.IntOption; -import com.yahoo.labs.samoa.moa.cluster.Clustering; -import com.yahoo.labs.samoa.moa.cluster.SphereCluster; -import com.yahoo.labs.samoa.moa.core.Measurement; -import com.yahoo.labs.samoa.moa.core.DataPoint; -import com.yahoo.labs.samoa.instances.Instance; - -public class ClusterGenerator extends AbstractClusterer { - - private static final long serialVersionUID = 1L; - - public IntOption timeWindowOption = new IntOption("timeWindow", - 't', "Rang of the window.", 1000); - - public FloatOption radiusDecreaseOption = new FloatOption("radiusDecrease", 'r', - "The average radii of the centroids in the model.", 0, 0, 1); - - public FloatOption radiusIncreaseOption = new FloatOption("radiusIncrease", 'R', - "The average radii of the centroids in the model.", 0, 0, 1); - - public FloatOption positionOffsetOption = new FloatOption("positionOffset", 'p', - "The average radii of the centroids in the model.", 0, 0, 1); - - public FloatOption clusterRemoveOption = new FloatOption("clusterRemove", 'D', - "Deletes complete clusters from the clustering.", 0, 0, 1); - - public FloatOption joinClustersOption = new FloatOption("joinClusters", 'j', - "Join two clusters if their hull distance is less minRadius times this factor.", 0, 0, 1); - - public FloatOption clusterAddOption = new FloatOption("clusterAdd", 'A', - "Adds additional clusters.", 0, 0, 1); - - private static double err_intervall_width = 0.0; - private ArrayList<DataPoint> points; - private int instanceCounter; - private int windowCounter; - private Random random; - private Clustering sourceClustering = null; - - @Override - public void resetLearningImpl() { - points = new ArrayList<DataPoint>(); - instanceCounter = 0; - windowCounter = 0; - random = new Random(227); - - // joinClustersOption.set(); - // evaluateMicroClusteringOption.set(); - } - - @Override - public void trainOnInstanceImpl(Instance inst) { - if (windowCounter >= timeWindowOption.getValue()) { - points.clear(); - windowCounter = 0; - } - windowCounter++; - instanceCounter++; - points.add(new DataPoint(inst, instanceCounter)); - } - - @Override - public boolean implementsMicroClusterer() { - return true; - } - - public void setSourceClustering(Clustering source) { - sourceClustering = source; - } - - @Override - public Clustering getMicroClusteringResult() { - // System.out.println("Numcluster:"+clustering.size()+" / "+num); - // Clustering source_clustering = new Clustering(points, overlapThreshold, - // microInitMinPoints); - if (sourceClustering == null) { - - System.out.println("You need to set a source clustering for the ClusterGenerator to work"); - return null; - } - return alterClustering(sourceClustering); - } - - public Clustering getClusteringResult() { - sourceClustering = new Clustering(points); - // if(sourceClustering == null){ - // System.out.println("You need to set a source clustering for the ClusterGenerator to work"); - // return null; - // } - return alterClustering(sourceClustering); - } - - private Clustering alterClustering(Clustering scclustering) { - // percentage of the radius that will be cut off - // 0: no changes to radius - // 1: radius of 0 - double errLevelRadiusDecrease = radiusDecreaseOption.getValue(); - - // 0: no changes to radius - // 1: radius 100% bigger - double errLevelRadiusIncrease = radiusIncreaseOption.getValue(); - - // 0: no changes - // 1: distance between centers is 2 * original radius - double errLevelPosition = positionOffsetOption.getValue(); - - int numRemoveCluster = (int) (clusterRemoveOption.getValue() * scclustering.size()); - - int numAddCluster = (int) (clusterAddOption.getValue() * scclustering.size()); - - for (int c = 0; c < numRemoveCluster; c++) { - int delId = random.nextInt(scclustering.size()); - scclustering.remove(delId); - } - - int numCluster = scclustering.size(); - double[] err_seeds = new double[numCluster]; - double err_seed_sum = 0.0; - double tmp_seed; - for (int i = 0; i < numCluster; i++) { - tmp_seed = random.nextDouble(); - err_seeds[i] = err_seed_sum + tmp_seed; - err_seed_sum += tmp_seed; - } - - double sumWeight = 0; - for (int i = 0; i < numCluster; i++) { - sumWeight += scclustering.get(i).getWeight(); - } - - Clustering clustering = new Clustering(); - - for (int i = 0; i < numCluster; i++) { - if (!(scclustering.get(i) instanceof SphereCluster)) { - System.out.println("Not a Sphere Cluster"); - continue; - } - SphereCluster sourceCluster = (SphereCluster) scclustering.get(i); - double[] center = Arrays.copyOf(sourceCluster.getCenter(), sourceCluster.getCenter().length); - double weight = sourceCluster.getWeight(); - double radius = sourceCluster.getRadius(); - - // move cluster center - if (errLevelPosition > 0) { - double errOffset = random.nextDouble() * err_intervall_width / 2.0; - double errOffsetDirection = ((random.nextBoolean()) ? 1 : -1); - double level = errLevelPosition + errOffsetDirection * errOffset; - double[] vector = new double[center.length]; - double vectorLength = 0; - for (int d = 0; d < center.length; d++) { - vector[d] = (random.nextBoolean() ? 1 : -1) * random.nextDouble(); - vectorLength += Math.pow(vector[d], 2); - } - vectorLength = Math.sqrt(vectorLength); - - // max is when clusters are next to each other - double length = 2 * radius * level; - - for (int d = 0; d < center.length; d++) { - // normalize length and then strecht to reach error position - vector[d] = vector[d] / vectorLength * length; - } - // System.out.println("Center "+Arrays.toString(center)); - // System.out.println("Vector "+Arrays.toString(vector)); - // check if error position is within bounds - double[] newCenter = new double[center.length]; - for (int d = 0; d < center.length; d++) { - // check bounds, otherwise flip vector - if (center[d] + vector[d] >= 0 && center[d] + vector[d] <= 1) { - newCenter[d] = center[d] + vector[d]; - } - else { - newCenter[d] = center[d] + (-1) * vector[d]; - } - } - center = newCenter; - for (int d = 0; d < center.length; d++) { - if (newCenter[d] >= 0 && newCenter[d] <= 1) { - } - else { - System.out - .println("This shouldnt have happend, Cluster center out of bounds:" + Arrays.toString(newCenter)); - } - } - // System.out.println("new Center "+Arrays.toString(newCenter)); - - } - - // alter radius - if (errLevelRadiusDecrease > 0 || errLevelRadiusIncrease > 0) { - double errOffset = random.nextDouble() * err_intervall_width / 2.0; - int errOffsetDirection = ((random.nextBoolean()) ? 1 : -1); - - if (errLevelRadiusDecrease > 0 && (errLevelRadiusIncrease == 0 || random.nextBoolean())) { - double level = (errLevelRadiusDecrease + errOffsetDirection * errOffset);// *sourceCluster.getWeight()/sumWeight; - level = (level < 0) ? 0 : level; - level = (level > 1) ? 1 : level; - radius *= (1 - level); - } - else { - double level = errLevelRadiusIncrease + errOffsetDirection * errOffset; - level = (level < 0) ? 0 : level; - level = (level > 1) ? 1 : level; - radius += radius * level; - } - } - - SphereCluster newCluster = new SphereCluster(center, radius, weight); - newCluster.setMeasureValue("Source Cluster", "C" + sourceCluster.getId()); - - clustering.add(newCluster); - } - - if (joinClustersOption.getValue() > 0) { - clustering = joinClusters(clustering); - } - - // add new clusters by copying clusters and set a random center - for (int c = 0; c < numAddCluster; c++) { - int copyId = random.nextInt(clustering.size()); - SphereCluster scorg = (SphereCluster) clustering.get(copyId); - int dim = scorg.getCenter().length; - double[] center = new double[dim]; - double radius = scorg.getRadius(); - - boolean outofbounds = true; - int tryCounter = 0; - while (outofbounds && tryCounter < 20) { - tryCounter++; - outofbounds = false; - for (int j = 0; j < center.length; j++) { - center[j] = random.nextDouble(); - if (center[j] - radius < 0 || center[j] + radius > 1) { - outofbounds = true; - break; - } - } - } - if (outofbounds) { - System.out.println("Coludn't place additional cluster"); - } - else { - SphereCluster scnew = new SphereCluster(center, radius, scorg.getWeight() / 2); - scorg.setWeight(scorg.getWeight() - scnew.getWeight()); - clustering.add(scnew); - } - } - - return clustering; - - } - - private Clustering joinClusters(Clustering clustering) { - - double radiusFactor = joinClustersOption.getValue(); - boolean[] merged = new boolean[clustering.size()]; - - Clustering mclustering = new Clustering(); - - if (radiusFactor > 0) { - for (int c1 = 0; c1 < clustering.size(); c1++) { - SphereCluster sc1 = (SphereCluster) clustering.get(c1); - double minDist = Double.MAX_VALUE; - double minOver = 1; - int maxindexCon = -1; - int maxindexOver = -1; - for (int c2 = 0; c2 < clustering.size(); c2++) { - SphereCluster sc2 = (SphereCluster) clustering.get(c2); - // double over = sc1.overlapRadiusDegree(sc2); - // if(over > 0 && over < minOver){ - // minOver = over; - // maxindexOver = c2; - // } - double dist = sc1.getHullDistance(sc2); - double threshold = Math.min(sc1.getRadius(), sc2.getRadius()) * radiusFactor; - if (dist > 0 && dist < minDist && dist < threshold) { - minDist = dist; - maxindexCon = c2; - } - } - int maxindex = -1; - if (maxindexOver != -1) - maxindex = maxindexOver; - else - maxindex = maxindexCon; - - if (maxindex != -1 && !merged[c1]) { - merged[c1] = true; - merged[maxindex] = true; - SphereCluster scnew = new SphereCluster(sc1.getCenter(), sc1.getRadius(), sc1.getWeight()); - SphereCluster sc2 = (SphereCluster) clustering.get(maxindex); - scnew.merge(sc2); - mclustering.add(scnew); - } - } - } - - for (int i = 0; i < merged.length; i++) { - if (!merged[i]) - mclustering.add(clustering.get(i)); - } - - return mclustering; - - } - - @Override - protected Measurement[] getModelMeasurementsImpl() { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public void getModelDescription(StringBuilder out, int indent) { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public boolean isRandomizable() { - return false; - } - - @Override - public boolean keepClassLabel() { - return true; - } - - public double[] getVotesForInstance(Instance inst) { - return null; - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java deleted file mode 100644 index 80c243d..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java +++ /dev/null @@ -1,64 +0,0 @@ -package com.yahoo.labs.samoa.moa.clusterers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.moa.MOAObject; -import com.yahoo.labs.samoa.moa.cluster.Clustering; -import com.yahoo.labs.samoa.instances.InstancesHeader; -import com.yahoo.labs.samoa.moa.core.Measurement; -import com.yahoo.labs.samoa.moa.options.OptionHandler; -import com.yahoo.labs.samoa.instances.Instance; - -public interface Clusterer extends MOAObject, OptionHandler { - - public void setModelContext(InstancesHeader ih); - - public InstancesHeader getModelContext(); - - public boolean isRandomizable(); - - public void setRandomSeed(int s); - - public boolean trainingHasStarted(); - - public double trainingWeightSeenByModel(); - - public void resetLearning(); - - public void trainOnInstance(Instance inst); - - public double[] getVotesForInstance(Instance inst); - - public Measurement[] getModelMeasurements(); - - public Clusterer[] getSubClusterers(); - - public Clusterer copy(); - - public Clustering getClusteringResult(); - - public boolean implementsMicroClusterer(); - - public Clustering getMicroClusteringResult(); - - public boolean keepClassLabel(); - -}
