http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/PageHinkleyDM.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/PageHinkleyDM.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/PageHinkleyDM.java new file mode 100644 index 0000000..08514b0 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/driftdetection/PageHinkleyDM.java @@ -0,0 +1,115 @@ +package org.apache.samoa.moa.classifiers.core.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.tasks.TaskMonitor; + +import com.github.javacliparser.FloatOption; +import com.github.javacliparser.IntOption; + +/** + * Drift detection method based in Page Hinkley Test. 
+ * + * + * @author Manuel Baena ([email protected]) + * @version $Revision: 7 $ + */ +public class PageHinkleyDM extends AbstractChangeDetector { + + private static final long serialVersionUID = -3518369648142099719L; + + public IntOption minNumInstancesOption = new IntOption( + "minNumInstances", + 'n', + "The minimum number of instances before permitting detecting change.", + 30, 0, Integer.MAX_VALUE); + + public FloatOption deltaOption = new FloatOption("delta", 'd', + "Delta parameter of the Page Hinkley Test", 0.005, 0.0, 1.0); + + public FloatOption lambdaOption = new FloatOption("lambda", 'l', + "Lambda parameter of the Page Hinkley Test", 50, 0.0, Float.MAX_VALUE); + + public FloatOption alphaOption = new FloatOption("alpha", 'a', + "Alpha parameter of the Page Hinkley Test", 1 - 0.0001, 0.0, 1.0); + + private int m_n; + + private double sum; + + private double x_mean; + + private double alpha; + + private double delta; + + private double lambda; + + public PageHinkleyDM() { + resetLearning(); + } + + @Override + public void resetLearning() { + m_n = 1; + x_mean = 0.0; + sum = 0.0; + delta = this.deltaOption.getValue(); + alpha = this.alphaOption.getValue(); + lambda = this.lambdaOption.getValue(); + } + + @Override + public void input(double x) { + // It monitors the error rate + if (this.isChangeDetected) { + resetLearning(); + } + + x_mean = x_mean + (x - x_mean) / (double) m_n; + sum = this.alpha * sum + (x - x_mean - this.delta); + m_n++; + this.estimation = x_mean; + this.isChangeDetected = false; + this.isWarningZone = false; + this.delay = 0; + + if (m_n < this.minNumInstancesOption.getValue()) { + return; + } + + if (sum > this.lambda) { + this.isChangeDetected = true; + } + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/GiniSplitCriterion.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/GiniSplitCriterion.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/GiniSplitCriterion.java new file mode 100644 index 0000000..a10319a --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/GiniSplitCriterion.java @@ -0,0 +1,84 @@ +package org.apache.samoa.moa.classifiers.core.splitcriteria; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.core.Utils; +import org.apache.samoa.moa.options.AbstractOptionHandler; +import org.apache.samoa.moa.tasks.TaskMonitor; + +/** + * Class for computing splitting criteria using Gini with respect to distributions of class values. The split criterion + * is used as a parameter on decision trees and decision stumps. 
+ * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class GiniSplitCriterion extends AbstractOptionHandler implements + SplitCriterion { + + private static final long serialVersionUID = 1L; + + @Override + public double getMeritOfSplit(double[] preSplitDist, double[][] postSplitDists) { + double totalWeight = 0.0; + double[] distWeights = new double[postSplitDists.length]; + for (int i = 0; i < postSplitDists.length; i++) { + distWeights[i] = Utils.sum(postSplitDists[i]); + totalWeight += distWeights[i]; + } + double gini = 0.0; + for (int i = 0; i < postSplitDists.length; i++) { + gini += (distWeights[i] / totalWeight) + * computeGini(postSplitDists[i], distWeights[i]); + } + return 1.0 - gini; + } + + @Override + public double getRangeOfMerit(double[] preSplitDist) { + return 1.0; + } + + public static double computeGini(double[] dist, double distSumOfWeights) { + double gini = 1.0; + for (double aDist : dist) { + double relFreq = aDist / distSumOfWeights; + gini -= relFreq * relFreq; + } + return gini; + } + + public static double computeGini(double[] dist) { + return computeGini(dist, Utils.sum(dist)); + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterion.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterion.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterion.java new file mode 100644 index 0000000..4ee88cc --- /dev/null +++ 
b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterion.java @@ -0,0 +1,117 @@ +package org.apache.samoa.moa.classifiers.core.splitcriteria; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.core.Utils; +import org.apache.samoa.moa.options.AbstractOptionHandler; +import org.apache.samoa.moa.tasks.TaskMonitor; + +import com.github.javacliparser.FloatOption; + +/** + * Class for computing splitting criteria using information gain with respect to distributions of class values. The + * split criterion is used as a parameter on decision trees and decision stumps. 
+ * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class InfoGainSplitCriterion extends AbstractOptionHandler implements + SplitCriterion { + + private static final long serialVersionUID = 1L; + + public FloatOption minBranchFracOption = new FloatOption("minBranchFrac", + 'f', + "Minimum fraction of weight required down at least two branches.", + 0.01, 0.0, 0.5); + + @Override + public double getMeritOfSplit(double[] preSplitDist, + double[][] postSplitDists) { + if (numSubsetsGreaterThanFrac(postSplitDists, this.minBranchFracOption.getValue()) < 2) { + return Double.NEGATIVE_INFINITY; + } + return computeEntropy(preSplitDist) - computeEntropy(postSplitDists); + } + + @Override + public double getRangeOfMerit(double[] preSplitDist) { + int numClasses = preSplitDist.length > 2 ? preSplitDist.length : 2; + return Utils.log2(numClasses); + } + + public static double computeEntropy(double[] dist) { + double entropy = 0.0; + double sum = 0.0; + for (double d : dist) { + if (d > 0.0) { // TODO: how small can d be before log2 overflows? + entropy -= d * Utils.log2(d); + sum += d; + } + } + return sum > 0.0 ? 
(entropy + sum * Utils.log2(sum)) / sum : 0.0; + } + + public static double computeEntropy(double[][] dists) { + double totalWeight = 0.0; + double[] distWeights = new double[dists.length]; + for (int i = 0; i < dists.length; i++) { + distWeights[i] = Utils.sum(dists[i]); + totalWeight += distWeights[i]; + } + double entropy = 0.0; + for (int i = 0; i < dists.length; i++) { + entropy += distWeights[i] * computeEntropy(dists[i]); + } + return entropy / totalWeight; + } + + public static int numSubsetsGreaterThanFrac(double[][] distributions, double minFrac) { + double totalWeight = 0.0; + double[] distSums = new double[distributions.length]; + for (int i = 0; i < distSums.length; i++) { + for (int j = 0; j < distributions[i].length; j++) { + distSums[i] += distributions[i][j]; + } + totalWeight += distSums[i]; + } + int numGreater = 0; + for (double d : distSums) { + double frac = d / totalWeight; + if (frac > minFrac) { + numGreater++; + } + } + return numGreater; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterionMultilabel.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterionMultilabel.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterionMultilabel.java new file mode 100644 index 0000000..369dce1 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterionMultilabel.java @@ -0,0 +1,53 @@ +package org.apache.samoa.moa.classifiers.core.splitcriteria; 
+ +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.core.Utils; + +/** + * Class for computing splitting criteria using information gain with respect to distributions of class values for + * Multilabel data. The split criterion is used as a parameter on decision trees and decision stumps. + * + * @author Richard Kirkby ([email protected]) + * @author Jesse Read ([email protected]) + * @version $Revision: 1 $ + */ +public class InfoGainSplitCriterionMultilabel extends InfoGainSplitCriterion { + + private static final long serialVersionUID = 1L; + + public static double computeEntropy(double[] dist) { + double entropy = 0.0; + double sum = 0.0; + for (double d : dist) { + sum += d; + } + if (sum > 0.0) { + for (double num : dist) { + double d = num / sum; + if (d > 0.0) { // TODO: how small can d be before log2 overflows? + entropy -= d * Utils.log2(d) + (1 - d) * Utils.log2(1 - d); // Extension to Multilabel + } + } + } + return sum > 0.0 ? 
entropy : 0.0; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/SDRSplitCriterion.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/SDRSplitCriterion.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/SDRSplitCriterion.java new file mode 100644 index 0000000..1e4f099 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/SDRSplitCriterion.java @@ -0,0 +1,33 @@ +package org.apache.samoa.moa.classifiers.core.splitcriteria; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +public class SDRSplitCriterion extends VarianceReductionSplitCriterion { + private static final long serialVersionUID = 1L; + + public static double computeSD(double[] dist) { + int N = (int) dist[0]; + double sum = dist[1]; + double sumSq = dist[2]; + return Math.sqrt((sumSq - ((sum * sum) / N)) / N); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/SplitCriterion.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/SplitCriterion.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/SplitCriterion.java new file mode 100644 index 0000000..d97886a --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/SplitCriterion.java @@ -0,0 +1,55 @@ +package org.apache.samoa.moa.classifiers.core.splitcriteria; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.options.OptionHandler; + +/** + * Interface for computing splitting criteria. with respect to distributions of class values. The split criterion is + * used as a parameter on decision trees and decision stumps. The two split criteria most used are Information Gain and + * Gini. 
+ * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public interface SplitCriterion extends OptionHandler { + + /** + * Computes the merit of splitting for a given ditribution before the split and after it. + * + * @param preSplitDist + * the class distribution before the split + * @param postSplitDists + * the class distribution after the split + * @return value of the merit of splitting + */ + public double getMeritOfSplit(double[] preSplitDist, + double[][] postSplitDists); + + /** + * Computes the range of splitting merit + * + * @param preSplitDist + * the class distribution before the split + * @return value of the range of splitting merit + */ + public double getRangeOfMerit(double[] preSplitDist); +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/VarianceReductionSplitCriterion.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/VarianceReductionSplitCriterion.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/VarianceReductionSplitCriterion.java new file mode 100644 index 0000000..421c734 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/core/splitcriteria/VarianceReductionSplitCriterion.java @@ -0,0 +1,94 @@ +package org.apache.samoa.moa.classifiers.core.splitcriteria; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.options.AbstractOptionHandler; +import org.apache.samoa.moa.tasks.TaskMonitor; + +public class VarianceReductionSplitCriterion extends AbstractOptionHandler implements SplitCriterion { + + private static final long serialVersionUID = 1L; + + /* + * @Override public double getMeritOfSplit(double[] preSplitDist, double[][] + * postSplitDists) { + * + * double N = preSplitDist[0]; double SDR = computeSD(preSplitDist); + * + * // System.out.print("postSplitDists.length"+postSplitDists.length+"\n"); + * for(int i = 0; i < postSplitDists.length; i++) { double Ni = + * postSplitDists[i][0]; SDR -= (Ni/N)*computeSD(postSplitDists[i]); } + * + * return SDR; } + */ + + @Override + public double getMeritOfSplit(double[] preSplitDist, double[][] postSplitDists) { + double SDR = 0.0; + double N = preSplitDist[0]; + int count = 0; + + for (int i1 = 0; i1 < postSplitDists.length; i1++) { + double[] postSplitDist = postSplitDists[i1]; + double Ni = postSplitDist[0]; + if (Ni >= 5.0) { + count = count + 1; + } + } + + if (count == postSplitDists.length) { + SDR = computeSD(preSplitDist); + for (int i = 0; i < postSplitDists.length; i++) + { + double Ni = postSplitDists[i][0]; + SDR -= (Ni / N) * computeSD(postSplitDists[i]); + } + } + return SDR; + } + + @Override + public double getRangeOfMerit(double[] preSplitDist) { + return 1; + } + + public static double computeSD(double[] dist) { + + int N = (int) dist[0]; + double sum = dist[1]; + double sumSq = dist[2]; + // 
return Math.sqrt((sumSq - ((sum * sum)/N))/N); + return (sumSq - ((sum * sum) / N)) / N; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + protected void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + // TODO Auto-generated method stub + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/functions/MajorityClass.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/functions/MajorityClass.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/functions/MajorityClass.java new file mode 100644 index 0000000..8474d88 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/functions/MajorityClass.java @@ -0,0 +1,84 @@ +package org.apache.samoa.moa.classifiers.functions; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.moa.classifiers.AbstractClassifier; +import org.apache.samoa.moa.core.DoubleVector; +import org.apache.samoa.moa.core.Measurement; +import org.apache.samoa.moa.core.StringUtils; + +/** + * Majority class learner. This is the simplest classifier. 
+ * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class MajorityClass extends AbstractClassifier { + + private static final long serialVersionUID = 1L; + + @Override + public String getPurposeString() { + return "Majority class classifier: always predicts the class that has been observed most frequently the in the training data."; + } + + protected DoubleVector observedClassDistribution; + + @Override + public void resetLearningImpl() { + this.observedClassDistribution = new DoubleVector(); + } + + @Override + public void trainOnInstanceImpl(Instance inst) { + this.observedClassDistribution.addToValue((int) inst.classValue(), inst.weight()); + } + + public double[] getVotesForInstance(Instance i) { + return this.observedClassDistribution.getArrayCopy(); + } + + @Override + protected Measurement[] getModelMeasurementsImpl() { + return null; + } + + @Override + public void getModelDescription(StringBuilder out, int indent) { + StringUtils.appendIndented(out, indent, "Predicted majority "); + out.append(getClassNameString()); + out.append(" = "); + out.append(getClassLabelString(this.observedClassDistribution.maxIndex())); + StringUtils.appendNewline(out); + for (int i = 0; i < this.observedClassDistribution.numValues(); i++) { + StringUtils.appendIndented(out, indent, "Observed weight of "); + out.append(getClassLabelString(i)); + out.append(": "); + out.append(this.observedClassDistribution.getValue(i)); + StringUtils.appendNewline(out); + } + } + + public boolean isRandomizable() { + return false; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/Predicate.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/Predicate.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/Predicate.java new file mode 100644 
index 0000000..a744897 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/Predicate.java @@ -0,0 +1,33 @@ +package org.apache.samoa.moa.classifiers.rules.core; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.instances.Instance; + +/** + * Interface for a predicate (a feature) in rules. + * + */ +public interface Predicate { + + public boolean evaluate(Instance instance); + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/attributeclassobservers/FIMTDDNumericAttributeClassLimitObserver.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/attributeclassobservers/FIMTDDNumericAttributeClassLimitObserver.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/attributeclassobservers/FIMTDDNumericAttributeClassLimitObserver.java new file mode 100644 index 0000000..b235d7b --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/attributeclassobservers/FIMTDDNumericAttributeClassLimitObserver.java @@ -0,0 +1,121 @@ +package org.apache.samoa.moa.classifiers.rules.core.attributeclassobservers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache 
Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.classifiers.core.attributeclassobservers.FIMTDDNumericAttributeClassObserver; + +import com.github.javacliparser.IntOption; + +public class FIMTDDNumericAttributeClassLimitObserver extends FIMTDDNumericAttributeClassObserver { + + /** + * + */ + private static final long serialVersionUID = 1L; + protected int maxNodes; + // public IntOption maxNodesOption = new IntOption("maxNodesOption", 'z', + // "Maximum number of nodes", 50, 0, Integer.MAX_VALUE); + + protected int numNodes; + + public int getMaxNodes() { + return this.maxNodes; + } + + public void setMaxNodes(int maxNodes) { + this.maxNodes = maxNodes; + } + + @Override + public void observeAttributeClass(double attVal, double classVal, double weight) { + if (Double.isNaN(attVal)) { // Instance.isMissingValue(attVal) + } else { + if (this.root == null) { + // maxNodes=maxNodesOption.getValue(); + maxNodes = 50; + this.root = new FIMTDDNumericAttributeClassLimitObserver.Node(attVal, classVal, weight); + } else { + this.root.insertValue(attVal, classVal, weight); + } + } + } + + protected class Node extends FIMTDDNumericAttributeClassObserver.Node { + /** + * + */ + private static final long serialVersionUID = -4484141636424708465L; + + public Node(double val, double label, double weight) { + super(val, label, weight); + } + + protected Node root = null; + + /** + * Insert a new value into 
the tree, updating both the sum of values and sum of squared values arrays + */ + @Override + public void insertValue(double val, double label, double weight) { + + // If the new value equals the value stored in a node, update + // the left (<=) node information + if (val == this.cut_point) + { + this.leftStatistics.addToValue(0, 1); + this.leftStatistics.addToValue(1, label); + this.leftStatistics.addToValue(2, label * label); + } + // If the new value is less than the value in a node, update the + // left distribution and send the value down to the left child node. + // If no left child exists, create one + else if (val <= this.cut_point) { + this.leftStatistics.addToValue(0, 1); + this.leftStatistics.addToValue(1, label); + this.leftStatistics.addToValue(2, label * label); + if (this.left == null) { + if (numNodes < maxNodes) { + this.left = new Node(val, label, weight); + ++numNodes; + } + } else { + this.left.insertValue(val, label, weight); + } + } + // If the new value is greater than the value in a node, update the + // right (>) distribution and send the value down to the right child node. 
+ // If no right child exists, create one + else { // val > cut_point + this.rightStatistics.addToValue(0, 1); + this.rightStatistics.addToValue(1, label); + this.rightStatistics.addToValue(2, label * label); + if (this.right == null) { + if (numNodes < maxNodes) { + this.right = new Node(val, label, weight); + ++numNodes; + } + } else { + this.right.insertValue(val, label, weight); + } + } + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java new file mode 100644 index 0000000..9124fb8 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/conditionaltests/NumericAttributeBinaryRulePredicate.java @@ -0,0 +1,180 @@ +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/* + * NumericAttributeBinaryRulePredicate.java + * Copyright (C) 2013 University of Porto, Portugal + * @author E. Almeida, A. Carvalho, J. 
Gama + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * + */ +package org.apache.samoa.moa.classifiers.rules.core.conditionaltests; + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.moa.classifiers.core.conditionaltests.InstanceConditionalBinaryTest; +import org.apache.samoa.moa.classifiers.rules.core.Predicate; + +/** + * Numeric binary conditional test for instances to use to split nodes in AMRules. + * + * @version $Revision: 1 $ + */ +public class NumericAttributeBinaryRulePredicate extends InstanceConditionalBinaryTest implements Predicate { + + private static final long serialVersionUID = 1L; + + protected int attIndex; + + protected double attValue; + + protected int operator; // 0 =, 1<=, 2> + + public NumericAttributeBinaryRulePredicate() { + this(0, 0, 0); + } + + public NumericAttributeBinaryRulePredicate(int attIndex, double attValue, + int operator) { + this.attIndex = attIndex; + this.attValue = attValue; + this.operator = operator; + } + + public NumericAttributeBinaryRulePredicate(NumericAttributeBinaryRulePredicate oldTest) { + this(oldTest.attIndex, oldTest.attValue, oldTest.operator); + } + + @Override + public int branchForInstance(Instance inst) { + int instAttIndex = this.attIndex < inst.classIndex() ? 
this.attIndex + : this.attIndex + 1; + if (inst.isMissing(instAttIndex)) { + return -1; + } + double v = inst.value(instAttIndex); + int ret = 0; + switch (this.operator) { + case 0: + ret = (v == this.attValue) ? 0 : 1; + break; + case 1: + ret = (v <= this.attValue) ? 0 : 1; + break; + case 2: + ret = (v > this.attValue) ? 0 : 1; + } + return ret; + } + + /** + * + */ + @Override + public String describeConditionForBranch(int branch, InstancesHeader context) { + if ((branch >= 0) && (branch <= 2)) { + String compareChar = (branch == 0) ? "=" : (branch == 1) ? "<=" : ">"; + return InstancesHeader.getAttributeNameString(context, + this.attIndex) + + ' ' + + compareChar + + InstancesHeader.getNumericValueString(context, + this.attIndex, this.attValue); + } + throw new IndexOutOfBoundsException(); + } + + /** + * + */ + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } + + @Override + public int[] getAttsTestDependsOn() { + return new int[] { this.attIndex }; + } + + public double getSplitValue() { + return this.attValue; + } + + @Override + public boolean evaluate(Instance inst) { + return (branchForInstance(inst) == 0); + } + + @Override + public String toString() { + if ((operator >= 0) && (operator <= 2)) { + String compareChar = (operator == 0) ? "=" : (operator == 1) ? "<=" : ">"; + // int equalsBranch = this.equalsPassesTest ? 
0 : 1; + return "x" + this.attIndex + + ' ' + + compareChar + + ' ' + + this.attValue; + } + throw new IndexOutOfBoundsException(); + } + + public boolean isEqual(NumericAttributeBinaryRulePredicate predicate) { + return (this.attIndex == predicate.attIndex + && this.attValue == predicate.attValue + && this.operator == predicate.operator); + } + + public boolean isUsingSameAttribute(NumericAttributeBinaryRulePredicate predicate) { + return (this.attIndex == predicate.attIndex + && this.operator == predicate.operator); + } + + public boolean isIncludedInRuleNode( + NumericAttributeBinaryRulePredicate predicate) { + boolean ret; + if (this.operator == 1) { // <= + ret = (predicate.attValue <= this.attValue); + } else { // > + ret = (predicate.attValue > this.attValue); + } + + return ret; + } + + public void setAttributeValue( + NumericAttributeBinaryRulePredicate ruleSplitNodeTest) { + this.attValue = ruleSplitNodeTest.attValue; + + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/splitcriteria/SDRSplitCriterionAMRules.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/splitcriteria/SDRSplitCriterionAMRules.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/splitcriteria/SDRSplitCriterionAMRules.java new file mode 100644 index 0000000..34b33bc --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/splitcriteria/SDRSplitCriterionAMRules.java @@ -0,0 +1,99 @@ +package org.apache.samoa.moa.classifiers.rules.core.splitcriteria; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/* + * SDRSplitCriterionAMRules.java + * Copyright (C) 2014 University of Porto, Portugal + * @author A. Bifet, J. Duarte, J. Gama + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * + */ + +import org.apache.samoa.moa.classifiers.core.splitcriteria.SDRSplitCriterion; +import org.apache.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; + +public class SDRSplitCriterionAMRules extends SDRSplitCriterion implements SplitCriterion { + + private static final long serialVersionUID = 1L; + + @Override + public double getMeritOfSplit(double[] preSplitDist, double[][] postSplitDists) { + double SDR = 0.0; + double N = preSplitDist[0]; + int count = 0; + + for (int i = 0; i < postSplitDists.length; i++) + { + double Ni = postSplitDists[i][0]; + if (Ni >= 0.05 * preSplitDist[0]) { + count = count + 1; + } + } + if (count == postSplitDists.length) { + SDR = computeSD(preSplitDist); + + for (int i = 0; i < postSplitDists.length; i++) + { + double Ni = postSplitDists[i][0]; + SDR -= (Ni / N) * computeSD(postSplitDists[i]); + + } + } + return SDR; + } + + @Override + public double getRangeOfMerit(double[] preSplitDist) { + return 1; + } + + public static double[] computeBranchSplitMerits(double[][] postSplitDists) { + double[] SDR = new double[postSplitDists.length]; + double N = 0; + + for (int i = 0; i < postSplitDists.length; i++) + { + double Ni = postSplitDists[i][0]; + N += Ni; + } + for (int i = 0; i < postSplitDists.length; i++) + { + double Ni = postSplitDists[i][0]; + SDR[i] = (Ni / N) * computeSD(postSplitDists[i]); + } + return SDR; + + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/AbstractErrorWeightedVote.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/AbstractErrorWeightedVote.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/AbstractErrorWeightedVote.java new file mode 100644 index 0000000..d86bee2 --- /dev/null +++ 
b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/AbstractErrorWeightedVote.java @@ -0,0 +1,101 @@ +package org.apache.samoa.moa.classifiers.rules.core.voting; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.List; + +import org.apache.samoa.moa.AbstractMOAObject; + +/** + * AbstractErrorWeightedVote class for weighted votes based on estimates of errors. 
+ * + * @author Joao Duarte ([email protected]) + * @version $Revision: 1 $ + */ +public abstract class AbstractErrorWeightedVote extends AbstractMOAObject implements ErrorWeightedVote { + /** + * + */ + private static final long serialVersionUID = -7340491298217227675L; + protected List<double[]> votes; + protected List<Double> errors; + protected double[] weights; + + public AbstractErrorWeightedVote() { + super(); + votes = new ArrayList<double[]>(); + errors = new ArrayList<Double>(); + } + + public AbstractErrorWeightedVote(AbstractErrorWeightedVote aewv) { + super(); + votes = new ArrayList<double[]>(); + for (double[] vote : aewv.votes) { + double[] v = new double[vote.length]; + for (int i = 0; i < vote.length; i++) + v[i] = vote[i]; + votes.add(v); + } + errors = new ArrayList<Double>(); + for (Double db : aewv.errors) { + errors.add(db.doubleValue()); + } + if (aewv.weights != null) { + weights = new double[aewv.weights.length]; + for (int i = 0; i < aewv.weights.length; i++) + weights[i] = aewv.weights[i]; + } + } + + @Override + public void addVote(double[] vote, double error) { + votes.add(vote); + errors.add(error); + } + + @Override + abstract public double[] computeWeightedVote(); + + @Override + public double getWeightedError() + { + double weightedError = 0; + if (weights != null && weights.length == errors.size()) + { + for (int i = 0; i < weights.length; ++i) + weightedError += errors.get(i) * weights[i]; + } + else + weightedError = -1; + return weightedError; + } + + @Override + public double[] getWeights() { + return weights; + } + + @Override + public int getNumberVotes() { + return votes.size(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/ErrorWeightedVote.java ---------------------------------------------------------------------- diff --git 
a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/ErrorWeightedVote.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/ErrorWeightedVote.java new file mode 100644 index 0000000..c4167bd --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/ErrorWeightedVote.java @@ -0,0 +1,81 @@ +package org.apache.samoa.moa.classifiers.rules.core.voting; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.MOAObject; + +/** + * ErrorWeightedVote interface for weighted votes based on estimates of errors. + * + * @author Joao Duarte ([email protected]) + * @version $Revision: 1 $ + */ +public interface ErrorWeightedVote { + + /** + * Adds a vote and the corresponding error for the computation of the weighted vote and respective weighted error. + * + * @param vote + * a vote returned by a classifier + * @param error + * the error associated to the vote + */ + public void addVote(double[] vote, double error); + + /** + * Computes the weighted vote. Also updates the weights of the votes. + * + * @return the weighted vote + */ + public double[] computeWeightedVote(); + + /** + * Returns the weighted error. + * + * @pre computeWeightedVote() + * @return the weighted error + */ + public double getWeightedError(); + + /** + * Return the weights error. 
+ * + * @pre computeWeightedVote() + * @return the weights + */ + public double[] getWeights(); + + /** + * The number of votes added so far. + * + * @return the number of votes + */ + public int getNumberVotes(); + + /** + * Creates a copy of the object + * + * @return copy of the object + */ + public MOAObject copy(); + + public ErrorWeightedVote getACopy(); +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/InverseErrorWeightedVote.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/InverseErrorWeightedVote.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/InverseErrorWeightedVote.java new file mode 100644 index 0000000..561e132 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/InverseErrorWeightedVote.java @@ -0,0 +1,99 @@ +package org.apache.samoa.moa.classifiers.rules.core.voting; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** + * InverseErrorWeightedVote class for weighted votes based on estimates of errors. 
+ * + * @author Joao Duarte ([email protected]) + * @version $Revision: 1 $ + */ +public class InverseErrorWeightedVote extends AbstractErrorWeightedVote { + + /** + * + */ + private static final double EPS = 0.000000001; // just to prevent divide by 0 in 1/X -> 1/(x+EPS) + private static final long serialVersionUID = 6359349250620616482L; + + public InverseErrorWeightedVote() { + super(); + } + + public InverseErrorWeightedVote(AbstractErrorWeightedVote aewv) { + super(aewv); + } + + @Override + public double[] computeWeightedVote() { + int n = votes.size(); + weights = new double[n]; + double[] weightedVote = null; + if (n > 0) { + int d = votes.get(0).length; + weightedVote = new double[d]; + double sumError = 0; + // weights are 1/(error+eps) + for (int i = 0; i < n; ++i) { + if (errors.get(i) < Double.MAX_VALUE) { + weights[i] = 1.0 / (errors.get(i) + EPS); + sumError += weights[i]; + } + else + weights[i] = 0; + + } + + if (sumError > 0) + for (int i = 0; i < n; ++i) + { + // normalize so that weights sum 1 + weights[i] /= sumError; + // compute weighted vote + for (int j = 0; j < d; j++) + weightedVote[j] += votes.get(i)[j] * weights[i]; + } + // Only occurs if all errors=Double.MAX_VALUE + else + { + // compute arithmetic vote + for (int i = 0; i < n; ++i) + { + for (int j = 0; j < d; j++) + weightedVote[j] += votes.get(i)[j] / n; + } + } + } + return weightedVote; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + + } + + @Override + public InverseErrorWeightedVote getACopy() { + return new InverseErrorWeightedVote(this); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java 
b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java new file mode 100644 index 0000000..e38b355 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java @@ -0,0 +1,71 @@ +package org.apache.samoa.moa.classifiers.rules.core.voting; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** + * UniformWeightedVote class for weighted votes based on estimates of errors. 
+ * + * @author Joao Duarte ([email protected]) + * @version $Revision: 1 $ + */ +public class UniformWeightedVote extends AbstractErrorWeightedVote { + + private static final long serialVersionUID = 6359349250620616482L; + + public UniformWeightedVote() { + super(); + } + + public UniformWeightedVote(AbstractErrorWeightedVote aewv) { + super(aewv); + } + + @Override + public double[] computeWeightedVote() { + int n = votes.size(); + weights = new double[n]; + double[] weightedVote = null; + if (n > 0) { + int d = votes.get(0).length; + weightedVote = new double[d]; + for (int i = 0; i < n; i++) + { + weights[i] = 1.0 / n; + for (int j = 0; j < d; j++) + weightedVote[j] += (votes.get(i)[j] * weights[i]); + } + + } + return weightedVote; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + + } + + @Override + public UniformWeightedVote getACopy() { + return new UniformWeightedVote(this); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/driftdetection/PageHinkleyFading.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/driftdetection/PageHinkleyFading.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/driftdetection/PageHinkleyFading.java new file mode 100644 index 0000000..203a227 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/driftdetection/PageHinkleyFading.java @@ -0,0 +1,84 @@ +package org.apache.samoa.moa.classifiers.rules.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** + * Page-Hinkley Test with more weight for recent instances. + * + */ + +public class PageHinkleyFading extends PageHinkleyTest { + /** + * + */ + private static final long serialVersionUID = 7110953184708812339L; + private double fadingFactor = 0.99; + + public PageHinkleyFading() { + super(); + } + + public PageHinkleyFading(double threshold, double alpha) { + super(threshold, alpha); + } + + protected double instancesSeen; + + @Override + public void reset() { + + super.reset(); + this.instancesSeen = 0; + + } + + @Override + public boolean update(double error) { + this.instancesSeen = 1 + fadingFactor * this.instancesSeen; + double absolutError = Math.abs(error); + + this.sumAbsolutError = fadingFactor * this.sumAbsolutError + absolutError; + if (this.instancesSeen > 30) { + double mT = absolutError - (this.sumAbsolutError / this.instancesSeen) - this.alpha; + this.cumulativeSum = this.cumulativeSum + mT; // Update the cumulative mT sum + if (this.cumulativeSum < this.minimumValue) { // Update the minimum mT value if the new mT is smaller than the current minimum + this.minimumValue = this.cumulativeSum; + } + return (((this.cumulativeSum - this.minimumValue) > this.threshold)); + } + return false; + } + + @Override + public PageHinkleyTest getACopy() { + PageHinkleyFading newTest = new PageHinkleyFading(this.threshold, this.alpha); + this.copyFields(newTest); + return newTest; + } + + @Override + protected void copyFields(PageHinkleyTest newTest) { + super.copyFields(newTest); + PageHinkleyFading newFading = 
(PageHinkleyFading) newTest; + newFading.fadingFactor = this.fadingFactor; + newFading.instancesSeen = this.instancesSeen; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/driftdetection/PageHinkleyTest.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/driftdetection/PageHinkleyTest.java b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/driftdetection/PageHinkleyTest.java new file mode 100644 index 0000000..3e4cdf9 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/classifiers/rules/driftdetection/PageHinkleyTest.java @@ -0,0 +1,95 @@ +package org.apache.samoa.moa.classifiers.rules.driftdetection; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.io.Serializable; + +/** + * Page-Hinkley Test with equal weights for all instances. 
/**
 * Page-Hinkley Test with equal weights for all instances.
 *
 * <p>
 * The test accumulates the deviation of each absolute error from the running mean absolute error (reduced by
 * {@code alpha}) and signals when that cumulative sum rises more than {@code threshold} above its minimum.
 */
public class PageHinkleyTest implements Serializable {

  private static final long serialVersionUID = 1L;

  /** Running sum of the deviations. */
  protected double cumulativeSum;
  /** Smallest value {@link #cumulativeSum} has taken since the last reset. */
  protected double minimumValue;
  /** Sum of the absolute errors seen since the last reset. */
  protected double sumAbsolutError;
  /** Number of errors seen since the last reset. */
  protected long phinstancesSeen;
  /** Gap between cumulative sum and minimum above which {@link #update(double)} returns {@code true}. */
  protected double threshold;
  /** Amount subtracted from every deviation before it is accumulated. */
  protected double alpha;

  /** Creates a test with threshold 0 and alpha 0. */
  public PageHinkleyTest() {
    this(0, 0);
  }

  /**
   * @param threshold
   *          detection threshold
   * @param alpha
   *          amount subtracted from each deviation
   */
  public PageHinkleyTest(double threshold, double alpha) {
    this.threshold = threshold;
    this.alpha = alpha;
    this.reset();
  }

  /**
   * @return the current cumulative sum of deviations
   */
  public double getCumulativeSum() {
    return cumulativeSum;
  }

  /**
   * @return the minimum recorded for the cumulative sum ({@code Double.MAX_VALUE} until one is recorded)
   */
  public double getMinimumValue() {
    return minimumValue;
  }

  /** Clears the running state; threshold and alpha are kept. */
  public void reset() {
    this.phinstancesSeen = 0;
    this.sumAbsolutError = 0.0;
    this.cumulativeSum = 0.0;
    this.minimumValue = Double.MAX_VALUE;
  }

  /**
   * Feeds one error value to the test.
   *
   * @param error
   *          the new error; only its absolute value is used
   * @return {@code true} if the cumulative sum exceeds its minimum by more than the threshold; always
   *         {@code false} for the first 30 values after a reset
   */
  public boolean update(double error) {
    this.phinstancesSeen++;
    final double magnitude = Math.abs(error);
    this.sumAbsolutError += magnitude;

    if (this.phinstancesSeen <= 30) {
      // Warm-up: only the absolute-error sum is maintained.
      return false;
    }

    // Deviation of the current error from the mean absolute error, reduced by alpha.
    final double deviation = magnitude - (this.sumAbsolutError / this.phinstancesSeen) - this.alpha;
    this.cumulativeSum += deviation;
    if (this.cumulativeSum < this.minimumValue) {
      // Track the smallest cumulative sum seen so far.
      this.minimumValue = this.cumulativeSum;
    }
    return (this.cumulativeSum - this.minimumValue) > this.threshold;
  }

  /**
   * @return a new {@link PageHinkleyTest} with the same threshold, alpha and running state
   */
  public PageHinkleyTest getACopy() {
    final PageHinkleyTest duplicate = new PageHinkleyTest(this.threshold, this.alpha);
    this.copyFields(duplicate);
    return duplicate;
  }

  /**
   * Copies the running state (not threshold or alpha) into {@code newTest}.
   */
  protected void copyFields(PageHinkleyTest newTest) {
    newTest.phinstancesSeen = this.phinstancesSeen;
    newTest.sumAbsolutError = this.sumAbsolutError;
    newTest.cumulativeSum = this.cumulativeSum;
    newTest.minimumValue = this.minimumValue;
  }

}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/cluster/CFCluster.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/CFCluster.java b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/CFCluster.java new file mode 100644 index 0000000..bffb776 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/CFCluster.java @@ -0,0 +1,177 @@ +package org.apache.samoa.moa.cluster; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +import java.util.Arrays; + +import org.apache.samoa.instances.Instance; + +/* micro cluster, as defined by Aggarwal et al, On Clustering Massive Data Streams: A Summarization Praradigm + * in the book Data streams : models and algorithms, by Charu C Aggarwal + * @article{ + title = {Data Streams: Models and Algorithms}, + author = {Aggarwal, Charu C.}, + year = {2007}, + publisher = {Springer Science+Business Media, LLC}, + url = {http://ebooks.ulb.tu-darmstadt.de/11157/}, + institution = {eBooks [http://ebooks.ulb.tu-darmstadt.de/perl/oai2] (Germany)}, + } + + DEFINITION A micro-clusterfor a set of d-dimensionalpoints Xi,. .Xi, + with t i m e s t a m p s ~. . 
.T,, is the (2-d+3)tuple (CF2", CFlX CF2t, CFlt, n), + wherein CF2" and CFlX each correspond to a vector of d entries. The definition of each of these entries is as follows: + + o For each dimension, the sum of the squares of the data values is maintained + in CF2". Thus, CF2" contains d values. The p-th entry of CF2" is equal to + \sum_j=1^n(x_i_j)^2 + + o For each dimension, the sum of the data values is maintained in C F l X . + Thus, CFIX contains d values. The p-th entry of CFIX is equal to + \sum_j=1^n x_i_j + + o The sum of the squares of the time stamps Ti,. .Tin maintained in CF2t + + o The sum of the time stamps Ti, . . .Tin maintained in CFlt. + + o The number of data points is maintained in n. + + */ +public abstract class CFCluster extends SphereCluster { + + private static final long serialVersionUID = 1L; + + protected double radiusFactor = 1.8; + + /** + * Number of points in the cluster. + */ + protected double N; + /** + * Linear sum of all the points added to the cluster. + */ + public double[] LS; + /** + * Squared sum of all the points added to the cluster. + */ + public double[] SS; + + /** + * Instantiates an empty kernel with the given dimensionality. + * + * @param dimensions + * The number of dimensions of the points that can be in this kernel. 
+ */ + public CFCluster(Instance instance, int dimensions) { + this(instance.toDoubleArray(), dimensions); + } + + protected CFCluster(int dimensions) { + this.N = 0; + this.LS = new double[dimensions]; + this.SS = new double[dimensions]; + Arrays.fill(this.LS, 0.0); + Arrays.fill(this.SS, 0.0); + } + + public CFCluster(double[] center, int dimensions) { + this.N = 1; + this.LS = center; + this.SS = new double[dimensions]; + for (int i = 0; i < SS.length; i++) { + SS[i] = Math.pow(center[i], 2); + } + } + + public CFCluster(CFCluster cluster) { + this.N = cluster.N; + this.LS = Arrays.copyOf(cluster.LS, cluster.LS.length); + this.SS = Arrays.copyOf(cluster.SS, cluster.SS.length); + } + + public void add(CFCluster cluster) { + this.N += cluster.N; + addVectors(this.LS, cluster.LS); + addVectors(this.SS, cluster.SS); + } + + public abstract CFCluster getCF(); + + /** + * @return this kernels' center + */ + @Override + public double[] getCenter() { + assert (this.N > 0); + double res[] = new double[this.LS.length]; + for (int i = 0; i < res.length; i++) { + res[i] = this.LS[i] / N; + } + return res; + } + + @Override + public abstract double getInclusionProbability(Instance instance); + + /** + * See interface <code>Cluster</code> + * + * @return The radius of the cluster. + */ + @Override + public abstract double getRadius(); + + /** + * See interface <code>Cluster</code> + * + * @return The weight. + * @see Cluster#getWeight() + */ + @Override + public double getWeight() { + return N; + } + + public void setN(double N) { + this.N = N; + } + + public double getN() { + return N; + } + + /** + * Adds the second array to the first array element by element. The arrays must have the same length. + * + * @param a1 + * Vector to which the second vector is added. + * @param a2 + * Vector to be added. This vector does not change. 
+ */ + public static void addVectors(double[] a1, double[] a2) { + assert (a1 != null); + assert (a2 != null); + assert (a1.length == a2.length) : "Adding two arrays of different " + + "length"; + + for (int i = 0; i < a1.length; i++) { + a1[i] += a2[i]; + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Cluster.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Cluster.java b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Cluster.java new file mode 100644 index 0000000..a700641 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Cluster.java @@ -0,0 +1,168 @@ +package org.apache.samoa.moa.cluster; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.moa.AbstractMOAObject; + +public abstract class Cluster extends AbstractMOAObject { + + private static final long serialVersionUID = 1L; + + private double id = -1; + private double gtLabel = -1; + + private Map<String, String> measure_values; + + public Cluster() { + this.measure_values = new HashMap<>(); + } + + /** + * @return the center of the cluster + */ + public abstract double[] getCenter(); + + /** + * Returns the weight of this cluster, not neccessarily normalized. It could, for instance, simply return the number + * of points contined in this cluster. + * + * @return the weight + */ + public abstract double getWeight(); + + /** + * Returns the probability of the given point belonging to this cluster. + * + * @param instance + * @return a value between 0 and 1 + */ + public abstract double getInclusionProbability(Instance instance); + + // TODO: for non sphere cluster sample points, find out MIN MAX neighbours + // within cluster + // and return the relative distance + // public abstract double getRelativeHullDistance(Instance instance); + + @Override + public void getDescription(StringBuilder sb, int i) { + sb.append("Cluster Object"); + } + + public void setId(double id) { + this.id = id; + } + + public double getId() { + return id; + } + + public boolean isGroundTruth() { + return gtLabel != -1; + } + + public void setGroundTruth(double truth) { + gtLabel = truth; + } + + public double getGroundTruth() { + return gtLabel; + } + + /** + * Samples this cluster by returning a point from inside it. 
+ * + * @param random + * a random number source + * @return an Instance that lies inside this cluster + */ + public abstract Instance sample(Random random); + + public void setMeasureValue(String measureKey, String value) { + measure_values.put(measureKey, value); + } + + public void setMeasureValue(String measureKey, double value) { + measure_values.put(measureKey, Double.toString(value)); + } + + public String getMeasureValue(String measureKey) { + if (measure_values.containsKey(measureKey)) + return measure_values.get(measureKey); + else + return ""; + } + + protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue) { + infoTitle.add("ClusterID"); + infoValue.add(Integer.toString((int) getId())); + + infoTitle.add("Type"); + infoValue.add(getClass().getSimpleName()); + + double c[] = getCenter(); + if (c != null) + for (int i = 0; i < c.length; i++) { + infoTitle.add("Dim" + i); + infoValue.add(Double.toString(c[i])); + } + + infoTitle.add("Weight"); + infoValue.add(Double.toString(getWeight())); + + } + + public String getInfo() { + List<String> infoTitle = new ArrayList<>(); + List<String> infoValue = new ArrayList<>(); + getClusterSpecificInfo(infoTitle, infoValue); + + StringBuilder sb = new StringBuilder(); + + // Cluster properties + sb.append("<html>"); + sb.append("<table>"); + int i = 0; + while (i < infoTitle.size() && i < infoValue.size()) { + sb.append("<tr><td>" + infoTitle.get(i) + "</td><td>" + infoValue.get(i) + "</td></tr>"); + i++; + } + sb.append("</table>"); + + // Evaluation info + sb.append("<br>"); + sb.append("<b>Evaluation</b><br>"); + sb.append("<table>"); + for (Object o : measure_values.entrySet()) { + Map.Entry e = (Map.Entry) o; + sb.append("<tr><td>" + e.getKey() + "</td><td>" + e.getValue() + "</td></tr>"); + } + sb.append("</table>"); + sb.append("</html>"); + return sb.toString(); + } + +} 
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Clustering.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Clustering.java b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Clustering.java new file mode 100644 index 0000000..7c2a9c8 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Clustering.java @@ -0,0 +1,269 @@ +package org.apache.samoa.moa.cluster; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; + +import org.apache.samoa.instances.Attribute; +import org.apache.samoa.instances.Instance; +import org.apache.samoa.moa.AbstractMOAObject; +import org.apache.samoa.moa.core.AutoExpandVector; +import org.apache.samoa.moa.core.DataPoint; + +public class Clustering extends AbstractMOAObject { + + private AutoExpandVector<Cluster> clusters; + + public Clustering() { + this.clusters = new AutoExpandVector<>(); + } + + public Clustering(Cluster[] clusters) { + this.clusters = new AutoExpandVector<>(); + Collections.addAll(this.clusters, clusters); + } + + public Clustering(List<? 
extends Instance> points) { + HashMap<Integer, Integer> labelMap = classValues(points); + int dim = points.get(0).dataset().numAttributes() - 1; + + int numClasses = labelMap.size(); + int noiseLabel; + + Attribute classLabel = points.get(0).dataset().classAttribute(); + int lastLabelIndex = classLabel.numValues() - 1; + if ("noise".equalsIgnoreCase(classLabel.value(lastLabelIndex))) { + noiseLabel = lastLabelIndex; + } else { + noiseLabel = -1; + } + + ArrayList<Instance>[] sorted_points = (ArrayList<Instance>[]) new ArrayList[numClasses]; + for (int i = 0; i < numClasses; i++) { + sorted_points[i] = new ArrayList<>(); + } + + for (Instance point : points) { + int clusterId = (int) point.classValue(); + if (clusterId != noiseLabel) { + sorted_points[labelMap.get(clusterId)].add(point); + } + } + + this.clusters = new AutoExpandVector<>(); + for (int i = 0; i < numClasses; i++) { + if (sorted_points[i].size() > 0) { + SphereCluster s = new SphereCluster(sorted_points[i], dim); + s.setId(sorted_points[i].get(0).classValue()); + s.setGroundTruth(sorted_points[i].get(0).classValue()); + clusters.add(s); + } + } + } + + public Clustering(ArrayList<DataPoint> points, double overlapThreshold, int initMinPoints) { + HashMap<Integer, Integer> labelMap = Clustering.classValues(points); + int dim = points.get(0).dataset().numAttributes() - 1; + + int numClasses = labelMap.size(); + + ArrayList<DataPoint>[] sorted_points = (ArrayList<DataPoint>[]) new ArrayList[numClasses]; + for (int i = 0; i < numClasses; i++) { + sorted_points[i] = new ArrayList<>(); + } + + for (DataPoint point : points) { + int clusterId = (int) point.classValue(); + if (clusterId != -1) { + sorted_points[labelMap.get(clusterId)].add(point); + } + } + + clusters = new AutoExpandVector<>(); + for (int i = 0; i < numClasses; i++) { + ArrayList<SphereCluster> microByClass = new ArrayList<>(); + ArrayList<DataPoint> pointInCluster = new ArrayList<>(); + ArrayList<ArrayList<Instance>> pointInMicroClusters = 
new ArrayList<>(); + + pointInCluster.addAll(sorted_points[i]); + while (pointInCluster.size() > 0) { + ArrayList<Instance> micro_points = new ArrayList<>(); + for (int j = 0; j < initMinPoints && !pointInCluster.isEmpty(); j++) { + micro_points.add(pointInCluster.get(0)); + pointInCluster.remove(0); + } + if (micro_points.size() > 0) { + SphereCluster s = new SphereCluster(micro_points, dim); + for (int c = 0; c < microByClass.size(); c++) { + if ((microByClass.get(c)).overlapRadiusDegree(s) > overlapThreshold) { + micro_points.addAll(pointInMicroClusters.get(c)); + s = new SphereCluster(micro_points, dim); + pointInMicroClusters.remove(c); + microByClass.remove(c); + } + } + + for (int j = 0; j < pointInCluster.size(); j++) { + Instance instance = pointInCluster.get(j); + if (s.getInclusionProbability(instance) > 0.8) { + pointInCluster.remove(j); + micro_points.add(instance); + } + } + s.setWeight(micro_points.size()); + microByClass.add(s); + pointInMicroClusters.add(micro_points); + } + } + // + boolean changed = true; + while (changed) { + changed = false; + for (int c = 0; c < microByClass.size(); c++) { + for (int c1 = c + 1; c1 < microByClass.size(); c1++) { + double overlap = microByClass.get(c).overlapRadiusDegree(microByClass.get(c1)); + if (overlap > overlapThreshold) { + pointInMicroClusters.get(c).addAll(pointInMicroClusters.get(c1)); + SphereCluster s = new SphereCluster(pointInMicroClusters.get(c), dim); + microByClass.set(c, s); + pointInMicroClusters.remove(c1); + microByClass.remove(c1); + changed = true; + break; + } + } + } + } + + for (SphereCluster microByClas : microByClass) { + microByClas.setGroundTruth(sorted_points[i].get(0).classValue()); + clusters.add(microByClas); + } + } + + for (int j = 0; j < clusters.size(); j++) { + clusters.get(j).setId(j); + } + + } + + /** + * @param points - points to be clustered + * @return an array with the min and max class label value + */ + public static HashMap<Integer, Integer> classValues(List<? 
extends Instance> points) { + HashMap<Integer, Integer> classes = new HashMap<>(); + int workCluster = 0; + boolean hasNoise = false; + for (Instance point : points) { + int label = (int) point.classValue(); + if (label == -1) { + hasNoise = true; + } else { + if (!classes.containsKey(label)) { + classes.put(label, workCluster); + workCluster++; + } + } + } + + if (hasNoise) { + classes.put(-1, workCluster); + } + return classes; + } + + public Clustering(AutoExpandVector<Cluster> clusters) { + this.clusters = clusters; + } + + /** + * add a cluster to the clustering + */ + public void add(Cluster cluster) { + clusters.add(cluster); + } + + /** + * remove a cluster from the clustering + */ + public void remove(int index) { + if (index < clusters.size()) { + clusters.remove(index); + } + } + + /** + * remove a cluster from the clustering + */ + public Cluster get(int index) { + if (index < clusters.size()) { + return clusters.get(index); + } + return null; + } + + /** + * @return the <code>Clustering</code> as an AutoExpandVector + */ + public AutoExpandVector<Cluster> getClustering() { + return clusters; + } + + /** + * @return A deepcopy of the <code>Clustering</code> as an AutoExpandVector + */ + public AutoExpandVector<Cluster> getClusteringCopy() { + return (AutoExpandVector<Cluster>) clusters.copy(); + } + + /** + * @return the number of clusters + */ + public int size() { + return clusters.size(); + } + + /** + * @return the number of dimensions of this clustering + */ + public int dimension() { + assert (clusters.size() != 0); + return clusters.get(0).getCenter().length; + } + + @Override + public void getDescription(StringBuilder sb, int i) { + sb.append("Clustering Object"); + } + + public double getMaxInclusionProbability(Instance point) { + double maxInclusion = 0.0; + for (Cluster cluster : clusters) { + maxInclusion = Math.max(cluster.getInclusionProbability(point), maxInclusion); + } + return maxInclusion; + } + +} 
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Miniball.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Miniball.java b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Miniball.java new file mode 100644 index 0000000..da2a0e5 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/Miniball.java @@ -0,0 +1,84 @@ +package org.apache.samoa.moa.cluster; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +import com.dreizak.miniball.model.ArrayPointSet; +import com.dreizak.miniball.model.PointSet; +import java.util.ArrayList; +import java.util.List; + +public class Miniball { + + private int dimension; + private com.dreizak.miniball.highdim.Miniball mb; + private PointStorage pointSet; + + public Miniball(int dimension) { + this.dimension = dimension; + } + + void clear() { + this.pointSet = new PointStorage(this.dimension); + } + + void check_in(double[] array) { + this.pointSet.add(array); + } + + double[] center() { + return this.mb.center(); + } + + double radius() { + return this.mb.radius(); + } + + void build() { + this.mb = new com.dreizak.miniball.highdim.Miniball(this.pointSet); + } + + public class PointStorage implements PointSet { + + protected int dimension; + protected List<double[]> L; + + public PointStorage(int dimension) { + this.dimension = dimension; + this.L = new ArrayList<double[]>(); + } + + public void add(double[] array) { + this.L.add(array); + } + + public int size() { + return L.size(); + } + + public int dimension() { + return dimension; + } + + public double coord(int point, int coordinate) { + return L.get(point)[coordinate]; + } + } +}
