http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java deleted file mode 100644 index 1c31e26..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java +++ /dev/null @@ -1,198 +0,0 @@ -package com.yahoo.labs.samoa.moa.clusterers; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.util.ArrayList; -import java.util.List; -import com.yahoo.labs.samoa.moa.cluster.CFCluster; -import com.yahoo.labs.samoa.moa.cluster.Cluster; -import com.yahoo.labs.samoa.moa.cluster.Clustering; -import com.yahoo.labs.samoa.moa.cluster.SphereCluster; - -/** - * A kMeans implementation for microclusterings. For now it only uses the real centers of the groundtruthclustering for - * implementation. There should also be an option to use random centers. TODO: random centers TODO: Create a macro - * clustering interface to make different macro clustering algorithms available to micro clustering algorithms like - * clustream, denstream and clustree - * - */ -public class KMeans { - - /** - * This kMeans implementation clusters a big number of microclusters into a smaller amount of macro clusters. To make - * it comparable to other algorithms it uses the real centers of the ground truth macro clustering to have the best - * possible initialization. The quality of resulting macro clustering yields an upper bound for kMeans on the - * underlying microclustering. - * - * @param centers - * of the ground truth clustering - * @param data - * list of microclusters - * @return - */ - public static Clustering kMeans(Cluster[] centers, List<? extends Cluster> data) { - int k = centers.length; - - int dimensions = centers[0].getCenter().length; - - ArrayList<ArrayList<Cluster>> clustering = - new ArrayList<ArrayList<Cluster>>(); - for (int i = 0; i < k; i++) { - clustering.add(new ArrayList<Cluster>()); - } - - int repetitions = 100; - while (repetitions-- >= 0) { - // Assign points to clusters - for (Cluster point : data) { - double minDistance = distance(point.getCenter(), centers[0].getCenter()); - int closestCluster = 0; - for (int i = 1; i < k; i++) { - double distance = distance(point.getCenter(), centers[i].getCenter()); - if (distance < minDistance) { - closestCluster = i; - minDistance = distance; - } - } - - clustering.get(closestCluster).add(point); - } - - // Calculate new centers and clear clustering lists - SphereCluster[] newCenters = new SphereCluster[centers.length]; - for (int i = 0; i < k; i++) { - newCenters[i] = calculateCenter(clustering.get(i), dimensions); - clustering.get(i).clear(); - } - centers = newCenters; - } - - return new Clustering(centers); - } - - private static double distance(double[] pointA, double[] pointB) { - double distance = 0.0; - for (int i = 0; i < pointA.length; i++) { - double d = pointA[i] - pointB[i]; - distance += d * d; - } - return Math.sqrt(distance); - } - - private static SphereCluster calculateCenter(ArrayList<Cluster> cluster, int dimensions) { - double[] res = new double[dimensions]; - for (int i = 0; i < res.length; i++) { - res[i] = 0.0; - } - - if (cluster.size() == 0) { - return new SphereCluster(res, 0.0); - } - - for (Cluster point : cluster) { - double[] center = point.getCenter(); - for (int i = 0; i < res.length; i++) { - res[i] += center[i]; - } - } - - // Normalize - for (int i = 0; i < res.length; i++) { - res[i] /= cluster.size(); - } - - // Calculate radius - double radius = 0.0; - for (Cluster point : cluster) { - double dist = distance(res, point.getCenter()); - if (dist > radius) { - radius = dist; - } - } - - return new SphereCluster(res, radius); - } - - public static Clustering gaussianMeans(Clustering gtClustering, Clustering clustering) { - ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); - for (int i = 0; i < clustering.size(); i++) { - if (clustering.get(i) instanceof CFCluster) { - microclusters.add((CFCluster) clustering.get(i)); - } - else { - System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() - + ". Cluster needs to extend moa.cluster.CFCluster"); - } - } - Cluster[] centers = new Cluster[gtClustering.size()]; - for (int i = 0; i < centers.length; i++) { - centers[i] = gtClustering.get(i); - - } - - int k = centers.length; - if (microclusters.size() < k) { - return new Clustering(new Cluster[0]); - } - - Clustering kMeansResult = kMeans(centers, microclusters); - - k = kMeansResult.size(); - CFCluster[] res = new CFCluster[k]; - - for (CFCluster microcluster : microclusters) { - // Find closest kMeans cluster - double minDistance = Double.MAX_VALUE; - int closestCluster = 0; - for (int i = 0; i < k; i++) { - double distance = distance(kMeansResult.get(i).getCenter(), microcluster.getCenter()); - if (distance < minDistance) { - closestCluster = i; - minDistance = distance; - } - } - - // Add to cluster - if (res[closestCluster] == null) { - res[closestCluster] = (CFCluster) microcluster.copy(); - } else { - res[closestCluster].add(microcluster); - } - } - - // Clean up res - int count = 0; - for (int i = 0; i < res.length; i++) { - if (res[i] != null) - ++count; - } - - CFCluster[] cleaned = new CFCluster[count]; - count = 0; - for (int i = 0; i < res.length; i++) { - if (res[i] != null) - cleaned[count++] = res[i]; - } - - return new Clustering(cleaned); - } - -}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java deleted file mode 100644 index 498bee0..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java +++ /dev/null @@ -1,331 +0,0 @@ -package com.yahoo.labs.samoa.moa.clusterers.clustream; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Random; -import com.yahoo.labs.samoa.moa.cluster.Cluster; -import com.yahoo.labs.samoa.moa.cluster.Clustering; -import com.yahoo.labs.samoa.moa.cluster.SphereCluster; -import com.yahoo.labs.samoa.moa.clusterers.AbstractClusterer; -import com.yahoo.labs.samoa.moa.core.Measurement; -import com.github.javacliparser.IntOption; -import com.yahoo.labs.samoa.instances.DenseInstance; -import com.yahoo.labs.samoa.instances.Instance; - -/** - * Citation: CluStream: Charu C. Aggarwal, Jiawei Han, Jianyong Wang, Philip S. Yu: A Framework for Clustering Evolving - * Data Streams. VLDB 2003: 81-92 - */ -public class Clustream extends AbstractClusterer { - - private static final long serialVersionUID = 1L; - - public IntOption timeWindowOption = new IntOption("horizon", - 'h', "Rang of the window.", 1000); - - public IntOption maxNumKernelsOption = new IntOption( - "maxNumKernels", 'k', - "Maximum number of micro kernels to use.", 100); - - public IntOption kernelRadiFactorOption = new IntOption( - "kernelRadiFactor", 't', - "Multiplier for the kernel radius", 2); - - private int timeWindow; - private long timestamp = -1; - private ClustreamKernel[] kernels; - private boolean initialized; - private List<ClustreamKernel> buffer; // Buffer for initialization with kNN - private int bufferSize; - private double t; - private int m; - - public Clustream() { - } - - @Override - public void resetLearningImpl() { - this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()]; - this.timeWindow = timeWindowOption.getValue(); - this.initialized = false; - this.buffer = new LinkedList<>(); - this.bufferSize = maxNumKernelsOption.getValue(); - t = kernelRadiFactorOption.getValue(); - m = maxNumKernelsOption.getValue(); - } - - @Override - public void trainOnInstanceImpl(Instance instance) { - int dim = instance.numValues(); - timestamp++; - // 0. Initialize - if (!initialized) { - if (buffer.size() < bufferSize) { - buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m)); - return; - } - - int k = kernels.length; - // System.err.println("k="+k+" bufferSize="+bufferSize); - assert (k <= bufferSize); - - ClustreamKernel[] centers = new ClustreamKernel[k]; - for (int i = 0; i < k; i++) { - centers[i] = buffer.get(i); // TODO: make random! - } - Clustering kmeans_clustering = kMeans(k, centers, buffer); - // Clustering kmeans_clustering = kMeans(k, buffer); - - for (int i = 0; i < kmeans_clustering.size(); i++) { - kernels[i] = new ClustreamKernel(new DenseInstance(1.0, centers[i].getCenter()), dim, timestamp, t, m); - } - - buffer.clear(); - initialized = true; - return; - } - - // 1. Determine closest kernel - ClustreamKernel closestKernel = null; - double minDistance = Double.MAX_VALUE; - for (ClustreamKernel kernel : kernels) { - // System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation()); - double distance = distance(instance.toDoubleArray(), kernel.getCenter()); - if (distance < minDistance) { - closestKernel = kernel; - minDistance = distance; - } - } - - // 2. Check whether instance fits into closestKernel - double radius; - if (closestKernel != null && closestKernel.getWeight() == 1) { - // Special case: estimate radius by determining the distance to the - // next closest cluster - radius = Double.MAX_VALUE; - double[] center = closestKernel.getCenter(); - for (ClustreamKernel kernel : kernels) { - if (kernel == closestKernel) { - continue; - } - - double distance = distance(kernel.getCenter(), center); - radius = Math.min(distance, radius); - } - } else { - radius = closestKernel.getRadius(); - } - - if (minDistance < radius) { - // Date fits, put into kernel and be happy - closestKernel.insert(instance, timestamp); - return; - } - - // 3. Date does not fit, we need to free - // some space to insert a new kernel - long threshold = timestamp - timeWindow; // Kernels before this can be forgotten - - // 3.1 Try to forget old kernels - for (int i = 0; i < kernels.length; i++) { - if (kernels[i].getRelevanceStamp() < threshold) { - kernels[i] = new ClustreamKernel(instance, dim, timestamp, t, m); - return; - } - } - - // 3.2 Merge closest two kernels - int closestA = 0; - int closestB = 0; - minDistance = Double.MAX_VALUE; - for (int i = 0; i < kernels.length; i++) { - double[] centerA = kernels[i].getCenter(); - for (int j = i + 1; j < kernels.length; j++) { - double dist = distance(centerA, kernels[j].getCenter()); - if (dist < minDistance) { - minDistance = dist; - closestA = i; - closestB = j; - } - } - } - assert (closestA != closestB); - - kernels[closestA].add(kernels[closestB]); - kernels[closestB] = new ClustreamKernel(instance, dim, timestamp, t, m); - } - - @Override - public Clustering getMicroClusteringResult() { - if (!initialized) { - return new Clustering(new Cluster[0]); - } - - ClustreamKernel[] res = new ClustreamKernel[kernels.length]; - for (int i = 0; i < res.length; i++) { - res[i] = new ClustreamKernel(kernels[i], t, m); - } - - return new Clustering(res); - } - - @Override - public boolean implementsMicroClusterer() { - return true; - } - - @Override - public Clustering getClusteringResult() { - return null; - } - - public String getName() { - return "Clustream " + timeWindow; - } - - private static double distance(double[] pointA, double[] pointB) { - double distance = 0.0; - for (int i = 0; i < pointA.length; i++) { - double d = pointA[i] - pointB[i]; - distance += d * d; - } - return Math.sqrt(distance); - } - - // wrapper... we need to rewrite kmeans to points, not clusters, doesnt make - // sense anymore - // public static Clustering kMeans( int k, ArrayList<Instance> points, int dim - // ) { - // ArrayList<ClustreamKernel> cl = new ArrayList<ClustreamKernel>(); - // for(Instance inst : points){ - // cl.add(new ClustreamKernel(inst, dim , 0, 0, 0)); - // } - // Clustering clustering = kMeans(k, cl); - // return clustering; - // } - - public static Clustering kMeans(int k, List<? extends Cluster> data) { - Random random = new Random(0); - Cluster[] centers = new Cluster[k]; - for (int i = 0; i < centers.length; i++) { - int rid = random.nextInt(k); - centers[i] = new SphereCluster(data.get(rid).getCenter(), 0); - } - return kMeans(k, centers, data); - } - - public static Clustering kMeans(int k, Cluster[] centers, List<? extends Cluster> data) { - assert (centers.length == k); - assert (k > 0); - - int dimensions = centers[0].getCenter().length; - - ArrayList<ArrayList<Cluster>> clustering = new ArrayList<>(); - for (int i = 0; i < k; i++) { - clustering.add(new ArrayList<Cluster>()); - } - - int repetitions = 100; - while (repetitions-- >= 0) { - // Assign points to clusters - for (Cluster point : data) { - double minDistance = distance(point.getCenter(), centers[0].getCenter()); - int closestCluster = 0; - for (int i = 1; i < k; i++) { - double distance = distance(point.getCenter(), centers[i].getCenter()); - if (distance < minDistance) { - closestCluster = i; - minDistance = distance; - } - } - - clustering.get(closestCluster).add(point); - } - - // Calculate new centers and clear clustering lists - SphereCluster[] newCenters = new SphereCluster[centers.length]; - for (int i = 0; i < k; i++) { - newCenters[i] = calculateCenter(clustering.get(i), dimensions); - clustering.get(i).clear(); - } - centers = newCenters; - } - - return new Clustering(centers); - } - - private static SphereCluster calculateCenter(ArrayList<Cluster> cluster, int dimensions) { - double[] res = new double[dimensions]; - for (int i = 0; i < res.length; i++) { - res[i] = 0.0; - } - - if (cluster.size() == 0) { - return new SphereCluster(res, 0.0); - } - - for (Cluster point : cluster) { - double[] center = point.getCenter(); - for (int i = 0; i < res.length; i++) { - res[i] += center[i]; - } - } - - // Normalize - for (int i = 0; i < res.length; i++) { - res[i] /= cluster.size(); - } - - // Calculate radius - double radius = 0.0; - for (Cluster point : cluster) { - double dist = distance(res, point.getCenter()); - if (dist > radius) { - radius = dist; - } - } - SphereCluster sc = new SphereCluster(res, radius); - sc.setWeight(cluster.size()); - return sc; - } - - @Override - protected Measurement[] getModelMeasurementsImpl() { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public void getModelDescription(StringBuilder out, int indent) { - throw new UnsupportedOperationException("Not supported yet."); - } - - public boolean isRandomizable() { - return false; - } - - public double[] getVotesForInstance(Instance inst) { - throw new UnsupportedOperationException("Not supported yet."); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java deleted file mode 100644 index 2a8f450..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java +++ /dev/null @@ -1,271 +0,0 @@ -package com.yahoo.labs.samoa.moa.clusterers.clustream; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ -import java.util.List; - -import com.yahoo.labs.samoa.instances.Instance; -import com.yahoo.labs.samoa.moa.cluster.CFCluster; - -public class ClustreamKernel extends CFCluster { - private static final long serialVersionUID = 1L; - - private final static double EPSILON = 0.00005; - public static final double MIN_VARIANCE = 1e-50; - - protected double LST; - protected double SST; - - int m; - double t; - - public ClustreamKernel(Instance instance, int dimensions, long timestamp, double t, int m) { - super(instance, dimensions); - this.t = t; - this.m = m; - this.LST = timestamp; - this.SST = timestamp * timestamp; - } - - public ClustreamKernel(ClustreamKernel cluster, double t, int m) { - super(cluster); - this.t = t; - this.m = m; - this.LST = cluster.LST; - this.SST = cluster.SST; - } - - public void insert(Instance instance, long timestamp) { - N++; - LST += timestamp; - SST += timestamp * timestamp; - - for (int i = 0; i < instance.numValues(); i++) { - LS[i] += instance.value(i); - SS[i] += instance.value(i) * instance.value(i); - } - } - - @Override - public void add(CFCluster other2) { - ClustreamKernel other = (ClustreamKernel) other2; - assert (other.LS.length == this.LS.length); - this.N += other.N; - this.LST += other.LST; - this.SST += other.SST; - - for (int i = 0; i < LS.length; i++) { - this.LS[i] += other.LS[i]; - this.SS[i] += other.SS[i]; - } - } - - public double getRelevanceStamp() { - if (N < 2 * m) - return getMuTime(); - - return getMuTime() + getSigmaTime() * getQuantile(((double) m) / (2 * N)); - } - - private double getMuTime() { - return LST / N; - } - - private double getSigmaTime() { - return Math.sqrt(SST / N - (LST / N) * (LST / N)); - } - - private double getQuantile(double z) { - assert (z >= 0 && z <= 1); - return Math.sqrt(2) * inverseError(2 * z - 1); - } - - @Override - public double getRadius() { - // trivial cluster - if (N == 1) - return 0; - if (t == 1) - t = 1; - - return getDeviation() * radiusFactor; - } - - @Override - public CFCluster getCF() { - return this; - } - - private double getDeviation() { - double[] variance = getVarianceVector(); - double sumOfDeviation = 0.0; - for (double aVariance : variance) { - double d = Math.sqrt(aVariance); - sumOfDeviation += d; - } - return sumOfDeviation / variance.length; - } - - /** - * @return this kernels' center - */ - @Override - public double[] getCenter() { - assert (!this.isEmpty()); - double res[] = new double[this.LS.length]; - for (int i = 0; i < res.length; i++) { - res[i] = this.LS[i] / N; - } - return res; - } - - /** - * See interface <code>Cluster</code> - * - * @param instance - * @return double value - */ - @Override - public double getInclusionProbability(Instance instance) { - // trivial cluster - if (N == 1) { - double distance = 0.0; - for (int i = 0; i < LS.length; i++) { - double d = LS[i] - instance.value(i); - distance += d * d; - } - distance = Math.sqrt(distance); - if (distance < EPSILON) - return 1.0; - return 0.0; - } - else { - double dist = calcNormalizedDistance(instance.toDoubleArray()); - if (dist <= getRadius()) { - return 1; - } - else { - return 0; - } - // double res = AuxiliaryFunctions.distanceProbabilty(dist, LS.length); - // return res; - } - } - - private double[] getVarianceVector() { - double[] res = new double[this.LS.length]; - for (int i = 0; i < this.LS.length; i++) { - double ls = this.LS[i]; - double ss = this.SS[i]; - - double lsDivN = ls / this.getWeight(); - double lsDivNSquared = lsDivN * lsDivN; - double ssDivN = ss / this.getWeight(); - res[i] = ssDivN - lsDivNSquared; - - // Due to numerical errors, small negative values can occur. - // We correct this by settings them to almost zero. - if (res[i] <= 0.0) { - if (res[i] > -EPSILON) { - res[i] = MIN_VARIANCE; - } - } - } - return res; - } - - /** - * Check if this cluster is empty or not. - * - * @return <code>true</code> if the cluster has no data points, <code>false</code> otherwise. - */ - public boolean isEmpty() { - return this.N == 0; - } - - /** - * Calculate the normalized euclidean distance (Mahalanobis distance for distribution w/o covariances) to a point. - * - * @param point - * The point to which the distance is calculated. - * @return The normalized distance to the cluster center. - * - * TODO: check whether WEIGHTING is correctly applied to variances - */ - // ??????? - private double calcNormalizedDistance(double[] point) { - double[] center = getCenter(); - double res = 0.0; - - for (int i = 0; i < center.length; i++) { - double diff = center[i] - point[i]; - res += (diff * diff);// variance[i]; - } - return Math.sqrt(res); - } - - /** - * Approximates the inverse error function. Clustream needs this. - * - * @param x - */ - public static double inverseError(double x) { - double z = Math.sqrt(Math.PI) * x; - double res = (z) / 2; - - double z2 = z * z; - double zProd = z * z2; // z^3 - res += (1.0 / 24) * zProd; - - zProd *= z2; // z^5 - res += (7.0 / 960) * zProd; - - zProd *= z2; // z^7 - res += (127 * zProd) / 80640; - - zProd *= z2; // z^9 - res += (4369 * zProd) / 11612160; - - zProd *= z2; // z^11 - res += (34807 * zProd) / 364953600; - - zProd *= z2; // z^13 - res += (20036983 * zProd) / 797058662400d; - - return res; - } - - @Override - protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue) { - super.getClusterSpecificInfo(infoTitle, infoValue); - infoTitle.add("Deviation"); - - double[] variance = getVarianceVector(); - double sumOfDeviation = 0.0; - for (double aVariance : variance) { - double d = Math.sqrt(aVariance); - sumOfDeviation += d; - } - - sumOfDeviation /= variance.length; - - infoValue.add(Double.toString(sumOfDeviation)); - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java deleted file mode 100644 index 08e16ae..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java +++ /dev/null @@ -1,465 +0,0 @@ -package com.yahoo.labs.samoa.moa.clusterers.clustream; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedList; -import java.util.List; -import java.util.Random; - -import com.yahoo.labs.samoa.moa.cluster.CFCluster; -import com.yahoo.labs.samoa.moa.cluster.Cluster; -import com.yahoo.labs.samoa.moa.cluster.Clustering; -import com.yahoo.labs.samoa.moa.cluster.SphereCluster; -import com.yahoo.labs.samoa.moa.clusterers.AbstractClusterer; -import com.yahoo.labs.samoa.moa.core.Measurement; -import com.github.javacliparser.IntOption; -import com.yahoo.labs.samoa.instances.DenseInstance; -import com.yahoo.labs.samoa.instances.Instance; - -public class WithKmeans extends AbstractClusterer { - - private static final long serialVersionUID = 1L; - - public IntOption timeWindowOption = new IntOption("horizon", - 'h', "Rang of the window.", 1000); - - public IntOption maxNumKernelsOption = new IntOption( - "maxNumKernels", 'm', - "Maximum number of micro kernels to use.", 100); - - public IntOption kernelRadiFactorOption = new IntOption( - "kernelRadiFactor", 't', - "Multiplier for the kernel radius", 2); - - public IntOption kOption = new IntOption( - "k", 'k', - "k of macro k-means (number of clusters)", 5); - - private int timeWindow; - private long timestamp = -1; - private ClustreamKernel[] kernels; - private boolean initialized; - private List<ClustreamKernel> buffer; // Buffer for initialization with kNN - private int bufferSize; - private double t; - private int m; - - public WithKmeans() { - - } - - @Override - public void resetLearningImpl() { - this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()]; - this.timeWindow = timeWindowOption.getValue(); - this.initialized = false; - this.buffer = new LinkedList<ClustreamKernel>(); - this.bufferSize = maxNumKernelsOption.getValue(); - t = kernelRadiFactorOption.getValue(); - m = maxNumKernelsOption.getValue(); - } - - @Override - public void trainOnInstanceImpl(Instance instance) { - int dim = instance.numValues(); - timestamp++; - // 0. Initialize - if (!initialized) { - if (buffer.size() < bufferSize) { - buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m)); - return; - } else { - for (int i = 0; i < buffer.size(); i++) { - kernels[i] = new ClustreamKernel(new DenseInstance(1.0, buffer.get(i).getCenter()), dim, timestamp, t, m); - } - - buffer.clear(); - initialized = true; - return; - } - } - - // 1. Determine closest kernel - ClustreamKernel closestKernel = null; - double minDistance = Double.MAX_VALUE; - for (int i = 0; i < kernels.length; i++) { - // System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation()); - double distance = distance(instance.toDoubleArray(), kernels[i].getCenter()); - if (distance < minDistance) { - closestKernel = kernels[i]; - minDistance = distance; - } - } - - // 2. Check whether instance fits into closestKernel - double radius = 0.0; - if (closestKernel.getWeight() == 1) { - // Special case: estimate radius by determining the distance to the - // next closest cluster - radius = Double.MAX_VALUE; - double[] center = closestKernel.getCenter(); - for (int i = 0; i < kernels.length; i++) { - if (kernels[i] == closestKernel) { - continue; - } - - double distance = distance(kernels[i].getCenter(), center); - radius = Math.min(distance, radius); - } - } else { - radius = closestKernel.getRadius(); - } - - if (minDistance < radius) { - // Date fits, put into kernel and be happy - closestKernel.insert(instance, timestamp); - return; - } - - // 3. Date does not fit, we need to free - // some space to insert a new kernel - long threshold = timestamp - timeWindow; // Kernels before this can be forgotten - - // 3.1 Try to forget old kernels - for (int i = 0; i < kernels.length; i++) { - if (kernels[i].getRelevanceStamp() < threshold) { - kernels[i] = new ClustreamKernel(instance, dim, timestamp, t, m); - return; - } - } - - // 3.2 Merge closest two kernels - int closestA = 0; - int closestB = 0; - minDistance = Double.MAX_VALUE; - for (int i = 0; i < kernels.length; i++) { - double[] centerA = kernels[i].getCenter(); - for (int j = i + 1; j < kernels.length; j++) { - double dist = distance(centerA, kernels[j].getCenter()); - if (dist < minDistance) { - minDistance = dist; - closestA = i; - closestB = j; - } - } - } - assert (closestA != closestB); - - kernels[closestA].add(kernels[closestB]); - kernels[closestB] = new ClustreamKernel(instance, dim, timestamp, t, m); - } - - @Override - public Clustering getMicroClusteringResult() { - if (!initialized) { - return new Clustering(new Cluster[0]); - } - - ClustreamKernel[] result = new ClustreamKernel[kernels.length]; - for (int i = 0; i < result.length; i++) { - result[i] = new ClustreamKernel(kernels[i], t, m); - } - - return new Clustering(result); - } - - @Override - public Clustering getClusteringResult() { - return kMeans_rand(kOption.getValue(), getMicroClusteringResult()); - } - - public Clustering getClusteringResult(Clustering gtClustering) { - return kMeans_gta(kOption.getValue(), getMicroClusteringResult(), gtClustering); - } - - public String getName() { - return "CluStreamWithKMeans " + timeWindow; - } - - /** - * Distance between two vectors. - * - * @param pointA - * @param pointB - * @return dist - */ - private static double distance(double[] pointA, double[] pointB) { - double distance = 0.0; - for (int i = 0; i < pointA.length; i++) { - double d = pointA[i] - pointB[i]; - distance += d * d; - } - return Math.sqrt(distance); - } - - /** - * k-means of (micro)clusters, with ground-truth-aided initialization. (to produce best results) - * - * @param k - * @param data - * @return (macro)clustering - CFClusters - */ - public static Clustering kMeans_gta(int k, Clustering clustering, Clustering gtClustering) { - - ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); - for (int i = 0; i < clustering.size(); i++) { - if (clustering.get(i) instanceof CFCluster) { - microclusters.add((CFCluster) clustering.get(i)); - } else { - System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() - + ". Cluster needs to extend moa.cluster.CFCluster"); - } - } - - int n = microclusters.size(); - assert (k <= n); - - /* k-means */ - Random random = new Random(0); - Cluster[] centers = new Cluster[k]; - int K = gtClustering.size(); - - for (int i = 0; i < k; i++) { - if (i < K) { // GT-aided - centers[i] = new SphereCluster(gtClustering.get(i).getCenter(), 0); - } else { // Randomized - int rid = random.nextInt(n); - centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0); - } - } - - return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters); - } - - /** - * k-means of (micro)clusters, with randomized initialization. - * - * @param k - * @param data - * @return (macro)clustering - CFClusters - */ - public static Clustering kMeans_rand(int k, Clustering clustering) { - - ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); - for (int i = 0; i < clustering.size(); i++) { - if (clustering.get(i) instanceof CFCluster) { - microclusters.add((CFCluster) clustering.get(i)); - } else { - System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() - + ". Cluster needs to extend moa.cluster.CFCluster"); - } - } - - int n = microclusters.size(); - assert (k <= n); - - /* k-means */ - Random random = new Random(0); - Cluster[] centers = new Cluster[k]; - - for (int i = 0; i < k; i++) { - int rid = random.nextInt(n); - centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0); - } - - return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters); - } - - /** - * (The Actual Algorithm) k-means of (micro)clusters, with specified initialization points. - * - * @param k - * @param centers - * - initial centers - * @param data - * @return (macro)clustering - SphereClusters - */ - protected static Clustering kMeans(int k, Cluster[] centers, List<? extends Cluster> data) { - assert (centers.length == k); - assert (k > 0); - - int dimensions = centers[0].getCenter().length; - - ArrayList<ArrayList<Cluster>> clustering = new ArrayList<ArrayList<Cluster>>(); - for (int i = 0; i < k; i++) { - clustering.add(new ArrayList<Cluster>()); - } - - while (true) { - // Assign points to clusters - for (Cluster point : data) { - double minDistance = distance(point.getCenter(), centers[0].getCenter()); - int closestCluster = 0; - for (int i = 1; i < k; i++) { - double distance = distance(point.getCenter(), centers[i].getCenter()); - if (distance < minDistance) { - closestCluster = i; - minDistance = distance; - } - } - - clustering.get(closestCluster).add(point); - } - - // Calculate new centers and clear clustering lists - SphereCluster[] newCenters = new SphereCluster[centers.length]; - for (int i = 0; i < k; i++) { - newCenters[i] = calculateCenter(clustering.get(i), dimensions); - clustering.get(i).clear(); - } - - // Convergence check - boolean converged = true; - for (int i = 0; i < k; i++) { - if (!Arrays.equals(centers[i].getCenter(), newCenters[i].getCenter())) { - converged = false; - break; - } - } - - if (converged) { - break; - } else { - centers = newCenters; - } - } - - return new Clustering(centers); - } - - /** - * Rearrange the k-means result into a set of CFClusters, cleaning up the redundancies. - * - * @param kMeansResult - * @param microclusters - * @return - */ - protected static Clustering cleanUpKMeans(Clustering kMeansResult, ArrayList<CFCluster> microclusters) { - /* Convert k-means result to CFClusters */ - int k = kMeansResult.size(); - CFCluster[] converted = new CFCluster[k]; - - for (CFCluster mc : microclusters) { - // Find closest kMeans cluster - double minDistance = Double.MAX_VALUE; - int closestCluster = 0; - for (int i = 0; i < k; i++) { - double distance = distance(kMeansResult.get(i).getCenter(), mc.getCenter()); - if (distance < minDistance) { - closestCluster = i; - minDistance = distance; - } - } - - // Add to cluster - if (converted[closestCluster] == null) { - converted[closestCluster] = (CFCluster) mc.copy(); - } else { - converted[closestCluster].add(mc); - } - } - - // Clean up - int count = 0; - for (int i = 0; i < converted.length; i++) { - if (converted[i] != null) - count++; - } - - CFCluster[] cleaned = new CFCluster[count]; - count = 0; - for (int i = 0; i < converted.length; i++) { - if (converted[i] != null) - cleaned[count++] = converted[i]; - } - - return new Clustering(cleaned); - } - - /** - * k-means helper: Calculate a wrapping cluster of assigned points[microclusters]. - * - * @param assigned - * @param dimensions - * @return SphereCluster (with center and radius) - */ - private static SphereCluster calculateCenter(ArrayList<Cluster> assigned, int dimensions) { - double[] result = new double[dimensions]; - for (int i = 0; i < result.length; i++) { - result[i] = 0.0; - } - - if (assigned.size() == 0) { - return new SphereCluster(result, 0.0); - } - - for (Cluster point : assigned) { - double[] center = point.getCenter(); - for (int i = 0; i < result.length; i++) { - result[i] += center[i]; - } - } - - // Normalize - for (int i = 0; i < result.length; i++) { - result[i] /= assigned.size(); - } - - // Calculate radius: biggest wrapping distance from center - double radius = 0.0; - for (Cluster point : assigned) { - double dist = distance(result, point.getCenter()); - if (dist > radius) { - radius = dist; - } - } - SphereCluster sc = new SphereCluster(result, radius); - sc.setWeight(assigned.size()); - return sc; - } - - /** Miscellaneous **/ - - @Override - public boolean implementsMicroClusterer() { - return true; - } - - public boolean isRandomizable() { - return false; - } - - public double[] getVotesForInstance(Instance inst) { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - protected Measurement[] getModelMeasurementsImpl() { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public void getModelDescription(StringBuilder out, int indent) { - throw new UnsupportedOperationException("Not supported yet."); - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java deleted file mode 100644 index ef15bd5..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java +++ /dev/null @@ -1,196 +0,0 @@ -package com.yahoo.labs.samoa.moa.core; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.io.File; -import java.io.IOException; -import java.net.URL; -import java.net.URLClassLoader; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.jar.JarEntry; -import java.util.jar.JarFile; - -/** - * Class for discovering classes via reflection in the java class path. - * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public class AutoClassDiscovery { - - protected static final Map<String, String[]> cachedClassNames = new HashMap<String, String[]>(); - - public static String[] findClassNames(String packageNameToSearch) { - String[] cached = cachedClassNames.get(packageNameToSearch); - if (cached == null) { - HashSet<String> classNames = new HashSet<String>(); - /* - * StringTokenizer pathTokens = new StringTokenizer(System - * .getProperty("java.class.path"), File.pathSeparator); - */ - String packageDirName = packageNameToSearch.replace('.', - File.separatorChar); - String packageJarName = packageNameToSearch.length() > 0 ? (packageNameToSearch.replace('.', '/') + "/") - : ""; - String part = ""; - - AutoClassDiscovery adc = new AutoClassDiscovery(); - URLClassLoader sysLoader = (URLClassLoader) adc.getClass().getClassLoader(); - URL[] cl_urls = sysLoader.getURLs(); - - for (int i = 0; i < cl_urls.length; i++) { - part = cl_urls[i].toString(); - if (part.startsWith("file:")) { - part = part.replace(" ", "%20"); - try { - File temp = new File(new java.net.URI(part)); - part = temp.getAbsolutePath(); - } catch (URISyntaxException e) { - e.printStackTrace(); - } - } - - // find classes - ArrayList<File> files = new ArrayList<File>(); - File dir = new File(part); - if (dir.isDirectory()) { - File root = new File(dir.toString() + File.separatorChar + packageDirName); - String[] names = findClassesInDirectoryRecursive(root, ""); - classNames.addAll(Arrays.asList(names)); - } else { - try { - JarFile jar = new JarFile(part); - Enumeration<JarEntry> jarEntries = jar.entries(); - while (jarEntries.hasMoreElements()) { - String jarEntry = jarEntries.nextElement().getName(); - if (jarEntry.startsWith(packageJarName)) { - String relativeName = jarEntry.substring(packageJarName.length()); - if (relativeName.endsWith(".class")) { - relativeName = relativeName.replace('/', - '.'); - classNames.add(relativeName.substring(0, - relativeName.length() - - ".class".length())); - } - } - } - } catch (IOException ignored) { - // ignore unreadable files - } - } - } - - /* - * while (pathTokens.hasMoreElements()) { String pathToSearch = - * pathTokens.nextElement().toString(); if (pathToSearch.endsWith(".jar")) - * { try { JarFile jar = new JarFile(pathToSearch); Enumeration<JarEntry> - * jarEntries = jar.entries(); while (jarEntries.hasMoreElements()) { - * String jarEntry = jarEntries.nextElement() .getName(); if - * (jarEntry.startsWith(packageJarName)) { String relativeName = jarEntry - * .substring(packageJarName.length()); if - * (relativeName.endsWith(".class")) { relativeName = - * relativeName.replace('/', '.'); - * classNames.add(relativeName.substring(0, relativeName.length() - - * ".class".length())); } } } } catch (IOException ignored) { // ignore - * unreadable files } } else { File root = new File(pathToSearch + - * File.separatorChar + packageDirName); String[] names = - * findClassesInDirectoryRecursive(root, ""); for (String name : names) { - * classNames.add(name); } } } - */ - cached = classNames.toArray(new String[classNames.size()]); - Arrays.sort(cached); - cachedClassNames.put(packageNameToSearch, cached); - } - return cached; - } - - protected static String[] findClassesInDirectoryRecursive(File root, - String packagePath) { - HashSet<String> classNames = new HashSet<String>(); - if (root.isDirectory()) { - String[] list = root.list(); - for (String string : list) { - if (string.endsWith(".class")) { - classNames.add(packagePath - + string.substring(0, string.length() - - ".class".length())); - } else { - File testDir = new File(root.getPath() + File.separatorChar - + string); - if (testDir.isDirectory()) { - String[] names = findClassesInDirectoryRecursive( - testDir, packagePath + string + "."); - classNames.addAll(Arrays.asList(names)); - } - } - } - } - return classNames.toArray(new String[classNames.size()]); - } - - public static Class[] findClassesOfType(String packageNameToSearch, - Class<?> typeDesired) { - ArrayList<Class<?>> classesFound = new ArrayList<Class<?>>(); - String[] classNames = findClassNames(packageNameToSearch); - for (String className : classNames) { - String fullName = packageNameToSearch.length() > 0 ? (packageNameToSearch - + "." + className) - : className; - if (isPublicConcreteClassOfType(fullName, typeDesired)) { - try { - classesFound.add(Class.forName(fullName)); - } catch (Exception ignored) { - // ignore classes that we cannot instantiate - } - } - } - return classesFound.toArray(new Class[classesFound.size()]); - } - - public static boolean isPublicConcreteClassOfType(String className, - Class<?> typeDesired) { - Class<?> testClass = null; - try { - testClass = Class.forName(className); - } catch (Exception e) { - return false; - } - int classModifiers = testClass.getModifiers(); - return (java.lang.reflect.Modifier.isPublic(classModifiers) - && !java.lang.reflect.Modifier.isAbstract(classModifiers) - && typeDesired.isAssignableFrom(testClass) && hasEmptyConstructor(testClass)); - } - - public static boolean hasEmptyConstructor(Class<?> type) { - try { - type.getConstructor(); - return true; - } catch (Exception ignored) { - return false; - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java deleted file mode 100644 index 7101f61..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java +++ /dev/null @@ -1,133 +0,0 @@ -package com.yahoo.labs.samoa.moa.core; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.util.ArrayList; -import java.util.Collection; - -import com.yahoo.labs.samoa.moa.AbstractMOAObject; -import com.yahoo.labs.samoa.moa.MOAObject; - -/** - * Vector with the capability of automatic expansion. - * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public class AutoExpandVector<T> extends ArrayList<T> implements MOAObject { - - private static final long serialVersionUID = 1L; - - public AutoExpandVector() { - super(0); - } - - public AutoExpandVector(int size) { - super(size); - } - - @Override - public void add(int pos, T obj) { - if (pos > size()) { - while (pos > size()) { - add(null); - } - trimToSize(); - } - super.add(pos, obj); - } - - @Override - public T get(int pos) { - return ((pos >= 0) && (pos < size())) ? super.get(pos) : null; - } - - @Override - public T set(int pos, T obj) { - if (pos >= size()) { - add(pos, obj); - return null; - } - return super.set(pos, obj); - } - - @Override - public boolean add(T arg0) { - boolean result = super.add(arg0); - trimToSize(); - return result; - } - - @Override - public boolean addAll(Collection<? extends T> arg0) { - boolean result = super.addAll(arg0); - trimToSize(); - return result; - } - - @Override - public boolean addAll(int arg0, Collection<? extends T> arg1) { - boolean result = super.addAll(arg0, arg1); - trimToSize(); - return result; - } - - @Override - public void clear() { - super.clear(); - trimToSize(); - } - - @Override - public T remove(int arg0) { - T result = super.remove(arg0); - trimToSize(); - return result; - } - - @Override - public boolean remove(Object arg0) { - boolean result = super.remove(arg0); - trimToSize(); - return result; - } - - @Override - protected void removeRange(int arg0, int arg1) { - super.removeRange(arg0, arg1); - trimToSize(); - } - - @Override - public MOAObject copy() { - return AbstractMOAObject.copy(this); - } - - @Override - public int measureByteSize() { - return AbstractMOAObject.measureByteSize(this); - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DataPoint.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DataPoint.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DataPoint.java deleted file mode 100644 index 1e7dcc9..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DataPoint.java +++ /dev/null @@ -1,133 +0,0 @@ -package com.yahoo.labs.samoa.moa.core; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.util.HashMap; -import java.util.Iterator; -import java.util.TreeSet; - -import com.yahoo.labs.samoa.instances.Attribute; -import com.yahoo.labs.samoa.instances.DenseInstance; -import com.yahoo.labs.samoa.instances.Instance; - -public class DataPoint extends DenseInstance { - - private static final long serialVersionUID = 1L; - - protected int timestamp; - private HashMap<String, String> measure_values; - - protected int noiseLabel; - - public DataPoint(Instance nextInstance, Integer timestamp) { - super(nextInstance); - this.setDataset(nextInstance.dataset()); - this.timestamp = timestamp; - measure_values = new HashMap<String, String>(); - - Attribute classLabel = dataset().classAttribute(); - noiseLabel = classLabel.indexOfValue("noise"); // -1 returned if there is no noise - } - - public void updateWeight(int cur_timestamp, double decay_rate) { - setWeight(Math.pow(2, (-1.0) * decay_rate * (cur_timestamp - timestamp))); - } - - public void setMeasureValue(String measureKey, double value) { - synchronized (measure_values) { - measure_values.put(measureKey, Double.toString(value)); - } - } - - public void setMeasureValue(String measureKey, String value) { - synchronized (measure_values) { - measure_values.put(measureKey, value); - } - } - - public String getMeasureValue(String measureKey) { - if (measure_values.containsKey(measureKey)) - synchronized (measure_values) { - return measure_values.get(measureKey); - } - else - return ""; - } - - public int getTimestamp() { - return timestamp; - } - - public String getInfo(int x_dim, int y_dim) { - StringBuffer sb = new StringBuffer(); - sb.append("<html><table>"); - sb.append("<tr><td>Point</td><td>" + timestamp + "</td></tr>"); - for (int i = 0; i < numAttributes() - 1; i++) { // m_AttValues.length - String label = "Dim " + i; - if (i == x_dim) - label = "<b>X</b>"; - if (i == y_dim) - label = "<b>Y</b>"; - sb.append("<tr><td>" + label + "</td><td>" + value(i) + "</td></tr>"); - } - sb.append("<tr><td>Decay</td><td>" + weight() + "</td></tr>"); - sb.append("<tr><td>True cluster</td><td>" + classValue() + "</td></tr>"); - sb.append("</table>"); - sb.append("<br>"); - sb.append("<b>Evaluation</b><br>"); - sb.append("<table>"); - - TreeSet<String> sortedset; - synchronized (measure_values) { - sortedset = new TreeSet<String>(measure_values.keySet()); - } - - Iterator miterator = sortedset.iterator(); - while (miterator.hasNext()) { - String key = (String) miterator.next(); - sb.append("<tr><td>" + key + "</td><td>" + measure_values.get(key) + "</td></tr>"); - } - - sb.append("</table></html>"); - return sb.toString(); - } - - public double getDistance(DataPoint other) { - double distance = 0.0; - int numDims = numAttributes(); - if (classIndex() != 0) - numDims--; - - for (int i = 0; i < numDims; i++) { - double d = value(i) - other.value(i); - distance += d * d; - } - return Math.sqrt(distance); - } - - public boolean isNoise() { - return (int) classValue() == noiseLabel; - } - - public double getNoiseLabel() { - return noiseLabel; - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DoubleVector.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DoubleVector.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DoubleVector.java deleted file mode 100644 index 132e3fb..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DoubleVector.java +++ /dev/null @@ -1,195 +0,0 @@ -package com.yahoo.labs.samoa.moa.core; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.moa.AbstractMOAObject; - -/** - * Vector of double numbers with some utilities. - * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public class DoubleVector extends AbstractMOAObject { - - private static final long serialVersionUID = 1L; - - protected double[] array; - - public DoubleVector() { - this.array = new double[0]; - } - - public DoubleVector(double[] toCopy) { - this.array = new double[toCopy.length]; - System.arraycopy(toCopy, 0, this.array, 0, toCopy.length); - } - - public DoubleVector(DoubleVector toCopy) { - this(toCopy.getArrayRef()); - } - - public int numValues() { - return this.array.length; - } - - public void setValue(int i, double v) { - if (i >= this.array.length) { - setArrayLength(i + 1); - } - this.array[i] = v; - } - - public void addToValue(int i, double v) { - if (i >= this.array.length) { - setArrayLength(i + 1); - } - this.array[i] += v; - } - - public void addValues(DoubleVector toAdd) { - addValues(toAdd.getArrayRef()); - } - - public void addValues(double[] toAdd) { - if (toAdd.length > this.array.length) { - setArrayLength(toAdd.length); - } - for (int i = 0; i < toAdd.length; i++) { - this.array[i] += toAdd[i]; - } - } - - public void subtractValues(DoubleVector toSubtract) { - subtractValues(toSubtract.getArrayRef()); - } - - public void subtractValues(double[] toSubtract) { - if (toSubtract.length > this.array.length) { - setArrayLength(toSubtract.length); - } - for (int i = 0; i < toSubtract.length; i++) { - this.array[i] -= toSubtract[i]; - } - } - - public void addToValues(double toAdd) { - for (int i = 0; i < this.array.length; i++) { - this.array[i] = this.array[i] + toAdd; - } - } - - public void scaleValues(double multiplier) { - for (int i = 0; i < this.array.length; i++) { - this.array[i] = this.array[i] * multiplier; - } - } - - // returns 0.0 for values outside of range - public double getValue(int i) { - return ((i >= 0) && (i < this.array.length)) ? this.array[i] : 0.0; - } - - public double sumOfValues() { - double sum = 0.0; - for (double element : this.array) { - sum += element; - } - return sum; - } - - public int maxIndex() { - int max = -1; - for (int i = 0; i < this.array.length; i++) { - if ((max < 0) || (this.array[i] > this.array[max])) { - max = i; - } - } - return max; - } - - public void normalize() { - scaleValues(1.0 / sumOfValues()); - } - - public int numNonZeroEntries() { - int count = 0; - for (double element : this.array) { - if (element != 0.0) { - count++; - } - } - return count; - } - - public double minWeight() { - if (this.array.length > 0) { - double min = this.array[0]; - for (int i = 1; i < this.array.length; i++) { - if (this.array[i] < min) { - min = this.array[i]; - } - } - return min; - } - return 0.0; - } - - public double[] getArrayCopy() { - double[] aCopy = new double[this.array.length]; - System.arraycopy(this.array, 0, aCopy, 0, this.array.length); - return aCopy; - } - - public double[] getArrayRef() { - return this.array; - } - - protected void setArrayLength(int l) { - double[] newArray = new double[l]; - int numToCopy = this.array.length; - if (numToCopy > l) { - numToCopy = l; - } - System.arraycopy(this.array, 0, newArray, 0, numToCopy); - this.array = newArray; - } - - public void getSingleLineDescription(StringBuilder out) { - getSingleLineDescription(out, numValues()); - } - - public void getSingleLineDescription(StringBuilder out, int numValues) { - out.append("{"); - for (int i = 0; i < numValues; i++) { - if (i > 0) { - out.append("|"); - } - out.append(StringUtils.doubleToString(getValue(i), 3)); - } - out.append("}"); - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - getSingleLineDescription(sb); - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/Example.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/Example.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/Example.java deleted file mode 100644 index fdf681b..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/Example.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.yahoo.labs.samoa.moa.core; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -public interface Example<T extends Object> { - - public T getData(); - - public double weight(); - - public void setWeight(double weight); -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/FastVector.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/FastVector.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/FastVector.java deleted file mode 100644 index 140b698..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/FastVector.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * FastVector.java - - * - */ -package com.yahoo.labs.samoa.moa.core; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.util.ArrayList; - -/** - * Simple extension of ArrayList. Exists for legacy reasons. - * - * @author Eibe Frank ([email protected]) - * @version $Revision: 8034 $ - */ -public class FastVector<E> extends ArrayList<E> { - - /** - * Adds an element to this vector. Increases its capacity if its not large enough. - * - * @param element - * the element to add - */ - public final void addElement(E element) { - add(element); - } - - /** - * Returns the element at the given position. - * - * @param index - * the element's index - * @return the element with the given index - */ - public final E elementAt(int index) { - return get(index); - } - - /** - * Deletes an element from this vector. - * - * @param index - * the index of the element to be deleted - */ - public final void removeElementAt(int index) { - remove(index); - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/GaussianEstimator.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/GaussianEstimator.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/GaussianEstimator.java deleted file mode 100644 index 14cca27..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/GaussianEstimator.java +++ /dev/null @@ -1,125 +0,0 @@ -package com.yahoo.labs.samoa.moa.core; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.moa.AbstractMOAObject; - -/** - * Gaussian incremental estimator that uses incremental method that is more resistant to floating point imprecision. for - * more info see Donald Knuth's "The Art of Computer Programming, Volume 2: Seminumerical Algorithms", section 4.2.2. - * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public class GaussianEstimator extends AbstractMOAObject { - - private static final long serialVersionUID = 1L; - - protected double weightSum; - - protected double mean; - - protected double varianceSum; - - public static final double NORMAL_CONSTANT = Math.sqrt(2 * Math.PI); - - public void addObservation(double value, double weight) { - if (Double.isInfinite(value) || Double.isNaN(value)) { - return; - } - if (this.weightSum > 0.0) { - this.weightSum += weight; - double lastMean = this.mean; - this.mean += weight * (value - lastMean) / this.weightSum; - this.varianceSum += weight * (value - lastMean) * (value - this.mean); - } else { - this.mean = value; - this.weightSum = weight; - } - } - - public void addObservations(GaussianEstimator obs) { - // Follows Variance Combination Rule in Section 2 of - // Brian Babcock, Mayur Datar, Rajeev Motwani, Liadan O'Callaghan: - // Maintaining variance and k-medians over data stream windows. PODS 2003: - // 234-243 - // - if ((this.weightSum >= 0.0) && (obs.weightSum > 0.0)) { - double oldMean = this.mean; - this.mean = (this.mean * (this.weightSum / (this.weightSum + obs.weightSum))) - + (obs.mean * (obs.weightSum / (this.weightSum + obs.weightSum))); - this.varianceSum += obs.varianceSum + (this.weightSum * obs.weightSum / (this.weightSum + obs.weightSum) * - Math.pow(obs.mean - oldMean, 2)); - this.weightSum += obs.weightSum; - } - } - - public double getTotalWeightObserved() { - return this.weightSum; - } - - public double getMean() { - return this.mean; - } - - public double getStdDev() { - return Math.sqrt(getVariance()); - } - - public double getVariance() { - return this.weightSum > 1.0 ? this.varianceSum / (this.weightSum - 1.0) - : 0.0; - } - - public double probabilityDensity(double value) { - if (this.weightSum > 0.0) { - double stdDev = getStdDev(); - if (stdDev > 0.0) { - double diff = value - getMean(); - return (1.0 / (NORMAL_CONSTANT * stdDev)) - * Math.exp(-(diff * diff / (2.0 * stdDev * stdDev))); - } - return value == getMean() ? 1.0 : 0.0; - } - return 0.0; - } - - public double[] estimatedWeight_LessThan_EqualTo_GreaterThan_Value( - double value) { - double equalToWeight = probabilityDensity(value) * this.weightSum; - double stdDev = getStdDev(); - double lessThanWeight = stdDev > 0.0 ? com.yahoo.labs.samoa.moa.core.Statistics - .normalProbability((value - getMean()) / stdDev) - * this.weightSum - equalToWeight - : (value < getMean() ? this.weightSum - equalToWeight : 0.0); - double greaterThanWeight = this.weightSum - equalToWeight - - lessThanWeight; - if (greaterThanWeight < 0.0) { - greaterThanWeight = 0.0; - } - return new double[] { lessThanWeight, equalToWeight, greaterThanWeight }; - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/GreenwaldKhannaQuantileSummary.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/GreenwaldKhannaQuantileSummary.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/GreenwaldKhannaQuantileSummary.java deleted file mode 100644 index 8054f96..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/GreenwaldKhannaQuantileSummary.java +++ /dev/null @@ -1,281 +0,0 @@ -package com.yahoo.labs.samoa.moa.core; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.io.Serializable; -import java.util.ArrayList; - -import com.yahoo.labs.samoa.moa.AbstractMOAObject; - -/** - * Class for representing summaries of Greenwald and Khanna quantiles. - * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public class GreenwaldKhannaQuantileSummary extends AbstractMOAObject { - - private static final long serialVersionUID = 1L; - - protected static class Tuple implements Serializable { - - private static final long serialVersionUID = 1L; - - public double v; - - public long g; - - public long delta; - - public Tuple(double v, long g, long delta) { - this.v = v; - this.g = g; - this.delta = delta; - } - - public Tuple(double v) { - this(v, 1, 0); - } - } - - protected Tuple[] summary; - - protected int numTuples = 0; - - protected long numObservations = 0; - - public GreenwaldKhannaQuantileSummary(int maxTuples) { - this.summary = new Tuple[maxTuples]; - } - - public void insert(double val) { - int i = findIndexOfTupleGreaterThan(val); - Tuple nextT = this.summary[i]; - if (nextT == null) { - insertTuple(new Tuple(val, 1, 0), i); - } else { - insertTuple(new Tuple(val, 1, nextT.g + nextT.delta - 1), i); - } - if (this.numTuples == this.summary.length) { - // use method 1 - deleteMergeableTupleMostFull(); - // if (mergeMethod == 1) { - // deleteMergeableTupleMostFull(); - // } else if (mergeMethod == 2) { - // deleteTupleMostFull(); - // } else { - // long maxDelta = findMaxDelta(); - // compress(maxDelta); - // while (numTuples == summary.length) { - // maxDelta++; - // compress(maxDelta); - // } - // } - } - this.numObservations++; - } - - protected void insertTuple(Tuple t, int index) { - System.arraycopy(this.summary, index, this.summary, index + 1, - this.numTuples - index); - this.summary[index] = t; - this.numTuples++; - } - - protected void deleteTuple(int index) { - this.summary[index] = new Tuple(this.summary[index + 1].v, - this.summary[index].g + this.summary[index + 1].g, - this.summary[index + 1].delta); - System.arraycopy(this.summary, index + 2, this.summary, index + 1, - this.numTuples - index - 2); - this.summary[this.numTuples - 1] = null; - this.numTuples--; - } - - protected void deleteTupleMostFull() { - long leastFullness = Long.MAX_VALUE; - int leastFullIndex = 0; - for (int i = 1; i < this.numTuples - 1; i++) { - long fullness = this.summary[i].g + this.summary[i + 1].g - + this.summary[i + 1].delta; - if (fullness < leastFullness) { - leastFullness = fullness; - leastFullIndex = i; - } - } - if (leastFullIndex > 0) { - deleteTuple(leastFullIndex); - } - } - - protected void deleteMergeableTupleMostFull() { - long leastFullness = Long.MAX_VALUE; - int leastFullIndex = 0; - for (int i = 1; i < this.numTuples - 1; i++) { - long fullness = this.summary[i].g + this.summary[i + 1].g - + this.summary[i + 1].delta; - if ((this.summary[i].delta >= this.summary[i + 1].delta) - && (fullness < leastFullness)) { - leastFullness = fullness; - leastFullIndex = i; - } - } - if (leastFullIndex > 0) { - deleteTuple(leastFullIndex); - } - } - - public long getWorstError() { - long mostFullness = 0; - for (int i = 1; i < this.numTuples - 1; i++) { - long fullness = this.summary[i].g + this.summary[i].delta; - if (fullness > mostFullness) { - mostFullness = fullness; - } - } - return mostFullness; - } - - public long findMaxDelta() { - long maxDelta = 0; - for (int i = 0; i < this.numTuples; i++) { - if (this.summary[i].delta > maxDelta) { - maxDelta = this.summary[i].delta; - } - } - return maxDelta; - } - - public void compress(long maxDelta) { - long[] bandBoundaries = computeBandBoundaries(maxDelta); - for (int i = this.numTuples - 2; i >= 0; i--) { - if (this.summary[i].delta >= this.summary[i + 1].delta) { - int band = 0; - while (this.summary[i].delta < bandBoundaries[band]) { - band++; - } - long belowBandThreshold = Long.MAX_VALUE; - if (band > 0) { - belowBandThreshold = bandBoundaries[band - 1]; - } - long mergeG = this.summary[i + 1].g + this.summary[i].g; - int childI = i - 1; - while (((mergeG + this.summary[i + 1].delta) < maxDelta) - && (childI >= 0) - && (this.summary[childI].delta >= belowBandThreshold)) { - mergeG += this.summary[childI].g; - childI--; - } - if (mergeG + this.summary[i + 1].delta < maxDelta) { - // merge - int numDeleted = i - childI; - this.summary[childI + 1] = new Tuple(this.summary[i + 1].v, - mergeG, this.summary[i + 1].delta); - // todo complete & test this multiple delete - System.arraycopy(this.summary, i + 2, this.summary, - childI + 2, this.numTuples - (i + 2)); - for (int j = this.numTuples - numDeleted; j < this.numTuples; j++) { - this.summary[j] = null; - } - this.numTuples -= numDeleted; - i = childI + 1; - } - } - } - } - - public double getQuantile(double quant) { - long r = (long) Math.ceil(quant * this.numObservations); - long currRank = 0; - for (int i = 0; i < this.numTuples - 1; i++) { - currRank += this.summary[i].g; - if (currRank + this.summary[i + 1].g > r) { - return this.summary[i].v; - } - } - return this.summary[this.numTuples - 1].v; - } - - public long getTotalCount() { - return this.numObservations; - } - - public double getPropotionBelow(double cutpoint) { - return (double) getCountBelow(cutpoint) / (double) this.numObservations; - } - - public long getCountBelow(double cutpoint) { - long rank = 0; - for (int i = 0; i < this.numTuples; i++) { - if (this.summary[i].v > cutpoint) { - break; - } - rank += this.summary[i].g; - } - return rank; - } - - public double[] getSuggestedCutpoints() { - double[] cutpoints = new double[this.numTuples]; - for (int i = 0; i < this.numTuples; i++) { - cutpoints[i] = this.summary[i].v; - } - return cutpoints; - } - - protected int findIndexOfTupleGreaterThan(double val) { - int high = this.numTuples, low = -1, probe; - while (high - low > 1) { - probe = (high + low) / 2; - if (this.summary[probe].v > val) { - high = probe; - } else { - low = probe; - } - } - return high; - } - - public static long[] computeBandBoundaries(long maxDelta) { - ArrayList<Long> boundaryList = new ArrayList<Long>(); - boundaryList.add(new Long(maxDelta)); - int alpha = 1; - while (true) { - long boundary = (maxDelta - (2 << (alpha - 1)) - (maxDelta % (2 << (alpha - 1)))); - if (boundary >= 0) { - boundaryList.add(new Long(boundary + 1)); - } else { - break; - } - alpha++; - } - boundaryList.add(new Long(0)); - long[] boundaries = new long[boundaryList.size()]; - for (int i = 0; i < boundaries.length; i++) { - boundaries[i] = boundaryList.get(i).longValue(); - } - return boundaries; - } - - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/InputStreamProgressMonitor.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/InputStreamProgressMonitor.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/InputStreamProgressMonitor.java deleted file mode 100644 index 703336e..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/InputStreamProgressMonitor.java +++ /dev/null @@ -1,131 +0,0 @@ -package com.yahoo.labs.samoa.moa.core; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.io.FilterInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.Serializable; - -/** - * Class for monitoring the progress of reading an input stream. - * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public class InputStreamProgressMonitor extends FilterInputStream implements Serializable { - - /** The number of bytes to read in total */ - protected int inputByteSize; - - /** The number of bytes read so far */ - protected int inputBytesRead; - - public InputStreamProgressMonitor(InputStream in) { - super(in); - try { - this.inputByteSize = in.available(); - } catch (IOException ioe) { - this.inputByteSize = 0; - } - this.inputBytesRead = 0; - } - - public int getBytesRead() { - return this.inputBytesRead; - } - - public int getBytesRemaining() { - return this.inputByteSize - this.inputBytesRead; - } - - public double getProgressFraction() { - return ((double) this.inputBytesRead / (double) this.inputByteSize); - } - - /* - * (non-Javadoc) - * - * @see java.io.InputStream#read() - */ - @Override - public int read() throws IOException { - int c = this.in.read(); - if (c > 0) { - this.inputBytesRead++; - } - return c; - } - - /* - * (non-Javadoc) - * - * @see java.io.InputStream#read(byte[]) - */ - @Override - public int read(byte[] b) throws IOException { - int numread = this.in.read(b); - if (numread > 0) { - this.inputBytesRead += numread; - } - return numread; - } - - /* - * (non-Javadoc) - * - * @see java.io.InputStream#read(byte[], int, int) - */ - @Override - public int read(byte[] b, int off, int len) throws IOException { - int numread = this.in.read(b, off, len); - if (numread > 0) { - this.inputBytesRead += numread; - } - return numread; - } - - /* - * (non-Javadoc) - * - * @see java.io.InputStream#skip(long) - */ - @Override - public long skip(long n) throws IOException { - long numskip = this.in.skip(n); - if (numskip > 0) { - this.inputBytesRead += numskip; - } - return numskip; - } - - /* - * (non-Javadoc) - * - * @see java.io.FilterInputStream#reset() - */ - @Override - public synchronized void reset() throws IOException { - this.in.reset(); - this.inputBytesRead = this.inputByteSize - this.in.available(); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/InstanceExample.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/InstanceExample.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/InstanceExample.java deleted file mode 100644 index e1283d9..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/InstanceExample.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.yahoo.labs.samoa.moa.core; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.instances.Instance; -import java.io.Serializable; - -public class InstanceExample implements Example<Instance>, Serializable { - - public Instance instance; - - public InstanceExample(Instance inst) - { - this.instance = inst; - } - - @Override - public Instance getData() { - return this.instance; - } - - @Override - public double weight() { - return this.instance.weight(); - } - - @Override - public void setWeight(double w) { - this.instance.setWeight(w); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/Measurement.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/Measurement.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/Measurement.java deleted file mode 100644 index 50f698b..0000000 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/Measurement.java +++ /dev/null @@ -1,115 +0,0 @@ -package com.yahoo.labs.samoa.moa.core; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.util.ArrayList; -import java.util.List; - -import com.yahoo.labs.samoa.moa.AbstractMOAObject; - -/** - * Class for storing an evaluation measurement. - * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public class Measurement extends AbstractMOAObject { - - private static final long serialVersionUID = 1L; - - protected String name; - - protected double value; - - public Measurement(String name, double value) { - this.name = name; - this.value = value; - } - - public String getName() { - return this.name; - } - - public double getValue() { - return this.value; - } - - public static Measurement getMeasurementNamed(String name, - Measurement[] measurements) { - for (Measurement measurement : measurements) { - if (name.equals(measurement.getName())) { - return measurement; - } - } - return null; - } - - public static void getMeasurementsDescription(Measurement[] measurements, - StringBuilder out, int indent) { - if (measurements.length > 0) { - StringUtils.appendIndented(out, indent, measurements[0].toString()); - for (int i = 1; i < measurements.length; i++) { - StringUtils.appendNewlineIndented(out, indent, measurements[i].toString()); - } - - } - } - - public static Measurement[] averageMeasurements(Measurement[][] toAverage) { - List<String> measurementNames = new ArrayList<String>(); - for (Measurement[] measurements : toAverage) { - for (Measurement measurement : measurements) { - if (measurementNames.indexOf(measurement.getName()) < 0) { - measurementNames.add(measurement.getName()); - } - } - } - GaussianEstimator[] estimators = new GaussianEstimator[measurementNames.size()]; - for (int i = 0; i < estimators.length; i++) { - estimators[i] = new GaussianEstimator(); - } - for (Measurement[] measurements : toAverage) { - for (Measurement measurement : measurements) { - estimators[measurementNames.indexOf(measurement.getName())].addObservation(measurement.getValue(), 1.0); - } - } - List<Measurement> averagedMeasurements = new ArrayList<Measurement>(); - for (int i = 0; i < measurementNames.size(); i++) { - String mName = measurementNames.get(i); - GaussianEstimator mEstimator = estimators[i]; - if (mEstimator.getTotalWeightObserved() > 1.0) { - averagedMeasurements.add(new Measurement("[avg] " + mName, - mEstimator.getMean())); - averagedMeasurements.add(new Measurement("[err] " + mName, - mEstimator.getStdDev() - / Math.sqrt(mEstimator.getTotalWeightObserved()))); - } - } - return averagedMeasurements.toArray(new Measurement[averagedMeasurements.size()]); - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - sb.append(getName()); - sb.append(" = "); - sb.append(StringUtils.doubleToString(getValue(), 3)); - } -}
