http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java index a9a891f..aa98307 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java @@ -1,4 +1,3 @@ - package com.yahoo.labs.samoa.moa.clusterers; /* @@ -29,174 +28,173 @@ import com.yahoo.labs.samoa.moa.cluster.Clustering; import com.yahoo.labs.samoa.moa.cluster.SphereCluster; /** - * A kMeans implementation for microclusterings. For now it only uses the real centers of the - * groundtruthclustering for implementation. There should also be an option to use random - * centers. - * TODO: random centers - * TODO: Create a macro clustering interface to make different macro clustering algorithms available - * to micro clustering algorithms like clustream, denstream and clustree - * + * A kMeans implementation for microclusterings. For now it only uses the real + * centers of the groundtruthclustering for implementation. There should also be + * an option to use random centers. TODO: random centers TODO: Create a macro + * clustering interface to make different macro clustering algorithms available + * to micro clustering algorithms like clustream, denstream and clustree + * */ public class KMeans { - /** - * This kMeans implementation clusters a big number of microclusters - * into a smaller amount of macro clusters. To make it comparable to other - * algorithms it uses the real centers of the ground truth macro clustering - * to have the best possible initialization. The quality of resulting - * macro clustering yields an upper bound for kMeans on the underlying - * microclustering. - * - * @param centers of the ground truth clustering - * @param data list of microclusters - * @return - */ - public static Clustering kMeans(Cluster[] centers, List<? extends Cluster> data ) { - int k = centers.length; - - int dimensions = centers[0].getCenter().length; - - ArrayList<ArrayList<Cluster>> clustering = - new ArrayList<ArrayList<Cluster>>(); - for ( int i = 0; i < k; i++ ) { - clustering.add( new ArrayList<Cluster>() ); - } - - int repetitions = 100; - while ( repetitions-- >= 0 ) { - // Assign points to clusters - for ( Cluster point : data ) { - double minDistance = distance( point.getCenter(), centers[0].getCenter() ); - int closestCluster = 0; - for ( int i = 1; i < k; i++ ) { - double distance = distance( point.getCenter(), centers[i].getCenter() ); - if ( distance < minDistance ) { - closestCluster = i; - minDistance = distance; - } - } - - clustering.get( closestCluster ).add( point ); - } - - // Calculate new centers and clear clustering lists - SphereCluster[] newCenters = new SphereCluster[centers.length]; - for ( int i = 0; i < k; i++ ) { - newCenters[i] = calculateCenter( clustering.get( i ), dimensions ); - clustering.get( i ).clear(); - } - centers = newCenters; - } - - return new Clustering( centers ); + /** + * This kMeans implementation clusters a big number of microclusters into a + * smaller amount of macro clusters. To make it comparable to other algorithms + * it uses the real centers of the ground truth macro clustering to have the + * best possible initialization. The quality of resulting macro clustering + * yields an upper bound for kMeans on the underlying microclustering. + * + * @param centers + * of the ground truth clustering + * @param data + * list of microclusters + * @return + */ + public static Clustering kMeans(Cluster[] centers, List<? extends Cluster> data) { + int k = centers.length; + + int dimensions = centers[0].getCenter().length; + + ArrayList<ArrayList<Cluster>> clustering = + new ArrayList<ArrayList<Cluster>>(); + for (int i = 0; i < k; i++) { + clustering.add(new ArrayList<Cluster>()); } - private static double distance(double[] pointA, double [] pointB){ - double distance = 0.0; - for (int i = 0; i < pointA.length; i++) { - double d = pointA[i] - pointB[i]; - distance += d * d; + int repetitions = 100; + while (repetitions-- >= 0) { + // Assign points to clusters + for (Cluster point : data) { + double minDistance = distance(point.getCenter(), centers[0].getCenter()); + int closestCluster = 0; + for (int i = 1; i < k; i++) { + double distance = distance(point.getCenter(), centers[i].getCenter()); + if (distance < minDistance) { + closestCluster = i; + minDistance = distance; + } } - return Math.sqrt(distance); + + clustering.get(closestCluster).add(point); + } + + // Calculate new centers and clear clustering lists + SphereCluster[] newCenters = new SphereCluster[centers.length]; + for (int i = 0; i < k; i++) { + newCenters[i] = calculateCenter(clustering.get(i), dimensions); + clustering.get(i).clear(); + } + centers = newCenters; } + return new Clustering(centers); + } - private static SphereCluster calculateCenter( ArrayList<Cluster> cluster, int dimensions ) { - double[] res = new double[dimensions]; - for ( int i = 0; i < res.length; i++ ) { - res[i] = 0.0; - } - - if ( cluster.size() == 0 ) { - return new SphereCluster( res, 0.0 ); - } - - for ( Cluster point : cluster ) { - double [] center = point.getCenter(); - for (int i = 0; i < res.length; i++) { - res[i] += center[i]; - } - } - - // Normalize - for ( int i = 0; i < res.length; i++ ) { - res[i] /= cluster.size(); - } - - // Calculate radius - double radius = 0.0; - for ( Cluster point : cluster ) { - double dist = distance( res, point.getCenter() ); - if ( dist > radius ) { - radius = dist; - } - } - - return new SphereCluster( res, radius ); + private static double distance(double[] pointA, double[] pointB) { + double distance = 0.0; + for (int i = 0; i < pointA.length; i++) { + double d = pointA[i] - pointB[i]; + distance += d * d; } + return Math.sqrt(distance); + } - public static Clustering gaussianMeans(Clustering gtClustering, Clustering clustering) { - ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); - for (int i = 0; i < clustering.size(); i++) { - if (clustering.get(i) instanceof CFCluster) { - microclusters.add((CFCluster)clustering.get(i)); - } - else{ - System.out.println("Unsupported Cluster Type:"+clustering.get(i).getClass() - +". Cluster needs to extend moa.cluster.CFCluster"); - } - } - Cluster[] centers = new Cluster[gtClustering.size()]; - for (int i = 0; i < centers.length; i++) { - centers[i] = gtClustering.get(i); + private static SphereCluster calculateCenter(ArrayList<Cluster> cluster, int dimensions) { + double[] res = new double[dimensions]; + for (int i = 0; i < res.length; i++) { + res[i] = 0.0; + } + + if (cluster.size() == 0) { + return new SphereCluster(res, 0.0); + } + + for (Cluster point : cluster) { + double[] center = point.getCenter(); + for (int i = 0; i < res.length; i++) { + res[i] += center[i]; + } + } + + // Normalize + for (int i = 0; i < res.length; i++) { + res[i] /= cluster.size(); + } + + // Calculate radius + double radius = 0.0; + for (Cluster point : cluster) { + double dist = distance(res, point.getCenter()); + if (dist > radius) { + radius = dist; + } + } + + return new SphereCluster(res, radius); + } + + public static Clustering gaussianMeans(Clustering gtClustering, Clustering clustering) { + ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); + for (int i = 0; i < clustering.size(); i++) { + if (clustering.get(i) instanceof CFCluster) { + microclusters.add((CFCluster) clustering.get(i)); + } + else { + System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() + + ". Cluster needs to extend moa.cluster.CFCluster"); + } + } + Cluster[] centers = new Cluster[gtClustering.size()]; + for (int i = 0; i < centers.length; i++) { + centers[i] = gtClustering.get(i); + + } + + int k = centers.length; + if (microclusters.size() < k) { + return new Clustering(new Cluster[0]); + } + Clustering kMeansResult = kMeans(centers, microclusters); + + k = kMeansResult.size(); + CFCluster[] res = new CFCluster[k]; + + for (CFCluster microcluster : microclusters) { + // Find closest kMeans cluster + double minDistance = Double.MAX_VALUE; + int closestCluster = 0; + for (int i = 0; i < k; i++) { + double distance = distance(kMeansResult.get(i).getCenter(), microcluster.getCenter()); + if (distance < minDistance) { + closestCluster = i; + minDistance = distance; } + } + + // Add to cluster + if (res[closestCluster] == null) { + res[closestCluster] = (CFCluster) microcluster.copy(); + } else { + res[closestCluster].add(microcluster); + } + } - int k = centers.length; - if ( microclusters.size() < k ) { - return new Clustering( new Cluster[0]); - } - - Clustering kMeansResult = kMeans( centers, microclusters ); - - k = kMeansResult.size(); - CFCluster[] res = new CFCluster[ k ]; - - for ( CFCluster microcluster : microclusters) { - // Find closest kMeans cluster - double minDistance = Double.MAX_VALUE; - int closestCluster = 0; - for ( int i = 0; i < k; i++ ) { - double distance = distance( kMeansResult.get(i).getCenter(), microcluster.getCenter() ); - if ( distance < minDistance ) { - closestCluster = i; - minDistance = distance; - } - } - - // Add to cluster - if ( res[closestCluster] == null ) { - res[closestCluster] = (CFCluster)microcluster.copy(); - } else { - res[closestCluster].add(microcluster); - } - } - - // Clean up res - int count = 0; - for ( int i = 0; i < res.length; i++ ) { - if ( res[i] != null ) - ++count; - } - - CFCluster[] cleaned = new CFCluster[count]; - count = 0; - for ( int i = 0; i < res.length; i++ ) { - if ( res[i] != null ) - cleaned[count++] = res[i]; - } - - return new Clustering( cleaned ); + // Clean up res + int count = 0; + for (int i = 0; i < res.length; i++) { + if (res[i] != null) + ++count; } + CFCluster[] cleaned = new CFCluster[count]; + count = 0; + for (int i = 0; i < res.length; i++) { + if (res[i] != null) + cleaned[count++] = res[i]; + } + + return new Clustering(cleaned); + } + }
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java index 975e61d..c329e5b 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java @@ -1,4 +1,3 @@ - package com.yahoo.labs.samoa.moa.clusterers.clustream; /* @@ -34,304 +33,300 @@ import com.github.javacliparser.IntOption; import com.yahoo.labs.samoa.instances.DenseInstance; import com.yahoo.labs.samoa.instances.Instance; -/** Citation: CluStream: Charu C. Aggarwal, Jiawei Han, Jianyong Wang, Philip S. Yu: - * A Framework for Clustering Evolving Data Streams. VLDB 2003: 81-92 +/** + * Citation: CluStream: Charu C. Aggarwal, Jiawei Han, Jianyong Wang, Philip S. + * Yu: A Framework for Clustering Evolving Data Streams. VLDB 2003: 81-92 */ -public class Clustream extends AbstractClusterer{ - - private static final long serialVersionUID = 1L; - - public IntOption timeWindowOption = new IntOption("horizon", - 'h', "Rang of the window.", 1000); - - public IntOption maxNumKernelsOption = new IntOption( - "maxNumKernels", 'k', - "Maximum number of micro kernels to use.", 100); - - public IntOption kernelRadiFactorOption = new IntOption( - "kernelRadiFactor", 't', - "Multiplier for the kernel radius", 2); - - private int timeWindow; - private long timestamp = -1; - private ClustreamKernel[] kernels; - private boolean initialized; - private List<ClustreamKernel> buffer; // Buffer for initialization with kNN - private int bufferSize; - private double t; - private int m; - - public Clustream() { - } - - - @Override - public void resetLearningImpl() { - this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()]; - this.timeWindow = timeWindowOption.getValue(); - this.initialized = false; - this.buffer = new LinkedList<>(); - this.bufferSize = maxNumKernelsOption.getValue(); - t = kernelRadiFactorOption.getValue(); - m = maxNumKernelsOption.getValue(); - } - - @Override - public void trainOnInstanceImpl(Instance instance) { - int dim = instance.numValues(); - timestamp++; - // 0. Initialize - if ( !initialized ) { - if ( buffer.size() < bufferSize ) { - buffer.add( new ClustreamKernel(instance,dim, timestamp, t, m) ); - return; - } - - int k = kernels.length; - //System.err.println("k="+k+" bufferSize="+bufferSize); - assert (k <= bufferSize); - - ClustreamKernel[] centers = new ClustreamKernel[k]; - for ( int i = 0; i < k; i++ ) { - centers[i] = buffer.get( i ); // TODO: make random! - } - Clustering kmeans_clustering = kMeans(k, centers, buffer); -// Clustering kmeans_clustering = kMeans(k, buffer); - - for ( int i = 0; i < kmeans_clustering.size(); i++ ) { - kernels[i] = new ClustreamKernel( new DenseInstance(1.0,centers[i].getCenter()), dim, timestamp, t, m ); - } - - buffer.clear(); - initialized = true; - return; - } - - - // 1. Determine closest kernel - ClustreamKernel closestKernel = null; - double minDistance = Double.MAX_VALUE; - for (ClustreamKernel kernel : kernels) { - //System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation()); - double distance = distance(instance.toDoubleArray(), kernel.getCenter()); - if (distance < minDistance) { - closestKernel = kernel; - minDistance = distance; - } - } - - // 2. Check whether instance fits into closestKernel - double radius; - if (closestKernel != null && closestKernel.getWeight() == 1) { - // Special case: estimate radius by determining the distance to the - // next closest cluster - radius = Double.MAX_VALUE; - double[] center = closestKernel.getCenter(); - for (ClustreamKernel kernel : kernels) { - if (kernel == closestKernel) { - continue; - } - - double distance = distance(kernel.getCenter(), center); - radius = Math.min(distance, radius); - } - } else { - radius = closestKernel.getRadius(); - } - - if ( minDistance < radius ) { - // Date fits, put into kernel and be happy - closestKernel.insert( instance, timestamp ); - return; - } - - // 3. Date does not fit, we need to free - // some space to insert a new kernel - long threshold = timestamp - timeWindow; // Kernels before this can be forgotten - - // 3.1 Try to forget old kernels - for ( int i = 0; i < kernels.length; i++ ) { - if ( kernels[i].getRelevanceStamp() < threshold ) { - kernels[i] = new ClustreamKernel( instance, dim, timestamp, t, m ); - return; - } - } - - // 3.2 Merge closest two kernels - int closestA = 0; - int closestB = 0; - minDistance = Double.MAX_VALUE; - for ( int i = 0; i < kernels.length; i++ ) { - double[] centerA = kernels[i].getCenter(); - for ( int j = i + 1; j < kernels.length; j++ ) { - double dist = distance( centerA, kernels[j].getCenter() ); - if ( dist < minDistance ) { - minDistance = dist; - closestA = i; - closestB = j; - } - } - } - assert (closestA != closestB); - - kernels[closestA].add( kernels[closestB] ); - kernels[closestB] = new ClustreamKernel( instance, dim, timestamp, t, m ); - } - - @Override - public Clustering getMicroClusteringResult() { - if ( !initialized ) { - return new Clustering( new Cluster[0] ); - } - - ClustreamKernel[] res = new ClustreamKernel[kernels.length]; - for ( int i = 0; i < res.length; i++ ) { - res[i] = new ClustreamKernel( kernels[i], t, m ); - } - - return new Clustering( res ); - } - - @Override - public boolean implementsMicroClusterer() { - return true; - } - - @Override - public Clustering getClusteringResult() { - return null; - } - - public String getName() { - return "Clustream " + timeWindow; - } - - private static double distance(double[] pointA, double [] pointB){ - double distance = 0.0; - for (int i = 0; i < pointA.length; i++) { - double d = pointA[i] - pointB[i]; - distance += d * d; - } - return Math.sqrt(distance); - } - - //wrapper... we need to rewrite kmeans to points, not clusters, doesnt make sense anymore - // public static Clustering kMeans( int k, ArrayList<Instance> points, int dim ) { - // ArrayList<ClustreamKernel> cl = new ArrayList<ClustreamKernel>(); - // for(Instance inst : points){ - // cl.add(new ClustreamKernel(inst, dim , 0, 0, 0)); - // } - // Clustering clustering = kMeans(k, cl); - // return clustering; - // } - - public static Clustering kMeans( int k, List<? extends Cluster> data ) { - Random random = new Random(0); - Cluster[] centers = new Cluster[k]; - for (int i = 0; i < centers.length; i++) { - int rid = random.nextInt(k); - centers[i] = new SphereCluster(data.get(rid).getCenter(),0); - } - return kMeans(k, centers, data); - } - - - - - - public static Clustering kMeans( int k, Cluster[] centers, List<? extends Cluster> data ) { - assert (centers.length == k); - assert (k > 0); - - int dimensions = centers[0].getCenter().length; - - ArrayList<ArrayList<Cluster>> clustering = new ArrayList<>(); - for ( int i = 0; i < k; i++ ) { - clustering.add( new ArrayList<Cluster>() ); - } - - int repetitions = 100; - while ( repetitions-- >= 0 ) { - // Assign points to clusters - for ( Cluster point : data ) { - double minDistance = distance( point.getCenter(), centers[0].getCenter() ); - int closestCluster = 0; - for ( int i = 1; i < k; i++ ) { - double distance = distance( point.getCenter(), centers[i].getCenter() ); - if ( distance < minDistance ) { - closestCluster = i; - minDistance = distance; - } - } - - clustering.get( closestCluster ).add( point ); - } - - // Calculate new centers and clear clustering lists - SphereCluster[] newCenters = new SphereCluster[centers.length]; - for ( int i = 0; i < k; i++ ) { - newCenters[i] = calculateCenter( clustering.get( i ), dimensions ); - clustering.get( i ).clear(); - } - centers = newCenters; - } - - return new Clustering( centers ); - } - - private static SphereCluster calculateCenter( ArrayList<Cluster> cluster, int dimensions ) { - double[] res = new double[dimensions]; - for ( int i = 0; i < res.length; i++ ) { - res[i] = 0.0; - } - - if ( cluster.size() == 0 ) { - return new SphereCluster( res, 0.0 ); - } - - for ( Cluster point : cluster ) { - double [] center = point.getCenter(); - for (int i = 0; i < res.length; i++) { - res[i] += center[i]; - } - } - - // Normalize - for ( int i = 0; i < res.length; i++ ) { - res[i] /= cluster.size(); - } - - // Calculate radius - double radius = 0.0; - for ( Cluster point : cluster ) { - double dist = distance( res, point.getCenter() ); - if ( dist > radius ) { - radius = dist; - } - } - SphereCluster sc = new SphereCluster( res, radius ); - sc.setWeight(cluster.size()); - return sc; - } - - @Override - protected Measurement[] getModelMeasurementsImpl() { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public void getModelDescription(StringBuilder out, int indent) { - throw new UnsupportedOperationException("Not supported yet."); - } - - public boolean isRandomizable() { - return false; - } - - public double[] getVotesForInstance(Instance inst) { - throw new UnsupportedOperationException("Not supported yet."); - } - +public class Clustream extends AbstractClusterer { + + private static final long serialVersionUID = 1L; + + public IntOption timeWindowOption = new IntOption("horizon", + 'h', "Rang of the window.", 1000); + + public IntOption maxNumKernelsOption = new IntOption( + "maxNumKernels", 'k', + "Maximum number of micro kernels to use.", 100); + + public IntOption kernelRadiFactorOption = new IntOption( + "kernelRadiFactor", 't', + "Multiplier for the kernel radius", 2); + + private int timeWindow; + private long timestamp = -1; + private ClustreamKernel[] kernels; + private boolean initialized; + private List<ClustreamKernel> buffer; // Buffer for initialization with kNN + private int bufferSize; + private double t; + private int m; + + public Clustream() { + } + + @Override + public void resetLearningImpl() { + this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()]; + this.timeWindow = timeWindowOption.getValue(); + this.initialized = false; + this.buffer = new LinkedList<>(); + this.bufferSize = maxNumKernelsOption.getValue(); + t = kernelRadiFactorOption.getValue(); + m = maxNumKernelsOption.getValue(); + } + + @Override + public void trainOnInstanceImpl(Instance instance) { + int dim = instance.numValues(); + timestamp++; + // 0. Initialize + if (!initialized) { + if (buffer.size() < bufferSize) { + buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m)); + return; + } + + int k = kernels.length; + // System.err.println("k="+k+" bufferSize="+bufferSize); + assert (k <= bufferSize); + + ClustreamKernel[] centers = new ClustreamKernel[k]; + for (int i = 0; i < k; i++) { + centers[i] = buffer.get(i); // TODO: make random! + } + Clustering kmeans_clustering = kMeans(k, centers, buffer); + // Clustering kmeans_clustering = kMeans(k, buffer); + + for (int i = 0; i < kmeans_clustering.size(); i++) { + kernels[i] = new ClustreamKernel(new DenseInstance(1.0, centers[i].getCenter()), dim, timestamp, t, m); + } + + buffer.clear(); + initialized = true; + return; + } + + // 1. Determine closest kernel + ClustreamKernel closestKernel = null; + double minDistance = Double.MAX_VALUE; + for (ClustreamKernel kernel : kernels) { + // System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation()); + double distance = distance(instance.toDoubleArray(), kernel.getCenter()); + if (distance < minDistance) { + closestKernel = kernel; + minDistance = distance; + } + } + + // 2. Check whether instance fits into closestKernel + double radius; + if (closestKernel != null && closestKernel.getWeight() == 1) { + // Special case: estimate radius by determining the distance to the + // next closest cluster + radius = Double.MAX_VALUE; + double[] center = closestKernel.getCenter(); + for (ClustreamKernel kernel : kernels) { + if (kernel == closestKernel) { + continue; + } + + double distance = distance(kernel.getCenter(), center); + radius = Math.min(distance, radius); + } + } else { + radius = closestKernel.getRadius(); + } + + if (minDistance < radius) { + // Date fits, put into kernel and be happy + closestKernel.insert(instance, timestamp); + return; + } + + // 3. Date does not fit, we need to free + // some space to insert a new kernel + long threshold = timestamp - timeWindow; // Kernels before this can be + // forgotten + + // 3.1 Try to forget old kernels + for (int i = 0; i < kernels.length; i++) { + if (kernels[i].getRelevanceStamp() < threshold) { + kernels[i] = new ClustreamKernel(instance, dim, timestamp, t, m); + return; + } + } + + // 3.2 Merge closest two kernels + int closestA = 0; + int closestB = 0; + minDistance = Double.MAX_VALUE; + for (int i = 0; i < kernels.length; i++) { + double[] centerA = kernels[i].getCenter(); + for (int j = i + 1; j < kernels.length; j++) { + double dist = distance(centerA, kernels[j].getCenter()); + if (dist < minDistance) { + minDistance = dist; + closestA = i; + closestB = j; + } + } + } + assert (closestA != closestB); + + kernels[closestA].add(kernels[closestB]); + kernels[closestB] = new ClustreamKernel(instance, dim, timestamp, t, m); + } + + @Override + public Clustering getMicroClusteringResult() { + if (!initialized) { + return new Clustering(new Cluster[0]); + } + + ClustreamKernel[] res = new ClustreamKernel[kernels.length]; + for (int i = 0; i < res.length; i++) { + res[i] = new ClustreamKernel(kernels[i], t, m); + } + + return new Clustering(res); + } + + @Override + public boolean implementsMicroClusterer() { + return true; + } + + @Override + public Clustering getClusteringResult() { + return null; + } + + public String getName() { + return "Clustream " + timeWindow; + } + + private static double distance(double[] pointA, double[] pointB) { + double distance = 0.0; + for (int i = 0; i < pointA.length; i++) { + double d = pointA[i] - pointB[i]; + distance += d * d; + } + return Math.sqrt(distance); + } + + // wrapper... we need to rewrite kmeans to points, not clusters, doesnt make + // sense anymore + // public static Clustering kMeans( int k, ArrayList<Instance> points, int dim + // ) { + // ArrayList<ClustreamKernel> cl = new ArrayList<ClustreamKernel>(); + // for(Instance inst : points){ + // cl.add(new ClustreamKernel(inst, dim , 0, 0, 0)); + // } + // Clustering clustering = kMeans(k, cl); + // return clustering; + // } + + public static Clustering kMeans(int k, List<? extends Cluster> data) { + Random random = new Random(0); + Cluster[] centers = new Cluster[k]; + for (int i = 0; i < centers.length; i++) { + int rid = random.nextInt(k); + centers[i] = new SphereCluster(data.get(rid).getCenter(), 0); + } + return kMeans(k, centers, data); + } + + public static Clustering kMeans(int k, Cluster[] centers, List<? extends Cluster> data) { + assert (centers.length == k); + assert (k > 0); + + int dimensions = centers[0].getCenter().length; + + ArrayList<ArrayList<Cluster>> clustering = new ArrayList<>(); + for (int i = 0; i < k; i++) { + clustering.add(new ArrayList<Cluster>()); + } + + int repetitions = 100; + while (repetitions-- >= 0) { + // Assign points to clusters + for (Cluster point : data) { + double minDistance = distance(point.getCenter(), centers[0].getCenter()); + int closestCluster = 0; + for (int i = 1; i < k; i++) { + double distance = distance(point.getCenter(), centers[i].getCenter()); + if (distance < minDistance) { + closestCluster = i; + minDistance = distance; + } + } + + clustering.get(closestCluster).add(point); + } + + // Calculate new centers and clear clustering lists + SphereCluster[] newCenters = new SphereCluster[centers.length]; + for (int i = 0; i < k; i++) { + newCenters[i] = calculateCenter(clustering.get(i), dimensions); + clustering.get(i).clear(); + } + centers = newCenters; + } + + return new Clustering(centers); + } + + private static SphereCluster calculateCenter(ArrayList<Cluster> cluster, int dimensions) { + double[] res = new double[dimensions]; + for (int i = 0; i < res.length; i++) { + res[i] = 0.0; + } + + if (cluster.size() == 0) { + return new SphereCluster(res, 0.0); + } + + for (Cluster point : cluster) { + double[] center = point.getCenter(); + for (int i = 0; i < res.length; i++) { + res[i] += center[i]; + } + } + + // Normalize + for (int i = 0; i < res.length; i++) { + res[i] /= cluster.size(); + } + + // Calculate radius + double radius = 0.0; + for (Cluster point : cluster) { + double dist = distance(res, point.getCenter()); + if (dist > radius) { + radius = dist; + } + } + SphereCluster sc = new SphereCluster(res, radius); + sc.setWeight(cluster.size()); + return sc; + } + + @Override + protected Measurement[] getModelMeasurementsImpl() { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public void getModelDescription(StringBuilder out, int indent) { + throw new UnsupportedOperationException("Not supported yet."); + } + + public boolean isRandomizable() { + return false; + } + + public double[] getVotesForInstance(Instance inst) { + throw new UnsupportedOperationException("Not supported yet."); + } } - http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java index 8c8f6a0..d5123d2 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java @@ -25,245 +25,249 @@ import com.yahoo.labs.samoa.instances.Instance; import com.yahoo.labs.samoa.moa.cluster.CFCluster; public class ClustreamKernel extends CFCluster { - private static final long serialVersionUID = 1L; - - private final static double EPSILON = 0.00005; - public static final double MIN_VARIANCE = 1e-50; - - protected double LST; - protected double SST; - - int m; - double t; - - - public ClustreamKernel( Instance instance, int dimensions, long timestamp , double t, int m) { - super(instance, dimensions); - this.t = t; - this.m = m; - this.LST = timestamp; - this.SST = timestamp*timestamp; - } - - public ClustreamKernel( ClustreamKernel cluster, double t, int m ) { - super(cluster); - this.t = t; - this.m = m; - this.LST = cluster.LST; - this.SST = cluster.SST; - } - - public void insert( Instance instance, long timestamp ) { - N++; - LST += timestamp; - SST += timestamp*timestamp; - - for ( int i = 0; i < instance.numValues(); i++ ) { - LS[i] += instance.value(i); - SS[i] += instance.value(i)*instance.value(i); - } + private static final long serialVersionUID = 1L; + + private final static double EPSILON = 0.00005; + public static final double MIN_VARIANCE = 1e-50; + + protected double LST; + protected double SST; + + int m; + double t; + + public ClustreamKernel(Instance instance, int dimensions, long timestamp, double t, int m) { + super(instance, dimensions); + this.t = t; + this.m = m; + this.LST = timestamp; + this.SST = timestamp * timestamp; + } + + public ClustreamKernel(ClustreamKernel cluster, double t, int m) { + super(cluster); + this.t = t; + this.m = m; + this.LST = cluster.LST; + this.SST = cluster.SST; + } + + public void insert(Instance instance, long timestamp) { + N++; + LST += timestamp; + SST += timestamp * timestamp; + + for (int i = 0; i < instance.numValues(); i++) { + LS[i] += instance.value(i); + SS[i] += instance.value(i) * instance.value(i); } - - @Override - public void add( CFCluster other2 ) { - ClustreamKernel other = (ClustreamKernel) other2; - assert( other.LS.length == this.LS.length ); - this.N += other.N; - this.LST += other.LST; - this.SST += other.SST; - - for ( int i = 0; i < LS.length; i++ ) { - this.LS[i] += other.LS[i]; - this.SS[i] += other.SS[i]; - } + } + + @Override + public void add(CFCluster other2) { + ClustreamKernel other = (ClustreamKernel) other2; + assert (other.LS.length == this.LS.length); + this.N += other.N; + this.LST += other.LST; + this.SST += other.SST; + + for (int i = 0; i < LS.length; i++) { + this.LS[i] += other.LS[i]; + this.SS[i] += other.SS[i]; } - - public double getRelevanceStamp() { - if ( N < 2*m ) - return getMuTime(); - - return getMuTime() + getSigmaTime() * getQuantile( ((double)m)/(2*N) ); + } + + public double getRelevanceStamp() { + if (N < 2 * m) + return getMuTime(); + + return getMuTime() + getSigmaTime() * getQuantile(((double) m) / (2 * N)); + } + + private double getMuTime() { + return LST / N; + } + + private double getSigmaTime() { + return Math.sqrt(SST / N - (LST / N) * (LST / N)); + } + + private double getQuantile(double z) { + assert (z >= 0 && z <= 1); + return Math.sqrt(2) * inverseError(2 * z - 1); + } + + @Override + public double getRadius() { + // trivial cluster + if (N == 1) + return 0; + if (t == 1) + t = 1; + + return getDeviation() * radiusFactor; + } + + @Override + public CFCluster getCF() { + return this; + } + + private double getDeviation() { + double[] variance = getVarianceVector(); + double sumOfDeviation = 0.0; + for (double aVariance : variance) { + double d = Math.sqrt(aVariance); + sumOfDeviation += d; } - - private double getMuTime() { - return LST / N; - } - - private double getSigmaTime() { - return Math.sqrt(SST/N - (LST/N)*(LST/N)); - } - - private double getQuantile( double z ) { - assert( z >= 0 && z <= 1 ); - return Math.sqrt( 2 ) * inverseError( 2*z - 1 ); - } - - @Override - public double getRadius() { - //trivial cluster - if(N == 1) return 0; - if(t==1) - t=1; - - return getDeviation()*radiusFactor; - } - - @Override - public CFCluster getCF(){ - return this; - } - - - private double getDeviation(){ - double[] variance = getVarianceVector(); - double sumOfDeviation = 0.0; - for (double aVariance : variance) { - double d = Math.sqrt(aVariance); - sumOfDeviation += d; - } - return sumOfDeviation / variance.length; + return sumOfDeviation / variance.length; + } + + /** + * @return this kernels' center + */ + @Override + public double[] getCenter() { + assert (!this.isEmpty()); + double res[] = new double[this.LS.length]; + for (int i = 0; i < res.length; i++) { + res[i] = this.LS[i] / N; } - - /** - * @return this kernels' center - */ - @Override - public double[] getCenter() { - assert (!this.isEmpty()); - double res[] = new double[this.LS.length]; - for (int i = 0; i < res.length; i++) { - res[i] = this.LS[i] / N; - } - return res; + return res; + } + + /** + * See interface <code>Cluster</code> + * + * @param instance + * @return double value + */ + @Override + public double getInclusionProbability(Instance instance) { + // trivial cluster + if (N == 1) { + double distance = 0.0; + for (int i = 0; i < LS.length; i++) { + double d = LS[i] - instance.value(i); + distance += d * d; + } + distance = Math.sqrt(distance); + if (distance < EPSILON) + return 1.0; + return 0.0; } - - /** - * See interface <code>Cluster</code> - * @param instance - * @return double value - */ - @Override - public double getInclusionProbability(Instance instance) { - //trivial cluster - if(N == 1){ - double distance = 0.0; - for (int i = 0; i < LS.length; i++) { - double d = LS[i] - instance.value(i); - distance += d * d; - } - distance = Math.sqrt(distance); - if( distance < EPSILON ) - return 1.0; - return 0.0; - } - else{ - double dist = calcNormalizedDistance(instance.toDoubleArray()); - if(dist <= getRadius()){ - return 1; - } - else{ - return 0; - } -// double res = AuxiliaryFunctions.distanceProbabilty(dist, LS.length); -// return res; - } + else { + double dist = calcNormalizedDistance(instance.toDoubleArray()); + if (dist <= getRadius()) { + return 1; + } + else { + return 0; + } + // double res = AuxiliaryFunctions.distanceProbabilty(dist, LS.length); + // return res; } - - private double[] getVarianceVector() { - double[] res = new double[this.LS.length]; - for (int i = 0; i < this.LS.length; i++) { - double ls = this.LS[i]; - double ss = this.SS[i]; - - double lsDivN = ls / this.getWeight(); - double lsDivNSquared = lsDivN * lsDivN; - double ssDivN = ss / this.getWeight(); - res[i] = ssDivN - lsDivNSquared; - - // Due to numerical errors, small negative values can occur. - // We correct this by settings them to almost zero. - if (res[i] <= 0.0) { - if (res[i] > -EPSILON) { - res[i] = MIN_VARIANCE; - } - } + } + + private double[] getVarianceVector() { + double[] res = new double[this.LS.length]; + for (int i = 0; i < this.LS.length; i++) { + double ls = this.LS[i]; + double ss = this.SS[i]; + + double lsDivN = ls / this.getWeight(); + double lsDivNSquared = lsDivN * lsDivN; + double ssDivN = ss / this.getWeight(); + res[i] = ssDivN - lsDivNSquared; + + // Due to numerical errors, small negative values can occur. + // We correct this by settings them to almost zero. + if (res[i] <= 0.0) { + if (res[i] > -EPSILON) { + res[i] = MIN_VARIANCE; } - return res; + } } - - /** - * Check if this cluster is empty or not. - * @return <code>true</code> if the cluster has no data points, - * <code>false</code> otherwise. - */ - public boolean isEmpty() { - return this.N == 0; + return res; + } + + /** + * Check if this cluster is empty or not. + * + * @return <code>true</code> if the cluster has no data points, + * <code>false</code> otherwise. + */ + public boolean isEmpty() { + return this.N == 0; + } + + /** + * Calculate the normalized euclidean distance (Mahalanobis distance for + * distribution w/o covariances) to a point. + * + * @param point + * The point to which the distance is calculated. + * @return The normalized distance to the cluster center. + * + * TODO: check whether WEIGHTING is correctly applied to variances + */ + // ??????? + private double calcNormalizedDistance(double[] point) { + double[] center = getCenter(); + double res = 0.0; + + for (int i = 0; i < center.length; i++) { + double diff = center[i] - point[i]; + res += (diff * diff);// variance[i]; } - - /** - * Calculate the normalized euclidean distance (Mahalanobis distance for - * distribution w/o covariances) to a point. - * @param point The point to which the distance is calculated. - * @return The normalized distance to the cluster center. - * - * TODO: check whether WEIGHTING is correctly applied to variances - */ - //??????? - private double calcNormalizedDistance(double[] point) { - double[] center = getCenter(); - double res = 0.0; - - for (int i = 0; i < center.length; i++) { - double diff = center[i] - point[i]; - res += (diff * diff);// variance[i]; - } - return Math.sqrt(res); + return Math.sqrt(res); + } + + /** + * Approximates the inverse error function. Clustream needs this. + * + * @param x + */ + public static double inverseError(double x) { + double z = Math.sqrt(Math.PI) * x; + double res = (z) / 2; + + double z2 = z * z; + double zProd = z * z2; // z^3 + res += (1.0 / 24) * zProd; + + zProd *= z2; // z^5 + res += (7.0 / 960) * zProd; + + zProd *= z2; // z^7 + res += (127 * zProd) / 80640; + + zProd *= z2; // z^9 + res += (4369 * zProd) / 11612160; + + zProd *= z2; // z^11 + res += (34807 * zProd) / 364953600; + + zProd *= z2; // z^13 + res += (20036983 * zProd) / 797058662400d; + + return res; + } + + @Override + protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue) { + super.getClusterSpecificInfo(infoTitle, infoValue); + infoTitle.add("Deviation"); + + double[] variance = getVarianceVector(); + double sumOfDeviation = 0.0; + for (double aVariance : variance) { + double d = Math.sqrt(aVariance); + sumOfDeviation += d; } - /** - * Approximates the inverse error function. Clustream needs this. - * @param x - */ - public static double inverseError(double x) { - double z = Math.sqrt(Math.PI) * x; - double res = (z) / 2; - - double z2 = z * z; - double zProd = z * z2; // z^3 - res += (1.0 / 24) * zProd; + sumOfDeviation /= variance.length; - zProd *= z2; // z^5 - res += (7.0 / 960) * zProd; - - zProd *= z2; // z^7 - res += (127 * zProd) / 80640; - - zProd *= z2; // z^9 - res += (4369 * zProd) / 11612160; - - zProd *= z2; // z^11 - res += (34807 * zProd) / 364953600; - - zProd *= z2; // z^13 - res += (20036983 * zProd) / 797058662400d; - - return res; - } - - @Override - protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue) { - super.getClusterSpecificInfo(infoTitle, infoValue); - infoTitle.add("Deviation"); - - double[] variance = getVarianceVector(); - double sumOfDeviation = 0.0; - for (double aVariance : variance) { - double d = Math.sqrt(aVariance); - sumOfDeviation += d; - } - - sumOfDeviation/= variance.length; - - infoValue.add(Double.toString(sumOfDeviation)); - } + infoValue.add(Double.toString(sumOfDeviation)); + } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java index 9e40fc1..08cf936 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java @@ -1,4 +1,3 @@ - package com.yahoo.labs.samoa.moa.clusterers.clustream; /* @@ -38,431 +37,434 @@ import com.yahoo.labs.samoa.instances.DenseInstance; import com.yahoo.labs.samoa.instances.Instance; public class WithKmeans extends AbstractClusterer { - - private static final long serialVersionUID = 1L; - - public IntOption timeWindowOption = new IntOption("horizon", - 'h', "Rang of the window.", 1000); - - public IntOption maxNumKernelsOption = new IntOption( - "maxNumKernels", 'm', - "Maximum number of micro kernels to use.", 100); - - public IntOption kernelRadiFactorOption = new IntOption( - "kernelRadiFactor", 't', - "Multiplier for the kernel radius", 2); - - public IntOption kOption = new IntOption( - "k", 'k', - "k of macro k-means (number of clusters)", 5); - - private int timeWindow; - private long timestamp = -1; - private ClustreamKernel[] kernels; - private boolean initialized; - private List<ClustreamKernel> buffer; // Buffer for initialization with kNN - private int bufferSize; - private double t; - private int m; - - public WithKmeans() { - - } - - @Override - public void resetLearningImpl() { - this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()]; - this.timeWindow = timeWindowOption.getValue(); - this.initialized = false; - this.buffer = new LinkedList<ClustreamKernel>(); - this.bufferSize = maxNumKernelsOption.getValue(); - t = kernelRadiFactorOption.getValue(); - m = maxNumKernelsOption.getValue(); - } - - @Override - public void trainOnInstanceImpl(Instance instance) { - int dim = instance.numValues(); - timestamp++; - // 0. Initialize - if (!initialized) { - if (buffer.size() < bufferSize) { - buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m)); - return; - } else { - for (int i = 0; i < buffer.size(); i++) { - kernels[i] = new ClustreamKernel(new DenseInstance(1.0, buffer.get(i).getCenter()), dim, timestamp, t, m); - } - - buffer.clear(); - initialized = true; - return; - } - } - - - // 1. Determine closest kernel - ClustreamKernel closestKernel = null; - double minDistance = Double.MAX_VALUE; - for ( int i = 0; i < kernels.length; i++ ) { - //System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation()); - double distance = distance(instance.toDoubleArray(), kernels[i].getCenter()); - if (distance < minDistance) { - closestKernel = kernels[i]; - minDistance = distance; - } - } - - // 2. Check whether instance fits into closestKernel - double radius = 0.0; - if ( closestKernel.getWeight() == 1 ) { - // Special case: estimate radius by determining the distance to the - // next closest cluster - radius = Double.MAX_VALUE; - double[] center = closestKernel.getCenter(); - for ( int i = 0; i < kernels.length; i++ ) { - if ( kernels[i] == closestKernel ) { - continue; - } - - double distance = distance(kernels[i].getCenter(), center ); - radius = Math.min( distance, radius ); - } - } else { - radius = closestKernel.getRadius(); - } - - if ( minDistance < radius ) { - // Date fits, put into kernel and be happy - closestKernel.insert( instance, timestamp ); - return; - } - - // 3. Date does not fit, we need to free - // some space to insert a new kernel - long threshold = timestamp - timeWindow; // Kernels before this can be forgotten - - // 3.1 Try to forget old kernels - for ( int i = 0; i < kernels.length; i++ ) { - if ( kernels[i].getRelevanceStamp() < threshold ) { - kernels[i] = new ClustreamKernel( instance, dim, timestamp, t, m ); - return; - } - } - - // 3.2 Merge closest two kernels - int closestA = 0; - int closestB = 0; - minDistance = Double.MAX_VALUE; - for ( int i = 0; i < kernels.length; i++ ) { - double[] centerA = kernels[i].getCenter(); - for ( int j = i + 1; j < kernels.length; j++ ) { - double dist = distance( centerA, kernels[j].getCenter() ); - if ( dist < minDistance ) { - minDistance = dist; - closestA = i; - closestB = j; - } - } - } - assert (closestA != closestB); - - kernels[closestA].add( kernels[closestB] ); - kernels[closestB] = new ClustreamKernel( instance, dim, timestamp, t, m ); - } - - @Override - public Clustering getMicroClusteringResult() { - if (!initialized) { - return new Clustering(new Cluster[0]); - } - - ClustreamKernel[] result = new ClustreamKernel[kernels.length]; - for (int i = 0; i < result.length; i++) { - result[i] = new ClustreamKernel(kernels[i], t, m); - } - - return new Clustering(result); - } - - @Override - public Clustering getClusteringResult() { - return kMeans_rand(kOption.getValue(), getMicroClusteringResult()); - } - - public Clustering getClusteringResult(Clustering gtClustering) { - return kMeans_gta(kOption.getValue(), getMicroClusteringResult(), gtClustering); - } - - public String getName() { - return "CluStreamWithKMeans " + timeWindow; - } - - /** - * Distance between two vectors. - * - * @param pointA - * @param pointB - * @return dist - */ - private static double distance(double[] pointA, double [] pointB) { - double distance = 0.0; - for (int i = 0; i < pointA.length; i++) { - double d = pointA[i] - pointB[i]; - distance += d * d; - } - return Math.sqrt(distance); - } - - /** - * k-means of (micro)clusters, with ground-truth-aided initialization. - * (to produce best results) - * - * @param k - * @param data - * @return (macro)clustering - CFClusters - */ - public static Clustering kMeans_gta(int k, Clustering clustering, Clustering gtClustering) { - - ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); - for (int i = 0; i < clustering.size(); i++) { - if (clustering.get(i) instanceof CFCluster) { - microclusters.add((CFCluster)clustering.get(i)); - } else { - System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() + ". Cluster needs to extend moa.cluster.CFCluster"); - } + + private static final long serialVersionUID = 1L; + + public IntOption timeWindowOption = new IntOption("horizon", + 'h', "Rang of the window.", 1000); + + public IntOption maxNumKernelsOption = new IntOption( + "maxNumKernels", 'm', + "Maximum number of micro kernels to use.", 100); + + public IntOption kernelRadiFactorOption = new IntOption( + "kernelRadiFactor", 't', + "Multiplier for the kernel radius", 2); + + public IntOption kOption = new IntOption( + "k", 'k', + "k of macro k-means (number of clusters)", 5); + + private int timeWindow; + private long timestamp = -1; + private ClustreamKernel[] kernels; + private boolean initialized; + private List<ClustreamKernel> buffer; // Buffer for initialization with kNN + private int bufferSize; + private double t; + private int m; + + public WithKmeans() { + + } + + @Override + public void resetLearningImpl() { + this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()]; + this.timeWindow = timeWindowOption.getValue(); + this.initialized = false; + this.buffer = new LinkedList<ClustreamKernel>(); + this.bufferSize = maxNumKernelsOption.getValue(); + t = kernelRadiFactorOption.getValue(); + m = maxNumKernelsOption.getValue(); + } + + @Override + public void trainOnInstanceImpl(Instance instance) { + int dim = instance.numValues(); + timestamp++; + // 0. Initialize + if (!initialized) { + if (buffer.size() < bufferSize) { + buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m)); + return; + } else { + for (int i = 0; i < buffer.size(); i++) { + kernels[i] = new ClustreamKernel(new DenseInstance(1.0, buffer.get(i).getCenter()), dim, timestamp, t, m); + } + + buffer.clear(); + initialized = true; + return; + } + } + + // 1. Determine closest kernel + ClustreamKernel closestKernel = null; + double minDistance = Double.MAX_VALUE; + for (int i = 0; i < kernels.length; i++) { + // System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation()); + double distance = distance(instance.toDoubleArray(), kernels[i].getCenter()); + if (distance < minDistance) { + closestKernel = kernels[i]; + minDistance = distance; + } + } + + // 2. Check whether instance fits into closestKernel + double radius = 0.0; + if (closestKernel.getWeight() == 1) { + // Special case: estimate radius by determining the distance to the + // next closest cluster + radius = Double.MAX_VALUE; + double[] center = closestKernel.getCenter(); + for (int i = 0; i < kernels.length; i++) { + if (kernels[i] == closestKernel) { + continue; + } + + double distance = distance(kernels[i].getCenter(), center); + radius = Math.min(distance, radius); + } + } else { + radius = closestKernel.getRadius(); + } + + if (minDistance < radius) { + // Date fits, put into kernel and be happy + closestKernel.insert(instance, timestamp); + return; + } + + // 3. Date does not fit, we need to free + // some space to insert a new kernel + long threshold = timestamp - timeWindow; // Kernels before this can be + // forgotten + + // 3.1 Try to forget old kernels + for (int i = 0; i < kernels.length; i++) { + if (kernels[i].getRelevanceStamp() < threshold) { + kernels[i] = new ClustreamKernel(instance, dim, timestamp, t, m); + return; + } + } + + // 3.2 Merge closest two kernels + int closestA = 0; + int closestB = 0; + minDistance = Double.MAX_VALUE; + for (int i = 0; i < kernels.length; i++) { + double[] centerA = kernels[i].getCenter(); + for (int j = i + 1; j < kernels.length; j++) { + double dist = distance(centerA, kernels[j].getCenter()); + if (dist < minDistance) { + minDistance = dist; + closestA = i; + closestB = j; + } + } + } + assert (closestA != closestB); + + kernels[closestA].add(kernels[closestB]); + kernels[closestB] = new ClustreamKernel(instance, dim, timestamp, t, m); + } + + @Override + public Clustering getMicroClusteringResult() { + if (!initialized) { + return new Clustering(new Cluster[0]); + } + + ClustreamKernel[] result = new ClustreamKernel[kernels.length]; + for (int i = 0; i < result.length; i++) { + result[i] = new ClustreamKernel(kernels[i], t, m); + } + + return new Clustering(result); + } + + @Override + public Clustering getClusteringResult() { + return kMeans_rand(kOption.getValue(), getMicroClusteringResult()); + } + + public Clustering getClusteringResult(Clustering gtClustering) { + return kMeans_gta(kOption.getValue(), getMicroClusteringResult(), gtClustering); + } + + public String getName() { + return "CluStreamWithKMeans " + timeWindow; + } + + /** + * Distance between two vectors. + * + * @param pointA + * @param pointB + * @return dist + */ + private static double distance(double[] pointA, double[] pointB) { + double distance = 0.0; + for (int i = 0; i < pointA.length; i++) { + double d = pointA[i] - pointB[i]; + distance += d * d; + } + return Math.sqrt(distance); + } + + /** + * k-means of (micro)clusters, with ground-truth-aided initialization. (to + * produce best results) + * + * @param k + * @param data + * @return (macro)clustering - CFClusters + */ + public static Clustering kMeans_gta(int k, Clustering clustering, Clustering gtClustering) { + + ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); + for (int i = 0; i < clustering.size(); i++) { + if (clustering.get(i) instanceof CFCluster) { + microclusters.add((CFCluster) clustering.get(i)); + } else { + System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() + + ". Cluster needs to extend moa.cluster.CFCluster"); + } + } + + int n = microclusters.size(); + assert (k <= n); + + /* k-means */ + Random random = new Random(0); + Cluster[] centers = new Cluster[k]; + int K = gtClustering.size(); + + for (int i = 0; i < k; i++) { + if (i < K) { // GT-aided + centers[i] = new SphereCluster(gtClustering.get(i).getCenter(), 0); + } else { // Randomized + int rid = random.nextInt(n); + centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0); + } + } + + return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters); + } + + /** + * k-means of (micro)clusters, with randomized initialization. + * + * @param k + * @param data + * @return (macro)clustering - CFClusters + */ + public static Clustering kMeans_rand(int k, Clustering clustering) { + + ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); + for (int i = 0; i < clustering.size(); i++) { + if (clustering.get(i) instanceof CFCluster) { + microclusters.add((CFCluster) clustering.get(i)); + } else { + System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() + + ". Cluster needs to extend moa.cluster.CFCluster"); + } + } + + int n = microclusters.size(); + assert (k <= n); + + /* k-means */ + Random random = new Random(0); + Cluster[] centers = new Cluster[k]; + + for (int i = 0; i < k; i++) { + int rid = random.nextInt(n); + centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0); + } + + return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters); + } + + /** + * (The Actual Algorithm) k-means of (micro)clusters, with specified + * initialization points. + * + * @param k + * @param centers + * - initial centers + * @param data + * @return (macro)clustering - SphereClusters + */ + protected static Clustering kMeans(int k, Cluster[] centers, List<? extends Cluster> data) { + assert (centers.length == k); + assert (k > 0); + + int dimensions = centers[0].getCenter().length; + + ArrayList<ArrayList<Cluster>> clustering = new ArrayList<ArrayList<Cluster>>(); + for (int i = 0; i < k; i++) { + clustering.add(new ArrayList<Cluster>()); + } + + while (true) { + // Assign points to clusters + for (Cluster point : data) { + double minDistance = distance(point.getCenter(), centers[0].getCenter()); + int closestCluster = 0; + for (int i = 1; i < k; i++) { + double distance = distance(point.getCenter(), centers[i].getCenter()); + if (distance < minDistance) { + closestCluster = i; + minDistance = distance; + } + } + + clustering.get(closestCluster).add(point); + } + + // Calculate new centers and clear clustering lists + SphereCluster[] newCenters = new SphereCluster[centers.length]; + for (int i = 0; i < k; i++) { + newCenters[i] = calculateCenter(clustering.get(i), dimensions); + clustering.get(i).clear(); + } + + // Convergence check + boolean converged = true; + for (int i = 0; i < k; i++) { + if (!Arrays.equals(centers[i].getCenter(), newCenters[i].getCenter())) { + converged = false; + break; } - - int n = microclusters.size(); - assert (k <= n); - - /* k-means */ - Random random = new Random(0); - Cluster[] centers = new Cluster[k]; - int K = gtClustering.size(); - - for (int i = 0; i < k; i++) { - if (i < K) { // GT-aided - centers[i] = new SphereCluster(gtClustering.get(i).getCenter(), 0); - } else { // Randomized - int rid = random.nextInt(n); - centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0); - } - } - - return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters); - } - - /** - * k-means of (micro)clusters, with randomized initialization. - * - * @param k - * @param data - * @return (macro)clustering - CFClusters - */ - public static Clustering kMeans_rand(int k, Clustering clustering) { - - ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); - for (int i = 0; i < clustering.size(); i++) { - if (clustering.get(i) instanceof CFCluster) { - microclusters.add((CFCluster)clustering.get(i)); - } else { - System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() + ". Cluster needs to extend moa.cluster.CFCluster"); - } + } + + if (converged) { + break; + } else { + centers = newCenters; + } + } + + return new Clustering(centers); + } + + /** + * Rearrange the k-means result into a set of CFClusters, cleaning up the + * redundancies. + * + * @param kMeansResult + * @param microclusters + * @return + */ + protected static Clustering cleanUpKMeans(Clustering kMeansResult, ArrayList<CFCluster> microclusters) { + /* Convert k-means result to CFClusters */ + int k = kMeansResult.size(); + CFCluster[] converted = new CFCluster[k]; + + for (CFCluster mc : microclusters) { + // Find closest kMeans cluster + double minDistance = Double.MAX_VALUE; + int closestCluster = 0; + for (int i = 0; i < k; i++) { + double distance = distance(kMeansResult.get(i).getCenter(), mc.getCenter()); + if (distance < minDistance) { + closestCluster = i; + minDistance = distance; } - - int n = microclusters.size(); - assert (k <= n); - - /* k-means */ - Random random = new Random(0); - Cluster[] centers = new Cluster[k]; - - for (int i = 0; i < k; i++) { - int rid = random.nextInt(n); - centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0); - } - - return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters); - } - - /** - * (The Actual Algorithm) k-means of (micro)clusters, with specified initialization points. - * - * @param k - * @param centers - initial centers - * @param data - * @return (macro)clustering - SphereClusters - */ - protected static Clustering kMeans(int k, Cluster[] centers, List<? extends Cluster> data) { - assert (centers.length == k); - assert (k > 0); - - int dimensions = centers[0].getCenter().length; - - ArrayList<ArrayList<Cluster>> clustering = new ArrayList<ArrayList<Cluster>>(); - for (int i = 0; i < k; i++) { - clustering.add(new ArrayList<Cluster>()); - } - - while (true) { - // Assign points to clusters - for (Cluster point : data) { - double minDistance = distance(point.getCenter(), centers[0].getCenter()); - int closestCluster = 0; - for (int i = 1; i < k; i++) { - double distance = distance(point.getCenter(), centers[i].getCenter()); - if (distance < minDistance) { - closestCluster = i; - minDistance = distance; - } - } - - clustering.get(closestCluster).add(point); - } - - // Calculate new centers and clear clustering lists - SphereCluster[] newCenters = new SphereCluster[centers.length]; - for (int i = 0; i < k; i++) { - newCenters[i] = calculateCenter(clustering.get(i), dimensions); - clustering.get(i).clear(); - } - - // Convergence check - boolean converged = true; - for (int i = 0; i < k; i++) { - if (!Arrays.equals(centers[i].getCenter(), newCenters[i].getCenter())) { - converged = false; - break; - } - } - - if (converged) { - break; - } else { - centers = newCenters; - } - } - - return new Clustering(centers); - } - - /** - * Rearrange the k-means result into a set of CFClusters, cleaning up the redundancies. - * - * @param kMeansResult - * @param microclusters - * @return - */ - protected static Clustering cleanUpKMeans(Clustering kMeansResult, ArrayList<CFCluster> microclusters) { - /* Convert k-means result to CFClusters */ - int k = kMeansResult.size(); - CFCluster[] converted = new CFCluster[k]; - - for (CFCluster mc : microclusters) { - // Find closest kMeans cluster - double minDistance = Double.MAX_VALUE; - int closestCluster = 0; - for (int i = 0; i < k; i++) { - double distance = distance(kMeansResult.get(i).getCenter(), mc.getCenter()); - if (distance < minDistance) { - closestCluster = i; - minDistance = distance; - } - } - - // Add to cluster - if ( converted[closestCluster] == null ) { - converted[closestCluster] = (CFCluster)mc.copy(); - } else { - converted[closestCluster].add(mc); - } - } - - // Clean up - int count = 0; - for (int i = 0; i < converted.length; i++) { - if (converted[i] != null) - count++; - } - - CFCluster[] cleaned = new CFCluster[count]; - count = 0; - for (int i = 0; i < converted.length; i++) { - if (converted[i] != null) - cleaned[count++] = converted[i]; - } - - return new Clustering(cleaned); - } - - - - /** - * k-means helper: Calculate a wrapping cluster of assigned points[microclusters]. - * - * @param assigned - * @param dimensions - * @return SphereCluster (with center and radius) - */ - private static SphereCluster calculateCenter(ArrayList<Cluster> assigned, int dimensions) { - double[] result = new double[dimensions]; - for (int i = 0; i < result.length; i++) { - result[i] = 0.0; - } - - if (assigned.size() == 0) { - return new SphereCluster(result, 0.0); - } - - for (Cluster point : assigned) { - double[] center = point.getCenter(); - for (int i = 0; i < result.length; i++) { - result[i] += center[i]; - } - } - - // Normalize - for (int i = 0; i < result.length; i++) { - result[i] /= assigned.size(); - } - - // Calculate radius: biggest wrapping distance from center - double radius = 0.0; - for (Cluster point : assigned) { - double dist = distance(result, point.getCenter()); - if (dist > radius) { - radius = dist; - } - } - SphereCluster sc = new SphereCluster(result, radius); - sc.setWeight(assigned.size()); - return sc; - } - - - /** Miscellaneous **/ - - @Override - public boolean implementsMicroClusterer() { - return true; - } - - public boolean isRandomizable() { - return false; - } - - public double[] getVotesForInstance(Instance inst) { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - protected Measurement[] getModelMeasurementsImpl() { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public void getModelDescription(StringBuilder out, int indent) { - throw new UnsupportedOperationException("Not supported yet."); - } + } + + // Add to cluster + if (converted[closestCluster] == null) { + converted[closestCluster] = (CFCluster) mc.copy(); + } else { + converted[closestCluster].add(mc); + } + } + + // Clean up + int count = 0; + for (int i = 0; i < converted.length; i++) { + if (converted[i] != null) + count++; + } + + CFCluster[] cleaned = new CFCluster[count]; + count = 0; + for (int i = 0; i < converted.length; i++) { + if (converted[i] != null) + cleaned[count++] = converted[i]; + } + + return new Clustering(cleaned); + } + + /** + * k-means helper: Calculate a wrapping cluster of assigned + * points[microclusters]. + * + * @param assigned + * @param dimensions + * @return SphereCluster (with center and radius) + */ + private static SphereCluster calculateCenter(ArrayList<Cluster> assigned, int dimensions) { + double[] result = new double[dimensions]; + for (int i = 0; i < result.length; i++) { + result[i] = 0.0; + } + + if (assigned.size() == 0) { + return new SphereCluster(result, 0.0); + } + + for (Cluster point : assigned) { + double[] center = point.getCenter(); + for (int i = 0; i < result.length; i++) { + result[i] += center[i]; + } + } + + // Normalize + for (int i = 0; i < result.length; i++) { + result[i] /= assigned.size(); + } + + // Calculate radius: biggest wrapping distance from center + double radius = 0.0; + for (Cluster point : assigned) { + double dist = distance(result, point.getCenter()); + if (dist > radius) { + radius = dist; + } + } + SphereCluster sc = new SphereCluster(result, radius); + sc.setWeight(assigned.size()); + return sc; + } + + /** Miscellaneous **/ + + @Override + public boolean implementsMicroClusterer() { + return true; + } + + public boolean isRandomizable() { + return false; + } + + public double[] getVotesForInstance(Instance inst) { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + protected Measurement[] getModelMeasurementsImpl() { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public void getModelDescription(StringBuilder out, int indent) { + throw new UnsupportedOperationException("Not supported yet."); + } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java index f052832..3880e09 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java @@ -36,176 +36,161 @@ import java.util.jar.JarFile; /** * Class for discovering classes via reflection in the java class path. - * + * * @author Richard Kirkby ([email protected]) * @version $Revision: 7 $ */ public class AutoClassDiscovery { - protected static final Map<String, String[]> cachedClassNames = new HashMap<String, String[]>(); - - public static String[] findClassNames(String packageNameToSearch) { - String[] cached = cachedClassNames.get(packageNameToSearch); - if (cached == null) { - HashSet<String> classNames = new HashSet<String>(); - /*StringTokenizer pathTokens = new StringTokenizer(System - .getProperty("java.class.path"), File.pathSeparator);*/ - String packageDirName = packageNameToSearch.replace('.', - File.separatorChar); - String packageJarName = packageNameToSearch.length() > 0 ? (packageNameToSearch.replace('.', '/') + "/") - : ""; - String part = ""; + protected static final Map<String, String[]> cachedClassNames = new HashMap<String, String[]>(); + public static String[] findClassNames(String packageNameToSearch) { + String[] cached = cachedClassNames.get(packageNameToSearch); + if (cached == null) { + HashSet<String> classNames = new HashSet<String>(); + /* + * StringTokenizer pathTokens = new StringTokenizer(System + * .getProperty("java.class.path"), File.pathSeparator); + */ + String packageDirName = packageNameToSearch.replace('.', + File.separatorChar); + String packageJarName = packageNameToSearch.length() > 0 ? (packageNameToSearch.replace('.', '/') + "/") + : ""; + String part = ""; - AutoClassDiscovery adc = new AutoClassDiscovery(); - URLClassLoader sysLoader = (URLClassLoader) adc.getClass().getClassLoader(); - URL[] cl_urls = sysLoader.getURLs(); - - for (int i = 0; i < cl_urls.length; i++) { - part = cl_urls[i].toString(); - if (part.startsWith("file:")) { - part = part.replace(" ", "%20"); - try { - File temp = new File(new java.net.URI(part)); - part = temp.getAbsolutePath(); - } catch (URISyntaxException e) { - e.printStackTrace(); - } - } + AutoClassDiscovery adc = new AutoClassDiscovery(); + URLClassLoader sysLoader = (URLClassLoader) adc.getClass().getClassLoader(); + URL[] cl_urls = sysLoader.getURLs(); - // find classes - ArrayList<File> files = new ArrayList<File>(); - File dir = new File(part); - if (dir.isDirectory()) { - File root = new File(dir.toString() + File.separatorChar + packageDirName); - String[] names = findClassesInDirectoryRecursive(root, ""); - classNames.addAll(Arrays.asList(names)); - } else { - try { - JarFile jar = new JarFile(part); - Enumeration<JarEntry> jarEntries = jar.entries(); - while (jarEntries.hasMoreElements()) { - String jarEntry = jarEntries.nextElement().getName(); - if (jarEntry.startsWith(packageJarName)) { - String relativeName = jarEntry.substring(packageJarName.length()); - if (relativeName.endsWith(".class")) { - relativeName = relativeName.replace('/', - '.'); - classNames.add(relativeName.substring(0, - relativeName.length() - - ".class".length())); - } - } - } - } catch (IOException ignored) { - // ignore unreadable files - } - } - } + for (int i = 0; i < cl_urls.length; i++) { + part = cl_urls[i].toString(); + if (part.startsWith("file:")) { + part = part.replace(" ", "%20"); + try { + File temp = new File(new java.net.URI(part)); + part = temp.getAbsolutePath(); + } catch (URISyntaxException e) { + e.printStackTrace(); + } + } - /*while (pathTokens.hasMoreElements()) { - String pathToSearch = pathTokens.nextElement().toString(); - if (pathToSearch.endsWith(".jar")) { - try { - JarFile jar = new JarFile(pathToSearch); + // find classes + ArrayList<File> files = new ArrayList<File>(); + File dir = new File(part); + if (dir.isDirectory()) { + File root = new File(dir.toString() + File.separatorChar + packageDirName); + String[] names = findClassesInDirectoryRecursive(root, ""); + classNames.addAll(Arrays.asList(names)); + } else { + try { + JarFile jar = new JarFile(part); Enumeration<JarEntry> jarEntries = jar.entries(); while (jarEntries.hasMoreElements()) { - String jarEntry = jarEntries.nextElement() - .getName(); - if (jarEntry.startsWith(packageJarName)) { - String relativeName = jarEntry - .substring(packageJarName.length()); - if (relativeName.endsWith(".class")) { - relativeName = relativeName.replace('/', - '.'); - classNames.add(relativeName.substring(0, - relativeName.length() - - ".class".length())); - } - } + String jarEntry = jarEntries.nextElement().getName(); + if (jarEntry.startsWith(packageJarName)) { + String relativeName = jarEntry.substring(packageJarName.length()); + if (relativeName.endsWith(".class")) { + relativeName = relativeName.replace('/', + '.'); + classNames.add(relativeName.substring(0, + relativeName.length() + - ".class".length())); + } + } } - } catch (IOException ignored) { + } catch (IOException ignored) { // ignore unreadable files - } - } else { - File root = new File(pathToSearch + File.separatorChar - + packageDirName); - String[] names = findClassesInDirectoryRecursive(root, ""); - for (String name : names) { - classNames.add(name); - } - } - } */ - cached = classNames.toArray(new String[classNames.size()]); - Arrays.sort(cached); - cachedClassNames.put(packageNameToSearch, cached); + } } - return cached; - } + } - protected static String[] findClassesInDirectoryRecursive(File root, - String packagePath) { - HashSet<String> classNames = new HashSet<String>(); - if (root.isDirectory()) { - String[] list = root.list(); - for (String string : list) { - if (string.endsWith(".class")) { - classNames.add(packagePath - + string.substring(0, string.length() - - ".class".length())); - } else { - File testDir = new File(root.getPath() + File.separatorChar - + string); - if (testDir.isDirectory()) { - String[] names = findClassesInDirectoryRecursive( - testDir, packagePath + string + "."); - classNames.addAll(Arrays.asList(names)); - } - } - } - } - return classNames.toArray(new String[classNames.size()]); + /* + * while (pathTokens.hasMoreElements()) { String pathToSearch = + * pathTokens.nextElement().toString(); if (pathToSearch.endsWith(".jar")) + * { try { JarFile jar = new JarFile(pathToSearch); Enumeration<JarEntry> + * jarEntries = jar.entries(); while (jarEntries.hasMoreElements()) { + * String jarEntry = jarEntries.nextElement() .getName(); if + * (jarEntry.startsWith(packageJarName)) { String relativeName = jarEntry + * .substring(packageJarName.length()); if + * (relativeName.endsWith(".class")) { relativeName = + * relativeName.replace('/', '.'); + * classNames.add(relativeName.substring(0, relativeName.length() - + * ".class".length())); } } } } catch (IOException ignored) { // ignore + * unreadable files } } else { File root = new File(pathToSearch + + * File.separatorChar + packageDirName); String[] names = + * findClassesInDirectoryRecursive(root, ""); for (String name : names) { + * classNames.add(name); } } } + */ + cached = classNames.toArray(new String[classNames.size()]); + Arrays.sort(cached); + cachedClassNames.put(packageNameToSearch, cached); } + return cached; + } - public static Class[] findClassesOfType(String packageNameToSearch, - Class<?> typeDesired) { - ArrayList<Class<?>> classesFound = new ArrayList<Class<?>>(); - String[] classNames = findClassNames(packageNameToSearch); - for (String className : classNames) { - String fullName = packageNameToSearch.length() > 0 ? (packageNameToSearch - + "." + className) - : className; - if (isPublicConcreteClassOfType(fullName, typeDesired)) { - try { - classesFound.add(Class.forName(fullName)); - } catch (Exception ignored) { - // ignore classes that we cannot instantiate - } - } + protected static String[] findClassesInDirectoryRecursive(File root, + String packagePath) { + HashSet<String> classNames = new HashSet<String>(); + if (root.isDirectory()) { + String[] list = root.list(); + for (String string : list) { + if (string.endsWith(".class")) { + classNames.add(packagePath + + string.substring(0, string.length() + - ".class".length())); + } else { + File testDir = new File(root.getPath() + File.separatorChar + + string); + if (testDir.isDirectory()) { + String[] names = findClassesInDirectoryRecursive( + testDir, packagePath + string + "."); + classNames.addAll(Arrays.asList(names)); + } } - return classesFound.toArray(new Class[classesFound.size()]); + } } + return classNames.toArray(new String[classNames.size()]); + } - public static boolean isPublicConcreteClassOfType(String className, - Class<?> typeDesired) { - Class<?> testClass = null; + public static Class[] findClassesOfType(String packageNameToSearch, + Class<?> typeDesired) { + ArrayList<Class<?>> classesFound = new ArrayList<Class<?>>(); + String[] classNames = findClassNames(packageNameToSearch); + for (String className : classNames) { + String fullName = packageNameToSearch.length() > 0 ? (packageNameToSearch + + "." + className) + : className; + if (isPublicConcreteClassOfType(fullName, typeDesired)) { try { - testClass = Class.forName(className); - } catch (Exception e) { - return false; + classesFound.add(Class.forName(fullName)); + } catch (Exception ignored) { + // ignore classes that we cannot instantiate } - int classModifiers = testClass.getModifiers(); - return (java.lang.reflect.Modifier.isPublic(classModifiers) - && !java.lang.reflect.Modifier.isAbstract(classModifiers) - && typeDesired.isAssignableFrom(testClass) && hasEmptyConstructor(testClass)); + } } + return classesFound.toArray(new Class[classesFound.size()]); + } - public static boolean hasEmptyConstructor(Class<?> type) { - try { - type.getConstructor(); - return true; - } catch (Exception ignored) { - return false; - } + public static boolean isPublicConcreteClassOfType(String className, + Class<?> typeDesired) { + Class<?> testClass = null; + try { + testClass = Class.forName(className); + } catch (Exception e) { + return false; + } + int classModifiers = testClass.getModifiers(); + return (java.lang.reflect.Modifier.isPublic(classModifiers) + && !java.lang.reflect.Modifier.isAbstract(classModifiers) + && typeDesired.isAssignableFrom(testClass) && hasEmptyConstructor(testClass)); + } + + public static boolean hasEmptyConstructor(Class<?> type) { + try { + type.getConstructor(); + return true; + } catch (Exception ignored) { + return false; } + } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java index 2b8ed09..071e0c4 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java @@ -28,106 +28,106 @@ import com.yahoo.labs.samoa.moa.MOAObject; /** * Vector with the capability of automatic expansion. - * + * * @author Richard Kirkby ([email protected]) * @version $Revision: 7 $ */ public class AutoExpandVector<T> extends ArrayList<T> implements MOAObject { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - public AutoExpandVector() { - super(0); - } - - public AutoExpandVector(int size) { - super(size); - } + public AutoExpandVector() { + super(0); + } - @Override - public void add(int pos, T obj) { - if (pos > size()) { - while (pos > size()) { - add(null); - } - trimToSize(); - } - super.add(pos, obj); - } + public AutoExpandVector(int size) { + super(size); + } - @Override - public T get(int pos) { - return ((pos >= 0) && (pos < size())) ? super.get(pos) : null; + @Override + public void add(int pos, T obj) { + if (pos > size()) { + while (pos > size()) { + add(null); + } + trimToSize(); } - - @Override - public T set(int pos, T obj) { - if (pos >= size()) { - add(pos, obj); - return null; - } - return super.set(pos, obj); - } - - @Override - public boolean add(T arg0) { - boolean result = super.add(arg0); - trimToSize(); - return result; - } - - @Override - public boolean addAll(Collection<? extends T> arg0) { - boolean result = super.addAll(arg0); - trimToSize(); - return result; - } - - @Override - public boolean addAll(int arg0, Collection<? extends T> arg1) { - boolean result = super.addAll(arg0, arg1); - trimToSize(); - return result; - } - - @Override - public void clear() { - super.clear(); - trimToSize(); - } - - @Override - public T remove(int arg0) { - T result = super.remove(arg0); - trimToSize(); - return result; - } - - @Override - public boolean remove(Object arg0) { - boolean result = super.remove(arg0); - trimToSize(); - return result; - } - - @Override - protected void removeRange(int arg0, int arg1) { - super.removeRange(arg0, arg1); - trimToSize(); - } - - @Override - public MOAObject copy() { - return AbstractMOAObject.copy(this); - } - - @Override - public int measureByteSize() { - return AbstractMOAObject.measureByteSize(this); - } - - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub + super.add(pos, obj); + } + + @Override + public T get(int pos) { + return ((pos >= 0) && (pos < size())) ? super.get(pos) : null; + } + + @Override + public T set(int pos, T obj) { + if (pos >= size()) { + add(pos, obj); + return null; } + return super.set(pos, obj); + } + + @Override + public boolean add(T arg0) { + boolean result = super.add(arg0); + trimToSize(); + return result; + } + + @Override + public boolean addAll(Collection<? extends T> arg0) { + boolean result = super.addAll(arg0); + trimToSize(); + return result; + } + + @Override + public boolean addAll(int arg0, Collection<? extends T> arg1) { + boolean result = super.addAll(arg0, arg1); + trimToSize(); + return result; + } + + @Override + public void clear() { + super.clear(); + trimToSize(); + } + + @Override + public T remove(int arg0) { + T result = super.remove(arg0); + trimToSize(); + return result; + } + + @Override + public boolean remove(Object arg0) { + boolean result = super.remove(arg0); + trimToSize(); + return result; + } + + @Override + protected void removeRange(int arg0, int arg1) { + super.removeRange(arg0, arg1); + trimToSize(); + } + + @Override + public MOAObject copy() { + return AbstractMOAObject.copy(this); + } + + @Override + public int measureByteSize() { + return AbstractMOAObject.measureByteSize(this); + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } }
