http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/cluster/SphereCluster.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/SphereCluster.java b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/SphereCluster.java new file mode 100644 index 0000000..fdf2af6 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/SphereCluster.java @@ -0,0 +1,364 @@ +package org.apache.samoa.moa.cluster; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.List; +import java.util.Random; + +import org.apache.samoa.instances.DenseInstance; +import org.apache.samoa.instances.Instance; + +/** + * A simple implementation of the <code>Cluster</code> interface representing spherical clusters. The inclusion + * probability is one inside the sphere and zero everywhere else. + * + */ +public class SphereCluster extends Cluster { + + private static final long serialVersionUID = 1L; + + private double[] center; + private double radius; + private double weight; + + public SphereCluster(double[] center, double radius) { + this(center, radius, 1.0); + } + + public SphereCluster() { + } + + public SphereCluster(double[] center, double radius, double weightedSize) { + this(); + this.center = center; + this.radius = radius; + this.weight = weightedSize; + } + + public SphereCluster(int dimensions, double radius, Random random) { + this(); + this.center = new double[dimensions]; + this.radius = radius; + + // Position randomly but keep hypersphere inside the boundaries + double interval = 1.0 - 2 * radius; + for (int i = 0; i < center.length; i++) { + this.center[i] = (random.nextDouble() * interval) + radius; + } + this.weight = 0.0; + } + + public SphereCluster(List<? extends Instance> instances, int dimension) { + this(); + if (instances == null || instances.size() <= 0) + return; + + weight = instances.size(); + + Miniball mb = new Miniball(dimension); + mb.clear(); + + for (Instance instance : instances) { + mb.check_in(instance.toDoubleArray()); + } + + mb.build(); + center = mb.center(); + radius = mb.radius(); + mb.clear(); + } + + /** + * Checks whether two <code>SphereCluster</code> overlap based on radius NOTE: overlapRadiusDegree only calculates the + * overlap based on the centers and the radi, so not the real overlap + * + * TODO: should we do this by MC to get the real overlap??? 
+ * + * @param other + * @return + */ + + public double overlapRadiusDegree(SphereCluster other) { + + double[] center0 = getCenter(); + double radius0 = getRadius(); + + double[] center1 = other.getCenter(); + double radius1 = other.getRadius(); + + double radiusBig; + double radiusSmall; + if (radius0 < radius1) { + radiusBig = radius1; + radiusSmall = radius0; + } + else { + radiusBig = radius0; + radiusSmall = radius1; + } + + double dist = 0; + for (int i = 0; i < center0.length; i++) { + double delta = center0[i] - center1[i]; + dist += delta * delta; + } + dist = Math.sqrt(dist); + + if (dist > radiusSmall + radiusBig) + return 0; + if (dist + radiusSmall <= radiusBig) { + // one lies within the other + return 1; + } + else { + return (radiusSmall + radiusBig - dist) / (2 * radiusSmall); + } + } + + public void combine(SphereCluster cluster) { + double[] center = getCenter(); + double[] newcenter = new double[center.length]; + double[] other_center = cluster.getCenter(); + double other_weight = cluster.getWeight(); + double other_radius = cluster.getRadius(); + + for (int i = 0; i < center.length; i++) { + newcenter[i] = (center[i] * getWeight() + other_center[i] * other_weight) / (getWeight() + other_weight); + } + + center = newcenter; + double r_0 = getRadius() + Math.abs(distance(center, newcenter)); + double r_1 = other_radius + Math.abs(distance(other_center, newcenter)); + radius = Math.max(r_0, r_1); + weight += other_weight; + } + + public void merge(SphereCluster cluster) { + double[] c0 = getCenter(); + double w0 = getWeight(); + double r0 = getRadius(); + + double[] c1 = cluster.getCenter(); + double w1 = cluster.getWeight(); + double r1 = cluster.getRadius(); + + // vector + double[] v = new double[c0.length]; + // center distance + double d = 0; + + for (int i = 0; i < c0.length; i++) { + v[i] = c0[i] - c1[i]; + d += v[i] * v[i]; + } + d = Math.sqrt(d); + + double r; + double[] c = new double[c0.length]; + + // one lays within the others + if (d + r0 <= r1 || d + r1 <= r0) { + if (d + r0 <= r1) { + r = r1; + c = c1; + } + else { + r = r0; + c = c0; + } + } + else { + r = (r0 + r1 + d) / 2.0; + for (int i = 0; i < c.length; i++) { + c[i] = c1[i] - v[i] / d * (r1 - r); + } + } + + setCenter(c); + setRadius(r); + setWeight(w0 + w1); + + } + + @Override + public double[] getCenter() { + double[] copy = new double[center.length]; + System.arraycopy(center, 0, copy, 0, center.length); + return copy; + } + + public void setCenter(double[] center) { + this.center = center; + } + + public double getRadius() { + return radius; + } + + public void setRadius(double radius) { + this.radius = radius; + } + + @Override + public double getWeight() { + return weight; + } + + public void setWeight(double weight) { + this.weight = weight; + } + + @Override + public double getInclusionProbability(Instance instance) { + if (getCenterDistance(instance) <= getRadius()) { + return 1.0; + } + return 0.0; + } + + public double getCenterDistance(Instance instance) { + double distance = 0.0; + // get the center through getCenter so subclass have a chance + double[] center = getCenter(); + for (int i = 0; i < center.length; i++) { + double d = center[i] - instance.value(i); + distance += d * d; + } + return Math.sqrt(distance); + } + + public double getCenterDistance(SphereCluster other) { + return distance(getCenter(), other.getCenter()); + } + + /* + * the minimal distance between the surface of two clusters. 
is negative if + * the two clusters overlap + */ + public double getHullDistance(SphereCluster other) { + double distance; + // get the center through getCenter so subclass have a chance + double[] center0 = getCenter(); + double[] center1 = other.getCenter(); + distance = distance(center0, center1); + + distance = distance - getRadius() - other.getRadius(); + return distance; + } + + /* + */ + /** + * When a clusters looses points the new minimal bounding sphere can be partly outside of the originating cluster. If + * a another cluster is right next to the original cluster (without overlapping), the new cluster can be overlapping + * with this second cluster. OverlapSave will tell you if the current cluster can degenerate so much that it overlaps + * with cluster 'other' + * + * @param other + * the potentially overlapping cluster + * @return true if cluster can potentially overlap + */ + public boolean overlapSave(SphereCluster other) { + // use basic geometry to figure out the maximal degenerated cluster + // comes down to Max(radius *(sin alpha + cos alpha)) which is + double minDist = Math.sqrt(2) * (getRadius() + other.getRadius()); + double diff = getCenterDistance(other) - minDist; + + return diff > 0; + } + + private double distance(double[] v1, double[] v2) { + double distance = 0.0; + double[] center = getCenter(); + for (int i = 0; i < center.length; i++) { + double d = v1[i] - v2[i]; + distance += d * d; + } + return Math.sqrt(distance); + } + + public double[] getDistanceVector(Instance instance) { + return distanceVector(getCenter(), instance.toDoubleArray()); + } + + public double[] getDistanceVector(SphereCluster other) { + return distanceVector(getCenter(), other.getCenter()); + } + + private double[] distanceVector(double[] v1, double[] v2) { + double[] v = new double[v1.length]; + for (int i = 0; i < v1.length; i++) { + v[i] = v2[i] - v1[i]; + } + return v; + } + + /** + * Samples this cluster by returning a point from inside it. + * + * @param random + * a random number source + * @return a point that lies inside this cluster + */ + public Instance sample(Random random) { + // Create sample in hypersphere coordinates + // get the center through getCenter so subclass have a chance + double[] center = getCenter(); + + final int dimensions = center.length; + + final double sin[] = new double[dimensions - 1]; + final double cos[] = new double[dimensions - 1]; + final double length = random.nextDouble() * getRadius(); + + double lastValue = 1.0; + for (int i = 0; i < dimensions - 1; i++) { + double angle = random.nextDouble() * 2 * Math.PI; + sin[i] = lastValue * Math.sin(angle); // Store cumulative values + cos[i] = Math.cos(angle); + lastValue = sin[i]; + } + + // Calculate cartesian coordinates + double res[] = new double[dimensions]; + + // First value uses only cosines + res[0] = center[0] + length * cos[0]; + + // Loop through 'middle' coordinates which use cosines and sines + for (int i = 1; i < dimensions - 1; i++) { + res[i] = center[i] + length * sin[i - 1] * cos[i]; + } + + // Last value uses only sines + res[dimensions - 1] = center[dimensions - 1] + length * sin[dimensions - 2]; + + return new DenseInstance(1.0, res); + } + + @Override + protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue) { + super.getClusterSpecificInfo(infoTitle, infoValue); + infoTitle.add("Radius"); + infoValue.add(Double.toString(getRadius())); + } + +}
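The overlapRadiusDegree javadoc above notes that the value is derived from the centers and radii only, and leaves open (TODO) whether the real overlap should be estimated by Monte Carlo. A rough sketch of such a check, not part of this patch, using only the public SphereCluster API shown above (sample, getInclusionProbability):

import java.util.Random;

import org.apache.samoa.instances.Instance;
import org.apache.samoa.moa.cluster.SphereCluster;

public final class OverlapEstimateSketch {

  // Fraction of points drawn from cluster a that also fall inside cluster b.
  // Note that SphereCluster.sample() draws the radius uniformly in [0, r], so the
  // points are biased towards the centre; this is only a cheap sanity check
  // against overlapRadiusDegree(), not a volume-exact estimate.
  public static double monteCarloOverlap(SphereCluster a, SphereCluster b, int samples, Random rnd) {
    int hits = 0;
    for (int i = 0; i < samples; i++) {
      Instance p = a.sample(rnd);
      if (b.getInclusionProbability(p) > 0.0) {
        hits++;
      }
    }
    return hits / (double) samples;
  }
}

For two identical spheres both measures give 1.0; they diverge as the spheres separate, since overlapRadiusDegree only looks along the line between the two centers.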
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/AbstractClusterer.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/AbstractClusterer.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/AbstractClusterer.java new file mode 100644 index 0000000..02a5191 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/AbstractClusterer.java @@ -0,0 +1,299 @@ +package org.apache.samoa.moa.clusterers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.LinkedList; +import java.util.List; +import java.util.Random; + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.moa.cluster.Clustering; +import org.apache.samoa.moa.core.Measurement; +import org.apache.samoa.moa.core.ObjectRepository; +import org.apache.samoa.moa.core.StringUtils; +import org.apache.samoa.moa.options.AbstractOptionHandler; +import org.apache.samoa.moa.tasks.TaskMonitor; + +import com.github.javacliparser.FlagOption; +import com.github.javacliparser.IntOption; + +public abstract class AbstractClusterer extends AbstractOptionHandler + implements Clusterer { + + @Override + public String getPurposeString() { + return "MOA Clusterer: " + getClass().getCanonicalName(); + } + + protected InstancesHeader modelContext; + + protected double trainingWeightSeenByModel = 0.0; + + protected int randomSeed = 1; + + protected IntOption randomSeedOption; + + public FlagOption evaluateMicroClusteringOption; + + protected Random clustererRandom; + + protected Clustering clustering; + + public AbstractClusterer() { + if (isRandomizable()) { + this.randomSeedOption = new IntOption("randomSeed", 'r', + "Seed for random behaviour of the Clusterer.", 1); + } + + if (implementsMicroClusterer()) { + this.evaluateMicroClusteringOption = + new FlagOption("evaluateMicroClustering", 'M', + "Evaluate the underlying microclustering instead of the macro clustering"); + } + } + + @Override + public void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + if (this.randomSeedOption != null) { + this.randomSeed = this.randomSeedOption.getValue(); + } + if (!trainingHasStarted()) { + resetLearning(); + } + clustering = new Clustering(); + } + + public void setModelContext(InstancesHeader ih) { + if ((ih != null) && (ih.classIndex() < 0)) { + throw new IllegalArgumentException( + "Context for a Clusterer must include a class to learn"); + } + if (trainingHasStarted() + && (this.modelContext != null) + && ((ih == null) || !contextIsCompatible(this.modelContext, ih))) { + throw new IllegalArgumentException( + "New context is not compatible with existing model"); + } + this.modelContext = ih; + } + + public 
InstancesHeader getModelContext() { + return this.modelContext; + } + + public void setRandomSeed(int s) { + this.randomSeed = s; + if (this.randomSeedOption != null) { + // keep option consistent + this.randomSeedOption.setValue(s); + } + } + + public boolean trainingHasStarted() { + return this.trainingWeightSeenByModel > 0.0; + } + + public double trainingWeightSeenByModel() { + return this.trainingWeightSeenByModel; + } + + public void resetLearning() { + this.trainingWeightSeenByModel = 0.0; + if (isRandomizable()) { + this.clustererRandom = new Random(this.randomSeed); + } + resetLearningImpl(); + } + + public void trainOnInstance(Instance inst) { + if (inst.weight() > 0.0) { + this.trainingWeightSeenByModel += inst.weight(); + trainOnInstanceImpl(inst); + } + } + + public Measurement[] getModelMeasurements() { + List<Measurement> measurementList = new LinkedList<Measurement>(); + measurementList.add(new Measurement("model training instances", + trainingWeightSeenByModel())); + measurementList.add(new Measurement("model serialized size (bytes)", + measureByteSize())); + Measurement[] modelMeasurements = getModelMeasurementsImpl(); + if (modelMeasurements != null) { + for (Measurement measurement : modelMeasurements) { + measurementList.add(measurement); + } + } + // add average of sub-model measurements + Clusterer[] subModels = getSubClusterers(); + if ((subModels != null) && (subModels.length > 0)) { + List<Measurement[]> subMeasurements = new LinkedList<Measurement[]>(); + for (Clusterer subModel : subModels) { + if (subModel != null) { + subMeasurements.add(subModel.getModelMeasurements()); + } + } + Measurement[] avgMeasurements = Measurement + .averageMeasurements(subMeasurements + .toArray(new Measurement[subMeasurements.size()][])); + for (Measurement measurement : avgMeasurements) { + measurementList.add(measurement); + } + } + return measurementList.toArray(new Measurement[measurementList.size()]); + } + + public void getDescription(StringBuilder out, int indent) { + StringUtils.appendIndented(out, indent, "Model type: "); + out.append(this.getClass().getName()); + StringUtils.appendNewline(out); + Measurement.getMeasurementsDescription(getModelMeasurements(), out, + indent); + StringUtils.appendNewlineIndented(out, indent, "Model description:"); + StringUtils.appendNewline(out); + if (trainingHasStarted()) { + getModelDescription(out, indent); + } else { + StringUtils.appendIndented(out, indent, + "Model has not been trained."); + } + } + + public Clusterer[] getSubClusterers() { + return null; + } + + @Override + public Clusterer copy() { + return (Clusterer) super.copy(); + } + + // public boolean correctlyClassifies(Instance inst) { + // return Utils.maxIndex(getVotesForInstance(inst)) == (int) inst + // .classValue(); + // } + + public String getClassNameString() { + return InstancesHeader.getClassNameString(this.modelContext); + } + + public String getClassLabelString(int classLabelIndex) { + return InstancesHeader.getClassLabelString(this.modelContext, + classLabelIndex); + } + + public String getAttributeNameString(int attIndex) { + return InstancesHeader.getAttributeNameString(this.modelContext, + attIndex); + } + + public String getNominalValueString(int attIndex, int valIndex) { + return InstancesHeader.getNominalValueString(this.modelContext, + attIndex, valIndex); + } + + // originalContext notnull + // newContext notnull + public static boolean contextIsCompatible(InstancesHeader originalContext, + InstancesHeader newContext) { + // rule 1: num classes can 
increase but never decrease + // rule 2: num attributes can increase but never decrease + // rule 3: num nominal attribute values can increase but never decrease + // rule 4: attribute types must stay in the same order (although class + // can + // move; is always skipped over) + // attribute names are free to change, but should always still represent + // the original attributes + if (newContext.numClasses() < originalContext.numClasses()) { + return false; // rule 1 + } + if (newContext.numAttributes() < originalContext.numAttributes()) { + return false; // rule 2 + } + int oPos = 0; + int nPos = 0; + while (oPos < originalContext.numAttributes()) { + if (oPos == originalContext.classIndex()) { + oPos++; + if (!(oPos < originalContext.numAttributes())) { + break; + } + } + if (nPos == newContext.classIndex()) { + nPos++; + } + if (originalContext.attribute(oPos).isNominal()) { + if (!newContext.attribute(nPos).isNominal()) { + return false; // rule 4 + } + if (newContext.attribute(nPos).numValues() < originalContext + .attribute(oPos).numValues()) { + return false; // rule 3 + } + } else { + assert (originalContext.attribute(oPos).isNumeric()); + if (!newContext.attribute(nPos).isNumeric()) { + return false; // rule 4 + } + } + oPos++; + nPos++; + } + return true; // all checks clear + } + + // reason for ...Impl methods: + // ease programmer burden by not requiring them to remember calls to super + // in overridden methods & will produce compiler errors if not overridden + + public abstract void resetLearningImpl(); + + public abstract void trainOnInstanceImpl(Instance inst); + + protected abstract Measurement[] getModelMeasurementsImpl(); + + public abstract void getModelDescription(StringBuilder out, int indent); + + protected static int modelAttIndexToInstanceAttIndex(int index, + Instance inst) { + return inst.classIndex() > index ? index : index + 1; + } + + protected static int modelAttIndexToInstanceAttIndex(int index, + Instances insts) { + return insts.classIndex() > index ? index : index + 1; + } + + public boolean implementsMicroClusterer() { + return false; + } + + public boolean keepClassLabel() { + return false; + } + + public Clustering getMicroClusteringResult() { + return null; + }; +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/ClusterGenerator.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/ClusterGenerator.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/ClusterGenerator.java new file mode 100644 index 0000000..b9f80b5 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/ClusterGenerator.java @@ -0,0 +1,358 @@ +package org.apache.samoa.moa.clusterers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
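AbstractClusterer above follows the template-method pattern: resetLearning() and trainOnInstance() do the bookkeeping (training weight, random seed) and then delegate to the *Impl hooks, so subclasses cannot accidentally skip it. A minimal sketch of a concrete subclass with a hypothetical name, implementing only the members that the abstract class and the Clusterer interface leave open:

import org.apache.samoa.instances.Instance;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.clusterers.AbstractClusterer;
import org.apache.samoa.moa.core.Measurement;

// Hypothetical no-op clusterer that only counts the instances it has seen.
public class CountingClusterer extends AbstractClusterer {

  private static final long serialVersionUID = 1L;

  private long instancesSeen;

  @Override
  public void resetLearningImpl() {
    instancesSeen = 0; // called by resetLearning(), which also seeds clustererRandom
  }

  @Override
  public void trainOnInstanceImpl(Instance inst) {
    instancesSeen++; // trainOnInstance() has already updated trainingWeightSeenByModel
  }

  @Override
  protected Measurement[] getModelMeasurementsImpl() {
    return new Measurement[] { new Measurement("instances seen", instancesSeen) };
  }

  @Override
  public void getModelDescription(StringBuilder out, int indent) {
    out.append("counting clusterer, ").append(instancesSeen).append(" instances");
  }

  @Override
  public boolean isRandomizable() {
    return false;
  }

  @Override
  public Clustering getClusteringResult() {
    return new Clustering(); // this sketch builds no actual clustering
  }

  public double[] getVotesForInstance(Instance inst) {
    return new double[0];
  }
}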
+ * #L% + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Random; + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.moa.cluster.Clustering; +import org.apache.samoa.moa.cluster.SphereCluster; +import org.apache.samoa.moa.core.DataPoint; +import org.apache.samoa.moa.core.Measurement; + +import com.github.javacliparser.FloatOption; +import com.github.javacliparser.IntOption; + +public class ClusterGenerator extends AbstractClusterer { + + private static final long serialVersionUID = 1L; + + public IntOption timeWindowOption = new IntOption("timeWindow", + 't', "Rang of the window.", 1000); + + public FloatOption radiusDecreaseOption = new FloatOption("radiusDecrease", 'r', + "The average radii of the centroids in the model.", 0, 0, 1); + + public FloatOption radiusIncreaseOption = new FloatOption("radiusIncrease", 'R', + "The average radii of the centroids in the model.", 0, 0, 1); + + public FloatOption positionOffsetOption = new FloatOption("positionOffset", 'p', + "The average radii of the centroids in the model.", 0, 0, 1); + + public FloatOption clusterRemoveOption = new FloatOption("clusterRemove", 'D', + "Deletes complete clusters from the clustering.", 0, 0, 1); + + public FloatOption joinClustersOption = new FloatOption("joinClusters", 'j', + "Join two clusters if their hull distance is less minRadius times this factor.", 0, 0, 1); + + public FloatOption clusterAddOption = new FloatOption("clusterAdd", 'A', + "Adds additional clusters.", 0, 0, 1); + + private static double err_intervall_width = 0.0; + private ArrayList<DataPoint> points; + private int instanceCounter; + private int windowCounter; + private Random random; + private Clustering sourceClustering = null; + + @Override + public void resetLearningImpl() { + points = new ArrayList<DataPoint>(); + instanceCounter = 0; + windowCounter = 0; + random = new Random(227); + + // joinClustersOption.set(); + // evaluateMicroClusteringOption.set(); + } + + @Override + public void trainOnInstanceImpl(Instance inst) { + if (windowCounter >= timeWindowOption.getValue()) { + points.clear(); + windowCounter = 0; + } + windowCounter++; + instanceCounter++; + points.add(new DataPoint(inst, instanceCounter)); + } + + @Override + public boolean implementsMicroClusterer() { + return true; + } + + public void setSourceClustering(Clustering source) { + sourceClustering = source; + } + + @Override + public Clustering getMicroClusteringResult() { + // System.out.println("Numcluster:"+clustering.size()+" / "+num); + // Clustering source_clustering = new Clustering(points, overlapThreshold, + // microInitMinPoints); + if (sourceClustering == null) { + + System.out.println("You need to set a source clustering for the ClusterGenerator to work"); + return null; + } + return alterClustering(sourceClustering); + } + + public Clustering getClusteringResult() { + sourceClustering = new Clustering(points); + // if(sourceClustering == null){ + // System.out.println("You need to set a source clustering for the ClusterGenerator to work"); + // return null; + // } + return alterClustering(sourceClustering); + } + + private Clustering alterClustering(Clustering scclustering) { + // percentage of the radius that will be cut off + // 0: no changes to radius + // 1: radius of 0 + double errLevelRadiusDecrease = radiusDecreaseOption.getValue(); + + // 0: no changes to radius + // 1: radius 100% bigger + double errLevelRadiusIncrease = radiusIncreaseOption.getValue(); + + // 0: no changes + // 1: distance between 
centers is 2 * original radius + double errLevelPosition = positionOffsetOption.getValue(); + + int numRemoveCluster = (int) (clusterRemoveOption.getValue() * scclustering.size()); + + int numAddCluster = (int) (clusterAddOption.getValue() * scclustering.size()); + + for (int c = 0; c < numRemoveCluster; c++) { + int delId = random.nextInt(scclustering.size()); + scclustering.remove(delId); + } + + int numCluster = scclustering.size(); + double[] err_seeds = new double[numCluster]; + double err_seed_sum = 0.0; + double tmp_seed; + for (int i = 0; i < numCluster; i++) { + tmp_seed = random.nextDouble(); + err_seeds[i] = err_seed_sum + tmp_seed; + err_seed_sum += tmp_seed; + } + + double sumWeight = 0; + for (int i = 0; i < numCluster; i++) { + sumWeight += scclustering.get(i).getWeight(); + } + + Clustering clustering = new Clustering(); + + for (int i = 0; i < numCluster; i++) { + if (!(scclustering.get(i) instanceof SphereCluster)) { + System.out.println("Not a Sphere Cluster"); + continue; + } + SphereCluster sourceCluster = (SphereCluster) scclustering.get(i); + double[] center = Arrays.copyOf(sourceCluster.getCenter(), sourceCluster.getCenter().length); + double weight = sourceCluster.getWeight(); + double radius = sourceCluster.getRadius(); + + // move cluster center + if (errLevelPosition > 0) { + double errOffset = random.nextDouble() * err_intervall_width / 2.0; + double errOffsetDirection = ((random.nextBoolean()) ? 1 : -1); + double level = errLevelPosition + errOffsetDirection * errOffset; + double[] vector = new double[center.length]; + double vectorLength = 0; + for (int d = 0; d < center.length; d++) { + vector[d] = (random.nextBoolean() ? 1 : -1) * random.nextDouble(); + vectorLength += Math.pow(vector[d], 2); + } + vectorLength = Math.sqrt(vectorLength); + + // max is when clusters are next to each other + double length = 2 * radius * level; + + for (int d = 0; d < center.length; d++) { + // normalize length and then strecht to reach error position + vector[d] = vector[d] / vectorLength * length; + } + // System.out.println("Center "+Arrays.toString(center)); + // System.out.println("Vector "+Arrays.toString(vector)); + // check if error position is within bounds + double[] newCenter = new double[center.length]; + for (int d = 0; d < center.length; d++) { + // check bounds, otherwise flip vector + if (center[d] + vector[d] >= 0 && center[d] + vector[d] <= 1) { + newCenter[d] = center[d] + vector[d]; + } + else { + newCenter[d] = center[d] + (-1) * vector[d]; + } + } + center = newCenter; + for (int d = 0; d < center.length; d++) { + if (newCenter[d] >= 0 && newCenter[d] <= 1) { + } + else { + System.out + .println("This shouldnt have happend, Cluster center out of bounds:" + Arrays.toString(newCenter)); + } + } + // System.out.println("new Center "+Arrays.toString(newCenter)); + + } + + // alter radius + if (errLevelRadiusDecrease > 0 || errLevelRadiusIncrease > 0) { + double errOffset = random.nextDouble() * err_intervall_width / 2.0; + int errOffsetDirection = ((random.nextBoolean()) ? 1 : -1); + + if (errLevelRadiusDecrease > 0 && (errLevelRadiusIncrease == 0 || random.nextBoolean())) { + double level = (errLevelRadiusDecrease + errOffsetDirection * errOffset);// *sourceCluster.getWeight()/sumWeight; + level = (level < 0) ? 0 : level; + level = (level > 1) ? 1 : level; + radius *= (1 - level); + } + else { + double level = errLevelRadiusIncrease + errOffsetDirection * errOffset; + level = (level < 0) ? 0 : level; + level = (level > 1) ? 
1 : level; + radius += radius * level; + } + } + + SphereCluster newCluster = new SphereCluster(center, radius, weight); + newCluster.setMeasureValue("Source Cluster", "C" + sourceCluster.getId()); + + clustering.add(newCluster); + } + + if (joinClustersOption.getValue() > 0) { + clustering = joinClusters(clustering); + } + + // add new clusters by copying clusters and set a random center + for (int c = 0; c < numAddCluster; c++) { + int copyId = random.nextInt(clustering.size()); + SphereCluster scorg = (SphereCluster) clustering.get(copyId); + int dim = scorg.getCenter().length; + double[] center = new double[dim]; + double radius = scorg.getRadius(); + + boolean outofbounds = true; + int tryCounter = 0; + while (outofbounds && tryCounter < 20) { + tryCounter++; + outofbounds = false; + for (int j = 0; j < center.length; j++) { + center[j] = random.nextDouble(); + if (center[j] - radius < 0 || center[j] + radius > 1) { + outofbounds = true; + break; + } + } + } + if (outofbounds) { + System.out.println("Coludn't place additional cluster"); + } + else { + SphereCluster scnew = new SphereCluster(center, radius, scorg.getWeight() / 2); + scorg.setWeight(scorg.getWeight() - scnew.getWeight()); + clustering.add(scnew); + } + } + + return clustering; + + } + + private Clustering joinClusters(Clustering clustering) { + + double radiusFactor = joinClustersOption.getValue(); + boolean[] merged = new boolean[clustering.size()]; + + Clustering mclustering = new Clustering(); + + if (radiusFactor > 0) { + for (int c1 = 0; c1 < clustering.size(); c1++) { + SphereCluster sc1 = (SphereCluster) clustering.get(c1); + double minDist = Double.MAX_VALUE; + double minOver = 1; + int maxindexCon = -1; + int maxindexOver = -1; + for (int c2 = 0; c2 < clustering.size(); c2++) { + SphereCluster sc2 = (SphereCluster) clustering.get(c2); + // double over = sc1.overlapRadiusDegree(sc2); + // if(over > 0 && over < minOver){ + // minOver = over; + // maxindexOver = c2; + // } + double dist = sc1.getHullDistance(sc2); + double threshold = Math.min(sc1.getRadius(), sc2.getRadius()) * radiusFactor; + if (dist > 0 && dist < minDist && dist < threshold) { + minDist = dist; + maxindexCon = c2; + } + } + int maxindex = -1; + if (maxindexOver != -1) + maxindex = maxindexOver; + else + maxindex = maxindexCon; + + if (maxindex != -1 && !merged[c1]) { + merged[c1] = true; + merged[maxindex] = true; + SphereCluster scnew = new SphereCluster(sc1.getCenter(), sc1.getRadius(), sc1.getWeight()); + SphereCluster sc2 = (SphereCluster) clustering.get(maxindex); + scnew.merge(sc2); + mclustering.add(scnew); + } + } + } + + for (int i = 0; i < merged.length; i++) { + if (!merged[i]) + mclustering.add(clustering.get(i)); + } + + return mclustering; + + } + + @Override + protected Measurement[] getModelMeasurementsImpl() { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public void getModelDescription(StringBuilder out, int indent) { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public boolean isRandomizable() { + return false; + } + + @Override + public boolean keepClassLabel() { + return true; + } + + public double[] getVotesForInstance(Instance inst) { + return null; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/Clusterer.java ---------------------------------------------------------------------- diff --git 
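ClusterGenerator above does not learn a model of its own: it replays a given source clustering with controlled distortions (radius shrink or growth, center offsets, removed, added and joined clusters), which makes it useful for probing how an evaluation measure reacts to each kind of error in isolation. A minimal usage sketch, assuming the javacliparser options expose setValue as their MOA counterparts do:

import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.clusterers.ClusterGenerator;

public final class ClusterGeneratorSketch {

  // groundTruth is a Clustering obtained elsewhere, e.g. from a stream generator.
  public static Clustering distort(Clustering groundTruth) {
    ClusterGenerator gen = new ClusterGenerator();
    gen.resetLearning();                     // initialises the internal Random
    gen.radiusDecreaseOption.setValue(0.2);  // cut 20% off every radius
    gen.clusterRemoveOption.setValue(0.1);   // drop roughly 10% of the clusters
    gen.setSourceClustering(groundTruth);
    return gen.getMicroClusteringResult();   // altered copy of the source clustering
  }
}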
a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/Clusterer.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/Clusterer.java new file mode 100644 index 0000000..bfa07c8 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/Clusterer.java @@ -0,0 +1,64 @@ +package org.apache.samoa.moa.clusterers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.moa.MOAObject; +import org.apache.samoa.moa.cluster.Clustering; +import org.apache.samoa.moa.core.Measurement; +import org.apache.samoa.moa.options.OptionHandler; + +public interface Clusterer extends MOAObject, OptionHandler { + + public void setModelContext(InstancesHeader ih); + + public InstancesHeader getModelContext(); + + public boolean isRandomizable(); + + public void setRandomSeed(int s); + + public boolean trainingHasStarted(); + + public double trainingWeightSeenByModel(); + + public void resetLearning(); + + public void trainOnInstance(Instance inst); + + public double[] getVotesForInstance(Instance inst); + + public Measurement[] getModelMeasurements(); + + public Clusterer[] getSubClusterers(); + + public Clusterer copy(); + + public Clustering getClusteringResult(); + + public boolean implementsMicroClusterer(); + + public Clustering getMicroClusteringResult(); + + public boolean keepClassLabel(); + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/KMeans.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/KMeans.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/KMeans.java new file mode 100644 index 0000000..8eeaeba --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/KMeans.java @@ -0,0 +1,199 @@ +package org.apache.samoa.moa.clusterers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
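The Clusterer interface just above is what evaluation code is written against: a model is trained one instance at a time and can expose either a macro clustering (getClusteringResult) or, when implementsMicroClusterer() returns true, the underlying micro clustering. A small driver-loop sketch, where the Iterator-based stream is an assumption rather than SAMOA API:

import java.util.Iterator;

import org.apache.samoa.instances.Instance;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.clusterers.Clusterer;

public final class ClustererDriverSketch {

  public static Clustering run(Clusterer clusterer, Iterator<Instance> stream, int maxInstances) {
    clusterer.resetLearning();
    for (int i = 0; i < maxInstances && stream.hasNext(); i++) {
      clusterer.trainOnInstance(stream.next());
    }
    // Evaluators typically prefer the micro clustering when the model provides one.
    return clusterer.implementsMicroClusterer()
        ? clusterer.getMicroClusteringResult()
        : clusterer.getClusteringResult();
  }
}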
+ * #L% + */ + +import java.util.ArrayList; +import java.util.List; + +import org.apache.samoa.moa.cluster.CFCluster; +import org.apache.samoa.moa.cluster.Cluster; +import org.apache.samoa.moa.cluster.Clustering; +import org.apache.samoa.moa.cluster.SphereCluster; + +/** + * A kMeans implementation for microclusterings. For now it only uses the real centers of the groundtruthclustering for + * implementation. There should also be an option to use random centers. TODO: random centers TODO: Create a macro + * clustering interface to make different macro clustering algorithms available to micro clustering algorithms like + * clustream, denstream and clustree + * + */ +public class KMeans { + + /** + * This kMeans implementation clusters a big number of microclusters into a smaller amount of macro clusters. To make + * it comparable to other algorithms it uses the real centers of the ground truth macro clustering to have the best + * possible initialization. The quality of resulting macro clustering yields an upper bound for kMeans on the + * underlying microclustering. + * + * @param centers + * of the ground truth clustering + * @param data + * list of microclusters + * @return + */ + public static Clustering kMeans(Cluster[] centers, List<? extends Cluster> data) { + int k = centers.length; + + int dimensions = centers[0].getCenter().length; + + ArrayList<ArrayList<Cluster>> clustering = + new ArrayList<ArrayList<Cluster>>(); + for (int i = 0; i < k; i++) { + clustering.add(new ArrayList<Cluster>()); + } + + int repetitions = 100; + while (repetitions-- >= 0) { + // Assign points to clusters + for (Cluster point : data) { + double minDistance = distance(point.getCenter(), centers[0].getCenter()); + int closestCluster = 0; + for (int i = 1; i < k; i++) { + double distance = distance(point.getCenter(), centers[i].getCenter()); + if (distance < minDistance) { + closestCluster = i; + minDistance = distance; + } + } + + clustering.get(closestCluster).add(point); + } + + // Calculate new centers and clear clustering lists + SphereCluster[] newCenters = new SphereCluster[centers.length]; + for (int i = 0; i < k; i++) { + newCenters[i] = calculateCenter(clustering.get(i), dimensions); + clustering.get(i).clear(); + } + centers = newCenters; + } + + return new Clustering(centers); + } + + private static double distance(double[] pointA, double[] pointB) { + double distance = 0.0; + for (int i = 0; i < pointA.length; i++) { + double d = pointA[i] - pointB[i]; + distance += d * d; + } + return Math.sqrt(distance); + } + + private static SphereCluster calculateCenter(ArrayList<Cluster> cluster, int dimensions) { + double[] res = new double[dimensions]; + for (int i = 0; i < res.length; i++) { + res[i] = 0.0; + } + + if (cluster.size() == 0) { + return new SphereCluster(res, 0.0); + } + + for (Cluster point : cluster) { + double[] center = point.getCenter(); + for (int i = 0; i < res.length; i++) { + res[i] += center[i]; + } + } + + // Normalize + for (int i = 0; i < res.length; i++) { + res[i] /= cluster.size(); + } + + // Calculate radius + double radius = 0.0; + for (Cluster point : cluster) { + double dist = distance(res, point.getCenter()); + if (dist > radius) { + radius = dist; + } + } + + return new SphereCluster(res, radius); + } + + public static Clustering gaussianMeans(Clustering gtClustering, Clustering clustering) { + ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); + for (int i = 0; i < clustering.size(); i++) { + if (clustering.get(i) instanceof CFCluster) { + 
microclusters.add((CFCluster) clustering.get(i)); + } + else { + System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() + + ". Cluster needs to extend moa.cluster.CFCluster"); + } + } + Cluster[] centers = new Cluster[gtClustering.size()]; + for (int i = 0; i < centers.length; i++) { + centers[i] = gtClustering.get(i); + + } + + int k = centers.length; + if (microclusters.size() < k) { + return new Clustering(new Cluster[0]); + } + + Clustering kMeansResult = kMeans(centers, microclusters); + + k = kMeansResult.size(); + CFCluster[] res = new CFCluster[k]; + + for (CFCluster microcluster : microclusters) { + // Find closest kMeans cluster + double minDistance = Double.MAX_VALUE; + int closestCluster = 0; + for (int i = 0; i < k; i++) { + double distance = distance(kMeansResult.get(i).getCenter(), microcluster.getCenter()); + if (distance < minDistance) { + closestCluster = i; + minDistance = distance; + } + } + + // Add to cluster + if (res[closestCluster] == null) { + res[closestCluster] = (CFCluster) microcluster.copy(); + } else { + res[closestCluster].add(microcluster); + } + } + + // Clean up res + int count = 0; + for (int i = 0; i < res.length; i++) { + if (res[i] != null) + ++count; + } + + CFCluster[] cleaned = new CFCluster[count]; + count = 0; + for (int i = 0; i < res.length; i++) { + if (res[i] != null) + cleaned[count++] = res[i]; + } + + return new Clustering(cleaned); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java new file mode 100644 index 0000000..055c3d6 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java @@ -0,0 +1,333 @@ +package org.apache.samoa.moa.clusterers.clustream; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; + +import org.apache.samoa.instances.DenseInstance; +import org.apache.samoa.instances.Instance; +import org.apache.samoa.moa.cluster.Cluster; +import org.apache.samoa.moa.cluster.Clustering; +import org.apache.samoa.moa.cluster.SphereCluster; +import org.apache.samoa.moa.clusterers.AbstractClusterer; +import org.apache.samoa.moa.core.Measurement; + +import com.github.javacliparser.IntOption; + +/** + * Citation: CluStream: Charu C. Aggarwal, Jiawei Han, Jianyong Wang, Philip S. Yu: A Framework for Clustering Evolving + * Data Streams. 
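KMeans.kMeans above runs a fixed number of Lloyd iterations (about 100, with no convergence test) over microcluster centers. The same assign/recompute loop on plain points looks like the sketch below; one deliberate difference is that empty clusters keep their previous center here, whereas calculateCenter above collapses them to the origin with radius 0:

// Plain Lloyd iteration, mirroring the loop in KMeans.kMeans.
public final class LloydSketch {

  public static double[][] lloyd(double[][] points, double[][] centers, int iterations) {
    int k = centers.length;
    int dim = centers[0].length;
    for (int it = 0; it < iterations; it++) {
      double[][] sums = new double[k][dim];
      int[] counts = new int[k];
      // Assign every point to its closest center (squared distance is enough).
      for (double[] p : points) {
        int best = 0;
        double bestDist = Double.MAX_VALUE;
        for (int c = 0; c < k; c++) {
          double dist = 0.0;
          for (int d = 0; d < dim; d++) {
            double diff = p[d] - centers[c][d];
            dist += diff * diff;
          }
          if (dist < bestDist) {
            bestDist = dist;
            best = c;
          }
        }
        counts[best]++;
        for (int d = 0; d < dim; d++) {
          sums[best][d] += p[d];
        }
      }
      // Recompute each center as the mean of its assigned points.
      for (int c = 0; c < k; c++) {
        if (counts[c] > 0) {
          for (int d = 0; d < dim; d++) {
            centers[c][d] = sums[c][d] / counts[c];
          }
        }
      }
    }
    return centers;
  }
}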
VLDB 2003: 81-92 + */ +public class Clustream extends AbstractClusterer { + + private static final long serialVersionUID = 1L; + + public IntOption timeWindowOption = new IntOption("horizon", + 'h', "Rang of the window.", 1000); + + public IntOption maxNumKernelsOption = new IntOption( + "maxNumKernels", 'k', + "Maximum number of micro kernels to use.", 100); + + public IntOption kernelRadiFactorOption = new IntOption( + "kernelRadiFactor", 't', + "Multiplier for the kernel radius", 2); + + private int timeWindow; + private long timestamp = -1; + private ClustreamKernel[] kernels; + private boolean initialized; + private List<ClustreamKernel> buffer; // Buffer for initialization with kNN + private int bufferSize; + private double t; + private int m; + + public Clustream() { + } + + @Override + public void resetLearningImpl() { + this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()]; + this.timeWindow = timeWindowOption.getValue(); + this.initialized = false; + this.buffer = new LinkedList<>(); + this.bufferSize = maxNumKernelsOption.getValue(); + t = kernelRadiFactorOption.getValue(); + m = maxNumKernelsOption.getValue(); + } + + @Override + public void trainOnInstanceImpl(Instance instance) { + int dim = instance.numValues(); + timestamp++; + // 0. Initialize + if (!initialized) { + if (buffer.size() < bufferSize) { + buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m)); + return; + } + + int k = kernels.length; + // System.err.println("k="+k+" bufferSize="+bufferSize); + assert (k <= bufferSize); + + ClustreamKernel[] centers = new ClustreamKernel[k]; + for (int i = 0; i < k; i++) { + centers[i] = buffer.get(i); // TODO: make random! + } + Clustering kmeans_clustering = kMeans(k, centers, buffer); + // Clustering kmeans_clustering = kMeans(k, buffer); + + for (int i = 0; i < kmeans_clustering.size(); i++) { + kernels[i] = new ClustreamKernel(new DenseInstance(1.0, centers[i].getCenter()), dim, timestamp, t, m); + } + + buffer.clear(); + initialized = true; + return; + } + + // 1. Determine closest kernel + ClustreamKernel closestKernel = null; + double minDistance = Double.MAX_VALUE; + for (ClustreamKernel kernel : kernels) { + // System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation()); + double distance = distance(instance.toDoubleArray(), kernel.getCenter()); + if (distance < minDistance) { + closestKernel = kernel; + minDistance = distance; + } + } + + // 2. Check whether instance fits into closestKernel + double radius; + if (closestKernel != null && closestKernel.getWeight() == 1) { + // Special case: estimate radius by determining the distance to the + // next closest cluster + radius = Double.MAX_VALUE; + double[] center = closestKernel.getCenter(); + for (ClustreamKernel kernel : kernels) { + if (kernel == closestKernel) { + continue; + } + + double distance = distance(kernel.getCenter(), center); + radius = Math.min(distance, radius); + } + } else { + radius = closestKernel.getRadius(); + } + + if (minDistance < radius) { + // Date fits, put into kernel and be happy + closestKernel.insert(instance, timestamp); + return; + } + + // 3. 
Date does not fit, we need to free + // some space to insert a new kernel + long threshold = timestamp - timeWindow; // Kernels before this can be forgotten + + // 3.1 Try to forget old kernels + for (int i = 0; i < kernels.length; i++) { + if (kernels[i].getRelevanceStamp() < threshold) { + kernels[i] = new ClustreamKernel(instance, dim, timestamp, t, m); + return; + } + } + + // 3.2 Merge closest two kernels + int closestA = 0; + int closestB = 0; + minDistance = Double.MAX_VALUE; + for (int i = 0; i < kernels.length; i++) { + double[] centerA = kernels[i].getCenter(); + for (int j = i + 1; j < kernels.length; j++) { + double dist = distance(centerA, kernels[j].getCenter()); + if (dist < minDistance) { + minDistance = dist; + closestA = i; + closestB = j; + } + } + } + assert (closestA != closestB); + + kernels[closestA].add(kernels[closestB]); + kernels[closestB] = new ClustreamKernel(instance, dim, timestamp, t, m); + } + + @Override + public Clustering getMicroClusteringResult() { + if (!initialized) { + return new Clustering(new Cluster[0]); + } + + ClustreamKernel[] res = new ClustreamKernel[kernels.length]; + for (int i = 0; i < res.length; i++) { + res[i] = new ClustreamKernel(kernels[i], t, m); + } + + return new Clustering(res); + } + + @Override + public boolean implementsMicroClusterer() { + return true; + } + + @Override + public Clustering getClusteringResult() { + return null; + } + + public String getName() { + return "Clustream " + timeWindow; + } + + private static double distance(double[] pointA, double[] pointB) { + double distance = 0.0; + for (int i = 0; i < pointA.length; i++) { + double d = pointA[i] - pointB[i]; + distance += d * d; + } + return Math.sqrt(distance); + } + + // wrapper... we need to rewrite kmeans to points, not clusters, doesnt make + // sense anymore + // public static Clustering kMeans( int k, ArrayList<Instance> points, int dim + // ) { + // ArrayList<ClustreamKernel> cl = new ArrayList<ClustreamKernel>(); + // for(Instance inst : points){ + // cl.add(new ClustreamKernel(inst, dim , 0, 0, 0)); + // } + // Clustering clustering = kMeans(k, cl); + // return clustering; + // } + + public static Clustering kMeans(int k, List<? extends Cluster> data) { + Random random = new Random(0); + Cluster[] centers = new Cluster[k]; + for (int i = 0; i < centers.length; i++) { + int rid = random.nextInt(k); + centers[i] = new SphereCluster(data.get(rid).getCenter(), 0); + } + return kMeans(k, centers, data); + } + + public static Clustering kMeans(int k, Cluster[] centers, List<? 
extends Cluster> data) { + assert (centers.length == k); + assert (k > 0); + + int dimensions = centers[0].getCenter().length; + + ArrayList<ArrayList<Cluster>> clustering = new ArrayList<>(); + for (int i = 0; i < k; i++) { + clustering.add(new ArrayList<Cluster>()); + } + + int repetitions = 100; + while (repetitions-- >= 0) { + // Assign points to clusters + for (Cluster point : data) { + double minDistance = distance(point.getCenter(), centers[0].getCenter()); + int closestCluster = 0; + for (int i = 1; i < k; i++) { + double distance = distance(point.getCenter(), centers[i].getCenter()); + if (distance < minDistance) { + closestCluster = i; + minDistance = distance; + } + } + + clustering.get(closestCluster).add(point); + } + + // Calculate new centers and clear clustering lists + SphereCluster[] newCenters = new SphereCluster[centers.length]; + for (int i = 0; i < k; i++) { + newCenters[i] = calculateCenter(clustering.get(i), dimensions); + clustering.get(i).clear(); + } + centers = newCenters; + } + + return new Clustering(centers); + } + + private static SphereCluster calculateCenter(ArrayList<Cluster> cluster, int dimensions) { + double[] res = new double[dimensions]; + for (int i = 0; i < res.length; i++) { + res[i] = 0.0; + } + + if (cluster.size() == 0) { + return new SphereCluster(res, 0.0); + } + + for (Cluster point : cluster) { + double[] center = point.getCenter(); + for (int i = 0; i < res.length; i++) { + res[i] += center[i]; + } + } + + // Normalize + for (int i = 0; i < res.length; i++) { + res[i] /= cluster.size(); + } + + // Calculate radius + double radius = 0.0; + for (Cluster point : cluster) { + double dist = distance(res, point.getCenter()); + if (dist > radius) { + radius = dist; + } + } + SphereCluster sc = new SphereCluster(res, radius); + sc.setWeight(cluster.size()); + return sc; + } + + @Override + protected Measurement[] getModelMeasurementsImpl() { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public void getModelDescription(StringBuilder out, int indent) { + throw new UnsupportedOperationException("Not supported yet."); + } + + public boolean isRandomizable() { + return false; + } + + public double[] getVotesForInstance(Instance inst) { + throw new UnsupportedOperationException("Not supported yet."); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/ClustreamKernel.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/ClustreamKernel.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/ClustreamKernel.java new file mode 100644 index 0000000..522d50e --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/ClustreamKernel.java @@ -0,0 +1,271 @@ +package org.apache.samoa.moa.clusterers.clustream; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
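Clustream above keeps at most maxNumKernels micro clusters online: every new instance is either absorbed by the closest kernel, replaces a kernel that has fallen out of the time window, or forces the two closest kernels to be merged. A minimal usage sketch, constructing instances directly the same way sample() does in SphereCluster:

import java.util.Random;

import org.apache.samoa.instances.DenseInstance;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.clusterers.clustream.Clustream;

public final class ClustreamSketch {

  public static void main(String[] args) {
    Clustream clustream = new Clustream();
    clustream.resetLearning(); // sizes the kernel array from the options

    Random rnd = new Random(42);
    for (int i = 0; i < 10000; i++) {
      double[] point = { rnd.nextDouble(), rnd.nextDouble() };
      clustream.trainOnInstance(new DenseInstance(1.0, point));
    }

    Clustering micro = clustream.getMicroClusteringResult();
    System.out.println("micro clusters: " + micro.size());
  }
}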
+ * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +import java.util.List; + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.moa.cluster.CFCluster; + +public class ClustreamKernel extends CFCluster { + private static final long serialVersionUID = 1L; + + private final static double EPSILON = 0.00005; + public static final double MIN_VARIANCE = 1e-50; + + protected double LST; + protected double SST; + + int m; + double t; + + public ClustreamKernel(Instance instance, int dimensions, long timestamp, double t, int m) { + super(instance, dimensions); + this.t = t; + this.m = m; + this.LST = timestamp; + this.SST = timestamp * timestamp; + } + + public ClustreamKernel(ClustreamKernel cluster, double t, int m) { + super(cluster); + this.t = t; + this.m = m; + this.LST = cluster.LST; + this.SST = cluster.SST; + } + + public void insert(Instance instance, long timestamp) { + N++; + LST += timestamp; + SST += timestamp * timestamp; + + for (int i = 0; i < instance.numValues(); i++) { + LS[i] += instance.value(i); + SS[i] += instance.value(i) * instance.value(i); + } + } + + @Override + public void add(CFCluster other2) { + ClustreamKernel other = (ClustreamKernel) other2; + assert (other.LS.length == this.LS.length); + this.N += other.N; + this.LST += other.LST; + this.SST += other.SST; + + for (int i = 0; i < LS.length; i++) { + this.LS[i] += other.LS[i]; + this.SS[i] += other.SS[i]; + } + } + + public double getRelevanceStamp() { + if (N < 2 * m) + return getMuTime(); + + return getMuTime() + getSigmaTime() * getQuantile(((double) m) / (2 * N)); + } + + private double getMuTime() { + return LST / N; + } + + private double getSigmaTime() { + return Math.sqrt(SST / N - (LST / N) * (LST / N)); + } + + private double getQuantile(double z) { + assert (z >= 0 && z <= 1); + return Math.sqrt(2) * inverseError(2 * z - 1); + } + + @Override + public double getRadius() { + // trivial cluster + if (N == 1) + return 0; + if (t == 1) + t = 1; + + return getDeviation() * radiusFactor; + } + + @Override + public CFCluster getCF() { + return this; + } + + private double getDeviation() { + double[] variance = getVarianceVector(); + double sumOfDeviation = 0.0; + for (double aVariance : variance) { + double d = Math.sqrt(aVariance); + sumOfDeviation += d; + } + return sumOfDeviation / variance.length; + } + + /** + * @return this kernels' center + */ + @Override + public double[] getCenter() { + assert (!this.isEmpty()); + double res[] = new double[this.LS.length]; + for (int i = 0; i < res.length; i++) { + res[i] = this.LS[i] / N; + } + return res; + } + + /** + * See interface <code>Cluster</code> + * + * @param instance + * @return double value + */ + @Override + public double getInclusionProbability(Instance instance) { + // trivial cluster + if (N == 1) { + double distance = 0.0; + for (int i = 0; i < LS.length; i++) { + double d = LS[i] - instance.value(i); + distance += d * d; + } + distance = Math.sqrt(distance); + if (distance < EPSILON) + return 1.0; + return 0.0; + } + else { + double dist = calcNormalizedDistance(instance.toDoubleArray()); + if (dist <= getRadius()) { + return 1; + } + else { + return 0; + } + // double res = AuxiliaryFunctions.distanceProbabilty(dist, LS.length); + // return res; + } + } + + private double[] getVarianceVector() { + double[] res = new double[this.LS.length]; + for (int i = 0; i < this.LS.length; i++) { + double ls = this.LS[i]; + double ss = this.SS[i]; + + double lsDivN = ls / 
this.getWeight(); + double lsDivNSquared = lsDivN * lsDivN; + double ssDivN = ss / this.getWeight(); + res[i] = ssDivN - lsDivNSquared; + + // Due to numerical errors, small negative values can occur. + // We correct this by settings them to almost zero. + if (res[i] <= 0.0) { + if (res[i] > -EPSILON) { + res[i] = MIN_VARIANCE; + } + } + } + return res; + } + + /** + * Check if this cluster is empty or not. + * + * @return <code>true</code> if the cluster has no data points, <code>false</code> otherwise. + */ + public boolean isEmpty() { + return this.N == 0; + } + + /** + * Calculate the normalized euclidean distance (Mahalanobis distance for distribution w/o covariances) to a point. + * + * @param point + * The point to which the distance is calculated. + * @return The normalized distance to the cluster center. + * + * TODO: check whether WEIGHTING is correctly applied to variances + */ + // ??????? + private double calcNormalizedDistance(double[] point) { + double[] center = getCenter(); + double res = 0.0; + + for (int i = 0; i < center.length; i++) { + double diff = center[i] - point[i]; + res += (diff * diff);// variance[i]; + } + return Math.sqrt(res); + } + + /** + * Approximates the inverse error function. Clustream needs this. + * + * @param x + */ + public static double inverseError(double x) { + double z = Math.sqrt(Math.PI) * x; + double res = (z) / 2; + + double z2 = z * z; + double zProd = z * z2; // z^3 + res += (1.0 / 24) * zProd; + + zProd *= z2; // z^5 + res += (7.0 / 960) * zProd; + + zProd *= z2; // z^7 + res += (127 * zProd) / 80640; + + zProd *= z2; // z^9 + res += (4369 * zProd) / 11612160; + + zProd *= z2; // z^11 + res += (34807 * zProd) / 364953600; + + zProd *= z2; // z^13 + res += (20036983 * zProd) / 797058662400d; + + return res; + } + + @Override + protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue) { + super.getClusterSpecificInfo(infoTitle, infoValue); + infoTitle.add("Deviation"); + + double[] variance = getVarianceVector(); + double sumOfDeviation = 0.0; + for (double aVariance : variance) { + double d = Math.sqrt(aVariance); + sumOfDeviation += d; + } + + sumOfDeviation /= variance.length; + + infoValue.add(Double.toString(sumOfDeviation)); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/WithKmeans.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/WithKmeans.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/WithKmeans.java new file mode 100644 index 0000000..cf54dd8 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/WithKmeans.java @@ -0,0 +1,466 @@ +package org.apache.samoa.moa.clusterers.clustream; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
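The relevance stamp of a ClustreamKernel above is mu + sigma * Phi^-1(m / (2N)) over the arrival timestamps, computed through the inverseError series at the bottom of the class; kernels whose stamp falls before (current timestamp - horizon) are treated as stale and may be replaced. A standalone sketch of that computation from the raw temporal statistics (LST, SST, N), assuming the timestamps are roughly normally distributed:

import org.apache.samoa.moa.clusterers.clustream.ClustreamKernel;

public final class RelevanceStampSketch {

  public static double relevanceStamp(double lst, double sst, long n, int m) {
    double mu = lst / n;                         // mean arrival time
    double sigma = Math.sqrt(sst / n - mu * mu); // standard deviation of arrival times
    if (n < 2L * m) {
      return mu; // too few points to estimate a tail quantile, as in getRelevanceStamp()
    }
    double z = ((double) m) / (2 * n);           // target percentile
    double quantile = Math.sqrt(2) * ClustreamKernel.inverseError(2 * z - 1);
    return mu + sigma * quantile;
  }
}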
+ * #L% + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; + +import org.apache.samoa.instances.DenseInstance; +import org.apache.samoa.instances.Instance; +import org.apache.samoa.moa.cluster.CFCluster; +import org.apache.samoa.moa.cluster.Cluster; +import org.apache.samoa.moa.cluster.Clustering; +import org.apache.samoa.moa.cluster.SphereCluster; +import org.apache.samoa.moa.clusterers.AbstractClusterer; +import org.apache.samoa.moa.core.Measurement; + +import com.github.javacliparser.IntOption; + +public class WithKmeans extends AbstractClusterer { + + private static final long serialVersionUID = 1L; + + public IntOption timeWindowOption = new IntOption("horizon", + 'h', "Rang of the window.", 1000); + + public IntOption maxNumKernelsOption = new IntOption( + "maxNumKernels", 'm', + "Maximum number of micro kernels to use.", 100); + + public IntOption kernelRadiFactorOption = new IntOption( + "kernelRadiFactor", 't', + "Multiplier for the kernel radius", 2); + + public IntOption kOption = new IntOption( + "k", 'k', + "k of macro k-means (number of clusters)", 5); + + private int timeWindow; + private long timestamp = -1; + private ClustreamKernel[] kernels; + private boolean initialized; + private List<ClustreamKernel> buffer; // Buffer for initialization with kNN + private int bufferSize; + private double t; + private int m; + + public WithKmeans() { + + } + + @Override + public void resetLearningImpl() { + this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()]; + this.timeWindow = timeWindowOption.getValue(); + this.initialized = false; + this.buffer = new LinkedList<ClustreamKernel>(); + this.bufferSize = maxNumKernelsOption.getValue(); + t = kernelRadiFactorOption.getValue(); + m = maxNumKernelsOption.getValue(); + } + + @Override + public void trainOnInstanceImpl(Instance instance) { + int dim = instance.numValues(); + timestamp++; + // 0. Initialize + if (!initialized) { + if (buffer.size() < bufferSize) { + buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m)); + return; + } else { + for (int i = 0; i < buffer.size(); i++) { + kernels[i] = new ClustreamKernel(new DenseInstance(1.0, buffer.get(i).getCenter()), dim, timestamp, t, m); + } + + buffer.clear(); + initialized = true; + return; + } + } + + // 1. Determine closest kernel + ClustreamKernel closestKernel = null; + double minDistance = Double.MAX_VALUE; + for (int i = 0; i < kernels.length; i++) { + // System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation()); + double distance = distance(instance.toDoubleArray(), kernels[i].getCenter()); + if (distance < minDistance) { + closestKernel = kernels[i]; + minDistance = distance; + } + } + + // 2. Check whether instance fits into closestKernel + double radius = 0.0; + if (closestKernel.getWeight() == 1) { + // Special case: estimate radius by determining the distance to the + // next closest cluster + radius = Double.MAX_VALUE; + double[] center = closestKernel.getCenter(); + for (int i = 0; i < kernels.length; i++) { + if (kernels[i] == closestKernel) { + continue; + } + + double distance = distance(kernels[i].getCenter(), center); + radius = Math.min(distance, radius); + } + } else { + radius = closestKernel.getRadius(); + } + + if (minDistance < radius) { + // Date fits, put into kernel and be happy + closestKernel.insert(instance, timestamp); + return; + } + + // 3. 
Date does not fit, we need to free + // some space to insert a new kernel + long threshold = timestamp - timeWindow; // Kernels before this can be forgotten + + // 3.1 Try to forget old kernels + for (int i = 0; i < kernels.length; i++) { + if (kernels[i].getRelevanceStamp() < threshold) { + kernels[i] = new ClustreamKernel(instance, dim, timestamp, t, m); + return; + } + } + + // 3.2 Merge closest two kernels + int closestA = 0; + int closestB = 0; + minDistance = Double.MAX_VALUE; + for (int i = 0; i < kernels.length; i++) { + double[] centerA = kernels[i].getCenter(); + for (int j = i + 1; j < kernels.length; j++) { + double dist = distance(centerA, kernels[j].getCenter()); + if (dist < minDistance) { + minDistance = dist; + closestA = i; + closestB = j; + } + } + } + assert (closestA != closestB); + + kernels[closestA].add(kernels[closestB]); + kernels[closestB] = new ClustreamKernel(instance, dim, timestamp, t, m); + } + + @Override + public Clustering getMicroClusteringResult() { + if (!initialized) { + return new Clustering(new Cluster[0]); + } + + ClustreamKernel[] result = new ClustreamKernel[kernels.length]; + for (int i = 0; i < result.length; i++) { + result[i] = new ClustreamKernel(kernels[i], t, m); + } + + return new Clustering(result); + } + + @Override + public Clustering getClusteringResult() { + return kMeans_rand(kOption.getValue(), getMicroClusteringResult()); + } + + public Clustering getClusteringResult(Clustering gtClustering) { + return kMeans_gta(kOption.getValue(), getMicroClusteringResult(), gtClustering); + } + + public String getName() { + return "CluStreamWithKMeans " + timeWindow; + } + + /** + * Distance between two vectors. + * + * @param pointA + * @param pointB + * @return dist + */ + private static double distance(double[] pointA, double[] pointB) { + double distance = 0.0; + for (int i = 0; i < pointA.length; i++) { + double d = pointA[i] - pointB[i]; + distance += d * d; + } + return Math.sqrt(distance); + } + + /** + * k-means of (micro)clusters, with ground-truth-aided initialization. (to produce best results) + * + * @param k + * @param data + * @return (macro)clustering - CFClusters + */ + public static Clustering kMeans_gta(int k, Clustering clustering, Clustering gtClustering) { + + ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); + for (int i = 0; i < clustering.size(); i++) { + if (clustering.get(i) instanceof CFCluster) { + microclusters.add((CFCluster) clustering.get(i)); + } else { + System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() + + ". Cluster needs to extend moa.cluster.CFCluster"); + } + } + + int n = microclusters.size(); + assert (k <= n); + + /* k-means */ + Random random = new Random(0); + Cluster[] centers = new Cluster[k]; + int K = gtClustering.size(); + + for (int i = 0; i < k; i++) { + if (i < K) { // GT-aided + centers[i] = new SphereCluster(gtClustering.get(i).getCenter(), 0); + } else { // Randomized + int rid = random.nextInt(n); + centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0); + } + } + + return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters); + } + + /** + * k-means of (micro)clusters, with randomized initialization. 
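+ * Initial centers are picked uniformly at random from the given micro-clusters (the implementation below uses a fixed seed, new Random(0), so repeated runs yield the same initialization).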
+ * + * @param k + * @param data + * @return (macro)clustering - CFClusters + */ + public static Clustering kMeans_rand(int k, Clustering clustering) { + + ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); + for (int i = 0; i < clustering.size(); i++) { + if (clustering.get(i) instanceof CFCluster) { + microclusters.add((CFCluster) clustering.get(i)); + } else { + System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() + + ". Cluster needs to extend moa.cluster.CFCluster"); + } + } + + int n = microclusters.size(); + assert (k <= n); + + /* k-means */ + Random random = new Random(0); + Cluster[] centers = new Cluster[k]; + + for (int i = 0; i < k; i++) { + int rid = random.nextInt(n); + centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0); + } + + return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters); + } + + /** + * (The Actual Algorithm) k-means of (micro)clusters, with specified initialization points. + * + * @param k + * @param centers + * - initial centers + * @param data + * @return (macro)clustering - SphereClusters + */ + protected static Clustering kMeans(int k, Cluster[] centers, List<? extends Cluster> data) { + assert (centers.length == k); + assert (k > 0); + + int dimensions = centers[0].getCenter().length; + + ArrayList<ArrayList<Cluster>> clustering = new ArrayList<ArrayList<Cluster>>(); + for (int i = 0; i < k; i++) { + clustering.add(new ArrayList<Cluster>()); + } + + while (true) { + // Assign points to clusters + for (Cluster point : data) { + double minDistance = distance(point.getCenter(), centers[0].getCenter()); + int closestCluster = 0; + for (int i = 1; i < k; i++) { + double distance = distance(point.getCenter(), centers[i].getCenter()); + if (distance < minDistance) { + closestCluster = i; + minDistance = distance; + } + } + + clustering.get(closestCluster).add(point); + } + + // Calculate new centers and clear clustering lists + SphereCluster[] newCenters = new SphereCluster[centers.length]; + for (int i = 0; i < k; i++) { + newCenters[i] = calculateCenter(clustering.get(i), dimensions); + clustering.get(i).clear(); + } + + // Convergence check + boolean converged = true; + for (int i = 0; i < k; i++) { + if (!Arrays.equals(centers[i].getCenter(), newCenters[i].getCenter())) { + converged = false; + break; + } + } + + if (converged) { + break; + } else { + centers = newCenters; + } + } + + return new Clustering(centers); + } + + /** + * Rearrange the k-means result into a set of CFClusters, cleaning up the redundancies. 
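+ * Each micro-cluster is assigned to its closest k-means center and merged into that center's CF representation; centers that end up with no assigned micro-clusters are dropped from the returned clustering.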
+ * + * @param kMeansResult + * @param microclusters + * @return + */ + protected static Clustering cleanUpKMeans(Clustering kMeansResult, ArrayList<CFCluster> microclusters) { + /* Convert k-means result to CFClusters */ + int k = kMeansResult.size(); + CFCluster[] converted = new CFCluster[k]; + + for (CFCluster mc : microclusters) { + // Find closest kMeans cluster + double minDistance = Double.MAX_VALUE; + int closestCluster = 0; + for (int i = 0; i < k; i++) { + double distance = distance(kMeansResult.get(i).getCenter(), mc.getCenter()); + if (distance < minDistance) { + closestCluster = i; + minDistance = distance; + } + } + + // Add to cluster + if (converted[closestCluster] == null) { + converted[closestCluster] = (CFCluster) mc.copy(); + } else { + converted[closestCluster].add(mc); + } + } + + // Clean up + int count = 0; + for (int i = 0; i < converted.length; i++) { + if (converted[i] != null) + count++; + } + + CFCluster[] cleaned = new CFCluster[count]; + count = 0; + for (int i = 0; i < converted.length; i++) { + if (converted[i] != null) + cleaned[count++] = converted[i]; + } + + return new Clustering(cleaned); + } + + /** + * k-means helper: Calculate a wrapping cluster of assigned points[microclusters]. + * + * @param assigned + * @param dimensions + * @return SphereCluster (with center and radius) + */ + private static SphereCluster calculateCenter(ArrayList<Cluster> assigned, int dimensions) { + double[] result = new double[dimensions]; + for (int i = 0; i < result.length; i++) { + result[i] = 0.0; + } + + if (assigned.size() == 0) { + return new SphereCluster(result, 0.0); + } + + for (Cluster point : assigned) { + double[] center = point.getCenter(); + for (int i = 0; i < result.length; i++) { + result[i] += center[i]; + } + } + + // Normalize + for (int i = 0; i < result.length; i++) { + result[i] /= assigned.size(); + } + + // Calculate radius: biggest wrapping distance from center + double radius = 0.0; + for (Cluster point : assigned) { + double dist = distance(result, point.getCenter()); + if (dist > radius) { + radius = dist; + } + } + SphereCluster sc = new SphereCluster(result, radius); + sc.setWeight(assigned.size()); + return sc; + } + + /** Miscellaneous **/ + + @Override + public boolean implementsMicroClusterer() { + return true; + } + + public boolean isRandomizable() { + return false; + } + + public double[] getVotesForInstance(Instance inst) { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + protected Measurement[] getModelMeasurementsImpl() { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public void getModelDescription(StringBuilder out, int indent) { + throw new UnsupportedOperationException("Not supported yet."); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoClassDiscovery.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoClassDiscovery.java b/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoClassDiscovery.java new file mode 100644 index 0000000..e141fde --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoClassDiscovery.java @@ -0,0 +1,196 @@ +package org.apache.samoa.moa.core; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except 
in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.net.URLClassLoader; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.jar.JarEntry; +import java.util.jar.JarFile; + +/** + * Class for discovering classes via reflection in the java class path. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class AutoClassDiscovery { + + protected static final Map<String, String[]> cachedClassNames = new HashMap<String, String[]>(); + + public static String[] findClassNames(String packageNameToSearch) { + String[] cached = cachedClassNames.get(packageNameToSearch); + if (cached == null) { + HashSet<String> classNames = new HashSet<String>(); + /* + * StringTokenizer pathTokens = new StringTokenizer(System + * .getProperty("java.class.path"), File.pathSeparator); + */ + String packageDirName = packageNameToSearch.replace('.', + File.separatorChar); + String packageJarName = packageNameToSearch.length() > 0 ? (packageNameToSearch.replace('.', '/') + "/") + : ""; + String part = ""; + + AutoClassDiscovery adc = new AutoClassDiscovery(); + URLClassLoader sysLoader = (URLClassLoader) adc.getClass().getClassLoader(); + URL[] cl_urls = sysLoader.getURLs(); + + for (int i = 0; i < cl_urls.length; i++) { + part = cl_urls[i].toString(); + if (part.startsWith("file:")) { + part = part.replace(" ", "%20"); + try { + File temp = new File(new java.net.URI(part)); + part = temp.getAbsolutePath(); + } catch (URISyntaxException e) { + e.printStackTrace(); + } + } + + // find classes + ArrayList<File> files = new ArrayList<File>(); + File dir = new File(part); + if (dir.isDirectory()) { + File root = new File(dir.toString() + File.separatorChar + packageDirName); + String[] names = findClassesInDirectoryRecursive(root, ""); + classNames.addAll(Arrays.asList(names)); + } else { + try { + JarFile jar = new JarFile(part); + Enumeration<JarEntry> jarEntries = jar.entries(); + while (jarEntries.hasMoreElements()) { + String jarEntry = jarEntries.nextElement().getName(); + if (jarEntry.startsWith(packageJarName)) { + String relativeName = jarEntry.substring(packageJarName.length()); + if (relativeName.endsWith(".class")) { + relativeName = relativeName.replace('/', + '.'); + classNames.add(relativeName.substring(0, + relativeName.length() + - ".class".length())); + } + } + } + } catch (IOException ignored) { + // ignore unreadable files + } + } + } + + /* + * while (pathTokens.hasMoreElements()) { String pathToSearch = + * pathTokens.nextElement().toString(); if (pathToSearch.endsWith(".jar")) + * { try { JarFile jar = new JarFile(pathToSearch); Enumeration<JarEntry> + * jarEntries = jar.entries(); while (jarEntries.hasMoreElements()) { + * String jarEntry = jarEntries.nextElement() .getName(); if + * (jarEntry.startsWith(packageJarName)) { String relativeName = jarEntry + * .substring(packageJarName.length()); if + * 
(relativeName.endsWith(".class")) { relativeName = + * relativeName.replace('/', '.'); + * classNames.add(relativeName.substring(0, relativeName.length() - + * ".class".length())); } } } } catch (IOException ignored) { // ignore + * unreadable files } } else { File root = new File(pathToSearch + + * File.separatorChar + packageDirName); String[] names = + * findClassesInDirectoryRecursive(root, ""); for (String name : names) { + * classNames.add(name); } } } + */ + cached = classNames.toArray(new String[classNames.size()]); + Arrays.sort(cached); + cachedClassNames.put(packageNameToSearch, cached); + } + return cached; + } + + protected static String[] findClassesInDirectoryRecursive(File root, + String packagePath) { + HashSet<String> classNames = new HashSet<String>(); + if (root.isDirectory()) { + String[] list = root.list(); + for (String string : list) { + if (string.endsWith(".class")) { + classNames.add(packagePath + + string.substring(0, string.length() + - ".class".length())); + } else { + File testDir = new File(root.getPath() + File.separatorChar + + string); + if (testDir.isDirectory()) { + String[] names = findClassesInDirectoryRecursive( + testDir, packagePath + string + "."); + classNames.addAll(Arrays.asList(names)); + } + } + } + } + return classNames.toArray(new String[classNames.size()]); + } + + public static Class[] findClassesOfType(String packageNameToSearch, + Class<?> typeDesired) { + ArrayList<Class<?>> classesFound = new ArrayList<Class<?>>(); + String[] classNames = findClassNames(packageNameToSearch); + for (String className : classNames) { + String fullName = packageNameToSearch.length() > 0 ? (packageNameToSearch + + "." + className) + : className; + if (isPublicConcreteClassOfType(fullName, typeDesired)) { + try { + classesFound.add(Class.forName(fullName)); + } catch (Exception ignored) { + // ignore classes that we cannot instantiate + } + } + } + return classesFound.toArray(new Class[classesFound.size()]); + } + + public static boolean isPublicConcreteClassOfType(String className, + Class<?> typeDesired) { + Class<?> testClass = null; + try { + testClass = Class.forName(className); + } catch (Exception e) { + return false; + } + int classModifiers = testClass.getModifiers(); + return (java.lang.reflect.Modifier.isPublic(classModifiers) + && !java.lang.reflect.Modifier.isAbstract(classModifiers) + && typeDesired.isAssignableFrom(testClass) && hasEmptyConstructor(testClass)); + } + + public static boolean hasEmptyConstructor(Class<?> type) { + try { + type.getConstructor(); + return true; + } catch (Exception ignored) { + return false; + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoExpandVector.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoExpandVector.java b/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoExpandVector.java new file mode 100644 index 0000000..fce23dc --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoExpandVector.java @@ -0,0 +1,133 @@ +package org.apache.samoa.moa.core; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.Collection; + +import org.apache.samoa.moa.AbstractMOAObject; +import org.apache.samoa.moa.MOAObject; + +/** + * Vector with the capability of automatic expansion. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class AutoExpandVector<T> extends ArrayList<T> implements MOAObject { + + private static final long serialVersionUID = 1L; + + public AutoExpandVector() { + super(0); + } + + public AutoExpandVector(int size) { + super(size); + } + + @Override + public void add(int pos, T obj) { + if (pos > size()) { + while (pos > size()) { + add(null); + } + trimToSize(); + } + super.add(pos, obj); + } + + @Override + public T get(int pos) { + return ((pos >= 0) && (pos < size())) ? super.get(pos) : null; + } + + @Override + public T set(int pos, T obj) { + if (pos >= size()) { + add(pos, obj); + return null; + } + return super.set(pos, obj); + } + + @Override + public boolean add(T arg0) { + boolean result = super.add(arg0); + trimToSize(); + return result; + } + + @Override + public boolean addAll(Collection<? extends T> arg0) { + boolean result = super.addAll(arg0); + trimToSize(); + return result; + } + + @Override + public boolean addAll(int arg0, Collection<? extends T> arg1) { + boolean result = super.addAll(arg0, arg1); + trimToSize(); + return result; + } + + @Override + public void clear() { + super.clear(); + trimToSize(); + } + + @Override + public T remove(int arg0) { + T result = super.remove(arg0); + trimToSize(); + return result; + } + + @Override + public boolean remove(Object arg0) { + boolean result = super.remove(arg0); + trimToSize(); + return result; + } + + @Override + protected void removeRange(int arg0, int arg1) { + super.removeRange(arg0, arg1); + trimToSize(); + } + + @Override + public MOAObject copy() { + return AbstractMOAObject.copy(this); + } + + @Override + public int measureByteSize() { + return AbstractMOAObject.measureByteSize(this); + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } +}
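The two sketches below are editorial illustrations and are not part of the patch above.

First, a minimal sketch of the auto-expansion semantics of AutoExpandVector as added in this patch: setting an index beyond the current size pads the gap with nulls, and out-of-range reads return null instead of throwing. The class name AutoExpandVectorExample is hypothetical.

    import org.apache.samoa.moa.core.AutoExpandVector;

    public class AutoExpandVectorExample {
      public static void main(String[] args) {
        AutoExpandVector<Double> v = new AutoExpandVector<>();
        v.set(3, 2.5);                  // indices 0..2 are padded with null, index 3 holds 2.5
        System.out.println(v.get(0));   // null (in range but never set)
        System.out.println(v.get(9));   // null (out of range, no IndexOutOfBoundsException)
        System.out.println(v.get(3));   // 2.5
        System.out.println(v.size());   // 4
      }
    }

Second, a sketch of how the CluStream clusterer added above (WithKmeans.java) might be driven. The random two-dimensional data, the class name CluStreamSketch, and the direct calls to resetLearningImpl/trainOnInstanceImpl are illustrative assumptions (in SAMOA these methods are normally reached through the surrounding learner API); DenseInstance, Clustering, the option fields, and the getters come from the patch itself, and IntOption.setValue is assumed to behave as in the javacliparser library.

    import java.util.Random;

    import org.apache.samoa.instances.DenseInstance;
    import org.apache.samoa.instances.Instance;
    import org.apache.samoa.moa.cluster.Clustering;
    import org.apache.samoa.moa.clusterers.clustream.WithKmeans;

    public class CluStreamSketch {
      public static void main(String[] args) {
        WithKmeans clusterer = new WithKmeans();
        clusterer.kOption.setValue(3);               // k of the offline (macro) k-means
        clusterer.maxNumKernelsOption.setValue(50);  // number of micro-cluster kernels
        clusterer.resetLearningImpl();               // illustrative: normally invoked via the learner wrapper

        // Feed a stream of random 2-d points (illustrative data only).
        Random rnd = new Random(42);
        for (int i = 0; i < 10000; i++) {
          double[] values = { rnd.nextGaussian(), rnd.nextGaussian() };
          Instance inst = new DenseInstance(1.0, values);
          clusterer.trainOnInstanceImpl(inst);       // online phase: maintain the micro-cluster kernels
        }

        // Offline phase: k-means over the maintained micro-clusters.
        Clustering macro = clusterer.getClusteringResult();
        for (int i = 0; i < macro.size(); i++) {
          System.out.println("center " + java.util.Arrays.toString(macro.get(i).getCenter())
              + " weight " + macro.get(i).getWeight());
        }
      }
    }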