http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/AbstractClusterer.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/AbstractClusterer.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/AbstractClusterer.java new file mode 100644 index 0000000..a1d80d1 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/AbstractClusterer.java @@ -0,0 +1,298 @@ +package com.yahoo.labs.samoa.moa.clusterers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2009 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.LinkedList; +import java.util.List; +import java.util.Random; +import com.yahoo.labs.samoa.moa.cluster.Clustering; + +import com.yahoo.labs.samoa.instances.InstancesHeader; +import com.yahoo.labs.samoa.moa.core.Measurement; +import com.yahoo.labs.samoa.moa.core.ObjectRepository; +import com.yahoo.labs.samoa.moa.core.StringUtils; +import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; +import com.github.javacliparser.FlagOption; +import com.github.javacliparser.IntOption; +import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; +import com.yahoo.labs.samoa.instances.Instance; +import com.yahoo.labs.samoa.instances.Instances; + +public abstract class AbstractClusterer extends AbstractOptionHandler + implements Clusterer { + + @Override + public String getPurposeString() { + return "MOA Clusterer: " + getClass().getCanonicalName(); + } + + protected InstancesHeader modelContext; + + protected double trainingWeightSeenByModel = 0.0; + + protected int randomSeed = 1; + + protected IntOption randomSeedOption; + + public FlagOption evaluateMicroClusteringOption; + + protected Random clustererRandom; + + protected Clustering clustering; + + public AbstractClusterer() { + if (isRandomizable()) { + this.randomSeedOption = new IntOption("randomSeed", 'r', + "Seed for random behaviour of the Clusterer.", 1); + } + + if( implementsMicroClusterer()){ + this.evaluateMicroClusteringOption = + new FlagOption("evaluateMicroClustering", 'M', + "Evaluate the underlying microclustering instead of the macro clustering"); + } + } + + @Override + public void prepareForUseImpl(TaskMonitor monitor, + ObjectRepository repository) { + if (this.randomSeedOption != null) { + this.randomSeed = this.randomSeedOption.getValue(); + } + if (!trainingHasStarted()) { + resetLearning(); + } + clustering = new Clustering(); + } + + public void setModelContext(InstancesHeader ih) { + if ((ih != null) && (ih.classIndex() < 0)) { + throw new IllegalArgumentException( + "Context for a Clusterer must include a class to learn"); + } + if (trainingHasStarted() + && (this.modelContext != null) + && ((ih == null) || !contextIsCompatible(this.modelContext, ih))) { + throw new IllegalArgumentException( + "New context is not compatible with existing model"); + } + this.modelContext = ih; + } + + public InstancesHeader getModelContext() { + return this.modelContext; + } + + public void setRandomSeed(int s) { + this.randomSeed = s; + if (this.randomSeedOption != null) { + // keep option consistent + this.randomSeedOption.setValue(s); + } + } + + public boolean trainingHasStarted() { + return this.trainingWeightSeenByModel > 0.0; + } + + public double trainingWeightSeenByModel() { + return this.trainingWeightSeenByModel; + } + + public void resetLearning() { + this.trainingWeightSeenByModel = 0.0; + if (isRandomizable()) { + this.clustererRandom = new Random(this.randomSeed); + } + resetLearningImpl(); + } + + public void trainOnInstance(Instance inst) { + if (inst.weight() > 0.0) { + this.trainingWeightSeenByModel += inst.weight(); + trainOnInstanceImpl(inst); + } + } + + public Measurement[] getModelMeasurements() { + List<Measurement> measurementList = new LinkedList<Measurement>(); + measurementList.add(new Measurement("model training instances", + trainingWeightSeenByModel())); + measurementList.add(new Measurement("model serialized size (bytes)", + measureByteSize())); + Measurement[] modelMeasurements = getModelMeasurementsImpl(); + if (modelMeasurements != null) { + for (Measurement measurement : modelMeasurements) { + measurementList.add(measurement); + } + } + // add average of sub-model measurements + Clusterer[] subModels = getSubClusterers(); + if ((subModels != null) && (subModels.length > 0)) { + List<Measurement[]> subMeasurements = new LinkedList<Measurement[]>(); + for (Clusterer subModel : subModels) { + if (subModel != null) { + subMeasurements.add(subModel.getModelMeasurements()); + } + } + Measurement[] avgMeasurements = Measurement + .averageMeasurements(subMeasurements + .toArray(new Measurement[subMeasurements.size()][])); + for (Measurement measurement : avgMeasurements) { + measurementList.add(measurement); + } + } + return measurementList.toArray(new Measurement[measurementList.size()]); + } + + public void getDescription(StringBuilder out, int indent) { + StringUtils.appendIndented(out, indent, "Model type: "); + out.append(this.getClass().getName()); + StringUtils.appendNewline(out); + Measurement.getMeasurementsDescription(getModelMeasurements(), out, + indent); + StringUtils.appendNewlineIndented(out, indent, "Model description:"); + StringUtils.appendNewline(out); + if (trainingHasStarted()) { + getModelDescription(out, indent); + } else { + StringUtils.appendIndented(out, indent, + "Model has not been trained."); + } + } + + public Clusterer[] getSubClusterers() { + return null; + } + + @Override + public Clusterer copy() { + return (Clusterer) super.copy(); + } + +// public boolean correctlyClassifies(Instance inst) { +// return Utils.maxIndex(getVotesForInstance(inst)) == (int) inst +// .classValue(); +// } + + public String getClassNameString() { + return InstancesHeader.getClassNameString(this.modelContext); + } + + public String getClassLabelString(int classLabelIndex) { + return InstancesHeader.getClassLabelString(this.modelContext, + classLabelIndex); + } + + public String getAttributeNameString(int attIndex) { + return InstancesHeader.getAttributeNameString(this.modelContext, + attIndex); + } + + public String getNominalValueString(int attIndex, int valIndex) { + return InstancesHeader.getNominalValueString(this.modelContext, + attIndex, valIndex); + } + + // originalContext notnull + // newContext notnull + public static boolean contextIsCompatible(InstancesHeader originalContext, + InstancesHeader newContext) { + // rule 1: num classes can increase but never decrease + // rule 2: num attributes can increase but never decrease + // rule 3: num nominal attribute values can increase but never decrease + // rule 4: attribute types must stay in the same order (although class + // can + // move; is always skipped over) + // attribute names are free to change, but should always still represent + // the original attributes + if (newContext.numClasses() < originalContext.numClasses()) { + return false; // rule 1 + } + if (newContext.numAttributes() < originalContext.numAttributes()) { + return false; // rule 2 + } + int oPos = 0; + int nPos = 0; + while (oPos < originalContext.numAttributes()) { + if (oPos == originalContext.classIndex()) { + oPos++; + if (!(oPos < originalContext.numAttributes())) { + break; + } + } + if (nPos == newContext.classIndex()) { + nPos++; + } + if (originalContext.attribute(oPos).isNominal()) { + if (!newContext.attribute(nPos).isNominal()) { + return false; // rule 4 + } + if (newContext.attribute(nPos).numValues() < originalContext + .attribute(oPos).numValues()) { + return false; // rule 3 + } + } else { + assert (originalContext.attribute(oPos).isNumeric()); + if (!newContext.attribute(nPos).isNumeric()) { + return false; // rule 4 + } + } + oPos++; + nPos++; + } + return true; // all checks clear + } + + // reason for ...Impl methods: + // ease programmer burden by not requiring them to remember calls to super + // in overridden methods & will produce compiler errors if not overridden + + public abstract void resetLearningImpl(); + + public abstract void trainOnInstanceImpl(Instance inst); + + protected abstract Measurement[] getModelMeasurementsImpl(); + + public abstract void getModelDescription(StringBuilder out, int indent); + + protected static int modelAttIndexToInstanceAttIndex(int index, + Instance inst) { + return inst.classIndex() > index ? index : index + 1; + } + + protected static int modelAttIndexToInstanceAttIndex(int index, + Instances insts) { + return insts.classIndex() > index ? index : index + 1; + } + + public boolean implementsMicroClusterer(){ + return false; + } + + public boolean keepClassLabel(){ + return false; + } + + public Clustering getMicroClusteringResult(){ + return null; + }; +}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/ClusterGenerator.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/ClusterGenerator.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/ClusterGenerator.java new file mode 100644 index 0000000..6056514 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/ClusterGenerator.java @@ -0,0 +1,368 @@ + +package com.yahoo.labs.samoa.moa.clusterers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2010 RWTH Aachen University, Germany + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Random; +import com.github.javacliparser.FloatOption; +import com.github.javacliparser.IntOption; +import com.yahoo.labs.samoa.moa.cluster.Clustering; +import com.yahoo.labs.samoa.moa.cluster.SphereCluster; +import com.yahoo.labs.samoa.moa.core.Measurement; +import com.yahoo.labs.samoa.moa.core.DataPoint; +import com.yahoo.labs.samoa.instances.Instance; + +public class ClusterGenerator extends AbstractClusterer{ + + private static final long serialVersionUID = 1L; + + public IntOption timeWindowOption = new IntOption("timeWindow", + 't', "Rang of the window.", 1000); + + public FloatOption radiusDecreaseOption = new FloatOption("radiusDecrease", 'r', + "The average radii of the centroids in the model.", 0, 0, 1); + + public FloatOption radiusIncreaseOption = new FloatOption("radiusIncrease", 'R', + "The average radii of the centroids in the model.", 0, 0, 1); + + public FloatOption positionOffsetOption = new FloatOption("positionOffset", 'p', + "The average radii of the centroids in the model.", 0, 0, 1); + + public FloatOption clusterRemoveOption = new FloatOption("clusterRemove", 'D', + "Deletes complete clusters from the clustering.", 0, 0, 1); + + public FloatOption joinClustersOption = new FloatOption("joinClusters", 'j', + "Join two clusters if their hull distance is less minRadius times this factor.", 0, 0, 1); + + public FloatOption clusterAddOption = new FloatOption("clusterAdd", 'A', + "Adds additional clusters.", 0, 0, 1); + + private static double err_intervall_width = 0.0; + private ArrayList<DataPoint> points; + private int instanceCounter; + private int windowCounter; + private Random random; + private Clustering sourceClustering = null; + + @Override + public void resetLearningImpl() { + points = new ArrayList<DataPoint>(); + instanceCounter = 0; + windowCounter = 0; + random = new Random(227); + + //joinClustersOption.set(); + //evaluateMicroClusteringOption.set(); + } + + @Override + public void trainOnInstanceImpl(Instance inst) { + if(windowCounter >= timeWindowOption.getValue()){ + points.clear(); + windowCounter = 0; + } + windowCounter++; + instanceCounter++; + points.add( new DataPoint(inst,instanceCounter)); + } + + @Override + public boolean implementsMicroClusterer() { + return true; + } + + + public void setSourceClustering(Clustering source){ + sourceClustering = source; + } + + @Override + public Clustering getMicroClusteringResult() { + //System.out.println("Numcluster:"+clustering.size()+" / "+num); + //Clustering source_clustering = new Clustering(points, overlapThreshold, microInitMinPoints); + if(sourceClustering == null){ + + System.out.println("You need to set a source clustering for the ClusterGenerator to work"); + return null; + } + return alterClustering(sourceClustering); + } + + + + public Clustering getClusteringResult(){ + sourceClustering = new Clustering(points); +// if(sourceClustering == null){ +// System.out.println("You need to set a source clustering for the ClusterGenerator to work"); +// return null; +// } + return alterClustering(sourceClustering); + } + + + private Clustering alterClustering(Clustering scclustering){ + //percentage of the radius that will be cut off + //0: no changes to radius + //1: radius of 0 + double errLevelRadiusDecrease = radiusDecreaseOption.getValue(); + + //0: no changes to radius + //1: radius 100% bigger + double errLevelRadiusIncrease = radiusIncreaseOption.getValue(); + + //0: no changes + //1: distance between centers is 2 * original radius + double errLevelPosition = positionOffsetOption.getValue(); + + + int numRemoveCluster = (int)(clusterRemoveOption.getValue()*scclustering.size()); + + int numAddCluster = (int)(clusterAddOption.getValue()*scclustering.size()); + + for (int c = 0; c < numRemoveCluster; c++) { + int delId = random.nextInt(scclustering.size()); + scclustering.remove(delId); + } + + int numCluster = scclustering.size(); + double[] err_seeds = new double[numCluster]; + double err_seed_sum = 0.0; + double tmp_seed; + for (int i = 0; i < numCluster; i++) { + tmp_seed = random.nextDouble(); + err_seeds[i] = err_seed_sum + tmp_seed; + err_seed_sum+= tmp_seed; + } + + double sumWeight = 0; + for (int i = 0; i <numCluster; i++) { + sumWeight+= scclustering.get(i).getWeight(); + } + + Clustering clustering = new Clustering(); + + for (int i = 0; i <numCluster; i++) { + if(!(scclustering.get(i) instanceof SphereCluster)){ + System.out.println("Not a Sphere Cluster"); + continue; + } + SphereCluster sourceCluster = (SphereCluster)scclustering.get(i); + double[] center = Arrays.copyOf(sourceCluster.getCenter(),sourceCluster.getCenter().length); + double weight = sourceCluster.getWeight(); + double radius = sourceCluster.getRadius(); + + //move cluster center + if(errLevelPosition >0){ + double errOffset = random.nextDouble()*err_intervall_width/2.0; + double errOffsetDirection = ((random.nextBoolean())? 1 : -1); + double level = errLevelPosition + errOffsetDirection * errOffset; + double[] vector = new double[center.length]; + double vectorLength = 0; + for (int d = 0; d < center.length; d++) { + vector[d] = (random.nextBoolean()?1:-1)*random.nextDouble(); + vectorLength += Math.pow(vector[d],2); + } + vectorLength = Math.sqrt(vectorLength); + + + //max is when clusters are next to each other + double length = 2 * radius * level; + + for (int d = 0; d < center.length; d++) { + //normalize length and then strecht to reach error position + vector[d]=vector[d]/vectorLength*length; + } +// System.out.println("Center "+Arrays.toString(center)); +// System.out.println("Vector "+Arrays.toString(vector)); + //check if error position is within bounds + double [] newCenter = new double[center.length]; + for (int d = 0; d < center.length; d++) { + //check bounds, otherwise flip vector + if(center[d] + vector[d] >= 0 && center[d] + vector[d] <= 1){ + newCenter[d] = center[d] + vector[d]; + } + else{ + newCenter[d] = center[d] + (-1)*vector[d]; + } + } + center = newCenter; + for (int d = 0; d < center.length; d++) { + if(newCenter[d] >= 0 && newCenter[d] <= 1){ + } + else{ + System.out.println("This shouldnt have happend, Cluster center out of bounds:"+Arrays.toString(newCenter)); + } + } + //System.out.println("new Center "+Arrays.toString(newCenter)); + + } + + //alter radius + if(errLevelRadiusDecrease > 0 || errLevelRadiusIncrease > 0){ + double errOffset = random.nextDouble()*err_intervall_width/2.0; + int errOffsetDirection = ((random.nextBoolean())? 1 : -1); + + if(errLevelRadiusDecrease > 0 && (errLevelRadiusIncrease == 0 || random.nextBoolean())){ + double level = (errLevelRadiusDecrease + errOffsetDirection * errOffset);//*sourceCluster.getWeight()/sumWeight; + level = (level<0)?0:level; + level = (level>1)?1:level; + radius*=(1-level); + } + else{ + double level = errLevelRadiusIncrease + errOffsetDirection * errOffset; + level = (level<0)?0:level; + level = (level>1)?1:level; + radius+=radius*level; + } + } + + SphereCluster newCluster = new SphereCluster(center, radius, weight); + newCluster.setMeasureValue("Source Cluster", "C"+sourceCluster.getId()); + + clustering.add(newCluster); + } + + if(joinClustersOption.getValue() > 0){ + clustering = joinClusters(clustering); + } + + //add new clusters by copying clusters and set a random center + for (int c = 0; c < numAddCluster; c++) { + int copyId = random.nextInt(clustering.size()); + SphereCluster scorg = (SphereCluster)clustering.get(copyId); + int dim = scorg.getCenter().length; + double[] center = new double [dim]; + double radius = scorg.getRadius(); + + boolean outofbounds = true; + int tryCounter = 0; + while(outofbounds && tryCounter < 20){ + tryCounter++; + outofbounds = false; + for (int j = 0; j < center.length; j++) { + center[j] = random.nextDouble(); + if(center[j]- radius < 0 || center[j] + radius > 1){ + outofbounds = true; + break; + } + } + } + if(outofbounds){ + System.out.println("Coludn't place additional cluster"); + } + else{ + SphereCluster scnew = new SphereCluster(center, radius, scorg.getWeight()/2); + scorg.setWeight(scorg.getWeight()-scnew.getWeight()); + clustering.add(scnew); + } + } + + return clustering; + + } + + + + private Clustering joinClusters(Clustering clustering){ + + double radiusFactor = joinClustersOption.getValue(); + boolean[] merged = new boolean[clustering.size()]; + + Clustering mclustering = new Clustering(); + + if(radiusFactor >0){ + for (int c1 = 0; c1 < clustering.size(); c1++) { + SphereCluster sc1 = (SphereCluster) clustering.get(c1); + double minDist = Double.MAX_VALUE; + double minOver = 1; + int maxindexCon = -1; + int maxindexOver = -1; + for (int c2 = 0; c2 < clustering.size(); c2++) { + SphereCluster sc2 = (SphereCluster) clustering.get(c2); +// double over = sc1.overlapRadiusDegree(sc2); +// if(over > 0 && over < minOver){ +// minOver = over; +// maxindexOver = c2; +// } + double dist = sc1.getHullDistance(sc2); + double threshold = Math.min(sc1.getRadius(), sc2.getRadius())*radiusFactor; + if(dist > 0 && dist < minDist && dist < threshold){ + minDist = dist; + maxindexCon = c2; + } + } + int maxindex = -1; + if(maxindexOver!=-1) + maxindex = maxindexOver; + else + maxindex = maxindexCon; + + if(maxindex!=-1 && !merged[c1]){ + merged[c1]=true; + merged[maxindex]=true; + SphereCluster scnew = new SphereCluster(sc1.getCenter(),sc1.getRadius(),sc1.getWeight()); + SphereCluster sc2 = (SphereCluster) clustering.get(maxindex); + scnew.merge(sc2); + mclustering.add(scnew); + } + } + } + + for (int i = 0; i < merged.length; i++) { + if(!merged[i]) + mclustering.add(clustering.get(i)); + } + + + return mclustering; + + } + + + + @Override + protected Measurement[] getModelMeasurementsImpl() { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public void getModelDescription(StringBuilder out, int indent) { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public boolean isRandomizable() { + return false; + } + + @Override + public boolean keepClassLabel(){ + return true; + } + + public double[] getVotesForInstance(Instance inst) { + return null; + } +} + + http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java new file mode 100644 index 0000000..76d7d50 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java @@ -0,0 +1,64 @@ +package com.yahoo.labs.samoa.moa.clusterers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2009 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.MOAObject; +import com.yahoo.labs.samoa.moa.cluster.Clustering; +import com.yahoo.labs.samoa.instances.InstancesHeader; +import com.yahoo.labs.samoa.moa.core.Measurement; +import com.yahoo.labs.samoa.moa.options.OptionHandler; +import com.yahoo.labs.samoa.instances.Instance; + +public interface Clusterer extends MOAObject, OptionHandler { + + public void setModelContext(InstancesHeader ih); + + public InstancesHeader getModelContext(); + + public boolean isRandomizable(); + + public void setRandomSeed(int s); + + public boolean trainingHasStarted(); + + public double trainingWeightSeenByModel(); + + public void resetLearning(); + + public void trainOnInstance(Instance inst); + + public double[] getVotesForInstance(Instance inst); + + public Measurement[] getModelMeasurements(); + + public Clusterer[] getSubClusterers(); + + public Clusterer copy(); + + public Clustering getClusteringResult(); + + public boolean implementsMicroClusterer(); + + public Clustering getMicroClusteringResult(); + + public boolean keepClassLabel(); + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java new file mode 100644 index 0000000..a9a891f --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java @@ -0,0 +1,202 @@ + +package com.yahoo.labs.samoa.moa.clusterers; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2010 RWTH Aachen University, Germany + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.List; +import com.yahoo.labs.samoa.moa.cluster.CFCluster; +import com.yahoo.labs.samoa.moa.cluster.Cluster; +import com.yahoo.labs.samoa.moa.cluster.Clustering; +import com.yahoo.labs.samoa.moa.cluster.SphereCluster; + +/** + * A kMeans implementation for microclusterings. For now it only uses the real centers of the + * groundtruthclustering for implementation. There should also be an option to use random + * centers. + * TODO: random centers + * TODO: Create a macro clustering interface to make different macro clustering algorithms available + * to micro clustering algorithms like clustream, denstream and clustree + * + */ +public class KMeans { + + /** + * This kMeans implementation clusters a big number of microclusters + * into a smaller amount of macro clusters. To make it comparable to other + * algorithms it uses the real centers of the ground truth macro clustering + * to have the best possible initialization. The quality of resulting + * macro clustering yields an upper bound for kMeans on the underlying + * microclustering. + * + * @param centers of the ground truth clustering + * @param data list of microclusters + * @return + */ + public static Clustering kMeans(Cluster[] centers, List<? extends Cluster> data ) { + int k = centers.length; + + int dimensions = centers[0].getCenter().length; + + ArrayList<ArrayList<Cluster>> clustering = + new ArrayList<ArrayList<Cluster>>(); + for ( int i = 0; i < k; i++ ) { + clustering.add( new ArrayList<Cluster>() ); + } + + int repetitions = 100; + while ( repetitions-- >= 0 ) { + // Assign points to clusters + for ( Cluster point : data ) { + double minDistance = distance( point.getCenter(), centers[0].getCenter() ); + int closestCluster = 0; + for ( int i = 1; i < k; i++ ) { + double distance = distance( point.getCenter(), centers[i].getCenter() ); + if ( distance < minDistance ) { + closestCluster = i; + minDistance = distance; + } + } + + clustering.get( closestCluster ).add( point ); + } + + // Calculate new centers and clear clustering lists + SphereCluster[] newCenters = new SphereCluster[centers.length]; + for ( int i = 0; i < k; i++ ) { + newCenters[i] = calculateCenter( clustering.get( i ), dimensions ); + clustering.get( i ).clear(); + } + centers = newCenters; + } + + return new Clustering( centers ); + } + + private static double distance(double[] pointA, double [] pointB){ + double distance = 0.0; + for (int i = 0; i < pointA.length; i++) { + double d = pointA[i] - pointB[i]; + distance += d * d; + } + return Math.sqrt(distance); + } + + + private static SphereCluster calculateCenter( ArrayList<Cluster> cluster, int dimensions ) { + double[] res = new double[dimensions]; + for ( int i = 0; i < res.length; i++ ) { + res[i] = 0.0; + } + + if ( cluster.size() == 0 ) { + return new SphereCluster( res, 0.0 ); + } + + for ( Cluster point : cluster ) { + double [] center = point.getCenter(); + for (int i = 0; i < res.length; i++) { + res[i] += center[i]; + } + } + + // Normalize + for ( int i = 0; i < res.length; i++ ) { + res[i] /= cluster.size(); + } + + // Calculate radius + double radius = 0.0; + for ( Cluster point : cluster ) { + double dist = distance( res, point.getCenter() ); + if ( dist > radius ) { + radius = dist; + } + } + + return new SphereCluster( res, radius ); + } + + public static Clustering gaussianMeans(Clustering gtClustering, Clustering clustering) { + ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); + for (int i = 0; i < clustering.size(); i++) { + if (clustering.get(i) instanceof CFCluster) { + microclusters.add((CFCluster)clustering.get(i)); + } + else{ + System.out.println("Unsupported Cluster Type:"+clustering.get(i).getClass() + +". Cluster needs to extend moa.cluster.CFCluster"); + } + } + Cluster[] centers = new Cluster[gtClustering.size()]; + for (int i = 0; i < centers.length; i++) { + centers[i] = gtClustering.get(i); + + } + + int k = centers.length; + if ( microclusters.size() < k ) { + return new Clustering( new Cluster[0]); + } + + Clustering kMeansResult = kMeans( centers, microclusters ); + + k = kMeansResult.size(); + CFCluster[] res = new CFCluster[ k ]; + + for ( CFCluster microcluster : microclusters) { + // Find closest kMeans cluster + double minDistance = Double.MAX_VALUE; + int closestCluster = 0; + for ( int i = 0; i < k; i++ ) { + double distance = distance( kMeansResult.get(i).getCenter(), microcluster.getCenter() ); + if ( distance < minDistance ) { + closestCluster = i; + minDistance = distance; + } + } + + // Add to cluster + if ( res[closestCluster] == null ) { + res[closestCluster] = (CFCluster)microcluster.copy(); + } else { + res[closestCluster].add(microcluster); + } + } + + // Clean up res + int count = 0; + for ( int i = 0; i < res.length; i++ ) { + if ( res[i] != null ) + ++count; + } + + CFCluster[] cleaned = new CFCluster[count]; + count = 0; + for ( int i = 0; i < res.length; i++ ) { + if ( res[i] != null ) + cleaned[count++] = res[i]; + } + + return new Clustering( cleaned ); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java new file mode 100644 index 0000000..975e61d --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java @@ -0,0 +1,337 @@ + +package com.yahoo.labs.samoa.moa.clusterers.clustream; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2010 RWTH Aachen University, Germany + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; +import com.yahoo.labs.samoa.moa.cluster.Cluster; +import com.yahoo.labs.samoa.moa.cluster.Clustering; +import com.yahoo.labs.samoa.moa.cluster.SphereCluster; +import com.yahoo.labs.samoa.moa.clusterers.AbstractClusterer; +import com.yahoo.labs.samoa.moa.core.Measurement; +import com.github.javacliparser.IntOption; +import com.yahoo.labs.samoa.instances.DenseInstance; +import com.yahoo.labs.samoa.instances.Instance; + +/** Citation: CluStream: Charu C. Aggarwal, Jiawei Han, Jianyong Wang, Philip S. Yu: + * A Framework for Clustering Evolving Data Streams. VLDB 2003: 81-92 + */ +public class Clustream extends AbstractClusterer{ + + private static final long serialVersionUID = 1L; + + public IntOption timeWindowOption = new IntOption("horizon", + 'h', "Rang of the window.", 1000); + + public IntOption maxNumKernelsOption = new IntOption( + "maxNumKernels", 'k', + "Maximum number of micro kernels to use.", 100); + + public IntOption kernelRadiFactorOption = new IntOption( + "kernelRadiFactor", 't', + "Multiplier for the kernel radius", 2); + + private int timeWindow; + private long timestamp = -1; + private ClustreamKernel[] kernels; + private boolean initialized; + private List<ClustreamKernel> buffer; // Buffer for initialization with kNN + private int bufferSize; + private double t; + private int m; + + public Clustream() { + } + + + @Override + public void resetLearningImpl() { + this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()]; + this.timeWindow = timeWindowOption.getValue(); + this.initialized = false; + this.buffer = new LinkedList<>(); + this.bufferSize = maxNumKernelsOption.getValue(); + t = kernelRadiFactorOption.getValue(); + m = maxNumKernelsOption.getValue(); + } + + @Override + public void trainOnInstanceImpl(Instance instance) { + int dim = instance.numValues(); + timestamp++; + // 0. Initialize + if ( !initialized ) { + if ( buffer.size() < bufferSize ) { + buffer.add( new ClustreamKernel(instance,dim, timestamp, t, m) ); + return; + } + + int k = kernels.length; + //System.err.println("k="+k+" bufferSize="+bufferSize); + assert (k <= bufferSize); + + ClustreamKernel[] centers = new ClustreamKernel[k]; + for ( int i = 0; i < k; i++ ) { + centers[i] = buffer.get( i ); // TODO: make random! + } + Clustering kmeans_clustering = kMeans(k, centers, buffer); +// Clustering kmeans_clustering = kMeans(k, buffer); + + for ( int i = 0; i < kmeans_clustering.size(); i++ ) { + kernels[i] = new ClustreamKernel( new DenseInstance(1.0,centers[i].getCenter()), dim, timestamp, t, m ); + } + + buffer.clear(); + initialized = true; + return; + } + + + // 1. Determine closest kernel + ClustreamKernel closestKernel = null; + double minDistance = Double.MAX_VALUE; + for (ClustreamKernel kernel : kernels) { + //System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation()); + double distance = distance(instance.toDoubleArray(), kernel.getCenter()); + if (distance < minDistance) { + closestKernel = kernel; + minDistance = distance; + } + } + + // 2. Check whether instance fits into closestKernel + double radius; + if (closestKernel != null && closestKernel.getWeight() == 1) { + // Special case: estimate radius by determining the distance to the + // next closest cluster + radius = Double.MAX_VALUE; + double[] center = closestKernel.getCenter(); + for (ClustreamKernel kernel : kernels) { + if (kernel == closestKernel) { + continue; + } + + double distance = distance(kernel.getCenter(), center); + radius = Math.min(distance, radius); + } + } else { + radius = closestKernel.getRadius(); + } + + if ( minDistance < radius ) { + // Date fits, put into kernel and be happy + closestKernel.insert( instance, timestamp ); + return; + } + + // 3. Date does not fit, we need to free + // some space to insert a new kernel + long threshold = timestamp - timeWindow; // Kernels before this can be forgotten + + // 3.1 Try to forget old kernels + for ( int i = 0; i < kernels.length; i++ ) { + if ( kernels[i].getRelevanceStamp() < threshold ) { + kernels[i] = new ClustreamKernel( instance, dim, timestamp, t, m ); + return; + } + } + + // 3.2 Merge closest two kernels + int closestA = 0; + int closestB = 0; + minDistance = Double.MAX_VALUE; + for ( int i = 0; i < kernels.length; i++ ) { + double[] centerA = kernels[i].getCenter(); + for ( int j = i + 1; j < kernels.length; j++ ) { + double dist = distance( centerA, kernels[j].getCenter() ); + if ( dist < minDistance ) { + minDistance = dist; + closestA = i; + closestB = j; + } + } + } + assert (closestA != closestB); + + kernels[closestA].add( kernels[closestB] ); + kernels[closestB] = new ClustreamKernel( instance, dim, timestamp, t, m ); + } + + @Override + public Clustering getMicroClusteringResult() { + if ( !initialized ) { + return new Clustering( new Cluster[0] ); + } + + ClustreamKernel[] res = new ClustreamKernel[kernels.length]; + for ( int i = 0; i < res.length; i++ ) { + res[i] = new ClustreamKernel( kernels[i], t, m ); + } + + return new Clustering( res ); + } + + @Override + public boolean implementsMicroClusterer() { + return true; + } + + @Override + public Clustering getClusteringResult() { + return null; + } + + public String getName() { + return "Clustream " + timeWindow; + } + + private static double distance(double[] pointA, double [] pointB){ + double distance = 0.0; + for (int i = 0; i < pointA.length; i++) { + double d = pointA[i] - pointB[i]; + distance += d * d; + } + return Math.sqrt(distance); + } + + //wrapper... we need to rewrite kmeans to points, not clusters, doesnt make sense anymore + // public static Clustering kMeans( int k, ArrayList<Instance> points, int dim ) { + // ArrayList<ClustreamKernel> cl = new ArrayList<ClustreamKernel>(); + // for(Instance inst : points){ + // cl.add(new ClustreamKernel(inst, dim , 0, 0, 0)); + // } + // Clustering clustering = kMeans(k, cl); + // return clustering; + // } + + public static Clustering kMeans( int k, List<? extends Cluster> data ) { + Random random = new Random(0); + Cluster[] centers = new Cluster[k]; + for (int i = 0; i < centers.length; i++) { + int rid = random.nextInt(k); + centers[i] = new SphereCluster(data.get(rid).getCenter(),0); + } + return kMeans(k, centers, data); + } + + + + + + public static Clustering kMeans( int k, Cluster[] centers, List<? extends Cluster> data ) { + assert (centers.length == k); + assert (k > 0); + + int dimensions = centers[0].getCenter().length; + + ArrayList<ArrayList<Cluster>> clustering = new ArrayList<>(); + for ( int i = 0; i < k; i++ ) { + clustering.add( new ArrayList<Cluster>() ); + } + + int repetitions = 100; + while ( repetitions-- >= 0 ) { + // Assign points to clusters + for ( Cluster point : data ) { + double minDistance = distance( point.getCenter(), centers[0].getCenter() ); + int closestCluster = 0; + for ( int i = 1; i < k; i++ ) { + double distance = distance( point.getCenter(), centers[i].getCenter() ); + if ( distance < minDistance ) { + closestCluster = i; + minDistance = distance; + } + } + + clustering.get( closestCluster ).add( point ); + } + + // Calculate new centers and clear clustering lists + SphereCluster[] newCenters = new SphereCluster[centers.length]; + for ( int i = 0; i < k; i++ ) { + newCenters[i] = calculateCenter( clustering.get( i ), dimensions ); + clustering.get( i ).clear(); + } + centers = newCenters; + } + + return new Clustering( centers ); + } + + private static SphereCluster calculateCenter( ArrayList<Cluster> cluster, int dimensions ) { + double[] res = new double[dimensions]; + for ( int i = 0; i < res.length; i++ ) { + res[i] = 0.0; + } + + if ( cluster.size() == 0 ) { + return new SphereCluster( res, 0.0 ); + } + + for ( Cluster point : cluster ) { + double [] center = point.getCenter(); + for (int i = 0; i < res.length; i++) { + res[i] += center[i]; + } + } + + // Normalize + for ( int i = 0; i < res.length; i++ ) { + res[i] /= cluster.size(); + } + + // Calculate radius + double radius = 0.0; + for ( Cluster point : cluster ) { + double dist = distance( res, point.getCenter() ); + if ( dist > radius ) { + radius = dist; + } + } + SphereCluster sc = new SphereCluster( res, radius ); + sc.setWeight(cluster.size()); + return sc; + } + + @Override + protected Measurement[] getModelMeasurementsImpl() { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public void getModelDescription(StringBuilder out, int indent) { + throw new UnsupportedOperationException("Not supported yet."); + } + + public boolean isRandomizable() { + return false; + } + + public double[] getVotesForInstance(Instance inst) { + throw new UnsupportedOperationException("Not supported yet."); + } + + +} + http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java new file mode 100644 index 0000000..8c8f6a0 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java @@ -0,0 +1,269 @@ +package com.yahoo.labs.samoa.moa.clusterers.clustream; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2010 RWTH Aachen University, Germany + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +import java.util.List; + +import com.yahoo.labs.samoa.instances.Instance; +import com.yahoo.labs.samoa.moa.cluster.CFCluster; + +public class ClustreamKernel extends CFCluster { + private static final long serialVersionUID = 1L; + + private final static double EPSILON = 0.00005; + public static final double MIN_VARIANCE = 1e-50; + + protected double LST; + protected double SST; + + int m; + double t; + + + public ClustreamKernel( Instance instance, int dimensions, long timestamp , double t, int m) { + super(instance, dimensions); + this.t = t; + this.m = m; + this.LST = timestamp; + this.SST = timestamp*timestamp; + } + + public ClustreamKernel( ClustreamKernel cluster, double t, int m ) { + super(cluster); + this.t = t; + this.m = m; + this.LST = cluster.LST; + this.SST = cluster.SST; + } + + public void insert( Instance instance, long timestamp ) { + N++; + LST += timestamp; + SST += timestamp*timestamp; + + for ( int i = 0; i < instance.numValues(); i++ ) { + LS[i] += instance.value(i); + SS[i] += instance.value(i)*instance.value(i); + } + } + + @Override + public void add( CFCluster other2 ) { + ClustreamKernel other = (ClustreamKernel) other2; + assert( other.LS.length == this.LS.length ); + this.N += other.N; + this.LST += other.LST; + this.SST += other.SST; + + for ( int i = 0; i < LS.length; i++ ) { + this.LS[i] += other.LS[i]; + this.SS[i] += other.SS[i]; + } + } + + public double getRelevanceStamp() { + if ( N < 2*m ) + return getMuTime(); + + return getMuTime() + getSigmaTime() * getQuantile( ((double)m)/(2*N) ); + } + + private double getMuTime() { + return LST / N; + } + + private double getSigmaTime() { + return Math.sqrt(SST/N - (LST/N)*(LST/N)); + } + + private double getQuantile( double z ) { + assert( z >= 0 && z <= 1 ); + return Math.sqrt( 2 ) * inverseError( 2*z - 1 ); + } + + @Override + public double getRadius() { + //trivial cluster + if(N == 1) return 0; + if(t==1) + t=1; + + return getDeviation()*radiusFactor; + } + + @Override + public CFCluster getCF(){ + return this; + } + + + private double getDeviation(){ + double[] variance = getVarianceVector(); + double sumOfDeviation = 0.0; + for (double aVariance : variance) { + double d = Math.sqrt(aVariance); + sumOfDeviation += d; + } + return sumOfDeviation / variance.length; + } + + /** + * @return this kernels' center + */ + @Override + public double[] getCenter() { + assert (!this.isEmpty()); + double res[] = new double[this.LS.length]; + for (int i = 0; i < res.length; i++) { + res[i] = this.LS[i] / N; + } + return res; + } + + /** + * See interface <code>Cluster</code> + * @param instance + * @return double value + */ + @Override + public double getInclusionProbability(Instance instance) { + //trivial cluster + if(N == 1){ + double distance = 0.0; + for (int i = 0; i < LS.length; i++) { + double d = LS[i] - instance.value(i); + distance += d * d; + } + distance = Math.sqrt(distance); + if( distance < EPSILON ) + return 1.0; + return 0.0; + } + else{ + double dist = calcNormalizedDistance(instance.toDoubleArray()); + if(dist <= getRadius()){ + return 1; + } + else{ + return 0; + } +// double res = AuxiliaryFunctions.distanceProbabilty(dist, LS.length); +// return res; + } + } + + private double[] getVarianceVector() { + double[] res = new double[this.LS.length]; + for (int i = 0; i < this.LS.length; i++) { + double ls = this.LS[i]; + double ss = this.SS[i]; + + double lsDivN = ls / this.getWeight(); + double lsDivNSquared = lsDivN * lsDivN; + double ssDivN = ss / this.getWeight(); + res[i] = ssDivN - lsDivNSquared; + + // Due to numerical errors, small negative values can occur. + // We correct this by settings them to almost zero. + if (res[i] <= 0.0) { + if (res[i] > -EPSILON) { + res[i] = MIN_VARIANCE; + } + } + } + return res; + } + + /** + * Check if this cluster is empty or not. + * @return <code>true</code> if the cluster has no data points, + * <code>false</code> otherwise. + */ + public boolean isEmpty() { + return this.N == 0; + } + + /** + * Calculate the normalized euclidean distance (Mahalanobis distance for + * distribution w/o covariances) to a point. + * @param point The point to which the distance is calculated. + * @return The normalized distance to the cluster center. + * + * TODO: check whether WEIGHTING is correctly applied to variances + */ + //??????? + private double calcNormalizedDistance(double[] point) { + double[] center = getCenter(); + double res = 0.0; + + for (int i = 0; i < center.length; i++) { + double diff = center[i] - point[i]; + res += (diff * diff);// variance[i]; + } + return Math.sqrt(res); + } + + /** + * Approximates the inverse error function. Clustream needs this. + * @param x + */ + public static double inverseError(double x) { + double z = Math.sqrt(Math.PI) * x; + double res = (z) / 2; + + double z2 = z * z; + double zProd = z * z2; // z^3 + res += (1.0 / 24) * zProd; + + zProd *= z2; // z^5 + res += (7.0 / 960) * zProd; + + zProd *= z2; // z^7 + res += (127 * zProd) / 80640; + + zProd *= z2; // z^9 + res += (4369 * zProd) / 11612160; + + zProd *= z2; // z^11 + res += (34807 * zProd) / 364953600; + + zProd *= z2; // z^13 + res += (20036983 * zProd) / 797058662400d; + + return res; + } + + @Override + protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue) { + super.getClusterSpecificInfo(infoTitle, infoValue); + infoTitle.add("Deviation"); + + double[] variance = getVarianceVector(); + double sumOfDeviation = 0.0; + for (double aVariance : variance) { + double d = Math.sqrt(aVariance); + sumOfDeviation += d; + } + + sumOfDeviation/= variance.length; + + infoValue.add(Double.toString(sumOfDeviation)); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java new file mode 100644 index 0000000..9e40fc1 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java @@ -0,0 +1,468 @@ + +package com.yahoo.labs.samoa.moa.clusterers.clustream; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 RWTH Aachen University, Germany + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; + +import com.yahoo.labs.samoa.moa.cluster.CFCluster; +import com.yahoo.labs.samoa.moa.cluster.Cluster; +import com.yahoo.labs.samoa.moa.cluster.Clustering; +import com.yahoo.labs.samoa.moa.cluster.SphereCluster; +import com.yahoo.labs.samoa.moa.clusterers.AbstractClusterer; +import com.yahoo.labs.samoa.moa.core.Measurement; +import com.github.javacliparser.IntOption; +import com.yahoo.labs.samoa.instances.DenseInstance; +import com.yahoo.labs.samoa.instances.Instance; + +public class WithKmeans extends AbstractClusterer { + + private static final long serialVersionUID = 1L; + + public IntOption timeWindowOption = new IntOption("horizon", + 'h', "Rang of the window.", 1000); + + public IntOption maxNumKernelsOption = new IntOption( + "maxNumKernels", 'm', + "Maximum number of micro kernels to use.", 100); + + public IntOption kernelRadiFactorOption = new IntOption( + "kernelRadiFactor", 't', + "Multiplier for the kernel radius", 2); + + public IntOption kOption = new IntOption( + "k", 'k', + "k of macro k-means (number of clusters)", 5); + + private int timeWindow; + private long timestamp = -1; + private ClustreamKernel[] kernels; + private boolean initialized; + private List<ClustreamKernel> buffer; // Buffer for initialization with kNN + private int bufferSize; + private double t; + private int m; + + public WithKmeans() { + + } + + @Override + public void resetLearningImpl() { + this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()]; + this.timeWindow = timeWindowOption.getValue(); + this.initialized = false; + this.buffer = new LinkedList<ClustreamKernel>(); + this.bufferSize = maxNumKernelsOption.getValue(); + t = kernelRadiFactorOption.getValue(); + m = maxNumKernelsOption.getValue(); + } + + @Override + public void trainOnInstanceImpl(Instance instance) { + int dim = instance.numValues(); + timestamp++; + // 0. Initialize + if (!initialized) { + if (buffer.size() < bufferSize) { + buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m)); + return; + } else { + for (int i = 0; i < buffer.size(); i++) { + kernels[i] = new ClustreamKernel(new DenseInstance(1.0, buffer.get(i).getCenter()), dim, timestamp, t, m); + } + + buffer.clear(); + initialized = true; + return; + } + } + + + // 1. Determine closest kernel + ClustreamKernel closestKernel = null; + double minDistance = Double.MAX_VALUE; + for ( int i = 0; i < kernels.length; i++ ) { + //System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation()); + double distance = distance(instance.toDoubleArray(), kernels[i].getCenter()); + if (distance < minDistance) { + closestKernel = kernels[i]; + minDistance = distance; + } + } + + // 2. Check whether instance fits into closestKernel + double radius = 0.0; + if ( closestKernel.getWeight() == 1 ) { + // Special case: estimate radius by determining the distance to the + // next closest cluster + radius = Double.MAX_VALUE; + double[] center = closestKernel.getCenter(); + for ( int i = 0; i < kernels.length; i++ ) { + if ( kernels[i] == closestKernel ) { + continue; + } + + double distance = distance(kernels[i].getCenter(), center ); + radius = Math.min( distance, radius ); + } + } else { + radius = closestKernel.getRadius(); + } + + if ( minDistance < radius ) { + // Date fits, put into kernel and be happy + closestKernel.insert( instance, timestamp ); + return; + } + + // 3. Date does not fit, we need to free + // some space to insert a new kernel + long threshold = timestamp - timeWindow; // Kernels before this can be forgotten + + // 3.1 Try to forget old kernels + for ( int i = 0; i < kernels.length; i++ ) { + if ( kernels[i].getRelevanceStamp() < threshold ) { + kernels[i] = new ClustreamKernel( instance, dim, timestamp, t, m ); + return; + } + } + + // 3.2 Merge closest two kernels + int closestA = 0; + int closestB = 0; + minDistance = Double.MAX_VALUE; + for ( int i = 0; i < kernels.length; i++ ) { + double[] centerA = kernels[i].getCenter(); + for ( int j = i + 1; j < kernels.length; j++ ) { + double dist = distance( centerA, kernels[j].getCenter() ); + if ( dist < minDistance ) { + minDistance = dist; + closestA = i; + closestB = j; + } + } + } + assert (closestA != closestB); + + kernels[closestA].add( kernels[closestB] ); + kernels[closestB] = new ClustreamKernel( instance, dim, timestamp, t, m ); + } + + @Override + public Clustering getMicroClusteringResult() { + if (!initialized) { + return new Clustering(new Cluster[0]); + } + + ClustreamKernel[] result = new ClustreamKernel[kernels.length]; + for (int i = 0; i < result.length; i++) { + result[i] = new ClustreamKernel(kernels[i], t, m); + } + + return new Clustering(result); + } + + @Override + public Clustering getClusteringResult() { + return kMeans_rand(kOption.getValue(), getMicroClusteringResult()); + } + + public Clustering getClusteringResult(Clustering gtClustering) { + return kMeans_gta(kOption.getValue(), getMicroClusteringResult(), gtClustering); + } + + public String getName() { + return "CluStreamWithKMeans " + timeWindow; + } + + /** + * Distance between two vectors. + * + * @param pointA + * @param pointB + * @return dist + */ + private static double distance(double[] pointA, double [] pointB) { + double distance = 0.0; + for (int i = 0; i < pointA.length; i++) { + double d = pointA[i] - pointB[i]; + distance += d * d; + } + return Math.sqrt(distance); + } + + /** + * k-means of (micro)clusters, with ground-truth-aided initialization. + * (to produce best results) + * + * @param k + * @param data + * @return (macro)clustering - CFClusters + */ + public static Clustering kMeans_gta(int k, Clustering clustering, Clustering gtClustering) { + + ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); + for (int i = 0; i < clustering.size(); i++) { + if (clustering.get(i) instanceof CFCluster) { + microclusters.add((CFCluster)clustering.get(i)); + } else { + System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() + ". Cluster needs to extend moa.cluster.CFCluster"); + } + } + + int n = microclusters.size(); + assert (k <= n); + + /* k-means */ + Random random = new Random(0); + Cluster[] centers = new Cluster[k]; + int K = gtClustering.size(); + + for (int i = 0; i < k; i++) { + if (i < K) { // GT-aided + centers[i] = new SphereCluster(gtClustering.get(i).getCenter(), 0); + } else { // Randomized + int rid = random.nextInt(n); + centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0); + } + } + + return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters); + } + + /** + * k-means of (micro)clusters, with randomized initialization. + * + * @param k + * @param data + * @return (macro)clustering - CFClusters + */ + public static Clustering kMeans_rand(int k, Clustering clustering) { + + ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>(); + for (int i = 0; i < clustering.size(); i++) { + if (clustering.get(i) instanceof CFCluster) { + microclusters.add((CFCluster)clustering.get(i)); + } else { + System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() + ". Cluster needs to extend moa.cluster.CFCluster"); + } + } + + int n = microclusters.size(); + assert (k <= n); + + /* k-means */ + Random random = new Random(0); + Cluster[] centers = new Cluster[k]; + + for (int i = 0; i < k; i++) { + int rid = random.nextInt(n); + centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0); + } + + return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters); + } + + /** + * (The Actual Algorithm) k-means of (micro)clusters, with specified initialization points. + * + * @param k + * @param centers - initial centers + * @param data + * @return (macro)clustering - SphereClusters + */ + protected static Clustering kMeans(int k, Cluster[] centers, List<? extends Cluster> data) { + assert (centers.length == k); + assert (k > 0); + + int dimensions = centers[0].getCenter().length; + + ArrayList<ArrayList<Cluster>> clustering = new ArrayList<ArrayList<Cluster>>(); + for (int i = 0; i < k; i++) { + clustering.add(new ArrayList<Cluster>()); + } + + while (true) { + // Assign points to clusters + for (Cluster point : data) { + double minDistance = distance(point.getCenter(), centers[0].getCenter()); + int closestCluster = 0; + for (int i = 1; i < k; i++) { + double distance = distance(point.getCenter(), centers[i].getCenter()); + if (distance < minDistance) { + closestCluster = i; + minDistance = distance; + } + } + + clustering.get(closestCluster).add(point); + } + + // Calculate new centers and clear clustering lists + SphereCluster[] newCenters = new SphereCluster[centers.length]; + for (int i = 0; i < k; i++) { + newCenters[i] = calculateCenter(clustering.get(i), dimensions); + clustering.get(i).clear(); + } + + // Convergence check + boolean converged = true; + for (int i = 0; i < k; i++) { + if (!Arrays.equals(centers[i].getCenter(), newCenters[i].getCenter())) { + converged = false; + break; + } + } + + if (converged) { + break; + } else { + centers = newCenters; + } + } + + return new Clustering(centers); + } + + /** + * Rearrange the k-means result into a set of CFClusters, cleaning up the redundancies. + * + * @param kMeansResult + * @param microclusters + * @return + */ + protected static Clustering cleanUpKMeans(Clustering kMeansResult, ArrayList<CFCluster> microclusters) { + /* Convert k-means result to CFClusters */ + int k = kMeansResult.size(); + CFCluster[] converted = new CFCluster[k]; + + for (CFCluster mc : microclusters) { + // Find closest kMeans cluster + double minDistance = Double.MAX_VALUE; + int closestCluster = 0; + for (int i = 0; i < k; i++) { + double distance = distance(kMeansResult.get(i).getCenter(), mc.getCenter()); + if (distance < minDistance) { + closestCluster = i; + minDistance = distance; + } + } + + // Add to cluster + if ( converted[closestCluster] == null ) { + converted[closestCluster] = (CFCluster)mc.copy(); + } else { + converted[closestCluster].add(mc); + } + } + + // Clean up + int count = 0; + for (int i = 0; i < converted.length; i++) { + if (converted[i] != null) + count++; + } + + CFCluster[] cleaned = new CFCluster[count]; + count = 0; + for (int i = 0; i < converted.length; i++) { + if (converted[i] != null) + cleaned[count++] = converted[i]; + } + + return new Clustering(cleaned); + } + + + + /** + * k-means helper: Calculate a wrapping cluster of assigned points[microclusters]. + * + * @param assigned + * @param dimensions + * @return SphereCluster (with center and radius) + */ + private static SphereCluster calculateCenter(ArrayList<Cluster> assigned, int dimensions) { + double[] result = new double[dimensions]; + for (int i = 0; i < result.length; i++) { + result[i] = 0.0; + } + + if (assigned.size() == 0) { + return new SphereCluster(result, 0.0); + } + + for (Cluster point : assigned) { + double[] center = point.getCenter(); + for (int i = 0; i < result.length; i++) { + result[i] += center[i]; + } + } + + // Normalize + for (int i = 0; i < result.length; i++) { + result[i] /= assigned.size(); + } + + // Calculate radius: biggest wrapping distance from center + double radius = 0.0; + for (Cluster point : assigned) { + double dist = distance(result, point.getCenter()); + if (dist > radius) { + radius = dist; + } + } + SphereCluster sc = new SphereCluster(result, radius); + sc.setWeight(assigned.size()); + return sc; + } + + + /** Miscellaneous **/ + + @Override + public boolean implementsMicroClusterer() { + return true; + } + + public boolean isRandomizable() { + return false; + } + + public double[] getVotesForInstance(Instance inst) { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + protected Measurement[] getModelMeasurementsImpl() { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public void getModelDescription(StringBuilder out, int indent) { + throw new UnsupportedOperationException("Not supported yet."); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java new file mode 100644 index 0000000..f052832 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java @@ -0,0 +1,211 @@ +package com.yahoo.labs.samoa.moa.core; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.net.URLClassLoader; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.jar.JarEntry; +import java.util.jar.JarFile; + +/** + * Class for discovering classes via reflection in the java class path. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class AutoClassDiscovery { + + protected static final Map<String, String[]> cachedClassNames = new HashMap<String, String[]>(); + + public static String[] findClassNames(String packageNameToSearch) { + String[] cached = cachedClassNames.get(packageNameToSearch); + if (cached == null) { + HashSet<String> classNames = new HashSet<String>(); + /*StringTokenizer pathTokens = new StringTokenizer(System + .getProperty("java.class.path"), File.pathSeparator);*/ + String packageDirName = packageNameToSearch.replace('.', + File.separatorChar); + String packageJarName = packageNameToSearch.length() > 0 ? (packageNameToSearch.replace('.', '/') + "/") + : ""; + String part = ""; + + + AutoClassDiscovery adc = new AutoClassDiscovery(); + URLClassLoader sysLoader = (URLClassLoader) adc.getClass().getClassLoader(); + URL[] cl_urls = sysLoader.getURLs(); + + for (int i = 0; i < cl_urls.length; i++) { + part = cl_urls[i].toString(); + if (part.startsWith("file:")) { + part = part.replace(" ", "%20"); + try { + File temp = new File(new java.net.URI(part)); + part = temp.getAbsolutePath(); + } catch (URISyntaxException e) { + e.printStackTrace(); + } + } + + // find classes + ArrayList<File> files = new ArrayList<File>(); + File dir = new File(part); + if (dir.isDirectory()) { + File root = new File(dir.toString() + File.separatorChar + packageDirName); + String[] names = findClassesInDirectoryRecursive(root, ""); + classNames.addAll(Arrays.asList(names)); + } else { + try { + JarFile jar = new JarFile(part); + Enumeration<JarEntry> jarEntries = jar.entries(); + while (jarEntries.hasMoreElements()) { + String jarEntry = jarEntries.nextElement().getName(); + if (jarEntry.startsWith(packageJarName)) { + String relativeName = jarEntry.substring(packageJarName.length()); + if (relativeName.endsWith(".class")) { + relativeName = relativeName.replace('/', + '.'); + classNames.add(relativeName.substring(0, + relativeName.length() + - ".class".length())); + } + } + } + } catch (IOException ignored) { + // ignore unreadable files + } + } + } + + /*while (pathTokens.hasMoreElements()) { + String pathToSearch = pathTokens.nextElement().toString(); + if (pathToSearch.endsWith(".jar")) { + try { + JarFile jar = new JarFile(pathToSearch); + Enumeration<JarEntry> jarEntries = jar.entries(); + while (jarEntries.hasMoreElements()) { + String jarEntry = jarEntries.nextElement() + .getName(); + if (jarEntry.startsWith(packageJarName)) { + String relativeName = jarEntry + .substring(packageJarName.length()); + if (relativeName.endsWith(".class")) { + relativeName = relativeName.replace('/', + '.'); + classNames.add(relativeName.substring(0, + relativeName.length() + - ".class".length())); + } + } + } + } catch (IOException ignored) { + // ignore unreadable files + } + } else { + File root = new File(pathToSearch + File.separatorChar + + packageDirName); + String[] names = findClassesInDirectoryRecursive(root, ""); + for (String name : names) { + classNames.add(name); + } + } + } */ + cached = classNames.toArray(new String[classNames.size()]); + Arrays.sort(cached); + cachedClassNames.put(packageNameToSearch, cached); + } + return cached; + } + + protected static String[] findClassesInDirectoryRecursive(File root, + String packagePath) { + HashSet<String> classNames = new HashSet<String>(); + if (root.isDirectory()) { + String[] list = root.list(); + for (String string : list) { + if (string.endsWith(".class")) { + classNames.add(packagePath + + string.substring(0, string.length() + - ".class".length())); + } else { + File testDir = new File(root.getPath() + File.separatorChar + + string); + if (testDir.isDirectory()) { + String[] names = findClassesInDirectoryRecursive( + testDir, packagePath + string + "."); + classNames.addAll(Arrays.asList(names)); + } + } + } + } + return classNames.toArray(new String[classNames.size()]); + } + + public static Class[] findClassesOfType(String packageNameToSearch, + Class<?> typeDesired) { + ArrayList<Class<?>> classesFound = new ArrayList<Class<?>>(); + String[] classNames = findClassNames(packageNameToSearch); + for (String className : classNames) { + String fullName = packageNameToSearch.length() > 0 ? (packageNameToSearch + + "." + className) + : className; + if (isPublicConcreteClassOfType(fullName, typeDesired)) { + try { + classesFound.add(Class.forName(fullName)); + } catch (Exception ignored) { + // ignore classes that we cannot instantiate + } + } + } + return classesFound.toArray(new Class[classesFound.size()]); + } + + public static boolean isPublicConcreteClassOfType(String className, + Class<?> typeDesired) { + Class<?> testClass = null; + try { + testClass = Class.forName(className); + } catch (Exception e) { + return false; + } + int classModifiers = testClass.getModifiers(); + return (java.lang.reflect.Modifier.isPublic(classModifiers) + && !java.lang.reflect.Modifier.isAbstract(classModifiers) + && typeDesired.isAssignableFrom(testClass) && hasEmptyConstructor(testClass)); + } + + public static boolean hasEmptyConstructor(Class<?> type) { + try { + type.getConstructor(); + return true; + } catch (Exception ignored) { + return false; + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java new file mode 100644 index 0000000..2b8ed09 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java @@ -0,0 +1,133 @@ +package com.yahoo.labs.samoa.moa.core; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.Collection; + +import com.yahoo.labs.samoa.moa.AbstractMOAObject; +import com.yahoo.labs.samoa.moa.MOAObject; + +/** + * Vector with the capability of automatic expansion. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class AutoExpandVector<T> extends ArrayList<T> implements MOAObject { + + private static final long serialVersionUID = 1L; + + public AutoExpandVector() { + super(0); + } + + public AutoExpandVector(int size) { + super(size); + } + + @Override + public void add(int pos, T obj) { + if (pos > size()) { + while (pos > size()) { + add(null); + } + trimToSize(); + } + super.add(pos, obj); + } + + @Override + public T get(int pos) { + return ((pos >= 0) && (pos < size())) ? super.get(pos) : null; + } + + @Override + public T set(int pos, T obj) { + if (pos >= size()) { + add(pos, obj); + return null; + } + return super.set(pos, obj); + } + + @Override + public boolean add(T arg0) { + boolean result = super.add(arg0); + trimToSize(); + return result; + } + + @Override + public boolean addAll(Collection<? extends T> arg0) { + boolean result = super.addAll(arg0); + trimToSize(); + return result; + } + + @Override + public boolean addAll(int arg0, Collection<? extends T> arg1) { + boolean result = super.addAll(arg0, arg1); + trimToSize(); + return result; + } + + @Override + public void clear() { + super.clear(); + trimToSize(); + } + + @Override + public T remove(int arg0) { + T result = super.remove(arg0); + trimToSize(); + return result; + } + + @Override + public boolean remove(Object arg0) { + boolean result = super.remove(arg0); + trimToSize(); + return result; + } + + @Override + protected void removeRange(int arg0, int arg1) { + super.removeRange(arg0, arg1); + trimToSize(); + } + + @Override + public MOAObject copy() { + return AbstractMOAObject.copy(this); + } + + @Override + public int measureByteSize() { + return AbstractMOAObject.measureByteSize(this); + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DataPoint.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DataPoint.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DataPoint.java new file mode 100644 index 0000000..21ad3df --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DataPoint.java @@ -0,0 +1,134 @@ + +package com.yahoo.labs.samoa.moa.core; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 RWTH Aachen University, Germany + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.HashMap; +import java.util.Iterator; +import java.util.TreeSet; + +import com.yahoo.labs.samoa.instances.Attribute; +import com.yahoo.labs.samoa.instances.DenseInstance; +import com.yahoo.labs.samoa.instances.Instance; + +public class DataPoint extends DenseInstance{ + + private static final long serialVersionUID = 1L; + + protected int timestamp; + private HashMap<String, String> measure_values; + + protected int noiseLabel; + + public DataPoint(Instance nextInstance, Integer timestamp) { + super(nextInstance); + this.setDataset(nextInstance.dataset()); + this.timestamp = timestamp; + measure_values = new HashMap<String, String>(); + + Attribute classLabel = dataset().classAttribute(); + noiseLabel = classLabel.indexOfValue("noise"); // -1 returned if there is no noise + } + + public void updateWeight(int cur_timestamp, double decay_rate){ + setWeight(Math.pow(2,(-1.0)*decay_rate*(cur_timestamp-timestamp))); + } + + public void setMeasureValue(String measureKey, double value){ + synchronized(measure_values){ + measure_values.put(measureKey, Double.toString(value)); + } + } + + public void setMeasureValue(String measureKey,String value){ + synchronized(measure_values){ + measure_values.put(measureKey, value); + } + } + + public String getMeasureValue(String measureKey){ + if(measure_values.containsKey(measureKey)) + synchronized(measure_values){ + return measure_values.get(measureKey); + } + else + return ""; + } + + public int getTimestamp(){ + return timestamp; + } + + public String getInfo(int x_dim, int y_dim) { + StringBuffer sb = new StringBuffer(); + sb.append("<html><table>"); + sb.append("<tr><td>Point</td><td>"+timestamp+"</td></tr>"); + for (int i = 0; i < numAttributes() - 1; i++) { //m_AttValues.length + String label = "Dim "+i; + if(i == x_dim) + label = "<b>X</b>"; + if(i == y_dim) + label = "<b>Y</b>"; + sb.append("<tr><td>"+label+"</td><td>"+value(i)+"</td></tr>"); + } + sb.append("<tr><td>Decay</td><td>"+weight()+"</td></tr>"); + sb.append("<tr><td>True cluster</td><td>"+classValue()+"</td></tr>"); + sb.append("</table>"); + sb.append("<br>"); + sb.append("<b>Evaluation</b><br>"); + sb.append("<table>"); + + TreeSet<String> sortedset; + synchronized(measure_values){ + sortedset = new TreeSet<String>(measure_values.keySet()); + } + + Iterator miterator = sortedset.iterator(); + while(miterator.hasNext()) { + String key = (String)miterator.next(); + sb.append("<tr><td>"+key+"</td><td>"+measure_values.get(key)+"</td></tr>"); + } + + sb.append("</table></html>"); + return sb.toString(); + } + + public double getDistance(DataPoint other){ + double distance = 0.0; + int numDims = numAttributes(); + if(classIndex()!=0) numDims--; + + for (int i = 0; i < numDims; i++) { + double d = value(i) - other.value(i); + distance += d * d; + } + return Math.sqrt(distance); + } + + + public boolean isNoise() { + return (int)classValue() == noiseLabel; + } + + public double getNoiseLabel() { + return noiseLabel; + } +}
