http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/AbstractClusterer.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/AbstractClusterer.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/AbstractClusterer.java
new file mode 100644
index 0000000..a1d80d1
--- /dev/null
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/AbstractClusterer.java
@@ -0,0 +1,298 @@
+package com.yahoo.labs.samoa.moa.clusterers;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ *    Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+import com.yahoo.labs.samoa.moa.cluster.Clustering;
+
+import com.yahoo.labs.samoa.instances.InstancesHeader;
+import com.yahoo.labs.samoa.moa.core.Measurement;
+import com.yahoo.labs.samoa.moa.core.ObjectRepository;
+import com.yahoo.labs.samoa.moa.core.StringUtils;
+import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler;
+import com.github.javacliparser.FlagOption;
+import com.github.javacliparser.IntOption;
+import com.yahoo.labs.samoa.moa.tasks.TaskMonitor;
+import com.yahoo.labs.samoa.instances.Instance;
+import com.yahoo.labs.samoa.instances.Instances;
+
+public abstract class AbstractClusterer extends AbstractOptionHandler
+               implements Clusterer {
+       
+       @Override
+       public String getPurposeString() {
+               return "MOA Clusterer: " + getClass().getCanonicalName();
+       }
+
+       protected InstancesHeader modelContext;
+
+       protected double trainingWeightSeenByModel = 0.0;
+
+       protected int randomSeed = 1;
+
+       protected IntOption randomSeedOption;
+
+    public FlagOption evaluateMicroClusteringOption;
+
+    protected Random clustererRandom;
+
+    protected Clustering clustering;
+    
+       public AbstractClusterer() {
+               if (isRandomizable()) {
+                       this.randomSeedOption = new IntOption("randomSeed", 'r',
+                                       "Seed for random behaviour of the Clusterer.", 1);
+               }
+
+        if( implementsMicroClusterer()){
+            this.evaluateMicroClusteringOption =
+                    new FlagOption("evaluateMicroClustering", 'M',
+                    "Evaluate the underlying microclustering instead of the macro clustering");
+        }
+       }
+
+       @Override
+       public void prepareForUseImpl(TaskMonitor monitor,
+                       ObjectRepository repository) {
+               if (this.randomSeedOption != null) {
+                       this.randomSeed = this.randomSeedOption.getValue();
+               }
+               if (!trainingHasStarted()) {
+                       resetLearning();
+               }
+                clustering = new Clustering();
+       }
+
+       public void setModelContext(InstancesHeader ih) {
+               if ((ih != null) && (ih.classIndex() < 0)) {
+                       throw new IllegalArgumentException(
+                                       "Context for a Clusterer must include a class to learn");
+               }
+               if (trainingHasStarted()
+                               && (this.modelContext != null)
+                               && ((ih == null) || !contextIsCompatible(this.modelContext, ih))) {
+                       throw new IllegalArgumentException(
+                                       "New context is not compatible with existing model");
+               }
+               this.modelContext = ih;
+       }
+
+       public InstancesHeader getModelContext() {
+               return this.modelContext;
+       }
+
+       public void setRandomSeed(int s) {
+               this.randomSeed = s;
+               if (this.randomSeedOption != null) {
+                       // keep option consistent
+                       this.randomSeedOption.setValue(s);
+               }
+       }
+
+       public boolean trainingHasStarted() {
+               return this.trainingWeightSeenByModel > 0.0;
+       }
+
+       public double trainingWeightSeenByModel() {
+               return this.trainingWeightSeenByModel;
+       }
+
+       public void resetLearning() {
+               this.trainingWeightSeenByModel = 0.0;
+               if (isRandomizable()) {
+                       this.clustererRandom = new Random(this.randomSeed);
+               }
+               resetLearningImpl();
+       }
+
+       public void trainOnInstance(Instance inst) {
+               if (inst.weight() > 0.0) {
+                       this.trainingWeightSeenByModel += inst.weight();
+                       trainOnInstanceImpl(inst);
+               }
+       }
+
+       public Measurement[] getModelMeasurements() {
+               List<Measurement> measurementList = new LinkedList<Measurement>();
+               measurementList.add(new Measurement("model training instances",
+                               trainingWeightSeenByModel()));
+               measurementList.add(new Measurement("model serialized size (bytes)",
+                               measureByteSize()));
+               Measurement[] modelMeasurements = getModelMeasurementsImpl();
+               if (modelMeasurements != null) {
+                       for (Measurement measurement : modelMeasurements) {
+                               measurementList.add(measurement);
+                       }
+               }
+               // add average of sub-model measurements
+               Clusterer[] subModels = getSubClusterers();
+               if ((subModels != null) && (subModels.length > 0)) {
+                       List<Measurement[]> subMeasurements = new LinkedList<Measurement[]>();
+                       for (Clusterer subModel : subModels) {
+                               if (subModel != null) {
+                                       subMeasurements.add(subModel.getModelMeasurements());
+                               }
+                       }
+                       Measurement[] avgMeasurements = Measurement
+                                       .averageMeasurements(subMeasurements
+                                                       .toArray(new Measurement[subMeasurements.size()][]));
+                       for (Measurement measurement : avgMeasurements) {
+                               measurementList.add(measurement);
+                       }
+               }
+               return measurementList.toArray(new Measurement[measurementList.size()]);
+       }
+
+       public void getDescription(StringBuilder out, int indent) {
+               StringUtils.appendIndented(out, indent, "Model type: ");
+               out.append(this.getClass().getName());
+               StringUtils.appendNewline(out);
+               Measurement.getMeasurementsDescription(getModelMeasurements(), out,
+                               indent);
+               StringUtils.appendNewlineIndented(out, indent, "Model description:");
+               StringUtils.appendNewline(out);
+               if (trainingHasStarted()) {
+                       getModelDescription(out, indent);
+               } else {
+                       StringUtils.appendIndented(out, indent,
+                                       "Model has not been trained.");
+               }
+       }
+
+       public Clusterer[] getSubClusterers() {
+               return null;
+       }
+
+       @Override
+       public Clusterer copy() {
+               return (Clusterer) super.copy();
+       }
+
+//     public boolean correctlyClassifies(Instance inst) {
+//             return Utils.maxIndex(getVotesForInstance(inst)) == (int) inst
+//                             .classValue();
+//     }
+
+       public String getClassNameString() {
+               return InstancesHeader.getClassNameString(this.modelContext);
+       }
+
+       public String getClassLabelString(int classLabelIndex) {
+               return InstancesHeader.getClassLabelString(this.modelContext,
+                               classLabelIndex);
+       }
+
+       public String getAttributeNameString(int attIndex) {
+               return InstancesHeader.getAttributeNameString(this.modelContext,
+                               attIndex);
+       }
+
+       public String getNominalValueString(int attIndex, int valIndex) {
+               return InstancesHeader.getNominalValueString(this.modelContext,
+                               attIndex, valIndex);
+       }
+
+       // originalContext notnull
+       // newContext notnull
+       public static boolean contextIsCompatible(InstancesHeader originalContext,
+                       InstancesHeader newContext) {
+               // rule 1: num classes can increase but never decrease
+               // rule 2: num attributes can increase but never decrease
+               // rule 3: num nominal attribute values can increase but never decrease
+               // rule 4: attribute types must stay in the same order (although class
+               // can
+               // move; is always skipped over)
+               // attribute names are free to change, but should always still represent
+               // the original attributes
+               if (newContext.numClasses() < originalContext.numClasses()) {
+                       return false; // rule 1
+               }
+               if (newContext.numAttributes() < originalContext.numAttributes()) {
+                       return false; // rule 2
+               }
+               int oPos = 0;
+               int nPos = 0;
+               while (oPos < originalContext.numAttributes()) {
+                       if (oPos == originalContext.classIndex()) {
+                               oPos++;
+                               if (!(oPos < originalContext.numAttributes())) {
+                                       break;
+                               }
+                       }
+                       if (nPos == newContext.classIndex()) {
+                               nPos++;
+                       }
+                       if (originalContext.attribute(oPos).isNominal()) {
+                               if (!newContext.attribute(nPos).isNominal()) {
+                                       return false; // rule 4
+                               }
+                               if (newContext.attribute(nPos).numValues() < originalContext
+                                               .attribute(oPos).numValues()) {
+                                       return false; // rule 3
+                               }
+                       } else {
+                               assert (originalContext.attribute(oPos).isNumeric());
+                               if (!newContext.attribute(nPos).isNumeric()) {
+                                       return false; // rule 4
+                               }
+                       }
+                       oPos++;
+                       nPos++;
+               }
+               return true; // all checks clear
+       }
+
+       // reason for ...Impl methods:
+       // ease programmer burden by not requiring them to remember calls to super
+       // in overridden methods & will produce compiler errors if not overridden
+
+       public abstract void resetLearningImpl();
+
+       public abstract void trainOnInstanceImpl(Instance inst);
+
+       protected abstract Measurement[] getModelMeasurementsImpl();
+
+       public abstract void getModelDescription(StringBuilder out, int indent);
+
+       protected static int modelAttIndexToInstanceAttIndex(int index,
+                       Instance inst) {
+               return inst.classIndex() > index ? index : index + 1;
+       }
+
+       protected static int modelAttIndexToInstanceAttIndex(int index,
+                       Instances insts) {
+               return insts.classIndex() > index ? index : index + 1;
+       }
+
+        public boolean  implementsMicroClusterer(){
+            return false;
+        }
+
+        public boolean  keepClassLabel(){
+            return false;
+        }
+        
+        public Clustering getMicroClusteringResult(){
+            return null;
+        };
+}
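
For orientation, the ...Impl pattern above means a concrete clusterer only overrides the hook methods, while the bookkeeping (training weight, random seed, measurements) stays in AbstractClusterer. A minimal sketch, not part of this commit, with an invented class name and deliberately trivial logic:

    package com.yahoo.labs.samoa.moa.clusterers;

    import com.yahoo.labs.samoa.instances.Instance;
    import com.yahoo.labs.samoa.moa.cluster.Clustering;
    import com.yahoo.labs.samoa.moa.core.Measurement;

    // Hypothetical example, not part of the commit: shows which methods a subclass must supply.
    public class CountingClusterer extends AbstractClusterer {

        private static final long serialVersionUID = 1L;

        private long instancesSeen;

        @Override
        public void resetLearningImpl() {
            // invoked by resetLearning() after the random seed has been applied
            instancesSeen = 0;
        }

        @Override
        public void trainOnInstanceImpl(Instance inst) {
            // invoked by trainOnInstance() only for instances with positive weight
            instancesSeen++;
        }

        @Override
        protected Measurement[] getModelMeasurementsImpl() {
            return new Measurement[] { new Measurement("instances seen", instancesSeen) };
        }

        @Override
        public void getModelDescription(StringBuilder out, int indent) {
            out.append("Counting clusterer, instances seen: ").append(instancesSeen);
        }

        @Override
        public boolean isRandomizable() {
            return false;
        }

        @Override
        public Clustering getClusteringResult() {
            return new Clustering();
        }

        @Override
        public double[] getVotesForInstance(Instance inst) {
            return new double[0];
        }
    }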

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/ClusterGenerator.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/ClusterGenerator.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/ClusterGenerator.java
new file mode 100644
index 0000000..6056514
--- /dev/null
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/ClusterGenerator.java
@@ -0,0 +1,368 @@
+
+package com.yahoo.labs.samoa.moa.clusterers;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ *    Copyright (C) 2010 RWTH Aachen University, Germany
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Random;
+import com.github.javacliparser.FloatOption;
+import com.github.javacliparser.IntOption;
+import com.yahoo.labs.samoa.moa.cluster.Clustering;
+import com.yahoo.labs.samoa.moa.cluster.SphereCluster;
+import com.yahoo.labs.samoa.moa.core.Measurement;
+import com.yahoo.labs.samoa.moa.core.DataPoint;
+import com.yahoo.labs.samoa.instances.Instance;
+
+public class ClusterGenerator extends AbstractClusterer{
+
+       private static final long serialVersionUID = 1L;
+
+       public IntOption timeWindowOption = new IntOption("timeWindow",
+                       't', "Range of the window.", 1000);
+
+    public FloatOption radiusDecreaseOption = new FloatOption("radiusDecrease", 'r',
+                "The average radii of the centroids in the model.", 0, 0, 1);
+
+    public FloatOption radiusIncreaseOption = new FloatOption("radiusIncrease", 'R',
+                "The average radii of the centroids in the model.", 0, 0, 1);
+
+    public FloatOption positionOffsetOption = new FloatOption("positionOffset", 'p',
+                "The average radii of the centroids in the model.", 0, 0, 1);
+
+    public FloatOption clusterRemoveOption = new FloatOption("clusterRemove", 'D',
+                "Deletes complete clusters from the clustering.", 0, 0, 1);
+
+    public FloatOption joinClustersOption = new FloatOption("joinClusters", 'j',
+            "Join two clusters if their hull distance is less than minRadius times this factor.", 0, 0, 1);
+
+    public FloatOption clusterAddOption = new FloatOption("clusterAdd", 'A',
+                "Adds additional clusters.", 0, 0, 1);
+
+    private static double err_intervall_width = 0.0;
+    private ArrayList<DataPoint> points;
+    private int instanceCounter;
+    private int windowCounter;
+    private Random random;
+    private Clustering sourceClustering = null;
+
+    @Override
+    public void resetLearningImpl() {
+        points = new ArrayList<DataPoint>();
+        instanceCounter = 0;
+        windowCounter = 0;
+        random = new Random(227);
+
+        //joinClustersOption.set();
+        //evaluateMicroClusteringOption.set();
+    }
+
+    @Override
+    public void trainOnInstanceImpl(Instance inst) {
+        if(windowCounter >= timeWindowOption.getValue()){
+            points.clear();
+            windowCounter = 0;
+        }
+        windowCounter++;
+        instanceCounter++;
+        points.add( new DataPoint(inst,instanceCounter));
+    }
+
+    @Override
+    public boolean implementsMicroClusterer() {
+        return true;
+    }
+
+
+    public void setSourceClustering(Clustering source){
+        sourceClustering = source;
+    }
+    
+    @Override
+    public Clustering getMicroClusteringResult() {
+        //System.out.println("Numcluster:"+clustering.size()+" / "+num);
+        //Clustering source_clustering = new Clustering(points, overlapThreshold, microInitMinPoints);
+        if(sourceClustering == null){
+
+            System.out.println("You need to set a source clustering for the ClusterGenerator to work");
+            return null;
+        }
+        return alterClustering(sourceClustering);
+    }
+
+
+
+    public Clustering getClusteringResult(){
+        sourceClustering = new Clustering(points);
+//        if(sourceClustering == null){
+//            System.out.println("You need to set a source clustering for the ClusterGenerator to work");
+//            return null;
+//        }
+        return alterClustering(sourceClustering);
+    }
+
+
+    private Clustering alterClustering(Clustering scclustering){
+        //percentage of the radius that will be cut off
+        //0: no changes to radius
+        //1: radius of 0
+        double errLevelRadiusDecrease = radiusDecreaseOption.getValue();
+
+        //0: no changes to radius
+        //1: radius 100% bigger
+        double errLevelRadiusIncrease = radiusIncreaseOption.getValue();
+
+        //0: no changes
+        //1: distance between centers is 2 * original radius
+        double errLevelPosition = positionOffsetOption.getValue();
+
+
+        int numRemoveCluster = (int)(clusterRemoveOption.getValue()*scclustering.size());
+
+        int numAddCluster = (int)(clusterAddOption.getValue()*scclustering.size());
+
+        for (int c = 0; c < numRemoveCluster; c++) {
+            int delId = random.nextInt(scclustering.size());
+            scclustering.remove(delId);
+        }
+
+        int numCluster = scclustering.size();
+        double[] err_seeds = new double[numCluster];
+        double err_seed_sum = 0.0;
+        double tmp_seed;
+        for (int i = 0; i < numCluster; i++) {
+            tmp_seed = random.nextDouble();
+            err_seeds[i] = err_seed_sum + tmp_seed;
+            err_seed_sum+= tmp_seed;
+        }
+
+        double sumWeight = 0;
+        for (int i = 0; i <numCluster; i++) {
+            sumWeight+= scclustering.get(i).getWeight();
+        }
+
+        Clustering clustering = new Clustering();
+
+        for (int i = 0; i <numCluster; i++) {
+            if(!(scclustering.get(i) instanceof SphereCluster)){
+                System.out.println("Not a Sphere Cluster");
+                continue;
+            }
+            SphereCluster sourceCluster = (SphereCluster)scclustering.get(i);
+            double[] center = Arrays.copyOf(sourceCluster.getCenter(),sourceCluster.getCenter().length);
+            double weight = sourceCluster.getWeight();
+            double radius = sourceCluster.getRadius();
+
+            //move cluster center
+            if(errLevelPosition >0){
+                double errOffset = random.nextDouble()*err_intervall_width/2.0;
+                double errOffsetDirection = ((random.nextBoolean())? 1 : -1);
+                double level = errLevelPosition + errOffsetDirection * errOffset;
+                double[] vector = new double[center.length];
+                double vectorLength = 0;
+                for (int d = 0; d < center.length; d++) {
+                    vector[d] = (random.nextBoolean()?1:-1)*random.nextDouble();
+                    vectorLength += Math.pow(vector[d],2);
+                }
+                vectorLength = Math.sqrt(vectorLength);
+
+                
+                //max is when clusters are next to each other
+                double length = 2 * radius * level;
+
+                for (int d = 0; d < center.length; d++) {
+                    //normalize length and then stretch to reach error position
+                    vector[d]=vector[d]/vectorLength*length;
+                }
+//                System.out.println("Center "+Arrays.toString(center));
+//                System.out.println("Vector "+Arrays.toString(vector));
+                //check if error position is within bounds
+                double [] newCenter = new double[center.length];
+                for (int d = 0; d < center.length; d++) {
+                    //check bounds, otherwise flip vector
+                    if(center[d] + vector[d] >= 0 && center[d] + vector[d] <= 1){
+                        newCenter[d] = center[d] + vector[d];
+                    }
+                    else{
+                        newCenter[d] = center[d] + (-1)*vector[d];
+                    }
+                }
+                center = newCenter;
+                for (int d = 0; d < center.length; d++) {
+                    if(newCenter[d] >= 0 && newCenter[d] <= 1){
+                    }
+                    else{
+                        System.out.println("This shouldn't have happened, cluster center out of bounds:"+Arrays.toString(newCenter));
+                    }
+                }
+                //System.out.println("new Center "+Arrays.toString(newCenter));
+
+            }
+            
+            //alter radius
+            if(errLevelRadiusDecrease > 0 || errLevelRadiusIncrease > 0){
+                double errOffset = random.nextDouble()*err_intervall_width/2.0;
+                int errOffsetDirection = ((random.nextBoolean())? 1 : -1);
+
+                if(errLevelRadiusDecrease > 0 && (errLevelRadiusIncrease == 0 || random.nextBoolean())){
+                    double level = (errLevelRadiusDecrease + errOffsetDirection * errOffset);//*sourceCluster.getWeight()/sumWeight;
+                    level = (level<0)?0:level;
+                    level = (level>1)?1:level;
+                    radius*=(1-level);
+                }
+                else{
+                    double level = errLevelRadiusIncrease + errOffsetDirection * errOffset;
+                    level = (level<0)?0:level;
+                    level = (level>1)?1:level;
+                    radius+=radius*level;
+                }
+            }
+
+            SphereCluster newCluster = new SphereCluster(center, radius, weight);
+            newCluster.setMeasureValue("Source Cluster", "C"+sourceCluster.getId());
+
+            clustering.add(newCluster);
+        }
+
+        if(joinClustersOption.getValue() > 0){
+            clustering = joinClusters(clustering);
+        }
+
+        //add new clusters by copying clusters and set a random center
+        for (int c = 0; c < numAddCluster; c++) {
+            int copyId = random.nextInt(clustering.size());
+            SphereCluster scorg = (SphereCluster)clustering.get(copyId);
+            int dim = scorg.getCenter().length;
+            double[] center = new double [dim];
+            double radius = scorg.getRadius();
+
+            boolean outofbounds = true;
+            int tryCounter = 0;
+            while(outofbounds && tryCounter < 20){
+                tryCounter++;
+                outofbounds = false;
+                for (int j = 0; j < center.length; j++) {
+                     center[j] = random.nextDouble();
+                     if(center[j]- radius < 0 || center[j] + radius > 1){
+                        outofbounds = true;
+                        break;
+                     }
+                }
+            }
+            if(outofbounds){
+                System.out.println("Couldn't place additional cluster");
+            }
+            else{
+                SphereCluster scnew = new SphereCluster(center, radius, scorg.getWeight()/2);
+                scorg.setWeight(scorg.getWeight()-scnew.getWeight());
+                clustering.add(scnew);
+            }
+        }
+
+        return clustering;
+
+    }
+
+
+
+    private Clustering joinClusters(Clustering clustering){
+
+        double radiusFactor = joinClustersOption.getValue();
+        boolean[] merged = new boolean[clustering.size()];
+
+        Clustering mclustering = new Clustering();
+
+        if(radiusFactor >0){
+            for (int c1 = 0; c1 < clustering.size(); c1++) {
+                SphereCluster sc1 = (SphereCluster) clustering.get(c1);
+                double minDist = Double.MAX_VALUE;
+                double minOver = 1;
+                int maxindexCon = -1;
+                int maxindexOver = -1;
+                for (int c2 = 0; c2 < clustering.size(); c2++) {
+                    SphereCluster sc2 = (SphereCluster) clustering.get(c2);
+//                    double over = sc1.overlapRadiusDegree(sc2);
+//                    if(over > 0 && over < minOver){
+//                       minOver = over;
+//                       maxindexOver = c2;
+//                    }
+                    double dist = sc1.getHullDistance(sc2);
+                    double threshold = Math.min(sc1.getRadius(), sc2.getRadius())*radiusFactor;
+                    if(dist > 0 && dist < minDist && dist < threshold){
+                            minDist = dist;
+                            maxindexCon = c2;
+                    }
+                }
+                int maxindex = -1;
+                if(maxindexOver!=-1)
+                    maxindex = maxindexOver;
+                else
+                    maxindex = maxindexCon;
+
+                if(maxindex!=-1 && !merged[c1]){
+                    merged[c1]=true;
+                    merged[maxindex]=true;
+                    SphereCluster scnew = new SphereCluster(sc1.getCenter(),sc1.getRadius(),sc1.getWeight());
+                    SphereCluster sc2 = (SphereCluster) clustering.get(maxindex);
+                    scnew.merge(sc2);
+                    mclustering.add(scnew);
+                }
+            }
+        }
+
+        for (int i = 0; i < merged.length; i++) {
+            if(!merged[i])
+                 mclustering.add(clustering.get(i));
+        }
+
+
+        return mclustering;
+
+    }
+
+
+
+    @Override
+    protected Measurement[] getModelMeasurementsImpl() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public void getModelDescription(StringBuilder out, int indent) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public boolean isRandomizable() {
+        return false;
+    }
+
+    @Override
+    public boolean  keepClassLabel(){
+        return true;
+    }
+
+    public double[] getVotesForInstance(Instance inst) {
+        return null;
+    }
+}
+
+
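
A usage sketch, not part of this commit, of how the distortion options above combine: the generator needs a source clustering before it can return an altered one. It assumes FloatOption.setValue(double) from javacliparser and a caller-provided ground-truth Clustering; the wrapper class is invented for illustration:

    package com.yahoo.labs.samoa.moa.clusterers;

    import com.yahoo.labs.samoa.moa.cluster.Clustering;

    // Hypothetical helper, not part of the commit: illustrates the intended call order.
    public final class ClusterGeneratorExample {

        private ClusterGeneratorExample() {
        }

        public static Clustering distort(Clustering groundTruth) {
            ClusterGenerator generator = new ClusterGenerator();
            generator.resetLearning();                     // initializes the point buffer and the internal Random

            generator.radiusIncreaseOption.setValue(0.2);  // grow radii by up to 20%
            generator.positionOffsetOption.setValue(0.1);  // shift centers relative to their radius
            generator.clusterRemoveOption.setValue(0.1);   // drop roughly 10% of the clusters

            generator.setSourceClustering(groundTruth);    // required before getMicroClusteringResult()
            return generator.getMicroClusteringResult();   // altered version of the source clustering
        }
    }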

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java
new file mode 100644
index 0000000..76d7d50
--- /dev/null
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java
@@ -0,0 +1,64 @@
+package com.yahoo.labs.samoa.moa.clusterers;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ *    Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import com.yahoo.labs.samoa.moa.MOAObject;
+import com.yahoo.labs.samoa.moa.cluster.Clustering;
+import com.yahoo.labs.samoa.instances.InstancesHeader;
+import com.yahoo.labs.samoa.moa.core.Measurement;
+import com.yahoo.labs.samoa.moa.options.OptionHandler;
+import com.yahoo.labs.samoa.instances.Instance;
+
+public interface Clusterer extends MOAObject, OptionHandler {
+
+       public void setModelContext(InstancesHeader ih);
+
+       public InstancesHeader getModelContext();
+
+       public boolean isRandomizable();
+
+       public void setRandomSeed(int s);
+
+       public boolean trainingHasStarted();
+
+       public double trainingWeightSeenByModel();
+
+       public void resetLearning();
+
+       public void trainOnInstance(Instance inst);
+
+       public double[] getVotesForInstance(Instance inst);
+
+       public Measurement[] getModelMeasurements();
+
+       public Clusterer[] getSubClusterers();
+
+       public Clusterer copy();
+
+    public Clustering getClusteringResult();
+
+    public boolean implementsMicroClusterer();
+
+    public Clustering getMicroClusteringResult();
+    
+    public boolean keepClassLabel();
+
+}
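
The micro/macro split in this interface is the point callers can key on: implementsMicroClusterer() tells them whether getMicroClusteringResult() is meaningful, otherwise only getClusteringResult() applies. A small sketch, not part of this commit, with an invented helper class:

    package com.yahoo.labs.samoa.moa.clusterers;

    import com.yahoo.labs.samoa.instances.Instance;
    import com.yahoo.labs.samoa.moa.cluster.Clustering;

    // Hypothetical helper, not part of the commit.
    public final class ClusteringSelector {

        private ClusteringSelector() {
        }

        public static Clustering resultFor(Clusterer clusterer, Iterable<Instance> window) {
            for (Instance inst : window) {
                clusterer.trainOnInstance(inst);
            }
            // prefer the microclustering when the algorithm maintains one
            return clusterer.implementsMicroClusterer()
                    ? clusterer.getMicroClusteringResult()
                    : clusterer.getClusteringResult();
        }
    }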

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java
new file mode 100644
index 0000000..a9a891f
--- /dev/null
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/KMeans.java
@@ -0,0 +1,202 @@
+
+package com.yahoo.labs.samoa.moa.clusterers;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ *    Copyright (C) 2010 RWTH Aachen University, Germany
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+import com.yahoo.labs.samoa.moa.cluster.CFCluster;
+import com.yahoo.labs.samoa.moa.cluster.Cluster;
+import com.yahoo.labs.samoa.moa.cluster.Clustering;
+import com.yahoo.labs.samoa.moa.cluster.SphereCluster;
+
+/**
+ * A kMeans implementation for microclusterings. For now it only uses the real centers of the
+ * ground truth clustering for initialization. There should also be an option to use random
+ * centers. 
+ * TODO: random centers
+ * TODO: Create a macro clustering interface to make different macro clustering algorithms available
+ * to micro clustering algorithms like clustream, denstream and clustree   
+ *
+ */
+public class KMeans {
+
+    /**
+     * This kMeans implementation clusters a large number of microclusters
+     * into a smaller number of macro clusters. To make it comparable to other
+     * algorithms it uses the real centers of the ground truth macro clustering
+     * to have the best possible initialization. The quality of the resulting
+     * macro clustering yields an upper bound for kMeans on the underlying
+     * microclustering.
+     * 
+     * @param centers of the ground truth clustering 
+     * @param data list of microclusters
+     * @return the resulting macro clustering
+     */
+    public static Clustering kMeans(Cluster[] centers, List<? extends Cluster> data ) {
+        int k = centers.length;
+
+       int dimensions = centers[0].getCenter().length;
+
+       ArrayList<ArrayList<Cluster>> clustering =
+               new ArrayList<ArrayList<Cluster>>();
+       for ( int i = 0; i < k; i++ ) {
+           clustering.add( new ArrayList<Cluster>() );
+       }
+
+       int repetitions = 100;
+       while ( repetitions-- >= 0 ) {
+           // Assign points to clusters
+           for ( Cluster point : data ) {
+               double minDistance = distance( point.getCenter(), centers[0].getCenter() );
+               int closestCluster = 0;
+               for ( int i = 1; i < k; i++ ) {
+                   double distance = distance( point.getCenter(), centers[i].getCenter() );
+                   if ( distance < minDistance ) {
+                       closestCluster = i;
+                       minDistance = distance;
+                   }
+               }
+
+               clustering.get( closestCluster ).add( point );
+           }
+
+           // Calculate new centers and clear clustering lists
+           SphereCluster[] newCenters = new SphereCluster[centers.length];
+           for ( int i = 0; i < k; i++ ) {
+               newCenters[i] = calculateCenter( clustering.get( i ), dimensions );
+               clustering.get( i ).clear();
+           }
+           centers = newCenters;
+       }
+
+       return new Clustering( centers );
+    }
+
+    private static double distance(double[] pointA, double [] pointB){
+        double distance = 0.0;
+        for (int i = 0; i < pointA.length; i++) {
+            double d = pointA[i] - pointB[i];
+            distance += d * d;
+        }
+        return Math.sqrt(distance);
+    }
+
+
+    private static SphereCluster calculateCenter( ArrayList<Cluster> cluster, int dimensions ) {
+       double[] res = new double[dimensions];
+       for ( int i = 0; i < res.length; i++ ) {
+           res[i] = 0.0;
+       }
+
+       if ( cluster.size() == 0 ) {
+           return new SphereCluster( res, 0.0 );
+       }
+
+       for ( Cluster point : cluster ) {
+            double [] center = point.getCenter();
+            for (int i = 0; i < res.length; i++) {
+               res[i] += center[i];
+            }
+       }
+
+       // Normalize
+       for ( int i = 0; i < res.length; i++ ) {
+           res[i] /= cluster.size();
+       }
+
+       // Calculate radius
+       double radius = 0.0;
+       for ( Cluster point : cluster ) {
+           double dist = distance( res, point.getCenter() );
+           if ( dist > radius ) {
+               radius = dist;
+           }
+       }
+
+       return new SphereCluster( res, radius );
+    }
+
+    public static Clustering gaussianMeans(Clustering gtClustering, Clustering clustering) {
+        ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>();
+        for (int i = 0; i < clustering.size(); i++) {
+            if (clustering.get(i) instanceof CFCluster) {
+                microclusters.add((CFCluster)clustering.get(i));
+            }
+            else{
+                System.out.println("Unsupported Cluster Type:"+clustering.get(i).getClass()
+                        +". Cluster needs to extend moa.cluster.CFCluster");
+            }
+        }
+        Cluster[] centers = new Cluster[gtClustering.size()];
+        for (int i = 0; i < centers.length; i++) {
+            centers[i] = gtClustering.get(i);
+
+        }
+
+        int k = centers.length;
+       if ( microclusters.size() < k ) {
+           return new Clustering( new Cluster[0]);
+       }
+
+       Clustering kMeansResult = kMeans( centers, microclusters );
+
+       k = kMeansResult.size();
+       CFCluster[] res = new CFCluster[ k ];
+
+       for ( CFCluster microcluster : microclusters) {
+           // Find closest kMeans cluster
+           double minDistance = Double.MAX_VALUE;
+           int closestCluster = 0;
+           for ( int i = 0; i < k; i++ ) {
+               double distance = distance( kMeansResult.get(i).getCenter(), microcluster.getCenter() );
+               if ( distance < minDistance ) {
+                   closestCluster = i;
+                   minDistance = distance;
+               }
+           }
+
+           // Add to cluster
+           if ( res[closestCluster] == null ) {
+               res[closestCluster] = (CFCluster)microcluster.copy();
+           } else {
+               res[closestCluster].add(microcluster);
+           }
+       }
+
+       // Clean up res
+       int count = 0;
+       for ( int i = 0; i < res.length; i++ ) {
+           if ( res[i] != null )
+               ++count;
+       }
+
+       CFCluster[] cleaned = new CFCluster[count];
+       count = 0;
+       for ( int i = 0; i < res.length; i++ ) {
+           if ( res[i] != null )
+               cleaned[count++] = res[i];
+       }
+
+       return new Clustering( cleaned );
+    }
+
+}
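
A usage sketch, not part of this commit, for the ground-truth-seeded variant described in the javadoc; the helper class and method names are invented. KMeans.gaussianMeans covers the same flow when the microclusters are CFClusters held in a Clustering:

    package com.yahoo.labs.samoa.moa.clusterers;

    import java.util.List;

    import com.yahoo.labs.samoa.moa.cluster.CFCluster;
    import com.yahoo.labs.samoa.moa.cluster.Cluster;
    import com.yahoo.labs.samoa.moa.cluster.Clustering;

    // Hypothetical usage, not part of the commit.
    public final class KMeansExample {

        private KMeansExample() {
        }

        public static Clustering macroFromMicro(Clustering groundTruth, List<? extends CFCluster> microClusters) {
            // one seed center per ground-truth cluster: the "best possible initialization" noted above
            Cluster[] centers = new Cluster[groundTruth.size()];
            for (int i = 0; i < centers.length; i++) {
                centers[i] = groundTruth.get(i);
            }
            return KMeans.kMeans(centers, microClusters);
        }
    }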

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java
new file mode 100644
index 0000000..975e61d
--- /dev/null
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/Clustream.java
@@ -0,0 +1,337 @@
+
+package com.yahoo.labs.samoa.moa.clusterers.clustream;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ *    Copyright (C) 2010 RWTH Aachen University, Germany
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+import com.yahoo.labs.samoa.moa.cluster.Cluster;
+import com.yahoo.labs.samoa.moa.cluster.Clustering;
+import com.yahoo.labs.samoa.moa.cluster.SphereCluster;
+import com.yahoo.labs.samoa.moa.clusterers.AbstractClusterer;
+import com.yahoo.labs.samoa.moa.core.Measurement;
+import com.github.javacliparser.IntOption;
+import com.yahoo.labs.samoa.instances.DenseInstance;
+import com.yahoo.labs.samoa.instances.Instance;
+
+/** Citation: CluStream: Charu C. Aggarwal, Jiawei Han, Jianyong Wang, Philip S. Yu:
+ * A Framework for Clustering Evolving Data Streams. VLDB 2003: 81-92
+ */
+public class Clustream extends AbstractClusterer{
+
+       private static final long serialVersionUID = 1L;
+
+       public IntOption timeWindowOption = new IntOption("horizon",
+                       'h', "Range of the window.", 1000);
+
+       public IntOption maxNumKernelsOption = new IntOption(
+                       "maxNumKernels", 'k',
+                       "Maximum number of micro kernels to use.", 100);
+
+       public IntOption kernelRadiFactorOption = new IntOption(
+                       "kernelRadiFactor", 't',
+                       "Multiplier for the kernel radius", 2);
+
+       private int timeWindow;
+       private long timestamp = -1;
+       private ClustreamKernel[] kernels;
+       private boolean initialized;
+       private List<ClustreamKernel> buffer; // Buffer for initialization with kNN
+       private int bufferSize;
+       private double t;
+       private int m;
+
+       public Clustream() {
+       }
+
+
+       @Override
+       public void resetLearningImpl() {
+               this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()];
+               this.timeWindow = timeWindowOption.getValue();
+               this.initialized = false;
+               this.buffer = new LinkedList<>();
+               this.bufferSize = maxNumKernelsOption.getValue();
+               t = kernelRadiFactorOption.getValue();
+               m = maxNumKernelsOption.getValue();
+       }
+
+       @Override
+       public void trainOnInstanceImpl(Instance instance) {
+               int dim = instance.numValues();
+               timestamp++;
+               // 0. Initialize
+               if ( !initialized ) {
+                       if ( buffer.size() < bufferSize ) {
+                               buffer.add( new ClustreamKernel(instance,dim, timestamp, t, m) );
+                               return;
+                       }
+
+                       int k = kernels.length;
+                       //System.err.println("k="+k+" bufferSize="+bufferSize);
+                       assert (k <= bufferSize);
+
+                       ClustreamKernel[] centers = new ClustreamKernel[k];
+                       for ( int i = 0; i < k; i++ ) {
+                               centers[i] = buffer.get( i ); // TODO: make random!
+                       }
+                       Clustering kmeans_clustering = kMeans(k, centers, buffer);
+//                     Clustering kmeans_clustering = kMeans(k, buffer);
+
+                       for ( int i = 0; i < kmeans_clustering.size(); i++ ) {
+                               kernels[i] = new ClustreamKernel( new DenseInstance(1.0,centers[i].getCenter()), dim, timestamp, t, m );
+                       }
+
+                       buffer.clear();
+                       initialized = true;
+                       return;
+               }
+
+
+               // 1. Determine closest kernel
+               ClustreamKernel closestKernel = null;
+               double minDistance = Double.MAX_VALUE;
+               for (ClustreamKernel kernel : kernels) {
+                       //System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation());
+                       double distance = distance(instance.toDoubleArray(), kernel.getCenter());
+                       if (distance < minDistance) {
+                               closestKernel = kernel;
+                               minDistance = distance;
+                       }
+               }
+
+               // 2. Check whether instance fits into closestKernel
+               double radius;
+               if (closestKernel != null && closestKernel.getWeight() == 1) {
+                       // Special case: estimate radius by determining the distance to the
+                       // next closest cluster
+                       radius = Double.MAX_VALUE;
+                       double[] center = closestKernel.getCenter();
+                       for (ClustreamKernel kernel : kernels) {
+                               if (kernel == closestKernel) {
+                                       continue;
+                               }
+
+                               double distance = distance(kernel.getCenter(), center);
+                               radius = Math.min(distance, radius);
+                       }
+               } else {
+                       radius = closestKernel.getRadius();
+               }
+
+               if ( minDistance < radius ) {
+                       // Data fits, put into kernel and be happy
+                       closestKernel.insert( instance, timestamp );
+                       return;
+               }
+
+               // 3. Data does not fit, we need to free
+               // some space to insert a new kernel
+               long threshold = timestamp - timeWindow; // Kernels before this can be forgotten
+
+               // 3.1 Try to forget old kernels
+               for ( int i = 0; i < kernels.length; i++ ) {
+                       if ( kernels[i].getRelevanceStamp() < threshold ) {
+                               kernels[i] = new ClustreamKernel( instance, dim, timestamp, t, m );
+                               return;
+                       }
+               }
+
+               // 3.2 Merge closest two kernels
+               int closestA = 0;
+               int closestB = 0;
+               minDistance = Double.MAX_VALUE;
+               for ( int i = 0; i < kernels.length; i++ ) {
+                       double[] centerA = kernels[i].getCenter();
+                       for ( int j = i + 1; j < kernels.length; j++ ) {
+                               double dist = distance( centerA, kernels[j].getCenter() );
+                               if ( dist < minDistance ) {
+                                       minDistance = dist;
+                                       closestA = i;
+                                       closestB = j;
+                               }
+                       }
+               }
+               assert (closestA != closestB);
+
+               kernels[closestA].add( kernels[closestB] );
+               kernels[closestB] = new ClustreamKernel( instance, dim, timestamp, t, m );
+       }
+
+       @Override
+       public Clustering getMicroClusteringResult() {
+               if ( !initialized ) {
+                       return new Clustering( new Cluster[0] );
+               }
+
+               ClustreamKernel[] res = new ClustreamKernel[kernels.length];
+               for ( int i = 0; i < res.length; i++ ) {
+                       res[i] = new ClustreamKernel( kernels[i], t, m );
+               }
+
+               return new Clustering( res );
+       }
+
+       @Override
+       public boolean implementsMicroClusterer() {
+               return true;
+       }
+
+       @Override
+       public Clustering getClusteringResult() {
+               return null;
+       }
+
+       public String getName() {
+               return "Clustream " + timeWindow;
+       }
+
+       private static double distance(double[] pointA, double [] pointB){
+               double distance = 0.0;
+               for (int i = 0; i < pointA.length; i++) {
+                       double d = pointA[i] - pointB[i];
+                       distance += d * d;
+               }
+               return Math.sqrt(distance);
+       }
+
+       //wrapper... we need to rewrite kmeans to points, not clusters, doesn't make sense anymore
+       //    public static Clustering kMeans( int k, ArrayList<Instance> points, int dim ) {
+       //        ArrayList<ClustreamKernel> cl = new ArrayList<ClustreamKernel>();
+       //        for(Instance inst : points){
+       //            cl.add(new ClustreamKernel(inst, dim , 0, 0, 0));
+       //        }
+       //        Clustering clustering = kMeans(k, cl);
+       //        return clustering;
+       //    }
+
+       public static Clustering kMeans( int k, List<? extends Cluster> data ) {
+               Random random = new Random(0);
+               Cluster[] centers = new Cluster[k];
+               for (int i = 0; i < centers.length; i++) {
+                       int rid = random.nextInt(k);
+                       centers[i] = new SphereCluster(data.get(rid).getCenter(),0);
+               }
+               return kMeans(k, centers, data);
+       }
+
+
+
+
+
+       public static Clustering kMeans( int k, Cluster[] centers, List<? extends Cluster> data ) {
+               assert (centers.length == k);
+               assert (k > 0);
+
+               int dimensions = centers[0].getCenter().length;
+
+               ArrayList<ArrayList<Cluster>> clustering = new ArrayList<>();
+               for ( int i = 0; i < k; i++ ) {
+                       clustering.add( new ArrayList<Cluster>() );
+               }
+
+               int repetitions = 100;
+               while ( repetitions-- >= 0 ) {
+                       // Assign points to clusters
+                       for ( Cluster point : data ) {
+                               double minDistance = distance( point.getCenter(), centers[0].getCenter() );
+                               int closestCluster = 0;
+                               for ( int i = 1; i < k; i++ ) {
+                                       double distance = distance( point.getCenter(), centers[i].getCenter() );
+                                       if ( distance < minDistance ) {
+                                               closestCluster = i;
+                                               minDistance = distance;
+                                       }
+                               }
+
+                               clustering.get( closestCluster ).add( point );
+                       }
+
+                       // Calculate new centers and clear clustering lists
+                       SphereCluster[] newCenters = new SphereCluster[centers.length];
+                       for ( int i = 0; i < k; i++ ) {
+                               newCenters[i] = calculateCenter( clustering.get( i ), dimensions );
+                               clustering.get( i ).clear();
+                       }
+                       centers = newCenters;
+               }
+
+               return new Clustering( centers );
+       }
+
+       private static SphereCluster calculateCenter( ArrayList<Cluster> cluster, int dimensions ) {
+               double[] res = new double[dimensions];
+               for ( int i = 0; i < res.length; i++ ) {
+                       res[i] = 0.0;
+               }
+
+               if ( cluster.size() == 0 ) {
+                       return new SphereCluster( res, 0.0 );
+               }
+
+               for ( Cluster point : cluster ) {
+                       double [] center = point.getCenter();
+                       for (int i = 0; i < res.length; i++) {
+                               res[i] += center[i];
+                       }
+               }
+
+               // Normalize
+               for ( int i = 0; i < res.length; i++ ) {
+                       res[i] /= cluster.size();
+               }
+
+               // Calculate radius
+               double radius = 0.0;
+               for ( Cluster point : cluster ) {
+                       double dist = distance( res, point.getCenter() );
+                       if ( dist > radius ) {
+                               radius = dist;
+                       }
+               }
+               SphereCluster sc = new SphereCluster( res, radius );
+               sc.setWeight(cluster.size());
+               return sc;
+       }
+
+       @Override
+       protected Measurement[] getModelMeasurementsImpl() {
+               throw new UnsupportedOperationException("Not supported yet.");
+       }
+
+       @Override
+       public void getModelDescription(StringBuilder out, int indent) {
+               throw new UnsupportedOperationException("Not supported yet.");
+       }
+
+       public boolean isRandomizable() {
+               return false;
+       }
+
+       public double[] getVotesForInstance(Instance inst) {
+               throw new UnsupportedOperationException("Not supported yet.");
+       }
+
+
+}
+
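
A usage sketch, not part of this commit, of the overall CluStream flow: train on the stream, read the maintained kernels back as a micro clustering, then compress them with the static kMeans helper above. The wrapper class is invented for illustration:

    package com.yahoo.labs.samoa.moa.clusterers.clustream;

    import java.util.ArrayList;
    import java.util.List;

    import com.yahoo.labs.samoa.instances.Instance;
    import com.yahoo.labs.samoa.moa.cluster.Cluster;
    import com.yahoo.labs.samoa.moa.cluster.Clustering;

    // Hypothetical usage, not part of the commit.
    public final class ClustreamExample {

        private ClustreamExample() {
        }

        public static Clustering cluster(Iterable<Instance> stream, int k) {
            Clustream clustream = new Clustream();
            clustream.resetLearning();               // reads the options and clears the kernel array

            for (Instance inst : stream) {
                clustream.trainOnInstance(inst);     // buffers until initialized, then updates kernels
            }

            Clustering micro = clustream.getMicroClusteringResult();
            List<Cluster> kernels = new ArrayList<>();
            for (int i = 0; i < micro.size(); i++) {
                kernels.add(micro.get(i));
            }
            // not enough kernels yet for k macro clusters? return the micro clustering as-is
            return kernels.size() < k ? micro : Clustream.kMeans(k, kernels);
        }
    }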

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java
new file mode 100644
index 0000000..8c8f6a0
--- /dev/null
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/ClustreamKernel.java
@@ -0,0 +1,269 @@
+package com.yahoo.labs.samoa.moa.clusterers.clustream;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ *    Copyright (C) 2010 RWTH Aachen University, Germany
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+import java.util.List;
+
+import com.yahoo.labs.samoa.instances.Instance;
+import com.yahoo.labs.samoa.moa.cluster.CFCluster;
+
+public class ClustreamKernel extends CFCluster {
+       private static final long serialVersionUID = 1L;
+
+       private final static double EPSILON = 0.00005;
+    public static final double MIN_VARIANCE = 1e-50;
+
+    protected double LST;
+    protected double SST;
+
+    int m;
+    double t;
+
+
+    public ClustreamKernel( Instance instance, int dimensions, long timestamp, double t, int m) {
+        super(instance, dimensions);
+        this.t = t;
+        this.m = m;
+        this.LST = timestamp;
+               this.SST = timestamp*timestamp;
+    }
+
+    public ClustreamKernel( ClustreamKernel cluster, double t, int m ) {
+        super(cluster);
+        this.t = t;
+        this.m = m;
+        this.LST = cluster.LST;
+        this.SST = cluster.SST;
+    }
+
+    public void insert( Instance instance, long timestamp ) {
+               N++;
+               LST += timestamp;
+               SST += timestamp*timestamp;
+       
+               for ( int i = 0; i < instance.numValues(); i++ ) {
+                   LS[i] += instance.value(i);
+                   SS[i] += instance.value(i)*instance.value(i);
+               }
+    }
+
+    @Override
+    public void add( CFCluster other2 ) {
+        ClustreamKernel other = (ClustreamKernel) other2;
+               assert( other.LS.length == this.LS.length );
+               this.N += other.N;
+               this.LST += other.LST;
+               this.SST += other.SST;
+       
+               for ( int i = 0; i < LS.length; i++ ) {
+                   this.LS[i] += other.LS[i];
+                   this.SS[i] += other.SS[i];
+               }
+    }
+
+    public double getRelevanceStamp() {
+               if ( N < 2*m )
+                   return getMuTime();
+       
+               return getMuTime() + getSigmaTime() * getQuantile( ((double)m)/(2*N) );
+    }
+
+    private double getMuTime() {
+       return LST / N;
+    }
+
+    private double getSigmaTime() {
+       return Math.sqrt(SST/N - (LST/N)*(LST/N));
+    }
+
+    private double getQuantile( double z ) {
+               assert( z >= 0 && z <= 1 );
+               return Math.sqrt( 2 ) * inverseError( 2*z - 1 );
+    }
+
+    @Override
+    public double getRadius() {
+        //trivial cluster
+        if(N == 1) return 0;
+        if(t==1)
+            t=1;
+
+        return getDeviation()*radiusFactor;
+    }
+
+    @Override
+    public CFCluster getCF(){
+        return this;
+    }
+
+
+    private double getDeviation(){
+        double[] variance = getVarianceVector();
+        double sumOfDeviation = 0.0;
+        for (double aVariance : variance) {
+            double d = Math.sqrt(aVariance);
+            sumOfDeviation += d;
+        }
+        return sumOfDeviation / variance.length;
+    }
+
+    /**
+     * @return this kernel's center
+     */
+    @Override
+    public double[] getCenter() {
+        assert (!this.isEmpty());
+        double res[] = new double[this.LS.length];
+        for (int i = 0; i < res.length; i++) {
+            res[i] = this.LS[i] / N;
+        }
+        return res;
+    }
+
+    /**
+     * See interface <code>Cluster</code>
+     * @param instance
+     * @return double value
+     */
+    @Override
+    public double getInclusionProbability(Instance instance) {
+        //trivial cluster
+        if(N == 1){
+            double distance = 0.0;
+            for (int i = 0; i < LS.length; i++) {
+                double d = LS[i] - instance.value(i);
+                distance += d * d;
+            }
+            distance = Math.sqrt(distance);
+            if( distance < EPSILON )
+                return 1.0;
+            return 0.0;
+        }
+        else{
+            double dist = calcNormalizedDistance(instance.toDoubleArray());
+            if(dist <= getRadius()){
+                return 1;
+            }
+            else{
+                return 0;
+            }
+//            double res = AuxiliaryFunctions.distanceProbabilty(dist, LS.length);
+//            return res;
+        }
+    }
+
+    private double[] getVarianceVector() {
+        double[] res = new double[this.LS.length];
+        for (int i = 0; i < this.LS.length; i++) {
+            double ls = this.LS[i];
+            double ss = this.SS[i];
+
+            double lsDivN = ls / this.getWeight();
+            double lsDivNSquared = lsDivN * lsDivN;
+            double ssDivN = ss / this.getWeight();
+            res[i] = ssDivN - lsDivNSquared;
+
+            // Due to numerical errors, small negative values can occur.
+            // We correct this by setting them to almost zero.
+            if (res[i] <= 0.0) {
+                if (res[i] > -EPSILON) {
+                    res[i] = MIN_VARIANCE;
+                }
+            }
+        }
+        return res;
+    }
+
+    /**
+     * Check if this cluster is empty or not.
+     * @return <code>true</code> if the cluster has no data points,
+     * <code>false</code> otherwise.
+     */
+    public boolean isEmpty() {
+        return this.N == 0;
+    }
+
+    /**
+     * Calculate the normalized euclidean distance (Mahalanobis distance for
+     * distribution w/o covariances) to a point.
+     * @param point The point to which the distance is calculated.
+     * @return The normalized distance to the cluster center.
+     *
+     * TODO: check whether WEIGHTING is correctly applied to variances
+     */
+    private double calcNormalizedDistance(double[] point) {
+        double[] center = getCenter();
+        double res = 0.0;
+
+        for (int i = 0; i < center.length; i++) {
+            double diff = center[i] - point[i];
+            res += (diff * diff);// variance[i];
+        }
+        return Math.sqrt(res);
+    }
+
+    /**
+     * Approximates the inverse error function. CluStream needs this for the
+     * relevance stamp quantile.
+     * @param x value in (-1, 1)
+     */
+    public static double inverseError(double x) {
+        double z = Math.sqrt(Math.PI) * x;
+        double res = (z) / 2;
+
+        double z2 = z * z;
+        double zProd = z * z2; // z^3
+        res += (1.0 / 24) * zProd;
+
+        zProd *= z2;  // z^5
+        res += (7.0 / 960) * zProd;
+
+        zProd *= z2;  // z^7
+        res += (127 * zProd) / 80640;
+
+        zProd *= z2;  // z^9
+        res += (4369 * zProd) / 11612160;
+
+        zProd *= z2;  // z^11
+        res += (34807 * zProd) / 364953600;
+
+        zProd *= z2;  // z^13
+        res += (20036983 * zProd) / 797058662400d;
+
+        return res;
+    }
+
+    @Override
+    protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue) {
+        super.getClusterSpecificInfo(infoTitle, infoValue);
+        infoTitle.add("Deviation");
+
+        double[] variance = getVarianceVector();
+        double sumOfDeviation = 0.0;
+        for (double aVariance : variance) {
+            double d = Math.sqrt(aVariance);
+            sumOfDeviation += d;
+        }
+
+        sumOfDeviation/= variance.length;
+        
+        infoValue.add(Double.toString(sumOfDeviation));
+    }
+}
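
The truncated series in inverseError() above matches the leading Maclaurin terms of the inverse error function, and getRelevanceStamp() turns it into a normal quantile via sqrt(2) * erfInv(2q - 1). The following standalone sketch (a hypothetical class, not part of this patch, keeping only the first few terms of the series for brevity) illustrates that mapping for a few fractions q:

    // QuantileSketch: hypothetical helper, not part of the patch. It re-derives the
    // standard normal q-quantile the same way getRelevanceStamp() does.
    public class QuantileSketch {

        // First terms of the same Maclaurin series used by ClustreamKernel.inverseError()
        // (the patch carries the series further, up to z^13).
        static double inverseError(double x) {
            double z = Math.sqrt(Math.PI) * x;
            double res = z / 2;
            double z2 = z * z;
            double zProd = z * z2;                        // z^3
            res += (1.0 / 24) * zProd;
            zProd *= z2;                                  // z^5
            res += (7.0 / 960) * zProd;
            zProd *= z2;                                  // z^7
            res += (127.0 / 80640) * zProd;
            return res;
        }

        public static void main(String[] args) {
            // sqrt(2) * erfInv(2q - 1) approximates the standard normal q-quantile
            // (roughly -1.64, -1.28, -0.67 for q = 0.05, 0.10, 0.25); accuracy drops in the tails.
            for (double q : new double[] { 0.05, 0.10, 0.25 }) {
                System.out.printf("q=%.2f -> z=%.3f%n", q, Math.sqrt(2) * inverseError(2 * q - 1));
            }
        }
    }

The approximation is best near the centre of the distribution and degrades towards the tails, which matters little for the relevance stamp, since it only needs a rough percentile of the kernel's timestamp distribution.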

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java
new file mode 100644
index 0000000..9e40fc1
--- /dev/null
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/clustream/WithKmeans.java
@@ -0,0 +1,468 @@
+
+package com.yahoo.labs.samoa.moa.clusterers.clustream;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ *    Copyright (C) 2013 RWTH Aachen University, Germany
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+
+import com.yahoo.labs.samoa.moa.cluster.CFCluster;
+import com.yahoo.labs.samoa.moa.cluster.Cluster;
+import com.yahoo.labs.samoa.moa.cluster.Clustering;
+import com.yahoo.labs.samoa.moa.cluster.SphereCluster;
+import com.yahoo.labs.samoa.moa.clusterers.AbstractClusterer;
+import com.yahoo.labs.samoa.moa.core.Measurement;
+import com.github.javacliparser.IntOption;
+import com.yahoo.labs.samoa.instances.DenseInstance;
+import com.yahoo.labs.samoa.instances.Instance;
+
+public class WithKmeans extends AbstractClusterer {
+       
+       private static final long serialVersionUID = 1L;
+
+       public IntOption timeWindowOption = new IntOption("horizon",
+                       'h', "Range of the window.", 1000);
+
+       public IntOption maxNumKernelsOption = new IntOption(
+                       "maxNumKernels", 'm',
+                       "Maximum number of micro kernels to use.", 100);
+
+       public IntOption kernelRadiFactorOption = new IntOption(
+                       "kernelRadiFactor", 't',
+                       "Multiplier for the kernel radius", 2);
+       
+       public IntOption kOption = new IntOption(
+                       "k", 'k',
+                       "k of macro k-means (number of clusters)", 5);
+
+       private int timeWindow;
+       private long timestamp = -1;
+       private ClustreamKernel[] kernels;
+       private boolean initialized;
+       private List<ClustreamKernel> buffer; // Buffer for initialization with kNN
+       private int bufferSize;
+       private double t;
+       private int m;
+       
+       public WithKmeans() {
+       
+       }
+
+       @Override
+       public void resetLearningImpl() {
+               this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()];
+               this.timeWindow = timeWindowOption.getValue();
+               this.initialized = false;
+               this.buffer = new LinkedList<ClustreamKernel>();
+               this.bufferSize = maxNumKernelsOption.getValue();
+               t = kernelRadiFactorOption.getValue();
+               m = maxNumKernelsOption.getValue();
+       }
+
+       @Override
+       public void trainOnInstanceImpl(Instance instance) {
+               int dim = instance.numValues();
+               timestamp++;
+               // 0. Initialize
+               if (!initialized) {
+                       if (buffer.size() < bufferSize) {
+                               buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m));
+                               return;
+                       } else {
+                               for (int i = 0; i < buffer.size(); i++) {
+                                       kernels[i] = new ClustreamKernel(new DenseInstance(1.0, buffer.get(i).getCenter()), dim, timestamp, t, m);
+                               }
+       
+                               buffer.clear();
+                               initialized = true;
+                               return;
+                       }
+               }
+
+
+               // 1. Determine closest kernel
+               ClustreamKernel closestKernel = null;
+               double minDistance = Double.MAX_VALUE;
+               for ( int i = 0; i < kernels.length; i++ ) {
+                       //System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation());
+                       double distance = distance(instance.toDoubleArray(), kernels[i].getCenter());
+                       if (distance < minDistance) {
+                               closestKernel = kernels[i];
+                               minDistance = distance;
+                       }
+               }
+
+               // 2. Check whether instance fits into closestKernel
+               double radius = 0.0;
+               if ( closestKernel.getWeight() == 1 ) {
+                       // Special case: estimate radius by determining the distance to the
+                       // next closest cluster
+                       radius = Double.MAX_VALUE;
+                       double[] center = closestKernel.getCenter();
+                       for ( int i = 0; i < kernels.length; i++ ) {
+                               if ( kernels[i] == closestKernel ) {
+                                       continue;
+                               }
+
+                               double distance = distance(kernels[i].getCenter(), center );
+                               radius = Math.min( distance, radius );
+                       }
+               } else {
+                       radius = closestKernel.getRadius();
+               }
+
+               if ( minDistance < radius ) {
+                       // Data fits, put into kernel and be happy
+                       closestKernel.insert( instance, timestamp );
+                       return;
+               }
+
+               // 3. Data does not fit, we need to free
+               // some space to insert a new kernel
+               long threshold = timestamp - timeWindow; // Kernels before this can be forgotten
+
+               // 3.1 Try to forget old kernels
+               for ( int i = 0; i < kernels.length; i++ ) {
+                       if ( kernels[i].getRelevanceStamp() < threshold ) {
+                               kernels[i] = new ClustreamKernel( instance, dim, timestamp, t, m );
+                               return;
+                       }
+               }
+
+               // 3.2 Merge closest two kernels
+               int closestA = 0;
+               int closestB = 0;
+               minDistance = Double.MAX_VALUE;
+               for ( int i = 0; i < kernels.length; i++ ) {
+                       double[] centerA = kernels[i].getCenter();
+                       for ( int j = i + 1; j < kernels.length; j++ ) {
+                               double dist = distance( centerA, kernels[j].getCenter() );
+                               if ( dist < minDistance ) {
+                                       minDistance = dist;
+                                       closestA = i;
+                                       closestB = j;
+                               }
+                       }
+               }
+               assert (closestA != closestB);
+
+               kernels[closestA].add( kernels[closestB] );
+               kernels[closestB] = new ClustreamKernel( instance, dim, timestamp, t, m );
+       }
+       
+       @Override
+       public Clustering getMicroClusteringResult() {
+               if (!initialized) {
+                       return new Clustering(new Cluster[0]);
+               }
+
+               ClustreamKernel[] result = new ClustreamKernel[kernels.length];
+               for (int i = 0; i < result.length; i++) {
+                       result[i] = new ClustreamKernel(kernels[i], t, m);
+               }
+
+               return new Clustering(result);
+       }
+       
+       @Override
+       public Clustering getClusteringResult() {
+               return kMeans_rand(kOption.getValue(), getMicroClusteringResult());
+       }
+       
+       public Clustering getClusteringResult(Clustering gtClustering) {
+               return kMeans_gta(kOption.getValue(), getMicroClusteringResult(), gtClustering);
+       }
+
+       public String getName() {
+               return "CluStreamWithKMeans " + timeWindow;
+       }
+
+       /**
+        * Distance between two vectors.
+        * 
+        * @param pointA
+        * @param pointB
+        * @return dist
+        */
+       private static double distance(double[] pointA, double [] pointB) {
+               double distance = 0.0;
+               for (int i = 0; i < pointA.length; i++) {
+                       double d = pointA[i] - pointB[i];
+                       distance += d * d;
+               }
+               return Math.sqrt(distance);
+       }
+
+       /**
+        * k-means of (micro)clusters, with ground-truth-aided initialization
+        * (to produce the best results).
+        * 
+        * @param k number of macro clusters
+        * @param clustering the micro clustering to summarize
+        * @param gtClustering ground-truth clustering used to seed the initial centers
+        * @return (macro)clustering - CFClusters
+        */
+       public static Clustering kMeans_gta(int k, Clustering clustering, Clustering gtClustering) {
+               
+               ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>();
+        for (int i = 0; i < clustering.size(); i++) {
+            if (clustering.get(i) instanceof CFCluster) {
+                microclusters.add((CFCluster)clustering.get(i));
+            } else {
+                System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() + ". Cluster needs to extend moa.cluster.CFCluster");
+            }
+        }
+        
+        int n = microclusters.size();
+               assert (k <= n);
+               
+               /* k-means */
+               Random random = new Random(0);
+               Cluster[] centers = new Cluster[k];
+               int K = gtClustering.size();
+               
+               for (int i = 0; i < k; i++) {
+                       if (i < K) {    // GT-aided
+                               centers[i] = new SphereCluster(gtClustering.get(i).getCenter(), 0);
+                       } else {                // Randomized
+                               int rid = random.nextInt(n);
+                               centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0);
+                       }
+               }
+               
+               return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters);
+       }
+       
+       /**
+        * k-means of (micro)clusters, with randomized initialization. 
+        * 
+        * @param k number of macro clusters
+        * @param clustering the micro clustering to summarize
+        * @return (macro)clustering - CFClusters
+        */
+       public static Clustering kMeans_rand(int k, Clustering clustering) {
+               
+               ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>();
+        for (int i = 0; i < clustering.size(); i++) {
+            if (clustering.get(i) instanceof CFCluster) {
+                microclusters.add((CFCluster)clustering.get(i));
+            } else {
+                System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass() + ". Cluster needs to extend moa.cluster.CFCluster");
+            }
+        }
+        
+        int n = microclusters.size();
+               assert (k <= n);
+               
+               /* k-means */
+               Random random = new Random(0);
+               Cluster[] centers = new Cluster[k];
+               
+               for (int i = 0; i < k; i++) {
+                       int rid = random.nextInt(n);
+                       centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0);
+               }
+               
+               return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters);
+       }
+       
+       /**
+        * (The Actual Algorithm) k-means of (micro)clusters, with specified initialization points.
+        * 
+        * @param k
+        * @param centers - initial centers
+        * @param data
+        * @return (macro)clustering - SphereClusters
+        */
+       protected static Clustering kMeans(int k, Cluster[] centers, List<? extends Cluster> data) {
+               assert (centers.length == k);
+               assert (k > 0);
+
+               int dimensions = centers[0].getCenter().length;
+
+               ArrayList<ArrayList<Cluster>> clustering = new ArrayList<ArrayList<Cluster>>();
+               for (int i = 0; i < k; i++) {
+                       clustering.add(new ArrayList<Cluster>());
+               }
+
+               while (true) {
+                       // Assign points to clusters
+                       for (Cluster point : data) {
+                               double minDistance = distance(point.getCenter(), centers[0].getCenter());
+                               int closestCluster = 0;
+                               for (int i = 1; i < k; i++) {
+                                       double distance = distance(point.getCenter(), centers[i].getCenter());
+                                       if (distance < minDistance) {
+                                               closestCluster = i;
+                                               minDistance = distance;
+                                       }
+                               }
+
+                               clustering.get(closestCluster).add(point);
+                       }
+
+                       // Calculate new centers and clear clustering lists
+                       SphereCluster[] newCenters = new SphereCluster[centers.length];
+                       for (int i = 0; i < k; i++) {
+                               newCenters[i] = calculateCenter(clustering.get(i), dimensions);
+                               clustering.get(i).clear();
+                       }
+                       
+                       // Convergence check
+                       boolean converged = true;
+                       for (int i = 0; i < k; i++) {
+                               if (!Arrays.equals(centers[i].getCenter(), newCenters[i].getCenter())) {
+                                       converged = false;
+                                       break;
+                               }
+                       }
+                       
+                       if (converged) {
+                               break;
+                       } else {
+                               centers = newCenters;
+                       }
+               }
+
+               return new Clustering(centers);
+       }
+       
+       /**
+        * Rearrange the k-means result into a set of CFClusters, cleaning up the redundancies.
+        * 
+        * @param kMeansResult
+        * @param microclusters
+        * @return the macro clustering as merged CFClusters
+        */
+       protected static Clustering cleanUpKMeans(Clustering kMeansResult, ArrayList<CFCluster> microclusters) {
+               /* Convert k-means result to CFClusters */
+               int k = kMeansResult.size();
+               CFCluster[] converted = new CFCluster[k];
+
+               for (CFCluster mc : microclusters) {
+                   // Find closest kMeans cluster
+                   double minDistance = Double.MAX_VALUE;
+                   int closestCluster = 0;
+                   for (int i = 0; i < k; i++) {
+                       double distance = distance(kMeansResult.get(i).getCenter(), mc.getCenter());
+                               if (distance < minDistance) {
+                                   closestCluster = i;
+                                   minDistance = distance;
+                               }
+                   }
+
+                   // Add to cluster
+                   if ( converted[closestCluster] == null ) {
+                       converted[closestCluster] = (CFCluster)mc.copy();
+                   } else {
+                       converted[closestCluster].add(mc);
+                   }
+               }
+
+               // Clean up
+               int count = 0;
+               for (int i = 0; i < converted.length; i++) {
+                   if (converted[i] != null)
+                       count++;
+               }
+
+               CFCluster[] cleaned = new CFCluster[count];
+               count = 0;
+               for (int i = 0; i < converted.length; i++) {
+                   if (converted[i] != null)
+                       cleaned[count++] = converted[i];
+               }
+
+               return new Clustering(cleaned);
+       }
+
+       
+
+       /**
+        * k-means helper: calculate a wrapping cluster of the assigned points (microclusters).
+        * 
+        * @param assigned
+        * @param dimensions
+        * @return SphereCluster (with center and radius)
+        */
+       private static SphereCluster calculateCenter(ArrayList<Cluster> assigned, int dimensions) {
+               double[] result = new double[dimensions];
+               for (int i = 0; i < result.length; i++) {
+                       result[i] = 0.0;
+               }
+
+               if (assigned.size() == 0) {
+                       return new SphereCluster(result, 0.0);
+               }
+
+               for (Cluster point : assigned) {
+                       double[] center = point.getCenter();
+                       for (int i = 0; i < result.length; i++) {
+                               result[i] += center[i];
+                       }
+               }
+
+               // Normalize
+               for (int i = 0; i < result.length; i++) {
+                       result[i] /= assigned.size();
+               }
+
+               // Calculate radius: biggest wrapping distance from center
+               double radius = 0.0;
+               for (Cluster point : assigned) {
+                       double dist = distance(result, point.getCenter());
+                       if (dist > radius) {
+                               radius = dist;
+                       }
+               }
+               SphereCluster sc = new SphereCluster(result, radius);
+               sc.setWeight(assigned.size());
+               return sc;
+       }
+
+       
+       /** Miscellaneous **/
+       
+       @Override
+       public boolean implementsMicroClusterer() {
+               return true;
+       }
+       
+       public boolean isRandomizable() {
+               return false;
+       }
+       
+       public double[] getVotesForInstance(Instance inst) {
+               throw new UnsupportedOperationException("Not supported yet.");
+       }
+       
+       @Override
+       protected Measurement[] getModelMeasurementsImpl() {
+               throw new UnsupportedOperationException("Not supported yet.");
+       }
+       
+       @Override
+       public void getModelDescription(StringBuilder out, int indent) {
+               throw new UnsupportedOperationException("Not supported yet.");
+       }
+}
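
The kMeans(...) method above is a plain Lloyd iteration over cluster centers: assign every micro-cluster to its nearest center, recompute each center as the mean of its assignment, and stop once no center changes. A minimal sketch of the same loop on raw double[] points (a hypothetical class, not the SAMOA API) might look as follows; unlike calculateCenter(), which maps an empty assignment to the origin, this sketch simply keeps the previous center for an empty cluster.

    import java.util.Arrays;
    import java.util.Random;

    public class KMeansSketch {

        // Euclidean distance, mirroring WithKmeans.distance().
        static double dist(double[] a, double[] b) {
            double s = 0.0;
            for (int i = 0; i < a.length; i++) { double d = a[i] - b[i]; s += d * d; }
            return Math.sqrt(s);
        }

        static double[][] kMeans(double[][] points, int k, long seed) {
            Random rnd = new Random(seed);
            int dim = points[0].length;
            double[][] centers = new double[k][];
            for (int i = 0; i < k; i++) {
                centers[i] = points[rnd.nextInt(points.length)].clone();   // random initial centers
            }
            while (true) {
                double[][] sums = new double[k][dim];
                int[] counts = new int[k];
                for (double[] p : points) {                                // assignment step
                    int best = 0;
                    for (int i = 1; i < k; i++) {
                        if (dist(p, centers[i]) < dist(p, centers[best])) best = i;
                    }
                    counts[best]++;
                    for (int d = 0; d < dim; d++) sums[best][d] += p[d];
                }
                double[][] next = new double[k][];
                for (int i = 0; i < k; i++) {                              // update step
                    if (counts[i] == 0) { next[i] = centers[i].clone(); continue; }
                    next[i] = new double[dim];
                    for (int d = 0; d < dim; d++) next[i][d] = sums[i][d] / counts[i];
                }
                boolean converged = true;                                  // stop when no center moved
                for (int i = 0; i < k; i++) {
                    if (!Arrays.equals(centers[i], next[i])) { converged = false; break; }
                }
                centers = next;
                if (converged) break;
            }
            return centers;
        }

        public static void main(String[] args) {
            double[][] pts = { {0, 0}, {0, 1}, {1, 0}, {9, 9}, {9, 10}, {10, 9} };
            System.out.println(Arrays.deepToString(kMeans(pts, 2, 0)));
        }
    }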

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java
new file mode 100644
index 0000000..f052832
--- /dev/null
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoClassDiscovery.java
@@ -0,0 +1,211 @@
+package com.yahoo.labs.samoa.moa.core;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ *    Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+
+/**
+ * Class for discovering classes via reflection in the java class path.
+ *
+ * @author Richard Kirkby ([email protected])
+ * @version $Revision: 7 $
+ */
+public class AutoClassDiscovery {
+
+    protected static final Map<String, String[]> cachedClassNames = new HashMap<String, String[]>();
+
+    public static String[] findClassNames(String packageNameToSearch) {
+        String[] cached = cachedClassNames.get(packageNameToSearch);
+        if (cached == null) {
+            HashSet<String> classNames = new HashSet<String>();
+            /*StringTokenizer pathTokens = new StringTokenizer(System
+            .getProperty("java.class.path"), File.pathSeparator);*/
+            String packageDirName = packageNameToSearch.replace('.',
+                    File.separatorChar);
+            String packageJarName = packageNameToSearch.length() > 0 ? (packageNameToSearch.replace('.', '/') + "/")
+                    : "";
+            String part = "";
+
+
+            AutoClassDiscovery adc = new AutoClassDiscovery();
+            URLClassLoader sysLoader = (URLClassLoader) adc.getClass().getClassLoader();
+            URL[] cl_urls = sysLoader.getURLs();
+
+            for (int i = 0; i < cl_urls.length; i++) {
+                part = cl_urls[i].toString();
+                if (part.startsWith("file:")) {
+                    part = part.replace(" ", "%20");
+                    try {
+                        File temp = new File(new java.net.URI(part));
+                        part = temp.getAbsolutePath();
+                    } catch (URISyntaxException e) {
+                        e.printStackTrace();
+                    }
+                }
+
+                // find classes
+                ArrayList<File> files = new ArrayList<File>();
+                File dir = new File(part);
+                if (dir.isDirectory()) {
+                    File root = new File(dir.toString() + File.separatorChar + packageDirName);
+                    String[] names = findClassesInDirectoryRecursive(root, "");
+                    classNames.addAll(Arrays.asList(names));
+                } else {
+                    try {
+                        JarFile jar = new JarFile(part);
+                        Enumeration<JarEntry> jarEntries = jar.entries();
+                        while (jarEntries.hasMoreElements()) {
+                            String jarEntry = jarEntries.nextElement().getName();
+                            if (jarEntry.startsWith(packageJarName)) {
+                                String relativeName = jarEntry.substring(packageJarName.length());
+                                if (relativeName.endsWith(".class")) {
+                                    relativeName = relativeName.replace('/',
+                                            '.');
+                                    classNames.add(relativeName.substring(0,
+                                            relativeName.length()
+                                            - ".class".length()));
+                                }
+                            }
+                        }
+                    } catch (IOException ignored) {
+                        // ignore unreadable files
+                    }
+                }
+            }
+
+            /*while (pathTokens.hasMoreElements()) {
+            String pathToSearch = pathTokens.nextElement().toString();
+            if (pathToSearch.endsWith(".jar")) {
+            try {
+            JarFile jar = new JarFile(pathToSearch);
+            Enumeration<JarEntry> jarEntries = jar.entries();
+            while (jarEntries.hasMoreElements()) {
+            String jarEntry = jarEntries.nextElement()
+            .getName();
+            if (jarEntry.startsWith(packageJarName)) {
+            String relativeName = jarEntry
+            .substring(packageJarName.length());
+            if (relativeName.endsWith(".class")) {
+            relativeName = relativeName.replace('/',
+            '.');
+            classNames.add(relativeName.substring(0,
+            relativeName.length()
+            - ".class".length()));
+            }
+            }
+            }
+            } catch (IOException ignored) {
+            // ignore unreadable files
+            }
+            } else {
+            File root = new File(pathToSearch + File.separatorChar
+            + packageDirName);
+            String[] names = findClassesInDirectoryRecursive(root, "");
+            for (String name : names) {
+            classNames.add(name);
+            }
+            }
+            } */
+            cached = classNames.toArray(new String[classNames.size()]);
+            Arrays.sort(cached);
+            cachedClassNames.put(packageNameToSearch, cached);
+        }
+        return cached;
+    }
+
+    protected static String[] findClassesInDirectoryRecursive(File root,
+            String packagePath) {
+        HashSet<String> classNames = new HashSet<String>();
+        if (root.isDirectory()) {
+            String[] list = root.list();
+            for (String string : list) {
+                if (string.endsWith(".class")) {
+                    classNames.add(packagePath
+                            + string.substring(0, string.length()
+                            - ".class".length()));
+                } else {
+                    File testDir = new File(root.getPath() + File.separatorChar
+                            + string);
+                    if (testDir.isDirectory()) {
+                        String[] names = findClassesInDirectoryRecursive(
+                                testDir, packagePath + string + ".");
+                        classNames.addAll(Arrays.asList(names));
+                    }
+                }
+            }
+        }
+        return classNames.toArray(new String[classNames.size()]);
+    }
+
+    public static Class[] findClassesOfType(String packageNameToSearch,
+            Class<?> typeDesired) {
+        ArrayList<Class<?>> classesFound = new ArrayList<Class<?>>();
+        String[] classNames = findClassNames(packageNameToSearch);
+        for (String className : classNames) {
+            String fullName = packageNameToSearch.length() > 0 ? (packageNameToSearch
+                    + "." + className)
+                    : className;
+            if (isPublicConcreteClassOfType(fullName, typeDesired)) {
+                try {
+                    classesFound.add(Class.forName(fullName));
+                } catch (Exception ignored) {
+                    // ignore classes that we cannot instantiate
+                }
+            }
+        }
+        return classesFound.toArray(new Class[classesFound.size()]);
+    }
+
+    public static boolean isPublicConcreteClassOfType(String className,
+            Class<?> typeDesired) {
+        Class<?> testClass = null;
+        try {
+            testClass = Class.forName(className);
+        } catch (Exception e) {
+            return false;
+        }
+        int classModifiers = testClass.getModifiers();
+        return (java.lang.reflect.Modifier.isPublic(classModifiers)
+                && !java.lang.reflect.Modifier.isAbstract(classModifiers)
+                && typeDesired.isAssignableFrom(testClass) && hasEmptyConstructor(testClass));
+    }
+
+    public static boolean hasEmptyConstructor(Class<?> type) {
+        try {
+            type.getConstructor();
+            return true;
+        } catch (Exception ignored) {
+            return false;
+        }
+    }
+}
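
As a hedged usage sketch of the class above: findClassesOfType(...) walks classpath directories and jars, keeping only public, concrete classes with a no-argument constructor that are assignable to the requested type. The class below is hypothetical, and the package name passed in is an assumption about where Clusterer implementations live.

    import com.yahoo.labs.samoa.moa.clusterers.Clusterer;
    import com.yahoo.labs.samoa.moa.core.AutoClassDiscovery;

    public class DiscoverySketch {
        public static void main(String[] args) {
            // Assumed package; adjust to wherever Clusterer implementations are packaged.
            Class[] found = AutoClassDiscovery.findClassesOfType(
                    "com.yahoo.labs.samoa.moa.clusterers", Clusterer.class);
            for (Class<?> c : found) {
                System.out.println(c.getName());
            }
        }
    }

Because results are cached per package name, repeated lookups for the same package reuse the first scan.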

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java
new file mode 100644
index 0000000..2b8ed09
--- /dev/null
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/AutoExpandVector.java
@@ -0,0 +1,133 @@
+package com.yahoo.labs.samoa.moa.core;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ *    Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+import com.yahoo.labs.samoa.moa.AbstractMOAObject;
+import com.yahoo.labs.samoa.moa.MOAObject;
+
+/**
+ * Vector with the capability of automatic expansion.
+ *
+ * @author Richard Kirkby ([email protected])
+ * @version $Revision: 7 $
+ */
+public class AutoExpandVector<T> extends ArrayList<T> implements MOAObject {
+
+    private static final long serialVersionUID = 1L;
+
+    public AutoExpandVector() {
+        super(0);
+    }
+    
+    public AutoExpandVector(int size) {
+        super(size);
+    }
+
+    @Override
+    public void add(int pos, T obj) {
+        if (pos > size()) {
+            while (pos > size()) {
+                add(null);
+            }
+            trimToSize();
+        }
+        super.add(pos, obj);
+    }
+
+    @Override
+    public T get(int pos) {
+        return ((pos >= 0) && (pos < size())) ? super.get(pos) : null;
+    }
+
+    @Override
+    public T set(int pos, T obj) {
+        if (pos >= size()) {
+            add(pos, obj);
+            return null;
+        }
+        return super.set(pos, obj);
+    }
+
+    @Override
+    public boolean add(T arg0) {
+        boolean result = super.add(arg0);
+        trimToSize();
+        return result;
+    }
+
+    @Override
+    public boolean addAll(Collection<? extends T> arg0) {
+        boolean result = super.addAll(arg0);
+        trimToSize();
+        return result;
+    }
+
+    @Override
+    public boolean addAll(int arg0, Collection<? extends T> arg1) {
+        boolean result = super.addAll(arg0, arg1);
+        trimToSize();
+        return result;
+    }
+
+    @Override
+    public void clear() {
+        super.clear();
+        trimToSize();
+    }
+
+    @Override
+    public T remove(int arg0) {
+        T result = super.remove(arg0);
+        trimToSize();
+        return result;
+    }
+
+    @Override
+    public boolean remove(Object arg0) {
+        boolean result = super.remove(arg0);
+        trimToSize();
+        return result;
+    }
+
+    @Override
+    protected void removeRange(int arg0, int arg1) {
+        super.removeRange(arg0, arg1);
+        trimToSize();
+    }
+
+    @Override
+    public MOAObject copy() {
+        return AbstractMOAObject.copy(this);
+    }
+
+    @Override
+    public int measureByteSize() {
+        return AbstractMOAObject.measureByteSize(this);
+    }
+
+    @Override
+    public void getDescription(StringBuilder sb, int indent) {
+        // TODO Auto-generated method stub
+    }
+}
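
The overrides above give the vector its auto-expand semantics: set() and add() beyond the current size pad the gap with nulls instead of throwing, and get() with an out-of-range index returns null rather than raising IndexOutOfBoundsException. A small usage sketch (a hypothetical class, not part of the patch):

    import com.yahoo.labs.samoa.moa.core.AutoExpandVector;

    public class AutoExpandSketch {
        public static void main(String[] args) {
            AutoExpandVector<String> v = new AutoExpandVector<String>();
            v.set(3, "d");                    // pads indices 0..2 with null, then stores "d"
            System.out.println(v.size());     // 4
            System.out.println(v.get(3));     // "d"
            System.out.println(v.get(10));    // null, no IndexOutOfBoundsException
        }
    }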

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DataPoint.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DataPoint.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DataPoint.java
new file mode 100644
index 0000000..21ad3df
--- /dev/null
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/DataPoint.java
@@ -0,0 +1,134 @@
+
+package com.yahoo.labs.samoa.moa.core;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ *    Copyright (C) 2013 RWTH Aachen University, Germany
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.TreeSet;
+
+import com.yahoo.labs.samoa.instances.Attribute;
+import com.yahoo.labs.samoa.instances.DenseInstance;
+import com.yahoo.labs.samoa.instances.Instance;
+
+public class DataPoint extends DenseInstance{
+    
+       private static final long serialVersionUID = 1L;
+       
+       protected int timestamp;
+    private HashMap<String, String> measure_values;
+    
+    protected int noiseLabel;
+
+    public DataPoint(Instance nextInstance, Integer timestamp) {
+        super(nextInstance);
+        this.setDataset(nextInstance.dataset());
+        this.timestamp = timestamp;
+        measure_values = new HashMap<String, String>();
+        
+        Attribute classLabel = dataset().classAttribute();
+        noiseLabel = classLabel.indexOfValue("noise");         // -1 returned if there is no noise
+    }
+
+    public void updateWeight(int cur_timestamp, double decay_rate){
+        setWeight(Math.pow(2,(-1.0)*decay_rate*(cur_timestamp-timestamp)));
+    }
+
+    public void setMeasureValue(String measureKey, double value){
+        synchronized(measure_values){
+            measure_values.put(measureKey, Double.toString(value));
+        }
+    }
+
+    public void setMeasureValue(String measureKey,String value){
+        synchronized(measure_values){
+            measure_values.put(measureKey, value);
+        }
+    }
+
+    public String getMeasureValue(String measureKey){
+        if(measure_values.containsKey(measureKey))
+            synchronized(measure_values){
+                return measure_values.get(measureKey);
+            }
+        else
+            return "";
+    }
+
+    public int getTimestamp(){
+        return timestamp;
+    }
+    
+    public String getInfo(int x_dim, int y_dim) {
+        StringBuffer sb = new StringBuffer();
+        sb.append("<html><table>");
+        sb.append("<tr><td>Point</td><td>"+timestamp+"</td></tr>");
+        for (int i = 0; i < numAttributes() - 1; i++) { //m_AttValues.length
+            String label = "Dim "+i;
+            if(i == x_dim)
+                 label = "<b>X</b>";
+            if(i == y_dim)
+                 label = "<b>Y</b>";
+            sb.append("<tr><td>"+label+"</td><td>"+value(i)+"</td></tr>");
+        }
+        sb.append("<tr><td>Decay</td><td>"+weight()+"</td></tr>");
+        sb.append("<tr><td>True cluster</td><td>"+classValue()+"</td></tr>");
+        sb.append("</table>");
+        sb.append("<br>");
+        sb.append("<b>Evaluation</b><br>");
+        sb.append("<table>");
+
+        TreeSet<String> sortedset;
+        synchronized(measure_values){
+            sortedset = new TreeSet<String>(measure_values.keySet());
+        }
+
+        Iterator<String> miterator = sortedset.iterator();
+        while(miterator.hasNext()) {
+            String key = miterator.next();
+            sb.append("<tr><td>"+key+"</td><td>"+measure_values.get(key)+"</td></tr>");
+        }
+
+        sb.append("</table></html>");
+        return sb.toString();
+    }
+
+    public double getDistance(DataPoint other){
+        double distance = 0.0;
+        int numDims = numAttributes();
+        if(classIndex()!=0) numDims--;
+
+        for (int i = 0; i < numDims; i++) {
+            double d = value(i) - other.value(i);
+            distance += d * d;
+        }
+        return Math.sqrt(distance);
+    }
+
+    
+    public boolean isNoise() {
+               return (int)classValue() == noiseLabel;
+       }
+    
+    public double getNoiseLabel() {
+       return noiseLabel;
+    }
+}
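
updateWeight() above applies exponential decay with base 2: a point's weight after delta timestamps is 2^(-decay_rate * delta), so the half-life is 1/decay_rate timestamps. A standalone sketch (hypothetical class and decay rate, for illustration only):

    public class DecaySketch {
        public static void main(String[] args) {
            double decayRate = 0.01;          // assumed rate, chosen only for illustration
            int arrival = 1000;               // timestamp at which the point arrived
            for (int now : new int[] { 1000, 1100, 1200, 1500 }) {
                // Same formula as DataPoint.updateWeight(): 2^(-decayRate * (now - arrival))
                double w = Math.pow(2, (-1.0) * decayRate * (now - arrival));
                System.out.printf("t=%d  weight=%.4f%n", now, w);
            }
        }
    }

With decayRate = 0.01 the point loses half its weight every 100 timestamps (1.0, 0.5, 0.25, ~0.03 in the loop above).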
