http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/cluster/SphereCluster.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/cluster/SphereCluster.java b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/SphereCluster.java
new file mode 100644
index 0000000..fdf2af6
--- /dev/null
+++ b/samoa-api/src/main/java/org/apache/samoa/moa/cluster/SphereCluster.java
@@ -0,0 +1,364 @@
+package org.apache.samoa.moa.cluster;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.List;
+import java.util.Random;
+
+import org.apache.samoa.instances.DenseInstance;
+import org.apache.samoa.instances.Instance;
+
+/**
+ * A simple implementation of the <code>Cluster</code> interface representing spherical clusters. The inclusion
+ * probability is one inside the sphere and zero everywhere else.
+ * 
+ */
+public class SphereCluster extends Cluster {
+
+  private static final long serialVersionUID = 1L;
+
+  private double[] center;
+  private double radius;
+  private double weight;
+
+  public SphereCluster(double[] center, double radius) {
+    this(center, radius, 1.0);
+  }
+
+  public SphereCluster() {
+  }
+
+  public SphereCluster(double[] center, double radius, double weightedSize) {
+    this();
+    this.center = center;
+    this.radius = radius;
+    this.weight = weightedSize;
+  }
+
+  public SphereCluster(int dimensions, double radius, Random random) {
+    this();
+    this.center = new double[dimensions];
+    this.radius = radius;
+
+    // Position randomly but keep hypersphere inside the boundaries
+    double interval = 1.0 - 2 * radius;
+    for (int i = 0; i < center.length; i++) {
+      this.center[i] = (random.nextDouble() * interval) + radius;
+    }
+    this.weight = 0.0;
+  }
+
+  public SphereCluster(List<? extends Instance> instances, int dimension) {
+    this();
+    if (instances == null || instances.size() <= 0)
+      return;
+
+    weight = instances.size();
+
+    Miniball mb = new Miniball(dimension);
+    mb.clear();
+
+    for (Instance instance : instances) {
+      mb.check_in(instance.toDoubleArray());
+    }
+
+    mb.build();
+    center = mb.center();
+    radius = mb.radius();
+    mb.clear();
+  }
+
+  /**
+   * Checks whether two <code>SphereCluster</code>s overlap. NOTE: overlapRadiusDegree only estimates the overlap
+   * from the centers and the radii, so it is not the real overlap.
+   * 
+   * TODO: should we do this by MC to get the real overlap???
+   * 
+   * @param other
+   *          the cluster to check against
+   * @return 0 if the spheres are disjoint, 1 if one lies within the other, otherwise a value in between
+   */
+  public double overlapRadiusDegree(SphereCluster other) {
+
+    double[] center0 = getCenter();
+    double radius0 = getRadius();
+
+    double[] center1 = other.getCenter();
+    double radius1 = other.getRadius();
+
+    double radiusBig;
+    double radiusSmall;
+    if (radius0 < radius1) {
+      radiusBig = radius1;
+      radiusSmall = radius0;
+    }
+    else {
+      radiusBig = radius0;
+      radiusSmall = radius1;
+    }
+
+    double dist = 0;
+    for (int i = 0; i < center0.length; i++) {
+      double delta = center0[i] - center1[i];
+      dist += delta * delta;
+    }
+    dist = Math.sqrt(dist);
+
+    if (dist > radiusSmall + radiusBig)
+      return 0;
+    if (dist + radiusSmall <= radiusBig) {
+      // one lies within the other
+      return 1;
+    }
+    else {
+      return (radiusSmall + radiusBig - dist) / (2 * radiusSmall);
+    }
+  }
+
+  public void combine(SphereCluster cluster) {
+    double[] center = getCenter();
+    double[] newcenter = new double[center.length];
+    double[] other_center = cluster.getCenter();
+    double other_weight = cluster.getWeight();
+    double other_radius = cluster.getRadius();
+
+    for (int i = 0; i < center.length; i++) {
+      newcenter[i] = (center[i] * getWeight() + other_center[i] * other_weight) / (getWeight() + other_weight);
+    }
+
+    // the new radius must still cover both original spheres around the new center
+    double r_0 = getRadius() + Math.abs(distance(center, newcenter));
+    double r_1 = other_radius + Math.abs(distance(other_center, newcenter));
+    this.center = newcenter;
+    radius = Math.max(r_0, r_1);
+    weight += other_weight;
+  }
+
+  public void merge(SphereCluster cluster) {
+    double[] c0 = getCenter();
+    double w0 = getWeight();
+    double r0 = getRadius();
+
+    double[] c1 = cluster.getCenter();
+    double w1 = cluster.getWeight();
+    double r1 = cluster.getRadius();
+
+    // vector
+    double[] v = new double[c0.length];
+    // center distance
+    double d = 0;
+
+    for (int i = 0; i < c0.length; i++) {
+      v[i] = c0[i] - c1[i];
+      d += v[i] * v[i];
+    }
+    d = Math.sqrt(d);
+
+    double r;
+    double[] c = new double[c0.length];
+
+    // one lies within the other
+    if (d + r0 <= r1 || d + r1 <= r0) {
+      if (d + r0 <= r1) {
+        r = r1;
+        c = c1;
+      }
+      else {
+        r = r0;
+        c = c0;
+      }
+    }
+    else {
+      r = (r0 + r1 + d) / 2.0;
+      for (int i = 0; i < c.length; i++) {
+        c[i] = c1[i] - v[i] / d * (r1 - r);
+      }
+    }
+
+    setCenter(c);
+    setRadius(r);
+    setWeight(w0 + w1);
+
+  }
+
+  @Override
+  public double[] getCenter() {
+    double[] copy = new double[center.length];
+    System.arraycopy(center, 0, copy, 0, center.length);
+    return copy;
+  }
+
+  public void setCenter(double[] center) {
+    this.center = center;
+  }
+
+  public double getRadius() {
+    return radius;
+  }
+
+  public void setRadius(double radius) {
+    this.radius = radius;
+  }
+
+  @Override
+  public double getWeight() {
+    return weight;
+  }
+
+  public void setWeight(double weight) {
+    this.weight = weight;
+  }
+
+  @Override
+  public double getInclusionProbability(Instance instance) {
+    if (getCenterDistance(instance) <= getRadius()) {
+      return 1.0;
+    }
+    return 0.0;
+  }
+
+  public double getCenterDistance(Instance instance) {
+    double distance = 0.0;
+    // get the center through getCenter so subclasses have a chance
+    double[] center = getCenter();
+    for (int i = 0; i < center.length; i++) {
+      double d = center[i] - instance.value(i);
+      distance += d * d;
+    }
+    return Math.sqrt(distance);
+  }
+
+  public double getCenterDistance(SphereCluster other) {
+    return distance(getCenter(), other.getCenter());
+  }
+
+  /*
+   * The minimal distance between the surfaces of two clusters; negative if
+   * the two clusters overlap.
+   */
+  public double getHullDistance(SphereCluster other) {
+    double distance;
+    // get the center through getCenter so subclasses have a chance
+    double[] center0 = getCenter();
+    double[] center1 = other.getCenter();
+    distance = distance(center0, center1);
+
+    distance = distance - getRadius() - other.getRadius();
+    return distance;
+  }
+
+  /**
+   * When a cluster loses points, the new minimal bounding sphere can be partly outside of the originating cluster.
+   * If another cluster is right next to the original cluster (without overlapping), the new cluster can overlap with
+   * this second cluster. overlapSave tells you whether the current cluster can degenerate so much that it overlaps
+   * with cluster 'other'.
+   * 
+   * @param other
+   *          the potentially overlapping cluster
+   * @return true if the cluster can potentially overlap
+   */
+  public boolean overlapSave(SphereCluster other) {
+    // use basic geometry to figure out the maximal degenerated cluster
+    // comes down to Max(radius *(sin alpha + cos alpha)) which is
+    double minDist = Math.sqrt(2) * (getRadius() + other.getRadius());
+    double diff = getCenterDistance(other) - minDist;
+
+    return diff > 0;
+  }
+
+  private double distance(double[] v1, double[] v2) {
+    double distance = 0.0;
+    for (int i = 0; i < v1.length; i++) {
+      double d = v1[i] - v2[i];
+      distance += d * d;
+    }
+    return Math.sqrt(distance);
+  }
+
+  public double[] getDistanceVector(Instance instance) {
+    return distanceVector(getCenter(), instance.toDoubleArray());
+  }
+
+  public double[] getDistanceVector(SphereCluster other) {
+    return distanceVector(getCenter(), other.getCenter());
+  }
+
+  private double[] distanceVector(double[] v1, double[] v2) {
+    double[] v = new double[v1.length];
+    for (int i = 0; i < v1.length; i++) {
+      v[i] = v2[i] - v1[i];
+    }
+    return v;
+  }
+
+  /**
+   * Samples this cluster by returning a point from inside it.
+   * 
+   * @param random
+   *          a random number source
+   * @return a point that lies inside this cluster
+   */
+  public Instance sample(Random random) {
+    // Create sample in hypersphere coordinates
+    // get the center through getCenter so subclasses have a chance
+    double[] center = getCenter();
+
+    final int dimensions = center.length;
+
+    final double sin[] = new double[dimensions - 1];
+    final double cos[] = new double[dimensions - 1];
+    final double length = random.nextDouble() * getRadius();
+
+    double lastValue = 1.0;
+    for (int i = 0; i < dimensions - 1; i++) {
+      double angle = random.nextDouble() * 2 * Math.PI;
+      sin[i] = lastValue * Math.sin(angle); // Store cumulative values
+      cos[i] = Math.cos(angle);
+      lastValue = sin[i];
+    }
+
+    // Calculate cartesian coordinates
+    double res[] = new double[dimensions];
+
+    // First value uses only cosines
+    res[0] = center[0] + length * cos[0];
+
+    // Loop through 'middle' coordinates which use cosines and sines
+    for (int i = 1; i < dimensions - 1; i++) {
+      res[i] = center[i] + length * sin[i - 1] * cos[i];
+    }
+
+    // Last value uses only sines
+    res[dimensions - 1] = center[dimensions - 1] + length * sin[dimensions - 2];
+
+    return new DenseInstance(1.0, res);
+  }
+
+  @Override
+  protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue) {
+    super.getClusterSpecificInfo(infoTitle, infoValue);
+    infoTitle.add("Radius");
+    infoValue.add(Double.toString(getRadius()));
+  }
+
+}
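
A minimal usage sketch of the SphereCluster API introduced above (an editorial illustration, not part of this commit); the demo class name and the sample coordinates are made up:

import java.util.Random;

import org.apache.samoa.instances.DenseInstance;
import org.apache.samoa.instances.Instance;
import org.apache.samoa.moa.cluster.SphereCluster;

public class SphereClusterDemo {
  public static void main(String[] args) {
    // Two unit-weight spheres in the unit square; b is shifted along the x axis.
    SphereCluster a = new SphereCluster(new double[] { 0.30, 0.30 }, 0.10, 1.0);
    SphereCluster b = new SphereCluster(new double[] { 0.45, 0.30 }, 0.10, 1.0);

    // Inclusion is binary: 1.0 inside the radius, 0.0 outside.
    Instance inside = new DenseInstance(1.0, new double[] { 0.32, 0.31 });
    System.out.println(a.getInclusionProbability(inside)); // prints 1.0

    // Center/radius based overlap estimate: 0 = disjoint, 1 = one sphere inside the other.
    System.out.println(a.overlapRadiusDegree(b));

    // Merging b into a yields a sphere that covers both originals and sums the weights.
    a.merge(b);
    System.out.println(a.getRadius() + " / " + a.getWeight());

    // Draw a random point from inside the merged sphere.
    Instance sampled = a.sample(new Random(42));
    System.out.println(sampled);
  }
}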

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/AbstractClusterer.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/AbstractClusterer.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/AbstractClusterer.java
new file mode 100644
index 0000000..02a5191
--- /dev/null
+++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/AbstractClusterer.java
@@ -0,0 +1,299 @@
+package org.apache.samoa.moa.clusterers;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.samoa.instances.Instance;
+import org.apache.samoa.instances.Instances;
+import org.apache.samoa.instances.InstancesHeader;
+import org.apache.samoa.moa.cluster.Clustering;
+import org.apache.samoa.moa.core.Measurement;
+import org.apache.samoa.moa.core.ObjectRepository;
+import org.apache.samoa.moa.core.StringUtils;
+import org.apache.samoa.moa.options.AbstractOptionHandler;
+import org.apache.samoa.moa.tasks.TaskMonitor;
+
+import com.github.javacliparser.FlagOption;
+import com.github.javacliparser.IntOption;
+
+public abstract class AbstractClusterer extends AbstractOptionHandler
+    implements Clusterer {
+
+  @Override
+  public String getPurposeString() {
+    return "MOA Clusterer: " + getClass().getCanonicalName();
+  }
+
+  protected InstancesHeader modelContext;
+
+  protected double trainingWeightSeenByModel = 0.0;
+
+  protected int randomSeed = 1;
+
+  protected IntOption randomSeedOption;
+
+  public FlagOption evaluateMicroClusteringOption;
+
+  protected Random clustererRandom;
+
+  protected Clustering clustering;
+
+  public AbstractClusterer() {
+    if (isRandomizable()) {
+      this.randomSeedOption = new IntOption("randomSeed", 'r',
+          "Seed for random behaviour of the Clusterer.", 1);
+    }
+
+    if (implementsMicroClusterer()) {
+      this.evaluateMicroClusteringOption =
+          new FlagOption("evaluateMicroClustering", 'M',
+              "Evaluate the underlying microclustering instead of the macro clustering");
+    }
+  }
+
+  @Override
+  public void prepareForUseImpl(TaskMonitor monitor,
+      ObjectRepository repository) {
+    if (this.randomSeedOption != null) {
+      this.randomSeed = this.randomSeedOption.getValue();
+    }
+    if (!trainingHasStarted()) {
+      resetLearning();
+    }
+    clustering = new Clustering();
+  }
+
+  public void setModelContext(InstancesHeader ih) {
+    if ((ih != null) && (ih.classIndex() < 0)) {
+      throw new IllegalArgumentException(
+          "Context for a Clusterer must include a class to learn");
+    }
+    if (trainingHasStarted()
+        && (this.modelContext != null)
+        && ((ih == null) || !contextIsCompatible(this.modelContext, ih))) {
+      throw new IllegalArgumentException(
+          "New context is not compatible with existing model");
+    }
+    this.modelContext = ih;
+  }
+
+  public InstancesHeader getModelContext() {
+    return this.modelContext;
+  }
+
+  public void setRandomSeed(int s) {
+    this.randomSeed = s;
+    if (this.randomSeedOption != null) {
+      // keep option consistent
+      this.randomSeedOption.setValue(s);
+    }
+  }
+
+  public boolean trainingHasStarted() {
+    return this.trainingWeightSeenByModel > 0.0;
+  }
+
+  public double trainingWeightSeenByModel() {
+    return this.trainingWeightSeenByModel;
+  }
+
+  public void resetLearning() {
+    this.trainingWeightSeenByModel = 0.0;
+    if (isRandomizable()) {
+      this.clustererRandom = new Random(this.randomSeed);
+    }
+    resetLearningImpl();
+  }
+
+  public void trainOnInstance(Instance inst) {
+    if (inst.weight() > 0.0) {
+      this.trainingWeightSeenByModel += inst.weight();
+      trainOnInstanceImpl(inst);
+    }
+  }
+
+  public Measurement[] getModelMeasurements() {
+    List<Measurement> measurementList = new LinkedList<Measurement>();
+    measurementList.add(new Measurement("model training instances",
+        trainingWeightSeenByModel()));
+    measurementList.add(new Measurement("model serialized size (bytes)",
+        measureByteSize()));
+    Measurement[] modelMeasurements = getModelMeasurementsImpl();
+    if (modelMeasurements != null) {
+      for (Measurement measurement : modelMeasurements) {
+        measurementList.add(measurement);
+      }
+    }
+    // add average of sub-model measurements
+    Clusterer[] subModels = getSubClusterers();
+    if ((subModels != null) && (subModels.length > 0)) {
+      List<Measurement[]> subMeasurements = new LinkedList<Measurement[]>();
+      for (Clusterer subModel : subModels) {
+        if (subModel != null) {
+          subMeasurements.add(subModel.getModelMeasurements());
+        }
+      }
+      Measurement[] avgMeasurements = Measurement
+          .averageMeasurements(subMeasurements
+              .toArray(new Measurement[subMeasurements.size()][]));
+      for (Measurement measurement : avgMeasurements) {
+        measurementList.add(measurement);
+      }
+    }
+    return measurementList.toArray(new Measurement[measurementList.size()]);
+  }
+
+  public void getDescription(StringBuilder out, int indent) {
+    StringUtils.appendIndented(out, indent, "Model type: ");
+    out.append(this.getClass().getName());
+    StringUtils.appendNewline(out);
+    Measurement.getMeasurementsDescription(getModelMeasurements(), out,
+        indent);
+    StringUtils.appendNewlineIndented(out, indent, "Model description:");
+    StringUtils.appendNewline(out);
+    if (trainingHasStarted()) {
+      getModelDescription(out, indent);
+    } else {
+      StringUtils.appendIndented(out, indent,
+          "Model has not been trained.");
+    }
+  }
+
+  public Clusterer[] getSubClusterers() {
+    return null;
+  }
+
+  @Override
+  public Clusterer copy() {
+    return (Clusterer) super.copy();
+  }
+
+  // public boolean correctlyClassifies(Instance inst) {
+  // return Utils.maxIndex(getVotesForInstance(inst)) == (int) inst
+  // .classValue();
+  // }
+
+  public String getClassNameString() {
+    return InstancesHeader.getClassNameString(this.modelContext);
+  }
+
+  public String getClassLabelString(int classLabelIndex) {
+    return InstancesHeader.getClassLabelString(this.modelContext,
+        classLabelIndex);
+  }
+
+  public String getAttributeNameString(int attIndex) {
+    return InstancesHeader.getAttributeNameString(this.modelContext,
+        attIndex);
+  }
+
+  public String getNominalValueString(int attIndex, int valIndex) {
+    return InstancesHeader.getNominalValueString(this.modelContext,
+        attIndex, valIndex);
+  }
+
+  // originalContext notnull
+  // newContext notnull
+  public static boolean contextIsCompatible(InstancesHeader originalContext,
+      InstancesHeader newContext) {
+    // rule 1: num classes can increase but never decrease
+    // rule 2: num attributes can increase but never decrease
+    // rule 3: num nominal attribute values can increase but never decrease
+    // rule 4: attribute types must stay in the same order (although class
+    // can
+    // move; is always skipped over)
+    // attribute names are free to change, but should always still represent
+    // the original attributes
+    if (newContext.numClasses() < originalContext.numClasses()) {
+      return false; // rule 1
+    }
+    if (newContext.numAttributes() < originalContext.numAttributes()) {
+      return false; // rule 2
+    }
+    int oPos = 0;
+    int nPos = 0;
+    while (oPos < originalContext.numAttributes()) {
+      if (oPos == originalContext.classIndex()) {
+        oPos++;
+        if (!(oPos < originalContext.numAttributes())) {
+          break;
+        }
+      }
+      if (nPos == newContext.classIndex()) {
+        nPos++;
+      }
+      if (originalContext.attribute(oPos).isNominal()) {
+        if (!newContext.attribute(nPos).isNominal()) {
+          return false; // rule 4
+        }
+        if (newContext.attribute(nPos).numValues() < originalContext
+            .attribute(oPos).numValues()) {
+          return false; // rule 3
+        }
+      } else {
+        assert (originalContext.attribute(oPos).isNumeric());
+        if (!newContext.attribute(nPos).isNumeric()) {
+          return false; // rule 4
+        }
+      }
+      oPos++;
+      nPos++;
+    }
+    return true; // all checks clear
+  }
+
+  // reason for ...Impl methods:
+  // ease programmer burden by not requiring them to remember calls to super
+  // in overridden methods & will produce compiler errors if not overridden
+
+  public abstract void resetLearningImpl();
+
+  public abstract void trainOnInstanceImpl(Instance inst);
+
+  protected abstract Measurement[] getModelMeasurementsImpl();
+
+  public abstract void getModelDescription(StringBuilder out, int indent);
+
+  protected static int modelAttIndexToInstanceAttIndex(int index,
+      Instance inst) {
+    return inst.classIndex() > index ? index : index + 1;
+  }
+
+  protected static int modelAttIndexToInstanceAttIndex(int index,
+      Instances insts) {
+    return insts.classIndex() > index ? index : index + 1;
+  }
+
+  public boolean implementsMicroClusterer() {
+    return false;
+  }
+
+  public boolean keepClassLabel() {
+    return false;
+  }
+
+  public Clustering getMicroClusteringResult() {
+    return null;
+  };
+}
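
A hedged sketch (not part of the commit) of the minimal surface a concrete learner has to provide when extending AbstractClusterer: the *Impl/description hooks plus the interface methods the abstract class leaves open. The class name and the no-op bodies are illustrative only:

import org.apache.samoa.instances.Instance;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.clusterers.AbstractClusterer;
import org.apache.samoa.moa.core.Measurement;

public class NoOpClusterer extends AbstractClusterer {

  private static final long serialVersionUID = 1L;

  @Override
  public void resetLearningImpl() {
    // reset any model state here; called from resetLearning()
  }

  @Override
  public void trainOnInstanceImpl(Instance inst) {
    // update the model; only reached for instances with weight > 0
  }

  @Override
  protected Measurement[] getModelMeasurementsImpl() {
    return new Measurement[0];
  }

  @Override
  public void getModelDescription(StringBuilder out, int indent) {
    out.append("no-op clusterer");
  }

  @Override
  public boolean isRandomizable() {
    return false;
  }

  @Override
  public Clustering getClusteringResult() {
    return new Clustering();
  }

  @Override
  public double[] getVotesForInstance(Instance inst) {
    return new double[0];
  }
}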

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/ClusterGenerator.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/ClusterGenerator.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/ClusterGenerator.java
new file mode 100644
index 0000000..b9f80b5
--- /dev/null
+++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/ClusterGenerator.java
@@ -0,0 +1,358 @@
+package org.apache.samoa.moa.clusterers;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.samoa.instances.Instance;
+import org.apache.samoa.moa.cluster.Clustering;
+import org.apache.samoa.moa.cluster.SphereCluster;
+import org.apache.samoa.moa.core.DataPoint;
+import org.apache.samoa.moa.core.Measurement;
+
+import com.github.javacliparser.FloatOption;
+import com.github.javacliparser.IntOption;
+
+public class ClusterGenerator extends AbstractClusterer {
+
+  private static final long serialVersionUID = 1L;
+
+  public IntOption timeWindowOption = new IntOption("timeWindow",
+      't', "Range of the window.", 1000);
+
+  public FloatOption radiusDecreaseOption = new FloatOption("radiusDecrease", 'r',
+      "Percentage of the radius that is cut off (0: no change, 1: radius of 0).", 0, 0, 1);
+
+  public FloatOption radiusIncreaseOption = new FloatOption("radiusIncrease", 'R',
+      "Percentage by which the radius is increased (0: no change, 1: radius 100% bigger).", 0, 0, 1);
+
+  public FloatOption positionOffsetOption = new FloatOption("positionOffset", 'p',
+      "Offset of the cluster center (0: no change, 1: distance between centers is twice the original radius).", 0, 0, 1);
+
+  public FloatOption clusterRemoveOption = new FloatOption("clusterRemove", 'D',
+      "Deletes complete clusters from the clustering.", 0, 0, 1);
+
+  public FloatOption joinClustersOption = new FloatOption("joinClusters", 'j',
+      "Join two clusters if their hull distance is less than minRadius times this factor.", 0, 0, 1);
+
+  public FloatOption clusterAddOption = new FloatOption("clusterAdd", 'A',
+      "Adds additional clusters.", 0, 0, 1);
+
+  private static double err_intervall_width = 0.0;
+  private ArrayList<DataPoint> points;
+  private int instanceCounter;
+  private int windowCounter;
+  private Random random;
+  private Clustering sourceClustering = null;
+
+  @Override
+  public void resetLearningImpl() {
+    points = new ArrayList<DataPoint>();
+    instanceCounter = 0;
+    windowCounter = 0;
+    random = new Random(227);
+
+    // joinClustersOption.set();
+    // evaluateMicroClusteringOption.set();
+  }
+
+  @Override
+  public void trainOnInstanceImpl(Instance inst) {
+    if (windowCounter >= timeWindowOption.getValue()) {
+      points.clear();
+      windowCounter = 0;
+    }
+    windowCounter++;
+    instanceCounter++;
+    points.add(new DataPoint(inst, instanceCounter));
+  }
+
+  @Override
+  public boolean implementsMicroClusterer() {
+    return true;
+  }
+
+  public void setSourceClustering(Clustering source) {
+    sourceClustering = source;
+  }
+
+  @Override
+  public Clustering getMicroClusteringResult() {
+    // System.out.println("Numcluster:"+clustering.size()+" / "+num);
+    // Clustering source_clustering = new Clustering(points, overlapThreshold,
+    // microInitMinPoints);
+    if (sourceClustering == null) {
+
+      System.out.println("You need to set a source clustering for the ClusterGenerator to work");
+      return null;
+    }
+    return alterClustering(sourceClustering);
+  }
+
+  public Clustering getClusteringResult() {
+    sourceClustering = new Clustering(points);
+    // if(sourceClustering == null){
+    // System.out.println("You need to set a source clustering for the ClusterGenerator to work");
+    // return null;
+    // }
+    return alterClustering(sourceClustering);
+  }
+
+  private Clustering alterClustering(Clustering scclustering) {
+    // percentage of the radius that will be cut off
+    // 0: no changes to radius
+    // 1: radius of 0
+    double errLevelRadiusDecrease = radiusDecreaseOption.getValue();
+
+    // 0: no changes to radius
+    // 1: radius 100% bigger
+    double errLevelRadiusIncrease = radiusIncreaseOption.getValue();
+
+    // 0: no changes
+    // 1: distance between centers is 2 * original radius
+    double errLevelPosition = positionOffsetOption.getValue();
+
+    int numRemoveCluster = (int) (clusterRemoveOption.getValue() * scclustering.size());
+
+    int numAddCluster = (int) (clusterAddOption.getValue() * scclustering.size());
+
+    for (int c = 0; c < numRemoveCluster; c++) {
+      int delId = random.nextInt(scclustering.size());
+      scclustering.remove(delId);
+    }
+
+    int numCluster = scclustering.size();
+    double[] err_seeds = new double[numCluster];
+    double err_seed_sum = 0.0;
+    double tmp_seed;
+    for (int i = 0; i < numCluster; i++) {
+      tmp_seed = random.nextDouble();
+      err_seeds[i] = err_seed_sum + tmp_seed;
+      err_seed_sum += tmp_seed;
+    }
+
+    double sumWeight = 0;
+    for (int i = 0; i < numCluster; i++) {
+      sumWeight += scclustering.get(i).getWeight();
+    }
+
+    Clustering clustering = new Clustering();
+
+    for (int i = 0; i < numCluster; i++) {
+      if (!(scclustering.get(i) instanceof SphereCluster)) {
+        System.out.println("Not a Sphere Cluster");
+        continue;
+      }
+      SphereCluster sourceCluster = (SphereCluster) scclustering.get(i);
+      double[] center = Arrays.copyOf(sourceCluster.getCenter(), sourceCluster.getCenter().length);
+      double weight = sourceCluster.getWeight();
+      double radius = sourceCluster.getRadius();
+
+      // move cluster center
+      if (errLevelPosition > 0) {
+        double errOffset = random.nextDouble() * err_intervall_width / 2.0;
+        double errOffsetDirection = ((random.nextBoolean()) ? 1 : -1);
+        double level = errLevelPosition + errOffsetDirection * errOffset;
+        double[] vector = new double[center.length];
+        double vectorLength = 0;
+        for (int d = 0; d < center.length; d++) {
+          vector[d] = (random.nextBoolean() ? 1 : -1) * random.nextDouble();
+          vectorLength += Math.pow(vector[d], 2);
+        }
+        vectorLength = Math.sqrt(vectorLength);
+
+        // max is when clusters are next to each other
+        double length = 2 * radius * level;
+
+        for (int d = 0; d < center.length; d++) {
+          // normalize length and then stretch to reach error position
+          vector[d] = vector[d] / vectorLength * length;
+        }
+        // System.out.println("Center "+Arrays.toString(center));
+        // System.out.println("Vector "+Arrays.toString(vector));
+        // check if error position is within bounds
+        double[] newCenter = new double[center.length];
+        for (int d = 0; d < center.length; d++) {
+          // check bounds, otherwise flip vector
+          if (center[d] + vector[d] >= 0 && center[d] + vector[d] <= 1) {
+            newCenter[d] = center[d] + vector[d];
+          }
+          else {
+            newCenter[d] = center[d] + (-1) * vector[d];
+          }
+        }
+        center = newCenter;
+        for (int d = 0; d < center.length; d++) {
+          if (newCenter[d] < 0 || newCenter[d] > 1) {
+            System.out.println("This shouldn't have happened, cluster center out of bounds: "
+                + Arrays.toString(newCenter));
+          }
+        }
+        // System.out.println("new Center "+Arrays.toString(newCenter));
+
+      }
+
+      // alter radius
+      if (errLevelRadiusDecrease > 0 || errLevelRadiusIncrease > 0) {
+        double errOffset = random.nextDouble() * err_intervall_width / 2.0;
+        int errOffsetDirection = ((random.nextBoolean()) ? 1 : -1);
+
+        if (errLevelRadiusDecrease > 0 && (errLevelRadiusIncrease == 0 || random.nextBoolean())) {
+          double level = (errLevelRadiusDecrease + errOffsetDirection * errOffset);// *sourceCluster.getWeight()/sumWeight;
+          level = (level < 0) ? 0 : level;
+          level = (level > 1) ? 1 : level;
+          radius *= (1 - level);
+        }
+        else {
+          double level = errLevelRadiusIncrease + errOffsetDirection * errOffset;
+          level = (level < 0) ? 0 : level;
+          level = (level > 1) ? 1 : level;
+          radius += radius * level;
+        }
+      }
+
+      SphereCluster newCluster = new SphereCluster(center, radius, weight);
+      newCluster.setMeasureValue("Source Cluster", "C" + sourceCluster.getId());
+
+      clustering.add(newCluster);
+    }
+
+    if (joinClustersOption.getValue() > 0) {
+      clustering = joinClusters(clustering);
+    }
+
+    // add new clusters by copying clusters and set a random center
+    for (int c = 0; c < numAddCluster; c++) {
+      int copyId = random.nextInt(clustering.size());
+      SphereCluster scorg = (SphereCluster) clustering.get(copyId);
+      int dim = scorg.getCenter().length;
+      double[] center = new double[dim];
+      double radius = scorg.getRadius();
+
+      boolean outofbounds = true;
+      int tryCounter = 0;
+      while (outofbounds && tryCounter < 20) {
+        tryCounter++;
+        outofbounds = false;
+        for (int j = 0; j < center.length; j++) {
+          center[j] = random.nextDouble();
+          if (center[j] - radius < 0 || center[j] + radius > 1) {
+            outofbounds = true;
+            break;
+          }
+        }
+      }
+      if (outofbounds) {
+        System.out.println("Couldn't place additional cluster");
+      }
+      else {
+        SphereCluster scnew = new SphereCluster(center, radius, scorg.getWeight() / 2);
+        scorg.setWeight(scorg.getWeight() - scnew.getWeight());
+        clustering.add(scnew);
+      }
+    }
+
+    return clustering;
+
+  }
+
+  private Clustering joinClusters(Clustering clustering) {
+
+    double radiusFactor = joinClustersOption.getValue();
+    boolean[] merged = new boolean[clustering.size()];
+
+    Clustering mclustering = new Clustering();
+
+    if (radiusFactor > 0) {
+      for (int c1 = 0; c1 < clustering.size(); c1++) {
+        SphereCluster sc1 = (SphereCluster) clustering.get(c1);
+        double minDist = Double.MAX_VALUE;
+        double minOver = 1;
+        int maxindexCon = -1;
+        int maxindexOver = -1;
+        for (int c2 = 0; c2 < clustering.size(); c2++) {
+          SphereCluster sc2 = (SphereCluster) clustering.get(c2);
+          // double over = sc1.overlapRadiusDegree(sc2);
+          // if(over > 0 && over < minOver){
+          // minOver = over;
+          // maxindexOver = c2;
+          // }
+          double dist = sc1.getHullDistance(sc2);
+          double threshold = Math.min(sc1.getRadius(), sc2.getRadius()) * radiusFactor;
+          if (dist > 0 && dist < minDist && dist < threshold) {
+            minDist = dist;
+            maxindexCon = c2;
+          }
+        }
+        int maxindex = -1;
+        if (maxindexOver != -1)
+          maxindex = maxindexOver;
+        else
+          maxindex = maxindexCon;
+
+        if (maxindex != -1 && !merged[c1]) {
+          merged[c1] = true;
+          merged[maxindex] = true;
+          SphereCluster scnew = new SphereCluster(sc1.getCenter(), sc1.getRadius(), sc1.getWeight());
+          SphereCluster sc2 = (SphereCluster) clustering.get(maxindex);
+          scnew.merge(sc2);
+          mclustering.add(scnew);
+        }
+      }
+    }
+
+    for (int i = 0; i < merged.length; i++) {
+      if (!merged[i])
+        mclustering.add(clustering.get(i));
+    }
+
+    return mclustering;
+
+  }
+
+  @Override
+  protected Measurement[] getModelMeasurementsImpl() {
+    throw new UnsupportedOperationException("Not supported yet.");
+  }
+
+  @Override
+  public void getModelDescription(StringBuilder out, int indent) {
+    throw new UnsupportedOperationException("Not supported yet.");
+  }
+
+  @Override
+  public boolean isRandomizable() {
+    return false;
+  }
+
+  @Override
+  public boolean keepClassLabel() {
+    return true;
+  }
+
+  public double[] getVotesForInstance(Instance inst) {
+    return null;
+  }
+}
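
A usage sketch (not part of the commit) for ClusterGenerator: it does not learn clusters itself but distorts a clustering you hand it. The option values are arbitrary, and it is assumed here that javacliparser's FloatOption exposes setValue(double) analogous to the IntOption.setValue used above:

import org.apache.samoa.moa.cluster.Cluster;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.cluster.SphereCluster;
import org.apache.samoa.moa.clusterers.ClusterGenerator;

public class ClusterGeneratorDemo {
  public static void main(String[] args) {
    // Hypothetical ground truth: two well separated spheres in the unit square.
    Cluster[] groundTruth = {
        new SphereCluster(new double[] { 0.25, 0.25 }, 0.05, 10.0),
        new SphereCluster(new double[] { 0.75, 0.75 }, 0.05, 10.0)
    };

    ClusterGenerator generator = new ClusterGenerator();
    generator.resetLearning();
    generator.radiusIncreaseOption.setValue(0.2); // grow radii by roughly 20%
    generator.positionOffsetOption.setValue(0.1); // shift centers slightly

    // The generator alters the supplied clustering rather than building one from data.
    generator.setSourceClustering(new Clustering(groundTruth));
    Clustering distorted = generator.getMicroClusteringResult();
    System.out.println("clusters after distortion: " + distorted.size());
  }
}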

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/Clusterer.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/Clusterer.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/Clusterer.java
new file mode 100644
index 0000000..bfa07c8
--- /dev/null
+++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/Clusterer.java
@@ -0,0 +1,64 @@
+package org.apache.samoa.moa.clusterers;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import org.apache.samoa.instances.Instance;
+import org.apache.samoa.instances.InstancesHeader;
+import org.apache.samoa.moa.MOAObject;
+import org.apache.samoa.moa.cluster.Clustering;
+import org.apache.samoa.moa.core.Measurement;
+import org.apache.samoa.moa.options.OptionHandler;
+
+public interface Clusterer extends MOAObject, OptionHandler {
+
+  public void setModelContext(InstancesHeader ih);
+
+  public InstancesHeader getModelContext();
+
+  public boolean isRandomizable();
+
+  public void setRandomSeed(int s);
+
+  public boolean trainingHasStarted();
+
+  public double trainingWeightSeenByModel();
+
+  public void resetLearning();
+
+  public void trainOnInstance(Instance inst);
+
+  public double[] getVotesForInstance(Instance inst);
+
+  public Measurement[] getModelMeasurements();
+
+  public Clusterer[] getSubClusterers();
+
+  public Clusterer copy();
+
+  public Clustering getClusteringResult();
+
+  public boolean implementsMicroClusterer();
+
+  public Clustering getMicroClusteringResult();
+
+  public boolean keepClassLabel();
+
+}
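
A short driver sketch (not part of the commit) showing the train/query loop the Clusterer interface is designed for; the helper class and the Iterable stream source are illustrative:

import org.apache.samoa.instances.Instance;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.clusterers.Clusterer;

public final class ClustererDriver {

  private ClustererDriver() {
  }

  // Train on a finite batch of instances and return whichever result the learner supports.
  public static Clustering trainAndQuery(Clusterer clusterer, Iterable<Instance> stream) {
    clusterer.resetLearning();
    for (Instance inst : stream) {
      clusterer.trainOnInstance(inst);
    }
    // Prefer the micro clustering when the learner maintains one.
    return clusterer.implementsMicroClusterer()
        ? clusterer.getMicroClusteringResult()
        : clusterer.getClusteringResult();
  }
}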

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/KMeans.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/KMeans.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/KMeans.java
new file mode 100644
index 0000000..8eeaeba
--- /dev/null
+++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/KMeans.java
@@ -0,0 +1,199 @@
+package org.apache.samoa.moa.clusterers;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.samoa.moa.cluster.CFCluster;
+import org.apache.samoa.moa.cluster.Cluster;
+import org.apache.samoa.moa.cluster.Clustering;
+import org.apache.samoa.moa.cluster.SphereCluster;
+
+/**
+ * A kMeans implementation for microclusterings. For now it only uses the real centers of the ground truth clustering
+ * for initialization. There should also be an option to use random centers. TODO: random centers TODO: Create a macro
+ * clustering interface to make different macro clustering algorithms available to micro clustering algorithms like
+ * clustream, denstream and clustree.
+ * 
+ */
+public class KMeans {
+
+  /**
+   * This kMeans implementation clusters a large number of microclusters into a smaller number of macro clusters. To
+   * make it comparable to other algorithms it uses the real centers of the ground truth macro clustering to have the
+   * best possible initialization. The quality of the resulting macro clustering yields an upper bound for kMeans on
+   * the underlying microclustering.
+   * 
+   * @param centers
+   *          centers of the ground truth clustering
+   * @param data
+   *          list of microclusters
+   * @return the resulting macro clustering
+   */
+  public static Clustering kMeans(Cluster[] centers, List<? extends Cluster> data) {
+    int k = centers.length;
+
+    int dimensions = centers[0].getCenter().length;
+
+    ArrayList<ArrayList<Cluster>> clustering =
+        new ArrayList<ArrayList<Cluster>>();
+    for (int i = 0; i < k; i++) {
+      clustering.add(new ArrayList<Cluster>());
+    }
+
+    int repetitions = 100;
+    while (repetitions-- >= 0) {
+      // Assign points to clusters
+      for (Cluster point : data) {
+        double minDistance = distance(point.getCenter(), centers[0].getCenter());
+        int closestCluster = 0;
+        for (int i = 1; i < k; i++) {
+          double distance = distance(point.getCenter(), centers[i].getCenter());
+          if (distance < minDistance) {
+            closestCluster = i;
+            minDistance = distance;
+          }
+        }
+
+        clustering.get(closestCluster).add(point);
+      }
+
+      // Calculate new centers and clear clustering lists
+      SphereCluster[] newCenters = new SphereCluster[centers.length];
+      for (int i = 0; i < k; i++) {
+        newCenters[i] = calculateCenter(clustering.get(i), dimensions);
+        clustering.get(i).clear();
+      }
+      centers = newCenters;
+    }
+
+    return new Clustering(centers);
+  }
+
+  private static double distance(double[] pointA, double[] pointB) {
+    double distance = 0.0;
+    for (int i = 0; i < pointA.length; i++) {
+      double d = pointA[i] - pointB[i];
+      distance += d * d;
+    }
+    return Math.sqrt(distance);
+  }
+
+  private static SphereCluster calculateCenter(ArrayList<Cluster> cluster, int dimensions) {
+    double[] res = new double[dimensions];
+    for (int i = 0; i < res.length; i++) {
+      res[i] = 0.0;
+    }
+
+    if (cluster.size() == 0) {
+      return new SphereCluster(res, 0.0);
+    }
+
+    for (Cluster point : cluster) {
+      double[] center = point.getCenter();
+      for (int i = 0; i < res.length; i++) {
+        res[i] += center[i];
+      }
+    }
+
+    // Normalize
+    for (int i = 0; i < res.length; i++) {
+      res[i] /= cluster.size();
+    }
+
+    // Calculate radius
+    double radius = 0.0;
+    for (Cluster point : cluster) {
+      double dist = distance(res, point.getCenter());
+      if (dist > radius) {
+        radius = dist;
+      }
+    }
+
+    return new SphereCluster(res, radius);
+  }
+
+  public static Clustering gaussianMeans(Clustering gtClustering, Clustering clustering) {
+    ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>();
+    for (int i = 0; i < clustering.size(); i++) {
+      if (clustering.get(i) instanceof CFCluster) {
+        microclusters.add((CFCluster) clustering.get(i));
+      }
+      else {
+        System.out.println("Unsupported Cluster Type:" + clustering.get(i).getClass()
+            + ". Cluster needs to extend moa.cluster.CFCluster");
+      }
+    }
+    Cluster[] centers = new Cluster[gtClustering.size()];
+    for (int i = 0; i < centers.length; i++) {
+      centers[i] = gtClustering.get(i);
+
+    }
+
+    int k = centers.length;
+    if (microclusters.size() < k) {
+      return new Clustering(new Cluster[0]);
+    }
+
+    Clustering kMeansResult = kMeans(centers, microclusters);
+
+    k = kMeansResult.size();
+    CFCluster[] res = new CFCluster[k];
+
+    for (CFCluster microcluster : microclusters) {
+      // Find closest kMeans cluster
+      double minDistance = Double.MAX_VALUE;
+      int closestCluster = 0;
+      for (int i = 0; i < k; i++) {
+        double distance = distance(kMeansResult.get(i).getCenter(), microcluster.getCenter());
+        if (distance < minDistance) {
+          closestCluster = i;
+          minDistance = distance;
+        }
+      }
+
+      // Add to cluster
+      if (res[closestCluster] == null) {
+        res[closestCluster] = (CFCluster) microcluster.copy();
+      } else {
+        res[closestCluster].add(microcluster);
+      }
+    }
+
+    // Clean up res
+    int count = 0;
+    for (int i = 0; i < res.length; i++) {
+      if (res[i] != null)
+        ++count;
+    }
+
+    CFCluster[] cleaned = new CFCluster[count];
+    count = 0;
+    for (int i = 0; i < res.length; i++) {
+      if (res[i] != null)
+        cleaned[count++] = res[i];
+    }
+
+    return new Clustering(cleaned);
+  }
+
+}
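
A small sketch (not part of the commit) of calling the static KMeans.kMeans helper on a handful of micro clusters, seeded with ground-truth centers as the javadoc above describes; the clusters themselves are made-up SphereClusters:

import java.util.Arrays;
import java.util.List;

import org.apache.samoa.moa.cluster.Cluster;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.cluster.SphereCluster;
import org.apache.samoa.moa.clusterers.KMeans;

public class KMeansDemo {
  public static void main(String[] args) {
    // Micro clusters around two regions of the unit square.
    List<SphereCluster> micro = Arrays.asList(
        new SphereCluster(new double[] { 0.20, 0.20 }, 0.02, 5.0),
        new SphereCluster(new double[] { 0.25, 0.22 }, 0.02, 5.0),
        new SphereCluster(new double[] { 0.80, 0.78 }, 0.02, 5.0),
        new SphereCluster(new double[] { 0.78, 0.81 }, 0.02, 5.0));

    // Ground-truth centers used as the (best possible) initialization.
    Cluster[] centers = {
        new SphereCluster(new double[] { 0.20, 0.20 }, 0.05, 10.0),
        new SphereCluster(new double[] { 0.80, 0.80 }, 0.05, 10.0)
    };

    Clustering macro = KMeans.kMeans(centers, micro);
    System.out.println("macro clusters: " + macro.size());
  }
}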

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java
new file mode 100644
index 0000000..055c3d6
--- /dev/null
+++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java
@@ -0,0 +1,333 @@
+package org.apache.samoa.moa.clusterers.clustream;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.samoa.instances.DenseInstance;
+import org.apache.samoa.instances.Instance;
+import org.apache.samoa.moa.cluster.Cluster;
+import org.apache.samoa.moa.cluster.Clustering;
+import org.apache.samoa.moa.cluster.SphereCluster;
+import org.apache.samoa.moa.clusterers.AbstractClusterer;
+import org.apache.samoa.moa.core.Measurement;
+
+import com.github.javacliparser.IntOption;
+
+/**
+ * Citation: CluStream: Charu C. Aggarwal, Jiawei Han, Jianyong Wang, Philip S. Yu: A Framework for Clustering Evolving
+ * Data Streams. VLDB 2003: 81-92
+ */
+public class Clustream extends AbstractClusterer {
+
+  private static final long serialVersionUID = 1L;
+
+  public IntOption timeWindowOption = new IntOption("horizon",
+      'h', "Range of the window.", 1000);
+
+  public IntOption maxNumKernelsOption = new IntOption(
+      "maxNumKernels", 'k',
+      "Maximum number of micro kernels to use.", 100);
+
+  public IntOption kernelRadiFactorOption = new IntOption(
+      "kernelRadiFactor", 't',
+      "Multiplier for the kernel radius", 2);
+
+  private int timeWindow;
+  private long timestamp = -1;
+  private ClustreamKernel[] kernels;
+  private boolean initialized;
+  private List<ClustreamKernel> buffer; // Buffer for initialization with kNN
+  private int bufferSize;
+  private double t;
+  private int m;
+
+  public Clustream() {
+  }
+
+  @Override
+  public void resetLearningImpl() {
+    this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()];
+    this.timeWindow = timeWindowOption.getValue();
+    this.initialized = false;
+    this.buffer = new LinkedList<>();
+    this.bufferSize = maxNumKernelsOption.getValue();
+    t = kernelRadiFactorOption.getValue();
+    m = maxNumKernelsOption.getValue();
+  }
+
+  @Override
+  public void trainOnInstanceImpl(Instance instance) {
+    int dim = instance.numValues();
+    timestamp++;
+    // 0. Initialize
+    if (!initialized) {
+      if (buffer.size() < bufferSize) {
+        buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m));
+        return;
+      }
+
+      int k = kernels.length;
+      // System.err.println("k="+k+" bufferSize="+bufferSize);
+      assert (k <= bufferSize);
+
+      ClustreamKernel[] centers = new ClustreamKernel[k];
+      for (int i = 0; i < k; i++) {
+        centers[i] = buffer.get(i); // TODO: make random!
+      }
+      Clustering kmeans_clustering = kMeans(k, centers, buffer);
+      // Clustering kmeans_clustering = kMeans(k, buffer);
+
+      for (int i = 0; i < kmeans_clustering.size(); i++) {
+        kernels[i] = new ClustreamKernel(new DenseInstance(1.0, centers[i].getCenter()), dim, timestamp, t, m);
+      }
+
+      buffer.clear();
+      initialized = true;
+      return;
+    }
+
+    // 1. Determine closest kernel
+    ClustreamKernel closestKernel = null;
+    double minDistance = Double.MAX_VALUE;
+    for (ClustreamKernel kernel : kernels) {
+      // System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation());
+      double distance = distance(instance.toDoubleArray(), kernel.getCenter());
+      if (distance < minDistance) {
+        closestKernel = kernel;
+        minDistance = distance;
+      }
+    }
+
+    // 2. Check whether instance fits into closestKernel
+    double radius;
+    if (closestKernel != null && closestKernel.getWeight() == 1) {
+      // Special case: estimate radius by determining the distance to the
+      // next closest cluster
+      radius = Double.MAX_VALUE;
+      double[] center = closestKernel.getCenter();
+      for (ClustreamKernel kernel : kernels) {
+        if (kernel == closestKernel) {
+          continue;
+        }
+
+        double distance = distance(kernel.getCenter(), center);
+        radius = Math.min(distance, radius);
+      }
+    } else {
+      radius = closestKernel.getRadius();
+    }
+
+    if (minDistance < radius) {
+      // Data point fits, put it into the kernel and be happy
+      closestKernel.insert(instance, timestamp);
+      return;
+    }
+
+    // 3. Data point does not fit, we need to free
+    // some space to insert a new kernel
+    long threshold = timestamp - timeWindow; // Kernels before this can be forgotten
+
+    // 3.1 Try to forget old kernels
+    for (int i = 0; i < kernels.length; i++) {
+      if (kernels[i].getRelevanceStamp() < threshold) {
+        kernels[i] = new ClustreamKernel(instance, dim, timestamp, t, m);
+        return;
+      }
+    }
+
+    // 3.2 Merge closest two kernels
+    int closestA = 0;
+    int closestB = 0;
+    minDistance = Double.MAX_VALUE;
+    for (int i = 0; i < kernels.length; i++) {
+      double[] centerA = kernels[i].getCenter();
+      for (int j = i + 1; j < kernels.length; j++) {
+        double dist = distance(centerA, kernels[j].getCenter());
+        if (dist < minDistance) {
+          minDistance = dist;
+          closestA = i;
+          closestB = j;
+        }
+      }
+    }
+    assert (closestA != closestB);
+
+    kernels[closestA].add(kernels[closestB]);
+    kernels[closestB] = new ClustreamKernel(instance, dim, timestamp, t, m);
+  }
+
+  @Override
+  public Clustering getMicroClusteringResult() {
+    if (!initialized) {
+      return new Clustering(new Cluster[0]);
+    }
+
+    ClustreamKernel[] res = new ClustreamKernel[kernels.length];
+    for (int i = 0; i < res.length; i++) {
+      res[i] = new ClustreamKernel(kernels[i], t, m);
+    }
+
+    return new Clustering(res);
+  }
+
+  @Override
+  public boolean implementsMicroClusterer() {
+    return true;
+  }
+
+  @Override
+  public Clustering getClusteringResult() {
+    return null;
+  }
+
+  public String getName() {
+    return "Clustream " + timeWindow;
+  }
+
+  private static double distance(double[] pointA, double[] pointB) {
+    double distance = 0.0;
+    for (int i = 0; i < pointA.length; i++) {
+      double d = pointA[i] - pointB[i];
+      distance += d * d;
+    }
+    return Math.sqrt(distance);
+  }
+
+  // wrapper... we need to rewrite kmeans to points, not clusters, doesn't make
+  // sense anymore
+  // public static Clustering kMeans( int k, ArrayList<Instance> points, int dim
+  // ) {
+  // ArrayList<ClustreamKernel> cl = new ArrayList<ClustreamKernel>();
+  // for(Instance inst : points){
+  // cl.add(new ClustreamKernel(inst, dim , 0, 0, 0));
+  // }
+  // Clustering clustering = kMeans(k, cl);
+  // return clustering;
+  // }
+
+  public static Clustering kMeans(int k, List<? extends Cluster> data) {
+    Random random = new Random(0);
+    Cluster[] centers = new Cluster[k];
+    for (int i = 0; i < centers.length; i++) {
+      int rid = random.nextInt(data.size());
+      centers[i] = new SphereCluster(data.get(rid).getCenter(), 0);
+    }
+    return kMeans(k, centers, data);
+  }
+
+  public static Clustering kMeans(int k, Cluster[] centers, List<? extends Cluster> data) {
+    assert (centers.length == k);
+    assert (k > 0);
+
+    int dimensions = centers[0].getCenter().length;
+
+    ArrayList<ArrayList<Cluster>> clustering = new ArrayList<>();
+    for (int i = 0; i < k; i++) {
+      clustering.add(new ArrayList<Cluster>());
+    }
+
+    int repetitions = 100;
+    while (repetitions-- >= 0) {
+      // Assign points to clusters
+      for (Cluster point : data) {
+        double minDistance = distance(point.getCenter(), centers[0].getCenter());
+        int closestCluster = 0;
+        for (int i = 1; i < k; i++) {
+          double distance = distance(point.getCenter(), centers[i].getCenter());
+          if (distance < minDistance) {
+            closestCluster = i;
+            minDistance = distance;
+          }
+        }
+
+        clustering.get(closestCluster).add(point);
+      }
+
+      // Calculate new centers and clear clustering lists
+      SphereCluster[] newCenters = new SphereCluster[centers.length];
+      for (int i = 0; i < k; i++) {
+        newCenters[i] = calculateCenter(clustering.get(i), dimensions);
+        clustering.get(i).clear();
+      }
+      centers = newCenters;
+    }
+
+    return new Clustering(centers);
+  }
+
+  private static SphereCluster calculateCenter(ArrayList<Cluster> cluster, int dimensions) {
+    double[] res = new double[dimensions];
+    for (int i = 0; i < res.length; i++) {
+      res[i] = 0.0;
+    }
+
+    if (cluster.size() == 0) {
+      return new SphereCluster(res, 0.0);
+    }
+
+    for (Cluster point : cluster) {
+      double[] center = point.getCenter();
+      for (int i = 0; i < res.length; i++) {
+        res[i] += center[i];
+      }
+    }
+
+    // Normalize
+    for (int i = 0; i < res.length; i++) {
+      res[i] /= cluster.size();
+    }
+
+    // Calculate radius
+    double radius = 0.0;
+    for (Cluster point : cluster) {
+      double dist = distance(res, point.getCenter());
+      if (dist > radius) {
+        radius = dist;
+      }
+    }
+    SphereCluster sc = new SphereCluster(res, radius);
+    sc.setWeight(cluster.size());
+    return sc;
+  }
+
+  @Override
+  protected Measurement[] getModelMeasurementsImpl() {
+    throw new UnsupportedOperationException("Not supported yet.");
+  }
+
+  @Override
+  public void getModelDescription(StringBuilder out, int indent) {
+    throw new UnsupportedOperationException("Not supported yet.");
+  }
+
+  public boolean isRandomizable() {
+    return false;
+  }
+
+  public double[] getVotesForInstance(Instance inst) {
+    throw new UnsupportedOperationException("Not supported yet.");
+  }
+
+}
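
A usage sketch (not part of the commit) that feeds a synthetic stream to CluStream and reads back the micro clustering; the stream, the seed and the kernel count are arbitrary:

import java.util.Random;

import org.apache.samoa.instances.DenseInstance;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.clusterers.clustream.Clustream;

public class ClustreamDemo {
  public static void main(String[] args) {
    Clustream clustream = new Clustream();
    clustream.maxNumKernelsOption.setValue(50);
    clustream.resetLearning();

    // The first maxNumKernels points only fill the initialization buffer;
    // after that each point is absorbed by, or replaces, a micro kernel.
    Random rnd = new Random(1);
    for (int i = 0; i < 5000; i++) {
      double[] point = { rnd.nextDouble(), rnd.nextDouble() };
      clustream.trainOnInstance(new DenseInstance(1.0, point));
    }

    Clustering micro = clustream.getMicroClusteringResult();
    System.out.println("micro clusters: " + micro.size());
  }
}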

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/ClustreamKernel.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/ClustreamKernel.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/ClustreamKernel.java
new file mode 100644
index 0000000..522d50e
--- /dev/null
+++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/ClustreamKernel.java
@@ -0,0 +1,271 @@
+package org.apache.samoa.moa.clusterers.clustream;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+import java.util.List;
+
+import org.apache.samoa.instances.Instance;
+import org.apache.samoa.moa.cluster.CFCluster;
+
+public class ClustreamKernel extends CFCluster {
+  private static final long serialVersionUID = 1L;
+
+  private final static double EPSILON = 0.00005;
+  public static final double MIN_VARIANCE = 1e-50;
+
+  protected double LST;
+  protected double SST;
+
+  int m;
+  double t;
+
+  public ClustreamKernel(Instance instance, int dimensions, long timestamp, double t, int m) {
+    super(instance, dimensions);
+    this.t = t;
+    this.m = m;
+    this.LST = timestamp;
+    this.SST = timestamp * timestamp;
+  }
+
+  public ClustreamKernel(ClustreamKernel cluster, double t, int m) {
+    super(cluster);
+    this.t = t;
+    this.m = m;
+    this.LST = cluster.LST;
+    this.SST = cluster.SST;
+  }
+
+  public void insert(Instance instance, long timestamp) {
+    N++;
+    LST += timestamp;
+    SST += timestamp * timestamp;
+
+    for (int i = 0; i < instance.numValues(); i++) {
+      LS[i] += instance.value(i);
+      SS[i] += instance.value(i) * instance.value(i);
+    }
+  }
+
+  @Override
+  public void add(CFCluster other2) {
+    ClustreamKernel other = (ClustreamKernel) other2;
+    assert (other.LS.length == this.LS.length);
+    this.N += other.N;
+    this.LST += other.LST;
+    this.SST += other.SST;
+
+    for (int i = 0; i < LS.length; i++) {
+      this.LS[i] += other.LS[i];
+      this.SS[i] += other.SS[i];
+    }
+  }
+
+  public double getRelevanceStamp() {
+    if (N < 2 * m)
+      return getMuTime();
+
+    return getMuTime() + getSigmaTime() * getQuantile(((double) m) / (2 * N));
+  }
+
+  private double getMuTime() {
+    return LST / N;
+  }
+
+  private double getSigmaTime() {
+    return Math.sqrt(SST / N - (LST / N) * (LST / N));
+  }
+
+  private double getQuantile(double z) {
+    assert (z >= 0 && z <= 1);
+    return Math.sqrt(2) * inverseError(2 * z - 1);
+  }
+
+  @Override
+  public double getRadius() {
+    // trivial cluster
+    if (N == 1)
+      return 0;
+
+    return getDeviation() * radiusFactor;
+  }
+
+  @Override
+  public CFCluster getCF() {
+    return this;
+  }
+
+  private double getDeviation() {
+    double[] variance = getVarianceVector();
+    double sumOfDeviation = 0.0;
+    for (double aVariance : variance) {
+      double d = Math.sqrt(aVariance);
+      sumOfDeviation += d;
+    }
+    return sumOfDeviation / variance.length;
+  }
+
+  /**
+   * @return this kernel's center
+   */
+  @Override
+  public double[] getCenter() {
+    assert (!this.isEmpty());
+    double res[] = new double[this.LS.length];
+    for (int i = 0; i < res.length; i++) {
+      res[i] = this.LS[i] / N;
+    }
+    return res;
+  }
+
+  /**
+   * See interface <code>Cluster</code>.
+   * 
+   * @param instance
+   * @return 1.0 if the instance falls within the kernel radius, 0.0 otherwise
+   */
+  @Override
+  public double getInclusionProbability(Instance instance) {
+    // trivial cluster
+    if (N == 1) {
+      double distance = 0.0;
+      for (int i = 0; i < LS.length; i++) {
+        double d = LS[i] - instance.value(i);
+        distance += d * d;
+      }
+      distance = Math.sqrt(distance);
+      if (distance < EPSILON)
+        return 1.0;
+      return 0.0;
+    }
+    else {
+      double dist = calcNormalizedDistance(instance.toDoubleArray());
+      if (dist <= getRadius()) {
+        return 1;
+      }
+      else {
+        return 0;
+      }
+      // double res = AuxiliaryFunctions.distanceProbabilty(dist, LS.length);
+      // return res;
+    }
+  }
+
+  private double[] getVarianceVector() {
+    double[] res = new double[this.LS.length];
+    for (int i = 0; i < this.LS.length; i++) {
+      double ls = this.LS[i];
+      double ss = this.SS[i];
+
+      double lsDivN = ls / this.getWeight();
+      double lsDivNSquared = lsDivN * lsDivN;
+      double ssDivN = ss / this.getWeight();
+      res[i] = ssDivN - lsDivNSquared;
+
+      // Due to numerical errors, small negative values can occur.
+      // We correct this by setting them to almost zero.
+      if (res[i] <= 0.0) {
+        if (res[i] > -EPSILON) {
+          res[i] = MIN_VARIANCE;
+        }
+      }
+    }
+    return res;
+  }
+
+  /**
+   * Check if this cluster is empty or not.
+   * 
+   * @return <code>true</code> if the cluster has no data points, <code>false</code> otherwise.
+   */
+  public boolean isEmpty() {
+    return this.N == 0;
+  }
+
+  /**
+   * Calculate the normalized Euclidean distance (Mahalanobis distance for a distribution without covariances) to a
+   * point. Note that the per-dimension variance scaling is currently commented out below, so this is effectively the
+   * plain Euclidean distance to the cluster center.
+   * 
+   * @param point
+   *          The point to which the distance is calculated.
+   * @return The normalized distance to the cluster center.
+   * 
+   *         TODO: check whether WEIGHTING is correctly applied to variances
+   */
+  private double calcNormalizedDistance(double[] point) {
+    double[] center = getCenter();
+    double res = 0.0;
+
+    for (int i = 0; i < center.length; i++) {
+      double diff = center[i] - point[i];
+      res += (diff * diff);// variance[i];
+    }
+    return Math.sqrt(res);
+  }
+
+  /**
+   * Approximates the inverse error function via a truncated Maclaurin series; CluStream uses it to compute quantiles
+   * of the (approximately normal) timestamp distribution.
+   * 
+   * @param x the argument, expected in [-1, 1]
+   * @return an approximation of the inverse error function at x
+   */
+  public static double inverseError(double x) {
+    double z = Math.sqrt(Math.PI) * x;
+    double res = (z) / 2;
+
+    double z2 = z * z;
+    double zProd = z * z2; // z^3
+    res += (1.0 / 24) * zProd;
+
+    zProd *= z2; // z^5
+    res += (7.0 / 960) * zProd;
+
+    zProd *= z2; // z^7
+    res += (127 * zProd) / 80640;
+
+    zProd *= z2; // z^9
+    res += (4369 * zProd) / 11612160;
+
+    zProd *= z2; // z^11
+    res += (34807 * zProd) / 364953600;
+
+    zProd *= z2; // z^13
+    res += (20036983 * zProd) / 797058662400d;
+
+    return res;
+  }
+
+  @Override
+  protected void getClusterSpecificInfo(List<String> infoTitle, List<String> infoValue) {
+    super.getClusterSpecificInfo(infoTitle, infoValue);
+    infoTitle.add("Deviation");
+
+    double[] variance = getVarianceVector();
+    double sumOfDeviation = 0.0;
+    for (double aVariance : variance) {
+      double d = Math.sqrt(aVariance);
+      sumOfDeviation += d;
+    }
+
+    sumOfDeviation /= variance.length;
+
+    infoValue.add(Double.toString(sumOfDeviation));
+  }
+}
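
For readers following the temporal bookkeeping above (LST/SST, the relevance stamp, the inverse error expansion), here is a minimal, illustrative sketch of how the kernel can be exercised in isolation. The class name ClustreamKernelSketch and the t/m values are hypothetical; only the ClustreamKernel and DenseInstance APIs shown in this commit are used.

import java.util.Arrays;

import org.apache.samoa.instances.DenseInstance;
import org.apache.samoa.moa.clusterers.clustream.ClustreamKernel;

public class ClustreamKernelSketch {
  public static void main(String[] args) {
    // t = kernel radius multiplier, m = maximum number of micro kernels (arbitrary demo values)
    ClustreamKernel kernel = new ClustreamKernel(new DenseInstance(1.0, new double[] { 0.2, 0.4 }), 2, 0L, 2.0, 100);

    // Insert a few more points with increasing timestamps; LS/SS and LST/SST are updated incrementally.
    for (long ts = 1; ts <= 10; ts++) {
      kernel.insert(new DenseInstance(1.0, new double[] { 0.2 + 0.01 * ts, 0.4 }), ts);
    }

    // With N < 2m the relevance stamp falls back to the mean arrival time LST / N.
    System.out.println("relevance stamp = " + kernel.getRelevanceStamp());
    System.out.println("center          = " + Arrays.toString(kernel.getCenter()));

    // The inverse error approximation is a truncated series, so it is only accurate for |x| well below 1.
    System.out.println("erfinv(0.5)    ~= " + ClustreamKernel.inverseError(0.5));
  }
}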

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/WithKmeans.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/WithKmeans.java b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/WithKmeans.java
new file mode 100644
index 0000000..cf54dd8
--- /dev/null
+++ b/samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/WithKmeans.java
@@ -0,0 +1,466 @@
+package org.apache.samoa.moa.clusterers.clustream;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.samoa.instances.DenseInstance;
+import org.apache.samoa.instances.Instance;
+import org.apache.samoa.moa.cluster.CFCluster;
+import org.apache.samoa.moa.cluster.Cluster;
+import org.apache.samoa.moa.cluster.Clustering;
+import org.apache.samoa.moa.cluster.SphereCluster;
+import org.apache.samoa.moa.clusterers.AbstractClusterer;
+import org.apache.samoa.moa.core.Measurement;
+
+import com.github.javacliparser.IntOption;
+
+public class WithKmeans extends AbstractClusterer {
+
+  private static final long serialVersionUID = 1L;
+
+  public IntOption timeWindowOption = new IntOption("horizon",
+      'h', "Range of the window.", 1000);
+
+  public IntOption maxNumKernelsOption = new IntOption(
+      "maxNumKernels", 'm',
+      "Maximum number of micro kernels to use.", 100);
+
+  public IntOption kernelRadiFactorOption = new IntOption(
+      "kernelRadiFactor", 't',
+      "Multiplier for the kernel radius", 2);
+
+  public IntOption kOption = new IntOption(
+      "k", 'k',
+      "k of macro k-means (number of clusters)", 5);
+
+  private int timeWindow;
+  private long timestamp = -1;
+  private ClustreamKernel[] kernels;
+  private boolean initialized;
+  private List<ClustreamKernel> buffer; // Buffer for initialization with kNN
+  private int bufferSize;
+  private double t;
+  private int m;
+
+  public WithKmeans() {
+
+  }
+
+  @Override
+  public void resetLearningImpl() {
+    this.kernels = new ClustreamKernel[maxNumKernelsOption.getValue()];
+    this.timeWindow = timeWindowOption.getValue();
+    this.initialized = false;
+    this.buffer = new LinkedList<ClustreamKernel>();
+    this.bufferSize = maxNumKernelsOption.getValue();
+    t = kernelRadiFactorOption.getValue();
+    m = maxNumKernelsOption.getValue();
+  }
+
+  @Override
+  public void trainOnInstanceImpl(Instance instance) {
+    int dim = instance.numValues();
+    timestamp++;
+    // 0. Initialize
+    if (!initialized) {
+      if (buffer.size() < bufferSize) {
+        buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m));
+        return;
+      } else {
+        for (int i = 0; i < buffer.size(); i++) {
+          kernels[i] = new ClustreamKernel(new DenseInstance(1.0, buffer.get(i).getCenter()), dim, timestamp, t, m);
+        }
+
+        buffer.clear();
+        initialized = true;
+        return;
+      }
+    }
+
+    // 1. Determine closest kernel
+    ClustreamKernel closestKernel = null;
+    double minDistance = Double.MAX_VALUE;
+    for (int i = 0; i < kernels.length; i++) {
+      // System.out.println(i+" "+kernels[i].getWeight()+" "+kernels[i].getDeviation());
+      double distance = distance(instance.toDoubleArray(), kernels[i].getCenter());
+      if (distance < minDistance) {
+        closestKernel = kernels[i];
+        minDistance = distance;
+      }
+    }
+
+    // 2. Check whether instance fits into closestKernel
+    double radius = 0.0;
+    if (closestKernel.getWeight() == 1) {
+      // Special case: estimate radius by determining the distance to the
+      // next closest cluster
+      radius = Double.MAX_VALUE;
+      double[] center = closestKernel.getCenter();
+      for (int i = 0; i < kernels.length; i++) {
+        if (kernels[i] == closestKernel) {
+          continue;
+        }
+
+        double distance = distance(kernels[i].getCenter(), center);
+        radius = Math.min(distance, radius);
+      }
+    } else {
+      radius = closestKernel.getRadius();
+    }
+
+    if (minDistance < radius) {
+      // Data point fits: insert it into the closest kernel
+      closestKernel.insert(instance, timestamp);
+      return;
+    }
+
+    // 3. Data point does not fit, so we need to free
+    // some space to insert a new kernel
+    long threshold = timestamp - timeWindow; // Kernels older than this can be forgotten
+
+    // 3.1 Try to forget old kernels
+    for (int i = 0; i < kernels.length; i++) {
+      if (kernels[i].getRelevanceStamp() < threshold) {
+        kernels[i] = new ClustreamKernel(instance, dim, timestamp, t, m);
+        return;
+      }
+    }
+
+    // 3.2 Merge closest two kernels
+    int closestA = 0;
+    int closestB = 0;
+    minDistance = Double.MAX_VALUE;
+    for (int i = 0; i < kernels.length; i++) {
+      double[] centerA = kernels[i].getCenter();
+      for (int j = i + 1; j < kernels.length; j++) {
+        double dist = distance(centerA, kernels[j].getCenter());
+        if (dist < minDistance) {
+          minDistance = dist;
+          closestA = i;
+          closestB = j;
+        }
+      }
+    }
+    assert (closestA != closestB);
+
+    kernels[closestA].add(kernels[closestB]);
+    kernels[closestB] = new ClustreamKernel(instance, dim, timestamp, t, m);
+  }
+
+  @Override
+  public Clustering getMicroClusteringResult() {
+    if (!initialized) {
+      return new Clustering(new Cluster[0]);
+    }
+
+    ClustreamKernel[] result = new ClustreamKernel[kernels.length];
+    for (int i = 0; i < result.length; i++) {
+      result[i] = new ClustreamKernel(kernels[i], t, m);
+    }
+
+    return new Clustering(result);
+  }
+
+  @Override
+  public Clustering getClusteringResult() {
+    return kMeans_rand(kOption.getValue(), getMicroClusteringResult());
+  }
+
+  public Clustering getClusteringResult(Clustering gtClustering) {
+    return kMeans_gta(kOption.getValue(), getMicroClusteringResult(), gtClustering);
+  }
+
+  public String getName() {
+    return "CluStreamWithKMeans " + timeWindow;
+  }
+
+  /**
+   * Euclidean distance between two points.
+   * 
+   * @param pointA
+   * @param pointB
+   * @return the Euclidean distance between pointA and pointB
+   */
+  private static double distance(double[] pointA, double[] pointB) {
+    double distance = 0.0;
+    for (int i = 0; i < pointA.length; i++) {
+      double d = pointA[i] - pointB[i];
+      distance += d * d;
+    }
+    return Math.sqrt(distance);
+  }
+
+  /**
+   * k-means of (micro)clusters, with ground-truth-aided initialization (to produce the best attainable results).
+   * 
+   * @param k
+   * @param clustering
+   *          the micro clustering to be condensed
+   * @param gtClustering
+   *          ground-truth clustering whose centers seed the initial macro centers
+   * @return (macro)clustering - CFClusters
+   */
+  public static Clustering kMeans_gta(int k, Clustering clustering, Clustering gtClustering) {
+
+    ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>();
+    for (int i = 0; i < clustering.size(); i++) {
+      if (clustering.get(i) instanceof CFCluster) {
+        microclusters.add((CFCluster) clustering.get(i));
+      } else {
+        System.out.println("Unsupported Cluster Type:" + 
clustering.get(i).getClass()
+            + ". Cluster needs to extend moa.cluster.CFCluster");
+      }
+    }
+
+    int n = microclusters.size();
+    assert (k <= n);
+
+    /* k-means */
+    Random random = new Random(0);
+    Cluster[] centers = new Cluster[k];
+    int K = gtClustering.size();
+
+    for (int i = 0; i < k; i++) {
+      if (i < K) { // GT-aided
+        centers[i] = new SphereCluster(gtClustering.get(i).getCenter(), 0);
+      } else { // Randomized
+        int rid = random.nextInt(n);
+        centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0);
+      }
+    }
+
+    return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters);
+  }
+
+  /**
+   * k-means of (micro)clusters, with randomized initialization.
+   * 
+   * @param k
+   * @param clustering
+   *          the micro clustering to be condensed
+   * @return (macro)clustering - CFClusters
+   */
+  public static Clustering kMeans_rand(int k, Clustering clustering) {
+
+    ArrayList<CFCluster> microclusters = new ArrayList<CFCluster>();
+    for (int i = 0; i < clustering.size(); i++) {
+      if (clustering.get(i) instanceof CFCluster) {
+        microclusters.add((CFCluster) clustering.get(i));
+      } else {
+        System.out.println("Unsupported Cluster Type:" + 
clustering.get(i).getClass()
+            + ". Cluster needs to extend moa.cluster.CFCluster");
+      }
+    }
+
+    int n = microclusters.size();
+    assert (k <= n);
+
+    /* k-means */
+    Random random = new Random(0);
+    Cluster[] centers = new Cluster[k];
+
+    for (int i = 0; i < k; i++) {
+      int rid = random.nextInt(n);
+      centers[i] = new SphereCluster(microclusters.get(rid).getCenter(), 0);
+    }
+
+    return cleanUpKMeans(kMeans(k, centers, microclusters), microclusters);
+  }
+
+  /**
+   * The actual k-means loop over (micro)clusters, started from the specified initialization points.
+   * 
+   * @param k
+   * @param centers
+   *          - initial centers
+   * @param data
+   * @return (macro)clustering - SphereClusters
+   */
+  protected static Clustering kMeans(int k, Cluster[] centers, List<? extends Cluster> data) {
+    assert (centers.length == k);
+    assert (k > 0);
+
+    int dimensions = centers[0].getCenter().length;
+
+    ArrayList<ArrayList<Cluster>> clustering = new ArrayList<ArrayList<Cluster>>();
+    for (int i = 0; i < k; i++) {
+      clustering.add(new ArrayList<Cluster>());
+    }
+
+    while (true) {
+      // Assign points to clusters
+      for (Cluster point : data) {
+        double minDistance = distance(point.getCenter(), centers[0].getCenter());
+        int closestCluster = 0;
+        for (int i = 1; i < k; i++) {
+          double distance = distance(point.getCenter(), centers[i].getCenter());
+          if (distance < minDistance) {
+            closestCluster = i;
+            minDistance = distance;
+          }
+        }
+
+        clustering.get(closestCluster).add(point);
+      }
+
+      // Calculate new centers and clear clustering lists
+      SphereCluster[] newCenters = new SphereCluster[centers.length];
+      for (int i = 0; i < k; i++) {
+        newCenters[i] = calculateCenter(clustering.get(i), dimensions);
+        clustering.get(i).clear();
+      }
+
+      // Convergence check
+      boolean converged = true;
+      for (int i = 0; i < k; i++) {
+        if (!Arrays.equals(centers[i].getCenter(), newCenters[i].getCenter())) {
+          converged = false;
+          break;
+        }
+      }
+
+      if (converged) {
+        break;
+      } else {
+        centers = newCenters;
+      }
+    }
+
+    return new Clustering(centers);
+  }
+
+  /**
+   * Rearrange the k-means result into a set of CFClusters, cleaning up the redundancies.
+   * 
+   * @param kMeansResult
+   * @param microclusters
+   * @return the macro clustering as CFClusters, with empty clusters dropped
+   */
+  protected static Clustering cleanUpKMeans(Clustering kMeansResult, ArrayList<CFCluster> microclusters) {
+    /* Convert k-means result to CFClusters */
+    int k = kMeansResult.size();
+    CFCluster[] converted = new CFCluster[k];
+
+    for (CFCluster mc : microclusters) {
+      // Find closest kMeans cluster
+      double minDistance = Double.MAX_VALUE;
+      int closestCluster = 0;
+      for (int i = 0; i < k; i++) {
+        double distance = distance(kMeansResult.get(i).getCenter(), mc.getCenter());
+        if (distance < minDistance) {
+          closestCluster = i;
+          minDistance = distance;
+        }
+      }
+
+      // Add to cluster
+      if (converted[closestCluster] == null) {
+        converted[closestCluster] = (CFCluster) mc.copy();
+      } else {
+        converted[closestCluster].add(mc);
+      }
+    }
+
+    // Clean up
+    int count = 0;
+    for (int i = 0; i < converted.length; i++) {
+      if (converted[i] != null)
+        count++;
+    }
+
+    CFCluster[] cleaned = new CFCluster[count];
+    count = 0;
+    for (int i = 0; i < converted.length; i++) {
+      if (converted[i] != null)
+        cleaned[count++] = converted[i];
+    }
+
+    return new Clustering(cleaned);
+  }
+
+  /**
+   * k-means helper: calculate a sphere cluster wrapping the assigned points (microclusters).
+   * 
+   * @param assigned
+   * @param dimensions
+   * @return SphereCluster (with center and radius)
+   */
+  private static SphereCluster calculateCenter(ArrayList<Cluster> assigned, int dimensions) {
+    double[] result = new double[dimensions];
+    for (int i = 0; i < result.length; i++) {
+      result[i] = 0.0;
+    }
+
+    if (assigned.size() == 0) {
+      return new SphereCluster(result, 0.0);
+    }
+
+    for (Cluster point : assigned) {
+      double[] center = point.getCenter();
+      for (int i = 0; i < result.length; i++) {
+        result[i] += center[i];
+      }
+    }
+
+    // Normalize
+    for (int i = 0; i < result.length; i++) {
+      result[i] /= assigned.size();
+    }
+
+    // Calculate radius: biggest wrapping distance from center
+    double radius = 0.0;
+    for (Cluster point : assigned) {
+      double dist = distance(result, point.getCenter());
+      if (dist > radius) {
+        radius = dist;
+      }
+    }
+    SphereCluster sc = new SphereCluster(result, radius);
+    sc.setWeight(assigned.size());
+    return sc;
+  }
+
+  /** Miscellaneous **/
+
+  @Override
+  public boolean implementsMicroClusterer() {
+    return true;
+  }
+
+  public boolean isRandomizable() {
+    return false;
+  }
+
+  public double[] getVotesForInstance(Instance inst) {
+    throw new UnsupportedOperationException("Not supported yet.");
+  }
+
+  @Override
+  protected Measurement[] getModelMeasurementsImpl() {
+    throw new UnsupportedOperationException("Not supported yet.");
+  }
+
+  @Override
+  public void getModelDescription(StringBuilder out, int indent) {
+    throw new UnsupportedOperationException("Not supported yet.");
+  }
+}
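
As a rough usage sketch of the online/offline split above (micro kernels maintained per instance, k-means run on demand), the driver below is illustrative only. It assumes IntOption.setValue behaves as in javacliparser, and it calls the public *Impl methods directly since no task wiring is shown in this commit; the class name, synthetic stream, and option values are arbitrary.

import java.util.Random;

import org.apache.samoa.instances.DenseInstance;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.clusterers.clustream.WithKmeans;

public class WithKmeansSketch {
  public static void main(String[] args) {
    WithKmeans clustream = new WithKmeans();
    clustream.kOption.setValue(3);               // k of the macro (offline) k-means
    clustream.maxNumKernelsOption.setValue(50);  // number of micro kernels kept online
    clustream.resetLearningImpl();               // initialize the kernel array and the init buffer

    // Feed a small synthetic 2-D stream with three well-separated modes.
    Random rnd = new Random(42);
    for (int i = 0; i < 2000; i++) {
      double[] p = { rnd.nextGaussian() + (i % 3) * 5.0, rnd.nextGaussian() };
      clustream.trainOnInstanceImpl(new DenseInstance(1.0, p));
    }

    Clustering micro = clustream.getMicroClusteringResult();
    Clustering macro = clustream.getClusteringResult();  // randomized k-means over the micro clusters
    System.out.println("micro clusters: " + micro.size() + ", macro clusters: " + macro.size());
  }
}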

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoClassDiscovery.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoClassDiscovery.java b/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoClassDiscovery.java
new file mode 100644
index 0000000..e141fde
--- /dev/null
+++ b/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoClassDiscovery.java
@@ -0,0 +1,196 @@
+package org.apache.samoa.moa.core;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+
+/**
+ * Class for discovering classes via reflection in the java class path.
+ * 
+ * @author Richard Kirkby ([email protected])
+ * @version $Revision: 7 $
+ */
+public class AutoClassDiscovery {
+
+  protected static final Map<String, String[]> cachedClassNames = new HashMap<String, String[]>();
+
+  public static String[] findClassNames(String packageNameToSearch) {
+    String[] cached = cachedClassNames.get(packageNameToSearch);
+    if (cached == null) {
+      HashSet<String> classNames = new HashSet<String>();
+      /*
+       * StringTokenizer pathTokens = new StringTokenizer(System
+       * .getProperty("java.class.path"), File.pathSeparator);
+       */
+      String packageDirName = packageNameToSearch.replace('.',
+          File.separatorChar);
+      String packageJarName = packageNameToSearch.length() > 0 ? (packageNameToSearch.replace('.', '/') + "/")
+          : "";
+      String part = "";
+
+      AutoClassDiscovery adc = new AutoClassDiscovery();
+      URLClassLoader sysLoader = (URLClassLoader) adc.getClass().getClassLoader();
+      URL[] cl_urls = sysLoader.getURLs();
+
+      for (int i = 0; i < cl_urls.length; i++) {
+        part = cl_urls[i].toString();
+        if (part.startsWith("file:")) {
+          part = part.replace(" ", "%20");
+          try {
+            File temp = new File(new java.net.URI(part));
+            part = temp.getAbsolutePath();
+          } catch (URISyntaxException e) {
+            e.printStackTrace();
+          }
+        }
+
+        // find classes
+        ArrayList<File> files = new ArrayList<File>();
+        File dir = new File(part);
+        if (dir.isDirectory()) {
+          File root = new File(dir.toString() + File.separatorChar + packageDirName);
+          String[] names = findClassesInDirectoryRecursive(root, "");
+          classNames.addAll(Arrays.asList(names));
+        } else {
+          try {
+            JarFile jar = new JarFile(part);
+            Enumeration<JarEntry> jarEntries = jar.entries();
+            while (jarEntries.hasMoreElements()) {
+              String jarEntry = jarEntries.nextElement().getName();
+              if (jarEntry.startsWith(packageJarName)) {
+                String relativeName = jarEntry.substring(packageJarName.length());
+                if (relativeName.endsWith(".class")) {
+                  relativeName = relativeName.replace('/',
+                      '.');
+                  classNames.add(relativeName.substring(0,
+                      relativeName.length()
+                          - ".class".length()));
+                }
+              }
+            }
+          } catch (IOException ignored) {
+            // ignore unreadable files
+          }
+        }
+      }
+
+      /*
+       * while (pathTokens.hasMoreElements()) { String pathToSearch =
+       * pathTokens.nextElement().toString(); if (pathToSearch.endsWith(".jar"))
+       * { try { JarFile jar = new JarFile(pathToSearch); Enumeration<JarEntry>
+       * jarEntries = jar.entries(); while (jarEntries.hasMoreElements()) {
+       * String jarEntry = jarEntries.nextElement() .getName(); if
+       * (jarEntry.startsWith(packageJarName)) { String relativeName = jarEntry
+       * .substring(packageJarName.length()); if
+       * (relativeName.endsWith(".class")) { relativeName =
+       * relativeName.replace('/', '.');
+       * classNames.add(relativeName.substring(0, relativeName.length() -
+       * ".class".length())); } } } } catch (IOException ignored) { // ignore
+       * unreadable files } } else { File root = new File(pathToSearch +
+       * File.separatorChar + packageDirName); String[] names =
+       * findClassesInDirectoryRecursive(root, ""); for (String name : names) {
+       * classNames.add(name); } } }
+       */
+      cached = classNames.toArray(new String[classNames.size()]);
+      Arrays.sort(cached);
+      cachedClassNames.put(packageNameToSearch, cached);
+    }
+    return cached;
+  }
+
+  protected static String[] findClassesInDirectoryRecursive(File root,
+      String packagePath) {
+    HashSet<String> classNames = new HashSet<String>();
+    if (root.isDirectory()) {
+      String[] list = root.list();
+      for (String string : list) {
+        if (string.endsWith(".class")) {
+          classNames.add(packagePath
+              + string.substring(0, string.length()
+                  - ".class".length()));
+        } else {
+          File testDir = new File(root.getPath() + File.separatorChar
+              + string);
+          if (testDir.isDirectory()) {
+            String[] names = findClassesInDirectoryRecursive(
+                testDir, packagePath + string + ".");
+            classNames.addAll(Arrays.asList(names));
+          }
+        }
+      }
+    }
+    return classNames.toArray(new String[classNames.size()]);
+  }
+
+  public static Class[] findClassesOfType(String packageNameToSearch,
+      Class<?> typeDesired) {
+    ArrayList<Class<?>> classesFound = new ArrayList<Class<?>>();
+    String[] classNames = findClassNames(packageNameToSearch);
+    for (String className : classNames) {
+      String fullName = packageNameToSearch.length() > 0 ? (packageNameToSearch
+          + "." + className)
+          : className;
+      if (isPublicConcreteClassOfType(fullName, typeDesired)) {
+        try {
+          classesFound.add(Class.forName(fullName));
+        } catch (Exception ignored) {
+          // ignore classes that we cannot instantiate
+        }
+      }
+    }
+    return classesFound.toArray(new Class[classesFound.size()]);
+  }
+
+  public static boolean isPublicConcreteClassOfType(String className,
+      Class<?> typeDesired) {
+    Class<?> testClass = null;
+    try {
+      testClass = Class.forName(className);
+    } catch (Exception e) {
+      return false;
+    }
+    int classModifiers = testClass.getModifiers();
+    return (java.lang.reflect.Modifier.isPublic(classModifiers)
+        && !java.lang.reflect.Modifier.isAbstract(classModifiers)
+        && typeDesired.isAssignableFrom(testClass) && hasEmptyConstructor(testClass));
+  }
+
+  public static boolean hasEmptyConstructor(Class<?> type) {
+    try {
+      type.getConstructor();
+      return true;
+    } catch (Exception ignored) {
+      return false;
+    }
+  }
+}
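
A minimal lookup against the discovery helper above might look as follows; the class name and the chosen package and base type are only examples. Note that the cast of the class loader to URLClassLoader works on Java 8 and earlier, where the application class loader is still a URLClassLoader.

import org.apache.samoa.moa.cluster.Cluster;
import org.apache.samoa.moa.core.AutoClassDiscovery;

public class DiscoverySketch {
  public static void main(String[] args) {
    // All public, concrete Cluster subclasses with a no-argument constructor found under this package.
    Class[] clusterTypes = AutoClassDiscovery.findClassesOfType("org.apache.samoa.moa.cluster", Cluster.class);
    for (Class<?> c : clusterTypes) {
      System.out.println("discovered: " + c.getName());
    }

    // The raw (unfiltered) name scan is also available and is cached per package.
    for (String name : AutoClassDiscovery.findClassNames("org.apache.samoa.moa.cluster")) {
      System.out.println("class name: " + name);
    }
  }
}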

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoExpandVector.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoExpandVector.java b/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoExpandVector.java
new file mode 100644
index 0000000..fce23dc
--- /dev/null
+++ b/samoa-api/src/main/java/org/apache/samoa/moa/core/AutoExpandVector.java
@@ -0,0 +1,133 @@
+package org.apache.samoa.moa.core;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+import org.apache.samoa.moa.AbstractMOAObject;
+import org.apache.samoa.moa.MOAObject;
+
+/**
+ * Vector with the capability of automatic expansion.
+ * 
+ * @author Richard Kirkby ([email protected])
+ * @version $Revision: 7 $
+ */
+public class AutoExpandVector<T> extends ArrayList<T> implements MOAObject {
+
+  private static final long serialVersionUID = 1L;
+
+  public AutoExpandVector() {
+    super(0);
+  }
+
+  public AutoExpandVector(int size) {
+    super(size);
+  }
+
+  @Override
+  public void add(int pos, T obj) {
+    if (pos > size()) {
+      while (pos > size()) {
+        add(null);
+      }
+      trimToSize();
+    }
+    super.add(pos, obj);
+  }
+
+  @Override
+  public T get(int pos) {
+    return ((pos >= 0) && (pos < size())) ? super.get(pos) : null;
+  }
+
+  @Override
+  public T set(int pos, T obj) {
+    if (pos >= size()) {
+      add(pos, obj);
+      return null;
+    }
+    return super.set(pos, obj);
+  }
+
+  @Override
+  public boolean add(T arg0) {
+    boolean result = super.add(arg0);
+    trimToSize();
+    return result;
+  }
+
+  @Override
+  public boolean addAll(Collection<? extends T> arg0) {
+    boolean result = super.addAll(arg0);
+    trimToSize();
+    return result;
+  }
+
+  @Override
+  public boolean addAll(int arg0, Collection<? extends T> arg1) {
+    boolean result = super.addAll(arg0, arg1);
+    trimToSize();
+    return result;
+  }
+
+  @Override
+  public void clear() {
+    super.clear();
+    trimToSize();
+  }
+
+  @Override
+  public T remove(int arg0) {
+    T result = super.remove(arg0);
+    trimToSize();
+    return result;
+  }
+
+  @Override
+  public boolean remove(Object arg0) {
+    boolean result = super.remove(arg0);
+    trimToSize();
+    return result;
+  }
+
+  @Override
+  protected void removeRange(int arg0, int arg1) {
+    super.removeRange(arg0, arg1);
+    trimToSize();
+  }
+
+  @Override
+  public MOAObject copy() {
+    return AbstractMOAObject.copy(this);
+  }
+
+  @Override
+  public int measureByteSize() {
+    return AbstractMOAObject.measureByteSize(this);
+  }
+
+  @Override
+  public void getDescription(StringBuilder sb, int indent) {
+    // TODO Auto-generated method stub
+  }
+}
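
The overridden ArrayList methods above mainly change two behaviours: writes beyond the current size pad the vector with nulls instead of throwing, and out-of-range reads return null. A small illustrative sketch (hypothetical class name):

import org.apache.samoa.moa.core.AutoExpandVector;

public class AutoExpandVectorSketch {
  public static void main(String[] args) {
    AutoExpandVector<Double> v = new AutoExpandVector<Double>();

    v.set(5, 3.14);                  // indices 0..4 are padded with null, 3.14 lands at index 5
    System.out.println(v.size());    // 6
    System.out.println(v.get(5));    // 3.14
    System.out.println(v.get(100));  // null rather than an IndexOutOfBoundsException

    v.add(2, 1.0);                   // within range, so it behaves like a plain ArrayList insert
    System.out.println(v.size());    // 7
  }
}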
