http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM_GTAnalysis.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM_GTAnalysis.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM_GTAnalysis.java
index 53fb4dc..c31fa74 100644
--- a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM_GTAnalysis.java
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM_GTAnalysis.java
@@ -33,811 +33,828 @@ import java.util.Iterator;
  * 
  * CMM: Ground truth analysis
  * 
- * Reference: Kremer et al., "An Effective Evaluation Measure for Clustering on Evolving Data Streams", KDD, 2011
+ * Reference: Kremer et al.,
+ * "An Effective Evaluation Measure for Clustering on Evolving Data Streams",
+ * KDD, 2011
  * 
- * @author Timm jansen
- * Data Management and Data Exploration Group, RWTH Aachen University
-*/
+ * @author Timm jansen Data Management and Data Exploration Group, RWTH Aachen
+ *         University
+ */
 
 /*
- * TODO:
- * - try to avoid calcualting the radius multiple times
- * - avoid the full distance map?
- * - knn functionality in clusters
- * - noise error
+ * TODO: - try to avoid calculating the radius multiple times - avoid the full
+ * distance map? - knn functionality in clusters - noise error
  */
-public class CMM_GTAnalysis{
-       
-    /**
-     * the given ground truth clustering
-     */
-    private Clustering gtClustering;
-    
-    /**
-     * list of given points within the horizon  
-     */
-    private ArrayList<CMMPoint> cmmpoints;
-    
+public class CMM_GTAnalysis {
+
+  /**
+   * the given ground truth clustering
+   */
+  private Clustering gtClustering;
+
+  /**
+   * list of given points within the horizon
+   */
+  private ArrayList<CMMPoint> cmmpoints;
+
+  /**
+   * the newly calculated ground truth clustering
+   */
+  private ArrayList<GTCluster> gt0Clusters;
+
+  /**
+   * IDs of noise points
+   */
+  private ArrayList<Integer> noise;
+
+  /**
+   * total number of points
+   */
+  private int numPoints;
+
+  /**
+   * number of clusters of the original ground truth
+   */
+  private int numGTClusters;
+
+  /**
+   * number of classes of the original ground truth, in case of a micro
+   * clustering ground truth this differs from numGTClusters
+   */
+  private int numGTClasses;
+
+  /**
+   * number of classes after we are done with the analysis
+   */
+  private int numGT0Classes;
+
+  /**
+   * number of dimensions
+   */
+  private int numDims;
+
+  /**
+   * mapping between true cluster ID/class label of the original ground truth
+   * and the internal cluster ID/working class label.
+   * 
+   * different original cluster IDs might map to the same new cluster ID due to
+   * merging of two clusters
+   */
+  private HashMap<Integer, Integer> mapTrueLabelToWorkLabel;
+
+  /**
+   * log of how clusters have been merged (for debugging)
+   */
+  private int[] mergeMap;
+
+  /**
+   * number of non-noise points that will create an error due to the underlying
+   * clustering model (e.g. point being covered by two clusters representing
+   * different classes)
+   */
+  private int noiseErrorByModel;
+
+  /**
+   * number of noise points that will create an error due to the underlying
+   * clustering model (e.g. noise point being covered by a cluster)
+   */
+  private int pointErrorByModel;
+
+  /**
+   * CMM debug mode
+   */
+  private boolean debug = false;
+
+  /******* CMM parameter ***********/
+
+  /**
+   * defines how many nearest neighbors will be used
+   */
+  private int knnNeighbourhood = 2;
+
+  /**
+   * the threshold which defines when ground truth clusters will be merged. set
+   * to 1 to disable merging
+   */
+  private double tauConnection = 0.5;
+
+  /**
+   * experimental (default: disabled) separate k for points to cluster and
+   * cluster to cluster
+   */
+  private double clusterConnectionMaxPoints = knnNeighbourhood;
+
+  /**
+   * experimental (default: disabled) use exponential connectivity function to
+   * model different behavior: closer points will have a stronger connection
+   * compared to the linear function. Use ConnRefXValue and ConnX to better
+   * parameterize lambda, which controls the decay of the connectivity
+   */
+  private boolean useExpConnectivity = false;
+  private double lambdaConnRefXValue = 0.01;
+  private double lambdaConnX = 4;
+  private double lamdaConn;
+
+  /******************************************/
+
+  /**
+   * Wrapper class for data points to store CMM relevant attributes
+   * 
+   */
+  protected class CMMPoint extends DataPoint {
     /**
-     * the newly calculate ground truth clustering
+     * Reference to original point
      */
-    private ArrayList<GTCluster> gt0Clusters;
+    protected DataPoint p = null;
 
     /**
-     * IDs of noise points
+     * point ID
      */
-    private ArrayList<Integer> noise;
-    
-    /**
-     * total number of points
-     */
-    private int numPoints;
+    protected int pID = 0;
 
     /**
-     * number of clusters of the original ground truth 
+     * true class label
      */
-    private int numGTClusters;
+    protected int trueClass = -1;
 
     /**
-     *    number of classes of the original ground truth, in case of a 
-     *    micro clustering ground truth this differs from numGTClusters
+     * the connectivity of the point to its cluster
      */
-    private int numGTClasses;
+    protected double connectivity = 1.0;
 
     /**
-     * number of classes after we are done with the analysis 
+     * knn distance within own cluster
      */
-    private int numGT0Classes;
+    protected double knnInCluster = 0.0;
 
     /**
-     * number of dimensions
+     * knn indices (for debugging only)
      */
-    private int numDims;
+    protected ArrayList<Integer> knnIndices;
+
+    public CMMPoint(DataPoint point, int id) {
+      // make a copy, but keep reference
+      super(point, point.getTimestamp());
+      p = point;
+      pID = id;
+      trueClass = (int) point.classValue();
+    }
 
     /**
-     * mapping between true cluster ID/class label of the original ground 
truth 
-     * and the internal cluster ID/working class label. 
+     * Returns the current working label of the cluster the point belongs to.
+     * The label can change due to merging of clusters.
      * 
-     * different original cluster IDs might map to the same new cluster ID due 
to merging of two clusters 
+     * @return the current working class label
      */
-    private HashMap<Integer, Integer> mapTrueLabelToWorkLabel;
+    protected int workclass() {
+      if (trueClass == -1)
+        return -1;
+      else
+        return mapTrueLabelToWorkLabel.get(trueClass);
+    }
+  }
 
-    /**
-     * log of how clusters have been merged (for debugging)
-     */
-    private int[] mergeMap;
+  /**
+   * Main class to model the new clusters that will be the output of the cluster
+   * analysis
+   * 
+   */
+  protected class GTCluster {
+    /** points that are per definition in the cluster */
+    private ArrayList<Integer> points = new ArrayList<Integer>();
 
     /**
-     * number of non-noise points that will create an error due to the 
underlying clustering model
-     * (e.g. point being covered by two clusters representing different 
classes)
+     * a new GT cluster consists of one or more "old" GT clusters.
+     * Connected/overlapping clusters cannot be merged directly because of the
+     * underlying cluster model. E.g. for merging two spherical clusters the new
+     * cluster sphere can cover a lot more space than two separate smaller
+     * spheres. To keep the original coverage we need to keep the original
+     * clusters and merge them on an abstract level.
      */
-    private int noiseErrorByModel;
+    private ArrayList<Integer> clusterRepresentations = new 
ArrayList<Integer>();
 
-    /**
-     * number of noise points that will create an error due to the underlying 
clustering model
-     * (e.g. noise point being covered by a cluster)
-     */
-    private int pointErrorByModel;    
-    
-    /**
-     * CMM debug mode
-     */
-    private boolean debug = false;
+    /** current work class (changes when merging) */
+    private int workclass;
 
-    
-    /******* CMM parameter ***********/
+    /** original work class */
+    private final int orgWorkClass;
 
-    /**
-     * defines how many nearest neighbors will be used
-     */
-    private int knnNeighbourhood = 2;
+    /** original class label */
+    private final int label;
 
-    /**
-     * the threshold which defines when ground truth clusters will be merged.
-     * set to 1 to disable merging 
-     */
-    private double tauConnection = 0.5;
-    
-    /**
-     *  experimental (default: disabled)
-     *  separate k for points to cluster and cluster to cluster 
-     */
-    private double clusterConnectionMaxPoints = knnNeighbourhood;
-    
-    /** 
-     * experimental (default: disabled)
-     * use exponential connectivity function to model different behavior: 
-     * closer points will have a stronger connection compared to the linear 
function.
-     * Use ConnRefXValue and ConnX to better parameterize lambda, which 
controls 
-     * the decay of the connectivity
-     */
-    private boolean useExpConnectivity = false;
-    private double lambdaConnRefXValue = 0.01;
-    private double lambdaConnX = 4;
-    private double lamdaConn;
-    
-    
-    /******************************************/
-    
-    
-    /**
-     * Wrapper class for data points to store CMM relevant attributes
-     *
-     */
-    protected class CMMPoint extends DataPoint{
-        /**
-         * Reference to original point
-         */
-        protected DataPoint p = null;
-        
-        /**
-         * point ID
-         */
-        protected int pID = 0;
-        
-        
-        /**
-         * true class label
-         */
-        protected int trueClass = -1;
-
-        
-        /**
-         * the connectivity of the point to its cluster
-         */
-        protected double connectivity = 1.0;
-        
-        
-        /**
-         * knn distnace within own cluster
-         */
-        protected double knnInCluster = 0.0; 
-        
-        
-        /**
-         * knn indices (for debugging only)
-         */
-        protected ArrayList<Integer> knnIndices;
-
-        public CMMPoint(DataPoint point, int id) {
-            //make a copy, but keep reference
-            super(point,point.getTimestamp());
-            p = point;
-            pID = id;
-            trueClass = (int)point.classValue();
-        }
+    /** clusters that have been merged into this cluster (debugging) */
+    private ArrayList<Integer> mergedWorkLabels = null;
 
-        
-        /**
-         * Retruns the current working label of the cluster the point belongs 
to. 
-         * The label can change due to merging of clusters.  
-         * 
-         * @return the current working class label
-         */
-        protected int workclass(){
-            if(trueClass == -1 )
-                return -1;
-            else
-                return mapTrueLabelToWorkLabel.get(trueClass);
-        }
+    /** average knn distance of all points in the cluster */
+    private double knnMeanAvg = 0;
+
+    /** average deviation of knn distance of all points */
+    private double knnDevAvg = 0;
+
+    /** connectivity of the cluster to all other clusters */
+    private ArrayList<Double> connections = new ArrayList<Double>();
+
+    private GTCluster(int workclass, int label, int gtClusteringID) {
+      this.orgWorkClass = workclass;
+      this.workclass = workclass;
+      this.label = label;
+      this.clusterRepresentations.add(gtClusteringID);
     }
 
-    
-    
     /**
-     * Main class to model the new clusters that will be the output of the 
cluster analysis
-     *
+     * The original class label the cluster represents
+     * 
+     * @return original class label
      */
-    protected class GTCluster{
-       /** points that are per definition in the cluster */
-        private ArrayList<Integer> points = new ArrayList<Integer>();
-        
-        /** a new GT cluster consists of one or more "old" GT clusters. 
-         * Connected/overlapping clusters cannot be merged directly because of 
the 
-         * underlying cluster model. E.g. for merging two spherical clusters 
the new 
-         * cluster sphere can cover a lot more space then two separate smaller 
spheres. 
-         * To keep the original coverage we need to keep the orignal clusters 
and merge
-         * them on an abstract level. */
-        private ArrayList<Integer> clusterRepresentations = new 
ArrayList<Integer>();
-     
-        /** current work class (changes when merging) */
-        private int workclass;
-        
-        /** original work class */
-        private final int orgWorkClass;
-        
-        /** original class label*/
-        private final int label;
-        
-        /** clusters that have been merged into this cluster (debugging)*/
-        private ArrayList<Integer> mergedWorkLabels = null;
-        
-        /** average knn distance of all points in the cluster*/
-        private double knnMeanAvg = 0;
-        
-        /** average deviation of knn distance of all points*/
-        private double knnDevAvg = 0;
-        
-        /** connectivity of the cluster to all other clusters */
-        private ArrayList<Double> connections = new ArrayList<Double>();
-        
-
-        private GTCluster(int workclass, int label, int gtClusteringID) {
-           this.orgWorkClass = workclass;
-           this.workclass = workclass;
-           this.label = label;
-           this.clusterRepresentations.add(gtClusteringID);
-        }
+    protected int getLabel() {
+      return label;
+    }
 
-        
-        /**
-         * The original class label the cluster represents
-         * @return original class label
-         */
-        protected int getLabel(){
-            return label;
-        }
+    /**
+     * Calculate the probability of the point being covered through the cluster
+     * 
+     * @param point
+     *          to calculate the probability for
+     * @return probability of the point being covered through the cluster
+     */
+    protected double getInclusionProbability(CMMPoint point) {
+      double prob = Double.MIN_VALUE;
+      // check all cluster representatives for coverage
+      for (int c = 0; c < clusterRepresentations.size(); c++) {
+        double tmp_prob = 
gtClustering.get(clusterRepresentations.get(c)).getInclusionProbability(point);
+        if (tmp_prob > prob)
+          prob = tmp_prob;
+      }
+      return prob;
+    }
 
-        /**
-         * Calculate the probability of the point being covered through the 
cluster
-         * @param point to calculate the probability for
-         * @return probability of the point being covered through the cluster
-         */
-        protected double getInclusionProbability(CMMPoint point){
-            double prob = Double.MIN_VALUE;
-            //check all cluster representatives for coverage
-            for (int c = 0; c < clusterRepresentations.size(); c++) {
-               double tmp_prob = 
gtClustering.get(clusterRepresentations.get(c)).getInclusionProbability(point);
-               if(tmp_prob > prob) prob = tmp_prob;
-            }
-            return prob;
+    /**
+     * calculate knn distances of points within own cluster + average knn
+     * distance and average knn distance deviation of all points
+     */
+    private void calculateKnn() {
+      for (int p0 : points) {
+        CMMPoint cmdp = cmmpoints.get(p0);
+        if (!cmdp.isNoise()) {
+          AutoExpandVector<Double> knnDist = new AutoExpandVector<Double>();
+          AutoExpandVector<Integer> knnPointIndex = new 
AutoExpandVector<Integer>();
+
+          // calculate nearest neighbours
+          getKnnInCluster(cmdp, knnNeighbourhood, points, knnDist, 
knnPointIndex);
+
+          // TODO: What to do if we have fewer than k neighbours?
+          double avgKnn = 0;
+          for (int i = 0; i < knnDist.size(); i++) {
+            avgKnn += knnDist.get(i);
+          }
+          if (knnDist.size() != 0)
+            avgKnn /= knnDist.size();
+          cmdp.knnInCluster = avgKnn;
+          cmdp.knnIndices = knnPointIndex;
+          cmdp.p.setMeasureValue("knnAvg", cmdp.knnInCluster);
+
+          knnMeanAvg += avgKnn;
+          knnDevAvg += Math.pow(avgKnn, 2);
         }
+      }
+      knnMeanAvg = knnMeanAvg / (double) points.size();
+      knnDevAvg = knnDevAvg / (double) points.size();
 
-        
-        /**
-         * calculate knn distances of points within own cluster 
-         * + average knn distance and average knn distance deviation of all 
points 
-         */
-        private void calculateKnn(){
-            for (int p0 : points) {
-                CMMPoint cmdp = cmmpoints.get(p0);
-                if(!cmdp.isNoise()){
-                    AutoExpandVector<Double> knnDist = new 
AutoExpandVector<Double>();
-                    AutoExpandVector<Integer> knnPointIndex = new 
AutoExpandVector<Integer>();
-                    
-                    //calculate nearest neighbours 
-                    getKnnInCluster(cmdp, knnNeighbourhood, points, 
knnDist,knnPointIndex);
-
-                    //TODO: What to do if we have less then k neighbours?
-                    double avgKnn = 0;
-                    for (int i = 0; i < knnDist.size(); i++) {
-                        avgKnn+= knnDist.get(i);
-                    }
-                    if(knnDist.size()!=0)
-                        avgKnn/=knnDist.size();
-                    cmdp.knnInCluster = avgKnn;
-                    cmdp.knnIndices = knnPointIndex;
-                    cmdp.p.setMeasureValue("knnAvg", cmdp.knnInCluster);
-
-                    knnMeanAvg+=avgKnn;
-                    knnDevAvg+=Math.pow(avgKnn,2);
-                }
-            }
-            knnMeanAvg=knnMeanAvg/(double)points.size();
-            knnDevAvg=knnDevAvg/(double)points.size();
-
-            double variance = knnDevAvg-Math.pow(knnMeanAvg,2.0);
-            // Due to numerical errors, small negative values can occur.
-            if (variance <= 0.0) variance = 1e-50;
-            knnDevAvg = Math.sqrt(variance);
+      double variance = knnDevAvg - Math.pow(knnMeanAvg, 2.0);
+      // Due to numerical errors, small negative values can occur.
+      if (variance <= 0.0)
+        variance = 1e-50;
+      knnDevAvg = Math.sqrt(variance);
 
-        }
+    }
 
-        
-        /**
-         * Calculate the connection of a cluster to this cluster
-         * @param otherCid cluster id of the other cluster
-         * @param initial flag for initial run
-         */
-        private void calculateClusterConnection(int otherCid, boolean initial){
-            double avgConnection = 0;
-            if(workclass==otherCid){
-                avgConnection = 1;
-            }
-            else{
-                AutoExpandVector<Double> kmax = new AutoExpandVector<Double>();
-                AutoExpandVector<Integer> kmaxIndexes = new 
AutoExpandVector<Integer>();
-
-                for(int p : points){
-                    CMMPoint cmdp = cmmpoints.get(p);
-                    double con_p_Cj = getConnectionValue(cmmpoints.get(p), 
otherCid);
-                    double connection = cmdp.connectivity * con_p_Cj;
-                    if(initial){
-                        cmdp.p.setMeasureValue("Connection to C"+otherCid, 
con_p_Cj);
-                    }
-
-                    //connection
-                    if(kmax.size() < clusterConnectionMaxPoints || connection 
> kmax.get(kmax.size()-1)){
-                        int index = 0;
-                        while(index < kmax.size() && connection < 
kmax.get(index)) {
-                            index++;
-                        }
-                        kmax.add(index, connection);
-                        kmaxIndexes.add(index, p);
-                        if(kmax.size() > clusterConnectionMaxPoints){
-                            kmax.remove(kmax.size()-1);
-                            kmaxIndexes.add(kmaxIndexes.size()-1);
-                        }
-                    }
-                }
-                //connection
-                for (int k = 0; k < kmax.size(); k++) {
-                    avgConnection+= kmax.get(k);
-                }
-                avgConnection/=kmax.size();
+    /**
+     * Calculate the connection of a cluster to this cluster
+     * 
+     * @param otherCid
+     *          cluster id of the other cluster
+     * @param initial
+     *          flag for initial run
+     */
+    private void calculateClusterConnection(int otherCid, boolean initial) {
+      double avgConnection = 0;
+      if (workclass == otherCid) {
+        avgConnection = 1;
+      }
+      else {
+        AutoExpandVector<Double> kmax = new AutoExpandVector<Double>();
+        AutoExpandVector<Integer> kmaxIndexes = new 
AutoExpandVector<Integer>();
+
+        for (int p : points) {
+          CMMPoint cmdp = cmmpoints.get(p);
+          double con_p_Cj = getConnectionValue(cmmpoints.get(p), otherCid);
+          double connection = cmdp.connectivity * con_p_Cj;
+          if (initial) {
+            cmdp.p.setMeasureValue("Connection to C" + otherCid, con_p_Cj);
+          }
+
+          // connection
+          if (kmax.size() < clusterConnectionMaxPoints || connection > 
kmax.get(kmax.size() - 1)) {
+            int index = 0;
+            while (index < kmax.size() && connection < kmax.get(index)) {
+              index++;
             }
-
-            if(otherCid<connections.size()){
-                connections.set(otherCid, avgConnection);
+            kmax.add(index, connection);
+            kmaxIndexes.add(index, p);
+            if (kmax.size() > clusterConnectionMaxPoints) {
+              kmax.remove(kmax.size() - 1);
+              kmaxIndexes.add(kmaxIndexes.size() - 1);
             }
-            else
-                if(connections.size() == otherCid){
-                    connections.add(avgConnection);
-                }
-                else
-                    System.out.println("Something is going really wrong with 
the connection listing!"+knnNeighbourhood+" "+tauConnection);
+          }
         }
-
-        
-        /**
-         * Merge a cluster into this cluster
-         * @param mergeID the ID of the cluster to be merged
-         */
-        private void mergeCluster(int mergeID){
-            if(mergeID < gt0Clusters.size()){
-                //track merging (debugging)
-               for (int i = 0; i < numGTClasses; i++) {
-                    if(mergeMap[i]==mergeID)
-                        mergeMap[i]=workclass;
-                    if(mergeMap[i]>mergeID)
-                        mergeMap[i]--;
-                }
-                GTCluster gtcMerge  = gt0Clusters.get(mergeID);
-                if(debug)
-                    System.out.println("Merging C"+gtcMerge.workclass+" into 
C"+workclass+
-                            " with Con "+connections.get(mergeID)+" / 
"+gtcMerge.connections.get(workclass));
-
-
-                //update mapTrueLabelToWorkLabel
-                mapTrueLabelToWorkLabel.put(gtcMerge.label, workclass);
-                Iterator iterator = 
mapTrueLabelToWorkLabel.keySet().iterator();
-                while (iterator.hasNext()) {
-                    Integer key = (Integer)iterator.next();
-                    //update pointer of already merged cluster
-                    int value = mapTrueLabelToWorkLabel.get(key);
-                    if(value == mergeID)
-                        mapTrueLabelToWorkLabel.put(key, workclass);
-                    if(value > mergeID)
-                        mapTrueLabelToWorkLabel.put(key, value-1);
-                }
-
-                //merge points from B into A
-                points.addAll(gtcMerge.points);
-                clusterRepresentations.addAll(gtcMerge.clusterRepresentations);
-                if(mergedWorkLabels==null){
-                    mergedWorkLabels = new ArrayList<Integer>();
-                }
-                mergedWorkLabels.add(gtcMerge.orgWorkClass);
-                if(gtcMerge.mergedWorkLabels!=null)
-                    mergedWorkLabels.addAll(gtcMerge.mergedWorkLabels);
-
-                gt0Clusters.remove(mergeID);
-
-                //update workclass labels
-                for(int c=mergeID; c < gt0Clusters.size(); c++){
-                    gt0Clusters.get(c).workclass = c;
-                }
-
-                //update knn distances
-                calculateKnn();
-                for(int c=0; c < gt0Clusters.size(); c++){
-                    gt0Clusters.get(c).connections.remove(mergeID);
-                    
-                    //recalculate connection from other clusters to the new 
merged one
-                    
gt0Clusters.get(c).calculateClusterConnection(workclass,false);
-                    //and from new merged one to other clusters
-                    
gt0Clusters.get(workclass).calculateClusterConnection(c,false);
-                }
-            }
-            else{
-                System.out.println("Merge indices are not valid");
-            }
+        // connection
+        for (int k = 0; k < kmax.size(); k++) {
+          avgConnection += kmax.get(k);
         }
+        avgConnection /= kmax.size();
+      }
+
+      if (otherCid < connections.size()) {
+        connections.set(otherCid, avgConnection);
+      }
+      else if (connections.size() == otherCid) {
+        connections.add(avgConnection);
+      }
+      else
+        System.out.println("Something is going really wrong with the 
connection listing!" + knnNeighbourhood + " "
+            + tauConnection);
     }
 
-    
     /**
-     * @param trueClustering the ground truth clustering
-     * @param points data points
-     * @param enableClassMerge allow class merging (should be set to true on 
default)
+     * Merge a cluster into this cluster
+     * 
+     * @param mergeID
+     *          the ID of the cluster to be merged
      */
-    public CMM_GTAnalysis(Clustering trueClustering, ArrayList<DataPoint> 
points, boolean enableClassMerge){
-        if(debug)
-            System.out.println("GT Analysis Debug Output");
-
-        noiseErrorByModel = 0;
-        pointErrorByModel = 0;
-        if(!enableClassMerge){
-               tauConnection = 1.0;
-        }
-
-        lamdaConn = -Math.log(lambdaConnRefXValue)/Math.log(2)/lambdaConnX;
-        
-        this.gtClustering = trueClustering;
-
-        numPoints = points.size();
-        numDims = points.get(0).numAttributes()-1;
-        numGTClusters = gtClustering.size();
-
-        //init mappings between work and true labels
-        mapTrueLabelToWorkLabel = new HashMap<Integer, Integer>();
-        
-        //set up base of new clustering
-        gt0Clusters = new ArrayList<GTCluster>();
-        int numWorkClasses = 0;
-        //create label to worklabel mapping as real labels can be just a set 
of unordered integers
-        for (int i = 0; i < numGTClusters; i++) {
-            int label = (int)gtClustering.get(i).getGroundTruth();
-            if(!mapTrueLabelToWorkLabel.containsKey(label)){
-                gt0Clusters.add(new GTCluster(numWorkClasses,label,i));
-                mapTrueLabelToWorkLabel.put(label,numWorkClasses);
-                numWorkClasses++;
-            }
-            else{
-                
gt0Clusters.get(mapTrueLabelToWorkLabel.get(label)).clusterRepresentations.add(i);
-            }
-        }
-        numGTClasses = numWorkClasses;
-
-        mergeMap = new int[numGTClasses];
+    private void mergeCluster(int mergeID) {
+      if (mergeID < gt0Clusters.size()) {
+        // track merging (debugging)
         for (int i = 0; i < numGTClasses; i++) {
-            mergeMap[i]=i;
-        }
-
-        //create cmd point wrapper instances
-        cmmpoints = new ArrayList<CMMPoint>();
-        for (int p = 0; p < points.size(); p++) {
-            CMMPoint cmdp = new CMMPoint(points.get(p), p);
-            cmmpoints.add(cmdp);
+          if (mergeMap[i] == mergeID)
+            mergeMap[i] = workclass;
+          if (mergeMap[i] > mergeID)
+            mergeMap[i]--;
         }
-
-
-        //split points up into their GTClusters and Noise (according to class 
labels)
-        noise = new ArrayList<Integer>();
-        for (int p = 0; p < numPoints; p++) {
-            if(cmmpoints.get(p).isNoise()){
-                noise.add(p);
-            }
-            else{
-                gt0Clusters.get(cmmpoints.get(p).workclass()).points.add(p);
-            }
+        GTCluster gtcMerge = gt0Clusters.get(mergeID);
+        if (debug)
+          System.out.println("Merging C" + gtcMerge.workclass + " into C" + 
workclass +
+              " with Con " + connections.get(mergeID) + " / " + 
gtcMerge.connections.get(workclass));
+
+        // update mapTrueLabelToWorkLabel
+        mapTrueLabelToWorkLabel.put(gtcMerge.label, workclass);
+        Iterator iterator = mapTrueLabelToWorkLabel.keySet().iterator();
+        while (iterator.hasNext()) {
+          Integer key = (Integer) iterator.next();
+          // update pointer of already merged cluster
+          int value = mapTrueLabelToWorkLabel.get(key);
+          if (value == mergeID)
+            mapTrueLabelToWorkLabel.put(key, workclass);
+          if (value > mergeID)
+            mapTrueLabelToWorkLabel.put(key, value - 1);
         }
 
-        //calculate initial knnMean and knnDev
-        for (GTCluster gtc : gt0Clusters) {
-            gtc.calculateKnn();
+        // merge points from B into A
+        points.addAll(gtcMerge.points);
+        clusterRepresentations.addAll(gtcMerge.clusterRepresentations);
+        if (mergedWorkLabels == null) {
+          mergedWorkLabels = new ArrayList<Integer>();
         }
+        mergedWorkLabels.add(gtcMerge.orgWorkClass);
+        if (gtcMerge.mergedWorkLabels != null)
+          mergedWorkLabels.addAll(gtcMerge.mergedWorkLabels);
 
-        //calculate cluster connections
-        calculateGTClusterConnections();
+        gt0Clusters.remove(mergeID);
 
-        //calculate point connections with own clusters
-        calculateGTPointQualities();
-
-        if(debug)
-            System.out.println("GT Analysis Debug End");
-
-   }
-
-    /**
-     * Calculate the connection of a point to a cluster
-     *  
-     * @param cmmp the point to calculate the connection for
-     * @param clusterID the corresponding cluster
-     * @return the connection value
-     */
-    //TODO: Cache the connection value for a point to the different clusters???
-    protected double getConnectionValue(CMMPoint cmmp, int clusterID){
-        AutoExpandVector<Double> knnDist = new AutoExpandVector<Double>();
-        AutoExpandVector<Integer> knnPointIndex = new 
AutoExpandVector<Integer>();
-        
-        //calculate the knn distance of the point to the cluster
-        getKnnInCluster(cmmp, knnNeighbourhood, 
gt0Clusters.get(clusterID).points, knnDist, knnPointIndex);
-
-        //TODO: What to do if we have less then k neighbors?
-        double avgDist = 0;
-        for (int i = 0; i < knnDist.size(); i++) {
-            avgDist+= knnDist.get(i);
+        // update workclass labels
+        for (int c = mergeID; c < gt0Clusters.size(); c++) {
+          gt0Clusters.get(c).workclass = c;
         }
-        //what to do if we only have a single point???
-        if(knnDist.size()!=0)
-            avgDist/=knnDist.size();
-        else
-            return 0;
-
-        //get the upper knn distance of the cluster
-        double upperKnn = gt0Clusters.get(clusterID).knnMeanAvg + gt0Clusters.get(clusterID).knnDevAvg;
-        
-        /* calculate the connectivity based on knn distance of the point within the cluster
-           and the upper knn distance of the cluster*/ 
-        if(avgDist < upperKnn){
-            return 1;
-        }
-        else{
-            //value that should be reached at upperKnn distance
-            //Choose connection formula
-            double conn;
-            if(useExpConnectivity)
-                conn = Math.pow(2,-lamdaConn*(avgDist-upperKnn)/upperKnn);
-            else
-                conn = upperKnn/avgDist;
 
-            if(Double.isNaN(conn))
-                System.out.println("Connectivity NaN at "+cmmp.p.getTimestamp());
+        // update knn distances
+        calculateKnn();
+        for (int c = 0; c < gt0Clusters.size(); c++) {
+          gt0Clusters.get(c).connections.remove(mergeID);
 
-            return conn;
+          // recalculate connection from other clusters to the new merged one
+          gt0Clusters.get(c).calculateClusterConnection(workclass, false);
+          // and from new merged one to other clusters
+          gt0Clusters.get(workclass).calculateClusterConnection(c, false);
         }
+      }
+      else {
+        System.out.println("Merge indices are not valid");
+      }
     }
-
-    
-    /**
-     * @param cmmp point to calculate knn distance for
-     * @param k number of nearest neighbors to look for
-     * @param pointIDs list of point IDs to check
-     * @param knnDist sorted list of smallest knn distances (can already be filled to make updates possible)  
-     * @param knnPointIndex list of corresponding knn indices
-     */
-    private void getKnnInCluster(CMMPoint cmmp, int k,
-                                 ArrayList<Integer> pointIDs,
-                                 AutoExpandVector<Double> knnDist,
-                                 AutoExpandVector<Integer> knnPointIndex) {
-
-        //iterate over every point in the choosen cluster, cal distance and insert into list
-        for (int p1 = 0; p1 < pointIDs.size(); p1++) {
-            int pid = pointIDs.get(p1);
-            if(cmmp.pID == pid) continue;
-            double dist = distance(cmmp,cmmpoints.get(pid));
-            if(knnDist.size() < k || dist < knnDist.get(knnDist.size()-1)){
-                int index = 0;
-                while(index < knnDist.size() && dist > knnDist.get(index)) {
-                    index++;
-                }
-                knnDist.add(index, dist);
-                knnPointIndex.add(index,pid);
-                if(knnDist.size() > k){
-                    knnDist.remove(knnDist.size()-1);
-                    knnPointIndex.remove(knnPointIndex.size()-1);
-                }
-            }
-        }
+  }
+
+  /**
+   * @param trueClustering
+   *          the ground truth clustering
+   * @param points
+   *          data points
+   * @param enableClassMerge
+   *          allow class merging (should be set to true on default)
+   */
+  public CMM_GTAnalysis(Clustering trueClustering, ArrayList<DataPoint> points, boolean enableClassMerge) {
+    if (debug)
+      System.out.println("GT Analysis Debug Output");
+
+    noiseErrorByModel = 0;
+    pointErrorByModel = 0;
+    if (!enableClassMerge) {
+      tauConnection = 1.0;
     }
 
-
-    
-    /**
-     * calculate initial connectivities
-     */
-    private void calculateGTPointQualities(){
-        for (int p = 0; p < numPoints; p++) {
-            CMMPoint cmdp = cmmpoints.get(p);
-            if(!cmdp.isNoise()){
-                cmdp.connectivity = getConnectionValue(cmdp, cmdp.workclass());
-                cmdp.p.setMeasureValue("Connectivity", cmdp.connectivity);
-            }
-        }
+    lamdaConn = -Math.log(lambdaConnRefXValue) / Math.log(2) / lambdaConnX;
+
+    this.gtClustering = trueClustering;
+
+    numPoints = points.size();
+    numDims = points.get(0).numAttributes() - 1;
+    numGTClusters = gtClustering.size();
+
+    // init mappings between work and true labels
+    mapTrueLabelToWorkLabel = new HashMap<Integer, Integer>();
+
+    // set up base of new clustering
+    gt0Clusters = new ArrayList<GTCluster>();
+    int numWorkClasses = 0;
+    // create label to worklabel mapping as real labels can be just a set of
+    // unordered integers
+    for (int i = 0; i < numGTClusters; i++) {
+      int label = (int) gtClustering.get(i).getGroundTruth();
+      if (!mapTrueLabelToWorkLabel.containsKey(label)) {
+        gt0Clusters.add(new GTCluster(numWorkClasses, label, i));
+        mapTrueLabelToWorkLabel.put(label, numWorkClasses);
+        numWorkClasses++;
+      }
+      else {
+        gt0Clusters.get(mapTrueLabelToWorkLabel.get(label)).clusterRepresentations.add(i);
+      }
     }
+    numGTClasses = numWorkClasses;
 
-    
-    
-    /**
-     * Calculate connections between clusters and merge clusters accordingly as 
-     * long as connections exceed threshold 
-     */
-    private void calculateGTClusterConnections(){
-        for (int c0 = 0; c0 < gt0Clusters.size(); c0++) {
-            for (int c1 = 0; c1 < gt0Clusters.size(); c1++) {
-                    gt0Clusters.get(c0).calculateClusterConnection(c1, true);
-            }
-        }
-
-        boolean changedConnection = true;
-        while(changedConnection){
-            if(debug){
-                System.out.println("Cluster Connection");
-                for (int c = 0; c < gt0Clusters.size(); c++) {
-                    System.out.print("C"+gt0Clusters.get(c).label+" --> ");
-                    for (int c1 = 0; c1 < gt0Clusters.get(c).connections.size(); c1++) {
-                        System.out.print(" C"+gt0Clusters.get(c1).label+": "+gt0Clusters.get(c).connections.get(c1));
-                    }
-                    System.out.println("");
-                }
-                System.out.println("");
-            }
-
-            double max = 0;
-            int maxIndexI = -1;
-            int maxIndexJ = -1;
-
-            changedConnection = false;
-            for (int c0 = 0; c0 < gt0Clusters.size(); c0++) {
-                for (int c1 = c0+1; c1 < gt0Clusters.size(); c1++) {
-                    if(c0==c1) continue;
-                        double min =Math.min(gt0Clusters.get(c0).connections.get(c1), gt0Clusters.get(c1).connections.get(c0));
-                        if(min > max){
-                            max = min;
-                            maxIndexI = c0;
-                            maxIndexJ = c1;
-                        }
-                }
-            }
-            if(maxIndexI!=-1 && max > tauConnection){
-                gt0Clusters.get(maxIndexI).mergeCluster(maxIndexJ);
-                if(debug)
-                    System.out.println("Merging "+maxIndexI+" and "+maxIndexJ+" because of connection "+max);
-
-                changedConnection = true;
-            }
-        }
-        numGT0Classes = gt0Clusters.size();
+    mergeMap = new int[numGTClasses];
+    for (int i = 0; i < numGTClasses; i++) {
+      mergeMap[i] = i;
     }
 
-    
-    /** 
-     * Calculates how well the original clusters are separable. 
-     * Small values indicate bad separability, values close to 1 indicate good separability 
-     * @return index of seperability 
-     */
-    public double getClassSeparability(){
-//        int totalConn = numGTClasses*(numGTClasses-1)/2;
-//        int mergedConn = 0;
-//        for(GTCluster gt : gt0Clusters){
-//            int merged = gt.clusterRepresentations.size();
-//            if(merged > 1)
-//                mergedConn+=merged * (merged-1)/2;
-//        }
-//        if(totalConn == 0)
-//            return 0;
-//        else
-//            return 1-mergedConn/(double)totalConn;
-        return numGT0Classes/(double)numGTClasses;
+    // create cmd point wrapper instances
+    cmmpoints = new ArrayList<CMMPoint>();
+    for (int p = 0; p < points.size(); p++) {
+      CMMPoint cmdp = new CMMPoint(points.get(p), p);
+      cmmpoints.add(cmdp);
+    }
 
+    // split points up into their GTClusters and Noise (according to class
+    // labels)
+    noise = new ArrayList<Integer>();
+    for (int p = 0; p < numPoints; p++) {
+      if (cmmpoints.get(p).isNoise()) {
+        noise.add(p);
+      }
+      else {
+        gt0Clusters.get(cmmpoints.get(p).workclass()).points.add(p);
+      }
     }
 
-    
-    /**
-     * Calculates how well noise is separable from the given clusters
-     * Small values indicate bad separability, values close to 1 indicate good separability
-     * @return index of noise separability
-     */
-    public double getNoiseSeparability(){
-        if(noise.isEmpty()) 
-            return 1;
-
-        double connectivity = 0;
-        for(int p : noise){
-            CMMPoint npoint = cmmpoints.get(p);
-            double maxConnection = 0;
-
-            //TODO: some kind of pruning possible. what about weighting?
-            for (int c = 0; c < gt0Clusters.size(); c++) {
-                double connection = getConnectionValue(npoint, c);
-                if(connection > maxConnection)
-                    maxConnection = connection;
-            }
-            connectivity+=maxConnection;
-            npoint.p.setMeasureValue("MaxConnection", maxConnection);
-        }
+    // calculate initial knnMean and knnDev
+    for (GTCluster gtc : gt0Clusters) {
+      gtc.calculateKnn();
+    }
 
-        return 1-(connectivity / noise.size());
+    // calculate cluster connections
+    calculateGTClusterConnections();
+
+    // calculate point connections with own clusters
+    calculateGTPointQualities();
+
+    if (debug)
+      System.out.println("GT Analysis Debug End");
+
+  }
+
+  /**
+   * Calculate the connection of a point to a cluster
+   * 
+   * @param cmmp
+   *          the point to calculate the connection for
+   * @param clusterID
+   *          the corresponding cluster
+   * @return the connection value
+   */
+  // TODO: Cache the connection value for a point to the different clusters???
+  protected double getConnectionValue(CMMPoint cmmp, int clusterID) {
+    AutoExpandVector<Double> knnDist = new AutoExpandVector<Double>();
+    AutoExpandVector<Integer> knnPointIndex = new AutoExpandVector<Integer>();
+
+    // calculate the knn distance of the point to the cluster
+    getKnnInCluster(cmmp, knnNeighbourhood, gt0Clusters.get(clusterID).points, knnDist, knnPointIndex);
+
+    // TODO: What to do if we have less then k neighbors?
+    double avgDist = 0;
+    for (int i = 0; i < knnDist.size(); i++) {
+      avgDist += knnDist.get(i);
     }
+    // what to do if we only have a single point???
+    if (knnDist.size() != 0)
+      avgDist /= knnDist.size();
+    else
+      return 0;
 
-    
-    /** 
-     * Calculates the relative number of errors being caused by the underlying cluster model
-     *  @return quality of the model 
+    // get the upper knn distance of the cluster
+    double upperKnn = gt0Clusters.get(clusterID).knnMeanAvg + gt0Clusters.get(clusterID).knnDevAvg;
+
+    /*
+     * calculate the connectivity based on knn distance of the point within the
+     * cluster and the upper knn distance of the cluster
      */
-    public double getModelQuality(){
-        for(int p = 0; p < numPoints; p++){
-            CMMPoint cmdp = cmmpoints.get(p);
-            for(int hc = 0; hc < numGTClusters;hc++){
-                if(gtClustering.get(hc).getGroundTruth() != cmdp.trueClass){
-                    if(gtClustering.get(hc).getInclusionProbability(cmdp) >= 1){
-                        if(!cmdp.isNoise())
-                            pointErrorByModel++;
-                        else
-                            noiseErrorByModel++;
-                        break;
-                    }
-                }
-            }
+    if (avgDist < upperKnn) {
+      return 1;
+    }
+    else {
+      // value that should be reached at upperKnn distance
+      // Choose connection formula
+      double conn;
+      if (useExpConnectivity)
+        conn = Math.pow(2, -lamdaConn * (avgDist - upperKnn) / upperKnn);
+      else
+        conn = upperKnn / avgDist;
+
+      if (Double.isNaN(conn))
+        System.out.println("Connectivity NaN at " + cmmp.p.getTimestamp());
+
+      return conn;
+    }
+  }
+
+  /**
+   * @param cmmp
+   *          point to calculate knn distance for
+   * @param k
+   *          number of nearest neighbors to look for
+   * @param pointIDs
+   *          list of point IDs to check
+   * @param knnDist
+   *          sorted list of smallest knn distances (can already be filled to
+   *          make updates possible)
+   * @param knnPointIndex
+   *          list of corresponding knn indices
+   */
+  private void getKnnInCluster(CMMPoint cmmp, int k,
+      ArrayList<Integer> pointIDs,
+      AutoExpandVector<Double> knnDist,
+      AutoExpandVector<Integer> knnPointIndex) {
+
+    // iterate over every point in the choosen cluster, cal distance and insert
+    // into list
+    for (int p1 = 0; p1 < pointIDs.size(); p1++) {
+      int pid = pointIDs.get(p1);
+      if (cmmp.pID == pid)
+        continue;
+      double dist = distance(cmmp, cmmpoints.get(pid));
+      if (knnDist.size() < k || dist < knnDist.get(knnDist.size() - 1)) {
+        int index = 0;
+        while (index < knnDist.size() && dist > knnDist.get(index)) {
+          index++;
         }
-        if(debug)
-            System.out.println("Error by model: noise "+noiseErrorByModel+" point "+pointErrorByModel);
-
-        return 1-((pointErrorByModel + noiseErrorByModel)/(double) numPoints);
+        knnDist.add(index, dist);
+        knnPointIndex.add(index, pid);
+        if (knnDist.size() > k) {
+          knnDist.remove(knnDist.size() - 1);
+          knnPointIndex.remove(knnPointIndex.size() - 1);
+        }
+      }
     }
-
-    
-    /**
-     * Get CMM internal point
-     * @param index of the point
-     * @return cmm point
-     */
-    protected CMMPoint getPoint(int index){
-        return cmmpoints.get(index);
+  }
+
+  /**
+   * calculate initial connectivities
+   */
+  private void calculateGTPointQualities() {
+    for (int p = 0; p < numPoints; p++) {
+      CMMPoint cmdp = cmmpoints.get(p);
+      if (!cmdp.isNoise()) {
+        cmdp.connectivity = getConnectionValue(cmdp, cmdp.workclass());
+        cmdp.p.setMeasureValue("Connectivity", cmdp.connectivity);
+      }
     }
-
-    
-    /**
-     * Return cluster
-     * @param index of the cluster to return
-     * @return cluster
-     */
-    protected GTCluster getGT0Cluster(int index){
-        return gt0Clusters.get(index);
+  }
+
+  /**
+   * Calculate connections between clusters and merge clusters accordingly as
+   * long as connections exceed threshold
+   */
+  private void calculateGTClusterConnections() {
+    for (int c0 = 0; c0 < gt0Clusters.size(); c0++) {
+      for (int c1 = 0; c1 < gt0Clusters.size(); c1++) {
+        gt0Clusters.get(c0).calculateClusterConnection(c1, true);
+      }
     }
 
-    /**
-     * Number of classes/clusters of the new clustering
-     * @return number of new clusters
-     */
-    protected int getNumberOfGT0Classes() {
-        return numGT0Classes;
+    boolean changedConnection = true;
+    while (changedConnection) {
+      if (debug) {
+        System.out.println("Cluster Connection");
+        for (int c = 0; c < gt0Clusters.size(); c++) {
+          System.out.print("C" + gt0Clusters.get(c).label + " --> ");
+          for (int c1 = 0; c1 < gt0Clusters.get(c).connections.size(); c1++) {
+            System.out.print(" C" + gt0Clusters.get(c1).label + ": " + gt0Clusters.get(c).connections.get(c1));
+          }
+          System.out.println("");
+        }
+        System.out.println("");
+      }
+
+      double max = 0;
+      int maxIndexI = -1;
+      int maxIndexJ = -1;
+
+      changedConnection = false;
+      for (int c0 = 0; c0 < gt0Clusters.size(); c0++) {
+        for (int c1 = c0 + 1; c1 < gt0Clusters.size(); c1++) {
+          if (c0 == c1)
+            continue;
+          double min = Math.min(gt0Clusters.get(c0).connections.get(c1), gt0Clusters.get(c1).connections.get(c0));
+          if (min > max) {
+            max = min;
+            maxIndexI = c0;
+            maxIndexJ = c1;
+          }
+        }
+      }
+      if (maxIndexI != -1 && max > tauConnection) {
+        gt0Clusters.get(maxIndexI).mergeCluster(maxIndexJ);
+        if (debug)
+          System.out.println("Merging " + maxIndexI + " and " + maxIndexJ + " because of connection " + max);
+
+        changedConnection = true;
+      }
     }
-    
-    /**
-     * Calculates Euclidian distance 
-     * @param inst1 point as double array
-     * @param inst2 point as double array
-     * @return euclidian distance
-     */
-    private double distance(Instance inst1, Instance inst2){
-          return distance(inst1, inst2.toDoubleArray());
-
+    numGT0Classes = gt0Clusters.size();
+  }
+
+  /**
+   * Calculates how well the original clusters are separable. Small values
+   * indicate bad separability, values close to 1 indicate good separability
+   * 
+   * @return index of seperability
+   */
+  public double getClassSeparability() {
+    // int totalConn = numGTClasses*(numGTClasses-1)/2;
+    // int mergedConn = 0;
+    // for(GTCluster gt : gt0Clusters){
+    // int merged = gt.clusterRepresentations.size();
+    // if(merged > 1)
+    // mergedConn+=merged * (merged-1)/2;
+    // }
+    // if(totalConn == 0)
+    // return 0;
+    // else
+    // return 1-mergedConn/(double)totalConn;
+    return numGT0Classes / (double) numGTClasses;
+
+  }
+
+  /**
+   * Calculates how well noise is separable from the given clusters Small values
+   * indicate bad separability, values close to 1 indicate good separability
+   * 
+   * @return index of noise separability
+   */
+  public double getNoiseSeparability() {
+    if (noise.isEmpty())
+      return 1;
+
+    double connectivity = 0;
+    for (int p : noise) {
+      CMMPoint npoint = cmmpoints.get(p);
+      double maxConnection = 0;
+
+      // TODO: some kind of pruning possible. what about weighting?
+      for (int c = 0; c < gt0Clusters.size(); c++) {
+        double connection = getConnectionValue(npoint, c);
+        if (connection > maxConnection)
+          maxConnection = connection;
+      }
+      connectivity += maxConnection;
+      npoint.p.setMeasureValue("MaxConnection", maxConnection);
     }
-    
-    /**
-     * Calculates Euclidian distance 
-     * @param inst1 point as an instance
-     * @param inst2 point as double array
-     * @return euclidian distance
-     */
-    private double distance(Instance inst1, double[] inst2){
-        double distance = 0.0;
-        for (int i = 0; i < numDims; i++) {
-            double d = inst1.value(i) - inst2[i];
-            distance += d * d;
+
+    return 1 - (connectivity / noise.size());
+  }
+
+  /**
+   * Calculates the relative number of errors being caused by the underlying
+   * cluster model
+   * 
+   * @return quality of the model
+   */
+  public double getModelQuality() {
+    for (int p = 0; p < numPoints; p++) {
+      CMMPoint cmdp = cmmpoints.get(p);
+      for (int hc = 0; hc < numGTClusters; hc++) {
+        if (gtClustering.get(hc).getGroundTruth() != cmdp.trueClass) {
+          if (gtClustering.get(hc).getInclusionProbability(cmdp) >= 1) {
+            if (!cmdp.isNoise())
+              pointErrorByModel++;
+            else
+              noiseErrorByModel++;
+            break;
+          }
         }
-        return Math.sqrt(distance);
+      }
     }
-    
-    /**
-     * String with main CMM parameters
-     * @return main CMM parameter
-     */
-    public String getParameterString(){
-        String para = "";
-        para+="k="+knnNeighbourhood+";";
-        if(useExpConnectivity){
-               para+="lambdaConnX="+lambdaConnX+";";
-               para+="lambdaConn="+lamdaConn+";";
-               para+="lambdaConnRef="+lambdaConnRefXValue+";";
-        }
-        para+="m="+clusterConnectionMaxPoints+";";
-        para+="tauConn="+tauConnection+";";
+    if (debug)
+      System.out.println("Error by model: noise " + noiseErrorByModel + " point " + pointErrorByModel);
+
+    return 1 - ((pointErrorByModel + noiseErrorByModel) / (double) numPoints);
+  }
+
+  /**
+   * Get CMM internal point
+   * 
+   * @param index
+   *          of the point
+   * @return cmm point
+   */
+  protected CMMPoint getPoint(int index) {
+    return cmmpoints.get(index);
+  }
+
+  /**
+   * Return cluster
+   * 
+   * @param index
+   *          of the cluster to return
+   * @return cluster
+   */
+  protected GTCluster getGT0Cluster(int index) {
+    return gt0Clusters.get(index);
+  }
+
+  /**
+   * Number of classes/clusters of the new clustering
+   * 
+   * @return number of new clusters
+   */
+  protected int getNumberOfGT0Classes() {
+    return numGT0Classes;
+  }
+
+  /**
+   * Calculates Euclidian distance
+   * 
+   * @param inst1
+   *          point as double array
+   * @param inst2
+   *          point as double array
+   * @return euclidian distance
+   */
+  private double distance(Instance inst1, Instance inst2) {
+    return distance(inst1, inst2.toDoubleArray());
+
+  }
+
+  /**
+   * Calculates Euclidian distance
+   * 
+   * @param inst1
+   *          point as an instance
+   * @param inst2
+   *          point as double array
+   * @return euclidian distance
+   */
+  private double distance(Instance inst1, double[] inst2) {
+    double distance = 0.0;
+    for (int i = 0; i < numDims; i++) {
+      double d = inst1.value(i) - inst2[i];
+      distance += d * d;
+    }
+    return Math.sqrt(distance);
+  }
+
+  /**
+   * String with main CMM parameters
+   * 
+   * @return main CMM parameter
+   */
+  public String getParameterString() {
+    String para = "";
+    para += "k=" + knnNeighbourhood + ";";
+    if (useExpConnectivity) {
+      para += "lambdaConnX=" + lambdaConnX + ";";
+      para += "lambdaConn=" + lamdaConn + ";";
+      para += "lambdaConnRef=" + lambdaConnRefXValue + ";";
+    }
+    para += "m=" + clusterConnectionMaxPoints + ";";
+    para += "tauConn=" + tauConnection + ";";
 
-        return para;
-    }    
+    return para;
+  }
 }
-
-

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/EntropyCollection.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/EntropyCollection.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/EntropyCollection.java
index 0d311e4..1a44542 100644
--- a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/EntropyCollection.java
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/EntropyCollection.java
@@ -30,145 +30,146 @@ import com.yahoo.labs.samoa.moa.core.DataPoint;
 import com.yahoo.labs.samoa.moa.evaluation.MeasureCollection;
 import com.yahoo.labs.samoa.moa.evaluation.MembershipMatrix;
 
-public class EntropyCollection extends MeasureCollection{
-
-    private static final Logger logger = LoggerFactory.getLogger(EntropyCollection.class);
-
-    @Override
-    protected String[] getNames() {
-        return new String[]{"GT cross entropy","FC cross entropy","Homogeneity","Completeness","V-Measure","VarInformation"};
-    }
-
-    @Override
-    protected boolean[] getDefaultEnabled() {
-        return new boolean[]{false, false, false, false, false, false};
+public class EntropyCollection extends MeasureCollection {
+
+  private static final Logger logger = LoggerFactory.getLogger(EntropyCollection.class);
+
+  @Override
+  protected String[] getNames() {
+    return new String[] { "GT cross entropy", "FC cross entropy", "Homogeneity", "Completeness", "V-Measure",
+        "VarInformation" };
+  }
+
+  @Override
+  protected boolean[] getDefaultEnabled() {
+    return new boolean[] { false, false, false, false, false, false };
+  }
+
+  @Override
+  public void evaluateClustering(Clustering fclustering, Clustering hClustering, ArrayList<DataPoint> points)
+      throws Exception {
+
+    MembershipMatrix mm = new MembershipMatrix(fclustering, points);
+    int numClasses = mm.getNumClasses();
+    int numCluster = fclustering.size() + 1;
+    int n = mm.getTotalEntries();
+
+    double FCentropy = 0;
+    if (numCluster > 1) {
+      for (int fc = 0; fc < numCluster; fc++) {
+        double weight = mm.getClusterSum(fc) / (double) n;
+        if (weight > 0)
+          FCentropy += weight * Math.log10(weight);
+      }
+      FCentropy /= (-1 * Math.log10(numCluster));
     }
 
-    @Override
-    public void evaluateClustering(Clustering fclustering, Clustering hClustering, ArrayList<DataPoint> points) throws Exception {
+    logger.debug("FC entropy: {}", FCentropy);
 
-        MembershipMatrix mm = new MembershipMatrix(fclustering, points);
-        int numClasses = mm.getNumClasses();
-        int numCluster = fclustering.size()+1;
-        int n = mm.getTotalEntries();
-
-
-        double FCentropy = 0;
-        if(numCluster > 1){
-            for (int fc = 0; fc < numCluster; fc++){
-                double weight = mm.getClusterSum(fc)/(double)n;
-                if(weight > 0)
-                    FCentropy+= weight * Math.log10(weight);
-            }
-            FCentropy/=(-1*Math.log10(numCluster));
-        }
+    double GTentropy = 0;
+    if (numClasses > 1) {
+      for (int hc = 0; hc < numClasses; hc++) {
+        double weight = mm.getClassSum(hc) / (double) n;
+        if (weight > 0)
+          GTentropy += weight * Math.log10(weight);
+      }
+      GTentropy /= (-1 * Math.log10(numClasses));
+    }
 
-        logger.debug("FC entropy: {}", FCentropy);
+    logger.debug("GT entropy: {}", GTentropy);
 
-        double GTentropy = 0;
-        if(numClasses > 1){
-            for (int hc = 0; hc < numClasses; hc++){
-                double weight = mm.getClassSum(hc)/(double)n;
-                if(weight > 0)
-                    GTentropy+= weight * Math.log10(weight);
-            }
-            GTentropy/=(-1*Math.log10(numClasses));
-        }
+    // cluster based entropy
+    double FCcrossEntropy = 0;
 
-        logger.debug("GT entropy: {}", GTentropy);
-
-        //cluster based entropy
-        double FCcrossEntropy = 0;
-        
-        for (int fc = 0; fc < numCluster; fc++){
-            double e = 0;
-            int clusterWeight = mm.getClusterSum(fc);
-            if(clusterWeight>0){
-                for (int hc = 0; hc < numClasses; hc++) {
-                    double p = mm.getClusterClassWeight(fc, hc)/(double)clusterWeight;
-                    if(p!=0){
-                        e+=p * Math.log10(p);
-                    }
-                }
-                FCcrossEntropy+=((clusterWeight/(double)n) * e);
-            }
-        }
-        if(numCluster > 1){
-            FCcrossEntropy/=-1*Math.log10(numCluster);
+    for (int fc = 0; fc < numCluster; fc++) {
+      double e = 0;
+      int clusterWeight = mm.getClusterSum(fc);
+      if (clusterWeight > 0) {
+        for (int hc = 0; hc < numClasses; hc++) {
+          double p = mm.getClusterClassWeight(fc, hc) / (double) clusterWeight;
+          if (p != 0) {
+            e += p * Math.log10(p);
+          }
         }
+        FCcrossEntropy += ((clusterWeight / (double) n) * e);
+      }
+    }
+    if (numCluster > 1) {
+      FCcrossEntropy /= -1 * Math.log10(numCluster);
+    }
 
-        addValue("FC cross entropy", 1-FCcrossEntropy);
-        logger.debug("FC cross entropy: {}", 1 - FCcrossEntropy);
-
-        //class based entropy
-        double GTcrossEntropy = 0;
-        for (int hc = 0; hc < numClasses; hc++){
-            double e = 0;
-            int classWeight = mm.getClassSum(hc);
-            if(classWeight>0){
-                for (int fc = 0; fc < numCluster; fc++) {
-                    double p = mm.getClusterClassWeight(fc, hc)/(double)classWeight;
-                    if(p!=0){
-                        e+=p * Math.log10(p);
-                    }
-                }
-            }
-            GTcrossEntropy+=((classWeight/(double)n) * e);
+    addValue("FC cross entropy", 1 - FCcrossEntropy);
+    logger.debug("FC cross entropy: {}", 1 - FCcrossEntropy);
+
+    // class based entropy
+    double GTcrossEntropy = 0;
+    for (int hc = 0; hc < numClasses; hc++) {
+      double e = 0;
+      int classWeight = mm.getClassSum(hc);
+      if (classWeight > 0) {
+        for (int fc = 0; fc < numCluster; fc++) {
+          double p = mm.getClusterClassWeight(fc, hc) / (double) classWeight;
+          if (p != 0) {
+            e += p * Math.log10(p);
+          }
         }
-        if(numClasses > 1)
-            GTcrossEntropy/=-1*Math.log10(numClasses);
-        addValue("GT cross entropy", 1-GTcrossEntropy);
-        logger.debug("GT cross entropy: {}", 1 - GTcrossEntropy);
-
-        double homogeneity;
-        if(FCentropy == 0)
-            homogeneity = 1;
-        else
-            homogeneity = 1 - FCcrossEntropy/FCentropy;
-
-        //TODO set err values for now, needs to be debugged
-        if(homogeneity > 1 || homogeneity < 0)
-            addValue("Homogeneity",-1);
-        else
-            addValue("Homogeneity",homogeneity);
-
-        double completeness;
-        if(GTentropy == 0)
-            completeness = 1;
-        else
-            completeness = 1 - GTcrossEntropy/GTentropy;
-        addValue("Completeness",completeness);
-
-        double beta = 1;
-        double vmeasure = (1+ beta)*homogeneity*completeness/(beta *homogeneity+completeness);
-
-        if(vmeasure > 1 || homogeneity < 0)
-            addValue("V-Measure",-1);
-        else
-            addValue("V-Measure",vmeasure);
-
-
-
-        double mutual = 0;
-        for (int i = 0; i < numCluster; i++){
-                for (int j = 0; j < numClasses; j++) {
-                   if(mm.getClusterClassWeight(i, j)==0) continue;
-                   double m = Math.log10(mm.getClusterClassWeight(i, j)/(double)mm.getClusterSum(i)/(double)mm.getClassSum(j)*(double)n);
-                   m*= mm.getClusterClassWeight(i, j)/(double)n;
-                    logger.debug("( {} / {}): ",m, m);
-                   mutual+=m;
-                }
-        }
-        if(numClasses > 1)
-            mutual/=Math.log10(numClasses);
+      }
+      GTcrossEntropy += ((classWeight / (double) n) * e);
+    }
+    if (numClasses > 1)
+      GTcrossEntropy /= -1 * Math.log10(numClasses);
+    addValue("GT cross entropy", 1 - GTcrossEntropy);
+    logger.debug("GT cross entropy: {}", 1 - GTcrossEntropy);
+
+    double homogeneity;
+    if (FCentropy == 0)
+      homogeneity = 1;
+    else
+      homogeneity = 1 - FCcrossEntropy / FCentropy;
+
+    // TODO set err values for now, needs to be debugged
+    if (homogeneity > 1 || homogeneity < 0)
+      addValue("Homogeneity", -1);
+    else
+      addValue("Homogeneity", homogeneity);
+
+    double completeness;
+    if (GTentropy == 0)
+      completeness = 1;
+    else
+      completeness = 1 - GTcrossEntropy / GTentropy;
+    addValue("Completeness", completeness);
+
+    double beta = 1;
+    double vmeasure = (1 + beta) * homogeneity * completeness / (beta * homogeneity + completeness);
+
+    if (vmeasure > 1 || homogeneity < 0)
+      addValue("V-Measure", -1);
+    else
+      addValue("V-Measure", vmeasure);
+
+    double mutual = 0;
+    for (int i = 0; i < numCluster; i++) {
+      for (int j = 0; j < numClasses; j++) {
+        if (mm.getClusterClassWeight(i, j) == 0)
+          continue;
+        double m = Math.log10(mm.getClusterClassWeight(i, j) / (double) mm.getClusterSum(i)
+            / (double) mm.getClassSum(j) * (double) n);
+        m *= mm.getClusterClassWeight(i, j) / (double) n;
+        logger.debug("( {} / {}): ", m, m);
+        mutual += m;
+      }
+    }
+    if (numClasses > 1)
+      mutual /= Math.log10(numClasses);
 
-        double varInfo = 1;
-        if(FCentropy + GTentropy > 0)
-            varInfo = 2*mutual/(FCentropy + GTentropy);
+    double varInfo = 1;
+    if (FCentropy + GTentropy > 0)
+      varInfo = 2 * mutual / (FCentropy + GTentropy);
 
-        logger.debug("mutual: {} / VI: {}", mutual, varInfo);
-        addValue("VarInformation", varInfo);
+    logger.debug("mutual: {} / VI: {}", mutual, varInfo);
+    addValue("VarInformation", varInfo);
 
-    }
+  }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/F1.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/F1.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/F1.java
index 6533f36..f62b6bb 100644
--- a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/F1.java
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/F1.java
@@ -26,90 +26,85 @@ import com.yahoo.labs.samoa.moa.evaluation.MembershipMatrix;
 import com.yahoo.labs.samoa.moa.core.DataPoint;
 import java.util.ArrayList;
 
+public class F1 extends MeasureCollection {
 
-public class F1 extends MeasureCollection{
+  @Override
+  protected String[] getNames() {
+    return new String[] { "F1-P", "F1-R", "Purity" };
+  }
 
-    @Override
-    protected String[] getNames() {
-        return new String[]{"F1-P","F1-R","Purity"};
-    }
-
-    public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) {
-
-        if (clustering.size()<0){
-            addValue(0,0);
-            addValue(1,0);
-            return;
-        }
-
-        MembershipMatrix mm = new MembershipMatrix(clustering, points);
-        //System.out.println(mm.toString());
-
-        int numClasses = mm.getNumClasses();
-        if(mm.hasNoiseClass())
-            numClasses--;
-
-
-
-        //F1 as defined in P3C, try using F1 optimization
-        double F1_P = 0.0;
-        double purity = 0;
-        int realClusters = 0;
-        for (int i = 0; i < clustering.size(); i++) {
-            int max_weight = 0;
-            int max_weight_index = -1;
+  public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) {
 
-            //find max index
-            for (int j = 0; j < numClasses; j++) {
-                if(mm.getClusterClassWeight(i, j) > max_weight){
-                    max_weight = mm.getClusterClassWeight(i, j);
-                    max_weight_index = j;
-                }
-            }
-            if(max_weight_index!=-1){
-                realClusters++;
-                double precision = mm.getClusterClassWeight(i, max_weight_index)/(double)mm.getClusterSum(i);
-                double recall = mm.getClusterClassWeight(i, max_weight_index)/(double) mm.getClassSum(max_weight_index);
-                double f1 = 0;
-                if(precision > 0 || recall > 0){
-                    f1 = 2*precision*recall/(precision+recall);
-                }
-                F1_P += f1;
-                purity += precision;
+    if (clustering.size() < 0) {
+      addValue(0, 0);
+      addValue(1, 0);
+      return;
+    }
 
-                //TODO should we move setMeasure stuff into the Cluster interface?
-                clustering.get(i).setMeasureValue("F1-P", Double.toString(f1));
-            }
+    MembershipMatrix mm = new MembershipMatrix(clustering, points);
+    // System.out.println(mm.toString());
+
+    int numClasses = mm.getNumClasses();
+    if (mm.hasNoiseClass())
+      numClasses--;
+
+    // F1 as defined in P3C, try using F1 optimization
+    double F1_P = 0.0;
+    double purity = 0;
+    int realClusters = 0;
+    for (int i = 0; i < clustering.size(); i++) {
+      int max_weight = 0;
+      int max_weight_index = -1;
+
+      // find max index
+      for (int j = 0; j < numClasses; j++) {
+        if (mm.getClusterClassWeight(i, j) > max_weight) {
+          max_weight = mm.getClusterClassWeight(i, j);
+          max_weight_index = j;
         }
-        if(realClusters > 0){
-            F1_P/=realClusters;
-            purity/=realClusters;
+      }
+      if (max_weight_index != -1) {
+        realClusters++;
+        double precision = mm.getClusterClassWeight(i, max_weight_index) / (double) mm.getClusterSum(i);
+        double recall = mm.getClusterClassWeight(i, max_weight_index) / (double) mm.getClassSum(max_weight_index);
+        double f1 = 0;
+        if (precision > 0 || recall > 0) {
+          f1 = 2 * precision * recall / (precision + recall);
         }
-        addValue("F1-P",F1_P);
-        addValue("Purity",purity);
-
+        F1_P += f1;
+        purity += precision;
 
-
-        //F1 as defined in .... mainly maximizes F1 for each class
-        double F1_R = 0.0;
-        for (int j = 0; j < numClasses; j++) {
-            double max_f1 = 0;
-            for (int i = 0; i < clustering.size(); i++) {
-                double precision = mm.getClusterClassWeight(i, j)/(double)mm.getClusterSum(i);
-                double recall = mm.getClusterClassWeight(i, j)/(double)mm.getClassSum(j);
-                double f1 = 0;
-                if(precision > 0 || recall > 0){
-                    f1 = 2*precision*recall/(precision+recall);
-                }
-                if(max_f1 < f1){
-                    max_f1 = f1;
-                }
-            }
-            F1_R+= max_f1;
+        // TODO should we move setMeasure stuff into the Cluster interface?
+        clustering.get(i).setMeasureValue("F1-P", Double.toString(f1));
+      }
+    }
+    if (realClusters > 0) {
+      F1_P /= realClusters;
+      purity /= realClusters;
+    }
+    addValue("F1-P", F1_P);
+    addValue("Purity", purity);
+
+    // F1 as defined in .... mainly maximizes F1 for each class
+    double F1_R = 0.0;
+    for (int j = 0; j < numClasses; j++) {
+      double max_f1 = 0;
+      for (int i = 0; i < clustering.size(); i++) {
+        double precision = mm.getClusterClassWeight(i, j) / (double) mm.getClusterSum(i);
+        double recall = mm.getClusterClassWeight(i, j) / (double) mm.getClassSum(j);
+        double f1 = 0;
+        if (precision > 0 || recall > 0) {
+          f1 = 2 * precision * recall / (precision + recall);
         }
-        F1_R/=numClasses;
-
-        addValue("F1-R",F1_R);
+        if (max_f1 < f1) {
+          max_f1 = f1;
+        }
+      }
+      F1_R += max_f1;
     }
+    F1_R /= numClasses;
+
+    addValue("F1-R", F1_R);
+  }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/General.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/General.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/General.java
index 7f23c1b..287af06 100644
--- a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/General.java
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/General.java
@@ -20,7 +20,6 @@ package com.yahoo.labs.samoa.evaluation.measures;
  * #L%
  */
 
-
 import com.yahoo.labs.samoa.instances.Instance;
 import com.yahoo.labs.samoa.moa.cluster.Clustering;
 import com.yahoo.labs.samoa.moa.cluster.SphereCluster;
@@ -28,164 +27,166 @@ import com.yahoo.labs.samoa.moa.evaluation.MeasureCollection;
 import com.yahoo.labs.samoa.moa.core.DataPoint;
 import java.util.ArrayList;
 
-
-public class General extends MeasureCollection{
-    private int numPoints;
-    private int numFClusters;
-    private int numDims;
-    private double pointInclusionProbThreshold = 0.8;
-    private Clustering clustering;
-    private ArrayList<DataPoint> points;
-
-
-    public General() {
-        super();
+public class General extends MeasureCollection {
+  private int numPoints;
+  private int numFClusters;
+  private int numDims;
+  private double pointInclusionProbThreshold = 0.8;
+  private Clustering clustering;
+  private ArrayList<DataPoint> points;
+
+  public General() {
+    super();
+  }
+
+  @Override
+  protected String[] getNames() {
+    // String[] names =
+    // {"GPrecision","GRecall","Redundancy","Overlap","numCluster","numClasses","Compactness"};
+    return new String[] { "GPrecision", "GRecall", "Redundancy", "numCluster", "numClasses" };
+  }
+
+  // @Override
+  // protected boolean[] getDefaultEnabled() {
+  // boolean [] defaults = {false, false, false, false, false ,false};
+  // return defaults;
+  // }
+
+  @Override
+  public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points)
+      throws Exception {
+
+    this.points = points;
+    this.clustering = clustering;
+    numPoints = points.size();
+    numFClusters = clustering.size();
+    numDims = points.get(0).numAttributes() - 1;
+
+    int totalRedundancy = 0;
+    int trueCoverage = 0;
+    int totalCoverage = 0;
+
+    int numNoise = 0;
+    for (int p = 0; p < numPoints; p++) {
+      int coverage = 0;
+      for (int c = 0; c < numFClusters; c++) {
+        // contained in cluster c?
+        if (clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold) {
+          coverage++;
+        }
+      }
+
+      if (points.get(p).classValue() == -1) {
+        numNoise++;
+      }
+      else {
+        if (coverage > 0)
+          trueCoverage++;
+      }
+
+      if (coverage > 0)
+        totalCoverage++; // points covered by clustering (incl. noise)
+      if (coverage > 1)
+        totalRedundancy++; // include noise
     }
 
-
-    @Override
-    protected String[] getNames() {
-        //String[] names = {"GPrecision","GRecall","Redundancy","Overlap","numCluster","numClasses","Compactness"};
-        return new String[]{"GPrecision","GRecall","Redundancy","numCluster","numClasses"};
+    addValue("numCluster", clustering.size());
+    addValue("numClasses", trueClustering.size());
+    addValue("Redundancy", ((double) totalRedundancy / (double) numPoints));
+    addValue("GPrecision", (totalCoverage == 0 ? 0 : ((double) trueCoverage / (double) (totalCoverage))));
+    addValue("GRecall", ((double) trueCoverage / (double) (numPoints - numNoise)));
+    // if(isEnabled(3)){
+    // addValue("Compactness", computeCompactness());
+    // }
+    // if(isEnabled(3)){
+    // addValue("Overlap", computeOverlap());
+    // }
+  }
+
+  private double computeOverlap() {
+    for (int c = 0; c < numFClusters; c++) {
+      if (!(clustering.get(c) instanceof SphereCluster)) {
+        System.out.println("Overlap only supports Sphere Cluster. Found: " + clustering.get(c).getClass());
+        return Double.NaN;
+      }
     }
 
-//    @Override
-//    protected boolean[] getDefaultEnabled() {
-//        boolean [] defaults = {false, false, false, false, false ,false};
-//        return defaults;
-//    }
-
-    @Override
-    public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) throws Exception{
-
-        this.points = points;
-        this.clustering = clustering;
-        numPoints = points.size();
-        numFClusters = clustering.size();
-        numDims = points.get(0).numAttributes()-1;
-
-
-        int totalRedundancy = 0;
-        int trueCoverage = 0;
-        int totalCoverage = 0;
-
-        int numNoise = 0;
-        for (int p = 0; p < numPoints; p++) {
-            int coverage = 0;
-            for (int c = 0; c < numFClusters; c++) {
-                //contained in cluster c?
-                if(clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold){
-                    coverage++;
-                }
-            }
-
-            if(points.get(p).classValue()==-1){
-                numNoise++;
-            }
-            else{
-                if(coverage>0) trueCoverage++;
-            }
-
-            if(coverage>0) totalCoverage++;  //points covered by clustering (incl. noise)
-            if(coverage>1) totalRedundancy++; //include noise
+    boolean[] overlap = new boolean[numFClusters];
+
+    for (int c0 = 0; c0 < numFClusters; c0++) {
+      if (overlap[c0])
+        continue;
+      SphereCluster s0 = (SphereCluster) clustering.get(c0);
+      for (int c1 = c0; c1 < clustering.size(); c1++) {
+        if (c1 == c0)
+          continue;
+        SphereCluster s1 = (SphereCluster) clustering.get(c1);
+        if (s0.overlapRadiusDegree(s1) > 0) {
+          overlap[c0] = overlap[c1] = true;
         }
-
-        addValue("numCluster", clustering.size());
-        addValue("numClasses", trueClustering.size());
-        addValue("Redundancy", ((double)totalRedundancy/(double)numPoints));
-        addValue("GPrecision", (totalCoverage==0?0:((double)trueCoverage/(double)(totalCoverage))));
-        addValue("GRecall", ((double)trueCoverage/(double)(numPoints-numNoise)));
-//        if(isEnabled(3)){
-//            addValue("Compactness", computeCompactness());
-//        }
-//        if(isEnabled(3)){
-//            addValue("Overlap", computeOverlap());
-//        }
+      }
     }
 
-    private double computeOverlap(){
-        for (int c = 0; c < numFClusters; c++) {
-            if(!(clustering.get(c) instanceof SphereCluster)){
-                System.out.println("Overlap only supports Sphere Cluster. Found: "+clustering.get(c).getClass());
-                return Double.NaN;
-            }
-        }
-
-        boolean[] overlap = new boolean[numFClusters];
-
-        for (int c0 = 0; c0 < numFClusters; c0++) {
-            if(overlap[c0]) continue;
-            SphereCluster s0 = (SphereCluster)clustering.get(c0);
-            for (int c1 = c0; c1 < clustering.size(); c1++) {
-                if(c1 == c0) continue;
-                SphereCluster s1 = (SphereCluster)clustering.get(c1);
-                if(s0.overlapRadiusDegree(s1) > 0){
-                    overlap[c0] = overlap[c1] = true;
-                }
-            }
-        }
-
-        double totalOverlap = 0;
-        for (int c0 = 0; c0 < numFClusters; c0++) {
-            if(overlap[c0])
-                totalOverlap++;
-        }
-
-//        if(totalOverlap/(double)numFClusters > .8) RunVisualizer.pause();
-        if(numFClusters>0) totalOverlap/=(double)numFClusters;
-        return totalOverlap;
+    double totalOverlap = 0;
+    for (int c0 = 0; c0 < numFClusters; c0++) {
+      if (overlap[c0])
+        totalOverlap++;
     }
 
+    // if(totalOverlap/(double)numFClusters > .8) RunVisualizer.pause();
+    if (numFClusters > 0)
+      totalOverlap /= (double) numFClusters;
+    return totalOverlap;
+  }
+
+  private double computeCompactness() {
+    if (numFClusters == 0)
+      return 0;
+    for (int c = 0; c < numFClusters; c++) {
+      if (!(clustering.get(c) instanceof SphereCluster)) {
+        System.out.println("Compactness only supports Sphere Cluster. Found: " + clustering.get(c).getClass());
+        return Double.NaN;
+      }
+    }
 
-    private double computeCompactness(){
-        if(numFClusters == 0) return 0;
-        for (int c = 0; c < numFClusters; c++) {
-            if(!(clustering.get(c) instanceof SphereCluster)){
-                System.out.println("Compactness only supports Sphere Cluster. Found: "+clustering.get(c).getClass());
-                return Double.NaN;
-            }
+    // TODO weight radius by number of dimensions
+    double totalCompactness = 0;
+    for (int c = 0; c < numFClusters; c++) {
+      ArrayList<Instance> containedPoints = new ArrayList<Instance>();
+      for (int p = 0; p < numPoints; p++) {
+        // p in c
+        if (clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold) {
+          containedPoints.add(points.get(p));
         }
-
-        //TODO weight radius by number of dimensions
-        double totalCompactness = 0;
-        for (int c = 0; c < numFClusters; c++) {
-            ArrayList<Instance> containedPoints = new ArrayList<Instance>();
-            for (int p = 0; p < numPoints; p++) {
-                //p in c
-                if(clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold){
-                    containedPoints.add(points.get(p));
-                }
-            }
-            double compactness = 0;
-            if(containedPoints.size()>1){
-                //cluster not empty
-                SphereCluster minEnclosingCluster = new SphereCluster(containedPoints, numDims);
-                double minRadius = minEnclosingCluster.getRadius();
-                double cfRadius = ((SphereCluster)clustering.get(c)).getRadius();
-                if(Math.abs(minRadius-cfRadius) < 0.1e-10){
-                    compactness = 1;
-                }
-                else
-                    if(minRadius < cfRadius)
-                        compactness = minRadius/cfRadius;
-                    else{
-                        System.out.println("Optimal radius bigger then real one ("+(cfRadius-minRadius)+"), this is really wrong");
-                        compactness = 1;
-                    }
-            }
-            else{
-                double cfRadius = ((SphereCluster)clustering.get(c)).getRadius();
-                if(cfRadius==0) compactness = 1;
-            }
-
-            //weight by weight of cluster???
-            totalCompactness+=compactness;
-            clustering.get(c).setMeasureValue("Compactness", Double.toString(compactness));
+      }
+      double compactness = 0;
+      if (containedPoints.size() > 1) {
+        // cluster not empty
+        SphereCluster minEnclosingCluster = new SphereCluster(containedPoints, numDims);
+        double minRadius = minEnclosingCluster.getRadius();
+        double cfRadius = ((SphereCluster) clustering.get(c)).getRadius();
+        if (Math.abs(minRadius - cfRadius) < 0.1e-10) {
+          compactness = 1;
         }
-        return (totalCompactness/numFClusters);
+        else if (minRadius < cfRadius)
+          compactness = minRadius / cfRadius;
+        else {
+          System.out.println("Optimal radius bigger then real one (" + (cfRadius - minRadius)
+              + "), this is really wrong");
+          compactness = 1;
+        }
+      }
+      else {
+        double cfRadius = ((SphereCluster) clustering.get(c)).getRadius();
+        if (cfRadius == 0)
+          compactness = 1;
+      }
+
+      // weight by weight of cluster???
+      totalCompactness += compactness;
+      clustering.get(c).setMeasureValue("Compactness", Double.toString(compactness));
     }
-
+    return (totalCompactness / numFClusters);
+  }
 
 }
-
-

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/SSQ.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/SSQ.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/SSQ.java
index 4f57788..ac25888 100644
--- a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/SSQ.java
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/SSQ.java
@@ -28,69 +28,70 @@ import com.yahoo.labs.samoa.instances.Instance;
 
 public class SSQ extends MeasureCollection {
 
-    public SSQ() {
-        super();
-    }
-
-    @Override
-    public String[] getNames() {
-        return new String[]{"SSQ"};
-    }
+  public SSQ() {
+    super();
+  }
 
-    @Override
-    protected boolean[] getDefaultEnabled() {
-        return new boolean[]{false};
-    }
+  @Override
+  public String[] getNames() {
+    return new String[] { "SSQ" };
+  }
 
-    // TODO Work on this later
-    //@Override
-    public void evaluateClusteringSamoa(Clustering clustering,
-            Clustering trueClsutering, ArrayList<Instance> points) {
-        double sum = 0.0;
-        for (Instance point : points) {
-            // don't include noise
-            if (point.classValue() == -1) {
-                continue;
-            }
+  @Override
+  protected boolean[] getDefaultEnabled() {
+    return new boolean[] { false };
+  }
 
-            double minDistance = Double.MAX_VALUE;
-            for (int c = 0; c < clustering.size(); c++) {
-                double distance = 0.0;
-                double[] center = clustering.get(c).getCenter();
-                for (int i = 0; i < center.length; i++) {
-                    double d = point.value(i) - center[i];
-                    distance += d * d;
-                }
-                minDistance = Math.min(distance, minDistance);
-            }
+  // TODO Work on this later
+  // @Override
+  public void evaluateClusteringSamoa(Clustering clustering,
+      Clustering trueClsutering, ArrayList<Instance> points) {
+    double sum = 0.0;
+    for (Instance point : points) {
+      // don't include noise
+      if (point.classValue() == -1) {
+        continue;
+      }
 
-            sum += minDistance;
+      double minDistance = Double.MAX_VALUE;
+      for (int c = 0; c < clustering.size(); c++) {
+        double distance = 0.0;
+        double[] center = clustering.get(c).getCenter();
+        for (int i = 0; i < center.length; i++) {
+          double d = point.value(i) - center[i];
+          distance += d * d;
         }
+        minDistance = Math.min(distance, minDistance);
+      }
 
-        addValue(0, sum);
+      sum += minDistance;
     }
 
-    @Override
-     public void evaluateClustering(Clustering clustering, Clustering trueClsutering, ArrayList<DataPoint> points) {
-        double sum = 0.0;
-        for (int p = 0; p < points.size(); p++) {
-            //don't include noise
-            if(points.get(p).classValue()==-1) continue;
+    addValue(0, sum);
+  }
 
-            double minDistance = Double.MAX_VALUE;
-            for (int c = 0; c < clustering.size(); c++) {
-                double distance = 0.0;
-                double[] center = clustering.get(c).getCenter();
-                for (int i = 0; i < center.length; i++) {
-                    double d = points.get(p).value(i) - center[i];
-                    distance += d * d;
-                }
-                minDistance = Math.min(distance, minDistance);
-            }
-            
-            sum+=minDistance;
+  @Override
+  public void evaluateClustering(Clustering clustering, Clustering trueClsutering, ArrayList<DataPoint> points) {
+    double sum = 0.0;
+    for (int p = 0; p < points.size(); p++) {
+      // don't include noise
+      if (points.get(p).classValue() == -1)
+        continue;
+
+      double minDistance = Double.MAX_VALUE;
+      for (int c = 0; c < clustering.size(); c++) {
+        double distance = 0.0;
+        double[] center = clustering.get(c).getCenter();
+        for (int i = 0; i < center.length; i++) {
+          double d = points.get(p).value(i) - center[i];
+          distance += d * d;
         }
-        
-        addValue(0,sum);
+        minDistance = Math.min(distance, minDistance);
+      }
+
+      sum += minDistance;
     }
+
+    addValue(0, sum);
+  }
 }

Reply via email to