http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java
index b88e87a..8f81392 100644
--- a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java
@@ -34,29 +34,29 @@ import com.yahoo.labs.samoa.instances.Instance;
  */
 public interface PerformanceEvaluator extends MOAObject {
 
-       /**
-        * Resets this evaluator. It must be similar to starting a new evaluator
-        * from scratch.
-        * 
-        */
-       public void reset();
+  /**
+   * Resets this evaluator. It must be similar to starting a new evaluator from
+   * scratch.
+   * 
+   */
+  public void reset();
 
-       /**
-        * Adds a learning result to this evaluator.
-        * 
-        * @param inst
-        *            the instance to be classified
-        * @param classVotes
-        *            an array containing the estimated membership probabilities of
-        *            the test instance in each class
-        * @return an array of measurements monitored in this evaluator
-        */
-       public void addResult(Instance inst, double[] classVotes);
+  /**
+   * Adds a learning result to this evaluator.
+   * 
+   * @param inst
+   *          the instance to be classified
+   * @param classVotes
+   *          an array containing the estimated membership probabilities of the
+   *          test instance in each class
+   * @return an array of measurements monitored in this evaluator
+   */
+  public void addResult(Instance inst, double[] classVotes);
 
-       /**
-        * Gets the current measurements monitored by this evaluator.
-        * 
-        * @return an array of measurements monitored by this evaluator
-        */
-       public Measurement[] getPerformanceMeasurements();
+  /**
+   * Gets the current measurements monitored by this evaluator.
+   * 
+   * @return an array of measurements monitored by this evaluator
+   */
+  public Measurement[] getPerformanceMeasurements();
 }

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/WindowClassificationPerformanceEvaluator.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/WindowClassificationPerformanceEvaluator.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/WindowClassificationPerformanceEvaluator.java
index 8b1f394..c1758c9 100644
--- a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/WindowClassificationPerformanceEvaluator.java
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/WindowClassificationPerformanceEvaluator.java
@@ -29,192 +29,191 @@ import com.yahoo.labs.samoa.instances.Utils;
 /**
  * Classification evaluator that updates evaluation results using a sliding
  * window.
- *
+ * 
  * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz)
  * @version $Revision: 7 $
  */
 public class WindowClassificationPerformanceEvaluator extends AbstractMOAObject implements
-        ClassificationPerformanceEvaluator {
-
-     private static final long serialVersionUID = 1L;
-
-    public IntOption widthOption = new IntOption("width",
-            'w', "Size of Window", 1000);
-
-    protected double TotalweightObserved = 0;
+    ClassificationPerformanceEvaluator {
 
-    protected Estimator weightObserved;
+  private static final long serialVersionUID = 1L;
 
-    protected Estimator weightCorrect;
+  public IntOption widthOption = new IntOption("width",
+      'w', "Size of Window", 1000);
 
-    protected Estimator weightCorrectNoChangeClassifier;
+  protected double TotalweightObserved = 0;
 
-    protected double lastSeenClass;
+  protected Estimator weightObserved;
 
-    protected Estimator[] columnKappa;
+  protected Estimator weightCorrect;
 
-    protected Estimator[] rowKappa;
+  protected Estimator weightCorrectNoChangeClassifier;
 
-    protected Estimator[] classAccuracy;
+  protected double lastSeenClass;
 
-    protected int numClasses;
+  protected Estimator[] columnKappa;
 
-    public class Estimator {
+  protected Estimator[] rowKappa;
 
-        protected double[] window;
+  protected Estimator[] classAccuracy;
 
-        protected int posWindow;
+  protected int numClasses;
 
-        protected int lenWindow;
+  public class Estimator {
 
-        protected int SizeWindow;
+    protected double[] window;
 
-        protected double sum;
+    protected int posWindow;
 
-        public Estimator(int sizeWindow) {
-            window = new double[sizeWindow];
-            SizeWindow = sizeWindow;
-            posWindow = 0;
-            lenWindow = 0;
-        }
+    protected int lenWindow;
 
-        public void add(double value) {
-            sum -= window[posWindow];
-            sum += value;
-            window[posWindow] = value;
-            posWindow++;
-            if (posWindow == SizeWindow) {
-                posWindow = 0;
-            }
-            if (lenWindow < SizeWindow) {
-                lenWindow++;
-            }
-        }
+    protected int SizeWindow;
 
-        public double total() {
-            return sum;
-        }
+    protected double sum;
 
-        public double length() {
-            return lenWindow;
-        }
+    public Estimator(int sizeWindow) {
+      window = new double[sizeWindow];
+      SizeWindow = sizeWindow;
+      posWindow = 0;
+      lenWindow = 0;
+    }
 
+    public void add(double value) {
+      sum -= window[posWindow];
+      sum += value;
+      window[posWindow] = value;
+      posWindow++;
+      if (posWindow == SizeWindow) {
+        posWindow = 0;
+      }
+      if (lenWindow < SizeWindow) {
+        lenWindow++;
+      }
     }
 
-    /*   public void setWindowWidth(int w) {
-     this.width = w;
-     reset();
-     }*/
-    @Override
-    public void reset() {
-        reset(this.numClasses);
+    public double total() {
+      return sum;
     }
 
-    public void reset(int numClasses) {
-        this.numClasses = numClasses;
-        this.rowKappa = new Estimator[numClasses];
-        this.columnKappa = new Estimator[numClasses];
-        this.classAccuracy = new Estimator[numClasses];
-        for (int i = 0; i < this.numClasses; i++) {
-            this.rowKappa[i] = new Estimator(this.widthOption.getValue());
-            this.columnKappa[i] = new Estimator(this.widthOption.getValue());
-            this.classAccuracy[i] = new Estimator(this.widthOption.getValue());
-        }
-        this.weightCorrect = new Estimator(this.widthOption.getValue());
-        this.weightCorrectNoChangeClassifier = new Estimator(this.widthOption.getValue());
-        this.weightObserved = new Estimator(this.widthOption.getValue());
-        this.TotalweightObserved = 0;
-        this.lastSeenClass = 0;
+    public double length() {
+      return lenWindow;
     }
 
-    @Override
-    public void addResult(Instance inst, double[] classVotes) {
-        double weight = inst.weight();
-        int trueClass = (int) inst.classValue();
-        if (weight > 0.0) {
-            if (TotalweightObserved == 0) {
-                reset(inst.numClasses()); 
-            }
-            this.TotalweightObserved += weight;
-            this.weightObserved.add(weight);
-            int predictedClass = Utils.maxIndex(classVotes);
-            if (predictedClass == trueClass) {
-                this.weightCorrect.add(weight);
-            } else {
-                this.weightCorrect.add(0);
-            }
-            //Add Kappa statistic information
-            for (int i = 0; i < this.numClasses; i++) {
-                this.rowKappa[i].add(i == predictedClass ? weight : 0);
-                this.columnKappa[i].add(i == trueClass ? weight : 0);
-            }
-            if (this.lastSeenClass == trueClass) {
-                this.weightCorrectNoChangeClassifier.add(weight);
-            } else {
-                this.weightCorrectNoChangeClassifier.add(0);
-            }
-            this.classAccuracy[trueClass].add(predictedClass == trueClass ? weight : 0.0);
-            this.lastSeenClass = trueClass;
-        }
+  }
+
+  /*
+   * public void setWindowWidth(int w) { this.width = w; reset(); }
+   */
+  @Override
+  public void reset() {
+    reset(this.numClasses);
+  }
+
+  public void reset(int numClasses) {
+    this.numClasses = numClasses;
+    this.rowKappa = new Estimator[numClasses];
+    this.columnKappa = new Estimator[numClasses];
+    this.classAccuracy = new Estimator[numClasses];
+    for (int i = 0; i < this.numClasses; i++) {
+      this.rowKappa[i] = new Estimator(this.widthOption.getValue());
+      this.columnKappa[i] = new Estimator(this.widthOption.getValue());
+      this.classAccuracy[i] = new Estimator(this.widthOption.getValue());
+    }
+    this.weightCorrect = new Estimator(this.widthOption.getValue());
+    this.weightCorrectNoChangeClassifier = new Estimator(this.widthOption.getValue());
+    this.weightObserved = new Estimator(this.widthOption.getValue());
+    this.TotalweightObserved = 0;
+    this.lastSeenClass = 0;
+  }
+
+  @Override
+  public void addResult(Instance inst, double[] classVotes) {
+    double weight = inst.weight();
+    int trueClass = (int) inst.classValue();
+    if (weight > 0.0) {
+      if (TotalweightObserved == 0) {
+        reset(inst.numClasses());
+      }
+      this.TotalweightObserved += weight;
+      this.weightObserved.add(weight);
+      int predictedClass = Utils.maxIndex(classVotes);
+      if (predictedClass == trueClass) {
+        this.weightCorrect.add(weight);
+      } else {
+        this.weightCorrect.add(0);
+      }
+      // Add Kappa statistic information
+      for (int i = 0; i < this.numClasses; i++) {
+        this.rowKappa[i].add(i == predictedClass ? weight : 0);
+        this.columnKappa[i].add(i == trueClass ? weight : 0);
+      }
+      if (this.lastSeenClass == trueClass) {
+        this.weightCorrectNoChangeClassifier.add(weight);
+      } else {
+        this.weightCorrectNoChangeClassifier.add(0);
+      }
+      this.classAccuracy[trueClass].add(predictedClass == trueClass ? weight : 0.0);
+      this.lastSeenClass = trueClass;
     }
+  }
 
-    @Override
-    public Measurement[] getPerformanceMeasurements() {
-        return new Measurement[]{
-            new Measurement("classified instances",
+  @Override
+  public Measurement[] getPerformanceMeasurements() {
+    return new Measurement[] {
+        new Measurement("classified instances",
             this.TotalweightObserved),
-            new Measurement("classifications correct (percent)",
+        new Measurement("classifications correct (percent)",
             getFractionCorrectlyClassified() * 100.0),
-            new Measurement("Kappa Statistic (percent)",
+        new Measurement("Kappa Statistic (percent)",
             getKappaStatistic() * 100.0),
-            new Measurement("Kappa Temporal Statistic (percent)",
+        new Measurement("Kappa Temporal Statistic (percent)",
             getKappaTemporalStatistic() * 100.0)
-        };
-
+    };
+
+  }
+
+  public double getTotalWeightObserved() {
+    return this.weightObserved.total();
+  }
+
+  public double getFractionCorrectlyClassified() {
+    return this.weightObserved.total() > 0.0 ? this.weightCorrect.total()
+        / this.weightObserved.total() : 0.0;
+  }
+
+  public double getKappaStatistic() {
+    if (this.weightObserved.total() > 0.0) {
+      double p0 = this.weightCorrect.total() / this.weightObserved.total();
+      double pc = 0;
+      for (int i = 0; i < this.numClasses; i++) {
+        pc += (this.rowKappa[i].total() / this.weightObserved.total())
+            * (this.columnKappa[i].total() / this.weightObserved.total());
+      }
+      return (p0 - pc) / (1 - pc);
+    } else {
+      return 0;
     }
+  }
 
-    public double getTotalWeightObserved() {
-        return this.weightObserved.total();
-    }
+  public double getKappaTemporalStatistic() {
+    if (this.weightObserved.total() > 0.0) {
+      double p0 = this.weightCorrect.total() / this.weightObserved.total();
+      double pc = this.weightCorrectNoChangeClassifier.total() / this.weightObserved.total();
 
-    public double getFractionCorrectlyClassified() {
-        return this.weightObserved.total() > 0.0 ? this.weightCorrect.total()
-                / this.weightObserved.total() : 0.0;
+      return (p0 - pc) / (1 - pc);
+    } else {
+      return 0;
     }
+  }
 
-    public double getKappaStatistic() {
-        if (this.weightObserved.total() > 0.0) {
-            double p0 = this.weightCorrect.total() / this.weightObserved.total();
-            double pc = 0;
-            for (int i = 0; i < this.numClasses; i++) {
-                pc += (this.rowKappa[i].total() / this.weightObserved.total())
-                        * (this.columnKappa[i].total() / this.weightObserved.total());
-            }
-            return (p0 - pc) / (1 - pc);
-        } else {
-            return 0;
-        }
-    }
-
-    public double getKappaTemporalStatistic() {
-        if (this.weightObserved.total() > 0.0) {
-            double p0 = this.weightCorrect.total() / this.weightObserved.total();
-            double pc = this.weightCorrectNoChangeClassifier.total() / this.weightObserved.total();
-
-            return (p0 - pc) / (1 - pc);
-        } else {
-            return 0;
-        }
-    }
+  public double getFractionIncorrectlyClassified() {
+    return 1.0 - getFractionCorrectlyClassified();
+  }
 
-    public double getFractionIncorrectlyClassified() {
-        return 1.0 - getFractionCorrectlyClassified();
-    }
-
-    @Override
-    public void getDescription(StringBuilder sb, int indent) {
-        Measurement.getMeasurementsDescription(getPerformanceMeasurements(),
-                sb, indent);
-    }
+  @Override
+  public void getDescription(StringBuilder sb, int indent) {
+    Measurement.getMeasurementsDescription(getPerformanceMeasurements(),
+        sb, indent);
+  }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM.java
index 1a41f6b..568f7c5 100644
--- a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM.java
+++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM.java
@@ -33,477 +33,491 @@ import java.util.ArrayList;
  * 
  * CMM: Main class
  * 
- * Reference: Kremer et al., "An Effective Evaluation Measure for Clustering on Evolving Data Streams", KDD, 2011
+ * Reference: Kremer et al.,
+ * "An Effective Evaluation Measure for Clustering on Evolving Data Streams",
+ * KDD, 2011
  * 
- * @author Timm jansen
- * Data Management and Data Exploration Group, RWTH Aachen University
-*/
-
-public class CMM extends MeasureCollection{
-       
-       private static final long serialVersionUID = 1L;
-
-       /**
-     * found clustering
-     */
-       private Clustering clustering;
-
-       /**
-        * the ground truth analysis
-        */
-    private CMM_GTAnalysis gtAnalysis;
-
-    /**
-     * number of points within the horizon
-     */
-    private int numPoints;
-    
-    /**
-     * number of clusters in the found clustering
-     */
-    private int numFClusters;
-    
-    /**
-     * number of cluster in the adjusted groundtruth clustering that 
-     * was calculated through the groundtruth analysis 
-     */
-    private int numGT0Classes;
+ * @author Timm jansen Data Management and Data Exploration Group, RWTH Aachen
+ *         University
+ */
 
-    /**
-     * match found clusters to GT clusters
-     */
-    private int matchMap[]; 
-    
-    /**
-     * pointInclusionProbFC[p][C] contains the probability of point p 
-     * being included in cluster C 
-     */
-    private double[][] pointInclusionProbFC;
-    
-    /**
-     * threshold that defines when a point is being considered belonging to a cluster
-     */
-    private double pointInclusionProbThreshold = 0.5;
-    
-    /**
-     * parameterize the error weight of missed points (default 1)
-     */
-    private double lamdaMissed = 1;
+public class CMM extends MeasureCollection {
+
+  private static final long serialVersionUID = 1L;
+
+  /**
+   * found clustering
+   */
+  private Clustering clustering;
+
+  /**
+   * the ground truth analysis
+   */
+  private CMM_GTAnalysis gtAnalysis;
+
+  /**
+   * number of points within the horizon
+   */
+  private int numPoints;
+
+  /**
+   * number of clusters in the found clustering
+   */
+  private int numFClusters;
+
+  /**
+   * number of cluster in the adjusted groundtruth clustering that was
+   * calculated through the groundtruth analysis
+   */
+  private int numGT0Classes;
+
+  /**
+   * match found clusters to GT clusters
+   */
+  private int matchMap[];
+
+  /**
+   * pointInclusionProbFC[p][C] contains the probability of point p being
+   * included in cluster C
+   */
+  private double[][] pointInclusionProbFC;
+
+  /**
+   * threshold that defines when a point is being considered belonging to a
+   * cluster
+   */
+  private double pointInclusionProbThreshold = 0.5;
+
+  /**
+   * parameterize the error weight of missed points (default 1)
+   */
+  private double lamdaMissed = 1;
+
+  /**
+   * enable/disable debug mode
+   */
+  public boolean debug = false;
+
+  /**
+   * enable/disable class merge (main feature of ground truth analysis)
+   */
+  public boolean enableClassMerge = true;
+
+  /**
+   * enable/disable model error; when enabled, errors that are caused by the
+   * underlying cluster model will not be counted
+   */
+  public boolean enableModelError = true;
+
+  @Override
+  protected String[] getNames() {
+    String[] names = { "CMM", "CMM Basic", "CMM Missed", "CMM Misplaced", "CMM Noise",
+        "CA Seperability", "CA Noise", "CA Modell" };
+    return names;
+  }
+
+  @Override
+  protected boolean[] getDefaultEnabled() {
+    boolean[] defaults = { false, false, false, false, false, false, false, false };
+    return defaults;
+  }
+
+  @Override
+  public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points)
+      throws Exception {
+    this.clustering = clustering;
+
+    numPoints = points.size();
+    numFClusters = clustering.size();
+
+    gtAnalysis = new CMM_GTAnalysis(trueClustering, points, enableClassMerge);
+
+    numGT0Classes = gtAnalysis.getNumberOfGT0Classes();
+
+    addValue("CA Seperability", gtAnalysis.getClassSeparability());
+    addValue("CA Noise", gtAnalysis.getNoiseSeparability());
+    addValue("CA Modell", gtAnalysis.getModelQuality());
+
+    /* init the matching and point distances */
+    calculateMatching();
+
+    /* calculate the actual error */
+    calculateError();
+  }
+
+  /**
+   * calculates the CMM specific matching between found clusters and ground
+   * truth clusters
+   */
+  private void calculateMatching() {
 
-    
     /**
-     * enable/disable debug mode
+     * found cluster frequencies
      */
-    public boolean debug = false;
+    int[][] mapFC = new int[numFClusters][numGT0Classes];
 
-    
     /**
-     * enable/disable class merge (main feature of ground truth analysis)
+     * ground truth cluster frequencies
      */
-    public boolean enableClassMerge = true;
-    
-    /**
-     * enable/disable model error
-     * when enabled errors that are caused by the underling cluster model will not be counted
-     */
-    public boolean enableModelError = true;
-    
-
-    @Override
-    protected String[] getNames() {
-        String[] names = {"CMM","CMM Basic","CMM Missed","CMM Misplaced","CMM Noise",
-                            "CA Seperability", "CA Noise", "CA Modell"};
-        return names;
+    int[][] mapGT = new int[numGT0Classes][numGT0Classes];
+    int[] sumsFC = new int[numFClusters];
+
+    // calculate fuzzy mapping from
+    pointInclusionProbFC = new double[numPoints][numFClusters];
+    for (int p = 0; p < numPoints; p++) {
+      CMMPoint cmdp = gtAnalysis.getPoint(p);
+      // found cluster frequencies
+      for (int fc = 0; fc < numFClusters; fc++) {
+        Cluster cl = clustering.get(fc);
+        pointInclusionProbFC[p][fc] = cl.getInclusionProbability(cmdp);
+        if (pointInclusionProbFC[p][fc] >= pointInclusionProbThreshold) {
+          // make sure we don't count points twice that are contained in two
+          // merged clusters
+          if (cmdp.isNoise())
+            continue;
+          mapFC[fc][cmdp.workclass()]++;
+          sumsFC[fc]++;
+        }
+      }
+
+      // ground truth cluster frequencies
+      if (!cmdp.isNoise()) {
+        for (int hc = 0; hc < numGT0Classes; hc++) {
+          if (hc == cmdp.workclass()) {
+            mapGT[hc][hc]++;
+          }
+          else {
+            if (gtAnalysis.getGT0Cluster(hc).getInclusionProbability(cmdp) >= 1) {
+              mapGT[hc][cmdp.workclass()]++;
+            }
+          }
+        }
+      }
     }
 
-    @Override
-    protected boolean[] getDefaultEnabled() {
-        boolean [] defaults = {false, false, false, false, false, false, false, false};
-        return defaults;
+    // assign each found cluster to a hidden cluster
+    matchMap = new int[numFClusters];
+    for (int fc = 0; fc < numFClusters; fc++) {
+      int matchIndex = -1;
+      // check if we only have one entry anyway
+      for (int hc0 = 0; hc0 < numGT0Classes; hc0++) {
+        if (mapFC[fc][hc0] != 0) {
+          if (matchIndex == -1)
+            matchIndex = hc0;
+          else {
+            matchIndex = -1;
+            break;
+          }
+        }
+      }
+
+      // more than one entry, so look for most similar frequency profile
+      int minDiff = Integer.MAX_VALUE;
+      if (sumsFC[fc] != 0 && matchIndex == -1) {
+        ArrayList<Integer> fitCandidates = new ArrayList<Integer>();
+        for (int hc0 = 0; hc0 < numGT0Classes; hc0++) {
+          int errDiff = 0;
+          for (int hc1 = 0; hc1 < numGT0Classes; hc1++) {
+            // fc profile doesn't fit into current hc profile
+            double freq_diff = mapFC[fc][hc1] - mapGT[hc0][hc1];
+            if (freq_diff > 0) {
+              errDiff += freq_diff;
+            }
+          }
+          if (errDiff == 0) {
+            fitCandidates.add(hc0);
+          }
+          if (errDiff < minDiff) {
+            minDiff = errDiff;
+            matchIndex = hc0;
+          }
+          if (debug) {
+            // System.out.println("FC"+fc+"("+Arrays.toString(mapFC[fc])+") - HC0_"+hc0+"("+Arrays.toString(mapGT[hc0])+"):"+errDiff);
+          }
+        }
+        // if we have a fitting profile overwrite the min error choice
+        // if we have multiple fit candidates, use majority vote of
+        // corresponding classes
+        if (fitCandidates.size() != 0) {
+          int bestGTfit = fitCandidates.get(0);
+          for (int i = 1; i < fitCandidates.size(); i++) {
+            int GTfit = fitCandidates.get(i);
+            if (mapFC[fc][GTfit] > mapFC[fc][bestGTfit])
+              bestGTfit = fitCandidates.get(i);
+          }
+          matchIndex = bestGTfit;
+        }
+      }
+
+      matchMap[fc] = matchIndex;
+      int realMatch = -1;
+      if (matchIndex == -1) {
+        if (debug)
+          System.out.println("No cluster match: needs to be implemented?");
+      }
+      else {
+        realMatch = gtAnalysis.getGT0Cluster(matchMap[fc]).getLabel();
+      }
+      clustering.get(fc).setMeasureValue("CMM Match", "C" + realMatch);
+      clustering.get(fc).setMeasureValue("CMM Workclass", "C" + matchMap[fc]);
     }
 
-    
-    @Override
-    public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) throws Exception{
-        this.clustering = clustering;
-
-        numPoints = points.size();
-        numFClusters = clustering.size();
+    // print matching table
+    if (debug) {
+      for (int i = 0; i < numFClusters; i++) {
+        System.out.print("C" + ((int) clustering.get(i).getId()) + " N:" + ((int) clustering.get(i).getWeight())
+            + "  |  ");
+        for (int j = 0; j < numGT0Classes; j++) {
+          System.out.print(mapFC[i][j] + " ");
+        }
+        System.out.print(" = " + sumsFC[i] + " | ");
+        String match = "-";
+        if (matchMap[i] != -1) {
+          match = Integer.toString(gtAnalysis.getGT0Cluster(matchMap[i]).getLabel());
+        }
+        System.out.println(" --> " + match + "(work:" + matchMap[i] + ")");
+      }
+    }
+  }
 
-        gtAnalysis = new CMM_GTAnalysis(trueClustering, points, enableClassMerge);
+  /**
+   * Calculate the actual error values
+   */
+  private void calculateError() {
+    int totalErrorCount = 0;
+    int totalRedundancy = 0;
+    int trueCoverage = 0;
+    int totalCoverage = 0;
 
-        numGT0Classes = gtAnalysis.getNumberOfGT0Classes();
+    int numNoise = 0;
+    double errorNoise = 0;
+    double errorNoiseMax = 0;
 
-        addValue("CA Seperability",gtAnalysis.getClassSeparability());
-        addValue("CA Noise",gtAnalysis.getNoiseSeparability());
-        addValue("CA Modell",gtAnalysis.getModelQuality());
+    double errorMissed = 0;
+    double errorMissedMax = 0;
 
-        /* init the matching and point distances */
-        calculateMatching();
+    double errorMisplaced = 0;
+    double errorMisplacedMax = 0;
 
-        /* calculate the actual error */
-        calculateError();
-    }
+    double totalError = 0.0;
+    double totalErrorMax = 0.0;
 
-    
     /**
-     * calculates the CMM specific matching between found clusters and ground truth clusters 
+     * mainly iterate over all points and find the right error value for the
+     * point. within the same run calculate various other stuff like coverage
+     * etc...
      */
-    private void calculateMatching(){
-
-       /**
-        * found cluster frequencies  
-        */
-        int[][] mapFC = new int[numFClusters][numGT0Classes];
-
-        /**
-        * ground truth cluster frequencies
-        */
-        int[][] mapGT = new int[numGT0Classes][numGT0Classes];
-        int [] sumsFC = new int[numFClusters];
-
-        //calculate fuzzy mapping from
-        pointInclusionProbFC = new double[numPoints][numFClusters];
-        for (int p = 0; p < numPoints; p++) {
-            CMMPoint cmdp = gtAnalysis.getPoint(p);
-            //found cluster frequencies
-            for (int fc = 0; fc < numFClusters; fc++) {
-                Cluster cl = clustering.get(fc);
-                pointInclusionProbFC[p][fc] = cl.getInclusionProbability(cmdp);
-                if (pointInclusionProbFC[p][fc] >= pointInclusionProbThreshold) {
-                    //make sure we don't count points twice that are contained in two merged clusters
-                    if(cmdp.isNoise()) continue;
-                    mapFC[fc][cmdp.workclass()]++;
-                    sumsFC[fc]++;
-                }
+    for (int p = 0; p < numPoints; p++) {
+      CMMPoint cmdp = gtAnalysis.getPoint(p);
+      double weight = cmdp.weight();
+      // noise counter
+      if (cmdp.isNoise()) {
+        numNoise++;
+        // this is always 1
+        errorNoiseMax += cmdp.connectivity * weight;
+      }
+      else {
+        errorMissedMax += cmdp.connectivity * weight;
+        errorMisplacedMax += cmdp.connectivity * weight;
+      }
+      // sum up maxError as the individual errors are the quality weighted
+      // between 0-1
+      totalErrorMax += cmdp.connectivity * weight;
+
+      double err = 0;
+      int coverage = 0;
+
+      // check every FCluster
+      for (int c = 0; c < numFClusters; c++) {
+        // contained in cluster c?
+        if (pointInclusionProbFC[p][c] >= pointInclusionProbThreshold) {
+          coverage++;
+
+          if (!cmdp.isNoise()) {
+            // PLACED CORRECTLY
+            if (matchMap[c] == cmdp.workclass()) {
             }
-
-            //ground truth cluster frequencies
-            if(!cmdp.isNoise()){
-                for(int hc = 0; hc < numGT0Classes;hc++){
-                    if(hc == cmdp.workclass()){
-                        mapGT[hc][hc]++;
-                    }
-                    else{
-                        if(gtAnalysis.getGT0Cluster(hc).getInclusionProbability(cmdp) >= 1){
-                            mapGT[hc][cmdp.workclass()]++;
-                        }
-                    }
-                }
+            // MISPLACED
+            else {
+              double errvalue = misplacedError(cmdp, c);
+              if (errvalue > err)
+                err = errvalue;
             }
+          }
+          else {
+            // NOISE
+            double errvalue = noiseError(cmdp, c);
+            if (errvalue > err)
+              err = errvalue;
+          }
         }
-
-        //assign each found cluster to a hidden cluster
-        matchMap = new int[numFClusters];
-        for (int fc = 0; fc < numFClusters; fc++) {
-            int matchIndex = -1;
-            //check if we only have one entry anyway
-            for (int hc0 = 0; hc0 < numGT0Classes; hc0++) {
-                if(mapFC[fc][hc0]!=0){
-                    if(matchIndex == -1)
-                        matchIndex = hc0;
-                    else{
-                        matchIndex = -1;
-                        break;
-                    }
-                }
-            }
-
-            //more then one entry, so look for most similar frequency profile
-            int minDiff = Integer.MAX_VALUE;
-            if(sumsFC[fc]!=0 && matchIndex == -1){
-                ArrayList<Integer> fitCandidates = new ArrayList<Integer>();
-                for (int hc0 = 0; hc0 < numGT0Classes; hc0++) {
-                    int errDiff = 0;
-                    for (int hc1 = 0; hc1 < numGT0Classes; hc1++) {
-                        //fc profile doesn't fit into current hc profile
-                        double freq_diff = mapFC[fc][hc1] - mapGT[hc0][hc1];
-                        if(freq_diff > 0){
-                            errDiff+= freq_diff;
-                        }
-                    }
-                    if(errDiff == 0){
-                        fitCandidates.add(hc0);
-                    }
-                    if(errDiff < minDiff){
-                        minDiff = errDiff;
-                        matchIndex = hc0;
-                    }
-                    if(debug){
-                        //System.out.println("FC"+fc+"("+Arrays.toString(mapFC[fc])+") - HC0_"+hc0+"("+Arrays.toString(mapGT[hc0])+"):"+errDiff);
-                    }
-                }
-                //if we have a fitting profile overwrite the min error choice
-                //if we have multiple fit candidates, use majority vote of corresponding classes
-                if(fitCandidates.size()!=0){
-                    int bestGTfit = fitCandidates.get(0);
-                    for(int i = 1; i < fitCandidates.size(); i++){
-                        int GTfit = fitCandidates.get(i);
-                        if(mapFC[fc][GTfit] > mapFC[fc][bestGTfit])
-                            bestGTfit=fitCandidates.get(i);
-                    }
-                    matchIndex = bestGTfit;
-                }
-            }
-            
-            matchMap[fc] = matchIndex;
-            int realMatch = -1;
-            if(matchIndex==-1){
-                if(debug)
-                    System.out.println("No cluster match: needs to be implemented?");
-            }
-            else{
-                realMatch = gtAnalysis.getGT0Cluster(matchMap[fc]).getLabel();
-            }
-            clustering.get(fc).setMeasureValue("CMM Match", "C"+realMatch);
-            clustering.get(fc).setMeasureValue("CMM Workclass", "C"+matchMap[fc]);
+      }
+      // not in any cluster
+      if (coverage == 0) {
+        // MISSED
+        if (!cmdp.isNoise()) {
+          err = missedError(cmdp, true);
+          errorMissed += weight * err;
         }
-
-        //print matching table
-        if(debug){
-            for (int i = 0; i < numFClusters; i++) {
-                    System.out.print("C"+((int)clustering.get(i).getId()) + " N:"+((int)clustering.get(i).getWeight())+"  |  ");
-                    for (int j = 0; j < numGT0Classes; j++) {
-                          System.out.print(mapFC[i][j] + " ");
-                    }
-                    System.out.print(" = "+sumsFC[i] + " | ");
-                    String match = "-";
-                    if (matchMap[i]!=-1) {
-                        match = Integer.toString(gtAnalysis.getGT0Cluster(matchMap[i]).getLabel());
-                    }
-                    System.out.println(" --> " + match + "(work:"+matchMap[i]+")");
-             }
+        // NOISE
+        else {
         }
-    }
-    
-    
-    /**
-     * Calculate the actual error values
-     */
-    private void calculateError(){
-        int totalErrorCount = 0;
-        int totalRedundancy = 0;
-        int trueCoverage = 0;
-        int totalCoverage = 0;
-
-        int numNoise = 0;
-        double errorNoise = 0;
-        double errorNoiseMax = 0;
-
-        double errorMissed = 0;
-        double errorMissedMax = 0;
-
-        double errorMisplaced = 0;
-        double errorMisplacedMax = 0;
-
-        double totalError = 0.0;
-        double totalErrorMax = 0.0;
-
-        /** mainly iterate over all points and find the right error value for the point.
-         *  within the same run calculate various other stuff like coverage etc...
-         */
-        for (int p = 0; p < numPoints; p++) {
-            CMMPoint cmdp = gtAnalysis.getPoint(p);
-            double weight = cmdp.weight();
-            //noise counter
-            if(cmdp.isNoise()){
-                numNoise++;
-                //this is always 1
-                errorNoiseMax+=cmdp.connectivity*weight;
-            }
-            else{
-                errorMissedMax+=cmdp.connectivity*weight;
-                errorMisplacedMax+=cmdp.connectivity*weight;
-            }
-            //sum up maxError as the individual errors are the quality weighted between 0-1
-            totalErrorMax+=cmdp.connectivity*weight;
-
-
-            double err = 0;
-            int coverage = 0;
-
-            //check every FCluster
-            for (int c = 0; c < numFClusters; c++) {
-                //contained in cluster c?
-                if(pointInclusionProbFC[p][c] >= pointInclusionProbThreshold){
-                    coverage++;
-
-                    if(!cmdp.isNoise()){
-                        //PLACED CORRECTLY
-                        if(matchMap[c] == cmdp.workclass()){
-                        }
-                        //MISPLACED
-                        else{
-                            double errvalue = misplacedError(cmdp, c);
-                            if(errvalue > err)
-                                err = errvalue;
-                        }
-                    }
-                    else{
-                        //NOISE
-                        double errvalue = noiseError(cmdp, c);
-                        if(errvalue > err) err = errvalue;
-                    }
-                }
-            }
-            //not in any cluster
-            if(coverage == 0){
-                //MISSED
-                if(!cmdp.isNoise()){
-                    err = missedError(cmdp,true);
-                    errorMissed+= weight*err;
-                }
-                //NOISE
-                else{
-                }
-            }
-            else{
-                if(!cmdp.isNoise()){
-                    errorMisplaced+= err*weight;
-                }
-                else{
-                    errorNoise+= err*weight;
-                }
-            }
-
-            /* processing of other evaluation values */
-            totalError+= err*weight;
-            if(err!=0)totalErrorCount++;
-            if(coverage>0) totalCoverage++;  //points covered by clustering (incl. noise)
-            if(coverage>0 && !cmdp.isNoise()) trueCoverage++; //points covered by clustering, don't count noise
-            if(coverage>1) totalRedundancy++; //include noise
-
-            cmdp.p.setMeasureValue("CMM",err);
-            cmdp.p.setMeasureValue("Redundancy", coverage);
+      }
+      else {
+        if (!cmdp.isNoise()) {
+          errorMisplaced += err * weight;
         }
-
-        addValue("CMM", (totalErrorMax!=0)?1-totalError/totalErrorMax:1);
-        addValue("CMM Missed", (errorMissedMax!=0)?1-errorMissed/errorMissedMax:1);
-        addValue("CMM Misplaced", (errorMisplacedMax!=0)?1-errorMisplaced/errorMisplacedMax:1);
-        addValue("CMM Noise", (errorNoiseMax!=0)?1-errorNoise/errorNoiseMax:1);
-        addValue("CMM Basic", 1-((double)totalErrorCount/(double)numPoints));
-
-        if(debug){
-            System.out.println("-------------");
+        else {
+          errorNoise += err * weight;
         }
+      }
+
+      /* processing of other evaluation values */
+      totalError += err * weight;
+      if (err != 0)
+        totalErrorCount++;
+      if (coverage > 0)
+        totalCoverage++; // points covered by clustering (incl. noise)
+      if (coverage > 0 && !cmdp.isNoise())
+        trueCoverage++; // points covered by clustering, don't count noise
+      if (coverage > 1)
+        totalRedundancy++; // include noise
+
+      cmdp.p.setMeasureValue("CMM", err);
+      cmdp.p.setMeasureValue("Redundancy", coverage);
     }
 
+    addValue("CMM", (totalErrorMax != 0) ? 1 - totalError / totalErrorMax : 1);
+    addValue("CMM Missed", (errorMissedMax != 0) ? 1 - errorMissed / errorMissedMax : 1);
+    addValue("CMM Misplaced", (errorMisplacedMax != 0) ? 1 - errorMisplaced / errorMisplacedMax : 1);
+    addValue("CMM Noise", (errorNoiseMax != 0) ? 1 - errorNoise / errorNoiseMax : 1);
+    addValue("CMM Basic", 1 - ((double) totalErrorCount / (double) numPoints));
 
-    private double noiseError(CMMPoint cmdp, int assignedClusterID){
-        int gtAssignedID = matchMap[assignedClusterID];
-        double error;
-        
-        //Cluster wasn't matched, so just contains noise
-        //TODO: Noiscluster?
-        //also happens when we decrease the radius and there is only a noise point in the center
-        if(gtAssignedID==-1){
-            error = 1;
-            cmdp.p.setMeasureValue("CMM Type","noise - cluster");
-        }
-        else{
-            if(enableModelError && gtAnalysis.getGT0Cluster(gtAssignedID).getInclusionProbability(cmdp) >= pointInclusionProbThreshold){
-                //set to MIN_ERROR so we can still track the error
-               error = 0.00001;
-                cmdp.p.setMeasureValue("CMM Type","noise - byModel");
-            }
-            else{
-                error = 1 - gtAnalysis.getConnectionValue(cmdp, gtAssignedID);
-                cmdp.p.setMeasureValue("CMM Type","noise");
-            }
-        }
-
-        return error;
+    if (debug) {
+      System.out.println("-------------");
     }
+  }
+
+  private double noiseError(CMMPoint cmdp, int assignedClusterID) {
+    int gtAssignedID = matchMap[assignedClusterID];
+    double error;
+
+    // Cluster wasn't matched, so just contains noise
+    // TODO: Noise cluster?
+    // also happens when we decrease the radius and there is only a noise point
+    // in the center
+    if (gtAssignedID == -1) {
+      error = 1;
+      cmdp.p.setMeasureValue("CMM Type", "noise - cluster");
+    }
+    else {
+      if (enableModelError
+          && gtAnalysis.getGT0Cluster(gtAssignedID).getInclusionProbability(cmdp) >= pointInclusionProbThreshold) {
+        // set to MIN_ERROR so we can still track the error
+        error = 0.00001;
+        cmdp.p.setMeasureValue("CMM Type", "noise - byModel");
+      }
+      else {
+        error = 1 - gtAnalysis.getConnectionValue(cmdp, gtAssignedID);
+        cmdp.p.setMeasureValue("CMM Type", "noise");
+      }
+    }
+
+    return error;
+  }
 
-    private double missedError(CMMPoint cmdp, boolean useHullDistance){
-        cmdp.p.setMeasureValue("CMM Type","missed");
-        if(!useHullDistance){
-            return cmdp.connectivity;
+  private double missedError(CMMPoint cmdp, boolean useHullDistance) {
+    cmdp.p.setMeasureValue("CMM Type", "missed");
+    if (!useHullDistance) {
+      return cmdp.connectivity;
+    }
+    else {
+      // main idea: look at relative distance of missed point to cluster
+      double minHullDist = 1;
+      for (int fc = 0; fc < numFClusters; fc++) {
+        // if fc is mapped onto the class of the point, check it for its
+        // hulldist
+        if (matchMap[fc] != -1 && matchMap[fc] == cmdp.workclass()) {
+          if (clustering.get(fc) instanceof SphereCluster) {
+            SphereCluster sc = (SphereCluster) clustering.get(fc);
+            double distanceFC = sc.getCenterDistance(cmdp);
+            double radius = sc.getRadius();
+            double hullDist = (distanceFC - radius) / (distanceFC + radius);
+            if (hullDist < minHullDist)
+              minHullDist = hullDist;
+          }
+          else {
+            double min = 1;
+            double max = 1;
+
+            // TODO: distance for random shape
+            // generate X points from the cluster with
+            // clustering.get(fc).sample(null)
+            // and find Min and Max values
+
+            double hullDist = min / max;
+            if (hullDist < minHullDist)
+              minHullDist = hullDist;
+          }
         }
-        else{
-            //main idea: look at relative distance of missed point to cluster
-            double minHullDist = 1;
-            for (int fc = 0; fc < numFClusters; fc++){
-                //if fc is mappend onto the class of the point, check it for its hulldist
-                if(matchMap[fc]!=-1 && matchMap[fc] == cmdp.workclass()){
-                    if(clustering.get(fc) instanceof SphereCluster){
-                        SphereCluster sc = (SphereCluster)clustering.get(fc);
-                        double distanceFC = sc.getCenterDistance(cmdp);
-                        double radius = sc.getRadius();
-                        double hullDist = (distanceFC-radius)/(distanceFC+radius);
-                        if(hullDist < minHullDist)
-                            minHullDist = hullDist;
-                    }
-                    else{
-                        double min = 1;
-                        double max = 1;
-
-                        //TODO: distance for random shape
-                        //generate X points from the cluster with clustering.get(fc).sample(null)
-                        //and find Min and Max values
-
-                        double hullDist = min/max;
-                        if(hullDist < minHullDist)
-                            minHullDist = hullDist;
-                    }
-                }
-            }
+      }
 
-            //use distance as weight
-            if(minHullDist>1) minHullDist = 1;
+      // use distance as weight
+      if (minHullDist > 1)
+        minHullDist = 1;
 
-            double weight = (1-Math.exp(-lamdaMissed*minHullDist));
-            cmdp.p.setMeasureValue("HullDistWeight",weight);
+      double weight = (1 - Math.exp(-lamdaMissed * minHullDist));
+      cmdp.p.setMeasureValue("HullDistWeight", weight);
 
-            return weight*cmdp.connectivity;
-        }
+      return weight * cmdp.connectivity;
     }
+  }
 
+  private double misplacedError(CMMPoint cmdp, int assignedClusterID) {
+    double weight = 0;
 
-    private double misplacedError(CMMPoint cmdp, int assignedClusterID){
-        double weight = 0;
-
-        int gtAssignedID = matchMap[assignedClusterID];
-        //TODO take care of noise cluster?
-        if(gtAssignedID ==-1){
-            System.out.println("Point "+cmdp.getTimestamp()+" from gtcluster "+cmdp.trueClass+" assigned to noise cluster "+assignedClusterID);
-            return 1;
-        }
-
-        if(gtAssignedID == cmdp.workclass())
-            return 0;
-        else{
-            //assigned and real GT0 cluster are not connected, but does the model have the
-            //chance of separating this point after all?
-            if(enableModelError && gtAnalysis.getGT0Cluster(gtAssignedID).getInclusionProbability(cmdp) >= pointInclusionProbThreshold){
-                weight = 0;
-                cmdp.p.setMeasureValue("CMM Type","missplaced - byModel");
-            }
-            else{
-                //point was mapped onto wrong cluster (assigned), so check how far away
-                //the nearest point is within the wrongly assigned cluster
-                weight = 1 - gtAnalysis.getConnectionValue(cmdp, gtAssignedID);
-            }
-        }
-        double err_value;
-        //set to MIN_ERROR so we can still track the error
-        if(weight == 0){
-            err_value= 0.00001;
-        }
-        else{
-            err_value = weight*cmdp.connectivity;
-            cmdp.p.setMeasureValue("CMM Type","missplaced");
-        }
-
-        return err_value;
+    int gtAssignedID = matchMap[assignedClusterID];
+    // TODO take care of noise cluster?
+    if (gtAssignedID == -1) {
+      System.out.println("Point " + cmdp.getTimestamp() + " from gtcluster " + cmdp.trueClass
+          + " assigned to noise cluster " + assignedClusterID);
+      return 1;
     }
 
-    public String getParameterString(){
-        String para = gtAnalysis.getParameterString();
-        para+="lambdaMissed="+lamdaMissed+";";
-        return para;
+    if (gtAssignedID == cmdp.workclass())
+      return 0;
+    else {
+      // assigned and real GT0 cluster are not connected, but does the model
+      // have the
+      // chance of separating this point after all?
+      if (enableModelError
+          && gtAnalysis.getGT0Cluster(gtAssignedID).getInclusionProbability(cmdp) >= pointInclusionProbThreshold) {
+        weight = 0;
+        cmdp.p.setMeasureValue("CMM Type", "missplaced - byModel");
+      }
+      else {
+        // point was mapped onto wrong cluster (assigned), so check how far away
+        // the nearest point is within the wrongly assigned cluster
+        weight = 1 - gtAnalysis.getConnectionValue(cmdp, gtAssignedID);
+      }
+    }
+    double err_value;
+    // set to MIN_ERROR so we can still track the error
+    if (weight == 0) {
+      err_value = 0.00001;
+    }
+    else {
+      err_value = weight * cmdp.connectivity;
+      cmdp.p.setMeasureValue("CMM Type", "missplaced");
     }
 
-}
+    return err_value;
+  }
 
+  public String getParameterString() {
+    String para = gtAnalysis.getParameterString();
+    para += "lambdaMissed=" + lamdaMissed + ";";
+    return para;
+  }
 
+}

Reply via email to