http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/EvaluatorProcessor.java
----------------------------------------------------------------------
diff --git 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/EvaluatorProcessor.java
 
b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/EvaluatorProcessor.java
deleted file mode 100644
index b1748ff..0000000
--- 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/EvaluatorProcessor.java
+++ /dev/null
@@ -1,233 +0,0 @@
-package com.yahoo.labs.samoa.evaluation;
-
-/*
- * #%L
- * SAMOA
- * %%
- * Copyright (C) 2014 - 2015 Apache Software Foundation
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.PrintStream;
-import java.util.Collections;
-import java.util.List;
-import java.util.Vector;
-import java.util.concurrent.TimeUnit;
-
-import com.yahoo.labs.samoa.moa.core.Measurement;
-import com.yahoo.labs.samoa.moa.evaluation.LearningCurve;
-import com.yahoo.labs.samoa.moa.evaluation.LearningEvaluation;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.yahoo.labs.samoa.core.ContentEvent;
-import com.yahoo.labs.samoa.core.Processor;
-import com.yahoo.labs.samoa.learners.ResultContentEvent;
-
-public class EvaluatorProcessor implements Processor {
-
-  /**
-        * 
-        */
-  private static final long serialVersionUID = -2778051819116753612L;
-
-  private static final Logger logger =
-      LoggerFactory.getLogger(EvaluatorProcessor.class);
-
-  private static final String ORDERING_MEASUREMENT_NAME = "evaluation 
instances";
-
-  private final PerformanceEvaluator evaluator;
-  private final int samplingFrequency;
-  private final File dumpFile;
-  private transient PrintStream immediateResultStream = null;
-  private transient boolean firstDump = true;
-
-  private long totalCount = 0;
-  private long experimentStart = 0;
-
-  private long sampleStart = 0;
-
-  private LearningCurve learningCurve;
-  private int id;
-
-  private EvaluatorProcessor(Builder builder) {
-    this.evaluator = builder.evaluator;
-    this.samplingFrequency = builder.samplingFrequency;
-    this.dumpFile = builder.dumpFile;
-  }
-
-  @Override
-  public boolean process(ContentEvent event) {
-
-    ResultContentEvent result = (ResultContentEvent) event;
-
-    if ((totalCount > 0) && (totalCount % samplingFrequency) == 0) {
-      long sampleEnd = System.nanoTime();
-      long sampleDuration = TimeUnit.SECONDS.convert(sampleEnd - sampleStart, 
TimeUnit.NANOSECONDS);
-      sampleStart = sampleEnd;
-
-      logger.info("{} seconds for {} instances", sampleDuration, 
samplingFrequency);
-      this.addMeasurement();
-    }
-
-    if (result.isLastEvent()) {
-      this.concludeMeasurement();
-      return true;
-    }
-
-    evaluator.addResult(result.getInstance(), result.getClassVotes());
-    totalCount += 1;
-
-    if (totalCount == 1) {
-      sampleStart = System.nanoTime();
-      experimentStart = sampleStart;
-    }
-
-    return false;
-  }
-
-  @Override
-  public void onCreate(int id) {
-    this.id = id;
-    this.learningCurve = new LearningCurve(ORDERING_MEASUREMENT_NAME);
-
-    if (this.dumpFile != null) {
-      try {
-        if (dumpFile.exists()) {
-          this.immediateResultStream = new PrintStream(
-              new FileOutputStream(dumpFile, true), true);
-        } else {
-          this.immediateResultStream = new PrintStream(
-              new FileOutputStream(dumpFile), true);
-        }
-
-      } catch (FileNotFoundException e) {
-        this.immediateResultStream = null;
-        logger.error("File not found exception for {}:{}", 
this.dumpFile.getAbsolutePath(), e.toString());
-
-      } catch (Exception e) {
-        this.immediateResultStream = null;
-        logger.error("Exception when creating {}:{}", 
this.dumpFile.getAbsolutePath(), e.toString());
-      }
-    }
-
-    this.firstDump = true;
-  }
-
-  @Override
-  public Processor newProcessor(Processor p) {
-    EvaluatorProcessor originalProcessor = (EvaluatorProcessor) p;
-    EvaluatorProcessor newProcessor = new 
EvaluatorProcessor.Builder(originalProcessor).build();
-
-    if (originalProcessor.learningCurve != null) {
-      newProcessor.learningCurve = originalProcessor.learningCurve;
-    }
-
-    return newProcessor;
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder report = new StringBuilder();
-
-    report.append(EvaluatorProcessor.class.getCanonicalName());
-    report.append("id = ").append(this.id);
-    report.append('\n');
-
-    if (learningCurve.numEntries() > 0) {
-      report.append(learningCurve.toString());
-      report.append('\n');
-    }
-    return report.toString();
-  }
-
-  private void addMeasurement() {
-    List<Measurement> measurements = new Vector<>();
-    measurements.add(new Measurement(ORDERING_MEASUREMENT_NAME, totalCount));
-
-    Collections.addAll(measurements, evaluator.getPerformanceMeasurements());
-
-    Measurement[] finalMeasurements = measurements.toArray(new 
Measurement[measurements.size()]);
-
-    LearningEvaluation learningEvaluation = new 
LearningEvaluation(finalMeasurements);
-    learningCurve.insertEntry(learningEvaluation);
-    logger.debug("evaluator id = {}", this.id);
-    logger.info(learningEvaluation.toString());
-
-    if (immediateResultStream != null) {
-      if (firstDump) {
-        immediateResultStream.println(learningCurve.headerToString());
-        firstDump = false;
-      }
-
-      
immediateResultStream.println(learningCurve.entryToString(learningCurve.numEntries()
 - 1));
-      immediateResultStream.flush();
-    }
-  }
-
-  private void concludeMeasurement() {
-    logger.info("last event is received!");
-    logger.info("total count: {}", this.totalCount);
-
-    String learningCurveSummary = this.toString();
-    logger.info(learningCurveSummary);
-
-    long experimentEnd = System.nanoTime();
-    long totalExperimentTime = TimeUnit.SECONDS.convert(experimentEnd - 
experimentStart, TimeUnit.NANOSECONDS);
-    logger.info("total evaluation time: {} seconds for {} instances", 
totalExperimentTime, totalCount);
-
-    if (immediateResultStream != null) {
-      immediateResultStream.println("# COMPLETED");
-      immediateResultStream.flush();
-    }
-    // logger.info("average throughput rate: {} instances/seconds",
-    // (totalCount/totalExperimentTime));
-  }
-
-  public static class Builder {
-
-    private final PerformanceEvaluator evaluator;
-    private int samplingFrequency = 100000;
-    private File dumpFile = null;
-
-    public Builder(PerformanceEvaluator evaluator) {
-      this.evaluator = evaluator;
-    }
-
-    public Builder(EvaluatorProcessor oldProcessor) {
-      this.evaluator = oldProcessor.evaluator;
-      this.samplingFrequency = oldProcessor.samplingFrequency;
-      this.dumpFile = oldProcessor.dumpFile;
-    }
-
-    public Builder samplingFrequency(int samplingFrequency) {
-      this.samplingFrequency = samplingFrequency;
-      return this;
-    }
-
-    public Builder dumpFile(File file) {
-      this.dumpFile = file;
-      return this;
-    }
-
-    public EvaluatorProcessor build() {
-      return new EvaluatorProcessor(this);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java
----------------------------------------------------------------------
diff --git 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java
 
b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java
deleted file mode 100644
index 60c2ffb..0000000
--- 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java
+++ /dev/null
@@ -1,59 +0,0 @@
-package com.yahoo.labs.samoa.evaluation;
-
-/*
- * #%L
- * SAMOA
- * %%
- * Copyright (C) 2014 - 2015 Apache Software Foundation
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-
-import com.yahoo.labs.samoa.moa.MOAObject;
-import com.yahoo.labs.samoa.moa.core.Measurement;
-
-import com.yahoo.labs.samoa.instances.Instance;
-
-/**
- * Interface implemented by learner evaluators to monitor the results of the 
learning process.
- * 
- * @author Richard Kirkby ([email protected])
- * @version $Revision: 7 $
- */
-public interface PerformanceEvaluator extends MOAObject {
-
-  /**
-   * Resets this evaluator. It must be similar to starting a new evaluator 
from scratch.
-   * 
-   */
-  public void reset();
-
-  /**
-   * Adds a learning result to this evaluator.
-   * 
-   * @param inst
-   *          the instance to be classified
-   * @param classVotes
-   *          an array containing the estimated membership probabilities of 
the test instance in each class
-   * @return an array of measurements monitored in this evaluator
-   */
-  public void addResult(Instance inst, double[] classVotes);
-
-  /**
-   * Gets the current measurements monitored by this evaluator.
-   * 
-   * @return an array of measurements monitored by this evaluator
-   */
-  public Measurement[] getPerformanceMeasurements();
-}

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/RegressionPerformanceEvaluator.java
----------------------------------------------------------------------
diff --git 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/RegressionPerformanceEvaluator.java
 
b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/RegressionPerformanceEvaluator.java
deleted file mode 100644
index dc23102..0000000
--- 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/RegressionPerformanceEvaluator.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package com.yahoo.labs.samoa.evaluation;
-
-/*
- * #%L
- * SAMOA
- * %%
- * Copyright (C) 2014 - 2015 Apache Software Foundation
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-
-public interface RegressionPerformanceEvaluator extends PerformanceEvaluator {
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/WindowClassificationPerformanceEvaluator.java
----------------------------------------------------------------------
diff --git 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/WindowClassificationPerformanceEvaluator.java
 
b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/WindowClassificationPerformanceEvaluator.java
deleted file mode 100644
index 11a77a0..0000000
--- 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/WindowClassificationPerformanceEvaluator.java
+++ /dev/null
@@ -1,218 +0,0 @@
-package com.yahoo.labs.samoa.evaluation;
-
-/*
- * #%L
- * SAMOA
- * %%
- * Copyright (C) 2014 - 2015 Apache Software Foundation
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-
-import com.github.javacliparser.IntOption;
-import com.yahoo.labs.samoa.moa.AbstractMOAObject;
-import com.yahoo.labs.samoa.moa.core.Measurement;
-import com.yahoo.labs.samoa.instances.Instance;
-import com.yahoo.labs.samoa.instances.Utils;
-
-/**
- * Classification evaluator that updates evaluation results using a sliding 
window.
- * 
- * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz)
- * @version $Revision: 7 $
- */
-public class WindowClassificationPerformanceEvaluator extends 
AbstractMOAObject implements
-    ClassificationPerformanceEvaluator {
-
-  private static final long serialVersionUID = 1L;
-
-  public IntOption widthOption = new IntOption("width",
-      'w', "Size of Window", 1000);
-
-  protected double TotalweightObserved = 0;
-
-  protected Estimator weightObserved;
-
-  protected Estimator weightCorrect;
-
-  protected Estimator weightCorrectNoChangeClassifier;
-
-  protected double lastSeenClass;
-
-  protected Estimator[] columnKappa;
-
-  protected Estimator[] rowKappa;
-
-  protected Estimator[] classAccuracy;
-
-  protected int numClasses;
-
-  public class Estimator {
-
-    protected double[] window;
-
-    protected int posWindow;
-
-    protected int lenWindow;
-
-    protected int SizeWindow;
-
-    protected double sum;
-
-    public Estimator(int sizeWindow) {
-      window = new double[sizeWindow];
-      SizeWindow = sizeWindow;
-      posWindow = 0;
-      lenWindow = 0;
-    }
-
-    public void add(double value) {
-      sum -= window[posWindow];
-      sum += value;
-      window[posWindow] = value;
-      posWindow++;
-      if (posWindow == SizeWindow) {
-        posWindow = 0;
-      }
-      if (lenWindow < SizeWindow) {
-        lenWindow++;
-      }
-    }
-
-    public double total() {
-      return sum;
-    }
-
-    public double length() {
-      return lenWindow;
-    }
-
-  }
-
-  /*
-   * public void setWindowWidth(int w) { this.width = w; reset(); }
-   */
-  @Override
-  public void reset() {
-    reset(this.numClasses);
-  }
-
-  public void reset(int numClasses) {
-    this.numClasses = numClasses;
-    this.rowKappa = new Estimator[numClasses];
-    this.columnKappa = new Estimator[numClasses];
-    this.classAccuracy = new Estimator[numClasses];
-    for (int i = 0; i < this.numClasses; i++) {
-      this.rowKappa[i] = new Estimator(this.widthOption.getValue());
-      this.columnKappa[i] = new Estimator(this.widthOption.getValue());
-      this.classAccuracy[i] = new Estimator(this.widthOption.getValue());
-    }
-    this.weightCorrect = new Estimator(this.widthOption.getValue());
-    this.weightCorrectNoChangeClassifier = new 
Estimator(this.widthOption.getValue());
-    this.weightObserved = new Estimator(this.widthOption.getValue());
-    this.TotalweightObserved = 0;
-    this.lastSeenClass = 0;
-  }
-
-  @Override
-  public void addResult(Instance inst, double[] classVotes) {
-    double weight = inst.weight();
-    int trueClass = (int) inst.classValue();
-    if (weight > 0.0) {
-      if (TotalweightObserved == 0) {
-        reset(inst.numClasses());
-      }
-      this.TotalweightObserved += weight;
-      this.weightObserved.add(weight);
-      int predictedClass = Utils.maxIndex(classVotes);
-      if (predictedClass == trueClass) {
-        this.weightCorrect.add(weight);
-      } else {
-        this.weightCorrect.add(0);
-      }
-      // Add Kappa statistic information
-      for (int i = 0; i < this.numClasses; i++) {
-        this.rowKappa[i].add(i == predictedClass ? weight : 0);
-        this.columnKappa[i].add(i == trueClass ? weight : 0);
-      }
-      if (this.lastSeenClass == trueClass) {
-        this.weightCorrectNoChangeClassifier.add(weight);
-      } else {
-        this.weightCorrectNoChangeClassifier.add(0);
-      }
-      this.classAccuracy[trueClass].add(predictedClass == trueClass ? weight : 
0.0);
-      this.lastSeenClass = trueClass;
-    }
-  }
-
-  @Override
-  public Measurement[] getPerformanceMeasurements() {
-    return new Measurement[] {
-        new Measurement("classified instances",
-            this.TotalweightObserved),
-        new Measurement("classifications correct (percent)",
-            getFractionCorrectlyClassified() * 100.0),
-        new Measurement("Kappa Statistic (percent)",
-            getKappaStatistic() * 100.0),
-        new Measurement("Kappa Temporal Statistic (percent)",
-            getKappaTemporalStatistic() * 100.0)
-    };
-
-  }
-
-  public double getTotalWeightObserved() {
-    return this.weightObserved.total();
-  }
-
-  public double getFractionCorrectlyClassified() {
-    return this.weightObserved.total() > 0.0 ? this.weightCorrect.total()
-        / this.weightObserved.total() : 0.0;
-  }
-
-  public double getKappaStatistic() {
-    if (this.weightObserved.total() > 0.0) {
-      double p0 = this.weightCorrect.total() / this.weightObserved.total();
-      double pc = 0;
-      for (int i = 0; i < this.numClasses; i++) {
-        pc += (this.rowKappa[i].total() / this.weightObserved.total())
-            * (this.columnKappa[i].total() / this.weightObserved.total());
-      }
-      return (p0 - pc) / (1 - pc);
-    } else {
-      return 0;
-    }
-  }
-
-  public double getKappaTemporalStatistic() {
-    if (this.weightObserved.total() > 0.0) {
-      double p0 = this.weightCorrect.total() / this.weightObserved.total();
-      double pc = this.weightCorrectNoChangeClassifier.total() / 
this.weightObserved.total();
-
-      return (p0 - pc) / (1 - pc);
-    } else {
-      return 0;
-    }
-  }
-
-  public double getFractionIncorrectlyClassified() {
-    return 1.0 - getFractionCorrectlyClassified();
-  }
-
-  @Override
-  public void getDescription(StringBuilder sb, int indent) {
-    Measurement.getMeasurementsDescription(getPerformanceMeasurements(),
-        sb, indent);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM.java
----------------------------------------------------------------------
diff --git 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM.java 
b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM.java
deleted file mode 100644
index 5ef959a..0000000
--- a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM.java
+++ /dev/null
@@ -1,514 +0,0 @@
-package com.yahoo.labs.samoa.evaluation.measures;
-
-/*
- * #%L
- * SAMOA
- * %%
- * Copyright (C) 2014 - 2015 Apache Software Foundation
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-
-import com.yahoo.labs.samoa.evaluation.measures.CMM_GTAnalysis.CMMPoint;
-import com.yahoo.labs.samoa.moa.cluster.Cluster;
-import com.yahoo.labs.samoa.moa.cluster.Clustering;
-import com.yahoo.labs.samoa.moa.cluster.SphereCluster;
-import com.yahoo.labs.samoa.moa.evaluation.MeasureCollection;
-import com.yahoo.labs.samoa.moa.core.DataPoint;
-import java.util.ArrayList;
-
-/**
- * [CMM.java]
- * 
- * CMM: Main class
- * 
- * Reference: Kremer et al., "An Effective Evaluation Measure for Clustering 
on Evolving Data Streams", KDD, 2011
- * 
- * @author Timm jansen Data Management and Data Exploration Group, RWTH Aachen 
University
- */
-
-public class CMM extends MeasureCollection {
-
-  private static final long serialVersionUID = 1L;
-
-  /**
-   * found clustering
-   */
-  private Clustering clustering;
-
-  /**
-   * the ground truth analysis
-   */
-  private CMM_GTAnalysis gtAnalysis;
-
-  /**
-   * number of points within the horizon
-   */
-  private int numPoints;
-
-  /**
-   * number of clusters in the found clustering
-   */
-  private int numFClusters;
-
-  /**
-   * number of cluster in the adjusted groundtruth clustering that was 
calculated through the groundtruth analysis
-   */
-  private int numGT0Classes;
-
-  /**
-   * match found clusters to GT clusters
-   */
-  private int matchMap[];
-
-  /**
-   * pointInclusionProbFC[p][C] contains the probability of point p being 
included in cluster C
-   */
-  private double[][] pointInclusionProbFC;
-
-  /**
-   * threshold that defines when a point is being considered belonging to a 
cluster
-   */
-  private double pointInclusionProbThreshold = 0.5;
-
-  /**
-   * parameterize the error weight of missed points (default 1)
-   */
-  private double lamdaMissed = 1;
-
-  /**
-   * enable/disable debug mode
-   */
-  public boolean debug = false;
-
-  /**
-   * enable/disable class merge (main feature of ground truth analysis)
-   */
-  public boolean enableClassMerge = true;
-
-  /**
-   * enable/disable model error when enabled errors that are caused by the 
underling cluster model will not be counted
-   */
-  public boolean enableModelError = true;
-
-  @Override
-  protected String[] getNames() {
-    String[] names = { "CMM", "CMM Basic", "CMM Missed", "CMM Misplaced", "CMM 
Noise",
-        "CA Seperability", "CA Noise", "CA Modell" };
-    return names;
-  }
-
-  @Override
-  protected boolean[] getDefaultEnabled() {
-    boolean[] defaults = { false, false, false, false, false, false, false, 
false };
-    return defaults;
-  }
-
-  @Override
-  public void evaluateClustering(Clustering clustering, Clustering 
trueClustering, ArrayList<DataPoint> points)
-      throws Exception {
-    this.clustering = clustering;
-
-    numPoints = points.size();
-    numFClusters = clustering.size();
-
-    gtAnalysis = new CMM_GTAnalysis(trueClustering, points, enableClassMerge);
-
-    numGT0Classes = gtAnalysis.getNumberOfGT0Classes();
-
-    addValue("CA Seperability", gtAnalysis.getClassSeparability());
-    addValue("CA Noise", gtAnalysis.getNoiseSeparability());
-    addValue("CA Modell", gtAnalysis.getModelQuality());
-
-    /* init the matching and point distances */
-    calculateMatching();
-
-    /* calculate the actual error */
-    calculateError();
-  }
-
-  /**
-   * calculates the CMM specific matching between found clusters and ground 
truth clusters
-   */
-  private void calculateMatching() {
-
-    /**
-     * found cluster frequencies
-     */
-    int[][] mapFC = new int[numFClusters][numGT0Classes];
-
-    /**
-     * ground truth cluster frequencies
-     */
-    int[][] mapGT = new int[numGT0Classes][numGT0Classes];
-    int[] sumsFC = new int[numFClusters];
-
-    // calculate fuzzy mapping from
-    pointInclusionProbFC = new double[numPoints][numFClusters];
-    for (int p = 0; p < numPoints; p++) {
-      CMMPoint cmdp = gtAnalysis.getPoint(p);
-      // found cluster frequencies
-      for (int fc = 0; fc < numFClusters; fc++) {
-        Cluster cl = clustering.get(fc);
-        pointInclusionProbFC[p][fc] = cl.getInclusionProbability(cmdp);
-        if (pointInclusionProbFC[p][fc] >= pointInclusionProbThreshold) {
-          // make sure we don't count points twice that are contained in two
-          // merged clusters
-          if (cmdp.isNoise())
-            continue;
-          mapFC[fc][cmdp.workclass()]++;
-          sumsFC[fc]++;
-        }
-      }
-
-      // ground truth cluster frequencies
-      if (!cmdp.isNoise()) {
-        for (int hc = 0; hc < numGT0Classes; hc++) {
-          if (hc == cmdp.workclass()) {
-            mapGT[hc][hc]++;
-          }
-          else {
-            if (gtAnalysis.getGT0Cluster(hc).getInclusionProbability(cmdp) >= 
1) {
-              mapGT[hc][cmdp.workclass()]++;
-            }
-          }
-        }
-      }
-    }
-
-    // assign each found cluster to a hidden cluster
-    matchMap = new int[numFClusters];
-    for (int fc = 0; fc < numFClusters; fc++) {
-      int matchIndex = -1;
-      // check if we only have one entry anyway
-      for (int hc0 = 0; hc0 < numGT0Classes; hc0++) {
-        if (mapFC[fc][hc0] != 0) {
-          if (matchIndex == -1)
-            matchIndex = hc0;
-          else {
-            matchIndex = -1;
-            break;
-          }
-        }
-      }
-
-      // more then one entry, so look for most similar frequency profile
-      int minDiff = Integer.MAX_VALUE;
-      if (sumsFC[fc] != 0 && matchIndex == -1) {
-        ArrayList<Integer> fitCandidates = new ArrayList<Integer>();
-        for (int hc0 = 0; hc0 < numGT0Classes; hc0++) {
-          int errDiff = 0;
-          for (int hc1 = 0; hc1 < numGT0Classes; hc1++) {
-            // fc profile doesn't fit into current hc profile
-            double freq_diff = mapFC[fc][hc1] - mapGT[hc0][hc1];
-            if (freq_diff > 0) {
-              errDiff += freq_diff;
-            }
-          }
-          if (errDiff == 0) {
-            fitCandidates.add(hc0);
-          }
-          if (errDiff < minDiff) {
-            minDiff = errDiff;
-            matchIndex = hc0;
-          }
-          if (debug) {
-            // System.out.println("FC"+fc+"("+Arrays.toString(mapFC[fc])+") - 
HC0_"+hc0+"("+Arrays.toString(mapGT[hc0])+"):"+errDiff);
-          }
-        }
-        // if we have a fitting profile overwrite the min error choice
-        // if we have multiple fit candidates, use majority vote of
-        // corresponding classes
-        if (fitCandidates.size() != 0) {
-          int bestGTfit = fitCandidates.get(0);
-          for (int i = 1; i < fitCandidates.size(); i++) {
-            int GTfit = fitCandidates.get(i);
-            if (mapFC[fc][GTfit] > mapFC[fc][bestGTfit])
-              bestGTfit = fitCandidates.get(i);
-          }
-          matchIndex = bestGTfit;
-        }
-      }
-
-      matchMap[fc] = matchIndex;
-      int realMatch = -1;
-      if (matchIndex == -1) {
-        if (debug)
-          System.out.println("No cluster match: needs to be implemented?");
-      }
-      else {
-        realMatch = gtAnalysis.getGT0Cluster(matchMap[fc]).getLabel();
-      }
-      clustering.get(fc).setMeasureValue("CMM Match", "C" + realMatch);
-      clustering.get(fc).setMeasureValue("CMM Workclass", "C" + matchMap[fc]);
-    }
-
-    // print matching table
-    if (debug) {
-      for (int i = 0; i < numFClusters; i++) {
-        System.out.print("C" + ((int) clustering.get(i).getId()) + " N:" + 
((int) clustering.get(i).getWeight())
-            + "  |  ");
-        for (int j = 0; j < numGT0Classes; j++) {
-          System.out.print(mapFC[i][j] + " ");
-        }
-        System.out.print(" = " + sumsFC[i] + " | ");
-        String match = "-";
-        if (matchMap[i] != -1) {
-          match = 
Integer.toString(gtAnalysis.getGT0Cluster(matchMap[i]).getLabel());
-        }
-        System.out.println(" --> " + match + "(work:" + matchMap[i] + ")");
-      }
-    }
-  }
-
-  /**
-   * Calculate the actual error values
-   */
-  private void calculateError() {
-    int totalErrorCount = 0;
-    int totalRedundancy = 0;
-    int trueCoverage = 0;
-    int totalCoverage = 0;
-
-    int numNoise = 0;
-    double errorNoise = 0;
-    double errorNoiseMax = 0;
-
-    double errorMissed = 0;
-    double errorMissedMax = 0;
-
-    double errorMisplaced = 0;
-    double errorMisplacedMax = 0;
-
-    double totalError = 0.0;
-    double totalErrorMax = 0.0;
-
-    /**
-     * mainly iterate over all points and find the right error value for the 
point. within the same run calculate
-     * various other stuff like coverage etc...
-     */
-    for (int p = 0; p < numPoints; p++) {
-      CMMPoint cmdp = gtAnalysis.getPoint(p);
-      double weight = cmdp.weight();
-      // noise counter
-      if (cmdp.isNoise()) {
-        numNoise++;
-        // this is always 1
-        errorNoiseMax += cmdp.connectivity * weight;
-      }
-      else {
-        errorMissedMax += cmdp.connectivity * weight;
-        errorMisplacedMax += cmdp.connectivity * weight;
-      }
-      // sum up maxError as the individual errors are the quality weighted
-      // between 0-1
-      totalErrorMax += cmdp.connectivity * weight;
-
-      double err = 0;
-      int coverage = 0;
-
-      // check every FCluster
-      for (int c = 0; c < numFClusters; c++) {
-        // contained in cluster c?
-        if (pointInclusionProbFC[p][c] >= pointInclusionProbThreshold) {
-          coverage++;
-
-          if (!cmdp.isNoise()) {
-            // PLACED CORRECTLY
-            if (matchMap[c] == cmdp.workclass()) {
-            }
-            // MISPLACED
-            else {
-              double errvalue = misplacedError(cmdp, c);
-              if (errvalue > err)
-                err = errvalue;
-            }
-          }
-          else {
-            // NOISE
-            double errvalue = noiseError(cmdp, c);
-            if (errvalue > err)
-              err = errvalue;
-          }
-        }
-      }
-      // not in any cluster
-      if (coverage == 0) {
-        // MISSED
-        if (!cmdp.isNoise()) {
-          err = missedError(cmdp, true);
-          errorMissed += weight * err;
-        }
-        // NOISE
-        else {
-        }
-      }
-      else {
-        if (!cmdp.isNoise()) {
-          errorMisplaced += err * weight;
-        }
-        else {
-          errorNoise += err * weight;
-        }
-      }
-
-      /* processing of other evaluation values */
-      totalError += err * weight;
-      if (err != 0)
-        totalErrorCount++;
-      if (coverage > 0)
-        totalCoverage++; // points covered by clustering (incl. noise)
-      if (coverage > 0 && !cmdp.isNoise())
-        trueCoverage++; // points covered by clustering, don't count noise
-      if (coverage > 1)
-        totalRedundancy++; // include noise
-
-      cmdp.p.setMeasureValue("CMM", err);
-      cmdp.p.setMeasureValue("Redundancy", coverage);
-    }
-
-    addValue("CMM", (totalErrorMax != 0) ? 1 - totalError / totalErrorMax : 1);
-    addValue("CMM Missed", (errorMissedMax != 0) ? 1 - errorMissed / 
errorMissedMax : 1);
-    addValue("CMM Misplaced", (errorMisplacedMax != 0) ? 1 - errorMisplaced / 
errorMisplacedMax : 1);
-    addValue("CMM Noise", (errorNoiseMax != 0) ? 1 - errorNoise / 
errorNoiseMax : 1);
-    addValue("CMM Basic", 1 - ((double) totalErrorCount / (double) numPoints));
-
-    if (debug) {
-      System.out.println("-------------");
-    }
-  }
-
-  private double noiseError(CMMPoint cmdp, int assignedClusterID) {
-    int gtAssignedID = matchMap[assignedClusterID];
-    double error;
-
-    // Cluster wasn't matched, so just contains noise
-    // TODO: Noiscluster?
-    // also happens when we decrease the radius and there is only a noise point
-    // in the center
-    if (gtAssignedID == -1) {
-      error = 1;
-      cmdp.p.setMeasureValue("CMM Type", "noise - cluster");
-    }
-    else {
-      if (enableModelError
-          && gtAnalysis.getGT0Cluster(gtAssignedID).getInclusionProbability(cmdp) >= pointInclusionProbThreshold) {
-        // set to MIN_ERROR so we can still track the error
-        error = 0.00001;
-        cmdp.p.setMeasureValue("CMM Type", "noise - byModel");
-      }
-      else {
-        error = 1 - gtAnalysis.getConnectionValue(cmdp, gtAssignedID);
-        cmdp.p.setMeasureValue("CMM Type", "noise");
-      }
-    }
-
-    return error;
-  }
-
-  private double missedError(CMMPoint cmdp, boolean useHullDistance) {
-    cmdp.p.setMeasureValue("CMM Type", "missed");
-    if (!useHullDistance) {
-      return cmdp.connectivity;
-    }
-    else {
-      // main idea: look at relative distance of missed point to cluster
-      double minHullDist = 1;
-      for (int fc = 0; fc < numFClusters; fc++) {
-        // if fc is mappend onto the class of the point, check it for its
-        // hulldist
-        if (matchMap[fc] != -1 && matchMap[fc] == cmdp.workclass()) {
-          if (clustering.get(fc) instanceof SphereCluster) {
-            SphereCluster sc = (SphereCluster) clustering.get(fc);
-            double distanceFC = sc.getCenterDistance(cmdp);
-            double radius = sc.getRadius();
-            double hullDist = (distanceFC - radius) / (distanceFC + radius);
-            if (hullDist < minHullDist)
-              minHullDist = hullDist;
-          }
-          else {
-            double min = 1;
-            double max = 1;
-
-            // TODO: distance for random shape
-            // generate X points from the cluster with
-            // clustering.get(fc).sample(null)
-            // and find Min and Max values
-
-            double hullDist = min / max;
-            if (hullDist < minHullDist)
-              minHullDist = hullDist;
-          }
-        }
-      }
-
-      // use distance as weight
-      if (minHullDist > 1)
-        minHullDist = 1;
-
-      double weight = (1 - Math.exp(-lamdaMissed * minHullDist));
-      cmdp.p.setMeasureValue("HullDistWeight", weight);
-
-      return weight * cmdp.connectivity;
-    }
-  }
-
-  private double misplacedError(CMMPoint cmdp, int assignedClusterID) {
-    double weight = 0;
-
-    int gtAssignedID = matchMap[assignedClusterID];
-    // TODO take care of noise cluster?
-    if (gtAssignedID == -1) {
-      System.out.println("Point " + cmdp.getTimestamp() + " from gtcluster " + cmdp.trueClass
-          + " assigned to noise cluster " + assignedClusterID);
-      return 1;
-    }
-
-    if (gtAssignedID == cmdp.workclass())
-      return 0;
-    else {
-      // assigned and real GT0 cluster are not connected, but does the model
-      // have the
-      // chance of separating this point after all?
-      if (enableModelError
-          && gtAnalysis.getGT0Cluster(gtAssignedID).getInclusionProbability(cmdp) >= pointInclusionProbThreshold) {
-        weight = 0;
-        cmdp.p.setMeasureValue("CMM Type", "missplaced - byModel");
-      }
-      else {
-        // point was mapped onto wrong cluster (assigned), so check how far away
-        // the nearest point is within the wrongly assigned cluster
-        weight = 1 - gtAnalysis.getConnectionValue(cmdp, gtAssignedID);
-      }
-    }
-    double err_value;
-    // set to MIN_ERROR so we can still track the error
-    if (weight == 0) {
-      err_value = 0.00001;
-    }
-    else {
-      err_value = weight * cmdp.connectivity;
-      cmdp.p.setMeasureValue("CMM Type", "missplaced");
-    }
-
-    return err_value;
-  }
-
-  public String getParameterString() {
-    String para = gtAnalysis.getParameterString();
-    para += "lambdaMissed=" + lamdaMissed + ";";
-    return para;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM_GTAnalysis.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM_GTAnalysis.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM_GTAnalysis.java
deleted file mode 100644
index e7ae848..0000000
--- a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/CMM_GTAnalysis.java
+++ /dev/null
@@ -1,846 +0,0 @@
-package com.yahoo.labs.samoa.evaluation.measures;
-
-/*
- * #%L
- * SAMOA
- * %%
- * Copyright (C) 2014 - 2015 Apache Software Foundation
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-
-import com.yahoo.labs.samoa.instances.Instance;
-import com.yahoo.labs.samoa.moa.cluster.Clustering;
-import com.yahoo.labs.samoa.moa.core.AutoExpandVector;
-import com.yahoo.labs.samoa.moa.core.DataPoint;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-
-/**
- * [CMM_GTAnalysis.java]
- * 
- * CMM: Ground truth analysis
- * 
- * Reference: Kremer et al., "An Effective Evaluation Measure for Clustering on Evolving Data Streams", KDD, 2011
- * 
- * @author Timm jansen Data Management and Data Exploration Group, RWTH Aachen University
- */
-
-/*
- * TODO: - try to avoid calcualting the radius multiple times - avoid the full
- * distance map? - knn functionality in clusters - noise error
- */
-public class CMM_GTAnalysis {
-
-  /**
-   * the given ground truth clustering
-   */
-  private Clustering gtClustering;
-
-  /**
-   * list of given points within the horizon
-   */
-  private ArrayList<CMMPoint> cmmpoints;
-
-  /**
-   * the newly calculate ground truth clustering
-   */
-  private ArrayList<GTCluster> gt0Clusters;
-
-  /**
-   * IDs of noise points
-   */
-  private ArrayList<Integer> noise;
-
-  /**
-   * total number of points
-   */
-  private int numPoints;
-
-  /**
-   * number of clusters of the original ground truth
-   */
-  private int numGTClusters;
-
-  /**
-   * number of classes of the original ground truth, in case of a micro clustering ground truth this differs from
-   * numGTClusters
-   */
-  private int numGTClasses;
-
-  /**
-   * number of classes after we are done with the analysis
-   */
-  private int numGT0Classes;
-
-  /**
-   * number of dimensions
-   */
-  private int numDims;
-
-  /**
-   * mapping between true cluster ID/class label of the original ground truth and the internal cluster ID/working class
-   * label.
-   * 
-   * different original cluster IDs might map to the same new cluster ID due to merging of two clusters
-   */
-  private HashMap<Integer, Integer> mapTrueLabelToWorkLabel;
-
-  /**
-   * log of how clusters have been merged (for debugging)
-   */
-  private int[] mergeMap;
-
-  /**
-   * number of non-noise points that will create an error due to the underlying clustering model (e.g. point being
-   * covered by two clusters representing different classes)
-   */
-  private int noiseErrorByModel;
-
-  /**
-   * number of noise points that will create an error due to the underlying clustering model (e.g. noise point being
-   * covered by a cluster)
-   */
-  private int pointErrorByModel;
-
-  /**
-   * CMM debug mode
-   */
-  private boolean debug = false;
-
-  /******* CMM parameter ***********/
-
-  /**
-   * defines how many nearest neighbors will be used
-   */
-  private int knnNeighbourhood = 2;
-
-  /**
-   * the threshold which defines when ground truth clusters will be merged. set to 1 to disable merging
-   */
-  private double tauConnection = 0.5;
-
-  /**
-   * experimental (default: disabled) separate k for points to cluster and cluster to cluster
-   */
-  private double clusterConnectionMaxPoints = knnNeighbourhood;
-
-  /**
-   * experimental (default: disabled) use exponential connectivity function to model different behavior: closer points
-   * will have a stronger connection compared to the linear function. Use ConnRefXValue and ConnX to better parameterize
-   * lambda, which controls the decay of the connectivity
-   */
-  private boolean useExpConnectivity = false;
-  private double lambdaConnRefXValue = 0.01;
-  private double lambdaConnX = 4;
-  private double lamdaConn;
-
-  /******************************************/
-
-  /**
-   * Wrapper class for data points to store CMM relevant attributes
-   * 
-   */
-  protected class CMMPoint extends DataPoint {
-    /**
-     * Reference to original point
-     */
-    protected DataPoint p = null;
-
-    /**
-     * point ID
-     */
-    protected int pID = 0;
-
-    /**
-     * true class label
-     */
-    protected int trueClass = -1;
-
-    /**
-     * the connectivity of the point to its cluster
-     */
-    protected double connectivity = 1.0;
-
-    /**
-     * knn distnace within own cluster
-     */
-    protected double knnInCluster = 0.0;
-
-    /**
-     * knn indices (for debugging only)
-     */
-    protected ArrayList<Integer> knnIndices;
-
-    public CMMPoint(DataPoint point, int id) {
-      // make a copy, but keep reference
-      super(point, point.getTimestamp());
-      p = point;
-      pID = id;
-      trueClass = (int) point.classValue();
-    }
-
-    /**
-     * Retruns the current working label of the cluster the point belongs to. The label can change due to merging of
-     * clusters.
-     * 
-     * @return the current working class label
-     */
-    protected int workclass() {
-      if (trueClass == -1)
-        return -1;
-      else
-        return mapTrueLabelToWorkLabel.get(trueClass);
-    }
-  }
-
-  /**
-   * Main class to model the new clusters that will be the output of the cluster analysis
-   * 
-   */
-  protected class GTCluster {
-    /** points that are per definition in the cluster */
-    private ArrayList<Integer> points = new ArrayList<Integer>();
-
-    /**
-     * a new GT cluster consists of one or more "old" GT clusters. Connected/overlapping clusters cannot be merged
-     * directly because of the underlying cluster model. E.g. for merging two spherical clusters the new cluster sphere
-     * can cover a lot more space then two separate smaller spheres. To keep the original coverage we need to keep the
-     * orignal clusters and merge them on an abstract level.
-     */
-    private ArrayList<Integer> clusterRepresentations = new ArrayList<Integer>();
-
-    /** current work class (changes when merging) */
-    private int workclass;
-
-    /** original work class */
-    private final int orgWorkClass;
-
-    /** original class label */
-    private final int label;
-
-    /** clusters that have been merged into this cluster (debugging) */
-    private ArrayList<Integer> mergedWorkLabels = null;
-
-    /** average knn distance of all points in the cluster */
-    private double knnMeanAvg = 0;
-
-    /** average deviation of knn distance of all points */
-    private double knnDevAvg = 0;
-
-    /** connectivity of the cluster to all other clusters */
-    private ArrayList<Double> connections = new ArrayList<Double>();
-
-    private GTCluster(int workclass, int label, int gtClusteringID) {
-      this.orgWorkClass = workclass;
-      this.workclass = workclass;
-      this.label = label;
-      this.clusterRepresentations.add(gtClusteringID);
-    }
-
-    /**
-     * The original class label the cluster represents
-     * 
-     * @return original class label
-     */
-    protected int getLabel() {
-      return label;
-    }
-
-    /**
-     * Calculate the probability of the point being covered through the cluster
-     * 
-     * @param point
-     *          to calculate the probability for
-     * @return probability of the point being covered through the cluster
-     */
-    protected double getInclusionProbability(CMMPoint point) {
-      double prob = Double.MIN_VALUE;
-      // check all cluster representatives for coverage
-      for (int c = 0; c < clusterRepresentations.size(); c++) {
-        double tmp_prob = gtClustering.get(clusterRepresentations.get(c)).getInclusionProbability(point);
-        if (tmp_prob > prob)
-          prob = tmp_prob;
-      }
-      return prob;
-    }
-
-    /**
-     * calculate knn distances of points within own cluster + average knn distance and average knn distance deviation of
-     * all points
-     */
-    private void calculateKnn() {
-      for (int p0 : points) {
-        CMMPoint cmdp = cmmpoints.get(p0);
-        if (!cmdp.isNoise()) {
-          AutoExpandVector<Double> knnDist = new AutoExpandVector<Double>();
-          AutoExpandVector<Integer> knnPointIndex = new AutoExpandVector<Integer>();
-
-          // calculate nearest neighbours
-          getKnnInCluster(cmdp, knnNeighbourhood, points, knnDist, knnPointIndex);
-
-          // TODO: What to do if we have less then k neighbours?
-          double avgKnn = 0;
-          for (int i = 0; i < knnDist.size(); i++) {
-            avgKnn += knnDist.get(i);
-          }
-          if (knnDist.size() != 0)
-            avgKnn /= knnDist.size();
-          cmdp.knnInCluster = avgKnn;
-          cmdp.knnIndices = knnPointIndex;
-          cmdp.p.setMeasureValue("knnAvg", cmdp.knnInCluster);
-
-          knnMeanAvg += avgKnn;
-          knnDevAvg += Math.pow(avgKnn, 2);
-        }
-      }
-      knnMeanAvg = knnMeanAvg / (double) points.size();
-      knnDevAvg = knnDevAvg / (double) points.size();
-
-      double variance = knnDevAvg - Math.pow(knnMeanAvg, 2.0);
-      // Due to numerical errors, small negative values can occur.
-      if (variance <= 0.0)
-        variance = 1e-50;
-      knnDevAvg = Math.sqrt(variance);
-
-    }
-
-    /**
-     * Calculate the connection of a cluster to this cluster
-     * 
-     * @param otherCid
-     *          cluster id of the other cluster
-     * @param initial
-     *          flag for initial run
-     */
-    private void calculateClusterConnection(int otherCid, boolean initial) {
-      double avgConnection = 0;
-      if (workclass == otherCid) {
-        avgConnection = 1;
-      }
-      else {
-        AutoExpandVector<Double> kmax = new AutoExpandVector<Double>();
-        AutoExpandVector<Integer> kmaxIndexes = new AutoExpandVector<Integer>();
-
-        for (int p : points) {
-          CMMPoint cmdp = cmmpoints.get(p);
-          double con_p_Cj = getConnectionValue(cmmpoints.get(p), otherCid);
-          double connection = cmdp.connectivity * con_p_Cj;
-          if (initial) {
-            cmdp.p.setMeasureValue("Connection to C" + otherCid, con_p_Cj);
-          }
-
-          // connection
-          if (kmax.size() < clusterConnectionMaxPoints || connection > kmax.get(kmax.size() - 1)) {
-            int index = 0;
-            while (index < kmax.size() && connection < kmax.get(index)) {
-              index++;
-            }
-            kmax.add(index, connection);
-            kmaxIndexes.add(index, p);
-            if (kmax.size() > clusterConnectionMaxPoints) {
-              kmax.remove(kmax.size() - 1);
-              kmaxIndexes.add(kmaxIndexes.size() - 1);
-            }
-          }
-        }
-        // connection
-        for (int k = 0; k < kmax.size(); k++) {
-          avgConnection += kmax.get(k);
-        }
-        avgConnection /= kmax.size();
-      }
-
-      if (otherCid < connections.size()) {
-        connections.set(otherCid, avgConnection);
-      }
-      else if (connections.size() == otherCid) {
-        connections.add(avgConnection);
-      }
-      else
-        System.out.println("Something is going really wrong with the connection listing!" + knnNeighbourhood + " "
-            + tauConnection);
-    }
-
-    /**
-     * Merge a cluster into this cluster
-     * 
-     * @param mergeID
-     *          the ID of the cluster to be merged
-     */
-    private void mergeCluster(int mergeID) {
-      if (mergeID < gt0Clusters.size()) {
-        // track merging (debugging)
-        for (int i = 0; i < numGTClasses; i++) {
-          if (mergeMap[i] == mergeID)
-            mergeMap[i] = workclass;
-          if (mergeMap[i] > mergeID)
-            mergeMap[i]--;
-        }
-        GTCluster gtcMerge = gt0Clusters.get(mergeID);
-        if (debug)
-          System.out.println("Merging C" + gtcMerge.workclass + " into C" + workclass +
-              " with Con " + connections.get(mergeID) + " / " + gtcMerge.connections.get(workclass));
-
-        // update mapTrueLabelToWorkLabel
-        mapTrueLabelToWorkLabel.put(gtcMerge.label, workclass);
-        Iterator iterator = mapTrueLabelToWorkLabel.keySet().iterator();
-        while (iterator.hasNext()) {
-          Integer key = (Integer) iterator.next();
-          // update pointer of already merged cluster
-          int value = mapTrueLabelToWorkLabel.get(key);
-          if (value == mergeID)
-            mapTrueLabelToWorkLabel.put(key, workclass);
-          if (value > mergeID)
-            mapTrueLabelToWorkLabel.put(key, value - 1);
-        }
-
-        // merge points from B into A
-        points.addAll(gtcMerge.points);
-        clusterRepresentations.addAll(gtcMerge.clusterRepresentations);
-        if (mergedWorkLabels == null) {
-          mergedWorkLabels = new ArrayList<Integer>();
-        }
-        mergedWorkLabels.add(gtcMerge.orgWorkClass);
-        if (gtcMerge.mergedWorkLabels != null)
-          mergedWorkLabels.addAll(gtcMerge.mergedWorkLabels);
-
-        gt0Clusters.remove(mergeID);
-
-        // update workclass labels
-        for (int c = mergeID; c < gt0Clusters.size(); c++) {
-          gt0Clusters.get(c).workclass = c;
-        }
-
-        // update knn distances
-        calculateKnn();
-        for (int c = 0; c < gt0Clusters.size(); c++) {
-          gt0Clusters.get(c).connections.remove(mergeID);
-
-          // recalculate connection from other clusters to the new merged one
-          gt0Clusters.get(c).calculateClusterConnection(workclass, false);
-          // and from new merged one to other clusters
-          gt0Clusters.get(workclass).calculateClusterConnection(c, false);
-        }
-      }
-      else {
-        System.out.println("Merge indices are not valid");
-      }
-    }
-  }
-
-  /**
-   * @param trueClustering
-   *          the ground truth clustering
-   * @param points
-   *          data points
-   * @param enableClassMerge
-   *          allow class merging (should be set to true on default)
-   */
-  public CMM_GTAnalysis(Clustering trueClustering, ArrayList<DataPoint> points, boolean enableClassMerge) {
-    if (debug)
-      System.out.println("GT Analysis Debug Output");
-
-    noiseErrorByModel = 0;
-    pointErrorByModel = 0;
-    if (!enableClassMerge) {
-      tauConnection = 1.0;
-    }
-
-    lamdaConn = -Math.log(lambdaConnRefXValue) / Math.log(2) / lambdaConnX;
-
-    this.gtClustering = trueClustering;
-
-    numPoints = points.size();
-    numDims = points.get(0).numAttributes() - 1;
-    numGTClusters = gtClustering.size();
-
-    // init mappings between work and true labels
-    mapTrueLabelToWorkLabel = new HashMap<Integer, Integer>();
-
-    // set up base of new clustering
-    gt0Clusters = new ArrayList<GTCluster>();
-    int numWorkClasses = 0;
-    // create label to worklabel mapping as real labels can be just a set of
-    // unordered integers
-    for (int i = 0; i < numGTClusters; i++) {
-      int label = (int) gtClustering.get(i).getGroundTruth();
-      if (!mapTrueLabelToWorkLabel.containsKey(label)) {
-        gt0Clusters.add(new GTCluster(numWorkClasses, label, i));
-        mapTrueLabelToWorkLabel.put(label, numWorkClasses);
-        numWorkClasses++;
-      }
-      else {
-        gt0Clusters.get(mapTrueLabelToWorkLabel.get(label)).clusterRepresentations.add(i);
-      }
-    }
-    numGTClasses = numWorkClasses;
-
-    mergeMap = new int[numGTClasses];
-    for (int i = 0; i < numGTClasses; i++) {
-      mergeMap[i] = i;
-    }
-
-    // create cmd point wrapper instances
-    cmmpoints = new ArrayList<CMMPoint>();
-    for (int p = 0; p < points.size(); p++) {
-      CMMPoint cmdp = new CMMPoint(points.get(p), p);
-      cmmpoints.add(cmdp);
-    }
-
-    // split points up into their GTClusters and Noise (according to class
-    // labels)
-    noise = new ArrayList<Integer>();
-    for (int p = 0; p < numPoints; p++) {
-      if (cmmpoints.get(p).isNoise()) {
-        noise.add(p);
-      }
-      else {
-        gt0Clusters.get(cmmpoints.get(p).workclass()).points.add(p);
-      }
-    }
-
-    // calculate initial knnMean and knnDev
-    for (GTCluster gtc : gt0Clusters) {
-      gtc.calculateKnn();
-    }
-
-    // calculate cluster connections
-    calculateGTClusterConnections();
-
-    // calculate point connections with own clusters
-    calculateGTPointQualities();
-
-    if (debug)
-      System.out.println("GT Analysis Debug End");
-
-  }
-
-  /**
-   * Calculate the connection of a point to a cluster
-   * 
-   * @param cmmp
-   *          the point to calculate the connection for
-   * @param clusterID
-   *          the corresponding cluster
-   * @return the connection value
-   */
-  // TODO: Cache the connection value for a point to the different clusters???
-  protected double getConnectionValue(CMMPoint cmmp, int clusterID) {
-    AutoExpandVector<Double> knnDist = new AutoExpandVector<Double>();
-    AutoExpandVector<Integer> knnPointIndex = new AutoExpandVector<Integer>();
-
-    // calculate the knn distance of the point to the cluster
-    getKnnInCluster(cmmp, knnNeighbourhood, gt0Clusters.get(clusterID).points, knnDist, knnPointIndex);
-
-    // TODO: What to do if we have less then k neighbors?
-    double avgDist = 0;
-    for (int i = 0; i < knnDist.size(); i++) {
-      avgDist += knnDist.get(i);
-    }
-    // what to do if we only have a single point???
-    if (knnDist.size() != 0)
-      avgDist /= knnDist.size();
-    else
-      return 0;
-
-    // get the upper knn distance of the cluster
-    double upperKnn = gt0Clusters.get(clusterID).knnMeanAvg + gt0Clusters.get(clusterID).knnDevAvg;
-
-    /*
-     * calculate the connectivity based on knn distance of the point within the
-     * cluster and the upper knn distance of the cluster
-     */
-    if (avgDist < upperKnn) {
-      return 1;
-    }
-    else {
-      // value that should be reached at upperKnn distance
-      // Choose connection formula
-      double conn;
-      if (useExpConnectivity)
-        conn = Math.pow(2, -lamdaConn * (avgDist - upperKnn) / upperKnn);
-      else
-        conn = upperKnn / avgDist;
-
-      if (Double.isNaN(conn))
-        System.out.println("Connectivity NaN at " + cmmp.p.getTimestamp());
-
-      return conn;
-    }
-  }
-
-  /**
-   * @param cmmp
-   *          point to calculate knn distance for
-   * @param k
-   *          number of nearest neighbors to look for
-   * @param pointIDs
-   *          list of point IDs to check
-   * @param knnDist
-   *          sorted list of smallest knn distances (can already be filled to make updates possible)
-   * @param knnPointIndex
-   *          list of corresponding knn indices
-   */
-  private void getKnnInCluster(CMMPoint cmmp, int k,
-      ArrayList<Integer> pointIDs,
-      AutoExpandVector<Double> knnDist,
-      AutoExpandVector<Integer> knnPointIndex) {
-
-    // iterate over every point in the choosen cluster, cal distance and insert
-    // into list
-    for (int p1 = 0; p1 < pointIDs.size(); p1++) {
-      int pid = pointIDs.get(p1);
-      if (cmmp.pID == pid)
-        continue;
-      double dist = distance(cmmp, cmmpoints.get(pid));
-      if (knnDist.size() < k || dist < knnDist.get(knnDist.size() - 1)) {
-        int index = 0;
-        while (index < knnDist.size() && dist > knnDist.get(index)) {
-          index++;
-        }
-        knnDist.add(index, dist);
-        knnPointIndex.add(index, pid);
-        if (knnDist.size() > k) {
-          knnDist.remove(knnDist.size() - 1);
-          knnPointIndex.remove(knnPointIndex.size() - 1);
-        }
-      }
-    }
-  }
-
-  /**
-   * calculate initial connectivities
-   */
-  private void calculateGTPointQualities() {
-    for (int p = 0; p < numPoints; p++) {
-      CMMPoint cmdp = cmmpoints.get(p);
-      if (!cmdp.isNoise()) {
-        cmdp.connectivity = getConnectionValue(cmdp, cmdp.workclass());
-        cmdp.p.setMeasureValue("Connectivity", cmdp.connectivity);
-      }
-    }
-  }
-
-  /**
-   * Calculate connections between clusters and merge clusters accordingly as long as connections exceed threshold
-   */
-  private void calculateGTClusterConnections() {
-    for (int c0 = 0; c0 < gt0Clusters.size(); c0++) {
-      for (int c1 = 0; c1 < gt0Clusters.size(); c1++) {
-        gt0Clusters.get(c0).calculateClusterConnection(c1, true);
-      }
-    }
-
-    boolean changedConnection = true;
-    while (changedConnection) {
-      if (debug) {
-        System.out.println("Cluster Connection");
-        for (int c = 0; c < gt0Clusters.size(); c++) {
-          System.out.print("C" + gt0Clusters.get(c).label + " --> ");
-          for (int c1 = 0; c1 < gt0Clusters.get(c).connections.size(); c1++) {
-            System.out.print(" C" + gt0Clusters.get(c1).label + ": " + gt0Clusters.get(c).connections.get(c1));
-          }
-          System.out.println("");
-        }
-        System.out.println("");
-      }
-
-      double max = 0;
-      int maxIndexI = -1;
-      int maxIndexJ = -1;
-
-      changedConnection = false;
-      for (int c0 = 0; c0 < gt0Clusters.size(); c0++) {
-        for (int c1 = c0 + 1; c1 < gt0Clusters.size(); c1++) {
-          if (c0 == c1)
-            continue;
-          double min = Math.min(gt0Clusters.get(c0).connections.get(c1), gt0Clusters.get(c1).connections.get(c0));
-          if (min > max) {
-            max = min;
-            maxIndexI = c0;
-            maxIndexJ = c1;
-          }
-        }
-      }
-      if (maxIndexI != -1 && max > tauConnection) {
-        gt0Clusters.get(maxIndexI).mergeCluster(maxIndexJ);
-        if (debug)
-          System.out.println("Merging " + maxIndexI + " and " + maxIndexJ + " because of connection " + max);
-
-        changedConnection = true;
-      }
-    }
-    numGT0Classes = gt0Clusters.size();
-  }
-
-  /**
-   * Calculates how well the original clusters are separable. Small values indicate bad separability, values close to 1
-   * indicate good separability
-   * 
-   * @return index of seperability
-   */
-  public double getClassSeparability() {
-    // int totalConn = numGTClasses*(numGTClasses-1)/2;
-    // int mergedConn = 0;
-    // for(GTCluster gt : gt0Clusters){
-    // int merged = gt.clusterRepresentations.size();
-    // if(merged > 1)
-    // mergedConn+=merged * (merged-1)/2;
-    // }
-    // if(totalConn == 0)
-    // return 0;
-    // else
-    // return 1-mergedConn/(double)totalConn;
-    return numGT0Classes / (double) numGTClasses;
-
-  }
-
-  /**
-   * Calculates how well noise is separable from the given clusters Small values indicate bad separability, values close
-   * to 1 indicate good separability
-   * 
-   * @return index of noise separability
-   */
-  public double getNoiseSeparability() {
-    if (noise.isEmpty())
-      return 1;
-
-    double connectivity = 0;
-    for (int p : noise) {
-      CMMPoint npoint = cmmpoints.get(p);
-      double maxConnection = 0;
-
-      // TODO: some kind of pruning possible. what about weighting?
-      for (int c = 0; c < gt0Clusters.size(); c++) {
-        double connection = getConnectionValue(npoint, c);
-        if (connection > maxConnection)
-          maxConnection = connection;
-      }
-      connectivity += maxConnection;
-      npoint.p.setMeasureValue("MaxConnection", maxConnection);
-    }
-
-    return 1 - (connectivity / noise.size());
-  }
-
-  /**
-   * Calculates the relative number of errors being caused by the underlying cluster model
-   * 
-   * @return quality of the model
-   */
-  public double getModelQuality() {
-    for (int p = 0; p < numPoints; p++) {
-      CMMPoint cmdp = cmmpoints.get(p);
-      for (int hc = 0; hc < numGTClusters; hc++) {
-        if (gtClustering.get(hc).getGroundTruth() != cmdp.trueClass) {
-          if (gtClustering.get(hc).getInclusionProbability(cmdp) >= 1) {
-            if (!cmdp.isNoise())
-              pointErrorByModel++;
-            else
-              noiseErrorByModel++;
-            break;
-          }
-        }
-      }
-    }
-    if (debug)
-      System.out.println("Error by model: noise " + noiseErrorByModel + " point " + pointErrorByModel);
-
-    return 1 - ((pointErrorByModel + noiseErrorByModel) / (double) numPoints);
-  }
-
-  /**
-   * Get CMM internal point
-   * 
-   * @param index
-   *          of the point
-   * @return cmm point
-   */
-  protected CMMPoint getPoint(int index) {
-    return cmmpoints.get(index);
-  }
-
-  /**
-   * Return cluster
-   * 
-   * @param index
-   *          of the cluster to return
-   * @return cluster
-   */
-  protected GTCluster getGT0Cluster(int index) {
-    return gt0Clusters.get(index);
-  }
-
-  /**
-   * Number of classes/clusters of the new clustering
-   * 
-   * @return number of new clusters
-   */
-  protected int getNumberOfGT0Classes() {
-    return numGT0Classes;
-  }
-
-  /**
-   * Calculates Euclidian distance
-   * 
-   * @param inst1
-   *          point as double array
-   * @param inst2
-   *          point as double array
-   * @return euclidian distance
-   */
-  private double distance(Instance inst1, Instance inst2) {
-    return distance(inst1, inst2.toDoubleArray());
-
-  }
-
-  /**
-   * Calculates Euclidian distance
-   * 
-   * @param inst1
-   *          point as an instance
-   * @param inst2
-   *          point as double array
-   * @return euclidian distance
-   */
-  private double distance(Instance inst1, double[] inst2) {
-    double distance = 0.0;
-    for (int i = 0; i < numDims; i++) {
-      double d = inst1.value(i) - inst2[i];
-      distance += d * d;
-    }
-    return Math.sqrt(distance);
-  }
-
-  /**
-   * String with main CMM parameters
-   * 
-   * @return main CMM parameter
-   */
-  public String getParameterString() {
-    String para = "";
-    para += "k=" + knnNeighbourhood + ";";
-    if (useExpConnectivity) {
-      para += "lambdaConnX=" + lambdaConnX + ";";
-      para += "lambdaConn=" + lamdaConn + ";";
-      para += "lambdaConnRef=" + lambdaConnRefXValue + ";";
-    }
-    para += "m=" + clusterConnectionMaxPoints + ";";
-    para += "tauConn=" + tauConnection + ";";
-
-    return para;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/EntropyCollection.java
----------------------------------------------------------------------
diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/EntropyCollection.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/EntropyCollection.java
deleted file mode 100644
index 962e66a..0000000
--- a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/EntropyCollection.java
+++ /dev/null
@@ -1,175 +0,0 @@
-package com.yahoo.labs.samoa.evaluation.measures;
-
-/*
- * #%L
- * SAMOA
- * %%
- * Copyright (C) 2014 - 2015 Apache Software Foundation
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-
-import java.util.ArrayList;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.yahoo.labs.samoa.moa.cluster.Clustering;
-import com.yahoo.labs.samoa.moa.core.DataPoint;
-import com.yahoo.labs.samoa.moa.evaluation.MeasureCollection;
-import com.yahoo.labs.samoa.moa.evaluation.MembershipMatrix;
-
-public class EntropyCollection extends MeasureCollection {
-
-  private static final Logger logger = 
LoggerFactory.getLogger(EntropyCollection.class);
-
-  @Override
-  protected String[] getNames() {
-    return new String[] { "GT cross entropy", "FC cross entropy", 
"Homogeneity", "Completeness", "V-Measure",
-        "VarInformation" };
-  }
-
-  @Override
-  protected boolean[] getDefaultEnabled() {
-    return new boolean[] { false, false, false, false, false, false };
-  }
-
-  @Override
-  public void evaluateClustering(Clustering fclustering, Clustering 
hClustering, ArrayList<DataPoint> points)
-      throws Exception {
-
-    MembershipMatrix mm = new MembershipMatrix(fclustering, points);
-    int numClasses = mm.getNumClasses();
-    int numCluster = fclustering.size() + 1;
-    int n = mm.getTotalEntries();
-
-    double FCentropy = 0;
-    if (numCluster > 1) {
-      for (int fc = 0; fc < numCluster; fc++) {
-        double weight = mm.getClusterSum(fc) / (double) n;
-        if (weight > 0)
-          FCentropy += weight * Math.log10(weight);
-      }
-      FCentropy /= (-1 * Math.log10(numCluster));
-    }
-
-    logger.debug("FC entropy: {}", FCentropy);
-
-    double GTentropy = 0;
-    if (numClasses > 1) {
-      for (int hc = 0; hc < numClasses; hc++) {
-        double weight = mm.getClassSum(hc) / (double) n;
-        if (weight > 0)
-          GTentropy += weight * Math.log10(weight);
-      }
-      GTentropy /= (-1 * Math.log10(numClasses));
-    }
-
-    logger.debug("GT entropy: {}", GTentropy);
-
-    // cluster based entropy
-    double FCcrossEntropy = 0;
-
-    for (int fc = 0; fc < numCluster; fc++) {
-      double e = 0;
-      int clusterWeight = mm.getClusterSum(fc);
-      if (clusterWeight > 0) {
-        for (int hc = 0; hc < numClasses; hc++) {
-          double p = mm.getClusterClassWeight(fc, hc) / (double) clusterWeight;
-          if (p != 0) {
-            e += p * Math.log10(p);
-          }
-        }
-        FCcrossEntropy += ((clusterWeight / (double) n) * e);
-      }
-    }
-    if (numCluster > 1) {
-      FCcrossEntropy /= -1 * Math.log10(numCluster);
-    }
-
-    addValue("FC cross entropy", 1 - FCcrossEntropy);
-    logger.debug("FC cross entropy: {}", 1 - FCcrossEntropy);
-
-    // class based entropy
-    double GTcrossEntropy = 0;
-    for (int hc = 0; hc < numClasses; hc++) {
-      double e = 0;
-      int classWeight = mm.getClassSum(hc);
-      if (classWeight > 0) {
-        for (int fc = 0; fc < numCluster; fc++) {
-          double p = mm.getClusterClassWeight(fc, hc) / (double) classWeight;
-          if (p != 0) {
-            e += p * Math.log10(p);
-          }
-        }
-      }
-      GTcrossEntropy += ((classWeight / (double) n) * e);
-    }
-    if (numClasses > 1)
-      GTcrossEntropy /= -1 * Math.log10(numClasses);
-    addValue("GT cross entropy", 1 - GTcrossEntropy);
-    logger.debug("GT cross entropy: {}", 1 - GTcrossEntropy);
-
-    double homogeneity;
-    if (FCentropy == 0)
-      homogeneity = 1;
-    else
-      homogeneity = 1 - FCcrossEntropy / FCentropy;
-
-    // TODO set err values for now, needs to be debugged
-    if (homogeneity > 1 || homogeneity < 0)
-      addValue("Homogeneity", -1);
-    else
-      addValue("Homogeneity", homogeneity);
-
-    double completeness;
-    if (GTentropy == 0)
-      completeness = 1;
-    else
-      completeness = 1 - GTcrossEntropy / GTentropy;
-    addValue("Completeness", completeness);
-
-    double beta = 1;
-    double vmeasure = (1 + beta) * homogeneity * completeness / (beta * 
homogeneity + completeness);
-
-    if (vmeasure > 1 || homogeneity < 0)
-      addValue("V-Measure", -1);
-    else
-      addValue("V-Measure", vmeasure);
-
-    double mutual = 0;
-    for (int i = 0; i < numCluster; i++) {
-      for (int j = 0; j < numClasses; j++) {
-        if (mm.getClusterClassWeight(i, j) == 0)
-          continue;
-        double m = Math.log10(mm.getClusterClassWeight(i, j) / (double) 
mm.getClusterSum(i)
-            / (double) mm.getClassSum(j) * (double) n);
-        m *= mm.getClusterClassWeight(i, j) / (double) n;
-        logger.debug("( {} / {}): ", m, m);
-        mutual += m;
-      }
-    }
-    if (numClasses > 1)
-      mutual /= Math.log10(numClasses);
-
-    double varInfo = 1;
-    if (FCentropy + GTentropy > 0)
-      varInfo = 2 * mutual / (FCentropy + GTentropy);
-
-    logger.debug("mutual: {} / VI: {}", mutual, varInfo);
-    addValue("VarInformation", varInfo);
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/F1.java
----------------------------------------------------------------------
diff --git 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/F1.java 
b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/F1.java
deleted file mode 100644
index a31e6ce..0000000
--- a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/F1.java
+++ /dev/null
@@ -1,110 +0,0 @@
-package com.yahoo.labs.samoa.evaluation.measures;
-
-/*
- * #%L
- * SAMOA
- * %%
- * Copyright (C) 2014 - 2015 Apache Software Foundation
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-
-import com.yahoo.labs.samoa.moa.cluster.Clustering;
-import com.yahoo.labs.samoa.moa.evaluation.MeasureCollection;
-import com.yahoo.labs.samoa.moa.evaluation.MembershipMatrix;
-import com.yahoo.labs.samoa.moa.core.DataPoint;
-import java.util.ArrayList;
-
-public class F1 extends MeasureCollection {
-
-  @Override
-  protected String[] getNames() {
-    return new String[] { "F1-P", "F1-R", "Purity" };
-  }
-
-  public void evaluateClustering(Clustering clustering, Clustering 
trueClustering, ArrayList<DataPoint> points) {
-
-    if (clustering.size() < 0) {
-      addValue(0, 0);
-      addValue(1, 0);
-      return;
-    }
-
-    MembershipMatrix mm = new MembershipMatrix(clustering, points);
-    // System.out.println(mm.toString());
-
-    int numClasses = mm.getNumClasses();
-    if (mm.hasNoiseClass())
-      numClasses--;
-
-    // F1 as defined in P3C, try using F1 optimization
-    double F1_P = 0.0;
-    double purity = 0;
-    int realClusters = 0;
-    for (int i = 0; i < clustering.size(); i++) {
-      int max_weight = 0;
-      int max_weight_index = -1;
-
-      // find max index
-      for (int j = 0; j < numClasses; j++) {
-        if (mm.getClusterClassWeight(i, j) > max_weight) {
-          max_weight = mm.getClusterClassWeight(i, j);
-          max_weight_index = j;
-        }
-      }
-      if (max_weight_index != -1) {
-        realClusters++;
-        double precision = mm.getClusterClassWeight(i, max_weight_index) / 
(double) mm.getClusterSum(i);
-        double recall = mm.getClusterClassWeight(i, max_weight_index) / 
(double) mm.getClassSum(max_weight_index);
-        double f1 = 0;
-        if (precision > 0 || recall > 0) {
-          f1 = 2 * precision * recall / (precision + recall);
-        }
-        F1_P += f1;
-        purity += precision;
-
-        // TODO should we move setMeasure stuff into the Cluster interface?
-        clustering.get(i).setMeasureValue("F1-P", Double.toString(f1));
-      }
-    }
-    if (realClusters > 0) {
-      F1_P /= realClusters;
-      purity /= realClusters;
-    }
-    addValue("F1-P", F1_P);
-    addValue("Purity", purity);
-
-    // F1 as defined in .... mainly maximizes F1 for each class
-    double F1_R = 0.0;
-    for (int j = 0; j < numClasses; j++) {
-      double max_f1 = 0;
-      for (int i = 0; i < clustering.size(); i++) {
-        double precision = mm.getClusterClassWeight(i, j) / (double) 
mm.getClusterSum(i);
-        double recall = mm.getClusterClassWeight(i, j) / (double) 
mm.getClassSum(j);
-        double f1 = 0;
-        if (precision > 0 || recall > 0) {
-          f1 = 2 * precision * recall / (precision + recall);
-        }
-        if (max_f1 < f1) {
-          max_f1 = f1;
-        }
-      }
-      F1_R += max_f1;
-    }
-    F1_R /= numClasses;
-
-    addValue("F1-R", F1_R);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/General.java
----------------------------------------------------------------------
diff --git 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/General.java 
b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/General.java
deleted file mode 100644
index c15a8f8..0000000
--- 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/General.java
+++ /dev/null
@@ -1,192 +0,0 @@
-package com.yahoo.labs.samoa.evaluation.measures;
-
-/*
- * #%L
- * SAMOA
- * %%
- * Copyright (C) 2014 - 2015 Apache Software Foundation
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-
-import com.yahoo.labs.samoa.instances.Instance;
-import com.yahoo.labs.samoa.moa.cluster.Clustering;
-import com.yahoo.labs.samoa.moa.cluster.SphereCluster;
-import com.yahoo.labs.samoa.moa.evaluation.MeasureCollection;
-import com.yahoo.labs.samoa.moa.core.DataPoint;
-import java.util.ArrayList;
-
-/**
- * General coverage measures for a found clustering: "GPrecision", "GRecall",
- * "Redundancy", "numCluster" and "numClasses". A point counts as covered by a
- * cluster when the cluster's inclusion probability for it reaches
- * {@code pointInclusionProbThreshold}.
- */
-public class General extends MeasureCollection {
-  private int numPoints;
-  private int numFClusters;
-  private int numDims;
-  private double pointInclusionProbThreshold = 0.8;
-  private Clustering clustering;
-  private ArrayList<DataPoint> points;
-
-  public General() {
-    super();
-  }
-
-  @Override
-  protected String[] getNames() {
-    // String[] names =
-    // {"GPrecision","GRecall","Redundancy","Overlap","numCluster","numClasses","Compactness"};
-    return new String[] { "GPrecision", "GRecall", "Redundancy", "numCluster", "numClasses" };
-  }
-
-  // @Override
-  // protected boolean[] getDefaultEnabled() {
-  // boolean [] defaults = {false, false, false, false, false ,false};
-  // return defaults;
-  // }
-
-  /**
-   * Counts, for every point, how many found clusters cover it and records the
-   * resulting measures. Points with class value -1 are treated as noise.
-   * Ratios whose denominator is zero are reported as 0.
-   * 
-   * @param clustering
-   *          the found clustering to evaluate
-   * @param trueClustering
-   *          ground-truth clustering; only its size is used ("numClasses")
-   * @param points
-   *          the points to test for coverage
-   * @throws Exception
-   *           declared by the overridden method
-   */
-  @Override
-  public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points)
-      throws Exception {
-
-    this.points = points;
-    this.clustering = clustering;
-    numPoints = points.size();
-    numFClusters = clustering.size();
-    // The last attribute is not counted as a dimension; with no points there
-    // is nothing to read the dimensionality from.
-    numDims = points.isEmpty() ? 0 : points.get(0).numAttributes() - 1;
-
-    int totalRedundancy = 0;
-    int trueCoverage = 0;
-    int totalCoverage = 0;
-
-    int numNoise = 0;
-    for (int p = 0; p < numPoints; p++) {
-      DataPoint point = points.get(p);
-      int coverage = 0;
-      for (int c = 0; c < numFClusters; c++) {
-        // contained in cluster c?
-        if (clustering.get(c).getInclusionProbability(point) >= pointInclusionProbThreshold) {
-          coverage++;
-        }
-      }
-
-      if (point.classValue() == -1) {
-        numNoise++;
-      } else if (coverage > 0) {
-        trueCoverage++;
-      }
-
-      if (coverage > 0) {
-        totalCoverage++; // points covered by clustering (incl. noise)
-      }
-      if (coverage > 1) {
-        totalRedundancy++; // include noise
-      }
-    }
-
-    int numNonNoise = numPoints - numNoise;
-
-    addValue("numCluster", clustering.size());
-    addValue("numClasses", trueClustering.size());
-    addValue("Redundancy", (numPoints == 0 ? 0 : ((double) totalRedundancy / (double) numPoints)));
-    addValue("GPrecision", (totalCoverage == 0 ? 0 : ((double) trueCoverage / (double) (totalCoverage))));
-    // Same zero-denominator convention as GPrecision: all-noise input gives 0
-    // instead of NaN.
-    addValue("GRecall", (numNonNoise == 0 ? 0 : ((double) trueCoverage / (double) numNonNoise)));
-    // if(isEnabled(3)){
-    // addValue("Compactness", computeCompactness());
-    // }
-    // if(isEnabled(3)){
-    // addValue("Overlap", computeOverlap());
-    // }
-  }
-
-  /**
-   * Fraction of found clusters that overlap at least one other found cluster.
-   * Only supports {@link SphereCluster}s. Currently not called: its only use
-   * in evaluateClustering is commented out.
-   * 
-   * @return share of overlapping clusters, or NaN if any cluster is not a
-   *         SphereCluster
-   */
-  private double computeOverlap() {
-    for (int c = 0; c < numFClusters; c++) {
-      if (!(clustering.get(c) instanceof SphereCluster)) {
-        System.out.println("Overlap only supports Sphere Cluster. Found: " + clustering.get(c).getClass());
-        return Double.NaN;
-      }
-    }
-
-    boolean[] overlap = new boolean[numFClusters];
-
-    // Every unordered pair is checked. Clusters already marked are NOT
-    // skipped: a later cluster may overlap only with an already-marked one
-    // and would otherwise never be marked itself.
-    for (int c0 = 0; c0 < numFClusters; c0++) {
-      SphereCluster s0 = (SphereCluster) clustering.get(c0);
-      for (int c1 = c0 + 1; c1 < numFClusters; c1++) {
-        if (overlap[c0] && overlap[c1]) {
-          continue; // nothing new to learn from this pair
-        }
-        SphereCluster s1 = (SphereCluster) clustering.get(c1);
-        if (s0.overlapRadiusDegree(s1) > 0) {
-          overlap[c0] = overlap[c1] = true;
-        }
-      }
-    }
-
-    double totalOverlap = 0;
-    for (int c0 = 0; c0 < numFClusters; c0++) {
-      if (overlap[c0]) {
-        totalOverlap++;
-      }
-    }
-
-    // if(totalOverlap/(double)numFClusters > .8) RunVisualizer.pause();
-    if (numFClusters > 0) {
-      totalOverlap /= (double) numFClusters;
-    }
-    return totalOverlap;
-  }
-
-  /**
-   * Average compactness of the found clusters: for each cluster, the radius
-   * of a SphereCluster built from its covered points divided by the cluster's
-   * own radius. The per-cluster value is also stored on the cluster as
-   * measure value "Compactness". Only supports {@link SphereCluster}s.
-   * Currently not called: its only use in evaluateClustering is commented
-   * out.
-   * 
-   * @return mean compactness, 0 when there are no clusters, or NaN if any
-   *         cluster is not a SphereCluster
-   */
-  private double computeCompactness() {
-    if (numFClusters == 0) {
-      return 0;
-    }
-    for (int c = 0; c < numFClusters; c++) {
-      if (!(clustering.get(c) instanceof SphereCluster)) {
-        System.out.println("Compactness only supports Sphere Cluster. Found: " + clustering.get(c).getClass());
-        return Double.NaN;
-      }
-    }
-
-    // TODO weight radius by number of dimensions
-    double totalCompactness = 0;
-    for (int c = 0; c < numFClusters; c++) {
-      ArrayList<Instance> containedPoints = new ArrayList<Instance>();
-      for (int p = 0; p < numPoints; p++) {
-        // p in c
-        if (clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold) {
-          containedPoints.add(points.get(p));
-        }
-      }
-      double compactness = 0;
-      if (containedPoints.size() > 1) {
-        // cluster not empty
-        SphereCluster minEnclosingCluster = new SphereCluster(containedPoints, numDims);
-        double minRadius = minEnclosingCluster.getRadius();
-        double cfRadius = ((SphereCluster) clustering.get(c)).getRadius();
-        if (Math.abs(minRadius - cfRadius) < 0.1e-10) {
-          compactness = 1;
-        } else if (minRadius < cfRadius) {
-          compactness = minRadius / cfRadius;
-        } else {
-          System.out.println("Optimal radius bigger then real one (" + (cfRadius - minRadius)
-              + "), this is really wrong");
-          compactness = 1;
-        }
-      } else {
-        // Zero or one covered point: only a zero-radius cluster counts as
-        // compact.
-        double cfRadius = ((SphereCluster) clustering.get(c)).getRadius();
-        if (cfRadius == 0) {
-          compactness = 1;
-        }
-      }
-
-      // weight by weight of cluster???
-      totalCompactness += compactness;
-      clustering.get(c).setMeasureValue("Compactness", Double.toString(compactness));
-    }
-    return (totalCompactness / numFClusters);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/SSQ.java
----------------------------------------------------------------------
diff --git 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/SSQ.java 
b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/SSQ.java
deleted file mode 100644
index 175b925..0000000
--- a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/SSQ.java
+++ /dev/null
@@ -1,97 +0,0 @@
-package com.yahoo.labs.samoa.evaluation.measures;
-
-/*
- * #%L
- * SAMOA
- * %%
- * Copyright (C) 2014 - 2015 Apache Software Foundation
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-import java.util.ArrayList;
-
-import com.yahoo.labs.samoa.moa.cluster.Clustering;
-import com.yahoo.labs.samoa.moa.core.DataPoint;
-import com.yahoo.labs.samoa.moa.evaluation.MeasureCollection;
-import com.yahoo.labs.samoa.instances.Instance;
-
-/**
- * Sum of squared distances ("SSQ") from every non-noise point to the centre
- * of its nearest found cluster. Disabled by default.
- */
-public class SSQ extends MeasureCollection {
-
-  public SSQ() {
-    super();
-  }
-
-  @Override
-  public String[] getNames() {
-    return new String[] { "SSQ" };
-  }
-
-  @Override
-  protected boolean[] getDefaultEnabled() {
-    return new boolean[] { false };
-  }
-
-  /**
-   * Instance-based variant of {@link #evaluateClustering}: records the SSQ of
-   * the given points as measure 0. Points with class value -1 are skipped.
-   */
-  // TODO Work on this later
-  // @Override
-  public void evaluateClusteringSamoa(Clustering clustering,
-      Clustering trueClsutering, ArrayList<Instance> points) {
-    double total = 0.0;
-    for (Instance point : points) {
-      // don't include noise
-      if (point.classValue() != -1) {
-        total += nearestSquaredDistance(point, clustering);
-      }
-    }
-
-    addValue(0, total);
-  }
-
-  /**
-   * Records the SSQ of the given points as measure 0. Points with class value
-   * -1 are skipped; the ground-truth clustering is not used.
-   */
-  @Override
-  public void evaluateClustering(Clustering clustering, Clustering trueClsutering, ArrayList<DataPoint> points) {
-    double total = 0.0;
-    for (DataPoint point : points) {
-      // don't include noise
-      if (point.classValue() != -1) {
-        total += nearestSquaredDistance(point, clustering);
-      }
-    }
-
-    addValue(0, total);
-  }
-
-  /**
-   * Squared Euclidean distance from the point to the closest cluster centre,
-   * compared over as many attributes as the centre has coordinates. Yields
-   * Double.MAX_VALUE when the clustering contains no cluster.
-   */
-  private static double nearestSquaredDistance(Instance point, Clustering clustering) {
-    double best = Double.MAX_VALUE;
-    for (int c = 0; c < clustering.size(); c++) {
-      double[] center = clustering.get(c).getCenter();
-      double squared = 0.0;
-      for (int i = 0; i < center.length; i++) {
-        double diff = point.value(i) - center[i];
-        squared += diff * diff;
-      }
-      best = Math.min(squared, best);
-    }
-    return best;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/Separation.java
----------------------------------------------------------------------
diff --git 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/Separation.java
 
b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/Separation.java
deleted file mode 100644
index 19b3310..0000000
--- 
a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/measures/Separation.java
+++ /dev/null
@@ -1,118 +0,0 @@
-package com.yahoo.labs.samoa.evaluation.measures;
-
-/*
- * #%L
- * SAMOA
- * %%
- * Copyright (C) 2014 - 2015 Apache Software Foundation
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-import com.yahoo.labs.samoa.instances.DenseInstance;
-import com.yahoo.labs.samoa.instances.Instance;
-import com.yahoo.labs.samoa.moa.cluster.Cluster;
-import com.yahoo.labs.samoa.moa.cluster.Clustering;
-import com.yahoo.labs.samoa.moa.cluster.SphereCluster;
-import com.yahoo.labs.samoa.moa.core.DataPoint;
-import com.yahoo.labs.samoa.moa.evaluation.MeasureCollection;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Between-cluster separation measures: "BSS" for the found clustering,
- * "BSS-GT" for the ground-truth clustering and their quotient "BSS-Ratio".
- */
-public class Separation extends MeasureCollection {
-
-  public Separation() {
-    super();
-  }
-
-  @Override
-  protected String[] getNames() {
-    return new String[] { "BSS", "BSS-GT", "BSS-Ratio" };
-  }
-
-  /**
-   * Instance-based variant of {@link #evaluateClustering}; records the same
-   * three measures.
-   */
-  // @Override
-  public void evaluateClusteringSamoa(Clustering clustering,
-      Clustering trueClustering, ArrayList<Instance> points)
-      throws Exception {
-    // The last attribute is not counted as a dimension.
-    int dimension = points.get(0).numAttributes() - 1;
-    SphereCluster sc = new SphereCluster(points, dimension);
-
-    addSeparationValues(clustering, trueClustering, sc.getCenter(), dimension);
-  }
-
-  /**
-   * Weighted sum, over all clusters, of the squared Euclidean distance
-   * between the cluster centre and the given mean.
-   * 
-   * @param clustering
-   *          clusters whose centres and weights are used
-   * @param mean
-   *          reference point; its length fixes the number of dimensions
-   * @return the between-cluster sum of squares
-   */
-  private double getBSS(Clustering clustering, double[] mean) {
-    double bss = 0.0;
-    for (int i = 0; i < clustering.size(); i++) {
-      double weight = clustering.get(i).getWeight();
-      // Fetch the centre once per cluster rather than once per dimension.
-      double[] center = clustering.get(i).getCenter();
-      double sum = 0.0;
-      for (int j = 0; j < mean.length; j++) {
-        sum += Math.pow((mean[j] - center[j]), 2);
-      }
-      bss += weight * sum;
-    }
-
-    return bss;
-  }
-
-  /**
-   * Records "BSS", "BSS-GT" and "BSS-Ratio". BSS is measured around the
-   * centre of a SphereCluster built from all points. BSS-GT is measured
-   * around the centre of a SphereCluster built from the ground-truth cluster
-   * centres, and stays 1.0 when no ground truth is given.
-   */
-  @Override
-  protected void evaluateClustering(Clustering clustering,
-      Clustering trueClustering, ArrayList<DataPoint> points)
-      throws Exception {
-    // The last attribute is not counted as a dimension.
-    int dimension = points.get(0).numAttributes() - 1;
-    SphereCluster sc = new SphereCluster(points, dimension);
-
-    addSeparationValues(clustering, trueClustering, sc.getCenter(), dimension);
-  }
-
-  /**
-   * Shared tail of both evaluation entry points: computes the three measures
-   * from the data centre and records them.
-   */
-  private void addSeparationValues(Clustering clustering, Clustering trueClustering, double[] dataCenter,
-      int dimension) throws Exception {
-    double bss = getBSS(clustering, dataCenter);
-    double bssGt = 1.0;
-
-    if (trueClustering != null) {
-      // Represent every ground-truth cluster by one weighted instance at its
-      // centre.
-      List<Instance> listInstances = new ArrayList<>();
-      for (Cluster c : trueClustering.getClustering()) {
-        listInstances.add(new DenseInstance(c.getWeight(), c.getCenter()));
-      }
-      SphereCluster gt = new SphereCluster(listInstances, dimension);
-      bssGt = getBSS(trueClustering, gt.getCenter());
-    }
-
-    addValue("BSS", bss);
-    addValue("BSS-GT", bssGt);
-    addValue("BSS-Ratio", bss / bssGt);
-  }
-}


Reply via email to