Repository: incubator-samoa Updated Branches: refs/heads/master 9a6ad44f6 -> 2c7a1704b
SAMOA-11: Incorrect String comparison in Clustering.java (smarthi) closes #16 Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/2c7a1704 Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/2c7a1704 Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/2c7a1704 Branch: refs/heads/master Commit: 2c7a1704b3e1f626ab846175345009fc31054440 Parents: 9a6ad44 Author: abifet <[email protected]> Authored: Sat Apr 4 09:49:03 2015 +0200 Committer: abifet <[email protected]> Committed: Sat Apr 4 09:49:03 2015 +0200 ---------------------------------------------------------------------- .../labs/samoa/moa/cluster/Clustering.java | 99 ++++++++++---------- 1 file changed, 49 insertions(+), 50 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/2c7a1704/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Clustering.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Clustering.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Clustering.java index bf01ed1..70a5e79 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Clustering.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Clustering.java @@ -21,8 +21,10 @@ package com.yahoo.labs.samoa.moa.cluster; */ import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; + import com.yahoo.labs.samoa.moa.AbstractMOAObject; import com.yahoo.labs.samoa.moa.core.AutoExpandVector; import com.yahoo.labs.samoa.moa.core.DataPoint; @@ -34,14 +36,12 @@ public class Clustering extends AbstractMOAObject { private AutoExpandVector<Cluster> clusters; public Clustering() { - this.clusters = new AutoExpandVector<Cluster>(); + this.clusters = new AutoExpandVector<>(); } public Clustering(Cluster[] clusters) { - this.clusters = new AutoExpandVector<Cluster>(); - for (int i = 0; i < clusters.length; i++) { - this.clusters.add(clusters[i]); - } + this.clusters = new AutoExpandVector<>(); + Collections.addAll(this.clusters, clusters); } public Clustering(List<? extends Instance> points) { @@ -53,7 +53,7 @@ public class Clustering extends AbstractMOAObject { Attribute classLabel = points.get(0).dataset().classAttribute(); int lastLabelIndex = classLabel.numValues() - 1; - if (classLabel.value(lastLabelIndex) == "noise") { + if ("noise".equalsIgnoreCase(classLabel.value(lastLabelIndex))) { noiseLabel = lastLabelIndex; } else { noiseLabel = -1; @@ -61,15 +61,17 @@ public class Clustering extends AbstractMOAObject { ArrayList<Instance>[] sorted_points = (ArrayList<Instance>[]) new ArrayList[numClasses]; for (int i = 0; i < numClasses; i++) { - sorted_points[i] = new ArrayList<Instance>(); + sorted_points[i] = new ArrayList<>(); } + for (Instance point : points) { - int clusterid = (int) point.classValue(); - if (clusterid == noiseLabel) - continue; - sorted_points[labelMap.get(clusterid)].add((Instance) point); + int clusterId = (int) point.classValue(); + if (clusterId != noiseLabel) { + sorted_points[labelMap.get(clusterId)].add(point); + } } - this.clusters = new AutoExpandVector<Cluster>(); + + this.clusters = new AutoExpandVector<>(); for (int i = 0; i < numClasses; i++) { if (sorted_points[i].size() > 0) { SphereCluster s = new SphereCluster(sorted_points[i], dim); @@ -85,45 +87,43 @@ public class Clustering extends AbstractMOAObject { int dim = points.get(0).dataset().numAttributes() - 1; int numClasses = labelMap.size(); - int num = 0; ArrayList<DataPoint>[] sorted_points = (ArrayList<DataPoint>[]) new ArrayList[numClasses]; for (int i = 0; i < numClasses; i++) { - sorted_points[i] = new ArrayList<DataPoint>(); + sorted_points[i] = new ArrayList<>(); } + for (DataPoint point : points) { - int clusterid = (int) point.classValue(); - if (clusterid == -1) - continue; - sorted_points[labelMap.get(clusterid)].add(point); - num++; + int clusterId = (int) point.classValue(); + if (clusterId != -1) { + sorted_points[labelMap.get(clusterId)].add(point); + } } - clusters = new AutoExpandVector<Cluster>(); - int microID = 0; + clusters = new AutoExpandVector<>(); for (int i = 0; i < numClasses; i++) { - ArrayList<SphereCluster> microByClass = new ArrayList<SphereCluster>(); - ArrayList<DataPoint> pointInCluster = new ArrayList<DataPoint>(); - ArrayList<ArrayList<Instance>> pointInMicroClusters = new ArrayList(); + ArrayList<SphereCluster> microByClass = new ArrayList<>(); + ArrayList<DataPoint> pointInCluster = new ArrayList<>(); + ArrayList<ArrayList<Instance>> pointInMicroClusters = new ArrayList<>(); pointInCluster.addAll(sorted_points[i]); while (pointInCluster.size() > 0) { - ArrayList<Instance> micro_points = new ArrayList<Instance>(); + ArrayList<Instance> micro_points = new ArrayList<>(); for (int j = 0; j < initMinPoints && !pointInCluster.isEmpty(); j++) { - micro_points.add((Instance) pointInCluster.get(0)); + micro_points.add(pointInCluster.get(0)); pointInCluster.remove(0); } if (micro_points.size() > 0) { SphereCluster s = new SphereCluster(micro_points, dim); for (int c = 0; c < microByClass.size(); c++) { - if (((SphereCluster) microByClass.get(c)).overlapRadiusDegree(s) > overlapThreshold) { + if ((microByClass.get(c)).overlapRadiusDegree(s) > overlapThreshold) { micro_points.addAll(pointInMicroClusters.get(c)); s = new SphereCluster(micro_points, dim); pointInMicroClusters.remove(c); microByClass.remove(c); - // System.out.println("Removing redundant cluster based on radius overlap"+c); } } + for (int j = 0; j < pointInCluster.size(); j++) { Instance instance = pointInCluster.get(j); if (s.getInclusionProbability(instance) > 0.8) { @@ -134,7 +134,6 @@ public class Clustering extends AbstractMOAObject { s.setWeight(micro_points.size()); microByClass.add(s); pointInMicroClusters.add(micro_points); - microID++; } } // @@ -144,7 +143,6 @@ public class Clustering extends AbstractMOAObject { for (int c = 0; c < microByClass.size(); c++) { for (int c1 = c + 1; c1 < microByClass.size(); c1++) { double overlap = microByClass.get(c).overlapRadiusDegree(microByClass.get(c1)); - // System.out.println("Overlap C"+(clustering.size()+c)+" ->C"+(clustering.size()+c1)+": "+overlap); if (overlap > overlapThreshold) { pointInMicroClusters.get(c).addAll(pointInMicroClusters.get(c1)); SphereCluster s = new SphereCluster(pointInMicroClusters.get(c), dim); @@ -157,12 +155,13 @@ public class Clustering extends AbstractMOAObject { } } } - for (int j = 0; j < microByClass.size(); j++) { - microByClass.get(j).setGroundTruth(sorted_points[i].get(0).classValue()); - clusters.add(microByClass.get(j)); - } + for (SphereCluster microByClas : microByClass) { + microByClas.setGroundTruth(sorted_points[i].get(0).classValue()); + clusters.add(microByClas); + } } + for (int j = 0; j < clusters.size(); j++) { clusters.get(j).setId(j); } @@ -170,27 +169,28 @@ public class Clustering extends AbstractMOAObject { } /** - * @param points + * @param points - points to be clustered * @return an array with the min and max class label value */ public static HashMap<Integer, Integer> classValues(List<? extends Instance> points) { - HashMap<Integer, Integer> classes = new HashMap<Integer, Integer>(); - int workcluster = 0; - boolean hasnoise = false; - for (int i = 0; i < points.size(); i++) { - int label = (int) points.get(i).classValue(); + HashMap<Integer, Integer> classes = new HashMap<>(); + int workCluster = 0; + boolean hasNoise = false; + for (Instance point : points) { + int label = (int) point.classValue(); if (label == -1) { - hasnoise = true; - } - else { + hasNoise = true; + } else { if (!classes.containsKey(label)) { - classes.put(label, workcluster); - workcluster++; + classes.put(label, workCluster); + workCluster++; } } } - if (hasnoise) - classes.put(-1, workcluster); + + if (hasNoise) { + classes.put(-1, workCluster); + } return classes; } @@ -260,9 +260,8 @@ public class Clustering extends AbstractMOAObject { public double getMaxInclusionProbability(Instance point) { double maxInclusion = 0.0; - for (int i = 0; i < clusters.size(); i++) { - maxInclusion = Math.max(clusters.get(i).getInclusionProbability(point), - maxInclusion); + for (Cluster cluster : clusters) { + maxInclusion = Math.max(cluster.getInclusionProbability(point), maxInclusion); } return maxInclusion; }
