Author: scooter
Date: 2011-08-20 16:00:14 -0700 (Sat, 20 Aug 2011)
New Revision: 26612
Modified:
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/AbstractAttributeClusterAlgorithm.java
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/AbstractAttributeClusterer.java
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/Matrix.java
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmeans/KCluster.java
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmeans/KMeansCluster.java
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmedoid/KMCluster.java
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmedoid/KMedoidCluster.java
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/silhouette/SilhouetteResult.java
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/silhouette/SilhouetteUtil.java
Log:
Integrated silhouette into k-means and k-medoid
Modified:
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/AbstractAttributeClusterAlgorithm.java
===================================================================
---
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/AbstractAttributeClusterAlgorithm.java
2011-08-20 00:23:14 UTC (rev 26611)
+++
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/AbstractAttributeClusterAlgorithm.java
2011-08-20 23:00:14 UTC (rev 26612)
@@ -33,9 +33,15 @@
package clusterMaker.algorithms.attributeClusterers;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
import cytoscape.CyNode;
import cytoscape.Cytoscape;
@@ -46,6 +52,8 @@
import cytoscape.task.TaskMonitor;
import clusterMaker.ClusterMaker;
+import clusterMaker.algorithms.attributeClusterers.silhouette.SilhouetteResult;
+import clusterMaker.algorithms.attributeClusterers.silhouette.SilhouetteUtil;
/**
* This abstract class is the base class for all of the attribute clusterers
provided by
@@ -61,6 +69,8 @@
protected TaskMonitor monitor;
protected Integer[] rowOrder;
protected String weightAttributes[] = null;
+ protected int kMax = -1;
+ private SilhouetteResult[] silhouetteResults = null;
protected boolean adjustDiagonals = false;
protected boolean debug = false;
@@ -69,9 +79,8 @@
protected boolean interimRun = false;
protected boolean selectedOnly = false;
protected boolean zeroMissing = false;
+ protected boolean useSilhouette = false;
- abstract public String cluster(int nClusters, int nIterations, boolean
transpose);
-
public Matrix getMatrix() { return matrix; }
public DistanceMetric getMetric() { return metric; }
@@ -81,8 +90,15 @@
public void setAdjustDiagonals(boolean val) { adjustDiagonals = val; }
public void setZeroMissing(boolean val) { zeroMissing = val; }
public void setDebug(boolean val) { debug = val; }
- public void setInterimRun(boolean val) { interimRun = val; }
+ public void setUseSilhouette(boolean val) { useSilhouette = val; }
+ public void setKMax(int val) { kMax = val; }
+ /**
+ * This method is called by all of the attribute cluster algorithms to
update the
+ * results attributes in the network.
+ *
+ * @param cluster_type the cluster type to indicate write into the
CLUSTER_TYPE_ATTRIBUTE
+ */
protected void updateAttributes(String cluster_type) {
// Update the network attribute and make it hidden
CyAttributes netAttr = Cytoscape.getNetworkAttributes();
@@ -144,6 +160,10 @@
}
+ /**
+ * This method resets (clears) all of the existing network attributes.
+ */
+ @SuppressWarnings("unchecked")
protected void resetAttributes() {
CyAttributes netAttr = Cytoscape.getNetworkAttributes();
String netID = Cytoscape.getCurrentNetwork().getIdentifier();
@@ -176,6 +196,42 @@
}
}
+ /**
+ * This method is used to determine if the current network has data
corresponding to a
+ * particular cluster type.
+ *
+ * @param type the cluster type to check for
+ * @return 'true' if this network has data for the designated type
+ */
+ public static boolean isAvailable(String type) {
+ String netID = Cytoscape.getCurrentNetwork().getIdentifier();
+ CyAttributes netAttr = Cytoscape.getNetworkAttributes();
+ if (!netAttr.hasAttribute(netID,
ClusterMaker.CLUSTER_TYPE_ATTRIBUTE))
+ return false;
+
+ if (!netAttr.getStringAttribute(netID,
ClusterMaker.CLUSTER_TYPE_ATTRIBUTE).equals(type))
+ return false;
+
+ // OK, we need either a node list or an attribute list
+ if (!netAttr.hasAttribute(netID,
ClusterMaker.CLUSTER_ATTR_ATTRIBUTE) &&
+ !netAttr.hasAttribute(netID,
ClusterMaker.CLUSTER_NODE_ATTRIBUTE))
+ return false;
+
+ // Finally, we need to have the cluster attributes themselves
+ if (!netAttr.hasAttribute(netID,
ClusterMaker.NODE_ORDER_ATTRIBUTE) &&
+ !netAttr.hasAttribute(netID,
ClusterMaker.CLUSTER_NODE_ATTRIBUTE))
+ return false;
+
+ return true;
+ }
+
+ /**
+ * This protected method is called to create all of our groups (if
desired).
+ * It is used by all of the k-clustering algorithms.
+ *
+ * @param nClusters the number of clusters we created
+ * @param cluster the list of values and the assigned clusters
+ */
protected void createGroups(int nClusters, int[] clusters) {
if (matrix.isTransposed()) {
return;
@@ -225,26 +281,187 @@
netAttr.setListAttribute(netID, ClusterMaker.GROUP_ATTRIBUTE,
groupNames);
}
- public static boolean isAvailable(String type) {
- String netID = Cytoscape.getCurrentNetwork().getIdentifier();
- CyAttributes netAttr = Cytoscape.getNetworkAttributes();
- if (!netAttr.hasAttribute(netID,
ClusterMaker.CLUSTER_TYPE_ATTRIBUTE))
- return false;
- if (!netAttr.getStringAttribute(netID,
ClusterMaker.CLUSTER_TYPE_ATTRIBUTE).equals(type))
- return false;
+ /**
+ * Common code for the k-cluster algorithms with silhouette
+ */
- // OK, we need either a node list or an attribute list
- if (!netAttr.hasAttribute(netID,
ClusterMaker.CLUSTER_ATTR_ATTRIBUTE) &&
- !netAttr.hasAttribute(netID,
ClusterMaker.CLUSTER_NODE_ATTRIBUTE))
- return false;
+ // This should be overridden by any k-cluster implementation
+ protected int kcluster(int nClusters, int nIterations, Matrix matrix,
+ DistanceMetric metric, int[] clusters) {
+ return 0;
+ }
+ /**
+ * This is the common entry point for k-cluster algorithms.
+ *
+ * @param nClusters the number of clusters (k)
+ * @param nIterations the number of iterations to use
+ * @param transpose whether we're doing rows (GENE) or columns (ARRY)
+ * @param algorithm the algorithm type
+ * @return a string with all of the results
+ */
+ public String cluster(int nClusters, int nIterations, boolean
transpose, String algorithm) {
+ String keyword = "GENE";
+ if (transpose) keyword = "ARRY";
- // Finally, we need to have the cluster attributes themselves
- if (!netAttr.hasAttribute(netID,
ClusterMaker.NODE_ORDER_ATTRIBUTE) &&
- !netAttr.hasAttribute(netID,
ClusterMaker.CLUSTER_NODE_ATTRIBUTE))
- return false;
+ for (int att = 0; att < weightAttributes.length; att++)
+ if (debug)
+ logger.debug("Attribute:
'"+weightAttributes[att]+"'");
- return true;
+ if (monitor != null)
+ monitor.setStatus("Creating distance matrix");
+
+ // Create the matrix
+ matrix = new Matrix(weightAttributes, transpose, ignoreMissing,
selectedOnly);
+ logger.info("cluster matrix has "+matrix.nRows()+" rows");
+
+ // Create a weight vector of all ones (we don't use individual
weighting, yet)
+ matrix.setUniformWeights();
+
+ if (monitor != null)
+ monitor.setStatus("Clustering...");
+
+ if (useSilhouette) {
+ TaskMonitor saveMonitor = monitor;
+ monitor = null;
+
+ silhouetteResults = new SilhouetteResult[kMax];
+
+ int nThreads =
Runtime.getRuntime().availableProcessors()-1;
+ if (nThreads > 1)
+ runThreadedSilhouette(kMax, nIterations,
nThreads);
+ else
+ runLinearSilhouette(kMax, nIterations);
+
+ // Now get the results and find our best k
+ double maxSil = Double.MIN_VALUE;
+ for (int kEstimate = 2; kEstimate < kMax; kEstimate++) {
+ double sil =
silhouetteResults[kEstimate].getAverageSilhouette();
+ // System.out.println("Average silhouette for
"+kEstimate+" clusters is "+sil);
+ if (sil > maxSil) {
+ maxSil = sil;
+ nClusters = kEstimate;
+ }
+ }
+ monitor = saveMonitor;
+ // System.out.println("maxSil = "+maxSil+" nClusters =
"+nClusters);
+ }
+
+ int[] clusters = new int[matrix.nRows()];
+ // Cluster
+ int ifound = kcluster(nClusters, nIterations, matrix, metric,
clusters);
+
+ // OK, now run our silhouette on our final result
+ SilhouetteResult sResult =
SilhouetteUtil.SilhouetteCalculator(matrix, metric, clusters);
+ // System.out.println("Average silhouette =
"+sResult.getAverageSilhouette());
+ // SilhouetteUtil.printSilhouette(sResult, clusters);
+
+ if (!matrix.isTransposed())
+ createGroups(nClusters, clusters);
+
+ /*
+ Ideally, we would sort our clusters based on size, but for some
reason
+ this isn't working...
+ renumberClusters(nClusters, clusters);
+ */
+
+ rowOrder = matrix.indexSort(clusters, clusters.length);
+ System.out.println(Arrays.toString(rowOrder));
+ // Update the network attributes
+ updateAttributes(algorithm);
+
+ String resultString = "Created "+nClusters+" clusters with
average silhouette = "+sResult.getAverageSilhouette();
+ logger.info(resultString);
+ return resultString;
}
+ private void renumberClusters(int nClusters, int [] clusters) {
+ int[] clusterSizes = new int[nClusters];
+ Arrays.fill(clusterSizes, 0);
+ for (int row = 0; row < clusters.length; row++) {
+ clusterSizes[clusters[row]] += 1;
+ }
+
+ Integer[] sortedClusters = new Integer[nClusters];
+ for (int cluster = 0; cluster < nClusters; cluster++) {
+ sortedClusters[cluster] = cluster;
+ }
+
+
+ // OK, now sort
+ Arrays.sort(sortedClusters, new SizeComparator(clusterSizes));
+ int[] clusterIndex = new int[nClusters];
+ for (int cluster = 0; cluster < nClusters; cluster++) {
+ clusterIndex[sortedClusters[cluster]] = cluster;
+ }
+ for (int row = 0; row < clusters.length; row++) {
+ // System.out.println("Setting cluster for row "+ row+"
to "+sortedClusters[clusters[row]]+" was "+clusters[row]);
+ clusters[row] = clusterIndex[clusters[row]];
+ }
+
+ }
+
+ class SizeComparator implements Comparator <Integer> {
+ int[] sizeArray = null;
+ public SizeComparator(int[] a) { this.sizeArray = a; }
+
+ public int compare(Integer o1, Integer o2) {
+ if (sizeArray[o1] > sizeArray[o2]) return 1;
+ if (sizeArray[o1] < sizeArray[o2]) return -1;
+ return 0;
+ }
+ }
+
+ private void runThreadedSilhouette(int kMax, int nIterations, int
nThreads) {
+ // Set up the thread pools
+ ExecutorService[] threadPools = new ExecutorService[nThreads];
+ for (int pool = 0; pool < threadPools.length; pool++)
+ threadPools[pool] = Executors.newFixedThreadPool(1);
+
+ // Dispatch a kmeans calculation to each pool
+ for (int kEstimate = 2; kEstimate < kMax; kEstimate++) {
+ int[] clusters = new int[matrix.nRows()];
+ Runnable r = new RunKMeans(matrix, clusters, kEstimate,
nIterations);
+ threadPools[(kEstimate-2)%nThreads].submit(r);
+ // threadPools[0].submit(r);
+ }
+
+ // OK, now wait for each thread to complete
+ for (int pool = 0; pool < threadPools.length; pool++) {
+ threadPools[pool].shutdown();
+ try {
+ boolean result =
threadPools[pool].awaitTermination(7, TimeUnit.DAYS);
+ } catch (Exception e) {}
+ }
+ }
+
+ private void runLinearSilhouette(int kMax, int nIterations) {
+ for (int kEstimate = 2; kEstimate < kMax; kEstimate++) {
+ int[] clusters = new int[matrix.nRows()];
+ int ifound = kcluster(kEstimate, nIterations, matrix,
metric, clusters);
+ silhouetteResults[kEstimate] =
SilhouetteUtil.SilhouetteCalculator(matrix, metric, clusters);
+ }
+ }
+
+ private class RunKMeans implements Runnable {
+ Matrix matrix;
+ int[] clusters;
+ int kEstimate;
+ int nIterations;
+
+ public RunKMeans (Matrix matrix, int[] clusters, int k, int
nIterations) {
+ this.matrix = matrix;
+ this.clusters = clusters;
+ this.kEstimate = k;
+ this.nIterations = nIterations;
+ }
+
+ public void run() {
+ int[] clusters = new int[matrix.nRows()];
+ int ifound = kcluster(kEstimate, nIterations, matrix,
metric, clusters);
+ try {
+ silhouetteResults[kEstimate] =
SilhouetteUtil.SilhouetteCalculator(matrix, metric, clusters);
+ } catch (Exception e) { e.printStackTrace(); }
+ }
+ }
}
Modified:
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/AbstractAttributeClusterer.java
===================================================================
---
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/AbstractAttributeClusterer.java
2011-08-20 00:23:14 UTC (rev 26611)
+++
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/AbstractAttributeClusterer.java
2011-08-20 23:00:14 UTC (rev 26612)
@@ -38,17 +38,21 @@
import cytoscape.Cytoscape;
import cytoscape.data.CyAttributes;
+import cytoscape.layout.Tunable;
+import cytoscape.layout.TunableListener;
import cytoscape.logger.CyLogger;
import cytoscape.task.TaskMonitor;
+import cytoscape.task.ui.JTaskConfig;
import clusterMaker.algorithms.AbstractClusterAlgorithm;
+import clusterMaker.ui.ClusterTask;
/**
* This abstract class is the base class for all of the attribute clusterers
provided by
* clusterMaker. Fundamentally, an attribute clusterer is an algorithm which
functions to
* partition nodes or node attributes based on properties of the attributes.
*/
-public abstract class AbstractAttributeClusterer extends
AbstractClusterAlgorithm {
+public abstract class AbstractAttributeClusterer extends
AbstractClusterAlgorithm implements TunableListener {
// Common instance variables
protected String[] attributeArray = new String[1];
protected String dataAttributes = null;
@@ -59,6 +63,9 @@
protected boolean selectedOnly = false;
protected boolean adjustDiagonals = false;
protected boolean zeroMissing = false;
+ protected boolean useSilhouette = true;
+ protected int kMax = 0;
+ protected int kNumber = 0;
protected TaskMonitor monitor = null;
protected CyLogger logger = null;
@@ -101,4 +108,76 @@
return
AbstractAttributeClusterAlgorithm.isAvailable(getShortName());
}
+ // We don't want to autodispose our task monitors
+ public JTaskConfig getDefaultTaskConfig() { return
ClusterTask.getDefaultTaskConfig(true); }
+
+ protected void addKTunables() {
+ Tunable t = new Tunable("useSilhouette",
+ "Estimate k using silhouette",
+ Tunable.BOOLEAN, Boolean.TRUE,
+ (Object)new Boolean(useSilhouette),
(Object)null, 0);
+ t.addTunableValueListener(this);
+ clusterProperties.add(t);
+
+ t = new Tunable("kMax",
+ "Maximum number of clusters",
+ Tunable.INTEGER, new Integer(0),
+ (Object)new Integer(kMax), (Object)null, 0);
+ if (!useSilhouette) t.setFlag(Tunable.IMMUTABLE);
+ clusterProperties.add(t);
+
+ t = new Tunable("kNumber",
+ "Number of clusters (k)",
+ Tunable.INTEGER, new Integer(10),
+ (Object)new Integer(kNumber), (Object)null, 0);
+ if (useSilhouette) t.setFlag(Tunable.IMMUTABLE);
+ clusterProperties.add(t);
+ }
+
+ protected void updateKTunables(boolean force) {
+ Tunable t = clusterProperties.get("useSilhouette");
+ if ((t != null) && (t.valueChanged() || force))
+ useSilhouette = ((Boolean) t.getValue()).booleanValue();
+
+ t = clusterProperties.get("kMax");
+ if ((t != null) && (t.valueChanged() || force))
+ kMax = ((Integer) t.getValue()).intValue();
+
+ t = clusterProperties.get("kNumber");
+ if ((t != null) && (t.valueChanged() || force))
+ kNumber = ((Integer) t.getValue()).intValue();
+ }
+
+ protected void updateKEstimates() {
+ // We also want to update the number our "guestimate" for k
+ double nodeCount =
(double)Cytoscape.getCurrentNetwork().getNodeCount();
+ if (selectedOnly) {
+ int selNodes =
Cytoscape.getCurrentNetwork().getSelectedNodes().size();
+ if (selNodes > 0) nodeCount = (double)selNodes;
+ }
+
+ Tunable kTunable = clusterProperties.get("kNumber");
+ double kinit = Math.sqrt(nodeCount/2);
+ if (kinit > 1)
+ kTunable.setValue((int)kinit);
+ else
+ kTunable.setValue(1);
+
+ Tunable kMaxTunable = clusterProperties.get("kMax");
+ kMaxTunable.setValue((int)kinit*2); // Double our kNumber
estimate
+ }
+
+ public void tunableChanged(Tunable t) {
+ if (t.getName().equals("useSilhouette")) {
+ useSilhouette = ((Boolean) t.getValue()).booleanValue();
+ if (useSilhouette) {
+
clusterProperties.get("kMax").clearFlag(Tunable.IMMUTABLE);
+
clusterProperties.get("kNumber").setFlag(Tunable.IMMUTABLE);
+ } else {
+
clusterProperties.get("kMax").setFlag(Tunable.IMMUTABLE);
+
clusterProperties.get("kNumber").clearFlag(Tunable.IMMUTABLE);
+ }
+ }
+ }
+
}
Modified:
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/Matrix.java
===================================================================
---
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/Matrix.java
2011-08-20 00:23:14 UTC (rev 26611)
+++
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/Matrix.java
2011-08-20 23:00:14 UTC (rev 26612)
@@ -322,10 +322,12 @@
public double[][] getDistanceMatrix(DistanceMetric metric) {
double[][] result = new double[this.nRows][this.nRows];
- for (int row = 1; row < this.nRows; row++) {
- for (int column = 0; column < row; column++) {
+ for (int row = 0; row < this.nRows; row++) {
+ for (int column = row; column < this.nRows; column++) {
result[row][column] =
metric.getMetric(this, this,
this.getWeights(), row, column);
+ if (row != column)
+ result[column][row] =
result[row][column]; // Assumes symmetrical distances
//
System.out.println("distanceMatrix["+row+"]["+column+"] =
"+result[row][column]);
}
}
Modified:
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmeans/KCluster.java
===================================================================
---
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmeans/KCluster.java
2011-08-20 00:23:14 UTC (rev 26611)
+++
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmeans/KCluster.java
2011-08-20 23:00:14 UTC (rev 26612)
@@ -36,6 +36,10 @@
import java.lang.Math;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
@@ -57,6 +61,8 @@
import
clusterMaker.algorithms.attributeClusterers.AbstractAttributeClusterAlgorithm;
import clusterMaker.algorithms.attributeClusterers.DistanceMetric;
import clusterMaker.algorithms.attributeClusterers.Matrix;
+// import
clusterMaker.algorithms.attributeClusterers.silhouette.SilhouetteResult;
+// import
clusterMaker.algorithms.attributeClusterers.silhouette.SilhouetteUtil;
public class KCluster extends AbstractAttributeClusterAlgorithm {
Random random = null;
@@ -70,6 +76,7 @@
resetAttributes();
}
+/*
public String cluster(int nClusters, int nIterations, boolean
transpose) {
String keyword = "GENE";
if (transpose) keyword = "ARRY";
@@ -88,14 +95,44 @@
// Create a weight vector of all ones (we don't use individual
weighting, yet)
matrix.setUniformWeights();
- int[] clusters = new int[matrix.nRows()];
-
if (monitor != null)
monitor.setStatus("Clustering...");
+ if (useSilhouette) {
+ TaskMonitor saveMonitor = monitor;
+ monitor = null;
+
+ silhouetteResults = new SilhouetteResult[kMax];
+
+ int nThreads =
Runtime.getRuntime().availableProcessors()-1;
+ if (nThreads > 1)
+ runThreadedSilhouette(kMax, nIterations,
nThreads);
+ else
+ runLinearSilhouette(kMax, nIterations);
+
+ // Now get the results and find our best k
+ double maxSil = Double.MIN_VALUE;
+ for (int kEstimate = 2; kEstimate < kMax; kEstimate++) {
+ double sil =
silhouetteResults[kEstimate].getAverageSilhouette();
+ // System.out.println("Average silhouette for
"+kEstimate+" clusters is "+sil);
+ if (sil > maxSil) {
+ maxSil = sil;
+ nClusters = kEstimate;
+ }
+ }
+ monitor = saveMonitor;
+ // System.out.println("maxSil = "+maxSil+" nClusters =
"+nClusters);
+ }
+
+ int[] clusters = new int[matrix.nRows()];
// Cluster
int ifound = kmeans(nClusters, nIterations, matrix, metric,
clusters);
+ // OK, now run our silhouette on our final result
+ SilhouetteResult sResult =
SilhouetteUtil.SilhouetteCalculator(matrix, metric, clusters);
+ // System.out.println("Average silhouette =
"+sResult.getAverageSilhouette());
+ // SilhouetteUtil.printSilhouette(sResult, clusters);
+
if (!interimRun) {
if (!matrix.isTransposed())
createGroups(nClusters, clusters);
@@ -104,14 +141,47 @@
updateAttributes("kmeans");
}
- return "Complete";
+ return "Created "+nClusters+" clusters with average silhouette
= "+sResult.getAverageSilhouette();
}
- public int kmeans(int nClusters, int nIterations, Matrix matrix,
DistanceMetric metric, int[] clusterID) {
+ private void runThreadedSilhouette(int kMax, int nIterations, int
nThreads) {
+ // Set up the thread pools
+ ExecutorService[] threadPools = new ExecutorService[nThreads];
+ for (int pool = 0; pool < threadPools.length; pool++)
+ threadPools[pool] = Executors.newFixedThreadPool(1);
+ // Dispatch a kmeans calculation to each pool
+ for (int kEstimate = 2; kEstimate < kMax; kEstimate++) {
+ int[] clusters = new int[matrix.nRows()];
+ Runnable r = new RunKMeans(matrix, clusters, kEstimate,
nIterations);
+ threadPools[(kEstimate-2)%nThreads].submit(r);
+ // threadPools[0].submit(r);
+ }
+
+ // OK, now wait for each thread to complete
+ for (int pool = 0; pool < threadPools.length; pool++) {
+ threadPools[pool].shutdown();
+ try {
+ boolean result =
threadPools[pool].awaitTermination(7, TimeUnit.DAYS);
+ } catch (Exception e) {}
+ }
+ }
+
+ private void runLinearSilhouette(int kMax, int nIterations) {
+ for (int kEstimate = 2; kEstimate < kMax; kEstimate++) {
+ int[] clusters = new int[matrix.nRows()];
+ int ifound = kmeans(kEstimate, nIterations, matrix,
metric, clusters);
+ silhouetteResults[kEstimate] =
SilhouetteUtil.SilhouetteCalculator(matrix, metric, clusters);
+ }
+ }
+*/
+
+ // The kmeans implementation of a k-clusterer
+ public int kcluster(int nClusters, int nIterations, Matrix matrix,
DistanceMetric metric, int[] clusterID) {
if (monitor != null)
monitor.setPercentCompleted(0);
+ random = null;
int nelements = matrix.nRows();
int ifound = 1;
@@ -492,9 +562,35 @@
*/
private double uniform() {
if (random == null) {
- Date date = new Date();
- random = new Random(date.getTime());
+ // Date date = new Date();
+ // random = new Random(date.getTime());
+ // Use an unseeded random so that our silhouette
results are comparable
+ random = new Random();
}
return random.nextDouble();
}
+
+/*
+ class RunKMeans implements Runnable {
+ Matrix matrix;
+ int[] clusters;
+ int kEstimate;
+ int nIterations;
+
+ public RunKMeans (Matrix matrix, int[] clusters, int k, int
nIterations) {
+ this.matrix = matrix;
+ this.clusters = clusters;
+ this.kEstimate = k;
+ this.nIterations = nIterations;
+ }
+
+ public void run() {
+ int[] clusters = new int[matrix.nRows()];
+ int ifound = kmeans(kEstimate, nIterations, matrix,
metric, clusters);
+ try {
+ silhouetteResults[kEstimate] =
SilhouetteUtil.SilhouetteCalculator(matrix, metric, clusters);
+ } catch (Exception e) { e.printStackTrace(); }
+ }
+ }
+*/
}
Modified:
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmeans/KMeansCluster.java
===================================================================
---
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmeans/KMeansCluster.java
2011-08-20 00:23:14 UTC (rev 26611)
+++
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmeans/KMeansCluster.java
2011-08-20 23:00:14 UTC (rev 26612)
@@ -56,7 +56,6 @@
// clusterMaker imports
public class KMeansCluster extends AbstractAttributeClusterer {
- int kNumber = 0;
int rNumber = 0;
KnnView knnView = null;
@@ -75,20 +74,9 @@
attributeArray = getAllAttributes();
attributeTunable.setLowerBound((Object)attributeArray);
- // We also want to update the number our "guestimate" for k
- double nodeCount =
(double)Cytoscape.getCurrentNetwork().getNodeCount();
- Tunable kTunable = clusterProperties.get("knumber");
- if (selectedOnly) {
- int selNodes =
Cytoscape.getCurrentNetwork().getSelectedNodes().size();
- if (selNodes > 0) nodeCount = (double)selNodes;
- }
+ // We also want to update the number our "guestimate" for k and
kMax
+ updateKEstimates();
- double kinit = Math.sqrt(nodeCount/2);
- if (kinit > 1)
- kTunable.setValue((int)kinit);
- else
- kTunable.setValue(1);
-
return clusterProperties.getTunablePanel();
}
@@ -107,16 +95,13 @@
*/
// K
- clusterProperties.add(new Tunable("knumber",
- "Number of clusters",
- Tunable.INTEGER, new
Integer(10),
- (Object)kNumber,
(Object)null, 0));
+ addKTunables();
// Number of iterations
clusterProperties.add(new Tunable("iterations",
"Number of iterations",
- Tunable.INTEGER, new
Integer(10),
- (Object)rNumber,
(Object)null, 0));
+ Tunable.INTEGER, new
Integer(50),
+ (Object)new Integer(rNumber),
(Object)null, 0));
// The distance metric to use
clusterProperties.add(new Tunable("dMetric",
@@ -163,18 +148,17 @@
public void updateSettings() {
updateSettings(false);
+ updateKTunables(false);
+
}
public void updateSettings(boolean force) {
clusterProperties.updateValues();
super.updateSettings(force);
+ updateKTunables(force);
- Tunable t = clusterProperties.get("knumber");
+ Tunable t = clusterProperties.get("iterations");
if ((t != null) && (t.valueChanged() || force))
- kNumber = ((Integer) t.getValue()).intValue();
-
- t = clusterProperties.get("iterations");
- if ((t != null) && (t.valueChanged() || force))
rNumber = ((Integer) t.getValue()).intValue();
t = clusterProperties.get("dMetric");
@@ -231,19 +215,21 @@
algorithm.setSelectedOnly(selectedOnly);
algorithm.setDebug(debug);
+ String resultsString = "K-Means results:";
+
// Cluster the attributes, if requested
if (clusterAttributes && attributeArray.length > 1) {
if (monitor != null)
monitor.setStatus("Clustering attributes");
- algorithm.cluster(kNumber, rNumber, true);
+ resultsString = "\nAttributes: " +
algorithm.cluster(kNumber, rNumber, true, "kmeans");
}
// Cluster the nodes
if (monitor != null)
monitor.setStatus("Clustering nodes");
- algorithm.cluster(kNumber, rNumber, false);
+ resultsString = "\nNodes: "+algorithm.cluster(kNumber, rNumber,
false, "kmeans");
if (monitor != null)
- monitor.setStatus("Clustering complete");
+ monitor.setStatus(resultsString);
// Tell any listeners that we're done
pcs.firePropertyChange(ClusterAlgorithm.CLUSTER_COMPUTED, null,
this);
Modified:
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmedoid/KMCluster.java
===================================================================
---
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmedoid/KMCluster.java
2011-08-20 00:23:14 UTC (rev 26611)
+++
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmedoid/KMCluster.java
2011-08-20 23:00:14 UTC (rev 26612)
@@ -40,6 +40,10 @@
import java.util.HashMap;
import java.util.List;
import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
import javax.swing.JPanel;
// Cytoscape imports
@@ -71,6 +75,7 @@
resetAttributes();
}
+/*
public String cluster(int nClusters, int nIterations, boolean
transpose) {
String keyword = "GENE";
if (transpose) keyword = "ARRY";
@@ -106,8 +111,9 @@
return "Complete";
}
+*/
- public int kmedoid(int nClusters, int nIterations, Matrix matrix,
DistanceMetric metric, int[] clusterId) {
+ public int kcluster(int nClusters, int nIterations, Matrix matrix,
DistanceMetric metric, int[] clusterId) {
if (monitor != null)
monitor.setPercentCompleted(0);
Modified:
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmedoid/KMedoidCluster.java
===================================================================
---
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmedoid/KMedoidCluster.java
2011-08-20 00:23:14 UTC (rev 26611)
+++
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/kmedoid/KMedoidCluster.java
2011-08-20 23:00:14 UTC (rev 26612)
@@ -57,7 +57,6 @@
// clusterMaker imports
public class KMedoidCluster extends AbstractAttributeClusterer {
- int kNumber = 0;
int rNumber = 0;
KnnView knnView = null;
@@ -76,20 +75,9 @@
attributeArray = getAllAttributes();
attributeTunable.setLowerBound((Object)attributeArray);
- // We also want to update the number our "guestimate" for k
- double nodeCount =
(double)Cytoscape.getCurrentNetwork().getNodeCount();
- Tunable kTunable = clusterProperties.get("knumber");
- if (selectedOnly) {
- int selNodes =
Cytoscape.getCurrentNetwork().getSelectedNodes().size();
- if (selNodes > 0) nodeCount = (double)selNodes;
- }
+ // We also want to update the number our "guestimate" for k and
kMax
+ updateKEstimates();
- double kinit = Math.sqrt(nodeCount/2);
- if (kinit > 1)
- kTunable.setValue((int)kinit);
- else
- kTunable.setValue(1);
-
return clusterProperties.getTunablePanel();
}
@@ -108,10 +96,7 @@
*/
// K
- clusterProperties.add(new Tunable("knumber",
- "Number of clusters",
- Tunable.INTEGER, new
Integer(10),
- (Object)kNumber,
(Object)null, 0));
+ addKTunables();
// Number of iterations
clusterProperties.add(new Tunable("iterations",
@@ -164,18 +149,16 @@
public void updateSettings() {
updateSettings(false);
+ updateKTunables(false);
}
public void updateSettings(boolean force) {
clusterProperties.updateValues();
super.updateSettings(force);
+ updateKTunables(force);
- Tunable t = clusterProperties.get("knumber");
+ Tunable t = clusterProperties.get("iterations");
if ((t != null) && (t.valueChanged() || force))
- kNumber = ((Integer) t.getValue()).intValue();
-
- t = clusterProperties.get("iterations");
- if ((t != null) && (t.valueChanged() || force))
rNumber = ((Integer) t.getValue()).intValue();
t = clusterProperties.get("dMetric");
@@ -232,19 +215,21 @@
algorithm.setSelectedOnly(selectedOnly);
algorithm.setDebug(debug);
+ String resultsString = "K-Medoid results:";
+
// Cluster the attributes, if requested
if (clusterAttributes && attributeArray.length > 1) {
if (monitor != null)
monitor.setStatus("Clustering attributes");
- algorithm.cluster(kNumber, rNumber, true);
+ resultsString = "\nAttributes: " +
algorithm.cluster(kNumber, rNumber, true, "kmedoid");
}
// Cluster the nodes
if (monitor != null)
monitor.setStatus("Clustering nodes");
- algorithm.cluster(kNumber, rNumber, false);
+ resultsString = "\nNodes: "+algorithm.cluster(kNumber, rNumber,
false, "kmedoid");
if (monitor != null)
- monitor.setStatus("Clustering complete");
+ monitor.setStatus(resultsString);
// Tell any listeners that we're done
pcs.firePropertyChange(ClusterAlgorithm.CLUSTER_COMPUTED, null,
this);
Modified:
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/silhouette/SilhouetteResult.java
===================================================================
---
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/silhouette/SilhouetteResult.java
2011-08-20 00:23:14 UTC (rev 26611)
+++
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/silhouette/SilhouetteResult.java
2011-08-20 23:00:14 UTC (rev 26612)
@@ -30,8 +30,8 @@
/**
* add a value in the silhouette list
- * @para value is the silhouette value
- * @para the nearest cluster of the sample
+ * @param value is the silhouette value
+ * @param the nearest cluster of the sample
*/
public void addSilhouettevalue(double value, Integer label)
{
@@ -42,7 +42,7 @@
/**
* delete a silhouette at a given index
- * @para index the position of the sample you want to delete (0~ size-1)
+ * @param index the position of the sample you want to delete
(0~ size-1)
*/
public void deleteSilhouettevalue(int index)
{
@@ -53,8 +53,8 @@
/**
* get the value for the silhouette at a given index
- * @para index the position of the sample you want to delete (0~ size-1)
- * return the value for the silhouette at
+ * @param index the position of the sample you want to delete
(0~ size-1)
+ * return the value for the silhouette at
*/
public double getSilhouettevalue(int index)
{
@@ -64,8 +64,8 @@
/**
* This function is to get the neighbor cluster of current sample (the
given index)
- * @para index the position of the sample you want to delete (0~ size-1)
- * return the neighbor cluster of current sample
+ * @param index the position of the sample you want to delete
(0~ size-1)
+ * return the neighbor cluster of current sample
*/
public Integer getSilhouetteneighborlabel(int index)
{
@@ -79,10 +79,12 @@
*/
public double getAverageSilhouette()
{
+ // System.out.println("Have
"+silhouetteValues.size()+" values");
double avgS = 0;
for (Double v: silhouetteValues) {
avgS = avgS+v.doubleValue();
}
+ // System.out.println("avgS = "+avgS+" average
= "+avgS/(double)silhouetteValues.size());
return avgS/(double)silhouetteValues.size();
}
Modified:
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/silhouette/SilhouetteUtil.java
===================================================================
---
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/silhouette/SilhouetteUtil.java
2011-08-20 00:23:14 UTC (rev 26611)
+++
csplugins/trunk/ucsf/scooter/clusterMaker/src/clusterMaker/algorithms/attributeClusterers/silhouette/SilhouetteUtil.java
2011-08-20 23:00:14 UTC (rev 26612)
@@ -10,6 +10,9 @@
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
import java.util.Iterator;
@@ -30,7 +33,7 @@
* @param labels the labels for each of clusters
* @return the resulting silhouette
*/
- public static SilhouetteResult SilhouetteCalculator(Matrix matrix,
DistanceMetric metric, Integer[] labels)
+ public static SilhouetteResult SilhouetteCalculator(Matrix matrix,
DistanceMetric metric, int[] labels)
{
double[][] distanceMatrix = matrix.getDistanceMatrix(metric);
return SilhouetteCalculator(distanceMatrix, labels);
@@ -42,7 +45,7 @@
* @para labels the labels for each of clusters
* @return the resulting silhouette
*/
- public static SilhouetteResult SilhouetteCalculator(double[][]
distancematrix, Integer[] labels)
+ public static SilhouetteResult SilhouetteCalculator(double[][]
distancematrix, int[] labels)
{
SilhouetteResult silresult = new SilhouetteResult();
@@ -62,73 +65,88 @@
}
}
+ // The number of classes (clusters) that we have
+ int classnum = classlabels.size();
+
// OK, now calculate the silhouete
- for(int i=0;i<labels.length;i++)
+ for(int i=0;i<samplenum;i++)
{
double silhouettevalue=0;
double a=0;
double b=0;
- int classnum = classlabels.size();
Integer classlabel = labels[i];
+ // System.out.println("Row "+i+" is in cluster "+classlabel);
//initializing
HashMap<Integer, Double> bvalues = new HashMap<Integer, Double>();
- for(int k=0; k<classnum; k++)
- {
- Set<Integer> labelset = classlabels.keySet();
- for (Integer label: labelset)
- {
- bvalues.put(label, Double.valueOf(0));
- }
- }
- //calculate distance by differnt classes
+ //calculate distance by different classes
for(int j=0;j<samplenum;j++)
{
+ // System.out.println("Distance from "+i+" to "+j+" is
"+distancematrix[i][j]);
if (i == j) continue;
Integer currentclasslabel = labels[j];
- double distancevalue =
bvalues.get(currentclasslabel).doubleValue();
+ // System.out.println("Row "+j+" is in cluster
"+currentclasslabel);
+ double distancevalue = 0.0;
+ if(bvalues.containsKey(currentclasslabel))
+ distancevalue =
bvalues.get(currentclasslabel).doubleValue();
distancevalue = distancevalue + distancematrix[i][j];
+ // System.out.println("Cumulative distance from "+i+" to
cluster "+currentclasslabel+" is "+distancevalue);
bvalues.put(currentclasslabel, Double.valueOf(distancevalue));
}
-
+
//calculate a b and silhouette
- for(int k=0;k<classnum;k++)
+ double mindis = Double.MAX_VALUE;
+ Integer minlabel = null;
+ for(Integer kLabel: bvalues.keySet())
{
- double mindis = Double.MAX_VALUE;
- Integer minlabel = null;
- Set<Integer> labelset = classlabels.keySet();
- for (Integer label: labelset)
- {
- int count = classlabels.get(label).intValue();
- double value = bvalues.get(label).doubleValue();
+ int count = classlabels.get(kLabel).intValue();
+ double value = bvalues.get(kLabel).doubleValue();
+ if (kLabel.equals(classlabel))
+ a = value/count;
+ else if (value/count < mindis) {
+ mindis = value/count;
+ minlabel = kLabel;
+ }
+ }
+ b = mindis;
- if(label.equals(classlabel))
- a = value/count;
- else {
-
- double avedistance = value/count;
- if(avedistance<mindis)
- {
- mindis = avedistance;
- minlabel = label;
- }
- }
- }
- b = mindis;
-
- if(a>b) {
- silhouettevalue = (b-a)/a;
- } else {
- silhouettevalue = (b-a)/b;
- }
+ if(a>b) {
+ silhouettevalue = (b-a)/a;
+ } else {
+ silhouettevalue = (b-a)/b;
+ }
+ //
System.out.println("silhouetteValue for "+i+" = "+silhouettevalue+", a = "+a+",
b = "+b);
- silresult.addSilhouettevalue(silhouettevalue, minlabel);
+ silresult.addSilhouettevalue(silhouettevalue, minlabel);
- }
}
-
return silresult;
}
+
+ /**
+ * This method prints out the silhouette profile for a given result. In
typical silhouette display,
+ * the values are organized by cluster with the silhouette values ranked
largest to smallest.
+ *
+ * @param result the result that we're displaying
+ * @param labels the clustering
+ */
+ public static void printSilhouette(SilhouetteResult result, int[] labels) {
+ // Divide the indices into clusters
+ TreeMap<Integer, SortedSet<Double>> clusters = new TreeMap<Integer,
SortedSet<Double>>();
+ for (int row = 0; row < labels.length; row++) {
+ if (!clusters.containsKey(labels[row]))
+ clusters.put(labels[row], new TreeSet<Double>());
+ clusters.get(labels[row]).add(result.getSilhouettevalue(row));
+ }
+ // For each cluster, output the profile
+ for (Integer cluster: clusters.keySet()) {
+ System.out.println("Cluster #"+cluster);
+ for (Double sil: clusters.get(cluster)) {
+ System.out.println("Silhouette "+sil);
+ }
+ }
+
+ }
}
--
You received this message because you are subscribed to the Google Groups
"cytoscape-cvs" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/cytoscape-cvs?hl=en.