Author: assenov
Date: 2010-10-25 16:11:44 -0700 (Mon, 25 Oct 2010)
New Revision: 22410
Modified:
cytoscape/trunk/coreplugins/NetworkAnalyzer/src/main/java/de/mpg/mpi_inf/bioinf/netanalyzer/UndirNetworkAnalyzer.java
Log:
Fixed a mistake in the class definitions.
Modified:
cytoscape/trunk/coreplugins/NetworkAnalyzer/src/main/java/de/mpg/mpi_inf/bioinf/netanalyzer/UndirNetworkAnalyzer.java
===================================================================
---
cytoscape/trunk/coreplugins/NetworkAnalyzer/src/main/java/de/mpg/mpi_inf/bioinf/netanalyzer/UndirNetworkAnalyzer.java
2010-10-25 23:10:23 UTC (rev 22409)
+++
cytoscape/trunk/coreplugins/NetworkAnalyzer/src/main/java/de/mpg/mpi_inf/bioinf/netanalyzer/UndirNetworkAnalyzer.java
2010-10-25 23:11:44 UTC (rev 22410)
@@ -17,505 +17,832 @@
package de.mpg.mpi_inf.bioinf.netanalyzer;
+import giny.model.Edge;
import giny.model.Node;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileWriter;
-import java.io.IOException;
+import java.awt.geom.Point2D;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.HashSet;
-import java.util.Iterator;
+import java.util.LinkedList;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import cytoscape.CyNetwork;
import cytoscape.Cytoscape;
-import cytoscape.data.CyAttributes;
-import cytoscape.util.SwingWorker;
-import de.mpg.mpi_inf.bioinf.netanalyzer.data.AnalysisError;
-import de.mpg.mpi_inf.bioinf.netanalyzer.data.Interpretations;
-import de.mpg.mpi_inf.bioinf.netanalyzer.data.Messages;
-import de.mpg.mpi_inf.bioinf.netanalyzer.data.NetworkAnalysisReport;
-import de.mpg.mpi_inf.bioinf.netanalyzer.data.NetworkInspection;
+import de.mpg.mpi_inf.bioinf.netanalyzer.data.CCInfo;
+import de.mpg.mpi_inf.bioinf.netanalyzer.data.DegreeDistribution;
+import de.mpg.mpi_inf.bioinf.netanalyzer.data.LogBinDistribution;
+import de.mpg.mpi_inf.bioinf.netanalyzer.data.LongHistogram;
+import de.mpg.mpi_inf.bioinf.netanalyzer.data.MutInteger;
import de.mpg.mpi_inf.bioinf.netanalyzer.data.NetworkInterpretation;
-import de.mpg.mpi_inf.bioinf.netanalyzer.data.NetworkStats;
-import de.mpg.mpi_inf.bioinf.netanalyzer.data.NetworkStatus;
+import de.mpg.mpi_inf.bioinf.netanalyzer.data.NodeBetweenInfo;
+import de.mpg.mpi_inf.bioinf.netanalyzer.data.PathLengthData;
+import de.mpg.mpi_inf.bioinf.netanalyzer.data.Points2D;
+import de.mpg.mpi_inf.bioinf.netanalyzer.data.SimpleUndirParams;
+import de.mpg.mpi_inf.bioinf.netanalyzer.data.SumCountPair;
+import de.mpg.mpi_inf.bioinf.netanalyzer.data.Utils;
import de.mpg.mpi_inf.bioinf.netanalyzer.data.io.SettingsSerializer;
-import de.mpg.mpi_inf.bioinf.netanalyzer.data.io.StatsSerializer;
-import de.mpg.mpi_inf.bioinf.netanalyzer.ui.BatchAnalysisDialog;
/**
- * Class for batch analysis of networks.
+ * Network analyzer for networks that contain undirected edges only.
*
* @author Yassen Assenov
+ * @author Sven-Eric Schelhorn
* @author Nadezhda Doncheva
*/
-public class BatchNetworkAnalyzer extends SwingWorker {
+public class UndirNetworkAnalyzer extends NetworkAnalyzer {
/**
- * Initializes a new instance of <code>BatchNetworkAnalyzer</code>.
+ * Initializes a new instance of <code>UndirNetworkAnalyzer</code>.
*
- * @param aOutputDir
- * Output directory as chosen by the user.
- * @param aInputFiles
- * List of all input files for the analysis.
+ * @param aNetwork
+ * Network to be analyzed.
+ * @param aNodeSet
+ * Subset of nodes in <code>aNetwork</code>, for which
topological parameters are to
+ * be calculated. Set this to <code>null</code> if
parameters must be calculated for
+ * all nodes in the network.
* @param aInterpr
- * Parameter specifying which interpretations to be applied
to each network.
+ * Interpretation of the network edges.
*/
- public BatchNetworkAnalyzer(File aOutputDir, List<File> aInputFiles,
Interpretations aInterpr) {
- analyzer = null;
- cancelled = false;
- dialog = null;
- progress = 0;
- outputDir = aOutputDir;
- inputFiles = aInputFiles;
- interpretations = aInterpr;
- reports = new ArrayList<NetworkAnalysisReport>();
- scale = 0.0;
- analyzing = false;
- subProgress = 0;
- }
-
- /**
- * Cancels the process of network analysis.
- * <p>
- * Note that this method does not force the analyzer to cancel
immediately; it takes an unspecified period of time
- * until the analysis thread actually stops.
- * </p>
- */
- public void cancel() {
- cancelled = true;
- synchronized (this) {
- if (analyzer != null) {
- analyzer.cancel();
- }
+ public UndirNetworkAnalyzer(CyNetwork aNetwork, Set<Node> aNodeSet,
+ NetworkInterpretation aInterpr) {
+ super(aNetwork, aNodeSet, aInterpr);
+ if (nodeSet != null) {
+ stats.set("nodeCount", nodeSet.size());
}
+ nodeCount = stats.getInt("nodeCount");
+ sPathLengths = new long[nodeCount];
+ sharedNeighborsHist = new long[nodeCount];
+ visited = new HashSet<Node>(nodeCount);
+ useNodeAttributes =
SettingsSerializer.getPluginSettings().getUseNodeAttributes();
+ useEdgeAttributes =
SettingsSerializer.getPluginSettings().getUseEdgeAttributes();
+ nodeBetweenness = new HashMap<Node, NodeBetweenInfo>();
+ edgeBetweenness = new HashMap<Edge, Double>();
+ stress = new HashMap<Node, Long>();
+ roundingDigits = 8;
+ computeNB = true;
}
/*
* (non-Javadoc)
*
- * @see cytoscape.util.SwingWorker#construct()
+ * @see de.mpg.mpi_inf.bioinf.netanalyzer.NetworkAnalyzer#computeAll()
*/
@Override
- public Object construct() {
- progress = 0;
- for (final File inputFile : inputFiles) {
+ public void computeAll() {
- // Make a new network in cytoscape from a filename in
- // the network-directory
- CyNetwork network = null;
- try {
- write(Messages.SM_LOADING + inputFile.getName()
+ " ... ");
- if (!inputFile.isFile()) {
- throw new RuntimeException();
+ long time = System.currentTimeMillis();
+ analysisStarting();
+ int edgeCount = 0;
+ SimpleUndirParams params = new SimpleUndirParams();
+ int maxConnectivity = 0;
+ DegreeDistribution degreeDist = new
DegreeDistribution(nodeCount);
+ // clustering coefficients
+ HashMap<Integer, SumCountPair> CCps = new HashMap<Integer,
SumCountPair>();
+ // topological coefficients
+ ArrayList<Point2D.Double> topCoefs = new
ArrayList<Point2D.Double>(nodeCount);
+ // closeness centrality
+ ArrayList<Point2D.Double> closenessCent = new
ArrayList<Point2D.Double>(nodeCount);
+ // node betweenness
+ ArrayList<Point2D.Double> nodeBetweennessArray = new
ArrayList<Point2D.Double>(nodeCount);
+ // neighborhood connectivity
+ HashMap<Integer, SumCountPair> NCps = new HashMap<Integer,
SumCountPair>();
+ // average shortest path length
+ Map<Node, Double> aplMap = new HashMap<Node, Double>();
+ // stress
+ LogBinDistribution stressDist = new LogBinDistribution();
+ // Compute number of connected components
+ ConnComponentAnalyzer cca = new ConnComponentAnalyzer(network);
+ Set<CCInfo> components = cca.findComponents();
+ params.connectedComponentCount = components.size();
+
+ for (CCInfo aCompInfo : components) {
+ // Get nodes of connected component
+ Set<Node> connNodes = cca.getNodesOf(aCompInfo);
+ // Set<Node> connNodes = new HashSet<Node>(connNodes);
+ if (nodeSet != null) {
+ connNodes.retainAll(nodeSet);
+ }
+
+ // Initialize the parameters for node and edge
betweenness calculation
+ if (nodeSet == null && computeNB) {
+ nodeBetweenness.clear();
+ edgeBetweenness.clear();
+ stress.clear();
+ aplMap.clear();
+ for (Node n : connNodes) {
+ nodeBetweenness.put(n, new
NodeBetweenInfo(0, -1, 0.0));
+ stress.put(n, Long.valueOf(0));
}
- network =
Cytoscape.createNetworkFromFile(inputFile.getPath(), false);
- } catch (RuntimeException e) {
- writeLine(Messages.SM_READERROR);
- reports.add(new
NetworkAnalysisReport(inputFile, null, AnalysisError.NETWORK_NOT_OPENED));
- progress += PROGRESS_PER_NET;
- continue;
}
- // Get all possible interpretations for the network
- NetworkInspection inspection = null;
- try {
- inspection =
CyNetworkUtils.inspectNetwork(network);
- } catch (IllegalArgumentException e) {
- writeLine(Messages.SM_DONE);
- reports.add(new
NetworkAnalysisReport(inputFile, null, AnalysisError.NETWORK_EMPTY));
- unloadNetwork(inputFile, network);
- continue;
- } catch (NullPointerException e) {
- reports.add(new
NetworkAnalysisReport(inputFile, null, AnalysisError.NETWORK_FILE_INVALID));
- progress += PROGRESS_PER_NET;
- continue;
- }
+ int componentDiameter = 0;
+ for (final Node node : connNodes) {
+ ++progress;
+ final String nodeID = node.getIdentifier();
+ final int[] incEdges = getIncidentEdges(node);
+ final Map<Node, MutInteger> neighborMap =
CyNetworkUtils.getNeighborMap(network,
+ node, incEdges);
- final NetworkInterpretation[] interprs =
filterInterpretations(getInterpretations(inspection));
- final int intCount = interprs.length;
- final int advance = PROGRESS_PER_NET / intCount;
+ // Degree distribution calculation
+ final int degree = getDegree(node, incEdges);
+ edgeCount += degree;
+ degreeDist.addObservation(degree);
+ if (useNodeAttributes) {
+ setAttr(nodeID, "deg", degree);
+ }
+ final int neighborCount = calcSimple(nodeID,
incEdges, neighborMap, params);
+ if (maxConnectivity < neighborCount) {
+ maxConnectivity = neighborCount;
+ }
- // Run NetworkAnalyzer on all accepted interpretations
- writeLine(Messages.SM_DONE);
- for (int j = 0; j < intCount; progress += advance, ++j)
{
+ if (neighborCount > 0) {
+ final Set<Node> neighbors =
neighborMap.keySet();
+
+ // Neighborhood connectivity computation
+ int[] neighborInd =
CyNetworkUtils.getIndices(neighbors);
+ final double neighborConnect =
averageNeighbors(neighbors);
+ accumulate(NCps, neighborCount,
neighborConnect);
+
+ if (neighborCount > 1) {
+
+ // Topological coefficients
computation
+ double topCoef =
computeTC(node, neighbors);
+ if (!Double.isNaN(topCoef)) {
+ topCoefs.add(new
Point2D.Double(neighborCount, topCoef));
+ } else {
+ topCoef = 0.0;
+ }
+
+ // Clustering coefficients
computation
+ final double nodeCCp =
computeCC(neighborInd);
+ accumulate(CCps, neighborCount,
nodeCCp);
+ if (useNodeAttributes) {
+ setAttr(nodeID, "cco",
Utils.roundTo(nodeCCp, roundingDigits));
+ setAttr(nodeID, "tco",
Utils.roundTo(topCoef, roundingDigits));
+ }
+
+ } else if (useNodeAttributes) {
+ setAttr(nodeID, "cco", 0.0);
+ setAttr(nodeID, "tco", 0.0);
+ }
+ setAttr(nodeID, "nco",
Utils.roundTo(neighborConnect, roundingDigits));
+ } else if (useNodeAttributes) {
+ setAttr(nodeID, "nco", 0.0);
+ setAttr(nodeID, "cco", 0.0);
+ setAttr(nodeID, "tco", 0.0);
+ }
if (cancelled) {
- writeLine(Messages.SM_ANALYSISC);
- return null;
+ analysisFinished();
+ return;
}
- // Run the analysis for an interpretation
- final NetworkInterpretation interpretation =
interprs[j];
- try {
- if (interpretation.isDirected()) {
- analyzer = new
DirNetworkAnalyzer(network, null, interpretation);
- } else {
- analyzer = new
UndirNetworkAnalyzer(network, null, interpretation);
+ // Shortest path lengths computation
+ if (nodeSet != null) {
+ continue;
+ }
+ PathLengthData pathLengths =
computeSPandSN(node);
+ final int eccentricity =
pathLengths.getMaxLength();
+ if (params.diameter < eccentricity) {
+ params.diameter = eccentricity;
+ }
+ if (0 < eccentricity && eccentricity <
params.radius) {
+ params.radius = eccentricity;
+ }
+ if (componentDiameter < eccentricity) {
+ componentDiameter = eccentricity;
+ }
+ final double apl = (pathLengths.getCount() > 0)
? pathLengths.getAverageLength()
+ : 0;
+ aplMap.put(node, Double.valueOf(apl));
+ final double closeness = (apl > 0.0) ? 1 / apl
: 0.0;
+ closenessCent.add(new
Point2D.Double(neighborCount, closeness));
+
+ // Store max. and avg. shortest path lengths,
and closeness in
+ // node attributes
+ if (useNodeAttributes) {
+ setAttr(nodeID, "spl", eccentricity);
+ setAttr(nodeID, "apl",
Utils.roundTo(apl, roundingDigits));
+ setAttr(nodeID, "clc",
Utils.roundTo(closeness, roundingDigits));
+ }
+
+ // Node and edge betweenness calculation
+ if (computeNB) {
+ computeNBandEB(node);
+ // Reset everything except the
betweenness value
+ for (final Node n : connNodes) {
+ NodeBetweenInfo nodeInfo =
nodeBetweenness.get(n);
+ nodeInfo.reset();
}
- writeLine(Messages.DI_ANALYZINGINTERP1
+ (j + 1) + Messages.DI_ANALYZINGINTERP2 + intCount);
- final int maxProgress =
analyzer.getMaxProgress();
- scale = (double) advance / (double)
maxProgress;
- analyzing = true;
- subProgress = 0;
- analyzer.computeAll();
- analyzing = false;
- if (cancelled) {
-
writeLine(Messages.SM_ANALYSISC);
- return null;
+ }
+
+ if (cancelled) {
+ analysisFinished();
+ return;
+ }
+ } // end node iteration
+
+ if (nodeSet == null) {
+ // Normalize and save node betweenness
+ final double nNormFactor =
computeNormFactor(nodeBetweenness.size());
+ for (final Node n : connNodes) {
+ String id = n.getIdentifier();
+ // Compute node radiality
+ final double rad = (componentDiameter +
1.0 - aplMap.get(n).doubleValue())
+ / componentDiameter;
+ if (useNodeAttributes) {
+ setAttr(id, "rad",
Utils.roundTo(rad, roundingDigits));
}
- final NetworkStats stats =
analyzer.getStats();
- synchronized (this) {
- analyzer = null;
+ if (computeNB) {
+ final NodeBetweenInfo nbi =
nodeBetweenness.get(n);
+ double nb =
nbi.getBetweenness() * nNormFactor;
+ if (Double.isNaN(nb)) {
+ nb = 0.0;
+ }
+ final int degree = getDegree(n,
getIncidentEdges(n));
+ nodeBetweennessArray.add(new
Point2D.Double(degree, nb));
+ final long nodeStress =
stress.get(n).longValue();
+
stressDist.addObservation(nodeStress);
+ if (useNodeAttributes) {
+ setAttr(id, "nbt",
Utils.roundTo(nb, roundingDigits));
+ setAttr(id, "stress",
nodeStress);
+ }
}
-
- final String networkName =
network.getTitle();
- stats.setTitle(networkName +
interpretation.getInterpretSuffix());
- final String extendedName = networkName
+ createID(interpretation);
- try {
- if
(SettingsSerializer.getPluginSettings().getUseNodeAttributes()) {
- if
(!saveNodeAttributes(network, interpretation.isDirected(), outputDir,
-
extendedName)) {
-
writeLine(Messages.SM_ATTRIBUTESNOTSAVED);
- }
+ } // end iterate over nodes
+
+ // Save edge betweenness
+ if (useEdgeAttributes && computeNB) {
+ for (final Map.Entry<Edge, Double>
betEntry : edgeBetweenness.entrySet()) {
+ double eb =
betEntry.getValue().doubleValue();
+ if (Double.isNaN(eb)) {
+ eb = 0.0;
}
- File netstatFile = new
File(outputDir, extendedName + ".netstats");
- StatsSerializer.save(stats,
netstatFile);
-
writeLine(Messages.SM_RESULTSSAVED);
- reports.add(new
NetworkAnalysisReport(inputFile, interpretation, netstatFile));
- } catch (SecurityException ex) {
-
writeError(Messages.SM_SAVEERROR);
- reports.add(new
NetworkAnalysisReport(inputFile, interpretation,
-
AnalysisError.OUTPUT_NOT_CREATED));
- } catch (FileNotFoundException ex) {
-
writeError(Messages.SM_SAVEERROR);
- reports.add(new
NetworkAnalysisReport(inputFile, interpretation,
-
AnalysisError.OUTPUT_NOT_CREATED));
- } catch (IOException e) {
-
writeError(Messages.SM_SAVEERROR);
- reports
- .add(new
NetworkAnalysisReport(inputFile, interpretation,
AnalysisError.OUTPUT_IO_ERROR));
+
setEAttr(betEntry.getKey().getIdentifier(), "ebt",
+
Utils.roundTo(eb, roundingDigits));
}
-
- if (cancelled) {
-
writeLine(Messages.SM_ANALYSISC);
- return null;
- }
- } catch (Exception e) {
- reports.add(new
NetworkAnalysisReport(inputFile, interpretation, AnalysisError.INTERNAL_ERROR));
}
}
+ } // end iteration over connected component
+
+ // save statistics
+ if (params.connectivityAccum != null) {
+ final double meanConnectivity =
params.connectivityAccum.getAverage();
+ stats.set("avNeighbors", meanConnectivity);
+ final double density = meanConnectivity / (nodeCount -
1);
+ stats.set("density", meanConnectivity / (nodeCount -
1));
+ stats.set("centralization", (nodeCount / ((double)
nodeCount - 2))
+ * (maxConnectivity / ((double)
nodeCount - 1) - density));
+ final double nom = params.sqConnectivityAccum.getSum()
* nodeCount;
+ final double denom = params.connectivityAccum.getSum()
+ * params.connectivityAccum.getSum();
+ stats.set("heterogeneity", Math.sqrt(nom / denom - 1));
+ }
- unloadNetwork(inputFile, network);
+ // Save degree distribution in the statistics instance
+ stats.set("degreeDist", degreeDist.createHistogram());
+
+ // Save C(k) in the statistics instance
+ if (CCps.size() > 0) {
+ Point2D.Double[] averages = new
Point2D.Double[CCps.size()];
+ double cc = accumulateCCs(CCps, averages) / nodeCount;
+ stats.set("cc", cc);
+ if (averages.length > 1) {
+ stats.set("cksDist", new Points2D(averages));
+ }
}
- return null;
- }
- /**
- * Filters the set of network interpretations based on the setting
selected by the user.
- *
- * @param interprs
- * All possible interpretations of the current network.
- * @return Array of all acceptable interpretations of the current
network.
- *
- * @see #interpretations
- */
- private NetworkInterpretation[]
filterInterpretations(NetworkInterpretation[] interprs) {
- if (interpretations != Interpretations.ALL) {
- ArrayList<NetworkInterpretation> accepted = null;
- for (int i = 0; i < interprs.length; i++) {
- if ((interpretations ==
Interpretations.DIRECTED) != interprs[i].isDirected()) {
- if (accepted == null) {
- accepted = new
ArrayList<NetworkInterpretation>(interprs.length - 1);
- for (int j = 0; j < i; j++) {
-
accepted.add(interprs[j]);
- }
+ // Save topological coefficients in the statistics instance
+ if (topCoefs.size() > 1) {
+ stats.set("topCoefs", new Points2D(topCoefs));
+ }
+
+ stats.set("ncc", params.connectedComponentCount);
+ stats.set("usn", params.unconnectedNodeCount);
+ stats.set("nsl", params.selfLoopCount);
+ stats.set("mnp", params.multiEdgePartners / 2);
+ if (interpr.isPaired()) {
+ stats.set("edgeCount", edgeCount / 2);
+ }
+
+ if (nodeSet == null) {
+ long connPairs = 0; // total number of connected pairs
of nodes
+ long totalPathLength = 0;
+ for (int i = 1; i <= params.diameter; ++i) {
+ connPairs += sPathLengths[i];
+ totalPathLength += i * sPathLengths[i];
+ }
+ stats.set("connPairs", connPairs);
+
+ // Save shortest path lengths distribution
+ if (params.diameter > 0) {
+ stats.set("diameter", params.diameter);
+ stats.set("radius", params.radius);
+ stats.set("avSpl", (double) totalPathLength /
connPairs);
+ if (params.diameter > 1) {
+ stats.set("splDist", new
LongHistogram(sPathLengths, 1, params.diameter));
+ }
+ int largestCommN = 0;
+ for (int i = 1; i < nodeCount; ++i) {
+ if (sharedNeighborsHist[i] != 0) {
+ sharedNeighborsHist[i] /= 2;
+ largestCommN = i;
}
- } else if (accepted != null) {
- accepted.add(interprs[i]);
}
+ // Save common neighbors distribution
+ if (largestCommN > 0) {
+ stats.set("commNeighbors", new
LongHistogram(sharedNeighborsHist, 1,
+ largestCommN));
+ }
}
- if (accepted != null) {
- NetworkInterpretation[] result = new
NetworkInterpretation[accepted.size()];
- accepted.toArray(result);
- return result;
- }
}
- return interprs;
+
+ // Save closeness centrality in the statistics instance
+ if (closenessCent.size() > 1) {
+ stats.set("closenessCent", new Points2D(closenessCent));
+ }
+
+ // Save node betweenness in the statistics instance
+ if (nodeBetweennessArray.size() > 2) {
+ stats.set("nodeBetween", new
Points2D(nodeBetweennessArray));
+ }
+
+ // Save neighborhood connectivity in the statistics instance
+ if (NCps.size() > 1) {
+ stats.set("neighborConn", new
Points2D(getAverages(NCps)));
+ }
+
+ // Save stress distribution in the statistics instance
+ if (nodeSet == null && computeNB) {
+ stats.set("stressDist", stressDist.createPoints2D());
+ }
+
+ analysisFinished();
+ time = System.currentTimeMillis() - time;
+ stats.set("time", time / 1000.0);
+ progress = nodeCount;
+ if (useNodeAttributes || useEdgeAttributes) {
+
Cytoscape.firePropertyChange(Cytoscape.ATTRIBUTES_CHANGED, null, null);
+ }
}
/**
- * Save node attributes computed by NetworkAnalyzer for this network
into a tab-delimited file with extension
- * "nattributes" (1st column corresponds to the node ids, each
subsequent column contains the values of a node
- * attribute).
+ * Calculates a set of simple properties of the given node.
*
- * @param aNetwork
- * Target network.
- * @param aDir
- * Flag indicating if the network interpretation is directed.
- * @param aOutputDir
- * Output directory for writing files as chosen by the user.
- * @param aExtendedName
- * Name of the analyzed network including the current
interpretation.
- * @return <code>true</code> if any node attributes where present and
have been saved, and <code>false</code>
- * otherwise.
+ * @param aNodeID
+ * ID of the node of interest. This parameter is used for
storing attribute values.
+ * @param aIncEdges
+ * Array of the indices of all the neighbors of the node of
interest.
+ * @param aNeMap
+ * Map of neighbors of the node of interest and their
frequency.
+ * @param aParams
+ * Instance to accumulate the computed values.
+ * @return Number of neighbors of the node of interest.
*/
- private boolean saveNodeAttributes(CyNetwork aNetwork, boolean aDir,
File aOutputDir,
- String aExtendedName) {
- // get node attributes computed in the last analysis run
- Set<String> netAnayzerAttr = new HashSet<String>();
- if (aDir) {
- netAnayzerAttr = Messages.getDirNodeAttributes();
+ private int calcSimple(String aNodeID, int[] aIncEdges, Map<Node,
MutInteger> aNeMap,
+ SimpleUndirParams aParams) {
+ final int neighborCount = aNeMap.size();
+
+ // Avg. number of neighbors, density & centralization
calculation
+ if (aParams.connectivityAccum != null) {
+ aParams.connectivityAccum.add(neighborCount);
} else {
- netAnayzerAttr = Messages.getUndirNodeAttributes();
+ aParams.connectivityAccum = new
SumCountPair(neighborCount);
}
- if (netAnayzerAttr.size() == 0) {
- return false;
+ // Heterogeneity calculation
+ if (aParams.sqConnectivityAccum != null) {
+ aParams.sqConnectivityAccum.add(neighborCount *
neighborCount);
+ } else {
+ aParams.sqConnectivityAccum = new
SumCountPair(neighborCount * neighborCount);
+ }
+ // Number of unconnected nodes calculation
+ if (neighborCount == 0) {
+ aParams.unconnectedNodeCount++;
}
- // save chosen node attributes in a file, 1st column
corresponds to the node ids, each subsequent column
- // contains the values of a node attribute
- final CyAttributes nodeAttr = Cytoscape.getNodeAttributes();
- try {
- final FileWriter writer = new FileWriter(new
File(outputDir, aExtendedName + ".nattributes"));
- writer.write("Node ID");
- for (final String attr : netAnayzerAttr) {
- writer.write("\t" + attr);
+
+ // Number of self-loops and number of directed/undireceted edges
+ // calculation
+ int selfLoops = 0;
+ int dirEdges = 0;
+ for (int j = 0; j < aIncEdges.length; j++) {
+ Edge e = network.getEdge(aIncEdges[j]);
+ if (e.isDirected()) {
+ dirEdges++;
}
- writer.write("\n");
- final Iterator<?> it = aNetwork.nodesIterator();
- while (it.hasNext()) {
- final String id = ((Node)
it.next()).getIdentifier();
- writer.write(id);
- for (final String attr : netAnayzerAttr) {
- final Object attrValue =
nodeAttr.getAttribute(id, attr);
- if (attrValue != null) {
- writer.write("\t" +
attrValue.toString());
- }
- }
- writer.write("\n");
+ if (e.getSource() == e.getTarget()) {
+ selfLoops++;
}
- writer.close();
- } catch (IOException ex) {
- // attributes file could not be written
- return false;
}
- return true;
- }
+ aParams.selfLoopCount += selfLoops;
+ int undirEdges = aIncEdges.length - dirEdges;
- /*
- * (non-Javadoc)
- *
- * @see cytoscape.util.SwingWorker#finished()
- */
- @Override
- public void finished() {
- progress++;
- if (dialog != null) {
- dialog.analysisFinished();
+ // Number of multi-edge node partners calculation
+ int partnerOfMultiEdgeNodePairs = 0;
+ for (final MutInteger freq : aNeMap.values()) {
+ if (freq.value > 1) {
+ partnerOfMultiEdgeNodePairs++;
+ }
}
- interrupt();
- }
+ aParams.multiEdgePartners += partnerOfMultiEdgeNodePairs;
- /**
- * Gets the current progress of the tester as a number of steps.
- *
- * @return Number of steps completed in the analysis process.
- */
- public int getCurrentProgress() {
- if (analyzing) {
- subProgress = +analyzer.getCurrentProgress();
- return (progress + (int) (subProgress * scale));
+ // Storing the values in attributes
+ if (useNodeAttributes) {
+ setAttr(aNodeID, "slo", selfLoops);
+ setAttr(aNodeID, "isn", (neighborCount == 0));
+ setAttr(aNodeID, "nue", undirEdges);
+ setAttr(aNodeID, "nde", dirEdges);
+ setAttr(aNodeID, "pmn", partnerOfMultiEdgeNodePairs);
}
- return progress;
+ return neighborCount;
}
/**
- * Gets the maximum progress of the tester as a number of steps.
+ * Computes the clustering coefficient of a node.
*
- * @return Total number of steps required for the tester to finish.
+ * @param aNeighborIndices
+ * Array of the indices of all the neighbors of the node of
interest.
+ * @return Clustering coefficient of <code>aNode</code> as a value in
the range
+ * <code>[0,1]</code>.
*/
- public int getMaxProgress() {
- return inputFiles.size() * PROGRESS_PER_NET + 1;
+ private double computeCC(int[] aNeighborIndices) {
+ int edgeCount = CyNetworkUtils.getPairConnCount(network,
aNeighborIndices, true);
+ int neighborsCount = aNeighborIndices.length;
+ return (double) 2 * edgeCount / (neighborsCount *
(neighborsCount - 1));
}
/**
- * Gets the number of input files, i.e. the number of networks to be
analyzed.
+ * Computes the shortest path lengths from the given node to all other
nodes in the network. In
+ * addition, this method accumulates values in the {...@link
#sharedNeighborsHist} histogram.
+ * <p>
+ * This method stores the lengths found in the array {...@link
#sPathLengths}.<br/>
+ * <code>sPathLengths[i] == 0</code> when i is the index of
<code>aNode</code>.<br/>
+ * <code>sPathLengths[i] == Integer.MAX_VALUE</code> when node i and
<code>aNode</code> are
+ * disconnected.<br/>
+ * <code>sPathLengths[i] == d > 0</code> when every shortest path
between node i and
+ * <code>aNode</code> contains <code>d</code> edges.
+ * </p>
+ * <p>
+ * This method uses a breadth-first traversal through the network,
starting from the specified
+ * node, in order to find all reachable nodes and accumulate their
distances to
+ * <code>aNode</code> in {...@link #sPathLengths}.
+ * </p>
*
- * @return Number of input files, i.e. networks to be analyzed.
+ * @param aNode
+ * Starting node of the shortest paths to be found.
+ * @return Data on the shortest path lengths from the current node to
all other reachable nodes
+ * in the network.
*/
- public int getInputFilesCount() {
- return inputFiles.size();
+ private PathLengthData computeSPandSN(Node aNode) {
+ visited.clear();
+ visited.add(aNode);
+ Set<Node> nbs = null;
+ LinkedList<Node> reachedNodes = new LinkedList<Node>();
+ reachedNodes.add(aNode);
+ reachedNodes.add(null);
+ int currentDist = 1;
+ PathLengthData result = new PathLengthData();
+
+ for (Node currentNode = reachedNodes.removeFirst();
!reachedNodes.isEmpty(); currentNode = reachedNodes
+ .removeFirst()) {
+ if (currentNode == null) {
+ // Next level of the BFS tree
+ currentDist++;
+ reachedNodes.add(null);
+ } else {
+ // Traverse next reached node
+ final Set<Node> neighbors =
getNeighbors(currentNode);
+ if (nbs == null) {
+ nbs = neighbors;
+ }
+ for (final Node neighbor : neighbors) {
+ if (visited.add(neighbor)) {
+ final int snCount =
(currentDist > 2) ? 0 : countNeighborsIn(nbs, neighbor);
+ sharedNeighborsHist[snCount]++;
+ sPathLengths[currentDist]++;
+ result.addSPL(currentDist);
+ reachedNodes.add(neighbor);
+ }
+ }
+ }
+ }
+ return result;
}
/**
- * Gets the list with reports after the analysis is finished.
+ * Accumulates the node and edge betweenness of all nodes in a
connected component. The node
+ * betweenness is calculate using the algorithm of Brandes (U. Brandes:
A Faster Algorithm for
+ * Betweenness Centrality. Journal of Mathematical Sociology
25(2):163-177, 2001). The edge
+ * betweenness is calculated as used by Newman and Girvan (M.E. Newman
and M. Girvan: Finding
+ * and Evaluating Community Structure in Networks. Phys. Rev. E Stat.
Nonlin. Soft. Matter
+ * Phys., 69, 026113.). In each run of this method a different source
node is chosen and the
+ * betweenness of all nodes is replaced by the new one. For the final
result this method has to
+ * be run for all nodes of the connected component.
*
- * @return List of reports describing the success or failure of the
analysis of each network.
+ * This method uses a breadth-first search through the network,
starting from a specified source
+ * node, in order to find all paths to the other nodes in the network
and to accumulate their
+ * betweenness.
+ *
+ * @param source
+ * Node where a run of breadth-first search is started, in
order to accumulate the
+ * node and edge betweenness of all other nodes
*/
- public List<NetworkAnalysisReport> getReports() {
- return reports;
+ private void computeNBandEB(Node source) {
+ LinkedList<Node> done_nodes = new LinkedList<Node>();
+ LinkedList<Node> reached = new LinkedList<Node>();
+ HashMap<Edge, Double> edgeDependency = new HashMap<Edge,
Double>();
+ HashMap<Node, Long> stressDependency = new HashMap<Node,
Long>();
+
+ final NodeBetweenInfo sourceNBInfo =
nodeBetweenness.get(source);
+ sourceNBInfo.setSource();
+ reached.add(source);
+ stressDependency.put(source, Long.valueOf(0));
+
+ // Use BFS to find shortest paths from source to all nodes in
the
+ // network
+ while (!reached.isEmpty()) {
+ final Node current = reached.removeFirst();
+ done_nodes.addFirst(current);
+ final NodeBetweenInfo currentNBInfo =
nodeBetweenness.get(current);
+ final Set<Node> neighbors = getNeighbors(current);
+ for (Node neighbor : neighbors) {
+ final NodeBetweenInfo neighborNBInfo =
nodeBetweenness.get(neighbor);
+ final List<Edge> edges =
CyNetworkUtils.getConnEdge(network, current, neighbor);
+ final int expectSPLength =
currentNBInfo.getSPLength() + 1;
+ if (neighborNBInfo.getSPLength() < 0) {
+ // Neighbor traversed for the first time
+ reached.add(neighbor);
+
neighborNBInfo.setSPLength(expectSPLength);
+ stressDependency.put(neighbor,
Long.valueOf(0));
+ }
+ // shortest path via current to neighbor found
+ if (neighborNBInfo.getSPLength() ==
expectSPLength) {
+
neighborNBInfo.addSPCount(currentNBInfo.getSPCount());
+ // check for long overflow
+ if (neighborNBInfo.getSPCount() < 0) {
+ computeNB = false;
+ }
+ // add predecessors and outgoing edges,
needed for
+ // accumulation of betweenness scores
+ neighborNBInfo.addPredecessor(current);
+ for (final Edge edge : edges) {
+ currentNBInfo.addOutedge(edge);
+ }
+ }
+ // initialize edge dependency
+ for (final Edge edge : edges) {
+ if (!edgeDependency.containsKey(edge)) {
+ edgeDependency.put(edge, new
Double(0.0));
+ }
+ }
+ }
+ }
+
+ // Return nodes in order of non-increasing distance from source
+ while (!done_nodes.isEmpty()) {
+ final Node current = done_nodes.removeFirst();
+ final NodeBetweenInfo currentNBInfo =
nodeBetweenness.get(current);
+ if (currentNBInfo != null) {
+ final long currentStress =
stressDependency.get(current).longValue();
+ while (!currentNBInfo.isEmptyPredecessors()) {
+ final Node predecessor =
currentNBInfo.pullPredecessor();
+ final NodeBetweenInfo predecessorNBInfo
= nodeBetweenness.get(predecessor);
+ predecessorNBInfo.addDependency((1.0 +
currentNBInfo.getDependency())
+ * ((double)
predecessorNBInfo.getSPCount() / (double) currentNBInfo
+
.getSPCount()));
+ // accumulate all sp count
+ final long oldStress =
stressDependency.get(predecessor).longValue();
+ stressDependency.put(predecessor, new
Long(oldStress + 1 + currentStress));
+ // accumulate edge betweenness
+ final List<Edge> edges =
CyNetworkUtils.getConnEdge(network, predecessor,
+ current);
+ if (edges.size() != 0) {
+ final Edge compEdge =
edges.get(0);
+ final LinkedList<Edge>
currentedges = currentNBInfo.getOutEdges();
+ double oldbetweenness = 0.0;
+ double newbetweenness = 0.0;
+ for (final Edge edge : edges) {
+ if
(edgeBetweenness.containsKey(edge)) {
+ oldbetweenness
= edgeBetweenness.get(edge).doubleValue();
+ break;
+ }
+ }
+ // if the node is a leaf node
in this search tree
+ if (currentedges.size() == 0) {
+ newbetweenness =
(double) predecessorNBInfo.getSPCount()
+ /
(double) currentNBInfo.getSPCount();
+ } else {
+ double neighbourbetw =
0.0;
+ for (Edge neighbouredge
: currentedges) {
+ if
(!edges.contains(neighbouredge)) {
+
neighbourbetw += edgeDependency.get(neighbouredge)
+
.doubleValue();
+ }
+ }
+ newbetweenness = (1 +
neighbourbetw)
+ *
((double) predecessorNBInfo.getSPCount() / (double) currentNBInfo
+
.getSPCount());
+ }
+ edgeDependency.put(compEdge,
new Double(newbetweenness));
+ for (final Edge edge : edges) {
+
edgeBetweenness.put(edge, new Double(newbetweenness + oldbetweenness));
+ }
+ }
+ }
+ // accumulate node betweenness in each run
+ if (!current.equals(source)) {
+
currentNBInfo.addBetweenness(currentNBInfo.getDependency());
+ // accumulate number of shortest paths
+ final long allSpPaths =
stress.get(current).longValue();
+ stress.put(current, new Long(allSpPaths
+ currentNBInfo.getSPCount()
+ * currentStress));
+ }
+ }
+ }
}
/**
- * Sets the corresponding batch analysis dialog.
+ * Computes a normalization factor for node betweenness normalization.
*
- * @param aDialog
- * Dialog of the batch analysis.
+ * @param count
+ * Number of nodes for which betweenness has been computed.
+ * @return Normalization factor for node betweenness normalization.
*/
- public void setDialog(BatchAnalysisDialog aDialog) {
- dialog = aDialog;
+ protected double computeNormFactor(int count) {
+ return (count > 2) ? (1.0 / ((count - 1) * (count - 2))) : 1.0;
}
/**
- * Creates an integer identifier of the network interpretation, where 1
means directed network and 0 - undirected.
+ * Computes the average number of neighbors of the nodes in a given
node set.
*
- * @param aInterp
- * Network Interpretation.
- * @return A String of the integer identifier of the network
interpretation.
+ * @param aNodes
+ * Non-empty set of nodes. Specifying <code>null</code> or
an empty set for this
+ * parameter results in throwing an exception.
+ * @return Average number of neighbors of the nodes in
<code>aNodes</code>.
*/
- private static String createID(NetworkInterpretation aInterp) {
- String newName = "";
- final boolean flag1 = aInterp.isDirected();
- final boolean flag2 = aInterp.isIgnoreUSL();
- final boolean flag3 = aInterp.isPaired();
- if (flag1) {
- newName += "-d";
- if (flag2) {
- newName += "-isl";
- }
- } else {
- newName += "-u";
- if (flag3) {
- newName += "-cpe";
- }
+ private double averageNeighbors(Set<Node> aNodes) {
+ int neighbors = 0;
+ for (final Node node : aNodes) {
+ neighbors += getNeighbors(node).size();
}
- return newName;
+ return (double) neighbors / aNodes.size();
}
/**
- * Gets the network interpretations from the network inspection.
+ * Counts the number of neighbors of the given node that occur in the
given set of nodes.
*
- * @param inspection
- * Network inspection.
- * @return Array with different network interpretations.
+ * @param aSet
+ * Set of nodes to be searched in.
+ * @param aNode
+ * Node whose neighbors will be searched in
<code>aSet</code>.
+ * @return Number of nodes in <code>aSet</code> that are neighbors of
<code>aNode</code>.
*/
- private static NetworkInterpretation[]
getInterpretations(NetworkInspection inspection) {
- return NetworkStatus.getStatus(inspection).getInterpretations();
+ private int countNeighborsIn(Set<Node> aSet, Node aNode) {
+ Set<Node> nbs = CyNetworkUtils.getNeighbors(network, aNode,
getIncidentEdges(aNode));
+ nbs.retainAll(aSet);
+ return nbs.size();
}
/**
- * Fixed integer estimating the analysis time of a single network.
- */
- private static final int PROGRESS_PER_NET = 12;
-
- /**
- * Unloads the network from Cytoscape and writes a message in the batch
analysis dialog.
+ * Computes the topological coefficient of the given node.
*
- * @param inputFile
- * File from which the network was loaded.
- * @param network
- * Network to be unloaded.
+ * @param aNode
+ * Node to get the topological coefficient of.
+ * @param aNeighbors
+ * Set of all the neighbors of the given node.
+ * @return Topological coefficient of the <code>aNode</code> as a
number in the range [0, 1];
+ * <code>NaN</code> if the topological coefficient function is
not defined for the given
+ * node.
*/
- private void unloadNetwork(final File inputFile, CyNetwork network) {
- // Unload the network
- write(Messages.SM_UNLOADING + inputFile.getName() + " ... ");
- try {
- Cytoscape.destroyNetwork(network, true);
- } catch (Exception ex) {
- // Network already removed (by another plugin); ignore
+ private double computeTC(Node aNode, Set<Node> aNeighbors) {
+ Set<Node> comNeNodes = new HashSet<Node>(); // nodes that share
common
+ // neighbor with aNode
+ int tc = 0;
+ for (final Node nb : aNeighbors) {
+ Set<Node> currentComNeNodes = getNeighbors(nb);
+ for (final Node n : currentComNeNodes) {
+ if (n != aNode) {
+ tc++;
+ if (comNeNodes.add(n)) {
+ if (aNeighbors.contains(n)) {
+ tc++;
+ }
+ }
+ }
+ }
}
- writeLine(Messages.SM_DONE + "\n");
+ return (double) tc / (double) (comNeNodes.size() *
aNeighbors.size());
}
/**
- * Writes a message to the user in the batch analysis dialog.
+ * Gets all the neighbors of the given node.
*
- * @param aMessage
- * Message to be written (showed) to the user.
+ * @param aNode
+ * Node, whose neighbors are to be found.
+ * @return <code>Set</code> of <code>Node</code> instances, containing
all the neighbors of
+ * <code>aNode</code>; empty set if the node specified is an
isolated vertex.
+ * @see CyNetworkUtils#getNeighbors(CyNetwork, Node, int[])
*/
- private void write(String aMessage) {
- if (dialog != null) {
- dialog.write(aMessage);
- }
+ private Set<Node> getNeighbors(Node aNode) {
+ return CyNetworkUtils.getNeighbors(network, aNode,
getIncidentEdges(aNode));
}
/**
- * Writes a message, followed by a new line, to the user in the batch
analysis dialog.
+ * Gets the degree of a given node.
*
- * @param aMessage
- * Message to be written to the user.
+ * @param aNode
+ * Node to get the degree of.
+ * @param aIncEdges
+ * Array of the indices of all edges incident on the given
node.
+ * @return Degree of the given node, as defined in the book &qout;Graph
Theory&qout; by Reinhard
+ * Diestel.
*/
- private void writeLine(String aMessage) {
- write(aMessage + "\n");
+ private int getDegree(Node aNode, int[] aIncEdges) {
+ int degree = aIncEdges.length;
+ for (int i = 0; i < aIncEdges.length; ++i) {
+ Edge e = network.getEdge(aIncEdges[i]);
+ if (e.getSource() == e.getTarget() && (!(e.isDirected()
&& interpr.isPaired()))) {
+ degree++;
+ }
+ }
+ return degree;
}
/**
- * Writes an error message, followed by a new line, to the user in the
batch analysis dialog.
+ * Gets all edges incident on the given node.
*
- * @param aMessage
- * An error message to be written to the user.
+ * @param aNode
+ * Node, on which incident edges are to be found.
+ * @return Array of edge indices, containing all the edges in the
network incident on
+ * <code>aNode</code>.
*/
- private void writeError(String aMessage) {
- if (dialog != null) {
- dialog.write(aMessage + "\n");
- }
+ private int[] getIncidentEdges(Node aNode) {
+ int ni = aNode.getRootGraphIndex();
+ return network.getAdjacentEdgeIndicesArray(ni, true,
!interpr.isPaired(), true);
}
/**
- * Instance of <code>UndirNetowkrAnalyzer</code> or
<code>DirNetowkrAnalyzer</code> responsible for the network
- * parameters computation.
+ * Histogram of shortest path lengths.
+ * <p>
+ * <code>sPathLength[0]</code> stores the number of nodes processed so
far.<br/>
+ * <code>sPathLength[i]</code> for <code>i > 0</code> stores the
number of shortest paths of
+ * length <code>i</code> found so far.
+ * </p>
*/
- private NetworkAnalyzer analyzer;
+ private long[] sPathLengths;
/**
- * Flag indicating if the analysis has been canceled.
+ * Flag, indicating if the computed parameters must be stored as node
attributes.
*/
- private boolean cancelled;
+ private boolean useNodeAttributes;
/**
- * Dialog showing the progress of the batch analysis.
+ * Flag, indicating if the computed parameters must be stored as edge
attributes.
*/
- private BatchAnalysisDialog dialog;
+ private boolean useEdgeAttributes;
/**
- * Progress of the batch analysis.
+ * Histogram of pairs of nodes that share common neighbors. The i-th
element of this array
+ * accumulates the number of node pairs that share i neighbors.
*/
- private int progress;
+ private long[] sharedNeighborsHist;
/**
- * Array with the names of the input (inOutDir[0]) and output directory
(inOutDir[1]).
+ * Round doubles in attributes to <code>roundingDigits</code> decimals
after the point.
*/
- private File outputDir;
+ private int roundingDigits;
/**
- * List of input files, that can be loaded in Cytoscape and analyzed by
NetworkAnalyzer.
+ * Set of visited nodes.
+ * <p>
+ * This set is used exclusively by the method {...@link
#computeSPandSN(Node)}.
+ * </p>
*/
- private List<File> inputFiles;
+ private final Set<Node> visited;
/**
- * Interpretations to applied for every loaded network.
+ * Flag indicating if node(edge) betweenness and stress should be
computed. It is set to false if the
+ * number of shortest paths exceeds the maximum long value.
*/
- private Interpretations interpretations;
+ private boolean computeNB;
/**
- * List of reports describing the success or failure of the analysis of
each network.
+ * Map of all nodes with their respective node betweenness information,
which stores information
+ * needed for the node betweenness calculation.
*/
- private List<NetworkAnalysisReport> reports;
+ private Map<Node, NodeBetweenInfo> nodeBetweenness;
/**
- * Scaling factor needed for showing the analysis progress.
+ * Map of all nodes with their respective edge betweenness.
*/
- private double scale;
+ private Map<Edge, Double> edgeBetweenness;
/**
- * Flag indicating if parameters are computed by
<code>NetworkAnalyzer</code>.
+ * Map of all nodes with their respective stress, i.e. number of
shortest paths passing through
+ * a node.
*/
- private boolean analyzing;
+ private Map<Node, Long> stress;
- /**
- * Progress of <code>NetworkAnalyzer</code> analysis for a single
network interpretation.
- */
- private double subProgress;
+ private int nodeCount;
}
--
You received this message because you are subscribed to the Google Groups
"cytoscape-cvs" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/cytoscape-cvs?hl=en.