Author: kono
Date: 2012-05-31 07:06:32 -0700 (Thu, 31 May 2012)
New Revision: 29408
Modified:
csplugins/trunk/ucsd/kono/PSIQUICUniversalClient/src/org/cytoscape/webservice/psicquic/mapper/Mitab25Mapper.java
Log:
Score mapping problem had been fixed.
Modified:
csplugins/trunk/ucsd/kono/PSIQUICUniversalClient/src/org/cytoscape/webservice/psicquic/mapper/Mitab25Mapper.java
===================================================================
---
csplugins/trunk/ucsd/kono/PSIQUICUniversalClient/src/org/cytoscape/webservice/psicquic/mapper/Mitab25Mapper.java
2012-05-31 12:20:22 UTC (rev 29407)
+++
csplugins/trunk/ucsd/kono/PSIQUICUniversalClient/src/org/cytoscape/webservice/psicquic/mapper/Mitab25Mapper.java
2012-05-31 14:06:32 UTC (rev 29408)
@@ -17,8 +17,6 @@
/**
* Map minimal set of information from MITAB25.
*
- * @author kono
- *
*/
public class Mitab25Mapper {
@@ -26,8 +24,12 @@
// Separator for multiple entries.
private static final String SEPARATOR = "\\|";
- private static final String ATTR_PREFIX = "PSI-MI-25.";
+ private static final String TAB = "\t";
+ private static final String DB_ID = "\\:";
+ private static final String DESCRIPTION = "(\\S?)";
+ private static final String ATTR_PREFIX = "PSIMI25.";
+ // PSIMI25 specification contains 15 columns.
private static final int COLUMN_COUNT = 15;
final Set<CyNode> nodes;
@@ -36,30 +38,32 @@
final CyAttributes nodeAttr;
final CyAttributes edgeAttr;
final CyAttributes networkAttr;
-
+
+ private static final String INTERACTION = "interaction";
+
// Reg.Ex for parsing entry
private final static Pattern miPttr = Pattern.compile("MI:\\d{4}");
private final static Pattern miNamePttr = Pattern.compile("\\(.+\\)");
+
+ private final static Pattern lineSplitter = Pattern.compile(TAB);
+ private final static Pattern entrySplitter = Pattern.compile(SEPARATOR);
+ private final static Pattern dbSplitter = Pattern.compile(DB_ID);
+ private final static Pattern descriptionSpliter =
Pattern.compile("\\(");
- private static final String TAB = "\t";
- private static final String INTERACTION = "interaction";
-
// Attr Names
- private static final String DETECTION_METHOD = ATTR_PREFIX
- + "interaction detection method";
- private static final String INTERACTION_TYPE = ATTR_PREFIX
- + "interaction type";
+ private static final String DETECTION_METHOD = ATTR_PREFIX +
"interaction detection method";
+ private static final String INTERACTION_TYPE = ATTR_PREFIX +
"interaction type";
private static final String SOURCE_DB = ATTR_PREFIX + "source database";
private static final String INTERACTION_ID = ATTR_PREFIX + "Interaction
ID";
- private static final String EDGE_SCORE = ATTR_PREFIX + "confidence
score";
+ private static final String EDGE_SCORE = ATTR_PREFIX + "Confidence
Score";
// Stable IDs which maybe used for mapping later
private static final String UNIPROT = "uniprotkb";
private static final String ENTREZ_GENE = "entrezgene/locuslink";
private static final String ENTREZ_GENE_SYN = "entrez gene/locuslink";
-
+
private static final String CHEBI = "chebi";
-
+
private static final String INTERACTOR_TYPE = ATTR_PREFIX + "interactor
type";
private static final String COMPOUND = "compound";
@@ -75,41 +79,33 @@
networkAttr = Cytoscape.getNetworkAttributes();
}
- public CyNetwork map(String mitab, String networkName,
- CyNetwork parentNetwork) {
+ public CyNetwork map(String mitab, String networkName, CyNetwork
parentNetwork) {
// Read the long string of MITAB
String[] lines = mitab.split("\n");
-
parse(lines);
+ lines = null;
// Create top attribues for important keys
List<String> currentAttr;
for (CyNode node : nodes) {
- currentAttr =
nodeAttr.getListAttribute(node.getIdentifier(),
- ATTR_PREFIX + UNIPROT);
+ currentAttr =
nodeAttr.getListAttribute(node.getIdentifier(), ATTR_PREFIX + UNIPROT);
if (currentAttr != null && currentAttr.size() != 0) {
- nodeAttr.setAttribute(node.getIdentifier(),
ATTR_PREFIX
- + UNIPROT + ".top",
currentAttr.get(0));
+ nodeAttr.setAttribute(node.getIdentifier(),
ATTR_PREFIX + UNIPROT + ".top", currentAttr.get(0));
}
- currentAttr =
nodeAttr.getListAttribute(node.getIdentifier(),
- ATTR_PREFIX + ENTREZ_GENE);
+ currentAttr =
nodeAttr.getListAttribute(node.getIdentifier(), ATTR_PREFIX + ENTREZ_GENE);
if (currentAttr != null && currentAttr.size() != 0) {
- nodeAttr.setAttribute(node.getIdentifier(),
ATTR_PREFIX
- + ENTREZ_GENE + ".top",
currentAttr.get(0));
+ nodeAttr.setAttribute(node.getIdentifier(),
ATTR_PREFIX + ENTREZ_GENE + ".top", currentAttr.get(0));
}
- currentAttr =
nodeAttr.getListAttribute(node.getIdentifier(),
- ATTR_PREFIX + ENTREZ_GENE_SYN);
+ currentAttr =
nodeAttr.getListAttribute(node.getIdentifier(), ATTR_PREFIX + ENTREZ_GENE_SYN);
if (currentAttr != null && currentAttr.size() != 0) {
- nodeAttr.setAttribute(node.getIdentifier(),
ATTR_PREFIX
- + ENTREZ_GENE + ".top",
currentAttr.get(0));
+ nodeAttr.setAttribute(node.getIdentifier(),
ATTR_PREFIX + ENTREZ_GENE + ".top", currentAttr.get(0));
}
}
if (edges.size() != 0) {
- final CyNetwork network =
Cytoscape.createNetwork(nodes, edges,
- networkName, parentNetwork);
+ final CyNetwork network =
Cytoscape.createNetwork(nodes, edges, networkName, parentNetwork);
nodes.clear();
edges.clear();
@@ -120,100 +116,79 @@
}
private void parse(final String[] lines) {
- String[] entry;
- String[] sourceID;
- String[] targetID;
-
- String[] detectionMethods;
- CyNode source;
- CyNode target;
- CyEdge e;
-
- String[] sourceDB;
- String[] interactionID;
- String[] interactionType;
-
- String[] edgeScore;
-
- for (String line : lines) {
+ for (final String line : lines) {
try {
- entry = line.split(TAB);
+ parseLine(line);
+ } catch (Exception ex) {
+ logger.warn("Failed parse line: " + line, ex);
+ continue;
+ }
+ }
+ }
- // Validate entry list.
- if (entry == null || entry.length <
COLUMN_COUNT)
- continue;
+ private final void parseLine(final String line) throws Exception {
+ final String[] entry = lineSplitter.split(line);
- sourceID = entry[0].split(SEPARATOR);
- targetID = entry[1].split(SEPARATOR);
+ // Validate entry list.
+ if (entry == null || entry.length < COLUMN_COUNT)
+ return;
- source = Cytoscape.getCyNode(sourceID[0], true);
- target = Cytoscape.getCyNode(targetID[0], true);
- nodes.add(source);
- nodes.add(target);
-
- // Set type if not protein
- if(source.getIdentifier().contains(CHEBI))
-
nodeAttr.setAttribute(source.getIdentifier(), INTERACTOR_TYPE, COMPOUND);
- if(target.getIdentifier().contains(CHEBI))
-
nodeAttr.setAttribute(target.getIdentifier(), INTERACTOR_TYPE, COMPOUND);
+ // Create nodes
+ final String[] sourceID = entrySplitter.split(entry[0]);
+ final String[] targetID = entrySplitter.split(entry[1]);
- // Aliases
- setAliases(nodeAttr, source.getIdentifier(),
- entry[0].split(SEPARATOR));
- setAliases(nodeAttr, target.getIdentifier(),
- entry[1].split(SEPARATOR));
- setAliases(nodeAttr, source.getIdentifier(),
- entry[2].split(SEPARATOR));
- setAliases(nodeAttr, target.getIdentifier(),
- entry[3].split(SEPARATOR));
- setAliases(nodeAttr, source.getIdentifier(),
- entry[4].split(SEPARATOR));
- setAliases(nodeAttr, target.getIdentifier(),
- entry[5].split(SEPARATOR));
+ String[] keyIDPparts = dbSplitter.split(sourceID[0]);
+ final String firstSourceID = keyIDPparts[1];
+ final String firstSourceDB = keyIDPparts[0];
+
+ keyIDPparts = dbSplitter.split(targetID[0]);
+ final String firstTargetID = keyIDPparts[1];
+ final String firstTargetDB = keyIDPparts[0];
+
+ final CyNode source =
Cytoscape.getCyNode(firstSourceID, true);
+ final CyNode target =
Cytoscape.getCyNode(firstTargetID, true);
+ nodeAttr.setAttribute(source.getIdentifier(),
ATTR_PREFIX + "primaryKey." + firstSourceDB, firstSourceID);
+ nodeAttr.setAttribute(target.getIdentifier(),
ATTR_PREFIX + "primaryKey." + firstTargetDB, firstTargetID);
+ nodes.add(source);
+ nodes.add(target);
- // Tax ID (pick first one only)
- setTaxID(nodeAttr, source.getIdentifier(),
- entry[9].split(SEPARATOR)[0]);
- setTaxID(nodeAttr, target.getIdentifier(),
- entry[10].split(SEPARATOR)[0]);
+ // Set type if not protein
+ if (source.getIdentifier().contains(CHEBI))
+ nodeAttr.setAttribute(source.getIdentifier(),
INTERACTOR_TYPE, COMPOUND);
+ if (target.getIdentifier().contains(CHEBI))
+ nodeAttr.setAttribute(target.getIdentifier(),
INTERACTOR_TYPE, COMPOUND);
- sourceDB = entry[12].split(SEPARATOR);
- interactionID = entry[13].split(SEPARATOR);
+ // Aliases
+ setAliases(nodeAttr, source.getIdentifier(), sourceID);
+ setAliases(nodeAttr, target.getIdentifier(), targetID);
+ setAliases(nodeAttr, source.getIdentifier(),
entrySplitter.split(entry[2]));
+ setAliases(nodeAttr, target.getIdentifier(),
entrySplitter.split(entry[3]));
+ setAliases(nodeAttr, source.getIdentifier(),
entrySplitter.split(entry[4]));
+ setAliases(nodeAttr, target.getIdentifier(),
entrySplitter.split(entry[5]));
- edgeScore = entry[14].split(SEPARATOR);
+ // Tax ID (pick first one only)
+ setTaxID(nodeAttr, source.getIdentifier(),
entrySplitter.split(entry[9])[0]);
+ setTaxID(nodeAttr, target.getIdentifier(),
entrySplitter.split(entry[10])[0]);
- detectionMethods = entry[6].split(SEPARATOR);
- interactionType = entry[11].split(SEPARATOR);
- e = Cytoscape.getCyEdge(source, target,
INTERACTION,
- interactionID[0], true);
- edges.add(e);
+ final String[] sourceDB =
entrySplitter.split(entry[12]);
+ final String[] interactionID =
entrySplitter.split(entry[13]);
- setEdgeListAttribute(edgeAttr,
e.getIdentifier(),
- interactionType,
INTERACTION_TYPE);
- setEdgeListAttribute(edgeAttr,
e.getIdentifier(),
- detectionMethods,
DETECTION_METHOD);
- setEdgeListAttribute(edgeAttr,
e.getIdentifier(), sourceDB,
- SOURCE_DB);
-
- // Map scores
- setEdgeScoreListAttribute(edgeAttr,
e.getIdentifier(), edgeScore,
- EDGE_SCORE);
+ final String[] edgeScores =
entrySplitter.split(entry[14]);
- edgeAttr.setAttribute(e.getIdentifier(),
INTERACTION_ID,
- interactionID[0]);
+ final String[] detectionMethods =
entrySplitter.split(entry[6]);
+ final String[] interactionType =
entrySplitter.split(entry[11]);
+ final CyEdge e = Cytoscape.getCyEdge(source, target,
INTERACTION, interactionID[0], true);
+ edges.add(e);
- setPublication(edgeAttr, e.getIdentifier(),
- entry[8].split(SEPARATOR),
entry[7].split(SEPARATOR));
- } catch (Exception ex) {
- logger.warn("Invalid entry line found: " +
line, ex);
- continue;
- }
+ setEdgeListAttribute(edgeAttr, e.getIdentifier(),
interactionType, INTERACTION_TYPE);
+ setEdgeListAttribute(edgeAttr, e.getIdentifier(),
detectionMethods, DETECTION_METHOD);
+ setEdgeListAttribute(edgeAttr, e.getIdentifier(),
sourceDB, SOURCE_DB);
- }
- }
+ // Map scores
+ setEdgeScoreListAttribute(edgeAttr, e.getIdentifier(),
edgeScores, EDGE_SCORE);
- private void setNetworkAttr(final CyNetwork net) {
-
+ edgeAttr.setAttribute(e.getIdentifier(),
INTERACTION_ID, interactionID[0]);
+ setPublication(edgeAttr, e.getIdentifier(),
entrySplitter.split(entry[8]), entrySplitter.split(entry[7]));
}
private void setTaxID(CyAttributes attr, String id, String value) {
@@ -227,16 +202,14 @@
if (matcher.find()) {
taxonName = matcher.group();
attr.setAttribute(id, attrName,
buf[1].split("\\(")[0]);
- attr.setAttribute(id, attrName + ".name",
- taxonName.substring(1,
taxonName.length() - 1));
+ attr.setAttribute(id, attrName + ".name",
taxonName.substring(1, taxonName.length() - 1));
} else {
attr.setAttribute(id, attrName, buf[1]);
}
}
}
- private void setPublication(CyAttributes attr, String id, String[]
pubID,
- String[] authors) {
+ private void setPublication(CyAttributes attr, String id, String[]
pubID, String[] authors) {
String key = null;
String[] temp;
@@ -271,8 +244,7 @@
}
}
- private void setEdgeListAttribute(CyAttributes attr, String id,
- String[] entry, String key) {
+ private void setEdgeListAttribute(CyAttributes attr, String id,
String[] entry, String key) {
String value;
String name;
@@ -285,38 +257,36 @@
listAttrMapper(attr, key + ".name", id, name);
}
}
-
- // Special case for edge scores
- private void setEdgeScoreListAttribute(CyAttributes attr, String id,
- String[] entry, String key) {
- String scoreString;
- String scoreType;
- for (String val : entry) {
- final String[] parts = val.split(":");
- if(parts == null || parts.length != 2)
+
+ /**
+ * Create edge score attribute as Double
+ * @param attr
+ * @param id
+ * @param scores
+ * @param prefix
+ */
+ private void setEdgeScoreListAttribute(CyAttributes attr, String id,
String[] scores, String prefix) {
+ for (final String scoreFullString : scores) {
+ final String[] parts =
dbSplitter.split(scoreFullString);
+ if (parts == null || parts.length != 2)
continue;
-
- scoreString = parts[1];
- scoreType = parts[0];
-
+
+ final String scoreRaw = parts[1];
+ final String scoreString =
descriptionSpliter.split(scoreRaw)[0];
+ final String db = parts[0];
+
try {
- final Double score =
Double.parseDouble(scoreString);
- edgeAttr.setAttribute(id, key + "." +
scoreType, score);
+ final double score =
Double.parseDouble(scoreString);
+ edgeAttr.setAttribute(id, prefix + "." + db,
score);
} catch (Exception e) {
- if(scoreString != null &&
scoreString.trim().equals("") == false)
- edgeAttr.setAttribute(id, key + "." +
scoreType, scoreString);
-
continue;
}
}
}
-
-
- private void listAttrMapper(CyAttributes attr, String attrName, String
id,
- String value) {
+ private void listAttrMapper(CyAttributes attr, String attrName, String
id, String value) {
List currentAttr;
currentAttr = attr.getListAttribute(id, attrName);
--
You received this message because you are subscribed to the Google Groups
"cytoscape-cvs" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/cytoscape-cvs?hl=en.