Author: kono
Date: 2012-06-04 12:03:10 -0700 (Mon, 04 Jun 2012)
New Revision: 29440
Added:
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABEntry.java
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABLine25.java
Removed:
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABLine.java
Modified:
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/PsiMiTabParser.java
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/PsiMiTabReader.java
core3/impl/trunk/psi-mi-impl/impl/src/test/java/org/cytoscape/psi_mi/internal/plugin/MITABLineTest.java
Log:
Minor updates to MITAB readers. Full import option should be implemented.
Added:
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABEntry.java
===================================================================
---
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABEntry.java
(rev 0)
+++
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABEntry.java
2012-06-04 19:03:10 UTC (rev 29440)
@@ -0,0 +1,11 @@
+package org.cytoscape.psi_mi.internal.plugin;
+
+
+/**
+ * Represents a MITAB entry
+ *
+ */
+public final class MITABEntry {
+
+
+}
Property changes on:
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABEntry.java
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Deleted:
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABLine.java
===================================================================
---
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABLine.java
2012-06-04 19:00:30 UTC (rev 29439)
+++
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABLine.java
2012-06-04 19:03:10 UTC (rev 29440)
@@ -1,309 +0,0 @@
-package org.cytoscape.psi_mi.internal.plugin;
-
-import java.util.List;
-import java.util.ArrayList;
-
-// There are 15 tab separated columns. Columns can be further separated by
'|' and sub-columns
-// can be further separated by ':'
-// These are the columns
-//
-// 0 srcDB:sourceRawId|srcDB:srcAlias
-// 1 tgtDB:targetRawId|tgtDB:tgtAlias
-// 2 srcDB:srcAlias|srcDB:srcAlias
-// 3 tgtDB:tgtAlias|tgtDB:tgtAlias
-// 4 srcDB:srcAlias|srcDB:srcAlias
-// 5 tgtDB:tgtAlias|tgtDB:tgtAlias
-// 6 detectionDB:detectionMethod|detectionDB:detectionMethod
-// 7 authors|authors
-// 8 publicationIDKey:publicationIDValue|publicationIDKey:publicationIDValue
-// 9 srcTaxonDB:srcTaxonName|srcTaxonDB:srcTaxonName
-// 10 tgtTaxonDB:tgtTaxonName|tgtTaxonDB:tgtTaxonName
-// 11 interactionType|interactionType
-// 12 sourceDB|sourceDB
-// 13 interactionID|XXXX
-// 14 edgeScoreType:edgeScoreString|edgeScoreType:edgeScoreString
-// subsequent optional columns are ignored!
-//
-// For a better description see:
http://wiki.reactome.org/index.php/PSI-MITAB_interactions
-//
-
-
-/**
- * This class quickly reads a single line of PSI-MI Tab delimited format into
a data
- * structure for easy processing.
- */
-public class MITABLine {
-
- final char COLON = ':';
- final char PIPE = '|';
- final char TAB = ' ';
- final char QUOTE = '"';
-
- String sourceRawID = "";
- String targetRawID = "";
-
- List<String> srcAliases = new ArrayList<String>(10);
- List<String> srcDBs = new ArrayList<String>(10);
-
- List<String> tgtAliases = new ArrayList<String>(10);
- List<String> tgtDBs = new ArrayList<String>(10);
-
- List<String> authors = new ArrayList<String>(5);
-
- List<String> detectionMethods = new ArrayList<String>(5);
- List<String> detectionDBs = new ArrayList<String>(5);
-
- List<String> publicationValues = new ArrayList<String>(5);
- List<String> publicationDBs = new ArrayList<String>(5);
-
- List<String> srcTaxonDBs = new ArrayList<String>(5);
- List<String> srcTaxonIDs = new ArrayList<String>(5);
-
- List<String> tgtTaxonDBs = new ArrayList<String>(5);
- List<String> tgtTaxonIDs = new ArrayList<String>(5);
-
- List<String> sourceIDs = new ArrayList<String>(5);
- List<String> sourceDBs = new ArrayList<String>(5);
-
- List<String> interactionTypes = new ArrayList<String>(5);
- List<String> interactionTypeDBs = new ArrayList<String>(5);
-
- List<String> edgeScoreTypes = new ArrayList<String>(5);
- List<String> edgeScoreStrings = new ArrayList<String>(5);
-
- List<String> interactionIDs = new ArrayList<String>(5);
- List<String> interactionDBs = new ArrayList<String>(5);
-
- private int colon = 0;
- private int tab = 0;
- private int pipe = 0;
- private int begin = 0;
- private int end = 0;
-
- private void init() {
- sourceRawID = "";
- targetRawID = "";
- colon = 0;
- tab = 0;
- pipe = 0;
- begin = 0;
- end = 0;
- srcAliases.clear();
- tgtAliases.clear();
- authors.clear();
- detectionMethods.clear();
- detectionDBs.clear();
- publicationDBs.clear();
- publicationValues.clear();
- srcTaxonIDs.clear();
- srcTaxonDBs.clear();
- tgtTaxonIDs.clear();
- tgtTaxonDBs.clear();
- sourceIDs.clear();
- sourceDBs.clear();
- interactionTypes.clear();
- interactionTypeDBs.clear();
- edgeScoreTypes.clear();
- edgeScoreStrings.clear();
- interactionIDs.clear();
- interactionDBs.clear();
- }
-
-
- public void readLine(String line) {
- init();
-
- // column 0
- // get first source DB
- srcDBs.add(nextString(line));
-
- // get sourceRawID
- sourceRawID = nextString(line);
- srcAliases.add(sourceRawID);
-
- // get any additional source aliases from col 0
- addNextPairs("additional src aliases", srcDBs, srcAliases, line
);
-
- // column 1
- // get first target db
- tgtDBs.add(nextString(line));
-
- // get targetRawID
- targetRawID = nextString(line);
- tgtAliases.add(targetRawID);
-
- // get any additional target aliases from col 1
- addNextPairs("additional tgt aliases", tgtDBs, tgtAliases,
line);
-
- // column 2
- // get any additional source aliases
- addNextPairs("col 2 src", srcDBs, srcAliases, line );
-
- // column 3
- // get any additional target aliases
- addNextPairs("col 3 tgt", tgtDBs, tgtAliases, line);
-
- // column 4
- // get any additional source aliases
- addNextPairs("col 4 src", srcDBs, srcAliases, line );
-
- // column 5
- // get any additional target aliases
- addNextPairs("col 5 tgt", tgtDBs, tgtAliases, line);
-
- // column 6
- // get any detection methods
- addNextPairs("detection", detectionDBs, detectionMethods, line);
-
- // column 7
- // get any authors
- addNextValues("authors",authors,line);
-
- // column 8
- // get any additional publications
- addNextPairs("publications", publicationDBs, publicationValues,
line);
-
- // column 9
- // get source taxon
- addNextPairs("src taxon", srcTaxonDBs, srcTaxonIDs, line);
-
- // column 10
- // get target taxon
- addNextPairs("tgt taxon", tgtTaxonDBs, tgtTaxonIDs, line);
-
- // column 11
- // get any interaction types
- addNextPairs("interaction", interactionTypeDBs,
interactionTypes, line);
-
- // column 12
- // get any source databases
- addNextPairs("source", sourceDBs,sourceIDs,line);
-
- // column 13
- // get interaction ID
- addNextPairs("interaction IDs", interactionDBs, interactionIDs,
line );
-
- // column 14
- // get edge scores
- addNextPairs("edge scores", edgeScoreTypes, edgeScoreStrings,
line);
- }
-
- // just for debugging
- public void print() {
- System.out.println("sourceRawID: " + sourceRawID);
- System.out.println("targetRawID: " + targetRawID);
- printList("srcAliases", srcAliases);
- printList("tgtAliases", tgtAliases);
- printList("detectionDBs", detectionDBs);
- printList("detectionMethods", detectionMethods);
- printList("authors", authors);
- printList("publicationDBs", publicationDBs);
- printList("publicationValues", publicationValues);
- printList("sourceDBs", sourceDBs);
- printList("sourceIDs", sourceIDs);
- printList("interactionTypes", interactionTypes);
- printList("interactionTypeDBs", interactionTypeDBs);
- printList("interactionIDs", interactionIDs);
- printList("interactionDBs", interactionDBs);
- printList("edgeScoreTypes", edgeScoreTypes);
- printList("edgeScoreStrings", edgeScoreStrings);
- System.out.println();
- System.out.println();
- }
-
- // just for debugging
- public void printList(String name, List<String> vals) {
- System.out.print(name + ": ");
- for ( String s : vals )
- System.out.print("'" + s + "', ");
- System.out.println();
- }
-
- private String nextString(String line) {
- end = nextIndex(line,begin);
- if ( (begin > end) || (begin > line.length() - 1))
- return "";
-
- String ret = line.substring(begin,end);
-
- // This is an attempt to handle quoted strings, which may
- // include our tokenizing characters! Basically, if
- // we see a quote, make sure we get a close quote too!
- int openQuote = ret.indexOf(QUOTE);
- if ( openQuote >= 0 ) {
- int closeQuote = ret.indexOf(QUOTE,openQuote+1);
- if ( closeQuote < 0 ) {
- end = nextIndex(line,end+1);
- ret = line.substring(begin,end);
- }
- }
-
- begin = end+1;
- return ret;
- }
-
- private void addNextValues(String desc,List<String> values, String
line) {
- do {
- authors.add(nextString(line));
- } while ( end != tab );
- }
-
- private void addNextPairs(String desc, List<String> dbs, List<String>
values, String line) {
-
- //System.out.println("starting: " + desc);
- //int peekEnd = peekNextIndex(line,begin);
- //if ( (begin <= peekEnd) && (begin >= 0) && (peekEnd >= 0) )
- // System.out.println(" for: " +
line.substring(begin,peekEnd));
- //else
- // System.out.println(" weird begin: " + begin + " end: "
+ peekEnd);
-
-
- do {
- String db = nextString(line);
- //System.out.println(" next db string: '" + db +
"'");
-
- // make sure the first column is valid before continuing
- if ( db.equals("") || db.equals("-") ) {
- //System.out.println(" got invalid col: "
+ db);
- return;
- }
- dbs.add(db);
-
- String val = nextString(line);
- //System.out.println(" next val string: '" + val +
"'");
- values.add(val);
- } while ( end != tab );
- }
-
- private int nextIndex(String s, int start) {
- colon = s.indexOf(COLON, start);
- if ( colon < 0 ) colon = s.length() - 1;
-
- pipe = s.indexOf(PIPE, start);
- if ( pipe < 0 ) pipe = s.length() - 1;
-
- tab = s.indexOf(TAB, start);
- if ( tab < 0 ) tab = s.length() - 1;
-
- int ind = Math.min(colon, Math.min(pipe,tab));
-
- return ind;
- }
-
- // just for debugging!
- private int peekNextIndex(String s, int start) {
- int x, y, z = 0;
- x = s.indexOf(COLON, start);
- if ( x < 0 ) x = s.length() - 1;
-
- y = s.indexOf(PIPE, start);
- if ( y < 0 ) y = s.length() - 1;
-
- z = s.indexOf(TAB, start);
- if ( z < 0 ) z = s.length() - 1;
-
- int ind = Math.min(x, Math.min(y,z));
- return ind;
- }
-}
-
Copied:
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABLine25.java
(from rev 29353,
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABLine.java)
===================================================================
---
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABLine25.java
(rev 0)
+++
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/MITABLine25.java
2012-06-04 19:03:10 UTC (rev 29440)
@@ -0,0 +1,301 @@
+package org.cytoscape.psi_mi.internal.plugin;
+
+import java.util.List;
+import java.util.ArrayList;
+
+// There are 15 tab separated columns. Columns can be further separated by
'|' and sub-columns
+// can be further separated by ':'
+// These are the columns
+//
+// 0 srcDB:sourceRawId|srcDB:srcAlias
+// 1 tgtDB:targetRawId|tgtDB:tgtAlias
+// 2 srcDB:srcAlias|srcDB:srcAlias
+// 3 tgtDB:tgtAlias|tgtDB:tgtAlias
+// 4 srcDB:srcAlias|srcDB:srcAlias
+// 5 tgtDB:tgtAlias|tgtDB:tgtAlias
+// 6 detectionDB:detectionMethod|detectionDB:detectionMethod
+// 7 authors|authors
+// 8 publicationIDKey:publicationIDValue|publicationIDKey:publicationIDValue
+// 9 srcTaxonDB:srcTaxonName|srcTaxonDB:srcTaxonName
+// 10 tgtTaxonDB:tgtTaxonName|tgtTaxonDB:tgtTaxonName
+// 11 interactionType|interactionType
+// 12 sourceDB|sourceDB
+// 13 interactionID|XXXX
+// 14 edgeScoreType:edgeScoreString|edgeScoreType:edgeScoreString
+// subsequent optional columns are ignored!
+//
+// For a better description see:
http://wiki.reactome.org/index.php/PSI-MITAB_interactions
+//
+
+/**
+ * This class quickly reads a single line of PSI-MI Tab delimited format into
a data
+ * structure for easy processing.
+ *
+ * This is a PSIMITAB 25 format.
+ */
+public class MITABLine25 {
+
+ private static final char COLON = ':';
+ private static final char PIPE = '|';
+ private static final char TAB = ' ';
+ private static final char QUOTE = '"';
+
+ String sourceRawID = "";
+ String targetRawID = "";
+
+ final List<String> srcAliases = new ArrayList<String>(10);
+ final List<String> srcDBs = new ArrayList<String>(10);
+
+ final List<String> tgtAliases = new ArrayList<String>(10);
+ final List<String> tgtDBs = new ArrayList<String>(10);
+
+ final List<String> authors = new ArrayList<String>(5);
+
+ final List<String> detectionMethods = new ArrayList<String>(5);
+ final List<String> detectionDBs = new ArrayList<String>(5);
+
+ final List<String> publicationValues = new ArrayList<String>(5);
+ final List<String> publicationDBs = new ArrayList<String>(5);
+
+ final List<String> srcTaxonDBs = new ArrayList<String>(5);
+ final List<String> srcTaxonIDs = new ArrayList<String>(5);
+
+ final List<String> tgtTaxonDBs = new ArrayList<String>(5);
+ final List<String> tgtTaxonIDs = new ArrayList<String>(5);
+
+ final List<String> sourceIDs = new ArrayList<String>(5);
+ final List<String> sourceDBs = new ArrayList<String>(5);
+
+ final List<String> interactionTypes = new ArrayList<String>(5);
+ final List<String> interactionTypeDBs = new ArrayList<String>(5);
+
+ final List<String> edgeScoreTypes = new ArrayList<String>(5);
+ final List<String> edgeScoreStrings = new ArrayList<String>(5);
+
+ final List<String> interactionIDs = new ArrayList<String>(5);
+ List<String> interactionDBs = new ArrayList<String>(5);
+
+ private int colon = 0;
+ private int tab = 0;
+ private int pipe = 0;
+ private int begin = 0;
+ private int end = 0;
+
+ private void init() {
+ sourceRawID = "";
+ targetRawID = "";
+ colon = 0;
+ tab = 0;
+ pipe = 0;
+ begin = 0;
+ end = 0;
+ srcAliases.clear();
+ tgtAliases.clear();
+ authors.clear();
+ detectionMethods.clear();
+ detectionDBs.clear();
+ publicationDBs.clear();
+ publicationValues.clear();
+ srcTaxonIDs.clear();
+ srcTaxonDBs.clear();
+ tgtTaxonIDs.clear();
+ tgtTaxonDBs.clear();
+ sourceIDs.clear();
+ sourceDBs.clear();
+ interactionTypes.clear();
+ interactionTypeDBs.clear();
+ edgeScoreTypes.clear();
+ edgeScoreStrings.clear();
+ interactionIDs.clear();
+ interactionDBs.clear();
+ }
+
+
+ public void readLine(final String line) {
+ init();
+
+ // column 0
+ // get first source DB
+ srcDBs.add(nextString(line));
+
+ // get sourceRawID
+ sourceRawID = nextString(line);
+ srcAliases.add(sourceRawID);
+
+ // get any additional source aliases from col 0
+ addNextPairs("additional src aliases", srcDBs, srcAliases, line
);
+
+ // column 1
+ // get first target db
+ tgtDBs.add(nextString(line));
+
+ // get targetRawID
+ targetRawID = nextString(line);
+ tgtAliases.add(targetRawID);
+
+ // get any additional target aliases from col 1
+ addNextPairs("additional tgt aliases", tgtDBs, tgtAliases,
line);
+
+ // column 2
+ // get any additional source aliases
+ addNextPairs("col 2 src", srcDBs, srcAliases, line );
+
+ // column 3
+ // get any additional target aliases
+ addNextPairs("col 3 tgt", tgtDBs, tgtAliases, line);
+
+ // column 4
+ // get any additional source aliases
+ addNextPairs("col 4 src", srcDBs, srcAliases, line );
+
+ // column 5
+ // get any additional target aliases
+ addNextPairs("col 5 tgt", tgtDBs, tgtAliases, line);
+
+ // column 6
+ // get any detection methods
+ addNextPairs("detection", detectionDBs, detectionMethods, line);
+
+ // column 7
+ // get any authors
+ addNextValues("authors",authors,line);
+
+ // column 8
+ // get any additional publications
+ addNextPairs("publications", publicationDBs, publicationValues,
line);
+
+ // column 9
+ // get source taxon
+ addNextPairs("src taxon", srcTaxonDBs, srcTaxonIDs, line);
+
+ // column 10
+ // get target taxon
+ addNextPairs("tgt taxon", tgtTaxonDBs, tgtTaxonIDs, line);
+
+ // column 11
+ // get any interaction types
+ addNextPairs("interaction", interactionTypeDBs,
interactionTypes, line);
+
+ // column 12
+ // get any source databases
+ addNextPairs("source", sourceDBs,sourceIDs,line);
+
+ // column 13
+ // get interaction ID
+ addNextPairs("interaction IDs", interactionDBs, interactionIDs,
line );
+
+ // column 14
+ // get edge scores
+ addNextPairs("edge scores", edgeScoreTypes, edgeScoreStrings,
line);
+ }
+
+ // just for debugging
+ public void print() {
+ System.out.println("sourceRawID: " + sourceRawID);
+ System.out.println("targetRawID: " + targetRawID);
+ printList("srcAliases", srcAliases);
+ printList("tgtAliases", tgtAliases);
+ printList("detectionDBs", detectionDBs);
+ printList("detectionMethods", detectionMethods);
+ printList("authors", authors);
+ printList("publicationDBs", publicationDBs);
+ printList("publicationValues", publicationValues);
+ printList("sourceDBs", sourceDBs);
+ printList("sourceIDs", sourceIDs);
+ printList("interactionTypes", interactionTypes);
+ printList("interactionTypeDBs", interactionTypeDBs);
+ printList("interactionIDs", interactionIDs);
+ printList("interactionDBs", interactionDBs);
+ printList("edgeScoreTypes", edgeScoreTypes);
+ printList("edgeScoreStrings", edgeScoreStrings);
+ System.out.println();
+ System.out.println();
+ }
+
+ // just for debugging
+ public void printList(String name, List<String> vals) {
+ System.out.print(name + ": ");
+ for ( String s : vals )
+ System.out.print("'" + s + "', ");
+ System.out.println();
+ }
+
+ private String nextString(final String line) {
+ end = nextIndex(line,begin);
+ if ( (begin > end) || (begin > line.length() - 1))
+ return "";
+
+ String ret = line.substring(begin,end);
+
+ // This is an attempt to handle quoted strings, which may
+ // include our tokenizing characters! Basically, if
+ // we see a quote, make sure we get a close quote too!
+ int openQuote = ret.indexOf(QUOTE);
+ if ( openQuote >= 0 ) {
+ int closeQuote = ret.indexOf(QUOTE,openQuote+1);
+ if ( closeQuote < 0 ) {
+ end = nextIndex(line,end+1);
+ ret = line.substring(begin,end);
+ }
+ }
+
+ begin = end+1;
+ return ret;
+ }
+
+ private void addNextValues(String desc,List<String> values, String
line) {
+ do {
+ authors.add(nextString(line));
+ } while ( end != tab );
+ }
+
+ private void addNextPairs(String desc, List<String> dbs, List<String>
values, String line) {
+ do {
+ String db = nextString(line);
+ //System.out.println(" next db string: '" + db +
"'");
+
+ // make sure the first column is valid before continuing
+ if ( db.equals("") || db.equals("-") ) {
+ //System.out.println(" got invalid col: "
+ db);
+ return;
+ }
+ dbs.add(db);
+
+ String val = nextString(line);
+ //System.out.println(" next val string: '" + val +
"'");
+ values.add(val);
+ } while ( end != tab );
+ }
+
+ private int nextIndex(String s, int start) {
+ colon = s.indexOf(COLON, start);
+ if ( colon < 0 ) colon = s.length() - 1;
+
+ pipe = s.indexOf(PIPE, start);
+ if ( pipe < 0 ) pipe = s.length() - 1;
+
+ tab = s.indexOf(TAB, start);
+ if ( tab < 0 ) tab = s.length() - 1;
+
+ int ind = Math.min(colon, Math.min(pipe,tab));
+
+ return ind;
+ }
+
+ // just for debugging!
+ private int peekNextIndex(String s, int start) {
+ int x, y, z = 0;
+ x = s.indexOf(COLON, start);
+ if ( x < 0 ) x = s.length() - 1;
+
+ y = s.indexOf(PIPE, start);
+ if ( y < 0 ) y = s.length() - 1;
+
+ z = s.indexOf(TAB, start);
+ if ( z < 0 ) z = s.length() - 1;
+
+ int ind = Math.min(x, Math.min(y,z));
+ return ind;
+ }
+}
+
Modified:
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/PsiMiTabParser.java
===================================================================
---
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/PsiMiTabParser.java
2012-06-04 19:00:30 UTC (rev 29439)
+++
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/PsiMiTabParser.java
2012-06-04 19:03:10 UTC (rev 29440)
@@ -9,8 +9,6 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import org.cytoscape.model.CyEdge;
import org.cytoscape.model.CyNetwork;
@@ -18,60 +16,47 @@
import org.cytoscape.model.CyNode;
import org.cytoscape.model.CyRow;
import org.cytoscape.model.CyTable;
-import org.cytoscape.model.CyIdentifiable;
import org.cytoscape.work.TaskMonitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class PsiMiTabParser {
-
+
private static final Logger logger =
LoggerFactory.getLogger(PsiMiTabParser.class);
-
+
private static final int BUFFER_SIZE = 100000;
- // Separator for multiple entries.
- private static final String SEPARATOR = "\\|";
- private static final String SUBSEPARATOR = ":";
- private static final String ATTR_PREFIX = "PSI-MI-25.";
+ private static final String ATTR_PREFIX = "PSIMI-25.";
- private static final int COLUMN_COUNT = 15;
-
- // Reg.Ex for parsing entry
- private final static Pattern miPttr = Pattern.compile("MI:\\d{4}");
- private final static Pattern miNamePttr = Pattern.compile("\\(.+\\)");
-
- private static final String TAB = "\t";
-
// Node Attr Names
private static final String INTERACTOR_TYPE = ATTR_PREFIX + "interactor
type";
- private static final String ALIASES = ATTR_PREFIX + "aliases";
- private static final String TAXONIDS = ATTR_PREFIX + "taxon ID";
- private static final String TAXONDBS = ATTR_PREFIX + "taxon DB";
+ private static final String ALIASES = ATTR_PREFIX + "aliases";
+ private static final String TAXONIDS = ATTR_PREFIX + "taxon ID";
+ private static final String TAXONDBS = ATTR_PREFIX + "taxon DB";
// Edge Attr Names
- private static final String INTERACTION = CyEdge.INTERACTION; // should
already exist
+ private static final String INTERACTION = CyEdge.INTERACTION; // should
+
// already
+
// exist
private static final String DETECTION_METHOD_ID = ATTR_PREFIX +
"detection method ID";
private static final String DETECTION_METHOD = ATTR_PREFIX + "detection
method";
private static final String INTERACTION_TYPE = ATTR_PREFIX +
"interaction type";
private static final String INTERACTION_TYPE_ID = ATTR_PREFIX +
"interaction type ID";
private static final String SOURCE_DB = ATTR_PREFIX + "source DB";
private static final String EDGE_SCORE = ATTR_PREFIX + "edge score";
- private static final String AUTHORS = ATTR_PREFIX + "authors";
- private static final String PUBLICATION_ID = ATTR_PREFIX + "publication
ID";
- private static final String PUBLICATION_DB = ATTR_PREFIX + "publication
DB";
+ private static final String AUTHORS = ATTR_PREFIX + "authors";
+ private static final String PUBLICATION_ID = ATTR_PREFIX + "publication
ID";
+ private static final String PUBLICATION_DB = ATTR_PREFIX + "publication
DB";
// Stable IDs which maybe used for mapping later
private static final String CHEBI = "chebi";
private static final String COMPOUND = "compound";
-
- private Matcher matcher;
-
private Map<String, CyNode> nodeMap;
private final InputStream inputStream;
private final CyNetworkFactory cyNetworkFactory;
-
+
private boolean cancelFlag = false;
public PsiMiTabParser(final InputStream inputStream, final
CyNetworkFactory cyNetworkFactory) {
@@ -84,121 +69,175 @@
long start = System.currentTimeMillis();
taskMonitor.setProgress(-1.0);
-
+
this.nodeMap = new HashMap<String, CyNode>();
- String[] entry;
- String[] sourceID;
- String[] targetID;
-
- String[] detectionMethods;
-
- String[] sourceDB;
- String[] interactionID;
- String[] interactionType;
-
- String[] edgeScore;
-
final CyNetwork network = cyNetworkFactory.createNetwork();
initColumns(network);
String line;
final BufferedReader br = new BufferedReader(new
InputStreamReader(inputStream), BUFFER_SIZE);
+ final MITABLine25 mline = new MITABLine25();
- MITABLine mline = new MITABLine();
-
long interactionCount = 0;
while ((line = br.readLine()) != null) {
-
- if(cancelFlag) {
+ if (cancelFlag) {
cleanup(br);
return network;
}
-
+
// Ignore comment line
if (line.startsWith("#"))
continue;
try {
-
- mline.readLine(line);
+ processFull(network, mline, line);
+ } catch (Exception ex) {
+ logger.warn("Could not parse this line: " +
line, ex);
+ continue;
+ }
+ if (++interactionCount % 100 == 0)
+ taskMonitor.setStatusMessage("parsed " +
interactionCount + " interactions");
+ }
- final String sourceRawID = mline.sourceRawID;
- final String targetRawID = mline.targetRawID;
+ br.close();
+ nodeMap.clear();
+ nodeMap = null;
- // create nodes
- CyNode source = nodeMap.get(sourceRawID);
- if (source == null) {
- source = network.addNode();
- nodeMap.put(sourceRawID, source);
- }
- CyNode target = nodeMap.get(targetRawID);
- if (target == null) {
- target = network.addNode();
- nodeMap.put(targetRawID, target);
- }
+ logger.info("MITAB Parse finished in " +
(System.currentTimeMillis() - start) + " msec.");
- CyRow sourceRow = network.getRow(source);
- CyRow targetRow = network.getRow(target);
+ return network;
+ }
- // set various node attrs
- sourceRow.set(CyNetwork.NAME, sourceRawID);
- targetRow.set(CyNetwork.NAME, targetRawID);
+ private void processMinimum(final CyNetwork network, final MITABLine25
mline, final String line) {
+ mline.readLine(line);
- setInteractorType(sourceRow,mline.srcAliases);
- setInteractorType(targetRow,mline.tgtAliases);
+ final String sourceRawID = mline.sourceRawID;
+ final String targetRawID = mline.targetRawID;
- setAliases(sourceRow, mline.srcAliases,
mline.srcDBs);
- setAliases(targetRow, mline.tgtAliases,
mline.tgtDBs);
+ // create nodes
+ CyNode source = nodeMap.get(sourceRawID);
+ if (source == null) {
+ source = network.addNode();
+ nodeMap.put(sourceRawID, source);
+ }
+ CyNode target = nodeMap.get(targetRawID);
+ if (target == null) {
+ target = network.addNode();
+ nodeMap.put(targetRawID, target);
+ }
- setTaxID(sourceRow, mline.srcTaxonIDs,
mline.srcTaxonDBs);
- setTaxID(targetRow, mline.tgtTaxonIDs,
mline.tgtTaxonDBs);
+ final CyRow sourceRow = network.getRow(source);
+ final CyRow targetRow = network.getRow(target);
- // create edge
- final CyEdge e = network.addEdge(source,
target, true);
- CyRow edgeRow = network.getRow(e);
-
- // set various edge attrs
- String interactionId = "unknown";
- if ( mline.interactionIDs.size() > 0 )
- interactionId =
mline.interactionIDs.get(0);
+ // set various node attrs
+ sourceRow.set(CyNetwork.NAME, sourceRawID);
+ targetRow.set(CyNetwork.NAME, targetRawID);
- edgeRow.set(INTERACTION, interactionId);
- edgeRow.set(CyNetwork.NAME, sourceRawID + " ("
+ interactionId + ") " + targetRawID);
+ setInteractorType(sourceRow, mline.srcAliases);
+ setInteractorType(targetRow, mline.tgtAliases);
- setTypedEdgeListAttribute(edgeRow,
mline.interactionTypes, INTERACTION_TYPE_ID, INTERACTION_TYPE);
- setTypedEdgeListAttribute(edgeRow,
mline.detectionMethods, DETECTION_METHOD_ID, DETECTION_METHOD);
- setEdgeListAttribute(edgeRow, mline.sourceDBs,
SOURCE_DB);
- setEdgeListAttribute(edgeRow,
mline.edgeScoreStrings, EDGE_SCORE);
+ setAliases(sourceRow, mline.srcAliases, mline.srcDBs);
+ setAliases(targetRow, mline.tgtAliases, mline.tgtDBs);
- setPublication(edgeRow,
mline.publicationValues, mline.publicationDBs);
- setAuthors(edgeRow, mline.authors);
-
- } catch (Exception ex) {
- logger.warn("Could not parse this line: " +
line, ex);
- continue;
- }
- if ( ++interactionCount % 100 == 0 )
- taskMonitor.setStatusMessage("parsed " +
interactionCount + " interactions");
+ setTaxID(sourceRow, mline.srcTaxonIDs, mline.srcTaxonDBs);
+ setTaxID(targetRow, mline.tgtTaxonIDs, mline.tgtTaxonDBs);
+
+ // create edge
+ final CyEdge e = network.addEdge(source, target, true);
+ CyRow edgeRow = network.getRow(e);
+
+ // set various edge attrs
+ String interactionId = "unknown";
+ if (mline.interactionIDs.size() > 0)
+ interactionId = mline.interactionIDs.get(0);
+
+ edgeRow.set(INTERACTION, interactionId);
+ edgeRow.set(CyNetwork.NAME, sourceRawID + " (" + interactionId
+ ") " + targetRawID);
+
+ setTypedEdgeListAttribute(edgeRow, mline.interactionTypes,
INTERACTION_TYPE_ID, INTERACTION_TYPE);
+ setTypedEdgeListAttribute(edgeRow, mline.detectionMethods,
DETECTION_METHOD_ID, DETECTION_METHOD);
+ setEdgeListAttribute(edgeRow, mline.sourceDBs, SOURCE_DB);
+ setEdgeListAttribute(edgeRow, mline.edgeScoreStrings,
EDGE_SCORE);
+
+ setPublication(edgeRow, mline.publicationValues,
mline.publicationDBs);
+ setAuthors(edgeRow, mline.authors);
+ }
+
+ private void processFull(final CyNetwork network, final MITABLine25
mline, final String line) {
+ mline.readLine(line);
+
+ final String sourceRawID = mline.sourceRawID;
+ final String targetRawID = mline.targetRawID;
+
+ // create nodes
+ CyNode source = nodeMap.get(sourceRawID);
+ if (source == null) {
+ source = network.addNode();
+ nodeMap.put(sourceRawID, source);
}
+ CyNode target = nodeMap.get(targetRawID);
+ if (target == null) {
+ target = network.addNode();
+ nodeMap.put(targetRawID, target);
+ }
+
+ final CyRow sourceRow = network.getRow(source);
+ final CyRow targetRow = network.getRow(target);
- br.close();
- nodeMap.clear();
- nodeMap = null;
+ // set various node attrs
+ sourceRow.set(CyNetwork.NAME, sourceRawID);
+ targetRow.set(CyNetwork.NAME, targetRawID);
+
+ final List<String> sDB = mline.sourceDBs;
+ for(int i=0; i<sDB.size(); i++) {
+ String dbName = sDB.get(i);
+ if(sourceRow.getTable().getColumn(dbName) == null)
+ sourceRow.getTable().createColumn(dbName,
String.class, true);
+
+ sourceRow.set(dbName, mline.sourceIDs.get(i));
+ }
+
+
- logger.info("MITAB Parse finished in " +
(System.currentTimeMillis() - start) + " msec.");
-
- return network;
+ setInteractorType(sourceRow, mline.srcAliases);
+ setInteractorType(targetRow, mline.tgtAliases);
+
+ setAliases(sourceRow, mline.srcAliases, mline.srcDBs);
+ setAliases(targetRow, mline.tgtAliases, mline.tgtDBs);
+
+ setTaxID(sourceRow, mline.srcTaxonIDs, mline.srcTaxonDBs);
+ setTaxID(targetRow, mline.tgtTaxonIDs, mline.tgtTaxonDBs);
+
+ // create edge
+ final CyEdge e = network.addEdge(source, target, true);
+ CyRow edgeRow = network.getRow(e);
+
+ // set various edge attrs
+ String interactionId = "unknown";
+ if (mline.interactionIDs.size() > 0)
+ interactionId = mline.interactionIDs.get(0);
+
+ edgeRow.set(INTERACTION, interactionId);
+ edgeRow.set(CyNetwork.NAME, sourceRawID + " (" + interactionId
+ ") " + targetRawID);
+
+ setTypedEdgeListAttribute(edgeRow, mline.interactionTypes,
INTERACTION_TYPE_ID, INTERACTION_TYPE);
+ setTypedEdgeListAttribute(edgeRow, mline.detectionMethods,
DETECTION_METHOD_ID, DETECTION_METHOD);
+ setEdgeListAttribute(edgeRow, mline.sourceDBs, SOURCE_DB);
+ setEdgeListAttribute(edgeRow, mline.edgeScoreStrings,
EDGE_SCORE);
+
+ setPublication(edgeRow, mline.publicationValues,
mline.publicationDBs);
+ setAuthors(edgeRow, mline.authors);
}
private void setTaxID(CyRow row, List<String> taxonIDs, List<String>
taxonDBs) {
- row.set(TAXONIDS,taxonIDs);
- row.set(TAXONDBS,taxonDBs);
+ row.set(TAXONIDS, taxonIDs);
+ row.set(TAXONDBS, taxonDBs);
}
private void setPublication(CyRow row, List<String> pubID, List<String>
pubDB) {
- for ( int i = 0; i < pubID.size(); i++ ) {
+ for (int i = 0; i < pubID.size(); i++) {
listAttrMapper(row, PUBLICATION_ID, pubID.get(i));
listAttrMapper(row, PUBLICATION_DB, pubDB.get(i));
}
@@ -211,10 +250,10 @@
}
private void setAliases(CyRow row, List<String> aliases, List<String>
aliasDBs) {
- for ( String s : aliases ) {
+ for (String s : aliases) {
int ind = s.indexOf('(');
- if ( ind > 0 )
- s = s.substring(0,ind);
+ if (ind > 0)
+ s = s.substring(0, ind);
listAttrMapper(row, ALIASES, s);
}
}
@@ -224,6 +263,7 @@
listAttrMapper(row, key, val);
}
}
+
private void setTypedEdgeListAttribute(CyRow row, List<String> entry,
String idKey, String descKey) {
for (String val : entry) {
String id = "";
@@ -231,24 +271,24 @@
// Extract description between parens.
int openParen = val.indexOf('(');
- if ( openParen >= 0 ) {
+ if (openParen >= 0) {
int closeParen = val.indexOf(')');
- if ( closeParen > openParen)
- desc = val.substring(openParen+1,closeParen);
+ if (closeParen > openParen)
+ desc = val.substring(openParen + 1, closeParen);
}
// Extract ID between quotes.
int firstQuote = val.indexOf('"');
- if ( firstQuote >= 0 ) {
- int secondQuote = val.indexOf('"',firstQuote+1);
- if ( secondQuote > firstQuote ) {
- id = val.substring(firstQuote+1,secondQuote);
+ if (firstQuote >= 0) {
+ int secondQuote = val.indexOf('"', firstQuote + 1);
+ if (secondQuote > firstQuote) {
+ id = val.substring(firstQuote + 1, secondQuote);
}
- }
+ }
// If we can't parse properly, just shove the whole
// thing in description.
- if ( desc.equals("") || id.equals("") ) {
+ if (desc.equals("") || id.equals("")) {
listAttrMapper(row, descKey, val);
} else {
listAttrMapper(row, idKey, id);
@@ -273,8 +313,8 @@
public void cancel() {
cancelFlag = true;
}
-
- private void cleanup(Reader br) throws IOException {
+
+ private void cleanup(final Reader br) throws IOException {
br.close();
nodeMap.clear();
nodeMap = null;
@@ -282,32 +322,32 @@
private void setInteractorType(CyRow row, List<String> aliases) {
// Set type if not protein
- if (aliases.contains(CHEBI))
+ if (aliases.contains(CHEBI))
row.set(INTERACTOR_TYPE, COMPOUND);
}
private void initColumns(CyNetwork network) {
final CyTable nodeTable = network.getDefaultNodeTable();
- createListColumn(nodeTable,INTERACTOR_TYPE,String.class);
- createListColumn(nodeTable,ALIASES,String.class);
- createListColumn(nodeTable,TAXONIDS,String.class);
- createListColumn(nodeTable,TAXONDBS,String.class);
+ createListColumn(nodeTable, INTERACTOR_TYPE, String.class);
+ createListColumn(nodeTable, ALIASES, String.class);
+ createListColumn(nodeTable, TAXONIDS, String.class);
+ createListColumn(nodeTable, TAXONDBS, String.class);
final CyTable edgeTable = network.getDefaultEdgeTable();
- createListColumn(edgeTable,INTERACTION_TYPE,String.class);
- createListColumn(edgeTable,INTERACTION_TYPE_ID,String.class);
- createListColumn(edgeTable,DETECTION_METHOD,String.class);
- createListColumn(edgeTable,DETECTION_METHOD_ID,String.class);
- createListColumn(edgeTable,SOURCE_DB,String.class);
- createListColumn(edgeTable,EDGE_SCORE,String.class);
- createListColumn(edgeTable,AUTHORS,String.class);
- createListColumn(edgeTable,PUBLICATION_ID,String.class);
- createListColumn(edgeTable,PUBLICATION_DB,String.class);
+ createListColumn(edgeTable, INTERACTION_TYPE, String.class);
+ createListColumn(edgeTable, INTERACTION_TYPE_ID, String.class);
+ createListColumn(edgeTable, DETECTION_METHOD, String.class);
+ createListColumn(edgeTable, DETECTION_METHOD_ID, String.class);
+ createListColumn(edgeTable, SOURCE_DB, String.class);
+ createListColumn(edgeTable, EDGE_SCORE, String.class);
+ createListColumn(edgeTable, AUTHORS, String.class);
+ createListColumn(edgeTable, PUBLICATION_ID, String.class);
+ createListColumn(edgeTable, PUBLICATION_DB, String.class);
}
private void createListColumn(CyTable table, String colName, Class<?>
type) {
- if ( table.getColumn(colName) == null )
- table.createListColumn(colName,String.class,false);
+ if (table.getColumn(colName) == null)
+ table.createListColumn(colName, String.class, false);
}
}
Modified:
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/PsiMiTabReader.java
===================================================================
---
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/PsiMiTabReader.java
2012-06-04 19:00:30 UTC (rev 29439)
+++
core3/impl/trunk/psi-mi-impl/impl/src/main/java/org/cytoscape/psi_mi/internal/plugin/PsiMiTabReader.java
2012-06-04 19:03:10 UTC (rev 29440)
@@ -16,9 +16,13 @@
import org.cytoscape.work.Task;
import org.cytoscape.work.TaskIterator;
import org.cytoscape.work.TaskMonitor;
+import org.cytoscape.work.Tunable;
public class PsiMiTabReader extends AbstractTask implements CyNetworkReader {
-
+
+// @Tunable(description="Import all columns in the data file")
+// public Boolean importFull;
+
private InputStream inputStream;
private final CyNetworkViewFactory cyNetworkViewFactory;
@@ -26,13 +30,12 @@
private final PsiMiTabParser parser;
private CyNetwork network;
-
+
private TaskMonitor parentTaskMonitor;
private final CyProperty<Properties> prop;
-
- public PsiMiTabReader(InputStream is,
- CyNetworkViewFactory cyNetworkViewFactory,
- CyNetworkFactory cyNetworkFactory, final CyLayoutAlgorithmManager layouts, final CyProperty<Properties> prop) {
+
+ public PsiMiTabReader(InputStream is, CyNetworkViewFactory cyNetworkViewFactory, CyNetworkFactory cyNetworkFactory,
+ final CyLayoutAlgorithmManager layouts, final CyProperty<Properties> prop) {
if (is == null)
throw new NullPointerException("Input stream is null");
this.inputStream = is;
@@ -57,33 +60,33 @@
}
private void createNetwork(TaskMonitor taskMonitor) throws IOException {
-
taskMonitor.setTitle("Loading PSIMI-TAB File");
taskMonitor.setStatusMessage("Loading PSI-MI-TAB25 file.");
taskMonitor.setProgress(0.01d);
-
+
network = parser.parse(taskMonitor);
-
+
taskMonitor.setProgress(1.0d);
}
@Override
public CyNetwork[] getNetworks() {
- return new CyNetwork[] {network};
+ return new CyNetwork[] { network };
}
@Override
public CyNetworkView buildCyNetworkView(CyNetwork network) {
final CyNetworkView view =
cyNetworkViewFactory.createNetworkView(network);
-
+
String pref = CyLayoutAlgorithmManager.DEFAULT_LAYOUT_NAME;
- if(prop != null)
+ if (prop != null)
pref =
prop.getProperties().getProperty("preferredLayoutAlgorithm", pref);
final CyLayoutAlgorithm layout = layouts.getLayout(pref);
// Force to run this task here to avoid concurrency problem.
- TaskIterator itr = layout.createTaskIterator(view, layout.getDefaultLayoutContext(), CyLayoutAlgorithm.ALL_NODE_VIEWS,"");
+ TaskIterator itr = layout.createTaskIterator(view, layout.getDefaultLayoutContext(),
+ CyLayoutAlgorithm.ALL_NODE_VIEWS, "");
Task nextTask = itr.next();
try {
nextTask.run(parentTaskMonitor);
@@ -93,7 +96,7 @@
return view;
}
-
+
@Override
public void cancel() {
parser.cancel();
Modified:
core3/impl/trunk/psi-mi-impl/impl/src/test/java/org/cytoscape/psi_mi/internal/plugin/MITABLineTest.java
===================================================================
---
core3/impl/trunk/psi-mi-impl/impl/src/test/java/org/cytoscape/psi_mi/internal/plugin/MITABLineTest.java
2012-06-04 19:00:30 UTC (rev 29439)
+++
core3/impl/trunk/psi-mi-impl/impl/src/test/java/org/cytoscape/psi_mi/internal/plugin/MITABLineTest.java
2012-06-04 19:03:10 UTC (rev 29440)
@@ -31,7 +31,7 @@
// #ID Interactor A ID Interactor B Alt IDs Interactor A Alt IDs
Interactor B Aliases Interactor A Aliases Interactor B Interaction
Detection Method Publication 1st Author Publication Identifiers Taxid
Interactor A Taxid Interactor B Interaction Types Source Database
Interaction Identifiers Confidence Values
// entrez gene/locuslink:280906|GRID:158296 entrez
gene/locuslink:281119|GRID:158481 entrez gene/locuslink:RB1 entrez
gene/locuslink:DNMT1|entrez gene/locuslink:BOS_7601 - entrez
gene/locuslink:DNMT(gene name synonym) psi-mi:"MI:0004"(affinity
chromatography technology) "Robertson KD (2000)" pubmed:10888886
taxid:9913 taxid:9913 psi-mi:"MI:0915"(physical association)
psi-mi:"MI:0463"(GRID) GRID:261841 -
public void testMITABLine() throws Exception {
- MITABLine mline = new MITABLine();
+ MITABLine25 mline = new MITABLine25();
String line;
@@ -70,7 +70,7 @@
// #ID Interactor A ID Interactor B Alt IDs Interactor A Alt IDs
Interactor B Aliases Interactor A Aliases Interactor B Interaction
Detection Method Publication 1st Author Publication Identifiers Taxid
Interactor A Taxid Interactor B Interaction Types Source Database
Interaction Identifiers Confidence Values
//entrez gene/locuslink:326601|GRID:160074 entrez
gene/locuslink:819210|GRID:4545 entrez gene/locuslink:H3F3A|entrez
gene/locuslink:BOS_15646 entrez gene/locuslink:BRM|entrez
gene/locuslink:At2g46020 entrez gene/locuslink:H3F3B(gene name synonym)
entrez gene/locuslink:ARABIDOPSIS THALIANA BRAHMA(gene name synonym)|entrez
gene/locuslink:T3F17.33(gene name synonym)|entrez gene/locuslink:CHA2(gene name
synonym)|entrez gene/locuslink:CHROMATIN REMODELING 2(gene name synonym)|entrez
gene/locuslink:ATBRM(gene name synonym)|entrez gene/locuslink:BRAHMA(gene name
synonym)|entrez gene/locuslink:CHR2(gene name synonym) psi-mi:"MI:0047"(far
western blotting) "Farrona S (2007)" pubmed:17825834 taxid:9913 taxid:3702
psi-mi:"MI:0407"(direct interaction) psi-mi:"MI:0463"(GRID) GRID:271838 -
public void testMITABLine3() throws Exception {
- MITABLine mline = new MITABLine();
+ MITABLine25 mline = new MITABLine25();
String line;
--
You received this message because you are subscribed to the Google Groups
"cytoscape-cvs" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/cytoscape-cvs?hl=en.