Revision: 18970
          http://sourceforge.net/p/gate/code/18970
Author:   adamfunk
Date:     2015-10-26 14:46:14 +0000 (Mon, 26 Oct 2015)
Log Message:
-----------
You can now specify a document feature to use as the document identifier
in termbank listings (falling back to the source URL, and then to
getName(), if that feature is missing/blank).  The corpus index is now
appended in [] after the identifier to make uniqueness even more likely.

Cleared the remaining eclipse warnings.

Modified Paths:
--------------
    gate/trunk/plugins/TermRaider/.classpath
    gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/bank/PMIBank.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java
    gate/trunk/plugins/TermRaider/src/gate/termraider/util/Utilities.java

Modified: gate/trunk/plugins/TermRaider/.classpath
===================================================================
--- gate/trunk/plugins/TermRaider/.classpath    2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/.classpath    2015-10-26 14:46:14 UTC (rev 18970)
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <classpath>
        <classpathentry kind="src" path="src"/>
-       <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
        <classpathentry combineaccessrules="false" exported="true" kind="src" path="/GATE"/>
+       <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
        <classpathentry kind="output" path="classes"/>
 </classpath>

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java    2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java    2015-10-26 14:46:14 UTC (rev 18970)
@@ -145,7 +145,7 @@
       boolean wasLoaded = corpus.isDocumentLoaded(i);
       Document document = (Document) corpus.get(i);
       
-      addData(document);
+      addData(document, i);
 
       // datastore safety
       if (! wasLoaded) {
@@ -166,7 +166,7 @@
 
   /* BEHOLD THE GUBBINS to distinguish the various (potential) types of Pairbanks*/
 
-  protected abstract void addData(Document document);
+  protected abstract void addData(Document document, int index);
   
   protected abstract void calculateScores(); 
   

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java    2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java    2015-10-26 14:46:14 UTC (rev 18970)
@@ -46,6 +46,7 @@
   
   // additional CREOLE init parameters
   protected Set<String> inputAnnotationTypes;
+  protected String idDocumentFeature;
 
   // transient to allow serialization
   protected transient List<Action> actionsList;
@@ -225,7 +226,7 @@
       boolean wasLoaded = corpus.isDocumentLoaded(i);
       Document document = (Document) corpus.get(i);
       
-      processDocument(document);
+      processDocument(document, i);
 
       // datastore safety
       if (! wasLoaded) {
@@ -241,7 +242,7 @@
 
   protected abstract void resetScores();
 
-  protected abstract void processDocument(Document document);
+  protected abstract void processDocument(Document document, int index);
   
   /**
    * This also needs to fill types and languages
@@ -332,4 +333,17 @@
     return this.inputAnnotationTypes;
   }
   
+
+  @CreoleParameter(comment = "doc feature to use for identification (blank = use sourceURL)",
+          defaultValue = "")
+  public void setIdDocumentFeature(String name) {
+    this.idDocumentFeature = name;
+  }
+  
+  public String getIdDocumentFeature() {
+    return this.idDocumentFeature;
+  }
+
+  
+  
 }

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java    2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java    2015-10-26 14:46:14 UTC (rev 18970)
@@ -50,9 +50,9 @@
   private ScoreType rawScoreST, termFrequencyST, localDocFrequencyST;
   
   
-  protected void processDocument(Document document) {
+  protected void processDocument(Document document, int index) {
     documentCount++;
-    String documentSource = Utilities.sourceOrName(document);
+    String documentSource = Utilities.docIdentifier(document, idDocumentFeature, index);
     AnnotationSet candidates = document.getAnnotations(inputASName).get(inputAnnotationTypes);
 
     for (Annotation candidate : candidates) {

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java    2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java    2015-10-26 14:46:14 UTC (rev 18970)
@@ -134,18 +134,18 @@
   }
   
   
-  protected void processDocument(Document document) {
+  protected void processDocument(Document document, int index) {
     if (this.segmentAnnotationType.isEmpty() || (this.segmentAnnotationType == null)) {
-      processWholeDocument(document);
+      processWholeDocument(document, index);
     }
     else {
-      processDocumentSegments(document);
+      processDocumentSegments(document, index);
     }
   }
 
   
-  protected void processDocumentSegments(Document document) {
-    String documentSource = Utilities.sourceOrName(document);
+  protected void processDocumentSegments(Document document, int index) {
+    String documentSource = Utilities.docIdentifier(document, idDocumentFeature, index);
     AnnotationSet segments = document.getAnnotations(inputASName).get(segmentAnnotationType);
     AnnotationSet candidates = document.getAnnotations(inputASName).get(inputAnnotationTypes);
 
@@ -166,9 +166,9 @@
   }
 
   
-  protected void processWholeDocument(Document document) {
+  protected void processWholeDocument(Document document, int index) {
     documentCount++;
-    String documentSource = Utilities.sourceOrName(document);
+    String documentSource = Utilities.docIdentifier(document, idDocumentFeature, index);
     AnnotationSet candidates = document.getAnnotations(inputASName).get(inputAnnotationTypes);
 
     Set<Term> documentTerms = new HashSet<Term>();

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java    2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java    2015-10-26 14:46:14 UTC (rev 18970)
@@ -73,9 +73,9 @@
   }
 
   
-  protected void processDocument(Document document) {
+  protected void processDocument(Document document, int index) {
     documentCount++;
-    String documentSource = Utilities.sourceOrName(document);
+    String documentSource = Utilities.docIdentifier(document, idDocumentFeature, index);
     AnnotationSet candidates = document.getAnnotations(inputASName).get(inputAnnotationTypes);
     
     for (Annotation candidate : candidates) {

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/PMIBank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/PMIBank.java    2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/PMIBank.java    2015-10-26 14:46:14 UTC (rev 18970)
@@ -55,8 +55,9 @@
 
   
   
-  protected void addData(Document document) {
-    String documentSource = Utilities.sourceOrName(document);
+  protected void addData(Document document, int index) {
+    // TODO: add support for the doc ID feature
+    String documentSource = Utilities.docIdentifier(document, null, index);
     /** Collocations that have already been processed in this document
      * (each collocation is a pair of IDs for a Token annotation), to avoid counting
      * them again.     */

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java    2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java    2015-10-26 14:46:14 UTC (rev 18970)
@@ -55,9 +55,9 @@
   
   
   
-  protected void processDocument(Document document) {
+  protected void processDocument(Document document, int index) {
     documentCount++;
-    String documentSource = Utilities.sourceOrName(document);
+    String documentSource = Utilities.docIdentifier(document, idDocumentFeature, index);
     AnnotationSet candidates = document.getAnnotations(inputASName).get(inputAnnotationTypes);
 
     for (Annotation candidate : candidates) {

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java    2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/gui/TermbankViewer.java    2015-10-26 14:46:14 UTC (rev 18970)
@@ -83,7 +83,7 @@
   private TermbankTableModel termbankTableModel;
   
   private XHTMLPanel termCloud = new XHTMLPanel();
-  private JComboBox cloudType;
+  private JComboBox<ScoreType> cloudType;
   private JSlider cloudSize = new JSlider();
   private List<ScoreType> scoreTypes;
   
@@ -177,7 +177,7 @@
     cloudBar.setFloatable(false);
     JButton btnExport = new JButton(MainFrame.getIcon("Download"));
     
-    cloudType = new JComboBox();
+    cloudType = new JComboBox<ScoreType>();
     
     Hashtable<Integer, JLabel> labelTable = new Hashtable<Integer,JLabel>();
     labelTable.put(0, new JLabel(MainFrame.getIcon("Sunny")));

Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/util/Utilities.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/util/Utilities.java    2015-10-26 10:31:39 UTC (rev 18969)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/util/Utilities.java    2015-10-26 14:46:14 UTC (rev 18970)
@@ -131,14 +131,28 @@
   }
 
   
-  public static String sourceOrName(Document document) {
-    URL url = document.getSourceUrl();
-    if (url == null) {
-      return document.getName();
+  public static String docIdentifier(Document document, String feature, int index) {
+    String identifier = null;
+    if ( (feature != null) && (! feature.isEmpty() ) &&      
+            document.getFeatures().containsKey(feature) ) { 
+      Object value = document.getFeatures().get(feature);
+      if (value != null) {
+        identifier = value.toString();
+      }
     }
+
+    if (identifier == null) { 
+      URL url = document.getSourceUrl();
+      if (url != null) {
+        identifier = url.toString();
+      }
+    }
     
-    //implied else
-    return url.toString();
+    if (identifier == null) {
+      identifier = document.getName();
+    }
+
+    return String.format("%s [%d]", identifier, index);
   }
   
 

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to