Revision: 17461
          http://sourceforge.net/p/gate/code/17461
Author:   adamfunk
Date:     2014-02-26 21:54:45 +0000 (Wed, 26 Feb 2014)
Log Message:
-----------
Halfway to slaying the old beast.

Modified Paths:
--------------
    gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
    gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
    gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
    gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
    gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/HyponymyDebugger.java
    gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java

Modified: gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java	2014-02-26 21:29:40 UTC (rev 17460)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java	2014-02-26 21:54:45 UTC (rev 17461)
@@ -42,13 +42,13 @@
   
   protected Map<ScoreType, Map<Term, Number>> scores;
   protected Map<Term, Set<String>>  termDocuments;
+  public static final String RAW_SUFFIX = ".raw";
   
   protected Map<Term, Double>       termScores;
-  protected Map<Term, Double>       rawTermScores;
   protected List<Term>              termsByDescendingScore;
   protected Map<Term, Integer>      termFrequencies, docFrequencies;
   protected boolean                 descendingScoresDone = false;
-
+  // TODO delete when FrequencyTableModel is superseded
   public static final String freqProperty = "frequency";
 
   protected List<ScoreType> scoreTypes;
@@ -136,16 +136,19 @@
     return new HashSet<String>();
   }
   
+  // TODO delete when FrequencyTableModel is superseded
   @Deprecated
   public Map<Term, Integer> getTermFrequencies() {
     return this.termFrequencies;
   }
   
+  // TODO delete when FrequencyTableModel is superseded
   @Deprecated
   public Map<Term, Integer> getDocFrequencies() {
     return this.docFrequencies;
   }
   
+  // TODO delete when FrequencyTableModel is superseded
   @Deprecated
   public String getFreqProperty() {
     return freqProperty;
@@ -251,18 +254,6 @@
   }
 
   
-  @Deprecated
-  public Double getRawScore(Term term) {
-    if (rawTermScores.containsKey(term)) {
-      return rawTermScores.get(term).doubleValue();
-    }
-    
-    // error code
-    return null;
-  }
-
-  
-  
   /* Methods for saving as CSV */
   
   public void saveAsCsv(Number threshold, File outputFile) throws GateException {

Modified: gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java	2014-02-26 21:29:40 UTC (rev 17460)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java	2014-02-26 21:54:45 UTC (rev 17461)
@@ -16,13 +16,9 @@
 import gate.*;
 import gate.termraider.util.*;
 import gate.termraider.modes.*;
-
 import java.util.*;
 
-import org.apache.commons.lang.StringEscapeUtils;
 
-
-
 @CreoleResource(name = "AnnotationTermbank",
     icon = "termbank-lr.png",
     comment = "TermRaider Termbank derived from document annotations")

Modified: gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java	2014-02-26 21:29:40 UTC (rev 17460)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java	2014-02-26 21:54:45 UTC (rev 17461)
@@ -16,9 +16,6 @@
 import gate.*;
 import gate.termraider.modes.*;
 import gate.termraider.util.*;
-
-import org.apache.commons.lang.StringEscapeUtils;
-
 import java.util.*;
 
 
@@ -40,7 +37,7 @@
   /* EXTRA DATA FOR ANALYSIS */
   private Map<Term, Set<String>> termHeads;
   private Map<Term, Set<String>> termHyponyms;
-  private ScoreType termFrequencyST, localDocFrequencyST;
+  private ScoreType termFrequencyST, localDocFrequencyST, rawScoreST;
 
   
   /* Methods for the debugging GUI to get the data   */
@@ -126,9 +123,8 @@
     
     for (Term term : terms) {
       double rawScore = calculateOneRawScore(term);
-      rawTermScores.put(term, rawScore);
       double score = Normalization.normalizeScore(rawScore);
-      termScores.put(term, score);
+      Utilities.setScoreTermValue(scores, rawScoreST, term, rawScore);
       Utilities.setScoreTermValue(scores, localDocFrequencyST, term, this.termDocuments.size());
     }
     
@@ -143,7 +139,6 @@
     termHyponyms    = new HashMap<Term, Set<String>>();
     termDocuments   = new HashMap<Term, Set<String>>();
     termScores      = new HashMap<Term, Double>();
-    rawTermScores   = new HashMap<Term, Double>();
     termsByDescendingScore     = new ArrayList<Term>();
     termFrequencies = new HashMap<Term, Integer>();
     docFrequencies = new HashMap<Term, Integer>();
@@ -153,6 +148,7 @@
   protected void initializeScoreTypes() {
     this.scoreTypes = new ArrayList<ScoreType>();
     this.scoreTypes.add(new ScoreType(scoreProperty));
+    
     this.termFrequencyST = new ScoreType("termFrequency");
     this.scoreTypes.add(termFrequencyST);
     this.localDocFrequencyST = new ScoreType("localDocFrequency");

Modified: gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java	2014-02-26 21:29:40 UTC (rev 17460)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java	2014-02-26 21:54:45 UTC (rev 17461)
@@ -17,13 +17,10 @@
 import gate.*;
 import gate.termraider.modes.*;
 import gate.termraider.util.*;
-
 import java.util.*;
 
-import org.apache.commons.lang.StringEscapeUtils;
 
 
-
 @CreoleResource(name = "TfIdfTermbank",
         icon = "termbank-lr.png",
         comment = "TermRaider Termbank derived from vectors in document features")
@@ -39,7 +36,7 @@
   private DocumentFrequencyBank docFreqSource;
   
   /* EXTRA DATA */
-  private ScoreType termFrequencyST, localDocFrequencyST, refDocFrequencyST;
+  private ScoreType rawScoreST, termFrequencyST, localDocFrequencyST, refDocFrequencyST;
   
   
   protected void processDocument(Document document) {
@@ -49,7 +46,7 @@
 
     for (Annotation candidate : candidates) {
       Term term = makeTerm(candidate, document);
-      incrementTermFreq(term, 1);
+      Utilities.incrementScoreTermValue(scores, termFrequencyST, term, 1);
       
       Utilities.addToMapSet(termDocuments, term, documentSource);
     }
@@ -59,6 +56,8 @@
   protected void initializeScoreTypes() {
     this.scoreTypes = new ArrayList<ScoreType>();
     this.scoreTypes.add(new ScoreType(scoreProperty));
+    this.rawScoreST = new ScoreType(scoreProperty + AbstractTermbank.RAW_SUFFIX);
+    this.scoreTypes.add(rawScoreST);
     this.termFrequencyST = new ScoreType("termFrequency");
     this.scoreTypes.add(termFrequencyST);
     this.localDocFrequencyST = new ScoreType("localDocFrequency");
@@ -79,15 +78,13 @@
       int df = getRefDocFrequency(term);
       int n = docFreqSource.getDocumentCount();
       double score = TfCalculation.calculate(tfCalculation, tf) * IdfCalculation.calculate(idfCalculation, df, n);
-      rawTermScores.put(term, Double.valueOf(score));
-      termScores.put(term, Normalization.normalizeScore(score));
+      Utilities.setScoreTermValue(scores, rawScoreST, term, score);
+      double normalized = Normalization.normalizeScore(score);
+      Utilities.setScoreTermValue(scores, getDefaultScoreType(), term, normalized);
     }
-    
-    termsByDescendingScore = new ArrayList<Term>(termScores.keySet());
-    Collections.sort(termsByDescendingScore, new TermComparatorByDescendingScore(termScores));
-    
+
     if (debugMode) {
-      System.out.println("Termbank: nbr of terms = " + termsByDescendingScore.size());
+      System.out.println("Termbank: nbr of terms = " + this.getTerms().size());
     }
   }
   
@@ -95,8 +92,6 @@
   protected void resetScores() {
     termDocuments    = new HashMap<Term, Set<String>>();
     termScores       = new HashMap<Term, Double>();
-    rawTermScores    = new HashMap<Term, Double>();
-    termsByDescendingScore      = new ArrayList<Term>();
     termFrequencies = new HashMap<Term, Integer>();
     docFrequencies = new HashMap<Term, Integer>();
     documentCount = 0;

Modified: gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/HyponymyDebugger.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/HyponymyDebugger.java	2014-02-26 21:29:40 UTC (rev 17460)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/gui/HyponymyDebugger.java	2014-02-26 21:54:45 UTC (rev 17461)
@@ -80,7 +80,7 @@
 
 class HDTableModel extends AbstractTableModel {
   private static final long serialVersionUID = -1124137938074923640L;
-  private String[] columnNames = {"term", "raw score", "docs", "docs", "hyponyms", "hyponyms", "heads"};
+  private String[] columnNames = {"term", "score", "docs", "docs", "hyponyms", "hyponyms", "heads"};
   private Map<Term, Set<String>> termDocuments, termHyponyms, termHeads;
   private List<Term> terms;
   private HyponymyTermbank termbank;
@@ -126,7 +126,7 @@
         result = term.toString();
         break;
       case 1:
-        result = Double.toString(termbank.getRawScore(term));
+        result = Double.toString(termbank.getScore(term));
         break;
       case 2: 
         result = Integer.toString(termDocuments.get(term).size());

Modified: gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java	2014-02-26 21:29:40 UTC (rev 17460)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/modes/Normalization.java	2014-02-26 21:54:45 UTC (rev 17461)
@@ -29,8 +29,8 @@
   }
   
   
-  // TODO: make the following private and add normalization
-  // options to the termbanks (except DFB)
+  // TODO: make the following private, remove deprecation,
+  // and add normalization options to the termbanks (except DFB)
   
   /**
    * The following produces the right half of a sigmoid 
@@ -39,6 +39,7 @@
    * @param score from 0 to inf 
    * @return score from 0 to 100
    */
+  @Deprecated
   public static double normalizeScore(double score) {
     double norm = 2.0 / (1.0 + Math.exp(-score / xScale)) - 1.0;
     return (double) (100.0F * norm);

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Flow-based real-time traffic analytics software. Cisco certified tool.
Monitor traffic, SLAs, QoS, Medianet, WAAS etc. with NetFlow Analyzer
Customize your own dashboards, set traffic alerts and generate reports.
Network behavioral analysis & security monitoring. All-in-one tool.
http://pubads.g.doubleclick.net/gampad/clk?id=126839071&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to