Revision: 17459
          http://sourceforge.net/p/gate/code/17459
Author:   adamfunk
Date:     2014-02-26 21:09:57 +0000 (Wed, 26 Feb 2014)
Log Message:
-----------
Alas, what to do with the raw scores?

Modified Paths:
--------------
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractBank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java
    
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/PairCsvGenerator.java

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractBank.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractBank.java
  2014-02-26 20:51:10 UTC (rev 17458)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractBank.java
  2014-02-26 21:09:57 UTC (rev 17459)
@@ -40,7 +40,7 @@
     return this.documentCount;
   }
   
-  public abstract void saveAsCsv(double threshold, File file)
+  public abstract void saveAsCsv(Number threshold, File file)
     throws GateException;
 
   public abstract void saveAsCsv(File file)

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
      2014-02-26 20:51:10 UTC (rev 17458)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
      2014-02-26 21:09:57 UTC (rev 17459)
@@ -181,7 +181,7 @@
   
   /* Methods for saving as CSV */
   
-  public void saveAsCsv(double threshold, File outputFile) throws 
GateException {
+  public void saveAsCsv(Number threshold, File outputFile) throws 
GateException {
     PairCsvGenerator generator = new PairCsvGenerator();
     generator.generateAndSaveCsv(this, threshold, outputFile);
   }

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
      2014-02-26 20:51:10 UTC (rev 17458)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
      2014-02-26 21:09:57 UTC (rev 17459)
@@ -77,16 +77,21 @@
     return this.scoreTypes;
   }
   
-  // TODO : make this abstract and implement it
-  // everywhere as part of the overhaul
+
   public Number getScore(ScoreType type, Term term) {
-    return 0.0;
+    Map<Term, Number> mainScores = this.getScores();
+    if (mainScores.containsKey(term)) {
+      return mainScores.get(term);
+    }
+    
+    // implied else
+    return 0;
   }
 
-  // TODO : make this abstract and implement it
-  // everywhere as part of the overhaul
+
   public Collection<Term> getTerms() {
-    return new HashSet<Term>();
+    Map<Term, Number> mainScores = this.getScores();
+    return mainScores.keySet();
   }
   
   
@@ -94,11 +99,12 @@
     return this.scoreTypes.get(0);
   }
   
+  
   protected abstract void initializeScoreTypes();
   
   
   public List<Term> getTermsByDescendingScore() {
-    // lazy calculation
+    // lazy computation
     if (! descendingScoresDone) {
       termsByDescendingScore = new ArrayList<Term>(this.getTerms());
       Collections.sort(termsByDescendingScore, new 
TermComparatorByDescendingScore(scores.get(this.getDefaultScoreType())));
@@ -108,10 +114,12 @@
   }
   
 
+  @Deprecated
   public Map<Term, Double> getTermScores() {
     return this.termScores;
   }
   
+  
   public Map<Term, Set<String>> getTermDocuments() {
     return this.termDocuments;
   }
@@ -126,20 +134,28 @@
     return new HashSet<String>();
   }
   
-  
+  @Deprecated
   public Map<Term, Integer> getTermFrequencies() {
     return this.termFrequencies;
   }
   
+  @Deprecated
   public Map<Term, Integer> getDocFrequencies() {
     return this.docFrequencies;
   }
   
+  @Deprecated
   public String getFreqProperty() {
     return freqProperty;
   }
+  
+  
+  public Map<Term, Number> getScores() {
+    return this.scores.get(this.getDefaultScoreType());
+  }
 
-  public Double getMinScore() {
+  
+  public Number getMinScore() {
     if (this.termScores.isEmpty()) {
       return 1.0;
     }
@@ -147,7 +163,7 @@
     return Collections.min(this.termScores.values());
   }
   
-  public Double getMaxScore() {
+  public Number getMaxScore() {
     if (this.termScores.isEmpty()) {
       return 1.0;
     }
@@ -215,14 +231,14 @@
   
   
   
-  
+  @Deprecated
   protected int incrementTermFreq(Term term, int increment) {
     return Utilities.incrementMap(termFrequencies, term, increment);
   }
   
   
   
-  
+  // TODO: change to use getMainScores() 
   public Double getScore(Term term) {
     if (termScores.containsKey(term)) {
       return termScores.get(term).doubleValue();
@@ -233,6 +249,7 @@
   }
 
   
+  @Deprecated
   public Double getRawScore(Term term) {
     if (rawTermScores.containsKey(term)) {
       return rawTermScores.get(term).doubleValue();
@@ -246,7 +263,7 @@
   
   /* Methods for saving as CSV */
   
-  public void saveAsCsv(double threshold, File outputFile) throws 
GateException {
+  public void saveAsCsv(Number threshold, File outputFile) throws 
GateException {
     CsvGenerator.generateAndSaveCsv(this, threshold, outputFile);
   }
 

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
    2014-02-26 20:51:10 UTC (rev 17458)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
    2014-02-26 21:09:57 UTC (rev 17459)
@@ -47,7 +47,7 @@
       Term term = makeTerm(candidate, document);
       FeatureMap fm = candidate.getFeatures();
       if (fm.containsKey(inputScoreFeature)) {
-        incrementTermFreq(term, 1);
+        Utilities.incrementScoreTermValue(scores, termFrequencyST, term, 1);
         
         double score = ((Number) fm.get(inputScoreFeature)).doubleValue();
         Utilities.addToMapSet(termDocuments, term, documentSource);
@@ -72,6 +72,7 @@
     for (Term term : termIndividualScores.keySet()) {
       score = MergingMode.calculate(mergingMode, 
termIndividualScores.get(term));
       Utilities.setScoreTermValue(scores, getDefaultScoreType(), term, score);
+      Utilities.setScoreTermValue(scores, localDocFrequencyST, term, 
termDocuments.size());
     }
     
     if (debugMode) {

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
 2014-02-26 20:51:10 UTC (rev 17458)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
 2014-02-26 21:09:57 UTC (rev 17459)
@@ -251,7 +251,7 @@
 
   
   @Override
-  public void saveAsCsv(double threshold, File file) throws GateException {
+  public void saveAsCsv(Number threshold, File file) throws GateException {
     CsvGenerator.generateAndSaveCsv(this, threshold, file);
   }
 

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
      2014-02-26 20:51:10 UTC (rev 17458)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
      2014-02-26 21:09:57 UTC (rev 17459)
@@ -16,7 +16,9 @@
 import gate.*;
 import gate.termraider.modes.*;
 import gate.termraider.util.*;
+
 import org.apache.commons.lang.StringEscapeUtils;
+
 import java.util.*;
 
 
@@ -38,6 +40,7 @@
   /* EXTRA DATA FOR ANALYSIS */
   private Map<Term, Set<String>> termHeads;
   private Map<Term, Set<String>> termHyponyms;
+  private ScoreType termFrequencyST, localDocFrequencyST;
 
   
   /* Methods for the debugging GUI to get the data   */
@@ -77,7 +80,7 @@
       
       Utilities.addToMapSet(termDocuments, term, documentSource);
       Utilities.addToMapSet(termHeads, term, head);
-      incrementTermFreq(term, 1);
+      Utilities.incrementScoreTermValue(scores, termFrequencyST, term, 1);
     }
   }
 
@@ -126,11 +129,9 @@
       rawTermScores.put(term, rawScore);
       double score = Normalization.normalizeScore(rawScore);
       termScores.put(term, score);
+      Utilities.setScoreTermValue(scores, localDocFrequencyST, term, 
this.termDocuments.size());
     }
     
-    termsByDescendingScore = new ArrayList<Term>(termScores.keySet());
-    Collections.sort(termsByDescendingScore, new 
TermComparatorByDescendingScore(termScores));
-    
     if (debugMode) {
       System.out.println("Termbank: nbr of terms = " + 
termsByDescendingScore.size());
     }
@@ -152,13 +153,10 @@
   protected void initializeScoreTypes() {
     this.scoreTypes = new ArrayList<ScoreType>();
     this.scoreTypes.add(new ScoreType(scoreProperty));
-    // TODO this TB needs a whole different kettle of fish
-    //this.termFrequencyST = new ScoreType("termFrequency");
-    //this.scoreTypes.add(termFrequencyST);
-    //this.localDocFrequencyST = new ScoreType("localDocFrequency");
-    //this.scoreTypes.add(localDocFrequencyST);
-    //this.refDocFrequencyST = new ScoreType("refDocFrequency");
-    //this.scoreTypes.add(refDocFrequencyST);
+    this.termFrequencyST = new ScoreType("termFrequency");
+    this.scoreTypes.add(termFrequencyST);
+    this.localDocFrequencyST = new ScoreType("localDocFrequency");
+    this.scoreTypes.add(localDocFrequencyST);
   }
 
   

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java
        2014-02-26 20:51:10 UTC (rev 17458)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java
        2014-02-26 21:09:57 UTC (rev 17459)
@@ -24,7 +24,6 @@
   public static void generateAndSaveCsv(AbstractTermbank bank, 
           Number threshold, File outputFile) throws GateException {
     PrintWriter writer = initializeWriter(outputFile);
-    Map<Term, Double> termScores = bank.getTermScores();
     addComment(bank, "threshold = " + threshold);
     List<Term> sortedTerms = bank.getTermsByDescendingScore();
     
@@ -33,7 +32,7 @@
     writer.println(bank.getCsvHeader());
     
     for (Term term : sortedTerms) {
-      Double score = termScores.get(term);
+      Double score = bank.getScore(term);
       if (score >= threshold.doubleValue()) {
         writer.println(bank.getCsvLine(term));
         written++;

Modified: 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/PairCsvGenerator.java
===================================================================
--- 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/PairCsvGenerator.java
    2014-02-26 20:51:10 UTC (rev 17458)
+++ 
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/output/PairCsvGenerator.java
    2014-02-26 21:09:57 UTC (rev 17459)
@@ -26,7 +26,7 @@
   private String scorePropertyName;
   
   public void generateAndSaveCsv(AbstractPairbank pairbank, 
-          double threshold, File outputFile) throws GateException {
+          Number threshold, File outputFile) throws GateException {
     this.pairbank = pairbank;
     this.debugMode = pairbank.getDebugMode();
     this.scorePropertyName = pairbank.getScoreProperty();
@@ -41,7 +41,7 @@
   }
   
   
-  private void generateCsv(PrintWriter writer, double threshold) {
+  private void generateCsv(PrintWriter writer, Number threshold) {
     Map<UnorderedTermPair, Double> scores = pairbank.getScores();
     List<UnorderedTermPair> pairs = new 
ArrayList<UnorderedTermPair>(scores.keySet());
     Collections.sort(pairs, new TermPairComparatorByDescendingScore(scores));
@@ -51,7 +51,7 @@
     writeHeader(writer);    
     for (UnorderedTermPair pair: pairs) {
       double score = scores.get(pair);
-      if (score < threshold) {
+      if (score < threshold.doubleValue()) {
         break;
       }
       

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Flow-based real-time traffic analytics software. Cisco certified tool.
Monitor traffic, SLAs, QoS, Medianet, WAAS etc. with NetFlow Analyzer
Customize your own dashboards, set traffic alerts and generate reports.
Network behavioral analysis & security monitoring. All-in-one tool.
http://pubads.g.doubleclick.net/gampad/clk?id=126839071&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to