Revision: 17403
http://sourceforge.net/p/gate/code/17403
Author: adamfunk
Date: 2014-02-23 21:09:42 +0000 (Sun, 23 Feb 2014)
Log Message:
-----------
DF viewer works; CSV works...
Modified Paths:
--------------
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java
gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java
gate/trunk/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java
gate/trunk/plugins/TermRaider/src/gate/termraider/util/AbstractBank.java
Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java 2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractPairbank.java 2014-02-23 21:09:42 UTC (rev 17403)
@@ -31,7 +31,6 @@
private static final long serialVersionUID = 424942970862740181L;
// CREOLE init parameters
- protected boolean debugMode;
protected String inputASName;
protected transient List<Action> actionsList;
@@ -223,18 +222,7 @@
public String getInputASName() {
return this.inputASName;
}
-
-
- @CreoleParameter(comment = "print debugging information during initialization",
- defaultValue = "false")
- public void setDebugMode(Boolean debug) {
- this.debugMode = debug;
- }
- public Boolean getDebugMode() {
- return this.debugMode;
- }
-
}
Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java 2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java 2014-02-23 21:09:42 UTC (rev 17403)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008--2013, The University of Sheffield. See the file
+ * Copyright (c) 2008--2014, The University of Sheffield. See the file
* COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
@@ -31,7 +31,6 @@
private static final long serialVersionUID = -2809051430169834059L;
// CREOLE init parameters
- protected boolean debugMode;
protected String inputASName;
protected Set<String> inputAnnotationTypes;
@@ -226,8 +225,7 @@
/* Methods for saving as CSV */
public void saveAsCsv(double threshold, File outputFile) throws GateException {
- CsvGenerator generator = new CsvGenerator();
- generator.generateAndSaveCsv(this, threshold, outputFile);
+ CsvGenerator.generateAndSaveCsv(this, threshold, outputFile);
}
/**
@@ -236,9 +234,7 @@
* @throws GateException
*/
public void saveAsCsv(File outputFile) throws GateException {
- double threshold = this.getMinScore();
- CsvGenerator generator = new CsvGenerator();
- generator.generateAndSaveCsv(this, threshold, outputFile);
+ saveAsCsv(this.getMinScore(), outputFile);
}
@@ -293,16 +289,4 @@
return this.inputAnnotationTypes;
}
-
- @CreoleParameter(comment = "print debugging information during initialization",
- defaultValue = "false")
- public void setDebugMode(Boolean debug) {
- this.debugMode = debug;
- }
-
- public Boolean getDebugMode() {
- return this.debugMode;
- }
-
-
}
Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java 2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java 2014-02-23 21:09:42 UTC (rev 17403)
@@ -13,7 +13,9 @@
import java.io.File;
import java.util.*;
+
import javax.swing.Action;
+
import gate.Annotation;
import gate.AnnotationSet;
import gate.Corpus;
@@ -25,6 +27,7 @@
import gate.creole.metadata.CreoleResource;
import gate.gui.ActionsPublisher;
import gate.termraider.gui.ActionSaveCsv;
+import gate.termraider.output.CsvGenerator;
import gate.termraider.util.*;
import gate.util.GateException;
@@ -230,11 +233,19 @@
public int getMaxFrequency() {
return this.maxFrequency;
}
+
+
+ public List<Term> getTermsByDescendingFreq() {
+ List<Term> terms = new ArrayList<Term>(this.getTerms());
+ Comparator<Term> comparator = new TermComparatorByDescendingScore(documentFrequencies);
+ Collections.sort(terms, comparator);
+ return terms;
+ }
+
@Override
public void saveAsCsv(double threshold, File file) throws GateException {
- System.out.println("CSV output has not yet been implemented.");
- // TODO Auto-generated method stub
+ CsvGenerator.generateAndSaveCsv(this, threshold, file);
}
Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java 2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/gui/DocumentFrequencyViewer.java 2014-02-23 21:09:42 UTC (rev 17403)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008--2012, The University of Sheffield. See the file
+ * Copyright (c) 2008--2014, The University of Sheffield. See the file
* COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
@@ -19,8 +19,10 @@
import gate.event.ProgressListener;
import gate.termraider.bank.*;
import gate.termraider.util.*;
+
import java.awt.BorderLayout;
import java.util.*;
+
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JSplitPane;
@@ -49,6 +51,7 @@
private ListTableModel typeTableModel, langTableModel;
private JTextField docsField;
+
@Override
public Resource init() {
initGuiComponents();
@@ -74,29 +77,23 @@
dfTab.add(freqScrollPane, BorderLayout.CENTER);
JSplitPane listsTab = new JSplitPane(JSplitPane.HORIZONTAL_SPLIT);
- typeTableModel = new ListTableModel("Term annotation types");
+ typeTableModel = new ListTableModel("Annotation types indexed");
typeTable = new JTable(typeTableModel);
- langTableModel = new ListTableModel("Language codes");
+ typeTable.setAutoCreateRowSorter(true);
+ JScrollPane typeScrollPane = new JScrollPane(typeTable,
+ JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED,
+ JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
+
+ langTableModel = new ListTableModel("Language codes indexed");
langTable = new JTable(langTableModel);
- listsTab.setLeftComponent(typeTable);
- listsTab.setRightComponent(langTable);
- tabbedPane.addTab("Types and languages", listsTab);
-
- // TODO
- // wrap each table in a pane with optional scrolling
- /*
- termTable.setAutoCreateRowSorter(true);
- pairTable.setAutoCreateRowSorter(true);
- termPane = new JScrollPane(termTable,
- JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED,
- JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
- pairPane = new JScrollPane(pairTable,
+ langTable.setAutoCreateRowSorter(true);
+ JScrollPane langScrollPane = new JScrollPane(langTable,
JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED,
JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
- splitPane.setLeftComponent(termPane);
- splitPane.setRightComponent(pairPane);
- */
+ listsTab.setLeftComponent(typeScrollPane);
+ listsTab.setRightComponent(langScrollPane);
+ tabbedPane.addTab("Types and languages", listsTab);
this.add(tabbedPane, BorderLayout.CENTER);
tabbedPane.validate();
@@ -189,6 +186,7 @@
public ListTableModel(String heading) {
this.heading = heading;
+ this.strings = new ArrayList<String>();
}
public void setList(Collection<String> strings) {
Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java 2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/gui/SliderPanel.java 2014-02-23 21:09:42 UTC (rev 17403)
@@ -28,7 +28,9 @@
-
+ // TODO
+ // Add another constructor for DocumentFrequencyBank, with more
+ // suitable slider range calculations
public SliderPanel(AbstractBank scoredbank, String verb, boolean startLeft,
TermbankViewer viewer) {
this.scoredbank = scoredbank;
Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java 2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/output/CsvGenerator.java 2014-02-23 21:09:42 UTC (rev 17403)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010--2012, The University of Sheffield. See the file
+ * Copyright (c) 2010--2014, The University of Sheffield. See the file
* COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
@@ -12,71 +12,99 @@
package gate.termraider.output;
import gate.util.GateException;
+
import java.io.*;
import java.util.*;
+
import org.apache.commons.lang.*;
+
import gate.termraider.bank.*;
import gate.termraider.util.*;
public class CsvGenerator {
- private AbstractTermbank termbank;
- private boolean debugMode;
- private String scorePropertyName;
-
- public void generateAndSaveCsv(AbstractTermbank termbank,
- double threshold, File outputFile) throws GateException {
- this.termbank = termbank;
- this.debugMode = termbank.getDebugMode();
- this.scorePropertyName = termbank.getScoreProperty();
+ public static void generateAndSaveCsv(AbstractBank bank,
+ Number threshold, File outputFile) throws GateException {
PrintWriter writer = initializeWriter(outputFile);
- generateCsv(writer, threshold);
+
+ if (bank instanceof AbstractTermbank) {
+ String scorePropertyName = bank.getScoreProperty();
+ generateTermbankCsv((AbstractTermbank) bank, writer, threshold.doubleValue(), scorePropertyName);
+ }
+ else if (bank instanceof DocumentFrequencyBank) {
+ generateDFCsv((DocumentFrequencyBank) bank, writer, threshold.intValue());
+ }
+
writer.flush();
writer.close();
- if (debugMode) {
- System.out.println("Termbank: saved CSV in " + outputFile.getAbsolutePath());
+ if (bank.getDebugMode()) {
+ System.out.println("Saved CSV to " + outputFile.getAbsolutePath() +
+ " from " + bank.getName() + " (" + bank.getClass().getName() + ")");
}
-
}
-
- private void generateCsv(PrintWriter writer, double threshold) {
- Map<Term, Double> termScores = termbank.getTermScores();
- Map<Term, Set<String>> termDocuments = termbank.getTermDocuments();
+ private static void generateTermbankCsv(AbstractTermbank bank, PrintWriter writer,
+ double threshold, String scorePropertyName) {
+ Map<Term, Double> termScores = bank.getTermScores();
+ Map<Term, Set<String>> termDocuments = bank.getTermDocuments();
Map<Term, Integer> termFrequencies = null;
- termFrequencies = termbank.getTermFrequencies();
- addComment("threshold = " + threshold);
- List<Term> sortedTerms = termbank.getTermsByDescendingScore();
+ termFrequencies = bank.getTermFrequencies();
+ addComment(bank, "threshold = " + threshold);
+ List<Term> sortedTerms = bank.getTermsByDescendingScore();
- addComment("Unfiltered nbr of terms = " + sortedTerms.size());
+ addComment(bank, "Unfiltered nbr of terms = " + sortedTerms.size());
int written = 0;
- writeHeader(writer);
+ writeTermbankHeader(writer);
for (Term term : sortedTerms) {
Double score = termScores.get(term);
if (score >= threshold) {
Set<String> documents = termDocuments.get(term);
Integer frequency = termFrequencies.get(term);
- writeContent(writer, term, score, documents, frequency);
+ writeTermBankContent(writer, term, score, documents, frequency, scorePropertyName);
written++;
}
else { // the rest must be lower
break;
}
}
- addComment("Filtered nbr of terms = " + written);
+ addComment(bank, "Filtered nbr of terms = " + written);
}
+
+ private static void generateDFCsv(DocumentFrequencyBank bank, PrintWriter writer, int threshold) {
+ Map<Term, Integer> frequencies = bank.getDocFrequencies();
+ addComment(bank, "threshold = " + threshold);
+ List<Term> sortedTerms = bank.getTermsByDescendingFreq();
+
+ addComment(bank, "Unfiltered nbr of terms = " + sortedTerms.size());
+ int written = 0;
+ writeDFHeader(writer);
+ writeDFContent(writer, "_TOTAL_DOCS_", bank.getTotalDocs());
+
+ for (Term term : sortedTerms) {
+ Integer freq = frequencies.get(term);
+ if (freq >= threshold) {
+ writeDFContent(writer, term, freq);
+ written++;
+ }
+ else { // the rest must be lower
+ break;
+ }
+ }
+ addComment(bank, "Filtered nbr of terms = " + written);
+ }
+
- private void addComment(String commentStr) {
- if (debugMode) {
- System.err.println(commentStr);
+ private static void addComment(AbstractBank termbank, String commentStr) {
+ if (termbank.getDebugMode()) {
+ System.out.println(commentStr);
}
}
- private PrintWriter initializeWriter(File outputFile) throws GateException {
+ private static PrintWriter initializeWriter(File outputFile) throws GateException {
try {
return new PrintWriter(outputFile);
}
@@ -86,8 +114,8 @@
}
-
- private void writeContent(PrintWriter writer, Term term, Double score, Set<String> documents, Integer frequency) {
+ private static void writeTermBankContent(PrintWriter writer, Term term, Double score,
+ Set<String> documents, Integer frequency, String scorePropertyName) {
StringBuilder sb = new StringBuilder();
sb.append(StringEscapeUtils.escapeCsv(term.getTermString()));
sb.append(',');
@@ -95,16 +123,18 @@
sb.append(',');
sb.append(StringEscapeUtils.escapeCsv(term.getType()));
sb.append(',');
- sb.append(StringEscapeUtils.escapeCsv(this.scorePropertyName));
+ sb.append(StringEscapeUtils.escapeCsv(scorePropertyName));
sb.append(',');
sb.append(StringEscapeUtils.escapeCsv(score.toString()));
sb.append(',');
sb.append(StringEscapeUtils.escapeCsv(Integer.toString(documents.size())));
- sb.append(',').append(StringEscapeUtils.escapeCsv(frequency.toString()));
+ sb.append(',');
+ sb.append(StringEscapeUtils.escapeCsv(frequency.toString()));
writer.println(sb.toString());
}
- private void writeHeader(PrintWriter writer) {
+
+ private static void writeTermbankHeader(PrintWriter writer) {
StringBuilder sb = new StringBuilder();
sb.append(StringEscapeUtils.escapeCsv("Term"));
sb.append(',').append(StringEscapeUtils.escapeCsv("Lang"));
@@ -115,6 +145,42 @@
sb.append(',').append(StringEscapeUtils.escapeCsv("Term_Frequency"));
writer.println(sb.toString());
}
+
+
+ private static void writeDFContent(PrintWriter writer, Term term, Integer frequency) {
+ StringBuilder sb = new StringBuilder();
+ sb.append(StringEscapeUtils.escapeCsv(term.getTermString()));
+ sb.append(',');
+ sb.append(StringEscapeUtils.escapeCsv(term.getLanguageCode()));
+ sb.append(',');
+ sb.append(StringEscapeUtils.escapeCsv(term.getType()));
+ sb.append(',');
+ sb.append(StringEscapeUtils.escapeCsv(frequency.toString()));
+ writer.println(sb.toString());
+ }
+
+
+ private static void writeDFContent(PrintWriter writer, String string, Integer frequency) {
+ StringBuilder sb = new StringBuilder();
+ sb.append(StringEscapeUtils.escapeCsv(string));
+ sb.append(',');
+ sb.append(StringEscapeUtils.escapeCsv(""));
+ sb.append(',');
+ sb.append(StringEscapeUtils.escapeCsv(""));
+ sb.append(',');
+ sb.append(StringEscapeUtils.escapeCsv(frequency.toString()));
+ writer.println(sb.toString());
+ }
+
+
+ private static void writeDFHeader(PrintWriter writer) {
+ StringBuilder sb = new StringBuilder();
+ sb.append(StringEscapeUtils.escapeCsv("Term"));
+ sb.append(',').append(StringEscapeUtils.escapeCsv("Lang"));
+ sb.append(',').append(StringEscapeUtils.escapeCsv("Type"));
+ sb.append(',').append(StringEscapeUtils.escapeCsv("DocFrequency"));
+ writer.println(sb.toString());
+ }
}
Modified: gate/trunk/plugins/TermRaider/src/gate/termraider/util/AbstractBank.java
===================================================================
--- gate/trunk/plugins/TermRaider/src/gate/termraider/util/AbstractBank.java 2014-02-22 14:44:43 UTC (rev 17402)
+++ gate/trunk/plugins/TermRaider/src/gate/termraider/util/AbstractBank.java 2014-02-23 21:09:42 UTC (rev 17403)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010--2012, The University of Sheffield. See the file
+ * Copyright (c) 2010--2014, The University of Sheffield. See the file
* COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
@@ -75,6 +75,7 @@
protected String languageFeature;
protected String inputAnnotationFeature;
protected Set<Corpus> corpora;
+ protected boolean debugMode;
@@ -118,5 +119,15 @@
public Set<Corpus> getCorpora() {
return this.corpora;
}
+
+ @CreoleParameter(comment = "print debugging information during initialization",
+ defaultValue = "false")
+ public void setDebugMode(Boolean debug) {
+ this.debugMode = debug;
+ }
+
+ public Boolean getDebugMode() {
+ return this.debugMode;
+ }
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Managing the Performance of Cloud-Based Applications
Take advantage of what the Cloud has to offer - Avoid Common Pitfalls.
Read the Whitepaper.
http://pubads.g.doubleclick.net/gampad/clk?id=121054471&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs