Revision: 17479
http://sourceforge.net/p/gate/code/17479
Author: adamfunk
Date: 2014-02-27 16:26:19 +0000 (Thu, 27 Feb 2014)
Log Message:
-----------
Wow, it's all working again.
Modified Paths:
--------------
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/apply/TermScoreCopier.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/apply/TermScoreCopier.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/apply/TermScoreCopier.java	2014-02-27 15:49:04 UTC (rev 17478)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/apply/TermScoreCopier.java	2014-02-27 16:26:19 UTC (rev 17479)
@@ -78,7 +78,7 @@
// You can't put too many features in a nuclear reactor.
Map<ScoreType, Number> scoreMap = termbank.getScoreMap(term);
for (ScoreType st : scoreMap.keySet()) {
- fm.put(st.toNormalizedString(), scoreMap.get(st));
+ fm.put(st.toString(), scoreMap.get(st));
}
checkInterruption();
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java	2014-02-27 15:49:04 UTC (rev 17478)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AbstractTermbank.java	2014-02-27 16:26:19 UTC (rev 17479)
@@ -58,7 +58,6 @@
resetScores();
processCorpora();
calculateScores();
- scanTypesLanguages();
return this;
}
@@ -186,6 +185,9 @@
if ( (corpora == null) || (corpora.size() == 0) ) {
throw new ResourceInstantiationException("No corpora given");
}
+
+ this.types = new TreeSet<String>();
+ this.languages = new TreeSet<String>();
}
protected void createActions() {
@@ -220,15 +222,6 @@
}
- protected void scanTypesLanguages() {
- this.types = new TreeSet<String>();
- this.languages = new TreeSet<String>();
- for (Term term : this.getDefaultScores().keySet()) {
- this.languages.add(term.getLanguageCode());
- this.types.add(term.getType());
- }
- }
-
/* BEHOLD THE GUBBINS to distinguish the various types of Termbanks */
@@ -236,6 +229,9 @@
protected abstract void processDocument(Document document);
+ /**
+ * This also needs to fill types and languages
+ */
protected abstract void calculateScores();
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java	2014-02-27 15:49:04 UTC (rev 17478)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/AnnotationTermbank.java	2014-02-27 16:26:19 UTC (rev 17479)
@@ -63,10 +63,11 @@
public void calculateScores() {
- Double score;
-
- for (Term term : termIndividualScores.keySet()) {
- score = MergingMode.calculate(mergingMode, termIndividualScores.get(term));
+ for (Term term : termDocuments.keySet()) {
+ languages.add(term.getLanguageCode());
+ types.add(term.getType());
+
+ Double score = MergingMode.calculate(mergingMode, termIndividualScores.get(term));
Utilities.setScoreTermValue(scores, getDefaultScoreType(), term, score);
int localDF = termDocuments.get(term).size();
Utilities.setScoreTermValue(scores, localDocFrequencyST, term, localDF);
@@ -80,11 +81,13 @@
protected void resetScores() {
scores = new HashMap<ScoreType, Map<Term,Number>>();
- for (ScoreType type : scoreTypes) {
- scores.put(type, new HashMap<Term, Number>());
+ for (ScoreType st : scoreTypes) {
+ scores.put(st, new HashMap<Term, Number>());
}
termIndividualScores = new HashMap<Term, List<Double>>();
termDocuments = new HashMap<Term, Set<String>>();
+ languages = new HashSet<String>();
+ types = new HashSet<String>();
}
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java	2014-02-27 15:49:04 UTC (rev 17478)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/DocumentFrequencyBank.java	2014-02-27 16:26:19 UTC (rev 17479)
@@ -18,7 +18,6 @@
import gate.AnnotationSet;
import gate.Corpus;
import gate.Document;
-import gate.Factory;
import gate.Resource;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
@@ -52,7 +51,6 @@
processInputBanks();
processCorpora();
calculateScores();
- scanTypesLanguages();
return this;
}
@@ -75,9 +73,10 @@
protected void resetScores() {
scores = new HashMap<ScoreType, Map<Term,Number>>();
- for (ScoreType type : scoreTypes) {
- scores.put(type, new HashMap<Term, Number>());
+ for (ScoreType st : scoreTypes) {
+ scores.put(st, new HashMap<Term, Number>());
}
+
documentCount = 0;
languages = new HashSet<String>();
types = new HashSet<String>();
@@ -112,20 +111,6 @@
}
- protected void processCorpus(Corpus corpus) {
- for (int i=0 ; i < corpus.size() ; i++) {
- boolean wasLoaded = corpus.isDocumentLoaded(i);
- Document document = (Document) corpus.get(i);
- processDocument(document);
- // datastore safety
- if (! wasLoaded) {
- corpus.unloadDocument(document);
- Factory.deleteResource(document);
- }
- }
- }
-
-
protected void processDocument(Document document) {
documentCount++;
String documentSource = Utilities.sourceOrName(document);
@@ -143,7 +128,7 @@
protected void calculateScores() {
- for (Term term : this.getTerms()) {
+ for (Term term : termDocuments.keySet()) {
this.types.add(term.getType());
this.languages.add(term.getLanguageCode());
int df = termDocuments.get(term).size();
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java	2014-02-27 15:49:04 UTC (rev 17478)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/HyponymyTermbank.java	2014-02-27 16:26:19 UTC (rev 17479)
@@ -110,6 +110,9 @@
}
for (Term term : terms) {
+ this.languages.add(term.getLanguageCode());
+ this.types.add(term.getType());
+
double rawScore = calculateOneRawScore(term);
double normalized = Normalization.calculate(normalization, rawScore);
Utilities.setScoreTermValue(scores, rawScoreST, term, rawScore);
@@ -125,12 +128,15 @@
protected void resetScores() {
+ scores = new HashMap<ScoreType, Map<Term,Number>>();
+ for (ScoreType st : scoreTypes) {
+ scores.put(st, new HashMap<Term, Number>());
+ }
termHeads = new HashMap<Term, Set<String>>();
termHyponyms = new HashMap<Term, Set<String>>();
termDocuments = new HashMap<Term, Set<String>>();
- scores = new HashMap<ScoreType, Map<Term,Number>>();
- termFrequencies = new HashMap<Term, Integer>();
- docFrequencies = new HashMap<Term, Integer>();
+ languages = new HashSet<String>();
+ types = new HashSet<String>();
}
Modified:
gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java
===================================================================
--- gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java	2014-02-27 15:49:04 UTC (rev 17478)
+++ gate/branches/termraider-refactoring/plugins/TermRaider/src/gate/termraider/bank/TfIdfTermbank.java	2014-02-27 16:26:19 UTC (rev 17479)
@@ -69,6 +69,9 @@
protected void calculateScores() {
for (Term term : scores.get(termFrequencyST).keySet()) {
+ this.languages.add(term.getLanguageCode());
+ this.types.add(term.getType());
+
int tf = scores.get(termFrequencyST).get(term).intValue();
int df = docFreqSource.getFrequencyLax(term);
Utilities.setScoreTermValue(scores, refDocFrequencyST, term, df);
@@ -88,9 +91,14 @@
protected void resetScores() {
- termDocuments = new HashMap<Term, Set<String>>();
+ termDocuments = new HashMap<Term, Set<String>>();
documentCount = 0;
scores = new HashMap<ScoreType, Map<Term,Number>>();
+ for (ScoreType st : scoreTypes) {
+ scores.put(st, new HashMap<Term, Number>());
+ }
+ types = new HashSet<String>();
+ languages = new HashSet<String>();
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Flow-based real-time traffic analytics software. Cisco certified tool.
Monitor traffic, SLAs, QoS, Medianet, WAAS etc. with NetFlow Analyzer
Customize your own dashboards, set traffic alerts and generate reports.
Network behavioral analysis & security monitoring. All-in-one tool.
http://pubads.g.doubleclick.net/gampad/clk?id=126839071&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs