Revision: 17402
http://sourceforge.net/p/gate/code/17402
Author: markagreenwood
Date: 2014-02-22 14:44:43 +0000 (Sat, 22 Feb 2014)
Log Message:
-----------
Removed the deprecated setEncoding methods from the POSTagger and its Lexicon,
and cleaned up the generics to make the code easier to follow.
Modified Paths:
--------------
gate/trunk/src/main/hepple/postag/InvalidRuleException.java
gate/trunk/src/main/hepple/postag/Lexicon.java
gate/trunk/src/main/hepple/postag/POSTagger.java
gate/trunk/src/main/hepple/postag/Rule.java
Modified: gate/trunk/src/main/hepple/postag/InvalidRuleException.java
===================================================================
--- gate/trunk/src/main/hepple/postag/InvalidRuleException.java 2014-02-22 14:06:13 UTC (rev 17401)
+++ gate/trunk/src/main/hepple/postag/InvalidRuleException.java 2014-02-22 14:44:43 UTC (rev 17402)
@@ -27,6 +27,8 @@
public class InvalidRuleException extends Exception {
+ private static final long serialVersionUID = -805406522295075612L;
+
public InvalidRuleException(){
}
Modified: gate/trunk/src/main/hepple/postag/Lexicon.java
===================================================================
--- gate/trunk/src/main/hepple/postag/Lexicon.java 2014-02-22 14:06:13 UTC (rev 17401)
+++ gate/trunk/src/main/hepple/postag/Lexicon.java 2014-02-22 14:44:43 UTC (rev 17402)
@@ -27,9 +27,14 @@
import gate.util.BomStrippingInputStreamReader;
-import java.util.*;
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.StringTokenizer;
import org.apache.commons.io.IOUtils;
@@ -38,24 +43,11 @@
* ({@link java.lang.String}) to possible POS categories
* ({@link java.util.List}
*/
-class Lexicon extends HashMap {
+class Lexicon extends HashMap<String,List<String>> {
- private String encoding;
+ private static final long serialVersionUID = -2320126076517881896L;
-
/**
- * @deprecated The lexicon file is read at construction time, so setting the
- * encoding later will have no effect. Use the two argument constructor to
- * set the encoding.
- */
- public void setEncoding(String encoding) {
- throw new IllegalStateException("Cannot change encoding once POS tagger "
- + "has been constructed. Use the three "
- + "argument constructor to specify "
- + "encoding.");
- }
-
- /**
* Constructor.
* @param lexiconURL an URL for the file contianing the lexicon.
*/
@@ -65,11 +57,10 @@
/**
* Constructor.
- * @param lexiconURL an URL for the file contianing the lexicon.
+ * @param lexiconURL an URL for the file containing the lexicon.
* @param encoding the character encoding to use for reading the lexicon.
*/
public Lexicon(URL lexiconURL, String encoding) throws IOException{
- this.encoding = encoding;
String line;
BufferedReader lexiconReader = null;
InputStream lexiconStream = null;
Modified: gate/trunk/src/main/hepple/postag/POSTagger.java
===================================================================
--- gate/trunk/src/main/hepple/postag/POSTagger.java 2014-02-22 14:06:13 UTC (rev 17401)
+++ gate/trunk/src/main/hepple/postag/POSTagger.java 2014-02-22 14:44:43 UTC (rev 17402)
@@ -93,7 +93,7 @@
// static final int MAXTAGS = 200;
- protected Map rules;
+ protected Map<String, List<Rule>> rules;
// public Rule[] rules = new Rule[MAXTAGS];
// public Rule[] lastRules = new Rule[MAXTAGS];
@@ -139,7 +139,7 @@
IOException{
this.encoding = encoding;
this.lexicon = new Lexicon(lexiconURL, encoding);
- rules = new HashMap();
+ rules = new HashMap<String, List<Rule>>();
readRules(rulesURL);
}
@@ -150,7 +150,7 @@
public Rule createNewRule(String ruleId) throws InvalidRuleException{
try{
String className = "hepple.postag.rules.Rule_" + ruleId;
- Class ruleClass = Class.forName(className);
+ Class<?> ruleClass = Class.forName(className);
return (Rule)ruleClass.newInstance();
}catch(Exception e){
throw new InvalidRuleException("Could not create rule " + ruleId + "!\n" +
@@ -168,15 +168,15 @@
* being itself a list having pairs of strings as elements with
* the word on the first position and the tag on the second.
*/
- public List runTagger(List sentences){
- List output = new ArrayList();
- List taggedSentence = new ArrayList();
- Iterator sentencesIter = sentences.iterator();
+ public List<List<String[]>> runTagger(List<List<String>> sentences){
+ List<List<String[]>> output = new ArrayList<List<String[]>>();
+ List<String[]> taggedSentence = new ArrayList<String[]>();
+ Iterator<List<String>> sentencesIter = sentences.iterator();
while(sentencesIter.hasNext()){
- List sentence = (List)sentencesIter.next();
- Iterator wordsIter = sentence.iterator();
+ List<String> sentence = sentencesIter.next();
+ Iterator<String> wordsIter = sentence.iterator();
while(wordsIter.hasNext()){
- String newWord = (String)wordsIter.next();
+ String newWord = wordsIter.next();
oneStep(newWord, taggedSentence);
}//while(wordsIter.hasNext())
//finished adding all the words from a sentence, add six more
@@ -186,27 +186,12 @@
}
//we have a new finished sentence
output.add(taggedSentence);
- taggedSentence = new ArrayList();
+ taggedSentence = new ArrayList<String[]>();
}//while(sentencesIter.hasNext())
return output;
}
-
/**
- * This method sets the encoding that POS tagger uses to read rules and the
- * lexicons.
- *
- * @deprecated The rules and lexicon are read at construction time, so
- * setting the encoding later will have no effect.
- */
- public void setEncoding(String encoding) {
- throw new IllegalStateException("Cannot change encoding once POS tagger "
- + "has been constructed. Use the three "
- + "argument constructor to specify "
- + "encoding.");
- }
-
- /**
* Adds a new word to the window of 7 words (on the last position) and tags
* the word currently in the middle (i.e. on position 3). This function
* also reads the word on the first position and adds its tag to the
@@ -218,7 +203,7 @@
* of tagging the current sentence so far.
* @return returns true if a full sentence is now tagged, otherwise false.
*/
- protected boolean oneStep(String word, List taggedSentence){
+ protected boolean oneStep(String word, List<String[]> taggedSentence){
//add the new word at the end of the text window
for (int i=1 ; i<7 ; i++) {
wordBuff[i-1] = wordBuff[i];
@@ -232,11 +217,11 @@
//apply the rules to the word in the middle of the text window
//Try to fire a rule for the current lexical entry. It may be the case that
//no rule applies.
- List rulesToApply = (List)rules.get(lexBuff[3][0]);
+ List<Rule> rulesToApply = rules.get(lexBuff[3][0]);
if(rulesToApply != null && rulesToApply.size() > 0){
- Iterator rulesIter = rulesToApply.iterator();
+ Iterator<Rule> rulesIter = rulesToApply.iterator();
//find the first rule that applies, fire it and stop.
- while(rulesIter.hasNext() && !((Rule)rulesIter.next()).apply(this)){}
+ while(rulesIter.hasNext() && !(rulesIter.next()).apply(this)){}
}
//save the tagged word from the first position
@@ -276,9 +261,9 @@
while (tokens.hasMoreTokens()) ruleParts.add(tokens.nextToken());
if (ruleParts.size() < 3) throw new InvalidRuleException(line);
- newRule = createNewRule((String)ruleParts.get(2));
+ newRule = createNewRule(ruleParts.get(2));
newRule.initialise(ruleParts);
- List<Rule> existingRules = (List)rules.get(newRule.from);
+ List<Rule> existingRules = rules.get(newRule.from);
if(existingRules == null){
existingRules = new ArrayList<Rule>();
rules.put(newRule.from, existingRules);
@@ -307,7 +292,7 @@
if (staart.equals(wd)) return staartLex;
- List categories = (List)lexicon.get(wd);
+ List<String> categories = lexicon.get(wd);
if(categories != null){
result = new String[categories.size()];
for(int i = 0; i < result.length; i++){
@@ -410,18 +395,18 @@
while(line != null){
StringTokenizer tokens = new StringTokenizer(line);
- List sentence = new ArrayList();
+ List<String> sentence = new ArrayList<String>();
while(tokens.hasMoreTokens()) sentence.add(tokens.nextToken());
- List sentences = new ArrayList();
+ List<List<String>> sentences = new ArrayList<List<String>>();
sentences.add(sentence);
- List result = tagger.runTagger(sentences);
+ List<List<String[]>> result = tagger.runTagger(sentences);
- Iterator iter = result.iterator();
+ Iterator<List<String[]>> iter = result.iterator();
while(iter.hasNext()){
- List sentenceFromTagger = (List)iter.next();
- Iterator sentIter = sentenceFromTagger.iterator();
+ List<String[]> sentenceFromTagger = iter.next();
+ Iterator<String[]> sentIter = sentenceFromTagger.iterator();
while(sentIter.hasNext()){
- String[] tag = (String[])sentIter.next();
+ String[] tag = sentIter.next();
System.out.print(tag[0] + "/" + tag[1]);
if(sentIter.hasNext()) System.out.print(" ");
else System.out.println();
@@ -469,13 +454,13 @@
* Reads one input file and creates the structure needed by the tagger
* for input.
*/
- private static List readInput(String file) throws IOException{
+ private static List<List<String>> readInput(String file) throws IOException{
BufferedReader reader = new BufferedReader(new FileReader(file));
String line = reader.readLine();
- List result = new ArrayList();
+ List<List<String>> result = new ArrayList<List<String>>();
while(line != null){
StringTokenizer tokens = new StringTokenizer(line);
- List sentence = new ArrayList();
+ List<String> sentence = new ArrayList<String>();
while(tokens.hasMoreTokens()) sentence.add(tokens.nextToken());
result.add(sentence);
line = reader.readLine();
Modified: gate/trunk/src/main/hepple/postag/Rule.java
===================================================================
--- gate/trunk/src/main/hepple/postag/Rule.java 2014-02-22 14:06:13 UTC (rev 17401)
+++ gate/trunk/src/main/hepple/postag/Rule.java 2014-02-22 14:44:43 UTC (rev 17402)
@@ -26,7 +26,7 @@
package hepple.postag;
-import java.util.*;
+import java.util.List;
public abstract class Rule {
@@ -35,13 +35,13 @@
protected String ruleId;
protected String[] context;
- public void initialise(List ruleParts) {
- from = (String)ruleParts.get(0);
- to = (String)ruleParts.get(1);
- ruleId = (String)ruleParts.get(2);
+ public void initialise(List<String> ruleParts) {
+ from = ruleParts.get(0);
+ to = ruleParts.get(1);
+ ruleId = ruleParts.get(2);
int contextSize = ruleParts.size() - 3;
context = new String[contextSize];
- for (int i=0 ; i<contextSize ; i++) context[i] = (String)ruleParts.get(i+3);
+ for (int i=0 ; i<contextSize ; i++) context[i] = ruleParts.get(i+3);
}
abstract public boolean checkContext(POSTagger tagger);
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Managing the Performance of Cloud-Based Applications
Take advantage of what the Cloud has to offer - Avoid Common Pitfalls.
Read the Whitepaper.
http://pubads.g.doubleclick.net/gampad/clk?id=121054471&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs