Revision: 17402
          http://sourceforge.net/p/gate/code/17402
Author:   markagreenwood
Date:     2014-02-22 14:44:43 +0000 (Sat, 22 Feb 2014)
Log Message:
-----------
removed some deprecated stuff from the POSTagger and cleaned up the generics to 
make the code easier to follow

Modified Paths:
--------------
    gate/trunk/src/main/hepple/postag/InvalidRuleException.java
    gate/trunk/src/main/hepple/postag/Lexicon.java
    gate/trunk/src/main/hepple/postag/POSTagger.java
    gate/trunk/src/main/hepple/postag/Rule.java

Modified: gate/trunk/src/main/hepple/postag/InvalidRuleException.java
===================================================================
--- gate/trunk/src/main/hepple/postag/InvalidRuleException.java 2014-02-22 14:06:13 UTC (rev 17401)
+++ gate/trunk/src/main/hepple/postag/InvalidRuleException.java 2014-02-22 14:44:43 UTC (rev 17402)
@@ -27,6 +27,8 @@
 
 public class InvalidRuleException extends Exception {
 
+  private static final long serialVersionUID = -805406522295075612L;
+
   public InvalidRuleException(){
   }
 

Modified: gate/trunk/src/main/hepple/postag/Lexicon.java
===================================================================
--- gate/trunk/src/main/hepple/postag/Lexicon.java      2014-02-22 14:06:13 UTC (rev 17401)
+++ gate/trunk/src/main/hepple/postag/Lexicon.java      2014-02-22 14:44:43 UTC (rev 17402)
@@ -27,9 +27,14 @@
 
 import gate.util.BomStrippingInputStreamReader;
 
-import java.util.*;
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
 import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.StringTokenizer;
 
 import org.apache.commons.io.IOUtils;
 
@@ -38,24 +43,11 @@
  * ({@link java.lang.String}) to possible POS categories
  * ({@link java.util.List}
  */
-class Lexicon extends HashMap {
+class Lexicon extends HashMap<String,List<String>> {
 
-  private String encoding;
+  private static final long serialVersionUID = -2320126076517881896L;
 
-
   /**
-   * @deprecated The lexicon file is read at construction time, so setting the
-   * encoding later will have no effect.  Use the two argument constructor to
-   * set the encoding.
-   */
-  public void setEncoding(String encoding) {
-    throw new IllegalStateException("Cannot change encoding once POS tagger "
-                                  + "has been constructed.  Use the three "
-                                  + "argument constructor to specify "
-                                  + "encoding.");
-  }
-
-  /**
    * Constructor.
    * @param lexiconURL an URL for the file contianing the lexicon.
    */
@@ -65,11 +57,10 @@
 
   /**
    * Constructor.
-   * @param lexiconURL an URL for the file contianing the lexicon.
+   * @param lexiconURL an URL for the file containing the lexicon.
    * @param encoding the character encoding to use for reading the lexicon.
    */
   public Lexicon(URL lexiconURL, String encoding) throws IOException{
-    this.encoding = encoding;
     String line;
     BufferedReader lexiconReader = null;
     InputStream lexiconStream = null;

Modified: gate/trunk/src/main/hepple/postag/POSTagger.java
===================================================================
--- gate/trunk/src/main/hepple/postag/POSTagger.java    2014-02-22 14:06:13 UTC (rev 17401)
+++ gate/trunk/src/main/hepple/postag/POSTagger.java    2014-02-22 14:44:43 UTC (rev 17402)
@@ -93,7 +93,7 @@
 
 //    static final int MAXTAGS = 200;
 
-    protected Map rules;
+    protected Map<String, List<Rule>> rules;
 //    public Rule[] rules = new Rule[MAXTAGS];
 //    public Rule[] lastRules = new Rule[MAXTAGS];
 
@@ -139,7 +139,7 @@
                                                           IOException{
       this.encoding = encoding;
       this.lexicon = new Lexicon(lexiconURL, encoding);
-      rules = new HashMap();
+      rules = new HashMap<String, List<Rule>>();
       readRules(rulesURL);
     }
 
@@ -150,7 +150,7 @@
   public Rule createNewRule(String ruleId) throws InvalidRuleException{
     try{
       String className = "hepple.postag.rules.Rule_" + ruleId;
-      Class ruleClass = Class.forName(className);
+      Class<?> ruleClass = Class.forName(className);
       return (Rule)ruleClass.newInstance();
     }catch(Exception e){
       throw new InvalidRuleException("Could not create rule " + ruleId + "!\n" +
@@ -168,15 +168,15 @@
    * being itself a list having pairs of strings as elements with
    * the word on the first position and the tag on the second.
    */
-  public List runTagger(List sentences){
-    List output = new ArrayList();
-    List taggedSentence = new ArrayList();
-    Iterator sentencesIter = sentences.iterator();
+  public List<List<String[]>> runTagger(List<List<String>> sentences){
+    List<List<String[]>> output = new ArrayList<List<String[]>>();
+    List<String[]> taggedSentence = new ArrayList<String[]>();
+    Iterator<List<String>> sentencesIter = sentences.iterator();
     while(sentencesIter.hasNext()){
-      List sentence = (List)sentencesIter.next();
-      Iterator wordsIter = sentence.iterator();
+      List<String> sentence = sentencesIter.next();
+      Iterator<String> wordsIter = sentence.iterator();
       while(wordsIter.hasNext()){
-        String newWord = (String)wordsIter.next();
+        String newWord = wordsIter.next();
         oneStep(newWord, taggedSentence);
       }//while(wordsIter.hasNext())
       //finished adding all the words from a sentence, add six more
@@ -186,27 +186,12 @@
       }
       //we have a new finished sentence
       output.add(taggedSentence);
-      taggedSentence = new ArrayList();
+      taggedSentence = new ArrayList<String[]>();
     }//while(sentencesIter.hasNext())
     return output;
   }
 
-
   /**
-   * This method sets the encoding that POS tagger uses to read rules and the
-   * lexicons.
-   *
-   * @deprecated The rules and lexicon are read at construction time, so
-   * setting the encoding later will have no effect.
-   */
-  public void setEncoding(String encoding) {
-    throw new IllegalStateException("Cannot change encoding once POS tagger "
-                                  + "has been constructed.  Use the three "
-                                  + "argument constructor to specify "
-                                  + "encoding.");
-  }
-
-  /**
    * Adds a new word to the window of 7 words (on the last position) and tags
    * the word currently in the middle (i.e. on position 3). This function
    * also reads the word on the first position and adds its tag to the
@@ -218,7 +203,7 @@
    * of tagging the current sentence so far.
    * @return returns true if a full sentence is now tagged, otherwise false.
    */
-  protected boolean oneStep(String word, List taggedSentence){
+  protected boolean oneStep(String word, List<String[]> taggedSentence){
     //add the new word at the end of the text window
     for (int i=1 ; i<7 ; i++) {
       wordBuff[i-1] = wordBuff[i];
@@ -232,11 +217,11 @@
     //apply the rules to the word in the middle of the text window
     //Try to fire a rule for the current lexical entry. It may be the case that
     //no rule applies.
-    List rulesToApply = (List)rules.get(lexBuff[3][0]);
+    List<Rule> rulesToApply = rules.get(lexBuff[3][0]);
     if(rulesToApply != null && rulesToApply.size() > 0){
-      Iterator rulesIter = rulesToApply.iterator();
+      Iterator<Rule> rulesIter = rulesToApply.iterator();
       //find the first rule that applies, fire it and stop.
-      while(rulesIter.hasNext() && !((Rule)rulesIter.next()).apply(this)){}
+      while(rulesIter.hasNext() && !(rulesIter.next()).apply(this)){}
     }
 
     //save the tagged word from the first position
@@ -276,9 +261,9 @@
         while (tokens.hasMoreTokens()) ruleParts.add(tokens.nextToken());
         if (ruleParts.size() < 3) throw new InvalidRuleException(line);
   
-        newRule = createNewRule((String)ruleParts.get(2));
+        newRule = createNewRule(ruleParts.get(2));
         newRule.initialise(ruleParts);
-        List<Rule> existingRules = (List)rules.get(newRule.from);
+        List<Rule> existingRules = rules.get(newRule.from);
         if(existingRules == null){
           existingRules = new ArrayList<Rule>();
           rules.put(newRule.from, existingRules);
@@ -307,7 +292,7 @@
 
     if (staart.equals(wd)) return staartLex;
 
-    List categories = (List)lexicon.get(wd);
+    List<String> categories = lexicon.get(wd);
     if(categories != null){
       result = new String[categories.size()];
       for(int i = 0; i < result.length; i++){
@@ -410,18 +395,18 @@
 
         while(line != null){
           StringTokenizer tokens = new StringTokenizer(line);
-          List sentence = new ArrayList();
+          List<String> sentence = new ArrayList<String>();
           while(tokens.hasMoreTokens()) sentence.add(tokens.nextToken());
-          List sentences = new ArrayList();
+          List<List<String>> sentences = new ArrayList<List<String>>();
           sentences.add(sentence);
-          List result = tagger.runTagger(sentences);
+          List<List<String[]>> result = tagger.runTagger(sentences);
 
-          Iterator iter = result.iterator();
+          Iterator<List<String[]>> iter = result.iterator();
           while(iter.hasNext()){
-            List sentenceFromTagger = (List)iter.next();
-            Iterator sentIter = sentenceFromTagger.iterator();
+            List<String[]> sentenceFromTagger = iter.next();
+            Iterator<String[]> sentIter = sentenceFromTagger.iterator();
             while(sentIter.hasNext()){
-              String[] tag = (String[])sentIter.next();
+              String[] tag = sentIter.next();
               System.out.print(tag[0] + "/" + tag[1]);
               if(sentIter.hasNext()) System.out.print(" ");
               else System.out.println();
@@ -469,13 +454,13 @@
    * Reads one input file and creates the structure needed by the tagger
    * for input.
    */
-  private static List readInput(String file) throws IOException{
+  private static List<List<String>> readInput(String file) throws IOException{
     BufferedReader reader = new BufferedReader(new FileReader(file));
     String line = reader.readLine();
-    List result = new ArrayList();
+    List<List<String>> result = new ArrayList<List<String>>();
     while(line != null){
       StringTokenizer tokens = new StringTokenizer(line);
-      List sentence = new ArrayList();
+      List<String> sentence = new ArrayList<String>();
       while(tokens.hasMoreTokens()) sentence.add(tokens.nextToken());
       result.add(sentence);
       line = reader.readLine();

Modified: gate/trunk/src/main/hepple/postag/Rule.java
===================================================================
--- gate/trunk/src/main/hepple/postag/Rule.java 2014-02-22 14:06:13 UTC (rev 17401)
+++ gate/trunk/src/main/hepple/postag/Rule.java 2014-02-22 14:44:43 UTC (rev 17402)
@@ -26,7 +26,7 @@
 
 package hepple.postag;
 
-import java.util.*;
+import java.util.List;
 
 public abstract class Rule {
 
@@ -35,13 +35,13 @@
   protected String ruleId;
   protected String[] context;
 
-  public void initialise(List ruleParts) {
-    from = (String)ruleParts.get(0);
-    to = (String)ruleParts.get(1);
-    ruleId = (String)ruleParts.get(2);
+  public void initialise(List<String> ruleParts) {
+    from = ruleParts.get(0);
+    to = ruleParts.get(1);
+    ruleId = ruleParts.get(2);
     int contextSize = ruleParts.size() - 3;
     context = new String[contextSize];
-    for (int i=0 ; i<contextSize ; i++) context[i] = (String)ruleParts.get(i+3);
+    for (int i=0 ; i<contextSize ; i++) context[i] = ruleParts.get(i+3);
   }
 
   abstract public boolean checkContext(POSTagger tagger);

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Managing the Performance of Cloud-Based Applications
Take advantage of what the Cloud has to offer - Avoid Common Pitfalls.
Read the Whitepaper.
http://pubads.g.doubleclick.net/gampad/clk?id=121054471&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to