Revision: 17613
          http://sourceforge.net/p/gate/code/17613
Author:   markagreenwood
Date:     2014-03-10 09:14:31 +0000 (Mon, 10 Mar 2014)
Log Message:
-----------
gate.creole is now warning free :)

Modified Paths:
--------------
    gate/trunk/src/main/gate/creole/GazetteerListsCollector.java
    gate/trunk/src/main/gate/creole/RealtimeCorpusController.java

Modified: gate/trunk/src/main/gate/creole/GazetteerListsCollector.java
===================================================================
--- gate/trunk/src/main/gate/creole/GazetteerListsCollector.java        2014-03-10 08:51:17 UTC (rev 17612)
+++ gate/trunk/src/main/gate/creole/GazetteerListsCollector.java        2014-03-10 09:14:31 UTC (rev 17613)
@@ -1,73 +1,85 @@
 package gate.creole;
 
-import java.io.*;
-import java.util.*;
-
 import gate.Annotation;
 import gate.AnnotationSet;
-import gate.creole.gazetteer.*;
-import gate.util.*;
+import gate.creole.gazetteer.Gazetteer;
+import gate.creole.gazetteer.GazetteerList;
+import gate.creole.gazetteer.GazetteerNode;
+import gate.creole.gazetteer.LinearNode;
+import gate.creole.gazetteer.Lookup;
+import gate.util.GateRuntimeException;
+import gate.util.InvalidOffsetException;
+import gate.util.Out;
 
+import java.io.BufferedWriter;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.StringTokenizer;
+
 public class GazetteerListsCollector extends AbstractLanguageAnalyser {
+
+  private static final long serialVersionUID = -4124586772112530437L;
+
   private static String PERSON_ANNOT_NAME = "PER";
 
   @Override
   public void execute() throws gate.creole.ExecutionException {
-    //reinitialise the stats
-    statsPerType = new HashMap();
+    // reinitialise the stats
+    statsPerType = new HashMap<String, Map<String, Integer>>();
 
-    //check the input
+    // check the input
     if(document == null) {
-      throw new ExecutionException(
-        "No document to process!"
-      );
+      throw new ExecutionException("No document to process!");
     }
 
-    if (gazetteer == null) {
-      throw new ExecutionException(
-        "No gazetteer set!"
-      );
+    if(gazetteer == null) {
+      throw new ExecutionException("No gazetteer set!");
     }
 
-    //if no annotation types given, then exit
-    if ((this.annotationTypes == null) || annotationTypes.isEmpty()) {
+    // if no annotation types given, then exit
+    if((this.annotationTypes == null) || annotationTypes.isEmpty()) {
       Out.prln("Gazetteer Lists Collector Warning: No annotation types given for processing");
       return;
     }
 
     // get the annotations from document
-    if ((markupSetName == null)|| (markupSetName.equals("")))
+    if((markupSetName == null) || (markupSetName.equals("")))
       allAnnots = document.getAnnotations();
-    else
-      allAnnots = document.getAnnotations(markupSetName);
+    else allAnnots = document.getAnnotations(markupSetName);
 
-    //if none found, print warning and exit
-    if ((allAnnots == null) || allAnnots.isEmpty()) {
+    // if none found, print warning and exit
+    if((allAnnots == null) || allAnnots.isEmpty()) {
       Out.prln("Gazetteer Lists Collector Warning: No annotations found for processing");
       return;
     }
 
-    //collect the stats for each annotation type
-    for (int i = 0; i < annotationTypes.size(); i++) {
-      AnnotationSet annots = allAnnots.get((String) annotationTypes.get(i));
-      if (annots == null || annots.isEmpty())
-        continue;
-      statsPerType.put(annotationTypes.get(i), new HashMap());
-      collectLists(annots, (String) annotationTypes.get(i));
+    // collect the stats for each annotation type
+    for(int i = 0; i < annotationTypes.size(); i++) {
+      AnnotationSet annots = allAnnots.get(annotationTypes.get(i));
+      if(annots == null || annots.isEmpty()) continue;
+      statsPerType.put(annotationTypes.get(i), new HashMap<String, Integer>());
+      collectLists(annots, annotationTypes.get(i));
     }
 
-    //print out the stats in log files
+    // print out the stats in log files
     printStats();
 
-    //save the updated gazetteer lists now
-    Map theLists = gazetteer.getLinearDefinition().getListsByNode();
-    Iterator iter1 = theLists.keySet().iterator();
-    while (iter1.hasNext()) {
-      GazetteerList theList = (GazetteerList) theLists.get(iter1.next());
+    // save the updated gazetteer lists now
+    Map<LinearNode, GazetteerList> theLists =
+            gazetteer.getLinearDefinition().getListsByNode();
+    Iterator<LinearNode> iter1 = theLists.keySet().iterator();
+    while(iter1.hasNext()) {
+      GazetteerList theList = theLists.get(iter1.next());
       try {
-        if (theList.isModified())
-          theList.store();
-      } catch (ResourceInstantiationException ex) {
+        if(theList.isModified()) theList.store();
+      } catch(ResourceInstantiationException ex) {
         throw new GateRuntimeException(ex.getMessage());
       }
     }
@@ -78,23 +90,27 @@
     markupSetName = newMarkupASName;
   }
 
-  public String  getMarkupASName() {
+  public String getMarkupASName() {
     return markupSetName;
   }
 
-  /** get the types of the annotation
+  /**
+   * get the types of the annotation
+   * 
    * @return type of the annotation
    */
-  public List getAnnotationTypes() {
+  public List<String> getAnnotationTypes() {
     return annotationTypes;
-  }//getAnnotationTypes
+  }// getAnnotationTypes
 
-  /** set the types of the annotations
-   * @param newType 
+  /**
+   * set the types of the annotations
+   * 
+   * @param newType
    */
-  public void setAnnotationTypes(List newType) {
+  public void setAnnotationTypes(List<String> newType) {
     annotationTypes = newType;
-  }//setAnnotationTypes
+  }// setAnnotationTypes
 
   public Gazetteer getGazetteer() {
     return gazetteer;
@@ -108,7 +124,7 @@
     theLanguage = language;
   }
 
-  public String  getTheLanguage() {
+  public String getTheLanguage() {
     return theLanguage;
   }
 
@@ -116,82 +132,80 @@
     Iterator<Annotation> iter = annots.iterator();
     String listName = "";
     GazetteerList theList = null;
-    Iterator theListsIter =
-      gazetteer.getLinearDefinition().getListsByNode().values().iterator();
-    while (theListsIter.hasNext() && listName.equals("")) {
-      theList = (GazetteerList) theListsIter.next();
-      if (theList.getURL().toExternalForm().endsWith(annotType + ".lst"))
+    Iterator<GazetteerList> theListsIter =
+            gazetteer.getLinearDefinition().getListsByNode().values()
+                    .iterator();
+    while(theListsIter.hasNext() && listName.equals("")) {
+      theList = theListsIter.next();
+      if(theList.getURL().toExternalForm().endsWith(annotType + ".lst"))
         listName = theList.getURL().toExternalForm();
     }
-    while (iter.hasNext()) {
+    while(iter.hasNext()) {
       Annotation annot = iter.next();
       String text = "";
-      List strings = new ArrayList();
+      List<String> strings = new ArrayList<String>();
       try {
-        text = document.getContent().getContent(
-          annot.getStartNode().getOffset(),
-          annot.getEndNode().getOffset()
-        ).toString();
-        //tokenise the text and save for the future if we need it
-        StringTokenizer tok = new StringTokenizer(text, "\n\r.|();-?!\t", false);
-        while (tok.hasMoreTokens())
+        text =
+                document.getContent()
+                        .getContent(annot.getStartNode().getOffset(),
+                                annot.getEndNode().getOffset()).toString();
+        // tokenise the text and save for the future if we need it
+        StringTokenizer tok =
+                new StringTokenizer(text, "\n\r.|();-?!\t", false);
+        while(tok.hasMoreTokens())
           strings.add(tok.nextToken());
-        //then replace the line breaks with spaces for the gazetteer
+        // then replace the line breaks with spaces for the gazetteer
         text = text.replace('\r', ' ');
         text = text.replace('\n', ' ');
         text = text.replace('\t', ' ');
 
-      } catch (InvalidOffsetException ex) {
+      } catch(InvalidOffsetException ex) {
         throw new GateRuntimeException(ex.getMessage());
       }
 
-      //collect stats for the string
-      if (((HashMap) statsPerType.get(annotType)).containsKey(text))
-        ((HashMap) statsPerType.get(annotType)).put(text,
-            new Integer(((Integer)
-              ((HashMap) statsPerType.get(annotType)).get(text)).intValue()+1));
-      else
-        ((HashMap) statsPerType.get(annotType)).put(text, new Integer(1));
+      // collect stats for the string
+      if(statsPerType.get(annotType).containsKey(text))
+        statsPerType.get(annotType).put(
+                text,
+                new Integer(
+                        statsPerType.get(annotType).get(text).intValue() + 1));
+      else statsPerType.get(annotType).put(text, new Integer(1));
 
-      //also collect stats for the individual tokens in the name to identify the most
-      //frequent tokens across names
-      if (strings.size() > 1) {
-        for (int i=0; i < strings.size(); i++) {
-          String theString = (String) strings.get(i);
-          //collect stats for the string
-          if ( ( (HashMap) statsPerType.get(annotType)).containsKey(theString))
-            ( (HashMap) statsPerType.get(annotType)).put(theString,
-                new Integer( ( (Integer)
-                              ( (HashMap) statsPerType.get(annotType)).get(
-                theString)).intValue() + 1));
-          else
-            ( (HashMap) statsPerType.get(annotType)).put(theString,
-                new Integer(1));
+      // also collect stats for the individual tokens in the name to
+      // identify the most
+      // frequent tokens across names
+      if(strings.size() > 1) {
+        for(int i = 0; i < strings.size(); i++) {
+          String theString = strings.get(i);
+          // collect stats for the string
+          if(statsPerType.get(annotType).containsKey(theString))
+            statsPerType.get(annotType).put(
+                    theString,
+                    new Integer(statsPerType.get(annotType).get(theString)
+                            .intValue() + 1));
+          else statsPerType.get(annotType).put(theString, new Integer(1));
         }
       }
 
-      //first we check whether the text is already in the gazetteer
-      Set lookupResult = gazetteer.lookup(text);
-      if (lookupResult != null && lookupResult.size() > 0)
-        continue;
-      //if not, then we add it
-      gazetteer.add(text,
-        new Lookup(listName, annotType, "inferred", theLanguage));
-//      theList.add(text + document.getSourceUrl().toString());
+      // first we check whether the text is already in the gazetteer
+      Set<Lookup> lookupResult = gazetteer.lookup(text);
+      if(lookupResult != null && lookupResult.size() > 0) continue;
+      // if not, then we add it
+      gazetteer.add(text, new Lookup(listName, annotType, "inferred",
+              theLanguage));
+      // theList.add(text + document.getSourceUrl().toString());
       theList.add(new GazetteerNode(text));
 
-
-      //for persons we want also to add their individual names to the list
-      if (annotType.equals(PERSON_ANNOT_NAME) && strings.size() > 1) {
-        for (int i=0; i < strings.size(); i++) {
-          String theString = (String) strings.get(i);
-          Set lookupResult1 = gazetteer.lookup(theString);
-          if (lookupResult1 != null && lookupResult1.size() > 0)
-            continue;
-          if (theString.length() < 3)
-            continue;
-          gazetteer.add(theString,
-            new Lookup(listName, annotType, "inferred", theLanguage));
+      // for persons we want also to add their individual names to the
+      // list
+      if(annotType.equals(PERSON_ANNOT_NAME) && strings.size() > 1) {
+        for(int i = 0; i < strings.size(); i++) {
+          String theString = strings.get(i);
+          Set<Lookup> lookupResult1 = gazetteer.lookup(theString);
+          if(lookupResult1 != null && lookupResult1.size() > 0) continue;
+          if(theString.length() < 3) continue;
+          gazetteer.add(theString, new Lookup(listName, annotType, "inferred",
+                  theLanguage));
           theList.add(new GazetteerNode(theString));
         }
       }
@@ -200,43 +214,47 @@
 
   protected void printStats() {
     try {
-      for (int i=0; i < annotationTypes.size(); i++) {
-        if (! statsPerType.containsKey(annotationTypes.get(i)))
-          continue;
-        BufferedWriter writer = new BufferedWriter(
-          new OutputStreamWriter(new FileOutputStream(
-           annotationTypes.get(i) + ".stats.lst"),
-          "UTF-8"));
-        HashMap stats = (HashMap) statsPerType.get(annotationTypes.get(i));
-        Iterator stringsIter = stats.keySet().iterator();
-        while (stringsIter.hasNext()) {
-          String string = (String) stringsIter.next();
+      for(int i = 0; i < annotationTypes.size(); i++) {
+        if(!statsPerType.containsKey(annotationTypes.get(i))) continue;
+        BufferedWriter writer =
+                new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
+                        annotationTypes.get(i) + ".stats.lst"), "UTF-8"));
+        Map<String,Integer> stats = statsPerType.get(annotationTypes.get(i));
+        Iterator<String> stringsIter = stats.keySet().iterator();
+        while(stringsIter.hasNext()) {
+          String string = stringsIter.next();
           writer.write(string);
           writer.write("$");
-          writer.write( ((Integer)stats.get(string)).toString());
+          writer.write(stats.get(string).toString());
           writer.newLine();
         }
         writer.close();
       }
-  } catch(IOException ioe){
+    } catch(IOException ioe) {
       throw new RuntimeException(ioe.getMessage());
-  }//try
+    }// try
 
   }
 
   /**
-   * The idea is to have this method check if an item
-   * is already present in the gazetteer under this type,
-   * and if so, not to add it. It is not implemented for now.
+   * The idea is to have this method check if an item is already present
+   * in the gazetteer under this type, and if so, not to add it. It is
+   * not implemented for now.
    */
   protected boolean alreadyPresentInGazetteer(String token) {
     return false;
   }
 
   private String markupSetName = "";
+
   private AnnotationSet allAnnots;
-  private List annotationTypes;
+
+  private List<String> annotationTypes;
+
   private Gazetteer gazetteer;
+
   private String theLanguage = "";
-  private HashMap statsPerType = new HashMap();
-}
\ No newline at end of file
+
+  private Map<String, Map<String, Integer>> statsPerType =
+          new HashMap<String, Map<String, Integer>>();
+}

Modified: gate/trunk/src/main/gate/creole/RealtimeCorpusController.java
===================================================================
--- gate/trunk/src/main/gate/creole/RealtimeCorpusController.java       2014-03-10 08:51:17 UTC (rev 17612)
+++ gate/trunk/src/main/gate/creole/RealtimeCorpusController.java       2014-03-10 09:14:31 UTC (rev 17613)
@@ -470,9 +470,5 @@
   }
   
   protected boolean suppressExceptions = true;
-  
-  /**
-   * Sleep time in milliseconds while waiting for worker thread to finish.
-   */
-  private static final int POLL_INTERVAL = 50;
+
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Learn Graph Databases - Download FREE O'Reilly Book
"Graph Databases" is the definitive new guide to graph databases and their
applications. Written by three acclaimed leaders in the field,
this first edition is now available. Download your free book today!
http://p.sf.net/sfu/13534_NeoTech
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to