orthomatcher

markagreenwood Tue, 04 Mar 2014 06:49:47 -0800

Revision: 17528
          http://sourceforge.net/p/gate/code/17528
Author:   markagreenwood
Date:     2014-03-04 14:41:51 +0000 (Tue, 04 Mar 2014)
Log Message:
-----------
suffered through the second half of the project telco by cleaning up all the 
generics in the orthomatcher


Modified Paths:
--------------
    gate/trunk/src/main/gate/creole/orthomatcher/AnnotationOrthography.java
    gate/trunk/src/main/gate/creole/orthomatcher/BasicAnnotationOrthography.java
    gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcher.java
    gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcherHelper.java
    
gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleAnnotationOrthography.java
    
gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleOrthoMatcher.java

Modified: 
gate/trunk/src/main/gate/creole/orthomatcher/AnnotationOrthography.java
===================================================================
--- gate/trunk/src/main/gate/creole/orthomatcher/AnnotationOrthography.java     
2014-03-04 13:49:57 UTC (rev 17527)
+++ gate/trunk/src/main/gate/creole/orthomatcher/AnnotationOrthography.java     
2014-03-04 14:41:51 UTC (rev 17528)
@@ -5,11 +5,9 @@
 import gate.Document;
 import gate.creole.ExecutionException;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 /*
  * This interface is used so that one can create an orthography class that that
@@ -26,26 +24,26 @@
 
   public boolean fuzzyMatch(String s1, String s2);
 
-  public boolean allNonStopTokensInOtherAnnot(ArrayList<Annotation> firstName,
-      ArrayList<Annotation> secondName, String TOKEN_STRING_FEATURE_NAME,
+  public boolean allNonStopTokensInOtherAnnot(List<Annotation> firstName,
+      List<Annotation> secondName, String TOKEN_STRING_FEATURE_NAME,
       boolean caseSensitive);
 
   public String stripPersonTitle(String annotString, Annotation annot,
       Document doc, Map<Integer, List<Annotation>> tokensMap,
-      HashMap normalizedTokensMap, AnnotationSet nameAllAnnots)
+      Map<Integer, List<Annotation>> normalizedTokensMap, AnnotationSet 
nameAllAnnots)
       throws ExecutionException;
 
   public boolean matchedAlready(Annotation annot1, Annotation annot2,
-      List matchesDocFeature, AnnotationSet nameAllAnnots);
+      List<List<Integer>> matchesDocFeature, AnnotationSet nameAllAnnots);
 
   public Annotation updateMatches(Annotation newAnnot, String annotString,
-      HashMap processedAnnots, AnnotationSet nameAllAnnots,
-      List matchesDocFeature);
+      Map<Integer,String> processedAnnots, AnnotationSet nameAllAnnots,
+      List<List<Integer>> matchesDocFeature);
 
   public void updateMatches(Annotation newAnnot, Annotation prevAnnot,
-      List matchesDocFeature, AnnotationSet nameAllAnnots);
+      List<List<Integer>> matchesDocFeature, AnnotationSet nameAllAnnots);
 
-  public HashSet buildTables(AnnotationSet nameAllAnnots);
+  public Set<String> buildTables(AnnotationSet nameAllAnnots);
 
   public boolean isUnknownGender(String gender);
 }

Modified: 
gate/trunk/src/main/gate/creole/orthomatcher/BasicAnnotationOrthography.java
===================================================================
--- 
gate/trunk/src/main/gate/creole/orthomatcher/BasicAnnotationOrthography.java    
    2014-03-04 13:49:57 UTC (rev 17527)
+++ 
gate/trunk/src/main/gate/creole/orthomatcher/BasicAnnotationOrthography.java    
    2014-03-04 14:41:51 UTC (rev 17528)
@@ -1,8 +1,21 @@
 package gate.creole.orthomatcher;
 
+import static gate.creole.ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME;
+import static gate.creole.ANNIEConstants.LOOKUP_ANNOTATION_TYPE;
+import static gate.creole.orthomatcher.OrthoMatcherHelper.getStringForSpan;
+import static gate.creole.orthomatcher.OrthoMatcherHelper.round2Places;
+import gate.Annotation;
+import gate.AnnotationSet;
+import gate.Document;
+import gate.Factory;
+import gate.FeatureMap;
+import gate.creole.ExecutionException;
+import gate.util.BomStrippingInputStreamReader;
+import gate.util.Err;
+import gate.util.InvalidOffsetException;
+
 import java.io.BufferedReader;
 import java.io.IOException;
-import java.io.InputStreamReader;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -17,22 +30,6 @@
 
 import org.apache.log4j.Logger;
 
-import gate.Annotation;
-import gate.AnnotationSet;
-import gate.Document;
-import gate.Factory;
-import gate.FeatureMap;
-import gate.creole.ExecutionException;
-import gate.util.BomStrippingInputStreamReader;
-import gate.util.Err;
-import gate.util.InvalidOffsetException;
-
-import static gate.creole.ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME;
-import static gate.creole.ANNIEConstants.LOOKUP_ANNOTATION_TYPE;
-import static gate.creole.ANNIEConstants.PERSON_GENDER_FEATURE_NAME;
-
-import static gate.creole.orthomatcher.OrthoMatcherHelper.*;
-
 /*
  * This class defines an orthography which defines the primary behaviour of the
  * Orthomatcher processing resource in GATE.
@@ -44,8 +41,8 @@
 
   private final String unknownType;
 
-  private Map<String, HashSet<String>> nicknameMap =
-      new HashMap<String, HashSet<String>>();
+  private Map<String, Set<String>> nicknameMap =
+      new HashMap<String, Set<String>>();
 
   private final Double minimumNicknameLikelihood;
 
@@ -96,8 +93,8 @@
    * @return true if all of the tokens in firstName are either found in second
    *         name or are stop words
    */
-  public boolean allNonStopTokensInOtherAnnot(ArrayList<Annotation> firstName,
-      ArrayList<Annotation> secondName, String TOKEN_STRING_FEATURE_NAME,
+  public boolean allNonStopTokensInOtherAnnot(List<Annotation> firstName,
+      List<Annotation> secondName, String TOKEN_STRING_FEATURE_NAME,
       boolean caseSensitive) {
     for(Annotation a : firstName) {
       if(!a.getFeatures().containsKey("ortho_stop")) {
@@ -122,7 +119,7 @@
    */
   public String stripPersonTitle(String annotString, Annotation annot,
       Document doc, Map<Integer, List<Annotation>> tokensMap,
-      HashMap normalizedTokensMap, AnnotationSet nameAllAnnots)
+      Map<Integer,List<Annotation>> normalizedTokensMap, AnnotationSet 
nameAllAnnots)
       throws ExecutionException {
     FeatureMap queryFM = Factory.newFeatureMap();
     // get the offsets
@@ -159,8 +156,8 @@
             // annotString);
             // log.debug("Tokens are " + tokensMap.get(annot.getId()));
             // log.debug("Title is " + annotTitle);
-            ((ArrayList)tokensMap.get(annot.getId())).remove(0);
-            ((ArrayList)normalizedTokensMap.get(annot.getId())).remove(0);
+            tokensMap.get(annot.getId()).remove(0);
+            normalizedTokensMap.get(annot.getId()).remove(0);
             return annotString.substring(annotTitle.length() + 1,
                 annotString.length());
           }
@@ -173,11 +170,12 @@
   }
 
   public boolean matchedAlready(Annotation annot1, Annotation annot2,
-      List matchesDocFeature, AnnotationSet nameAllAnnots) {
+      List<List<Integer>> matchesDocFeature, AnnotationSet nameAllAnnots) {
     // the two annotations are already matched if the matches list of the first
     // contains the id of the second
-    List matchesList =
-        (List)annot1.getFeatures().get(ANNOTATION_COREF_FEATURE_NAME);
+    @SuppressWarnings("unchecked")
+    List<Integer> matchesList =
+        (List<Integer>)annot1.getFeatures().get(ANNOTATION_COREF_FEATURE_NAME);
     if((matchesList == null) || matchesList.isEmpty())
       return false;
     else if(matchesList.contains(annot2.getId())) return true;
@@ -185,8 +183,8 @@
   }
 
   public Annotation updateMatches(Annotation newAnnot, String annotString,
-      HashMap processedAnnots, AnnotationSet nameAllAnnots,
-      List matchesDocFeature) {
+      Map<Integer, String> processedAnnots, AnnotationSet nameAllAnnots,
+      List<List<Integer>> matchesDocFeature) {
     Annotation matchedAnnot = null;
     Integer id;
     // first find a processed annotation with the same string
@@ -195,11 +193,11 @@
     // which is indexed on string rather than testing every id. Need to have 
the
     // index be String + Type
     // for safety
-    Iterator iter = processedAnnots.keySet().iterator();
+    Iterator<Integer> iter = processedAnnots.keySet().iterator();
     // System.out.println("ID's examined: ");
     while(iter.hasNext()) {
-      id = (Integer)iter.next();
-      String oldString = (String)processedAnnots.get(id);
+      id = iter.next();
+      String oldString = processedAnnots.get(id);
       // System.out.print(id + " ");
       if(annotString.equals(oldString)) {
         Annotation tempAnnot = nameAllAnnots.get(id);
@@ -220,12 +218,13 @@
     }// while
      // System.out.println();
     if(matchedAnnot == null) return null;
-    List matchesList =
-        (List)matchedAnnot.getFeatures().get(ANNOTATION_COREF_FEATURE_NAME);
+    @SuppressWarnings("unchecked")
+    List<Integer> matchesList =
+        
(List<Integer>)matchedAnnot.getFeatures().get(ANNOTATION_COREF_FEATURE_NAME);
     if((matchesList == null) || matchesList.isEmpty()) {
       // no previous matches, so need to add
       if(matchesList == null) {
-        matchesList = new ArrayList();
+        matchesList = new ArrayList<Integer>();
         matchedAnnot.getFeatures().put(ANNOTATION_COREF_FEATURE_NAME,
             matchesList);
         matchesDocFeature.add(matchesList);
@@ -243,14 +242,15 @@
   }
 
   public void updateMatches(Annotation newAnnot, Annotation prevAnnot,
-      List matchesDocFeature, AnnotationSet nameAllAnnots) {
-    List matchesList =
-        (List)prevAnnot.getFeatures().get(
+      List<List<Integer>> matchesDocFeature, AnnotationSet nameAllAnnots) {
+    @SuppressWarnings("unchecked")
+    List<Integer> matchesList =
+        (List<Integer>)prevAnnot.getFeatures().get(
             OrthoMatcher.ANNOTATION_COREF_FEATURE_NAME);
     if((matchesList == null) || matchesList.isEmpty()) {
       // no previous matches, so need to add
       if(matchesList == null) {
-        matchesList = new ArrayList();
+        matchesList = new ArrayList<Integer>();
         prevAnnot.getFeatures().put(OrthoMatcher.ANNOTATION_COREF_FEATURE_NAME,
             matchesList);
         matchesDocFeature.add(matchesList);
@@ -286,10 +286,10 @@
   /**
    * Tables for namematch info (used by the namematch rules)
    */
-  public HashSet buildTables(AnnotationSet nameAllAnnots) {
+  public Set<String> buildTables(AnnotationSet nameAllAnnots) {
     FeatureMap tempMap = Factory.newFeatureMap();
     // reset the tables first
-    HashSet cdg = new HashSet();
+    Set<String> cdg = new HashSet<String>();
     if(!extLists) {
       // i.e. get cdg from Lookup annotations
       // get all Lookup annotations
@@ -327,16 +327,16 @@
     return true;
   } // isUnknownGender
 
-  protected Map<String, HashSet<String>> initNicknames(
+  protected Map<String, Set<String>> initNicknames(
       String nicknameFileEncoding, java.net.URL fileURL) throws IOException {
     Pattern spacePat = Pattern.compile("(\\s+)");
-    nicknameMap = new HashMap<String, HashSet<String>>();
+    nicknameMap = new HashMap<String, Set<String>>();
     // create the relative URL
     BufferedReader reader =
         new BomStrippingInputStreamReader(fileURL.openStream(),
             nicknameFileEncoding);
     String lineRead = null;
-    int ctr = 0;
+    
     while((lineRead = reader.readLine()) != null) {
       if(lineRead.length() == 0 || lineRead.charAt(0) == '#') {
         continue;

Modified: gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcher.java
===================================================================
--- gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcher.java      
2014-03-04 13:49:57 UTC (rev 17527)
+++ gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcher.java      
2014-03-04 14:41:51 UTC (rev 17528)
@@ -63,6 +63,9 @@
 import org.apache.log4j.Logger;
 @CreoleResource(name="ANNIE OrthoMatcher", comment="ANNIE orthographical 
coreference component.", 
helpURL="http://gate.ac.uk/userguide/sec:annie:orthomatcher", 
icon="ortho-matcher")
 public class OrthoMatcher extends AbstractLanguageAnalyser {
+
+  private static final long serialVersionUID = -6258229350677707465L;
+
   protected static final Logger log = Logger.getLogger(OrthoMatcher.class);
 
   public static final boolean DEBUG = false;
@@ -103,7 +106,7 @@
   protected String annotationSetName;
 
   /** the types of the annotation */
-  protected List annotationTypes = new ArrayList(10);
+  protected List<String> annotationTypes = new ArrayList<String>(10);
 
   /** the organization type*/
   protected String organizationType = ORGANIZATION_ANNOTATION_TYPE;
@@ -137,26 +140,26 @@
 
   // name lookup tables (used for namematch)
   //gave them bigger default size, coz rehash is expensive
-  protected HashMap alias = new HashMap(100);
-  protected HashSet cdg = new HashSet();
-  protected HashMap spur_match = new HashMap(100);
-  protected HashMap def_art = new HashMap(20);
-  protected HashMap connector = new HashMap(20);
-  protected HashMap prepos = new HashMap(30);
+  protected HashMap<String, String> alias = new HashMap<String, String>(100);
+  protected Set<String> cdg = new HashSet<String>();
+  protected HashMap<String, String> spur_match = new HashMap<String, 
String>(100);
+  protected HashMap<String, String> def_art = new HashMap<String, String>(20);
+  protected HashMap<String, String> connector = new HashMap<String, 
String>(20);
+  protected HashMap<String, String> prepos = new HashMap<String, String>(30);
 
 
   protected AnnotationSet nameAllAnnots = null;
 
-  protected HashMap processedAnnots = new HashMap(150);
-  protected HashMap annots2Remove = new HashMap(75);
-  protected List matchesDocFeature = new ArrayList();
+  protected HashMap<Integer, String> processedAnnots = new HashMap<Integer, 
String>(150);
+  protected HashMap<Integer, String> annots2Remove = new HashMap<Integer, 
String>(75);
+  protected List<List<Integer>> matchesDocFeature = new 
ArrayList<List<Integer>>();
   //maps annotation ids to array lists of tokens
-  protected HashMap tokensMap = new HashMap(150);
-  public HashMap getTokensMap() {
+  protected HashMap<Integer, List<Annotation>> tokensMap = new 
HashMap<Integer, List<Annotation>>(150);
+  public Map<Integer, List<Annotation>> getTokensMap() {
     return tokensMap;
   }
 
-  protected HashMap normalizedTokensMap = new HashMap(150);
+  protected Map<Integer, List<Annotation>> normalizedTokensMap = new 
HashMap<Integer, List<Annotation>>(150);
 
   protected Annotation shortAnnot;
   protected Annotation longAnnot;
@@ -301,7 +304,8 @@
       //check if we've been run on this document before
       //and clean the doc if needed
       docCleanup();
-      Map matchesMap = (Map)document.getFeatures().
+      @SuppressWarnings("unchecked")
+      Map<String, List<List<Integer>>> matchesMap = (Map<String, 
List<List<Integer>>>)document.getFeatures().
       get(DOCUMENT_COREF_FEATURE_NAME);
 
 
@@ -321,7 +325,7 @@
       //    determineMatchesDocument();
       if (! matchesDocFeature.isEmpty()) {
         if(matchesMap == null){
-          matchesMap = new HashMap();
+          matchesMap = new HashMap<String, List<List<Integer>>>();
         }
         matchesMap.put(nameAllAnnots.getName(), matchesDocFeature);
         // System.out.println("matchesMap is: " + matchesMap);
@@ -331,7 +335,7 @@
 
         //cannot do clear() as this has already been put on the document
         //so I need a new one for the next run of matcher
-        matchesDocFeature = new ArrayList();
+        matchesDocFeature = new ArrayList<List<Integer>>();
 
 
         fireStatusChanged("OrthoMatcher completed");
@@ -345,7 +349,7 @@
       annots2Remove.clear();
       tokensMap.clear();
       normalizedTokensMap.clear();
-      matchesDocFeature = new ArrayList();
+      matchesDocFeature = new ArrayList<List<Integer>>();
       longAnnot = null;
       shortAnnot = null;
       tokensLongAnnot = null;
@@ -357,9 +361,9 @@
 
   protected void matchNameAnnotations() throws ExecutionException{
     // go through all the annotation types
-    Iterator iterAnnotationTypes = annotationTypes.iterator();
+    Iterator<String> iterAnnotationTypes = annotationTypes.iterator();
     while (iterAnnotationTypes.hasNext()) {
-      String annotationType = (String)iterAnnotationTypes.next();
+      String annotationType = iterAnnotationTypes.next();
 
       AnnotationSet nameAnnots = nameAllAnnots.get(annotationType);
 
@@ -374,7 +378,6 @@
       for (int snaIndex = 0;snaIndex < sortedNameAnnots.size();snaIndex++) {
         Annotation tempAnnot = sortedNameAnnots.get(snaIndex);
         Annotation nameAnnot = nameAllAnnots.get(tempAnnot.getId()); // Not 
sure if this matters
-        Integer id = nameAnnot.getId();
 
         // get string and value
         String annotString = orthoAnnotation.getStringForAnnotation(nameAnnot, 
document);
@@ -392,7 +395,7 @@
         }
 
         // get the tokens
-        List tokens = new 
ArrayList(tokensNameAS.getContained(nameAnnot.getStartNode().getOffset(),
+        List<Annotation> tokens = new 
ArrayList<Annotation>(tokensNameAS.getContained(nameAnnot.getStartNode().getOffset(),
                 nameAnnot.getEndNode().getOffset()));
 
         //if no tokens to match, do nothing
@@ -488,7 +491,7 @@
 
       // System.out.println("Now trying to match the unknown string: " + 
unknownString);
       //get the tokens
-      List tokens = new ArrayList((Set)
+      List<Annotation> tokens = new ArrayList<Annotation>((Set<Annotation>)
               nameAllTokens.getContained(
                       unknown.getStartNode().getOffset(),
                       unknown.getEndNode().getOffset()
@@ -538,20 +541,21 @@
     } //while though unknowns
 
     if (! annots2Remove.isEmpty()) {
-      Iterator unknownIter = annots2Remove.keySet().iterator();
+      Iterator<Integer> unknownIter = annots2Remove.keySet().iterator();
       while (unknownIter.hasNext()) {
-        Integer unknId = (Integer) unknownIter.next();
+        Integer unknId = unknownIter.next();
         Annotation unknown = nameAllAnnots.get(unknId);
         Integer newID = nameAllAnnots.add(
                 unknown.getStartNode(),
                 unknown.getEndNode(),
-                (String) annots2Remove.get(unknId),
+                annots2Remove.get(unknId),
                 unknown.getFeatures()
         );
         nameAllAnnots.remove(unknown);
 
         //change the id in the matches list
-        List mList = (List)unknown.getFeatures().
+        @SuppressWarnings("unchecked")
+        List<Integer> mList = (List<Integer>)unknown.getFeatures().
         get(ANNOTATION_COREF_FEATURE_NAME);
         mList.remove(unknId);
         mList.add(newID);
@@ -560,7 +564,7 @@
   }
 
   private boolean matchHyphenatedUnknowns(Annotation unknown, String 
unknownString,
-          Iterator iter){
+          Iterator<Annotation> iter){
     boolean matched = false;
 
     //only take the substring before the hyphen
@@ -576,7 +580,7 @@
       iter.remove();
       String newType;
       if (matchedAnnot.getType().equals(unknownType))
-        newType = (String)annots2Remove.get(matchedAnnot.getId());
+        newType = annots2Remove.get(matchedAnnot.getId());
       else
         newType = matchedAnnot.getType();
 
@@ -595,7 +599,8 @@
       nameAllAnnots.remove(unknown);
 
       //change the id in the matches list
-      List mList = (List)unknown.getFeatures().
+      @SuppressWarnings("unchecked")
+      List<Integer> mList = (List<Integer>)unknown.getFeatures().
       get(ANNOTATION_COREF_FEATURE_NAME);
       mList.remove(unknown.getId());
       mList.add(newID);
@@ -703,6 +708,7 @@
 
   protected void propagatePropertyToExactMatchingMatches(Annotation 
updateAnnot,String featureName,Object value) {
     try {
+      @SuppressWarnings("unchecked")
       List<Integer> matchesList = (List<Integer>) 
updateAnnot.getFeatures().get(ANNOTATION_COREF_FEATURE_NAME);
       if ((matchesList == null) || matchesList.isEmpty()) {
         return;
@@ -734,7 +740,7 @@
 
     // find which annotation string of the two is longer
     //  this is useful for some of the matching rules
-    String prevAnnotString = (String) processedAnnots.get(prevAnnot.getId());
+    String prevAnnotString = processedAnnots.get(prevAnnot.getId());
     // Out.prln("matchAnnotations processing " + annotString + " and " + 
prevAnnotString);
     if (prevAnnotString == null) {
       //    Out.prln("We discovered that the following string is null!:  " + 
prevAnnot.getId() +
@@ -760,12 +766,13 @@
       longerPrevious = false;
     }//if
 
-    tokensLongAnnot = (ArrayList) tokensMap.get(longAnnot.getId());
-    normalizedTokensLongAnnot = (ArrayList) 
normalizedTokensMap.get(longAnnot.getId());
-    tokensShortAnnot = (ArrayList) tokensMap.get(shortAnnot.getId());
-    normalizedTokensShortAnnot = (ArrayList) 
normalizedTokensMap.get(shortAnnot.getId());
+    tokensLongAnnot = (ArrayList<Annotation>) tokensMap.get(longAnnot.getId());
+    normalizedTokensLongAnnot = (ArrayList<Annotation>) 
normalizedTokensMap.get(longAnnot.getId());
+    tokensShortAnnot = (ArrayList<Annotation>) 
tokensMap.get(shortAnnot.getId());
+    normalizedTokensShortAnnot = (ArrayList<Annotation>) 
normalizedTokensMap.get(shortAnnot.getId());
 
-    List matchesList = (List) prevAnnot.getFeatures().
+    @SuppressWarnings("unchecked")
+    List<Integer> matchesList = (List<Integer>) prevAnnot.getFeatures().
     get(ANNOTATION_COREF_FEATURE_NAME);
     if (matchesList == null || matchesList.isEmpty())
       return apply_rules_namematch(prevAnnot.getType(), shortName,longName,
@@ -784,7 +791,7 @@
       if (allMatchingNeeded) {
         allMatchingNeeded = false;
 
-        List toMatchList = new ArrayList(matchesList);
+        List<Integer> toMatchList = new ArrayList<Integer>(matchesList);
         //      if (newAnnot.getType().equals(unknownType))
         //        Out.prln("Matching new " + annotString + " with annots " + 
toMatchList);
         toMatchList.remove(prevAnnot.getId());
@@ -803,7 +810,7 @@
    *  two different entities share a common token: e.g., BT Cellnet
    *  and BT and British Telecom.
    */
-  protected boolean matchOtherAnnots( List toMatchList, Annotation newAnnot,
+  protected boolean matchOtherAnnots( List<Integer> toMatchList, Annotation 
newAnnot,
           String annotString) {
 
     //if the list is empty, then we're matching all right :-)
@@ -814,11 +821,11 @@
     int i = 0;
 
     while (matchedAll && i < toMatchList.size()) {
-      Annotation prevAnnot = nameAllAnnots.get((Integer) toMatchList.get(i));
+      Annotation prevAnnot = nameAllAnnots.get(toMatchList.get(i));
 
       // find which annotation string of the two is longer
       //  this is useful for some of the matching rules
-      String prevAnnotString = (String) processedAnnots.get(prevAnnot.getId());
+      String prevAnnotString = processedAnnots.get(prevAnnot.getId());
       if (prevAnnotString == null)
         try {
           prevAnnotString = document.getContent().getContent(
@@ -845,10 +852,10 @@
           longerPrevious = false;
         }//if
 
-        tokensLongAnnot = (ArrayList) tokensMap.get(longAnnot.getId());
-        normalizedTokensLongAnnot = (ArrayList) 
normalizedTokensMap.get(longAnnot.getId());
-        tokensShortAnnot = (ArrayList) tokensMap.get(shortAnnot.getId());
-        normalizedTokensShortAnnot = (ArrayList) 
normalizedTokensMap.get(shortAnnot.getId());
+        tokensLongAnnot = (ArrayList<Annotation>) 
tokensMap.get(longAnnot.getId());
+        normalizedTokensLongAnnot = (ArrayList<Annotation>) 
normalizedTokensMap.get(longAnnot.getId());
+        tokensShortAnnot = (ArrayList<Annotation>) 
tokensMap.get(shortAnnot.getId());
+        normalizedTokensShortAnnot = (ArrayList<Annotation>) 
normalizedTokensMap.get(shortAnnot.getId());
 
         matchedAll = apply_rules_namematch(prevAnnot.getType(), 
shortName,longName,prevAnnot,newAnnot,
                 longerPrevious);
@@ -860,16 +867,17 @@
     return matchedAll;
   }
 
+  @SuppressWarnings("unchecked")
   protected void docCleanup() {
     Object matchesValue = 
document.getFeatures().get(DOCUMENT_COREF_FEATURE_NAME);
     if (matchesValue != null && (matchesValue instanceof Map))
-      ((Map)matchesValue).remove(nameAllAnnots.getName());
+      
((Map<String,List<List<Integer>>>)matchesValue).remove(nameAllAnnots.getName());
     else if (matchesValue != null) {
-      document.getFeatures().put(DOCUMENT_COREF_FEATURE_NAME, new HashMap());
+      document.getFeatures().put(DOCUMENT_COREF_FEATURE_NAME, new 
HashMap<String,List<List<Integer>>>());
     }
 
     //get all annotations that have a matches feature
-    HashSet fNames = new HashSet();
+    HashSet<String> fNames = new HashSet<String>();
     fNames.add(ANNOTATION_COREF_FEATURE_NAME);
     AnnotationSet annots =
       nameAllAnnots.get(null, fNames);
@@ -890,11 +898,11 @@
   static Pattern periodPat = Pattern.compile("[\\.]+");
 
   protected void normalizePersonName (Annotation annot) throws 
ExecutionException {
-    ArrayList<Annotation> tokens = (ArrayList) 
normalizedTokensMap.get(annot.getId());
+    ArrayList<Annotation> tokens = (ArrayList<Annotation>) 
normalizedTokensMap.get(annot.getId());
     for (int i = tokens.size() - 1; i >= 0; i--) {
       String tokenString = ((String) 
tokens.get(i).getFeatures().get(TOKEN_STRING_FEATURE_NAME));
       String kind = (String) 
tokens.get(i).getFeatures().get(TOKEN_KIND_FEATURE_NAME);
-      String category = (String) 
tokens.get(i).getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME);
+      //String category = (String) 
tokens.get(i).getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME);
       if (!caseSensitive)  {
         tokenString = tokenString.toLowerCase();
       }
@@ -919,11 +927,10 @@
   /** return an organization  without a designator and starting The*/
   protected String normalizeOrganizationName (String annotString, Annotation 
annot){
 
-    ArrayList<Annotation> tokens = (ArrayList) tokensMap.get(annot.getId());
+    ArrayList<Annotation> tokens = (ArrayList<Annotation>) 
tokensMap.get(annot.getId());
 
     //strip starting The first
-    if ( ((String) ((Annotation) tokens.get(0)
-    ).getFeatures().get(TOKEN_STRING_FEATURE_NAME))
+    if ( ((String) tokens.get(0).getFeatures().get(TOKEN_STRING_FEATURE_NAME))
     .equalsIgnoreCase(THE_VALUE))
       tokens.remove(0);
 
@@ -972,8 +979,7 @@
 
     StringBuffer newString = new StringBuffer(50);
     for (int i = 0; i < tokens.size(); i++){
-      newString.append((String) ((Annotation) tokens.get(i)
-      ).getFeatures().get(TOKEN_STRING_FEATURE_NAME) );
+      newString.append((String) 
tokens.get(i).getFeatures().get(TOKEN_STRING_FEATURE_NAME) );
       if (i != tokens.size()-1)
         newString.append(" ");
     }
@@ -1185,7 +1191,7 @@
   @RunTime
   @Optional
   @CreoleParameter(comment="Name of the annotation types to use", 
defaultValue="Organization;Person;Location;Date")
-  public void setAnnotationTypes(List newType) {
+  public void setAnnotationTypes(List<String> newType) {
     annotationTypes = newType;
   }//setAnnotationTypes
 
@@ -1214,7 +1220,7 @@
   }//getAnnotationSetName
 
   /** get the types of the annotation*/
-  public List getAnnotationTypes() {
+  public List<String> getAnnotationTypes() {
     return annotationTypes;
   }//getAnnotationTypes
 
@@ -1331,8 +1337,8 @@
       }
       else {
         for (int i = 1; i < normalizedTokensLongAnnot.size() - 1;i++) {
-          String s1_middle = (String) ((Annotation) 
normalizedTokensLongAnnot.get(i)).getFeatures().get(TOKEN_STRING_FEATURE_NAME);
-          String s2_middle = (String) ((Annotation) 
normalizedTokensShortAnnot.get(i)).getFeatures().get(TOKEN_STRING_FEATURE_NAME);
+          String s1_middle = (String) 
normalizedTokensLongAnnot.get(i).getFeatures().get(TOKEN_STRING_FEATURE_NAME);
+          String s2_middle = (String) 
normalizedTokensShortAnnot.get(i).getFeatures().get(TOKEN_STRING_FEATURE_NAME);
           if (!caseSensitive) {
             s1_middle = s1_middle.toLowerCase();
             s2_middle = s2_middle.toLowerCase();

Modified: gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcherHelper.java
===================================================================
--- gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcherHelper.java        
2014-03-04 13:49:57 UTC (rev 17527)
+++ gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcherHelper.java        
2014-03-04 14:41:51 UTC (rev 17528)
@@ -9,6 +9,7 @@
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.log4j.Logger;
 
@@ -170,7 +171,7 @@
          protected static void setMatchesPositions(AnnotationSet 
nameAllAnnots) {
            
            //get all annotations that have a matches feature
-           HashSet fNames = new HashSet();
+           Set<String> fNames = new HashSet<String>();
            
fNames.add(gate.creole.ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME);
            AnnotationSet allMatchesAnnots =
              nameAllAnnots.get(null, fNames);

Modified: 
gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleAnnotationOrthography.java
===================================================================
--- 
gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleAnnotationOrthography.java
    2014-03-04 13:49:57 UTC (rev 17527)
+++ 
gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleAnnotationOrthography.java
    2014-03-04 14:41:51 UTC (rev 17528)
@@ -1,25 +1,14 @@
 package gate.creole.orthomatcher.SampleOrthoMatcher;
 
-import static gate.creole.ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME;
-import static gate.creole.ANNIEConstants.LOOKUP_ANNOTATION_TYPE;
-import static gate.creole.ANNIEConstants.PERSON_GENDER_FEATURE_NAME;
 import gate.Annotation;
 import gate.AnnotationSet;
 import gate.Document;
-import gate.Factory;
-import gate.FeatureMap;
 import gate.creole.ExecutionException;
 import gate.creole.orthomatcher.AnnotationOrthography;
-import gate.creole.orthomatcher.OrthoMatcherHelper;
-import gate.util.Err;
-import gate.util.InvalidOffsetException;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 
 /*
@@ -46,26 +35,26 @@
     return defaultOrthography.getStringForAnnotation(a,d);
   }
   
-  public String stripPersonTitle (String annotString, Annotation annot, 
Document doc, Map<Integer, List<Annotation>> tokensMap, HashMap 
normalizedTokensMap,AnnotationSet nameAllAnnots)
+  public String stripPersonTitle (String annotString, Annotation annot, 
Document doc, Map<Integer, List<Annotation>> tokensMap, Map<Integer, 
List<Annotation>> normalizedTokensMap,AnnotationSet nameAllAnnots)
     throws ExecutionException {
         return 
defaultOrthography.stripPersonTitle(annotString,annot,doc,tokensMap,normalizedTokensMap,nameAllAnnots);
     }
   
-  public boolean matchedAlready(Annotation annot1, Annotation annot2,List 
matchesDocFeature,AnnotationSet nameAllAnnots) {
+  public boolean matchedAlready(Annotation annot1, Annotation 
annot2,List<List<Integer>> matchesDocFeature,AnnotationSet nameAllAnnots) {
         return 
defaultOrthography.matchedAlready(annot1,annot2,matchesDocFeature,nameAllAnnots);
     }
 
-    public void updateMatches(Annotation newAnnot, Annotation prevAnnot,List 
matchesDocFeature,AnnotationSet nameAllAnnots) {
+    public void updateMatches(Annotation newAnnot, Annotation 
prevAnnot,List<List<Integer>> matchesDocFeature,AnnotationSet nameAllAnnots) {
              defaultOrthography.updateMatches(newAnnot, 
prevAnnot,matchesDocFeature,nameAllAnnots);
     } 
     
-    public HashSet buildTables(AnnotationSet nameAllAnnots) {
+    public Set<String> buildTables(AnnotationSet nameAllAnnots) {
 
       return defaultOrthography.buildTables(nameAllAnnots);
     }
 
-  public boolean allNonStopTokensInOtherAnnot(ArrayList<Annotation> arg0,
-      ArrayList<Annotation> arg1, String arg2, boolean arg3) {
+  public boolean allNonStopTokensInOtherAnnot(List<Annotation> arg0,
+      List<Annotation> arg1, String arg2, boolean arg3) {
     
     return defaultOrthography.allNonStopTokensInOtherAnnot(arg0, arg1, arg2, 
arg3);
   }
@@ -76,7 +65,7 @@
     return defaultOrthography.fuzzyMatch(arg1, arg2);
   }
 
-  public Annotation updateMatches(Annotation newAnnot, String 
annotString,HashMap processedAnnots,AnnotationSet nameAllAnnots,List 
matchesDocFeature) {
+  public Annotation updateMatches(Annotation newAnnot, String 
annotString,Map<Integer, String> processedAnnots,AnnotationSet 
nameAllAnnots,List<List<Integer>> matchesDocFeature) {
     
     return defaultOrthography.updateMatches(newAnnot, annotString, 
processedAnnots,nameAllAnnots,matchesDocFeature);
   }

Modified: 
gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleOrthoMatcher.java
===================================================================
--- 
gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleOrthoMatcher.java
     2014-03-04 13:49:57 UTC (rev 17527)
+++ 
gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleOrthoMatcher.java
     2014-03-04 14:41:51 UTC (rev 17528)
@@ -10,6 +10,8 @@
  */
 public class SampleOrthoMatcher extends OrthoMatcher {
 
+  private static final long serialVersionUID = -1774167249206778293L;
+
   @Override
   public Resource init() throws ResourceInstantiationException {
       

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Subversion Kills Productivity. Get off Subversion & Make the Move to Perforce.
With Perforce, you get hassle-free workflows. Merge that actually works. 
Faster operations. Version large binaries.  Built-in WAN optimization and the
freedom to use Git, Perforce or both. Make the move to Perforce.
http://pubads.g.doubleclick.net/gampad/clk?id=122218951&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

[gate-cvs] SF.net SVN: gate:[17528] gate/trunk/src/main/gate/creole/orthomatcher

Reply via email to