Revision: 17528
http://sourceforge.net/p/gate/code/17528
Author: markagreenwood
Date: 2014-03-04 14:41:51 +0000 (Tue, 04 Mar 2014)
Log Message:
-----------
suffered through the second half of the project telco by cleaning up all the
generics in the orthomatcher
Modified Paths:
--------------
gate/trunk/src/main/gate/creole/orthomatcher/AnnotationOrthography.java
gate/trunk/src/main/gate/creole/orthomatcher/BasicAnnotationOrthography.java
gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcher.java
gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcherHelper.java
gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleAnnotationOrthography.java
gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleOrthoMatcher.java
Modified: gate/trunk/src/main/gate/creole/orthomatcher/AnnotationOrthography.java
===================================================================
--- gate/trunk/src/main/gate/creole/orthomatcher/AnnotationOrthography.java	2014-03-04 13:49:57 UTC (rev 17527)
+++ gate/trunk/src/main/gate/creole/orthomatcher/AnnotationOrthography.java	2014-03-04 14:41:51 UTC (rev 17528)
@@ -5,11 +5,9 @@
import gate.Document;
import gate.creole.ExecutionException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
/*
* This interface is used so that one can create an orthography class that that
@@ -26,26 +24,26 @@
public boolean fuzzyMatch(String s1, String s2);
- public boolean allNonStopTokensInOtherAnnot(ArrayList<Annotation> firstName,
- ArrayList<Annotation> secondName, String TOKEN_STRING_FEATURE_NAME,
+ public boolean allNonStopTokensInOtherAnnot(List<Annotation> firstName,
+ List<Annotation> secondName, String TOKEN_STRING_FEATURE_NAME,
boolean caseSensitive);
public String stripPersonTitle(String annotString, Annotation annot,
Document doc, Map<Integer, List<Annotation>> tokensMap,
- HashMap normalizedTokensMap, AnnotationSet nameAllAnnots)
+ Map<Integer, List<Annotation>> normalizedTokensMap, AnnotationSet
nameAllAnnots)
throws ExecutionException;
public boolean matchedAlready(Annotation annot1, Annotation annot2,
- List matchesDocFeature, AnnotationSet nameAllAnnots);
+ List<List<Integer>> matchesDocFeature, AnnotationSet nameAllAnnots);
public Annotation updateMatches(Annotation newAnnot, String annotString,
- HashMap processedAnnots, AnnotationSet nameAllAnnots,
- List matchesDocFeature);
+ Map<Integer,String> processedAnnots, AnnotationSet nameAllAnnots,
+ List<List<Integer>> matchesDocFeature);
public void updateMatches(Annotation newAnnot, Annotation prevAnnot,
- List matchesDocFeature, AnnotationSet nameAllAnnots);
+ List<List<Integer>> matchesDocFeature, AnnotationSet nameAllAnnots);
- public HashSet buildTables(AnnotationSet nameAllAnnots);
+ public Set<String> buildTables(AnnotationSet nameAllAnnots);
public boolean isUnknownGender(String gender);
}
Modified: gate/trunk/src/main/gate/creole/orthomatcher/BasicAnnotationOrthography.java
===================================================================
--- gate/trunk/src/main/gate/creole/orthomatcher/BasicAnnotationOrthography.java	2014-03-04 13:49:57 UTC (rev 17527)
+++ gate/trunk/src/main/gate/creole/orthomatcher/BasicAnnotationOrthography.java	2014-03-04 14:41:51 UTC (rev 17528)
@@ -1,8 +1,21 @@
package gate.creole.orthomatcher;
+import static gate.creole.ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME;
+import static gate.creole.ANNIEConstants.LOOKUP_ANNOTATION_TYPE;
+import static gate.creole.orthomatcher.OrthoMatcherHelper.getStringForSpan;
+import static gate.creole.orthomatcher.OrthoMatcherHelper.round2Places;
+import gate.Annotation;
+import gate.AnnotationSet;
+import gate.Document;
+import gate.Factory;
+import gate.FeatureMap;
+import gate.creole.ExecutionException;
+import gate.util.BomStrippingInputStreamReader;
+import gate.util.Err;
+import gate.util.InvalidOffsetException;
+
import java.io.BufferedReader;
import java.io.IOException;
-import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
@@ -17,22 +30,6 @@
import org.apache.log4j.Logger;
-import gate.Annotation;
-import gate.AnnotationSet;
-import gate.Document;
-import gate.Factory;
-import gate.FeatureMap;
-import gate.creole.ExecutionException;
-import gate.util.BomStrippingInputStreamReader;
-import gate.util.Err;
-import gate.util.InvalidOffsetException;
-
-import static gate.creole.ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME;
-import static gate.creole.ANNIEConstants.LOOKUP_ANNOTATION_TYPE;
-import static gate.creole.ANNIEConstants.PERSON_GENDER_FEATURE_NAME;
-
-import static gate.creole.orthomatcher.OrthoMatcherHelper.*;
-
/*
* This class defines an orthography which defines the primary behaviour of the
* Orthomatcher processing resource in GATE.
@@ -44,8 +41,8 @@
private final String unknownType;
- private Map<String, HashSet<String>> nicknameMap =
- new HashMap<String, HashSet<String>>();
+ private Map<String, Set<String>> nicknameMap =
+ new HashMap<String, Set<String>>();
private final Double minimumNicknameLikelihood;
@@ -96,8 +93,8 @@
* @return true if all of the tokens in firstName are either found in second
* name or are stop words
*/
- public boolean allNonStopTokensInOtherAnnot(ArrayList<Annotation> firstName,
- ArrayList<Annotation> secondName, String TOKEN_STRING_FEATURE_NAME,
+ public boolean allNonStopTokensInOtherAnnot(List<Annotation> firstName,
+ List<Annotation> secondName, String TOKEN_STRING_FEATURE_NAME,
boolean caseSensitive) {
for(Annotation a : firstName) {
if(!a.getFeatures().containsKey("ortho_stop")) {
@@ -122,7 +119,7 @@
*/
public String stripPersonTitle(String annotString, Annotation annot,
Document doc, Map<Integer, List<Annotation>> tokensMap,
- HashMap normalizedTokensMap, AnnotationSet nameAllAnnots)
+ Map<Integer,List<Annotation>> normalizedTokensMap, AnnotationSet
nameAllAnnots)
throws ExecutionException {
FeatureMap queryFM = Factory.newFeatureMap();
// get the offsets
@@ -159,8 +156,8 @@
// annotString);
// log.debug("Tokens are " + tokensMap.get(annot.getId()));
// log.debug("Title is " + annotTitle);
- ((ArrayList)tokensMap.get(annot.getId())).remove(0);
- ((ArrayList)normalizedTokensMap.get(annot.getId())).remove(0);
+ tokensMap.get(annot.getId()).remove(0);
+ normalizedTokensMap.get(annot.getId()).remove(0);
return annotString.substring(annotTitle.length() + 1,
annotString.length());
}
@@ -173,11 +170,12 @@
}
public boolean matchedAlready(Annotation annot1, Annotation annot2,
- List matchesDocFeature, AnnotationSet nameAllAnnots) {
+ List<List<Integer>> matchesDocFeature, AnnotationSet nameAllAnnots) {
// the two annotations are already matched if the matches list of the first
// contains the id of the second
- List matchesList =
- (List)annot1.getFeatures().get(ANNOTATION_COREF_FEATURE_NAME);
+ @SuppressWarnings("unchecked")
+ List<Integer> matchesList =
+ (List<Integer>)annot1.getFeatures().get(ANNOTATION_COREF_FEATURE_NAME);
if((matchesList == null) || matchesList.isEmpty())
return false;
else if(matchesList.contains(annot2.getId())) return true;
@@ -185,8 +183,8 @@
}
public Annotation updateMatches(Annotation newAnnot, String annotString,
- HashMap processedAnnots, AnnotationSet nameAllAnnots,
- List matchesDocFeature) {
+ Map<Integer, String> processedAnnots, AnnotationSet nameAllAnnots,
+ List<List<Integer>> matchesDocFeature) {
Annotation matchedAnnot = null;
Integer id;
// first find a processed annotation with the same string
@@ -195,11 +193,11 @@
// which is indexed on string rather than testing every id. Need to have
the
// index be String + Type
// for safety
- Iterator iter = processedAnnots.keySet().iterator();
+ Iterator<Integer> iter = processedAnnots.keySet().iterator();
// System.out.println("ID's examined: ");
while(iter.hasNext()) {
- id = (Integer)iter.next();
- String oldString = (String)processedAnnots.get(id);
+ id = iter.next();
+ String oldString = processedAnnots.get(id);
// System.out.print(id + " ");
if(annotString.equals(oldString)) {
Annotation tempAnnot = nameAllAnnots.get(id);
@@ -220,12 +218,13 @@
}// while
// System.out.println();
if(matchedAnnot == null) return null;
- List matchesList =
- (List)matchedAnnot.getFeatures().get(ANNOTATION_COREF_FEATURE_NAME);
+ @SuppressWarnings("unchecked")
+ List<Integer> matchesList =
+
(List<Integer>)matchedAnnot.getFeatures().get(ANNOTATION_COREF_FEATURE_NAME);
if((matchesList == null) || matchesList.isEmpty()) {
// no previous matches, so need to add
if(matchesList == null) {
- matchesList = new ArrayList();
+ matchesList = new ArrayList<Integer>();
matchedAnnot.getFeatures().put(ANNOTATION_COREF_FEATURE_NAME,
matchesList);
matchesDocFeature.add(matchesList);
@@ -243,14 +242,15 @@
}
public void updateMatches(Annotation newAnnot, Annotation prevAnnot,
- List matchesDocFeature, AnnotationSet nameAllAnnots) {
- List matchesList =
- (List)prevAnnot.getFeatures().get(
+ List<List<Integer>> matchesDocFeature, AnnotationSet nameAllAnnots) {
+ @SuppressWarnings("unchecked")
+ List<Integer> matchesList =
+ (List<Integer>)prevAnnot.getFeatures().get(
OrthoMatcher.ANNOTATION_COREF_FEATURE_NAME);
if((matchesList == null) || matchesList.isEmpty()) {
// no previous matches, so need to add
if(matchesList == null) {
- matchesList = new ArrayList();
+ matchesList = new ArrayList<Integer>();
prevAnnot.getFeatures().put(OrthoMatcher.ANNOTATION_COREF_FEATURE_NAME,
matchesList);
matchesDocFeature.add(matchesList);
@@ -286,10 +286,10 @@
/**
* Tables for namematch info (used by the namematch rules)
*/
- public HashSet buildTables(AnnotationSet nameAllAnnots) {
+ public Set<String> buildTables(AnnotationSet nameAllAnnots) {
FeatureMap tempMap = Factory.newFeatureMap();
// reset the tables first
- HashSet cdg = new HashSet();
+ Set<String> cdg = new HashSet<String>();
if(!extLists) {
// i.e. get cdg from Lookup annotations
// get all Lookup annotations
@@ -327,16 +327,16 @@
return true;
} // isUnknownGender
- protected Map<String, HashSet<String>> initNicknames(
+ protected Map<String, Set<String>> initNicknames(
String nicknameFileEncoding, java.net.URL fileURL) throws IOException {
Pattern spacePat = Pattern.compile("(\\s+)");
- nicknameMap = new HashMap<String, HashSet<String>>();
+ nicknameMap = new HashMap<String, Set<String>>();
// create the relative URL
BufferedReader reader =
new BomStrippingInputStreamReader(fileURL.openStream(),
nicknameFileEncoding);
String lineRead = null;
- int ctr = 0;
+
while((lineRead = reader.readLine()) != null) {
if(lineRead.length() == 0 || lineRead.charAt(0) == '#') {
continue;
Modified: gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcher.java
===================================================================
--- gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcher.java	2014-03-04 13:49:57 UTC (rev 17527)
+++ gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcher.java	2014-03-04 14:41:51 UTC (rev 17528)
@@ -63,6 +63,9 @@
import org.apache.log4j.Logger;
@CreoleResource(name="ANNIE OrthoMatcher", comment="ANNIE orthographical
coreference component.",
helpURL="http://gate.ac.uk/userguide/sec:annie:orthomatcher",
icon="ortho-matcher")
public class OrthoMatcher extends AbstractLanguageAnalyser {
+
+ private static final long serialVersionUID = -6258229350677707465L;
+
protected static final Logger log = Logger.getLogger(OrthoMatcher.class);
public static final boolean DEBUG = false;
@@ -103,7 +106,7 @@
protected String annotationSetName;
/** the types of the annotation */
- protected List annotationTypes = new ArrayList(10);
+ protected List<String> annotationTypes = new ArrayList<String>(10);
/** the organization type*/
protected String organizationType = ORGANIZATION_ANNOTATION_TYPE;
@@ -137,26 +140,26 @@
// name lookup tables (used for namematch)
//gave them bigger default size, coz rehash is expensive
- protected HashMap alias = new HashMap(100);
- protected HashSet cdg = new HashSet();
- protected HashMap spur_match = new HashMap(100);
- protected HashMap def_art = new HashMap(20);
- protected HashMap connector = new HashMap(20);
- protected HashMap prepos = new HashMap(30);
+ protected HashMap<String, String> alias = new HashMap<String, String>(100);
+ protected Set<String> cdg = new HashSet<String>();
+ protected HashMap<String, String> spur_match = new HashMap<String,
String>(100);
+ protected HashMap<String, String> def_art = new HashMap<String, String>(20);
+ protected HashMap<String, String> connector = new HashMap<String,
String>(20);
+ protected HashMap<String, String> prepos = new HashMap<String, String>(30);
protected AnnotationSet nameAllAnnots = null;
- protected HashMap processedAnnots = new HashMap(150);
- protected HashMap annots2Remove = new HashMap(75);
- protected List matchesDocFeature = new ArrayList();
+ protected HashMap<Integer, String> processedAnnots = new HashMap<Integer,
String>(150);
+ protected HashMap<Integer, String> annots2Remove = new HashMap<Integer,
String>(75);
+ protected List<List<Integer>> matchesDocFeature = new
ArrayList<List<Integer>>();
//maps annotation ids to array lists of tokens
- protected HashMap tokensMap = new HashMap(150);
- public HashMap getTokensMap() {
+ protected HashMap<Integer, List<Annotation>> tokensMap = new
HashMap<Integer, List<Annotation>>(150);
+ public Map<Integer, List<Annotation>> getTokensMap() {
return tokensMap;
}
- protected HashMap normalizedTokensMap = new HashMap(150);
+ protected Map<Integer, List<Annotation>> normalizedTokensMap = new
HashMap<Integer, List<Annotation>>(150);
protected Annotation shortAnnot;
protected Annotation longAnnot;
@@ -301,7 +304,8 @@
//check if we've been run on this document before
//and clean the doc if needed
docCleanup();
- Map matchesMap = (Map)document.getFeatures().
+ @SuppressWarnings("unchecked")
+ Map<String, List<List<Integer>>> matchesMap = (Map<String,
List<List<Integer>>>)document.getFeatures().
get(DOCUMENT_COREF_FEATURE_NAME);
@@ -321,7 +325,7 @@
// determineMatchesDocument();
if (! matchesDocFeature.isEmpty()) {
if(matchesMap == null){
- matchesMap = new HashMap();
+ matchesMap = new HashMap<String, List<List<Integer>>>();
}
matchesMap.put(nameAllAnnots.getName(), matchesDocFeature);
// System.out.println("matchesMap is: " + matchesMap);
@@ -331,7 +335,7 @@
//cannot do clear() as this has already been put on the document
//so I need a new one for the next run of matcher
- matchesDocFeature = new ArrayList();
+ matchesDocFeature = new ArrayList<List<Integer>>();
fireStatusChanged("OrthoMatcher completed");
@@ -345,7 +349,7 @@
annots2Remove.clear();
tokensMap.clear();
normalizedTokensMap.clear();
- matchesDocFeature = new ArrayList();
+ matchesDocFeature = new ArrayList<List<Integer>>();
longAnnot = null;
shortAnnot = null;
tokensLongAnnot = null;
@@ -357,9 +361,9 @@
protected void matchNameAnnotations() throws ExecutionException{
// go through all the annotation types
- Iterator iterAnnotationTypes = annotationTypes.iterator();
+ Iterator<String> iterAnnotationTypes = annotationTypes.iterator();
while (iterAnnotationTypes.hasNext()) {
- String annotationType = (String)iterAnnotationTypes.next();
+ String annotationType = iterAnnotationTypes.next();
AnnotationSet nameAnnots = nameAllAnnots.get(annotationType);
@@ -374,7 +378,6 @@
for (int snaIndex = 0;snaIndex < sortedNameAnnots.size();snaIndex++) {
Annotation tempAnnot = sortedNameAnnots.get(snaIndex);
Annotation nameAnnot = nameAllAnnots.get(tempAnnot.getId()); // Not
sure if this matters
- Integer id = nameAnnot.getId();
// get string and value
String annotString = orthoAnnotation.getStringForAnnotation(nameAnnot,
document);
@@ -392,7 +395,7 @@
}
// get the tokens
- List tokens = new
ArrayList(tokensNameAS.getContained(nameAnnot.getStartNode().getOffset(),
+ List<Annotation> tokens = new
ArrayList<Annotation>(tokensNameAS.getContained(nameAnnot.getStartNode().getOffset(),
nameAnnot.getEndNode().getOffset()));
//if no tokens to match, do nothing
@@ -488,7 +491,7 @@
// System.out.println("Now trying to match the unknown string: " +
unknownString);
//get the tokens
- List tokens = new ArrayList((Set)
+ List<Annotation> tokens = new ArrayList<Annotation>((Set<Annotation>)
nameAllTokens.getContained(
unknown.getStartNode().getOffset(),
unknown.getEndNode().getOffset()
@@ -538,20 +541,21 @@
} //while though unknowns
if (! annots2Remove.isEmpty()) {
- Iterator unknownIter = annots2Remove.keySet().iterator();
+ Iterator<Integer> unknownIter = annots2Remove.keySet().iterator();
while (unknownIter.hasNext()) {
- Integer unknId = (Integer) unknownIter.next();
+ Integer unknId = unknownIter.next();
Annotation unknown = nameAllAnnots.get(unknId);
Integer newID = nameAllAnnots.add(
unknown.getStartNode(),
unknown.getEndNode(),
- (String) annots2Remove.get(unknId),
+ annots2Remove.get(unknId),
unknown.getFeatures()
);
nameAllAnnots.remove(unknown);
//change the id in the matches list
- List mList = (List)unknown.getFeatures().
+ @SuppressWarnings("unchecked")
+ List<Integer> mList = (List<Integer>)unknown.getFeatures().
get(ANNOTATION_COREF_FEATURE_NAME);
mList.remove(unknId);
mList.add(newID);
@@ -560,7 +564,7 @@
}
private boolean matchHyphenatedUnknowns(Annotation unknown, String
unknownString,
- Iterator iter){
+ Iterator<Annotation> iter){
boolean matched = false;
//only take the substring before the hyphen
@@ -576,7 +580,7 @@
iter.remove();
String newType;
if (matchedAnnot.getType().equals(unknownType))
- newType = (String)annots2Remove.get(matchedAnnot.getId());
+ newType = annots2Remove.get(matchedAnnot.getId());
else
newType = matchedAnnot.getType();
@@ -595,7 +599,8 @@
nameAllAnnots.remove(unknown);
//change the id in the matches list
- List mList = (List)unknown.getFeatures().
+ @SuppressWarnings("unchecked")
+ List<Integer> mList = (List<Integer>)unknown.getFeatures().
get(ANNOTATION_COREF_FEATURE_NAME);
mList.remove(unknown.getId());
mList.add(newID);
@@ -703,6 +708,7 @@
protected void propagatePropertyToExactMatchingMatches(Annotation
updateAnnot,String featureName,Object value) {
try {
+ @SuppressWarnings("unchecked")
List<Integer> matchesList = (List<Integer>)
updateAnnot.getFeatures().get(ANNOTATION_COREF_FEATURE_NAME);
if ((matchesList == null) || matchesList.isEmpty()) {
return;
@@ -734,7 +740,7 @@
// find which annotation string of the two is longer
// this is useful for some of the matching rules
- String prevAnnotString = (String) processedAnnots.get(prevAnnot.getId());
+ String prevAnnotString = processedAnnots.get(prevAnnot.getId());
// Out.prln("matchAnnotations processing " + annotString + " and " +
prevAnnotString);
if (prevAnnotString == null) {
// Out.prln("We discovered that the following string is null!: " +
prevAnnot.getId() +
@@ -760,12 +766,13 @@
longerPrevious = false;
}//if
- tokensLongAnnot = (ArrayList) tokensMap.get(longAnnot.getId());
- normalizedTokensLongAnnot = (ArrayList)
normalizedTokensMap.get(longAnnot.getId());
- tokensShortAnnot = (ArrayList) tokensMap.get(shortAnnot.getId());
- normalizedTokensShortAnnot = (ArrayList)
normalizedTokensMap.get(shortAnnot.getId());
+ tokensLongAnnot = (ArrayList<Annotation>) tokensMap.get(longAnnot.getId());
+ normalizedTokensLongAnnot = (ArrayList<Annotation>)
normalizedTokensMap.get(longAnnot.getId());
+ tokensShortAnnot = (ArrayList<Annotation>)
tokensMap.get(shortAnnot.getId());
+ normalizedTokensShortAnnot = (ArrayList<Annotation>)
normalizedTokensMap.get(shortAnnot.getId());
- List matchesList = (List) prevAnnot.getFeatures().
+ @SuppressWarnings("unchecked")
+ List<Integer> matchesList = (List<Integer>) prevAnnot.getFeatures().
get(ANNOTATION_COREF_FEATURE_NAME);
if (matchesList == null || matchesList.isEmpty())
return apply_rules_namematch(prevAnnot.getType(), shortName,longName,
@@ -784,7 +791,7 @@
if (allMatchingNeeded) {
allMatchingNeeded = false;
- List toMatchList = new ArrayList(matchesList);
+ List<Integer> toMatchList = new ArrayList<Integer>(matchesList);
// if (newAnnot.getType().equals(unknownType))
// Out.prln("Matching new " + annotString + " with annots " +
toMatchList);
toMatchList.remove(prevAnnot.getId());
@@ -803,7 +810,7 @@
* two different entities share a common token: e.g., BT Cellnet
* and BT and British Telecom.
*/
- protected boolean matchOtherAnnots( List toMatchList, Annotation newAnnot,
+ protected boolean matchOtherAnnots( List<Integer> toMatchList, Annotation
newAnnot,
String annotString) {
//if the list is empty, then we're matching all right :-)
@@ -814,11 +821,11 @@
int i = 0;
while (matchedAll && i < toMatchList.size()) {
- Annotation prevAnnot = nameAllAnnots.get((Integer) toMatchList.get(i));
+ Annotation prevAnnot = nameAllAnnots.get(toMatchList.get(i));
// find which annotation string of the two is longer
// this is useful for some of the matching rules
- String prevAnnotString = (String) processedAnnots.get(prevAnnot.getId());
+ String prevAnnotString = processedAnnots.get(prevAnnot.getId());
if (prevAnnotString == null)
try {
prevAnnotString = document.getContent().getContent(
@@ -845,10 +852,10 @@
longerPrevious = false;
}//if
- tokensLongAnnot = (ArrayList) tokensMap.get(longAnnot.getId());
- normalizedTokensLongAnnot = (ArrayList)
normalizedTokensMap.get(longAnnot.getId());
- tokensShortAnnot = (ArrayList) tokensMap.get(shortAnnot.getId());
- normalizedTokensShortAnnot = (ArrayList)
normalizedTokensMap.get(shortAnnot.getId());
+ tokensLongAnnot = (ArrayList<Annotation>)
tokensMap.get(longAnnot.getId());
+ normalizedTokensLongAnnot = (ArrayList<Annotation>)
normalizedTokensMap.get(longAnnot.getId());
+ tokensShortAnnot = (ArrayList<Annotation>)
tokensMap.get(shortAnnot.getId());
+ normalizedTokensShortAnnot = (ArrayList<Annotation>)
normalizedTokensMap.get(shortAnnot.getId());
matchedAll = apply_rules_namematch(prevAnnot.getType(),
shortName,longName,prevAnnot,newAnnot,
longerPrevious);
@@ -860,16 +867,17 @@
return matchedAll;
}
+ @SuppressWarnings("unchecked")
protected void docCleanup() {
Object matchesValue =
document.getFeatures().get(DOCUMENT_COREF_FEATURE_NAME);
if (matchesValue != null && (matchesValue instanceof Map))
- ((Map)matchesValue).remove(nameAllAnnots.getName());
+
((Map<String,List<List<Integer>>>)matchesValue).remove(nameAllAnnots.getName());
else if (matchesValue != null) {
- document.getFeatures().put(DOCUMENT_COREF_FEATURE_NAME, new HashMap());
+ document.getFeatures().put(DOCUMENT_COREF_FEATURE_NAME, new
HashMap<String,List<List<Integer>>>());
}
//get all annotations that have a matches feature
- HashSet fNames = new HashSet();
+ HashSet<String> fNames = new HashSet<String>();
fNames.add(ANNOTATION_COREF_FEATURE_NAME);
AnnotationSet annots =
nameAllAnnots.get(null, fNames);
@@ -890,11 +898,11 @@
static Pattern periodPat = Pattern.compile("[\\.]+");
protected void normalizePersonName (Annotation annot) throws
ExecutionException {
- ArrayList<Annotation> tokens = (ArrayList)
normalizedTokensMap.get(annot.getId());
+ ArrayList<Annotation> tokens = (ArrayList<Annotation>)
normalizedTokensMap.get(annot.getId());
for (int i = tokens.size() - 1; i >= 0; i--) {
String tokenString = ((String)
tokens.get(i).getFeatures().get(TOKEN_STRING_FEATURE_NAME));
String kind = (String)
tokens.get(i).getFeatures().get(TOKEN_KIND_FEATURE_NAME);
- String category = (String)
tokens.get(i).getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME);
+ //String category = (String)
tokens.get(i).getFeatures().get(TOKEN_CATEGORY_FEATURE_NAME);
if (!caseSensitive) {
tokenString = tokenString.toLowerCase();
}
@@ -919,11 +927,10 @@
/** return an organization without a designator and starting The*/
protected String normalizeOrganizationName (String annotString, Annotation
annot){
- ArrayList<Annotation> tokens = (ArrayList) tokensMap.get(annot.getId());
+ ArrayList<Annotation> tokens = (ArrayList<Annotation>)
tokensMap.get(annot.getId());
//strip starting The first
- if ( ((String) ((Annotation) tokens.get(0)
- ).getFeatures().get(TOKEN_STRING_FEATURE_NAME))
+ if ( ((String) tokens.get(0).getFeatures().get(TOKEN_STRING_FEATURE_NAME))
.equalsIgnoreCase(THE_VALUE))
tokens.remove(0);
@@ -972,8 +979,7 @@
StringBuffer newString = new StringBuffer(50);
for (int i = 0; i < tokens.size(); i++){
- newString.append((String) ((Annotation) tokens.get(i)
- ).getFeatures().get(TOKEN_STRING_FEATURE_NAME) );
+ newString.append((String)
tokens.get(i).getFeatures().get(TOKEN_STRING_FEATURE_NAME) );
if (i != tokens.size()-1)
newString.append(" ");
}
@@ -1185,7 +1191,7 @@
@RunTime
@Optional
@CreoleParameter(comment="Name of the annotation types to use",
defaultValue="Organization;Person;Location;Date")
- public void setAnnotationTypes(List newType) {
+ public void setAnnotationTypes(List<String> newType) {
annotationTypes = newType;
}//setAnnotationTypes
@@ -1214,7 +1220,7 @@
}//getAnnotationSetName
/** get the types of the annotation*/
- public List getAnnotationTypes() {
+ public List<String> getAnnotationTypes() {
return annotationTypes;
}//getAnnotationTypes
@@ -1331,8 +1337,8 @@
}
else {
for (int i = 1; i < normalizedTokensLongAnnot.size() - 1;i++) {
- String s1_middle = (String) ((Annotation)
normalizedTokensLongAnnot.get(i)).getFeatures().get(TOKEN_STRING_FEATURE_NAME);
- String s2_middle = (String) ((Annotation)
normalizedTokensShortAnnot.get(i)).getFeatures().get(TOKEN_STRING_FEATURE_NAME);
+ String s1_middle = (String)
normalizedTokensLongAnnot.get(i).getFeatures().get(TOKEN_STRING_FEATURE_NAME);
+ String s2_middle = (String)
normalizedTokensShortAnnot.get(i).getFeatures().get(TOKEN_STRING_FEATURE_NAME);
if (!caseSensitive) {
s1_middle = s1_middle.toLowerCase();
s2_middle = s2_middle.toLowerCase();
Modified: gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcherHelper.java
===================================================================
--- gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcherHelper.java	2014-03-04 13:49:57 UTC (rev 17527)
+++ gate/trunk/src/main/gate/creole/orthomatcher/OrthoMatcherHelper.java	2014-03-04 14:41:51 UTC (rev 17528)
@@ -9,6 +9,7 @@
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
+import java.util.Set;
import org.apache.log4j.Logger;
@@ -170,7 +171,7 @@
protected static void setMatchesPositions(AnnotationSet
nameAllAnnots) {
//get all annotations that have a matches feature
- HashSet fNames = new HashSet();
+ Set<String> fNames = new HashSet<String>();
fNames.add(gate.creole.ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME);
AnnotationSet allMatchesAnnots =
nameAllAnnots.get(null, fNames);
Modified: gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleAnnotationOrthography.java
===================================================================
--- gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleAnnotationOrthography.java	2014-03-04 13:49:57 UTC (rev 17527)
+++ gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleAnnotationOrthography.java	2014-03-04 14:41:51 UTC (rev 17528)
@@ -1,25 +1,14 @@
package gate.creole.orthomatcher.SampleOrthoMatcher;
-import static gate.creole.ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME;
-import static gate.creole.ANNIEConstants.LOOKUP_ANNOTATION_TYPE;
-import static gate.creole.ANNIEConstants.PERSON_GENDER_FEATURE_NAME;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
-import gate.Factory;
-import gate.FeatureMap;
import gate.creole.ExecutionException;
import gate.creole.orthomatcher.AnnotationOrthography;
-import gate.creole.orthomatcher.OrthoMatcherHelper;
-import gate.util.Err;
-import gate.util.InvalidOffsetException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Set;
/*
@@ -46,26 +35,26 @@
return defaultOrthography.getStringForAnnotation(a,d);
}
- public String stripPersonTitle (String annotString, Annotation annot,
Document doc, Map<Integer, List<Annotation>> tokensMap, HashMap
normalizedTokensMap,AnnotationSet nameAllAnnots)
+ public String stripPersonTitle (String annotString, Annotation annot,
Document doc, Map<Integer, List<Annotation>> tokensMap, Map<Integer,
List<Annotation>> normalizedTokensMap,AnnotationSet nameAllAnnots)
throws ExecutionException {
return
defaultOrthography.stripPersonTitle(annotString,annot,doc,tokensMap,normalizedTokensMap,nameAllAnnots);
}
- public boolean matchedAlready(Annotation annot1, Annotation annot2,List
matchesDocFeature,AnnotationSet nameAllAnnots) {
+ public boolean matchedAlready(Annotation annot1, Annotation
annot2,List<List<Integer>> matchesDocFeature,AnnotationSet nameAllAnnots) {
return
defaultOrthography.matchedAlready(annot1,annot2,matchesDocFeature,nameAllAnnots);
}
- public void updateMatches(Annotation newAnnot, Annotation prevAnnot,List
matchesDocFeature,AnnotationSet nameAllAnnots) {
+ public void updateMatches(Annotation newAnnot, Annotation
prevAnnot,List<List<Integer>> matchesDocFeature,AnnotationSet nameAllAnnots) {
defaultOrthography.updateMatches(newAnnot,
prevAnnot,matchesDocFeature,nameAllAnnots);
}
- public HashSet buildTables(AnnotationSet nameAllAnnots) {
+ public Set<String> buildTables(AnnotationSet nameAllAnnots) {
return defaultOrthography.buildTables(nameAllAnnots);
}
- public boolean allNonStopTokensInOtherAnnot(ArrayList<Annotation> arg0,
- ArrayList<Annotation> arg1, String arg2, boolean arg3) {
+ public boolean allNonStopTokensInOtherAnnot(List<Annotation> arg0,
+ List<Annotation> arg1, String arg2, boolean arg3) {
return defaultOrthography.allNonStopTokensInOtherAnnot(arg0, arg1, arg2,
arg3);
}
@@ -76,7 +65,7 @@
return defaultOrthography.fuzzyMatch(arg1, arg2);
}
- public Annotation updateMatches(Annotation newAnnot, String
annotString,HashMap processedAnnots,AnnotationSet nameAllAnnots,List
matchesDocFeature) {
+ public Annotation updateMatches(Annotation newAnnot, String
annotString,Map<Integer, String> processedAnnots,AnnotationSet
nameAllAnnots,List<List<Integer>> matchesDocFeature) {
return defaultOrthography.updateMatches(newAnnot, annotString,
processedAnnots,nameAllAnnots,matchesDocFeature);
}
Modified: gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleOrthoMatcher.java
===================================================================
--- gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleOrthoMatcher.java	2014-03-04 13:49:57 UTC (rev 17527)
+++ gate/trunk/src/main/gate/creole/orthomatcher/SampleOrthoMatcher/SampleOrthoMatcher.java	2014-03-04 14:41:51 UTC (rev 17528)
@@ -10,6 +10,8 @@
*/
public class SampleOrthoMatcher extends OrthoMatcher {
+ private static final long serialVersionUID = -1774167249206778293L;
+
@Override
public Resource init() throws ResourceInstantiationException {
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Subversion Kills Productivity. Get off Subversion & Make the Move to Perforce.
With Perforce, you get hassle-free workflows. Merge that actually works.
Faster operations. Version large binaries. Built-in WAN optimization and the
freedom to use Git, Perforce or both. Make the move to Perforce.
http://pubads.g.doubleclick.net/gampad/clk?id=122218951&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs