Revision: 17327
          http://sourceforge.net/p/gate/code/17327
Author:   dgmaynard
Date:     2014-02-17 17:22:25 +0000 (Mon, 17 Feb 2014)
Log Message:
-----------
better cleaning, and better twitter grammar

Modified Paths:
--------------
    gate/trunk/plugins/ANNIE/resources/NE/clean.jape
    gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape

Modified: gate/trunk/plugins/ANNIE/resources/NE/clean.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/clean.jape    2014-02-17 16:38:01 UTC 
(rev 17326)
+++ gate/trunk/plugins/ANNIE/resources/NE/clean.jape    2014-02-17 17:22:25 UTC 
(rev 17327)
@@ -14,8 +14,8 @@
 */
 
 Phase: Clean
-Input: TempPerson TempLocation TempOrganization TempDate TempTime TempYear 
TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs
-Options: control = appelt
+Input: TempPerson TempLocation TempOrganization TempDate TempTime TempYear 
TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs 
ClosedClass Initials Upper FirstPerson Jobtitle HashtagToken HashtagLookup
+Options: control = all
 
 Rule:CleanTempAnnotations
 (
@@ -33,7 +33,14 @@
  {Phone}|
  {Ip}|
  {TempIdentifier}|
- {TempSpecs}
+ {TempSpecs}|
+ {ClosedClass}|
+ {Upper}|
+ {Initials}|
+ {FirstPerson}|
+ {Jobtitle}|
+ {HashtagToken}|
+ {HashtagLookup}
 ):temp
 -->
 {

Modified: gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape     2014-02-17 
16:38:01 UTC (rev 17326)
+++ gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape     2014-02-17 
17:22:25 UTC (rev 17327)
@@ -10,12 +10,12 @@
 *
 *  Diana Maynard, 10 Sep 2001
 * 
-*  $Id: name.jape 13147 2010-10-15 08:30:24Z markagreenwood $
+*  $Id: name.jape 17326 2014-02-17 16:38:01Z dgmaynard $
 */
 
 
 Phase: Name
-Input: Token Lookup Title FirstPerson TempDate Split UserID
+Input: Token Lookup Title FirstPerson Upper ClosedClass Initials Split UserID
 Options: control = appelt debug = false
 
 ///////////////////////////////////////////////////////////////
@@ -29,21 +29,9 @@
  ({Token.string == "."})?
 )
 
-Macro: INITIALS
-(
-  ({Token.orth == upperInitial, Token.length =="1"}
-  ({Token.string == "."})?
-  )+
-)
 
-Macro: INITIALS2
 
-(
- {Token.orth == allCaps, Token.length == "2"} |
- {Token.orth == allCaps, Token.length == "3"}
-)
 
-
 Macro: FIRSTNAME
 
  ({FirstPerson.gender == male} |
@@ -57,17 +45,8 @@
 
 
 
-Macro: UPPER
-(
- ({Token.category == NNP}| 
- {Token.orth == upperInitial}|
- {Token.orth == mixedCaps} 
-)
- ({Token.string == "-"}
-  {Token.category == NNP}
- )?
-)
 
+
 Macro: PERSONENDING
 (
  {Lookup.majorType == person_ending}
@@ -86,20 +65,13 @@
 
 
 ///////////////////////////////////////////////////////////
-Rule: NotAnything
-Priority: 1000
 
-(
- {Lookup.majorType == spur}
-)
--->
-{}
 
 // Person Rules
 
 Rule: Pronoun
 Priority: 1000
-//stops personal pronouns being recognised as Initials
+
 (
  {Token.category == PP}|
  {Token.category == PRP}|
@@ -110,34 +82,76 @@
 
 
 
+Rule:Reject
+Priority: 1000
+// stops certain things being recognised as People
+(
+ {ClosedClass}
+)
+-->
+{}
 
-Rule:  GazPerson
+
+Rule:    GazPerson
 Priority: 50
 (
  {Lookup.majorType == person_full}
 )
 :person -->
 {
-gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
-gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("kind", "personName");
+
+// find the Token annotations
+AnnotationSet tokenSet = gate.Utils.getContainedAnnotations(inputAS, 
personSet, "Token");
+// put them in order
+List<Annotation> tokenList = gate.Utils.inDocumentOrder(tokenSet);
+
+if (tokenList.size() == 1) {
+ // if there's only one Token, guess it's a surname
+
+  String surnameContent = gate.Utils.stringFor(doc, tokenList.get(0));
+  features.put("surname", surnameContent);
+ }
+
+else if (tokenList.size() > 0) {
+  // the string under the first Token
+   String firstNameContent = gate.Utils.stringFor(doc, tokenList.get(0));
+  features.put("firstName", firstNameContent);
+
+
+  // the string under the remaining Tokens if any
+  if (tokenList.size() > 1) {
+    Long lastNameStart = gate.Utils.start(tokenList.get(1));
+    Long lastNameEnd   = gate.Utils.end(tokenList.get(tokenList.size() - 1));
+    String surnameContent = gate.Utils.stringFor(doc, lastNameStart, 
lastNameEnd);
+    features.put("surname", surnameContent);
+  }
+}
+
+features.put("kind", "fullName");
 features.put("rule", "GazPerson");
-outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
-features);
+features.put("gender", personAnn.getFeatures().get("gender"));
+
+// this method doesn't require try-catch
+gate.Utils.addAnn(outputAS, personSet, "TempPerson", features);
 }
 
-Rule:  TheGazPersonFirst
+
+
+
+
+Rule:  GazPersonFirst
 Priority: 200
 (
  {Token.category == DT}|
  {Token.category == PRP}|
  {Token.category == RB}
-)
+)?
 (
- {FirstPerson}
-)
-:person 
+ {FirstPerson.kind != ambig}
+):person 
 ( 
  {Token.orth == upperInitial, Token.length == "1"}
 )?
@@ -147,40 +161,19 @@
 gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
 features.put("gender", personAnn.getFeatures().get("gender"));
-features.put("kind", "personName");
+features.put("kind", "firstName");
 features.put("rule", "GazPersonFirst");
-outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
-features);
-//outputAS.removeAll(person);
-}
 
+// get the string of the first name
+String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+features.put("firstName", contentFirstName);
 
-Rule:  GazPersonFirst
-Priority: 70
-(
- {FirstPerson}
-)
-:person 
-( 
- {Token.orth == upperInitial, Token.length == "1"}
-)?
--->
-{
-gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
-gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
-gate.FeatureMap features = Factory.newFeatureMap();
-features.put("gender", personAnn.getFeatures().get("gender"));
-features.put("kind", "personName");
-features.put("rule", "GazPersonFirst");
 outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
 features);
-//outputAS.removeAll(person);
 }
 
 
 
-
-
 Rule: PersonFirstContext
 Priority: 30
 // Anne and Kenton
@@ -191,180 +184,196 @@
 (
  {Token.string == "and"}
 )
-({Token.orth == upperInitial})
+({Token.orth == upperInitial, Token.length != "1"})
 :person2
  -->
 {
 //first deal with person1
  gate.FeatureMap features1 = Factory.newFeatureMap();
-gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1");
-gate.AnnotationSet firstPerson = 
(gate.AnnotationSet)person1Set.get("FirstPerson");
-if (firstPerson != null && firstPerson.size()>0)
-{
-  gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
+ gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1");
+  gate.Annotation personAnn = (gate.Annotation)person1Set.iterator().next();
+ 
+  String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+  features1.put("firstName", contentFirstName);
   features1.put("gender", personAnn.getFeatures().get("gender"));
-}
-  features1.put("kind", "personName");
+  features1.put("kind", "firstName");
   features1.put("rule", "PersonFirstContext");
 outputAS.add(person1Set.firstNode(), person1Set.lastNode(), "TempPerson",
 features1);
+
 //now deal with person2
 gate.FeatureMap features2 = Factory.newFeatureMap();
 gate.AnnotationSet person2Set = (gate.AnnotationSet)bindings.get("person2");
-  features2.put("kind", "personName");
+gate.Annotation person2Ann = (gate.Annotation)person2Set.iterator().next();
+
+  String content2FirstName = gate.Utils.stringFor(doc, person2Ann);
+  features2.put("firstName", content2FirstName);
+  features2.put("kind", "firstName");
   features2.put("rule", "PersonFirstContext");
 outputAS.add(person2Set.firstNode(), person2Set.lastNode(), "TempPerson",
 features2);
 }
 
 
-Rule: PersonFirstContext2
-Priority: 40
-// Anne and I
+Rule:  PersonTitle
+Priority: 35
+// Mr. Jones
+// Mr Fred Jones
+// note we only allow one first and surname, 
+// but we add more in a final phase if we find adjacent unknowns
 
+( 
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
 (
- {FirstPerson}
-):person
-(
- {Token.string == "and"}
- {Token.length == "1"}
-)
- -->
+ ({Title.rule == "TitleGender"}):title
+ ({Title})?
+ (
+  (FIRSTNAME | FIRSTNAMEAMBIG )?
+ ):firstName
+ (
+  (PREFIX)* 
+  ({Upper})
+  (PERSONENDING)?
+ ):surname
+):person 
+-->
 {
  gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet firstPerson = 
(gate.AnnotationSet)personSet.get("FirstPerson");
-if (firstPerson != null && firstPerson.size()>0)
-{
-  gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
-  features.put("gender", personAnn.getFeatures().get("gender"));
-}
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ 
+ gate.AnnotationSet firstNameSet = 
(gate.AnnotationSet)bindings.get("firstName");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+  String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+  features.put("title", contentTitle);
+  features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+  gate.Annotation firstNameAnn = 
(gate.Annotation)firstNameSet.iterator().next();
+  String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+  features.put("firstName", firstNameContent);
+ }
+  String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+  features.put("surname", surnameContent);
+
   features.put("kind", "personName");
-  features.put("rule", "PersonFirstContext2");
+  features.put("rule", "PersonTitle");
 outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
 features);
 }
 
-Rule:  PersonTitle
+
+
+
+Rule:  PersonTitleInitials
 Priority: 35
-// Mr. Jones
-// Mr Fred Jones
-// note we only allow one first and surname, 
-// but we can add more in a final phase if we find adjacent unknowns
 
+// Mr J. Jones
+
+
 ( 
  {Token.category == DT}|
  {Token.category == PRP}|
  {Token.category == RB}
 )?
 (
- (TITLE)+
- ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
- )?
+ ({Title.rule == "TitleGender"}):title
+ ({Title})?
+ (
+  ({Initials})?
+ ):initials
+ (
   (PREFIX)* 
-  (UPPER)
- (PERSONENDING)?
-)
-:person -->
+  ({Upper, !Initials})
+  (PERSONENDING)?
+ ):surname
+):person 
+-->
 {
  gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-  
- // get all Title annotations that have a gender feature
- HashSet fNames = new HashSet();
-    fNames.add("gender");
-    gate.AnnotationSet personTitle = personSet.get("Title", fNames);
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ 
+ gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials");
 
-// if the gender feature exists
- if (personTitle != null && personTitle.size()>0)
-{
-  //Out.prln("Titles found " +  personTitle);
-  gate.Annotation personAnn = (gate.Annotation)personTitle.iterator().next();
-  features.put("gender", personAnn.getFeatures().get("gender"));
-}
-else
-{
-  //get all firstPerson annotations that have a gender feature
-  //  HashSet fNames = new HashSet();
-   // fNames.add("gender");
-    gate.AnnotationSet firstPerson = personSet.get("FirstPerson", fNames);
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
 
-  if (firstPerson != null && firstPerson.size()>0)
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+  String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+  features.put("title", contentTitle);
+  features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ if (initialsSet != null && initialsSet.size()>0)
  {
-    //Out.prln("First persons found " +  firstPerson);
-  gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
-  features.put("gender", personAnn.getFeatures().get("gender"));
+  List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
+
+  Long initialsStart = gate.Utils.start(initialsList.get(0));
+  Long initialsEnd   = gate.Utils.end(initialsList.get(initialsList.size() - 
1));
+  String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart, 
initialsEnd); 
+  features.put("initials", initialsContent);
  }
-}
+  String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+  features.put("surname", surnameContent);
+
   features.put("kind", "personName");
-  features.put("rule", "PersonTitle");
+  features.put("rule", "PersonTitleInitials");
 outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
 features);
 }
 
 
-Rule:  PersonFirstTitleGender
+Rule:  TitleFirstName
 Priority: 55
 // use this rule when we know what gender the title indicates
 // Mr Fred
 
 (
- ({Title.gender == male} | {Title.gender == female})
- ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
- )
+ ({Title.gender == male} | {Title.gender == female}):title
+ (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+
 )
 :person -->
+
 {
  gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet title = (gate.AnnotationSet)personSet.get("Title");
-if (title != null && title.size()>0)
-{
-  gate.Annotation personAnn = (gate.Annotation)title.iterator().next();
-  features.put("gender", personAnn.getFeatures().get("gender"));
-}
-  features.put("kind", "personName");
-  features.put("rule", "PersonFirstTitleGender");
-outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
-features);
-}
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ 
+ gate.AnnotationSet firstNameSet = 
(gate.AnnotationSet)bindings.get("firstName");
 
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
 
-Rule: PersonTitleGender
-Priority: 18
-// use this rule if the title has a feature gender
-// Miss F Smith
-(
- ({Title.gender == male}|
-  {Title.gender == female}
- ) 
- ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
- )*
- (UPPER)
- (PERSONENDING)?
-)
-:person -->
-{
- gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet title = (gate.AnnotationSet)personSet.get("Title");
-// if the annotation type title doesn't exist, do nothing
-if (title != null && title.size()>0)
-{
-// if it does exist, take the first element in the set
-  gate.Annotation personAnn = (gate.Annotation)title.iterator().next();
-//propagate gender feature (and value) from title
-  features.put("gender", personAnn.getFeatures().get("gender"));
-}
-// create some new features
-  features.put("kind", "personName");
-  features.put("rule", "PersonTitleGender");
-// create a TempPerson annotation and add the features we've created
-outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+  String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+  features.put("title", contentTitle);
+  features.put("gender", titleAnn.getFeatures().get("gender"));
+ // bindings is keyed by the LHS label text, so the label must be spelled "firstName"
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+  gate.Annotation firstNameAnn = 
(gate.Annotation)firstNameSet.iterator().next();
+  String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+  features.put("firstName", firstNameContent);
+ }
+ 
+ features.put("kind", "personName");
+
+ features.put("rule", "TitleFirstName");
+ outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
 features);
 }
 
 
+
 Rule: PersonJobTitle
 Priority: 20
 // note we include titles but not jobtitles in markup
@@ -377,12 +386,12 @@
  ((FIRSTNAME | FIRSTNAMEAMBIG )
  )
  (PREFIX)* 
- (UPPER)
+ ({Upper,!Initials})
  (PERSONENDING)?
 )
 :person 
 -->
-    :person.TempPerson = {kind = "personName", rule = "PersonJobTitle"},
+    :person.TempPerson = {kind = "fullName", rule = "PersonJobTitle"},
    :jobtitle.JobTitle = {rule = "PersonJobTitle"} 
 
 
@@ -403,6 +412,8 @@
 )
 :person -->
   {}
+
+
 Rule: FirstPersonStop
 Priority: 50
 // John And
@@ -422,7 +433,7 @@
 gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
 features.put("gender", personAnn.getFeatures().get("gender"));
-features.put("kind", "personName");
+features.put("kind", "firstName");
 features.put("rule", "GazPersonFirst");
 outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
 features);
@@ -442,7 +453,7 @@
 )
 (
  (PREFIX)* 
- (UPPER)
+ ({Upper})
  (PERSONENDING)?
 ):foo
 -->
@@ -452,33 +463,91 @@
 
 Rule: LocPersonAmbig1
 Priority: 50
-// Location + Surname
+// Location + Possible Surname --> Location only (ignore Surname)
+
 (
  {Lookup.majorType == location}
 ):loc
 (
  (PREFIX)* 
- (UPPER)
+ ({Upper,!Initials})
  (PERSONENDING)
 ):foo
 -->
 :loc.TempLocation = {kind = "locName", rule = LocPersonAmbig1}
 
+
 Rule: LocPersonAmbig2
 Priority: 50
-// Location + Surname
+// Location + Possible Surname --> Location only (ignore Surname)
+
 (
  {Lookup.majorType == location}
 ):loc
 (
  (PREFIX)
- (UPPER)
+ ({Upper,!Initials})
  (PERSONENDING)?
 ):foo
 -->
 :loc.TempLocation = {kind = "locName", rule = LocPersonAmbig2}
 
 
+
+Rule:  PersonFullInitials
+Priority: 10
+// F.W. Jones
+
+(
+ {Token.category == DT}
+)?
+(
+ 
+  ({Initials}):initials
+  ((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName
+ ((PREFIX)*
+  ({Upper,!Initials})
+  (PERSONENDING)?
+ ):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
+  
+  gate.AnnotationSet initialsSet = 
(gate.AnnotationSet)bindings.get("initials");
+  List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
+
+  Long initialsStart = gate.Utils.start(initialsList.get(0));
+  Long initialsEnd   = gate.Utils.end(initialsList.get(initialsList.size() - 
1));
+  String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart, 
initialsEnd); 
+  features.put("initials", initialsContent);
+ 
+
+ gate.AnnotationSet middleNameSet = 
(gate.AnnotationSet)bindings.get("middleName");
+
+ if (middleNameSet != null && middleNameSet.size()>0)
+{
+ gate.Annotation middleNameAnn = 
(gate.Annotation)middleNameSet.iterator().next();
+ String middleNameContent = gate.Utils.cleanStringFor(doc, middleNameAnn);
+ features.put("middleName", middleNameContent);
+ features.put("gender", middleNameAnn.getFeatures().get("gender"));
+}
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String surnameContent = gate.Utils.cleanStringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonFullInitials");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+
+}
+
+
 Rule:  PersonFull
 Priority: 10
 // F.W. Jones
@@ -487,31 +556,46 @@
  {Token.category == DT}
 )?
 (
- ((FIRSTNAME | FIRSTNAMEAMBIG) )+
- (PREFIX)*
- (UPPER)
- (PERSONENDING)?
-)
-:person -->
+ 
+  (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+  ((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName
+ ((PREFIX)*
+  ({Upper,!Initials})
+  (PERSONENDING)?
+ ):surname
+):person -->
 {
  gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
   
-  //get all firstPerson annotations that have a gender feature
-    HashSet fNames = new HashSet();
-    fNames.add("gender");
-    gate.AnnotationSet firstPerson = personSet.get("FirstPerson", fNames);
+  gate.AnnotationSet firstNameSet = 
(gate.AnnotationSet)bindings.get("firstName");
+  gate.Annotation firstNameAnn = 
(gate.Annotation)firstNameSet.iterator().next();
+ 
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ features.put("gender", firstNameAnn.getFeatures().get("gender"));
 
-  if (firstPerson != null && firstPerson.size()>0)
- {
-    //Out.prln("First persons found " +  firstPerson);
-  gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
-  features.put("gender", personAnn.getFeatures().get("gender"));
+ gate.AnnotationSet middleNameSet = 
(gate.AnnotationSet)bindings.get("middleName");
+
+ if (middleNameSet != null && middleNameSet.size()>0)
+{
+ gate.Annotation middleNameAnn = 
(gate.Annotation)middleNameSet.iterator().next();
+ String middleNameContent = gate.Utils.stringFor(doc, middleNameAnn);
+ features.put("middleName", middleNameContent);
 }
-  features.put("kind", "personName");
-  features.put("rule", "PersonFull");
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonFull");
 outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
 features);
+
 }
 
 Rule: PersonFullStop
@@ -521,20 +605,20 @@
 (
  ((FIRSTNAME | FIRSTNAMEAMBIG) )
  (PREFIX)* 
- (UPPER)
+ ({Upper})
 ):person
 (
  {Lookup.majorType == date}
 )
 -->
- :person.TempPerson = {kind = "personName", rule = "PersonFullStop"}
+ :person.TempPerson = {kind = "fullName", rule = "PersonFullStop"}
 
 
 Rule: NotPersonFullReverse
 Priority: 20
 // XYZ, I
 (
- (UPPER)
+ ({Upper})
  {Token.string == ","}
  {Token.category == PRP}
  (PERSONENDING)?
@@ -562,7 +646,7 @@
   gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
   features.put("gender", personAnn.getFeatures().get("gender"));
 }
-  features.put("kind", "personName");
+  features.put("kind", "firstName");
   features.put("rule", "PersonSaint");
 outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
 features);
@@ -576,26 +660,71 @@
 
 // Christian name + Location --> Person's Name
 (
- {Lookup.majorType == person_first}
-  {Lookup.majorType == location}
-)
-:person -->
+  ({Lookup.majorType == person_first}):firstName
+  ({Lookup.majorType == location}):surname
+):person -->
 {
  gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet firstPerson = 
(gate.AnnotationSet)inputAS.get("FirstPerson", 
personSet.firstNode().getOffset(), personSet.lastNode().getOffset());
-if (firstPerson != null && firstPerson.size()>0)
-{
-  gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
-  features.put("gender", personAnn.getFeatures().get("gender"));
-}
-  features.put("kind", "personName");
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); 
+ gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
+
+ gate.AnnotationSet firstNameSet = 
(gate.AnnotationSet)bindings.get("firstName"); 
+ gate.Annotation firstNameAnn = 
(gate.Annotation)firstNameSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); 
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ features.put("gender", firstNameAnn.getFeatures().get("minorType"));
+
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+  features.put("kind", "fullName");
   features.put("rule", "PersonLocAmbig");
 outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
 features);
 }
 
 
+Rule: TitlePersonLocAmbig
+Priority: 50
+// Professor London
+// title + Location --> Person's Name
+
+(
+  ({Title}):title
+  ({Lookup.majorType == location}):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); 
+ gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title"); 
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); 
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ String titleContent = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", titleContent);
+
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+  features.put("kind", "fullName");
+  features.put("rule", "TitlePersonLocAmbig");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+}
+
+
+
 Rule:PersonOrgAmbig
 Priority: 50
 // if the last name is an organisation ending, treat as an organisation not 
person
@@ -730,14 +859,14 @@
  {Token.string == "&"}
 
  (
-  {Token.orth == upperInitial, Token.category != PRP}
+  {Token.orth == upperInitial}
  )+
 
  (CDG)?
 
 )
 :orgName -->
-  :orgName.TempOrganization = {orgType = "unknown", rule = "INOrgXandY"}
+  :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandY"}
 
 Rule:  OrgXandY
 Priority: 20
@@ -753,10 +882,10 @@
  {Token.string == "&"}
 
  (
-  {Token.orth == upperInitial, Token.category != PRP }
+  {Token.orth == upperInitial}
  )+
 
- (CDG)
+ (CDG)?
 
 )
 :orgName -->
@@ -806,11 +935,11 @@
  {Token.category == DT}
 )
 (
-  (UPPER)
-  (UPPER)?
-  (UPPER)?
-  (UPPER)?
-  (UPPER)?
+  ({Upper})
+  ({Upper})?
+  ({Upper})?
+  ({Upper})?
+  ({Upper})?
  {Lookup.majorType == org_key}
  ({Lookup.majorType == org_ending})?
 )
@@ -834,6 +963,7 @@
 {}
 
 
+
 Rule: NotTheKey
 Priority: 200
 
@@ -846,17 +976,18 @@
 -->
 {}
 
+
 Rule:  OrgXKey
 Priority: 125
 
 // Aaaa Ltd.
-
+({Token.category == DT})?
 (
-  ((UPPER))
-  (UPPER)?
-  (UPPER)?
-  (UPPER)?
-  (UPPER)?
+  ({Upper})
+  ({Upper})?
+  ({Upper})?
+  ({Upper})?
+  ({Upper})?
  {Lookup.majorType == org_key}
  ({Lookup.majorType == org_ending})?
 )
@@ -889,8 +1020,8 @@
  {Token.category == DT}
 )
 (
-  (UPPER)
-  (UPPER)?
+  ({Upper})
+  ({Upper})?
  {Lookup.majorType == cdg}
 )
 :orgName -->
@@ -904,8 +1035,8 @@
 // Coca Cola Co.
 
 (
-  (UPPER)
-  (UPPER)?
+  ({Upper})
+  ({Upper})?
  {Lookup.majorType == cdg}
 )
 :orgName -->
@@ -918,13 +1049,13 @@
  {Token.category == DT}
 )
 (
- (UPPER)
- (UPPER)?
+ ({Upper})
+ ({Upper})?
   (({Token.string == "and"} | 
     {Token.string == "&"})
-   (UPPER)?
-   (UPPER)?
-   (UPPER)?
+   ({Upper})?
+   ({Upper})?
+   ({Upper})?
   )
  {Lookup.majorType == org_key}
  ({Lookup.majorType == org_ending})?
@@ -943,13 +1074,13 @@
 // but NOT A XXX Services Ltd.
 
 (
- (UPPER)
- (UPPER)?
+ ({Upper})
+ ({Upper})?
   (({Token.string == "and"} | 
     {Token.string == "&"})
-   (UPPER)?
-   (UPPER)?
-   (UPPER)?
+   ({Upper})?
+   ({Upper})?
+   ({Upper})?
   )
  {Lookup.majorType == org_key}
  ({Lookup.majorType == org_ending})?
@@ -965,8 +1096,8 @@
 // Queen's Ware
 
 (
-  (UPPER)?
-  (UPPER)?
+  ({Upper})?
+  ({Upper})?
   ({Token.orth == upperInitial}
    {Token.string == "'"}
    ({Token.string == "s"})?
@@ -1008,19 +1139,19 @@
 )
 (
  (
-  (UPPER)|
+  ({Upper})|
   {Lookup.majorType == organization}
  )
- (UPPER)?
- (UPPER)?
+ ({Upper})?
+ ({Upper})?
  ({Lookup.majorType == org_base}|
   {Lookup.majorType == govern_key}
  )
  (
   {Token.string == "of"}
-  (UPPER)
-  (UPPER)?
-  (UPPER)?
+  ({Upper})
+  ({Upper})?
+  ({Upper})?
  )?
 )
 :orgName -->
@@ -1037,19 +1168,19 @@
 
 (
  (
-  (UPPER)|
+  ({Upper})|
   {Lookup.majorType == organization}
  )
- (UPPER)?
- (UPPER)?
+ ({Upper})?
+ ({Upper})?
  ({Lookup.majorType == org_base}|
   {Lookup.majorType == govern_key}
  )
  (
   {Token.string == "of"}
-  (UPPER)
-  (UPPER)?
-  (UPPER)?
+  ({Upper})
+  ({Upper})?
+  ({Upper})?
  )?
 )
 :orgName -->
@@ -1070,8 +1201,8 @@
  ( 
   {Token.category == DT}
  )?
- (UPPER)
- (UPPER)?
+ ({Upper})
+ ({Upper})?
 )
 :orgName -->
   :orgName.TempOrganization = {orgType = "unknown", rule = "BaseofOrg"}
@@ -1091,8 +1222,8 @@
  ( 
   {Token.category == DT}
  )?
- (UPPER)
- (UPPER)?
+ ({Upper})
+ ({Upper})?
 )
 :orgName -->
   :orgName.TempOrganization = {orgType = "unknown", rule = "BaseofOrg"}
@@ -1115,7 +1246,6 @@
 
 
 
-
 Rule: OrgChurch
 Priority: 150
 // St. Andrew's Church
@@ -1136,7 +1266,9 @@
 // overrides PersonFull
 
 (
+ (TITLE)?
  (FIRSTNAME)
+ {Token.string == "'"}({Token.string == "s"})?
  ({Lookup.majorType == org_key}|
   {Lookup.majorType == org_base})
  ({Lookup.majorType == org_ending})?
@@ -1284,7 +1416,7 @@
 (
  ({Lookup.majorType == loc_key, Lookup.minorType == pre}
  )
- (UPPER)
+ ({Upper})
  (
   {Lookup.majorType == loc_key, Lookup.minorType == post})?
 )
@@ -1297,8 +1429,7 @@
 
 Rule:InLoc1
 (
- {Token.string == "in"}|
- {Token.string == "to"}
+ {Token.string == "in"}
 )
 (
  {Lookup.majorType == location}
@@ -1314,7 +1445,7 @@
  {Token.string == "of"}
 )
 (
- (UPPER)
+ ({Upper})
 )
 :loc
 -->
@@ -1329,9 +1460,9 @@
  {Token.string == "company"}
 )
 (
- (UPPER)
- (UPPER)?
- (UPPER)? 
+ ({Upper})
+ ({Upper})?
+ ({Upper})? 
 )
 :org
 -->
@@ -1344,9 +1475,9 @@
 // Medici offices
 
 (
- (UPPER)
- (UPPER)?
- (UPPER)? 
+ ({Upper})
+ ({Upper})?
+ ({Upper})? 
 )
 : org
 (
@@ -1374,9 +1505,9 @@
  )
 )
 (
- (UPPER)
- (UPPER)?
- (UPPER)?
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
 )
 :org
 -->

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Managing the Performance of Cloud-Based Applications
Take advantage of what the Cloud has to offer - Avoid Common Pitfalls.
Read the Whitepaper.
http://pubads.g.doubleclick.net/gampad/clk?id=121054471&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to