Revision: 18907
          http://sourceforge.net/p/gate/code/18907
Author:   dgmaynard
Date:     2015-09-15 10:08:00 +0000 (Tue, 15 Sep 2015)
Log Message:
-----------
made a few fixes thanks to Mark spotting some problems, hopefully I haven't 
buggered anything up. Longer names with titles get recognised better now, and 
some ambiguities are moved to the ambiguous gazetteer list

Modified Paths:
--------------
    gate/trunk/plugins/ANNIE/resources/NE/name.jape
    gate/trunk/plugins/ANNIE/resources/NE/name_context.jape
    gate/trunk/plugins/ANNIE/resources/gazetteer/lists.def
    gate/trunk/plugins/ANNIE/resources/gazetteer/person_male.lst
    gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_ambig.lst
    gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower.lst
    gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower_ambig.lst
    gate/trunk/plugins/ANNIE/resources/gazetteer/title_male.lst

Removed Paths:
-------------
    gate/trunk/plugins/ANNIE/resources/gazetteer/bands.lst

Modified: gate/trunk/plugins/ANNIE/resources/NE/name.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/name.jape     2015-09-15 01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/NE/name.jape     2015-09-15 10:08:00 UTC (rev 18907)
@@ -276,8 +276,61 @@
 }
 
 
+Rule:  PersonTitleUnknownGender
+Priority: 30
+// Prof. Jones
+// Matches a title (optionally followed by a second title and/or a first
+// name) and then a capitalised surname, and creates a TempPerson annotation
+// over the whole span.
+// This person will just get an unknown value for gender. Or we could decide
+// to make them male by default, as they're mostly military etc.
 
+// Optional leading token (determiner, pronoun or adverb). It sits outside the
+// :person binding, so it is not part of the annotation that gets created.
+( 
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ ({Title}):title
+ ({Title})?
+ (
+  (FIRSTNAME | FIRSTNAMEAMBIG )?
+ ):firstName
+ (
+  (PREFIX)* 
+  ({Upper})
+  (PERSONENDING)?
+ ):surname
+):person 
+-->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ 
+ // The first name is optional in the pattern, so this binding may be absent
+ // or empty; it is checked before use further down.
+ gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
 
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ // NOTE(review): the surname binding can cover several annotations (prefixes,
+ // the capitalised token, an ending); only the one the iterator returns first
+ // is used for the "surname" feature -- confirm this is the intended text.
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+  String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+  features.put("title", contentTitle);
+  features.put("gender", "unknown");
+
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+  gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
+  String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+  features.put("firstName", firstNameContent);
+ }
+  String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+  features.put("surname", surnameContent);
+
+  features.put("kind", "personName");
+  // The value below is not the same string as the rule name above; rule
+  // PersonTitle1 in name_context.jape tests for exactly this value, so the
+  // two must be changed together.
+  features.put("rule", "PersonTitleGenderUnknown");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+}
+
+
 Rule:  PersonTitleInitials
 Priority: 35
 

Modified: gate/trunk/plugins/ANNIE/resources/NE/name_context.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/name_context.jape     2015-09-15 01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/NE/name_context.jape     2015-09-15 10:08:00 UTC (rev 18907)
@@ -117,7 +117,9 @@
 Rule:PersonTitle1
 Priority: 40
 (
- {Person.rule1 == PersonTitle}
+ ({Person.rule == PersonTitle}|
+  {Person.rule == PersonTitleGenderUnknown}
+ )
  {Unknown}
 ):person
 -->

Deleted: gate/trunk/plugins/ANNIE/resources/gazetteer/bands.lst
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/bands.lst      2015-09-15 01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/bands.lst      2015-09-15 10:08:00 UTC (rev 18907)
@@ -1,32 +0,0 @@
-The Beatles
-Pink Floyd
-AC/DC
-The Rolling Stones
-ABBA
-Eagles
-U2
-Aerosmith
-Genesis
-Bee Gees
-Dire Straits
-Eminem
-Metallica
-Fleetwood Mac
-Backstreet Boys
-Guns N' Roses
-The Carpenters
-Def Leppard
-The Beach Boys
-Kiss
-The Who
-B'z
-Santana
-R.E.M.
-Red Hot Chili Peppers
-New Kids on the Block
-The Black Eyed Peas
-Green Day
-Nirvana
-Spice Girls
-Mötley Crüe
-Depeche Mode

Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/lists.def
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/lists.def      2015-09-15 01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/lists.def      2015-09-15 10:08:00 UTC (rev 18907)
@@ -115,3 +115,4 @@
 bands.lst:organization:band
 music-artists.lst:person_full:music
 religious_adj.lst:religious_adj
+test.lst:

Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/person_male.lst
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/person_male.lst        2015-09-15 01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/person_male.lst        2015-09-15 10:08:00 UTC (rev 18907)
@@ -2740,7 +2740,6 @@
 Pedr
 Pedran
 Pedro
-Peers
 Pelo
 Pelota
 Penjani
@@ -4360,3 +4359,5 @@
 Dariusz
 Lamberto
 Zine
+Francis
+Frederic

Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_ambig.lst
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_ambig.lst  2015-09-15 01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_ambig.lst  2015-09-15 10:08:00 UTC (rev 18907)
@@ -151,3 +151,4 @@
 Al:kind=ambig
 Franc:kind=ambig
 Bill:kind=ambig
+Peers:kind=ambig

Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower.lst
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower.lst  2015-09-15 01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower.lst  2015-09-15 10:08:00 UTC (rev 18907)
@@ -842,7 +842,6 @@
 eamon
 eamonn
 eanruig
-earnest
 ebenezer
 eberhard
 ebert

Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower_ambig.lst
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower_ambig.lst    2015-09-15 01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower_ambig.lst    2015-09-15 10:08:00 UTC (rev 18907)
@@ -161,3 +161,4 @@
 bill:kind=ambig
 franc:kind=ambig
 nab:kind=ambig
+earnest:kind=ambig

Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/title_male.lst
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/title_male.lst 2015-09-15 01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/title_male.lst 2015-09-15 10:08:00 UTC (rev 18907)
@@ -16,3 +16,5 @@
 Archbishop
 Baron
 Lord Chief Justice
+Right Hon
+Right Hon.

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to