Revision: 18907
          http://sourceforge.net/p/gate/code/18907
Author:   dgmaynard
Date:     2015-09-15 10:08:00 +0000 (Tue, 15 Sep 2015)
Log Message:
-----------
Made a few fixes thanks to Mark spotting some problems; hopefully I haven't buggered anything up. Longer names with titles get recognised better now, and some ambiguities are moved to the ambiguous gazetteer list.
Modified Paths: -------------- gate/trunk/plugins/ANNIE/resources/NE/name.jape gate/trunk/plugins/ANNIE/resources/NE/name_context.jape gate/trunk/plugins/ANNIE/resources/gazetteer/lists.def gate/trunk/plugins/ANNIE/resources/gazetteer/person_male.lst gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_ambig.lst gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower.lst gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower_ambig.lst gate/trunk/plugins/ANNIE/resources/gazetteer/title_male.lst Removed Paths: ------------- gate/trunk/plugins/ANNIE/resources/gazetteer/bands.lst Modified: gate/trunk/plugins/ANNIE/resources/NE/name.jape =================================================================== --- gate/trunk/plugins/ANNIE/resources/NE/name.jape 2015-09-15 01:19:57 UTC (rev 18906) +++ gate/trunk/plugins/ANNIE/resources/NE/name.jape 2015-09-15 10:08:00 UTC (rev 18907) @@ -276,8 +276,61 @@ } +Rule: PersonTitleUnknownGender +Priority: 30 +// Prof. Jones +// This person will just get an unknown value for gender. Or we could decide to make them male by default, as they're mostly military etc. +( + {Token.category == DT}| + {Token.category == PRP}| + {Token.category == RB} +)? +( + ({Title}):title + ({Title})? + ( + (FIRSTNAME | FIRSTNAMEAMBIG )? + ):firstName + ( + (PREFIX)* + ({Upper}) + (PERSONENDING)? 
+ ):surname +):person +--> +{ + gate.FeatureMap features = Factory.newFeatureMap(); + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + + gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName"); + gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title"); + gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next(); + + gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); + gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next(); + + String contentTitle = gate.Utils.stringFor(doc, titleAnn); + features.put("title", contentTitle); + features.put("gender", "unknown"); + + if (firstNameSet != null && firstNameSet.size()>0) + { + gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next(); + String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn); + features.put("firstName", firstNameContent); + } + String surnameContent = gate.Utils.stringFor(doc, surnameAnn); + features.put("surname", surnameContent); + + features.put("kind", "personName"); + features.put("rule", "PersonTitleGenderUnknown"); +outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", +features); +} + + Rule: PersonTitleInitials Priority: 35 Modified: gate/trunk/plugins/ANNIE/resources/NE/name_context.jape =================================================================== --- gate/trunk/plugins/ANNIE/resources/NE/name_context.jape 2015-09-15 01:19:57 UTC (rev 18906) +++ gate/trunk/plugins/ANNIE/resources/NE/name_context.jape 2015-09-15 10:08:00 UTC (rev 18907) @@ -117,7 +117,9 @@ Rule:PersonTitle1 Priority: 40 ( - {Person.rule1 == PersonTitle} + ({Person.rule == PersonTitle}| + {Person.rule == PersonTitleGenderUnknown} + ) {Unknown} ):person --> Deleted: gate/trunk/plugins/ANNIE/resources/gazetteer/bands.lst =================================================================== --- gate/trunk/plugins/ANNIE/resources/gazetteer/bands.lst 
2015-09-15 01:19:57 UTC (rev 18906) +++ gate/trunk/plugins/ANNIE/resources/gazetteer/bands.lst 2015-09-15 10:08:00 UTC (rev 18907) @@ -1,32 +0,0 @@ -The Beatles -Pink Floyd -AC/DC -The Rolling Stones -ABBA -Eagles -U2 -Aerosmith -Genesis -Bee Gees -Dire Straits -Eminem -Metallica -Fleetwood Mac -Backstreet Boys -Guns N' Roses -The Carpenters -Def Leppard -The Beach Boys -Kiss -The Who -B'z -Santana -R.E.M. -Red Hot Chili Peppers -New Kids on the Block -The Black Eyed Peas -Green Day -Nirvana -Spice Girls -Mötley Crüe -Depeche Mode Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/lists.def =================================================================== --- gate/trunk/plugins/ANNIE/resources/gazetteer/lists.def 2015-09-15 01:19:57 UTC (rev 18906) +++ gate/trunk/plugins/ANNIE/resources/gazetteer/lists.def 2015-09-15 10:08:00 UTC (rev 18907) @@ -115,3 +115,4 @@ bands.lst:organization:band music-artists.lst:person_full:music religious_adj.lst:religious_adj +test.lst: Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/person_male.lst =================================================================== --- gate/trunk/plugins/ANNIE/resources/gazetteer/person_male.lst 2015-09-15 01:19:57 UTC (rev 18906) +++ gate/trunk/plugins/ANNIE/resources/gazetteer/person_male.lst 2015-09-15 10:08:00 UTC (rev 18907) @@ -2740,7 +2740,6 @@ Pedr Pedran Pedro -Peers Pelo Pelota Penjani @@ -4360,3 +4359,5 @@ Dariusz Lamberto Zine +Francis +Frederic Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_ambig.lst =================================================================== --- gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_ambig.lst 2015-09-15 01:19:57 UTC (rev 18906) +++ gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_ambig.lst 2015-09-15 10:08:00 UTC (rev 18907) @@ -151,3 +151,4 @@ Al:kind=ambig Franc:kind=ambig Bill:kind=ambig +Peers:kind= Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower.lst 
=================================================================== --- gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower.lst 2015-09-15 01:19:57 UTC (rev 18906) +++ gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower.lst 2015-09-15 10:08:00 UTC (rev 18907) @@ -842,7 +842,6 @@ eamon eamonn eanruig -earnest ebenezer eberhard ebert Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower_ambig.lst =================================================================== --- gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower_ambig.lst 2015-09-15 01:19:57 UTC (rev 18906) +++ gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower_ambig.lst 2015-09-15 10:08:00 UTC (rev 18907) @@ -161,3 +161,4 @@ bill:kind=ambig franc:kind=ambig nab:kind=ambig +earnest:kind=ambig Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/title_male.lst =================================================================== --- gate/trunk/plugins/ANNIE/resources/gazetteer/title_male.lst 2015-09-15 01:19:57 UTC (rev 18906) +++ gate/trunk/plugins/ANNIE/resources/gazetteer/title_male.lst 2015-09-15 10:08:00 UTC (rev 18907) @@ -16,3 +16,5 @@ Archbishop Baron Lord Chief Justice +Right Hon +Right Hon. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ _______________________________________________ GATE-cvs mailing list GATE-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/gate-cvs