Revision: 19646
          http://sourceforge.net/p/gate/code/19646
Author:   dgmaynard
Date:     2016-10-06 12:35:17 +0000 (Thu, 06 Oct 2016)
Log Message:
-----------
updating to latest English version

Modified Paths:
--------------
    gate/trunk/plugins/Lang_French/grammar/clean.jape
    gate/trunk/plugins/Lang_French/grammar/date.jape
    gate/trunk/plugins/Lang_French/grammar/date_pre.jape
    gate/trunk/plugins/Lang_French/grammar/email.jape
    gate/trunk/plugins/Lang_French/grammar/final.jape
    gate/trunk/plugins/Lang_French/grammar/first.jape
    gate/trunk/plugins/Lang_French/grammar/firstname.jape
    gate/trunk/plugins/Lang_French/grammar/loc_context.jape
    gate/trunk/plugins/Lang_French/grammar/main.jape
    gate/trunk/plugins/Lang_French/grammar/name.jape
    gate/trunk/plugins/Lang_French/grammar/name_context.jape
    gate/trunk/plugins/Lang_French/grammar/name_post.jape
    gate/trunk/plugins/Lang_French/grammar/number.jape
    gate/trunk/plugins/Lang_French/grammar/org_context.jape
    gate/trunk/plugins/Lang_French/grammar/reldate.jape
    gate/trunk/plugins/Lang_French/grammar/unknown.jape
    gate/trunk/plugins/Lang_French/grammar/url.jape

Added Paths:
-----------
    gate/trunk/plugins/Lang_French/grammar/document_date.jape
    gate/trunk/plugins/Lang_French/grammar/hyphens.jape
    gate/trunk/plugins/Lang_French/grammar/main-twitter.jape
    gate/trunk/plugins/Lang_French/grammar/name-twitter.jape
    gate/trunk/plugins/Lang_French/grammar/number_clean.jape
    gate/trunk/plugins/Lang_French/grammar/numberletter.jape

Modified: gate/trunk/plugins/Lang_French/grammar/clean.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/clean.jape   2016-10-06 12:34:37 UTC (rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/clean.jape   2016-10-06 12:35:17 UTC (rev 19646)
@@ -14,8 +14,8 @@
 */
 
 Phase: Clean
-Input: TempPerson TempLocation TempOrganization TempDate TempTime TempYear TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs
-Options: control = appelt
+Input: TempPerson TempLocation TempOrganization TempDate TempTime TempYear TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs ClosedClass Initials Upper FirstPerson JobTitle HashtagToken HashtagLookup NumberLetter Temp Title UrlPre
+Options: control = all
 
 Rule:CleanTempAnnotations
 (
@@ -33,7 +33,18 @@
  {Phone}|
  {Ip}|
  {TempIdentifier}|
- {TempSpecs}
+ {TempSpecs}|
+ {ClosedClass}|
+ {Upper}|
+ {Initials}|
+ {FirstPerson}|
+ {JobTitle}|
+ {HashtagToken}|
+ {HashtagLookup}|
+ {Title}|
+ {UrlPre}|
+ {Temp}|
+ {NumberLetter}
 ):temp
 -->
 {

Modified: gate/trunk/plugins/Lang_French/grammar/date.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/date.jape    2016-10-06 12:34:37 UTC 
(rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/date.jape    2016-10-06 12:35:17 UTC 
(rev 19646)
@@ -21,13 +21,6 @@
 
 /////////////////////////////////////////////////
 
-Macro: DEF_ART_SING
-(
- {Token.string == "le"}|
- {Token.string == "la"}|
- {Token.string == "l"}{Token.string == "'"}
- )
-
 Macro: DAY_NAME 
 ({Lookup.minorType == day })
 
@@ -70,6 +63,9 @@
 Macro: DASH
   {Token.string == "-"}
 
+Macro: DOT
+  {Token.string == "."}
+
 Macro: OF
   {Token.string == "of"}
 
@@ -112,11 +108,7 @@
 
 Macro: ORDINAL
 (
-   ({Token.kind == number}
-    ({Token.string == "th"}|
-     {Token.string == "rd"}|
-     {Token.string == "nd"}|
-     {Token.string == "st"})
+   ({Token.string ==~ "[0-9][0-9]?(th|rd|nd|st)"}
     |
    {Lookup.minorType == ordinal})
    (
@@ -170,33 +162,38 @@
 
 
 Rule:  TimeDigital2
-// 04h30
-// 6h
 
+// 8:14 am
+// 4.34 pm
+// 6am
+
 (
  (ONE_DIGIT|TWO_DIGIT) 
- ({Token.string == "h"}|{Token.string == "H"})
-  (TWO_DIGIT)
+ (({Token.string == ":"}|{Token.string == "."} |{Token.string == "-"} )
+  TWO_DIGIT)?
+ (TIME_AMPM)
  (TIME_ZONE)?
 )
 :time
 -->
-:time.TempTime = {kind = "positive", rule = "TimeDigital2"}
+:time.TempTime = {kind = "positive", rule = "TimeDigital"}
 
 
 Rule: TimeOClock
-// dix heures
+// ten o'clock
 
 (
  {Lookup.minorType == hour}
- {Token.string == "heures"}
+ {Token.string == "o"}
+ {Token.string == "'"}
+ {Token.string == "clock"}
 )
 :time 
 -->
  :time.TempTime = {kind = "positive", rule = "TimeOClock"}
 
  
-/*Rule: TimeAnalogue
+Rule: TimeAnalogue
 // half past ten
 // ten to twelve
 // twenty six minutes to twelve
@@ -217,9 +214,9 @@
 :time 
 -->
  :time.TempTime = {kind = "positive", rule = "TimeAnalogue"}
-*/
 
-/*Rule: TimeWordsContext
+
+Rule: TimeWordsContext
 Priority: 50
 // seven thirty tomorrow
 
@@ -234,10 +231,10 @@
 ) 
 -->
 :time1.TempTime = {kind = "positive", rule = "TimeWordsContext"}
-*/
 
-/*Rule: TimeWords
 
+Rule: TimeWords
+
 (
  {Lookup.majorType == number}
  (
@@ -248,7 +245,7 @@
 -->
   :time.TempTime = {kind = "timeWords", rule = "TimeWords"}
 
-  */
+  
 
 
 Rule: TimeDigitalContext1
@@ -356,20 +353,16 @@
 
 // Date Rules
 
-// commented out this rule because Date and Person are not included in the 
-// Input headers and I have no idea if adding them will mess up other rules
+//Rule: IgnoreDatePerson
+//Priority: 500
+//(
+// {Date}
+// {Person}
+//)
+//:date
+//-->
+//{}
 
-/*
-Rule: IgnoreDatePerson
-Priority: 500
-(
- {Date}
- {Person}
-)
-:date
--->
-{}
-*/
 
 
 Rule:  DateSlash           // UK only
@@ -405,7 +398,6 @@
  :date.TempDate = {rule = "DateDash"}
 
 
-
 Rule:  DateName
 Priority: 20
 // Wed 10 July
@@ -417,7 +409,6 @@
 // July, 2000
 
 (
- (DEF_ART_SING)?
  (DAY_NAME NUM_OR_ORDINAL MONTH_NAME)|
 
  (DAY_NAME (COMMA)? 
@@ -505,16 +496,16 @@
  :date.TempDate = {rule = "DateNumDashRev"}
 
 
-Rule:  DateNumSlash
+Rule:  DateNumSlashDot
 // 01/07/00
 // Note: not 07/00
 
 ( 
-DAY_MONTH_NUM SLASH DAY_MONTH_NUM SLASH YEAR
+DAY_MONTH_NUM (SLASH|DOT) DAY_MONTH_NUM (SLASH|DOT) YEAR
 )
 :date
 -->
- :date.TempDate = {rule = "DateNumSlash"}
+ :date.TempDate = {rule = "DateNumSlashDot"}
 
 
 Rule: ModifierMonth
@@ -626,7 +617,7 @@
 
 (FOUR_DIGIT)
 :date -->
- :date.TempYear = {kind = "positive", rule = "TempYear3"}
+ :date.TempYear = {kind = "negative", rule = "TempYear3"}
 
 
 Rule: YearWords

Modified: gate/trunk/plugins/Lang_French/grammar/date_pre.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/date_pre.jape        2016-10-06 
12:34:37 UTC (rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/date_pre.jape        2016-10-06 
12:35:17 UTC (rev 19646)
@@ -51,6 +51,16 @@
 -->
  :date.TempDate = {rule = "GazDate"}
 
+Rule: GazDateAmbig
+Priority: 200
+(SPACE | {Token.kind == punctuation})
+(
+ {Token.string == "Sun"}
+)
+:date
+(SPACE | {Token.kind == punctuation})
+-->
+ :date.TempDate = {rule = "GazDateAmbig", }
 
 Rule: PersonDateAmbig
 Priority: 100

Added: gate/trunk/plugins/Lang_French/grammar/document_date.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/document_date.jape                   
        (rev 0)
+++ gate/trunk/plugins/Lang_French/grammar/document_date.jape   2016-10-06 
12:35:17 UTC (rev 19646)
@@ -0,0 +1,33 @@
+Phase: DateHeader
+Input: DCT
+Options: control = appelt
+
+Rule: DCT
+(
+ {DCT}
+):tag
+-->
+{
+gate.AnnotationSet tagSet = (gate.AnnotationSet)bindings.get("tag");
+gate.Annotation tagAnn = (gate.Annotation)tagSet.iterator().next();
+
+gate.FeatureMap features = Factory.newFeatureMap();
+
+
+String s = gate.Utils.stringFor(doc, tagAnn);
+//String content = doc.getContent().getContent(tagAnn.getStartNode().getOffset(),
+ //                tagAnn.getEndNode().getOffset()).toString();
+ 
+ if (s.matches("^\\d{8}$") ) {
+String s1 = s.substring(0,4) + "-" + s.substring(4,6) + "-" + s.substring(6,8);
+
+doc.getFeatures().put("document-date", s1);
+}
+
+ }
+
+
+
+
+
+


Property changes on: gate/trunk/plugins/Lang_French/grammar/document_date.jape
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Modified: gate/trunk/plugins/Lang_French/grammar/email.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/email.jape   2016-10-06 12:34:37 UTC 
(rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/email.jape   2016-10-06 12:35:17 UTC 
(rev 19646)
@@ -25,15 +25,17 @@
 (
  (
   {Token.kind == word}|
-  {Token.kind == number}|
+  {Token.kind == number}
+ )[1,9]
+ (
   {Token.string == "_"}
- )
- ({Token.string == "."}
+ )?
+ ({Token.string == "."})?
   ({Token.kind == word}|
    {Token.kind == number}|
    {Token.string == "_"}
-  )
- )?
+  )[0,9]
+ 
  {Token.string == "@"}         
  (
   {Token.kind == word}|
@@ -47,7 +49,7 @@
   {Token.kind == symbol}|
   {Token.kind == punctuation}|
   {Token.kind == number}
- )?
+ )[0,9]
  ({Token.string == "."})?
 (
   {Token.kind == word}|

Modified: gate/trunk/plugins/Lang_French/grammar/final.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/final.jape   2016-10-06 12:34:37 UTC 
(rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/final.jape   2016-10-06 12:35:17 UTC 
(rev 19646)
@@ -16,34 +16,34 @@
 //note: organization should be included as part of the address ??
 
 Phase: Final
-Input: Token Lookup JobTitle TempPerson TempLocation TempOrganization TempDate TempTime TempYear TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs
+Input: Token Lookup Jobtitle TempPerson TempLocation TempOrganization TempDate TempTime TempYear TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs Title Split Money
 Options: control = appelt
 
 
 ///////////////////////////////////////////////////////////////
+Rule: Money
+Priority: 200
+(
+ {Money}
+)
+--> 
+{}
 
 Rule: PersonFinal
 Priority: 30
-({JobTitle}
-)?
+
 (
- {TempPerson.kind == personName}
-)+
+ {TempPerson}
+)
 :person
 --> 
 {
  gate.FeatureMap features = Factory.newFeatureMap();
 gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.Annotation person1Ann = (gate.Annotation)personSet.iterator().next();
+gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
 
-gate.AnnotationSet firstPerson = (gate.AnnotationSet)personSet.get("TempPerson");
-if (firstPerson != null && firstPerson.size()>0)
-{
-  gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
-  features.put("gender", personAnn.getFeatures().get("gender"));
-}
-  features.put("rule1", person1Ann.getFeatures().get("rule"));
-  features.put("rule", "PersonFinal");
+features.putAll(personAnn.getFeatures());
+features.put("ruleFinal", "PersonFinal");
 outputAS.add(personSet.firstNode(), personSet.lastNode(), "Person",
 features);
 outputAS.removeAll(personSet);
@@ -75,21 +75,23 @@
 -->
 {
 //removes TempOrg annotation, gets the rule feature and adds a new Org annotation
-gate.AnnotationSet org = (gate.AnnotationSet)bindings.get("org");
-gate.Annotation orgAnn = (gate.Annotation)org.iterator().next();
+gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("org");
+//locate the first TempOrganization annotation
+//(there will always be at least one)
+gate.Annotation orgAnn = orgSet.get("TempOrganization").iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("orgType", orgAnn.getFeatures().get("orgType"));
-features.put("rule1", orgAnn.getFeatures().get("rule"));
-features.put("rule2", "OrgCountryFinal");
-outputAS.add(org.firstNode(), org.lastNode(), "Organization",
+
+features.putAll(orgAnn.getFeatures());
+features.put("ruleFinal", "OrgCountryFinal");
+outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "Organization",
 features);
-outputAS.removeAll(org);
+outputAS.removeAll(orgSet);
 }
  
 
+// note - move this rule to after final
+// another note - I have no idea why the original note is there, or even which rule this refers to
 
-//note - move this rule to after final
-
 Rule: OrgFinal
 Priority: 10
 (
@@ -99,21 +101,21 @@
 --> 
 {
 //removes TempOrg annotation, gets the rule feature and adds a new Org 
annotation
-gate.AnnotationSet org = (gate.AnnotationSet)bindings.get("org");
-gate.Annotation orgAnn = (gate.Annotation)org.iterator().next();
+gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("org");
+gate.Annotation orgAnn = (gate.Annotation)orgSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("orgType", orgAnn.getFeatures().get("orgType"));
-features.put("rule1", orgAnn.getFeatures().get("rule"));
-features.put("rule2", "OrgFinal");
-outputAS.add(org.firstNode(), org.lastNode(), "Organization",
+
+features.putAll(orgAnn.getFeatures());
+features.put("ruleFinal", "OrgFinal");
+outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "Organization",
 features);
-outputAS.removeAll(org);
+outputAS.removeAll(orgSet);
 }
 
 
 Rule: PersonLocFinal
 Priority: 100
-// George Airport
+// George Airport is a Location not a Person
 // later we might change this to any facility, rather than just airports
 
 (
@@ -125,14 +127,15 @@
 -->
  {
 //removes TempLoc annotation, gets the rule feature and adds a new Loc 
annotation
-gate.AnnotationSet loc = (gate.AnnotationSet)bindings.get("loc");
-gate.Annotation locAnn = (gate.Annotation)loc.iterator().next();
+gate.AnnotationSet locSet = (gate.AnnotationSet)bindings.get("loc");
+gate.Annotation locAnn = (gate.Annotation)locSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", locAnn.getFeatures().get("rule"));
-features.put("rule2", "PersonLocFinal");
-outputAS.add(loc.firstNode(), loc.lastNode(), "Location",
+features.putAll(locAnn.getFeatures());
+features.put("ruleFinal", "PersonLocFinal");
+features.put("locType", "airport");
+outputAS.add(locSet.firstNode(), locSet.lastNode(), "Location",
 features);
-outputAS.removeAll(loc);
+outputAS.removeAll(locSet);
 }
 
 
@@ -146,15 +149,14 @@
 --> 
  {
 //removes TempLoc annotation, gets the rule feature and adds a new Loc 
annotation
-gate.AnnotationSet loc = (gate.AnnotationSet)bindings.get("loc");
-gate.Annotation locAnn = (gate.Annotation)loc.iterator().next();
+gate.AnnotationSet locSet = (gate.AnnotationSet)bindings.get("loc");
+gate.Annotation locAnn = (gate.Annotation)locSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("locType",locAnn.getFeatures().get("locType"));
-features.put("rule1", locAnn.getFeatures().get("rule"));
-features.put("rule2", "LocFinal");
-outputAS.add(loc.firstNode(), loc.lastNode(), "Location",
+features.putAll(locAnn.getFeatures());
+features.put("ruleFinal", "LocFinal");
+outputAS.add(locSet.firstNode(), locSet.lastNode(), "Location",
 features);
-outputAS.removeAll(loc);
+outputAS.removeAll(locSet);
 }
 
 
@@ -165,13 +167,15 @@
 Rule: DateTimeFinal
 Priority: 20
 // Friday 10 January 2000 2pm
+// 2008-01-25T16:10:48
 
 (
  {TempDate}
  (
   ({Token.string == ","})?
   {TempDate})?
- ({Token.string == ":"})?
+ ({Token.string == ":"}|
+  {Token.string == "T"})?
  {TempTime}
  ({TempYear})?
  ({TempZone})?
@@ -180,15 +184,14 @@
 -->
  {
 //removes TempDate annotation, gets the rule feature and adds a new Date 
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-//features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "DateTimeFinal");
+features.put("ruleFinal", "DateTimeFinal");
 features.put("kind", "dateTime");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
 features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
 }
 
 
@@ -203,15 +206,15 @@
 -->
  {
 //removes TempDate annotation, gets the rule feature and adds a new Date 
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "SeasonYearFinal");
+features.putAll(dateAnn.getFeatures());
+features.put("ruleFinal", "SeasonYearFinal");
 features.put("kind", "date");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
 features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
 }
 
 
@@ -229,15 +232,15 @@
 -->
  {
 //removes TempDate annotation, gets the rule feature and adds a new Date 
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
+features.putAll(dateAnn.getFeatures());
 features.put("rule2", "DateYearFinal");
 features.put("kind", "date");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
 features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
 }
 
 
@@ -256,15 +259,14 @@
 -->
  {
 //removes TempDate annotation, gets the rule feature and adds a new Date 
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-//features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "TimeDateFinal");
+features.put("ruleFinal", "TimeDateFinal");
 features.put("kind", "dateTime");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
 features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
 }
 
 
@@ -282,15 +284,14 @@
 -->
   {
 //removes TempDate annotation, gets the rule feature and adds a new Date 
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-//features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "TimeYearFinal");
+features.put("ruleFinal", "TimeYearFinal");
 features.put("kind", "dateTime");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
 features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
 }
 
 
@@ -298,7 +299,10 @@
 
 
 Rule: DateOnlyFinal
-Priority: 10
+Priority: 50
+( 
+ {Title}
+)?
 (
  {TempDate}
 )
@@ -306,15 +310,15 @@
 -->
  {
 //removes TempDate annotation, gets the rule feature and adds a new Date 
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "DateOnlyFinal");
+features.putAll(dateAnn.getFeatures());
+features.put("ruleFinal", "DateOnlyFinal");
 features.put("kind", "date");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
 features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
 }
 
 //fix this later
@@ -333,25 +337,26 @@
 -->
   {
 //removes TempDate annotation, gets the rule feature and adds a new Date 
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule", "TimeContextFinal");
+features.putAll(dateAnn.getFeatures());
+features.put("ruleFinal", "TimeContextFinal");
 features.put("kind", "date");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
 features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
+
 //removes TempTime annotation, gets the rule feature and adds a new Date 
annotation
-gate.AnnotationSet time = (gate.AnnotationSet)bindings.get("time");
-gate.Annotation timeAnn = (gate.Annotation)time.iterator().next();
+gate.AnnotationSet timeSet = (gate.AnnotationSet)bindings.get("time");
+gate.Annotation timeAnn = (gate.Annotation)timeSet.iterator().next();
 gate.FeatureMap features2 = Factory.newFeatureMap();
-features2.put("rule1", timeAnn.getFeatures().get("rule"));
-features2.put("rule", "TimeContextFinal");
+features.putAll(timeAnn.getFeatures());
+features2.put("ruleFinal", "TimeContextFinal");
 features2.put("kind", "time");
-outputAS.add(time.firstNode(), date.lastNode(), "Date",
+outputAS.add(timeSet.firstNode(), timeSet.lastNode(), "Date",
 features2);
-outputAS.removeAll(time);
+outputAS.removeAll(timeSet);
 }
 
 
@@ -368,15 +373,15 @@
 -->
  {
 //removes TempTime annotation, gets the rule feature and adds a new Date 
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "TimeWordsContextFinal");
+features.putAll(dateAnn.getFeatures());
+features.put("ruleFinal", "TimeWordsContextFinal");
 features.put("kind", "time");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
 features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
 }
 
 
@@ -389,15 +394,15 @@
 --> 
 {
 //removes TempDate annotation, gets the rule feature and adds a new Date 
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "YearOnlyFinal");
+features.putAll(dateAnn.getFeatures());
+features.put("ruleFinal", "YearOnlyFinal");
 features.put("kind", "date");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
 features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
 }
 
 
@@ -411,15 +416,15 @@
 -->
 {
 //removes TempDate annotation, gets the rule feature and adds a new Date 
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "TimeOnlyFinal");
+features.putAll(dateAnn.getFeatures());
+features.put("ruleFinal", "TimeOnlyFinal");
 features.put("kind", "time");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
 features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
 }
 
 
@@ -440,15 +445,15 @@
 -->
 {
 //removes TempAddress annotation, gets the rule feature and adds a new Address 
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
-features.put("rule2", "AddressFull");
+features.putAll(addressAnn.getFeatures());
+features.put("ruleFinal", "AddressFull");
 features.put("kind", "complete");
-outputAS.add(address.firstNode(), address.lastNode(), "Address",
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address",
 features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
 }
 
 
@@ -461,15 +466,15 @@
 -->
 {
 //removes Email annotation, gets the rule feature and adds a new Address 
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
-features.put("rule2", "EmailFinal");
+features.putAll(addressAnn.getFeatures());
+features.put("ruleFinal", "EmailFinal");
 features.put("kind", "email");
-outputAS.add(address.firstNode(), address.lastNode(), "Address",
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address",
 features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
 }
 
 
@@ -482,15 +487,15 @@
 -->
 {
 //removes TempAddress annotation, gets the rule feature and adds a new Address 
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
-features.put("rule2", "PhoneFinal");
+features.putAll(addressAnn.getFeatures());
+features.put("ruleFinal", "PhoneFinal");
 features.put("kind", "phone");
-outputAS.add(address.firstNode(), address.lastNode(), "Address",
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address",
 features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
 }
 
 
@@ -503,15 +508,15 @@
 -->
 {
 //removes TempAddress annotation, gets the rule feature and adds a new Address 
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
-features.put("rule2", "PostcodeFinal");
+features.putAll(addressAnn.getFeatures());
+features.put("ruleFinal", "PostcodeFinal");
 features.put("kind", "postcode");
-outputAS.add(address.firstNode(), address.lastNode(), "Address",
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address",
 features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
 }
 
 
@@ -524,15 +529,15 @@
 -->
 {
 //removes TempAddress annotation, gets the rule feature and adds a new Address 
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
-features.put("rule2", "IpFinal");
+features.putAll(addressAnn.getFeatures());
+features.put("ruleFinal", "IpFinal");
 features.put("kind", "ip");
-outputAS.add(address.firstNode(), address.lastNode(), "Address",
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address",
 features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
 }
 
 
@@ -545,15 +550,15 @@
 -->
 {
 //removes TempAddress annotation, gets the rule feature and adds a new Address 
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
+features.putAll(addressAnn.getFeatures());
 features.put("rule2", "UrlFinal");
 features.put("kind", "url");
-outputAS.add(address.firstNode(), address.lastNode(), "Address",
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address",
 features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
 }
 
 
@@ -567,14 +572,14 @@
 -->
 {
 //removes TempAddress annotation, gets the rule feature and adds a new Address 
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
-features.put("rule2", "StreetFinal");
-outputAS.add(address.firstNode(), address.lastNode(), "Location",
+features.putAll(addressAnn.getFeatures());
+features.put("ruleFinal", "StreetFinal");
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Location",
 features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
 }
 
 ////////////////////////////////////////////////////////////
@@ -590,14 +595,14 @@
 -->
 {
 //removes TempIdent annotation, gets the rule feature and adds a new 
Identifier annotation
-gate.AnnotationSet ident = (gate.AnnotationSet)bindings.get("ident");
-gate.Annotation identAnn = (gate.Annotation)ident.iterator().next();
+gate.AnnotationSet identSet = (gate.AnnotationSet)bindings.get("ident");
+gate.Annotation identAnn = (gate.Annotation)identSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", identAnn.getFeatures().get("rule"));
-features.put("rule2", "IdentifierFinal");
-outputAS.add(ident.firstNode(), ident.lastNode(), "Identifier",
+features.putAll(identAnn.getFeatures());
+features.put("ruleFinal", "IdentifierFinal");
+outputAS.add(identSet.firstNode(), identSet.lastNode(), "Identifier",
 features);
-outputAS.removeAll(ident);
+outputAS.removeAll(identSet);
 }
 
 
@@ -613,38 +618,10 @@
 -->
 {
 //removes TempSpecs annotation
-gate.AnnotationSet spec = (gate.AnnotationSet)bindings.get("spec");
+gate.AnnotationSet specSet = (gate.AnnotationSet)bindings.get("spec");
 //gate.FeatureMap features = Factory.newFeatureMap();
-outputAS.removeAll(spec);
+outputAS.removeAll(specSet);
 }
 
 //////////////////////////////////////////////////////
 
-Rule: UnknownPerson
-Priority: 5
-( 
- {Token.category == NNP}
- (({Token.string == "-"})?
-  {Token.category == NNP})?
- ( {Token.category == NNP})?
- ( {Token.category == NNP})?
-):unknown
- 
-(
- {TempPerson}
-):person
--->
-:unknown.Unknown = {kind = "PN", rule = UnknownTempPerson},
-{
-//removes TempPerson annotation, gets the rule feature and adds a new Person annotation
-gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
-gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
-gate.FeatureMap features = Factory.newFeatureMap();
-features.put("gender", personAnn.getFeatures().get("gender"));
-features.put("rule1", personAnn.getFeatures().get("rule"));
-features.put("rule2", "UnknownPerson");
-outputAS.add(person.firstNode(), person.lastNode(), "Person",
-features);
-outputAS.removeAll(person);
-}
-

Modified: gate/trunk/plugins/Lang_French/grammar/first.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/first.jape   2016-10-06 12:34:37 UTC 
(rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/first.jape   2016-10-06 12:35:17 UTC 
(rev 19646)
@@ -14,11 +14,10 @@
 */
 
 Phase: First
-Input: Token Lookup
+Input: Token NumberLetter
 Options: control = appelt
 
 // this has to be run first of all 
-// contains any macros etc needed only for standard grammars
 
 //////////////////////////////////////////////////////////////
 Macro: SPACE
@@ -40,21 +39,59 @@
 )
 
 
+
+
 ///////////////////////////////////////////////////////////////
 
-Rule: Silly
-// we have to have a rule here, so we'll just have something silly
+Rule: ClosedClass
+// closed class words should not be part of names generally, so let's identify them
+// (tokens whose POS category is DT, PRP, RB or IN are tagged as ClosedClass)
+Priority: 100
 
 (
- {Token.string == "afguahughaegarth"}
-)
-:silly
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}|
+ {Token.category == IN}
+):tag
 -->
- {}
+:tag.ClosedClass = {rule = "ClosedClass"}
 
+Rule: NumberLetter
+// Matches a NumberLetter annotation and does nothing with it (empty action).
+// NOTE(review): presumably this is here so that the Upper rules below do not
+// fire on the same span - confirm against the numberletter phase.
+Priority: 100
+( 
+ {NumberLetter}
+):tag
+-->
+{} 
 
 
+Rule: UpperAllCaps
+Priority: 100
+// separate proper nouns that are in all caps, as they're more ambiguous
+// Output: Upper with kind = "allCaps". Note that the rule feature is written
+// as "Upper", the same value the plain Upper rule uses, not "UpperAllCaps".
+(
+ {Token.category == NNP, Token.orth == allCaps}
+ ({Token.string == "-"}
+  {Token.category == NNP, Token.orth == allCaps}
+ )?
+):tag
+-->
+:tag.Upper = {kind = "allCaps", rule = "Upper"}
 
+Rule: Upper
+// define what can be a possible proper noun - cater for the fact that POS tag might not be correct
+// (an NNP, upperInitial or mixedCaps token, optionally followed by "-" and an NNP token)
+(
+ ({Token.category == NNP}| 
+   {Token.orth == upperInitial}|
+   {Token.orth == mixedCaps} 
+  )
+ ({Token.string == "-"}
+  {Token.category == NNP}
+ )?
+):tag
+-->
+:tag.Upper = {rule = "Upper"}
 
 
 
+
+

Modified: gate/trunk/plugins/Lang_French/grammar/firstname.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/firstname.jape       2016-10-06 
12:34:37 UTC (rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/firstname.jape       2016-10-06 
12:35:17 UTC (rev 19646)
@@ -14,14 +14,87 @@
 */
 
 Phase: FirstName
-Input: Token Lookup
+Input: Token Lookup ClosedClass NumberLetter UserID
 Options: control = appelt
 
+
+Rule: FirstNameTwitterName
+Priority: 500
+// @fred
+// A person_first Lookup that is matched together with a UserID and is not
+// marked ambiguous becomes a FirstPerson with twittername = "yes".
+
+(
+ {Lookup.majorType == person_first, UserID, Lookup.kind !=ambig}
+):person
+-->
+{
+// the binding holds more than one annotation type, so keep only the Lookup
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person").get("Lookup");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+
+//find out if the gender is unambiguous
+String gender = (String)personAnn.getFeatures().get("minorType");
+boolean ambig = false;
+gate.FeatureMap constraints = Factory.newFeatureMap();
+constraints.put("majorType", "person_first");
+Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
+while(!ambig && lookupsIter.hasNext()){
+  gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
+  //we're only interested in annots of the same length
+  if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
+    // a same-span Lookup with a different minorType makes the gender ambiguous
+    ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
+  }
+}
+// gender is only recorded when every same-span Lookup agrees on it
+if(!ambig) features.put("gender", gender);
+
+features.put("rule", "FirstNameTwitterName");
+features.put("twittername", "yes");
+outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
+features);
+}
+
+
+Rule: FirstNameTwitterNameAmbig
+Priority: 600
+// @mark
+// Same as FirstNameTwitterName, but for Lookups whose kind is ambig; the
+// resulting FirstPerson additionally carries kind = "ambig".
+
+(
+ {Lookup.majorType == person_first, UserID, Lookup.kind ==ambig}
+):person
+-->
+{
+// the binding holds more than one annotation type, so keep only the Lookup
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person").get("Lookup");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+
+//find out if the gender is unambiguous
+String gender = (String)personAnn.getFeatures().get("minorType");
+boolean ambig = false;
+gate.FeatureMap constraints = Factory.newFeatureMap();
+constraints.put("majorType", "person_first");
+Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
+while(!ambig && lookupsIter.hasNext()){
+  gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
+  //we're only interested in annots of the same length
+  if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
+    // a same-span Lookup with a different minorType makes the gender ambiguous
+    ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
+  }
+}
+// gender is only recorded when every same-span Lookup agrees on it
+if(!ambig) features.put("gender", gender);
+
+features.put("rule", "FirstNameTwitterNameAmbig");
+features.put("twittername", "yes");
+features.put("kind", "ambig");
+outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
+features);
+}
+
+
 Rule: FirstName
 // Fred
 
+
 (
- {Lookup.majorType == person_first}
+ {Lookup.majorType == person_first, !ClosedClass}
 ):person
 -->
 {
@@ -45,18 +118,61 @@
 if(!ambig) features.put("gender", gender);
 
 features.put("rule", "FirstName");
+features.put("twittername", "no");
 outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
 features);
 }
 
+
+Rule: FirstNameAmbig
+Priority: 300
+/* prefer this rule if the firstname has an ambiguous feature in the gazetteer, e.g. "Christian"
+   In this case, we won't use it in the main name-finding grammar if we find it on its own,
+   only as part of a longer name
+*/
+
+(
+ {Lookup.majorType == person_first, Lookup.kind == ambig}
+):person
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+
+//find out if the gender is unambiguous
+String gender = (String)personAnn.getFeatures().get("minorType");
+boolean ambig = false;
+gate.FeatureMap constraints = Factory.newFeatureMap();
+constraints.put("majorType", "person_first");
+Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator();
+while(!ambig && lookupsIter.hasNext()){
+  gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
+  //we're only interested in annots of the same length
+  if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
+    // a same-span Lookup with a different minorType makes the gender ambiguous
+    ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
+  }
+}
+// gender is only recorded when every same-span Lookup agrees on it
+if(!ambig) features.put("gender", gender);
+
+features.put("rule", "FirstNameAmbig");
+features.put("kind", "ambig");
+features.put("twittername", "no");
+outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
+features);
+}
+
+
 Rule: TitleGender
 Priority: 50
 // Mr
 
 (
- {Lookup.majorType == title, Lookup.minorType == male}|
- {Lookup.majorType == title, Lookup.minorType == female}
-):person
+ ({Lookup.majorType == title, Lookup.minorType == male}|
+  {Lookup.majorType == title, Lookup.minorType == female})
+ ({Token.string == "."})?
+)
+:person
 -->
 {
 gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
@@ -73,6 +189,7 @@
 
 (
  {Lookup.majorType == title}
+ ({Token.string == "."})?
 ):person
 -->
  :person.Title = {rule = "Title"}
@@ -80,6 +197,29 @@
 
 
 
+Rule: Initials1
+// A.B.
+// A.
+// A
+// One or more single upper-case letters, each optionally followed by ".",
+// that are neither ClosedClass nor NumberLetter.
 
+(
+  ({Token.orth == upperInitial, Token.length =="1", !ClosedClass, !NumberLetter}
+  ({Token.string == "."})?
+  )+
+):tag
+-->
+:tag.Initials = {rule = "Initials1"}
 
 
+Rule: Initials2
+// AB
+// ABC
+// An all-caps token of length 2 or 3 with no Lookup, ClosedClass or
+// NumberLetter on it; tagged kind = "nopunct" to tell it apart from Initials1.
+
+(
+ {Token.orth == allCaps, Token.length == "2", !Lookup, !ClosedClass, !NumberLetter} |
+ {Token.orth == allCaps, Token.length == "3", !Lookup, !ClosedClass, !NumberLetter}
+):tag
+-->
+:tag.Initials = {kind = "nopunct", rule = "Initials2"}
+
+

Added: gate/trunk/plugins/Lang_French/grammar/hyphens.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/hyphens.jape                         
(rev 0)
+++ gate/trunk/plugins/Lang_French/grammar/hyphens.jape 2016-10-06 12:35:17 UTC 
(rev 19646)
@@ -0,0 +1,25 @@
+Phase: Hyphens
+Input: Token SpaceToken
+Options: control = appelt
+
+/* A phase to deal with weird problems in hyphenated words
+*/
+
+Rule: UpperHyphenated
+// two NNPs separated by no white space should also be an Upper. 
+// This happens when they're hyphenated and the hyphen is part of the first NNP
+// SpaceToken is listed in this phase's Input but never appears in the pattern,
+// so the two tokens only match when no SpaceToken lies between them.
+
+(
+ ({Token.category == NNP}| 
+   {Token.orth == upperInitial}|
+   {Token.orth == mixedCaps} 
+  )
+ (
+  {Token.category == NNP}|
+  {Token.orth == upperInitial}|
+  {Token.orth == mixedCaps} 
+ )
+):tag
+-->
+:tag.Upper = {rule = "UpperHyphenated"}
+

Modified: gate/trunk/plugins/Lang_French/grammar/loc_context.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/loc_context.jape     2016-10-06 
12:34:37 UTC (rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/loc_context.jape     2016-10-06 
12:35:17 UTC (rev 19646)
@@ -14,56 +14,59 @@
 */
 
 Phase: Loc_Context
-Input: Unknown Token Location
+Input: Unknown Token Location Lookup
 Options: control = appelt
 
 
-Rule: LocConjLoc1 
-Priority: 10
+//Rule: LocConjLoc1 
+//Priority: 10
+// Unknown and Location 
 
-(
-{Unknown.kind == PN}
-):loc
-(
-{Token.category == CC}
-({Token.category == DT}
-)?
-{Location}
-)
--->
-{
-gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc");
-gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule ", "LocConjLoc1");
-outputAS.add(loc.firstNode(), loc.lastNode(), "Location",
-features);
-outputAS.removeAll(loc);
-}
+//(
+//{Unknown.kind == PN}
+//):loc
+//(
+//{Token.category == CC}
+//({Token.category == DT}
+//)?
+//{Location}
+//)
+//-->
+//{
+//gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc");
+//gate.FeatureMap features = Factory.newFeatureMap();
+//features.put("rule ", "LocConjLoc1");
+//outputAS.add(loc.firstNode(), loc.lastNode(), "Location",
+//features);
+//outputAS.removeAll(loc);
+//}
 
 
-Rule: LocConjLoc2
-Priority: 10
+//Rule: LocConjLoc2
+//Priority: 10
 
-(
- {Location}
- {Token.category == CC}
- ({Token.category == DT}
- )?
-)
-(
- {Unknown.kind == PN}
-):loc
--->
- {
-gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc");
-gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule ", "LocConjLoc2");
-outputAS.add(loc.firstNode(), loc.lastNode(), "Location",
-features);
-outputAS.removeAll(loc);
-}
+// Location and Unknown
 
+//(
+// {Location}
+// {Token.category == CC}
+ //({Token.category == DT}
+ //)?
+//)
+//(
+// {Unknown.kind == PN}
+//):loc
+//-->
+// {
+//gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc");
+//gate.FeatureMap features = Factory.newFeatureMap();
+//features.put("rule ", "LocConjLoc2");
+//outputAS.add(loc.firstNode(), loc.lastNode(), "Location",
+//features);
+//outputAS.removeAll(loc);
+//}
 
+
 Rule: UnknownLocRegion
 Priority: 50
 (
@@ -76,10 +79,37 @@
 ):loc
 (
  {Token.string == ","}
- {Location.kind == region}
+ {Location.locType == region}
 )
 -->
- :loc.Location = {rule = "UnknownLocRegion"}
+ :loc.Location = {rule = "UnknownLocRegion"},
+ {
+gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc");
+outputAS.removeAll(loc);
+}
 
+Rule: LocState
+// A state Lookup that follows a city Location (with an optional comma between
+// them) is annotated as a Location of locType region. Only the state is bound.
+Priority: 100
+(
+ {Location.locType == city}
+ ({Token.string == ","})?
+)
+(
+ {Lookup.majorType == state}
+):tag
+-->
+:tag.Location = {locType = region, rule = "LocState"}
 
 
+Rule: UnknownLocKey
+// An Unknown directly followed by a loc_general_key Lookup becomes a Location
+// (locType unknown); the Unknown annotation it replaces is then removed.
+Priority: 20
+(
+ ({Unknown}):tag
+ {Lookup.majorType == loc_general_key}
+)
+-->
+:tag.Location = {locType = unknown, rule = "UnknownLocKey"},
+{
+gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("tag");
+outputAS.removeAll(loc);
+}

Added: gate/trunk/plugins/Lang_French/grammar/main-twitter.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/main-twitter.jape                    
        (rev 0)
+++ gate/trunk/plugins/Lang_French/grammar/main-twitter.jape    2016-10-06 
12:35:17 UTC (rev 19646)
@@ -0,0 +1,38 @@
+/*
+*  main-twitter.jape
+*
+* Copyright (c) 1998-2004, The University of Sheffield.
+*
+*  This file is part of GATE (see http://gate.ac.uk/), and is free
+*  software, licenced under the GNU Library General Public License,
+*  Version 2, June 1991 (in the distribution as file licence.html,
+*  and also available at http://gate.ac.uk/gate/licence.html).
+*
+*  Diana Maynard, 02 Aug 2001
+*
+*  $Id: main.jape 9233 2007-11-23 13:01:52Z dgmaynard $
+*/
+
+// Phase list for the Twitter variant of the grammar: it runs name-twitter
+// where main.jape runs name.
+// NOTE(review): main.jape in this same commit also runs the numberletter and
+// hyphens phases and keeps clean enabled; here the first two are absent and
+// clean is commented out - confirm that this is intentional.
+MultiPhase:    TestTheGrammars
+Phases: 
+first
+firstname
+name-twitter
+name_post
+date_pre
+date
+reldate
+number
+number_clean
+address
+url_pre
+url
+email
+identifier
+jobtitle
+final
+unknown
+name_context
+org_context
+loc_context
+//clean

Modified: gate/trunk/plugins/Lang_French/grammar/main.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/main.jape    2016-10-06 12:34:37 UTC 
(rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/main.jape    2016-10-06 12:35:17 UTC 
(rev 19646)
@@ -14,8 +14,10 @@
 */
 
 MultiPhase:    TestTheGrammars
-Phases: 
+Phases:
+numberletter 
 first
+hyphens
 firstname
 name
 name_post
@@ -23,6 +25,7 @@
 date
 reldate
 number
+number_clean
 address
 url_pre
 url
@@ -34,4 +37,4 @@
 name_context
 org_context
 loc_context
-clean
\ No newline at end of file
+clean

Added: gate/trunk/plugins/Lang_French/grammar/name-twitter.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/name-twitter.jape                    
        (rev 0)
+++ gate/trunk/plugins/Lang_French/grammar/name-twitter.jape    2016-10-06 
12:35:17 UTC (rev 19646)
@@ -0,0 +1,1726 @@
+/*
+*  name-twitter.jape
+*
+* Copyright (c) 1998-2004, The University of Sheffield.
+*
+*  This file is part of GATE (see http://gate.ac.uk/), and is free
+*  software, licenced under the GNU Library General Public License,
+*  Version 2, June 1991 (in the distribution as file licence.html,
+*  and also available at http://gate.ac.uk/gate/licence.html).
+*
+*  Diana Maynard, 10 Sep 2001
+* 
+*  $Id: name.jape 18116 2014-06-23 11:35:16Z dgmaynard $
+*/
+
+
+// Name-finding phase (Twitter variant): UserID and URL are part of the input
+Phase: Name
+Input: Token Lookup Title FirstPerson Upper ClosedClass Initials Split UserID URL
+Options: control = appelt debug = false
+
+///////////////////////////////////////////////////////////////
+
+// Person Rules
+
+/////////////////////////////////////////////////////////////////
+// a Title annotation, optionally followed by a full stop
+Macro: TITLE
+(
+ {Title}
+ ({Token.string == "."})?
+)
+
+
+
+
+// a non-ambiguous, non-Twitter FirstPerson whose gender is male or female
+Macro: FIRSTNAME
+
+ ({FirstPerson.gender == male, FirstPerson.kind != ambig, FirstPerson.twittername == no} |
+  {FirstPerson.gender == female, FirstPerson.kind != ambig, FirstPerson.twittername == no})
+
+
+
+// a non-Twitter FirstPerson that is marked kind = ambig
+Macro: FIRSTNAMEAMBIG
+(
+ {FirstPerson.kind == ambig, FirstPerson.twittername == no}
+)
+
+// any FirstPerson that has twittername = yes
+Macro: FIRSTNAMETWITTER
+
+(
+ {FirstPerson.twittername == yes}
+)
+
+
+// a person_ending Lookup, optionally preceded by a comma
+Macro: PERSONENDING
+(
+ ({Token.string == ","})?
+ {Lookup.majorType == person_ending}
+)
+
+// a surname prefix: either a gazetteer entry (surname/prefix) or the tokens
+// "O" or "D" followed by an apostrophe
+Macro: PREFIX
+(
+ ({Lookup.majorType == surname, Lookup.minorType == prefix}
+ )|
+ (({Token.string == "O"}|{Token.string == "D"})
+  {Token.string == "'"}
+ )
+)
+
+
+
+
+///////////////////////////////////////////////////////////
+
+
+// Person Rules
+
+Rule: Pronoun
+// Matches a single token tagged PP, PRP or RB and does nothing with it
+// (empty action), at a priority above the person rules in this phase.
+Priority: 1000
+
+(
+ {Token.category == PP}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+):pro
+-->
+{}
+
+
+Rule:Reject
+Priority: 1000
+// stops certain things being recognised as People
+// (a run of one to five ClosedClass or URL annotations, matched with an empty action)
+(
+ ({ClosedClass}|{URL})[1,5]
+)
+-->
+{}
+
+Rule:    GazPerson
+// A person_full gazetteer Lookup becomes a TempPerson of kind "fullName".
+// A single-token match is stored as the surname; otherwise the first token is
+// the firstName and the remaining tokens together form the surname.
+Priority: 50
+(
+ {Lookup.majorType == person_full}
+)
+:person -->
+{
+gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+
+// find the Token annotations
+AnnotationSet tokenSet = gate.Utils.getContainedAnnotations(inputAS, personSet, "Token");
+// put them in order
+List<Annotation> tokenList = gate.Utils.inDocumentOrder(tokenSet);
+
+if (tokenList.size() == 1) {
+ // if there's only one Token, guess it's a surname
+
+  String surnameContent = gate.Utils.stringFor(doc, tokenList.get(0));
+  features.put("surname", surnameContent);
+ }
+
+else if (tokenList.size() > 0) {
+  // the string under the first Token
+   String firstNameContent = gate.Utils.stringFor(doc, tokenList.get(0));
+  features.put("firstName", firstNameContent);
+
+
+  // the string under the remaining Tokens if any
+  if (tokenList.size() > 1) {
+    Long lastNameStart = gate.Utils.start(tokenList.get(1));
+    Long lastNameEnd   = gate.Utils.end(tokenList.get(tokenList.size() - 1));
+    String surnameContent = gate.Utils.stringFor(doc, lastNameStart, lastNameEnd);
+    features.put("surname", surnameContent);
+  }
+}
+
+features.put("kind", "fullName");
+features.put("rule", "GazPerson");
+// gender is copied from the matched Lookup's own "gender" feature
+features.put("gender", personAnn.getFeatures().get("gender"));
+
+// this method doesn't require try-catch
+gate.Utils.addAnn(outputAS, personSet, "TempPerson", features);
+}
+
+Rule:  GazPersonFirstTwitter
+// A non-ambiguous FirstPerson with twittername = yes becomes a TempPerson of
+// kind "firstName". The optional DT/PRP/RB token before it and the optional
+// single upper-case letter after it are matched but left outside the binding.
+Priority: 300
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ {FirstPerson.twittername == yes, FirstPerson.kind != ambig}
+):person 
+( 
+ {Token.orth == upperInitial, Token.length == "1"}
+)?
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+features.put("gender", personAnn.getFeatures().get("gender"));
+features.put("kind", "firstName");
+features.put("rule", "GazPersonFirstTwitter");
+
+// get the string of the first name
+String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+features.put("firstName", contentFirstName);
+features.put("twittername", "yes");
+
+outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
+features);
+}
+
+
+
+
+Rule:  GazPersonFirst
+// Any non-ambiguous FirstPerson becomes a TempPerson of kind "firstName".
+// The optional DT/PRP/RB token before it and the optional single upper-case
+// letter after it are matched but left outside the binding.
+Priority: 200
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ {FirstPerson.kind != ambig}
+):person 
+( 
+ {Token.orth == upperInitial, Token.length == "1"}
+)?
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+features.put("gender", personAnn.getFeatures().get("gender"));
+features.put("kind", "firstName");
+features.put("rule", "GazPersonFirst");
+
+// get the string of the first name
+String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+features.put("firstName", contentFirstName);
+
+outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
+features);
+}
+
+
+
+Rule: PersonFirstContext
+Priority: 30
+// Anne and Kenton
+// A known first name, the conjunction, then a capitalised token longer than
+// one character: both sides become TempPerson annotations of kind "firstName".
+// Only the first one gets a gender, since the second is not a FirstPerson.
+// NOTE(review): the conjunction is the English "and" although this grammar
+// belongs to the Lang_French plugin - confirm whether "et" is intended.
+
+(FIRSTNAME):person1
+(
+ {Token.string == "and"}
+)
+({Token.orth == upperInitial, Token.length != "1"})
+:person2
+ -->
+{
+//first deal with person1
+ gate.FeatureMap features1 = Factory.newFeatureMap();
+ gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1");
+  gate.Annotation personAnn = (gate.Annotation)person1Set.iterator().next();
+ 
+  String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+  features1.put("firstName", contentFirstName);
+  features1.put("gender", personAnn.getFeatures().get("gender"));
+  features1.put("kind", "firstName");
+  features1.put("rule", "PersonFirstContext");
+outputAS.add(person1Set.firstNode(), person1Set.lastNode(), "TempPerson",
+features1);
+
+//now deal with person2
+gate.FeatureMap features2 = Factory.newFeatureMap();
+gate.AnnotationSet person2Set = (gate.AnnotationSet)bindings.get("person2");
+gate.Annotation person2Ann = (gate.Annotation)person2Set.iterator().next();
+
+  String content2FirstName = gate.Utils.stringFor(doc, person2Ann);
+  features2.put("firstName", content2FirstName);
+  features2.put("kind", "firstName");
+  features2.put("rule", "PersonFirstContext");
+outputAS.add(person2Set.firstNode(), person2Set.lastNode(), "TempPerson",
+features2);
+}
+
+
+Rule:  PersonTitle
+Priority: 35
+// Mr. Jones
+// Mr Fred Jones
+// note we only allow one first and surname, 
+// but we add more in a final phase if we find adjacent unknowns
+// Emits a TempPerson (kind = "personName") carrying title, gender (taken from
+// the title), an optional firstName and the surname.
+
+( 
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ ({Title.rule == "TitleGender"}):title
+ ({Title})?
+ (
+  (FIRSTNAME | FIRSTNAMEAMBIG )?
+ ):firstName
+ (
+  (PREFIX)* 
+  ({Upper})
+  (PERSONENDING)?
+ ):surname
+):person 
+-->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ 
+ gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ // NOTE(review): when the surname binding holds several annotations (prefix,
+ // Upper, ending) this picks whichever one the set iterator yields first
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+  String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+  features.put("title", contentTitle);
+  features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ // the first name is optional in the pattern, so its binding may be missing
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+  gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
+  String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+  features.put("firstName", firstNameContent);
+ }
+  String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+  features.put("surname", surnameContent);
+
+  features.put("kind", "personName");
+  features.put("rule", "PersonTitle");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+}
+
+
+
+
+Rule:  PersonTitleInitials
+Priority: 35
+
+// Mr J. Jones
+// Emits a TempPerson (kind = "personName") carrying title, gender (taken from
+// the title), optional initials and the surname.
+
+
+( 
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ ({Title.rule == "TitleGender"}):title
+ ({Title})?
+ (
+  ({Initials})?
+ ):initials
+ (
+  (PREFIX)* 
+  ({Upper, !Initials})
+  (PERSONENDING)?
+ ):surname
+):person 
+-->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ 
+ gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+  String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+  features.put("title", contentTitle);
+  features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ // the initials are optional in the pattern, so their binding may be missing
+ if (initialsSet != null && initialsSet.size()>0)
+ {
+  List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
+
+  // text from the start of the first to the end of the last bound annotation
+  Long initialsStart = gate.Utils.start(initialsList.get(0));
+  Long initialsEnd   = gate.Utils.end(initialsList.get(initialsList.size() - 1));
+  String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart, initialsEnd);
+  features.put("initials", initialsContent);
+ }
+  String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+  features.put("surname", surnameContent);
+
+  features.put("kind", "personName");
+  features.put("rule", "PersonTitleInitials");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+}
+
+
+Rule:  TitleFirstName
+Priority: 55
+// use this rule when we know what gender the title indicates
+// Mr Fred
+// Emits a TempPerson (kind = "personName") over title + first name, carrying
+// the title string, the gender taken from the Title, and the first name.
+// The first-name label must be spelled "firstName": the action looks it up
+// with bindings.get("firstName") and binding labels are case-sensitive.
+
+(
+ ({Title.gender == male} | {Title.gender == female}):title
+ (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+
+)
+:person -->
+
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ 
+ gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+  String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+  features.put("title", contentTitle);
+  features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+  gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
+  String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+  features.put("firstName", firstNameContent);
+ }
+ 
+ features.put("kind", "personName");
+
+ features.put("rule", "TitleFirstName");
+ outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+}
+
+
+
+Rule: PersonJobTitle
+Priority: 20
+// note we include titles but not jobtitles in markup
+// A jobtitle Lookup followed by (optional title) first name + surname yields
+// two annotations: JobTitle over the job title, TempPerson over the name.
+
+(
+ {Lookup.majorType == jobtitle}
+):jobtitle
+(
+ (TITLE)?
+ ((FIRSTNAME | FIRSTNAMEAMBIG )
+ )
+ (PREFIX)* 
+ ({Upper,!Initials})
+ (PERSONENDING)?
+)
+:person 
+-->
+    :person.TempPerson = {kind = "fullName", rule = "PersonJobTitle"},
+   :jobtitle.JobTitle = {rule = "PersonJobTitle"} 
+
+
+
+
+Rule: NotFirstPersonStop
+Priority: 70
+// ambig first name and surname is stop word
+// e.g. Will And
+// The action is empty, so a match produces no annotation.
+
+(
+ ((FIRSTNAMEAMBIG)+ | 
+  {Token.category == PRP}|
+  {Token.category == DT}
+ )
+ ({Lookup.majorType == stop}
+ )
+)
+:person -->
+  {}
+
+
+Rule: FirstPersonStop
+Priority: 50
+// John And
+// A known first name followed by a DT/PRP/RB/IN token: only the first name is
+// bound and turned into a TempPerson of kind "firstName".
+
+(FIRSTNAME):person
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}|
+ {Token.category == IN}
+)
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+features.put("gender", personAnn.getFeatures().get("gender"));
+features.put("kind", "firstName");
+features.put("rule", "FirstPersonStop");
+outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
+features);
+}
+
+
+
+
+
+Rule: NotPersonFull
+Priority: 50
+// do not allow Det + Surname
+// (a DT, PRP or RB token followed by a possible surname; the action is empty,
+// so a match produces no annotation)
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)
+(
+ (PREFIX)* 
+ ({Upper})
+ (PERSONENDING)?
+):foo
+-->
+{}
+
+
+
+Rule: LocPersonAmbig1
+Priority: 50
+// Location + Possible Surname --> Location only (ignore Surname)
+// This variant requires a person ending after the surname; only the location
+// Lookup is annotated, as TempLocation.
+
+(
+ {Lookup.majorType == location}
+):loc
+(
+ (PREFIX)* 
+ ({Upper,!Initials})
+ (PERSONENDING)
+):foo
+-->
+:loc.TempLocation = {kind = "locName", rule = "LocPersonAmbig1"}
+
+
+Rule: LocPersonAmbig2
+Priority: 50
+// Location + Prefix + Possible Surname --> Location only (ignore Surname)
+// This variant requires a surname prefix; only the location Lookup is
+// annotated, as TempLocation.
+
+(
+ {Lookup.majorType == location}
+):loc
+(
+ (PREFIX)
+ ({Upper,!Initials})
+ (PERSONENDING)?
+):foo
+-->
+:loc.TempLocation = {kind = "locName", rule = "LocPersonAmbig2"}
+
+
+Rule: LocPersonAmbig3
+Priority: 100
+// Ambiguous Location/Person + Possible Surname --> Person
+// The whole sequence (ambiguous location that is also a FirstPerson, surname
+// prefix, surname, optional ending) becomes one TempPerson.
+
+(
+ {Lookup.majorType == location, Lookup.ambig == yes, FirstPerson}
+ (PREFIX)
+ ({Upper,!Initials})
+ (PERSONENDING)?
+):person
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+// The binding mixes several annotation types, and only the FirstPerson
+// carries a gender feature, so pick it out explicitly instead of taking
+// whichever annotation the set iterator happens to return first.
+gate.AnnotationSet firstPersonSet = person.get("FirstPerson");
+Object gender = null;
+if (firstPersonSet != null && !firstPersonSet.isEmpty()) {
+  gate.Annotation firstPersonAnn = (gate.Annotation)firstPersonSet.iterator().next();
+  gender = firstPersonAnn.getFeatures().get("gender");
+}
+gate.FeatureMap features = Factory.newFeatureMap();
+// the key is always written, as before, even when no gender is known
+features.put("gender", gender);
+features.put("kind", "firstName");
+features.put("rule", "LocPersonAmbig3");
+outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
+features);
+}
+
+
+Rule: PersonFullInitialsCaps
+Priority: 100
+// TO FISH
+// If the initials is of type nopunct, we want to discard the Person if the surname is also in all caps, as it's too ambiguous
+// The match is annotated as Discard rather than TempPerson.
+
+(
+ {Token.category == DT}
+)?
+(
+ 
+  ({Initials.kind == nopunct})
+  ((FIRSTNAME | FIRSTNAMEAMBIG )?)
+ ((PREFIX)*
+  ({Upper.kind == allCaps})
+  (PERSONENDING)?
+ )
+):person -->
+:person.Discard = {rule = "PersonFullInitialsCaps"}
+
+
+Rule:  PersonFull
+Priority: 10
+// F.W. Jones
+// Fred Jones
+// Emits a TempPerson (kind = "fullName") carrying firstName, gender (taken
+// from the first name), an optional middleName and the surname.
+
+(
+ 
+  (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+  ((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName
+ ((PREFIX)*
+  ({Upper,!Initials})
+  (PERSONENDING)?
+ ):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+  
+  gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
+  gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
+ 
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ features.put("gender", firstNameAnn.getFeatures().get("gender"));
+
+ gate.AnnotationSet middleNameSet = (gate.AnnotationSet)bindings.get("middleName");
+
+ // the middle name is optional in the pattern, so its binding may be missing
+ if (middleNameSet != null && middleNameSet.size()>0)
+{
+ gate.Annotation middleNameAnn = (gate.Annotation)middleNameSet.iterator().next();
+ String middleNameContent = gate.Utils.stringFor(doc, middleNameAnn);
+ features.put("middleName", middleNameContent);
+}
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonFull");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+
+}
+
+
+
+Rule:  PersonFullInitials
+Priority: 10
+// F.W. Jones
+// Emits a TempPerson (kind = "fullName") carrying initials, an optional
+// middleName (which also supplies the gender) and the surname.
+
+(
+ {Token.category == DT}
+)?
+(
+ 
+  ({Initials, !Lookup}):initials
+  ((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName
+ ((PREFIX)*
+  ({Upper,!Initials})
+  (PERSONENDING)?
+ ):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+  
+  gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials");
+  List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
+
+  // text from the start of the first to the end of the last bound annotation
+  Long initialsStart = gate.Utils.start(initialsList.get(0));
+  Long initialsEnd   = gate.Utils.end(initialsList.get(initialsList.size() - 1));
+  String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart, initialsEnd);
+  features.put("initials", initialsContent);
+ 
+
+ gate.AnnotationSet middleNameSet = (gate.AnnotationSet)bindings.get("middleName");
+
+ // the middle name is optional in the pattern, so its binding may be missing
+ if (middleNameSet != null && middleNameSet.size()>0)
+{
+ gate.Annotation middleNameAnn = (gate.Annotation)middleNameSet.iterator().next();
+ String middleNameContent = gate.Utils.cleanStringFor(doc, middleNameAnn);
+ features.put("middleName", middleNameContent);
+ features.put("gender", middleNameAnn.getFeatures().get("gender"));
+}
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String surnameContent = gate.Utils.cleanStringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonFullInitials");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+
+}
+
+
+
+
+
+Rule:  PersonFullDoubleBarrelled
+Priority: 9
+// F.W. Smith Jones
+// Fred Smith Jones
+// A FIRSTNAME or FIRSTNAMEAMBIG followed by a two-word surname: PREFIX*,
+// two {Upper, !Initials} and an optional PERSONENDING.
+// Creates a TempPerson (kind = fullName) spanning :person.
+
+(
+  (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+ ((PREFIX)*
+  ({Upper,!Initials})
+  ({Upper,!Initials})
+  (PERSONENDING)?
+ ):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ // first name: text and gender of one annotation bound to :firstName
+ gate.AnnotationSet firstNameSet =
+   (gate.AnnotationSet)bindings.get("firstName");
+ gate.Annotation firstNameAnn =
+   (gate.Annotation)firstNameSet.iterator().next();
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ features.put("gender", firstNameAnn.getFeatures().get("gender"));
+
+ // The pattern binds no :middleName label, so no middleName feature is set.
+
+ // surname: the text of the whole :surname span.  The binding holds several
+ // annotations, so taking the text of a single one of them would give only
+ // part of the double-barrelled surname.
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ String surnameContent = gate.Utils.stringFor(doc,
+   surnameSet.firstNode().getOffset(), surnameSet.lastNode().getOffset());
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonFullDoubleBarrelled");
+ outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+              features);
+}
+
+
+
+Rule:  PersonMiddleInitial
+Priority: 10
+// Fred C. Jones
+// A FIRSTNAME or FIRSTNAMEAMBIG, an Initials annotation, then the surname:
+// PREFIX*, one {Upper, !Initials} and an optional PERSONENDING.
+// Creates a TempPerson (kind = fullName) spanning :person.
+
+(
+  (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+  ({Initials}):initials
+ ((PREFIX)*
+  ({Upper,!Initials})
+  (PERSONENDING)?
+ ):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ // first name: text and gender of one annotation bound to :firstName
+ gate.AnnotationSet firstNameSet =
+   (gate.AnnotationSet)bindings.get("firstName");
+ gate.Annotation firstNameAnn =
+   (gate.Annotation)firstNameSet.iterator().next();
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ features.put("gender", firstNameAnn.getFeatures().get("gender"));
+
+ // initials: text of one annotation bound to :initials
+ gate.AnnotationSet initialsSet =
+   (gate.AnnotationSet)bindings.get("initials");
+ if (initialsSet != null && initialsSet.size()>0)
+ {
+  gate.Annotation initialsAnn =
+    (gate.Annotation)initialsSet.iterator().next();
+  String initialsContent = gate.Utils.stringFor(doc, initialsAnn);
+  features.put("initials", initialsContent);
+ }
+
+ // surname: text of one annotation bound to :surname
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonMiddleInitial");
+ outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+              features);
+}
+
+
+
+Rule: PersonFullStop
+Priority: 50
+// G.Wilson Fri
+// :person = first name, PREFIX*, one Upper; a date Lookup must follow it.
+(
+ ((FIRSTNAME | FIRSTNAMEAMBIG) )
+ (PREFIX)* 
+ ({Upper})
+):person
+(
+ {Lookup.majorType == date}
+)
+-->
+ :person.TempPerson = {kind = "fullName", rule = "PersonFullStop"}
+
+
+Rule: NotPersonFullReverse
+Priority: 20
+// XYZ, I -- Upper, comma, PRP token: the empty RHS creates no annotation
+(
+ ({Upper})
+ {Token.string == ","}
+ {Token.category == PRP}
+ (PERSONENDING)?
+)
+:unknown 
+-->
+{}
+
+
+Rule:  PersonSaint
+Priority: 50
+// Note: ensure that it's not a Saints Day first
+// "St", "St." or "Saint" followed by a FIRSTNAME becomes a TempPerson
+(
+ ({Token.string == "St"} ({Token.string == "."})? |
+ {Token.string == "Saint"})
+ (FIRSTNAME)
+ )
+:person -->
+{
+  gate.AnnotationSet matched = (gate.AnnotationSet)bindings.get("person");
+  gate.FeatureMap saintFeatures = Factory.newFeatureMap();
+
+  // take the gender from a FirstPerson annotation of the match, if any
+  gate.AnnotationSet firstPersons = matched.get("FirstPerson");
+  boolean hasFirstPerson = firstPersons != null && !firstPersons.isEmpty();
+  if (hasFirstPerson) {
+    gate.Annotation namedPerson = firstPersons.iterator().next();
+    saintFeatures.put("gender", namedPerson.getFeatures().get("gender"));
+  }
+
+  saintFeatures.put("kind", "firstName");
+  saintFeatures.put("rule", "PersonSaint");
+  outputAS.add(matched.firstNode(), matched.lastNode(), "TempPerson",
+               saintFeatures);
+}
+
+
+Rule: PersonLocAmbig
+Priority: 40
+// Ken London
+// Susan Hampshire
+
+// Christian name + Location --> Person's Name
+// Creates a TempPerson (kind = fullName) spanning :person.
+(
+  (FIRSTNAME):firstName
+  ({Lookup.majorType == location}):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet firstNameSet =
+   (gate.AnnotationSet)bindings.get("firstName");
+ gate.Annotation firstNameAnn =
+   (gate.Annotation)firstNameSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ // The other person rules in this grammar read the gender of a first-name
+ // binding from its "gender" feature; do the same here, and fall back to
+ // the previously used "minorType" feature when "gender" is absent.
+ Object gender = firstNameAnn.getFeatures().get("gender");
+ if (gender == null) {
+   gender = firstNameAnn.getFeatures().get("minorType");
+ }
+ features.put("gender", gender);
+
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonLocAmbig");
+ outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+              features);
+}
+
+
+Rule: TitlePersonLocAmbig
+Priority: 50
+// Professor London
+// title + Location --> Person's Name
+// Creates a TempPerson (kind = fullName) spanning :person; the gender is
+// copied from the Title annotation.
+
+(
+  ({Title}):title
+  ({Lookup.majorType == location}):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ String titleContent = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", titleContent);
+
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "TitlePersonLocAmbig");
+ outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+              features);
+}
+
+
+
+Rule:PersonOrgAmbig
+Priority: 50
+// if the last name is an organisation ending, treat as an organisation,
+// not a person
+// e.g. A.B. Consulting
+// One or more first names followed by an org_key or org_base Lookup; an
+// optional leading determiner is left outside the annotation.
+
+(
+ {Token.category == DT}
+)?
+(
+ ((FIRSTNAME | FIRSTNAMEAMBIG) )+
+ ({Lookup.majorType == org_key}|
+  {Lookup.majorType == org_base}
+ ) 
+)
+:orgName -->
+ :orgName.TempOrganization = {kind = "unknown", rule = "PersonOrgAmbig"}
+
+
+
+///////////////////////////////////////////////////////////////////
+// Organisation Rules
+
+Macro:  CDG
+// cdg is something like "Ltd."; a cdg Lookup, optionally preceded by a comma
+ (
+  ({Lookup.majorType == cdg})|
+  ({Token.string == ","} 
+  {Lookup.majorType == cdg})
+ )
+
+
+Macro: SAINT // "St", "St." or "Saint"
+(
+ ({Token.string == "St"} ({Token.string == "."})? |
+ {Token.string == "Saint"})
+)
+
+Macro: CHURCH // Church, Cathedral or Chapel, capitalised or lower-case
+(
+{Token.string == "Church"}|{Token.string == "church"}|
+{Token.string == "Cathedral"}|{Token.string == "cathedral"}|
+{Token.string == "Chapel"}|{Token.string == "chapel"}
+)
+
+/////////////////////////////////////////////////////////////
+Rule:  TheGazOrganization
+Priority: 245
+// A determiner or adverb followed by an organization Lookup; only the
+// Lookup is bound to :orgName and annotated.
+(
+ {Token.category == DT}|
+ {Token.category == RB}
+)
+(
+{Lookup.majorType == organization}
+)
+:orgName -->  
+ {
+  // everything bound to :orgName, and the Lookup annotations among it
+  gate.AnnotationSet boundOrg = (gate.AnnotationSet)bindings.get("orgName");
+  gate.AnnotationSet gazLookups = boundOrg.get("Lookup");
+
+  gate.FeatureMap orgFeatures = Factory.newFeatureMap();
+  // when a Lookup is present, its minorType becomes the orgType
+  boolean lookupFound = gazLookups != null && !gazLookups.isEmpty();
+  if (lookupFound) {
+    gate.Annotation gazLookup = gazLookups.iterator().next();
+    orgFeatures.put("orgType", gazLookup.getFeatures().get("minorType"));
+  }
+  orgFeatures.put("rule", "GazOrganization");
+
+  // emit the TempOrganization over the bound span
+  outputAS.add(boundOrg.firstNode(), boundOrg.lastNode(),
+               "TempOrganization", orgFeatures);
+}
+
+
+Rule:  GazOrganization
+Priority: 145
+// An organization Lookup becomes a TempOrganization
+(
+{Lookup.majorType == organization}
+)
+:orgName -->  
+ {
+  gate.AnnotationSet matched = (gate.AnnotationSet)bindings.get("orgName");
+  gate.FeatureMap tempOrgFeatures = Factory.newFeatureMap();
+
+  // copy the minorType of a Lookup inside the match, if there is one,
+  // into the orgType feature
+  gate.AnnotationSet lookups = matched.get("Lookup");
+  if (lookups != null && lookups.size() > 0) {
+    gate.Annotation lookup = lookups.iterator().next();
+    Object minorType = lookup.getFeatures().get("minorType");
+    tempOrgFeatures.put("orgType", minorType);
+  }
+
+  tempOrgFeatures.put("rule", "GazOrganization");
+  outputAS.add(matched.firstNode(), matched.lastNode(),
+               "TempOrganization", tempOrgFeatures);
+}
+
+Rule:  LocOrganization
+Priority: 50
+// Ealing Police: location or country_adj Lookup, then 1-2 organization Lookups
+(
+ ({Lookup.majorType == location} |
+  {Lookup.majorType == country_adj})
+{Lookup.majorType == organization}
+({Lookup.majorType == organization})?
+)
+:orgName -->  
+  :orgName.TempOrganization = {kind = "orgName", rule=LocOrganization}
+
+
+Rule: NewspaperEnding
+Priority: 200
+// GSA Today
+// An Upper or Initials annotation followed by a newspaper_ending Lookup
+// becomes a TempOrganization.
+
+(
+ ({Upper}|{Initials})
+ {Lookup.majorType == newspaper_ending}
+):orgName
+-->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("orgName");
+ gate.AnnotationSet org = (gate.AnnotationSet)orgSet.get("Lookup");
+
+ // propagate the minorType of a Lookup inside the match as orgType
+ if (org != null && org.size()>0)
+ {
+  gate.Annotation orgAnn = (gate.Annotation)org.iterator().next();
+  features.put("orgType", orgAnn.getFeatures().get("minorType"));
+ }
+ // record the rule name (previously misspelt "NewspaperEndng")
+ features.put("rule", "NewspaperEnding");
+ // create a TempOrganization annotation and add the features we've created
+ outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "TempOrganization",
+              features);
+}
+
+
+Rule:  INOrgXandY
+Priority: 200
+// X & Y after an IN token; the IN token itself is outside :orgName
+// Bradford & Bingley
+// Bradford & Bingley Ltd
+(
+ {Token.category == IN}
+)
+
+(
+ ({Token.category == NNP}
+  )+
+
+ {Token.string == "&"}
+
+ (
+  {Token.orth == upperInitial}
+ )+
+
+ (CDG)?
+
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandY"}
+
+Rule:  OrgXandY
+Priority: 20
+// One or more NNP tokens, "&", one or more upperInitial tokens, optional CDG
+// Bradford & Bingley
+// Bradford & Bingley Ltd
+
+
+(
+ ({Token.category == NNP}
+  )+
+
+ {Token.string == "&"}
+
+ (
+  {Token.orth == upperInitial}
+ )+
+
+ (CDG)?
+
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandY"}
+
+
+Rule:OrgUni
+Priority: 25
+// University of Sheffield
+// Matches "University of" followed by one or more NNP tokens.
+// NOTE(review): "Sheffield University" and "A Sheffield University" were
+// listed as examples but cannot match this pattern, which starts with the
+// token "University".
+(
+ {Token.string == "University"}
+ {Token.string == "of"}
+ (
+ {Token.category == NNP})+
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "other", rule = "OrgUni"}
+
+
+
+Rule: OrgDept
+Priority: 25
+// Department of Pure Mathematics and Physics
+// "Department of" + upperInitial tokens, optionally "and" + more of them
+(
+ {Token.string == "Department"}
+ 
+ {Token.string == "of"}
+ (
+ {Token.orth == upperInitial})+
+ (
+  {Token.string == "and"}
+  ( 
+   {Token.orth == upperInitial})+
+ )?
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "department", rule = "OrgDept"}
+
+Rule:  TheOrgXKey
+Priority: 500
+// Determiner (not annotated), 1-5 Upper, org_key Lookup, optional org_ending
+// The Aaaa Ltd.
+(
+ {Token.category == DT}
+)
+(
+  ({Upper})
+  ({Upper})?
+  ({Upper})?
+  ({Upper})?
+  ({Upper})?
+ {Lookup.majorType == org_key}
+ ({Lookup.majorType == org_ending})?
+)
+:org
+-->
+:org.TempOrganization = {orgType = "unknown", rule = "TheOrgXKey"}
+
+Rule: NotOrgXKey
+Priority: 150
+// if all the names are org_base or org_key, it's not an organisation
+// e.g. Business Consulting (the empty RHS creates no annotation)
+
+(
+ ({Lookup.majorType == org_key}|
+  {Lookup.majorType == org_base}
+ )+
+ ({Lookup.majorType == org_ending})?
+)
+:org
+-->
+{}
+
+
+
+Rule: NotTheKey
+Priority: 200
+// Determiner + org_key (+ optional org_ending): matched, nothing annotated
+(
+ {Token.category == DT}
+ {Lookup.majorType == org_key}
+ ({Lookup.majorType == org_ending})?
+)
+:org
+-->
+{}
+
+
+Rule:  OrgXKey
+Priority: 125
+// 1-5 Upper, org_key Lookup, optional org_ending; a leading DT is not annotated
+// Aaaa Ltd.
+({Token.category == DT})?
+(
+  ({Upper})
+  ({Upper})?
+  ({Upper})?
+  ({Upper})?
+  ({Upper})?
+ {Lookup.majorType == org_key}
+ ({Lookup.majorType == org_ending})?
+)
+:org
+-->
+:org.TempOrganization = {orgType = "unknown", rule = "OrgXKey"}
+
+
+Rule: NotOrgXEnding
+Priority: 500
+// Very Limited -- RB token + cdg Lookup: the empty RHS creates no annotation
+
+(
+ {Token.category == DT}
+)?
+(
+ {Token.category == RB}
+ {Lookup.majorType == cdg}
+)
+:label
+-->
+{}
+ Rule: NotOrgXEnding2
+Priority: 500
+
+// The Coca Cola Co.
+// Determiner (not annotated), one or two Upper and a cdg Lookup.
+// NOTE(review): despite the "Not" in its name this rule does create a
+// TempOrganization, and it records the rule feature "OrgXEnding" -- confirm
+// that the name is intended.
+
+(
+ {Token.category == DT}
+)
+(
+  ({Upper})
+  ({Upper})?
+ {Lookup.majorType == cdg}
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "company", rule = "OrgXEnding"}
+
+
+
+Rule:  OrgXEnding
+Priority: 120
+// One or two Upper followed by a cdg Lookup
+// Coca Cola Co.
+
+(
+  ({Upper})
+  ({Upper})?
+ {Lookup.majorType == cdg}
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXEnding"}
+
+Rule:  TheOrgXandYKey
+Priority: 220
+// DT (not annotated), 1-2 Upper, "and"/"&", up to 3 Upper, org_key, org_ending?
+(
+ {Token.category == DT}
+)
+(
+ ({Upper})
+ ({Upper})?
+  (({Token.string == "and"} | 
+    {Token.string == "&"})
+   ({Upper})?
+   ({Upper})?
+   ({Upper})?
+  )
+ {Lookup.majorType == org_key}
+ ({Lookup.majorType == org_ending})?
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandYKey"}
+
+
+
+Rule:  OrgXandYKey
+Priority: 120
+// 1-2 Upper, "and"/"&", up to 3 Upper, org_key Lookup, optional org_ending
+// Aaaa Ltd.
+// Xxx Services Ltd. 
+// AA and BB Services Ltd.
+// but NOT A XXX Services Ltd.
+// NOTE(review): "and"/"&" is mandatory, so the first two examples cannot match
+(
+ ({Upper})
+ ({Upper})?
+  (({Token.string == "and"} | 
+    {Token.string == "&"})
+   ({Upper})?
+   ({Upper})?
+   ({Upper})?
+  )
+ {Lookup.majorType == org_key}
+ ({Lookup.majorType == org_ending})?
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandYKey"}
+
+
+Rule:  OrgXsKeyBase
+Priority: 120
+ 
+// Gandy's Circus
+// Queen's Ware
+// NOTE(review): the rule feature is spelt "OrgXsKeybase", unlike the rule name
+(
+  ({Upper})?
+  ({Upper})?
+  ({Token.orth == upperInitial}
+   {Token.string == "'"}
+   ({Token.string == "s"})?
+  )
+ ({Lookup.majorType == org_key}|
+  {Lookup.majorType == org_base})
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXsKeybase"}
+
+
+
+Rule: NotOrgXBase
+Priority: 1000
+// not things like British National
+// or The University
+// Creates a Temp annotation (kind = notorgName), not a TempOrganization
+
+(
+ ({Token.category == DT} 
+ )?
+)
+(
+ ({Lookup.majorType == country_adj}|
+  {Token.orth == lowercase})
+ ({Lookup.majorType == org_base}|
+  {Lookup.majorType == govern_key})
+)
+:orgName -->
+  :orgName.Temp = {kind = "notorgName", rule = "NotOrgXBase"}
+
+
+Rule:  TheOrgXBase
+Priority: 230
+// Same pattern shape as OrgXBase but a determiner (not annotated) must precede
+(
+ ({Token.category == DT}
+ )
+)
+(
+ (
+  ({Upper})|
+  {Lookup.majorType == organization}
+ )
+ ({Upper})?
+ ({Upper})?
+ ({Lookup.majorType == org_base}|
+  {Lookup.majorType == govern_key}
+ )
+ (
+  {Token.string == "of"}
+  ({Upper})
+  ({Upper})?
+  ({Upper})?
+ )?
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "unknown", rule = "TheOrgXBase"}
+
+
+Rule:  OrgXBase
+Priority: 130
+
+// same as OrgXKey but uses base instead of key
+// includes govern_key e.g. academy
+// Barclays Bank
+// Royal Academy of Art
+// Optionally continues with "of" and one to three Upper
+(
+ (
+  ({Upper})|
+  {Lookup.majorType == organization}
+ )
+ ({Upper})?
+ ({Upper})?
+ ({Lookup.majorType == org_base}|
+  {Lookup.majorType == govern_key}
+ )
+ (
+  {Token.string == "of"}
+  ({Upper})
+  ({Upper})?
+  ({Upper})?
+ )?
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXBase"}
+
+Rule:  TheBaseofOrg
+Priority: 230
+// DT (not annotated), org_base/govern_key, "of", optional DT, one or two Upper
+(
+ {Token.category == DT}
+)
+(
+ ({Lookup.majorType == org_base}|
+  {Lookup.majorType == govern_key}
+ )
+ 
+ {Token.string == "of"}
+ ( 
+  {Token.category == DT}
+ )?
+ ({Upper})
+ ({Upper})?
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "unknown", rule = "BaseofOrg"}
+
+
+
+
+Rule:  BaseofOrg
+Priority: 130
+// org_base or govern_key Lookup, "of", optional DT, one or two Upper
+(
+ ({Lookup.majorType == org_base}|
+  {Lookup.majorType == govern_key}
+ )
+ 
+ {Token.string == "of"}
+ ( 
+  {Token.category == DT}
+ )?
+ ({Upper})
+ ({Upper})?
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "unknown", rule = "BaseofOrg"}
+
+
+
+Rule:  OrgPreX
+Priority: 130
+
+// Royal Tuscan: org_pre Lookup, upperInitial tokens, optional org_ending
+
+(
+ {Lookup.majorType == org_pre}
+ (
+  {Token.orth == upperInitial})+
+ ({Lookup.majorType == org_ending})?
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "unknown", rule = "OrgPreX"}
+
+
+
+Rule: OrgChurch
+Priority: 150
+// St. Andrew's Church: SAINT, upperInitial token, apostrophe, "s"?, CHURCH
+
+(
+  (SAINT)
+  {Token.orth == upperInitial}
+  {Token.string == "'"}({Token.string == "s"})?
+  (CHURCH)
+)
+:orgName -->
+  :orgName.TempOrganization = {orgType = "other", rule = "OrgChurch"}
+
+
+Rule:OrgPersonAmbig
+Priority: 130
+// Alexandra Pottery should be org not person
+// overrides PersonFull
+// NOTE(review): the pattern requires an apostrophe after the first name
+(
+ (TITLE)?
+ (FIRSTNAME)
+ {Token.string == "'"}({Token.string == "s"})?
+ ({Lookup.majorType == org_key}|
+  {Lookup.majorType == org_base})
+ ({Lookup.majorType == org_ending})?
+)
+:org 
+-->
+ :org.TempOrganization= {orgType = "unknown", rule = "OrgPersonAmbig"}
+
+ 
+
+/////////////////////////////////////////////////////////////////
+// Location rules
+
+
+Rule:  Location1
+Priority: 200
+// Lookup = city, country, province, region, water
+
+// Western Europe
+// South China sea
+
+(
+ {Token.category == DT}
+)?
+(
+ ({Lookup.majorType == loc_key, Lookup.minorType == pre}
+ )?
+ {Lookup.majorType == location}
+ (
+  {Lookup.majorType == loc_key, Lookup.minorType == post})?
+)
+:locName -->
+{
+  // everything bound to :locName, and the Lookup annotations among it
+  gate.AnnotationSet boundLoc = (gate.AnnotationSet)bindings.get("locName");
+  gate.AnnotationSet gazLookups = boundLoc.get("Lookup");
+
+  gate.FeatureMap locFeatures = Factory.newFeatureMap();
+  // when a Lookup is present, its minorType becomes the locType
+  boolean lookupFound = gazLookups != null && !gazLookups.isEmpty();
+  if (lookupFound) {
+    gate.Annotation gazLookup = gazLookups.iterator().next();
+    locFeatures.put("locType", gazLookup.getFeatures().get("minorType"));
+  }
+  locFeatures.put("rule", "Location1");
+
+  // emit the TempLocation over the bound span
+  outputAS.add(boundLoc.firstNode(), boundLoc.lastNode(), "TempLocation",
+               locFeatures);
+}
+
+Rule:  GazLocation
+Priority: 200
+// A location Lookup becomes a TempLocation; a leading determiner is matched
+// but left outside the annotation.
+(
+ {Token.category == DT}
+)?  
+(
+ {Lookup.majorType == location}
+)
+:locName
+ -->   
+{
+  gate.AnnotationSet matched = (gate.AnnotationSet)bindings.get("locName");
+  gate.FeatureMap tempLocFeatures = Factory.newFeatureMap();
+
+  // copy the minorType of a Lookup inside the match, if there is one,
+  // into the locType feature
+  gate.AnnotationSet lookups = matched.get("Lookup");
+  if (lookups != null && lookups.size() > 0) {
+    gate.Annotation lookup = lookups.iterator().next();
+    Object minorType = lookup.getFeatures().get("minorType");
+    tempLocFeatures.put("locType", minorType);
+  }
+
+  tempLocFeatures.put("rule", "GazLocation");
+  outputAS.add(matched.firstNode(), matched.lastNode(), "TempLocation",
+               tempLocFeatures);
+}
+
+Rule:  GazLocationLocation
+Priority: 100
+// Two location Lookups separated by a comma; each one gets its own
+// TempLocation annotation.
+
+(
+ ({Lookup.majorType == location}):locName1
+ {Token.string == ","}
+ ({Lookup.majorType == location}):locName2
+) 
+-->    
+
+{
+  // the two bound spans and the Lookup annotations inside each
+  gate.AnnotationSet firstSpan =
+    (gate.AnnotationSet)bindings.get("locName1");
+  gate.AnnotationSet firstLookups = firstSpan.get("Lookup");
+  gate.AnnotationSet secondSpan =
+    (gate.AnnotationSet)bindings.get("locName2");
+  gate.AnnotationSet secondLookups = secondSpan.get("Lookup");
+
+  // first location: locType is the minorType of one of its Lookups, if any
+  gate.FeatureMap firstFeatures = Factory.newFeatureMap();
+  if (firstLookups != null && !firstLookups.isEmpty()) {
+    gate.Annotation firstLookup = firstLookups.iterator().next();
+    firstFeatures.put("locType", firstLookup.getFeatures().get("minorType"));
+  }
+
+  // second location, handled the same way
+  gate.FeatureMap secondFeatures = Factory.newFeatureMap();
+  if (secondLookups != null && !secondLookups.isEmpty()) {
+    gate.Annotation secondLookup = secondLookups.iterator().next();
+    secondFeatures.put("locType",
+                       secondLookup.getFeatures().get("minorType"));
+  }
+
+  firstFeatures.put("rule", "GazLocation");
+  outputAS.add(firstSpan.firstNode(), firstSpan.lastNode(), "TempLocation",
+               firstFeatures);
+
+  secondFeatures.put("rule", "GazLocation");
+  outputAS.add(secondSpan.firstNode(), secondSpan.lastNode(), "TempLocation",
+               secondFeatures);
+}
+
+
+
+
+
+Rule: LocationPost
+Priority: 50 // NNP token + post-type loc_key Lookup; a leading DT is not annotated
+(
+ {Token.category == DT}
+)?
+(
+ {Token.category == NNP}
+ {Lookup.majorType == loc_key, Lookup.minorType == post}
+)
+:locName
+-->
+ :locName.TempLocation = {kind = "locName", rule = LocationPost}
+
+Rule:LocKey // pre-type loc_key Lookup, one Upper, optional post-type loc_key
+(
+ {Token.category == DT}
+)?
+(
+ ({Lookup.majorType == loc_key, Lookup.minorType == pre}
+ )
+ ({Upper})
+ (
+  {Lookup.majorType == loc_key, Lookup.minorType == post})?
+)
+:locName -->
+:locName.TempLocation = {kind = "locName", rule = LocKey}
+/////////////////////////////////////////////////////////////////
+
+// Context-based Rules
+
+
+Rule:InLoc1
+// "in" followed by a location Lookup: the Lookup (not the "in") becomes a
+// TempLocation whose locType is copied from the Lookup's minorType.
+(
+ {Token.string == "in"}
+)
+(
+ {Lookup.majorType == location}
+)
+:locName
+-->
+ :locName.TempLocation = {kind = "locName", rule = InLoc1,
+                          locType = :locName.Lookup.minorType}
+
+Rule:LocGeneralKey
+Priority: 30 // loc_general_key Lookup + "of" (not annotated), then one Upper
+(
+ {Lookup.majorType == loc_general_key}
+ {Token.string == "of"}
+)
+(
+ ({Upper})
+)
+:loc
+-->
+ :loc.TempLocation = {kind = "locName", rule = LocGeneralKey}
+
+
+Rule:OrgContext1
+Priority: 1
+// company X: the token "company" (not annotated) followed by 1-3 Upper
+
+(
+ {Token.string == "company"}
+)
+(
+ ({Upper})
+ ({Upper})?
+ ({Upper})? 
+)
+:org
+-->
+ :org.TempOrganization= {orgType = "company", rule = "OrgContext1"}
+
+Rule: OrgContext2
+Priority: 5
+// 1-3 Upper followed by an office/laboratory word, which is not annotated
+// Telstar laboratory
+// Medici offices
+
+(
+ ({Upper})
+ ({Upper})?
+ ({Upper})? 
+)
+: org
+(
+ ({Token.string == "offices"} |
+ {Token.string == "Offices"} |
+ {Token.string == "laboratory"} | 
+ {Token.string == "Laboratory"} |
+ {Token.string == "laboratories"} |
+ {Token.string == "Laboratories"})
+)
+-->
+ :org.TempOrganization= {orgType = "other", rule = "OrgContext2"}
+
+
+
+Rule:JoinOrg
+Priority: 50
+// Smith joined Energis: a form of "join" (not annotated), then 1-3 Upper
+
+(
+ ({Token.string == "joined"}|
+  {Token.string == "joining"}|
+  {Token.string == "joins"}|
+  {Token.string == "join"}
+ )
+)
+(
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
+)
+:org
+-->
+ :org.TempOrganization= {orgType = "company", rule = "joinOrg"}
+
+
+
+
+
+
+
+
+
+
+


Property changes on: gate/trunk/plugins/Lang_French/grammar/name-twitter.jape
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Modified: gate/trunk/plugins/Lang_French/grammar/name.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/name.jape    2016-10-06 12:34:37 UTC 
(rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/name.jape    2016-10-06 12:35:17 UTC 
(rev 19646)
@@ -15,7 +15,7 @@
 
 
 Phase: Name
-Input: Token Lookup Title FirstPerson TreeTaggerToken
+Input: Token Lookup Title FirstPerson Upper ClosedClass Initials Split UserID 
Hashtag 
 Options: control = appelt debug = false
 
 ///////////////////////////////////////////////////////////////
@@ -28,57 +28,28 @@
  {Title}
  ({Token.string == "."})?
 )
-Macro: INITIALS
-(
-  ({Token.orth == upperInitial, Token.length =="1"}
-  ({Token.string == "."})?
-  )+
-)
 
-Macro: INITIALS2
 
-(
- {Token.orth == allCaps, Token.length == "2"} |
- {Token.orth == allCaps, Token.length == "3"}
-)
 
 
 Macro: FIRSTNAME
-(
- ({FirstPerson.gender == male} |
-  {FirstPerson.gender == female})
- |
- (INITIALS)
-)
 
+ ({FirstPerson.gender == male, FirstPerson.kind != ambig} |
+  {FirstPerson.gender == female, FirstPerson.kind != ambig})
+
+
 Macro: FIRSTNAMEAMBIG
 (
- {Lookup.majorType == person_first, Lookup.minorType == ambig}
+ {FirstPerson.kind == ambig}
 )
 
 
 
-Macro: UPPERTAG
-(
- ({TreeTaggerToken.category == NAM}
-)
- ({Token.string == "-"}
-  {TreeTaggerToken.category == NAM}
- )?
-)
 
-Macro: UPPER
-(
- ({Token.orth == upperInitial}
-)
- ({Token.string == "-"}
-  {Token.orth == upperInitial}
- )?
-)
 
-
 Macro: PERSONENDING
 (
+ ({Token.string == ","})?
  {Lookup.majorType == person_ending}
 )
 
@@ -101,67 +72,92 @@
 
 Rule: Pronoun
 Priority: 1000
-//stops personal pronouns being recognised as Initials
+
 (
- {TreeTaggerToken.category == PP}|
- {TreeTaggerToken.category == PRP}|
- {TreeTaggerToken.category == RB}
+ {Token.category == PP}|
+ {Token.category == PRP}|
+ {Token.category == RB}
 ):pro
 -->
 {}
 
- 
 
-Rule:  GazPerson
-Priority: 50
+
+Rule:Reject
+Priority: 1000
+// stops certain things being recognised as People
 (
- {Lookup.majorType == person_full, Lookup.minorType == normal}
+ {Hashtag}|{UserID}|{ClosedClass}
 )
-:person -->
-{
-gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
-gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
-gate.FeatureMap features = Factory.newFeatureMap();
-features.put("kind", "personName");
-features.put("rule", "GazPerson");
-outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
-features);
-}
+-->
+{}
 
-Rule:  TheGazPersonFirst
-Priority: 200
+
+Rule:    GazPerson
+Priority: 100
 (
- {TreeTaggerToken.category == DT}|
- {TreeTaggerToken.category == PRP}|
- {TreeTaggerToken.category == RB}
-)
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
 (
- {FirstPerson}
+ {Lookup.majorType == person_full}
 )
-:person 
-( 
- {Token.orth == upperInitial, Token.length == "1"}
-)?
--->
+:person -->
 {
-gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
-gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
+
+// find the Token annotations
+AnnotationSet tokenSet = gate.Utils.getContainedAnnotations(inputAS, 
personSet, "Token");
+// put them in order
+List<Annotation> tokenList = gate.Utils.inDocumentOrder(tokenSet);
+
+if (tokenList.size() == 1) {
+ // if there's only one Token, guess it's a surname
+
+  String surnameContent = gate.Utils.stringFor(doc, tokenList.get(0));
+  features.put("surname", surnameContent);
+ }
+
+else if (tokenList.size() > 0) {
+  // the string under the first Token
+   String firstNameContent = gate.Utils.stringFor(doc, tokenList.get(0));
+  features.put("firstName", firstNameContent);
+
+
+  // the string under the remaining Tokens if any
+  if (tokenList.size() > 1) {
+    Long lastNameStart = gate.Utils.start(tokenList.get(1));
+    Long lastNameEnd   = gate.Utils.end(tokenList.get(tokenList.size() - 1));
+    String surnameContent = gate.Utils.stringFor(doc, lastNameStart, 
lastNameEnd);
+    features.put("surname", surnameContent);
+  }
+}
+
+features.put("kind", "fullName");
+features.put("rule", "GazPerson");
 features.put("gender", personAnn.getFeatures().get("gender"));
-features.put("kind", "personName");
-features.put("rule", "GazPersonFirst");
-outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
-features);
-//outputAS.removeAll(person);
+
+// this method doesn't require try-catch
+gate.Utils.addAnn(outputAS, personSet, "TempPerson", features);
 }
 
 
+
+
+
 Rule:  GazPersonFirst
-Priority: 70
+Priority: 200
 (
- {FirstPerson}
-)
-:person 
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ {FirstPerson.kind != ambig}
+):person 
 ( 
  {Token.orth == upperInitial, Token.length == "1"}
 )?
@@ -171,202 +167,270 @@
 gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
 gate.FeatureMap features = Factory.newFeatureMap();
 features.put("gender", personAnn.getFeatures().get("gender"));
-features.put("kind", "personName");
+features.put("kind", "firstName");
 features.put("rule", "GazPersonFirst");
+
+// get the string of the first name
+String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+features.put("firstName", contentFirstName);
+
 outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
 features);
-//outputAS.removeAll(person);
 }
 
 
 
-
-
 Rule: PersonFirstContext
 Priority: 30
 // Anne and Kenton
 
+(FIRSTNAME):person1
 (
- {FirstPerson}
-):person1
-(
  {Token.string == "and"}
 )
-({Token.orth == upperInitial})
+({Token.orth == upperInitial, Token.length != "1"})
 :person2
  -->
 {
 //first deal with person1
  gate.FeatureMap features1 = Factory.newFeatureMap();
-gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1");
-gate.AnnotationSet firstPerson = (gate.AnnotationSet)person1Set.get("FirstPerson");
-if (firstPerson != null && firstPerson.size()>0)
-{
-  gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
+ gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1");
+  gate.Annotation personAnn = (gate.Annotation)person1Set.iterator().next();
+ 
+  String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+  features1.put("firstName", contentFirstName);
   features1.put("gender", personAnn.getFeatures().get("gender"));
-}
-  features1.put("kind", "personName");
+  features1.put("kind", "firstName");
   features1.put("rule", "PersonFirstContext");
 outputAS.add(person1Set.firstNode(), person1Set.lastNode(), "TempPerson",
 features1);
+
 //now deal with person2
 gate.FeatureMap features2 = Factory.newFeatureMap();
 gate.AnnotationSet person2Set = (gate.AnnotationSet)bindings.get("person2");
-  features2.put("kind", "personName");
+gate.Annotation person2Ann = (gate.Annotation)person2Set.iterator().next();
+
+  String content2FirstName = gate.Utils.stringFor(doc, person2Ann);
+  features2.put("firstName", content2FirstName);
+  features2.put("kind", "firstName");
   features2.put("rule", "PersonFirstContext");
 outputAS.add(person2Set.firstNode(), person2Set.lastNode(), "TempPerson",
 features2);
 }
 
 
-Rule: PersonFirstContext2
-Priority: 40
-// Anne and I
+Rule:  PersonTitle
+Priority: 35
+// Mr. Jones
+// Mr Fred Jones
+// note we only allow one first and surname, 
+// but we add more in a final phase if we find adjacent unknowns
 
+( 
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
 (
- {FirstPerson}
-):person
-(
- {Token.string == "and"}
- {Token.length == "1"}
-)
- -->
+ ({Title.rule == "TitleGender"}):title
+ ({Title})?
+ (
+  (FIRSTNAME | FIRSTNAMEAMBIG )?
+ ):firstName
+ (
+  (PREFIX)* 
+  ({Upper})
+  (PERSONENDING)?
+ ):surname
+):person 
+-->
 {
  gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet firstPerson = (gate.AnnotationSet)personSet.get("FirstPerson");
-if (firstPerson != null && firstPerson.size()>0)
-{
-  gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
-  features.put("gender", personAnn.getFeatures().get("gender"));
-}
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ 
+ gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+  String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+  features.put("title", contentTitle);
+  features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+  gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
+  String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+  features.put("firstName", firstNameContent);
+ }
+  String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+  features.put("surname", surnameContent);
+
   features.put("kind", "personName");
-  features.put("rule", "PersonFirstContext2");
+  features.put("rule", "PersonTitle");
 outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
 features);
 }
 
 
-Rule:  PersonTitle
-Priority: 35
-// Mr. Jones
-// Mr Fred Jones
-// note we only allow one first and surname, 
-// but we can add more in a final phase if we find adjacent unknowns
+Rule:  PersonTitleUnknownGender
+Priority: 30
+// Prof. Jones
+// This person will just get an unknown value for gender. Or we could decide to make them male by default, as they're mostly military etc.
 
 ( 
- {TreeTaggerToken.category == DT}|
- {TreeTaggerToken.category == PRP}|
- {TreeTaggerToken.category == RB}
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
 )?
 (
- (TITLE)+
- ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
- )?
+ ({Title}):title
+ ({Title})?
+ (
+  (FIRSTNAME | FIRSTNAMEAMBIG )?
+ ):firstName
+ (
   (PREFIX)* 
-  (UPPER)
- (PERSONENDING)?
-)
-:person -->
+  ({Upper})
+  (PERSONENDING)?
+ ):surname
+):person 
+-->
 {
  gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-  
- // get all Title annotations that have a gender feature
- HashSet fNames = new HashSet();
-    fNames.add("gender");
-    gate.AnnotationSet personTitle = personSet.get("Title", fNames);
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ 
+ gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
 
-// if the gender feature exists
- if (personTitle != null && personTitle.size()>0)
-{
-  //Out.prln("Titles found " +  personTitle);
-  gate.Annotation personAnn = (gate.Annotation)personTitle.iterator().next();
-  features.put("gender", personAnn.getFeatures().get("gender"));
-}
-else
-{
-  //get all firstPerson annotations that have a gender feature
-  //  HashSet fNames = new HashSet();
-   // fNames.add("gender");
-    gate.AnnotationSet firstPerson = personSet.get("FirstPerson", fNames);
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
 
-  if (firstPerson != null && firstPerson.size()>0)
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+  String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+  features.put("title", contentTitle);
+  features.put("gender", "unknown");
+
+ if (firstNameSet != null && firstNameSet.size()>0)
  {
-    //Out.prln("First persons found " +  firstPerson);
-  gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
-  features.put("gender", personAnn.getFeatures().get("gender"));
+  gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
+  String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+  features.put("firstName", firstNameContent);
  }
-}
+  String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+  features.put("surname", surnameContent);
+
   features.put("kind", "personName");
-  features.put("rule", "PersonTitle");
+  features.put("rule", "PersonTitleGenderUnknown");
 outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
 features);
 }
 
 
-Rule:  PersonFirstTitleGender
-Priority: 55
-// use this rule when we know what gender the title indicates
-// Mr Fred
+Rule:  PersonTitleInitials
+Priority: 35
 
+// Mr J. Jones
+
+
+( 
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
 (
- ({Title.gender == male} | {Title.gender == female})
- ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
- )
-)
-:person -->
+ ({Title.rule == "TitleGender"}):title
+ ({Title})?
+ (
+  ({Initials})?
+ ):initials
+ (
+  (PREFIX)* 
+  ({Upper, !Initials})
+  (PERSONENDING)?
+ ):surname
+):person 
+-->
 {
  gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet title = (gate.AnnotationSet)personSet.get("Title");
-if (title != null && title.size()>0)
-{
-  gate.Annotation personAnn = (gate.Annotation)title.iterator().next();
-  features.put("gender", personAnn.getFeatures().get("gender"));
-}
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ 
+ gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+  String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+  features.put("title", contentTitle);
+  features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ if (initialsSet != null && initialsSet.size()>0)
+ {
+  List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
+
+  Long initialsStart = gate.Utils.start(initialsList.get(0));
+  Long initialsEnd   = gate.Utils.end(initialsList.get(initialsList.size() - 1));
+  String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart, initialsEnd);
+  features.put("initials", initialsContent);
+ }
+  String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+  features.put("surname", surnameContent);
+
   features.put("kind", "personName");
-  features.put("rule", "PersonFirstTitleGender");
+  features.put("rule", "PersonTitleInitials");
 outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
 features);
 }
 
 
-Rule: PersonTitleGender
-Priority: 18
-// use this rule if the title has a feature gender
-// Miss F Smith
+Rule:  TitleFirstName
+Priority: 55
+// use this rule when we know what gender the title indicates
+// Mr Fred
+
 (
- ({Title.gender == male}|
-  {Title.gender == female}
- ) 
- ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
- )*
- (UPPER)
- (PERSONENDING)?
+ ({Title.gender == male} | {Title.gender == female}):title
+ (FIRSTNAME | FIRSTNAMEAMBIG ):firstname
+
 )
 :person -->
+
 {
  gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet title = (gate.AnnotationSet)personSet.get("Title");
-// if the annotation type title doesn't exist, do nothing
-if (title != null && title.size()>0)
-{
-// if it does exist, take the first element in the set
-  gate.Annotation personAnn = (gate.Annotation)title.iterator().next();
-//propagate gender feature (and value) from title
-  features.put("gender", personAnn.getFeatures().get("gender"));
-}
-// create some new features
-  features.put("kind", "personName");
-  features.put("rule", "PersonTitleGender");
-// create a TempPerson annotation and add the features we've created
-outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ 
+ gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+  String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+  features.put("title", contentTitle);
+  features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+  gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
+  String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+  features.put("firstName", firstNameContent);
+ }
+ 
+ features.put("kind", "personName");
+
+ features.put("rule", "TitleFirstName");
+ outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
 features);
 }
 
 
+
 Rule: PersonJobTitle
 Priority: 20
 // note we include titles but not jobtitles in markup

@@ Diff output truncated at 100000 characters. @@
This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to