Revision: 19646 http://sourceforge.net/p/gate/code/19646 Author: dgmaynard Date: 2016-10-06 12:35:17 +0000 (Thu, 06 Oct 2016) Log Message: ----------- updating to latest English version
Modified Paths: -------------- gate/trunk/plugins/Lang_French/grammar/clean.jape gate/trunk/plugins/Lang_French/grammar/date.jape gate/trunk/plugins/Lang_French/grammar/date_pre.jape gate/trunk/plugins/Lang_French/grammar/email.jape gate/trunk/plugins/Lang_French/grammar/final.jape gate/trunk/plugins/Lang_French/grammar/first.jape gate/trunk/plugins/Lang_French/grammar/firstname.jape gate/trunk/plugins/Lang_French/grammar/loc_context.jape gate/trunk/plugins/Lang_French/grammar/main.jape gate/trunk/plugins/Lang_French/grammar/name.jape gate/trunk/plugins/Lang_French/grammar/name_context.jape gate/trunk/plugins/Lang_French/grammar/name_post.jape gate/trunk/plugins/Lang_French/grammar/number.jape gate/trunk/plugins/Lang_French/grammar/org_context.jape gate/trunk/plugins/Lang_French/grammar/reldate.jape gate/trunk/plugins/Lang_French/grammar/unknown.jape gate/trunk/plugins/Lang_French/grammar/url.jape Added Paths: ----------- gate/trunk/plugins/Lang_French/grammar/document_date.jape gate/trunk/plugins/Lang_French/grammar/hyphens.jape gate/trunk/plugins/Lang_French/grammar/main-twitter.jape gate/trunk/plugins/Lang_French/grammar/name-twitter.jape gate/trunk/plugins/Lang_French/grammar/number_clean.jape gate/trunk/plugins/Lang_French/grammar/numberletter.jape Modified: gate/trunk/plugins/Lang_French/grammar/clean.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/clean.jape 2016-10-06 12:34:37 UTC (rev 19645) +++ gate/trunk/plugins/Lang_French/grammar/clean.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -14,8 +14,8 @@ */ Phase: Clean -Input: TempPerson TempLocation TempOrganization TempDate TempTime TempYear TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs -Options: control = appelt +Input: TempPerson TempLocation TempOrganization TempDate TempTime TempYear TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs ClosedClass Initials Upper FirstPerson JobTitle HashtagToken 
HashtagLookup NumberLetter Temp Title UrlPre +Options: control = all Rule:CleanTempAnnotations ( @@ -33,7 +33,18 @@ {Phone}| {Ip}| {TempIdentifier}| - {TempSpecs} + {TempSpecs}| + {ClosedClass}| + {Upper}| + {Initials}| + {FirstPerson}| + {JobTitle}| + {HashtagToken}| + {HashtagLookup}| + {Title}| + {UrlPre}| + {Temp}| + {NumberLetter} ):temp --> { Modified: gate/trunk/plugins/Lang_French/grammar/date.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/date.jape 2016-10-06 12:34:37 UTC (rev 19645) +++ gate/trunk/plugins/Lang_French/grammar/date.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -21,13 +21,6 @@ ///////////////////////////////////////////////// -Macro: DEF_ART_SING -( - {Token.string == "le"}| - {Token.string == "la"}| - {Token.string == "l"}{Token.string == "'"} - ) - Macro: DAY_NAME ({Lookup.minorType == day }) @@ -70,6 +63,9 @@ Macro: DASH {Token.string == "-"} +Macro: DOT + {Token.string == "."} + Macro: OF {Token.string == "of"} @@ -112,11 +108,7 @@ Macro: ORDINAL ( - ({Token.kind == number} - ({Token.string == "th"}| - {Token.string == "rd"}| - {Token.string == "nd"}| - {Token.string == "st"}) + ({Token.string ==~ "[0-9][0-9]?(th|rd|nd|st)"} | {Lookup.minorType == ordinal}) ( @@ -170,33 +162,38 @@ Rule: TimeDigital2 -// 04h30 -// 6h +// 8:14 am +// 4.34 pm +// 6am + ( (ONE_DIGIT|TWO_DIGIT) - ({Token.string == "h"}|{Token.string == "H"}) - (TWO_DIGIT) + (({Token.string == ":"}|{Token.string == "."} |{Token.string == "-"} ) + TWO_DIGIT)? + (TIME_AMPM) (TIME_ZONE)? 
) :time --> -:time.TempTime = {kind = "positive", rule = "TimeDigital2"} +:time.TempTime = {kind = "positive", rule = "TimeDigital"} Rule: TimeOClock -// dix heures +// ten o'clock ( {Lookup.minorType == hour} - {Token.string == "heures"} + {Token.string == "o"} + {Token.string == "'"} + {Token.string == "clock"} ) :time --> :time.TempTime = {kind = "positive", rule = "TimeOClock"} -/*Rule: TimeAnalogue +Rule: TimeAnalogue // half past ten // ten to twelve // twenty six minutes to twelve @@ -217,9 +214,9 @@ :time --> :time.TempTime = {kind = "positive", rule = "TimeAnalogue"} -*/ -/*Rule: TimeWordsContext + +Rule: TimeWordsContext Priority: 50 // seven thirty tomorrow @@ -234,10 +231,10 @@ ) --> :time1.TempTime = {kind = "positive", rule = "TimeWordsContext"} -*/ -/*Rule: TimeWords +Rule: TimeWords + ( {Lookup.majorType == number} ( @@ -248,7 +245,7 @@ --> :time.TempTime = {kind = "timeWords", rule = "TimeWords"} - */ + Rule: TimeDigitalContext1 @@ -356,20 +353,16 @@ // Date Rules -// commented out this rule because Date and Person are not included in the -// Input headers and I have no idea if adding them will mess up other rules +//Rule: IgnoreDatePerson +//Priority: 500 +//( +// {Date} +// {Person} +//) +//:date +//--> +//{} -/* -Rule: IgnoreDatePerson -Priority: 500 -( - {Date} - {Person} -) -:date ---> -{} -*/ Rule: DateSlash // UK only @@ -405,7 +398,6 @@ :date.TempDate = {rule = "DateDash"} - Rule: DateName Priority: 20 // Wed 10 July @@ -417,7 +409,6 @@ // July, 2000 ( - (DEF_ART_SING)? (DAY_NAME NUM_OR_ORDINAL MONTH_NAME)| (DAY_NAME (COMMA)? 
@@ -505,16 +496,16 @@ :date.TempDate = {rule = "DateNumDashRev"} -Rule: DateNumSlash +Rule: DateNumSlashDot // 01/07/00 // Note: not 07/00 ( -DAY_MONTH_NUM SLASH DAY_MONTH_NUM SLASH YEAR +DAY_MONTH_NUM (SLASH|DOT) DAY_MONTH_NUM (SLASH|DOT) YEAR ) :date --> - :date.TempDate = {rule = "DateNumSlash"} + :date.TempDate = {rule = "DateNumSlashDot"} Rule: ModifierMonth @@ -626,7 +617,7 @@ (FOUR_DIGIT) :date --> - :date.TempYear = {kind = "positive", rule = "TempYear3"} + :date.TempYear = {kind = "negative", rule = "TempYear3"} Rule: YearWords Modified: gate/trunk/plugins/Lang_French/grammar/date_pre.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/date_pre.jape 2016-10-06 12:34:37 UTC (rev 19645) +++ gate/trunk/plugins/Lang_French/grammar/date_pre.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -51,6 +51,16 @@ --> :date.TempDate = {rule = "GazDate"} +Rule: GazDateAmbig +Priority: 200 +(SPACE | {Token.kind == punctuation}) +( + {Token.string == "Sun"} +) +:date +(SPACE | {Token.kind == punctuation}) +--> + :date.TempDate = {rule = "GazDateAmbig", } Rule: PersonDateAmbig Priority: 100 Added: gate/trunk/plugins/Lang_French/grammar/document_date.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/document_date.jape (rev 0) +++ gate/trunk/plugins/Lang_French/grammar/document_date.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -0,0 +1,33 @@ +Phase: DateHeader +Input: DCT +Options: control = appelt + +Rule: DCT +( + {DCT} +):tag +--> +{ +gate.AnnotationSet tagSet = (gate.AnnotationSet)bindings.get("tag"); +gate.Annotation tagAnn = (gate.Annotation)tagSet.iterator().next(); + +gate.FeatureMap features = Factory.newFeatureMap(); + + +String s = gate.Utils.stringFor(doc, tagAnn); +//String content = doc.getContent().getContent(tagAnn.getStartNode().getOffset(), + // tagAnn.getEndNode().getOffset()).toString(); + + if (s.matches("^\\d{8}$") ) { +String s1 = 
s.substring(0,4) + "-" + s.substring(4,6) + "-" + s.substring(6,8); + +doc.getFeatures().put("document-date", s1); +} + + } + + + + + + Property changes on: gate/trunk/plugins/Lang_French/grammar/document_date.jape ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Modified: gate/trunk/plugins/Lang_French/grammar/email.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/email.jape 2016-10-06 12:34:37 UTC (rev 19645) +++ gate/trunk/plugins/Lang_French/grammar/email.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -25,15 +25,17 @@ ( ( {Token.kind == word}| - {Token.kind == number}| + {Token.kind == number} + )[1,9] + ( {Token.string == "_"} - ) - ({Token.string == "."} + )? + ({Token.string == "."})? ({Token.kind == word}| {Token.kind == number}| {Token.string == "_"} - ) - )? + )[0,9] + {Token.string == "@"} ( {Token.kind == word}| @@ -47,7 +49,7 @@ {Token.kind == symbol}| {Token.kind == punctuation}| {Token.kind == number} - )? + )[0,9] ({Token.string == "."})? ( {Token.kind == word}| Modified: gate/trunk/plugins/Lang_French/grammar/final.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/final.jape 2016-10-06 12:34:37 UTC (rev 19645) +++ gate/trunk/plugins/Lang_French/grammar/final.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -16,34 +16,34 @@ //note: organization should be included as part of the address ?? 
Phase: Final -Input: Token Lookup JobTitle TempPerson TempLocation TempOrganization TempDate TempTime TempYear TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs +Input: Token Lookup Jobtitle TempPerson TempLocation TempOrganization TempDate TempTime TempYear TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs Title Split Money Options: control = appelt /////////////////////////////////////////////////////////////// +Rule: Money +Priority: 200 +( + {Money} +) +--> +{} Rule: PersonFinal Priority: 30 -({JobTitle} -)? + ( - {TempPerson.kind == personName} -)+ + {TempPerson} +) :person --> { gate.FeatureMap features = Factory.newFeatureMap(); gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); -gate.Annotation person1Ann = (gate.Annotation)personSet.iterator().next(); +gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next(); -gate.AnnotationSet firstPerson = (gate.AnnotationSet)personSet.get("TempPerson"); -if (firstPerson != null && firstPerson.size()>0) -{ - gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next(); - features.put("gender", personAnn.getFeatures().get("gender")); -} - features.put("rule1", person1Ann.getFeatures().get("rule")); - features.put("rule", "PersonFinal"); +features.putAll(personAnn.getFeatures()); +features.put("ruleFinal", "PersonFinal"); outputAS.add(personSet.firstNode(), personSet.lastNode(), "Person", features); outputAS.removeAll(personSet); @@ -75,21 +75,23 @@ --> { //removes TempOrg annotation, gets the rule feature and adds a new Org annotation -gate.AnnotationSet org = (gate.AnnotationSet)bindings.get("org"); -gate.Annotation orgAnn = (gate.Annotation)org.iterator().next(); +gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("org"); +//locate the first TempOrganization annotation +//(there will always be at least one) +gate.Annotation orgAnn = orgSet.get("TempOrganization").iterator().next(); gate.FeatureMap features = 
Factory.newFeatureMap(); -features.put("orgType", orgAnn.getFeatures().get("orgType")); -features.put("rule1", orgAnn.getFeatures().get("rule")); -features.put("rule2", "OrgCountryFinal"); -outputAS.add(org.firstNode(), org.lastNode(), "Organization", + +features.putAll(orgAnn.getFeatures()); +features.put("ruleFinal", "OrgCountryFinal"); +outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "Organization", features); -outputAS.removeAll(org); +outputAS.removeAll(orgSet); } +// note - move this rule to after final +// another note - I have no idea why the original note is there, or even which rule this refers to -//note - move this rule to after final - Rule: OrgFinal Priority: 10 ( @@ -99,21 +101,21 @@ --> { //removes TempOrg annotation, gets the rule feature and adds a new Org annotation -gate.AnnotationSet org = (gate.AnnotationSet)bindings.get("org"); -gate.Annotation orgAnn = (gate.Annotation)org.iterator().next(); +gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("org"); +gate.Annotation orgAnn = (gate.Annotation)orgSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("orgType", orgAnn.getFeatures().get("orgType")); -features.put("rule1", orgAnn.getFeatures().get("rule")); -features.put("rule2", "OrgFinal"); -outputAS.add(org.firstNode(), org.lastNode(), "Organization", + +features.putAll(orgAnn.getFeatures()); +features.put("ruleFinal", "OrgFinal"); +outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "Organization", features); -outputAS.removeAll(org); +outputAS.removeAll(orgSet); } Rule: PersonLocFinal Priority: 100 -// George Airport +// George Airport is a Location not a Person // later we might change this to any facility, rather than just airports ( @@ -125,14 +127,15 @@ --> { //removes TempLoc annotation, gets the rule feature and adds a new Loc annotation -gate.AnnotationSet loc = (gate.AnnotationSet)bindings.get("loc"); -gate.Annotation locAnn = (gate.Annotation)loc.iterator().next(); 
+gate.AnnotationSet locSet = (gate.AnnotationSet)bindings.get("loc"); +gate.Annotation locAnn = (gate.Annotation)locSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", locAnn.getFeatures().get("rule")); -features.put("rule2", "PersonLocFinal"); -outputAS.add(loc.firstNode(), loc.lastNode(), "Location", +features.putAll(locAnn.getFeatures()); +features.put("ruleFinal", "PersonLocFinal"); +features.put("locType", "airport"); +outputAS.add(locSet.firstNode(), locSet.lastNode(), "Location", features); -outputAS.removeAll(loc); +outputAS.removeAll(locSet); } @@ -146,15 +149,14 @@ --> { //removes TempLoc annotation, gets the rule feature and adds a new Loc annotation -gate.AnnotationSet loc = (gate.AnnotationSet)bindings.get("loc"); -gate.Annotation locAnn = (gate.Annotation)loc.iterator().next(); +gate.AnnotationSet locSet = (gate.AnnotationSet)bindings.get("loc"); +gate.Annotation locAnn = (gate.Annotation)locSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("locType",locAnn.getFeatures().get("locType")); -features.put("rule1", locAnn.getFeatures().get("rule")); -features.put("rule2", "LocFinal"); -outputAS.add(loc.firstNode(), loc.lastNode(), "Location", +features.putAll(locAnn.getFeatures()); +features.put("ruleFinal", "LocFinal"); +outputAS.add(locSet.firstNode(), locSet.lastNode(), "Location", features); -outputAS.removeAll(loc); +outputAS.removeAll(locSet); } @@ -165,13 +167,15 @@ Rule: DateTimeFinal Priority: 20 // Friday 10 January 2000 2pm +// 2008-01-25T16:10:48 ( {TempDate} ( ({Token.string == ","})? {TempDate})? - ({Token.string == ":"})? + ({Token.string == ":"}| + {Token.string == "T"})? {TempTime} ({TempYear})? ({TempZone})? 
@@ -180,15 +184,14 @@ --> { //removes TempDate annotation, gets the rule feature and adds a new Date annotation -gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date"); -gate.Annotation dateAnn = (gate.Annotation)date.iterator().next(); +gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date"); +gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -//features.put("rule1", dateAnn.getFeatures().get("rule")); -features.put("rule2", "DateTimeFinal"); +features.put("ruleFinal", "DateTimeFinal"); features.put("kind", "dateTime"); -outputAS.add(date.firstNode(), date.lastNode(), "Date", +outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date", features); -outputAS.removeAll(date); +outputAS.removeAll(dateSet); } @@ -203,15 +206,15 @@ --> { //removes TempDate annotation, gets the rule feature and adds a new Date annotation -gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date"); -gate.Annotation dateAnn = (gate.Annotation)date.iterator().next(); +gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date"); +gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", dateAnn.getFeatures().get("rule")); -features.put("rule2", "SeasonYearFinal"); +features.putAll(dateAnn.getFeatures()); +features.put("ruleFinal", "SeasonYearFinal"); features.put("kind", "date"); -outputAS.add(date.firstNode(), date.lastNode(), "Date", +outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date", features); -outputAS.removeAll(date); +outputAS.removeAll(dateSet); } @@ -229,15 +232,15 @@ --> { //removes TempDate annotation, gets the rule feature and adds a new Date annotation -gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date"); -gate.Annotation dateAnn = (gate.Annotation)date.iterator().next(); +gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date"); 
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", dateAnn.getFeatures().get("rule")); +features.putAll(dateAnn.getFeatures()); features.put("rule2", "DateYearFinal"); features.put("kind", "date"); -outputAS.add(date.firstNode(), date.lastNode(), "Date", +outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date", features); -outputAS.removeAll(date); +outputAS.removeAll(dateSet); } @@ -256,15 +259,14 @@ --> { //removes TempDate annotation, gets the rule feature and adds a new Date annotation -gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date"); -gate.Annotation dateAnn = (gate.Annotation)date.iterator().next(); +gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date"); +gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -//features.put("rule1", dateAnn.getFeatures().get("rule")); -features.put("rule2", "TimeDateFinal"); +features.put("ruleFinal", "TimeDateFinal"); features.put("kind", "dateTime"); -outputAS.add(date.firstNode(), date.lastNode(), "Date", +outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date", features); -outputAS.removeAll(date); +outputAS.removeAll(dateSet); } @@ -282,15 +284,14 @@ --> { //removes TempDate annotation, gets the rule feature and adds a new Date annotation -gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date"); -gate.Annotation dateAnn = (gate.Annotation)date.iterator().next(); +gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date"); +gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -//features.put("rule1", dateAnn.getFeatures().get("rule")); -features.put("rule2", "TimeYearFinal"); +features.put("ruleFinal", "TimeYearFinal"); features.put("kind", "dateTime"); -outputAS.add(date.firstNode(), date.lastNode(), "Date", 
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date", features); -outputAS.removeAll(date); +outputAS.removeAll(dateSet); } @@ -298,7 +299,10 @@ Rule: DateOnlyFinal -Priority: 10 +Priority: 50 +( + {Title} +)? ( {TempDate} ) @@ -306,15 +310,15 @@ --> { //removes TempDate annotation, gets the rule feature and adds a new Date annotation -gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date"); -gate.Annotation dateAnn = (gate.Annotation)date.iterator().next(); +gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date"); +gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", dateAnn.getFeatures().get("rule")); -features.put("rule2", "DateOnlyFinal"); +features.putAll(dateAnn.getFeatures()); +features.put("ruleFinal", "DateOnlyFinal"); features.put("kind", "date"); -outputAS.add(date.firstNode(), date.lastNode(), "Date", +outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date", features); -outputAS.removeAll(date); +outputAS.removeAll(dateSet); } //fix this later @@ -333,25 +337,26 @@ --> { //removes TempDate annotation, gets the rule feature and adds a new Date annotation -gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date"); -gate.Annotation dateAnn = (gate.Annotation)date.iterator().next(); +gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date"); +gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", dateAnn.getFeatures().get("rule")); -features.put("rule", "TimeContextFinal"); +features.putAll(dateAnn.getFeatures()); +features.put("ruleFinal", "TimeContextFinal"); features.put("kind", "date"); -outputAS.add(date.firstNode(), date.lastNode(), "Date", +outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date", features); -outputAS.removeAll(date); +outputAS.removeAll(dateSet); + //removes TempTime annotation, gets the 
rule feature and adds a new Date annotation -gate.AnnotationSet time = (gate.AnnotationSet)bindings.get("time"); -gate.Annotation timeAnn = (gate.Annotation)time.iterator().next(); +gate.AnnotationSet timeSet = (gate.AnnotationSet)bindings.get("time"); +gate.Annotation timeAnn = (gate.Annotation)timeSet.iterator().next(); gate.FeatureMap features2 = Factory.newFeatureMap(); -features2.put("rule1", timeAnn.getFeatures().get("rule")); -features2.put("rule", "TimeContextFinal"); +features.putAll(timeAnn.getFeatures()); +features2.put("ruleFinal", "TimeContextFinal"); features2.put("kind", "time"); -outputAS.add(time.firstNode(), date.lastNode(), "Date", +outputAS.add(timeSet.firstNode(), timeSet.lastNode(), "Date", features2); -outputAS.removeAll(time); +outputAS.removeAll(timeSet); } @@ -368,15 +373,15 @@ --> { //removes TempTime annotation, gets the rule feature and adds a new Date annotation -gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date"); -gate.Annotation dateAnn = (gate.Annotation)date.iterator().next(); +gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date"); +gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", dateAnn.getFeatures().get("rule")); -features.put("rule2", "TimeWordsContextFinal"); +features.putAll(dateAnn.getFeatures()); +features.put("ruleFinal", "TimeWordsContextFinal"); features.put("kind", "time"); -outputAS.add(date.firstNode(), date.lastNode(), "Date", +outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date", features); -outputAS.removeAll(date); +outputAS.removeAll(dateSet); } @@ -389,15 +394,15 @@ --> { //removes TempDate annotation, gets the rule feature and adds a new Date annotation -gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date"); -gate.Annotation dateAnn = (gate.Annotation)date.iterator().next(); +gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date"); +gate.Annotation 
dateAnn = (gate.Annotation)dateSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", dateAnn.getFeatures().get("rule")); -features.put("rule2", "YearOnlyFinal"); +features.putAll(dateAnn.getFeatures()); +features.put("ruleFinal", "YearOnlyFinal"); features.put("kind", "date"); -outputAS.add(date.firstNode(), date.lastNode(), "Date", +outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date", features); -outputAS.removeAll(date); +outputAS.removeAll(dateSet); } @@ -411,15 +416,15 @@ --> { //removes TempDate annotation, gets the rule feature and adds a new Date annotation -gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date"); -gate.Annotation dateAnn = (gate.Annotation)date.iterator().next(); +gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date"); +gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", dateAnn.getFeatures().get("rule")); -features.put("rule2", "TimeOnlyFinal"); +features.putAll(dateAnn.getFeatures()); +features.put("ruleFinal", "TimeOnlyFinal"); features.put("kind", "time"); -outputAS.add(date.firstNode(), date.lastNode(), "Date", +outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date", features); -outputAS.removeAll(date); +outputAS.removeAll(dateSet); } @@ -440,15 +445,15 @@ --> { //removes TempAddress annotation, gets the rule feature and adds a new Address annotation -gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address"); -gate.Annotation addressAnn = (gate.Annotation)address.iterator().next(); +gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address"); +gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", addressAnn.getFeatures().get("rule")); -features.put("rule2", "AddressFull"); +features.putAll(addressAnn.getFeatures()); 
+features.put("ruleFinal", "AddressFull"); features.put("kind", "complete"); -outputAS.add(address.firstNode(), address.lastNode(), "Address", +outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address", features); -outputAS.removeAll(address); +outputAS.removeAll(addressSet); } @@ -461,15 +466,15 @@ --> { //removes Email annotation, gets the rule feature and adds a new Address annotation -gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address"); -gate.Annotation addressAnn = (gate.Annotation)address.iterator().next(); +gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address"); +gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", addressAnn.getFeatures().get("rule")); -features.put("rule2", "EmailFinal"); +features.putAll(addressAnn.getFeatures()); +features.put("ruleFinal", "EmailFinal"); features.put("kind", "email"); -outputAS.add(address.firstNode(), address.lastNode(), "Address", +outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address", features); -outputAS.removeAll(address); +outputAS.removeAll(addressSet); } @@ -482,15 +487,15 @@ --> { //removes TempAddress annotation, gets the rule feature and adds a new Address annotation -gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address"); -gate.Annotation addressAnn = (gate.Annotation)address.iterator().next(); +gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address"); +gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", addressAnn.getFeatures().get("rule")); -features.put("rule2", "PhoneFinal"); +features.putAll(addressAnn.getFeatures()); +features.put("ruleFinal", "PhoneFinal"); features.put("kind", "phone"); -outputAS.add(address.firstNode(), address.lastNode(), "Address", +outputAS.add(addressSet.firstNode(), 
addressSet.lastNode(), "Address", features); -outputAS.removeAll(address); +outputAS.removeAll(addressSet); } @@ -503,15 +508,15 @@ --> { //removes TempAddress annotation, gets the rule feature and adds a new Address annotation -gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address"); -gate.Annotation addressAnn = (gate.Annotation)address.iterator().next(); +gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address"); +gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", addressAnn.getFeatures().get("rule")); -features.put("rule2", "PostcodeFinal"); +features.putAll(addressAnn.getFeatures()); +features.put("ruleFinal", "PostcodeFinal"); features.put("kind", "postcode"); -outputAS.add(address.firstNode(), address.lastNode(), "Address", +outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address", features); -outputAS.removeAll(address); +outputAS.removeAll(addressSet); } @@ -524,15 +529,15 @@ --> { //removes TempAddress annotation, gets the rule feature and adds a new Address annotation -gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address"); -gate.Annotation addressAnn = (gate.Annotation)address.iterator().next(); +gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address"); +gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", addressAnn.getFeatures().get("rule")); -features.put("rule2", "IpFinal"); +features.putAll(addressAnn.getFeatures()); +features.put("ruleFinal", "IpFinal"); features.put("kind", "ip"); -outputAS.add(address.firstNode(), address.lastNode(), "Address", +outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address", features); -outputAS.removeAll(address); +outputAS.removeAll(addressSet); } @@ -545,15 +550,15 @@ --> { //removes TempAddress annotation, gets the rule 
feature and adds a new Address annotation -gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address"); -gate.Annotation addressAnn = (gate.Annotation)address.iterator().next(); +gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address"); +gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", addressAnn.getFeatures().get("rule")); +features.putAll(addressAnn.getFeatures()); features.put("rule2", "UrlFinal"); features.put("kind", "url"); -outputAS.add(address.firstNode(), address.lastNode(), "Address", +outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address", features); -outputAS.removeAll(address); +outputAS.removeAll(addressSet); } @@ -567,14 +572,14 @@ --> { //removes TempAddress annotation, gets the rule feature and adds a new Address annotation -gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address"); -gate.Annotation addressAnn = (gate.Annotation)address.iterator().next(); +gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address"); +gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", addressAnn.getFeatures().get("rule")); -features.put("rule2", "StreetFinal"); -outputAS.add(address.firstNode(), address.lastNode(), "Location", +features.putAll(addressAnn.getFeatures()); +features.put("ruleFinal", "StreetFinal"); +outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Location", features); -outputAS.removeAll(address); +outputAS.removeAll(addressSet); } //////////////////////////////////////////////////////////// @@ -590,14 +595,14 @@ --> { //removes TempIdent annotation, gets the rule feature and adds a new Identifier annotation -gate.AnnotationSet ident = (gate.AnnotationSet)bindings.get("ident"); -gate.Annotation identAnn = (gate.Annotation)ident.iterator().next(); 
+gate.AnnotationSet identSet = (gate.AnnotationSet)bindings.get("ident"); +gate.Annotation identAnn = (gate.Annotation)identSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule1", identAnn.getFeatures().get("rule")); -features.put("rule2", "IdentifierFinal"); -outputAS.add(ident.firstNode(), ident.lastNode(), "Identifier", +features.putAll(identAnn.getFeatures()); +features.put("ruleFinal", "IdentifierFinal"); +outputAS.add(identSet.firstNode(), identSet.lastNode(), "Identifier", features); -outputAS.removeAll(ident); +outputAS.removeAll(identSet); } @@ -613,38 +618,10 @@ --> { //removes TempSpecs annotation -gate.AnnotationSet spec = (gate.AnnotationSet)bindings.get("spec"); +gate.AnnotationSet specSet = (gate.AnnotationSet)bindings.get("spec"); //gate.FeatureMap features = Factory.newFeatureMap(); -outputAS.removeAll(spec); +outputAS.removeAll(specSet); } ////////////////////////////////////////////////////// -Rule: UnknownPerson -Priority: 5 -( - {Token.category == NNP} - (({Token.string == "-"})? - {Token.category == NNP})? - ( {Token.category == NNP})? - ( {Token.category == NNP})? 
-):unknown - -( - {TempPerson} -):person ---> -:unknown.Unknown = {kind = "PN", rule = UnknownTempPerson}, -{ -//removes TempPerson annotation, gets the rule feature and adds a new Person annotation -gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person"); -gate.Annotation personAnn = (gate.Annotation)person.iterator().next(); -gate.FeatureMap features = Factory.newFeatureMap(); -features.put("gender", personAnn.getFeatures().get("gender")); -features.put("rule1", personAnn.getFeatures().get("rule")); -features.put("rule2", "UnknownPerson"); -outputAS.add(person.firstNode(), person.lastNode(), "Person", -features); -outputAS.removeAll(person); -} - Modified: gate/trunk/plugins/Lang_French/grammar/first.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/first.jape 2016-10-06 12:34:37 UTC (rev 19645) +++ gate/trunk/plugins/Lang_French/grammar/first.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -14,11 +14,10 @@ */ Phase: First -Input: Token Lookup +Input: Token NumberLetter Options: control = appelt // this has to be run first of all -// contains any macros etc needed only for standard grammars ////////////////////////////////////////////////////////////// Macro: SPACE @@ -40,21 +39,59 @@ ) + + /////////////////////////////////////////////////////////////// -Rule: Silly -// we have to have a rule here, so we'll just have something silly +Rule: ClosedClass +// closed class words should not be part of names generally, so let's identify them +Priority: 100 ( - {Token.string == "afguahughaegarth"} -) -:silly + {Token.category == DT}| + {Token.category == PRP}| + {Token.category == RB}| + {Token.category == IN} +):tag --> - {} +:tag.ClosedClass = {rule = "ClosedClass"} +Rule: NumberLetter +Priority: 100 +( + {NumberLetter} +):tag +--> +{} +Rule: UpperAllCaps +Priority: 100 +// separate proper nouns that are in all caps, as they're more ambiguous +( + {Token.category == NNP, Token.orth == allCaps} 
+ ({Token.string == "-"} + {Token.category == NNP, Token.orth == allCaps} + )? +):tag +--> +:tag.Upper = {kind = "allCaps", rule = "Upper"} +Rule: Upper +// define what can be a possible proper noun - cater for the fact that POS tag might not be correct +( + ({Token.category == NNP}| + {Token.orth == upperInitial}| + {Token.orth == mixedCaps} + ) + ({Token.string == "-"} + {Token.category == NNP} + )? +):tag +--> +:tag.Upper = {rule = "Upper"} + + Modified: gate/trunk/plugins/Lang_French/grammar/firstname.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/firstname.jape 2016-10-06 12:34:37 UTC (rev 19645) +++ gate/trunk/plugins/Lang_French/grammar/firstname.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -14,14 +14,87 @@ */ Phase: FirstName -Input: Token Lookup +Input: Token Lookup ClosedClass NumberLetter UserID Options: control = appelt + +Rule: FirstNameTwitterName +Priority: 500 +// @fred + +( + {Lookup.majorType == person_first, UserID, Lookup.kind !=ambig} +):person +--> +{ +gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person").get("Lookup"); +gate.Annotation personAnn = (gate.Annotation)person.iterator().next(); +gate.FeatureMap features = Factory.newFeatureMap(); + +//find out if the gender is unambiguous +String gender = (String)personAnn.getFeatures().get("minorType"); +boolean ambig = false; +gate.FeatureMap constraints = Factory.newFeatureMap(); +constraints.put("majorType", "person_first"); +Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator(); +while(!ambig && lookupsIter.hasNext()){ + gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next(); + //we're only interested in annots of the same length + if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){ + ambig = !gender.equals(anAnnot.getFeatures().get("minorType")); + } +} +if(!ambig) features.put("gender", gender); + 
+features.put("rule", "FirstNameTwitterName"); +features.put("twittername", "yes"); +outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson", +features); +} + + +Rule: FirstNameTwitterNameAmbig +Priority: 600 +// @mark + +( + {Lookup.majorType == person_first, UserID, Lookup.kind ==ambig} +):person +--> +{ +gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person").get("Lookup"); +gate.Annotation personAnn = (gate.Annotation)person.iterator().next(); +gate.FeatureMap features = Factory.newFeatureMap(); + +//find out if the gender is unambiguous +String gender = (String)personAnn.getFeatures().get("minorType"); +boolean ambig = false; +gate.FeatureMap constraints = Factory.newFeatureMap(); +constraints.put("majorType", "person_first"); +Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator(); +while(!ambig && lookupsIter.hasNext()){ + gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next(); + //we're only interested in annots of the same length + if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){ + ambig = !gender.equals(anAnnot.getFeatures().get("minorType")); + } +} +if(!ambig) features.put("gender", gender); + +features.put("rule", "FirstNameTwitterNameAmbig"); +features.put("twittername", "yes"); +features.put("kind", "ambig"); +outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson", +features); +} + + Rule: FirstName // Fred + ( - {Lookup.majorType == person_first} + {Lookup.majorType == person_first, !ClosedClass} ):person --> { @@ -45,18 +118,61 @@ if(!ambig) features.put("gender", gender); features.put("rule", "FirstName"); +features.put("twittername", "no"); outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson", features); } + +Rule: FirstNameAmbig +Priority: 300 +/* prefer this rule if the firstname has an ambiguous feature in the gazetteer, e.g. 
"Christian" + In this case, we won't use it in the main name-finding grammar if we find it on its own, + only as part of a longer name +*/ + +( + {Lookup.majorType == person_first, Lookup.kind == ambig} +):person +--> +{ +gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person"); +gate.Annotation personAnn = (gate.Annotation)person.iterator().next(); +gate.FeatureMap features = Factory.newFeatureMap(); + +//find out if the gender is unambiguous +String gender = (String)personAnn.getFeatures().get("minorType"); +boolean ambig = false; +gate.FeatureMap constraints = Factory.newFeatureMap(); +constraints.put("majorType", "person_first"); +Iterator lookupsIter = inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", constraints).iterator(); +while(!ambig && lookupsIter.hasNext()){ + gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next(); + //we're only interested in annots of the same length + if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){ + ambig = !gender.equals(anAnnot.getFeatures().get("minorType")); + } +} +if(!ambig) features.put("gender", gender); + +features.put("rule", "FirstNameAmbig"); +features.put("kind", "ambig"); +features.put("twittername", "no"); +outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson", +features); +} + + Rule: TitleGender Priority: 50 // Mr ( - {Lookup.majorType == title, Lookup.minorType == male}| - {Lookup.majorType == title, Lookup.minorType == female} -):person + ({Lookup.majorType == title, Lookup.minorType == male}| + {Lookup.majorType == title, Lookup.minorType == female}) + ({Token.string == "."})? +) +:person --> { gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person"); @@ -73,6 +189,7 @@ ( {Lookup.majorType == title} + ({Token.string == "."})? ):person --> :person.Title = {rule = "Title"} @@ -80,6 +197,29 @@ +Rule: Initials1 +// A.B. +// A. 
+// A +( + ({Token.orth == upperInitial, Token.length =="1", !ClosedClass, !NumberLetter} + ({Token.string == "."})? + )+ +):tag +--> +:tag.Initials = {rule = "Initials1"} +Rule: Initials2 +// AB +// ABC + +( + {Token.orth == allCaps, Token.length == "2", !Lookup, !ClosedClass, !NumberLetter} | + {Token.orth == allCaps, Token.length == "3", !Lookup, !ClosedClass, !NumberLetter} +):tag +--> +:tag.Initials = {kind = "nopunct", rule = "Initials2"} + + Added: gate/trunk/plugins/Lang_French/grammar/hyphens.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/hyphens.jape (rev 0) +++ gate/trunk/plugins/Lang_French/grammar/hyphens.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -0,0 +1,25 @@ +Phase: Hyphens +Input: Token SpaceToken +Options: control = appelt + +/* A phase to deal with weird problems in hyphenated words +*/ + +Rule: UpperHyphenated +// two NNPs separated by no white space should also be an Upper. +// This happens when they're hyphenated and the hyphen is part of the first NNP + +( + ({Token.category == NNP}| + {Token.orth == upperInitial}| + {Token.orth == mixedCaps} + ) + ( + {Token.category == NNP}| + {Token.orth == upperInitial}| + {Token.orth == mixedCaps} + ) +):tag +--> +:tag.Upper = {rule = "UpperHyphenated"} + Modified: gate/trunk/plugins/Lang_French/grammar/loc_context.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/loc_context.jape 2016-10-06 12:34:37 UTC (rev 19645) +++ gate/trunk/plugins/Lang_French/grammar/loc_context.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -14,56 +14,59 @@ */ Phase: Loc_Context -Input: Unknown Token Location +Input: Unknown Token Location Lookup Options: control = appelt -Rule: LocConjLoc1 -Priority: 10 +//Rule: LocConjLoc1 +//Priority: 10 +// Unknown and Location -( -{Unknown.kind == PN} -):loc -( -{Token.category == CC} -({Token.category == DT} -)? 
-{Location} -) ---> -{ -gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc"); -gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule ", "LocConjLoc1"); -outputAS.add(loc.firstNode(), loc.lastNode(), "Location", -features); -outputAS.removeAll(loc); -} +//( +//{Unknown.kind == PN} +//):loc +//( +//{Token.category == CC} +//({Token.category == DT} +//)? +//{Location} +//) +//--> +//{ +//gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc"); +//gate.FeatureMap features = Factory.newFeatureMap(); +//features.put("rule ", "LocConjLoc1"); +//outputAS.add(loc.firstNode(), loc.lastNode(), "Location", +//features); +//outputAS.removeAll(loc); +//} -Rule: LocConjLoc2 -Priority: 10 +//Rule: LocConjLoc2 +//Priority: 10 -( - {Location} - {Token.category == CC} - ({Token.category == DT} - )? -) -( - {Unknown.kind == PN} -):loc ---> - { -gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc"); -gate.FeatureMap features = Factory.newFeatureMap(); -features.put("rule ", "LocConjLoc2"); -outputAS.add(loc.firstNode(), loc.lastNode(), "Location", -features); -outputAS.removeAll(loc); -} +// Location and Unknown +//( +// {Location} +// {Token.category == CC} + //({Token.category == DT} + //)? 
+//) +//( +// {Unknown.kind == PN} +//):loc +//--> +// { +//gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc"); +//gate.FeatureMap features = Factory.newFeatureMap(); +//features.put("rule ", "LocConjLoc2"); +//outputAS.add(loc.firstNode(), loc.lastNode(), "Location", +//features); +//outputAS.removeAll(loc); +//} + Rule: UnknownLocRegion Priority: 50 ( @@ -76,10 +79,37 @@ ):loc ( {Token.string == ","} - {Location.kind == region} + {Location.locType == region} ) --> - :loc.Location = {rule = "UnknownLocRegion"} + :loc.Location = {rule = "UnknownLocRegion"}, + { +gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc"); +outputAS.removeAll(loc); +} +Rule: LocState +Priority: 100 +( + {Location.locType == city} + ({Token.string == ","})? +) +( + {Lookup.majorType == state} +):tag +--> +:tag.Location = {locType = region, rule = "LocState"} +Rule: UnknownLocKey +Priority: 20 +( + ({Unknown}):tag + {Lookup.majorType == loc_general_key} +) +--> +:tag.Location = {locType = unknown, rule = "UnknownLocKey"}, +{ +gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("tag"); +outputAS.removeAll(loc); +} Added: gate/trunk/plugins/Lang_French/grammar/main-twitter.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/main-twitter.jape (rev 0) +++ gate/trunk/plugins/Lang_French/grammar/main-twitter.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -0,0 +1,38 @@ +/* +* main.jape +* +* Copyright (c) 1998-2004, The University of Sheffield. +* +* This file is part of GATE (see http://gate.ac.uk/), and is free +* software, licenced under the GNU Library General Public License, +* Version 2, June 1991 (in the distribution as file licence.html, +* and also available at http://gate.ac.uk/gate/licence.html). 
+* +* Diana Maynard, 02 Aug 2001 +* +* $Id: main.jape 9233 2007-11-23 13:01:52Z dgmaynard $ +*/ + +MultiPhase: TestTheGrammars +Phases: +first +firstname +name-twitter +name_post +date_pre +date +reldate +number +number_clean +address +url_pre +url +email +identifier +jobtitle +final +unknown +name_context +org_context +loc_context +//clean Modified: gate/trunk/plugins/Lang_French/grammar/main.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/main.jape 2016-10-06 12:34:37 UTC (rev 19645) +++ gate/trunk/plugins/Lang_French/grammar/main.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -14,8 +14,10 @@ */ MultiPhase: TestTheGrammars -Phases: +Phases: +numberletter first +hyphens firstname name name_post @@ -23,6 +25,7 @@ date reldate number +number_clean address url_pre url @@ -34,4 +37,4 @@ name_context org_context loc_context -clean \ No newline at end of file +clean Added: gate/trunk/plugins/Lang_French/grammar/name-twitter.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/name-twitter.jape (rev 0) +++ gate/trunk/plugins/Lang_French/grammar/name-twitter.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -0,0 +1,1726 @@ +/* +* name.jape +* +* Copyright (c) 1998-2004, The University of Sheffield. +* +* This file is part of GATE (see http://gate.ac.uk/), and is free +* software, licenced under the GNU Library General Public License, +* Version 2, June 1991 (in the distribution as file licence.html, +* and also available at http://gate.ac.uk/gate/licence.html). 
+* +* Diana Maynard, 10 Sep 2001 +* +* $Id: name.jape 18116 2014-06-23 11:35:16Z dgmaynard $ +*/ + + +Phase: Name +Input: Token Lookup Title FirstPerson Upper ClosedClass Initials Split UserID URL +Options: control = appelt debug = false + +/////////////////////////////////////////////////////////////// + +// Person Rules + +///////////////////////////////////////////////////////////////// +Macro: TITLE +( + {Title} + ({Token.string == "."})? +) + + + + +Macro: FIRSTNAME + + ({FirstPerson.gender == male, FirstPerson.kind != ambig, FirstPerson.twittername == no} | + {FirstPerson.gender == female, FirstPerson.kind != ambig, FirstPerson.twittername == no}) + + + +Macro: FIRSTNAMEAMBIG +( + {FirstPerson.kind == ambig, FirstPerson.twittername == no} +) + +Macro: FIRSTNAMETWITTER + +( + {FirstPerson.twittername == yes} +) + + +Macro: PERSONENDING +( + ({Token.string == ","})? + {Lookup.majorType == person_ending} +) + +Macro: PREFIX +( + ({Lookup.majorType == surname, Lookup.minorType == prefix} + )| + (({Token.string == "O"}|{Token.string == "D"}) + {Token.string == "'"} + ) +) + + + + +/////////////////////////////////////////////////////////// + + +// Person Rules + +Rule: Pronoun +Priority: 1000 + +( + {Token.category == PP}| + {Token.category == PRP}| + {Token.category == RB} +):pro +--> +{} + + +Rule:Reject +Priority: 1000 +// stops certain things being recognised as People +( + ({ClosedClass}|{URL})[1,5] +) +--> +{} + +Rule: GazPerson +Priority: 50 +( + {Lookup.majorType == person_full} +) +:person --> +{ +gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); +gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next(); +gate.FeatureMap features = Factory.newFeatureMap(); + +// find the Token annotations +AnnotationSet tokenSet = gate.Utils.getContainedAnnotations(inputAS, personSet, "Token"); +// put them in order +List<Annotation> tokenList = gate.Utils.inDocumentOrder(tokenSet); + +if (tokenList.size() == 1) { + // if there's 
only one Token, guess it's a surname + + String surnameContent = gate.Utils.stringFor(doc, tokenList.get(0)); + features.put("surname", surnameContent); + } + +else if (tokenList.size() > 0) { + // the string under the first Token + String firstNameContent = gate.Utils.stringFor(doc, tokenList.get(0)); + features.put("firstName", firstNameContent); + + + // the string under the remaining Tokens if any + if (tokenList.size() > 1) { + Long lastNameStart = gate.Utils.start(tokenList.get(1)); + Long lastNameEnd = gate.Utils.end(tokenList.get(tokenList.size() - 1)); + String surnameContent = gate.Utils.stringFor(doc, lastNameStart, lastNameEnd); + features.put("surname", surnameContent); + } +} + +features.put("kind", "fullName"); +features.put("rule", "GazPerson"); +features.put("gender", personAnn.getFeatures().get("gender")); + +// this method doesn't require try-catch +gate.Utils.addAnn(outputAS, personSet, "TempPerson", features); +} + +Rule: GazPersonFirstTwitter +Priority: 300 +( + {Token.category == DT}| + {Token.category == PRP}| + {Token.category == RB} +)? +( + {FirstPerson.twittername == yes, FirstPerson.kind != ambig} +):person +( + {Token.orth == upperInitial, Token.length == "1"} +)? +--> +{ +gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person"); +gate.Annotation personAnn = (gate.Annotation)person.iterator().next(); +gate.FeatureMap features = Factory.newFeatureMap(); +features.put("gender", personAnn.getFeatures().get("gender")); +features.put("kind", "firstName"); +features.put("rule", "GazPersonFirstTwitter"); + +// get the string of the first name +String contentFirstName = gate.Utils.stringFor(doc, personAnn); +features.put("firstName", contentFirstName); +features.put("twittername", "yes"); + +outputAS.add(person.firstNode(), person.lastNode(), "TempPerson", +features); +} + + + + +Rule: GazPersonFirst +Priority: 200 +( + {Token.category == DT}| + {Token.category == PRP}| + {Token.category == RB} +)? 
+( + {FirstPerson.kind != ambig} +):person +( + {Token.orth == upperInitial, Token.length == "1"} +)? +--> +{ +gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person"); +gate.Annotation personAnn = (gate.Annotation)person.iterator().next(); +gate.FeatureMap features = Factory.newFeatureMap(); +features.put("gender", personAnn.getFeatures().get("gender")); +features.put("kind", "firstName"); +features.put("rule", "GazPersonFirst"); + +// get the string of the first name +String contentFirstName = gate.Utils.stringFor(doc, personAnn); +features.put("firstName", contentFirstName); + +outputAS.add(person.firstNode(), person.lastNode(), "TempPerson", +features); +} + + + +Rule: PersonFirstContext +Priority: 30 +// Anne and Kenton + +(FIRSTNAME):person1 +( + {Token.string == "and"} +) +({Token.orth == upperInitial, Token.length != "1"}) +:person2 + --> +{ +//first deal with person1 + gate.FeatureMap features1 = Factory.newFeatureMap(); + gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1"); + gate.Annotation personAnn = (gate.Annotation)person1Set.iterator().next(); + + String contentFirstName = gate.Utils.stringFor(doc, personAnn); + features1.put("firstName", contentFirstName); + features1.put("gender", personAnn.getFeatures().get("gender")); + features1.put("kind", "firstName"); + features1.put("rule", "PersonFirstContext"); +outputAS.add(person1Set.firstNode(), person1Set.lastNode(), "TempPerson", +features1); + +//now deal with person2 +gate.FeatureMap features2 = Factory.newFeatureMap(); +gate.AnnotationSet person2Set = (gate.AnnotationSet)bindings.get("person2"); +gate.Annotation person2Ann = (gate.Annotation)person2Set.iterator().next(); + + String content2FirstName = gate.Utils.stringFor(doc, person2Ann); + features2.put("firstName", content2FirstName); + features2.put("kind", "firstName"); + features2.put("rule", "PersonFirstContext"); +outputAS.add(person2Set.firstNode(), person2Set.lastNode(), "TempPerson", +features2); +} 
+ + +Rule: PersonTitle +Priority: 35 +// Mr. Jones +// Mr Fred Jones +// note we only allow one first and surname, +// but we add more in a final phase if we find adjacent unknowns + +( + {Token.category == DT}| + {Token.category == PRP}| + {Token.category == RB} +)? +( + ({Title.rule == "TitleGender"}):title + ({Title})? + ( + (FIRSTNAME | FIRSTNAMEAMBIG )? + ):firstName + ( + (PREFIX)* + ({Upper}) + (PERSONENDING)? + ):surname +):person +--> +{ + gate.FeatureMap features = Factory.newFeatureMap(); + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + + gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName"); + + gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title"); + gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next(); + + gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); + gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next(); + + String contentTitle = gate.Utils.stringFor(doc, titleAnn); + features.put("title", contentTitle); + features.put("gender", titleAnn.getFeatures().get("gender")); + + if (firstNameSet != null && firstNameSet.size()>0) + { + gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next(); + String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn); + features.put("firstName", firstNameContent); + } + String surnameContent = gate.Utils.stringFor(doc, surnameAnn); + features.put("surname", surnameContent); + + features.put("kind", "personName"); + features.put("rule", "PersonTitle"); +outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", +features); +} + + + + +Rule: PersonTitleInitials +Priority: 35 + +// Mr J. Jones + + +( + {Token.category == DT}| + {Token.category == PRP}| + {Token.category == RB} +)? +( + ({Title.rule == "TitleGender"}):title + ({Title})? + ( + ({Initials})? + ):initials + ( + (PREFIX)* + ({Upper, !Initials}) + (PERSONENDING)? 
+ ):surname +):person +--> +{ + gate.FeatureMap features = Factory.newFeatureMap(); + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + + gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials"); + + gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title"); + gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next(); + + gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); + gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next(); + + String contentTitle = gate.Utils.stringFor(doc, titleAnn); + features.put("title", contentTitle); + features.put("gender", titleAnn.getFeatures().get("gender")); + + if (initialsSet != null && initialsSet.size()>0) + { + List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet); + + Long initialsStart = gate.Utils.start(initialsList.get(0)); + Long initialsEnd = gate.Utils.end(initialsList.get(initialsList.size() - 1)); + String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart, initialsEnd); + features.put("initials", initialsContent); + } + String surnameContent = gate.Utils.stringFor(doc, surnameAnn); + features.put("surname", surnameContent); + + features.put("kind", "personName"); + features.put("rule", "PersonTitleInitials"); +outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", +features); +} + + +Rule: TitleFirstName +Priority: 55 +// use this rule when we know what gender the title indicates +// Mr Fred (the first name is bound as :firstName, the label the RHS looks up) + +( + ({Title.gender == male} | {Title.gender == female}):title + (FIRSTNAME | FIRSTNAMEAMBIG ):firstName + +) +:person --> + +{ + gate.FeatureMap features = Factory.newFeatureMap(); + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + + gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName"); + + gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title"); + gate.Annotation titleAnn = 
(gate.Annotation)titleSet.iterator().next(); + + String contentTitle = gate.Utils.stringFor(doc, titleAnn); + features.put("title", contentTitle); + features.put("gender", titleAnn.getFeatures().get("gender")); + + if (firstNameSet != null && firstNameSet.size()>0) + { + gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next(); + String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn); + features.put("firstName", firstNameContent); + } + + features.put("kind", "personName"); + + features.put("rule", "TitleFirstName"); + outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", +features); +} + + + +Rule: PersonJobTitle +Priority: 20 +// note we include titles but not jobtitles in markup + +( + {Lookup.majorType == jobtitle} +):jobtitle +( + (TITLE)? + ((FIRSTNAME | FIRSTNAMEAMBIG ) + ) + (PREFIX)* + ({Upper,!Initials}) + (PERSONENDING)? +) +:person +--> + :person.TempPerson = {kind = "fullName", rule = "PersonJobTitle"}, + :jobtitle.JobTitle = {rule = "PersonJobTitle"} + + + + +Rule: NotFirstPersonStop +Priority: 70 +// ambig first name and surname is stop word +// e.g. 
Will And + +( + ((FIRSTNAMEAMBIG)+ | + {Token.category == PRP}| + {Token.category == DT} + ) + ({Lookup.majorType == stop} + ) +) +:person --> + {} + + +Rule: FirstPersonStop +Priority: 50 +// John And + +(FIRSTNAME):person +( + {Token.category == DT}| + {Token.category == PRP}| + {Token.category == RB}| + {Token.category == IN} +) +--> +{ +gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person"); +gate.Annotation personAnn = (gate.Annotation)person.iterator().next(); +gate.FeatureMap features = Factory.newFeatureMap(); +features.put("gender", personAnn.getFeatures().get("gender")); +features.put("kind", "firstName"); +features.put("rule", "FirstPersonStop"); +outputAS.add(person.firstNode(), person.lastNode(), "TempPerson", +features); +} + + + + + +Rule: NotPersonFull +Priority: 50 +// do not allow Det + Surname +( + {Token.category == DT}| + {Token.category == PRP}| + {Token.category == RB} +) +( + (PREFIX)* + ({Upper}) + (PERSONENDING)? +):foo +--> +{} + + + +Rule: LocPersonAmbig1 +Priority: 50 +// Location + Possible Surname --> Location only (ignore Surname) + +( + {Lookup.majorType == location} +):loc +( + (PREFIX)* + ({Upper,!Initials}) + (PERSONENDING) +):foo +--> +:loc.TempLocation = {kind = "locName", rule = LocPersonAmbig1} + + +Rule: LocPersonAmbig2 +Priority: 50 +// Location + + Possible Surname --> Location only (ignore Surname) + +( + {Lookup.majorType == location} +):loc +( + (PREFIX) + ({Upper,!Initials}) + (PERSONENDING)? +):foo +--> +:loc.TempLocation = {kind = "locName", rule = LocPersonAmbig2} + + +Rule: LocPersonAmbig3 +Priority: 100 +// Ambiguous Location/Person + Possible Surname --> Person + +( + {Lookup.majorType == location, Lookup.ambig == yes, FirstPerson} + (PREFIX) + ({Upper,!Initials}) + (PERSONENDING)? 
+):person +--> +{ +gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person"); +gate.Annotation personAnn = (gate.Annotation)person.iterator().next(); +gate.FeatureMap features = Factory.newFeatureMap(); +features.put("gender", personAnn.getFeatures().get("gender")); +features.put("kind", "firstName"); +features.put("rule", "LocPersonAmbig3"); +outputAS.add(person.firstNode(), person.lastNode(), "TempPerson", +features); +} + + +Rule: PersonFullInitialsCaps +Priority: 100 +// TO FISH +// If the initials is of type nopunct, we want to discard the Person if the surname is also in all caps, as it's too ambiguous + +( + {Token.category == DT} +)? +( + + ({Initials.kind == nopunct}) + ((FIRSTNAME | FIRSTNAMEAMBIG )?) + ((PREFIX)* + ({Upper.kind == allCaps}) + (PERSONENDING)? + ) +):person --> +:person.Discard = {rule = "PersonFullInitialsCaps"} + + +Rule: PersonFull +Priority: 10 +// F.W. Jones +// Fred Jones + +( + + (FIRSTNAME | FIRSTNAMEAMBIG ):firstName + ((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName + ((PREFIX)* + ({Upper,!Initials}) + (PERSONENDING)? 
+ ):surname +):person --> +{ + gate.FeatureMap features = Factory.newFeatureMap(); + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next(); + + gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName"); + gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next(); + + String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn); + features.put("firstName", firstNameContent); + features.put("gender", firstNameAnn.getFeatures().get("gender")); + + gate.AnnotationSet middleNameSet = (gate.AnnotationSet)bindings.get("middleName"); + + if (middleNameSet != null && middleNameSet.size()>0) +{ + gate.Annotation middleNameAnn = (gate.Annotation)middleNameSet.iterator().next(); + String middleNameContent = gate.Utils.stringFor(doc, middleNameAnn); + features.put("middleName", middleNameContent); +} + + gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); + gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next(); + + String surnameContent = gate.Utils.stringFor(doc, surnameAnn); + features.put("surname", surnameContent); + + features.put("kind", "fullName"); + features.put("rule", "PersonFull"); +outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", +features); + +} + + + +Rule: PersonFullInitials +Priority: 10 +// F.W. Jones + +( + {Token.category == DT} +)? +( + + ({Initials, !Lookup}):initials + ((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName + ((PREFIX)* + ({Upper,!Initials}) + (PERSONENDING)? 
+ ):surname +):person --> +{ + gate.FeatureMap features = Factory.newFeatureMap(); + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next(); + + gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials"); + List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet); + + Long initialsStart = gate.Utils.start(initialsList.get(0)); + Long initialsEnd = gate.Utils.end(initialsList.get(initialsList.size() - 1)); + String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart, initialsEnd); + features.put("initials", initialsContent); + + + gate.AnnotationSet middleNameSet = (gate.AnnotationSet)bindings.get("middleName"); + + if (middleNameSet != null && middleNameSet.size()>0) +{ + gate.Annotation middleNameAnn = (gate.Annotation)middleNameSet.iterator().next(); + String middleNameContent = gate.Utils.cleanStringFor(doc, middleNameAnn); + features.put("middleName", middleNameContent); + features.put("gender", middleNameAnn.getFeatures().get("gender")); +} + + gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); + gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next(); + + String surnameContent = gate.Utils.cleanStringFor(doc, surnameAnn); + features.put("surname", surnameContent); + + features.put("kind", "fullName"); + features.put("rule", "PersonFullInitials"); +outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", +features); + +} + + + + + +Rule: PersonFullDoubleBarrelled +Priority: 9 +// F.W. Smith Jones +// Fred Smith Jones + + +( + + (FIRSTNAME | FIRSTNAMEAMBIG ):firstName + ((PREFIX)* + ({Upper,!Initials}) + ({Upper,!Initials}) + (PERSONENDING)? 
+ ):surname +):person --> +{ + gate.FeatureMap features = Factory.newFeatureMap(); + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next(); + + gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName"); + gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next(); + + String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn); + features.put("firstName", firstNameContent); + features.put("gender", firstNameAnn.getFeatures().get("gender")); + + gate.AnnotationSet middleNameSet = (gate.AnnotationSet)bindings.get("middleName"); + + if (middleNameSet != null && middleNameSet.size()>0) +{ + gate.Annotation middleNameAnn = (gate.Annotation)middleNameSet.iterator().next(); + String middleNameContent = gate.Utils.stringFor(doc, middleNameAnn); + features.put("middleName", middleNameContent); +} + + gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); + gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next(); + + String surnameContent = gate.Utils.stringFor(doc, surnameAnn); + features.put("surname", surnameContent); + + features.put("kind", "fullName"); + features.put("rule", "PersonFullDoubleBarrelled"); +outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", +features); + +} + + + +Rule: PersonMiddleInitial +Priority: 10 +// Fred C. Jones + + +( + + (FIRSTNAME | FIRSTNAMEAMBIG ):firstName + ({Initials}):initials + ((PREFIX)* + ({Upper,!Initials}) + (PERSONENDING)? 
+ ):surname +):person --> +{ + gate.FeatureMap features = Factory.newFeatureMap(); + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next(); + + gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName"); + gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next(); + + String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn); + features.put("firstName", firstNameContent); + features.put("gender", firstNameAnn.getFeatures().get("gender")); + + gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials"); + + if (initialsSet != null && initialsSet.size()>0) +{ + gate.Annotation initialsAnn = (gate.Annotation)initialsSet.iterator().next(); + String initialsContent = gate.Utils.stringFor(doc, initialsAnn); + features.put("initials", initialsContent); +} + + gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); + gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next(); + + String surnameContent = gate.Utils.stringFor(doc, surnameAnn); + features.put("surname", surnameContent); + + features.put("kind", "fullName"); + features.put("rule", "PersonMiddleInitial"); +outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", +features); + +} + + + +Rule: PersonFullStop +Priority: 50 +// G.Wilson Fri + +( + ((FIRSTNAME | FIRSTNAMEAMBIG) ) + (PREFIX)* + ({Upper}) +):person +( + {Lookup.majorType == date} +) +--> + :person.TempPerson = {kind = "fullName", rule = "PersonFullStop"} + + +Rule: NotPersonFullReverse +Priority: 20 +// XYZ, I +( + ({Upper}) + {Token.string == ","} + {Token.category == PRP} + (PERSONENDING)? +) +:unknown +--> +{} + + +Rule: PersonSaint +Priority: 50 +// Note: ensure that it's not a Saints Day first +( + ({Token.string == "St"} ({Token.string == "."})? 
| + {Token.string == "Saint"}) + (FIRSTNAME) + ) +:person --> +{ + gate.FeatureMap features = Factory.newFeatureMap(); +gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); +gate.AnnotationSet firstPerson = (gate.AnnotationSet)personSet.get("FirstPerson"); +if (firstPerson != null && firstPerson.size()>0) +{ + gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next(); + features.put("gender", personAnn.getFeatures().get("gender")); +} + features.put("kind", "firstName"); + features.put("rule", "PersonSaint"); +outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", +features); +} + + +Rule: PersonLocAmbig +Priority: 40 +// Ken London +// Susan Hampshire + +// Christian name + Location --> Person's Name +( + (FIRSTNAME):firstName + ({Lookup.majorType == location}):surname +):person --> +{ + gate.FeatureMap features = Factory.newFeatureMap(); + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next(); + + gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName"); + gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next(); + + gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); + gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next(); + + features.put("gender", firstNameAnn.getFeatures().get("minorType")); + + String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn); + features.put("firstName", firstNameContent); + + String surnameContent = gate.Utils.stringFor(doc, surnameAnn); + features.put("surname", surnameContent); + + features.put("kind", "fullName"); + features.put("rule", "PersonLocAmbig"); +outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", +features); +} + + +Rule: TitlePersonLocAmbig +Priority: 50 +// Professor London +// title + Location --> Person's Name + +( + ({Title}):title + 
({Lookup.majorType == location}):surname +):person --> +{ + gate.FeatureMap features = Factory.newFeatureMap(); + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next(); + + gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title"); + gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next(); + + gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); + gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next(); + + features.put("gender", titleAnn.getFeatures().get("gender")); + + String titleContent = gate.Utils.stringFor(doc, titleAnn); + features.put("title", titleContent); + + String surnameContent = gate.Utils.stringFor(doc, surnameAnn); + features.put("surname", surnameContent); + + features.put("kind", "fullName"); + features.put("rule", "TitlePersonLocAmbig"); +outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", +features); +} + + + +Rule:PersonOrgAmbig +Priority: 50 +// if the last name is an organisation ending, treat as an organisation not person +// e.g. A.B. Consulting + +( + {Token.category == DT} +)? +( + ((FIRSTNAME | FIRSTNAMEAMBIG) )+ + ({Lookup.majorType == org_key}| + {Lookup.majorType == org_base} + ) +) +:orgName --> + :orgName.TempOrganization = {kind = "unknown", rule = "PersonOrgAmbig"} + + + +/////////////////////////////////////////////////////////////////// +// Organisation Rules + +Macro: CDG +// cdg is something like "Ltd." + ( + ({Lookup.majorType == cdg})| + ({Token.string == ","} + {Lookup.majorType == cdg}) + ) + + +Macro: SAINT +( + ({Token.string == "St"} ({Token.string == "."})? 
| + {Token.string == "Saint"}) +) + +Macro: CHURCH +( +{Token.string == "Church"}|{Token.string == "church"}| +{Token.string == "Cathedral"}|{Token.string == "cathedral"}| +{Token.string == "Chapel"}|{Token.string == "chapel"} +) + +///////////////////////////////////////////////////////////// +Rule: TheGazOrganization +Priority: 245 +( + {Token.category == DT}| + {Token.category == RB} +) +( +{Lookup.majorType == organization} +) +:orgName --> + { + gate.FeatureMap features = Factory.newFeatureMap(); +// create an annotation set consisting of all the annotations for org +gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("orgName"); +// create an annotation set consisting of the annotation matching Lookup +gate.AnnotationSet org = (gate.AnnotationSet)orgSet.get("Lookup"); +// if the annotation type Lookup doesn't exist, do nothing +if (org != null && org.size()>0) +{ +// if it does exist, take the first element in the set + gate.Annotation orgAnn = (gate.Annotation)org.iterator().next(); +//propagate minorType feature (and value) from org + features.put("orgType", orgAnn.getFeatures().get("minorType")); +} +// create some new features + features.put("rule", "GazOrganization"); +// create a TempOrg annotation and add the features we've created +outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "TempOrganization", +features); +} + + +Rule: GazOrganization +Priority: 145 +( +{Lookup.majorType == organization} +) +:orgName --> + { + gate.FeatureMap features = Factory.newFeatureMap(); +// create an annotation set consisting of all the annotations for org +gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("orgName"); +// create an annotation set consisting of the annotation matching Lookup +gate.AnnotationSet org = (gate.AnnotationSet)orgSet.get("Lookup"); +// if the annotation type Lookup doesn't exist, do nothing +if (org != null && org.size()>0) +{ +// if it does exist, take the first element in the set + gate.Annotation orgAnn = 
(gate.Annotation)org.iterator().next(); +//propagate minorType feature (and value) from org + features.put("orgType", orgAnn.getFeatures().get("minorType")); +} +// create some new features + features.put("rule", "GazOrganization"); +// create a TempOrg annotation and add the features we've created +outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "TempOrganization", +features); +} + +Rule: LocOrganization +Priority: 50 +// Ealing Police +( + ({Lookup.majorType == location} | + {Lookup.majorType == country_adj}) +{Lookup.majorType == organization} +({Lookup.majorType == organization})? +) +:orgName --> + :orgName.TempOrganization = {kind = "orgName", rule=LocOrganization} + + +Rule: NewspaperEnding +Priority: 200 +// GSA Today + +( + ({Upper}|{Initials}) + {Lookup.majorType == newspaper_ending} +):orgName +--> +{ + gate.FeatureMap features = Factory.newFeatureMap(); +gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("orgName"); +gate.AnnotationSet org = (gate.AnnotationSet)orgSet.get("Lookup"); + +if (org != null && org.size()>0) +{ + gate.Annotation orgAnn = (gate.Annotation)org.iterator().next(); + features.put("orgType", orgAnn.getFeatures().get("minorType")); +} +// create some new features + features.put("rule", "NewspaperEndng"); +// create a TempOrg annotation and add the features we've created +outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "TempOrganization", +features); +} + + +Rule: INOrgXandY +Priority: 200 + +// Bradford & Bingley +// Bradford & Bingley Ltd +( + {Token.category == IN} +) + +( + ({Token.category == NNP} + )+ + + {Token.string == "&"} + + ( + {Token.orth == upperInitial} + )+ + + (CDG)? + +) +:orgName --> + :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandY"} + +Rule: OrgXandY +Priority: 20 + +// Bradford & Bingley +// Bradford & Bingley Ltd + + +( + ({Token.category == NNP} + )+ + + {Token.string == "&"} + + ( + {Token.orth == upperInitial} + )+ + + (CDG)? 
+ +) +:orgName --> + :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandY"} + + +Rule:OrgUni +Priority: 25 +// University of Sheffield +// Sheffield University +// A Sheffield University +( + {Token.string == "University"} + {Token.string == "of"} + ( + {Token.category == NNP})+ +) +:orgName --> + :orgName.TempOrganization = {orgType = "other", rule = "OrgDept"} + + + +Rule: OrgDept +Priority: 25 +// Department of Pure Mathematics and Physics + +( + {Token.string == "Department"} + + {Token.string == "of"} + ( + {Token.orth == upperInitial})+ + ( + {Token.string == "and"} + ( + {Token.orth == upperInitial})+ + )? +) +:orgName --> + :orgName.TempOrganization = {orgType = "department", rule = "OrgDept"} + +Rule: TheOrgXKey +Priority: 500 + +// The Aaaa Ltd. +( + {Token.category == DT} +) +( + ({Upper}) + ({Upper})? + ({Upper})? + ({Upper})? + ({Upper})? + {Lookup.majorType == org_key} + ({Lookup.majorType == org_ending})? +) +:org +--> +:org.TempOrganization = {orgType = "unknown", rule = "TheOrgXKey"} + +Rule: NotOrgXKey +Priority: 150 +// if all the names are org_base or org_key, it's not an organisation +// e.g. Business Consulting + +( + ({Lookup.majorType == org_key}| + {Lookup.majorType == org_base} + )+ + ({Lookup.majorType == org_ending})? +) +:org +--> +{} + + + +Rule: NotTheKey +Priority: 200 + +( + {Token.category == DT} + {Lookup.majorType == org_key} + ({Lookup.majorType == org_ending})? +) +:org +--> +{} + + +Rule: OrgXKey +Priority: 125 + +// Aaaa Ltd. +({Token.category == DT})? +( + ({Upper}) + ({Upper})? + ({Upper})? + ({Upper})? + ({Upper})? + {Lookup.majorType == org_key} + ({Lookup.majorType == org_ending})? +) +:org +--> +:org.TempOrganization = {orgType = "unknown", rule = "OrgXKey"} + + +Rule: NotOrgXEnding +Priority: 500 +// Very Limited + +( + {Token.category == DT} +)? +( + {Token.category == RB} + {Lookup.majorType == cdg} +) +:label +--> +{} + + Rule: NotOrgXEnding2 +Priority: 500 + +// The Coca Cola Co. 
+ +( + {Token.category == DT} +) +( + ({Upper}) + ({Upper})? + {Lookup.majorType == cdg} +) +:orgName --> + :orgName.TempOrganization = {orgType = "company", rule = "OrgXEnding"} + + + +Rule: OrgXEnding +Priority: 120 + +// Coca Cola Co. + +( + ({Upper}) + ({Upper})? + {Lookup.majorType == cdg} +) +:orgName --> + :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXEnding"} + +Rule: TheOrgXandYKey +Priority: 220 + +( + {Token.category == DT} +) +( + ({Upper}) + ({Upper})? + (({Token.string == "and"} | + {Token.string == "&"}) + ({Upper})? + ({Upper})? + ({Upper})? + ) + {Lookup.majorType == org_key} + ({Lookup.majorType == org_ending})? +) +:orgName --> + :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandYKey"} + + + +Rule: OrgXandYKey +Priority: 120 + +// Aaaa Ltd. +// Xxx Services Ltd. +// AA and BB Services Ltd. +// but NOT A XXX Services Ltd. + +( + ({Upper}) + ({Upper})? + (({Token.string == "and"} | + {Token.string == "&"}) + ({Upper})? + ({Upper})? + ({Upper})? + ) + {Lookup.majorType == org_key} + ({Lookup.majorType == org_ending})? +) +:orgName --> + :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandYKey"} + + +Rule: OrgXsKeyBase +Priority: 120 + +// Gandy's Circus +// Queen's Ware + +( + ({Upper})? + ({Upper})? + ({Token.orth == upperInitial} + {Token.string == "'"} + ({Token.string == "s"})? + ) + ({Lookup.majorType == org_key}| + {Lookup.majorType == org_base}) +) +:orgName --> + :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXsKeybase"} + + + +Rule: NotOrgXBase +Priority: 1000 +// not things like British National +// or The University + + +( + ({Token.category == DT} + )? 
+) +( + ({Lookup.majorType == country_adj}| + {Token.orth == lowercase}) + ({Lookup.majorType == org_base}| + {Lookup.majorType == govern_key}) +) +:orgName --> + :orgName.Temp = {kind = "notorgName", rule = "NotOrgXBase"} + + +Rule: TheOrgXBase +Priority: 230 + +( + ({Token.category == DT} + ) +) +( + ( + ({Upper})| + {Lookup.majorType == organization} + ) + ({Upper})? + ({Upper})? + ({Lookup.majorType == org_base}| + {Lookup.majorType == govern_key} + ) + ( + {Token.string == "of"} + ({Upper}) + ({Upper})? + ({Upper})? + )? +) +:orgName --> + :orgName.TempOrganization = {orgType = "unknown", rule = "TheOrgXBase"} + + +Rule: OrgXBase +Priority: 130 + +// same as OrgXKey but uses base instead of key +// includes govern_key e.g. academy +// Barclays Bank +// Royal Academy of Art + +( + ( + ({Upper})| + {Lookup.majorType == organization} + ) + ({Upper})? + ({Upper})? + ({Lookup.majorType == org_base}| + {Lookup.majorType == govern_key} + ) + ( + {Token.string == "of"} + ({Upper}) + ({Upper})? + ({Upper})? + )? +) +:orgName --> + :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXBase"} + +Rule: TheBaseofOrg +Priority: 230 + +( + {Token.category == DT} +) +( + ({Lookup.majorType == org_base}| + {Lookup.majorType == govern_key} + ) + + {Token.string == "of"} + ( + {Token.category == DT} + )? + ({Upper}) + ({Upper})? +) +:orgName --> + :orgName.TempOrganization = {orgType = "unknown", rule = "BaseofOrg"} + + + + +Rule: BaseofOrg +Priority: 130 + +( + ({Lookup.majorType == org_base}| + {Lookup.majorType == govern_key} + ) + + {Token.string == "of"} + ( + {Token.category == DT} + )? + ({Upper}) + ({Upper})? +) +:orgName --> + :orgName.TempOrganization = {orgType = "unknown", rule = "BaseofOrg"} + + + +Rule: OrgPreX +Priority: 130 + +// Royal Tuscan + +( + {Lookup.majorType == org_pre} + ( + {Token.orth == upperInitial})+ + ({Lookup.majorType == org_ending})? 
+) +:orgName --> + :orgName.TempOrganization = {orgType = "unknown", rule = "OrgPreX"} + + + +Rule: OrgChurch +Priority: 150 +// St. Andrew's Church + +( + (SAINT) + {Token.orth == upperInitial} + {Token.string == "'"}({Token.string == "s"})? + (CHURCH) +) +:orgName --> + :orgName.TempOrganization = {orgType = "other", rule = "OrgChurch"} + + +Rule:OrgPersonAmbig +Priority: 130 +// Alexandra Pottery should be org not person +// overrides PersonFull + +( + (TITLE)? + (FIRSTNAME) + {Token.string == "'"}({Token.string == "s"})? + ({Lookup.majorType == org_key}| + {Lookup.majorType == org_base}) + ({Lookup.majorType == org_ending})? +) +:org +--> + :org.TempOrganization= {orgType = "unknown", rule = "OrgPersonAmbig"} + + + +///////////////////////////////////////////////////////////////// +// Location rules + + +Rule: Location1 +Priority: 200 +// Lookup = city, country, province, region, water + +// Western Europe +// South China sea + +( + {Token.category == DT} +)? +( + ({Lookup.majorType == loc_key, Lookup.minorType == pre} + )? + {Lookup.majorType == location} + ( + {Lookup.majorType == loc_key, Lookup.minorType == post})? 
+) +:locName --> +{ + gate.FeatureMap features = Factory.newFeatureMap(); +// create an annotation set consisting of all the annotations for org +gate.AnnotationSet locSet = (gate.AnnotationSet)bindings.get("locName"); +// create an annotation set consisting of the annotation matching Lookup +gate.AnnotationSet loc = (gate.AnnotationSet)locSet.get("Lookup"); +// if the annotation type Lookup doesn't exist, do nothing +if (loc != null && loc.size()>0) +{ +// if it does exist, take the first element in the set + gate.Annotation locAnn = (gate.Annotation)loc.iterator().next(); +//propagate minorType feature (and value) from loc + features.put("locType", locAnn.getFeatures().get("minorType")); +} +// create some new features + features.put("rule", "Location1"); +// create a TempLoc annotation and add the features we've created +outputAS.add(locSet.firstNode(), locSet.lastNode(), "TempLocation", +features); +} + +Rule: GazLocation +Priority: 200 +( + {Token.category == DT} +)? +( + {Lookup.majorType == location} +) +:locName + --> +{ + gate.FeatureMap features = Factory.newFeatureMap(); +// create an annotation set consisting of all the annotations for org +gate.AnnotationSet locSet = (gate.AnnotationSet)bindings.get("locName"); +// create an annotation set consisting of the annotation matching Lookup +gate.AnnotationSet loc = (gate.AnnotationSet)locSet.get("Lookup"); +// if the annotation type Lookup doesn't exist, do nothing +if (loc != null && loc.size()>0) +{ +// if it does exist, take the first element in the set + gate.Annotation locAnn = (gate.Annotation)loc.iterator().next(); +//propagate minorType feature (and value) from loc + features.put("locType", locAnn.getFeatures().get("minorType")); +} +// create some new features + features.put("rule", "GazLocation"); +// create a TempLoc annotation and add the features we've created +outputAS.add(locSet.firstNode(), locSet.lastNode(), "TempLocation", +features); +} + +Rule: GazLocationLocation +Priority: 100 + +( + 
({Lookup.majorType == location}):locName1 + {Token.string == ","} + ({Lookup.majorType == location}):locName2 +) +--> + +{ + gate.FeatureMap features = Factory.newFeatureMap(); + gate.FeatureMap morefeatures = Factory.newFeatureMap(); +gate.AnnotationSet loc1Set = (gate.AnnotationSet)bindings.get("locName1"); +gate.AnnotationSet loc1 = (gate.AnnotationSet)loc1Set.get("Lookup"); + +gate.AnnotationSet loc2Set = (gate.AnnotationSet)bindings.get("locName2"); +gate.AnnotationSet loc2 = (gate.AnnotationSet)loc2Set.get("Lookup"); + +// if the annotation type Lookup doesn't exist, do nothing +if (loc1 != null && loc1.size()>0) +{ + gate.Annotation loc1Ann = (gate.Annotation)loc1.iterator().next(); + features.put("locType", loc1Ann.getFeatures().get("minorType")); +} + +if (loc2 != null && loc2.size()>0) +{ + gate.Annotation loc2Ann = (gate.Annotation)loc2.iterator().next(); + morefeatures.put("locType", loc2Ann.getFeatures().get("minorType")); +} + +features.put("rule", "GazLocation"); +outputAS.add(loc1Set.firstNode(), loc1Set.lastNode(), "TempLocation", features); + +morefeatures.put("rule", "GazLocation"); +outputAS.add(loc2Set.firstNode(), loc2Set.lastNode(), "TempLocation", morefeatures); +} + + + + + +Rule: LocationPost +Priority: 50 +( + {Token.category == DT} +)? +( + {Token.category == NNP} + {Lookup.majorType == loc_key, Lookup.minorType == post} +) +:locName +--> + :locName.TempLocation = {kind = "locName", rule = LocationPost} + +Rule:LocKey +( + {Token.category == DT} +)? +( + ({Lookup.majorType == loc_key, Lookup.minorType == pre} + ) + ({Upper}) + ( + {Lookup.majorType == loc_key, Lookup.minorType == post})? 
+) +:locName --> +:locName.TempLocation = {kind = "locName", rule = LocKey} +///////////////////////////////////////////////////////////////// + +// Context-based Rules + + +Rule:InLoc1 +( + {Token.string == "in"} +) +( + {Lookup.majorType == location} +) +:locName +--> + :locName.TempLocation = {kind = "locName", rule = InLoc1, locType = :locName.Lookup.minorType} + +Rule:LocGeneralKey +Priority: 30 +( + {Lookup.majorType == loc_general_key} + {Token.string == "of"} +) +( + ({Upper}) +) +:loc +--> + :loc.TempLocation = {kind = "locName", rule = LocGeneralKey} + + +Rule:OrgContext1 +Priority: 1 +// company X + +( + {Token.string == "company"} +) +( + ({Upper}) + ({Upper})? + ({Upper})? +) +:org +--> + :org.TempOrganization= {orgType = "company", rule = "OrgContext1"} + +Rule: OrgContext2 +Priority: 5 + +// Telstar laboratory +// Medici offices + +( + ({Upper}) + ({Upper})? + ({Upper})? +) +: org +( + ({Token.string == "offices"} | + {Token.string == "Offices"} | + {Token.string == "laboratory"} | + {Token.string == "Laboratory"} | + {Token.string == "laboratories"} | + {Token.string == "Laboratories"}) +) +--> + :org.TempOrganization= {orgType = "other", rule = "OrgContext2"} + + + +Rule:JoinOrg +Priority: 50 +// Smith joined Energis + +( + ({Token.string == "joined"}| + {Token.string == "joining"}| + {Token.string == "joins"}| + {Token.string == "join"} + ) +) +( + ({Upper}) + ({Upper})? + ({Upper})? 
+) +:org +--> + :org.TempOrganization= {orgType = "company", rule = "joinOrg"} + + + + + + + + + + + Property changes on: gate/trunk/plugins/Lang_French/grammar/name-twitter.jape ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Modified: gate/trunk/plugins/Lang_French/grammar/name.jape =================================================================== --- gate/trunk/plugins/Lang_French/grammar/name.jape 2016-10-06 12:34:37 UTC (rev 19645) +++ gate/trunk/plugins/Lang_French/grammar/name.jape 2016-10-06 12:35:17 UTC (rev 19646) @@ -15,7 +15,7 @@ Phase: Name -Input: Token Lookup Title FirstPerson TreeTaggerToken +Input: Token Lookup Title FirstPerson Upper ClosedClass Initials Split UserID Hashtag Options: control = appelt debug = false /////////////////////////////////////////////////////////////// @@ -28,57 +28,28 @@ {Title} ({Token.string == "."})? ) -Macro: INITIALS -( - ({Token.orth == upperInitial, Token.length =="1"} - ({Token.string == "."})? - )+ -) -Macro: INITIALS2 -( - {Token.orth == allCaps, Token.length == "2"} | - {Token.orth == allCaps, Token.length == "3"} -) Macro: FIRSTNAME -( - ({FirstPerson.gender == male} | - {FirstPerson.gender == female}) - | - (INITIALS) -) + ({FirstPerson.gender == male, FirstPerson.kind != ambig} | + {FirstPerson.gender == female, FirstPerson.kind != ambig}) + + Macro: FIRSTNAMEAMBIG ( - {Lookup.majorType == person_first, Lookup.minorType == ambig} + {FirstPerson.kind == ambig} ) -Macro: UPPERTAG -( - ({TreeTaggerToken.category == NAM} -) - ({Token.string == "-"} - {TreeTaggerToken.category == NAM} - )? -) -Macro: UPPER -( - ({Token.orth == upperInitial} -) - ({Token.string == "-"} - {Token.orth == upperInitial} - )? -) - Macro: PERSONENDING ( + ({Token.string == ","})? 
{Lookup.majorType == person_ending} ) @@ -101,67 +72,92 @@ Rule: Pronoun Priority: 1000 -//stops personal pronouns being recognised as Initials + ( - {TreeTaggerToken.category == PP}| - {TreeTaggerToken.category == PRP}| - {TreeTaggerToken.category == RB} + {Token.category == PP}| + {Token.category == PRP}| + {Token.category == RB} ):pro --> {} - -Rule: GazPerson -Priority: 50 + +Rule:Reject +Priority: 1000 +// stops certain things being recognised as People ( - {Lookup.majorType == person_full, Lookup.minorType == normal} + {Hashtag}|{UserID}|{ClosedClass} ) -:person --> -{ -gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person"); -gate.Annotation personAnn = (gate.Annotation)person.iterator().next(); -gate.FeatureMap features = Factory.newFeatureMap(); -features.put("kind", "personName"); -features.put("rule", "GazPerson"); -outputAS.add(person.firstNode(), person.lastNode(), "TempPerson", -features); -} +--> +{} -Rule: TheGazPersonFirst -Priority: 200 + +Rule: GazPerson +Priority: 100 ( - {TreeTaggerToken.category == DT}| - {TreeTaggerToken.category == PRP}| - {TreeTaggerToken.category == RB} -) + {Token.category == DT}| + {Token.category == PRP}| + {Token.category == RB} +)? ( - {FirstPerson} + {Lookup.majorType == person_full} ) -:person -( - {Token.orth == upperInitial, Token.length == "1"} -)? 
---> +:person --> { -gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person"); -gate.Annotation personAnn = (gate.Annotation)person.iterator().next(); +gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); +gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); + +// find the Token annotations +AnnotationSet tokenSet = gate.Utils.getContainedAnnotations(inputAS, personSet, "Token"); +// put them in order +List<Annotation> tokenList = gate.Utils.inDocumentOrder(tokenSet); + +if (tokenList.size() == 1) { + // if there's only one Token, guess it's a surname + + String surnameContent = gate.Utils.stringFor(doc, tokenList.get(0)); + features.put("surname", surnameContent); + } + +else if (tokenList.size() > 0) { + // the string under the first Token + String firstNameContent = gate.Utils.stringFor(doc, tokenList.get(0)); + features.put("firstName", firstNameContent); + + + // the string under the remaining Tokens if any + if (tokenList.size() > 1) { + Long lastNameStart = gate.Utils.start(tokenList.get(1)); + Long lastNameEnd = gate.Utils.end(tokenList.get(tokenList.size() - 1)); + String surnameContent = gate.Utils.stringFor(doc, lastNameStart, lastNameEnd); + features.put("surname", surnameContent); + } +} + +features.put("kind", "fullName"); +features.put("rule", "GazPerson"); features.put("gender", personAnn.getFeatures().get("gender")); -features.put("kind", "personName"); -features.put("rule", "GazPersonFirst"); -outputAS.add(person.firstNode(), person.lastNode(), "TempPerson", -features); -//outputAS.removeAll(person); + +// this method doesn't require try-catch +gate.Utils.addAnn(outputAS, personSet, "TempPerson", features); } + + + Rule: GazPersonFirst -Priority: 70 +Priority: 200 ( - {FirstPerson} -) -:person + {Token.category == DT}| + {Token.category == PRP}| + {Token.category == RB} +)? 
+( + {FirstPerson.kind != ambig} +):person ( {Token.orth == upperInitial, Token.length == "1"} )? @@ -171,202 +167,270 @@ gate.Annotation personAnn = (gate.Annotation)person.iterator().next(); gate.FeatureMap features = Factory.newFeatureMap(); features.put("gender", personAnn.getFeatures().get("gender")); -features.put("kind", "personName"); +features.put("kind", "firstName"); features.put("rule", "GazPersonFirst"); + +// get the string of the first name +String contentFirstName = gate.Utils.stringFor(doc, personAnn); +features.put("firstName", contentFirstName); + outputAS.add(person.firstNode(), person.lastNode(), "TempPerson", features); -//outputAS.removeAll(person); } - - Rule: PersonFirstContext Priority: 30 // Anne and Kenton +(FIRSTNAME):person1 ( - {FirstPerson} -):person1 -( {Token.string == "and"} ) -({Token.orth == upperInitial}) +({Token.orth == upperInitial, Token.length != "1"}) :person2 --> { //first deal with person1 gate.FeatureMap features1 = Factory.newFeatureMap(); -gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1"); -gate.AnnotationSet firstPerson = (gate.AnnotationSet)person1Set.get("FirstPerson"); -if (firstPerson != null && firstPerson.size()>0) -{ - gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next(); + gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1"); + gate.Annotation personAnn = (gate.Annotation)person1Set.iterator().next(); + + String contentFirstName = gate.Utils.stringFor(doc, personAnn); + features1.put("firstName", contentFirstName); features1.put("gender", personAnn.getFeatures().get("gender")); -} - features1.put("kind", "personName"); + features1.put("kind", "firstName"); features1.put("rule", "PersonFirstContext"); outputAS.add(person1Set.firstNode(), person1Set.lastNode(), "TempPerson", features1); + //now deal with person2 gate.FeatureMap features2 = Factory.newFeatureMap(); gate.AnnotationSet person2Set = (gate.AnnotationSet)bindings.get("person2"); 
- features2.put("kind", "personName"); +gate.Annotation person2Ann = (gate.Annotation)person2Set.iterator().next(); + + String content2FirstName = gate.Utils.stringFor(doc, person2Ann); + features2.put("firstName", content2FirstName); + features2.put("kind", "firstName"); features2.put("rule", "PersonFirstContext"); outputAS.add(person2Set.firstNode(), person2Set.lastNode(), "TempPerson", features2); } -Rule: PersonFirstContext2 -Priority: 40 -// Anne and I +Rule: PersonTitle +Priority: 35 +// Mr. Jones +// Mr Fred Jones +// note we only allow one first and surname, +// but we add more in a final phase if we find adjacent unknowns +( + {Token.category == DT}| + {Token.category == PRP}| + {Token.category == RB} +)? ( - {FirstPerson} -):person -( - {Token.string == "and"} - {Token.length == "1"} -) - --> + ({Title.rule == "TitleGender"}):title + ({Title})? + ( + (FIRSTNAME | FIRSTNAMEAMBIG )? + ):firstName + ( + (PREFIX)* + ({Upper}) + (PERSONENDING)? + ):surname +):person +--> { gate.FeatureMap features = Factory.newFeatureMap(); -gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); -gate.AnnotationSet firstPerson = (gate.AnnotationSet)personSet.get("FirstPerson"); -if (firstPerson != null && firstPerson.size()>0) -{ - gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next(); - features.put("gender", personAnn.getFeatures().get("gender")); -} + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + + gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName"); + + gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title"); + gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next(); + + gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); + gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next(); + + String contentTitle = gate.Utils.stringFor(doc, titleAnn); + features.put("title", contentTitle); + 
features.put("gender", titleAnn.getFeatures().get("gender")); + + if (firstNameSet != null && firstNameSet.size()>0) + { + gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next(); + String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn); + features.put("firstName", firstNameContent); + } + String surnameContent = gate.Utils.stringFor(doc, surnameAnn); + features.put("surname", surnameContent); + features.put("kind", "personName"); - features.put("rule", "PersonFirstContext2"); + features.put("rule", "PersonTitle"); outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", features); } -Rule: PersonTitle -Priority: 35 -// Mr. Jones -// Mr Fred Jones -// note we only allow one first and surname, -// but we can add more in a final phase if we find adjacent unknowns +Rule: PersonTitleUnknownGender +Priority: 30 +// Prof. Jones +// This person will just get an unknown value for gender. Or we could decide to make them male by default, as they're mostly military etc. ( - {TreeTaggerToken.category == DT}| - {TreeTaggerToken.category == PRP}| - {TreeTaggerToken.category == RB} + {Token.category == DT}| + {Token.category == PRP}| + {Token.category == RB} )? ( - (TITLE)+ - ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2) - )? + ({Title}):title + ({Title})? + ( + (FIRSTNAME | FIRSTNAMEAMBIG )? + ):firstName + ( (PREFIX)* - (UPPER) - (PERSONENDING)? -) -:person --> + ({Upper}) + (PERSONENDING)? 
+ ):surname +):person +--> { gate.FeatureMap features = Factory.newFeatureMap(); -gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); - - // get all Title annotations that have a gender feature - HashSet fNames = new HashSet(); - fNames.add("gender"); - gate.AnnotationSet personTitle = personSet.get("Title", fNames); + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + + gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName"); -// if the gender feature exists - if (personTitle != null && personTitle.size()>0) -{ - //Out.prln("Titles found " + personTitle); - gate.Annotation personAnn = (gate.Annotation)personTitle.iterator().next(); - features.put("gender", personAnn.getFeatures().get("gender")); -} -else -{ - //get all firstPerson annotations that have a gender feature - // HashSet fNames = new HashSet(); - // fNames.add("gender"); - gate.AnnotationSet firstPerson = personSet.get("FirstPerson", fNames); + gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title"); + gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next(); - if (firstPerson != null && firstPerson.size()>0) + gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); + gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next(); + + String contentTitle = gate.Utils.stringFor(doc, titleAnn); + features.put("title", contentTitle); + features.put("gender", "unknown"); + + if (firstNameSet != null && firstNameSet.size()>0) { - //Out.prln("First persons found " + firstPerson); - gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next(); - features.put("gender", personAnn.getFeatures().get("gender")); + gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next(); + String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn); + features.put("firstName", firstNameContent); } -} + String surnameContent = gate.Utils.stringFor(doc, 
surnameAnn); + features.put("surname", surnameContent); + features.put("kind", "personName"); - features.put("rule", "PersonTitle"); + features.put("rule", "PersonTitleGenderUnknown"); outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", features); } -Rule: PersonFirstTitleGender -Priority: 55 -// use this rule when we know what gender the title indicates -// Mr Fred +Rule: PersonTitleInitials +Priority: 35 +// Mr J. Jones +// an optional Token with category DT, PRP or RB may precede the name; it lies outside the :person binding + + +( + {Token.category == DT}| + {Token.category == PRP}| + {Token.category == RB} +)? ( - ({Title.gender == male} | {Title.gender == female}) - ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2) - ) -) -:person --> + ({Title.rule == "TitleGender"}):title + ({Title})? + ( + ({Initials})? + ):initials + ( + (PREFIX)* + ({Upper, !Initials}) + (PERSONENDING)? + ):surname +):person +--> { gate.FeatureMap features = Factory.newFeatureMap(); -gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); -gate.AnnotationSet title = (gate.AnnotationSet)personSet.get("Title"); -if (title != null && title.size()>0) -{ - gate.Annotation personAnn = (gate.Annotation)title.iterator().next(); - features.put("gender", personAnn.getFeatures().get("gender")); -} + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + + gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials"); + + gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title"); + gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next(); + + gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname"); + gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next(); + + String contentTitle = gate.Utils.stringFor(doc, titleAnn); + features.put("title", contentTitle); + features.put("gender", titleAnn.getFeatures().get("gender")); + + if (initialsSet != null && initialsSet.size()>0) + { + List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet); + + Long initialsStart 
= gate.Utils.start(initialsList.get(0)); + Long initialsEnd = gate.Utils.end(initialsList.get(initialsList.size() - 1)); + String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart, initialsEnd); + features.put("initials", initialsContent); + } + String surnameContent = gate.Utils.stringFor(doc, surnameAnn); + features.put("surname", surnameContent); + features.put("kind", "personName"); - features.put("rule", "PersonFirstTitleGender"); + features.put("rule", "PersonTitleInitials"); outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", features); } -Rule: PersonTitleGender -Priority: 18 -// use this rule if the title has a feature gender -// Miss F Smith +Rule: TitleFirstName +Priority: 55 +// use this rule when we know what gender the title indicates +// Mr Fred +// binding labels are case-sensitive: :firstName must match the key read via bindings.get("firstName") in the RHS + ( - ({Title.gender == male}| - {Title.gender == female} - ) - ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2) - )* - (UPPER) - (PERSONENDING)? + ({Title.gender == male} | {Title.gender == female}):title + (FIRSTNAME | FIRSTNAMEAMBIG ):firstName + ) :person --> + { gate.FeatureMap features = Factory.newFeatureMap(); -gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); -gate.AnnotationSet title = (gate.AnnotationSet)personSet.get("Title"); -// if the annotation type title doesn't exist, do nothing -if (title != null && title.size()>0) -{ -// if it does exist, take the first element in the set - gate.Annotation personAnn = (gate.Annotation)title.iterator().next(); -//propagate gender feature (and value) from title - features.put("gender", personAnn.getFeatures().get("gender")); -} -// create some new features - features.put("kind", "personName"); - features.put("rule", "PersonTitleGender"); -// create a TempPerson annotation and add the features we've created -outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", + gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person"); + + gate.AnnotationSet firstNameSet = 
(gate.AnnotationSet)bindings.get("firstName"); + + gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title"); + gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next(); + + String contentTitle = gate.Utils.stringFor(doc, titleAnn); + features.put("title", contentTitle); + features.put("gender", titleAnn.getFeatures().get("gender")); + + if (firstNameSet != null && firstNameSet.size()>0) + { + gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next(); + String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn); + features.put("firstName", firstNameContent); + } + + features.put("kind", "personName"); + + features.put("rule", "TitleFirstName"); + outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson", features); } + Rule: PersonJobTitle Priority: 20 // note we include titles but not jobtitles in markup @@ Diff output truncated at 100000 characters. @@ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot _______________________________________________ GATE-cvs mailing list GATE-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/gate-cvs