This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git
The following commit(s) were added to refs/heads/master by this push:
new 122f6f4a Improve name matching
122f6f4a is described below
commit 122f6f4a8ad5374c3848a5ae942b16ab87768808
Author: Sebb <[email protected]>
AuthorDate: Fri Feb 24 20:34:04 2023 +0000
Improve name matching
Also only process names once
---
www/members/board-nominations.cgi | 36 ++++++++++++++++++++----------------
www/members/nominations.cgi | 27 ++++++++++++++-------------
2 files changed, 34 insertions(+), 29 deletions(-)
diff --git a/www/members/board-nominations.cgi b/www/members/board-nominations.cgi
index cf297268..7c932829 100755
--- a/www/members/board-nominations.cgi
+++ b/www/members/board-nominations.cgi
@@ -18,7 +18,7 @@ ROSTER = '/roster/committer'
MEETINGS = ASF::SVN['Meetings']
MAIL_ROOT = '/srv/mail' # TODO: this should be config item
# Only need these items
-Email = Struct.new(:subject, :date, :message_id, :from)
+Email = Struct.new(:subject, :date, :message_id, :from, :asciiname)
# Encapsulate gathering data to improve error processing
def setup_data
@@ -38,9 +38,10 @@ def setup_data
next unless subject
date = value[:Date]
next unless date.include? year
- next unless subject =~ /^\[?BOARD NOMI[MN]ATION\]?/i
+ next unless /^\[?BOARD NOMI[MN]ATION\]? *(?<name>.*)/i =~ subject
+ # N.B. the named capture only works if the RE is on the LHS
messageid = value[:MessageId]
- emails << Email.new(subject, Time.parse(date).utc, messageid, [value[:From]])
+ emails << Email.new(subject, Time.parse(date).utc, messageid, [value[:From]], ASF::Person.asciize(name.delete('.'), nil))
end
end
@@ -53,24 +54,29 @@ def setup_data
ASF::Person.preload('cn',
nominations.map {|nominee| ASF::Person.find(nominee[:id])})
+ # build up the matches once
+ nominations.each do |nominee|
+ nominee[:match] = create_match(nominee)
+ end
return nominations, emails
end
# create the match RE from a nominee
def create_match(nominee)
names = []
- pname = nominee[:name]
+ pname = ASF::Person.asciize(nominee[:name], nil) # don't change non-words
names << pname
- names << pname.sub(%r{ [A-Z]\.? }, ' ') # drop initial
+ names << pname.delete('.')
+ names << pname.sub(%r{ [A-Z] }, ' ') # drop initial
+ names << pname.sub(/\bChristo(ph|f)er\b/, 'Chris') # Special
personname = ASF::Person.find(nominee[:id]).public_name
- names << personname if personname
- list = names.uniq.map{|name| Regexp.escape(name)}.join('|')
+ names << ASF::Person.asciize(personname, nil) if personname
+ list = names.uniq.map {|name| Regexp.escape(name)}.join('|')
# N.B. \b does not match if it follows ')', so won't match John (Fred)
# TODO: Work-round is to also look for EOS, but this needs to be improved
%r{\b(#{list})(\b|$)}i
end
-
# produce HTML output of reports, highlighting ones that have not (yet)
# been posted
_html do
@@ -108,16 +114,14 @@ _html do
_p.count "Count: #{nominations.count}"
- _ul nominations.sort_by {|nominee| nominee[:name]} do |nominee|
+ _ul(nominations.sort_by {|nominee| nominee[:name]}) do |nominee|
_li! do
person = ASF::Person.find(nominee[:id])
- match = create_match(nominee)
-
- if emails.any? {|mail| mail.subject.downcase =~ match}
- _a.present person.public_name, href: "#{ROSTER}/#{nominee[:id]}"
+ if emails.any? {|mail| ASF::Person.asciize(mail.subject.downcase.delete('.'), nil) =~ nominee[:match]}
+ _a.present person.public_name || '??', href: "#{ROSTER}/#{nominee[:id]}"
else
- _a.missing person.public_name, href: "#{ROSTER}/#{nominee[:id]}"
+ _a.missing person.public_name || '??', href: "#{ROSTER}/#{nominee[:id]}"
_ ' Nominated by: '
_ nominee[:nominator]
end
@@ -149,7 +153,7 @@ _html do
href = MBOX + mail.date.strftime('%Y%m') + '.mbox/' +
ERB::Util.url_encode('<' + mail.message_id + '>')
- if nominations.any? {|nominee| mail.subject =~ create_match(nominee)}
+ if nominations.any? {|nominee| mail[:asciiname] =~ nominee[:match]}
_a.present mail.subject, href: href
else
_a.missing mail.subject, href: href
@@ -170,6 +174,6 @@ _json do
_ reports do |mail| # TODO: reports is not defined
_subject mail.subject
_link MBOX + ERB::Util.url_encode('<' + mail.message_id + '>') # TODO looks wrong: does not agree with href above
- _missing missing.any? {|title| mail.subject.downcase =~ /\b#{Regexp.escape(title)}\b/}
+ _missing(missing.any? {|title| mail.subject.downcase =~ /\b#{Regexp.escape(title)}\b/})
end
end
diff --git a/www/members/nominations.cgi b/www/members/nominations.cgi
index c181f4d7..027dfc01 100755
--- a/www/members/nominations.cgi
+++ b/www/members/nominations.cgi
@@ -18,7 +18,7 @@ ROSTER = '/roster/committer'
MEETINGS = ASF::SVN['Meetings']
MAIL_ROOT = '/srv/mail' # TODO: this should be config item
# Only need these items
-Email = Struct.new(:subject, :date, :message_id, :from)
+Email = Struct.new(:subject, :date, :message_id, :from, :asciiname)
# Encapsulate gathering data to improve error processing
def setup_data
@@ -40,9 +40,10 @@ def setup_data
next unless date.include? year
next if subject =~ /Member nominations: a plea/ # not a nomination!
next if subject.downcase == 'member nomination process'
- next unless subject =~ /^\[?MEMBER(SHIP)? NOMI[MN]ATION\]?/i
+ next unless /^\[?MEMBER(SHIP)? NOMI[MN]ATION\]? *(?<name>.*)/i =~ subject
+ # N.B. the named capture only works if the RE is on the LHS
messageid = value[:MessageId]
- emails << Email.new(subject, Time.parse(date).utc, messageid, [value[:From]])
+ emails << Email.new(subject, Time.parse(date).utc, messageid, [value[:From]], ASF::Person.asciize(name.delete('.'), nil))
end
end
@@ -55,18 +56,23 @@ def setup_data
ASF::Person.preload('cn',
nominations.map {|nominee| ASF::Person.find(nominee[:id])})
+ # build up the matches once
+ nominations.each do |nominee|
+ nominee[:match] = create_match(nominee)
+ end
return nominations, emails
end
# create the match RE from a nominee
def create_match(nominee)
names = []
- pname = nominee[:name]
+ pname = ASF::Person.asciize(nominee[:name], nil) # don't change non-words
names << pname
names << pname.delete('.')
- names << pname.sub(%r{ [A-Z]\.? }, ' ') # drop initial
+ names << pname.sub(%r{ [A-Z] }, ' ') # drop initial
+ names << pname.sub(/\bChristo(ph|f)er\b/, 'Chris') # Special
personname = ASF::Person.find(nominee[:id]).public_name
- names << personname if personname
+ names << ASF::Person.asciize(personname, nil) if personname
list = names.uniq.map {|name| Regexp.escape(name)}.join('|')
# N.B. \b does not match if it follows ')', so won't match John (Fred)
# TODO: Work-round is to also look for EOS, but this needs to be improved
@@ -118,9 +124,7 @@ _html do
_li! do
person = ASF::Person.find(nominee[:id])
- match = create_match(nominee)
-
- if emails.any? {|mail| mail.subject.downcase =~ match || mail.subject.downcase.delete('.') =~ match}
+ if emails.any? {|mail| ASF::Person.asciize(mail.subject.downcase.delete('.'), nil) =~ nominee[:match]}
_a.present person.public_name || '??', href: "#{ROSTER}/#{nominee[:id]}"
else
_a.missing person.public_name || '??', href: "#{ROSTER}/#{nominee[:id]}"
@@ -156,10 +160,7 @@ _html do
href = MBOX + mail.date.strftime('%Y%m') + '.mbox/' +
ERB::Util.url_encode('<' + mail.message_id + '>')
- if nominations.any? do |nominee|
- m = create_match(nominee)
- mail.subject.downcase =~ m || mail.subject.downcase.delete('.') =~ m
- end
+ if nominations.any? {|nominee| mail[:asciiname] =~ nominee[:match]}
_a.present mail.subject, href: href
else
_a.missing mail.subject, href: href