This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git
The following commit(s) were added to refs/heads/master by this push:
new f177af40 Synchronise asciize methods
f177af40 is described below
commit f177af400efced93ce223f1371df5756ac2ceaee
Author: Sebb <[email protected]>
AuthorDate: Wed Feb 8 00:35:37 2023 +0000
Synchronise asciize methods
---
lib/whimsy/asf/person.rb | 5 +-
www/secretary/workbench/views/asciize.js.rb | 145 +++++++++++++++--------
www/secretary/workbench/views/forms/ccla.js.rb | 2 +-
www/secretary/workbench/views/forms/icla.js.rb | 2 +-
www/secretary/workbench/views/forms/memapp.js.rb | 4 +-
5 files changed, 102 insertions(+), 56 deletions(-)
diff --git a/lib/whimsy/asf/person.rb b/lib/whimsy/asf/person.rb
index 06201c15..48fbc100 100644
--- a/lib/whimsy/asf/person.rb
+++ b/lib/whimsy/asf/person.rb
@@ -14,6 +14,7 @@ module ASF
# Convert non-ASCII characters to equivalent ASCII
# optionally: replace any remaining non-word characters (e.g. '.' and space) with '-'
def self.asciize(name, nonWord = '-')
+ # Should agree with asciize.js.rb
if name.match %r{[^\x00-\x7F]} # at least one non-ASCII character present
# digraphs. May be culturally sensitive
# Note that the combining accents require matching two characters
@@ -79,7 +80,7 @@ module ASF
name.gsub! %r{[\u0143\u0145\u0147]}, 'N'
name.gsub! %r{[\u0144\u0146\u0148\u0149]}, 'n'
name.gsub! %r{[\u014C\u014E\u0150]}, 'O'
- name.gsub! %r{[\u014D\u014F\u0151]}, 'o'
+ name.gsub! %r{[\u014D\u014F\u0151\u01A1]}, 'o'
name.gsub! %r{[\u0152]}, 'OE'
name.gsub! %r{[\u0153]}, 'oe'
name.gsub! %r{[\u0154\u0156\u0158]}, 'R'
@@ -89,7 +90,7 @@ module ASF
name.gsub! %r{[\u0162\u0164\u0166]}, 'T'
name.gsub! %r{[\u0163\u0165\u0167]}, 't'
name.gsub! %r{[\u0168\u016A\u016C\u016E\u0170\u0172]}, 'U'
- name.gsub! %r{[\u0169\u016B\u016D\u016F\u0171\u0173]}, 'u'
+ name.gsub! %r{[\u0169\u016B\u016D\u016F\u0171\u0173\u01B0]}, 'u'
name.gsub! %r{[\u0174]}, 'W'
name.gsub! %r{[\u0175]}, 'w'
name.gsub! %r{[\u0176\u0178]}, 'Y'
diff --git a/www/secretary/workbench/views/asciize.js.rb b/www/secretary/workbench/views/asciize.js.rb
index 8b0ef2ab..da5ec42f 100644
--- a/www/secretary/workbench/views/asciize.js.rb
+++ b/www/secretary/workbench/views/asciize.js.rb
@@ -1,59 +1,104 @@
-# Map non-ASCII characters to lower case ASCII
-def asciize(name)
- if name =~ /[^\x00-\x7F]/
+# Convert non-ASCII characters to equivalent ASCII
+# optionally: replace any remaining non-word characters (e.g. '.' and space) with '-'
+def asciize(name, nonWord = '-')
+ # Should agree with ASF::Person.asciize
+ if name =~ /[^\x00-\x7F]/ # at least one non-ASCII character present
# digraphs. May be culturally sensitive
- name.gsub! /\u00df/, 'ss'
- name.gsub! /\u00e4|a\u0308/, 'ae'
- name.gsub! /\u00e5|a\u030a/, 'aa'
- name.gsub! /\u00e6/, 'ae'
- name.gsub! /\u00f1|n\u0303/, 'ny'
- name.gsub! /\u00f6|o\u0308/, 'oe'
- name.gsub! /\u00fc|u\u0308/, 'ue'
+ # Note that the combining accents require matching two characters
+ name.gsub! %r{\u00df}, 'ss'
+ name.gsub! %r{\u00e4|a\u0308}, 'ae' # 308 = combining diaeresis
+ name.gsub! %r{\u00e5|a\u030a}, 'aa' # a with ring above: should this translate as 'a'?
+ name.gsub! %r{\u00c5|A\u030a}, 'AA' # A with ring above: should this translate as 'A'?
+ name.gsub! %r{\u00e6}, 'ae' # small letter ae
+ name.gsub! %r{\u00c6}, 'AE' # large letter AE
+ name.gsub! %r{\u00f1|n\u0303}, 'ny' # 303 = combining tilde
+ name.gsub! %r{\u00d1|N\u0303}, 'NY' # 303 = combining tilde
+ name.gsub! %r{\u00f6|o\u0308}, 'oe' # 308 = combining diaeresis
+ name.gsub! %r{\u00d6|O\u0308}, 'OE' # 308 = combining diaeresis
+ name.gsub! %r{\u00de}, 'TH' # thorn
+ name.gsub! %r{\u00fe}, 'th' # thorn
+ name.gsub! %r{\u00fc|u\u0308}, 'ue' # 308 = combining diaeresis
+ name.gsub! %r{\u00dc|U\u0308}, 'UE' # 308 = combining diaeresis
- # latin 1 - uppercase
- name.gsub! /[\u00c0-\u00c5]/, 'a'
- name.gsub! /\u00c7/, 'c'
- name.gsub! /[\u00c8-\u00cb]/, 'e'
- name.gsub! /[\u00cc-\u00cf]/, 'i'
- name.gsub! /[\u00d2-\u00d6]|\u00d8/, 'o'
- name.gsub! /[\u00d9-\u00dc]/, 'u'
- name.gsub! /[\u00dd]/, 'y'
-
- # latin 1 - lowercase
- name.gsub! /[\u00e0-\u00e5]/, 'a'
- name.gsub! /\u00e7/, 'c'
- name.gsub! /[\u00e8-\u00eb]/, 'e'
- name.gsub! /[\u00ec-\u00ef]/, 'i'
- name.gsub! /[\u00f2-\u00f6]|\u00f8/, 'o'
- name.gsub! /[\u00f9-\u00fc]/, 'u'
- name.gsub! /[\u00fd\u00ff]/, 'y'
+ # latin 1
+ name.gsub! %r{[\u00e0-\u00e3]}, 'a' # a with various accents
+ name.gsub! %r{[\u00c0-\u00c3]}, 'A' # A with various accents
+ name.gsub! %r{\u00e7}, 'c' # c-cedilla
+ name.gsub! %r{\u00c7}, 'C' # C-cedilla
+ name.gsub! %r{\u00f0}, 'd' # eth
+ name.gsub! %r{\u00d0}, 'D' # eth
+ name.gsub! %r{[\u00e8-\u00eb]}, 'e'
+ name.gsub! %r{[\u00c8-\u00cb]}, 'E'
+ name.gsub! %r{[\u00ec-\u00ef]}, 'i'
+ name.gsub! %r{[\u00cc-\u00cf]}, 'I'
+ name.gsub! %r{[\u00f2-\u00f5\u00f8]}, 'o'
+ name.gsub! %r{[\u00d2-\u00d5\u00d8]}, 'O'
+ name.gsub! %r{[\u00f9-\u00fb]}, 'u'
+ name.gsub! %r{[\u00d9-\u00db]}, 'U'
+ name.gsub! %r{[\u00fd\u00ff]}, 'y'
+ name.gsub! %r{[\u00dd\u0178]}, 'Y'
# Latin Extended-A
- name.gsub! /[\u0100-\u0105]/, 'a'
- name.gsub! /[\u0106-\u010d]/, 'c'
- name.gsub! /[\u010e-\u0111]/, 'd'
- name.gsub! /[\u0112-\u011b]/, 'e'
- name.gsub! /[\u011c-\u0123]/, 'g'
- name.gsub! /[\u0124-\u0127]/, 'h'
- name.gsub! /[\u0128-\u0131]/, 'i'
- name.gsub! /[\u0132-\u0133]/, 'ij'
- name.gsub! /[\u0134-\u0135]/, 'j'
- name.gsub! /[\u0136-\u0138]/, 'k'
- name.gsub! /[\u0139-\u0142]/, 'l'
- name.gsub! /[\u0143-\u014b]/, 'n'
- name.gsub! /[\u014C-\u0151]/, 'o'
- name.gsub! /[\u0152-\u0153]/, 'oe'
- name.gsub! /[\u0154-\u0159]/, 'r'
- name.gsub! /[\u015a-\u0162]/, 's'
- name.gsub! /[\u0162-\u0167]/, 't'
- name.gsub! /[\u0168-\u0173]/, 'u'
- name.gsub! /[\u0174-\u0175]/, 'w'
- name.gsub! /[\u0176-\u0178]/, 'y'
- name.gsub! /[\u0179-\u017e]/, 'z'
+ name.gsub! %r{[\u0100\u0102\u0104]}, 'A'
+ name.gsub! %r{[\u0101\u0103\u0105]}, 'a'
+ name.gsub! %r{[\u0106\u0108\u010A\u010C]}, 'C'
+ name.gsub! %r{[\u0107\u0109\u010B\u010D]}, 'c'
+ name.gsub! %r{[\u010E\u0110]}, 'D'
+ name.gsub! %r{[\u010F\u0111]}, 'd'
+ name.gsub! %r{[\u0112\u0114\u0116\u0118\u011A]}, 'E'
+ name.gsub! %r{[\u0113\u0115\u0117\u0119\u011B]}, 'e'
+ name.gsub! %r{[\u014A]}, 'ENG'
+ name.gsub! %r{[\u014B]}, 'eng'
+ name.gsub! %r{[\u011C\u011E\u0120\u0122]}, 'G'
+ name.gsub! %r{[\u011D\u011F\u0121\u0123]}, 'g'
+ name.gsub! %r{[\u0124\u0126]}, 'H'
+ name.gsub! %r{[\u0125\u0127]}, 'h'
+ name.gsub! %r{[\u0128\u012A\u012C\u012E\u0130]}, 'I'
+ name.gsub! %r{[\u0129\u012B\u012D\u012F\u0131]}, 'i'
+ name.gsub! %r{[\u0132]}, 'IJ'
+ name.gsub! %r{[\u0133]}, 'ij'
+ name.gsub! %r{[\u0134]}, 'J'
+ name.gsub! %r{[\u0135]}, 'j'
+ name.gsub! %r{[\u0136]}, 'K'
+ name.gsub! %r{[\u0137]}, 'k'
+ name.gsub! %r{[\u0138]}, 'kra'
+ name.gsub! %r{[\u0139\u013B\u013D\u013F\u0141]}, 'L'
+ name.gsub! %r{[\u013A\u013C\u013E\u0140\u0142]}, 'l'
+ name.gsub! %r{[\u0143\u0145\u0147]}, 'N'
+ name.gsub! %r{[\u0144\u0146\u0148\u0149]}, 'n'
+ name.gsub! %r{[\u014C\u014E\u0150]}, 'O'
+ name.gsub! %r{[\u014D\u014F\u0151\u01A1]}, 'o'
+ name.gsub! %r{[\u0152]}, 'OE'
+ name.gsub! %r{[\u0153]}, 'oe'
+ name.gsub! %r{[\u0154\u0156\u0158]}, 'R'
+ name.gsub! %r{[\u0155\u0157\u0159]}, 'r'
+ name.gsub! %r{[\u015A\u015C\u015E\u0160]}, 'S'
+ name.gsub! %r{[\u015B\u015D\u015F\u0161]}, 's'
+ name.gsub! %r{[\u0162\u0164\u0166]}, 'T'
+ name.gsub! %r{[\u0163\u0165\u0167]}, 't'
+ name.gsub! %r{[\u0168\u016A\u016C\u016E\u0170\u0172]}, 'U'
+ name.gsub! %r{[\u0169\u016B\u016D\u016F\u0171\u0173\u01B0]}, 'u'
+ name.gsub! %r{[\u0174]}, 'W'
+ name.gsub! %r{[\u0175]}, 'w'
+ name.gsub! %r{[\u0176\u0178]}, 'Y'
+ name.gsub! %r{[\u0177]}, 'y'
+ name.gsub! %r{[\u0179\u017B\u017D]}, 'Z'
+ name.gsub! %r{[\u017A\u017C\u017E]}, 'z'
+
+ # Latin Extended Additional
+ # N.B. Only ones seen in iclas.txt are included here
+ name.gsub! %r{\u1ea0}, 'A' # A with combining dot below
+ name.gsub! %r{\u1ea1}, 'a' # a with combining dot below
+ name.gsub! %r{\u1ec4}, 'E' # E with circumflex and tilde
+ name.gsub! %r{\u1ec5}, 'e' # e with circumflex and tilde
- # denormalized diacritics
- name.gsub! /[\u0300-\u036f]/, ''
+ # remove unhandled combining diacritics (some combinations are handled above)
+ name.gsub! %r{[\u0300-\u036f]}, ''
end
+ if nonWord
+ # deal with any remaining non-word characters
+ return name.strip.gsub %r{[^\w]+}, nonWord if nonWord
+ end
return name
end
diff --git a/www/secretary/workbench/views/forms/ccla.js.rb b/www/secretary/workbench/views/forms/ccla.js.rb
index 4759bf45..5c7935e0 100644
--- a/www/secretary/workbench/views/forms/ccla.js.rb
+++ b/www/secretary/workbench/views/forms/ccla.js.rb
@@ -143,7 +143,7 @@ class CCLA < Vue
def genfilename(company, product)
basename = company
basename += '-' + product if product
- return asciize(basename.strip()).downcase().gsub(/\W+/, '-')
+ return asciize(basename.strip()).downcase()
end
# when leaving an input field, trigger change event (for Safari)
diff --git a/www/secretary/workbench/views/forms/icla.js.rb b/www/secretary/workbench/views/forms/icla.js.rb
index 2fd76882..cac91d31 100644
--- a/www/secretary/workbench/views/forms/icla.js.rb
+++ b/www/secretary/workbench/views/forms/icla.js.rb
@@ -279,7 +279,7 @@ class ICLA < Vue
# generate file name from the real name
def genfilename(realname, familyfirst)
- nominalname = asciize(realname.strip()).downcase().gsub(/\W+/, '-')
+ nominalname = asciize(realname.strip()).downcase()
if !familyfirst
return nominalname
else
diff --git a/www/secretary/workbench/views/forms/memapp.js.rb b/www/secretary/workbench/views/forms/memapp.js.rb
index 100649ee..8b81b2fd 100644
--- a/www/secretary/workbench/views/forms/memapp.js.rb
+++ b/www/secretary/workbench/views/forms/memapp.js.rb
@@ -78,7 +78,7 @@ class MemApp < Vue
# when fullname changes, change filename
def changeFullName(event)
@name = event.target.value
- @filename = asciize(event.target.value).downcase().gsub(/\W+/, '-')
+ @filename = asciize(event.target.value).downcase()
end
# when id is selected, default full name and filename
@@ -87,7 +87,7 @@ class MemApp < Vue
@received.each do |line|
if line.id == id
@name = line.name
- @filename = asciize(line.name).downcase().gsub(/\W+/, '-')
+ @filename = asciize(line.name).downcase()
@disabled = false
if @@headers.from =~ /@apache.org$/