branch: externals/ebdb
commit da1c6f677804483d155b08a0ac63e4825f49e7b4
Author: Eric Abrahamsen <e...@ericabrahamsen.net>
Commit: Eric Abrahamsen <e...@ericabrahamsen.net>
    Improve name parsing

    * ebdb.el (ebdb-lastname-re): Better regexp for matching surnames
    with hyphens and apostrophes.
    (ebdb-divide-name): Check for all-caps UN-style surname. Also, now
    returns given names as a list.
    (ebdb-parse): Complex name method assumes given-names are already
    a list.
    * ebdb-test.el (ebdb-parse-name): Add more test cases.
---
 ebdb-test.el | 32 +++++++++++++++++++++++++++++++-
 ebdb.el      | 56 +++++++++++++++++++++++++++++++++++---------------------
 2 files changed, 66 insertions(+), 22 deletions(-)

diff --git a/ebdb-test.el b/ebdb-test.el
index a5c4c86..2e856ce 100644
--- a/ebdb-test.el
+++ b/ebdb-test.el
@@ -347,7 +347,37 @@ If it doesn't exist, raise `ebdb-related-unfound'."
            (slot-value
             (ebdb-parse 'ebdb-field-name-complex "Eric Abrahamsen, III")
             'suffix)
-           "III")))
+           "III"))
+  (should (equal
+           (slot-value
+            (ebdb-parse 'ebdb-field-name-complex "Albus Percival Wulfric Brian Dumbledore")
+            'given-names)
+           '("Albus" "Percival" "Wulfric" "Brian")))
+  (should (equal
+           (slot-value
+            (ebdb-parse 'ebdb-field-name-complex "MURAKAMI Haruki")
+            'surname)
+           "Murakami"))
+  (should (equal
+           (slot-value
+            (ebdb-parse 'ebdb-field-name-complex "Fintan O'Toole")
+            'surname)
+           "O'Toole"))
+  (should (equal
+           (slot-value
+            (ebdb-parse 'ebdb-field-name-complex "O'Toole, Fintan")
+            'surname)
+           "O'Toole"))
+  (should (equal
+           (slot-value
+            (ebdb-parse 'ebdb-field-name-complex "O'TOOLE Fintan")
+            'surname)
+           "O'Toole"))
+  (should (equal
+           (slot-value
+            (ebdb-parse 'ebdb-field-name-complex "Daniel Micahel Blake Day-Lewis")
+            'surname)
+           "Day-Lewis")))
 
 ;; Snarf testing.
 
diff --git a/ebdb.el b/ebdb.el
index 5a95a51..3cacfbb 100644
--- a/ebdb.el
+++ b/ebdb.el
@@ -578,8 +578,8 @@ Case is ignored."
 (defcustom ebdb-lastname-re
   (concat "[- \t]*\\(\\(?:\\<"
           (regexp-opt ebdb-lastname-prefixes)
-          ;; multiple last names concatenated by `-'
-          "\\>[- \t]+\\)?\\(?:\\w+[ \t]*-[ \t]*\\)*\\w+\\)\\'")
+          ;; Last names can contain hyphens and apostrophes.
+          "\\>[- \t]+\\)?\\w[[:word:]'-]+\\)\\>")
   "Regexp matching the last name of a full name.
 Its first parenthetical subexpression becomes the last name."
   :group 'ebdb-record-edit
@@ -1296,8 +1296,7 @@ first one."
                (ebdb-divide-name str)))
     (unless (plist-get slots :given-names)
       (setq slots (plist-put slots :given-names
-                             (when given-names
-                               (split-string given-names nil t)))))
+                             given-names)))
     (unless (plist-get slots :surname)
       (setq slots (plist-put slots :surname
                              (or surname ""))))
@@ -4866,27 +4865,42 @@ also be one of the special symbols below.
 
 (defun ebdb-divide-name (string)
   "Divide STRING into its component parts.
-Case is ignored. Return name as a list of (LAST FIRST SUFFIX).
-LAST is always a string (possibly empty). FIRST and SUFFIX may
-be nil."
+Return name as a list of (SURNAME GIVEN-NAMES SUFFIX). SURNAME
+is always a string (possibly empty). GIVEN-NAMES, if present, is
+a list of first names. GIVEN-NAMES and SUFFIX may be nil.
+
+During parsing `case-fold-search' is non-nil, with the exception
+that a string of all-upper-case letters will be assumed (a la UN
+usage) to represent the surname."
   (let ((case-fold-search t)
-        first suffix)
+        given suffix)
     ;; Separate a suffix.
-    (if (string-match ebdb-lastname-suffix-re string)
-        (setq suffix (match-string 1 string)
-              string (substring string 0 (match-beginning 0))))
-    (cond ((string-match "\\`\\(.+\\),[ \t\n]*\\(.+\\)\\'" string)
-           ;; If STRING contains a comma, this probably means that STRING
-           ;; is of the form "Last, First".
-           (setq first (match-string 2 string)
-                 string (match-string 1 string)))
-          ((string-match ebdb-lastname-re string)
-           (setq first (and (not (zerop (match-beginning 0)))
-                            (substring string 0 (match-beginning 0)))
-                 string (match-string 1 string))))
+    (when (string-match ebdb-lastname-suffix-re string)
+      (setq suffix (match-string 1 string)
+            string (substring string 0 (match-beginning 0))))
+    (if (let ((case-fold-search nil))
+          ;; If there's an all-upper-case word, it's the last name.
+          (string-match
+           "[ \t\n]*\\([[:upper:]]+[[:upper:]-']+\\)\\>[ \t\n]*"
+           string))
+        (setq given (concat (substring string 0 (match-beginning 1))
+                            " "
+                            (substring string (match-end 1)))
+              string (capitalize (match-string 1 string)))
+      (cond ((string-match
+              (concat "\\`" ebdb-lastname-re ",[ \t\n]*\\(.+\\)\\'")
+              string)
+             ;; If STRING contains a comma, this probably means that STRING
+             ;; is of the form "Last, First".
+             (setq given (match-string 2 string)
+                   string (match-string 1 string)))
+            ((string-match (concat ebdb-lastname-re "[ ,]*\\'") string)
+             (setq given (and (not (zerop (match-beginning 0)))
+                              (substring string 0 (match-beginning 0)))
+                   string (match-string 1 string)))))
     (delq nil
           (list (ebdb-string-trim string)
-                (and first (ebdb-string-trim first))
+                (and given (split-string given nil t))
                 suffix))))
 
 (defsubst ebdb-record-lessp (record1 record2)