branch: elpa/datetime commit 32f62080f4655d12831c8b6320bc1f0c8899345c Author: Paul Pogonyshev <pogonys...@gmail.com> Commit: Paul Pogonyshev <pogonys...@gmail.com>
Fix building of parsing regexp in case there are run-together numeric groups in the pattern. --- datetime.el | 197 ++++++++++++++++++++++++++++++++-------------------------- test/parse.el | 13 ++++ 2 files changed, 121 insertions(+), 89 deletions(-) diff --git a/datetime.el b/datetime.el index 79ab2ff4c7..3a05868905 100644 --- a/datetime.el +++ b/datetime.el @@ -936,6 +936,8 @@ unless specified otherwise. (case-insensitive (and (plist-get options :case-insensitive) t)) (lax-whitespace (plist-get options :lax-whitespace)) (part-index 0) + regexp-part-sources + last-part-was-numeric regexp-parts ;; To handle excessive information patterns (e.g. "Mon 16 Sep 2018" is excessive, ;; since day of the week can be found from the day of the year), we keep track of @@ -956,100 +958,117 @@ unless specified otherwise. second-part-indices second-fractional-part-indices have-case-sensitive-parts) + ;; Doing this in two loops, so that the second can look ahead and easily find out if + ;; the next regexp part is going to be a numeric value. (dolist (part (datetime--parse-pattern type pattern options)) - (if (stringp part) - (let ((quoted (regexp-quote part))) - (when (not (or have-case-sensitive-parts (string= (upcase part) (downcase part)))) - (setq have-case-sensitive-parts t)) - (push (if lax-whitespace - (replace-regexp-in-string (rx (1+ (any blank))) (rx (1+ (any blank))) quoted t t) - quoted) - regexp-parts)) - (let* ((type (car part)) - (details (cdr part)) - (regexp (pcase type - (`era (when (or validating (null era-part-indices)) - (push part-index era-part-indices)) - (datetime-locale-field locale :eras)) - (`year - (when (or validating (null year-part-indices)) - (push (cons part-index details) year-part-indices)) - (cond ((or (memq details '(1 add-century-when-parsing)) (not (plist-get options :require-leading-zeros))) - (rx (1+ (any "0-9")))) - ((memq details '(2 always-two-digits)) - (rx (any "0-9") (1+ (any "0-9")))) - (t - (format "[0-9]\\{%d\\}[0-9]+" (1- details))))) - (`year-for-week (error "Parsing `%s' is currently not implemented" type)) - (`month (when (or validating (null month-number-part-indices)) - (push part-index month-number-part-indices)) - 12) - (`month-context-name (let ((field (if (eq details 'abbreviated) :month-context-abbr :month-context-names))) - (when (or validating (null month-name-part-indices)) - (push (cons part-index field) month-name-part-indices)) - (datetime-locale-field locale field))) - (`month-standalone-name (let ((field (if (eq details 'abbreviated) :month-standalone-abbr :month-standalone-names))) - (when (or validating (null month-name-part-indices)) - (push (cons part-index field) month-name-part-indices)) - (datetime-locale-field locale field))) - (`week-in-year (error "Parsing `%s' is currently not implemented" type)) - (`week-in-month (error "Parsing `%s' is currently not implemented" type)) - (`day-in-month (when (or validating (null day-of-month-part-indices)) - (push part-index day-of-month-part-indices)) - 31) - (`weekday-in-month (error "Parsing `%s' is currently not implemented" type)) - (`weekday 7) - (`weekday-context-name - (datetime-locale-field locale (if (eq details 'abbreviated) :weekday-context-abbr :weekday-context-names))) - (`weekday-standalone-name - (datetime-locale-field locale (if (eq details 'abbreviated) :weekday-standalone-abbr :weekday-standalone-names))) - (`am-pm (when (or validating (null am-pm-part-indices)) - (push part-index am-pm-part-indices)) - (datetime-locale-field locale :am-pm)) - (`hour-0-23 (when (or validating (null hour-0-23-part-indices)) - (push part-index hour-0-23-part-indices)) - 23) - (`hour-1-24 (when (or validating (null hour-1-24-part-indices)) - (push part-index hour-1-24-part-indices)) - 24) - (`hour-am-pm-0-11 (when (or validating (null hour-am-pm-0-11-part-indices)) - (push part-index hour-am-pm-0-11-part-indices)) - 11) - (`hour-am-pm-1-12 (when (or validating (null hour-am-pm-1-12-part-indices)) - (push part-index hour-am-pm-1-12-part-indices)) - 12) - (`minute (when (or validating (null minute-part-indices)) - (push part-index minute-part-indices)) - 59) - (`second (when (or validating (null second-part-indices)) - (push part-index second-part-indices)) - 59) - (`decimal-separator (rx (or "." ","))) - (`second-fractional (push (cons part-index (expt 10.0 details)) second-fractional-part-indices) - (apply #'concat (make-list details (rx (any "0-9"))))) - (`timezone - (signal 'datetime-unsupported-timezone nil)) - (_ (error "Unexpected value %s" type))))) - (push (cond ((integerp regexp) - ;; REGEXP is really the maximum value of this one- or two-digit - ;; number. However, we don't include it in the regexp in most of - ;; the cases (unlike in `datetime-matching-regexp'). - (if (<= regexp 9) - (format "0*[1-%d]" regexp) - (cond ((and (= details 1) (plist-get options :accept-leading-space)) - (format "[ 0-%d]?[0-9]" (/ regexp 10))) - ((>= regexp 20) - (format "0*[1-%d]?[0-9]" (/ regexp 10))) - (t - "0*1?[0-9]")))) - ((vectorp regexp) + (push (if (stringp part) + (let ((quoted (regexp-quote part))) + (when (not (or have-case-sensitive-parts (string= (upcase part) (downcase part)))) + (setq have-case-sensitive-parts t)) + (cons (if lax-whitespace + (replace-regexp-in-string (rx (1+ (any blank))) (rx (1+ (any blank))) quoted t t) + quoted) + nil)) + (let* ((type (car part)) + (details (cdr part))) + (cons (pcase type + (`era (when (or validating (null era-part-indices)) + (push part-index era-part-indices)) + (datetime-locale-field locale :eras)) + (`year + (when (or validating (null year-part-indices)) + (push (cons part-index details) year-part-indices)) + ;; Magic number for the next loop. + 0) + (`year-for-week (error "Parsing `%s' is currently not implemented" type)) + (`month (when (or validating (null month-number-part-indices)) + (push part-index month-number-part-indices)) + 12) + (`month-context-name (let ((field (if (eq details 'abbreviated) :month-context-abbr :month-context-names))) + (when (or validating (null month-name-part-indices)) + (push (cons part-index field) month-name-part-indices)) + (datetime-locale-field locale field))) + (`month-standalone-name (let ((field (if (eq details 'abbreviated) :month-standalone-abbr :month-standalone-names))) + (when (or validating (null month-name-part-indices)) + (push (cons part-index field) month-name-part-indices)) + (datetime-locale-field locale field))) + (`week-in-year (error "Parsing `%s' is currently not implemented" type)) + (`week-in-month (error "Parsing `%s' is currently not implemented" type)) + (`day-in-month (when (or validating (null day-of-month-part-indices)) + (push part-index day-of-month-part-indices)) + 31) + (`weekday-in-month (error "Parsing `%s' is currently not implemented" type)) + (`weekday 7) + (`weekday-context-name + (datetime-locale-field locale (if (eq details 'abbreviated) :weekday-context-abbr :weekday-context-names))) + (`weekday-standalone-name + (datetime-locale-field locale (if (eq details 'abbreviated) :weekday-standalone-abbr :weekday-standalone-names))) + (`am-pm (when (or validating (null am-pm-part-indices)) + (push part-index am-pm-part-indices)) + (datetime-locale-field locale :am-pm)) + (`hour-0-23 (when (or validating (null hour-0-23-part-indices)) + (push part-index hour-0-23-part-indices)) + 23) + (`hour-1-24 (when (or validating (null hour-1-24-part-indices)) + (push part-index hour-1-24-part-indices)) + 24) + (`hour-am-pm-0-11 (when (or validating (null hour-am-pm-0-11-part-indices)) + (push part-index hour-am-pm-0-11-part-indices)) + 11) + (`hour-am-pm-1-12 (when (or validating (null hour-am-pm-1-12-part-indices)) + (push part-index hour-am-pm-1-12-part-indices)) + 12) + (`minute (when (or validating (null minute-part-indices)) + (push part-index minute-part-indices)) + 59) + (`second (when (or validating (null second-part-indices)) + (push part-index second-part-indices)) + 59) + (`decimal-separator (rx (or "." ","))) + (`second-fractional (push (cons part-index (expt 10.0 details)) second-fractional-part-indices) + (apply #'concat (make-list details (rx (any "0-9"))))) + (`timezone + (signal 'datetime-unsupported-timezone nil)) + (_ (error "Unexpected value %s" type))) + details))) + regexp-part-sources) + (setf part-index (1+ part-index))) + (setf regexp-part-sources (nreverse regexp-part-sources)) + ;; Not using `dolist' since we need access to the next entry. + (while regexp-part-sources + (let* ((entry (pop regexp-part-sources)) + (regexp (car entry)) + (details (cdr entry))) + (push (if (integerp regexp) + (let ((run-together-numeric-groups (or last-part-was-numeric (integerp (caar regexp-part-sources))))) + (setf last-part-was-numeric t) + (if (= regexp 0) + ;; Magic number for years. + (cond ((or (memq details '(1 add-century-when-parsing)) (not (plist-get options :require-leading-zeros))) + (rx (1+ (any "0-9")))) + ((memq details '(2 always-two-digits)) + (rx (any "0-9") (1+ (any "0-9")))) + (t + (format "[0-9]\\{%d\\}[0-9]+" (1- details)))) + ;; REGEXP is really the maximum value of this one- or two-digit + ;; number. However, we don't include it in the regexp in most of + ;; the cases (unlike in `datetime-matching-regexp'). + (if (<= regexp 9) + (format (if run-together-numeric-groups "[1-%d]" "0*[1-%d]") regexp) + (cond ((and (= details 1) (plist-get options :accept-leading-space)) + (format (if run-together-numeric-groups "[0-%d][0-9]" "[ 0-%d]?[0-9]") (/ regexp 10))) + ((>= regexp 20) + (format (if run-together-numeric-groups "[0-%d][0-9]" "0*[1-%d]?[0-9]") (/ regexp 10))) + (t + (if run-together-numeric-groups "[01][0-9]" "0*1?[0-9]")))))) + (setf last-part-was-numeric nil) + (cond ((vectorp regexp) ;; A vector of options returned by `datetime-locale-field'. (setq have-case-sensitive-parts t) (regexp-opt (append regexp nil))) (t - regexp)) - regexp-parts))) - (setq part-index (1+ part-index))) + regexp))) + regexp-parts))) (setq era-part-indices (nreverse era-part-indices) year-part-indices (nreverse year-part-indices) month-number-part-indices (nreverse month-number-part-indices) diff --git a/test/parse.el b/test/parse.el index 79a5bfaa8f..6b249eaed9 100644 --- a/test/parse.el +++ b/test/parse.el @@ -146,3 +146,16 @@ (dolist (timezone (datetime-list-timezones)) (datetime--test-set-up-parser timezone 'en "yyyy-MM-dd HH:mm:ss" (datetime--test-parser '("2100-01-01 00:00:00"))))) + + +(ert-deftest datetime-parsing-run-together-1 () + ;; Real failure: run-together digit groups in the pattern would confuse the parser, see + ;; https://github.com/doublep/datetime/issues/6. + (dolist (timezone (datetime-list-timezones)) + (datetime--test-set-up-parser timezone 'en "yyyyMMdd" + (datetime--test-parser '("20220506"))))) + +(ert-deftest datetime-parsing-run-together-2 () + (dolist (timezone (datetime-list-timezones)) + (datetime--test-set-up-parser timezone 'en "yyyyMMddHHmmss" + (datetime--test-parser '("20220506123000")))))