branch: externals/xr commit 952276b251a8bd667927d18f96995a22d582a115 Author: Mattias Engdegård <matti...@acm.org> Commit: Mattias Engdegård <matti...@acm.org>
Add dialect option and improve pretty-printing `xr' and `xr-pp' now take an optional DIALECT argument that controls the choice of keywords to some extent. The pretty-printer now attempts to avoid line breaks after short operators. Example: (seq (or apple banana) orange) should now be rendered as (seq (or apple banana) orange) --- xr-test.el | 43 +++++++++++++++++++++++++++++ xr.el | 93 +++++++++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 117 insertions(+), 19 deletions(-) diff --git a/xr-test.el b/xr-test.el index 8dc2453..4a24ef3 100644 --- a/xr-test.el +++ b/xr-test.el @@ -258,6 +258,49 @@ "(?? nonl)\n")) (should (equal (xr-pp-rx-to-str '(repeat 1 63 "a")) "(repeat 1 63 \"a\")\n")) + (let ((indent-tabs-mode nil)) + (should (equal (xr-pp-rx-to-str + '(seq (1+ nonl + (or "a" + (not (any space)))) + (* (? (not cntrl) + blank + (| nonascii "abcdef"))))) + (concat + "(seq (1+ nonl\n" + " (or \"a\"\n" + " (not (any space))))\n" + " (* (? (not cntrl)\n" + " blank\n" + " (| nonascii \"abcdef\"))))\n")))) + ) + +(ert-deftest xr-dialect () + (should (equal (xr "a*b+c?d\\{2,5\\}\\(e\\|f\\)[gh][^ij]" 'medium) + '(seq (zero-or-more "a") (one-or-more "b") (opt "c") + (repeat 2 5 "d") (group (or "e" "f")) + (any "gh") (not (any "ij"))))) + (should (equal (xr "a*b+c?d\\{2,5\\}\\(e\\|f\\)[gh][^ij]" 'verbose) + '(seq (zero-or-more "a") (one-or-more "b") (zero-or-one "c") + (repeat 2 5 "d") (group (or "e" "f")) + (any "gh") (not (any "ij"))))) + (should (equal (xr "a*b+c?d\\{2,5\\}\\(e\\|f\\)[gh][^ij]" 'brief) + '(seq (0+ "a") (1+ "b") (opt "c") + (repeat 2 5 "d") (group (or "e" "f")) + (any "gh") (not (any "ij"))))) + (should (equal (xr "a*b+c?d\\{2,5\\}\\(e\\|f\\)[gh][^ij]" 'terse) + '(: (* "a") (+ "b") (? "c") + (** 2 5 "d") (group (| "e" "f")) + (in "gh") (not (in "ij"))))) + (should (equal (xr "^\\`\\<.\\>\\'$" 'medium) + '(seq bol bos bow nonl eow eos eol))) + (should (equal (xr "^\\`\\<.\\>\\'$" 'verbose) + '(seq line-start string-start word-start not-newline + word-end string-end line-end))) + (should (equal (xr "^\\`\\<.\\>\\'$" 'brief) + '(seq bol bos bow nonl eow eos eol))) + (should (equal (xr "^\\`\\<.\\>\\'$" 'terse) + '(: bol bos bow nonl eow eos eol))) ) (ert-deftest xr-lint () diff --git a/xr.el b/xr.el index 03b2110..98a306e 100644 --- a/xr.el +++ b/xr.el @@ -564,12 +564,63 @@ (error "Unbalanced \\)")) rx))) +;; Substitute keywords in RX using HEAD-ALIST and BODY-ALIST in the +;; head and body positions, respectively. +(defun xr--substitute-keywords (head-alist body-alist rx) + (cond + ((symbolp rx) + (or (cdr (assq rx body-alist)) rx)) + ((consp rx) + (cons (or (cdr (assq (car rx) head-alist)) + (car rx)) + (mapcar (lambda (elem) (xr--substitute-keywords + head-alist body-alist elem)) + (cdr rx)))) + (t rx))) + +;; Alist mapping keyword dialect to (HEAD-ALIST . BODY-ALIST), +;; or to nil if no translation should take place. +;; The alists are mapping from the default choice. +(defconst xr--keywords + '((medium . nil) + (brief . (((zero-or-more . 0+) + (one-or-more . 1+)) + . nil)) + (terse . (((seq . :) + (or . |) + (any . in) + (zero-or-more . *) + (one-or-more . +) + (opt . ? ) + (repeat . **)) + . nil)) + (verbose . (((opt . zero-or-one)) + . + ((nonl . not-newline) + (bol . line-start) + (eol . line-end) + (bos . string-start) + (eos . string-end) + (bow . word-start) + (eow . word-end)))))) + ;;;###autoload -(defun xr (re-string) +(defun xr (re-string &optional dialect) "Convert a regexp string to rx notation; the inverse of `rx'. Passing the returned value to `rx' (or `rx-to-string') yields a regexp string -equivalent to RE-STRING." - (xr--parse re-string nil)) +equivalent to RE-STRING. DIALECT controls the choice of keywords, +and is one of: +`verbose' -- verbose keywords +`short' -- short keywords +`terse' -- very short keywords +`medium' or nil -- a compromise (the default)" + (let ((keywords (assq (or dialect 'medium) xr--keywords))) + (unless keywords + (error "Unknown dialect `%S'" dialect)) + (let ((rx (xr--parse re-string nil))) + (if (cdr keywords) + (xr--substitute-keywords (cadr keywords) (cddr keywords) rx) + rx)))) ;;;###autoload (defun xr-lint (re-string) @@ -618,11 +669,11 @@ in RE-STRING." ((eq rx '*?) "*?") ; Avoid unnecessary \ in symbol. ((eq rx '+?) "+?") ((consp rx) - ;; Render character ? as ?? when first in a list. - ;; Elsewhere, it's just an integer. - (let ((first (if (eq (car rx) ??) - "??" - (xr--rx-to-string (car rx)))) + ;; Render the characters SPC and ? as ? and ?? when first in a list. + ;; Elsewhere, they are just integers. + (let ((first (cond ((eq (car rx) ?\s) "?") + ((eq (car rx) ??) "??") + (t (xr--rx-to-string (car rx))))) (rest (mapcar #'xr--rx-to-string (cdr rx)))) (concat "(" (mapconcat #'identity (cons first rest) " ") ")"))) ((stringp rx) @@ -636,26 +687,30 @@ It does a slightly better job than standard `pp' for rx purposes." (insert (xr--rx-to-string rx) "\n") (pp-buffer) - ;; Remove the line break after "(not" for readability and compactness. + ;; Remove the line break after short operator names for + ;; readability and compactness. (goto-char (point-min)) (while (re-search-forward - (rx bol - (zero-or-more (any space)) "(not" - (group "\n" (zero-or-more (any space))) - (one-or-more nonl) "))" - eol) + (rx "(" + (or "not" "0+" "1+" "*" "+" "?" "opt" "seq" ":" "|" "or" + "??" "*?" "+?" "=" ">=" "**") + (group "\n" (zero-or-more (any space)))) nil t) (replace-match " " t t nil 1)) + ;; Reindent the buffer in case line breaks have been removed. + (goto-char (point-min)) + (indent-sexp) + (buffer-string))) ;;;###autoload -(defun xr-pp (re-string) +(defun xr-pp (re-string &optional dialect) "Convert to `rx' notation and pretty-print. -This basically does `(pp (xr RE-STRING))', but in a slightly more readable -way. It is intended for use from an interactive elisp session. -Returns nil." - (insert (xr-pp-rx-to-str (xr re-string)))) +This basically does `(pp (xr RE-STRING DIALECT))', but in a slightly +more readable way. It is intended for use from an interactive elisp +session. Returns nil." + (insert (xr-pp-rx-to-str (xr re-string dialect)))) (provide 'xr)