branch: elpa/rust-mode
commit 6e72f647da716bf532013484515e5f7ebde60209
Author: Micah Chalmer <[email protected]>
Commit: Micah Chalmer <[email protected]>
Fix bugs in raw strings and character literals
---
rust-mode-tests.el | 80 +++++++++++++++++++++
rust-mode.el | 202 ++++++++++++++++++++++++++++++-----------------------
2 files changed, 196 insertions(+), 86 deletions(-)
diff --git a/rust-mode-tests.el b/rust-mode-tests.el
index c7446d6..a846a27 100644
--- a/rust-mode-tests.el
+++ b/rust-mode-tests.el
@@ -1482,3 +1482,83 @@ la la\");
(test-indent
;; Needs to leave 1 space before "world"
"\"hello \\\n world\""))
+
+(defun rust-test-matching-parens (content pairs &optional nonparen-positions)
+ "Assert that in rust-mode, given a buffer with the given `content',
+ emacs's paren matching will find all of the pairs of positions
+ as matching braces. The list of nonparen-positions asserts
+ specific positions that should NOT be considered to be
+ parens/braces of any kind.
+
+ This does not assert that the `pairs' list is
+ comprehensive--there can be additional pairs that don't appear
+ in the list and the test still passes (as long as none of their
+ positions appear in `nonparen-positions'.)"
+ (with-temp-buffer
+ (rust-mode)
+ (insert content)
+ ;; Fontify before inspecting the syntax, so that font-lock has already
+ ;; processed the whole buffer when the checks below run.
+ (font-lock-fontify-buffer)
+ (dolist (pair pairs)
+ (let* ((open-pos (nth 0 pair))
+ (close-pos (nth 1 pair)))
+ ;; Syntax class 4 is "open parenthesis" and 5 is "close parenthesis".
+ (should (equal 4 (syntax-class (syntax-after open-pos))))
+ (should (equal 5 (syntax-class (syntax-after close-pos))))
+ ;; Scanning forward from the opener must stop just past the closer,
+ ;; and scanning backward from just past the closer must stop on the
+ ;; opener.
+ (should (equal (scan-sexps open-pos 1) (+ 1 close-pos)))
+ (should (equal (scan-sexps (+ 1 close-pos) -1) open-pos))))
+ ;; The characters at these positions must be neither openers nor closers.
+ (dolist (nonpar-pos nonparen-positions)
+ (let ((nonpar-syntax-class (syntax-class (syntax-after nonpar-pos))))
+ (should (not (equal 4 nonpar-syntax-class)))
+ (should (not (equal 5 nonpar-syntax-class)))))))
+
+(ert-deftest rust-test-unmatched-single-quote-in-comment-paren-matching ()
+ ;; Regression test: the char quote handling had a bug that affected paren
+ ;; matching. An unmatched quote char in a comment caused the problems.
+ ;; The fixture text must stay exactly as written: the pair below is given
+ ;; as absolute buffer positions within it.
+ (rust-test-matching-parens
+ "// If this appeared first in the file...
+\"\\
+{\";
+
+// And the { was not the on the first column:
+ {
+ // This then messed up the paren matching: '\\'
+}
+
+"
+ '((97 150) ;; The { and } at the bottom
+ )))
+
+(ert-deftest rust-test-two-character-quotes-in-a-row ()
+ (with-temp-buffer
+ (rust-mode)
+ ;; Fontify the still-empty buffer first, then insert the text and call the
+ ;; after-change function by hand, so the text is fontified as an edit to
+ ;; an already-fontified buffer rather than as a fresh buffer.
+ (font-lock-fontify-buffer)
+ (insert "'\\n','a', fn")
+ (font-lock-after-change-function 1 12 0)
+
+ ;; Positions 3 and 7 lie inside the two character literals, 5 and 9 are
+ ;; the commas that follow them, and 12 lies inside the `fn' keyword.
+ (should (equal 'font-lock-string-face (get-text-property 3 'face)))
+ (should (equal nil (get-text-property 5 'face)))
+ (should (equal 'font-lock-string-face (get-text-property 7 'face)))
+ (should (equal nil (get-text-property 9 'face)))
+ (should (equal 'font-lock-keyword-face (get-text-property 12 'face)))
+ )
+ )
+
+(ert-deftest single-quote-null-char ()
+ ;; The `\0' escape must be fontified as a character literal, and the
+ ;; literal and keyword that follow it must still be fontified normally.
+ (rust-test-font-lock
+ "'\\0' 'a' fn"
+ '("'\\0'" font-lock-string-face
+ "'a'" font-lock-string-face
+ "fn" font-lock-keyword-face)))
+
+(ert-deftest r-in-string-after-single-quoted-double-quote ()
+ ;; A character literal holding a double quote is followed by a string whose
+ ;; content is `r' directly before the closing quote; all three items must
+ ;; come out as separate strings.
+ (rust-test-font-lock
+ "'\"';\n\"r\";\n\"oops\";"
+ '("'\"'" font-lock-string-face
+ "\"r\"" font-lock-string-face
+ "\"oops\"" font-lock-string-face
+ )))
+
+(ert-deftest char-literal-after-quote-in-raw-string ()
+ ;; A raw string whose content is a lone double quote must end at its `"#'
+ ;; terminator, so that the character literal on the next line is fontified
+ ;; as its own string.
+ (rust-test-font-lock
+ "r#\"\"\"#;\n'q'"
+ '("r#\"\"\"#" font-lock-string-face
+ "'q'" font-lock-string-face)))
diff --git a/rust-mode.el b/rust-mode.el
index ae42052..193f672 100644
--- a/rust-mode.el
+++ b/rust-mode.el
@@ -374,38 +374,53 @@
("fn" . font-lock-function-name-face)
("static" . font-lock-constant-face)))))
-(defun rust-extend-region-raw-string ()
+(defun rust-font-lock-extend-region ()
"Extend the region given by `font-lock-beg' and `font-lock-end'
- to include the beginning of a string if it includes part of it.
- Adjusts to include the r[#] of a raw string as well."
-
- (let* ((orig-beg font-lock-beg)
- (orig-end font-lock-end)
- (beg-ppss (syntax-ppss font-lock-beg))
- (beg-in-str (nth 3 beg-ppss))
- (end-ppss (syntax-ppss font-lock-end))
- (end-in-str (nth 3 end-ppss)))
-
- (when (and beg-in-str (> font-lock-beg (nth 8 beg-ppss)))
- (setq font-lock-beg str-beg)
- (while (equal ?# (char-before font-lock-beg))
- (setq font-lock-beg (1- font-lock-beg)))
- (when (equal ?r (char-before font-lock-beg))
- (setq font-lock-beg (1- font-lock-beg))))
-
- (when end-in-str
- (save-excursion
- (goto-char (nth 8 end-ppss))
- (ignore-errors (forward-sexp))
- (setq font-lock-end (max font-lock-end (point)))))
-
- ;; If we have the beginning of a raw string in the region, make sure we
have the end of
- ;; it.
- (when (or beg-in-str end-in-str)
- (save-excursion
- (goto-char font-lock-beg)
- (while (and (< (point) font-lock-end) (ignore-errors
(rust-look-for-raw-string (buffer-end 1)))))
- (setq font-lock-end (max font-lock-end (point)))))
+ to include the beginning of a string or comment if it includes
+ part of it. Adjusts to include the r[#] of a raw string as
+ well."
+
+ (let ((orig-beg font-lock-beg)
+ (orig-end font-lock-end))
+ (cond
+ ;; If we are not syntactically fontified yet, we cannot correctly cover
+ ;; anything less than the full buffer. The syntactic fontification
+ ;; modifies the syntax, so until it's done we can't use the syntax to
+ ;; determine what to fontify.
+ ((< (or font-lock-syntactically-fontified 0) font-lock-end)
+ (setq font-lock-beg 1)
+ (setq font-lock-end (buffer-end 1)))
+
+ ((let* ((beg-ppss (syntax-ppss font-lock-beg))
+ (beg-in-cmnt (and (nth 4 beg-ppss) (nth 8 beg-ppss)))
+ (beg-in-str (nth 3 beg-ppss))
+ (end-ppss (syntax-ppss font-lock-end))
+ (end-in-str (nth 3 end-ppss)))
+
+ (when (and beg-in-str (> font-lock-beg (nth 8 beg-ppss)))
+ (setq font-lock-beg (nth 8 beg-ppss))
+ (while (equal ?# (char-before font-lock-beg))
+ (setq font-lock-beg (1- font-lock-beg)))
+ (when (equal ?r (char-before font-lock-beg))
+ (setq font-lock-beg (1- font-lock-beg))))
+
+ (when (and beg-in-cmnt (> font-lock-beg beg-in-cmnt))
+ (setq font-lock-beg beg-in-cmnt))
+
+ (when end-in-str
+ (save-excursion
+ (goto-char (nth 8 end-ppss))
+ (ignore-errors (forward-sexp))
+ (setq font-lock-end (max font-lock-end (point)))))
+
+ ;; If we have the beginning of a raw string in the region, make sure
we have the end of
+ ;; it.
+ (when (or beg-in-str end-in-str)
+ (save-excursion
+ (goto-char font-lock-beg)
+ (while (and (< (point) font-lock-end) (ignore-errors
(rust-look-for-non-standard-string (buffer-end 1)))))
+ (setq font-lock-end (max font-lock-end (point)))))
+ )))
(or (/= font-lock-beg orig-beg)
(/= font-lock-end orig-end))
@@ -437,67 +452,82 @@
(set-match-data (nth 1 ret-list))
(nth 0 ret-list))))
-(defun rust-look-for-raw-string (bound)
- ;; Find a raw string, but only if it's not in the middle of another string or
- ;; a comment
+(defun rust-look-for-non-standard-string (bound)
+ ;; Find a raw string or character literal, but only if it's not in the middle
+ ;; of another string or a comment.
- (let* ((raw-str-regexp
+ (let* ((non-standard-str-regexp
(rx
- (seq
- ;; The "r" starts the raw string. Capture it as group 1 to mark
it as such syntactically:
- (group "r")
-
- ;; Then either:
- (or
- ;; a sequence at least one "#" (followed by quote). Capture all
- ;; but the last "#" as group 2 for this case.
- (seq (group (* "#")) "#\"")
-
- ;; ...or a quote without any "#". Capture it as group 3. This is
- ;; used later to match the opposite quote only if this capture
- ;; occurred
- (group "\""))
-
- ;; The contents of the string:
- (*? anything)
-
- ;; If there are any backslashes at the end of the string, capture
- ;; them as group 4 so we can suppress the normal escape syntax
- ;; parsing:
- (group (* "\\"))
-
- ;; Then the end of the string--the backreferences ensure that we
- ;; only match the kind of ending that corresponds to the beginning
- ;; we had:
- (or
- ;; There were "#"s - capture the last one as group 5 to mark it as
- ;; the end of the string:
- (seq "\"" (backref 2) (group "#"))
-
- ;; No "#"s - capture the ending quote (using a backref to group 3,
- ;; so that we can't match a quote if we had "#"s) as group 6
- (group (backref 3))))
- ;; If it matches, it ends up with the starting character of the
string
- ;; as group 1, any ending backslashes as group 4, and the ending
- ;; character as either group 5 or group 6.
+ (or
+ ;; Raw string: if it matches, it ends up with the starting
character
+ ;; of the string as group 1, any ending backslashes as group 4, and
+ ;; the ending character as either group 5 or group 6.
+ (seq
+ ;; The "r" starts the raw string. Capture it as group 1 to mark
it as such syntactically:
+ (group "r")
+
+ ;; Then either:
+ (or
+ ;; a sequence of at least one "#" (followed by quote). Capture all
+ ;; but the last "#" as group 2 for this case.
+ (seq (group (* "#")) "#\"")
+
+ ;; ...or a quote without any "#". Capture it as group 3. This is
+ ;; used later to match the opposite quote only if this capture
+ ;; occurred
+ (group "\""))
+
+ ;; The contents of the string:
+ (*? anything)
+
+ ;; If there are any backslashes at the end of the string, capture
+ ;; them as group 4 so we can suppress the normal escape syntax
+ ;; parsing:
+ (group (* "\\"))
+
+ ;; Then the end of the string--the backreferences ensure that we
+ ;; only match the kind of ending that corresponds to the beginning
+ ;; we had:
+ (or
+ ;; There were "#"s - capture the last one as group 5 to mark it
as
+ ;; the end of the string:
+ (seq "\"" (backref 2) (group "#"))
+
+ ;; No "#"s - capture the ending quote (using a backref to group
3,
+ ;; so that we can't match a quote if we had "#"s) as group 6
+ (group (backref 3))))
+
+ ;; Character literal: match the beginning ' of a character literal
+ ;; as group 7, and the ending one as group 8
+ (seq
+ (group "'")
+ (or
+ (seq
+ "\\"
+ (or
+ (: "U" (= 8 xdigit))
+ (: "u" (= 4 xdigit))
+ (: "x" (= 2 xdigit))
+ (any "'nrt0\"\\")))
+ (not (any "'\\"))
+ )
+ (group "'"))
+ )
)))
(rust-conditional-re-search-forward
- raw-str-regexp bound
- (lambda () (save-excursion
- (goto-char (match-beginning 0))
- (not (rust-in-str-or-cmnt)))))))
+ non-standard-str-regexp bound
+ ;; Predicate passed along with the regexp: it inspects the parser state
+ ;; at the start of the match and rejects the two cases listed below.
+ (lambda ()
+ (let ((pstate (syntax-ppss (match-beginning 0))))
+ (not
+ (or
+ (nth 4 pstate) ;; Skip if in a comment
+ (and (nth 3 pstate) (wholenump (nth 8 pstate)) (< (nth 8 pstate)
(match-beginning 0))) ;; Skip if in a string that isn't starting here
+ )))))))
(defvar rust-mode-font-lock-syntactic-keywords
(append
- ;; Handle single quoted character literals:
- (mapcar (lambda (re) (list re '(1 "\"") '(2 "\"")))
- '("\\('\\)[^']\\('\\)"
- "\\('\\)\\\\['nrt\"\\]\\('\\)"
- "\\('\\)\\\\x[[:xdigit:]]\\{2\\}\\('\\)"
- "\\('\\)\\\\u[[:xdigit:]]\\{4\\}\\('\\)"
- "\\('\\)\\\\U[[:xdigit:]]\\{8\\}\\('\\)"))
- ;; Handle raw strings:
- `((rust-look-for-raw-string (1 "|") (4 "_" nil t) (5 "|" nil t) (6 "|" nil
t)))))
+ ;; Handle raw strings and character literals:
+ ;; Groups 1, 5 and 6 (the raw-string delimiters) get generic string
+ ;; delimiter syntax, group 4 (trailing backslashes) gets symbol syntax,
+ ;; and groups 7 and 8 (the quotes of a character literal) get string
+ ;; quote syntax. Every group is marked optional because one match has
+ ;; either the raw-string groups or the character-literal groups.
+ `((rust-look-for-non-standard-string (1 "|" nil t) (4 "_" nil t) (5 "|" nil
t) (6 "|" nil t) (7 "\"" nil t) (8 "\"" nil t)))))
(defun rust-mode-syntactic-face-function (state)
"Syntactic face function to distinguish doc comments from other comments."
@@ -768,7 +798,7 @@ This is written mainly to be used as
`end-of-defun-function' for Rust."
(setq-local indent-line-function 'rust-mode-indent-line)
;; Fonts
- (add-to-list 'font-lock-extend-region-functions
'rust-extend-region-raw-string)
+ (add-to-list 'font-lock-extend-region-functions
'rust-font-lock-extend-region)
(setq-local font-lock-defaults '(rust-mode-font-lock-keywords
nil nil nil nil
(font-lock-syntactic-keywords .
rust-mode-font-lock-syntactic-keywords)