Control: tags 917314 patch
Control: tags 917398 patch

I'm using the attached patch formed by various patches from
the emacs-26 branch and others posted upstream[*]. It fixes
both bugs 917314 and 917398 and some other issues.

[*] https://debbugs.gnu.org/cgi/bugreport.cgi?bug=18871
    https://debbugs.gnu.org/cgi/bugreport.cgi?bug=32823
    https://debbugs.gnu.org/cgi/bugreport.cgi?bug=33887

It includes:
  ca14dd1d4628094dd33d5d94694dcf5f29e843b8 (emacs-26 branch)
  7dab3ee7ab54b3c2e7bc24170376054786c01d6f (emacs-26 branch)
  3a1a36b0b42772f35c70fb7e996ba8fed787e1c2 [1]
  a866e4f4b556fb4a346fa68c62296f10966690a1 [2]
  e4cde42657f8f91f795e6b7041dc50b896dc468d (emacs-26 branch)
  b8c659dc0c6ef41adc19db3c1884f73c1a10c8d8 [3]
  2025fa25f76fd8a2df46fca8807ca386372757d5 [4]
  d1520ab5b94d0f130955800ea11222a3702a5519 [5]
  0c3e6a97f92dec31e7e186dae933c86700034089 [6]
  290bbf4341c58339dafe46ed87b1979115306556 [7]

[1] 0001-Backport-sgml-syntax-propertize-rules-speedup-Bug-33.patch
    at https://debbugs.gnu.org/cgi/bugreport.cgi?bug=33887#58

[2] 0001-Fix-sgml-syntax-handling-of-quotes-in-comments.patch
    at https://debbugs.gnu.org/cgi/bugreport.cgi?bug=33887#73

[3] 0001-Keep-nxml-prolog-end-up-to-date-Bug-18871.patch
    at https://debbugs.gnu.org/cgi/bugreport.cgi?bug=18871#30

[4] 0001-Handle-lone-quote-500-characters-away-from-XML-tag-B.patch
    at https://debbugs.gnu.org/cgi/bugreport.cgi?bug=33887#94

[5] 0002-Handle-outside-SGML-XML-tags-Bug-33887.patch
    at https://debbugs.gnu.org/cgi/bugreport.cgi?bug=33887#94

[6] 0001-Don-t-fontify-text-outside-of-SGML-XML-tags-Bug-3388.patch
    at https://debbugs.gnu.org/cgi/bugreport.cgi?bug=33887#124

[7] 0001-Don-t-sgml-syntax-propertize-inside-XML-prolog-Bug-3.patch
    at https://debbugs.gnu.org/cgi/bugreport.cgi?bug=32823#39

-- 
Vincent Lefèvre <vinc...@vinc17.net> - Web: <https://www.vinc17.net/>
100% accessible validated (X)HTML - Blog: <https://www.vinc17.net/blog/>
Work: CR INRIA - computer arithmetic / AriC project (LIP, ENS-Lyon)
Index: emacs-26.1+1/lisp/nxml/nxml-rap.el
===================================================================
--- emacs-26.1+1.orig/lisp/nxml/nxml-rap.el
+++ emacs-26.1+1/lisp/nxml/nxml-rap.el
@@ -35,35 +35,25 @@
 ;;
 ;; Our strategy is to keep track of just the problematic things.
 ;; Specifically, we keep track of all comments, CDATA sections and
-;; processing instructions in the instance.  We do this by marking all
-;; except the first character of these with a non-nil nxml-inside text
-;; property. The value of the nxml-inside property is comment,
-;; cdata-section or processing-instruction.  The first character does
-;; not have the nxml-inside property so we can find the beginning of
-;; the construct by looking for a change in a text property value
-;; (Emacs provides primitives for this).  We use text properties
-;; rather than overlays, since the implementation of overlays doesn't
-;; look like it scales to large numbers of overlays in a buffer.
-;;
-;; We don't in fact track all these constructs, but only track them in
-;; some initial part of the instance.
+;; processing instructions in the instance.  We do this by marking
+;; the first character of these with the generic string syntax by setting
+;; a 'syntax-table' text property in `sgml-syntax-propertize'.
 ;;
 ;; Thus to parse some random point in the file we first ensure that we
-;; have scanned up to that point.  Then we search backwards for a
-;; <. Then we check whether the < has an nxml-inside property. If it
-;; does we go backwards to first character that does not have an
-;; nxml-inside property (this character must be a <).  Then we start
-;; parsing forward from the < we have found.
+;; have scanned up to that point.  Then we search backwards for a <.
+;; Then we check whether the < has the generic string syntax.  If it
+;; does we go backwards to first character of the generic string (this
+;; character must be a <).  Then we start parsing forward from the <
+;; we have found.
 ;;
 ;; The prolog has to be parsed specially, so we also keep track of the
 ;; end of the prolog in `nxml-prolog-end'. The prolog is reparsed on
 ;; every change to the prolog.  This won't work well if people try to
 ;; edit huge internal subsets. Hopefully that will be rare.
 ;;
-;; We keep track of the changes by adding to the buffer's
-;; after-change-functions hook.  Scanning is also done as a
-;; prerequisite to fontification by adding to fontification-functions
-;; (in the same way as jit-lock).  This means that scanning for these
+;; We rely on the `syntax-propertize-function' machinery to keep track
+;; of the changes in the buffer.  Fontification also relies on correct
+;; `syntax-table' properties.  This means that scanning for these
 ;; constructs had better be quick.  Fortunately it is. Firstly, the
 ;; typical proportion of comments, CDATA sections and processing
 ;; instructions is small relative to other things.  Secondly, to scan
@@ -79,7 +69,15 @@
   "Integer giving position following end of the prolog.")
 
 (defsubst nxml-get-inside (pos)
-  (save-excursion (nth 8 (syntax-ppss pos))))
+  "Return non-nil if inside comment, CDATA, or PI."
+  (let ((ppss (save-excursion (syntax-ppss pos))))
+    (or
+     ;; Inside comment.
+     (nth 4 ppss)
+     ;; Inside "generic" string which is used for CDATA, and PI.
+     ;; "Normal" double and single quoted strings are used for
+     ;; attribute values.
+     (eq t (nth 3 ppss)))))
 
 (defun nxml-inside-end (pos)
   "Return the end of the inside region containing POS.
@@ -110,6 +108,12 @@ Return nil if the character at POS is no
     (setq nxml-prolog-regions (xmltok-forward-prolog))
     (setq nxml-prolog-end (point))))
 
+(defun nxml-maybe-rescan-prolog (start _end _length)
+  "Reparse the prolog if START lies within it.
+`nxml-mode' adds this function on `after-change-functions'."
+  (when (<= start nxml-prolog-end)
+    (save-excursion
+      (nxml-scan-prolog))))
 
 ;;; Random access parsing
 
Index: emacs-26.1+1/test/lisp/nxml/nxml-mode-tests.el
===================================================================
--- /dev/null
+++ emacs-26.1+1/test/lisp/nxml/nxml-mode-tests.el
@@ -0,0 +1,125 @@
+;;; nxml-mode-tests.el --- Test NXML Mode -*- lexical-binding: t; -*-
+
+;; Copyright (C) 2019 Free Software Foundation, Inc.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
+
+;;; Code:
+
+(require 'ert)
+(require 'nxml-mode)
+
+(defun nxml-mode-tests-correctly-indented-string (str)
+  (with-temp-buffer
+    (nxml-mode)
+    (insert str)
+    (indent-region (point-min) (point-max))
+    (equal (buffer-string) str)))
+
+(ert-deftest nxml-indent-line-after-attribute ()
+  (should (nxml-mode-tests-correctly-indented-string "
+<settings
+    xmlns=\"http://maven.apache.org/SETTINGS/1.0.0\";
+    xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\";
+    xsi:schemaLocation=\"http://maven.apache.org/SETTINGS/1.0.0
+                        https://maven.apache.org/xsd/settings-1.0.0.xsd\";>
+  <mirrors>
+    ...
+  </mirrors>
+</settings>
+"))
+  (should (nxml-mode-tests-correctly-indented-string "\
+<x>
+  <abc xx=\"x/x/x/x/x/x/x/
+           y/y/y/y/y/y/
+           \">
+    <zzz/>
+  </abc>
+  <nl>&#10;</nl>
+</x>
+")))
+
+(ert-deftest nxml-balanced-close-start-tag-inline ()
+  (with-temp-buffer
+    (nxml-mode)
+    (insert "<a><b c=\"\"</a>")
+    (search-backward "</a>")
+    (nxml-balanced-close-start-tag-inline)
+    (should (equal (buffer-string) "<a><b c=\"\"></b></a>"))))
+
+(ert-deftest nxml-mode-font-lock-quotes ()
+  (with-temp-buffer
+    (nxml-mode)
+    (insert "<x a=\"dquote attr\" b='squote attr'>\"dquote text\"'squote 
text'</x>")
+    (font-lock-ensure)
+    (let ((squote-txt-pos (search-backward "squote text"))
+          (dquote-txt-pos (search-backward "dquote text"))
+          (squote-att-pos (search-backward "squote attr"))
+          (dquote-att-pos (search-backward "dquote attr")))
+      ;; Just make sure that each quote uses the same face for quoted
+      ;; attribute values, and a different face for quoted text
+      ;; outside tags.  Don't test `font-lock-string-face' vs
+      ;; `nxml-attribute-value' here.
+      (should (equal (get-text-property squote-att-pos 'face)
+                     (get-text-property dquote-att-pos 'face)))
+      (should (equal (get-text-property squote-txt-pos 'face)
+                     (get-text-property dquote-txt-pos 'face)))
+      (should-not (equal (get-text-property squote-txt-pos 'face)
+                         (get-text-property dquote-att-pos 'face))))))
+
+(ert-deftest nxml-mode-doctype-and-quote-syntax ()
+  (with-temp-buffer
+    (insert "<!DOCTYPE t [\n<!ENTITY f SYSTEM \"f.xml\">\n]>\n<t>'</t>")
+    (nxml-mode)
+    ;; Check that last tag is parsed as a tag.
+    (should (= 1 (car (syntax-ppss (1- (point-max))))))
+    (should (= 0 (car (syntax-ppss (point-max)))))))
+
+(ert-deftest nxml-mode-prolog-comment ()
+  (with-temp-buffer
+    (insert "<?xml version=\"1.0\" encoding=\"utf-8\"?><!-- comment1 -->
+<t><!-- comment2 --></t><!-- comment3 -->")
+    (nxml-mode)
+    ;; Check that all comments are parsed as comments
+    (goto-char (point-min))
+    (search-forward "comment1")
+    (should (nth 4 (syntax-ppss)))
+    (search-forward "comment2")
+    (should (nth 4 (syntax-ppss)))
+    (search-forward "comment3")))
+
+(ert-deftest nxml-mode-quote-in-long-text ()
+  (with-temp-buffer
+    (nxml-mode)
+    (insert "<t>"
+            ;; `syntax-propertize-wholelines' extends chunk size based
+            ;; on line length, so newlines are significant!
+            (make-string syntax-propertize-chunk-size ?a) "\n"
+            "'"
+            (make-string syntax-propertize-chunk-size ?a) "\n"
+            "</t>")
+    ;; If we just check (syntax-ppss (point-max)) immediately, then
+    ;; we'll end up propertizing the whole buffer in one chunk (so the
+    ;; test is useless).  Simulate something more like what happens
+    ;; when the buffer is viewed normally.
+    (cl-loop for pos from (point-min) to (point-max)
+             by syntax-propertize-chunk-size
+             do (syntax-ppss pos))
+    (syntax-ppss (point-max))
+    ;; Check that last tag is parsed as a tag.
+    (should (= 1 (- (car (syntax-ppss (1- (point-max))))
+                    (car (syntax-ppss (point-max))))))))
+
+(provide 'nxml-mode-tests)
+;;; nxml-mode-tests.el ends here
Index: emacs-26.1+1/lisp/textmodes/sgml-mode.el
===================================================================
--- emacs-26.1+1.orig/lisp/textmodes/sgml-mode.el
+++ emacs-26.1+1/lisp/textmodes/sgml-mode.el
@@ -100,24 +100,20 @@ a DOCTYPE or an XML declaration."
   :group 'sgml
   :type 'hook)
 
-;; As long as Emacs's syntax can't be complemented with predicates to context
-;; sensitively confirm the syntax of characters, we have to live with this
-;; kludgy kind of tradeoff.
-(defvar sgml-specials '(?\")
+;; The official handling of "--" is complicated in SGML, and
+;; historically not well supported by browser HTML parsers.
+;; Recommendations for writing HTML comments is to use <!--...-->
+;; (where ... doesn't contain "--") to avoid the complications
+;; altogether (XML goes even further by requiring this in the spec).
+;; So there is probably no need to handle it "correctly".
+(defvar sgml-specials '(?\" ?\')
   "List of characters that have a special meaning for SGML mode.
 This list is used when first loading the `sgml-mode' library.
-The supported characters and potential disadvantages are:
+The supported characters are ?\\\", ?\\=', and ?-.
 
-  ?\\\"        Makes \" in text start a string.
-  ?\\='        Makes \\=' in text start a string.
-  ?-   Makes -- in text start a comment.
-
-When only one of ?\\\" or ?\\=' are included, \"\\='\" or \\='\"\\=', as can 
be found in
-DTDs, start a string.  To partially avoid this problem this also makes these
-self insert as named entities depending on `sgml-quick-keys'.
-
-Including ?- has the problem of affecting dashes that have nothing to do
-with comments, so we normally turn it off.")
+Including ?- makes double dashes into comment delimiters, but
+they are really only supposed to delimit comments within DTD
+definitions.  So we normally turn it off.")
 
 (defvar sgml-quick-keys nil
   "Use <, >, &, /, SPC and `sgml-specials' keys \"electrically\" when non-nil.
@@ -341,6 +337,12 @@ Any terminating `>' or `/' is not matche
 (defvar sgml-font-lock-keywords sgml-font-lock-keywords-1
   "Rules for highlighting SGML code.  See also `sgml-tag-face-alist'.")
 
+(defun sgml-font-lock-syntactic-face (state)
+  "`font-lock-syntactic-face-function' for `sgml-mode'."
+  ;; Don't use string face outside of tags.
+  (cond ((and (nth 9 state) (nth 3 state)) font-lock-string-face)
+        ((nth 4 state) font-lock-comment-face)))
+
 (eval-and-compile
   (defconst sgml-syntax-propertize-rules
     (syntax-propertize-precompile-rules
@@ -351,12 +353,33 @@ Any terminating `>' or `/' is not matche
      ("--[ \t\n]*\\(>\\)" (1 "> b"))
      ("\\(<\\)[?!]" (1 (prog1 "|>"
                          (sgml-syntax-propertize-inside end))))
-     ;; Double quotes outside of tags should not introduce strings.
-     ;; Be careful to call `syntax-ppss' on a position before the one we're
-     ;; going to change, so as not to need to flush the data we just computed.
-     ("\"" (0 (if (prog1 (zerop (car (syntax-ppss (match-beginning 0))))
-                    (goto-char (match-end 0)))
-                  (string-to-syntax ".")))))))
+     ;; Quotes outside of tags should not introduce strings which end up
+     ;; hiding tags.  We used to test every quote and mark it as "."
+     ;; if it's outside of tags, but there are too many quotes and
+     ;; the resulting number of calls to syntax-ppss made it too slow
+     ;; (bug#33887), so we're now careful to leave alone any pair
+     ;; of quotes that doesn't hold a < or > char, which is the vast majority.
+     ;; We also check quotes which are unpaired to end of line,
+     ;; otherwise we miss the case where the quote might "contain" an
+     ;; angle bracket outside of the current syntax-propertize chunk
+     ;; (this relies on `syntax-propertize-wholelines' being enabled).
+     ("\\([\"']\\)[^<>\"']*\\([<>\"']\\|$\\)"
+      (1 (unless (eq (char-after (match-beginning 1))
+                     (char-after (match-beginning 2)))
+           ;; Be careful to call `syntax-ppss' on a position before the one
+           ;; we're going to change, so as not to need to flush the data we
+           ;; just computed.
+           (let ((ppss (syntax-ppss (match-beginning 0))))
+             ;; Can't rely on depth (nth 0 ppss), because we don't
+             ;; mark ">" outside of tags.
+             (if (prog1 (null (nth 9 ppss)) ; Outside tag.
+                   (goto-char (1- (match-end 0)))
+                   ;; If we're in a comment, don't skip over comment
+                   ;; ender.
+                   (when (nth 4 ppss)
+                     (skip-chars-backward "- \t\n")))
+                (string-to-syntax "."))))))
+     )))
 
 (defun sgml-syntax-propertize (start end)
   "Syntactic keywords for `sgml-mode'."
@@ -552,7 +575,7 @@ Do \\[describe-key] on the following bin
   ;; This is desirable because SGML discards a newline that appears
   ;; immediately after a start tag or immediately before an end tag.
   (setq-local paragraph-start (concat "[ \t]*$\\|\
-[ \t]*</?\\(" sgml-name-re sgml-attrs-re "\\)?>"))
+\[ \t]*</?\\(" sgml-name-re sgml-attrs-re "\\)?>"))
   (setq-local paragraph-separate (concat paragraph-start "$"))
   (setq-local adaptive-fill-regexp "[ \t]*")
   (add-hook 'fill-nobreak-predicate 'sgml-fill-nobreak nil t)
@@ -570,7 +593,9 @@ Do \\[describe-key] on the following bin
   (setq font-lock-defaults '((sgml-font-lock-keywords
                              sgml-font-lock-keywords-1
                              sgml-font-lock-keywords-2)
-                            nil t))
+                             nil t nil
+                             (font-lock-syntactic-face-function
+                              . sgml-font-lock-syntactic-face)))
   (setq-local syntax-propertize-function #'sgml-syntax-propertize)
   (setq-local facemenu-add-face-function 'sgml-mode-facemenu-add-face-function)
   (setq-local sgml-xml-mode (sgml-xml-guess))
Index: emacs-26.1+1/test/lisp/textmodes/sgml-mode-tests.el
===================================================================
--- emacs-26.1+1.orig/test/lisp/textmodes/sgml-mode-tests.el
+++ emacs-26.1+1/test/lisp/textmodes/sgml-mode-tests.el
@@ -125,11 +125,29 @@ The point is set to the beginning of the
      (should (string= content (buffer-string))))))
 
 (ert-deftest sgml-delete-tag-bug-8203-should-not-delete-apostrophe ()
-  :expected-result :failed
   (sgml-with-content
    "<title>Winter is comin'</title>"
    (sgml-delete-tag 1)
    (should (string= "Winter is comin'" (buffer-string)))))
 
+(ert-deftest sgml-tests--quotes-syntax ()
+  (dolist (str '("a\"b <t>c'd</t>"
+                 "a'b <t>c\"d</t>"
+                 "<t>\"a'</t>"
+                 "<t>'a\"</t>"
+                 "<t>\"a'\"</t>"
+                 "<t>'a\"'</t>"
+                 "<t><!-- ' --></t>"
+                 "<t><!-- \" --></t>"
+                 ;; Yes, ">" is technically valid outside tags!
+                 "<t>>'</t>"
+                 ))
+    (ert-info (str :prefix "Test string: ")
+      (sgml-with-content
+       str
+       ;; Check that last tag is parsed as a tag.
+       (should (= 1 (- (car (syntax-ppss (1- (point-max))))
+                       (car (syntax-ppss (point-max))))))))))
+
 (provide 'sgml-mode-tests)
 ;;; sgml-mode-tests.el ends here
Index: emacs-26.1+1/lisp/nxml/nxml-mode.el
===================================================================
--- emacs-26.1+1.orig/lisp/nxml/nxml-mode.el
+++ emacs-26.1+1/lisp/nxml/nxml-mode.el
@@ -423,6 +423,30 @@ reference.")
     (when rng-validate-mode
       (rng-validate-while-idle (current-buffer)))))
 
+(defvar nxml-prolog-end) ;; nxml-rap.el
+
+(defun nxml-syntax-propertize (start end)
+  "Syntactic keywords for `nxml-mode'."
+  ;; Like `sgml-syntax-propertize', but handle `nxml-prolog-regions'.
+  (when (< start nxml-prolog-end)
+    (catch 'done-prolog
+      (dolist (prolog-elem nxml-prolog-regions)
+        (let ((type (aref prolog-elem 0))
+              (pbeg (aref prolog-elem 1))
+              (pend (aref prolog-elem 2)))
+          (when (eq type 'comment)
+            (put-text-property pbeg (1+ pbeg)
+                               'syntax-table (string-to-syntax "< b"))
+            (put-text-property (1- pend) pend
+                               'syntax-table (string-to-syntax "> b")))
+          (when (> pend end)
+            (throw 'done-prolog t)))))
+    (setq start nxml-prolog-end))
+  (if (>= start end)
+      (goto-char end)
+    (goto-char start)
+    (sgml-syntax-propertize start end)))
+
 (defvar tildify-space-string)
 (defvar tildify-foreach-region-function)
 
@@ -518,8 +542,9 @@ Many aspects this mode can be customized
        (nxml-with-invisible-motion
          (nxml-scan-prolog)))))
   (setq-local syntax-ppss-table sgml-tag-syntax-table)
-  (setq-local syntax-propertize-function #'sgml-syntax-propertize)
+  (setq-local syntax-propertize-function #'nxml-syntax-propertize)
   (add-hook 'change-major-mode-hook #'nxml-cleanup nil t)
+  (add-hook 'after-change-functions #'nxml-maybe-rescan-prolog nil t)
 
   ;; Emacs 23 handles the encoding attribute on the xml declaration
   ;; transparently to nxml-mode, so there is no longer a need for the below
@@ -540,7 +565,9 @@ Many aspects this mode can be customized
           nil  ; no special syntax table
           (font-lock-extend-region-functions . (nxml-extend-region))
           (jit-lock-contextually . t)
-          (font-lock-unfontify-region-function . nxml-unfontify-region)))
+          (font-lock-unfontify-region-function . nxml-unfontify-region)
+          (font-lock-syntactic-face-function
+           . sgml-font-lock-syntactic-face)))
 
   (with-demoted-errors (rng-nxml-mode-init)))
 
@@ -2378,7 +2405,9 @@ With a prefix argument, inserts the char
 (put 'nxml-char-ref 'evaporate t)
 
 (defun nxml-char-ref-display-extra (start end n)
-  (when nxml-char-ref-extra-display
+  (when (and ;; Displaying literal newline is unhelpful.
+         (not (eql n ?\n))
+         nxml-char-ref-extra-display)
     (let ((name (or (get-char-code-property n 'name)
                     (get-char-code-property n 'old-name)))
          (glyph-string (and nxml-char-ref-display-glyph-flag
Index: emacs-26.1+1/lisp/font-lock.el
===================================================================
--- emacs-26.1+1.orig/lisp/font-lock.el
+++ emacs-26.1+1/lisp/font-lock.el
@@ -527,9 +527,12 @@ If nil, this is ignored, in which case t
 sometimes be slightly incorrect.")
 (make-variable-buffer-local 'font-lock-syntactically-fontified)
 
+(defun font-lock-syntactic-face-function-default (state)
+  "Default value for `font-lock-syntactic-face-function'."
+  (if (nth 3 state) font-lock-string-face font-lock-comment-face))
+
 (defvar font-lock-syntactic-face-function
-  (lambda (state)
-    (if (nth 3 state) font-lock-string-face font-lock-comment-face))
+  #'font-lock-syntactic-face-function-default
   "Function to determine which face to use when fontifying syntactically.
 The function is called with a single parameter (the state as returned by
 `parse-partial-sexp' at the beginning of the region to highlight) and

Reply via email to