gbranden pushed a commit to branch master
in repository groff.

commit b6a737385406f9fd3df4ece0a4814b9fd1a500d9
Author: G. Branden Robinson <[email protected]>
AuthorDate: Thu Dec 25 23:04:21 2025 -0600

    [troff]: Fix Savannah #67734.
    
    * src/roff/troff/input.cpp: Add new Boolean-valued parameter to
      `read_input_until_terminator()`, `want_identifier`, defaulting to
      false, so that we can distinguish callers that want a GNU troff
      identifier from those gathering some other kind of input.  This is so
      that we can reject (all) C0 control and Latin-1 Supplement
      characters in identifiers.  (C1 controls are already rejected on
      input.)  The prohibition against C0 controls is to make the language
      less tolerant of unreadable input, and the latter is to enable us to
      pivot to reading UTF-8-encoded input in a future release.
    
      (read_input_until_terminator): Update declaration to add new parameter
      with default value.  Update definition to reject, with error
      diagnostic, character codes less than 32 and greater than
      159.  Add assertion that the putative identifier character is not a
      space (character code 32); these have never been valid in *roff
      identifiers.  This function's callers must ensure that the terminator
      precedes any space in the input.
    
    Fixes <https://savannah.gnu.org/bugs/?67734>.
    
    NEWS: Report change.
---
 ChangeLog                | 22 ++++++++++++++++++++++
 NEWS                     |  8 ++++++++
 src/roff/troff/input.cpp | 17 ++++++++++++++---
 3 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 577a2c6e9..d5ed09e02 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,25 @@
+2025-12-25  G. Branden Robinson <[email protected]>
+
+       * src/roff/troff/input.cpp: Add new Boolean-valued parameter to
+       `read_input_until_terminator()`, `want_identifier`, defaulting
+       to false, so that we can distinguish callers that want a GNU
+       troff identifier from those gathering some other kind of input.
+       This is so that we can reject (all) C0 control and Latin-1
+       Supplement characters in identifiers.  (C1 controls are already
+       rejected on input.)  The prohibition against C0 controls is to
+       make the language less tolerant of unreadable input, and the
+       latter is to enable us to pivot to reading UTF-8-encoded input
+       in a future release.
+       (read_input_until_terminator): Update declaration to add new
+       parameter with default value.  Update definition to reject, with
+       error diagnostic, character codes less than 32 and greater than
+       159.  Add assertion that the putative identifier character is
+       not a space (character code 32); these have never been valid in
+       *roff identifiers.  This function's callers must ensure that the
+       terminator precedes any space in the input.
+
+       Fixes <https://savannah.gnu.org/bugs/?67734>.
+
 2025-12-26  G. Branden Robinson <[email protected]>
 
        * src/roff/troff/input.cpp (read_delimited_measurement): Make
diff --git a/NEWS b/NEWS
index 886d45d03..34afd2ea0 100644
--- a/NEWS
+++ b/NEWS
@@ -125,6 +125,14 @@ troff
    escape sequence to serve as delimiter to another delimited escape
    sequence.
 
+*  GNU troff no longer accepts C0 controls or Latin-1 Supplement
+   characters in identifiers.  We prohibit C0 controls to make the
+   language less tolerant of unreadable input, and Latin-1 Supplement
+   code points to enable us to pivot to reading UTF-8-encoded input in a
+   future release.  (Thus, we plan for Latin-1 Supplement characters to
+   again be accepted in identifiers, but only as components of multibyte
+   UTF-8 sequences.)
+
 *  The `color`, `cp`, `kern`, `linetabs`, and `vpt` requests now
    interpret arguments with negative values as instructions to disable
    the corresponding feature, using the *roff integer-to-Boolean
diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp
index 183633534..be4afcded 100644
--- a/src/roff/troff/input.cpp
+++ b/src/roff/troff/input.cpp
@@ -178,7 +178,7 @@ static bool read_delimited_measurement(units * /* n */,
 static bool read_delimited_measurement(units * /* n */,
     unsigned char /* si */, units /* prev_value */);
 static symbol read_input_until_terminator(bool /* required */,
-    unsigned char /* end_char */);
+    unsigned char /* end_char */, bool /* want_identifier */ = false);
 static bool read_line_rule_expression(units * /* res */,
     unsigned char /* si */, charinfo ** /* cp */);
 static bool read_size(int *);
@@ -3191,13 +3191,15 @@ symbol read_identifier(bool required)
 
 symbol read_long_identifier(bool required)
 {
-  return read_input_until_terminator(required, 0U);
+  return read_input_until_terminator(required, 0U,
+                                    true /* want identifier */);
 }
 
 // Read bytes from input until reaching a null byte or the specified
 // `end_char`; construct and return a `symbol` object therefrom.
 static symbol read_input_until_terminator(bool required,
-                                         unsigned char end_char)
+                                         unsigned char end_char,
+                                         bool want_identifier)
 {
   tok.skip_spaces();
   int buf_size = default_buffer_size;
@@ -3233,6 +3235,15 @@ static symbol read_input_until_terminator(bool required,
     buf[i] = tok.ch();
     if ((0U == buf[i]) || (terminator == buf[i]))
       break;
+    else if (want_identifier && ((buf[i] < ' ') || (buf[i] > 159))) {
+      // Of C0 controls, Solaris, Heirloom, and Plan 9 troff support
+      // ^[BCEFG] (only) in identifiers.  DWB 3.3 supports none.
+      assert(buf[i] != ' '); // ensure caller handled spaces
+      error("character code %1 is not allowed in an identifier",
+           static_cast<int>(buf[i]));
+      delete[] buf;
+      return NULL_SYMBOL;
+    }
     i++;
     tok.next();
   }

_______________________________________________
groff-commit mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/groff-commit

Reply via email to