gbranden pushed a commit to branch master
in repository groff.

commit 9896f5d7c4cf80177741ae64aee217a89ab96e4f
Author: G. Branden Robinson <[email protected]>
AuthorDate: Sun Apr 19 09:34:18 2026 -0500

    [troff]: Refactor, adding token::is_terminator().
    
    * src/roff/troff/token.h (class token): Add new member function,
      `is_terminator()` returning `bool`.  The idea is to make handlers of
      formatter instructions more consistent by having one source of truth
      regarding whether, in the parlance of parser theory, the token on the
      input stream terminates a "nonterminal symbol" (like an identifier or
      numeric expression, which can comprise multiple "terminal" or
      "primitive" tokens).  Right now, GNU troff pretty consistently regards
      newline and EOF tokens as terminating a sequence of such nonterminals.
      It's less consistent regarding whether a right brace escape sequence
      does, causing inconsistency in its handling of numeric expressions as
      arguments to `nr` requests on the one hand and "contents" arguments to
      `ds` requests on the other, and divergence between AT&T and GNU troff
      parsers.  See Savannah #68257.  (token::is_terminator): Define it
      `inline`.  Equivalent to `tok.is_newline() && tok.is_eof()`.
    
    * src/roff/troff/div.cpp (begin_page_request)
      (space_request)
      (need_vertical_space_request)
      (save_vertical_space_request)
      (output_saved_vertical_space_request)
      (flush_request):
    * src/roff/troff/env.cpp (fill)
      (no_fill)
      (center)
      (right_justify)
      (indent)
      (temporary_indent)
      (number_lines)
      (do_break_request):
    * src/roff/troff/input.cpp (diagnose_missing_identifier)
      (diagnose_invalid_identifier)
      (flush_pending_lines)
      (decode_macro_call_arguments)
      (remove_character)
      (do_define_macro)
      (read_title_parts)
      (ps_bbox_request)
      (do_translate)
      (hyphenation_patterns_file_code)
      (define_class_request)
      (read_drawing_command):
    * src/roff/troff/node.cpp (remove_font_specific_character_request):
      Migrate input handlers to call `token::is_terminator()` rather than
      the evaluating Boolean expression `tok.is_newline() && tok.is_eof()`.
---
 ChangeLog                | 50 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/roff/troff/div.cpp   | 10 +++++-----
 src/roff/troff/env.cpp   | 20 +++++++++----------
 src/roff/troff/input.cpp | 26 ++++++++++++-------------
 src/roff/troff/node.cpp  |  2 +-
 src/roff/troff/token.h   |  9 +++++++++
 6 files changed, 88 insertions(+), 29 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 954440696..0d55a93ba 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,53 @@
+2026-04-19  G. Branden Robinson <[email protected]>
+
+       * src/roff/troff/token.h (class token): Add new member function,
+       `is_terminator()` returning `bool`.  The idea is to make
+       handlers of formatter instructions more consistent by having one
+       source of truth regarding whether, in the parlance of parser
+       theory, the token on the input stream terminates a "nonterminal
+       symbol" (like an identifier or numeric expression, which can
+       comprise multiple "terminal" or "primitive" tokens).  Right now,
+       GNU troff pretty consistently regards newline and EOF tokens as
+       terminating a sequence of such nonterminals.  It's less
+       consistent regarding whether a right brace escape sequence does,
+       causing inconsistency in its handling of numeric expressions as
+       arguments to `nr` requests on the one hand and "contents"
+       arguments to `ds` requests on the other, and divergence between
+       AT&T and GNU troff parsers.  See Savannah #68257.
+       (token::is_terminator): Define it `inline`.  Equivalent to
+       `tok.is_newline() && tok.is_eof()`.
+
+       * src/roff/troff/div.cpp (begin_page_request)
+       (space_request)
+       (need_vertical_space_request)
+       (save_vertical_space_request)
+       (output_saved_vertical_space_request)
+       (flush_request):
+       * src/roff/troff/env.cpp (fill)
+       (no_fill)
+       (center)
+       (right_justify)
+       (indent)
+       (temporary_indent)
+       (number_lines)
+       (do_break_request):
+       * src/roff/troff/input.cpp (diagnose_missing_identifier)
+       (diagnose_invalid_identifier)
+       (flush_pending_lines)
+       (decode_macro_call_arguments)
+       (remove_character)
+       (do_define_macro)
+       (read_title_parts)
+       (ps_bbox_request)
+       (do_translate)
+       (hyphenation_patterns_file_code)
+       (define_class_request)
+       (read_drawing_command):
+       * src/roff/troff/node.cpp
+       (remove_font_specific_character_request): Migrate input handlers
+       to call `token::is_terminator()` rather than the evaluating
+       Boolean expression `tok.is_newline() && tok.is_eof()`.
+
 2026-04-19  G. Branden Robinson <[email protected]>
 
        * src/roff/groff/tests/dt-request-works.sh: Add test case: a
diff --git a/src/roff/troff/div.cpp b/src/roff/troff/div.cpp
index 60359c163..ceb383a18 100644
--- a/src/roff/troff/div.cpp
+++ b/src/roff/troff/div.cpp
@@ -797,7 +797,7 @@ static void begin_page_request() // .bp
   if (has_arg()
       && read_integer_crement(&n, topdiv->get_page_number()))
     got_arg = true;
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
   if (curdiv == topdiv) {
     if (topdiv->before_first_page_status > 0) {
@@ -883,7 +883,7 @@ static void space_request() // .sp
   vunits n;
   if (!has_arg() || !read_vunits(&n, 'v'))
     n = curenv->get_vertical_spacing();
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
   if (!unpostpone_traps() && !curdiv->is_in_no_space_mode)
     curdiv->space(n);
@@ -911,7 +911,7 @@ static void need_vertical_space_request() // .ne
   vunits n;
   if (!has_arg() || !read_vunits(&n, 'v'))
     n = curenv->get_vertical_spacing();
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
   curdiv->need(n);
   tok.next();
@@ -947,7 +947,7 @@ static void save_vertical_space_request() // .sv
 
 static void output_saved_vertical_space_request() // .os
 {
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
   if (saved_space > V0)
     curdiv->space(saved_space, true /* forcing */);
@@ -957,7 +957,7 @@ static void output_saved_vertical_space_request() // .os
 
 static void flush_request() // .fl
 {
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
   if (was_invoked_with_regular_control_character)
     curenv->do_break();
diff --git a/src/roff/troff/env.cpp b/src/roff/troff/env.cpp
index 3d7029355..b0027304d 100644
--- a/src/roff/troff/env.cpp
+++ b/src/roff/troff/env.cpp
@@ -1499,7 +1499,7 @@ static void space_size() // .ss
 
 static void fill() // .fi
 {
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
   if (was_invoked_with_regular_control_character)
     curenv->do_break();
@@ -1509,7 +1509,7 @@ static void fill() // .fi
 
 static void no_fill() // .nf
 {
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
   if (was_invoked_with_regular_control_character)
     curenv->do_break();
@@ -1532,7 +1532,7 @@ void center() // .ce
     n = 1;
   else if (n < 0)
     n = 0;
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
   if (was_invoked_with_regular_control_character)
     curenv->do_break();
@@ -1553,7 +1553,7 @@ void right_justify() // .rj
     n = 1;
   else if (n < 0)
     n = 0;
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
   if (was_invoked_with_regular_control_character)
     curenv->do_break();
@@ -1666,7 +1666,7 @@ void indent() // .in
   }
   else
     temp = curenv->prev_indent;
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
   if (was_invoked_with_regular_control_character)
     curenv->do_break();
@@ -1702,7 +1702,7 @@ void temporary_indent() // .ti
     if (!read_hunits(&temp, 'm', curenv->get_indent()))
       is_valid = false;
     // XXX: Why not `skip_line()`?
-    while (!tok.is_newline() && !tok.is_eof())
+    while (!tok.is_terminator())
       tok.next();
   }
   if (was_invoked_with_regular_control_character)
@@ -1816,7 +1816,7 @@ void number_lines() // .nm
       }
     }
     else
-      while (!tok.is_space() && !tok.is_newline() && !tok.is_eof())
+      while (!tok.is_space() && !tok.is_terminator())
        tok.next();
     if (has_arg()) {
       if (!tok.is_usable_as_delimiter()) { // XXX abuse of function
@@ -1830,7 +1830,7 @@ void number_lines() // .nm
        }
       }
       else
-       while (!tok.is_space() && !tok.is_newline() && !tok.is_eof())
+       while (!tok.is_space() && !tok.is_terminator())
          tok.next();
       if (has_arg()) {
        if (!tok.is_usable_as_delimiter()) { // XXX abuse of function
@@ -1838,7 +1838,7 @@ void number_lines() // .nm
            curenv->number_text_separation = n;
        }
        else
-         while (!tok.is_space() && !tok.is_newline() && !tok.is_eof())
+         while (!tok.is_space() && !tok.is_terminator())
            tok.next();
        if (has_arg() && !tok.is_usable_as_delimiter() // XXX abuse of function
            && read_integer(&n))
@@ -2736,7 +2736,7 @@ bool environment::is_empty()
 
 void do_break_request(bool want_adjustment)
 {
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
   if (was_invoked_with_regular_control_character)
     curenv->do_break(want_adjustment);
diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp
index 5e93e032a..01cf11d5c 100644
--- a/src/roff/troff/input.cpp
+++ b/src/roff/troff/input.cpp
@@ -3236,7 +3236,7 @@ static void diagnose_missing_identifier(bool is_mandatory)
       tok.next();
     } while (tok.is_space() || tok.is_right_brace() || tok.is_tab());
     // XXX: unreachable code? --GBR
-    if (!tok.is_newline() && !tok.is_eof())
+    if (!tok.is_terminator())
       error("%1 is not allowed before an argument", start);
     else if (is_mandatory)
       warning(WARN_MISSING, "missing identifier");
@@ -3251,7 +3251,7 @@ static void diagnose_missing_identifier(bool is_mandatory)
 
 static void diagnose_invalid_identifier()
 {
-  if (!tok.is_newline() && !tok.is_eof() && !tok.is_space()
+  if (!tok.is_terminator() && !tok.is_space()
       && !tok.is_tab() && !tok.is_right_brace()
       // We don't want to give a warning for .el\{
       && !tok.is_left_brace())
@@ -3867,7 +3867,7 @@ void process_input_stack()
 
 void flush_pending_lines()
 {
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
   curenv->output_pending_lines();
   tok.next();
@@ -4740,7 +4740,7 @@ static void interpolate_macro_or_invoke_request(symbol nm,
 
 static void decode_macro_call_arguments(macro_iterator *mi)
 {
-  if (!tok.is_newline() && !tok.is_eof()) {
+  if (!tok.is_terminator()) {
     node *n;
     int c = read_character_in_copy_mode(&n);
     for (;;) {
@@ -5340,7 +5340,7 @@ static void remove_character()
     skip_line();
     return;
   }
-  while (!tok.is_newline() && !tok.is_eof()) {
+  while (!tok.is_terminator()) {
     if (!tok.is_space() && !tok.is_tab()) {
       if (tok.is_any_character()) {
        charinfo *ci = tok.get_charinfo(true /* is_mandatory */,
@@ -5557,7 +5557,7 @@ static void do_define_macro(define_mode mode, 
calling_mode calling,
   term = read_identifier(); // terminating name
   if (term.is_null())
     term = dot_symbol;
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
   const char *start_filename;
   int start_lineno;
@@ -6595,7 +6595,7 @@ void read_title_parts(node **part, hunits *part_width)
   int start_level = input_stack::get_level();
   tok.next();
   for (int i = 0; i < 3; i++) {
-    while (!tok.is_newline() && !tok.is_eof()) {
+    while (!tok.is_terminator()) {
       if ((tok == start)
          && (want_att_compat
              || input_stack::get_level() == start_level)) {
@@ -6618,7 +6618,7 @@ void read_title_parts(node **part, hunits *part_width)
     part_width[i] = curenv->get_input_line_position();
     part[i] = curenv->extract_output_line();
   }
-  while (!tok.is_newline() && !tok.is_eof())
+  while (!tok.is_terminator())
     tok.next();
 }
 
@@ -8089,7 +8089,7 @@ void ps_bbox_request() // .psbb
   else {
     // File name acquired: swallow the rest of the line.
     //
-    while (!tok.is_newline() && !tok.is_eof())
+    while (!tok.is_terminator())
       tok.next();
     errno = 0;
 
@@ -8705,7 +8705,7 @@ static void init_hpf_code_table()
 static void do_translate(bool transparently, bool as_input)
 {
   tok.skip_spaces();
-  while (!tok.is_newline() && !tok.is_eof()) {
+  while (!tok.is_terminator()) {
     if (tok.is_space()) {
       // This is a really bizarre troff feature.
       tok.next();
@@ -8922,7 +8922,7 @@ void hyphenation_patterns_file_code() // .hpfcode
     skip_line();
     return;
   }
-  while (!tok.is_newline() && !tok.is_eof()) {
+  while (!tok.is_terminator()) {
     int n1, n2;
     if (read_integer(&n1) && ((0 <= n1) && (n1 <= 255))) {
       if (!has_arg()) {
@@ -8963,7 +8963,7 @@ static void define_class_request() // .class
   (void) ci->set_macro(m);
   charinfo *child1 = 0 /* nullptr */, *child2 = 0 /* nullptr */;
   bool just_chained_a_range_expression = false;
-  while (!tok.is_newline() && !tok.is_eof()) {
+  while (!tok.is_terminator()) {
     tok.skip_spaces();
     // Chained range expressions like
     //   \[u3041]-\[u3096]-\[u30FF]
@@ -10610,7 +10610,7 @@ static node *read_drawing_command() // \D
        }
        tok.skip_spaces();
       }
-      while (tok != start_token && !tok.is_newline() && !tok.is_eof())
+      while (tok != start_token && !tok.is_terminator())
        tok.next();
       if (!had_error) {
        switch (type) {
diff --git a/src/roff/troff/node.cpp b/src/roff/troff/node.cpp
index 6d61b7f03..9f6d72393 100644
--- a/src/roff/troff/node.cpp
+++ b/src/roff/troff/node.cpp
@@ -7119,7 +7119,7 @@ static void remove_font_specific_character_request() // 
.rfschar
                      " character");
   else {
     symbol f = font_table[finfo.position]->get_name();
-    while (!tok.is_newline() && !tok.is_eof()) {
+    while (!tok.is_terminator()) {
       if (!tok.is_space() && !tok.is_tab()) {
        charinfo *s = tok.get_charinfo(true /* is_mandatory */);
        if (0 /* nullptr */ == s)
diff --git a/src/roff/troff/token.h b/src/roff/troff/token.h
index 0037a32e2..49f4f09fd 100644
--- a/src/roff/troff/token.h
+++ b/src/roff/troff/token.h
@@ -109,6 +109,7 @@ public:
   bool is_page_ejector();
   bool is_hyphen_indicator();
   bool is_zero_width_break();
+  bool is_terminator();
   bool operator==(const token &); // for delimiters & conditional exprs
   bool operator!=(const token &); // ditto
   unsigned char ch();
@@ -292,6 +293,14 @@ inline bool token::is_zero_width_break()
   return (TOKEN_ZERO_WIDTH_BREAK == type);
 }
 
+// Does this token terminate a nonterminal symbol such as an identifier
+// or numeric expression?
+inline bool token::is_terminator()
+{
+  // TODO: Add right brace?  Left brace?  Tab?
+  return ((TOKEN_NEWLINE == type) || (TOKEN_EOF == type));
+}
+
 // Local Variables:
 // fill-column: 72
 // mode: C++

_______________________________________________
groff-commit mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/groff-commit

Reply via email to