gbranden pushed a commit to branch master
in repository groff.

commit 8d25aa7190b7954757fa03b8e83fee852c00c799
Author: G. Branden Robinson <[email protected]>
AuthorDate: Thu Dec 25 17:32:04 2025 -0600

    src/roff/troff/input.cpp: Trivially refactor (1).
    
    Rename `get_copy()` to `read_char_in_copy_mode()`, because this function
    advances the input stream pointer and because it changes the formatter's
    lexing and parsing rules from the default to "copy mode", a fundamental
    *roff concept since J. F. Ossanna transformed roff(1) into the macro
    language nroff(1) with the introduction of macro definitions circa 1972.
    
      (get_copy): Rename this...
      (read_char_in_copy_mode): ...to this (decl and defn).
    
      (read_char_in_copy_mode): Rename Boolean-valued argument
      `handle_escape_E` to `handle_escaped_E`, to appease the ruthless
      grammarian in my head.
    
      (read_char_in_escape_sequence_parameter)
      (has_arg)
      (process_input_stack)
      (decode_macro_call_arguments)
      (decode_escape_sequence_arguments)
      (read_request)
      (do_define_string)
      (define_character)
      (do_define_macro)
      (length_request)
      (do_non_interpreted)
      (device_request)
      (output_request)
      (tag)
      (taga)
      (do_terminal)
      (do_write_request)
      (abort_request)
      (read_rest_of_line_as_argument): Update call sites.
---
 ChangeLog                |  34 ++++++++++
 src/roff/troff/input.cpp | 163 ++++++++++++++++++++++++++---------------------
 2 files changed, 126 insertions(+), 71 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 673d46de7..e162c235a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,37 @@
+2025-12-25  G. Branden Robinson <[email protected]>
+
+       * src/roff/troff/input.cpp: Trivially refactor.  Rename
+       `get_copy()` to `read_char_in_copy_mode()`, because this
+       function advances the input stream pointer and because it
+       changes the formatter's lexing and parsing rules from the
+       default to "copy mode", a fundamental *roff concept since J. F.
+       Ossanna transformed roff(1) into the macro language nroff(1)
+       with the introduction of macro definitions circa 1972.
+       (get_copy): Rename this...
+       (read_char_in_copy_mode): ...to this (decl and defn).
+       (read_char_in_copy_mode): Rename Boolean-valued argument
+       `handle_escape_E` to `handle_escaped_E`, to appease the ruthless
+       grammarian in my head.
+       (read_char_in_escape_sequence_parameter)
+       (has_arg)
+       (process_input_stack)
+       (decode_macro_call_arguments)
+       (decode_escape_sequence_arguments)
+       (read_request)
+       (do_define_string)
+       (define_character)
+       (do_define_macro)
+       (length_request)
+       (do_non_interpreted)
+       (device_request)
+       (output_request)
+       (tag)
+       (taga)
+       (do_terminal)
+       (do_write_request)
+       (abort_request)
+       (read_rest_of_line_as_argument): Update call sites.
+
 2025-12-23  G. Branden Robinson <[email protected]>
 
        * src/roff/troff/number.cpp (get_number_rigidly): Delete.  It no
diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp
index 1f6a9734c..d75a6fafc 100644
--- a/src/roff/troff/input.cpp
+++ b/src/roff/troff/input.cpp
@@ -153,7 +153,9 @@ search_path *mac_path = &safer_macro_path;
 // Initialize inclusion search path with only the current directory.
 search_path include_search_path(0 /* nullptr */, 0 /* nullptr */, 0, 1);
 
-static int get_copy(node**, bool = false, bool = false);
+static int read_char_in_copy_mode(node ** /* nd; 0 to discard */,
+                                 bool /* is_defining */ = false,
+                                 bool /* handle_escaped_E */ = false);
 static void copy_mode_error(const char *,
                            const errarg & = empty_errarg,
                            const errarg & = empty_errarg,
@@ -963,12 +965,13 @@ void shift()
   skip_line();
 }
 
+// TODO: return unsigned char (future: grochar)?  We handle EOF here.
 static char read_char_in_escape_sequence_parameter(bool allow_space
                                                   = false)
 {
-  int c = get_copy(0 /* nullptr */,
-                  false /* is defining */,
-                  true /* handle \E */);
+  int c = read_char_in_copy_mode(0 /* nullptr */,
+                                false /* is_defining */,
+                                true /* handle_escaped_E  */);
   switch (c) {
   case EOF:
     copy_mode_error("end of input in escape sequence");
@@ -1114,7 +1117,13 @@ static symbol read_increment_and_escape_parameter(int 
*incp)
   return symbol(buf);
 }
 
-static int get_copy(node **nd, bool is_defining, bool handle_escape_E)
+// In copy mode, we don't tokenize normally; characters on the input
+// stream are typically read into the contents of an existing node (like
+// a string or macro definition), or discarded.  A handful of escape
+// sequences (\n, etc.) interpolate as they do outside of copy mode.
+static int read_char_in_copy_mode(node **nd,
+                                 bool is_defining,
+                                 bool handle_escaped_E)
 {
   for (;;) {
     int c = input_stack::get(nd);
@@ -1142,7 +1151,7 @@ static int get_copy(node **nd, bool is_defining, bool 
handle_escape_E)
     }
     if (c == DOUBLE_QUOTE)
       continue;
-    if (c == ESCAPE_E && handle_escape_E)
+    if (c == ESCAPE_E && handle_escaped_E)
       c = escape_char;
     if (c == ESCAPE_NEWLINE) {
       if (is_defining)
@@ -1200,7 +1209,7 @@ static int get_copy(node **nd, bool is_defining, bool 
handle_escape_E)
       return ESCAPE_e;
     case 'E':
       (void) input_stack::get(0 /* nullptr */);
-      if (handle_escape_E)
+      if (handle_escaped_E)
        goto again;
       return ESCAPE_E;
     case 'n':
@@ -2102,7 +2111,7 @@ bool has_arg(bool want_peek)
     for (;;) {
       c = input_stack::peek();
       if (' ' == c)
-       (void) get_copy(0 /* nullptr */);
+       (void) read_char_in_copy_mode(0 /* nullptr */);
       else
        break;
     }
@@ -3551,7 +3560,7 @@ void process_input_stack()
            int cc;
            do {
              node *n;
-             cc = get_copy(&n);
+             cc = read_char_in_copy_mode(&n);
              if (cc != EOF) {
                if (cc != '\0')
                  curdiv->transparent_output(transparent_translate(cc));
@@ -4600,10 +4609,10 @@ static void decode_macro_call_arguments(macro_iterator 
*mi)
 {
   if (!tok.is_newline() && !tok.is_eof()) {
     node *n;
-    int c = get_copy(&n);
+    int c = read_char_in_copy_mode(&n);
     for (;;) {
       while (c == ' ')
-       c = get_copy(&n);
+       c = read_char_in_copy_mode(&n);
       if (c == '\n' || c == EOF)
        break;
       macro arg;
@@ -4614,7 +4623,7 @@ static void decode_macro_call_arguments(macro_iterator 
*mi)
       if (c == '"') {
        arg.append(DOUBLE_QUOTE);
        quote_input_level = input_stack::get_level();
-       c = get_copy(&n);
+       c = read_char_in_copy_mode(&n);
       }
       while (c != EOF && c != '\n'
             && !(c == ' ' && quote_input_level == 0)) {
@@ -4622,10 +4631,10 @@ static void decode_macro_call_arguments(macro_iterator 
*mi)
            && (want_att_compat
                || input_stack::get_level() == quote_input_level)) {
          arg.append(DOUBLE_QUOTE);
-         c = get_copy(&n);
+         c = read_char_in_copy_mode(&n);
          if (c == '"') {
            arg.append(c);
-           c = get_copy(&n);
+           c = read_char_in_copy_mode(&n);
          }
          else
            break;
@@ -4641,7 +4650,7 @@ static void decode_macro_call_arguments(macro_iterator 
*mi)
            }
            arg.append(c);
          }
-         c = get_copy(&n);
+         c = read_char_in_copy_mode(&n);
        }
       }
       arg.append(POP_GROFFCOMP_MODE);
@@ -4653,10 +4662,10 @@ static void decode_macro_call_arguments(macro_iterator 
*mi)
 static void decode_escape_sequence_arguments(macro_iterator *mi)
 {
   node *n;
-  int c = get_copy(&n);
+  int c = read_char_in_copy_mode(&n);
   for (;;) {
     while (c == ' ')
-      c = get_copy(&n);
+      c = read_char_in_copy_mode(&n);
     if (c == '\n' || c == EOF) {
       error("missing ']' in parameterized escape sequence");
       break;
@@ -4668,17 +4677,17 @@ static void 
decode_escape_sequence_arguments(macro_iterator *mi)
     bool was_warned = false; // about an input tab character
     if (c == '"') {
       quote_input_level = input_stack::get_level();
-      c = get_copy(&n);
+      c = read_char_in_copy_mode(&n);
     }
     while (c != EOF && c != '\n'
           && !(c == ']' && quote_input_level == 0)
           && !(c == ' ' && quote_input_level == 0)) {
       if (quote_input_level > 0 && c == '"'
          && input_stack::get_level() == quote_input_level) {
-       c = get_copy(&n);
+       c = read_char_in_copy_mode(&n);
        if (c == '"') {
          arg.append(c);
-         c = get_copy(&n);
+         c = read_char_in_copy_mode(&n);
        }
        else
          break;
@@ -4695,7 +4704,7 @@ static void 
decode_escape_sequence_arguments(macro_iterator *mi)
          }
          arg.append(c);
        }
-       c = get_copy(&n);
+       c = read_char_in_copy_mode(&n);
       }
     }
     mi->add_arg(arg, (c == ' '));
@@ -4924,16 +4933,16 @@ void read_request()
   int reading_from_terminal = isatty(fileno(stdin));
   int had_prompt = 0;
   if (has_arg(true /* peek */)) {
-    int c = get_copy(0);
+    int c = read_char_in_copy_mode(0 /* nullptr */);
     while (c == ' ')
-      c = get_copy(0);
+      c = read_char_in_copy_mode(0 /* nullptr */);
     while (c != EOF && c != '\n' && c != ' ') {
       if (!is_invalid_input_char(c)) {
        if (reading_from_terminal)
          fputc(c, stderr);
        had_prompt = 1;
       }
-      c = get_copy(0);
+      c = read_char_in_copy_mode(0 /* nullptr */);
     }
     if (c == ' ') {
       tok.make_space();
@@ -4992,11 +5001,11 @@ void do_define_string(define_mode mode, comp_mode comp)
     return;
   }
   else
-    c = get_copy(&n);
+    c = read_char_in_copy_mode(&n);
   while (c == ' ')
-    c = get_copy(&n);
+    c = read_char_in_copy_mode(&n);
   if (c == '"')
-    c = get_copy(&n);
+    c = read_char_in_copy_mode(&n);
   macro mac;
   request_or_macro *rm
     = static_cast<request_or_macro *>(request_dictionary.lookup(nm));
@@ -5012,7 +5021,7 @@ void do_define_string(define_mode mode, comp_mode comp)
       mac.append(n);
     else
       mac.append((unsigned char) c);
-    c = get_copy(&n);
+    c = read_char_in_copy_mode(&n);
   }
   if (comp == COMP_DISABLE || comp == COMP_ENABLE)
     mac.append(POP_GROFFCOMP_MODE);
@@ -5105,11 +5114,11 @@ void define_character(char_mode mode, const char 
*font_name)
     return;
   }
   else
-    c = get_copy(&n);
+    c = read_char_in_copy_mode(&n);
   while (c == ' ' || c == '\t')
-    c = get_copy(&n);
+    c = read_char_in_copy_mode(&n);
   if (c == '"')
-    c = get_copy(&n);
+    c = read_char_in_copy_mode(&n);
   macro *m = new macro;
   // Construct a macro from input characters; if the input character
   // code is 0, we've read a node--append that.
@@ -5118,7 +5127,7 @@ void define_character(char_mode mode, const char 
*font_name)
       m->append(static_cast<unsigned char>(c));
     else
       m->append(n);
-    c = get_copy(&n);
+    c = read_char_in_copy_mode(&n);
   }
   // Assign the macro to the character, discarding any previous macro.
   m = ci->set_macro(m, mode);
@@ -5423,7 +5432,7 @@ void do_define_macro(define_mode mode, calling_mode 
calling, comp_mode comp)
                                &start_lineno);
   node *n;
   // doing this here makes the line numbers come out right
-  int c = get_copy(&n, true /* is defining*/);
+  int c = read_char_in_copy_mode(&n, true /* is_defining */);
   macro mac;
   macro *mm = 0 /* nullptr */;
   if (mode == DEFINE_NORMAL || mode == DEFINE_APPEND) {
@@ -5445,7 +5454,7 @@ void do_define_macro(define_mode mode, calling_mode 
calling, comp_mode comp)
     while (c == ESCAPE_NEWLINE) {
       if (mode == DEFINE_NORMAL || mode == DEFINE_APPEND)
        mac.append(c);
-      c = get_copy(&n, true /* is defining */);
+      c = read_char_in_copy_mode(&n, true /* is_defining */);
     }
     if (reading_beginning_of_input_line && c == '.') {
       const char *s = term.contents();
@@ -5453,11 +5462,11 @@ void do_define_macro(define_mode mode, calling_mode 
calling, comp_mode comp)
       // see if it matches term
       int i = 0;
       if (s[0] != 0) {
-       while ((d = get_copy(&n)) == ' ' || d == '\t')
+       while (((d = read_char_in_copy_mode(&n)) == ' ') || (d == '\t'))
          ;
        if ((unsigned char) s[0] == d) {
          for (i = 1; s[i] != 0; i++) {
-           d = get_copy(&n);
+           d = read_char_in_copy_mode(&n);
            if ((unsigned char) s[i] != d)
              break;
          }
@@ -5465,7 +5474,7 @@ void do_define_macro(define_mode mode, calling_mode 
calling, comp_mode comp)
       }
       if (s[i] == 0
          && ((i == 2 && want_att_compat)
-             || (d = get_copy(&n)) == ' '
+             || ((d = read_char_in_copy_mode(&n)) == ' ')
              || d == '\n')) {  // we found it
        if (d == '\n')
          tok.make_newline();
@@ -5521,7 +5530,7 @@ void do_define_macro(define_mode mode, calling_mode 
calling, comp_mode comp)
        mac.append(c);
     }
     reading_beginning_of_input_line = (c == '\n');
-    c = get_copy(&n, true /* is defining */);
+    c = read_char_in_copy_mode(&n, true /* is_defining */);
   }
 }
 
@@ -5869,15 +5878,15 @@ void length_request()
     return;
   }
   else
-    c = get_copy(&n);
+    c = read_char_in_copy_mode(&n);
   while (c == ' ')
-    c = get_copy(&n);
+    c = read_char_in_copy_mode(&n);
   if (c == '"')
-    c = get_copy(&n);
+    c = read_char_in_copy_mode(&n);
   int len = 0;
   while (c != '\n' && c != EOF) {
     ++len;
-    c = get_copy(&n);
+    c = read_char_in_copy_mode(&n);
   }
   reg *r = static_cast<reg *>(register_dictionary.lookup(ret));
   if (r != 0 /* nullptr */)
@@ -6551,7 +6560,9 @@ static node *do_non_interpreted() // \?
   node *n;
   int c;
   macro mac;
-  while ((c = get_copy(&n)) != ESCAPE_QUESTION && c != EOF && c != '\n')
+  while (((c = read_char_in_copy_mode(&n)) != ESCAPE_QUESTION)
+        && (c != EOF)
+        && (c != '\n'))
     if (c == 0)
       mac.append(n);
     else
@@ -6764,9 +6775,9 @@ static void device_request()
   macro mac;
   int c;
   for (;;) {
-    c = get_copy(0 /* nullptr */);
+    c = read_char_in_copy_mode(0 /* nullptr */);
     if ('"' == c) {
-      c = get_copy(0 /* nullptr */);
+      c = read_char_in_copy_mode(0 /* nullptr */);
       break;
     }
     if (c != ' ' && c != '\t')
@@ -6776,7 +6787,7 @@ static void device_request()
     topdiv->begin_page();
   for (;
       (c != '\0') && (c != '\n') && (c != EOF);
-       c = get_copy(0 /* nullptr */)) {
+       c = read_char_in_copy_mode(0 /* nullptr */)) {
     // We may encounter some of the C0 and C1 character codes GNU troff
     // uses for special purposes; see src/roff/troff/input.h.  They
     // produce nothing in grout.  Warn only about the ones that are left
@@ -6792,7 +6803,7 @@ static void device_request()
     else if (c != '\\')
       mac.append(c);
     else {
-      int c1 = get_copy(0 /* nullptr */);
+      int c1 = read_char_in_copy_mode(0 /* nullptr */);
       if (c1 != '[') {
        mac.append(c);
        mac.append(c1);
@@ -6816,9 +6827,9 @@ static void device_request()
        // character escape sequence?
        bool is_valid = false;
        string sc = "";
-       int c2 = get_copy(0 /* nullptr */);
+       int c2 = read_char_in_copy_mode(0 /* nullptr */);
        for (; (c2 != '\0') && (c2 != '\n') && (c2 != EOF);
-            c2 = get_copy(0 /* nullptr */)) {
+            c2 = read_char_in_copy_mode(0 /* nullptr */)) {
          // XXX: `map_special_character_for_device_output()` will need
          // the closing bracket in the iterator we construct, but a
          // composite character mapping mustn't see it.
@@ -6877,15 +6888,17 @@ static void output_request()
   }
   int c;
   for (;;) {
-    c = get_copy(0 /* nullptr */);
+    c = read_char_in_copy_mode(0 /* nullptr */);
     if ('"' == c) {
-      c = get_copy(0 /* nullptr */);
+      c = read_char_in_copy_mode(0 /* nullptr */);
       break;
     }
     if (c != ' ' && c != '\t')
       break;
   }
-  for (; c != '\n' && c != EOF; c = get_copy(0 /* nullptr */))
+  for (;
+       (c != '\n') && (c != EOF);
+       (c = read_char_in_copy_mode(0 /* nullptr */)))
     topdiv->transparent_output(c);
   topdiv->transparent_output('\n');
   tok.next();
@@ -8064,16 +8077,18 @@ void tag()
     string s;
     int c;
     for (;;) {
-      c = get_copy(0);
+      c = read_char_in_copy_mode(0 /* nullptr */);
       if (c == '"') {
-       c = get_copy(0);
+       c = read_char_in_copy_mode(0 /* nullptr */);
        break;
       }
       if (c != ' ' && c != '\t')
        break;
     }
     s = "x X ";
-    for (; c != '\n' && c != EOF; c = get_copy(0))
+    for (;
+        (c != '\n') && (c != EOF);
+        (c = read_char_in_copy_mode(0 /* nullptr */)))
       s += (char) c;
     s += '\n';
     curenv->add_node(new tag_node(s, 0));
@@ -8087,16 +8102,18 @@ void taga()
     string s;
     int c;
     for (;;) {
-      c = get_copy(0);
+      c = read_char_in_copy_mode(0 /* nullptr */);
       if (c == '"') {
-       c = get_copy(0);
+       c = read_char_in_copy_mode(0 /* nullptr */);
        break;
       }
       if (c != ' ' && c != '\t')
        break;
     }
     s = "x X ";
-    for (; c != '\n' && c != EOF; c = get_copy(0))
+    for (;
+        (c != '\n') && (c != EOF);
+        (c = read_char_in_copy_mode(0 /* nullptr */)))
       s += (char) c;
     s += '\n';
     curenv->add_node(new tag_node(s, 1));
@@ -8111,15 +8128,17 @@ void do_terminal(int newline, int string_like)
   if (has_arg(true /* peek */)) {
     int c;
     for (;;) {
-      c = get_copy(0);
+      c = read_char_in_copy_mode(0 /* nullptr */);
       if (string_like && c == '"') {
-       c = get_copy(0);
+       c = read_char_in_copy_mode(0 /* nullptr */);
        break;
       }
       if (c != ' ' && c != '\t')
        break;
     }
-    for (; c != '\n' && c != EOF; c = get_copy(0))
+    for (;
+        (c != '\n') && (c != EOF);
+        (c = read_char_in_copy_mode(0 /* nullptr */)))
       fputs(asciify(c), stderr);
   }
   if (newline)
@@ -8348,14 +8367,14 @@ void do_write_request(int newline)
     return;
   }
   if (has_arg(true /* peek */)) {
-    int c = get_copy(0 /* nullptr */);
+    int c = read_char_in_copy_mode(0 /* nullptr */);
     while (' ' == c)
-      c = get_copy(0 /* nullptr */);
+      c = read_char_in_copy_mode(0 /* nullptr */);
     if ('"' == c)
-      c = get_copy(0 /* nullptr */);
+      c = read_char_in_copy_mode(0 /* nullptr */);
     while (c != '\n' && c != EOF) {
       fputs(asciify(c), fp);
-      c = get_copy(0 /* nullptr */);
+      c = read_char_in_copy_mode(0 /* nullptr */);
     }
   }
   if (newline)
@@ -9251,11 +9270,13 @@ void abort_request()
   else if (tok.is_newline())
     c = '\n';
   else {
-    while ((c = get_copy(0)) == ' ')
+    while ((c = read_char_in_copy_mode(0 /* nullptr */)) == ' ')
       ;
   }
   if (!(c == EOF || c == '\n')) {
-    for (; c != '\n' && c != EOF; c = get_copy(0))
+    for (;
+        (c != '\n') && (c != EOF);
+        (c = read_char_in_copy_mode(0 /* nullptr */)))
       fputs(asciify(c), stderr);
     fputc('\n', stderr);
   }
@@ -9277,11 +9298,11 @@ char *read_rest_of_line_as_argument()
   int buf_size = 256;
   char *s = new char[buf_size]; // C++03: new char[buf_size]();
   (void) memset(s, 0, (buf_size * sizeof(char)));
-  int c = get_copy(0 /* nullptr */);
+  int c = read_char_in_copy_mode(0 /* nullptr */);
   while (' ' == c)
-    c = get_copy(0 /* nullptr */);
+    c = read_char_in_copy_mode(0 /* nullptr */);
   if ('"' == c)
-    c = get_copy(0 /* nullptr */);
+    c = read_char_in_copy_mode(0 /* nullptr */);
   int i = 0;
   while ((c != '\n') && (c != EOF)) {
     if (!is_invalid_input_char(c)) {
@@ -9295,7 +9316,7 @@ char *read_rest_of_line_as_argument()
       }
       s[i++] = c;
     }
-    c = get_copy(0 /* nullptr */);
+    c = read_char_in_copy_mode(0 /* nullptr */);
   }
   s[i] = '\0';
   if (0 == i) {

_______________________________________________
groff-commit mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/groff-commit

Reply via email to