gbranden pushed a commit to branch master
in repository groff.

commit 034b847a2d19891c0bea5437815c02e863076b80
Author: G. Branden Robinson <g.branden.robin...@gmail.com>
AuthorDate: Thu Jul 10 20:39:03 2025 -0500

    src/roff/troff/input.cpp: Refactor to simplify.
    
    * src/roff/troff/input.cpp: As an apparent optimization (dating back as
      far as groff 1.02, 1991) parts of the input parser that needed to read
      arbitrarily far ahead in the input stream to consume a token used an
      initial stack-allocated buffer of 16 bytes.  (Also known as
      "automatic" storage, thus the name "abuf", I think.)  If 16 bytes
      wasn't enough, open-coded logic would allocate a buffer from the heap
      and double it in size until the token fit.  Simplify by converting
      `ABUF_SIZE` from a preprocessor macro to a `const` symbol named
      `default_buffer_size`, and _always_ allocate from the heap.  In the
      future, we can convert stuff like this to a C++ STL `vector` and let
      the standard library worry about dynamic storage management.
    
      (read_long_escape_parameters, do_get_long_name, get_delimited_name):
      Do it.
---
 ChangeLog                |  18 +++++++
 src/roff/troff/input.cpp | 137 ++++++++++++++++-------------------------------
 2 files changed, 63 insertions(+), 92 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 0b6b81fde..fd18e1893 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2025-07-10  G. Branden Robinson <g.branden.robin...@gmail.com>
+
+       * src/roff/troff/input.cpp: Refactor to simplify.  As an
+       apparent optimization (dating back as far as groff 1.02, 1991)
+       parts of the input parser that needed to read arbitrarily far
+       ahead in the input stream to consume a token used an initial
+       stack-allocated buffer of 16 bytes.  (Also known as "automatic"
+       storage, thus the name "abuf", I think.)  If 16 bytes wasn't
+       enough, open-coded logic would allocate a buffer from the heap
+       and double it in size until the token fit.  Simplify by
+       converting `ABUF_SIZE` from a preprocessor macro to a `const`
+       symbol named `default_buffer_size`, and _always_ allocate from
+       the heap.  In the future, we can convert stuff like this to a
+       C++ STL `vector` and let the standard library worry about
+       dynamic storage management.
+       (read_long_escape_parameters, do_get_long_name)
+       (get_delimited_name): Do it.
+
 2025-07-10  G. Branden Robinson <g.branden.robin...@gmail.com>
 
        * src/roff/troff/input.cpp: Fix code style nits.
diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp
index b9acacd2c..4275e9f87 100644
--- a/src/roff/troff/input.cpp
+++ b/src/roff/troff/input.cpp
@@ -80,12 +80,12 @@ extern "C" {
      (WARN_CHAR|WARN_NUMBER|WARN_BREAK|WARN_SPACE|WARN_FONT|WARN_FILE)
 #endif
 
-// initial size of buffer for reading names; expanded as necessary
-#define ABUF_SIZE 16
-
 extern "C" const char *program_name;
 extern "C" const char *Version_string;
 
+// initial size for input buffers that need to grow arbitrarily
+static const int default_buffer_size = 16;
+
 #ifdef COLUMN
 void init_column_requests();
 #endif /* COLUMN */
@@ -995,39 +995,28 @@ static symbol read_two_char_escape_parameter()
 static symbol read_long_escape_parameters(read_mode mode)
 {
   int start_level = input_stack::get_level();
-  char abuf[ABUF_SIZE];
-  char *buf = abuf;
-  int buf_size = ABUF_SIZE;
+  int buf_size = default_buffer_size;
+  char *buf = new char[buf_size];
   int i = 0;
   char c;
   bool have_char = false;
   for (;;) {
     c = get_char_for_escape_parameter(have_char && (WITH_ARGS == mode));
     if ('\0' == c) {
-      if (buf != abuf)
-       delete[] buf;
+      delete[] buf;
       return NULL_SYMBOL;
     }
     have_char = true;
     if ((WITH_ARGS == mode) && (' ' == c))
       break;
-    if ((i + 2) > buf_size) {
-      if (buf == abuf) {
-       // C++03: new char[ABUF_SIZE * 2]();
-       buf = new char[ABUF_SIZE * 2];
-       (void) memset(buf, 0, (ABUF_SIZE * 2 * sizeof(char)));
-       memcpy(buf, abuf, buf_size);
-       buf_size = ABUF_SIZE * 2;
-      }
-      else {
-       char *old_buf = buf;
-       // C++03: new char[buf_size * 2]();
-       buf = new char[buf_size * 2];
-       (void) memset(buf, 0, (buf_size * 2 * sizeof(char)));
-       memcpy(buf, old_buf, buf_size);
-       buf_size *= 2;
-       delete[] old_buf;
-      }
+    if (i + 2 > buf_size) {
+      char *old_buf = buf;
+      // C++03: new char[buf_size * 2]();
+      buf = new char[buf_size * 2];
+      (void) memset(buf, 0, (buf_size * 2 * sizeof(char)));
+      memcpy(buf, old_buf, buf_size);
+      buf_size *= 2;
+      delete[] old_buf;
     }
     if ((']' == c) && (input_stack::get_level() == start_level))
       break;
@@ -1043,14 +1032,9 @@ static symbol read_long_escape_parameters(read_mode mode)
   }
   if (' ' == c)
     have_multiple_params = true;
-  if (buf == abuf) {
-    return symbol(abuf);
-  }
-  else {
-    symbol s(buf);
-    delete[] buf;
-    return s;
-  }
+  symbol s(buf);
+  delete[] buf;
+  return s;
 }
 
 static symbol read_escape_parameter(read_mode mode)
@@ -2834,29 +2818,19 @@ static symbol do_get_long_name(bool required, char 
end_char)
 {
   while (tok.is_space())
     tok.next();
-  char abuf[ABUF_SIZE];
-  char *buf = abuf;
-  int buf_size = ABUF_SIZE;
+  int buf_size = default_buffer_size;
+  char *buf = new char[buf_size];
   int i = 0;
   for (;;) {
     // If `end_char` != `\0` we normally have to append a null byte.
     if ((i + 2) > buf_size) {
-      if (buf == abuf) {
-       // C++03: new char[ABUF_SIZE * 2]();
-       buf = new char[ABUF_SIZE * 2];
-       (void) memset(buf, 0, (ABUF_SIZE * 2 * sizeof(char)));
-       memcpy(buf, abuf, (buf_size * sizeof(char)));
-       buf_size = ABUF_SIZE * 2;
-      }
-      else {
-       char *old_buf = buf;
-       // C++03: new char[buf_size * 2]();
-       buf = new char[buf_size * 2];
-       (void) memset(buf, 0, (buf_size * 2 * sizeof(char)));
-       memcpy(buf, old_buf, (buf_size * sizeof(char)));
-       buf_size *= 2;
-       delete[] old_buf;
-      }
+      char *old_buf = buf;
+      // C++03: new char[buf_size * 2]();
+      buf = new char[buf_size * 2];
+      (void) memset(buf, 0, (buf_size * 2 * sizeof(char)));
+      memcpy(buf, old_buf, (buf_size * sizeof(char)));
+      buf_size *= 2;
+      delete[] old_buf;
     }
     if ((buf[i] = tok.ch()) == '\0' || (buf[i] == end_char))
       break;
@@ -2871,13 +2845,9 @@ static symbol do_get_long_name(bool required, char 
end_char)
     buf[i + 1] = '\0';
   else
     diagnose_invalid_identifier();
-  if (buf == abuf)
-    return symbol(buf);
-  else {
-    symbol s(buf);
-    delete[] buf;
-    return s;
-  }
+  symbol s(buf);
+  delete[] buf;
+  return s;
 }
 
 static void close_all_streams();
@@ -5801,28 +5771,18 @@ static symbol get_delimited_name()
     return NULL_SYMBOL;
   }
   int start_level = input_stack::get_level();
-  char abuf[ABUF_SIZE];
-  char *buf = abuf;
-  int buf_size = ABUF_SIZE;
+  int buf_size = default_buffer_size;
+  char *buf = new char[buf_size];
   int i = 0;
   for (;;) {
     if ((i + 1) > buf_size) {
-      if (buf == abuf) {
-       // C++03: new char[ABUF_SIZE * 2]();
-       buf = new char[ABUF_SIZE * 2];
-       (void) memset(buf, 0, (ABUF_SIZE * 2 * sizeof(char)));
-       memcpy(buf, abuf, buf_size);
-       buf_size = ABUF_SIZE * 2;
-      }
-      else {
-       char *old_buf = buf;
-       // C++03: new char[buf_size * 2]();
-       buf = new char[buf_size * 2];
-       (void) memset(buf, 0, (buf_size * 2 * sizeof(char)));
-       memcpy(buf, old_buf, buf_size);
-       buf_size *= 2;
-       delete[] old_buf;
-      }
+      char *old_buf = buf;
+      // C++03: new char[buf_size * 2]();
+      buf = new char[buf_size * 2];
+      (void) memset(buf, 0, (buf_size * 2 * sizeof(char)));
+      memcpy(buf, old_buf, buf_size);
+      buf_size *= 2;
+      delete[] old_buf;
     }
     tok.next();
     if ((tok == start_token)
@@ -5838,26 +5798,19 @@ static symbol get_delimited_name()
        error("closing delimiter does not match; expected %1, got %2",
              delimdesc, tok.description());
       free(delimdesc);
-      if (buf != abuf)
-       delete[] buf;
+      delete[] buf;
       return NULL_SYMBOL;
     }
     i++;
   }
   buf[i] = '\0';
-  if (buf == abuf) {
-    if (0 == i) {
-      error("empty delimited name");
-      return NULL_SYMBOL;
-    }
-    else
-      return symbol(buf);
-  }
-  else {
-    symbol s(buf);
-    delete[] buf;
-    return s;
+  if (0 == i) {
+    error("empty delimited name");
+    return NULL_SYMBOL;
   }
+  symbol s(buf);
+  delete[] buf;
+  return s;
 }
 
 static void do_register() // \R

_______________________________________________
groff-commit mailing list
groff-commit@gnu.org
https://lists.gnu.org/mailman/listinfo/groff-commit

Reply via email to