gbranden pushed a commit to branch master
in repository groff.

commit f25ec47dac6c44dd7fb97a06db2f885e9374d099
Author: G. Branden Robinson <[email protected]>
AuthorDate: Mon Jun 1 20:15:44 2026 -0500

    src/devices/grohtml/post-html.cpp: Refactor.
    
    * src/devices/grohtml/post-html.cpp: New `static` `const` variable of
      type `size_t`, `char_buffer_block_size`, does what's written on the
      tin.
    
      (struct char_block): Drop; it has been used exclusively by the
      file-local `char_buffer` class.
    
      (class char_buffer): Replace `head` and `tail` private member
      variables--pointers to `char_block`s, with an STL vector of `char`
      called `vec`.
    
      (char_buffer::char_buffer): Drop initializers of `head` and `tail`.
      Populate body with call of `std::vector<char>` constructor.
    
      (char_buffer::~char_buffer): Empty destructor body; being an STL
      container, we expect `vec` to dispose of its own backing storage when
      it goes out of scope (when its parent object, a `char_buffer`, is
      destroyed).
    
      (char_buffer::add_string): Rewrite to idiomatically populate `vec`,
      resizing it by `char_buffer_block_size` as needed.  Stop returning a
      pointer into the data backing the STL vector, as its `resize()`
      operation might alter its memory addresses, as realloc(3) can.  Return
      a strdup(3)-ed copy of the string instead.
    
    Continues the long process of fixing Savannah #66672.
    
    Performance analysis
    ====================
    
    Impact seems low, and more deterministic.
    
    $ bash ATTIC/measure-grohtml.bash | datamash range 1 mean 1 sstdev 1
    
    [grohtml]: Null-terminate `html_string`.
    0.28    5.7285  0.063600314464631
    
    src/devices/grohtml/post-html.cpp: Refactor.
    0.21    5.767   0.056391488719487
---
 ChangeLog                         | 26 +++++++++++
 src/devices/grohtml/post-html.cpp | 94 +++++++++------------------------------
 2 files changed, 47 insertions(+), 73 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 7c9feae6c..2d6439acc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,29 @@
+2026-06-01  G. Branden Robinson <[email protected]>
+
+       * src/devices/grohtml/post-html.cpp: Refactor.  New `static`
+       `const` variable of type `size_t`, `char_buffer_block_size`,
+       does what's written on the tin.
+       (struct char_block): Drop; it has been used exclusively by the
+       file-local `char_buffer` class.
+       (class char_buffer): Replace `head` and `tail` private member
+       variables--pointers to `char_block`s, with an STL vector of
+       `char` called `vec`.
+       (char_buffer::char_buffer): Drop initializers of `head` and
+       `tail`.  Populate body with call of `std::vector<char>`
+       constructor.
+       (char_buffer::~char_buffer): Empty destructor body; being an STL
+       container, we expect `vec` to dispose of its own backing storage
+       when it goes out of scope (when its parent object, a
+       `char_buffer`, is destroyed).
+       (char_buffer::add_string): Rewrite to idiomatically populate
+       `vec`, resizing it by `char_buffer_block_size` as needed.  Stop
+       returning a pointer into the data backing the STL vector, as
+       its `resize()` operation might alter its memory addresses, as
+       realloc(3) can.  Return a strdup(3)-ed copy of the string
+       instead.
+
+       Continues the long process of fixing Savannah #66672.
+
 2026-06-06  G. Branden Robinson <[email protected]>
 
        * src/devices/grohtml/post-html.cpp: Improve type-correctness.
diff --git a/src/devices/grohtml/post-html.cpp 
b/src/devices/grohtml/post-html.cpp
index d056829d9..6cd739892 100644
--- a/src/devices/grohtml/post-html.cpp
+++ b/src/devices/grohtml/post-html.cpp
@@ -32,12 +32,15 @@ along with this program.  If not, see 
<http://www.gnu.org/licenses/>. */
 #include <stdio.h> // EOF, FILE, fclose(), fflush(), fopen(), freopen(),
                   // fseek(), SEEK_SET, setbuf(), stderr, stdout
 #include <stdlib.h> // abs(), atoi(), EXIT_SUCCESS, exit()
-#include <string.h> // strcmp(), strerror(), strlen(), strncmp()
+#include <string.h> // strcmp(), strdup(), strerror(), strlen(),
+                   // strncmp()
 #include <strings.h> // strcasecmp()
 #include <time.h> // asctime(), tm
 
 #include <getopt.h> // getopt_long()
 
+#include <vector>
+
 // libgroff
 #include "symbol.h" // prerequisite of color.h
 #include "color.h"
@@ -391,39 +394,6 @@ int style::operator!=(const style &s) const
   return !(*this == s);
 }
 
-/*
- *  the class and methods for retaining ascii text
- */
-
-struct char_block {
-  enum { SIZE = 256 };
-  char         *buffer;
-  int           used;
-  char_block   *next;
-
-  char_block();
-  char_block(int /* length */);
-  ~char_block();
-};
-
-char_block::char_block()
-: buffer(0 /* nullptr */), used(0), next(0 /* nullptr */)
-{
-}
-
-char_block::char_block(int length)
-: used(0), next(0 /* nullptr */)
-{
-  buffer = new char[max(length, char_block::SIZE)];
-  if (0 /* nullptr */ == buffer)
-    fatal("out of memory error");
-}
-
-char_block::~char_block()
-{
-  delete[] buffer;
-}
-
 class char_buffer {
 public:
   char_buffer();
@@ -432,58 +402,36 @@ public:
                    unsigned int /* length */);
   char  *add_string(const string & /* s */);
 private:
-  char_block *head;
-  char_block *tail;
+  std::vector<char> vec;
 };
 
+// A block size of 4096 seems to run faster and with less variance than
+// the default, or 128, 256, 512, 1024, 2048, or 8192.
+// TODO: Use sysconf(PAGE_SIZE)?
+static const size_t char_buffer_block_size = 4096;
+
 char_buffer::char_buffer()
-: head(0 /* nullptr */), tail(0 /* nullptr */)
 {
+  vec = std::vector<char>(char_buffer_block_size, '\0');
 }
 
 char_buffer::~char_buffer()
 {
-  while (head != 0 /* nullptr */) {
-    char_block *temp = head;
-    head = head->next;
-    delete temp;
-  }
 }
 
 char *char_buffer::add_string (const char *s, unsigned int length)
 {
-  int i = 0;
-  unsigned int old_used;
-
-  if ((0 /* nullptr */ == s) || (0 == length))
+  if (length <= 0)
     return 0 /* nullptr */;
-
-  if (0 /* nullptr */ == tail) {
-    tail = new char_block(length+1);
-    head = tail;
-  } else {
-    if (tail->used + length+1 > char_block::SIZE) {
-      tail->next  = new char_block(length+1);
-      tail        = tail->next;
-    }
-  }
-
-  old_used = tail->used;
-  do {
-    tail->buffer[tail->used] = s[i];
-    tail->used++;
-    i++;
-    length--;
-  } while (length>0);
-
-  // add terminating nul character
-
-  tail->buffer[tail->used] = '\0';
-  tail->used++;
-
-  // and return start of new string
-
-  return &tail->buffer[old_used];
+  if ((vec.capacity() - vec.size()) < length)
+    vec.resize(vec.capacity() + char_buffer_block_size);
+  for (size_t i = 0; i < length; i++)
+    vec.push_back(s[i]);
+  vec.push_back('\0');
+  // XXX: This strdup() likely leaks memory.
+  // TODO: Have `char_buffer` maintain a list of `char` pointers and
+  // free them in destructor?
+  return strdup(s);
 }
 
 char *char_buffer::add_string (const string &s)

_______________________________________________
groff-commit mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/groff-commit

Reply via email to