gbranden pushed a commit to branch master
in repository groff.

commit 1413f6847ca54dbf12feb48f6dde6c2aefe3d213
Author: G. Branden Robinson <[email protected]>
AuthorDate: Sat Jun 6 10:10:16 2026 -0500

    [libgroff]: Back all strings with clean storage.
    
    * src/libs/libgroff/string.cpp: And ensure that all `string`s are backed
      by storage such that the capacity `sz` is always nonzero, and the
      `contents()` member function never returns a null pointer.  Add
      assert(3)ions to enforce these invariants.  New `static` `const`
      variable `initial_string_buffer_size` does what it says, reserving a
      minimum backing allocation even for empty strings.
    
      (salloc): Discard zero-length code, except insofar as a zero-length
      allocation request gets `initial_string_buffer_size` `char` array
      elements.
    
      (string::string): Discard now tautologously true `if` tests.  Drop
      early return from constructor taking only a `char *` argument, so that
      we can assert(3) a postcondition invariant.
    
    An instrumented, subsequent refactoring of grohtml reveals that omitting
    this procedure permits garbage to leak into string contents and thus the
    output document.
    
    $ ./build/test-groff -man -T html \
      ./build/src/utils/addftinfo/addftinfo.1 2>&1 >/dev/null \
      | sed 's/.*debug: //'
    GBR: header_buffer (before) = '(null)'
    GBR: header_buffer (after) = 'Name�U'
    GBR: header_buffer (before) = ''
    GBR: header_buffer (after) = 'Synopsis'
    GBR: header_buffer (before) = ''
    GBR: header_buffer (after) = 'Description'
    GBR: header_buffer (before) = ''
    GBR: header_buffer (after) = 'Options'
    GBR: header_buffer (before) = ''
    GBR: header_buffer (after) = 'Exit'
    GBR: header_buffer (before) = 'Exit '
    GBR: header_buffer (after) = 'Exit status'
    GBR: header_buffer (before) = ''
    GBR: header_buffer (after) = 'See'
    GBR: header_buffer (before) = 'See '
    GBR: header_buffer (after) = 'See also'
    
    After:
    
    $ ./build/test-groff -man -T html \
      ./build/src/utils/addftinfo/addftinfo.1 2>&1 >/dev/null \
      | sed 's/.*debug: //'
    GBR: header_buffer (before) = ''
    GBR: header_buffer (after) = 'Name'
    GBR: header_buffer (before) = ''
    GBR: header_buffer (after) = 'Synopsis'
    GBR: header_buffer (before) = ''
    GBR: header_buffer (after) = 'Description'
    GBR: header_buffer (before) = ''
    GBR: header_buffer (after) = 'Options'
    GBR: header_buffer (before) = ''
    GBR: header_buffer (after) = 'Exit'
    GBR: header_buffer (before) = 'Exit '
    GBR: header_buffer (after) = 'Exit status'
    GBR: header_buffer (before) = ''
    GBR: header_buffer (after) = 'See'
    GBR: header_buffer (before) = 'See '
    GBR: header_buffer (after) = 'See also'
---
 ChangeLog                    | 16 ++++++++++++++
 src/libs/libgroff/string.cpp | 52 ++++++++++++++++++++++++--------------------
 2 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index f8d30c8e0..3dd3761da 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2026-06-06  G. Branden Robinson <[email protected]>
+
+       * src/libs/libgroff/string.cpp: Back all strings with clean
+       storage.  And ensure that all `string`s are backed by storage
+       such that the capacity `sz` is always nonzero, and the
+       `contents()` member function never returns a null pointer.  Add
+       assert(3)ions to enforce these invariants.  New `static` `const`
+       variable `initial_string_buffer_size` does what it says,
+       reserving a minimum backing allocation even for empty strings.
+       (salloc): Discard zero-length code, except insofar as a
+       zero-length allocation request gets `initial_string_buffer_size`
+       `char` array elements.
+       (string::string): Discard now tautologously true `if` tests.
+       Drop early return from constructor taking only a `char *`
+       argument, so that we can assert(3) a postcondition invariant.
+
 2026-06-06  G. Branden Robinson <[email protected]>
 
        Refactor groff's bespoke `string` class to use `size_t` instead
diff --git a/src/libs/libgroff/string.cpp b/src/libs/libgroff/string.cpp
index cd821fbf5..e81667b5b 100644
--- a/src/libs/libgroff/string.cpp
+++ b/src/libs/libgroff/string.cpp
@@ -39,23 +39,22 @@ along with this program.  If not, see 
<http://www.gnu.org/licenses/>. */
 // TODO 1: Replace all this memory management stuff with vector<char>.
 // TODO 2: Replace this entire class.  See Savannah #67735.
 
+static const size_t initial_string_buffer_size = 8;
+
 static char *salloc(size_t len, size_t *sizep)
 {
-  if (0 == len) {
-    *sizep = 0;
-    return 0 /* nullptr */;
-  }
   char *p = 0 /* nullptr */;
   size_t amount = len * 2;
-  p = new char[*sizep = amount];
-  assert(amount > 0);
+  if (0 == amount)
+    amount = initial_string_buffer_size;
   try {
     p = new char[*sizep = amount];
   }
   catch (const std::bad_alloc &exc) {
     fatal("cannot allocate %1 bytes for string allocation", amount);
   }
-  memset(p, 0, amount);
+  assert(*sizep > 0);
+  memset(p, 0, *sizep);
   return p;
 }
 
@@ -121,39 +120,43 @@ static char *srealloc(char *ptr, size_t oldsz, size_t 
oldlen,
   }
 }
 
-string::string() : ptr(0 /* nullptr */), len(0), sz(0)
+string::string() : len(0), sz(initial_string_buffer_size)
 {
+  ptr = salloc(initial_string_buffer_size, &sz);
+  assert(ptr != 0 /* nullptr */);
 }
 
 string::string(const char *p, size_t n) : len(n)
 {
   ptr = salloc(n, &sz);
-  if (sz > 0)
-    memset(ptr, 0, sz);
+  memset(ptr, 0, sz);
   if (n != 0)
     memcpy(ptr, p, n);
+  assert(ptr != 0 /* nullptr */);
 }
 
 string::string(const char *p)
 {
   if (0 /* nullptr */ == p) {
     len = 0;
-    ptr = 0 /* nullptr */;
-    sz = 0;
-    return;
+    ptr = salloc(initial_string_buffer_size, &sz);
   }
-  len = strlen(p);
-  ptr = salloc(len, &sz);
-  if ((sz > 0) && (len < sz))
-    memset(ptr, 0, sz);
-  if (len != 0)
-    memcpy(ptr, p, len);
+  else {
+    len = strlen(p);
+    ptr = salloc(len, &sz);
+    if (len < sz)
+      memset(ptr, 0, sz);
+    if (len != 0)
+      memcpy(ptr, p, len);
+  }
+  assert(ptr != 0 /* nullptr */);
 }
 
 string::string(char c) : len(1)
 {
   ptr = salloc(1, &sz);
   *ptr = c;
+  assert(ptr != 0 /* nullptr */);
 }
 
 string::string(const string &s) : len(s.len)
@@ -163,6 +166,7 @@ string::string(const string &s) : len(s.len)
     memset(ptr, 0, sz);
   if (len != 0)
     memcpy(ptr, s.ptr, len);
+  assert(ptr != 0 /* nullptr */);
 }
 
 string::~string()
@@ -231,6 +235,7 @@ string &string::operator+=(const char *p)
     memcpy(ptr + len, p, n);
     len = newlen;
   }
+  assert(ptr != 0 /* nullptr */);
   return *this;
 }
 
@@ -243,6 +248,7 @@ string &string::operator+=(const string &s)
     memcpy(ptr + len, s.ptr, s.len);
     len = newlen;
   }
+  assert(ptr != 0 /* nullptr */);
   return *this;
 }
 
@@ -260,10 +266,8 @@ void string::append(const char *p, size_t n)
 string::string(const char *s1, size_t n1, const char *s2, size_t n2)
 {
   len = n1 + n2;
-  if (0 == len) {
-    sz = 0;
-    ptr = 0 /* nullptr */;
-  }
+  if (0 == len)
+    ptr = salloc(initial_string_buffer_size, &sz);
   else {
     ptr = salloc(len, &sz);
     if (0 == n1)
@@ -274,6 +278,7 @@ string::string(const char *s1, size_t n1, const char *s2, 
size_t n2)
        memcpy(ptr + n1, s2, n2);
     }
   }
+  assert(ptr != 0 /* nullptr */);
 }
 
 bool operator<=(const string &s1, const string &s2)
@@ -461,6 +466,7 @@ void string::remove_spaces()
     delete[] ptr;
     ptr = tmp;
   }
+  assert(ptr != 0 /* nullptr */);
 }
 
 void put_string(const string &s, FILE *fp)

_______________________________________________
groff-commit mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/groff-commit

Reply via email to