gbranden pushed a commit to branch master
in repository groff.

commit 830e119147b4dc05d42f6d08a3dd653f047e278b
Author: G. Branden Robinson <[email protected]>
AuthorDate: Thu Jun 4 13:50:43 2026 -0500

    [libgroff]: Clear heap memory backing `string`s.
    
    * src/libs/libgroff/string.cpp (salloc, sfree_alloc, srealloc)
      (string::string, string::clear, string::remove_spaces): Zero out
      memory allocations from the heap, and when user requests erasure of
      `string` contents.
    
    This change resolves an issue I noted when instrumenting grohtml(1) to
    inspect the groff `string`s and C strings it passes around internally.
    
    Examples of differences using `debug()` instrumentation in my working
    copy:
    
    $ diff -U0 ERR1 ERR2|cat -v|head -n 27
    --- ERR1        2026-06-03 22:55:51.904692449 -0500
    +++ ERR2        2026-06-03 22:57:18.775831578 -0500
    @@ -23,12 +23,12 @@
    -post-grohtml:<standard input>:(www.tmac):57: debug: GBR: 
html_printer::flush_sbuf() flushing 'Home^L^?'
    -post-grohtml:<standard input>:(www.tmac):57: debug: GBR: page::add() 
'Home^L^?'
    -post-grohtml:<standard input>:(www.tmac):57: debug: GBR: 
char_buffer::add_string() adding 'Home^L^?'
    -post-grohtml:<standard input>:(www.tmac):59: debug: GBR: 
html_printer::flush_sbuf() flushing 'ofme^L^?'
    -post-grohtml:<standard input>:(www.tmac):59: debug: GBR: page::add() 
'ofme^L^?'
    -post-grohtml:<standard input>:(www.tmac):59: debug: GBR: 
char_buffer::add_string() adding 'ofme^L^?'
    -post-grohtml:<standard input>:(www.tmac):61: debug: GBR: 
html_printer::flush_sbuf() flushing 'Groff^?'
    -post-grohtml:<standard input>:(www.tmac):61: debug: GBR: page::add() 
'Groff^?'
    -post-grohtml:<standard input>:(www.tmac):61: debug: GBR: 
char_buffer::add_string() adding 'Groff^?'
    -post-grohtml:<standard input>:(www.tmac):63: debug: GBR: 
html_printer::flush_sbuf() flushing '(GNUf^?'
    -post-grohtml:<standard input>:(www.tmac):63: debug: GBR: page::add() 
'(GNUf^?'
    -post-grohtml:<standard input>:(www.tmac):63: debug: GBR: 
char_buffer::add_string() adding '(GNUf^?'
    +post-grohtml:<standard input>:(www.tmac):57: debug: GBR: 
html_printer::flush_sbuf() flushing 'Home'
    +post-grohtml:<standard input>:(www.tmac):57: debug: GBR: page::add() 'Home'
    +post-grohtml:<standard input>:(www.tmac):57: debug: GBR: 
char_buffer::add_string() adding 'Home'
    +post-grohtml:<standard input>:(www.tmac):59: debug: GBR: 
html_printer::flush_sbuf() flushing 'of'
    +post-grohtml:<standard input>:(www.tmac):59: debug: GBR: page::add() 'of'
    +post-grohtml:<standard input>:(www.tmac):59: debug: GBR: 
char_buffer::add_string() adding 'of'
    +post-grohtml:<standard input>:(www.tmac):61: debug: GBR: 
html_printer::flush_sbuf() flushing 'Groff'
    +post-grohtml:<standard input>:(www.tmac):61: debug: GBR: page::add() 
'Groff'
    +post-grohtml:<standard input>:(www.tmac):61: debug: GBR: 
char_buffer::add_string() adding 'Groff'
    +post-grohtml:<standard input>:(www.tmac):63: debug: GBR: 
html_printer::flush_sbuf() flushing '(GNU'
    +post-grohtml:<standard input>:(www.tmac):63: debug: GBR: page::add() '(GNU'
    +post-grohtml:<standard input>:(www.tmac):63: debug: GBR: 
char_buffer::add_string() adding '(GNU'
    
    There are worse examples than the foregoing; the copying of unnecessary
    bytes can cause reallocations from the heap as the `char_buffer` grows
    to accommodate string content that won't be used.
    
    $ diff -U0 ERR1 ERR2|cat -v|grep -w 640
    -post-grohtml:<standard input>:(www.tmac):640: debug: GBR: 
html_printer::flush_sbuf() flushing 'topgecededonslyves&rdquo;)g/">-1.png'
    -post-grohtml:<standard input>:(www.tmac):640: debug: GBR: page::add() 
'topgecededonslyves&rdquo;)g/">-1.png'
    -post-grohtml:<standard input>:(www.tmac):640: debug: GBR: 
char_buffer::add_string() adding 'topgecededonslyves&rdquo;)g/">-1.png'
    +post-grohtml:<standard input>:(www.tmac):640: debug: GBR: 
html_printer::flush_sbuf() flushing 'top'
    +post-grohtml:<standard input>:(www.tmac):640: debug: GBR: page::add() 'top'
    +post-grohtml:<standard input>:(www.tmac):640: debug: GBR: 
char_buffer::add_string() adding 'top'
    
    Here's another case, a more obnoxious one because the excess content of
    these strings could cause unnecessary memory (re)allocations.
    
    $ diff -U0 ERR1 ERR2|cat -v|grep -w 36409
    -post-grohtml:<standard input>:(<standard input>):36409: debug: GBR: 
html_printer::flush_sbuf() flushing 'AUMBERr.s:deor,t;;;t;uot;;;;ot;0)'
    -post-grohtml:<standard input>:(<standard input>):36409: debug: GBR: 
page::add() 'AUMBERr.s:deor,t;;;t;uot;;;;ot;0)'
    -post-grohtml:<standard input>:(<standard input>):36409: debug: GBR: 
char_buffer::add_string() adding 'AUMBERr.s:deor,t;;;t;uot;;;;ot;0)'
    +post-grohtml:<standard input>:(<standard input>):36409: debug: GBR: 
html_printer::flush_sbuf() flushing 'A'
    +post-grohtml:<standard input>:(<standard input>):36409: debug: GBR: 
page::add() 'A'
    +post-grohtml:<standard input>:(<standard input>):36409: debug: GBR: 
char_buffer::add_string() adding 'A'
    
    Performance analysis
    ====================
    
    Insignificant change.
    
    $ bash ./measure-all-docs.bash | datamash range 1 mean 1 sstdev 1
    
    Before:
    1.18    24.1655 0.22933485056618
    
    After:
    1.41    24.1565 0.32841365123239
    
    $ cat ./measure-all-docs.bash
    \#!/bin/bash
    
    \# Example:
    \#   bash ./measure-all-docs.bash | datamash range 1 mean 1 sstdev 1
    
    shopt -s extglob
    
    if ! make -C build -j >/dev/null 2>&1
    then
            echo "$0: fatal error; tree is not buildable" >&2
            exit 1
    fi
    
    : ${N:=20}
    
    for n in $(seq $N)
    do
            printf "%d " $n >&2
            rm -f build/doc/!(groff).pdf build/doc/*.ps \
                    build/doc/!(groff).html \
                    build/contrib/hdtbl/examples/*.ps \
                    build/contrib/mom/examples/*.pdf \
                    contrib/sboxes/msboxes.pdf \
                    doc/groff-man-pages.utf8.txt
            t=$({ time -p make -C build >/dev/null 2>&1; } 2>/dev/stdout)
            echo "$t" | awk '/real/ { print $2; }'
    done
    
    echo >&2
---
 ChangeLog                    |  7 ++++++
 src/libs/libgroff/string.cpp | 51 +++++++++++++++++++++++++++++++-------------
 2 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index dfc0e1d4c..42dd64044 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2026-06-04  G. Branden Robinson <[email protected]>
+
+       * src/libs/libgroff/string.cpp (salloc, sfree_alloc, srealloc)
+       (string::string, string::clear, string::remove_spaces): Zero out
+       memory allocations from the heap, and when user requests erasure
+       of `string` contents.
+
 2026-06-04  G. Branden Robinson <[email protected]>
 
        * src/libs/libgroff/string.cpp: Fix code style nits.
diff --git a/src/libs/libgroff/string.cpp b/src/libs/libgroff/string.cpp
index 2d31c43e8..c735548af 100644
--- a/src/libs/libgroff/string.cpp
+++ b/src/libs/libgroff/string.cpp
@@ -44,8 +44,12 @@ static char *salloc(int len, int *sizep)
     *sizep = 0;
     return 0 /* nullptr */;
   }
-  else
-    return new char[*sizep = (len * 2)];
+  char *p = 0 /* nullptr */;
+  size_t amount = len * 2;
+  p = new char[*sizep = amount];
+  assert(amount > 0);
+  memset(p, 0, amount);
+  return p;
 }
 
 static void sfree(char *ptr, int)
@@ -64,8 +68,12 @@ static char *sfree_alloc(char *ptr, int oldsz, int len, int 
*sizep)
     *sizep = 0;
     return 0 /* nullptr */;
   }
-  else
-    return new char[*sizep = (len * 2)];
+  char *p = 0 /* nullptr */;
+  size_t amount = len * 2;
+  p = new char[*sizep = amount];
+  assert(amount > 0);
+  memset(p, 0, amount);
+  return p;
 }
 
 static char *srealloc(char *ptr, int oldsz, int oldlen, int newlen,
@@ -81,9 +89,14 @@ static char *srealloc(char *ptr, int oldsz, int oldlen, int 
newlen,
     return 0 /* nullptr */;
   }
   else {
-    char *p = new char[*sizep = (newlen * 2)];
-    if ((oldlen < newlen) && (oldlen != 0))
+    size_t amount = newlen * 2;
+    char *p = 0 /* nullptr */;
+    p = new char[*sizep = amount];
+    if ((oldlen < newlen) && (oldlen != 0)) {
+      assert(amount > 0);
+      memset(p, 0, amount);
       memcpy(p, ptr, oldlen);
+    }
     delete[] ptr;
     return p;
   }
@@ -97,6 +110,8 @@ string::string(const char *p, int n) : len(n)
 {
   assert(n >= 0);
   ptr = salloc(n, &sz);
+  if (sz > 0)
+    memset(ptr, 0, sz);
   if (n != 0)
     memcpy(ptr, p, n);
 }
@@ -107,15 +122,14 @@ string::string(const char *p)
     len = 0;
     ptr = 0 /* nullptr */;
     sz = 0;
+    return;
   }
-  else {
-    len = strlen(p);
-    ptr = salloc(len, &sz);
-    if (len < sz)
-      memset(ptr, 0, sz);
-    if (len != 0)
-      memcpy(ptr, p, len);
-  }
+  len = strlen(p);
+  ptr = salloc(len, &sz);
+  if ((sz > 0) && (len < sz))
+    memset(ptr, 0, sz);
+  if (len != 0)
+    memcpy(ptr, p, len);
 }
 
 string::string(char c) : len(1)
@@ -127,6 +141,8 @@ string::string(char c) : len(1)
 string::string(const string &s) : len(s.len)
 {
   ptr = salloc(len, &sz);
+  if (sz > 0)
+    memset(ptr, 0, sz);
   if (len != 0)
     memcpy(ptr, s.ptr, len);
 }
@@ -281,6 +297,8 @@ void string::set_length(int i)
 
 void string::clear()
 {
+  if (ptr != 0 /* nullptr */)
+    memset(ptr, 0, sz);
   len = 0;
 }
 
@@ -414,7 +432,10 @@ void string::remove_spaces()
   if (len - 1 != l) {
     if (l >= 0) {
       len = l + 1;
-      char *tmp = new char[sz];
+      char *tmp = 0 /* nullptr */;
+      tmp = new char[sz];
+      assert(sz > 0);
+      memset(tmp, 0, sz);
       memcpy(tmp, p, len);
       delete[] ptr;
       ptr = tmp;

_______________________________________________
groff-commit mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/groff-commit

Reply via email to