gbranden pushed a commit to branch master
in repository groff.
commit 830e119147b4dc05d42f6d08a3dd653f047e278b
Author: G. Branden Robinson <[email protected]>
AuthorDate: Thu Jun 4 13:50:43 2026 -0500
[libgroff]: Clear heap memory backing `string`s.
* src/libs/libgroff/string.cpp (salloc, sfree_alloc, srealloc)
(string::string, string::clear, string::remove_spaces): Zero out
memory allocations from the heap, and when user requests erasure of
`string` contents.
This change resolves an issue I noted when instrumenting grohtml(1) to
inspect the groff `string`s and C strings it passes around internally.
Examples of differences using `debug()` instrumentation in my working
copy:
$ diff -U0 ERR1 ERR2|cat -v|head -n 27
--- ERR1 2026-06-03 22:55:51.904692449 -0500
+++ ERR2 2026-06-03 22:57:18.775831578 -0500
@@ -23,12 +23,12 @@
-post-grohtml:<standard input>:(www.tmac):57: debug: GBR:
html_printer::flush_sbuf() flushing 'Home^L^?'
-post-grohtml:<standard input>:(www.tmac):57: debug: GBR: page::add()
'Home^L^?'
-post-grohtml:<standard input>:(www.tmac):57: debug: GBR:
char_buffer::add_string() adding 'Home^L^?'
-post-grohtml:<standard input>:(www.tmac):59: debug: GBR:
html_printer::flush_sbuf() flushing 'ofme^L^?'
-post-grohtml:<standard input>:(www.tmac):59: debug: GBR: page::add()
'ofme^L^?'
-post-grohtml:<standard input>:(www.tmac):59: debug: GBR:
char_buffer::add_string() adding 'ofme^L^?'
-post-grohtml:<standard input>:(www.tmac):61: debug: GBR:
html_printer::flush_sbuf() flushing 'Groff^?'
-post-grohtml:<standard input>:(www.tmac):61: debug: GBR: page::add()
'Groff^?'
-post-grohtml:<standard input>:(www.tmac):61: debug: GBR:
char_buffer::add_string() adding 'Groff^?'
-post-grohtml:<standard input>:(www.tmac):63: debug: GBR:
html_printer::flush_sbuf() flushing '(GNUf^?'
-post-grohtml:<standard input>:(www.tmac):63: debug: GBR: page::add()
'(GNUf^?'
-post-grohtml:<standard input>:(www.tmac):63: debug: GBR:
char_buffer::add_string() adding '(GNUf^?'
+post-grohtml:<standard input>:(www.tmac):57: debug: GBR:
html_printer::flush_sbuf() flushing 'Home'
+post-grohtml:<standard input>:(www.tmac):57: debug: GBR: page::add() 'Home'
+post-grohtml:<standard input>:(www.tmac):57: debug: GBR:
char_buffer::add_string() adding 'Home'
+post-grohtml:<standard input>:(www.tmac):59: debug: GBR:
html_printer::flush_sbuf() flushing 'of'
+post-grohtml:<standard input>:(www.tmac):59: debug: GBR: page::add() 'of'
+post-grohtml:<standard input>:(www.tmac):59: debug: GBR:
char_buffer::add_string() adding 'of'
+post-grohtml:<standard input>:(www.tmac):61: debug: GBR:
html_printer::flush_sbuf() flushing 'Groff'
+post-grohtml:<standard input>:(www.tmac):61: debug: GBR: page::add()
'Groff'
+post-grohtml:<standard input>:(www.tmac):61: debug: GBR:
char_buffer::add_string() adding 'Groff'
+post-grohtml:<standard input>:(www.tmac):63: debug: GBR:
html_printer::flush_sbuf() flushing '(GNU'
+post-grohtml:<standard input>:(www.tmac):63: debug: GBR: page::add() '(GNU'
+post-grohtml:<standard input>:(www.tmac):63: debug: GBR:
char_buffer::add_string() adding '(GNU'
There are worse examples than the foregoing; the copying of unnecessary
bytes can cause reallocations from the heap as the `char_buffer` grows
to accommodate string content that won't be used.
$ diff -U0 ERR1 ERR2|cat -v|grep -w 640
-post-grohtml:<standard input>:(www.tmac):640: debug: GBR:
html_printer::flush_sbuf() flushing 'topgecededonslyves”)g/">-1.png'
-post-grohtml:<standard input>:(www.tmac):640: debug: GBR: page::add()
'topgecededonslyves”)g/">-1.png'
-post-grohtml:<standard input>:(www.tmac):640: debug: GBR:
char_buffer::add_string() adding 'topgecededonslyves”)g/">-1.png'
+post-grohtml:<standard input>:(www.tmac):640: debug: GBR:
html_printer::flush_sbuf() flushing 'top'
+post-grohtml:<standard input>:(www.tmac):640: debug: GBR: page::add() 'top'
+post-grohtml:<standard input>:(www.tmac):640: debug: GBR:
char_buffer::add_string() adding 'top'
Here's another case, a more obnoxious one because the excess content of
these strings could cause unnecessary memory (re)allocations.
$ diff -U0 ERR1 ERR2|cat -v|grep -w 36409
-post-grohtml:<standard input>:(<standard input>):36409: debug: GBR:
html_printer::flush_sbuf() flushing 'AUMBERr.s:deor,t;;;t;uot;;;;ot;0)'
-post-grohtml:<standard input>:(<standard input>):36409: debug: GBR:
page::add() 'AUMBERr.s:deor,t;;;t;uot;;;;ot;0)'
-post-grohtml:<standard input>:(<standard input>):36409: debug: GBR:
char_buffer::add_string() adding 'AUMBERr.s:deor,t;;;t;uot;;;;ot;0)'
+post-grohtml:<standard input>:(<standard input>):36409: debug: GBR:
html_printer::flush_sbuf() flushing 'A'
+post-grohtml:<standard input>:(<standard input>):36409: debug: GBR:
page::add() 'A'
+post-grohtml:<standard input>:(<standard input>):36409: debug: GBR:
char_buffer::add_string() adding 'A'
Performance analysis
====================
Insignificant change.
$ bash ./measure-all-docs.bash | datamash range 1 mean 1 sstdev 1
Before:
1.18 24.1655 0.22933485056618
After:
1.41 24.1565 0.32841365123239
$ cat ./measure-all-docs.bash
\#!/bin/bash
\# Example:
\# bash ./measure-all-docs.bash | datamash range 1 mean 1 sstdev 1
shopt -s extglob
if ! make -C build -j >/dev/null 2>&1
then
echo "$0: fatal error; tree is not buildable" >&2
exit 1
fi
: ${N:=20}
for n in $(seq $N)
do
printf "%d " $n >&2
rm -f build/doc/!(groff).pdf build/doc/*.ps \
build/doc/!(groff).html \
build/contrib/hdtbl/examples/*.ps \
build/contrib/mom/examples/*.pdf \
contrib/sboxes/msboxes.pdf \
doc/groff-man-pages.utf8.txt
t=$({ time -p make -C build >/dev/null 2>&1; } 2>/dev/stdout)
echo "$t" | awk '/real/ { print $2; }'
done
echo >&2
---
ChangeLog | 7 ++++++
src/libs/libgroff/string.cpp | 51 +++++++++++++++++++++++++++++++-------------
2 files changed, 43 insertions(+), 15 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index dfc0e1d4c..42dd64044 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2026-06-04 G. Branden Robinson <[email protected]>
+
+ * src/libs/libgroff/string.cpp (salloc, sfree_alloc, srealloc)
+ (string::string, string::clear, string::remove_spaces): Zero out
+ memory allocations from the heap, and when user requests erasure
+ of `string` contents.
+
2026-06-04 G. Branden Robinson <[email protected]>
* src/libs/libgroff/string.cpp: Fix code style nits.
diff --git a/src/libs/libgroff/string.cpp b/src/libs/libgroff/string.cpp
index 2d31c43e8..c735548af 100644
--- a/src/libs/libgroff/string.cpp
+++ b/src/libs/libgroff/string.cpp
@@ -44,8 +44,12 @@ static char *salloc(int len, int *sizep)
*sizep = 0;
return 0 /* nullptr */;
}
- else
- return new char[*sizep = (len * 2)];
+ char *p = 0 /* nullptr */;
+ size_t amount = len * 2;
+ p = new char[*sizep = amount];
+ assert(amount > 0);
+ memset(p, 0, amount);
+ return p;
}
static void sfree(char *ptr, int)
@@ -64,8 +68,12 @@ static char *sfree_alloc(char *ptr, int oldsz, int len, int
*sizep)
*sizep = 0;
return 0 /* nullptr */;
}
- else
- return new char[*sizep = (len * 2)];
+ char *p = 0 /* nullptr */;
+ size_t amount = len * 2;
+ p = new char[*sizep = amount];
+ assert(amount > 0);
+ memset(p, 0, amount);
+ return p;
}
static char *srealloc(char *ptr, int oldsz, int oldlen, int newlen,
@@ -81,9 +89,14 @@ static char *srealloc(char *ptr, int oldsz, int oldlen, int
newlen,
return 0 /* nullptr */;
}
else {
- char *p = new char[*sizep = (newlen * 2)];
- if ((oldlen < newlen) && (oldlen != 0))
+ size_t amount = newlen * 2;
+ char *p = 0 /* nullptr */;
+ p = new char[*sizep = amount];
+ if ((oldlen < newlen) && (oldlen != 0)) {
+ assert(amount > 0);
+ memset(p, 0, amount);
memcpy(p, ptr, oldlen);
+ }
delete[] ptr;
return p;
}
@@ -97,6 +110,8 @@ string::string(const char *p, int n) : len(n)
{
assert(n >= 0);
ptr = salloc(n, &sz);
+ if (sz > 0)
+ memset(ptr, 0, sz);
if (n != 0)
memcpy(ptr, p, n);
}
@@ -107,15 +122,14 @@ string::string(const char *p)
len = 0;
ptr = 0 /* nullptr */;
sz = 0;
+ return;
}
- else {
- len = strlen(p);
- ptr = salloc(len, &sz);
- if (len < sz)
- memset(ptr, 0, sz);
- if (len != 0)
- memcpy(ptr, p, len);
- }
+ len = strlen(p);
+ ptr = salloc(len, &sz);
+ if ((sz > 0) && (len < sz))
+ memset(ptr, 0, sz);
+ if (len != 0)
+ memcpy(ptr, p, len);
}
string::string(char c) : len(1)
@@ -127,6 +141,8 @@ string::string(char c) : len(1)
string::string(const string &s) : len(s.len)
{
ptr = salloc(len, &sz);
+ if (sz > 0)
+ memset(ptr, 0, sz);
if (len != 0)
memcpy(ptr, s.ptr, len);
}
@@ -281,6 +297,8 @@ void string::set_length(int i)
void string::clear()
{
+ if (ptr != 0 /* nullptr */)
+ memset(ptr, 0, sz);
len = 0;
}
@@ -414,7 +432,10 @@ void string::remove_spaces()
if (len - 1 != l) {
if (l >= 0) {
len = l + 1;
- char *tmp = new char[sz];
+ char *tmp = 0 /* nullptr */;
+ tmp = new char[sz];
+ assert(sz > 0);
+ memset(tmp, 0, sz);
memcpy(tmp, p, len);
delete[] ptr;
ptr = tmp;
_______________________________________________
groff-commit mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/groff-commit