gbranden pushed a commit to branch master
in repository groff.
commit f25ec47dac6c44dd7fb97a06db2f885e9374d099
Author: G. Branden Robinson <[email protected]>
AuthorDate: Mon Jun 1 20:15:44 2026 -0500
src/devices/grohtml/post-html.cpp: Refactor.
* src/devices/grohtml/post-html.cpp: New `static` `const` variable of
type `size_t`, `char_buffer_block_size`, does what's written on the
tin.
(struct char_block): Drop; it has been used exclusively by the
file-local `char_buffer` class.
(class char_buffer): Replace `head` and `tail` private member
variables--pointers to `char_block`s, with an STL vector of `char`
called `vec`.
(char_buffer::char_buffer): Drop initializers of `head` and `tail`.
Populate body with call of `std::vector<char>` constructor.
(char_buffer::~char_buffer): Empty destructor body; being an STL
container, we expect `vec` to dispose of its own backing storage when
it goes out of scope (when its parent object, a `char_buffer`, is
destroyed).
(char_buffer::add_string): Rewrite to idiomatically populate `vec`,
resizing it by `char_buffer_block_size` as needed. Stop returning a
pointer into the data backing the STL vector, as its `resize()`
operation might alter its memory addresses, as realloc(3) can. Return
a strdup(3)-ed copy of the string instead.
Continues the long process of fixing Savannah #66672.
Performance analysis
====================
Impact seems low, and more deterministic.
$ bash ATTIC/measure-grohtml.bash | datamash range 1 mean 1 sstdev 1
[grohtml]: Null-terminate `html_string`.
0.28 5.7285 0.063600314464631
src/devices/grohtml/post-html.cpp: Refactor.
0.21 5.767 0.056391488719487
---
ChangeLog | 26 +++++++++++
src/devices/grohtml/post-html.cpp | 94 +++++++++------------------------------
2 files changed, 47 insertions(+), 73 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 7c9feae6c..2d6439acc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,29 @@
+2026-06-01 G. Branden Robinson <[email protected]>
+
+ * src/devices/grohtml/post-html.cpp: Refactor. New `static`
+ `const` variable of type `size_t`, `char_buffer_block_size`,
+ does what's written on the tin.
+ (struct char_block): Drop; it has been used exclusively by the
+ file-local `char_buffer` class.
+ (class char_buffer): Replace `head` and `tail` private member
+ variables--pointers to `char_block`s, with an STL vector of
+ `char` called `vec`.
+ (char_buffer::char_buffer): Drop initializers of `head` and
+ `tail`. Populate body with call of `std::vector<char>`
+ constructor.
+ (char_buffer::~char_buffer): Empty destructor body; being an STL
+ container, we expect `vec` to dispose of its own backing storage
+ when it goes out of scope (when its parent object, a
+ `char_buffer`, is destroyed).
+ (char_buffer::add_string): Rewrite to idiomatically populate
+ `vec`, resizing it by `char_buffer_block_size` as needed. Stop
+ returning a pointer into the data backing the STL vector, as
+ its `resize()` operation might alter its memory addresses, as
+ realloc(3) can. Return a strdup(3)-ed copy of the string
+ instead.
+
+ Continues the long process of fixing Savannah #66672.
+
2026-06-06 G. Branden Robinson <[email protected]>
* src/devices/grohtml/post-html.cpp: Improve type-correctness.
diff --git a/src/devices/grohtml/post-html.cpp
b/src/devices/grohtml/post-html.cpp
index d056829d9..6cd739892 100644
--- a/src/devices/grohtml/post-html.cpp
+++ b/src/devices/grohtml/post-html.cpp
@@ -32,12 +32,15 @@ along with this program. If not, see
<http://www.gnu.org/licenses/>. */
#include <stdio.h> // EOF, FILE, fclose(), fflush(), fopen(), freopen(),
// fseek(), SEEK_SET, setbuf(), stderr, stdout
#include <stdlib.h> // abs(), atoi(), EXIT_SUCCESS, exit()
-#include <string.h> // strcmp(), strerror(), strlen(), strncmp()
+#include <string.h> // strcmp(), strdup(), strerror(), strlen(),
+ // strncmp()
#include <strings.h> // strcasecmp()
#include <time.h> // asctime(), tm
#include <getopt.h> // getopt_long()
+#include <vector>
+
// libgroff
#include "symbol.h" // prerequisite of color.h
#include "color.h"
@@ -391,39 +394,6 @@ int style::operator!=(const style &s) const
return !(*this == s);
}
-/*
- * the class and methods for retaining ascii text
- */
-
-struct char_block {
- enum { SIZE = 256 };
- char *buffer;
- int used;
- char_block *next;
-
- char_block();
- char_block(int /* length */);
- ~char_block();
-};
-
-char_block::char_block()
-: buffer(0 /* nullptr */), used(0), next(0 /* nullptr */)
-{
-}
-
-char_block::char_block(int length)
-: used(0), next(0 /* nullptr */)
-{
- buffer = new char[max(length, char_block::SIZE)];
- if (0 /* nullptr */ == buffer)
- fatal("out of memory error");
-}
-
-char_block::~char_block()
-{
- delete[] buffer;
-}
-
class char_buffer {
public:
char_buffer();
@@ -432,58 +402,36 @@ public:
unsigned int /* length */);
char *add_string(const string & /* s */);
private:
- char_block *head;
- char_block *tail;
+ std::vector<char> vec;
};
+// A block size of 4096 seems to run faster and with less variance than
+// the default, or 128, 256, 512, 1024, 2048, or 8192.
+// TODO: Use sysconf(PAGE_SIZE)?
+static const size_t char_buffer_block_size = 4096;
+
char_buffer::char_buffer()
-: head(0 /* nullptr */), tail(0 /* nullptr */)
{
+ vec = std::vector<char>(char_buffer_block_size, '\0');
}
char_buffer::~char_buffer()
{
- while (head != 0 /* nullptr */) {
- char_block *temp = head;
- head = head->next;
- delete temp;
- }
}
char *char_buffer::add_string (const char *s, unsigned int length)
{
- int i = 0;
- unsigned int old_used;
-
- if ((0 /* nullptr */ == s) || (0 == length))
+ if (length <= 0)
return 0 /* nullptr */;
-
- if (0 /* nullptr */ == tail) {
- tail = new char_block(length+1);
- head = tail;
- } else {
- if (tail->used + length+1 > char_block::SIZE) {
- tail->next = new char_block(length+1);
- tail = tail->next;
- }
- }
-
- old_used = tail->used;
- do {
- tail->buffer[tail->used] = s[i];
- tail->used++;
- i++;
- length--;
- } while (length>0);
-
- // add terminating nul character
-
- tail->buffer[tail->used] = '\0';
- tail->used++;
-
- // and return start of new string
-
- return &tail->buffer[old_used];
+ if ((vec.capacity() - vec.size()) < length)
+ vec.resize(vec.capacity() + char_buffer_block_size);
+ for (size_t i = 0; i < length; i++)
+ vec.push_back(s[i]);
+ vec.push_back('\0');
+ // XXX: This strdup() likely leaks memory.
+ // TODO: Have `char_buffer` maintain a list of `char` pointers and
+ // free them in destructor?
+ return strdup(s);
}
char *char_buffer::add_string (const string &s)
_______________________________________________
groff-commit mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/groff-commit