gbranden pushed a commit to branch master in repository groff. commit 034b847a2d19891c0bea5437815c02e863076b80 Author: G. Branden Robinson <g.branden.robin...@gmail.com> AuthorDate: Thu Jul 10 20:39:03 2025 -0500
src/roff/troff/input.cpp: Refactor to simplify. * src/roff/troff/input.cpp: As an apparent optimization (dating back as far as groff 1.02, 1991) parts of the input parser that needed to read arbitrarily far ahead in the input stream to consume a token used an initial stack-allocated buffer of 16 bytes. (Also known as "automatic" storage, thus the name "abuf", I think.) If 16 bytes wasn't enough, open-coded logic would allocate a buffer from the heap and double it in size until the token fit. Simplify by converting `ABUF_SIZE` from a preprocessor macro to a `const` symbol named `default_buffer_size`, and _always_ allocate from the heap. In the future, we can convert stuff like this to a C++ STL `vector` and let the standard library worry about dynamic storage management. (read_long_escape_parameters, do_get_long_name, get_delimited_name): Do it. --- ChangeLog | 18 +++++++ src/roff/troff/input.cpp | 137 ++++++++++++++++------------------------------- 2 files changed, 63 insertions(+), 92 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0b6b81fde..fd18e1893 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +2025-07-10 G. Branden Robinson <g.branden.robin...@gmail.com> + + * src/roff/troff/input.cpp: Refactor to simplify. As an + apparent optimization (dating back as far as groff 1.02, 1991) + parts of the input parser that needed to read arbitrarily far + ahead in the input stream to consume a token used an initial + stack-allocated buffer of 16 bytes. (Also known as "automatic" + storage, thus the name "abuf", I think.) If 16 bytes wasn't + enough, open-coded logic would allocate a buffer from the heap + and double it in size until the token fit. Simplify by + converting `ABUF_SIZE` from a preprocessor macro to a `const` + symbol named `default_buffer_size`, and _always_ allocate from + the heap. In the future, we can convert stuff like this to a + C++ STL `vector` and let the standard library worry about + dynamic storage management. + (read_long_escape_parameters, do_get_long_name) + (get_delimited_name): Do it. + 2025-07-10 G. Branden Robinson <g.branden.robin...@gmail.com> * src/roff/troff/input.cpp: Fix code style nits. diff --git a/src/roff/troff/input.cpp b/src/roff/troff/input.cpp index b9acacd2c..4275e9f87 100644 --- a/src/roff/troff/input.cpp +++ b/src/roff/troff/input.cpp @@ -80,12 +80,12 @@ extern "C" { (WARN_CHAR|WARN_NUMBER|WARN_BREAK|WARN_SPACE|WARN_FONT|WARN_FILE) #endif -// initial size of buffer for reading names; expanded as necessary -#define ABUF_SIZE 16 - extern "C" const char *program_name; extern "C" const char *Version_string; +// initial size for input buffers that need to grow arbitrarily +static const int default_buffer_size = 16; + #ifdef COLUMN void init_column_requests(); #endif /* COLUMN */ @@ -995,39 +995,28 @@ static symbol read_two_char_escape_parameter() static symbol read_long_escape_parameters(read_mode mode) { int start_level = input_stack::get_level(); - char abuf[ABUF_SIZE]; - char *buf = abuf; - int buf_size = ABUF_SIZE; + int buf_size = default_buffer_size; + char *buf = new char[buf_size]; int i = 0; char c; bool have_char = false; for (;;) { c = get_char_for_escape_parameter(have_char && (WITH_ARGS == mode)); if ('\0' == c) { - if (buf != abuf) - delete[] buf; + delete[] buf; return NULL_SYMBOL; } have_char = true; if ((WITH_ARGS == mode) && (' ' == c)) break; - if ((i + 2) > buf_size) { - if (buf == abuf) { - // C++03: new char[ABUF_SIZE * 2](); - buf = new char[ABUF_SIZE * 2]; - (void) memset(buf, 0, (ABUF_SIZE * 2 * sizeof(char))); - memcpy(buf, abuf, buf_size); - buf_size = ABUF_SIZE * 2; - } - else { - char *old_buf = buf; - // C++03: new char[buf_size * 2](); - buf = new char[buf_size * 2]; - (void) memset(buf, 0, (buf_size * 2 * sizeof(char))); - memcpy(buf, old_buf, buf_size); - buf_size *= 2; - delete[] old_buf; - } + if (i + 2 > buf_size) { + char *old_buf = buf; + // C++03: new char[buf_size * 2](); + buf = new char[buf_size * 2]; + (void) memset(buf, 0, (buf_size * 2 * sizeof(char))); + memcpy(buf, old_buf, buf_size); + buf_size *= 2; + delete[] old_buf; } if ((']' == c) && (input_stack::get_level() == start_level)) break; @@ -1043,14 +1032,9 @@ static symbol read_long_escape_parameters(read_mode mode) } if (' ' == c) have_multiple_params = true; - if (buf == abuf) { - return symbol(abuf); - } - else { - symbol s(buf); - delete[] buf; - return s; - } + symbol s(buf); + delete[] buf; + return s; } static symbol read_escape_parameter(read_mode mode) @@ -2834,29 +2818,19 @@ static symbol do_get_long_name(bool required, char end_char) { while (tok.is_space()) tok.next(); - char abuf[ABUF_SIZE]; - char *buf = abuf; - int buf_size = ABUF_SIZE; + int buf_size = default_buffer_size; + char *buf = new char[buf_size]; int i = 0; for (;;) { // If `end_char` != `\0` we normally have to append a null byte. if ((i + 2) > buf_size) { - if (buf == abuf) { - // C++03: new char[ABUF_SIZE * 2](); - buf = new char[ABUF_SIZE * 2]; - (void) memset(buf, 0, (ABUF_SIZE * 2 * sizeof(char))); - memcpy(buf, abuf, (buf_size * sizeof(char))); - buf_size = ABUF_SIZE * 2; - } - else { - char *old_buf = buf; - // C++03: new char[buf_size * 2](); - buf = new char[buf_size * 2]; - (void) memset(buf, 0, (buf_size * 2 * sizeof(char))); - memcpy(buf, old_buf, (buf_size * sizeof(char))); - buf_size *= 2; - delete[] old_buf; - } + char *old_buf = buf; + // C++03: new char[buf_size * 2](); + buf = new char[buf_size * 2]; + (void) memset(buf, 0, (buf_size * 2 * sizeof(char))); + memcpy(buf, old_buf, (buf_size * sizeof(char))); + buf_size *= 2; + delete[] old_buf; } if ((buf[i] = tok.ch()) == '\0' || (buf[i] == end_char)) break; @@ -2871,13 +2845,9 @@ static symbol do_get_long_name(bool required, char end_char) buf[i + 1] = '\0'; else diagnose_invalid_identifier(); - if (buf == abuf) - return symbol(buf); - else { - symbol s(buf); - delete[] buf; - return s; - } + symbol s(buf); + delete[] buf; + return s; } static void close_all_streams(); @@ -5801,28 +5771,18 @@ static symbol get_delimited_name() return NULL_SYMBOL; } int start_level = input_stack::get_level(); - char abuf[ABUF_SIZE]; - char *buf = abuf; - int buf_size = ABUF_SIZE; + int buf_size = default_buffer_size; + char *buf = new char[buf_size]; int i = 0; for (;;) { if ((i + 1) > buf_size) { - if (buf == abuf) { - // C++03: new char[ABUF_SIZE * 2](); - buf = new char[ABUF_SIZE * 2]; - (void) memset(buf, 0, (ABUF_SIZE * 2 * sizeof(char))); - memcpy(buf, abuf, buf_size); - buf_size = ABUF_SIZE * 2; - } - else { - char *old_buf = buf; - // C++03: new char[buf_size * 2](); - buf = new char[buf_size * 2]; - (void) memset(buf, 0, (buf_size * 2 * sizeof(char))); - memcpy(buf, old_buf, buf_size); - buf_size *= 2; - delete[] old_buf; - } + char *old_buf = buf; + // C++03: new char[buf_size * 2](); + buf = new char[buf_size * 2]; + (void) memset(buf, 0, (buf_size * 2 * sizeof(char))); + memcpy(buf, old_buf, buf_size); + buf_size *= 2; + delete[] old_buf; } tok.next(); if ((tok == start_token) @@ -5838,26 +5798,19 @@ static symbol get_delimited_name() error("closing delimiter does not match; expected %1, got %2", delimdesc, tok.description()); free(delimdesc); - if (buf != abuf) - delete[] buf; + delete[] buf; return NULL_SYMBOL; } i++; } buf[i] = '\0'; - if (buf == abuf) { - if (0 == i) { - error("empty delimited name"); - return NULL_SYMBOL; - } - else - return symbol(buf); - } - else { - symbol s(buf); - delete[] buf; - return s; + if (0 == i) { + error("empty delimited name"); + return NULL_SYMBOL; } + symbol s(buf); + delete[] buf; + return s; } static void do_register() // \R _______________________________________________ groff-commit mailing list groff-commit@gnu.org https://lists.gnu.org/mailman/listinfo/groff-commit