I'm applying these patches to all three active branches. The first fixes a potential bug in undivert due to a stack allocation of a 16k buffer; on some systems, a stack overflow that overshoots a 4k guard page can lead to silent termination of the program without invoking the SIGSEGV fault handler, or other equally bad behavior.
Next, I made esyscmd dump directly into the obstack via fread, rather than repeatedly calling fgetc. I also added detection for (unlikely) read errors during esyscmd. Finally, I found more places that could use memchr instead of byte-wise searches, although the relative frequency of regexp (\), patsubst (\), and format (%) don't give as much noticeable speedup as today's earlier patch to macro expansion ($). >From 01f216c3c98d41db7878b18c93d23e8dc38d8643 Mon Sep 17 00:00:00 2001 From: Eric Blake <[email protected]> Date: Wed, 18 Feb 2009 11:25:01 -0700 Subject: [PATCH 1/3] Avoid risk of stack overflow. * src/output.c (insert_file): Avoid stack allocation of large buffer. (freeze_diversions): Avoid spurious semicolon. Signed-off-by: Eric Blake <[email protected]> --- ChangeLog | 7 +++++++ src/output.c | 14 ++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0176fe4..cb35601 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2009-02-18 Eric Blake <[email protected]> + + Avoid risk of stack overflow. + * src/output.c (insert_file): Avoid stack allocation of large + buffer. + (freeze_diversions): Avoid spurious semicolon. + 2009-02-16 Eric Blake <[email protected]> Avoid test failure due to different errno. diff --git a/src/output.c b/src/output.c index 0586b89..a39d1e5 100644 --- a/src/output.c +++ b/src/output.c @@ -1,7 +1,7 @@ /* GNU m4 -- A simple macro processor Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, - 2007, 2008 Free Software Foundation, Inc. + 2007, 2008, 2009 Free Software Foundation, Inc. This file is part of GNU M4. @@ -812,21 +812,19 @@ make_diversion (int divnum) void insert_file (FILE *file) { - char buffer[COPY_BUFFER_SIZE]; + static char buffer[COPY_BUFFER_SIZE]; size_t length; /* Optimize out inserting into a sink. */ - if (!output_diversion) return; /* Insert output by big chunks. */ - - for (;;) + while (1) { - length = fread (buffer, 1, COPY_BUFFER_SIZE, file); + length = fread (buffer, 1, sizeof buffer, file); if (ferror (file)) - M4ERROR ((EXIT_FAILURE, errno, "ERROR: reading inserted file")); + M4ERROR ((EXIT_FAILURE, errno, "error reading inserted file")); if (length == 0) break; output_text (buffer, length); @@ -982,7 +980,7 @@ freeze_diversions (FILE *file) iter = gl_oset_iterator (diversion_table); while (gl_oset_iterator_next (&iter, &elt)) { - m4_diversion *diversion = (m4_diversion *) elt;; + m4_diversion *diversion = (m4_diversion *) elt; if (diversion->size || diversion->used) { if (diversion->size) -- 1.6.1.2 >From 1974da270a84d8c0cf2b3e3e08ce5292fdb211f3 Mon Sep 17 00:00:00 2001 From: Eric Blake <[email protected]> Date: Wed, 18 Feb 2009 11:38:54 -0700 Subject: [PATCH 2/3] Speed up esyscmd with buffer reads. * src/builtin.c (m4_esyscmd): Read blocks directly into obstack, rather than repeatedly reading bytes. Detect read errors. Signed-off-by: Eric Blake <[email protected]> --- ChangeLog | 4 ++++ src/builtin.c | 24 ++++++++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index cb35601..0a6c47e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2009-02-18 Eric Blake <[email protected]> + Speed up esyscmd with buffer reads. + * src/builtin.c (m4_esyscmd): Read blocks directly into obstack, + rather than repeatedly reading bytes. Detect read errors. + Avoid risk of stack overflow. * src/output.c (insert_file): Avoid stack allocation of large buffer. diff --git a/src/builtin.c b/src/builtin.c index 50f10cd..4a89fcc 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -1,7 +1,7 @@ /* GNU m4 -- A simple macro processor Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2000, 2004, 2006, - 2007, 2008 Free Software Foundation, Inc. + 2007, 2008, 2009 Free Software Foundation, Inc. This file is part of GNU M4. @@ -999,7 +999,6 @@ static void m4_esyscmd (struct obstack *obs, int argc, token_data **argv) { FILE *pin; - int ch; if (bad_argc (argv[0], argc, 2, 2)) { @@ -1019,8 +1018,25 @@ m4_esyscmd (struct obstack *obs, int argc, token_data **argv) } else { - while ((ch = getc (pin)) != EOF) - obstack_1grow (obs, (char) ch); + while (1) + { + size_t avail = obstack_room (obs); + size_t len; + if (!avail) + { + int ch = getc (pin); + if (ch == EOF) + break; + obstack_1grow (obs, ch); + continue; + } + len = fread (obstack_next_free (obs), 1, avail, pin); + if (len <= 0) + break; + obstack_blank_fast (obs, len); + } + if (ferror (pin)) + M4ERROR ((EXIT_FAILURE, errno, "cannot read pipe")); sysval = pclose (pin); } } -- 1.6.1.2 >From 45a154e6d70517bf1b837715aae2fe366c9c1116 Mon Sep 17 00:00:00 2001 From: Eric Blake <[email protected]> Date: Wed, 18 Feb 2009 11:03:55 -0700 Subject: [PATCH 3/3] Prefer buffer over byte operations. * src/format.c (expand_format): Use strchr for speed. * src/builtin.c (substitute, expand_user_macro): Likewise. Signed-off-by: Eric Blake <[email protected]> --- ChangeLog | 4 ++++ src/builtin.c | 38 +++++++++++++++++++++----------------- src/format.c | 16 +++++++++------- 3 files changed, 34 insertions(+), 24 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0a6c47e..3a80a51 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2009-02-18 Eric Blake <[email protected]> + Prefer buffer over byte operations. + * src/format.c (expand_format): Use strchr for speed. + * src/builtin.c (substitute, expand_user_macro): Likewise. + Speed up esyscmd with buffer reads. * src/builtin.c (m4_esyscmd): Read blocks directly into obstack, rather than repeatedly reading bytes. Detect read errors. diff --git a/src/builtin.c b/src/builtin.c index 4a89fcc..a89b861 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -1902,17 +1902,18 @@ substitute (struct obstack *obs, const char *victim, const char *repl, struct re_registers *regs) { int ch; - - for (;;) + while (1) { - while ((ch = *repl++) != '\\') + const char *backslash = strchr (repl, '\\'); + if (!backslash) { - if (ch == '\0') - return; - obstack_1grow (obs, ch); + obstack_grow (obs, repl, strlen (repl)); + return; } - - switch ((ch = *repl++)) + obstack_grow (obs, repl, backslash - repl); + repl = backslash; + ch = *++repl; + switch (ch) { case '0': if (!substitute_warned) @@ -1926,6 +1927,7 @@ Warning: \\0 will disappear, use \\& instead in replacements")); case '&': obstack_grow (obs, victim + regs->start[0], regs->end[0] - regs->start[0]); + repl++; break; case '1': case '2': case '3': case '4': case '5': case '6': @@ -1937,6 +1939,7 @@ Warning: \\0 will disappear, use \\& instead in replacements")); else if (regs->end[ch] > 0) obstack_grow (obs, victim + regs->start[ch], regs->end[ch] - regs->start[ch]); + repl++; break; case '\0': @@ -1946,6 +1949,7 @@ Warning: \\0 will disappear, use \\& instead in replacements")); default: obstack_1grow (obs, ch); + repl++; break; } } @@ -2151,19 +2155,19 @@ void expand_user_macro (struct obstack *obs, symbol *sym, int argc, token_data **argv) { - const char *text; + const char *text = SYMBOL_TEXT (sym); int i; - - for (text = SYMBOL_TEXT (sym); *text != '\0';) + while (1) { - if (*text != '$') + const char *dollar = strchr (text, '$'); + if (!dollar) { - obstack_1grow (obs, *text); - text++; - continue; + obstack_grow (obs, text, strlen (text)); + return; } - text++; - switch (*text) + obstack_grow (obs, text, dollar - text); + text = dollar; + switch (*++text) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': diff --git a/src/format.c b/src/format.c index 518394a..a0519a0 100644 --- a/src/format.c +++ b/src/format.c @@ -1,7 +1,7 @@ /* GNU m4 -- A simple macro processor - Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008 - Free Software Foundation, Inc. + Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008, + 2009 Free Software Foundation, Inc. This file is part of GNU M4. @@ -89,14 +89,16 @@ expand_format (struct obstack *obs, int argc, token_data **argv) f = fmt = ARG_STR (argc, argv); memset (ok, 0, sizeof ok); - for (;;) + while (1) { - while ((c = *fmt++) != '%') + const char *percent = strchr (fmt, '%'); + if (!percent) { - if (c == '\0') - return; - obstack_1grow (obs, c); + obstack_grow (obs, fmt, strlen (fmt)); + return; } + obstack_grow (obs, fmt, percent - fmt); + fmt = percent + 1; if (*fmt == '%') { -- 1.6.1.2 _______________________________________________ M4-patches mailing list [email protected] http://lists.gnu.org/mailman/listinfo/m4-patches
