This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "GNU M4 source repository".
http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=4da8364d3193dd976df9b89f87fe63bb2c82abd2 The branch, master has been updated via 4da8364d3193dd976df9b89f87fe63bb2c82abd2 (commit) from d8726aac7acd47435ad3e4dedef44d01cd828dc1 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 4da8364d3193dd976df9b89f87fe63bb2c82abd2 Author: Eric Blake <[email protected]> Date: Wed Feb 18 17:07:58 2009 -0700 Speed up translit when from argument is short. * modules/m4.c (translit): Use memchr2 when possible. * tests/builtins.at (translit): Add tests. * NEWS: Document this. Signed-off-by: Eric Blake <[email protected]> (cherry picked from commit 29b625aa941718e43cc04dfc217f314518bbc6d1) ----------------------------------------------------------------------- Summary of changes: ChangeLog | 7 +++++++ NEWS | 7 +++++-- modules/m4.c | 39 ++++++++++++++++++++++++++++++--------- tests/builtins.at | 27 +++++++++++++++++++++++++++ 4 files changed, 69 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9469fbe..f052cb4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2009-02-23 Eric Blake <[email protected]> + + Speed up translit when from argument is short. + * modules/m4.c (translit): Use memchr2 when possible. + * tests/builtins.at (translit): Add tests. + * NEWS: Document this. + 2009-02-18 Eric Blake <[email protected]> Prefer buffer over byte operations. diff --git a/NEWS b/NEWS index 55abe34..d2d42b5 100644 --- a/NEWS +++ b/NEWS @@ -214,8 +214,6 @@ promoted to 2.0. be silenced by applying this patch: http://git.sv.gnu.org/gitweb/?p=autoconf.git;a=commitdiff;h=714eeee87 -** Improve the speed of the input engine. - ** Fix the `m4wrap' builtin to accumulate wrapped text in FIFO order, as required by POSIX. The manual mentions a way to restore the LIFO order present in earlier GNU M4 versions. NOTE: this change exposes a bug @@ -345,6 +343,11 @@ contains the following beta features that were deemed worth deferring until ** The `divert' and `undivert' builtins have been made more efficient when using temporary files for large diversions. +** The `translit' builtin has been made more efficient when the second + argument is short. + +** The input engine has been optimized for faster processing. + ** The command line option `--debugfile', introduced in 1.4.7, now treats its argument as optional, in order to allow setting the debug output back to stderr when used without an argument; and order is now diff --git a/modules/m4.c b/modules/m4.c index 1ad1ce1..6844bf5 100644 --- a/modules/m4.c +++ b/modules/m4.c @@ -28,6 +28,7 @@ # include "m4private.h" #endif +#include "memchr2.h" #include "quotearg.h" #include "stdlib--.h" #include "tempname.h" @@ -1104,8 +1105,8 @@ M4BUILTIN_HANDLER (translit) const char *to; size_t from_len; size_t to_len; - char map[UCHAR_MAX + 1] = {0}; - char found[UCHAR_MAX + 1] = {0}; + char map[UCHAR_MAX + 1]; + char found[UCHAR_MAX + 1]; unsigned char ch; enum { ASIS, REPLACE, DELETE }; @@ -1118,26 +1119,46 @@ M4BUILTIN_HANDLER (translit) from = M4ARG (2); from_len = M4ARGLEN (2); - if (memchr (from, '-', from_len) != NULL) - { - from = m4_expand_ranges (from, &from_len, m4_arg_scratch (context)); - assert (from); - } to = M4ARG (3); to_len = M4ARGLEN (3); if (memchr (to, '-', to_len) != NULL) + to = m4_expand_ranges (to, &to_len, m4_arg_scratch (context)); + + /* If there are only one or two bytes to replace, it is faster to + use memchr2. Using expand_ranges does nothing unless there are + at least three bytes. */ + if (from_len <= 2) { - to = m4_expand_ranges (to, &to_len, m4_arg_scratch (context)); - assert (to); + const char *p; + size_t len = M4ARGLEN (1); + int second = from[from_len / 2]; + data = M4ARG (1); + while ((p = (char *) memchr2 (data, from[0], second, len))) + { + obstack_grow (obs, data, p - data); + len -= p - data + 1; + data = p + 1; + if (*p == from[0] && to_len) + obstack_1grow (obs, to[0]); + else if (*p == second && 1 < to_len) + obstack_1grow (obs, to[1]); + } + obstack_grow (obs, data, len); + return; } + if (memchr (from, '-', from_len) != NULL) + from = m4_expand_ranges (from, &from_len, m4_arg_scratch (context)); + /* Calling memchr(from) for each character in data is quadratic, since both strings can be arbitrarily long. Instead, create a from-to mapping in one pass of from, then use that map in one pass of data, for linear behavior. Traditional behavior is that only the first instance of a character in from is consulted, hence the found map. */ + memset (map, 0, sizeof map); + memset (found, 0, sizeof found); while (from_len--) { ch = *from++; diff --git a/tests/builtins.at b/tests/builtins.at index 46ce548..765ebec 100644 --- a/tests/builtins.at +++ b/tests/builtins.at @@ -1222,6 +1222,33 @@ AT_DATA([in], [[translit(` AT_CHECK_M4([in], [0], [[abc ]]) +dnl Validate short strings, which take a different code path. +AT_DATA([in], [[dnl +translit(`abcdeabcde', `a') +translit(`abcdeabcde', `ab') +translit(`abcdeabcde', `a', `f') +translit(`abcdeabcde', `aa', `fg') +translit(`abcdeabcde', `a', `fg') +translit(`abcdeabcde', `ab', `f') +translit(`abcdeabcde', `ab', `fg') +translit(`abcdeabcde', `ab', `ba') +translit(`abcdeabcde', `e', `f') +translit(`abc', `', `cde') +translit(`', `a', `bc') +]]) +AT_CHECK_M4([in], [0], [[bcdebcde +cdecde +fbcdefbcde +fbcdefbcde +fbcdefbcde +fcdefcde +fgcdefgcde +bacdebacde +abcdfabcdf +abc + +]]) + AT_CLEANUP hooks/post-receive -- GNU M4 source repository
