This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "GNU M4 source repository".
http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=eeddccf0d89edca640eeb86a879332019048ad08 The branch, branch-1.6 has been updated via eeddccf0d89edca640eeb86a879332019048ad08 (commit) from 1c206fcba932189c7710a64975fa57bfb54bec5b (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit eeddccf0d89edca640eeb86a879332019048ad08 Author: Eric Blake <[email protected]> Date: Fri Feb 29 14:39:35 2008 -0700 Stage 29: Process input by buffer, not bytes. * m4/gnulib-cache.m4: Import freadptr, freadseek, and memchr2 modules. * src/input.c (next_buffer, consume_buffer): New functions. (skip_line, match_input, next_token): Use them to scan a buffer at a time. * NEWS: Document this. Suggested by Bruno Haible: http://lists.gnu.org/archive/html/m4-discuss/2008-02/msg00010.html http://lists.gnu.org/archive/html/m4-discuss/2008-02/msg00012.html Signed-off-by: Eric Blake <[email protected]> (cherry picked from commit 69f894d261851504f9f8dc11f71e7da153bb0ebd) ----------------------------------------------------------------------- Summary of changes: ChangeLog | 16 +++ NEWS | 2 + m4/gnulib-cache.m4 | 5 +- src/input.c | 301 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 312 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index 84fce2d..88a3723 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,21 @@ 2009-02-16 Eric Blake <[email protected]> + Stage 29: Process input by buffer, not bytes. + Enhance input engine to provide lookahead buffer, rather than + forcing clients to call next_char for every byte. Utilize this + new interface in all clients. + Memory impact: none. + Speed impact: noticeable improvement, from fewer function calls. + * m4/gnulib-cache.m4: Import freadptr, freadseek, and memchr2 + modules. + * src/input.c (next_buffer, consume_buffer): New functions. + (skip_line, match_input, next_token): Use them to scan a buffer at + a time. + * NEWS: Document this. + Suggested by Bruno Haible: + http://lists.gnu.org/archive/html/m4-discuss/2008-02/msg00010.html + http://lists.gnu.org/archive/html/m4-discuss/2008-02/msg00012.html + Avoid test failure due to different errno. * doc/m4.texinfo (Using frozen files): Ignore stderr, since hardened systems can prevent attempts to read /. diff --git a/NEWS b/NEWS index bfcb684..69c0bb8 100644 --- a/NEWS +++ b/NEWS @@ -28,6 +28,8 @@ Software Foundation, Inc. be silenced by applying this patch: http://git.sv.gnu.org/gitweb/?p=autoconf.git;a=commitdiff;h=714eeee87 +** Improve the speed of the input engine. + ** Fix the `m4wrap' builtin to accumulate wrapped text in FIFO order, as required by POSIX. The manual mentions a way to restore the LIFO order present in earlier GNU M4 versions. NOTE: this change exposes a bug diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4 index 49a778d..e235e5c 100644 --- a/m4/gnulib-cache.m4 +++ b/m4/gnulib-cache.m4 @@ -15,7 +15,7 @@ # Specification in the form of a command-line invocation: -# gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 announce-gen assert autobuild avltree-oset binary-io clean-temp cloexec close-stream closein config-h dirname error fdl-1.3 fflush filenamecat flexmember fopen fopen-safer fseeko gendocs getopt git-version-gen gnumakefile gnupload gpl-3.0 hash intprops memmem mkstemp obstack obstack-printf-posix progname quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xmemdup0 xprintf xvasprintf-posix +# gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 announce-gen assert autobuild avltree-oset binary-io clean-temp cloexec close-stream closein config-h dirname error fdl-1.3 fflush filenamecat flexmember fopen fopen-safer freadptr freadseek fseeko gendocs getopt git-version-gen gnumakefile gnupload gpl-3.0 hash intprops memchr2 memmem mkstemp obstack obstack-printf-posix progname quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xmemdup0 xprintf xvasprintf-posix # Specification in the form of a few gnulib-tool.m4 macro invocations: gl_LOCAL_DIR([local]) @@ -38,6 +38,8 @@ gl_MODULES([ flexmember fopen fopen-safer + freadptr + freadseek fseeko gendocs getopt @@ -47,6 +49,7 @@ gl_MODULES([ gpl-3.0 hash intprops + memchr2 memmem mkstemp obstack diff --git a/src/input.c b/src/input.c index 822f55a..2acbd70 100644 --- a/src/input.c +++ b/src/input.c @@ -1,7 +1,7 @@ /* GNU m4 -- A simple macro processor - Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, 2007, - 2008 Free Software Foundation, Inc. + Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, + 2007, 2008, 2009 Free Software Foundation, Inc. This file is part of GNU M4. @@ -23,6 +23,10 @@ #include "m4.h" +#include "freadptr.h" +#include "freadseek.h" +#include "memchr2.h" + /* Unread input can be either files to be read (command line, "include", "sinclude"), strings which should be rescanned (macro expansion text), or quoted macro definitions (as returned by the @@ -794,6 +798,165 @@ input_print (struct obstack *obs) } +/*-------------------------------------------------------------------. +| Return a pointer to the available bytes of the current input | +| block, and set *LEN to the length of the result. If ALLOW_QUOTE, | +| do not return a buffer for a quoted string. If the result of | +| next_char() would not fit in an unsigned char (for example, | +| CHAR_EOF or CHAR_QUOTE), or if the input block does not have an | +| available buffer at the moment (for example, when hitting a buffer | +| block boundary of a file), return NULL, and the caller must fall | +| back on using next_char(). The buffer is only valid until the | +| next consume_buffer() or next_char(). When searching for a | +| particular byte, it is more efficient to search a buffer at a time | +| than it is to repeatedly call next_char. | +`-------------------------------------------------------------------*/ + +static const char * +next_buffer (size_t *len, bool allow_quote) +{ + token_chain *chain; + + while (1) + { + assert (isp); + if (input_change) + { + current_file = isp->file; + current_line = isp->line; + input_change = false; + } + + switch (isp->type) + { + case INPUT_STRING: + if (isp->u.u_s.len) + { + *len = isp->u.u_s.len; + return isp->u.u_s.str; + } + break; + + case INPUT_FILE: + if (start_of_input_line) + { + start_of_input_line = false; + current_line = ++isp->line; + } + if (isp->u.u_f.end) + break; + return freadptr (isp->u.u_f.fp, len); + + case INPUT_CHAIN: + chain = isp->u.u_c.chain; + while (chain) + { + if (allow_quote && chain->quote_age == current_quote_age) + return NULL; /* CHAR_QUOTE doesn't fit in buffer. */ + switch (chain->type) + { + case CHAIN_STR: + if (chain->u.u_s.len) + { + *len = chain->u.u_s.len; + return chain->u.u_s.str; + } + if (chain->u.u_s.level >= 0) + adjust_refcount (chain->u.u_s.level, false); + break; + case CHAIN_FUNC: + if (chain->u.func) + return NULL; /* CHAR_MACRO doesn't fit in buffer. */ + break; + case CHAIN_ARGV: + if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv)) + { + arg_adjust_refcount (chain->u.u_a.argv, false); + break; + } + return NULL; /* No buffer to provide. */ + case CHAIN_LOC: + isp->file = chain->u.u_l.file; + isp->line = chain->u.u_l.line; + input_change = true; + isp->u.u_c.chain = chain->next; + return next_buffer (len, allow_quote); + default: + assert (!"next_buffer"); + abort (); + } + isp->u.u_c.chain = chain = chain->next; + } + break; + + case INPUT_EOF: + return NULL; /* CHAR_EOF doesn't fit in buffer. */ + + default: + assert (!"next_buffer"); + abort (); + } + + /* End of input source --- pop one level. */ + pop_input (true); + } +} + +/*-----------------------------------------------------------------. +| Consume LEN bytes from the current input block, as though by LEN | +| calls to next_char(). LEN must be less than or equal to the | +| previous length returned by a successful call to next_buffer(). | +`-----------------------------------------------------------------*/ + +static void +consume_buffer (size_t len) +{ + token_chain *chain; + const char *buf; + const char *p; + size_t buf_len; + + assert (isp && !input_change && len); + switch (isp->type) + { + case INPUT_STRING: + assert (len <= isp->u.u_s.len); + isp->u.u_s.len -= len; + isp->u.u_s.str += len; + break; + + case INPUT_FILE: + assert (!start_of_input_line); + buf = freadptr (isp->u.u_f.fp, &buf_len); + assert (buf && len <= buf_len); + buf_len = 0; + while ((p = memchr (buf + buf_len, '\n', len - buf_len))) + { + if (p == buf + len - 1) + start_of_input_line = true; + else + current_line = ++isp->line; + buf_len = p - buf + 1; + } + if (freadseek (isp->u.u_f.fp, len) != 0) + assert (false); + break; + + case INPUT_CHAIN: + chain = isp->u.u_c.chain; + assert (chain && chain->type == CHAIN_STR && len <= chain->u.u_s.len); + /* Partial consumption invalidates quote age. */ + chain->quote_age = 0; + chain->u.u_s.len -= len; + chain->u.u_s.str += len; + break; + + default: + assert (!"consume_buffer"); + abort (); + } +} + /*------------------------------------------------------------------. | Low level input is done a character at a time. The function | | peek_input () is used to look at the next character in the input | @@ -1046,8 +1209,28 @@ skip_line (const call_info *name) { int ch; - while ((ch = next_char (false, false)) != CHAR_EOF && ch != '\n') - ; + while (1) + { + size_t len; + const char *buffer = next_buffer (&len, false); + if (buffer) + { + const char *p = (char *) memchr (buffer, '\n', len); + if (p) + { + consume_buffer (p - buffer + 1); + ch = '\n'; + break; + } + consume_buffer (len); + } + else + { + ch = next_char (false, false); + if (ch == CHAR_EOF || ch == '\n') + break; + } + } if (ch == CHAR_EOF) m4_warn (0, name, _("end of file treated as newline")); } @@ -1214,16 +1397,27 @@ match_input (const char *s, size_t slen, bool consume) int ch; /* input character */ const char *t; bool result = false; + size_t len; if (consume) { s++; slen--; } + /* Try a buffer match first. */ assert (slen); + t = next_buffer (&len, false); + if (t && slen <= len && memcmp (s, t, slen) == 0) + { + if (consume) + consume_buffer (slen); + return true; + } + + /* Fall back on byte matching. */ ch = peek_input (false); if (ch != to_uchar (*s)) - return false; /* fail */ + return false; if (slen == 1) { @@ -1677,7 +1871,29 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, obstack_grow (obs_td, curr_comm.str1, curr_comm.len1); while (1) { - ch = next_char (false, false); + /* Start with buffer search for potential end delimiter. */ + size_t len; + const char *buffer = next_buffer (&len, false); + if (buffer) + { + const char *p = (char *) memchr (buffer, *curr_comm.str2, len); + if (p) + { + obstack_grow (obs_td, buffer, p - buffer); + ch = to_uchar (*p); + consume_buffer (p - buffer + 1); + } + else + { + obstack_grow (obs_td, buffer, len); + consume_buffer (len); + continue; + } + } + + /* Fall back to byte-wise search. */ + else + ch = next_char (false, false); if (ch == CHAR_EOF) { /* Current_file changed to "" if we see CHAR_EOF, use @@ -1708,11 +1924,37 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, else if (default_word_regexp && (isalpha (ch) || ch == '_')) { obstack_1grow (&token_stack, ch); - while ((ch = peek_input (false)) < CHAR_EOF - && (isalnum (ch) || ch == '_')) + while (1) { - obstack_1grow (&token_stack, ch); - next_char (false, false); + size_t len; + const char *buffer = next_buffer (&len, false); + if (buffer) + { + const char *p = buffer; + while (len && (isalnum (to_uchar (*p)) || *p == '_')) + { + p++; + len--; + } + if (p != buffer) + { + obstack_grow (&token_stack, buffer, p - buffer); + consume_buffer (p - buffer); + } + if (len) + break; + } + else + { + ch = peek_input (false); + if (ch < CHAR_EOF && (isalnum (ch) || ch == '_')) + { + obstack_1grow (&token_stack, ch); + next_char (false, false); + } + else + break; + } } type = TOKEN_WORD; } @@ -1782,7 +2024,44 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, type = TOKEN_STRING; while (1) { - ch = next_char (obs != NULL && current_quote_age, false); + /* Start with buffer search for either potential delimiter. */ + size_t len; + const char *buffer = next_buffer (&len, obs && current_quote_age); + if (buffer) + { + const char *p = buffer; + do + { + p = (char *) memchr2 (p, *curr_quote.str1, *curr_quote.str2, + buffer + len - p); + } + while (p && current_quote_age + && (*p++ == *curr_quote.str2 + ? --quote_level : ++quote_level)); + if (p) + { + if (current_quote_age) + { + assert (!quote_level); + obstack_grow (obs_td, buffer, p - buffer - 1); + consume_buffer (p - buffer); + break; + } + obstack_grow (obs_td, buffer, p - buffer); + ch = to_uchar (*p); + consume_buffer (p - buffer + 1); + } + else + { + obstack_grow (obs_td, buffer, len); + consume_buffer (len); + continue; + } + } + + /* Fall back to byte-wise search. */ + else + ch = next_char (obs && current_quote_age, false); if (ch == CHAR_EOF) { /* Current_file changed to "" if we see CHAR_EOF, use hooks/post-receive -- GNU M4 source repository
