This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "GNU M4 source repository".
http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=4504da8cb8c5006044634a7b6a4df7c90487ec16 The branch, argv_ref has been updated via 4504da8cb8c5006044634a7b6a4df7c90487ec16 (commit) from ffac5851f2b3a287e0308502e79532432553bcf0 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 4504da8cb8c5006044634a7b6a4df7c90487ec16 Author: Eric Blake <[EMAIL PROTECTED]> Date: Tue Jul 15 09:33:43 2008 -0600 Stage33: make appending to existing macros linear ----------------------------------------------------------------------- Summary of changes: doc/m4.texinfo | 12 ++++++ src/builtin.c | 120 +++++++++++++++++++++++++++++++++++++++++--------------- src/m4.h | 9 ++++ src/macro.c | 89 +++++++++++++++++++++++++++++------------ 4 files changed, 172 insertions(+), 58 deletions(-) diff --git a/doc/m4.texinfo b/doc/m4.texinfo index 755725d..7b34589 100644 --- a/doc/m4.texinfo +++ b/doc/m4.texinfo @@ -8116,6 +8116,18 @@ include(`loop.m4')dnl @result{}10000 @end example [EMAIL PROTECTED] appending to macro definition + [EMAIL PROTECTED] examples [EMAIL PROTECTED] options: -Dlimit=50000 [EMAIL PROTECTED] +$ @kbd {m4 -I examples -Dlimit=50000} +define(`next', `.')define(`debug', `divert`'len(var)') [EMAIL PROTECTED] +include(`append.m4')dnl [EMAIL PROTECTED] [EMAIL PROTECTED] example + @end ignore @node Improved m4wrap diff --git a/src/builtin.c b/src/builtin.c index 4e29d00..6c7526a 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -424,6 +424,47 @@ set_macro_sequence (const char *regexp) macro_sequence_inuse = true; } +/*----------------------------------------------------------------. +| Check the contents DEFN of length LEN, of the new definition of | +| macro NAME of length NAME_LEN, for any match of | +| --warn-macro-sequence. | +`----------------------------------------------------------------*/ +static void +check_macro_sequence (const char *name, size_t name_len, const char *defn, + size_t len) +{ + if (macro_sequence_inuse && len) + { + regoff_t offset = 0; + struct re_registers *regs = ¯o_sequence_regs; + + while (offset < len + && (offset = re_search (¯o_sequence_buf, defn, len, offset, + len - offset, regs)) >= 0) + { + /* Skip empty matches. */ + if (regs->start[0] == regs->end[0]) + offset++; + else + { + offset = regs->end[0]; + /* Safe to use slot 1 since we don't pass a macro name + to m4_warn. */ + m4_warn (0, NULL, _("definition of %s contains sequence %s"), + quotearg_style_mem (locale_quoting_style, name, + name_len), + quotearg_n_style_mem (1, locale_quoting_style, + defn + regs->start[0], + regs->end[0] - regs->start[0])); + } + } + if (offset == -2) + m4_warn (0, NULL, + _("problem checking --warn-macro-sequence for macro %s"), + quotearg_style_mem (locale_quoting_style, name, name_len)); + } +} + /*------------------------------------------------------. | Free dynamic memory utilized by regular expressions. | `------------------------------------------------------*/ @@ -469,40 +510,10 @@ define_user_macro (const char *name, size_t name_len, const char *text, SYMBOL_TYPE (s) = TOKEN_TEXT; SYMBOL_TEXT (s) = defn; SYMBOL_TEXT_LEN (s) = len; + SYMBOL_TEXT_ALLOCATED (s) = len; SYMBOL_TEXT_QUOTE_AGE (s) = quote_age; SYMBOL_MACRO_ARGS (s) = true; - - /* Implement --warn-macro-sequence. */ - if (macro_sequence_inuse && text) - { - regoff_t offset = 0; - struct re_registers *regs = ¯o_sequence_regs; - - while (offset < len - && (offset = re_search (¯o_sequence_buf, defn, len, offset, - len - offset, regs)) >= 0) - { - /* Skip empty matches. */ - if (regs->start[0] == regs->end[0]) - offset++; - else - { - offset = regs->end[0]; - /* Safe to use slot 1 since we don't pass a macro name - to m4_warn. */ - m4_warn (0, NULL, _("definition of %s contains sequence %s"), - quotearg_style_mem (locale_quoting_style, name, - name_len), - quotearg_n_style_mem (1, locale_quoting_style, - defn + regs->start[0], - regs->end[0] - regs->start[0])); - } - } - if (offset == -2) - m4_warn (0, NULL, - _("problem checking --warn-macro-sequence for macro %s"), - quotearg_style_mem (locale_quoting_style, name, name_len)); - } + check_macro_sequence (name, name_len, defn, len); } /*-----------------------------------------------. @@ -727,6 +738,51 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode) static void m4_define (struct obstack *obs, int argc, macro_arguments *argv) { + /* Special case if we detect that the user is appending to an + existing text macro: try to copy only the suffix into the + over-allocated slop, to avoid quadratic behavior of always + copying the prefix on each append. + + Pending expansions of the existing macro must be exactly 1 (the + macro's existing definition is in use as the prefix of the + argument); if pending expansions is zero, we are not appending to + the current macro, and if it is larger than 1, then either the + macro's existing definition is in use elsewhere and must not be + invalidated, or we are at least doubling the size of the macro's + defintion which means the suffix wouldn't fit in the + pre-allocated slop anyway. */ + if (argc > 2 && arg_type (argv, 1) == TOKEN_TEXT + && arg_type (argv, 2) == TOKEN_TEXT) + { + const char *name = ARG (1); + size_t name_len = ARG_LEN (1); + symbol *s = lookup_symbol (name, name_len, SYMBOL_LOOKUP); + const char *tail; + if (s && SYMBOL_TYPE (s) == TOKEN_TEXT + && SYMBOL_PENDING_EXPANSIONS (s) == 1 + && (tail = arg_has_prefix (argv, 2, SYMBOL_TEXT (s), + SYMBOL_TEXT_LEN (s)))) + { + size_t len = ARG_LEN (2); + if (SYMBOL_TEXT_ALLOCATED (s) < len) + { + size_t new_len = len * 2; + char *old_defn = SYMBOL_TEXT (s); + if (new_len < len) + new_len = len; + SYMBOL_TEXT (s) = xcharalloc (new_len); + memcpy (SYMBOL_TEXT (s), old_defn, SYMBOL_TEXT_LEN (s)); + free (old_defn); + SYMBOL_TEXT_ALLOCATED (s) = new_len; + } + memcpy (SYMBOL_TEXT (s) + SYMBOL_TEXT_LEN (s), tail, + len - SYMBOL_TEXT_LEN (s)); + SYMBOL_TEXT_LEN (s) = len; + SYMBOL_TEXT_QUOTE_AGE (s) = arg_quote_age (argv); + check_macro_sequence (name, name_len, SYMBOL_TEXT (s), len); + return; + } + } define_macro (argc, argv, SYMBOL_INSERT); } diff --git a/src/m4.h b/src/m4.h index 3305fc2..69234ca 100644 --- a/src/m4.h +++ b/src/m4.h @@ -431,6 +431,12 @@ struct symbol char *name; size_t len; token_data data; /* Type should be only TOKEN_TEXT or TOKEN_FUNC. */ + /* In order to make appending an amortized O(n) rather than O(n^2), + we must geometrically over-allocate and modify only the suffix + when possible, rather than reallocating on every insertion. This + is only valid for TOKEN_TEXT, and is at least as large as + SYMBOL_TEXT_LEN. */ + size_t allocated; }; #define SYMBOL_NEXT(S) ((S)->next) @@ -447,6 +453,7 @@ struct symbol /* Only safe when SYMBOL_TYPE(S) == TOKEN_TEXT: */ #define SYMBOL_TEXT(S) (TOKEN_DATA_TEXT (&(S)->data)) #define SYMBOL_TEXT_LEN(S) (TOKEN_DATA_LEN (&(S)->data)) +#define SYMBOL_TEXT_ALLOCATED(S) ((S)->allocated) #define SYMBOL_TEXT_QUOTE_AGE(S) (TOKEN_DATA_QUOTE_AGE (&(S)->data)) /* Only safe when SYMBOL_TYPE(S) == TOKEN_FUNC: */ @@ -479,6 +486,8 @@ unsigned int arg_quote_age (macro_arguments *); token_data_type arg_type (macro_arguments *, unsigned int); const char *arg_text (macro_arguments *, unsigned int, bool); bool arg_equal (macro_arguments *, unsigned int, unsigned int); +const char *arg_has_prefix (macro_arguments *, unsigned int, const char *, + size_t); bool arg_empty (macro_arguments *, unsigned int); size_t arg_len (macro_arguments *, unsigned int, bool); builtin_func *arg_func (macro_arguments *, unsigned int); diff --git a/src/macro.c b/src/macro.c index 4d16f76..00d4e17 100644 --- a/src/macro.c +++ b/src/macro.c @@ -912,6 +912,37 @@ arg_type (macro_arguments *argv, unsigned int arg) return type; } +/* Given a CHAIN, convert its links into text on OBS. If FLATTEN, + builtins are ignored. */ +static void +collect_chain (struct obstack *obs, token_chain *chain, bool flatten) +{ + while (chain) + { + switch (chain->type) + { + case CHAIN_STR: + obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len); + break; + case CHAIN_FUNC: + if (flatten) + break; + assert (!"flatten_chain"); + abort (); + case CHAIN_ARGV: + assert (!chain->u.u_a.has_func || flatten); + arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index, + quote_cache (NULL, chain->quote_age, chain->u.u_a.quotes), + flatten || chain->u.u_a.flatten, NULL, NULL, NULL, false); + break; + default: + assert (!"flatten_chain"); + abort (); + } + chain = chain->next; + } +} + /* Given ARGV, return the text at argument ARG. Abort if the argument is not text. Arg 0 is always text, and indices beyond argc return the empty string. If FLATTEN, builtins are ignored. The result is @@ -939,32 +970,7 @@ arg_text (macro_arguments *argv, unsigned int arg, bool flatten) case TOKEN_COMP: chain = token->u.u_c.chain; obs = arg_scratch (); - while (chain) - { - switch (chain->type) - { - case CHAIN_STR: - obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len); - break; - case CHAIN_FUNC: - if (flatten) - break; - assert (!"arg_text"); - abort (); - case CHAIN_ARGV: - assert (!chain->u.u_a.has_func || flatten || argv->flatten); - arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index, - quote_cache (NULL, chain->quote_age, - chain->u.u_a.quotes), - flatten || argv->flatten || chain->u.u_a.flatten, - NULL, NULL, NULL, false); - break; - default: - assert (!"arg_text"); - abort (); - } - chain = chain->next; - } + collect_chain (obs, chain, flatten || argv->flatten); obstack_1grow (obs, '\0'); return (char *) obstack_finish (obs); case TOKEN_FUNC: @@ -1124,6 +1130,37 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb) return ca == cb; } +/* Given index ARG within ARGV, if the argument is determined to be + appending text onto the existing definition DEFN of length LEN, + then return only the text occurring after LEN. Otherwise, return + NULL. This is useful for optimizing the builtin define, making + appending O(n) rather than O(n^2). */ +const char * +arg_has_prefix (macro_arguments *argv, unsigned int arg, const char *defn, + size_t len) +{ + token_data *token; + token_chain *chain; + struct obstack *obs; /* Scratch space; cleaned at end of macro_expand. */ + + if (arg == 0 || arg >= argv->argc) + return NULL; + token = arg_token (argv, arg, NULL, false); + if (TOKEN_DATA_TYPE (token) != TOKEN_COMP + || (!argv->flatten && token->u.u_c.has_func)) + return NULL; + chain = token->u.u_c.chain; + assert (chain); + if (chain->type == CHAIN_STR && chain->u.u_s.str == defn + && chain->u.u_s.len == len) + { + obs = arg_scratch (); + collect_chain (obs, chain->next, argv->flatten); + return (char *) obstack_finish (obs); + } + return NULL; +} + /* Given ARGV, return true if argument ARG is the empty string. This gives the same result as comparing arg_len against 0, but is often faster. */ hooks/post-receive -- GNU M4 source repository
