This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "GNU M4 source repository".
http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=a3a7734d1beabbb438656461076258f5ff32c08b The branch, branch-1.6 has been updated via a3a7734d1beabbb438656461076258f5ff32c08b (commit) from 0d6fb01e76bc35550a00cbf7710d1471db9e7b00 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit a3a7734d1beabbb438656461076258f5ff32c08b Author: Eric Blake <[EMAIL PROTECTED]> Date: Mon Jan 14 17:25:13 2008 -0700 Stage 26: Allow embedded NUL in macro definitions. * src/m4.h (set_word_regexp, arg_len, define_user_macro): Add parameters. (SYMBOL_TEXT_LEN): New macro. (ARG_LEN): Adjust callers. * src/builtin.c (define_user_macro): Add a parameter. (builtin_init, define_macro): Adjust callers. (m4_dumpdef, m4_defn, m4_changeword): Handle embedded NULs. (expand_user_macro): Handle embedded NUL, and speed up search for embedded $. * src/macro.c (arg_len): Add parameter. * src/input.c (set_word_regexp): Add parameter. (input_init): Adjust caller. * src/m4.c (main): Likewise. * src/freeze.c (dump_symbol_CB): Preserve NUL on freeze. (reload_frozen_state): Retrieve NUL on load. * doc/m4.texinfo (Builtin, Using frozen files): Enhance tests. * examples/null.m4: Likewise. * examples/null.out: Update expected output. * examples/null.err: Likewise. 
(cherry picked from commit cb26d7cb8b438224908d53df59b1d394ba1928f8) Signed-off-by: Eric Blake <[EMAIL PROTECTED]> ----------------------------------------------------------------------- Summary of changes: ChangeLog | 26 ++++++++++++++++++++ doc/m4.texinfo | 13 ++++++++-- examples/null.err | Bin 572 -> 713 bytes examples/null.m4 | Bin 6189 -> 6499 bytes examples/null.out | Bin 468 -> 510 bytes src/builtin.c | 67 +++++++++++++++++++++++++++++++--------------------- src/freeze.c | 6 ++-- src/input.c | 28 +++++++++++++-------- src/m4.c | 2 +- src/m4.h | 10 ++++--- src/macro.c | 25 +++++++------------ 11 files changed, 112 insertions(+), 65 deletions(-) diff --git a/ChangeLog b/ChangeLog index 325bf7a..7a50b85 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,31 @@ 2008-08-03 Eric Blake <[EMAIL PROTECTED]> + Stage 26: Allow embedded NUL in macro definitions. + Track macro definitions by length, to allow embedded NUL. Make + arg_len callers aware of the issue of flattening builtins when + determining length. Optimize loops that scan a definition. + Memory impact: none. + Speed impact: slight improvement, due to faster scans. + * src/m4.h (set_word_regexp, arg_len, define_user_macro): Add + parameters. + (SYMBOL_TEXT_LEN): New macro. + (ARG_LEN): Adjust callers. + * src/builtin.c (define_user_macro): Add a parameter. + (builtin_init, define_macro): Adjust callers. + (m4_dumpdef, m4_defn, m4_changeword): Handle embedded NULs. + (expand_user_macro): Handle embedded NUL, and speed up search for + embedded $. + * src/macro.c (arg_len): Add parameter. + * src/input.c (set_word_regexp): Add parameter. + (input_init): Adjust caller. + * src/m4.c (main): Likewise. + * src/freeze.c (dump_symbol_CB): Preserve NUL on freeze. + (reload_frozen_state): Retrieve NUL on load. + * doc/m4.texinfo (Builtin, Using frozen files): Enhance tests. + * examples/null.m4: Likewise. + * examples/null.out: Update expected output. + * examples/null.err: Likewise. 
+ Fix regression in commenting unbalanced quotes, from 2008-02-16. * src/m4.h (enum token_type): Add TOKEN_COMMENT. * src/input.c (next_token, peek_token, token_type_string) diff --git a/doc/m4.texinfo b/doc/m4.texinfo index d8e2625..7f3cb49 100644 --- a/doc/m4.texinfo +++ b/doc/m4.texinfo @@ -2684,6 +2684,13 @@ builtin(`builtin') builtin(`builtin',) @error{}m4:stdin:4: Warning: builtin: undefined builtin `' @result{} +builtin(`builtin', ``' +') [EMAIL PROTECTED]:stdin:5: Warning: builtin: undefined builtin ``\'\n' [EMAIL PROTECTED] +indir(`index') [EMAIL PROTECTED]:stdin:7: Warning: index: too few arguments: 0 < 2 [EMAIL PROTECTED] @end example @ignore @@ -7153,13 +7160,13 @@ ifdef(`__unix__', , `errprint(` skipping: syscmd does not have unix semantics ')m4exit(`77')')dnl changequote(`[', `]')dnl -syscmd([printf 'define(-\0-,hi)changequote([,\0])changecom(--\0)dnl +syscmd([printf 'define(-\0-,\0-\0)changequote([,\0])changecom(--\0)dnl divert(1)undivert(null.out)' | ]__program__[ -F in.m4f \ - && printf 'errprint([divnum\0] #-- indir(-\0-))' \ + && printf 'errprint([divnum\0] #-- len(indir(-\0-)))' \ | ]__program__[ -R in.m4f \ && rm in.m4f])errprint([ ]sysval[ ])dnl [EMAIL PROTECTED] #-- hi 0 [EMAIL PROTECTED] #-- 3 0 @end example @end ignore diff --git a/examples/null.err b/examples/null.err index 5f989ee..897ce34 100644 Binary files a/examples/null.err and b/examples/null.err differ diff --git a/examples/null.m4 b/examples/null.m4 index de76742..1823073 100644 Binary files a/examples/null.m4 and b/examples/null.m4 differ diff --git a/examples/null.out b/examples/null.out index 5e90221..dd83416 100644 Binary files a/examples/null.out and b/examples/null.out differ diff --git a/src/builtin.c b/src/builtin.c index f8a3f3c..cc21ea2 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -429,26 +429,32 @@ free_regex (void) } } -/*-----------------------------------------------------------------. 
-| Define a predefined or user-defined macro, with name NAME of | -| length NAME_LEN, and expansion TEXT. MODE is SYMBOL_INSERT for | -| "define" or SYMBOL_PUSHDEF for "pushdef". This function is also | -| used from main (). | -`-----------------------------------------------------------------*/ +/*------------------------------------------------------------------. +| Define a predefined or user-defined macro, with name NAME of | +| length NAME_LEN, and expansion TEXT of length LEN. LEN may be | +| SIZE_MAX, to use the string length of TEXT instead. MODE is | +| SYMBOL_INSERT for "define" or SYMBOL_PUSHDEF for "pushdef". This | +| function is also used from main (). | +`------------------------------------------------------------------*/ void define_user_macro (const char *name, size_t name_len, const char *text, - symbol_lookup mode) + size_t len, symbol_lookup mode) { symbol *s; - char *defn = xstrdup (text ? text : ""); + char *defn; + assert (text); + if (len == SIZE_MAX) + len = strlen (text); + defn = xmemdup (text, len); s = lookup_symbol (name, name_len, mode); if (SYMBOL_TYPE (s) == TOKEN_TEXT) free (SYMBOL_TEXT (s)); SYMBOL_TYPE (s) = TOKEN_TEXT; SYMBOL_TEXT (s) = defn; + SYMBOL_TEXT_LEN (s) = len; SYMBOL_MACRO_ARGS (s) = true; /* Implement --warn-macro-sequence. 
*/ @@ -456,7 +462,6 @@ define_user_macro (const char *name, size_t name_len, const char *text, { regoff_t offset = 0; struct re_registers *regs = &macro_sequence_regs; - size_t len = strlen (defn); while (offset < len && (offset = re_search (&macro_sequence_buf, defn, len, offset, @@ -515,13 +520,13 @@ builtin_init (void) { if (pp->unix_name != NULL) define_user_macro (pp->unix_name, strlen (pp->unix_name), - pp->func, SYMBOL_INSERT); + pp->func, SIZE_MAX, SYMBOL_INSERT); } else { if (pp->gnu_name != NULL) define_user_macro (pp->gnu_name, strlen (pp->gnu_name), - pp->func, SYMBOL_INSERT); + pp->func, SIZE_MAX, SYMBOL_INSERT); } } @@ -675,7 +680,7 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode) if (argc == 2) { - define_user_macro (ARG (1), ARG_LEN (1), "", mode); + define_user_macro (ARG (1), ARG_LEN (1), "", 0, mode); return; } @@ -685,7 +690,8 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode) m4_warn (0, me, _("cannot concatenate builtins")); /* fallthru */ case TOKEN_TEXT: - define_user_macro (ARG (1), ARG_LEN (1), arg_text (argv, 2, true), mode); + define_user_macro (ARG (1), ARG_LEN (1), arg_text (argv, 2, true), + arg_len (argv, 2, true), mode); break; case TOKEN_FUNC: @@ -914,7 +920,8 @@ m4_dumpdef (struct obstack *obs, int argc, macro_arguments *argv) case TOKEN_TEXT: if (debug_level & DEBUG_TRACE_QUOTE) fwrite (curr_quote.str1, 1, curr_quote.len1, debug); - fputs (SYMBOL_TEXT (data.base[0]), debug); + fwrite (SYMBOL_TEXT (data.base[0]), 1, + SYMBOL_TEXT_LEN (data.base[0]), debug); if (debug_level & DEBUG_TRACE_QUOTE) fwrite (curr_quote.str2, 1, curr_quote.len2, debug); break; @@ -1049,7 +1056,7 @@ m4_defn (struct obstack *obs, int argc, macro_arguments *argv) { case TOKEN_TEXT: obstack_grow (obs, curr_quote.str1, curr_quote.len1); - obstack_grow (obs, SYMBOL_TEXT (s), strlen (SYMBOL_TEXT (s))); + obstack_grow (obs, SYMBOL_TEXT (s), SYMBOL_TEXT_LEN (s)); obstack_grow (obs, curr_quote.str2, curr_quote.len2); break; @@ 
-1422,7 +1429,7 @@ m4_changeword (struct obstack *obs, int argc, macro_arguments *argv) if (bad_argc (me, argc, 1, 1)) return; - set_word_regexp (me, ARG (1)); + set_word_regexp (me, ARG (1), ARG_LEN (1)); } #endif /* ENABLE_CHANGEWORD */ @@ -2305,29 +2312,31 @@ void expand_user_macro (struct obstack *obs, symbol *sym, int argc, macro_arguments *argv) { - const char *text; + const char *text = SYMBOL_TEXT (sym); + size_t len = SYMBOL_TEXT_LEN (sym); int i; + const char *dollar = memchr (text, '$', len); - for (text = SYMBOL_TEXT (sym); *text != '\0';) + while (dollar) { - if (*text != '$') - { - obstack_1grow (obs, *text); - text++; - continue; - } - text++; - switch (*text) + obstack_grow (obs, text, dollar - text); + len -= dollar - text; + text = dollar; + if (len == 1) + break; + len--; + switch (*++text) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (no_gnu_extensions) { i = *text++ - '0'; + len--; } else { - for (i = 0; isdigit (to_uchar (*text)); text++) + for (i = 0; len && isdigit (to_uchar (*text)); text++, len--) i = i * 10 + (*text - '0'); } push_arg (obs, argv, i); @@ -2336,17 +2345,21 @@ expand_user_macro (struct obstack *obs, symbol *sym, case '#': /* number of arguments */ shipout_int (obs, argc - 1); text++; + len--; break; case '*': /* all arguments */ case '@': /* ... 
same, but quoted */ push_args (obs, argv, false, *text == '@'); text++; + len--; break; default: obstack_1grow (obs, '$'); break; } + dollar = memchr (text, '$', len); } + obstack_grow (obs, text, len); } diff --git a/src/freeze.c b/src/freeze.c index 2a7d9dc..c45722f 100644 --- a/src/freeze.c +++ b/src/freeze.c @@ -75,9 +75,9 @@ dump_symbol_CB (symbol *sym, void *f) case TOKEN_TEXT: xfprintf (file, "T%d,%d\n", (int) SYMBOL_NAME_LEN (sym), - (int) strlen (SYMBOL_TEXT (sym))); + (int) SYMBOL_TEXT_LEN (sym)); fwrite (SYMBOL_NAME (sym), 1, SYMBOL_NAME_LEN (sym), file); - fputs (SYMBOL_TEXT (sym), file); + fwrite (SYMBOL_TEXT (sym), 1, SYMBOL_TEXT_LEN (sym), file); fputc ('\n', file); break; @@ -379,7 +379,7 @@ reload_frozen_state (const char *name) /* Enter a macro having an expansion text as a definition. */ - define_user_macro (string[0], number[0], string[1], + define_user_macro (string[0], number[0], string[1], number[1], SYMBOL_PUSHDEF); break; diff --git a/src/input.c b/src/input.c index 4f969b7..b967087 100644 --- a/src/input.c +++ b/src/input.c @@ -1309,7 +1309,7 @@ input_init (void) curr_comm.len2 = 1; #ifdef ENABLE_CHANGEWORD - set_word_regexp (NULL, user_word_regexp); + set_word_regexp (NULL, user_word_regexp, SIZE_MAX); #endif /* ENABLE_CHANGEWORD */ set_quote_age (); @@ -1406,19 +1406,24 @@ set_comment (const char *bc, size_t bc_len, const char *ec, size_t ec_len) #ifdef ENABLE_CHANGEWORD -/*-------------------------------------------------------------------. -| Set the regular expression for recognizing words to REGEXP, and | -| report errors on behalf of CALLER. If REGEXP is NULL, revert back | -| to the default parsing rules. | -`-------------------------------------------------------------------*/ +/*-----------------------------------------------------------------. +| Set the regular expression for recognizing words to REGEXP of | +| length LEN, and report errors on behalf of CALLER. If REGEXP is | +| NULL, revert back to the default parsing rules. 
If LEN is | +| SIZE_MAX, use strlen(REGEXP) instead. | +`-----------------------------------------------------------------*/ void -set_word_regexp (const call_info *caller, const char *regexp) +set_word_regexp (const call_info *caller, const char *regexp, size_t len) { const char *msg; struct re_pattern_buffer new_word_regexp; - if (!*regexp || !strcmp (regexp, DEFAULT_WORD_REGEXP)) + if (len == SIZE_MAX) + len = strlen (regexp); + if (len == 0 + || (len == strlen (DEFAULT_WORD_REGEXP) + && !memcmp (regexp, DEFAULT_WORD_REGEXP, len))) { default_word_regexp = true; set_quote_age (); @@ -1427,12 +1432,13 @@ set_word_regexp (const call_info *caller, const char *regexp) /* Dry run to see whether the new expression is compilable. */ init_pattern_buffer (&new_word_regexp, NULL); - msg = re_compile_pattern (regexp, strlen (regexp), &new_word_regexp); + msg = re_compile_pattern (regexp, len, &new_word_regexp); regfree (&new_word_regexp); if (msg != NULL) { - m4_warn (0, caller, _("bad regular expression `%s': %s"), regexp, msg); + m4_warn (0, caller, _("bad regular expression %s: %s"), + quotearg_style_mem (locale_quoting_style, regexp, len), msg); return; } @@ -1442,7 +1448,7 @@ set_word_regexp (const call_info *caller, const char *regexp) by the final regfree. */ if (!word_regexp.fastmap) word_regexp.fastmap = xcharalloc (UCHAR_MAX + 1); - msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp); + msg = re_compile_pattern (regexp, len, &word_regexp); assert (!msg); re_set_registers (&word_regexp, &regs, regs.num_regs, regs.start, regs.end); if (re_compile_fastmap (&word_regexp)) diff --git a/src/m4.c b/src/m4.c index 551d80c..1bb1ec7 100644 --- a/src/m4.c +++ b/src/m4.c @@ -623,7 +623,7 @@ main (int argc, char *const *argv, char *const *envp) const char *value = strchr (defines->arg, '='); size_t len = value ? value - defines->arg : strlen (defines->arg); define_user_macro (defines->arg, len, value ? value + 1 : "", - SYMBOL_INSERT); + value ? 
SIZE_MAX : 0, SYMBOL_INSERT); } break; diff --git a/src/m4.h b/src/m4.h index 40aa5ec..8da7d3c 100644 --- a/src/m4.h +++ b/src/m4.h @@ -381,7 +381,7 @@ extern string_pair curr_quote; void set_quotes (const char *, size_t, const char *, size_t); void set_comment (const char *, size_t, const char *, size_t); #ifdef ENABLE_CHANGEWORD -void set_word_regexp (const call_info *, const char *); +void set_word_regexp (const call_info *, const char *, size_t); #endif unsigned int quote_age (void); bool safe_quotes (void); @@ -438,6 +438,7 @@ struct symbol #define SYMBOL_NAME_LEN(S) ((S)->len) #define SYMBOL_TYPE(S) (TOKEN_DATA_TYPE (&(S)->data)) #define SYMBOL_TEXT(S) (TOKEN_DATA_TEXT (&(S)->data)) +#define SYMBOL_TEXT_LEN(S) (TOKEN_DATA_LEN (&(S)->data)) #define SYMBOL_FUNC(S) (TOKEN_DATA_FUNC (&(S)->data)) typedef enum symbol_lookup symbol_lookup; @@ -467,7 +468,7 @@ token_data_type arg_type (macro_arguments *, unsigned int); const char *arg_text (macro_arguments *, unsigned int, bool); bool arg_equal (macro_arguments *, unsigned int, unsigned int); bool arg_empty (macro_arguments *, unsigned int); -size_t arg_len (macro_arguments *, unsigned int); +size_t arg_len (macro_arguments *, unsigned int, bool); builtin_func *arg_func (macro_arguments *, unsigned int); struct obstack *arg_scratch (void); bool arg_print (struct obstack *, macro_arguments *, unsigned int, @@ -487,7 +488,7 @@ void wrap_args (macro_arguments *); /* Grab the text length at argv index I. Assumes macro_argument *argv is in scope, and aborts if the argument is not text. */ -#define ARG_LEN(i) arg_len (argv, i) +#define ARG_LEN(i) arg_len (argv, i, false) /* File: builtin.c --- builtins. 
*/ @@ -523,7 +524,8 @@ bool bad_argc (const call_info *, int, unsigned int, unsigned int); void define_builtin (const char *, size_t, const builtin *, symbol_lookup); void set_macro_sequence (const char *); void free_regex (void); -void define_user_macro (const char *, size_t, const char *, symbol_lookup); +void define_user_macro (const char *, size_t, const char *, size_t, + symbol_lookup); void undivert_all (void); void expand_user_macro (struct obstack *, symbol *, int, macro_arguments *); void m4_placeholder (struct obstack *, int, macro_arguments *); diff --git a/src/macro.c b/src/macro.c index 9d8ffbb..d1f70e9 100644 --- a/src/macro.c +++ b/src/macro.c @@ -1128,9 +1128,10 @@ arg_empty (macro_arguments *argv, unsigned int arg) } /* Given ARGV, return the length of argument ARG. Abort if the - argument is not text. Indices beyond argc return 0. */ + argument is not text. Indices beyond argc return 0. If FLATTEN, + builtins are ignored. */ size_t -arg_len (macro_arguments *argv, unsigned int arg) +arg_len (macro_arguments *argv, unsigned int arg, bool flatten) { token_data *token; token_chain *chain; @@ -1143,7 +1144,7 @@ arg_len (macro_arguments *argv, unsigned int arg) } if (arg >= argv->argc) return 0; - token = arg_token (argv, arg, NULL, false); + token = arg_token (argv, arg, NULL, flatten); switch (TOKEN_DATA_TYPE (token)) { case TOKEN_TEXT: @@ -1163,9 +1164,8 @@ arg_len (macro_arguments *argv, unsigned int arg) len += chain->u.u_s.len; break; case CHAIN_FUNC: - /* TODO concatenate builtins. */ - assert (!"implemented"); - abort (); + assert (flatten); + break; case CHAIN_ARGV: i = chain->u.u_a.index; limit = chain->u.u_a.argv->argc - i - chain->u.u_a.skip_last; @@ -1176,15 +1176,8 @@ arg_len (macro_arguments *argv, unsigned int arg) len += (quotes->len1 + quotes->len2) * limit; len += limit - 1; while (limit--) - { - /* TODO handle builtin concatenation. 
*/ - if (TOKEN_DATA_TYPE (arg_token (chain->u.u_a.argv, i, NULL, - false)) == TOKEN_FUNC) - assert (argv->flatten); - else - len += arg_len (chain->u.u_a.argv, i); - i++; - } + len += arg_len (chain->u.u_a.argv, i++, + flatten || chain->u.u_a.flatten); break; default: assert (!"arg_len"); @@ -1192,7 +1185,7 @@ arg_len (macro_arguments *argv, unsigned int arg) } chain = chain->next; } - assert (len); + assert (len || flatten); return len; case TOKEN_FUNC: default: hooks/post-receive -- GNU M4 source repository
