This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "GNU M4 source repository".
http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=65b9ec43629b517cd4365fb96459b51973ef9ac7 The branch, branch-1_4 has been updated via 65b9ec43629b517cd4365fb96459b51973ef9ac7 (commit) from d28166a2233b32f0f37bdd486a590a814209b765 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 65b9ec43629b517cd4365fb96459b51973ef9ac7 Author: Eric Blake <[EMAIL PROTECTED]> Date: Thu Oct 25 10:47:43 2007 -0600 Stage 7: add chained token support to input parser. * src/m4.h (struct token_chain): Add const safety. (push_token): New prototype. * src/input.c (INPUT_CHAIN): New enumerator. (struct input_block): Add new input type u_c, and change u_s to length-based processing. (make_text_link): New helper function. (push_token): New function. (push_string_finish): Use it. (pop_input, input_print, peek_input, next_char, next_char_1): Adjust to handle new input type. * src/macro.c (push_arg, push_args): Use new function. (cherry picked from commit 687dd577f66622e0b69a8cd03b7e5e76fa546c52) Signed-off-by: Eric Blake <[EMAIL PROTECTED]> ----------------------------------------------------------------------- Summary of changes: ChangeLog | 15 ++++ src/input.c | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++++------- src/m4.h | 11 ++- src/macro.c | 53 +++++++++++---- 4 files changed, 250 insertions(+), 41 deletions(-) diff --git a/ChangeLog b/ChangeLog index c6f5b46..8c9a901 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2007-12-11 Eric Blake <[EMAIL PROTECTED]> + + Stage 7: add chained token support to input parser. + * src/m4.h (struct token_chain): Add const safety. + (push_token): New prototype. + * src/input.c (INPUT_CHAIN): New enumerator. + (struct input_block): Add new input type u_c, and change u_s to + length-based processing. + (make_text_link): New helper function. 
+ (push_token): New function. + (push_string_finish): Use it. + (pop_input, input_print, peek_input, next_char, next_char_1): + Adjust to handle new input type. + * src/macro.c (push_arg, push_args): Use new function. + 2007-12-10 Eric Blake <[EMAIL PROTECTED]> Stage 6: convert builtins to push arg at a time. diff --git a/src/input.c b/src/input.c index 4e5d299..5c87217 100644 --- a/src/input.c +++ b/src/input.c @@ -69,7 +69,8 @@ enum input_type { INPUT_STRING, /* String resulting from macro expansion. */ INPUT_FILE, /* File from command line or include. */ - INPUT_MACRO /* Builtin resulting from defn. */ + INPUT_MACRO, /* Builtin resulting from defn. */ + INPUT_CHAIN /* FIFO chain of separate strings and $@ refs. */ }; typedef enum input_type input_type; @@ -85,7 +86,8 @@ struct input_block { struct { - char *string; /* Remaining string value. */ + char *str; /* Remaining string value. */ + size_t len; /* Remaining length. */ } u_s; /* INPUT_STRING */ struct @@ -96,7 +98,13 @@ struct input_block bool_bitfield advance : 1; /* Track previous start_of_input_line. */ } u_f; /* INPUT_FILE */ - builtin_func *func; /* Pointer to macro's function. */ + builtin_func *func; /* INPUT_MACRO */ + struct + { + token_chain *chain; /* Current link in chain. */ + token_chain *end; /* Last link in chain. */ + } + u_c; /* INPUT_CHAIN */ } u; }; @@ -184,6 +192,36 @@ static const char *token_type_string (token_type); /*-------------------------------------------------------------------. +| Given an obstack OBS, capture any unfinished text as a link in the | +| chain that starts at *START and ends at *END. START may be NULL | +| if *END is non-NULL. 
| +`-------------------------------------------------------------------*/ +static void +make_text_link (struct obstack *obs, token_chain **start, token_chain **end) +{ + token_chain *chain; + size_t len = obstack_object_size (obs); + + assert (end && (start || *end)); + if (len) + { + char *str = (char *) obstack_finish (obs); + chain = (token_chain *) obstack_alloc (obs, sizeof *chain); + if (*end) + (*end)->next = chain; + else + *start = chain; + *end = chain; + chain->next = NULL; + chain->str = str; + chain->len = len; + chain->argv = NULL; + chain->index = 0; + chain->flatten = false; + } +} + +/*-------------------------------------------------------------------. | push_file () pushes an input file on the input stack, saving the | | current file name and line number. If next is non-NULL, this push | | invalidates a call to push_string_init (), whose storage is | @@ -272,6 +310,55 @@ push_string_init (void) } /*-------------------------------------------------------------------. +| If TOKEN contains text, then convert the current string into a | +| chain if it is not one already, and add the contents of TOKEN as a | +| new link in the chain. LEVEL describes the current expansion | +| level, or -1 if the contents of TOKEN reside entirely on the | +| current_input stack and TOKEN lives in temporary storage. Allows | +| gathering input from multiple locations, rather than copying | +| everything consecutively onto the input stack. Must be called | +| between push_string_init and push_string_finish. | +`-------------------------------------------------------------------*/ +void +push_token (token_data *token, int level) +{ + token_chain *chain; + + assert (next); + /* TODO - also accept TOKEN_COMP chains. 
*/ + assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT); + if (TOKEN_DATA_LEN (token) == 0) + return; + + if (next->type == INPUT_STRING) + { + next->type = INPUT_CHAIN; + next->u.u_c.chain = next->u.u_c.end = NULL; + } + make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end); + chain = (token_chain *) obstack_alloc (current_input, sizeof *chain); + if (next->u.u_c.end) + next->u.u_c.end->next = chain; + else + next->u.u_c.chain = chain; + next->u.u_c.end = chain; + chain->next = NULL; + if (level >= 0) + /* TODO - use token as-is, rather than copying data. This implies + lengthening lifetime of $@ arguments until the rescan is + complete, rather than the current approach of freeing them + during expand_macro. */ + chain->str = (char *) obstack_copy (current_input, TOKEN_DATA_TEXT (token), + TOKEN_DATA_LEN (token)); + else + chain->str = TOKEN_DATA_TEXT (token); + chain->len = TOKEN_DATA_LEN (token); + chain->argv = NULL; + chain->index = 0; + chain->flatten = false; +} + +/*-------------------------------------------------------------------. | Last half of push_string (). If next is now NULL, a call to | | push_file () or push_macro () has invalidated the previous call to | | push_string_init (), so we just give up. 
If the new object is | @@ -294,10 +381,15 @@ push_string_finish (void) return NULL; } - if (len) + if (len || next->type == INPUT_CHAIN) { - obstack_1grow (current_input, '\0'); - next->u.u_s.string = (char *) obstack_finish (current_input); + if (next->type == INPUT_STRING) + { + next->u.u_s.str = (char *) obstack_finish (current_input); + next->u.u_s.len = len; + } + else + make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end); next->prev = isp; isp = next; input_change = true; @@ -327,7 +419,8 @@ push_wrapup (const char *s) i->type = INPUT_STRING; i->file = current_file; i->line = current_line; - i->u.u_s.string = (char *) obstack_copy0 (wrapup_stack, s, strlen (s)); + i->u.u_s.len = strlen (s); + i->u.u_s.str = (char *) obstack_copy (wrapup_stack, s, i->u.u_s.len); wsp = i; } @@ -345,12 +438,13 @@ static bool pop_input (bool cleanup) { input_block *tmp = isp->prev; + token_chain *chain; switch (isp->type) { case INPUT_STRING: - assert (!cleanup || !*isp->u.u_s.string); - if (*isp->u.u_s.string) + assert (!cleanup || !isp->u.u_s.len); + if (isp->u.u_s.len) return false; break; @@ -359,6 +453,26 @@ pop_input (bool cleanup) return false; break; + case INPUT_CHAIN: + chain = isp->u.u_c.chain; + assert (!chain || !cleanup); + while (chain) + { + if (chain->str) + { + if (chain->len) + return false; + } + else + { + /* TODO - peek into argv. 
*/ + assert (!"implemented yet"); + abort (); + } + chain = chain->next; + } + break; + case INPUT_FILE: if (!cleanup) return false; @@ -451,12 +565,13 @@ void input_print (struct obstack *obs, const input_block *input) { int maxlen = max_debug_argument_length; + token_chain *chain; assert (input); switch (input->type) { case INPUT_STRING: - obstack_print (obs, input->u.u_s.string, SIZE_MAX, &maxlen); + obstack_print (obs, input->u.u_s.str, input->u.u_s.len, &maxlen); break; case INPUT_FILE: obstack_grow (obs, "<file: ", strlen ("<file: ")); @@ -472,6 +587,17 @@ input_print (struct obstack *obs, const input_block *input) obstack_1grow (obs, '>'); } break; + case INPUT_CHAIN: + chain = input->u.u_c.chain; + while (chain) + { + /* TODO support argv refs as well. */ + assert (chain->str); + if (obstack_print (obs, chain->str, chain->len, &maxlen)) + return; + chain = chain->next; + } + break; default: assert (!"input_print"); abort (); @@ -493,6 +619,7 @@ peek_input (void) { int ch; input_block *block = isp; + token_chain *chain; while (1) { @@ -502,10 +629,9 @@ peek_input (void) switch (block->type) { case INPUT_STRING: - ch = to_uchar (block->u.u_s.string[0]); - if (ch != '\0') - return ch; - break; + if (!block->u.u_s.len) + break; + return to_uchar (block->u.u_s.str[0]); case INPUT_FILE: ch = getc (block->u.u_f.fp); @@ -520,6 +646,25 @@ peek_input (void) case INPUT_MACRO: return CHAR_MACRO; + case INPUT_CHAIN: + chain = block->u.u_c.chain; + while (chain) + { + if (chain->str) + { + if (chain->len) + return to_uchar (chain->str[0]); + } + else + { + /* TODO - peek into argv. */ + assert (!"implemented yet"); + abort (); + } + chain = chain->next; + } + break; + default: assert (!"peek_input"); abort (); @@ -539,15 +684,15 @@ peek_input (void) `-------------------------------------------------------------------------*/ #define next_char() \ - (isp && isp->type == INPUT_STRING && isp->u.u_s.string[0] \ - && !input_change \ - ? 
to_uchar (*isp->u.u_s.string++) \ + (isp && isp->type == INPUT_STRING && isp->u.u_s.len && !input_change \ + ? (isp->u.u_s.len--, to_uchar (*isp->u.u_s.str++)) \ : next_char_1 ()) static int next_char_1 (void) { int ch; + token_chain *chain; while (1) { @@ -568,13 +713,10 @@ next_char_1 (void) switch (isp->type) { case INPUT_STRING: - ch = to_uchar (*isp->u.u_s.string); - if (ch != '\0') - { - isp->u.u_s.string++; - return ch; - } - break; + if (!isp->u.u_s.len) + break; + isp->u.u_s.len--; + return to_uchar (*isp->u.u_s.str++); case INPUT_FILE: if (start_of_input_line) @@ -600,6 +742,28 @@ next_char_1 (void) pop_input (true); return CHAR_MACRO; + case INPUT_CHAIN: + chain = isp->u.u_c.chain; + while (chain) + { + if (chain->str) + { + if (chain->len) + { + chain->len--; + return to_uchar (*chain->str++); + } + } + else + { + /* TODO - read from argv. */ + assert (!"implemented yet"); + abort (); + } + isp->u.u_c.chain = chain = chain->next; + } + break; + default: assert (!"next_char_1"); abort (); diff --git a/src/m4.h b/src/m4.h index f7b0d37..111f167 100644 --- a/src/m4.h +++ b/src/m4.h @@ -284,7 +284,7 @@ enum token_data_type struct token_chain { token_chain *next; /* Pointer to next link of chain. */ - char *str; /* NUL-terminated string if text, else NULL. */ + const char *str; /* NUL-terminated string if text, else NULL. */ size_t len; /* Length of str, else 0. */ macro_arguments *argv;/* Reference to earlier $@. */ unsigned int index; /* Argument index within argv. */ @@ -303,7 +303,7 @@ struct token_data cache for now. But it will be essential if we ever DO support NUL. */ size_t len; - char *text; + char *text; /* The contents of the token. */ /* The value of quote_age when this token was scanned. If this token is later encountered in the context of scanning a quoted string, and quote_age has not changed, @@ -312,7 +312,11 @@ struct token_data might change the parse on rescan. Ignored for 0 len. 
*/ unsigned int quote_age; #ifdef ENABLE_CHANGEWORD - char *original_text; + /* If changeword is in effect, and contains a () group, then + this contains the entire token, while text contains the + portion that matched the () group to form a macro name. + Otherwise, this field is unused. */ + const char *original_text; #endif } u_t; @@ -346,6 +350,7 @@ void skip_line (const char *); void push_file (FILE *, const char *, bool); void push_macro (builtin_func *); struct obstack *push_string_init (void); +void push_token (token_data *, int); const input_block *push_string_finish (void); void push_wrapup (const char *); bool pop_wrapup (void); diff --git a/src/macro.c b/src/macro.c index e5c7099..1e4b271 100644 --- a/src/macro.c +++ b/src/macro.c @@ -727,8 +727,7 @@ push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index) token = arg_token (argv, index); /* TODO handle func tokens? */ assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT); - /* TODO push a reference, rather than copying data. */ - obstack_grow (obs, TOKEN_DATA_TEXT (token), TOKEN_DATA_LEN (token)); + push_token (token, expansion_level - 1); } /* Push series of comma-separated arguments from ARGV, which should @@ -739,23 +738,49 @@ void push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote) { token_data *token; - unsigned int i; - bool comma = false; + token_data sep; + unsigned int i = skip ? 2 : 1; + bool use_sep = false; + static char comma[2] = ","; + + if (i >= argv->argc) + return; - /* TODO push reference, rather than copying data. */ - for (i = skip ? 
2 : 1; i < argv->argc; i++) + TOKEN_DATA_TYPE (&sep) = TOKEN_TEXT; + TOKEN_DATA_QUOTE_AGE (&sep) = 0; + if (quote) + { + char *str; + obstack_grow (obs, lquote.string, lquote.length); + TOKEN_DATA_LEN (&sep) = obstack_object_size (obs); + obstack_1grow (obs, '\0'); + str = (char *) obstack_finish (obs); + TOKEN_DATA_TEXT (&sep) = str; + push_token (&sep, -1); + obstack_grow (obs, rquote.string, rquote.length); + obstack_1grow (obs, ','); + obstack_grow0 (obs, lquote.string, lquote.length); + str = (char *) obstack_finish (obs); + TOKEN_DATA_TEXT (&sep) = str; + TOKEN_DATA_LEN (&sep) = rquote.length + 1 + lquote.length; + } + else + { + TOKEN_DATA_TEXT (&sep) = comma; + TOKEN_DATA_LEN (&sep) = 1; + } + /* TODO push entire $@ reference, rather than pushing each arg. */ + for ( ; i < argv->argc; i++) { token = arg_token (argv, i); - if (comma) - obstack_1grow (obs, ','); + if (use_sep) + push_token (&sep, -1); else - comma = true; + use_sep = true; /* TODO handle func tokens? */ assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT); - if (quote) - obstack_grow (obs, lquote.string, lquote.length); - obstack_grow (obs, TOKEN_DATA_TEXT (token), TOKEN_DATA_LEN (token)); - if (quote) - obstack_grow (obs, rquote.string, rquote.length); + push_token (token, expansion_level - 1); } + if (quote) + obstack_grow (obs, rquote.string, rquote.length); } hooks/post-receive -- GNU M4 source repository
