This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "GNU M4 source repository".
http://git.sv.gnu.org/gitweb/?p=m4.git;a=commitdiff;h=ffaa885fc0efb39bf0ca0df0a592a1c39f92729c The branch, branch-1.6 has been updated via ffaa885fc0efb39bf0ca0df0a592a1c39f92729c (commit) from 38a274a6e3c1e13e2d9d1c803d64b1a5414ff603 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit ffaa885fc0efb39bf0ca0df0a592a1c39f92729c Author: Eric Blake <[EMAIL PROTECTED]> Date: Sat Jul 26 13:27:02 2008 -0600 Use hash module for self-growing symbol table. * m4/gnulib-cache.m4: Import hash module. * src/m4.h (struct symbol): Remove next member, change stack to be circular. (SYMBOL_NEXT): Delete. (symtab_free): New prototype. * src/symtab.c (show_profile) [DEBUG_SYM]: Track more hash statistics, and dump to /dev/tty rather than stderr. (symtab): Change type. (hash_table_size): Delete. (symtab_hasher, symtab_comparator, symtab_free_entry): New functions. (symtab_init, lookup_symbol, hack_all_symbols): Rewrite to wrap external hash table. (symtab_free): New function. (symtab_debug) [DEBUG_SYM]: Adjust client. * src/m4.c (main): Call symbol table cleanup. * src/freeze.c (dump_symbol_CB, reverse_symbol_list): Deal with fact that pushdef stack is now circular. Signed-off-by: Eric Blake <[EMAIL PROTECTED]> ----------------------------------------------------------------------- Summary of changes: ChangeLog | 22 ++++ m4/gnulib-cache.m4 | 3 +- src/freeze.c | 25 ++++-- src/m4.c | 5 + src/m4.h | 5 +- src/symtab.c | 271 +++++++++++++++++++++++++++++++-------------------- 6 files changed, 213 insertions(+), 118 deletions(-) diff --git a/ChangeLog b/ChangeLog index ed2acce..de44226 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +2008-07-26 Eric Blake <[EMAIL PROTECTED]> + + Use hash module for self-growing symbol table. + * m4/gnulib-cache.m4: Import hash module. + * src/m4.h (struct symbol): Remove next member, change stack to be + circular. + (SYMBOL_NEXT): Delete. + (symtab_free): New prototype. + * src/symtab.c (show_profile) [DEBUG_SYM]: Track more hash + statistics, and dump to /dev/tty rather than stderr. + (symtab): Change type. + (hash_table_size): Delete. + (symtab_hasher, symtab_comparator, symtab_free_entry): New + functions. + (symtab_init, lookup_symbol, hack_all_symbols): Rewrite to wrap + external hash table. + (symtab_free): New function. + (symtab_debug) [DEBUG_SYM]: Adjust client. + * src/m4.c (main): Call symbol table cleanup. + * src/freeze.c (dump_symbol_CB, reverse_symbol_list): Deal with + fact that pushdef stack is now circular. + 2008-07-22 Eric Blake <[EMAIL PROTECTED]> Make symbol table opaque. diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4 index f716908..a512830 100644 --- a/m4/gnulib-cache.m4 +++ b/m4/gnulib-cache.m4 @@ -15,7 +15,7 @@ # Specification in the form of a command-line invocation: -# gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 announce-gen assert autobuild avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer fseeko gendocs getopt git-version-gen gnumakefile gnupload gpl-3.0 intprops memmem mkstemp obstack obstack-printf-posix progname quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xmemdup0 xprintf xvasprintf-posix +# gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 announce-gen assert autobuild avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer fseeko gendocs getopt git-version-gen gnumakefile gnupload gpl-3.0 hash intprops memmem mkstemp obstack obstack-printf-posix progname quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xmemdup0 xprintf xvasprintf-posix # Specification in the form of a few gnulib-tool.m4 macro invocations: gl_LOCAL_DIR([local]) @@ -42,6 +42,7 @@ gl_MODULES([ gnumakefile gnupload gpl-3.0 + hash intprops memmem mkstemp diff --git a/src/freeze.c b/src/freeze.c index 3a48b03..2a7d9dc 100644 --- a/src/freeze.c +++ b/src/freeze.c @@ -30,17 +30,24 @@ static symbol * reverse_symbol_list (symbol *sym) { - symbol *result; + symbol *first = sym; symbol *next; + symbol *prev = sym; + symbol *result; - result = NULL; - while (sym) + assert (sym); + if (sym->stack == sym) + return sym; + next = sym->stack; + do { - next = sym->stack; - sym->stack = result; - result = sym; + result = prev; sym = next; + next = sym->stack; + sym->stack = prev; + prev = sym; } + while (prev != first); return result; } @@ -59,8 +66,9 @@ dump_symbol_CB (symbol *sym, void *f) /* Process all entries in each stack from the last to the first. This order ensures that, at reload time, pushdef's will be executed with the oldest definitions first. */ - sym = stack = reverse_symbol_list (sym); - while (sym) + stack = sym; + sym = reverse_symbol_list (sym); + do { switch (SYMBOL_TYPE (sym)) { @@ -99,6 +107,7 @@ dump_symbol_CB (symbol *sym, void *f) } sym = sym->stack; } + while (sym != stack->stack); /* Reverse the stack once more, putting it back as it was. */ reverse_symbol_list (stack); } diff --git a/src/m4.c b/src/m4.c index 7f0af7e..551d80c 100644 --- a/src/m4.c +++ b/src/m4.c @@ -683,8 +683,13 @@ main (int argc, char *const *argv, char *const *envp) undivert_all (); } output_exit (); +#ifndef NDEBUG + /* Only spend time freeing memory to help isolate leaks; if + assertions are disabled, save the time and exit now. */ free_regex (); quotearg_free (); + symtab_free (); +#endif /* NDEBUG */ #ifdef DEBUG_REGEX if (trace_file) fclose (trace_file); diff --git a/src/m4.h b/src/m4.h index 011439e..ff0377a 100644 --- a/src/m4.h +++ b/src/m4.h @@ -416,8 +416,7 @@ enum symbol_lookup /* Symbol table entry. */ struct symbol { - struct symbol *next; /* Next symbol with the same hash. */ - struct symbol *stack; /* Stack of shadowed symbols of the same name. */ + struct symbol *stack; /* Circular list for pushdef stack of symbol. */ bool_bitfield traced : 1; bool_bitfield macro_args : 1; bool_bitfield blind_no_args : 1; @@ -429,7 +428,6 @@ struct symbol token_data data; /* Type should be only TOKEN_TEXT or TOKEN_FUNC. */ }; -#define SYMBOL_NEXT(S) ((S)->next) #define SYMBOL_TRACED(S) ((S)->traced) #define SYMBOL_MACRO_ARGS(S) ((S)->macro_args) #define SYMBOL_BLIND_NO_ARGS(S) ((S)->blind_no_args) @@ -449,6 +447,7 @@ typedef void hack_symbol (symbol *, void *); void free_symbol (symbol *); void symtab_init (size_t); +void symtab_free (void); symbol *lookup_symbol (const char *, size_t, symbol_lookup); void hack_all_symbols (hack_symbol *, void *); diff --git a/src/symtab.c b/src/symtab.c index 7420e50..a9160c8 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -33,6 +33,8 @@ #include "m4.h" +#include "hash.h" + #ifdef DEBUG_SYM /* When evaluating hash table performance, this profiling code shows how many collisions were encountered. */ @@ -47,19 +49,28 @@ struct profile static struct profile profiles[5]; static symbol_lookup current_mode; +static unsigned long long hash_entry; +static unsigned long long comparator_entry; +static size_t current_size; +static unsigned int resizes; /* On exit, show a profile of symbol table performance. */ static void show_profile (void) { int i; + FILE *f = fopen ("/dev/tty", "w"); for (i = 0; i < 5; i++) { - xfprintf(stderr, "m4: lookup mode %d called %d times, %d compares, " + xfprintf(f, "m4: lookup mode %d called %d times, %d compares, " "%d misses, %lld bytes\n", i, profiles[i].entry, profiles[i].comparisons, profiles[i].misses, profiles[i].bytes); } + xfprintf(f, "m4: %llu hash callbacks, %llu compare callbacks, " + "%zu buckets, %u resizes\n", + hash_entry, comparator_entry, current_size, resizes - 1); + fclose (f); } /* Like memcmp (S1, S2, L), but also track profiling statistics. */ @@ -87,33 +98,12 @@ profile_memcmp (const char *s1, const char *s2, size_t l) #endif /* DEBUG_SYM */ -/*----------------------------------------------------------------------. -| Initialise the symbol table, by allocating the necessary storage, and | -| zeroing all the entries. | -`----------------------------------------------------------------------*/ - /* Pointer to symbol table. */ -static symbol **symtab; - -/* Number of buckets in symbol table. */ -static size_t hash_table_size; - -void -symtab_init (size_t size) -{ - hash_table_size = size; - symtab = (symbol **) xnmalloc (hash_table_size, sizeof *symtab); - memset (symtab, 0, hash_table_size * sizeof *symtab); - -#ifdef DEBUG_SYM - atexit (show_profile); /* Ignore failure, since this is debug code. */ -#endif /* DEBUG_SYM */ -} +static Hash_table *symtab; /*--------------------------------------------------. | Return a hashvalue for a string S of length LEN. | `--------------------------------------------------*/ - static size_t hash (const char *s, size_t len) { @@ -126,6 +116,82 @@ hash (const char *s, size_t len) return val; } +/*----------------------------------------------------. +| Wrap our hash inside signature expected by hash.h. | +`----------------------------------------------------*/ +static size_t +symtab_hasher (const void *entry, size_t buckets) +{ +#ifdef DEBUG_SYM + hash_entry++; + if (buckets != current_size) + { + resizes++; + current_size = buckets; + } +#endif /* DEBUG_SYM */ + const symbol *sym = (const symbol *) entry; + return hash (SYMBOL_NAME (sym), SYMBOL_NAME_LEN (sym)) % buckets; +} + +/*----------------------------------------------. +| Compare two hash table entries for equality. | +`----------------------------------------------*/ +static bool +symtab_comparator (const void *entry_a, const void *entry_b) +{ +#ifdef DEBUG_SYM + comparator_entry++; +#endif /* DEBUG_SYM */ + const symbol *sym_a = (const symbol *) entry_a; + const symbol *sym_b = (const symbol *) entry_b; + return (SYMBOL_NAME_LEN (sym_a) == SYMBOL_NAME_LEN (sym_b) + && memcmp (SYMBOL_NAME (sym_a), SYMBOL_NAME (sym_b), + SYMBOL_NAME_LEN (sym_a)) == 0); +} + +/*---------------------------. +| Reclaim an entry on exit. | +`---------------------------*/ +static void +symtab_free_entry (void *entry) +{ + symbol *sym = (symbol *) entry; + while (sym->stack != sym) + { + symbol *old = sym->stack; + sym->stack = old->stack; + assert (!SYMBOL_PENDING_EXPANSIONS (old)); + free_symbol (old); + } + assert (!SYMBOL_PENDING_EXPANSIONS (sym)); + free_symbol (sym); +} + +/*--------------------------------------------------------------. +| Initialize the symbol table, with SIZE as a hint for expected | +| number of entries. | +`--------------------------------------------------------------*/ +void +symtab_init (size_t size) +{ + symtab = hash_initialize (size, NULL, symtab_hasher, symtab_comparator, + symtab_free_entry); + +#ifdef DEBUG_SYM + atexit (show_profile); /* Ignore failure, since this is debug code. */ +#endif /* DEBUG_SYM */ +} + +/*------------------------. +| Clean up entire table. | +`------------------------*/ +void +symtab_free (void) +{ + hash_free (symtab); +} + /*--------------------------------------------. | Free all storage associated with a symbol. | `--------------------------------------------*/ @@ -162,38 +228,23 @@ free_symbol (symbol *sym) symbol * lookup_symbol (const char *name, size_t len, symbol_lookup mode) { - size_t h; - int cmp = 1; - symbol *sym, *prev; - symbol **spp; + symbol *sym; + symbol *entry; + symbol tmp; #if DEBUG_SYM current_mode = mode; profiles[mode].entry++; #endif /* DEBUG_SYM */ - h = hash (name, len); - sym = symtab[h % hash_table_size]; - - for (prev = NULL; sym != NULL; prev = sym, sym = sym->next) - { - cmp = (len < SYMBOL_NAME_LEN (sym) ? -1 : len > SYMBOL_NAME_LEN (sym) ? 1 - : memcmp (SYMBOL_NAME (sym), name, len)); - if (cmp >= 0) - break; - } - - /* If just searching, return status of search. */ - - if (mode == SYMBOL_LOOKUP) - return cmp == 0 ? sym : NULL; - - /* Symbol not found. */ - - spp = (prev != NULL) ? &prev->next : &symtab[h % hash_table_size]; + tmp.name = (char *) name; + tmp.len = len; + entry = (symbol *) hash_lookup (symtab, &tmp); switch (mode) { + case SYMBOL_LOOKUP: + return entry ? entry->stack : NULL; case SYMBOL_INSERT: @@ -202,14 +253,15 @@ lookup_symbol (const char *name, size_t len, symbol_lookup mode) a new one; if not, just return the symbol. If not found, just insert the name, and return the new symbol. */ - if (cmp == 0 && sym != NULL) + if (entry) { + sym = entry->stack; if (SYMBOL_PENDING_EXPANSIONS (sym) > 0) { symbol *old = sym; SYMBOL_DELETED (old) = true; - sym = (symbol *) xmalloc (sizeof (symbol)); + sym = (symbol *) xmalloc (sizeof *sym); SYMBOL_TYPE (sym) = TOKEN_VOID; SYMBOL_TRACED (sym) = SYMBOL_TRACED (old); SYMBOL_NAME (sym) = xmemdup0 (name, len); @@ -219,11 +271,20 @@ lookup_symbol (const char *name, size_t len, symbol_lookup mode) SYMBOL_DELETED (sym) = false; SYMBOL_PENDING_EXPANSIONS (sym) = 0; - SYMBOL_NEXT (sym) = SYMBOL_NEXT (old); - SYMBOL_NEXT (old) = NULL; - sym->stack = old->stack; + if (old == entry) + { + old = (symbol *) hash_delete (symtab, entry); + assert (entry == old); + sym->stack = sym; + entry = (symbol *) hash_insert (symtab, sym); + assert (sym == entry); + } + else + { + entry->stack = sym; + sym->stack = old->stack; + } old->stack = NULL; - *spp = sym; } return sym; } @@ -231,11 +292,13 @@ lookup_symbol (const char *name, size_t len, symbol_lookup mode) case SYMBOL_PUSHDEF: - /* Insert a name in the symbol table. If there is already a symbol - with the name, insert this in front of it, and mark the old - symbol as "shadowed". */ - - sym = (symbol *) xmalloc (sizeof (symbol)); + /* Insert a name in the symbol table. If there is already a + symbol with the name, add it to the pushdef stack. Since the + hash table does not allow the insertion of duplicates, the + pushdef stack is a circular chain; the hash entry is the + oldest entry, which points to the newest entry; all other + entries point to the next older entry. */ + sym = (symbol *) xmalloc (sizeof *sym); SYMBOL_TYPE (sym) = TOKEN_VOID; SYMBOL_TRACED (sym) = false; SYMBOL_NAME (sym) = xmemdup0 (name, len); @@ -245,17 +308,19 @@ lookup_symbol (const char *name, size_t len, symbol_lookup mode) SYMBOL_DELETED (sym) = false; SYMBOL_PENDING_EXPANSIONS (sym) = 0; - SYMBOL_NEXT (sym) = *spp; - sym->stack = NULL; - *spp = sym; - - if (mode == SYMBOL_PUSHDEF && cmp == 0) + if (entry) { - sym->stack = sym->next; - sym->next = sym->stack->next; - sym->stack->next = NULL; + assert (mode == SYMBOL_PUSHDEF); + sym->stack = entry->stack; + entry->stack = sym; SYMBOL_TRACED (sym) = SYMBOL_TRACED (sym->stack); } + else + { + sym->stack = sym; + entry = (symbol *) hash_insert (symtab, sym); + assert (sym == entry); + } return sym; case SYMBOL_DELETE: @@ -268,37 +333,36 @@ lookup_symbol (const char *name, size_t len, symbol_lookup mode) definition is still in use, let the caller free the memory after it is done with the symbol. */ - if (cmp != 0 || sym == NULL) + if (!entry) return NULL; { bool traced = false; - symbol *result = sym; - if (sym->stack && mode == SYMBOL_POPDEF) + symbol *result = sym = entry->stack; + if (sym != entry && mode == SYMBOL_POPDEF) { SYMBOL_TRACED (sym->stack) = SYMBOL_TRACED (sym); - sym->stack->next = sym->next; - *spp = sym->stack; - sym->next = NULL; + entry->stack = sym->stack; sym->stack = NULL; free_symbol (sym); } else { traced = SYMBOL_TRACED (sym); - *spp = sym->next; - do + while (sym != entry) { symbol *old = sym; sym = sym->stack; - old->next = NULL; old->stack = NULL; free_symbol (old); } - while (sym); + sym = (symbol *) hash_delete (symtab, entry); + assert (sym == entry); + sym->stack = NULL; + free_symbol (sym); } if (traced) { - sym = (symbol *) xmalloc (sizeof (symbol)); + sym = (symbol *) xmalloc (sizeof *sym); SYMBOL_TYPE (sym) = TOKEN_VOID; SYMBOL_TRACED (sym) = true; SYMBOL_NAME (sym) = xmemdup0 (name, len); @@ -308,9 +372,9 @@ lookup_symbol (const char *name, size_t len, symbol_lookup mode) SYMBOL_DELETED (sym) = false; SYMBOL_PENDING_EXPANSIONS (sym) = 0; - SYMBOL_NEXT (sym) = *spp; - sym->stack = NULL; - *spp = sym; + sym->stack = sym; + entry = (symbol *) hash_insert (symtab, sym); + assert (sym == entry); } return result; } @@ -335,20 +399,17 @@ lookup_symbol (const char *name, size_t len, symbol_lookup mode) void hack_all_symbols (hack_symbol *func, void *data) { - size_t h; - symbol *sym; + symbol *sym = (symbol *) hash_get_first (symtab); symbol *next; - for (h = 0; h < hash_table_size; h++) + while (sym) { /* We allow func to call SYMBOL_POPDEF, which can invalidate sym, so we must grab the next element to traverse before calling func. */ - for (sym = symtab[h]; sym != NULL; sym = next) - { - next = SYMBOL_NEXT (sym); - func (sym, data); - } + next = (symbol *) hash_get_next (symtab, sym); + func (sym->stack, data); + sym = next; } } @@ -396,28 +457,26 @@ symtab_debug (void) static void symtab_print_list (int i) { - symbol *sym; + symbol *sym = (symbol *) hash_get_first (symtab); symbol *stack; - size_t h; xprintf ("Symbol dump #%d:\n", i); - for (h = 0; h < hash_table_size; h++) - for (sym = symtab[h]; sym != NULL; sym = sym->next) - { - stack = sym; - do - { - xprintf ("\tname %s, len %zu, bucket %lu, addr %p, next %p, " - "stack %p, flags%s%s, pending %d\n", - SYMBOL_NAME (stack), SYMBOL_NAME_LEN (stack), - (unsigned long int) h, stack, SYMBOL_NEXT (stack), - stack->stack, SYMBOL_TRACED (stack) ? " traced" : "", - SYMBOL_DELETED (stack) ? " deleted" : "", - SYMBOL_PENDING_EXPANSIONS (stack)); - stack = stack->stack; - } - while (stack); - } + while (sym) + { + stack = sym->stack; + do + { + xprintf ("\tname %s, len %zu, addr %p, " + "stack %p, flags%s%s, pending %d\n", + SYMBOL_NAME (stack), SYMBOL_NAME_LEN (stack), + stack, stack->stack, SYMBOL_TRACED (stack) ? " traced" : "", + SYMBOL_DELETED (stack) ? " deleted" : "", + SYMBOL_PENDING_EXPANSIONS (stack)); + stack = stack->stack; + } + while (stack != sym); + sym = (symbol *) hash_get_next (symtab, sym); + } } #endif /* DEBUG_SYM */ hooks/post-receive -- GNU M4 source repository
