This patch disposes of the linear searches in find_array_item in
alist.c.

If you have a large number of similarly named variables or aliases
assigned, this will greatly improve read/update performance.

I'm hoping to commit this if there are no serious problems with it.
Index: source/alist.c
===================================================================
RCS file: /home/cvs/repository/epic4/source/alist.c,v
retrieving revision 1.2
diff -c -r1.2 alist.c
*** source/alist.c      2001/09/24 16:49:08     1.2
--- source/alist.c      2001/10/05 22:17:54
***************
*** 216,221 ****
--- 216,222 ----
        size_t          len = strlen(name);
        int             c = 0, 
                        pos = 0, 
+                       tospot, /* :-) */
                        min, 
                        max;
        u_32int_t       mask;
***************
*** 241,247 ****
         * four letters as 'name'.  Since we're doing all integer
         * comparisons, its cheaper than the full blown string compares.
         */
!       max = set->max - 1;
        min = 0;
  
        while (max >= min)
--- 242,253 ----
         * four letters as 'name'.  Since we're doing all integer
         * comparisons, its cheaper than the full blown string compares.
         */
!       /*
!        * OK, well all of that has been rolled into these two loops,
!        * designed to find the start and end of the range directly.
!        */
! 
!       tospot = max = set->max - 1;
        min = 0;
  
        while (max >= min)
***************
*** 249,308 ****
                bin_ints++;
                pos = (max - min) / 2 + min;
                c = (hash & mask) - (ARRAY_ITEM(set, pos)->hash & mask);
!               if (c == 0)
!                       break;
                else if (c < 0)
!                       max = pos - 1;
                else
                        min = pos + 1;
        }
  
        /*
!        * If we can't find a symbol that qualifies, then we can just drop
!        * out here.  This is good because a "pass" (lookup for a symbol that
!        * does not exist) requires only cheap integer comparisons.
         */
-       if (c != 0)
-       {
-               if (c > 0)
-                       *loc = pos + 1;
-               else
-                       *loc = pos;
-               return NULL;
-       }
  
!       /*
!        * Now we've found some symbol that has the same first four letters.
!        * Expand the min/max range to include all of the symbols that have
!        * the same first four letters...
!        */
!       min = max = pos;
!       while ((min > 0) && (hash & mask) == (ARRAY_ITEM(set, min)->hash & mask))
!               min--, lin_ints++;
!       while ((max < set->max - 1) && (hash & mask) == (ARRAY_ITEM(set, max)->hash & 
mask))
!               max++, lin_ints++;
! 
!       char_searches++;
  
-       /*
-        * Then do a full blown binary search on the smaller range
-        */
        while (max >= min)
        {
!               bin_chars++;
!               pos = (max - min) / 2 + min;
!               c = set->func(name, ARRAY_ITEM(set, pos)->name, len);
!               if (c == 0)
!                       break;
                else if (c < 0)
                        max = pos - 1;
                else
                        min = pos + 1;
        }
  
        /*
!        * At this point, we actually know if the symbol really exists.
!        * If it doesnt, then we can just drop out here.
         */
        if (c != 0)
        {
--- 255,310 ----
                bin_ints++;
                pos = (max - min) / 2 + min;
                c = (hash & mask) - (ARRAY_ITEM(set, pos)->hash & mask);
!               if (c == 0) {
!                       bin_chars++;
!                       c = set->func(name, ARRAY_ITEM(set, pos)->name, len);
!               }
!               if (c == 0) {
!                       if (max == pos)
!                               break;
!                       max = pos;
!               }
                else if (c < 0)
!                       tospot = max = pos - 1;
                else
                        min = pos + 1;
        }
  
        /*
!        * At this point, min is set to the first matching name in
!        * the range and tospot is set to a higher position than
!        * the last.  These are used to refine the next search.
         */
  
!       max = tospot;
!       tospot = pos;
  
        while (max >= min)
        {
!               bin_ints++;
!               pos = (min - max) / 2 + max;  /* Don't ask */
!               c = (hash & mask) - (ARRAY_ITEM(set, pos)->hash & mask);
!               if (c == 0) {
!                       bin_chars++;
!                       c = set->func(name, ARRAY_ITEM(set, pos)->name, len);
!               }
!               if (c == 0) {
!                       if (min == pos)
!                               break;
!                       min = pos;
!               }
                else if (c < 0)
                        max = pos - 1;
                else
                        min = pos + 1;
        }
  
+       min = tospot;
+ 
        /*
!        * If we can't find a symbol that qualifies, then we can just drop
!        * out here.  This is good because a "pass" (lookup for a symbol that
!        * does not exist) requires only cheap integer comparisons.
         */
        if (c != 0)
        {
***************
*** 313,324 ****
                return NULL;
        }
  
        /*
         * If we've gotten this far, then we've found at least
         * one appropriate entry.  So we set *cnt to one
         */
!       *cnt = 1;
  
        /*
         * So we know that 'pos' is a match.  So we start 'min'
         * at one less than 'pos' and walk downard until we find
--- 315,329 ----
                return NULL;
        }
  
+       char_searches++;
+ 
        /*
         * If we've gotten this far, then we've found at least
         * one appropriate entry.  So we set *cnt to one
         */
!       *cnt = 1 + pos - min;
  
+ #if 0
        /*
         * So we know that 'pos' is a match.  So we start 'min'
         * at one less than 'pos' and walk downard until we find
***************
*** 336,341 ****
--- 341,347 ----
        max = pos + 1;
        while (max < set->max && !set->func(name, ARRAY_ITEM(set, max)->name, len))
                (*cnt)++, max++, lin_chars++;
+ #endif
  
        /*
         * Alphabetically, the string that would be identical to

Reply via email to