On Mon, May 02, 2011 at 10:09:29AM +0200, Otto Moerbeek wrote:

> Hi,
> 
> Currently, malloc scans the bits of the chunk bitmap from position
> zero, skipping a random number (n) of free slots and then picking the
> next free one. This slows things down, especially if the number of
> full slots diminishes. 
> 
> 
> This diff changes the scannning to start at a random position in the
> bitmap and then taking the first available free slot, wrapping if the
> end of the bitmap is reached. Of course we'll still scan more if the
> bitmap becomes more full, but the extra iterations skipping free slots
> and then some full slots are avoided. 
> 
> The random number is derived from a global, which is incremented by a
> few bits every time a chunk is needed (with a small optimization if
> only one free slot is left).
> 
> The shows good speedups, especially if lots of small chunks are
> allocated and on e.g. loongson which uses a larger page size than
> others, while randomness of chunk allocation is preserved. 
> 
> Please test & review,

Litle feedback so far. I'd like to move on with this, I have more
goodies queued up. So get this on your machines!

        -Otto
        


> 
> Index: malloc.c
> ===================================================================
> RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
> retrieving revision 1.129
> diff -u -p -r1.129 malloc.c
> --- malloc.c  30 Apr 2011 15:46:46 -0000      1.129
> +++ malloc.c  2 May 2011 07:27:54 -0000
> @@ -113,6 +113,7 @@ struct dir_info {
>       struct region_info free_regions[MALLOC_MAXCACHE];
>                                       /* delayed free chunk slots */
>       void *delayed_chunks[MALLOC_DELAYED_CHUNKS + 1];
> +     u_short chunk_start;
>  #ifdef MALLOC_STATS
>       size_t inserts;
>       size_t insert_collisions;
> @@ -1013,40 +1014,31 @@ malloc_bytes(struct dir_info *d, size_t 
>  
>       if (bp->canary != d->canary1)
>               wrterror("chunk info corrupted", NULL);
> -     /* Find first word of bitmap which isn't empty */
> -     for (lp = bp->bits; !*lp; lp++)
> -             /* EMPTY */;
> -
> -     /* Find that bit, and tweak it */
> -     u = 1;
> -     k = 0;
> -     while (!(*lp & u)) {
> -             u += u;
> -             k++;
> -     }
> -
> -     /* advance a random # of positions */
> -     if (bp->free > 1) {
> -             i = getrnibble() % bp->free;
> -             while (i > 0) {
> -                     u += u;
> -                     k++;
> -                     if (k >= MALLOC_BITS) {
> -                             while (!*++lp)
> -                                     /* EMPTY */;
> -                             u = 1;
> -                             k = 0;
> -                             if (lp - bp->bits > (bp->total - 1) /
> -                                 MALLOC_BITS) {
> -                                     wrterror("chunk overflow", NULL);
> -                                     errno = EFAULT;
> -                                     return (NULL);
> -                             }
> -                     }
> -                     if (*lp & u)
> -                             i--;
> +
> +     i = d->chunk_start;
> +     if (bp->free > 1)
> +             i += getrnibble();
> +     if (i >= bp->total)
> +             i &= bp->total - 1;
> +     for (;;) {
> +             for (;;) {
> +                     lp = &bp->bits[i / MALLOC_BITS];
> +                     if (!*lp) {
> +                             i += MALLOC_BITS;
> +                             i &= ~(MALLOC_BITS - 1); 
> +                             if (i >= bp->total)
> +                                     i = 0;
> +                     } else
> +                             break;
>               }
> +             k = i % MALLOC_BITS;
> +             u = 1 << k;
> +             if (*lp & u)
> +                     break;
> +             if (++i >= bp->total)
> +                     i = 0;
>       }
> +     d->chunk_start += i + 1;
>  
>       *lp ^= u;

Reply via email to