Hi, Currently, malloc scans the bits of the chunk bitmap from position zero, skipping a random number (n) of free slots and then picking the next free one. This slows things down, especially if the number of full slots diminishes.
This diff changes the scannning to start at a random position in the bitmap and then taking the first available free slot, wrapping if the end of the bitmap is reached. Of course we'll still scan more if the bitmap becomes more full, but the extra iterations skipping free slots and then some full slots are avoided. The random number is derived from a global, which is incremented by a few bits every time a chunk is needed (with a small optimization if only one free slot is left). The shows good speedups, especially if lots of small chunks are allocated and on e.g. loongson which uses a larger page size than others, while randomness of chunk allocation is preserved. Please test & review, -Otto Index: malloc.c =================================================================== RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v retrieving revision 1.129 diff -u -p -r1.129 malloc.c --- malloc.c 30 Apr 2011 15:46:46 -0000 1.129 +++ malloc.c 2 May 2011 07:27:54 -0000 @@ -113,6 +113,7 @@ struct dir_info { struct region_info free_regions[MALLOC_MAXCACHE]; /* delayed free chunk slots */ void *delayed_chunks[MALLOC_DELAYED_CHUNKS + 1]; + u_short chunk_start; #ifdef MALLOC_STATS size_t inserts; size_t insert_collisions; @@ -1013,40 +1014,31 @@ malloc_bytes(struct dir_info *d, size_t if (bp->canary != d->canary1) wrterror("chunk info corrupted", NULL); - /* Find first word of bitmap which isn't empty */ - for (lp = bp->bits; !*lp; lp++) - /* EMPTY */; - - /* Find that bit, and tweak it */ - u = 1; - k = 0; - while (!(*lp & u)) { - u += u; - k++; - } - - /* advance a random # of positions */ - if (bp->free > 1) { - i = getrnibble() % bp->free; - while (i > 0) { - u += u; - k++; - if (k >= MALLOC_BITS) { - while (!*++lp) - /* EMPTY */; - u = 1; - k = 0; - if (lp - bp->bits > (bp->total - 1) / - MALLOC_BITS) { - wrterror("chunk overflow", NULL); - errno = EFAULT; - return (NULL); - } - } - if (*lp & u) - i--; + + i = d->chunk_start; + if (bp->free > 1) + i += getrnibble(); + if (i >= bp->total) + i &= bp->total - 1; + for (;;) { + for (;;) { + lp = &bp->bits[i / MALLOC_BITS]; + if (!*lp) { + i += MALLOC_BITS; + i &= ~(MALLOC_BITS - 1); + if (i >= bp->total) + i = 0; + } else + break; } + k = i % MALLOC_BITS; + u = 1 << k; + if (*lp & u) + break; + if (++i >= bp->total) + i = 0; } + d->chunk_start += i + 1; *lp ^= u;