Hi, This diff is based upon kshe's diff, but there's one differene: I am using the __builtin_ffs instead of ffs(3). Looking at the assembly generated by calling ffs(3) produces a function call, while the __builtin_ffs produces just a few machine instructions on all the platforms I've checked. __builtin_ffs also makes it more easy to port over this diff to the ld.so version of malloc.
Using a standalone test program both gcc and clang generate __builtin_ffs. I do not know why __builtin_ffs is not generated by either gcc or clang when compiling libc. I suppose the flags given to the compiler force it to generate a function call, but I did not find the actual flag that's to blame. Please test on as much platforms as possible, -Otto Index: malloc.c =================================================================== RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v retrieving revision 1.239 diff -u -p -r1.239 malloc.c --- malloc.c 8 Jan 2018 12:20:23 -0000 1.239 +++ malloc.c 9 Jan 2018 11:44:40 -0000 @@ -931,7 +931,7 @@ malloc_bytes(struct dir_info *d, size_t u_int i, r; int j, listnum; size_t k; - u_short u, b, *lp; + u_short *lp; struct chunk_info *bp; void *p; @@ -960,15 +960,12 @@ malloc_bytes(struct dir_info *d, size_t /* start somewhere in a short */ lp = &bp->bits[i / MALLOC_BITS]; if (*lp) { - b = *lp; - k = i % MALLOC_BITS; - u = 1 << k; - while (k < MALLOC_BITS) { - if (b & u) - goto found; - k++; - u <<= 1; - } + j = i % MALLOC_BITS; + k = __builtin_ffs(*lp >> j); + if (k != 0) { + k += j - 1; + goto found; + } } /* no bit halfway, go to next full short */ i /= MALLOC_BITS; @@ -977,14 +974,10 @@ malloc_bytes(struct dir_info *d, size_t i = 0; lp = &bp->bits[i]; if (*lp) { - b = *lp; - k = 0; - u = 1; - for (;;) { - if (b & u) - goto found; - k++; - u <<= 1; + k = __builtin_ffs(*lp); + if (k != 0) { + k--; + break; } } } @@ -996,7 +989,7 @@ found: } #endif - *lp ^= u; + *lp ^= 1 << k; /* If there are no more free, remove from free-list */ if (--bp->free == 0)