Re: malloc: change chunk sizes to be multiple of 16 instead of power of 2

2023-03-07 Thread Otto Moerbeek
On Tue, Mar 07, 2023 at 09:02:04AM +0100, Theo Buehler wrote:

> > So here's the dif with the fix.
> 
> The new diff went through an amd64 bulk without fallout and also works
> fine on some dev machines. No noticeable performance impact for my
> workloads.
> 
> It also reads fine to me (ok tb).
> 
> Do you want it to make it into the release or can/should it wait?
> Either way, it would probably be good for it to see more eyes and
> tests.

Thanks for testing and reviewing. I think I won't push it into the
upcoming release. This should benefit from a large part of the release
cycle for testing and it's already quite late in the current cycle.

-Otto



Re: malloc: change chunk sizes to be multiple of 16 instead of power of 2

2023-03-07 Thread Theo Buehler
> So here's the dif with the fix.

The new diff went through an amd64 bulk without fallout and also works
fine on some dev machines. No noticeable performance impact for my
workloads.

It also reads fine to me (ok tb).

Do you want it to make it into the release or can/should it wait?
Either way, it would probably be good for it to see more eyes and
tests.



Re: malloc: change chunk sizes to be multiple of 16 instead of power of 2

2023-03-01 Thread Otto Moerbeek
On Wed, Mar 01, 2023 at 08:49:56AM +0100, Theo Buehler wrote:

> On Wed, Mar 01, 2023 at 08:39:08AM +0100, Otto Moerbeek wrote:
> > On Wed, Mar 01, 2023 at 08:31:47AM +0100, Theo Buehler wrote:
> > 
> > > On Tue, Feb 28, 2023 at 05:52:28PM +0100, Otto Moerbeek wrote:
> > > > Second iteration.
> > > > 
> > > > Gain back performance by allocation chunk_info pages in a bundle, and
> > > > use less buckets is !malloc option S. The chunk sizes used are 16, 32,
> > > > 48, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 448, 512, 640,
> > > > 768, 896, 1024, 1280, 1536, 1792, 2048 (and a few more for sparc84
> > > > with it's 8k sized pages and loongson with it's 16k pages).
> > > > 
> > > > If malloc option S (or rather cache size 0) is used we use strict
> > > > multiple of 16 buckets, to get as many buckets as possible.
> > > > 
> > > > See the find_bucket() and bin_of() functions. Thanks to Tony Finch for
> > > > pointing me to code to compute nice bucket sizes.
> > > > 
> > > > I think this is ready for review and wide testing.
> > > 
> > > Two vala-based ports, graphics/birdfont and productivity/minder, run out
> > > of memory when attempting to build them with this diff (and its previous
> > > version) on both amd64 and arm64:
> > > 
> > > ***MEMORY-ERROR***: valac[93681]: GSlice: failed to allocate 2032 bytes 
> > > (alignment: 2048): Cannot allocate memory
> > 
> > Thanks, this smells like a bug in the aligned mem case.
> > 
> > +   pof2 = 1 << MALLOC_MINSIZE;
> > 
> > should be 
> > 
> > +   pof2 = MALLOC_MINSIZE;
> > 
> > By the looks of it. I'll get back to this.
> 
> I can confirm that changing this fixes this issue with both ports on
> amd64 and arm64.

So here's the diff with the fix.

-Otto

Index: stdlib/malloc.c
===
RCS file: /home/cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.277
diff -u -p -r1.277 malloc.c
--- stdlib/malloc.c 27 Feb 2023 06:47:54 -  1.277
+++ stdlib/malloc.c 1 Mar 2023 09:14:24 -
@@ -67,6 +67,11 @@
 #define MALLOC_CHUNK_LISTS 4
 #define CHUNK_CHECK_LENGTH 32
 
+#define B2SIZE(b)  ((b) * MALLOC_MINSIZE)
+#define B2ALLOC(b) ((b) == 0 ? MALLOC_MINSIZE : \
+   (b) * MALLOC_MINSIZE)
+#define BUCKETS    (MALLOC_MAXCHUNK / MALLOC_MINSIZE)
+
 /*
  * We move allocations between half a page and a whole page towards the end,
  * subject to alignment constraints. This is the extra headroom we allow.
@@ -144,9 +149,9 @@ struct dir_info {
int mutex;
int malloc_mt;  /* multi-threaded mode? */
/* lists of free chunk info structs */
-   struct chunk_head chunk_info_list[MALLOC_MAXSHIFT + 1];
+   struct chunk_head chunk_info_list[BUCKETS + 1];
/* lists of chunks with free slots */
-   struct chunk_head chunk_dir[MALLOC_MAXSHIFT + 1][MALLOC_CHUNK_LISTS];
+   struct chunk_head chunk_dir[BUCKETS + 1][MALLOC_CHUNK_LISTS];
/* delayed free chunk slots */
void *delayed_chunks[MALLOC_DELAYED_CHUNK_MASK + 1];
u_char rbytes[32];  /* random bytes */
@@ -155,6 +160,8 @@ struct dir_info {
size_t bigcache_used;
size_t bigcache_size;
struct bigcache *bigcache;
+   void *chunk_pages;
+   size_t chunk_pages_used;
 #ifdef MALLOC_STATS
size_t inserts;
size_t insert_collisions;
@@ -195,8 +202,7 @@ struct chunk_info {
LIST_ENTRY(chunk_info) entries;
void *page; /* pointer to the page */
u_short canary;
-   u_short size;   /* size of this page's chunks */
-   u_short shift;  /* how far to shift for this size */
+   u_short bucket;
u_short free;   /* how many free chunks */
u_short total;  /* how many chunks */
u_short offset; /* requested size table offset */
@@ -247,11 +253,11 @@ static void malloc_exit(void);
 #endif
 
 /* low bits of r->p determine size: 0 means >= page size and r->size holding
- * real size, otherwise low bits are a shift count, or 1 for malloc(0)
+ * real size, otherwise low bits is the bucket + 1
  */
 #define REALSIZE(sz, r)\
(sz) = (uintptr_t)(r)->p & MALLOC_PAGEMASK, \
-   (sz) = ((sz) == 0 ? (r)->size : ((sz) == 1 ? 0 : (1 << ((sz)-1))))
+   (sz) = ((sz) == 0 ? (r)->size : B2SIZE((sz) - 1))
 
 static inline void
 _MALLOC_LEAVE(struct dir_info *d)
@@ -502,7 +508,7 @@ omalloc_poolinit(struct dir_info *d, int
d->r = NULL;
d->rbytesused = sizeof(d->rbytes);
d->regions_free = d->regions_total = 0;
-   for (i = 0; i <= MALLOC_MAXSHIFT; i++) {
+   for (i = 0; i <= BUCKETS; i++) {

Re: malloc: change chunk sizes to be multiple of 16 instead of power of 2

2023-02-28 Thread Theo Buehler
On Wed, Mar 01, 2023 at 08:39:08AM +0100, Otto Moerbeek wrote:
> On Wed, Mar 01, 2023 at 08:31:47AM +0100, Theo Buehler wrote:
> 
> > On Tue, Feb 28, 2023 at 05:52:28PM +0100, Otto Moerbeek wrote:
> > > Second iteration.
> > > 
> > > Gain back performance by allocation chunk_info pages in a bundle, and
> > > use less buckets is !malloc option S. The chunk sizes used are 16, 32,
> > > 48, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 448, 512, 640,
> > > 768, 896, 1024, 1280, 1536, 1792, 2048 (and a few more for sparc84
> > > with it's 8k sized pages and loongson with it's 16k pages).
> > > 
> > > If malloc option S (or rather cache size 0) is used we use strict
> > > multiple of 16 buckets, to get as many buckets as possible.
> > > 
> > > See the find_bucket() and bin_of() functions. Thanks to Tony Finch for
> > > pointing me to code to compute nice bucket sizes.
> > > 
> > > I think this is ready for review and wide testing.
> > 
> > Two vala-based ports, graphics/birdfont and productivity/minder, run out
> > of memory when attempting to build them with this diff (and its previous
> > version) on both amd64 and arm64:
> > 
> > ***MEMORY-ERROR***: valac[93681]: GSlice: failed to allocate 2032 bytes 
> > (alignment: 2048): Cannot allocate memory
> 
> Thanks, this smells like a bug in the aligned mem case.
> 
> + pof2 = 1 << MALLOC_MINSIZE;
> 
> should be 
>   
> + pof2 = MALLOC_MINSIZE;
> 
> By the looks of it. I'll get back to this.

I can confirm that changing this fixes this issue with both ports on
amd64 and arm64.

> 
>   -Otto
> 
> > 
> > Abort trap (core dumped)
> > 
> > To be able to build birdfont with PORTS_PRIVSEP = Yes, I had to bump
> > _pbuild's datasize-cur to 15G, while 14G was not enough. That's nearly
> > double the current default. On amd64 without this diff, birdfont builds
> > comfortably with a datasize-cur of 1G.
> > 
> > birdfont may be easier to investigate since the error happens early in
> > the build. You can get there relatively quickly by doing
> > 
> > cd /usr/ports/graphics/birdfont
> > doas pkg_add birdfont
> > make FETCH_PACKAGES= prepare
> > make
> > 
> > Not sure if the top of the trace is of much use. Here it is:
> > 
> > #0  thrkill () at /tmp/-:3
> > #1  0x486dd8c0aacac468 in ?? ()
> > #2  0x0c3a34319d0e in _libc_abort () at 
> > /usr/src/lib/libc/stdlib/abort.c:51
> > #3  0x0c39b735724b in mem_error () from 
> > /usr/local/lib/libglib-2.0.so.4201.9
> > #4  0x0c39b735604f in slab_allocator_alloc_chunk () from 
> > /usr/local/lib/libglib-2.0.so.4201.9
> > #5  0x0c39b7355a95 in g_slice_alloc () from 
> > /usr/local/lib/libglib-2.0.so.4201.9
> > #6  0x0c39b735606e in g_slice_alloc0 () from 
> > /usr/local/lib/libglib-2.0.so.4201.9
> > #7  0x0c396c2675f5 in g_type_create_instance () from 
> > /usr/local/lib/libgobject-2.0.so.4200.16
> > #8  0x0c3a19ad in vala_data_type_construct_with_symbol ()
> >from /usr/local/lib/libvala-0.56.so.0.0
> > #9  0x0c3a19b4ecae in vala_integer_type_construct () from 
> > /usr/local/lib/libvala-0.56.so.0.0
> > #10 0x0c3a19b4f08c in vala_integer_type_real_copy () from 
> > /usr/local/lib/libvala-0.56.so.0.0
> > #11 0x0c3a19a9d46f in vala_assignment_real_check () from 
> > /usr/local/lib/libvala-0.56.so.0.0
> > #12 0x0c3a19ae54e5 in vala_expression_statement_real_check ()
> >from /usr/local/lib/libvala-0.56.so.0.0
> > #13 0x0c3a19aa6a0d in vala_block_real_check () from 
> > /usr/local/lib/libvala-0.56.so.0.0



Re: malloc: change chunk sizes to be multiple of 16 instead of power of 2

2023-02-28 Thread Otto Moerbeek
On Wed, Mar 01, 2023 at 08:31:47AM +0100, Theo Buehler wrote:

> On Tue, Feb 28, 2023 at 05:52:28PM +0100, Otto Moerbeek wrote:
> > Second iteration.
> > 
> > Gain back performance by allocation chunk_info pages in a bundle, and
> > use less buckets is !malloc option S. The chunk sizes used are 16, 32,
> > 48, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 448, 512, 640,
> > 768, 896, 1024, 1280, 1536, 1792, 2048 (and a few more for sparc84
> > with it's 8k sized pages and loongson with it's 16k pages).
> > 
> > If malloc option S (or rather cache size 0) is used we use strict
> > multiple of 16 buckets, to get as many buckets as possible.
> > 
> > See the find_bucket() and bin_of() functions. Thanks to Tony Finch for
> > pointing me to code to compute nice bucket sizes.
> > 
> > I think this is ready for review and wide testing.
> 
> Two vala-based ports, graphics/birdfont and productivity/minder, run out
> of memory when attempting to build them with this diff (and its previous
> version) on both amd64 and arm64:
> 
> ***MEMORY-ERROR***: valac[93681]: GSlice: failed to allocate 2032 bytes 
> (alignment: 2048): Cannot allocate memory

Thanks, this smells like a bug in the aligned mem case.

+   pof2 = 1 << MALLOC_MINSIZE;

should be 

+   pof2 = MALLOC_MINSIZE;

By the looks of it. I'll get back to this.

-Otto

> 
> Abort trap (core dumped)
> 
> To be able to build birdfont with PORTS_PRIVSEP = Yes, I had to bump
> _pbuild's datasize-cur to 15G, while 14G was not enough. That's nearly
> double the current default. On amd64 without this diff, birdfont builds
> comfortably with a datasize-cur of 1G.
> 
> birdfont may be easier to investigate since the error happens early in
> the build. You can get there relatively quickly by doing
> 
> cd /usr/ports/graphics/birdfont
> doas pkg_add birdfont
> make FETCH_PACKAGES= prepare
> make
> 
> Not sure if the top of the trace is of much use. Here it is:
> 
> #0  thrkill () at /tmp/-:3
> #1  0x486dd8c0aacac468 in ?? ()
> #2  0x0c3a34319d0e in _libc_abort () at 
> /usr/src/lib/libc/stdlib/abort.c:51
> #3  0x0c39b735724b in mem_error () from 
> /usr/local/lib/libglib-2.0.so.4201.9
> #4  0x0c39b735604f in slab_allocator_alloc_chunk () from 
> /usr/local/lib/libglib-2.0.so.4201.9
> #5  0x0c39b7355a95 in g_slice_alloc () from 
> /usr/local/lib/libglib-2.0.so.4201.9
> #6  0x0c39b735606e in g_slice_alloc0 () from 
> /usr/local/lib/libglib-2.0.so.4201.9
> #7  0x0c396c2675f5 in g_type_create_instance () from 
> /usr/local/lib/libgobject-2.0.so.4200.16
> #8  0x0c3a19ad in vala_data_type_construct_with_symbol ()
>from /usr/local/lib/libvala-0.56.so.0.0
> #9  0x0c3a19b4ecae in vala_integer_type_construct () from 
> /usr/local/lib/libvala-0.56.so.0.0
> #10 0x0c3a19b4f08c in vala_integer_type_real_copy () from 
> /usr/local/lib/libvala-0.56.so.0.0
> #11 0x0c3a19a9d46f in vala_assignment_real_check () from 
> /usr/local/lib/libvala-0.56.so.0.0
> #12 0x0c3a19ae54e5 in vala_expression_statement_real_check ()
>from /usr/local/lib/libvala-0.56.so.0.0
> #13 0x0c3a19aa6a0d in vala_block_real_check () from 
> /usr/local/lib/libvala-0.56.so.0.0



Re: malloc: change chunk sizes to be multiple of 16 instead of power of 2

2023-02-28 Thread Theo Buehler
On Tue, Feb 28, 2023 at 05:52:28PM +0100, Otto Moerbeek wrote:
> Second iteration.
> 
> Gain back performance by allocation chunk_info pages in a bundle, and
> use less buckets is !malloc option S. The chunk sizes used are 16, 32,
> 48, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 448, 512, 640,
> 768, 896, 1024, 1280, 1536, 1792, 2048 (and a few more for sparc84
> with it's 8k sized pages and loongson with it's 16k pages).
> 
> If malloc option S (or rather cache size 0) is used we use strict
> multiple of 16 buckets, to get as many buckets as possible.
> 
> See the find_bucket() and bin_of() functions. Thanks to Tony Finch for
> pointing me to code to compute nice bucket sizes.
> 
> I think this is ready for review and wide testing.

Two vala-based ports, graphics/birdfont and productivity/minder, run out
of memory when attempting to build them with this diff (and its previous
version) on both amd64 and arm64:

***MEMORY-ERROR***: valac[93681]: GSlice: failed to allocate 2032 bytes 
(alignment: 2048): Cannot allocate memory

Abort trap (core dumped)

To be able to build birdfont with PORTS_PRIVSEP = Yes, I had to bump
_pbuild's datasize-cur to 15G, while 14G was not enough. That's nearly
double the current default. On amd64 without this diff, birdfont builds
comfortably with a datasize-cur of 1G.

birdfont may be easier to investigate since the error happens early in
the build. You can get there relatively quickly by doing

cd /usr/ports/graphics/birdfont
doas pkg_add birdfont
make FETCH_PACKAGES= prepare
make

Not sure if the top of the trace is of much use. Here it is:

#0  thrkill () at /tmp/-:3
#1  0x486dd8c0aacac468 in ?? ()
#2  0x0c3a34319d0e in _libc_abort () at /usr/src/lib/libc/stdlib/abort.c:51
#3  0x0c39b735724b in mem_error () from /usr/local/lib/libglib-2.0.so.4201.9
#4  0x0c39b735604f in slab_allocator_alloc_chunk () from 
/usr/local/lib/libglib-2.0.so.4201.9
#5  0x0c39b7355a95 in g_slice_alloc () from 
/usr/local/lib/libglib-2.0.so.4201.9
#6  0x0c39b735606e in g_slice_alloc0 () from 
/usr/local/lib/libglib-2.0.so.4201.9
#7  0x0c396c2675f5 in g_type_create_instance () from 
/usr/local/lib/libgobject-2.0.so.4200.16
#8  0x0c3a19ad in vala_data_type_construct_with_symbol ()
   from /usr/local/lib/libvala-0.56.so.0.0
#9  0x0c3a19b4ecae in vala_integer_type_construct () from 
/usr/local/lib/libvala-0.56.so.0.0
#10 0x0c3a19b4f08c in vala_integer_type_real_copy () from 
/usr/local/lib/libvala-0.56.so.0.0
#11 0x0c3a19a9d46f in vala_assignment_real_check () from 
/usr/local/lib/libvala-0.56.so.0.0
#12 0x0c3a19ae54e5 in vala_expression_statement_real_check ()
   from /usr/local/lib/libvala-0.56.so.0.0
#13 0x0c3a19aa6a0d in vala_block_real_check () from 
/usr/local/lib/libvala-0.56.so.0.0



Re: malloc: change chunk sizes to be multiple of 16 instead of power of 2

2023-02-28 Thread Otto Moerbeek
Second iteration.

Gain back performance by allocating chunk_info pages in a bundle, and
use fewer buckets if !malloc option S. The chunk sizes used are 16, 32,
48, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 448, 512, 640,
768, 896, 1024, 1280, 1536, 1792, 2048 (and a few more for sparc64
with its 8k sized pages and loongson with its 16k pages).

If malloc option S (or rather cache size 0) is used we use strict
multiple of 16 buckets, to get as many buckets as possible.

See the find_bucket() and bin_of() functions. Thanks to Tony Finch for
pointing me to code to compute nice bucket sizes.

I think this is ready for review and wide testing.

-Otto

Index: stdlib/malloc.c
===
RCS file: /home/cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.277
diff -u -p -r1.277 malloc.c
--- stdlib/malloc.c 27 Feb 2023 06:47:54 -  1.277
+++ stdlib/malloc.c 28 Feb 2023 16:49:08 -
@@ -67,6 +67,11 @@
 #define MALLOC_CHUNK_LISTS 4
 #define CHUNK_CHECK_LENGTH 32
 
+#define B2SIZE(b)  ((b) * MALLOC_MINSIZE)
+#define B2ALLOC(b) ((b) == 0 ? MALLOC_MINSIZE : \
+   (b) * MALLOC_MINSIZE)
+#define BUCKETS    (MALLOC_MAXCHUNK / MALLOC_MINSIZE)
+
 /*
  * We move allocations between half a page and a whole page towards the end,
  * subject to alignment constraints. This is the extra headroom we allow.
@@ -144,9 +149,9 @@ struct dir_info {
int mutex;
int malloc_mt;  /* multi-threaded mode? */
/* lists of free chunk info structs */
-   struct chunk_head chunk_info_list[MALLOC_MAXSHIFT + 1];
+   struct chunk_head chunk_info_list[BUCKETS + 1];
/* lists of chunks with free slots */
-   struct chunk_head chunk_dir[MALLOC_MAXSHIFT + 1][MALLOC_CHUNK_LISTS];
+   struct chunk_head chunk_dir[BUCKETS + 1][MALLOC_CHUNK_LISTS];
/* delayed free chunk slots */
void *delayed_chunks[MALLOC_DELAYED_CHUNK_MASK + 1];
u_char rbytes[32];  /* random bytes */
@@ -155,6 +160,8 @@ struct dir_info {
size_t bigcache_used;
size_t bigcache_size;
struct bigcache *bigcache;
+   void *chunk_pages;
+   size_t chunk_pages_used;
 #ifdef MALLOC_STATS
size_t inserts;
size_t insert_collisions;
@@ -195,8 +202,7 @@ struct chunk_info {
LIST_ENTRY(chunk_info) entries;
void *page; /* pointer to the page */
u_short canary;
-   u_short size;   /* size of this page's chunks */
-   u_short shift;  /* how far to shift for this size */
+   u_short bucket;
u_short free;   /* how many free chunks */
u_short total;  /* how many chunks */
u_short offset; /* requested size table offset */
@@ -247,11 +253,11 @@ static void malloc_exit(void);
 #endif
 
 /* low bits of r->p determine size: 0 means >= page size and r->size holding
- * real size, otherwise low bits are a shift count, or 1 for malloc(0)
+ * real size, otherwise low bits is the bucket + 1
  */
 #define REALSIZE(sz, r)\
(sz) = (uintptr_t)(r)->p & MALLOC_PAGEMASK, \
-   (sz) = ((sz) == 0 ? (r)->size : ((sz) == 1 ? 0 : (1 << ((sz)-1))))
+   (sz) = ((sz) == 0 ? (r)->size : B2SIZE((sz) - 1))
 
 static inline void
 _MALLOC_LEAVE(struct dir_info *d)
@@ -502,7 +508,7 @@ omalloc_poolinit(struct dir_info *d, int
d->r = NULL;
d->rbytesused = sizeof(d->rbytes);
d->regions_free = d->regions_total = 0;
-   for (i = 0; i <= MALLOC_MAXSHIFT; i++) {
+   for (i = 0; i <= BUCKETS; i++) {
LIST_INIT(&d->chunk_info_list[i]);
for (j = 0; j < MALLOC_CHUNK_LISTS; j++)
LIST_INIT(&d->chunk_dir[i][j]);
@@ -720,7 +726,7 @@ unmap(struct dir_info *d, void *p, size_
 
/* don't look through all slots */
for (j = 0; j < d->bigcache_size / 4; j++) {
-   i = (base + j) % d->bigcache_size;
+   i = (base + j) & (d->bigcache_size - 1);
if (d->bigcache_used <
BIGCACHE_FILL(d->bigcache_size))  {
if (d->bigcache[i].psize == 0)
@@ -764,10 +770,13 @@ unmap(struct dir_info *d, void *p, size_
}
cache = &d->smallcache[psz - 1];
if (cache->length == cache->max) {
+   int fresh;
/* use a random slot */
-   i = getrbyte(d) % cache->max;
+   i = getrbyte(d) & (cache->max - 1);
r = cache->pages[i];
-   if (!mopts.malloc_freeunmap)
+   fresh = (uintptr_t)r & 1;
+   

malloc: change chunk sizes to be multiple of 16 instead of power of 2

2023-02-23 Thread Otto Moerbeek
Hi,

The basic idea is simple: one of the reasons the recent sshd bug is
potentially exploitable is that an (erroneously) freed malloc chunk
gets re-used in a different role. My malloc has power of two chunk
sizes and so one page of chunks holds many different types of
allocations. Userland malloc has no knowledge of types, we only know
about sizes. So I changed that to use finer-grained chunk sizes.

Originally I thought it would be a *lot* of work, but it's not too
bad: a couple of hours of thinking and a couple of hours coding, which
mostly consisted of hunting for silent assumptions that chunk sizes
are a power of two.

I suspect this is not the final diff, as there is some performance
impact. In particular, sparc64 seems sensitive to these changes. I'm
still investigating why but I wanted to share the current work in
progress anyway.

You can help by testing this.

Thanks,

-Otto

Index: stdlib/malloc.c
===
RCS file: /home/cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.276
diff -u -p -r1.276 malloc.c
--- stdlib/malloc.c 27 Dec 2022 17:31:09 -  1.276
+++ stdlib/malloc.c 20 Feb 2023 07:33:29 -
@@ -67,6 +67,11 @@
 #define MALLOC_CHUNK_LISTS 4
 #define CHUNK_CHECK_LENGTH 32
 
+#define B2SIZE(b)  ((b) * MALLOC_MINSIZE)
+#define B2ALLOC(b) ((b) == 0 ? MALLOC_MINSIZE : \
+   (b) * MALLOC_MINSIZE)
+#define BUCKETS    (MALLOC_MAXCHUNK / MALLOC_MINSIZE)
+
 /*
  * We move allocations between half a page and a whole page towards the end,
  * subject to alignment constraints. This is the extra headroom we allow.
@@ -144,9 +149,9 @@ struct dir_info {
int mutex;
int malloc_mt;  /* multi-threaded mode? */
/* lists of free chunk info structs */
-   struct chunk_head chunk_info_list[MALLOC_MAXSHIFT + 1];
+   struct chunk_head chunk_info_list[BUCKETS + 1];
/* lists of chunks with free slots */
-   struct chunk_head chunk_dir[MALLOC_MAXSHIFT + 1][MALLOC_CHUNK_LISTS];
+   struct chunk_head chunk_dir[BUCKETS + 1][MALLOC_CHUNK_LISTS];
/* delayed free chunk slots */
void *delayed_chunks[MALLOC_DELAYED_CHUNK_MASK + 1];
u_char rbytes[32];  /* random bytes */
@@ -195,8 +200,7 @@ struct chunk_info {
LIST_ENTRY(chunk_info) entries;
void *page; /* pointer to the page */
u_short canary;
-   u_short size;   /* size of this page's chunks */
-   u_short shift;  /* how far to shift for this size */
+   u_short bucket;
u_short free;   /* how many free chunks */
u_short total;  /* how many chunks */
u_short offset; /* requested size table offset */
@@ -247,11 +251,11 @@ static void malloc_exit(void);
 #endif
 
 /* low bits of r->p determine size: 0 means >= page size and r->size holding
- * real size, otherwise low bits are a shift count, or 1 for malloc(0)
+ * real size, otherwise low bits is the bucket + 1
  */
 #define REALSIZE(sz, r)\
(sz) = (uintptr_t)(r)->p & MALLOC_PAGEMASK, \
-   (sz) = ((sz) == 0 ? (r)->size : ((sz) == 1 ? 0 : (1 << ((sz)-1))))
+   (sz) = ((sz) == 0 ? (r)->size : B2SIZE((sz) - 1))
 
 static inline void
 _MALLOC_LEAVE(struct dir_info *d)
@@ -502,7 +506,7 @@ omalloc_poolinit(struct dir_info *d, int
d->r = NULL;
d->rbytesused = sizeof(d->rbytes);
d->regions_free = d->regions_total = 0;
-   for (i = 0; i <= MALLOC_MAXSHIFT; i++) {
+   for (i = 0; i <= BUCKETS; i++) {
LIST_INIT(&d->chunk_info_list[i]);
for (j = 0; j < MALLOC_CHUNK_LISTS; j++)
LIST_INIT(&d->chunk_dir[i][j]);
@@ -883,21 +887,13 @@ map(struct dir_info *d, size_t sz, int z
 }
 
 static void
-init_chunk_info(struct dir_info *d, struct chunk_info *p, int bits)
+init_chunk_info(struct dir_info *d, struct chunk_info *p, u_int bucket)
 {
-   int i;
+   u_int i;
 
-   if (bits == 0) {
-   p->shift = MALLOC_MINSHIFT;
-   p->total = p->free = MALLOC_PAGESIZE >> p->shift;
-   p->size = 0;
-   p->offset = 0xdead;
-   } else {
-   p->shift = bits;
-   p->total = p->free = MALLOC_PAGESIZE >> p->shift;
-   p->size = 1U << bits;
-   p->offset = howmany(p->total, MALLOC_BITS);
-   }
+   p->bucket = bucket;
+   p->total = p->free = MALLOC_PAGESIZE / B2ALLOC(bucket);
+   p->offset = bucket == 0 ? 0xdead : howmany(p->total, MALLOC_BITS);
p->canary = (u_short)d->canary1;
 
/* set all valid bits in the bitmap */
@@ -907,18 +903,15 @@