Re: malloc junking tweaks

2021-02-18 Thread Theo de Raadt
This diff will land in snapshots.



Re: malloc junking tweaks

2021-02-18 Thread Stuart Henderson
On 2021/02/18 19:52, Otto Moerbeek wrote:
> Any feedback?

I've been doing i386 ports builds with this in, no problems seen there.
The changes seem sane, and there's an easy way for people to disable the
changes if they do run into problems. Does it make sense to put it into
snaps?
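
For context, the junk level is tunable with the malloc options mentioned in
the diff mail: 'j' selects junk mode 0 (no junking, so the new checks are
off) and 'J' selects junk mode 2, which makes the changes easy to switch off
while testing. Below is a minimal, hypothetical test program (not part of
the diff) that tries to trip the new write-after-free validation on a
cached page-sized region; whether it actually aborts depends on the junk
level, on the region landing in the page cache and being reused by a
same-sized allocation, and it assumes freed regions stay mapped (i.e. the
freeunmap option is not enabled).

/*
 * Illustrative sketch only: deliberately dirty a freed, cached,
 * page-sized region and then force a reuse of the cache, which may
 * run the new validate_junk() and abort with "write after free".
 */
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	size_t sz = 8192;		/* page sized or larger */
	char *p = malloc(sz);

	if (p == NULL)
		return 1;
	memset(p, 'A', sz);
	free(p);			/* region is junked and cached */
	p[0] = 'X';			/* write after free */
	free(malloc(sz));		/* reuse may validate and abort */
	return 0;
}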



Re: malloc junking tweaks

2021-02-18 Thread Otto Moerbeek
On Fri, Feb 12, 2021 at 02:48:34PM +0100, Otto Moerbeek wrote:

> On Fri, Feb 12, 2021 at 02:18:08PM +0100, Otto Moerbeek wrote:
> 
> > Hi,
> > 
> > Currently, junking is done like this:
> > 
> > - for small chunks, the whole chunk is junked on free
> > 
> > - for page sized and larger allocations, the first half of a page is filled
> > 
> > - after a delayed free, the first 32 bytes of small chunks are
> > validated to not be overwritten
> > 
> > - page sized and larger allocations are not validated at all, even if
> > they end up in the cache.
> > 
> > This diff changes the following:
> > 
> > - I make use of the fact that I know how the chunks are aligned, and
> > write 8 bytes at a time by using a uint64_t pointer. For an
> > allocation, a maximum of 4 such uint64_t's are written, spread over
> > the allocation. For page sized and larger allocations, the first page
> > is junked in this way.
> > 
> > - Delayed free of a small chunk is checked in the corresponding way.
> > 
> > - Pages ending up in the cache are validated upon unmapping or re-use.
> > 
> > The last point is the real gain: we also check for write-after-free
> > for large allocations, which we did not do before.
> > 
> > So we are catching more writes-after-frees. A price to pay is that
> > larger chunks are not completely junked, but only a total of 32 bytes
> > are. I chose this number after comparing performance with the current
> > code: we still gain a bit in speed.
> > 
> > Junk mode 0 (j) and junk mode 2 (J) are not changed.
> > 
> > Please test and review,
> > 
> > -Otto
> > 
> 
> And now with correct version of diff

Any feedback?

-Otto
> 
> 
> Index: stdlib/malloc.3
> ===
> RCS file: /cvs/src/lib/libc/stdlib/malloc.3,v
> retrieving revision 1.126
> diff -u -p -r1.126 malloc.3
> --- stdlib/malloc.3   14 Sep 2019 13:16:50 -  1.126
> +++ stdlib/malloc.3   12 Feb 2021 08:14:54 -
> @@ -619,7 +619,7 @@ or
>  reallocate an unallocated pointer was made.
>  .It Dq chunk is already free
>  There was an attempt to free a chunk that had already been freed.
> -.It Dq use after free
> +.It Dq write after free
>  A chunk has been modified after it was freed.
>  .It Dq modified chunk-pointer
>  The pointer passed to
> Index: stdlib/malloc.c
> ===
> RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
> retrieving revision 1.267
> diff -u -p -r1.267 malloc.c
> --- stdlib/malloc.c   23 Nov 2020 15:42:11 -  1.267
> +++ stdlib/malloc.c   12 Feb 2021 08:14:54 -
> @@ -89,6 +89,7 @@
>   */
>  #define SOME_JUNK  0xdb /* deadbeef */
>  #define SOME_FREEJUNK  0xdf /* dead, free */
> +#define SOME_FREEJUNK_ULL  0xdfdfdfdfdfdfdfdfULL
>  
>  #define MMAP(sz,f)   mmap(NULL, (sz), PROT_READ | PROT_WRITE, \
>  MAP_ANON | MAP_PRIVATE | (f), -1, 0)
> @@ -655,6 +656,49 @@ delete(struct dir_info *d, struct region
>   }
>  }
>  
> +static inline void
> +junk_free(int junk, void *p, size_t sz)
> +{
> + size_t i, step = 1;
> + uint64_t *lp = p;
> +
> + if (junk == 0 || sz == 0)
> + return;
> + sz /= sizeof(uint64_t);
> + if (junk == 1) {
> + if (sz > MALLOC_PAGESIZE / sizeof(uint64_t))
> + sz = MALLOC_PAGESIZE / sizeof(uint64_t);
> + step = sz / 4;
> + if (step == 0)
> + step = 1;
> + }
> + for (i = 0; i < sz; i += step)
> + lp[i] = SOME_FREEJUNK_ULL;
> +}
> +
> +static inline void
> +validate_junk(struct dir_info *pool, void *p, size_t sz)
> +{
> + size_t i, step = 1;
> + uint64_t *lp = p;
> +
> + if (pool->malloc_junk == 0 || sz == 0)
> + return;
> + sz /= sizeof(uint64_t);
> + if (pool->malloc_junk == 1) {
> + if (sz > MALLOC_PAGESIZE / sizeof(uint64_t))
> + sz = MALLOC_PAGESIZE / sizeof(uint64_t);
> + step = sz / 4;
> + if (step == 0)
> + step = 1;
> + }
> + for (i = 0; i < sz; i += step) {
> + if (lp[i] != SOME_FREEJUNK_ULL)
> + wrterror(pool, "write after free %p", p);
> + }
> +}
> +
> +
>  /*
>   * Cache maintenance. We keep at most malloc_cache pages cached.
>   * If the cache is becoming full, unmap pages in the cache for real,
> @@ -663,7 +707,7 @@ delete(struct dir_info *d, struct region
>   * cache are in MALLOC_PAGESIZE units.
>   */
>  static void
> -unmap(struct dir_info *d, void *p, size_t sz, size_t clear, int junk)
> +unmap(struct dir_info *d, void *p, size_t sz, size_t clear)
>  {
>   size_t psz = sz >> MALLOC_PAGESHIFT;
>   size_t rsz;
> @@ -695,6 +739,8 @@ unmap(struct dir_info *d, void *p, size_
>   r = &d->free_regions[(i + offset) & mask];
>   if (r->p != NULL) {
>   rsz = r->size << MALLOC_PAGESHIFT;
> + if 

Re: malloc junking tweaks

2021-02-12 Thread Otto Moerbeek
On Fri, Feb 12, 2021 at 02:18:08PM +0100, Otto Moerbeek wrote:

> Hi,
> 
> Currently, junking is done like this:
> 
> - for small chunks, the whole chunk is junked on free
> 
> - for page sized and larger allocations, the first half of a page is filled
> 
> - after a delayed free, the first 32 bytes of small chunks are
> validated to not be overwritten
> 
> - page sized and larger allocations are not validated at all, even if
> they end up in the cache.
> 
> This diff changes the following:
> 
> - I make use of the fact that I know how the chunks are aligned, and
> write 8 bytes at a time by using a uint64_t pointer. For an
> allocation, a maximum of 4 such uint64_t's are written, spread over
> the allocation. For page sized and larger allocations, the first page
> is junked in this way.
> 
> - Delayed free of a small chunk is checked in the corresponding way.
> 
> - Pages ending up in the cache are validated upon unmapping or re-use.
> 
> The last point is the real gain: we also check for write-after-free
> for large allocations, which we did not do before.
> 
> So we are catching more writes-after-frees. A price to pay is that
> larger chunks are not completely junked, but only a total of 32 bytes
> are. I chose this number after comparing performance with the current
> code: we still gain a bit in speed.
> 
> Junk mode 0 (j) and junk mode 2 (J) are not changed.
> 
> Please test and review,
> 
>   -Otto
> 

And now with correct version of diff

-Otto


Index: stdlib/malloc.3
===
RCS file: /cvs/src/lib/libc/stdlib/malloc.3,v
retrieving revision 1.126
diff -u -p -r1.126 malloc.3
--- stdlib/malloc.3 14 Sep 2019 13:16:50 -  1.126
+++ stdlib/malloc.3 12 Feb 2021 08:14:54 -
@@ -619,7 +619,7 @@ or
 reallocate an unallocated pointer was made.
 .It Dq chunk is already free
 There was an attempt to free a chunk that had already been freed.
-.It Dq use after free
+.It Dq write after free
 A chunk has been modified after it was freed.
 .It Dq modified chunk-pointer
 The pointer passed to
Index: stdlib/malloc.c
===
RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.267
diff -u -p -r1.267 malloc.c
--- stdlib/malloc.c 23 Nov 2020 15:42:11 -  1.267
+++ stdlib/malloc.c 12 Feb 2021 08:14:54 -
@@ -89,6 +89,7 @@
  */
 #define SOME_JUNK  0xdb/* deadbeef */
 #define SOME_FREEJUNK  0xdf/* dead, free */
+#define SOME_FREEJUNK_ULL  0xdfdfdfdfdfdfdfdfULL
 
 #define MMAP(sz,f) mmap(NULL, (sz), PROT_READ | PROT_WRITE, \
 MAP_ANON | MAP_PRIVATE | (f), -1, 0)
@@ -655,6 +656,49 @@ delete(struct dir_info *d, struct region
}
 }
 
+static inline void
+junk_free(int junk, void *p, size_t sz)
+{
+   size_t i, step = 1;
+   uint64_t *lp = p;
+
+   if (junk == 0 || sz == 0)
+   return;
+   sz /= sizeof(uint64_t);
+   if (junk == 1) {
+   if (sz > MALLOC_PAGESIZE / sizeof(uint64_t))
+   sz = MALLOC_PAGESIZE / sizeof(uint64_t);
+   step = sz / 4;
+   if (step == 0)
+   step = 1;
+   }
+   for (i = 0; i < sz; i += step)
+   lp[i] = SOME_FREEJUNK_ULL;
+}
+
+static inline void
+validate_junk(struct dir_info *pool, void *p, size_t sz)
+{
+   size_t i, step = 1;
+   uint64_t *lp = p;
+
+   if (pool->malloc_junk == 0 || sz == 0)
+   return;
+   sz /= sizeof(uint64_t);
+   if (pool->malloc_junk == 1) {
+   if (sz > MALLOC_PAGESIZE / sizeof(uint64_t))
+   sz = MALLOC_PAGESIZE / sizeof(uint64_t);
+   step = sz / 4;
+   if (step == 0)
+   step = 1;
+   }
+   for (i = 0; i < sz; i += step) {
+   if (lp[i] != SOME_FREEJUNK_ULL)
+   wrterror(pool, "write after free %p", p);
+   }
+}
+
+
 /*
  * Cache maintenance. We keep at most malloc_cache pages cached.
  * If the cache is becoming full, unmap pages in the cache for real,
@@ -663,7 +707,7 @@ delete(struct dir_info *d, struct region
  * cache are in MALLOC_PAGESIZE units.
  */
 static void
-unmap(struct dir_info *d, void *p, size_t sz, size_t clear, int junk)
+unmap(struct dir_info *d, void *p, size_t sz, size_t clear)
 {
size_t psz = sz >> MALLOC_PAGESHIFT;
size_t rsz;
@@ -695,6 +739,8 @@ unmap(struct dir_info *d, void *p, size_
	r = &d->free_regions[(i + offset) & mask];
if (r->p != NULL) {
rsz = r->size << MALLOC_PAGESHIFT;
+   if (!mopts.malloc_freeunmap)
+   validate_junk(d, r->p, rsz);
if (munmap(r->p, rsz))
wrterror(d, "munmap %p", r->p);
r->p = NULL;
@@ -716,12 

malloc junking tweaks

2021-02-12 Thread Otto Moerbeek
Hi,

Currently, junking is done like this:

- for small chunks, the whole chunk is junked on free

- for page sized and larger allocations, the first half of a page is filled

- after a delayed free, the first 32 bytes of small chunks are
validated to not be overwritten

- page sized and larger allocations are not validated at all, even if
they end up in the cache.

This diff changes the following:

- I make use of the fact that I know how the chunks are aligned, and
write 8 bytes at a time by using a uint64_t pointer. For an
allocation, a maximum of 4 such uint64_t's are written, spread over
the allocation. For page sized and larger allocations, the first page
is junked in this way.

- Delayed free of a small chunk is checked in the corresponding way.

- Pages ending up in the cache are validated upon unmapping or re-use.

The last point is the real gain: we also check for write-after-free
for large allocations, which we did not do before.

So we are catching more writes-after-frees. A price to pay is that
larger chunks are not completely junked, but only a total of 32 bytes
are. I chose this number after comparing performance with the current
code: we still gain a bit in speed.

Junk mode 0 (j) and junk mode 2 (J) are not changed.
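
For junk mode 1, the write/check pattern described above boils down to the
standalone sketch below (hypothetical names and a fixed 4 KB page size, for
illustration only; the actual helpers in the diffs are junkfree()/junk_free()
and validate_junk()): roughly four uint64_t words, spread evenly over the
first page of the allocation, are filled on free and later compared against
the fill pattern.

/*
 * Sketch of the stride-based junking: chunks are 8-byte aligned, so we
 * can fill and check through a uint64_t pointer.  Roughly 4 words
 * (32 bytes) spread over the first page are touched.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define FREEJUNK_WORD	0xdfdfdfdfdfdfdfdfULL
#define PAGESZ		4096

static size_t
stride(size_t words)
{
	size_t step = words / 4;

	return step == 0 ? 1 : step;
}

static void
sketch_junk(void *p, size_t sz)
{
	uint64_t *lp = p;
	size_t i, words = sz / sizeof(uint64_t);

	if (words > PAGESZ / sizeof(uint64_t))
		words = PAGESZ / sizeof(uint64_t);
	for (i = 0; i < words; i += stride(words))
		lp[i] = FREEJUNK_WORD;
}

static int
sketch_validate(void *p, size_t sz)
{
	uint64_t *lp = p;
	size_t i, words = sz / sizeof(uint64_t);

	if (words > PAGESZ / sizeof(uint64_t))
		words = PAGESZ / sizeof(uint64_t);
	for (i = 0; i < words; i += stride(words))
		if (lp[i] != FREEJUNK_WORD)
			return 0;	/* write after free detected */
	return 1;
}

int
main(void)
{
	size_t sz = 64;
	unsigned char *p = malloc(sz);

	if (p == NULL)
		return 1;
	sketch_junk(p, sz);		/* as done on free */
	p[0] = 1;			/* simulate a write after free */
	printf("%s\n", sketch_validate(p, sz) ?
	    "clean" : "write after free");
	free(p);
	return 0;
}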

Please test and review,

-Otto

Index: stdlib/malloc.c
===
RCS file: /cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.267
diff -u -p -r1.267 malloc.c
--- stdlib/malloc.c 23 Nov 2020 15:42:11 -  1.267
+++ stdlib/malloc.c 6 Feb 2021 20:03:49 -
@@ -89,6 +89,7 @@
  */
 #define SOME_JUNK  0xdb/* deadbeef */
 #define SOME_FREEJUNK  0xdf/* dead, free */
+#define SOME_FREEJUNK_ULL  0xdfdfdfdfdfdfdfdfULL
 
 #define MMAP(sz,f) mmap(NULL, (sz), PROT_READ | PROT_WRITE, \
 MAP_ANON | MAP_PRIVATE | (f), -1, 0)
@@ -655,6 +656,46 @@ delete(struct dir_info *d, struct region
}
 }
 
+static inline void
+junkfree(int junk, void *p, size_t sz)
+{
+   size_t byte, step = 1;
+
+   if (junk == 0 || sz == 0)
+   return;
+   if (junk == 1) {
+   if (sz > MALLOC_PAGESIZE)
+   sz = MALLOC_PAGESIZE;
+   step = sz / 32;
+   if (step == 0)
+   step = 1;
+   }
+   for (byte = 0; byte < sz; byte++)
+   ((unsigned char *)p)[byte] = SOME_FREEJUNK;
+}
+
+static inline void
+validate_junk(struct dir_info *pool, void *p, size_t sz)
+{
+   size_t byte, step = 1;
+
+   if (pool->malloc_junk == 0 || sz == 0)
+   return;
+   if (pool->malloc_junk == 1) {
+   if (sz > MALLOC_PAGESIZE)
+   sz = MALLOC_PAGESIZE;
+   step = sz / 32;
+   if (step == 0)
+   step = 1;
+   }
+   for (byte = 0; byte < sz; byte += step) {
+   if (((unsigned char *)p)[byte] != SOME_FREEJUNK)
+   wrterror(pool, "write after free %p %#zx@%#zx", p,
+   byte, sz);
+   }
+}
+
+
 /*
  * Cache maintenance. We keep at most malloc_cache pages cached.
  * If the cache is becoming full, unmap pages in the cache for real,
@@ -663,7 +704,7 @@ delete(struct dir_info *d, struct region
  * cache are in MALLOC_PAGESIZE units.
  */
 static void
-unmap(struct dir_info *d, void *p, size_t sz, size_t clear, int junk)
+unmap(struct dir_info *d, void *p, size_t sz, size_t clear)
 {
size_t psz = sz >> MALLOC_PAGESHIFT;
size_t rsz;
@@ -716,12 +757,10 @@ unmap(struct dir_info *d, void *p, size_
if (r->p == NULL) {
if (clear > 0)
memset(p, 0, clear);
-   if (junk && !mopts.malloc_freeunmap) {
-   size_t amt = junk == 1 ?  MALLOC_MAXCHUNK : sz;
-   memset(p, SOME_FREEJUNK, amt);
-   }
if (mopts.malloc_freeunmap)
mprotect(p, sz, PROT_NONE);
+   else
+   junkfree(d->malloc_junk, p, psz << MALLOC_PAGESHIFT);
r->p = p;
r->size = psz;
d->free_regions_size += psz;
@@ -760,15 +799,16 @@ map(struct dir_info *d, size_t sz, int z
if (r->p != NULL) {
if (r->size == psz) {
p = r->p;
+   if (!mopts.malloc_freeunmap)
+   validate_junk(d, p, psz << MALLOC_PAGESHIFT);
r->p = NULL;
d->free_regions_size -= psz;
if (mopts.malloc_freeunmap)
mprotect(p, sz, PROT_READ | PROT_WRITE);
if (zero_fill)
memset(p, 0,