On Fri, Sep 22, 2017 at 04:35:39PM -0400, Daniel Micay wrote:
> A linear search works well for the current small quarantine (16) but won't
> work
> well if you ever want to have a larger / configurable quarantine size. It
> would
> also be nice to make this fast enough to enable by default.
>
> We (CopperheadOS) use an open addressed hash table for this based on the
> existing hash table since we use a larger quarantine with a FIFO queue
> alongside the random array and a configuration size. Ideally the code would be
> shared with the existing hash table but I didn't want to make it into an
> invasive change downstream.
>
> These are the three downstream patches for OpenBSD malloc in our copy of
> Bionic
> (Android libc), so I'd need to port them to the current upstream code to apply
> cleanly. They're currently applied after other changes and it's a slightly
> older copy of the base code (after multi-pool support, but before the canary
> rework since we'll need to adapt that to our needs). Can get the general idea
> from the patches even though they're not going to apply cleanly though.
>
> [1] quarantine double-free detection via hash table
Thanks for sharing this, I'll take a look soon.
Thinking a bit about this: wouldn't a closed hash table be sufficient?
A collision would then either be a double free, otherwise just replace
old with new. You'll get a O(1) lookup and insert and simpler code.
-Otto
>
> diff --git a/libc/bionic/omalloc.c b/libc/bionic/omalloc.c
> index 23af59501..4f4e1290c 100644
> --- a/libc/bionic/omalloc.c
> +++ b/libc/bionic/omalloc.c
> @@ -157,6 +157,7 @@ struct dir_info {
> struct region_info free_regions[MALLOC_MAXCACHE];
> /* delayed free chunk slots */
> void *delayed_chunks[MALLOC_DELAYED_CHUNK_MASK + 1];
> + void *delayed_chunks_set[(MALLOC_DELAYED_CHUNK_MASK + 1) * 4];
> size_t rbytesused; /* random bytes used */
> char *func; /* current function */
> int mutex;
> @@ -292,6 +293,22 @@ hash(void *p)
> return sum;
> }
>
> +static inline size_t
> +hash_chunk(void *p)
> +{
> + size_t sum;
> + uintptr_t u;
> +
> + u = (uintptr_t)p >> MALLOC_MINSHIFT;
> + sum = u;
> + sum = (sum << 7) - sum + (u >> 16);
> +#ifdef __LP64__
> + sum = (sum << 7) - sum + (u >> 32);
> + sum = (sum << 7) - sum + (u >> 48);
> +#endif
> + return sum;
> +}
> +
> static inline
> struct dir_info *getpool(void)
> {
> @@ -983,6 +1000,56 @@ delete(struct dir_info *d, struct region_info *ri)
> }
> }
>
> +void delayed_chunks_insert(struct dir_info *d, void *p)
> +{
> + size_t index;
> + size_t mask = sizeof(d->delayed_chunks_set) / sizeof(void *) - 1;
> + void *q;
> +
> + index = hash_chunk(p) & mask;
> + q = d->delayed_chunks_set[index];
> + while (q != NULL) {
> + if (p == q)
> + wrterror(d, "double free", p);
> + index = (index - 1) & mask;
> + q = d->delayed_chunks_set[index];
> + }
> + d->delayed_chunks_set[index] = p;
> +}
> +
> +void delayed_chunks_delete(struct dir_info *d, void *p)
> +{
> + size_t mask = sizeof(d->delayed_chunks_set) / sizeof(void *) - 1;
> + size_t i, j, r;
> + void *q;
> +
> + i = hash_chunk(p) & mask;
> + q = d->delayed_chunks_set[i];
> + while (q != p) {
> + if (q == NULL)
> + wrterror(d, "pointer missing from address tracking
> table", p);
> + i = (i - 1) & mask;
> + q = d->delayed_chunks_set[i];
> + }
> +
> + for (;;) {
> + d->delayed_chunks_set[i] = NULL;
> + j = i;
> + for (;;) {
> + i = (i - 1) & mask;
> + if (d->delayed_chunks_set[i] == NULL)
> + return;
> + r = hash_chunk(d->delayed_chunks_set[i]) & mask;
> + if ((i <= r && r < j) || (r < j && j < i) ||
> + (j < i && i <= r))
> + continue;
> + d->delayed_chunks_set[j] = d->delayed_chunks_set[i];
> + break;
> + }
> + }
> +}
> +
> +
> /*
> * Allocate a page of chunks
> */
> @@ -1474,11 +1541,21 @@ ofree(struct dir_info *argpool, void *p)
> if (!mopts.malloc_freenow) {
> if (find_chunknum(pool, r, p) == (uint32_t)-1)
> goto done;
> +
> + if (p == NULL)
> + goto done;
> +
> + delayed_chunks_insert(pool, p);
> +
> i = getrbyte(pool) & MALLOC_DELAYED_CHUNK_MASK;
> tmp = p;
> p = pool->delayed_chunks[i];
> - if (tmp == p)
> - wrterror(pool, "double free", p);
> +
> + if (p == NULL)
> + goto done;
> +
> + delayed_chunks_delete(pool, p);
> +
> if (mopts.malloc_junk)
> validate_junk(pool, p);
> pool->delayed_chunks[i] = tmp;
> @@ -2264,6 +2341,7 @@ malloc_dump(int fd, struct dir_info *pool)
> r = find(pool, p);
> if (r == NULL)
> wrterror(pool, "bogus pointer in malloc_dump", p);
> + delayed_chunks_delete(pool, p);
> free_bytes(pool, r, p);
> pool->delayed_chunks[i] = NULL;
> }
>
> [2] extend the malloc quarantine with a queue
>
> diff --git a/libc/bionic/omalloc.c b/libc/bionic/omalloc.c
> index 4f4e1290c..e92a1fc45 100644
> --- a/libc/bionic/omalloc.c
> +++ b/libc/bionic/omalloc.c
> @@ -156,7 +156,9 @@ struct dir_info {
> /* free pages cache */
> struct region_info free_regions[MALLOC_MAXCACHE];
> /* delayed free chunk slots */
> + size_t queue_index;
> void *delayed_chunks[MALLOC_DELAYED_CHUNK_MASK + 1];
> + void *delayed_chunks_queue[MALLOC_DELAYED_CHUNK_MASK + 1];
> void *delayed_chunks_set[(MALLOC_DELAYED_CHUNK_MASK + 1) * 4];
> size_t rbytesused; /* random bytes used */
> char *func; /* current function */
> @@ -1468,8 +1470,10 @@ validate_delayed_chunks(void)
> continue;
> _MALLOC_LOCK(pool->mutex);
> pool->func = "validate_delayed_chunks():";
> - for (j = 0; j < MALLOC_DELAYED_CHUNK_MASK + 1; j++)
> + for (j = 0; j < MALLOC_DELAYED_CHUNK_MASK + 1; j++) {
> validate_junk(pool, pool->delayed_chunks[j]);
> + validate_junk(pool, pool->delayed_chunks_queue[j]);
> + }
> _MALLOC_UNLOCK(pool->mutex);
> }
> }
> @@ -1550,6 +1554,16 @@ ofree(struct dir_info *argpool, void *p)
> i = getrbyte(pool) & MALLOC_DELAYED_CHUNK_MASK;
> tmp = p;
> p = pool->delayed_chunks[i];
> + pool->delayed_chunks[i] = tmp;
> +
> + if (p == NULL)
> + goto done;
> +
> + tmp = p;
> + p = pool->delayed_chunks_queue[pool->queue_index];
> + pool->delayed_chunks_queue[pool->queue_index] = tmp;
> + pool->queue_index++;
> + pool->queue_index &= MALLOC_DELAYED_CHUNK_MASK;
>
> if (p == NULL)
> goto done;
> @@ -1558,7 +1572,6 @@ ofree(struct dir_info *argpool, void *p)
>
> if (mopts.malloc_junk)
> validate_junk(pool, p);
> - pool->delayed_chunks[i] = tmp;
> }
> if (p != NULL) {
> r = find(pool, p);
>
> [3] make the quarantine size configurable
>
> diff --git a/libc/bionic/omalloc.c b/libc/bionic/omalloc.c
> index 822de7c9c..d04c923e2 100644
> --- a/libc/bionic/omalloc.c
> +++ b/libc/bionic/omalloc.c
> @@ -157,9 +157,10 @@ struct dir_info {
> struct region_info free_regions[MALLOC_MAXCACHE];
> /* delayed free chunk slots */
> size_t queue_index;
> - void *delayed_chunks[MALLOC_DELAYED_CHUNK_MASK + 1];
> - void *delayed_chunks_queue[MALLOC_DELAYED_CHUNK_MASK + 1];
> - void *delayed_chunks_set[(MALLOC_DELAYED_CHUNK_MASK + 1) * 4];
> + void **delayed_chunks;
> + void **delayed_chunks_queue;
> + void **delayed_chunks_set;
> +
> size_t rbytesused; /* random bytes used */
> char *func; /* current function */
> int mutex;
> @@ -231,6 +232,7 @@ struct malloc_readonly {
> #endif
> u_int32_t malloc_canary; /* Matched against ones in malloc_pool
> */
> uintptr_t malloc_chunk_canary;
> + size_t delayed_chunk_size;
> };
>
> /* This object is mapped PROT_READ after initialisation to prevent tampering
> */
> @@ -569,6 +571,14 @@ omalloc_parseopt(char opt)
> case '<':
> mopts.malloc_cache >>= 1;
> break;
> + case '+':
> + mopts.delayed_chunk_size <<= 1;
> + if (mopts.delayed_chunk_size > UCHAR_MAX + 1)
> + mopts.delayed_chunk_size = UCHAR_MAX + 1;
> + break;
> + case '-':
> + mopts.delayed_chunk_size >>= 1;
> + break;
> case 'a':
> case 'A':
> /* ignored */
> @@ -588,11 +598,11 @@ omalloc_parseopt(char opt)
> break;
> #endif /* MALLOC_STATS */
> case 'f':
> - mopts.malloc_freenow = 0;
> + mopts.delayed_chunk_size = MALLOC_DELAYED_CHUNK_MASK + 1;
> mopts.malloc_freeunmap = 0;
> break;
> case 'F':
> - mopts.malloc_freenow = 1;
> + mopts.delayed_chunk_size = 0;
> mopts.malloc_freeunmap = 1;
> break;
> case 'g':
> @@ -702,6 +712,7 @@ omalloc_init(void)
> mopts.malloc_junk = 1;
> mopts.malloc_move = 1;
> mopts.malloc_cache = MALLOC_DEFAULT_CACHE;
> + mopts.delayed_chunk_size = MALLOC_DELAYED_CHUNK_MASK + 1;
>
> for (i = 0; i < 3; i++) {
> switch (i) {
> @@ -818,6 +829,14 @@ omalloc_poolinit(struct dir_info **dp)
> d->canary2 = ~d->canary1;
>
> *dp = d;
> +
> + if (mopts.delayed_chunk_size) {
> + d->delayed_chunks = MMAP(mopts.delayed_chunk_size * 6 *
> sizeof(void *));
> + if (d->delayed_chunks == MAP_FAILED)
> + wrterror(NULL, "malloc init mmap failed", NULL);
> + d->delayed_chunks_queue = d->delayed_chunks +
> mopts.delayed_chunk_size;
> + d->delayed_chunks_set = d->delayed_chunks_queue +
> mopts.delayed_chunk_size;
> + }
> }
>
> static int
> @@ -1003,7 +1022,7 @@ delete(struct dir_info *d, struct region_info *ri)
> void delayed_chunks_insert(struct dir_info *d, void *p)
> {
> size_t index;
> - size_t mask = sizeof(d->delayed_chunks_set) / sizeof(void *) - 1;
> + size_t mask = mopts.delayed_chunk_size * 4 - 1;
> void *q;
>
> index = hash_chunk(p) & mask;
> @@ -1019,7 +1038,7 @@ void delayed_chunks_insert(struct dir_info *d, void *p)
>
> void delayed_chunks_delete(struct dir_info *d, void *p)
> {
> - size_t mask = sizeof(d->delayed_chunks_set) / sizeof(void *) - 1;
> + size_t mask = mopts.delayed_chunk_size * 4 - 1;
> size_t i, j, r;
> void *q;
>
> @@ -1461,14 +1480,15 @@ validate_junk(struct dir_info *pool, void *p) {
> static void
> validate_delayed_chunks(void)
> {
> - int i, j;
> + int i;
> + size_t j;
> for (i = 0; i < _MALLOC_MUTEXES; i++) {
> struct dir_info *pool = mopts.malloc_pool[i];
> if (pool == NULL)
> continue;
> _MALLOC_LOCK(pool->mutex);
> pool->func = "validate_delayed_chunks():";
> - for (j = 0; j < MALLOC_DELAYED_CHUNK_MASK + 1; j++) {
> + for (j = 0; j < mopts.delayed_chunk_size; j++) {
> validate_junk(pool, pool->delayed_chunks[j]);
> validate_junk(pool, pool->delayed_chunks_queue[j]);
> }
> @@ -1540,7 +1560,7 @@ ofree(struct dir_info *argpool, void *p)
>
> if (mopts.malloc_junk && sz > 0)
> memset(p, SOME_FREEJUNK, sz - mopts.malloc_canaries);
> - if (!mopts.malloc_freenow) {
> + if (mopts.delayed_chunk_size) {
> if (find_chunknum(pool, r, p) == (uint32_t)-1)
> goto done;
>
> @@ -1549,7 +1569,7 @@ ofree(struct dir_info *argpool, void *p)
>
> delayed_chunks_insert(pool, p);
>
> - i = getrbyte(pool) & MALLOC_DELAYED_CHUNK_MASK;
> + i = getrbyte(pool) & (mopts.delayed_chunk_size - 1);
> tmp = p;
> p = pool->delayed_chunks[i];
> pool->delayed_chunks[i] = tmp;
> @@ -1561,7 +1581,7 @@ ofree(struct dir_info *argpool, void *p)
> p = pool->delayed_chunks_queue[pool->queue_index];
> pool->delayed_chunks_queue[pool->queue_index] = tmp;
> pool->queue_index++;
> - pool->queue_index &= MALLOC_DELAYED_CHUNK_MASK;
> + pool->queue_index &= mopts.delayed_chunk_size - 1;
>
> if (p == NULL)
> goto done;
> @@ -2338,14 +2358,14 @@ malloc_dump1(int fd, struct dir_info *d)
> void
> malloc_dump(int fd, struct dir_info *pool)
> {
> - int i;
> + size_t i;
> void *p;
> struct region_info *r;
> int saved_errno = errno;
>
> if (pool == NULL)
> return;
> - for (i = 0; i < MALLOC_DELAYED_CHUNK_MASK + 1; i++) {
> + for (i = 0; i < mopts.delayed_chunk_size; i++) {
> p = pool->delayed_chunks[i];
> if (p == NULL)
> continue;