A linear search works well for the current small quarantine (16) but won't work
well if you ever want to have a larger / configurable quarantine size. It would
also be nice to make this fast enough to enable by default.
We (CopperheadOS) use an open addressed hash table for this based on the
existing hash table since we use a larger quarantine with a FIFO queue
alongside the random array and a configuration size. Ideally the code would be
shared with the existing hash table but I didn't want to make it into an
invasive change downstream.
These are the three downstream patches for OpenBSD malloc in our copy of Bionic
(Android libc), so I'd need to port them to the current upstream code to apply
cleanly. They're currently applied after other changes and it's a slightly
older copy of the base code (after multi-pool support, but before the canary
rework since we'll need to adapt that to our needs). Can get the general idea
from the patches even though they're not going to apply cleanly though.
[1] quarantine double-free detection via hash table
diff --git a/libc/bionic/omalloc.c b/libc/bionic/omalloc.c
index 23af59501..4f4e1290c 100644
--- a/libc/bionic/omalloc.c
+++ b/libc/bionic/omalloc.c
@@ -157,6 +157,7 @@ struct dir_info {
struct region_info free_regions[MALLOC_MAXCACHE];
/* delayed free chunk slots */
void *delayed_chunks[MALLOC_DELAYED_CHUNK_MASK + 1];
+ void *delayed_chunks_set[(MALLOC_DELAYED_CHUNK_MASK + 1) * 4];
size_t rbytesused; /* random bytes used */
char *func; /* current function */
int mutex;
@@ -292,6 +293,22 @@ hash(void *p)
return sum;
}
+static inline size_t
+hash_chunk(void *p)
+{
+ size_t sum;
+ uintptr_t u;
+
+ u = (uintptr_t)p >> MALLOC_MINSHIFT;
+ sum = u;
+ sum = (sum << 7) - sum + (u >> 16);
+#ifdef __LP64__
+ sum = (sum << 7) - sum + (u >> 32);
+ sum = (sum << 7) - sum + (u >> 48);
+#endif
+ return sum;
+}
+
static inline
struct dir_info *getpool(void)
{
@@ -983,6 +1000,56 @@ delete(struct dir_info *d, struct region_info *ri)
}
}
+void delayed_chunks_insert(struct dir_info *d, void *p)
+{
+ size_t index;
+ size_t mask = sizeof(d->delayed_chunks_set) / sizeof(void *) - 1;
+ void *q;
+
+ index = hash_chunk(p) & mask;
+ q = d->delayed_chunks_set[index];
+ while (q != NULL) {
+ if (p == q)
+ wrterror(d, "double free", p);
+ index = (index - 1) & mask;
+ q = d->delayed_chunks_set[index];
+ }
+ d->delayed_chunks_set[index] = p;
+}
+
+void delayed_chunks_delete(struct dir_info *d, void *p)
+{
+ size_t mask = sizeof(d->delayed_chunks_set) / sizeof(void *) - 1;
+ size_t i, j, r;
+ void *q;
+
+ i = hash_chunk(p) & mask;
+ q = d->delayed_chunks_set[i];
+ while (q != p) {
+ if (q == NULL)
+ wrterror(d, "pointer missing from address tracking
table", p);
+ i = (i - 1) & mask;
+ q = d->delayed_chunks_set[i];
+ }
+
+ for (;;) {
+ d->delayed_chunks_set[i] = NULL;
+ j = i;
+ for (;;) {
+ i = (i - 1) & mask;
+ if (d->delayed_chunks_set[i] == NULL)
+ return;
+ r = hash_chunk(d->delayed_chunks_set[i]) & mask;
+ if ((i <= r && r < j) || (r < j && j < i) ||
+ (j < i && i <= r))
+ continue;
+ d->delayed_chunks_set[j] = d->delayed_chunks_set[i];
+ break;
+ }
+ }
+}
+
+
/*
* Allocate a page of chunks
*/
@@ -1474,11 +1541,21 @@ ofree(struct dir_info *argpool, void *p)
if (!mopts.malloc_freenow) {
if (find_chunknum(pool, r, p) == (uint32_t)-1)
goto done;
+
+ if (p == NULL)
+ goto done;
+
+ delayed_chunks_insert(pool, p);
+
i = getrbyte(pool) & MALLOC_DELAYED_CHUNK_MASK;
tmp = p;
p = pool->delayed_chunks[i];
- if (tmp == p)
- wrterror(pool, "double free", p);
+
+ if (p == NULL)
+ goto done;
+
+ delayed_chunks_delete(pool, p);
+
if (mopts.malloc_junk)
validate_junk(pool, p);
pool->delayed_chunks[i] = tmp;
@@ -2264,6 +2341,7 @@ malloc_dump(int fd, struct dir_info *pool)
r = find(pool, p);
if (r == NULL)
wrterror(pool, "bogus pointer in malloc_dump", p);
+ delayed_chunks_delete(pool, p);
free_bytes(pool, r, p);
pool->delayed_chunks[i] = NULL;
}
[2] extend the malloc quarantine with a queue
diff --git a/libc/bionic/omalloc.c b/libc/bionic/omalloc.c
index 4f4e1290c..e92a1fc45 100644
--- a/libc/bionic/omalloc.c
+++ b/libc/bionic/omalloc.c
@@ -156,7 +156,9 @@ struct dir_info {
/* free pages cache */
struct region_info free_regions[MALLOC_MAXCACHE];
/* delayed free chunk slots */
+ size_t queue_index;
void *delayed_chunks[MALLOC_DELAYED_CHUNK_MASK + 1];
+ void *delayed_chunks_queue[MALLOC_DELAYED_CHUNK_MASK + 1];
void *delayed_chunks_set[(MALLOC_DELAYED_CHUNK_MASK + 1) * 4];
size_t rbytesused; /* random bytes used */
char *func; /* current function */
@@ -1468,8 +1470,10 @@ validate_delayed_chunks(void)
continue;
_MALLOC_LOCK(pool->mutex);
pool->func = "validate_delayed_chunks():";
- for (j = 0; j < MALLOC_DELAYED_CHUNK_MASK + 1; j++)
+ for (j = 0; j < MALLOC_DELAYED_CHUNK_MASK + 1; j++) {
validate_junk(pool, pool->delayed_chunks[j]);
+ validate_junk(pool, pool->delayed_chunks_queue[j]);
+ }
_MALLOC_UNLOCK(pool->mutex);
}
}
@@ -1550,6 +1554,16 @@ ofree(struct dir_info *argpool, void *p)
i = getrbyte(pool) & MALLOC_DELAYED_CHUNK_MASK;
tmp = p;
p = pool->delayed_chunks[i];
+ pool->delayed_chunks[i] = tmp;
+
+ if (p == NULL)
+ goto done;
+
+ tmp = p;
+ p = pool->delayed_chunks_queue[pool->queue_index];
+ pool->delayed_chunks_queue[pool->queue_index] = tmp;
+ pool->queue_index++;
+ pool->queue_index &= MALLOC_DELAYED_CHUNK_MASK;
if (p == NULL)
goto done;
@@ -1558,7 +1572,6 @@ ofree(struct dir_info *argpool, void *p)
if (mopts.malloc_junk)
validate_junk(pool, p);
- pool->delayed_chunks[i] = tmp;
}
if (p != NULL) {
r = find(pool, p);
[3] make the quarantine size configurable
diff --git a/libc/bionic/omalloc.c b/libc/bionic/omalloc.c
index 822de7c9c..d04c923e2 100644
--- a/libc/bionic/omalloc.c
+++ b/libc/bionic/omalloc.c
@@ -157,9 +157,10 @@ struct dir_info {
struct region_info free_regions[MALLOC_MAXCACHE];
/* delayed free chunk slots */
size_t queue_index;
- void *delayed_chunks[MALLOC_DELAYED_CHUNK_MASK + 1];
- void *delayed_chunks_queue[MALLOC_DELAYED_CHUNK_MASK + 1];
- void *delayed_chunks_set[(MALLOC_DELAYED_CHUNK_MASK + 1) * 4];
+ void **delayed_chunks;
+ void **delayed_chunks_queue;
+ void **delayed_chunks_set;
+
size_t rbytesused; /* random bytes used */
char *func; /* current function */
int mutex;
@@ -231,6 +232,7 @@ struct malloc_readonly {
#endif
u_int32_t malloc_canary; /* Matched against ones in malloc_pool
*/
uintptr_t malloc_chunk_canary;
+ size_t delayed_chunk_size;
};
/* This object is mapped PROT_READ after initialisation to prevent tampering */
@@ -569,6 +571,14 @@ omalloc_parseopt(char opt)
case '<':
mopts.malloc_cache >>= 1;
break;
+ case '+':
+ mopts.delayed_chunk_size <<= 1;
+ if (mopts.delayed_chunk_size > UCHAR_MAX + 1)
+ mopts.delayed_chunk_size = UCHAR_MAX + 1;
+ break;
+ case '-':
+ mopts.delayed_chunk_size >>= 1;
+ break;
case 'a':
case 'A':
/* ignored */
@@ -588,11 +598,11 @@ omalloc_parseopt(char opt)
break;
#endif /* MALLOC_STATS */
case 'f':
- mopts.malloc_freenow = 0;
+ mopts.delayed_chunk_size = MALLOC_DELAYED_CHUNK_MASK + 1;
mopts.malloc_freeunmap = 0;
break;
case 'F':
- mopts.malloc_freenow = 1;
+ mopts.delayed_chunk_size = 0;
mopts.malloc_freeunmap = 1;
break;
case 'g':
@@ -702,6 +712,7 @@ omalloc_init(void)
mopts.malloc_junk = 1;
mopts.malloc_move = 1;
mopts.malloc_cache = MALLOC_DEFAULT_CACHE;
+ mopts.delayed_chunk_size = MALLOC_DELAYED_CHUNK_MASK + 1;
for (i = 0; i < 3; i++) {
switch (i) {
@@ -818,6 +829,14 @@ omalloc_poolinit(struct dir_info **dp)
d->canary2 = ~d->canary1;
*dp = d;
+
+ if (mopts.delayed_chunk_size) {
+ d->delayed_chunks = MMAP(mopts.delayed_chunk_size * 6 *
sizeof(void *));
+ if (d->delayed_chunks == MAP_FAILED)
+ wrterror(NULL, "malloc init mmap failed", NULL);
+ d->delayed_chunks_queue = d->delayed_chunks +
mopts.delayed_chunk_size;
+ d->delayed_chunks_set = d->delayed_chunks_queue +
mopts.delayed_chunk_size;
+ }
}
static int
@@ -1003,7 +1022,7 @@ delete(struct dir_info *d, struct region_info *ri)
void delayed_chunks_insert(struct dir_info *d, void *p)
{
size_t index;
- size_t mask = sizeof(d->delayed_chunks_set) / sizeof(void *) - 1;
+ size_t mask = mopts.delayed_chunk_size * 4 - 1;
void *q;
index = hash_chunk(p) & mask;
@@ -1019,7 +1038,7 @@ void delayed_chunks_insert(struct dir_info *d, void *p)
void delayed_chunks_delete(struct dir_info *d, void *p)
{
- size_t mask = sizeof(d->delayed_chunks_set) / sizeof(void *) - 1;
+ size_t mask = mopts.delayed_chunk_size * 4 - 1;
size_t i, j, r;
void *q;
@@ -1461,14 +1480,15 @@ validate_junk(struct dir_info *pool, void *p) {
static void
validate_delayed_chunks(void)
{
- int i, j;
+ int i;
+ size_t j;
for (i = 0; i < _MALLOC_MUTEXES; i++) {
struct dir_info *pool = mopts.malloc_pool[i];
if (pool == NULL)
continue;
_MALLOC_LOCK(pool->mutex);
pool->func = "validate_delayed_chunks():";
- for (j = 0; j < MALLOC_DELAYED_CHUNK_MASK + 1; j++) {
+ for (j = 0; j < mopts.delayed_chunk_size; j++) {
validate_junk(pool, pool->delayed_chunks[j]);
validate_junk(pool, pool->delayed_chunks_queue[j]);
}
@@ -1540,7 +1560,7 @@ ofree(struct dir_info *argpool, void *p)
if (mopts.malloc_junk && sz > 0)
memset(p, SOME_FREEJUNK, sz - mopts.malloc_canaries);
- if (!mopts.malloc_freenow) {
+ if (mopts.delayed_chunk_size) {
if (find_chunknum(pool, r, p) == (uint32_t)-1)
goto done;
@@ -1549,7 +1569,7 @@ ofree(struct dir_info *argpool, void *p)
delayed_chunks_insert(pool, p);
- i = getrbyte(pool) & MALLOC_DELAYED_CHUNK_MASK;
+ i = getrbyte(pool) & (mopts.delayed_chunk_size - 1);
tmp = p;
p = pool->delayed_chunks[i];
pool->delayed_chunks[i] = tmp;
@@ -1561,7 +1581,7 @@ ofree(struct dir_info *argpool, void *p)
p = pool->delayed_chunks_queue[pool->queue_index];
pool->delayed_chunks_queue[pool->queue_index] = tmp;
pool->queue_index++;
- pool->queue_index &= MALLOC_DELAYED_CHUNK_MASK;
+ pool->queue_index &= mopts.delayed_chunk_size - 1;
if (p == NULL)
goto done;
@@ -2338,14 +2358,14 @@ malloc_dump1(int fd, struct dir_info *d)
void
malloc_dump(int fd, struct dir_info *pool)
{
- int i;
+ size_t i;
void *p;
struct region_info *r;
int saved_errno = errno;
if (pool == NULL)
return;
- for (i = 0; i < MALLOC_DELAYED_CHUNK_MASK + 1; i++) {
+ for (i = 0; i < mopts.delayed_chunk_size; i++) {
p = pool->delayed_chunks[i];
if (p == NULL)
continue;