Hi, On Fri, Aug 16, 2019 at 02:24:28PM -0500, Jonathon Anderson wrote: > 2. Adding a lock & array structure to the memory manager (pseudo-TLS) > (libdwP.h, libdw_alloc.c)
Specifically concentrating on this part. > diff --git a/libdw/ChangeLog b/libdw/ChangeLog > index bf1f4857..87abf7a7 100644 > --- a/libdw/ChangeLog > +++ b/libdw/ChangeLog > @@ -1,3 +1,12 @@ > +2019-08-15 Jonathon Anderson <jm...@rice.edu> > + > + * libdw_alloc.c (__libdw_allocate): Added thread-safe stack allocator. > + * libdwP.h (Dwarf): Likewise. > + * dwarf_begin_elf.c (dwarf_begin_elf): Support for above. > + * dwarf_end.c (dwarf_end): Likewise. > [...] > + * Makefile.am: Link -lpthread to provide rwlocks. > diff --git a/libdw/Makefile.am b/libdw/Makefile.am > index 7a3d5322..6d0a0187 100644 > --- a/libdw/Makefile.am > +++ b/libdw/Makefile.am > @@ -108,7 +108,7 @@ am_libdw_pic_a_OBJECTS = $(libdw_a_SOURCES:.c=.os) > libdw_so_LIBS = libdw_pic.a ../libdwelf/libdwelf_pic.a \ > ../libdwfl/libdwfl_pic.a ../libebl/libebl.a > libdw_so_DEPS = ../lib/libeu.a ../libelf/libelf.so > -libdw_so_LDLIBS = $(libdw_so_DEPS) -ldl -lz $(argp_LDADD) $(zip_LIBS) > +libdw_so_LDLIBS = $(libdw_so_DEPS) -ldl -lz $(argp_LDADD) $(zip_LIBS) > -lpthread > libdw_so_SOURCES = > libdw.so$(EXEEXT): $(srcdir)/libdw.map $(libdw_so_LIBS) $(libdw_so_DEPS) > # The rpath is necessary for libebl because its $ORIGIN use will Do we also need -pthread for CFLAGS? > diff --git a/libdw/dwarf_begin_elf.c b/libdw/dwarf_begin_elf.c > index 38c8f5c6..b3885bb5 100644 > --- a/libdw/dwarf_begin_elf.c > +++ b/libdw/dwarf_begin_elf.c > @@ -417,11 +417,14 @@ dwarf_begin_elf (Elf *elf, Dwarf_Cmd cmd, Elf_Scn > *scngrp) > /* Initialize the memory handling. */ > result->mem_default_size = mem_default_size; > result->oom_handler = __libdw_oom; > - result->mem_tail = (struct libdw_memblock *) (result + 1); > - result->mem_tail->size = (result->mem_default_size > - - offsetof (struct libdw_memblock, mem)); > - result->mem_tail->remaining = result->mem_tail->size; > - result->mem_tail->prev = NULL; > + pthread_rwlock_init(&result->mem_rwl, NULL); > + result->mem_stacks = 1; > + result->mem_tails = malloc (sizeof (struct libdw_memblock *)); > + result->mem_tails[0] = (struct libdw_memblock *) (result + 1); > + result->mem_tails[0]->size = (result->mem_default_size > + - offsetof (struct libdw_memblock, mem)); > + result->mem_tails[0]->remaining = result->mem_tails[0]->size; > + result->mem_tails[0]->prev = NULL; Can't we just set mem_tails[0] = NULL? Wouldn't __libdw_alloc_tail () then initialize it? > diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c > index 29795c10..6317bcda 100644 > --- a/libdw/dwarf_end.c > +++ b/libdw/dwarf_end.c > @@ -94,14 +94,22 @@ dwarf_end (Dwarf *dwarf) > /* And the split Dwarf. */ > tdestroy (dwarf->split_tree, noop_free); > > - struct libdw_memblock *memp = dwarf->mem_tail; > - /* The first block is allocated together with the Dwarf object. */ > - while (memp->prev != NULL) > - { > - struct libdw_memblock *prevp = memp->prev; > - free (memp); > - memp = prevp; > - } > + for (size_t i = 0; i < dwarf->mem_stacks; i++) > + { > + struct libdw_memblock *memp = dwarf->mem_tails[i]; > + /* The first block is allocated together with the Dwarf object. > */ Then this wouldn't be true again. > + while (memp != NULL && memp->prev != NULL) > + { > + struct libdw_memblock *prevp = memp->prev; > + free (memp); > + memp = prevp; > + } > + /* Only for stack 0 though, the others are allocated > individually. */ > + if (memp != NULL && i > 0) > + free (memp); > + } And then you don't need this special case. > + free (dwarf->mem_tails); > + pthread_rwlock_destroy (&dwarf->mem_rwl); > > /* Free the pubnames helper structure. */ > free (dwarf->pubnames_sets); > diff --git a/libdw/libdwP.h b/libdw/libdwP.h > index eebb7d12..442d493d 100644 > --- a/libdw/libdwP.h > +++ b/libdw/libdwP.h > @@ -31,6 +31,7 @@ > > #include <libintl.h> > #include <stdbool.h> > +#include <pthread.h> > > #include <libdw.h> > #include <dwarf.h> > @@ -218,16 +219,18 @@ struct Dwarf > /* Similar for addrx/constx, which will come from .debug_addr section. */ > struct Dwarf_CU *fake_addr_cu; > > - /* Internal memory handling. This is basically a simplified > + /* Internal memory handling. This is basically a simplified thread-local > reimplementation of obstacks. Unfortunately the standard obstack > implementation is not usable in libraries. */ > + pthread_rwlock_t mem_rwl; > + size_t mem_stacks; > struct libdw_memblock > { > size_t size; > size_t remaining; > struct libdw_memblock *prev; > char mem[0]; > - } *mem_tail; > + } **mem_tails; Please document what/how exactly mem_rwl protects which other fields. > /* Default size of allocated memory blocks. */ > size_t mem_default_size; > @@ -572,7 +575,7 @@ extern void __libdw_seterrno (int value) > internal_function; > > /* Memory handling, the easy parts. This macro does not do any locking. */ This comment isn't true anymore. __libdw_alloc_tail does locking. Also I think the locking should be extended beyond just that functions, see below. > #define libdw_alloc(dbg, type, tsize, cnt) \ > - ({ struct libdw_memblock *_tail = (dbg)->mem_tail; \ > + ({ struct libdw_memblock *_tail = __libdw_alloc_tail(dbg); \ > size_t _required = (tsize) * (cnt); \ > type *_result = (type *) (_tail->mem + (_tail->size - > _tail->remaining));\ > size_t _padding = ((__alignof (type) \ > @@ -591,6 +594,10 @@ extern void __libdw_seterrno (int value) > internal_function; > #define libdw_typed_alloc(dbg, type) \ > libdw_alloc (dbg, type, sizeof (type), 1) > > +/* Callback to choose a thread-local memory allocation stack. */ > +extern struct libdw_memblock *__libdw_alloc_tail (Dwarf* dbg) > + __nonnull_attribute__ (1); > + > /* Callback to allocate more. */ > extern void *__libdw_allocate (Dwarf *dbg, size_t minsize, size_t align) > __attribute__ ((__malloc__)) __nonnull_attribute__ (1); > diff --git a/libdw/libdw_alloc.c b/libdw/libdw_alloc.c > index f1e08714..c3c5e8a7 100644 > --- a/libdw/libdw_alloc.c > +++ b/libdw/libdw_alloc.c > @@ -33,9 +33,73 @@ > > #include <errno.h> > #include <stdlib.h> > +#include <assert.h> > #include "libdwP.h" > #include "system.h" > +#include "stdatomic.h" > +#if USE_VG_ANNOTATIONS == 1 > +#include <helgrind.h> > +#include <drd.h> > +#else > +#define ANNOTATE_HAPPENS_BEFORE(X) > +#define ANNOTATE_HAPPENS_AFTER(X) > +#endif > + > + > +#define thread_local __thread > > +static thread_local int initialized = 0; > +static thread_local size_t thread_id; > +static atomic_size_t next_id = ATOMIC_VAR_INIT(0); Could you initialize thread_id to (size_t) -1? Then you don't need another thread_local to indicate not initialized? > +struct libdw_memblock * > +__libdw_alloc_tail (Dwarf *dbg) > +{ > + if (!initialized) And change this to thead_id == (size_t) -1? > + { > + thread_id = atomic_fetch_add (&next_id, 1); > + initialized = 1; > + } > + > + pthread_rwlock_rdlock (&dbg->mem_rwl); > + if (thread_id >= dbg->mem_stacks) > + { > + pthread_rwlock_unlock (&dbg->mem_rwl); > + pthread_rwlock_wrlock (&dbg->mem_rwl); > + > + /* Another thread may have already reallocated. In theory using an > + atomic would be faster, but given that this only happens once per > + thread per Dwarf, some minor slowdown should be fine. */ > + if (thread_id >= dbg->mem_stacks) > + { > + dbg->mem_tails = realloc (dbg->mem_tails, (thread_id+1) > + * sizeof (struct libdw_memblock *)); > + assert(dbg->mem_tails); Don't use assert here, use if (dbg->mem_tails == NULL) dbg->oom_handler (); But what if realloc moves the block? Then all dbg->mem_tails[thread_id] pointers become invalid. And this function drops the lock before returning a libdw_memblock*. So I think the lock needs to extend beyond this function somehow. Or we need to prevent another thread reallocing while we are dealing with the assigned memblock. > + for (size_t i = dbg->mem_stacks; i <= thread_id; i++) > + dbg->mem_tails[i] = NULL; > + dbg->mem_stacks = thread_id + 1; > + ANNOTATE_HAPPENS_BEFORE (&dbg->mem_tails); > + } > + > + pthread_rwlock_unlock (&dbg->mem_rwl); > + pthread_rwlock_rdlock (&dbg->mem_rwl); > + } > + > + /* At this point, we have an entry in the tail array. */ > + ANNOTATE_HAPPENS_AFTER (&dbg->mem_tails); > + struct libdw_memblock *result = dbg->mem_tails[thread_id]; > + if (result == NULL) > + { > + result = malloc (dbg->mem_default_size); > + result->size = dbg->mem_default_size > + - offsetof (struct libdw_memblock, mem); > + result->remaining = result->size; > + result->prev = NULL; > + dbg->mem_tails[thread_id] = result; > + } > + pthread_rwlock_unlock (&dbg->mem_rwl); > + return result; Here the lock is dropped and result points into dbg->mem_tails which means it cannot be used because the realloc above might make it invalid. > +} > > void * > __libdw_allocate (Dwarf *dbg, size_t minsize, size_t align) > @@ -52,8 +116,10 @@ __libdw_allocate (Dwarf *dbg, size_t minsize, size_t > align) > newp->size = size - offsetof (struct libdw_memblock, mem); > newp->remaining = (uintptr_t) newp + size - (result + minsize); > > - newp->prev = dbg->mem_tail; > - dbg->mem_tail = newp; > + pthread_rwlock_rdlock (&dbg->mem_rwl); > + newp->prev = dbg->mem_tails[thread_id]; > + dbg->mem_tails[thread_id] = newp; > + pthread_rwlock_unlock (&dbg->mem_rwl); > > return (void *) result; > } Cheers, Mark