Greetings,
I am now trying to use Lassi's fast trace, and the results are exceptionally
good: on some of my test cases the overhead of using libunwind (compared
to a frame-based unwinder) went from 9x to 1.2x ;-)
Lassi, thanks!
One problem has cropped up, though: tcmalloc wants to record stack traces on
free, and this causes a crash when a thread is exiting.
Glibc calls thread-specific dtors in the order in which the keys were added,
so the first dtor to run is trace_cache_free(). Then the thread-specific
data for some other key is free()d, which calls into unw_backtrace(),
which uses the now-dangling cache pointer and the already-munmapped
cache->frames.
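The attached patch:
1. delays destruction of the trace cache to the last possible moment (the
   destructor re-installs the cache into its key until the final
   PTHREAD_DESTRUCTOR_ITERATIONS pass), and
2. clears tls_cache, so the dangling pointer will not be used.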
Tested on Linux/x86_64 (Ubuntu 10.04); no new failures.
Thanks,
--
Paul Pluzhnikov
diff --git a/src/x86_64/Gtrace.c b/src/x86_64/Gtrace.c
index 4a5b583..8de12be 100644
--- a/src/x86_64/Gtrace.c
+++ b/src/x86_64/Gtrace.c
@@ -25,6 +25,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. */
#include "unwind_i.h"
#include "ucontext_i.h"
#include <signal.h>
+#include <limits.h>
#pragma weak pthread_once
#pragma weak pthread_key_create
@@ -45,6 +46,8 @@ typedef struct
unw_tdep_frame_t *frames;
size_t log_size;
size_t used;
+ size_t dtor_count; /* Counts how many times our destructor has already
+ been called. */
} unw_trace_cache_t;
static const unw_tdep_frame_t empty_frame = { 0, UNW_X86_64_FRAME_OTHER, -1,
-1, 0, -1, -1 };
@@ -52,14 +55,24 @@ static pthread_mutex_t trace_init_lock =
PTHREAD_MUTEX_INITIALIZER;
static pthread_once_t trace_cache_once = PTHREAD_ONCE_INIT;
static pthread_key_t trace_cache_key;
static struct mempool trace_cache_pool;
+static __thread unw_trace_cache_t *tls_cache;
/* Free memory for a thread's trace cache. */
static void
trace_cache_free (void *arg)
{
unw_trace_cache_t *cache = arg;
+ /* This is the destructor for trace_cache_key. Destructors of other keys
+    may free() memory, and an allocator that records stack traces on free
+    would then re-enter the unwinder on this thread. Keep the cache alive
+    until the last destructor iteration by re-installing it into the key. */
+ if (++cache->dtor_count < PTHREAD_DESTRUCTOR_ITERATIONS)
+ {
+ /* Not yet our turn to get destroyed. Re-install ourselves into the key. */
+ pthread_setspecific(trace_cache_key, cache);
+ /* dtor_count is a size_t, so the difference is a size_t: print with %zx. */
+ Debug(5, "delayed freeing cache %p (%zx to go)\n", cache,
+ PTHREAD_DESTRUCTOR_ITERATIONS - cache->dtor_count);
+ return;
+ }
+ /* Clear the thread-local pointer before releasing the memory, so it never
+    refers to a cache that has already been unmapped or freed. */
+ tls_cache = NULL;
munmap (cache->frames, (1u << cache->log_size) * sizeof(unw_tdep_frame_t));
mempool_free (&trace_cache_pool, cache);
Debug(5, "freed cache %p\n", cache);
}
@@ -108,6 +121,7 @@ trace_cache_create (void)
cache->log_size = HASH_MIN_BITS;
cache->used = 0;
+ cache->dtor_count = 0;
Debug(5, "allocated cache %p\n", cache);
return cache;
}
@@ -135,8 +149,6 @@ trace_cache_expand (unw_trace_cache_t *cache)
return 0;
}
-static __thread unw_trace_cache_t *tls_cache;
-
/* Get the frame cache for the current thread. Create it if there is none. */
static unw_trace_cache_t *
trace_cache_get (void)
_______________________________________________
Libunwind-devel mailing list
[email protected]
https://lists.nongnu.org/mailman/listinfo/libunwind-devel