Greetings,

I am now trying to use Lassi's fast trace, and the results are exceptionally
good: on some of my test cases the overhead of using libunwind (compared
to frame-based unwinder) went from 9x to 1.2x ;-)

Lassi, thanks!

One problem that cropped up: tcmalloc wants to record stack traces on free,
and this causes a crash when a thread is exiting.

Glibc calls thread-specific dtors in the order in which the keys were added,
so the first dtor to run is trace_cache_free(). Then thread-specific
data for some other key is free()d, which calls into unw_backtrace(),
which uses the dangling cache pointer and the munmapped cache->frames.

The attached patch:
1. delays destruction of the trace cache to the last possible moment, and
2. clears tls_cache, so the dangling pointer will not be used.

Tested on Linux/x86_64 (Ubuntu 10.04); no new failures.

Thanks,
-- 
Paul Pluzhnikov
diff --git a/src/x86_64/Gtrace.c b/src/x86_64/Gtrace.c
index 4a5b583..8de12be 100644
--- a/src/x86_64/Gtrace.c
+++ b/src/x86_64/Gtrace.c
@@ -25,6 +25,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #include "unwind_i.h"
 #include "ucontext_i.h"
 #include <signal.h>
+#include <limits.h>
 
 #pragma weak pthread_once
 #pragma weak pthread_key_create
@@ -45,6 +46,8 @@ typedef struct
   unw_tdep_frame_t *frames;
   size_t log_size;
   size_t used;
+  size_t dtor_count;  /* Counts how many times our destructor has already
+                        been called. */
 } unw_trace_cache_t;
 
 static const unw_tdep_frame_t empty_frame = { 0, UNW_X86_64_FRAME_OTHER, -1, -1, 0, -1, -1 };
@@ -52,14 +55,24 @@ static pthread_mutex_t trace_init_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_once_t trace_cache_once = PTHREAD_ONCE_INIT;
 static pthread_key_t trace_cache_key;
 static struct mempool trace_cache_pool;
+static __thread  unw_trace_cache_t *tls_cache;
 
 /* Free memory for a thread's trace cache. */
 static void
 trace_cache_free (void *arg)
 {
   unw_trace_cache_t *cache = arg;
+  if (++cache->dtor_count < PTHREAD_DESTRUCTOR_ITERATIONS)
+  {
+    /* Not yet our turn to get destroyed. Re-install ourselves into the key. */
+    pthread_setspecific(trace_cache_key, cache);
+    Debug(5, "delayed freeing cache %p (%llx to go)\n", cache,
+         PTHREAD_DESTRUCTOR_ITERATIONS - cache->dtor_count);
+    return;
+  }
   munmap (cache->frames, (1u << cache->log_size) * sizeof(unw_tdep_frame_t));
   mempool_free (&trace_cache_pool, cache);
+  tls_cache = NULL;
   Debug(5, "freed cache %p\n", cache);
 }
 
@@ -108,6 +121,7 @@ trace_cache_create (void)
 
   cache->log_size = HASH_MIN_BITS;
   cache->used = 0;
+  cache->dtor_count = 0;
   Debug(5, "allocated cache %p\n", cache);
   return cache;
 }
@@ -135,8 +149,6 @@ trace_cache_expand (unw_trace_cache_t *cache)
   return 0;
 }
 
-static __thread  unw_trace_cache_t *tls_cache;
-
 /* Get the frame cache for the current thread. Create it if there is none. */
 static unw_trace_cache_t *
 trace_cache_get (void)
_______________________________________________
Libunwind-devel mailing list
[email protected]
https://lists.nongnu.org/mailman/listinfo/libunwind-devel

Reply via email to