Here is an updated diff that implements __cxa_thread_atexit, which is emitted by clang (and modern gcc) to implement certain aspects of C++11 thread_local.
Compared to the previous version this now also prevents unloading of shared libraries that call __cxa_thread_atexit. As before, you'll need to install header files and build and install ld.so before installing the new libc. This also needs a libc minor bump (not included). ok? Index: include/dlfcn.h =================================================================== RCS file: /cvs/src/include/dlfcn.h,v retrieving revision 1.13 diff -u -p -r1.13 dlfcn.h --- include/dlfcn.h 24 Mar 2013 01:37:21 -0000 1.13 +++ include/dlfcn.h 19 Nov 2017 20:59:08 -0000 @@ -72,6 +72,7 @@ typedef struct dl_info { #define DL_GETLOADADDR x #define DL_SETTHREADLCK 2 #define DL_SETBINDLCK 3 +#define DL_REFERENCE 4 #define DL_LAZY RTLD_LAZY /* Compat */ Index: include/tib.h =================================================================== RCS file: /cvs/src/include/tib.h,v retrieving revision 1.5 diff -u -p -r1.5 tib.h --- include/tib.h 10 Aug 2017 13:35:18 -0000 1.5 +++ include/tib.h 19 Nov 2017 20:59:08 -0000 @@ -135,6 +135,7 @@ */ struct tib { + void *tib_atexit; int tib_thread_flags; /* internal to libpthread */ pid_t tib_tid; int tib_cantcancel; @@ -182,16 +183,12 @@ struct tib { int tib_cantcancel; pid_t tib_tid; int tib_thread_flags; /* internal to libpthread */ -#if !defined(__LP64__) && !defined(__i386) - int __tib_padding; /* padding for 8byte alignment */ -#endif + void *tib_atexit; }; #if defined(__i386) || defined(__amd64) # define _TIB_PREP(tib) \ ((void)((tib)->__tib_self = (tib))) -#elif !defined(__LP64__) && !defined(__i386) -# define _TIB_PREP(tib) ((void)((tib)->__tib_padding = 0)) #endif #define TIB_EXTRA_ALIGN sizeof(void *) @@ -207,6 +204,7 @@ struct tib { #define TIB_INIT(tib, dtv, thread) do { \ (tib)->tib_thread = (thread); \ + (tib)->tib_atexit = NULL; \ (tib)->tib_locale = NULL; \ (tib)->tib_cantcancel = 0; \ (tib)->tib_cancel_point = 0; \ Index: lib/libc/Symbols.list =================================================================== RCS file: 
/cvs/src/lib/libc/Symbols.list,v retrieving revision 1.61 diff -u -p -r1.61 Symbols.list --- lib/libc/Symbols.list 4 Nov 2017 22:53:57 -0000 1.61 +++ lib/libc/Symbols.list 19 Nov 2017 20:59:08 -0000 @@ -1466,6 +1466,7 @@ random /* stdlib */ _Exit __cxa_atexit +__cxa_thread_atexit __cxa_finalize __isthreaded a64l Index: lib/libc/include/thread_private.h =================================================================== RCS file: /cvs/src/lib/libc/include/thread_private.h,v retrieving revision 1.32 diff -u -p -r1.32 thread_private.h --- lib/libc/include/thread_private.h 4 Nov 2017 22:53:57 -0000 1.32 +++ lib/libc/include/thread_private.h 19 Nov 2017 20:59:09 -0000 @@ -394,6 +394,7 @@ void _spinunlock(volatile _atomic_lock_t void _rthread_debug(int, const char *, ...) __attribute__((__format__ (printf, 2, 3))); pid_t _thread_dofork(pid_t (*_sys_fork)(void)); +void _thread_finalize(void); /* * Threading syscalls not declared in system headers Index: lib/libc/stdlib/atexit.c =================================================================== RCS file: /cvs/src/lib/libc/stdlib/atexit.c,v retrieving revision 1.24 diff -u -p -r1.24 atexit.c --- lib/libc/stdlib/atexit.c 10 Nov 2015 04:14:03 -0000 1.24 +++ lib/libc/stdlib/atexit.c 19 Nov 2017 20:59:09 -0000 @@ -31,12 +31,26 @@ #include <sys/types.h> #include <sys/mman.h> +#include <dlfcn.h> +#ifndef NO_PIC +#include <elf.h> +#pragma weak _DYNAMIC +#endif #include <stdlib.h> #include <string.h> #include <unistd.h> #include "atexit.h" #include "atfork.h" #include "thread_private.h" +#include "tib.h" + +typeof(dlctl) dlctl asm("_dlctl") __attribute__((weak)); + +struct thread_atexit_fn { + void (*func)(void *); + void *arg; + struct thread_atexit_fn *next; +}; struct atexit *__atexit; static int restartloop; @@ -121,6 +135,43 @@ atexit(void (*fn)(void)) } DEF_STRONG(atexit); +int +__cxa_thread_atexit(void (*func)(void *), void *arg, void *dso) +{ + struct thread_atexit_fn *fnp; + struct tib *tib = TIB_GET(); + + fnp = 
calloc(1, sizeof(struct thread_atexit_fn)); + if (fnp == NULL) + return -1; + +#ifndef NO_PIC + if (_DYNAMIC) + dlctl(NULL, DL_REFERENCE, dso); +#endif + + fnp->func = func; + fnp->arg = arg; + fnp->next = tib->tib_atexit; + tib->tib_atexit = fnp; + + return 0; +} +DEF_STRONG(__cxa_thread_atexit); + +void +_thread_finalize(void) +{ + struct tib *tib = TIB_GET(); + + while (tib->tib_atexit) { + struct thread_atexit_fn *fnp = tib->tib_atexit; + tib->tib_atexit = fnp->next; + fnp->func(fnp->arg); + free(fnp); + } +} + /* * Call all handlers registered with __cxa_atexit() for the shared * object owning 'dso'. @@ -133,6 +184,9 @@ __cxa_finalize(void *dso) struct atexit_fn fn; int n, pgsize = getpagesize(); static int call_depth; + + if (dso == NULL) + _thread_finalize(); _ATEXIT_LOCK(); call_depth++; Index: lib/libc/stdlib/atexit.h =================================================================== RCS file: /cvs/src/lib/libc/stdlib/atexit.h,v retrieving revision 1.10 diff -u -p -r1.10 atexit.h --- lib/libc/stdlib/atexit.h 25 Oct 2015 18:01:24 -0000 1.10 +++ lib/libc/stdlib/atexit.h 19 Nov 2017 20:59:09 -0000 @@ -46,7 +46,9 @@ extern struct atexit *__atexit; /* poin __END_HIDDEN_DECLS int __cxa_atexit(void (*)(void *), void *, void *); +int __cxa_thread_atexit(void (*)(void *), void *, void *); void __cxa_finalize(void *); PROTO_NORMAL(__cxa_atexit); +PROTO_STD_DEPRECATED(__cxa_thread_atexit); PROTO_NORMAL(__cxa_finalize); Index: lib/libc/thread/rthread.c =================================================================== RCS file: /cvs/src/lib/libc/thread/rthread.c,v retrieving revision 1.6 diff -u -p -r1.6 rthread.c --- lib/libc/thread/rthread.c 4 Nov 2017 22:53:57 -0000 1.6 +++ lib/libc/thread/rthread.c 19 Nov 2017 20:59:09 -0000 @@ -137,6 +137,7 @@ pthread_exit(void *retval) oclfn->fn(oclfn->arg); free(oclfn); } + _thread_finalize(); _rthread_tls_destructors(thread); if (_thread_cb.tc_thread_release != NULL)