Re: Implement __cxa_thread_atexit

2017-11-22 Thread Joerg Sonnenberger
On Mon, Nov 20, 2017 at 10:07:33PM +0100, Mark Kettenis wrote:
> > Date: Sun, 19 Nov 2017 23:13:05 +0100
> > From: Joerg Sonnenberger 
> > 
> > On Sun, Nov 19, 2017 at 11:05:31PM +0100, Joerg Sonnenberger wrote:
> > > On Sun, Nov 19, 2017 at 10:04:45PM +0100, Mark Kettenis wrote:
> > > > Here is an update diff that implements __cxa_thread_atexit which is
> > > > emitted by clang (and modern gcc) to implement certain aspects of
> > > > C++11 thread_local.
> > > 
> > > Note that without providing __cxa_thread_atexit, gcc will not detect it
> > > and try to provide its own.
> > 
> > __cxa_thread_atexit_impl I mean, sorry.
> 
> GCC 7.2 also checks for __cxa_thread_atexit.
> 
> GCC 6.4 and 4.9 only check for __cxa_thread_atexit_impl.  The
> consequence is that code compiled with ports GCC will indeed use GCC's
> implementation instead of the one in libc.  That means it doesn't
> prevent the unloading of shared libraries.  And with the current diff
> static linking will fail if __cxa_thread_atexit is used.  If ports
> people deem fixing this important, I can follow what FreeBSD did
> and put the implementation in a separate file and/or provide an alias.

It also has a good chance of breaking static linkage. Providing an alias
is likely the easiest solution. Taking from experience :)

Joerg



Re: Implement __cxa_thread_atexit

2017-11-20 Thread Mark Kettenis
> Date: Sun, 19 Nov 2017 23:13:05 +0100
> From: Joerg Sonnenberger 
> 
> On Sun, Nov 19, 2017 at 11:05:31PM +0100, Joerg Sonnenberger wrote:
> > On Sun, Nov 19, 2017 at 10:04:45PM +0100, Mark Kettenis wrote:
> > > Here is an update diff that implements __cxa_thread_atexit which is
> > > emitted by clang (and modern gcc) to implement certain aspects of
> > > C++11 thread_local.
> > 
> > Note that without providing __cxa_thread_atexit, gcc will not detect it
> > and try to provide its own.
> 
> __cxa_thread_atexit_impl I mean, sorry.

GCC 7.2 also checks for __cxa_thread_atexit.

GCC 6.4 and 4.9 only check for __cxa_thread_atexit_impl.  The
consequence is that code compiled with ports GCC will indeed use GCC's
implementation instead of the one in libc.  That means it doesn't
prevent the unloading of shared libraries.  And with the current diff
static linking will fail if __cxa_thread_atexit is used.  If ports
people deem fixing this important, I can follow what FreeBSD did
and put the implementation in a separate file and/or provide an alias.



Re: Implement __cxa_thread_atexit

2017-11-19 Thread Joerg Sonnenberger
On Sun, Nov 19, 2017 at 11:05:31PM +0100, Joerg Sonnenberger wrote:
> On Sun, Nov 19, 2017 at 10:04:45PM +0100, Mark Kettenis wrote:
> > Here is an update diff that implements __cxa_thread_atexit which is
> > emitted by clang (and modern gcc) to implement certain aspects of
> > C++11 thread_local.
> 
> Note that without providing __cxa_thread_atexit, gcc will not detect it
> and try to provide its own.

__cxa_thread_atexit_impl I mean, sorry.

Joerg



Re: Implement __cxa_thread_atexit

2017-11-19 Thread Joerg Sonnenberger
On Sun, Nov 19, 2017 at 10:04:45PM +0100, Mark Kettenis wrote:
> Here is an update diff that implements __cxa_thread_atexit which is
> emitted by clang (and modern gcc) to implement certain aspects of
> C++11 thread_local.

Note that without providing __cxa_thread_atexit, gcc will not detect it
and try to provide its own.

Joerg



Implement __cxa_thread_atexit

2017-11-19 Thread Mark Kettenis
Here is an update diff that implements __cxa_thread_atexit which is
emitted by clang (and modern gcc) to implement certain aspects of
C++11 thread_local.

Compared to the previous function this now also prevents unloading of
shared libraries that call __cxa_thread_atexit.

As before you'll need to install header files and build and install
ld.so before installing the new libc.  This also needs a libc minor
bump (not included).

ok?


Index: include/dlfcn.h
===
RCS file: /cvs/src/include/dlfcn.h,v
retrieving revision 1.13
diff -u -p -r1.13 dlfcn.h
--- include/dlfcn.h 24 Mar 2013 01:37:21 -  1.13
+++ include/dlfcn.h 19 Nov 2017 20:59:08 -
@@ -72,6 +72,7 @@ typedef   struct dl_info {
 #define DL_GETLOADADDR x
 #define DL_SETTHREADLCK2
 #define DL_SETBINDLCK  3
+#define DL_REFERENCE   4
 
 #defineDL_LAZY RTLD_LAZY   /* Compat */
 
Index: include/tib.h
===
RCS file: /cvs/src/include/tib.h,v
retrieving revision 1.5
diff -u -p -r1.5 tib.h
--- include/tib.h   10 Aug 2017 13:35:18 -  1.5
+++ include/tib.h   19 Nov 2017 20:59:08 -
@@ -135,6 +135,7 @@
  */
 
 struct tib {
+   void*tib_atexit;
int tib_thread_flags;   /* internal to libpthread */
pid_t   tib_tid;
int tib_cantcancel;
@@ -182,16 +183,12 @@ struct tib {
int tib_cantcancel;
pid_t   tib_tid;
int tib_thread_flags;   /* internal to libpthread */
-#if !defined(__LP64__) && !defined(__i386)
-   int __tib_padding;  /* padding for 8byte alignment */
-#endif
+   void*tib_atexit;
 };
 
 #if defined(__i386) || defined(__amd64)
 # define _TIB_PREP(tib)\
((void)((tib)->__tib_self = (tib)))
-#elif !defined(__LP64__) && !defined(__i386)
-# define _TIB_PREP(tib)((void)((tib)->__tib_padding = 0))
 #endif
 
 #defineTIB_EXTRA_ALIGN sizeof(void *)
@@ -207,6 +204,7 @@ struct tib {
 
 #defineTIB_INIT(tib, dtv, thread)  do {\
(tib)->tib_thread   = (thread); \
+   (tib)->tib_atexit   = NULL; \
(tib)->tib_locale   = NULL; \
(tib)->tib_cantcancel   = 0;\
(tib)->tib_cancel_point = 0;\
Index: lib/libc/Symbols.list
===
RCS file: /cvs/src/lib/libc/Symbols.list,v
retrieving revision 1.61
diff -u -p -r1.61 Symbols.list
--- lib/libc/Symbols.list   4 Nov 2017 22:53:57 -   1.61
+++ lib/libc/Symbols.list   19 Nov 2017 20:59:08 -
@@ -1466,6 +1466,7 @@ random
 /* stdlib */
 _Exit
 __cxa_atexit
+__cxa_thread_atexit
 __cxa_finalize
 __isthreaded
 a64l
Index: lib/libc/include/thread_private.h
===
RCS file: /cvs/src/lib/libc/include/thread_private.h,v
retrieving revision 1.32
diff -u -p -r1.32 thread_private.h
--- lib/libc/include/thread_private.h   4 Nov 2017 22:53:57 -   1.32
+++ lib/libc/include/thread_private.h   19 Nov 2017 20:59:09 -
@@ -394,6 +394,7 @@ void_spinunlock(volatile _atomic_lock_t
 void   _rthread_debug(int, const char *, ...)
__attribute__((__format__ (printf, 2, 3)));
 pid_t  _thread_dofork(pid_t (*_sys_fork)(void));
+void   _thread_finalize(void);
 
 /*
  * Threading syscalls not declared in system headers
Index: lib/libc/stdlib/atexit.c
===
RCS file: /cvs/src/lib/libc/stdlib/atexit.c,v
retrieving revision 1.24
diff -u -p -r1.24 atexit.c
--- lib/libc/stdlib/atexit.c10 Nov 2015 04:14:03 -  1.24
+++ lib/libc/stdlib/atexit.c19 Nov 2017 20:59:09 -
@@ -31,12 +31,26 @@
 
 #include 
 #include 
+#include 
+#ifndef NO_PIC
+#include 
+#pragma weak _DYNAMIC
+#endif
 #include 
 #include 
 #include 
 #include "atexit.h"
 #include "atfork.h"
 #include "thread_private.h"
+#include "tib.h"
+
+typeof(dlctl) dlctl asm("_dlctl") __attribute__((weak));
+
+struct thread_atexit_fn {
+   void (*func)(void *);
+   void *arg;
+   struct thread_atexit_fn *next;
+};
 
 struct atexit *__atexit;
 static int restartloop;
@@ -121,6 +135,43 @@ atexit(void (*fn)(void))
 }
 DEF_STRONG(atexit);
 
+int
+__cxa_thread_atexit(void (*func)(void *), void *arg, void *dso)
+{
+   struct thread_atexit_fn *fnp;
+   struct tib *tib = TIB_GET();
+
+   fnp = calloc(1, sizeof(struct thread_atexit_fn));
+   if (fnp == NULL)
+   return -1;
+
+#ifndef NO_PIC
+   if (_DYNAMIC)
+   dlctl(NULL, DL_REFERENCE, dso);
+#endif
+
+   fnp->func = func;
+   fnp->arg = arg;
+   fnp->next = tib->tib_atexit;
+   tib->tib_atexit = fnp;
+
+   return 0;
+}
+DEF_STRONG(__cxa_thread_atexit);
+
+void

Re: Implement __cxa_thread_atexit

2017-08-11 Thread Joerg Sonnenberger
On Fri, Aug 11, 2017 at 04:31:44PM +0200, Mark Kettenis wrote:
> The diff below implements __cxa_thread_atexit().  Calls to this
> function are emitted by the compiler to schedule running destructors
> for thread_local objects when a thread terminates or calls exit(3).
> The Linux implementation prevents unloading of shared libraries that
> registered such destructors to prevent things from crashing.  This
> diff does not implement that functionality yet.  I plan to add that
> later.  I expect this to be a bit of a corner case.

Well, if you don't care that much about the corner case, you can
directly use the generic fallback implementation in libc++abi.
Preventing unloading is the least stupid of all options, the rest
basically break one major promise or another.

> I've chosen to implement __cxa_thread_atexit() directly instead of
> __cxa_thread_atexit_impl().  I think that is cleaner.  It means we
> don't need to make changes to libc++ for this to start working.  It
> looks like modern libstdc++ version will detect __cxa_thread_atexit().

Which version did you look at? All cases I have seen check exclusively
for *_impl(). That's why I settled down on making it the official
interface in NetBSD and providing the more natural __cxa_thread_atexit
only as weak alias.

Joerg



Re: Implement __cxa_thread_atexit

2017-08-11 Thread Philip Guenther
On Fri, 11 Aug 2017, Todd C. Miller wrote:
> On Fri, 11 Aug 2017 16:31:44 +0200, Mark Kettenis wrote:
> 
> > I've chosen to implement __cxa_thread_atexit() directly instead of
> > __cxa_thread_atexit_impl().  I think that is cleaner.  It means we
> > don't need to make changes to libc++ for this to start working.  It
> > looks like modern libstdc++ version will detect __cxa_thread_atexit().
> > 
> > This adds a member to the "TIB".  This means that you'll need to
> > install the new headers and rebuild ld.so before rebuilding libpthread
> > and libc.
> 
> Is there a reason you didn't use mmap() and mprotect() like we do
> for __cxa_atexit()?

Good point.  It's per thread so it doesn't need the locking that the 
atexit() bits require.

(Actually, the atfork bits should be converted to that too.  Free 
project!)


Philip



Re: Implement __cxa_thread_atexit

2017-08-11 Thread Todd C. Miller
On Fri, 11 Aug 2017 16:31:44 +0200, Mark Kettenis wrote:

> I've chosen to implement __cxa_thread_atexit() directly instead of
> __cxa_thread_atexit_impl().  I think that is cleaner.  It means we
> don't need to make changes to libc++ for this to start working.  It
> looks like modern libstdc++ version will detect __cxa_thread_atexit().
> 
> This adds a member to the "TIB".  This means that you'll need to
> install the new headers and rebuild ld.so before rebuilding libpthread
> and libc.

Is there a reason you didn't use mmap() and mprotect() like we do
for __cxa_atexit()?

 - todd



Re: Implement __cxa_thread_atexit

2017-08-11 Thread Philip Guenther
On Fri, 11 Aug 2017, Mark Kettenis wrote:
> The diff below implements __cxa_thread_atexit().  Calls to this function 
> are emitted by the compiler to schedule running destructors for 
> thread_local objects when a thread terminates or calls exit(3). The 
> Linux implementation prevents unloading of shared libraries that 
> registered such destructors to prevent things from crashing.  This diff 
> does not implement that functionality yet.  I plan to add that later.  
> I expect this to be a bit of a corner case.
> 
> I've chosen to implement __cxa_thread_atexit() directly instead of
> __cxa_thread_atexit_impl().  I think that is cleaner.  It means we
> don't need to make changes to libc++ for this to start working.  It
> looks like modern libstdc++ version will detect __cxa_thread_atexit().
>
> This adds a member to the "TIB".  This means that you'll need to
> install the new headers and rebuild ld.so before rebuilding libpthread
> and libc.
> 
> This will require a libc minor bump.  But maybe it could ride the
> scheduled major bump?

Either seems fine.  Couple comments below...


> --- include/tib.h 20 Apr 2017 16:07:52 -  1.4
> +++ include/tib.h 11 Aug 2017 14:20:30 -
> @@ -135,6 +135,7 @@
>   */
>  
>  struct tib {
> + void*tib_atexit;
>   int tib_thread_flags;   /* internal to libpthread */
>   pid_t   tib_tid;
>   int tib_cantcancel;
> @@ -182,6 +183,7 @@ struct tib {
>   int tib_cantcancel;
>   pid_t   tib_tid;
>   int tib_thread_flags;   /* internal to libpthread */
> + void*tib_atexit;
>  #if !defined(__LP64__) && !defined(__i386)
>   int __tib_padding;  /* padding for 8byte alignment */
>  #endif

To keep this structure a multiple of 8 in size, the padding conditionals 
need to swap around, so that the extra member is only present on i386.  
HOWEVER, looking again at _dl_tib_allocate(), I already taught it to round 
up the sizes for correct alignment of the structs!  I think __tib_padding 
can go away now.


...
> +DEF_STRONG(__cxa_thread_atexit);
...
> +PROTO_NORMAL(__cxa_thread_atexit);

It's not actually called inside libc, right?  If not, you can delete 
DEF_STRONG() and use PROTO_STD_DEPRECATED() instead of PROTO_NORMAL() (case 
IIb1 in libc/include/README)


Philip



Implement __cxa_thread_atexit

2017-08-11 Thread Mark Kettenis
The diff below implements __cxa_thread_atexit().  Calls to this
function are emitted by the compiler to schedule running destructors
for thread_local objects when a thread terminates or calls exit(3).
The Linux implementation prevents unloading of shared libraries that
registered such destructors to prevent things from crashing.  This
diff does not implement that functionality yet.  I plan to add that
later.  I expect this to be a bit of a corner case.

I've chosen to implement __cxa_thread_atexit() directly instead of
__cxa_thread_atexit_impl().  I think that is cleaner.  It means we
don't need to make changes to libc++ for this to start working.  It
looks like modern libstdc++ version will detect __cxa_thread_atexit().

This adds a member to the "TIB".  This means that you'll need to
install the new headers and rebuild ld.so before rebuilding libpthread
and libc.

This will require a libc minor bump.  But maybe it could ride the
scheduled major bump?


Index: include/tib.h
===
RCS file: /cvs/src/include/tib.h,v
retrieving revision 1.4
diff -u -p -r1.4 tib.h
--- include/tib.h   20 Apr 2017 16:07:52 -  1.4
+++ include/tib.h   11 Aug 2017 14:20:30 -
@@ -135,6 +135,7 @@
  */
 
 struct tib {
+   void*tib_atexit;
int tib_thread_flags;   /* internal to libpthread */
pid_t   tib_tid;
int tib_cantcancel;
@@ -182,6 +183,7 @@ struct tib {
int tib_cantcancel;
pid_t   tib_tid;
int tib_thread_flags;   /* internal to libpthread */
+   void*tib_atexit;
 #if !defined(__LP64__) && !defined(__i386)
int __tib_padding;  /* padding for 8byte alignment */
 #endif
@@ -207,6 +209,7 @@ struct tib {
 
 #defineTIB_INIT(tib, dtv, thread)  do {\
(tib)->tib_thread   = (thread); \
+   (tib)->tib_atexit   = NULL; \
(tib)->tib_locale   = NULL; \
(tib)->tib_cantcancel   = 0;\
(tib)->tib_cancel_point = 0;\
Index: lib/librthread/rthread.c
===
RCS file: /cvs/src/lib/librthread/rthread.c,v
retrieving revision 1.95
diff -u -p -r1.95 rthread.c
--- lib/librthread/rthread.c27 Jul 2017 16:35:08 -  1.95
+++ lib/librthread/rthread.c11 Aug 2017 14:20:30 -
@@ -331,6 +331,12 @@ pthread_exit(void *retval)
oclfn->fn(oclfn->arg);
free(oclfn);
}
+   while (tib->tib_atexit) {
+   struct thread_atexit_fn *fnp = tib->tib_atexit;
+   tib->tib_atexit = fnp->next;
+   fnp->func(fnp->arg);
+   free(fnp);
+   }
_rthread_tls_destructors(thread);
_spinlock(&_thread_lock);
LIST_REMOVE(thread, threads);
Index: lib/libc/Symbols.list
===
RCS file: /cvs/src/lib/libc/Symbols.list,v
retrieving revision 1.54
diff -u -p -r1.54 Symbols.list
--- lib/libc/Symbols.list   19 Jun 2017 03:06:26 -  1.54
+++ lib/libc/Symbols.list   11 Aug 2017 14:20:30 -
@@ -1420,6 +1420,7 @@ random
 /* stdlib */
 _Exit
 __cxa_atexit
+__cxa_thread_atexit
 __cxa_finalize
 __isthreaded
 a64l
Index: lib/libc/include/thread_private.h
===
RCS file: /cvs/src/lib/libc/include/thread_private.h,v
retrieving revision 1.29
diff -u -p -r1.29 thread_private.h
--- lib/libc/include/thread_private.h   15 Oct 2016 18:24:40 -  1.29
+++ lib/libc/include/thread_private.h   11 Aug 2017 14:20:31 -
@@ -96,6 +96,12 @@ struct thread_callbacks {
__pid_t (*tc_vfork)(void);
 };
 
+struct thread_atexit_fn {
+   void (*func)(void *);
+   void *arg;
+   struct thread_atexit_fn *next;
+};
+
 __BEGIN_PUBLIC_DECLS
 /*
  *  Set the callbacks used by libc
Index: lib/libc/stdlib/atexit.c
===
RCS file: /cvs/src/lib/libc/stdlib/atexit.c,v
retrieving revision 1.24
diff -u -p -r1.24 atexit.c
--- lib/libc/stdlib/atexit.c10 Nov 2015 04:14:03 -  1.24
+++ lib/libc/stdlib/atexit.c11 Aug 2017 14:20:31 -
@@ -38,6 +38,8 @@
 #include "atfork.h"
 #include "thread_private.h"
 
+#include "tib.h"
+
 struct atexit *__atexit;
 static int restartloop;
 
@@ -121,6 +123,25 @@ atexit(void (*fn)(void))
 }
 DEF_STRONG(atexit);
 
+int
+__cxa_thread_atexit(void (*func)(void *), void *arg, void *dso)
+{
+   struct thread_atexit_fn *fnp;
+   struct tib *tib = TIB_GET();
+
+   fnp = calloc(1, sizeof(struct thread_atexit_fn));
+   if (fnp == NULL)
+   return -1;
+
+   fnp->func = func;
+   fnp->arg = arg;
+   fnp->next = ti