Re: hang in dlclose() on rtld lock (powerpc64)

2017-03-09 Thread Justin Hibbits
On Thursday, March 9, 2017, Konstantin Belousov  wrote:

> On Thu, Mar 09, 2017 at 09:59:00AM -0600, Justin Hibbits wrote:
> > When building ports in poudriere, I see gdk-pixbuf-query-modules and
> > gio-querymodules hanging on r314676, but working in r305820.  I took a
> > backtrace on both in gdb, and see the following (identical between both):
> >
> > Program received signal SIGINT, Interrupt.
> > 0x506831d8 in .__sys.umtx_op () from /lib/libc.so.7
> > (gdb) bt
> > #0  0x506831d8 in .__sys.umtx_op () from /lib/libc.so.7
> > #1  0x50588010 in _umtx_op_err (obj=0x4, op=13, val=0, uaddr=0x0,
> > uaddr2=0x0)
> > at /home/chmeee/freebsd/pristine/lib/libthr/thread/thr_umtx.c:37
> > #2  0x505881b8 in __thr_rwlock_wrlock (rwlock=<optimized out>,
> > tsp=<optimized out>)
> > at /home/chmeee/freebsd/pristine/lib/libthr/thread/thr_umtx.c:325
> > #3  0x505965f0 in _thr_rwlock_wrlock (tsp=<optimized out>,
> > rwlock=<optimized out>)
> > at /home/chmeee/freebsd/pristine/lib/libthr/thread/thr_umtx.h:239
> > #4  _thr_rtld_wlock_acquire (lock=0x505bdd00)
> > at /home/chmeee/freebsd/pristine/lib/libthr/thread/thr_rtld.c:141
> > #5  0x50026bf4 in wlock_acquire (lock=0x5004cf20 ,
> > lockstate=0xcab0)
> > at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld_lock.c:222
> > #6  0x50022b1c in dlclose (handle=0x51d62000)
> > at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:3021
> > #7  0x50022c90 in free_needed_filtees (n=0x509f7420)
> > at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:2113
> > #8  0x50022d18 in unload_filtees (obj=0x509fa800)
> > at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:2129
> > #9  0x50022e54 in unload_object (root=<optimized out>)
> > at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:4464
> > #10 0x50022c20 in dlclose (handle=0x50054000)
> > at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:3044
> > ---Type <return> to continue, or q <return> to quit---q
> >
> >
> > This happens on powerpc64.  I haven't tested on powerpc or any other
> arch.
>
> Please test the following patch.  It avoids recursing on the bind lock.
>
> diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
> index a7c61b2d13f..880cf100c45 100644
> --- a/libexec/rtld-elf/rtld.c
> +++ b/libexec/rtld-elf/rtld.c
> @@ -77,6 +77,7 @@ static void digest_dynamic2(Obj_Entry *, const Elf_Dyn
> *, const Elf_Dyn *,
>  static void digest_dynamic(Obj_Entry *, int);
>  static Obj_Entry *digest_phdr(const Elf_Phdr *, int, caddr_t, const char
> *);
>  static Obj_Entry *dlcheck(void *);
> +static int dlclose_locked(void *, RtldLockState *);
>  static Obj_Entry *dlopen_object(const char *name, int fd, Obj_Entry
> *refobj,
>  int lo_flags, int mode, RtldLockState *lockstate);
>  static Obj_Entry *do_load_object(int, const char *, char *, struct stat
> *, int);
> @@ -98,7 +99,7 @@ static void initlist_add_objects(Obj_Entry *, Obj_Entry
> *, Objlist *);
>  static void linkmap_add(Obj_Entry *);
>  static void linkmap_delete(Obj_Entry *);
>  static void load_filtees(Obj_Entry *, int flags, RtldLockState *);
> -static void unload_filtees(Obj_Entry *);
> +static void unload_filtees(Obj_Entry *, RtldLockState *);
>  static int load_needed_objects(Obj_Entry *, int);
>  static int load_preload_objects(void);
>  static Obj_Entry *load_object(const char *, int fd, const Obj_Entry *,
> int);
> @@ -142,7 +143,7 @@ static int symlook_obj1_sysv(SymLook *, const
> Obj_Entry *);
>  static int symlook_obj1_gnu(SymLook *, const Obj_Entry *);
>  static void trace_loaded_objects(Obj_Entry *);
>  static void unlink_object(Obj_Entry *);
> -static void unload_object(Obj_Entry *);
> +static void unload_object(Obj_Entry *, RtldLockState *lockstate);
>  static void unref_dag(Obj_Entry *);
>  static void ref_dag(Obj_Entry *);
>  static char *origin_subst_one(Obj_Entry *, char *, const char *,
> @@ -2104,13 +2105,13 @@ initlist_add_objects(Obj_Entry *obj, Obj_Entry
> *tail, Objlist *list)
>  #endif
>
>  static void
> -free_needed_filtees(Needed_Entry *n)
> +free_needed_filtees(Needed_Entry *n, RtldLockState *lockstate)
>  {
>  Needed_Entry *needed, *needed1;
>
>  for (needed = n; needed != NULL; needed = needed->next) {
> if (needed->obj != NULL) {
> -   dlclose(needed->obj);
> +   dlclose_locked(needed->obj, lockstate);
> needed->obj = NULL;
> }
>  }
> @@ -2121,14 +2122,14 @@ free_needed_filtees(Needed_Entry *n)
>  }
>
>  static void
> -unload_filtees(Obj_Entry *obj)
> +unload_filtees(Obj_Entry *obj, RtldLockState *lockstate)
>  {
>
> -free_needed_filtees(obj->needed_filtees);
> -obj->needed_filtees = NULL;
> -free_needed_filtees(obj->needed_aux_filtees);
> -obj->needed_aux_filtees = NULL;
> -obj->filtees_loaded = false;
> +   free_needed_filtees(obj->needed_filtees, lockstate);
> +   obj->needed_filtees = NULL;
> +   free_needed_filtees(obj->needed_aux_filtees, lockstate);
> +

Re: hang in dlclose() on rtld lock (powerpc64)

2017-03-09 Thread Konstantin Belousov
On Thu, Mar 09, 2017 at 09:59:00AM -0600, Justin Hibbits wrote:
> When building ports in poudriere, I see gdk-pixbuf-query-modules and
> gio-querymodules hanging on r314676, but working in r305820.  I took a
> backtrace on both in gdb, and see the following (identical between both):
> 
> Program received signal SIGINT, Interrupt.
> 0x506831d8 in .__sys.umtx_op () from /lib/libc.so.7
> (gdb) bt
> #0  0x506831d8 in .__sys.umtx_op () from /lib/libc.so.7
> #1  0x50588010 in _umtx_op_err (obj=0x4, op=13, val=0, uaddr=0x0,
> uaddr2=0x0)
> at /home/chmeee/freebsd/pristine/lib/libthr/thread/thr_umtx.c:37
> #2  0x505881b8 in __thr_rwlock_wrlock (rwlock=<optimized out>,
> tsp=<optimized out>)
> at /home/chmeee/freebsd/pristine/lib/libthr/thread/thr_umtx.c:325
> #3  0x505965f0 in _thr_rwlock_wrlock (tsp=<optimized out>,
> rwlock=<optimized out>)
> at /home/chmeee/freebsd/pristine/lib/libthr/thread/thr_umtx.h:239
> #4  _thr_rtld_wlock_acquire (lock=0x505bdd00)
> at /home/chmeee/freebsd/pristine/lib/libthr/thread/thr_rtld.c:141
> #5  0x50026bf4 in wlock_acquire (lock=0x5004cf20 ,
> lockstate=0xcab0)
> at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld_lock.c:222
> #6  0x50022b1c in dlclose (handle=0x51d62000)
> at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:3021
> #7  0x50022c90 in free_needed_filtees (n=0x509f7420)
> at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:2113
> #8  0x50022d18 in unload_filtees (obj=0x509fa800)
> at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:2129
> #9  0x50022e54 in unload_object (root=<optimized out>)
> at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:4464
> #10 0x50022c20 in dlclose (handle=0x50054000)
> at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:3044
> ---Type <return> to continue, or q <return> to quit---q
> 
> 
> This happens on powerpc64.  I haven't tested on powerpc or any other arch.

Please test the following patch.  It avoids recursing on the bind lock.

diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
index a7c61b2d13f..880cf100c45 100644
--- a/libexec/rtld-elf/rtld.c
+++ b/libexec/rtld-elf/rtld.c
@@ -77,6 +77,7 @@ static void digest_dynamic2(Obj_Entry *, const Elf_Dyn *, 
const Elf_Dyn *,
 static void digest_dynamic(Obj_Entry *, int);
 static Obj_Entry *digest_phdr(const Elf_Phdr *, int, caddr_t, const char *);
 static Obj_Entry *dlcheck(void *);
+static int dlclose_locked(void *, RtldLockState *);
 static Obj_Entry *dlopen_object(const char *name, int fd, Obj_Entry *refobj,
 int lo_flags, int mode, RtldLockState *lockstate);
 static Obj_Entry *do_load_object(int, const char *, char *, struct stat *, 
int);
@@ -98,7 +99,7 @@ static void initlist_add_objects(Obj_Entry *, Obj_Entry *, 
Objlist *);
 static void linkmap_add(Obj_Entry *);
 static void linkmap_delete(Obj_Entry *);
 static void load_filtees(Obj_Entry *, int flags, RtldLockState *);
-static void unload_filtees(Obj_Entry *);
+static void unload_filtees(Obj_Entry *, RtldLockState *);
 static int load_needed_objects(Obj_Entry *, int);
 static int load_preload_objects(void);
 static Obj_Entry *load_object(const char *, int fd, const Obj_Entry *, int);
@@ -142,7 +143,7 @@ static int symlook_obj1_sysv(SymLook *, const Obj_Entry *);
 static int symlook_obj1_gnu(SymLook *, const Obj_Entry *);
 static void trace_loaded_objects(Obj_Entry *);
 static void unlink_object(Obj_Entry *);
-static void unload_object(Obj_Entry *);
+static void unload_object(Obj_Entry *, RtldLockState *lockstate);
 static void unref_dag(Obj_Entry *);
 static void ref_dag(Obj_Entry *);
 static char *origin_subst_one(Obj_Entry *, char *, const char *,
@@ -2104,13 +2105,13 @@ initlist_add_objects(Obj_Entry *obj, Obj_Entry *tail, 
Objlist *list)
 #endif
 
 static void
-free_needed_filtees(Needed_Entry *n)
+free_needed_filtees(Needed_Entry *n, RtldLockState *lockstate)
 {
 Needed_Entry *needed, *needed1;
 
 for (needed = n; needed != NULL; needed = needed->next) {
if (needed->obj != NULL) {
-   dlclose(needed->obj);
+   dlclose_locked(needed->obj, lockstate);
needed->obj = NULL;
}
 }
@@ -2121,14 +2122,14 @@ free_needed_filtees(Needed_Entry *n)
 }
 
 static void
-unload_filtees(Obj_Entry *obj)
+unload_filtees(Obj_Entry *obj, RtldLockState *lockstate)
 {
 
-free_needed_filtees(obj->needed_filtees);
-obj->needed_filtees = NULL;
-free_needed_filtees(obj->needed_aux_filtees);
-obj->needed_aux_filtees = NULL;
-obj->filtees_loaded = false;
+   free_needed_filtees(obj->needed_filtees, lockstate);
+   obj->needed_filtees = NULL;
+   free_needed_filtees(obj->needed_aux_filtees, lockstate);
+   obj->needed_aux_filtees = NULL;
+   obj->filtees_loaded = false;
 }
 
 static void
@@ -3015,15 +3016,23 @@ search_library_pathfds(const char *name, const char 
*path, int *fdp)
 int
 dlclose(void *handle)
 {
+   RtldLockState lockstate;
+   int 

hang in dlclose() on rtld lock (powerpc64)

2017-03-09 Thread Justin Hibbits
When building ports in poudriere, I see gdk-pixbuf-query-modules and
gio-querymodules hanging on r314676, but working in r305820.  I took a
backtrace on both in gdb, and see the following (identical between both):

Program received signal SIGINT, Interrupt.
0x506831d8 in .__sys.umtx_op () from /lib/libc.so.7
(gdb) bt
#0  0x506831d8 in .__sys.umtx_op () from /lib/libc.so.7
#1  0x50588010 in _umtx_op_err (obj=0x4, op=13, val=0, uaddr=0x0,
uaddr2=0x0)
at /home/chmeee/freebsd/pristine/lib/libthr/thread/thr_umtx.c:37
#2  0x505881b8 in __thr_rwlock_wrlock (rwlock=<optimized out>,
tsp=<optimized out>)
at /home/chmeee/freebsd/pristine/lib/libthr/thread/thr_umtx.c:325
#3  0x505965f0 in _thr_rwlock_wrlock (tsp=<optimized out>,
rwlock=<optimized out>)
at /home/chmeee/freebsd/pristine/lib/libthr/thread/thr_umtx.h:239
#4  _thr_rtld_wlock_acquire (lock=0x505bdd00)
at /home/chmeee/freebsd/pristine/lib/libthr/thread/thr_rtld.c:141
#5  0x50026bf4 in wlock_acquire (lock=0x5004cf20 ,
lockstate=0xcab0)
at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld_lock.c:222
#6  0x50022b1c in dlclose (handle=0x51d62000)
at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:3021
#7  0x50022c90 in free_needed_filtees (n=0x509f7420)
at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:2113
#8  0x50022d18 in unload_filtees (obj=0x509fa800)
at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:2129
#9  0x50022e54 in unload_object (root=<optimized out>)
at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:4464
#10 0x50022c20 in dlclose (handle=0x50054000)
at /home/chmeee/freebsd/pristine/libexec/rtld-elf/rtld.c:3044
---Type <return> to continue, or q <return> to quit---q


This happens on powerpc64.  I haven't tested on powerpc or any other arch.

- Justin
___
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"