I'm running into the same problem and with one of my systems it's
seems like 10% of the terminals hang this way.

Here's the fix, applied against rxvt_2.7.10-7.dsc
There are two similar problems, both involving deadlocks with a mutex.

seteuid() gets a mutex, is interrupted with SIGCHLD signal, calls the
same exit path and seteuid() blocks waiting formutex

malloc gets a mutex, is interrupted with SIGCHLD signal,
rxvt_clean_exit calls XDestroyIC(), and free() blocks waiting for
mutex

-- 
David Fries <da...@fries.net>
>From 39c8effc203f939146658cee5d93cb8a0936244a Mon Sep 17 00:00:00 2001
From: David Fries <da...@fries.net>
Date: Wed, 26 Sep 2018 21:05:15 -0500
Subject: [PATCH 1/2] fix deadlock in recursive seteuid calls

rxvt is a signal threaded program, but signals are causing mutex
deadlocks.

When the command exits two things happen, rxvt will receive a read
error from the tty and it will receive a SIGCHLD signal at nearly the
same time.  If the read error happens first, rxvt_cmd_getc() was calling
rxvt_clean_exit() directly, which calls rxvt_privileged_ttydev/utmp() which
calls seteuid() which must internally get some kind of mutex.
If in seteuid() the SIGCHLD is delivered, it will also call exit() and
rxvt_clean_exit() and on to seteuid() and deadlock waiting for the
mutex.

That sequence is not theoretical, at least two people have filed bug
reports and one of my systems sees 10% of rxvts hang like this.  It
seems the best option is to disable signals both before calling
rxvt_clean_exit()/exit() in rxvt_cmd_getc() and also in
rxvt_clean_exit() just in case two signals might be delivered back to
back.  Given exit() calls rxvt_clean_exit() there's no need for
rxvt_cmd_getc() to call it before exit and run through it twice
(provided registered with onexit() earlier).
---
 src/command.c   | 18 ++++++++++++++++++
 src/init.c      | 17 +++++++++++++++++
 src/init.intpro |  1 +
 src/main.c      | 11 +++++++++++
 4 files changed, 47 insertions(+)

diff --git a/src/command.c b/src/command.c
index 976d087..d559050 100644
--- a/src/command.c
+++ b/src/command.c
@@ -46,6 +46,7 @@
 /*{{{ includes: */
 #include "../config.h"		/* NECESSARY */
 #include "rxvt.h"		/* NECESSARY */
+#include "init.intpro"		/* PROTOS for internal routines */
 #include "version.h"
 #include "command.h"
 
@@ -751,7 +752,24 @@ rxvt_cmd_getc(rxvt_t *r)
 		else if (n == 0 || (n < 0 && errno == EAGAIN))
 		    break;
 		else {
+		    /*
+		     * One likely reason for a read error is the child just
+		     * exited.  If that's the case it's just a matter of time
+		     * before SIGCHLD handler runs, which also runs exit(),
+		     * and if that signal is called in the middle of a call
+		     * to seteuid/setegid the process can dead lock, because
+		     * there is an internal mutex, which is then locked the
+		     * second time through.  Disable signals before calling
+		     * exit.  If the signal goes off before it is disabled,
+		     * it calls exit() and control never returns here, if the
+		     * signal is disabled first exit will run without
+		     * interruption.
+		     */
+		    rxvt_disable_signals();
+#if !defined (HAVE_ATEXIT) && !defined (HAVE_ON_EXIT)
+		    /* will be called from exit if registered */
 		    rxvt_clean_exit();
+#endif
 		    exit(EXIT_FAILURE);	/* bad order of events? */
 		}
 	    if (count != BUFSIZ)	/* some characters read in */
diff --git a/src/init.c b/src/init.c
index 65f4d70..9a2a3b3 100644
--- a/src/init.c
+++ b/src/init.c
@@ -1382,6 +1382,23 @@ rxvt_run_command(rxvt_t *r, const char *const *argv)
     return cfd;
 }
 
+/*----------------------------------------------------------------------*/
+/*
+ * Disable signals for use during the exit path.  Combines them all in one
+ * place.
+ */
+/* INTPROTO */
+void
+rxvt_disable_signals(void)
+{
+#ifndef __svr4__
+    signal(SIGINT, SIG_IGN);
+#endif
+    signal(SIGQUIT, SIG_IGN);
+    signal(SIGTERM, SIG_IGN);
+    signal(SIGCHLD, SIG_IGN);
+}
+
 /* ------------------------------------------------------------------------- *
  *                          CHILD PROCESS OPERATIONS                         *
  * ------------------------------------------------------------------------- */
diff --git a/src/init.intpro b/src/init.intpro
index 8759272..8e9c983 100644
--- a/src/init.intpro
+++ b/src/init.intpro
@@ -3,5 +3,6 @@ void             rxvt_Get_Colours                 __PROTO((rxvt_t *r));
 void             rxvt_color_aliases               __PROTO((rxvt_t *r, int idx));
 void             rxvt_get_ourmods                 __PROTO((rxvt_t *r));
 int              rxvt_run_command                 __PROTO((rxvt_t *r, const char *const *argv));
+void             rxvt_disable_signals             __PROTO((void));
 int              rxvt_run_child                   __PROTO((rxvt_t *r, const char *const *argv));
 void             rxvt_get_ttymode                 __PROTO((ttymode_t *tio));
diff --git a/src/main.c b/src/main.c
index 193540d..7ae3c0a 100644
--- a/src/main.c
+++ b/src/main.c
@@ -33,6 +33,7 @@
 #include "../config.h"		/* NECESSARY */
 #include "rxvt.h"		/* NECESSARY */
 #include "main.intpro"		/* PROTOS for internal routines */
+#include "init.intpro"		/* PROTOS for internal routines */
 
 #include <signal.h>
 
@@ -189,6 +190,16 @@ rxvt_clean_exit(void)
 {
     rxvt_t         *r = rxvt_get_r();
 
+    /* Protect against multiple signals calling exit.  seteuid/setegid has
+     * an internal mutex and if another signal goes off with that lock
+     * held it will deadlock if this routine is called again.  This is
+     * defensive programming, the problem observed is from a read error in
+     * command.c and SIGCHLD signal both calling here.  It is not defined
+     * what happens when exit() is called more than once, best to disable
+     * other signals early.
+     */
+    rxvt_disable_signals();
+
 #ifdef DEBUG_SCREEN
     rxvt_scr_release(r);
 #endif
-- 
2.11.0


>From b06b9d72c23d7b369d65f9872cdc665a7182c2a9 Mon Sep 17 00:00:00 2001
From: David Fries <da...@fries.net>
Date: Wed, 26 Sep 2018 22:46:18 -0500
Subject: [PATCH 2/2] malloc/_int_free deadlock

don't call exit handlers from a signal, too risky, just do the
minimal tty/utmp cleanup needed.

test with some variation of the following
i=0; while [ $i -lt 1000 ] ; do rxvt -e bash -c 'sleep .1; exit 1'& sleep .02;  i=$(($i+1));  done

 #0  __lll_lock_wait_private ()
     at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
 #1  0x00007f52c9a00bd0 in _int_free (av=0x7f52c9d22b00 <main_arena>,
     p=0x56039f22bbd0, have_lock=0) at malloc.c:4000
 #2  0x00007f52c9d91e2b in ?? () from /usr/lib/x86_64-linux-gnu/libX11.so.6
 #3  0x00007f52c9d77b82 in XDestroyIC ()
    from /usr/lib/x86_64-linux-gnu/libX11.so.6
 #4  0x000056039d161340 in rxvt_clean_exit () at main.c:216
 #5  0x000056039d16120c in rxvt_Child_signal (sig=17) at main.c:142
 #6  <signal handler called>
 #7  0x00007f52c9a01cf0 in _int_malloc (
     av=av@entry=0x7f52c9d22b00 <main_arena>, bytes=bytes@entry=36)
     at malloc.c:3405
 #8  0x00007f52c9a03f64 in __GI___libc_malloc (bytes=36) at malloc.c:2928
 #9  0x00007f52c976f6c3 in ?? () from /usr/lib/x86_64-linux-gnu/libxcb.so.1
 #10 0x00007f52c976d44c in ?? () from /usr/lib/x86_64-linux-gnu/libxcb.so.1
 #11 0x00007f52c976ec0f in ?? () from /usr/lib/x86_64-linux-gnu/libxcb.so.1
 #12 0x00007f52c976ed81 in xcb_wait_for_reply64 ()
    from /usr/lib/x86_64-linux-gnu/libxcb.so.1
 #13 0x00007f52c9d6b018 in _XReply () from /usr/lib/x86_64-linux-gnu/libX11.so.6
 #14 0x00007f52c9d55e2f in XInternAtom ()
    from /usr/lib/x86_64-linux-gnu/libX11.so.6
 #15 0x00007f52c9d90d4f in ?? () from /usr/lib/x86_64-linux-gnu/libX11.so.6
 #16 0x000056039d15a7fb in rxvt_cmd_getc (r=0x56039f1e5010) at command.c:683
 #17 0x000056039d15e193 in rxvt_main_loop (r=0x56039f1e5010) at command.c:3049
 #18 0x000056039d159b6e in main (argc=5, argv=0x7ffd67d5efa8) at rxvt.c:13
---
 src/main.c | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/src/main.c b/src/main.c
index 7ae3c0a..cfcd55e 100644
--- a/src/main.c
+++ b/src/main.c
@@ -122,6 +122,21 @@ rxvt_init(int argc, const char *const *argv)
 /* ------------------------------------------------------------------------- *
  *                       SIGNAL HANDLING & EXIT HANDLER                      *
  * ------------------------------------------------------------------------- */
+
+/*
+ * Exit gracefully, clearing the utmp entry and restoring tty attributes
+ * If called from the signal handler this can't allocate or deallocate memory,
+ * because malloc or free could have locked a mutex, and calling them again
+ * will deadlock.
+ */
+static void
+rxvt_minimal_exit(void)
+{
+    rxvt_t         *r = rxvt_get_r();
+    rxvt_privileged_ttydev(r, RESTORE);
+    rxvt_privileged_utmp(r, RESTORE);
+}
+
 /*
  * Catch a SIGCHLD signal and exit if the direct child has died
  */
@@ -138,8 +153,14 @@ rxvt_Child_signal(int sig __attribute__((unused)))
     } while ((pid = waitpid(-1, NULL, WNOHANG)) == -1 && errno == EINTR);
 
     r = rxvt_get_r();
-    if (pid == r->h->cmd_pid)
-	exit(EXIT_SUCCESS);
+    if (pid == r->h->cmd_pid) {
+	rxvt_minimal_exit();
+	    /* Get a signal at the right (or wrong) time and XDestroyIC()
+	     * called from atexit() can deadlock.  The above is the important
+	     * clean up rxvt needed done, bail without running exit handlers.
+	     */
+	_exit(EXIT_SUCCESS);
+    }
 
     errno = save_errno;
     signal(SIGCHLD, rxvt_Child_signal);
@@ -156,7 +177,7 @@ rxvt_Exit_signal(int sig)
 #ifdef DEBUG_CMD
     rxvt_print_error("signal %d", sig);
 #endif
-    rxvt_clean_exit();
+    rxvt_minimal_exit();
     kill(getpid(), sig);
 }
 
@@ -203,8 +224,7 @@ rxvt_clean_exit(void)
 #ifdef DEBUG_SCREEN
     rxvt_scr_release(r);
 #endif
-    rxvt_privileged_ttydev(r, RESTORE);
-    rxvt_privileged_utmp(r, RESTORE);
+    rxvt_minimal_exit();
 #ifdef USE_XIM
     if (r->h->Input_Context != NULL) {
 	XDestroyIC(r->h->Input_Context);
-- 
2.11.0

Reply via email to