The branch main has been updated by kib:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=a2cfe535771ded3ca8526bae405a5b61f71f1f33

commit a2cfe535771ded3ca8526bae405a5b61f71f1f33
Author:     Konstantin Belousov <[email protected]>
AuthorDate: 2026-06-05 20:21:59 +0000
Commit:     Konstantin Belousov <[email protected]>
CommitDate: 2026-06-08 20:14:20 +0000

    exit1(9): do not deadlock if exit is called due to PT_SC_REMOTERQ
    
    The remote syscall is executed in a context where the debugger owns
    a p_lock hold on the target.  Because of this, exit1() would wait
    for p_lock to drop to zero, which never happens.
    
    In that case, postpone the exit1() call to ast, saving the provided
    rval and signo in the struct proc.  Mark the async-exiting proc with
    the new p_flag P_ASYNC_EXIT.
    
    While p_xexit can be reused, p_xsig can only be set by the actual
    exit1(), otherwise it breaks the ptrace mechanism.  Allocate a
    dedicated p_asig to hold the pending signal instead.
    
    Reviewed by:    markj
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D57482
---
 sys/compat/linux/linux_fork.c |  4 +--
 sys/compat/linux/linux_misc.c |  4 +--
 sys/kern/kern_exec.c          | 13 +++++-----
 sys/kern/kern_exit.c          | 57 ++++++++++++++++++++++++++++++++++++++++---
 sys/kern/kern_fork.c          |  3 ++-
 sys/kern/kern_ucoredump.c     |  4 +--
 sys/sys/proc.h                |  4 ++-
 sys/sys/signalvar.h           |  2 +-
 sys/sys/syscallsubr.h         |  1 +
 9 files changed, 72 insertions(+), 20 deletions(-)

diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c
index 1c9189162a09..24c5d3351623 100644
--- a/sys/compat/linux/linux_fork.c
+++ b/sys/compat/linux/linux_fork.c
@@ -486,8 +486,8 @@ linux_exit(struct thread *td, struct linux_exit_args *args)
         * exit via pthread_exit() try thr_exit() first.
         */
        kern_thr_exit(td);
-       exit1(td, args->rval, 0);
-               /* NOTREACHED */
+       kern_exit(td, args->rval, 0);
+       return (0);
 }
 
 int
diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c
index eafba4f4bd19..c863e1db8b02 100644
--- a/sys/compat/linux/linux_misc.c
+++ b/sys/compat/linux/linux_misc.c
@@ -1545,8 +1545,8 @@ linux_exit_group(struct thread *td, struct 
linux_exit_group_args *args)
         * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
         * as it doesnt occur often.
         */
-       exit1(td, args->error_code, 0);
-               /* NOTREACHED */
+       kern_exit(td, args->error_code, 0);
+       return (0);
 }
 
 #define _LINUX_CAPABILITY_VERSION_1  0x19980330
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 4066682cbcc5..8ea00543989e 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -341,11 +341,11 @@ post_execve(struct thread *td, int error, struct vmspace 
*oldvmspace)
 }
 
 /*
- * kern_execve() has the astonishing property of not always returning to
- * the caller.  If sufficiently bad things happen during the call to
- * do_execve(), it can end up calling exit1(); as a result, callers must
- * avoid doing anything which they might need to undo (e.g., allocating
- * memory).
+ * kern_execve() has the astonishing property of not always returning
+ * to the caller.  If sufficiently bad things happen during the call
+ * to do_execve(), it can end up calling kern_exit(). Callers must avoid
+ * doing anything which they might need to undo (e.g., allocating
+ * memory), unless called from the ptrace(PT_SC_REMOTERQ) handler.
  */
 int
 kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
@@ -1042,8 +1042,7 @@ exec_fail:
        if (error && imgp->vmspace_destroyed) {
                /* sorry, no more process anymore. exit gracefully */
                exec_cleanup(td, oldvmspace);
-               exit1(td, 0, SIGABRT);
-               /* NOT REACHED */
+               kern_exit(td, 0, SIGABRT);
        }
 
 #ifdef KTRACE
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 18ea3a7bd29d..63e46dcf46f7 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -204,9 +204,8 @@ exit_onexit(struct proc *p)
 int
 sys__exit(struct thread *td, struct _exit_args *uap)
 {
-
-       exit1(td, uap->rval, 0);
-       __unreachable();
+       kern_exit(td, uap->rval, 0);
+       return (0);
 }
 
 void
@@ -216,6 +215,48 @@ proc_set_p2_wexit(struct proc *p)
        p->p_flag2 |= P2_WEXIT;
 }
 
+static void
+ast_async_exit(struct thread *td, int asts)
+{
+       struct proc *p;
+
+       p = td->td_proc;
+       if ((p->p_flag & P_ASYNC_EXIT) != 0)
+               exit1(td, p->p_xexit, p->p_asig);
+}
+
+/*
+ * The variation on exit1() intended to be used in the syscall
+ * handlers.  Unlike exit1(), it might delay the current process exit
+ * to ast.  This is needed e.g. when _exit(2) is executed due to the
+ * ptrace(PT_SC_REMOTERQ), which must do more work after the syscall
+ * handler call.
+ */
+void
+kern_exit(struct thread *td, int rval, int signo)
+{
+       struct proc *p;
+
+       KASSERT(rval == 0 || signo == 0,
+           ("kern_exit rv %d sig %d", rval, signo));
+
+       p = td->td_proc;
+       if ((td->td_dbgflags & TDB_SCREMOTEREQ) != 0) {
+               PROC_LOCK(p);
+               p->p_xexit = rval;
+               p->p_asig = signo;
+               p->p_flag |= P_ASYNC_EXIT;
+               ast_sched(td, TDA_ASYNC_EXIT);
+               PROC_UNLOCK(p);
+               return;
+       }
+       if ((p->p_flag & P_ASYNC_EXIT) != 0) {
+               rval = p->p_xexit;
+               signo = p->p_asig;
+       }
+       exit1(td, rval, signo);
+}
+
 /*
  * Exit: deallocate address space and other resources, change proc state to
  * zombie, and unlink proc from allproc and parent's lists.  Save exit status
@@ -231,6 +272,7 @@ exit1(struct thread *td, int rval, int signo)
 
        mtx_assert(&Giant, MA_NOTOWNED);
        KASSERT(rval == 0 || signo == 0, ("exit1 rv %d sig %d", rval, signo));
+       MPASS((td->td_dbgflags & TDB_SCREMOTEREQ) == 0);
        TSPROCEXIT(td->td_proc->p_pid);
 
        p = td->td_proc;
@@ -828,7 +870,7 @@ out:
        sbuf_delete(sb);
        PROC_LOCK(p);
        sigexit(td, sig);
-       /* NOTREACHED */
+       return (0);
 }
 
 #ifdef COMPAT_43
@@ -1627,3 +1669,10 @@ proc_reparent(struct proc *child, struct proc *parent, 
bool set_oppid)
        if (set_oppid)
                child->p_oppid = parent->p_pid;
 }
+
+static void
+initexit(void *dummy __unused)
+{
+       ast_register(TDA_ASYNC_EXIT, ASTR_ASTF_REQUIRED, 0, ast_async_exit);
+}
+SYSINIT(exit, SI_SUB_EXEC, SI_ORDER_ANY, initexit, NULL);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 75f8413e5f36..2fb4d9d4274d 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -66,6 +66,7 @@
 #include <sys/signalvar.h>
 #include <sys/sx.h>
 #include <sys/syscall.h>
+#include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/vmmeter.h>
@@ -1258,7 +1259,7 @@ fork_return(struct thread *td, struct trapframe *frame)
         * If the prison was killed mid-fork, die along with it.
         */
        if (!prison_isalive(td->td_ucred->cr_prison))
-               exit1(td, 0, SIGKILL);
+               kern_exit(td, 0, SIGKILL);
 
 #ifdef KTRACE
        if (KTRPOINT(td, KTR_SYSRET))
diff --git a/sys/kern/kern_ucoredump.c b/sys/kern/kern_ucoredump.c
index d425596b5f24..e08490fbf7b1 100644
--- a/sys/kern/kern_ucoredump.c
+++ b/sys/kern/kern_ucoredump.c
@@ -46,6 +46,7 @@
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/rmlock.h>
+#include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/ucoredump.h>
@@ -197,8 +198,7 @@ sigexit(struct thread *td, int sig)
                            err != NULL ? err : "");
        } else
                PROC_UNLOCK(p);
-       exit1(td, 0, sig);
-       /* NOTREACHED */
+       kern_exit(td, 0, sig);
 }
 
 
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index ed69a09422e2..5f017e6ece2c 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -504,6 +504,7 @@ enum {
        TDA_MOD3,               /* .. and after */
        TDA_MOD4,
        TDA_SCHED_PRIV,
+       TDA_ASYNC_EXIT,
        TDA_MAX,
 };
 #define        TDAI(tda)               (1U << (tda))
@@ -777,6 +778,7 @@ struct proc {
 
        TAILQ_HEAD(, kq_timer_cb_data)  p_kqtim_stop;   /* (c) */
        LIST_ENTRY(proc) p_jaillist;    /* (d) Jail process linkage. */
+       u_int           p_asig;         /* (c) ASYNCEXIT pending signal. */
 };
 
 #define        p_session       p_pgrp->pg_session
@@ -842,7 +844,7 @@ struct proc {
 #define        P_INEXEC        0x04000000      /* Process is in execve(). */
 #define        P_STATCHILD     0x08000000      /* Child process stopped or 
exited. */
 #define        P_INMEM         0x10000000      /* Loaded into memory, always 
set. */
-#define        P_UNUSED1       0x20000000      /* --available-- */
+#define        P_ASYNC_EXIT    0x20000000      /* XXX */
 #define        P_UNUSED2       0x40000000      /* --available-- */
 #define        P_PPTRACE       0x80000000      /* PT_TRACEME by vforked child. 
*/
 
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index 9a4009d269af..c7b3b620a459 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -399,7 +399,7 @@ int sigacts_shared(struct sigacts *ps);
 int    sig_ast_checksusp(struct thread *td);
 int    sig_ast_needsigchk(struct thread *td);
 void   sig_drop_caught(struct proc *p);
-void   sigexit(struct thread *td, int sig) __dead2;
+void   sigexit(struct thread *td, int sig);
 int    sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **);
 void   sigfastblock_clear(struct thread *td);
 void   sigfastblock_fetch(struct thread *td);
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
index 8d546428820e..0eb471cc9dde 100644
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -136,6 +136,7 @@ int kern_cpuset_setid(struct thread *td, cpuwhich_t which,
 int    kern_dup(struct thread *td, u_int mode, int flags, int old, int new);
 int    kern_execve(struct thread *td, struct image_args *args,
            struct mac *mac_p, struct vmspace *oldvmspace);
+void   kern_exit(struct thread *, int, int);
 int    kern_extattr_delete_fd(struct thread *td, int fd, int attrnamespace,
            const char *attrname);
 int    kern_extattr_delete_path(struct thread *td, const char *path,

Reply via email to