The branch stable/15 has been updated by markj:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=f15e54924ccbf405972eeacd3c2ad65df29c4a8d

commit f15e54924ccbf405972eeacd3c2ad65df29c4a8d
Author:     Mark Johnston <[email protected]>
AuthorDate: 2025-11-16 15:49:39 +0000
Commit:     Mark Johnston <[email protected]>
CommitDate: 2025-12-01 16:37:57 +0000

    file: Add a fdclose method
    
    Consider a program that creates a unix socket pair, transmits both
    sockets from one to the other using an SCM_RIGHTS message, and then
    closes both sockets without externalizing the message.  unp_gc() is
    supposed to handle cleanup, but it is only triggered by uipc_detach(),
    which runs when a unix socket is destroyed.  Because the two sockets are
    internalized, their refcounts are positive, so uipc_detach() isn't
    called.
    
    As a result, a userspace program can create an unbounded amount of
    garbage without triggering reclaim.  Let's trigger garbage collection
    whenever a unix socket is close()d.  To implement this, add new a
    fdclose file op and protocol op, and implement them accordingly.  Since
    mqueuefs has a hack to hook into the file close path, convert it to use
    the new op as well.
    
    Now, userspace can't create garbage without triggering reclamation.
    
    Reviewed by:    glebius, kib
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D53744
    
    (cherry picked from commit 1390bba42caf53a00fa370f3844cd7b3725ed4ec)
---
 sys/kern/kern_descrip.c |  8 ++-----
 sys/kern/sys_socket.c   | 12 ++++++++++
 sys/kern/uipc_mqueue.c  | 59 +++++++++++++++++++++++--------------------------
 sys/kern/uipc_socket.c  |  3 ++-
 sys/kern/uipc_usrreq.c  | 27 ++++++++++++++++++----
 sys/sys/file.h          |  2 ++
 sys/sys/mqueue.h        |  5 -----
 sys/sys/protosw.h       |  7 ++++--
 8 files changed, 74 insertions(+), 49 deletions(-)

diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 58b048d780f1..ebc82b8dc427 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -194,7 +194,6 @@ struct filedesc0 {
  */
 static int __exclusive_cache_line openfiles; /* actual number of open files */
 struct mtx sigio_lock;         /* mtx to protect pointers to sigio */
-void __read_mostly (*mq_fdclose)(struct thread *td, int fd, struct file *fp);
 
 /*
  * If low >= size, just return low. Otherwise find the first zero bit in the
@@ -1404,11 +1403,8 @@ closefp_impl(struct filedesc *fdp, int fd, struct file 
*fp, struct thread *td,
        if (__predict_false(!TAILQ_EMPTY(&fdp->fd_kqlist)))
                knote_fdclose(td, fd);
 
-       /*
-        * We need to notify mqueue if the object is of type mqueue.
-        */
-       if (__predict_false(fp->f_type == DTYPE_MQUEUE))
-               mq_fdclose(td, fd, fp);
+       if (fp->f_ops->fo_fdclose != NULL)
+               fp->f_ops->fo_fdclose(fp, fd, td);
        FILEDESC_XUNLOCK(fdp);
 
 #ifdef AUDIT
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
index c221106ae067..a326b9935290 100644
--- a/sys/kern/sys_socket.c
+++ b/sys/kern/sys_socket.c
@@ -90,6 +90,7 @@ static fo_poll_t soo_poll;
 static fo_kqfilter_t soo_kqfilter;
 static fo_stat_t soo_stat;
 static fo_close_t soo_close;
+static fo_fdclose_t soo_fdclose;
 static fo_chmod_t soo_chmod;
 static fo_fill_kinfo_t soo_fill_kinfo;
 static fo_aio_queue_t soo_aio_queue;
@@ -105,6 +106,7 @@ const struct fileops socketops = {
        .fo_kqfilter = soo_kqfilter,
        .fo_stat = soo_stat,
        .fo_close = soo_close,
+       .fo_fdclose = soo_fdclose,
        .fo_chmod = soo_chmod,
        .fo_chown = invfo_chown,
        .fo_sendfile = invfo_sendfile,
@@ -362,6 +364,16 @@ soo_close(struct file *fp, struct thread *td)
        return (error);
 }
 
+static void
+soo_fdclose(struct file *fp, int fd __unused, struct thread *td)
+{
+       struct socket *so;
+
+       so = fp->f_data;
+       if (so->so_proto->pr_fdclose != NULL)
+               so->so_proto->pr_fdclose(so);
+}
+
 static int
 soo_chmod(struct file *fp, mode_t mode, struct ucred *cred, struct thread *td)
 {
diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c
index b808da088ffd..e26a21a6efe0 100644
--- a/sys/kern/uipc_mqueue.c
+++ b/sys/kern/uipc_mqueue.c
@@ -267,7 +267,6 @@ static int  _mqueue_send(struct mqueue *mq, struct 
mqueue_msg *msg,
 static int     _mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg,
                        int timo);
 static void    mqueue_send_notification(struct mqueue *mq);
-static void    mqueue_fdclose(struct thread *td, int fd, struct file *fp);
 static void    mq_proc_exit(void *arg, struct proc *p);
 
 /*
@@ -688,7 +687,6 @@ mqfs_init(struct vfsconf *vfc)
        mqfs_fixup_dir(root);
        exit_tag = EVENTHANDLER_REGISTER(process_exit, mq_proc_exit, NULL,
            EVENTHANDLER_PRI_ANY);
-       mq_fdclose = mqueue_fdclose;
        p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING);
        mqfs_osd_jail_slot = osd_jail_register(NULL, methods);
        return (0);
@@ -2472,35 +2470,6 @@ sys_kmq_notify(struct thread *td, struct kmq_notify_args 
*uap)
        return (kern_kmq_notify(td, uap->mqd, evp));
 }
 
-static void
-mqueue_fdclose(struct thread *td, int fd, struct file *fp)
-{
-       struct mqueue *mq;
-#ifdef INVARIANTS
-       struct filedesc *fdp;
-
-       fdp = td->td_proc->p_fd;
-       FILEDESC_LOCK_ASSERT(fdp);
-#endif
-
-       if (fp->f_ops == &mqueueops) {
-               mq = FPTOMQ(fp);
-               mtx_lock(&mq->mq_mutex);
-               notifier_remove(td->td_proc, mq, fd);
-
-               /* have to wakeup thread in same process */
-               if (mq->mq_flags & MQ_RSEL) {
-                       mq->mq_flags &= ~MQ_RSEL;
-                       selwakeup(&mq->mq_rsel);
-               }
-               if (mq->mq_flags & MQ_WSEL) {
-                       mq->mq_flags &= ~MQ_WSEL;
-                       selwakeup(&mq->mq_wsel);
-               }
-               mtx_unlock(&mq->mq_mutex);
-       }
-}
-
 static void
 mq_proc_exit(void *arg __unused, struct proc *p)
 {
@@ -2566,6 +2535,33 @@ mqf_close(struct file *fp, struct thread *td)
        return (0);
 }
 
+static void
+mqf_fdclose(struct file *fp, int fd, struct thread *td)
+{
+       struct mqueue *mq;
+#ifdef INVARIANTS
+       struct filedesc *fdp;
+
+       fdp = td->td_proc->p_fd;
+       FILEDESC_LOCK_ASSERT(fdp);
+#endif
+
+       mq = FPTOMQ(fp);
+       mtx_lock(&mq->mq_mutex);
+       notifier_remove(td->td_proc, mq, fd);
+
+       /* have to wakeup thread in same process */
+       if (mq->mq_flags & MQ_RSEL) {
+               mq->mq_flags &= ~MQ_RSEL;
+               selwakeup(&mq->mq_rsel);
+       }
+       if (mq->mq_flags & MQ_WSEL) {
+               mq->mq_flags &= ~MQ_WSEL;
+               selwakeup(&mq->mq_wsel);
+       }
+       mtx_unlock(&mq->mq_mutex);
+}
+
 static int
 mqf_stat(struct file *fp, struct stat *st, struct ucred *active_cred)
 {
@@ -2694,6 +2690,7 @@ static const struct fileops mqueueops = {
        .fo_kqfilter            = mqf_kqfilter,
        .fo_stat                = mqf_stat,
        .fo_close               = mqf_close,
+       .fo_fdclose             = mqf_fdclose,
        .fo_chmod               = mqf_chmod,
        .fo_chown               = mqf_chown,
        .fo_sendfile            = invfo_sendfile,
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index eb9544628137..00aa5f9309b2 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -54,7 +54,8 @@
  * consumer of a socket is starting to tear down the socket, and that the
  * protocol should terminate the connection.  Historically, pr_abort() also
  * detached protocol state from the socket state, but this is no longer the
- * case.
+ * case.  pr_fdclose() is called when userspace invokes close(2) on a socket
+ * file descriptor.
  *
  * socreate() creates a socket and attaches protocol state.  This is a public
  * interface that may be used by socket layer consumers to create new
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 05e267b8ae2b..6569422a3710 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -792,13 +792,19 @@ uipc_connect2(struct socket *so1, struct socket *so2)
        return (0);
 }
 
+static void
+maybe_schedule_gc(void)
+{
+       if (atomic_load_int(&unp_rights) != 0)
+               taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
+}
+
 static void
 uipc_detach(struct socket *so)
 {
        struct unpcb *unp, *unp2;
        struct mtx *vplock;
        struct vnode *vp;
-       int local_unp_rights;
 
        unp = sotounpcb(so);
        KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
@@ -854,7 +860,6 @@ uipc_detach(struct socket *so)
        UNP_REF_LIST_UNLOCK();
 
        UNP_PCB_LOCK(unp);
-       local_unp_rights = unp_rights;
        unp->unp_socket->so_pcb = NULL;
        unp->unp_socket = NULL;
        free(unp->unp_addr, M_SONAME);
@@ -865,8 +870,7 @@ uipc_detach(struct socket *so)
                mtx_unlock(vplock);
                vrele(vp);
        }
-       if (local_unp_rights)
-               taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
+       maybe_schedule_gc();
 
        switch (so->so_type) {
        case SOCK_STREAM:
@@ -902,6 +906,18 @@ uipc_disconnect(struct socket *so)
        return (0);
 }
 
+static void
+uipc_fdclose(struct socket *so __unused)
+{
+       /*
+        * Ensure that userspace can't create orphaned file descriptors without
+        * triggering garbage collection.  Triggering GC from uipc_detach() is
+        * not sufficient, since that's only closed once a socket reference
+        * count drops to zero.
+        */
+       maybe_schedule_gc();
+}
+
 static int
 uipc_listen(struct socket *so, int backlog, struct thread *td)
 {
@@ -4374,6 +4390,7 @@ static struct protosw streamproto = {
        .pr_connect2 =          uipc_connect2,
        .pr_detach =            uipc_detach,
        .pr_disconnect =        uipc_disconnect,
+       .pr_fdclose =           uipc_fdclose,
        .pr_listen =            uipc_listen,
        .pr_peeraddr =          uipc_peeraddr,
        .pr_send =              uipc_sendfile,
@@ -4404,6 +4421,7 @@ static struct protosw dgramproto = {
        .pr_connect2 =          uipc_connect2,
        .pr_detach =            uipc_detach,
        .pr_disconnect =        uipc_disconnect,
+       .pr_fdclose =           uipc_fdclose,
        .pr_peeraddr =          uipc_peeraddr,
        .pr_sosend =            uipc_sosend_dgram,
        .pr_sense =             uipc_sense,
@@ -4428,6 +4446,7 @@ static struct protosw seqpacketproto = {
        .pr_connect2 =          uipc_connect2,
        .pr_detach =            uipc_detach,
        .pr_disconnect =        uipc_disconnect,
+       .pr_fdclose =           uipc_fdclose,
        .pr_listen =            uipc_listen,
        .pr_peeraddr =          uipc_peeraddr,
        .pr_sense =             uipc_sense,
diff --git a/sys/sys/file.h b/sys/sys/file.h
index b2bd62f244da..b537daa27ff5 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -114,6 +114,7 @@ typedef     int fo_kqfilter_t(struct file *fp, struct knote 
*kn);
 typedef        int fo_stat_t(struct file *fp, struct stat *sb,
                    struct ucred *active_cred);
 typedef        int fo_close_t(struct file *fp, struct thread *td);
+typedef        void fo_fdclose_t(struct file *fp, int fd, struct thread *td);
 typedef        int fo_chmod_t(struct file *fp, mode_t mode,
                    struct ucred *active_cred, struct thread *td);
 typedef        int fo_chown_t(struct file *fp, uid_t uid, gid_t gid,
@@ -151,6 +152,7 @@ struct fileops {
        fo_kqfilter_t   *fo_kqfilter;
        fo_stat_t       *fo_stat;
        fo_close_t      *fo_close;
+       fo_fdclose_t    *fo_fdclose;
        fo_chmod_t      *fo_chmod;
        fo_chown_t      *fo_chown;
        fo_sendfile_t   *fo_sendfile;
diff --git a/sys/sys/mqueue.h b/sys/sys/mqueue.h
index 50f6681ce218..211f09d57427 100644
--- a/sys/sys/mqueue.h
+++ b/sys/sys/mqueue.h
@@ -37,9 +37,4 @@ struct mq_attr {
        long    __reserved[4];  /* Ignored for input, zeroed for output */
 };
 
-#ifdef _KERNEL
-struct thread;
-struct file;
-extern void    (*mq_fdclose)(struct thread *td, int fd, struct file *fp);
-#endif
 #endif
diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h
index 7fb8dfdc7208..392ff7cf678e 100644
--- a/sys/sys/protosw.h
+++ b/sys/sys/protosw.h
@@ -94,6 +94,7 @@ typedef int   pr_sopoll_t(struct socket *, int, struct thread 
*);
 typedef int    pr_kqfilter_t(struct socket *, struct knote *);
 typedef void   pr_sosetlabel_t(struct socket *);
 typedef void   pr_close_t(struct socket *);
+typedef void   pr_fdclose_t(struct socket *);
 typedef int    pr_bindat_t(int, struct socket *, struct sockaddr *,
                    struct thread *);
 typedef int    pr_connectat_t(int, struct socket *, struct sockaddr *,
@@ -120,8 +121,8 @@ struct protosw {
        pr_detach_t     *pr_detach;     /* destruction: sofree() */
        pr_connect_t    *pr_connect;    /* connect(2) */
        pr_disconnect_t *pr_disconnect; /* sodisconnect() */
-       pr_close_t      *pr_close;      /* close(2) */
-       pr_shutdown_t   *pr_shutdown;   /* shutdown(2) */
+       pr_close_t      *pr_close;      /* soclose(), socket refcount is 0 */
+       pr_fdclose_t    *pr_fdclose;    /* close(2) */
        pr_rcvd_t       *pr_rcvd;       /* soreceive_generic() if PR_WANTRCVD */
        pr_aio_queue_t  *pr_aio_queue;  /* aio(9) */
 /* Cache line #3 */
@@ -142,7 +143,9 @@ struct protosw {
        pr_sosetlabel_t *pr_sosetlabel; /* MAC, XXXGL: remove */
        pr_setsbopt_t   *pr_setsbopt;   /* Socket buffer ioctls */
        pr_chmod_t      *pr_chmod;      /* fchmod(2) */
+/* Cache line #5 */
        pr_kqfilter_t   *pr_kqfilter;   /* kevent(2) */
+       pr_shutdown_t   *pr_shutdown;   /* shutdown(2) */
 };
 #endif /* defined(_KERNEL) || defined(_WANT_PROTOSW) */
 #ifdef _KERNEL

Reply via email to