The branch stable/15 has been updated by jamie:

URL: https://cgit.FreeBSD.org/src/commit/?id=9d7f89ef26073bb56e6ec9c3370089067a71babc

commit 9d7f89ef26073bb56e6ec9c3370089067a71babc
Author:     Jamie Gritton <ja...@freebsd.org>
AuthorDate: 2025-09-12 18:33:19 +0000
Commit:     Jamie Gritton <ja...@freebsd.org>
CommitDate: 2025-09-15 03:33:48 +0000

    jaildesc: add kevent support
    
    Give jail descriptors the same kevent flags as jails.  Also fix the
    event reporting in jails, where it was including data for events the
    user didn't ask for.
    
    (cherry picked from commit 66d8ffe3046ded1eb3f78599c6af8eb965482ef5)
---
 lib/libsys/kqueue.2      |  15 +++++-
 sys/kern/kern_event.c    |  18 ++++---
 sys/kern/kern_jail.c     |   1 +
 sys/kern/kern_jaildesc.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++-
 sys/sys/event.h          |   5 +-
 sys/sys/jaildesc.h       |   4 ++
 6 files changed, 168 insertions(+), 13 deletions(-)

diff --git a/lib/libsys/kqueue.2 b/lib/libsys/kqueue.2
index aafb5317c5e0..96c9b0222a37 100644
--- a/lib/libsys/kqueue.2
+++ b/lib/libsys/kqueue.2
@@ -22,7 +22,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd September 11, 2025
+.Dd September 12, 2025
 .Dt KQUEUE 2
 .Os
 .Sh NAME
@@ -638,6 +638,19 @@ or
 .Dv NOTE_JAIL_CHILD
 event has been received since the last call to
 .Fn kevent .
+.It Dv EVFILT_JAILDESC
+Takes a jail descriptor returned by
+.Xr jail_set 2
+or
+.Xr jail_get 2
+as the identifier and the events to watch for in
+.Va fflags ,
+and returns when the jail performs one or more of the requested events.
+The events to monitor and the resulting
+.Va fflags
+are the same as those listed in
+.Dv EVFILT_JAIL ,
+above.
 .It Dv EVFILT_TIMER
 Establishes an arbitrary timer identified by
 .Va ident .
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index 8d1ff313735b..57cbfb8a0361 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -51,6 +51,7 @@
 #include <sys/filio.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
+#include <sys/jaildesc.h>
 #include <sys/kthread.h>
 #include <sys/selinfo.h>
 #include <sys/queue.h>
@@ -376,6 +377,7 @@ static struct {
        [~EVFILT_SENDFILE] = { &null_filtops },
        [~EVFILT_EMPTY] = { &file_filtops, 1 },
        [~EVFILT_JAIL] = { &jail_filtops, 1 },
+       [~EVFILT_JAILDESC] = { &file_filtops, 1 },
 };
 
 /*
@@ -682,15 +684,15 @@ filt_jail(struct knote *kn, long hint)
            (u_int)hint & NOTE_JAIL_CTRLMASK;
 
        /* If the user is interested in this event, record it. */
-       if (kn->kn_sfflags & event)
+       if (kn->kn_sfflags & event) {
                kn->kn_fflags |= event;
-
-       /* Report the created jail id or attached process id. */
-       if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) {
-               if (kn->kn_data != 0)
-                       kn->kn_fflags |= NOTE_JAIL_MULTI;
-               kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U :
-                   (u_int)hint & ~event;
+               /* Report the created jail id or attached process id. */
+               if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) {
+                       if (kn->kn_data != 0)
+                               kn->kn_fflags |= NOTE_JAIL_MULTI;
+                       kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U :
+                           (u_int)hint & ~event;
+               }
        }
 
        /* Prison is gone, so flag the event as finished. */
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index d90ccf4a04c8..43035dc009b3 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -5371,6 +5371,7 @@ prison_knote(struct prison *pr, long hint)
        if (!locked)
                mtx_lock(&pr->pr_mtx);
        KNOTE_LOCKED(pr->pr_klist, hint);
+       jaildesc_knote(pr, hint);
        if (!locked)
                mtx_unlock(&pr->pr_mtx);
 }
diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c
index c9e80f5d8941..3f322b271400 100644
--- a/sys/kern/kern_jaildesc.c
+++ b/sys/kern/kern_jaildesc.c
@@ -36,6 +36,7 @@
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
+#include <sys/poll.h>
 #include <sys/priv.h>
 #include <sys/stat.h>
 #include <sys/sysproto.h>
@@ -46,6 +47,8 @@
 
 MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors");
 
+static fo_poll_t       jaildesc_poll;
+static fo_kqfilter_t   jaildesc_kqfilter;
 static fo_stat_t       jaildesc_stat;
 static fo_close_t      jaildesc_close;
 static fo_fill_kinfo_t jaildesc_fill_kinfo;
@@ -56,8 +59,8 @@ static struct fileops jaildesc_ops = {
        .fo_write = invfo_rdwr,
        .fo_truncate = invfo_truncate,
        .fo_ioctl = invfo_ioctl,
-       .fo_poll = invfo_poll,
-       .fo_kqfilter = invfo_kqfilter,
+       .fo_poll = jaildesc_poll,
+       .fo_kqfilter = jaildesc_kqfilter,
        .fo_stat = jaildesc_stat,
        .fo_close = jaildesc_close,
        .fo_chmod = invfo_chmod,
@@ -135,6 +138,7 @@ jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning)
        finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0 ?
            FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops);
        JAILDESC_LOCK_INIT(jd);
+       knlist_init_mtx(&jd->jd_selinfo.si_note, &jd->jd_lock);
        if (owning)
                jd->jd_flags |= JDF_OWNING;
        *fpp = fp;
@@ -176,6 +180,36 @@ jaildesc_prison_cleanup(struct prison *pr)
        }
 }
 
+/*
+ * Pass a note to all listening kqueues.
+ */
+void
+jaildesc_knote(struct prison *pr, long hint)
+{
+       struct jaildesc *jd;
+       int prison_locked;
+
+       if (!LIST_EMPTY(&pr->pr_descs)) {
+               prison_locked = mtx_owned(&pr->pr_mtx);
+               if (!prison_locked)
+                       prison_lock(pr);
+               LIST_FOREACH(jd, &pr->pr_descs, jd_list) {
+                       JAILDESC_LOCK(jd);
+                       if (hint == NOTE_JAIL_REMOVE) {
+                               jd->jd_flags |= JDF_REMOVED;
+                               if (jd->jd_flags & JDF_SELECTED) {
+                                       jd->jd_flags &= ~JDF_SELECTED;
+                                       selwakeup(&jd->jd_selinfo);
+                               }
+                       }
+                       KNOTE_LOCKED(&jd->jd_selinfo.si_note, hint);
+                       JAILDESC_UNLOCK(jd);
+               }
+               if (!prison_locked)
+                       prison_unlock(pr);
+       }
+}
+
 static int
 jaildesc_close(struct file *fp, struct thread *td)
 {
@@ -223,12 +257,112 @@ jaildesc_close(struct file *fp, struct thread *td)
                        }
                        prison_free(pr);
                }
+               knlist_destroy(&jd->jd_selinfo.si_note);
                JAILDESC_LOCK_DESTROY(jd);
                free(jd, M_JAILDESC);
        }
        return (0);
 }
 
+static int
+jaildesc_poll(struct file *fp, int events, struct ucred *active_cred,
+    struct thread *td)
+{
+       struct jaildesc *jd;
+       int revents;
+
+       revents = 0;
+       jd = fp->f_data;
+       JAILDESC_LOCK(jd);
+       if (jd->jd_flags & JDF_REMOVED)
+               revents |= POLLHUP;
+       if (revents == 0) {
+               selrecord(td, &jd->jd_selinfo);
+               jd->jd_flags |= JDF_SELECTED;
+       }
+       JAILDESC_UNLOCK(jd);
+       return (revents);
+}
+
+static void
+jaildesc_kqops_detach(struct knote *kn)
+{
+       struct jaildesc *jd;
+
+       jd = kn->kn_fp->f_data;
+       knlist_remove(&jd->jd_selinfo.si_note, kn, 0);
+}
+
+static int
+jaildesc_kqops_event(struct knote *kn, long hint)
+{
+       struct jaildesc *jd;
+       u_int event;
+
+       jd = kn->kn_fp->f_data;
+       if (hint == 0) {
+               /*
+                * Initial test after registration. Generate a
+                * NOTE_JAIL_REMOVE in case the prison already died
+                * before registration.
+                */
+               event = jd->jd_flags & JDF_REMOVED ? NOTE_JAIL_REMOVE : 0;
+       } else {
+               /*
+                * Mask off extra data.  In the NOTE_JAIL_CHILD case,
+                * that's everything except the NOTE_JAIL_CHILD bit
+                * itself, since a JID is any positive integer.
+                */
+               event = ((u_int)hint & NOTE_JAIL_CHILD) ? NOTE_JAIL_CHILD :
+                   (u_int)hint & NOTE_JAIL_CTRLMASK;
+       }
+
+       /* If the user is interested in this event, record it. */
+       if (kn->kn_sfflags & event) {
+               kn->kn_fflags |= event;
+               /* Report the created jail id or attached process id. */
+               if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) {
+                       if (kn->kn_data != 0)
+                               kn->kn_fflags |= NOTE_JAIL_MULTI;
+                       kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U :
+                           (u_int)hint & ~event;
+               }
+       }
+
+       /* Prison is gone, so flag the event as finished. */
+       if (event == NOTE_JAIL_REMOVE) {
+               kn->kn_flags |= EV_EOF | EV_ONESHOT;
+               if (kn->kn_fflags == 0)
+                       kn->kn_flags |= EV_DROP;
+               return (1);
+       }
+
+       return (kn->kn_fflags != 0);
+}
+
+static const struct filterops jaildesc_kqops = {
+       .f_isfd = 1,
+       .f_detach = jaildesc_kqops_detach,
+       .f_event = jaildesc_kqops_event,
+};
+
+static int
+jaildesc_kqfilter(struct file *fp, struct knote *kn)
+{
+       struct jaildesc *jd;
+
+       jd = fp->f_data;
+       switch (kn->kn_filter) {
+       case EVFILT_JAILDESC:
+               kn->kn_fop = &jaildesc_kqops;
+               kn->kn_flags |= EV_CLEAR;
+               knlist_add(&jd->jd_selinfo.si_note, kn, 0);
+               return (0);
+       default:
+               return (EINVAL);
+       }
+}
+
 static int
 jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
 {
diff --git a/sys/sys/event.h b/sys/sys/event.h
index 91fbaa4834f7..084eaafcbdc0 100644
--- a/sys/sys/event.h
+++ b/sys/sys/event.h
@@ -46,7 +46,8 @@
 #define EVFILT_SENDFILE                (-12)   /* attached to sendfile requests */
 #define EVFILT_EMPTY           (-13)   /* empty send socket buf */
 #define EVFILT_JAIL            (-14)   /* attached to struct prison */
-#define EVFILT_SYSCOUNT                14
+#define EVFILT_JAILDESC                (-15)   /* attached to jail descriptors */
+#define EVFILT_SYSCOUNT                15
 
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
 #define        EV_SET(kevp_, a, b, c, d, e, f) do {    \
@@ -210,7 +211,7 @@ struct freebsd11_kevent32 {
 #define        NOTE_TRACKERR   0x00000002              /* could not track child */
 #define        NOTE_CHILD      0x00000004              /* am a child process */
 
-/* data/hint flags for EVFILT_JAIL */
+/* data/hint flags for EVFILT_JAIL and EVFILT_JAILDESC */
 #define        NOTE_JAIL_CHILD         0x80000000      /* child jail was created */
 #define        NOTE_JAIL_SET           0x40000000      /* jail was modified */
 #define        NOTE_JAIL_ATTACH        0x20000000      /* jail was attached to */
diff --git a/sys/sys/jaildesc.h b/sys/sys/jaildesc.h
index 2451b04f7302..fda270d62e70 100644
--- a/sys/sys/jaildesc.h
+++ b/sys/sys/jaildesc.h
@@ -35,6 +35,7 @@
 #ifdef _KERNEL
 
 #include <sys/queue.h>
+#include <sys/selinfo.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_types.h>
@@ -54,6 +55,7 @@ struct jaildesc {
        LIST_ENTRY(jaildesc) jd_list;   /* (d,p) this prison's descs */
        struct prison   *jd_prison;     /* (d) the prison */
        struct mtx       jd_lock;
+       struct selinfo   jd_selinfo;    /* (d) event notification */
        unsigned         jd_flags;      /* (d) JDF_* flags */
 };
 
@@ -69,6 +71,7 @@ struct jaildesc {
 /*
  * Flags for the jd_flags field
  */
+#define        JDF_SELECTED    0x00000001      /* issue selwakeup() */
 #define        JDF_REMOVED     0x00000002      /* jail was removed */
 #define        JDF_OWNING      0x00000004      /* closing descriptor removes jail */
 
@@ -77,6 +80,7 @@ int jaildesc_find(struct thread *td, int fd, struct prison **prp,
 int jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning);
 void jaildesc_set_prison(struct file *jd, struct prison *pr);
 void jaildesc_prison_cleanup(struct prison *pr);
+void jaildesc_knote(struct prison *pr, long hint);
 
 #endif /* _KERNEL */
 

Reply via email to