The branch main has been updated by jamie:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=851dc7f859c23cab09a348bca03ab655534fb7e0

commit 851dc7f859c23cab09a348bca03ab655534fb7e0
Author:     Jamie Gritton <[email protected]>
AuthorDate: 2025-09-04 20:27:47 +0000
Commit:     Jamie Gritton <[email protected]>
CommitDate: 2025-09-04 20:27:47 +0000

    jail: add jail descriptors
    
    Similar to process descriptors, jail descriptors allow jail
    administration using the file descriptor interface instead of JIDs.
    They come from and can be used by jail_set(2) and jail_get(2),
    and there are two new system calls, jail_attach_jd(2) and
    jail_remove_jd(2).
    
    Reviewed by:    bz, brooks
    Relnotes:       yes
    Differential Revision:  https://reviews.freebsd.org/D43696
---
 lib/libjail/jail.c                             |  64 +++-
 lib/libsys/Symbol.sys.map                      |   2 +
 lib/libsys/_libsys.h                           |   4 +
 lib/libsys/jail.2                              | 267 ++++++++++++++++-
 lib/libsys/syscalls.map                        |   4 +
 sys/compat/freebsd32/freebsd32_syscall.h       |   4 +-
 sys/compat/freebsd32/freebsd32_syscalls.c      |   2 +
 sys/compat/freebsd32/freebsd32_sysent.c        |   2 +
 sys/compat/freebsd32/freebsd32_systrace_args.c |  44 +++
 sys/conf/files                                 |   1 +
 sys/kern/init_sysent.c                         |   2 +
 sys/kern/kern_descrip.c                        |   2 +
 sys/kern/kern_jail.c                           | 396 +++++++++++++++++++++++--
 sys/kern/kern_jaildesc.c                       | 337 +++++++++++++++++++++
 sys/kern/syscalls.c                            |   2 +
 sys/kern/syscalls.master                       |  10 +
 sys/kern/systrace_args.c                       |  44 +++
 sys/sys/file.h                                 |   1 +
 sys/sys/jail.h                                 |  15 +-
 sys/sys/jaildesc.h                             |  85 ++++++
 sys/sys/syscall.h                              |   4 +-
 sys/sys/syscall.mk                             |   4 +-
 sys/sys/sysproto.h                             |  10 +
 sys/sys/user.h                                 |   4 +
 24 files changed, 1256 insertions(+), 54 deletions(-)

diff --git a/lib/libjail/jail.c b/lib/libjail/jail.c
index 30282e67866c..931391055919 100644
--- a/lib/libjail/jail.c
+++ b/lib/libjail/jail.c
@@ -75,8 +75,9 @@ int
 jail_setv(int flags, ...)
 {
        va_list ap, tap;
-       struct jailparam *jp;
-       const char *name, *value;
+       struct jailparam *jp, *jp_desc;
+       const char *name;
+       char *value, *desc_value;
        int njp, jid;
 
        /* Create the parameter list and import the parameters. */
@@ -86,15 +87,24 @@ jail_setv(int flags, ...)
                (void)va_arg(tap, char *);
        va_end(tap);
        jp = alloca(njp * sizeof(struct jailparam));
-       for (njp = 0; (name = va_arg(ap, char *)) != NULL;) {
+       jp_desc = NULL;
+       desc_value = NULL;
+       for (njp = 0; (name = va_arg(ap, char *)) != NULL; njp++) {
                value = va_arg(ap, char *);
                if (jailparam_init(jp + njp, name) < 0)
                        goto error;
-               if (jailparam_import(jp + njp++, value) < 0)
+               if (jailparam_import(jp + njp, value) < 0)
                        goto error;
+               if (!strcmp(name, "desc")
+                   && (flags & (JAIL_GET_DESC | JAIL_OWN_DESC))) {
+                       jp_desc = jp + njp;
+                       desc_value = value;
+               }
        }
        va_end(ap);
        jid = jailparam_set(jp, njp, flags);
+       if (jid > 0 && jp_desc != NULL)
+               sprintf(desc_value, "%d", *(int *)jp_desc->jp_value);
        jailparam_free(jp, njp);
        return (jid);
 
@@ -112,9 +122,10 @@ int
 jail_getv(int flags, ...)
 {
        va_list ap, tap;
-       struct jailparam *jp, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
+       struct jailparam *jp, *jp_desc, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
        char *valarg, *value;
-       const char *name, *key_value, *lastjid_value, *jid_value, *name_value;
+       const char *name, *key_value, *desc_value, *lastjid_value, *jid_value;
+       const char *name_value;
        int njp, i, jid;
 
        /* Create the parameter list and find the key. */
@@ -126,15 +137,19 @@ jail_getv(int flags, ...)
 
        jp = alloca(njp * sizeof(struct jailparam));
        va_copy(tap, ap);
-       jp_lastjid = jp_jid = jp_name = NULL;
-       lastjid_value = jid_value = name_value = NULL;
+       jp_desc = jp_lastjid = jp_jid = jp_name = NULL;
+       desc_value = lastjid_value = jid_value = name_value = NULL;
        for (njp = 0; (name = va_arg(tap, char *)) != NULL; njp++) {
                value = va_arg(tap, char *);
                if (jailparam_init(jp + njp, name) < 0) {
                        va_end(tap);
                        goto error;
                }
-               if (!strcmp(jp[njp].jp_name, "lastjid")) {
+               if (!strcmp(jp[njp].jp_name, "desc")
+                   && (flags & (JAIL_USE_DESC | JAIL_AT_DESC))) {
+                       jp_desc = jp + njp;
+                       desc_value = value;
+               } else if (!strcmp(jp[njp].jp_name, "lastjid")) {
                        jp_lastjid = jp + njp;
                        lastjid_value = value;
                } else if (!strcmp(jp[njp].jp_name, "jid")) {
@@ -147,7 +162,10 @@ jail_getv(int flags, ...)
        }
        va_end(tap);
        /* Import the key parameter. */
-       if (jp_lastjid != NULL) {
+       if (jp_desc != NULL && (flags & JAIL_USE_DESC)) {
+               jp_key = jp_desc;
+               key_value = desc_value;
+       } else if (jp_lastjid != NULL) {
                jp_key = jp_lastjid;
                key_value = lastjid_value;
        } else if (jp_jid != NULL && strtol(jid_value, NULL, 10) != 0) {
@@ -163,6 +181,9 @@ jail_getv(int flags, ...)
        }
        if (jailparam_import(jp_key, key_value) < 0)
                goto error;
+       if (jp_desc != NULL && jp_desc != jp_key
+           && jailparam_import(jp_desc, desc_value) < 0)
+               goto error;
        /* Get the jail and export the parameters. */
        jid = jailparam_get(jp, njp, flags);
        if (jid < 0)
@@ -571,7 +592,7 @@ int
 jailparam_get(struct jailparam *jp, unsigned njp, int flags)
 {
        struct iovec *jiov;
-       struct jailparam *jp_lastjid, *jp_jid, *jp_name, *jp_key;
+       struct jailparam *jp_desc, *jp_lastjid, *jp_jid, *jp_name, *jp_key;
        int i, ai, ki, jid, arrays, sanity;
        unsigned j;
 
@@ -580,10 +601,13 @@ jailparam_get(struct jailparam *jp, unsigned njp, int 
flags)
         * Find the key and any array parameters.
         */
        jiov = alloca(sizeof(struct iovec) * 2 * (njp + 1));
-       jp_lastjid = jp_jid = jp_name = NULL;
+       jp_desc = jp_lastjid = jp_jid = jp_name = NULL;
        arrays = 0;
        for (ai = j = 0; j < njp; j++) {
-               if (!strcmp(jp[j].jp_name, "lastjid"))
+               if (!strcmp(jp[j].jp_name, "desc")
+                   && (flags & (JAIL_USE_DESC | JAIL_AT_DESC)))
+                       jp_desc = jp + j;
+               else if (!strcmp(jp[j].jp_name, "lastjid"))
                        jp_lastjid = jp + j;
                else if (!strcmp(jp[j].jp_name, "jid"))
                        jp_jid = jp + j;
@@ -599,7 +623,9 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
                        ai++;
                }
        }
-       jp_key = jp_lastjid ? jp_lastjid :
+       jp_key = jp_desc && jp_desc->jp_valuelen == sizeof(int) &&
+           jp_desc->jp_value && (flags & JAIL_USE_DESC) ? jp_desc :
+           jp_lastjid ? jp_lastjid :
            jp_jid && jp_jid->jp_valuelen == sizeof(int) &&
            jp_jid->jp_value && *(int *)jp_jid->jp_value ? jp_jid : jp_name;
        if (jp_key == NULL || jp_key->jp_value == NULL) {
@@ -622,6 +648,14 @@ jailparam_get(struct jailparam *jp, unsigned njp, int 
flags)
        jiov[ki].iov_len = JAIL_ERRMSGLEN;
        ki++;
        jail_errmsg[0] = 0;
+       if (jp_desc != NULL && jp_desc != jp_key) {
+               jiov[ki].iov_base = jp_desc->jp_name;
+               jiov[ki].iov_len = strlen(jp_desc->jp_name) + 1;
+               ki++;
+               jiov[ki].iov_base = jp_desc->jp_value;
+               jiov[ki].iov_len = jp_desc->jp_valuelen;
+               ki++;
+       }
        if (arrays && jail_get(jiov, ki, flags) < 0) {
                if (!jail_errmsg[0])
                        snprintf(jail_errmsg, sizeof(jail_errmsg),
@@ -649,7 +683,7 @@ jailparam_get(struct jailparam *jp, unsigned njp, int flags)
                        jiov[ai].iov_base = jp[j].jp_value;
                        memset(jiov[ai].iov_base, 0, jiov[ai].iov_len);
                        ai++;
-               } else if (jp + j != jp_key) {
+               } else if (jp + j != jp_key && jp + j != jp_desc) {
                        jiov[i].iov_base = jp[j].jp_name;
                        jiov[i].iov_len = strlen(jp[j].jp_name) + 1;
                        i++;
diff --git a/lib/libsys/Symbol.sys.map b/lib/libsys/Symbol.sys.map
index 1a297f9df581..e3fd8ac10621 100644
--- a/lib/libsys/Symbol.sys.map
+++ b/lib/libsys/Symbol.sys.map
@@ -382,6 +382,8 @@ FBSD_1.8 {
        getrlimitusage;
        inotify_add_watch_at;
        inotify_rm_watch;
+       jail_attach_jd;
+       jail_remove_jd;
        kcmp;
        setcred;
        setgroups;
diff --git a/lib/libsys/_libsys.h b/lib/libsys/_libsys.h
index 34eebc1aa67a..6bd768708a78 100644
--- a/lib/libsys/_libsys.h
+++ b/lib/libsys/_libsys.h
@@ -468,6 +468,8 @@ typedef int (__sys_inotify_add_watch_at_t)(int, int, const 
char *, uint32_t);
 typedef int (__sys_inotify_rm_watch_t)(int, int);
 typedef int (__sys_getgroups_t)(int, gid_t *);
 typedef int (__sys_setgroups_t)(int, const gid_t *);
+typedef int (__sys_jail_attach_jd_t)(int);
+typedef int (__sys_jail_remove_jd_t)(int);
 
 _Noreturn void __sys__exit(int rval);
 int __sys_fork(void);
@@ -872,6 +874,8 @@ int __sys_inotify_add_watch_at(int fd, int dfd, const char 
* path, uint32_t mask
 int __sys_inotify_rm_watch(int fd, int wd);
 int __sys_getgroups(int gidsetsize, gid_t * gidset);
 int __sys_setgroups(int gidsetsize, const gid_t * gidset);
+int __sys_jail_attach_jd(int fd);
+int __sys_jail_remove_jd(int fd);
 __END_DECLS
 
 #endif /* __LIBSYS_H_ */
diff --git a/lib/libsys/jail.2 b/lib/libsys/jail.2
index 8f8b9925c712..a0f47cc61cb3 100644
--- a/lib/libsys/jail.2
+++ b/lib/libsys/jail.2
@@ -23,7 +23,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd November 29, 2023
+.Dd September 4, 2025
 .Dt JAIL 2
 .Os
 .Sh NAME
@@ -31,7 +31,9 @@
 .Nm jail_get ,
 .Nm jail_set ,
 .Nm jail_remove ,
-.Nm jail_attach
+.Nm jail_attach ,
+.Nm jail_remove_jd ,
+.Nm jail_attach_jd
 .Nd create and manage system jails
 .Sh LIBRARY
 .Lb libc
@@ -44,6 +46,10 @@
 .Fn jail_attach "int jid"
 .Ft int
 .Fn jail_remove "int jid"
+.Ft int
+.Fn jail_attach_jd "int fd"
+.Ft int
+.Fn jail_remove_jd "int fd"
 .In sys/uio.h
 .Ft int
 .Fn jail_get "struct iovec *iov" "u_int niov" "int flags"
@@ -188,6 +194,29 @@ system call.
 This is deprecated in
 .Fn jail_set
 and has no effect.
+.It Dv JAIL_USE_DESC
+Identify the jail by a descriptor in the
+.Va desc
+parameter.
+.It Dv JAIL_AT_DESC
+Operate in the context of the jail described by the
+.Va desc
+parameter, instead of the current jail.
+Only one of
+.Dv JAIL_USE_DESC
+or
+.Dv JAIL_AT_DESC
+may be specified.
+.It Dv JAIL_GET_DESC
+Return a new jail descriptor for the jail in the
+.Va desc
+parameter.
+.It Dv JAIL_OWN_DESC
+Return an
+.Dq owning
+jail descriptor in the
+.Va desc
+parameter.
 .El
 .Pp
 The
@@ -221,6 +250,9 @@ arguments consists of one or more following flags:
 .Bl -tag -width indent
 .It Dv JAIL_DYING
 Allow getting a jail that is in the process of being removed.
+.It Dv JAIL_USE_DESC , Dv JAIL_AT_DESC , Dv JAIL_GET_DESC , Dv JAIL_OWN_DESC
+These have the same meaning as they do in
+.Fn jail_set .
 .El
 .Pp
 The
@@ -238,6 +270,101 @@ system call removes the jail identified by
 .Fa jid .
 It will kill all processes belonging to the jail, and remove any children
 of that jail.
+.Pp
+The
+.Fn jail_attach_jd
+and
+.Fn jail_remove_jd
+system calls work the same as
+.Fn jail_attach
+and
+.Fn jail_remove ,
+except that they operate on the jail identified by jail descriptor
+.Fa fd .
+.Ss Jail Descriptors
+In addition to the jail ID,
+jails can be referred to using a jail descriptor,
+a type of file descriptor tied to a particular jail.
+Jail descriptors are created by calling
+.Fn jail_set
+or
+.Fn jail_get
+with the special parameter
+.Va desc ,
+and either the
+.Dv JAIL_GET_DESC
+or
+.Dv JAIL_OWN_DESC
+flags set.
+The difference between the two flags is that descriptors created with
+.Dv JAIL_OWN_DESC
+.Po
+called
+.Dq owning
+descriptors
+.Pc
+will automatically remove the jail when the descriptor is closed.
+.Pp
+Jail descriptors can be passed back to
+.Fn jail_set
+or
+.Fn jail_get
+with the
+.Va desc
+parameter,
+and either the
+.Dv JAIL_USE_DESC
+or
+.Dv JAIL_AT_DESC
+flags set.
+With
+.Dv JAIL_USE_DESC ,
+the descriptor identifies the jail to operate on,
+instead of the
+.Va jid
+or
+.Va name
+parameter.
+With
+.Dv JAIL_AT_DESC ,
+the descriptor is used in place of the current jail,
+allowing accessing or creating jails that are children of the
+descriptor jail.
+.Pp
+The system calls
+.Fn jail_attach_jd
+and
+.Fn jail_remove_jd
+work the same as
+.Fn jail_attach
+and
+.Fn jail_remove ,
+except that they operate on the jail referred to by the passed descriptor.
+.Pp
+Jail operations via descriptors can be done by processes that do not
+normally have permission to see or affect the jail,
+as long as they are allowed by the file permissions of the jail
+descriptor itself.
+These permissions can be changed by the descriptor owner via
+.Xr fchmod 2
+and
+.Xr fchown 2 .
+.Fn jail_get
+requires read permission,
+.Fn jail_set
+and
+.Fn jail_remove
+require write permission,
+and
+.Fn jail_attach
+requires execute permission.
+Also, use of a descriptor with the
+.Dv JAIL_AT_DESC
+flag requires execute permission.
+An owning descriptor is identified by the
+.Em sticky bit ,
+which may also be changed via
+.Xr fchmod 2 .
 .Sh RETURN VALUES
 If successful,
 .Fn jail ,
@@ -249,7 +376,7 @@ They return \-1 on failure, and set
 .Va errno
 to indicate the error.
 .Pp
-.Rv -std jail_attach jail_remove
+.Rv -std jail_attach jail_remove jail_attach_jd jail_remove_jd
 .Sh ERRORS
 The
 .Fn jail
@@ -275,12 +402,44 @@ The
 system call
 will fail if:
 .Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Va desc
+parameter does not refer to a valid jail descriptor,
+and either the
+.Dv JAIL_USE_DESC
+or
+.Dv JAIL_AT_DESC
+flag was set.
+.It Bq Er EACCES
+Write permission is denied on the jail descriptor in the
+.Va desc
+parameter,
+and the
+.Dv JAIL_USE_DESC
+flag was set.
+.It Bq Er EACCES
+Execute permission is denied on the jail descriptor in the
+.Va desc
+parameter,
+and either the
+.Dv JAIL_AT_DESC
+or
+.Dv JAIL_ATTACH
+flag was set.
 .It Bq Er EPERM
 This process is not allowed to create a jail, either because it is not
 the super-user, or because it would exceed the jail's
 .Va children.max
 limit.
 .It Bq Er EPERM
+The jail descriptor in the
+.Va desc
+parameter was created by a user other than the super-user,
+and the
+.Dv JAIL_USE_DESC
+flag was set.
+.It Bq Er EPERM
 A jail parameter was set to a less restrictive value then the current
 environment.
 .It Bq Er EFAULT
@@ -298,8 +457,12 @@ flag is not set.
 .It Bq Er ENOENT
 The jail referred to by a
 .Va jid
-is not accessible by the process, because the process is in a different
-jail.
+parameter is not accessible by the process, because the process is in a
+different jail.
+.It Bq Er ENOENT
+The jail referred to by a
+.Va desc
+parameter has been removed.
 .It Bq Er EEXIST
 The jail referred to by a
 .Va jid
@@ -326,6 +489,24 @@ flags is not set.
 A supplied string parameter is longer than allowed.
 .It Bq Er EAGAIN
 There are no jail IDs left.
+.It Bq Er EMFILE
+A jail descriptor could not be created for the
+.Va desc
+parameter with either the
+.Dv JAIL_GET_DESC
+or
+.Dv JAIL_OWN_DESC
+flag set,
+because the process has already reached its limit for open file descriptors.
+.It Bq Er ENFILE
+A jail descriptor could not be created for the
+.Va desc
+parameter with either the
+.Dv JAIL_GET_DESC
+or
+.Dv JAIL_OWN_DESC
+flag set,
+because the system file table is full.
 .El
 .Pp
 The
@@ -333,6 +514,29 @@ The
 system call
 will fail if:
 .Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Va desc
+parameter does not refer to a valid jail descriptor,
+and either the
+.Dv JAIL_USE_DESC
+or
+.Dv JAIL_AT_DESC
+flag was set.
+.It Bq Er EACCES
+Read permission is denied on the jail descriptor in the
+.Va desc
+parameter,
+and the
+.Dv JAIL_USE_DESC
+flag was set.
+.It Bq Er EACCES
+Execute permission is denied on the jail descriptor in the
+.Va desc
+parameter,
+and the
+.Dv JAIL_AT_DESC
+flag was set.
 .It Bq Er EFAULT
 .Fa Iov ,
 or one of the addresses contained within it,
@@ -352,10 +556,33 @@ jail.
 The
 .Va lastjid
 parameter is greater than the highest current jail ID.
+.It Bq Er ENOENT
+The jail referred to by a
+.Va desc
+parameter has been removed
+.Pq even if the Dv JAIL_CREATE flag has been set .
 .It Bq Er EINVAL
 A supplied parameter is the wrong size.
 .It Bq Er EINVAL
 A supplied parameter name does not match any known parameters.
+.It Bq Er EMFILE
+A jail descriptor could not be created for the
+.Va desc
+parameter with either the
+.Dv JAIL_GET_DESC
+or
+.Dv JAIL_OWN_DESC
+flag set,
+because the process has already reached its limit for open file descriptors.
+.It Bq Er ENFILE
+A jail descriptor could not be created for the
+.Va desc
+parameter with either the
+.Dv JAIL_GET_DESC
+or
+.Dv JAIL_OWN_DESC
+flag set,
+because the system file table is full.
 .El
 .Pp
 The
@@ -373,11 +600,39 @@ The jail specified by
 does not exist.
 .El
 .Pp
+The
+.Fn jail_attach_jd
+and
+.Fn jail_remove_jd
+system calls
+will fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa fd
+argument is not a valid jail descriptor.
+.It Bq Er EACCES
+Permission is denied on the jail descriptor
+.Po
+execute permission for
+.Fn jail_attach_jd ,
+or write permission for
+.Fn jail_remove_jd
+.Pc .
+.It Bq Er EPERM
+The jail descriptor was created by a user other than the super-user.
+.It Bq Er EINVAL
+The jail specified by
+.Fa fd
+has been removed.
+.El
+.Pp
 Further
 .Fn jail ,
 .Fn jail_set ,
+.Fn jail_attach ,
 and
-.Fn jail_attach
+.Fn jail_attach_jd
 call
 .Xr chroot 2
 internally, so they can fail for all the same reasons.
diff --git a/lib/libsys/syscalls.map b/lib/libsys/syscalls.map
index 4cf80a2ffc69..b5400b9849b3 100644
--- a/lib/libsys/syscalls.map
+++ b/lib/libsys/syscalls.map
@@ -813,4 +813,8 @@ FBSDprivate_1.0 {
        __sys_getgroups;
        _setgroups;
        __sys_setgroups;
+       _jail_attach_jd;
+       __sys_jail_attach_jd;
+       _jail_remove_jd;
+       __sys_jail_remove_jd;
 };
diff --git a/sys/compat/freebsd32/freebsd32_syscall.h 
b/sys/compat/freebsd32/freebsd32_syscall.h
index 90cd21a80923..54063150eef9 100644
--- a/sys/compat/freebsd32/freebsd32_syscall.h
+++ b/sys/compat/freebsd32/freebsd32_syscall.h
@@ -515,4 +515,6 @@
 #define        FREEBSD32_SYS_inotify_rm_watch  594
 #define        FREEBSD32_SYS_getgroups 595
 #define        FREEBSD32_SYS_setgroups 596
-#define        FREEBSD32_SYS_MAXSYSCALL        597
+#define        FREEBSD32_SYS_jail_attach_jd    597
+#define        FREEBSD32_SYS_jail_remove_jd    598
+#define        FREEBSD32_SYS_MAXSYSCALL        599
diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c 
b/sys/compat/freebsd32/freebsd32_syscalls.c
index f0f8d26554b5..f7cc4c284e4d 100644
--- a/sys/compat/freebsd32/freebsd32_syscalls.c
+++ b/sys/compat/freebsd32/freebsd32_syscalls.c
@@ -602,4 +602,6 @@ const char *freebsd32_syscallnames[] = {
        "inotify_rm_watch",                     /* 594 = inotify_rm_watch */
        "getgroups",                    /* 595 = getgroups */
        "setgroups",                    /* 596 = setgroups */
+       "jail_attach_jd",                       /* 597 = jail_attach_jd */
+       "jail_remove_jd",                       /* 598 = jail_remove_jd */
 };
diff --git a/sys/compat/freebsd32/freebsd32_sysent.c 
b/sys/compat/freebsd32/freebsd32_sysent.c
index 12f1a346c3e9..18f809ef04e3 100644
--- a/sys/compat/freebsd32/freebsd32_sysent.c
+++ b/sys/compat/freebsd32/freebsd32_sysent.c
@@ -664,4 +664,6 @@ struct sysent freebsd32_sysent[] = {
        { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t 
*)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, 
.sy_thrcnt = SY_THR_STATIC },      /* 594 = inotify_rm_watch */
        { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, 
.sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = 
SY_THR_STATIC },  /* 595 = getgroups */
        { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, 
.sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },       
/* 596 = setgroups */
+       { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t 
*)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt 
= SY_THR_STATIC },   /* 597 = jail_attach_jd */
+       { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t 
*)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt 
= SY_THR_STATIC },   /* 598 = jail_remove_jd */
 };
diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c 
b/sys/compat/freebsd32/freebsd32_systrace_args.c
index e471c5148021..29a5497e9efa 100644
--- a/sys/compat/freebsd32/freebsd32_systrace_args.c
+++ b/sys/compat/freebsd32/freebsd32_systrace_args.c
@@ -3413,6 +3413,20 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, 
int *n_args)
                *n_args = 2;
                break;
        }
+       /* jail_attach_jd */
+       case 597: {
+               struct jail_attach_jd_args *p = params;
+               iarg[a++] = p->fd; /* int */
+               *n_args = 1;
+               break;
+       }
+       /* jail_remove_jd */
+       case 598: {
+               struct jail_remove_jd_args *p = params;
+               iarg[a++] = p->fd; /* int */
+               *n_args = 1;
+               break;
+       }
        default:
                *n_args = 0;
                break;
@@ -9222,6 +9236,26 @@ systrace_entry_setargdesc(int sysnum, int ndx, char 
*desc, size_t descsz)
                        break;
                };
                break;
+       /* jail_attach_jd */
+       case 597:
+               switch (ndx) {
+               case 0:
+                       p = "int";
+                       break;
+               default:
+                       break;
+               };
+               break;
+       /* jail_remove_jd */
+       case 598:
+               switch (ndx) {
+               case 0:
+                       p = "int";
+                       break;
+               default:
+                       break;
+               };
+               break;
        default:
                break;
        };
@@ -11130,6 +11164,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char 
*desc, size_t descsz)
                if (ndx == 0 || ndx == 1)
                        p = "int";
                break;
+       /* jail_attach_jd */
+       case 597:
+               if (ndx == 0 || ndx == 1)
+                       p = "int";
+               break;
+       /* jail_remove_jd */
+       case 598:
+               if (ndx == 0 || ndx == 1)
+                       p = "int";
+               break;
        default:
                break;
        };
diff --git a/sys/conf/files b/sys/conf/files
index d89813c70355..9661bafea8f9 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3808,6 +3808,7 @@ kern/kern_hhook.c         standard
 kern/kern_idle.c               standard
 kern/kern_intr.c               standard
 kern/kern_jail.c               standard
+kern/kern_jaildesc.c           standard
 kern/kern_jailmeta.c           standard
 kern/kern_kcov.c               optional kcov                   \
        compile-with "${NOSAN_C} ${MSAN_CFLAGS}"
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index fcd232cde21e..e42e7dcf8b44 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -663,4 +663,6 @@ struct sysent sysent[] = {
        { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t 
*)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, 
.sy_thrcnt = SY_THR_STATIC },      /* 594 = inotify_rm_watch */
        { .sy_narg = AS(getgroups_args), .sy_call = (sy_call_t *)sys_getgroups, 
.sy_auevent = AUE_GETGROUPS, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = 
SY_THR_STATIC },  /* 595 = getgroups */
        { .sy_narg = AS(setgroups_args), .sy_call = (sy_call_t *)sys_setgroups, 
.sy_auevent = AUE_SETGROUPS, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },       
/* 596 = setgroups */
+       { .sy_narg = AS(jail_attach_jd_args), .sy_call = (sy_call_t 
*)sys_jail_attach_jd, .sy_auevent = AUE_JAIL_ATTACH, .sy_flags = 0, .sy_thrcnt 
= SY_THR_STATIC },   /* 597 = jail_attach_jd */
+       { .sy_narg = AS(jail_remove_jd_args), .sy_call = (sy_call_t 
*)sys_jail_remove_jd, .sy_auevent = AUE_JAIL_REMOVE, .sy_flags = 0, .sy_thrcnt 
= SY_THR_STATIC },   /* 598 = jail_remove_jd */
 };
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index a27ab33b34da..057235574eb5 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -5250,6 +5250,8 @@ file_type_to_name(short type)
                return ("eventfd");
        case DTYPE_TIMERFD:
                return ("timerfd");
+       case DTYPE_JAILDESC:
+               return ("jail");
        default:
                return ("unkn");
        }
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 52210553016b..5a1fbe23ddeb 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -39,6 +39,7 @@
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
+#include <sys/file.h>
 #include <sys/sysproto.h>
 #include <sys/malloc.h>
 #include <sys/osd.h>
@@ -49,6 +50,7 @@
 #include <sys/taskqueue.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
+#include <sys/jaildesc.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/mman.h>
@@ -988,6 +990,8 @@ prison_ip_cnt(const struct prison *pr, const pr_family_t af)
 int
 kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 {
+       struct file *jfp_out;
+       struct jaildesc *desc_in;
        struct nameidata nd;
 #ifdef INET
        struct prison_ip *ip4;
@@ -998,6 +1002,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
        struct vfsopt *opt;
        struct vfsoptlist *opts;
        struct prison *pr, *deadpr, *dinspr, *inspr, *mypr, *ppr, *tpr;
+       struct ucred *jdcred;
        struct vnode *root;
        char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid;
        char *g_path, *osrelstr;
@@ -1011,7 +1016,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
        int created, cuflags, descend, drflags, enforce;
        int error, errmsg_len, errmsg_pos;
        int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
-       int deadid, jid, jsys, len, level;
+       int deadid, jfd_in, jfd_out, jfd_pos, jid, jsys, len, level;
        int childmax, osreldt, rsnum, slevel;
 #ifdef INET
        int ip4s;
@@ -1027,17 +1032,26 @@ kern_jail_set(struct thread *td, struct uio *optuio, 
int flags)
        unsigned tallow;
        char numbuf[12];
 
-       error = priv_check(td, PRIV_JAIL_SET);
-       if (!error && (flags & JAIL_ATTACH))
-               error = priv_check(td, PRIV_JAIL_ATTACH);
-       if (error)
-               return (error);
        mypr = td->td_ucred->cr_prison;
-       if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
+       if (((flags & (JAIL_CREATE | JAIL_AT_DESC)) == JAIL_CREATE)
+           && mypr->pr_childmax == 0)
                return (EPERM);
        if (flags & ~JAIL_SET_MASK)
                return (EINVAL);
+       if ((flags & (JAIL_USE_DESC | JAIL_AT_DESC))
+           == (JAIL_USE_DESC | JAIL_AT_DESC))
+               return (EINVAL);
+       prison_hold(mypr);
 
+#ifdef INET
+       ip4 = NULL;
+#endif
+#ifdef INET6
+       ip6 = NULL;
+#endif
+       g_path = NULL;
+       jfp_out = NULL;
+       jfd_out = -1;
        /*
         * Check all the parameters before committing to anything.  Not all
         * errors can be caught early, but we may as well try.  Also, this
@@ -1050,14 +1064,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
         */
        error = vfs_buildopts(optuio, &opts);
        if (error)
-               return (error);
-#ifdef INET
-       ip4 = NULL;
-#endif
-#ifdef INET6
-       ip6 = NULL;
-#endif
-       g_path = NULL;
+               goto done_free;
 
        cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
        if (!cuflags) {
@@ -1066,6 +1073,72 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
                goto done_errmsg;
        }
 
+       error = vfs_copyopt(opts, "desc", &jfd_in, sizeof(jfd_in));
+       if (error == ENOENT) {
+               if (flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+                   JAIL_OWN_DESC)) {
+                       vfs_opterror(opts, "missing desc");
+                       goto done_errmsg;
+               }
+               jfd_in = -1;
+       } else if (error != 0)
+               goto done_free;
+       else {
+               if (!(flags & (JAIL_USE_DESC | JAIL_AT_DESC | JAIL_GET_DESC |
+                   JAIL_OWN_DESC))) {
+                       vfs_opterror(opts, "unexpected desc");
+                       goto done_errmsg;
+               }
+               if (flags & JAIL_AT_DESC) {
+                       /*
+                        * Look up and create jails based on the
+                        * descriptor's prison.
+                        */
+                       prison_free(mypr);
+                       error = jaildesc_find(td, jfd_in, &desc_in, &mypr,
+                           NULL);
+                       if (error != 0) {
+                               vfs_opterror(opts, error == ENOENT
+                                   ? "descriptor to dead jail"
+                                   : "not a jail descriptor");
+                               goto done_errmsg;
+                       }
+                       /*
+                        * Check file permissions using the current
+                        * credentials, and operation permissions
+                        * using the descriptor's credentials.
+                        */
+                       error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+                           desc_in->jd_gid, VEXEC, td->td_ucred);
+                       JAILDESC_UNLOCK(desc_in);
+                       if (error != 0)
+                               goto done_free;
+                       if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) {
+                               error = EPERM;
+                               goto done_free;
+                       }
+               }
+               if (flags & (JAIL_GET_DESC | JAIL_OWN_DESC)) {
+                       /* Allocate a jail descriptor to return later. */
+                       error = jaildesc_alloc(td, &jfp_out, &jfd_out,
+                               flags & JAIL_OWN_DESC);
+                       if (error)
+                               goto done_free;
+               }
+       }
+
+       /*
+        * Delay the permission check if using a jail descriptor,
+        * until we get the descriptor's credentials.
+        */
+       if (!(flags & JAIL_USE_DESC)) {
+               error = priv_check(td, PRIV_JAIL_SET);
+               if (error == 0 && (flags & JAIL_ATTACH))
+                       error = priv_check(td, PRIV_JAIL_ATTACH);
+               if (error)
+                       goto done_free;
+       }
+
        error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
        if (error == ENOENT)
                jid = 0;
@@ -1441,7 +1514,57 @@ kern_jail_set(struct thread *td, struct uio *optuio, int 
flags)
                error = EAGAIN;
                goto done_deref;
        }
-       if (jid != 0) {
+       if (flags & JAIL_USE_DESC) {
+               /* Get the jail from its descriptor. */
+               error = jaildesc_find(td, jfd_in, &desc_in, &pr, &jdcred);
+               if (error) {
+                       vfs_opterror(opts, error == ENOENT
+                           ? "descriptor to dead jail"
+                           : "not a jail descriptor");
+                       goto done_deref;
+               }
+               drflags |= PD_DEREF;
+               /*
+                * Check file permissions using the current credentials,
+                * and operation permissions using the descriptor's
+                * credentials.
+                */
+               error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+                   desc_in->jd_gid, VWRITE, td->td_ucred);
+               if (error == 0 && (flags & JAIL_ATTACH))
+                       error = vaccess(VREG, desc_in->jd_mode, desc_in->jd_uid,
+                           desc_in->jd_gid, VEXEC, td->td_ucred);
+               JAILDESC_UNLOCK(desc_in);
+               if (error == 0)
+                       error = priv_check_cred(jdcred, PRIV_JAIL_SET);
+               if (error == 0 && (flags & JAIL_ATTACH))
+                       error = priv_check_cred(jdcred, PRIV_JAIL_ATTACH);
+               crfree(jdcred);
+               if (error)
+                       goto done_deref;
+               mtx_lock(&pr->pr_mtx);
+               drflags |= PD_LOCKED;
+               if (cuflags == JAIL_CREATE) {
+                       error = EEXIST;
+                       vfs_opterror(opts, "jail %d already exists",
+                           pr->pr_id);
+                       goto done_deref;
+               }
+               if (!prison_isalive(pr)) {
+                       /* While a jid can be resurrected, the prison
+                        * itself cannot.
+                        */
+                       error = ENOENT;
+                       vfs_opterror(opts, "jail %d is dying", pr->pr_id);
*** 1065 LINES SKIPPED ***

Reply via email to