The branch releng/14.3 has been updated by markj:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=aaa0c2247f4c1466ca90b58650381e318f8be8fc

commit aaa0c2247f4c1466ca90b58650381e318f8be8fc
Author:     Mark Johnston <[email protected]>
AuthorDate: 2025-06-24 20:17:07 +0000
Commit:     Mark Johnston <[email protected]>
CommitDate: 2026-02-23 16:00:13 +0000

    file: Add a fd flag with O_RESOLVE_BENEATH semantics
    
    The O_RESOLVE_BENEATH openat(2) flag restricts name lookups such that
    they remain under the directory referenced by the dirfd.  This commit
    introduces an implicit version of the flag, FD_RESOLVE_BENEATH, stored
    in the file descriptor entry.  When the flag is set, any lookup relative
    to that fd automatically has O_RESOLVE_BENEATH semantics.  Furthermore,
    the flag is sticky, meaning that it cannot be cleared, and it is copied
    by dup() and openat().
    
    File descriptors with FD_RESOLVE_BENEATH set may not be passed to
    fchdir(2) or fchroot(2).  Various fd lookup routines are modified to
    return fd flags to the caller.
    
    This flag will be used to address a case where jails with different root
    directories and the ability to pass SCM_RIGHTS messages across the jail
    boundary can transfer directory fds in such a way as to allow a
    filesystem escape.
    
    Approved by:    so
    PR:             262180
    Reviewed by:    kib
    MFC after:      3 weeks
    Differential Revision:  https://reviews.freebsd.org/D50371
    
    (cherry picked from commit f35525ff2053e026a423e852136d73ed93c95803)
    (cherry picked from commit 10b3f2138573da952b4db29f88f6d67cfc3300cd)
---
 sys/fs/fdescfs/fdesc_vnops.c |  4 +-
 sys/kern/kern_descrip.c      | 97 ++++++++++++++++++++++++++++++++------------
 sys/kern/uipc_syscalls.c     |  2 +-
 sys/kern/vfs_acl.c           |  4 +-
 sys/kern/vfs_cache.c         | 14 +++++--
 sys/kern/vfs_extattr.c       |  8 ++--
 sys/kern/vfs_syscalls.c      | 21 +++++++---
 sys/sys/file.h               |  2 +-
 sys/sys/filedesc.h           |  8 +++-
 sys/sys/namei.h              |  1 +
 10 files changed, 113 insertions(+), 48 deletions(-)

diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
index 9c451aa9247a..47fb387a007e 100644
--- a/sys/fs/fdescfs/fdesc_vnops.c
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -504,7 +504,7 @@ fdesc_setattr(struct vop_setattr_args *ap)
                    cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp);
        } else {
                error = getvnode_path(td, fd,
-                   cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp);
+                   cap_rights_init_one(&rights, CAP_EXTATTR_SET), NULL, &fp);
        }
        if (error) {
                /*
@@ -641,7 +641,7 @@ fdesc_readlink(struct vop_readlink_args *va)
        VOP_UNLOCK(vn);
 
        td = curthread;
-       error = fget_cap(td, fd_fd, &cap_no_rights, &fp, NULL);
+       error = fget_cap(td, fd_fd, &cap_no_rights, NULL, &fp, NULL);
        if (error != 0)
                goto out;
 
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 8b391da95840..1932336ded28 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -111,8 +111,8 @@ static void fdgrowtable_exp(struct filedesc *fdp, int nfd);
 static void    fdunused(struct filedesc *fdp, int fd);
 static void    fdused(struct filedesc *fdp, int fd);
 static int     fget_unlocked_seq(struct thread *td, int fd,
-                   const cap_rights_t *needrightsp, struct file **fpp,
-                   seqc_t *seqp);
+                   const cap_rights_t *needrightsp, uint8_t *flagsp,
+                   struct file **fpp, seqc_t *seqp);
 static int     getmaxfd(struct thread *td);
 static u_long  *filecaps_copy_prep(const struct filecaps *src);
 static void    filecaps_copy_finish(const struct filecaps *src,
@@ -480,6 +480,8 @@ kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long 
arg)
        return (error);
 }
 
+#define        FD_RESOLVE_BENEATH      2
+
 int
 kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
 {
@@ -529,7 +531,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
                fde = fdeget_noref(fdp, fd);
                if (fde != NULL) {
                        td->td_retval[0] =
-                           (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0;
+                           ((fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0) |
+                           ((fde->fde_flags & UF_RESOLVE_BENEATH) ?
+                           FD_RESOLVE_BENEATH : 0);
                        error = 0;
                }
                FILEDESC_SUNLOCK(fdp);
@@ -540,8 +544,13 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t 
arg)
                FILEDESC_XLOCK(fdp);
                fde = fdeget_noref(fdp, fd);
                if (fde != NULL) {
+                       /*
+                        * UF_RESOLVE_BENEATH is sticky and cannot be cleared.
+                        */
                        fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) |
-                           (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
+                           ((arg & FD_CLOEXEC) != 0 ? UF_EXCLOSE : 0) |
+                           ((arg & FD_RESOLVE_BENEATH) != 0 ?
+                           UF_RESOLVE_BENEATH : 0);
                        error = 0;
                }
                FILEDESC_XUNLOCK(fdp);
@@ -2166,7 +2175,8 @@ _finstall(struct filedesc *fdp, struct file *fp, int fd, 
int flags,
        seqc_write_begin(&fde->fde_seqc);
 #endif
        fde->fde_file = fp;
-       fde->fde_flags = (flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0;
+       fde->fde_flags = ((flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0) |
+           ((flags & O_RESOLVE_BENEATH) != 0 ? UF_RESOLVE_BENEATH : 0);
        if (fcaps != NULL)
                filecaps_move(fcaps, &fde->fde_caps);
        else
@@ -2914,7 +2924,7 @@ out:
 #ifdef CAPABILITIES
 int
 fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp,
-    struct file **fpp, struct filecaps *havecapsp)
+    uint8_t *flagsp, struct file **fpp, struct filecaps *havecapsp)
 {
        struct filedesc *fdp = td->td_proc->p_fd;
        int error;
@@ -2923,7 +2933,8 @@ fget_cap(struct thread *td, int fd, const cap_rights_t 
*needrightsp,
 
        *fpp = NULL;
        for (;;) {
-               error = fget_unlocked_seq(td, fd, needrightsp, &fp, &seq);
+               error = fget_unlocked_seq(td, fd, needrightsp, flagsp, &fp,
+                   &seq);
                if (error != 0)
                        return (error);
 
@@ -2954,10 +2965,10 @@ get_locked:
 #else
 int
 fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp,
-    struct file **fpp, struct filecaps *havecapsp)
+    uint8_t *flagsp, struct file **fpp, struct filecaps *havecapsp)
 {
        int error;
-       error = fget_unlocked(td, fd, needrightsp, fpp);
+       error = fget_unlocked(td, fd, needrightsp, flagsp, fpp);
        if (havecapsp != NULL && error == 0)
                filecaps_fill(havecapsp);
 
@@ -3040,7 +3051,7 @@ out:
 
 #ifdef CAPABILITIES
 int
-fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
+fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, int *flagsp)
 {
        const struct filedescent *fde;
        const struct fdescenttbl *fdt;
@@ -3050,7 +3061,7 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode 
**vpp, bool *fsearch)
        const cap_rights_t *haverights;
        cap_rights_t rights;
        seqc_t seq;
-       int fd;
+       int fd, flags;
 
        VFS_SMR_ASSERT_ENTERED();
 
@@ -3070,7 +3081,9 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode 
**vpp, bool *fsearch)
                return (EAGAIN);
        if (__predict_false(cap_check_inline_transient(haverights, &rights)))
                return (EAGAIN);
-       *fsearch = ((fp->f_flag & FSEARCH) != 0);
+       flags = fp->f_flag & FSEARCH;
+       flags |= (fde->fde_flags & UF_RESOLVE_BENEATH) != 0 ?
+           O_RESOLVE_BENEATH : 0;
        vp = fp->f_vnode;
        if (__predict_false(vp == NULL)) {
                return (EAGAIN);
@@ -3104,17 +3117,19 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode 
**vpp, bool *fsearch)
 #endif
        }
        *vpp = vp;
+       *flagsp = flags;
        return (0);
 }
 #else
 int
-fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
+fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, int *flagsp)
 {
+       const struct filedescent *fde;
        const struct fdescenttbl *fdt;
        struct filedesc *fdp;
        struct file *fp;
        struct vnode *vp;
-       int fd;
+       int fd, flags;
 
        VFS_SMR_ASSERT_ENTERED();
 
@@ -3123,9 +3138,13 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode 
**vpp, bool *fsearch)
        fdt = fdp->fd_files;
        if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
                return (EBADF);
-       fp = fdt->fdt_ofiles[fd].fde_file;
+       fde = &fdt->fdt_ofiles[fd];
+       fp = fde->fde_file;
        if (__predict_false(fp == NULL))
                return (EAGAIN);
+       flags = fp->f_flag & FSEARCH;
+       flags |= (fde->fde_flags & UF_RESOLVE_BENEATH) != 0 ?
+           O_RESOLVE_BENEATH : 0;
        *fsearch = ((fp->f_flag & FSEARCH) != 0);
        vp = fp->f_vnode;
        if (__predict_false(vp == NULL || vp->v_type != VDIR)) {
@@ -3141,6 +3160,7 @@ fgetvp_lookup_smr(struct nameidata *ndp, struct vnode 
**vpp, bool *fsearch)
                return (EAGAIN);
        filecaps_fill(&ndp->ni_filecaps);
        *vpp = vp;
+       *flagsp = flags;
        return (0);
 }
 #endif
@@ -3154,13 +3174,15 @@ fgetvp_lookup(struct nameidata *ndp, struct vnode **vpp)
        struct componentname *cnp;
        cap_rights_t rights;
        int error;
+       uint8_t flags;
 
        td = curthread;
        rights = *ndp->ni_rightsneeded;
        cap_rights_set_one(&rights, CAP_LOOKUP);
        cnp = &ndp->ni_cnd;
 
-       error = fget_cap(td, ndp->ni_dirfd, &rights, &fp, &ndp->ni_filecaps);
+       error = fget_cap(td, ndp->ni_dirfd, &rights, &flags, &fp,
+           &ndp->ni_filecaps);
        if (__predict_false(error != 0))
                return (error);
        if (__predict_false(fp->f_ops == &badfileops)) {
@@ -3178,6 +3200,10 @@ fgetvp_lookup(struct nameidata *ndp, struct vnode **vpp)
         */
        if ((fp->f_flag & FSEARCH) != 0)
                cnp->cn_flags |= NOEXECCHECK;
+       if ((flags & UF_RESOLVE_BENEATH) != 0) {
+               cnp->cn_flags |= RBENEATH;
+               ndp->ni_resflags |= NIRES_BENEATH;
+       }
        fdrop(fp, td);
 
 #ifdef CAPABILITIES
@@ -3225,7 +3251,7 @@ out_free:
 #ifdef CAPABILITIES
 static int
 fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp,
-    struct file **fpp, seqc_t *seqp)
+    uint8_t *flagsp, struct file **fpp, seqc_t *seqp)
 {
        struct filedesc *fdp;
        const struct filedescent *fde;
@@ -3234,6 +3260,7 @@ fget_unlocked_seq(struct thread *td, int fd, const 
cap_rights_t *needrightsp,
        seqc_t seq;
        cap_rights_t haverights;
        int error;
+       uint8_t flags;
 
        fdp = td->td_proc->p_fd;
        fdt = fdp->fd_files;
@@ -3245,6 +3272,7 @@ fget_unlocked_seq(struct thread *td, int fd, const 
cap_rights_t *needrightsp,
                fde = &fdt->fdt_ofiles[fd];
                haverights = *cap_rights_fde_inline(fde);
                fp = fde->fde_file;
+               flags = fde->fde_flags;
                if (__predict_false(fp == NULL)) {
                        if (seqc_consistent(fd_seqc(fdt, fd), seq))
                                return (EBADF);
@@ -3273,19 +3301,21 @@ fget_unlocked_seq(struct thread *td, int fd, const 
cap_rights_t *needrightsp,
                fdrop(fp, td);
        }
        *fpp = fp;
-       if (seqp != NULL) {
+       if (flagsp != NULL)
+               *flagsp = flags;
+       if (seqp != NULL)
                *seqp = seq;
-       }
        return (0);
 }
 #else
 static int
 fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp,
-    struct file **fpp, seqc_t *seqp __unused)
+    uint8_t *flagsp, struct file **fpp, seqc_t *seqp __unused)
 {
        struct filedesc *fdp;
        const struct fdescenttbl *fdt;
        struct file *fp;
+       uint8_t flags;
 
        fdp = td->td_proc->p_fd;
        fdt = fdp->fd_files;
@@ -3294,6 +3324,7 @@ fget_unlocked_seq(struct thread *td, int fd, const 
cap_rights_t *needrightsp,
 
        for (;;) {
                fp = fdt->fdt_ofiles[fd].fde_file;
+               flags = fdt->fdt_ofiles[fd].fde_flags;
                if (__predict_false(fp == NULL))
                        return (EBADF);
                if 
(__predict_false(!refcount_acquire_if_not_zero(&fp->f_count))) {
@@ -3310,6 +3341,8 @@ fget_unlocked_seq(struct thread *td, int fd, const 
cap_rights_t *needrightsp,
                        break;
                fdrop(fp, td);
        }
+       if (flagsp != NULL)
+               *flagsp = flags;
        *fpp = fp;
        return (0);
 }
@@ -3323,8 +3356,8 @@ fget_unlocked_seq(struct thread *td, int fd, const 
cap_rights_t *needrightsp,
  * racing with itself.
  */
 int
-fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp,
-    struct file **fpp)
+fget_unlocked_flags(struct thread *td, int fd, const cap_rights_t *needrightsp,
+    uint8_t *flagsp, struct file **fpp)
 {
        struct filedesc *fdp;
 #ifdef CAPABILITIES
@@ -3336,6 +3369,7 @@ fget_unlocked(struct thread *td, int fd, const 
cap_rights_t *needrightsp,
        seqc_t seq;
        const cap_rights_t *haverights;
 #endif
+       uint8_t flags;
 
        fdp = td->td_proc->p_fd;
        fdt = fdp->fd_files;
@@ -3348,8 +3382,10 @@ fget_unlocked(struct thread *td, int fd, const 
cap_rights_t *needrightsp,
        fde = &fdt->fdt_ofiles[fd];
        haverights = cap_rights_fde_inline(fde);
        fp = fde->fde_file;
+       flags = fde->fde_flags;
 #else
        fp = fdt->fdt_ofiles[fd].fde_file;
+       flags = fdt->fdt_ofiles[fd].fde_flags;
 #endif
        if (__predict_false(fp == NULL))
                goto out_fallback;
@@ -3373,12 +3409,21 @@ fget_unlocked(struct thread *td, int fd, const 
cap_rights_t *needrightsp,
 #endif
                goto out_fdrop;
        *fpp = fp;
+       if (flagsp != NULL)
+               *flagsp = flags;
        return (0);
 out_fdrop:
        fdrop(fp, td);
 out_fallback:
        *fpp = NULL;
-       return (fget_unlocked_seq(td, fd, needrightsp, fpp, NULL));
+       return (fget_unlocked_seq(td, fd, needrightsp, flagsp, fpp, NULL));
+}
+
+int
+fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp,
+    struct file **fpp)
+{
+       return (fget_unlocked_flags(td, fd, needrightsp, NULL, fpp));
 }
 
 /*
@@ -3530,7 +3575,7 @@ fget_mmap(struct thread *td, int fd, const cap_rights_t 
*rightsp,
        fdp = td->td_proc->p_fd;
        MPASS(cap_rights_is_set(rightsp, CAP_MMAP));
        for (;;) {
-               error = fget_unlocked_seq(td, fd, rightsp, &fp, &seq);
+               error = fget_unlocked_seq(td, fd, rightsp, NULL, &fp, &seq);
                if (__predict_false(error != 0))
                        return (error);
                if (__predict_false(fp->f_ops == &badfileops)) {
@@ -3585,7 +3630,7 @@ fget_fcntl(struct thread *td, int fd, const cap_rights_t 
*rightsp,
        *fpp = NULL;
        MPASS(cap_rights_is_set(rightsp, CAP_FCNTL));
        for (;;) {
-               error = fget_unlocked_seq(td, fd, rightsp, &fp, &seq);
+               error = fget_unlocked_seq(td, fd, rightsp, NULL, &fp, &seq);
                if (error != 0)
                        return (error);
                error = cap_fcntl_check(fdp, fd, needfcntl);
@@ -3647,7 +3692,7 @@ fgetvp_rights(struct thread *td, int fd, const 
cap_rights_t *needrightsp,
        struct file *fp;
        int error;
 
-       error = fget_cap(td, fd, needrightsp, &fp, &caps);
+       error = fget_cap(td, fd, needrightsp, NULL, &fp, &caps);
        if (error != 0)
                return (error);
        if (fp->f_ops == &badfileops) {
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 4dca0522f707..86b3731468fa 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -93,7 +93,7 @@ getsock_cap(struct thread *td, int fd, const cap_rights_t 
*rightsp,
        struct file *fp;
        int error;
 
-       error = fget_cap(td, fd, rightsp, &fp, havecapsp);
+       error = fget_cap(td, fd, rightsp, NULL, &fp, havecapsp);
        if (__predict_false(error != 0))
                return (error);
        if (__predict_false(fp->f_type != DTYPE_SOCKET)) {
diff --git a/sys/kern/vfs_acl.c b/sys/kern/vfs_acl.c
index 58b950a04cd1..b4b1285400d3 100644
--- a/sys/kern/vfs_acl.c
+++ b/sys/kern/vfs_acl.c
@@ -435,7 +435,7 @@ sys___acl_get_fd(struct thread *td, struct 
__acl_get_fd_args *uap)
 
        AUDIT_ARG_FD(uap->filedes);
        error = getvnode_path(td, uap->filedes,
-           cap_rights_init_one(&rights, CAP_ACL_GET), &fp);
+           cap_rights_init_one(&rights, CAP_ACL_GET), NULL, &fp);
        if (error == 0) {
                error = vacl_get_acl(td, fp->f_vnode, uap->type, uap->aclp);
                fdrop(fp, td);
@@ -570,7 +570,7 @@ sys___acl_aclcheck_fd(struct thread *td, struct 
__acl_aclcheck_fd_args *uap)
 
        AUDIT_ARG_FD(uap->filedes);
        error = getvnode_path(td, uap->filedes,
-           cap_rights_init_one(&rights, CAP_ACL_CHECK), &fp);
+           cap_rights_init_one(&rights, CAP_ACL_CHECK), NULL, &fp);
        if (error == 0) {
                error = vacl_aclcheck(td, fp->f_vnode, uap->type, uap->aclp);
                fdrop(fp, td);
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 55f3ef8da8c3..afe090795e37 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -4445,17 +4445,23 @@ cache_fplookup_dirfd(struct cache_fpl *fpl, struct 
vnode **vpp)
 {
        struct nameidata *ndp;
        struct componentname *cnp;
-       int error;
-       bool fsearch;
+       int error, flags;
 
        ndp = fpl->ndp;
        cnp = fpl->cnp;
 
-       error = fgetvp_lookup_smr(ndp, vpp, &fsearch);
+       error = fgetvp_lookup_smr(ndp, vpp, &flags);
        if (__predict_false(error != 0)) {
                return (cache_fpl_aborted(fpl));
        }
-       fpl->fsearch = fsearch;
+       if (__predict_false((flags & O_RESOLVE_BENEATH) != 0)) {
+               _Static_assert((CACHE_FPL_SUPPORTED_CN_FLAGS & RBENEATH) == 0,
+                   "RBENEATH supported by fplookup");
+               cache_fpl_smr_exit(fpl);
+               cache_fpl_aborted(fpl);
+               return (EOPNOTSUPP);
+       }
+       fpl->fsearch = (flags & FSEARCH) != 0;
        if ((*vpp)->v_type != VDIR) {
                if (!((cnp->cn_flags & EMPTYPATH) != 0 && cnp->cn_pnbuf[0] == 
'\0')) {
                        cache_fpl_smr_exit(fpl);
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index dc1bbb58644b..831090f02f90 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -254,7 +254,7 @@ kern_extattr_set_fd(struct thread *td, int fd, int 
attrnamespace,
        AUDIT_ARG_TEXT(attrname);
 
        error = getvnode_path(td, fd,
-           cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp);
+           cap_rights_init_one(&rights, CAP_EXTATTR_SET), NULL, &fp);
        if (error)
                return (error);
 
@@ -442,7 +442,7 @@ kern_extattr_get_fd(struct thread *td, int fd, int 
attrnamespace,
        AUDIT_ARG_TEXT(attrname);
 
        error = getvnode_path(td, fd,
-           cap_rights_init_one(&rights, CAP_EXTATTR_GET), &fp);
+           cap_rights_init_one(&rights, CAP_EXTATTR_GET), NULL, &fp);
        if (error)
                return (error);
 
@@ -598,7 +598,7 @@ kern_extattr_delete_fd(struct thread *td, int fd, int 
attrnamespace,
        AUDIT_ARG_TEXT(attrname);
 
        error = getvnode_path(td, fd,
-           cap_rights_init_one(&rights, CAP_EXTATTR_DELETE), &fp);
+           cap_rights_init_one(&rights, CAP_EXTATTR_DELETE), NULL, &fp);
        if (error)
                return (error);
 
@@ -765,7 +765,7 @@ kern_extattr_list_fd(struct thread *td, int fd, int 
attrnamespace,
        AUDIT_ARG_FD(fd);
        AUDIT_ARG_VALUE(attrnamespace);
        error = getvnode_path(td, fd,
-           cap_rights_init_one(&rights, CAP_EXTATTR_LIST), &fp);
+           cap_rights_init_one(&rights, CAP_EXTATTR_LIST), NULL, &fp);
        if (error)
                return (error);
 
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 670ceb151468..ded3a87e7f8b 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -375,7 +375,7 @@ kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
        int error;
 
        AUDIT_ARG_FD(fd);
-       error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp);
+       error = getvnode_path(td, fd, &cap_fstatfs_rights, NULL, &fp);
        if (error != 0)
                return (error);
        vp = fp->f_vnode;
@@ -898,12 +898,17 @@ sys_fchdir(struct thread *td, struct fchdir_args *uap)
        struct mount *mp;
        struct file *fp;
        int error;
+       uint8_t fdflags;
 
        AUDIT_ARG_FD(uap->fd);
-       error = getvnode_path(td, uap->fd, &cap_fchdir_rights,
+       error = getvnode_path(td, uap->fd, &cap_fchdir_rights, &fdflags,
            &fp);
        if (error != 0)
                return (error);
+       if ((fdflags & UF_RESOLVE_BENEATH) != 0) {
+               fdrop(fp, td);
+               return (ENOTCAPABLE);
+       }
        vp = fp->f_vnode;
        vrefact(vp);
        fdrop(fp, td);
@@ -1252,6 +1257,10 @@ success:
                else
 #endif
                        fcaps = NULL;
+               if ((nd.ni_resflags & NIRES_BENEATH) != 0)
+                       flags |= O_RESOLVE_BENEATH;
+               else
+                       flags &= ~O_RESOLVE_BENEATH;
                error = finstall_refed(td, fp, &indx, flags, fcaps);
                /* On success finstall_refed() consumes fcaps. */
                if (error != 0) {
@@ -1939,7 +1948,7 @@ kern_funlinkat(struct thread *td, int dfd, const char 
*path, int fd,
 
        fp = NULL;
        if (fd != FD_NONE) {
-               error = getvnode_path(td, fd, &cap_no_rights, &fp);
+               error = getvnode_path(td, fd, &cap_no_rights, NULL, &fp);
                if (error != 0)
                        return (error);
        }
@@ -4326,12 +4335,12 @@ out:
  */
 int
 getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp,
-    struct file **fpp)
+    uint8_t *flagsp, struct file **fpp)
 {
        struct file *fp;
        int error;
 
-       error = fget_unlocked(td, fd, rightsp, &fp);
+       error = fget_unlocked_flags(td, fd, rightsp, flagsp, &fp);
        if (error != 0)
                return (error);
 
@@ -4368,7 +4377,7 @@ getvnode(struct thread *td, int fd, const cap_rights_t 
*rightsp,
 {
        int error;
 
-       error = getvnode_path(td, fd, rightsp, fpp);
+       error = getvnode_path(td, fd, rightsp, NULL, fpp);
        if (__predict_false(error != 0))
                return (error);
 
diff --git a/sys/sys/file.h b/sys/sys/file.h
index bad161d5d46b..7d1a191b4374 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -300,7 +300,7 @@ int fgetvp_read(struct thread *td, int fd, const 
cap_rights_t *rightsp,
     struct vnode **vpp);
 int fgetvp_write(struct thread *td, int fd, const cap_rights_t *rightsp,
     struct vnode **vpp);
-int fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, bool 
*fsearch);
+int fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, int *flagsp);
 int fgetvp_lookup(struct nameidata *ndp, struct vnode **vpp);
 
 static __inline __result_use_check bool
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
index ecb8c58e5d29..440c5d3d15f9 100644
--- a/sys/sys/filedesc.h
+++ b/sys/sys/filedesc.h
@@ -150,6 +150,7 @@ struct filedesc_to_leader {
  * Per-process open flags.
  */
 #define        UF_EXCLOSE      0x01            /* auto-close on exec */
+#define        UF_RESOLVE_BENEATH 0x02         /* lookups must be beneath this 
dir */
 
 #ifdef _KERNEL
 
@@ -280,17 +281,20 @@ struct filedesc_to_leader *
 int    getvnode(struct thread *td, int fd, const cap_rights_t *rightsp,
            struct file **fpp);
 int    getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp,
-           struct file **fpp);
+           uint8_t *flagsp, struct file **fpp);
 void   mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
 
 int    fget_cap_noref(struct filedesc *fdp, int fd,
            const cap_rights_t *needrightsp, struct file **fpp,
            struct filecaps *havecapsp);
 int    fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp,
-           struct file **fpp, struct filecaps *havecapsp);
+           uint8_t *flagsp, struct file **fpp, struct filecaps *havecapsp);
 /* Return a referenced file from an unlocked descriptor. */
 int    fget_unlocked(struct thread *td, int fd,
            const cap_rights_t *needrightsp, struct file **fpp);
+int    fget_unlocked_flags(struct thread *td, int fd,
+           const cap_rights_t *needrightsp, uint8_t *flagsp,
+           struct file **fpp);
 /* Return a file pointer without a ref. FILEDESC_IS_ONLY_USER must be true.  */
 int    fget_only_user(struct filedesc *fdp, int fd,
            const cap_rights_t *needrightsp, struct file **fpp);
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
index 7033e5469240..dae714f97aef 100644
--- a/sys/sys/namei.h
+++ b/sys/sys/namei.h
@@ -195,6 +195,7 @@ int cache_fplookup(struct nameidata *ndp, enum 
cache_fpl_status *status,
 #define        NIRES_ABS       0x00000001 /* Path was absolute */
 #define        NIRES_STRICTREL 0x00000002 /* Restricted lookup result */
 #define        NIRES_EMPTYPATH 0x00000004 /* EMPTYPATH used */
+#define        NIRES_BENEATH   0x00000008 /* O_RESOLVE_BENEATH is to be 
inherited */
 
 /*
  * Flags in ni_lcf, valid for the duration of the namei call.

Reply via email to