Author: mjg
Date: Sat Oct 10 03:48:17 2020
New Revision: 366597
URL: https://svnweb.freebsd.org/changeset/base/366597

Log:
  vfs: support lockless dirfd lookups

Modified:
  head/sys/kern/kern_descrip.c
  head/sys/kern/vfs_cache.c
  head/sys/sys/file.h

Modified: head/sys/kern/kern_descrip.c
==============================================================================
--- head/sys/kern/kern_descrip.c        Sat Oct 10 01:13:14 2020        (r366596)
+++ head/sys/kern/kern_descrip.c        Sat Oct 10 03:48:17 2020        (r366597)
@@ -2708,6 +2708,111 @@ get_locked:
        return (error);
 }
 
+#ifdef CAPABILITIES
+int
+fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
+{
+       const struct filedescent *fde;
+       const struct fdescenttbl *fdt;
+       struct filedesc *fdp;
+       struct file *fp;
+       struct vnode *vp;
+       const cap_rights_t *haverights;
+       cap_rights_t rights;
+       seqc_t seq;
+
+       VFS_SMR_ASSERT_ENTERED();
+
+       rights = *ndp->ni_rightsneeded;
+       cap_rights_set_one(&rights, CAP_LOOKUP);
+
+       fdp = curproc->p_fd;
+       fdt = fdp->fd_files;
+       if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
+               return (EBADF);
+       seq = seqc_read_any(fd_seqc(fdt, fd));
+       if (__predict_false(seqc_in_modify(seq)))
+               return (EAGAIN);
+       fde = &fdt->fdt_ofiles[fd];
+       haverights = cap_rights_fde_inline(fde);
+       fp = fde->fde_file;
+       if (__predict_false(fp == NULL))
+               return (EAGAIN);
+       if (__predict_false(cap_check_inline_transient(haverights, &rights)))
+               return (EAGAIN);
+       *fsearch = ((fp->f_flag & FSEARCH) != 0);
+       vp = fp->f_vnode;
+       if (__predict_false(vp == NULL || vp->v_type != VDIR)) {
+               return (EAGAIN);
+       }
+       if (!filecaps_copy(&fde->fde_caps, &ndp->ni_filecaps, false)) {
+               return (EAGAIN);
+       }
+       /*
+        * Use an acquire barrier to force re-reading of fdt so it is
+        * refreshed for verification.
+        */
+       atomic_thread_fence_acq();
+       fdt = fdp->fd_files;
+       if (__predict_false(!seqc_consistent_nomb(fd_seqc(fdt, fd), seq)))
+               return (EAGAIN);
+       /*
+        * If file descriptor doesn't have all rights,
+        * all lookups relative to it must also be
+        * strictly relative.
+        *
+        * Not yet supported by fast path.
+        */
+       CAP_ALL(&rights);
+       if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights, &rights) ||
+           ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL ||
+           ndp->ni_filecaps.fc_nioctls != -1) {
+#ifdef notyet
+               ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
+#else
+               return (EAGAIN);
+#endif
+       }
+       *vpp = vp;
+       return (0);
+}
+#else
+int
+fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
+{
+       const struct fdescenttbl *fdt;
+       struct filedesc *fdp;
+       struct file *fp;
+       struct vnode *vp;
+
+       VFS_SMR_ASSERT_ENTERED();
+
+       fdp = curproc->p_fd;
+       fdt = fdp->fd_files;
+       if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
+               return (EBADF);
+       fp = fdt->fdt_ofiles[fd].fde_file;
+       if (__predict_false(fp == NULL))
+               return (EAGAIN);
+       *fsearch = ((fp->f_flag & FSEARCH) != 0);
+       vp = fp->f_vnode;
+       if (__predict_false(vp == NULL || vp->v_type != VDIR)) {
+               return (EAGAIN);
+       }
+       /*
+        * Use an acquire barrier to force re-reading of fdt so it is
+        * refreshed for verification.
+        */
+       atomic_thread_fence_acq();
+       fdt = fdp->fd_files;
+       if (__predict_false(fp != fdt->fdt_ofiles[fd].fde_file))
+               return (EAGAIN);
+       filecaps_fill(&ndp->ni_filecaps);
+       *vpp = vp;
+       return (0);
+}
+#endif
+
 int
 fget_unlocked_seq(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,
     struct file **fpp, seqc_t *seqp)

Modified: head/sys/kern/vfs_cache.c
==============================================================================
--- head/sys/kern/vfs_cache.c   Sat Oct 10 01:13:14 2020        (r366596)
+++ head/sys/kern/vfs_cache.c   Sat Oct 10 03:48:17 2020        (r366597)
@@ -3189,6 +3189,7 @@ struct cache_fpl {
        int line;
        enum cache_fpl_status status:8;
        bool in_smr;
+       bool fsearch;
 };
 
 static void
@@ -3346,10 +3347,6 @@ cache_can_fplookup(struct cache_fpl *fpl)
                cache_fpl_aborted(fpl);
                return (false);
        }
-       if (ndp->ni_dirfd != AT_FDCWD) {
-               cache_fpl_aborted(fpl);
-               return (false);
-       }
        if (IN_CAPABILITY_MODE(td)) {
                cache_fpl_aborted(fpl);
                return (false);
@@ -3365,6 +3362,23 @@ cache_can_fplookup(struct cache_fpl *fpl)
        return (true);
 }
 
+static int
+cache_fplookup_dirfd(struct cache_fpl *fpl, struct vnode **vpp)
+{
+       struct nameidata *ndp;
+       int error;
+       bool fsearch;
+
+       ndp = fpl->ndp;
+       error = fgetvp_lookup_smr(ndp->ni_dirfd, ndp, vpp, &fsearch);
+       if (__predict_false(error != 0)) {
+               cache_fpl_smr_exit(fpl);
+               return (cache_fpl_aborted(fpl));
+       }
+       fpl->fsearch = fsearch;
+       return (0);
+}
+
 static bool
 cache_fplookup_vnode_supported(struct vnode *vp)
 {
@@ -4046,9 +4060,11 @@ cache_fplookup_parse_advance(struct cache_fpl *fpl)
 static int __noinline
 cache_fplookup_failed_vexec(struct cache_fpl *fpl, int error)
 {
+       struct componentname *cnp;
        struct vnode *dvp;
        seqc_t dvp_seqc;
 
+       cnp = fpl->cnp;
        dvp = fpl->dvp;
        dvp_seqc = fpl->dvp_seqc;
 
@@ -4070,6 +4086,32 @@ cache_fplookup_failed_vexec(struct cache_fpl *fpl, int
                error = ENOTDIR;
        }
 
+       /*
+        * Hack: handle O_SEARCH.
+        *
+        * Open Group Base Specifications Issue 7, 2018 edition states:
+        * If the access mode of the open file description associated with the
+        * file descriptor is not O_SEARCH, the function shall check whether
+        * directory searches are permitted using the current permissions of
+        * the directory underlying the file descriptor. If the access mode is
+        * O_SEARCH, the function shall not perform the check.
+        *
+        * Regular lookup tests for the NOEXECCHECK flag for every path
+        * component to decide whether to do the permission check. However,
+        * since most lookups never have the flag (and when they do it is only
+        * present for the first path component), lockless lookup only acts on
+        * it if there is a permission problem. Here the flag is represented
+        * with a boolean so that we don't have to clear it on the way out.
+        *
+        * For simplicity this always aborts.
+        * TODO: check if this is the first lookup and ignore the permission
+        * problem. Note the flag has to survive fallback (if it happens to be
+        * performed).
+        */
+       if (fpl->fsearch) {
+               return (cache_fpl_aborted(fpl));
+       }
+
        switch (error) {
        case EAGAIN:
                if (!vn_seqc_consistent(dvp, dvp_seqc)) {
@@ -4308,6 +4350,7 @@ cache_fplookup(struct nameidata *ndp, enum cache_fpl_s
        cache_fpl_checkpoint(&fpl, &orig);
 
        cache_fpl_smr_enter_initial(&fpl);
+       fpl.fsearch = false;
        pwd = pwd_get_smr();
        fpl.pwd = pwd;
        ndp->ni_rootdir = pwd->pwd_rdir;
@@ -4318,13 +4361,20 @@ cache_fplookup(struct nameidata *ndp, enum cache_fpl_s
        if (cnp->cn_pnbuf[0] == '/') {
                cache_fpl_handle_root(ndp, &dvp);
        } else {
-               MPASS(ndp->ni_dirfd == AT_FDCWD);
-               dvp = pwd->pwd_cdir;
+               if (ndp->ni_dirfd == AT_FDCWD) {
+                       dvp = pwd->pwd_cdir;
+               } else {
+                       error = cache_fplookup_dirfd(&fpl, &dvp);
+                       if (__predict_false(error != 0)) {
+                               goto out;
+                       }
+               }
        }
 
        SDT_PROBE4(vfs, namei, lookup, entry, dvp, cnp->cn_pnbuf, cnp->cn_flags, true);
 
        error = cache_fplookup_impl(dvp, &fpl);
+out:
        cache_fpl_smr_assert_not_entered(&fpl);
        SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.status);
 

Modified: head/sys/sys/file.h
==============================================================================
--- head/sys/sys/file.h Sat Oct 10 01:13:14 2020        (r366596)
+++ head/sys/sys/file.h Sat Oct 10 03:48:17 2020        (r366597)
@@ -52,6 +52,7 @@ struct thread;
 struct uio;
 struct knote;
 struct vnode;
+struct nameidata;
 
 #endif /* _KERNEL */
 
@@ -279,6 +280,7 @@ int fgetvp_read(struct thread *td, int fd, cap_rights_
     struct vnode **vpp);
 int fgetvp_write(struct thread *td, int fd, cap_rights_t *rightsp,
     struct vnode **vpp);
+int fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch);
 
 static __inline __result_use_check bool
 fhold(struct file *fp)
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to