Author: mjg
Date: Mon Aug 24 09:00:57 2020
New Revision: 364637
URL: https://svnweb.freebsd.org/changeset/base/364637

Log:
  cache: lockless reverse lookup
  
  This enables fully scalable operation for getcwd and significantly improves
  realpath.
  
  For example:
  PATH_CUSTOM=/usr/src ./getcwd_processes -t 104
  before:  1550851
  after: 380135380
  
  Tested by:    pho

Modified:
  head/sys/kern/vfs_cache.c

Modified: head/sys/kern/vfs_cache.c
==============================================================================
--- head/sys/kern/vfs_cache.c   Mon Aug 24 09:00:07 2020        (r364636)
+++ head/sys/kern/vfs_cache.c   Mon Aug 24 09:00:57 2020        (r364637)
@@ -85,6 +85,10 @@ SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *",
     "char *");
+SDT_PROBE_DEFINE2(vfs, namecache, fullpath_smr, hit, "struct vnode *",
+    "const char *");
+SDT_PROBE_DEFINE4(vfs, namecache, fullpath_smr, miss, "struct vnode *",
+    "struct namecache *", "int", "int");
 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *");
 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *",
     "char *", "struct vnode *");
@@ -298,6 +302,10 @@ static u_int __read_mostly ncsize; /* the size as comp
 
 struct nchstats        nchstats;               /* cache effectiveness statistics */
 
+static bool __read_frequently cache_fast_revlookup = true;
+SYSCTL_BOOL(_vfs, OID_AUTO, cache_fast_revlookup, CTLFLAG_RW,
+    &cache_fast_revlookup, 0, "");
+
 static struct mtx __exclusive_cache_line       ncneg_shrink_lock;
 
 struct neglist {
@@ -477,6 +485,8 @@ STATNODE_COUNTER(shrinking_skipped,
 static void cache_zap_locked(struct namecache *ncp);
 static int vn_fullpath_hardlink(struct nameidata *ndp, char **retbuf,
     char **freebuf, size_t *buflen);
+static int vn_fullpath_any_smr(struct vnode *vp, struct vnode *rdir, char *buf,
+    char **retbuf, size_t *buflen, bool slash_prefixed, size_t addend);
 static int vn_fullpath_any(struct vnode *vp, struct vnode *rdir, char *buf,
     char **retbuf, size_t *buflen);
 static int vn_fullpath_dir(struct vnode *vp, struct vnode *rdir, char *buf,
@@ -2476,9 +2486,17 @@ vn_getcwd(char *buf, char **retbuf, size_t *buflen)
        struct pwd *pwd;
        int error;
 
-       pwd = pwd_hold(curthread);
-       error = vn_fullpath_any(pwd->pwd_cdir, pwd->pwd_rdir, buf, retbuf, buflen);
-       pwd_drop(pwd);
+       vfs_smr_enter();
+       pwd = pwd_get_smr();
+       error = vn_fullpath_any_smr(pwd->pwd_cdir, pwd->pwd_rdir, buf, retbuf,
+           buflen, false, 0);
+       VFS_SMR_ASSERT_NOT_ENTERED();
+       if (error < 0) {
+               pwd = pwd_hold(curthread);
+               error = vn_fullpath_any(pwd->pwd_cdir, pwd->pwd_rdir, buf,
+                   retbuf, buflen);
+               pwd_drop(pwd);
+       }
 
 #ifdef KTRACE
        if (KTRPOINT(curthread, KTR_NAMEI) && error == 0)
@@ -2535,9 +2553,15 @@ vn_fullpath(struct vnode *vp, char **retbuf, char **fr
 
        buflen = MAXPATHLEN;
        buf = malloc(buflen, M_TEMP, M_WAITOK);
-       pwd = pwd_hold(curthread);
-       error = vn_fullpath_any(vp, pwd->pwd_rdir, buf, retbuf, &buflen);
-       pwd_drop(pwd);
+       vfs_smr_enter();
+       pwd = pwd_get_smr();
+       error = vn_fullpath_any_smr(vp, pwd->pwd_rdir, buf, retbuf, &buflen, false, 0);
+       VFS_SMR_ASSERT_NOT_ENTERED();
+       if (error < 0) {
+               pwd = pwd_hold(curthread);
+               error = vn_fullpath_any(vp, pwd->pwd_rdir, buf, retbuf, &buflen);
+               pwd_drop(pwd);
+       }
        if (error == 0)
                *freebuf = buf;
        else
@@ -2562,7 +2586,12 @@ vn_fullpath_global(struct vnode *vp, char **retbuf, ch
                return (EINVAL);
        buflen = MAXPATHLEN;
        buf = malloc(buflen, M_TEMP, M_WAITOK);
-       error = vn_fullpath_any(vp, rootvnode, buf, retbuf, &buflen);
+       vfs_smr_enter();
+       error = vn_fullpath_any_smr(vp, rootvnode, buf, retbuf, &buflen, false, 0);
+       VFS_SMR_ASSERT_NOT_ENTERED();
+       if (error < 0) {
+               error = vn_fullpath_any(vp, rootvnode, buf, retbuf, &buflen);
+       }
        if (error == 0)
                *freebuf = buf;
        else
@@ -2769,7 +2798,145 @@ vn_fullpath_dir(struct vnode *vp, struct vnode *rdir, 
  * - namecache is not mandatory, meaning names are not guaranteed to be added
  *   (in which case resolving fails)
  */
+static void __inline
+cache_rev_failed_impl(int *reason, int line)
+{
+       /* Save the line of the failed check for the fullpath_smr miss probe. */
+       *reason = line;
+}
+#define cache_rev_failed(var)  cache_rev_failed_impl((var), __LINE__)  /* tags *var with the caller's line */
+/* Lockless reverse lookup under SMR: 0 on hit, -1 means retry with locks. */
 static int
+vn_fullpath_any_smr(struct vnode *vp, struct vnode *rdir, char *buf,
+    char **retbuf, size_t *buflen, bool slash_prefixed, size_t addend)
+{
+#ifdef KDTRACE_HOOKS
+       struct vnode *startvp = vp;
+#endif
+       struct vnode *tvp;
+       struct mount *mp;
+       struct namecache *ncp;
+       size_t orig_buflen;
+       int reason;
+       int error;
+#ifdef KDTRACE_HOOKS
+       int i;
+#endif
+       seqc_t vp_seqc, tvp_seqc;
+       u_char nc_flag;
+
+       VFS_SMR_ASSERT_ENTERED();
+       /* Every return path below calls vfs_smr_exit() before returning. */
+       if (!cache_fast_revlookup) {
+               vfs_smr_exit();
+               return (-1);
+       }
+
+       orig_buflen = *buflen;
+
+       MPASS(*buflen >= 2);
+       /* The path is assembled right to left, starting at the end of buf. */
+       if (!slash_prefixed) {
+               *buflen -= 1;
+               buf[*buflen] = '\0';
+       }
+
+       if (vp == rdir || vp == rootvnode) {
+               if (!slash_prefixed) {
+                       *buflen -= 1;
+                       buf[*buflen] = '/';
+               }
+               goto out_ok;
+       }
+
+#ifdef KDTRACE_HOOKS
+       i = 0;
+#endif
+       error = -1;
+       ncp = NULL; /* for the miss probe: aborts can happen before it is loaded */
+       vp_seqc = vn_seqc_read_any(vp);
+       if (seqc_in_modify(vp_seqc)) {
+               cache_rev_failed(&reason);
+               goto out_abort;
+       }
+       /* Walk towards rdir/rootvnode, prepending one cached name per step. */
+       for (;;) {
+#ifdef KDTRACE_HOOKS
+               i++;
+#endif
+               if ((vp->v_vflag & VV_ROOT) != 0) { /* step to the covered vnode */
+                       mp = atomic_load_ptr(&vp->v_mount);
+                       if (mp == NULL) {
+                               cache_rev_failed(&reason);
+                               goto out_abort;
+                       }
+                       tvp = atomic_load_ptr(&mp->mnt_vnodecovered);
+                       tvp_seqc = vn_seqc_read_any(tvp);
+                       if (seqc_in_modify(tvp_seqc)) {
+                               cache_rev_failed(&reason);
+                               goto out_abort;
+                       }
+                       if (!vn_seqc_consistent(vp, vp_seqc)) {
+                               cache_rev_failed(&reason);
+                               goto out_abort;
+                       }
+                       vp = tvp;
+                       vp_seqc = tvp_seqc;
+                       continue;
+               }
+               ncp = atomic_load_ptr(&vp->v_cache_dd);
+               if (ncp == NULL) {
+                       cache_rev_failed(&reason);
+                       goto out_abort;
+               }
+               nc_flag = atomic_load_char(&ncp->nc_flag);
+               if ((nc_flag & NCF_ISDOTDOT) != 0) {
+                       cache_rev_failed(&reason);
+                       goto out_abort;
+               }
+               if (!cache_ncp_canuse(ncp)) {
+                       cache_rev_failed(&reason);
+                       goto out_abort;
+               }
+               if (ncp->nc_nlen >= *buflen) { /* no room for the name plus '/' */
+                       cache_rev_failed(&reason);
+                       error = ENOMEM; /* positive: callers only fall back on < 0 */
+                       goto out_abort;
+               }
+               *buflen -= ncp->nc_nlen;
+               memcpy(buf + *buflen, ncp->nc_name, ncp->nc_nlen);
+               *buflen -= 1;
+               buf[*buflen] = '/';
+               tvp = ncp->nc_dvp;
+               tvp_seqc = vn_seqc_read_any(tvp);
+               if (seqc_in_modify(tvp_seqc)) {
+                       cache_rev_failed(&reason);
+                       goto out_abort;
+               }
+               if (!vn_seqc_consistent(vp, vp_seqc)) {
+                       cache_rev_failed(&reason);
+                       goto out_abort;
+               }
+               vp = tvp;
+               vp_seqc = tvp_seqc;
+               if (vp == rdir || vp == rootvnode)
+                       break;
+       }
+out_ok:
+       vfs_smr_exit();
+       *retbuf = buf + *buflen; /* start of the assembled path inside buf */
+       *buflen = orig_buflen - *buflen + addend; /* bytes consumed, plus addend */
+       SDT_PROBE2(vfs, namecache, fullpath_smr, hit, startvp, *retbuf);
+       return (0);
+
+out_abort:
+       *buflen = orig_buflen; /* hand the whole buffer back to the caller */
+       SDT_PROBE4(vfs, namecache, fullpath_smr, miss, startvp, ncp, reason, i);
+       vfs_smr_exit();
+       return (error);
+}
+
+static int
 vn_fullpath_any(struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf,
     size_t *buflen)
 {
@@ -2836,7 +3003,6 @@ vn_fullpath_hardlink(struct nameidata *ndp, char **ret
        slash_prefixed = false;
 
        buf = malloc(*buflen, M_TEMP, M_WAITOK);
-       pwd = pwd_hold(curthread);
 
        addend = 0;
        vp = ndp->ni_vp;
@@ -2880,18 +3046,25 @@ vn_fullpath_hardlink(struct nameidata *ndp, char **ret
                vp = ndp->ni_dvp;
        }
 
-       vref(vp);
-       error = vn_fullpath_dir(vp, pwd->pwd_rdir, buf, retbuf, buflen,
+       vfs_smr_enter();
+       pwd = pwd_get_smr();
+       error = vn_fullpath_any_smr(vp, pwd->pwd_rdir, buf, retbuf, buflen,
            slash_prefixed, addend);
-       if (error != 0)
-               goto out_bad;
+       VFS_SMR_ASSERT_NOT_ENTERED();
+       if (error < 0) {
+               pwd = pwd_hold(curthread);
+               vref(vp);
+               error = vn_fullpath_dir(vp, pwd->pwd_rdir, buf, retbuf, buflen,
+                   slash_prefixed, addend);
+               pwd_drop(pwd);
+               if (error != 0)
+                       goto out_bad;
+       }
 
-       pwd_drop(pwd);
        *freebuf = buf;
 
        return (0);
 out_bad:
-       pwd_drop(pwd);
        free(buf, M_TEMP);
        return (error);
 }
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Reply via email to