Author: mjg
Date: Sat Jul 25 00:01:57 2020
New Revision: 363503
URL: https://svnweb.freebsd.org/changeset/base/363503

Log:
  MFC r347503,r347505,r347510,r352177,r352178,r352183,r352612,r352617,
      r352631,r352810,r352811,r352812,r352813,r354893,r355124,r355125,
      r356880,r356883,r356915
  
      cache: push sdt probes in cache_zap_locked to code doing the work
      cache: bump numcache on entry, while here fix lnumcache type
      cache: fix a brainfart in r347505
      cache: assorted cleanups
      cache: change the formula for calculating lock array sizes
      cache: avoid excessive relocking on entry removal during lookup
      cache: jump in negative success instead of positive
      cache: count evictions of negative entries
      cache: tidy up handling of negative entries
      cache: stop recalculating upper limit each time a new entry is added
      cache: make negative list shrinking a little bit concurrent
      cache: stop requeuing negative entries on the hot list
      cache: decrease ncnegfactor to 5
      cache: minor stat cleanup
      cache: fix numcache accounting on entry
      cache: stop reusing .. entries on enter
      cache: convert numcachehv to counter(9) on 64-bit platforms
      cache: counter_u64_add_protected -> counter_u64_add
      cache: make numcachehv use counter(9) on all archs
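
  The numcache handling merged here ("bump numcache on entry", "stop
  recalculating upper limit each time a new entry is added", "fix numcache
  accounting on entry") reduces to one pattern in the diff below: the cap is
  computed once as ncsize = desiredvnodes * ncsizefactor, and the entry path
  optimistically increments the counter, undoing the increment when the cap
  is hit. A minimal userspace sketch of that pattern, with C11 atomics
  standing in for the kernel's atomic(9) routines (the helper name is
  invented for the sketch):

      #include <stdatomic.h>
      #include <stdbool.h>

      static atomic_long numcache;    /* current number of namecache entries */
      static long ncsize;             /* cap; computed at init and on resize */

      /*
       * Claim a slot for a new entry. The fetch-add publishes the new count
       * before the limit check; on failure the increment is rolled back, as
       * cache_enter_time() does after this merge.
       */
      static bool
      cache_claim_slot(void)
      {
              long lnumcache;

              lnumcache = atomic_fetch_add(&numcache, 1) + 1;
              if (lnumcache >= ncsize) {
                      atomic_fetch_sub(&numcache, 1);
                      return (false);
              }
              return (true);
      }

  The counter can transiently overshoot the cap by the number of concurrent
  inserters, which is harmless for a cache limit and keeps the hot path free
  of locks.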

Modified:
  stable/12/sys/kern/vfs_cache.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/kern/vfs_cache.c
==============================================================================
--- stable/12/sys/kern/vfs_cache.c      Fri Jul 24 23:51:08 2020        (r363502)
+++ stable/12/sys/kern/vfs_cache.c      Sat Jul 25 00:01:57 2020        (r363503)
@@ -91,10 +91,10 @@ SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done
 SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *");
 SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *",
     "struct vnode *");
-SDT_PROBE_DEFINE3(vfs, namecache, zap_negative, done, "struct vnode *",
-    "char *", "int");
-SDT_PROBE_DEFINE3(vfs, namecache, shrink_negative, done, "struct vnode *",
-    "char *", "int");
+SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *",
+    "char *");
+SDT_PROBE_DEFINE2(vfs, namecache, shrink_negative, done, "struct vnode *",
+    "char *");
 
 /*
  * This structure describes the elements in the cache of recent
@@ -108,7 +108,6 @@ struct      namecache {
        struct  vnode *nc_dvp;          /* vnode of parent of name */
        union {
                struct  vnode *nu_vp;   /* vnode the name refers to */
-               u_int   nu_neghits;     /* negative entry hits */
        } n_un;
        u_char  nc_flag;                /* flag bits */
        u_char  nc_nlen;                /* length of name */
@@ -131,7 +130,6 @@ struct      namecache_ts {
 };
 
 #define        nc_vp           n_un.nu_vp
-#define        nc_neghits      n_un.nu_neghits
 
 /*
  * Flags in namecache.nc_flag
@@ -201,31 +199,22 @@ static __read_mostly LIST_HEAD(nchashhead, namecache) 
 static u_long __read_mostly    nchash;                 /* size of hash table */
 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0,
     "Size of namecache hash table");
-static u_long __read_mostly    ncnegfactor = 12; /* ratio of negative entries */
+static u_long __read_mostly    ncnegfactor = 5; /* ratio of negative entries */
 SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0,
     "Ratio of negative namecache entries");
 static u_long __exclusive_cache_line   numneg; /* number of negative entries allocated */
-SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0,
-    "Number of negative entries in namecache");
 static u_long __exclusive_cache_line   numcache;/* number of cache entries allocated */
-SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0,
-    "Number of namecache entries");
-static u_long __exclusive_cache_line   numcachehv;/* number of cache entries with vnodes held */
-SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0,
-    "Number of namecache entries with vnodes held");
-u_int __read_mostly    ncsizefactor = 2;
+u_int ncsizefactor = 2;
 SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0,
     "Size factor for namecache");
 static u_int __read_mostly     ncpurgeminvnodes;
 SYSCTL_UINT(_vfs, OID_AUTO, ncpurgeminvnodes, CTLFLAG_RW, &ncpurgeminvnodes, 0,
     "Number of vnodes below which purgevfs ignores the request");
-static u_int __read_mostly     ncneghitsrequeue = 8;
-SYSCTL_UINT(_vfs, OID_AUTO, ncneghitsrequeue, CTLFLAG_RW, &ncneghitsrequeue, 0,
-    "Number of hits to requeue a negative entry in the LRU list");
+static u_int __read_mostly     ncsize; /* the size as computed on creation or resizing */
 
 struct nchstats        nchstats;               /* cache effectiveness statistics */
 
-static struct mtx       ncneg_shrink_lock;
+static struct mtx __exclusive_cache_line       ncneg_shrink_lock;
 static int     shrink_list_turn;
 
 struct neglist {
@@ -235,6 +224,7 @@ struct neglist {
 
 static struct neglist __read_mostly    *neglists;
 static struct neglist ncneg_hot;
+static u_long numhotneg;
 
 #define        numneglists (ncneghash + 1)
 static u_int __read_mostly     ncneghash;
@@ -358,6 +348,7 @@ static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 
        SYSCTL_COUNTER_U64(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, descr);
 STATNODE_ULONG(numneg, "Number of negative cache entries");
 STATNODE_ULONG(numcache, "Number of cache entries");
+STATNODE_COUNTER(numcachehv, "Number of namecache entries with vnodes held");
 STATNODE_COUNTER(numcalls, "Number of cache lookups");
 STATNODE_COUNTER(dothits, "Number of '.' hits");
 STATNODE_COUNTER(dotdothits, "Number of '..' hits");
@@ -377,11 +368,20 @@ STATNODE_COUNTER(numfullpathfail2,
     "Number of fullpath search errors (VOP_VPTOCNP failures)");
 STATNODE_COUNTER(numfullpathfail4, "Number of fullpath search errors (ENOMEM)");
 STATNODE_COUNTER(numfullpathfound, "Number of successful fullpath calls");
+STATNODE_COUNTER(zap_and_exit_bucket_relock_success,
+    "Number of successful removals after relocking");
 static long zap_and_exit_bucket_fail; STATNODE_ULONG(zap_and_exit_bucket_fail,
     "Number of times zap_and_exit failed to lock");
+static long zap_and_exit_bucket_fail2; STATNODE_ULONG(zap_and_exit_bucket_fail2,
+    "Number of times zap_and_exit failed to lock");
 static long cache_lock_vnodes_cel_3_failures;
 STATNODE_ULONG(cache_lock_vnodes_cel_3_failures,
     "Number of times 3-way vnode locking failed");
+STATNODE_ULONG(numhotneg, "Number of hot negative entries");
+STATNODE_COUNTER(numneg_evicted,
+    "Number of negative entries evicted when adding a new entry");
+STATNODE_COUNTER(shrinking_skipped,
+    "Number of times shrinking was already in progress");
 
 static void cache_zap_locked(struct namecache *ncp, bool neg_locked);
 static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
@@ -393,7 +393,7 @@ static int cache_yield;
 SYSCTL_INT(_vfs_cache, OID_AUTO, yield, CTLFLAG_RD, &cache_yield, 0,
     "Number of times cache called yield");
 
-static void
+static void __noinline
 cache_maybe_yield(void)
 {
 
@@ -452,12 +452,14 @@ cache_assert_bucket_locked(struct namecache *ncp, int 
 #define cache_assert_bucket_locked(x, y) do { } while (0)
 #endif
 
-#define cache_sort(x, y)       _cache_sort((void **)(x), (void **)(y))
+#define cache_sort_vnodes(x, y)        _cache_sort_vnodes((void **)(x), (void **)(y))
 static void
-_cache_sort(void **p1, void **p2)
+_cache_sort_vnodes(void **p1, void **p2)
 {
        void *tmp;
 
+       MPASS(*p1 != NULL || *p2 != NULL);
+
        if (*p1 > *p2) {
                tmp = *p2;
                *p2 = *p1;
@@ -505,8 +507,7 @@ static int
 cache_trylock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
 {
 
-       cache_sort(&vlp1, &vlp2);
-       MPASS(vlp2 != NULL);
+       cache_sort_vnodes(&vlp1, &vlp2);
 
        if (vlp1 != NULL) {
                if (!mtx_trylock(vlp1))
@@ -522,6 +523,19 @@ cache_trylock_vnodes(struct mtx *vlp1, struct mtx *vlp
 }
 
 static void
+cache_lock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
+{
+
+       MPASS(vlp1 != NULL || vlp2 != NULL);
+       MPASS(vlp1 <= vlp2);
+
+       if (vlp1 != NULL)
+               mtx_lock(vlp1);
+       if (vlp2 != NULL)
+               mtx_lock(vlp2);
+}
+
+static void
 cache_unlock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
 {
 
@@ -650,8 +664,6 @@ SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE
  *
  * A variation of LRU scheme is used. New entries are hashed into one of
  * numneglists cold lists. Entries get promoted to the hot list on first hit.
- * Partial LRU for the hot list is maintained by requeueing them every
- * ncneghitsrequeue hits.
  *
  * The shrinker will demote hot list head and evict from the cold list in a
  * round-robin manner.
@@ -660,30 +672,15 @@ static void
 cache_negative_hit(struct namecache *ncp)
 {
        struct neglist *neglist;
-       u_int hits;
 
        MPASS(ncp->nc_flag & NCF_NEGATIVE);
-       hits = atomic_fetchadd_int(&ncp->nc_neghits, 1);
-       if (ncp->nc_flag & NCF_HOTNEGATIVE) {
-               if ((hits % ncneghitsrequeue) != 0)
-                       return;
-               mtx_lock(&ncneg_hot.nl_lock);
-               if (ncp->nc_flag & NCF_HOTNEGATIVE) {
-                       TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst);
-                       TAILQ_INSERT_TAIL(&ncneg_hot.nl_list, ncp, nc_dst);
-                       mtx_unlock(&ncneg_hot.nl_lock);
-                       return;
-               }
-               /*
-                * The shrinker cleared the flag and removed the entry from
-                * the hot list. Put it back.
-                */
-       } else {
-               mtx_lock(&ncneg_hot.nl_lock);
-       }
+       if (ncp->nc_flag & NCF_HOTNEGATIVE)
+               return;
        neglist = NCP2NEGLIST(ncp);
+       mtx_lock(&ncneg_hot.nl_lock);
        mtx_lock(&neglist->nl_lock);
        if (!(ncp->nc_flag & NCF_HOTNEGATIVE)) {
+               numhotneg++;
                TAILQ_REMOVE(&neglist->nl_list, ncp, nc_dst);
                TAILQ_INSERT_TAIL(&ncneg_hot.nl_list, ncp, nc_dst);
                ncp->nc_flag |= NCF_HOTNEGATIVE;
@@ -737,6 +734,7 @@ cache_negative_remove(struct namecache *ncp, bool neg_
        if (ncp->nc_flag & NCF_HOTNEGATIVE) {
                mtx_assert(&ncneg_hot.nl_lock, MA_OWNED);
                TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst);
+               numhotneg--;
        } else {
                mtx_assert(&neglist->nl_lock, MA_OWNED);
                TAILQ_REMOVE(&neglist->nl_list, ncp, nc_dst);
@@ -782,8 +780,11 @@ cache_negative_zap_one(void)
        struct mtx *dvlp;
        struct rwlock *blp;
 
-       if (!mtx_trylock(&ncneg_shrink_lock))
+       if (mtx_owner(&ncneg_shrink_lock) != NULL ||
+           !mtx_trylock(&ncneg_shrink_lock)) {
+               counter_u64_add(shrinking_skipped, 1);
                return;
+       }
 
        mtx_lock(&ncneg_hot.nl_lock);
        ncp = TAILQ_FIRST(&ncneg_hot.nl_list);
@@ -793,8 +794,10 @@ cache_negative_zap_one(void)
                TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst);
                TAILQ_INSERT_TAIL(&neglist->nl_list, ncp, nc_dst);
                ncp->nc_flag &= ~NCF_HOTNEGATIVE;
+               numhotneg--;
                mtx_unlock(&neglist->nl_lock);
        }
+       mtx_unlock(&ncneg_hot.nl_lock);
 
        cache_negative_shrink_select(shrink_list_turn, &ncp, &neglist);
        shrink_list_turn++;
@@ -802,16 +805,14 @@ cache_negative_zap_one(void)
                shrink_list_turn = 0;
        if (ncp == NULL && shrink_list_turn == 0)
                cache_negative_shrink_select(shrink_list_turn, &ncp, &neglist);
-       if (ncp == NULL) {
-               mtx_unlock(&ncneg_hot.nl_lock);
-               goto out;
-       }
+       mtx_unlock(&ncneg_shrink_lock);
+       if (ncp == NULL)
+               return;
 
        MPASS(ncp->nc_flag & NCF_NEGATIVE);
        dvlp = VP2VNODELOCK(ncp->nc_dvp);
        blp = NCP2BUCKETLOCK(ncp);
        mtx_unlock(&neglist->nl_lock);
-       mtx_unlock(&ncneg_hot.nl_lock);
        mtx_lock(dvlp);
        rw_wlock(blp);
        mtx_lock(&neglist->nl_lock);
@@ -819,18 +820,16 @@ cache_negative_zap_one(void)
        if (ncp != ncp2 || dvlp != VP2VNODELOCK(ncp2->nc_dvp) ||
            blp != NCP2BUCKETLOCK(ncp2) || !(ncp2->nc_flag & NCF_NEGATIVE)) {
                ncp = NULL;
-               goto out_unlock_all;
-       }
-       SDT_PROBE3(vfs, namecache, shrink_negative, done, ncp->nc_dvp,
-           ncp->nc_name, ncp->nc_neghits);
+       } else {
+               SDT_PROBE2(vfs, namecache, shrink_negative, done, ncp->nc_dvp,
+                   ncp->nc_name);
 
-       cache_zap_locked(ncp, true);
-out_unlock_all:
+               cache_zap_locked(ncp, true);
+               counter_u64_add(numneg_evicted, 1);
+       }
        mtx_unlock(&neglist->nl_lock);
        rw_wunlock(blp);
        mtx_unlock(dvlp);
-out:
-       mtx_unlock(&ncneg_shrink_lock);
        cache_free(ncp);
 }
 
@@ -851,19 +850,16 @@ cache_zap_locked(struct namecache *ncp, bool neg_locke
 
        CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp,
            (ncp->nc_flag & NCF_NEGATIVE) ? NULL : ncp->nc_vp);
+       LIST_REMOVE(ncp, nc_hash);
        if (!(ncp->nc_flag & NCF_NEGATIVE)) {
                SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp,
                    ncp->nc_name, ncp->nc_vp);
-       } else {
-               SDT_PROBE3(vfs, namecache, zap_negative, done, ncp->nc_dvp,
-                   ncp->nc_name, ncp->nc_neghits);
-       }
-       LIST_REMOVE(ncp, nc_hash);
-       if (!(ncp->nc_flag & NCF_NEGATIVE)) {
                TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
                if (ncp == ncp->nc_vp->v_cache_dd)
                        ncp->nc_vp->v_cache_dd = NULL;
        } else {
+               SDT_PROBE2(vfs, namecache, zap_negative, done, ncp->nc_dvp,
+                   ncp->nc_name);
                cache_negative_remove(ncp, neg_locked);
        }
        if (ncp->nc_flag & NCF_ISDOTDOT) {
@@ -873,7 +869,7 @@ cache_zap_locked(struct namecache *ncp, bool neg_locke
                LIST_REMOVE(ncp, nc_src);
                if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
                        ncp->nc_flag |= NCF_DVDROP;
-                       atomic_subtract_rel_long(&numcachehv, 1);
+                       counter_u64_add(numcachehv, -1);
                }
        }
        atomic_subtract_rel_long(&numcache, 1);
@@ -926,7 +922,7 @@ cache_zap_locked_vnode_kl2(struct namecache *ncp, stru
                        mtx_unlock(*vlpp);
                        *vlpp = NULL;
                }
-               cache_sort(&vlp1, &vlp2);
+               cache_sort_vnodes(&vlp1, &vlp2);
                if (vlp1 == pvlp) {
                        mtx_lock(vlp2);
                        to_unlock = vlp2;
@@ -952,7 +948,7 @@ out_relock:
        return (false);
 }
 
-static int
+static int __noinline
 cache_zap_locked_vnode(struct namecache *ncp, struct vnode *vp)
 {
        struct mtx *pvlp, *vlp1, *vlp2, *to_unlock;
@@ -971,7 +967,7 @@ cache_zap_locked_vnode(struct namecache *ncp, struct v
        blp = NCP2BUCKETLOCK(ncp);
        vlp1 = VP2VNODELOCK(ncp->nc_dvp);
        vlp2 = VP2VNODELOCK(ncp->nc_vp);
-       cache_sort(&vlp1, &vlp2);
+       cache_sort_vnodes(&vlp1, &vlp2);
        if (vlp1 == pvlp) {
                mtx_lock(vlp2);
                to_unlock = vlp2;
@@ -991,10 +987,47 @@ out:
        return (error);
 }
 
+/*
+ * If trylocking failed we can get here. We know enough to take all needed locks
+ * in the right order and re-lookup the entry.
+ */
 static int
-cache_zap_wlocked_bucket(struct namecache *ncp, struct rwlock *blp)
+cache_zap_unlocked_bucket(struct namecache *ncp, struct componentname *cnp,
+    struct vnode *dvp, struct mtx *dvlp, struct mtx *vlp, uint32_t hash,
+    struct rwlock *blp)
 {
+       struct namecache *rncp;
+
+       cache_assert_bucket_locked(ncp, RA_UNLOCKED);
+
+       cache_sort_vnodes(&dvlp, &vlp);
+       cache_lock_vnodes(dvlp, vlp);
+       rw_wlock(blp);
+       LIST_FOREACH(rncp, (NCHHASH(hash)), nc_hash) {
+               if (rncp == ncp && rncp->nc_dvp == dvp &&
+                   rncp->nc_nlen == cnp->cn_namelen &&
+                   !bcmp(rncp->nc_name, cnp->cn_nameptr, rncp->nc_nlen))
+                       break;
+       }
+       if (rncp != NULL) {
+               cache_zap_locked(rncp, false);
+               rw_wunlock(blp);
+               cache_unlock_vnodes(dvlp, vlp);
+               counter_u64_add(zap_and_exit_bucket_relock_success, 1);
+               return (0);
+       }
+
+       rw_wunlock(blp);
+       cache_unlock_vnodes(dvlp, vlp);
+       return (EAGAIN);
+}
+
+static int __noinline
+cache_zap_wlocked_bucket(struct namecache *ncp, struct componentname *cnp,
+    uint32_t hash, struct rwlock *blp)
+{
        struct mtx *dvlp, *vlp;
+       struct vnode *dvp;
 
        cache_assert_bucket_locked(ncp, RA_WLOCKED);
 
@@ -1009,14 +1042,17 @@ cache_zap_wlocked_bucket(struct namecache *ncp, struct
                return (0);
        }
 
+       dvp = ncp->nc_dvp;
        rw_wunlock(blp);
-       return (EAGAIN);
+       return (cache_zap_unlocked_bucket(ncp, cnp, dvp, dvlp, vlp, hash, blp));
 }
 
-static int
-cache_zap_rlocked_bucket(struct namecache *ncp, struct rwlock *blp)
+static int __noinline
+cache_zap_rlocked_bucket(struct namecache *ncp, struct componentname *cnp,
+    uint32_t hash, struct rwlock *blp)
 {
        struct mtx *dvlp, *vlp;
+       struct vnode *dvp;
 
        cache_assert_bucket_locked(ncp, RA_RLOCKED);
 
@@ -1033,8 +1069,9 @@ cache_zap_rlocked_bucket(struct namecache *ncp, struct
                return (0);
        }
 
+       dvp = ncp->nc_dvp;
        rw_runlock(blp);
-       return (EAGAIN);
+       return (cache_zap_unlocked_bucket(ncp, cnp, dvp, dvlp, vlp, hash, blp));
 }
 
 static int
@@ -1049,7 +1086,7 @@ cache_zap_wlocked_bucket_kl(struct namecache *ncp, str
        vlp = NULL;
        if (!(ncp->nc_flag & NCF_NEGATIVE))
                vlp = VP2VNODELOCK(ncp->nc_vp);
-       cache_sort(&dvlp, &vlp);
+       cache_sort_vnodes(&dvlp, &vlp);
 
        if (*vlpp1 == dvlp && *vlpp2 == vlp) {
                cache_zap_locked(ncp, false);
@@ -1196,14 +1233,13 @@ retry:
                goto out_no_entry;
        }
 
-       counter_u64_add(numposzaps, 1);
-
-       error = cache_zap_wlocked_bucket(ncp, blp);
-       if (error != 0) {
+       error = cache_zap_wlocked_bucket(ncp, cnp, hash, blp);
+       if (__predict_false(error != 0)) {
                zap_and_exit_bucket_fail++;
                cache_maybe_yield();
                goto retry;
        }
+       counter_u64_add(numposzaps, 1);
        cache_free(ncp);
        return (0);
 out_no_entry:
@@ -1323,7 +1359,7 @@ retry:
        }
 
        /* We failed to find an entry */
-       if (ncp == NULL) {
+       if (__predict_false(ncp == NULL)) {
                rw_runlock(blp);
                SDT_PROBE3(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr,
                    NULL);
@@ -1331,35 +1367,17 @@ retry:
                return (0);
        }
 
-       /* We found a "positive" match, return the vnode */
-       if (!(ncp->nc_flag & NCF_NEGATIVE)) {
-               counter_u64_add(numposhits, 1);
-               *vpp = ncp->nc_vp;
-               CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
-                   dvp, cnp->cn_nameptr, *vpp, ncp);
-               SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name,
-                   *vpp);
-               cache_out_ts(ncp, tsp, ticksp);
-               goto success;
-       }
+       if (ncp->nc_flag & NCF_NEGATIVE)
+               goto negative_success;
 
-negative_success:
-       /* We found a negative match, and want to create it, so purge */
-       if (cnp->cn_nameiop == CREATE) {
-               counter_u64_add(numnegzaps, 1);
-               goto zap_and_exit;
-       }
-
-       counter_u64_add(numneghits, 1);
-       cache_negative_hit(ncp);
-       if (ncp->nc_flag & NCF_WHITE)
-               cnp->cn_flags |= ISWHITEOUT;
-       SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
-           ncp->nc_name);
+       /* We found a "positive" match, return the vnode */
+       counter_u64_add(numposhits, 1);
+       *vpp = ncp->nc_vp;
+       CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
+           dvp, cnp->cn_nameptr, *vpp, ncp);
+       SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name,
+           *vpp);
        cache_out_ts(ncp, tsp, ticksp);
-       cache_lookup_unlock(blp, dvlp);
-       return (ENOENT);
-
 success:
        /*
         * On success we return a locked and ref'd vnode as per the lookup
@@ -1393,13 +1411,30 @@ success:
        }
        return (-1);
 
+negative_success:
+       /* We found a negative match, and want to create it, so purge */
+       if (cnp->cn_nameiop == CREATE) {
+               counter_u64_add(numnegzaps, 1);
+               goto zap_and_exit;
+       }
+
+       counter_u64_add(numneghits, 1);
+       cache_negative_hit(ncp);
+       if (ncp->nc_flag & NCF_WHITE)
+               cnp->cn_flags |= ISWHITEOUT;
+       SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
+           ncp->nc_name);
+       cache_out_ts(ncp, tsp, ticksp);
+       cache_lookup_unlock(blp, dvlp);
+       return (ENOENT);
+
 zap_and_exit:
        if (blp != NULL)
-               error = cache_zap_rlocked_bucket(ncp, blp);
+               error = cache_zap_rlocked_bucket(ncp, cnp, hash, blp);
        else
                error = cache_zap_locked_vnode(ncp, dvp);
-       if (error != 0) {
-               zap_and_exit_bucket_fail++;
+       if (__predict_false(error != 0)) {
+               zap_and_exit_bucket_fail2++;
                cache_maybe_yield();
                goto retry;
        }
@@ -1435,7 +1470,7 @@ cache_lock_vnodes_cel(struct celockstate *cel, struct 
 
        vlp1 = VP2VNODELOCK(vp);
        vlp2 = VP2VNODELOCK(dvp);
-       cache_sort(&vlp1, &vlp2);
+       cache_sort_vnodes(&vlp1, &vlp2);
 
        if (vlp1 != NULL) {
                mtx_lock(vlp1);
@@ -1505,7 +1540,7 @@ cache_lock_buckets_cel(struct celockstate *cel, struct
        MPASS(cel->blp[0] == NULL);
        MPASS(cel->blp[1] == NULL);
 
-       cache_sort(&blp1, &blp2);
+       cache_sort_vnodes(&blp1, &blp2);
 
        if (blp1 != NULL) {
                rw_wlock(blp1);
@@ -1619,6 +1654,33 @@ cache_enter_unlock(struct celockstate *cel)
        cache_unlock_vnodes_cel(cel);
 }
 
+static void __noinline
+cache_enter_dotdot_prep(struct vnode *dvp, struct vnode *vp,
+    struct componentname *cnp)
+{
+       struct celockstate cel;
+       struct namecache *ncp;
+       uint32_t hash;
+       int len;
+
+       if (dvp->v_cache_dd == NULL)
+               return;
+       len = cnp->cn_namelen;
+       cache_celockstate_init(&cel);
+       hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
+       cache_enter_lock_dd(&cel, dvp, vp, hash);
+       ncp = dvp->v_cache_dd;
+       if (ncp != NULL && (ncp->nc_flag & NCF_ISDOTDOT)) {
+               KASSERT(ncp->nc_dvp == dvp, ("wrong isdotdot parent"));
+               cache_zap_locked(ncp, false);
+       } else {
+               ncp = NULL;
+       }
+       dvp->v_cache_dd = NULL;
+       cache_enter_unlock(&cel);
+       cache_free(ncp);
+}
+
 /*
  * Add an entry to the cache.
  */
@@ -1630,12 +1692,10 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, 
        struct namecache *ncp, *n2, *ndd;
        struct namecache_ts *ncp_ts, *n2_ts;
        struct nchashhead *ncpp;
-       struct neglist *neglist;
        uint32_t hash;
        int flag;
        int len;
-       bool neg_locked;
-       int lnumcache;
+       u_long lnumcache;
 
        CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr);
        VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp,
@@ -1646,69 +1706,28 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, 
        if (__predict_false(!doingcache))
                return;
 
+       flag = 0;
+       if (__predict_false(cnp->cn_nameptr[0] == '.')) {
+               if (cnp->cn_namelen == 1)
+                       return;
+               if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
+                       cache_enter_dotdot_prep(dvp, vp, cnp);
+                       flag = NCF_ISDOTDOT;
+               }
+       }
+
        /*
         * Avoid blowout in namecache entries.
         */
-       if (__predict_false(numcache >= desiredvnodes * ncsizefactor))
+       lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
+       if (__predict_false(lnumcache >= ncsize)) {
+               atomic_add_long(&numcache, -1);
                return;
+       }
 
        cache_celockstate_init(&cel);
        ndd = NULL;
        ncp_ts = NULL;
-       flag = 0;
-       if (cnp->cn_nameptr[0] == '.') {
-               if (cnp->cn_namelen == 1)
-                       return;
-               if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
-                       len = cnp->cn_namelen;
-                       hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
-                       cache_enter_lock_dd(&cel, dvp, vp, hash);
-                       /*
-                        * If dotdot entry already exists, just retarget it
-                        * to new parent vnode, otherwise continue with new
-                        * namecache entry allocation.
-                        */
-                       if ((ncp = dvp->v_cache_dd) != NULL &&
-                           ncp->nc_flag & NCF_ISDOTDOT) {
-                               KASSERT(ncp->nc_dvp == dvp,
-                                   ("wrong isdotdot parent"));
-                               neg_locked = false;
-                               if (ncp->nc_flag & NCF_NEGATIVE || vp == NULL) {
-                                       neglist = NCP2NEGLIST(ncp);
-                                       mtx_lock(&ncneg_hot.nl_lock);
-                                       mtx_lock(&neglist->nl_lock);
-                                       neg_locked = true;
-                               }
-                               if (!(ncp->nc_flag & NCF_NEGATIVE)) {
-                                       TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst,
-                                           ncp, nc_dst);
-                               } else {
-                                       cache_negative_remove(ncp, true);
-                               }
-                               if (vp != NULL) {
-                                       TAILQ_INSERT_HEAD(&vp->v_cache_dst,
-                                           ncp, nc_dst);
-                                       ncp->nc_flag &= ~(NCF_NEGATIVE|NCF_HOTNEGATIVE);
-                               } else {
-                                       ncp->nc_flag &= ~(NCF_HOTNEGATIVE);
-                                       ncp->nc_flag |= NCF_NEGATIVE;
-                                       cache_negative_insert(ncp, true);
-                               }
-                               if (neg_locked) {
-                                       mtx_unlock(&neglist->nl_lock);
-                                       mtx_unlock(&ncneg_hot.nl_lock);
-                               }
-                               ncp->nc_vp = vp;
-                               cache_enter_unlock(&cel);
-                               return;
-                       }
-                       dvp->v_cache_dd = NULL;
-                       cache_enter_unlock(&cel);
-                       cache_celockstate_init(&cel);
-                       SDT_PROBE3(vfs, namecache, enter, done, dvp, "..", vp);
-                       flag = NCF_ISDOTDOT;
-               }
-       }
 
        /*
         * Calculate the hash key and setup as much of the new
@@ -1800,7 +1819,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, 
        if (flag != NCF_ISDOTDOT) {
                if (LIST_EMPTY(&dvp->v_cache_src)) {
                        vhold(dvp);
-                       atomic_add_rel_long(&numcachehv, 1);
+                       counter_u64_add(numcachehv, 1);
                }
                LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
        }
@@ -1828,7 +1847,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, 
                    ncp->nc_name);
        }
        cache_enter_unlock(&cel);
-       lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
        if (numneg * ncnegfactor > lnumcache)
                cache_negative_zap_one();
        cache_free(ndd);
@@ -1875,20 +1893,23 @@ nchinit(void *dummy __unused)
            NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct namecache_ts),
            UMA_ZONE_ZINIT);
 
+       ncsize = desiredvnodes * ncsizefactor;
        nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
-       ncbuckethash = cache_roundup_2(mp_ncpus * 64) - 1;
+       ncbuckethash = cache_roundup_2(mp_ncpus * mp_ncpus) - 1;
+       if (ncbuckethash < 7) /* arbitrarily chosen to avoid having one lock */
+               ncbuckethash = 7;
        if (ncbuckethash > nchash)
                ncbuckethash = nchash;
        bucketlocks = malloc(sizeof(*bucketlocks) * numbucketlocks, M_VFSCACHE,
            M_WAITOK | M_ZERO);
        for (i = 0; i < numbucketlocks; i++)
                rw_init_flags(&bucketlocks[i], "ncbuc", RW_DUPOK | RW_RECURSE);
-       ncvnodehash = cache_roundup_2(mp_ncpus * 64) - 1;
+       ncvnodehash = ncbuckethash;
        vnodelocks = malloc(sizeof(*vnodelocks) * numvnodelocks, M_VFSCACHE,
            M_WAITOK | M_ZERO);
        for (i = 0; i < numvnodelocks; i++)
                mtx_init(&vnodelocks[i], "ncvn", NULL, MTX_DUPOK | MTX_RECURSE);
-       ncpurgeminvnodes = numbucketlocks;
+       ncpurgeminvnodes = numbucketlocks * 2;
 
        ncneghash = 3;
        neglists = malloc(sizeof(*neglists) * numneglists, M_VFSCACHE,
@@ -1902,6 +1923,7 @@ nchinit(void *dummy __unused)
 
        mtx_init(&ncneg_shrink_lock, "ncnegs", NULL, MTX_DEF);
 
+       numcachehv = counter_u64_alloc(M_WAITOK);
        numcalls = counter_u64_alloc(M_WAITOK);
        dothits = counter_u64_alloc(M_WAITOK);
        dotdothits = counter_u64_alloc(M_WAITOK);
@@ -1917,6 +1939,9 @@ nchinit(void *dummy __unused)
        numfullpathfail2 = counter_u64_alloc(M_WAITOK);
        numfullpathfail4 = counter_u64_alloc(M_WAITOK);
        numfullpathfound = counter_u64_alloc(M_WAITOK);
+       zap_and_exit_bucket_relock_success = counter_u64_alloc(M_WAITOK);
+       numneg_evicted = counter_u64_alloc(M_WAITOK);
+       shrinking_skipped = counter_u64_alloc(M_WAITOK);
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);
 
@@ -1927,8 +1952,10 @@ cache_changesize(int newmaxvnodes)
        u_long new_nchash, old_nchash;
        struct namecache *ncp;
        uint32_t hash;
+       int newncsize;
        int i;
 
+       newncsize = newmaxvnodes * ncsizefactor;
        newmaxvnodes = cache_roundup_2(newmaxvnodes * 2);
        if (newmaxvnodes < numbucketlocks)
                newmaxvnodes = numbucketlocks;
@@ -1958,6 +1985,7 @@ cache_changesize(int newmaxvnodes)
                        LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash);
                }
        }
+       ncsize = newncsize;
        cache_unlock_all_buckets();
        cache_unlock_all_vnodes();
        free(old_nchashtbl, M_VFSCACHE);
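
  Finally, a note on the nchinit() sizing change visible above: the number of
  bucket locks goes from a flat cache_roundup_2(mp_ncpus * 64) to
  cache_roundup_2(mp_ncpus * mp_ncpus), floored at 8 locks (mask 7) and still
  capped at nchash, and the vnode-lock array now simply reuses that size. A
  standalone sketch of the computation with a few worked values
  (roundup_pow2() and ncbuckethash_for() reimplement the kernel logic for
  illustration only):

      #include <stdio.h>

      /* Round up to the next power of 2, as cache_roundup_2() does. */
      static unsigned long
      roundup_pow2(unsigned long val)
      {
              unsigned long res;

              for (res = 1; res < val; res <<= 1)
                      continue;
              return (res);
      }

      static unsigned long
      ncbuckethash_for(unsigned long ncpus)
      {
              unsigned long mask;

              mask = roundup_pow2(ncpus * ncpus) - 1;
              if (mask < 7)   /* arbitrarily chosen to avoid having one lock */
                      mask = 7;
              return (mask);
      }

      int
      main(void)
      {
              /* 1 cpu -> 8 locks, 4 -> 16, 16 -> 256, 64 -> 4096 */
              for (unsigned long n = 1; n <= 64; n *= 4)
                      printf("%lu cpus -> %lu locks\n", n,
                          ncbuckethash_for(n) + 1);
              return (0);
      }

  Compared with the old mp_ncpus * 64 formula this allocates far fewer locks
  on small machines while scaling faster past 64 CPUs, where the quadratic
  term takes over.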