Module Name:	src
Committed By:	ad
Date:		Wed Jan 8 11:02:16 UTC 2020
Modified Files:
	src/sys/kern [ad-namecache]: init_sysctl.c vfs_cache.c vfs_vnode.c
	src/sys/sys [ad-namecache]: namei.src vnode_impl.h

Log Message:
Redo the namecache to focus on per-directory data structures, removing
the huge hashtable and nasty locking scheme.  Initially this uses rbtrees
(because that's what's there).  The intent is to experiment with other
data structures.

To generate a diff of this commit:
cvs rdiff -u -r1.223 -r1.223.2.1 src/sys/kern/init_sysctl.c
cvs rdiff -u -r1.126 -r1.126.2.1 src/sys/kern/vfs_cache.c
cvs rdiff -u -r1.105 -r1.105.2.1 src/sys/kern/vfs_vnode.c
cvs rdiff -u -r1.47 -r1.47.2.1 src/sys/sys/namei.src
cvs rdiff -u -r1.19 -r1.19.2.1 src/sys/sys/vnode_impl.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
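[Editor's note: the log message describes per-directory lookup structures keyed
by name.  For orientation before reading the diff, the standalone userland
sketch below mirrors the ordering rule used by the new rbtree comparators
(compare name lengths first, then memcmp the bytes); it is not kernel code,
and the struct and function names in it are hypothetical -- only the
comparison rule is taken from the commit.]

	/*
	 * Illustration of the ordering used by the per-directory tree:
	 * entries compare by name length first, then by memcmp() of the
	 * bytes, as in cache_compare_nodes()/cache_compare_key() below.
	 */
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct name_key {
		size_t		nk_nlen;	/* length of the name */
		const char	*nk_name;	/* the name itself */
	};

	static int
	name_key_compare(const void *a, const void *b)
	{
		const struct name_key *k1 = a, *k2 = b;

		if (k1->nk_nlen != k2->nk_nlen)
			return (int)k1->nk_nlen - (int)k2->nk_nlen;
		return memcmp(k1->nk_name, k2->nk_name, k1->nk_nlen);
	}

	int
	main(void)
	{
		struct name_key keys[] = {
			{ 7, "longest" }, { 1, "." }, { 3, "usr" },
			{ 2, ".." }, { 3, "bin" },
		};
		size_t i, n = sizeof(keys) / sizeof(keys[0]);

		/* Sorting with the comparator shows the tree's in-order walk. */
		qsort(keys, n, sizeof(keys[0]), name_key_compare);
		for (i = 0; i < n; i++)
			printf("%zu %.*s\n", keys[i].nk_nlen,
			    (int)keys[i].nk_nlen, keys[i].nk_name);
		return 0;
	}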
Modified files: Index: src/sys/kern/init_sysctl.c diff -u src/sys/kern/init_sysctl.c:1.223 src/sys/kern/init_sysctl.c:1.223.2.1 --- src/sys/kern/init_sysctl.c:1.223 Thu Jan 2 15:42:27 2020 +++ src/sys/kern/init_sysctl.c Wed Jan 8 11:02:16 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: init_sysctl.c,v 1.223 2020/01/02 15:42:27 thorpej Exp $ */ +/* $NetBSD: init_sysctl.c,v 1.223.2.1 2020/01/08 11:02:16 ad Exp $ */ /*- * Copyright (c) 2003, 2007, 2008, 2009 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: init_sysctl.c,v 1.223 2020/01/02 15:42:27 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: init_sysctl.c,v 1.223.2.1 2020/01/08 11:02:16 ad Exp $"); #include "opt_sysv.h" #include "opt_compat_netbsd.h" @@ -732,7 +732,6 @@ sysctl_kern_maxvnodes(SYSCTLFN_ARGS) return (error); } vfs_reinit(); - nchreinit(); return (0); } Index: src/sys/kern/vfs_cache.c diff -u src/sys/kern/vfs_cache.c:1.126 src/sys/kern/vfs_cache.c:1.126.2.1 --- src/sys/kern/vfs_cache.c:1.126 Mon Jan 6 11:22:33 2020 +++ src/sys/kern/vfs_cache.c Wed Jan 8 11:02:16 2020 @@ -1,9 +1,12 @@ -/* $NetBSD: vfs_cache.c,v 1.126 2020/01/06 11:22:33 ad Exp $ */ +/* $NetBSD: vfs_cache.c,v 1.126.2.1 2020/01/08 11:02:16 ad Exp $ */ /*- - * Copyright (c) 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc. * All rights reserved. * + * This code is derived from software contributed to The NetBSD Foundation + * by Andrew Doran. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -57,23 +60,108 @@ * @(#)vfs_cache.c 8.3 (Berkeley) 8/22/94 */ +/* + * Name caching works as follows: + * + * Names found by directory scans are retained in a cache for future + * reference. It is managed pseudo-LRU, so frequently used names will + * hang around. Cache is indexed by directory. + * + * Upon reaching the last segment of a path, if the reference is for + * DELETE, or NOCACHE is set (rewrite), and name is located in the + * cache, it will be dropped. + * + * Data structures: + * + * The original BSD implementation used a global hash table, which + * works very well on a uniprocessor system but causes performance + * difficulties on a multiprocessor system. The global hash table is + * also difficult to size dynamically, and can become very large. To + * try and address these problems, the focus of interest in this + * implementation is the directory itself. A per-directory structure + * is used to look up names. + * + * XXX Currently this structure is an rbtree, but rbtrees touch many + * cache lines during a lookup and so perform badly. The intent is to + * utimately make use of some other data structure, perhaps a Robin + * Hood hash. Insert blurb here when that happens. + * + * Concurrency: + * + * There are two locks that are of particular interest: + * + * nc_dvp->vi_nclock: a per-directory lock. This is taken mainly + * during lookups. + * + * cache_list_lock: a global lock for all lists, including per-vnode + * lists and the LRU queue. This is taken mainly during insertion and + * removal, and when operating in the list -> tree direction. + * + * vp->v_interlock: per vnode interlock taken when acquiring a ref. + * + * Most all modifications are made holding both cache_list_lock and the + * directory lock. nc_hittime is modified with only the directory lock + * held. See the definition of "struct namecache" in src/sys/namei.src + * for the particulars. 
+ * + * Per-CPU statistics, and "numcache" are read unlocked, since an + * approximate value is OK. We maintain uintptr_t sized per-CPU + * counters and 64-bit global counters under the theory that uintptr_t + * sized counters are less likely to be hosed by nonatomic increment. + * + * Lock order: + * + * 1) nc_dvp->vi_nclock + * 2) cache_list_lock + * 3) vp->v_interlock + * + * Ugly ASCII diagram: + * + * ... + * | + * -----o----- + * | VDIR | + * | vnode | + * ----------- + * ^ + * |- nd_tree + * | + * +----+----+ ----------- ----------- + * | VDIR | | VCHR | | VREG | + * | vnode o-----+ | vnode o-----+ | vnode o------+ + * +---------+ | ----------- | ----------- | + * ^ | ^ | ^ | + * |- nc_vp |- vi_nclist |- nc_vp |- vi_nclist |- nc_vp | + * | | | | | | + * -----o----- | -----o----- | -----o----- | + * +---onamecache|<----+ +---onamecache|<----+ +---onamecache|<-----+ + * | ----------- | ----------- | ----------- + * | ^ | ^ | ^ + * | | | | | | + * | | +----------------------+ | | + * |-nc_dvp | +-------------------------------------------------+ + * | |/- nd_tree | | + * | | |- nc_dvp | + * | -----o----- | | + * +-->| VDIR |<----------+ | + * | vnode |<------------------------------------+ + * ----------- + */ + #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.126 2020/01/06 11:22:33 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.126.2.1 2020/01/08 11:02:16 ad Exp $"); #define __NAMECACHE_PRIVATE #ifdef _KERNEL_OPT #include "opt_ddb.h" #include "opt_dtrace.h" -#include "opt_revcache.h" #endif #include <sys/param.h> -#include <sys/atomic.h> #include <sys/cpu.h> #include <sys/errno.h> #include <sys/evcnt.h> #include <sys/kernel.h> -#include <sys/kthread.h> #include <sys/mount.h> #include <sys/mutex.h> #include <sys/namei.h> @@ -84,244 +172,42 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c, #include <sys/time.h> #include <sys/vnode_impl.h> -/* - * Name caching works as follows: - * - * Names found by directory scans are retained in a cache - * for future reference. It is managed LRU, so frequently - * used names will hang around. Cache is indexed by hash value - * obtained from (dvp, name) where dvp refers to the directory - * containing name. - * - * Upon reaching the last segment of a path, if the reference - * is for DELETE, or NOCACHE is set (rewrite), and the - * name is located in the cache, it will be dropped. - */ - -/* - * Cache entry lifetime: - * - * nonexistent - * ---create---> active - * ---invalidate---> queued - * ---reclaim---> nonexistent. - * - * States: - * - Nonexistent. Cache entry does not exist. - * - * - Active. cache_lookup, cache_lookup_raw, cache_revlookup can look - * up, acquire references, and hand off references to vnodes, - * e.g. via v_interlock. Marked by nonnull ncp->nc_dvp. - * - * - Queued. Pending desstruction by cache_reclaim. Cannot be used by - * cache_lookup, cache_lookup_raw, or cache_revlookup. May still be - * on lists. Marked by null ncp->nc_dvp. - * - * Transitions: - * - * - Create: nonexistent--->active - * - * Done by cache_enter(dvp, vp, name, namelen, cnflags), called by - * VOP_LOOKUP after the answer is found. Allocates a struct - * namecache object, initializes it with the above fields, and - * activates it by inserting it into the forward and reverse tables. - * - * - Invalidate: active--->queued - * - * Done by cache_invalidate. If not already invalidated, nullify - * ncp->nc_dvp and ncp->nc_vp, and add to cache_gcqueue. 
Called, - * among various other places, in cache_lookup(dvp, name, namelen, - * nameiop, cnflags, &iswht, &vp) when MAKEENTRY is missing from - * cnflags. - * - * - Reclaim: queued--->nonexistent - * - * Done by cache_reclaim. Disassociate ncp from any lists it is on - * and free memory. - */ - -/* - * Locking. - * - * L namecache_lock Global lock for namecache table and queues. - * C struct nchcpu::cpu_lock Per-CPU lock to reduce read contention. - * N struct namecache::nc_lock Per-entry lock. - * V struct vnode::v_interlock Vnode interlock. - * - * Lock order: L -> C -> N -> V - * - * Examples: - * . L->C: cache_reclaim - * . C->N->V: cache_lookup - * . L->N->V: cache_purge1, cache_revlookup - * - * All use serialized by namecache_lock: - * - * nclruhead / struct namecache::nc_lru - * struct vnode_impl::vi_dnclist / struct namecache::nc_dvlist - * struct vnode_impl::vi_nclist / struct namecache::nc_vlist - * nchstats - * - * - Insertion serialized by namecache_lock, - * - read protected by per-CPU lock, - * - insert/read ordering guaranteed by memory barriers, and - * - deletion allowed only under namecache_lock and *all* per-CPU locks - * in CPU_INFO_FOREACH order: - * - * nchashtbl / struct namecache::nc_hash - * - * The per-CPU locks exist only to reduce the probability of - * contention between readers. We do not bind to a CPU, so - * contention is still possible. - * - * All use serialized by struct namecache::nc_lock: - * - * struct namecache::nc_dvp - * struct namecache::nc_vp - * struct namecache::nc_gcqueue (*) - * struct namecache::nc_hittime (**) - * - * (*) Once on the queue, only cache_thread uses this nc_gcqueue, unlocked. - * (**) cache_prune reads nc_hittime unlocked, since approximate is OK. - * - * Unlocked because stable after initialization: - * - * struct namecache::nc_dvp - * struct namecache::nc_vp - * struct namecache::nc_flags - * struct namecache::nc_nlen - * struct namecache::nc_name - * - * Unlocked because approximation is OK: - * - * struct nchcpu::cpu_stats - * struct nchcpu::cpu_stats_last - * - * Updates under namecache_lock or any per-CPU lock are marked with - * COUNT, while updates outside those locks are marked with COUNT_UNL. - * - * - The theory seems to have been that you could replace COUNT_UNL by - * atomic operations -- except that doesn't help unless you also - * replace COUNT by atomic operations, because mixing atomics and - * nonatomics is a recipe for failure. - * - We use 32-bit per-CPU counters and 64-bit global counters under - * the theory that 32-bit counters are less likely to be hosed by - * nonatomic increment. - */ - -/* - * The comment below is preserved for posterity in case it is - * important, but it is clear that everywhere the namecache_count_*() - * functions are called, other cache_*() functions that take the same - * locks are also called, so I can't imagine how this could be a - * problem: - * - * N.B.: Attempting to protect COUNT_UNL() increments by taking - * a per-cpu lock in the namecache_count_*() functions causes - * a deadlock. Don't do that, use atomic increments instead if - * the imperfections here bug you. - */ - -/* - * struct nchstats_percpu: - * - * Per-CPU counters. - */ -struct nchstats_percpu _NAMEI_CACHE_STATS(uint32_t); - -/* - * struct nchcpu: - * - * Per-CPU namecache state: lock and per-CPU counters. - */ -struct nchcpu { - kmutex_t cpu_lock; - struct nchstats_percpu cpu_stats; - /* XXX maybe __cacheline_aligned would improve this? 
*/ - struct nchstats_percpu cpu_stats_last; /* from last sample */ -}; - -/* - * The type for the hash code. While the hash function generates a - * u32, the hash code has historically been passed around as a u_long, - * and the value is modified by xor'ing a uintptr_t, so it's not - * entirely clear what the best type is. For now I'll leave it - * unchanged as u_long. - */ - -typedef u_long nchash_t; - -/* - * Structures associated with name cacheing. - */ +/* Per-CPU counters. */ +struct nchstats_percpu _NAMEI_CACHE_STATS(uintptr_t); -static kmutex_t *namecache_lock __read_mostly; +/* Global lock, lists and pool. */ +static kmutex_t cache_list_lock __cacheline_aligned; static pool_cache_t namecache_cache __read_mostly; static TAILQ_HEAD(, namecache) nclruhead __cacheline_aligned; -static LIST_HEAD(nchashhead, namecache) *nchashtbl __read_mostly; -static u_long nchash __read_mostly; - -#define NCHASH2(hash, dvp) \ - (((hash) ^ ((uintptr_t)(dvp) >> 3)) & nchash) - /* Number of cache entries allocated. */ -static long numcache __cacheline_aligned; - -/* Garbage collection queue and number of entries pending in it. */ -static void *cache_gcqueue; -static u_int cache_gcpend; +static u_int numcache __cacheline_aligned; /* Cache effectiveness statistics. This holds total from per-cpu stats */ struct nchstats nchstats __cacheline_aligned; -/* - * Macros to count an event, update the central stats with per-cpu - * values and add current per-cpu increments to the subsystem total - * last collected by cache_reclaim(). - */ -#define CACHE_STATS_CURRENT /* nothing */ - -#define COUNT(cpup, f) ((cpup)->cpu_stats.f++) - -#define UPDATE(cpup, f) do { \ - struct nchcpu *Xcpup = (cpup); \ - uint32_t Xcnt = (volatile uint32_t) Xcpup->cpu_stats.f; \ - nchstats.f += Xcnt - Xcpup->cpu_stats_last.f; \ - Xcpup->cpu_stats_last.f = Xcnt; \ -} while (/* CONSTCOND */ 0) - -#define ADD(stats, cpup, f) do { \ - struct nchcpu *Xcpup = (cpup); \ - stats.f += Xcpup->cpu_stats.f - Xcpup->cpu_stats_last.f; \ -} while (/* CONSTCOND */ 0) - -/* Do unlocked stats the same way. Use a different name to allow mind changes */ -#define COUNT_UNL(cpup, f) COUNT((cpup), f) +/* Macro to count an event. 
*/ +#define COUNT(f) do { \ + kpreempt_disable(); \ + ((struct nchstats_percpu *)curcpu()->ci_data.cpu_nch)->f++; \ + kpreempt_enable(); \ +} while (/* CONSTCOND */ 0); -static const int cache_lowat = 95; -static const int cache_hiwat = 98; +/* Tunables */ static const int cache_hottime = 5; /* number of seconds */ +static const int cache_maxscan = 128; /* reclaim: max entries to scan */ static int doingcache = 1; /* 1 => enable the cache */ -static struct evcnt cache_ev_scan; -static struct evcnt cache_ev_gc; -static struct evcnt cache_ev_over; -static struct evcnt cache_ev_under; -static struct evcnt cache_ev_forced; - -static struct namecache *cache_lookup_entry( - const struct vnode *, const char *, size_t); -static void cache_thread(void *); -static void cache_invalidate(struct namecache *); -static void cache_disassociate(struct namecache *); -static void cache_reclaim(void); -static int cache_ctor(void *, void *, int); -static void cache_dtor(void *, void *); - -static struct sysctllog *sysctllog; -static void sysctl_cache_stat_setup(void); +static void cache_reclaim(void); +static int cache_ctor(void *, void *, int); +static void cache_dtor(void *, void *); +static void cache_remove(struct namecache *, bool); + +/* sysctl */ +static struct sysctllog *sysctllog; +static void sysctl_cache_stat_setup(void); +/* dtrace hooks */ SDT_PROVIDER_DEFINE(vfs); SDT_PROBE_DEFINE1(vfs, namecache, invalidate, done, "struct vnode *"); @@ -346,159 +232,119 @@ SDT_PROBE_DEFINE3(vfs, namecache, enter, "char *", "size_t"); /* - * Compute the hash for an entry. - * - * (This is for now a wrapper around namei_hash, whose interface is - * for the time being slightly inconvenient.) + * rbtree: compare two nodes. */ -static nchash_t -cache_hash(const char *name, size_t namelen) +static int +cache_compare_nodes(void *context, const void *n1, const void *n2) { - const char *endptr; + const struct namecache *ncp1 = n1; + const struct namecache *ncp2 = n2; - endptr = name + namelen; - return namei_hash(name, &endptr); + if (ncp1->nc_nlen != ncp2->nc_nlen) { + return (int)ncp1->nc_nlen - (int)ncp2->nc_nlen; + } + return memcmp(ncp1->nc_name, ncp2->nc_name, ncp1->nc_nlen); } /* - * Invalidate a cache entry and enqueue it for garbage collection. - * The caller needs to hold namecache_lock or a per-cpu lock to hold - * off cache_reclaim(). + * rbtree: compare a node and a key. */ -static void -cache_invalidate(struct namecache *ncp) +static int +cache_compare_key(void *context, const void *n, const void *k) { - void *head; - - KASSERT(mutex_owned(&ncp->nc_lock)); + const struct namecache *ncp = n; + const struct iovec *iov = k; - if (ncp->nc_dvp != NULL) { - SDT_PROBE(vfs, namecache, invalidate, done, ncp->nc_dvp, - 0, 0, 0, 0); - - ncp->nc_vp = NULL; - ncp->nc_dvp = NULL; - do { - head = cache_gcqueue; - ncp->nc_gcqueue = head; - } while (atomic_cas_ptr(&cache_gcqueue, head, ncp) != head); - atomic_inc_uint(&cache_gcpend); + if (ncp->nc_nlen != iov->iov_len) { + return (int)ncp->nc_nlen - (int)iov->iov_len; } + return memcmp(ncp->nc_name, iov->iov_base, ncp->nc_nlen); } /* - * Disassociate a namecache entry from any vnodes it is attached to, - * and remove from the global LRU list. + * rbtree definition. 
*/ -static void -cache_disassociate(struct namecache *ncp) -{ - - KASSERT(mutex_owned(namecache_lock)); - KASSERT(ncp->nc_dvp == NULL); - - if (ncp->nc_lru.tqe_prev != NULL) { - TAILQ_REMOVE(&nclruhead, ncp, nc_lru); - ncp->nc_lru.tqe_prev = NULL; - } - if (ncp->nc_vlist.le_prev != NULL) { - LIST_REMOVE(ncp, nc_vlist); - ncp->nc_vlist.le_prev = NULL; - } - if (ncp->nc_dvlist.le_prev != NULL) { - LIST_REMOVE(ncp, nc_dvlist); - ncp->nc_dvlist.le_prev = NULL; - } -} +static rb_tree_ops_t cache_rbtree_ops __read_mostly = { + .rbto_compare_nodes = cache_compare_nodes, + .rbto_compare_key = cache_compare_key, + .rbto_node_offset = offsetof(struct namecache, nc_node), + .rbto_context = NULL +}; /* - * Lock all CPUs to prevent any cache lookup activity. Conceptually, - * this locks out all "readers". + * Remove an entry from the cache. The directory must be locked, and if + * "inreverse" is false, cache_list_lock will be acquired when removing the + * entry from the global lists. */ static void -cache_lock_cpus(void) +cache_remove(struct namecache *ncp, bool inreverse) { - CPU_INFO_ITERATOR cii; - struct cpu_info *ci; - struct nchcpu *cpup; - /* - * Lock out all CPUs first, then harvest per-cpu stats. This - * is probably not quite as cache-efficient as doing the lock - * and harvest at the same time, but allows cache_stat_sysctl() - * to make do with a per-cpu lock. - */ - for (CPU_INFO_FOREACH(cii, ci)) { - cpup = ci->ci_data.cpu_nch; - mutex_enter(&cpup->cpu_lock); + KASSERT(mutex_owned(VNODE_TO_VIMPL(ncp->nc_dvp)->vi_nclock)); + + SDT_PROBE(vfs, namecache, invalidate, done, ncp->nc_dvp, + 0, 0, 0, 0); + + /* First remove from the directory's rbtree. */ + rb_tree_remove_node(&VNODE_TO_VIMPL(ncp->nc_dvp)->vi_nctree, ncp); + + /* Then remove from the lists. */ + if (!inreverse) { + mutex_enter(&cache_list_lock); } - for (CPU_INFO_FOREACH(cii, ci)) { - cpup = ci->ci_data.cpu_nch; - UPDATE(cpup, ncs_goodhits); - UPDATE(cpup, ncs_neghits); - UPDATE(cpup, ncs_badhits); - UPDATE(cpup, ncs_falsehits); - UPDATE(cpup, ncs_miss); - UPDATE(cpup, ncs_long); - UPDATE(cpup, ncs_pass2); - UPDATE(cpup, ncs_2passes); - UPDATE(cpup, ncs_revhits); - UPDATE(cpup, ncs_revmiss); + ncp->nc_dvp = NULL; + if (ncp->nc_vp != NULL) { + TAILQ_REMOVE(&VNODE_TO_VIMPL(ncp->nc_vp)->vi_nclist, ncp, + nc_vlist); + ncp->nc_vp = NULL; + } + TAILQ_REMOVE(&nclruhead, ncp, nc_lru); + numcache--; + if (!inreverse) { + mutex_exit(&cache_list_lock); } -} - -/* - * Release all CPU locks. - */ -static void -cache_unlock_cpus(void) -{ - CPU_INFO_ITERATOR cii; - struct cpu_info *ci; - struct nchcpu *cpup; - for (CPU_INFO_FOREACH(cii, ci)) { - cpup = ci->ci_data.cpu_nch; - mutex_exit(&cpup->cpu_lock); + /* Finally, free it. */ + if (ncp->nc_nlen > NCHNAMLEN) { + size_t sz = offsetof(struct namecache, nc_name[ncp->nc_nlen]); + kmem_free(ncp, sz); + } else { + pool_cache_put(namecache_cache, ncp); } } /* - * Find a single cache entry and return it locked. - * The caller needs to hold namecache_lock or a per-cpu lock to hold - * off cache_reclaim(). + * Find a single cache entry and return it locked. The directory lock must + * be held. 
*/ static struct namecache * -cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen) +cache_lookup_entry(struct vnode *dvp, const char *name, size_t namelen) { - struct nchashhead *ncpp; struct namecache *ncp; - nchash_t hash; + struct iovec iov; - KASSERT(dvp != NULL); - hash = cache_hash(name, namelen); - ncpp = &nchashtbl[NCHASH2(hash, dvp)]; - - LIST_FOREACH(ncp, ncpp, nc_hash) { - membar_datadep_consumer(); /* for Alpha... */ - if (ncp->nc_dvp != dvp || - ncp->nc_nlen != namelen || - memcmp(ncp->nc_name, name, (u_int)ncp->nc_nlen)) - continue; - mutex_enter(&ncp->nc_lock); - if (__predict_true(ncp->nc_dvp == dvp)) { + KASSERT(mutex_owned(VNODE_TO_VIMPL(dvp)->vi_nclock)); + + iov.iov_base = __UNCONST(name); + iov.iov_len = namelen; + ncp = rb_tree_find_node(&VNODE_TO_VIMPL(dvp)->vi_nctree, &iov); + + if (ncp != NULL) { + KASSERT(ncp->nc_dvp == dvp); + /* + * Avoid false sharing: don't write back to nc_hittime + * unless it has changed within the last 32 ticks. + */ + if (((ncp->nc_hittime ^ hardclock_ticks) & ~31) != 0) { ncp->nc_hittime = hardclock_ticks; - SDT_PROBE(vfs, namecache, lookup, hit, dvp, - name, namelen, 0, 0); - return ncp; } - /* Raced: entry has been nullified. */ - mutex_exit(&ncp->nc_lock); + SDT_PROBE(vfs, namecache, lookup, hit, dvp, + name, namelen, 0, 0); + } else { + SDT_PROBE(vfs, namecache, lookup, miss, dvp, + name, namelen, 0, 0); } - - SDT_PROBE(vfs, namecache, lookup, miss, dvp, - name, namelen, 0, 0); - return NULL; + return ncp; } /* @@ -558,11 +404,10 @@ cache_lookup(struct vnode *dvp, const ch { struct namecache *ncp; struct vnode *vp; - struct nchcpu *cpup; + kmutex_t *dirlock; int error; bool hit; - /* Establish default result values */ if (iswht_ret != NULL) { *iswht_ret = 0; @@ -573,73 +418,66 @@ cache_lookup(struct vnode *dvp, const ch return false; } - cpup = curcpu()->ci_data.cpu_nch; - mutex_enter(&cpup->cpu_lock); if (__predict_false(namelen > USHRT_MAX)) { SDT_PROBE(vfs, namecache, lookup, toolong, dvp, name, namelen, 0, 0); - COUNT(cpup, ncs_long); - mutex_exit(&cpup->cpu_lock); + COUNT(ncs_long); /* found nothing */ return false; } + dirlock = VNODE_TO_VIMPL(dvp)->vi_nclock; + mutex_enter(dirlock); ncp = cache_lookup_entry(dvp, name, namelen); if (__predict_false(ncp == NULL)) { - COUNT(cpup, ncs_miss); - mutex_exit(&cpup->cpu_lock); + mutex_exit(dirlock); /* found nothing */ + COUNT(ncs_miss); return false; } - if ((cnflags & MAKEENTRY) == 0) { - COUNT(cpup, ncs_badhits); + if (__predict_false((cnflags & MAKEENTRY) == 0)) { /* * Last component and we are renaming or deleting, * the cache entry is invalid, or otherwise don't * want cache entry to exist. */ - cache_invalidate(ncp); - mutex_exit(&ncp->nc_lock); - mutex_exit(&cpup->cpu_lock); + cache_remove(ncp, false); + mutex_exit(dirlock); /* found nothing */ + COUNT(ncs_badhits); return false; } - if (ncp->nc_vp == NULL) { - if (iswht_ret != NULL) { - /* - * Restore the ISWHITEOUT flag saved earlier. - */ - KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0); - *iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0; - } else { - KASSERT(ncp->nc_flags == 0); - } - + if (__predict_false(ncp->nc_vp == NULL)) { if (__predict_true(nameiop != CREATE || (cnflags & ISLASTCN) == 0)) { - COUNT(cpup, ncs_neghits); + COUNT(ncs_neghits); /* found neg entry; vn is already null from above */ hit = true; } else { - COUNT(cpup, ncs_badhits); /* * Last component and we are preparing to create * the named object, so flush the negative cache * entry. 
*/ - cache_invalidate(ncp); + COUNT(ncs_badhits); + cache_remove(ncp, false); /* found nothing */ hit = false; } - mutex_exit(&ncp->nc_lock); - mutex_exit(&cpup->cpu_lock); + if (iswht_ret != NULL) { + /* + * Restore the ISWHITEOUT flag saved earlier. + */ + *iswht_ret = ncp->nc_whiteout; + } else { + KASSERT(!ncp->nc_whiteout); + } + mutex_exit(dirlock); return hit; } - vp = ncp->nc_vp; mutex_enter(vp->v_interlock); - mutex_exit(&ncp->nc_lock); - mutex_exit(&cpup->cpu_lock); + mutex_exit(dirlock); /* * Unlocked except for the vnode interlock. Call vcache_tryvget(). @@ -651,20 +489,22 @@ cache_lookup(struct vnode *dvp, const ch * This vnode is being cleaned out. * XXX badhits? */ - COUNT_UNL(cpup, ncs_falsehits); + COUNT(ncs_falsehits); /* found nothing */ return false; } - COUNT_UNL(cpup, ncs_goodhits); + COUNT(ncs_goodhits); /* found it */ *vn_ret = vp; return true; } - /* * Cut-'n-pasted version of the above without the nameiop argument. + * + * This could also be used for namei's LOOKUP case, i.e. when the directory + * is not being modified. */ bool cache_lookup_raw(struct vnode *dvp, const char *name, size_t namelen, @@ -673,7 +513,7 @@ cache_lookup_raw(struct vnode *dvp, cons { struct namecache *ncp; struct vnode *vp; - struct nchcpu *cpup; + kmutex_t *dirlock; int error; /* Establish default results. */ @@ -687,19 +527,19 @@ cache_lookup_raw(struct vnode *dvp, cons return false; } - cpup = curcpu()->ci_data.cpu_nch; - mutex_enter(&cpup->cpu_lock); + dirlock = VNODE_TO_VIMPL(dvp)->vi_nclock; + mutex_enter(dirlock); if (__predict_false(namelen > USHRT_MAX)) { - COUNT(cpup, ncs_long); - mutex_exit(&cpup->cpu_lock); + mutex_exit(dirlock); /* found nothing */ + COUNT(ncs_long); return false; } ncp = cache_lookup_entry(dvp, name, namelen); if (__predict_false(ncp == NULL)) { - COUNT(cpup, ncs_miss); - mutex_exit(&cpup->cpu_lock); + mutex_exit(dirlock); /* found nothing */ + COUNT(ncs_miss); return false; } vp = ncp->nc_vp; @@ -708,19 +548,16 @@ cache_lookup_raw(struct vnode *dvp, cons * Restore the ISWHITEOUT flag saved earlier. */ if (iswht_ret != NULL) { - KASSERT((ncp->nc_flags & ~ISWHITEOUT) == 0); /*cnp->cn_flags |= ncp->nc_flags;*/ - *iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0; + *iswht_ret = ncp->nc_whiteout; } - COUNT(cpup, ncs_neghits); - mutex_exit(&ncp->nc_lock); - mutex_exit(&cpup->cpu_lock); + mutex_exit(dirlock); /* found negative entry; vn is already null from above */ + COUNT(ncs_neghits); return true; } mutex_enter(vp->v_interlock); - mutex_exit(&ncp->nc_lock); - mutex_exit(&cpup->cpu_lock); + mutex_exit(dirlock); /* * Unlocked except for the vnode interlock. Call vcache_tryvget(). @@ -732,12 +569,12 @@ cache_lookup_raw(struct vnode *dvp, cons * This vnode is being cleaned out. * XXX badhits? */ - COUNT_UNL(cpup, ncs_falsehits); + COUNT(ncs_falsehits); /* found nothing */ return false; } - COUNT_UNL(cpup, ncs_goodhits); /* XXX can be "badhits" */ + COUNT(ncs_goodhits); /* XXX can be "badhits" */ /* found it */ *vn_ret = vp; return true; @@ -745,6 +582,7 @@ cache_lookup_raw(struct vnode *dvp, cons /* * Scan cache looking for name of directory entry pointing at vp. + * Will not search for "." or "..". * * If the lookup succeeds the vnode is referenced and stored in dvpp. 
* @@ -760,7 +598,6 @@ cache_revlookup(struct vnode *vp, struct { struct namecache *ncp; struct vnode *dvp; - struct nchcpu *cpup; char *bp; int error, nlen; @@ -769,70 +606,57 @@ cache_revlookup(struct vnode *vp, struct if (!doingcache) goto out; - /* - * We increment counters in the local CPU's per-cpu stats. - * We don't take the per-cpu lock, however, since this function - * is the only place these counters are incremented so no one - * will be racing with us to increment them. - */ - cpup = curcpu()->ci_data.cpu_nch; - mutex_enter(namecache_lock); - LIST_FOREACH(ncp, &VNODE_TO_VIMPL(vp)->vi_nclist, nc_vlist) { - mutex_enter(&ncp->nc_lock); - if (ncp->nc_vp == vp && - (dvp = ncp->nc_dvp) != NULL && - dvp != vp) { /* avoid pesky . entries.. */ - if (ncp->nc_nlen == 1 && - ncp->nc_name[0] == '.') { - mutex_exit(&ncp->nc_lock); - continue; - } - if (ncp->nc_nlen == 2 && - ncp->nc_name[0] == '.' && - ncp->nc_name[1] == '.') { - mutex_exit(&ncp->nc_lock); - continue; - } - COUNT(cpup, ncs_revhits); - nlen = ncp->nc_nlen; - - if (bufp) { - bp = *bpp; - bp -= nlen; - if (bp <= bufp) { - *dvpp = NULL; - mutex_exit(&ncp->nc_lock); - mutex_exit(namecache_lock); - SDT_PROBE(vfs, namecache, revlookup, - fail, vp, ERANGE, 0, 0, 0); - return (ERANGE); - } - memcpy(bp, ncp->nc_name, nlen); - *bpp = bp; + mutex_enter(&cache_list_lock); + TAILQ_FOREACH(ncp, &VNODE_TO_VIMPL(vp)->vi_nclist, nc_vlist) { + KASSERT(ncp->nc_vp == vp); + KASSERT(ncp->nc_dvp != NULL); + nlen = ncp->nc_nlen; + /* + * The queue is partially sorted. Once we hit dots, nothing + * else remains but dots and dotdots, so bail out. + */ + if (ncp->nc_name[0] == '.') { + if (nlen == 1 || + (nlen == 2 && ncp->nc_name[1] == '.')) { + break; } + } - mutex_enter(dvp->v_interlock); - mutex_exit(&ncp->nc_lock); - mutex_exit(namecache_lock); - error = vcache_tryvget(dvp); - if (error) { - KASSERT(error == EBUSY); - if (bufp) - (*bpp) += nlen; + if (bufp) { + bp = *bpp; + bp -= nlen; + if (bp <= bufp) { *dvpp = NULL; - SDT_PROBE(vfs, namecache, revlookup, fail, vp, - error, 0, 0, 0); - return -1; + mutex_exit(&cache_list_lock); + SDT_PROBE(vfs, namecache, revlookup, + fail, vp, ERANGE, 0, 0, 0); + return (ERANGE); } - *dvpp = dvp; - SDT_PROBE(vfs, namecache, revlookup, success, vp, dvp, - 0, 0, 0); - return (0); + memcpy(bp, ncp->nc_name, nlen); + *bpp = bp; } - mutex_exit(&ncp->nc_lock); + + dvp = ncp->nc_dvp; + mutex_enter(dvp->v_interlock); + mutex_exit(&cache_list_lock); + error = vcache_tryvget(dvp); + if (error) { + KASSERT(error == EBUSY); + if (bufp) + (*bpp) += nlen; + *dvpp = NULL; + SDT_PROBE(vfs, namecache, revlookup, fail, vp, + error, 0, 0, 0); + return -1; + } + *dvpp = dvp; + SDT_PROBE(vfs, namecache, revlookup, success, vp, dvp, + 0, 0, 0); + COUNT(ncs_revhits); + return (0); } - COUNT(cpup, ncs_revmiss); - mutex_exit(namecache_lock); + mutex_exit(&cache_list_lock); + COUNT(ncs_revmiss); out: *dvpp = NULL; return (-1); @@ -847,8 +671,7 @@ cache_enter(struct vnode *dvp, struct vn { struct namecache *ncp; struct namecache *oncp; - struct nchashhead *ncpp; - nchash_t hash; + vnode_impl_t *vi, *dvi; /* First, check whether we can/should add a cache entry. 
*/ if ((cnflags & MAKEENTRY) == 0 || @@ -859,110 +682,80 @@ cache_enter(struct vnode *dvp, struct vn } SDT_PROBE(vfs, namecache, enter, done, vp, name, namelen, 0, 0); - if (numcache > desiredvnodes) { - mutex_enter(namecache_lock); - cache_ev_forced.ev_count++; + + if (__predict_false(numcache > desiredvnodes)) { + mutex_enter(&cache_list_lock); cache_reclaim(); - mutex_exit(namecache_lock); + mutex_exit(&cache_list_lock); } if (namelen > NCHNAMLEN) { - ncp = kmem_alloc(sizeof(*ncp) + namelen, KM_SLEEP); - cache_ctor(NULL, ncp, 0); - } else + size_t sz = offsetof(struct namecache, nc_name[namelen]); + ncp = kmem_alloc(sz, KM_SLEEP); + } else { ncp = pool_cache_get(namecache_cache, PR_WAITOK); - - mutex_enter(namecache_lock); - numcache++; + } /* * Concurrent lookups in the same directory may race for a * cache entry. if there's a duplicated entry, free it. */ + dvi = VNODE_TO_VIMPL(dvp); + mutex_enter(dvi->vi_nclock); oncp = cache_lookup_entry(dvp, name, namelen); if (oncp) { - cache_invalidate(oncp); - mutex_exit(&oncp->nc_lock); + cache_remove(oncp, false); } - /* Grab the vnode we just found. */ - mutex_enter(&ncp->nc_lock); + /* Fill in cache info and insert to directory. */ + ncp->nc_dvp = dvp; + KASSERT(namelen <= USHRT_MAX); + ncp->nc_nlen = namelen; + memcpy(ncp->nc_name, name, (unsigned)ncp->nc_nlen); + rb_tree_insert_node(&dvi->vi_nctree, ncp); ncp->nc_vp = vp; - ncp->nc_flags = 0; - ncp->nc_hittime = 0; - ncp->nc_gcqueue = NULL; + ncp->nc_hittime = hardclock_ticks; + + /* Insert to the lists. */ + mutex_enter(&cache_list_lock); + numcache++; + TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); if (vp == NULL) { /* * For negative hits, save the ISWHITEOUT flag so we can * restore it later when the cache entry is used again. */ - ncp->nc_flags = cnflags & ISWHITEOUT; - } - - /* Fill in cache info. */ - ncp->nc_dvp = dvp; - LIST_INSERT_HEAD(&VNODE_TO_VIMPL(dvp)->vi_dnclist, ncp, nc_dvlist); - if (vp) - LIST_INSERT_HEAD(&VNODE_TO_VIMPL(vp)->vi_nclist, ncp, nc_vlist); - else { - ncp->nc_vlist.le_prev = NULL; - ncp->nc_vlist.le_next = NULL; + ncp->nc_whiteout = ((cnflags & ISWHITEOUT) != 0); + } else { + ncp->nc_whiteout = false; + vi = VNODE_TO_VIMPL(vp); + /* Partially sort the per-vnode list: dots go to back. */ + if ((name[0] == '.' && namelen == 1) || + (name[0] == '.' && name[1] == '.' && namelen == 2)) { + TAILQ_INSERT_TAIL(&vi->vi_nclist, ncp, nc_vlist); + } else { + TAILQ_INSERT_HEAD(&vi->vi_nclist, ncp, nc_vlist); + } } - KASSERT(namelen <= USHRT_MAX); - ncp->nc_nlen = namelen; - memcpy(ncp->nc_name, name, (unsigned)ncp->nc_nlen); - TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); - hash = cache_hash(name, namelen); - ncpp = &nchashtbl[NCHASH2(hash, dvp)]; - - /* - * Flush updates before making visible in table. No need for a - * memory barrier on the other side: to see modifications the - * list must be followed, meaning a dependent pointer load. - * The below is LIST_INSERT_HEAD() inlined, with the memory - * barrier included in the correct place. - */ - if ((ncp->nc_hash.le_next = ncpp->lh_first) != NULL) - ncpp->lh_first->nc_hash.le_prev = &ncp->nc_hash.le_next; - ncp->nc_hash.le_prev = &ncpp->lh_first; - membar_producer(); - ncpp->lh_first = ncp; - mutex_exit(&ncp->nc_lock); - mutex_exit(namecache_lock); + mutex_exit(&cache_list_lock); + mutex_exit(dvi->vi_nclock); } /* - * Name cache initialization, from vfs_init() when we are booting + * Name cache initialization, from vfs_init() when we are booting. 
*/ void nchinit(void) { - int error; TAILQ_INIT(&nclruhead); - namecache_cache = pool_cache_init(sizeof(struct namecache) + NCHNAMLEN, + + namecache_cache = pool_cache_init(sizeof(struct namecache), coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor, cache_dtor, NULL); KASSERT(namecache_cache != NULL); - namecache_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); - nchashtbl = hashinit(desiredvnodes, HASH_LIST, true, &nchash); - - error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, cache_thread, - NULL, NULL, "cachegc"); - if (error != 0) - panic("nchinit %d", error); - - evcnt_attach_dynamic(&cache_ev_scan, EVCNT_TYPE_MISC, NULL, - "namecache", "entries scanned"); - evcnt_attach_dynamic(&cache_ev_gc, EVCNT_TYPE_MISC, NULL, - "namecache", "entries collected"); - evcnt_attach_dynamic(&cache_ev_over, EVCNT_TYPE_MISC, NULL, - "namecache", "over scan target"); - evcnt_attach_dynamic(&cache_ev_under, EVCNT_TYPE_MISC, NULL, - "namecache", "under scan target"); - evcnt_attach_dynamic(&cache_ev_forced, EVCNT_TYPE_MISC, NULL, - "namecache", "forced reclaims"); + mutex_init(&cache_list_lock, MUTEX_DEFAULT, IPL_NONE); sysctl_cache_stat_setup(); } @@ -970,21 +763,27 @@ nchinit(void) static int cache_ctor(void *arg, void *obj, int flag) { - struct namecache *ncp; - - ncp = obj; - mutex_init(&ncp->nc_lock, MUTEX_DEFAULT, IPL_NONE); +#if 0 /* XXXAD */ + struct namecache *ncp = obj; + mutex_enter(&cache_lru_lock); + ncp->nc_dvp = NULL; + TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); + mutex_exit(&cache_lru_lock); +#endif return 0; } static void cache_dtor(void *arg, void *obj) { - struct namecache *ncp; +#if 0 /* XXXAD */ - ncp = obj; - mutex_destroy(&ncp->nc_lock); + mutex_enter(&cache_lru_lock); + KASSERT(ncp->nc_dvp == NULL); + TAILQ_REMOVE(&nclruhead, ncp, nc_lru); + mutex_exit(&cache_lru_lock); +#endif } /* @@ -993,42 +792,39 @@ cache_dtor(void *arg, void *obj) void cache_cpu_init(struct cpu_info *ci) { - struct nchcpu *cpup; + void *p; size_t sz; - sz = roundup2(sizeof(*cpup), coherency_unit) + coherency_unit; - cpup = kmem_zalloc(sz, KM_SLEEP); - cpup = (void *)roundup2((uintptr_t)cpup, coherency_unit); - mutex_init(&cpup->cpu_lock, MUTEX_DEFAULT, IPL_NONE); - ci->ci_data.cpu_nch = cpup; + sz = roundup2(sizeof(struct nchstats_percpu), coherency_unit) + + coherency_unit; + p = kmem_zalloc(sz, KM_SLEEP); + ci->ci_data.cpu_nch = (void *)roundup2((uintptr_t)p, coherency_unit); } /* - * Name cache reinitialization, for when the maximum number of vnodes increases. + * A vnode is being allocated: set up cache structures. */ void -nchreinit(void) +cache_vnode_init(struct vnode *vp) { - struct namecache *ncp; - struct nchashhead *oldhash, *hash; - u_long i, oldmask, mask; + vnode_impl_t *vi = VNODE_TO_VIMPL(vp); - hash = hashinit(desiredvnodes, HASH_LIST, true, &mask); - mutex_enter(namecache_lock); - cache_lock_cpus(); - oldhash = nchashtbl; - oldmask = nchash; - nchashtbl = hash; - nchash = mask; - for (i = 0; i <= oldmask; i++) { - while ((ncp = LIST_FIRST(&oldhash[i])) != NULL) { - LIST_REMOVE(ncp, nc_hash); - ncp->nc_hash.le_prev = NULL; - } - } - cache_unlock_cpus(); - mutex_exit(namecache_lock); - hashdone(oldhash, HASH_LIST, oldmask); + vi->vi_nclock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); + rb_tree_init(&vi->vi_nctree, &cache_rbtree_ops); + TAILQ_INIT(&vi->vi_nclist); +} + +/* + * A vnode is being freed: finish cache structures. 
+ */ +void +cache_vnode_fini(struct vnode *vp) +{ + vnode_impl_t *vi = VNODE_TO_VIMPL(vp); + + KASSERT(RB_TREE_MIN(&vi->vi_nctree) == NULL); + KASSERT(TAILQ_EMPTY(&vi->vi_nclist)); + mutex_obj_free(vi->vi_nclock); } /* @@ -1038,42 +834,66 @@ nchreinit(void) void cache_purge1(struct vnode *vp, const char *name, size_t namelen, int flags) { - struct namecache *ncp, *ncnext; + struct namecache *ncp; + kmutex_t *dirlock, *blocked; - mutex_enter(namecache_lock); if (flags & PURGE_PARENTS) { SDT_PROBE(vfs, namecache, purge, parents, vp, 0, 0, 0, 0); - - for (ncp = LIST_FIRST(&VNODE_TO_VIMPL(vp)->vi_nclist); - ncp != NULL; ncp = ncnext) { - ncnext = LIST_NEXT(ncp, nc_vlist); - mutex_enter(&ncp->nc_lock); - cache_invalidate(ncp); - mutex_exit(&ncp->nc_lock); - cache_disassociate(ncp); + blocked = NULL; + mutex_enter(&cache_list_lock); + while ((ncp = TAILQ_FIRST(&VNODE_TO_VIMPL(vp)->vi_nclist)) + != NULL) { + dirlock = VNODE_TO_VIMPL(ncp->nc_dvp)->vi_nclock; + /* + * We can't wait on the directory lock with the + * list lock held or the system could + * deadlock. In the unlikely event that the + * directory lock is unavailable, take a hold on it + * and wait for it to become available with no other + * locks held. Then retry. If this happens twice + * in a row, we'll give the other side a breather to + * prevent livelock. + */ + if (!mutex_tryenter(dirlock)) { + mutex_obj_hold(dirlock); + mutex_exit(&cache_list_lock); + mutex_enter(dirlock); + /* Do nothing. */ + mutex_exit(dirlock); + mutex_obj_free(dirlock); + if (blocked == dirlock) { + kpause("livelock", false, 1, NULL); + } + mutex_enter(&cache_list_lock); + blocked = dirlock; + } else { + cache_remove(ncp, true); + mutex_exit(dirlock); + blocked = NULL; + } } + mutex_exit(&cache_list_lock); } if (flags & PURGE_CHILDREN) { SDT_PROBE(vfs, namecache, purge, children, vp, 0, 0, 0, 0); - for (ncp = LIST_FIRST(&VNODE_TO_VIMPL(vp)->vi_dnclist); - ncp != NULL; ncp = ncnext) { - ncnext = LIST_NEXT(ncp, nc_dvlist); - mutex_enter(&ncp->nc_lock); - cache_invalidate(ncp); - mutex_exit(&ncp->nc_lock); - cache_disassociate(ncp); + dirlock = VNODE_TO_VIMPL(vp)->vi_nclock; + mutex_enter(dirlock); + while ((ncp = rb_tree_iterate(&VNODE_TO_VIMPL(vp)->vi_nctree, + NULL, RB_DIR_RIGHT)) != NULL) { + cache_remove(ncp, false); } + mutex_exit(dirlock); } if (name != NULL) { SDT_PROBE(vfs, namecache, purge, name, name, namelen, 0, 0, 0); + dirlock = VNODE_TO_VIMPL(vp)->vi_nclock; + mutex_enter(dirlock); ncp = cache_lookup_entry(vp, name, namelen); if (ncp) { - cache_invalidate(ncp); - mutex_exit(&ncp->nc_lock); - cache_disassociate(ncp); + cache_remove(ncp, false); } + mutex_exit(dirlock); } - mutex_exit(namecache_lock); } /* @@ -1083,143 +903,88 @@ cache_purge1(struct vnode *vp, const cha void cache_purgevfs(struct mount *mp) { - struct namecache *ncp, *nxtcp; - - SDT_PROBE(vfs, namecache, purge, vfs, mp, 0, 0, 0, 0); - mutex_enter(namecache_lock); - for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) { - nxtcp = TAILQ_NEXT(ncp, nc_lru); - mutex_enter(&ncp->nc_lock); - if (ncp->nc_dvp != NULL && ncp->nc_dvp->v_mount == mp) { - /* Free the resources we had. */ - cache_invalidate(ncp); - cache_disassociate(ncp); - } - mutex_exit(&ncp->nc_lock); - } - cache_reclaim(); - mutex_exit(namecache_lock); -} - -/* - * Scan global list invalidating entries until we meet a preset target. - * Prefer to invalidate entries that have not scored a hit within - * cache_hottime seconds. We sort the LRU list only for this routine's - * benefit. 
- */ -static void -cache_prune(int incache, int target) -{ - struct namecache *ncp, *nxtcp, *sentinel; - int items, recent, tryharder; - - KASSERT(mutex_owned(namecache_lock)); + struct vnode_iterator *marker; + struct namecache *ncp; + vnode_impl_t *vi; + kmutex_t *dirlock; + vnode_t *vp; - SDT_PROBE(vfs, namecache, prune, done, incache, target, 0, 0, 0); - items = 0; - tryharder = 0; - recent = hardclock_ticks - hz * cache_hottime; - sentinel = NULL; - for (ncp = TAILQ_FIRST(&nclruhead); ncp != NULL; ncp = nxtcp) { - if (incache <= target) - break; - items++; - nxtcp = TAILQ_NEXT(ncp, nc_lru); - if (ncp == sentinel) { - /* - * If we looped back on ourself, then ignore - * recent entries and purge whatever we find. - */ - tryharder = 1; - } - if (ncp->nc_dvp == NULL) - continue; - if (!tryharder && (ncp->nc_hittime - recent) > 0) { - if (sentinel == NULL) - sentinel = ncp; - TAILQ_REMOVE(&nclruhead, ncp, nc_lru); - TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); + /* + * In the original BSD implementation this used to traverse the LRU + * list. To avoid locking difficulties, and avoid scanning a ton of + * namecache entries that we have no interest in, scan the mount + * list instead. + */ + vfs_vnode_iterator_init(mp, &marker); + while ((vp = vfs_vnode_iterator_next(marker, NULL, NULL))) { + vi = VNODE_TO_VIMPL(vp); + if (vp->v_type != VDIR) { + KASSERT(RB_TREE_MIN(&vi->vi_nctree) == NULL); continue; } - mutex_enter(&ncp->nc_lock); - if (ncp->nc_dvp != NULL) { - cache_invalidate(ncp); - cache_disassociate(ncp); - incache--; + dirlock = vi->vi_nclock; + mutex_enter(dirlock); + while ((ncp = rb_tree_iterate(&vi->vi_nctree, NULL, + RB_DIR_RIGHT)) != NULL) { + cache_remove(ncp, false); } - mutex_exit(&ncp->nc_lock); + mutex_exit(dirlock); } - cache_ev_scan.ev_count += items; + vfs_vnode_iterator_destroy(marker); } /* - * Collect dead cache entries from all CPUs and garbage collect. + * Free some entries from the cache, when we have gone over budget. + * + * We don't want to cause too much work for any individual caller, and it + * doesn't matter if we temporarily go over budget. This is also "just a + * cache" so we can throw out whatever we like. So we take a relaxed + * attitude to this process to reduce its impact. */ static void cache_reclaim(void) { struct namecache *ncp, *next; - int items; + kmutex_t *dirlock; + int toscan, delta; - KASSERT(mutex_owned(namecache_lock)); + KASSERT(mutex_owned(&cache_list_lock)); - /* - * If the number of extant entries not awaiting garbage collection - * exceeds the high water mark, then reclaim stale entries until we - * reach our low water mark. - */ - items = numcache - cache_gcpend; - if (items > (uint64_t)desiredvnodes * cache_hiwat / 100) { - cache_prune(items, (int)((uint64_t)desiredvnodes * - cache_lowat / 100)); - cache_ev_over.ev_count++; - } else - cache_ev_under.ev_count++; + /* Scan up to a preset maxium number of entries. */ + toscan = cache_maxscan; + delta = hz * cache_hottime; + SDT_PROBE(vfs, namecache, prune, done, numcache, toscan, 0, 0, 0); + next = TAILQ_FIRST(&nclruhead); + while ((ncp = next) != NULL && toscan-- != 0) { + next = TAILQ_NEXT(ncp, nc_lru); + dirlock = VNODE_TO_VIMPL(ncp->nc_dvp)->vi_nclock; - /* - * Stop forward lookup activity on all CPUs and garbage collect dead - * entries. 
- */ - cache_lock_cpus(); - ncp = cache_gcqueue; - cache_gcqueue = NULL; - items = cache_gcpend; - cache_gcpend = 0; - while (ncp != NULL) { - next = ncp->nc_gcqueue; - cache_disassociate(ncp); - KASSERT(ncp->nc_dvp == NULL); - if (ncp->nc_hash.le_prev != NULL) { - LIST_REMOVE(ncp, nc_hash); - ncp->nc_hash.le_prev = NULL; + /* + * Locking in the wrong direction. If we can't get the + * lock, the directory is actively busy, and it could also + * cause problems for the next guy in here, so send the + * entry to the back of the list. + */ + if (!mutex_tryenter(dirlock)) { + TAILQ_REMOVE(&nclruhead, ncp, nc_lru); + TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); + continue; } - if (ncp->nc_nlen > NCHNAMLEN) { - cache_dtor(NULL, ncp); - kmem_free(ncp, sizeof(*ncp) + ncp->nc_nlen); - } else - pool_cache_put(namecache_cache, ncp); - ncp = next; - } - cache_unlock_cpus(); - numcache -= items; - cache_ev_gc.ev_count += items; -} -/* - * Cache maintainence thread, awakening once per second to: - * - * => keep number of entries below the high water mark - * => sort pseudo-LRU list - * => garbage collect dead entries - */ -static void -cache_thread(void *arg) -{ + /* + * Now have the lock. If the entry was hit recently, send + * it to the back of the list. + */ + if (ncp->nc_hittime + delta > hardclock_ticks) { + TAILQ_REMOVE(&nclruhead, ncp, nc_lru); + TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); + mutex_exit(dirlock); + continue; + } - mutex_enter(namecache_lock); - for (;;) { - cache_reclaim(); - kpause("cachegc", false, hz, namecache_lock); + /* We have a victim: reclaim it. */ + cache_remove(ncp, true); + mutex_exit(dirlock); } } @@ -1252,17 +1017,15 @@ namecache_print(struct vnode *vp, void ( void namecache_count_pass2(void) { - struct nchcpu *cpup = curcpu()->ci_data.cpu_nch; - COUNT_UNL(cpup, ncs_pass2); + COUNT(ncs_pass2); } void namecache_count_2passes(void) { - struct nchcpu *cpup = curcpu()->ci_data.cpu_nch; - COUNT_UNL(cpup, ncs_2passes); + COUNT(ncs_2passes); } /* @@ -1274,11 +1037,8 @@ static int cache_stat_sysctl(SYSCTLFN_ARGS) { struct nchstats stats; - struct nchcpu *my_cpup; -#ifdef CACHE_STATS_CURRENT CPU_INFO_ITERATOR cii; struct cpu_info *ci; -#endif /* CACHE_STATS_CURRENT */ if (oldp == NULL) { *oldlenp = sizeof(stats); @@ -1290,32 +1050,26 @@ cache_stat_sysctl(SYSCTLFN_ARGS) return 0; } - /* - * Take this CPU's per-cpu lock to hold off cache_reclaim() - * from doing a stats update while doing minimal damage to - * concurrent operations. 
- */ sysctl_unlock(); - my_cpup = curcpu()->ci_data.cpu_nch; - mutex_enter(&my_cpup->cpu_lock); - stats = nchstats; -#ifdef CACHE_STATS_CURRENT + memset(&stats, 0, sizeof(stats)); for (CPU_INFO_FOREACH(cii, ci)) { - struct nchcpu *cpup = ci->ci_data.cpu_nch; + struct nchstats_percpu *np = ci->ci_data.cpu_nch; - ADD(stats, cpup, ncs_goodhits); - ADD(stats, cpup, ncs_neghits); - ADD(stats, cpup, ncs_badhits); - ADD(stats, cpup, ncs_falsehits); - ADD(stats, cpup, ncs_miss); - ADD(stats, cpup, ncs_long); - ADD(stats, cpup, ncs_pass2); - ADD(stats, cpup, ncs_2passes); - ADD(stats, cpup, ncs_revhits); - ADD(stats, cpup, ncs_revmiss); - } -#endif /* CACHE_STATS_CURRENT */ - mutex_exit(&my_cpup->cpu_lock); + stats.ncs_goodhits += np->ncs_goodhits; + stats.ncs_neghits += np->ncs_neghits; + stats.ncs_badhits += np->ncs_badhits; + stats.ncs_falsehits += np->ncs_falsehits; + stats.ncs_miss += np->ncs_miss; + stats.ncs_long += np->ncs_long; + stats.ncs_pass2 += np->ncs_pass2; + stats.ncs_2passes += np->ncs_2passes; + stats.ncs_revhits += np->ncs_revhits; + stats.ncs_revmiss += np->ncs_revmiss; + } + /* Serialize the update to nchstats, just because. */ + mutex_enter(&cache_list_lock); + nchstats = stats; + mutex_exit(&cache_list_lock); sysctl_relock(); *oldlenp = sizeof(stats); Index: src/sys/kern/vfs_vnode.c diff -u src/sys/kern/vfs_vnode.c:1.105 src/sys/kern/vfs_vnode.c:1.105.2.1 --- src/sys/kern/vfs_vnode.c:1.105 Mon Dec 16 22:47:54 2019 +++ src/sys/kern/vfs_vnode.c Wed Jan 8 11:02:16 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_vnode.c,v 1.105 2019/12/16 22:47:54 ad Exp $ */ +/* $NetBSD: vfs_vnode.c,v 1.105.2.1 2020/01/08 11:02:16 ad Exp $ */ /*- * Copyright (c) 1997-2011, 2019 The NetBSD Foundation, Inc. @@ -146,7 +146,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.105 2019/12/16 22:47:54 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.105.2.1 2020/01/08 11:02:16 ad Exp $"); #include <sys/param.h> #include <sys/kernel.h> @@ -1127,12 +1127,12 @@ vcache_alloc(void) vip->vi_lock = rw_obj_alloc(); /* SLIST_INIT(&vip->vi_hash); */ - /* LIST_INIT(&vip->vi_nclist); */ /* LIST_INIT(&vip->vi_dnclist); */ vp = VIMPL_TO_VNODE(vip); uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0); cv_init(&vp->v_cv, "vnode"); + cache_vnode_init(vp); vp->v_usecount = 1; vp->v_type = VNON; @@ -1192,6 +1192,7 @@ vcache_free(vnode_impl_t *vip) rw_obj_free(vip->vi_lock); uvm_obj_destroy(&vp->v_uobj, true); cv_destroy(&vp->v_cv); + cache_vnode_fini(vp); pool_cache_put(vcache_pool, vip); } Index: src/sys/sys/namei.src diff -u src/sys/sys/namei.src:1.47 src/sys/sys/namei.src:1.47.2.1 --- src/sys/sys/namei.src:1.47 Mon Jan 6 11:22:33 2020 +++ src/sys/sys/namei.src Wed Jan 8 11:02:16 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: namei.src,v 1.47 2020/01/06 11:22:33 ad Exp $ */ +/* $NetBSD: namei.src,v 1.47.2.1 2020/01/08 11:02:16 ad Exp $ */ /* * Copyright (c) 1985, 1989, 1991, 1993 @@ -188,42 +188,38 @@ NAMEIFL PARAMASK 0x02ee300 /* mask of pa #endif #ifdef __NAMECACHE_PRIVATE +#include <sys/rbtree.h> + /* * For simplicity (and economy of storage), names longer than * a maximum length of NCHNAMLEN are stored in non-pooled storage. */ -#define NCHNAMLEN 32 /* up to this size gets stored in pool */ +#define NCHNAMLEN sizeof(((struct namecache *)NULL)->nc_name) /* * Namecache entry. - * This structure describes the elements in the cache of recent - * names looked up by namei. 
- * - * Locking rules: * - * - stable after initialization - * L namecache_lock - * C struct nchcpu::cpu_lock - * L/C insert needs L, read needs L or any C, - * must hold L and all C after (or during) delete before free - * N struct namecache::nc_lock + * This structure describes the elements in the cache of recent names looked + * up by namei. It's carefully sized to take up 128 bytes on _LP64, to make + * good use of space and the CPU caches. + * + * Field markings and their corresponding locks: + * + * - stable throught the lifetime of the namecache entry + * d protected by nc_dvp->vi_nclock + * l protected by cache_list_lock */ struct namecache { - LIST_ENTRY(namecache) nc_hash; /* L/C hash chain */ - TAILQ_ENTRY(namecache) nc_lru; /* L pseudo-lru chain */ - LIST_ENTRY(namecache) nc_dvlist;/* L dvp's list of cache entries */ - LIST_ENTRY(namecache) nc_vlist; /* L vp's list of cache entries */ - struct vnode *nc_dvp; /* N vnode of parent of name */ - struct vnode *nc_vp; /* N vnode the name refers to */ - void *nc_gcqueue; /* N queue for garbage collection */ - kmutex_t nc_lock; /* lock on this entry */ - int nc_hittime; /* N last time scored a hit */ - int nc_flags; /* - copy of componentname ISWHITEOUT */ - u_short nc_nlen; /* - length of name */ - char nc_name[0]; /* - segment name */ + struct rb_node nc_node; /* d red-black tree node */ + TAILQ_ENTRY(namecache) nc_lru; /* l pseudo-lru chain */ + TAILQ_ENTRY(namecache) nc_vlist;/* l vp's list of cache entries */ + struct vnode *nc_dvp; /* - vnode of parent of name */ + struct vnode *nc_vp; /* - vnode the name refers to */ + int nc_hittime; /* d approx time of last hit */ + u_short nc_nlen; /* - length of name */ + bool nc_whiteout; /* - true if a whiteout */ + char nc_name[49]; /* - segment name */ }; -__CTASSERT((sizeof(struct namecache) + NCHNAMLEN) - % __alignof(struct namecache) == 0); #endif #ifdef _KERNEL @@ -289,11 +285,13 @@ bool cache_lookup_raw(struct vnode *, co int cache_revlookup(struct vnode *, struct vnode **, char **, char *); void cache_enter(struct vnode *, struct vnode *, const char *, size_t, uint32_t); +void cache_vnode_init(struct vnode * ); +void cache_vnode_fini(struct vnode * ); +void cache_cpu_init(struct cpu_info *); + void nchinit(void); -void nchreinit(void); void namecache_count_pass2(void); void namecache_count_2passes(void); -void cache_cpu_init(struct cpu_info *); void cache_purgevfs(struct mount *); void namecache_print(struct vnode *, void (*)(const char *, ...) __printflike(1, 2)); Index: src/sys/sys/vnode_impl.h diff -u src/sys/sys/vnode_impl.h:1.19 src/sys/sys/vnode_impl.h:1.19.2.1 --- src/sys/sys/vnode_impl.h:1.19 Sun Dec 22 19:47:34 2019 +++ src/sys/sys/vnode_impl.h Wed Jan 8 11:02:16 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: vnode_impl.h,v 1.19 2019/12/22 19:47:34 ad Exp $ */ +/* $NetBSD: vnode_impl.h,v 1.19.2.1 2020/01/08 11:02:16 ad Exp $ */ /*- * Copyright (c) 2016, 2019 The NetBSD Foundation, Inc. 
@@ -68,8 +68,9 @@ struct vnode_impl { enum vnode_state vi_state; /* i: current state */ struct vnodelst *vi_lrulisthd; /* d: current lru list head */ TAILQ_ENTRY(vnode_impl) vi_lrulist; /* d: lru list */ - LIST_HEAD(, namecache) vi_dnclist; /* n: namecaches (children) */ - LIST_HEAD(, namecache) vi_nclist; /* n: namecaches (parent) */ + TAILQ_HEAD(, namecache) vi_nclist; /* n: namecaches (parent) */ + rb_tree_t vi_nctree; /* n: namecache tree */ + kmutex_t *vi_nclock; /* n: namecache lock */ int vi_synclist_slot; /* s: synclist slot index */ int vi_lrulisttm; /* i: time of lru enqueue */ TAILQ_ENTRY(vnode_impl) vi_synclist; /* s: vnodes with dirty bufs */
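
[Editor's note: cache_lookup_entry() in the diff avoids false sharing by
rewriting nc_hittime only when it differs from hardclock_ticks in bits above
the low five, so a hot entry dirties its cache line at most roughly once per
32 ticks.  The userland sketch below demonstrates that filter; the tick
counter is simulated and the surrounding code is hypothetical.]

	#include <stdio.h>

	int
	main(void)
	{
		int nc_hittime = 0;
		int writes = 0;

		for (int hardclock_ticks = 1; hardclock_ticks <= 128;
		    hardclock_ticks++) {
			/*
			 * Same test as the diff: skip the store when only
			 * the low 5 bits of the tick count have changed.
			 */
			if (((nc_hittime ^ hardclock_ticks) & ~31) != 0) {
				nc_hittime = hardclock_ticks;
				writes++;
			}
		}
		printf("128 lookups, %d timestamp writes\n", writes);
		return 0;
	}

With 128 simulated lookups the timestamp is stored only four times, which is
the point of the filter: lookups stay read-mostly on the entry's cache line.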
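
[Editor's note: cache_enter() in the diff keeps the per-vnode reverse-lookup
list partially sorted -- "." and ".." go to the tail, everything else to the
head -- so cache_revlookup() can stop scanning at the first dot entry.  The
sketch below shows that discipline with the portable TAILQ macros; the types
and names are hypothetical, only the ordering rule comes from the commit.]

	#include <stdio.h>
	#include <string.h>
	#include <sys/queue.h>

	struct entry {
		const char *name;
		TAILQ_ENTRY(entry) link;
	};

	TAILQ_HEAD(entry_list, entry);

	static void
	insert(struct entry_list *list, struct entry *e)
	{
		size_t len = strlen(e->name);

		/* Dots and dotdots go to the back, as in cache_enter(). */
		if ((len == 1 && e->name[0] == '.') ||
		    (len == 2 && e->name[0] == '.' && e->name[1] == '.'))
			TAILQ_INSERT_TAIL(list, e, link);
		else
			TAILQ_INSERT_HEAD(list, e, link);
	}

	int
	main(void)
	{
		struct entry_list list = TAILQ_HEAD_INITIALIZER(list);
		struct entry ents[] = { { "." }, { "src" }, { ".." }, { "obj" } };
		struct entry *e;

		for (size_t i = 0; i < sizeof(ents) / sizeof(ents[0]); i++)
			insert(&list, &ents[i]);

		/* As in cache_revlookup(): bail out at the first dot entry. */
		TAILQ_FOREACH(e, &list, link) {
			size_t len = strlen(e->name);
			if (e->name[0] == '.' &&
			    (len == 1 || (len == 2 && e->name[1] == '.')))
				break;
			printf("usable reverse-lookup name: %s\n", e->name);
		}
		return 0;
	}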