Hi,

For explanation/discussion, please see the related post in [EMAIL PROTECTED]

The patch extends from the signature line below to the bottom of this message.

Csaba

# HG changeset patch
# User csaba@
# Node ID 6db92777ea99902079b1f31fe330b0a5aac96d88
# Parent  820a1f1d791e95af17294ccd87e5f76dbc1be68c
imported patch cachecoh

diff -r 820a1f1d791e -r 6db92777ea99 sys/emulation/linux/linux_stats.c
--- a/sys/emulation/linux/linux_stats.c Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/emulation/linux/linux_stats.c Mon Jan 23 05:16:56 2006 +0100
@@ -250,8 +250,8 @@ linux_statfs(struct linux_statfs_args *a
        if (error == 0)
                error = kern_statfs(&nd, &statfs);
        if (error == 0) {
-               if (nd.nl_ncp->nc_vp != NULL)
-                       error = vn_get_namelen(nd.nl_ncp->nc_vp, &namelen);
+               if (cache_grphead_l(nd.nl_ncp)->nc_vp != NULL)
+                       error = 
vn_get_namelen(cache_grphead_l(nd.nl_ncp)->nc_vp, &namelen);
                else
                        error = EINVAL;
        }
diff -r 820a1f1d791e -r 6db92777ea99 sys/emulation/svr4/svr4_misc.c
--- a/sys/emulation/svr4/svr4_misc.c    Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/emulation/svr4/svr4_misc.c    Mon Jan 23 05:16:56 2006 +0100
@@ -1395,8 +1395,8 @@ svr4_sys_statvfs(struct svr4_sys_statvfs
        if (error == 0)
                error = kern_statfs(&nd, &bfs);
        if (error == 0) {
-               if (nd.nl_ncp->nc_vp != NULL)
-                       error = vn_get_namelen(nd.nl_ncp->nc_vp, &namelen);
+               if (cache_grphead_l(nd.nl_ncp)->nc_vp != NULL)
+                       error = 
vn_get_namelen(cache_grphead_l(nd.nl_ncp)->nc_vp, &namelen);
                else
                        error = EINVAL;
        }
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/uipc_usrreq.c
--- a/sys/kern/uipc_usrreq.c    Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/uipc_usrreq.c    Mon Jan 23 05:16:56 2006 +0100
@@ -600,7 +600,7 @@ unp_bind(struct unpcb *unp, struct socka
        error = nlookup_init(&nd, buf, UIO_SYSSPACE, NLC_LOCKVP|NLC_CREATE);
        if (error == 0)
                error = nlookup(&nd);
-       if (error == 0 && nd.nl_ncp->nc_vp != NULL)
+       if (error == 0 && cache_grphead_l(nd.nl_ncp)->nc_vp != NULL)
                error = EADDRINUSE;
        if (error)
                goto done;
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/vfs_cache.c
--- a/sys/kern/vfs_cache.c      Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/vfs_cache.c      Mon Jan 23 05:16:56 2006 +0100
@@ -198,6 +198,25 @@ SYSCTL_PROC(_vfs_cache, OID_AUTO, nchsta
 
 static void cache_zap(struct namecache *ncp);
 
+static __inline
+struct namecache *
+_cache_grphead_l(struct namecache *ncp)
+{
+#ifdef INVARIANTS
+       struct namecache *startncp = ncp;
+#endif
+
+       while (ncp->nc_shadowed) {
+               ncp = ncp->nc_shadowed;
+               KKASSERT(startncp != ncp);
+               KKASSERT(ncp->nc_refs > 0);
+       }
+
+       KKASSERT(ncp->nc_exlocks > 0);
+       KKASSERT(ncp->nc_locktd == curthread);
+       return(ncp);
+}
+
 /*
  * cache_hold() and cache_drop() prevent the premature deletion of a
  * namecache entry but do not prevent operations (such as zapping) on
@@ -222,15 +241,60 @@ _cache_drop(struct namecache *ncp)
 {
        KKASSERT(ncp->nc_refs > 0);
        if (ncp->nc_refs == 1 && 
-           (ncp->nc_flag & NCF_UNRESOLVED) && 
+           (ncp->nc_flag & NCF_UNRESOLVED || ncp->nc_shadowed) && 
            TAILQ_EMPTY(&ncp->nc_list)
        ) {
-               KKASSERT(ncp->nc_exlocks == 0);
                cache_lock(ncp);
-               cache_zap(ncp);
-       } else {
+               KKASSERT(_cache_grphead_l(ncp)->nc_exlocks == 1);
+               if (_cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) {
+                       cache_zap(ncp);
+                       return;
+               }
+               cache_unlock(ncp);
+       }
+       --ncp->nc_refs;
+}
+
+static __inline
+struct namecache *
+_cache_grphead(struct namecache *ncp)
+{
+       if (ncp->nc_shadowed) {
+               /*
+                * We need a ref to please the locking routine, but
+                * we get rid of that ASAP. Doing that directly saves
+                * us from a lot of headache (and some CPU cycles).
+                */
+               cache_get(ncp);
                --ncp->nc_refs;
-       }
+               ncp = _cache_grphead_l(ncp);
+               cache_unlock(ncp);
+       }
+
+       return(ncp);
+}
+
+/*
+ * Get the head of the shadow group when it's known to be locked.
+ *
+ * (XXX We need this routine in the API only if recursive locking
+ * is considered bad. The additional costs of the general grphead
+ * routine seem to be negligible, especially for "normal"
+ * (nc_shadowless) namecache entries.)
+ */
+struct namecache *
+cache_grphead_l(struct namecache *ncp)
+{
+       return(_cache_grphead_l(ncp));
+}
+
+/*
+ * Get the head of the shadow group.
+ */
+struct namecache *
+cache_grphead(struct namecache *ncp)
+{
+       return(_cache_grphead(ncp));
 }
 
 /*
@@ -324,6 +388,9 @@ cache_drop(struct namecache *ncp)
        _cache_drop(ncp);
 }
 
+static void cache_lock_one(struct namecache *ncp);
+static void cache_unlock_one(struct namecache *ncp);
+
 /*
  * Namespace locking.  The caller must already hold a reference to the
  * namecache structure in order to lock/unlock it.  This function prevents
@@ -346,6 +413,29 @@ cache_drop(struct namecache *ncp)
  */
 void
 cache_lock(struct namecache *ncp)
+{
+       struct namecache *oncp;
+#ifdef INVARIANTS
+       struct namecache *startncp = ncp;
+#endif
+
+       for (;;) {
+               cache_lock_one(ncp);
+               oncp = ncp;
+               if (! (ncp = ncp->nc_shadowed))
+                       break;
+               KKASSERT(ncp != startncp);
+               /*
+                * The individual lock was used just to protect the transition.
+                * Now that we safely know who's next, unlock the entry
+                * and move on.
+                */
+               cache_unlock_one(oncp);
+       }
+}
+
+static void
+cache_lock_one(struct namecache *ncp)
 {
        thread_t td;
        int didwarn;
@@ -398,12 +488,27 @@ cache_lock_nonblock(struct namecache *nc
 cache_lock_nonblock(struct namecache *ncp)
 {
        thread_t td;
-
+       struct namecache *oncp;
+#ifdef INVARIANTS
+       struct namecache *startncp = ncp;
+#endif
+
+       td = curthread;
+
+step_one:
        KKASSERT(ncp->nc_refs != 0);
-       td = curthread;
        if (ncp->nc_exlocks == 0) {
                ncp->nc_exlocks = 1;
                ncp->nc_locktd = td;
+
+               if (ncp->nc_shadowed) {
+                       oncp = ncp;
+                       ncp = ncp->nc_shadowed;
+                       KKASSERT(startncp != ncp);
+                       cache_unlock_one(oncp);
+                       goto step_one;
+               }
+
                /* 
                 * The vp associated with a locked ncp must be held
                 * to prevent it from being recycled (which would
@@ -422,6 +527,12 @@ void
 void
 cache_unlock(struct namecache *ncp)
 {
+       cache_unlock_one(_cache_grphead_l(ncp));
+}
+
+static void
+cache_unlock_one(struct namecache *ncp)
+{
        thread_t td = curthread;
 
        KKASSERT(ncp->nc_refs > 0);
@@ -452,13 +563,17 @@ int
 int
 cache_get_nonblock(struct namecache *ncp)
 {
+       int error;
+
        /* XXX MP */
-       if (ncp->nc_exlocks == 0 || ncp->nc_locktd == curthread) {
-               _cache_hold(ncp);
-               cache_lock(ncp);
-               return(0);
-       }
-       return(EWOULDBLOCK);
+       _cache_hold(ncp);
+       /*
+        * We can't test easily whether locking would block
+        * so we just make a try to get the lock.
+        */
+       if ((error = cache_lock_nonblock(ncp)))
+               _cache_drop(ncp);
+       return(error);
 }
 
 void
@@ -469,6 +584,61 @@ cache_put(struct namecache *ncp)
 }
 
 /*
+ * Join ncp into the shadow group of sncp.
+ * 
+ * Both entries must be locked on entry. Caller also has to hold a dedicated
+ * reference of sncp.
+ *
+ * The routine will fail and return ELOOP if the intended shadowing association
+ * yielded a loop in the shadow chain.
+ *
+ * - On success ncp will be a representative of the joint shadow group, which
+ *   then will be locked.
+ * - On failure the namecache entries will exist separately just as they did
+ *   before, in the same state.
+ */
+int
+cache_shadow_attach(struct namecache *ncp, struct namecache *sncp)
+{
+       KKASSERT(! ncp->nc_shadowed);
+       KKASSERT(! ncp->nc_vp);
+       KKASSERT(ncp->nc_flag & NCF_UNRESOLVED);
+
+       if (_cache_grphead_l(sncp) == ncp) {
+               /* ncp->nc_error = ELOOP; */
+               return(ELOOP);
+       }
+
+       ncp->nc_shadowed = sncp;
+       cache_unlock_one(ncp);
+       return(0);
+}
+
+/*
+ * Take out namecache entry from its shadow group.
+ *
+ * ncp must really shadow someone, and the shadow group must be locked
+ * upon entry.
+ *
+ * After the routine returns, ncp will be the head of a new (possibly 
singleton)
+ * shadow group. The routine returns the former successor of ncp in the 
original
+ * shadow group in a locked+ref'd state.
+ */
+struct namecache *
+cache_shadow_detach(struct namecache *ncp)
+{
+       struct namecache *sncp = ncp->nc_shadowed;
+
+       KKASSERT(sncp);
+
+       cache_lock_one(ncp);
+       ncp->nc_shadowed = NULL;
+       cache_setunresolved(ncp);
+
+       return(sncp);
+}
+
+/*
  * Resolve an unresolved ncp by associating a vnode with it.  If the
  * vnode is NULL, a negative cache entry is created.
  *
@@ -477,6 +647,8 @@ void
 void
 cache_setvp(struct namecache *ncp, struct vnode *vp)
 {
+       ncp = _cache_grphead_l(ncp);
+
        KKASSERT(ncp->nc_flag & NCF_UNRESOLVED);
        ncp->nc_vp = vp;
        if (vp != NULL) {
@@ -517,6 +689,8 @@ void
 void
 cache_settimeout(struct namecache *ncp, int nticks)
 {
+       ncp = _cache_grphead_l(ncp);
+
        if ((ncp->nc_timeout = ticks + nticks) == 0)
                ncp->nc_timeout = 1;
 }
@@ -542,6 +716,8 @@ cache_setunresolved(struct namecache *nc
 cache_setunresolved(struct namecache *ncp)
 {
        struct vnode *vp;
+
+       ncp = _cache_grphead_l(ncp);
 
        if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
                ncp->nc_flag |= NCF_UNRESOLVED;
@@ -619,11 +795,11 @@ cache_inval(struct namecache *ncp, int f
        struct namecache *nextkid;
        int rcnt = 0;
 
-       KKASSERT(ncp->nc_exlocks);
+       KKASSERT(_cache_grphead_l(ncp)->nc_exlocks);
 
        cache_setunresolved(ncp);
        if (flags & CINV_DESTROY)
-               ncp->nc_flag |= NCF_DESTROYED;
+               _cache_grphead_l(ncp)->nc_flag |= NCF_DESTROYED;
 
        if ((flags & CINV_CHILDREN) && 
            (kid = TAILQ_FIRST(&ncp->nc_list)) != NULL
@@ -634,7 +810,8 @@ cache_inval(struct namecache *ncp, int f
                        if ((nextkid = TAILQ_NEXT(kid, nc_entry)) != NULL)
                                cache_hold(nextkid);
                        if ((kid->nc_flag & NCF_UNRESOLVED) == 0 ||
-                           TAILQ_FIRST(&kid->nc_list)
+                           TAILQ_FIRST(&kid->nc_list) ||
+                           kid->nc_shadowed
                        ) {
                                cache_lock(kid);
                                rcnt += cache_inval(kid, flags & ~CINV_DESTROY);
@@ -650,7 +827,7 @@ cache_inval(struct namecache *ncp, int f
         * Someone could have gotten in there while ncp was unlocked,
         * retry if so.
         */
-       if ((ncp->nc_flag & NCF_UNRESOLVED) == 0)
+       if ((_cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) == 0)
                ++rcnt;
        return (rcnt);
 }
@@ -774,6 +951,8 @@ cache_vget(struct namecache *ncp, struct
        struct vnode *vp;
        int error;
 
+       ncp = _cache_grphead(ncp);
+
 again:
        vp = NULL;
        if (ncp->nc_flag & NCF_UNRESOLVED) {
@@ -805,6 +984,8 @@ cache_vref(struct namecache *ncp, struct
 {
        struct vnode *vp;
        int error;
+
+       ncp = _cache_grphead(ncp);
 
 again:
        vp = NULL;
@@ -1121,9 +1302,10 @@ again:
                        goto again;
        }
        if (rncp) {
+               struct namecache *srncp = _cache_grphead_l(rncp);
                vrele(pvp);
-               if (rncp->nc_flag & NCF_UNRESOLVED) {
-                       cache_setvp(rncp, dvp);
+               if (srncp->nc_flag & NCF_UNRESOLVED) {
+                       cache_setvp(srncp, dvp);
                        if (ncvp_debug >= 2) {
                                printf("cache_inefficient_scan: setvp %s/%s = 
%p\n",
                                        ncp->nc_name, rncp->nc_name, dvp);
@@ -1132,11 +1314,11 @@ again:
                        if (ncvp_debug >= 2) {
                                printf("cache_inefficient_scan: setvp %s/%s 
already set %p/%p\n", 
                                        ncp->nc_name, rncp->nc_name, dvp,
-                                       rncp->nc_vp);
+                                       srncp->nc_vp);
                        }
                }
-               if (rncp->nc_vp == NULL)
-                       error = rncp->nc_error;
+               if (srncp->nc_vp == NULL)
+                       error = srncp->nc_error;
                cache_put(rncp);
        } else {
                printf("cache_inefficient_scan: dvp %p NOT FOUND in %s\n",
@@ -1179,7 +1361,7 @@ cache_zap(struct namecache *ncp)
         * We only scrap unref'd (other then our ref) unresolved entries,
         * we do not scrap 'live' entries.
         */
-       while (ncp->nc_flag & NCF_UNRESOLVED) {
+       while (_cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) {
                /*
                 * Someone other then us has a ref, stop.
                 */
@@ -1206,6 +1388,9 @@ cache_zap(struct namecache *ncp)
                        if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
                                vdrop(par->nc_vp);
                }
+
+               if (ncp->nc_shadowed)
+                       cache_put(cache_shadow_detach(ncp));
 
                /*
                 * ncp should not have picked up any refs.  Physically
@@ -1303,6 +1488,7 @@ cache_nlookup(struct namecache *par, str
 cache_nlookup(struct namecache *par, struct nlcomponent *nlc)
 {
        struct namecache *ncp;
+       struct namecache *sncp;
        struct namecache *new_ncp;
        struct nchashhead *nchpp;
        u_int32_t hash;
@@ -1319,15 +1505,16 @@ cache_nlookup(struct namecache *par, str
        new_ncp = NULL;
 restart:
        LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
+               sncp = _cache_grphead(ncp);
                numchecks++;
 
                /*
                 * Zap entries that have timed out.
                 */
-               if (ncp->nc_timeout && 
-                   (int)(ncp->nc_timeout - ticks) < 0 &&
-                   (ncp->nc_flag & NCF_UNRESOLVED) == 0 &&
-                   ncp->nc_exlocks == 0
+               if (sncp->nc_timeout && 
+                   (int)(sncp->nc_timeout - ticks) < 0 &&
+                   (sncp->nc_flag & NCF_UNRESOLVED) == 0 &&
+                   sncp->nc_exlocks == 0
                ) {
                        cache_zap(cache_get(ncp));
                        goto restart;
@@ -1341,7 +1528,7 @@ restart:
                if (ncp->nc_parent == par &&
                    ncp->nc_nlen == nlc->nlc_namelen &&
                    bcmp(ncp->nc_name, nlc->nlc_nameptr, ncp->nc_nlen) == 0 &&
-                   (ncp->nc_flag & NCF_DESTROYED) == 0
+                   (sncp->nc_flag & NCF_DESTROYED) == 0
                ) {
                        if (cache_get_nonblock(ncp) == 0) {
                                if (new_ncp)
@@ -1414,15 +1601,15 @@ int
 int
 cache_resolve(struct namecache *ncp, struct ucred *cred)
 {
-       struct namecache *par;
+       struct namecache *par, *sncp;
        int error;
 
 restart:
        /*
         * If the ncp is already resolved we have nothing to do.
         */
-       if ((ncp->nc_flag & NCF_UNRESOLVED) == 0)
-               return (ncp->nc_error);
+       if ((_cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) == 0)
+               return (_cache_grphead_l(ncp)->nc_error);
 
        /*
         * Mount points need special handling because the parent does not
@@ -1461,17 +1648,20 @@ restart:
         * not occur all that often, or if it does not have to go back too
         * many nodes to resolve the ncp.
         */
-       while (ncp->nc_parent->nc_vp == NULL) {
+       while (_cache_grphead(ncp->nc_parent)->nc_vp == NULL) {
+               struct namecache *spar;
+
                /*
                 * This case can occur if a process is CD'd into a
                 * directory which is then rmdir'd.  If the parent is marked
                 * destroyed there is no point trying to resolve it.
                 */
-               if (ncp->nc_parent->nc_flag & NCF_DESTROYED)
+               if (_cache_grphead(ncp->nc_parent)->nc_flag & NCF_DESTROYED)
                        return(ENOENT);
 
                par = ncp->nc_parent;
-               while (par->nc_parent && par->nc_parent->nc_vp == NULL)
+               while (par->nc_parent &&
+                      _cache_grphead(par->nc_parent)->nc_vp == NULL)
                        par = par->nc_parent;
                if (par->nc_parent == NULL) {
                        printf("EXDEV case 2 %*.*s\n",
@@ -1488,20 +1678,23 @@ restart:
                 * will handle any moves.
                 */
                cache_get(par);
+               spar = _cache_grphead_l(par);
                if (par->nc_flag & NCF_MOUNTPT) {
                        cache_resolve_mp(par);
-               } else if (par->nc_parent->nc_vp == NULL) {
+               } else if (_cache_grphead(par->nc_parent)->nc_vp == NULL) {
                        printf("[diagnostic] cache_resolve: raced on %*.*s\n", 
par->nc_nlen, par->nc_nlen, par->nc_name);
                        cache_put(par);
                        continue;
-               } else if (par->nc_flag & NCF_UNRESOLVED) {
-                       par->nc_error = VOP_NRESOLVE(par, cred);
-               }
-               if ((error = par->nc_error) != 0) {
-                       if (par->nc_error != EAGAIN) {
+               } else if (spar->nc_flag & NCF_UNRESOLVED) {
+                       error = VOP_NRESOLVE(par, cred);
+                       spar = _cache_grphead_l(par);
+                       spar->nc_error = error;
+               }
+               if ((error = spar->nc_error) != 0) {
+                       if (spar->nc_error != EAGAIN) {
                                printf("EXDEV case 3 %*.*s error %d\n",
                                    par->nc_nlen, par->nc_nlen, par->nc_name,
-                                   par->nc_error);
+                                   spar->nc_error);
                                cache_put(par);
                                return(error);
                        }
@@ -1521,14 +1714,16 @@ restart:
         * ncp must already be resolved.
         */
        KKASSERT((ncp->nc_flag & NCF_MOUNTPT) == 0);
-       ncp->nc_error = VOP_NRESOLVE(ncp, cred);
-       /*vop_nresolve(*ncp->nc_parent->nc_vp->v_ops, ncp, cred);*/
-       if (ncp->nc_error == EAGAIN) {
+       error = VOP_NRESOLVE(ncp, cred);
+       sncp = _cache_grphead_l(ncp);
+       sncp->nc_error = error;
+       /*vop_nresolve(*_cache_grphead_unlokced(ncp->nc_parent)->nc_vp->v_ops, 
ncp, cred);*/
+       if (error == EAGAIN) {
                printf("[diagnostic] cache_resolve: EAGAIN ncp %p %*.*s\n",
                        ncp, ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name);
                goto restart;
        }
-       return(ncp->nc_error);
+       return(error);
 }
 
 /*
@@ -1549,6 +1744,8 @@ cache_resolve_mp(struct namecache *ncp)
        struct vnode *vp;
        struct mount *mp = ncp->nc_mount;
        int error;
+
+        ncp = _cache_grphead_l(ncp);
 
        KKASSERT(mp != NULL);
        if (ncp->nc_flag & NCF_UNRESOLVED) {
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/vfs_default.c
--- a/sys/kern/vfs_default.c    Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/vfs_default.c    Mon Jan 23 05:16:56 2006 +0100
@@ -203,7 +203,7 @@ vop_compat_nresolve(struct vop_nresolve_
                return(EPERM);
        if (ncp->nc_parent == NULL)
                return(EPERM);
-       if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+       if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
                return(EPERM);
 
        /*
@@ -234,7 +234,7 @@ vop_compat_nresolve(struct vop_nresolve_
                VOP_UNLOCK(vp, 0, curthread);
        if ((cnp.cn_flags & CNP_PDIRUNLOCK) == 0)
                VOP_UNLOCK(dvp, 0, curthread);
-       if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
+       if ((cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) == 0) {
                /* was resolved by another process while we were unlocked */
                if (error == 0)
                        vrele(vp);
@@ -245,7 +245,7 @@ vop_compat_nresolve(struct vop_nresolve_
        } else if (error == ENOENT) {
                KKASSERT(vp == NULL);
                if (cnp.cn_flags & CNP_ISWHITEOUT)
-                       ncp->nc_flag |= NCF_WHITEOUT;
+                       cache_grphead_l(ncp)->nc_flag |= NCF_WHITEOUT;
                cache_setvp(ncp, NULL);
        }
        vrele(dvp);
@@ -338,7 +338,7 @@ vop_compat_ncreate(struct vop_ncreate_ar
                return(EPERM);
        if (ncp->nc_parent == NULL)
                return(EPERM);
-       if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+       if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
                return(EPERM);
 
        if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -421,7 +421,7 @@ vop_compat_nmkdir(struct vop_nmkdir_args
                return(EPERM);
        if (ncp->nc_parent == NULL)
                return(EPERM);
-       if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+       if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
                return(EPERM);
 
        if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -504,7 +504,7 @@ vop_compat_nmknod(struct vop_nmknod_args
                return(EPERM);
        if (ncp->nc_parent == NULL)
                return(EPERM);
-       if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+       if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
                return(EPERM);
 
        if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -586,7 +586,7 @@ vop_compat_nlink(struct vop_nlink_args *
                return(EPERM);
        if (ncp->nc_parent == NULL)
                return(EPERM);
-       if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+       if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
                return(EPERM);
 
        if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -660,7 +660,7 @@ vop_compat_nsymlink(struct vop_nsymlink_
                return(EPERM);
        if (ncp->nc_parent == NULL)
                return(EPERM);
-       if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+       if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
                return(EPERM);
 
        if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -746,7 +746,7 @@ vop_compat_nwhiteout(struct vop_nwhiteou
                return(EPERM);
        if (ncp->nc_parent == NULL)
                return(EPERM);
-       if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+       if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
                return(EPERM);
 
        if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -836,7 +836,7 @@ vop_compat_nremove(struct vop_nremove_ar
                return(EPERM);
        if (ncp->nc_parent == NULL)
                return(EPERM);
-       if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+       if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
                return(EPERM);
 
        if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -912,7 +912,7 @@ vop_compat_nrmdir(struct vop_nrmdir_args
                return(EPERM);
        if (ncp->nc_parent == NULL)
                return(EPERM);
-       if ((dvp = ncp->nc_parent->nc_vp) == NULL)
+       if ((dvp = cache_grphead(ncp->nc_parent)->nc_vp) == NULL)
                return(EPERM);
 
        if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
@@ -1005,7 +1005,7 @@ vop_compat_nrename(struct vop_nrename_ar
                return(EPERM);
        if (fncp->nc_parent == NULL)
                return(EPERM);
-       if ((fdvp = fncp->nc_parent->nc_vp) == NULL)
+       if ((fdvp = cache_grphead(fncp->nc_parent)->nc_vp) == NULL)
                return(EPERM);
 
        /*
@@ -1064,7 +1064,7 @@ vop_compat_nrename(struct vop_nrename_ar
                error = EPERM;
        if (tncp->nc_parent == NULL)
                error = EPERM;
-       if ((tdvp = tncp->nc_parent->nc_vp) == NULL)
+       if ((tdvp = cache_grphead(tncp->nc_parent)->nc_vp) == NULL)
                error = EPERM;
        if (error) {
                vrele(fdvp);
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/vfs_journal.c
--- a/sys/kern/vfs_journal.c    Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/vfs_journal.c    Mon Jan 23 05:16:56 2006 +0100
@@ -1824,7 +1824,7 @@ jrecord_write_vnode_ref(struct jrecord *
     struct namecache *ncp;
 
     TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
-       if ((ncp->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
+       if ((cache_grphead(ncp)->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
            break;
     }
     if (ncp)
@@ -1840,7 +1840,7 @@ jrecord_write_vnode_link(struct jrecord 
     TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
        if (ncp == notncp)
            continue;
-       if ((ncp->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
+       if ((cache_grphead(ncp)->nc_flag & (NCF_UNRESOLVED|NCF_DESTROYED)) == 0)
            break;
     }
     if (ncp)
@@ -2533,7 +2533,7 @@ journal_nremove(struct vop_nremove_args 
 
     mp = ap->a_head.a_ops->vv_mount;
     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_REMOVE) &&
-       ap->a_ncp->nc_vp
+       cache_grphead(ap->a_ncp)->nc_vp
     ) {
        jreclist_undo_file(&jreclist, ap->a_ncp->nc_vp, 
                           JRUNDO_ALL|JRUNDO_GETVP|JRUNDO_CONDLINK, 0, -1);
@@ -2599,7 +2599,7 @@ journal_nrmdir(struct vop_nrmdir_args *a
 
     mp = ap->a_head.a_ops->vv_mount;
     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_RMDIR)) {
-       jreclist_undo_file(&jreclist, ap->a_ncp->nc_vp,
+       jreclist_undo_file(&jreclist, cache_grphead(ap->a_ncp)->nc_vp,
                           JRUNDO_VATTR|JRUNDO_GETVP, 0, 0);
     }
     error = vop_journal_operate_ap(&ap->a_head);
@@ -2628,9 +2628,9 @@ journal_nrename(struct vop_nrename_args 
 
     mp = ap->a_head.a_ops->vv_mount;
     if (jreclist_init(mp, &jreclist, &jreccache, JTYPE_RENAME) &&
-       ap->a_tncp->nc_vp
+       cache_grphead(ap->a_tncp)->nc_vp
     ) {
-       jreclist_undo_file(&jreclist, ap->a_tncp->nc_vp, 
+       jreclist_undo_file(&jreclist, cache_grphead(ap->a_tncp)->nc_vp, 
                           JRUNDO_ALL|JRUNDO_GETVP|JRUNDO_CONDLINK, 0, -1);
     }
     error = vop_journal_operate_ap(&ap->a_head);
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/vfs_nlookup.c
--- a/sys/kern/vfs_nlookup.c    Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/vfs_nlookup.c    Mon Jan 23 05:16:56 2006 +0100
@@ -381,13 +381,15 @@ nlookup(struct nlookupdata *nd)
                ncp = cache_get(ncp);
            } else {
                while ((ncp->nc_flag & NCF_MOUNTPT) && ncp != nd->nl_rootncp) {
-                   if (ncp->nc_parent->nc_flag & NCF_DESTROYED)
+                   if (cache_grphead(ncp->nc_parent)->nc_flag &
+                       NCF_DESTROYED)
                        break;
                    ncp = ncp->nc_parent;       /* get to underlying node */
                    KKASSERT(ncp != NULL && 1);
                }
                if (ncp != nd->nl_rootncp) {
-                       if (ncp->nc_parent->nc_flag & NCF_DESTROYED) {
+                       if (cache_grphead(ncp->nc_parent)->nc_flag &
+                           NCF_DESTROYED) {
                                error = EINVAL;
                                break;
                        }
@@ -421,11 +423,11 @@ nlookup(struct nlookupdata *nd)
         * XXX neither '.' nor '..' should return EAGAIN since they were
         * previously resolved and thus cannot be newly created ncp's.
         */
-       if (ncp->nc_flag & NCF_UNRESOLVED) {
+       if (cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) {
            error = cache_resolve(ncp, nd->nl_cred);
            KKASSERT(error != EAGAIN);
        } else {
-           error = ncp->nc_error;
+           error = cache_grphead_l(ncp)->nc_error;
        }
 
        /*
@@ -459,7 +461,7 @@ nlookup(struct nlookupdata *nd)
         * element or it is the last element and we are allowed to
         * follow symlinks, resolve the symlink.
         */
-       if ((ncp->nc_flag & NCF_ISSYMLINK) &&
+       if ((cache_grphead_l(ncp)->nc_flag & NCF_ISSYMLINK) &&
            (*ptr || (nd->nl_flags & NLC_FOLLOW))
        ) {
            if (nd->nl_loopcnt++ >= MAXSYMLINKS) {
@@ -509,24 +511,31 @@ nlookup(struct nlookupdata *nd)
         *
         * XXX NOCROSSMOUNT
         */
-       while ((ncp->nc_flag & NCF_ISDIR) && ncp->nc_vp->v_mountedhere &&
+       while ((cache_grphead_l(ncp)->nc_flag & NCF_ISDIR) &&
+               cache_grphead_l(ncp)->nc_vp->v_mountedhere &&
                (nd->nl_flags & NLC_NOCROSSMOUNT) == 0
        ) {
            struct mount *mp;
            struct vnode *tdp;
 
-           mp = ncp->nc_vp->v_mountedhere;
+           mp = cache_grphead_l(ncp)->nc_vp->v_mountedhere;
            cache_put(ncp);
            ncp = cache_get(mp->mnt_ncp);
 
-           if (ncp->nc_flag & NCF_UNRESOLVED) {
+           if (cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) {
                while (vfs_busy(mp, 0, nd->nl_td))
                    ;
                error = VFS_ROOT(mp, &tdp);
                vfs_unbusy(mp, nd->nl_td);
                if (error)
                    break;
-               cache_setvp(ncp, tdp);
+               /*
+                * The VFS_ROOT call might have the side effect of
+                * resolving the ncp. Or is that declared a sin
+                * anywhere?
+                */
+               if (cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED)
+                       cache_setvp(ncp, tdp);
                vput(tdp);
            }
        }
@@ -543,7 +552,7 @@ nlookup(struct nlookupdata *nd)
         * to the failure case below.
         */
        while (*ptr == '/') {
-           if ((ncp->nc_flag & NCF_ISDIR) == 0 && 
+           if ((cache_grphead_l(ncp)->nc_flag & NCF_ISDIR) == 0 && 
                !(nd->nl_flags & NLC_WILLBEDIR)
            ) {
                break;
@@ -555,7 +564,7 @@ nlookup(struct nlookupdata *nd)
         * Continuation case: additional elements and the current
         * element is a directory.
         */
-       if (*ptr && (ncp->nc_flag & NCF_ISDIR)) {
+       if (*ptr && (cache_grphead_l(ncp)->nc_flag & NCF_ISDIR)) {
            cache_drop(nd->nl_ncp);
            cache_unlock(ncp);
            nd->nl_ncp = ncp;
@@ -619,7 +628,7 @@ nlookup_mp(struct mount *mp, struct name
     error = 0;
     ncp = mp->mnt_ncp;
     cache_get(ncp);
-    if (ncp->nc_flag & NCF_UNRESOLVED) {
+    if (cache_grphead_l(ncp)->nc_flag & NCF_UNRESOLVED) {
        while (vfs_busy(mp, 0, curthread))
            ;
        error = VFS_ROOT(mp, &vp);
@@ -655,7 +664,7 @@ nreadsymlink(struct nlookupdata *nd, str
 
     nlc->nlc_nameptr = NULL;
     nlc->nlc_namelen = 0;
-    if (ncp->nc_vp == NULL)
+    if (cache_grphead_l(ncp)->nc_vp == NULL)
        return(ENOENT);
     if ((error = cache_vget(ncp, nd->nl_cred, LK_SHARED, &vp)) != 0)
        return(error);
@@ -713,13 +722,14 @@ int
 int
 naccess(struct namecache *ncp, int vmode, struct ucred *cred)
 {
-    struct namecache *par;
+    struct namecache *par, *oncp = ncp;
     struct vnode *vp;
     struct vattr va;
     int error;
 
-    if (ncp->nc_flag & NCF_UNRESOLVED) {
+    if (ncp->nc_flag & NCF_UNRESOLVED || ncp->nc_shadowed) {
        cache_lock(ncp);
+       ncp = cache_grphead_l(ncp);
        cache_resolve(ncp, cred);
        cache_unlock(ncp);
     }
@@ -728,7 +738,7 @@ naccess(struct namecache *ncp, int vmode
        if (((vmode & VCREATE) && ncp->nc_vp == NULL) ||
            ((vmode & VDELETE) && ncp->nc_vp != NULL)
        ) {
-           if ((par = ncp->nc_parent) == NULL) {
+           if ((par = oncp->nc_parent) == NULL) {
                if (error != EAGAIN)
                        error = EINVAL;
            } else {
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/vfs_syscalls.c
--- a/sys/kern/vfs_syscalls.c   Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/vfs_syscalls.c   Mon Jan 23 05:16:56 2006 +0100
@@ -140,7 +140,7 @@ mount(struct mount_args *uap)
        error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
        if (error == 0) {
                if ((error = nlookup(&nd)) == 0) {
-                       if (nd.nl_ncp->nc_vp == NULL)
+                       if (cache_grphead_l(nd.nl_ncp)->nc_vp == NULL)
                                error = ENOENT;
                }
        }
@@ -159,7 +159,7 @@ mount(struct mount_args *uap)
        /*
         * now we have the locked ref'd ncp and unreferenced vnode.
         */
-       vp = ncp->nc_vp;
+       vp = cache_grphead_l(ncp)->nc_vp;
        if ((error = vget(vp, LK_EXCLUSIVE, td)) != 0) {
                cache_put(ncp);
                return (error);
@@ -1050,9 +1050,9 @@ fchdir(struct fchdir_args *uap)
        while (!error && (mp = vp->v_mountedhere) != NULL) {
                error = nlookup_mp(mp, &nct);
                if (error == 0) {
+                       vput(vp);
+                       vp = cache_grphead_l(nct)->nc_vp;
                        cache_unlock(nct);      /* leave ref intact */
-                       vput(vp);
-                       vp = nct->nc_vp;
                        error = vget(vp, LK_SHARED, td);
                        KKASSERT(error == 0);
                        cache_drop(ncp);
@@ -1086,7 +1086,7 @@ kern_chdir(struct nlookupdata *nd)
 
        if ((error = nlookup(nd)) != 0)
                return (error);
-       if ((vp = nd->nl_ncp->nc_vp) == NULL)
+       if ((vp = cache_grphead_l(nd->nl_ncp)->nc_vp) == NULL)
                return (ENOENT);
        if ((error = vget(vp, LK_SHARED, td)) != 0)
                return (error);
@@ -1192,7 +1192,7 @@ kern_chroot(struct namecache *ncp)
                if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
                        return (error);
        }
-       if ((vp = ncp->nc_vp) == NULL)
+       if ((vp = cache_grphead_l(ncp)->nc_vp) == NULL)
                return (ENOENT);
 
        if ((error = vget(vp, LK_SHARED, td)) != 0)
@@ -1464,7 +1464,7 @@ kern_mknod(struct nlookupdata *nd, int m
        if ((error = nlookup(nd)) != 0)
                return (error);
        ncp = nd->nl_ncp;
-       if (ncp->nc_vp)
+       if (cache_grphead_l(ncp)->nc_vp)
                return (EEXIST);
 
        VATTR_NULL(&vattr);
@@ -1536,7 +1536,7 @@ kern_mkfifo(struct nlookupdata *nd, int 
        if ((error = nlookup(nd)) != 0)
                return (error);
        ncp = nd->nl_ncp;
-       if (ncp->nc_vp)
+       if (cache_grphead_l(ncp)->nc_vp)
                return (EEXIST);
 
        VATTR_NULL(&vattr);
@@ -1633,7 +1633,7 @@ kern_link(struct nlookupdata *nd, struct
        bwillwrite();
        if ((error = nlookup(nd)) != 0)
                return (error);
-       vp = nd->nl_ncp->nc_vp;
+       vp = cache_grphead_l(nd->nl_ncp)->nc_vp;
        KKASSERT(vp != NULL);
        if (vp->v_type == VDIR)
                return (EPERM);         /* POSIX */
@@ -1654,7 +1654,7 @@ kern_link(struct nlookupdata *nd, struct
                vput(vp);
                return (error);
        }
-       if (linknd->nl_ncp->nc_vp) {
+       if (cache_grphead_l(linknd->nl_ncp)->nc_vp) {
                vput(vp);
                return (EEXIST);
        }
@@ -1704,7 +1704,7 @@ kern_symlink(struct nlookupdata *nd, cha
        if ((error = nlookup(nd)) != 0)
                return (error);
        ncp = nd->nl_ncp;
-       if (ncp->nc_vp)
+       if (cache_grphead_l(ncp)->nc_vp)
                return (EEXIST);
 
        VATTR_NULL(&vattr);
@@ -1922,7 +1922,7 @@ kern_stat(struct nlookupdata *nd, struct
        if ((error = nlookup(nd)) != 0)
                return (error);
 again:
-       if ((vp = nd->nl_ncp->nc_vp) == NULL)
+       if ((vp = cache_grphead_l(nd->nl_ncp)->nc_vp) == NULL)
                return (ENOENT);
 
        td = curthread;
@@ -2718,13 +2718,17 @@ kern_rename(struct nlookupdata *fromnd, 
         * Due to relocking of the source, fromnd->nl_ncp->nc_vp might have
         * become NULL.
         */
-       if (tond->nl_ncp->nc_vp) {
-               if (fromnd->nl_ncp->nc_vp == NULL) {
+       /*
+        * XXX I was lazy to find out who is locked exactly, so just dumbly
+        * cache_grphead() the parties...
+        */
+       if (cache_grphead(tond->nl_ncp)->nc_vp) {
+               if (cache_grphead(fromnd->nl_ncp)->nc_vp == NULL) {
                        error = ENOENT;
-               } else if (fromnd->nl_ncp->nc_vp->v_type == VDIR) {
-                       if (tond->nl_ncp->nc_vp->v_type != VDIR)
+               } else if (cache_grphead(fromnd->nl_ncp)->nc_vp->v_type == VDIR) {
+                       if (cache_grphead(tond->nl_ncp)->nc_vp->v_type != VDIR)
                                error = ENOTDIR;
-               } else if (tond->nl_ncp->nc_vp->v_type == VDIR) {
+               } else if (cache_grphead(tond->nl_ncp)->nc_vp->v_type == VDIR) {
                        error = EISDIR;
                }
        }
@@ -2753,7 +2757,7 @@ kern_rename(struct nlookupdata *fromnd, 
         * when we detect the situation.
         */
        if (error == 0) {
-               if (fromnd->nl_ncp->nc_vp == tond->nl_ncp->nc_vp) {
+               if (cache_grphead(fromnd->nl_ncp)->nc_vp == cache_grphead(tond->nl_ncp)->nc_vp) {
                        error = VOP_NREMOVE(fromnd->nl_ncp, fromnd->nl_cred);
                } else {
                        error = VOP_NRENAME(fromnd->nl_ncp, tond->nl_ncp, 
@@ -2802,7 +2806,7 @@ kern_mkdir(struct nlookupdata *nd, int m
                return (error);
 
        ncp = nd->nl_ncp;
-       if (ncp->nc_vp)
+       if (cache_grphead_l(ncp)->nc_vp)
                return (EEXIST);
 
        VATTR_NULL(&vattr);
diff -r 820a1f1d791e -r 6db92777ea99 sys/kern/vfs_vnops.c
--- a/sys/kern/vfs_vnops.c      Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/kern/vfs_vnops.c      Mon Jan 23 05:16:56 2006 +0100
@@ -169,7 +169,7 @@ vn_open(struct nlookupdata *nd, struct f
         */
 again:
        if (fmode & O_CREAT) {
-               if (ncp->nc_vp == NULL) {
+               if (cache_grphead_l(ncp)->nc_vp == NULL) {
                        VATTR_NULL(vap);
                        vap->va_type = VREG;
                        vap->va_mode = cmode;
diff -r 820a1f1d791e -r 6db92777ea99 sys/sys/namecache.h
--- a/sys/sys/namecache.h       Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/sys/namecache.h       Mon Jan 23 05:16:56 2006 +0100
@@ -102,6 +102,7 @@ struct namecache {
     TAILQ_ENTRY(namecache) nc_vnode;   /* scan via vnode->v_namecache */
     struct namecache_list  nc_list;    /* list of children */
     struct namecache *nc_parent;       /* namecache entry for parent */
+    struct namecache *nc_shadowed;     /* lower layer entry in layered fs */
     struct     vnode *nc_vp;           /* vnode representing name or NULL */
     int                nc_refs;                /* ref count prevents deletion */
     u_short    nc_flag;
@@ -150,6 +151,10 @@ void       cache_lock(struct namecache *ncp);
 void   cache_lock(struct namecache *ncp);
 int    cache_lock_nonblock(struct namecache *ncp);
 void   cache_unlock(struct namecache *ncp);
+struct namecache *cache_grphead_l(struct namecache *ncp);
+struct namecache *cache_grphead(struct namecache *ncp);
+int    cache_shadow_attach(struct namecache *ncp, struct namecache *sncp);
+struct namecache *cache_shadow_detach(struct namecache *ncp);
 void   cache_setvp(struct namecache *ncp, struct vnode *vp);
 void   cache_settimeout(struct namecache *ncp, int nticks);
 void   cache_setunresolved(struct namecache *ncp);
diff -r 820a1f1d791e -r 6db92777ea99 sys/vfs/nfs/nfs_serv.c
--- a/sys/vfs/nfs/nfs_serv.c    Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/vfs/nfs/nfs_serv.c    Mon Jan 23 05:16:56 2006 +0100
@@ -2183,8 +2183,8 @@ nfsrv_rename(struct nfsrv_descript *nfsd
        }
        fromnd.nl_flags |= NLC_NCPISLOCKED;
 
-       tvp = tond.nl_ncp->nc_vp;
-       fvp = fromnd.nl_ncp->nc_vp;
+       tvp = cache_grphead_l(tond.nl_ncp)->nc_vp;
+       fvp = cache_grphead_l(fromnd.nl_ncp)->nc_vp;
 
        if (tvp != NULL) {
                if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
diff -r 820a1f1d791e -r 6db92777ea99 sys/vfs/nfs/nfs_subs.c
--- a/sys/vfs/nfs/nfs_subs.c    Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/vfs/nfs/nfs_subs.c    Mon Jan 23 05:16:56 2006 +0100
@@ -1671,7 +1671,7 @@ nfs_namei(struct nlookupdata *nd, struct
                                error = ENXIO;
                        }
                }
-               if (vpp && ncp->nc_vp) {
+               if (vpp && cache_grphead_l(ncp)->nc_vp) {
                        error = cache_vget(ncp, nd->nl_cred, LK_EXCLUSIVE, vpp);
                }
                if (error) {
diff -r 820a1f1d791e -r 6db92777ea99 sys/vfs/nfs/nfs_vnops.c
--- a/sys/vfs/nfs/nfs_vnops.c   Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/vfs/nfs/nfs_vnops.c   Mon Jan 23 05:16:56 2006 +0100
@@ -883,8 +883,8 @@ nfs_nresolve(struct vop_nresolve_args *a
        cred = ap->a_cred;
        ncp = ap->a_ncp;
 
-       KKASSERT(ncp->nc_parent && ncp->nc_parent->nc_vp);
-       dvp = ncp->nc_parent->nc_vp;
+       KKASSERT(ncp->nc_parent && cache_grphead(ncp->nc_parent)->nc_vp);
+       dvp = cache_grphead(ncp->nc_parent)->nc_vp;
        if ((error = vget(dvp, LK_SHARED, td)) != 0)
                return (error);
 
diff -r 820a1f1d791e -r 6db92777ea99 sys/vfs/nullfs/null.h
--- a/sys/vfs/nullfs/null.h     Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/vfs/nullfs/null.h     Mon Jan 23 05:16:56 2006 +0100
@@ -43,18 +43,19 @@ struct null_args {
        char            *target;        /* Target of loopback  */
 };
 
-struct null_mount {
-       struct mount    *nullm_vfs;
-       struct vnode    *nullm_rootvp;  /* Reference to root null_node */
-};
-
 #ifdef _KERNEL
-#define        MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
 
 #ifdef NULLFS_DEBUG
-#define NULLFSDEBUG(format, args...) printf(format ,## args)
+#define NULLFSDEBUG(format, args...) \
+       printf("[nullfs] %s:%d: " format, __func__, __LINE__, ## args)
+#define        NULLNCDEBUG(ncp) \
+        NULLFSDEBUG(#ncp " %p: name %s, refs %d, exlocks %d, " \
+                    "nc_mount %p, nc_shadowed %p\n", \
+                    (ncp), (ncp)->nc_name, (ncp)->nc_refs, (ncp)->nc_exlocks, \
+                    (ncp)->nc_mount, (ncp)->nc_shadowed);
 #else
 #define NULLFSDEBUG(format, args...)
+#define NULLNCDEBUG(ncp)
 #endif /* NULLFS_DEBUG */
 
 #endif /* _KERNEL */
diff -r 820a1f1d791e -r 6db92777ea99 sys/vfs/nullfs/null_vfsops.c
--- a/sys/vfs/nullfs/null_vfsops.c      Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/vfs/nullfs/null_vfsops.c      Mon Jan 23 05:16:56 2006 +0100
@@ -80,12 +80,10 @@ nullfs_mount(struct mount *mp, char *pat
 {
        int error = 0;
        struct null_args args;
-       struct vnode *rootvp;
-       struct null_mount *xmp;
        u_int size;
        struct nlookupdata nd;
 
-       NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
+       NULLFSDEBUG("mp %p\n", (void *)mp);
 
        /*
         * Update is a no-op
@@ -98,118 +96,118 @@ nullfs_mount(struct mount *mp, char *pat
         * Get argument
         */
        error = copyin(data, (caddr_t)&args, sizeof(struct null_args));
-       if (error)
-               return (error);
-
-       /*
-        * Find lower node
-        */
-       rootvp = NULL;
-       error = nlookup_init(&nd, args.target, UIO_USERSPACE, NLC_FOLLOW);
+
+       /*
+        * Do a lookup just to see if things are not fundamentally broken...
+        * but it's too early to make a proper use of the result.
+        */
+       if (error == 0)
+               error = nlookup_init(&nd, args.target, UIO_USERSPACE,
+                                    NLC_FOLLOW);
        if (error == 0)
                error = nlookup(&nd);
-       if (error == 0) {
-               error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, 
-                                       &rootvp);
-       }
-
-       xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
-                               M_NULLFSMNT, M_WAITOK); /* XXX */
-
-       /*
-        * Save reference to underlying FS
-        */
-        /*
-         * As lite stacking enters the scene, the old way of doing this
-        * -- via the vnode -- is not good enough anymore...
-        */
-       xmp->nullm_vfs = nd.nl_ncp->nc_mount;
+       if (error)
+               return(error);
+
        nlookup_done(&nd);
 
-       vfs_add_vnodeops(mp, &mp->mnt_vn_norm_ops, 
-                        null_vnodeop_entries, 0);
-
-       VOP_UNLOCK(rootvp, 0, td);
-
-       /*
-        * Keep a held reference to the root vnode.
-        * It is vrele'd in nullfs_unmount.
-        */
-       xmp->nullm_rootvp = rootvp;
-       /*
-        * XXX What's the proper safety condition for querying
-        * the underlying mount? Is this flag tuning necessary
-        * at all?
-        */
-       if (xmp->nullm_vfs->mnt_flag & MNT_LOCAL)
-               mp->mnt_flag |= MNT_LOCAL;
-       mp->mnt_data = (qaddr_t) xmp;
-       vfs_getnewfsid(mp);
+       vfs_add_vnodeops(mp, &mp->mnt_vn_norm_ops, null_vnodeop_entries, 0);
+
+       /*
+        * Heck it, let it just be local. I bet I need only five minutes to
+        * find out a sound sounding meaning for "local" by which null mounts
+        * are always local.
+        */
+       mp->mnt_flag |= MNT_LOCAL; vfs_getnewfsid(mp);
 
        (void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
            &size);
        bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
-       (void)nullfs_statfs(mp, &mp->mnt_stat, td);
-       NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n",
-               mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntfromname);
+       NULLFSDEBUG("lower %s, alias at %s\n",
+                   mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
        return (0);
 }
 
-/*
- * Free reference to null layer
- */
 static int
 nullfs_unmount(struct mount *mp, int mntflags, struct thread *td)
 {
-       void *mntdata;
-       int flags = 0;
-
-       NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
-
-       if (mntflags & MNT_FORCE)
-               flags |= FORCECLOSE;
-
-       /*
-        * Finally, throw away the null_mount structure
-        */
-       mntdata = mp->mnt_data;
-       mp->mnt_data = 0;
-       free(mntdata, M_NULLFSMNT);
+       NULLFSDEBUG("mp %p\n", (void *)mp);
+
+       cache_lock(mp->mnt_ncp);
+       cache_put(cache_shadow_detach(mp->mnt_ncp));
+       cache_unlock(mp->mnt_ncp);
+
        return 0;
 }
 
 static int
+nullfs_start(struct mount *mp, int flags, struct thread *td)
+{
+       int error;
+       struct nlookupdata nd;
+
+       NULLFSDEBUG("nlookup %s\n", mp->mnt_stat.f_mntfromname);
+
+       error = nlookup_init(&nd, mp->mnt_stat.f_mntfromname,
+                                    UIO_SYSSPACE, NLC_FOLLOW);
+       if (error == 0)
+               error = nlookup(&nd);
+       if (error)
+               return(error);
+
+       cache_hold(nd.nl_ncp);
+       cache_lock(mp->mnt_ncp);
+
+       error = mp->mnt_ncp->nc_shadowed ?
+               EINVAL :
+               cache_shadow_attach(mp->mnt_ncp, nd.nl_ncp);
+
+       nlookup_done(&nd);
+
+       NULLNCDEBUG(mp->mnt_ncp);
+#ifdef NULLFS_DEBUG
+       if (mp->mnt_ncp->nc_shadowed)
+               NULLNCDEBUG(mp->mnt_ncp->nc_shadowed);
+#endif
+
+       return (error);
+}      
+
+/*
+ * As the mount won't get aborted if VFS_START fails, we have to check in each 
+ * VFS call whether it has succeeded...
+ */ 
+
+static int
 nullfs_root(struct mount *mp, struct vnode **vpp)
 {
-       struct thread *td = curthread;  /* XXX */
-       struct vnode *vp;
-
-       NULLFSDEBUG("nullfs_root(mp = %p, vp = %p)\n", (void *)mp,
-           (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp);
-
-       /*
-        * Return locked reference to root.
-        */
-       vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
-       vref(vp);
-
-#ifdef NULLFS_DEBUG
-       if (VOP_ISLOCKED(vp, NULL)) {
-               Debugger("root vnode is locked.\n");
-               vrele(vp);
-               return (EDEADLK);
-       }
-#endif
-       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
-       *vpp = vp;
-       return 0;
+       int error;
+
+       if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+               return (ENXIO);
+
+       error = cache_vget(mp->mnt_ncp, crhold(proc0.p_ucred),
+                          LK_EXCLUSIVE | LK_RETRY, vpp);
+       crfree(proc0.p_ucred);
+
+       return (error);
+}
+
+static __inline
+struct mount *
+nullfs_lowermount_0(struct mount *mp)
+{
+       return (mp->mnt_ncp->nc_shadowed->nc_mount);
 }
 
 static int
 nullfs_quotactl(struct mount *mp, int cmd, uid_t uid, caddr_t arg,
                struct thread *td)
 {
-       return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, arg, td);
+       if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+               return (ENXIO);
+
+       return VFS_QUOTACTL(nullfs_lowermount_0(mp), cmd, uid, arg, td);
 }
 
 static int
@@ -218,12 +216,15 @@ nullfs_statfs(struct mount *mp, struct s
        int error;
        struct statfs mstat;
 
-       NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p)\n", (void *)mp,
-           (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp);
+       if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+               return (ENXIO);
+
+       NULLFSDEBUG("mp %p, ncp %p, lower mp %p\n",
+                   mp, mp->mnt_ncp, nullfs_lowermount_0(mp));
 
        bzero(&mstat, sizeof(mstat));
 
-       error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &mstat, td);
+       error = VFS_STATFS(nullfs_lowermount_0(mp), &mstat, td);
        if (error)
                return (error);
 
@@ -248,23 +249,27 @@ nullfs_checkexp(struct mount *mp, struct
 nullfs_checkexp(struct mount *mp, struct sockaddr *nam, int *extflagsp,
                struct ucred **credanonp)
 {
-
-       return VFS_CHECKEXP(MOUNTTONULLMOUNT(mp)->nullm_vfs, nam, 
-               extflagsp, credanonp);
+       if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+               return (ENXIO);
+
+       return VFS_CHECKEXP(nullfs_lowermount_0(mp), nam, extflagsp, credanonp);
 }
 
 static int                        
 nullfs_extattrctl(struct mount *mp, int cmd, const char *attrname, caddr_t arg,
                  struct thread *td)
 {
-       return VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, attrname,
-           arg, td);
+       if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+               return (ENXIO);
+
+       return VFS_EXTATTRCTL(nullfs_lowermount_0(mp), cmd, attrname, arg, td);
 }
 
 
 static struct vfsops null_vfsops = {
        .vfs_mount =            nullfs_mount,
        .vfs_unmount =          nullfs_unmount,
+       .vfs_start =            nullfs_start,
        .vfs_root =             nullfs_root,
        .vfs_quotactl =         nullfs_quotactl,
        .vfs_statfs =           nullfs_statfs,
diff -r 820a1f1d791e -r 6db92777ea99 sys/vfs/nullfs/null_vnops.c
--- a/sys/vfs/nullfs/null_vnops.c       Mon Jan 23 02:56:43 2006 +0000
+++ b/sys/vfs/nullfs/null_vnops.c       Mon Jan 23 05:16:56 2006 +0100
@@ -109,6 +109,8 @@
 #include <sys/namei.h>
 #include <sys/malloc.h>
 #include <sys/buf.h>
+#include <sys/namecache.h>
+#include <sys/nlookup.h>
 #include "null.h"
 
 static int     null_nresolve(struct vop_nresolve_args *ap);
@@ -122,18 +124,93 @@ static int        null_nrmdir(struct vop_nrmdir
 static int     null_nrmdir(struct vop_nrmdir_args *ap);
 static int     null_nrename(struct vop_nrename_args *ap);
 
+static __inline
+struct mount *
+nullfs_lowermount_l(struct namecache *ncp)
+{
+       /*
+        * The code in use below allows passing through lower mounts.
+        * If we didn't want to do that, we could use
+        *
+        *   ncp->nc_mount->mnt_ncp->nc_shadowed->nc_mount
+        *
+        * Eventually, the choice might be configurable.
+        *
+        *                  -  -  -
+        *
+        * Matt says in
+        * http://leaf.dragonflybsd.org/mailarchive/kernel/2006-01/msg00023.html
+        * :
+        
+    The ncp->nc_mount field was never meant to be used by the VFS code...
+    only to be used internally by cache_*().  It looks like I broke my own
+    rule... I have two references in NFS, but that's for later.
+
+        * Note that both approaches still use nc_mount:
+        *
+        * - If we wanna pass through lower mounts, we do have to find
+        *   the lower fs ncp-wise, we simply don't have a choice.
+        *
+        * - If we just work with a fixed lower fs, we are able to access
+        *   that if we are willing to use nc_mount. Hence it just seems to be
+        *   stupid to keep around a direct reference to the lower fs, but
+        *   that's of course feasible.
+        */
+       return (ncp->nc_shadowed->nc_mount);
+}
+
 static int
 null_nresolve(struct vop_nresolve_args *ap)
 {
-       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
-
-       return vop_nresolve_ap(ap);
+       struct namecache *ncp = ap->a_ncp;
+       struct mount *lmp;
+
+       if (! ncp->nc_shadowed) {
+               struct nlcomponent nlc;
+               struct namecache *sncp;
+
+               nlc.nlc_nameptr = ncp->nc_name;
+               nlc.nlc_namelen = ncp->nc_nlen;
+
+               KKASSERT(ncp->nc_parent->nc_shadowed);
+               sncp = cache_nlookup(ncp->nc_parent->nc_shadowed, &nlc);
+
+               if ((ncp->nc_error = cache_shadow_attach(ncp, sncp))) {
+                       cache_put(sncp);
+                       cache_setvp(ncp, NULL);
+                       return (ncp->nc_error);
+               }
+
+               NULLNCDEBUG(ncp);
+               NULLNCDEBUG(ncp->nc_shadowed);
+       }
+
+       /*
+        * XXX Querying/ensuring usability of lower fs still not got right.
+        * As a quick hack, we do a simple test here, that will do for
+        * avoiding most obvious fallacies.
+        */
+       if ((lmp = nullfs_lowermount_l(ncp)) &&
+           (ap->a_head.a_ops = lmp->mnt_vn_use_ops)) {
+               /*
+                * Moving down in the shadow chain is for avoiding a recursed
+                * loop (ending up in exhausting the kernel stack).
+                *
+                * Otherwise it's the same whether we use ncp or
+                * ncp->nc_shadowed -- we go for group shared ncp attributes.
+                */
+               ap->a_ncp = ncp->nc_shadowed;
+               return vop_nresolve_ap(ap);
+       }
+
+       return (ENXIO);
 }
 
 static int
 null_ncreate(struct vop_ncreate_args *ap)
 {
-       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+       ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+       ap->a_ncp = ap->a_ncp->nc_shadowed;
 
        return vop_ncreate_ap(ap);
 }
@@ -141,7 +218,8 @@ static int
 static int
 null_nmkdir(struct vop_nmkdir_args *ap)
 {
-       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+       ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+       ap->a_ncp = ap->a_ncp->nc_shadowed;
 
        return vop_nmkdir_ap(ap);
 }
@@ -149,7 +227,8 @@ static int
 static int
 null_nmknod(struct vop_nmknod_args *ap)
 {
-       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+       ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+       ap->a_ncp = ap->a_ncp->nc_shadowed;
 
        return vop_nmknod_ap(ap);
 }
@@ -157,7 +236,8 @@ static int
 static int
 null_nlink(struct vop_nlink_args *ap)
 {
-       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+       ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+       ap->a_ncp = ap->a_ncp->nc_shadowed;
 
        return vop_nlink_ap(ap);
 }
@@ -165,7 +245,8 @@ static int
 static int
 null_nsymlink(struct vop_nsymlink_args *ap)
 {
-       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+       ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+       ap->a_ncp = ap->a_ncp->nc_shadowed;
 
        return vop_nsymlink_ap(ap);
 }
@@ -173,7 +254,8 @@ static int
 static int
 null_nwhiteout(struct vop_nwhiteout_args *ap)
 {
-       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+       ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+       ap->a_ncp = ap->a_ncp->nc_shadowed;
 
        return vop_nwhiteout_ap(ap);
 }
@@ -181,7 +263,8 @@ static int
 static int
 null_nremove(struct vop_nremove_args *ap)
 {
-       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+       ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+       ap->a_ncp = ap->a_ncp->nc_shadowed;
 
        return vop_nremove_ap(ap);
 }
@@ -189,7 +272,8 @@ static int
 static int
 null_nrmdir(struct vop_nrmdir_args *ap)
 {
-       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+       ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+       ap->a_ncp = ap->a_ncp->nc_shadowed;
 
        return vop_nrmdir_ap(ap);
 }
@@ -197,15 +281,32 @@ static int
 static int
 null_nrename(struct vop_nrename_args *ap)
 {
+       struct namecache *fncp = ap->a_fncp;
+       struct namecache *tncp = ap->a_tncp;
        struct mount *lmp;
-
-       lmp = MOUNTTONULLMOUNT(ap->a_fncp->nc_mount)->nullm_vfs;
-       if (lmp != MOUNTTONULLMOUNT(ap->a_tncp->nc_mount)->nullm_vfs)
-               return (EINVAL);
-
-       ap->a_head.a_ops = lmp->mnt_vn_norm_ops;
-
-       return vop_nrename_ap(ap);
+       int error;
+
+       lmp = nullfs_lowermount_l(fncp);
+       if (lmp != nullfs_lowermount_l(tncp))
+               return (EXDEV);
+
+       ap->a_head.a_ops = lmp->mnt_vn_use_ops;
+
+       /*
+        * Parent-children relations are not subject to shadowing, therefore
+        * we can't just dumbassly delegate, we have some work in both layers.
+        * However, shadowing still saves us from locking or dealing with
+        * vnodes.
+        */
+       ap->a_fncp = fncp->nc_shadowed;
+       ap->a_tncp = tncp->nc_shadowed;
+
+       if ((error = vop_nrename_ap(ap)))
+               return (error);
+
+       cache_rename(fncp, tncp);
+
+       return (error);
 }
 
 /*
@@ -224,4 +325,3 @@ struct vnodeopv_entry_desc null_vnodeop_
        { &vop_nrename_desc,            (vnodeopv_entry_t) null_nrename },
        { NULL, NULL }
 };
-

Reply via email to