nnull.diff (see comments on kernel@)

Csaba Henk Mon, 02 Jan 2006 05:01:32 -0800

# HG changeset patch
# User root@
# Node ID e65b90b999c3e7f71539c8affa762d61d3a5fd17
# Parent  758f5a725024e40c276e253651f9115aa4ba9fbe
patch queue: nnull


diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/Makefile
--- a/sys/vfs/nullfs/Makefile   Mon Jan  2 11:42:05 2006 +0000
+++ b/sys/vfs/nullfs/Makefile   Mon Jan  2 12:21:05 2006 +0000
@@ -2,7 +2,7 @@
 # $DragonFly: src/sys/vfs/nullfs/Makefile,v 1.4 2004/08/13 17:51:12 dillon Exp $
 
 KMOD=  null
-SRCS=  null_subr.c null_vfsops.c null_vnops.c
+SRCS=  null_vfsops.c null_vnops.c
 NOMAN=
 
 .include <bsd.kmod.mk>
diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/null.h
--- a/sys/vfs/nullfs/null.h     Mon Jan  2 11:42:05 2006 +0000
+++ b/sys/vfs/nullfs/null.h     Mon Jan  2 12:21:05 2006 +0000
@@ -49,36 +49,7 @@
 };
 
 #ifdef _KERNEL
-/*
- * A cache of vnode references
- */
-struct null_node {
-       struct null_node        *null_next;     /* Hash list */
-       struct vnode            *null_lowervp;  /* vrefed once */
-       struct vnode            *null_vnode;    /* Back pointer */
-};
-
 #define        MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
-#define        VTONULL(vp) ((struct null_node *)(vp)->v_data)
-#define        NULLTOV(xp) ((xp)->null_vnode)
-
-int nullfs_init(struct vfsconf *vfsp);
-int nullfs_uninit(struct vfsconf *vfsp);
-int null_node_add(struct null_node *np);
-void null_node_rem(struct null_node *np);
-int null_node_create(struct mount *mp, struct vnode *target, struct vnode **vpp);
-int null_bypass(struct vop_generic_args *ap);
-
-#ifdef DIAGNOSTIC
-struct vnode *null_checkvp(struct vnode *vp, char *fil, int lno);
-#define        NULLVPTOLOWERVP(vp) null_checkvp((vp), __FILE__, __LINE__)
-#else
-#define        NULLVPTOLOWERVP(vp) (VTONULL(vp)->null_lowervp)
-#endif
-
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_NULLFSNODE);
-#endif
 
 #ifdef NULLFS_DEBUG
 #define NULLFSDEBUG(format, args...) printf(format ,## args)
diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/null_vfsops.c
--- a/sys/vfs/nullfs/null_vfsops.c      Mon Jan  2 11:42:05 2006 +0000
+++ b/sys/vfs/nullfs/null_vfsops.c      Mon Jan  2 12:21:05 2006 +0000
@@ -59,8 +59,6 @@
 
 static MALLOC_DEFINE(M_NULLFSMNT, "NULLFS mount", "NULLFS mount structure");
 
-static int     nullfs_fhtovp(struct mount *mp, struct fid *fidp,
-                                  struct vnode **vpp);
 static int     nullfs_checkexp(struct mount *mp, struct sockaddr *nam,
                                    int *extflagsp, struct ucred **credanonp);
 static int     nullfs_mount(struct mount *mp, char *path, caddr_t data,
@@ -71,8 +69,6 @@
 static int     nullfs_statfs(struct mount *mp, struct statfs *sbp,
                                   struct thread *td);
 static int     nullfs_unmount(struct mount *mp, int mntflags, struct thread *td);
-static int     nullfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp);
-static int     nullfs_vptofh(struct vnode *vp, struct fid *fhp);
 static int     nullfs_extattrctl(struct mount *mp, int cmd,
                        const char *attrname, caddr_t arg, struct thread *td);
 
@@ -84,11 +80,9 @@
 {
        int error = 0;
        struct null_args args;
-       struct vnode *lowerrootvp, *vp;
-       struct vnode *nullm_rootvp;
+       struct vnode *rootvp;
        struct null_mount *xmp;
        u_int size;
-       int isvnunlocked = 0;
        struct nlookupdata nd;
 
        NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
@@ -108,44 +102,15 @@
                return (error);
 
        /*
-        * Unlock lower node to avoid deadlock.
-        * (XXX) VOP_ISLOCKED is needed?
-        */
-       if ((mp->mnt_vnodecovered->v_tag == VT_NULL) &&
-               VOP_ISLOCKED(mp->mnt_vnodecovered, NULL)) {
-               VOP_UNLOCK(mp->mnt_vnodecovered, 0, td);
-               isvnunlocked = 1;
-       }
-       /*
         * Find lower node
         */
-       lowerrootvp = NULL;
+       rootvp = NULL;
        error = nlookup_init(&nd, args.target, UIO_USERSPACE, NLC_FOLLOW);
        if (error == 0)
                error = nlookup(&nd);
        if (error == 0) {
                error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, 
-                                       &lowerrootvp);
-       }
-       nlookup_done(&nd);
-
-       /*
-        * Re-lock vnode.
-        */
-       if (isvnunlocked && !VOP_ISLOCKED(mp->mnt_vnodecovered, NULL))
-               vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY, td);
-       if (error)
-               return (error);
-               
-       /*
-        * Sanity check on lower vnode
-        *
-        * Check multi null mount to avoid `lock against myself' panic.
-        */
-       if (lowerrootvp == VTONULL(mp->mnt_vnodecovered)->null_lowervp) {
-               NULLFSDEBUG("nullfs_mount: multi null mount?\n");
-               vput(lowerrootvp);
-               return (EDEADLK);
+                                       &rootvp);
        }
 
        xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
@@ -154,37 +119,29 @@
        /*
         * Save reference to underlying FS
         */
-       xmp->nullm_vfs = lowerrootvp->v_mount;
+        /*
+         * As lite stacking enters the scene, the old way of doing this
+        * -- via the vnode -- is not good enough anymore...
+        */
+       xmp->nullm_vfs = nd.nl_ncp->nc_mount;
+       nlookup_done(&nd);
 
        vfs_add_vnodeops(mp, &mp->mnt_vn_norm_ops, 
                         null_vnodeop_entries, 0);
 
-       /*
-        * Save reference.  Each mount also holds
-        * a reference on the root vnode.
-        */
-       error = null_node_create(mp, lowerrootvp, &vp);
-       /*
-        * Unlock the node (either the lower or the alias)
-        */
-       VOP_UNLOCK(vp, 0, td);
-       /*
-        * Make sure the node alias worked
-        */
-       if (error) {
-               vrele(lowerrootvp);
-               free(xmp, M_NULLFSMNT); /* XXX */
-               return (error);
-       }
+       VOP_UNLOCK(rootvp, 0, td);
 
        /*
         * Keep a held reference to the root vnode.
         * It is vrele'd in nullfs_unmount.
         */
-       nullm_rootvp = vp;
-       nullm_rootvp->v_flag |= VROOT;
-       xmp->nullm_rootvp = nullm_rootvp;
-       if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+       xmp->nullm_rootvp = rootvp;
+       /*
+        * XXX What's the proper safety condition for querying
+        * the underlying mount? Is this flag tuning necessary
+        * at all?
+        */
+       if (xmp->nullm_vfs->mnt_flag & MNT_LOCAL)
                mp->mnt_flag |= MNT_LOCAL;
        mp->mnt_data = (qaddr_t) xmp;
        vfs_getnewfsid(mp);
@@ -205,18 +162,12 @@
 nullfs_unmount(struct mount *mp, int mntflags, struct thread *td)
 {
        void *mntdata;
-       int error;
        int flags = 0;
 
        NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
 
        if (mntflags & MNT_FORCE)
                flags |= FORCECLOSE;
-
-       /* There is 1 extra root vnode reference (nullm_rootvp). */
-       error = vflush(mp, 1, flags);
-       if (error)
-               return (error);
 
        /*
         * Finally, throw away the null_mount structure
@@ -233,9 +184,8 @@
        struct thread *td = curthread;  /* XXX */
        struct vnode *vp;
 
-       NULLFSDEBUG("nullfs_root(mp = %p, vp = %p->%p)\n", (void *)mp,
-           (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
-           (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
+       NULLFSDEBUG("nullfs_root(mp = %p, vp = %p)\n", (void *)mp,
+           (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp);
 
        /*
         * Return locked reference to root.
@@ -268,9 +218,8 @@
        int error;
        struct statfs mstat;
 
-       NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp,
-           (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
-           (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
+       NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p)\n", (void *)mp,
+           (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp);
 
        bzero(&mstat, sizeof(mstat));
 
@@ -296,32 +245,12 @@
 }
 
 static int
-nullfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
-{
-
-       return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp);
-}
-
-static int
-nullfs_fhtovp(struct mount *mp, struct fid *fidp, struct vnode **vpp)
-{
-
-       return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, vpp);
-}
-
-static int
 nullfs_checkexp(struct mount *mp, struct sockaddr *nam, int *extflagsp,
                struct ucred **credanonp)
 {
 
        return VFS_CHECKEXP(MOUNTTONULLMOUNT(mp)->nullm_vfs, nam, 
                extflagsp, credanonp);
-}
-
-static int
-nullfs_vptofh(struct vnode *vp, struct fid *fhp)
-{
-       return VFS_VPTOFH(NULLVPTOLOWERVP(vp), fhp);
 }
 
 static int                        
@@ -340,12 +269,7 @@
        .vfs_quotactl =         nullfs_quotactl,
        .vfs_statfs =           nullfs_statfs,
        .vfs_sync =             vfs_stdsync,
-       .vfs_vget =             nullfs_vget,
-       .vfs_fhtovp =           nullfs_fhtovp,
        .vfs_checkexp =         nullfs_checkexp,
-       .vfs_vptofh =           nullfs_vptofh,
-       .vfs_init =             nullfs_init,
-       .vfs_uninit =           nullfs_uninit,
        .vfs_extattrctl =       nullfs_extattrctl
 };
 
diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/null_vnops.c
--- a/sys/vfs/nullfs/null_vnops.c       Mon Jan  2 11:42:05 2006 +0000
+++ b/sys/vfs/nullfs/null_vnops.c       Mon Jan  2 12:21:05 2006 +0000
@@ -83,98 +83,22 @@
  *
  * The null layer is the minimum file system layer,
  * simply bypassing all possible operations to the lower layer
- * for processing there.  The majority of its activity centers
- * on the bypass routine, through which nearly all vnode operations
- * pass.
- *
- * The bypass routine accepts arbitrary vnode operations for
- * handling by the lower layer.  It begins by examing vnode
- * operation arguments and replacing any null-nodes by their
- * lower-layer equivlants.  It then invokes the operation
- * on the lower layer.  Finally, it replaces the null-nodes
- * in the arguments and, if a vnode is return by the operation,
- * stacks a null-node on top of the returned vnode.
- *
- * Although bypass handles most operations, vop_getattr, vop_lock,
- * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not
- * bypassed. Vop_getattr must change the fsid being returned.
- * Vop_lock and vop_unlock must handle any locking for the
- * current vnode as well as pass the lock request down.
- * Vop_inactive and vop_reclaim are not bypassed so that
- * they can handle freeing null-layer specific data. Vop_print
- * is not bypassed to avoid excessive debugging information.
- * Also, certain vnode operations change the locking state within
- * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
- * and symlink). Ideally these operations should not change the
- * lock state, but should be changed to let the caller of the
- * function unlock them. Otherwise all intermediate vnode layers
- * (such as union, umapfs, etc) must catch these functions to do
- * the necessary locking at their layer.
- *
- *
- * INSTANTIATING VNODE STACKS
- *
- * Mounting associates the null layer with a lower layer,
- * effect stacking two VFSes.  Vnode stacks are instead
- * created on demand as files are accessed.
- *
- * The initial mount creates a single vnode stack for the
- * root of the new null layer.  All other vnode stacks
- * are created as a result of vnode operations on
- * this or other null vnode stacks.
- *
- * New vnode stacks come into existance as a result of
- * an operation which returns a vnode.
- * The bypass routine stacks a null-node above the new
- * vnode before returning it to the caller.
- *
- * For example, imagine mounting a null layer with
- * "mount_null /usr/include /dev/layer/null".
- * Changing directory to /dev/layer/null will assign
- * the root null-node (which was created when the null layer was mounted).
- * Now consider opening "sys".  A vop_old_lookup would be
- * done on the root null-node.  This operation would bypass through
- * to the lower layer which would return a vnode representing
- * the UFS "sys".  Null_bypass then builds a null-node
- * aliasing the UFS "sys" and returns this to the caller.
- * Later operations on the null-node "sys" will repeat this
- * process when constructing other vnode stacks.
- *
- *
- * CREATING OTHER FILE SYSTEM LAYERS
- *
- * One of the easiest ways to construct new file system layers is to make
- * a copy of the null layer, rename all files and variables, and
- * then begin modifing the copy.  Sed can be used to easily rename
- * all variables.
- *
- * The umap layer is an example of a layer descended from the
- * null layer.
- *
- *
- * INVOKING OPERATIONS ON LOWER LAYERS
- *
- * There are two techniques to invoke operations on a lower layer
- * when the operation cannot be completely bypassed.  Each method
- * is appropriate in different situations.  In both cases,
- * it is the responsibility of the aliasing layer to make
- * the operation arguments "correct" for the lower layer
- * by mapping an vnode arguments to the lower layer.
- *
- * The first approach is to call the aliasing layer's bypass routine.
- * This method is most suitable when you wish to invoke the operation
- * currently being handled on the lower layer.  It has the advantage
- * that the bypass routine already must do argument mapping.
- * An example of this is null_getattrs in the null layer.
- *
- * A second approach is to directly invoke vnode operations on
- * the lower layer with the VOP_OPERATIONNAME interface.
- * The advantage of this method is that it is easy to invoke
- * arbitrary operations on the lower layer.  The disadvantage
- * is that vnode arguments must be manualy mapped.
- *
+ * for processing there.  The majority of its activity used to center
+ * on a so-called bypass routine, through which nullfs vnodes
+ * passed operations on to their underlying peers.
+ *
+ * However, in the current implementation nullfs doesn't have any private
+ * vnodes; rather, it relies on DragonFly's namecache API. That gives a much
+ * more lightweight null layer, as namecache structures are pure data, with
+ * no private operations, so there is no need for subtle dispatching routines.
+ *
+ * Unlike the old code, this implementation is not a general skeleton overlay
+ * filesystem: to get more comprehensive overlaying, like that of umapfs, we
+ * will need vnode operation dispatch. Other overlay filesystems, like unionfs,
+ * might be able to get by with a hybrid solution: overlay some vnodes, and rely
+ * on the namecache API for the rest.
  */
-
+ 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -187,775 +111,114 @@
 #include <sys/buf.h>
 #include "null.h"
 
-static int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
-SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, 
-       &null_bug_bypass, 0, "");
-
 static int     null_nresolve(struct vop_nresolve_args *ap);
 static int     null_ncreate(struct vop_ncreate_args *ap);
 static int     null_nmkdir(struct vop_nmkdir_args *ap);
+static int     null_nmknod(struct vop_nmknod_args *ap);
+static int     null_nlink(struct vop_nlink_args *ap);
+static int     null_nsymlink(struct vop_nsymlink_args *ap);
+static int     null_nwhiteout(struct vop_nwhiteout_args *ap);
 static int     null_nremove(struct vop_nremove_args *ap);
 static int     null_nrmdir(struct vop_nrmdir_args *ap);
 static int     null_nrename(struct vop_nrename_args *ap);
 
-static int     null_revoke(struct vop_revoke_args *ap);
-static int     null_access(struct vop_access_args *ap);
-static int     null_createvobject(struct vop_createvobject_args *ap);
-static int     null_destroyvobject(struct vop_destroyvobject_args *ap);
-static int     null_getattr(struct vop_getattr_args *ap);
-static int     null_getvobject(struct vop_getvobject_args *ap);
-static int     null_inactive(struct vop_inactive_args *ap);
-static int     null_islocked(struct vop_islocked_args *ap);
-static int     null_lock(struct vop_lock_args *ap);
-static int     null_lookup(struct vop_old_lookup_args *ap);
-static int     null_open(struct vop_open_args *ap);
-static int     null_print(struct vop_print_args *ap);
-static int     null_reclaim(struct vop_reclaim_args *ap);
-static int     null_rename(struct vop_old_rename_args *ap);
-static int     null_setattr(struct vop_setattr_args *ap);
-static int     null_unlock(struct vop_unlock_args *ap);
-
-/*
- * This is the 10-Apr-92 bypass routine.
- *    This version has been optimized for speed, throwing away some
- * safety checks.  It should still always work, but it's not as
- * robust to programmer errors.
- *
- * In general, we map all vnodes going down and unmap them on the way back.
- * As an exception to this, vnodes can be marked "unmapped" by setting
- * the Nth bit in operation's vdesc_flags.
- *
- * Also, some BSD vnode operations have the side effect of vrele'ing
- * their arguments.  With stacking, the reference counts are held
- * by the upper node, not the lower one, so we must handle these
- * side-effects here.  This is not of concern in Sun-derived systems
- * since there are no such side-effects.
- *
- * This makes the following assumptions:
- * - only one returned vpp
- * - no INOUT vpp's (Sun's vop_open has one of these)
- * - the vnode operation vector of the first vnode should be used
- *   to determine what implementation of the op should be invoked
- * - all mapped vnodes are of our vnode-type (NEEDSWORK:
- *   problems on rmdir'ing mount points and renaming?)
- *
- * null_bypass(struct vnodeop_desc *a_desc, ...)
- */
-int
-null_bypass(struct vop_generic_args *ap)
-{
-       struct vnode **this_vp_p;
-       int error;
-       struct vnode *old_vps[VDESC_MAX_VPS];
-       struct vnode **vps_p[VDESC_MAX_VPS];
-       struct vnode ***vppp;
-       struct vnodeop_desc *descp = ap->a_desc;
-       int reles, i, j;
-
-       if (null_bug_bypass)
-               printf ("null_bypass: %s\n", descp->vdesc_name);
-
-#ifdef DIAGNOSTIC
-       /*
-        * We require at least one vp.
-        */
-       if (descp->vdesc_vp_offsets == NULL ||
-           descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
-               panic ("null_bypass: no vp's in map");
-#endif
-
-       /*
-        * Map the vnodes going in.
-        */
-       reles = descp->vdesc_flags;
-       for (i = 0; i < VDESC_MAX_VPS; ++i) {
-               if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
-                       break;   /* bail out at end of list */
-               vps_p[i] = this_vp_p =
-                       VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
-               /*
-                * We're not guaranteed that any but the first vnode
-                * are of our type.  Check for and don't map any
-                * that aren't.  (We must always map first vp or vclean fails.)
-                */
-               if (i && (*this_vp_p == NULLVP ||
-                   (*this_vp_p)->v_tag != VT_NULL)) {
-                       old_vps[i] = NULLVP;
-               } else {
-                       old_vps[i] = *this_vp_p;
-                       *this_vp_p = NULLVPTOLOWERVP(*this_vp_p);
-                       /*
-                        * Several operations have the side effect of vrele'ing
-                        * their vp's.  We must account for that in the lower
-                        * vp we pass down.
-                        */
-                       if (reles & (VDESC_VP0_WILLRELE << i))
-                               vref(*this_vp_p);
-               }
-
-       }
-
-       /*
-        * Call the operation on the lower layer with the modified
-        * argument structure.  We have to adjust a_fm to point to the
-        * lower vp's vop_ops structure.
-        */
-       if (vps_p[0] && *vps_p[0]) {
-               ap->a_ops = *(*(vps_p[0]))->v_ops;
-               error = vop_vnoperate_ap(ap);
-       } else {
-               printf("null_bypass: no map for %s\n", descp->vdesc_name);
-               error = EINVAL;
-       }
-
-       /*
-        * Maintain the illusion of call-by-value by restoring vnodes in the
-        * argument structure to their original value.
-        */
-       reles = descp->vdesc_flags;
-       for (i = 0; i < VDESC_MAX_VPS; ++i) {
-               if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
-                       break;   /* bail out at end of list */
-               if (old_vps[i]) {
-                       *(vps_p[i]) = old_vps[i];
-
-                       /*
-                        * Since we operated on the lowervp's instead of the
-                        * null node vp's, we have to adjust the null node
-                        * vp's based on what the VOP did to the lower vp.
-                        * 
-                        * Note: the unlock case only occurs with rename.
-                        * tdvp and tvp are both locked on call and must be
-                        * unlocked on return.
-                        *
-                        * Unlock semantics indicate that if two locked vp's
-                        * are passed and they are the same vp, they are only
-                        * actually locked once.
-                        */
-                       if (reles & (VDESC_VP0_WILLUNLOCK << i)) {
-                               VOP_UNLOCK(old_vps[i], LK_THISLAYER, curthread);
-                               for (j = i + 1; j < VDESC_MAX_VPS; ++j) {
-                                       if (descp->vdesc_vp_offsets[j] == VDESC_NO_OFFSET)
-                                               break;
-                                       if (old_vps[i] == old_vps[j]) {
-                                               reles &= ~(1 << (VDESC_VP0_WILLUNLOCK << j));
-                                       }
-                               }
-                       }
-
-                       if (reles & (VDESC_VP0_WILLRELE << i))
-                               vrele(old_vps[i]);
-               }
-       }
-
-       /*
-        * Map the possible out-going vpp
-        * (Assumes that the lower layer always returns
-        * a vref'ed vpp unless it gets an error.)
-        */
-       if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
-           !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
-           !error) {
-               /*
-                * XXX - even though some ops have vpp returned vp's,
-                * several ops actually vrele this before returning.
-                * We must avoid these ops.
-                * (This should go away when these ops are regularized.)
-                */
-               if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
-                       goto out;
-               vppp = VOPARG_OFFSETTO(struct vnode***,
-                                descp->vdesc_vpp_offset,ap);
-               if (*vppp)
-                       error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
-       }
-
- out:
-       return (error);
-}
-
-/*
- * We have to carry on the locking protocol on the null layer vnodes
- * as we progress through the tree. We also have to enforce read-only
- * if this layer is mounted read-only.
- *
- * null_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
- *             struct componentname *a_cnp)
- */
-static int
-null_lookup(struct vop_old_lookup_args *ap)
-{
-       struct componentname *cnp = ap->a_cnp;
-       struct vnode *dvp = ap->a_dvp;
-       struct thread *td = cnp->cn_td;
-       int flags = cnp->cn_flags;
-       struct vnode *vp, *ldvp, *lvp;
-       int error;
-
-       if ((dvp->v_mount->mnt_flag & MNT_RDONLY) &&
-           (cnp->cn_nameiop == NAMEI_DELETE || 
-            cnp->cn_nameiop == NAMEI_RENAME)) {
-               return (EROFS);
-       }
-       ldvp = NULLVPTOLOWERVP(dvp);
-
-       /*
-        * If we are doing a ".." lookup we must release the lock on dvp
-        * now, before we run a lookup in the underlying fs, or we may 
-        * deadlock.  If we do this we must protect ldvp by ref'ing it.
-        */
-       if (flags & CNP_ISDOTDOT) {
-               vref(ldvp);
-               VOP_UNLOCK(dvp, LK_THISLAYER, td);
-       }
-
-       /*
-        * Due to the non-deterministic nature of the handling of the
-        * parent directory lock by lookup, we cannot call null_bypass()
-        * here.  We must make a direct call.  It's faster to do a direct
-        * call, anyway.
-        */
-       vp = lvp = NULL;
-       error = VOP_LOOKUP(ldvp, &lvp, cnp);
-       if (error == EJUSTRETURN && 
-           (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
-           (cnp->cn_nameiop == NAMEI_CREATE || 
-            cnp->cn_nameiop == NAMEI_RENAME)) {
-               error = EROFS;
-       }
-
-       if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
-               if (ldvp == lvp) {
-                       *ap->a_vpp = dvp;
-                       vref(dvp);
-                       vrele(lvp);
-               } else {
-                       error = null_node_create(dvp->v_mount, lvp, &vp);
-                       if (error == 0)
-                               *ap->a_vpp = vp;
-               }
-       }
-
-       /*
-        * The underlying fs will set PDIRUNLOCK if it unlocked the parent
-        * directory, which means we have to follow suit in the nullfs layer.
-        * Note that the parent directory may have already been unlocked due
-        * to the ".." case.  Note that use of cnp->cn_flags instead of flags.
-        */
-       if (flags & CNP_ISDOTDOT) {
-               if ((cnp->cn_flags & CNP_PDIRUNLOCK) == 0)
-                       VOP_LOCK(dvp, LK_THISLAYER | LK_EXCLUSIVE, td);
-               vrele(ldvp);
-       } else if (cnp->cn_flags & CNP_PDIRUNLOCK) {
-               VOP_UNLOCK(dvp, LK_THISLAYER, td);
-       }
-       return (error);
-}
-
-/*
- * Setattr call. Disallow write attempts if the layer is mounted read-only.
- *
- * null_setattr(struct vnodeop_desc *a_desc, struct vnode *a_vp,
- *             struct vattr *a_vap, struct ucred *a_cred,
- *             struct thread *a_td)
- */
-int
-null_setattr(struct vop_setattr_args *ap)
-{
-       struct vnode *vp = ap->a_vp;
-       struct vattr *vap = ap->a_vap;
-
-       if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
-           vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
-           vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
-           (vp->v_mount->mnt_flag & MNT_RDONLY))
-               return (EROFS);
-       if (vap->va_size != VNOVAL) {
-               switch (vp->v_type) {
-               case VDIR:
-                       return (EISDIR);
-               case VCHR:
-               case VBLK:
-               case VSOCK:
-               case VFIFO:
-                       if (vap->va_flags != VNOVAL)
-                               return (EOPNOTSUPP);
-                       return (0);
-               case VREG:
-               case VLNK:
-               default:
-                       /*
-                        * Disallow write attempts if the filesystem is
-                        * mounted read-only.
-                        */
-                       if (vp->v_mount->mnt_flag & MNT_RDONLY)
-                               return (EROFS);
-               }
-       }
-
-       return (null_bypass(&ap->a_head));
-}
-
-/*
- *  We handle getattr only to change the fsid.
- *
- * null_getattr(struct vnode *a_vp, struct vattr *a_vap, struct ucred *a_cred,
- *             struct thread *a_td)
- */
-static int
-null_getattr(struct vop_getattr_args *ap)
-{
-       int error;
-
-       if ((error = null_bypass(&ap->a_head)) != 0)
-               return (error);
-
-       ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
-       return (0);
-}
-
-/*
- * Resolve a locked ncp at the nullfs layer.
- */
 static int
 null_nresolve(struct vop_nresolve_args *ap)
 {
-       return(vop_compat_nresolve(ap));
-}
-
-/*
- * Create a file
- */
+       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+       return vop_nresolve_ap(ap);
+}
+
 static int
 null_ncreate(struct vop_ncreate_args *ap)
 {
-       return(vop_compat_ncreate(ap));
+       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+       return vop_ncreate_ap(ap);
 }
 
 static int
 null_nmkdir(struct vop_nmkdir_args *ap)
 {
-       return(vop_compat_nmkdir(ap));
+       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+       return vop_nmkdir_ap(ap);
+}
+
+static int
+null_nmknod(struct vop_nmknod_args *ap)
+{
+       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+       return vop_nmknod_ap(ap);
+}
+
+static int
+null_nlink(struct vop_nlink_args *ap)
+{
+       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+       return vop_nlink_ap(ap);
+}
+
+static int
+null_nsymlink(struct vop_nsymlink_args *ap)
+{
+       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+       return vop_nsymlink_ap(ap);
+}
+
+static int
+null_nwhiteout(struct vop_nwhiteout_args *ap)
+{
+       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+       return vop_nwhiteout_ap(ap);
 }
 
 static int
 null_nremove(struct vop_nremove_args *ap)
 {
-       return(vop_compat_nremove(ap));
+       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+       return vop_nremove_ap(ap);
 }
 
 static int
 null_nrmdir(struct vop_nrmdir_args *ap)
 {
-       return(vop_compat_nrmdir(ap));
+       ap->a_head.a_ops = MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+
+       return vop_nrmdir_ap(ap);
 }
 
 static int
 null_nrename(struct vop_nrename_args *ap)
 {
-       return(vop_compat_nrename(ap));
-}
-
-/*
- * revoke is VX locked, we can't go through null_bypass
- */
-static int
-null_revoke(struct vop_revoke_args *ap)
-{
-       struct null_node *np;
-       struct vnode *lvp;
-
-       np = VTONULL(ap->a_vp);
-       vx_unlock(ap->a_vp);
-       if ((lvp = np->null_lowervp) != NULL) {
-               vx_get(lvp);
-               VOP_REVOKE(lvp, ap->a_flags);
-               vx_put(lvp);
-       }
-       vx_lock(ap->a_vp);
-       vgone(ap->a_vp);
-       return(0);
-}
-
-/*
- * Handle to disallow write access if mounted read-only.
- *
- * null_access(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
- *             struct thread *a_td)
- */
-static int
-null_access(struct vop_access_args *ap)
-{
-       struct vnode *vp = ap->a_vp;
-       mode_t mode = ap->a_mode;
-
-       /*
-        * Disallow write attempts on read-only layers;
-        * unless the file is a socket, fifo, or a block or
-        * character device resident on the file system.
-        */
-       if (mode & VWRITE) {
-               switch (vp->v_type) {
-               case VDIR:
-               case VLNK:
-               case VREG:
-                       if (vp->v_mount->mnt_flag & MNT_RDONLY)
-                               return (EROFS);
-                       break;
-               default:
-                       break;
-               }
-       }
-       return (null_bypass(&ap->a_head));
-}
-
-/*
- * We must handle open to be able to catch MNT_NODEV and friends.
- *
- * null_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
- *          struct thread *a_td)
- */
-static int
-null_open(struct vop_open_args *ap)
-{
-       struct vnode *vp = ap->a_vp;
-       struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);
-
-       if ((vp->v_mount->mnt_flag & MNT_NODEV) &&
-           (lvp->v_type == VBLK || lvp->v_type == VCHR))
-               return ENXIO;
-
-       return (null_bypass(&ap->a_head));
-}
-
-/*
- * We handle this to eliminate null FS to lower FS
- * file moving. Don't know why we don't allow this,
- * possibly we should.
- *
- * null_rename(struct vnode *a_fdvp, struct vnode *a_fvp,
- *             struct componentname *a_fcnp, struct vnode *a_tdvp,
- *             struct vnode *a_tvp, struct componentname *a_tcnp)
- */
-static int
-null_rename(struct vop_old_rename_args *ap)
-{
-       struct vnode *tdvp = ap->a_tdvp;
-       struct vnode *fvp = ap->a_fvp;
-       struct vnode *fdvp = ap->a_fdvp;
-       struct vnode *tvp = ap->a_tvp;
-
-       /* Check for cross-device rename. */
-       if ((fvp->v_mount != tdvp->v_mount) ||
-           (tvp && (fvp->v_mount != tvp->v_mount))) {
-               if (tdvp == tvp)
-                       vrele(tdvp);
-               else
-                       vput(tdvp);
-               if (tvp)
-                       vput(tvp);
-               vrele(fdvp);
-               vrele(fvp);
-               return (EXDEV);
-       }
-       
-       return (null_bypass(&ap->a_head));
-}
-
-/*
- * A special flag, LK_THISLAYER, causes the locking function to operate
- * ONLY on the nullfs layer.  Otherwise we are responsible for locking not
- * only our layer, but the lower layer as well.
- *
- * null_lock(struct vnode *a_vp, int a_flags, struct thread *a_td)
- */
-static int
-null_lock(struct vop_lock_args *ap)
-{
-       struct vnode *vp = ap->a_vp;
-       int flags = ap->a_flags;
-       struct null_node *np = VTONULL(vp);
-       struct vnode *lvp;
-       int error;
-
-       /*
-        * Lock the nullfs layer first, disposing of the interlock in the
-        * process.
-        */
-       KKASSERT((flags & LK_INTERLOCK) == 0);
-       error = lockmgr(&vp->v_lock, flags & ~LK_THISLAYER,
-                       NULL, ap->a_td);
-
-       /*
-        * If locking only the nullfs layer, or if there is no lower layer,
-        * or if an error occured while attempting to lock the nullfs layer,
-        * we are done.
-        *
-        * np can be NULL is the vnode is being recycled from a previous
-        * hash collision.
-        */
-       if ((flags & LK_THISLAYER) || np == NULL ||
-           np->null_lowervp == NULL || error) {
-               return (error);
-       }
-
-       /*
-        * Lock the underlying vnode.  If we are draining we should not drain
-        * the underlying vnode, since it is not being destroyed, but we do
-        * lock it exclusively in that case.  Note that any interlocks have
-        * already been disposed of above.
-        */
-       lvp = np->null_lowervp;
-       if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
-               NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
-               error = vn_lock(lvp, (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
-                               ap->a_td);
-       } else {
-               error = vn_lock(lvp, flags, ap->a_td);
-       }
-
-       /*
-        * If an error occured we have to undo our nullfs lock, then return
-        * the original error.
-        */
-       if (error)
-               lockmgr(&vp->v_lock, LK_RELEASE, NULL, ap->a_td);
-       return(error);
-}
-
-/*
- * A special flag, LK_THISLAYER, causes the unlocking function to operate
- * ONLY on the nullfs layer.  Otherwise we are responsible for unlocking not
- * only our layer, but the lower layer as well.
- *
- * null_unlock(struct vnode *a_vp, int a_flags, struct thread *a_td)
- */
-static int
-null_unlock(struct vop_unlock_args *ap)
-{
-       struct vnode *vp = ap->a_vp;
-       int flags = ap->a_flags;
-       struct null_node *np = VTONULL(vp);
-       struct vnode *lvp;
-       int error;
-
-       KKASSERT((flags & LK_INTERLOCK) == 0);
-       /*
-        * nullfs layer only
-        */
-       if (flags & LK_THISLAYER) {
-               error = lockmgr(&vp->v_lock, 
-                               (flags & ~LK_THISLAYER) | LK_RELEASE,
-                               NULL, ap->a_td);
-               return (error);
-       }
-
-       /*
-        * If there is no underlying vnode the lock operation occurs at
-        * the nullfs layer.  np can be NULL is the vnode is being recycled
-        * from a previous hash collision.
-        */
-       if (np == NULL || (lvp = np->null_lowervp) == NULL) {
-               error = lockmgr(&vp->v_lock, flags | LK_RELEASE,
-                               NULL, ap->a_td);
-               return(error);
-       }
-
-       /*
-        * Unlock the lower layer first, then our nullfs layer.
-        */
-       VOP_UNLOCK(lvp, flags, ap->a_td);
-       error = lockmgr(&vp->v_lock, flags | LK_RELEASE, NULL, ap->a_td);
-       return (error);
-}
-
-/*
- * null_islocked(struct vnode *a_vp, struct thread *a_td)
- *
- * If a lower layer exists return the lock status of the lower layer,
- * otherwise return the lock status of our nullfs layer.
- */
-static int
-null_islocked(struct vop_islocked_args *ap)
-{
-       struct vnode *vp = ap->a_vp;
-       struct vnode *lvp;
-       struct null_node *np = VTONULL(vp);
-       int error;
-
-       lvp = np->null_lowervp;
-       if (lvp == NULL)
-               error = lockstatus(&vp->v_lock, ap->a_td);
-       else
-               error = VOP_ISLOCKED(lvp, ap->a_td);
-       return (error);
-}
-
-
-/*
- * The vnode is no longer active.  However, the new VFS API may retain
- * the node in the vfs cache.  There is no way to tell that someone issued
- * a remove/rmdir operation on the underlying filesystem (yet), but we can't
- * remove the lowervp reference here.
- *
- * null_inactive(struct vnode *a_vp, struct thread *a_td)
- */
-static int
-null_inactive(struct vop_inactive_args *ap)
-{
-       /*struct vnode *vp = ap->a_vp;*/
-       /*struct null_node *np = VTONULL(vp);*/
-
-       /*
-        * At the moment don't do anything here.  All the rest of the code
-        * assumes that lowervp will remain inact, and the inactive nullvp
-        * may be reactivated at any time.  XXX I'm not sure why the 4.x code
-        * even worked.
-        */
-
-       /*
-        * Now it is safe to release our nullfs layer vnode.
-        */
-       return (0);
-}
-
-/*
- * We can free memory in null_inactive, but we do this
- * here. (Possible to guard vp->v_data to point somewhere)
- *
- * null_reclaim(struct vnode *a_vp, struct thread *a_td)
- */
-static int
-null_reclaim(struct vop_reclaim_args *ap)
-{
-       struct vnode *vp = ap->a_vp;
-       struct vnode *lowervp;
-       struct null_node *np;
-
-       np = VTONULL(vp);
-       vp->v_data = NULL;
-       /*
-        * null_lowervp reference to lowervp.  The lower vnode's
-        * inactive routine may or may not be called when we do the
-        * final vrele().
-        */
-       if (np) {
-               null_node_rem(np);
-               lowervp = np->null_lowervp;
-               np->null_lowervp = NULLVP;
-               if (lowervp)
-                       vrele(lowervp);
-               free(np, M_NULLFSNODE);
-       }
-       return (0);
-}
-
-/*
- * null_print(struct vnode *a_vp)
- */
-static int
-null_print(struct vop_print_args *ap)
-{
-       struct vnode *vp = ap->a_vp;
-       struct null_node *np = VTONULL(vp);
-
-       if (np == NULL) {
-               printf ("\ttag VT_NULLFS, vp=%p, NULL v_data!\n", vp);
-               return(0);
-       }
-       printf ("\ttag VT_NULLFS, vp=%p, lowervp=%p\n", vp, np->null_lowervp);
-       if (np->null_lowervp != NULL) {
-               printf("\tlowervp_lock: ");
-               lockmgr_printinfo(&np->null_lowervp->v_lock);
-       } else {
-               printf("\tnull_lock: ");
-               lockmgr_printinfo(&vp->v_lock);
-       }
-       printf("\n");
-       return (0);
-}
-
-/*
- * Let an underlying filesystem do the work
- *
- * null_createvobject(struct vnode *vp, struct ucred *cred, struct proc *p)
- */
-static int
-null_createvobject(struct vop_createvobject_args *ap)
-{
-       struct vnode *vp = ap->a_vp;
-       struct vnode *lowervp = VTONULL(vp) ? NULLVPTOLOWERVP(vp) : NULL;
-       int error;
-
-       if (vp->v_type == VNON || lowervp == NULL)
-               return 0;
-       error = VOP_CREATEVOBJECT(lowervp, ap->a_td);
-       if (error)
-               return (error);
-       vp->v_flag |= VOBJBUF;
-       return (0);
-}
-
-/*
- * We have nothing to destroy and this operation shouldn't be bypassed.
- *
- * null_destroyvobject(struct vnode *vp)
- */
-static int
-null_destroyvobject(struct vop_destroyvobject_args *ap)
-{
-       struct vnode *vp = ap->a_vp;
-
-       vp->v_flag &= ~VOBJBUF;
-       return (0);
-}
-
-/*
- * null_getvobject(struct vnode *vp, struct vm_object **objpp)
- *
- * Note that this can be called when a vnode is being recycled, and
- * v_data may be NULL in that case if nullfs had to recycle a vnode
- * due to a null_node collision.
- */
-static int
-null_getvobject(struct vop_getvobject_args *ap)
-{
-       struct vnode *lvp;
-
-       if (ap->a_vp->v_data == NULL)
-               return EINVAL;
-
-       lvp = NULLVPTOLOWERVP(ap->a_vp);
-       if (lvp == NULL)
-               return EINVAL;
-       return (VOP_GETVOBJECT(lvp, ap->a_objpp));
+       struct mount *lmp;
+
+       lmp = MOUNTTONULLMOUNT(ap->a_fncp->nc_mount)->nullm_vfs;
+       if (lmp != MOUNTTONULLMOUNT(ap->a_tncp->nc_mount)->nullm_vfs)
+               return (EINVAL);
+
+       ap->a_head.a_ops = lmp->mnt_vn_norm_ops;
+
+       return vop_nrename_ap(ap);
 }
 
 /*
  * Global vfs data structures
  */
 struct vnodeopv_entry_desc null_vnodeop_entries[] = {
-       { &vop_default_desc,            (vnodeopv_entry_t) null_bypass },
-       { &vop_access_desc,             (vnodeopv_entry_t) null_access },
-       { &vop_createvobject_desc,      (vnodeopv_entry_t) null_createvobject },
-       { &vop_destroyvobject_desc,     (vnodeopv_entry_t) null_destroyvobject },
-       { &vop_getattr_desc,            (vnodeopv_entry_t) null_getattr },
-       { &vop_getvobject_desc,         (vnodeopv_entry_t) null_getvobject },
-       { &vop_inactive_desc,           (vnodeopv_entry_t) null_inactive },
-       { &vop_islocked_desc,           (vnodeopv_entry_t) null_islocked },
-       { &vop_lock_desc,               (vnodeopv_entry_t) null_lock },
-       { &vop_old_lookup_desc,         (vnodeopv_entry_t) null_lookup },
-       { &vop_open_desc,               (vnodeopv_entry_t) null_open },
-       { &vop_print_desc,              (vnodeopv_entry_t) null_print },
-       { &vop_reclaim_desc,            (vnodeopv_entry_t) null_reclaim },
-       { &vop_old_rename_desc,         (vnodeopv_entry_t) null_rename },
-       { &vop_setattr_desc,            (vnodeopv_entry_t) null_setattr },
-       { &vop_unlock_desc,             (vnodeopv_entry_t) null_unlock },
-       { &vop_revoke_desc,             (vnodeopv_entry_t) null_revoke },
-
        { &vop_nresolve_desc,           (vnodeopv_entry_t) null_nresolve },
        { &vop_ncreate_desc,            (vnodeopv_entry_t) null_ncreate },
        { &vop_nmkdir_desc,             (vnodeopv_entry_t) null_nmkdir },
+       { &vop_nmknod_desc,             (vnodeopv_entry_t) null_nmknod },
+       { &vop_nlink_desc,              (vnodeopv_entry_t) null_nlink },
+       { &vop_nsymlink_desc,           (vnodeopv_entry_t) null_nsymlink },
+       { &vop_nwhiteout_desc,          (vnodeopv_entry_t) null_nwhiteout },
        { &vop_nremove_desc,            (vnodeopv_entry_t) null_nremove },
        { &vop_nrmdir_desc,             (vnodeopv_entry_t) null_nrmdir },
        { &vop_nrename_desc,            (vnodeopv_entry_t) null_nrename },
diff -r 758f5a725024 -r e65b90b999c3 sys/vfs/nullfs/null_subr.c
--- a/sys/vfs/nullfs/null_subr.c        Mon Jan  2 11:42:05 2006 +0000
+++ /dev/null   Thu Jan  1 00:00:00 1970 +0000
@@ -1,389 +0,0 @@
-/*
- * Copyright (c) 1992, 1993
- *     The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software donated to Berkeley by
- * Jan-Simon Pendry.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *     This product includes software developed by the University of
- *     California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *     @(#)null_subr.c 8.7 (Berkeley) 5/14/95
- *
- * $FreeBSD: src/sys/miscfs/nullfs/null_subr.c,v 1.21.2.4 2001/06/26 04:20:09 bp Exp $
- * $DragonFly: src/sys/vfs/nullfs/null_subr.c,v 1.17 2004/12/17 00:18:30 dillon Exp $
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/proc.h>
-#include <sys/vnode.h>
-#include <sys/mount.h>
-#include <sys/malloc.h>
-#include "null.h"
-
-#define LOG2_SIZEVNODE 7               /* log2(sizeof struct vnode) */
-
-/*
- * Null layer cache:
- * Each cache entry holds a reference to the lower vnode
- * along with a pointer to the alias vnode.  When an
- * entry is added the lower vnode is vref'd.  When the
- * alias is removed the lower vnode is vrele'd.
- */
-
-#define        NULL_NHASH(vp) \
-       (&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash])
-
-static struct null_node **null_node_hashtbl;
-static u_long null_node_hash;
-static struct lwkt_token null_ihash_token;
-
-static MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table");
-MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part");
-
-static int     null_node_alloc(struct mount *mp, struct vnode *lowervp,
-                                    struct vnode **vpp);
-static struct vnode *
-               null_node_find(struct mount *mp, struct vnode *lowervp);
-
-/*
- * Initialise cache headers
- */
-int
-nullfs_init(struct vfsconf *vfsp)
-{
-       NULLFSDEBUG("nullfs_init\n");           /* printed during system boot */
-       null_node_hash = 16;
-       while (null_node_hash < desiredvnodes)
-               null_node_hash <<= 1;
-       null_node_hashtbl = malloc(sizeof(void *) * null_node_hash,
-                                   M_NULLFSHASH, M_WAITOK|M_ZERO);
-       --null_node_hash;
-       lwkt_token_init(&null_ihash_token);
-       return (0);
-}
-
-int
-nullfs_uninit(struct vfsconf *vfsp)
-{
-        if (null_node_hashtbl) {
-               free(null_node_hashtbl, M_NULLFSHASH);
-               null_node_hashtbl = NULL;
-       }
-       return (0);
-}
-
-/*
- * Return a vref'ed alias for lower vnode if already exists, else 0.
- * Lower vnode should be locked (but with no additional refs) on entry
- * and will be unlocked on return if the search was successful, and left
- * locked if the search was not successful.
- */
-static struct vnode *
-null_node_find(struct mount *mp, struct vnode *lowervp)
-{
-       struct thread *td = curthread;  /* XXX */
-       struct null_node *np;
-       struct null_node *xp;
-       struct vnode *vp;
-       lwkt_tokref ilock;
-
-       lwkt_gettoken(&ilock, &null_ihash_token);
-loop:
-       for (np = *NULL_NHASH(lowervp); np; np = np->null_next) {
-               if (np->null_lowervp == lowervp && NULLTOV(np)->v_mount == mp) {
-                       vp = NULLTOV(np);
-                       if (vget(vp, LK_EXCLUSIVE | LK_CANRECURSE, td)) {
-                               printf ("null_node_find: vget failed.\n");
-                               goto loop;
-                       }
-
-                       /*
-                        * vget() might have blocked, we have to check that
-                        * our vnode is still valid.
-                        */
-                       xp = *NULL_NHASH(lowervp);
-                       while (xp) {
-                               if (xp == np && xp->null_lowervp == lowervp &&
-                                   NULLTOV(xp) == vp &&
-                                   NULLTOV(xp)->v_mount == mp) {
-                                       break;
-                               }
-                               xp = xp->null_next;
-                       }
-                       if (xp == NULL) {
-                               printf ("null_node_find: node race, retry.\n");
-                               vput(vp);
-                               goto loop;
-                       }
-                       /*
-                        * SUCCESS!  Returned the locked and referenced vp
-                        * and release the lock on lowervp.
-                        */
-                       VOP_UNLOCK(lowervp, 0, td);
-                       lwkt_reltoken(&ilock);
-                       return (vp);
-               }
-       }
-
-       /*
-        * Failure, leave lowervp locked on return.
-        */
-       lwkt_reltoken(&ilock);
-       return(NULL);
-}
-
-int
-null_node_add(struct null_node *np)
-{
-       struct null_node **npp;
-       struct null_node *n2;
-       lwkt_tokref ilock;
-
-       lwkt_gettoken(&ilock, &null_ihash_token);
-       npp = NULL_NHASH(np->null_lowervp);
-       while ((n2 = *npp) != NULL) {
-               if (n2->null_lowervp == np->null_lowervp &&
-                   n2->null_vnode->v_mount == np->null_vnode->v_mount) {
-                       lwkt_reltoken(&ilock);
-                       return(EBUSY);
-               }
-               npp = &n2->null_next;
-       }
-       np->null_next = NULL;
-       *npp = np;
-       lwkt_reltoken(&ilock);
-       return(0);
-}
-
-void
-null_node_rem(struct null_node *np)
-{
-       struct null_node **npp;
-       struct null_node *n2;
-       lwkt_tokref ilock;
-
-       lwkt_gettoken(&ilock, &null_ihash_token);
-       npp = NULL_NHASH(np->null_lowervp);
-       while ((n2 = *npp) != NULL) {
-               if (n2 == np)
-                       break;
-               npp = &n2->null_next;
-       }
-       KKASSERT(np == n2);
-       *npp = np->null_next;
-       np->null_next = NULL;
-       lwkt_reltoken(&ilock);
-}
-
-/*
- * Make a new null_node node.  vp is the null mount vnode, lowervp is the
- * lower vnode.  Maintain a reference to (lowervp).  lowervp must be
- * locked on call.
- */
-static int
-null_node_alloc(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
-{
-       struct null_node *np;
-       struct thread *td;
-       struct vnode *vp;
-       int error;
-
-       td = curthread;
-retry:
-       /*
-        * If we have already hashed the vp we can just return it.
-        */
-       *vpp = null_node_find(mp, lowervp);
-       if (*vpp)
-               return 0;
-
-       /*
-        * lowervp is locked but not referenced at this point.
-        */
-       MALLOC(np, struct null_node *, sizeof(struct null_node),
-              M_NULLFSNODE, M_WAITOK);
-
-       error = getnewvnode(VT_NULL, mp, vpp, 0, LK_CANRECURSE);
-       if (error) {
-               FREE(np, M_NULLFSNODE);
-               return (error);
-       }
-       vp = *vpp;
-
-       /*
-        * Set up the np/vp relationship and set the lower vnode.
-        *
-        * XXX:
-        * When nullfs encounters sockets or device nodes, it
-        * has a hard time working with the normal vp union, probably
-        * because the device has not yet been opened.  Needs investigation.
-        */
-       vp->v_type = lowervp->v_type;
-       if (vp->v_type == VCHR || vp->v_type == VBLK)
-               addaliasu(vp, lowervp->v_udev);
-       else
-               vp->v_un = lowervp->v_un;       /* XXX why this assignment? */
-       np->null_vnode = vp;
-       np->null_lowervp = lowervp;
-
-       /*
-        * Our new vnode is already VX locked (which is effective
-        * LK_THISLAYER, which is what we want).
-        */
-
-       /*
-        * Try to add our new node to the hash table.  If a collision
-        * occurs someone else beat us to it and we need to destroy the
-        * vnode and retry.
-        */
-       if (null_node_add(np) != 0) {
-               free(np, M_NULLFSNODE);
-               vput(vp);
-               goto retry;
-       }
-
-       /*
-        * Finish up.  Link the vnode and null_node together, ref lowervp
-        * for the null node.  lowervp is already locked so the lock state
-        * is already properly synchronized.
-        *
-        * Set the vnode up to reclaim as quickly as possible
-        */
-       vp->v_data = np;
-       vp->v_flag |= VAGE;
-       vref(lowervp);
-       return (0);
-}
-
-
-/*
- * Try to find an existing null_node vnode refering to the given underlying
- * vnode (which should be locked and referenced). If no vnode found, create
- * a new null_node vnode which contains a reference to the lower vnode.
- */
-int
-null_node_create(struct mount *mp, struct vnode *lowervp, struct vnode **newvpp)
-{
-       struct vnode *aliasvp;
-
-       aliasvp = null_node_find(mp, lowervp);
-       if (aliasvp) {
-               /*
-                * null_node_find() has unlocked lowervp for us, so we just
-                * have to get rid of the reference.
-                */
-               vrele(lowervp);
-#ifdef NULLFS_DEBUG
-               vprint("null_node_create: exists", aliasvp);
-#endif
-       } else {
-               int error;
-
-               /*
-                * Get new vnode.  Note that lowervp is locked and referenced
-                * at this point (as it was passed to us).
-                */
-               NULLFSDEBUG("null_node_create: create new alias vnode\n");
-
-               /*
-                * Make new vnode reference the null_node.
-                */
-               error = null_node_alloc(mp, lowervp, &aliasvp);
-               vrele(lowervp);
-               if (error)
-                       return error;
-
-               /*
-                * aliasvp is already locked and ref'd by getnewvnode()
-                */
-       }
-
-#ifdef DIAGNOSTIC
-       if (lowervp->v_usecount < 1) {
-               /* Should never happen... */
-               vprint ("null_node_create: alias ", aliasvp);
-               vprint ("null_node_create: lower ", lowervp);
-               panic ("null_node_create: lower has 0 usecount.");
-       };
-#endif
-
-#ifdef NULLFS_DEBUG
-       vprint("null_node_create: alias", aliasvp);
-       vprint("null_node_create: lower", lowervp);
-#endif
-
-       *newvpp = aliasvp;
-       return (0);
-}
-
-#ifdef DIAGNOSTIC
-#include "opt_ddb.h"
-
-#ifdef DDB
-#define        null_checkvp_barrier    1
-#else
-#define        null_checkvp_barrier    0
-#endif
-
-struct vnode *
-null_checkvp(struct vnode *vp, char *fil, int lno)
-{
-       struct null_node *a = VTONULL(vp);
-       if (a->null_lowervp == NULLVP) {
-               /* Should never happen */
-               int i; u_long *p;
-               printf("vp = %p, ZERO ptr\n", (void *)vp);
-               for (p = (u_long *) a, i = 0; i < 8; i++)
-                       printf(" %lx", p[i]);
-               printf("\n");
-               /* wait for debugger */
-               while (null_checkvp_barrier) /*WAIT*/ ;
-               panic("null_checkvp");
-       }
-       if (a->null_lowervp->v_usecount < 1) {
-               int i; u_long *p;
-               printf("vp = %p, unref'ed lowervp\n", (void *)vp);
-               for (p = (u_long *) a, i = 0; i < 8; i++)
-                       printf(" %lx", p[i]);
-               printf("\n");
-               /* wait for debugger */
-               while (null_checkvp_barrier) /*WAIT*/ ;
-               panic ("null with unref'ed lowervp");
-       };
-#ifdef notyet
-       printf("null %x/%d -> %x/%d [%s, %d]\n",
-               NULLTOV(a), NULLTOV(a)->v_usecount,
-               a->null_lowervp, a->null_lowervp->v_usecount,
-               fil, lno);
-#endif
-       return a->null_lowervp;
-}
-#endif

nnull.diff (see comments on kernel@)

Reply via email to