Sorry for the arbitrary indexing, there would be valid reasons for
calling it 3rd or 4th (or else) as well. Second such changeset from me,
at least.
See explanation on [EMAIL PROTECTED]
Csaba
diff -r b30705ca4860 sys/kern/vfs_cache.c
--- a/sys/kern/vfs_cache.c Thu Feb 2 19:50:43 2006 +0100
+++ b/sys/kern/vfs_cache.c Tue Feb 7 19:55:06 2006 +0100
@@ -108,6 +108,16 @@
#define NCHHASH(hash) (&nchashtbl[(hash) & nchash])
#define MINNEG 1024
+/* Direction markers for shadow tree traversal. */
+#define STREE_AWAY 0x1
+#define STREE_BACK 0x2
+
+/* Shadow tree updater function with metadata (direction). */
+struct stree_updater {
+ int (*updater)(struct namecache *ncp, void *param);
+ int flags;
+};
+
MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
@@ -225,7 +235,7 @@ _cache_drop(struct namecache *ncp)
(ncp->nc_flag & NCF_UNRESOLVED) &&
TAILQ_EMPTY(&ncp->nc_list)
) {
- KKASSERT(ncp->nc_exlocks == 0);
+ KKASSERT(ncp->nc_lockby->nc_exlocks == 0);
cache_lock(ncp);
cache_zap(ncp);
} else {
@@ -295,7 +305,9 @@ cache_alloc(int nlen)
ncp->nc_error = ENOTCONN; /* needs to be resolved */
ncp->nc_refs = 1;
ncp->nc_fsmid = 1;
+ ncp->nc_lockby = ncp;
TAILQ_INIT(&ncp->nc_list);
+ LIST_INIT(&ncp->nc_shadow_list);
cache_lock(ncp);
return(ncp);
}
@@ -323,6 +335,277 @@ cache_drop(struct namecache *ncp)
{
_cache_drop(ncp);
}
+
+/*
+ * Iterative tree traversal routine with actually more features than we need:
+ * supports making callbacks either in "away" or "back" direction, pruning
+ * the traversal at certain nodes (ie., skipping the respective subtree), and
+ * safe leaf cutting.
+ *
+ * Now it's pretty much the same if callbacks are made away or backway, and we
+ * never prune or cut off leaves...
+ *
+ * In fact, all we need is to be able to traverse a subtree identified by its
+ * root.
+ */
+static void
+cache_group_walk(struct namecache *ncp, struct stree_updater *sup,
+ void *param)
+{
+ struct namecache *pncp = ncp; /* previous ncp during traversal */
+ struct namecache *qncp; /* current ncp during traversal */
+ struct namecache *xncp; /* auxiliary ncp */
+ int prune;
+#ifdef INVARIANTS
+ int height = 0, turns = 0, pruned = 0;
+
+#define STAWAY_ASSERT(ncp) do { \
+ KKASSERT(((ncp)->nc_flag & NCF_TREEPASS) == 0); \
+ (ncp)->nc_flag |= NCF_TREEPASS; \
+} while (0)
+#define STBACK_ASSERT(ncp) do { \
+ KKASSERT((ncp)->nc_flag & NCF_TREEPASS); \
+ (ncp)->nc_flag &= ~NCF_TREEPASS; \
+} while (0)
+#else
+#define STAWAY_ASSERT(ncp)
+#define STBACK_ASSERT(ncp)
+#endif
+
+ STAWAY_ASSERT(ncp);
+ prune = sup->flags & STREE_AWAY ? sup->updater(ncp, param) : 0;
+ if (prune || ! (qncp = LIST_FIRST(&ncp->nc_shadow_list)))
+ goto out;
+
+ for (;;) {
+#ifdef INVARIANTS
+ turns++;
+#endif
+ if (qncp->nc_shadowed == pncp) {
+ /* heading toward leafs */
+ KKASSERT(height++ >= 0);
+ STAWAY_ASSERT(qncp);
+ prune = sup->flags & STREE_AWAY ?
+ sup->updater(qncp, param) :
+ 0;
+#ifdef INVARIANTS
+ if (prune)
+ pruned = 1;
+#endif
+ if (prune || LIST_EMPTY(&qncp->nc_shadow_list)) {
+ /*
+ * we hit a leaf or were asked to prune the
+ * branch, turn back
+ */
+ xncp = pncp;
+ pncp = qncp;
+ qncp = xncp;
+ } else {
+ /* keep going on */
+ pncp = qncp;
+ qncp = LIST_FIRST(&qncp->nc_shadow_list);
+ }
+ } else {
+ /* heading back toward root */
+ KKASSERT(--height >= 0);
+ STBACK_ASSERT(pncp);
+ if (LIST_NEXT(pncp, nc_shadow_entry))
+ /* turn to leaf direction using neighbour */
+ qncp = LIST_NEXT(pncp, nc_shadow_entry);
+ else
+ /*
+ * No neighbour,
+ * try to keep going on to root direction.
+ */
+ qncp = qncp == ncp ? NULL : qncp->nc_shadowed;
+ xncp = pncp->nc_shadowed;
+ /*
+ * We have to take care about finding out the next pncp
+ * candidate before calling the updater, because the
+ * updater might remove it from the tree, rendering
+ * pncp unuseable wrt. traversal.
+ */
+ if (sup->flags & STREE_BACK)
+ sup->updater(pncp, param);
+#ifdef INVARIANTS
+ if (! pncp->nc_shadowed)
+ turns -= 2;
+#endif
+ if (! qncp)
+ break;
+ pncp = xncp;
+ }
+ }
+
+out:
+ STBACK_ASSERT(ncp);
+ KASSERT(height == 0, ("shadow tree left with height %d\n", height));
+ KASSERT(turns == 2 * ncp->nc_treesize || (pruned && turns < 2 *
ncp->nc_treesize),
+ ("inconsistent traversal turn count: turns %d, ncp->nc_treesize
%d, "
+ "pruned %d", turns, ncp->nc_treesize, pruned));
+
+ if (sup->flags & STREE_BACK)
+ sup->updater(ncp, param);
+}
+
+static int
+migrate_updater(struct namecache *ncp, void *param)
+{
+ struct namecache *lncp = ncp->nc_lockby;
+
+ ncp->nc_lockby = param;
+
+ if (lncp->nc_flag & NCF_LOCKREQ) {
+ lncp->nc_flag &= ~NCF_LOCKREQ;
+ wakeup(lncp);
+ }
+
+ return(0);
+}
+
+static struct stree_updater stree_migrate_updater = {
+ .updater = migrate_updater,
+ .flags = STREE_BACK
+};
+
+/*
+ * Join ncp into the shadow group of sncp.
+ *
+ * ncp must be unlocked on entry, while sncp must be locked on entry. Caller
+ * also has to hold a dedicated reference of sncp.
+ *
+ * The routine will fail and return ELOOP if the intended shadowing association
+ * yielded a loop in the shadow chain. It will fail with EEXIST if ncp gets
+ * resolved or acquires a shadow association from elsewhere during the attach
+ * attempt (it is [at least theoretically] possbile due to the fact that ncp
+ * is unlocked).
+ *
+ * - On success ncp will be a representative of the joint shadow group, which
+ * then will be locked (both via ncp and sncp).
+ * - On failure the namecache entries will exist separately just as they did
+ * before; both entries will be locked.
+ */
+int
+cache_shadow_attach(struct namecache *ncp, struct namecache *sncp)
+{
+#ifdef INVARIANTS
+ struct namecache *xncp;
+#endif
+
+ if (ncp == sncp)
+ return(ELOOP);
+
+ KKASSERT(ncp->nc_lockby->nc_locktd != curthread);
+ KKASSERT(sncp->nc_lockby->nc_locktd == curthread);
+
+ cache_lock_two(ncp, sncp);
+
+ if ((ncp->nc_flag & NCF_UNRESOLVED) == 0 || ncp->nc_shadowed)
+ return(EEXIST);
+
+ KKASSERT(ncp->nc_lockby == ncp);
+ KKASSERT(sncp->nc_lockby == sncp->nc_lockby->nc_lockby);
+
+ if (sncp->nc_lockby == ncp)
+ return(ELOOP);
+
+ ncp->nc_shadowed = sncp;
+ LIST_INSERT_HEAD(&sncp->nc_shadow_list, ncp, nc_shadow_entry);
+ cache_group_walk(ncp, &stree_migrate_updater, sncp->nc_lockby);
+
+#ifdef INVARIANTS
+ xncp = sncp;
+ for (;;) {
+ KKASSERT(xncp != ncp);
+ KKASSERT(xncp->nc_treesize++ >= 0);
+ xncp->nc_treesize += ncp->nc_treesize;
+ if (! xncp->nc_shadowed)
+ break;
+ xncp = xncp->nc_shadowed;
+ }
+ KKASSERT(xncp == ncp->nc_lockby);
+#endif
+
+ return(0);
+}
+
+static __inline
+void
+delete_shadow(struct namecache *ncp)
+{
+#ifdef INVARIANTS
+ struct namecache *xncp = ncp->nc_shadowed;
+
+ KKASSERT(ncp->nc_shadowed);
+ for (;;) {
+ KKASSERT(xncp != ncp);
+ xncp->nc_treesize -= ncp->nc_treesize;
+ KKASSERT(--xncp->nc_treesize >= 0);
+ if (! xncp->nc_shadowed)
+ break;
+ xncp = xncp->nc_shadowed;
+ }
+#endif
+
+ LIST_REMOVE(ncp, nc_shadow_entry);
+ ncp->nc_shadowed = NULL;
+}
+
+/*
+ * Take out namecache entry from its shadow group.
+ *
+ * ncp must really shadow someone, and the shadow group must be locked
+ * upon entry.
+ *
+ * After the routine returns, ncp will be the head of a new (possibly
singleton)
+ * shadow group. The routine returns the former successor of ncp in the
original
+ * shadow group in a locked+ref'd state.
+ */
+struct namecache *
+cache_shadow_detach(struct namecache *ncp)
+{
+ struct namecache *sncp = ncp->nc_shadowed;
+
+ KKASSERT(ncp->nc_shadowed);
+ KKASSERT(sncp->nc_lockby == ncp->nc_lockby);
+ KKASSERT(ncp->nc_lockby == ncp->nc_lockby->nc_lockby);
+
+ delete_shadow(ncp);
+ ncp->nc_locktd = curthread;
+ cache_group_walk(ncp, &stree_migrate_updater, ncp);
+
+ return(sncp);
+}
+
+static int
+vhold_updater(struct namecache *ncp, void *param)
+{
+
+ if (ncp->nc_vp)
+ vhold(ncp->nc_vp);
+
+ return(0);
+}
+
+static struct stree_updater stree_vhold_updater = {
+ .updater = vhold_updater,
+ .flags = STREE_AWAY
+};
+
+static int
+vdrop_updater(struct namecache *ncp, void *param)
+{
+
+ if (ncp->nc_vp)
+ vdrop(ncp->nc_vp);
+
+ return(0);
+}
+
+static struct stree_updater stree_vdrop_updater = {
+ .updater = vdrop_updater,
+ .flags = STREE_AWAY
+};
/*
* Namespace locking. The caller must already hold a reference to the
@@ -349,15 +632,19 @@ cache_lock(struct namecache *ncp)
{
thread_t td;
int didwarn;
+ struct namecache *lncp;
KKASSERT(ncp->nc_refs != 0);
didwarn = 0;
td = curthread;
for (;;) {
- if (ncp->nc_exlocks == 0) {
- ncp->nc_exlocks = 1;
- ncp->nc_locktd = td;
+ lncp = ncp->nc_lockby;
+ KKASSERT(lncp);
+ KKASSERT(lncp->nc_refs != 0);
+ if (lncp->nc_exlocks == 0) {
+ lncp->nc_exlocks = 1;
+ lncp->nc_locktd = td;
/*
* The vp associated with a locked ncp must be held
* to prevent it from being recycled (which would
@@ -365,16 +652,15 @@ cache_lock(struct namecache *ncp)
*
* XXX loop on race for later MPSAFE work.
*/
- if (ncp->nc_vp)
- vhold(ncp->nc_vp);
+ cache_group_walk(lncp, &stree_vhold_updater, NULL);
break;
}
- if (ncp->nc_locktd == td) {
- ++ncp->nc_exlocks;
+ if (lncp->nc_locktd == td) {
+ ++lncp->nc_exlocks;
break;
}
- ncp->nc_flag |= NCF_LOCKREQ;
- if (tsleep(ncp, 0, "clock", nclockwarn) == EWOULDBLOCK) {
+ lncp->nc_flag |= NCF_LOCKREQ;
+ if (tsleep(lncp, 0, "clock", nclockwarn) == EWOULDBLOCK) {
if (didwarn)
continue;
didwarn = 1;
@@ -398,12 +684,15 @@ cache_lock_nonblock(struct namecache *nc
cache_lock_nonblock(struct namecache *ncp)
{
thread_t td;
+ struct namecache *lncp = ncp->nc_lockby;
KKASSERT(ncp->nc_refs != 0);
+ KKASSERT(lncp);
+ KKASSERT(lncp->nc_refs != 0);
td = curthread;
- if (ncp->nc_exlocks == 0) {
- ncp->nc_exlocks = 1;
- ncp->nc_locktd = td;
+ if (lncp->nc_exlocks == 0) {
+ lncp->nc_exlocks = 1;
+ lncp->nc_locktd = td;
/*
* The vp associated with a locked ncp must be held
* to prevent it from being recycled (which would
@@ -411,8 +700,7 @@ cache_lock_nonblock(struct namecache *nc
*
* XXX loop on race for later MPSAFE work.
*/
- if (ncp->nc_vp)
- vhold(ncp->nc_vp);
+ cache_group_walk(lncp, &stree_vhold_updater, NULL);
return(0);
} else {
return(EWOULDBLOCK);
@@ -423,17 +711,43 @@ cache_unlock(struct namecache *ncp)
cache_unlock(struct namecache *ncp)
{
thread_t td = curthread;
+ struct namecache *lncp = ncp->nc_lockby;
KKASSERT(ncp->nc_refs > 0);
- KKASSERT(ncp->nc_exlocks > 0);
- KKASSERT(ncp->nc_locktd == td);
- if (--ncp->nc_exlocks == 0) {
- if (ncp->nc_vp)
- vdrop(ncp->nc_vp);
- ncp->nc_locktd = NULL;
- if (ncp->nc_flag & NCF_LOCKREQ) {
- ncp->nc_flag &= ~NCF_LOCKREQ;
- wakeup(ncp);
+ KKASSERT(lncp);
+ KKASSERT(lncp->nc_refs > 0);
+ KKASSERT(lncp->nc_exlocks > 0);
+ KKASSERT(lncp->nc_locktd == td);
+ if (lncp->nc_exlocks == 1)
+ cache_group_walk(lncp, &stree_vdrop_updater, NULL);
+ if (--lncp->nc_exlocks == 0) {
+ lncp->nc_locktd = NULL;
+ if (lncp->nc_flag & NCF_LOCKREQ) {
+ lncp->nc_flag &= ~NCF_LOCKREQ;
+ wakeup(lncp);
+ }
+ }
+}
+
+/*
+ * Obtain lock on both of uncp and lncp.
+ *
+ * On entry, uncp is assumed to be unlocked, and lncp is assumed to be
+ * locked.
+ *
+ * After this function returns, caller is responsible for checking
+ * the state of lncp which might have got unlocked temporarily.
+ */
+void
+cache_lock_two(struct namecache *uncp, struct namecache *lncp)
+{
+ if (cache_lock_nonblock(uncp) != 0) {
+ if (uncp > lncp)
+ cache_lock(uncp);
+ else {
+ cache_unlock(lncp);
+ cache_lock(uncp);
+ cache_lock(lncp);
}
}
}
@@ -453,7 +767,8 @@ cache_get_nonblock(struct namecache *ncp
cache_get_nonblock(struct namecache *ncp)
{
/* XXX MP */
- if (ncp->nc_exlocks == 0 || ncp->nc_locktd == curthread) {
+ if (ncp->nc_lockby->nc_exlocks == 0 ||
+ ncp->nc_lockby->nc_locktd == curthread) {
_cache_hold(ncp);
cache_lock(ncp);
return(0);
@@ -521,6 +836,13 @@ cache_settimeout(struct namecache *ncp,
ncp->nc_timeout = 1;
}
+static int unresolver_updater(struct namecache *ncp, void *param);
+
+static struct stree_updater stree_unresolver_updater = {
+ .updater = unresolver_updater,
+ .flags = STREE_BACK
+};
+
/*
* Disassociate the vnode or negative-cache association and mark a
* namecache entry as unresolved again. Note that the ncp is still
@@ -541,10 +863,17 @@ void
void
cache_setunresolved(struct namecache *ncp)
{
+ cache_group_walk(ncp, &stree_unresolver_updater, NULL);
+}
+
+static int
+unresolver_updater(struct namecache *ncp, void *param)
+{
struct vnode *vp;
- if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
+ if ((ncp->nc_flag & NCF_UNRESOLVED) == 0 && ncp != param) {
ncp->nc_flag |= NCF_UNRESOLVED;
+ /* XXX Why don't we reset NCF_DESTROYED ? */
ncp->nc_flag &= ~(NCF_WHITEOUT|NCF_ISDIR|NCF_ISSYMLINK|
NCF_FSMID);
ncp->nc_timeout = 0;
@@ -563,13 +892,15 @@ cache_setunresolved(struct namecache *nc
*/
if (!TAILQ_EMPTY(&ncp->nc_list))
vdrop(vp);
- if (ncp->nc_exlocks)
+ if (ncp->nc_lockby->nc_exlocks)
vdrop(vp);
} else {
TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
--numneg;
}
}
+
+ return(0);
}
/*
@@ -619,7 +950,7 @@ cache_inval(struct namecache *ncp, int f
struct namecache *nextkid;
int rcnt = 0;
- KKASSERT(ncp->nc_exlocks);
+ KKASSERT(ncp->nc_lockby->nc_exlocks);
cache_setunresolved(ncp);
if (flags & CINV_DESTROY)
@@ -715,6 +1046,7 @@ restart:
* XXX the disconnection could pose a problem, check code paths to make
* sure any code that blocks can handle the parent being changed out from
* under it. Maybe we should lock the children (watch out for deadlocks) ?
+ * [UPDATE: attempt made to lock children, see in situ explanation]
*
* After we return the caller has the option of calling cache_setvp() if
* the vnode of the new target ncp is known.
@@ -726,26 +1058,62 @@ cache_rename(struct namecache *fncp, str
cache_rename(struct namecache *fncp, struct namecache *tncp)
{
struct namecache *scan;
- int didwarn = 0;
-
+ int didwarn[] = { 0, 0 };
+
+ /* XXX should we rather make here a non-equality assertion? */
+ if (fncp == tncp)
+ return;
+
+again:
cache_setunresolved(fncp);
cache_setunresolved(tncp);
+
+ /*
+ * It seems we need to unlock fncp before calling cache_inval():
+ * cache_inval() does a lot of lock/unlock/relock-ing (with tncp
+ * and its children), therefore keeping fncp locked might be
+ * deadlocky...
+ */
+ cache_unlock(fncp);
+
while (cache_inval(tncp, CINV_CHILDREN) != 0) {
- if (didwarn++ % 10 == 0) {
- printf("Warning: cache_rename: race during "
+ if (didwarn[0]++ % 10 == 0) {
+ printf("Warning: cache_rename: race #1 during "
"rename %s->%s\n",
fncp->nc_name, tncp->nc_name);
}
tsleep(tncp, 0, "mvrace", hz / 10);
cache_setunresolved(tncp);
}
+
+ cache_unlock(tncp);
+ cache_lock(fncp);
+
while ((scan = TAILQ_FIRST(&fncp->nc_list)) != NULL) {
- cache_hold(scan);
+ cache_unlock(fncp);
+ /*
+ * We have to lock fncp's kids in order to unresolve
+ * their shadow kids...
+ */
+ cache_get(scan);
cache_unlink_parent(scan);
+ cache_group_walk(scan, &stree_unresolver_updater, scan);
cache_link_parent(scan, tncp);
if (scan->nc_flag & NCF_HASHED)
cache_rehash(scan);
- cache_drop(scan);
+ cache_put(scan);
+ cache_lock(fncp);
+ }
+
+ cache_lock_two(tncp, fncp);
+
+ if ((fncp->nc_flag & tncp->nc_flag & NCF_UNRESOLVED) == 0) {
+ if (didwarn[1]++ % 10 == 0) {
+ printf("Warning: cache_rename: race #2 during "
+ "rename %s->%s\n",
+ fncp->nc_name, tncp->nc_name);
+ }
+ goto again;
}
}
@@ -1207,6 +1575,9 @@ cache_zap(struct namecache *ncp)
vdrop(par->nc_vp);
}
+ if (ncp->nc_shadowed)
+ cache_put(cache_shadow_detach(ncp));
+
/*
* ncp should not have picked up any refs. Physically
* destroy the ncp.
diff -r b30705ca4860 sys/sys/namecache.h
--- a/sys/sys/namecache.h Thu Feb 2 19:50:43 2006 +0100
+++ b/sys/sys/namecache.h Tue Feb 7 19:55:06 2006 +0100
@@ -71,6 +71,7 @@ struct vnode;
struct vnode;
TAILQ_HEAD(namecache_list, namecache);
+LIST_HEAD(namecache_shadow_list, namecache);
/*
* The namecache structure is used to manage the filesystem namespace. Most
@@ -100,8 +101,12 @@ struct namecache {
LIST_ENTRY(namecache) nc_hash; /* hash chain (nc_parent,name) */
TAILQ_ENTRY(namecache) nc_entry; /* scan via nc_parent->nc_list */
TAILQ_ENTRY(namecache) nc_vnode; /* scan via vnode->v_namecache */
+ LIST_ENTRY(namecache) nc_shadow_entry; /* scan via
nc_shadowed->nc_shadow_list */
struct namecache_list nc_list; /* list of children */
+ struct namecache_shadow_list nc_shadow_list; /* list of shadow overlays */
struct namecache *nc_parent; /* namecache entry for parent */
+ struct namecache *nc_shadowed; /* lower layer entry in layered fs */
+ struct namecache *nc_lockby; /* entry via which this one can be
locked */
struct vnode *nc_vp; /* vnode representing name or NULL */
int nc_refs; /* ref count prevents deletion
*/
u_short nc_flag;
@@ -114,6 +119,9 @@ struct namecache {
struct thread *nc_locktd; /* namespace locking */
struct mount *nc_mount; /* associated mount for vopops */
int64_t nc_fsmid; /* filesystem modified id */
+#ifdef INVARIANTS
+ int nc_treesize; /* shadow tree size under ncp */
+#endif
};
typedef struct namecache *namecache_t;
@@ -133,6 +141,9 @@ typedef struct namecache *namecache_t;
#define NCF_ISDIR 0x0200 /* represents a directory */
#define NCF_DESTROYED 0x0400 /* name association is considered destroyed */
#define NCF_FSMID 0x0800 /* FSMID updated */
+#ifdef INVARIANTS
+#define NCF_TREEPASS 0x2000
+#endif
/*
* cache_inval[_vp]() flags
@@ -150,6 +161,9 @@ void cache_lock(struct namecache *ncp);
void cache_lock(struct namecache *ncp);
int cache_lock_nonblock(struct namecache *ncp);
void cache_unlock(struct namecache *ncp);
+void cache_lock_two(struct namecache *uncp, struct namecache *lncp);
+int cache_shadow_attach(struct namecache *ncp, struct namecache *sncp);
+struct namecache *cache_shadow_detach(struct namecache *ncp);
void cache_setvp(struct namecache *ncp, struct vnode *vp);
void cache_settimeout(struct namecache *ncp, int nticks);
void cache_setunresolved(struct namecache *ncp);
diff -r b30705ca4860 sys/vfs/nullfs/null.h
--- a/sys/vfs/nullfs/null.h Thu Feb 2 19:50:43 2006 +0100
+++ b/sys/vfs/nullfs/null.h Tue Feb 7 19:55:06 2006 +0100
@@ -43,18 +43,21 @@ struct null_args {
char *target; /* Target of loopback */
};
-struct null_mount {
- struct mount *nullm_vfs;
- struct vnode *nullm_rootvp; /* Reference to root null_node */
-};
-
#ifdef _KERNEL
-#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
#ifdef NULLFS_DEBUG
-#define NULLFSDEBUG(format, args...) printf(format ,## args)
+#define NULLFSDEBUG(format, args...) \
+ printf(" [nullfs] %s:%d: " format, __func__, __LINE__, ## args)
+#define NULLNCDEBUG(ncp)
\
+ NULLFSDEBUG(#ncp " %p: name %s, refs %d, exlocks %d, nc_flag 0x%x, "
\
+ "nc_mount %p, nc_shadowed %p, nc_lockby %p, nc_vp %p\n",
\
+ (ncp), (ncp)->nc_name, (ncp)->nc_refs,
\
+ (ncp)->nc_lockby->nc_exlocks, (ncp)->nc_flag,
\
+ (ncp)->nc_mount, (ncp)->nc_shadowed,
\
+ (ncp)->nc_lockby, (ncp)->nc_vp)
#else
#define NULLFSDEBUG(format, args...)
+#define NULLNCDEBUG(ncp)
#endif /* NULLFS_DEBUG */
#endif /* _KERNEL */
diff -r b30705ca4860 sys/vfs/nullfs/null_vfsops.c
--- a/sys/vfs/nullfs/null_vfsops.c Thu Feb 2 19:50:43 2006 +0100
+++ b/sys/vfs/nullfs/null_vfsops.c Tue Feb 7 19:55:06 2006 +0100
@@ -53,6 +53,7 @@
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/nlookup.h>
+#include <sys/namecache.h>
#include "null.h"
extern struct vnodeopv_entry_desc null_vnodeop_entries[];
@@ -80,12 +81,10 @@ nullfs_mount(struct mount *mp, char *pat
{
int error = 0;
struct null_args args;
- struct vnode *rootvp;
- struct null_mount *xmp;
u_int size;
struct nlookupdata nd;
- NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
+ NULLFSDEBUG("mp %p\n", (void *)mp);
/*
* Update is a no-op
@@ -98,118 +97,113 @@ nullfs_mount(struct mount *mp, char *pat
* Get argument
*/
error = copyin(data, (caddr_t)&args, sizeof(struct null_args));
- if (error)
- return (error);
-
- /*
- * Find lower node
- */
- rootvp = NULL;
- error = nlookup_init(&nd, args.target, UIO_USERSPACE, NLC_FOLLOW);
+
+ /*
+ * Do a lookup just to see if things are not fundamentally broken...
+ * but it's too early to make a proper use of the result.
+ */
+ if (error == 0)
+ error = nlookup_init(&nd, args.target, UIO_USERSPACE,
+ NLC_FOLLOW);
if (error == 0)
error = nlookup(&nd);
- if (error == 0) {
- error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE,
- &rootvp);
- }
-
- xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
- M_NULLFSMNT, M_WAITOK); /* XXX */
-
- /*
- * Save reference to underlying FS
- */
- /*
- * As lite stacking enters the scene, the old way of doing this
- * -- via the vnode -- is not good enough anymore...
- */
- xmp->nullm_vfs = nd.nl_ncp->nc_mount;
+ if (error)
+ return(error);
+
nlookup_done(&nd);
- vfs_add_vnodeops(mp, &mp->mnt_vn_norm_ops,
- null_vnodeop_entries, 0);
-
- VOP_UNLOCK(rootvp, 0, td);
-
- /*
- * Keep a held reference to the root vnode.
- * It is vrele'd in nullfs_unmount.
- */
- xmp->nullm_rootvp = rootvp;
- /*
- * XXX What's the proper safety condition for querying
- * the underlying mount? Is this flag tuning necessary
- * at all?
- */
- if (xmp->nullm_vfs->mnt_flag & MNT_LOCAL)
- mp->mnt_flag |= MNT_LOCAL;
- mp->mnt_data = (qaddr_t) xmp;
+ vfs_add_vnodeops(mp, &mp->mnt_vn_norm_ops, null_vnodeop_entries, 0);
+
+ /*
+ * Heck it, let it just be local. I bet I need only five minutes to
+ * find out a sound sounding meaning for "local" by which null mounts
+ * are always local.
+ */
+ mp->mnt_flag |= MNT_LOCAL;
vfs_getnewfsid(mp);
(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
&size);
bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
- (void)nullfs_statfs(mp, &mp->mnt_stat, td);
- NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n",
- mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntfromname);
+ NULLFSDEBUG("lower %s, alias at %s\n",
+ mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
return (0);
}
-/*
- * Free reference to null layer
- */
static int
nullfs_unmount(struct mount *mp, int mntflags, struct thread *td)
{
- void *mntdata;
- int flags = 0;
-
- NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
-
- if (mntflags & MNT_FORCE)
- flags |= FORCECLOSE;
-
- /*
- * Finally, throw away the null_mount structure
- */
- mntdata = mp->mnt_data;
- mp->mnt_data = 0;
- free(mntdata, M_NULLFSMNT);
+ NULLNCDEBUG(mp->mnt_ncp);
+
+ cache_lock(mp->mnt_ncp);
+ cache_put(cache_shadow_detach(mp->mnt_ncp));
+ cache_unlock(mp->mnt_ncp);
+
return 0;
}
static int
+nullfs_start(struct mount *mp, int flags, struct thread *td)
+{
+ int error;
+ struct namecache *sncp;
+
+ NULLFSDEBUG("nlookup %s\n", mp->mnt_stat.f_mntfromname);
+
+ sncp = nlookup_simple(mp->mnt_stat.f_mntfromname,
+ UIO_SYSSPACE, NLC_FOLLOW, &error);
+
+ if (! sncp)
+ return (error);
+
+ if ((error = cache_shadow_attach(mp->mnt_ncp, sncp)))
+ cache_put(sncp);
+
+ NULLNCDEBUG(mp->mnt_ncp);
+#ifdef NULLFS_DEBUG
+ if (mp->mnt_ncp->nc_shadowed)
+ NULLNCDEBUG(mp->mnt_ncp->nc_shadowed);
+#endif
+
+ cache_unlock(mp->mnt_ncp);
+ return (error);
+}
+
+/*
+ * As the mount won't get aborted if VFS_START fails, we have to check in each
+ * VFS call whether it has succeeded...
+ */
+
+static int
nullfs_root(struct mount *mp, struct vnode **vpp)
{
- struct thread *td = curthread; /* XXX */
- struct vnode *vp;
-
- NULLFSDEBUG("nullfs_root(mp = %p, vp = %p)\n", (void *)mp,
- (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp);
-
- /*
- * Return locked reference to root.
- */
- vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
- vref(vp);
-
-#ifdef NULLFS_DEBUG
- if (VOP_ISLOCKED(vp, NULL)) {
- Debugger("root vnode is locked.\n");
- vrele(vp);
- return (EDEADLK);
- }
-#endif
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
- *vpp = vp;
- return 0;
+ int error;
+
+ if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+ return (ENXIO);
+
+ error = cache_vget(mp->mnt_ncp->nc_shadowed, crhold(proc0.p_ucred),
+ LK_EXCLUSIVE | LK_RETRY, vpp);
+ crfree(proc0.p_ucred);
+
+ return (error);
+}
+
+static __inline
+struct mount *
+nullfs_lowermount_0(struct mount *mp)
+{
+ return (mp->mnt_ncp->nc_shadowed->nc_mount);
}
static int
nullfs_quotactl(struct mount *mp, int cmd, uid_t uid, caddr_t arg,
struct thread *td)
{
- return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, arg, td);
+ if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+ return (ENXIO);
+
+ return VFS_QUOTACTL(nullfs_lowermount_0(mp), cmd, uid, arg, td);
}
static int
@@ -218,12 +212,15 @@ nullfs_statfs(struct mount *mp, struct s
int error;
struct statfs mstat;
- NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p)\n", (void *)mp,
- (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp);
+ if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+ return (ENXIO);
+
+ NULLFSDEBUG("mp %p, ncp %p, lower mp %p\n",
+ mp, mp->mnt_ncp, nullfs_lowermount_0(mp));
bzero(&mstat, sizeof(mstat));
- error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &mstat, td);
+ error = VFS_STATFS(nullfs_lowermount_0(mp), &mstat, td);
if (error)
return (error);
@@ -248,23 +245,27 @@ nullfs_checkexp(struct mount *mp, struct
nullfs_checkexp(struct mount *mp, struct sockaddr *nam, int *extflagsp,
struct ucred **credanonp)
{
-
- return VFS_CHECKEXP(MOUNTTONULLMOUNT(mp)->nullm_vfs, nam,
- extflagsp, credanonp);
+ if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+ return (ENXIO);
+
+ return VFS_CHECKEXP(nullfs_lowermount_0(mp), nam, extflagsp, credanonp);
}
static int
nullfs_extattrctl(struct mount *mp, int cmd, const char *attrname, caddr_t arg,
struct thread *td)
{
- return VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, attrname,
- arg, td);
+ if (! mp->mnt_ncp || ! mp->mnt_ncp->nc_shadowed)
+ return (ENXIO);
+
+ return VFS_EXTATTRCTL(nullfs_lowermount_0(mp), cmd, attrname, arg, td);
}
static struct vfsops null_vfsops = {
.vfs_mount = nullfs_mount,
.vfs_unmount = nullfs_unmount,
+ .vfs_start = nullfs_start,
.vfs_root = nullfs_root,
.vfs_quotactl = nullfs_quotactl,
.vfs_statfs = nullfs_statfs,
diff -r b30705ca4860 sys/vfs/nullfs/null_vnops.c
--- a/sys/vfs/nullfs/null_vnops.c Thu Feb 2 19:50:43 2006 +0100
+++ b/sys/vfs/nullfs/null_vnops.c Tue Feb 7 19:55:06 2006 +0100
@@ -109,6 +109,8 @@
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/buf.h>
+#include <sys/namecache.h>
+#include <sys/nlookup.h>
#include "null.h"
static int null_nresolve(struct vop_nresolve_args *ap);
@@ -122,18 +124,114 @@ static int null_nrmdir(struct vop_nrmdir
static int null_nrmdir(struct vop_nrmdir_args *ap);
static int null_nrename(struct vop_nrename_args *ap);
+static __inline
+struct mount *
+nullfs_lowermount_l(struct namecache *ncp)
+{
+ /*
+ * The code in use below allows allows passing through lower mounts.
+ * If we didn't want to do that, we could use
+ *
+ * ncp->nc_mount->mnt_ncp->nc_shadowed->nc_mount
+ *
+ * Eventually, the choice might be configurable.
+ *
+ * - - -
+ *
+ * Matt says in
+ * http://leaf.dragonflybsd.org/mailarchive/kernel/2006-01/msg00023.html
+ * :
+
+ The ncp->nc_mount field was never meant to be used by the VFS code...
+ only to be used internally by cache_*(). It looks like I broke my own
+ rule... I have two references in NFS, but that's for later.
+
+ * Note that both approaches still use nc_mount:
+ *
+ * - If we wanna pass through lower mounts, we do have to find the
+ * the lower fs ncp-wise, we simply don't have choice.
+ *
+ * - If we just work with a fixed lower fs, we are able to access
+ * that if we are willing to use nc_mount. Hence it just seems to be
+ * stupid to keep around a direct reference to the lower fs, but
+ * that's of course feasible.
+ */
+ return (ncp->nc_shadowed->nc_mount);
+}
+
static int
null_nresolve(struct vop_nresolve_args *ap)
{
- ap->a_head.a_ops =
MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
-
- return vop_nresolve_ap(ap);
+ struct namecache *ncp = ap->a_ncp;
+ struct nlcomponent nlc;
+ struct namecache *sncp;
+ struct mount *lmp;
+ int error = 0;
+
+ if (ncp->nc_shadowed)
+ cache_put(cache_shadow_detach(ncp));
+
+ nlc.nlc_nameptr = ncp->nc_name;
+ nlc.nlc_namelen = ncp->nc_nlen;
+
+ KKASSERT(ncp->nc_parent->nc_shadowed);
+ cache_unlock(ncp);
+ sncp = cache_nlookup(ncp->nc_parent->nc_shadowed, &nlc);
+
+ if ((error = cache_shadow_attach(ncp, sncp))) {
+ cache_put(sncp);
+ if (ncp->nc_flag & NCF_UNRESOLVED)
+ cache_setvp(ncp, NULL);
+ return (error);
+ }
+
+ NULLNCDEBUG(ncp);
+ NULLNCDEBUG(ncp->nc_shadowed);
+
+checkready:
+
+ if ((ncp->nc_shadowed->nc_flag & NCF_UNRESOLVED) == 0) {
+ cache_setvp(ncp, ncp->nc_shadowed->nc_vp);
+ return (ncp->nc_shadowed->nc_error);
+ }
+
+ /*
+ * XXX Querying/ensuring usability of lower fs still not got right.
+ * As a quick hack, we do a simple test here, that will do for
+ * avoiding most obvious fallacies.
+ */
+ if ((lmp = nullfs_lowermount_l(ncp)) &&
+ (ap->a_head.a_ops = lmp->mnt_vn_use_ops)) {
+ /*
+ * Moving down in the shadow chain is for avoiding a recursed
+ * loop (ending up in exhausting the kernel stack).
+ *
+ * Otherwise it's the same whether we use ncp or
+ * ncp->nc_shadowed -- we go for group shared ncp attributes.
+ */
+ ap->a_ncp = ncp->nc_shadowed;
+ /*
+ * According to cache_resolve(), the primary place for
+ * VOP_NRESOLVE calls, the caller of the nresolve method
+ * is the one who should take care about ncp->nc_error.
+ */
+ ap->a_ncp->nc_error = vop_nresolve_ap(ap);
+
+ goto checkready;
+ }
+
+ return (ENXIO);
}
static int
null_ncreate(struct vop_ncreate_args *ap)
{
- ap->a_head.a_ops =
MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ struct namecache *ncp = ap->a_ncp;
+
+ NULLNCDEBUG(ap->a_ncp);
+ NULLNCDEBUG(ap->a_ncp->nc_shadowed);
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ncp->nc_shadowed;
return vop_ncreate_ap(ap);
}
@@ -141,7 +239,12 @@ static int
static int
null_nmkdir(struct vop_nmkdir_args *ap)
{
- ap->a_head.a_ops =
MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ struct namecache *ncp = ap->a_ncp;
+
+ NULLNCDEBUG(ap->a_ncp);
+ NULLNCDEBUG(ap->a_ncp->nc_shadowed);
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ncp->nc_shadowed;
return vop_nmkdir_ap(ap);
}
@@ -149,7 +252,12 @@ static int
static int
null_nmknod(struct vop_nmknod_args *ap)
{
- ap->a_head.a_ops =
MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ struct namecache *ncp = ap->a_ncp;
+
+ NULLNCDEBUG(ap->a_ncp);
+ NULLNCDEBUG(ap->a_ncp->nc_shadowed);
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ncp->nc_shadowed;
return vop_nmknod_ap(ap);
}
@@ -157,7 +265,12 @@ static int
static int
null_nlink(struct vop_nlink_args *ap)
{
- ap->a_head.a_ops =
MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ struct namecache *ncp = ap->a_ncp;
+
+ NULLNCDEBUG(ap->a_ncp);
+ NULLNCDEBUG(ap->a_ncp->nc_shadowed);
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ncp->nc_shadowed;
return vop_nlink_ap(ap);
}
@@ -165,7 +278,12 @@ static int
static int
null_nsymlink(struct vop_nsymlink_args *ap)
{
- ap->a_head.a_ops =
MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ struct namecache *ncp = ap->a_ncp;
+
+ NULLNCDEBUG(ap->a_ncp);
+ NULLNCDEBUG(ap->a_ncp->nc_shadowed);
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ncp->nc_shadowed;
return vop_nsymlink_ap(ap);
}
@@ -173,7 +291,12 @@ static int
static int
null_nwhiteout(struct vop_nwhiteout_args *ap)
{
- ap->a_head.a_ops =
MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ struct namecache *ncp = ap->a_ncp;
+
+ NULLNCDEBUG(ap->a_ncp);
+ NULLNCDEBUG(ap->a_ncp->nc_shadowed);
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ncp->nc_shadowed;
return vop_nwhiteout_ap(ap);
}
@@ -181,7 +304,12 @@ static int
static int
null_nremove(struct vop_nremove_args *ap)
{
- ap->a_head.a_ops =
MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ struct namecache *ncp = ap->a_ncp;
+
+ NULLNCDEBUG(ap->a_ncp);
+ NULLNCDEBUG(ap->a_ncp->nc_shadowed);
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ncp->nc_shadowed;
return vop_nremove_ap(ap);
}
@@ -189,7 +317,12 @@ static int
static int
null_nrmdir(struct vop_nrmdir_args *ap)
{
- ap->a_head.a_ops =
MOUNTTONULLMOUNT(ap->a_ncp->nc_mount)->nullm_vfs->mnt_vn_norm_ops;
+ struct namecache *ncp = ap->a_ncp;
+
+ NULLNCDEBUG(ap->a_ncp);
+ NULLNCDEBUG(ap->a_ncp->nc_shadowed);
+ ap->a_head.a_ops = nullfs_lowermount_l(ap->a_ncp)->mnt_vn_use_ops;
+ ap->a_ncp = ncp->nc_shadowed;
return vop_nrmdir_ap(ap);
}
@@ -197,13 +330,21 @@ static int
static int
null_nrename(struct vop_nrename_args *ap)
{
+ struct namecache *fncp = ap->a_fncp;
+ struct namecache *tncp = ap->a_tncp;
struct mount *lmp;
- lmp = MOUNTTONULLMOUNT(ap->a_fncp->nc_mount)->nullm_vfs;
- if (lmp != MOUNTTONULLMOUNT(ap->a_tncp->nc_mount)->nullm_vfs)
- return (EINVAL);
-
- ap->a_head.a_ops = lmp->mnt_vn_norm_ops;
+ lmp = nullfs_lowermount_l(fncp);
+ if (lmp != nullfs_lowermount_l(tncp))
+ return (EXDEV);
+
+ NULLNCDEBUG(ap->a_fncp);
+ NULLNCDEBUG(ap->a_fncp->nc_shadowed);
+ NULLNCDEBUG(ap->a_tncp);
+ NULLNCDEBUG(ap->a_tncp->nc_shadowed);
+ ap->a_head.a_ops = lmp->mnt_vn_use_ops;
+ ap->a_fncp = fncp->nc_shadowed;
+ ap->a_tncp = tncp->nc_shadowed;
return vop_nrename_ap(ap);
}
@@ -224,4 +365,3 @@ struct vnodeopv_entry_desc null_vnodeop_
{ &vop_nrename_desc, (vnodeopv_entry_t) null_nrename },
{ NULL, NULL }
};
-