Module Name:    src
Committed By:   hannken
Date:           Mon Jan 21 09:14:01 UTC 2013

Modified Files:
        src/sys/kern: vfs_trans.c

Log Message:
Replace the rwlock based implementation with passive serialization
from pserialize(9) and mutex / condvar.

The fast paths (fstrans_start/fstrans_done on a file system not
suspended or suspending and fscow_run with no change pending) now
run without locks or other atomic operations.  Suspension and cow
handler insertion and removal are done with mutex / condvars.

The API remains unchanged.


To generate a diff of this commit:
cvs rdiff -u -r1.25 -r1.26 src/sys/kern/vfs_trans.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/vfs_trans.c
diff -u src/sys/kern/vfs_trans.c:1.25 src/sys/kern/vfs_trans.c:1.26
--- src/sys/kern/vfs_trans.c:1.25	Tue May 12 11:42:12 2009
+++ src/sys/kern/vfs_trans.c	Mon Jan 21 09:14:01 2013
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_trans.c,v 1.25 2009/05/12 11:42:12 yamt Exp $	*/
+/*	$NetBSD: vfs_trans.c,v 1.26 2013/01/21 09:14:01 hannken Exp $	*/
 
 /*-
  * Copyright (c) 2007 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.25 2009/05/12 11:42:12 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.26 2013/01/21 09:14:01 hannken Exp $");
 
 /*
  * File system transaction operations.
@@ -38,16 +38,13 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,
 
 #include "opt_ddb.h"
 
-#if defined(DDB)
-#define _LWP_API_PRIVATE	/* Need _lwp_getspecific_by_lwp() */
-#endif
-
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/atomic.h>
 #include <sys/buf.h>
 #include <sys/kmem.h>
 #include <sys/mount.h>
-#include <sys/rwlock.h>
+#include <sys/pserialize.h>
 #include <sys/vnode.h>
 #define _FSTRANS_API_PRIVATE
 #include <sys/fstrans.h>
@@ -57,34 +54,47 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,
 #include <miscfs/syncfs/syncfs.h>
 
 struct fscow_handler {
-	SLIST_ENTRY(fscow_handler) ch_list;
+	LIST_ENTRY(fscow_handler) ch_list;
 	int (*ch_func)(void *, struct buf *, bool);
 	void *ch_arg;
 };
 struct fstrans_lwp_info {
 	struct fstrans_lwp_info *fli_succ;
+	struct lwp *fli_self;
 	struct mount *fli_mount;
 	int fli_trans_cnt;
 	int fli_cow_cnt;
 	enum fstrans_lock_type fli_lock_type;
+	LIST_ENTRY(fstrans_lwp_info) fli_list;
 };
 struct fstrans_mount_info {
 	enum fstrans_state fmi_state;
-	krwlock_t fmi_shared_lock;
-	krwlock_t fmi_lazy_lock;
-	krwlock_t fmi_cow_lock;
-	SLIST_HEAD(, fscow_handler) fmi_cow_handler;
+	unsigned int fmi_ref_cnt;
+	bool fmi_cow_change;
+	LIST_HEAD(, fscow_handler) fmi_cow_handler;
 };
 
-static specificdata_key_t lwp_data_key;
+static specificdata_key_t lwp_data_key;	/* Our specific data key. */
 static kmutex_t vfs_suspend_lock;	/* Serialize suspensions. */
-static pool_cache_t fstrans_cache;
+static kmutex_t fstrans_lock;		/* Fstrans big lock. */
+static kcondvar_t fstrans_state_cv;	/* Fstrans or cow state changed. */
+static kcondvar_t fstrans_count_cv;	/* Fstrans or cow count changed. */
+static pserialize_t fstrans_psz;	/* Pserialize state. */
+static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
+					/* List of all fstrans_lwp_info. */
+static pool_cache_t fstrans_cache;	/* Pool of struct fstrans_lwp_info. */
 
 static void fstrans_lwp_dtor(void *);
-static struct fstrans_lwp_info *fstrans_get_lwp_info(struct mount *);
+static void fstrans_mount_dtor(struct mount *);
+static struct fstrans_lwp_info *fstrans_get_lwp_info(struct mount *, bool);
+static bool grant_lock(const enum fstrans_state, const enum fstrans_lock_type);
+static bool state_change_done(const struct mount *);
+static bool cow_state_change_done(const struct mount *);
+static void cow_change_enter(const struct mount *);
+static void cow_change_done(const struct mount *);
 
 /*
- * Initialize
+ * Initialize.
  */
 void
 fstrans_init(void)
@@ -95,120 +105,183 @@ fstrans_init(void)
 	KASSERT(error == 0);
 
 	mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
+	mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&fstrans_state_cv, "fstchg");
+	cv_init(&fstrans_count_cv, "fstcnt");
+	fstrans_psz = pserialize_create();
+	LIST_INIT(&fstrans_fli_head);
 	fstrans_cache = pool_cache_init(sizeof(struct fstrans_lwp_info), 0, 0,
 	    0, "fstrans", NULL, IPL_NONE, NULL, NULL, NULL);
 }
 
 /*
- * Deallocate lwp state
+ * Deallocate lwp state.
  */
 static void
 fstrans_lwp_dtor(void *arg)
 {
 	struct fstrans_lwp_info *fli, *fli_next;
 
+	mutex_enter(&fstrans_lock);
 	for (fli = arg; fli; fli = fli_next) {
 		KASSERT(fli->fli_trans_cnt == 0);
 		KASSERT(fli->fli_cow_cnt == 0);
+		if (fli->fli_mount != NULL)
+			fstrans_mount_dtor(fli->fli_mount);
 		fli_next = fli->fli_succ;
+		LIST_REMOVE(fli, fli_list);
 		pool_cache_put(fstrans_cache, fli);
 	}
+	mutex_exit(&fstrans_lock);
+}
+
+/*
+ * Dereference mount state.
+ */
+static void
+fstrans_mount_dtor(struct mount *mp)
+{
+	struct fstrans_mount_info *fmi;
+
+	fmi = mp->mnt_transinfo;
+	if (atomic_dec_uint_nv(&fmi->fmi_ref_cnt) > 0)
+		return;
+
+	KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
+	KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);
+
+	kmem_free(fmi, sizeof(*fmi));
+	mp->mnt_iflag &= ~IMNT_HAS_TRANS;
+	mp->mnt_transinfo = NULL;
+
+	vfs_destroy(mp);
 }
 
 /*
- * Allocate mount state
+ * Allocate mount state.
  */
 int
 fstrans_mount(struct mount *mp)
 {
+	int error;
 	struct fstrans_mount_info *new;
 
+	error = vfs_busy(mp, NULL);
+	if (error)
+		return error;
 	if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL)
 		return ENOMEM;
 	new->fmi_state = FSTRANS_NORMAL;
-	rw_init(&new->fmi_lazy_lock);
-	rw_init(&new->fmi_shared_lock);
-	SLIST_INIT(&new->fmi_cow_handler);
-	rw_init(&new->fmi_cow_lock);
+	new->fmi_ref_cnt = 1;
+	LIST_INIT(&new->fmi_cow_handler);
+	new->fmi_cow_change = false;
 
 	mp->mnt_transinfo = new;
 	mp->mnt_iflag |= IMNT_HAS_TRANS;
 
+	vfs_unbusy(mp, true, NULL);
+
 	return 0;
 }
 
 /*
- * Deallocate mount state
+ * Deallocate mount state.
  */
 void
 fstrans_unmount(struct mount *mp)
 {
-	struct fstrans_mount_info *fmi;
-	struct fscow_handler *hp;
 
-	if ((fmi = mp->mnt_transinfo) == NULL)
-		return;
+	KASSERT(mp->mnt_transinfo != NULL);
 
-	KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
-	rw_destroy(&fmi->fmi_lazy_lock);
-	rw_destroy(&fmi->fmi_shared_lock);
-	rw_enter(&fmi->fmi_cow_lock, RW_WRITER);
-	while ((hp = SLIST_FIRST(&fmi->fmi_cow_handler)) != NULL) {
-		SLIST_REMOVE(&fmi->fmi_cow_handler, hp, fscow_handler, ch_list);
-		kmem_free(hp, sizeof(*hp));
-	}
-	rw_exit(&fmi->fmi_cow_lock);
-	rw_destroy(&fmi->fmi_cow_lock);
-	kmem_free(fmi, sizeof(*fmi));
-	mp->mnt_iflag &= ~IMNT_HAS_TRANS;
-	mp->mnt_transinfo = NULL;
+	fstrans_mount_dtor(mp);
 }
 
 /*
- * Retrieve the per lwp info for this mount
+ * Retrieve the per lwp info for this mount allocating if necessary.
  */
 static struct fstrans_lwp_info *
-fstrans_get_lwp_info(struct mount *mp)
+fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
 {
-	struct fstrans_lwp_info *fli, *new_fli;
+	struct fstrans_lwp_info *fli, *res;
+	struct fstrans_mount_info *fmi;
 
-	new_fli = NULL;
+	/*
+	 * Scan our list for a match clearing entries whose mount is gone.
+	 */
+	res = NULL;
 	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
-		if (fli->fli_mount == mp)
-			return fli;
-		else if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0 &&
-		    new_fli == NULL)
-			new_fli = fli;
+		if (fli->fli_mount == mp) {
+			KASSERT(res == NULL);
+			res = fli;
+		} else if (fli->fli_mount != NULL &&
+		    (fli->fli_mount->mnt_iflag & IMNT_GONE) != 0 &&
+		    fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
+			fstrans_mount_dtor(fli->fli_mount);
+			fli->fli_mount = NULL;
+		}
 	}
+	if (__predict_true(res != NULL))
+		return res;
 
-	if (new_fli == NULL) {
-		new_fli = pool_cache_get(fstrans_cache, PR_WAITOK);
-		new_fli->fli_trans_cnt = 0;
-		new_fli->fli_cow_cnt = 0;
-		new_fli->fli_succ = lwp_getspecific(lwp_data_key);
-		lwp_setspecific(lwp_data_key, new_fli);
+	if (! do_alloc)
+		return NULL;
+
+	/*
+	 * Try to reuse a cleared entry or allocate a new one.
+	 */
+	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
+		if (fli->fli_mount == NULL) {
+			KASSERT(fli->fli_trans_cnt == 0);
+			KASSERT(fli->fli_cow_cnt == 0);
+			break;
+		}
+	}
+	if (fli == NULL) {
+		fli = pool_cache_get(fstrans_cache, PR_WAITOK);
+		mutex_enter(&fstrans_lock);
+		memset(fli, 0, sizeof(*fli));
+		fli->fli_self = curlwp;
+		LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
+		mutex_exit(&fstrans_lock);
+		fli->fli_succ = lwp_getspecific(lwp_data_key);
+		lwp_setspecific(lwp_data_key, fli);
 	}
 
-	KASSERT(new_fli->fli_trans_cnt == 0);
-	KASSERT(new_fli->fli_cow_cnt == 0);
+	/*
+	 * Attach the entry to the mount.
+	 */
+	fmi = mp->mnt_transinfo;
+	fli->fli_mount = mp;
+	atomic_inc_uint(&fmi->fmi_ref_cnt);
+
+	return fli;
+}
+
+/*
+ * Check if this lock type is granted at this state.
+ */
+static bool
+grant_lock(const enum fstrans_state state, const enum fstrans_lock_type type)
+{
 
-	new_fli->fli_mount = mp;
+	if (__predict_true(state == FSTRANS_NORMAL))
+		return true;
+	if (type == FSTRANS_EXCL)
+		return true;
+	if  (state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
+		return true;
 
-	return new_fli;
+	return false;
 }
 
 /*
  * Start a transaction.  If this thread already has a transaction on this
  * file system increment the reference counter.
- * A thread with an exclusive transaction lock may get a shared or lazy one.
- * A thread with a shared or lazy transaction lock cannot upgrade to an
- * exclusive one yet.
  */
 int
 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
 {
-	krwlock_t *lock_p;
-	krw_t lock_op;
+	int s;
 	struct fstrans_lwp_info *fli;
 	struct fstrans_mount_info *fmi;
 
@@ -217,31 +290,35 @@ _fstrans_start(struct mount *mp, enum fs
 	if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
 		return 0;
 
-	fli = fstrans_get_lwp_info(mp);
+	fli = fstrans_get_lwp_info(mp, true);
 
 	if (fli->fli_trans_cnt > 0) {
-		if (fli->fli_lock_type != FSTRANS_EXCL &&
-		    lock_type == FSTRANS_EXCL)
-			panic("fstrans_start: cannot upgrade lock");
+		KASSERT(lock_type != FSTRANS_EXCL);
 		fli->fli_trans_cnt += 1;
+
 		return 0;
 	}
 
+	s = pserialize_read_enter();
 	fmi = mp->mnt_transinfo;
+	if (__predict_true(grant_lock(fmi->fmi_state, lock_type))) {
+		fli->fli_trans_cnt = 1;
+		fli->fli_lock_type = lock_type;
+		pserialize_read_exit(s);
 
-	if (lock_type == FSTRANS_LAZY)
-		lock_p = &fmi->fmi_lazy_lock;
-	else
-		lock_p = &fmi->fmi_shared_lock;
-	lock_op = (lock_type == FSTRANS_EXCL ? RW_WRITER : RW_READER);
+		return 0;
+	}
+	pserialize_read_exit(s);
 
-	if (wait)
-		rw_enter(lock_p, lock_op);
-	else if (rw_tryenter(lock_p, lock_op) == 0)
+	if (! wait)
 		return EBUSY;
 
+	mutex_enter(&fstrans_lock);
+	while (! grant_lock(fmi->fmi_state, lock_type))
+		cv_wait(&fstrans_state_cv, &fstrans_lock);
 	fli->fli_trans_cnt = 1;
 	fli->fli_lock_type = lock_type;
+	mutex_exit(&fstrans_lock);
 
 	return 0;
 }
@@ -252,31 +329,37 @@ _fstrans_start(struct mount *mp, enum fs
 void
 fstrans_done(struct mount *mp)
 {
+	int s;
 	struct fstrans_lwp_info *fli;
 	struct fstrans_mount_info *fmi;
 
 	if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
 		return;
 
-	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
-		if (fli->fli_mount == mp) {
-			fli->fli_trans_cnt -= 1;
-			if (fli->fli_trans_cnt > 0)
-				return;
-			break;
-		}
-	}
-
+	fli = fstrans_get_lwp_info(mp, false);
 	KASSERT(fli != NULL);
-	KASSERT(fli->fli_mount == mp);
-	KASSERT(fli->fli_trans_cnt == 0);
+	KASSERT(fli->fli_trans_cnt > 0);
+
+	if (fli->fli_trans_cnt > 1) {
+		fli->fli_trans_cnt -= 1;
 
+		return;
+	}
+
+	s = pserialize_read_enter();
 	fmi = mp->mnt_transinfo;
-	KASSERT(fmi != NULL);
-	if (fli->fli_lock_type == FSTRANS_LAZY)
-		rw_exit(&fmi->fmi_lazy_lock);
-	else
-		rw_exit(&fmi->fmi_shared_lock);
+	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
+		fli->fli_trans_cnt = 0;
+		pserialize_read_exit(s);
+
+		return;
+	}
+	pserialize_read_exit(s);
+
+	mutex_enter(&fstrans_lock);
+	fli->fli_trans_cnt = 0;
+	cv_signal(&fstrans_count_cv);
+	mutex_exit(&fstrans_lock);
 }
 
 /*
@@ -287,72 +370,91 @@ fstrans_is_owner(struct mount *mp)
 {
 	struct fstrans_lwp_info *fli;
 
-	if (mp == NULL)
-		return 0;
-	if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
+	if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
 		return 0;
 
-	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ)
-		if (fli->fli_mount == mp)
-			break;
-
+	fli = fstrans_get_lwp_info(mp, false);
 	if (fli == NULL || fli->fli_trans_cnt == 0)
 		return 0;
 
 	KASSERT(fli->fli_mount == mp);
 	KASSERT(fli->fli_trans_cnt > 0);
+
 	return (fli->fli_lock_type == FSTRANS_EXCL);
 }
 
 /*
+ * True, if no thread is in a transaction not granted at the current state.
+ */
+static bool
+state_change_done(const struct mount *mp)
+{
+	struct fstrans_lwp_info *fli;
+	struct fstrans_mount_info *fmi;
+
+	KASSERT(mutex_owned(&fstrans_lock));
+
+	fmi = mp->mnt_transinfo;
+	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
+		if (fli->fli_mount != mp)
+			continue;
+		if (fli->fli_trans_cnt == 0)
+			continue;
+		if (grant_lock(fmi->fmi_state, fli->fli_lock_type))
+			continue;
+
+		return false;
+	}
+
+	return true;
+}
+
+/*
  * Set new file system state.
  */
 int
 fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
 {
+	int error;
+	enum fstrans_state old_state;
 	struct fstrans_mount_info *fmi;
 
 	fmi = mp->mnt_transinfo;
+	old_state = fmi->fmi_state;
+	if (old_state == new_state)
+		return 0;
 
-	switch (new_state) {
-	case FSTRANS_SUSPENDING:
-		KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
-		fstrans_start(mp, FSTRANS_EXCL);
-		fmi->fmi_state = FSTRANS_SUSPENDING;
-		break;
+	mutex_enter(&fstrans_lock);
+	fmi->fmi_state = new_state;
+	pserialize_perform(fstrans_psz);
+
+	/*
+	 * All threads see the new state now.
+	 * Wait for transactions invalid at this state to leave.
+	 */
+	error = 0;
+	while (! state_change_done(mp)) {
+		error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
+		if (error) {
+			new_state = fmi->fmi_state = FSTRANS_NORMAL;
+			break;
+		}
+	}
+	cv_broadcast(&fstrans_state_cv);
+	mutex_exit(&fstrans_lock);
 
-	case FSTRANS_SUSPENDED:
-		KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
-			fmi->fmi_state == FSTRANS_SUSPENDING);
-		KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
-			fstrans_is_owner(mp));
-		if (fmi->fmi_state == FSTRANS_NORMAL)
+	if (old_state != new_state) {
+		if (old_state == FSTRANS_NORMAL)
 			fstrans_start(mp, FSTRANS_EXCL);
-		rw_enter(&fmi->fmi_lazy_lock, RW_WRITER);
-		fmi->fmi_state = FSTRANS_SUSPENDED;
-		break;
-
-	case FSTRANS_NORMAL:
-		KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
-			fstrans_is_owner(mp));
-		if (fmi->fmi_state == FSTRANS_SUSPENDED)
-			rw_exit(&fmi->fmi_lazy_lock);
-		if (fmi->fmi_state == FSTRANS_SUSPENDING ||
-		    fmi->fmi_state == FSTRANS_SUSPENDED) {
-			fmi->fmi_state = FSTRANS_NORMAL;
+		if (new_state == FSTRANS_NORMAL)
 			fstrans_done(mp);
-		}
-		break;
-
-	default:
-		panic("%s: illegal state %d", __func__, new_state);
 	}
 
-	return 0;
+	return error;
 }
 
 /*
- * Get current file system state
+ * Get current file system state.
  */
 enum fstrans_state
 fstrans_getstate(struct mount *mp)
@@ -360,6 +462,7 @@ fstrans_getstate(struct mount *mp)
 	struct fstrans_mount_info *fmi;
 
 	fmi = mp->mnt_transinfo;
+	KASSERT(fmi != NULL);
 
 	return fmi->fmi_state;
 }
@@ -379,7 +482,6 @@ vfs_suspend(struct mount *mp, int nowait
 		mutex_enter(&vfs_suspend_lock);
 
 	mutex_enter(&syncer_mutex);
-
 	if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) {
 		mutex_exit(&syncer_mutex);
 		mutex_exit(&vfs_suspend_lock);
@@ -400,103 +502,85 @@ vfs_resume(struct mount *mp)
 	mutex_exit(&vfs_suspend_lock);
 }
 
-#if defined(DDB)
-void fstrans_dump(int);
 
-static void
-fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
+/*
+ * True, if no thread is running a cow handler.
+ */
+static bool
+cow_state_change_done(const struct mount *mp)
 {
-	char prefix[9];
 	struct fstrans_lwp_info *fli;
+	struct fstrans_mount_info *fmi;
 
-	snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
-	for (fli = _lwp_getspecific_by_lwp(l, lwp_data_key);
-	     fli;
-	     fli = fli->fli_succ) {
-		if (!verbose && fli->fli_trans_cnt == 0)
+	fmi = mp->mnt_transinfo;
+
+	KASSERT(mutex_owned(&fstrans_lock));
+	KASSERT(fmi->fmi_cow_change);
+
+	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
+		if (fli->fli_mount != mp)
 			continue;
-		printf("%-8s", prefix);
-		if (verbose)
-			printf(" @%p", fli);
-		if (fli->fli_mount != NULL)
-			printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
-		else
-			printf(" NULL");
-		switch (fli->fli_lock_type) {
-		case FSTRANS_LAZY:
-			printf(" lazy");
-			break;
-		case FSTRANS_SHARED:
-			printf(" shared");
-			break;
-		case FSTRANS_EXCL:
-			printf(" excl");
-			break;
-		default:
-			printf(" %#x", fli->fli_lock_type);
-			break;
-		}
-		printf(" %d\n", fli->fli_trans_cnt);
-		prefix[0] = '\0';
+		if (fli->fli_cow_cnt == 0)
+			continue;
+
+		return false;
 	}
+
+	return true;
 }
 
+/*
+ * Prepare for changing this mount's cow list.
+ * Returns with fstrans_lock locked.
+ */
 static void
-fstrans_print_mount(struct mount *mp, int verbose)
+cow_change_enter(const struct mount *mp)
 {
 	struct fstrans_mount_info *fmi;
 
 	fmi = mp->mnt_transinfo;
-	if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
-		return;
 
-	printf("%-16s ", mp->mnt_stat.f_mntonname);
-	if (fmi == NULL) {
-		printf("(null)\n");
-		return;
-	}
-	switch (fmi->fmi_state) {
-	case FSTRANS_NORMAL:
-		printf("state normal\n");
-		break;
-	case FSTRANS_SUSPENDING:
-		printf("state suspending\n");
-		break;
-	case FSTRANS_SUSPENDED:
-		printf("state suspended\n");
-		break;
-	default:
-		printf("state %#x\n", fmi->fmi_state);
-		break;
-	}
-	printf("%16s r=%d w=%d\n", "lock_lazy:",
-	    rw_read_held(&fmi->fmi_lazy_lock),
-	    rw_write_held(&fmi->fmi_lazy_lock));
-	printf("%16s r=%d w=%d\n", "lock_shared:",
-	    rw_read_held(&fmi->fmi_shared_lock),
-	    rw_write_held(&fmi->fmi_shared_lock));
+	mutex_enter(&fstrans_lock);
+
+	/*
+	 * Wait for other threads changing the list.
+	 */
+	while (fmi->fmi_cow_change)
+		cv_wait(&fstrans_state_cv, &fstrans_lock);
+
+	/*
+	 * Wait until all threads are aware of a state change.
+	 */
+	fmi->fmi_cow_change = true;
+	pserialize_perform(fstrans_psz);
+
+	while (! cow_state_change_done(mp))
+		cv_wait(&fstrans_count_cv, &fstrans_lock);
 }
 
-void
-fstrans_dump(int full)
+/*
+ * Done changing this mount's cow list.
+ */
+static void
+cow_change_done(const struct mount *mp)
 {
-	const struct proclist_desc *pd;
-	struct proc *p;
-	struct lwp *l;
-	struct mount *mp;
+	struct fstrans_mount_info *fmi;
 
-	printf("Fstrans locks by lwp:\n");
-	for (pd = proclists; pd->pd_list != NULL; pd++)
-		PROCLIST_FOREACH(p, pd->pd_list)
-			LIST_FOREACH(l, &p->p_lwps, l_sibling)
-				fstrans_print_lwp(p, l, full == 1);
+	KASSERT(mutex_owned(&fstrans_lock));
 
-	printf("Fstrans state by mount:\n");
-	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
-		fstrans_print_mount(mp, full == 1);
+	fmi = mp->mnt_transinfo;
+
+	fmi->fmi_cow_change = false;
+	pserialize_perform(fstrans_psz);
+
+	cv_broadcast(&fstrans_state_cv);
+
+	mutex_exit(&fstrans_lock);
 }
-#endif /* defined(DDB) */
 
+/*
+ * Add a handler to this mount.
+ */
 int
 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
     void *arg)
@@ -508,18 +592,23 @@ fscow_establish(struct mount *mp, int (*
 		return EINVAL;
 
 	fmi = mp->mnt_transinfo;
+	KASSERT(fmi != NULL);
 
 	if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL)
 		return ENOMEM;
 	new->ch_func = func;
 	new->ch_arg = arg;
-	rw_enter(&fmi->fmi_cow_lock, RW_WRITER);
-	SLIST_INSERT_HEAD(&fmi->fmi_cow_handler, new, ch_list);
-	rw_exit(&fmi->fmi_cow_lock);
+
+	cow_change_enter(mp);
+	LIST_INSERT_HEAD(&fmi->fmi_cow_handler, new, ch_list);
+	cow_change_done(mp);
 
 	return 0;
 }
 
+/*
+ * Remove a handler from this mount.
+ */
 int
 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
     void *arg)
@@ -531,56 +620,199 @@ fscow_disestablish(struct mount *mp, int
 		return EINVAL;
 
 	fmi = mp->mnt_transinfo;
+	KASSERT(fmi != NULL);
 
-	rw_enter(&fmi->fmi_cow_lock, RW_WRITER);
-	SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
+	cow_change_enter(mp);
+	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
 		if (hp->ch_func == func && hp->ch_arg == arg)
 			break;
 	if (hp != NULL) {
-		SLIST_REMOVE(&fmi->fmi_cow_handler, hp, fscow_handler, ch_list);
+		LIST_REMOVE(hp, ch_list);
 		kmem_free(hp, sizeof(*hp));
 	}
-	rw_exit(&fmi->fmi_cow_lock);
+	cow_change_done(mp);
 
 	return hp ? 0 : EINVAL;
 }
 
+/*
+ * Check for need to copy block that is about to be written.
+ */
 int
 fscow_run(struct buf *bp, bool data_valid)
 {
-	int error = 0;
+	int error, s;
 	struct mount *mp;
 	struct fstrans_lwp_info *fli;
 	struct fstrans_mount_info *fmi;
 	struct fscow_handler *hp;
 
+	/*
+	 * First check if we need to run the copy-on-write handler.
+	 */
 	if ((bp->b_flags & B_COWDONE))
-		goto done;
-	if (bp->b_vp == NULL)
-		goto done;
+		return 0;
+	if (bp->b_vp == NULL) {
+		bp->b_flags |= B_COWDONE;
+		return 0;
+	}
 	if (bp->b_vp->v_type == VBLK)
 		mp = bp->b_vp->v_specmountpoint;
 	else
 		mp = bp->b_vp->v_mount;
-	if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
-		goto done;
+	if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) {
+		bp->b_flags |= B_COWDONE;
+		return 0;
+	}
 
-	fli = fstrans_get_lwp_info(mp);
+	fli = fstrans_get_lwp_info(mp, true);
 	fmi = mp->mnt_transinfo;
 
-	if (fli->fli_cow_cnt++ == 0)
-		rw_enter(&fmi->fmi_cow_lock, RW_READER);
+	/*
+	 * On non-recursed run check if other threads
+	 * want to change the list.
+	 */
+	if (fli->fli_cow_cnt == 0) {
+		s = pserialize_read_enter();
+		if (__predict_false(fmi->fmi_cow_change)) {
+			pserialize_read_exit(s);
+			mutex_enter(&fstrans_lock);
+			while (fmi->fmi_cow_change)
+				cv_wait(&fstrans_state_cv, &fstrans_lock);
+			fli->fli_cow_cnt = 1;
+			mutex_exit(&fstrans_lock);
+		} else {
+			fli->fli_cow_cnt = 1;
+			pserialize_read_exit(s);
+		}
+	} else
+		fli->fli_cow_cnt += 1;
 
-	SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
+	/*
+	 * Run all copy-on-write handlers, stop on error.
+	 */
+	error = 0;
+	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
 		if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
 			break;
-
-	if (--fli->fli_cow_cnt == 0)
-		rw_exit(&fmi->fmi_cow_lock);
-
- done:
  	if (error == 0)
  		bp->b_flags |= B_COWDONE;
 
+	/*
+	 * Check if other threads want to change the list.
+	 */
+	if (fli->fli_cow_cnt > 1) {
+		fli->fli_cow_cnt -= 1;
+	} else {
+		s = pserialize_read_enter();
+		if (__predict_false(fmi->fmi_cow_change)) {
+			pserialize_read_exit(s);
+			mutex_enter(&fstrans_lock);
+			fli->fli_cow_cnt = 0;
+			cv_signal(&fstrans_count_cv);
+			mutex_exit(&fstrans_lock);
+		} else {
+			fli->fli_cow_cnt = 0;
+			pserialize_read_exit(s);
+		}
+	}
+
 	return error;
 }
+
+#if defined(DDB)
+void fstrans_dump(int);
+
+static void
+fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
+{
+	char prefix[9];
+	struct fstrans_lwp_info *fli;
+
+	snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
+	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
+		if (fli->fli_self != l)
+			continue;
+		if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
+			if (! verbose)
+				continue;
+		}
+		printf("%-8s", prefix);
+		if (verbose)
+			printf(" @%p", fli);
+		if (fli->fli_mount != NULL)
+			printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
+		else
+			printf(" NULL");
+		if (fli->fli_trans_cnt == 0) {
+			printf(" -");
+		} else {
+			switch (fli->fli_lock_type) {
+			case FSTRANS_LAZY:
+				printf(" lazy");
+				break;
+			case FSTRANS_SHARED:
+				printf(" shared");
+				break;
+			case FSTRANS_EXCL:
+				printf(" excl");
+				break;
+			default:
+				printf(" %#x", fli->fli_lock_type);
+				break;
+			}
+		}
+		printf(" %d cow %d\n", fli->fli_trans_cnt, fli->fli_cow_cnt);
+		prefix[0] = '\0';
+	}
+}
+
+static void
+fstrans_print_mount(struct mount *mp, int verbose)
+{
+	struct fstrans_mount_info *fmi;
+
+	fmi = mp->mnt_transinfo;
+	if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
+		return;
+
+	printf("%-16s ", mp->mnt_stat.f_mntonname);
+	if (fmi == NULL) {
+		printf("(null)\n");
+		return;
+	}
+	switch (fmi->fmi_state) {
+	case FSTRANS_NORMAL:
+		printf("state normal\n");
+		break;
+	case FSTRANS_SUSPENDING:
+		printf("state suspending\n");
+		break;
+	case FSTRANS_SUSPENDED:
+		printf("state suspended\n");
+		break;
+	default:
+		printf("state %#x\n", fmi->fmi_state);
+		break;
+	}
+}
+
+void
+fstrans_dump(int full)
+{
+	const struct proclist_desc *pd;
+	struct proc *p;
+	struct lwp *l;
+	struct mount *mp;
+
+	printf("Fstrans locks by lwp:\n");
+	for (pd = proclists; pd->pd_list != NULL; pd++)
+		PROCLIST_FOREACH(p, pd->pd_list)
+			LIST_FOREACH(l, &p->p_lwps, l_sibling)
+				fstrans_print_lwp(p, l, full == 1);
+
+	printf("Fstrans state by mount:\n");
+	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
+		fstrans_print_mount(mp, full == 1);
+}
+#endif /* defined(DDB) */

Reply via email to