Module Name:    src
Committed By:   riastradh
Date:           Mon Oct 15 14:15:59 UTC 2012

Modified Files:
        src/external/cddl/osnet/dist/uts/common/fs/zfs: zfs_rlock.c
        src/external/cddl/osnet/dist/uts/common/fs/zfs/sys: zfs_rlock.h

Log Message:
Do reference counting for zfs range lock waiters.

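Avoid the sequence cv_broadcast(&cv); cv_destroy(&cv); which works on
Solaris only by abusing the condvar abstraction: waiters woken by the
broadcast have not necessarily returned from cv_wait by the time the
condvar is destroyed.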

There are parts of this code that should be factored into smaller
subroutines, mainly range lock allocation and initialization, but
that would make it harder to merge newer versions of zfs, so for now
I've just expanded those parts further in-line.


To generate a diff of this commit:
cvs rdiff -u -r1.1.1.2 -r1.2 \
    src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_rlock.c
cvs rdiff -u -r1.1.1.1 -r1.2 \
    src/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_rlock.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_rlock.c
diff -u src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_rlock.c:1.1.1.2 src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_rlock.c:1.2
--- src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_rlock.c:1.1.1.2	Sat Feb 27 22:31:26 2010
+++ src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_rlock.c	Mon Oct 15 14:15:59 2012
@@ -94,6 +94,33 @@
 
 #include <sys/zfs_rlock.h>
 
+static int
+zfs_range_lock_hold(rl_t *rl)
+{
+
+	KASSERT(mutex_owned(&rl->r_zp->z_range_lock));
+
+	if (rl->r_refcnt >= ULONG_MAX)
+		return (ENFILE); /* XXX What to do?  */
+
+	rl->r_refcnt++;
+	return (0);
+}
+
+static void
+zfs_range_lock_rele(rl_t *rl)
+{
+
+	KASSERT(mutex_owned(&rl->r_zp->z_range_lock));
+	KASSERT(rl->r_refcnt > 0);
+
+	if (--rl->r_refcnt == 0) {
+		cv_destroy(&rl->r_wr_cv);
+		cv_destroy(&rl->r_rd_cv);
+		kmem_free(rl, sizeof (rl_t));
+	}
+}
+
 /*
  * Check if a write lock can be grabbed, or wait and recheck until available.
  */
@@ -169,10 +196,12 @@ zfs_range_lock_writer(znode_t *zp, rl_t 
 		return;
 wait:
 		if (!rl->r_write_wanted) {
-			cv_init(&rl->r_wr_cv, NULL, CV_DEFAULT, NULL);
 			rl->r_write_wanted = B_TRUE;
 		}
+		if (zfs_range_lock_hold(rl) != 0)
+			panic("too many waiters on zfs range lock %p", rl);
 		cv_wait(&rl->r_wr_cv, &zp->z_range_lock);
+		zfs_range_lock_rele(rl);
 
 		/* reset to original */
 		new->r_off = off;
@@ -205,8 +234,11 @@ zfs_range_proxify(avl_tree_t *tree, rl_t
 	proxy->r_cnt = 1;
 	proxy->r_type = RL_READER;
 	proxy->r_proxy = B_TRUE;
+	cv_init(&proxy->r_wr_cv, NULL, CV_DEFAULT, NULL);
+	cv_init(&proxy->r_rd_cv, NULL, CV_DEFAULT, NULL);
 	proxy->r_write_wanted = B_FALSE;
 	proxy->r_read_wanted = B_FALSE;
+	proxy->r_refcnt = 1;
 	avl_add(tree, proxy);
 
 	return (proxy);
@@ -234,6 +266,9 @@ zfs_range_split(avl_tree_t *tree, rl_t *
 	rear->r_cnt = rl->r_cnt;
 	rear->r_type = RL_READER;
 	rear->r_proxy = B_TRUE;
+	cv_init(&rear->r_wr_cv, NULL, CV_DEFAULT, NULL);
+	cv_init(&rear->r_rd_cv, NULL, CV_DEFAULT, NULL);
+	rear->r_refcnt = 1;
 	rear->r_write_wanted = B_FALSE;
 	rear->r_read_wanted = B_FALSE;
 
@@ -259,8 +294,11 @@ zfs_range_new_proxy(avl_tree_t *tree, ui
 	rl->r_cnt = 1;
 	rl->r_type = RL_READER;
 	rl->r_proxy = B_TRUE;
+	cv_init(&rl->r_wr_cv, NULL, CV_DEFAULT, NULL);
+	cv_init(&rl->r_rd_cv, NULL, CV_DEFAULT, NULL);
 	rl->r_write_wanted = B_FALSE;
 	rl->r_read_wanted = B_FALSE;
+	rl->r_refcnt = 1;
 	avl_add(tree, rl);
 }
 
@@ -372,10 +410,13 @@ retry:
 	if (prev && (off < prev->r_off + prev->r_len)) {
 		if ((prev->r_type == RL_WRITER) || (prev->r_write_wanted)) {
 			if (!prev->r_read_wanted) {
-				cv_init(&prev->r_rd_cv, NULL, CV_DEFAULT, NULL);
 				prev->r_read_wanted = B_TRUE;
 			}
+			if (zfs_range_lock_hold(prev) != 0)
+				panic("too many waiters on zfs range lock %p",
+				    prev);
 			cv_wait(&prev->r_rd_cv, &zp->z_range_lock);
+			zfs_range_lock_rele(prev);
 			goto retry;
 		}
 		if (off + len < prev->r_off + prev->r_len)
@@ -395,10 +436,13 @@ retry:
 			goto got_lock;
 		if ((next->r_type == RL_WRITER) || (next->r_write_wanted)) {
 			if (!next->r_read_wanted) {
-				cv_init(&next->r_rd_cv, NULL, CV_DEFAULT, NULL);
 				next->r_read_wanted = B_TRUE;
 			}
+			if (zfs_range_lock_hold(next) != 0)
+				panic("too many waiters on zfs range lock %p",
+				    next);
 			cv_wait(&next->r_rd_cv, &zp->z_range_lock);
+			zfs_range_lock_rele(next);
 			goto retry;
 		}
 		if (off + len <= next->r_off + next->r_len)
@@ -435,20 +479,25 @@ zfs_range_lock(znode_t *zp, uint64_t off
 	new->r_cnt = 1; /* assume it's going to be in the tree */
 	new->r_type = type;
 	new->r_proxy = B_FALSE;
+	cv_init(&new->r_wr_cv, NULL, CV_DEFAULT, NULL);
+	cv_init(&new->r_rd_cv, NULL, CV_DEFAULT, NULL);
 	new->r_write_wanted = B_FALSE;
 	new->r_read_wanted = B_FALSE;
+	new->r_refcnt = 1;
 
 	mutex_enter(&zp->z_range_lock);
 	if (type == RL_READER) {
 		/*
 		 * First check for the usual case of no locks
 		 */
-		if (avl_numnodes(&zp->z_range_avl) == 0)
+		if (avl_numnodes(&zp->z_range_avl) == 0) {
 			avl_add(&zp->z_range_avl, new);
-		else
+		} else {
 			zfs_range_lock_reader(zp, new);
-	} else
+		}
+	} else {
 		zfs_range_lock_writer(zp, new); /* RL_WRITER or RL_APPEND */
+	}
 	mutex_exit(&zp->z_range_lock);
 	return (new);
 }
@@ -474,11 +523,9 @@ zfs_range_unlock_reader(znode_t *zp, rl_
 		avl_remove(tree, remove);
 		if (remove->r_write_wanted) {
 			cv_broadcast(&remove->r_wr_cv);
-			cv_destroy(&remove->r_wr_cv);
 		}
 		if (remove->r_read_wanted) {
 			cv_broadcast(&remove->r_rd_cv);
-			cv_destroy(&remove->r_rd_cv);
 		}
 	} else {
 		ASSERT3U(remove->r_cnt, ==, 0);
@@ -507,17 +554,15 @@ zfs_range_unlock_reader(znode_t *zp, rl_
 				avl_remove(tree, rl);
 				if (rl->r_write_wanted) {
 					cv_broadcast(&rl->r_wr_cv);
-					cv_destroy(&rl->r_wr_cv);
 				}
 				if (rl->r_read_wanted) {
 					cv_broadcast(&rl->r_rd_cv);
-					cv_destroy(&rl->r_rd_cv);
 				}
-				kmem_free(rl, sizeof (rl_t));
+				zfs_range_lock_rele(rl);
 			}
 		}
 	}
-	kmem_free(remove, sizeof (rl_t));
+	zfs_range_lock_rele(remove);
 }
 
 /*
@@ -536,16 +581,14 @@ zfs_range_unlock(rl_t *rl)
 	if (rl->r_type == RL_WRITER) {
 		/* writer locks can't be shared or split */
 		avl_remove(&zp->z_range_avl, rl);
-		mutex_exit(&zp->z_range_lock);
 		if (rl->r_write_wanted) {
 			cv_broadcast(&rl->r_wr_cv);
-			cv_destroy(&rl->r_wr_cv);
 		}
 		if (rl->r_read_wanted) {
 			cv_broadcast(&rl->r_rd_cv);
-			cv_destroy(&rl->r_rd_cv);
 		}
-		kmem_free(rl, sizeof (rl_t));
+		zfs_range_lock_rele(rl);
+		mutex_exit(&zp->z_range_lock);
 	} else {
 		/*
 		 * lock may be shared, let zfs_range_unlock_reader()
@@ -577,11 +620,11 @@ zfs_range_reduce(rl_t *rl, uint64_t off,
 	mutex_enter(&zp->z_range_lock);
 	rl->r_off = off;
 	rl->r_len = len;
-	mutex_exit(&zp->z_range_lock);
 	if (rl->r_write_wanted)
 		cv_broadcast(&rl->r_wr_cv);
 	if (rl->r_read_wanted)
 		cv_broadcast(&rl->r_rd_cv);
+	mutex_exit(&zp->z_range_lock);
 }
 
 /*

Index: src/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_rlock.h
diff -u src/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_rlock.h:1.1.1.1 src/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_rlock.h:1.2
--- src/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_rlock.h:1.1.1.1	Fri Aug  7 18:33:43 2009
+++ src/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_rlock.h	Mon Oct 15 14:15:59 2012
@@ -54,6 +54,7 @@ typedef struct rl {
 	uint8_t r_proxy;	/* acting for original range */
 	uint8_t r_write_wanted;	/* writer wants to lock this range */
 	uint8_t r_read_wanted;	/* reader wants to lock this range */
+	unsigned long r_refcnt; /* reference count for cv waits */
 } rl_t;
 
 /*

Reply via email to