On 08.06.2014 21:32, Alexander Motin wrote:
On 07.06.2014 22:35, Matthew Ahrens wrote:
code review:
zfsvfs_hold/rele need to pass the correct tags through to rrw_enter().
The existing code will not work (if reference_tracking_enable), because
you will be holding with tag "zfsvfs_hold" and releasing with
"zfsvfs_rele". The existing code holds and releases with tag e.g.
"zfs_prop_set_userquota"; that needs to remain the case.
Indeed there was a bug, thank you.
I think it would be cleaner to encapsulate the multitude of locks within
the rrw code, but I can live with it as is.
I had such a thought initially, but then decided it would be too big a mess
to do it inside. Also it may not always be needed. But now you've asked,
I've made intermediate solution: let me introduce Recursable Read-Mostly
locks, as a wrapper on top of existing Recursable Read-Write locks. :)
The attached patch implements rrmlock, and uses them for teardown lock.
It indeed looks much better. Thanks for pushing me to do it. :) What do
you think?
I'm sorry, I've missed a bug until built kernel with debugging. Fixed
version is attached.
--
Alexander Motin
Index: rrwlock.c
===================================================================
--- rrwlock.c (revision 267215)
+++ rrwlock.c (working copy)
@@ -286,3 +286,73 @@ rrw_tsd_destroy(void *arg)
(void *)curthread, (void *)rn->rn_rrl);
}
}
+
+
+/*
+ * Initialize a "read-mostly" (rrm) lock: an array of RRM_NUM_LOCKS
+ * independent rrw locks that reading threads are hashed across, so
+ * concurrent readers rarely touch the same lock.  track_all is passed
+ * straight through to each underlying rrw lock.
+ */
+void
+rrm_init(rrmlock_t *rrl, boolean_t track_all)
+{
+ int i;
+
+ for (i = 0; i < RRM_NUM_LOCKS; i++)
+ rrw_init(&rrl->locks[i], track_all);
+}
+
+/* Destroy every underlying rrw lock; mirror of rrm_init(). */
+void
+rrm_destroy(rrmlock_t *rrl)
+{
+ int i;
+
+ for (i = 0; i < RRM_NUM_LOCKS; i++)
+ rrw_destroy(&rrl->locks[i]);
+}
+
+/*
+ * Acquire the rrm lock as reader or writer, mirroring rrw_enter().
+ * tag is used only on the reader path here; on release, rrm_exit()
+ * must be called with the same tag.
+ */
+void
+rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag)
+{
+ if (rw == RW_READER)
+ rrm_enter_read(rrl, tag);
+ else
+ rrm_enter_write(rrl);
+}
+
+/*
+ * The mailer hard-wrapped this macro in the original message; it must be
+ * a single line (or use a backslash continuation) for the patch to apply.
+ */
+#define RRM_TD_LOCK() (((uint32_t)(uintptr_t)(curthread)) % RRM_NUM_LOCKS)
+
+/*
+ * Readers take only the single component lock selected by hashing
+ * curthread (RRM_TD_LOCK).  The same thread always hashes to the same
+ * lock, which is how rrm_exit() later finds the lock to release.
+ */
+void
+rrm_enter_read(rrmlock_t *rrl, void *tag)
+{
+
+ rrw_enter_read(&rrl->locks[RRM_TD_LOCK()], tag);
+}
+
+/*
+ * A writer must exclude readers on every component lock, so it acquires
+ * all of them, always in ascending index order (a consistent order
+ * keeps concurrent writers from deadlocking against each other).
+ */
+void
+rrm_enter_write(rrmlock_t *rrl)
+{
+ int i;
+
+ for (i = 0; i < RRM_NUM_LOCKS; i++)
+ rrw_enter_write(&rrl->locks[i]);
+}
+
+/*
+ * Release the rrm lock.  If this thread is the writer it holds all
+ * component locks, so checking locks[0].rr_writer is sufficient to
+ * detect that case and drop every lock; otherwise drop only the single
+ * lock this thread hashed to in rrm_enter_read().
+ */
+void
+rrm_exit(rrmlock_t *rrl, void *tag)
+{
+ int i;
+
+ if (rrl->locks[0].rr_writer == curthread) {
+ for (i = 0; i < RRM_NUM_LOCKS; i++)
+ rrw_exit(&rrl->locks[i], tag);
+ } else {
+ rrw_exit(&rrl->locks[RRM_TD_LOCK()], tag);
+ }
+}
+
+/*
+ * Report whether the current thread holds the rrm lock as rw.  A writer
+ * holds every component lock, so querying locks[0] suffices; a reader
+ * only ever holds the lock it hashed to, so query that one.
+ */
+boolean_t
+rrm_held(rrmlock_t *rrl, krw_t rw)
+{
+
+ if (rw == RW_WRITER) {
+ return (rrw_held(&rrl->locks[0], rw));
+ } else {
+ return (rrw_held(&rrl->locks[RRM_TD_LOCK()], rw));
+ }
+}
Index: sys/rrwlock.h
===================================================================
--- sys/rrwlock.h (revision 267215)
+++ sys/rrwlock.h (working copy)
@@ -79,6 +79,24 @@ void rrw_tsd_destroy(void *arg);
#define RRW_LOCK_HELD(x) \
(rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER))
+/*
+ * A "read-mostly" lock: an array of rrw locks that readers are spread
+ * across by hashing curthread, while a writer takes all of them.
+ * NOTE(review): 17 is presumably a prime chosen to spread thread
+ * pointers evenly across the array -- confirm with the author.
+ */
+#define RRM_NUM_LOCKS 17
+typedef struct rrmlock {
+ rrwlock_t locks[RRM_NUM_LOCKS];
+} rrmlock_t;
+
+void rrm_init(rrmlock_t *rrl, boolean_t track_all);
+void rrm_destroy(rrmlock_t *rrl);
+void rrm_enter(rrmlock_t *rrl, krw_t rw, void *tag);
+void rrm_enter_read(rrmlock_t *rrl, void *tag);
+void rrm_enter_write(rrmlock_t *rrl);
+void rrm_exit(rrmlock_t *rrl, void *tag);
+boolean_t rrm_held(rrmlock_t *rrl, krw_t rw);
+
+/* Convenience predicates mirroring the RRW_*_HELD macros above. */
+#define RRM_READ_HELD(x) rrm_held(x, RW_READER)
+#define RRM_WRITE_HELD(x) rrm_held(x, RW_WRITER)
+#define RRM_LOCK_HELD(x) \
+ (rrm_held(x, RW_WRITER) || rrm_held(x, RW_READER))
+
#ifdef __cplusplus
}
#endif
Index: sys/zfs_vfsops.h
===================================================================
--- sys/zfs_vfsops.h (revision 267215)
+++ sys/zfs_vfsops.h (working copy)
@@ -64,7 +64,7 @@ struct zfsvfs {
int z_norm; /* normalization flags */
boolean_t z_atime; /* enable atimes mount option */
boolean_t z_unmounted; /* unmounted */
- rrwlock_t z_teardown_lock;
+ rrmlock_t z_teardown_lock;
krwlock_t z_teardown_inactive_lock;
list_t z_all_znodes; /* all vnodes in the fs */
kmutex_t z_znodes_lock; /* lock for z_all_znodes */
Index: sys/zfs_znode.h
===================================================================
--- sys/zfs_znode.h (revision 267215)
+++ sys/zfs_znode.h (working copy)
@@ -256,7 +256,7 @@ VTOZ(vnode_t *vp)
/* Called on entry to each ZFS vnode and vfs operation */
#define ZFS_ENTER(zfsvfs) \
{ \
- rrw_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
+ rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
if ((zfsvfs)->z_unmounted) { \
ZFS_EXIT(zfsvfs); \
return (EIO); \
@@ -265,10 +265,10 @@ VTOZ(vnode_t *vp)
/* Called on entry to each ZFS vnode and vfs operation that can not return EIO */
#define ZFS_ENTER_NOERROR(zfsvfs) \
- rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG)
+ rrm_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG)
/* Must be called before exiting the vop */
-#define ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
+#define ZFS_EXIT(zfsvfs) rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG)
/* Verifies the znode is valid */
#define ZFS_VERIFY_ZP(zp) \
Index: zfs_ioctl.c
===================================================================
--- zfs_ioctl.c (revision 267215)
+++ zfs_ioctl.c (working copy)
@@ -1464,7 +1464,7 @@ zfsvfs_hold(const char *name, void *tag, zfsvfs_t
if (getzfsvfs(name, zfvp) != 0)
error = zfsvfs_create(name, zfvp);
if (error == 0) {
- rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
+ rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
RW_READER, tag);
if ((*zfvp)->z_unmounted) {
/*
@@ -1472,7 +1472,7 @@ zfsvfs_hold(const char *name, void *tag, zfsvfs_t
* thread should be just about to disassociate the
* objset from the zfsvfs.
*/
- rrw_exit(&(*zfvp)->z_teardown_lock, tag);
+ rrm_exit(&(*zfvp)->z_teardown_lock, tag);
return (SET_ERROR(EBUSY));
}
}
@@ -1482,7 +1482,7 @@ zfsvfs_hold(const char *name, void *tag, zfsvfs_t
static void
zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
{
- rrw_exit(&zfsvfs->z_teardown_lock, tag);
+ rrm_exit(&zfsvfs->z_teardown_lock, tag);
if (zfsvfs->z_vfs) {
VFS_RELE(zfsvfs->z_vfs);
Index: zfs_vfsops.c
===================================================================
--- zfs_vfsops.c (revision 267215)
+++ zfs_vfsops.c (working copy)
@@ -988,7 +988,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
offsetof(znode_t, z_link_node));
- rrw_init(&zfsvfs->z_teardown_lock, B_FALSE);
+ rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
@@ -1104,7 +1104,7 @@ zfsvfs_free(zfsvfs_t *zfsvfs)
mutex_destroy(&zfsvfs->z_znodes_lock);
mutex_destroy(&zfsvfs->z_lock);
list_destroy(&zfsvfs->z_all_znodes);
- rrw_destroy(&zfsvfs->z_teardown_lock);
+ rrm_destroy(&zfsvfs->z_teardown_lock);
rw_destroy(&zfsvfs->z_teardown_inactive_lock);
rw_destroy(&zfsvfs->z_fuid_lock);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
@@ -1833,7 +1833,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmoun
{
znode_t *zp;
- rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
+ rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
if (!unmounting) {
/*
@@ -1866,7 +1866,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmoun
*/
if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
rw_exit(&zfsvfs->z_teardown_inactive_lock);
- rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
+ rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
return (SET_ERROR(EIO));
}
@@ -1893,7 +1893,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmoun
*/
if (unmounting) {
zfsvfs->z_unmounted = B_TRUE;
- rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
+ rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
rw_exit(&zfsvfs->z_teardown_inactive_lock);
}
@@ -1970,9 +1970,9 @@ zfs_umount(vfs_t *vfsp, int fflag)
* vflush(FORCECLOSE). This way we ensure no future vnops
* will be called and risk operating on DOOMED vnodes.
*/
- rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
+ rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
zfsvfs->z_unmounted = B_TRUE;
- rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
+ rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
}
/*
@@ -2240,7 +2240,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname
znode_t *zp;
uint64_t sa_obj = 0;
- ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
+ ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
/*
@@ -2296,7 +2296,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname
bail:
/* release the VOPs */
rw_exit(&zfsvfs->z_teardown_inactive_lock);
- rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
+ rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
if (err) {
/*
Index: zfs_znode.c
===================================================================
--- zfs_znode.c (revision 267215)
+++ zfs_znode.c (working copy)
@@ -276,7 +276,7 @@ zfs_znode_move(void *buf, void *newbuf, size_t siz
* can safely ensure that the filesystem is not and will not be
* unmounted. The next statement is equivalent to ZFS_ENTER().
*/
- rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
+ rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
if (zfsvfs->z_unmounted) {
ZFS_EXIT(zfsvfs);
rw_exit(&zfsvfs_lock);
_______________________________________________
developer mailing list
[email protected]
http://lists.open-zfs.org/mailman/listinfo/developer