__sb_start/end_write() can use percpu_down/up_read().
---
 fs/super.c         |  135 +++++++++++++++++-----------------------------------
 include/linux/fs.h |   14 +----
 2 files changed, 47 insertions(+), 102 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index 94303fc..6e336b8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -147,7 +147,7 @@ static void destroy_super(struct super_block *s)
        list_lru_destroy(&s->s_dentry_lru);
        list_lru_destroy(&s->s_inode_lru);
        for (i = 0; i < SB_FREEZE_LEVELS; i++)
-               percpu_counter_destroy(&s->s_writers.counter[i]);
+               percpu_free_rwsem(s->s_writers.rw_sem + i);
        security_sb_free(s);
        WARN_ON(!list_empty(&s->s_mounts));
        kfree(s->s_subtype);
@@ -178,14 +178,10 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
                goto fail;
 
        for (i = 0; i < SB_FREEZE_LEVELS; i++) {
-               if (percpu_counter_init(&s->s_writers.counter[i], 0,
-                                       GFP_KERNEL) < 0)
-                       goto fail;
-               lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
-                                &type->s_writers_key[i], 0);
+               __percpu_init_rwsem(&s->s_writers.rw_sem[i],
+                                       sb_writers_name[i],
+                                       &type->s_writers_key[i]);
        }
-       init_waitqueue_head(&s->s_writers.wait);
-       init_waitqueue_head(&s->s_writers.wait_unfrozen);
        s->s_bdi = &noop_backing_dev_info;
        s->s_flags = flags;
        INIT_HLIST_NODE(&s->s_instances);
@@ -1146,43 +1142,10 @@ out:
  */
 void __sb_end_write(struct super_block *sb, int level)
 {
-       percpu_counter_dec(&sb->s_writers.counter[level-1]);
-       /*
-        * Make sure s_writers are updated before we wake up waiters in
-        * freeze_super().
-        */
-       smp_mb();
-       if (waitqueue_active(&sb->s_writers.wait))
-               wake_up(&sb->s_writers.wait);
-       rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
+       percpu_up_read(sb->s_writers.rw_sem + level-1);
 }
 EXPORT_SYMBOL(__sb_end_write);
 
-#ifdef CONFIG_LOCKDEP
-/*
- * We want lockdep to tell us about possible deadlocks with freezing but
- * it's it bit tricky to properly instrument it. Getting a freeze protection
- * works as getting a read lock but there are subtle problems. XFS for example
- * gets freeze protection on internal level twice in some cases, which is OK
- * only because we already hold a freeze protection also on higher level. Due
- * to these cases we have to tell lockdep we are doing trylock when we
- * already hold a freeze protection for a higher freeze level.
- */
-static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
-                               unsigned long ip)
-{
-       int i;
-
-       if (!trylock) {
-               for (i = 0; i < level - 1; i++)
-                       if (lock_is_held(&sb->s_writers.lock_map[i])) {
-                               trylock = true;
-                               break;
-                       }
-       }
-       rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
-}
-#endif
 
 /*
  * This is an internal function, please use sb_start_{write,pagefault,intwrite}
@@ -1190,27 +1153,15 @@ static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
  */
 int __sb_start_write(struct super_block *sb, int level, bool wait)
 {
-retry:
-       if (unlikely(sb->s_writers.frozen >= level)) {
-               if (!wait)
-                       return 0;
-               wait_event(sb->s_writers.wait_unfrozen,
-                          sb->s_writers.frozen < level);
-       }
-
-#ifdef CONFIG_LOCKDEP
-       acquire_freeze_lock(sb, level, !wait, _RET_IP_);
-#endif
-       percpu_counter_inc(&sb->s_writers.counter[level-1]);
        /*
-        * Make sure counter is updated before we check for frozen.
-        * freeze_super() first sets frozen and then checks the counter.
+        * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        * !!!!!!!!! percpu_down_read_trylock() wasn't merged yet !!!!!!!!!!!!!
+        * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
         */
-       smp_mb();
-       if (unlikely(sb->s_writers.frozen >= level)) {
-               __sb_end_write(sb, level);
-               goto retry;
-       }
+       if (!wait)
+               return 0;
+
+       percpu_down_read(sb->s_writers.rw_sem + level-1);
        return 1;
 }
 EXPORT_SYMBOL(__sb_start_write);
@@ -1227,42 +1178,46 @@ EXPORT_SYMBOL(__sb_start_write);
  */
 static void sb_wait_write(struct super_block *sb, int level)
 {
-       s64 writers;
-
-       rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
-
-       do {
-               DEFINE_WAIT(wait);
-
-               /*
-                * We use a barrier in prepare_to_wait() to separate setting
-                * of frozen and checking of the counter
-                */
-               prepare_to_wait(&sb->s_writers.wait, &wait,
-                               TASK_UNINTERRUPTIBLE);
-
-               writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
-               if (writers)
-                       schedule();
-
-               finish_wait(&sb->s_writers.wait, &wait);
-       } while (writers);
+       percpu_down_write(sb->s_writers.rw_sem + level-1);
 }
 
+/*
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ * !!!!!!!!!!! Move this code into kernel/locking/percpu-rwsem.c !!!!!!!!!!!!!!
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ */
+#include "../kernel/locking/rwsem.h"
 /* Avoid the warning from lockdep_sys_exit() */
 static void sb_lockdep_release(struct super_block *sb)
 {
        int level;
+       struct percpu_rw_semaphore *sem;
+
+       for (level = 0; level < SB_FREEZE_LEVELS; ++level) {
+               sem = sb->s_writers.rw_sem + level;
+               rwsem_clear_owner(&sem->rw_sem);
+               rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
+       }
+}
+
+static void sb_lockdep_acquire(struct super_block *sb)
+{
+       int level;
+       struct percpu_rw_semaphore *sem;
 
        for (level = 0; level < SB_FREEZE_LEVELS; ++level) {
-               rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
+               sem = sb->s_writers.rw_sem + level;
+               rwsem_set_owner(&sem->rw_sem); /* unneeded */
+               rwsem_acquire(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
        }
 }
 
 static void sb_unlock_frozen(struct super_block *sb)
 {
-       smp_wmb();
-       wake_up(&sb->s_writers.wait_unfrozen);
+       int level;
+
+       for (level = 0; level < SB_FREEZE_LEVELS; ++level)
+               percpu_up_write(sb->s_writers.rw_sem + level);
 }
 
 /**
@@ -1323,8 +1278,6 @@ int freeze_super(struct super_block *sb)
 
        /* From now on, no new normal writers can start */
        sb->s_writers.frozen = SB_FREEZE_WRITE;
-       smp_wmb();
-
        /* Release s_umount to preserve sb_start_write -> s_umount ordering */
        up_write(&sb->s_umount);
 
@@ -1332,9 +1285,8 @@ int freeze_super(struct super_block *sb)
 
        /* Now we go and block page faults... */
        down_write(&sb->s_umount);
-       sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
-       smp_wmb();
 
+       sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
        sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
 
        /* All writers are done so after syncing there won't be dirty data */
@@ -1342,7 +1294,6 @@ int freeze_super(struct super_block *sb)
 
        /* Now wait for internal filesystem counter */
        sb->s_writers.frozen = SB_FREEZE_FS;
-       smp_wmb();
        sb_wait_write(sb, SB_FREEZE_FS);
 
        if (sb->s_op->freeze_fs) {
@@ -1350,8 +1301,8 @@ int freeze_super(struct super_block *sb)
                if (ret) {
                        printk(KERN_ERR
                                "VFS:Filesystem freeze failed\n");
-                       sb->s_writers.frozen = SB_UNFROZEN;
                        sb_unlock_frozen(sb);
+                       sb->s_writers.frozen = SB_UNFROZEN;
                        deactivate_locked_super(sb);
                        return ret;
                }
@@ -1386,6 +1337,8 @@ int thaw_super(struct super_block *sb)
        if (sb->s_flags & MS_RDONLY)
                goto out;
 
+       sb_lockdep_acquire(sb);
+
        if (sb->s_op->unfreeze_fs) {
                error = sb->s_op->unfreeze_fs(sb);
                if (error) {
@@ -1396,9 +1349,9 @@ int thaw_super(struct super_block *sb)
                }
        }
 
+       sb_unlock_frozen(sb);
 out:
        sb->s_writers.frozen = SB_UNFROZEN;
-       sb_unlock_frozen(sb);
        deactivate_locked_super(sb);
 
        return 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 35ec87e..314e2d0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1,7 +1,6 @@
 #ifndef _LINUX_FS_H
 #define _LINUX_FS_H
 
-
 #include <linux/linkage.h>
 #include <linux/wait.h>
 #include <linux/kdev_t.h>
@@ -30,6 +29,7 @@
 #include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/blk_types.h>
+#include <linux/percpu-rwsem.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -1246,16 +1246,8 @@ enum {
 #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
 
 struct sb_writers {
-       /* Counters for counting writers at each level */
-       struct percpu_counter   counter[SB_FREEZE_LEVELS];
-       wait_queue_head_t       wait;           /* queue for waiting for
-                                                  writers / faults to finish */
-       int                     frozen;         /* Is sb frozen? */
-       wait_queue_head_t       wait_unfrozen;  /* queue for waiting for
-                                                  sb to be thawed */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-       struct lockdep_map      lock_map[SB_FREEZE_LEVELS];
-#endif
+       int                             frozen;         /* Is sb frozen? */
+       struct percpu_rw_semaphore      rw_sem[SB_FREEZE_LEVELS];
 };
 
 struct super_block {
-- 
1.5.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to