Add rcu_sync_struct->exclusive boolean set by rcu_sync_init(), it
obviously controls the exclusiveness of rcu_sync_enter(). This is
what percpu_down_write() actually wants.

We turn ->gp_wait into "struct completion gp_comp", it is used as
a resource counter in "exclusive" mode. Otherwise we only use its
completion->wait member for wait_event/wake_up_all. We never mix
the completion/wait_queue_head_t operations.

TODO: we can cleanup this logic and avoid "struct completion", but
this needs a bit more changes.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/percpu-rwsem.h  |    2 +-
 include/linux/rcusync.h       |   29 ++++++++++++++++-------------
 kernel/locking/percpu-rwsem.c |    2 +-
 kernel/rcu/sync.c             |   25 ++++++++++++++++++++-----
 4 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index e12ce86..9202e73 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -21,7 +21,7 @@ static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_refcount_##name);  \
 static struct percpu_rw_semaphore name = {                             \
        .refcount = &__percpu_rwsem_refcount_##name,                    \
        .state = 0,                                                     \
-       .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),        \
+       .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC, 1),     \
        .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer),           \
        .rw_sem = __RWSEM_INITIALIZER(name.rw_sem),                     \
 }
diff --git a/include/linux/rcusync.h b/include/linux/rcusync.h
index 0135838..aaea86a 100644
--- a/include/linux/rcusync.h
+++ b/include/linux/rcusync.h
@@ -1,7 +1,7 @@
 #ifndef _LINUX_RCUSYNC_H_
 #define _LINUX_RCUSYNC_H_
 
-#include <linux/wait.h>
+#include <linux/completion.h>
 #include <linux/rcupdate.h>
 
 enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC };
@@ -9,11 +9,12 @@ enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC };
 struct rcu_sync_struct {
        int                     gp_state;
        int                     gp_count;
-       wait_queue_head_t       gp_wait;
+       struct completion       gp_comp;
 
        int                     cb_state;
        struct rcu_head         cb_head;
 
+       bool                    exclusive;
        enum rcu_sync_type      gp_type;
 };
 
@@ -28,30 +28,32 @@ static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss)
 #endif
 }
 
-extern void rcu_sync_init(struct rcu_sync_struct *, enum rcu_sync_type);
+extern void rcu_sync_init(struct rcu_sync_struct *,
+                               enum rcu_sync_type, bool excl);
 extern void rcu_sync_enter(struct rcu_sync_struct *);
 extern void rcu_sync_exit(struct rcu_sync_struct *);
 extern void rcu_sync_dtor(struct rcu_sync_struct *);
 
-#define __RCU_SYNC_INITIALIZER(name, type) {                           \
+#define __RCU_SYNC_INITIALIZER(name, type, excl) {                     \
                .gp_state = 0,                                          \
                .gp_count = 0,                                          \
-               .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \
+               .gp_comp = COMPLETION_INITIALIZER(name.gp_comp),        \
                .cb_state = 0,                                          \
+               .exclusive = excl,                                      \
                .gp_type = type,                                        \
        }
 
-#define        __DEFINE_RCU_SYNC(name, type)   \
-       struct rcu_sync_struct name = __RCU_SYNC_INITIALIZER(name, type)
+#define        __DEFINE_RCU_SYNC(name, type, excl)     \
+       struct rcu_sync_struct name = __RCU_SYNC_INITIALIZER(name, type, excl)
 
-#define DEFINE_RCU_SYNC(name)          \
-       __DEFINE_RCU_SYNC(name, RCU_SYNC)
+#define DEFINE_RCU_SYNC(name, excl)            \
+       __DEFINE_RCU_SYNC(name, RCU_SYNC, excl)
 
-#define DEFINE_RCU_SCHED_SYNC(name)    \
-       __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC)
+#define DEFINE_RCU_SCHED_SYNC(name, excl)      \
+       __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC, excl)
 
-#define DEFINE_RCU_BH_SYNC(name)       \
-       __DEFINE_RCU_SYNC(name, RCU_BH_SYNC)
+#define DEFINE_RCU_BH_SYNC(name, excl) \
+       __DEFINE_RCU_SYNC(name, RCU_BH_SYNC, excl)
 
 #endif /* _LINUX_RCUSYNC_H_ */
 
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 915646c..014d2f4 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -18,7 +18,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
                return -ENOMEM;
 
        sem->state = readers_slow;
-       rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
+       rcu_sync_init(&sem->rss, RCU_SCHED_SYNC, true);
        init_waitqueue_head(&sem->writer);
        __init_rwsem(&sem->rw_sem, name, rwsem_key);
 
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index 8835ad1..03ddc61 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -38,7 +38,8 @@ static const struct {
 enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
 enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
 
-#define        rss_lock        gp_wait.lock
+#define        rss_lock        gp_comp.wait.lock
+#define        gp_wait         gp_comp.wait
 
 #ifdef CONFIG_PROVE_RCU
 bool __rcu_sync_is_idle(struct rcu_sync_struct *rss)
@@ -49,10 +50,12 @@ bool __rcu_sync_is_idle(struct rcu_sync_struct *rss)
 EXPORT_SYMBOL_GPL(__rcu_sync_is_idle);
 #endif
 
-void rcu_sync_init(struct rcu_sync_struct *rss, enum rcu_sync_type type)
+void rcu_sync_init(struct rcu_sync_struct *rss,
+                       enum rcu_sync_type type, bool excl)
 {
        memset(rss, 0, sizeof(*rss));
-       init_waitqueue_head(&rss->gp_wait);
+       init_completion(&rss->gp_comp);
+       rss->exclusive = excl;
        rss->gp_type = type;
 }
 
@@ -72,9 +75,13 @@ void rcu_sync_enter(struct rcu_sync_struct *rss)
        if (need_sync) {
                gp_ops[rss->gp_type].sync();
                rss->gp_state = GP_PASSED;
-               wake_up_all(&rss->gp_wait);
+               if (!rss->exclusive)
+                       wake_up_all(&rss->gp_wait);
        } else if (need_wait) {
-               wait_event(rss->gp_wait, rss->gp_state == GP_PASSED);
+               if (!rss->exclusive)
+                       wait_event(rss->gp_wait, rss->gp_state == GP_PASSED);
+               else
+                       wait_for_completion(&rss->gp_comp);
        } else {
                /*
                 * Possible when there's a pending CB from a rcu_sync_exit().
@@ -119,6 +126,12 @@ static void rcu_sync_func(struct rcu_head *rcu)
        spin_unlock_irqrestore(&rss->rss_lock, flags);
 }
 
+static inline void __complete_locked(struct completion *x)
+{
+       x->done++;
+       __wake_up_locked(&x->wait, TASK_NORMAL, 1);
+}
+
 void rcu_sync_exit(struct rcu_sync_struct *rss)
 {
        spin_lock_irq(&rss->rss_lock);
@@ -129,6 +142,8 @@ void rcu_sync_exit(struct rcu_sync_struct *rss)
                } else if (rss->cb_state == CB_PENDING) {
                        rss->cb_state = CB_REPLAY;
                }
+       } else if (rss->exclusive) {
+               __complete_locked(&rss->gp_comp);
        }
        spin_unlock_irq(&rss->rss_lock);
 }
-- 
1.5.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to