[PATCH 3/3] percpu-rwsem: introduce percpu_rw_semaphore->recursive mode

2015-06-28 Thread Oleg Nesterov
Add percpu_rw_semaphore->recursive boolean. If it is true then the
recursive percpu_down_read() is safe, percpu_down_write() doesn't
exclude the new readers, like cpu_hotplug_begin().

Signed-off-by: Oleg Nesterov 
---
 include/linux/percpu-rwsem.h  |   15 ++-
 kernel/events/uprobes.c   |2 +-
 kernel/locking/percpu-rwsem.c |   15 +++
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 9202e73..9441abd 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -13,16 +13,18 @@ struct percpu_rw_semaphore {
int state;
struct rcu_sync_struct  rss;
wait_queue_head_t   writer;
+   boolrecursive;
struct rw_semaphore rw_sem;
 };
 
-#define DEFINE_STATIC_PERCPU_RWSEM(name)   \
+#define DEFINE_STATIC_PERCPU_RWSEM(name, rec)  \
 static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_refcount_##name);   \
 static struct percpu_rw_semaphore name = { \
.refcount = &__percpu_rwsem_refcount_##name,\
.state = 0, \
.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC, 1), \
.writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer),   \
+   .recursive = rec,   \
.rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
 }
 
@@ -37,7 +39,10 @@ static inline void percpu_down_read(struct 
percpu_rw_semaphore *sem)
 {
might_sleep();
 
-   rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
+   if (sem->recursive)
+   rwlock_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
+   else
+   rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
 
preempt_disable();
/*
@@ -97,14 +102,14 @@ static inline void percpu_up_read(struct 
percpu_rw_semaphore *sem)
 extern void percpu_down_write(struct percpu_rw_semaphore *);
 extern void percpu_up_write(struct percpu_rw_semaphore *);
 
-extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
+extern int __percpu_init_rwsem(struct percpu_rw_semaphore *, bool,
const char *, struct lock_class_key *);
 extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 
-#define percpu_init_rwsem(sem) \
+#define percpu_init_rwsem(sem, recursive)  \
 ({ \
static struct lock_class_key rwsem_key; \
-   __percpu_init_rwsem(sem, #sem, &rwsem_key); \
+   __percpu_init_rwsem(sem, recursive, #sem, &rwsem_key);  \
 })
 
 #endif
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index cb346f2..a4813a1 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1985,7 +1985,7 @@ static int __init init_uprobes(void)
for (i = 0; i < UPROBES_HASH_SZ; i++)
mutex_init(&uprobes_mmap_mutex[i]);
 
-   if (percpu_init_rwsem(&dup_mmap_sem))
+   if (percpu_init_rwsem(&dup_mmap_sem, false))
return -ENOMEM;
 
return register_die_notifier(&uprobe_exception_nb);
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 609c13b..3db7c45 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -10,7 +10,7 @@
 
 enum { readers_slow, readers_block };
 
-int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
+int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, bool recursive,
const char *name, struct lock_class_key *rwsem_key)
 {
sem->refcount = alloc_percpu(unsigned int);
@@ -20,6 +20,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
sem->state = readers_slow;
rcu_sync_init(&sem->rss, RCU_SCHED_SYNC, true);
init_waitqueue_head(&sem->writer);
+   sem->recursive = recursive;
__init_rwsem(&sem->rw_sem, name, rwsem_key);
 
return 0;
@@ -124,9 +125,15 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem)
  */
 static bool readers_active_check(struct percpu_rw_semaphore *sem)
 {
-   if (per_cpu_sum(*sem->refcount) != 0)
+   if (sem->recursive && !down_write_trylock(&sem->rw_sem))
return false;
 
+   if (per_cpu_sum(*sem->refcount) != 0) {
+   if (sem->recursive)
+   up_write(&sem->rw_sem);
+   return false;
+   }
+
/*
 * If we observed the decrement; ensure we see the entire critical
 * section.
@@ -155,8 +162,8 @@ void percpu_down_write(struct percpu_rw_semaphore *sem)
 * then we are guaranteed to see their sem->refcount increment, and
 * therefore will wait for them.
 */
-
-   down_write(&sem->rw_sem);
+   if (!sem->recursive)
+   

[PATCH 3/3] percpu-rwsem: introduce percpu_rw_semaphore->recursive mode

2015-06-28 Thread Oleg Nesterov
Add percpu_rw_semaphore->recursive boolean. If it is true then the
recursive percpu_down_read() is safe, percpu_down_write() doesn't
exclude the new readers, like cpu_hotplug_begin().

Signed-off-by: Oleg Nesterov o...@redhat.com
---
 include/linux/percpu-rwsem.h  |   15 ++-
 kernel/events/uprobes.c   |2 +-
 kernel/locking/percpu-rwsem.c |   15 +++
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 9202e73..9441abd 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -13,16 +13,18 @@ struct percpu_rw_semaphore {
int state;
struct rcu_sync_struct  rss;
wait_queue_head_t   writer;
+   boolrecursive;
struct rw_semaphore rw_sem;
 };
 
-#define DEFINE_STATIC_PERCPU_RWSEM(name)   \
+#define DEFINE_STATIC_PERCPU_RWSEM(name, rec)  \
 static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_refcount_##name);   \
 static struct percpu_rw_semaphore name = { \
.refcount = &__percpu_rwsem_refcount_##name,\
.state = 0, \
.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC, 1), \
.writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer),   \
+   .recursive = rec,   \
.rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
 }
 
@@ -37,7 +39,10 @@ static inline void percpu_down_read(struct 
percpu_rw_semaphore *sem)
 {
might_sleep();
 
-   rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
+   if (sem->recursive)
+   rwlock_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
+   else
+   rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
 
preempt_disable();
/*
@@ -97,14 +102,14 @@ static inline void percpu_up_read(struct 
percpu_rw_semaphore *sem)
 extern void percpu_down_write(struct percpu_rw_semaphore *);
 extern void percpu_up_write(struct percpu_rw_semaphore *);
 
-extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
+extern int __percpu_init_rwsem(struct percpu_rw_semaphore *, bool,
const char *, struct lock_class_key *);
 extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 
-#define percpu_init_rwsem(sem) \
+#define percpu_init_rwsem(sem, recursive)  \
 ({ \
static struct lock_class_key rwsem_key; \
-   __percpu_init_rwsem(sem, #sem, &rwsem_key); \
+   __percpu_init_rwsem(sem, recursive, #sem, &rwsem_key);  \
 })
 
 #endif
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index cb346f2..a4813a1 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1985,7 +1985,7 @@ static int __init init_uprobes(void)
for (i = 0; i < UPROBES_HASH_SZ; i++)
mutex_init(&uprobes_mmap_mutex[i]);
 
-   if (percpu_init_rwsem(&dup_mmap_sem))
+   if (percpu_init_rwsem(&dup_mmap_sem, false))
return -ENOMEM;
 
return register_die_notifier(&uprobe_exception_nb);
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 609c13b..3db7c45 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -10,7 +10,7 @@
 
 enum { readers_slow, readers_block };
 
-int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
+int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, bool recursive,
const char *name, struct lock_class_key *rwsem_key)
 {
sem->refcount = alloc_percpu(unsigned int);
@@ -20,6 +20,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
sem->state = readers_slow;
rcu_sync_init(&sem->rss, RCU_SCHED_SYNC, true);
init_waitqueue_head(&sem->writer);
+   sem->recursive = recursive;
__init_rwsem(&sem->rw_sem, name, rwsem_key);
 
return 0;
@@ -124,9 +125,15 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem)
  */
 static bool readers_active_check(struct percpu_rw_semaphore *sem)
 {
-   if (per_cpu_sum(*sem->refcount) != 0)
+   if (sem->recursive && !down_write_trylock(&sem->rw_sem))
return false;
 
+   if (per_cpu_sum(*sem->refcount) != 0) {
+   if (sem->recursive)
+   up_write(&sem->rw_sem);
+   return false;
+   }
+
/*
 * If we observed the decrement; ensure we see the entire critical
 * section.
@@ -155,8 +162,8 @@ void percpu_down_write(struct percpu_rw_semaphore *sem)
* then we are guaranteed to see their sem->refcount increment, and
 * therefore will wait for them.
 */
-
-