On Fri, Aug 26, 2016 at 02:45:28PM +0200, Oleg Nesterov wrote:
> Otherwise this logic only works if mode is "compatible" with another
> exclusive waiter.
> 
> If some wq has both TASK_INTERRUPTIBLE and TASK_UNINTERRUPTIBLE waiters,
> abort_exclusive_wait() won't wake an uninterruptible waiter.
> 
> The main user is __wait_on_bit_lock() and currently it is fine but only
> because TASK_KILLABLE includes TASK_UNINTERRUPTIBLE and we do not have
> lock_page_interruptible() yet.

So mixing INTERRUPTIBLE and UNINTERRUPTIBLE and then not using
TASK_NORMAL for wakeups is a mis-feature/abuse of waitqueues IMO.

That said, people do 'creative' things, so maybe we should add some
debug infra to detect this mis-match.

Something like the below perhaps? It will miss people using the (old)
add_wait_queue() (which are plenty :/) but there's nothing quick we can
do about those.

Completely untested..

---
 include/linux/wait.h | 13 ++++++++++++-
 kernel/sched/wait.c  | 27 +++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/include/linux/wait.h b/include/linux/wait.h
index c3ff74d764fa..e99ea720c5f9 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -39,6 +39,9 @@ struct wait_bit_queue {
 struct __wait_queue_head {
         spinlock_t              lock;
         struct list_head        task_list;
+#ifdef CONFIG_DEBUG_WAITQUEUE
+       unsigned int            state;  /* TASK_NORMAL bits of waiter states seen; -1 = none yet */
+#endif
 };
 typedef struct __wait_queue_head wait_queue_head_t;
 
@@ -48,6 +51,13 @@ struct task_struct;
  * Macros for declaration and initialisaton of the datatypes
  */
 
+#ifdef CONFIG_DEBUG_WAITQUEUE
+#define __DEBUG_WAIT_QUEUE_HEAD_INIT(name)                             \
+       .state = -1,    /* sentinel: no waiter state recorded yet */
+#else /* !CONFIG_DEBUG_WAITQUEUE: expands to nothing */
+#define __DEBUG_WAIT_QUEUE_HEAD_INIT(name)
+#endif /* CONFIG_DEBUG_WAITQUEUE */
+
 #define __WAITQUEUE_INITIALIZER(name, tsk) {                           \
        .private        = tsk,                                          \
        .func           = default_wake_function,                        \
@@ -58,7 +68,8 @@ struct task_struct;
 
 #define __WAIT_QUEUE_HEAD_INITIALIZER(name) {                          \
        .lock           = __SPIN_LOCK_UNLOCKED(name.lock),              \
-       .task_list      = { &(name).task_list, &(name).task_list } }
+       .task_list      = { &(name).task_list, &(name).task_list },     \
+       __DEBUG_WAIT_QUEUE_HEAD_INIT(name) }
 
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
        wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index f15d6b6a538a..cb71c56c5e76 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -16,6 +16,9 @@ void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_c
        spin_lock_init(&q->lock);
        lockdep_set_class_and_name(&q->lock, key, name);
        INIT_LIST_HEAD(&q->task_list);
+#ifdef CONFIG_DEBUG_WAITQUEUE
+       q->state = -1;
+#endif
 }
 
 EXPORT_SYMBOL(__init_waitqueue_head);
@@ -67,6 +70,16 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 {
        wait_queue_t *curr, *next;
 
+#ifdef CONFIG_DEBUG_WAITQUEUE
+       if (q->state == TASK_NORMAL) {
+               /*
+                * Both INTERRUPTIBLE and UNINTERRUPTIBLE waiters were seen;
+                * a wake mode short of TASK_NORMAL can miss one of them.
+                */
+               WARN_ON_ONCE((mode & TASK_NORMAL) != TASK_NORMAL);
+       }
+#endif
+
        list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
                unsigned flags = curr->flags;
 
@@ -156,6 +169,17 @@ void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync);     /* For internal use only */
 
+/* Debug: record @state's TASK_NORMAL bits on @q; callers hold q->lock. */
+static inline void prepare_debug(wait_queue_head_t *q, int state)
+{
+#ifdef CONFIG_DEBUG_WAITQUEUE
+       /* -1 marks "no waiter recorded yet"; start the mask from empty. */
+       if (q->state == -1)
+               q->state = 0;
+       q->state |= state & TASK_NORMAL;
+#endif
+}
+
 /*
  * Note: we use "set_current_state()" _after_ the wait-queue add,
  * because we need a memory barrier there on SMP, so that any
@@ -178,6 +202,7 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
        if (list_empty(&wait->task_list))
                __add_wait_queue(q, wait);
        set_current_state(state);
+       prepare_debug(q, state);
        spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait);
@@ -192,6 +217,7 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
        if (list_empty(&wait->task_list))
                __add_wait_queue_tail(q, wait);
        set_current_state(state);
+       prepare_debug(q, state);
        spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
@@ -214,6 +240,7 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
                        __add_wait_queue(q, wait);
        }
        set_current_state(state);
+       prepare_debug(q, state);
        spin_unlock_irqrestore(&q->lock, flags);
 
        return 0;

Reply via email to