On Thu, 2017-06-22 at 18:34 +0200, Sebastian Andrzej Siewior wrote:
> On 2017-06-20 09:45:06 [+0200], Mike Galbraith wrote:
> > See ! and ?
> 
> See see.
> What about this:

I'll give it a go, likely during the weekend.

I moved to 4.11-rt today (it also repros nicely) because ftrace was
annoying me.  After yet more staring at ever more huge traces (the
opposite of the goal ;), then taking a break to stare at the source
again, I decided that the dual wake_q business should die... and the
stall died with it.

> diff --git a/include/linux/sched.h b/include/linux/sched.h
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1014,8 +1014,20 @@ struct wake_q_head {
>  #define WAKE_Q(name)                                 \
>       struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
>  
> -extern void wake_q_add(struct wake_q_head *head,
> -                           struct task_struct *task);
> +extern void __wake_q_add(struct wake_q_head *head,
> +                      struct task_struct *task, bool sleeper);
> +static inline void wake_q_add(struct wake_q_head *head,
> +                           struct task_struct *task)
> +{
> +     __wake_q_add(head, task, false);
> +}
> +
> +static inline void wake_q_add_sleeper(struct wake_q_head *head,
> +                                   struct task_struct *task)
> +{
> +     __wake_q_add(head, task, true);
> +}
> +
>  extern void __wake_up_q(struct wake_q_head *head, bool sleeper);
>  
>  static inline void wake_up_q(struct wake_q_head *head)
> @@ -1745,6 +1757,7 @@ struct task_struct {
>       raw_spinlock_t pi_lock;
>  
>       struct wake_q_node wake_q;
> +     struct wake_q_node wake_q_sleeper;
>  
>  #ifdef CONFIG_RT_MUTEXES
>       /* PI waiters blocked on a rt_mutex held by this task */
> diff --git a/kernel/fork.c b/kernel/fork.c
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -558,6 +558,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
>       tsk->splice_pipe = NULL;
>       tsk->task_frag.page = NULL;
>       tsk->wake_q.next = NULL;
> +     tsk->wake_q_sleeper.next = NULL;
>  
>       account_kernel_stack(tsk, 1);
>  
> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -1506,7 +1506,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
>        */
>       preempt_disable();
>       if (waiter->savestate)
> -             wake_q_add(wake_sleeper_q, waiter->task);
> +             wake_q_add_sleeper(wake_sleeper_q, waiter->task);
>       else
>               wake_q_add(wake_q, waiter->task);
>       raw_spin_unlock(&current->pi_lock);
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -430,9 +430,15 @@ static bool set_nr_if_polling(struct task_struct *p)
>  #endif
>  #endif
>  
> -void wake_q_add(struct wake_q_head *head, struct task_struct *task)
> +void __wake_q_add(struct wake_q_head *head, struct task_struct *task,
> +               bool sleeper)
>  {
> -     struct wake_q_node *node = &task->wake_q;
> +     struct wake_q_node *node;
> +
> +     if (sleeper)
> +             node = &task->wake_q_sleeper;
> +     else
> +             node = &task->wake_q;
>  
>       /*
>        * Atomically grab the task, if ->wake_q is !nil already it means
> @@ -461,11 +467,17 @@ void __wake_up_q(struct wake_q_head *head, bool sleeper)
>       while (node != WAKE_Q_TAIL) {
>               struct task_struct *task;
>  
> -             task = container_of(node, struct task_struct, wake_q);
> +             if (sleeper)
> +                     task = container_of(node, struct task_struct, wake_q_sleeper);
> +             else
> +                     task = container_of(node, struct task_struct, wake_q);
>               BUG_ON(!task);
>               /* task can safely be re-inserted now */
>               node = node->next;
> -             task->wake_q.next = NULL;
> +             if (sleeper)
> +                     task->wake_q_sleeper.next = NULL;
> +             else
> +                     task->wake_q.next = NULL;
>  
>               /*
>                * wake_up_process() implies a wmb() to pair with the queueing

Reply via email to