On Tue, 07 Jun 2016 21:56:37 +0200
Peter Zijlstra <[email protected]> wrote:

> --- a/include/linux/sched/rt.h
> +++ b/include/linux/sched/rt.h
> @@ -19,6 +19,7 @@ static inline int rt_task(struct task_st
>  extern int rt_mutex_getprio(struct task_struct *p);
>  extern void rt_mutex_setprio(struct task_struct *p, int prio);
>  extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
> +extern void rt_mutex_update_top_task(struct task_struct *p);
>  extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
>  extern void rt_mutex_adjust_pi(struct task_struct *p);
>  static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1219,6 +1219,7 @@ static void rt_mutex_init_task(struct ta
>  #ifdef CONFIG_RT_MUTEXES
>       p->pi_waiters = RB_ROOT;
>       p->pi_waiters_leftmost = NULL;
> +     p->pi_top_task = NULL;
>       p->pi_blocked_on = NULL;
>  #endif
>  }
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -256,6 +256,16 @@ rt_mutex_dequeue_pi(struct task_struct *
>       RB_CLEAR_NODE(&waiter->pi_tree_entry);
>  }
>  
> +void rt_mutex_update_top_task(struct task_struct *p)
> +{
> +     if (!task_has_pi_waiters(p)) {
> +             p->pi_top_task = NULL;
> +             return;
> +     }
> +
> +     p->pi_top_task = task_top_pi_waiter(p)->task;
> +}
> +
>  /*
>   * Calculate task priority from the waiter tree priority
>   *
> @@ -273,10 +283,7 @@ int rt_mutex_getprio(struct task_struct
>  
>  struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
>  {
> -     if (likely(!task_has_pi_waiters(task)))
> -             return NULL;
> -
> -     return task_top_pi_waiter(task)->task;
> +     return task->pi_top_task;
>  }
>  
>  /*
> @@ -285,12 +292,12 @@ struct task_struct *rt_mutex_get_top_tas
>   */
>  int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
>  {
> -     if (!task_has_pi_waiters(task))
> +     struct task_struct *top_task = rt_mutex_get_top_task(task);
> +
> +     if (!top_task)
>               return newprio;
>  
> -     if (task_top_pi_waiter(task)->task->prio <= newprio)
> -             return task_top_pi_waiter(task)->task->prio;
> -     return newprio;
> +     return min(top_task->prio, newprio);
>  }
>  
>  /*
> @@ -307,24 +314,6 @@ static void __rt_mutex_adjust_prio(struc
>  }
>  
>  /*
> - * Adjust task priority (undo boosting). Called from the exit path of
> - * rt_mutex_slowunlock() and rt_mutex_slowlock().
> - *
> - * (Note: We do this outside of the protection of lock->wait_lock to
> - * allow the lock to be taken while or before we readjust the priority
> - * of task. We do not use the spin_xx_mutex() variants here as we are
> - * outside of the debug path.)
> - */
> -void rt_mutex_adjust_prio(struct task_struct *task)
> -{
> -     unsigned long flags;
> -
> -     raw_spin_lock_irqsave(&task->pi_lock, flags);
> -     __rt_mutex_adjust_prio(task);
> -     raw_spin_unlock_irqrestore(&task->pi_lock, flags);
> -}
> -
> -/*
>   * Deadlock detection is conditional:
>   *
>   * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
> @@ -987,6 +976,7 @@ static void mark_wakeup_next_waiter(stru
>        * lock->wait_lock.
>        */
>       rt_mutex_dequeue_pi(current, waiter);
> +     __rt_mutex_adjust_prio(current);
>  
>       /*
>        * As we are waking up the top waiter, and the waiter stays
> @@ -1325,6 +1315,16 @@ static bool __sched rt_mutex_slowunlock(
>        */
>       mark_wakeup_next_waiter(wake_q, lock);
>  
> +     /*
> +      * We should deboost before waking the top waiter task such that
> +      * we don't run two tasks with the 'same' priority. This however
> +      * can lead to prio-inversion if we would get preempted after
> +      * the deboost but before waking our high-prio task, hence the
> +      * preempt_disable before unlock. Pairs with preempt_enable() in
> +      * rt_mutex_postunlock();
> +      */
> +     preempt_disable();
> +

This looks like a possible maintenance nightmare. Can we add some more
comments at the start of the functions that state that
rt_mutex_slowunlock() calls must be paired with rt_mutex_postunlock()?

Other than that...

Acked-by: Steven Rostedt <[email protected]>

-- Steve


>       raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
>  
>       /* check PI boosting */
> @@ -1400,20 +1400,9 @@ rt_mutex_fastunlock(struct rt_mutex *loc
>   */
>  void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost)
>  {
> -     /*
> -      * We should deboost before waking the top waiter task such that
> -      * we don't run two tasks with the 'same' priority. This however
> -      * can lead to prio-inversion if we would get preempted after
> -      * the deboost but before waking our high-prio task, hence the
> -      * preempt_disable.
> -      */
> -     if (deboost) {
> -             preempt_disable();
> -             rt_mutex_adjust_prio(current);
> -     }
> -
>       wake_up_q(wake_q);
>  
> +     /* Pairs with preempt_disable() in rt_mutex_slowunlock() */
>       if (deboost)
>               preempt_enable();
>  }
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3568,6 +3568,8 @@ void rt_mutex_setprio(struct task_struct
>               goto out_unlock;
>       }
>  
> +     rt_mutex_update_top_task(p);
> +
>       trace_sched_pi_setprio(p, prio);
>       oldprio = p->prio;
>  
> 

Reply via email to