On Tue, 07 Jun 2016 21:56:37 +0200 Peter Zijlstra <[email protected]> wrote: \ > --- a/include/linux/sched/rt.h > +++ b/include/linux/sched/rt.h > @@ -19,6 +19,7 @@ static inline int rt_task(struct task_st > extern int rt_mutex_getprio(struct task_struct *p); > extern void rt_mutex_setprio(struct task_struct *p, int prio); > extern int rt_mutex_get_effective_prio(struct task_struct *task, int > newprio); > +extern void rt_mutex_update_top_task(struct task_struct *p); > extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); > extern void rt_mutex_adjust_pi(struct task_struct *p); > static inline bool tsk_is_pi_blocked(struct task_struct *tsk) > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -1219,6 +1219,7 @@ static void rt_mutex_init_task(struct ta > #ifdef CONFIG_RT_MUTEXES > p->pi_waiters = RB_ROOT; > p->pi_waiters_leftmost = NULL; > + p->pi_top_task = NULL; > p->pi_blocked_on = NULL; > #endif > } > --- a/kernel/locking/rtmutex.c > +++ b/kernel/locking/rtmutex.c > @@ -256,6 +256,16 @@ rt_mutex_dequeue_pi(struct task_struct * > RB_CLEAR_NODE(&waiter->pi_tree_entry); > } > > +void rt_mutex_update_top_task(struct task_struct *p) > +{ > + if (!task_has_pi_waiters(p)) { > + p->pi_top_task = NULL; > + return; > + } > + > + p->pi_top_task = task_top_pi_waiter(p)->task; > +} > + > /* > * Calculate task priority from the waiter tree priority > * > @@ -273,10 +283,7 @@ int rt_mutex_getprio(struct task_struct > > struct task_struct *rt_mutex_get_top_task(struct task_struct *task) > { > - if (likely(!task_has_pi_waiters(task))) > - return NULL; > - > - return task_top_pi_waiter(task)->task; > + return task->pi_top_task; > } > > /* > @@ -285,12 +292,12 @@ struct task_struct *rt_mutex_get_top_tas > */ > int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) > { > - if (!task_has_pi_waiters(task)) > + struct task_struct *top_task = rt_mutex_get_top_task(task); > + > + if (!top_task) > return newprio; > > - if 
(task_top_pi_waiter(task)->task->prio <= newprio) > - return task_top_pi_waiter(task)->task->prio; > - return newprio; > + return min(top_task->prio, newprio); > } > > /* > @@ -307,24 +314,6 @@ static void __rt_mutex_adjust_prio(struc > } > > /* > - * Adjust task priority (undo boosting). Called from the exit path of > - * rt_mutex_slowunlock() and rt_mutex_slowlock(). > - * > - * (Note: We do this outside of the protection of lock->wait_lock to > - * allow the lock to be taken while or before we readjust the priority > - * of task. We do not use the spin_xx_mutex() variants here as we are > - * outside of the debug path.) > - */ > -void rt_mutex_adjust_prio(struct task_struct *task) > -{ > - unsigned long flags; > - > - raw_spin_lock_irqsave(&task->pi_lock, flags); > - __rt_mutex_adjust_prio(task); > - raw_spin_unlock_irqrestore(&task->pi_lock, flags); > -} > - > -/* > * Deadlock detection is conditional: > * > * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted > @@ -987,6 +976,7 @@ static void mark_wakeup_next_waiter(stru > * lock->wait_lock. > */ > rt_mutex_dequeue_pi(current, waiter); > + __rt_mutex_adjust_prio(current); > > /* > * As we are waking up the top waiter, and the waiter stays > @@ -1325,6 +1315,16 @@ static bool __sched rt_mutex_slowunlock( > */ > mark_wakeup_next_waiter(wake_q, lock); > > + /* > + * We should deboost before waking the top waiter task such that > + * we don't run two tasks with the 'same' priority. This however > + * can lead to prio-inversion if we would get preempted after > + * the deboost but before waking our high-prio task, hence the > + * preempt_disable before unlock. Pairs with preempt_enable() in > + * rt_mutex_postunlock(); > + */ > + preempt_disable(); > +
This looks like a possible maintenance nightmare: the preempt_disable() added to rt_mutex_slowunlock() is balanced only by the preempt_enable() in rt_mutex_postunlock(). Can we add comments at the start of both functions stating that every rt_mutex_slowunlock() call must be paired with a call to rt_mutex_postunlock()? Other than that... Acked-by: Steven Rostedt <[email protected]> -- Steve > raw_spin_unlock_irqrestore(&lock->wait_lock, flags); > > /* check PI boosting */ > @@ -1400,20 +1400,9 @@ rt_mutex_fastunlock(struct rt_mutex *loc > */ > void rt_mutex_postunlock(struct wake_q_head *wake_q, bool deboost) > { > - /* > - * We should deboost before waking the top waiter task such that > - * we don't run two tasks with the 'same' priority. This however > - * can lead to prio-inversion if we would get preempted after > - * the deboost but before waking our high-prio task, hence the > - * preempt_disable. > - */ > - if (deboost) { > - preempt_disable(); > - rt_mutex_adjust_prio(current); > - } > - > wake_up_q(wake_q); > > + /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ > if (deboost) > preempt_enable(); > } > --- a/kernel/sched/core.c > +++ b/kernel/sched/core.c > @@ -3568,6 +3568,8 @@ void rt_mutex_setprio(struct task_struct > goto out_unlock; > } > > + rt_mutex_update_top_task(p); > + > trace_sched_pi_setprio(p, prio); > oldprio = p->prio; > >

