[PATCH 10/17] futex: Pull rt_mutex_futex_unlock() out from under hb->lock
From: Peter Zijlstra commit 16ffa12d742534d4ff73e8b3a4e81c1de39196f0 upstream. There's a number of 'interesting' problems, all caused by holding hb->lock while doing the rt_mutex_unlock() equivalent. Notably: - a PI inversion on hb->lock; and, - a SCHED_DEADLINE crash because of pointer instability. The previous changes: - changed the locking rules to cover {uval,pi_state} with wait_lock. - allow to do rt_mutex_futex_unlock() without dropping wait_lock; which in turn allows to rely on wait_lock atomicity completely. - simplified the waiter conundrum. It's now sufficient to hold rtmutex::wait_lock and a reference on the pi_state to protect the state consistency, so hb->lock can be dropped before calling rt_mutex_futex_unlock(). Signed-off-by: Peter Zijlstra (Intel) Cc: juri.le...@arm.com Cc: bige...@linutronix.de Cc: xlp...@redhat.com Cc: rost...@goodmis.org Cc: mathieu.desnoy...@efficios.com Cc: jdesfos...@efficios.com Cc: dvh...@infradead.org Cc: bris...@redhat.com Link: http://lkml.kernel.org/r/20170322104151.92...@infradead.org Signed-off-by: Thomas Gleixner Conflicts: kernel/futex.c Tested-by: Henrik Austad --- kernel/futex.c | 154 + 1 file changed, 100 insertions(+), 54 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 09f698a..7054ca3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -918,10 +918,12 @@ void exit_pi_state_list(struct task_struct *curr) pi_state->owner = NULL; raw_spin_unlock_irq(&curr->pi_lock); - rt_mutex_futex_unlock(&pi_state->pi_mutex); - + get_pi_state(pi_state); spin_unlock(&hb->lock); + rt_mutex_futex_unlock(&pi_state->pi_mutex); + put_pi_state(pi_state); + raw_spin_lock_irq(&curr->pi_lock); } raw_spin_unlock_irq(&curr->pi_lock); @@ -1034,6 +1036,11 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval, * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), * which in turn means that futex_lock_pi() still has a reference on * our pi_state. 
+* +* The waiter holding a reference on @pi_state also protects against +* the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi() +* and futex_wait_requeue_pi() as it cannot go to 0 and consequently +* free pi_state before we can take a reference ourselves. */ WARN_ON(!atomic_read(&pi_state->refcount)); @@ -1377,48 +1384,40 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) smp_store_release(&q->lock_ptr, NULL); } -static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *top_waiter, -struct futex_hash_bucket *hb) +/* + * Caller must hold a reference on @pi_state. + */ +static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state) { - struct task_struct *new_owner; - struct futex_pi_state *pi_state = top_waiter->pi_state; u32 uninitialized_var(curval), newval; + struct task_struct *new_owner; + bool deboost = false; WAKE_Q(wake_q); - bool deboost; int ret = 0; - if (!pi_state) - return -EINVAL; - - /* -* If current does not own the pi_state then the futex is -* inconsistent and user space fiddled with the futex value. -*/ - if (pi_state->owner != current) - return -EINVAL; - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); - - /* -* When we interleave with futex_lock_pi() where it does -* rt_mutex_timed_futex_lock(), we might observe @this futex_q waiter, -* but the rt_mutex's wait_list can be empty (either still, or again, -* depending on which side we land). -* -* When this happens, give up our locks and try again, giving the -* futex_lock_pi() instance time to complete, either by waiting on the -* rtmutex or removing itself from the futex queue. 
-*/ if (!new_owner) { - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - return -EAGAIN; + /* +* Since we held neither hb->lock nor wait_lock when coming +* into this function, we could have raced with futex_lock_pi() +* such that we might observe @this futex_q waiter, but the +* rt_mutex's wait_list can be empty (either still, or again, +* depending on which side we land). +* +* When this happens, give up our locks and try again, giving +* the futex_lock_pi() instance time to complete, either by +* waiting on the rtmutex or removing itself from the futex +* queue. +*/ +
[PATCH 10/17] futex: Pull rt_mutex_futex_unlock() out from under hb->lock
From: Peter Zijlstra commit 16ffa12d742534d4ff73e8b3a4e81c1de39196f0 upstream. There's a number of 'interesting' problems, all caused by holding hb->lock while doing the rt_mutex_unlock() equivalent. Notably: - a PI inversion on hb->lock; and, - a SCHED_DEADLINE crash because of pointer instability. The previous changes: - changed the locking rules to cover {uval,pi_state} with wait_lock. - allow to do rt_mutex_futex_unlock() without dropping wait_lock; which in turn allows to rely on wait_lock atomicity completely. - simplified the waiter conundrum. It's now sufficient to hold rtmutex::wait_lock and a reference on the pi_state to protect the state consistency, so hb->lock can be dropped before calling rt_mutex_futex_unlock(). Signed-off-by: Peter Zijlstra (Intel) Cc: juri.le...@arm.com Cc: bige...@linutronix.de Cc: xlp...@redhat.com Cc: rost...@goodmis.org Cc: mathieu.desnoy...@efficios.com Cc: jdesfos...@efficios.com Cc: dvh...@infradead.org Cc: bris...@redhat.com Link: http://lkml.kernel.org/r/20170322104151.92...@infradead.org Signed-off-by: Thomas Gleixner Conflicts: kernel/futex.c Tested-by: Henrik Austad --- kernel/futex.c | 154 + 1 file changed, 100 insertions(+), 54 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 09f698a..7054ca3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -918,10 +918,12 @@ void exit_pi_state_list(struct task_struct *curr) pi_state->owner = NULL; raw_spin_unlock_irq(&curr->pi_lock); - rt_mutex_futex_unlock(&pi_state->pi_mutex); - + get_pi_state(pi_state); spin_unlock(&hb->lock); + rt_mutex_futex_unlock(&pi_state->pi_mutex); + put_pi_state(pi_state); + raw_spin_lock_irq(&curr->pi_lock); } raw_spin_unlock_irq(&curr->pi_lock); @@ -1034,6 +1036,11 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval, * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), * which in turn means that futex_lock_pi() still has a reference on * our pi_state. 
+* +* The waiter holding a reference on @pi_state also protects against +* the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi() +* and futex_wait_requeue_pi() as it cannot go to 0 and consequently +* free pi_state before we can take a reference ourselves. */ WARN_ON(!atomic_read(&pi_state->refcount)); @@ -1377,48 +1384,40 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) smp_store_release(&q->lock_ptr, NULL); } -static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *top_waiter, -struct futex_hash_bucket *hb) +/* + * Caller must hold a reference on @pi_state. + */ +static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state) { - struct task_struct *new_owner; - struct futex_pi_state *pi_state = top_waiter->pi_state; u32 uninitialized_var(curval), newval; + struct task_struct *new_owner; + bool deboost = false; WAKE_Q(wake_q); - bool deboost; int ret = 0; - if (!pi_state) - return -EINVAL; - - /* -* If current does not own the pi_state then the futex is -* inconsistent and user space fiddled with the futex value. -*/ - if (pi_state->owner != current) - return -EINVAL; - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); - - /* -* When we interleave with futex_lock_pi() where it does -* rt_mutex_timed_futex_lock(), we might observe @this futex_q waiter, -* but the rt_mutex's wait_list can be empty (either still, or again, -* depending on which side we land). -* -* When this happens, give up our locks and try again, giving the -* futex_lock_pi() instance time to complete, either by waiting on the -* rtmutex or removing itself from the futex queue. 
-*/ if (!new_owner) { - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - return -EAGAIN; + /* +* Since we held neither hb->lock nor wait_lock when coming +* into this function, we could have raced with futex_lock_pi() +* such that we might observe @this futex_q waiter, but the +* rt_mutex's wait_list can be empty (either still, or again, +* depending on which side we land). +* +* When this happens, give up our locks and try again, giving +* the futex_lock_pi() instance time to complete, either by +* waiting on the rtmutex or removing itself from the futex +* queue. +*/ +