[PATCH 04/17] rtmutex: Make wait_lock irq safe
From: Thomas Gleixner

commit b4abf91047cf054f203dcfac97e1038388826937 upstream.

Sasha reported a lockdep splat about a potential deadlock between RCU boosting
rtmutex and the posix timer it_lock.

CPU0					CPU1

rtmutex_lock(&rcu->rt_mutex)
  spin_lock(&rcu->rt_mutex.wait_lock)
				local_irq_disable()
				spin_lock(&timer->it_lock)
				spin_lock(&rcu->mutex.wait_lock)
--> Interrupt
    spin_lock(&timer->it_lock)

This is caused by the following code sequence on CPU1

     rcu_read_lock()
     x = lookup();
     if (x)
	spin_lock_irqsave(&x->it_lock);
     rcu_read_unlock();
     return x;

We could fix that in the posix timer code by keeping rcu read locked across
the spinlocked and irq disabled section, but the above sequence is common and
there is no reason not to support it.

Taking rt_mutex.wait_lock irq safe prevents the deadlock.

Reported-by: Sasha Levin
Signed-off-by: Thomas Gleixner
Cc: Peter Zijlstra
Cc: Paul McKenney
Tested-by: Henrik Austad
---
 kernel/futex.c           |  18 ++--
 kernel/locking/rtmutex.c | 135 ++++++++++++++++++++---------------------
 2 files changed, 81 insertions(+), 72 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 9e92f12..0f44952 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1307,7 +1307,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *top_waiter
 	if (pi_state->owner != current)
 		return -EINVAL;
 
-	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
@@ -1343,22 +1343,22 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *top_waiter
 		ret = -EINVAL;
 	}
 	if (ret) {
-		raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 		return ret;
 	}
 
-	raw_spin_lock_irq(&pi_state->owner->pi_lock);
+	raw_spin_lock(&pi_state->owner->pi_lock);
 	WARN_ON(list_empty(&pi_state->list));
 	list_del_init(&pi_state->list);
-	raw_spin_unlock_irq(&pi_state->owner->pi_lock);
+	raw_spin_unlock(&pi_state->owner->pi_lock);
 
-	raw_spin_lock_irq(&new_owner->pi_lock);
+	raw_spin_lock(&new_owner->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &new_owner->pi_state_list);
 	pi_state->owner = new_owner;
-	raw_spin_unlock_irq(&new_owner->pi_lock);
+	raw_spin_unlock(&new_owner->pi_lock);
 
-	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 
 	deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
 
@@ -2269,11 +2269,11 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 		 * we returned due to timeout or signal without taking the
 		 * rt_mutex. Too late.
 		 */
-		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
+		raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
 		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
 		if (!owner)
 			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
-		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
+		raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
 		ret = fixup_pi_state_owner(uaddr, q, owner);
 		goto out;
 	}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 6cf9dab7..b8d08c7 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -163,13 +163,14 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
  * 2) Drop lock->wait_lock
  * 3) Try to unlock the lock with cmpxchg
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+					unsigned long flags)
 	__releases(lock->wait_lock)
 {
 	struct task_struct *owner = rt_mutex_owner(lock);
 
 	clear_rt_mutex_waiters(lock);
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 	/*
 	 * If a new waiter comes in between the unlock and the cmpxchg
 	 * we have two situations:
@@ -211,11 +212,12 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 /*
  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+					unsigned long flags)
 	__releases(lock->wait_lock)
 {
 	lock->owner = NULL;
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 	return true;
[PATCH 04/17] rtmutex: Make wait_lock irq safe
From: Thomas Gleixner

commit b4abf91047cf054f203dcfac97e1038388826937 upstream.

Sasha reported a lockdep splat about a potential deadlock between RCU boosting
rtmutex and the posix timer it_lock.

CPU0					CPU1

rtmutex_lock(&rcu->rt_mutex)
  spin_lock(&rcu->rt_mutex.wait_lock)
				local_irq_disable()
				spin_lock(&timer->it_lock)
				spin_lock(&rcu->mutex.wait_lock)
--> Interrupt
    spin_lock(&timer->it_lock)

This is caused by the following code sequence on CPU1

     rcu_read_lock()
     x = lookup();
     if (x)
	spin_lock_irqsave(&x->it_lock);
     rcu_read_unlock();
     return x;

We could fix that in the posix timer code by keeping rcu read locked across
the spinlocked and irq disabled section, but the above sequence is common and
there is no reason not to support it.

Taking rt_mutex.wait_lock irq safe prevents the deadlock.

Reported-by: Sasha Levin
Signed-off-by: Thomas Gleixner
Cc: Peter Zijlstra
Cc: Paul McKenney
Tested-by: Henrik Austad
---
 kernel/futex.c           |  18 ++--
 kernel/locking/rtmutex.c | 135 ++++++++++++++++++++---------------------
 2 files changed, 81 insertions(+), 72 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 9e92f12..0f44952 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1307,7 +1307,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *top_waiter
 	if (pi_state->owner != current)
 		return -EINVAL;
 
-	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
@@ -1343,22 +1343,22 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *top_waiter
 		ret = -EINVAL;
 	}
 	if (ret) {
-		raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 		return ret;
 	}
 
-	raw_spin_lock_irq(&pi_state->owner->pi_lock);
+	raw_spin_lock(&pi_state->owner->pi_lock);
 	WARN_ON(list_empty(&pi_state->list));
 	list_del_init(&pi_state->list);
-	raw_spin_unlock_irq(&pi_state->owner->pi_lock);
+	raw_spin_unlock(&pi_state->owner->pi_lock);
 
-	raw_spin_lock_irq(&new_owner->pi_lock);
+	raw_spin_lock(&new_owner->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &new_owner->pi_state_list);
 	pi_state->owner = new_owner;
-	raw_spin_unlock_irq(&new_owner->pi_lock);
+	raw_spin_unlock(&new_owner->pi_lock);
 
-	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
 
 	deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
 
@@ -2269,11 +2269,11 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 		 * we returned due to timeout or signal without taking the
 		 * rt_mutex. Too late.
 		 */
-		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
+		raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
 		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
 		if (!owner)
 			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
-		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
+		raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
 		ret = fixup_pi_state_owner(uaddr, q, owner);
 		goto out;
 	}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 6cf9dab7..b8d08c7 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -163,13 +163,14 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
  * 2) Drop lock->wait_lock
  * 3) Try to unlock the lock with cmpxchg
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+					unsigned long flags)
 	__releases(lock->wait_lock)
 {
 	struct task_struct *owner = rt_mutex_owner(lock);
 
 	clear_rt_mutex_waiters(lock);
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 	/*
 	 * If a new waiter comes in between the unlock and the cmpxchg
 	 * we have two situations:
@@ -211,11 +212,12 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 /*
  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
  */
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+					unsigned long flags)
 	__releases(lock->wait_lock)
 {
 	lock->owner = NULL;
-	raw_spin_unlock(&lock->wait_lock);
+	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 	return true;