commit:     78ff2030a414af3cdccfd6b634571a3006ce13c0
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Tue Aug 10 16:21:29 2021 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Tue Aug 10 16:21:29 2021 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=78ff2030

Linux patch 4.4.280

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

 0000_README              |    4 +
 1279_linux-4.4.280.patch | 1100 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1104 insertions(+)

diff --git a/0000_README b/0000_README
index 5eea747..878287e 100644
--- a/0000_README
+++ b/0000_README
@@ -1159,6 +1159,10 @@ Patch:  1278_linux-4.4.279.patch
 From:   http://www.kernel.org
 Desc:   Linux 4.4.279
 
+Patch:  1279_linux-4.4.280.patch
+From:   http://www.kernel.org
+Desc:   Linux 4.4.280
+
 Patch:  1500_XATTR_USER_PREFIX.patch
 From:   https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc:   Support for namespace user.pax.* on tmpfs.

diff --git a/1279_linux-4.4.280.patch b/1279_linux-4.4.280.patch
new file mode 100644
index 0000000..b08ce93
--- /dev/null
+++ b/1279_linux-4.4.280.patch
@@ -0,0 +1,1100 @@
+diff --git a/Makefile b/Makefile
+index 7dc479e9a6655..870bd763830e6 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 4
+-SUBLEVEL = 279
++SUBLEVEL = 280
+ EXTRAVERSION =
+ NAME = Blurry Fish Butt
+ 
+diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
+index 0a93e9d1708e2..3072e9c93ae6b 100644
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -880,9 +880,7 @@ static __always_inline void rcu_read_lock(void)
+  * Unfortunately, this function acquires the scheduler's runqueue and
+  * priority-inheritance spinlocks.  This means that deadlock could result
+  * if the caller of rcu_read_unlock() already holds one of these locks or
+- * any lock that is ever acquired while holding them; or any lock which
+- * can be taken from interrupt context because rcu_boost()->rt_mutex_lock()
+- * does not disable irqs while taking ->wait_lock.
++ * any lock that is ever acquired while holding them.
+  *
+  * That said, RCU readers are never priority boosted unless they were
+  * preempted.  Therefore, one way to avoid deadlock is to make sure
+diff --git a/kernel/futex.c b/kernel/futex.c
+index ff5499b0c5b34..6d47b7dc1cfbe 100644
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -825,7 +825,7 @@ static int refill_pi_state_cache(void)
+       return 0;
+ }
+ 
+-static struct futex_pi_state * alloc_pi_state(void)
++static struct futex_pi_state *alloc_pi_state(void)
+ {
+       struct futex_pi_state *pi_state = current->pi_state_cache;
+ 
+@@ -858,10 +858,18 @@ static void pi_state_update_owner(struct futex_pi_state *pi_state,
+       }
+ }
+ 
++static void get_pi_state(struct futex_pi_state *pi_state)
++{
++      WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount));
++}
++
+ /*
++ * Drops a reference to the pi_state object and frees or caches it
++ * when the last reference is gone.
++ *
+  * Must be called with the hb lock held.
+  */
+-static void free_pi_state(struct futex_pi_state *pi_state)
++static void put_pi_state(struct futex_pi_state *pi_state)
+ {
+       if (!pi_state)
+               return;
+@@ -898,7 +906,7 @@ static void free_pi_state(struct futex_pi_state *pi_state)
+  * Look up the task based on what TID userspace gave us.
+  * We dont trust it.
+  */
+-static struct task_struct * futex_find_get_task(pid_t pid)
++static struct task_struct *futex_find_get_task(pid_t pid)
+ {
+       struct task_struct *p;
+ 
+@@ -958,10 +966,12 @@ static void exit_pi_state_list(struct task_struct *curr)
+               pi_state->owner = NULL;
+               raw_spin_unlock_irq(&curr->pi_lock);
+ 
+-              rt_mutex_futex_unlock(&pi_state->pi_mutex);
+-
++              get_pi_state(pi_state);
+               spin_unlock(&hb->lock);
+ 
++              rt_mutex_futex_unlock(&pi_state->pi_mutex);
++              put_pi_state(pi_state);
++
+               raw_spin_lock_irq(&curr->pi_lock);
+       }
+       raw_spin_unlock_irq(&curr->pi_lock);
+@@ -1075,6 +1085,11 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
+        * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
+        * which in turn means that futex_lock_pi() still has a reference on
+        * our pi_state.
++       *
++       * The waiter holding a reference on @pi_state also protects against
++       * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
++       * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
++       * free pi_state before we can take a reference ourselves.
+        */
+       WARN_ON(!atomic_read(&pi_state->refcount));
+ 
+@@ -1146,7 +1161,7 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
+               goto out_einval;
+ 
+ out_attach:
+-      atomic_inc(&pi_state->refcount);
++      get_pi_state(pi_state);
+       raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+       *ps = pi_state;
+       return 0;
+@@ -1529,48 +1544,35 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
+       q->lock_ptr = NULL;
+ }
+ 
+-static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
+-                       struct futex_hash_bucket *hb)
++/*
++ * Caller must hold a reference on @pi_state.
++ */
++static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
+ {
+-      struct task_struct *new_owner;
+-      struct futex_pi_state *pi_state = this->pi_state;
+       u32 uninitialized_var(curval), newval;
++      struct task_struct *new_owner;
++      bool deboost = false;
+       WAKE_Q(wake_q);
+-      bool deboost;
+       int ret = 0;
+ 
+-      if (!pi_state)
+-              return -EINVAL;
+-
+-      /*
+-       * If current does not own the pi_state then the futex is
+-       * inconsistent and user space fiddled with the futex value.
+-       */
+-      if (pi_state->owner != current)
+-              return -EINVAL;
+-
+-      raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+       new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
+-
+-      /*
+-       * When we interleave with futex_lock_pi() where it does
+-       * rt_mutex_timed_futex_lock(), we might observe @this futex_q waiter,
+-       * but the rt_mutex's wait_list can be empty (either still, or again,
+-       * depending on which side we land).
+-       *
+-       * When this happens, give up our locks and try again, giving the
+-       * futex_lock_pi() instance time to complete, either by waiting on the
+-       * rtmutex or removing itself from the futex queue.
+-       */
+-      if (!new_owner) {
+-              raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+-              return -EAGAIN;
++      if (WARN_ON_ONCE(!new_owner)) {
++              /*
++               * As per the comment in futex_unlock_pi() this should not happen.
++               *
++               * When this happens, give up our locks and try again, giving
++               * the futex_lock_pi() instance time to complete, either by
++               * waiting on the rtmutex or removing itself from the futex
++               * queue.
++               */
++              ret = -EAGAIN;
++              goto out_unlock;
+       }
+ 
+       /*
+-       * We pass it to the next owner. The WAITERS bit is always
+-       * kept enabled while there is PI state around. We cleanup the
+-       * owner died bit, because we are the owner.
++       * We pass it to the next owner. The WAITERS bit is always kept
++       * enabled while there is PI state around. We cleanup the owner
++       * died bit, because we are the owner.
+        */
+       newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
+ 
+@@ -1603,15 +1605,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
+               deboost = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+       }
+ 
++out_unlock:
+       raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+-      spin_unlock(&hb->lock);
+ 
+       if (deboost) {
+               wake_up_q(&wake_q);
+               rt_mutex_adjust_prio(current);
+       }
+ 
+-      return 0;
++      return ret;
+ }
+ 
+ /*
+@@ -2121,7 +2123,7 @@ retry_private:
+               case 0:
+                       break;
+               case -EFAULT:
+-                      free_pi_state(pi_state);
++                      put_pi_state(pi_state);
+                       pi_state = NULL;
+                       double_unlock_hb(hb1, hb2);
+                       hb_waiters_dec(hb2);
+@@ -2139,7 +2141,7 @@ retry_private:
+                        *   exit to complete.
+                        * - EAGAIN: The user space value changed.
+                        */
+-                      free_pi_state(pi_state);
++                      put_pi_state(pi_state);
+                       pi_state = NULL;
+                       double_unlock_hb(hb1, hb2);
+                       hb_waiters_dec(hb2);
+@@ -2201,7 +2203,7 @@ retry_private:
+                */
+               if (requeue_pi) {
+                       /* Prepare the waiter to take the rt_mutex. */
+-                      atomic_inc(&pi_state->refcount);
++                      get_pi_state(pi_state);
+                       this->pi_state = pi_state;
+                       ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
+                                                       this->rt_waiter,
+@@ -2214,7 +2216,7 @@ retry_private:
+                       } else if (ret) {
+                               /* -EDEADLK */
+                               this->pi_state = NULL;
+-                              free_pi_state(pi_state);
++                              put_pi_state(pi_state);
+                               goto out_unlock;
+                       }
+               }
+@@ -2223,7 +2225,7 @@ retry_private:
+       }
+ 
+ out_unlock:
+-      free_pi_state(pi_state);
++      put_pi_state(pi_state);
+       double_unlock_hb(hb1, hb2);
+       wake_up_q(&wake_q);
+       hb_waiters_dec(hb2);
+@@ -2277,20 +2279,7 @@ queue_unlock(struct futex_hash_bucket *hb)
+       hb_waiters_dec(hb);
+ }
+ 
+-/**
+- * queue_me() - Enqueue the futex_q on the futex_hash_bucket
+- * @q:        The futex_q to enqueue
+- * @hb:       The destination hash bucket
+- *
+- * The hb->lock must be held by the caller, and is released here. A call to
+- * queue_me() is typically paired with exactly one call to unqueue_me().  The
+- * exceptions involve the PI related operations, which may use unqueue_me_pi()
+- * or nothing if the unqueue is done as part of the wake process and the unqueue
+- * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
+- * an example).
+- */
+-static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
+-      __releases(&hb->lock)
++static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
+ {
+       int prio;
+ 
+@@ -2307,6 +2296,24 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
+       plist_node_init(&q->list, prio);
+       plist_add(&q->list, &hb->chain);
+       q->task = current;
++}
++
++/**
++ * queue_me() - Enqueue the futex_q on the futex_hash_bucket
++ * @q:        The futex_q to enqueue
++ * @hb:       The destination hash bucket
++ *
++ * The hb->lock must be held by the caller, and is released here. A call to
++ * queue_me() is typically paired with exactly one call to unqueue_me().  The
++ * exceptions involve the PI related operations, which may use unqueue_me_pi()
++ * or nothing if the unqueue is done as part of the wake process and the unqueue
++ * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
++ * an example).
++ */
++static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
++      __releases(&hb->lock)
++{
++      __queue_me(q, hb);
+       spin_unlock(&hb->lock);
+ }
+ 
+@@ -2376,7 +2383,7 @@ static void unqueue_me_pi(struct futex_q *q)
+       __unqueue_futex(q);
+ 
+       BUG_ON(!q->pi_state);
+-      free_pi_state(q->pi_state);
++      put_pi_state(q->pi_state);
+       q->pi_state = NULL;
+ 
+       spin_unlock(q->lock_ptr);
+@@ -2430,10 +2437,22 @@ retry:
+               }
+ 
+               /*
+-               * Since we just failed the trylock; there must be an owner.
++               * The trylock just failed, so either there is an owner or
++               * there is a higher priority waiter than this one.
+                */
+               newowner = rt_mutex_owner(&pi_state->pi_mutex);
+-              BUG_ON(!newowner);
++              /*
++               * If the higher priority waiter has not yet taken over the
++               * rtmutex then newowner is NULL. We can't return here with
++               * that state because it's inconsistent vs. the user space
++               * state. So drop the locks and try again. It's a valid
++               * situation and not any different from the other retry
++               * conditions.
++               */
++              if (unlikely(!newowner)) {
++                      err = -EAGAIN;
++                      goto handle_fault;
++              }
+       } else {
+               WARN_ON_ONCE(argowner != current);
+               if (oldowner == current) {
+@@ -2454,7 +2473,7 @@ retry:
+       if (get_futex_value_locked(&uval, uaddr))
+               goto handle_fault;
+ 
+-      while (1) {
++      for (;;) {
+               newval = (uval & FUTEX_OWNER_DIED) | newtid;
+ 
+               if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
+@@ -2812,6 +2831,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
+ {
+       struct hrtimer_sleeper timeout, *to = NULL;
+       struct task_struct *exiting = NULL;
++      struct rt_mutex_waiter rt_waiter;
+       struct futex_hash_bucket *hb;
+       struct futex_q q = futex_q_init;
+       int res, ret;
+@@ -2872,24 +2892,51 @@ retry_private:
+               }
+       }
+ 
++      WARN_ON(!q.pi_state);
++
+       /*
+        * Only actually queue now that the atomic ops are done:
+        */
+-      queue_me(&q, hb);
++      __queue_me(&q, hb);
+ 
+-      WARN_ON(!q.pi_state);
+-      /*
+-       * Block on the PI mutex:
+-       */
+-      if (!trylock) {
+-              ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to);
+-      } else {
++      if (trylock) {
+               ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
+               /* Fixup the trylock return value: */
+               ret = ret ? 0 : -EWOULDBLOCK;
++              goto no_block;
++      }
++
++      /*
++       * We must add ourselves to the rt_mutex waitlist while holding hb->lock
++       * such that the hb and rt_mutex wait lists match.
++       */
++      rt_mutex_init_waiter(&rt_waiter);
++      ret = rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
++      if (ret) {
++              if (ret == 1)
++                      ret = 0;
++
++              goto no_block;
+       }
+ 
++      spin_unlock(q.lock_ptr);
++
++      if (unlikely(to))
++              hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
++
++      ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
++
+       spin_lock(q.lock_ptr);
++      /*
++       * If we failed to acquire the lock (signal/timeout), we must
++       * first acquire the hb->lock before removing the lock from the
++       * rt_mutex waitqueue, such that we can keep the hb and rt_mutex
++       * wait lists consistent.
++       */
++      if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
++              ret = 0;
++
++no_block:
+       /*
+        * Fixup the pi_state owner and possibly acquire the lock if we
+        * haven't already.
+@@ -2913,8 +2960,10 @@ out_unlock_put_key:
+ out_put_key:
+       put_futex_key(&q.key);
+ out:
+-      if (to)
++      if (to) {
++              hrtimer_cancel(&to->timer);
+               destroy_hrtimer_on_stack(&to->timer);
++      }
+       return ret != -EINTR ? ret : -ERESTARTNOINTR;
+ 
+ uaddr_faulted:
+@@ -2967,10 +3016,39 @@ retry:
+        */
+       match = futex_top_waiter(hb, &key);
+       if (match) {
+-              ret = wake_futex_pi(uaddr, uval, match, hb);
++              struct futex_pi_state *pi_state = match->pi_state;
++
++              ret = -EINVAL;
++              if (!pi_state)
++                      goto out_unlock;
++
++              /*
++               * If current does not own the pi_state then the futex is
++               * inconsistent and user space fiddled with the futex value.
++               */
++              if (pi_state->owner != current)
++                      goto out_unlock;
++
++              get_pi_state(pi_state);
++              /*
++               * Since modifying the wait_list is done while holding both
++               * hb->lock and wait_lock, holding either is sufficient to
++               * observe it.
++               *
++               * By taking wait_lock while still holding hb->lock, we ensure
++               * there is no point where we hold neither; and therefore
++               * wake_futex_pi() must observe a state consistent with what we
++               * observed.
++               */
++              raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
++              spin_unlock(&hb->lock);
++
++              ret = wake_futex_pi(uaddr, uval, pi_state);
++
++              put_pi_state(pi_state);
++
+               /*
+-               * In case of success wake_futex_pi dropped the hash
+-               * bucket lock.
++               * Success, we're done! No tricky corner cases.
+                */
+               if (!ret)
+                       goto out_putkey;
+@@ -2985,7 +3063,6 @@ retry:
+                * setting the FUTEX_WAITERS bit. Try again.
+                */
+               if (ret == -EAGAIN) {
+-                      spin_unlock(&hb->lock);
+                       put_futex_key(&key);
+                       goto retry;
+               }
+@@ -2993,7 +3070,7 @@ retry:
+                * wake_futex_pi has detected invalid state. Tell user
+                * space.
+                */
+-              goto out_unlock;
++              goto out_putkey;
+       }
+ 
+       /*
+@@ -3003,8 +3080,10 @@ retry:
+        * preserve the WAITERS bit not the OWNER_DIED one. We are the
+        * owner.
+        */
+-      if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))
++      if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) {
++              spin_unlock(&hb->lock);
+               goto pi_faulted;
++      }
+ 
+       /*
+        * If uval has changed, let user space handle it.
+@@ -3018,7 +3097,6 @@ out_putkey:
+       return ret;
+ 
+ pi_faulted:
+-      spin_unlock(&hb->lock);
+       put_futex_key(&key);
+ 
+       ret = fault_in_user_writeable(uaddr);
+@@ -3148,10 +3226,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+        * The waiter is allocated on our stack, manipulated by the requeue
+        * code while we sleep on uaddr.
+        */
+-      debug_rt_mutex_init_waiter(&rt_waiter);
+-      RB_CLEAR_NODE(&rt_waiter.pi_tree_entry);
+-      RB_CLEAR_NODE(&rt_waiter.tree_entry);
+-      rt_waiter.task = NULL;
++      rt_mutex_init_waiter(&rt_waiter);
+ 
+       ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE);
+       if (unlikely(ret != 0))
+@@ -3210,7 +3285,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+                        * Drop the reference to the pi state which
+                        * the requeue_pi() code acquired for us.
+                        */
+-                      free_pi_state(q.pi_state);
++                      put_pi_state(q.pi_state);
+                       spin_unlock(q.lock_ptr);
+                       /*
+                        * Adjust the return value. It's either -EFAULT or
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 1c0cb5c3c6ad6..532986d82179b 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -163,13 +163,14 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
+  * 2) Drop lock->wait_lock
+  * 3) Try to unlock the lock with cmpxchg
+  */
+-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
++static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
++                                      unsigned long flags)
+       __releases(lock->wait_lock)
+ {
+       struct task_struct *owner = rt_mutex_owner(lock);
+ 
+       clear_rt_mutex_waiters(lock);
+-      raw_spin_unlock(&lock->wait_lock);
++      raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+       /*
+        * If a new waiter comes in between the unlock and the cmpxchg
+        * we have two situations:
+@@ -211,11 +212,12 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
+ /*
+  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
+  */
+-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
++static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
++                                      unsigned long flags)
+       __releases(lock->wait_lock)
+ {
+       lock->owner = NULL;
+-      raw_spin_unlock(&lock->wait_lock);
++      raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+       return true;
+ }
+ #endif
+@@ -497,7 +499,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+       int ret = 0, depth = 0;
+       struct rt_mutex *lock;
+       bool detect_deadlock;
+-      unsigned long flags;
+       bool requeue = true;
+ 
+       detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
+@@ -540,7 +541,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+       /*
+        * [1] Task cannot go away as we did a get_task() before !
+        */
+-      raw_spin_lock_irqsave(&task->pi_lock, flags);
++      raw_spin_lock_irq(&task->pi_lock);
+ 
+       /*
+        * [2] Get the waiter on which @task is blocked on.
+@@ -624,7 +625,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+        * operations.
+        */
+       if (!raw_spin_trylock(&lock->wait_lock)) {
+-              raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++              raw_spin_unlock_irq(&task->pi_lock);
+               cpu_relax();
+               goto retry;
+       }
+@@ -655,7 +656,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+               /*
+                * No requeue[7] here. Just release @task [8]
+                */
+-              raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++              raw_spin_unlock(&task->pi_lock);
+               put_task_struct(task);
+ 
+               /*
+@@ -663,14 +664,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+                * If there is no owner of the lock, end of chain.
+                */
+               if (!rt_mutex_owner(lock)) {
+-                      raw_spin_unlock(&lock->wait_lock);
++                      raw_spin_unlock_irq(&lock->wait_lock);
+                       return 0;
+               }
+ 
+               /* [10] Grab the next task, i.e. owner of @lock */
+               task = rt_mutex_owner(lock);
+               get_task_struct(task);
+-              raw_spin_lock_irqsave(&task->pi_lock, flags);
++              raw_spin_lock(&task->pi_lock);
+ 
+               /*
+                * No requeue [11] here. We just do deadlock detection.
+@@ -685,8 +686,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+               top_waiter = rt_mutex_top_waiter(lock);
+ 
+               /* [13] Drop locks */
+-              raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+-              raw_spin_unlock(&lock->wait_lock);
++              raw_spin_unlock(&task->pi_lock);
++              raw_spin_unlock_irq(&lock->wait_lock);
+ 
+               /* If owner is not blocked, end of chain. */
+               if (!next_lock)
+@@ -707,7 +708,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+       rt_mutex_enqueue(lock, waiter);
+ 
+       /* [8] Release the task */
+-      raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++      raw_spin_unlock(&task->pi_lock);
+       put_task_struct(task);
+ 
+       /*
+@@ -725,14 +726,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+                */
+               if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
+                       wake_up_process(rt_mutex_top_waiter(lock)->task);
+-              raw_spin_unlock(&lock->wait_lock);
++              raw_spin_unlock_irq(&lock->wait_lock);
+               return 0;
+       }
+ 
+       /* [10] Grab the next task, i.e. the owner of @lock */
+       task = rt_mutex_owner(lock);
+       get_task_struct(task);
+-      raw_spin_lock_irqsave(&task->pi_lock, flags);
++      raw_spin_lock(&task->pi_lock);
+ 
+       /* [11] requeue the pi waiters if necessary */
+       if (waiter == rt_mutex_top_waiter(lock)) {
+@@ -786,8 +787,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+       top_waiter = rt_mutex_top_waiter(lock);
+ 
+       /* [13] Drop the locks */
+-      raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+-      raw_spin_unlock(&lock->wait_lock);
++      raw_spin_unlock(&task->pi_lock);
++      raw_spin_unlock_irq(&lock->wait_lock);
+ 
+       /*
+        * Make the actual exit decisions [12], based on the stored
+@@ -810,7 +811,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+       goto again;
+ 
+  out_unlock_pi:
+-      raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++      raw_spin_unlock_irq(&task->pi_lock);
+  out_put_task:
+       put_task_struct(task);
+ 
+@@ -820,7 +821,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+ /*
+  * Try to take an rt-mutex
+  *
+- * Must be called with lock->wait_lock held.
++ * Must be called with lock->wait_lock held and interrupts disabled
+  *
+  * @lock:   The lock to be acquired.
+  * @task:   The task which wants to acquire the lock
+@@ -830,8 +831,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
+ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+                               struct rt_mutex_waiter *waiter)
+ {
+-      unsigned long flags;
+-
+       /*
+        * Before testing whether we can acquire @lock, we set the
+        * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
+@@ -916,7 +915,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+        * case, but conditionals are more expensive than a redundant
+        * store.
+        */
+-      raw_spin_lock_irqsave(&task->pi_lock, flags);
++      raw_spin_lock(&task->pi_lock);
+       task->pi_blocked_on = NULL;
+       /*
+        * Finish the lock acquisition. @task is the new owner. If
+@@ -925,7 +924,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+        */
+       if (rt_mutex_has_waiters(lock))
+               rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
+-      raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++      raw_spin_unlock(&task->pi_lock);
+ 
+ takeit:
+       /* We got the lock. */
+@@ -945,7 +944,7 @@ takeit:
+  *
+  * Prepare waiter and propagate pi chain
+  *
+- * This must be called with lock->wait_lock held.
++ * This must be called with lock->wait_lock held and interrupts disabled
+  */
+ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+                                  struct rt_mutex_waiter *waiter,
+@@ -956,7 +955,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+       struct rt_mutex_waiter *top_waiter = waiter;
+       struct rt_mutex *next_lock;
+       int chain_walk = 0, res;
+-      unsigned long flags;
+ 
+       /*
+        * Early deadlock detection. We really don't want the task to
+@@ -970,7 +968,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+       if (owner == task)
+               return -EDEADLK;
+ 
+-      raw_spin_lock_irqsave(&task->pi_lock, flags);
++      raw_spin_lock(&task->pi_lock);
+       __rt_mutex_adjust_prio(task);
+       waiter->task = task;
+       waiter->lock = lock;
+@@ -983,12 +981,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+ 
+       task->pi_blocked_on = waiter;
+ 
+-      raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++      raw_spin_unlock(&task->pi_lock);
+ 
+       if (!owner)
+               return 0;
+ 
+-      raw_spin_lock_irqsave(&owner->pi_lock, flags);
++      raw_spin_lock(&owner->pi_lock);
+       if (waiter == rt_mutex_top_waiter(lock)) {
+               rt_mutex_dequeue_pi(owner, top_waiter);
+               rt_mutex_enqueue_pi(owner, waiter);
+@@ -1003,7 +1001,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+       /* Store the lock on which owner is blocked or NULL */
+       next_lock = task_blocked_on_lock(owner);
+ 
+-      raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
++      raw_spin_unlock(&owner->pi_lock);
+       /*
+        * Even if full deadlock detection is on, if the owner is not
+        * blocked itself, we can avoid finding this out in the chain
+@@ -1019,12 +1017,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+        */
+       get_task_struct(owner);
+ 
+-      raw_spin_unlock(&lock->wait_lock);
++      raw_spin_unlock_irq(&lock->wait_lock);
+ 
+       res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
+                                        next_lock, waiter, task);
+ 
+-      raw_spin_lock(&lock->wait_lock);
++      raw_spin_lock_irq(&lock->wait_lock);
+ 
+       return res;
+ }
+@@ -1033,15 +1031,14 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
+  * Remove the top waiter from the current tasks pi waiter tree and
+  * queue it up.
+  *
+- * Called with lock->wait_lock held.
++ * Called with lock->wait_lock held and interrupts disabled.
+  */
+ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
+                                   struct rt_mutex *lock)
+ {
+       struct rt_mutex_waiter *waiter;
+-      unsigned long flags;
+ 
+-      raw_spin_lock_irqsave(&current->pi_lock, flags);
++      raw_spin_lock(&current->pi_lock);
+ 
+       waiter = rt_mutex_top_waiter(lock);
+ 
+@@ -1063,7 +1060,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
+        */
+       lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
+ 
+-      raw_spin_unlock_irqrestore(&current->pi_lock, flags);
++      raw_spin_unlock(&current->pi_lock);
+ 
+       wake_q_add(wake_q, waiter->task);
+ }
+@@ -1071,7 +1068,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
+ /*
+  * Remove a waiter from a lock and give up
+  *
+- * Must be called with lock->wait_lock held and
++ * Must be called with lock->wait_lock held and interrupts disabled. I must
+  * have just failed to try_to_take_rt_mutex().
+  */
+ static void remove_waiter(struct rt_mutex *lock,
+@@ -1080,12 +1077,11 @@ static void remove_waiter(struct rt_mutex *lock,
+       bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
+       struct task_struct *owner = rt_mutex_owner(lock);
+       struct rt_mutex *next_lock;
+-      unsigned long flags;
+ 
+-      raw_spin_lock_irqsave(&current->pi_lock, flags);
++      raw_spin_lock(&current->pi_lock);
+       rt_mutex_dequeue(lock, waiter);
+       current->pi_blocked_on = NULL;
+-      raw_spin_unlock_irqrestore(&current->pi_lock, flags);
++      raw_spin_unlock(&current->pi_lock);
+ 
+       /*
+        * Only update priority if the waiter was the highest priority
+@@ -1094,7 +1090,7 @@ static void remove_waiter(struct rt_mutex *lock,
+       if (!owner || !is_top_waiter)
+               return;
+ 
+-      raw_spin_lock_irqsave(&owner->pi_lock, flags);
++      raw_spin_lock(&owner->pi_lock);
+ 
+       rt_mutex_dequeue_pi(owner, waiter);
+ 
+@@ -1106,7 +1102,7 @@ static void remove_waiter(struct rt_mutex *lock,
+       /* Store the lock on which owner is blocked or NULL */
+       next_lock = task_blocked_on_lock(owner);
+ 
+-      raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
++      raw_spin_unlock(&owner->pi_lock);
+ 
+       /*
+        * Don't walk the chain, if the owner task is not blocked
+@@ -1118,12 +1114,12 @@ static void remove_waiter(struct rt_mutex *lock,
+       /* gets dropped in rt_mutex_adjust_prio_chain()! */
+       get_task_struct(owner);
+ 
+-      raw_spin_unlock(&lock->wait_lock);
++      raw_spin_unlock_irq(&lock->wait_lock);
+ 
+       rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
+                                  next_lock, NULL, current);
+ 
+-      raw_spin_lock(&lock->wait_lock);
++      raw_spin_lock_irq(&lock->wait_lock);
+ }
+ 
+ /*
+@@ -1155,15 +1151,23 @@ void rt_mutex_adjust_pi(struct task_struct *task)
+                                  next_lock, NULL, task);
+ }
+ 
++void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
++{
++      debug_rt_mutex_init_waiter(waiter);
++      RB_CLEAR_NODE(&waiter->pi_tree_entry);
++      RB_CLEAR_NODE(&waiter->tree_entry);
++      waiter->task = NULL;
++}
++
+ /**
+  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
+  * @lock:              the rt_mutex to take
+  * @state:             the state the task should block in (TASK_INTERRUPTIBLE
+- *                     or TASK_UNINTERRUPTIBLE)
++ *                     or TASK_UNINTERRUPTIBLE)
+  * @timeout:           the pre-initialized and started timer, or NULL for none
+  * @waiter:            the pre-initialized rt_mutex_waiter
+  *
+- * lock->wait_lock must be held by the caller.
++ * Must be called with lock->wait_lock held and interrupts disabled
+  */
+ static int __sched
+ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
+@@ -1191,13 +1195,13 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
+                               break;
+               }
+ 
+-              raw_spin_unlock(&lock->wait_lock);
++              raw_spin_unlock_irq(&lock->wait_lock);
+ 
+               debug_rt_mutex_print_deadlock(waiter);
+ 
+               schedule();
+ 
+-              raw_spin_lock(&lock->wait_lock);
++              raw_spin_lock_irq(&lock->wait_lock);
+               set_current_state(state);
+       }
+ 
+@@ -1234,17 +1238,24 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
+                 enum rtmutex_chainwalk chwalk)
+ {
+       struct rt_mutex_waiter waiter;
++      unsigned long flags;
+       int ret = 0;
+ 
+-      debug_rt_mutex_init_waiter(&waiter);
+-      RB_CLEAR_NODE(&waiter.pi_tree_entry);
+-      RB_CLEAR_NODE(&waiter.tree_entry);
++      rt_mutex_init_waiter(&waiter);
+ 
+-      raw_spin_lock(&lock->wait_lock);
++      /*
++       * Technically we could use raw_spin_[un]lock_irq() here, but this can
++       * be called in early boot if the cmpxchg() fast path is disabled
++       * (debug, no architecture support). In this case we will acquire the
++       * rtmutex with lock->wait_lock held. But we cannot unconditionally
++       * enable interrupts in that early boot case. So we need to use the
++       * irqsave/restore variants.
++       */
++      raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ 
+       /* Try to acquire the lock again: */
+       if (try_to_take_rt_mutex(lock, current, NULL)) {
+-              raw_spin_unlock(&lock->wait_lock);
++              raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+               return 0;
+       }
+ 
+@@ -1273,7 +1284,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
+        */
+       fixup_rt_mutex_waiters(lock);
+ 
+-      raw_spin_unlock(&lock->wait_lock);
++      raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ 
+       /* Remove pending timer: */
+       if (unlikely(timeout))
+@@ -1302,6 +1313,7 @@ static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock)
+  */
+ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
+ {
++      unsigned long flags;
+       int ret;
+ 
+       /*
+@@ -1313,14 +1325,14 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
+               return 0;
+ 
+       /*
+-       * The mutex has currently no owner. Lock the wait lock and
+-       * try to acquire the lock.
++       * The mutex has currently no owner. Lock the wait lock and try to
++       * acquire the lock. We use irqsave here to support early boot calls.
+        */
+-      raw_spin_lock(&lock->wait_lock);
++      raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ 
+       ret = __rt_mutex_slowtrylock(lock);
+ 
+-      raw_spin_unlock(&lock->wait_lock);
++      raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ 
+       return ret;
+ }
+@@ -1332,7 +1344,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
+ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
+                                       struct wake_q_head *wake_q)
+ {
+-      raw_spin_lock(&lock->wait_lock);
++      unsigned long flags;
++
++      /* irqsave required to support early boot calls */
++      raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ 
+       debug_rt_mutex_unlock(lock);
+ 
+@@ -1369,10 +1384,10 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
+        */
+       while (!rt_mutex_has_waiters(lock)) {
+               /* Drops lock->wait_lock ! */
+-              if (unlock_rt_mutex_safe(lock) == true)
++              if (unlock_rt_mutex_safe(lock, flags) == true)
+                       return false;
+               /* Relock the rtmutex and try again */
+-              raw_spin_lock(&lock->wait_lock);
++              raw_spin_lock_irqsave(&lock->wait_lock, flags);
+       }
+ 
+       /*
+@@ -1383,7 +1398,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
+        */
+       mark_wakeup_next_waiter(wake_q, lock);
+ 
+-      raw_spin_unlock(&lock->wait_lock);
++      raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ 
+       /* check PI boosting */
+       return true;
+@@ -1482,19 +1497,6 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
+ }
+ EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
+ 
+-/*
+- * Futex variant with full deadlock detection.
+- * Futex variants must not use the fast-path, see __rt_mutex_futex_unlock().
+- */
+-int __sched rt_mutex_timed_futex_lock(struct rt_mutex *lock,
+-                            struct hrtimer_sleeper *timeout)
+-{
+-      might_sleep();
+-
+-      return rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE,
+-                               timeout, RT_MUTEX_FULL_CHAINWALK);
+-}
+-
+ /*
+  * Futex variant, must not use fastpath.
+  */
+@@ -1687,10 +1689,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+ {
+       int ret;
+ 
+-      raw_spin_lock(&lock->wait_lock);
++      raw_spin_lock_irq(&lock->wait_lock);
+ 
+       if (try_to_take_rt_mutex(lock, task, NULL)) {
+-              raw_spin_unlock(&lock->wait_lock);
++              raw_spin_unlock_irq(&lock->wait_lock);
+               return 1;
+       }
+ 
+@@ -1711,7 +1713,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+       if (unlikely(ret))
+               remove_waiter(lock, waiter);
+ 
+-      raw_spin_unlock(&lock->wait_lock);
++      raw_spin_unlock_irq(&lock->wait_lock);
+ 
+       debug_rt_mutex_print_deadlock(waiter);
+ 
+@@ -1761,20 +1763,16 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
+ {
+       int ret;
+ 
+-      raw_spin_lock(&lock->wait_lock);
+-
+-      set_current_state(TASK_INTERRUPTIBLE);
+-
++      raw_spin_lock_irq(&lock->wait_lock);
+       /* sleep on the mutex */
++      set_current_state(TASK_INTERRUPTIBLE);
+       ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
+-
+       /*
+        * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+        * have to fix that up.
+        */
+       fixup_rt_mutex_waiters(lock);
+-
+-      raw_spin_unlock(&lock->wait_lock);
++      raw_spin_unlock_irq(&lock->wait_lock);
+ 
+       return ret;
+ }
+@@ -1804,15 +1802,32 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
+       bool cleanup = false;
+ 
+       raw_spin_lock_irq(&lock->wait_lock);
++      /*
++       * Do an unconditional try-lock, this deals with the lock stealing
++       * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
++       * sets a NULL owner.
++       *
++       * We're not interested in the return value, because the subsequent
++       * test on rt_mutex_owner() will infer that. If the trylock succeeded,
++       * we will own the lock and it will have removed the waiter. If we
++       * failed the trylock, we're still not owner and we need to remove
++       * ourselves.
++       */
++      try_to_take_rt_mutex(lock, current, waiter);
+       /*
+        * Unless we're the owner; we're still enqueued on the wait_list.
+        * So check if we became owner, if not, take us off the wait_list.
+        */
+       if (rt_mutex_owner(lock) != current) {
+               remove_waiter(lock, waiter);
+-              fixup_rt_mutex_waiters(lock);
+               cleanup = true;
+       }
++      /*
++       * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
++       * have to fix that up.
++       */
++      fixup_rt_mutex_waiters(lock);
++
+       raw_spin_unlock_irq(&lock->wait_lock);
+ 
+       return cleanup;
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index 4584db96265d4..97c048c494f00 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -102,6 +102,7 @@ extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
+ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
+                                      struct task_struct *proxy_owner);
+ extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
++extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
+ extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+                                    struct rt_mutex_waiter *waiter,
+                                    struct task_struct *task);
+@@ -110,7 +111,6 @@ extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
+                              struct rt_mutex_waiter *waiter);
+ extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
+                                struct rt_mutex_waiter *waiter);
+-extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
+ extern int rt_mutex_futex_trylock(struct rt_mutex *l);
+ extern int __rt_mutex_futex_trylock(struct rt_mutex *l);
+ 
