Ideally we want the CPU to be preemptible even when inside or waiting
for a lock. We cannot make it preemptible when inside a lock critical
section, but we can try to make the task voluntarily yield the CPU
when waiting for a lock.

This patch checks the need_resched() flag and yields the CPU when the
preemption count is 1, i.e. when the spin_lock() call is not made from
within a region that already has preemption disabled. Otherwise, it
will just perform RT spinning with a minimum priority of 1.

Signed-off-by: Waiman Long <[email protected]>
---
 kernel/locking/qspinlock_rt.h | 68 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 65 insertions(+), 3 deletions(-)

diff --git a/kernel/locking/qspinlock_rt.h b/kernel/locking/qspinlock_rt.h
index 0c4d051..18ec1f8 100644
--- a/kernel/locking/qspinlock_rt.h
+++ b/kernel/locking/qspinlock_rt.h
@@ -43,6 +43,16 @@
  * it will have to break out of the MCS wait queue just like what is done
  * in the OSQ lock. Then it has to retry RT spinning if it has been boosted
  * to RT priority.
+ *
+ * Another RT requirement is that the CPU needs to be preemptible even when
+ * waiting for a spinlock. If the task has already acquired the lock, we
+ * will let it run to completion to release the lock and reenable preemption.
+ * For non-nested spinlock, a spinlock waiter will periodically check
+ * need_resched flag to see if it should break out of the waiting loop and
+ * yield the CPU as long as the preemption count indicates just one
+ * preempt_disable(). For nested spinlock with outer lock acquired, it will
+ * boost its priority to the highest RT priority level to try to acquire the
+ * inner lock, finish up its work, release the locks and reenable preemption.
  */
 #include <linux/sched.h>
 
@@ -51,6 +61,15 @@
 #endif
 
 /*
+ * Rescheduling is only needed when it is in the task context, the
+ * PREEMPT_NEED_RESCHED flag is set and the preemption count is one.
+ * If only the TIF_NEED_RESCHED flag is set, it will be moved to RT
+ * spinning with a minimum priority of 1.
+ */
+#define rt_should_resched()    (preempt_count() == \
+                               (PREEMPT_OFFSET | PREEMPT_NEED_RESCHED))
+
+/*
  * For proper unqueuing from the MCS wait queue, we need to store the encoded
  * tail code as well the previous node pointer into the extra MCS node. Since
  * CPUs in interrupt context won't use the per-CPU MCS nodes anymore. So only
@@ -133,9 +152,12 @@ static bool __rt_spin_trylock(struct qspinlock *lock,
        if (!task)
                min_prio = in_nmi() ? MAX_RT_PRIO + 1
                         : in_irq() ? MAX_RT_PRIO : 1;
+       else if (need_resched() && !min_prio)
+               min_prio = 1;
        if (!(prio = rt_task_priority(task, min_prio)))
                return false;
 
+
        /*
         * Spin on the lock and try to set its priority into the pending byte.
         */
@@ -189,6 +211,33 @@ static bool __rt_spin_trylock(struct qspinlock *lock,
                prio = MAX(ol ? ol->pending : 0,
                           rt_task_priority(task, min_prio));
 
+               /*
+                * If another task needs this CPU, we will yield it if in
+                * the process context and it is not a nested spinlock call.
+                * Otherwise, we will raise our RT priority to try to get
+                * the lock ASAP.
+                */
+               if (!task || !rt_should_resched())
+                       continue;
+
+               if (outerlock) {
+                       if (min_prio < MAX_RT_PRIO)
+                               min_prio = MAX_RT_PRIO;
+                       continue;
+               }
+
+               /*
+                * In the unlikely event that we need to relinquish the CPU,
+                * we need to make sure that we are not the highest priority
+                * task waiting for the lock.
+                */
+               if (mypdprio) {
+                       lockpend = READ_ONCE(l->locked_pending);
+                       pdprio = (u8)(lockpend >> _Q_PENDING_OFFSET);
+                       if (pdprio == mypdprio)
+                               cmpxchg_relaxed(&l->pending, pdprio, 0);
+               }
+               schedule_preempt_disabled();
        }
        return true;
 }
@@ -293,7 +342,7 @@ static bool rt_wait_node_or_unqueue(struct qspinlock *lock,
        rt_write_prev(node, prev);      /* Save previous node pointer */
 
        while (!READ_ONCE(node->locked)) {
-               if (rt_task_priority(current, 0))
+               if (rt_task_priority(current, 0) || need_resched())
                        goto unqueue;
                cpu_relax();
        }
@@ -354,6 +403,12 @@ static bool rt_wait_node_or_unqueue(struct qspinlock *lock,
         */
        __this_cpu_dec(mcs_nodes[0].count);
 
+       /*
+        * Yield the CPU if needed by another task with the right condition.
+        */
+       if (rt_should_resched())
+               schedule_preempt_disabled();
+
        return true;    /* Need to retry RT spinning */
 }
 
@@ -385,9 +440,10 @@ static u32 rt_spin_lock_or_retry(struct qspinlock *lock,
                }
                /*
                 * We need to break out of the non-RT wait queue and do
-                * RT spinnning if we become an RT task.
+                * RT spinning if we become an RT task or another task needs
+                * the CPU.
                 */
-               if (rt_task_priority(current, 0)) {
+               if (rt_task_priority(current, 0) || need_resched()) {
                        retry = true;
                        goto unlock;
                }
@@ -427,6 +483,12 @@ static u32 rt_spin_lock_or_retry(struct qspinlock *lock,
         */
        __this_cpu_dec(mcs_nodes[0].count);
 
+       /*
+        * Yield the CPU if needed by another task with the right condition.
+        */
+       if (retry && rt_should_resched())
+               schedule_preempt_disabled();
+
        return retry ? RT_RETRY : 1;
 }
 
-- 
1.8.3.1

Reply via email to