An over-committed guest with more vCPUs than pCPUs suffers heavy
overhead in osq_lock().

This is because vCPU A can hold the osq lock and then be preempted
(yield out), while vCPU B keeps spinning on its per-CPU node->locked,
waiting for it to be set. In other words, vCPU B is waiting for vCPU A
to run again and pass on the osq lock.

So let's also use need_yield_to() to detect whether we need to stop
spinning.
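
For reference, vcpu_get_yield_count() and need_yield_to() are expected
to come from an earlier patch in this series; a minimal sketch of their
intended semantics, assuming a hypervisor-maintained per-vCPU yield
counter (arch_vcpu_yield_count() below is a hypothetical arch hook, not
an existing API):

	/*
	 * Sketch only: assumes the hypervisor bumps a per-vCPU counter
	 * on every yield/dispatch, much like the pseries lppaca
	 * yield_count.
	 */
	static inline unsigned int vcpu_get_yield_count(int cpu)
	{
		return arch_vcpu_yield_count(cpu); /* hypothetical hook */
	}

	/*
	 * True if the vCPU has yielded since old_yield_count was
	 * sampled, i.e. further spinning on it is likely wasted work.
	 */
	static inline bool need_yield_to(int cpu,
					 unsigned int old_yield_count)
	{
		return vcpu_get_yield_count(cpu) != old_yield_count;
	}

With this, a spinner bails out of the MCS queue as soon as its
predecessor's vCPU is seen to have been scheduled out, instead of
burning its own timeslice.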

Signed-off-by: Pan Xinhui <[email protected]>
---
 kernel/locking/osq_lock.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index 05a3785..4287603 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -21,6 +21,11 @@ static inline int encode_cpu(int cpu_nr)
        return cpu_nr + 1;
 }
 
+static inline int node_cpu(struct optimistic_spin_node *node)
+{
+       return node->cpu - 1;
+}
+
 static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
 {
        int cpu_nr = encoded_cpu_val - 1;
@@ -84,9 +89,10 @@ osq_wait_next(struct optimistic_spin_queue *lock,
 bool osq_lock(struct optimistic_spin_queue *lock)
 {
        struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
-       struct optimistic_spin_node *prev, *next;
+       struct optimistic_spin_node *prev, *next, *prev_old;
        int curr = encode_cpu(smp_processor_id());
        int old;
+       unsigned int yield_count;
 
        node->locked = 0;
        node->next = NULL;
@@ -114,14 +120,23 @@ bool osq_lock(struct optimistic_spin_queue *lock)
         * guaranteed their existence -- this allows us to apply
         * cmpxchg in an attempt to undo our queueing.
         */
-
+       prev_old = prev;
+       yield_count = vcpu_get_yield_count(node_cpu(prev));
        while (!READ_ONCE(node->locked)) {
                /*
                 * If we need to reschedule bail... so we can block.
                 */
-               if (need_resched())
+               if (need_resched() ||
+                       need_yield_to(node_cpu(prev), yield_count))
                        goto unqueue;
 
+               /* Our predecessor changed; resample its yield count. */
+               prev = READ_ONCE(node->prev);
+               if (prev != prev_old) {
+                       prev_old = prev;
+                       yield_count = vcpu_get_yield_count(node_cpu(prev));
+               }
+
                cpu_relax_lowlatency();
        }
        return true;
-- 
2.4.11
