The problem:

On -RT, an emulated LAPIC timer instances has the following path:

1) hard interrupt
2) ksoftirqd is scheduled
3) ksoftirqd wakes up vcpu thread
4) vcpu thread is scheduled

This extra context switch introduces unnecessary latency in the 
LAPIC path for a KVM guest.

The solution:

Allow waking up vcpu thread from hardirq context,
thus avoiding the need for ksoftirqd to be scheduled.

Normal waitqueues make use of spinlocks, which on -RT 
are sleepable locks. Therefore, waking up a waitqueue 
waiter involves locking a sleeping lock, which 
is not allowed from hard interrupt context.

cyclictest command line:
# cyclictest -m -n -q -p99 -l 1000000 -h60  -D 1m

This patch reduces the average latency in my tests from 14us to 11us.

Signed-off-by: Marcelo Tosatti <mtosa...@redhat.com>

---
 arch/arm/kvm/arm.c                  |    4 ++--
 arch/arm/kvm/psci.c                 |    4 ++--
 arch/mips/kvm/kvm_mips.c            |    8 ++++----
 arch/powerpc/include/asm/kvm_host.h |    4 ++--
 arch/powerpc/kvm/book3s_hv.c        |   20 ++++++++++----------
 arch/s390/include/asm/kvm_host.h    |    2 +-
 arch/s390/kvm/interrupt.c           |   22 ++++++++++------------
 arch/s390/kvm/sigp.c                |   16 ++++++++--------
 arch/x86/kvm/lapic.c                |    6 +++---
 include/linux/kvm_host.h            |    4 ++--
 virt/kvm/async_pf.c                 |    4 ++--
 virt/kvm/kvm_main.c                 |   16 ++++++++--------
 12 files changed, 54 insertions(+), 56 deletions(-)

Index: linux-stable-rt/arch/arm/kvm/arm.c
===================================================================
--- linux-stable-rt.orig/arch/arm/kvm/arm.c     2014-11-25 14:13:39.188899952 
-0200
+++ linux-stable-rt/arch/arm/kvm/arm.c  2014-11-25 14:14:38.620810092 -0200
@@ -495,9 +495,9 @@
 
 static void vcpu_pause(struct kvm_vcpu *vcpu)
 {
-       wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+       struct swait_head *wq = kvm_arch_vcpu_wq(vcpu);
 
-       wait_event_interruptible(*wq, !vcpu->arch.pause);
+       swait_event_interruptible(*wq, !vcpu->arch.pause);
 }
 
 static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
Index: linux-stable-rt/arch/arm/kvm/psci.c
===================================================================
--- linux-stable-rt.orig/arch/arm/kvm/psci.c    2014-11-25 14:13:39.189899951 
-0200
+++ linux-stable-rt/arch/arm/kvm/psci.c 2014-11-25 14:14:38.620810092 -0200
@@ -36,7 +36,7 @@
 {
        struct kvm *kvm = source_vcpu->kvm;
        struct kvm_vcpu *vcpu = NULL, *tmp;
-       wait_queue_head_t *wq;
+       struct swait_head *wq;
        unsigned long cpu_id;
        unsigned long mpidr;
        phys_addr_t target_pc;
@@ -80,7 +80,7 @@
        smp_mb();               /* Make sure the above is visible */
 
        wq = kvm_arch_vcpu_wq(vcpu);
-       wake_up_interruptible(wq);
+       swait_wake_interruptible(wq);
 
        return KVM_PSCI_RET_SUCCESS;
 }
Index: linux-stable-rt/arch/mips/kvm/kvm_mips.c
===================================================================
--- linux-stable-rt.orig/arch/mips/kvm/kvm_mips.c       2014-11-25 
14:13:39.191899948 -0200
+++ linux-stable-rt/arch/mips/kvm/kvm_mips.c    2014-11-25 14:14:38.621810091 
-0200
@@ -464,8 +464,8 @@
 
        dvcpu->arch.wait = 0;
 
-       if (waitqueue_active(&dvcpu->wq)) {
-               wake_up_interruptible(&dvcpu->wq);
+       if (swaitqueue_active(&dvcpu->wq)) {
+               swait_wake_interruptible(&dvcpu->wq);
        }
 
        return 0;
@@ -971,8 +971,8 @@
        kvm_mips_callbacks->queue_timer_int(vcpu);
 
        vcpu->arch.wait = 0;
-       if (waitqueue_active(&vcpu->wq)) {
-               wake_up_interruptible(&vcpu->wq);
+       if (swaitqueue_active(&vcpu->wq)) {
+               swait_wake_nterruptible(&vcpu->wq);
        }
 }
 
Index: linux-stable-rt/arch/powerpc/include/asm/kvm_host.h
===================================================================
--- linux-stable-rt.orig/arch/powerpc/include/asm/kvm_host.h    2014-11-25 
14:13:39.193899944 -0200
+++ linux-stable-rt/arch/powerpc/include/asm/kvm_host.h 2014-11-25 
14:14:38.621810091 -0200
@@ -295,7 +295,7 @@
        u8 in_guest;
        struct list_head runnable_threads;
        spinlock_t lock;
-       wait_queue_head_t wq;
+       struct swait_head wq;
        u64 stolen_tb;
        u64 preempt_tb;
        struct kvm_vcpu *runner;
@@ -612,7 +612,7 @@
        u8 prodded;
        u32 last_inst;
 
-       wait_queue_head_t *wqp;
+       struct swait_head *wqp;
        struct kvmppc_vcore *vcore;
        int ret;
        int trap;
Index: linux-stable-rt/arch/powerpc/kvm/book3s_hv.c
===================================================================
--- linux-stable-rt.orig/arch/powerpc/kvm/book3s_hv.c   2014-11-25 
14:13:39.195899942 -0200
+++ linux-stable-rt/arch/powerpc/kvm/book3s_hv.c        2014-11-25 
14:14:38.625810085 -0200
@@ -74,11 +74,11 @@
 {
        int me;
        int cpu = vcpu->cpu;
-       wait_queue_head_t *wqp;
+       struct swait_head *wqp;
 
        wqp = kvm_arch_vcpu_wq(vcpu);
-       if (waitqueue_active(wqp)) {
-               wake_up_interruptible(wqp);
+       if (swaitqueue_active(wqp)) {
+               swait_wake_interruptible(wqp);
                ++vcpu->stat.halt_wakeup;
        }
 
@@ -583,8 +583,8 @@
                tvcpu->arch.prodded = 1;
                smp_mb();
                if (vcpu->arch.ceded) {
-                       if (waitqueue_active(&vcpu->wq)) {
-                               wake_up_interruptible(&vcpu->wq);
+                       if (swaitqueue_active(&vcpu->wq)) {
+                               swait_wake_interruptible(&vcpu->wq);
                                vcpu->stat.halt_wakeup++;
                        }
                }
@@ -1199,7 +1199,7 @@
                if (vcore) {
                        INIT_LIST_HEAD(&vcore->runnable_threads);
                        spin_lock_init(&vcore->lock);
-                       init_waitqueue_head(&vcore->wq);
+                       init_swait_head(&vcore->wq);
                        vcore->preempt_tb = TB_NIL;
                        vcore->lpcr = kvm->arch.lpcr;
                        vcore->first_vcpuid = core * threads_per_core;
@@ -1566,13 +1566,13 @@
  */
 static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 {
-       DEFINE_WAIT(wait);
+       DEFINE_SWAITER(wait);
 
-       prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+       swait_prepare(&vc->wq, &wait, TASK_INTERRUPTIBLE);
        vc->vcore_state = VCORE_SLEEPING;
        spin_unlock(&vc->lock);
        schedule();
-       finish_wait(&vc->wq, &wait);
+       swait_finish(&vc->wq, &wait);
        spin_lock(&vc->lock);
        vc->vcore_state = VCORE_INACTIVE;
 }
@@ -1613,7 +1613,7 @@
                        kvmppc_create_dtl_entry(vcpu, vc);
                        kvmppc_start_thread(vcpu);
                } else if (vc->vcore_state == VCORE_SLEEPING) {
-                       wake_up(&vc->wq);
+                       swait_wake(&vc->wq);
                }
 
        }
Index: linux-stable-rt/arch/s390/include/asm/kvm_host.h
===================================================================
--- linux-stable-rt.orig/arch/s390/include/asm/kvm_host.h       2014-11-25 
14:13:39.196899940 -0200
+++ linux-stable-rt/arch/s390/include/asm/kvm_host.h    2014-11-25 
14:14:38.627810082 -0200
@@ -233,7 +233,7 @@
        atomic_t active;
        struct kvm_s390_float_interrupt *float_int;
        int timer_due; /* event indicator for waitqueue below */
-       wait_queue_head_t *wq;
+       struct swait_head *wq;
        atomic_t *cpuflags;
        unsigned int action_bits;
 };
Index: linux-stable-rt/arch/s390/kvm/interrupt.c
===================================================================
--- linux-stable-rt.orig/arch/s390/kvm/interrupt.c      2014-11-25 
14:13:39.197899939 -0200
+++ linux-stable-rt/arch/s390/kvm/interrupt.c   2014-11-25 14:14:38.630810077 
-0200
@@ -403,7 +403,7 @@
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 {
        u64 now, sltime;
-       DECLARE_WAITQUEUE(wait, current);
+       DECLARE_SWAITER(wait);
 
        vcpu->stat.exit_wait_state++;
        if (kvm_cpu_has_interrupt(vcpu))
@@ -440,12 +440,11 @@
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        spin_lock(&vcpu->arch.local_int.float_int->lock);
        spin_lock_bh(&vcpu->arch.local_int.lock);
-       add_wait_queue(&vcpu->wq, &wait);
+       swait_prepare(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
        while (list_empty(&vcpu->arch.local_int.list) &&
                list_empty(&vcpu->arch.local_int.float_int->list) &&
                (!vcpu->arch.local_int.timer_due) &&
                !signal_pending(current)) {
-               set_current_state(TASK_INTERRUPTIBLE);
                spin_unlock_bh(&vcpu->arch.local_int.lock);
                spin_unlock(&vcpu->arch.local_int.float_int->lock);
                schedule();
@@ -453,8 +452,7 @@
                spin_lock_bh(&vcpu->arch.local_int.lock);
        }
        __unset_cpu_idle(vcpu);
-       __set_current_state(TASK_RUNNING);
-       remove_wait_queue(&vcpu->wq, &wait);
+       swait_finish(&vcpu->wq, &wait);
        spin_unlock_bh(&vcpu->arch.local_int.lock);
        spin_unlock(&vcpu->arch.local_int.float_int->lock);
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -469,8 +467,8 @@
 
        spin_lock(&vcpu->arch.local_int.lock);
        vcpu->arch.local_int.timer_due = 1;
-       if (waitqueue_active(&vcpu->wq))
-               wake_up_interruptible(&vcpu->wq);
+       if (swaitqueue_active(&vcpu->wq))
+               swait_wake_interruptible(&vcpu->wq);
        spin_unlock(&vcpu->arch.local_int.lock);
 }
 
@@ -617,7 +615,7 @@
        spin_lock_bh(&li->lock);
        list_add(&inti->list, &li->list);
        atomic_set(&li->active, 1);
-       BUG_ON(waitqueue_active(li->wq));
+       BUG_ON(swaitqueue_active(li->wq));
        spin_unlock_bh(&li->lock);
        return 0;
 }
@@ -750,8 +748,8 @@
        li = fi->local_int[sigcpu];
        spin_lock_bh(&li->lock);
        atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
-       if (waitqueue_active(li->wq))
-               wake_up_interruptible(li->wq);
+       if (swaitqueue_active(li->wq))
+               swait_wake_interruptible(li->wq);
        spin_unlock_bh(&li->lock);
        spin_unlock(&fi->lock);
        mutex_unlock(&kvm->lock);
@@ -836,8 +834,8 @@
        if (inti->type == KVM_S390_SIGP_STOP)
                li->action_bits |= ACTION_STOP_ON_STOP;
        atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
-       if (waitqueue_active(&vcpu->wq))
-               wake_up_interruptible(&vcpu->wq);
+       if (swaitqueue_active(&vcpu->wq))
+               swait_wake_interruptible(&vcpu->wq);
        spin_unlock_bh(&li->lock);
        mutex_unlock(&vcpu->kvm->lock);
        return 0;
Index: linux-stable-rt/arch/s390/kvm/sigp.c
===================================================================
--- linux-stable-rt.orig/arch/s390/kvm/sigp.c   2014-11-25 14:13:39.198899937 
-0200
+++ linux-stable-rt/arch/s390/kvm/sigp.c        2014-11-25 14:14:38.633810073 
-0200
@@ -79,8 +79,8 @@
        list_add_tail(&inti->list, &li->list);
        atomic_set(&li->active, 1);
        atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
-       if (waitqueue_active(li->wq))
-               wake_up_interruptible(li->wq);
+       if (swaitqueue_active(li->wq))
+               swait_wake_interruptible(li->wq);
        spin_unlock_bh(&li->lock);
        rc = SIGP_CC_ORDER_CODE_ACCEPTED;
        VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
@@ -148,8 +148,8 @@
        list_add_tail(&inti->list, &li->list);
        atomic_set(&li->active, 1);
        atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
-       if (waitqueue_active(li->wq))
-               wake_up_interruptible(li->wq);
+       if (swaitqueue_active(li->wq))
+               swait_wake_interruptible(li->wq);
        spin_unlock_bh(&li->lock);
        rc = SIGP_CC_ORDER_CODE_ACCEPTED;
        VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
@@ -179,8 +179,8 @@
        atomic_set(&li->active, 1);
        atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
        li->action_bits |= action;
-       if (waitqueue_active(li->wq))
-               wake_up_interruptible(li->wq);
+       if (swaitqueue_active(li->wq))
+               swait_wake_interruptible(li->wq);
 out:
        spin_unlock_bh(&li->lock);
 
@@ -288,8 +288,8 @@
 
        list_add_tail(&inti->list, &li->list);
        atomic_set(&li->active, 1);
-       if (waitqueue_active(li->wq))
-               wake_up_interruptible(li->wq);
+       if (swaitqueue_active(li->wq))
+               swait_wake_interruptible(li->wq);
        rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 
        VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
Index: linux-stable-rt/arch/x86/kvm/lapic.c
===================================================================
--- linux-stable-rt.orig/arch/x86/kvm/lapic.c   2014-11-25 14:13:39.199899935 
-0200
+++ linux-stable-rt/arch/x86/kvm/lapic.c        2014-11-25 14:14:38.636810068 
-0200
@@ -1533,7 +1533,7 @@
        struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
        struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, 
lapic_timer);
        struct kvm_vcpu *vcpu = apic->vcpu;
-       wait_queue_head_t *q = &vcpu->wq;
+       struct swait_head *q = &vcpu->wq;
 
        /*
         * There is a race window between reading and incrementing, but we do
@@ -1547,8 +1547,8 @@
                kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
        }
 
-       if (waitqueue_active(q))
-               wake_up_interruptible(q);
+       if (swaitqueue_active(q))
+               swait_wake_interruptible(q);
 
        if (lapic_is_periodic(apic)) {
                hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
Index: linux-stable-rt/include/linux/kvm_host.h
===================================================================
--- linux-stable-rt.orig/include/linux/kvm_host.h       2014-11-25 
14:13:39.201899932 -0200
+++ linux-stable-rt/include/linux/kvm_host.h    2014-11-25 14:14:38.638810065 
-0200
@@ -231,7 +231,7 @@
 
        int fpu_active;
        int guest_fpu_loaded, guest_xcr0_loaded;
-       wait_queue_head_t wq;
+       struct swait_head wq;
        struct pid *pid;
        int sigset_active;
        sigset_t sigset;
@@ -677,7 +677,7 @@
 }
 #endif
 
-static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct swait_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 {
 #ifdef __KVM_HAVE_ARCH_WQP
        return vcpu->arch.wqp;
Index: linux-stable-rt/virt/kvm/async_pf.c
===================================================================
--- linux-stable-rt.orig/virt/kvm/async_pf.c    2014-11-25 14:13:39.202899931 
-0200
+++ linux-stable-rt/virt/kvm/async_pf.c 2014-11-25 14:14:38.639810063 -0200
@@ -82,8 +82,8 @@
 
        trace_kvm_async_pf_completed(addr, gva);
 
-       if (waitqueue_active(&vcpu->wq))
-               wake_up_interruptible(&vcpu->wq);
+       if (swaitqueue_active(&vcpu->wq))
+               swait_wake_interruptible(&vcpu->wq);
 
        mmput(mm);
        kvm_put_kvm(vcpu->kvm);
Index: linux-stable-rt/virt/kvm/kvm_main.c
===================================================================
--- linux-stable-rt.orig/virt/kvm/kvm_main.c    2014-11-25 14:13:39.204899928 
-0200
+++ linux-stable-rt/virt/kvm/kvm_main.c 2014-11-25 14:14:38.641810060 -0200
@@ -219,7 +219,7 @@
        vcpu->kvm = kvm;
        vcpu->vcpu_id = id;
        vcpu->pid = NULL;
-       init_waitqueue_head(&vcpu->wq);
+       init_swait_head(&vcpu->wq);
        kvm_async_pf_vcpu_init(vcpu);
 
        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -1675,10 +1675,10 @@
  */
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
-       DEFINE_WAIT(wait);
+       DEFINE_SWAITER(wait);
 
        for (;;) {
-               prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+               swait_prepare(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
                if (kvm_arch_vcpu_runnable(vcpu)) {
                        kvm_make_request(KVM_REQ_UNHALT, vcpu);
@@ -1692,7 +1692,7 @@
                schedule();
        }
 
-       finish_wait(&vcpu->wq, &wait);
+       swait_finish(&vcpu->wq, &wait);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_block);
 
@@ -1704,11 +1704,11 @@
 {
        int me;
        int cpu = vcpu->cpu;
-       wait_queue_head_t *wqp;
+       struct swait_head *wqp;
 
        wqp = kvm_arch_vcpu_wq(vcpu);
-       if (waitqueue_active(wqp)) {
-               wake_up_interruptible(wqp);
+       if (swaitqueue_active(wqp)) {
+               swait_wake_interruptible(wqp);
                ++vcpu->stat.halt_wakeup;
        }
 
@@ -1814,7 +1814,7 @@
                                continue;
                        if (vcpu == me)
                                continue;
-                       if (waitqueue_active(&vcpu->wq))
+                       if (swaitqueue_active(&vcpu->wq))
                                continue;
                        if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
                                continue;


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to