The PIT injection logic is problematic under the following cases:

1) If there is a higher priority vector to be delivered by the time
kvm_pit_timer_intr_post is invoked ps->inject_pending won't be set. 
This opens the possibility for missing many PIT event injections (say if
guest executes hlt at this point).

2) ps->inject_pending is racy with more than two vcpus. Since there's no locking
around read/dec of pt->pending, two vcpu's can inject two interrupts for a 
single
pt->pending count.

Fix 1 by using an irq ack notifier: only reinject when the previous irq 
has been acked. Fix 2 with appropriate locking around manipulation of 
pending count and irq_ack by the injection / ack paths.

Also, count_load_time should be set at the time the count is reloaded,
not when the interrupt is injected (BTW, LAPIC uses the same apparently
broken scheme, could someone explain what was the reasoning behind
that? kvm_apic_timer_intr_post).

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm/arch/x86/kvm/i8254.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8254.c
+++ kvm/arch/x86/kvm/i8254.c
@@ -207,6 +207,8 @@ static int __pit_timer_fn(struct kvm_kpi
 
        pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
        pt->scheduled = ktime_to_ns(pt->timer.expires);
+       if (pt->period)
+               ps->channels[0].count_load_time = pt->timer.expires;
 
        return (pt->period == 0 ? 0 : 1);
 }
@@ -215,12 +217,22 @@ int pit_has_pending_timer(struct kvm_vcp
 {
        struct kvm_pit *pit = vcpu->kvm->arch.vpit;
 
-       if (pit && vcpu->vcpu_id == 0 && pit->pit_state.inject_pending)
+       if (pit && vcpu->vcpu_id == 0 && pit->pit_state.irq_ack)
                return atomic_read(&pit->pit_state.pit_timer.pending);
-
        return 0;
 }
 
+void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
+{
+       struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
+                                                irq_ack_notifier);
+       spin_lock(&ps->inject_lock);
+       if (atomic_dec_return(&ps->pit_timer.pending) < 0)
+               WARN_ON(1);
+       ps->irq_ack = 1;
+       spin_unlock(&ps->inject_lock);
+}
+
 static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 {
        struct kvm_kpit_state *ps;
@@ -255,8 +267,9 @@ static void destroy_pit_timer(struct kvm
        hrtimer_cancel(&pt->timer);
 }
 
-static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
+static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
 {
+       struct kvm_kpit_timer *pt = &ps->pit_timer;
        s64 interval;
 
        interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
@@ -268,6 +281,7 @@ static void create_pit_timer(struct kvm_
        pt->period = (is_period == 0) ? 0 : interval;
        pt->timer.function = pit_timer_fn;
        atomic_set(&pt->pending, 0);
+       ps->irq_ack = 1;
 
        hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
                      HRTIMER_MODE_ABS);
@@ -302,11 +316,11 @@ static void pit_load_count(struct kvm *k
        case 1:
         /* FIXME: enhance mode 4 precision */
        case 4:
-               create_pit_timer(&ps->pit_timer, val, 0);
+               create_pit_timer(ps, val, 0);
                break;
        case 2:
        case 3:
-               create_pit_timer(&ps->pit_timer, val, 1);
+               create_pit_timer(ps, val, 1);
                break;
        default:
                destroy_pit_timer(&ps->pit_timer);
@@ -520,7 +534,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
        mutex_unlock(&pit->pit_state.lock);
 
        atomic_set(&pit->pit_state.pit_timer.pending, 0);
-       pit->pit_state.inject_pending = 1;
+       pit->pit_state.irq_ack = 1;
 }
 
 struct kvm_pit *kvm_create_pit(struct kvm *kvm)
@@ -534,6 +548,7 @@ struct kvm_pit *kvm_create_pit(struct kv
 
        mutex_init(&pit->pit_state.lock);
        mutex_lock(&pit->pit_state.lock);
+       spin_lock_init(&pit->pit_state.inject_lock);
 
        /* Initialize PIO device */
        pit->dev.read = pit_ioport_read;
@@ -555,6 +570,9 @@ struct kvm_pit *kvm_create_pit(struct kv
        pit_state->pit = pit;
        hrtimer_init(&pit_state->pit_timer.timer,
                     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+       pit_state->irq_ack_notifier.gsi = 0;
+       pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
+       kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
        mutex_unlock(&pit->pit_state.lock);
 
        kvm_pit_reset(pit);
@@ -592,37 +610,19 @@ void kvm_inject_pit_timer_irqs(struct kv
        struct kvm_kpit_state *ps;
 
        if (vcpu && pit) {
+               int inject = 0;
                ps = &pit->pit_state;
 
-               /* Try to inject pending interrupts when:
-                * 1. Pending exists
-                * 2. Last interrupt was accepted or waited for too long time*/
-               if (atomic_read(&ps->pit_timer.pending) &&
-                   (ps->inject_pending ||
-                   (jiffies - ps->last_injected_time
-                               >= KVM_MAX_PIT_INTR_INTERVAL))) {
-                       ps->inject_pending = 0;
-                       __inject_pit_timer_intr(kvm);
-                       ps->last_injected_time = jiffies;
-               }
-       }
-}
-
-void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
-       struct kvm_arch *arch = &vcpu->kvm->arch;
-       struct kvm_kpit_state *ps;
-
-       if (vcpu && arch->vpit) {
-               ps = &arch->vpit->pit_state;
-               if (atomic_read(&ps->pit_timer.pending) &&
-               (((arch->vpic->pics[0].imr & 1) == 0 &&
-                 arch->vpic->pics[0].irq_base == vec) ||
-                 (arch->vioapic->redirtbl[0].fields.vector == vec &&
-                 arch->vioapic->redirtbl[0].fields.mask != 1))) {
-                       ps->inject_pending = 1;
-                       atomic_dec(&ps->pit_timer.pending);
-                       ps->channels[0].count_load_time = ktime_get();
+               /* Try to inject pending interrupts when
+                * last one has been acked.
+                */
+               spin_lock(&ps->inject_lock);
+               if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
+                       ps->irq_ack = 0;
+                       inject = 1;
                }
+               spin_unlock(&ps->inject_lock);
+               if (inject)
+                       __inject_pit_timer_intr(kvm);
        }
 }
Index: kvm/arch/x86/kvm/i8254.h
===================================================================
--- kvm.orig/arch/x86/kvm/i8254.h
+++ kvm/arch/x86/kvm/i8254.h
@@ -8,7 +8,6 @@ struct kvm_kpit_timer {
        int irq;
        s64 period; /* unit: ns */
        s64 scheduled;
-       ktime_t last_update;
        atomic_t pending;
 };
 
@@ -34,8 +33,9 @@ struct kvm_kpit_state {
        u32    speaker_data_on;
        struct mutex lock;
        struct kvm_pit *pit;
-       bool inject_pending; /* if inject pending interrupts */
-       unsigned long last_injected_time;
+       spinlock_t inject_lock;
+       unsigned long irq_ack;
+       struct kvm_irq_ack_notifier irq_ack_notifier;
 };
 
 struct kvm_pit {
@@ -54,7 +54,6 @@ struct kvm_pit {
 #define KVM_PIT_CHANNEL_MASK       0x3
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
-void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
 struct kvm_pit *kvm_create_pit(struct kvm *kvm);
 void kvm_free_pit(struct kvm *kvm);
Index: kvm/arch/x86/kvm/irq.c
===================================================================
--- kvm.orig/arch/x86/kvm/irq.c
+++ kvm/arch/x86/kvm/irq.c
@@ -90,7 +90,6 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_tim
 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
 {
        kvm_apic_timer_intr_post(vcpu, vec);
-       kvm_pit_timer_intr_post(vcpu, vec);
        /* TODO: PIT, RTC etc. */
 }
 EXPORT_SYMBOL_GPL(kvm_timer_intr_post);

-- 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to