On Thu, Jan 15, 2009 at 03:20:06PM +0800, Sheng Yang wrote:
> > +    * Since reinjection is not rate-limited, use the delay
> > +    * to inject the last interrupt as an estimate.
> > +    */
> > +   if (unlikely(atomic_read(&apic->timer.pending) > 0)) {
> > +           remaining = apic->timer.injection_delay;
> > +           if (ktime_to_ns(remaining) > apic->timer.period)
> > +                   remaining = ns_to_ktime(apic->timer.period);
> > +        } else
> > +           remaining = hrtimer_expires_remaining(&apic->timer.dev);
> 
> A little doubt...
> 
> A: time_fire
> B: intr_post
> C: read TMCCT
> 
> The sequence can be ABC or ACB.
> 
> injection_delay = time(B) - time(A)
> 
> So it didn't count time from read TMCCT... And guest get interrupt at 
> time(B), 
> not quite understand why time(B) - time(A) matters here...

Its an estimate of the delay it takes to inject an interrupt to the
guest once fired. Its only used if there have been accumulated ones, so
ACB sequence with pending=0 will use hrtimer_expires_remaining().

> I think the reasonable here means, this interval is usable later after the 
> accumulated interrupts are injected. From this point of view, I think current 
> solution is reasonable. It just assume the delayed interrupts have been 
> injected.
> 
> However, seriously, any value here is wrong, no elegant one. 

I agree.

> But I still prefer to the current solution...

Why? The proposed version is smaller and simpler than the current
one IMO, and also not vulnerable to whatever bug is causing now <
last_update.

And more precise, since the current scheme uses interrupt injection time
as if it was "count-down restart" time, which is not true.

> And here is not the really problem for now I think. The current mechanism is 
> mostly OK, but where is the bug... We have either have a simple fix(e.g. if 
> now < last_update, then return 0) or dig into it. Did it worth a try? Anyway, 
> it would return a buggy result if we have pending interrupts...

The overflow calculation is not necessary as discussed. Alexander, can
you please try the following? Sheng, do you have any other suggestion to
test?

/proc/timer_list output of the host when ESX is running too.




diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afac68c..e9f13cc 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -509,6 +509,37 @@ static void apic_send_ipi(struct kvm_lapic *apic)
        }
 }
 
+
+static void record_last_update(struct kvm_lapic *apic)
+{
+       int rec_idx = apic->timer.rec_idx;
+
+       apic->timer.rec[rec_idx].when = ktime_get();
+       apic->timer.rec[rec_idx].last_update = apic->timer.last_update;
+       apic->timer.rec[rec_idx].pending = atomic_read(&apic->timer.pending);
+       apic->timer.rec_idx++;
+
+       if (apic->timer.rec_idx >= KVM_LAPIC_REC_NR)
+               apic->timer.rec_idx = 0;
+}
+
+static void print_last_update_record(struct kvm_lapic *apic)
+{
+       int iter = 0;
+       int i = apic->timer.rec_idx;
+
+       while (iter < KVM_LAPIC_REC_NR) {
+               printk("rec[%d] when=%lld last_update=%lld pend=%d\n",
+                       i, ktime_to_ns(apic->timer.rec[i].when),
+                       ktime_to_ns(apic->timer.rec[i].last_update),
+                       apic->timer.rec[i].pending);
+               iter++;
+               i--;
+               if (i < 0)
+                       i = KVM_LAPIC_REC_NR-1;
+       }
+}
+
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
 {
        u64 counter_passed;
@@ -532,7 +563,10 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
                                    .tv64 = KTIME_MAX -
                                    (apic->timer.last_update).tv64}; }
                                   ), now);
-               apic_debug("time elapsed\n");
+               printk("last_update = %lld now = %lld\n",
+                       ktime_to_ns(apic->timer.last_update),
+                       ktime_to_ns(now));
+               print_last_update_record(apic);
        } else
                passed = ktime_sub(now, apic->timer.last_update);
 
@@ -544,14 +578,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
                        /* one-shot timers stick at 0 until reset */
                        tmcct = 0;
                } else {
-                       /*
-                        * periodic timers reset to APIC_TMICT when they
-                        * hit 0. The while loop simulates this happening N
-                        * times. (counter_passed %= tmcct) would also work,
-                        * but might be slower or not work on 32-bit??
-                        */
-                       while (counter_passed > tmcct)
-                               counter_passed -= tmcct;
+                       counter_passed %= (u64)tmcct;
                        tmcct -= counter_passed;
                }
        } else {
@@ -666,7 +693,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
                      ktime_add_ns(now, apic->timer.period),
                      HRTIMER_MODE_ABS);
 
-       apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
+       printk("%s: bus cycle is %" PRId64 "ns, now 0x%016"
                           PRIx64 ", "
                           "timer initial count 0x%x, period %lldns, "
                           "expire @ 0x%016" PRIx64 ".\n", __func__,
@@ -1114,10 +1141,12 @@ void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, 
int vec)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
 
-       if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
+       if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec) {
                apic->timer.last_update = ktime_add_ns(
                                apic->timer.last_update,
                                apic->timer.period);
+               record_last_update(apic);
+       }
 }
 
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 8185888..27baadb 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -5,6 +5,8 @@
 
 #include <linux/kvm_host.h>
 
+#define KVM_LAPIC_REC_NR 50
+
 struct kvm_lapic {
        unsigned long base_address;
        struct kvm_io_device dev;
@@ -13,6 +15,13 @@ struct kvm_lapic {
                s64 period;     /* unit: ns */
                u32 divide_count;
                ktime_t last_update;
+               struct {
+                       ktime_t when;
+                       ktime_t last_update;
+                       int pending;
+               } rec[KVM_LAPIC_REC_NR];
+               int rec_idx;
+
                struct hrtimer dev;
        } timer;
        struct kvm_vcpu *vcpu;

Reply via email to