    Add back pending irqs for the apic timer to give the guest
    a precise APIC timer interrupt count.
    
    Signed-off-by: Yaozu (Eddie) Dong <Eddie.Dong@intel.com>

diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
index ed6d20a..8867c82 100644
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@@ -110,7 +110,7 @@ struct kvm_lapic {
 	unsigned long base_address;
 	struct kvm_io_device dev;
 	struct {
-		unsigned long pending;
+		atomic_t pending;
 		s64 period;	/* unit: ns */
 		u32 divide_count;
 		ktime_t last_update;
@@ -153,5 +153,7 @@ int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
+void kvm_pt_intr_post(struct kvm_vcpu *vcpu, int vec);
+void kvm_pt_update_irq(struct kvm_vcpu *vcpu);
 
 #endif
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
index 352b8a7..cb4d3ca 100644
--- a/drivers/kvm/lapic.c
+++ b/drivers/kvm/lapic.c
@@ -30,6 +30,7 @@
 #include <asm/page.h>
 #include <asm/current.h>
 #include <asm/apicdef.h>
+#include <asm/atomic.h>
 #include "irq.h"
 
 #define PRId64 "d"
@@ -300,6 +301,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			     int vector, int level, int trig_mode)
 {
 	int result = 0;
+	int orig_irr;
 
 	switch (delivery_mode) {
 	case APIC_DM_FIXED:
@@ -308,7 +310,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		if (unlikely(!apic_enabled(apic)))
 			break;
 
-		if (apic_test_and_set_irr(vector, apic) && trig_mode) {
+		orig_irr = apic_test_and_set_irr(vector, apic);
+		if (orig_irr && trig_mode) {
 			apic_debug("level trig mode repeatedly for vector %d",
 				   vector);
 			break;
@@ -322,7 +325,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 
 		kvm_vcpu_kick(apic->vcpu);
 
-		result = 1;
+		result = (orig_irr == 0);
 		break;
 
 	case APIC_DM_REMRD:
@@ -352,18 +355,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	return result;
 }
 
-static inline int apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
-				  int vector, int level, int trig_mode)
-{
-	int result = 0;
-
-	spin_lock_bh(&apic->lock);
-	result = __apic_accept_irq(apic, delivery_mode,
-				   vector, level, trig_mode);
-	spin_unlock_bh(&apic->lock);
-	return result;
-}
-
 struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
 				       unsigned long bitmap)
 {
@@ -595,6 +586,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
 				apic_set_reg(apic, APIC_LVTT + 0x10 * i,
 					     lvt_val | APIC_LVT_MASKED);
 			}
+			atomic_set(&apic->timer.pending, 0);
 
 		}
 		break;
@@ -632,12 +624,14 @@ static void apic_mmio_write(struct kvm_io_device *this,
 			apic->timer.last_update = now;
 			apic->timer.period =
 			    APIC_BUS_CYCLE_NS * apic->timer.divide_count * val;
+			apic_debug("LAPIC period = %lld\n", apic->timer.period);
 
 			/* Make sure the lock ordering is coherent */
 			spin_unlock_bh(&apic->lock);
 			hrtimer_cancel(&apic->timer.dev);
+			atomic_set(&apic->timer.pending, 0);
 			hrtimer_start(&apic->timer.dev,
-				      ktime_add_ns(now, offset),
+				      ktime_add_ns(now, apic->timer.period),
 				      HRTIMER_MODE_ABS);
 
 			apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
@@ -816,7 +810,7 @@ static void lapic_reset(struct kvm_vcpu *vcpu)
 		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 	}
 	apic->timer.divide_count = 0;
-	apic->timer.pending = 0;
+	atomic_set(&apic->timer.pending, 0);
 	if (vcpu->vcpu_id == 0)
 		vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
 	apic_update_ppr(apic);
@@ -857,38 +851,30 @@ EXPORT_SYMBOL_GPL(kvm_lapic_get_regs);
  * timer interface
  *----------------------------------------------------------------------
  */
+
+/* TODO: make sure __apic_timer_fn runs on the current pCPU */
 static int __apic_timer_fn(struct kvm_lapic *apic)
 {
-	u32 vector;
 	int result = 0;
 
-	if (unlikely(!apic_enabled(apic) ||
-		     !apic_lvt_enabled(apic, APIC_LVTT))) {
-		apic_debug("%s: time interrupt although apic is down\n",
-			   __FUNCTION__);
-		return 0;
-	}
-
-	vector = apic_lvt_vector(apic, APIC_LVTT);
-	apic->timer.last_update = apic->timer.dev.expires;
-	apic->timer.pending++;
-	__apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
-
+	atomic_inc(&apic->timer.pending);
 	if (apic_lvtt_period(apic)) {
-		u32 offset;
-		u32 tmict = apic_get_reg(apic, APIC_TMICT);
-
-		offset = APIC_BUS_CYCLE_NS * apic->timer.divide_count * tmict;
-
 		result = 1;
 		apic->timer.dev.expires = ktime_add_ns(
 					apic->timer.dev.expires,
 					apic->timer.period);
 	}
-
 	return result;
 }
 
+/* Raise the LVTT vector via __apic_accept_irq(); returns its result. */
+static int inject_apic_timer_irq(struct kvm_lapic *apic)
+{
+	const int timer_vec = apic_lvt_vector(apic, APIC_LVTT);
+
+	return __apic_accept_irq(apic, APIC_DM_FIXED, timer_vec, 1, 0);
+}
+
 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 {
 	struct kvm_lapic *apic;
@@ -963,6 +949,28 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 	return highest_irr;
 }
 
+/* Inject a pending APIC timer irq; on nonzero return, decrement pending. */
+void kvm_pt_update_irq(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *lapic = vcpu->apic;
+
+	if (lapic && atomic_read(&lapic->timer.pending) > 0 &&
+	    inject_apic_timer_irq(lapic))
+		atomic_dec(&lapic->timer.pending);
+}
+EXPORT_SYMBOL_GPL(kvm_pt_update_irq);
+
+/* If @vec is the LVTT vector, advance timer.last_update by one period. */
+void kvm_pt_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+	struct kvm_lapic *lapic = vcpu->apic;
+
+	if (lapic && vec == apic_lvt_vector(lapic, APIC_LVTT))
+		lapic->timer.last_update = ktime_add_ns(
+			lapic->timer.last_update, lapic->timer.period);
+}
+EXPORT_SYMBOL_GPL(kvm_pt_intr_post);
+
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 {
 	int vector = kvm_apic_has_interrupt(vcpu);
@@ -992,7 +1000,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
  	/* TODO: following code can be in a common API */
 	spin_lock_bh(&apic->lock);
  	hrtimer_cancel(&apic->timer.dev);
-	apic->timer.pending = 0;
+	atomic_set(&apic->timer.pending, 0);
 	val = apic_get_reg(apic, APIC_TDCR);
 	tmp = ((val & 0x3) | ((val & 0x8) >> 1)) + 1;
 	apic->timer.divide_count = 0x1 << (tmp & 0x7);
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 96db35a..ec172a1 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1461,7 +1461,9 @@ static void svm_intr_assist(struct vcpu_svm *svm)
 {
 	struct vmcb *vmcb = svm->vmcb;
 	int intr_vector = -1;
+	struct kvm_vcpu *vcpu = &svm->vcpu;
 
+	kvm_pt_update_irq(vcpu);
 	if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) &&
 	    ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
 		intr_vector = vmcb->control.exit_int_info &
@@ -1474,7 +1476,7 @@ static void svm_intr_assist(struct vcpu_svm *svm)
 	if (vmcb->control.int_ctl & V_IRQ_MASK)
 		return;
 
-	if (!kvm_cpu_has_interrupt(&svm->vcpu))
+	if (!kvm_cpu_has_interrupt(vcpu))
 		return;
 
 	if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
@@ -1486,8 +1488,9 @@ static void svm_intr_assist(struct vcpu_svm *svm)
 		return;
 	}
 	/* Okay, we can deliver the interrupt: grab it and update PIC state. */
-	intr_vector = kvm_cpu_get_interrupt(&svm->vcpu);
+	intr_vector = kvm_cpu_get_interrupt(vcpu);
 	svm_inject_irq(svm, intr_vector);
+	kvm_pt_intr_post(vcpu, intr_vector);
 }
 
 static void kvm_reput_irq(struct vcpu_svm *svm)
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 019197b..499bb45 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -2144,7 +2144,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 {
 	u32 idtv_info_field, intr_info_field;
 	int has_ext_irq, interrupt_window_open;
+	int vector;
 
+	kvm_pt_update_irq(vcpu);
 	has_ext_irq = kvm_cpu_has_interrupt(vcpu);
 	intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
 	idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD);
@@ -2174,9 +2176,11 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 	interrupt_window_open =
 		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
 		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
-	if (interrupt_window_open)
-		vmx_inject_irq(vcpu, kvm_cpu_get_interrupt(vcpu));
-	else
+	if (interrupt_window_open) {
+		vector = kvm_cpu_get_interrupt(vcpu);
+		vmx_inject_irq(vcpu, vector);
+		kvm_pt_intr_post(vcpu, vector);
+	} else
 		enable_irq_window(vcpu);
 }
 
