Re: [PATCH RFC V3 6/6] KVM: LAPIC: Add APIC Timer periodic/oneshot mode VMX preemption timer support

2016-10-13 Thread Wanpeng Li
2016-10-13 20:35 GMT+08:00 Paolo Bonzini :
>
>
> On 13/10/2016 13:34, Wanpeng Li wrote:
>> From: Wanpeng Li 
>>
>> Most Windows guests still use the APIC Timer periodic/oneshot modes
>> instead of tsc-deadline mode, and the periodic/oneshot modes are
>> still emulated by a high-overhead hrtimer on the host. This patch
>> converts the expected expiration time of the periodic/oneshot mode
>> to a guest deadline TSC in order to leverage the VMX preemption timer
>> logic already used for APIC Timer tsc-deadline mode. After each
>> preemption timer vmexit, the preemption timer is restarted to emulate
>> the LVTT current-count register being automatically reloaded from the
>> initial-count register when the count reaches 0.
>>
>> Cc: Paolo Bonzini 
>> Cc: Radim Krčmář 
>> Cc: Yunhong Jiang 
>> Signed-off-by: Wanpeng Li 
>> ---
>>  arch/x86/kvm/lapic.c | 100 
>> ---
>>  1 file changed, 39 insertions(+), 61 deletions(-)
>>
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> index e93e549..7663246 100644
>> --- a/arch/x86/kvm/lapic.c
>> +++ b/arch/x86/kvm/lapic.c
>> @@ -1090,7 +1090,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
>>
>>  static u32 apic_get_tmcct(struct kvm_lapic *apic)
>>  {
>> - ktime_t remaining;
>> + ktime_t remaining, now;
>>   s64 ns;
>>   u32 tmcct;
>>
>> @@ -1101,7 +1101,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
>>   apic->lapic_timer.period == 0)
>>   return 0;
>>
>> - remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
>> + now = apic->lapic_timer.timer.base->get_time();
>> + remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
>>   if (ktime_to_ns(remaining) < 0)
>>   remaining = ktime_set(0, 0);
>>
>> @@ -1349,46 +1350,9 @@ static void start_sw_tscdeadline(struct kvm_lapic 
>> *apic)
>>
>>  static void start_sw_period(struct kvm_lapic *apic)
>>  {
>> - ktime_t now;
>> -
>> - /* lapic timer in oneshot or periodic mode */
>> - now = apic->lapic_timer.timer.base->get_time();
>> - apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
>> - * APIC_BUS_CYCLE_NS * apic->divide_count;
>> -
>> - if (!apic->lapic_timer.period)
>> - return;
>> - /*
>> -  * Do not allow the guest to program periodic timers with small
>> -  * interval, since the hrtimers are not throttled by the host
>> -  * scheduler.
>> -  */
>> - if (apic_lvtt_period(apic)) {
>> - s64 min_period = min_timer_period_us * 1000LL;
>> -
>> - if (apic->lapic_timer.period < min_period) {
>> - pr_info_ratelimited(
>> - "kvm: vcpu %i: requested %lld ns "
>> - "lapic timer period limited to %lld ns\n",
>> - apic->vcpu->vcpu_id,
>> - apic->lapic_timer.period, min_period);
>> - apic->lapic_timer.period = min_period;
>> - }
>> - }
>> -
>>   hrtimer_start(&apic->lapic_timer.timer,
>> -   ktime_add_ns(now, apic->lapic_timer.period),
>> -   HRTIMER_MODE_ABS_PINNED);
>> -
>> - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
>> -PRIx64 ", "
>> -"timer initial count 0x%x, period %lldns, "
>> -"expire @ 0x%016" PRIx64 ".\n", __func__,
>> -APIC_BUS_CYCLE_NS, ktime_to_ns(now),
>> -kvm_lapic_get_reg(apic, APIC_TMICT),
>> -apic->lapic_timer.period,
>> -ktime_to_ns(ktime_add_ns(now,
>> - apic->lapic_timer.period)));
>> + apic->lapic_timer.target_expiration,
>> + HRTIMER_MODE_ABS_PINNED);
>>  }
>>
>>  static bool set_target_expiration(struct kvm_lapic *apic)
>> @@ -1453,22 +1417,12 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
>>   apic->lapic_timer.hv_timer_in_use = false;
>>  }
>>
>> -void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
>> -{
>> - struct kvm_lapic *apic = vcpu->arch.apic;
>> -
>> - WARN_ON(!apic->lapic_timer.hv_timer_in_use);
>> - WARN_ON(swait_active(&vcpu->wq));
>> - cancel_hv_timer(apic);
>> - apic_timer_expired(apic);
>> -}
>> -EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
>> -
>>  static bool start_hv_timer(struct kvm_lapic *apic)
>>  {
>>   u64 tscdeadline = apic->lapic_timer.tscdeadline;
>
> I think things would be simpler if you change this to:
>
> if (!kvm_x86_ops->set_hv_timer)
> return false;
>
> if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
> if (!set_target_expiration(apic))
> return true;
> }

Calling set_target_expiration() in start_hv_timer() is not correct, as
pointed out by Radim: https://lkml.org/lkml/2016/10/12/93

Regards,
Wanpeng Li

>
> tscdeadline = apic->lapic_timer.tscdeadline;
>
> You c

Re: [PATCH RFC V3 6/6] KVM: LAPIC: Add APIC Timer periodic/oneshot mode VMX preemption timer support

2016-10-13 Thread Paolo Bonzini


On 13/10/2016 13:34, Wanpeng Li wrote:
> From: Wanpeng Li 
> 
> Most Windows guests still use the APIC Timer periodic/oneshot modes
> instead of tsc-deadline mode, and the periodic/oneshot modes are
> still emulated by a high-overhead hrtimer on the host. This patch
> converts the expected expiration time of the periodic/oneshot mode
> to a guest deadline TSC in order to leverage the VMX preemption timer
> logic already used for APIC Timer tsc-deadline mode. After each
> preemption timer vmexit, the preemption timer is restarted to emulate
> the LVTT current-count register being automatically reloaded from the
> initial-count register when the count reaches 0.
> 
> Cc: Paolo Bonzini 
> Cc: Radim Krčmář 
> Cc: Yunhong Jiang 
> Signed-off-by: Wanpeng Li 
> ---
>  arch/x86/kvm/lapic.c | 100 
> ---
>  1 file changed, 39 insertions(+), 61 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index e93e549..7663246 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1090,7 +1090,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
>  
>  static u32 apic_get_tmcct(struct kvm_lapic *apic)
>  {
> - ktime_t remaining;
> + ktime_t remaining, now;
>   s64 ns;
>   u32 tmcct;
>  
> @@ -1101,7 +1101,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
>   apic->lapic_timer.period == 0)
>   return 0;
>  
> - remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
> + now = apic->lapic_timer.timer.base->get_time();
> + remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
>   if (ktime_to_ns(remaining) < 0)
>   remaining = ktime_set(0, 0);
>  
> @@ -1349,46 +1350,9 @@ static void start_sw_tscdeadline(struct kvm_lapic 
> *apic)
>  
>  static void start_sw_period(struct kvm_lapic *apic)
>  {
> - ktime_t now;
> -
> - /* lapic timer in oneshot or periodic mode */
> - now = apic->lapic_timer.timer.base->get_time();
> - apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
> - * APIC_BUS_CYCLE_NS * apic->divide_count;
> -
> - if (!apic->lapic_timer.period)
> - return;
> - /*
> -  * Do not allow the guest to program periodic timers with small
> -  * interval, since the hrtimers are not throttled by the host
> -  * scheduler.
> -  */
> - if (apic_lvtt_period(apic)) {
> - s64 min_period = min_timer_period_us * 1000LL;
> -
> - if (apic->lapic_timer.period < min_period) {
> - pr_info_ratelimited(
> - "kvm: vcpu %i: requested %lld ns "
> - "lapic timer period limited to %lld ns\n",
> - apic->vcpu->vcpu_id,
> - apic->lapic_timer.period, min_period);
> - apic->lapic_timer.period = min_period;
> - }
> - }
> -
>   hrtimer_start(&apic->lapic_timer.timer,
> -   ktime_add_ns(now, apic->lapic_timer.period),
> -   HRTIMER_MODE_ABS_PINNED);
> -
> - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
> -PRIx64 ", "
> -"timer initial count 0x%x, period %lldns, "
> -"expire @ 0x%016" PRIx64 ".\n", __func__,
> -APIC_BUS_CYCLE_NS, ktime_to_ns(now),
> -kvm_lapic_get_reg(apic, APIC_TMICT),
> -apic->lapic_timer.period,
> -ktime_to_ns(ktime_add_ns(now,
> - apic->lapic_timer.period)));
> + apic->lapic_timer.target_expiration,
> + HRTIMER_MODE_ABS_PINNED);
>  }
>  
>  static bool set_target_expiration(struct kvm_lapic *apic)
> @@ -1453,22 +1417,12 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
>   apic->lapic_timer.hv_timer_in_use = false;
>  }
>  
> -void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
> -{
> - struct kvm_lapic *apic = vcpu->arch.apic;
> -
> - WARN_ON(!apic->lapic_timer.hv_timer_in_use);
> - WARN_ON(swait_active(&vcpu->wq));
> - cancel_hv_timer(apic);
> - apic_timer_expired(apic);
> -}
> -EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
> -
>  static bool start_hv_timer(struct kvm_lapic *apic)
>  {
>   u64 tscdeadline = apic->lapic_timer.tscdeadline;

I think things would be simpler if you change this to:

if (!kvm_x86_ops->set_hv_timer)
return false;

if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
if (!set_target_expiration(apic))
return true;
}

tscdeadline = apic->lapic_timer.tscdeadline;

You can also add a corresponding

static void start_sw_timer(struct kvm_lapic *apic)
{
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
start_sw_period(apic);
else
start_sw_tscdeadline(apic);
}

so that the caller can be just this:

if (!start_

[PATCH RFC V3 6/6] KVM: LAPIC: Add APIC Timer periodic/oneshot mode VMX preemption timer support

2016-10-13 Thread Wanpeng Li
From: Wanpeng Li 

Most Windows guests still use the APIC Timer periodic/oneshot modes
instead of tsc-deadline mode, and the periodic/oneshot modes are
still emulated by a high-overhead hrtimer on the host. This patch
converts the expected expiration time of the periodic/oneshot mode
to a guest deadline TSC in order to leverage the VMX preemption timer
logic already used for APIC Timer tsc-deadline mode. After each
preemption timer vmexit, the preemption timer is restarted to emulate
the LVTT current-count register being automatically reloaded from the
initial-count register when the count reaches 0.

Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Cc: Yunhong Jiang 
Signed-off-by: Wanpeng Li 
---
 arch/x86/kvm/lapic.c | 100 ---
 1 file changed, 39 insertions(+), 61 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e93e549..7663246 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1090,7 +1090,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
 {
-   ktime_t remaining;
+   ktime_t remaining, now;
s64 ns;
u32 tmcct;
 
@@ -1101,7 +1101,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
apic->lapic_timer.period == 0)
return 0;
 
-   remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
+   now = apic->lapic_timer.timer.base->get_time();
+   remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
if (ktime_to_ns(remaining) < 0)
remaining = ktime_set(0, 0);
 
@@ -1349,46 +1350,9 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
 
 static void start_sw_period(struct kvm_lapic *apic)
 {
-   ktime_t now;
-
-   /* lapic timer in oneshot or periodic mode */
-   now = apic->lapic_timer.timer.base->get_time();
-   apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
-   * APIC_BUS_CYCLE_NS * apic->divide_count;
-
-   if (!apic->lapic_timer.period)
-   return;
-   /*
-* Do not allow the guest to program periodic timers with small
-* interval, since the hrtimers are not throttled by the host
-* scheduler.
-*/
-   if (apic_lvtt_period(apic)) {
-   s64 min_period = min_timer_period_us * 1000LL;
-
-   if (apic->lapic_timer.period < min_period) {
-   pr_info_ratelimited(
-   "kvm: vcpu %i: requested %lld ns "
-   "lapic timer period limited to %lld ns\n",
-   apic->vcpu->vcpu_id,
-   apic->lapic_timer.period, min_period);
-   apic->lapic_timer.period = min_period;
-   }
-   }
-
hrtimer_start(&apic->lapic_timer.timer,
- ktime_add_ns(now, apic->lapic_timer.period),
- HRTIMER_MODE_ABS_PINNED);
-
-   apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
-  PRIx64 ", "
-  "timer initial count 0x%x, period %lldns, "
-  "expire @ 0x%016" PRIx64 ".\n", __func__,
-  APIC_BUS_CYCLE_NS, ktime_to_ns(now),
-  kvm_lapic_get_reg(apic, APIC_TMICT),
-  apic->lapic_timer.period,
-  ktime_to_ns(ktime_add_ns(now,
-   apic->lapic_timer.period)));
+   apic->lapic_timer.target_expiration,
+   HRTIMER_MODE_ABS_PINNED);
 }
 
 static bool set_target_expiration(struct kvm_lapic *apic)
@@ -1453,22 +1417,12 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
apic->lapic_timer.hv_timer_in_use = false;
 }
 
-void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
-{
-   struct kvm_lapic *apic = vcpu->arch.apic;
-
-   WARN_ON(!apic->lapic_timer.hv_timer_in_use);
-   WARN_ON(swait_active(&vcpu->wq));
-   cancel_hv_timer(apic);
-   apic_timer_expired(apic);
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
-
 static bool start_hv_timer(struct kvm_lapic *apic)
 {
u64 tscdeadline = apic->lapic_timer.tscdeadline;
 
-   if (atomic_read(&apic->lapic_timer.pending) ||
+   if ((atomic_read(&apic->lapic_timer.pending) &&
+   !apic_lvtt_period(apic)) ||
kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
if (apic->lapic_timer.hv_timer_in_use)
cancel_hv_timer(apic);
@@ -1477,7 +1431,8 @@ static bool start_hv_timer(struct kvm_lapic *apic)
hrtimer_cancel(&apic->lapic_timer.timer);
 
/* In case the sw timer triggered in the window */
-   if (atomic_read(&apic->lapic_timer.pending))
+   if (atomic_read(&apic->lapic_timer.pending) &&
+   !apic_lvtt_period(apic))
cancel_hv_timer(apic);
}
trace_kvm_hv_timer_state(apic->vcpu->vcpu