On 24/08/2017 12:35, Wanpeng Li wrote:
> From: Wanpeng Li <[email protected]>
> 
> vmx_complete_interrupts() assumes that the exception is always injected, 
> so it would be dropped by kvm_clear_exception_queue(). This patch separates 
> exception.pending from exception.injected, exception.inject represents the 
> exception is injected or the exception should be reinjected due to vmexit 
> occurs during event delivery in VMX non-root operation. exception.pending 
> represents the exception is queued and will be cleared when injecting the 
> exception to the guest. So exception.pending and exception.injected can 
> cooperate to guarantee exception will not be lost.
> 
> Reported-by: Radim Krčmář <[email protected]>
> Cc: Paolo Bonzini <[email protected]>
> Cc: Radim Krčmář <[email protected]>
> Signed-off-by: Wanpeng Li <[email protected]>
> ---
> v3 -> v4: 
>  * avoid the double fault
>  * exception must be injected immediately
> v2 -> v3:
>  * the changes to inject_pending_event and adds the WARN_ON
>  * combine injected and pending exception for GET/SET_VCPU_EVENTS
> 
>  arch/x86/include/asm/kvm_host.h |  2 +-
>  arch/x86/kvm/svm.c              |  2 +-
>  arch/x86/kvm/vmx.c              |  4 +--
>  arch/x86/kvm/x86.c              | 78 
> +++++++++++++++++++++++++++--------------
>  arch/x86/kvm/x86.h              |  4 +--
>  5 files changed, 57 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 9d90787..6e385ac 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -547,8 +547,8 @@ struct kvm_vcpu_arch {
>  
>       struct kvm_queued_exception {
>               bool pending;
> +             bool injected;
>               bool has_error_code;
> -             bool reinject;
>               u8 nr;
>               u32 error_code;
>               u8 nested_apf;
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index a2fddce..6a439a1 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -655,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
>       struct vcpu_svm *svm = to_svm(vcpu);
>       unsigned nr = vcpu->arch.exception.nr;
>       bool has_error_code = vcpu->arch.exception.has_error_code;
> -     bool reinject = vcpu->arch.exception.reinject;
> +     bool reinject = vcpu->arch.exception.injected;
>       u32 error_code = vcpu->arch.exception.error_code;
>  
>       /*
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index c5f43ab..902b780 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2503,7 +2503,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
>       struct vcpu_vmx *vmx = to_vmx(vcpu);
>       unsigned nr = vcpu->arch.exception.nr;
>       bool has_error_code = vcpu->arch.exception.has_error_code;
> -     bool reinject = vcpu->arch.exception.reinject;
> +     bool reinject = vcpu->arch.exception.injected;
>       u32 error_code = vcpu->arch.exception.error_code;
>       u32 intr_info = nr | INTR_INFO_VALID_MASK;
>  
> @@ -10948,7 +10948,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu 
> *vcpu,
>       u32 idt_vectoring;
>       unsigned int nr;
>  
> -     if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
> +     if (vcpu->arch.exception.injected) {
>               nr = vcpu->arch.exception.nr;
>               idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8f41b88..fa79cd7 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -389,15 +389,20 @@ static void kvm_multiple_exception(struct kvm_vcpu 
> *vcpu,
>  
>       kvm_make_request(KVM_REQ_EVENT, vcpu);
>  
> -     if (!vcpu->arch.exception.pending) {
> +     if (!vcpu->arch.exception.pending ||
> +             !vcpu->arch.exception.injected) {
>       queue:
>               if (has_error && !is_protmode(vcpu))
>                       has_error = false;
> -             vcpu->arch.exception.pending = true;
> +             if (reinject)

I'd like to add a WARN_ON here:

+                       /*
+                        * vcpu->arch.exception.pending is only true on
+                        * vmentry if an event injection was blocked by
+                        * nested_run_pending.  In that case, however,
+                        * vcpu_enter_guest requests an immediate exit,
+                        * so the guest shouldn't proceed far enough to
+                        * need reinjection.
+                        */
+                       WARN_ON_ONCE(vcpu->arch.exception.pending);

I'll test it and add it if it is useful.

> +                     vcpu->arch.exception.injected = true;
> +             else {
> +                     vcpu->arch.exception.pending = true;
> +                     vcpu->arch.exception.injected = false;
> +             }
>               vcpu->arch.exception.has_error_code = has_error;
>               vcpu->arch.exception.nr = nr;
>               vcpu->arch.exception.error_code = error_code;
> -             vcpu->arch.exception.reinject = reinject;
>               return;
>       }

The double fault case can still result in pending && injected.  We need
to set exception.pending = true to allow the double fault to trigger a
vmexit, but exception.injected must be reset.

Otherwise looks good.  This was exhausting...

Paolo

> @@ -3069,8 +3074,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct 
> kvm_vcpu *vcpu,
>                                              struct kvm_vcpu_events *events)
>  {
>       process_nmi(vcpu);
> +     /*
> +      * FIXME: pass injected and pending separately.  This is only
> +      * needed for nested virtualization, whose state cannot be
> +      * migrated yet.  For now we combine them just in case.
> +      */
>       events->exception.injected =
> -             vcpu->arch.exception.pending &&
> +             (vcpu->arch.exception.pending ||
> +              vcpu->arch.exception.injected) &&
>               !kvm_exception_is_soft(vcpu->arch.exception.nr);
>       events->exception.nr = vcpu->arch.exception.nr;
>       events->exception.has_error_code = vcpu->arch.exception.has_error_code;
> @@ -3125,6 +3136,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct 
> kvm_vcpu *vcpu,
>               return -EINVAL;
>  
>       process_nmi(vcpu);
> +     vcpu->arch.exception.injected = false;
>       vcpu->arch.exception.pending = events->exception.injected;
>       vcpu->arch.exception.nr = events->exception.nr;
>       vcpu->arch.exception.has_error_code = events->exception.has_error_code;
> @@ -6350,33 +6362,25 @@ static int inject_pending_event(struct kvm_vcpu 
> *vcpu, bool req_int_win)
>       int r;
>  
>       /* try to reinject previous events if any */
> -     if (vcpu->arch.exception.pending) {
> -             trace_kvm_inj_exception(vcpu->arch.exception.nr,
> -                                     vcpu->arch.exception.has_error_code,
> -                                     vcpu->arch.exception.error_code);
> -
> -             if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
> -                     __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
> -                                          X86_EFLAGS_RF);
> -
> -             if (vcpu->arch.exception.nr == DB_VECTOR &&
> -                 (vcpu->arch.dr7 & DR7_GD)) {
> -                     vcpu->arch.dr7 &= ~DR7_GD;
> -                     kvm_update_dr7(vcpu);
> -             }
> -
> +     if (vcpu->arch.exception.injected) {
>               kvm_x86_ops->queue_exception(vcpu);
>               return 0;
>       }
>  
> -     if (vcpu->arch.nmi_injected) {
> -             kvm_x86_ops->set_nmi(vcpu);
> -             return 0;
> -     }
> +     /*
> +      * Exceptions must be injected immediately, or the exception
> +      * frame will have the address of the NMI or interrupt handler.
> +      */
> +     if (!vcpu->arch.exception.pending) {
> +             if (vcpu->arch.nmi_injected) {
> +                     kvm_x86_ops->set_nmi(vcpu);
> +                     return 0;
> +             }
>  
> -     if (vcpu->arch.interrupt.pending) {
> -             kvm_x86_ops->set_irq(vcpu);
> -             return 0;
> +             if (vcpu->arch.interrupt.pending) {
> +                     kvm_x86_ops->set_irq(vcpu);
> +                     return 0;
> +             }
>       }
>  
>       if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
> @@ -6386,7 +6390,25 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, 
> bool req_int_win)
>       }
>  
>       /* try to inject new event if pending */
> -     if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
> +     if (vcpu->arch.exception.pending) {
> +             trace_kvm_inj_exception(vcpu->arch.exception.nr,
> +                                     vcpu->arch.exception.has_error_code,
> +                                     vcpu->arch.exception.error_code);
> +
> +             vcpu->arch.exception.pending = false;
> +             vcpu->arch.exception.injected = true;
> +             kvm_x86_ops->queue_exception(vcpu);
> +
> +             if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
> +                     __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
> +                                          X86_EFLAGS_RF);
> +
> +             if (vcpu->arch.exception.nr == DB_VECTOR &&
> +                 (vcpu->arch.dr7 & DR7_GD)) {
> +                     vcpu->arch.dr7 &= ~DR7_GD;
> +                     kvm_update_dr7(vcpu);
> +             }
> +     } else if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
>               vcpu->arch.smi_pending = false;
>               enter_smm(vcpu);
>       } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
> @@ -6862,6 +6884,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>                               kvm_x86_ops->enable_nmi_window(vcpu);
>                       if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
>                               kvm_x86_ops->enable_irq_window(vcpu);
> +                     WARN_ON(vcpu->arch.exception.pending);
>               }
>  
>               if (kvm_lapic_enabled(vcpu)) {
> @@ -7738,6 +7761,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool 
> init_event)
>       vcpu->arch.nmi_injected = false;
>       kvm_clear_interrupt_queue(vcpu);
>       kvm_clear_exception_queue(vcpu);
> +     vcpu->arch.exception.pending = false;
>  
>       memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
>       kvm_update_dr0123(vcpu);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 1134603..e6ec0de 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -11,7 +11,7 @@
>  
>  static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
>  {
> -     vcpu->arch.exception.pending = false;
> +     vcpu->arch.exception.injected = false;
>  }
>  
>  static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
> @@ -29,7 +29,7 @@ static inline void kvm_clear_interrupt_queue(struct 
> kvm_vcpu *vcpu)
>  
>  static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
>  {
> -     return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
> +     return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending ||
>               vcpu->arch.nmi_injected;
>  }
>  
> 

Reply via email to