On 2011-09-15 16:45, Avi Kivity wrote:
> If simultaneous NMIs happen, we're supposed to queue the second
> and next (collapsing them), but currently we sometimes collapse
> the second into the first.
Can you describe the race in a little more detail here? "Sometimes" sounds
like "I don't know when". :)
>
> Fix by using a counter for pending NMIs instead of a bool; collapsing
> happens when the NMI window reopens.
>
> Signed-off-by: Avi Kivity <[email protected]>
> ---
>
> Not sure whether this interacts correctly with NMI-masked-by-STI or with
> save/restore.
>
> arch/x86/include/asm/kvm_host.h | 2 +-
> arch/x86/kvm/svm.c | 1 +
> arch/x86/kvm/vmx.c | 3 ++-
> arch/x86/kvm/x86.c | 33 +++++++++++++++------------------
> arch/x86/kvm/x86.h | 7 +++++++
> 5 files changed, 26 insertions(+), 20 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6ab4241..3a95885 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -413,7 +413,7 @@ struct kvm_vcpu_arch {
> u32 tsc_catchup_mult;
> s8 tsc_catchup_shift;
>
> - bool nmi_pending;
> + atomic_t nmi_pending;
> bool nmi_injected;
>
> struct mtrr_state_type mtrr_state;
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index e7ed4b1..d4c792f 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3609,6 +3609,7 @@ static void svm_complete_interrupts(struct vcpu_svm
> *svm)
> if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
> && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
> svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
> + kvm_collapse_pending_nmis(&svm->vcpu);
> kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
> }
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a0d6bd9..745dadb 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4761,6 +4761,7 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
> cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
> vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
> ++vcpu->stat.nmi_window_exits;
> + kvm_collapse_pending_nmis(vcpu);
> kvm_make_request(KVM_REQ_EVENT, vcpu);
>
> return 1;
> @@ -5790,7 +5791,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> if (vmx_interrupt_allowed(vcpu)) {
> vmx->soft_vnmi_blocked = 0;
> } else if (vmx->vnmi_blocked_time > 1000000000LL &&
> - vcpu->arch.nmi_pending) {
> + atomic_read(&vcpu->arch.nmi_pending)) {
> /*
> * This CPU don't support us in finding the end of an
> * NMI-blocked window if the guest runs with IRQs
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 6b37f18..d4f45e0 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -359,8 +359,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct
> x86_exception *fault)
>
> void kvm_inject_nmi(struct kvm_vcpu *vcpu)
> {
> + atomic_inc(&vcpu->arch.nmi_pending);
> kvm_make_request(KVM_REQ_EVENT, vcpu);
> - vcpu->arch.nmi_pending = 1;
Does the reordering matter? Do we need barriers?
> }
> EXPORT_SYMBOL_GPL(kvm_inject_nmi);
>
> @@ -2844,7 +2844,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct
> kvm_vcpu *vcpu,
> KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
>
> events->nmi.injected = vcpu->arch.nmi_injected;
> - events->nmi.pending = vcpu->arch.nmi_pending;
> + events->nmi.pending = atomic_read(&vcpu->arch.nmi_pending) != 0;
> events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
> events->nmi.pad = 0;
>
> @@ -2878,7 +2878,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct
> kvm_vcpu *vcpu,
>
> vcpu->arch.nmi_injected = events->nmi.injected;
> if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
> - vcpu->arch.nmi_pending = events->nmi.pending;
> + atomic_set(&vcpu->arch.nmi_pending, events->nmi.pending);
> kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
>
> if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
> @@ -4763,7 +4763,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu
> *vcpu, int irq, int inc_eip)
> kvm_set_rflags(vcpu, ctxt->eflags);
>
> if (irq == NMI_VECTOR)
> - vcpu->arch.nmi_pending = false;
> + atomic_set(&vcpu->arch.nmi_pending, 0);
> else
> vcpu->arch.interrupt.pending = false;
>
> @@ -5570,9 +5570,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
> }
>
> /* try to inject new event if pending */
> - if (vcpu->arch.nmi_pending) {
> + if (atomic_read(&vcpu->arch.nmi_pending)) {
> if (kvm_x86_ops->nmi_allowed(vcpu)) {
> - vcpu->arch.nmi_pending = false;
> + atomic_dec(&vcpu->arch.nmi_pending);
So this is where we used to lose NMIs in the past — by overwriting
nmi_pending while another NMI was already queued, right?
> vcpu->arch.nmi_injected = true;
> kvm_x86_ops->set_nmi(vcpu);
> }
> @@ -5604,10 +5604,14 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
> }
> }
>
> +static bool nmi_in_progress(struct kvm_vcpu *vcpu)
> +{
> + return vcpu->arch.nmi_injected || kvm_x86_ops->get_nmi_mask(vcpu);
> +}
> +
> static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> {
> int r;
> - bool nmi_pending;
> bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
> vcpu->run->request_interrupt_window;
>
> @@ -5654,19 +5658,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> if (unlikely(r))
> goto out;
>
> - /*
> - * An NMI can be injected between local nmi_pending read and
> - * vcpu->arch.nmi_pending read inside inject_pending_event().
> - * But in that case, KVM_REQ_EVENT will be set, which makes
> - * the race described above benign.
> - */
> - nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending);
> -
> if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> inject_pending_event(vcpu);
>
> /* enable NMI/IRQ window open exits if needed */
> - if (nmi_pending)
> + if (atomic_read(&vcpu->arch.nmi_pending)
> + && nmi_in_progress(vcpu))
Is nmi_pending && !nmi_in_progress possible at all? Is it rather a BUG
condition? If not, what will happen next?
> kvm_x86_ops->enable_nmi_window(vcpu);
> else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
> kvm_x86_ops->enable_irq_window(vcpu);
> @@ -6374,7 +6371,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>
> int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
> {
> - vcpu->arch.nmi_pending = false;
> + atomic_set(&vcpu->arch.nmi_pending, 0);
> vcpu->arch.nmi_injected = false;
>
> vcpu->arch.switch_db_regs = 0;
> @@ -6649,7 +6646,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
> !vcpu->arch.apf.halted)
> || !list_empty_careful(&vcpu->async_pf.done)
> || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
> - || vcpu->arch.nmi_pending ||
> + || atomic_read(&vcpu->arch.nmi_pending) ||
> (kvm_arch_interrupt_allowed(vcpu) &&
> kvm_cpu_has_interrupt(vcpu));
> }
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index d36fe23..ed04e2b 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -125,4 +125,11 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt
> *ctxt,
> gva_t addr, void *val, unsigned int bytes,
> struct x86_exception *exception);
>
> +static inline void kvm_collapse_pending_nmis(struct kvm_vcpu *vcpu)
> +{
> + /* Collapse all NMIs queued while an NMI handler was running to one */
> + if (atomic_read(&vcpu->arch.nmi_pending))
> + atomic_set(&vcpu->arch.nmi_pending, 1);
Is it OK that NMIs injected after the collapse will increment this to >
1 again? Or is that impossible?
> +}
> +
> #endif
Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html