On 10/12/20 18:09, Tom Lendacky wrote:
@@ -2797,7 +2838,27 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr)
static int wrmsr_interception(struct vcpu_svm *svm)
  {
-       return kvm_emulate_wrmsr(&svm->vcpu);
+       u32 ecx;
+       u64 data;
+
+       if (!sev_es_guest(svm->vcpu.kvm))
+               return kvm_emulate_wrmsr(&svm->vcpu);
+
+       ecx = kvm_rcx_read(&svm->vcpu);
+       data = kvm_read_edx_eax(&svm->vcpu);
+       if (kvm_set_msr(&svm->vcpu, ecx, data)) {
+               trace_kvm_msr_write_ex(ecx, data);
+               ghcb_set_sw_exit_info_1(svm->ghcb, 1);
+               ghcb_set_sw_exit_info_2(svm->ghcb,
+                                       X86_TRAP_GP |
+                                       SVM_EVTINJ_TYPE_EXEPT |
+                                       SVM_EVTINJ_VALID);
+               return 1;
+       }
+
+       trace_kvm_msr_write(ecx, data);
+
+       return kvm_skip_emulated_instruction(&svm->vcpu);
  }
static int msr_interception(struct vcpu_svm *svm)

This code duplication is ugly, and does not work with userspace MSR filters too.

But we can instead trap the completion of the MSR read/write to use ghcb_set_sw_exit_info_1 instead of kvm_inject_gp, with a callback like

static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
{
        if (!sev_es_guest(svm->vcpu.kvm) || !err)
                return kvm_complete_insn_gp(&svm->vcpu, err);

        ghcb_set_sw_exit_info_1(svm->ghcb, 1);
        ghcb_set_sw_exit_info_2(svm->ghcb,
                                X86_TRAP_GP |
                                SVM_EVTINJ_TYPE_EXEPT |
                                SVM_EVTINJ_VALID);
        return 1;
}


...
        .complete_emulated_msr = svm_complete_emulated_msr,

@@ -2827,7 +2888,14 @@ static int interrupt_window_interception(struct vcpu_svm 
*svm)
  static int pause_interception(struct vcpu_svm *svm)
  {
        struct kvm_vcpu *vcpu = &svm->vcpu;
-       bool in_kernel = (svm_get_cpl(vcpu) == 0);
+       bool in_kernel;
+
+       /*
+        * CPL is not made available for an SEV-ES guest, so just set in_kernel
+        * to true.
+        */
+       in_kernel = (sev_es_guest(svm->vcpu.kvm)) ? true
+                                                 : (svm_get_cpl(vcpu) == 0);
if (!kvm_pause_in_guest(vcpu->kvm))
                grow_ple_window(vcpu);

See below.

@@ -3273,6 +3351,13 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
        struct vcpu_svm *svm = to_svm(vcpu);
        struct vmcb *vmcb = svm->vmcb;
+ /*
+        * SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt mask
+        * bit to determine the state of the IF flag.
+        */
+       if (sev_es_guest(svm->vcpu.kvm))
+               return !(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK);

This seems wrong, you have to take into account SVM_INTERRUPT_SHADOW_MASK as well. Also, even though GIF is not really used by SEV-ES guests, I think it's nicer to put this check afterwards.

That is:

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4372e45c8f06..2dd9c9698480 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3247,7 +3247,14 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
        if (!gif_set(svm))
                return true;

-       if (is_guest_mode(vcpu)) {
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               /*
+                * SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt 
mask
+                * bit to determine the state of the IF flag.
+                */
+               if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
+                       return true;
+       } else if (is_guest_mode(vcpu)) {
                /* As long as interrupts are being delivered...  */
                if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
                    ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)



        if (!gif_set(svm))
                return true;
@@ -3458,6 +3543,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
                svm->vcpu.arch.nmi_injected = true;
                break;
        case SVM_EXITINTINFO_TYPE_EXEPT:
+               /*
+                * Never re-inject a #VC exception.
+                */
+               if (vector == X86_TRAP_VC)
+                       break;
+
                /*
                 * In case of software exceptions, do not reinject the vector,
                 * but re-execute the instruction instead. Rewind RIP first
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a3fdc16cfd6f..b6809a2851d2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4018,7 +4018,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
  {
        int idx;
- if (vcpu->preempted)
+       if (vcpu->preempted && !vcpu->arch.guest_state_protected)
                vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);

This has to be true, otherwise no directed yield will be done at all:

        if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
            !kvm_arch_vcpu_in_kernel(vcpu))
                continue;

Or more easily, just use in_kernel == false in pause_interception, like

+       /*
+        * CPL is not made available for an SEV-ES guest, therefore
+        * vcpu->arch.preempted_in_kernel can never be true.  Just
+        * set in_kernel to false as well.
+        */
+       in_kernel = !sev_es_guest(svm->vcpu.kvm) && svm_get_cpl(vcpu) == 0;

/*
@@ -8161,7 +8161,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
  {
        struct kvm_run *kvm_run = vcpu->run;
- kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+       kvm_run->if_flag = (vcpu->arch.guest_state_protected)
+               ? kvm_arch_interrupt_allowed(vcpu)
+               : (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;

Here indeed you only want the interrupt allowed bit, not the interrupt window. But we can just be bold and always set it to true.

- for userspace irqchip, kvm_run->ready_for_interrupt_injection is set just below and it will always be false if kvm_arch_interrupt_allowed is false

- for in-kernel APIC, if_flag is documented to be invalid (though it actually is valid). For split irqchip, they can just use kvm_run->ready_for_interrupt_injection; for entirely in-kernel interrupt handling, userspace does not need if_flag at all.

Paolo

        kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
        kvm_run->cr8 = kvm_get_cr8(vcpu);
        kvm_run->apic_base = kvm_get_apic_base(vcpu);



Reply via email to