Unconditionally disable PML in vmcs02, as KVM emulates PML purely in the
MMU, e.g. vmx_flush_pml_buffer() doesn't even try to copy the L2 GPAs
from vmcs02's buffer to vmcs12.  At best, enabling PML is a nop.  At
worst, it will cause vmx_flush_pml_buffer() to record bogus GFNs in the
dirty logs.

Initialize vmcs02.GUEST_PML_INDEX such that PML writes would trigger
VM-Exit if PML was somehow enabled, skip flushing the buffer for guest
mode since the index is bogus, and freak out if a PML full exit occurs
when L2 is active.

Signed-off-by: Sean Christopherson <sea...@google.com>
---
 arch/x86/kvm/vmx/nested.c | 29 +++++++++++++++--------------
 arch/x86/kvm/vmx/vmx.c    | 12 ++++++++++--
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b2f0b5e9cd63..0c6dda9980a6 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2167,15 +2167,13 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
                vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
 
        /*
-        * The PML address never changes, so it is constant in vmcs02.
-        * Conceptually we want to copy the PML index from vmcs01 here,
-        * and then back to vmcs01 on nested vmexit.  But since we flush
-        * the log and reset GUEST_PML_INDEX on each vmexit, the PML
-        * index is also effectively constant in vmcs02.
+        * PML is emulated for L2, but never enabled in hardware as the MMU
+        * handles A/D emulation.  Disabling PML for L2 also avoids having to
+        * deal with filtering out L2 GPAs from the buffer.
         */
        if (enable_pml) {
-               vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
-               vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+               vmcs_write64(PML_ADDRESS, 0);
+               vmcs_write16(GUEST_PML_INDEX, -1);
        }
 
        if (cpu_has_vmx_encls_vmexit())
@@ -2210,7 +2208,7 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
 
 static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 {
-       u32 exec_control, vmcs12_exec_ctrl;
+       u32 exec_control;
        u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
 
        if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
@@ -2284,11 +2282,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
                                  SECONDARY_EXEC_APIC_REGISTER_VIRT |
                                  SECONDARY_EXEC_ENABLE_VMFUNC);
                if (nested_cpu_has(vmcs12,
-                                  CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
-                       vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
-                               ~SECONDARY_EXEC_ENABLE_PML;
-                       exec_control |= vmcs12_exec_ctrl;
-               }
+                                  CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+                       exec_control |= vmcs12->secondary_vm_exec_control;
+
+               /* PML is emulated and never enabled in hardware for L2. */
+               exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
                /* VMCS shadowing for L2 is emulated for now */
                exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
@@ -5793,7 +5791,10 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
        case EXIT_REASON_PREEMPTION_TIMER:
                return true;
        case EXIT_REASON_PML_FULL:
-               /* We emulate PML support to L1. */
+               /*
+                * PML is emulated for an L1 VMM and should never be enabled in
+                * vmcs02, always "handle" PML_FULL by exiting to userspace.
+                */
                return true;
        case EXIT_REASON_VMFUNC:
                /* VM functions are emulated through L2->L0 vmexits. */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e0a3a9be654b..b47ed3f412ef 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5976,9 +5976,10 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
         * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before
         * querying dirty_bitmap, we only need to kick all vcpus out of guest
         * mode as if vcpus is in root mode, the PML buffer must has been
-        * flushed already.
+        * flushed already.  Note, PML is never enabled in hardware while
+        * running L2.
         */
-       if (enable_pml)
+       if (enable_pml && !is_guest_mode(vcpu))
                vmx_flush_pml_buffer(vcpu);
 
        /*
@@ -5994,6 +5995,13 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
                return handle_invalid_guest_state(vcpu);
 
        if (is_guest_mode(vcpu)) {
+               /*
+                * PML is never enabled when running L2, bail immediately if a
+                * PML full exit occurs as something is horribly wrong.
+                */
+               if (exit_reason.basic == EXIT_REASON_PML_FULL)
+                       goto unexpected_vmexit;
+
                /*
                 * The host physical addresses of some pages of guest memory
                 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
-- 
2.30.0.478.g8a0d178c01-goog

Reply via email to