Hardware records DecodeAssist information, i.e. the instruction length and
bytes, in the VMCB for exits such as #NPF.  KVM does not currently expose
that information when an L2 exit is reflected to L1, so a nested hypervisor
cannot consume the hardware-provided decode state and may need to fetch and
decode the L2 instruction itself.

Advertise DecodeAssists to L1 when the host supports it, and copy
insn_len/insn_bytes from VMCB02 to VMCB12 for nested VM-Exits that
reflect a real hardware exit.  Gate the copy on L1's guest CPUID so the
advertised feature and the propagated state stay in sync.

Report decode information only for exits that actually have fresh hardware
state.  KVM synthesizes several nested VM-Exits, e.g. VMRUN failures,
synthetic #NPF exits, reflected exceptions, and interrupt/NMI windows, that
do not carry VMCB02 DecodeAssist output.  Set nested.decode_assists_valid
only on the path that reflects a VMCB02 exit verbatim to L1, i.e.
nested_svm_exit_handled(), clear VMCB02's decode fields before VMRUN, and
clear VMCB12's fields for ineligible exits.  Use insn_len == 0 to tell L1
that no decode information is available, so that stale bytes from a
previous L2 exit are never exposed.

Tested-by: Yongwei Xu <[email protected]>
Signed-off-by: Tina Zhang <[email protected]>
---
 arch/x86/kvm/svm/nested.c | 47 ++++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/svm/svm.c    |  3 +++
 arch/x86/kvm/svm/svm.h    |  6 +++++
 3 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index b340dc9991ad..50e9ffa5f9fd 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -33,6 +33,33 @@
 
 #define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
 
+static void nested_svm_clear_decode_assists(struct vmcb *vmcb)
+{
+       vmcb->control.insn_len = 0;
+       memset(vmcb->control.insn_bytes, 0,
+              sizeof(vmcb->control.insn_bytes));
+}
+
+static void nested_svm_copy_decode_assists(struct vmcb *to, struct vmcb *from)
+{
+       u8 insn_len = from->control.insn_len;
+
+       nested_svm_clear_decode_assists(to);
+
+       /*
+        * Hardware leaves insn_len zero when no DecodeAssist data is available.
+        * Keep that as the nested-visible "no decode info" marker.
+        */
+       if (!insn_len)
+               return;
+
+       if (WARN_ON_ONCE(insn_len > sizeof(from->control.insn_bytes)))
+               return;
+
+       to->control.insn_len = insn_len;
+       memcpy(to->control.insn_bytes, from->control.insn_bytes, insn_len);
+}
+
 static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
                                       struct x86_exception *fault)
 {
@@ -838,7 +865,10 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
        /*
         * Filled at exit: exit_code, exit_info_1, exit_info_2, exit_int_info,
         * exit_int_info_err, next_rip, insn_len, insn_bytes.
+        * Clear stale DecodeAssist data before L2 runs.
         */
+       nested_svm_clear_decode_assists(vmcb02);
+       svm->nested.decode_assists_valid = false;
 
        if (guest_cpu_cap_has(vcpu, X86_FEATURE_VGIF) &&
            (vmcb12_ctrl->int_ctl & V_GIF_ENABLE_MASK))
@@ -1251,6 +1281,18 @@ static int nested_svm_vmexit_update_vmcb12(struct kvm_vcpu *vcpu)
        if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
                vmcb12->control.next_rip  = vmcb02->control.next_rip;
 
+       /*
+        * Copy DecodeAssist data only for real VMCB02 exits.  KVM-synthesized
+        * exits report no decode info to L1.
+        */
+       if (svm->nested.decode_assists_valid &&
+           guest_cpu_cap_has(vcpu, X86_FEATURE_DECODEASSISTS))
+               nested_svm_copy_decode_assists(vmcb12, vmcb02);
+       else
+               nested_svm_clear_decode_assists(vmcb12);
+
+       svm->nested.decode_assists_valid = false;
+
        if (nested_vmcb12_has_lbrv(vcpu))
                svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
 
@@ -1593,8 +1635,11 @@ int nested_svm_exit_handled(struct vcpu_svm *svm)
 
        vmexit = nested_svm_intercept(svm);
 
-       if (vmexit == NESTED_EXIT_DONE)
+       if (vmexit == NESTED_EXIT_DONE) {
+               /* This path reflects a real VMCB02 exit directly to L1. */
+               svm->nested.decode_assists_valid = true;
                nested_svm_vmexit(svm);
+       }
 
        return vmexit;
 }
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e02a38da5296..366e7ef787b2 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -5470,6 +5470,9 @@ static __init void svm_set_cpu_caps(void)
                 */
                kvm_cpu_cap_set(X86_FEATURE_FLUSHBYASID);
 
+               if (boot_cpu_has(X86_FEATURE_DECODEASSISTS))
+                       kvm_cpu_cap_set(X86_FEATURE_DECODEASSISTS);
+
                if (nrips)
                        kvm_cpu_cap_set(X86_FEATURE_NRIPS);
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5137416be593..be7fdcd124b7 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -238,6 +238,12 @@ struct svm_nested_state {
         * on its side.
         */
        bool force_msr_bitmap_recalc;
+
+       /*
+        * True only while reflecting a real VMCB02 exit whose DecodeAssist
+        * fields may be copied to VMCB12.  KVM-synthesized exits leave it clear.
+        */
+       bool decode_assists_valid;
 };
 
 struct vcpu_sev_es_state {
-- 
2.43.0


Reply via email to