On Thu, Feb 12, 2026 at 07:58:52AM -0800, Jim Mattson wrote:
> When the vCPU is in guest mode with nested NPT enabled, guest accesses to
> IA32_PAT are redirected to the gPAT register, which is stored in
> svm->nested.save.g_pat.
>
> Non-guest accesses (e.g. from userspace) to IA32_PAT are always redirected
> to hPAT, which is stored in vcpu->arch.pat.
>
> This is architected behavior. It also makes it possible to restore a new
> checkpoint on an old kernel with reasonable semantics. After the restore,
> gPAT will be lost, and L2 will run on L1's PAT. Note that the old kernel
> would have always run L2 on L1's PAT.
>
> Add WARN_ON_ONCE to both svm_get_msr() and svm_set_msr() to flag any
> host-initiated accesses originating from KVM itself rather than userspace.
>
> Fixes: 15038e147247 ("KVM: SVM: obey guest PAT")
> Signed-off-by: Jim Mattson <[email protected]>
> ---
> arch/x86/kvm/svm/nested.c | 9 ---------
> arch/x86/kvm/svm/svm.c | 37 ++++++++++++++++++++++++++++++-------
> arch/x86/kvm/svm/svm.h | 17 ++++++++++++++++-
> 3 files changed, 46 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index dc8275837120..69b577a4915c 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -706,15 +706,6 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
> return 0;
> }
>
> -void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
> -{
> - if (!svm->nested.vmcb02.ptr)
> - return;
> -
> - /* FIXME: merge g_pat from vmcb01 and vmcb12. */
> - vmcb_set_gpat(svm->nested.vmcb02.ptr, svm->vmcb01.ptr->save.g_pat);
> -}
> -
> static void nested_vmcb02_prepare_save(struct vcpu_svm *svm)
> {
> struct vmcb_ctrl_area_cached *control = &svm->nested.ctl;
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 529cbac57814..205bf07896ad 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -2837,6 +2837,21 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> case MSR_AMD64_DE_CFG:
> msr_info->data = svm->msr_decfg;
> break;
> + case MSR_IA32_CR_PAT:
> + /*
> + * When nested NPT is enabled, L2 has a separate PAT from
> + * L1. Guest accesses to IA32_PAT while running L2 target
> + * L2's gPAT; host-initiated accesses always target L1's
> + * hPAT for backward and forward KVM_GET_MSRS compatibility
> + * with older kernels.
> + */
> + WARN_ON_ONCE(msr_info->host_initiated && vcpu->wants_to_run);
> + if (!msr_info->host_initiated && is_guest_mode(vcpu) &&
> + nested_npt_enabled(svm))
> + msr_info->data = svm->nested.save.g_pat;
> + else
> + msr_info->data = vcpu->arch.pat;
> + break;
> default:
> return kvm_get_msr_common(vcpu, msr_info);
> }
> @@ -2920,13 +2935,21 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
>
> break;
> case MSR_IA32_CR_PAT:
> - ret = kvm_set_msr_common(vcpu, msr);
> - if (ret)
> - break;
> -
> - vmcb_set_gpat(svm->vmcb01.ptr, data);
> - if (is_guest_mode(vcpu))
> - nested_vmcb02_compute_g_pat(svm);
> + if (!kvm_pat_valid(data))
> + return 1;
> + /*
> + * When nested NPT is enabled, L2 has a separate PAT from
> + * L1. Guest accesses to IA32_PAT while running L2 target
> + * L2's gPAT; host-initiated accesses always target L1's
> + * hPAT for backward and forward KVM_SET_MSRS compatibility
> + * with older kernels.
> + */
> + WARN_ON_ONCE(msr->host_initiated && vcpu->wants_to_run);
> + if (!msr->host_initiated && is_guest_mode(vcpu) &&
> + nested_npt_enabled(svm))
> + svm_set_gpat(svm, data);
> + else
> + svm_set_hpat(svm, data);
> break;
> case MSR_IA32_SPEC_CTRL:
> if (!msr->host_initiated &&
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index a49c48459e0b..88549705133f 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -607,6 +607,22 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
> return svm->nested.ctl.misc_ctl & SVM_MISC_ENABLE_NP;
> }
>
> +static inline void svm_set_gpat(struct vcpu_svm *svm, u64 data)
> +{
> + svm->nested.save.g_pat = data;
> + vmcb_set_gpat(svm->nested.vmcb02.ptr, data);
> +}
> +
> +static inline void svm_set_hpat(struct vcpu_svm *svm, u64 data)
> +{
> + svm->vcpu.arch.pat = data;
> + if (npt_enabled) {
> + vmcb_set_gpat(svm->vmcb01.ptr, data);
> + if (is_guest_mode(&svm->vcpu) && !nested_npt_enabled(svm))
> + vmcb_set_gpat(svm->nested.vmcb02.ptr, data);
> + }
> +}
Is it just me, or is it a bit confusing that svm_set_gpat() sets L2's gPAT,
not L1's, and that svm_set_hpat() calls vmcb_set_gpat()?
"gpat" means different things in the context of the VMCB or otherwise,
which kinda makes sense but is also not super clear. Maybe
svm_set_l1_gpat() and svm_set_l2_gpat() is more clear?
Will leave it up to Sean to decide if the naming is good enough, but
honestly I don't want to stall this series, so hopefully any renames can
be done as a follow up or when the series is applied.
> +
> static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
> {
> return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_VNMI) &&
> @@ -840,7 +856,6 @@ void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
> void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
> struct vmcb_save_area *save);
> void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
> -void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
> void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
>
> extern struct kvm_x86_nested_ops svm_nested_ops;
> --
> 2.53.0.239.g8d8fc8a987-goog
>