Thanks a lot for the patch, Sean.

I know this has been queued for quite a while. But I just realized I have
another question of kvm_mmu_pte_write():

> Remove the update_pte() shadow paging logic, which was obsoleted by
> commit 4731d4c7a077 ("KVM: MMU: out of sync shadow core"), but never
> removed.  As pointed out by Yu, KVM never write protects leaf page
> tables for the purposes of shadow paging, and instead marks their
> associated shadow page as unsync so that the guest can write PTEs at
> will.
> 
> The update_pte() path, which predates the unsync logic, optimizes COW
> scenarios by refreshing leaf SPTEs when they are written, as opposed to
> zapping the SPTE, restarting the guest, and installing the new SPTE on
> the subsequent fault.  Since KVM no longer write-protects leaf page
> tables, update_pte() is unreachable and can be dropped.
> 
> Reported-by: Yu Zhang <[email protected]>
> Signed-off-by: Sean Christopherson <[email protected]>
> ---
>  arch/x86/include/asm/kvm_host.h |  3 --
>  arch/x86/kvm/mmu/mmu.c          | 49 ++-------------------------------
>  arch/x86/kvm/x86.c              |  1 -
>  3 files changed, 2 insertions(+), 51 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3d6616f6f6ef..ed575c5655dd 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -358,8 +358,6 @@ struct kvm_mmu {
>       int (*sync_page)(struct kvm_vcpu *vcpu,
>                        struct kvm_mmu_page *sp);
>       void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
> -     void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> -                        u64 *spte, const void *pte);
>       hpa_t root_hpa;
>       gpa_t root_pgd;
>       union kvm_mmu_role mmu_role;
> @@ -1031,7 +1029,6 @@ struct kvm_arch {
>  struct kvm_vm_stat {
>       ulong mmu_shadow_zapped;
>       ulong mmu_pte_write;
> -     ulong mmu_pte_updated;
>       ulong mmu_pde_zapped;
>       ulong mmu_flooded;
>       ulong mmu_recycled;
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 6d16481aa29d..3a2c25852b1f 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -1723,13 +1723,6 @@ static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
>       return 0;
>  }
>  
> -static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
> -                              struct kvm_mmu_page *sp, u64 *spte,
> -                              const void *pte)
> -{
> -     WARN_ON(1);
> -}
> -
>  #define KVM_PAGE_ARRAY_NR 16
>  
>  struct kvm_mmu_pages {
> @@ -3813,7 +3806,6 @@ static void nonpaging_init_context(struct kvm_vcpu 
> *vcpu,
>       context->gva_to_gpa = nonpaging_gva_to_gpa;
>       context->sync_page = nonpaging_sync_page;
>       context->invlpg = NULL;
> -     context->update_pte = nonpaging_update_pte;
>       context->root_level = 0;
>       context->shadow_root_level = PT32E_ROOT_LEVEL;
>       context->direct_map = true;
> @@ -4395,7 +4387,6 @@ static void paging64_init_context_common(struct 
> kvm_vcpu *vcpu,
>       context->gva_to_gpa = paging64_gva_to_gpa;
>       context->sync_page = paging64_sync_page;
>       context->invlpg = paging64_invlpg;
> -     context->update_pte = paging64_update_pte;
>       context->shadow_root_level = level;
>       context->direct_map = false;
>  }
> @@ -4424,7 +4415,6 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
>       context->gva_to_gpa = paging32_gva_to_gpa;
>       context->sync_page = paging32_sync_page;
>       context->invlpg = paging32_invlpg;
> -     context->update_pte = paging32_update_pte;
>       context->shadow_root_level = PT32E_ROOT_LEVEL;
>       context->direct_map = false;
>  }
> @@ -4506,7 +4496,6 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
>       context->page_fault = kvm_tdp_page_fault;
>       context->sync_page = nonpaging_sync_page;
>       context->invlpg = NULL;
> -     context->update_pte = nonpaging_update_pte;
>       context->shadow_root_level = kvm_mmu_get_tdp_level(vcpu);
>       context->direct_map = true;
>       context->get_guest_pgd = get_cr3;
> @@ -4678,7 +4667,6 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, 
> bool execonly,
>       context->gva_to_gpa = ept_gva_to_gpa;
>       context->sync_page = ept_sync_page;
>       context->invlpg = ept_invlpg;
> -     context->update_pte = ept_update_pte;
>       context->root_level = level;
>       context->direct_map = false;
>       context->mmu_role.as_u64 = new_role.as_u64;
> @@ -4826,19 +4814,6 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
>  }
>  EXPORT_SYMBOL_GPL(kvm_mmu_unload);
>  
> -static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
> -                               struct kvm_mmu_page *sp, u64 *spte,
> -                               const void *new)
> -{
> -     if (sp->role.level != PG_LEVEL_4K) {
> -             ++vcpu->kvm->stat.mmu_pde_zapped;
> -             return;
> -        }
> -
> -     ++vcpu->kvm->stat.mmu_pte_updated;
> -     vcpu->arch.mmu->update_pte(vcpu, sp, spte, new);
> -}
> -
>  static bool need_remote_flush(u64 old, u64 new)
>  {
>       if (!is_shadow_present_pte(old))
> @@ -4954,22 +4929,6 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, 
> gpa_t gpa, int *nspte)
>       return spte;
>  }
>  
> -/*
> - * Ignore various flags when determining if a SPTE can be immediately
> - * overwritten for the current MMU.
> - *  - level: explicitly checked in mmu_pte_write_new_pte(), and will never
> - *    match the current MMU role, as MMU's level tracks the root level.
> - *  - access: updated based on the new guest PTE
> - *  - quadrant: handled by get_written_sptes()
> - *  - invalid: always false (loop only walks valid shadow pages)
> - */
> -static const union kvm_mmu_page_role role_ign = {
> -     .level = 0xf,
> -     .access = 0x7,
> -     .quadrant = 0x3,
> -     .invalid = 0x1,
> -};
> -
>  static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>                             const u8 *new, int bytes,
>                             struct kvm_page_track_notifier_node *node)
> @@ -5020,14 +4979,10 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, 
> gpa_t gpa,
>  
>               local_flush = true;
>               while (npte--) {
> -                     u32 base_role = vcpu->arch.mmu->mmu_role.base.word;
> -
>                       entry = *spte;
>                       mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL);
> -                     if (gentry &&
> -                         !((sp->role.word ^ base_role) & ~role_ign.word) &&
> -                         rmap_can_add(vcpu))
> -                             mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
> +                     if (gentry && sp->role.level != PG_LEVEL_4K)

I am wondering, if there's any chance the sp->role.level would be PG_LEVEL_4K
in kvm_mmu_pte_write()? My previous understanding was that, since the gfn of
guest leaf page tables are never page tracked, the sp here shall only be with
level greater than PG_LEVEL_4K. Did I miss anything here? Thanks! :)

B.R.
Yu

> +                             ++vcpu->kvm->stat.mmu_pde_zapped;
>                       if (need_remote_flush(entry, *spte))
>                               remote_flush = true;
>                       ++spte;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a480804ae27a..d9f5d9acccc1 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -233,7 +233,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
>       VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
>       VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
>       VM_STAT("mmu_pte_write", mmu_pte_write),
> -     VM_STAT("mmu_pte_updated", mmu_pte_updated),
>       VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
>       VM_STAT("mmu_flooded", mmu_flooded),
>       VM_STAT("mmu_recycled", mmu_recycled),
> -- 
> 2.30.0.284.gd98b1dd5eaa7-goog
> 

Reply via email to