In order to interoperate correctly with the rest of KVM and other Linux
subsystems, the TDP MMU must handle the various MMU notifiers. Add a
hook for the change_pte MMU notifier and handle it in the TDP MMU.
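
For context, this arch hook is invoked from the generic change_pte
notifier in virt/kvm/kvm_main.c, roughly as below (a simplified sketch
of the generic code, not part of this patch, so details may differ):

static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
                                        struct mm_struct *mm,
                                        unsigned long address, pte_t pte)
{
        struct kvm *kvm = mmu_notifier_to_kvm(mn);
        int idx;

        idx = srcu_read_lock(&kvm->srcu);
        spin_lock(&kvm->mmu_lock);
        kvm->mmu_notifier_seq++;

        /* The arch hook returns non-zero if a TLB flush is needed. */
        if (kvm_set_spte_hva(kvm, address, pte))
                kvm_flush_remote_tlbs(kvm);

        spin_unlock(&kvm->mmu_lock);
        srcu_read_unlock(&kvm->srcu, idx);
}

This is why kvm_set_spte_hva() below ORs the TDP MMU handler's result
into the rmap-based handler's return value rather than replacing it.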

Tested by running kvm-unit-tests and KVM selftests on an Intel Haswell
machine. This series introduced no new failures.

This series can be viewed in Gerrit at:
        https://linux-review.googlesource.com/c/virt/kvm/kvm/+/2538

Signed-off-by: Ben Gardon <bgar...@google.com>
---
 arch/x86/kvm/mmu/mmu.c          | 21 ++++++-------
 arch/x86/kvm/mmu/mmu_internal.h | 29 +++++++++++++++++
 arch/x86/kvm/mmu/tdp_mmu.c      | 56 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/mmu/tdp_mmu.h      |  3 ++
 4 files changed, 98 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e6ab79d8f215f..ef9ea3f45241b 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -135,9 +135,6 @@ enum {
 
 #include <trace/events/kvm.h>
 
-#define SPTE_HOST_WRITEABLE    (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
-#define SPTE_MMU_WRITEABLE     (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
-
 /* make pte_list_desc fit well in cache line */
 #define PTE_LIST_EXT 3
 
@@ -1615,13 +1612,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
                        pte_list_remove(rmap_head, sptep);
                        goto restart;
                } else {
-                       new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
-                       new_spte |= (u64)new_pfn << PAGE_SHIFT;
-
-                       new_spte &= ~PT_WRITABLE_MASK;
-                       new_spte &= ~SPTE_HOST_WRITEABLE;
-
-                       new_spte = mark_spte_for_access_track(new_spte);
+                       new_spte = kvm_mmu_changed_pte_notifier_make_spte(
+                                       *sptep, new_pfn);
 
                        mmu_spte_clear_track_bits(sptep);
                        mmu_spte_set(sptep, new_spte);
@@ -1777,7 +1769,14 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
 
 int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
-       return kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
+       int r;
+
+       r = kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
+
+       if (kvm->arch.tdp_mmu_enabled)
+               r |= kvm_tdp_mmu_set_spte_hva(kvm, hva, &pte);
+
+       return r;
 }
 
 static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index d886fe750be38..49c3a04d2b894 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -115,6 +115,12 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
        (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
                                                * PT64_LEVEL_BITS))) - 1))
 
+#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
+#define PT64_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1))
+#else
+#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#endif
+
 #define ACC_EXEC_MASK    1
 #define ACC_WRITE_MASK   PT_WRITABLE_MASK
 #define ACC_USER_MASK    PT_USER_MASK
@@ -132,6 +138,12 @@ static u64 __read_mostly shadow_x_mask;    /* mutual exclusive with nx_mask */
  */
 static u64 __read_mostly shadow_acc_track_mask;
 
+#define PT_FIRST_AVAIL_BITS_SHIFT 10
+#define PT64_SECOND_AVAIL_BITS_SHIFT 54
+
+#define SPTE_HOST_WRITEABLE    (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+#define SPTE_MMU_WRITEABLE     (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
+
 /* Functions for interpreting SPTEs */
 static inline bool is_mmio_spte(u64 spte)
 {
@@ -264,4 +276,21 @@ void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
 
 u64 mark_spte_for_access_track(u64 spte);
 
+static inline u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte,
+                                                        kvm_pfn_t new_pfn)
+{
+       u64 new_spte;
+
+       new_spte = old_spte & ~PT64_BASE_ADDR_MASK;
+       new_spte |= (u64)new_pfn << PAGE_SHIFT;
+
+       new_spte &= ~PT_WRITABLE_MASK;
+       new_spte &= ~SPTE_HOST_WRITEABLE;
+
+       new_spte = mark_spte_for_access_track(new_spte);
+
+       return new_spte;
+}
+
+
 #endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 575970d8805a4..90abd55c89375 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -677,3 +677,59 @@ int kvm_tdp_mmu_test_age_hva(struct kvm *kvm, unsigned long hva)
        return kvm_tdp_mmu_handle_hva_range(kvm, hva, hva + 1, 0,
                                            test_age_gfn);
 }
+
+/*
+ * Handle the change_pte MMU notifier for the TDP MMU.
+ * data is a pointer to the new pte_t mapping the HVA specified by the MMU
+ * notifier.
+ * Performs any needed TLB flushes itself and so always returns 0.
+ */
+static int set_tdp_spte(struct kvm *kvm, struct kvm_memory_slot *slot,
+                       struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused,
+                       unsigned long data)
+{
+       struct tdp_iter iter;
+       pte_t *ptep = (pte_t *)data;
+       kvm_pfn_t new_pfn;
+       u64 new_spte;
+       int need_flush = 0;
+
+       WARN_ON(pte_huge(*ptep));
+
+       new_pfn = pte_pfn(*ptep);
+
+       tdp_root_for_each_pte(iter, root, gfn, gfn + 1) {
+               if (iter.level != PG_LEVEL_4K)
+                       continue;
+
+               if (!is_shadow_present_pte(iter.old_spte))
+                       break;
+
+               tdp_mmu_set_spte(kvm, &iter, 0);
+
+               kvm_flush_remote_tlbs_with_address(kvm, iter.gfn, 1);
+
+               if (!pte_write(*ptep)) {
+                       new_spte = kvm_mmu_changed_pte_notifier_make_spte(
+                                       iter.old_spte, new_pfn);
+
+                       tdp_mmu_set_spte(kvm, &iter, new_spte);
+               }
+
+               need_flush = 1;
+       }
+
+       if (need_flush)
+               kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
+
+       return 0;
+}
+
+int kvm_tdp_mmu_set_spte_hva(struct kvm *kvm, unsigned long address,
+                            pte_t *host_ptep)
+{
+       return kvm_tdp_mmu_handle_hva_range(kvm, address, address + 1,
+                                           (unsigned long)host_ptep,
+                                           set_tdp_spte);
+}
+
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index bdb86f61e75eb..6569792f40d4f 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -25,4 +25,7 @@ int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
 int kvm_tdp_mmu_age_hva_range(struct kvm *kvm, unsigned long start,
                              unsigned long end);
 int kvm_tdp_mmu_test_age_hva(struct kvm *kvm, unsigned long hva);
+
+int kvm_tdp_mmu_set_spte_hva(struct kvm *kvm, unsigned long address,
+                            pte_t *host_ptep);
 #endif /* __KVM_X86_MMU_TDP_MMU_H */
-- 
2.28.0.1011.ga647a8990f-goog
