The hardware walks the SPPT using the guest-physical address of the
write and then uses bits 11:7 of that address to select the
write-permission bit for the 128-byte sub-page region being accessed
within the 4KB guest-physical page. If the sub-page region's
write-permission bit is set, the write is allowed; otherwise the write
is disallowed and results in an EPT violation.
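
As an illustration only (not part of this patch), the sub-page lookup
amounts to the following check, where the helper name is hypothetical
and @leaf is assumed to be the 64-bit SPPT leaf entry for the page:

	/*
	 * Sketch of the hardware's sub-page permission check: the
	 * write-permission bits sit at the even bit positions of the
	 * SPPT leaf entry, one per 128-byte sub-page.
	 */
	static bool spp_write_allowed(u64 leaf, u64 gpa)
	{
		unsigned int subpage = (gpa >> 7) & 0x1f; /* bits 11:7 */

		return leaf & (1ULL << (subpage * 2));
	}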

Guest-physical pages mapped via leaf EPT paging structures for which
both the accumulated write-access bit and the SPP bit are clear (0)
generate EPT violations on memory write accesses. Guest-physical pages
mapped via EPT paging structures for which the accumulated write-access
bit is set (1) allow writes, effectively ignoring the SPP bit in the
leaf EPT paging structure.
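
Put differently, a minimal model of the resulting write decision
(hypothetical helper, for illustration only):

	/*
	 * @ept_w is the accumulated EPT write-access bit, @spp is the
	 * SPP bit in the leaf EPT entry, and @subpage_w is the
	 * sub-page write-permission bit fetched from the SPPT.
	 */
	static bool ept_write_allowed(bool ept_w, bool spp, bool subpage_w)
	{
		if (ept_w)
			return true;	/* SPP bit is ignored */
		if (spp)
			return subpage_w;	/* consult the SPPT */
		return false;		/* EPT violation */
	}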

Software sets up SPP page-table levels 4, 3 and 2 alongside the EPT
paging structures, and fills each level-1 (leaf) entry from the 32-bit
write-protect bitmap supplied for a single 4KB page. Each 4KB page is
thus divided into 32 sub-pages of 128 bytes.
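
For example (illustrative only), a caller that wants to keep just the
first two 128-byte sub-pages of the 4KB page at gfn writable could use
the interface added by this patch like so:

	u32 access_map = BIT(0) | BIT(1);	/* sub-pages 0-1 writable */

	/* Write-protects the remaining 30 sub-pages of the page. */
	kvm_mmu_setup_spp_structure(vcpu, access_map, gfn);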

Signed-off-by: Zhang Yi <[email protected]>
---
 arch/x86/include/asm/kvm_host.h |   4 ++
 arch/x86/kvm/mmu.c              | 123 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 125 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3218d91..ce6d258 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1402,6 +1402,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
                       void *insn, int insn_len);
+
+int kvm_mmu_setup_spp_structure(struct kvm_vcpu *vcpu,
+                               u32 access_map, gfn_t gfn);
+
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
 void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d512125..287ee62 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -206,6 +206,11 @@ static const union kvm_mmu_page_role mmu_base_role_mask = {
                ({ spte = mmu_spte_get_lockless(_walker.sptep); 1; });  \
             __shadow_walk_next(&(_walker), spte))
 
+#define for_each_shadow_spp_entry(_vcpu, _addr, _walker)    \
+       for (shadow_spp_walk_init(&(_walker), _vcpu, _addr);    \
+            shadow_walk_okay(&(_walker));                      \
+            shadow_walk_next(&(_walker)))
+
 static struct kmem_cache *pte_list_desc_cache;
 static struct kmem_cache *mmu_page_header_cache;
 static struct percpu_counter kvm_total_used_mmu_pages;
@@ -476,6 +481,11 @@ static int is_shadow_present_pte(u64 pte)
        return (pte != 0) && !is_mmio_spte(pte);
 }
 
+static int is_spp_shadow_present(u64 pte)
+{
+       return pte & PT_PRESENT_MASK;
+}
+
 static int is_large_pte(u64 pte)
 {
        return pte & PT_PAGE_SIZE_MASK;
@@ -495,6 +505,11 @@ static bool is_executable_pte(u64 spte)
        return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask;
 }
 
+static bool is_spp_spte(struct kvm_mmu_page *sp)
+{
+       return sp->role.spp;
+}
+
 static kvm_pfn_t spte_to_pfn(u64 pte)
 {
        return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -2606,6 +2621,16 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
                                    addr);
 }
 
+static void shadow_spp_walk_init(struct kvm_shadow_walk_iterator *iterator,
+                                struct kvm_vcpu *vcpu, u64 addr)
+{
+       iterator->addr = addr;
+       iterator->shadow_addr = vcpu->arch.mmu->sppt_root;
+
+       /* The SPP table is a 4-level paging structure. */
+       iterator->level = PT64_ROOT_4LEVEL;
+}
+
 static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
 {
        if (iterator->level < PT_PAGE_TABLE_LEVEL)
@@ -2656,6 +2681,18 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
                mark_unsync(sptep);
 }
 
+static void link_spp_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
+                                struct kvm_mmu_page *sp)
+{
+       u64 spte;
+
+       spte = __pa(sp->spt) | PT_PRESENT_MASK;
+
+       mmu_spte_set(sptep, spte);
+
+       mmu_page_add_parent_pte(vcpu, sp, sptep);
+}
+
 static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                                   unsigned direct_access)
 {
@@ -2686,7 +2723,13 @@ static bool mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
 
        pte = *spte;
        if (is_shadow_present_pte(pte)) {
-               if (is_last_spte(pte, sp->role.level)) {
+               if (is_spp_spte(sp)) {
+                       if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+                               /* SPP leaf pages have no rmap to release. */
+                               return true;
+                       child = page_header(pte & PT64_BASE_ADDR_MASK);
+                       drop_parent_pte(child, spte);
+               } else if (is_last_spte(pte, sp->role.level)) {
                        drop_spte(kvm, spte);
                        if (is_large_pte(pte))
                                --kvm->stat.lpages;
@@ -4231,6 +4274,77 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
        return RET_PF_RETRY;
 }
 
+static u64 format_spp_spte(u32 spp_wp_bitmap)
+{
+       u64 new_spte = 0;
+       int i;
+
+       /*
+        * One 4KB page contains 32 sub-pages.  In the SPPT L1E (leaf
+        * entry), the odd bit positions are reserved, so convert the
+        * u32 sub-page write-protect bitmap into the 64-bit SPPT L1E
+        * format: bitmap bit i becomes entry bit 2*i.
+        */
+       for (i = 0; i < 32; i++) {
+               if (spp_wp_bitmap & (1ULL << i))
+                       new_spte |= 1ULL << (i * 2);
+       }
+
+       return new_spte;
+}
+
+static void mmu_spp_spte_set(u64 *sptep, u64 new_spte)
+{
+       __set_spte(sptep, new_spte);
+}
+
+int kvm_mmu_setup_spp_structure(struct kvm_vcpu *vcpu,
+                               u32 access_map, gfn_t gfn)
+{
+       struct kvm_shadow_walk_iterator iter;
+       struct kvm_mmu_page *sp;
+       gfn_t pseudo_gfn;
+       u64 old_spte, spp_spte;
+       struct kvm *kvm = vcpu->kvm;
+
+       spin_lock(&kvm->mmu_lock);
+
+       /* The SPPT root must be valid before entries can be installed. */
+
+       if (!VALID_PAGE(vcpu->arch.mmu->sppt_root))
+               goto out_unlock;
+
+       for_each_shadow_spp_entry(vcpu, (u64)gfn << PAGE_SHIFT, iter) {
+               if (iter.level == PT_PAGE_TABLE_LEVEL) {
+                       spp_spte = format_spp_spte(access_map);
+                       old_spte = mmu_spte_get_lockless(iter.sptep);
+                       if (old_spte != spp_spte) {
+                               mmu_spp_spte_set(iter.sptep, spp_spte);
+                               kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+                       }
+                       break;
+               }
+
+               if (!is_spp_shadow_present(*iter.sptep)) {
+                       u64 base_addr = iter.addr;
+
+                       base_addr &= PT64_LVL_ADDR_MASK(iter.level);
+                       pseudo_gfn = base_addr >> PAGE_SHIFT;
+                       sp = kvm_mmu_get_spp_page(vcpu, pseudo_gfn,
+                                                 iter.level - 1);
+                       link_spp_shadow_page(vcpu, iter.sptep, sp);
+               }
+       }
+
+       spin_unlock(&kvm->mmu_lock);
+       return 0;
+
+out_unlock:
+       spin_unlock(&kvm->mmu_lock);
+       return -EFAULT;
+}
+
 int kvm_mmu_get_subpages(struct kvm *kvm, struct kvm_subpage *spp_info)
 {
        u32 *access = spp_info->access_map;
@@ -4255,9 +4369,10 @@ int kvm_mmu_set_subpages(struct kvm *kvm, struct kvm_subpage *spp_info)
        gfn_t gfn = spp_info->base_gfn;
        int npages = spp_info->npages;
        struct kvm_memory_slot *slot;
+       struct kvm_vcpu *vcpu;
        u32 *wp_map;
        int ret;
-       int i;
+       int i, j;
 
        for (i = 0; i < npages; i++, gfn++) {
                slot = gfn_to_memslot(kvm, gfn);
@@ -4281,6 +4396,10 @@ int kvm_mmu_set_subpages(struct kvm *kvm, struct kvm_subpage *spp_info)
                                "Please try to disable the huge page\n", gfn);
                        return -EFAULT;
                }
+
+               kvm_for_each_vcpu(j, vcpu, kvm)
+                       kvm_mmu_setup_spp_structure(vcpu, access, gfn);
+
                wp_map = gfn_to_subpage_wp_info(slot, gfn);
                *wp_map = access;
        }
-- 
2.7.4
