Allow vcpus to pin spte translations by:

1) Creating a per-vcpu list of pinned ranges.

2) On mmu reload request:
   - Fault in the registered ranges.
   - Mark the sptes with a pinned bit.
   - Mark the shadow pages as pinned.

3) Then modify the following actions:
   - Page aging => do not drop pinned sptes; report them as young instead.
   - MMU notifiers => force mmu reload request (which kicks the cpu out of
     guest mode).
   - GET_DIRTY_LOG => force mmu reload request.
   - SLAB shrinker => skip deletion of pinned shadow pages.

TDP-only.
Signed-off-by: Marcelo Tosatti <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 13 +
arch/x86/kvm/mmu.c | 294 +++++++++++++++++++++++++++++++++++++---
arch/x86/kvm/mmu.h | 7
arch/x86/kvm/mmutrace.h | 23 +++
arch/x86/kvm/paging_tmpl.h | 4
arch/x86/kvm/x86.c | 8 -
include/linux/kvm_host.h | 3
include/uapi/linux/kvm.h | 2
virt/kvm/kvm_main.c | 18 +-
9 files changed, 340 insertions(+), 32 deletions(-)
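Usage note (not part of the patch): the interface below is meant to be driven
from vcpu context by whatever in-kernel feature needs guaranteed-resident
translations. A minimal, hypothetical caller could look like the sketch
below; example_pin_guest_buffer()/example_unpin_guest_buffer() are
illustrative names only, while kvm_mmu_register_pinned_range() and
kvm_mmu_unregister_pinned_range() are the helpers added by this patch.

/* Hypothetical in-kernel user of this patch's interface (a sketch, not
 * part of the series): pin a guest-physical buffer on the current vcpu
 * so its translations stay resident while in guest mode. */
static int example_pin_guest_buffer(struct kvm_vcpu *vcpu, gpa_t gpa,
				    unsigned long size)
{
	gfn_t base_gfn = gpa >> PAGE_SHIFT;
	unsigned long npages = DIV_ROUND_UP(size, PAGE_SIZE);

	/* Only queues KVM_REQ_MMU_RELOAD; the sptes are faulted in and
	 * marked pinned by kvm_mmu_pin_pages() before the next guest
	 * entry. */
	return kvm_mmu_register_pinned_range(vcpu, base_gfn, npages);
}

static void example_unpin_guest_buffer(struct kvm_vcpu *vcpu, gpa_t gpa,
				       unsigned long size)
{
	/* Drops the range, clears the pinned bits and forces a reload of
	 * vcpus that still hold pinned sptes. */
	kvm_mmu_unregister_pinned_range(vcpu, gpa >> PAGE_SHIFT,
					DIV_ROUND_UP(size, PAGE_SIZE));
}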
Index: kvm.pinned-sptes/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/include/asm/kvm_host.h 2014-07-09 12:05:34.836161266 -0300
+++ kvm.pinned-sptes/arch/x86/include/asm/kvm_host.h 2014-07-09 12:08:45.341762782 -0300
@@ -97,6 +97,8 @@
#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8
+#define KVM_MAX_PER_VCPU_PINNED_RANGE 10
+
#define ASYNC_PF_PER_VCPU 64
struct kvm_vcpu;
@@ -221,6 +223,8 @@
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
bool unsync;
+ bool pinned;
+
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
unsigned long parent_ptes; /* Reverse mapping for parent_pte */
@@ -337,6 +341,12 @@
KVM_DEBUGREG_WONT_EXIT = 2,
};
+struct kvm_pinned_page_range {
+ gfn_t base_gfn;
+ unsigned long npages;
+ struct list_head link;
+};
+
struct kvm_vcpu_arch {
/*
* rip and regs accesses must go through
@@ -392,6 +402,9 @@
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
+ struct list_head pinned_mmu_pages;
+ atomic_t nr_pinned_ranges;
+
struct fpu guest_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
Index: kvm.pinned-sptes/arch/x86/kvm/mmu.c
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.c 2014-07-09 12:05:34.837161264 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmu.c 2014-07-09 12:09:21.856684314 -0300
@@ -148,6 +148,9 @@
#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
+#define SPTE_PINNED (1ULL << (PT64_SECOND_AVAIL_BITS_SHIFT))
+
+#define SPTE_PINNED_BIT PT64_SECOND_AVAIL_BITS_SHIFT
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
@@ -327,6 +330,11 @@
return pte & PT_PRESENT_MASK && !is_mmio_spte(pte);
}
+static int is_pinned_spte(u64 spte)
+{
+ return spte & SPTE_PINNED && is_shadow_present_pte(spte);
+}
+
static int is_large_pte(u64 pte)
{
return pte & PT_PAGE_SIZE_MASK;
@@ -1176,6 +1184,16 @@
kvm_flush_remote_tlbs(vcpu->kvm);
}
+static bool vcpu_has_pinned(struct kvm_vcpu *vcpu)
+{
+ return atomic_read(&vcpu->arch.nr_pinned_ranges);
+}
+
+static void mmu_reload_pinned_vcpus(struct kvm *kvm)
+{
+ make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD, &vcpu_has_pinned);
+}
+
/*
* Write-protect on the specified @sptep, @pt_protect indicates whether
* spte write-protection is caused by protecting shadow page table.
@@ -1268,7 +1286,8 @@
}
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, unsigned long data,
+ bool age)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1278,6 +1297,14 @@
BUG_ON(!(*sptep & PT_PRESENT_MASK));
rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
+ if (is_pinned_spte(*sptep)) {
+ /* don't nuke pinned sptes if page aging: return
+ * young=yes instead.
+ */
+ if (age)
+ return 1;
+ mmu_reload_pinned_vcpus(kvm);
+ }
drop_spte(kvm, sptep);
need_tlb_flush = 1;
}
@@ -1286,7 +1313,8 @@
}
static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, unsigned long data,
+ bool age)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1304,6 +1332,9 @@
need_flush = 1;
+ if (is_pinned_spte(*sptep))
+ mmu_reload_pinned_vcpus(kvm);
+
if (pte_write(*ptep)) {
drop_spte(kvm, sptep);
sptep = rmap_get_first(*rmapp, &iter);
@@ -1334,7 +1365,8 @@
int (*handler)(struct kvm *kvm,
unsigned long *rmapp,
struct kvm_memory_slot *slot,
- unsigned long data))
+ unsigned long data,
+ bool age))
{
int j;
int ret = 0;
@@ -1374,7 +1406,7 @@
rmapp = __gfn_to_rmap(gfn_start, j, memslot);
for (; idx <= idx_end; ++idx)
- ret |= handler(kvm, rmapp++, memslot, data);
+ ret |= handler(kvm, rmapp++, memslot, data, false);
}
}
@@ -1385,7 +1417,8 @@
unsigned long data,
int (*handler)(struct kvm *kvm, unsigned long *rmapp,
struct kvm_memory_slot *slot,
- unsigned long data))
+ unsigned long data,
+ bool age))
{
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
}
@@ -1406,7 +1439,8 @@
}
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, unsigned long data,
+ bool age)
{
u64 *sptep;
struct rmap_iterator uninitialized_var(iter);
@@ -1421,7 +1455,7 @@
* out actively used pages or breaking up actively used hugepages.
*/
if (!shadow_accessed_mask) {
- young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
+ young = kvm_unmap_rmapp(kvm, rmapp, slot, data, true);
goto out;
}
@@ -1442,7 +1476,8 @@
}
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, unsigned long data,
+ bool age)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1480,7 +1515,7 @@
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
- kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
+ kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0, false);
kvm_flush_remote_tlbs(vcpu->kvm);
}
@@ -2753,7 +2788,8 @@
}
static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
- pfn_t pfn, unsigned access, int *ret_val)
+ pfn_t pfn, unsigned access, int *ret_val,
+ bool pin)
{
bool ret = true;
@@ -2763,8 +2799,14 @@
goto exit;
}
- if (unlikely(is_noslot_pfn(pfn)))
+ if (unlikely(is_noslot_pfn(pfn))) {
+ /* pinned sptes must point to RAM */
+ if (unlikely(pin)) {
+ *ret_val = -EFAULT;
+ goto exit;
+ }
vcpu_cache_mmio_info(vcpu, gva, gfn, access);
+ }
ret = false;
exit:
@@ -2818,7 +2860,7 @@
* - false: let the real page fault path to fix it.
*/
static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
- u32 error_code)
+ u32 error_code, bool pin)
{
struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
@@ -2828,6 +2870,9 @@
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return false;
+ if (pin)
+ return false;
+
if (!page_fault_can_be_fast(error_code))
return false;
@@ -2895,9 +2940,71 @@
}
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
- gva_t gva, pfn_t *pfn, bool write, bool *writable);
+ gva_t gva, pfn_t *pfn, bool write, bool *writable,
+ bool pin);
static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
+
+static int get_sptep_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes[4])
+
+{
+ struct kvm_shadow_walk_iterator iterator;
+ int nr_sptes = 0;
+
+ if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ return nr_sptes;
+
+ for_each_shadow_entry(vcpu, addr, iterator) {
+ sptes[iterator.level-1] = iterator.sptep;
+ nr_sptes++;
+ if (!is_shadow_present_pte(*iterator.sptep))
+ break;
+ }
+
+ return nr_sptes;
+}
+
+static bool __direct_pin_sptes(struct kvm_vcpu *vcpu, gfn_t gfn, bool pin)
+{
+ u64 *sptes[4];
+ int r, i, level;
+
+ r = get_sptep_hierarchy(vcpu, gfn << PAGE_SHIFT, sptes);
+ if (!r)
+ return false;
+
+ level = 5 - r;
+ if (!is_last_spte(*sptes[level-1], level))
+ return false;
+ if (!is_shadow_present_pte(*sptes[level-1]))
+ return false;
+
+ for (i = 0; i < r; i++) {
+ u64 *sptep = sptes[3-i];
+ struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+ if (pin) {
+ sp->pinned = true;
+ set_bit(SPTE_PINNED_BIT, (unsigned long *)sptep);
+ } else {
+ sp->pinned = false;
+ clear_bit(SPTE_PINNED_BIT, (unsigned long *)sptep);
+ }
+ }
+
+ return true;
+}
+
+static bool direct_pin_sptes(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ return __direct_pin_sptes(vcpu, gfn, true);
+}
+
+static bool direct_unpin_sptes(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ return __direct_pin_sptes(vcpu, gfn, false);
+}
+
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
gfn_t gfn, bool prefault, bool pin, bool *pinned)
{
@@ -2923,16 +3030,17 @@
} else
level = PT_PAGE_TABLE_LEVEL;
- if (fast_page_fault(vcpu, v, level, error_code))
+ if (fast_page_fault(vcpu, v, level, error_code, pin))
return 0;
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
+ if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable,
+ pin))
return 0;
- if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
+ if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r, pin))
return r;
spin_lock(&vcpu->kvm->mmu_lock);
@@ -2943,6 +3051,8 @@
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
prefault);
+ if (pin)
+ *pinned = direct_pin_sptes(vcpu, gfn);
spin_unlock(&vcpu->kvm->mmu_lock);
@@ -3131,7 +3241,7 @@
lm_root = (void*)get_zeroed_page(GFP_KERNEL);
if (lm_root == NULL)
- return 1;
+ return -ENOMEM;
lm_root[0] = __pa(vcpu->arch.mmu.pae_root) | pm_mask;
@@ -3349,7 +3459,8 @@
}
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
- gva_t gva, pfn_t *pfn, bool write, bool *writable)
+ gva_t gva, pfn_t *pfn, bool write, bool *writable,
+ bool pin)
{
bool async;
@@ -3358,7 +3469,7 @@
if (!async)
return false; /* *pfn has correct page already */
- if (!prefault && can_do_async_pf(vcpu)) {
+ if (!prefault && !pin && can_do_async_pf(vcpu)) {
trace_kvm_try_async_get_page(gva, gfn);
if (kvm_find_async_pf_gfn(vcpu, gfn)) {
trace_kvm_async_pf_doublefault(gva, gfn);
@@ -3406,16 +3517,17 @@
} else
level = PT_PAGE_TABLE_LEVEL;
- if (fast_page_fault(vcpu, gpa, level, error_code))
+ if (fast_page_fault(vcpu, gpa, level, error_code, pin))
return 0;
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+ if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable,
+ pin))
return 0;
- if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
+ if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r, pin))
return r;
spin_lock(&vcpu->kvm->mmu_lock);
@@ -3426,6 +3538,8 @@
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, gpa, write, map_writable,
level, gfn, pfn, prefault);
+ if (pin)
+ *pinned = direct_pin_sptes(vcpu, gfn);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
@@ -3903,6 +4017,141 @@
}
EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
+int kvm_mmu_register_pinned_range(struct kvm_vcpu *vcpu,
+ gfn_t base_gfn, unsigned long npages)
+{
+ struct kvm_pinned_page_range *p;
+
+ if (!tdp_enabled) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+ if (p->base_gfn == base_gfn && p->npages == npages) {
+ return -EEXIST;
+ }
+ }
+
+ if (atomic_read(&vcpu->arch.nr_pinned_ranges) >=
+ KVM_MAX_PER_VCPU_PINNED_RANGE)
+ return -ENOSPC;
+
+ p = kzalloc(sizeof(struct kvm_pinned_page_range), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ atomic_inc(&vcpu->arch.nr_pinned_ranges);
+
+ trace_kvm_mmu_register_pinned_range(vcpu->vcpu_id, base_gfn, npages);
+
+ INIT_LIST_HEAD(&p->link);
+ p->base_gfn = base_gfn;
+ p->npages = npages;
+ list_add(&p->link, &vcpu->arch.pinned_mmu_pages);
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+
+ return 0;
+}
+
+
+void unregister_pinned_sptes(struct kvm_vcpu *vcpu, unsigned long base_gfn,
+ unsigned long npages)
+{
+ gfn_t gfn;
+
+ for (gfn = base_gfn; gfn < base_gfn+npages; gfn++)
+ direct_unpin_sptes(vcpu, gfn);
+
+}
+
+int kvm_mmu_unregister_pinned_range(struct kvm_vcpu *vcpu,
+ gfn_t base_gfn, unsigned long npages)
+{
+ struct kvm_pinned_page_range *p;
+
+ list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+ if (p->base_gfn == base_gfn && p->npages == npages) {
+ list_del(&p->link);
+ atomic_dec(&vcpu->arch.nr_pinned_ranges);
+ spin_lock(&vcpu->kvm->mmu_lock);
+ mmu_reload_pinned_vcpus(vcpu->kvm);
+ unregister_pinned_sptes(vcpu, base_gfn, npages);
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ kfree(p);
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+void kvm_mmu_free_pinned_ranges(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pinned_page_range *p, *p2;
+
+ list_for_each_entry_safe(p, p2, &vcpu->arch.pinned_mmu_pages, link) {
+ list_del(&p->link);
+ kfree(p);
+ }
+}
+
+/*
+ * Pin KVM MMU page translations. This guarantees that, for valid
+ * addresses registered by kvm_mmu_register_pinned_range (a valid address
+ * being one which possesses sufficient information for the fault to
+ * be resolved), valid translations exist while in guest mode and
+ * therefore no VM-exits due to faults will occur.
+ *
+ * Failure to instantiate pages will abort guest entry.
+ *
+ * Pinning is not guaranteed while executing as L2 guest.
+ */
+
+static int kvm_mmu_pin_pages(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pinned_page_range *p;
+ int r = 1;
+
+ if (is_guest_mode(vcpu))
+ return r;
+
+ if (!vcpu->arch.mmu.direct_map)
+ return r;
+
+ ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+
+ list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+ gfn_t gfn_offset;
+
+ for (gfn_offset = 0; gfn_offset < p->npages; gfn_offset++) {
+ gfn_t gfn = p->base_gfn + gfn_offset;
+ int ret;
+ bool pinned = false;
+
+ ret = vcpu->arch.mmu.page_fault(vcpu, gfn << PAGE_SHIFT,
+ PFERR_WRITE_MASK, false,
+ true, &pinned);
+ /* MMU notifier sequence window: retry */
+ if (!ret && !pinned)
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+ if (ret) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror =
+ KVM_INTERNAL_ERROR_PIN_FAILURE;
+ vcpu->run->internal.ndata = 1;
+ vcpu->run->internal.data[0] = gfn;
+ r = 0;
+ goto out;
+ }
+
+ }
+ }
+out:
+ return r;
+}
+
int kvm_mmu_load(struct kvm_vcpu *vcpu)
{
int r;
@@ -3916,6 +4165,7 @@
goto out;
/* set_cr3() should ensure TLB has been flushed */
vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
+ r = kvm_mmu_pin_pages(vcpu);
out:
return r;
}
Index: kvm.pinned-sptes/arch/x86/kvm/mmu.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.h 2014-07-09 12:05:30.018171068 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmu.h 2014-07-09 12:08:45.343762778 -0300
@@ -94,7 +94,7 @@
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
- return 0;
+ return 1;
return kvm_mmu_load(vcpu);
}
@@ -178,4 +178,9 @@
}
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
+int kvm_mmu_register_pinned_range(struct kvm_vcpu *vcpu,
+ gfn_t base_gfn, unsigned long npages);
+int kvm_mmu_unregister_pinned_range(struct kvm_vcpu *vcpu,
+ gfn_t base_gfn, unsigned long npages);
+void kvm_mmu_free_pinned_ranges(struct kvm_vcpu *vcpu);
#endif
Index: kvm.pinned-sptes/arch/x86/kvm/x86.c
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/x86.c 2014-07-09 12:05:34.838161262 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/x86.c 2014-07-09 12:08:45.346762771 -0300
@@ -6017,7 +6017,7 @@
}
r = kvm_mmu_reload(vcpu);
- if (unlikely(r)) {
+ if (unlikely(r <= 0)) {
goto cancel_injection;
}
@@ -7049,6 +7049,8 @@
kvm_async_pf_hash_reset(vcpu);
kvm_pmu_init(vcpu);
+ INIT_LIST_HEAD(&vcpu->arch.pinned_mmu_pages);
+ atomic_set(&vcpu->arch.nr_pinned_ranges, 0);
return 0;
fail_free_wbinvd_dirty_mask:
@@ -7069,6 +7071,7 @@
{
int idx;
+ kvm_mmu_free_pinned_ranges(vcpu);
kvm_pmu_destroy(vcpu);
kfree(vcpu->arch.mce_banks);
kvm_free_lapic(vcpu);
@@ -7113,6 +7116,7 @@
int r;
r = vcpu_load(vcpu);
BUG_ON(r);
+ kvm_mmu_free_pinned_ranges(vcpu);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
}
@@ -7408,7 +7412,7 @@
return;
r = kvm_mmu_reload(vcpu);
- if (unlikely(r))
+ if (unlikely(r <= 0))
return;
if (!vcpu->arch.mmu.direct_map &&
Index: kvm.pinned-sptes/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/paging_tmpl.h 2014-07-09 12:05:34.837161264 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/paging_tmpl.h 2014-07-09 12:08:45.346762771 -0300
@@ -747,11 +747,11 @@
smp_rmb();
if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault,
- &map_writable))
+ &map_writable, false))
return 0;
if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,
- walker.gfn, pfn, walker.pte_access, &r))
+ walker.gfn, pfn, walker.pte_access, &r, false))
return r;
/*
Index: kvm.pinned-sptes/arch/x86/kvm/mmutrace.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmutrace.h 2014-07-09 12:05:30.018171068 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmutrace.h 2014-07-09 12:08:45.347762769 -0300
@@ -322,6 +322,29 @@
__entry->kvm_gen == __entry->spte_gen
)
);
+
+TRACE_EVENT(
+ kvm_mmu_register_pinned_range,
+ TP_PROTO(unsigned int vcpu_id, gfn_t gfn, unsigned long npages),
+ TP_ARGS(vcpu_id, gfn, npages),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, vcpu_id )
+ __field( gfn_t, gfn )
+ __field( unsigned long, npages )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu_id;
+ __entry->gfn = gfn;
+ __entry->npages = npages;
+ ),
+
+ TP_printk("vcpu_id %u gfn %llx npages %lx",
+ __entry->vcpu_id,
+ __entry->gfn,
+ __entry->npages)
+);
#endif /* _TRACE_KVMMMU_H */
#undef TRACE_INCLUDE_PATH
Index: kvm.pinned-sptes/include/uapi/linux/kvm.h
===================================================================
--- kvm.pinned-sptes.orig/include/uapi/linux/kvm.h 2014-07-09 12:05:30.019171066 -0300
+++ kvm.pinned-sptes/include/uapi/linux/kvm.h 2014-07-09 12:08:45.347762769 -0300
@@ -180,6 +180,8 @@
#define KVM_INTERNAL_ERROR_SIMUL_EX 2
/* Encounter unexpected vm-exit due to delivery event. */
#define KVM_INTERNAL_ERROR_DELIVERY_EV 3
+/* Failure to pin address translation. */
+#define KVM_INTERNAL_ERROR_PIN_FAILURE 4
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
Index: kvm.pinned-sptes/include/linux/kvm_host.h
===================================================================
--- kvm.pinned-sptes.orig/include/linux/kvm_host.h 2014-07-09 12:05:30.019171066 -0300
+++ kvm.pinned-sptes/include/linux/kvm_host.h 2014-07-09 12:08:45.348762767 -0300
@@ -591,6 +591,9 @@
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
+bool make_all_cpus_request(struct kvm *kvm, unsigned int req,
+ bool (*vcpukick)(struct kvm_vcpu *));
+
void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_reload_remote_mmus(struct kvm *kvm);
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
Index: kvm.pinned-sptes/virt/kvm/kvm_main.c
===================================================================
--- kvm.pinned-sptes.orig/virt/kvm/kvm_main.c 2014-07-09 12:05:30.019171066 -0300
+++ kvm.pinned-sptes/virt/kvm/kvm_main.c 2014-07-09 12:08:45.349762765 -0300
@@ -152,7 +152,8 @@
{
}
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool make_all_cpus_request(struct kvm *kvm, unsigned int req,
+ bool (*vcpukick)(struct kvm_vcpu *))
{
int i, cpu, me;
cpumask_var_t cpus;
@@ -163,6 +164,8 @@
me = get_cpu();
kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpukick && !vcpukick(vcpu))
+ continue;
kvm_make_request(req, vcpu);
cpu = vcpu->cpu;
@@ -189,7 +192,7 @@
long dirty_count = kvm->tlbs_dirty;
smp_mb();
- if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+ if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH, NULL))
++kvm->stat.remote_tlb_flush;
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
}
@@ -197,17 +200,22 @@
void kvm_reload_remote_mmus(struct kvm *kvm)
{
- make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+ make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD, NULL);
+}
+
+void kvm_reload_pinned_remote_mmus(struct kvm *kvm)
+{
+ make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD, NULL);
}
void kvm_make_mclock_inprogress_request(struct kvm *kvm)
{
- make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+ make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS, NULL);
}
void kvm_make_scan_ioapic_request(struct kvm *kvm)
{
- make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+ make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC, NULL);
}
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
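For reference, the spte encoding used by the mmu.c changes can be exercised
in isolation. The sketch below is a standalone userspace illustration only:
it assumes PT64_SECOND_AVAIL_BITS_SHIFT == 52 and PT_PRESENT_MASK == bit 0
(the mmu.c values of this era) and reduces is_shadow_present_pte() to a bare
present-bit test. It mirrors is_pinned_spte() and the set/clear performed by
__direct_pin_sptes().

#include <stdint.h>
#include <stdio.h>

#define PT_PRESENT_MASK			(1ULL << 0)
#define PT64_SECOND_AVAIL_BITS_SHIFT	52	/* assumed, see lead-in */
#define SPTE_PINNED			(1ULL << PT64_SECOND_AVAIL_BITS_SHIFT)

static int is_pinned_spte(uint64_t spte)
{
	/* Same test as the patch: pinned bit set on a present spte
	 * (the real code also excludes mmio sptes). */
	return (spte & SPTE_PINNED) && (spte & PT_PRESENT_MASK);
}

int main(void)
{
	uint64_t spte = 0x12345000ULL | PT_PRESENT_MASK;

	spte |= SPTE_PINNED;		/* __direct_pin_sptes(..., pin=true) */
	printf("pinned: %d\n", is_pinned_spte(spte));	/* prints 1 */

	spte &= ~SPTE_PINNED;		/* __direct_pin_sptes(..., pin=false) */
	printf("pinned: %d\n", is_pinned_spte(spte));	/* prints 0 */
	return 0;
}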