Allow vcpus to pin spte translations by:

1) Creating a per-vcpu list of pinned ranges.

2) On mmu reload request:
   - Fault in the registered ranges.
   - Mark the sptes with a pinned bit.
   - Mark the shadow pages as pinned.

3) Then modify the following actions:
   - Page aging => do not drop pinned sptes; report them as young instead.
   - MMU notifiers => force mmu reload request (which kicks the cpu out of
     guest mode).
   - GET_DIRTY_LOG => force mmu reload request.
   - SLAB shrinker => skip deletion of pinned shadow pages.

TDP-only.
Signed-off-by: Marcelo Tosatti <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 13 +
arch/x86/kvm/mmu.c | 294 +++++++++++++++++++++++++++++++++++++---
arch/x86/kvm/mmu.h | 7
arch/x86/kvm/mmutrace.h | 23 +++
arch/x86/kvm/paging_tmpl.h | 4
arch/x86/kvm/x86.c | 8 -
include/linux/kvm_host.h | 3
include/uapi/linux/kvm.h | 2
virt/kvm/kvm_main.c | 18 +-
9 files changed, 340 insertions(+), 32 deletions(-)
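Usage note (not part of the patch): the interface below is meant to be driven
from vcpu context by whatever in-kernel feature needs guaranteed-resident
translations. A minimal, hypothetical caller could look like the sketch
below; example_pin_guest_buffer()/example_unpin_guest_buffer() are
illustrative names only, while kvm_mmu_register_pinned_range() and
kvm_mmu_unregister_pinned_range() are the helpers added by this patch.

/* Hypothetical in-kernel user of this patch's interface (a sketch, not
 * part of the series): pin a guest-physical buffer on the current vcpu
 * so its translations stay resident while in guest mode. */
static int example_pin_guest_buffer(struct kvm_vcpu *vcpu, gpa_t gpa,
				    unsigned long size)
{
	gfn_t base_gfn = gpa >> PAGE_SHIFT;
	unsigned long npages = DIV_ROUND_UP(size, PAGE_SIZE);

	/* Only queues KVM_REQ_MMU_RELOAD; the sptes are faulted in and
	 * marked pinned by kvm_mmu_pin_pages() before the next guest
	 * entry. */
	return kvm_mmu_register_pinned_range(vcpu, base_gfn, npages);
}

static void example_unpin_guest_buffer(struct kvm_vcpu *vcpu, gpa_t gpa,
				       unsigned long size)
{
	/* Drops the range, clears the pinned bits and forces a reload of
	 * vcpus that still hold pinned sptes. */
	kvm_mmu_unregister_pinned_range(vcpu, gpa >> PAGE_SHIFT,
					DIV_ROUND_UP(size, PAGE_SIZE));
}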
Index: kvm.pinned-sptes/arch/x86/include/asm/kvm_host.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/include/asm/kvm_host.h 2014-07-09 12:05:34.836161266 -0300
+++ kvm.pinned-sptes/arch/x86/include/asm/kvm_host.h 2014-07-09 12:08:45.341762782 -0300
@@ -97,6 +97,8 @@
#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8
+#define KVM_MAX_PER_VCPU_PINNED_RANGE 10
+
#define ASYNC_PF_PER_VCPU 64
struct kvm_vcpu;
@@ -221,6 +223,8 @@
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
bool unsync;
+ bool pinned;
+
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
unsigned long parent_ptes; /* Reverse mapping for parent_pte */
@@ -337,6 +341,12 @@
KVM_DEBUGREG_WONT_EXIT = 2,
};
+struct kvm_pinned_page_range {
+ gfn_t base_gfn;
+ unsigned long npages;
+ struct list_head link;
+};
+
struct kvm_vcpu_arch {
/*
* rip and regs accesses must go through
@@ -392,6 +402,9 @@
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
+ struct list_head pinned_mmu_pages;
+ atomic_t nr_pinned_ranges;
+
struct fpu guest_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
Index: kvm.pinned-sptes/arch/x86/kvm/mmu.c
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.c 2014-07-09 12:05:34.837161264 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmu.c 2014-07-09 12:09:21.856684314 -0300
@@ -148,6 +148,9 @@
#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
+#define SPTE_PINNED (1ULL << (PT64_SECOND_AVAIL_BITS_SHIFT))
+
+#define SPTE_PINNED_BIT PT64_SECOND_AVAIL_BITS_SHIFT
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
@@ -327,6 +330,11 @@
return pte & PT_PRESENT_MASK && !is_mmio_spte(pte);
}
+static int is_pinned_spte(u64 spte)
+{
+ return spte & SPTE_PINNED && is_shadow_present_pte(spte);
+}
+
static int is_large_pte(u64 pte)
{
return pte & PT_PAGE_SIZE_MASK;
@@ -1176,6 +1184,16 @@
kvm_flush_remote_tlbs(vcpu->kvm);
}
+static bool vcpu_has_pinned(struct kvm_vcpu *vcpu)
+{
+ return atomic_read(&vcpu->arch.nr_pinned_ranges);
+}
+
+static void mmu_reload_pinned_vcpus(struct kvm *kvm)
+{
+ make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD, &vcpu_has_pinned);
+}
+
/*
* Write-protect on the specified @sptep, @pt_protect indicates whether
* spte write-protection is caused by protecting shadow page table.
@@ -1268,7 +1286,8 @@
}
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, unsigned long data,
+ bool age)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1278,6 +1297,14 @@
BUG_ON(!(*sptep & PT_PRESENT_MASK));
rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
+ if (is_pinned_spte(*sptep)) {
+ /* don't nuke pinned sptes if page aging: return
+ * young=yes instead.
+ */
+ if (age)
+ return 1;
+ mmu_reload_pinned_vcpus(kvm);
+ }
drop_spte(kvm, sptep);
need_tlb_flush = 1;
}
@@ -1286,7 +1313,8 @@
}
static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, unsigned long data,
+ bool age)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1304,6 +1332,9 @@
need_flush = 1;
+ if (is_pinned_spte(*sptep))
+ mmu_reload_pinned_vcpus(kvm);
+
if (pte_write(*ptep)) {
drop_spte(kvm, sptep);
sptep = rmap_get_first(*rmapp, &iter);
@@ -1334,7 +1365,8 @@
int (*handler)(struct kvm *kvm,
unsigned long *rmapp,
struct kvm_memory_slot *slot,
- unsigned long data))
+ unsigned long data,
+ bool age))
{
int j;
int ret = 0;
@@ -1374,7 +1406,7 @@
rmapp = __gfn_to_rmap(gfn_start, j, memslot);
for (; idx <= idx_end; ++idx)
- ret |= handler(kvm, rmapp++, memslot, data);
+ ret |= handler(kvm, rmapp++, memslot, data, false);
}
}
@@ -1385,7 +1417,8 @@
unsigned long data,
int (*handler)(struct kvm *kvm, unsigned long *rmapp,
struct kvm_memory_slot *slot,
- unsigned long data))
+ unsigned long data,
+ bool age))
{
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
}
@@ -1406,7 +1439,8 @@
}
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, unsigned long data,
+ bool age)
{
u64 *sptep;
struct rmap_iterator uninitialized_var(iter);
@@ -1421,7 +1455,7 @@
* out actively used pages or breaking up actively used hugepages.
*/
if (!shadow_accessed_mask) {
- young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
+ young = kvm_unmap_rmapp(kvm, rmapp, slot, data, true);
goto out;
}
@@ -1442,7 +1476,8 @@
}
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, unsigned long data,
+ bool age)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1480,7 +1515,7 @@
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
- kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
+ kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0, false);
kvm_flush_remote_tlbs(vcpu->kvm);
}
@@ -2753,7 +2788,8 @@
}
static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
- pfn_t pfn, unsigned access, int *ret_val)
+ pfn_t pfn, unsigned access, int *ret_val,
+ bool pin)
{
bool ret = true;
@@ -2763,8 +2799,14 @@
goto exit;
}
- if (unlikely(is_noslot_pfn(pfn)))
+ if (unlikely(is_noslot_pfn(pfn))) {
+ /* pinned sptes must point to RAM */
+ if (unlikely(pin)) {
+ *ret_val = -EFAULT;
+ goto exit;
+ }
vcpu_cache_mmio_info(vcpu, gva, gfn, access);
+ }
ret = false;
exit:
@@ -2818,7 +2860,7 @@
* - false: let the real page fault path to fix it.
*/
static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
- u32 error_code)
+ u32 error_code, bool pin)
{
struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
@@ -2828,6 +2870,9 @@
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return false;
+ if (pin)
+ return false;
+
if (!page_fault_can_be_fast(error_code))
return false;
@@ -2895,9 +2940,71 @@
}
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
- gva_t gva, pfn_t *pfn, bool write, bool *writable);
+ gva_t gva, pfn_t *pfn, bool write, bool *writable,
+ bool pin);
static void make_mmu_pages_available(struct kvm_vcpu *vcpu);
+
+static int get_sptep_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes[4])
+
+{
+ struct kvm_shadow_walk_iterator iterator;
+ int nr_sptes = 0;
+
+ if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ return nr_sptes;
+
+ for_each_shadow_entry(vcpu, addr, iterator) {
+ sptes[iterator.level-1] = iterator.sptep;
+ nr_sptes++;
+ if (!is_shadow_present_pte(*iterator.sptep))
+ break;
+ }
+
+ return nr_sptes;
+}
+
+static bool __direct_pin_sptes(struct kvm_vcpu *vcpu, gfn_t gfn, bool pin)
+{
+ u64 *sptes[4];
+ int r, i, level;
+
+ r = get_sptep_hierarchy(vcpu, gfn << PAGE_SHIFT, sptes);
+ if (!r)
+ return false;
+
+ level = 5 - r;
+ if (!is_last_spte(*sptes[level-1], level))
+ return false;
+ if (!is_shadow_present_pte(*sptes[level-1]))
+ return false;
+
+ for (i = 0; i < r; i++) {
+ u64 *sptep = sptes[3-i];
+ struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+ if (pin) {
+ sp->pinned = true;
+ set_bit(SPTE_PINNED_BIT, (unsigned long *)sptep);
+ } else {
+ sp->pinned = false;
+ clear_bit(SPTE_PINNED_BIT, (unsigned long *)sptep);
+ }
+ }
+
+ return true;
+}
+
+static bool direct_pin_sptes(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ return __direct_pin_sptes(vcpu, gfn, true);
+}
+
+static bool direct_unpin_sptes(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ return __direct_pin_sptes(vcpu, gfn, false);
+}
+
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
gfn_t gfn, bool prefault, bool pin, bool *pinned)
{
@@ -2923,16 +3030,17 @@
} else
level = PT_PAGE_TABLE_LEVEL;
- if (fast_page_fault(vcpu, v, level, error_code))
+ if (fast_page_fault(vcpu, v, level, error_code, pin))
return 0;
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
+ if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable,
+ pin))
return 0;
- if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
+ if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r, pin))
return r;
spin_lock(&vcpu->kvm->mmu_lock);
@@ -2943,6 +3051,8 @@
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
prefault);
+ if (pin)
+ *pinned = direct_pin_sptes(vcpu, gfn);
spin_unlock(&vcpu->kvm->mmu_lock);
@@ -3131,7 +3241,7 @@
lm_root = (void*)get_zeroed_page(GFP_KERNEL);
if (lm_root == NULL)
- return 1;
+ return -ENOMEM;
lm_root[0] = __pa(vcpu->arch.mmu.pae_root) | pm_mask;
@@ -3349,7 +3459,8 @@
}
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
- gva_t gva, pfn_t *pfn, bool write, bool *writable)
+ gva_t gva, pfn_t *pfn, bool write, bool *writable,
+ bool pin)
{
bool async;
@@ -3358,7 +3469,7 @@
if (!async)
return false; /* *pfn has correct page already */
- if (!prefault && can_do_async_pf(vcpu)) {
+ if (!prefault && !pin && can_do_async_pf(vcpu)) {
trace_kvm_try_async_get_page(gva, gfn);
if (kvm_find_async_pf_gfn(vcpu, gfn)) {
trace_kvm_async_pf_doublefault(gva, gfn);
@@ -3406,16 +3517,17 @@
} else
level = PT_PAGE_TABLE_LEVEL;
- if (fast_page_fault(vcpu, gpa, level, error_code))
+ if (fast_page_fault(vcpu, gpa, level, error_code, pin))
return 0;
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+ if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable,
+ pin))
return 0;
- if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
+ if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r, pin))
return r;
spin_lock(&vcpu->kvm->mmu_lock);
@@ -3426,6 +3538,8 @@
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, gpa, write, map_writable,
level, gfn, pfn, prefault);
+ if (pin)
+ *pinned = direct_pin_sptes(vcpu, gfn);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
@@ -3903,6 +4017,141 @@
}
EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
+int kvm_mmu_register_pinned_range(struct kvm_vcpu *vcpu,
+ gfn_t base_gfn, unsigned long npages)
+{
+ struct kvm_pinned_page_range *p;
+
+ if (!tdp_enabled) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+ if (p->base_gfn == base_gfn && p->npages == npages) {
+ return -EEXIST;
+ }
+ }
+
+ if (atomic_read(&vcpu->arch.nr_pinned_ranges) >=
+ KVM_MAX_PER_VCPU_PINNED_RANGE)
+ return -ENOSPC;
+
+ p = kzalloc(sizeof(struct kvm_pinned_page_range), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ atomic_inc(&vcpu->arch.nr_pinned_ranges);
+
+ trace_kvm_mmu_register_pinned_range(vcpu->vcpu_id, base_gfn, npages);
+
+ INIT_LIST_HEAD(&p->link);
+ p->base_gfn = base_gfn;
+ p->npages = npages;
+ list_add(&p->link, &vcpu->arch.pinned_mmu_pages);
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+
+ return 0;
+}
+
+
+void unregister_pinned_sptes(struct kvm_vcpu *vcpu, unsigned long base_gfn,
+ unsigned long npages)
+{
+ gfn_t gfn;
+
+ for (gfn = base_gfn; gfn < base_gfn+npages; gfn++)
+ direct_unpin_sptes(vcpu, gfn);
+
+}
+
+int kvm_mmu_unregister_pinned_range(struct kvm_vcpu *vcpu,
+ gfn_t base_gfn, unsigned long npages)
+{
+ struct kvm_pinned_page_range *p;
+
+ list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+ if (p->base_gfn == base_gfn && p->npages == npages) {
+ list_del(&p->link);
+ atomic_dec(&vcpu->arch.nr_pinned_ranges);
+ spin_lock(&vcpu->kvm->mmu_lock);
+ mmu_reload_pinned_vcpus(vcpu->kvm);
+ unregister_pinned_sptes(vcpu, base_gfn, npages);
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ kfree(p);
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+void kvm_mmu_free_pinned_ranges(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pinned_page_range *p, *p2;
+
+ list_for_each_entry_safe(p, p2, &vcpu->arch.pinned_mmu_pages, link) {
+ list_del(&p->link);
+ kfree(p);
+ }
+}
+
+/*
+ * Pin KVM MMU page translations. This guarantees that, for valid
+ * addresses registered by kvm_mmu_register_pinned_range (a valid address
+ * being one which possesses sufficient information for the fault to
+ * be resolved), valid translations exist while in guest mode and
+ * therefore no VM-exits due to faults will occur.
+ *
+ * Failure to instantiate pages will abort guest entry.
+ *
+ * Pinning is not guaranteed while executing as L2 guest.
+ */
+
+static int kvm_mmu_pin_pages(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pinned_page_range *p;
+ int r = 1;
+
+ if (is_guest_mode(vcpu))
+ return r;
+
+ if (!vcpu->arch.mmu.direct_map)
+ return r;
+
+ ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+
+ list_for_each_entry(p, &vcpu->arch.pinned_mmu_pages, link) {
+ gfn_t gfn_offset;
+
+ for (gfn_offset = 0; gfn_offset < p->npages; gfn_offset++) {
+ gfn_t gfn = p->base_gfn + gfn_offset;
+ int ret;
+ bool pinned = false;
+
+ ret = vcpu->arch.mmu.page_fault(vcpu, gfn << PAGE_SHIFT,
+ PFERR_WRITE_MASK, false,
+ true, &pinned);
+ /* MMU notifier sequence window: retry */
+ if (!ret && !pinned)
+ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+ if (ret) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror =
+ KVM_INTERNAL_ERROR_PIN_FAILURE;
+ vcpu->run->internal.ndata = 1;
+ vcpu->run->internal.data[0] = gfn;
+ r = 0;
+ goto out;
+ }
+
+ }
+ }
+out:
+ return r;
+}
+
int kvm_mmu_load(struct kvm_vcpu *vcpu)
{
int r;
@@ -3916,6 +4165,7 @@
goto out;
/* set_cr3() should ensure TLB has been flushed */
vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
+ r = kvm_mmu_pin_pages(vcpu);
out:
return r;
}
Index: kvm.pinned-sptes/arch/x86/kvm/mmu.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmu.h 2014-07-09 12:05:30.018171068 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmu.h 2014-07-09 12:08:45.343762778 -0300
@@ -94,7 +94,7 @@
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
- return 0;
+ return 1;
return kvm_mmu_load(vcpu);
}
@@ -178,4 +178,9 @@
}
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
+int kvm_mmu_register_pinned_range(struct kvm_vcpu *vcpu,
+ gfn_t base_gfn, unsigned long npages);
+int kvm_mmu_unregister_pinned_range(struct kvm_vcpu *vcpu,
+ gfn_t base_gfn, unsigned long npages);
+void kvm_mmu_free_pinned_ranges(struct kvm_vcpu *vcpu);
#endif
Index: kvm.pinned-sptes/arch/x86/kvm/x86.c
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/x86.c 2014-07-09 12:05:34.838161262 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/x86.c 2014-07-09 12:08:45.346762771 -0300
@@ -6017,7 +6017,7 @@
}
r = kvm_mmu_reload(vcpu);
- if (unlikely(r)) {
+ if (unlikely(r <= 0)) {
goto cancel_injection;
}
@@ -7049,6 +7049,8 @@
kvm_async_pf_hash_reset(vcpu);
kvm_pmu_init(vcpu);
+ INIT_LIST_HEAD(&vcpu->arch.pinned_mmu_pages);
+ atomic_set(&vcpu->arch.nr_pinned_ranges, 0);
return 0;
fail_free_wbinvd_dirty_mask:
@@ -7069,6 +7071,7 @@
{
int idx;
+ kvm_mmu_free_pinned_ranges(vcpu);
kvm_pmu_destroy(vcpu);
kfree(vcpu->arch.mce_banks);
kvm_free_lapic(vcpu);
@@ -7113,6 +7116,7 @@
int r;
r = vcpu_load(vcpu);
BUG_ON(r);
+ kvm_mmu_free_pinned_ranges(vcpu);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
}
@@ -7408,7 +7412,7 @@
return;
r = kvm_mmu_reload(vcpu);
- if (unlikely(r))
+ if (unlikely(r <= 0))
return;
if (!vcpu->arch.mmu.direct_map &&
Index: kvm.pinned-sptes/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/paging_tmpl.h 2014-07-09 12:05:34.837161264 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/paging_tmpl.h 2014-07-09 12:08:45.346762771 -0300
@@ -747,11 +747,11 @@
smp_rmb();
if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault,
- &map_writable))
+ &map_writable, false))
return 0;
if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,
- walker.gfn, pfn, walker.pte_access, &r))
+ walker.gfn, pfn, walker.pte_access, &r, false))
return r;
/*
Index: kvm.pinned-sptes/arch/x86/kvm/mmutrace.h
===================================================================
--- kvm.pinned-sptes.orig/arch/x86/kvm/mmutrace.h 2014-07-09 12:05:30.018171068 -0300
+++ kvm.pinned-sptes/arch/x86/kvm/mmutrace.h 2014-07-09 12:08:45.347762769 -0300
@@ -322,6 +322,29 @@
__entry->kvm_gen == __entry->spte_gen
)
);
+
+TRACE_EVENT(
+ kvm_mmu_register_pinned_range,
+ TP_PROTO(unsigned int vcpu_id, gfn_t gfn, unsigned long npages),
+ TP_ARGS(vcpu_id, gfn, npages),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, vcpu_id )
+ __field( gfn_t, gfn )
+ __field( unsigned long, npages )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu_id;
+ __entry->gfn = gfn;
+ __entry->npages = npages;
+ ),
+
+ TP_printk("vcpu_id %u gfn %llx npages %lx",
+ __entry->vcpu_id,
+ __entry->gfn,
+ __entry->npages)
+);
#endif /* _TRACE_KVMMMU_H */
#undef TRACE_INCLUDE_PATH
Index: kvm.pinned-sptes/include/uapi/linux/kvm.h
===================================================================
--- kvm.pinned-sptes.orig/include/uapi/linux/kvm.h 2014-07-09 12:05:30.019171066 -0300
+++ kvm.pinned-sptes/include/uapi/linux/kvm.h 2014-07-09 12:08:45.347762769 -0300
@@ -180,6 +180,8 @@
#define KVM_INTERNAL_ERROR_SIMUL_EX 2
/* Encounter unexpected vm-exit due to delivery event. */
#define KVM_INTERNAL_ERROR_DELIVERY_EV 3
+/* Failure to pin address translation. */
+#define KVM_INTERNAL_ERROR_PIN_FAILURE 4
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
Index: kvm.pinned-sptes/include/linux/kvm_host.h
===================================================================
--- kvm.pinned-sptes.orig/include/linux/kvm_host.h 2014-07-09 12:05:30.019171066 -0300
+++ kvm.pinned-sptes/include/linux/kvm_host.h 2014-07-09 12:08:45.348762767 -0300
@@ -591,6 +591,9 @@
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
+bool make_all_cpus_request(struct kvm *kvm, unsigned int req,
+ bool (*vcpukick)(struct kvm_vcpu *));
+
void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_reload_remote_mmus(struct kvm *kvm);
void kvm_make_mclock_inprogress_request(struct kvm *kvm);
Index: kvm.pinned-sptes/virt/kvm/kvm_main.c
===================================================================
--- kvm.pinned-sptes.orig/virt/kvm/kvm_main.c 2014-07-09 12:05:30.019171066 -0300
+++ kvm.pinned-sptes/virt/kvm/kvm_main.c 2014-07-09 12:08:45.349762765 -0300
@@ -152,7 +152,8 @@
{
}
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool make_all_cpus_request(struct kvm *kvm, unsigned int req,
+ bool (*vcpukick)(struct kvm_vcpu *))
{
int i, cpu, me;
cpumask_var_t cpus;
@@ -163,6 +164,8 @@
me = get_cpu();
kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpukick && !vcpukick(vcpu))
+ continue;
kvm_make_request(req, vcpu);
cpu = vcpu->cpu;
@@ -189,7 +192,7 @@
long dirty_count = kvm->tlbs_dirty;
smp_mb();
- if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+ if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH, NULL))
++kvm->stat.remote_tlb_flush;
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
}
@@ -197,17 +200,22 @@
void kvm_reload_remote_mmus(struct kvm *kvm)
{
- make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+ make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD, NULL);
+}
+
+void kvm_reload_pinned_remote_mmus(struct kvm *kvm)
+{
+ make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD, NULL);
}
void kvm_make_mclock_inprogress_request(struct kvm *kvm)
{
- make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+ make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS, NULL);
}
void kvm_make_scan_ioapic_request(struct kvm *kvm)
{
- make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+ make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC, NULL);
}
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
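For reference, the spte encoding used by the mmu.c changes can be exercised
in isolation. The sketch below is a standalone userspace illustration only:
it assumes PT64_SECOND_AVAIL_BITS_SHIFT == 52 and PT_PRESENT_MASK == bit 0
(the mmu.c values of this era) and reduces is_shadow_present_pte() to a bare
present-bit test. It mirrors is_pinned_spte() and the set/clear performed by
__direct_pin_sptes().

#include <stdint.h>
#include <stdio.h>

#define PT_PRESENT_MASK			(1ULL << 0)
#define PT64_SECOND_AVAIL_BITS_SHIFT	52	/* assumed, see lead-in */
#define SPTE_PINNED			(1ULL << PT64_SECOND_AVAIL_BITS_SHIFT)

static int is_pinned_spte(uint64_t spte)
{
	/* Same test as the patch: pinned bit set on a present spte
	 * (the real code also excludes mmio sptes). */
	return (spte & SPTE_PINNED) && (spte & PT_PRESENT_MASK);
}

int main(void)
{
	uint64_t spte = 0x12345000ULL | PT_PRESENT_MASK;

	spte |= SPTE_PINNED;		/* __direct_pin_sptes(..., pin=true) */
	printf("pinned: %d\n", is_pinned_spte(spte));	/* prints 1 */

	spte &= ~SPTE_PINNED;		/* __direct_pin_sptes(..., pin=false) */
	printf("pinned: %d\n", is_pinned_spte(spte));	/* prints 0 */
	return 0;
}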