This is the same as before, but it uses the age_page callback to prevent the guest OS working set from being swapped out. It works well here so far. This depends on the "memslot locking with mmu_lock" patch and on the mmu notifiers #v3 patch that I'll shortly post with linux-mm in CC. That patch implements the age_page callback and changes follow_page to set the young bit in the pte instead of setting the referenced bit, so age_page will be called again later when the VM clears the young bit.
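To make the aging handshake concrete, here is a tiny userspace simulation (plain C, not kernel code) of the protocol; struct fake_pte, secondary_fault() and reclaim_scan() are invented stand-ins for the real pte accessors, the secondary-MMU fault path and the reclaim scanner:

#include <stdbool.h>
#include <stdio.h>

/* Invented stand-in for a pte: only the bits the protocol cares about. */
struct fake_pte {
	bool present;
	bool young;
};

/* follow_page() equivalent: with the mmu notifiers #v3 patch, the
 * secondary-MMU fault marks the pte young instead of marking the
 * struct page referenced. */
static void secondary_fault(struct fake_pte *pte)
{
	pte->present = true;
	pte->young = true;
}

static void kvm_age_page(void)
{
	puts("age_page: spte dropped, next guest access will refault");
}

/* Reclaim scan: if the pte is young, clear the bit and fire ->age_page
 * (which drops the spte); a page only becomes an eviction candidate
 * once a scan finds the young bit still clear. */
static bool reclaim_scan(struct fake_pte *pte, void (*age_page)(void))
{
	if (pte->young) {
		pte->young = false;
		age_page();
		return false;	/* recently used: keep it resident */
	}
	return true;		/* idle since the last scan: evict */
}

int main(void)
{
	struct fake_pte pte = { false, false };

	secondary_fault(&pte);	/* guest touches the page */
	printf("scan 1 evicts? %d\n", reclaim_scan(&pte, kvm_age_page));

	secondary_fault(&pte);	/* guest refaults: page stays young */
	printf("scan 2 evicts? %d\n", reclaim_scan(&pte, kvm_age_page));

	/* guest never touches the page again */
	printf("scan 3 evicts? %d\n", reclaim_scan(&pte, kvm_age_page));
	return 0;
}

As long as the guest keeps faulting the page back through the secondary MMU, every scan finds the young bit set and skips it; only genuinely idle pages get swapped out.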
Signed-off-by: Andrea Arcangeli <[EMAIL PROTECTED]>

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 4086080..c527d7d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -18,6 +18,7 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on ARCH_SUPPORTS_KVM && EXPERIMENTAL
 	select PREEMPT_NOTIFIERS
+	select MMU_NOTIFIER
 	select ANON_INODES
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 324ff9a..189f3e1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -532,6 +532,38 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 		kvm_flush_remote_tlbs(kvm);
 }
 
+static void unmap_spte(struct kvm *kvm, u64 *spte)
+{
+	struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+	get_page(page);
+	rmap_remove(kvm, spte);
+	set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+	kvm_flush_remote_tlbs(kvm);
+	__free_page(page);
+}
+
+void kvm_rmap_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	unsigned long *rmapp;
+	u64 *spte, *curr_spte;
+
+	spin_lock(&kvm->mmu_lock);
+	rmapp = kvm_hva_to_rmapp(kvm, hva);
+	if (!rmapp)
+		goto out_unlock;
+
+	spte = rmap_next(kvm, rmapp, NULL);
+	while (spte) {
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
+		curr_spte = spte;
+		spte = rmap_next(kvm, rmapp, spte);
+		unmap_spte(kvm, curr_spte);
+	}
+out_unlock:
+	spin_unlock(&kvm->mmu_lock);
+}
+
 #ifdef MMU_DEBUG
 static int is_empty_shadow_page(u64 *spt)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8a90403..35a2ee0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3159,6 +3159,35 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	free_page((unsigned long)vcpu->arch.pio_data);
 }
 
+static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
+{
+	return container_of(mn, struct kvm, mmu_notifier);
+}
+
+void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+				      struct mm_struct *mm,
+				      unsigned long address)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	BUG_ON(mm != kvm->mm);
+	kvm_rmap_unmap_hva(kvm, address);
+}
+
+void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
+				       struct mm_struct *mm,
+				       unsigned long start, unsigned long end)
+{
+	for (; start < end; start += PAGE_SIZE)
+		kvm_mmu_notifier_invalidate_page(mn, mm, start);
+}
+
+static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+	.invalidate_range = kvm_mmu_notifier_invalidate_range,
+	.invalidate_page = kvm_mmu_notifier_invalidate_page,
+	/* age_page will drop the spte so follow_page will set the young bit */
+	.age_page = kvm_mmu_notifier_invalidate_page,
+};
+
 struct kvm *kvm_arch_create_vm(void)
 {
 	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
@@ -3167,6 +3196,7 @@ struct kvm *kvm_arch_create_vm(void)
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+	kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
 
 	return kvm;
 }
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index d6db0de..522028b 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -404,6 +404,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
 int kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
+void kvm_rmap_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 void kvm_mmu_zap_all(struct kvm *kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2714068..eae8734 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -117,6 +117,7 @@ struct kvm {
 	struct kvm_io_bus pio_bus;
 	struct kvm_vm_stat stat;
 	struct kvm_arch arch;
+	struct mmu_notifier mmu_notifier;
 };
 
 /* The guest did something we don't support. */
@@ -163,6 +164,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot old,
 				int user_alloc);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
+unsigned long *kvm_hva_to_rmapp(struct kvm *kvm, unsigned long addr);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4295623..a67e38f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -165,6 +165,7 @@ static struct kvm *kvm_create_vm(void)
 
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
+	mmu_notifier_register(&kvm->mmu_notifier, kvm->mm);
 	spin_lock_init(&kvm->mmu_lock);
 	kvm_io_bus_init(&kvm->pio_bus);
 	mutex_init(&kvm->lock);
@@ -454,6 +467,28 @@ static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
 
+/* if mmap_sem isn't taken, it can be safely called with only the mmu_lock */
+unsigned long *kvm_hva_to_rmapp(struct kvm *kvm, unsigned long addr)
+{
+	int i;
+
+	for (i = 0; i < kvm->nmemslots; i++) {
+		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end = start + (memslot->npages << PAGE_SHIFT);
+
+		/* mmu_lock protects userspace_addr */
+		if (!start)
+			continue;
+
+		if (addr >= start && addr < end) {
+			gfn_t gfn_offset = (addr - start) >> PAGE_SHIFT;
+			return &memslot->rmap[gfn_offset];
+		}
+	}
+	return NULL;
+}
+
 /*
  * Requires current->mm->mmap_sem to be held
  */
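As a hypothetical userspace way to exercise the invalidate path (assuming, per the notifier patch, that zapping anonymous ptes ends up calling invalidate_page/invalidate_range for the affected hvas), one can madvise away the memory that would back a memslot:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 16 * 4096;	/* pretend this range backs a memslot */
	char *guest_ram = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (guest_ram == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	guest_ram[0] = 1;	/* fault the first page in */

	/* Drop the ptes; with the notifier registered, KVM should drop
	 * the sptes covering this hva range too. */
	if (madvise(guest_ram, len, MADV_DONTNEED))
		perror("madvise");

	munmap(guest_ram, len);
	return 0;
}

In a real VM the same notifier fires from the swapout path, which is exactly where the age_page feedback above keeps the guest working set from ending up.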