On Mon, Aug 06, 2012 at 08:06:03PM +1000, Paul Mackerras wrote:
> >From 44067a8ee15021583636bea4cc1d47e5370b8397 Mon Sep 17 00:00:00 2001
> From: Paul Mackerras <[email protected]>
> Date: Mon, 30 Jul 2012 16:40:54 +1000
> Subject:
>
> At present the HV style of KVM doesn't handle deletion or modification
> of memory slots correctly. Deletion occurs when userspace does the
> KVM_SET_USER_MEMORY_REGION with arguments specifying a new length of
> zero for a slot that contains memory. Modification occurs when the
> arguments specify a new guest_phys_addr or flags.
>
> To allow the HV code to know which operation (creation, deletion or
> modification) is being requested, it needs to see the old and new
> contents of the memslot. kvm_arch_prepare_memory_region has this
> information, so we modify it to pass it down to
> kvmppc_core_prepare_memory_region. We then modify the HV version
> of that to check which operation is being performed. If it is a
> deletion, we call a new function kvmppc_unmap_memslot to remove any
> HPT (hashed page table) entries referencing the memory being removed.
> Similarly, if we are modifying the guest physical address we also
> remove any HPT entries. If the KVM_MEM_LOG_DIRTY_PAGES flag is now
> set and wasn't before, we call kvmppc_hv_get_dirty_log to clear any
> dirty bits so we can detect all modifications from now on.
>
> Signed-off-by: Paul Mackerras <[email protected]>
> ---
> arch/powerpc/include/asm/kvm_ppc.h | 4 +++
> arch/powerpc/kvm/book3s_64_mmu_hv.c | 27 ++++++++++++++--
> arch/powerpc/kvm/book3s_hv.c | 61
> +++++++++++++++++++++++------------
> arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +-
> arch/powerpc/kvm/book3s_pr.c | 2 ++
> arch/powerpc/kvm/booke.c | 2 ++
> arch/powerpc/kvm/powerpc.c | 2 +-
> 7 files changed, 76 insertions(+), 24 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
> b/arch/powerpc/include/asm/kvm_ppc.h
> index 0124937..044c921 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -140,11 +140,15 @@ extern void kvm_release_hpt(struct kvmppc_linear_info
> *li);
> extern int kvmppc_core_init_vm(struct kvm *kvm);
> extern void kvmppc_core_destroy_vm(struct kvm *kvm);
> extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
> + struct kvm_memory_slot *memslot,
> + struct kvm_memory_slot *old,
> struct kvm_userspace_memory_region *mem);
> extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
> struct kvm_userspace_memory_region *mem);
> extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
> struct kvm_ppc_smmu_info *info);
> +extern void kvmppc_unmap_memslot(struct kvm *kvm,
> + struct kvm_memory_slot *memslot);
>
> extern int kvmppc_bookehv_init(void);
> extern void kvmppc_bookehv_exit(void);
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 3c635c0..87735a7 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -850,7 +850,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long
> *rmapp,
> psize = hpte_page_size(hptep[0], ptel);
> if ((hptep[0] & HPTE_V_VALID) &&
> hpte_rpn(ptel, psize) == gfn) {
> - hptep[0] |= HPTE_V_ABSENT;
> + if (kvm->arch.using_mmu_notifiers)
> + hptep[0] |= HPTE_V_ABSENT;
> kvmppc_invalidate_hpte(kvm, hptep, i);
> /* Harvest R and C */
> rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
> @@ -877,6 +878,28 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long
> start, unsigned long end)
> return 0;
> }
>
> +void kvmppc_unmap_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
> +{
> + unsigned long *rmapp;
> + unsigned long gfn;
> + unsigned long n;
> +
> + rmapp = memslot->rmap;
> + gfn = memslot->base_gfn;
> + for (n = memslot->npages; n; --n) {
> + /*
> + * Testing the present bit without locking is OK because
> + * the memslot has been marked invalid already, and hence
> + * no new HPTEs referencing this page can be created,
> + * thus the present bit can't go from 0 to 1.
> + */
> + if (*rmapp & KVMPPC_RMAP_PRESENT)
> + kvm_unmap_rmapp(kvm, rmapp, gfn);
> + ++rmapp;
> + ++gfn;
> + }
> +}
> +
> static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
> unsigned long gfn)
> {
> @@ -1039,7 +1062,7 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct
> kvm_memory_slot *memslot)
> rmapp = memslot->rmap;
> map = memslot->dirty_bitmap;
> for (i = 0; i < memslot->npages; ++i) {
> - if (kvm_test_clear_dirty(kvm, rmapp))
> + if (kvm_test_clear_dirty(kvm, rmapp) && map)
> __set_bit_le(i, map);
> ++rmapp;
> }
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 83e929e..aad20ca0 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1299,26 +1299,6 @@ static unsigned long slb_pgsize_encoding(unsigned long
> psize)
> return senc;
> }
>
> -int kvmppc_core_prepare_memory_region(struct kvm *kvm,
> - struct kvm_userspace_memory_region *mem)
> -{
> - unsigned long npages;
> - unsigned long *phys;
> -
> - /* Allocate a slot_phys array */
> - phys = kvm->arch.slot_phys[mem->slot];
> - if (!kvm->arch.using_mmu_notifiers && !phys) {
> - npages = mem->memory_size >> PAGE_SHIFT;
> - phys = vzalloc(npages * sizeof(unsigned long));
> - if (!phys)
> - return -ENOMEM;
> - kvm->arch.slot_phys[mem->slot] = phys;
> - kvm->arch.slot_npages[mem->slot] = npages;
> - }
> -
> - return 0;
> -}
> -
> static void unpin_slot(struct kvm *kvm, int slot_id)
> {
> unsigned long *physp;
> @@ -1343,6 +1323,47 @@ static void unpin_slot(struct kvm *kvm, int slot_id)
> }
> }
>
> +int kvmppc_core_prepare_memory_region(struct kvm *kvm,
> + struct kvm_memory_slot *memslot,
> + struct kvm_memory_slot *old,
> + struct kvm_userspace_memory_region *mem)
> +{
> + unsigned long npages;
> + unsigned long *phys;
> +
> + /* Are we creating, deleting, or modifying the slot? */
> + if (!memslot->npages) {
> + /* deleting */
> + kvmppc_unmap_memslot(kvm, old);
> + if (!kvm->arch.using_mmu_notifiers)
> + unpin_slot(kvm, mem->slot);
> + return 0;
> + }
The !memslot->npages case is handled in __kvm_set_memory_region
(please read that part, before the kvm_arch_prepare_memory_region() call).
kvm_arch_flush_shadow should be implemented.
> + if (old->npages) {
> + /* modifying guest_phys or flags */
> + if (old->base_gfn != memslot->base_gfn)
> + kvmppc_unmap_memslot(kvm, old);
This case is also handled generically by the last kvm_arch_flush_shadow
call in __kvm_set_memory_region.
> + if (memslot->dirty_bitmap &&
> + old->dirty_bitmap != memslot->dirty_bitmap)
> + kvmppc_hv_get_dirty_log(kvm, old);
> + return 0;
> + }
Better to clear the dirty log unconditionally in kvm_arch_commit_memory_region,
similarly to x86 (just so it's consistent).
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -81,7 +81,7 @@ static void remove_revmap_chain(struct kvm *kvm, long
> pte_index,
> ptel = rev->guest_rpte |= rcbits;
> gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
> memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
> - if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
> + if (!memslot)
> return;
Why remove this check? (I don't know why it was there in the first
place, just checking).
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html