On Mon, Jan 24, 2011 at 10:37:56PM -0700, Alex Williamson wrote:
> On Mon, 2011-01-24 at 08:44 -0700, Alex Williamson wrote:
> > I'll look at how we might be
> > able to allocate slots on demand.  Thanks,
> 
> Here's a first cut just to see if this looks agreeable.  This allows the
> slot array to grow on demand.  This works with current userspace, as
> well as userspace trivially modified to double KVMState.slots and
> hotplugging enough pci-assign devices to exceed the previous limit (w/ &
> w/o ept).  Hopefully I got the rcu bits correct.  Does this look like
> the right path?  If so, I'll work on removing the fixed limit from
> userspace next.  Thanks,
> 
> Alex
> 
> 
> kvm: Allow memory slot array to grow on demand
> 
> Remove fixed KVM_MEMORY_SLOTS limit, allowing the slot array
> to grow on demand.  Private slots are now allocated at the
> front instead of the end.  Only x86 seems to use private slots,
> so this is now zero for all other archs.  The memslots pointer
> > is already updated using rcu, so changing the size of the
> > array when it's replaced is straightforward.  x86 also keeps
> a bitmap of slots used by a kvm_mmu_page, which requires a
> shadow tlb flush whenever we increase the number of slots.
> This forces the pages to be rebuilt with the new bitmap size.
> 
> Signed-off-by: Alex Williamson <alex.william...@redhat.com>
> ---
> 
>  arch/ia64/include/asm/kvm_host.h    |    4 --
>  arch/ia64/kvm/kvm-ia64.c            |    2 +
>  arch/powerpc/include/asm/kvm_host.h |    3 --
>  arch/s390/include/asm/kvm_host.h    |    3 --
>  arch/x86/include/asm/kvm_host.h     |    3 +-
>  arch/x86/include/asm/vmx.h          |    6 ++-
>  arch/x86/kvm/mmu.c                  |    7 +++-
>  arch/x86/kvm/x86.c                  |    6 ++-
>  include/linux/kvm_host.h            |    7 +++-
>  virt/kvm/kvm_main.c                 |   65 
> ++++++++++++++++++++++++-----------
>  10 files changed, 63 insertions(+), 43 deletions(-)
> 
> 
> diff --git a/arch/ia64/include/asm/kvm_host.h 
> b/arch/ia64/include/asm/kvm_host.h
> index 2689ee5..11d0ab2 100644
> --- a/arch/ia64/include/asm/kvm_host.h
> +++ b/arch/ia64/include/asm/kvm_host.h
> @@ -23,10 +23,6 @@
>  #ifndef __ASM_KVM_HOST_H
>  #define __ASM_KVM_HOST_H
>  
> -#define KVM_MEMORY_SLOTS 32
> -/* memory slots that does not exposed to userspace */
> -#define KVM_PRIVATE_MEM_SLOTS 4
> -
>  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
>  
>  /* define exit reasons from vmm to kvm*/
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 70d224d..f1adda2 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -1814,7 +1814,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>       mutex_lock(&kvm->slots_lock);
>  
>       r = -EINVAL;
> -     if (log->slot >= KVM_MEMORY_SLOTS)
> +     if (log->slot >= kvm->memslots->nmemslots)
>               goto out;
>  
>       memslot = &kvm->memslots->memslots[log->slot];
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index bba3b9b..dc80057 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -29,9 +29,6 @@
>  #include <asm/kvm_asm.h>
>  
>  #define KVM_MAX_VCPUS 1
> -#define KVM_MEMORY_SLOTS 32
> -/* memory slots that does not exposed to userspace */
> -#define KVM_PRIVATE_MEM_SLOTS 4
>  
>  #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
>  
> diff --git a/arch/s390/include/asm/kvm_host.h 
> b/arch/s390/include/asm/kvm_host.h
> index cef7dbf..92a964c 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -20,9 +20,6 @@
>  #include <asm/cpu.h>
>  
>  #define KVM_MAX_VCPUS 64
> -#define KVM_MEMORY_SLOTS 32
> -/* memory slots that does not exposed to userspace */
> -#define KVM_PRIVATE_MEM_SLOTS 4
>  
>  struct sca_entry {
>       atomic_t scn;
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ffd7f8d..df1382c 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -27,7 +27,6 @@
>  #include <asm/msr-index.h>
>  
>  #define KVM_MAX_VCPUS 64
> -#define KVM_MEMORY_SLOTS 32
>  /* memory slots that does not exposed to userspace */
>  #define KVM_PRIVATE_MEM_SLOTS 4
>  
> @@ -207,7 +206,7 @@ struct kvm_mmu_page {
>        * One bit set per slot which has memory
>        * in this shadow page.
>        */
> -     DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
> +     unsigned long *slot_bitmap;
>       bool multimapped;         /* More than one parent_pte? */
>       bool unsync;
>       int root_count;          /* Currently serving as active root */
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 84471b8..7fd8c89 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -370,9 +370,9 @@ enum vmcs_field {
>  
>  #define AR_RESERVD_MASK 0xfffe0f00
>  
> -#define TSS_PRIVATE_MEMSLOT                  (KVM_MEMORY_SLOTS + 0)
> -#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT     (KVM_MEMORY_SLOTS + 1)
> -#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT   (KVM_MEMORY_SLOTS + 2)
> +#define TSS_PRIVATE_MEMSLOT                  0
> +#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT     1
> +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT   2
>  
>  #define VMX_NR_VPIDS                         (1 << 16)
>  #define VMX_VPID_EXTENT_SINGLE_CONTEXT               1
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index ccacf0b..8c2533a 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1032,6 +1032,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct 
> kvm_mmu_page *sp)
>       ASSERT(is_empty_shadow_page(sp->spt));
>       hlist_del(&sp->hash_link);
>       list_del(&sp->link);
> +     kfree(sp->slot_bitmap);
>       __free_page(virt_to_page(sp->spt));
>       if (!sp->role.direct)
>               __free_page(virt_to_page(sp->gfns));
> @@ -1048,6 +1049,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct 
> kvm_vcpu *vcpu,
>                                              u64 *parent_pte, int direct)
>  {
>       struct kvm_mmu_page *sp;
> +     struct kvm_memslots *slots = kvm_memslots(vcpu->kvm);
>  
>       sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof 
> *sp);
>       sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
> @@ -1056,7 +1058,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct 
> kvm_vcpu *vcpu,
>                                                 PAGE_SIZE);
>       set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
>       list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
> -     bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
> +     sp->slot_bitmap = kzalloc(sizeof(long) *
> +                               BITS_TO_LONGS(slots->nmemslots), GFP_KERNEL);
> +     if (!sp->slot_bitmap)
> +             return NULL;
>       sp->multimapped = 0;
>       sp->parent_pte = parent_pte;
>       kvm_mod_used_mmu_pages(vcpu->kvm, +1);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5eccdba..c002fac 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1978,7 +1978,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>               r = KVM_MAX_VCPUS;
>               break;
>       case KVM_CAP_NR_MEMSLOTS:
> -             r = KVM_MEMORY_SLOTS;
> +             r = INT_MAX - KVM_PRIVATE_MEM_SLOTS;
>               break;
>       case KVM_CAP_PV_MMU:    /* obsolete */
>               r = 0;
> @@ -3201,7 +3201,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
>       mutex_lock(&kvm->slots_lock);
>  
>       r = -EINVAL;
> -     if (log->slot >= KVM_MEMORY_SLOTS)
> +     if (log->slot >= kvm->memslots->nmemslots)
>               goto out;
>  
>       memslot = &kvm->memslots->memslots[log->slot];
> @@ -6068,7 +6068,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
>       int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
>  
>       /* Prevent internal slot pages from being moved by fork()/COW. */
> -     if (memslot->id >= KVM_MEMORY_SLOTS)
> +     if (memslot->id < KVM_PRIVATE_MEM_SLOTS)
>               map_flags = MAP_SHARED | MAP_ANONYMOUS;
>  
>       /*To keep backward compatibility with older userspace,
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index b5021db..4cb9f94 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -27,6 +27,10 @@
>  
>  #include <asm/kvm_host.h>
>  
> +#ifndef KVM_PRIVATE_MEM_SLOTS
> + #define KVM_PRIVATE_MEM_SLOTS 0
> +#endif
> +
>  /*
>   * vcpu->requests bit members
>   */
> @@ -206,8 +210,7 @@ struct kvm_irq_routing_table {};
>  struct kvm_memslots {
>       int nmemslots;
>       u64 generation;
> -     struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
> -                                     KVM_PRIVATE_MEM_SLOTS];
> +     struct kvm_memory_slot memslots[];
>  };
>  
>  struct kvm {
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index fd67bcd..32f023c 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -623,13 +623,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
>                           struct kvm_userspace_memory_region *mem,
>                           int user_alloc)
>  {
> -     int r;
> +     int r, nmemslots;
>       gfn_t base_gfn;
>       unsigned long npages;
>       unsigned long i;
> -     struct kvm_memory_slot *memslot;
> -     struct kvm_memory_slot old, new;
> +     struct kvm_memory_slot *memslot = NULL;
> +     struct kvm_memory_slot old = {}, new = {};
>       struct kvm_memslots *slots, *old_memslots;
> +     bool flush = false;
>  
>       r = -EINVAL;
>       /* General sanity checks */
> @@ -639,12 +640,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
>               goto out;
>       if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
>               goto out;
> -     if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
> -             goto out;
>       if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
>               goto out;
>  
> -     memslot = &kvm->memslots->memslots[mem->slot];
>       base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
>       npages = mem->memory_size >> PAGE_SHIFT;
>  
> @@ -655,7 +653,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
>       if (!npages)
>               mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
>  
> -     new = old = *memslot;
> +     if (mem->slot < kvm->memslots->nmemslots) {
> +             memslot = &kvm->memslots->memslots[mem->slot];
> +             new = old = *memslot;
> +     }

>               if (s == memslot || !s->npages)
> @@ -752,12 +753,19 @@ skip_lpage:
>  
>       if (!npages) {
>               r = -ENOMEM;
> -             slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +
> +             nmemslots = (mem->slot >= kvm->memslots->nmemslots) ?
> +                         mem->slot + 1 : kvm->memslots->nmemslots;
> +
> +             slots = kzalloc(sizeof(struct kvm_memslots) +
> +                             nmemslots * sizeof(struct kvm_memory_slot),
> +                             GFP_KERNEL);
>               if (!slots)
>                       goto out_free;
> -             memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> -             if (mem->slot >= slots->nmemslots)
> -                     slots->nmemslots = mem->slot + 1;
> +             memcpy(slots, kvm->memslots,
> +                    sizeof(struct kvm_memslots) + kvm->memslots->nmemslots *
> +                    sizeof(struct kvm_memory_slot));
> +             slots->nmemslots = nmemslots;

Other than the upper limit, should disallow increasing the number of
slots in case the new slot is being deleted (npages == 0). So none of
these additions to the !npages case should be necessary.

Also, must disallow shrinking the number of slots.

>               slots->generation++;
>               slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
>  
> @@ -787,12 +795,21 @@ skip_lpage:
>       }
>  
>       r = -ENOMEM;
> -     slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +
> +     if (mem->slot >= kvm->memslots->nmemslots) {
> +             nmemslots = mem->slot + 1;
> +             flush = true;
> +     } else
> +             nmemslots = kvm->memslots->nmemslots;
> +
> +     slots = kzalloc(sizeof(struct kvm_memslots) +
> +                     nmemslots * sizeof(struct kvm_memory_slot),
> +                     GFP_KERNEL);
>       if (!slots)
>               goto out_free;
> -     memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> -     if (mem->slot >= slots->nmemslots)
> -             slots->nmemslots = mem->slot + 1;
> +     memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots) +
> +            kvm->memslots->nmemslots * sizeof(struct kvm_memory_slot));
> +     slots->nmemslots = nmemslots;
>       slots->generation++;
>  
>       /* actual memory is freed via old in kvm_free_physmem_slot below */
> @@ -808,6 +825,9 @@ skip_lpage:
>       rcu_assign_pointer(kvm->memslots, slots);

It should be: 

    spin_lock(kvm->mmu_lock)
    rcu_assign_pointer()
    kvm_arch_flush_shadow()
    spin_unlock(kvm->mmu_lock)

But kvm_arch_flush_shadow() takes mmu_lock currently, so that needs
fixing. 

Otherwise looks fine.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to