Resending with Cc: added

On Mon, 2009-09-21 at 20:37 -0300, Marcelo Tosatti wrote:

> plain text document attachment (introduce-srcu-and-use-for-slots)
> Use two steps for memslot deletion: mark the slot invalid (which stops 
> instantiation of new shadow pages for that slot, but allows destruction),
> then instantiate the new empty slot.
> 
> Also simplifies kvm_handle_hva locking.
> 
> Signed-off-by: Marcelo Tosatti <[email protected]>
> 
> Index: kvm-slotslock/include/linux/kvm.h
> ===================================================================
> --- kvm-slotslock.orig/include/linux/kvm.h
> +++ kvm-slotslock/include/linux/kvm.h
> @@ -39,7 +39,7 @@ struct kvm_userspace_memory_region {
>  
>  /* for kvm_memory_region::flags */
>  #define KVM_MEM_LOG_DIRTY_PAGES  1UL
> -
> +#define KVM_MEMSLOT_INVALID      (1UL << 1)
>  
>  /* for KVM_IRQ_LINE */
>  struct kvm_irq_level {
> Index: kvm-slotslock/virt/kvm/kvm_main.c
> ===================================================================
> --- kvm-slotslock.orig/virt/kvm/kvm_main.c
> +++ kvm-slotslock/virt/kvm/kvm_main.c
> @@ -43,6 +43,7 @@
>  #include <linux/swap.h>
>  #include <linux/bitops.h>
>  #include <linux/spinlock.h>
> +#include <linux/srcu.h>
>  
>  #include <asm/processor.h>
>  #include <asm/io.h>
> @@ -352,11 +353,15 @@ static struct kvm *kvm_create_vm(void)
>       kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
>       if (!kvm->memslots)
>               goto out_err;
> +     if (init_srcu_struct(&kvm->srcu))
> +             goto out_err;
>  
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>       page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> -     if (!page)
> +     if (!page) {
> +             cleanup_srcu_struct(&kvm->srcu);
>               goto out_err;
> +     }
>  
>       kvm->coalesced_mmio_ring =
>                       (struct kvm_coalesced_mmio_ring *)page_address(page);
> @@ -367,6 +372,7 @@ static struct kvm *kvm_create_vm(void)
>               kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
>               r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
>               if (r) {
> +                     cleanup_srcu_struct(&kvm->srcu);
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>                       put_page(page);
>  #endif
> @@ -462,6 +468,7 @@ static void kvm_destroy_vm(struct kvm *k
>       kvm_arch_flush_shadow(kvm);
>  #endif
>       kvm_arch_destroy_vm(kvm);
> +     cleanup_srcu_struct(&kvm->srcu);
>       hardware_disable_all();
>       mmdrop(mm);
>  }
> @@ -502,12 +509,13 @@ int __kvm_set_memory_region(struct kvm *
>                           struct kvm_userspace_memory_region *mem,
>                           int user_alloc)
>  {
> -     int r;
> +     int r, flush_shadow = 0;
>       gfn_t base_gfn;
>       unsigned long npages;
>       unsigned long i;
>       struct kvm_memory_slot *memslot;
>       struct kvm_memory_slot old, new;
> +     struct kvm_memslots *slots, *old_memslots;
>  
>       r = -EINVAL;
>       /* General sanity checks */
> @@ -569,15 +577,7 @@ int __kvm_set_memory_region(struct kvm *
>               memset(new.rmap, 0, npages * sizeof(*new.rmap));
>  
>               new.user_alloc = user_alloc;
> -             /*
> -              * hva_to_rmmap() serialzies with the mmu_lock and to be
> -              * safe it has to ignore memslots with !user_alloc &&
> -              * !userspace_addr.
> -              */
> -             if (user_alloc)
> -                     new.userspace_addr = mem->userspace_addr;
> -             else
> -                     new.userspace_addr = 0;
> +             new.userspace_addr = mem->userspace_addr;
>       }
>       if (!npages)
>               goto skip_lpage;
> @@ -632,8 +632,9 @@ skip_lpage:
>               if (!new.dirty_bitmap)
>                       goto out_free;
>               memset(new.dirty_bitmap, 0, dirty_bytes);
> +             /* destroy any largepage mappings for dirty tracking */
>               if (old.npages)
> -                     kvm_arch_flush_shadow(kvm);
> +                     flush_shadow = 1;
>       }
>  #else  /* not defined CONFIG_S390 */
>       new.user_alloc = user_alloc;
> @@ -641,34 +642,69 @@ skip_lpage:
>               new.userspace_addr = mem->userspace_addr;
>  #endif /* not defined CONFIG_S390 */
>  
> -     if (!npages)
> +     if (!npages) {
> +             slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +             if (!slots)
> +                     goto out_free;
> +             memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));

Nothing wrong with the above line, but it makes me wonder whether

                  *slots = *kvm->memslots;

would save us the function call overhead.

> +             if (mem->slot >= slots->nmemslots)
> +                     slots->nmemslots = mem->slot + 1;
> +             slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
> +
> +             old_memslots = kvm->memslots;
> +             rcu_assign_pointer(kvm->memslots, slots);
> +             synchronize_srcu(&kvm->srcu);
> +             /* From this point no new shadow pages pointing to a deleted
> +              * memslot will be created.
> +              *
> +              * validation of sp->gfn happens in:
> +              *      - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
> +              *      - kvm_is_visible_gfn (mmu_check_roots)
> +              */
>               kvm_arch_flush_shadow(kvm);
> +             kfree(old_memslots);
> +     }
>  
>       r = kvm_arch_prepare_memory_region(kvm, &new, old, user_alloc);
>       if (r)
>               goto out_free;
>  
> -     spin_lock(&kvm->mmu_lock);
> -     if (mem->slot >= kvm->memslots->nmemslots)
> -             kvm->memslots->nmemslots = mem->slot + 1;
> +#ifdef CONFIG_DMAR
> +     /* map the pages in iommu page table */
> +     if (npages)
> +             r = kvm_iommu_map_pages(kvm, &new);
> +             if (r)
> +                     goto out_free;
> +#endif
>  
> -     *memslot = new;
> -     spin_unlock(&kvm->mmu_lock);
> +     slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +     if (!slots)
> +             goto out_free;
> +     memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));

Ditto

Cheers,

Fernando Carrijo.

> +     if (mem->slot >= slots->nmemslots)
> +             slots->nmemslots = mem->slot + 1;
> +
> +     /* actual memory is freed via old in kvm_free_physmem_slot below */
> +     if (!npages) {
> +             new.rmap = NULL;
> +             new.dirty_bitmap = NULL;
> +             for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
> +                     new.lpage_info[i] = NULL;
> +     }
> +
> +     slots->memslots[mem->slot] = new;
> +     old_memslots = kvm->memslots;
> +     rcu_assign_pointer(kvm->memslots, slots);
> +     synchronize_srcu(&kvm->srcu);
>  
>       kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
>  
> -     kvm_free_physmem_slot(&old, npages ? &new : NULL);
> -     /* Slot deletion case: we have to update the current slot */
> -     spin_lock(&kvm->mmu_lock);
> -     if (!npages)
> -             *memslot = old;
> -     spin_unlock(&kvm->mmu_lock);
> -#ifdef CONFIG_DMAR
> -     /* map the pages in iommu page table */
> -     r = kvm_iommu_map_pages(kvm, memslot);
> -     if (r)
> -             goto out;
> -#endif
> +     kvm_free_physmem_slot(&old, &new);
> +     kfree(old_memslots);
> +
> +     if (flush_shadow)
> +             kvm_arch_flush_shadow(kvm);
> +
>       return 0;
>  
>  out_free:
> @@ -768,7 +804,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
>  struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
>  {
>       int i;
> -     struct kvm_memslots *slots = kvm->memslots;
> +     struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
>  
>       for (i = 0; i < slots->nmemslots; ++i) {
>               struct kvm_memory_slot *memslot = &slots->memslots[i];
> @@ -790,12 +826,15 @@ struct kvm_memory_slot *gfn_to_memslot(s
>  int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
>  {
>       int i;
> -     struct kvm_memslots *slots = kvm->memslots;
> +     struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
>  
>       gfn = unalias_gfn(kvm, gfn);
>       for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
>               struct kvm_memory_slot *memslot = &slots->memslots[i];
>  
> +             if (memslot->flags & KVM_MEMSLOT_INVALID)
> +                     continue;
> +
>               if (gfn >= memslot->base_gfn
>                   && gfn < memslot->base_gfn + memslot->npages)
>                       return 1;
> @@ -810,7 +849,7 @@ unsigned long gfn_to_hva(struct kvm *kvm
>  
>       gfn = unalias_gfn(kvm, gfn);
>       slot = gfn_to_memslot_unaliased(kvm, gfn);
> -     if (!slot)
> +     if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
>               return bad_hva();
>       return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
>  }
> Index: kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
> ===================================================================
> --- kvm-slotslock.orig/arch/ia64/kvm/kvm-ia64.c
> +++ kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
> @@ -1834,6 +1834,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
>       struct kvm_memory_slot *memslot;
>       int is_dirty = 0;
>  
> +     down_write(&kvm->slots_lock);
>       spin_lock(&kvm->arch.dirty_log_lock);
>  
>       r = kvm_ia64_sync_dirty_log(kvm, log);
> @@ -1853,6 +1854,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kv
>       }
>       r = 0;
>  out:
> +     up_write(&kvm->slots_lock);
>       spin_unlock(&kvm->arch.dirty_log_lock);
>       return r;
>  }
> Index: kvm-slotslock/arch/x86/kvm/mmu.c
> ===================================================================
> --- kvm-slotslock.orig/arch/x86/kvm/mmu.c
> +++ kvm-slotslock/arch/x86/kvm/mmu.c
> @@ -29,6 +29,7 @@
>  #include <linux/swap.h>
>  #include <linux/hugetlb.h>
>  #include <linux/compiler.h>
> +#include <linux/srcu.h>
>  
>  #include <asm/page.h>
>  #include <asm/cmpxchg.h>
> @@ -766,23 +767,18 @@ static int kvm_unmap_rmapp(struct kvm *k
>  static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
>                         int (*handler)(struct kvm *kvm, unsigned long *rmapp))
>  {
> -     int i, j;
> +     int i, j, idx;
>       int retval = 0;
> -     struct kvm_memslots *slots = kvm->memslots;
> +     struct kvm_memslots *slots;
> +
> +     idx = srcu_read_lock(&kvm->srcu);
> +     slots = rcu_dereference(kvm->memslots);
>  
> -     /*
> -      * If mmap_sem isn't taken, we can look the memslots with only
> -      * the mmu_lock by skipping over the slots with userspace_addr == 0.
> -      */
>       for (i = 0; i < slots->nmemslots; i++) {
>               struct kvm_memory_slot *memslot = &slots->memslots[i];
>               unsigned long start = memslot->userspace_addr;
>               unsigned long end;
>  
> -             /* mmu_lock protects userspace_addr */
> -             if (!start)
> -                     continue;
> -
>               end = start + (memslot->npages << PAGE_SHIFT);
>               if (hva >= start && hva < end) {
>                       gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
> @@ -797,6 +793,7 @@ static int kvm_handle_hva(struct kvm *kv
>                       }
>               }
>       }
> +     srcu_read_unlock(&kvm->srcu, idx);
>  
>       return retval;
>  }
> @@ -2966,16 +2963,20 @@ nomem:
>   */
>  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
>  {
> -     int i;
> +     int i, idx;
>       unsigned int nr_mmu_pages;
>       unsigned int  nr_pages = 0;
> +     struct kvm_memslots *slots;
>  
> -     for (i = 0; i < kvm->memslots->nmemslots; i++)
> -             nr_pages += kvm->memslots->memslots[i].npages;
> +     idx = srcu_read_lock(&kvm->srcu);
> +     slots = rcu_dereference(kvm->memslots);
> +     for (i = 0; i < slots->nmemslots; i++)
> +             nr_pages += slots->memslots[i].npages;
>  
>       nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
>       nr_mmu_pages = max(nr_mmu_pages,
>                       (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
> +     srcu_read_unlock(&kvm->srcu, idx);
>  
>       return nr_mmu_pages;
>  }
> @@ -3241,10 +3242,12 @@ static void audit_mappings(struct kvm_vc
>  static int count_rmaps(struct kvm_vcpu *vcpu)
>  {
>       int nmaps = 0;
> -     int i, j, k;
> +     int i, j, k, idx;
>  
> +     idx = srcu_read_lock(&kvm->srcu);
> +     slots = rcu_dereference(kvm->memslots);
>       for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
> -             struct kvm_memory_slot *m = &vcpu->kvm->memslots->memslots[i];
> +             struct kvm_memory_slot *m = &slots->memslots[i];
>               struct kvm_rmap_desc *d;
>  
>               for (j = 0; j < m->npages; ++j) {
> @@ -3267,6 +3270,7 @@ static int count_rmaps(struct kvm_vcpu *
>                       }
>               }
>       }
> +     srcu_read_unlock(&kvm->srcu, idx);
>       return nmaps;
>  }
>  
> Index: kvm-slotslock/arch/x86/kvm/vmx.c
> ===================================================================
> --- kvm-slotslock.orig/arch/x86/kvm/vmx.c
> +++ kvm-slotslock/arch/x86/kvm/vmx.c
> @@ -24,6 +24,7 @@
>  #include <linux/mm.h>
>  #include <linux/highmem.h>
>  #include <linux/sched.h>
> +#include <linux/srcu.h>
>  #include <linux/moduleparam.h>
>  #include <linux/ftrace_event.h>
>  #include "kvm_cache_regs.h"
> @@ -1465,10 +1466,18 @@ static void enter_pmode(struct kvm_vcpu 
>  static gva_t rmode_tss_base(struct kvm *kvm)
>  {
>       if (!kvm->arch.tss_addr) {
> -             gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
> -                              kvm->memslots->memslots[0].npages - 3;
> +             struct kvm_memslots *slots;
> +             gfn_t base_gfn;
> +             int idx;
> +
> +             idx = srcu_read_lock(&kvm->srcu);
> +             slots = rcu_dereference(kvm->memslots);
> +             base_gfn = slots->memslots[0].base_gfn +
> +                              slots->memslots[0].npages - 3;
> +             srcu_read_unlock(&kvm->srcu, idx);
>               return base_gfn << PAGE_SHIFT;
>       }
> +
>       return kvm->arch.tss_addr;
>  }
>  
> Index: kvm-slotslock/include/linux/kvm_host.h
> ===================================================================
> --- kvm-slotslock.orig/include/linux/kvm_host.h
> +++ kvm-slotslock/include/linux/kvm_host.h
> @@ -163,6 +163,7 @@ struct kvm {
>       struct rw_semaphore slots_lock;
>       struct mm_struct *mm; /* userspace tied to this vm */
>       struct kvm_memslots *memslots;
> +     struct srcu_struct srcu;
>  #ifdef CONFIG_KVM_APIC_ARCHITECTURE
>       u32 bsp_vcpu_id;
>       struct kvm_vcpu *bsp_vcpu;
> Index: kvm-slotslock/virt/kvm/assigned-dev.c
> ===================================================================
> --- kvm-slotslock.orig/virt/kvm/assigned-dev.c
> +++ kvm-slotslock/virt/kvm/assigned-dev.c
> @@ -504,11 +504,11 @@ out:
>  static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
>                                     struct kvm_assigned_pci_dev *assigned_dev)
>  {
> -     int r = 0;
> +     int r = 0, idx;
>       struct kvm_assigned_dev_kernel *match;
>       struct pci_dev *dev;
>  
> -     down_read(&kvm->slots_lock);
> +     idx = srcu_read_lock(&kvm->srcu);
>       mutex_lock(&kvm->lock);
>  
>       match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
> @@ -574,7 +574,7 @@ static int kvm_vm_ioctl_assign_device(st
>  
>  out:
>       mutex_unlock(&kvm->lock);
> -     up_read(&kvm->slots_lock);
> +     srcu_read_unlock(&kvm->srcu, idx);
>       return r;
>  out_list_del:
>       list_del(&match->list);
> @@ -586,7 +586,7 @@ out_put:
>  out_free:
>       kfree(match);
>       mutex_unlock(&kvm->lock);
> -     up_read(&kvm->slots_lock);
> +     srcu_read_unlock(&kvm->srcu, idx);
>       return r;
>  }
>  
> Index: kvm-slotslock/virt/kvm/iommu.c
> ===================================================================
> --- kvm-slotslock.orig/virt/kvm/iommu.c
> +++ kvm-slotslock/virt/kvm/iommu.c
> @@ -78,7 +78,7 @@ static int kvm_iommu_map_memslots(struct
>       int i, r = 0;
>       struct kvm_memslots *slots;
>  
> -     slots = kvm->memslots;
> +     slots = rcu_dereference(kvm->memslots);
>  
>       for (i = 0; i < slots->nmemslots; i++) {
>               r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
> @@ -214,7 +214,7 @@ static int kvm_iommu_unmap_memslots(stru
>       int i;
>       struct kvm_memslots *slots;
>  
> -     slots = kvm->memslots;
> +     slots = rcu_dereference(kvm->memslots);
>  
>       for (i = 0; i < slots->nmemslots; i++) {
>               kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to