This patch replaces bitmap allocation with do_mmap() and bitmap
manipulation with the *_user() access functions.

Note that this does not change the APIs between kernel and user space.
To get more advantage from this hack, we need to add a new interface
for triggering the bitmap switch and getting the bitmap addresses: the
addresses are in user space and we can export them to qemu.

TODO:
1. We want to use copy_in_user() for 32bit case too.
   Note that this is only for the compatibility issue: in the future,
   we hope, qemu will not need to use this ioctl.
2. We have to implement test_bit_user() to avoid extra set_bit.

Signed-off-by: Takuya Yoshikawa <[email protected]>
Signed-off-by: Fernando Luis Vazquez Cao <[email protected]>
---
 arch/x86/kvm/x86.c       |  118 +++++++++++++++++++++++++++++++++++++--------
 include/linux/kvm_host.h |    4 ++
 virt/kvm/kvm_main.c      |   30 +++++++++++-
 3 files changed, 130 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 450ecfe..995b970 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2642,16 +2642,99 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
        return 0;
 }
 
+int kvm_arch_create_dirty_bitmap(struct kvm_memory_slot *memslot)
+{
+       unsigned long user_addr1;
+       unsigned long user_addr2;
+       int dirty_bytes = kvm_dirty_bitmap_bytes(memslot);
+
+       down_write(&current->mm->mmap_sem);
+       user_addr1 = do_mmap(NULL, 0, dirty_bytes,
+                            PROT_READ | PROT_WRITE,
+                            MAP_PRIVATE | MAP_ANONYMOUS, 0);
+       if (IS_ERR((void *)user_addr1)) {
+               up_write(&current->mm->mmap_sem);
+               return PTR_ERR((void *)user_addr1);
+       }
+       user_addr2 = do_mmap(NULL, 0, dirty_bytes,
+                            PROT_READ | PROT_WRITE,
+                            MAP_PRIVATE | MAP_ANONYMOUS, 0);
+       if (IS_ERR((void *)user_addr2)) {
+               do_munmap(current->mm, user_addr1, dirty_bytes);
+               up_write(&current->mm->mmap_sem);
+               return PTR_ERR((void *)user_addr2);
+       }
+       up_write(&current->mm->mmap_sem);
+
+       memslot->dirty_bitmap = (unsigned long __user *)user_addr1;
+       memslot->dirty_bitmap_old = (unsigned long __user *)user_addr2;
+       clear_user(memslot->dirty_bitmap, dirty_bytes);
+       clear_user(memslot->dirty_bitmap_old, dirty_bytes);
+
+       return 0;
+}
+
+void kvm_arch_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
+{
+       int n = kvm_dirty_bitmap_bytes(memslot);
+
+       if (!memslot->dirty_bitmap)
+               return;
+
+       down_write(&current->mm->mmap_sem);
+       do_munmap(current->mm, (unsigned long)memslot->dirty_bitmap, n);
+       do_munmap(current->mm, (unsigned long)memslot->dirty_bitmap_old, n);
+       up_write(&current->mm->mmap_sem);
+
+       memslot->dirty_bitmap = NULL;
+       memslot->dirty_bitmap_old = NULL;
+}
+
+static int kvm_copy_dirty_bitmap(unsigned long __user *to,
+                                const unsigned long __user *from, int n)
+{
+#ifdef CONFIG_X86_64
+       if (copy_in_user(to, from, n)) {
+               printk(KERN_WARNING "%s: copy_in_user failed\n", __func__);
+               return -EFAULT;
+       }
+       return 0;
+#else
+       int ret = 0;
+       void *p = vmalloc(n);
+
+       if (!p) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       if (copy_from_user(p, from, n)) {
+               printk(KERN_WARNING "%s: copy_from_user failed\n", __func__);
+               ret = -EFAULT;
+               goto out_free;
+       }
+       if (copy_to_user(to, p, n)) {
+               printk(KERN_WARNING "%s: copy_to_user failed\n", __func__);
+               ret = -EFAULT;
+               goto out_free;
+       }
+
+out_free:
+       vfree(p);
+out:
+       return ret;
+#endif
+}
+
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                                      struct kvm_dirty_log *log)
 {
-       int r, n, i;
+       int r, n;
        struct kvm_memory_slot *memslot;
-       unsigned long is_dirty = 0;
-       unsigned long *dirty_bitmap = NULL;
+       unsigned long __user *dirty_bitmap;
+       unsigned long __user *dirty_bitmap_old;
 
        mutex_lock(&kvm->slots_lock);
 
@@ -2664,44 +2747,37 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
        if (!memslot->dirty_bitmap)
                goto out;
 
-       n = kvm_dirty_bitmap_bytes(memslot);
-
-       r = -ENOMEM;
-       dirty_bitmap = vmalloc(n);
-       if (!dirty_bitmap)
-               goto out;
-       memset(dirty_bitmap, 0, n);
+       dirty_bitmap = memslot->dirty_bitmap;
+       dirty_bitmap_old = memslot->dirty_bitmap_old;
 
-       for (i = 0; !is_dirty && i < n/sizeof(long); i++)
-               is_dirty = memslot->dirty_bitmap[i];
+       n = kvm_dirty_bitmap_bytes(memslot);
+       clear_user(dirty_bitmap_old, n);
 
        /* If nothing is dirty, don't bother messing with page tables. */
-       if (is_dirty) {
+       if (memslot->is_dirty) {
                struct kvm_memslots *slots, *old_slots;
 
                spin_lock(&kvm->mmu_lock);
                kvm_mmu_slot_remove_write_access(kvm, log->slot);
                spin_unlock(&kvm->mmu_lock);
 
+               r = -ENOMEM;
                slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
                if (!slots)
-                       goto out_free;
+                       goto out;
 
                memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
-               slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
+               slots->memslots[log->slot].dirty_bitmap = dirty_bitmap_old;
+               slots->memslots[log->slot].dirty_bitmap_old = dirty_bitmap;
+               slots->memslots[log->slot].is_dirty = false;
 
                old_slots = kvm->memslots;
                rcu_assign_pointer(kvm->memslots, slots);
                synchronize_srcu_expedited(&kvm->srcu);
-               dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
                kfree(old_slots);
        }
 
-       r = 0;
-       if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
-               r = -EFAULT;
-out_free:
-       vfree(dirty_bitmap);
+       r = kvm_copy_dirty_bitmap(log->dirty_bitmap, dirty_bitmap, n);
 out:
        mutex_unlock(&kvm->slots_lock);
        return r;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 07092d6..834812f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -276,6 +276,10 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                                struct kvm_userspace_memory_region *mem,
                                struct kvm_memory_slot old,
                                int user_alloc);
+#ifdef __KVM_HAVE_USER_DIRTYBITMAP
+int kvm_arch_create_dirty_bitmap(struct kvm_memory_slot *memslot);
+void kvm_arch_destroy_dirty_bitmap(struct kvm_memory_slot *memslot);
+#endif
 void kvm_disable_largepages(void);
 void kvm_arch_flush_shadow(struct kvm *kvm);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f919bd1..038a677 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -433,8 +433,12 @@ out_err_nodisable:
 
 static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
+#ifdef __KVM_HAVE_USER_DIRTYBITMAP
+       kvm_arch_destroy_dirty_bitmap(memslot);
+#else
        vfree(memslot->dirty_bitmap);
        memslot->dirty_bitmap = NULL;
+#endif
 }
 
 /*
@@ -463,13 +467,26 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot 
*free,
        free->rmap = NULL;
 }
 
+/*
+ * We don't munmap dirty bitmaps by ourselves in the case of vm destruction.
+ */
+static void kvm_pre_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
+{
+#ifdef __KVM_HAVE_USER_DIRTYBITMAP
+       memslot->dirty_bitmap = NULL;
+       memslot->dirty_bitmap_old = NULL;
+#endif
+}
+
 void kvm_free_physmem(struct kvm *kvm)
 {
        int i;
        struct kvm_memslots *slots = kvm->memslots;
 
-       for (i = 0; i < slots->nmemslots; ++i)
+       for (i = 0; i < slots->nmemslots; ++i) {
+               kvm_pre_destroy_dirty_bitmap(&slots->memslots[i]);
                kvm_free_physmem_slot(&slots->memslots[i], NULL);
+       }
 
        kfree(kvm->memslots);
 }
@@ -523,6 +540,9 @@ static int kvm_vm_release(struct inode *inode, struct file 
*filp)
 
 static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
+#ifdef __KVM_HAVE_USER_DIRTYBITMAP
+       return kvm_arch_create_dirty_bitmap(memslot);
+#else
        int dirty_bytes = kvm_dirty_bitmap_bytes(memslot);
 
        memslot->dirty_bitmap = vmalloc(dirty_bytes);
@@ -530,6 +550,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot 
*memslot)
                return -ENOMEM;
 
        memset(memslot->dirty_bitmap, 0, dirty_bytes);
+#endif
        return 0;
 }
 
@@ -1197,9 +1218,16 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
        if (memslot && memslot->dirty_bitmap) {
                unsigned long rel_gfn = gfn - memslot->base_gfn;
 
+#ifdef __KVM_HAVE_USER_DIRTYBITMAP
+               if (set_bit_user(rel_gfn, memslot->dirty_bitmap) < 0)
+                       printk(KERN_WARNING "%s: set_bit_user failed\n", 
__func__);
+
+               memslot->is_dirty = true;
+#else
                /* avoid RMW */
                if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap))
                        generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
+#endif
        }
 }
 
-- 
1.6.3.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to