This patch adds all needed structures to batch MMIOs.
Until an architecture uses it, it is not compiled.

MMIO batching introduces two ioctl()s that define the MMIO zones where writes
can be delayed and batched:

- KVM_REGISTER_DELAYED_MMIO registers a delayed MMIO zone.
  It requests one parameter (struct kvm_delayed_mmio_zone) which defines
  a memory area where MMIOs can be delayed until the next switch to
  user space. The maximum number of MMIO zones is KVM_MAX_DELAYED_MMIO_ZONE (100).

- KVM_UNREGISTER_DELAYED_MMIO cancels all registered zones inside
  the given bounds (bounds are also given by struct kvm_delayed_mmio_zone).

The userspace client can check kernel MMIO batching availability by asking
ioctl(KVM_CHECK_EXTENSION) for the KVM_CAP_DELAYED_MMIO capability.
The ioctl(KVM_CHECK_EXTENSION) call for KVM_CAP_DELAYED_MMIO will return 0 if
the feature is not supported, or otherwise the page offset at which the
batching ring buffer will be stored.
The page offset depends on the architecture.

After an ioctl(KVM_RUN), the first page of the KVM memory mapped points to
a kvm_run structure. The offset given by KVM_CAP_DELAYED_MMIO is
an offset to the MMIO batching ring expressed in PAGE_SIZE relatively
to the address of the start of the kvm_run structure. The MMIO ring buffer
is defined by the structure kvm_batch.

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
---
 include/linux/kvm.h      |   26 ++++++++
 include/linux/kvm_host.h |    4 +
 virt/kvm/delayed_mmio.c  |  152 ++++++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/delayed_mmio.h  |   23 +++++++
 virt/kvm/kvm_main.c      |   54 ++++++++++++++++
 5 files changed, 259 insertions(+), 0 deletions(-)
 create mode 100644 virt/kvm/delayed_mmio.c
 create mode 100644 virt/kvm/delayed_mmio.h

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a281afe..80f44d8 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -173,6 +173,29 @@ struct kvm_run {
        };
 };
 
+/* for KVM_REGISTER_DELAYED_MMIO / KVM_UNREGISTER_DELAYED_MMIO */
+
+struct kvm_delayed_mmio_zone {
+       __u64 addr;
+       __u32 size;
+       __u32 pad;
+};
+
+struct kvm_batch_ring {
+       __u64 phys_addr;
+       __u32 len;
+       __u32 pad;
+       __u8  data[8];
+};
+
+struct kvm_batch {
+       __u32 first, last;
+       struct kvm_batch_ring mmio[0];
+};
+
+#define KVM_MAX_BATCH ((PAGE_SIZE - sizeof(struct kvm_batch)) / \
+                                               sizeof(struct kvm_batch_ring))
+
 /* for KVM_TRANSLATE */
 struct kvm_translation {
        /* in */
@@ -346,6 +369,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_NOP_IO_DELAY 12
 #define KVM_CAP_PV_MMU 13
 #define KVM_CAP_MP_STATE 14
+#define KVM_CAP_DELAYED_MMIO 15
 
 /*
  * ioctls for VM fds
@@ -371,6 +395,8 @@ struct kvm_trace_rec {
 #define KVM_CREATE_PIT           _IO(KVMIO,  0x64)
 #define KVM_GET_PIT              _IOWR(KVMIO, 0x65, struct kvm_pit_state)
 #define KVM_SET_PIT              _IOR(KVMIO,  0x66, struct kvm_pit_state)
+#define KVM_REGISTER_DELAYED_MMIO _IOW(KVMIO,  0x67, struct kvm_delayed_mmio_zone)
+#define KVM_UNREGISTER_DELAYED_MMIO _IOW(KVMIO,  0x68, struct kvm_delayed_mmio_zone)
 
 /*
  * ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 57b376b..355a4ab 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -116,6 +116,10 @@ struct kvm {
        struct kvm_vm_stat stat;
        struct kvm_arch arch;
        atomic_t users_count;
+#ifdef KVM_MMIO_PAGE_OFFSET
+       struct kvm_delayed_mmio *delayed_mmio;
+       struct kvm_batch *batch;
+#endif
 };
 
 /* The guest did something we don't support. */
diff --git a/virt/kvm/delayed_mmio.c b/virt/kvm/delayed_mmio.c
new file mode 100644
index 0000000..9d7784c
--- /dev/null
+++ b/virt/kvm/delayed_mmio.c
@@ -0,0 +1,152 @@
+/*
+ * KVM delayed MMIO
+ *
+ * Copyright (c) 2008 Bull S.A.S.
+ *
+ *  Author: Laurent Vivier <[EMAIL PROTECTED]>
+ *
+ */
+
+#include "iodev.h"
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+
+#include "delayed_mmio.h"
+
+static int delayed_mmio_in_range(struct kvm_io_device *this,
+                                gpa_t addr, int len, int is_write)
+{
+       struct kvm_delayed_mmio *mmio = (struct kvm_delayed_mmio*)this->private;
+       struct kvm_delayed_mmio_zone *zone;
+       int next;
+       int i;
+
+       if (!is_write)
+               return 0;
+
+       /* kvm->lock is taken by the caller and must not be released before
+        * dev.read/write
+        */
+
+       /* Are we able to batch it ? */
+
+       /* last is the first free entry
+        * check if we don't meet the first used entry
+        * there is always one unused entry in the buffer
+        */
+
+       next = (mmio->kvm->batch->last + 1) % KVM_MAX_BATCH;
+       if (next == mmio->kvm->batch->first) {
+               /* full */
+               return 0;
+       }
+
+       /* is the address inside a batchable zone? */
+
+       for (i = 0; i < mmio->nb_zones; i++) {
+               zone = &mmio->zone[i];
+
+               /* (addr,len) is fully included in
+                * (zone->addr, zone->size)
+                */
+
+               if (zone->addr <= addr &&
+                   addr + len <= zone->addr + zone->size)
+                       return 1;
+       }
+       return 0;
+}
+
+static void delayed_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+                              const void *val)
+{
+       struct kvm_delayed_mmio *mmio = (struct kvm_delayed_mmio *)this->private;
+       struct kvm_batch *batch = mmio->kvm->batch;
+
+       /* kvm->lock must be taken by caller before call to in_range()*/
+
+       /* copy data in first free entry of the ring */
+
+       batch->mmio[batch->last].phys_addr = addr;
+       batch->mmio[batch->last].len = len;
+       memcpy(batch->mmio[batch->last].data, val, len);
+       batch->last = (batch->last + 1) % KVM_MAX_BATCH;
+}
+
+static void delayed_mmio_destructor(struct kvm_io_device *this)
+{
+       kfree(this);
+}
+
+int kvm_delayed_mmio_init(struct kvm *kvm)
+{
+       struct kvm_delayed_mmio *mmio;
+
+       mmio = kzalloc(sizeof(struct kvm_delayed_mmio), GFP_KERNEL);
+       if (!mmio)
+               return -ENOMEM;
+       mmio->dev.write  = delayed_mmio_write;
+       mmio->dev.in_range  = delayed_mmio_in_range;
+       mmio->dev.destructor  = delayed_mmio_destructor;
+       mmio->dev.private  = mmio;
+       mmio->kvm = kvm;
+       kvm->delayed_mmio = mmio;
+       kvm_io_bus_register_dev(&kvm->mmio_bus, &mmio->dev);
+
+       return 0;
+}
+
+int kvm_vm_ioctl_register_delayed_mmio(struct kvm *kvm,
+                                      struct kvm_delayed_mmio_zone *zone)
+{
+       struct kvm_delayed_mmio *delayed_mmio = kvm->delayed_mmio;
+
+       if (delayed_mmio == NULL)
+               return -EINVAL;
+
+       mutex_lock(&kvm->lock);
+       if (delayed_mmio->nb_zones >= KVM_MAX_DELAYED_MMIO_ZONE) {
+               mutex_unlock(&kvm->lock);
+               return -ENOBUFS;
+       }
+
+       delayed_mmio->zone[kvm->delayed_mmio->nb_zones] = *zone;
+       delayed_mmio->nb_zones++;
+
+       mutex_unlock(&kvm->lock);
+       return 0;
+}
+
+int kvm_vm_ioctl_unregister_delayed_mmio(struct kvm *kvm,
+                                        struct kvm_delayed_mmio_zone *zone)
+{
+       int i;
+       struct kvm_delayed_mmio *delayed_mmio = kvm->delayed_mmio;
+       struct kvm_delayed_mmio_zone *z;
+
+       if (delayed_mmio == NULL)
+               return -EINVAL;
+
+       mutex_lock(&kvm->lock);
+
+       i = delayed_mmio->nb_zones;
+       while(i) {
+               z = &delayed_mmio->zone[i - 1];
+
+               /* unregister all zones
+                * included in (zone->addr, zone->size)
+                */
+
+               if (zone->addr <= z->addr &&
+                   z->addr + z->size <= zone->addr + zone->size) {
+                       delayed_mmio->nb_zones--;
+                       *z = delayed_mmio->zone[delayed_mmio->nb_zones];
+               }
+               i--;
+       }
+
+       mutex_unlock(&kvm->lock);
+
+       return 0;
+}
diff --git a/virt/kvm/delayed_mmio.h b/virt/kvm/delayed_mmio.h
new file mode 100644
index 0000000..cee8e49
--- /dev/null
+++ b/virt/kvm/delayed_mmio.h
@@ -0,0 +1,23 @@
+/*
+ * KVM delayed MMIO
+ *
+ * Copyright (c) 2008 Bull S.A.S.
+ *
+ *  Author: Laurent Vivier <[EMAIL PROTECTED]>
+ *
+ */
+
+#define KVM_MAX_DELAYED_MMIO_ZONE 100
+
+struct kvm_delayed_mmio {
+       struct kvm_io_device dev;
+       struct kvm *kvm;
+       int nb_zones;
+       struct kvm_delayed_mmio_zone zone[KVM_MAX_DELAYED_MMIO_ZONE];
+};
+
+int kvm_delayed_mmio_init(struct kvm *kvm);
+int kvm_vm_ioctl_register_delayed_mmio(struct kvm *kvm,
+                                       struct kvm_delayed_mmio_zone *zone);
+int kvm_vm_ioctl_unregister_delayed_mmio(struct kvm *kvm,
+                                         struct kvm_delayed_mmio_zone *zone);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d602700..de39d40 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,6 +47,10 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
+#ifdef KVM_MMIO_PAGE_OFFSET
+#include "delayed_mmio.h"
+#endif
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -185,10 +189,20 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
 static struct kvm *kvm_create_vm(void)
 {
        struct kvm *kvm = kvm_arch_create_vm();
+       struct page *page;
 
        if (IS_ERR(kvm))
                goto out;
 
+#ifdef KVM_MMIO_PAGE_OFFSET
+       page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+       if (!page) {
+               kfree(kvm);
+               return ERR_PTR(-ENOMEM);
+       }
+       kvm->batch = (struct kvm_batch *)page_address(page);
+#endif
+
        kvm->mm = current->mm;
        atomic_inc(&kvm->mm->mm_count);
        spin_lock_init(&kvm->mmu_lock);
@@ -200,6 +214,9 @@ static struct kvm *kvm_create_vm(void)
        spin_lock(&kvm_lock);
        list_add(&kvm->vm_list, &vm_list);
        spin_unlock(&kvm_lock);
+#ifdef KVM_MMIO_PAGE_OFFSET
+       kvm_delayed_mmio_init(kvm);
+#endif
 out:
        return kvm;
 }
@@ -243,6 +260,10 @@ static void kvm_destroy_vm(struct kvm *kvm)
        kvm_io_bus_destroy(&kvm->pio_bus);
        kvm_io_bus_destroy(&kvm->mmio_bus);
        kvm_arch_destroy_vm(kvm);
+#ifdef KVM_MMIO_PAGE_OFFSET
+       if (kvm->batch != NULL)
+               free_page((unsigned long)kvm->batch);
+#endif
        mmdrop(mm);
 }
 
@@ -826,6 +847,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, 
struct vm_fault *vmf)
        else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
                page = virt_to_page(vcpu->arch.pio_data);
 #endif
+#ifdef KVM_MMIO_PAGE_OFFSET
+       else if (vmf->pgoff == KVM_MMIO_PAGE_OFFSET)
+               page = virt_to_page(vcpu->kvm->batch);
+#endif
        else
                return VM_FAULT_SIGBUS;
        get_page(page);
@@ -1148,6 +1173,32 @@ static long kvm_vm_ioctl(struct file *filp,
                        goto out;
                break;
        }
+#ifdef KVM_MMIO_PAGE_OFFSET
+       case KVM_REGISTER_DELAYED_MMIO: {
+               struct kvm_delayed_mmio_zone zone;
+               r = -EFAULT;
+               if (copy_from_user(&zone, argp, sizeof zone))
+                       goto out;
+               r = -ENXIO;
+               r = kvm_vm_ioctl_register_delayed_mmio(kvm, &zone);
+               if (r)
+                       goto out;
+               r = 0;
+               break;
+       }
+       case KVM_UNREGISTER_DELAYED_MMIO: {
+               struct kvm_delayed_mmio_zone zone;
+               r = -EFAULT;
+               if (copy_from_user(&zone, argp, sizeof zone))
+                       goto out;
+               r = -ENXIO;
+               r = kvm_vm_ioctl_unregister_delayed_mmio(kvm, &zone);
+               if (r)
+                       goto out;
+               r = 0;
+               break;
+       }
+#endif
        default:
                r = kvm_arch_vm_ioctl(filp, ioctl, arg);
        }
@@ -1232,6 +1283,9 @@ static long kvm_dev_ioctl(struct file *filp,
 #ifdef CONFIG_X86
                r += PAGE_SIZE;    /* pio data page */
 #endif
+#ifdef KVM_MMIO_PAGE_OFFSET
+               r += PAGE_SIZE;    /* mmio batch page */
+#endif
                break;
        case KVM_TRACE_ENABLE:
        case KVM_TRACE_PAUSE:
-- 
1.5.2.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to