This patch is the kernel part of the "batch writes to MMIO" patch.

It introduces an ioctl interface to define the MMIO zones where writes are
allowed to be delayed.
Inside such a zone, we can also define sub-ranges that must not be delayed.

If an MMIO write can be delayed, it is stored in a ring buffer which is common
to all VCPUs.

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
---
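For illustration only (not part of the patch): a minimal userspace sketch of how
a VMM could use the new KVM_SET_MMIO vm ioctl, assuming the updated <linux/kvm.h>
and <asm/kvm.h> are visible to userspace; vm_fd, the addresses and the
set_mmio_zone() helper are hypothetical, and error handling is omitted.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_mmio_zone(int vm_fd, __u64 addr, __u32 size, int is_delayed)
{
	struct kvm_mmio_zone zone;

	memset(&zone, 0, sizeof zone);
	zone.is_delayed = is_delayed;
	zone.addr = addr;
	zone.size = size;

	/* vm ioctl: 0 on success, -ENOMEM or -EINVAL on error */
	return ioctl(vm_fd, KVM_SET_MMIO, &zone);
}

static void configure_zones(int vm_fd)
{
	/* delay writes to a 4 KiB device window, but keep a 16-byte
	 * doorbell register at offset 0x100 synchronous
	 */
	set_mmio_zone(vm_fd, 0xfe000000ULL, 0x1000, 1);	/* delayed zone  */
	set_mmio_zone(vm_fd, 0xfe000100ULL, 0x10, 0);	/* excluded part */
}

Note that an exclusion can only refer to an already registered delayed zone,
otherwise kvm_vm_ioctl_set_mmio() returns -EINVAL.
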
 arch/x86/kvm/x86.c         |  172 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/kvm.h      |    7 ++
 include/asm-x86/kvm_host.h |   23 ++++++
 include/linux/kvm.h        |   16 ++++
 virt/kvm/kvm_main.c        |    3 +
 5 files changed, 221 insertions(+), 0 deletions(-)
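
Also for illustration (not part of the patch): a sketch of how the userspace
consumer might drain the shared ring. The batch page is mapped from the vcpu fd
at page offset KVM_MMIO_PAGE_OFFSET (2), i.e. after the kvm_run and PIO pages;
names are illustrative, error handling and memory barriers are omitted.

#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static void drain_mmio_batch(int vcpu_fd)
{
	long page_size = sysconf(_SC_PAGESIZE);
	/* mirrors KVM_MAX_BATCH on the kernel side */
	unsigned int max_batch = (page_size - sizeof(struct kvm_batch)) /
				 sizeof(struct kvm_mmio);
	struct kvm_batch *batch;

	batch = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		     vcpu_fd, 2 * page_size);
	if (batch == MAP_FAILED)
		return;

	/* the kernel only advances "last", userspace only advances "first" */
	while (batch->first != batch->last) {
		struct kvm_mmio *m = &batch->mmio[batch->first];

		/* replay the delayed write (m->data, m->len) at m->phys_addr
		 * into the device model here
		 */
		printf("delayed MMIO write: addr=0x%llx len=%u\n",
		       (unsigned long long)m->phys_addr, m->len);

		batch->first = (batch->first + 1) % max_batch;
	}

	munmap(batch, page_size);
}

A real consumer would also need memory barriers between reading an entry and
publishing the new "first", and would normally keep the mapping for the lifetime
of the vcpu instead of mapping it per drain.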

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dab3d4f..930986b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1518,6 +1518,103 @@ out:
        return r;
 }
 
+static struct kvm_delayed_mmio_zone *kvm_mmio_find_zone(struct kvm *kvm,
+                                                       u64 addr, u32 size)
+{
+       int i;
+       struct kvm_delayed_mmio_zone *zone;
+
+       for (i = 0; i < kvm->arch.nb_mmio_zones; i++) {
+               zone = &kvm->arch.mmio_zone[i];
+
+               /* (addr,size) is fully included in
+                * (zone->addr, zone->size)
+                */
+
+               if (zone->addr <= addr &&
+                   addr + size <= zone->addr + zone->size)
+                       return zone;
+       }
+       return NULL;
+}
+
+static struct kvm_excluded_mmio_zone *
+kvm_mmio_find_excluded(struct kvm_delayed_mmio_zone *zone, u64 addr, u32 size)
+{
+       struct kvm_excluded_mmio_zone *excluded;
+       int i;
+
+       addr -= zone->addr;
+       for (i = 0; i < zone->nb_excluded_zones; i++) {
+               excluded = &zone->excluded[i];
+
+               /* do [addr, addr + size) and
+                * [offset, offset + size) overlap?
+                */
+               if (addr < excluded->offset + excluded->size &&
+                   excluded->offset < addr + size)
+                       return excluded;
+       }
+       return NULL;
+}
+
+static int kvm_is_delayed_mmio(struct kvm *kvm, u64 addr, u32 size)
+{
+       struct kvm_delayed_mmio_zone *zone;
+       struct kvm_excluded_mmio_zone *excluded;
+
+       zone = kvm_mmio_find_zone(kvm, addr, size);
+       if (zone == NULL)
+               return 0;       /* not a delayed MMIO address */
+
+       excluded = kvm_mmio_find_excluded(zone, addr, size);
+       return excluded == NULL;
+}
+
+static int kvm_vm_ioctl_set_mmio(struct kvm *kvm,
+                                struct kvm_mmio_zone *zone)
+{
+       struct kvm_delayed_mmio_zone *z;
+
+       if (zone->is_delayed &&
+           kvm->arch.nb_mmio_zones >= KVM_MAX_DELAYED_MMIO_ZONE)
+               return -ENOMEM;
+
+       if (zone->is_delayed) {
+
+               /* already defined? */
+
+               if (kvm_mmio_find_zone(kvm, zone->addr, 1) ||
+                   kvm_mmio_find_zone(kvm, zone->addr + zone->size - 1, 1))
+                       return 0;
+
+               z = &kvm->arch.mmio_zone[kvm->arch.nb_mmio_zones];
+               z->addr = zone->addr;
+               z->size = zone->size;
+               kvm->arch.nb_mmio_zones++;
+               return 0;
+       }
+
+       /* exclude some parts of the delayed MMIO zone */
+
+       z = kvm_mmio_find_zone(kvm, zone->addr, zone->size);
+       if (z == NULL)
+               return -EINVAL;
+
+       if (z->nb_excluded_zones >= KVM_MAX_EXCLUDED_MMIO_ZONE)
+               return -ENOMEM;
+
+       if (kvm_mmio_find_excluded(z, zone->addr, 1) ||
+           kvm_mmio_find_excluded(z, zone->addr + zone->size - 1, 1))
+               return 0;
+
+       z->excluded[z->nb_excluded_zones].offset = zone->addr - z->addr;
+       z->excluded[z->nb_excluded_zones].size = zone->size;
+       z->nb_excluded_zones++;
+
+       return 0;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
 {
@@ -1671,6 +1768,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = 0;
                break;
        }
+       case KVM_SET_MMIO: {
+               struct kvm_mmio_zone zone;
+
+               r = -EFAULT;
+               if (copy_from_user(&zone, argp, sizeof zone))
+                       goto out;
+               r = kvm_vm_ioctl_set_mmio(kvm, &zone);
+               if (r)
+                       goto out;
+               r = 0;
+               break;
+       }
        default:
                ;
        }
@@ -2706,6 +2815,52 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
        mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
 }
 
+static int batch_mmio(struct kvm_vcpu *vcpu)
+{
+       struct kvm_batch *batch = vcpu->kvm->arch.batch;
+       spinlock_t *lock = &vcpu->kvm->arch.batch_lock;
+       int next;
+
+       /* check if this MMIO can be delayed */
+
+       if (!kvm_is_delayed_mmio(vcpu->kvm,
+                                vcpu->mmio_phys_addr, vcpu->mmio_size))
+               return 0;
+
+       /* check whether the ring is full
+        * we take no lock on "first":
+        * as "first" can only increase, the worst
+        * case is a false "full"
+        */
+
+       spin_lock(lock);
+
+       /* "last" is the first free entry
+        * check that it does not catch up with the first used
+        * entry; one entry in the buffer is always left unused
+        */
+
+       next = (batch->last + 1) % KVM_MAX_BATCH;
+       if (next == batch->first) {
+               /* full */
+               spin_unlock(lock);
+               return 0;
+       }
+
+       /* batch it */
+
+       /* copy the data into the first free entry of the ring */
+
+       batch->mmio[batch->last].phys_addr = vcpu->mmio_phys_addr;
+       batch->mmio[batch->last].len = vcpu->mmio_size;
+       memcpy(batch->mmio[batch->last].data, vcpu->mmio_data, vcpu->mmio_size);
+       batch->last = next;
+
+       spin_unlock(lock);
+
+       return 1;
+}
+
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        int r;
@@ -2857,6 +3012,11 @@ again:
                        goto again;
        }
 
+       if (!r && vcpu->mmio_is_write &&
+           kvm_run->exit_reason == KVM_EXIT_MMIO &&
+           !need_resched() && batch_mmio(vcpu))
+               goto again;
+
 out:
        up_read(&vcpu->kvm->slots_lock);
        if (r > 0) {
@@ -3856,12 +4016,22 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 struct  kvm *kvm_arch_create_vm(void)
 {
        struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+       struct page *page;
 
        if (!kvm)
                return ERR_PTR(-ENOMEM);
 
+       page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+       if (!page) {
+               kfree(kvm);
+               return ERR_PTR(-ENOMEM);
+       }
+
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 
+       spin_lock_init(&kvm->arch.batch_lock);
+       kvm->arch.batch = (struct kvm_batch *)page_address(page);
+
        return kvm;
 }
 
@@ -3902,6 +4072,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
                put_page(kvm->arch.apic_access_page);
        if (kvm->arch.ept_identity_pagetable)
                put_page(kvm->arch.ept_identity_pagetable);
+       if (kvm->arch.batch)
+               free_page((unsigned long)kvm->arch.batch);
        kfree(kvm);
 }
 
diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
index 6f18408..3c4a611 100644
--- a/include/asm-x86/kvm.h
+++ b/include/asm-x86/kvm.h
@@ -209,6 +209,13 @@ struct kvm_pit_state {
        struct kvm_pit_channel_state channels[3];
 };
 
+struct kvm_mmio_zone {
+       __u8 is_delayed;
+       __u8 pad[3];
+       __u32 size;
+       __u64 addr;
+};
+
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
 #define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 1466c3f..df42cdb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -26,6 +26,7 @@
 #define KVM_PRIVATE_MEM_SLOTS 4
 
 #define KVM_PIO_PAGE_OFFSET 1
+#define KVM_MMIO_PAGE_OFFSET 2
 
 #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
 #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
@@ -293,6 +294,21 @@ struct kvm_mem_alias {
        gfn_t target_gfn;
 };
 
+#define KVM_MAX_DELAYED_MMIO_ZONE 10
+#define KVM_MAX_EXCLUDED_MMIO_ZONE 10
+
+struct kvm_excluded_mmio_zone {
+       u32 offset;
+       u32 size;
+};
+
+struct kvm_delayed_mmio_zone {
+       u64 addr;
+       u32 size;
+       u32 nb_excluded_zones;
+       struct kvm_excluded_mmio_zone excluded[KVM_MAX_EXCLUDED_MMIO_ZONE];
+};
+
 struct kvm_arch{
        int naliases;
        struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -317,6 +333,13 @@ struct kvm_arch{
 
        struct page *ept_identity_pagetable;
        bool ept_identity_pagetable_done;
+
+       /* MMIO batch */
+
+       spinlock_t batch_lock;
+       struct kvm_batch *batch;
+       int nb_mmio_zones;
+       struct kvm_delayed_mmio_zone mmio_zone[KVM_MAX_DELAYED_MMIO_ZONE];
 };
 
 struct kvm_vm_stat {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a281afe..b57010d 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -173,6 +173,21 @@ struct kvm_run {
        };
 };
 
+struct kvm_mmio {
+       __u64 phys_addr;
+       __u32 len;
+       __u32 pad;
+       __u8  data[8];
+};
+
+struct kvm_batch {
+       __u32 first, last;
+       struct kvm_mmio mmio[0];
+};
+
+#define KVM_MAX_BATCH ((PAGE_SIZE - sizeof(struct kvm_batch)) / \
+                                               sizeof(struct kvm_mmio))
+
 /* for KVM_TRANSLATE */
 struct kvm_translation {
        /* in */
@@ -371,6 +386,7 @@ struct kvm_trace_rec {
 #define KVM_CREATE_PIT           _IO(KVMIO,  0x64)
 #define KVM_GET_PIT              _IOWR(KVMIO, 0x65, struct kvm_pit_state)
 #define KVM_SET_PIT              _IOR(KVMIO,  0x66, struct kvm_pit_state)
+#define KVM_SET_MMIO             _IOW(KVMIO,  0x67, struct kvm_mmio_zone)
 
 /*
  * ioctls for vcpu fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 64ed402..c8f1bdf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -824,6 +824,8 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 #ifdef CONFIG_X86
        else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
                page = virt_to_page(vcpu->arch.pio_data);
+       else if (vmf->pgoff == KVM_MMIO_PAGE_OFFSET)
+               page = virt_to_page(vcpu->kvm->arch.batch);
 #endif
        else
                return VM_FAULT_SIGBUS;
@@ -1230,6 +1232,7 @@ static long kvm_dev_ioctl(struct file *filp,
                r = PAGE_SIZE;     /* struct kvm_run */
 #ifdef CONFIG_X86
                r += PAGE_SIZE;    /* pio data page */
+               r += PAGE_SIZE;    /* mmio batch page */
 #endif
                break;
        case KVM_TRACE_ENABLE:
-- 
1.5.2.4

