This patch is the kernel part of the "batch writes to MMIO" patch.
It introduces an ioctl interface to define MMIO zones in which writes may
be delayed. Inside such a zone, sub-ranges can be excluded so that writes
to them are never delayed.
When an MMIO write can be delayed, it is stored in a ring buffer that is
common to all VCPUs of the VM.
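As an illustration (not part of this patch), the userspace side could drive
this interface roughly as follows. This is only a sketch: vm_fd, vcpu_fd and
page_size are assumed to be set up already, the guest addresses and the
handle_mmio_write() helper are hypothetical, and error handling is omitted.

	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <linux/kvm.h>

	static void batch_example(int vm_fd, int vcpu_fd, long page_size)
	{
		struct kvm_mmio_zone zone = {
			.is_delayed = 1,
			.addr = 0xe0000000,	/* hypothetical framebuffer base */
			.size = 0x01000000,
		};
		struct kvm_batch *batch;

		/* writes to this zone may be delayed... */
		ioctl(vm_fd, KVM_SET_MMIO, &zone);

		/* ...except writes to this sub-range */
		zone.is_delayed = 0;
		zone.addr = 0xe0fff000;
		zone.size = 0x1000;
		ioctl(vm_fd, KVM_SET_MMIO, &zone);

		/* the shared ring lives in the page mapped from the vcpu fd
		 * at page offset KVM_MMIO_PAGE_OFFSET (2)
		 */
		batch = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
			     MAP_SHARED, vcpu_fd, 2 * page_size);

		/* drain the ring: userspace advances "first", the kernel "last" */
		while (batch->first != batch->last) {
			struct kvm_mmio *m = &batch->mmio[batch->first];

			/* hypothetical device-model helper */
			handle_mmio_write(m->phys_addr, m->data, m->len);
			batch->first = (batch->first + 1) % KVM_MAX_BATCH;
		}
	}

Userspace only writes "first" and the kernel only writes "last", so no lock
is needed between producer and consumer; kvm->arch.batch_lock only serializes
the VCPUs producing entries.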
Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
---
arch/x86/kvm/x86.c | 172 ++++++++++++++++++++++++++++++++++++++++++++
include/asm-x86/kvm.h | 7 ++
include/asm-x86/kvm_host.h | 23 ++++++
include/linux/kvm.h | 16 ++++
virt/kvm/kvm_main.c | 3 +
5 files changed, 221 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dab3d4f..930986b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1518,6 +1518,103 @@ out:
return r;
}
+static struct kvm_delayed_mmio_zone *kvm_mmio_find_zone(struct kvm *kvm,
+ u64 addr, u32 size)
+{
+ int i;
+ struct kvm_delayed_mmio_zone *zone;
+
+ for (i = 0; i < kvm->arch.nb_mmio_zones; i++) {
+ zone = &kvm->arch.mmio_zone[i];
+
+ /* (addr,size) is fully included in
+ * (zone->addr, zone->size)
+ */
+
+ if (zone->addr <= addr &&
+ addr + size <= zone->addr + zone->size)
+ return zone;
+ }
+ return NULL;
+}
+
+static struct kvm_excluded_mmio_zone *
+kvm_mmio_find_excluded(struct kvm_delayed_mmio_zone *zone, u64 addr, u32 size)
+{
+ struct kvm_excluded_mmio_zone *excluded;
+ int i;
+
+ addr -= zone->addr;
+ for (i = 0; i < zone->nb_excluded_zones; i++) {
+ excluded = &zone->excluded[i];
+
+ /* [addr, addr + size) overlaps the excluded range */
+ if (excluded->offset < addr + size &&
+ addr < excluded->offset + excluded->size)
+ return excluded;
+ }
+ return NULL;
+}
+
+static int kvm_is_delayed_mmio(struct kvm *kvm, u64 addr, u32 size)
+{
+ struct kvm_delayed_mmio_zone *zone;
+ struct kvm_excluded_mmio_zone *excluded;
+
+ zone = kvm_mmio_find_zone(kvm, addr, size);
+ if (zone == NULL)
+ return 0; /* not a delayed MMIO address */
+
+ excluded = kvm_mmio_find_excluded(zone, addr, size);
+ return excluded == NULL;
+}
+
+static int kvm_vm_ioctl_set_mmio(struct kvm *kvm,
+ struct kvm_mmio_zone *zone)
+{
+ struct kvm_delayed_mmio_zone *z;
+
+ if (zone->is_delayed &&
+ kvm->arch.nb_mmio_zones >= KVM_MAX_DELAYED_MMIO_ZONE)
+ return -ENOMEM;
+
+ if (zone->is_delayed) {
+
+ /* already defined ? */
+
+ if (kvm_mmio_find_zone(kvm, zone->addr, 1) ||
+ kvm_mmio_find_zone(kvm, zone->addr + zone->size - 1, 1))
+ return 0;
+
+ z = &kvm->arch.mmio_zone[kvm->arch.nb_mmio_zones];
+ z->addr = zone->addr;
+ z->size = zone->size;
+ kvm->arch.nb_mmio_zones++;
+ return 0;
+ }
+
+ /* exclude some parts of the delayed MMIO zone */
+
+ z = kvm_mmio_find_zone(kvm, zone->addr, zone->size);
+ if (z == NULL)
+ return -EINVAL;
+
+ if (z->nb_excluded_zones >= KVM_MAX_EXCLUDED_MMIO_ZONE)
+ return -ENOMEM;
+
+ if (kvm_mmio_find_excluded(z, zone->addr, 1) ||
+ kvm_mmio_find_excluded(z, zone->addr + zone->size - 1, 1))
+ return 0;
+
+ z->excluded[z->nb_excluded_zones].offset = zone->addr - z->addr;
+ z->excluded[z->nb_excluded_zones].size = zone->size;
+ z->nb_excluded_zones++;
+
+ return 0;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1671,6 +1768,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_SET_MMIO: {
+ struct kvm_mmio_zone zone;
+ r = -EFAULT;
+ if (copy_from_user(&zone, argp, sizeof zone))
+ goto out;
+ r = kvm_vm_ioctl_set_mmio(kvm, &zone);
+ if (r)
+ goto out;
+ r = 0;
+ break;
+ }
default:
;
}
@@ -2706,6 +2815,52 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
}
+static int batch_mmio(struct kvm_vcpu *vcpu)
+{
+ struct kvm_batch *batch = vcpu->kvm->arch.batch;
+ spinlock_t *lock = &vcpu->kvm->arch.batch_lock;
+ int next;
+
+ /* check if this MMIO can be delayed */
+
+ if (!kvm_is_delayed_mmio(vcpu->kvm,
+ vcpu->mmio_phys_addr, vcpu->mmio_size))
+ return 0;
+
+ /* check whether the ring is full.
+ * "first" is updated by userspace without taking our lock;
+ * since it can only move forward, the worst case is a
+ * spurious "full" indication.
+ */
+
+ spin_lock(lock);
+
+ /* "last" is the first free entry;
+ * make sure it does not catch up with the first used entry:
+ * one entry of the ring is always left unused.
+ */
+
+ next = (batch->last + 1) % KVM_MAX_BATCH;
+ if (next == batch->first) {
+ /* full */
+ spin_unlock(lock);
+ return 0;
+ }
+
+ /* batch it */
+
+ /* copy data in first free entry of the ring */
+
+ batch->mmio[batch->last].phys_addr = vcpu->mmio_phys_addr;
+ batch->mmio[batch->last].len = vcpu->mmio_size;
+ memcpy(batch->mmio[batch->last].data, vcpu->mmio_data, vcpu->mmio_size);
+ batch->last = next;
+
+ spin_unlock(lock);
+
+ return 1;
+}
+
static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
@@ -2857,6 +3012,11 @@ again:
goto again;
}
+ if (!r &&
+ vcpu->mmio_is_write && kvm_run->exit_reason == KVM_EXIT_MMIO
+ && !need_resched() && batch_mmio(vcpu))
+ goto again;
+
out:
up_read(&vcpu->kvm->slots_lock);
if (r > 0) {
@@ -3856,12 +4016,22 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
struct kvm *kvm_arch_create_vm(void)
{
struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+ struct page *page;
if (!kvm)
return ERR_PTR(-ENOMEM);
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page) {
+ kfree(kvm);
+ return ERR_PTR(-ENOMEM);
+ }
+
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+ spin_lock_init(&kvm->arch.batch_lock);
+ kvm->arch.batch = (struct kvm_batch *)page_address(page);
+
return kvm;
}
@@ -3902,6 +4072,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
put_page(kvm->arch.apic_access_page);
if (kvm->arch.ept_identity_pagetable)
put_page(kvm->arch.ept_identity_pagetable);
+ if (kvm->arch.batch)
+ free_page((unsigned long)kvm->arch.batch);
kfree(kvm);
}
diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
index 6f18408..3c4a611 100644
--- a/include/asm-x86/kvm.h
+++ b/include/asm-x86/kvm.h
@@ -209,6 +209,13 @@ struct kvm_pit_state {
struct kvm_pit_channel_state channels[3];
};
+struct kvm_mmio_zone {
+ __u8 is_delayed;
+ __u8 pad[3];
+ __u32 size;
+ __u64 addr;
+};
+
#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04)
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 1466c3f..df42cdb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -26,6 +26,7 @@
#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_PIO_PAGE_OFFSET 1
+#define KVM_MMIO_PAGE_OFFSET 2
#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
@@ -293,6 +294,21 @@ struct kvm_mem_alias {
gfn_t target_gfn;
};
+#define KVM_MAX_DELAYED_MMIO_ZONE 10
+#define KVM_MAX_EXCLUDED_MMIO_ZONE 10
+
+struct kvm_excluded_mmio_zone {
+ u32 offset;
+ u32 size;
+};
+
+struct kvm_delayed_mmio_zone {
+ u64 addr;
+ u32 size;
+ u32 nb_excluded_zones;
+ struct kvm_excluded_mmio_zone excluded[KVM_MAX_EXCLUDED_MMIO_ZONE];
+};
+
struct kvm_arch{
int naliases;
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -317,6 +333,13 @@ struct kvm_arch{
struct page *ept_identity_pagetable;
bool ept_identity_pagetable_done;
+
+ /* MMIO batch */
+
+ spinlock_t batch_lock;
+ struct kvm_batch *batch;
+ int nb_mmio_zones;
+ struct kvm_delayed_mmio_zone mmio_zone[KVM_MAX_DELAYED_MMIO_ZONE];
};
struct kvm_vm_stat {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a281afe..b57010d 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -173,6 +173,21 @@ struct kvm_run {
};
};
+struct kvm_mmio {
+ __u64 phys_addr;
+ __u32 len;
+ __u32 pad;
+ __u8 data[8];
+};
+
+struct kvm_batch {
+ __u32 first, last;
+ struct kvm_mmio mmio[0];
+};
+
+#define KVM_MAX_BATCH ((PAGE_SIZE - sizeof(struct kvm_batch)) / \
+ sizeof(struct kvm_mmio))
+
/* for KVM_TRANSLATE */
struct kvm_translation {
/* in */
@@ -371,6 +386,7 @@ struct kvm_trace_rec {
#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state)
#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state)
+#define KVM_SET_MMIO _IOW(KVMIO, 0x67, struct kvm_mmio_zone)
/*
* ioctls for vcpu fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 64ed402..c8f1bdf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -824,6 +824,8 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma,
struct vm_fault *vmf)
#ifdef CONFIG_X86
else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
page = virt_to_page(vcpu->arch.pio_data);
+ else if (vmf->pgoff == KVM_MMIO_PAGE_OFFSET)
+ page = virt_to_page(vcpu->kvm->arch.batch);
#endif
else
return VM_FAULT_SIGBUS;
@@ -1230,6 +1232,7 @@ static long kvm_dev_ioctl(struct file *filp,
r = PAGE_SIZE; /* struct kvm_run */
#ifdef CONFIG_X86
r += PAGE_SIZE; /* pio data page */
+ r += PAGE_SIZE; /* mmio batch page */
#endif
break;
case KVM_TRACE_ENABLE:
--
1.5.2.4