This patch is the kernel part of the "batch writes to MMIO" patch.
It introduces an ioctl interface to define the MMIO zones whose writes are allowed to be delayed. Inside a zone, we can define sub-parts that must not be delayed. If an MMIO write can be delayed, it is stored in a ring buffer which is common to all VCPUs.

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86.c         |  172 ++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/kvm.h      |    7 ++
 include/asm-x86/kvm_host.h |   23 ++++++
 include/linux/kvm.h        |   16 ++++
 virt/kvm/kvm_main.c        |    3 +
 5 files changed, 221 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dab3d4f..930986b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1518,6 +1518,103 @@ out:
 	return r;
 }
 
+static struct kvm_delayed_mmio_zone *kvm_mmio_find_zone(struct kvm *kvm,
+							 u64 addr, u32 size)
+{
+	int i;
+	struct kvm_delayed_mmio_zone *zone;
+
+	for (i = 0; i < kvm->arch.nb_mmio_zones; i++) {
+		zone = &kvm->arch.mmio_zone[i];
+
+		/* (addr,size) is fully included in
+		 * (zone->addr, zone->size)
+		 */
+
+		if (zone->addr <= addr &&
+		    addr + size <= zone->addr + zone->size)
+			return zone;
+	}
+	return NULL;
+}
+
+static struct kvm_excluded_mmio_zone *
+kvm_mmio_find_excluded(struct kvm_delayed_mmio_zone *zone, u64 addr, u32 size)
+{
+	struct kvm_excluded_mmio_zone *excluded;
+	int i;
+
+	addr -= zone->addr;
+	for (i = 0; i < zone->nb_excluded_zones; i++) {
+		excluded = &zone->excluded[i];
+
+		if ((excluded->offset <= addr &&
+		     addr < excluded->offset + excluded->size) ||
+		    (excluded->offset < addr + size &&
+		     addr + size <= excluded->offset + excluded->size))
+			return excluded;
+	}
+	return NULL;
+}
+
+static int kvm_is_delayed_mmio(struct kvm *kvm, u64 addr, u32 size)
+{
+	struct kvm_delayed_mmio_zone *zone;
+	struct kvm_excluded_mmio_zone *excluded;
+
+	zone = kvm_mmio_find_zone(kvm, addr, size);
+	if (zone == NULL)
+		return 0;	/* not a delayed MMIO address */
+
+	excluded = kvm_mmio_find_excluded(zone, addr, size);
+	return excluded == NULL;
+}
+
+static int kvm_vm_ioctl_set_mmio(struct kvm *kvm,
+				 struct kvm_mmio_zone *zone)
+{
+	struct kvm_delayed_mmio_zone *z;
+
+	if (zone->is_delayed &&
+	    kvm->arch.nb_mmio_zones >= KVM_MAX_DELAYED_MMIO_ZONE)
+		return -ENOMEM;
+
+	if (zone->is_delayed) {
+
+		/* already defined ?
+		 */
+
+		if (kvm_mmio_find_zone(kvm, zone->addr, 1) ||
+		    kvm_mmio_find_zone(kvm, zone->addr + zone->size - 1, 1))
+			return 0;
+
+		z = &kvm->arch.mmio_zone[kvm->arch.nb_mmio_zones];
+		z->addr = zone->addr;
+		z->size = zone->size;
+		kvm->arch.nb_mmio_zones++;
+		return 0;
+	}
+
+	/* exclude some parts of the delayed MMIO zone */
+
+	z = kvm_mmio_find_zone(kvm, zone->addr, zone->size);
+	if (z == NULL)
+		return -EINVAL;
+
+	if (z->nb_excluded_zones >= KVM_MAX_EXCLUDED_MMIO_ZONE)
+		return -ENOMEM;
+
+	if (kvm_mmio_find_excluded(z, zone->addr, 1) ||
+	    kvm_mmio_find_excluded(z, zone->addr + zone->size - 1, 1))
+		return 0;
+
+	z->excluded[z->nb_excluded_zones].offset = zone->addr - z->addr;
+	z->excluded[z->nb_excluded_zones].size = zone->size;
+	z->nb_excluded_zones++;
+
+	return 0;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -1671,6 +1768,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_SET_MMIO: {
+		struct kvm_mmio_zone zone;
+		r = -EFAULT;
+		if (copy_from_user(&zone, argp, sizeof zone))
+			goto out;
+		r = -ENXIO;
+		r = kvm_vm_ioctl_set_mmio(kvm, &zone);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 		;
 	}
@@ -2706,6 +2815,52 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
 	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
 }
 
+static int batch_mmio(struct kvm_vcpu *vcpu)
+{
+	struct kvm_batch *batch = vcpu->kvm->arch.batch;
+	spinlock_t *lock = &vcpu->kvm->arch.batch_lock;
+	int next;
+
+	/* check if this MMIO can be delayed */
+
+	if (!kvm_is_delayed_mmio(vcpu->kvm,
+				 vcpu->mmio_phys_addr, vcpu->mmio_size))
+		return 0;
+
+	/* check if ring is full
+	 * we have no lock on "first"
+	 * as it can only increase we can only have
+	 * a false "full".
+	 */
+
+	spin_lock(lock);
+
+	/* last is the first free entry
+	 * check if we don't meet the first used entry
+	 * there is always one unused entry in the buffer
+	 */
+
+	next = (batch->last + 1) % KVM_MAX_BATCH;
+	if (next == batch->first) {
+		/* full */
+		spin_unlock(lock);
+		return 0;
+	}
+
+	/* batch it */
+
+	/* copy data in first free entry of the ring */
+
+	batch->mmio[batch->last].phys_addr = vcpu->mmio_phys_addr;
+	batch->mmio[batch->last].len = vcpu->mmio_size;
+	memcpy(batch->mmio[batch->last].data, vcpu->mmio_data, vcpu->mmio_size);
+	batch->last = next;
+
+	spin_unlock(lock);
+
+	return 1;
+}
+
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -2857,6 +3012,11 @@ again:
 		goto again;
 	}
 
+	if (!r &&
+	    vcpu->mmio_is_write && kvm_run->exit_reason == KVM_EXIT_MMIO
+	    && !need_resched() && batch_mmio(vcpu))
+		goto again;
+
 out:
 	up_read(&vcpu->kvm->slots_lock);
 	if (r > 0) {
@@ -3856,12 +4016,22 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 struct kvm *kvm_arch_create_vm(void)
 {
 	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+	struct page *page;
 
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page) {
+		kfree(kvm);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 
+	kvm->arch.batch_lock = __SPIN_LOCK_UNLOCKED(batch_lock);
+	kvm->arch.batch = (struct kvm_batch *)page_address(page);
+
 	return kvm;
 }
 
@@ -3902,6 +4072,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		put_page(kvm->arch.apic_access_page);
 	if (kvm->arch.ept_identity_pagetable)
 		put_page(kvm->arch.ept_identity_pagetable);
+	if (kvm->arch.batch)
+		free_page((unsigned long)kvm->arch.batch);
 	kfree(kvm);
 }
 
diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
index 6f18408..3c4a611 100644
--- a/include/asm-x86/kvm.h
+++ b/include/asm-x86/kvm.h
@@ -209,6 +209,13 @@ struct kvm_pit_state {
 	struct kvm_pit_channel_state channels[3];
 };
 
+struct kvm_mmio_zone {
+	__u8  is_delayed;
+	__u8  pad[3];
+	__u32 size;
+	__u64 addr;
+};
+
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
 #define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 1466c3f..df42cdb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -26,6 +26,7 @@
 #define KVM_PRIVATE_MEM_SLOTS 4
 
 #define KVM_PIO_PAGE_OFFSET 1
+#define KVM_MMIO_PAGE_OFFSET 2
 
 #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
 #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
@@ -293,6 +294,21 @@ struct kvm_mem_alias {
 	gfn_t target_gfn;
 };
 
+#define KVM_MAX_DELAYED_MMIO_ZONE	10
+#define KVM_MAX_EXCLUDED_MMIO_ZONE	10
+
+struct kvm_excluded_mmio_zone {
+	u32 offset;
+	u32 size;
+};
+
+struct kvm_delayed_mmio_zone {
+	u64 addr;
+	u32 size;
+	u32 nb_excluded_zones;
+	struct kvm_excluded_mmio_zone excluded[KVM_MAX_EXCLUDED_MMIO_ZONE];
+};
+
 struct kvm_arch{
 	int naliases;
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -317,6 +333,13 @@ struct kvm_arch{
 
 	struct page *ept_identity_pagetable;
 	bool ept_identity_pagetable_done;
+
+	/* MMIO batch */
+
+	spinlock_t batch_lock;
+	struct kvm_batch *batch;
+	int nb_mmio_zones;
+	struct kvm_delayed_mmio_zone mmio_zone[KVM_MAX_DELAYED_MMIO_ZONE];
 };
 
 struct kvm_vm_stat {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a281afe..b57010d 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -173,6 +173,21 @@ struct kvm_run {
 	};
 };
 
+struct kvm_mmio {
+	__u64 phys_addr;
+	__u32 len;
+	__u32 pad;
+	__u8  data[8];
+};
+
+struct kvm_batch {
+	__u32 first, last;
+	struct kvm_mmio mmio[0];
+};
+
+#define KVM_MAX_BATCH ((PAGE_SIZE - sizeof(struct kvm_batch)) / \
+		       sizeof(struct kvm_mmio))
+
 /* for KVM_TRANSLATE */
 struct kvm_translation {
 	/* in */
@@ -371,6 +386,7 @@ struct kvm_trace_rec {
 #define KVM_CREATE_PIT		  _IO(KVMIO,  0x64)
 #define KVM_GET_PIT		  _IOWR(KVMIO, 0x65, struct kvm_pit_state)
 #define KVM_SET_PIT		  _IOR(KVMIO,  0x66, struct kvm_pit_state)
+#define KVM_SET_MMIO		  _IOW(KVMIO,  0x67, struct kvm_mmio_zone)
 
 /*
  * ioctls for vcpu fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 64ed402..c8f1bdf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -824,6 +824,8 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 #ifdef CONFIG_X86
 	else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
 		page = virt_to_page(vcpu->arch.pio_data);
+	else if (vmf->pgoff == KVM_MMIO_PAGE_OFFSET)
+		page = virt_to_page(vcpu->kvm->arch.batch);
 #endif
 	else
 		return VM_FAULT_SIGBUS;
@@ -1230,6 +1232,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = PAGE_SIZE;     /* struct kvm_run */
 #ifdef CONFIG_X86
 		r += PAGE_SIZE;    /* pio data page */
+		r += PAGE_SIZE;    /* mmio batch page */
 #endif
 		break;
 	case KVM_TRACE_ENABLE:
-- 
1.5.2.4
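
For illustration only, here is a rough sketch of how userspace might drive this interface. It is not part of the patch: the helper names (define_delayed_zone, exclude_sub_zone, map_batch, flush_batch), the do_write callback and the idea that the consumer frees ring entries by advancing batch->first are assumptions of mine; only KVM_SET_MMIO, KVM_GET_VCPU_MMAP_SIZE, KVM_MMIO_PAGE_OFFSET and the kvm_mmio_zone/kvm_batch/kvm_mmio layouts come from the patch and the existing KVM API.

/*
 * Hypothetical userspace sketch, not part of this patch.
 * Assumes kvm_fd (/dev/kvm), vm_fd and vcpu_fd were obtained through the
 * usual KVM_CREATE_VM / KVM_CREATE_VCPU sequence and that the headers
 * modified above are installed.
 */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/kvm.h>

/* Declare a delayed MMIO zone: writes inside it may be batched. */
static int define_delayed_zone(int vm_fd, __u64 addr, __u32 size)
{
	struct kvm_mmio_zone zone = {
		.is_delayed = 1,
		.addr = addr,
		.size = size,
	};

	return ioctl(vm_fd, KVM_SET_MMIO, &zone);
}

/* Carve a sub-part out of an already declared zone: writes there must
 * not be delayed. */
static int exclude_sub_zone(int vm_fd, __u64 addr, __u32 size)
{
	struct kvm_mmio_zone zone = {
		.is_delayed = 0,
		.addr = addr,
		.size = size,
	};

	return ioctl(vm_fd, KVM_SET_MMIO, &zone);
}

/* Map the shared batch ring: the patch exposes it as page 2
 * (KVM_MMIO_PAGE_OFFSET) of the vcpu mapping. */
static struct kvm_batch *map_batch(int kvm_fd, int vcpu_fd)
{
	int size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	char *run;

	if (size < 0)
		return NULL;

	run = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_fd, 0);
	if (run == MAP_FAILED)
		return NULL;

	/* page 0: struct kvm_run, page 1: pio data, page 2: mmio batch */
	return (struct kvm_batch *)(run + 2 * getpagesize());
}

/* Drain the ring, e.g. before handling a non-delayed KVM_EXIT_MMIO.
 * The patch only shows the producer side; freeing entries by advancing
 * "first" is an assumption about the intended consumer protocol. */
static void flush_batch(struct kvm_batch *batch,
			void (*do_write)(__u64 addr, const void *data, __u32 len))
{
	__u32 max = (getpagesize() - sizeof(*batch)) / sizeof(batch->mmio[0]);

	while (batch->first != batch->last) {
		struct kvm_mmio *m = &batch->mmio[batch->first];

		do_write(m->phys_addr, m->data, m->len);
		batch->first = (batch->first + 1) % max;
	}
}

Note that a single kvm_mmio_zone structure serves both calls: is_delayed selects between declaring a batchable zone and carving a non-batchable sub-part out of one.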