Batch pte updates and tlb flushes in lazy MMU mode. v1->v2: - report individual hypercall error code, have multicall return number of processed entries. - cover entire multicall duration with slots_lock instead of acquiring/reacquiring.
Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]> Cc: Anthony Liguori <[EMAIL PROTECTED]> Index: kvm.paravirt2/arch/x86/kernel/kvm.c =================================================================== --- kvm.paravirt2.orig/arch/x86/kernel/kvm.c +++ kvm.paravirt2/arch/x86/kernel/kvm.c @@ -25,6 +25,77 @@ #include <linux/kvm_para.h> #include <linux/cpu.h> #include <linux/mm.h> +#include <linux/hardirq.h> + +#define MAX_MULTICALL_NR (PAGE_SIZE / sizeof(struct kvm_multicall_entry)) + +struct kvm_para_state { + struct kvm_multicall_entry queue[MAX_MULTICALL_NR]; + int queue_index; + enum paravirt_lazy_mode mode; +}; + +static DEFINE_PER_CPU(struct kvm_para_state, para_state); + +static int can_defer_hypercall(struct kvm_para_state *state, unsigned int nr) +{ + if (state->mode == PARAVIRT_LAZY_MMU) { + switch (nr) { + case KVM_HYPERCALL_MMU_WRITE: + case KVM_HYPERCALL_FLUSH_TLB: + return 1; + } + } + return 0; +} + +static void hypercall_queue_flush(struct kvm_para_state *state) +{ + long ret; + + if (state->queue_index) { + ret = kvm_hypercall2(KVM_HYPERCALL_MULTICALL, + __pa(&state->queue), state->queue_index); + WARN_ON (ret != state->queue_index); + state->queue_index = 0; + } +} + +static void kvm_hypercall_defer(struct kvm_para_state *state, + unsigned int nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3) +{ + struct kvm_multicall_entry *entry; + + BUG_ON(preemptible()); + + if (state->queue_index == MAX_MULTICALL_NR) + hypercall_queue_flush(state); + + entry = &state->queue[state->queue_index++]; + entry->nr = nr; + entry->a0 = a0; + entry->a1 = a1; + entry->a2 = a2; + entry->a3 = a3; +} + +static long kvm_hypercall(unsigned int nr, unsigned long a0, + unsigned long a1, unsigned long a2, + unsigned long a3) +{ + struct kvm_para_state *state = &get_cpu_var(para_state); + long ret = 0; + + if (can_defer_hypercall(state, nr)) + kvm_hypercall_defer(state, nr, a0, a1, a2, a3); + else + ret = kvm_hypercall4(nr, a0, a1, a2, a3); + + 
put_cpu_var(para_state); + return ret; +} /* * No need for any "IO delay" on KVM @@ -44,8 +115,8 @@ static void kvm_mmu_write(void *dest, co if (size == 2) a1 = *(u32 *)&p[4]; #endif - kvm_hypercall3(KVM_HYPERCALL_MMU_WRITE, (unsigned long)__pa(dest), a0, - a1); + kvm_hypercall(KVM_HYPERCALL_MMU_WRITE, (unsigned long)__pa(dest), a0, + a1, 0); } /* @@ -110,12 +181,31 @@ static void kvm_set_pud(pud_t *pudp, pud static void kvm_flush_tlb(void) { - kvm_hypercall0(KVM_HYPERCALL_FLUSH_TLB); + kvm_hypercall(KVM_HYPERCALL_FLUSH_TLB, 0, 0, 0, 0); } static void kvm_release_pt(u32 pfn) { - kvm_hypercall1(KVM_HYPERCALL_RELEASE_PT, pfn << PAGE_SHIFT); + kvm_hypercall(KVM_HYPERCALL_RELEASE_PT, pfn << PAGE_SHIFT, 0, 0, 0); +} + +static void kvm_enter_lazy_mmu(void) +{ + struct kvm_para_state *state + = &per_cpu(para_state, smp_processor_id()); + + paravirt_enter_lazy_mmu(); + state->mode = paravirt_get_lazy_mode(); +} + +static void kvm_leave_lazy_mmu(void) +{ + struct kvm_para_state *state + = &per_cpu(para_state, smp_processor_id()); + + hypercall_queue_flush(state); + paravirt_leave_lazy(paravirt_get_lazy_mode()); + state->mode = paravirt_get_lazy_mode(); } static void paravirt_ops_setup(void) @@ -144,6 +234,11 @@ static void paravirt_ops_setup(void) pv_mmu_ops.release_pt = kvm_release_pt; pv_mmu_ops.release_pd = kvm_release_pt; } + + if (kvm_para_has_feature(KVM_FEATURE_MULTICALL)) { + pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; + pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; + } } void __init kvm_guest_init(void) Index: kvm.paravirt2/arch/x86/kvm/x86.c =================================================================== --- kvm.paravirt2.orig/arch/x86/kvm/x86.c +++ kvm.paravirt2/arch/x86/kvm/x86.c @@ -79,6 +79,8 @@ struct kvm_stats_debugfs_item debugfs_en { "fpu_reload", VCPU_STAT(fpu_reload) }, { "insn_emulation", VCPU_STAT(insn_emulation) }, { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, + { "multicall", VCPU_STAT(multicall) }, + { "multicall_nr", 
VCPU_STAT(multicall_nr) }, { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, { "mmu_pte_write", VM_STAT(mmu_pte_write) }, { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, @@ -856,8 +858,10 @@ long kvm_arch_dev_ioctl(struct file *fil } case KVM_GET_PARA_FEATURES: { __u32 para_features = KVM_PARA_FEATURES; - if (tdp_enabled) + if (tdp_enabled) { para_features &= ~(1UL << KVM_FEATURE_MMU_WRITE); + para_features &= ~(1UL << KVM_FEATURE_MULTICALL); + } r = -EFAULT; if (copy_to_user(argp, &para_features, sizeof para_features)) @@ -1759,22 +1763,31 @@ mmio: return X86EMUL_UNHANDLEABLE; } -static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, +static int __emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes) { int ret; - down_read(&vcpu->kvm->slots_lock); ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); - if (ret < 0) { - up_read(&vcpu->kvm->slots_lock); + if (ret < 0) return 0; - } + kvm_mmu_pte_write(vcpu, gpa, val, bytes); - up_read(&vcpu->kvm->slots_lock); return 1; } +static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, + const void *val, int bytes) +{ + int ret; + + down_read(&vcpu->kvm->slots_lock); + ret = __emulator_write_phys(vcpu, gpa, val, bytes); + up_read(&vcpu->kvm->slots_lock); + return ret; +} + + static int emulator_write_emulated_onepage(unsigned long addr, const void *val, unsigned int bytes, @@ -2403,6 +2416,61 @@ static int kvm_hypercall_release_pt(stru return 0; } +static int dispatch_hypercall(struct kvm_vcpu *vcpu, unsigned long nr, + unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3) +{ + switch (nr) { + case KVM_HC_VAPIC_POLL_IRQ: + return 0; + case KVM_HYPERCALL_MMU_WRITE: + return kvm_hypercall_mmu_write(vcpu, a0, a1, a2); + case KVM_HYPERCALL_FLUSH_TLB: + return kvm_hypercall_flush_tlb(vcpu); + case KVM_HYPERCALL_RELEASE_PT: + return kvm_hypercall_release_pt(vcpu, a0); + } + + return -KVM_ENOSYS; +} + +static int kvm_hypercall_multicall(struct kvm_vcpu *vcpu, gpa_t 
addr, u32 nents) +{ + int i, nr_processed = 0; + + ++vcpu->stat.multicall; + vcpu->stat.multicall_nr += nents; + + down_read(&vcpu->kvm->slots_lock); + for (i = 0; i < nents; i++) { + struct kvm_multicall_entry mc; + int ret; + + ret = kvm_read_guest(vcpu->kvm, addr, &mc, sizeof(mc)); + if (ret) { + up_read(&vcpu->kvm->slots_lock); + return nr_processed; + } + + mc.error_code = dispatch_hypercall(vcpu, mc.nr, mc.a0, mc.a1, + mc.a2, mc.a3); + if (mc.error_code) { + ret = kvm_write_guest(vcpu->kvm, addr, &mc, + sizeof(mc)); + if (ret) { + up_read(&vcpu->kvm->slots_lock); + return nr_processed; + } + } else + nr_processed++; + + addr += sizeof(mc); + } + up_read(&vcpu->kvm->slots_lock); + + return nr_processed; +} + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; @@ -2423,23 +2491,11 @@ int kvm_emulate_hypercall(struct kvm_vcp a3 &= 0xFFFFFFFF; } - switch (nr) { - case KVM_HC_VAPIC_POLL_IRQ: - ret = 0; - break; - case KVM_HYPERCALL_MMU_WRITE: - ret = kvm_hypercall_mmu_write(vcpu, a0, a1, a2); - break; - case KVM_HYPERCALL_FLUSH_TLB: - ret = kvm_hypercall_flush_tlb(vcpu); - break; - case KVM_HYPERCALL_RELEASE_PT: - ret = kvm_hypercall_release_pt(vcpu, a0); - break; - default: - ret = -KVM_ENOSYS; - break; - } + if (nr == KVM_HYPERCALL_MULTICALL) + ret = kvm_hypercall_multicall(vcpu, a0, a1); + else + ret = dispatch_hypercall(vcpu, nr, a0, a1, a2, a3); + vcpu->arch.regs[VCPU_REGS_RAX] = ret; kvm_x86_ops->decache_regs(vcpu); return 0; Index: kvm.paravirt2/include/asm-x86/kvm_host.h =================================================================== --- kvm.paravirt2.orig/include/asm-x86/kvm_host.h +++ kvm.paravirt2/include/asm-x86/kvm_host.h @@ -327,6 +327,8 @@ struct kvm_vcpu_stat { u32 fpu_reload; u32 insn_emulation; u32 insn_emulation_fail; + u32 multicall; + u32 multicall_nr; }; struct descriptor_table { Index: kvm.paravirt2/include/asm-x86/kvm_para.h =================================================================== --- 
kvm.paravirt2.orig/include/asm-x86/kvm_para.h +++ kvm.paravirt2/include/asm-x86/kvm_para.h @@ -13,6 +13,7 @@ #define KVM_FEATURE_CLOCKSOURCE 0 #define KVM_FEATURE_NOP_IO_DELAY 1 #define KVM_FEATURE_MMU_WRITE 2 +#define KVM_FEATURE_MULTICALL 3 #define MSR_KVM_WALL_CLOCK 0x11 #define MSR_KVM_SYSTEM_TIME 0x12 @@ -37,13 +38,22 @@ struct kvm_wall_clock { uint32_t wc_nsec; } __attribute__((__packed__)); +struct kvm_multicall_entry { + u32 nr; + u32 error_code; + u64 a0; + u64 a1; + u64 a2; + u64 a3; +}; extern void kvmclock_init(void); #define KVM_PARA_FEATURES ((1UL << KVM_FEATURE_NOP_IO_DELAY) | \ (1UL << KVM_FEATURE_CLOCKSOURCE) | \ - (1UL << KVM_FEATURE_MMU_WRITE)) + (1UL << KVM_FEATURE_MMU_WRITE) | \ + (1UL << KVM_FEATURE_MULTICALL)) /* This instruction is vmcall. On non-VT architectures, it will generate a * trap that we will then rewrite to the appropriate instruction. Index: kvm.paravirt2/include/linux/kvm_para.h =================================================================== --- kvm.paravirt2.orig/include/linux/kvm_para.h +++ kvm.paravirt2/include/linux/kvm_para.h @@ -13,11 +13,13 @@ #define KVM_ENOSYS 1000 #define KVM_EFAULT EFAULT #define KVM_E2BIG E2BIG +#define KVM_EINVAL EINVAL #define KVM_HC_VAPIC_POLL_IRQ 1 #define KVM_HYPERCALL_MMU_WRITE 2 #define KVM_HYPERCALL_FLUSH_TLB 3 #define KVM_HYPERCALL_RELEASE_PT 4 +#define KVM_HYPERCALL_MULTICALL 5 /* * hypercalls use architecture specific -- ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel