On Sat, Jun 21, 2008 at 10:04:18AM +0300, Avi Kivity wrote:
>> /*
>>  * Sync the rsp and rip registers into the vcpu structure.  This allows
>>  * registers to be accessed by indexing vcpu->arch.regs.
>>  */
>>
>> But I think it just refers to the interface in general, so that nobody
>> would try to access RSP or RIP (and RAX in AMD's case) before calling
>> ->cache_regs().
>>
>
> It refers to the fact that sometimes you don't know which registers you
> refer to, e.g. in the emulator.
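To make the scheme below concrete before the patch itself: a register set
is read from hardware only on first use after an exit, and written back
only if it was modified. Here is a minimal userspace model of that
pattern; hw_read_reg()/hw_write_reg() and the other names are made-up
stand-ins for the vmcs_readl()/vmcs_writel() and VMCB accesses in the
real code, so treat this as a sketch of the idea, not the patch:

#include <stdio.h>

enum reg_set { REGS_GPR, REGS_RSP, REGS_RIP, NR_REG_SETS };

struct vcpu {
	unsigned long regs[NR_REG_SETS];	/* cached copies */
	unsigned int available;			/* bit set: cache holds hw value */
	unsigned int dirty;			/* bit set: cache was modified */
};

/* Hypothetical stand-ins for vmcs_readl()/vmcs_writel() or VMCB accesses. */
static unsigned long hw_read_reg(int r)
{
	return 0x1000ul + r;
}

static void hw_write_reg(int r, unsigned long v)
{
	printf("hw reg set %d <- %#lx\n", r, v);
}

static unsigned long read_reg(struct vcpu *v, enum reg_set r)
{
	if (!(v->available & (1u << r))) {	/* first use since last exit */
		v->regs[r] = hw_read_reg(r);	/* expensive read, done once */
		v->available |= 1u << r;
	}
	return v->regs[r];
}

static void write_reg(struct vcpu *v, enum reg_set r, unsigned long val)
{
	v->regs[r] = val;
	v->available |= 1u << r;
	v->dirty |= 1u << r;			/* defer the hardware write */
}

static void flush_regs(struct vcpu *v)		/* just before guest entry */
{
	int r;

	for (r = 0; r < NR_REG_SETS; r++)
		if (v->dirty & (1u << r))
			hw_write_reg(r, v->regs[r]);
	v->dirty = 0;
}

int main(void)
{
	struct vcpu v = { { 0 }, 0, 0 };

	/* e.g. skipping an instruction: read RIP once, advance, mark dirty */
	write_reg(&v, REGS_RIP, read_reg(&v, REGS_RIP) + 2);
	flush_regs(&v);		/* only the dirty RIP is written back */
	return 0;
}

When nothing was dirtied, flush_regs() issues no hardware writes at all,
which is where the cycles saved on lightweight exits come from.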
How's this? The test was performed with an idle UP guest booted with
"nohz=off clocksource=acpi_pm", so most of the exits are ACPI PM timer
reads. The averages are taken from the entries available in the dmesg
buffer, about 1500 of them. "regs_available" and "regs_dirty" could each
be 8 bits and sit in a structure hole instead of in the middle of the
longs.

avg cycles to exit to qemu:
before: 3376
after:  3181
195 cycles (~= 6.1% improvement)

avg cycles to exit to qemu, handle exit and return to __vcpu_run before
irq_disable:
before: 7482 cycles
after:  7227 cycles
255 cycles (~= 3.5% improvement)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 73f43de..ecf26e3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -32,6 +32,7 @@
 #include <asm/current.h>
 #include <asm/apicdef.h>
 #include <asm/atomic.h>
+#include "kvm_cache_regs.h"
 #include "irq.h"
 
 #define PRId64 "d"
@@ -558,7 +559,7 @@ static void __report_tpr_access(struct kvm_lapic *apic, bool write)
 	struct kvm_run *run = vcpu->run;
 
 	set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests);
-	kvm_x86_ops->cache_regs(vcpu);
+	cache_regs(vcpu, REGS_RIP);
 	run->tpr_access.rip = vcpu->arch.rip;
 	run->tpr_access.is_write = write;
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 238e8f3..8554b37 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -709,21 +709,38 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 	rdtscll(vcpu->arch.host_tsc);
 }
 
-static void svm_cache_regs(struct kvm_vcpu *vcpu)
+static void svm_cache_regs(struct kvm_vcpu *vcpu, enum kvm_reg_set regs)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
-	vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
-	vcpu->arch.rip = svm->vmcb->save.rip;
+	switch (regs) {
+	case REGS_GPR:
+		vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
+		break;
+	case REGS_RSP:
+		vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
+		break;
+	case REGS_RIP:
+		vcpu->arch.rip = svm->vmcb->save.rip;
+		break;
+	}
 }
 
-static void svm_decache_regs(struct kvm_vcpu *vcpu)
+static void svm_decache_regs(struct kvm_vcpu *vcpu, enum kvm_reg_set regs)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
-	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
-	svm->vmcb->save.rip = vcpu->arch.rip;
+
+	switch (regs) {
+	case REGS_GPR:
+		svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
+		break;
+	case REGS_RSP:
+		svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
+		break;
+	case REGS_RIP:
+		svm->vmcb->save.rip = vcpu->arch.rip;
+		break;
+	}
 }
 
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6e4278d..b7a988c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -26,6 +26,7 @@
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/moduleparam.h>
+#include "kvm_cache_regs.h"
 
 #include <asm/io.h>
 #include <asm/desc.h>
@@ -707,9 +708,11 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	unsigned long rip;
 	u32 interruptibility;
 
-	rip = vmcs_readl(GUEST_RIP);
+	cache_regs(vcpu, REGS_RIP);
+	rip = vcpu->arch.rip;
 	rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-	vmcs_writel(GUEST_RIP, rip);
+	vcpu->arch.rip = rip;
+	decache_regs(vcpu, REGS_RIP);
 
 	/*
 	 * We emulated an instruction, so temporary interrupt blocking
@@ -935,20 +938,48 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
  * Sync the rsp and rip registers into the vcpu structure.  This allows
  * registers to be accessed by indexing vcpu->arch.regs.
  */
-static void vcpu_load_rsp_rip(struct kvm_vcpu *vcpu)
+static void vmx_cache_regs(struct kvm_vcpu *vcpu, enum kvm_reg_set regs)
 {
-	vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
-	vcpu->arch.rip = vmcs_readl(GUEST_RIP);
+	switch (regs) {
+	case REGS_RSP:
+		vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
+		break;
+	case REGS_RIP:
+		vcpu->arch.rip = vmcs_readl(GUEST_RIP);
+		break;
+	case REGS_GPR:
+		break;
+	}
 }
 
 /*
  * Syncs rsp and rip back into the vmcs.  Should be called after possible
  * modification.
  */
+static void vmx_decache_regs(struct kvm_vcpu *vcpu, enum kvm_reg_set regs)
+{
+	switch (regs) {
+	case REGS_RSP:
+		vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
+		break;
+	case REGS_RIP:
+		vmcs_writel(GUEST_RIP, vcpu->arch.rip);
+		break;
+	case REGS_GPR:
+		break;
+	}
+}
+
+static void vcpu_load_rsp_rip(struct kvm_vcpu *vcpu)
+{
+	cache_regs(vcpu, REGS_RSP);
+	cache_regs(vcpu, REGS_RIP);
+}
+
 static void vcpu_put_rsp_rip(struct kvm_vcpu *vcpu)
 {
-	vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
-	vmcs_writel(GUEST_RIP, vcpu->arch.rip);
+	decache_regs(vcpu, REGS_RSP);
+	decache_regs(vcpu, REGS_RIP);
 }
 
 static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
@@ -3213,8 +3244,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.set_idt = vmx_set_idt,
 	.get_gdt = vmx_get_gdt,
 	.set_gdt = vmx_set_gdt,
-	.cache_regs = vcpu_load_rsp_rip,
-	.decache_regs = vcpu_put_rsp_rip,
+	.cache_regs = vmx_cache_regs,
+	.decache_regs = vmx_decache_regs,
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 26b051b..0a8d83b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -19,6 +19,7 @@
 #include "mmu.h"
 #include "i8254.h"
 #include "tss.h"
+#include "kvm_cache_regs.h"
 
 #include <linux/clocksource.h>
 #include <linux/kvm.h>
@@ -61,6 +62,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 				    struct kvm_cpuid_entry2 __user *entries);
 
 struct kvm_x86_ops *kvm_x86_ops;
+EXPORT_SYMBOL(kvm_x86_ops);
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "pf_fixed", VCPU_STAT(pf_fixed) },
@@ -1778,6 +1780,16 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
 	return dev;
 }
 
+static void flush_regs(struct kvm_vcpu *vcpu)
+{
+	if (__test_and_clear_bit(REGS_RSP, &vcpu->arch.regs_dirty))
+		kvm_x86_ops->decache_regs(vcpu, REGS_RSP);
+	if (__test_and_clear_bit(REGS_RIP, &vcpu->arch.regs_dirty))
+		kvm_x86_ops->decache_regs(vcpu, REGS_RIP);
+	if (__test_and_clear_bit(REGS_GPR, &vcpu->arch.regs_dirty))
+		kvm_x86_ops->decache_regs(vcpu, REGS_GPR);
+}
+
 int emulator_read_std(unsigned long addr,
 			     void *val,
 			     unsigned int bytes,
@@ -2060,7 +2072,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 	struct decode_cache *c;
 
 	vcpu->arch.mmio_fault_cr2 = cr2;
-	kvm_x86_ops->cache_regs(vcpu);
+	cache_all_regs(vcpu);
 
 	vcpu->mmio_is_write = 0;
 	vcpu->arch.pio.string = 0;
@@ -2141,7 +2153,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 		return EMULATE_DO_MMIO;
 	}
 
-	kvm_x86_ops->decache_regs(vcpu);
+	decache_all_regs(vcpu);
 	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
 
 	if (vcpu->mmio_is_write) {
@@ -2195,7 +2207,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
 	long delta;
 	int r;
 
-	kvm_x86_ops->cache_regs(vcpu);
+	cache_regs(vcpu, REGS_GPR);
 
 	if (!io->string) {
 		if (io->in)
@@ -2205,7 +2217,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
 		if (io->in) {
 			r = pio_copy_data(vcpu);
 			if (r) {
-				kvm_x86_ops->cache_regs(vcpu);
+				kvm_x86_ops->cache_regs(vcpu, REGS_GPR);
 				return r;
 			}
 		}
@@ -2228,7 +2240,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
 			vcpu->arch.regs[VCPU_REGS_RSI] += delta;
 	}
 
-	kvm_x86_ops->decache_regs(vcpu);
+	decache_regs(vcpu, REGS_GPR);
 
 	io->count -= io->cur_count;
 	io->cur_count = 0;
@@ -2302,7 +2314,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
 			    handler);
 
-	kvm_x86_ops->cache_regs(vcpu);
+	cache_regs(vcpu, REGS_GPR);
 	memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
 
@@ -2488,7 +2500,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	unsigned long nr, a0, a1, a2, a3, ret;
 	int r = 1;
 
-	kvm_x86_ops->cache_regs(vcpu);
+	cache_regs(vcpu, REGS_GPR);
 
 	nr = vcpu->arch.regs[VCPU_REGS_RAX];
 	a0 = vcpu->arch.regs[VCPU_REGS_RBX];
@@ -2518,7 +2530,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		break;
 	}
 	vcpu->arch.regs[VCPU_REGS_RAX] = ret;
-	kvm_x86_ops->decache_regs(vcpu);
+	decache_regs(vcpu, REGS_GPR);
 	++vcpu->stat.hypercalls;
 	return r;
 }
@@ -2537,7 +2549,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
 	 */
 	kvm_mmu_zap_all(vcpu->kvm);
 
-	kvm_x86_ops->cache_regs(vcpu);
+	cache_all_regs(vcpu);
 	kvm_x86_ops->patch_hypercall(vcpu, instruction);
 	if (emulator_write_emulated(vcpu->arch.rip, instruction, 3, vcpu)
 	    != X86EMUL_CONTINUE)
@@ -2669,7 +2681,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 	u32 function, index;
 	struct kvm_cpuid_entry2 *e, *best;
 
-	kvm_x86_ops->cache_regs(vcpu);
+	cache_regs(vcpu, REGS_GPR);
 	function = vcpu->arch.regs[VCPU_REGS_RAX];
 	index = vcpu->arch.regs[VCPU_REGS_RCX];
 	vcpu->arch.regs[VCPU_REGS_RAX] = 0;
@@ -2698,7 +2710,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 		vcpu->arch.regs[VCPU_REGS_RCX] = best->ecx;
 		vcpu->arch.regs[VCPU_REGS_RDX] = best->edx;
 	}
-	kvm_x86_ops->decache_regs(vcpu);
+	decache_regs(vcpu, REGS_GPR);
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
 	KVMTRACE_5D(CPUID, vcpu, function,
 		    (u32)vcpu->arch.regs[VCPU_REGS_RAX],
@@ -2811,6 +2823,8 @@ again:
 		}
 	}
 
+	flush_regs(vcpu);
+
 	clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
 	kvm_inject_pending_timer_irqs(vcpu);
 
@@ -2865,6 +2879,8 @@ again:
 	local_irq_enable();
 
 	++vcpu->stat.exits;
+	vcpu->arch.regs_available = 0;
+	vcpu->arch.regs_dirty = 0;
 
 	/*
 	 * We must have an instruction between local_irq_enable() and
@@ -2884,7 +2900,7 @@ again:
 	 * Profile KVM exit RIPs:
 	 */
 	if (unlikely(prof_on == KVM_PROFILING)) {
-		kvm_x86_ops->cache_regs(vcpu);
+		cache_regs(vcpu, REGS_RIP);
 		profile_hit(KVM_PROFILING, (void *)vcpu->arch.rip);
 	}
 
@@ -2969,9 +2985,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	}
 #endif
 	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
-		kvm_x86_ops->cache_regs(vcpu);
+		cache_regs(vcpu, REGS_GPR);
 		vcpu->arch.regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
-		kvm_x86_ops->decache_regs(vcpu);
+		decache_regs(vcpu, REGS_GPR);
 	}
 
 	r = __vcpu_run(vcpu, kvm_run);
@@ -2988,7 +3004,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	vcpu_load(vcpu);
 
-	kvm_x86_ops->cache_regs(vcpu);
+	cache_all_regs(vcpu);
 
 	regs->rax = vcpu->arch.regs[VCPU_REGS_RAX];
 	regs->rbx = vcpu->arch.regs[VCPU_REGS_RBX];
@@ -3049,7 +3065,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	vcpu->arch.rip = regs->rip;
 	kvm_x86_ops->set_rflags(vcpu, regs->rflags);
 
-	kvm_x86_ops->decache_regs(vcpu);
+	decache_all_regs(vcpu);
 
 	vcpu->arch.exception.pending = false;
 
@@ -3525,8 +3541,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 		kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
 	}
 
+	cache_all_regs(vcpu);
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
-	kvm_x86_ops->cache_regs(vcpu);
 
 	if (nseg_desc.type & 8)
 		ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc,
@@ -3551,7 +3567,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
 	tr_seg.type = 11;
 	kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
 out:
-	kvm_x86_ops->decache_regs(vcpu);
+	decache_all_regs(vcpu);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 851184d..56f01cc 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -120,6 +120,12 @@ enum {
 	VCPU_SREG_LDTR,
 };
 
+enum kvm_reg_set {
+	REGS_GPR,
+	REGS_RSP,
+	REGS_RIP,
+};
+
 #include <asm/kvm_x86_emulate.h>
 
 #define KVM_NR_MEM_OBJS 40
@@ -217,8 +223,10 @@ struct kvm_vcpu_arch {
 	int interrupt_window_open;
 	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
 	DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
-	unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
-	unsigned long rip; /* needs vcpu_load_rsp_rip() */
+	unsigned long regs[NR_VCPU_REGS]; /* needs cache_regs() */
+	unsigned long rip; /* needs cache_regs() */
+	u16 regs_available;
+	u16 regs_dirty;
 
 	unsigned long cr0;
 	unsigned long cr2;
@@ -410,8 +418,8 @@ struct kvm_x86_ops {
 	unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr);
 	void (*set_dr)(struct kvm_vcpu *vcpu, int dr,
 		       unsigned long value, int *exception);
-	void (*cache_regs)(struct kvm_vcpu *vcpu);
-	void (*decache_regs)(struct kvm_vcpu *vcpu);
+	void (*cache_regs)(struct kvm_vcpu *vcpu, enum kvm_reg_set regs);
+	void (*decache_regs)(struct kvm_vcpu *vcpu, enum kvm_reg_set regs);
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
 
--- /dev/null	2008-06-20 14:49:09.418709161 -0300
+++ b/arch/x86/kvm/kvm_cache_regs.h	2008-06-21 14:12:49.000000000 -0300
@@ -0,0 +1,27 @@
+#ifndef ASM_KVM_CACHE_REGS_H
+#define ASM_KVM_CACHE_REGS_H
+static inline void cache_regs(struct kvm_vcpu *vcpu, enum kvm_reg_set reg_set)
+{
+	if (!__test_and_set_bit(reg_set, &vcpu->arch.regs_available))
+		kvm_x86_ops->cache_regs(vcpu, reg_set);
+}
+
+static inline void decache_regs(struct kvm_vcpu *vcpu, enum kvm_reg_set reg_set)
+{
+	__set_bit(reg_set, &vcpu->arch.regs_dirty);
+}
+
+static inline void cache_all_regs(struct kvm_vcpu *vcpu)
+{
+	cache_regs(vcpu, REGS_RSP);
+	cache_regs(vcpu, REGS_RIP);
+	cache_regs(vcpu, REGS_GPR);
+}
+
+static inline void decache_all_regs(struct kvm_vcpu *vcpu)
+{
+	decache_regs(vcpu, REGS_RSP);
+	decache_regs(vcpu, REGS_RIP);
+	decache_regs(vcpu, REGS_GPR);
+}
+#endif
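For reference, a caller under the new interface follows this pattern
(example_rax_bump() is a made-up illustration of the calling convention,
not part of the patch):

static void example_rax_bump(struct kvm_vcpu *vcpu)
{
	/* Syncs the GPRs from the VMCB/VMCS only if not already cached. */
	cache_regs(vcpu, REGS_GPR);

	vcpu->arch.regs[VCPU_REGS_RAX]++;

	/*
	 * Only marks the set dirty; the hardware write is deferred to
	 * flush_regs(), which runs just before the next guest entry.
	 */
	decache_regs(vcpu, REGS_GPR);
}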