From 6a7207a0f3ee8af6ebafcec9d40a75b87f00a129 Mon Sep 17 00:00:00 2001 From: Izik Eidus <[EMAIL PROTECTED]> Date: Thu, 13 Mar 2008 02:34:21 +0200 Subject: [PATCH] KVM: hardware task switching support
Signed-off-by: Izik Eidus <[EMAIL PROTECTED]> --- arch/x86/kvm/svm.c | 11 +- arch/x86/kvm/tss_segment.h | 59 +++++++ arch/x86/kvm/vmx.c | 15 ++ arch/x86/kvm/x86.c | 385 ++++++++++++++++++++++++++++++++++++++++++++ include/asm-x86/kvm_host.h | 2 + 5 files changed, 469 insertions(+), 3 deletions(-) create mode 100644 arch/x86/kvm/tss_segment.h diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4e1dd61..be78278 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1121,9 +1121,14 @@ static int invalid_op_interception(struct vcpu_svm *svm, static int task_switch_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __func__); - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - return 0; + u16 tss_selector; + + tss_selector = (u16)svm->vmcb->control.exit_info_1; + if(svm->vmcb->control.exit_info_2 & ((unsigned long)1 << 36)) + return kvm_task_switch(&svm->vcpu, tss_selector, 1); + if(svm->vmcb->control.exit_info_2 & ((unsigned long)1 << 38)) + return kvm_task_switch(&svm->vcpu, tss_selector, 2); + return kvm_task_switch(&svm->vcpu, tss_selector, 0); } static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) diff --git a/arch/x86/kvm/tss_segment.h b/arch/x86/kvm/tss_segment.h new file mode 100644 index 0000000..622aa10 --- /dev/null +++ b/arch/x86/kvm/tss_segment.h @@ -0,0 +1,59 @@ +#ifndef __TSS_SEGMENT_H +#define __TSS_SEGMENT_H + +struct tss_segment_32 { + u32 prev_task_link; + u32 esp0; + u32 ss0; + u32 esp1; + u32 ss1; + u32 esp2; + u32 ss2; + u32 cr3; + u32 eip; + u32 eflags; + u32 eax; + u32 ecx; + u32 edx; + u32 ebx; + u32 esp; + u32 ebp; + u32 esi; + u32 edi; + u32 es; + u32 cs; + u32 ss; + u32 ds; + u32 fs; + u32 gs; + u32 ldt_selector; + u16 t; + u16 io_map; +}; + +struct tss_segment_16 { + u16 prev_task_link; + u16 sp0; + u16 ss0; + u16 sp1; + u16 ss1; + u16 sp2; + u16 ss2; + u16 ip; + u16 flag; + u16 ax; + u16 cx; + u16 dx; + u16 bx; + u16 sp; + u16 bp; + u16 si; 
+ u16 di; + u16 es; + u16 cs; + u16 ss; + u16 ds; + u16 ldt; +}; + +#endif diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 459b0bd..e5fe09b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2246,6 +2246,20 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } +static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + unsigned long exit_qualification; + u16 tss_selector; + int reason; + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + reason = (u32)exit_qualification >> 30; + tss_selector = (u16)exit_qualification; + + return kvm_task_switch(vcpu, tss_selector, reason); +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -2268,6 +2282,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, [EXIT_REASON_WBINVD] = handle_wbinvd, + [EXIT_REASON_TASK_SWITCH] = handle_task_switch, }; static const int kvm_vmx_max_exit_handlers = diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5339ab1..7feb41e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -18,6 +18,7 @@ #include "irq.h" #include "mmu.h" #include "i8254.h" +#include "tss_segment.h" #include <linux/clocksource.h> #include <linux/kvm.h> @@ -3050,6 +3051,390 @@ static void set_segment(struct kvm_vcpu *vcpu, kvm_x86_ops->set_segment(vcpu, var, seg); } +static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, + struct kvm_segment *kvm_desct) +{ + kvm_desct->base = seg_desc->base0; + kvm_desct->base |= seg_desc->base1 << 16; + kvm_desct->base |= seg_desc->base2 << 24; + kvm_desct->limit = seg_desc->limit0; + kvm_desct->limit |= seg_desc->limit << 16; + kvm_desct->selector = selector; + kvm_desct->type = seg_desc->type; + kvm_desct->present = seg_desc->p; + 
kvm_desct->dpl = seg_desc->dpl; + kvm_desct->db = seg_desc->d; + kvm_desct->s = seg_desc->s; + kvm_desct->l = seg_desc->l; + kvm_desct->g = seg_desc->g; + kvm_desct->avl = seg_desc->avl; + kvm_desct->unusable = 0; + kvm_desct->padding = 0; +} + +/* allowed just for 8 bytes segments */ +static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, + struct desc_struct *seg_desc) +{ + struct descriptor_table gdt_ldt; + u16 index = selector >> 3; + + if (selector & 1 << 2) + kvm_x86_ops->get_ldt(vcpu, &gdt_ldt); + else + kvm_x86_ops->get_gdt(vcpu, &gdt_ldt); + if (gdt_ldt.limit < index * 8 + 7) + return 1; + return kvm_read_guest(vcpu->kvm, gdt_ldt.base + index * 8, seg_desc, 8); +} + +/* allowed just for 8 bytes segments */ +static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, + struct desc_struct *seg_desc) +{ + struct descriptor_table gdt_ldt; + u16 index = selector >> 3; + + if (selector & 1 << 2) + kvm_x86_ops->get_ldt(vcpu, &gdt_ldt); + else + kvm_x86_ops->get_gdt(vcpu, &gdt_ldt); + if (gdt_ldt.limit < index * 8 + 7) + return 1; + return kvm_write_guest(vcpu->kvm, gdt_ldt.base + index * 8, seg_desc, 8); +} + +static int load_tss_segment32(struct kvm_vcpu *vcpu, + struct desc_struct *seg_desc, + struct tss_segment_32 *tss) +{ + u32 base_addr; + + base_addr = seg_desc->base0; + base_addr |= (seg_desc->base1 << 16); + base_addr |= (seg_desc->base2 << 24); + + return kvm_read_guest(vcpu->kvm, base_addr, tss, + sizeof(struct tss_segment_32)); +} + +static int save_tss_segment32(struct kvm_vcpu *vcpu, + struct desc_struct *seg_desc, + struct tss_segment_32 *tss) +{ + u32 base_addr; + + base_addr = seg_desc->base0; + base_addr |= (seg_desc->base1 << 16); + base_addr |= (seg_desc->base2 << 24); + + return kvm_write_guest(vcpu->kvm, base_addr, tss, + sizeof(struct tss_segment_32)); +} + +static int load_tss_segment16(struct kvm_vcpu *vcpu, + struct desc_struct *seg_desc, + struct tss_segment_16 *tss) +{ + u32 base_addr; + + 
base_addr = seg_desc->base0; + base_addr |= (seg_desc->base1 << 16); + base_addr |= (seg_desc->base2 << 24); + + return kvm_read_guest(vcpu->kvm, base_addr, tss, + sizeof(struct tss_segment_16)); +} + +static int save_tss_segment16(struct kvm_vcpu *vcpu, + struct desc_struct *seg_desc, + struct tss_segment_16 *tss) +{ + u32 base_addr; + + base_addr = seg_desc->base0; + base_addr |= (seg_desc->base1 << 16); + base_addr |= (seg_desc->base2 << 24); + + return kvm_write_guest(vcpu->kvm, base_addr, tss, + sizeof(struct tss_segment_16)); +} + +static void save_state_to_tss32(struct kvm_vcpu *vcpu, + struct tss_segment_32 *tss) +{ + struct kvm_segment kvm_seg; + + tss->cr3 = vcpu->arch.cr3; + tss->eip = vcpu->arch.rip; + tss->eflags = kvm_x86_ops->get_rflags(vcpu); + tss->eax = vcpu->arch.regs[VCPU_REGS_RAX]; + tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX]; + tss->edx = vcpu->arch.regs[VCPU_REGS_RDX]; + tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX]; + tss->esp = vcpu->arch.regs[VCPU_REGS_RSP]; + tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP]; + tss->esi = vcpu->arch.regs[VCPU_REGS_RSI]; + tss->edi = vcpu->arch.regs[VCPU_REGS_RDI]; + + get_segment(vcpu, &kvm_seg, VCPU_SREG_ES); + tss->es = kvm_seg.selector; + get_segment(vcpu, &kvm_seg, VCPU_SREG_CS); + tss->cs = kvm_seg.selector; + get_segment(vcpu, &kvm_seg, VCPU_SREG_SS); + tss->ss = kvm_seg.selector; + get_segment(vcpu, &kvm_seg, VCPU_SREG_DS); + tss->ds = kvm_seg.selector; + get_segment(vcpu, &kvm_seg, VCPU_SREG_FS); + tss->fs = kvm_seg.selector; + get_segment(vcpu, &kvm_seg, VCPU_SREG_GS); + tss->gs = kvm_seg.selector; + get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); + tss->ldt_selector = kvm_seg.selector; + get_segment(vcpu, &kvm_seg, VCPU_SREG_TR); + tss->prev_task_link = kvm_seg.selector; +} + +static void load_state_from_tss32(struct kvm_vcpu *vcpu, + struct tss_segment_32 *tss) +{ + struct desc_struct seg_desc; + struct kvm_segment kvm_seg; + + if (vcpu->arch.cr0 & 1 << 31) + kvm_set_cr3(vcpu, tss->cr3); + + vcpu->arch.rip = 
tss->eip; + kvm_x86_ops->set_rflags(vcpu, (tss->eflags | 2) & ~0x8028); + + vcpu->arch.regs[VCPU_REGS_RAX] = tss->eax; + vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx; + vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx; + vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx; + vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp; + vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp; + vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi; + vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi; + + load_guest_segment_descriptor(vcpu, tss->ldt_selector, + &seg_desc); + seg_desct_to_kvm_desct(&seg_desc, tss->ldt_selector, + &kvm_seg); + set_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); + + load_guest_segment_descriptor(vcpu, tss->es, &seg_desc); + seg_desct_to_kvm_desct(&seg_desc, tss->es, &kvm_seg); + kvm_seg.type |= 1; + if (!kvm_seg.s) + kvm_seg.unusable = 1; + set_segment(vcpu, &kvm_seg, VCPU_SREG_ES); + + load_guest_segment_descriptor(vcpu, tss->cs, &seg_desc); + seg_desct_to_kvm_desct(&seg_desc, tss->cs, &kvm_seg); + kvm_seg.type |= 9; + set_segment(vcpu, &kvm_seg, VCPU_SREG_CS); + + load_guest_segment_descriptor(vcpu, tss->ss, &seg_desc); + seg_desct_to_kvm_desct(&seg_desc, tss->ss, &kvm_seg); + kvm_seg.type |= 1; + set_segment(vcpu, &kvm_seg, VCPU_SREG_SS); + + load_guest_segment_descriptor(vcpu, tss->ds, &seg_desc); + seg_desct_to_kvm_desct(&seg_desc, tss->ds, &kvm_seg); + kvm_seg.type |= 1; + if (!kvm_seg.s) + kvm_seg.unusable = 1; + set_segment(vcpu, &kvm_seg, VCPU_SREG_DS); + + load_guest_segment_descriptor(vcpu, tss->fs, &seg_desc); + seg_desct_to_kvm_desct(&seg_desc, tss->fs, &kvm_seg); + kvm_seg.type |= 1; + if (!kvm_seg.s) + kvm_seg.unusable = 1; + set_segment(vcpu, &kvm_seg, VCPU_SREG_FS); + + load_guest_segment_descriptor(vcpu, tss->gs, &seg_desc); + seg_desct_to_kvm_desct(&seg_desc, tss->gs, &kvm_seg); + kvm_seg.type |= 1; + if (!kvm_seg.s) + kvm_seg.unusable = 1; + set_segment(vcpu, &kvm_seg, VCPU_SREG_GS); +} + +static void save_state_to_tss16(struct kvm_vcpu *vcpu, + struct tss_segment_16 *tss) +{ + struct 
kvm_segment kvm_seg; + + tss->ip = vcpu->arch.rip; + tss->flag = kvm_x86_ops->get_rflags(vcpu); + tss->ax = vcpu->arch.regs[VCPU_REGS_RAX]; + tss->cx = vcpu->arch.regs[VCPU_REGS_RCX]; + tss->dx = vcpu->arch.regs[VCPU_REGS_RDX]; + tss->bx = vcpu->arch.regs[VCPU_REGS_RBX]; + tss->sp = vcpu->arch.regs[VCPU_REGS_RSP]; + tss->bp = vcpu->arch.regs[VCPU_REGS_RBP]; + tss->si = vcpu->arch.regs[VCPU_REGS_RSI]; + tss->di = vcpu->arch.regs[VCPU_REGS_RDI]; + + get_segment(vcpu, &kvm_seg, VCPU_SREG_ES); + tss->es = kvm_seg.selector; + get_segment(vcpu, &kvm_seg, VCPU_SREG_CS); + tss->cs = kvm_seg.selector; + get_segment(vcpu, &kvm_seg, VCPU_SREG_SS); + tss->ss = kvm_seg.selector; + get_segment(vcpu, &kvm_seg, VCPU_SREG_DS); + tss->ds = kvm_seg.selector; + get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); + tss->ldt = kvm_seg.selector; + get_segment(vcpu, &kvm_seg, VCPU_SREG_TR); + tss->prev_task_link = kvm_seg.selector; + +} + +static void load_state_from_tss16(struct kvm_vcpu *vcpu, + struct tss_segment_16 *tss) +{ + struct desc_struct seg_desc; + struct kvm_segment kvm_seg; + + vcpu->arch.rip = tss->ip; + kvm_x86_ops->set_rflags(vcpu, tss->flag); + vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax; + vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx; + vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx; + vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx; + vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp; + vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp; + vcpu->arch.regs[VCPU_REGS_RSI] = tss->si; + vcpu->arch.regs[VCPU_REGS_RDI] = tss->di; + + load_guest_segment_descriptor(vcpu, tss->es, &seg_desc); + seg_desct_to_kvm_desct(&seg_desc, tss->es, &kvm_seg); + set_segment(vcpu, &kvm_seg, VCPU_SREG_ES); + + load_guest_segment_descriptor(vcpu, tss->cs, &seg_desc); + seg_desct_to_kvm_desct(&seg_desc, tss->cs, &kvm_seg); + set_segment(vcpu, &kvm_seg, VCPU_SREG_CS); + + load_guest_segment_descriptor(vcpu, tss->ss, &seg_desc); + seg_desct_to_kvm_desct(&seg_desc, tss->ss, &kvm_seg); + set_segment(vcpu, &kvm_seg, VCPU_SREG_SS); + + 
load_guest_segment_descriptor(vcpu, tss->ds, &seg_desc); + seg_desct_to_kvm_desct(&seg_desc, tss->ds, &kvm_seg); + set_segment(vcpu, &kvm_seg, VCPU_SREG_DS); + + load_guest_segment_descriptor(vcpu, tss->ldt, &seg_desc); + seg_desct_to_kvm_desct(&seg_desc, tss->ldt, &kvm_seg); + set_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); +} + +int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, + struct desc_struct *cseg_desc, + struct desc_struct *nseg_desc) +{ + struct tss_segment_16 tss_segment_16; + int ret = 0; + + if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16)) + goto out; + + save_state_to_tss16(vcpu, &tss_segment_16); + save_tss_segment16(vcpu, cseg_desc, &tss_segment_16); + + load_tss_segment16(vcpu, nseg_desc, &tss_segment_16); + load_state_from_tss16(vcpu, &tss_segment_16); + + ret = 1; +out: + return ret; +} + +int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, + struct desc_struct *cseg_desc, + struct desc_struct *nseg_desc) +{ + struct tss_segment_32 tss_segment_32; + int ret = 0; + + if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32)) + goto out; + + save_state_to_tss32(vcpu, &tss_segment_32); + save_tss_segment32(vcpu, cseg_desc, &tss_segment_32); + + if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32)) + goto out; + load_state_from_tss32(vcpu, &tss_segment_32); + + ret = 1; +out: + return ret; +} + +int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) +{ + struct kvm_segment tr_seg; + struct desc_struct cseg_desc; + struct desc_struct nseg_desc; + int ret = 0; + + get_segment(vcpu, &tr_seg, VCPU_SREG_TR); + + if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) + goto out; + + if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc)) + goto out; + + if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) + goto out; + + if (reason == 1 || reason == 2) { + cseg_desc.type &= ~(1 << 8); //clear the B flag + 
save_guest_segment_descriptor(vcpu, tr_seg.selector, + &cseg_desc); + } + + if (reason == 1) { + u32 eflags = kvm_x86_ops->get_rflags(vcpu); + kvm_x86_ops->set_rflags(vcpu, eflags &~(1 << 14)); + } + + kvm_x86_ops->skip_emulated_instruction(vcpu); + kvm_x86_ops->cache_regs(vcpu); + + if (nseg_desc.type & 8) + ret = kvm_task_switch_32(vcpu, tss_selector, reason, + &cseg_desc, &nseg_desc); + else + ret = kvm_task_switch_16(vcpu, tss_selector, reason, + &cseg_desc, &nseg_desc); + + if (reason == 0 || reason == 3) { + u32 eflags = kvm_x86_ops->get_rflags(vcpu); + kvm_x86_ops->set_rflags(vcpu, eflags | (1 << 14)); + } + + if (reason != 1) { + nseg_desc.type &= ~(1 << 8); + save_guest_segment_descriptor(vcpu, tss_selector, + &nseg_desc); + } + + kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | 1 << 3); + seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); + tr_seg.type = 11; + set_segment(vcpu, &tr_seg, VCPU_SREG_TR); +out: + kvm_x86_ops->decache_regs(vcpu); + if (!ret) + kvm_queue_exception(vcpu, GP_VECTOR); + return ret; +} +EXPORT_SYMBOL_GPL(kvm_task_switch); + int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index d9806e2..7ca0462 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -472,6 +472,8 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value); +int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); + void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0); void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); -- 1.5.3.6 ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. 
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel