Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>

Index: linux-2.6/drivers/kvm/kvm.h
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm.h
+++ linux-2.6/drivers/kvm/kvm.h
@@ -266,6 +266,7 @@ struct kvm_arch_ops {
        void (*decache_regs)(struct kvm_vcpu *vcpu);
 
        int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
+       void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
        unsigned long vmx_return; /* temporary hack */
 };
 
@@ -300,6 +301,14 @@ static inline struct page *gfn_to_page(s
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
+enum emulation_result {
+       EMULATE_DONE,       /* no further processing */
+       EMULATE_DO_MMIO,      /* kvm_run filled with mmio request */
+       EMULATE_FAIL,         /* can't emulate this instruction */
+};
+
+int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
+                       unsigned long cr2, u16 error_code);
 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
 void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
 void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
@@ -309,10 +318,22 @@ unsigned long realmode_get_cr(struct kvm
 void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value,
                     unsigned long *rflags);
 
+void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
+void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
+void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
+void lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
+
+void inject_gp(struct kvm_vcpu *vcpu);
+
+#ifdef __x86_64__
+void set_efer(struct kvm_vcpu *vcpu, u64 efer);
+#endif
+
+
 void load_msrs(struct vmx_msr_entry *e, int n);
 void save_msrs(struct vmx_msr_entry *e, int n);
 void kvm_resched(struct kvm_vcpu *vcpu);
-int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 
 int kvm_read_guest(struct kvm_vcpu *vcpu,
               gva_t addr,
Index: linux-2.6/drivers/kvm/kvm_main.c
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm_main.c
+++ linux-2.6/drivers/kvm/kvm_main.c
@@ -533,7 +533,7 @@ void vmcs_writel(unsigned long field, un
 }
 EXPORT_SYMBOL_GPL(vmcs_writel);
 
-static void inject_gp(struct kvm_vcpu *vcpu)
+void inject_gp(struct kvm_vcpu *vcpu)
 {
        printk(KERN_DEBUG "inject_general_protection: rip 0x%lx\n",
               vmcs_readl(GUEST_RIP));
@@ -544,6 +544,7 @@ static void inject_gp(struct kvm_vcpu *v
                     INTR_INFO_DELIEVER_CODE_MASK |
                     INTR_INFO_VALID_MASK);
 }
+EXPORT_SYMBOL_GPL(inject_gp);
 
 /*
  * reads and returns guest's timestamp counter "register"
@@ -821,7 +822,7 @@ static int pdptrs_have_reserved_bits_set
        return i != 4;
 }
 
-static void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
        if (cr0 & CR0_RESEVED_BITS) {
                printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
@@ -879,8 +880,9 @@ static void set_cr0(struct kvm_vcpu *vcp
        spin_unlock(&vcpu->kvm->lock);
        return;
 }
+EXPORT_SYMBOL_GPL(set_cr0);
 
-static void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
+void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 {
        unsigned long cr0 = vcpu->cr0;
 
@@ -895,6 +897,7 @@ static void lmsw(struct kvm_vcpu *vcpu, 
                                | (msw & LMSW_GUEST_MASK));
        vcpu->cr0 = (vcpu->cr0 & ~0xfffful) | msw;
 }
+EXPORT_SYMBOL_GPL(lmsw);
 
 static void __set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
@@ -904,7 +907,7 @@ static void __set_cr4(struct kvm_vcpu *v
        vcpu->cr4 = cr4;
 }
 
-static void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
        if (cr4 & CR4_RESEVED_BITS) {
                printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
@@ -935,8 +938,9 @@ static void set_cr4(struct kvm_vcpu *vcp
        kvm_mmu_reset_context(vcpu);
        spin_unlock(&vcpu->kvm->lock);
 }
+EXPORT_SYMBOL_GPL(set_cr4);
 
-static void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
        if (is_long_mode()) {
                if ( cr3 & CR3_L_MODE_RESEVED_BITS) {
@@ -964,8 +968,9 @@ static void set_cr3(struct kvm_vcpu *vcp
        vcpu->mmu.new_cr3(vcpu);
        spin_unlock(&vcpu->kvm->lock);
 }
+EXPORT_SYMBOL_GPL(set_cr3);
 
-static void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
+void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
        if ( cr8 & CR8_RESEVED_BITS) {
                printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
@@ -974,6 +979,7 @@ static void set_cr8(struct kvm_vcpu *vcp
        }
        vcpu->cr8 = cr8;
 }
+EXPORT_SYMBOL_GPL(set_cr8);
 
 static u32 get_rdx_init_val(void)
 {
@@ -1534,25 +1540,6 @@ void mark_page_dirty(struct kvm *kvm, gf
        }
 }
 
-static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
-{
-       unsigned long rip;
-       u32 interruptibility;
-
-       rip = vmcs_readl(GUEST_RIP);
-       rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-       vmcs_writel(GUEST_RIP, rip);
-
-       /*
-        * We emulated an instruction, so temporary interrupt blocking
-        * should be removed, if set.
-        */
-       interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-       if (interruptibility & 3)
-               vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
-                            interruptibility & ~3);
-}
-
 static int emulator_read_std(unsigned long addr,
                             unsigned long *val,
                             unsigned int bytes,
@@ -1694,16 +1681,10 @@ struct x86_emulate_ops emulate_ops = {
        .cmpxchg_emulated    = emulator_cmpxchg_emulated,
 };
 
-enum emulation_result {
-       EMULATE_DONE,       /* no further processing */
-       EMULATE_DO_MMIO,      /* kvm_run filled with mmio request */
-       EMULATE_FAIL,         /* can't emulate this instruction */
-};
-
-static int emulate_instruction(struct kvm_vcpu *vcpu,
-                              struct kvm_run *run,
-                              unsigned long cr2,
-                              u16 error_code)
+int emulate_instruction(struct kvm_vcpu *vcpu,
+                       struct kvm_run *run,
+                       unsigned long cr2,
+                       u16 error_code)
 {
        struct x86_emulate_ctxt emulate_ctxt;
        int r;
@@ -1762,6 +1743,7 @@ static int emulate_instruction(struct kv
 
        return EMULATE_DONE;
 }
+EXPORT_SYMBOL_GPL(emulate_instruction);
 
 static u64 mk_cr_64(u64 curr_cr, u32 new_val)
 {
@@ -1826,298 +1808,6 @@ void realmode_set_cr(struct kvm_vcpu *vc
        }
 }
 
-static int handle_rmode_exception(struct kvm_vcpu *vcpu,
-                                 int vec, u32 err_code)
-{
-       if (!vcpu->rmode.active)
-               return 0;
-
-       if (vec == GP_VECTOR && err_code == 0)
-               if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE)
-                       return 1;
-       return 0;
-}
-
-static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       u32 intr_info, error_code;
-       unsigned long cr2, rip;
-       u32 vect_info;
-       enum emulation_result er;
-
-       vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
-       intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-
-       if ((vect_info & VECTORING_INFO_VALID_MASK) &&
-                                               !is_page_fault(intr_info)) {
-               printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
-                      "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info);
-       }
-
-       if (is_external_interrupt(vect_info)) {
-               int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
-               set_bit(irq, vcpu->irq_pending);
-               set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
-       }
-
-       if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
-               asm ("int $2");
-               return 1;
-       }
-       error_code = 0;
-       rip = vmcs_readl(GUEST_RIP);
-       if (intr_info & INTR_INFO_DELIEVER_CODE_MASK)
-               error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
-       if (is_page_fault(intr_info)) {
-               cr2 = vmcs_readl(EXIT_QUALIFICATION);
-
-               spin_lock(&vcpu->kvm->lock);
-               if (!vcpu->mmu.page_fault(vcpu, cr2, error_code)) {
-                       spin_unlock(&vcpu->kvm->lock);
-                       return 1;
-               }
-
-               er = emulate_instruction(vcpu, kvm_run, cr2, error_code);
-               spin_unlock(&vcpu->kvm->lock);
-
-               switch (er) {
-               case EMULATE_DONE:
-                       return 1;
-               case EMULATE_DO_MMIO:
-                       ++kvm_stat.mmio_exits;
-                       kvm_run->exit_reason = KVM_EXIT_MMIO;
-                       return 0;
-                case EMULATE_FAIL:
-                       vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__);
-                       break;
-               default:
-                       BUG();
-               }
-       }
-
-       if (vcpu->rmode.active &&
-           handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
-                                                               error_code))
-               return 1;
-
-       if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == (INTR_TYPE_EXCEPTION | 1)) {
-               kvm_run->exit_reason = KVM_EXIT_DEBUG;
-               return 0;
-       }
-       kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
-       kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
-       kvm_run->ex.error_code = error_code;
-       return 0;
-}
-
-static int handle_external_interrupt(struct kvm_vcpu *vcpu,
-                                    struct kvm_run *kvm_run)
-{
-       ++kvm_stat.irq_exits;
-       return 1;
-}
-
-
-static int get_io_count(struct kvm_vcpu *vcpu, u64 *count)
-{
-       u64 inst;
-       gva_t rip;
-       int countr_size;
-       int i, n;
-
-       if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_VM)) {
-               countr_size = 2;
-       } else {
-               u32 cs_ar = vmcs_read32(GUEST_CS_AR_BYTES);
-
-               countr_size = (cs_ar & AR_L_MASK) ? 8:
-                             (cs_ar & AR_DB_MASK) ? 4: 2;
-       }
-
-       rip =  vmcs_readl(GUEST_RIP);
-       if (countr_size != 8)
-               rip += vmcs_readl(GUEST_CS_BASE);
-
-       n = kvm_read_guest(vcpu, rip, sizeof(inst), &inst);
-
-       for (i = 0; i < n; i++) {
-               switch (((u8*)&inst)[i]) {
-               case 0xf0:
-               case 0xf2:
-               case 0xf3:
-               case 0x2e:
-               case 0x36:
-               case 0x3e:
-               case 0x26:
-               case 0x64:
-               case 0x65:
-               case 0x66:
-                       break;
-               case 0x67:
-                       countr_size = (countr_size == 2) ? 4: (countr_size >> 1);
-               default:
-                       goto done;
-               }
-       }
-       return 0;
-done:
-       countr_size *= 8;
-       *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size));
-       return 1;
-}
-
-static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       u64 exit_qualification;
-
-       ++kvm_stat.io_exits;
-       exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
-       kvm_run->exit_reason = KVM_EXIT_IO;
-       if (exit_qualification & 8)
-               kvm_run->io.direction = KVM_EXIT_IO_IN;
-       else
-               kvm_run->io.direction = KVM_EXIT_IO_OUT;
-       kvm_run->io.size = (exit_qualification & 7) + 1;
-       kvm_run->io.string = (exit_qualification & 16) != 0;
-       kvm_run->io.string_down
-               = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
-       kvm_run->io.rep = (exit_qualification & 32) != 0;
-       kvm_run->io.port = exit_qualification >> 16;
-       if (kvm_run->io.string) {
-               if (!get_io_count(vcpu, &kvm_run->io.count))
-                       return 1;
-               kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS);
-       } else
-               kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */
-       return 0;
-}
-
-static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       u64 address = vmcs_read64(EXIT_QUALIFICATION);
-       int instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-       spin_lock(&vcpu->kvm->lock);
-       vcpu->mmu.inval_page(vcpu, address);
-       spin_unlock(&vcpu->kvm->lock);
-       vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) + instruction_length);
-       return 1;
-}
-
-static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       u64 exit_qualification;
-       int cr;
-       int reg;
-
-#ifdef KVM_DEBUG
-       if (guest_cpl() != 0) {
-               vcpu_printf(vcpu, "%s: not supervisor\n", __FUNCTION__);
-               inject_gp(vcpu);
-               return 1;
-       }
-#endif
-
-       exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
-       cr = exit_qualification & 15;
-       reg = (exit_qualification >> 8) & 15;
-       switch ((exit_qualification >> 4) & 3) {
-       case 0: /* mov to cr */
-               switch (cr) {
-               case 0:
-                       kvm_arch_ops->cache_regs(vcpu);
-                       set_cr0(vcpu, vcpu->regs[reg]);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
-               case 3:
-                       kvm_arch_ops->cache_regs(vcpu);
-                       set_cr3(vcpu, vcpu->regs[reg]);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
-               case 4:
-                       kvm_arch_ops->cache_regs(vcpu);
-                       set_cr4(vcpu, vcpu->regs[reg]);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
-               case 8:
-                       kvm_arch_ops->cache_regs(vcpu);
-                       set_cr8(vcpu, vcpu->regs[reg]);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
-               };
-               break;
-       case 1: /*mov from cr*/
-               switch (cr) {
-               case 3:
-                       kvm_arch_ops->cache_regs(vcpu);
-                       vcpu->regs[reg] = vcpu->cr3;
-                       kvm_arch_ops->decache_regs(vcpu);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
-               case 8:
-                       printk(KERN_DEBUG "handle_cr: read CR8 "
-                              "cpu erratum AA15\n");
-                       kvm_arch_ops->cache_regs(vcpu);
-                       vcpu->regs[reg] = vcpu->cr8;
-                       kvm_arch_ops->decache_regs(vcpu);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
-               }
-               break;
-       case 3: /* lmsw */
-               lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);
-
-               skip_emulated_instruction(vcpu);
-               return 1;
-       default:
-               break;
-       }
-       kvm_run->exit_reason = 0;
-       printk(KERN_ERR "kvm: unhandled control register: op %d cr %d\n",
-              (int)(exit_qualification >> 4) & 3, cr);
-       return 0;
-}
-
-static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       u64 exit_qualification;
-       unsigned long val;
-       int dr, reg;
-
-       /*
-        * FIXME: this code assumes the host is debugging the guest.
-        *        need to deal with guest debugging itself too.
-        */
-       exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
-       dr = exit_qualification & 7;
-       reg = (exit_qualification >> 8) & 15;
-       kvm_arch_ops->cache_regs(vcpu);
-       if (exit_qualification & 16) {
-               /* mov from dr */
-               switch (dr) {
-               case 6:
-                       val = 0xffff0ff0;
-                       break;
-               case 7:
-                       val = 0x400;
-                       break;
-               default:
-                       val = 0;
-               }
-               vcpu->regs[reg] = val;
-       } else {
-               /* mov to dr */
-       }
-       kvm_arch_ops->decache_regs(vcpu);
-       skip_emulated_instruction(vcpu);
-       return 1;
-}
-
-static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       kvm_run->exit_reason = KVM_EXIT_CPUID;
-       return 0;
-}
-
 /*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
@@ -2128,23 +1818,6 @@ static int get_msr(struct kvm_vcpu *vcpu
        return kvm_arch_ops->get_msr(vcpu, msr_index, pdata);
 }
 
-static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       u32 ecx = vcpu->regs[VCPU_REGS_RCX];
-       u64 data;
-
-       if (get_msr(vcpu, ecx, &data)) {
-               inject_gp(vcpu);
-               return 1;
-       }
-
-       /* FIXME: handling of bits 32:63 of rax, rdx */
-       vcpu->regs[VCPU_REGS_RAX] = data & -1u;
-       vcpu->regs[VCPU_REGS_RDX] = (data >> 32) & -1u;
-       skip_emulated_instruction(vcpu);
-       return 1;
-}
-
 #ifdef __x86_64__
 
 void set_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -2175,7 +1848,6 @@ void set_efer(struct kvm_vcpu *vcpu, u64
        if (!(efer & EFER_LMA))
            efer &= ~EFER_LME;
        msr->data = efer;
-       skip_emulated_instruction(vcpu);
 }
 EXPORT_SYMBOL_GPL(set_efer);
 
@@ -2191,90 +1863,6 @@ static int set_msr(struct kvm_vcpu *vcpu
        return kvm_arch_ops->set_msr(vcpu, msr_index, data);
 }
 
-static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       u32 ecx = vcpu->regs[VCPU_REGS_RCX];
-       u64 data = (vcpu->regs[VCPU_REGS_RAX] & -1u)
-               | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32);
-
-       if (set_msr(vcpu, ecx, data) != 0) {
-               inject_gp(vcpu);
-               return 1;
-       }
-
-       if (ecx != MSR_EFER)
-               skip_emulated_instruction(vcpu);
-       return 1;
-}
-
-static int handle_interrupt_window(struct kvm_vcpu *vcpu,
-                                  struct kvm_run *kvm_run)
-{
-       /* Turn off interrupt window reporting. */
-       vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
-                    vmcs_read32(CPU_BASED_VM_EXEC_CONTROL)
-                    & ~CPU_BASED_VIRTUAL_INTR_PENDING);
-       return 1;
-}
-
-static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       skip_emulated_instruction(vcpu);
-       if (vcpu->irq_summary && (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF))
-               return 1;
-
-       kvm_run->exit_reason = KVM_EXIT_HLT;
-       return 0;
-}
-
-/*
- * The exit handlers return 1 if the exit was handled fully and guest execution
- * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
- * to be done to userspace and return 0.
- */
-static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
-                                     struct kvm_run *kvm_run) = {
-       [EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
-       [EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
-       [EXIT_REASON_IO_INSTRUCTION]          = handle_io,
-       [EXIT_REASON_INVLPG]                  = handle_invlpg,
-       [EXIT_REASON_CR_ACCESS]               = handle_cr,
-       [EXIT_REASON_DR_ACCESS]               = handle_dr,
-       [EXIT_REASON_CPUID]                   = handle_cpuid,
-       [EXIT_REASON_MSR_READ]                = handle_rdmsr,
-       [EXIT_REASON_MSR_WRITE]               = handle_wrmsr,
-       [EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window,
-       [EXIT_REASON_HLT]                     = handle_halt,
-};
-
-static const int kvm_vmx_max_exit_handlers =
-       sizeof(kvm_vmx_exit_handlers) / sizeof(*kvm_vmx_exit_handlers);
-
-/*
- * The guest has exited.  See if we can fix it or if we need userspace
- * assistance.
- */
-int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
-{
-       u32 vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
-       u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
-
-       if ( (vectoring_info & VECTORING_INFO_VALID_MASK) &&
-                               exit_reason != EXIT_REASON_EXCEPTION_NMI )
-               printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
-                      "exit reason is 0x%x\n", __FUNCTION__, exit_reason);
-       kvm_run->instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-       if (exit_reason < kvm_vmx_max_exit_handlers
-           && kvm_vmx_exit_handlers[exit_reason])
-               return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
-       else {
-               kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-               kvm_run->hw.hardware_exit_reason = exit_reason;
-       }
-       return 0;
-}
-EXPORT_SYMBOL_GPL(kvm_handle_exit);
-
 void kvm_resched(struct kvm_vcpu *vcpu)
 {
        vcpu_put(vcpu);
@@ -2315,7 +1903,7 @@ static int kvm_dev_ioctl_run(struct kvm 
                return -ENOENT;
 
        if (kvm_run->emulated) {
-               skip_emulated_instruction(vcpu);
+               kvm_arch_ops->skip_emulated_instruction(vcpu);
                kvm_run->emulated = 0;
        }
 
Index: linux-2.6/drivers/kvm/vmx.c
===================================================================
--- linux-2.6.orig/drivers/kvm/vmx.c
+++ linux-2.6/drivers/kvm/vmx.c
@@ -44,11 +44,24 @@ u64 guest_read_tsc(void);
 void guest_write_tsc(u64 guest_tsc);
 struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr);
 
-#ifdef __x86_64__
+static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+       unsigned long rip;
+       u32 interruptibility;
 
-void set_efer(struct kvm_vcpu *vcpu, u64 efer);
+       rip = vmcs_readl(GUEST_RIP);
+       rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+       vmcs_writel(GUEST_RIP, rip);
 
-#endif
+       /*
+        * We emulated an instruction, so temporary interrupt blocking
+        * should be removed, if set.
+        */
+       interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+       if (interruptibility & 3)
+               vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+                            interruptibility & ~3);
+}
 
 static void reload_tss(void)
 {
@@ -521,6 +534,397 @@ static void kvm_guest_debug_pre(struct k
        }
 }
 
+static int handle_rmode_exception(struct kvm_vcpu *vcpu,
+                                 int vec, u32 err_code)
+{
+       if (!vcpu->rmode.active)
+               return 0;
+
+       if (vec == GP_VECTOR && err_code == 0)
+               if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE)
+                       return 1;
+       return 0;
+}
+
+static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+       u32 intr_info, error_code;
+       unsigned long cr2, rip;
+       u32 vect_info;
+       enum emulation_result er;
+
+       vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+       intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+       if ((vect_info & VECTORING_INFO_VALID_MASK) &&
+                                               !is_page_fault(intr_info)) {
+               printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
+                      "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info);
+       }
+
+       if (is_external_interrupt(vect_info)) {
+               int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
+               set_bit(irq, vcpu->irq_pending);
+               set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+       }
+
+       if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
+               asm ("int $2");
+               return 1;
+       }
+       error_code = 0;
+       rip = vmcs_readl(GUEST_RIP);
+       if (intr_info & INTR_INFO_DELIEVER_CODE_MASK)
+               error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+       if (is_page_fault(intr_info)) {
+               cr2 = vmcs_readl(EXIT_QUALIFICATION);
+
+               spin_lock(&vcpu->kvm->lock);
+               if (!vcpu->mmu.page_fault(vcpu, cr2, error_code)) {
+                       spin_unlock(&vcpu->kvm->lock);
+                       return 1;
+               }
+
+               er = emulate_instruction(vcpu, kvm_run, cr2, error_code);
+               spin_unlock(&vcpu->kvm->lock);
+
+               switch (er) {
+               case EMULATE_DONE:
+                       return 1;
+               case EMULATE_DO_MMIO:
+                       ++kvm_stat.mmio_exits;
+                       kvm_run->exit_reason = KVM_EXIT_MMIO;
+                       return 0;
+                case EMULATE_FAIL:
+                       vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__);
+                       break;
+               default:
+                       BUG();
+               }
+       }
+
+       if (vcpu->rmode.active &&
+           handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
+                                                               error_code))
+               return 1;
+
+       if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == (INTR_TYPE_EXCEPTION | 1)) {
+               kvm_run->exit_reason = KVM_EXIT_DEBUG;
+               return 0;
+       }
+       kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
+       kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
+       kvm_run->ex.error_code = error_code;
+       return 0;
+}
+
+static int handle_external_interrupt(struct kvm_vcpu *vcpu,
+                                    struct kvm_run *kvm_run)
+{
+       ++kvm_stat.irq_exits;
+       return 1;
+}
+
+
+static int get_io_count(struct kvm_vcpu *vcpu, u64 *count)
+{
+       u64 inst;
+       gva_t rip;
+       int countr_size;
+       int i, n;
+
+       if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_VM)) {
+               countr_size = 2;
+       } else {
+               u32 cs_ar = vmcs_read32(GUEST_CS_AR_BYTES);
+
+               countr_size = (cs_ar & AR_L_MASK) ? 8:
+                             (cs_ar & AR_DB_MASK) ? 4: 2;
+       }
+
+       rip =  vmcs_readl(GUEST_RIP);
+       if (countr_size != 8)
+               rip += vmcs_readl(GUEST_CS_BASE);
+
+       n = kvm_read_guest(vcpu, rip, sizeof(inst), &inst);
+
+       for (i = 0; i < n; i++) {
+               switch (((u8*)&inst)[i]) {
+               case 0xf0:
+               case 0xf2:
+               case 0xf3:
+               case 0x2e:
+               case 0x36:
+               case 0x3e:
+               case 0x26:
+               case 0x64:
+               case 0x65:
+               case 0x66:
+                       break;
+               case 0x67:
+                       countr_size = (countr_size == 2) ? 4: (countr_size >> 1);
+               default:
+                       goto done;
+               }
+       }
+       return 0;
+done:
+       countr_size *= 8;
+       *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size));
+       return 1;
+}
+
+static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+       u64 exit_qualification;
+
+       ++kvm_stat.io_exits;
+       exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+       kvm_run->exit_reason = KVM_EXIT_IO;
+       if (exit_qualification & 8)
+               kvm_run->io.direction = KVM_EXIT_IO_IN;
+       else
+               kvm_run->io.direction = KVM_EXIT_IO_OUT;
+       kvm_run->io.size = (exit_qualification & 7) + 1;
+       kvm_run->io.string = (exit_qualification & 16) != 0;
+       kvm_run->io.string_down
+               = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
+       kvm_run->io.rep = (exit_qualification & 32) != 0;
+       kvm_run->io.port = exit_qualification >> 16;
+       if (kvm_run->io.string) {
+               if (!get_io_count(vcpu, &kvm_run->io.count))
+                       return 1;
+               kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+       } else
+               kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */
+       return 0;
+}
+
+static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+       u64 address = vmcs_read64(EXIT_QUALIFICATION);
+       int instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+       spin_lock(&vcpu->kvm->lock);
+       vcpu->mmu.inval_page(vcpu, address);
+       spin_unlock(&vcpu->kvm->lock);
+       vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) + instruction_length);
+       return 1;
+}
+
+static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+       u64 exit_qualification;
+       int cr;
+       int reg;
+
+#ifdef KVM_DEBUG
+       if (guest_cpl() != 0) {
+               vcpu_printf(vcpu, "%s: not supervisor\n", __FUNCTION__);
+               inject_gp(vcpu);
+               return 1;
+       }
+#endif
+
+       exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+       cr = exit_qualification & 15;
+       reg = (exit_qualification >> 8) & 15;
+       switch ((exit_qualification >> 4) & 3) {
+       case 0: /* mov to cr */
+               switch (cr) {
+               case 0:
+                       vcpu_load_rsp_rip(vcpu);
+                       set_cr0(vcpu, vcpu->regs[reg]);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               case 3:
+                       vcpu_load_rsp_rip(vcpu);
+                       set_cr3(vcpu, vcpu->regs[reg]);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               case 4:
+                       vcpu_load_rsp_rip(vcpu);
+                       set_cr4(vcpu, vcpu->regs[reg]);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               case 8:
+                       vcpu_load_rsp_rip(vcpu);
+                       set_cr8(vcpu, vcpu->regs[reg]);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               };
+               break;
+       case 1: /*mov from cr*/
+               switch (cr) {
+               case 3:
+                       vcpu_load_rsp_rip(vcpu);
+                       vcpu->regs[reg] = vcpu->cr3;
+                       vcpu_put_rsp_rip(vcpu);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               case 8:
+                       printk(KERN_DEBUG "handle_cr: read CR8 "
+                              "cpu erratum AA15\n");
+                       vcpu_load_rsp_rip(vcpu);
+                       vcpu->regs[reg] = vcpu->cr8;
+                       vcpu_put_rsp_rip(vcpu);
+                       skip_emulated_instruction(vcpu);
+                       return 1;
+               }
+               break;
+       case 3: /* lmsw */
+               lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);
+
+               skip_emulated_instruction(vcpu);
+               return 1;
+       default:
+               break;
+       }
+       kvm_run->exit_reason = 0;
+       printk(KERN_ERR "kvm: unhandled control register: op %d cr %d\n",
+              (int)(exit_qualification >> 4) & 3, cr);
+       return 0;
+}
+
+static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+       u64 exit_qualification;
+       unsigned long val;
+       int dr, reg;
+
+       /*
+        * DR reads return fixed values and DR writes are ignored.  FIXME: this
+        * assumes the host is debugging the guest; guest self-debug needs work.
+        */
+       exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+       dr = exit_qualification & 7; /* bits 2:0 select the debug register */
+       reg = (exit_qualification >> 8) & 15; /* bits 11:8 index vcpu->regs[] */
+       vcpu_load_rsp_rip(vcpu);
+       if (exit_qualification & 16) { /* bit 4 set: direction is DR -> reg */
+               /* mov from dr */
+               switch (dr) {
+               case 6:
+                       val = 0xffff0ff0; /* fixed value reported for DR6 */
+                       break;
+               case 7:
+                       val = 0x400; /* fixed value reported for DR7 */
+                       break;
+               default:
+                       val = 0; /* every other DR reads as zero */
+               }
+               vcpu->regs[reg] = val;
+       } else {
+               /* mov to dr: intentionally empty, the written value is dropped */
+       }
+       vcpu_put_rsp_rip(vcpu);
+       skip_emulated_instruction(vcpu);
+       return 1; /* handled here; guest execution may resume */
+}
+
+static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{ /* CPUID exit: not emulated here, only reported through kvm_run. */
+       kvm_run->exit_reason = KVM_EXIT_CPUID;
+       return 0; /* 0: userspace assistance is needed */
+}
+
+static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{ /* RDMSR exit: read the MSR selected by guest RCX into guest RAX/RDX. */
+       u32 ecx = vcpu->regs[VCPU_REGS_RCX]; /* truncated to the low 32 bits */
+       u64 data;
+
+       if (vmx_get_msr(vcpu, ecx, &data)) { /* non-zero return: read failed */
+               inject_gp(vcpu);
+               return 1; /* instruction is not skipped on this path */
+       }
+
+       /* FIXME: handling of bits 32:63 of rax, rdx */
+       vcpu->regs[VCPU_REGS_RAX] = data & -1u; /* low 32 bits of the MSR */
+       vcpu->regs[VCPU_REGS_RDX] = (data >> 32) & -1u; /* high 32 bits */
+       skip_emulated_instruction(vcpu);
+       return 1; /* handled here; guest execution may resume */
+}
+
+static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{ /* WRMSR exit: write guest RDX:RAX (low 32 bits each) to MSR in RCX. */
+       u32 ecx = vcpu->regs[VCPU_REGS_RCX]; /* truncated to the low 32 bits */
+       u64 data = (vcpu->regs[VCPU_REGS_RAX] & -1u) /* low half of value */
+               | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32); /* high half */
+
+       if (vmx_set_msr(vcpu, ecx, data) != 0) { /* non-zero: write failed */
+               inject_gp(vcpu);
+               return 1; /* instruction is not skipped on this path */
+       }
+
+       skip_emulated_instruction(vcpu);
+       return 1; /* handled here; guest execution may resume */
+}
+
+static int handle_interrupt_window(struct kvm_vcpu *vcpu,
+                                  struct kvm_run *kvm_run)
+{
+       /* Turn off interrupt window reporting by clearing the control bit. */
+       vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
+                    vmcs_read32(CPU_BASED_VM_EXEC_CONTROL)
+                    & ~CPU_BASED_VIRTUAL_INTR_PENDING); /* read-modify-write */
+       return 1; /* nothing for userspace to do; guest may resume */
+}
+
+static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{ /* HLT exit: resume if irq_summary is set and IF=1, else go to userspace. */
+       skip_emulated_instruction(vcpu); /* done first, on both return paths */
+       if (vcpu->irq_summary && (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF))
+               return 1; /* NOTE(review): irq_summary presumably = pending irqs */
+
+       kvm_run->exit_reason = KVM_EXIT_HLT;
+       return 0; /* 0: userspace assistance is needed */
+}
+
+/*
+ * The exit handlers return 1 if the exit was handled fully and guest execution
+ * may resume.  Otherwise they set the kvm_run parameter to indicate what
+ * userspace needs to do and return 0.  The table is indexed by exit reason.
+ */
+static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
+                                     struct kvm_run *kvm_run) = {
+       [EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
+       [EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
+       [EXIT_REASON_IO_INSTRUCTION]          = handle_io,
+       [EXIT_REASON_INVLPG]                  = handle_invlpg,
+       [EXIT_REASON_CR_ACCESS]               = handle_cr,
+       [EXIT_REASON_DR_ACCESS]               = handle_dr,
+       [EXIT_REASON_CPUID]                   = handle_cpuid,
+       [EXIT_REASON_MSR_READ]                = handle_rdmsr,
+       [EXIT_REASON_MSR_WRITE]               = handle_wrmsr,
+       [EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window,
+       [EXIT_REASON_HLT]                     = handle_halt,
+};
+
+static const int kvm_vmx_max_exit_handlers = /* table slots, gaps included */
+       sizeof(kvm_vmx_exit_handlers) / sizeof(*kvm_vmx_exit_handlers);
+
+/*
+ * The guest has exited.  See if we can fix it or if we need userspace
+ * assistance.  Returns the handler's result, or 0 for an unhandled exit reason.
+ */
+static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+       u32 vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+       u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
+
+       if ( (vectoring_info & VECTORING_INFO_VALID_MASK) &&
+                               exit_reason != EXIT_REASON_EXCEPTION_NMI )
+               printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
+                      "exit reason is 0x%x\n", __FUNCTION__, exit_reason);
+       kvm_run->instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+       if (exit_reason < kvm_vmx_max_exit_handlers /* bounds check first */
+           && kvm_vmx_exit_handlers[exit_reason]) /* then skip empty slots */
+               return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
+       else { /* out of table range, or no handler registered */
+               kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+               kvm_run->hw.hardware_exit_reason = exit_reason;
+       }
+       return 0; /* 0: userspace assistance is needed */
+}
+
 static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        u8 fail;
@@ -753,6 +1157,7 @@ static struct kvm_arch_ops vmx_arch_ops 
        .decache_regs = vcpu_put_rsp_rip,
 
        .run = vmx_vcpu_run,
+       .skip_emulated_instruction = skip_emulated_instruction,
        .vmx_return = (unsigned long)kvm_vmx_return,
 };
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to