[This resend adds the sign-offs and fixes a typo that was in the previous patch]

This patch allows emulating the guest whenever its state is not valid for VMX 
operation, which occurs when guests run big real mode code, e.g. older versions 
of gfxboot and FreeDOS with HIMEM.

To address this, the patch introduces the following:

- A function that invokes the x86 emulator when the guest state is not valid 
  (borrowed from Guillaume Thouvenin's real mode patches)
- A function that checks whether the guest register state is VMX-compliant
- A module parameter that enables this behaviour. It is disabled by default so 
  as not to interfere with KVM's normal operation (see the usage note below)

The next step towards correctly emulating real mode is to add more instructions 
to the x86 emulator as the need for them arises.
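
As a usage note (not part of the patch itself): since vmx.c is built into the
kvm-intel module, the new code path would be enabled by loading the module with
the parameter set, e.g.

    modprobe kvm-intel emulate_invalid_guest_state=1

With the parameter left at its default of 0, KVM keeps using the existing
fix_pmode_dataseg()/fix_rmode_seg() paths unchanged.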

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Guillaume Thouvenin <[EMAIL PROTECTED]>
Signed-off-by: Mohammed Gamal <[EMAIL PROTECTED]>
---
 arch/x86/kvm/vmx.c |  232 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 224 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c4510fe..2b5dd68 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -49,6 +49,9 @@ module_param(flexpriority_enabled, bool, 0);
 static int enable_ept = 1;
 module_param(enable_ept, bool, 0);
 
+static int emulate_invalid_guest_state = 0;
+module_param(emulate_invalid_guest_state, bool, 0);
+
 struct vmcs {
        u32 revision_id;
        u32 abort;
@@ -95,6 +98,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 
 static int init_rmode(struct kvm *kvm);
 static u64 construct_eptp(unsigned long root_hpa);
+static int invalid_guest_state_handler(struct kvm_vcpu *vcpu);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -1275,6 +1279,177 @@ static __exit void hardware_unsetup(void)
        free_kvm_area();
 }
 
+/*
+ * Check if guest state is valid. Returns true if valid, false if
+ * not.
+ * We assume that registers are always usable
+ */
+static bool guest_state_valid(struct kvm_vcpu *vcpu)
+{
+       u16 cs, ds, ss, es, fs, gs, tr, ldtr;
+       u64 cs_limit, ds_limit, ss_limit, es_limit, fs_limit, gs_limit;
+       u64 tr_limit, ldtr_limit;
+       u32 cs_ar, ds_ar, ss_ar, es_ar, fs_ar, gs_ar, tr_ar, ldtr_ar;
+       
+       cs = vmcs_read16(GUEST_CS_SELECTOR);
+       ds = vmcs_read16(GUEST_DS_SELECTOR);
+       ss = vmcs_read16(GUEST_SS_SELECTOR);
+       es = vmcs_read16(GUEST_ES_SELECTOR);
+       fs = vmcs_read16(GUEST_FS_SELECTOR);
+       gs = vmcs_read16(GUEST_GS_SELECTOR);
+       tr = vmcs_read16(GUEST_TR_SELECTOR);
+       ldtr = vmcs_read16(GUEST_LDTR_SELECTOR);
+
+       cs_limit = vmcs_readl(GUEST_CS_LIMIT);
+       ds_limit = vmcs_readl(GUEST_DS_LIMIT);
+       ss_limit = vmcs_readl(GUEST_SS_LIMIT);
+       es_limit = vmcs_readl(GUEST_ES_LIMIT);
+       fs_limit = vmcs_readl(GUEST_FS_LIMIT);
+       gs_limit = vmcs_readl(GUEST_GS_LIMIT);
+       tr_limit = vmcs_readl(GUEST_TR_LIMIT);
+       ldtr_limit = vmcs_readl(GUEST_LDTR_LIMIT);
+
+       cs_ar = vmcs_read32(GUEST_CS_AR_BYTES);
+       ds_ar = vmcs_read32(GUEST_DS_AR_BYTES);
+       ss_ar = vmcs_read32(GUEST_SS_AR_BYTES);
+       es_ar = vmcs_read32(GUEST_ES_AR_BYTES);
+       fs_ar = vmcs_read32(GUEST_FS_AR_BYTES);
+       gs_ar = vmcs_read32(GUEST_GS_AR_BYTES);
+       tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
+       ldtr_ar = vmcs_read32(GUEST_LDTR_AR_BYTES);
+
+       if(tr & 0x04)   /* TI = 1 */
+               return false;
+
+       if(ldtr & 0x04) /* TI = 1 */
+               return false;
+
+       /* vm86 mode guest state checks */
+       if(vcpu->arch.rmode.active) {
+               /* Check segment limits */
+               if( (cs_limit != 0xffff) || (ds_limit != 0xffff) ||
+                   (ss_limit != 0xffff) || (es_limit != 0xffff) ||
+                   (fs_limit != 0xffff) || (gs_limit != 0xffff) )
+                       return false;
+               
+               /* Check access rights */
+               if( (cs_ar != 0xf3) || (ds_ar != 0xf3) || (ss_ar != 0xf3) ||
+                   (es_ar != 0xf3) || (fs_ar != 0xf3) || (gs_ar != 0xf3) )
+                       return false;
+       }
+       else { /* protected mode guest state checks */
+               
+               /* SS RPL bits must equal CS RPL bits */ 
+               if((cs & 0x03) != (ss & 0x03))
+                       return false;
+               
+               /* Begin access rights bits check */
+               
+               /* Type field checks */
+               /* CS must be an accessed code segment */
+               if( !(cs_ar & AR_TYPE_ACCESSES_MASK) || !(cs_ar & AR_TYPE_CODE_MASK) )
+                       return false;
+               /* SS must be a writable data segment (type 3 or 7) */
+               if( ((ss_ar & AR_TYPE_MASK) != 3) && ((ss_ar & AR_TYPE_MASK) != 7) )
+                       return false;
+               /* DS/ES/FS/GS must be accessed segments */
+               if( !(ds_ar & AR_TYPE_ACCESSES_MASK) || !(es_ar & AR_TYPE_ACCESSES_MASK) ||
+                   !(fs_ar & AR_TYPE_ACCESSES_MASK) || !(gs_ar & AR_TYPE_ACCESSES_MASK) )
+                       return false;
+               /* DS/ES/FS/GS code segments must also be readable */
+               if( ((ds_ar & AR_TYPE_CODE_MASK) && !(ds_ar & AR_TYPE_READABLE_MASK)) ||
+                   ((es_ar & AR_TYPE_CODE_MASK) && !(es_ar & AR_TYPE_READABLE_MASK)) ||
+                   ((fs_ar & AR_TYPE_CODE_MASK) && !(fs_ar & AR_TYPE_READABLE_MASK)) ||
+                   ((gs_ar & AR_TYPE_CODE_MASK) && !(gs_ar & AR_TYPE_READABLE_MASK)) )
+                       return false;
+
+               /* S field checks: all of these must be code or data segments */
+               if( !(cs_ar & AR_S_MASK) || !(ds_ar & AR_S_MASK) || !(ss_ar & AR_S_MASK) ||
+                   !(es_ar & AR_S_MASK) || !(fs_ar & AR_S_MASK) || !(gs_ar & AR_S_MASK) )
+                       return false;
+
+               /* DPL field checks */
+               if( ((cs_ar & AR_TYPE_MASK) <= 0xb) && ((cs_ar & AR_TYPE_MASK) >= 0x8) ) {
+                       if(AR_DPL(cs_ar) != (cs & 0x03))
+                               return false;
+               }
+               else if( ((cs_ar & AR_TYPE_MASK) <= 0xf) && ((cs_ar & AR_TYPE_MASK) >= 0xc) ) {
+                       if(AR_DPL(cs_ar) > (cs & 0x03))
+                               return false;
+               }
+
+               if(AR_DPL(ss_ar) != (ss & 0x03))
+                       return false;
+
+               if((ds_ar & AR_TYPE_MASK) <= 0xb)
+                       if(AR_DPL(ds_ar) < (ds & 0x03))
+                               return false;
+               if((es_ar & AR_TYPE_MASK) <= 0xb)
+                       if(AR_DPL(es_ar) < (es & 0x03))
+                               return false;
+               if((fs_ar & AR_TYPE_MASK) <= 0xb)
+                       if(AR_DPL(fs_ar) < (fs & 0x03))
+                               return false;
+               if((gs_ar & AR_TYPE_MASK) <= 0xb)
+                       if(AR_DPL(gs_ar) < (gs & 0x03))
+                               return false; 
+
+               /* P field checks: all of these segments must be present */
+               if( !(cs_ar & AR_P_MASK) || !(ds_ar & AR_P_MASK) || !(ss_ar & AR_P_MASK) ||
+                   !(es_ar & AR_P_MASK) || !(fs_ar & AR_P_MASK) || !(gs_ar & AR_P_MASK) )
+                       return false;
+
+               /* Reserved fields check */
+               if( (cs_ar & AR_RESERVD_MASK) || (ds_ar & AR_RESERVD_MASK) || 
+                   (ss_ar & AR_RESERVD_MASK) || (es_ar & AR_RESERVD_MASK) ||
+                   (fs_ar & AR_RESERVD_MASK) || (gs_ar & AR_RESERVD_MASK) )
+                       return false;
+
+               /* TODO: 
+                * - Add checks on G and D/B fields
+                * - Add checks on the unusable mask
+                */
+       }
+
+       /* TR access rights bits checks */
+       if(tr_ar & AR_S_MASK)
+               return false;
+       if(!(tr_ar & AR_P_MASK))
+               return false;
+       if(tr_ar & AR_RESERVD_MASK)
+               return false;
+       if(tr_ar & AR_UNUSABLE_MASK)
+               return false;
+       if((tr_limit & 0x00000fff) != 0x00000fff) {
+               if(tr_ar & AR_G_MASK)
+                       return false;
+       }
+       else if(tr_limit & 0xffff0000) {
+               if(!(tr_ar & AR_G_MASK))
+                       return false;
+       }
+       
+       /* LDTR access right bits checks */
+       if(!(ldtr_ar & AR_TYPE_LDT))
+               return false;
+       if(ldtr_ar & AR_S_MASK)
+               return false;
+       if(!(ldtr_ar & AR_P_MASK))
+               return false;
+       if(ldtr_ar & AR_RESERVD_MASK)
+               return false;
+       if((ldtr_limit & 0x00000fff) != 0x00000fff) {
+               if(ldtr_ar & AR_G_MASK)
+                       return false;
+       }
+       else if(ldtr_limit & 0xffff0000) {
+               if(!(ldtr_ar & AR_G_MASK))
+                       return false;
+       }
+
+       /* TODO:
+        * - Add checks on RIP
+        * - Add checks on RFLAGS
+        */
+       
+       return true;
+}
+
 static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
 {
        struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1311,10 +1486,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 
        update_exception_bitmap(vcpu);
 
-       fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
-       fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
-       fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
-       fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+       if(!emulate_invalid_guest_state) {
+               fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
+               fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
+               fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
+               fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+       }
 
        vmcs_write16(GUEST_SS_SELECTOR, 0);
        vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@@ -1322,6 +1499,13 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
        vmcs_write16(GUEST_CS_SELECTOR,
                     vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
        vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+
+       if(emulate_invalid_guest_state) {
+               while(!guest_state_valid(vcpu)) {
+                       if(!invalid_guest_state_handler(vcpu))
+                               break;
+               }
+       }
 }
 
 static gva_t rmode_tss_base(struct kvm *kvm)
@@ -1383,13 +1567,22 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
                vmcs_writel(GUEST_CS_BASE, 0xf0000);
        vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
 
-       fix_rmode_seg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
-       fix_rmode_seg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
-       fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
-       fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+       if(!emulate_invalid_guest_state) {
+               fix_rmode_seg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
+               fix_rmode_seg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
+               fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
+               fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+       }
 
        kvm_mmu_reset_context(vcpu);
        init_rmode(vcpu->kvm);
+       
+       if(emulate_invalid_guest_state) {
+               while(!guest_state_valid(vcpu)) {
+                       if(!invalid_guest_state_handler(vcpu))
+                               break;
+               }
+       }
 }
 
 #ifdef CONFIG_X86_64
@@ -2708,6 +2901,29 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        return 1;
 }
 
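+/*
+ * Emulate a single instruction at the current guest RIP.  Returns 1 if
+ * emulation succeeded (so the caller may retry the state check), 0 if
+ * emulation failed or requires MMIO handling.
+ */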
+static int invalid_guest_state_handler(struct kvm_vcpu *vcpu)
+{
+       u8 opcodes[4];
+       unsigned long rip = kvm_rip_read(vcpu);
+       unsigned long rip_linear;
+       int err;
+       
+       rip_linear = rip + vmx_get_segment_base(vcpu, VCPU_SREG_CS);
+       emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
+       err = emulate_instruction(vcpu, NULL, 0, 0, 0);
+               
+       switch (err) {
+               case EMULATE_DONE:
+                       return 1;
+               case EMULATE_DO_MMIO:
+                       printk(KERN_INFO "mmio?\n");
+                       return 0;
+               default:
+                       kvm_report_emulation_failure(vcpu, "vmentry failure");
+                       return 0;
+       }
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
