[Patch V2 1/2] x86,mce: Basic support to add LMCE support to QEMU

2015-12-10 Thread Ashok Raj
This patch adds the basic enumeration and control MSRs required to support
Local Machine Check Exception (LMCE).

- Added Local Machine Check definitions, changed MCG_CAP
- Added support for IA32_FEATURE_CONTROL.
- When delivering MCE to guest, we deliver to just a single CPU
  when guest OS has opted in to Local delivery.

Signed-off-by: Ashok Raj <ashok@intel.com>
Tested-by: Gong Chen <gong.c...@intel.com>
---
Resending with proper commit message for second patch

 target-i386/cpu.c |  8 
 target-i386/cpu.h |  8 ++--
 target-i386/kvm.c | 38 +++---
 3 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 11e5e39..167669a 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2737,6 +2737,13 @@ static void mce_init(X86CPU *cpu)
 }
 }
 
+static void feature_control_init(X86CPU *cpu)
+{
+   CPUX86State *cenv = &cpu->env;
+
+   cenv->msr_ia32_feature_control = ((1<<20) | (1<<0));
+}
+
 #ifndef CONFIG_USER_ONLY
 static void x86_cpu_apic_create(X86CPU *cpu, Error **errp)
 {
@@ -2858,6 +2865,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error 
**errp)
 #endif
 
 mce_init(cpu);
+feature_control_init(cpu);
 
 #ifndef CONFIG_USER_ONLY
 if (tcg_enabled()) {
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 84edfd0..a567d7a 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -282,8 +282,9 @@
 
 #define MCG_CTL_P   (1ULL<<8)   /* MCG_CAP register available */
 #define MCG_SER_P   (1ULL<<24) /* MCA recovery/new status bits */
+#define MCG_LMCE_P (1ULL<<27) /* Local Machine Check Supported */
 
-#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P)
+#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P|MCG_LMCE_P)
 #define MCE_BANKS_DEF   10
 
 #define MCG_CAP_BANKS_MASK 0xff
@@ -291,6 +292,7 @@
 #define MCG_STATUS_RIPV (1ULL<<0)   /* restart ip valid */
 #define MCG_STATUS_EIPV (1ULL<<1)   /* ip points to correct instruction */
 #define MCG_STATUS_MCIP (1ULL<<2)   /* machine check in progress */
+#define MCG_STATUS_LMCE (1ULL<<3)   /* Local MCE signaled */
 
 #define MCI_STATUS_VAL   (1ULL<<63)  /* valid error */
 #define MCI_STATUS_OVER  (1ULL<<62)  /* previous errors lost */
@@ -333,6 +335,7 @@
 #define MSR_MCG_CAP 0x179
 #define MSR_MCG_STATUS  0x17a
 #define MSR_MCG_CTL 0x17b
+#define MSR_MCG_EXT_CTL 0x4d0
 
 #define MSR_P6_EVNTSEL0 0x186
 
@@ -892,7 +895,6 @@ typedef struct CPUX86State {
 
 uint64_t mcg_status;
 uint64_t msr_ia32_misc_enable;
-uint64_t msr_ia32_feature_control;
 
 uint64_t msr_fixed_ctr_ctrl;
 uint64_t msr_global_ctrl;
@@ -977,8 +979,10 @@ typedef struct CPUX86State {
 int64_t tsc_khz;
 void *kvm_xsave_buf;
 
+uint64_t msr_ia32_feature_control;
 uint64_t mcg_cap;
 uint64_t mcg_ctl;
+uint64_t mcg_ext_ctl;
 uint64_t mce_banks[MCE_BANKS_DEF*4];
 
 uint64_t tsc_aux;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 6dc9846..c61fe1f 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -72,6 +72,7 @@ static bool has_msr_tsc_aux;
 static bool has_msr_tsc_adjust;
 static bool has_msr_tsc_deadline;
 static bool has_msr_feature_control;
+static bool has_msr_ext_mcg_ctl;
 static bool has_msr_async_pf_en;
 static bool has_msr_pv_eoi_en;
 static bool has_msr_misc_enable;
@@ -370,18 +371,30 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int 
code)
 uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
   MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S;
 uint64_t mcg_status = MCG_STATUS_MCIP;
+int flags = 0;
+CPUState *cs = CPU(cpu);
+
+/*
+ * We need to read back the value of MSR_EXT_MCG_CTL that was set by the
+ * guest kernel back into Qemu
+ */
+cpu_synchronize_state(cs);
+
+flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0;
 
 if (code == BUS_MCEERR_AR) {
-status |= MCI_STATUS_AR | 0x134;
-mcg_status |= MCG_STATUS_EIPV;
+   status |= MCI_STATUS_AR | 0x134;
+   mcg_status |= MCG_STATUS_EIPV;
+   if (env->mcg_ext_ctl & 0x1) {
+   mcg_status |= MCG_STATUS_LMCE;
+   flags = 0; /* No Broadcast when LMCE is opted by guest */
+   }
 } else {
 status |= 0xc0;
 mcg_status |= MCG_STATUS_RIPV;
 }
 cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr,
-   (MCM_ADDR_PHYS << 6) | 0xc,
-   cpu_x86_support_mca_broadcast(env) ?
-   MCE_INJECT_BROADCAST : 0);
+  (MCM_ADDR_PHYS << 6) | 0xc, flags);
 }
 
 static void hardware_memory_error(void)
@@ -808,10 +821,14 @@ int kvm_arch_init_vcpu(CPUState *cs)
 
 c = cpuid_fi

[Patch V2 2/2] x86, mce: Need to translate GPA to HPA to inject error in guest.

2015-12-10 Thread Ashok Raj
From: Gong Chen 

When we need to test error injection to a specific address using EINJ,
there needs to be a way to translate a GPA to an HPA. This allows host EINJ
to inject an error so that we can test how the guest behaves when a bad
address is consumed, and it permits the guest OS to perform its own recovery.

Signed-off-by: Gong Chen 
---
Sorry about the spam :-(.
Resending with a proper commit message. The previous one had a bogus From;
fixed that before sending.

 hmp-commands.hx   | 14 ++
 include/exec/memory.h |  2 ++
 kvm-all.c | 24 
 memory.c  | 13 +
 monitor.c | 16 
 5 files changed, 69 insertions(+)
 mode change 100644 => 100755 include/exec/memory.h
 mode change 100644 => 100755 kvm-all.c
 mode change 100644 => 100755 memory.c
 mode change 100644 => 100755 monitor.c

diff --git a/hmp-commands.hx b/hmp-commands.hx
index bb52e4d..673c00e 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -444,6 +444,20 @@ Start gdbserver session (default @var{port}=1234)
 ETEXI
 
 {
+.name = "x-gpa2hva",
+.args_type= "fmt:/,addr:l",
+.params   = "/fmt addr",
+.help = "translate guest physical 'addr' to host virtual 
address, only for debugging",
+.mhandler.cmd = do_gpa2hva,
+},
+
+STEXI
+@item x-gpa2hva @var{addr}
+@findex x-gpa2hva
+Translate guest physical @var{addr} to host virtual address, only for 
debugging.
+ETEXI
+
+{
 .name   = "x",
 .args_type  = "fmt:/,addr:l",
 .params = "/fmt addr",
diff --git a/include/exec/memory.h b/include/exec/memory.h
old mode 100644
new mode 100755
index 0f07159..57d7bf8
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -222,6 +222,7 @@ struct MemoryListener {
hwaddr addr, hwaddr len);
 void (*coalesced_mmio_del)(MemoryListener *listener, MemoryRegionSection 
*section,
hwaddr addr, hwaddr len);
+int  (*translate_gpa2hva)(MemoryListener *listener, uint64_t paddr, 
uint64_t *vaddr);
 /* Lower = earlier (during add), later (during del) */
 unsigned priority;
 AddressSpace *address_space_filter;
@@ -1123,6 +1124,7 @@ void memory_global_dirty_log_start(void);
 void memory_global_dirty_log_stop(void);
 
 void mtree_info(fprintf_function mon_printf, void *f);
+int  memory_translate_gpa2hva(hwaddr paddr, uint64_t *vaddr);
 
 /**
  * memory_region_dispatch_read: perform a read directly to the specified
diff --git a/kvm-all.c b/kvm-all.c
old mode 100644
new mode 100755
index c648b81..cb029be
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -197,6 +197,29 @@ static KVMSlot 
*kvm_lookup_overlapping_slot(KVMMemoryListener *kml,
 return found;
 }
 
+
+static int kvm_translate_gpa2hva(MemoryListener *listener, uint64_t paddr, 
uint64_t *vaddr)
+{
+KVMState *s = kvm_state;
+KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, 
listener);
+KVMSlot *mem = NULL;
+int i;
+
+for (i = 0; i < s->nr_slots; i++) {
+mem = &kml->slots[i];
+if (paddr >= mem->start_addr && paddr < mem->start_addr + 
mem->memory_size) {
+*vaddr = (uint64_t)mem->ram + paddr - mem->start_addr;
+break;
+   }
+}
+
+if (i == s->nr_slots) {
+fprintf(stderr, "fail to find target physical addr(%ld) in KVM memory 
range\n", paddr);
+   return 1;
+}
+return 0;
+}
+
 int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
hwaddr *phys_addr)
 {
@@ -902,6 +925,7 @@ void kvm_memory_listener_register(KVMState *s, 
KVMMemoryListener *kml,
 kml->listener.log_start = kvm_log_start;
 kml->listener.log_stop = kvm_log_stop;
 kml->listener.log_sync = kvm_log_sync;
+kml->listener.translate_gpa2hva = kvm_translate_gpa2hva;
 kml->listener.priority = 10;
 
 memory_listener_register(&kml->listener, as);
diff --git a/memory.c b/memory.c
old mode 100644
new mode 100755
index e193658..979dcf8
--- a/memory.c
+++ b/memory.c
@@ -2294,6 +2294,19 @@ static const TypeInfo memory_region_info = {
 .instance_finalize  = memory_region_finalize,
 };
 
+int memory_translate_gpa2hva(hwaddr paddr, uint64_t *vaddr){
+MemoryListener *ml = NULL;
+int ret = 1;
+
+QTAILQ_FOREACH(ml, &memory_listeners, link) {
+if(ml->translate_gpa2hva)
+ret = ml->translate_gpa2hva(ml, paddr, vaddr);
+   if(0 == ret)
+   break;
+}
+return ret;
+}
+
 static void memory_register_types(void)
 {
 type_register_static(&memory_region_info);
diff --git a/monitor.c b/monitor.c
old mode 100644
new mode 100755
index 9a35d72..408e1fa
--- a/monitor.c
+++ b/monitor.c
@@ -76,6 +76,7 @@
 #include "qapi-event.h"
 #include "qmp-introspect.h"
 #include "sysemu/block-backend.h"
+#include "exec/memory.h"
 
 /* for hmp_info_irq/pic */
 #if defined(TARGET_SPARC)
@@ -1681,6 +1682,21 @@ static 

[Patch V0] This patch adds some support required for KVM in order to support LMCE.

2015-12-09 Thread Ashok Raj
- Add support for MSR_IA32_MCG_EXT_CTL
- Add MCG_LMCE_P to KVM_MCE_CAP_SUPPORTED
- Changes to IA32_FEATURE_CONTROL: allow this MSR to be defined not just for
  nested VMM; it is now also required for Local MCE.

Reviewed-by: Andi Kleen <andi.kl...@intel.com>
Reviewed-by: Tony Luck <tony.l...@intel.com>
Tested-by: Gong Chen <gong.c...@intel.com>
Signed-off-by: Ashok Raj <ashok@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx.c  | 26 +-
 arch/x86/kvm/x86.c  | 17 -
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 30cfd64..6940141 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -525,6 +525,7 @@ struct kvm_vcpu_arch {
u64 mcg_cap;
u64 mcg_status;
u64 mcg_ctl;
+   u64 mcg_ext_ctl;
u64 *mce_banks;
 
/* Cache MMIO info */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 87acc52..c2ce9f4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2747,6 +2747,20 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 
msr_index, u64 *pdata)
return 0;
 }
 
+bool can_feature_control_exist(struct kvm_vcpu *vcpu)
+{
+   /*
+* There are some features that require BIOS enabling.
+* In such cases BIOS is supposed to set this bit and indicate
+* the feature is enabled and available to the OS.
+* Local Machine Check Exception (LMCE) is one such feature.
+*/
+   if (vcpu->arch.mcg_cap & MCG_LMCE_P)
+   return true;
+
+   return (nested_vmx_allowed(vcpu));
+}
+
 /*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
@@ -2789,9 +2803,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
msr_info->data = vmcs_read64(GUEST_BNDCFGS);
break;
case MSR_IA32_FEATURE_CONTROL:
-   if (!nested_vmx_allowed(vcpu))
+   if (can_feature_control_exist(vcpu))
+   msr_info->data =
+   to_vmx(vcpu)->nested.msr_ia32_feature_control;
+   else
return 1;
-   msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
if (!nested_vmx_allowed(vcpu))
@@ -2882,9 +2898,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
ret = kvm_set_msr_common(vcpu, msr_info);
break;
case MSR_IA32_FEATURE_CONTROL:
-   if (!nested_vmx_allowed(vcpu) ||
-   (to_vmx(vcpu)->nested.msr_ia32_feature_control &
-FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
+   if ((can_feature_control_exist(vcpu) == false) ||
+   ((to_vmx(vcpu)->nested.msr_ia32_feature_control &
+FEATURE_CONTROL_LOCKED) && !msr_info->host_initiated))
return 1;
vmx->nested.msr_ia32_feature_control = data;
if (msr_info->host_initiated && data == 0)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00462bd..0da3871 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -70,7 +70,7 @@
 
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
-#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
+#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P | MCG_LMCE_P)
 
 #define emul_to_vcpu(ctxt) \
container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
@@ -974,6 +974,7 @@ static u32 emulated_msrs[] = {
MSR_IA32_MISC_ENABLE,
MSR_IA32_MCG_STATUS,
MSR_IA32_MCG_CTL,
+   MSR_IA32_MCG_EXT_CTL,
MSR_IA32_SMBASE,
 };
 
@@ -1913,6 +1914,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, 
u64 data)
return -1;
vcpu->arch.mcg_ctl = data;
break;
+   case MSR_IA32_MCG_EXT_CTL:
+   if (!(mcg_cap & MCG_LMCE_P))
+   return 1;
+   if (data != 0 && data != 0x1)
+   return -1;
+   vcpu->arch.mcg_ext_ctl = data;
+   break;
default:
if (msr >= MSR_IA32_MC0_CTL &&
msr < MSR_IA32_MCx_CTL(bank_num)) {
@@ -2170,6 +2178,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
 
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
+   case MSR_IA32_MCG_EXT_CTL:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
return set_msr_mce(vcpu, msr, data);
 
@@ -2266,6 +2275,11 @@ static int get_msr_mce(struct kvm_vcpu