[Patch V2 1/2] x86,mce: Basic support to add LMCE support to QEMU
This patch adds the basic enumeration and control MSRs required to support Local Machine Check Exception (LMCE). - Added Local Machine Check definitions, changed MCG_CAP - Added support for IA32_FEATURE_CONTROL. - When delivering an MCE to the guest, we deliver it to just a single CPU when the guest OS has opted in to local delivery. Signed-off-by: Ashok Raj <ashok@intel.com> Tested-by: Gong Chen <gong.c...@intel.com> --- Resending with proper commit message for second patch target-i386/cpu.c | 8 target-i386/cpu.h | 8 ++-- target-i386/kvm.c | 38 +++--- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 11e5e39..167669a 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -2737,6 +2737,13 @@ static void mce_init(X86CPU *cpu) } } +static void feature_control_init(X86CPU *cpu) +{ + CPUX86State *cenv = >env; + + cenv->msr_ia32_feature_control = ((1<<20) | (1<<0)); +} + #ifndef CONFIG_USER_ONLY static void x86_cpu_apic_create(X86CPU *cpu, Error **errp) { @@ -2858,6 +2865,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) #endif mce_init(cpu); +feature_control_init(cpu); #ifndef CONFIG_USER_ONLY if (tcg_enabled()) { diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 84edfd0..a567d7a 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -282,8 +282,9 @@ #define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ +#define MCG_LMCE_P (1ULL<<27) /* Local Machine Check Supported */ -#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P) +#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P|MCG_LMCE_P) #define MCE_BANKS_DEF 10 #define MCG_CAP_BANKS_MASK 0xff @@ -291,6 +292,7 @@ #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ +#define MCG_STATUS_LMCE (1ULL<<3) /* Local MCE signaled */ #define MCI_STATUS_VAL (1ULL<<63) 
/* valid error */ #define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ @@ -333,6 +335,7 @@ #define MSR_MCG_CAP 0x179 #define MSR_MCG_STATUS 0x17a #define MSR_MCG_CTL 0x17b +#define MSR_MCG_EXT_CTL0x4d0 #define MSR_P6_EVNTSEL0 0x186 @@ -892,7 +895,6 @@ typedef struct CPUX86State { uint64_t mcg_status; uint64_t msr_ia32_misc_enable; -uint64_t msr_ia32_feature_control; uint64_t msr_fixed_ctr_ctrl; uint64_t msr_global_ctrl; @@ -977,8 +979,10 @@ typedef struct CPUX86State { int64_t tsc_khz; void *kvm_xsave_buf; +uint64_t msr_ia32_feature_control; uint64_t mcg_cap; uint64_t mcg_ctl; +uint64_t mcg_ext_ctl; uint64_t mce_banks[MCE_BANKS_DEF*4]; uint64_t tsc_aux; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 6dc9846..c61fe1f 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -72,6 +72,7 @@ static bool has_msr_tsc_aux; static bool has_msr_tsc_adjust; static bool has_msr_tsc_deadline; static bool has_msr_feature_control; +static bool has_msr_ext_mcg_ctl; static bool has_msr_async_pf_en; static bool has_msr_pv_eoi_en; static bool has_msr_misc_enable; @@ -370,18 +371,30 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code) uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S; uint64_t mcg_status = MCG_STATUS_MCIP; +int flags = 0; +CPUState *cs = CPU(cpu); + +/* + * We need to read back the value of MSR_EXT_MCG_CTL that was set by the + * guest kernel back into Qemu + */ +cpu_synchronize_state(cs); + +flags = cpu_x86_support_mca_broadcast(env) ? 
MCE_INJECT_BROADCAST : 0; if (code == BUS_MCEERR_AR) { -status |= MCI_STATUS_AR | 0x134; -mcg_status |= MCG_STATUS_EIPV; + status |= MCI_STATUS_AR | 0x134; + mcg_status |= MCG_STATUS_EIPV; + if (env->mcg_ext_ctl & 0x1) { + mcg_status |= MCG_STATUS_LMCE; + flags = 0; /* No Broadcast when LMCE is opted by guest */ + } } else { status |= 0xc0; mcg_status |= MCG_STATUS_RIPV; } cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr, - (MCM_ADDR_PHYS << 6) | 0xc, - cpu_x86_support_mca_broadcast(env) ? - MCE_INJECT_BROADCAST : 0); + (MCM_ADDR_PHYS << 6) | 0xc, flags); } static void hardware_memory_error(void) @@ -808,10 +821,14 @@ int kvm_arch_init_vcpu(CPUState *cs) c = cpuid_fi
[Patch V2 2/2] x86, mce: Need to translate GPA to HPA to inject error in guest.
From: Gong Chen When we need to test error injection to a specific address using EINJ, there needs to be a way to translate GPA to HPA. This allows host EINJ to inject an error so that we can test how the guest behaves when a bad address is consumed, and it permits the guest OS to perform its own recovery. Signed-off-by: Gong Chen --- Sorry about the spam :-(. Resending with proper commit message. The previous one had a bogus From; fixed that before sending. hmp-commands.hx | 14 ++ include/exec/memory.h | 2 ++ kvm-all.c | 24 memory.c | 13 + monitor.c | 16 5 files changed, 69 insertions(+) mode change 100644 => 100755 include/exec/memory.h mode change 100644 => 100755 kvm-all.c mode change 100644 => 100755 memory.c mode change 100644 => 100755 monitor.c diff --git a/hmp-commands.hx b/hmp-commands.hx index bb52e4d..673c00e 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -444,6 +444,20 @@ Start gdbserver session (default @var{port}=1234) ETEXI { +.name = "x-gpa2hva", +.args_type= "fmt:/,addr:l", +.params = "/fmt addr", +.help = "translate guest physical 'addr' to host virtual address, only for debugging", +.mhandler.cmd = do_gpa2hva, +}, + +STEXI +@item x-gpa2hva @var{addr} +@findex x-gpa2hva +Translate guest physical @var{addr} to host virtual address, only for debugging. 
+ETEXI + +{ .name = "x", .args_type = "fmt:/,addr:l", .params = "/fmt addr", diff --git a/include/exec/memory.h b/include/exec/memory.h old mode 100644 new mode 100755 index 0f07159..57d7bf8 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -222,6 +222,7 @@ struct MemoryListener { hwaddr addr, hwaddr len); void (*coalesced_mmio_del)(MemoryListener *listener, MemoryRegionSection *section, hwaddr addr, hwaddr len); +int (*translate_gpa2hva)(MemoryListener *listener, uint64_t paddr, uint64_t *vaddr); /* Lower = earlier (during add), later (during del) */ unsigned priority; AddressSpace *address_space_filter; @@ -1123,6 +1124,7 @@ void memory_global_dirty_log_start(void); void memory_global_dirty_log_stop(void); void mtree_info(fprintf_function mon_printf, void *f); +int memory_translate_gpa2hva(hwaddr paddr, uint64_t *vaddr); /** * memory_region_dispatch_read: perform a read directly to the specified diff --git a/kvm-all.c b/kvm-all.c old mode 100644 new mode 100755 index c648b81..cb029be --- a/kvm-all.c +++ b/kvm-all.c @@ -197,6 +197,29 @@ static KVMSlot *kvm_lookup_overlapping_slot(KVMMemoryListener *kml, return found; } + +static int kvm_translate_gpa2hva(MemoryListener *listener, uint64_t paddr, uint64_t *vaddr) +{ +KVMState *s = kvm_state; +KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); +KVMSlot *mem = NULL; +int i; + +for (i = 0; i < s->nr_slots; i++) { +mem = >slots[i]; +if (paddr >= mem->start_addr && paddr < mem->start_addr + mem->memory_size) { +*vaddr = (uint64_t)mem->ram + paddr - mem->start_addr; +break; + } +} + +if (i == s->nr_slots) { +fprintf(stderr, "fail to find target physical addr(%ld) in KVM memory range\n", paddr); + return 1; +} +return 0; +} + int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, hwaddr *phys_addr) { @@ -902,6 +925,7 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, kml->listener.log_start = kvm_log_start; kml->listener.log_stop = kvm_log_stop; 
kml->listener.log_sync = kvm_log_sync; +kml->listener.translate_gpa2hva = kvm_translate_gpa2hva; kml->listener.priority = 10; memory_listener_register(>listener, as); diff --git a/memory.c b/memory.c old mode 100644 new mode 100755 index e193658..979dcf8 --- a/memory.c +++ b/memory.c @@ -2294,6 +2294,19 @@ static const TypeInfo memory_region_info = { .instance_finalize = memory_region_finalize, }; +int memory_translate_gpa2hva(hwaddr paddr, uint64_t *vaddr){ +MemoryListener *ml = NULL; +int ret = 1; + +QTAILQ_FOREACH(ml, _listeners, link) { +if(ml->translate_gpa2hva) +ret = ml->translate_gpa2hva(ml, paddr, vaddr); + if(0 == ret) + break; +} +return ret; +} + static void memory_register_types(void) { type_register_static(_region_info); diff --git a/monitor.c b/monitor.c old mode 100644 new mode 100755 index 9a35d72..408e1fa --- a/monitor.c +++ b/monitor.c @@ -76,6 +76,7 @@ #include "qapi-event.h" #include "qmp-introspect.h" #include "sysemu/block-backend.h" +#include "exec/memory.h" /* for hmp_info_irq/pic */ #if defined(TARGET_SPARC) @@ -1681,6 +1682,21 @@ static
[Patch V0] This patch adds some support required for KVM in order to support LMCE.
- Add support for MSR_IA32_MCG_EXT_CTL - Add MCG_LMCE_P to KVM_MCE_CAP_SUPPORTED - Change IA32_FEATURE_CONTROL handling: allow this MSR to be defined not just for nested VMM, since it is now also required for Local MCE. Reviewed-by: Andi Kleen <andi.kl...@intel.com> Reviewed-by: Tony Luck <tony.l...@intel.com> Tested-by: Gong Chen <gong.c...@intel.com> Signed-off-by: Ashok Raj <ashok@intel.com> --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx.c | 26 +- arch/x86/kvm/x86.c | 17 - 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 30cfd64..6940141 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -525,6 +525,7 @@ struct kvm_vcpu_arch { u64 mcg_cap; u64 mcg_status; u64 mcg_ctl; + u64 mcg_ext_ctl; u64 *mce_banks; /* Cache MMIO info */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 87acc52..c2ce9f4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2747,6 +2747,20 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) return 0; } +bool can_feature_control_exist(struct kvm_vcpu *vcpu) +{ + /* +* There are some features that require BIOS enabling. +* In such cases BIOS is supposed to set this bit and indicate +* the feature is enabled and available to the OS. +* Local Machine Check Exception (LMCE) is one such feature. +*/ + if (vcpu->arch.mcg_cap & MCG_LMCE_P) + return true; + + return (nested_vmx_allowed(vcpu)); +} + /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. 
@@ -2789,9 +2803,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; case MSR_IA32_FEATURE_CONTROL: - if (!nested_vmx_allowed(vcpu)) + if (can_feature_control_exist(vcpu)) + msr_info->data = + to_vmx(vcpu)->nested.msr_ia32_feature_control; + else return 1; - msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control; break; case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: if (!nested_vmx_allowed(vcpu)) @@ -2882,9 +2898,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ret = kvm_set_msr_common(vcpu, msr_info); break; case MSR_IA32_FEATURE_CONTROL: - if (!nested_vmx_allowed(vcpu) || - (to_vmx(vcpu)->nested.msr_ia32_feature_control & -FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) + if ((can_feature_control_exist(vcpu) == false) || + ((to_vmx(vcpu)->nested.msr_ia32_feature_control & +FEATURE_CONTROL_LOCKED) && !msr_info->host_initiated)) return 1; vmx->nested.msr_ia32_feature_control = data; if (msr_info->host_initiated && data == 0) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 00462bd..0da3871 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -70,7 +70,7 @@ #define MAX_IO_MSRS 256 #define KVM_MAX_MCE_BANKS 32 -#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) +#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P | MCG_LMCE_P) #define emul_to_vcpu(ctxt) \ container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) @@ -974,6 +974,7 @@ static u32 emulated_msrs[] = { MSR_IA32_MISC_ENABLE, MSR_IA32_MCG_STATUS, MSR_IA32_MCG_CTL, + MSR_IA32_MCG_EXT_CTL, MSR_IA32_SMBASE, }; @@ -1913,6 +1914,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) return -1; vcpu->arch.mcg_ctl = data; break; + case MSR_IA32_MCG_EXT_CTL: + if (!(mcg_cap & MCG_LMCE_P)) + return 1; + if (data != 0 && data != 0x1) + return -1; + vcpu->arch.mcg_ext_ctl = data; + break; default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { 
@@ -2170,6 +2178,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: + case MSR_IA32_MCG_EXT_CTL: case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: return set_msr_mce(vcpu, msr, data); @@ -2266,6 +2275,11 @@ static int get_msr_mce(struct kvm_vcpu