Up to now we've been assuming that all CPUs would have the same number of
reporting banks. However, on upcoming AMD CPUs this isn't the case, and one
can observe

(XEN) mce.c:666: Different bank number on cpu <N>

indicating that Machine Check support would not be enabled on the affected
CPUs. Convert the count variable to a per-CPU one, and adjust code where
needed to cope with the values not being the same. In particular the
mcabanks_alloc() invocations during AP bringup now need to allocate
maximum-size bitmaps, because the truly needed size can't be known until we
actually execute on that CPU, yet mcheck_init() gets called too early to do
any allocations itself.

Take the liberty and also
- make mca_cap_init() static,
- replace several __get_cpu_var() uses when a local variable suitable for
  use with per_cpu() appears,
- correct which CPU's cpu_data[] entry x86_mc_msrinject_verify() uses,
- replace a BUG() by panic().

Signed-off-by: Jan Beulich <jbeul...@suse.com>

--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -34,7 +34,7 @@ bool __read_mostly opt_mce = true;
 boolean_param("mce", opt_mce);
 bool __read_mostly mce_broadcast;
 bool is_mc_panic;
-unsigned int __read_mostly nr_mce_banks;
+DEFINE_PER_CPU_READ_MOSTLY(unsigned int, nr_mce_banks);
 unsigned int __read_mostly firstbank;
 uint8_t __read_mostly cmci_apic_vector;
 
@@ -120,7 +120,7 @@ void mce_recoverable_register(mce_recove
     mc_recoverable_scan = cbfunc;
 }
 
-struct mca_banks *mcabanks_alloc(void)
+struct mca_banks *mcabanks_alloc(unsigned int nr_mce_banks)
 {
     struct mca_banks *mb;
 
@@ -128,6 +128,13 @@ struct mca_banks *mcabanks_alloc(void)
     if ( !mb )
         return NULL;
 
+    /*
+     * For APs allocations get done by the BSP, i.e. when the bank count
+     * may not be known yet. A zero bank count is a clear indication of this.
+     */
+    if ( !nr_mce_banks )
+        nr_mce_banks = MCG_CAP_COUNT;
+
     mb->bank_map = xzalloc_array(unsigned long,
                                  BITS_TO_LONGS(nr_mce_banks));
     if ( !mb->bank_map )
@@ -319,7 +326,7 @@ mcheck_mca_logout(enum mca_source who, s
      */
     recover = mc_recoverable_scan ? 1 : 0;
 
-    for ( i = 0; i < nr_mce_banks; i++ )
+    for ( i = 0; i < this_cpu(nr_mce_banks); i++ )
     {
         /* Skip bank if corresponding bit in bankmask is clear */
         if ( !mcabanks_test(i, bankmask) )
@@ -565,7 +572,7 @@ void mcheck_mca_clearbanks(struct mca_ba
 {
     int i;
 
-    for ( i = 0; i < nr_mce_banks; i++ )
+    for ( i = 0; i < this_cpu(nr_mce_banks); i++ )
    {
         if ( !mcabanks_test(i, bankmask) )
             continue;
@@ -638,54 +645,56 @@ static void set_poll_bankmask(struct cpu
 
     if ( cmci_support && opt_mce )
     {
-        mb->num = per_cpu(no_cmci_banks, cpu)->num;
-        bitmap_copy(mb->bank_map, per_cpu(no_cmci_banks, cpu)->bank_map,
-                    nr_mce_banks);
+        const struct mca_banks *cmci = per_cpu(no_cmci_banks, cpu);
+
+        if ( unlikely(cmci->num < mb->num) )
+            bitmap_fill(mb->bank_map, mb->num);
+        bitmap_copy(mb->bank_map, cmci->bank_map, min(mb->num, cmci->num));
     }
     else
     {
-        bitmap_copy(mb->bank_map, mca_allbanks->bank_map, nr_mce_banks);
+        bitmap_copy(mb->bank_map, mca_allbanks->bank_map,
+                    per_cpu(nr_mce_banks, cpu));
         if ( mce_firstbank(c) )
             mcabanks_clear(0, mb);
     }
 }
 
 /* The perbank ctl/status init is platform specific because of AMD's quirk */
-int mca_cap_init(void)
+static int mca_cap_init(void)
 {
     uint64_t msr_content;
+    unsigned int nr, cpu = smp_processor_id();
 
     rdmsrl(MSR_IA32_MCG_CAP, msr_content);
     if ( msr_content & MCG_CTL_P ) /* Control register present ? */
         wrmsrl(MSR_IA32_MCG_CTL, 0xffffffffffffffffULL);
 
-    if ( nr_mce_banks && (msr_content & MCG_CAP_COUNT) != nr_mce_banks )
-    {
-        dprintk(XENLOG_WARNING, "Different bank number on cpu %x\n",
-                smp_processor_id());
-        return -ENODEV;
-    }
-    nr_mce_banks = msr_content & MCG_CAP_COUNT;
+    per_cpu(nr_mce_banks, cpu) = nr = MASK_EXTR(msr_content, MCG_CAP_COUNT);
 
-    if ( !nr_mce_banks )
+    if ( !nr )
     {
-        printk(XENLOG_INFO "CPU%u: No MCE banks present. "
-               "Machine check support disabled\n", smp_processor_id());
+        printk(XENLOG_INFO
+               "CPU%u: No MCE banks present. Machine check support disabled\n",
+               cpu);
         return -ENODEV;
     }
 
     /* mcabanks_alloc depends on nr_mce_banks */
-    if ( !mca_allbanks )
+    if ( !mca_allbanks || nr > mca_allbanks->num )
     {
-        int i;
+        unsigned int i;
+        struct mca_banks *all = mcabanks_alloc(nr);
 
-        mca_allbanks = mcabanks_alloc();
-        for ( i = 0; i < nr_mce_banks; i++ )
+        if ( !all )
+            return -ENOMEM;
+        for ( i = 0; i < nr; i++ )
             mcabanks_set(i, mca_allbanks);
+        mcabanks_free(xchg(&mca_allbanks, all));
     }
 
-    return mca_allbanks ? 0 : -ENOMEM;
+    return 0;
 }
 
@@ -702,8 +711,9 @@ static void cpu_bank_free(unsigned int c
 
 static int cpu_bank_alloc(unsigned int cpu)
 {
-    struct mca_banks *poll = per_cpu(poll_bankmask, cpu) ?: mcabanks_alloc();
-    struct mca_banks *clr = per_cpu(mce_clear_banks, cpu) ?: mcabanks_alloc();
+    unsigned int nr = per_cpu(nr_mce_banks, cpu);
+    struct mca_banks *poll = per_cpu(poll_bankmask, cpu) ?: mcabanks_alloc(nr);
+    struct mca_banks *clr = per_cpu(mce_clear_banks, cpu) ?: mcabanks_alloc(nr);
 
     if ( !poll || !clr )
     {
@@ -752,6 +762,7 @@ static struct notifier_block cpu_nfb = {
 void mcheck_init(struct cpuinfo_x86 *c, bool bsp)
 {
     enum mcheck_type inited = mcheck_none;
+    unsigned int cpu = smp_processor_id();
 
     if ( !opt_mce )
     {
@@ -762,8 +773,7 @@ void mcheck_init(struct cpuinfo_x86 *c,
 
     if ( !mce_available(c) )
     {
-        printk(XENLOG_INFO "CPU%i: No machine check support available\n",
-               smp_processor_id());
+        printk(XENLOG_INFO "CPU%i: No machine check support available\n", cpu);
         return;
     }
 
@@ -771,9 +781,13 @@ void mcheck_init(struct cpuinfo_x86 *c,
     if ( mca_cap_init() )
         return;
 
-    /* Early MCE initialisation for BSP. */
-    if ( bsp && cpu_bank_alloc(smp_processor_id()) )
-        BUG();
+    if ( !bsp )
+    {
+        per_cpu(poll_bankmask, cpu)->num = per_cpu(nr_mce_banks, cpu);
+        per_cpu(mce_clear_banks, cpu)->num = per_cpu(nr_mce_banks, cpu);
+    }
+    else if ( cpu_bank_alloc(cpu) )
+        panic("Insufficient memory for MCE bank allocations\n");
 
     switch ( c->x86_vendor )
     {
@@ -1111,24 +1125,22 @@ bool intpose_inval(unsigned int cpu_nr,
     return true;
 }
 
-#define IS_MCA_BANKREG(r) \
+#define IS_MCA_BANKREG(r, cpu) \
     ((r) >= MSR_IA32_MC0_CTL && \
-     (r) <= MSR_IA32_MCx_MISC(nr_mce_banks - 1) && \
-     ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */
+     (r) <= MSR_IA32_MCx_MISC(per_cpu(nr_mce_banks, cpu) - 1) && \
+     ((r) - MSR_IA32_MC0_CTL) % 4) /* excludes MCi_CTL */
 
 static bool x86_mc_msrinject_verify(struct xen_mc_msrinject *mci)
 {
-    struct cpuinfo_x86 *c;
+    const struct cpuinfo_x86 *c = &cpu_data[mci->mcinj_cpunr];
     int i, errs = 0;
 
-    c = &cpu_data[smp_processor_id()];
-
     for ( i = 0; i < mci->mcinj_count; i++ )
     {
         uint64_t reg = mci->mcinj_msr[i].reg;
         const char *reason = NULL;
 
-        if ( IS_MCA_BANKREG(reg) )
+        if ( IS_MCA_BANKREG(reg, mci->mcinj_cpunr) )
         {
             if ( c->x86_vendor == X86_VENDOR_AMD )
             {
@@ -1448,7 +1460,7 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_m
         break;
 
     case XEN_MC_msrinject:
-        if ( nr_mce_banks == 0 )
+        if ( !mca_allbanks || !mca_allbanks->num )
             return x86_mcerr("do_mca inject", -ENODEV);
 
         mc_msrinject = &op->u.mc_msrinject;
@@ -1461,6 +1473,9 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_m
             return x86_mcerr("do_mca inject: target offline",
                              -EINVAL);
 
+        if ( !per_cpu(nr_mce_banks, target) )
+            return x86_mcerr("do_mca inject: no banks", -ENOENT);
+
         if ( mc_msrinject->mcinj_count == 0 )
             return 0;
 
@@ -1521,7 +1536,7 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_m
         break;
 
     case XEN_MC_mceinject:
-        if ( nr_mce_banks == 0 )
+        if ( !mca_allbanks || !mca_allbanks->num )
             return x86_mcerr("do_mca #MC", -ENODEV);
 
         mc_mceinject = &op->u.mc_mceinject;
@@ -1533,6 +1548,9 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_m
         if ( !cpu_online(target) )
             return x86_mcerr("do_mca #MC: target offline", -EINVAL);
 
+        if ( !per_cpu(nr_mce_banks, target) )
+            return x86_mcerr("do_mca #MC: no banks", -ENOENT);
+
         add_taint(TAINT_ERROR_INJECT);
 
         if ( mce_broadcast )
@@ -1548,7 +1566,7 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_m
         cpumask_var_t cmv;
         bool broadcast = op->u.mc_inject_v2.flags & XEN_MC_INJECT_CPU_BROADCAST;
 
-        if ( nr_mce_banks == 0 )
+        if ( !mca_allbanks || !mca_allbanks->num )
             return x86_mcerr("do_mca #MC", -ENODEV);
 
         if ( broadcast )
@@ -1570,6 +1588,16 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_m
                            "Not all required CPUs are online\n");
         }
 
+        for_each_cpu(target, cpumap)
+            if ( cpu_online(target) && !per_cpu(nr_mce_banks, target) )
+            {
+                ret = x86_mcerr("do_mca #MC: CPU%u has no banks",
+                                -ENOENT, target);
+                break;
+            }
+        if ( ret )
+            break;
+
         switch ( op->u.mc_inject_v2.flags & XEN_MC_INJECT_TYPE_MASK )
         {
         case XEN_MC_INJECT_TYPE_MCE:
--- a/xen/arch/x86/cpu/mcheck/mce_amd.c
+++ b/xen/arch/x86/cpu/mcheck/mce_amd.c
@@ -297,7 +297,7 @@ amd_mcheck_init(struct cpuinfo_x86 *ci)
     x86_mce_vector_register(mcheck_cmn_handler);
     mce_need_clearbank_register(amd_need_clearbank_scan);
 
-    for ( i = 0; i < nr_mce_banks; i++ )
+    for ( i = 0; i < this_cpu(nr_mce_banks); i++ )
     {
         if ( quirkflag == MCEQUIRK_K8_GART && i == 4 )
             mcequirk_amd_apply(quirkflag);
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -535,16 +535,16 @@ out:
 static void cmci_discover(void)
 {
     unsigned long flags;
-    int i;
+    unsigned int i, cpu = smp_processor_id();
     mctelem_cookie_t mctc;
     struct mca_summary bs;
 
-    mce_printk(MCE_VERBOSE, "CMCI: find owner on CPU%d\n", smp_processor_id());
+    mce_printk(MCE_VERBOSE, "CMCI: find owner on CPU%u\n", cpu);
 
     spin_lock_irqsave(&cmci_discover_lock, flags);
 
-    for ( i = 0; i < nr_mce_banks; i++ )
-        if ( !mcabanks_test(i, __get_cpu_var(mce_banks_owned)) )
+    for ( i = 0; i < per_cpu(nr_mce_banks, cpu); i++ )
+        if ( !mcabanks_test(i, per_cpu(mce_banks_owned, cpu)) )
             do_cmci_discover(i);
 
     spin_unlock_irqrestore(&cmci_discover_lock, flags);
@@ -557,7 +557,7 @@ static void cmci_discover(void)
      */
 
     mctc = mcheck_mca_logout(
-        MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs, NULL);
+        MCA_CMCI_HANDLER, per_cpu(mce_banks_owned, cpu), &bs, NULL);
 
     if ( bs.errcnt && mctc != NULL )
     {
@@ -576,9 +576,9 @@ static void cmci_discover(void)
         mctelem_dismiss(mctc);
 
     mce_printk(MCE_VERBOSE, "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n",
-               smp_processor_id(),
-               *((unsigned long *)__get_cpu_var(mce_banks_owned)->bank_map),
-               *((unsigned long *)__get_cpu_var(no_cmci_banks)->bank_map));
+               cpu,
+               per_cpu(mce_banks_owned, cpu)->bank_map[0],
+               per_cpu(no_cmci_banks, cpu)->bank_map[0]);
 }
 
 /*
@@ -613,24 +613,24 @@ static void cpu_mcheck_distribute_cmci(v
 
 static void clear_cmci(void)
 {
-    int i;
+    unsigned int i, cpu = smp_processor_id();
 
     if ( !cmci_support || !opt_mce )
         return;
 
-    mce_printk(MCE_VERBOSE, "CMCI: clear_cmci support on CPU%d\n",
-               smp_processor_id());
+    mce_printk(MCE_VERBOSE, "CMCI: clear_cmci support on CPU%u\n", cpu);
 
-    for ( i = 0; i < nr_mce_banks; i++ )
+    for ( i = 0; i < per_cpu(nr_mce_banks, cpu); i++ )
     {
         unsigned msr = MSR_IA32_MCx_CTL2(i);
         u64 val;
-        if ( !mcabanks_test(i, __get_cpu_var(mce_banks_owned)) )
+
+        if ( !mcabanks_test(i, per_cpu(mce_banks_owned, cpu)) )
             continue;
         rdmsrl(msr, val);
         if ( val & (CMCI_EN|CMCI_THRESHOLD_MASK) )
             wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
-        mcabanks_clear(i, __get_cpu_var(mce_banks_owned));
+        mcabanks_clear(i, per_cpu(mce_banks_owned, cpu));
     }
 }
 
@@ -826,7 +826,7 @@ static void intel_init_mce(void)
     intel_mce_post_reset();
 
     /* clear all banks */
-    for ( i = firstbank; i < nr_mce_banks; i++ )
+    for ( i = firstbank; i < this_cpu(nr_mce_banks); i++ )
     {
         /*
          * Some banks are shared across cores, use MCi_CTRL to judge whether
@@ -866,8 +866,9 @@ static void cpu_mcabank_free(unsigned in
 
 static int cpu_mcabank_alloc(unsigned int cpu)
 {
-    struct mca_banks *cmci = mcabanks_alloc();
-    struct mca_banks *owned = mcabanks_alloc();
+    unsigned int nr = per_cpu(nr_mce_banks, cpu);
+    struct mca_banks *cmci = mcabanks_alloc(nr);
+    struct mca_banks *owned = mcabanks_alloc(nr);
 
     if ( !cmci || !owned )
         goto out;
@@ -924,6 +925,13 @@ enum mcheck_type intel_mcheck_init(struc
         register_cpu_notifier(&cpu_nfb);
         mcheck_intel_therm_init();
     }
+    else
+    {
+        unsigned int cpu = smp_processor_id();
+
+        per_cpu(no_cmci_banks, cpu)->num = per_cpu(nr_mce_banks, cpu);
+        per_cpu(mce_banks_owned, cpu)->num = per_cpu(nr_mce_banks, cpu);
+    }
 
     intel_init_mca(c);
 
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h
@@ -125,7 +125,7 @@ static inline int mcabanks_test(int bit,
     return test_bit(bit, banks->bank_map);
 }
 
-struct mca_banks *mcabanks_alloc(void);
+struct mca_banks *mcabanks_alloc(unsigned int nr);
 void mcabanks_free(struct mca_banks *banks);
 
 extern struct mca_banks *mca_allbanks;
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2374,7 +2374,7 @@ static int svm_is_erratum_383(struct cpu
         return 0;
 
     /* Clear MCi_STATUS registers */
-    for (i = 0; i < nr_mce_banks; i++)
+    for (i = 0; i < this_cpu(nr_mce_banks); i++)
         wrmsrl(MSR_IA32_MCx_STATUS(i), 0ULL);
 
     rdmsrl(MSR_IA32_MCG_STATUS, msr_content);
--- a/xen/include/asm-x86/mce.h
+++ b/xen/include/asm-x86/mce.h
@@ -40,6 +40,6 @@ extern int vmce_rdmsr(uint32_t msr, uint
 extern bool vmce_has_lmce(const struct vcpu *v);
 extern int vmce_enable_mca_cap(struct domain *d, uint64_t cap);
 
-extern unsigned int nr_mce_banks;
+DECLARE_PER_CPU(unsigned int, nr_mce_banks);
 
 #endif
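[Editor's note, not part of the patch:] The key idea behind the mcabanks_alloc()
change is that the BSP allocates an AP's bank bitmaps before that AP has read its
own MCG_CAP, so the bitmap is sized for the architectural maximum and the ->num
field is trimmed later, once the real bank count is known on the AP. The following
standalone sketch illustrates that pattern only; it substitutes calloc() for Xen's
xzalloc_array(), and the bank count of 23 is a made-up example value.

/* Standalone illustration; compile with e.g. gcc -Wall demo.c -o demo */
#include <stdio.h>
#include <stdlib.h>

#define MCG_CAP_COUNT 0xff            /* architectural maximum bank count */
#define BITS_PER_LONG (8 * sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct mca_banks {
    unsigned int num;
    unsigned long *bank_map;
};

/* Mirrors the patched mcabanks_alloc(): a zero count means "not known yet". */
static struct mca_banks *mcabanks_alloc(unsigned int nr)
{
    struct mca_banks *mb = calloc(1, sizeof(*mb));

    if ( !mb )
        return NULL;

    if ( !nr )                        /* AP whose MCG_CAP hasn't been read yet */
        nr = MCG_CAP_COUNT;           /* allocate for the maximum possible */

    mb->bank_map = calloc(BITS_TO_LONGS(nr), sizeof(unsigned long));
    if ( !mb->bank_map )
    {
        free(mb);
        return NULL;
    }

    mb->num = nr;
    return mb;
}

int main(void)
{
    /* The BSP allocates for an AP before that AP has run: bank count still 0. */
    struct mca_banks *ap_banks = mcabanks_alloc(0);

    if ( !ap_banks )
        return 1;

    printf("bitmap sized for %u banks\n", ap_banks->num);

    /* Later, on the AP, MCG_CAP is read and ->num is trimmed to the real count. */
    ap_banks->num = 23;               /* hypothetical value read from MCG_CAP */
    printf("now limited to %u banks\n", ap_banks->num);

    free(ap_banks->bank_map);
    free(ap_banks);
    return 0;
}

Trimming ->num on the AP rather than reallocating keeps AP bringup free of memory
allocations, which is the constraint the commit message describes (mcheck_init()
runs too early on the AP to allocate anything itself).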