On Fri, Oct 30, 2020 at 12:04:03PM -0700, Luck, Tony wrote:

Bah, didn't notice this conversation didn't include LKML.

> The Xeon versions of Sandy Bridge, Ivy Bridge and Haswell support an
> optional additional error logging mode which is enabled by an MSR.
> 
> Previously this mode was enabled from the mcelog(8) tool via /dev/cpu,
> but the kernel is now very picky about which MSRs may be written. So
> move the enabling into the kernel.
> 
> Suggested-by: Boris Petkov <b...@alien8.de>
> Signed-off-by: Tony Luck <tony.l...@intel.com>
> ---
> 
> N.B. I don't have any of these old systems in my lab any more. So
> this is untested :-(
> 
>  arch/x86/include/asm/msr-index.h |  1 +
>  arch/x86/kernel/cpu/mce/intel.c  | 20 ++++++++++++++++++++
>  2 files changed, 21 insertions(+)
> 
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 972a34d93505..b2dd2648c0e2 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -139,6 +139,7 @@
>  #define MSR_IA32_MCG_CAP             0x00000179
>  #define MSR_IA32_MCG_STATUS          0x0000017a
>  #define MSR_IA32_MCG_CTL             0x0000017b
> +#define MSR_ERROR_CONTROL            0x0000017f
>  #define MSR_IA32_MCG_EXT_CTL         0x000004d0
>  
>  #define MSR_OFFCORE_RSP_0            0x000001a6
> diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
> index abe9fe0fb851..b47883e364b4 100644
> --- a/arch/x86/kernel/cpu/mce/intel.c
> +++ b/arch/x86/kernel/cpu/mce/intel.c
> @@ -509,12 +509,32 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
>       }
>  }
>  
> +/*
> + * Enable additional error logs from the integrated
> + * memory controller on processors that support this.
> + */
> +static void intel_imc_init(struct cpuinfo_x86 *c)
> +{
> +     u64 error_control;
> +
> +     switch (c->x86_model) {
> +     case INTEL_FAM6_SANDYBRIDGE_X:
> +     case INTEL_FAM6_IVYBRIDGE_X:
> +     case INTEL_FAM6_HASWELL_X:
> +             rdmsrl(MSR_ERROR_CONTROL, error_control);
> +             error_control |= 2;
> +             wrmsrl(MSR_ERROR_CONTROL, error_control);
> +             break;
> +     }
> +}
> +
>  void mce_intel_feature_init(struct cpuinfo_x86 *c)
>  {
>       intel_init_thermal(c);
>       intel_init_cmci();
>       intel_init_lmce();
>       intel_ppin_init(c);
> +     intel_imc_init(c);
>  }
>  
>  void mce_intel_feature_clear(struct cpuinfo_x86 *c)
> -- 
> 2.21.1
> 

Reply via email to