Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package mcelog for openSUSE:Factory checked in at 2021-01-29 14:56:24 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/mcelog (Old) and /work/SRC/openSUSE:Factory/.mcelog.new.28504 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "mcelog" Fri Jan 29 14:56:24 2021 rev:56 rq:867083 version:175 Changes: -------- --- /work/SRC/openSUSE:Factory/mcelog/mcelog.changes 2020-10-03 18:53:07.633315397 +0200 +++ /work/SRC/openSUSE:Factory/.mcelog.new.28504/mcelog.changes 2021-01-29 14:56:44.405477876 +0100 @@ -1,0 +2,24 @@ +Tue Jan 26 17:43:06 UTC 2021 - [email protected] + +- Update to version 175 (jsc#SLE-14450): + * mcelog: Add a test case to test page error counter replacement. + * mcelog: Use 'num-errors' to specify the number of mce records to be injected. + * mcelog: Report how often the replacement of page CE counter happened + * mcelog: Limit memory consumption for counting CEs per page + * mcelog: Add support for Sapphirerapids server. (jsc#SLE-14450) + * mcelog: i10nm: Fix mapping from bank number to functional unit + +- Only refreshing patches, due to tarball modifications: +M Start-consolidating-AMD-specific-stuff.patch +M add-f10h-support.patch +M add-f11h-support.patch +M add-f12h-support.patch +M add-f14h-support.patch +M add-f15h-support.patch +M add-f16h-support.patch +M email.patch +M fix_setgroups_missing_call.patch +M mcelog_invert_prefill_db_warning.patch + + +------------------------------------------------------------------- Old: ---- mcelog-173.tar.gz New: ---- mcelog-175.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ mcelog.spec ++++++ --- /var/tmp/diff_new_pack.HjjjKw/_old 2021-01-29 14:56:45.793479919 +0100 +++ /var/tmp/diff_new_pack.HjjjKw/_new 2021-01-29 14:56:45.797479924 +0100 @@ -1,7 +1,7 @@ # # spec file for package mcelog # -# Copyright (c) 2020 SUSE LLC +# Copyright (c) 2021 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -21,7 +21,7 @@ %define _fillupdir %{_localstatedir}/adm/fillup-templates %endif Name: mcelog -Version: 173 +Version: 175 Release: 0 Summary: Log Machine Check Events License: GPL-2.0-only ++++++ Start-consolidating-AMD-specific-stuff.patch ++++++ ++++ 668 lines (skipped) ++++ between /work/SRC/openSUSE:Factory/mcelog/Start-consolidating-AMD-specific-stuff.patch ++++ and /work/SRC/openSUSE:Factory/.mcelog.new.28504/Start-consolidating-AMD-specific-stuff.patch ++++++ _service ++++++ --- /var/tmp/diff_new_pack.HjjjKw/_old 2021-01-29 14:56:45.853480007 +0100 +++ /var/tmp/diff_new_pack.HjjjKw/_new 2021-01-29 14:56:45.857480012 +0100 @@ -3,8 +3,8 @@ <param name="scm">git</param> <param name="url">git://git.kernel.org/pub/scm/utils/cpu/mce/mcelog.git</param> <param name="changesgenerate">enable</param> - <param name="revision">v173</param> - <param name="versionformat">173</param> + <param name="revision">v175</param> + <param name="versionformat">175</param> </service> <service name="recompress" mode="disabled"> <param name="file">mcelog*.tar</param> ++++++ _servicedata ++++++ --- /var/tmp/diff_new_pack.HjjjKw/_old 2021-01-29 14:56:45.877480042 +0100 +++ /var/tmp/diff_new_pack.HjjjKw/_new 2021-01-29 14:56:45.877480042 +0100 @@ -3,4 +3,4 @@ <param name="url">https://github.com/andikleen/mcelog</param> <param name="changesrevision">ee90ff20ce6a4d5e016aa249ce8b37f359f9fda4</param></service><service name="tar_scm"> <param name="url">git://git.kernel.org/pub/scm/utils/cpu/mce/mcelog.git</param> - <param name="changesrevision">2e923ddb0b39726f913ca929219ff5c477646e36</param></service></servicedata> \ No newline at end of file + <param name="changesrevision">7b776a8c005b60572f49797e81287540f99fff1f</param></service></servicedata> \ No newline at end of file ++++++ add-f10h-support.patch ++++++ --- /var/tmp/diff_new_pack.HjjjKw/_old 2021-01-29 14:56:45.889480060 +0100 +++ /var/tmp/diff_new_pack.HjjjKw/_new 2021-01-29 14:56:45.889480060 +0100 @@ -1,8 +1,15 @@ Add F10h decoding support Signed-off-by: Borislav Petkov <[email protected]> ---- mcelog-1.64+git20190805.e53631f.orig/amd.c 2019-09-06 14:09:39.241237130 +0200 -+++ mcelog-1.64+git20190805.e53631f/amd.c 2019-09-06 14:09:42.485237303 +0200 +--- + amd.c | 488 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- + amd.h | 42 ++++- + mcelog.c | 26 +-- + mcelog.h | 1 + 4 files changed, 506 insertions(+), 51 deletions(-) + +--- a/amd.c ++++ b/amd.c @@ -14,7 +14,7 @@ #include "mcelog.h" #include "amd.h" @@ -552,8 +559,8 @@ + } + return 1; } ---- mcelog-1.64+git20190805.e53631f.orig/amd.h 2019-09-06 14:09:40.217237182 +0200 -+++ mcelog-1.64+git20190805.e53631f/amd.h 2019-09-06 14:09:42.485237303 +0200 +--- a/amd.h ++++ b/amd.h @@ -1,6 +1,25 @@ +#include <stdbool.h> + @@ -628,19 +635,9 @@ +#define CASE_AMD_CPUS \ + case CPU_K8: \ + case CPU_F10H ---- mcelog-1.64+git20190805.e53631f.orig/mcelog.h 2019-09-06 14:06:56.229228424 +0200 -+++ mcelog-1.64+git20190805.e53631f/mcelog.h 2019-09-06 14:09:42.485237303 +0200 -@@ -117,6 +117,7 @@ - CPU_P6OLD, - CPU_CORE2, /* 65nm and 45nm */ - CPU_K8, -+ CPU_F10H, - CPU_P4, - CPU_NEHALEM, - CPU_DUNNINGTON, ---- mcelog-1.64+git20190805.e53631f.orig/mcelog.c 2019-09-06 14:09:39.245237130 +0200 -+++ mcelog-1.64+git20190805.e53631f/mcelog.c 2019-09-06 14:09:42.485237303 +0200 -@@ -147,19 +147,20 @@ +--- a/mcelog.c ++++ b/mcelog.c +@@ -148,19 +148,20 @@ static int mce_filter(struct mce *m, unsigned recordlen) { @@ -665,7 +662,7 @@ } static void print_tsc(int cpunum, __u64 tsc, unsigned long time) -@@ -226,6 +227,7 @@ +@@ -228,6 +229,7 @@ [CPU_P6OLD] = "Intel PPro/P2/P3/old Xeon", [CPU_CORE2] = "Intel Core", /* 65nm and 45nm */ [CPU_K8] = "AMD K8 and derivates", @@ -673,7 +670,7 @@ [CPU_P4] = "Intel P4", [CPU_NEHALEM] = "Intel Xeon 5500 series / Core i3/5/7 (\"Nehalem/Westmere\")", [CPU_DUNNINGTON] = "Intel Xeon 7400 series", -@@ -256,6 +258,7 @@ +@@ -267,6 +269,7 @@ { "p6old", CPU_P6OLD }, { "core2", CPU_CORE2 }, { "k8", CPU_K8 }, @@ -681,7 +678,7 @@ { "p4", CPU_P4 }, { "dunnington", CPU_DUNNINGTON }, { "xeon74xx", CPU_DUNNINGTON }, -@@ -367,9 +370,7 @@ +@@ -388,9 +391,7 @@ case X86_VENDOR_INTEL: return select_intel_cputype(family, model); case X86_VENDOR_AMD: @@ -692,7 +689,7 @@ default: Eprintf("Unknown CPU type vendor %u family %u model %u", cpuvendor, family, model); -@@ -552,14 +553,9 @@ +@@ -579,14 +580,9 @@ } if (seen == ALL) { @@ -710,3 +707,13 @@ Eprintf("ERROR: Hygon Processor family %d: mcelog does not support this processor. Please use the edac_mce_amd module instead.\n", family); return 0; } else if (!strcmp(vendor,"GenuineIntel")) +--- a/mcelog.h ++++ b/mcelog.h +@@ -119,6 +119,7 @@ + CPU_P6OLD, + CPU_CORE2, /* 65nm and 45nm */ + CPU_K8, ++ CPU_F10H, + CPU_P4, + CPU_NEHALEM, + CPU_DUNNINGTON, ++++++ add-f11h-support.patch ++++++ --- /var/tmp/diff_new_pack.HjjjKw/_old 2021-01-29 14:56:45.901480077 +0100 +++ /var/tmp/diff_new_pack.HjjjKw/_new 2021-01-29 14:56:45.901480077 +0100 @@ -1,11 +1,16 @@ Add F11h decoding support Signed-off-by: Borislav Petkov <[email protected]> -Index: mcelog-1.60/amd.c -=================================================================== ---- mcelog-1.60.orig/amd.c 2018-09-24 15:15:05.902689347 +0200 -+++ mcelog-1.60/amd.c 2018-09-24 15:15:10.454960116 +0200 -@@ -155,6 +155,8 @@ enum cputype select_amd_cputype(u32 fami +--- + amd.c | 21 +++++++++++++++++++++ + amd.h | 3 ++- + mcelog.c | 2 ++ + mcelog.h | 1 + + 4 files changed, 26 insertions(+), 1 deletion(-) + +--- a/amd.c ++++ b/amd.c +@@ -155,6 +155,8 @@ return CPU_K8; case 0x10: return CPU_F10H; @@ -14,7 +19,7 @@ default: break; } -@@ -367,6 +369,16 @@ static bool f10h_mc0_mce(u16 ec, u8 xec) +@@ -367,6 +369,16 @@ return f12h_mc0_mce(ec, xec); } @@ -31,7 +36,7 @@ static void decode_mc0_mce(struct amd_decoder_ops *ops, struct mce *m) { u16 ec = EC(m->status); -@@ -630,6 +642,12 @@ struct amd_decoder_ops fam_ops[] = { +@@ -630,6 +642,12 @@ .mc1_mce = k8_mc1_mce, .mc2_mce = k8_mc2_mce, }, @@ -44,7 +49,7 @@ }; static void __decode_amd_mc(enum cputype cpu, struct mce *mce) -@@ -640,6 +658,9 @@ static void __decode_amd_mc(enum cputype +@@ -640,6 +658,9 @@ case CPU_F10H: ops = &fam_ops[AMD_F10H]; break; @@ -54,34 +59,18 @@ default: Eprintf("Huh? What family is it: 0x%x?!\n", cpu); return; -Index: mcelog-1.60/amd.h -=================================================================== ---- mcelog-1.60.orig/amd.h 2018-09-24 15:15:05.902689347 +0200 -+++ mcelog-1.60/amd.h 2018-09-24 15:15:10.454960116 +0200 -@@ -93,4 +93,5 @@ enum rrrr_ids { +--- a/amd.h ++++ b/amd.h +@@ -93,4 +93,5 @@ #define CASE_AMD_CPUS \ case CPU_K8: \ - case CPU_F10H + case CPU_F10H: \ + case CPU_F11H -Index: mcelog-1.60/mcelog.h -=================================================================== ---- mcelog-1.60.orig/mcelog.h 2018-09-24 15:15:05.902689347 +0200 -+++ mcelog-1.60/mcelog.h 2018-09-24 15:15:10.454960116 +0200 -@@ -118,6 +118,7 @@ enum cputype { - CPU_CORE2, /* 65nm and 45nm */ - CPU_K8, - CPU_F10H, -+ CPU_F11H, - CPU_P4, - CPU_NEHALEM, - CPU_DUNNINGTON, -Index: mcelog-1.60/mcelog.c -=================================================================== ---- mcelog-1.60.orig/mcelog.c 2018-09-24 15:15:05.906689585 +0200 -+++ mcelog-1.60/mcelog.c 2018-09-24 15:15:10.458960355 +0200 -@@ -228,6 +228,7 @@ static char *cputype_name[] = { +--- a/mcelog.c ++++ b/mcelog.c +@@ -230,6 +230,7 @@ [CPU_CORE2] = "Intel Core", /* 65nm and 45nm */ [CPU_K8] = "AMD K8 and derivates", [CPU_F10H] = "AMD Greyhound", @@ -89,7 +78,7 @@ [CPU_P4] = "Intel P4", [CPU_NEHALEM] = "Intel Xeon 5500 series / Core i3/5/7 (\"Nehalem/Westmere\")", [CPU_DUNNINGTON] = "Intel Xeon 7400 series", -@@ -258,6 +259,7 @@ static struct config_choice cpu_choices[ +@@ -270,6 +271,7 @@ { "core2", CPU_CORE2 }, { "k8", CPU_K8 }, { "f10h", CPU_F10H }, @@ -97,3 +86,13 @@ { "p4", CPU_P4 }, { "dunnington", CPU_DUNNINGTON }, { "xeon74xx", CPU_DUNNINGTON }, +--- a/mcelog.h ++++ b/mcelog.h +@@ -120,6 +120,7 @@ + CPU_CORE2, /* 65nm and 45nm */ + CPU_K8, + CPU_F10H, ++ CPU_F11H, + CPU_P4, + CPU_NEHALEM, + CPU_DUNNINGTON, ++++++ add-f12h-support.patch ++++++ --- /var/tmp/diff_new_pack.HjjjKw/_old 2021-01-29 14:56:45.913480095 +0100 +++ /var/tmp/diff_new_pack.HjjjKw/_new 2021-01-29 14:56:45.913480095 +0100 @@ -1,11 +1,16 @@ Add F12h decoding support Signed-off-by: Borislav Petkov <[email protected]> -Index: mcelog-1.60/amd.c -=================================================================== ---- mcelog-1.60.orig/amd.c 2018-09-24 15:15:10.454960116 +0200 -+++ mcelog-1.60/amd.c 2018-09-24 15:15:15.607266576 +0200 -@@ -157,6 +157,8 @@ enum cputype select_amd_cputype(u32 fami +--- + amd.c | 11 +++++++++++ + amd.h | 4 +++- + mcelog.c | 2 ++ + mcelog.h | 1 + + 4 files changed, 17 insertions(+), 1 deletion(-) + +--- a/amd.c ++++ b/amd.c +@@ -157,6 +157,8 @@ return CPU_F10H; case 0x11: return CPU_F11H; @@ -14,7 +19,7 @@ default: break; } -@@ -648,6 +650,12 @@ struct amd_decoder_ops fam_ops[] = { +@@ -648,6 +650,12 @@ .mc1_mce = k8_mc1_mce, .mc2_mce = k8_mc2_mce, }, @@ -27,7 +32,7 @@ }; static void __decode_amd_mc(enum cputype cpu, struct mce *mce) -@@ -661,6 +669,9 @@ static void __decode_amd_mc(enum cputype +@@ -661,6 +669,9 @@ case CPU_F11H: ops = &fam_ops[AMD_F11H]; break; @@ -37,11 +42,9 @@ default: Eprintf("Huh? What family is it: 0x%x?!\n", cpu); return; -Index: mcelog-1.60/amd.h -=================================================================== ---- mcelog-1.60.orig/amd.h 2018-09-24 15:15:10.454960116 +0200 -+++ mcelog-1.60/amd.h 2018-09-24 15:15:15.607266576 +0200 -@@ -9,6 +9,7 @@ enum amdcpu { +--- a/amd.h ++++ b/amd.h +@@ -9,6 +9,7 @@ AMD_K8 = 0, AMD_F10H, AMD_F11H, @@ -49,30 +52,16 @@ AMD_F14H, AMD_F15H, AMD_F16H, -@@ -94,4 +95,5 @@ enum rrrr_ids { +@@ -94,4 +95,5 @@ #define CASE_AMD_CPUS \ case CPU_K8: \ case CPU_F10H: \ - case CPU_F11H + case CPU_F11H: \ + case CPU_F12H -Index: mcelog-1.60/mcelog.h -=================================================================== ---- mcelog-1.60.orig/mcelog.h 2018-09-24 15:15:10.454960116 +0200 -+++ mcelog-1.60/mcelog.h 2018-09-24 15:15:15.607266576 +0200 -@@ -119,6 +119,7 @@ enum cputype { - CPU_K8, - CPU_F10H, - CPU_F11H, -+ CPU_F12H, - CPU_P4, - CPU_NEHALEM, - CPU_DUNNINGTON, -Index: mcelog-1.60/mcelog.c -=================================================================== ---- mcelog-1.60.orig/mcelog.c 2018-09-24 15:15:10.458960355 +0200 -+++ mcelog-1.60/mcelog.c 2018-09-24 15:15:15.611266814 +0200 -@@ -229,6 +229,7 @@ static char *cputype_name[] = { +--- a/mcelog.c ++++ b/mcelog.c +@@ -231,6 +231,7 @@ [CPU_K8] = "AMD K8 and derivates", [CPU_F10H] = "AMD Greyhound", [CPU_F11H] = "AMD Griffin", @@ -80,7 +69,7 @@ [CPU_P4] = "Intel P4", [CPU_NEHALEM] = "Intel Xeon 5500 series / Core i3/5/7 (\"Nehalem/Westmere\")", [CPU_DUNNINGTON] = "Intel Xeon 7400 series", -@@ -260,6 +261,7 @@ static struct config_choice cpu_choices[ +@@ -272,6 +273,7 @@ { "k8", CPU_K8 }, { "f10h", CPU_F10H }, { "f11h", CPU_F11H }, @@ -88,3 +77,13 @@ { "p4", CPU_P4 }, { "dunnington", CPU_DUNNINGTON }, { "xeon74xx", CPU_DUNNINGTON }, +--- a/mcelog.h ++++ b/mcelog.h +@@ -121,6 +121,7 @@ + CPU_K8, + CPU_F10H, + CPU_F11H, ++ CPU_F12H, + CPU_P4, + CPU_NEHALEM, + CPU_DUNNINGTON, ++++++ add-f14h-support.patch ++++++ --- /var/tmp/diff_new_pack.HjjjKw/_old 2021-01-29 14:56:45.921480107 +0100 +++ /var/tmp/diff_new_pack.HjjjKw/_new 2021-01-29 14:56:45.925480113 +0100 @@ -1,11 +1,16 @@ Add F14h decoding support Signed-off-by: Borislav Petkov <[email protected]> -Index: mcelog-1.60/amd.c -=================================================================== ---- mcelog-1.60.orig/amd.c 2018-09-24 15:15:15.607266576 +0200 -+++ mcelog-1.60/amd.c 2018-09-24 15:15:19.007468820 +0200 -@@ -159,6 +159,8 @@ enum cputype select_amd_cputype(u32 fami +--- + amd.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + amd.h | 3 +- + mcelog.c | 2 + + mcelog.h | 1 + 4 files changed, 93 insertions(+), 1 deletion(-) + +--- a/amd.c ++++ b/amd.c +@@ -159,6 +159,8 @@ return CPU_F11H; case 0x12: return CPU_F12H; @@ -14,7 +19,7 @@ default: break; } -@@ -381,6 +383,58 @@ static bool k8_mc0_mce(u16 ec, u8 xec) +@@ -381,6 +383,58 @@ return f10h_mc0_mce(ec, xec); } @@ -73,7 +78,7 @@ static void decode_mc0_mce(struct amd_decoder_ops *ops, struct mce *m) { u16 ec = EC(m->status); -@@ -402,6 +456,31 @@ static void decode_mc0_mce(struct amd_de +@@ -402,6 +456,31 @@ Eprintf("Corrupted MC0 MCE info?\n"); } @@ -105,7 +110,7 @@ static void decode_mc1_mce(struct amd_decoder_ops *ops, struct mce *m) { u16 ec = EC(m->status); -@@ -656,6 +735,12 @@ struct amd_decoder_ops fam_ops[] = { +@@ -656,6 +735,12 @@ .mc1_mce = k8_mc1_mce, .mc2_mce = k8_mc2_mce, }, @@ -118,7 +123,7 @@ }; static void __decode_amd_mc(enum cputype cpu, struct mce *mce) -@@ -672,6 +757,9 @@ static void __decode_amd_mc(enum cputype +@@ -672,6 +757,9 @@ case CPU_F12H: ops = &fam_ops[AMD_F12H]; break; @@ -128,34 +133,18 @@ default: Eprintf("Huh? What family is it: 0x%x?!\n", cpu); return; -Index: mcelog-1.60/mcelog.h -=================================================================== ---- mcelog-1.60.orig/mcelog.h 2018-09-24 15:15:15.607266576 +0200 -+++ mcelog-1.60/mcelog.h 2018-09-24 15:15:19.007468820 +0200 -@@ -120,6 +120,7 @@ enum cputype { - CPU_F10H, - CPU_F11H, - CPU_F12H, -+ CPU_F14H, - CPU_P4, - CPU_NEHALEM, - CPU_DUNNINGTON, -Index: mcelog-1.60/amd.h -=================================================================== ---- mcelog-1.60.orig/amd.h 2018-09-24 15:15:15.607266576 +0200 -+++ mcelog-1.60/amd.h 2018-09-24 15:15:19.007468820 +0200 -@@ -96,4 +96,5 @@ enum rrrr_ids { +--- a/amd.h ++++ b/amd.h +@@ -96,4 +96,5 @@ case CPU_K8: \ case CPU_F10H: \ case CPU_F11H: \ - case CPU_F12H + case CPU_F12H: \ + case CPU_F14H -Index: mcelog-1.60/mcelog.c -=================================================================== ---- mcelog-1.60.orig/mcelog.c 2018-09-24 15:15:15.611266814 +0200 -+++ mcelog-1.60/mcelog.c 2018-09-24 15:15:19.011469058 +0200 -@@ -230,6 +230,7 @@ static char *cputype_name[] = { +--- a/mcelog.c ++++ b/mcelog.c +@@ -232,6 +232,7 @@ [CPU_F10H] = "AMD Greyhound", [CPU_F11H] = "AMD Griffin", [CPU_F12H] = "AMD Llano", @@ -163,7 +152,7 @@ [CPU_P4] = "Intel P4", [CPU_NEHALEM] = "Intel Xeon 5500 series / Core i3/5/7 (\"Nehalem/Westmere\")", [CPU_DUNNINGTON] = "Intel Xeon 7400 series", -@@ -262,6 +263,7 @@ static struct config_choice cpu_choices[ +@@ -274,6 +275,7 @@ { "f10h", CPU_F10H }, { "f11h", CPU_F11H }, { "f12h", CPU_F12H }, @@ -171,3 +160,13 @@ { "p4", CPU_P4 }, { "dunnington", CPU_DUNNINGTON }, { "xeon74xx", CPU_DUNNINGTON }, +--- a/mcelog.h ++++ b/mcelog.h +@@ -122,6 +122,7 @@ + CPU_F10H, + CPU_F11H, + CPU_F12H, ++ CPU_F14H, + CPU_P4, + CPU_NEHALEM, + CPU_DUNNINGTON, ++++++ add-f15h-support.patch ++++++ --- /var/tmp/diff_new_pack.HjjjKw/_old 2021-01-29 14:56:45.933480125 +0100 +++ /var/tmp/diff_new_pack.HjjjKw/_new 2021-01-29 14:56:45.937480130 +0100 @@ -1,11 +1,16 @@ Add F15h decoding support Signed-off-by: Borislav Petkov <[email protected]> -Index: mcelog-1.60/amd.c -=================================================================== ---- mcelog-1.60.orig/amd.c 2018-09-24 15:15:19.007468820 +0200 -+++ mcelog-1.60/amd.c 2018-09-24 15:15:22.903700568 +0200 -@@ -72,6 +72,43 @@ static char *nbextendederr[] = { +--- + amd.c | 160 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + amd.h | 3 - + mcelog.c | 2 + mcelog.h | 1 + 4 files changed, 165 insertions(+), 1 deletion(-) + +--- a/amd.c ++++ b/amd.c +@@ -72,6 +72,43 @@ "L3 Cache LRU Error" }; @@ -49,7 +54,7 @@ static const char * const mc4_mce_desc[] = { "DRAM ECC error detected on the NB", "CRC error detected on HT link", -@@ -161,6 +198,8 @@ enum cputype select_amd_cputype(u32 fami +@@ -161,6 +198,8 @@ return CPU_F12H; case 0x14: return CPU_F14H; @@ -58,7 +63,7 @@ default: break; } -@@ -435,6 +474,53 @@ static bool cat_mc0_mce(u16 ec, u8 xec) +@@ -435,6 +474,53 @@ return ret; } @@ -112,7 +117,7 @@ static void decode_mc0_mce(struct amd_decoder_ops *ops, struct mce *m) { u16 ec = EC(m->status); -@@ -481,6 +567,36 @@ static bool cat_mc1_mce(u16 ec, u8 xec) +@@ -481,6 +567,36 @@ return ret; } @@ -149,7 +154,7 @@ static void decode_mc1_mce(struct amd_decoder_ops *ops, struct mce *m) { u16 ec = EC(m->status); -@@ -537,6 +653,40 @@ static bool k8_mc2_mce(u16 ec, u8 xec) +@@ -537,6 +653,40 @@ return ret; } @@ -190,7 +195,7 @@ static void decode_mc2_mce(struct amd_decoder_ops *ops, struct mce *m) { u16 ec = EC(m->status); -@@ -741,6 +891,12 @@ struct amd_decoder_ops fam_ops[] = { +@@ -741,6 +891,12 @@ .mc1_mce = cat_mc1_mce, .mc2_mce = k8_mc2_mce, }, @@ -203,7 +208,7 @@ }; static void __decode_amd_mc(enum cputype cpu, struct mce *mce) -@@ -760,6 +916,10 @@ static void __decode_amd_mc(enum cputype +@@ -760,6 +916,10 @@ case CPU_F14H: ops = &fam_ops[AMD_F14H]; break; @@ -214,34 +219,18 @@ default: Eprintf("Huh? What family is it: 0x%x?!\n", cpu); return; -Index: mcelog-1.60/mcelog.h -=================================================================== ---- mcelog-1.60.orig/mcelog.h 2018-09-24 15:15:19.007468820 +0200 -+++ mcelog-1.60/mcelog.h 2018-09-24 15:15:22.907700806 +0200 -@@ -121,6 +121,7 @@ enum cputype { - CPU_F11H, - CPU_F12H, - CPU_F14H, -+ CPU_F15H, - CPU_P4, - CPU_NEHALEM, - CPU_DUNNINGTON, -Index: mcelog-1.60/amd.h -=================================================================== ---- mcelog-1.60.orig/amd.h 2018-09-24 15:15:19.007468820 +0200 -+++ mcelog-1.60/amd.h 2018-09-24 15:15:22.907700806 +0200 -@@ -97,4 +97,5 @@ enum rrrr_ids { +--- a/amd.h ++++ b/amd.h +@@ -97,4 +97,5 @@ case CPU_F10H: \ case CPU_F11H: \ case CPU_F12H: \ - case CPU_F14H + case CPU_F14H: \ + case CPU_F15H -Index: mcelog-1.60/mcelog.c -=================================================================== ---- mcelog-1.60.orig/mcelog.c 2018-09-24 15:15:19.011469058 +0200 -+++ mcelog-1.60/mcelog.c 2018-09-24 15:15:22.907700806 +0200 -@@ -231,6 +231,7 @@ static char *cputype_name[] = { +--- a/mcelog.c ++++ b/mcelog.c +@@ -233,6 +233,7 @@ [CPU_F11H] = "AMD Griffin", [CPU_F12H] = "AMD Llano", [CPU_F14H] = "AMD Bobcat", @@ -249,7 +238,7 @@ [CPU_P4] = "Intel P4", [CPU_NEHALEM] = "Intel Xeon 5500 series / Core i3/5/7 (\"Nehalem/Westmere\")", [CPU_DUNNINGTON] = "Intel Xeon 7400 series", -@@ -264,6 +265,7 @@ static struct config_choice cpu_choices[ +@@ -276,6 +277,7 @@ { "f11h", CPU_F11H }, { "f12h", CPU_F12H }, { "f14h", CPU_F14H }, @@ -257,3 +246,13 @@ { "p4", CPU_P4 }, { "dunnington", CPU_DUNNINGTON }, { "xeon74xx", CPU_DUNNINGTON }, +--- a/mcelog.h ++++ b/mcelog.h +@@ -123,6 +123,7 @@ + CPU_F11H, + CPU_F12H, + CPU_F14H, ++ CPU_F15H, + CPU_P4, + CPU_NEHALEM, + CPU_DUNNINGTON, ++++++ add-f16h-support.patch ++++++ --- /var/tmp/diff_new_pack.HjjjKw/_old 2021-01-29 14:56:45.945480142 +0100 +++ /var/tmp/diff_new_pack.HjjjKw/_new 2021-01-29 14:56:45.945480142 +0100 @@ -1,11 +1,16 @@ Add F16h decoding support Signed-off-by: Borislav Petkov <[email protected]> -Index: mcelog-1.60/amd.c -=================================================================== ---- mcelog-1.60.orig/amd.c 2018-09-24 15:15:33.848351555 +0200 -+++ mcelog-1.60/amd.c 2018-09-24 15:15:35.660459339 +0200 -@@ -200,6 +200,8 @@ enum cputype select_amd_cputype(u32 fami +--- + amd.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ + amd.h | 3 ++- + mcelog.c | 2 ++ + mcelog.h | 1 + + 4 files changed, 58 insertions(+), 1 deletion(-) + +--- a/amd.c ++++ b/amd.c +@@ -200,6 +200,8 @@ return CPU_F14H; case 0x15: return CPU_F15H; @@ -14,7 +19,7 @@ default: break; } -@@ -687,6 +689,47 @@ static bool f15h_mc2_mce(u16 ec, u8 xec) +@@ -687,6 +689,47 @@ return ret; } @@ -62,7 +67,7 @@ static void decode_mc2_mce(struct amd_decoder_ops *ops, struct mce *m) { u16 ec = EC(m->status); -@@ -897,6 +940,12 @@ struct amd_decoder_ops fam_ops[] = { +@@ -897,6 +940,12 @@ .mc1_mce = f15h_mc1_mce, .mc2_mce = f15h_mc2_mce, }, @@ -75,7 +80,7 @@ }; static void __decode_amd_mc(enum cputype cpu, struct mce *mce) -@@ -920,6 +969,10 @@ static void __decode_amd_mc(enum cputype +@@ -920,6 +969,10 @@ xec_mask = 0x1f; ops = &fam_ops[AMD_F15H]; break; @@ -86,34 +91,18 @@ default: Eprintf("Huh? What family is it: 0x%x?!\n", cpu); return; -Index: mcelog-1.60/mcelog.h -=================================================================== ---- mcelog-1.60.orig/mcelog.h 2018-09-24 15:15:33.848351555 +0200 -+++ mcelog-1.60/mcelog.h 2018-09-24 15:15:35.664459576 +0200 -@@ -122,6 +122,7 @@ enum cputype { - CPU_F12H, - CPU_F14H, - CPU_F15H, -+ CPU_F16H, - CPU_P4, - CPU_NEHALEM, - CPU_DUNNINGTON, -Index: mcelog-1.60/amd.h -=================================================================== ---- mcelog-1.60.orig/amd.h 2018-09-24 15:15:33.848351555 +0200 -+++ mcelog-1.60/amd.h 2018-09-24 15:15:35.664459576 +0200 -@@ -98,4 +98,5 @@ enum rrrr_ids { +--- a/amd.h ++++ b/amd.h +@@ -98,4 +98,5 @@ case CPU_F11H: \ case CPU_F12H: \ case CPU_F14H: \ - case CPU_F15H + case CPU_F15H: \ + case CPU_F16H -Index: mcelog-1.60/mcelog.c -=================================================================== ---- mcelog-1.60.orig/mcelog.c 2018-09-24 15:15:33.848351555 +0200 -+++ mcelog-1.60/mcelog.c 2018-09-24 15:15:35.668459814 +0200 -@@ -232,6 +232,7 @@ static char *cputype_name[] = { +--- a/mcelog.c ++++ b/mcelog.c +@@ -234,6 +234,7 @@ [CPU_F12H] = "AMD Llano", [CPU_F14H] = "AMD Bobcat", [CPU_F15H] = "AMD Bulldozer", @@ -121,7 +110,7 @@ [CPU_P4] = "Intel P4", [CPU_NEHALEM] = "Intel Xeon 5500 series / Core i3/5/7 (\"Nehalem/Westmere\")", [CPU_DUNNINGTON] = "Intel Xeon 7400 series", -@@ -266,6 +267,7 @@ static struct config_choice cpu_choices[ +@@ -278,6 +279,7 @@ { "f12h", CPU_F12H }, { "f14h", CPU_F14H }, { "f15h", CPU_F15H }, @@ -129,3 +118,13 @@ { "p4", CPU_P4 }, { "dunnington", CPU_DUNNINGTON }, { "xeon74xx", CPU_DUNNINGTON }, +--- a/mcelog.h ++++ b/mcelog.h +@@ -124,6 +124,7 @@ + CPU_F12H, + CPU_F14H, + CPU_F15H, ++ CPU_F16H, + CPU_P4, + CPU_NEHALEM, + CPU_DUNNINGTON, ++++++ email.patch ++++++ --- /var/tmp/diff_new_pack.HjjjKw/_old 2021-01-29 14:56:45.957480160 +0100 +++ /var/tmp/diff_new_pack.HjjjKw/_new 2021-01-29 14:56:45.957480160 +0100 @@ -1,22 +1,20 @@ --- - Makefile | 10 ++- - email.c | 199 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + Makefile | 13 +++- + email.c | 200 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ email.h | 34 ++++++++++ - mcelog.c | 93 +++++++++++++++++++++++++++++ + mcelog.c | 93 ++++++++++++++++++++++++++++- mcelog.h | 1 msg.c | 8 ++ - 6 files changed, 343 insertions(+), 2 deletions(-) + 6 files changed, 346 insertions(+), 3 deletions(-) -Index: mcelog-1.66/Makefile -=================================================================== ---- mcelog-1.66.orig/Makefile 2019-11-12 20:05:12.000000000 +0100 -+++ mcelog-1.66/Makefile 2019-11-20 15:22:48.408692817 +0100 +--- a/Makefile ++++ b/Makefile @@ -1,3 +1,4 @@ +CONFIG_EMAIL := 1 CFLAGS := -g -Os prefix := /usr etcprefix := -@@ -37,15 +38,23 @@ OBJ := p4.o k8.o mcelog.o dmi.o tsc.o co +@@ -38,15 +39,23 @@ broadwell_de.o broadwell_epex.o skylake_xeon.o \ denverton.o i10nm.o \ msr.o bus.o unknown.o @@ -41,7 +39,7 @@ # dbquery intentionally not installed by default install: mcelog mcelog.conf mcelog.conf.5 mcelog.triggers.5 -@@ -80,7 +89,7 @@ dbquery: db.o dbquery.o memutil.o +@@ -81,7 +90,7 @@ depend: .depend %.o: %.c @@ -50,10 +48,8 @@ version.tmp: FORCE ( printf "char version[] = \"" ; \ -Index: mcelog-1.66/email.c -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ mcelog-1.66/email.c 2019-11-20 15:22:48.408692817 +0100 +--- /dev/null ++++ b/email.c @@ -0,0 +1,200 @@ +#include <unistd.h> +#include <signal.h> @@ -255,10 +251,8 @@ + smtp_destroy_session (session); + return 0; +} -Index: mcelog-1.66/email.h -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ mcelog-1.66/email.h 2019-11-20 15:22:48.408692817 +0100 +--- /dev/null ++++ b/email.h @@ -0,0 +1,34 @@ +#ifndef _MCELOG_EMAIL_H_ +#define _MCELOG_EMAIL_H_ @@ -294,10 +288,8 @@ +#endif + +#endif -Index: mcelog-1.66/mcelog.c -=================================================================== ---- mcelog-1.66.orig/mcelog.c 2019-11-12 20:05:12.000000000 +0100 -+++ mcelog-1.66/mcelog.c 2019-11-20 15:22:48.408692817 +0100 +--- a/mcelog.c ++++ b/mcelog.c @@ -37,6 +37,7 @@ #include <assert.h> #include <signal.h> @@ -316,7 +308,7 @@ enum cputype cputype = CPU_GENERIC; char *logfn = LOG_DEV_FILENAME; -@@ -71,7 +75,7 @@ static double cpumhz; +@@ -71,7 +75,7 @@ static int cpumhz_forced; int ascii_mode; int dump_raw_ascii; @@ -325,23 +317,23 @@ static char *inputfile; char *processor_flags; static int foreground; -@@ -1003,6 +1007,7 @@ void usage(void) - "--is-cpu-supported Exit with return code indicating whether the CPU is supported\n" +@@ -1022,6 +1026,7 @@ + "--max-corr-err-counters Max page correctable error counters\n" "--help Display this message.\n" ); + email_usage(); printf("\n"); print_cputypes(); } -@@ -1072,6 +1077,7 @@ static struct option options[] = { - { "no-imc-log", 0, NULL, O_NO_IMC_LOG }, +@@ -1093,6 +1098,7 @@ + { "max-corr-err-counters", 1, NULL, O_MAX_CORR_ERR_COUNTERS }, { "help", 0, NULL, O_HELP }, { "is-cpu-supported", 0, NULL, O_IS_CPU_SUPPORTED }, + EMAIL_OPTIONS {} }; -@@ -1263,11 +1269,86 @@ static void drop_cred(void) +@@ -1287,11 +1293,86 @@ } } @@ -428,7 +420,7 @@ if (recordlen == 0) { Wprintf("no data in mce record\n"); -@@ -1294,12 +1375,16 @@ static void process(int fd, unsigned rec +@@ -1318,12 +1399,16 @@ finish = 1; if (!mce_filter(mce, recordlen)) continue; @@ -445,7 +437,7 @@ flushlog(); } -@@ -1413,6 +1498,8 @@ int main(int ac, char **av) +@@ -1437,6 +1522,8 @@ noargs(ac, av); fprintf(stderr, "mcelog %s\n", MCELOG_VERSION); exit(0); @@ -454,7 +446,7 @@ } else if (opt == 0) break; } -@@ -1447,6 +1534,10 @@ int main(int ac, char **av) +@@ -1471,6 +1558,10 @@ usage(); exit(1); } @@ -465,11 +457,9 @@ checkdmi(); general_setup(); -Index: mcelog-1.66/mcelog.h -=================================================================== ---- mcelog-1.66.orig/mcelog.h 2019-11-12 20:05:12.000000000 +0100 -+++ mcelog-1.66/mcelog.h 2019-11-20 15:22:48.408692817 +0100 -@@ -148,6 +148,7 @@ enum cputype { +--- a/mcelog.h ++++ b/mcelog.h +@@ -156,6 +156,7 @@ enum option_ranges { O_COMMON = 500, O_DISKDB = 1000, @@ -477,10 +467,8 @@ }; enum syslog_opt { -Index: mcelog-1.66/msg.c -=================================================================== ---- mcelog-1.66.orig/msg.c 2019-11-12 20:05:12.000000000 +0100 -+++ mcelog-1.66/msg.c 2019-11-20 15:22:48.408692817 +0100 +--- a/msg.c ++++ b/msg.c @@ -8,10 +8,13 @@ #include "mcelog.h" #include "msg.h" @@ -495,7 +483,7 @@ static char *output_fn; int need_stdout(void) -@@ -135,6 +138,11 @@ int Wprintf(char *fmt, ...) +@@ -135,6 +138,11 @@ n = vfprintf(output_fh ? output_fh : stdout, fmt, ap); va_end(ap); } ++++++ fix_setgroups_missing_call.patch ++++++ --- /var/tmp/diff_new_pack.HjjjKw/_old 2021-01-29 14:56:45.965480172 +0100 +++ /var/tmp/diff_new_pack.HjjjKw/_new 2021-01-29 14:56:45.969480177 +0100 @@ -1,7 +1,9 @@ -Index: mcelog-1.60/mcelog.c -=================================================================== ---- mcelog-1.60.orig/mcelog.c 2018-09-24 15:15:35.668459814 +0200 -+++ mcelog-1.60/mcelog.c 2018-09-24 15:15:41.648815524 +0200 +--- + mcelog.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/mcelog.c ++++ b/mcelog.c @@ -37,6 +37,7 @@ #include <assert.h> #include <signal.h> @@ -10,7 +12,7 @@ #include <sys/wait.h> #include <fnmatch.h> #include "mcelog.h" -@@ -1247,6 +1248,14 @@ static void general_setup(void) +@@ -1284,6 +1285,14 @@ static void drop_cred(void) { ++++++ mcelog-173.tar.gz -> mcelog-175.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/Makefile new/mcelog-175/Makefile --- old/mcelog-173/Makefile 2020-09-24 02:39:24.000000000 +0200 +++ new/mcelog-175/Makefile 2021-01-08 17:50:02.000000000 +0100 @@ -23,7 +23,8 @@ iomca-error-trigger \ unknown-error-trigger \ page-error-pre-sync-soft-trigger \ - page-error-post-sync-soft-trigger + page-error-post-sync-soft-trigger \ + page-error-counter-replacement-trigger all: mcelog diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/i10nm.c new/mcelog-175/i10nm.c --- old/mcelog-173/i10nm.c 2020-09-24 02:39:24.000000000 +0200 +++ new/mcelog-175/i10nm.c 2021-01-08 17:50:02.000000000 +0100 @@ -170,8 +170,6 @@ [0x02] = "Data parity error", [0x03] = "Data ECC error", [0x04] = "Data byte enable parity error", - [0x05] = "Received uncorrectable data", - [0x06] = "Received uncorrectable metadata", [0x07] = "Transaction ID parity error", [0x08] = "Corrected patrol scrub error", [0x10] = "Uncorrected patrol scrub error", @@ -185,8 +183,6 @@ static char *imc_1[] = { [0x00] = "WDB read parity error", [0x03] = "RPA parity error", - [0x04] = "RPA parity error", - [0x05] = "WPA parity error", [0x06] = "DDR_T_DPPP data BE error", [0x07] = "DDR_T_DPPP data error", [0x08] = "DDR link failure", @@ -202,11 +198,6 @@ static char *imc_4[] = { [0x00] = "RPQ parity (primary) error", - [0x01] = "RPQ parity (buddy) error", - [0x04] = "WPQ parity (primary) error", - [0x05] = "WPQ parity (buddy) error", - [0x08] = "RPB parity (primary) error", - [0x09] = "RPB parity (buddy) error", }; static char *imc_8[] = { @@ -250,6 +241,21 @@ [0x25] = "TME_CMI_UFL_ERR", [0x26] = "TME_TEM_SECURE_ERR", [0x27] = "TME_UFILL_PAR_ERR", + [0x29] = "INTERNAL_ERR", + [0x2A] = "TME_INTEGRITY_ERR", + [0x2B] = "TME_TDX_ERR", + [0x2C] = "TME_UFILL_TEM_SECURE_ERR", + [0x2D] = "TME_KEY_POISON_ERR", + [0x2E] = "TME_SECURITY_ENGINE_ERR", +}; + +static char *imc_10[] = { + [0x08] = "CORR_PATSCRUB_MIRR2ND_ERR", + [0x10] = "UC_PATSCRUB_MIRR2ND_ERR", + [0x20] = "COR_SPARE_MIRR2ND_ERR", + [0x40] = "UC_SPARE_MIRR2ND_ERR", + [0x80] = "HA_RD_MIRR2ND_ERR", + [0xA0] = "HA_UNCORR_RD_MIRR2ND_ERR", }; static struct field imc0[] = { @@ -277,6 +283,11 @@ {} }; +static struct field imc10[] = { + FIELD(0, imc_10), + {} +}; + static void i10nm_imc_misc(u64 status, u64 misc) { u32 column = EXTRACT(misc, 9, 18) << 2; @@ -306,12 +317,76 @@ Wprintf("transient\n"); } +enum banktype { + BT_UNKNOWN, + BT_PCU, + BT_UPI, + BT_M2M, + BT_IMC, +}; + +static enum banktype icelake[32] = { + [4] = BT_PCU, + [5] = BT_UPI, + [7 ... 8] = BT_UPI, + [12] = BT_M2M, + [16] = BT_M2M, + [20] = BT_M2M, + [24] = BT_M2M, + [13 ... 15] = BT_IMC, + [17 ... 19] = BT_IMC, + [21 ... 23] = BT_IMC, + [25 ... 27] = BT_IMC, +}; + +static enum banktype icelake_de[32] = { + [4] = BT_PCU, + [12] = BT_M2M, + [16] = BT_M2M, + [13 ... 15] = BT_IMC, + [17 ... 19] = BT_IMC, +}; + +static enum banktype tremont[32] = { + [4] = BT_PCU, + [12] = BT_M2M, + [13 ... 15] = BT_IMC, +}; + +static enum banktype sapphire[32] = { + [4] = BT_PCU, + [5] = BT_UPI, + [12] = BT_M2M, + [13 ... 20] = BT_IMC, +}; + void i10nm_decode_model(int cputype, int bank, u64 status, u64 misc) { + enum banktype banktype; u64 f; - switch (bank) { - case 4: + switch (cputype) { + case CPU_ICELAKE_XEON: + banktype = icelake[bank]; + break; + case CPU_ICELAKE_DE: + banktype = icelake_de[bank]; + break; + case CPU_TREMONT_D: + banktype = tremont[bank]; + break; + case CPU_SAPPHIRERAPIDS: + banktype = sapphire[bank]; + break; + default: + return; + } + + switch (banktype) { + case BT_UNKNOWN: + break; + + case BT_PCU: Wprintf("PCU: "); f = EXTRACT(status, 24, 31); if (f) @@ -324,9 +399,7 @@ decode_bitfield(f, pcu3); break; - case 5: - case 7: - case 8: + case BT_UPI: Wprintf("UPI: "); f = EXTRACT(status, 22, 31); if (f) @@ -335,10 +408,7 @@ decode_bitfield(f, upi2); break; - case 12: - case 16: - case 20: - case 24: + case BT_M2M: Wprintf("M2M: "); f = EXTRACT(status, 24, 25); Wprintf("MscodDDRType=0x%llx\n", f); @@ -347,18 +417,7 @@ decode_bitfield(status, m2m); break; - case 13: - case 14: - case 15: - case 17: - case 18: - case 19: - case 21: - case 22: - case 23: - case 25: - case 26: - case 27: + case BT_IMC: Wprintf("MemCtrl: "); f = EXTRACT(status, 16, 23); switch (EXTRACT(status, 24, 31)) { @@ -367,6 +426,7 @@ case 2: decode_bitfield(f, imc2); break; case 4: decode_bitfield(f, imc4); break; case 8: decode_bitfield(f, imc8); break; + case 0x10: decode_bitfield(f, imc10); break; } i10nm_imc_misc(status, misc); break; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/input/spr_uc_patscrub new/mcelog-175/input/spr_uc_patscrub --- old/mcelog-173/input/spr_uc_patscrub 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-175/input/spr_uc_patscrub 2021-01-08 17:50:02.000000000 +0100 @@ -0,0 +1,4 @@ +# Sapphire rapids UC_PATSCRUB_MIRR2ND_ERR +CPU 0 13 +PROCESSOR 0:0x806f0 +STATUS 0x8000000010100080 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/intel.c new/mcelog-175/intel.c --- old/mcelog-173/intel.c 2020-09-24 02:39:24.000000000 +0200 +++ new/mcelog-175/intel.c 2021-01-08 17:50:02.000000000 +0100 @@ -43,7 +43,8 @@ cpu == CPU_ICELAKE_XEON || cpu == CPU_ICELAKE_DE || cpu == CPU_TREMONT_D || cpu == CPU_COMETLAKE || cpu == CPU_TIGERLAKE || cpu == CPU_ROCKETLAKE || - cpu == CPU_ALDERLAKE || cpu == CPU_LAKEFIELD) + cpu == CPU_ALDERLAKE || cpu == CPU_LAKEFIELD || + cpu == CPU_SAPPHIRERAPIDS) memory_error_support = 1; } @@ -122,6 +123,8 @@ return CPU_ALDERLAKE; else if (model == 0x8A) return CPU_LAKEFIELD; + else if (model == 0x8F) + return CPU_SAPPHIRERAPIDS; if (model > 0x1a) { Eprintf("Family 6 Model %u CPU: only decoding architectural errors\n", model); @@ -174,6 +177,7 @@ case CPU_ICELAKE_XEON: case CPU_ICELAKE_DE: case CPU_TREMONT_D: + case CPU_SAPPHIRERAPIDS: i10nm_memerr_misc(m, channel, dimm); break; default: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/intel.h new/mcelog-175/intel.h --- old/mcelog-173/intel.h 2020-09-24 02:39:24.000000000 +0200 +++ new/mcelog-175/intel.h 2021-01-08 17:50:02.000000000 +0100 @@ -37,5 +37,6 @@ case CPU_TIGERLAKE: \ case CPU_ROCKETLAKE: \ case CPU_ALDERLAKE: \ - case CPU_LAKEFIELD + case CPU_LAKEFIELD: \ + case CPU_SAPPHIRERAPIDS diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/list.h new/mcelog-175/list.h --- old/mcelog-173/list.h 2020-09-24 02:39:24.000000000 +0200 +++ new/mcelog-175/list.h 2021-01-08 17:50:02.000000000 +0100 @@ -111,6 +111,17 @@ } /** + * list_is_first - tests whether @list is the first entry in list @head + * @list: the entry to test + * @head: the head of the list + */ +static inline int list_is_first(const struct list_head *list, + const struct list_head *head) +{ + return list == head->next; +} + +/** * list_empty - tests whether a list is empty * @head: the list to test. */ @@ -141,6 +152,17 @@ list_entry((ptr)->next, type, member) /** + * list_last_entry - get the last element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * Note, that list is expected to be not empty. + */ +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + +/** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/mcelog.c new/mcelog-175/mcelog.c --- old/mcelog-173/mcelog.c 2020-09-24 02:39:24.000000000 +0200 +++ new/mcelog-175/mcelog.c 2021-01-08 17:50:02.000000000 +0100 @@ -85,6 +85,7 @@ static int debug_numerrors; int imc_log = -1; static int check_only = 0; +int max_corr_err_counters = 4158; static int is_cpu_supported(void); @@ -254,6 +255,7 @@ [CPU_ROCKETLAKE] = "Rocketlake", [CPU_ALDERLAKE] = "Alderlake", [CPU_LAKEFIELD] = "Lakefield", + [CPU_SAPPHIRERAPIDS] = "Sapphirerapids server", }; static struct config_choice cpu_choices[] = { @@ -315,6 +317,7 @@ { "rocketlake", CPU_ROCKETLAKE }, { "alderlake", CPU_ALDERLAKE }, { "lakefield", CPU_LAKEFIELD }, + { "sapphirerapids_server", CPU_SAPPHIRERAPIDS }, { NULL } }; @@ -491,7 +494,8 @@ cputype != CPU_ICELAKE_XEON && cputype != CPU_ICELAKE_DE && cputype != CPU_TREMONT_D && cputype != CPU_COMETLAKE && cputype != CPU_TIGERLAKE && cputype != CPU_ROCKETLAKE && - cputype != CPU_ALDERLAKE && cputype != CPU_LAKEFIELD) + cputype != CPU_ALDERLAKE && cputype != CPU_LAKEFIELD && + cputype != CPU_SAPPHIRERAPIDS) resolveaddr(m->addr); } @@ -1015,6 +1019,7 @@ "--pidfile file Write pid of daemon into file\n" "--no-imc-log Disable extended iMC logging\n" "--is-cpu-supported Exit with return code indicating whether the CPU is supported\n" +"--max-corr-err-counters Max page correctable error counters\n" "--help Display this message.\n" ); printf("\n"); @@ -1050,6 +1055,7 @@ O_DEBUG_NUMERRORS, O_NO_IMC_LOG, O_IS_CPU_SUPPORTED, + O_MAX_CORR_ERR_COUNTERS, O_HELP, }; @@ -1084,6 +1090,7 @@ { "pidfile", 1, NULL, O_PIDFILE }, { "debug-numerrors", 0, NULL, O_DEBUG_NUMERRORS }, /* undocumented: for testing */ { "no-imc-log", 0, NULL, O_NO_IMC_LOG }, + { "max-corr-err-counters", 1, NULL, O_MAX_CORR_ERR_COUNTERS }, { "help", 0, NULL, O_HELP }, { "is-cpu-supported", 0, NULL, O_IS_CPU_SUPPORTED }, {} @@ -1194,6 +1201,9 @@ case O_NO_IMC_LOG: imc_log = 0; break; + case O_MAX_CORR_ERR_COUNTERS: + max_corr_err_counters = atoi(optarg); + break; case O_IS_CPU_SUPPORTED: check_only = 1; break; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/mcelog.conf new/mcelog-175/mcelog.conf --- old/mcelog-173/mcelog.conf 2020-09-24 02:39:24.000000000 +0200 +++ new/mcelog-175/mcelog.conf 2021-01-08 17:50:02.000000000 +0100 @@ -159,6 +159,13 @@ # Trigger script for corrected errors. # memory-ce-trigger = page-error-trigger +# Memory error counter per 4K memory page. +# Threshold for the counter replacements trigger script. +memory-ce-counter-replacement-threshold = 20 / 24h + +# Trigger script for counter replacements. +memory-ce-counter-replacement-trigger = page-error-counter-replacement-trigger + # Should page threshold events be logged explicitly? memory-ce-log = yes diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/mcelog.h new/mcelog-175/mcelog.h --- old/mcelog-173/mcelog.h 2020-09-24 02:39:24.000000000 +0200 +++ new/mcelog-175/mcelog.h 2021-01-08 17:50:02.000000000 +0100 @@ -91,6 +91,7 @@ #define endof_field(t, f) (sizeof(((t *)0)->f) + offsetof(t, f)) #define round_up(x,y) (((x) + (y) - 1) & ~((y)-1)) +#define roundup(x,y) (((x) + (y) - 1) / (y) * (y)) #define round_down(x,y) ((x) & ~((y)-1)) #define BITS_PER_INT (sizeof(unsigned) * 8) @@ -149,6 +150,7 @@ CPU_ROCKETLAKE, CPU_ALDERLAKE, CPU_LAKEFIELD, + CPU_SAPPHIRERAPIDS, }; enum option_ranges { @@ -174,4 +176,5 @@ extern enum cputype cputype; extern int filter_memory_errors; extern int imc_log; +extern int max_corr_err_counters; extern void set_imc_log(int cputype); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/p4.c new/mcelog-175/p4.c --- old/mcelog-173/p4.c 2020-09-24 02:39:24.000000000 +0200 +++ new/mcelog-175/p4.c 2021-01-08 17:50:02.000000000 +0100 @@ -308,6 +308,7 @@ case CPU_SKYLAKE_XEON: return skylake_s_ce_type(bank, status, misc); case CPU_ICELAKE_XEON: + case CPU_SAPPHIRERAPIDS: return i10nm_ce_type(bank, status, misc); default: return 0; @@ -459,6 +460,7 @@ case CPU_ICELAKE_XEON: case CPU_ICELAKE_DE: case CPU_TREMONT_D: + case CPU_SAPPHIRERAPIDS: i10nm_decode_model(cputype, log->bank, log->status, log->misc); break; case CPU_DENVERTON: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/page.c new/mcelog-175/page.c --- old/mcelog-173/page.c 2020-09-24 02:39:24.000000000 +0200 +++ new/mcelog-175/page.c 2021-01-08 17:50:02.000000000 +0100 @@ -29,10 +29,13 @@ #include <fcntl.h> #include <errno.h> #include <string.h> +#include <sys/mman.h> +#include <assert.h> #include "memutil.h" #include "trigger.h" #include "mcelog.h" #include "rbtree.h" +#include "list.h" #include "leaky-bucket.h" #include "page.h" #include "config.h" @@ -54,8 +57,31 @@ struct err_type ce; }; +#define N ((PAGE_SIZE - sizeof(struct list_head)) / sizeof(struct mempage)) +#define to_cluster(mp) (struct mempage_cluster *)((long)(mp) & ~((long)(PAGE_SIZE - 1))) + +struct mempage_cluster { + struct list_head lru; + struct mempage mp[N]; + int mp_used; +}; + +struct mempage_replacement { + struct leaky_bucket bucket; + unsigned count; +}; + +enum { + MAX_ENV = 20, +}; + +static int corr_err_counters; +static struct mempage_cluster *mp_cluster; +static struct mempage_replacement mp_repalcement; static struct rb_root mempage_root; +static LIST_HEAD(mempage_cluster_lru_list); static struct bucket_conf page_trigger_conf; +static struct bucket_conf mp_replacement_trigger_conf; static char *page_error_pre_soft_trigger, *page_error_post_soft_trigger; static const char *page_state[] = { @@ -64,6 +90,28 @@ [PAGE_OFFLINE_FAILED] = "offline-failed", }; +static struct mempage *mempage_alloc(void) +{ + if (!mp_cluster || mp_cluster->mp_used == N) { + mp_cluster = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mp_cluster == MAP_FAILED) + Enomem(); + } + + return &mp_cluster->mp[mp_cluster->mp_used++]; +} + +static struct mempage *mempage_replace(void) +{ + /* If no free mp_cluster, reuse the last mp_cluster of the LRU list */ + if (mp_cluster->mp_used == N) { + mp_cluster = list_last_entry(&mempage_cluster_lru_list, struct mempage_cluster, lru); + mp_cluster->mp_used = 0; + } + + return &mp_cluster->mp[mp_cluster->mp_used++]; +} + static struct mempage *mempage_lookup(u64 addr) { struct rb_node *n = mempage_root.rb_node; @@ -111,6 +159,26 @@ return mp; } +static void mempage_rb_tree_update(u64 addr, struct mempage *mp) +{ + rb_erase(&mp->nd, &mempage_root); + mempage_insert(addr, mp); +} + +static void mempage_cluster_lru_list_insert(struct mempage_cluster *mp_cluster) +{ + list_add(&mp_cluster->lru, &mempage_cluster_lru_list); +} + +static void mempage_cluster_lru_list_update(struct mempage_cluster *mp_cluster) +{ + if (list_is_first(&mp_cluster->lru, &mempage_cluster_lru_list)) + return; + + list_del(&mp_cluster->lru); + list_add(&mp_cluster->lru, &mempage_cluster_lru_list); +} + /* Following arrays need to be all kept in sync with the enum */ enum otype { @@ -168,10 +236,47 @@ mp->offlined = PAGE_OFFLINE; } +/* Run a user defined trigger when the replacement threshold of page error counter crossed. */ +static void counter_trigger(char *msg, time_t t, struct mempage_replacement *mr, + struct bucket_conf *bc, bool sync) +{ + struct leaky_bucket *bk = &mr->bucket; + char *env[MAX_ENV], *out, *thresh; + int i, ei = 0; + + thresh = bucket_output(bc, bk); + xasprintf(&out, "%s: %s", msg, thresh); + + if (bc->log) + Gprintf("%s\n", out); + + if (!bc->trigger) + goto out; + + xasprintf(&env[ei++], "THRESHOLD=%s", thresh); + xasprintf(&env[ei++], "TOTALCOUNT=%lu", mr->count); + if (t) + xasprintf(&env[ei++], "LASTEVENT=%lu", t); + xasprintf(&env[ei++], "AGETIME=%u", bc->agetime); + xasprintf(&env[ei++], "MESSAGE=%s", out); + xasprintf(&env[ei++], "THRESHOLD_COUNT=%d", bk->count); + env[ei] = NULL; + assert(ei < MAX_ENV); + + run_trigger(bc->trigger, NULL, env, sync, "page-error-counter"); + + for (i = 0; i < ei; i++) + free(env[i]); +out: + free(out); + free(thresh); +} + void account_page_error(struct mce *m, int channel, int dimm) { u64 addr = m->addr; struct mempage *mp; + char *msg, *thresh; time_t t; unsigned cpu = m->extcpu ? m->extcpu : m->cpu; @@ -200,16 +305,34 @@ t = m->time; addr &= ~((u64)PAGE_SIZE - 1); mp = mempage_lookup(addr); - if (!mp) { - mp = xalloc(sizeof(struct mempage)); + if (!mp && corr_err_counters < max_corr_err_counters) { + mp = mempage_alloc(); bucket_init(&mp->ce.bucket); mempage_insert(addr, mp); + mempage_cluster_lru_list_insert(to_cluster(mp)); + corr_err_counters++; + } else if (!mp) { + mp = mempage_replace(); + bucket_init(&mp->ce.bucket); + mempage_rb_tree_update(addr, mp); + mempage_cluster_lru_list_update(to_cluster(mp)); + + /* Report how often the replacement of counter 'mp' happened */ + ++mp_repalcement.count; + if (__bucket_account(&mp_replacement_trigger_conf, &mp_repalcement.bucket, 1, t)) { + thresh = bucket_output(&mp_replacement_trigger_conf, &mp_repalcement.bucket); + xasprintf(&msg, "Replacements of page correctable error counter exceed threshold %s", thresh); + free(thresh); + + counter_trigger(msg, t, &mp_repalcement, &mp_replacement_trigger_conf, false); + free(msg); + } + } else { + mempage_cluster_lru_list_update(to_cluster(mp)); } ++mp->ce.count; if (__bucket_account(&page_trigger_conf, &mp->ce.bucket, 1, t)) { struct memdimm *md; - char *msg; - char *thresh; if (mp->offlined != PAGE_ONLINE) return; @@ -288,6 +411,7 @@ int n; config_trigger("page", "memory-ce", &page_trigger_conf); + config_trigger("page", "memory-ce-counter-replacement", &mp_replacement_trigger_conf); n = config_choice("page", "memory-ce-action", offline_choice); if (n >= 0) offline = n; @@ -311,4 +435,11 @@ page_error_post_soft_trigger); exit(1); } + + n = max_corr_err_counters; + max_corr_err_counters = roundup(max_corr_err_counters, N); + if (n != max_corr_err_counters) + Lprintf("Round up max-corr-err-counters from %d to %d\n", n, max_corr_err_counters); + + bucket_init(&mp_repalcement.bucket); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/tests/page/inject new/mcelog-175/tests/page/inject --- old/mcelog-173/tests/page/inject 2020-09-24 02:39:24.000000000 +0200 +++ new/mcelog-175/tests/page/inject 2021-01-08 17:50:02.000000000 +0100 @@ -8,3 +8,20 @@ ../../input/GENPAGE | mce-inject ../../input/GENPAGE | mce-inject ../../input/GENPAGE | mce-inject + +conf=$1 + +NUM_ERRORS="$(awk '/^num-errors = / { print $3 }' $conf)" + +if [ "$NUM_ERRORS" == "" ]; then + exit +fi + +NUM_ERRORS=`expr $NUM_ERRORS - 6` + +# Make sure mcelog is ready to consume the mce records (avoid mce records overflow). +sleep 2 + +for ((i = 1; i <= $NUM_ERRORS; i++)); do + ../../input/GENPAGE | mce-inject +done diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/tests/page/page-error-counter-replacement.conf new/mcelog-175/tests/page/page-error-counter-replacement.conf --- old/mcelog-173/tests/page/page-error-counter-replacement.conf 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-175/tests/page/page-error-counter-replacement.conf 2021-01-08 17:50:02.000000000 +0100 @@ -0,0 +1,16 @@ +# trigger: 1 + +# We expect that the injected errors are almost in different pages. +# So about 156 - 126 = 30 counter replacements happen. +num-errors = 156 + +# 2 counter clusters (63 counters per counter cluster) +max-corr-err-counters = 126 + +[page] +memory-ce-action = account +memory-ce-counter-replacement-threshold = 20 / 1h +memory-ce-counter-replacement-trigger = ../trigger + +[trigger] +directory = . diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/tests/test new/mcelog-175/tests/test --- old/mcelog-173/tests/test 2020-09-24 02:39:24.000000000 +0200 +++ new/mcelog-175/tests/test 2021-01-08 17:50:02.000000000 +0100 @@ -44,7 +44,9 @@ for conf in `ls *.conf` do log=`echo $conf | sed "s/conf/log/g"` - ./inject $conf + # Inject mce records and run mcelog in parallel. + # So that the mce records can be consumed by mcelog in time (avoid mce record overflow). + ./inject $conf & $D ../../mcelog --foreground --daemon --debug-numerrors --config $conf --logfile $log >> result # let triggers finish diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-173/triggers/page-error-counter-replacement-trigger new/mcelog-175/triggers/page-error-counter-replacement-trigger --- old/mcelog-173/triggers/page-error-counter-replacement-trigger 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-175/triggers/page-error-counter-replacement-trigger 2021-01-08 17:50:02.000000000 +0100 @@ -0,0 +1,28 @@ +#!/bin/sh +# There are limited counters for counting page correctable errors. +# When the counters run out, replace an old counter for counting +# correctable errors for a new page. + +# This shell script can be executed by mcelog in daemon mode when the +# counter replacements exceed a pre-configured threshold. +# +# environment: +# MESSAGE Human readable consolidated warning message +# THRESHOLD human readable threshold status +# TOTALCOUNT total count of page error counter replacements +# LASTEVENT Time stamp of event that triggered threshold (in time_t format, seconds) +# THRESHOLD_COUNT Total number of events in current threshold time period of specific type +# +# note: will run as mcelog configured user +# this can be changed in mcelog.conf + +logger -s -p daemon.err -t mcelog "$MESSAGE" +logger -s -p daemon.err -t mcelog "THRESHOLD: $THRESHOLD" +logger -s -p daemon.err -t mcelog "TOTALCOUNT: $TOTALCOUNT" +logger -s -p daemon.err -t mcelog "LASTEVENT: $LASTEVENT" +logger -s -p daemon.err -t mcelog "THRESHOLD_COUNT: $THRESHOLD_COUNT" +logger -s -p daemon.err -t mcelog "AGETIME: $AGETIME" + +[ -x ./page-error-counter-replacement-trigger.local ] && . ./page-error-counter-replacement-trigger.local + +exit 0 ++++++ mcelog_invert_prefill_db_warning.patch ++++++ --- /var/tmp/diff_new_pack.HjjjKw/_old 2021-01-29 14:56:46.121480401 +0100 +++ /var/tmp/diff_new_pack.HjjjKw/_new 2021-01-29 14:56:46.121480401 +0100 @@ -2,9 +2,9 @@ memdb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) ---- mcelog-1.64+git20190805.e53631f.orig/memdb.c 2019-09-06 14:06:51.757228185 +0200 -+++ mcelog-1.64+git20190805.e53631f/memdb.c 2019-09-06 14:09:38.197237074 +0200 -@@ -430,11 +430,11 @@ +--- a/memdb.c ++++ b/memdb.c +@@ -431,11 +431,11 @@ md->location = xstrdup(bl); md->name = xstrdup(dmi_getstring(&d->header, d->device_locator)); }
