Hello community, here is the log from the commit of package mcelog for openSUSE:Factory checked in at Mon Sep 5 16:38:45 CEST 2011.
-------- --- mcelog/mcelog.changes 2011-06-07 11:53:11.000000000 +0200 +++ mcelog/mcelog.changes 2011-08-18 00:20:41.000000000 +0200 @@ -1,0 +2,28 @@ +Thu Aug 18 00:09:50 CEST 2011 - [email protected] + +- update to GIT of today (6e4e2a000124f08f1a4e3791c2b02ec9ae6af393) +- many bugfixes +- Implement re-parsing of mcelog output in ASCII +- Add support for non-page aligned EFI Configuration Tables +- Add --debug-numerrors +- Add decoder for corrected XEN events to --ascii +- Correctly log kernel supplied time +- record the trigger info in the log +- mcelog: Implement dmi decoding for UEFI +- mcelog: Add usage information to mcelog for --ignorenodev +- Fix length calculation of SMBIOS mapping +- change disclaimer +- explictly spell out corrected errors + +------------------------------------------------------------------- +Sat Jul 2 21:50:53 UTC 2011 - [email protected] + +- Update to latest git version (fate#311830) + Unfortunately versions have not been increased, latest tag + still is 1.0-pre3 (same as 1 year ago), therefore the date + is included in the version. I try to push maintainers to + increase the version number. +- Invert logic of db prefill messages -> info if it works, silent + if not + +------------------------------------------------------------------- @@ -11,0 +40,9 @@ + +------------------------------------------------------------------- +Tue Apr 6 15:15:45 CEST 2010 - [email protected] + +- Update to latest git version having quite some fixes (no features): + - Fixed some memleaks and made app valgrind conform + - Fixed theoretical DoS attack (bnc#586241) + - Added support of additional cpus + - Fixed a lot messages (in manpage, in triggers, in README, ...) 
calling whatdependson for head-i586 Old: ---- mcelog-1.0pre3.6363f5b719e9.tar.bz2 New: ---- mcelog-1.0pre3.6e4e2a000124.tar.bz2 mcelog_invert_prefill_db_warning.patch ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ mcelog.spec ++++++ --- /var/tmp/diff_new_pack.d44muK/_old 2011-09-05 16:38:26.000000000 +0200 +++ /var/tmp/diff_new_pack.d44muK/_new 2011-09-05 16:38:26.000000000 +0200 @@ -21,8 +21,8 @@ Name: mcelog License: GPLv2+ Summary: Log Machine Check Events -Version: 1.0pre3.6363f5b719e9 -Release: 6 +Version: 1.0pre3.6e4e2a000124 +Release: 1 AutoReqProv: on ExclusiveArch: x86_64 BuildRequires: libesmtp-devel @@ -33,6 +33,7 @@ Source2: mcelog.sysconfig Source6: README.email_setup Patch1: email.patch +Patch2: mcelog_invert_prefill_db_warning.patch Group: System/Monitoring BuildRoot: %{_tmppath}/%{name}-%{version}-build @@ -55,6 +56,7 @@ %prep %setup %patch1 -p1 +%patch2 -p1 %build export SUSE_ASNEEDED=0 ++++++ email.patch ++++++ --- /var/tmp/diff_new_pack.d44muK/_old 2011-09-05 16:38:26.000000000 +0200 +++ /var/tmp/diff_new_pack.d44muK/_new 2011-09-05 16:38:26.000000000 +0200 @@ -1,22 +1,11 @@ ---- - Makefile | 10 +++ - email.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - email.h | 32 ++++++++++ - mcelog.c | 93 +++++++++++++++++++++++++++++++ - mcelog.h | 1 - msg.c | 8 ++ - 6 files changed, 325 insertions(+), 3 deletions(-) - -Index: mcelog-1.0pre3.6363f5b719e9/Makefile -=================================================================== ---- mcelog-1.0pre3.6363f5b719e9.orig/Makefile -+++ mcelog-1.0pre3.6363f5b719e9/Makefile +--- mcelog-1.0pre3.6e4e2a000124/Makefile ++++ mcelog-1.0pre3.6e4e2a000124/Makefile @@ -1,3 +1,4 @@ +CONFIG_EMAIL := 1 CFLAGS := -g -Os prefix := /usr etcprefix := -@@ -34,7 +35,8 @@ OBJ := p4.o k8.o mcelog.o dmi.o tsc.o co +@@ -34,7 +35,8 @@ client.o cache.o sysfs.o yellow.o page.o rbtree.o \ xeon75xx.o sandy-bridge.o DISKDB_OBJ := diskdb.o dimm.o 
db.o @@ -26,7 +15,7 @@ DOC := mce.pdf ADD_DEFINES := -@@ -46,6 +48,12 @@ OBJ += ${DISKDB_OBJ} +@@ -46,6 +48,12 @@ all: dbquery endif @@ -39,10 +28,8 @@ SRC := $(OBJ:.o=.c) mcelog: ${OBJ} -Index: mcelog-1.0pre3.6363f5b719e9/email.c -=================================================================== ---- /dev/null -+++ mcelog-1.0pre3.6363f5b719e9/email.c +--- mcelog-1.0pre3.6e4e2a000124/email.c ++++ mcelog-1.0pre3.6e4e2a000124/email.c @@ -0,0 +1,184 @@ +#include <unistd.h> +#include <signal.h> @@ -228,10 +215,8 @@ + smtp_destroy_session (session); + return 0; +} -Index: mcelog-1.0pre3.6363f5b719e9/email.h -=================================================================== ---- /dev/null -+++ mcelog-1.0pre3.6363f5b719e9/email.h +--- mcelog-1.0pre3.6e4e2a000124/email.h ++++ mcelog-1.0pre3.6e4e2a000124/email.h @@ -0,0 +1,32 @@ +#ifndef _MCELOG_EMAIL_H_ +#define _MCELOG_EMAIL_H_ @@ -265,19 +250,17 @@ +#endif + +#endif -Index: mcelog-1.0pre3.6363f5b719e9/mcelog.c -=================================================================== ---- mcelog-1.0pre3.6363f5b719e9.orig/mcelog.c -+++ mcelog-1.0pre3.6363f5b719e9/mcelog.c +--- mcelog-1.0pre3.6e4e2a000124/mcelog.c ++++ mcelog-1.0pre3.6e4e2a000124/mcelog.c @@ -37,6 +37,7 @@ #include <assert.h> #include <signal.h> #include <pwd.h> +#include <sys/wait.h> + #include <fnmatch.h> #include "mcelog.h" #include "paths.h" - #include "k8.h" -@@ -58,6 +59,9 @@ +@@ -59,6 +60,9 @@ #include "yellow.h" #include "page.h" @@ -287,7 +270,7 @@ enum cputype cputype = CPU_GENERIC; char *logfn = LOG_DEV_FILENAME; -@@ -69,7 +73,7 @@ static double cpumhz; +@@ -70,7 +74,7 @@ static int cpumhz_forced; int ascii_mode; int dump_raw_ascii; @@ -296,7 +279,7 @@ static char *inputfile; char *processor_flags; static int foreground; -@@ -792,6 +796,7 @@ void usage(void) +@@ -914,6 +918,7 @@ "--num-errors N Only process N errors (for testing)\n" "--pidfile file Write pid of daemon into file\n" ); @@ -304,15 +287,15 @@ diskdb_usage(); print_cputypes(); exit(1); 
-@@ -855,6 +860,7 @@ static struct option options[] = { - { "num-errors", 1, NULL, O_NUMERRORS }, +@@ -979,6 +984,7 @@ { "pidfile", 1, NULL, O_PIDFILE }, + { "debug-numerrors", 0, NULL, O_DEBUG_NUMERRORS }, /* undocumented: for testing */ DISKDB_OPTIONS + EMAIL_OPTIONS {} }; -@@ -1026,11 +1032,86 @@ static void drop_cred(void) +@@ -1153,11 +1159,86 @@ } } @@ -399,15 +382,7 @@ if (recordlen == 0) { Wprintf("no data in mce record\n"); -@@ -1041,19 +1122,23 @@ static void process(int fd, unsigned rec - if (len < 0) - err("read"); - -- for (i = 0; (i < len / (int)recordlen) && !finish; i++) { -+ for (i = 0; (i < len / (int)recordlen) && !finish; i++) { - struct mce *mce = (struct mce *)(buf + i*recordlen); - mce_prepare(mce); - if (numerrors > 0 && --numerrors == 0) +@@ -1177,12 +1258,16 @@ finish = 1; if (!mce_filter(mce, recordlen)) continue; @@ -424,7 +399,7 @@ flushlog(); } -@@ -1161,6 +1246,8 @@ int main(int ac, char **av) +@@ -1293,6 +1378,8 @@ exit(0); } else if (diskdb_cmd(opt, ac, av)) { exit(0); @@ -433,7 +408,7 @@ } else if (opt == 0) break; } -@@ -1169,6 +1256,8 @@ int main(int ac, char **av) +@@ -1301,6 +1388,8 @@ logfn = av[optind++]; if (av[optind]) usage(); @@ -442,10 +417,18 @@ checkdmi(); general_setup(); -Index: mcelog-1.0pre3.6363f5b719e9/msg.c -=================================================================== ---- mcelog-1.0pre3.6363f5b719e9.orig/msg.c -+++ mcelog-1.0pre3.6363f5b719e9/msg.c +--- mcelog-1.0pre3.6e4e2a000124/mcelog.h ++++ mcelog-1.0pre3.6e4e2a000124/mcelog.h +@@ -120,6 +120,7 @@ + enum option_ranges { + O_COMMON = 500, + O_DISKDB = 1000, ++ O_EMAIL = 1500, + }; + + enum syslog_opt { +--- mcelog-1.0pre3.6e4e2a000124/msg.c ++++ mcelog-1.0pre3.6e4e2a000124/msg.c @@ -8,10 +8,13 @@ #include "mcelog.h" #include "msg.h" @@ -460,7 +443,7 @@ static char *output_fn; int need_stdout(void) -@@ -135,6 +138,11 @@ int Wprintf(char *fmt, ...) +@@ -135,6 +138,11 @@ n = vfprintf(output_fh ? 
output_fh : stdout, fmt, ap); va_end(ap); } @@ -472,15 +455,3 @@ return n; } -Index: mcelog-1.0pre3.6363f5b719e9/mcelog.h -=================================================================== ---- mcelog-1.0pre3.6363f5b719e9.orig/mcelog.h -+++ mcelog-1.0pre3.6363f5b719e9/mcelog.h -@@ -122,6 +122,7 @@ enum cputype { - enum option_ranges { - O_COMMON = 500, - O_DISKDB = 1000, -+ O_EMAIL = 1500, - }; - - enum syslog_opt { ++++++ mcelog-1.0pre3.6363f5b719e9.tar.bz2 -> mcelog-1.0pre3.6e4e2a000124.tar.bz2 ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/CHANGES new/mcelog-1.0pre3.6e4e2a000124/CHANGES --- old/mcelog-1.0pre3.6363f5b719e9/CHANGES 2010-10-17 20:33:36.000000000 +0200 +++ new/mcelog-1.0pre3.6e4e2a000124/CHANGES 2011-08-18 00:01:26.000000000 +0200 @@ -1,5 +1,7 @@ <newer changes first> +Add Linux Kongress 2010 paper +Add Sandy Bridge Support Write pid file by default in daemon mode Reopen log files on SIGUSR1 in daemon mode Default --daemon mode to logging to /var/log/mcelog diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/README new/mcelog-1.0pre3.6e4e2a000124/README --- old/mcelog-1.0pre3.6363f5b719e9/README 2010-10-17 20:33:36.000000000 +0200 +++ new/mcelog-1.0pre3.6e4e2a000124/README 2011-08-18 00:01:26.000000000 +0200 @@ -30,6 +30,14 @@ The recommended mode is daemon, because several new functions (like page error predictive failure analysis) require a continuously running daemon. +Documentation: + +The primary reference documentation are the man pages. +lk10-mcelog.pdf has a overview over the errors mcelog handles +(originally from Linux Kongress 2010) +mce.pdf is a very old paper describing the first releases of mcelog +(some parts are obsolete) + For distributors: Please install a init script by default that runs mcelog in daemon mode. 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/dmi.c new/mcelog-1.0pre3.6e4e2a000124/dmi.c --- old/mcelog-1.0pre3.6363f5b719e9/dmi.c 2010-10-17 20:33:36.000000000 +0200 +++ new/mcelog-1.0pre3.6e4e2a000124/dmi.c 2011-08-18 00:01:26.000000000 +0200 @@ -137,6 +137,52 @@ } } +static int get_efi_base_addr(size_t *address) +{ + FILE *efi_systab; + const char *filename; + char linebuf[64]; + int ret = 0; + + *address = 0; /* Prevent compiler warning */ + + /* Linux 2.6.7 and up: /sys/firmware/efi/systab */ + filename = "/sys/firmware/efi/systab"; + if ((efi_systab = fopen(filename, "r")) != NULL) + goto check_symbol; + + /* Linux up to 2.6.6: /proc/efi/systab */ + filename = "/proc/efi/systab"; + if ((efi_systab = fopen(filename, "r")) != NULL) + goto check_symbol; + + /* Failed to open EFI interfaces */ + return ret; + +check_symbol: + while ((fgets(linebuf, sizeof(linebuf) - 1, efi_systab)) != NULL) { + char *addrp = strchr(linebuf, '='); + *(addrp++) = '\0'; + + if (strcmp(linebuf, "SMBIOS") == 0) { + *address = strtoul(addrp, NULL, 0); + ret = 1; + break; + } + } + + if (fclose(efi_systab) != 0) + perror(filename); + + if (!ret) + Eprintf("%s: SMBIOS entry point missing", filename); + + if (verbose) + printf("%s: SMBIOS entry point at 0x%08lx\n", filename, + (unsigned long)*address); + return ret; +} + int opendmi(void) { struct anchor *a, *abase; @@ -146,6 +192,8 @@ unsigned corr; int err = -1; const int segsize = 0x10000; + size_t entry_point_addr = 0; + size_t length = 0; if (entries) return 0; @@ -156,9 +204,40 @@ return -1; } - abase = mmap(NULL, segsize-1, PROT_READ, MAP_SHARED, memfd, 0xf0000); + /* + * On EFI-based systems, the SMBIOS Entry Point structure can be + * located by looking in the EFI Configuration Table. 
+ */ + if (get_efi_base_addr(&entry_point_addr)) { + size_t addr_start = round_down(entry_point_addr, pagesize); + size_t addr_end = round_up(entry_point_addr + 0x20, pagesize); + length = addr_end - addr_start; + + /* mmap() the address of SMBIOS structure table entry point. */ + abase = mmap(NULL, length, PROT_READ, MAP_SHARED, memfd, + addr_start); + if (abase == (struct anchor *)-1) { + Eprintf("Cannot mmap 0x%lx for efi mode: %s", + (unsigned long)entry_point_addr, + strerror(errno)); + goto legacy; + } + a = (struct anchor*)((char*)abase + (entry_point_addr - addr_start)); + goto fill_entries; + } + +legacy: + /* + * On non-EFI systems, the SMBIOS Entry Point structure can be located + * by searching for the anchor-string on paragraph (16-byte) boundaries + * within the physical memory address range 000F0000h to 000FFFFFh + */ + length = segsize - 1; + abase = mmap(NULL, length, PROT_READ, MAP_SHARED, memfd, 0xf0000); + if (abase == (struct anchor *)-1) { - Eprintf("Cannot mmap 0xf0000: %s", strerror(errno)); + Eprintf("Cannot mmap 0xf0000 for legacy mode: %s", + strerror(errno)); goto out; } @@ -175,11 +254,13 @@ a = p; +fill_entries: if (verbose) printf("DMI tables at %x, %u bytes, %u entries\n", a->table, a->length, a->numentries); corr = a->table - round_down(a->table, pagesize); - entrieslen = round_up(a->length + pagesize, pagesize); + entrieslen = round_up(a->table + a->length, pagesize) - + round_down(a->table, pagesize); entries = mmap(NULL, entrieslen, PROT_READ, MAP_SHARED, memfd, round_down(a->table, pagesize)); @@ -195,7 +276,7 @@ err = 0; out_mmap: - munmap(abase, 0xffff); + munmap(abase, length); out: close(memfd); return err; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/input/xen new/mcelog-1.0pre3.6e4e2a000124/input/xen --- old/mcelog-1.0pre3.6363f5b719e9/input/xen 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0pre3.6e4e2a000124/input/xen 2011-08-18 
00:01:26.000000000 +0200 @@ -0,0 +1,3 @@ +(XEN) MCE: The hardware reports a non fatal, correctable incident occurred on CPU 1. +(XEN) Bank 2: d400008000040150 at 182c480179cf0 + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/mcelog.c new/mcelog-1.0pre3.6e4e2a000124/mcelog.c --- old/mcelog-1.0pre3.6363f5b719e9/mcelog.c 2010-10-17 20:33:36.000000000 +0200 +++ new/mcelog-1.0pre3.6e4e2a000124/mcelog.c 2011-08-18 00:01:26.000000000 +0200 @@ -37,6 +37,7 @@ #include <assert.h> #include <signal.h> #include <pwd.h> +#include <fnmatch.h> #include "mcelog.h" #include "paths.h" #include "k8.h" @@ -80,14 +81,14 @@ static char logfile_default[] = LOG_FILE; static char *pidfile = pidfile_default; static char *logfile; +static int debug_numerrors; static void check_cpu(void); static void disclaimer(void) { - Wprintf("HARDWARE ERROR. This is *NOT* a software problem!\n"); - Wprintf("Please contact your hardware vendor\n"); + Wprintf("Hardware event. 
This is not a software error.\n"); } static char *extended_bankname(unsigned bank) @@ -192,6 +193,24 @@ *model += c.c.ext_model << 4; } +static u32 unparse_cpuid(unsigned family, unsigned model) +{ + union { + struct cpuid1 c; + u32 v; + } c; + + c.c.family = family; + if (family >= 0xf) { + c.c.family = 0xf; + c.c.ext_family = family - 0xf; + } + c.c.model = model & 0xf; + if (family == 6 || family == 0xf) + c.c.ext_model = model >> 4; + return c.v; +} + static char *cputype_name[] = { [CPU_GENERIC] = "generic CPU", [CPU_P6OLD] = "Intel PPro/P2/P3/old Xeon", @@ -261,19 +280,35 @@ exit(1); } +static char *vendor[] = { + [0] = "Intel", + [1] = "Cyrix", + [2] = "AMD", + [3] = "UMC", + [4] = "vendor 4", + [5] = "Centaur", + [6] = "vendor 6", + [7] = "Transmeta", + [8] = "NSC" +}; + +static unsigned cpuvendor_to_num(char *name) +{ + unsigned i; + unsigned v; + char *end; + + v = strtoul(name, &end, 0); + if (end > name) + return v; + for (i = 0; i < NELE(vendor); i++) + if (!strcmp(name, vendor[i])) + return i; + return 0; +} + static char *cpuvendor_name(u32 cpuvendor) { - static char *vendor[] = { - [0] = "Intel", - [1] = "Cyrix", - [2] = "AMD", - [3] = "UMC", - [4] = "vendor 4", - [5] = "Centaur", - [6] = "vendor 6", - [7] = "Transmeta", - [8] = "NSC" - }; return (cpuvendor < NELE(vendor)) ? 
vendor[cpuvendor] : "Unknown vendor"; } @@ -318,7 +353,7 @@ static void mce_prepare(struct mce *m) { mce_cpuid(m); - if (m->time) + if (!m->time) m->time = time(NULL); } @@ -409,10 +444,6 @@ CPRINT("SOCKETID %u", socketid); CPRINT("APICID %u", apicid); CPRINT("MCGCAP %#llx", mcgcap); - if (m->aux0) - CPRINT("AUX0 %#llx", aux0); - if (m->aux1) - CPRINT("AUX1 %#llx", aux1); #undef CPRINT Wprintf("\n"); } @@ -491,8 +522,21 @@ return s; } +static char *skip_syslog(char *s) +{ + char *p; + + /* Handle syslog output */ + p = strstr(s, "mcelog: "); + if (p) + return p + sizeof("mcelog: ") - 1; + return s; +} + static char *skipgunk(char *s) { + s = skip_syslog(s); + s = skipspace(s); if (*s == '<') { s += strcspn(s, ">"); @@ -508,10 +552,42 @@ return skipspace(s); } +static inline int urange(unsigned val, unsigned lo, unsigned hi) +{ + return val >= lo && val <= hi; +} + +static int is_short(char *name) +{ + return strlen(name) == 3 && + isupper(name[0]) && + islower(name[1]) && + islower(name[2]); +} + +static unsigned skip_date(char *s) +{ + unsigned day, hour, min, year, sec; + char dayname[11]; + char month[11]; + unsigned next; + + if (sscanf(s, "%10s %10s %u %u:%u:%u %u%n", + dayname, month, &day, &hour, &min, &sec, &year, &next) != 7) + return 0; + if (!is_short(dayname) || !is_short(month) || !urange(day, 1, 31) || + !urange(hour, 0, 24) || !urange(min, 0, 59) || !urange(sec, 0, 59) || + year < 1900) + return 0; + return next; +} + static void dump_mce_final(struct mce *m, char *symbol, int missing, int recordlen, int dseen) { m->finished = 1; + if (m->cpuid) + mce_cpuid(m); if (!dump_raw_ascii) { if (!dseen) disclaimer(); @@ -525,6 +601,23 @@ flushlog(); } +static char *skip_patterns[] = { + "MCA:*", + "MCi_MISC register valid*", + "MC? 
status*", + "Unsupported new Family*", + "Kernel does not support page offline interface", + NULL +}; + +static int match_patterns(char *s, char **pat) +{ + for (; *pat; pat++) + if (!fnmatch(*pat, s, 0)) + return 0; + return 1; +} + #define FIELD(f) \ if (recordlen < endof_field(struct mce, f)) \ recordlen = endof_field(struct mce, f) @@ -560,12 +653,14 @@ symbol[0] = '\0'; while (next > 0 || getdelim(&line, &linelen, '\n', inf) > 0) { int n = 0; + char *start; s = next > 0 ? s + next : line; s = skipgunk(s); + start = s; next = 0; - if (!strncmp(s, "CPU", 3)) { + if (!strncmp(s, "CPU ", 4)) { unsigned cpu = 0, bank = 0; n = sscanf(s, "CPU %u: Machine Check Exception: %16Lx Bank %d: %016Lx%n", @@ -663,6 +758,8 @@ missing++; else FIELD(time); + + next += skip_date(s + next); } else if (!strncmp(s, "MCGCAP", 6)) { if ((n = sscanf(s, "MCGCAP %llx%n", &m.mcgcap, &next)) != 1) @@ -682,26 +779,50 @@ else FIELD(socketid); } - else if (!strncmp(s, "AUX0", 4)) { - if ((n = sscanf(s, "AUX0 %llx%n", &m.aux0, &next)) != 1) - missing++; - else - FIELD(aux0); - } - else if (!strncmp(s, "AUX1", 4)) { - if ((n = sscanf(s, "AUX1 %llx%n", &m.aux1, &next)) != 1) + else if (!strncmp(s, "CPUID", 5)) { + unsigned fam, mod; + char vendor[31]; + + if ((n = sscanf(s, "CPUID Vendor %30s Family %u Model %u\n", + vendor, &fam, &mod)) < 3) missing++; - else - FIELD(aux1); - } + else { + m.cpuvendor = cpuvendor_to_num(vendor); + m.cpuid = unparse_cpuid(fam, mod); + FIELD(cpuid); + FIELD(cpuvendor); + } + } else if (strstr(s, "HARDWARE ERROR")) disclaimer_seen = 1; + else if (!strncmp(s, "(XEN)", 5)) { + char *w; + unsigned bank, cpu; + + if (strstr(s, "The hardware reports a non fatal, correctable incident occurred")) { + w = strstr(s, "CPU"); + if (w && sscanf(w, "CPU %d", &cpu)) { + m.cpu = cpu; + FIELD(cpu); + } + } else if ((n = sscanf(s, "(XEN) Bank %d: %llx at %llx", + &bank, &m.status, &m.addr) >= 1)) { + m.bank = bank; + FIELD(bank); + if (n >= 2) + FIELD(status); + if (n >= 3) + 
FIELD(addr); + } + } + else if (!match_patterns(s, skip_patterns)) + n = 0; else { s = skipspace(s); if (*s && data) dump_mce_final(&m, symbol, missing, recordlen, disclaimer_seen); if (!dump_raw_ascii) - Wprintf("%s", line); + Wprintf("%s", start); if (*s && data) goto restart; } @@ -777,6 +898,7 @@ "--cpumhz MHZ Set CPU Mhz to decode time (output unreliable, not needed on new kernels)\n" "--raw (with --ascii) Dump in raw ASCII format for machine processing\n" "--daemon Run in background waiting for events (needs newer kernel)\n" +"--ignorenodev Exit silently when the device cannot be opened\n" "--file filename With --ascii read machine check log from filename instead of stdin\n" "--syslog Log decoded machine checks in syslog (default stdout or syslog for daemon)\n" "--syslog-error Log decoded machine checks in syslog with error level\n" @@ -823,6 +945,7 @@ O_FOREGROUND, O_NUMERRORS, O_PIDFILE, + O_DEBUG_NUMERRORS, }; static struct option options[] = { @@ -854,6 +977,7 @@ { "client", 0, NULL, O_CLIENT }, { "num-errors", 1, NULL, O_NUMERRORS }, { "pidfile", 1, NULL, O_PIDFILE }, + { "debug-numerrors", 0, NULL, O_DEBUG_NUMERRORS }, /* undocumented: for testing */ DISKDB_OPTIONS {} }; @@ -955,6 +1079,9 @@ case O_CONFIG_FILE: /* parsed in config.c */ break; + case O_DEBUG_NUMERRORS: + debug_numerrors = 1; + break; case 0: break; default: @@ -1038,8 +1165,10 @@ } len = read(fd, buf, recordlen * loglen); - if (len < 0) - err("read"); + if (len < 0) { + SYSERRprintf("mcelog read"); + return; + } for (i = 0; (i < len / (int)recordlen) && !finish; i++) { struct mce *mce = (struct mce *)(buf + i*recordlen); @@ -1057,6 +1186,9 @@ flushlog(); } + if (debug_numerrors && numerrors <= 0) + finish = 1; + if (recordlen > sizeof(struct mce)) { Eprintf("warning: %lu bytes ignored in each record\n", (unsigned long)recordlen - sizeof(struct mce)); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/mcelog.conf 
new/mcelog-1.0pre3.6e4e2a000124/mcelog.conf --- old/mcelog-1.0pre3.6363f5b719e9/mcelog.conf 2010-10-17 20:33:36.000000000 +0200 +++ new/mcelog-1.0pre3.6e4e2a000124/mcelog.conf 2011-08-18 00:01:26.000000000 +0200 @@ -117,7 +117,7 @@ [socket] # Memory error accounting per socket -socket-tracing-enabled = yes +socket-tracking-enabled = yes # Threshold and trigger for uncorrected memory errors on a socket # mem-uc-error-trigger = socket-memory-error-trigger mem-uc-error-threshold = 100 / 24h diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/mcelog.h new/mcelog-1.0pre3.6e4e2a000124/mcelog.h --- old/mcelog-1.0pre3.6363f5b719e9/mcelog.h 2010-10-17 20:33:36.000000000 +0200 +++ new/mcelog-1.0pre3.6e4e2a000124/mcelog.h 2011-08-18 00:01:26.000000000 +0200 @@ -31,8 +31,6 @@ __u32 socketid; /* CPU socket ID */ __u32 apicid; /* CPU initial apic ID */ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ - __u64 aux0; - __u64 aux1; }; #define X86_VENDOR_INTEL 0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/p4.c new/mcelog-1.0pre3.6e4e2a000124/p4.c --- old/mcelog-1.0pre3.6363f5b719e9/p4.c 2010-10-17 20:33:36.000000000 +0200 +++ new/mcelog-1.0pre3.6e4e2a000124/p4.c 2011-08-18 00:01:26.000000000 +0200 @@ -257,6 +257,8 @@ if (status & MCI_STATUS_UC) Wprintf("Uncorrected error\n"); + else + Wprintf("Corrected error\n"); if (status & MCI_STATUS_EN) Wprintf("Error enabled\n"); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/sysfs.c new/mcelog-1.0pre3.6e4e2a000124/sysfs.c --- old/mcelog-1.0pre3.6363f5b719e9/sysfs.c 2010-10-17 20:33:36.000000000 +0200 +++ new/mcelog-1.0pre3.6e4e2a000124/sysfs.c 2011-08-18 00:01:26.000000000 +0200 @@ -20,6 +20,7 @@ #include <unistd.h> #include <string.h> #include <sys/fcntl.h> +#include <sys/stat.h> #include <stdarg.h> #include 
<errno.h> #include "mcelog.h" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/Makefile new/mcelog-1.0pre3.6e4e2a000124/tests/Makefile --- old/mcelog-1.0pre3.6363f5b719e9/tests/Makefile 2010-10-17 20:33:36.000000000 +0200 +++ new/mcelog-1.0pre3.6e4e2a000124/tests/Makefile 2011-08-18 00:01:26.000000000 +0200 @@ -7,6 +7,7 @@ ./test page "${DEBUG}" ./test memdb "${DEBUG}" ./test socket "${DEBUG}" + ./test pfa "${DEBUG}" clean: rm -f */*log diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/PFA_test_howto new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/PFA_test_howto --- old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/PFA_test_howto 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/PFA_test_howto 2011-08-18 00:01:26.000000000 +0200 @@ -0,0 +1,213 @@ +This README file describes the steps of testing PFA (Predictive Failure +Analysis) functionality of mcelog under Linux which is facilitated by using +mce-inject. + +PFA is a RAS Feature. PFA capable system can monitor corrected hardware errors +and take corrective action in advance before uncorrected error happen. For +example, PFA should offline a memory page if more than 10 errors per hour on a +memory page are found. It mostly focuses on memory errors. + +0. 
Preparation work +******************************* +- Install the Linux kernel with full MCE injection support + + Make sure the following configuration options are enabled: + + CONFIG_X86_MCE=y + CONFIG_X86_MCE_INTEL=y + CONFIG_X86_MCE_INJECT=y or CONFIG_X86_MCE_INJECT=m + +- Build mcelog and install in /usr/bin (or rather first in your $PATH) + + # cd $HOME/mcelog + # make + # make install + +- Get mce-inject git version from + git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git + and install in /usr/bin (or rather first in your $PATH) + + # cd $HOME + # git clone git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git + # cd mce-inject + # make + # make install + +- Install the page-types tool, which ships with the Linux kernel source + (2.6.32 or newer). + + # cd $KERNEL_SRC/Documentation/vm/ + # gcc -o page-types page-types.c + # cp page-types /usr/bin/ + + + +1. Start PFA test +******************************* + +The PFA test cases in mcelog are in the following directories: + +- mcelog/tests/pfa #page level pfa test cases + +You can run all PFA test cases simply by typing: + + # cd mcelog/tests + # ./test pfa + +All the test cases in the specified subdirectory will be run and the test +results will be saved in files: + + mcelog/tests/pfa/results + +When you examine the content of the file, you will find results such as: + +- if one case passed: + + "*.conf: triggers trigger as expected" + +- if one case failed + + "*.conf: triggers did not trigger as expected: $expected_num != + $actual_got_num" + +You can refer to the "*.log" file in the specific subdirectory for the log saved +by mcelog. + + +2. 
Modify or add new test cases +******************************* + +If you want to modify the existing test cases or add your own case, the +following description gives a more detailed look which might help: + +- To add or run a page level PFA test, you first need a configuration file in the + + mcelog/tests/pfa/ + + directory defining mainly the threshold and trigger actions you want, then the + number of trigger events you expect to happen. + + +- A typical configuration file looks like this: + + mcelog/tests/pfa/page-account.conf + ---------------------------------------------------------- + # trigger: 5 + # num-errors = 3 + + [page] + memory-ce-threshold = 2 / 1h + memory-ce-trigger = ../trigger + #memory-ce-action = off|account|soft|hard|soft-then-hard + memory-ce-action = account + + [trigger] + directory = . + ------------------------------------------------------------ + + - “# trigger: 5” + + Specify the number of triggers you expect to get based + on the threshold defined in "memory-ce-threshold" described below. + + The mcelog/tests/test harness will in the end compare this number with + the number of actual trigger events found in the log to verify the test + results. + + Please note the "#" is needed for the mcelog/tests/test harness to read this line. + + + - "# num-errors = 3" + + "num-errors" is a mcelog configuration option. If uncommented, it is used by + mcelog to stop processing the stored machine check records in the mcelog + buffer read from /dev/mcelog and return (for debugging purposes) when the + number is reached, even though there might be: + - still some unprocessed records left in the buffer, which + will be ignored + - or not enough records in /dev/mcelog, in which case the program + will not return. + + If not set, as in this example, mcelog returns only after it has finished processing + all the records. + + When you are not sure what the correct num-errors number should be, it is + not recommended to set this option. 
+ + + - "memory-ce-threshold = 2 / 1h" + + Define the threshold for memory corrected errors per page. Here it means + that if 2 corrected errors are detected in one page within 1 hour, + the trigger defined in “memory-ce-trigger” described below will be called. + + + - "memory-ce-trigger = ../trigger" + + Specify the trigger you want when exceeding the threshold. + Here mcelog/tests/trigger will be called, which simply prints some text for + testing. + + + - "memory-ce-action = account" + + Specify the internal action mcelog takes when a memory corrected error + threshold is exceeded. + + This is done in addition to executing the trigger script if available. + - off: No action + - account: only account errors + - soft: try to soft-offline page without killing any processes + This requires an updated kernel. Might not be successful + - hard: try to hard-offline page without killing any processes + This requires an updated kernel. Might not be successful + - soft-then-hard: First try to soft offline, then try hard offlining + + The offline action is based on the sysfs_write action of: + /sys/devices/system/memory/soft_offline_page + or + /sys/devices/system/memory/hard_offline_page + + Please note that offlining does not work for all pages, but only for pages + in the Linux page cache or free pages. And if an offline action (soft, hard, or + soft-then-hard) is chosen in "memory-ce-action", the trigger will fire only + once for each page, no matter whether the offline action taken was successful or + failed. + + +3. Influencing factors of the trigger results +******************************* + +The correct expectation of triggers depends on 4 factors: + +- The count number of trigger expectation defined in "pfa/*.conf" file + + As described above, in our example the trigger expectation is defined to be 5 + times, which means the "mcelog/tests/pfa/inject" script will randomly choose 5 + free pages to inject in turn and do the MCE injection on each page for + $memory-ce-threshold times. 
+ +- The threshold defined in “memory-ce-threshold” of "pfa/*.conf" file + + As described above, for “memory-ce-threshold = 2 / 1h” in our example, the + "mcelog/tests/pfa/inject" script will do the MCE injection + 2 times continuously for each chosen page to make the trigger happen. + +- The “memory-ce-action” defined in "pfa/*.conf" file + + As described above, if the “memory-ce-action” is soft/hard/soft-then-hard, + no matter whether the offlining action succeeds or not, the triggers_per_page calculation + changes to: + + triggers_per_page = INT(injections_per_page / memory-ce-threshold) >= 1? 1:0 + +- The actual number of records read out if "num-errors" is defined in "pfa/*.conf" + file + + As described above, mcelog will just read out $num-errors records, that means: + + readout_total_injections = MIN(num-errors, injection_per_page * + actual-inject_pages) + + This might affect the trigger counts for some of the last injected pages since not + all the machine check records from /dev/mcelog are processed and counted. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/inject new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/inject --- old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/inject 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/inject 2011-08-18 00:01:26.000000000 +0200 @@ -0,0 +1,79 @@ +#!/bin/sh +PATH=$PATH:$(pwd)/../../../mce-inject + +page_type="slab buddy mmap anonymous nopage huge" + +function get_free_page() +{ + local rand=0 + cnt=`page-types -Nl -b $1 | tee page_$1 | wc -l` + if [ $cnt -gt 1 ]; then + rand=$(expr $RANDOM % $cnt + 1) + if [ ${rand} -eq 1 ]; then + # skip the title line of output + ((rand++)) + fi + page=`awk -v line=${rand} 'NR == line {print $1}' page_$1` + echo 0x${page} + else + echo 0 + fi + rm -f page_$1 +} + +if [ "$1" = "" ]; then + echo "usage $0 conf_file" + exit 1 +fi + +if [ ! 
-f $1 ]; then + echo "configure file not exists: $1" + exit 1 +fi + +which page-types > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "please install page-types tool first" + exit 1 +fi + +echo "+++ start the injection for $1 +++" + +NUMT="$(awk '/# trigger: / { print $3}' $1)" +THRESHOLD="$(awk '/memory-ce-threshold = / { print $3}' $1)" + +if [ "$NUMT" -eq 0 ]; then + echo "No injection will be done!" + exit 0 +fi + +if [ "$THRESHOLD" -eq 0 ]; then + echo "Threshold should not be 0!" + exit 1 +fi + +trigger_cnt=0 +while [ "$trigger_cnt" -lt "$NUMT" ]; do + for i in ${page_type}; do + P=$(get_free_page $i) + if [ "$P" = "0" ]; then + continue + fi + if [ "$trigger_cnt" -ge "$NUMT" ]; then + exit 0; + fi + inject_cnt=0 + while [ "$inject_cnt" -lt "$THRESHOLD" ]; do + echo "inject for page type $i at physical address ${P}000 [ NO. $inject_cnt ]" + ../../input/GENPAGE $P | mce-inject + inject_cnt=$(($inject_cnt+1)) + done + if [ "$inject_cnt" -eq "$THRESHOLD" ]; then + trigger_cnt=$(($trigger_cnt+1)) + fi + done + if [ "$trigger_cnt" -eq 0 ]; then + echo "None available free pages found!" + exit 1 + fi +done diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-account.conf new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-account.conf --- old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-account.conf 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-account.conf 2011-08-18 00:01:26.000000000 +0200 @@ -0,0 +1,12 @@ +# trigger: 1 +#num-errors = 3 + +[page] +memory-ce-threshold = 2 / 1h +memory-ce-trigger = ../trigger +#memory-ce-action = off|account|soft|hard|soft-then-hard +memory-ce-action = account + +[trigger] +directory = . 
+ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-hard.conf new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-hard.conf --- old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-hard.conf 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-hard.conf 2011-08-18 00:01:26.000000000 +0200 @@ -0,0 +1,12 @@ +# trigger: 1 +#num-errors = 3 + +[page] +memory-ce-threshold = 2 / 1h +memory-ce-trigger = ../trigger +#memory-ce-action = off|account|soft|hard|soft-then-hard +memory-ce-action = hard + +[trigger] +directory = . + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-soft-then-hard.conf new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-soft-then-hard.conf --- old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-soft-then-hard.conf 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-soft-then-hard.conf 2011-08-18 00:01:26.000000000 +0200 @@ -0,0 +1,12 @@ +# trigger: 1 +#num-errors = 3 + +[page] +memory-ce-threshold = 1 / 1h +memory-ce-trigger = ../trigger +#memory-ce-action = off|account|soft|hard|soft-then-hard +memory-ce-action = soft-then-hard + +[trigger] +directory = . + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-soft.conf new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-soft.conf --- old/mcelog-1.0pre3.6363f5b719e9/tests/pfa/page-soft.conf 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0pre3.6e4e2a000124/tests/pfa/page-soft.conf 2011-08-18 00:01:26.000000000 +0200 @@ -0,0 +1,12 @@ +# trigger: 1 +#num-errors = 3 + +[page] +memory-ce-threshold = 2 / 1h +memory-ce-trigger = ../trigger +#memory-ce-action = off|account|soft|hard|soft-then-hard +memory-ce-action = soft + +[trigger] +directory = . 
+ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/tests/test new/mcelog-1.0pre3.6e4e2a000124/tests/test --- old/mcelog-1.0pre3.6363f5b719e9/tests/test 2010-10-17 20:33:36.000000000 +0200 +++ new/mcelog-1.0pre3.6e4e2a000124/tests/test 2011-08-18 00:01:26.000000000 +0200 @@ -42,8 +42,8 @@ for conf in `ls *.conf` do log=`echo $conf | sed "s/conf/log/g"` - ./inject - $D ../../mcelog --foreground --daemon --config $conf --logfile $log + ./inject $conf + $D ../../mcelog --foreground --daemon --debug-numerrors --config $conf --logfile $log >> result # let triggers finish sleep 1 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/triggers/cache-error-trigger new/mcelog-1.0pre3.6e4e2a000124/triggers/cache-error-trigger --- old/mcelog-1.0pre3.6363f5b719e9/triggers/cache-error-trigger 2010-10-17 20:33:36.000000000 +0200 +++ new/mcelog-1.0pre3.6e4e2a000124/triggers/cache-error-trigger 2011-08-18 00:01:26.000000000 +0200 @@ -19,7 +19,7 @@ # EXIT=0 -for i in $CPUS_AFFECTED ; do +for i in $AFFECTED_CPUS ; do if [ $i = 0 ] ; then logger -s -p daemon.warn -t mcelog "Not offlining CPU 0" EXIT=1 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0pre3.6363f5b719e9/xeon75xx.c new/mcelog-1.0pre3.6e4e2a000124/xeon75xx.c --- old/mcelog-1.0pre3.6363f5b719e9/xeon75xx.c 2010-10-17 20:33:36.000000000 +0200 +++ new/mcelog-1.0pre3.6e4e2a000124/xeon75xx.c 2011-08-18 00:01:26.000000000 +0200 @@ -25,142 +25,15 @@ #include "mcelog.h" #include "xeon75xx.h" -/* DIMM description */ -struct aux_pfa_dimm { - u8 fbd_channel_id; - u8 ddr_channel_id; - u8 ddr_dimm_id; - u8 ddr_rank_id; - u8 ddr_dimm_bank_id; - u8 ddr_dimm_row_id; - u8 ddr_dimm_column_id; - u8 valid; -} __attribute__((packed)); - -enum { - MCE_BANK_MBOX0 = 8, - MCE_BANK_MBOX1 = 9, - - DIMM_VALID_FBD_CHAN = (1 << 0), - DIMM_VALID_DDR_CHAN = (1 << 1), 
- DIMM_VALID_DDR_DIMM = (1 << 2), - DIMM_VALID_DDR_RANK = (1 << 3), - DIMM_VALID_DIMM_BANK = (1 << 4), - DIMM_VALID_DIMM_ROW = (1 << 5), - DIMM_VALID_DIMM_COLUMN = (1 << 6), - DIMM_VALID_ALL = 0x7f, -}; - -static struct id { - char *name; - unsigned offset; - unsigned valid; - enum { - NL = 1<<0, - IND = 1<<1, - } flags; -} ids[] = { -#define V(n,f,b) n, offsetof(struct aux_pfa_dimm, f), b - { V("FBD-Channel", fbd_channel_id, DIMM_VALID_FBD_CHAN) }, - { V("DDR-Channel", ddr_channel_id, DIMM_VALID_DDR_CHAN) }, - { V("DDR-DIMM", ddr_dimm_id, DIMM_VALID_DDR_DIMM) }, - { V("DDR-Rank", ddr_rank_id, DIMM_VALID_DDR_RANK) }, - { V("DIMM-Bank", ddr_dimm_bank_id, DIMM_VALID_DIMM_BANK), NL|IND }, - { V("DIMM-Row", ddr_dimm_row_id, DIMM_VALID_DIMM_ROW) }, - { V("DIMM-Column", ddr_dimm_column_id, DIMM_VALID_DIMM_COLUMN), NL }, -#undef V - {} -}; - -#if 0 -/* Use for memdb channel output */ - -static int opt_number(char *buf, int val) -{ - if (val == (u8)-1) { - *buf++ = '?'; - return 1; - } - return sprintf(buf, "%u", val); -} - -static void print_channel(int channel, char *buf) -{ - int n; - n = opt_number(buf, ((channel) >> 8) & 0xff); - buf[n++] = ':'; - opt_number(buf + n, channel & 0xff); -} -#endif - -static void decode_dimm(struct aux_pfa_dimm *d, int *channel, int *dimm) -{ - if (d->valid == 0) - return; - if (d->valid & DIMM_VALID_DDR_DIMM) - *dimm = d->ddr_dimm_id; - if (d->valid & (DIMM_VALID_DDR_CHAN|DIMM_VALID_FBD_CHAN)) { - int fbd_chan = (d->valid & DIMM_VALID_FBD_CHAN) ? - d->fbd_channel_id : (u8)-1; - int ddr_chan = (d->valid & DIMM_VALID_DDR_CHAN) ? 
- d->ddr_channel_id : (u8)-1; - *channel = (fbd_chan << 8) | ddr_chan; - } -} - -static void print_dimm(int num, struct aux_pfa_dimm *d) -{ - struct id *id; - int indent; - int k; - - if (d->valid == 0) - return; - - k = indent = Wprintf("DIMM %d: ", num); - for (id = ids; id->name; id++) { - if (d->valid & id->valid) - k += Wprintf("%s %u ", id->name, *((u8*)d + id->offset)); - if (k > 0) { - if (id->flags & NL) { - Wprintf("\n"); - k = 0; - } - if (id->flags & IND) - Wprintf("%.*s", indent, ""); - } - } -} - -static int is_mem_err(struct mce *m, unsigned msize) -{ - if (msize < offsetof(struct mce, aux1) + sizeof(u64)) - return 0; - if (m->bank != MCE_BANK_MBOX0 && m->bank != MCE_BANK_MBOX1) - return 0; - return 1; -} - -union d { - struct aux_pfa_dimm d; - u64 val; -}; +/* This used to decode the old xeon 75xx memory error aux format. But that has never + been merged into mainline kernels, so removed it again. */ void xeon75xx_memory_error(struct mce *m, unsigned msize, int *channel, int *dimm) { - if (!is_mem_err(m, msize)) - return; - - decode_dimm(&((union d *)&m->aux0)->d, &channel[0], &dimm[0]); - decode_dimm(&((union d *)&m->aux1)->d, &channel[1], &dimm[1]); } void xeon75xx_decode_dimm(struct mce *m, unsigned msize) { - if (!is_mem_err(m, msize)) - return; - print_dimm(0, &((union d *)&m->aux0)->d); - print_dimm(1, &((union d *)&m->aux1)->d); } ++++++ mcelog_invert_prefill_db_warning.patch ++++++ --- memdb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) Index: mcelog-1.0.2011.06.08/memdb.c =================================================================== --- mcelog-1.0.2011.06.08.orig/memdb.c +++ mcelog-1.0.2011.06.08/memdb.c @@ -417,11 +417,11 @@ void prefill_memdb(void) md->location = xstrdup(bl); md->name = xstrdup(dmi_getstring(&d->header, d->device_locator)); } - if (missed) { - static int warned; - if (!warned) { - Eprintf("failed to prefill DIMM database from DMI data"); - warned = 1; + if (!missed) { + static int db_rill_msg; + 
if (!db_rill_msg) { + Gprintf("Prefilled DIMM database from DMI data"); + db_rill_msg = 1; } } } ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Remember to have fun... -- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
