Hello community, here is the log from the commit of package mcelog for openSUSE:Factory checked in at 2015-01-25 21:13:59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/mcelog (Old) and /work/SRC/openSUSE:Factory/.mcelog.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "mcelog" Changes: -------- --- /work/SRC/openSUSE:Factory/mcelog/mcelog.changes 2014-11-24 11:14:30.000000000 +0100 +++ /work/SRC/openSUSE:Factory/.mcelog.new/mcelog.changes 2015-01-25 21:14:02.000000000 +0100 @@ -1,0 +2,10 @@ +Fri Jan 23 11:04:40 UTC 2015 - [email protected] + +- Update to version 1.0.8 +- Remove patch which got integrated mainline: + 0001-Continue-without-dmi-when-no-SMBIOS-or-SMBIOS-0x0-in.patch +- Fix possible security issue, build service complained about: + missing-call-to-setgroups-before-setuid + Add fix_setgroups_missing_call.patch + +------------------------------------------------------------------- Old: ---- 0001-Continue-without-dmi-when-no-SMBIOS-or-SMBIOS-0x0-in.patch mcelog-1.0.1.tar.bz2 New: ---- fix_setgroups_missing_call.patch mcelog-1.0.8.tar.bz2 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ mcelog.spec ++++++ --- /var/tmp/diff_new_pack.lzRXQz/_old 2015-01-25 21:14:04.000000000 +0100 +++ /var/tmp/diff_new_pack.lzRXQz/_new 2015-01-25 21:14:04.000000000 +0100 @@ -1,7 +1,7 @@ # # spec file for package mcelog # -# Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany. +# Copyright (c) 2015 SUSE LINUX Products GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -20,7 +20,7 @@ Summary: Log Machine Check Events License: GPL-2.0 Group: System/Monitoring -Version: 1.0.1 +Version: 1.0.8 Release: 0 ExclusiveArch: %{ix86} x86_64 BuildRequires: libesmtp-devel @@ -43,8 +43,8 @@ Patch8: patches/add-f14h-support.patch Patch9: patches/add-f15h-support.patch Patch10: patches/add-f16h-support.patch -Patch11: 0001-Continue-without-dmi-when-no-SMBIOS-or-SMBIOS-0x0-in.patch -Patch12: mcelog-socket-path.patch +Patch11: mcelog-socket-path.patch +Patch12: fix_setgroups_missing_call.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build PreReq: %fillup_prereq Url: https://git.kernel.org/cgit/utils/cpu/mce/mcelog.git ++++++ email.patch ++++++ --- /var/tmp/diff_new_pack.lzRXQz/_old 2015-01-25 21:14:04.000000000 +0100 +++ /var/tmp/diff_new_pack.lzRXQz/_new 2015-01-25 21:14:04.000000000 +0100 @@ -7,18 +7,18 @@ msg.c | 8 ++ 6 files changed, 343 insertions(+), 2 deletions(-) -Index: mcelog-1.0.1/Makefile +Index: mcelog-1.0.8/Makefile =================================================================== ---- mcelog-1.0.1.orig/Makefile -+++ mcelog-1.0.1/Makefile +--- mcelog-1.0.8.orig/Makefile 2014-12-20 19:35:05.000000000 +0100 ++++ mcelog-1.0.8/Makefile 2015-01-22 14:55:13.323708502 +0100 @@ -1,3 +1,4 @@ +CONFIG_EMAIL := 1 CFLAGS := -g -Os prefix := /usr etcprefix := -@@ -34,7 +35,8 @@ OBJ := p4.o k8.o mcelog.o dmi.o tsc.o co +@@ -37,7 +38,8 @@ client.o cache.o sysfs.o yellow.o page.o rbtree.o \ - xeon75xx.o sandy-bridge.o ivy-bridge.o msr.o + xeon75xx.o sandy-bridge.o ivy-bridge.o haswell.o msr.o bus.o unknown.o DISKDB_OBJ := diskdb.o dimm.o db.o -CLEAN := mcelog dmi tsc dbquery .depend .depend.X dbquery.o ${DISKDB_OBJ} +EMAIL_OBJ := email.o @@ -26,7 +26,7 @@ DOC := mce.pdf ADD_DEFINES := -@@ -46,6 +48,12 @@ OBJ += ${DISKDB_OBJ} +@@ -49,6 +51,12 @@ all: dbquery endif @@ -39,10 +39,10 @@ SRC := $(OBJ:.o=.c) mcelog: ${OBJ} -Index: mcelog-1.0.1/email.c +Index: mcelog-1.0.8/email.c =================================================================== ---- /dev/null -+++ mcelog-1.0.1/email.c +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ mcelog-1.0.8/email.c 2015-01-22 14:55:13.327708502 +0100 @@ -0,0 +1,199 @@ +#include <unistd.h> +#include <signal.h> @@ -243,10 +243,10 @@ + smtp_destroy_session (session); + return 0; +} -Index: mcelog-1.0.1/email.h +Index: mcelog-1.0.8/email.h =================================================================== ---- /dev/null -+++ mcelog-1.0.1/email.h +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ mcelog-1.0.8/email.h 2015-01-22 14:55:13.331708502 +0100 @@ -0,0 +1,34 @@ +#ifndef _MCELOG_EMAIL_H_ +#define _MCELOG_EMAIL_H_ @@ -282,10 +282,10 @@ +#endif + +#endif -Index: mcelog-1.0.1/mcelog.c +Index: mcelog-1.0.8/mcelog.c =================================================================== ---- mcelog-1.0.1.orig/mcelog.c -+++ mcelog-1.0.1/mcelog.c +--- mcelog-1.0.8.orig/mcelog.c 2014-12-20 19:35:05.000000000 +0100 ++++ mcelog-1.0.8/mcelog.c 2015-01-22 14:55:13.335708502 +0100 @@ -37,6 +37,7 @@ #include <assert.h> #include <signal.h> @@ -294,9 +294,9 @@ #include <fnmatch.h> #include "mcelog.h" #include "paths.h" -@@ -59,6 +60,9 @@ - #include "yellow.h" - #include "page.h" +@@ -61,6 +62,9 @@ + #include "bus.h" + #include "unknown.h" +#include "email.h" +int email_mode; @@ -304,7 +304,7 @@ enum cputype cputype = CPU_GENERIC; char *logfn = LOG_DEV_FILENAME; -@@ -70,7 +74,7 @@ static double cpumhz; +@@ -72,7 +76,7 @@ static int cpumhz_forced; int ascii_mode; int dump_raw_ascii; @@ -313,7 +313,7 @@ static char *inputfile; char *processor_flags; static int foreground; -@@ -928,6 +932,7 @@ void usage(void) +@@ -944,6 +948,7 @@ "--pidfile file Write pid of daemon into file\n" "--no-imc-log Disable extended iMC logging\n" ); @@ -321,7 +321,7 @@ diskdb_usage(); print_cputypes(); exit(1); -@@ -995,6 +1000,7 @@ static struct option options[] = { +@@ -1011,6 +1016,7 @@ { "debug-numerrors", 0, NULL, O_DEBUG_NUMERRORS }, /* undocumented: for testing */ { "no-imc-log", 0, NULL, O_NO_IMC_LOG }, DISKDB_OPTIONS @@ -329,7 +329,7 @@ {} }; -@@ -1170,11 +1176,86 @@ static void drop_cred(void) +@@ -1188,11 +1194,86 @@ } } @@ -416,7 +416,7 @@ if (recordlen == 0) { Wprintf("no data in mce record\n"); -@@ -1201,12 +1282,16 @@ static void process(int fd, unsigned rec +@@ -1219,12 +1300,16 @@ finish = 1; if (!mce_filter(mce, recordlen)) continue; @@ -433,7 +433,7 @@ flushlog(); } -@@ -1317,6 +1402,8 @@ int main(int ac, char **av) +@@ -1335,6 +1420,8 @@ exit(0); } else if (diskdb_cmd(opt, ac, av)) { exit(0); @@ -442,7 +442,7 @@ } else if (opt == 0) break; } -@@ -1345,6 +1432,10 @@ int main(int ac, char **av) +@@ -1363,6 +1450,10 @@ logfn = av[optind++]; if (av[optind]) usage(); @@ -453,11 +453,11 @@ checkdmi(); general_setup(); -Index: mcelog-1.0.1/mcelog.h +Index: mcelog-1.0.8/mcelog.h =================================================================== ---- mcelog-1.0.1.orig/mcelog.h -+++ mcelog-1.0.1/mcelog.h -@@ -123,6 +123,7 @@ enum cputype { +--- mcelog-1.0.8.orig/mcelog.h 2014-12-20 19:35:05.000000000 +0100 ++++ mcelog-1.0.8/mcelog.h 2015-01-22 14:55:13.339708502 +0100 +@@ -125,6 +125,7 @@ enum option_ranges { O_COMMON = 500, O_DISKDB = 1000, @@ -465,10 +465,10 @@ }; enum syslog_opt { -Index: mcelog-1.0.1/msg.c +Index: mcelog-1.0.8/msg.c =================================================================== ---- mcelog-1.0.1.orig/msg.c -+++ mcelog-1.0.1/msg.c +--- mcelog-1.0.8.orig/msg.c 2014-12-20 19:35:05.000000000 +0100 ++++ mcelog-1.0.8/msg.c 2015-01-22 14:55:13.343708502 +0100 @@ -8,10 +8,13 @@ #include "mcelog.h" #include "msg.h" @@ -483,7 +483,7 @@ static char *output_fn; int need_stdout(void) -@@ -135,6 +138,11 @@ int Wprintf(char *fmt, ...) +@@ -135,6 +138,11 @@ n = vfprintf(output_fh ? output_fh : stdout, fmt, ap); va_end(ap); } ++++++ fix_setgroups_missing_call.patch ++++++ Index: mcelog-1.0.8/mcelog.c =================================================================== --- mcelog-1.0.8.orig/mcelog.c 2015-01-22 14:56:56.151710136 +0100 +++ mcelog-1.0.8/mcelog.c 2015-01-23 09:58:35.252799171 +0100 @@ -37,6 +37,7 @@ #include <assert.h> #include <signal.h> #include <pwd.h> +#include <grp.h> #include <sys/wait.h> #include <fnmatch.h> #include "mcelog.h" @@ -1185,6 +1186,14 @@ static void drop_cred(void) { + /* When dropping privileges from root, the `setgroups` call will + * remove any extraneous groups. If we don't call this, then + * even though our uid has dropped, we may still have groups + * that enable us to do super-user things. This will fail if we + * aren't root, so don't bother checking the return value, this + * is just done as an optimistic privilege dropping function. + */ + setgroups(0, NULL); if (runcred.uid != -1U && runcred.gid == -1U) { struct passwd *pw = getpwuid(runcred.uid); if (pw) ++++++ mcelog-1.0.1.tar.bz2 -> mcelog-1.0.8.tar.bz2 ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/Makefile new/mcelog-1.0.8/Makefile --- old/mcelog-1.0.1/Makefile 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/Makefile 2014-12-20 19:35:05.000000000 +0100 @@ -22,7 +22,10 @@ # CONFIG_DISKDB = 1 TRIGGERS=cache-error-trigger dimm-error-trigger page-error-trigger \ - socket-memory-error-trigger + socket-memory-error-trigger \ + bus-error-trigger \ + iomca-error-trigger \ + unknown-error-trigger all: mcelog @@ -32,7 +35,7 @@ nehalem.o dunnington.o tulsa.o config.o memutil.o msg.o \ eventloop.o leaky-bucket.o memdb.o server.o trigger.o \ client.o cache.o sysfs.o yellow.o page.o rbtree.o \ - xeon75xx.o sandy-bridge.o ivy-bridge.o msr.o + xeon75xx.o sandy-bridge.o ivy-bridge.o haswell.o msr.o bus.o unknown.o DISKDB_OBJ := diskdb.o dimm.o db.o CLEAN := mcelog dmi tsc dbquery .depend .depend.X dbquery.o ${DISKDB_OBJ} DOC := mce.pdf diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/bus.c new/mcelog-1.0.8/bus.c --- old/mcelog-1.0.1/bus.c 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0.8/bus.c 2014-12-20 19:35:05.000000000 +0100 @@ -0,0 +1,129 @@ +/* Copyright (C) 20014 Intel Corporation + Author: Rui Wang + Handle 'Bus and Interconnect' error threshold indications. + + mcelog is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; version + 2. + + mcelog is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should find a copy of v2 of the GNU General Public License somewhere + on your Linux system. */ +#define _GNU_SOURCE 1 +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include "memutil.h" +#include "mcelog.h" +#include "config.h" +#include "trigger.h" +#include "bus.h" + +static char *bus_trigger, *iomca_trigger; + +enum { + MAX_ENV = 20, +}; + +void bus_setup(void) +{ + bus_trigger = config_string("socket", "bus-uc-threshold-trigger"); + if (bus_trigger && trigger_check(bus_trigger) < 0) { + SYSERRprintf("Cannot access bus threshold trigger `%s'", + bus_trigger); + exit(1); + } + + iomca_trigger = config_string("socket", "iomca-threshold-trigger"); + if (iomca_trigger && trigger_check(iomca_trigger) < 0) { + SYSERRprintf("Cannot access iomca threshold trigger `%s'", + iomca_trigger); + exit(1); + } +} + +void run_bus_trigger(int socket, int cpu, char *level, char *pp, char *rrrr, + char *ii, char *timeout) +{ + int ei = 0; + char *env[MAX_ENV]; + int i; + char *msg; + char *location; + + if (socket >= 0) + asprintf(&location, "CPU %d on socket %d", cpu, socket); + else + asprintf(&location, "CPU %d", cpu); + asprintf(&msg, "%s received Bus and Interconnect Errors in %s", + location, ii); + asprintf(&env[ei++], "LOCATION=%s", location); + free(location); + + if (!bus_trigger) + goto out; + + if (socket >= 0) + asprintf(&env[ei++], "SOCKETID=%d", socket); + asprintf(&env[ei++], "MESSAGE=%s", msg); + asprintf(&env[ei++], "CPU=%d", cpu); + asprintf(&env[ei++], "LEVEL=%s", level); + asprintf(&env[ei++], "PARTICIPATION=%s", pp); + asprintf(&env[ei++], "REQUEST=%s", rrrr); + asprintf(&env[ei++], "ORIGIN=%s", ii); + asprintf(&env[ei++], "TIMEOUT=%s", timeout); + env[ei] = NULL; + assert(ei < MAX_ENV); + + run_trigger(bus_trigger, NULL, env); + for (i = 0; i < ei; i++) + free(env[i]); +out: + free(msg); +} + +void run_iomca_trigger(int socket, int cpu, int seg, int bus, int dev, int fn) +{ + int ei = 0; + char *env[MAX_ENV]; + int i; + char *msg; + char *location; + + if (socket >= 0) + asprintf(&location, "CPU %d on socket %d", cpu, socket); + else + asprintf(&location, "CPU %d", cpu); + asprintf(&msg, "%s received IO MCA Errors from %x:%02x:%02x.%x", + location, seg, bus, dev, fn); + asprintf(&env[ei++], "LOCATION=%s", location); + free(location); + + if (!iomca_trigger) + goto out; + + if (socket >= 0) + asprintf(&env[ei++], "SOCKETID=%d", socket); + asprintf(&env[ei++], "MESSAGE=%s", msg); + asprintf(&env[ei++], "CPU=%d", cpu); + asprintf(&env[ei++], "SEG=%x", seg); + asprintf(&env[ei++], "BUS=%02x", bus); + asprintf(&env[ei++], "DEVICE=%02x", dev); + asprintf(&env[ei++], "FUNCTION=%x", fn); + env[ei] = NULL; + assert(ei < MAX_ENV); + + run_trigger(iomca_trigger, NULL, env); + for (i = 0; i < ei; i++) + free(env[i]); +out: + free(msg); + +} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/bus.h new/mcelog-1.0.8/bus.h --- old/mcelog-1.0.1/bus.h 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0.8/bus.h 2014-12-20 19:35:05.000000000 +0100 @@ -0,0 +1,4 @@ +void bus_setup(void); +void run_bus_trigger(int socket, int cpu, char *level, char *pp, char *rrrr, + char *ii, char *timeout); +void run_iomca_trigger(int socket, int cpu, int seg, int bus, int dev, int fn); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/client.c new/mcelog-1.0.8/client.c --- old/mcelog-1.0.1/client.c 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/client.c 2014-12-20 19:35:05.000000000 +0100 @@ -29,9 +29,9 @@ { struct sockaddr_un sun; int fd; + FILE * fp; int n; char buf[1024]; - int done; char *path = config_string("server", "socket-path"); if (!path) path = SOCKET_PATH; @@ -52,14 +52,18 @@ if (write(fd, command, n) != n) SYSERRprintf("client command write"); - done = 0; - while (!done && (n = read(fd, buf, sizeof buf)) > 0) { - if (n >= 5 && !memcmp(buf + n - 5, "done\n", 5)) { - n -= 5; - done = 1; + if ((fp = fdopen(fd, "r")) != NULL) { + while (fgets(buf, sizeof buf, fp)) { + n = strlen(buf); + if (n >= 5 && !memcmp(buf + n - 5, "done\n", 5)) { + fclose(fp); + return; + } + + fputs(buf, stdout); } - write(1, buf, n); + fclose(fp); } - if (n < 0) - SYSERRprintf("client read"); + + SYSERRprintf("client read"); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/dmi.c new/mcelog-1.0.8/dmi.c --- old/mcelog-1.0.1/dmi.c 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/dmi.c 2014-12-20 19:35:05.000000000 +0100 @@ -174,8 +174,10 @@ if (fclose(efi_systab) != 0) perror(filename); - if (!ret) - Eprintf("%s: SMBIOS entry point missing", filename); + if (!ret || !*address){ + Lprintf("No valid SMBIOS entry point: Continue without DMI decoding"); + return 0; + } if (verbose) printf("%s: SMBIOS entry point at 0x%08lx\n", filename, @@ -224,6 +226,8 @@ } a = (struct anchor*)((char*)abase + (entry_point_addr - addr_start)); goto fill_entries; + } else { + return -1; } legacy: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/haswell.c new/mcelog-1.0.8/haswell.c --- old/mcelog-1.0.1/haswell.c 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0.8/haswell.c 2014-12-20 19:35:05.000000000 +0100 @@ -0,0 +1,150 @@ +/* Copyright (C) 2013 Intel Corporation + Decode Intel Ivy Bridge specific machine check errors. + + mcelog is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; version + 2. + + mcelog is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should find a copy of v2 of the GNU General Public License somewhere + on your Linux system; if not, write to the Free Software Foundation, + Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + Author: Tony Luck +*/ + +#include "mcelog.h" +#include "bitfield.h" +#include "haswell.h" +#include "memdb.h" + +/* See IA32 SDM Vol3B Table 16-20 */ + +static char *pcu_1[] = { + [0x00] = "No Error", + [0x09] = "MC_MESSAGE_CHANNEL_TIMEOUT", + [0x0D] = "MC_IMC_FORCE_SR_S3_TIMEOUT", + [0x0E] = "MC_CPD_UNCPD_SD_TIMEOUT", + [0x13] = "MC_DMI_TRAINING_TIMEOUT", + [0x15] = "MC_DMI_CPU_RESET_ACK_TIMEOUT", + [0x1E] = "MC_VR_ICC_MAX_LT_FUSED_ICC_MAX", + [0x25] = "MC_SVID_COMMAN_TIMEOUT", + [0x29] = "MC_VR_VOUT_MAC_LT_FUSED_SVID", + [0x2B] = "MC_PKGC_WATCHDOG_HANG_CBZ_DOWN", + [0x2C] = "MC_PKGC_WATCHDOG_HANG_CBZ_UP", + [0x39] = "MC_PKGC_WATCHDOG_HANG_C3_UP_SF", + [0x44] = "MC_CRITICAL_VR_FAILED", + [0x45] = "MC_ICC_MAX_NOTSUPPORTED", + [0x46] = "MC_VID_RAMP_DOWN_FAILED", + [0x47] = "MC_EXCL_MODE_NO_PMREQ_CMP", + [0x48] = "MC_SVID_READ_REG_ICC_MAX_FAILED", + [0x49] = "MC_SVID_WRITE_REG_VOUT_MAX_FAILED", + [0x4B] = "MC_BOOT_VID_TIMEOUT_DRAM_0", + [0x4C] = "MC_BOOT_VID_TIMEOUT_DRAM_1", + [0x4D] = "MC_BOOT_VID_TIMEOUT_DRAM_2", + [0x4E] = "MC_BOOT_VID_TIMEOUT_DRAM_3", + [0x4F] = "MC_SVID_COMMAND_ERROR", + [0x52] = "MC_FIVR_CATAS_OVERVOL_FAULT", + [0x53] = "MC_FIVR_CATAS_OVERCUR_FAULT", + [0x57] = "MC_SVID_PKGC_REQUEST_FAILED", + [0x58] = "MC_SVID_IMON_REQUEST_FAILED", + [0x59] = "MC_SVID_ALERT_REQUEST_FAILED", + [0x60] = "MC_INVALID_PKGS_REQ_PCH", + [0x61] = "MC_INVALID_PKGS_REQ_QPI", + [0x62] = "MC_INVALID_PKGS_RSP_QPI", + [0x63] = "MC_INVALID_PKGS_RSP_PCH", + [0x64] = "MC_INVALID_PKG_STATE_CONFIG", + [0x67] = "MC_HA_IMC_RW_BLOCK_ACK_TIMEOUT", + [0x68] = "MC_IMC_RW_SMBUS_TIMEOUT", + [0x69] = "MC_HA_FAILSTS_CHANGE_DETECTED", + [0x6A] = "MC_MSGCH_PMREQ_CMP_TIMEOUT", + [0x70] = "MC_WATCHDOG_TIMEOUT_PKGC_SLAVE", + [0x71] = "MC_WATCHDOG_TIMEOUT_PKGC_MASTER", + [0x72] = "MC_WATCHDOG_TIMEOUT_PKGS_MASTER", + [0x7C] = "MC_BIOS_RST_CPL_INVALID_SEQ", + [0x7D] = "MC_MORE_THAN_ONE_TXT_AGENT", + [0x81] = "MC_RECOVERABLE_DIE_THERMAL_TOO_HOT" +}; + +static struct field pcu_mc4[] = { + FIELD(24, pcu_1), + {} +}; + +/* See IA32 SDM Vol3B Table 16-21 */ + +static char *qpi[] = { + [0x02] = "Intel QPI physical layer detected drift buffer alarm", + [0x03] = "Intel QPI physical layer detected latency buffer rollover", + [0x10] = "Intel QPI link layer detected control error from R3QPI", + [0x11] = "Rx entered LLR abort state on CRC error", + [0x12] = "Unsupported or undefined packet", + [0x13] = "Intel QPI link layer control error", + [0x15] = "RBT used un-initialized value", + [0x20] = "Intel QPI physical layer detected a QPI in-band reset but aborted initialization", + [0x21] = "Link failover data self healing", + [0x22] = "Phy detected in-band reset (no width change)", + [0x23] = "Link failover clock failover", + [0x30] = "Rx detected CRC error - successful LLR after Phy re-init", + [0x31] = "Rx detected CRC error - successful LLR wihout Phy re-init", +}; + +static struct field qpi_mc[] = { + FIELD(16, qpi), + {} +}; + +/* See IA32 SDM Vol3B Table 16-22 */ + +static struct field memctrl_mc9[] = { + SBITFIELD(16, "DDR3 address parity error"), + SBITFIELD(17, "Uncorrected HA write data error"), + SBITFIELD(18, "Uncorrected HA data byte enable error"), + SBITFIELD(19, "Corrected patrol scrub error"), + SBITFIELD(20, "Uncorrected patrol scrub error"), + SBITFIELD(21, "Corrected spare error"), + SBITFIELD(22, "Uncorrected spare error"), + SBITFIELD(23, "Corrected memory read error"), + SBITFIELD(24, "iMC write data buffer parity error"), + SBITFIELD(25, "DDR4 command address parity error"), + {} +}; + +void hsw_decode_model(int cputype, int bank, u64 status, u64 misc) +{ + switch (bank) { + case 4: + Wprintf("PCU: "); + switch (EXTRACT(status, 0, 15) & ~(1ull << 12)) { + case 0x402: case 0x403: + Wprintf("Internal errors "); + break; + case 0x406: + Wprintf("Intel TXT errors "); + break; + case 0x407: + Wprintf("Other UBOX Internal errors "); + break; + } + if (EXTRACT(status, 16, 19)) + Wprintf("PCU internal error "); + decode_bitfield(status, pcu_mc4); + break; + case 5: + case 20: + case 21: + Wprintf("QPI: "); + decode_bitfield(status, qpi_mc); + break; + case 9: case 10: case 11: case 12: + case 13: case 14: case 15: case 16: + Wprintf("MemCtrl: "); + decode_bitfield(status, memctrl_mc9); + break; + } +} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/haswell.h new/mcelog-1.0.8/haswell.h --- old/mcelog-1.0.1/haswell.h 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0.8/haswell.h 2014-12-20 19:35:05.000000000 +0100 @@ -0,0 +1,2 @@ +void hsw_decode_model(int cputype, int bank, u64 status, u64 misc); +void haswell_ep_memerr_misc(struct mce *m, int *channel, int *dimm); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/input/GENCACHE new/mcelog-1.0.8/input/GENCACHE --- old/mcelog-1.0.1/input/GENCACHE 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/input/GENCACHE 2014-12-20 19:35:05.000000000 +0100 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # GENCACHE cpu level type track # generate a memory error. All fields are optional. # see SDM 3a chapter 15 for details diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/input/GENMEM new/mcelog-1.0.8/input/GENMEM --- old/mcelog-1.0.1/input/GENMEM 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/input/GENMEM 2014-12-20 19:35:05.000000000 +0100 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # GENMEM socketid channel dimm corr-err-cnt uc-flag # generate a memory error. All fields are optional. # suitable to be fed into mce-inject or mcelog --ascii diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/input/GENPAGE new/mcelog-1.0.8/input/GENPAGE --- old/mcelog-1.0.1/input/GENPAGE 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/input/GENPAGE 2014-12-20 19:35:05.000000000 +0100 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # GENMPAGE pfn socketid channel dimm corr-err-cnt # generate a memory error on a page. All fields are optional. # dimm/channel can be out of sync with the address diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/input/iomca new/mcelog-1.0.8/input/iomca --- old/mcelog-1.0.1/input/iomca 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0.8/input/iomca 2014-12-20 19:35:05.000000000 +0100 @@ -0,0 +1,4 @@ +CPU 0 BANK 1 +STATUS 0x9c00000000000e0b +MISC 0xabcdef +ADDR 0xabcd diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/input/unknown new/mcelog-1.0.8/input/unknown --- old/mcelog-1.0.1/input/unknown 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0.8/input/unknown 2014-12-20 19:35:05.000000000 +0100 @@ -0,0 +1,4 @@ +CPU 0 BANK 1 +STATUS 0x9c0000000000040b +MISC 0xabcdef +ADDR 0xabcd diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/intel.c new/mcelog-1.0.8/intel.c --- old/mcelog-1.0.1/intel.c 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/intel.c 2014-12-20 19:35:05.000000000 +0100 @@ -24,6 +24,7 @@ #include "page.h" #include "sandy-bridge.h" #include "ivy-bridge.h" +#include "haswell.h" #include "xeon75xx.h" int memory_error_support; @@ -33,7 +34,7 @@ if (cpu == CPU_NEHALEM || cpu == CPU_XEON75XX || cpu == CPU_INTEL || cpu == CPU_SANDY_BRIDGE || cpu == CPU_SANDY_BRIDGE_EP || cpu == CPU_IVY_BRIDGE || cpu == CPU_IVY_BRIDGE_EPEX || - cpu == CPU_HASWELL) + cpu == CPU_HASWELL || cpu == CPU_HASWELL_EPEX || cpu == CPU_BROADWELL) memory_error_support = 1; } @@ -67,9 +68,12 @@ return CPU_IVY_BRIDGE; else if (model == 0x3e) return CPU_IVY_BRIDGE_EPEX; - else if (model == 0x3c || model == 0x3f || model == 0x45 || - model == 0x46) + else if (model == 0x3c || model == 0x45 || model == 0x46) return CPU_HASWELL; + else if (model == 0x3f) + return CPU_HASWELL_EPEX; + else if (model == 0x3d) + return CPU_BROADWELL; if (model > 0x1a) { Eprintf("Family 6 Model %x CPU: only decoding architectural errors\n", model); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/intel.h new/mcelog-1.0.8/intel.h --- old/mcelog-1.0.1/intel.h 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/intel.h 2014-12-20 19:35:05.000000000 +0100 @@ -18,5 +18,7 @@ case CPU_SANDY_BRIDGE: \ case CPU_IVY_BRIDGE: \ case CPU_IVY_BRIDGE_EPEX: \ - case CPU_HASWELL + case CPU_HASWELL: \ + case CPU_HASWELL_EPEX: \ + case CPU_BROADWELL diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/ivy-bridge.c new/mcelog-1.0.8/ivy-bridge.c --- old/mcelog-1.0.1/ivy-bridge.c 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/ivy-bridge.c 2014-12-20 19:35:05.000000000 +0100 @@ -68,20 +68,16 @@ /* See IA32 SDM Vol3B Table 16-18 */ -static char *memctrl_1[] = { - [0x001] = "Address parity error", - [0x002] = "HA Wrt buffer Data parity error", - [0x004] = "HA Wrt byte enable parity error", - [0x008] = "Corrected patrol scrub error", - [0x010] = "Uncorrected patrol scrub error", - [0x020] = "Corrected spare error", - [0x040] = "Uncorrected spare error", - [0x080] = "Corrected memory read error", - [0x100] = "iMC, WDB, parity errors", -}; - static struct field memctrl_mc9[] = { - FIELD(16, memctrl_1), + SBITFIELD(16, "Address parity error"), + SBITFIELD(17, "HA Wrt buffer Data parity error"), + SBITFIELD(18, "HA Wrt byte enable parity error"), + SBITFIELD(19, "Corrected patrol scrub error"), + SBITFIELD(20, "Uncorrected patrol scrub error"), + SBITFIELD(21, "Corrected spare error"), + SBITFIELD(22, "Uncorrected spare error"), + SBITFIELD(23, "Corrected memory read error"), + SBITFIELD(24, "iMC, WDB, parity errors"), {} }; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/mcelog.c new/mcelog-1.0.8/mcelog.c --- old/mcelog-1.0.1/mcelog.c 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/mcelog.c 2014-12-20 19:35:05.000000000 +0100 @@ -58,6 +58,8 @@ #include "msg.h" #include "yellow.h" #include "page.h" +#include "bus.h" +#include "unknown.h" enum cputype cputype = CPU_GENERIC; @@ -226,8 +228,10 @@ [CPU_SANDY_BRIDGE] = "Sandy Bridge", /* Fill in better name */ [CPU_SANDY_BRIDGE_EP] = "Sandy Bridge EP", /* Fill in better name */ [CPU_IVY_BRIDGE] = "Ivy Bridge", /* Fill in better name */ - [CPU_IVY_BRIDGE_EPEX] = "Ivy Bridge EP/EX", /* Fill in better name */ + [CPU_IVY_BRIDGE_EPEX] = "Intel Xeon v2 (Ivy Bridge) EP/EX", /* Fill in better name */ [CPU_HASWELL] = "Haswell", /* Fill in better name */ + [CPU_HASWELL_EPEX] = "Intel Xeon v3 (Haswell) EP/EX", + [CPU_BROADWELL] = "Broadwell", }; static struct config_choice cpu_choices[] = { @@ -261,10 +265,15 @@ { "sandybridge", CPU_SANDY_BRIDGE }, /* Fill in better name */ { "sandybridge-ep", CPU_SANDY_BRIDGE_EP }, /* Fill in better name */ { "ivybridge", CPU_IVY_BRIDGE }, /* Fill in better name */ - { "ivybridge-ep", CPU_IVY_BRIDGE_EPEX }, /* Fill in better name */ - { "ivybridge-ex", CPU_IVY_BRIDGE_EPEX }, /* Fill in better name */ + { "ivybridge-ep", CPU_IVY_BRIDGE_EPEX }, + { "ivybridge-ex", CPU_IVY_BRIDGE_EPEX }, { "haswell", CPU_HASWELL }, /* Fill in better name */ - {} + { "haswell-ep", CPU_HASWELL_EPEX }, + { "haswell-ex", CPU_HASWELL_EPEX }, + { "broadwell", CPU_BROADWELL }, + { "xeon-v2", CPU_IVY_BRIDGE_EPEX }, + { "xeon-v3", CPU_HASWELL_EPEX }, + { NULL } }; static void print_cputypes(void) @@ -424,7 +433,8 @@ fam, mod); } - if (cputype != CPU_SANDY_BRIDGE_EP && cputype != CPU_IVY_BRIDGE_EPEX) + if (cputype != CPU_SANDY_BRIDGE_EP && cputype != CPU_IVY_BRIDGE_EPEX && + cputype != CPU_HASWELL_EPEX && cputype != CPU_BROADWELL) resolveaddr(m->addr); if (!ascii_mode && ismemerr && (m->status & MCI_STATUS_ADDRV)) { diskdb_resolve_addr(m->addr); @@ -562,6 +572,12 @@ if (*s == ']') ++s; } + + s = skipspace(s); + + if (strncmp(s, "mce: [Hardware Error]:", 22) == 0) + s += 22; + return skipspace(s); } @@ -1148,6 +1164,8 @@ { trigger_setup(); yellow_setup(); + bus_setup(); + unknown_setup(); config_cred("global", "run-credentials", &runcred); if (config_bool("global", "filter-memory-errors") == 1) filter_memory_errors = 1; @@ -1190,7 +1208,7 @@ count = len / (int)recordlen; if (count == (int)loglen) { if ((ioctl(fd, MCE_GETCLEAR_FLAGS, &flags) == 0) && - (flags & MCE_OVERFLOW)) + (flags & (1 << MCE_OVERFLOW))) Eprintf("Warning: MCE buffer is overflowed.\n"); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/mcelog.conf new/mcelog-1.0.8/mcelog.conf --- old/mcelog-1.0.1/mcelog.conf 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/mcelog.conf 2014-12-20 19:35:05.000000000 +0100 @@ -127,6 +127,9 @@ # Log socket error threshold explicitely? mem-ce-error-log = yes +bus-uc-threshold-trigger = bus-error-trigger +iomca-threshold-trigger = iomca-error-trigger +unknown-threshold-trigger = unknown-error-trigger [cache] # Processing of cache error thresholds reported by Intel CPUs diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/mcelog.h new/mcelog-1.0.8/mcelog.h --- old/mcelog-1.0.1/mcelog.h 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/mcelog.h 2014-12-20 19:35:05.000000000 +0100 @@ -118,6 +118,8 @@ CPU_IVY_BRIDGE, CPU_IVY_BRIDGE_EPEX, CPU_HASWELL, + CPU_HASWELL_EPEX, + CPU_BROADWELL, }; enum option_ranges { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/msr.c new/mcelog-1.0.8/msr.c --- old/mcelog-1.0.1/msr.c 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/msr.c 2014-12-20 19:35:05.000000000 +0100 @@ -36,10 +36,8 @@ SYSERRprintf("Cannot re-read MSR_ERROR_CONTROL from %s\n", fpath); exit(1); } - if ((data & bit) == 0) { - SYSERRprintf("Failed to set imc_log on cpu %d\n", cpu); - exit(1); - } + if ((data & bit) == 0) + Lprintf("No DIMM detection available on cpu %d (normal in virtual environments)\n", cpu); close(fd); } @@ -54,6 +52,8 @@ msr = 0x17f; /* MSR_ERROR_CONTROL */ bit = 0x2; /* MemError Log Enable */ break; + default: + return; } for (cpu = 0; cpu < ncpus; cpu++) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/p4.c new/mcelog-1.0.8/p4.c --- old/mcelog-1.0.1/p4.c 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/p4.c 2014-12-20 19:35:05.000000000 +0100 @@ -30,9 +30,12 @@ #include "tulsa.h" #include "intel.h" #include "yellow.h" +#include "bus.h" +#include "unknown.h" #include "bitfield.h" #include "sandy-bridge.h" #include "ivy-bridge.h" +#include "haswell.h" /* decode mce for P4/Xeon and Core2 family */ @@ -115,7 +118,7 @@ return II[i]; } -static void decode_mca(__u32 mca, u64 track, int cpu, int *ismemerr, int socket) +static int decode_mca(u64 status, u64 misc, u64 track, int cpu, int *ismemerr, int socket) { #define TLB_LL_MASK 0x3 /*bit 0, bit 1*/ #define TLB_LL_SHIFT 0x0 @@ -140,6 +143,8 @@ #define BUS_PP_MASK 0x600 /*bit 9, bit 10*/ #define BUS_PP_SHIFT 0x9 + u32 mca; + int ret = 0; static char *msg[] = { [0] = "No Error", [1] = "Unclassified", @@ -150,6 +155,7 @@ [6] = "SMM Handler Code Access Violation", }; + mca = status & 0xffff; if (mca & (1UL << 12)) { Wprintf("corrected filtering (some unreported errors in same region)\n"); mca &= ~(1UL << 12); @@ -157,16 +163,27 @@ if (mca < NELE(msg)) { Wprintf("%s\n", msg[mca]); - return; + return ret; } if ((mca >> 2) == 3) { - Wprintf("%s Generic memory hierarchy error\n", get_LL_str(mca & 3)); + unsigned levelnum; + char *level; + levelnum = mca & 3; + level = get_LL_str(levelnum); + Wprintf("%s Generic cache hierarchy error\n", level); + if (track == 2) + run_yellow_trigger(cpu, -1, levelnum, "unknown", level, socket); } else if (test_prefix(4, mca)) { - Wprintf("%s TLB %s Error\n", - get_TT_str((mca & TLB_TT_MASK) >> TLB_TT_SHIFT), - get_LL_str((mca & TLB_LL_MASK) >> - TLB_LL_SHIFT)); + unsigned levelnum, typenum; + char *level, *type; + typenum = (mca & TLB_TT_MASK) >> TLB_TT_SHIFT; + type = get_TT_str(typenum); + levelnum = (mca & TLB_LL_MASK) >> TLB_LL_SHIFT; + level = get_LL_str(levelnum); + Wprintf("%s TLB %s Error\n", type, level); + if (track == 2) + run_yellow_trigger(cpu, typenum, levelnum, type, level, socket); } else if (test_prefix(8, mca)) { unsigned typenum = (mca & CACHE_TT_MASK) >> CACHE_TT_SHIFT; unsigned levelnum = (mca & CACHE_LL_MASK) >> CACHE_LL_SHIFT; @@ -176,25 +193,51 @@ get_RRRR_str((mca & CACHE_RRRR_MASK) >> CACHE_RRRR_SHIFT)); if (track == 2) - run_yellow_trigger(cpu, typenum, levelnum, type, level, socket); + run_yellow_trigger(cpu, typenum, levelnum, type, level,socket); } else if (test_prefix(10, mca)) { if (mca == 0x400) Wprintf("Internal Timer error\n"); else Wprintf("Internal unclassified error: %x\n", mca & 0xffff); + + ret = 1; } else if (test_prefix(11, mca)) { - Wprintf("BUS %s %s %s %s %s Error\n", - get_LL_str((mca & BUS_LL_MASK) >> BUS_LL_SHIFT), - get_PP_str((mca & BUS_PP_MASK) >> BUS_PP_SHIFT), - get_RRRR_str((mca & BUS_RRRR_MASK) >> - BUS_RRRR_SHIFT), - get_II_str((mca & BUS_II_MASK) >> BUS_II_SHIFT), - get_T_str((mca & BUS_T_MASK) >> BUS_T_SHIFT)); + char *level, *pp, *rrrr, *ii, *timeout; + + level = get_LL_str((mca & BUS_LL_MASK) >> BUS_LL_SHIFT); + pp = get_PP_str((mca & BUS_PP_MASK) >> BUS_PP_SHIFT); + rrrr = get_RRRR_str((mca & BUS_RRRR_MASK) >> BUS_RRRR_SHIFT); + ii = get_II_str((mca & BUS_II_MASK) >> BUS_II_SHIFT); + timeout = get_T_str((mca & BUS_T_MASK) >> BUS_T_SHIFT); + + Wprintf("BUS error: %d %d %s %s %s %s %s\n", socket, cpu, + level, pp, rrrr, ii, timeout); + run_bus_trigger(socket, cpu, level, pp, rrrr, ii, timeout); + /* IO MCA - reported as bus/interconnect with specific PP,T,RRRR,II,LL values + * and MISCV set. MISC register points to root port that reported the error + * need to cross check with AER logs for more details. + * See: http://www.intel.com/content/www/us/en/architecture-and-technology/enhanced-mca-logging-xeon-paper.html + */ + if ((status & MCI_STATUS_MISCV) && + (status & 0xefff) == 0x0e0b) { + int seg, bus, dev, fn; + + seg = EXTRACT(misc, 32, 39); + bus = EXTRACT(misc, 24, 31); + dev = EXTRACT(misc, 19, 23); + fn = EXTRACT(misc, 16, 18); + Wprintf("IO MCA reported by root port %x:%02x:%02x.%x\n", + seg, bus, dev, fn); + run_iomca_trigger(socket, cpu, seg, bus, dev, fn); + } } else if (test_prefix(7, mca)) { decode_memory_controller(mca); *ismemerr = 1; - } else + } else { Wprintf("Unknown Error %x\n", mca); + ret = 1; + } + return ret; } static void p4_decode_model(__u32 model) @@ -242,7 +285,7 @@ [3] = "SRAR" }; -static void decode_mci(__u64 status, int cpu, unsigned mcgcap, int *ismemerr, +static int decode_mci(__u64 status, __u64 misc, int cpu, unsigned mcgcap, int *ismemerr, int socket) { u64 track = 0; @@ -279,7 +322,7 @@ decode_tracking(track); } Wprintf("MCA: "); - decode_mca(status & 0xffffL, track, cpu, ismemerr, socket); + return decode_mca(status, misc, track, cpu, ismemerr, socket); } static void decode_mcg(__u64 mcgstatus) @@ -313,11 +356,14 @@ if (log->bank == MCE_THERMAL_BANK) { decode_thermal(log, cpu); + run_unknown_trigger(socket, cpu, log); return; } decode_mcg(log->mcgstatus); - decode_mci(log->status, cpu, log->mcgcap, ismemerr, socket); + if (decode_mci(log->status, log->misc, cpu, log->mcgcap, ismemerr, + socket)) + run_unknown_trigger(socket, cpu, log); if (test_prefix(11, (log->status & 0xffffL))) { switch (cputype) { @@ -360,23 +406,9 @@ case CPU_IVY_BRIDGE_EPEX: ivb_decode_model(cputype, log->bank, log->status, log->misc); break; - } - - /* IO MCA - reported as bus/interconnect with specific PP,T,RRRR,II,LL values - * and MISCV set. MISC register points to root port that reported the error - * need to cross check with AER logs for more details. - * See: http://www.intel.com/content/www/us/en/architecture-and-technology/enhanced-mca-logging-xeon-paper.html - */ - if ((log->status & MCI_STATUS_MISCV) && - (log->status & 0xefff) == 0x0e0b) { - int seg, bus, dev, fn; - - seg = EXTRACT(log->misc, 32, 39); - bus = EXTRACT(log->misc, 24, 31); - dev = EXTRACT(log->misc, 19, 23); - fn = EXTRACT(log->misc, 16, 18); - Wprintf("IO MCA reported by root port %x:%02x:%02x.%x\n", - seg, bus, dev, fn); + case CPU_HASWELL_EPEX: + hsw_decode_model(cputype, log->bank, log->status, log->misc); + break; } } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/sandy-bridge.c new/mcelog-1.0.8/sandy-bridge.c --- old/mcelog-1.0.1/sandy-bridge.c 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/sandy-bridge.c 2014-12-20 19:35:05.000000000 +0100 @@ -63,18 +63,14 @@ {} }; -static char *memctrl_1[] = { - [0x001] = "Address parity error", - [0x002] = "HA Wrt buffer Data parity error", - [0x004] = "HA Wrt byte enable parity error", - [0x008] = "Corrected patrol scrub error", - [0x010] = "Uncorrected patrol scrub error", - [0x020] = "Corrected spare error", - [0x040] = "Uncorrected spare error", -}; - static struct field memctrl_mc8[] = { - FIELD(16, memctrl_1), + SBITFIELD(16, "Address parity error"), + SBITFIELD(17, "HA Wrt buffer Data parity error"), + SBITFIELD(18, "HA Wrt byte enable parity error"), + SBITFIELD(19, "Corrected patrol scrub error"), + SBITFIELD(20, "Uncorrected patrol scrub error"), + SBITFIELD(21, "Corrected spare error"), + SBITFIELD(22, "Uncorrected spare error"), {} }; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/tests/test new/mcelog-1.0.8/tests/test --- old/mcelog-1.0.1/tests/test 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/tests/test 2014-12-20 19:35:05.000000000 +0100 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # simple test harness for mcelog daemon trigger test cases # ./test subdir [debugger] # run mcelog test in specific sub directory diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/tests/unknown/inject new/mcelog-1.0.8/tests/unknown/inject --- old/mcelog-1.0.1/tests/unknown/inject 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0.8/tests/unknown/inject 2014-12-20 19:35:05.000000000 +0100 @@ -0,0 +1,8 @@ +#!/bin/sh + +B=$(pwd)/../.. + +PATH=$PATH:$B/../mce-inject + +mce-inject $B/input/iomca +mce-inject $B/input/unknown diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/tests/unknown/unknown.conf new/mcelog-1.0.8/tests/unknown/unknown.conf --- old/mcelog-1.0.1/tests/unknown/unknown.conf 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0.8/tests/unknown/unknown.conf 2014-12-20 19:35:05.000000000 +0100 @@ -0,0 +1,11 @@ +# trigger: 3 + +num-errors = 2 + +[socket] +bus-uc-threshold-trigger = ../trigger +iomca-threshold-trigger = ../trigger +unknown-threshold-trigger = ../trigger + +[trigger] +directory = . diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/triggers/bus-error-trigger new/mcelog-1.0.8/triggers/bus-error-trigger --- old/mcelog-1.0.1/triggers/bus-error-trigger 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0.8/triggers/bus-error-trigger 2014-12-20 19:35:05.000000000 +0100 @@ -0,0 +1,23 @@ +#!/bin/sh +# This shell script can be executed by mcelog in daemon mode when a sockets +# receives Bus and Interconnect errors +# +# environment: +# MESSAGE Human readable consolidated error message +# LOCATION Consolidated location as a single string +# SOCKETID Socket ID of CPU that includes the memory controller with the DIMM +# LEVEL Interconnect level +# PARTICIPATION Processor Participation (Originator, Responder or Observer) +# REQUEST Request type (read, write, prefetch, etc.) +# ORIGIN Memory or IO +# TIMEOUT The request timed out or not +# +# note: will run as mcelog configured user +# this can be changed in mcelog.conf + +logger -s -p daemon.err -t mcelog "$MESSAGE" +logger -s -p daemon.err -t mcelog "Location: $LOCATION" + +[ -x ./bus-error-trigger.local ] && . ./bus-error-trigger.local + +exit 0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/triggers/cache-error-trigger new/mcelog-1.0.8/triggers/cache-error-trigger --- old/mcelog-1.0.1/triggers/cache-error-trigger 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/triggers/cache-error-trigger 2014-12-20 19:35:05.000000000 +0100 @@ -15,16 +15,11 @@ # this can be changed in mcelog.conf # -# offline the CPUs (except CPU #0) sharing the affected cache +# offline the CPUs sharing the affected cache # EXIT=0 for i in $AFFECTED_CPUS ; do - if [ $i = 0 ] ; then - logger -s -p daemon.warn -t mcelog "Not offlining CPU 0" - EXIT=1 - continue - fi logger -s -p daemon.crit -t mcelog "Offlining CPU $i due to cache error threshold" F=$(printf "/sys/devices/system/cpu/cpu%d/online" $i) echo 0 > $F diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/triggers/iomca-error-trigger new/mcelog-1.0.8/triggers/iomca-error-trigger --- old/mcelog-1.0.1/triggers/iomca-error-trigger 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0.8/triggers/iomca-error-trigger 2014-12-20 19:35:05.000000000 +0100 @@ -0,0 +1,23 @@ +#!/bin/sh +# This shell script can be executed by mcelog in daemon mode when a sockets +# receives Bus and Interconnect errors +# +# environment: +# MESSAGE Human readable consolidated error message +# LOCATION Consolidated location as a single string +# SOCKETID Socket ID of CPU that includes the memory controller with the DIMM +# CPU Linux CPU number that triggered the error +# SET PCI segment number +# BUS PCI bus number +# DEVICE PCI device number +# FUNCTION PCI function number +# +# note: will run as mcelog configured user +# this can be changed in mcelog.conf + +logger -s -p daemon.err -t mcelog "$MESSAGE" +logger -s -p daemon.err -t mcelog "Location: $LOCATION" + +[ -x ./iomca-error-trigger.local ] && . ./iomca-error-trigger.local + +exit 0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/triggers/unknown-error-trigger new/mcelog-1.0.8/triggers/unknown-error-trigger --- old/mcelog-1.0.1/triggers/unknown-error-trigger 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0.8/triggers/unknown-error-trigger 2014-12-20 19:35:05.000000000 +0100 @@ -0,0 +1,26 @@ +#!/bin/sh +# This shell script is executed by mcelog in daemon mode when +# an not otherwise handled machine check error happens. +# +# environment: +# MESSAGE Human readable consolidated error message +# LOCATION Consolidated location as a single string +# SOCKETID Socket ID of CPU that includes the memory controller with the DIMM +# CPU Linux CPU number that triggered the error +# STATUS IA32_MCi_STATUS register value +# ADDR IA32_MCi_ADDR register value +# MISC IA32_MCi_MISC regiser value +# MCGSTATUS IA32_MCG_STATUS register value +# MCGCAP IA32_MCG_CAP register value +# For details on the register layout please see the Intel SDM http://www.intel.com/sdm +# volume 3, chapter 15 +# +# note: will run as mcelog configured user +# this can be changed in mcelog.conf + +logger -s -p daemon.err -t mcelog "$MESSAGE" +logger -s -p daemon.err -t mcelog "Location: $LOCATION" + +[ -x ./unknown-error-trigger.local ] && . ./unknown-error-trigger.local + +exit 0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/unknown.c new/mcelog-1.0.8/unknown.c --- old/mcelog-1.0.1/unknown.c 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0.8/unknown.c 2014-12-20 19:35:05.000000000 +0100 @@ -0,0 +1,82 @@ +/* Copyright (C) 20014 Intel Corporation + Author: Rui Wang + Handle all other unknown error requests. + + mcelog is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; version + 2. + + mcelog is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should find a copy of v2 of the GNU General Public License somewhere + on your Linux system. */ +#define _GNU_SOURCE 1 +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include "memutil.h" +#include "mcelog.h" +#include "config.h" +#include "trigger.h" +#include "unknown.h" + +static char *unknown_trigger; + +enum { + MAX_ENV = 20, +}; + +void unknown_setup(void) +{ + unknown_trigger = config_string("socket", "unknown-threshold-trigger"); + if (unknown_trigger && trigger_check(unknown_trigger) < 0) { + SYSERRprintf("Cannot access unknown threshold trigger `%s'", + unknown_trigger); + exit(1); + } +} + +void run_unknown_trigger(int socket, int cpu, struct mce *log) +{ + int ei = 0; + char *env[MAX_ENV]; + int i; + char *msg; + char *location; + + if (socket >= 0) + asprintf(&location, "CPU %d on socket %d", cpu, socket); + else + asprintf(&location, "CPU %d", cpu); + asprintf(&msg, "%s received unknown error", location); + asprintf(&env[ei++], "LOCATION=%s", location); + free(location); + + if (!unknown_trigger) + goto out; + + if (socket >= 0) + asprintf(&env[ei++], "SOCKETID=%d", socket); + asprintf(&env[ei++], "MESSAGE=%s", msg); + asprintf(&env[ei++], "CPU=%d", cpu); + asprintf(&env[ei++], "STATUS=%llx", log->status); + asprintf(&env[ei++], "MISC=%llx", log->misc); + asprintf(&env[ei++], "ADDR=%llx", log->addr); + asprintf(&env[ei++], "MCGSTATUS=%llx", log->mcgstatus); + asprintf(&env[ei++], "MCGCAP=%llx", log->mcgcap); + env[ei] = NULL; + assert(ei < MAX_ENV); + + run_trigger(unknown_trigger, NULL, env); + for (i = 0; i < ei; i++) + free(env[i]); +out: + free(msg); +} + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/unknown.h new/mcelog-1.0.8/unknown.h --- old/mcelog-1.0.1/unknown.h 1970-01-01 01:00:00.000000000 +0100 +++ new/mcelog-1.0.8/unknown.h 2014-12-20 19:35:05.000000000 +0100 @@ -0,0 +1,2 @@ +void unknown_setup(void); +void run_unknown_trigger(int socket, int cpu, struct mce *log); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-1.0.1/yellow.c new/mcelog-1.0.8/yellow.c --- old/mcelog-1.0.1/yellow.c 2014-03-25 23:09:38.000000000 +0100 +++ new/mcelog-1.0.8/yellow.c 2014-12-20 19:35:05.000000000 +0100 @@ -90,6 +90,8 @@ asprintf(&env[ei++], "TYPE=%s", ts); if (cache_to_cpus(cpu, lnum, tnum, &cpumasklen, &cpumask) >= 0) env[ei++] = cpulist("AFFECTED_CPUS=", cpumask, cpumasklen); + else + asprintf(&env[ei++], "AFFECTED_CPUS=unknown"); env[ei] = NULL; assert(ei < MAX_ENV); -- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
