Module Name:    src
Committed By:   msaitoh
Date:           Fri Dec  8 06:05:15 UTC 2017

Modified Files:
        src/sys/arch/sparc/include [netbsd-8]: cpu.h
        src/sys/arch/sparc/sparc [netbsd-8]: cpu.c intr.c

Log Message:
Pull up following revision(s) (requested by macallan in ticket #429):
        sys/arch/sparc/sparc/cpu.c: revision 1.250
        sys/arch/sparc/include/cpu.h: revision 1.99
        sys/arch/sparc/sparc/intr.c: revision 1.119
- return early in xcall() if the function is sparc_noop() instead of triggering
  the IPI and then ignoring responses (or lack thereof)
- write the .tag field last to avoid a race when polling for an incoming
  IPI
- add event counters for IPIs being caught with the mutex not held, and for
  messages that are already marked as completed
With this my SS20 made it through 48 hours of pkgsrc with MAKE_JOBS=3 and a
pair of SM81s.
Hypersparcs still crash, but instead of craziness we get actual error messages:
apparently one CPU will occasionally do a watchdog reset, which according to
the manual is caused by catching a trap with traps disabled. Now to figure
out how that can even happen...


To generate a diff of this commit:
cvs rdiff -u -r1.97 -r1.97.8.1 src/sys/arch/sparc/include/cpu.h
cvs rdiff -u -r1.249 -r1.249.6.1 src/sys/arch/sparc/sparc/cpu.c
cvs rdiff -u -r1.118 -r1.118.22.1 src/sys/arch/sparc/sparc/intr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/sparc/include/cpu.h
diff -u src/sys/arch/sparc/include/cpu.h:1.97 src/sys/arch/sparc/include/cpu.h:1.97.8.1
--- src/sys/arch/sparc/include/cpu.h:1.97	Sat Dec 10 10:41:07 2016
+++ src/sys/arch/sparc/include/cpu.h	Fri Dec  8 06:05:15 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.97 2016/12/10 10:41:07 mrg Exp $ */
+/*	$NetBSD: cpu.h,v 1.97.8.1 2017/12/08 06:05:15 msaitoh Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -164,7 +164,7 @@ struct cpu_info {
 	 * the pending register to avoid a hardware bug.
 	 */
 #define raise_ipi(cpi,lvl)	do {			\
-	int x;						\
+	volatile int x;						\
 	(cpi)->intreg_4m->pi_set = PINTR_SINTRLEV(lvl);	\
 	x = (cpi)->intreg_4m->pi_pend; __USE(x);	\
 } while (0)
@@ -333,6 +333,8 @@ struct cpu_info {
 	struct evcnt ci_savefpstate_null;
 	struct evcnt ci_xpmsg_mutex_fail;
 	struct evcnt ci_xpmsg_mutex_fail_call;
+	struct evcnt ci_xpmsg_mutex_not_held;
+	struct evcnt ci_xpmsg_bogus;
 	struct evcnt ci_intrcnt[16];
 	struct evcnt ci_sintrcnt[16];
 };

Index: src/sys/arch/sparc/sparc/cpu.c
diff -u src/sys/arch/sparc/sparc/cpu.c:1.249 src/sys/arch/sparc/sparc/cpu.c:1.249.6.1
--- src/sys/arch/sparc/sparc/cpu.c:1.249	Wed Jan 18 21:33:25 2017
+++ src/sys/arch/sparc/sparc/cpu.c	Fri Dec  8 06:05:15 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.249 2017/01/18 21:33:25 macallan Exp $ */
+/*	$NetBSD: cpu.c,v 1.249.6.1 2017/12/08 06:05:15 msaitoh Exp $ */
 
 /*
  * Copyright (c) 1996
@@ -52,7 +52,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.249 2017/01/18 21:33:25 macallan Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.249.6.1 2017/12/08 06:05:15 msaitoh Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_lockdebug.h"
@@ -183,7 +183,7 @@ int go_smp_cpus = 0;	/* non-primary CPUs
  * This must be locked around all message transactions to ensure only
  * one CPU is generating them.
  */
-static kmutex_t xpmsg_mutex;
+kmutex_t xpmsg_mutex;
 
 #endif /* MULTIPROCESSOR */
 
@@ -367,6 +367,10 @@ cpu_init_evcnt(struct cpu_info *cpi)
 			     NULL, cpu_name(cpi), "IPI mutex_trylock fail");
 	evcnt_attach_dynamic(&cpi->ci_xpmsg_mutex_fail_call, EVCNT_TYPE_MISC,
 			     NULL, cpu_name(cpi), "IPI mutex_trylock fail/call");
+	evcnt_attach_dynamic(&cpi->ci_xpmsg_mutex_not_held, EVCNT_TYPE_MISC,
+			     NULL, cpu_name(cpi), "IPI with mutex not held");
+	evcnt_attach_dynamic(&cpi->ci_xpmsg_bogus, EVCNT_TYPE_MISC,
+			     NULL, cpu_name(cpi), "bogus IPI");
 
 	/*
 	 * These are the per-cpu per-IPL hard & soft interrupt counters.
@@ -653,6 +657,8 @@ xcall(xcall_func_t func, xcall_trap_t tr
 	char *bufp = errbuf;
 	size_t bufsz = sizeof errbuf, wrsz;
 
+	if (is_noop) return;
+
 	mybit = (1 << cpuinfo.ci_cpuid);
 	callself = func && (cpuset & mybit) != 0;
 	cpuset &= ~mybit;
@@ -714,7 +720,10 @@ xcall(xcall_func_t func, xcall_trap_t tr
 		if ((cpuset & (1 << n)) == 0)
 			continue;
 
-		cpi->msg.tag = XPMSG_FUNC;
+		/*
+		 * Write msg.tag last - if another CPU is polling above it may
+		 * end up seeing an incomplete message. Not likely but still.
+		 */ 
 		cpi->msg.complete = 0;
 		p = &cpi->msg.u.xpmsg_func;
 		p->func = func;
@@ -722,6 +731,9 @@ xcall(xcall_func_t func, xcall_trap_t tr
 		p->arg0 = arg0;
 		p->arg1 = arg1;
 		p->arg2 = arg2;
+		__insn_barrier();
+		cpi->msg.tag = XPMSG_FUNC;
+		__insn_barrier();
 		/* Fast cross calls use interrupt level 14 */
 		raise_ipi(cpi,13+fasttrap);/*xcall_cookie->pil*/
 	}
@@ -737,7 +749,7 @@ xcall(xcall_func_t func, xcall_trap_t tr
 	 * have completed (bailing if it takes "too long", being loud about
 	 * this in the process).
 	 */
-	done = is_noop;
+	done = 0;
 	i = 1000000;	/* time-out, not too long, but still an _AGE_ */
 	while (!done) {
 		if (--i < 0) {
@@ -774,7 +786,7 @@ xcall(xcall_func_t func, xcall_trap_t tr
 
 	if (i >= 0 || debug_xcall == 0) {
 		if (i < 0)
-			printf_nolog("%s\n", errbuf);
+			aprint_error("%s\n", errbuf);
 		mutex_spin_exit(&xpmsg_mutex);
 		return;
 	}

Index: src/sys/arch/sparc/sparc/intr.c
diff -u src/sys/arch/sparc/sparc/intr.c:1.118 src/sys/arch/sparc/sparc/intr.c:1.118.22.1
--- src/sys/arch/sparc/sparc/intr.c:1.118	Sat Nov 16 23:54:01 2013
+++ src/sys/arch/sparc/sparc/intr.c	Fri Dec  8 06:05:15 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: intr.c,v 1.118 2013/11/16 23:54:01 mrg Exp $ */
+/*	$NetBSD: intr.c,v 1.118.22.1 2017/12/08 06:05:15 msaitoh Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.118 2013/11/16 23:54:01 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.118.22.1 2017/12/08 06:05:15 msaitoh Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_sparc_arch.h"
@@ -76,6 +76,8 @@ static int intr_biglock_wrapper(void *);
 void *xcall_cookie;
 #endif
 
+extern kmutex_t xpmsg_mutex;
+
 void	strayintr(struct clockframe *);
 #ifdef DIAGNOSTIC
 void	bogusintr(struct clockframe *);
@@ -241,7 +243,7 @@ nmi_hard(void)
 			DELAY(1);
 			if (n-- > 0)
 				continue;
-			printf("nmi_hard: SMP botch.");
+			printf("nmi_hard: SMP botch.\n");
 			break;
 		}
 	}
@@ -364,6 +366,27 @@ xcallintr(void *v)
 	if (v != xcallintr)
 		cpuinfo.ci_sintrcnt[13].ev_count++;
 
+	if (mutex_owned(&xpmsg_mutex) == 0) {
+		cpuinfo.ci_xpmsg_mutex_not_held.ev_count++;
+#ifdef DEBUG
+		printf("%s: mutex not held\n", __func__);
+#endif
+		cpuinfo.msg.complete = 1;
+		kpreempt_enable();
+		return;
+	}
+
+	if (cpuinfo.msg.complete != 0) {
+		cpuinfo.ci_xpmsg_bogus.ev_count++;
+#ifdef DEBUG
+		volatile struct xpmsg_func *p = &cpuinfo.msg.u.xpmsg_func;
+		printf("%s: bogus message %08x %08x %08x %08x\n", __func__,
+		    cpuinfo.msg.tag, (uint32_t)p->func, p->arg0, p->arg1);
+#endif
+		kpreempt_enable();
+		return;
+	}
+
 	/* notyet - cpuinfo.msg.received = 1; */
 	switch (cpuinfo.msg.tag) {
 	case XPMSG_FUNC:

Reply via email to