Module Name:    src
Committed By:   martin
Date:           Wed Mar 21 11:52:50 UTC 2018

Modified Files:
        src/sys/arch/sparc/sparc [netbsd-7]: cpu.c intr.c

Log Message:
Pull up following revision(s) (requested by mrg in ticket #1585):
        sys/arch/sparc/sparc/cpu.c: revision 1.250 (patch)
        sys/arch/sparc/include/cpu.h: revision 1.99 (patch -> cpuvar.h)
        sys/arch/sparc/sparc/intr.c: revision 1.119 (patch)

- return early in xcall() if the function is sparc_noop() instead of triggering
  the IPI and then ignoring responses (or lack thereof)
- write the .tag field last to avoid a race when polling for an incoming
  IPI
- add event counters for IPIs being caught with the mutex not held, and for
  messages that are already marked as completed

With this my SS20 made it through 48 hours of pkgsrc with MAKE_JOBS=3 and a
pair of SM81s.

Hypersparcs still crash, but instead of craziness we get actual error messages.
Apparently one CPU will occasionally do a watchdog reset, which according to
the manual is caused by catching a trap with traps disabled. Now to figure
out how that can even happen...


To generate a diff of this commit:
cvs rdiff -u -r1.248 -r1.248.2.1 src/sys/arch/sparc/sparc/cpu.c
cvs rdiff -u -r1.118 -r1.118.4.1 src/sys/arch/sparc/sparc/intr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/sparc/sparc/cpu.c
diff -u src/sys/arch/sparc/sparc/cpu.c:1.248 src/sys/arch/sparc/sparc/cpu.c:1.248.2.1
--- src/sys/arch/sparc/sparc/cpu.c:1.248	Fri Jul 25 17:21:32 2014
+++ src/sys/arch/sparc/sparc/cpu.c	Wed Mar 21 11:52:49 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.248 2014/07/25 17:21:32 nakayama Exp $ */
+/*	$NetBSD: cpu.c,v 1.248.2.1 2018/03/21 11:52:49 martin Exp $ */
 
 /*
  * Copyright (c) 1996
@@ -52,7 +52,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.248 2014/07/25 17:21:32 nakayama Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.248.2.1 2018/03/21 11:52:49 martin Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_lockdebug.h"
@@ -183,7 +183,7 @@ int go_smp_cpus = 0;	/* non-primary CPUs
  * This must be locked around all message transactions to ensure only
  * one CPU is generating them.
  */
-static kmutex_t xpmsg_mutex;
+kmutex_t xpmsg_mutex;
 
 #endif /* MULTIPROCESSOR */
 
@@ -367,6 +367,10 @@ cpu_init_evcnt(struct cpu_info *cpi)
 			     NULL, cpu_name(cpi), "IPI mutex_trylock fail");
 	evcnt_attach_dynamic(&cpi->ci_xpmsg_mutex_fail_call, EVCNT_TYPE_MISC,
 			     NULL, cpu_name(cpi), "IPI mutex_trylock fail/call");
+	evcnt_attach_dynamic(&cpi->ci_xpmsg_mutex_not_held, EVCNT_TYPE_MISC,
+			     NULL, cpu_name(cpi), "IPI with mutex not held");
+	evcnt_attach_dynamic(&cpi->ci_xpmsg_bogus, EVCNT_TYPE_MISC,
+			     NULL, cpu_name(cpi), "bogus IPI");
 
 	/*
 	 * These are the per-cpu per-IPL hard & soft interrupt counters.
@@ -653,6 +657,8 @@ xcall(xcall_func_t func, xcall_trap_t tr
 	char *bufp = errbuf;
 	size_t bufsz = sizeof errbuf, wrsz;
 
+	if (is_noop) return;
+
 	mybit = (1 << cpuinfo.ci_cpuid);
 	callself = func && (cpuset & mybit) != 0;
 	cpuset &= ~mybit;
@@ -714,7 +720,10 @@ xcall(xcall_func_t func, xcall_trap_t tr
 		if ((cpuset & (1 << n)) == 0)
 			continue;
 
-		cpi->msg.tag = XPMSG_FUNC;
+		/*
+		 * Write msg.tag last - if another CPU is polling above it may
+		 * end up seeing an incomplete message. Not likely but still.
+		 */ 
 		cpi->msg.complete = 0;
 		p = &cpi->msg.u.xpmsg_func;
 		p->func = func;
@@ -722,6 +731,9 @@ xcall(xcall_func_t func, xcall_trap_t tr
 		p->arg0 = arg0;
 		p->arg1 = arg1;
 		p->arg2 = arg2;
+		__insn_barrier();
+		cpi->msg.tag = XPMSG_FUNC;
+		__insn_barrier();
 		/* Fast cross calls use interrupt level 14 */
 		raise_ipi(cpi,13+fasttrap);/*xcall_cookie->pil*/
 	}
@@ -737,7 +749,7 @@ xcall(xcall_func_t func, xcall_trap_t tr
 	 * have completed (bailing if it takes "too long", being loud about
 	 * this in the process).
 	 */
-	done = is_noop;
+	done = 0;
 	i = 1000000;	/* time-out, not too long, but still an _AGE_ */
 	while (!done) {
 		if (--i < 0) {
@@ -774,7 +786,7 @@ xcall(xcall_func_t func, xcall_trap_t tr
 
 	if (i >= 0 || debug_xcall == 0) {
 		if (i < 0)
-			printf_nolog("%s\n", errbuf);
+			aprint_error("%s\n", errbuf);
 		mutex_spin_exit(&xpmsg_mutex);
 		return;
 	}

Index: src/sys/arch/sparc/sparc/intr.c
diff -u src/sys/arch/sparc/sparc/intr.c:1.118 src/sys/arch/sparc/sparc/intr.c:1.118.4.1
--- src/sys/arch/sparc/sparc/intr.c:1.118	Sat Nov 16 23:54:01 2013
+++ src/sys/arch/sparc/sparc/intr.c	Wed Mar 21 11:52:49 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: intr.c,v 1.118 2013/11/16 23:54:01 mrg Exp $ */
+/*	$NetBSD: intr.c,v 1.118.4.1 2018/03/21 11:52:49 martin Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.118 2013/11/16 23:54:01 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.118.4.1 2018/03/21 11:52:49 martin Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_sparc_arch.h"
@@ -76,6 +76,8 @@ static int intr_biglock_wrapper(void *);
 void *xcall_cookie;
 #endif
 
+extern kmutex_t xpmsg_mutex;
+
 void	strayintr(struct clockframe *);
 #ifdef DIAGNOSTIC
 void	bogusintr(struct clockframe *);
@@ -241,7 +243,7 @@ nmi_hard(void)
 			DELAY(1);
 			if (n-- > 0)
 				continue;
-			printf("nmi_hard: SMP botch.");
+			printf("nmi_hard: SMP botch.\n");
 			break;
 		}
 	}
@@ -364,6 +366,27 @@ xcallintr(void *v)
 	if (v != xcallintr)
 		cpuinfo.ci_sintrcnt[13].ev_count++;
 
+	if (mutex_owned(&xpmsg_mutex) == 0) {
+		cpuinfo.ci_xpmsg_mutex_not_held.ev_count++;
+#ifdef DEBUG
+		printf("%s: mutex not held\n", __func__);
+#endif
+		cpuinfo.msg.complete = 1;
+		kpreempt_enable();
+		return;
+	}
+
+	if (cpuinfo.msg.complete != 0) {
+		cpuinfo.ci_xpmsg_bogus.ev_count++;
+#ifdef DEBUG
+		volatile struct xpmsg_func *p = &cpuinfo.msg.u.xpmsg_func;
+		printf("%s: bogus message %08x %08x %08x %08x\n", __func__,
+		    cpuinfo.msg.tag, (uint32_t)p->func, p->arg0, p->arg1);
+#endif
+		kpreempt_enable();
+		return;
+	}
+
 	/* notyet - cpuinfo.msg.received = 1; */
 	switch (cpuinfo.msg.tag) {
 	case XPMSG_FUNC:

Reply via email to