Module Name:    src
Committed By:   mrg
Date:           Thu Jan 27 05:31:14 UTC 2011

Modified Files:
        src/sys/arch/sparc/sparc: cpu.c cpuvar.h intr.c

Log Message:
to quote a new comment:

 * There's a deadlock potential between multiple CPUs trying
 * to xcall() at the same time, and the thread that loses the
 * race to get xpmsg_lock is at an IPL above the incoming IPI
 * IPL level, so it sits around waiting to take the lock while
 * the other CPU is waiting for this CPU to handle the IPI and
 * mark it as completed.
 *
 * If we fail to get the mutex, and we're at high enough IPL,
 * call xcallintr() if there is a valid msg.tag.

this seems to fix the problem where xcall() failed to ping cpus.
idea from martin, tested by macallan and myself.


To generate a diff of this commit:
cvs rdiff -u -r1.225 -r1.226 src/sys/arch/sparc/sparc/cpu.c
cvs rdiff -u -r1.84 -r1.85 src/sys/arch/sparc/sparc/cpuvar.h
cvs rdiff -u -r1.109 -r1.110 src/sys/arch/sparc/sparc/intr.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/sparc/sparc/cpu.c
diff -u src/sys/arch/sparc/sparc/cpu.c:1.225 src/sys/arch/sparc/sparc/cpu.c:1.226
--- src/sys/arch/sparc/sparc/cpu.c:1.225	Sat Jan 22 12:13:25 2011
+++ src/sys/arch/sparc/sparc/cpu.c	Thu Jan 27 05:31:13 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.225 2011/01/22 12:13:25 mrg Exp $ */
+/*	$NetBSD: cpu.c,v 1.226 2011/01/27 05:31:13 mrg Exp $ */
 
 /*
  * Copyright (c) 1996
@@ -52,7 +52,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.225 2011/01/22 12:13:25 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.226 2011/01/27 05:31:13 mrg Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_lockdebug.h"
@@ -301,14 +301,19 @@
 {
 
 	/*
-	 * Setup the per-cpu savefpstate counters.  The "savefp null"
-	 * counter should go away when the NULL struct fpstate * bug
-	 * is fixed.
+	 * Setup the per-cpu counters.
+	 *
+	 * The "savefp null" counter should go away when the NULL
+	 * struct fpstate * bug is fixed.
 	 */
 	evcnt_attach_dynamic(&cpi->ci_savefpstate, EVCNT_TYPE_MISC,
 			     NULL, cpu_name(cpi), "savefp ipi");
 	evcnt_attach_dynamic(&cpi->ci_savefpstate_null, EVCNT_TYPE_MISC,
 			     NULL, cpu_name(cpi), "savefp null ipi");
+	evcnt_attach_dynamic(&cpi->ci_xpmsg_mutex_fail, EVCNT_TYPE_MISC,
+			     NULL, cpu_name(cpi), "IPI mutex_trylock fail");
+	evcnt_attach_dynamic(&cpi->ci_xpmsg_mutex_fail_call, EVCNT_TYPE_MISC,
+			     NULL, cpu_name(cpi), "IPI mutex_trylock fail with call");
 }
 
 /*
@@ -581,6 +586,7 @@
 	struct cpu_info *cpi;
 	int n, i, done, callself, mybit;
 	volatile struct xpmsg_func *p;
+	u_int pil;
 	int fasttrap;
 	int is_noop = func == (xcall_func_t)sparc_noop;
 
@@ -592,7 +598,34 @@
 	cpuset &= cpu_ready_mask;
 
 	/* prevent interrupts that grab the kernel lock */
+#if 0
 	mutex_spin_enter(&xpmsg_mutex);
+#else
+	/*
+	 * There's a deadlock potential between multiple CPUs trying
+	 * to xcall() at the same time, and the thread that loses the
+	 * race to get xpmsg_lock is at an IPL above the incoming IPI
+	 * IPL level, so it sits around waiting to take the lock while
+	 * the other CPU is waiting for this CPU to handle the IPI and
+	 * mark it as completed.
+	 *
+	 * If we fail to get the mutex, and we're at high enough IPL,
+	 * call xcallintr() if there is a valid msg.tag.
+	 */
+	pil = (getpsr() & PSR_PIL) >> 8;
+	
+	if (cold || pil < 13)
+		mutex_spin_enter(&xpmsg_mutex);
+	else {
+		while (mutex_tryenter(&xpmsg_mutex) == 0) {
+			cpuinfo.ci_xpmsg_mutex_fail.ev_count++;
+			if (cpuinfo.msg.tag) {
+				cpuinfo.ci_xpmsg_mutex_fail_call.ev_count++;
+				xcallintr(NULL);
+			}
+		}
+	}
+#endif
 
 	/*
 	 * Firstly, call each CPU.  We do this so that they might have

Index: src/sys/arch/sparc/sparc/cpuvar.h
diff -u src/sys/arch/sparc/sparc/cpuvar.h:1.84 src/sys/arch/sparc/sparc/cpuvar.h:1.85
--- src/sys/arch/sparc/sparc/cpuvar.h:1.84	Thu Jan 13 05:20:27 2011
+++ src/sys/arch/sparc/sparc/cpuvar.h	Thu Jan 27 05:31:14 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpuvar.h,v 1.84 2011/01/13 05:20:27 mrg Exp $ */
+/*	$NetBSD: cpuvar.h,v 1.85 2011/01/27 05:31:14 mrg Exp $ */
 
 /*
  *  Copyright (c) 1996 The NetBSD Foundation, Inc.
@@ -340,6 +340,8 @@
 	struct evcnt ci_lev14;
 	struct evcnt ci_savefpstate;
 	struct evcnt ci_savefpstate_null;
+	struct evcnt ci_xpmsg_mutex_fail;
+	struct evcnt ci_xpmsg_mutex_fail_call;
 };
 
 /*
@@ -453,6 +455,8 @@
 typedef void (*xcall_func_t)(int, int, int);
 typedef void (*xcall_trap_t)(int, int, int);
 void xcall(xcall_func_t, xcall_trap_t, int, int, int, u_int);
+/* from intr.c */
+void xcallintr(void *);
 /* Shorthand */
 #define XCALL0(f,cpuset)		\
 	xcall((xcall_func_t)f, NULL, 0, 0, 0, cpuset)

Index: src/sys/arch/sparc/sparc/intr.c
diff -u src/sys/arch/sparc/sparc/intr.c:1.109 src/sys/arch/sparc/sparc/intr.c:1.110
--- src/sys/arch/sparc/sparc/intr.c:1.109	Sat Jan 22 10:37:22 2011
+++ src/sys/arch/sparc/sparc/intr.c	Thu Jan 27 05:31:14 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: intr.c,v 1.109 2011/01/22 10:37:22 mrg Exp $ */
+/*	$NetBSD: intr.c,v 1.110 2011/01/27 05:31:14 mrg Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.109 2011/01/22 10:37:22 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.110 2011/01/27 05:31:14 mrg Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_sparc_arch.h"
@@ -377,8 +377,11 @@
 #if defined(MULTIPROCESSOR)
 /*
  * Respond to an xcall() request from another CPU.
+ *
+ * This is also called directly from xcall() if we notice an
+ * incoming message while we're waiting to grab the xpmsg_mutex.
  */
-static void
+void
 xcallintr(void *v)
 {
 

Reply via email to