Module Name:    src
Committed By:   ad
Date:           Sat May 23 17:08:05 UTC 2009

Modified Files:
        src/sys/kern: kern_lock.c kern_tc.c
        src/sys/sys: lwp.h

Log Message:
- Add lwp_pctr(), get an LWP's preemption/ctxsw counter.
- Fix a preemption bug in CURCPU_IDLE_P() that can lead to a bogus
  assertion failure on DEBUG kernels.
- Fix MP/preemption races with timecounter detachment.


To generate a diff of this commit:
cvs rdiff -u -r1.147 -r1.148 src/sys/kern/kern_lock.c
cvs rdiff -u -r1.38 -r1.39 src/sys/kern/kern_tc.c
cvs rdiff -u -r1.117 -r1.118 src/sys/sys/lwp.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/kern_lock.c
diff -u src/sys/kern/kern_lock.c:1.147 src/sys/kern/kern_lock.c:1.148
--- src/sys/kern/kern_lock.c:1.147	Wed Nov 12 12:36:16 2008
+++ src/sys/kern/kern_lock.c	Sat May 23 17:08:04 2009
@@ -1,7 +1,7 @@
-/*	$NetBSD: kern_lock.c,v 1.147 2008/11/12 12:36:16 ad Exp $	*/
+/*	$NetBSD: kern_lock.c,v 1.148 2009/05/23 17:08:04 ad Exp $	*/
 
 /*-
- * Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.147 2008/11/12 12:36:16 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.148 2009/05/23 17:08:04 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/proc.h>
@@ -42,6 +42,7 @@
 #include <sys/cpu.h>
 #include <sys/syslog.h>
 #include <sys/atomic.h>
+#include <sys/lwp.h>
 
 #include <machine/stdarg.h>
 #include <machine/lock.h>
@@ -59,6 +60,8 @@
 assert_sleepable(void)
 {
 	const char *reason;
+	uint64_t pctr;
+	bool idle;
 
 	if (panicstr != NULL) {
 		return;
@@ -66,14 +69,23 @@
 
 	LOCKDEBUG_BARRIER(kernel_lock, 1);
 
+	/*
+	 * Avoid disabling/re-enabling preemption here since this
+	 * routine may be called in delicate situations.
+	 */
+	do {
+		pctr = lwp_pctr();
+		idle = CURCPU_IDLE_P();
+	} while (pctr != lwp_pctr());
+
 	reason = NULL;
-	if (CURCPU_IDLE_P() && !cold) {
+	if (idle && !cold) {
 		reason = "idle";
 	}
 	if (cpu_intr_p()) {
 		reason = "interrupt";
 	}
-	if ((curlwp->l_pflag & LP_INTR) != 0) {
+	if (cpu_softintr_p()) {
 		reason = "softint";
 	}
 

Index: src/sys/kern/kern_tc.c
diff -u src/sys/kern/kern_tc.c:1.38 src/sys/kern/kern_tc.c:1.39
--- src/sys/kern/kern_tc.c:1.38	Sun Jan 11 02:45:52 2009
+++ src/sys/kern/kern_tc.c	Sat May 23 17:08:04 2009
@@ -1,9 +1,12 @@
-/* $NetBSD: kern_tc.c,v 1.38 2009/01/11 02:45:52 christos Exp $ */
+/* $NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $ */
 
 /*-
- * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Andrew Doran.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -37,7 +40,7 @@
 
 #include <sys/cdefs.h>
 /* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
-__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.38 2009/01/11 02:45:52 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $");
 
 #include "opt_ntp.h"
 
@@ -54,6 +57,7 @@
 #include <sys/kauth.h>
 #include <sys/mutex.h>
 #include <sys/atomic.h>
+#include <sys/xcall.h>
 
 /*
  * A large step happens on boot.  This constant detects such steps.
@@ -126,6 +130,7 @@
 
 kmutex_t timecounter_lock;
 static u_int timecounter_mods;
+static volatile int timecounter_removals = 1;
 static u_int timecounter_bad;
 
 #ifdef __FreeBSD__
@@ -309,15 +314,49 @@
 binuptime(struct bintime *bt)
 {
 	struct timehands *th;
-	u_int gen;
+	lwp_t *l;
+	u_int lgen, gen;
 
 	TC_COUNT(nbinuptime);
+
+	/*
+	 * Provide exclusion against tc_detach().
+	 *
+	 * We record the number of timecounter removals before accessing
+	 * timecounter state.  Note that the LWP can be using multiple
+	 * "generations" at once, due to interrupts (interrupted while in
+	 * this function).  Hardware interrupts will borrow the interrupted
+	 * LWP's l_tcgen value for this purpose, and can themselves be
+	 * interrupted by higher priority interrupts.  In this case we need
+	 * to ensure that the oldest generation in use is recorded.
+	 *
+	 * splsched() is too expensive to use, so we take care to structure
+	 * this code in such a way that it is not required.  Likewise, we
+	 * do not disable preemption.
+	 *
+	 * Memory barriers are also too expensive to use for such a
+	 * performance critical function.  The good news is that we do not
+	 * need memory barriers for this type of exclusion, as the thread
+	 * updating timecounter_removals will issue a broadcast cross call
+	 * before inspecting our l_tcgen value (this elides memory ordering
+	 * issues).
+	 */
+	l = curlwp;
+	lgen = l->l_tcgen;
+	if (__predict_true(lgen == 0)) {
+		l->l_tcgen = timecounter_removals;
+	}
+	__insn_barrier();
+
 	do {
 		th = timehands;
 		gen = th->th_generation;
 		*bt = th->th_offset;
 		bintime_addx(bt, th->th_scale * tc_delta(th));
 	} while (gen == 0 || gen != th->th_generation);
+
+	__insn_barrier();
+	l->l_tcgen = lgen;
 }
 
 void
@@ -543,8 +582,11 @@
 {
 	struct timecounter *tc;
 	struct timecounter **tcp = NULL;
-	int rc = 0;
+	int removals;
+	uint64_t where;
+	lwp_t *l;
 
+	/* First, find the timecounter. */
 	mutex_spin_enter(&timecounter_lock);
 	for (tcp = &timecounters, tc = timecounters;
 	     tc != NULL;
@@ -553,17 +595,62 @@
 			break;
 	}
 	if (tc == NULL) {
-		rc = ESRCH;
-	} else {
-		*tcp = tc->tc_next;
-		if (timecounter == target) {
-			tc_pick();
-			tc_windup();
-		}
-		timecounter_mods++;
+		mutex_spin_exit(&timecounter_lock);
+		return ESRCH;
+	}
+
+	/* And now, remove it. */
+	*tcp = tc->tc_next;
+	if (timecounter == target) {
+		tc_pick();
+		tc_windup();
 	}
+	timecounter_mods++;
+	removals = timecounter_removals++;
 	mutex_spin_exit(&timecounter_lock);
-	return rc;
+
+	/*
+	 * We now have to determine if any threads in the system are still
+	 * making use of this timecounter.
+	 *
+	 * We issue a broadcast cross call to elide memory ordering issues,
+	 * then scan all LWPs in the system looking at each's timecounter
+	 * generation number.  We need to see a value of zero (not actively
+	 * using a timecounter) or a value greater than our removal value.
+	 *
+	 * We may race with threads that read `timecounter_removals' and
+	 * then get preempted before updating `l_tcgen'.  This is not
+	 * a problem, since it means that these threads have not yet started
+	 * accessing timecounter state.  All we do need is one clean
+	 * snapshot of the system where every thread appears not to be using
+	 * old timecounter state.
+	 */
+	for (;;) {
+		where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
+		xc_wait(where);
+
+		mutex_enter(proc_lock);
+		LIST_FOREACH(l, &alllwp, l_list) {
+			if (l->l_tcgen == 0 || l->l_tcgen > removals) {
+				/*
+				 * Not using timecounter or old timecounter
+				 * state at time of our xcall or later.
+				 */
+				continue;
+			}
+			break;
+		}
+		mutex_exit(proc_lock);
+
+		/*
+		 * If the timecounter is still in use, wait at least 10ms
+		 * before retrying.
+		 */
+		if (l == NULL) {
+			return 0;
+		}
+		(void)kpause("tcdetach", false, mstohz(10), NULL);
+	}
 }
 
 /* Report the frequency of the current timecounter. */

Index: src/sys/sys/lwp.h
diff -u src/sys/sys/lwp.h:1.117 src/sys/sys/lwp.h:1.118
--- src/sys/sys/lwp.h:1.117	Wed Feb  4 21:17:39 2009
+++ src/sys/sys/lwp.h	Sat May 23 17:08:05 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: lwp.h,v 1.117 2009/02/04 21:17:39 ad Exp $	*/
+/*	$NetBSD: lwp.h,v 1.118 2009/05/23 17:08:05 ad Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -173,6 +173,8 @@
 	uintptr_t	l_pfailaddr;	/* !: for kernel preemption */
 	uintptr_t	l_pfaillock;	/* !: for kernel preemption */
 	_TAILQ_HEAD(,struct lockdebug,volatile) l_ld_locks;/* !: locks held by LWP */
+	int		l_tcgen;	/* !: for timecounter removal */
+	int		l_unused2;	/* !: for future use */
 
 	/* These are only used by 'options SYSCALL_TIMES' */
 	uint32_t        l_syscall_time; /* !: time epoch for current syscall */
@@ -306,6 +308,7 @@
 void	lwp_free(lwp_t *, bool, bool);
 void	lwp_sys_init(void);
 u_int	lwp_unsleep(lwp_t *, bool);
+uint64_t lwp_pctr(void);
 
 int	lwp_specific_key_create(specificdata_key_t *, specificdata_dtor_t);
 void	lwp_specific_key_delete(specificdata_key_t);

Reply via email to