Module Name: src Committed By: ad Date: Sat May 23 17:08:05 UTC 2009
Modified Files: src/sys/kern: kern_lock.c kern_tc.c src/sys/sys: lwp.h Log Message: - Add lwp_pctr(), get an LWP's preemption/ctxsw counter. - Fix a preemption bug in CURCPU_IDLE_P() that can lead to a bogus assertion failure on DEBUG kernels. - Fix MP/preemption races with timecounter detachment. To generate a diff of this commit: cvs rdiff -u -r1.147 -r1.148 src/sys/kern/kern_lock.c cvs rdiff -u -r1.38 -r1.39 src/sys/kern/kern_tc.c cvs rdiff -u -r1.117 -r1.118 src/sys/sys/lwp.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/kern_lock.c diff -u src/sys/kern/kern_lock.c:1.147 src/sys/kern/kern_lock.c:1.148 --- src/sys/kern/kern_lock.c:1.147 Wed Nov 12 12:36:16 2008 +++ src/sys/kern/kern_lock.c Sat May 23 17:08:04 2009 @@ -1,7 +1,7 @@ -/* $NetBSD: kern_lock.c,v 1.147 2008/11/12 12:36:16 ad Exp $ */ +/* $NetBSD: kern_lock.c,v 1.148 2009/05/23 17:08:04 ad Exp $ */ /*- - * Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -31,7 +31,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.147 2008/11/12 12:36:16 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.148 2009/05/23 17:08:04 ad Exp $"); #include <sys/param.h> #include <sys/proc.h> @@ -42,6 +42,7 @@ #include <sys/cpu.h> #include <sys/syslog.h> #include <sys/atomic.h> +#include <sys/lwp.h> #include <machine/stdarg.h> #include <machine/lock.h> @@ -59,6 +60,8 @@ assert_sleepable(void) { const char *reason; + uint64_t pctr; + bool idle; if (panicstr != NULL) { return; @@ -66,14 +69,23 @@ LOCKDEBUG_BARRIER(kernel_lock, 1); + /* + * Avoid disabling/re-enabling preemption here since this + * routine may be called in delicate situations. 
+ */ + do { + pctr = lwp_pctr(); + idle = CURCPU_IDLE_P(); + } while (pctr != lwp_pctr()); + reason = NULL; - if (CURCPU_IDLE_P() && !cold) { + if (idle && !cold) { reason = "idle"; } if (cpu_intr_p()) { reason = "interrupt"; } - if ((curlwp->l_pflag & LP_INTR) != 0) { + if (cpu_softintr_p()) { reason = "softint"; } Index: src/sys/kern/kern_tc.c diff -u src/sys/kern/kern_tc.c:1.38 src/sys/kern/kern_tc.c:1.39 --- src/sys/kern/kern_tc.c:1.38 Sun Jan 11 02:45:52 2009 +++ src/sys/kern/kern_tc.c Sat May 23 17:08:04 2009 @@ -1,9 +1,12 @@ -/* $NetBSD: kern_tc.c,v 1.38 2009/01/11 02:45:52 christos Exp $ */ +/* $NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $ */ /*- - * Copyright (c) 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. * All rights reserved. * + * This code is derived from software contributed to The NetBSD Foundation + * by Andrew Doran. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -37,7 +40,7 @@ #include <sys/cdefs.h> /* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */ -__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.38 2009/01/11 02:45:52 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $"); #include "opt_ntp.h" @@ -54,6 +57,7 @@ #include <sys/kauth.h> #include <sys/mutex.h> #include <sys/atomic.h> +#include <sys/xcall.h> /* * A large step happens on boot. This constant detects such steps. @@ -126,6 +130,7 @@ kmutex_t timecounter_lock; static u_int timecounter_mods; +static volatile int timecounter_removals = 1; static u_int timecounter_bad; #ifdef __FreeBSD__ @@ -309,15 +314,49 @@ binuptime(struct bintime *bt) { struct timehands *th; - u_int gen; + lwp_t *l; + u_int lgen, gen; TC_COUNT(nbinuptime); + + /* + * Provide exclusion against tc_detach(). 
+ * + * We record the number of timecounter removals before accessing + * timecounter state. Note that the LWP can be using multiple + * "generations" at once, due to interrupts (interrupted while in + * this function). Hardware interrupts will borrow the interrupted + * LWP's l_tcgen value for this purpose, and can themselves be + * interrupted by higher priority interrupts. In this case we need + * to ensure that the oldest generation in use is recorded. + * + * splsched() is too expensive to use, so we take care to structure + * this code in such a way that it is not required. Likewise, we + * do not disable preemption. + * + * Memory barriers are also too expensive to use for such a + * performance critical function. The good news is that we do not + * need memory barriers for this type of exclusion, as the thread + * updating timecounter_removals will issue a broadcast cross call + * before inspecting our l_tcgen value (this elides memory ordering + * issues). + */ + l = curlwp; + lgen = l->l_tcgen; + if (__predict_true(lgen == 0)) { + l->l_tcgen = timecounter_removals; + } + __insn_barrier(); + do { th = timehands; gen = th->th_generation; *bt = th->th_offset; bintime_addx(bt, th->th_scale * tc_delta(th)); } while (gen == 0 || gen != th->th_generation); + + __insn_barrier(); + l->l_tcgen = lgen; } void @@ -543,8 +582,11 @@ { struct timecounter *tc; struct timecounter **tcp = NULL; - int rc = 0; + int removals; + uint64_t where; + lwp_t *l; + /* First, find the timecounter. */ mutex_spin_enter(&timecounter_lock); for (tcp = &timecounters, tc = timecounters; tc != NULL; @@ -553,17 +595,62 @@ break; } if (tc == NULL) { - rc = ESRCH; - } else { - *tcp = tc->tc_next; - if (timecounter == target) { - tc_pick(); - tc_windup(); - } - timecounter_mods++; + mutex_spin_exit(&timecounter_lock); + return ESRCH; + } + + /* And now, remove it. 
*/ + *tcp = tc->tc_next; + if (timecounter == target) { + tc_pick(); + tc_windup(); } + timecounter_mods++; + removals = timecounter_removals++; mutex_spin_exit(&timecounter_lock); - return rc; + + /* + * We now have to determine if any threads in the system are still + * making use of this timecounter. + * + * We issue a broadcast cross call to elide memory ordering issues, + * then scan all LWPs in the system looking at each's timecounter + * generation number. We need to see a value of zero (not actively + * using a timecounter) or a value greater than our removal value. + * + * We may race with threads that read `timecounter_removals' and + * then get preempted before updating `l_tcgen'. This is not + * a problem, since it means that these threads have not yet started + * accessing timecounter state. All we do need is one clean + * snapshot of the system where every thread appears not to be using + * old timecounter state. + */ + for (;;) { + where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL); + xc_wait(where); + + mutex_enter(proc_lock); + LIST_FOREACH(l, &alllwp, l_list) { + if (l->l_tcgen == 0 || l->l_tcgen > removals) { + /* + * Not using timecounter or old timecounter + * state at time of our xcall or later. + */ + continue; + } + break; + } + mutex_exit(proc_lock); + + /* + * If the timecounter is still in use, wait at least 10ms + * before retrying. + */ + if (l == NULL) { + return 0; + } + (void)kpause("tcdetach", false, mstohz(10), NULL); + } } /* Report the frequency of the current timecounter. */ Index: src/sys/sys/lwp.h diff -u src/sys/sys/lwp.h:1.117 src/sys/sys/lwp.h:1.118 --- src/sys/sys/lwp.h:1.117 Wed Feb 4 21:17:39 2009 +++ src/sys/sys/lwp.h Sat May 23 17:08:05 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: lwp.h,v 1.117 2009/02/04 21:17:39 ad Exp $ */ +/* $NetBSD: lwp.h,v 1.118 2009/05/23 17:08:05 ad Exp $ */ /*- * Copyright (c) 2001, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc. 
@@ -173,6 +173,8 @@ uintptr_t l_pfailaddr; /* !: for kernel preemption */ uintptr_t l_pfaillock; /* !: for kernel preemption */ _TAILQ_HEAD(,struct lockdebug,volatile) l_ld_locks;/* !: locks held by LWP */ + int l_tcgen; /* !: for timecounter removal */ + int l_unused2; /* !: for future use */ /* These are only used by 'options SYSCALL_TIMES' */ uint32_t l_syscall_time; /* !: time epoch for current syscall */ @@ -306,6 +308,7 @@ void lwp_free(lwp_t *, bool, bool); void lwp_sys_init(void); u_int lwp_unsleep(lwp_t *, bool); +uint64_t lwp_pctr(void); int lwp_specific_key_create(specificdata_key_t *, specificdata_dtor_t); void lwp_specific_key_delete(specificdata_key_t);