On Wed, Oct 11, 2023 at 09:08:33PM -0500, Scott Cheloha wrote: > On Tue, Oct 10, 2023 at 05:26:14PM +0300, Vitaliy Makkoveev wrote: > > On Tue, Oct 10, 2023 at 09:06:23AM -0500, Scott Cheloha wrote: > > > On Fri, Oct 06, 2023 at 03:41:39PM +0200, Alexander Bluhm wrote: > > > > On Fri, Oct 06, 2023 at 03:47:31PM +0300, Vitaliy Makkoveev wrote: > > > > > On Fri, Oct 06, 2023 at 02:14:52PM +0200, Alexander Bluhm wrote: > > > > > > > @@ -718,11 +743,13 @@ softclock(void *arg) > > > > > > > softclock_process_tick_timeout(to, new); > > > > > > > } > > > > > > > tostat.tos_softclocks++; > > > > > > > - needsproc = !CIRCQ_EMPTY(&timeout_proc); > > > > > > > - mtx_leave(&timeout_mutex); > > > > > > > - > > > > > > > - if (needsproc) > > > > > > > + if (!CIRCQ_EMPTY(&timeout_proc)) > > > > > > > wakeup(&timeout_proc); > > > > > > > +#ifdef MULTIPROCESSOR > > > > > > > + if(!CIRCQ_EMPTY(&timeout_proc_mpsafe)) > > > > > > > + wakeup(&timeout_proc_mpsafe); > > > > > > > +#endif > > > > > > > + mtx_leave(&timeout_mutex); > > > > > > > } > > > > > > > > > > > > > > void > > > > > > > > > > > > Was there a good reason that wakeup() did run without mutex? > > > > > > Do we really want to change this? > > > > > > > > > > > > > > > > I dont understand you. Original code does wakeup() outside mutex. I > > > > > moved wakeup() under mutex. You want to move it back? > > > > > > > > I just wanted to know why you moved it. > > > > > > > > Now I see. You use msleep_nsec() with timeout_mutex. Putting > > > > wakeup in mutex ensures that you don't miss it. > > > > > > Do we actually need to move the softclock() wakeup calls into the > > > mutex? As long as CIRCQ_EMPTY(...) is evaluated within timeout_mutex, > > > the thread can't get stuck waiting for a wakeup that isn't coming. > > > Both threads now sleep via msleep_nsec(), so there is no "gap" between > > > evaluation and unlock. > > > > > > Am I missing something else? 
> > > > On the other hand, why not move them under the `timeout_mutex' mutex(9)? > > Does this unlocked call provide something significant? > > If you want to move the wakeups into timeout_mutex, let's do it in a > separate patch. >
Leave it as is. With current diff this doesn't matter. > However, near as I can tell, keeping the calls where they are is still > correct. Please speak up if this is not true. > > And note that all the wakeup() and softintr_schedule() calls are > currently made outside of timeout_mutex. Moving them for no reason > feels like we are buying trouble. > > > > > Nitpick: timeoutmp_proc should be timeout_procmp. timeout_ is the > > > > prefix in this file. mp suffix is easier to see at the end. > > > > > > > > >+ if (kthread_create(softclockmp_thread, NULL, NULL, > > > > >"softclockm")) > > > > "softclockm" -> "softclockmp" > > > > > > > > OK bluhm@, but let's wait for cheloha@ and see what he thinks > > > > > > Revised patch: > > > > > > - Add TIMEOUT_MPSAFE support to timeout_barrier(). This is crucial. > > > - Keep the names in the existing namespaces where possible. > > > - Keep the wakeup(9) calls in softclock() outside of timeout_mutex. > > > ... unless I have made an error, they can stay where they are. > > > - Trim the processing loops in the threads. > > > - Tweak the ddb(4) printing code to distinguish the locked and > > > unlocked thread circqs. > > > > > > mvs/bluhm: try this with your favorite process-context timeout and > > > make sure the timeouts still run. > > > > > > Assuming everything works, ok? > > > > > > > ok by me, with the one nit: > > > > > + msleep_nsec(&timeout_proc, &timeout_mutex, PSWP, "bored", > > > + INFSLP); > > > > "bored" is used by tasks. Can you use another ident? > > "tmoslp" matches up with the "tmobar" wmesg in timeout_barrier(), so > let's try "tmoslp". > > I will commit this tomorrow unless I hear otherwise. > Go ahead, but don't forget to add space between "MULTIPROCESSOR" and "*/". 
> + tostat.tos_thread_wakeups++; > + msleep_nsec(&timeout_proc_mp, &timeout_mutex, PSWP, "tmoslp", > + INFSLP); > + } > +} > +#endif /* MULTIPROCESSOR*/ > Index: share/man/man9/timeout.9 > =================================================================== > RCS file: /cvs/src/share/man/man9/timeout.9,v > retrieving revision 1.56 > diff -u -p -r1.56 timeout.9 > --- share/man/man9/timeout.9 1 Jan 2023 01:19:18 -0000 1.56 > +++ share/man/man9/timeout.9 12 Oct 2023 02:06:29 -0000 > @@ -193,11 +193,16 @@ Counts the time elapsed since the system > The timeout's behavior may be configured with the bitwise OR of > zero or more of the following > .Fa flags : > -.Bl -tag -width TIMEOUT_PROC > +.Bl -tag -width TIMEOUT_MPSAFE > .It Dv TIMEOUT_PROC > Execute the timeout in a process context instead of the default > .Dv IPL_SOFTCLOCK > interrupt context. > +.It Dv TIMEOUT_MPSAFE > +Execute the timeout without the kernel lock. > +Requires the > +.Dv TIMEOUT_PROC > +flag. > .El > .El > .Pp > @@ -367,8 +372,9 @@ The function > .Fa fn > must not block and must be safe to execute on any CPU in the system. > .Pp > -Currently, > -all timeouts are executed under the kernel lock. > +Timeouts without the > +.Dv TIMEOUT_MPSAFE > +flag are executed under the kernel lock. 
> .Sh RETURN VALUES > .Fn timeout_add , > .Fn timeout_add_sec , > Index: sys/sys/timeout.h > =================================================================== > RCS file: /cvs/src/sys/sys/timeout.h,v > retrieving revision 1.47 > diff -u -p -r1.47 timeout.h > --- sys/sys/timeout.h 31 Dec 2022 16:06:24 -0000 1.47 > +++ sys/sys/timeout.h 12 Oct 2023 02:06:29 -0000 > @@ -54,6 +54,7 @@ struct timeout { > #define TIMEOUT_ONQUEUE 0x02 /* on any timeout queue */ > #define TIMEOUT_INITIALIZED 0x04 /* initialized */ > #define TIMEOUT_TRIGGERED 0x08 /* running or ran */ > +#define TIMEOUT_MPSAFE 0x10 /* run without kernel lock */ > > struct timeoutstat { > uint64_t tos_added; /* timeout_add*(9) calls */ > Index: sys/kern/kern_timeout.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_timeout.c,v > retrieving revision 1.95 > diff -u -p -r1.95 kern_timeout.c > --- sys/kern/kern_timeout.c 29 Jul 2023 06:52:08 -0000 1.95 > +++ sys/kern/kern_timeout.c 12 Oct 2023 02:06:29 -0000 > @@ -75,6 +75,9 @@ struct circq timeout_wheel_kc[BUCKETS]; > struct circq timeout_new; /* [T] New, unscheduled timeouts */ > struct circq timeout_todo; /* [T] Due or needs rescheduling */ > struct circq timeout_proc; /* [T] Due + needs process context */ > +#ifdef MULTIPROCESSOR > +struct circq timeout_proc_mp; /* [T] Process ctx + no kernel > lock */ > +#endif > > time_t timeout_level_width[WHEELCOUNT]; /* [I] Wheel level width > (seconds) */ > struct timespec tick_ts; /* [I] Length of a tick (1/hz secs) */ > @@ -171,6 +174,9 @@ void softclock_create_thread(void *); > void softclock_process_kclock_timeout(struct timeout *, int); > void softclock_process_tick_timeout(struct timeout *, int); > void softclock_thread(void *); > +#ifdef MULTIPROCESSOR > +void softclock_thread_mp(void *); > +#endif > void timeout_barrier_timeout(void *); > uint32_t timeout_bucket(const struct timeout *); > uint32_t timeout_maskwheel(uint32_t, const struct timespec *); > 
@@ -228,6 +234,9 @@ timeout_startup(void) > CIRCQ_INIT(&timeout_new); > CIRCQ_INIT(&timeout_todo); > CIRCQ_INIT(&timeout_proc); > +#ifdef MULTIPROCESSOR > + CIRCQ_INIT(&timeout_proc_mp); > +#endif > for (b = 0; b < nitems(timeout_wheel); b++) > CIRCQ_INIT(&timeout_wheel[b]); > for (b = 0; b < nitems(timeout_wheel_kc); b++) > @@ -261,10 +270,16 @@ void > timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int > kclock, > int flags) > { > + KASSERT(!ISSET(flags, ~(TIMEOUT_PROC | TIMEOUT_MPSAFE))); > + > to->to_func = fn; > to->to_arg = arg; > to->to_kclock = kclock; > to->to_flags = flags | TIMEOUT_INITIALIZED; > + > + /* For now, only process context timeouts may be marked MP-safe. */ > + if (ISSET(to->to_flags, TIMEOUT_MPSAFE)) > + KASSERT(ISSET(to->to_flags, TIMEOUT_PROC)); > } > > void > @@ -455,13 +470,13 @@ timeout_barrier(struct timeout *to) > { > struct timeout barrier; > struct cond c; > - int procflag; > + int flags; > > - procflag = (to->to_flags & TIMEOUT_PROC); > - timeout_sync_order(procflag); > + flags = to->to_flags & (TIMEOUT_PROC | TIMEOUT_MPSAFE); > + timeout_sync_order(ISSET(flags, TIMEOUT_PROC)); > > timeout_set_flags(&barrier, timeout_barrier_timeout, &c, KCLOCK_NONE, > - procflag); > + flags); > barrier.to_process = curproc->p_p; > cond_init(&c); > > @@ -469,16 +484,26 @@ timeout_barrier(struct timeout *to) > > barrier.to_time = ticks; > SET(barrier.to_flags, TIMEOUT_ONQUEUE); > - if (procflag) > - CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list); > - else > + if (ISSET(flags, TIMEOUT_PROC)) { > +#ifdef MULTIPROCESSOR > + if (ISSET(flags, TIMEOUT_MPSAFE)) > + CIRCQ_INSERT_TAIL(&timeout_proc_mp, &barrier.to_list); > + else > +#endif > + CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list); > + } else > CIRCQ_INSERT_TAIL(&timeout_todo, &barrier.to_list); > > mtx_leave(&timeout_mutex); > > - if (procflag) > - wakeup_one(&timeout_proc); > - else > + if (ISSET(flags, TIMEOUT_PROC)) { > +#ifdef MULTIPROCESSOR > + if (ISSET(flags, 
TIMEOUT_MPSAFE)) > + wakeup_one(&timeout_proc_mp); > + else > +#endif > + wakeup_one(&timeout_proc); > + } else > softintr_schedule(softclock_si); > > cond_wait(&c, "tmobar"); > @@ -659,7 +684,12 @@ softclock_process_kclock_timeout(struct > if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=)) > tostat.tos_late++; > if (ISSET(to->to_flags, TIMEOUT_PROC)) { > - CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list); > +#ifdef MULTIPROCESSOR > + if (ISSET(to->to_flags, TIMEOUT_MPSAFE)) > + CIRCQ_INSERT_TAIL(&timeout_proc_mp, &to->to_list); > + else > +#endif > + CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list); > return; > } > timeout_run(to); > @@ -681,7 +711,12 @@ softclock_process_tick_timeout(struct ti > if (!new && delta < 0) > tostat.tos_late++; > if (ISSET(to->to_flags, TIMEOUT_PROC)) { > - CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list); > +#ifdef MULTIPROCESSOR > + if (ISSET(to->to_flags, TIMEOUT_MPSAFE)) > + CIRCQ_INSERT_TAIL(&timeout_proc_mp, &to->to_list); > + else > +#endif > + CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list); > return; > } > timeout_run(to); > @@ -699,6 +734,9 @@ softclock(void *arg) > { > struct timeout *first_new, *to; > int needsproc, new; > +#ifdef MULTIPROCESSOR > + int need_proc_mp; > +#endif > > first_new = NULL; > new = 0; > @@ -719,10 +757,17 @@ softclock(void *arg) > } > tostat.tos_softclocks++; > needsproc = !CIRCQ_EMPTY(&timeout_proc); > +#ifdef MULTIPROCESSOR > + need_proc_mp = !CIRCQ_EMPTY(&timeout_proc_mp); > +#endif > mtx_leave(&timeout_mutex); > > if (needsproc) > wakeup(&timeout_proc); > +#ifdef MULTIPROCESSOR > + if (need_proc_mp) > + wakeup(&timeout_proc_mp); > +#endif > } > > void > @@ -730,6 +775,10 @@ softclock_create_thread(void *arg) > { > if (kthread_create(softclock_thread, NULL, NULL, "softclock")) > panic("fork softclock"); > +#ifdef MULTIPROCESSOR > + if (kthread_create(softclock_thread_mp, NULL, NULL, "softclockmp")) > + panic("kthread_create softclock_thread_mp"); > +#endif > } > > void > @@ -751,11 +800,8 @@ 
softclock_thread(void *arg) > sched_peg_curproc(ci); > > s = splsoftclock(); > + mtx_enter(&timeout_mutex); > for (;;) { > - sleep_setup(&timeout_proc, PSWP, "bored"); > - sleep_finish(0, CIRCQ_EMPTY(&timeout_proc)); > - > - mtx_enter(&timeout_mutex); > while (!CIRCQ_EMPTY(&timeout_proc)) { > to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc)); > CIRCQ_REMOVE(&to->to_list); > @@ -763,11 +809,36 @@ softclock_thread(void *arg) > tostat.tos_run_thread++; > } > tostat.tos_thread_wakeups++; > - mtx_leave(&timeout_mutex); > + msleep_nsec(&timeout_proc, &timeout_mutex, PSWP, "tmoslp", > + INFSLP); > } > splx(s); > } > > +#ifdef MULTIPROCESSOR > +void > +softclock_thread_mp(void *arg) > +{ > + struct timeout *to; > + > + KERNEL_ASSERT_LOCKED(); > + KERNEL_UNLOCK(); > + > + mtx_enter(&timeout_mutex); > + for (;;) { > + while (!CIRCQ_EMPTY(&timeout_proc_mp)) { > + to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc_mp)); > + CIRCQ_REMOVE(&to->to_list); > + timeout_run(to); > + tostat.tos_run_thread++; > + } > + tostat.tos_thread_wakeups++; > + msleep_nsec(&timeout_proc_mp, &timeout_mutex, PSWP, "tmoslp", > + INFSLP); > + } > +} > +#endif /* MULTIPROCESSOR*/ > + > #ifndef SMALL_KERNEL > void > timeout_adjust_ticks(int adj) > @@ -875,6 +946,10 @@ db_show_timeout(struct timeout *to, stru > where = "softint"; > else if (bucket == &timeout_proc) > where = "thread"; > +#ifdef MULTIPROCESSOR > + else if (bucket == &timeout_proc_mp) > + where = "thread-mp"; > +#endif > else { > if (to->to_kclock != KCLOCK_NONE) > wheel = timeout_wheel_kc; > @@ -888,11 +963,11 @@ db_show_timeout(struct timeout *to, stru > if (to->to_kclock != KCLOCK_NONE) { > kc = &timeout_kclock[to->to_kclock]; > timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining); > - db_printf("%20s %8s %7s 0x%0*lx %s\n", > + db_printf("%20s %8s %9s 0x%0*lx %s\n", > db_timespec(&remaining), db_kclock(to->to_kclock), where, > width, (ulong)to->to_arg, name); > } else { > - db_printf("%20d %8s %7s 0x%0*lx %s\n", > + 
db_printf("%20d %8s %9s 0x%0*lx %s\n", > to->to_time - ticks, "ticks", where, > width, (ulong)to->to_arg, name); > } > @@ -913,11 +988,14 @@ db_show_callout(db_expr_t addr, int hadd > db_timespec(&kc->kc_lastscan), db_kclock(i)); > } > db_printf("\n"); > - db_printf("%20s %8s %7s %*s %s\n", > + db_printf("%20s %8s %9s %*s %s\n", > "remaining", "clock", "wheel", width, "arg", "func"); > db_show_callout_bucket(&timeout_new); > db_show_callout_bucket(&timeout_todo); > db_show_callout_bucket(&timeout_proc); > +#ifdef MULTIPROCESSOR > + db_show_callout_bucket(&timeout_proc_mp); > +#endif > for (b = 0; b < nitems(timeout_wheel); b++) > db_show_callout_bucket(&timeout_wheel[b]); > for (b = 0; b < nitems(timeout_wheel_kc); b++) >