On Thu, Oct 05, 2023 at 12:57:24AM +0200, Alexander Bluhm wrote: > > This is a first step to unlock TCP syn cache. The timer function > is independent of the socket code. That makes it easy to start > there. > > [...] > > Still missing: > - [...] > - Run timer without kernel lock. I am not aware of such a feature. > There is already some network code that could benefit from that. > Can we get timer without kernel lock like TASKQ_MPSAFE implements > it for tasks?
This patch adds a TIMEOUT_MPSAFE flag for use with TIMEOUT_PROC. Softint timeouts are a different story. To run syn_cache_timer() without the kernel lock you would initialize it like this: timeout_set_flags(&sc->sc_timer, syn_cache_timer, sc, KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE); Use with caution, this needs another set of eyes. Index: share/man/man9/timeout.9 =================================================================== RCS file: /cvs/src/share/man/man9/timeout.9,v retrieving revision 1.56 diff -u -p -r1.56 timeout.9 --- share/man/man9/timeout.9 1 Jan 2023 01:19:18 -0000 1.56 +++ share/man/man9/timeout.9 5 Oct 2023 16:09:33 -0000 @@ -193,11 +193,16 @@ Counts the time elapsed since the system The timeout's behavior may be configured with the bitwise OR of zero or more of the following .Fa flags : -.Bl -tag -width TIMEOUT_PROC +.Bl -tag -width TIMEOUT_MPSAFE .It Dv TIMEOUT_PROC Execute the timeout in a process context instead of the default .Dv IPL_SOFTCLOCK interrupt context. +.It Dv TIMEOUT_MPSAFE +Execute the timeout without the kernel lock. +Requires the +.Dv TIMEOUT_PROC +flag. .El .El .Pp @@ -367,8 +372,9 @@ The function .Fa fn must not block and must be safe to execute on any CPU in the system. .Pp -Currently, -all timeouts are executed under the kernel lock. +Timeouts without the +.Dv TIMEOUT_MPSAFE +flag are executed under the kernel lock. .Sh RETURN VALUES .Fn timeout_add , .Fn timeout_add_sec , Index: sys/sys/timeout.h =================================================================== RCS file: /cvs/src/sys/sys/timeout.h,v retrieving revision 1.47 diff -u -p -r1.47 timeout.h --- sys/sys/timeout.h 31 Dec 2022 16:06:24 -0000 1.47 +++ sys/sys/timeout.h 5 Oct 2023 16:09:33 -0000 @@ -54,6 +54,7 @@ struct timeout { #define TIMEOUT_ONQUEUE 0x02 /* on any timeout queue */ #define TIMEOUT_INITIALIZED 0x04 /* initialized */ #define TIMEOUT_TRIGGERED 0x08 /* running or ran */ +#define TIMEOUT_MPSAFE 0x10 /* run without kernel lock */ struct timeoutstat { uint64_t tos_added; /* timeout_add*(9) calls */ Index: sys/kern/kern_timeout.c =================================================================== RCS file: /cvs/src/sys/kern/kern_timeout.c,v retrieving revision 1.95 diff -u -p -r1.95 kern_timeout.c --- sys/kern/kern_timeout.c 29 Jul 2023 06:52:08 -0000 1.95 +++ sys/kern/kern_timeout.c 5 Oct 2023 16:09:34 -0000 @@ -75,6 +75,7 @@ struct circq timeout_wheel_kc[BUCKETS]; struct circq timeout_new; /* [T] New, unscheduled timeouts */ struct circq timeout_todo; /* [T] Due or needs rescheduling */ struct circq timeout_proc; /* [T] Due + needs process context */ +struct circq timeout_proc_mpsafe; /* [T] Process ctx + no kernel lock */ time_t timeout_level_width[WHEELCOUNT]; /* [I] Wheel level width (seconds) */ struct timespec tick_ts; /* [I] Length of a tick (1/hz secs) */ @@ -228,6 +229,7 @@ timeout_startup(void) CIRCQ_INIT(&timeout_new); CIRCQ_INIT(&timeout_todo); CIRCQ_INIT(&timeout_proc); + CIRCQ_INIT(&timeout_proc_mpsafe); for (b = 0; b < nitems(timeout_wheel); b++) CIRCQ_INIT(&timeout_wheel[b]); for (b = 0; b < nitems(timeout_wheel_kc); b++) @@ -261,10 +263,16 @@ void timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int kclock, int flags) { + KASSERT(!ISSET(flags, ~(TIMEOUT_PROC | TIMEOUT_MPSAFE))); + to->to_func = fn; to->to_arg = arg; to->to_kclock = kclock; to->to_flags = flags | TIMEOUT_INITIALIZED; + + /* For now, only process context timeouts may be marked MP-safe. */ + if (ISSET(to->to_flags, TIMEOUT_MPSAFE)) + KASSERT(ISSET(to->to_flags, TIMEOUT_PROC)); } void @@ -659,7 +667,10 @@ softclock_process_kclock_timeout(struct if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=)) tostat.tos_late++; if (ISSET(to->to_flags, TIMEOUT_PROC)) { - CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list); + if (ISSET(to->to_flags, TIMEOUT_MPSAFE)) + CIRCQ_INSERT_TAIL(&timeout_proc_mpsafe, &to->to_list); + else + CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list); return; } timeout_run(to); @@ -681,7 +692,10 @@ softclock_process_tick_timeout(struct ti if (!new && delta < 0) tostat.tos_late++; if (ISSET(to->to_flags, TIMEOUT_PROC)) { - CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list); + if (ISSET(to->to_flags, TIMEOUT_MPSAFE)) + CIRCQ_INSERT_TAIL(&timeout_proc_mpsafe, &to->to_list); + else + CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list); return; } timeout_run(to); @@ -698,7 +712,7 @@ void softclock(void *arg) { struct timeout *first_new, *to; - int needsproc, new; + int needsproc = 0, new; first_new = NULL; new = 0; @@ -718,7 +732,8 @@ softclock(void *arg) softclock_process_tick_timeout(to, new); } tostat.tos_softclocks++; - needsproc = !CIRCQ_EMPTY(&timeout_proc); + if (!CIRCQ_EMPTY(&timeout_proc) || !CIRCQ_EMPTY(&timeout_proc_mpsafe)) + needsproc = 1; mtx_leave(&timeout_mutex); if (needsproc) @@ -736,11 +751,13 @@ void softclock_thread(void *arg) { CPU_INFO_ITERATOR cii; + struct circq *elm; struct cpu_info *ci; struct timeout *to; int s; KERNEL_ASSERT_LOCKED(); + KERNEL_UNLOCK(); /* Be conservative for the moment */ CPU_INFO_FOREACH(cii, ci) { @@ -752,9 +769,23 @@ softclock_thread(void *arg) s = splsoftclock(); for (;;) { - sleep_setup(&timeout_proc, PSWP, "bored"); - sleep_finish(0, CIRCQ_EMPTY(&timeout_proc)); + mtx_enter(&timeout_mutex); + if (CIRCQ_EMPTY(&timeout_proc) && + CIRCQ_EMPTY(&timeout_proc_mpsafe)) { + tostat.tos_thread_wakeups++; + msleep_nsec(&timeout_proc, &timeout_mutex, PSWP, + "bored", INFSLP); + } + while (!CIRCQ_EMPTY(&timeout_proc_mpsafe)) { + elm = CIRCQ_FIRST(&timeout_proc_mpsafe); + to = timeout_from_circq(elm); + CIRCQ_REMOVE(&to->to_list); + timeout_run(to); + tostat.tos_run_thread++; + } + mtx_leave(&timeout_mutex); + KERNEL_LOCK(); mtx_enter(&timeout_mutex); while (!CIRCQ_EMPTY(&timeout_proc)) { to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc)); @@ -762,8 +793,8 @@ softclock_thread(void *arg) timeout_run(to); tostat.tos_run_thread++; } - tostat.tos_thread_wakeups++; mtx_leave(&timeout_mutex); + KERNEL_UNLOCK(); } splx(s); } @@ -873,7 +904,7 @@ db_show_timeout(struct timeout *to, stru where = "new"; else if (bucket == &timeout_todo) where = "softint"; - else if (bucket == &timeout_proc) + else if (bucket == &timeout_proc || bucket == &timeout_proc_mpsafe) where = "thread"; else { if (to->to_kclock != KCLOCK_NONE) @@ -918,6 +949,7 @@ db_show_callout(db_expr_t addr, int hadd db_show_callout_bucket(&timeout_new); db_show_callout_bucket(&timeout_todo); db_show_callout_bucket(&timeout_proc); + db_show_callout_bucket(&timeout_proc_mpsafe); for (b = 0; b < nitems(timeout_wheel); b++) db_show_callout_bucket(&timeout_wheel[b]); for (b = 0; b < nitems(timeout_wheel_kc); b++)