On Thu, Oct 05, 2023 at 12:57:24AM +0200, Alexander Bluhm wrote:
> 
> This is a first step to unlock TCP syn cache.  The timer function
> is independent of the socket code.  That makes it easy to start
> there.
> 
> [...]
> 
> Still missing:
> - [...]
> - Run timer without kernel lock.  I am not aware of such a feature.
>   There is already some network code that could benefit from that.
>   Can we get timer without kernel lock like TASKQ_MPSAFE implements
>   it for tasks?

This patch adds a TIMEOUT_MPSAFE flag for use with TIMEOUT_PROC.
Softint timeouts are a different story.

To run syn_cache_timer() without the kernel lock you would initialize
it like this:

        timeout_set_flags(&sc->sc_timer, syn_cache_timer, sc, KCLOCK_NONE,
            TIMEOUT_PROC | TIMEOUT_MPSAFE);

Use with caution, this needs another set of eyes.

Index: share/man/man9/timeout.9
===================================================================
RCS file: /cvs/src/share/man/man9/timeout.9,v
retrieving revision 1.56
diff -u -p -r1.56 timeout.9
--- share/man/man9/timeout.9    1 Jan 2023 01:19:18 -0000       1.56
+++ share/man/man9/timeout.9    5 Oct 2023 16:09:33 -0000
@@ -193,11 +193,16 @@ Counts the time elapsed since the system
 The timeout's behavior may be configured with the bitwise OR of
 zero or more of the following
 .Fa flags :
-.Bl -tag -width TIMEOUT_PROC
+.Bl -tag -width TIMEOUT_MPSAFE
 .It Dv TIMEOUT_PROC
 Execute the timeout in a process context instead of the default
 .Dv IPL_SOFTCLOCK
 interrupt context.
+.It Dv TIMEOUT_MPSAFE
+Execute the timeout without the kernel lock.
+Requires the
+.Dv TIMEOUT_PROC
+flag.
 .El
 .El
 .Pp
@@ -367,8 +372,9 @@ The function
 .Fa fn
 must not block and must be safe to execute on any CPU in the system.
 .Pp
-Currently,
-all timeouts are executed under the kernel lock.
+Timeouts without the
+.Dv TIMEOUT_MPSAFE
+flag are executed under the kernel lock.
 .Sh RETURN VALUES
 .Fn timeout_add ,
 .Fn timeout_add_sec ,
Index: sys/sys/timeout.h
===================================================================
RCS file: /cvs/src/sys/sys/timeout.h,v
retrieving revision 1.47
diff -u -p -r1.47 timeout.h
--- sys/sys/timeout.h   31 Dec 2022 16:06:24 -0000      1.47
+++ sys/sys/timeout.h   5 Oct 2023 16:09:33 -0000
@@ -54,6 +54,7 @@ struct timeout {
 #define TIMEOUT_ONQUEUE                0x02    /* on any timeout queue */
 #define TIMEOUT_INITIALIZED    0x04    /* initialized */
 #define TIMEOUT_TRIGGERED      0x08    /* running or ran */
+#define TIMEOUT_MPSAFE         0x10    /* run without kernel lock */
 
 struct timeoutstat {
        uint64_t tos_added;             /* timeout_add*(9) calls */
Index: sys/kern/kern_timeout.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_timeout.c,v
retrieving revision 1.95
diff -u -p -r1.95 kern_timeout.c
--- sys/kern/kern_timeout.c     29 Jul 2023 06:52:08 -0000      1.95
+++ sys/kern/kern_timeout.c     5 Oct 2023 16:09:34 -0000
@@ -75,6 +75,7 @@ struct circq timeout_wheel_kc[BUCKETS];       
 struct circq timeout_new;              /* [T] New, unscheduled timeouts */
 struct circq timeout_todo;             /* [T] Due or needs rescheduling */
 struct circq timeout_proc;             /* [T] Due + needs process context */
+struct circq timeout_proc_mpsafe;      /* [T] Process ctx + no kernel lock */
 
 time_t timeout_level_width[WHEELCOUNT];        /* [I] Wheel level width 
(seconds) */
 struct timespec tick_ts;               /* [I] Length of a tick (1/hz secs) */
@@ -228,6 +229,7 @@ timeout_startup(void)
        CIRCQ_INIT(&timeout_new);
        CIRCQ_INIT(&timeout_todo);
        CIRCQ_INIT(&timeout_proc);
+       CIRCQ_INIT(&timeout_proc_mpsafe);
        for (b = 0; b < nitems(timeout_wheel); b++)
                CIRCQ_INIT(&timeout_wheel[b]);
        for (b = 0; b < nitems(timeout_wheel_kc); b++)
@@ -261,10 +263,16 @@ void
 timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int 
kclock,
     int flags)
 {
+       KASSERT(!ISSET(flags, ~(TIMEOUT_PROC | TIMEOUT_MPSAFE)));
+
        to->to_func = fn;
        to->to_arg = arg;
        to->to_kclock = kclock;
        to->to_flags = flags | TIMEOUT_INITIALIZED;
+
+       /* For now, only process context timeouts may be marked MP-safe. */
+       if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
+               KASSERT(ISSET(to->to_flags, TIMEOUT_PROC));
 }
 
 void
@@ -659,7 +667,10 @@ softclock_process_kclock_timeout(struct 
        if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
                tostat.tos_late++;
        if (ISSET(to->to_flags, TIMEOUT_PROC)) {
-               CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
+               if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
+                       CIRCQ_INSERT_TAIL(&timeout_proc_mpsafe, &to->to_list);
+               else
+                       CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
                return;
        }
        timeout_run(to);
@@ -681,7 +692,10 @@ softclock_process_tick_timeout(struct ti
        if (!new && delta < 0)
                tostat.tos_late++;
        if (ISSET(to->to_flags, TIMEOUT_PROC)) {
-               CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
+               if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
+                       CIRCQ_INSERT_TAIL(&timeout_proc_mpsafe, &to->to_list);
+               else
+                       CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
                return;
        }
        timeout_run(to);
@@ -698,7 +712,7 @@ void
 softclock(void *arg)
 {
        struct timeout *first_new, *to;
-       int needsproc, new;
+       int needsproc = 0, new;
 
        first_new = NULL;
        new = 0;
@@ -718,7 +732,8 @@ softclock(void *arg)
                        softclock_process_tick_timeout(to, new);
        }
        tostat.tos_softclocks++;
-       needsproc = !CIRCQ_EMPTY(&timeout_proc);
+       if (!CIRCQ_EMPTY(&timeout_proc) || !CIRCQ_EMPTY(&timeout_proc_mpsafe))
+               needsproc = 1;
        mtx_leave(&timeout_mutex);
 
        if (needsproc)
@@ -736,11 +751,13 @@ void
 softclock_thread(void *arg)
 {
        CPU_INFO_ITERATOR cii;
+       struct circq *elm;
        struct cpu_info *ci;
        struct timeout *to;
        int s;
 
        KERNEL_ASSERT_LOCKED();
+       KERNEL_UNLOCK();
 
        /* Be conservative for the moment */
        CPU_INFO_FOREACH(cii, ci) {
@@ -752,9 +769,23 @@ softclock_thread(void *arg)
 
        s = splsoftclock();
        for (;;) {
-               sleep_setup(&timeout_proc, PSWP, "bored");
-               sleep_finish(0, CIRCQ_EMPTY(&timeout_proc));
+               mtx_enter(&timeout_mutex);
+               if (CIRCQ_EMPTY(&timeout_proc) &&
+                   CIRCQ_EMPTY(&timeout_proc_mpsafe)) {
+                       tostat.tos_thread_wakeups++;
+                       msleep_nsec(&timeout_proc, &timeout_mutex, PSWP,
+                           "bored", INFSLP);
+               }
+               while (!CIRCQ_EMPTY(&timeout_proc_mpsafe)) {
+                       elm = CIRCQ_FIRST(&timeout_proc_mpsafe);
+                       to = timeout_from_circq(elm);
+                       CIRCQ_REMOVE(&to->to_list);
+                       timeout_run(to);
+                       tostat.tos_run_thread++;
+               }
+               mtx_leave(&timeout_mutex);
 
+               KERNEL_LOCK();
                mtx_enter(&timeout_mutex);
                while (!CIRCQ_EMPTY(&timeout_proc)) {
                        to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
@@ -762,8 +793,8 @@ softclock_thread(void *arg)
                        timeout_run(to);
                        tostat.tos_run_thread++;
                }
-               tostat.tos_thread_wakeups++;
                mtx_leave(&timeout_mutex);
+               KERNEL_UNLOCK();
        }
        splx(s);
 }
@@ -873,7 +904,7 @@ db_show_timeout(struct timeout *to, stru
                where = "new";
        else if (bucket == &timeout_todo)
                where = "softint";
-       else if (bucket == &timeout_proc)
+       else if (bucket == &timeout_proc || bucket == &timeout_proc_mpsafe)
                where = "thread";
        else {
                if (to->to_kclock != KCLOCK_NONE)
@@ -918,6 +949,7 @@ db_show_callout(db_expr_t addr, int hadd
        db_show_callout_bucket(&timeout_new);
        db_show_callout_bucket(&timeout_todo);
        db_show_callout_bucket(&timeout_proc);
+       db_show_callout_bucket(&timeout_proc_mpsafe);
        for (b = 0; b < nitems(timeout_wheel); b++)
                db_show_callout_bucket(&timeout_wheel[b]);
        for (b = 0; b < nitems(timeout_wheel_kc); b++)

Reply via email to