The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=74cf7cae4d2238ae6d1c949b2bbd077e1ab33634

commit 74cf7cae4d2238ae6d1c949b2bbd077e1ab33634
Author:     John Baldwin <[email protected]>
AuthorDate: 2021-12-30 22:54:29 +0000
Commit:     John Baldwin <[email protected]>
CommitDate: 2021-12-30 22:55:08 +0000

    softclock: Use dedicated ithreads for running callouts.
    
    Rather than using the swi infrastructure, rewrite softclock() as a
    thread loop (softclock_thread()) and use it as the main routine of the
    softclock threads.  The threads use the CC_LOCK as the thread lock
    when idle.
    
    Reviewed by:    mav, imp, kib
    Sponsored by:   Netflix
    Differential Revision:  https://reviews.freebsd.org/D33683
---
 sys/kern/kern_timeout.c | 115 ++++++++++++++++++++++++++++++++----------------
 sys/sys/systm.h         |   1 -
 2 files changed, 76 insertions(+), 40 deletions(-)

diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
index 992a093d30ab..3923c214be8d 100644
--- a/sys/kern/kern_timeout.c
+++ b/sys/kern/kern_timeout.c
@@ -52,14 +52,17 @@ __FBSDID("$FreeBSD$");
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
+#include <sys/kthread.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
+#include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/sleepqueue.h>
 #include <sys/sysctl.h>
 #include <sys/smp.h>
+#include <sys/unistd.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
@@ -77,6 +80,8 @@ SDT_PROVIDER_DEFINE(callout_execute);
 SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *");
 SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *");
 
+static void    softclock_thread(void *arg);
+
 #ifdef CALLOUT_PROFILING
 static int avg_depth;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
@@ -166,7 +171,7 @@ struct callout_cpu {
        struct callout_tailq    cc_expireq;
        sbintime_t              cc_firstevent;
        sbintime_t              cc_lastscan;
-       void                    *cc_cookie;
+       struct thread           *cc_thread;
        u_int                   cc_bucket;
        u_int                   cc_inited;
 #ifdef KTR
@@ -222,7 +227,7 @@ static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
  *                     relevant callout completes.
  *   cc_cancel       - Changing to 1 with both callout_lock and cc_lock held
  *                     guarantees that the current callout will not run.
- *                     The softclock() function sets this to 0 before it
+ *                     The softclock_call_cc() function sets this to 0 before it
  *                     drops callout_lock to acquire c_lock, and it calls
  *                     the handler only if curr_cancelled is still 0 after
  *                     cc_lock is successfully acquired.
@@ -316,7 +321,7 @@ callout_cpu_init(struct callout_cpu *cc, int cpu)
 {
        int i;
 
-       mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
+       mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN);
        cc->cc_inited = 1;
        cc->cc_callwheel = malloc_domainset(sizeof(struct callout_list) *
            callwheelsize, M_CALLOUT,
@@ -369,28 +374,38 @@ callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
 static void
 start_softclock(void *dummy)
 {
+       struct proc *p;
+       struct thread *td;
        struct callout_cpu *cc;
-       char name[MAXCOMLEN];
-       int cpu;
+       int cpu, error;
        bool pin_swi;
-       struct intr_event *ie;
 
+       p = NULL;
        CPU_FOREACH(cpu) {
                cc = CC_CPU(cpu);
-               snprintf(name, sizeof(name), "clock (%d)", cpu);
-               ie = NULL;
-               if (swi_add(&ie, name, softclock, cc, SWI_CLOCK,
-                   INTR_MPSAFE, &cc->cc_cookie))
-                       panic("died while creating standard software ithreads");
+               error = kproc_kthread_add(softclock_thread, cc, &p, &td,
+                   RFSTOPPED, 0, "clock", "clock (%d)", cpu);
+               if (error != 0)
+                       panic("failed to create softclock thread for cpu %d: %d",
+                           cpu, error);
+               CC_LOCK(cc);
+               cc->cc_thread = td;
+               thread_lock(td);
+               sched_class(td, PRI_ITHD);
+               sched_prio(td, PI_SWI(SWI_CLOCK));
+               TD_SET_IWAIT(td);
+               thread_lock_set(td, (struct mtx *)&cc->cc_lock);
+               thread_unlock(td);
                if (cpu == cc_default_cpu)
                        pin_swi = pin_default_swi;
                else
                        pin_swi = pin_pcpu_swi;
-               if (pin_swi && (intr_event_bind(ie, cpu) != 0)) {
-                       printf("%s: %s clock couldn't be pinned to cpu %d\n",
-                           __func__,
-                           cpu == cc_default_cpu ? "default" : "per-cpu",
-                           cpu);
+               if (pin_swi) {
+                       error = cpuset_setithread(td->td_tid, cpu);
+                       if (error != 0)
+                               printf("%s: %s clock couldn't be pinned to cpu %d: %d\n",
+                                   __func__, cpu == cc_default_cpu ?
+                                   "default" : "per-cpu", cpu, error);
                }
        }
 }
@@ -418,6 +433,7 @@ callout_process(sbintime_t now)
        struct callout *tmp, *tmpn;
        struct callout_cpu *cc;
        struct callout_list *sc;
+       struct thread *td;
        sbintime_t first, last, max, tmp_max;
        uint32_t lookahead;
        u_int firstb, lastb, nowb;
@@ -529,13 +545,15 @@ next:
        avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
        avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
 #endif
-       mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
-       /*
-        * swi_sched acquires the thread lock, so we don't want to call it
-        * with cc_lock held; incorrect locking order.
-        */
-       if (!TAILQ_EMPTY(&cc->cc_expireq))
-               swi_sched(cc->cc_cookie, 0);
+       if (!TAILQ_EMPTY(&cc->cc_expireq)) {
+               td = cc->cc_thread;
+               if (TD_AWAITING_INTR(td)) {
+                       TD_CLR_IWAIT(td);
+                       sched_add(td, SRQ_INTR);
+               } else
+                       mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
+       } else
+               mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
 }
 
 static struct callout_cpu *
@@ -797,38 +815,57 @@ skip:
  */
 
 /*
- * Software (low priority) clock interrupt.
+ * Software (low priority) clock interrupt thread handler.
  * Run periodic events from timeout queue.
  */
-void
-softclock(void *arg)
+static void
+softclock_thread(void *arg)
 {
+       struct thread *td = curthread;
        struct callout_cpu *cc;
        struct callout *c;
 #ifdef CALLOUT_PROFILING
-       int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
+       int depth, gcalls, lockcalls, mpcalls;
 #endif
 
        cc = (struct callout_cpu *)arg;
        CC_LOCK(cc);
-       while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
-               TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
-               softclock_call_cc(c, cc,
+       for (;;) {
+               while (TAILQ_EMPTY(&cc->cc_expireq)) {
+                       /*
+                        * Use CC_LOCK(cc) as the thread_lock while
+                        * idle.
+                        */
+                       thread_lock(td);
+                       thread_lock_set(td, (struct mtx *)&cc->cc_lock);
+                       TD_SET_IWAIT(td);
+                       mi_switch(SW_VOL | SWT_IWAIT);
+
+                       /* mi_switch() drops thread_lock(). */
+                       CC_LOCK(cc);
+               }
+
 #ifdef CALLOUT_PROFILING
-                   &mpcalls, &lockcalls, &gcalls,
+               depth = gcalls = lockcalls = mpcalls = 0;
 #endif
-                   0);
+               while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
+                       TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+                       softclock_call_cc(c, cc,
 #ifdef CALLOUT_PROFILING
-               ++depth;
+                           &mpcalls, &lockcalls, &gcalls,
 #endif
-       }
+                           0);
 #ifdef CALLOUT_PROFILING
-       avg_depth += (depth * 1000 - avg_depth) >> 8;
-       avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
-       avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
-       avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
+                       ++depth;
 #endif
-       CC_UNLOCK(cc);
+               }
+#ifdef CALLOUT_PROFILING
+               avg_depth += (depth * 1000 - avg_depth) >> 8;
+               avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
+               avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
+               avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
+#endif
+       }
 }
 
 void
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index ffe014eb8b42..25a2d0c41e82 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -471,7 +471,6 @@ int sysbeep(int hertz, sbintime_t duration);
 
 void   hardclock(int cnt, int usermode);
 void   hardclock_sync(int cpu);
-void   softclock(void *);
 void   statclock(int cnt, int usermode);
 void   profclock(int cnt, int usermode, uintfptr_t pc);
 

Reply via email to