Hi, I want to run softclock() without the kernel lock. The way to go, I think, is to first push the kernel lock down into timeout_run(), and then to remove the kernel lock from each timeout, one by one.
Before we can push the kernel lock down into timeout_run() we need to remove the kernel lock from timeout_del_barrier(9). The kernel lock is used in timeout_del_barrier(9) to determine whether the given timeout has stopped running. Because the softclock() runs with the kernel lock we currently assume that once the calling thread has taken the kernel lock any ongoing softclock() must have returned and relinquished the lock, so the timeout in question has returned. The simplest replacement I can think of is a volatile pointer to the running timeout that we set before leaving the timeout_mutex and clear after reentering the same during timeout_run(). So in the non-TIMEOUT_PROC case the timeout_del_barrier(9) caller just spins until the timeout function returns and the timeout_running pointer is changed. Not every caller can sleep during timeout_del_barrier(9). I think spinning is the simplest thing that will definitely work here. There is no behavior change in the TIMEOUT_PROC case. The kernel lock was never involved in that path. Misc: - The timeout_barrier(9) routine has one caller. Can we just fold it into timeout_del_barrier(9) or do we need to keep it separate for API completeness? See patch. - Assuming it's okay to fold timeout_barrier() into timeout_del_barrier(): I have merged two separate timeout_mutex sections into one. Unclear if I still need a second timeout_sync_order() call before returning from timeout_del_barrier(). - Where, if anywhere, does the splx(splsoftclock()) call belong in the non-TIMEOUT_PROC case? In the current code it is between KERNEL_LOCK() and KERNEL_UNLOCK(). There is no equivalent spot in the new code. (Manpage changes omitted for the moment to keep the diff short.) Thoughts? 
Index: kern/kern_timeout.c =================================================================== RCS file: /cvs/src/sys/kern/kern_timeout.c,v retrieving revision 1.83 diff -u -p -r1.83 kern_timeout.c --- kern/kern_timeout.c 8 Feb 2021 08:18:45 -0000 1.83 +++ kern/kern_timeout.c 4 May 2021 06:05:42 -0000 @@ -79,6 +79,8 @@ struct circq timeout_proc; /* [T] Due + time_t timeout_level_width[WHEELCOUNT]; /* [I] Wheel level width (seconds) */ struct timespec tick_ts; /* [I] Length of a tick (1/hz secs) */ +volatile struct timeout *timeout_running; /* [T] Running timeout, if any */ + struct kclock { struct timespec kc_lastscan; /* [T] Clock time at last wheel scan */ struct timespec kc_late; /* [T] Late if due prior */ @@ -173,6 +175,7 @@ void softclock_process_kclock_timeout(st void softclock_process_tick_timeout(struct timeout *, int); void softclock_thread(void *); uint32_t timeout_bucket(const struct timeout *); +int timeout_del_locked(struct timeout *); uint32_t timeout_maskwheel(uint32_t, const struct timespec *); void timeout_run(struct timeout *); void timeout_proc_barrier(void *); @@ -455,11 +458,12 @@ kclock_nanotime(int kclock, struct times } int -timeout_del(struct timeout *to) +timeout_del_locked(struct timeout *to) { int ret = 0; - mtx_enter(&timeout_mutex); + MUTEX_ASSERT_LOCKED(&timeout_mutex); + if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) { CIRCQ_REMOVE(&to->to_list); CLR(to->to_flags, TIMEOUT_ONQUEUE); @@ -468,52 +472,54 @@ timeout_del(struct timeout *to) } CLR(to->to_flags, TIMEOUT_TRIGGERED); tostat.tos_deleted++; - mtx_leave(&timeout_mutex); return ret; } int -timeout_del_barrier(struct timeout *to) +timeout_del(struct timeout *to) { - int removed; + int status; - timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC)); - - removed = timeout_del(to); - if (!removed) - timeout_barrier(to); - - return removed; + mtx_enter(&timeout_mutex); + status = timeout_del_locked(to); + mtx_leave(&timeout_mutex); + return status; } -void -timeout_barrier(struct timeout 
*to) +int +timeout_del_barrier(struct timeout *to) { + struct timeout barrier; + struct cond c = COND_INITIALIZER(); int needsproc = ISSET(to->to_flags, TIMEOUT_PROC); timeout_sync_order(needsproc); - if (!needsproc) { - KERNEL_LOCK(); - splx(splsoftclock()); - KERNEL_UNLOCK(); - } else { - struct cond c = COND_INITIALIZER(); - struct timeout barrier; + mtx_enter(&timeout_mutex); + + if (timeout_del_locked(to)) { + mtx_leave(&timeout_mutex); + return 1; + } + if (needsproc) { timeout_set_proc(&barrier, timeout_proc_barrier, &c); barrier.to_process = curproc->p_p; - - mtx_enter(&timeout_mutex); SET(barrier.to_flags, TIMEOUT_ONQUEUE); CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list); mtx_leave(&timeout_mutex); - wakeup_one(&timeout_proc); - cond_wait(&c, "tmobar"); + } else { + mtx_leave(&timeout_mutex); + /* XXX Is this in the right spot? */ + splx(splsoftclock()); + while (timeout_running == to) + CPU_BUSY_CYCLE(); } + + return 0; } void @@ -664,6 +670,8 @@ timeout_run(struct timeout *to) struct process *kcov_process = to->to_process; #endif + timeout_running = to; + mtx_leave(&timeout_mutex); timeout_sync_enter(needsproc); #if NKCOV > 0 @@ -675,6 +683,8 @@ timeout_run(struct timeout *to) #endif timeout_sync_leave(needsproc); mtx_enter(&timeout_mutex); + + timeout_running = NULL; } void Index: sys/timeout.h =================================================================== RCS file: /cvs/src/sys/sys/timeout.h,v retrieving revision 1.40 diff -u -p -r1.40 timeout.h --- sys/timeout.h 15 Oct 2020 20:03:44 -0000 1.40 +++ sys/timeout.h 4 May 2021 06:05:42 -0000 @@ -148,7 +148,6 @@ int timeout_in_nsec(struct timeout *, ui int timeout_del(struct timeout *); int timeout_del_barrier(struct timeout *); -void timeout_barrier(struct timeout *); void timeout_adjust_ticks(int); void timeout_hardclock_update(void);