Hi, The last step before unlocking setitimer(2) and getitimer(2) is protecting the per-process ITIMER_REAL state with something other than the kernel lock.
Because the ITIMER_REAL timeout runs at IPL_SOFTCLOCK I think the per-process mutex ps_mtx is appropriate. Changing the setitimer() routine itself is trivial. We just enter/leave ps_mtx instead of the global itimer_mtx if the timer in question is ITIMER_REAL. The realitexpire() routine is trickier. When setitimer(2) is unlocked there will be a small race between the point where we leave timeout_mutex in timeout_run() and where we enter ps_mtx in realitexpire(). Between these two points a different thread in setitimer(2) running without the kernel lock will be able to enter ps_mtx and cancel or reschedule the timeout. So the moment we enter ps_mtx during realitexpire() we first need to check if the timer was cancelled or rescheduled to run in the future. In either case we don't want to send SIGALRM to the process, we just want to leave the mutex and return. Sending the SIGALRM before/after we decide whether to reschedule the timeout is inconsequential, so I've moved the prsignal() call to the end of realitexpire(). I've added a KERNEL_ASSERT_LOCKED() there, too, as prsignal() still needs the kernel lock. This is trivial for now, as all timeouts run with the kernel lock. I have updated the locking notes in sys/proc.h to reflect these changes. ok? Index: kern/kern_time.c =================================================================== RCS file: /cvs/src/sys/kern/kern_time.c,v retrieving revision 1.149 diff -u -p -r1.149 kern_time.c --- kern/kern_time.c 25 Oct 2020 01:55:18 -0000 1.149 +++ kern/kern_time.c 27 Oct 2020 15:23:16 -0000 @@ -535,10 +535,11 @@ setitimer(int which, const struct itimer TIMEVAL_TO_TIMESPEC(&itv->it_interval, &its.it_interval); } - if (which != ITIMER_REAL) - mtx_enter(&itimer_mtx); - else + if (which == ITIMER_REAL) { + mtx_enter(&pr->ps_mtx); nanouptime(&now); + } else + mtx_enter(&itimer_mtx); if (olditv != NULL) oldits = *itimer; @@ -553,7 +554,9 @@ setitimer(int which, const struct itimer *itimer = its; } - if (which != ITIMER_REAL) + if (which == ITIMER_REAL) + mtx_leave(&pr->ps_mtx); + else mtx_leave(&itimer_mtx); if (olditv != NULL) { @@ -660,21 +663,43 @@ realitexpire(void *arg) struct timespec cts; struct process *pr = arg; struct itimerspec *tp = &pr->ps_timer[ITIMER_REAL]; + int need_signal = 0; + + mtx_enter(&pr->ps_mtx); - prsignal(pr, SIGALRM); + /* + * Do nothing if the timer was cancelled or rescheduled while we + * were entering the mutex. + */ + if (!timespecisset(&tp->it_value) || timeout_pending(&pr->ps_realit_to)) + goto out; - /* If it was a one-shot timer we're done. */ + /* The timer expired. We need to send the signal. */ + need_signal = 1; + + /* One-shot timers are not reloaded. */ if (!timespecisset(&tp->it_interval)) { timespecclear(&tp->it_value); - return; + goto out; } - /* Find the nearest future expiration point and restart the timeout. */ + /* + * Find the nearest future expiration point and restart the + * timer before sending the signal. + */ nanouptime(&cts); while (timespeccmp(&tp->it_value, &cts, <=)) timespecadd(&tp->it_value, &tp->it_interval, &tp->it_value); if ((pr->ps_flags & PS_EXITING) == 0) timeout_at_ts(&pr->ps_realit_to, &tp->it_value); + +out: + mtx_leave(&pr->ps_mtx); + + if (need_signal) { + KERNEL_ASSERT_LOCKED(); + prsignal(pr, SIGALRM); + } } /* Index: sys/proc.h =================================================================== RCS file: /cvs/src/sys/sys/proc.h,v retrieving revision 1.300 diff -u -p -r1.300 proc.h --- sys/proc.h 16 Sep 2020 08:01:15 -0000 1.300 +++ sys/proc.h 27 Oct 2020 15:23:16 -0000 @@ -219,7 +219,7 @@ struct process { struct rusage *ps_ru; /* sum of stats for dead threads. */ struct tusage ps_tu; /* accumulated times. */ struct rusage ps_cru; /* sum of stats for reaped children */ - struct itimerspec ps_timer[3]; /* [K] ITIMER_REAL timer */ + struct itimerspec ps_timer[3]; /* [m] ITIMER_REAL timer */ /* [T] ITIMER_{VIRTUAL,PROF} timers */ struct timeout ps_rucheck_to; /* [] resource limit check timer */ time_t ps_nextxcpu; /* when to send next SIGXCPU, */ @@ -273,7 +273,7 @@ struct process { int ps_refcnt; /* Number of references. */ struct timespec ps_start; /* starting uptime. */ - struct timeout ps_realit_to; /* [K] ITIMER_REAL timeout */ + struct timeout ps_realit_to; /* [m] ITIMER_REAL timeout */ }; #define ps_session ps_pgrp->pg_session