On Sun, May 23, 2021 at 09:05:24AM +0000, Visa Hankala wrote:
>
> [...]
>
> Here is a revised patch with a slightly altered approach.
>
> I have replaced the per-CPU queues with system-wide queues to make
> the code simpler.

I think that's fine to start with. We don't have many softintrs. We can
go per-CPU later if there is an issue with performance.

> When a CPU starts processing a soft interrupt, it reserves the handler
> to prevent concurrent execution. If the soft interrupt gets rescheduled
> during processing, the handler is run again by the same CPU. This breaks
> FIFO ordering, though.

If you want to preserve FIFO you can reinsert the handler at the queue
tail. That would be more fair. If FIFO is the current behavior I think
we ought to keep it.

> softintr_disestablish() now calls sched_barrier() to drain processing.
> This should also prevent the deadlock with the kernel lock.
>
> It appeared tricky to add witness(4) integration. witness(4) treats the
> kernel lock as a sleepable lock to avoid ordering issues with rwlocks.
> This creates a dilemma in softintr_dispatch() because of nesting.
> The natural lock class for the required soft interrupt pseudo-lock is
> mutex, but that does not work if an MP-safe soft interrupt got preempted
> by a higher IPL non-MP-safe soft interrupt.
>
> The dilemma could be worked around by preventing nesting when witness(4)
> is enabled, altering the system's behaviour. Another approach is to skip
> the pseudo-lock handling when processing is nested.

Altering the system's behavior sounds like a bad idea.

> However, as soft interrupt handlers cannot use rwlocks, for deadlock
> prevention it should be sufficient to ensure that the caller of
> softintr_disestablish() does not hold spinning locks (the kernel lock
> excluded). Hence softintr_disestablish() calls assertwaitok() at the
> start. The assertion is actually implied by sched_barrier().

I like your implementation a lot better than mine. Misc. thoughts
below, though I don't have any real complaints.

> Index: arch/amd64/amd64/softintr.c
> ===================================================================
> RCS file: src/sys/arch/amd64/amd64/softintr.c,v
> retrieving revision 1.10
> diff -u -p -r1.10 softintr.c
> --- arch/amd64/amd64/softintr.c	11 Sep 2020 09:27:09 -0000	1.10
> +++ arch/amd64/amd64/softintr.c	23 May 2021 08:56:28 -0000
> @@ -37,12 +37,33 @@
> #include <sys/param.h>
> #include <sys/systm.h>
> #include <sys/malloc.h>
> +#include <sys/mutex.h>
> +#include <sys/queue.h>
>
> #include <machine/intr.h>
>
> #include <uvm/uvm_extern.h>
>
> -struct x86_soft_intr x86_soft_intrs[X86_NSOFTINTR];
> +struct x86_soft_intrhand {
> +	STAILQ_ENTRY(x86_soft_intrhand) sih_q;
> +	void	(*sih_fn)(void *);
> +	void	*sih_arg;
> +	struct cpu_info *sih_runner;
> +	int	sih_which;
> +	unsigned short sih_flags;
> +	unsigned short sih_state;
> +};
> +
> +#define SIF_MPSAFE	0x0001
> +
> +#define SIS_DYING	0x0001
> +#define SIS_PENDING	0x0002
> +#define SIS_RESTART	0x0004
> +
> +STAILQ_HEAD(x86_soft_intr_queue, x86_soft_intrhand);
> +
> +struct x86_soft_intr_queue softintr_queue[X86_NSOFTINTR];

Why did we switch to STAILQ? I know we don't have very many
softintr_disestablish() calls but isn't O(1) removal worth the extra
pointer?
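
For comparison, the doubly-linked variant would only cost one more
pointer per handler. A rough sketch, field layout otherwise as in your
patch:

	struct x86_soft_intrhand {
		TAILQ_ENTRY(x86_soft_intrhand) sih_q;	/* prev + next */
		/* ... */
	};

	/* Removal is O(1), no queue walk: */
	TAILQ_REMOVE(&softintr_queue[sih->sih_which], sih, sih_q);

whereas STAILQ_REMOVE() has to walk the queue from the head to find the
predecessor. Not a big deal with queues this short, just noting the
tradeoff.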

> +struct mutex softintr_lock = MUTEX_INITIALIZER(IPL_HIGH);
>
> const int x86_soft_intr_to_ssir[X86_NSOFTINTR] = {
> 	SIR_CLOCK,
> @@ -58,15 +79,10 @@ const int x86_soft_intr_to_ssir[X86_NSOF
> void
> softintr_init(void)
> {
> -	struct x86_soft_intr *si;
> 	int i;
>
> -	for (i = 0; i < X86_NSOFTINTR; i++) {
> -		si = &x86_soft_intrs[i];
> -		TAILQ_INIT(&si->softintr_q);
> -		mtx_init(&si->softintr_lock, IPL_HIGH);
> -		si->softintr_ssir = x86_soft_intr_to_ssir[i];
> -	}
> +	for (i = 0; i < X86_NSOFTINTR; i++)
> +		STAILQ_INIT(&softintr_queue[i]);

Nit: nitems(), if you want.

> }
>
> /*
> @@ -78,31 +94,45 @@ void
> softintr_dispatch(int which)
> {
> 	struct cpu_info *ci = curcpu();
> -	struct x86_soft_intr *si = &x86_soft_intrs[which];
> +	struct x86_soft_intr_queue *queue = &softintr_queue[which];
> 	struct x86_soft_intrhand *sih;
> 	int floor;
>
> 	floor = ci->ci_handled_intr_level;
> 	ci->ci_handled_intr_level = ci->ci_ilevel;
>
> -	KERNEL_LOCK();
> -	for (;;) {
> -		mtx_enter(&si->softintr_lock);
> -		sih = TAILQ_FIRST(&si->softintr_q);
> -		if (sih == NULL) {
> -			mtx_leave(&si->softintr_lock);
> -			break;
> +	mtx_enter(&softintr_lock);
> +	while ((sih = STAILQ_FIRST(queue)) != NULL) {
> +		KASSERT((sih->sih_state & (SIS_PENDING | SIS_RESTART)) ==
> +		    SIS_PENDING);
> +		KASSERT(sih->sih_runner == NULL);
> +
> +		sih->sih_state &= ~SIS_PENDING;

Nit: you could use CLR(), ISSET(), etc., if you want. I won't mention
it again though.

> +		STAILQ_REMOVE_HEAD(queue, sih_q);
> +		sih->sih_runner = ci;
> +restart:
> +		mtx_leave(&softintr_lock);
> +
> +		if (sih->sih_flags & SIF_MPSAFE) {
> +			(*sih->sih_fn)(sih->sih_arg);
> +		} else {
> +			KERNEL_LOCK();
> +			(*sih->sih_fn)(sih->sih_arg);
> +			KERNEL_UNLOCK();
> 		}
> -		TAILQ_REMOVE(&si->softintr_q, sih, sih_q);
> -		sih->sih_pending = 0;
>
> +		mtx_enter(&softintr_lock);
> 		uvmexp.softs++;
>
> -		mtx_leave(&si->softintr_lock);
> +		KASSERT((sih->sih_state & SIS_PENDING) == 0);
> +		if (sih->sih_state & SIS_RESTART) {
> +			sih->sih_state &= ~SIS_RESTART;
> +			goto restart;
> +		}

So, here is where you would reinsert if you wanted to keep FIFO.
Something like:

	if (sih->sih_state & SIS_RESTART) {
		sih->sih_state &= ~SIS_RESTART;
		sih->sih_state |= SIS_PENDING;
		STAILQ_INSERT_TAIL(queue, sih, sih_q);
	}

>
> -		(*sih->sih_fn)(sih->sih_arg);
> +		sih->sih_runner = NULL;

... of course you would need to clear sih_runner before reinsertion.

Actually, I think you ought to clear sih_runner immediately after you
reenter the mutex. The symmetry will make an audit easier.

> 	}
> -	KERNEL_UNLOCK();
> +	mtx_leave(&softintr_lock);
>
> 	ci->ci_handled_intr_level = floor;
> }
> @@ -115,9 +145,13 @@ softintr_dispatch(int which)
> void *
> softintr_establish(int ipl, void (*func)(void *), void *arg)
> {
> -	struct x86_soft_intr *si;
> 	struct x86_soft_intrhand *sih;
> 	int which;
> +	unsigned short flags = 0;
> +
> +	if (ipl & IPL_MPSAFE)
> +		flags |= SIF_MPSAFE;
> +	ipl &= ~IPL_MPSAFE;

So IPL_MPSAFE is a flag that we OR with an integer IPL level? Seems
kinda messy. I guess if we already do it elsewhere it's fine.
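
Just to make sure I'm reading the interface right, the caller side
would look something like this (hypothetical driver, the sc_sih member
and function names are made up):

	/* Handler will run without the kernel lock: */
	sc->sc_sih = softintr_establish(IPL_SOFTNET | IPL_MPSAFE,
	    mydrv_softintr, sc);
	if (sc->sc_sih == NULL)
		panic("mydrv: can't establish soft interrupt");

i.e. the flag rides along in the ipl argument and softintr_establish()
masks it off before the switch.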

>
> 	switch (ipl) {
> 	case IPL_SOFTCLOCK:
> @@ -137,14 +171,14 @@ softintr_establish(int ipl, void (*func)
> 		panic("softintr_establish");
> 	}
>
> -	si = &x86_soft_intrs[which];
> -
> 	sih = malloc(sizeof(*sih), M_DEVBUF, M_NOWAIT);
> 	if (__predict_true(sih != NULL)) {
> -		sih->sih_intrhead = si;
> 		sih->sih_fn = func;
> 		sih->sih_arg = arg;
> -		sih->sih_pending = 0;
> +		sih->sih_runner = NULL;
> +		sih->sih_which = which;
> +		sih->sih_flags = flags;
> +		sih->sih_state = 0;
> 	}
> 	return (sih);
> }
> @@ -157,15 +191,52 @@ softintr_establish(int ipl, void (*func)
> void
> softintr_disestablish(void *arg)
> {
> +	struct cpu_info *runner = NULL;
> 	struct x86_soft_intrhand *sih = arg;
> -	struct x86_soft_intr *si = sih->sih_intrhead;
>
> -	mtx_enter(&si->softintr_lock);
> -	if (sih->sih_pending) {
> -		TAILQ_REMOVE(&si->softintr_q, sih, sih_q);
> -		sih->sih_pending = 0;
> +	assertwaitok();
> +
> +	mtx_enter(&softintr_lock);
> +	sih->sih_state |= SIS_DYING;
> +	if (sih->sih_state & SIS_PENDING) {
> +		sih->sih_state &= ~SIS_PENDING;
> +		STAILQ_REMOVE(&softintr_queue[sih->sih_which], sih,
> +		    x86_soft_intrhand, sih_q);
> 	}
> -	mtx_leave(&si->softintr_lock);
> +	runner = sih->sih_runner;
> +	mtx_leave(&softintr_lock);
> +
> +	if (runner != NULL)
> +		sched_barrier(runner);
> +
> +	KASSERT((sih->sih_state & (SIS_PENDING | SIS_RESTART)) == 0);
> +	KASSERT(sih->sih_runner == NULL);
>
> 	free(sih, M_DEVBUF, sizeof(*sih));
> }
> +
> +void
> +softintr_schedule(void *arg)
> +{
> +	struct x86_soft_intrhand *sih = arg;
> +	struct x86_soft_intr_queue *queue = &softintr_queue[sih->sih_which];
> +	int trigger = 0;
> +
> +	mtx_enter(&softintr_lock);
> +	KASSERT((sih->sih_state & SIS_DYING) == 0);

The SIS_DYING sanity-check is a great idea.

> +	if (sih->sih_runner == NULL) {
> +		KASSERT((sih->sih_state & SIS_RESTART) == 0);
> +		if ((sih->sih_state & SIS_PENDING) == 0) {
> +			STAILQ_INSERT_TAIL(queue, sih, sih_q);
> +			sih->sih_state |= SIS_PENDING;
> +			trigger = 1;
> +		}
> +	} else {
> +		KASSERT((sih->sih_state & SIS_PENDING) == 0);
> +		sih->sih_state |= SIS_RESTART;
> +	}
> +	mtx_leave(&softintr_lock);
> +
> +	if (trigger)
> +		softintr(x86_soft_intr_to_ssir[sih->sih_which]);
> +}

This aligns with your description from earlier and looks correct.
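
Re the SIS_DYING assert: if I'm reading it right, it turns a
disestablish vs. schedule race into an immediate panic instead of a
use-after-free. Roughly (hypothetical interleaving, not real code):

	/* thread A */
	softintr_disestablish(sih);	/* sets SIS_DYING, drains, frees */

	/* meanwhile, thread B */
	softintr_schedule(sih);		/* KASSERT fires before sih is
					   requeued and used after free */

Much easier to debug than corruption after the handler memory has been
recycled.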

> Index: arch/amd64/include/intr.h
> ===================================================================
> RCS file: src/sys/arch/amd64/include/intr.h,v
> retrieving revision 1.32
> diff -u -p -r1.32 intr.h
> --- arch/amd64/include/intr.h	17 Jun 2020 06:14:52 -0000	1.32
> +++ arch/amd64/include/intr.h	23 May 2021 08:56:28 -0000
> @@ -231,42 +231,13 @@ extern void (*ipifunc[X86_NIPI])(struct
> #define X86_NSOFTINTR	3
>
> #ifndef _LOCORE
> -#include <sys/queue.h>
> -
> -struct x86_soft_intrhand {
> -	TAILQ_ENTRY(x86_soft_intrhand)
> -		sih_q;
> -	struct x86_soft_intr *sih_intrhead;
> -	void	(*sih_fn)(void *);
> -	void	*sih_arg;
> -	int	sih_pending;
> -};
> -
> -struct x86_soft_intr {
> -	TAILQ_HEAD(, x86_soft_intrhand)
> -		softintr_q;
> -	int	softintr_ssir;
> -	struct mutex softintr_lock;
> -};
>
> void	*softintr_establish(int, void (*)(void *), void *);
> void	softintr_disestablish(void *);
> void	softintr_init(void);
> void	softintr_dispatch(int);
> +void	softintr_schedule(void *);
>
> -#define softintr_schedule(arg)						\
> -do {									\
> -	struct x86_soft_intrhand *__sih = (arg);			\
> -	struct x86_soft_intr *__si = __sih->sih_intrhead;		\
> -									\
> -	mtx_enter(&__si->softintr_lock);				\
> -	if (__sih->sih_pending == 0) {					\
> -		TAILQ_INSERT_TAIL(&__si->softintr_q, __sih, sih_q);	\
> -		__sih->sih_pending = 1;					\
> -		softintr(__si->softintr_ssir);				\
> -	}								\
> -	mtx_leave(&__si->softintr_lock);				\
> -} while (/*CONSTCOND*/ 0)

Thank you for moving this out of the header.

> #endif /* _LOCORE */
>
> #endif /* !_MACHINE_INTR_H_ */
>