On Sun, May 23, 2021 at 09:05:24AM +0000, Visa Hankala wrote:
> 
> [...]
> 
> Here is a revised patch with a slightly altered approach.
> 
> I have replaced the per-CPU queues with system-wide queues to make
> the code simpler.

I think that's fine to start with.  We don't have many softintrs.  We
can go per-CPU later if there is an issue with performance.
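
(If we do go per-CPU later I imagine the queues would simply hang off
struct cpu_info, something like this purely hypothetical sketch with
invented field names:

struct x86_soft_intr_queue ci_softintr_queue[X86_NSOFTINTR];
struct mutex ci_softintr_lock;

but that can wait until it's actually a problem.)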

> When a CPU starts processing a soft interrupt, it reserves the handler
> to prevent concurrent execution. If the soft interrupt gets rescheduled
> during processing, the handler is run again by the same CPU. This breaks
> FIFO ordering, though.

If you want to preserve FIFO, you can reinsert the handler at the queue
tail.  That would be fairer.

If FIFO is the current behavior, I think we ought to keep it.

> softintr_disestablish() now calls sched_barrier() to drain processing.
> This should also prevent the deadlock with the kernel lock.
> 
> It appeared tricky to add witness(4) integration. witness(4) treats the
> kernel lock as a sleepable lock to avoid ordering issues with rwlocks.
> This creates a dilemma in softintr_dispatch() because of nesting.
> The natural lock class for the required soft interrupt pseudo-lock is
> mutex, but that does not work if an MP-safe soft interrupt got preempted
> by a higher IPL non-MP-safe soft interrupt.
> 
> The dilemma could be worked around by preventing nesting when witness(4)
> is enabled, altering the system's behaviour. Another approach is to skip
> the pseudo-lock handling when processing is nested.

Altering the system's behavior sounds like a bad idea.

> However, as soft interrupt handlers cannot use rwlocks, for deadlock
> prevention it should be sufficient to ensure that the caller of
> softintr_disestablish() does not hold spinning locks (the kernel lock
> excluded). Hence softintr_disestablish() calls assertwaitok() at the
> start. The assertion is actually implied by sched_barrier().

I like your implementation a lot better than mine.

Misc. thoughts below, though I don't have any real complaints.

> Index: arch/amd64/amd64/softintr.c
> ===================================================================
> RCS file: src/sys/arch/amd64/amd64/softintr.c,v
> retrieving revision 1.10
> diff -u -p -r1.10 softintr.c
> --- arch/amd64/amd64/softintr.c       11 Sep 2020 09:27:09 -0000      1.10
> +++ arch/amd64/amd64/softintr.c       23 May 2021 08:56:28 -0000
> @@ -37,12 +37,33 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/malloc.h>
> +#include <sys/mutex.h>
> +#include <sys/queue.h>
>  
>  #include <machine/intr.h>
>  
>  #include <uvm/uvm_extern.h>
>  
> -struct x86_soft_intr x86_soft_intrs[X86_NSOFTINTR];
> +struct x86_soft_intrhand {
> +     STAILQ_ENTRY(x86_soft_intrhand) sih_q;
> +     void                    (*sih_fn)(void *);
> +     void                    *sih_arg;
> +     struct cpu_info         *sih_runner;
> +     int                     sih_which;
> +     unsigned short          sih_flags;
> +     unsigned short          sih_state;
> +};
> +
> +#define SIF_MPSAFE           0x0001
> +
> +#define SIS_DYING            0x0001
> +#define SIS_PENDING          0x0002
> +#define SIS_RESTART          0x0004
> +
> +STAILQ_HEAD(x86_soft_intr_queue, x86_soft_intrhand);
> +
> +struct x86_soft_intr_queue softintr_queue[X86_NSOFTINTR];

Why did we switch to STAILQ?  I know we don't have very many
softintr_disestablish() calls, but isn't O(1) removal worth the extra
pointer?
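
Just to spell it out, with a TAILQ the removal in
softintr_disestablish() stays O(1):

TAILQ_REMOVE(&softintr_queue[sih->sih_which], sih, sih_q);

whereas STAILQ_REMOVE() has to walk the queue from the head to find
the predecessor.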

> +struct mutex softintr_lock = MUTEX_INITIALIZER(IPL_HIGH);
>  
>  const int x86_soft_intr_to_ssir[X86_NSOFTINTR] = {
>       SIR_CLOCK,
> @@ -58,15 +79,10 @@ const int x86_soft_intr_to_ssir[X86_NSOF
>  void
>  softintr_init(void)
>  {
> -     struct x86_soft_intr *si;
>       int i;
>  
> -     for (i = 0; i < X86_NSOFTINTR; i++) {
> -             si = &x86_soft_intrs[i];
> -             TAILQ_INIT(&si->softintr_q);
> -             mtx_init(&si->softintr_lock, IPL_HIGH);
> -             si->softintr_ssir = x86_soft_intr_to_ssir[i];
> -     }
> +     for (i = 0; i < X86_NSOFTINTR; i++)
> +             STAILQ_INIT(&softintr_queue[i]);

Nit: nitems(), if you want.
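
i.e. "for (i = 0; i < nitems(softintr_queue); i++)", so the loop bound
can't drift from the array size.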

>  }
>  
>  /*
> @@ -78,31 +94,45 @@ void
>  softintr_dispatch(int which)
>  {
>       struct cpu_info *ci = curcpu();
> -     struct x86_soft_intr *si = &x86_soft_intrs[which];
> +     struct x86_soft_intr_queue *queue = &softintr_queue[which];
>       struct x86_soft_intrhand *sih;
>       int floor;
>  
>       floor = ci->ci_handled_intr_level;
>       ci->ci_handled_intr_level = ci->ci_ilevel;
>  
> -     KERNEL_LOCK();
> -     for (;;) {
> -             mtx_enter(&si->softintr_lock);
> -             sih = TAILQ_FIRST(&si->softintr_q);
> -             if (sih == NULL) {
> -                     mtx_leave(&si->softintr_lock);
> -                     break;
> +     mtx_enter(&softintr_lock);
> +     while ((sih = STAILQ_FIRST(queue)) != NULL) {
> +             KASSERT((sih->sih_state & (SIS_PENDING | SIS_RESTART)) ==
> +                 SIS_PENDING);
> +             KASSERT(sih->sih_runner == NULL);
> +
> +             sih->sih_state &= ~SIS_PENDING;

Nit: you could use CLR(), ISSET(), etc., if you want.  I won't mention
it again though.
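
That is, e.g.:

CLR(sih->sih_state, SIS_PENDING);

and ISSET() for the tests, instead of spelling out the bit twiddling.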

> +             STAILQ_REMOVE_HEAD(queue, sih_q);
> +             sih->sih_runner = ci;
> +restart:
> +             mtx_leave(&softintr_lock);
> +
> +             if (sih->sih_flags & SIF_MPSAFE) {
> +                     (*sih->sih_fn)(sih->sih_arg);
> +             } else {
> +                     KERNEL_LOCK();
> +                     (*sih->sih_fn)(sih->sih_arg);
> +                     KERNEL_UNLOCK();
>               }
> -             TAILQ_REMOVE(&si->softintr_q, sih, sih_q);
> -             sih->sih_pending = 0;
>  
> +             mtx_enter(&softintr_lock);
>               uvmexp.softs++;
>  
> -             mtx_leave(&si->softintr_lock);
> +             KASSERT((sih->sih_state & SIS_PENDING) == 0);
> +             if (sih->sih_state & SIS_RESTART) {
> +                     sih->sih_state &= ~SIS_RESTART;
> +                     goto restart;
> +             }

So, here is where you would reinsert if you wanted to keep FIFO.
Something like:

if (sih->sih_state & SIS_RESTART) {
        sih->sih_state &= ~SIS_RESTART;
        sih->sih_state |= SIS_PENDING;
        STAILQ_INSERT_TAIL(queue, sih, sih_q);
}

>  
> -             (*sih->sih_fn)(sih->sih_arg);
> +             sih->sih_runner = NULL;

... of course you would need to clear sih_runner before reinsertion.

Actually, I think you ought to clear sih_runner immediately after you
reenter the mutex.  The symmetry will make an audit easier.
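
Roughly, with the FIFO reinsertion folded in, the tail of the loop
would become:

mtx_enter(&softintr_lock);
sih->sih_runner = NULL;
uvmexp.softs++;

KASSERT((sih->sih_state & SIS_PENDING) == 0);
if (sih->sih_state & SIS_RESTART) {
        sih->sih_state &= ~SIS_RESTART;
        sih->sih_state |= SIS_PENDING;
        STAILQ_INSERT_TAIL(queue, sih, sih_q);
}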

>       }
> -     KERNEL_UNLOCK();
> +     mtx_leave(&softintr_lock);
>  
>       ci->ci_handled_intr_level = floor;
>  }
> @@ -115,9 +145,13 @@ softintr_dispatch(int which)
>  void *
>  softintr_establish(int ipl, void (*func)(void *), void *arg)
>  {
> -     struct x86_soft_intr *si;
>       struct x86_soft_intrhand *sih;
>       int which;
> +     unsigned short flags = 0;
> +
> +     if (ipl & IPL_MPSAFE)
> +             flags |= SIF_MPSAFE;
> +     ipl &= ~IPL_MPSAFE;

So IPL_MPSAFE is a flag that we OR into an integer IPL level?  Seems
kinda messy.  I guess if we already do it elsewhere it's fine.
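
i.e. callers would do something like (function/softc names invented):

sih = softintr_establish(IPL_SOFTNET | IPL_MPSAFE, somefn, sc);

It works as long as IPL_MPSAFE stays clear of the real IPL values.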

>  
>       switch (ipl) {
>       case IPL_SOFTCLOCK:
> @@ -137,14 +171,14 @@ softintr_establish(int ipl, void (*func)
>               panic("softintr_establish");
>       }
>  
> -     si = &x86_soft_intrs[which];
> -
>       sih = malloc(sizeof(*sih), M_DEVBUF, M_NOWAIT);
>       if (__predict_true(sih != NULL)) {
> -             sih->sih_intrhead = si;
>               sih->sih_fn = func;
>               sih->sih_arg = arg;
> -             sih->sih_pending = 0;
> +             sih->sih_runner = NULL;
> +             sih->sih_which = which;
> +             sih->sih_flags = flags;
> +             sih->sih_state = 0;
>       }
>       return (sih);
>  }
> @@ -157,15 +191,52 @@ softintr_establish(int ipl, void (*func)
>  void
>  softintr_disestablish(void *arg)
>  {
> +     struct cpu_info *runner = NULL;
>       struct x86_soft_intrhand *sih = arg;
> -     struct x86_soft_intr *si = sih->sih_intrhead;
>  
> -     mtx_enter(&si->softintr_lock);
> -     if (sih->sih_pending) {
> -             TAILQ_REMOVE(&si->softintr_q, sih, sih_q);
> -             sih->sih_pending = 0;
> +     assertwaitok();
> +
> +     mtx_enter(&softintr_lock);
> +     sih->sih_state |= SIS_DYING;
> +     if (sih->sih_state & SIS_PENDING) {
> +             sih->sih_state &= ~SIS_PENDING;
> +             STAILQ_REMOVE(&softintr_queue[sih->sih_which], sih,
> +                 x86_soft_intrhand, sih_q);
>       }
> -     mtx_leave(&si->softintr_lock);
> +     runner = sih->sih_runner;
> +     mtx_leave(&softintr_lock);
> +
> +     if (runner != NULL)
> +             sched_barrier(runner);
> +
> +     KASSERT((sih->sih_state & (SIS_PENDING | SIS_RESTART)) == 0);
> +     KASSERT(sih->sih_runner == NULL);
>  
>       free(sih, M_DEVBUF, sizeof(*sih));
>  }
> +
> +void
> +softintr_schedule(void *arg)
> +{
> +     struct x86_soft_intrhand *sih = arg;
> +     struct x86_soft_intr_queue *queue = &softintr_queue[sih->sih_which];
> +     int trigger = 0;
> +
> +     mtx_enter(&softintr_lock);
> +     KASSERT((sih->sih_state & SIS_DYING) == 0);

The SIS_DYING sanity-check is a great idea.

> +     if (sih->sih_runner == NULL) {
> +             KASSERT((sih->sih_state & SIS_RESTART) == 0);
> +             if ((sih->sih_state & SIS_PENDING) == 0) {
> +                     STAILQ_INSERT_TAIL(queue, sih, sih_q);
> +                     sih->sih_state |= SIS_PENDING;
> +                     trigger = 1;
> +             }
> +     } else {
> +             KASSERT((sih->sih_state & SIS_PENDING) == 0);
> +             sih->sih_state |= SIS_RESTART;
> +     }
> +     mtx_leave(&softintr_lock);
> +
> +     if (trigger)
> +             softintr(x86_soft_intr_to_ssir[sih->sih_which]);
> +}

This aligns with your description from earlier and looks correct.

> Index: arch/amd64/include/intr.h
> ===================================================================
> RCS file: src/sys/arch/amd64/include/intr.h,v
> retrieving revision 1.32
> diff -u -p -r1.32 intr.h
> --- arch/amd64/include/intr.h 17 Jun 2020 06:14:52 -0000      1.32
> +++ arch/amd64/include/intr.h 23 May 2021 08:56:28 -0000
> @@ -231,42 +231,13 @@ extern void (*ipifunc[X86_NIPI])(struct 
>  #define      X86_NSOFTINTR                   3
>  
>  #ifndef _LOCORE
> -#include <sys/queue.h>
> -
> -struct x86_soft_intrhand {
> -     TAILQ_ENTRY(x86_soft_intrhand)
> -             sih_q;
> -     struct x86_soft_intr *sih_intrhead;
> -     void    (*sih_fn)(void *);
> -     void    *sih_arg;
> -     int     sih_pending;
> -};
> -
> -struct x86_soft_intr {
> -     TAILQ_HEAD(, x86_soft_intrhand)
> -                     softintr_q;
> -     int             softintr_ssir;
> -     struct mutex    softintr_lock;
> -};
>  
>  void *softintr_establish(int, void (*)(void *), void *);
>  void softintr_disestablish(void *);
>  void softintr_init(void);
>  void softintr_dispatch(int);
> +void softintr_schedule(void *);
>  
> -#define      softintr_schedule(arg)                                  \
> -do {                                                                 \
> -     struct x86_soft_intrhand *__sih = (arg);                        \
> -     struct x86_soft_intr *__si = __sih->sih_intrhead;               \
> -                                                                     \
> -     mtx_enter(&__si->softintr_lock);                                \
> -     if (__sih->sih_pending == 0) {                                  \
> -             TAILQ_INSERT_TAIL(&__si->softintr_q, __sih, sih_q);     \
> -             __sih->sih_pending = 1;                                 \
> -             softintr(__si->softintr_ssir);                          \
> -     }                                                               \
> -     mtx_leave(&__si->softintr_lock);                                \
> -} while (/*CONSTCOND*/ 0)

Thank you for moving this out of the header.

>  #endif /* _LOCORE */
>  
>  #endif /* !_MACHINE_INTR_H_ */
> 
