> Date: Wed, 2 Mar 2022 10:53:54 +0000
> From: Visa Hankala <[email protected]>
>
> Holding NET_LOCK() while invoking a taskq barrier can result in
> a deadlock because the lock can prevent the taskq from making progress.
> Avoid this problem in cad_down() by serializing the driver's ioctl
> handler with an rwlock so that NET_LOCK() can be released temporarily.
>
> The patch additionally makes cad_up() release NET_LOCK() when allocating
> the Tx and Rx rings. There is a risk that the allocation blocks.
> The code probably should allow allocation failure, but that I leave for
> another patch.
>
> OK?
I'm not 100% up to date with the network stack locking requirements,
but:
> Index: dev/fdt/if_cad.c
> ===================================================================
> RCS file: src/sys/dev/fdt/if_cad.c,v
> retrieving revision 1.9
> diff -u -p -r1.9 if_cad.c
> --- dev/fdt/if_cad.c 27 Jan 2022 17:34:51 -0000 1.9
> +++ dev/fdt/if_cad.c 2 Mar 2022 10:28:04 -0000
> @@ -30,6 +30,7 @@
> #include <sys/ioctl.h>
> #include <sys/mutex.h>
> #include <sys/kstat.h>
> +#include <sys/rwlock.h>
> #include <sys/task.h>
> #include <sys/timeout.h>
>
> @@ -297,6 +298,7 @@ struct cad_softc {
> unsigned int sc_rx_cons;
> uint32_t sc_netctl;
>
> + struct rwlock sc_cfg_lock;
> struct task sc_statchg_task;
> uint32_t sc_tx_freq;
>
> @@ -467,10 +469,13 @@ cad_attach(struct device *parent, struct
> if (OF_is_compatible(faa->fa_node, "cdns,zynq-gem"))
> sc->sc_rxhang_erratum = 1;
>
> + rw_init(&sc->sc_cfg_lock, "cadcfg");
> timeout_set(&sc->sc_tick, cad_tick, sc);
> task_set(&sc->sc_statchg_task, cad_statchg_task, sc);
>
> + rw_enter_write(&sc->sc_cfg_lock);
> cad_reset(sc);
> + rw_exit_write(&sc->sc_cfg_lock);
>
> sc->sc_ih = fdt_intr_establish(faa->fa_node, IPL_NET | IPL_MPSAFE,
> cad_intr, sc, sc->sc_dev.dv_xname);
> @@ -548,6 +553,9 @@ cad_ioctl(struct ifnet *ifp, u_long cmd,
> int error = 0;
> int s;
>
> + NET_UNLOCK();
> + rw_enter_write(&sc->sc_cfg_lock);
> + NET_LOCK();
> s = splnet();
This looks a bit odd. But I suppose that splnet() is needed because
NET_LOCK() doesn't prevent the interrupt handler from running? But
that doesn't work since the driver uses IPL_MPSAFE. So is this just
to satisfysome splassert() calls?
> switch (cmd) {
> @@ -585,6 +593,7 @@ cad_ioctl(struct ifnet *ifp, u_long cmd,
> }
>
> splx(s);
> + rw_exit_write(&sc->sc_cfg_lock);
>
> return error;
> }
> @@ -598,6 +607,8 @@ cad_reset(struct cad_softc *sc)
> unsigned int freq, i;
> uint32_t div, netcfg;
>
> + rw_assert_wrlock(&sc->sc_cfg_lock);
> +
> HWRITE4(sc, GEM_NETCTL, 0);
> HWRITE4(sc, GEM_IDR, ~0U);
> HWRITE4(sc, GEM_RXSR, 0);
> @@ -655,6 +666,11 @@ cad_up(struct cad_softc *sc)
> unsigned int i, nrxd, ntxd;
> uint32_t val;
>
> + rw_assert_wrlock(&sc->sc_cfg_lock);
> +
> + /* Release lock for memory allocation. */
> + NET_UNLOCK();
> +
> if (sc->sc_dma64)
> flags |= BUS_DMA_64BIT;
>
> @@ -810,6 +826,8 @@ cad_up(struct cad_softc *sc)
> }
> }
>
> + NET_LOCK();
> +
> /*
> * Set MAC address filters.
> */
> @@ -905,11 +923,16 @@ cad_down(struct cad_softc *sc)
> struct cad_buf *rxb, *txb;
> unsigned int i, timeout;
>
> + rw_assert_wrlock(&sc->sc_cfg_lock);
> +
> ifp->if_flags &= ~IFF_RUNNING;
>
> ifq_clr_oactive(&ifp->if_snd);
> ifp->if_timer = 0;
>
> + /* Avoid lock order issues with barriers. */
> + NET_UNLOCK();
> +
> timeout_del_barrier(&sc->sc_tick);
>
> /* Disable data transfer. */
> @@ -981,6 +1004,8 @@ cad_down(struct cad_softc *sc)
> cad_dmamem_free(sc, sc->sc_rxring);
> sc->sc_rxring = NULL;
> sc->sc_rxdesc = NULL;
> +
> + NET_LOCK();
> }
>
> uint8_t
> @@ -1010,6 +1035,8 @@ cad_iff(struct cad_softc *sc)
> struct ether_multistep step;
> uint64_t hash;
> uint32_t netcfg;
> +
> + rw_assert_wrlock(&sc->sc_cfg_lock);
>
> netcfg = HREAD4(sc, GEM_NETCFG);
> netcfg &= ~GEM_NETCFG_UCASTHASHEN;
>
>