> Date: Wed, 2 Mar 2022 10:53:54 +0000 > From: Visa Hankala <v...@hankala.org> > > Holding NET_LOCK() while invoking a taskq barrier can result in > a deadlock because the lock can prevent the taskq from making progress. > Avoid this problem in cad_down() by serializing the driver's ioctl > handler with an rwlock so that NET_LOCK() can be released temporarily. > > The patch additionally makes cad_up() release NET_LOCK() when allocating > the Tx and Rx rings. There is a risk that the allocation blocks. > The code probably should allow allocation failure, but that I leave for > another patch. > > OK?
I'm not 100% up to date with the network stack locking requirements, but: > Index: dev/fdt/if_cad.c > =================================================================== > RCS file: src/sys/dev/fdt/if_cad.c,v > retrieving revision 1.9 > diff -u -p -r1.9 if_cad.c > --- dev/fdt/if_cad.c 27 Jan 2022 17:34:51 -0000 1.9 > +++ dev/fdt/if_cad.c 2 Mar 2022 10:28:04 -0000 > @@ -30,6 +30,7 @@ > #include <sys/ioctl.h> > #include <sys/mutex.h> > #include <sys/kstat.h> > +#include <sys/rwlock.h> > #include <sys/task.h> > #include <sys/timeout.h> > > @@ -297,6 +298,7 @@ struct cad_softc { > unsigned int sc_rx_cons; > uint32_t sc_netctl; > > + struct rwlock sc_cfg_lock; > struct task sc_statchg_task; > uint32_t sc_tx_freq; > > @@ -467,10 +469,13 @@ cad_attach(struct device *parent, struct > if (OF_is_compatible(faa->fa_node, "cdns,zynq-gem")) > sc->sc_rxhang_erratum = 1; > > + rw_init(&sc->sc_cfg_lock, "cadcfg"); > timeout_set(&sc->sc_tick, cad_tick, sc); > task_set(&sc->sc_statchg_task, cad_statchg_task, sc); > > + rw_enter_write(&sc->sc_cfg_lock); > cad_reset(sc); > + rw_exit_write(&sc->sc_cfg_lock); > > sc->sc_ih = fdt_intr_establish(faa->fa_node, IPL_NET | IPL_MPSAFE, > cad_intr, sc, sc->sc_dev.dv_xname); > @@ -548,6 +553,9 @@ cad_ioctl(struct ifnet *ifp, u_long cmd, > int error = 0; > int s; > > + NET_UNLOCK(); > + rw_enter_write(&sc->sc_cfg_lock); > + NET_LOCK(); > s = splnet(); This looks a bit odd. But I suppose that splnet() is needed because NET_LOCK() doesn't prevent the interrupt handler from running? But that doesn't work since the driver uses IPL_MPSAFE. So is this just to satisfysome splassert() calls? > switch (cmd) { > @@ -585,6 +593,7 @@ cad_ioctl(struct ifnet *ifp, u_long cmd, > } > > splx(s); > + rw_exit_write(&sc->sc_cfg_lock); > > return error; > } > @@ -598,6 +607,8 @@ cad_reset(struct cad_softc *sc) > unsigned int freq, i; > uint32_t div, netcfg; > > + rw_assert_wrlock(&sc->sc_cfg_lock); > + > HWRITE4(sc, GEM_NETCTL, 0); > HWRITE4(sc, GEM_IDR, ~0U); > HWRITE4(sc, GEM_RXSR, 0); > @@ -655,6 +666,11 @@ cad_up(struct cad_softc *sc) > unsigned int i, nrxd, ntxd; > uint32_t val; > > + rw_assert_wrlock(&sc->sc_cfg_lock); > + > + /* Release lock for memory allocation. */ > + NET_UNLOCK(); > + > if (sc->sc_dma64) > flags |= BUS_DMA_64BIT; > > @@ -810,6 +826,8 @@ cad_up(struct cad_softc *sc) > } > } > > + NET_LOCK(); > + > /* > * Set MAC address filters. > */ > @@ -905,11 +923,16 @@ cad_down(struct cad_softc *sc) > struct cad_buf *rxb, *txb; > unsigned int i, timeout; > > + rw_assert_wrlock(&sc->sc_cfg_lock); > + > ifp->if_flags &= ~IFF_RUNNING; > > ifq_clr_oactive(&ifp->if_snd); > ifp->if_timer = 0; > > + /* Avoid lock order issues with barriers. */ > + NET_UNLOCK(); > + > timeout_del_barrier(&sc->sc_tick); > > /* Disable data transfer. */ > @@ -981,6 +1004,8 @@ cad_down(struct cad_softc *sc) > cad_dmamem_free(sc, sc->sc_rxring); > sc->sc_rxring = NULL; > sc->sc_rxdesc = NULL; > + > + NET_LOCK(); > } > > uint8_t > @@ -1010,6 +1035,8 @@ cad_iff(struct cad_softc *sc) > struct ether_multistep step; > uint64_t hash; > uint32_t netcfg; > + > + rw_assert_wrlock(&sc->sc_cfg_lock); > > netcfg = HREAD4(sc, GEM_NETCFG); > netcfg &= ~GEM_NETCFG_UCASTHASHEN; > >