On Sun, Dec 06, 2015 at 02:00:16PM +1000, David Gwynne wrote: > the current code for serialising if_start calls for mpsafe nics does what it > says.
as per mpi@'s suggestion, this makes the ifq code responsible for the task serialisation. all the machinery is there, but it provides a minimal step toward coordination of the oactive flag. the only concession is if a driver wants to clear oactive and run its start routine again it can call ifq_restart() to have it serialised with the stack's calls to the start routine. if there's a desire for a driver to run all of its txeof serialised with its start routine, we'll figure out the best way to provide that when the problem really occurs. if_start_barrier is now called ifq_barrier too. Index: dev/pci/if_myx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_myx.c,v retrieving revision 1.90 diff -u -p -r1.90 if_myx.c --- dev/pci/if_myx.c 3 Dec 2015 12:45:56 -0000 1.90 +++ dev/pci/if_myx.c 8 Dec 2015 11:31:09 -0000 @@ -1208,7 +1208,7 @@ myx_up(struct myx_softc *sc) ifq_clr_oactive(&ifp->if_snd); SET(ifp->if_flags, IFF_RUNNING); myx_iff(sc); - myx_start(ifp); + if_start(ifp); return; @@ -1330,6 +1330,8 @@ myx_down(struct myx_softc *sc) int s; int ring; + CLR(ifp->if_flags, IFF_RUNNING); + bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize, BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE); sc->sc_linkdown = sts->ms_linkdown; @@ -1362,8 +1364,7 @@ myx_down(struct myx_softc *sc) } ifq_clr_oactive(&ifp->if_snd); - CLR(ifp->if_flags, IFF_RUNNING); - if_start_barrier(ifp); + ifq_barrier(&ifp->if_snd); for (ring = 0; ring < 2; ring++) { struct myx_rx_ring *mrr = &sc->sc_rx_ring[ring]; @@ -1702,9 +1703,8 @@ myx_txeof(struct myx_softc *sc, u_int32_ sc->sc_tx_ring_cons = idx; sc->sc_tx_cons = cons; - ifq_clr_oactive(&ifp->if_snd); - if (!ifq_empty(&ifp->if_snd)) - if_start(ifp); + if (ifq_is_oactive(&ifp->if_snd)) + ifq_restart(&ifp->if_snd); } void Index: dev/pci/if_bnx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_bnx.c,v retrieving revision 1.118 diff -u -p -r1.118 
if_bnx.c --- dev/pci/if_bnx.c 5 Dec 2015 16:23:37 -0000 1.118 +++ dev/pci/if_bnx.c 8 Dec 2015 11:31:09 -0000 @@ -871,6 +871,7 @@ bnx_attachhook(void *xsc) ifp = &sc->arpcom.ac_if; ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_xflags = IFXF_MPSAFE; ifp->if_ioctl = bnx_ioctl; ifp->if_start = bnx_start; ifp->if_watchdog = bnx_watchdog; @@ -4573,20 +4574,14 @@ bnx_tx_intr(struct bnx_softc *sc) used = atomic_sub_int_nv(&sc->used_tx_bd, freed); + sc->tx_cons = sw_tx_cons; + /* Clear the TX timeout timer. */ if (used == 0) ifp->if_timer = 0; - /* Clear the tx hardware queue full flag. */ - if (used < sc->max_tx_bd) { - DBRUNIF(ifq_is_oactive(&ifp->if_snd), - printf("%s: Open TX chain! %d/%d (used/total)\n", - sc->bnx_dev.dv_xname, used, - sc->max_tx_bd)); - ifq_clr_oactive(&ifp->if_snd); - } - - sc->tx_cons = sw_tx_cons; + if (ifq_is_oactive(&ifp->if_snd)) + ifq_restart(&ifp->if_snd); } /****************************************************************************/ @@ -4880,10 +4875,8 @@ bnx_start(struct ifnet *ifp) int used; u_int16_t tx_prod, tx_chain_prod; - /* If there's no link or the transmit queue is empty then just exit. 
*/ - if (!sc->bnx_link || IFQ_IS_EMPTY(&ifp->if_snd)) { - DBPRINT(sc, BNX_INFO_SEND, - "%s(): No link or transmit queue empty.\n", __FUNCTION__); + if (!sc->bnx_link) { + ifq_purge(&ifp->if_snd); goto bnx_start_exit; } Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.423 diff -u -p -r1.423 if.c --- net/if.c 8 Dec 2015 10:06:12 -0000 1.423 +++ net/if.c 8 Dec 2015 11:31:09 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: if.c,v 1.423 2015/12/08 10:06:12 dlg Exp $ */ +/* $OpenBSD: if.c,v 1.422 2015/12/05 10:07:55 tedu Exp $ */ /* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */ /* @@ -153,7 +153,6 @@ void if_input_process(void *); void ifa_print_all(void); #endif -void if_start_mpsafe(struct ifnet *ifp); void if_start_locked(struct ifnet *ifp); /* @@ -512,7 +511,7 @@ if_attach_common(struct ifnet *ifp) TAILQ_INIT(&ifp->if_addrlist); TAILQ_INIT(&ifp->if_maddrlist); - ifq_init(&ifp->if_snd); + ifq_init(&ifp->if_snd, ifp); ifp->if_addrhooks = malloc(sizeof(*ifp->if_addrhooks), M_TEMP, M_WAITOK); @@ -540,7 +539,7 @@ void if_start(struct ifnet *ifp) { if (ISSET(ifp->if_xflags, IFXF_MPSAFE)) - if_start_mpsafe(ifp); + ifq_start(&ifp->if_snd); else if_start_locked(ifp); } Index: net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.66 diff -u -p -r1.66 if_var.h --- net/if_var.h 8 Dec 2015 10:14:58 -0000 1.66 +++ net/if_var.h 8 Dec 2015 11:31:09 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: if_var.h,v 1.66 2015/12/08 10:14:58 dlg Exp $ */ +/* $OpenBSD: if_var.h,v 1.64 2015/12/05 16:24:59 mpi Exp $ */ /* $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $ */ /* @@ -42,6 +42,7 @@ #include <sys/mbuf.h> #include <sys/srp.h> #include <sys/refcnt.h> +#include <sys/task.h> #include <sys/time.h> #include <net/ifq.h> Index: net/ifq.c =================================================================== RCS file: 
/cvs/src/sys/net/ifq.c,v retrieving revision 1.1 diff -u -p -r1.1 ifq.c --- net/ifq.c 8 Dec 2015 10:06:12 -0000 1.1 +++ net/ifq.c 8 Dec 2015 11:31:09 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: ifq.c,v 1.1 2015/12/08 10:06:12 dlg Exp $ */ +/* $OpenBSD$ */ /* * Copyright (c) 2015 David Gwynne <d...@openbsd.org> @@ -64,6 +64,12 @@ struct priq { * ifqueue serialiser */ +void ifq_start_task(void *); +void ifq_restart_task(void *); +void ifq_barrier_task(void *); + +#define TASK_ONQUEUE 0x1 + static inline unsigned int ifq_enter(struct ifqueue *ifq) { @@ -73,57 +79,108 @@ ifq_enter(struct ifqueue *ifq) static inline unsigned int ifq_leave(struct ifqueue *ifq) { - if (atomic_cas_uint(&ifq->ifq_serializer, 1, 0) == 1) - return (1); + return (atomic_cas_uint(&ifq->ifq_serializer, 1, 0) == 1); +} + +static inline int +ifq_next_task(struct ifqueue *ifq, struct task *work) +{ + struct task *t; + int rv = 0; ifq->ifq_serializer = 1; - return (0); + mtx_enter(&ifq->ifq_task_mtx); + t = TAILQ_FIRST(&ifq->ifq_task_list); + if (t != NULL) { + TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry); + CLR(t->t_flags, TASK_ONQUEUE); + + *work = *t; /* copy to caller to avoid races */ + + rv = 1; + } + mtx_leave(&ifq->ifq_task_mtx); + + return (rv); } void -if_start_mpsafe(struct ifnet *ifp) +ifq_serialize(struct ifqueue *ifq, struct task *t) { - struct ifqueue *ifq = &ifp->if_snd; + struct task work; + + if (ISSET(t->t_flags, TASK_ONQUEUE)) + return; + + mtx_enter(&ifq->ifq_task_mtx); + if (!ISSET(t->t_flags, TASK_ONQUEUE)) { + SET(t->t_flags, TASK_ONQUEUE); + TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry); + } + mtx_leave(&ifq->ifq_task_mtx); if (!ifq_enter(ifq)) return; do { - if (__predict_false(!ISSET(ifp->if_flags, IFF_RUNNING))) { - ifq->ifq_serializer = 0; - wakeup_one(&ifq->ifq_serializer); - return; - } + while (ifq_next_task(ifq, &work)) + (*work.t_func)(work.t_arg); - if (ifq_empty(ifq) || ifq_is_oactive(ifq)) - continue; + } while (!ifq_leave(ifq)); +} + +void +ifq_start_task(void *p) 
+{ + struct ifqueue *ifq = p; + struct ifnet *ifp = ifq->ifq_if; - ifp->if_start(ifp); + if (!ISSET(ifp->if_flags, IFF_RUNNING) || + ifq_empty(ifq) || ifq_is_oactive(ifq)) + return; - } while (!ifq_leave(ifq)); + ifp->if_start(ifp); +} + +void +ifq_restart_task(void *p) +{ + struct ifqueue *ifq = p; + struct ifnet *ifp = ifq->ifq_if; + + ifq_clr_oactive(ifq); + ifp->if_start(ifp); } void -if_start_barrier(struct ifnet *ifp) +ifq_barrier(struct ifqueue *ifq) { struct sleep_state sls; - struct ifqueue *ifq = &ifp->if_snd; + unsigned int notdone = 1; + struct task t = TASK_INITIALIZER(ifq_barrier_task, ¬done); /* this should only be called from converted drivers */ - KASSERT(ISSET(ifp->if_xflags, IFXF_MPSAFE)); - - /* drivers should only call this on the way down */ - KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING)); + KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE)); if (ifq->ifq_serializer == 0) return; - if_start_mpsafe(ifp); /* spin the wheel to guarantee a wakeup */ - do { - sleep_setup(&sls, &ifq->ifq_serializer, PWAIT, "ifbar"); - sleep_finish(&sls, ifq->ifq_serializer != 0); - } while (ifq->ifq_serializer != 0); + ifq_serialize(ifq, &t); + + while (notdone) { + sleep_setup(&sls, ¬done, PWAIT, "ifqbar"); + sleep_finish(&sls, notdone); + } +} + +void +ifq_barrier_task(void *p) +{ + unsigned int *notdone = p; + + *notdone = 0; + wakeup_one(notdone); } /* @@ -131,8 +188,10 @@ if_start_barrier(struct ifnet *ifp) */ void -ifq_init(struct ifqueue *ifq) +ifq_init(struct ifqueue *ifq, struct ifnet *ifp) { + ifq->ifq_if = ifp; + mtx_init(&ifq->ifq_mtx, IPL_NET); ifq->ifq_drops = 0; @@ -140,8 +199,14 @@ ifq_init(struct ifqueue *ifq) ifq->ifq_ops = &priq_ops; ifq->ifq_q = priq_ops.ifqop_alloc(NULL); - ifq->ifq_serializer = 0; ifq->ifq_len = 0; + + mtx_init(&ifq->ifq_task_mtx, IPL_NET); + TAILQ_INIT(&ifq->ifq_task_list); + ifq->ifq_serializer = 0; + + task_set(&ifq->ifq_start, ifq_start_task, ifq); + task_set(&ifq->ifq_restart, ifq_restart_task, ifq); if (ifq->ifq_maxlen == 
0) ifq_set_maxlen(ifq, IFQ_MAXLEN); Index: net/ifq.h =================================================================== RCS file: /cvs/src/sys/net/ifq.h,v retrieving revision 1.1 diff -u -p -r1.1 ifq.h --- net/ifq.h 8 Dec 2015 10:06:12 -0000 1.1 +++ net/ifq.h 8 Dec 2015 11:31:09 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: ifq.h,v 1.1 2015/12/08 10:06:12 dlg Exp $ */ +/* $OpenBSD$ */ /* * Copyright (c) 2015 David Gwynne <d...@openbsd.org> @@ -24,14 +24,25 @@ struct ifnet; struct ifq_ops; struct ifqueue { + struct ifnet *ifq_if; + + /* mbuf handling */ struct mutex ifq_mtx; uint64_t ifq_drops; const struct ifq_ops *ifq_ops; void *ifq_q; unsigned int ifq_len; - unsigned int ifq_serializer; unsigned int ifq_oactive; + /* work serialisation */ + struct mutex ifq_task_mtx; + struct task_list ifq_task_list; + unsigned int ifq_serializer; + + /* work to be serialised */ + struct task ifq_start; + struct task ifq_restart; + unsigned int ifq_maxlen; }; @@ -54,7 +65,7 @@ struct ifq_ops { * Interface send queues. */ -void ifq_init(struct ifqueue *); +void ifq_init(struct ifqueue *, struct ifnet *); void ifq_attach(struct ifqueue *, const struct ifq_ops *, void *); void ifq_destroy(struct ifqueue *); int ifq_enqueue_try(struct ifqueue *, struct mbuf *); @@ -66,6 +77,8 @@ struct mbuf *ifq_dequeue(struct ifqueue unsigned int ifq_purge(struct ifqueue *); void *ifq_q_enter(struct ifqueue *, const struct ifq_ops *); void ifq_q_leave(struct ifqueue *, void *); +void ifq_serialize(struct ifqueue *, struct task *); +void ifq_barrier(struct ifqueue *); #define ifq_len(_ifq) ((_ifq)->ifq_len) #define ifq_empty(_ifq) (ifq_len(_ifq) == 0) @@ -87,6 +100,18 @@ static inline unsigned int ifq_is_oactive(struct ifqueue *ifq) { return (ifq->ifq_oactive); +} + +static inline void +ifq_start(struct ifqueue *ifq) +{ + ifq_serialize(ifq, &ifq->ifq_start); +} + +static inline void +ifq_restart(struct ifqueue *ifq) +{ + ifq_serialize(ifq, &ifq->ifq_restart); } extern const struct ifq_ops * const ifq_priq_ops;