> Date: Tue, 8 Dec 2015 21:58:49 +1000
> From: David Gwynne <[email protected]>
>
> On Sun, Dec 06, 2015 at 02:00:16PM +1000, David Gwynne wrote:
> > the current code for serialising if_start calls for mpsafe nics does what
> > it says.
>
> as per mpi@s suggestion, this makes the ifq code responsible for
> the task serialisation.
>
> all the machinery is there, but it provides a minimal step toward
> coordination of the oactive flag. the only concession is if a driver
> wants to clear oactive and run its start routine again it can call
> ifq_restart() to have it serialised with the stacks calls to the
> start routine.
>
> if there's a desire for a driver to run all of its txeof serialised
> with its start routine, we'll figure out the best way to provide
> that when the problem really occurs.
>
> if_start_barrier is now called ifq_barrier too.i
Bleah. I just spent two days in bed with a flu. I still need to
write down my conclusions from the experiments I did with forwarding
during n2k15. But one of the things I noticed that the taskctx
serialization reduced the forwarding performance a by something like
10%. I'd like to see what happens with this version of the diff. I
think I can recreate setup that I used during n2k15 with em(4) instead
of ix(4) at home. But it won't happen until after this weekend.
> Index: dev/pci/if_myx.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
> retrieving revision 1.90
> diff -u -p -r1.90 if_myx.c
> --- dev/pci/if_myx.c 3 Dec 2015 12:45:56 -0000 1.90
> +++ dev/pci/if_myx.c 8 Dec 2015 11:31:09 -0000
> @@ -1208,7 +1208,7 @@ myx_up(struct myx_softc *sc)
> ifq_clr_oactive(&ifp->if_snd);
> SET(ifp->if_flags, IFF_RUNNING);
> myx_iff(sc);
> - myx_start(ifp);
> + if_start(ifp);
>
> return;
>
> @@ -1330,6 +1330,8 @@ myx_down(struct myx_softc *sc)
> int s;
> int ring;
>
> + CLR(ifp->if_flags, IFF_RUNNING);
> +
> bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
> BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
> sc->sc_linkdown = sts->ms_linkdown;
> @@ -1362,8 +1364,7 @@ myx_down(struct myx_softc *sc)
> }
>
> ifq_clr_oactive(&ifp->if_snd);
> - CLR(ifp->if_flags, IFF_RUNNING);
> - if_start_barrier(ifp);
> + ifq_barrier(&ifp->if_snd);
>
> for (ring = 0; ring < 2; ring++) {
> struct myx_rx_ring *mrr = &sc->sc_rx_ring[ring];
> @@ -1702,9 +1703,8 @@ myx_txeof(struct myx_softc *sc, u_int32_
> sc->sc_tx_ring_cons = idx;
> sc->sc_tx_cons = cons;
>
> - ifq_clr_oactive(&ifp->if_snd);
> - if (!ifq_empty(&ifp->if_snd))
> - if_start(ifp);
> + if (ifq_is_oactive(&ifp->if_snd))
> + ifq_restart(&ifp->if_snd);
> }
>
> void
> Index: dev/pci/if_bnx.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_bnx.c,v
> retrieving revision 1.118
> diff -u -p -r1.118 if_bnx.c
> --- dev/pci/if_bnx.c 5 Dec 2015 16:23:37 -0000 1.118
> +++ dev/pci/if_bnx.c 8 Dec 2015 11:31:09 -0000
> @@ -871,6 +871,7 @@ bnx_attachhook(void *xsc)
> ifp = &sc->arpcom.ac_if;
> ifp->if_softc = sc;
> ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
> + ifp->if_xflags = IFXF_MPSAFE;
> ifp->if_ioctl = bnx_ioctl;
> ifp->if_start = bnx_start;
> ifp->if_watchdog = bnx_watchdog;
> @@ -4573,20 +4574,14 @@ bnx_tx_intr(struct bnx_softc *sc)
>
> used = atomic_sub_int_nv(&sc->used_tx_bd, freed);
>
> + sc->tx_cons = sw_tx_cons;
> +
> /* Clear the TX timeout timer. */
> if (used == 0)
> ifp->if_timer = 0;
>
> - /* Clear the tx hardware queue full flag. */
> - if (used < sc->max_tx_bd) {
> - DBRUNIF(ifq_is_oactive(&ifp->if_snd),
> - printf("%s: Open TX chain! %d/%d (used/total)\n",
> - sc->bnx_dev.dv_xname, used,
> - sc->max_tx_bd));
> - ifq_clr_oactive(&ifp->if_snd);
> - }
> -
> - sc->tx_cons = sw_tx_cons;
> + if (ifq_is_oactive(&ifp->if_snd))
> + ifq_restart(&ifp->if_snd);
> }
>
>
> /****************************************************************************/
> @@ -4880,10 +4875,8 @@ bnx_start(struct ifnet *ifp)
> int used;
> u_int16_t tx_prod, tx_chain_prod;
>
> - /* If there's no link or the transmit queue is empty then just exit. */
> - if (!sc->bnx_link || IFQ_IS_EMPTY(&ifp->if_snd)) {
> - DBPRINT(sc, BNX_INFO_SEND,
> - "%s(): No link or transmit queue empty.\n", __FUNCTION__);
> + if (!sc->bnx_link) {
> + ifq_purge(&ifp->if_snd);
> goto bnx_start_exit;
> }
>
> Index: net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.423
> diff -u -p -r1.423 if.c
> --- net/if.c 8 Dec 2015 10:06:12 -0000 1.423
> +++ net/if.c 8 Dec 2015 11:31:09 -0000
> @@ -1,4 +1,4 @@
> -/* $OpenBSD: if.c,v 1.423 2015/12/08 10:06:12 dlg Exp $ */
> +/* $OpenBSD: if.c,v 1.422 2015/12/05 10:07:55 tedu Exp $ */
> /* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */
>
> /*
> @@ -153,7 +153,6 @@ void if_input_process(void *);
> void ifa_print_all(void);
> #endif
>
> -void if_start_mpsafe(struct ifnet *ifp);
> void if_start_locked(struct ifnet *ifp);
>
> /*
> @@ -512,7 +511,7 @@ if_attach_common(struct ifnet *ifp)
> TAILQ_INIT(&ifp->if_addrlist);
> TAILQ_INIT(&ifp->if_maddrlist);
>
> - ifq_init(&ifp->if_snd);
> + ifq_init(&ifp->if_snd, ifp);
>
> ifp->if_addrhooks = malloc(sizeof(*ifp->if_addrhooks),
> M_TEMP, M_WAITOK);
> @@ -540,7 +539,7 @@ void
> if_start(struct ifnet *ifp)
> {
> if (ISSET(ifp->if_xflags, IFXF_MPSAFE))
> - if_start_mpsafe(ifp);
> + ifq_start(&ifp->if_snd);
> else
> if_start_locked(ifp);
> }
> Index: net/if_var.h
> ===================================================================
> RCS file: /cvs/src/sys/net/if_var.h,v
> retrieving revision 1.66
> diff -u -p -r1.66 if_var.h
> --- net/if_var.h 8 Dec 2015 10:14:58 -0000 1.66
> +++ net/if_var.h 8 Dec 2015 11:31:09 -0000
> @@ -1,4 +1,4 @@
> -/* $OpenBSD: if_var.h,v 1.66 2015/12/08 10:14:58 dlg Exp $ */
> +/* $OpenBSD: if_var.h,v 1.64 2015/12/05 16:24:59 mpi Exp $ */
> /* $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $ */
>
> /*
> @@ -42,6 +42,7 @@
> #include <sys/mbuf.h>
> #include <sys/srp.h>
> #include <sys/refcnt.h>
> +#include <sys/task.h>
> #include <sys/time.h>
>
> #include <net/ifq.h>
> Index: net/ifq.c
> ===================================================================
> RCS file: /cvs/src/sys/net/ifq.c,v
> retrieving revision 1.1
> diff -u -p -r1.1 ifq.c
> --- net/ifq.c 8 Dec 2015 10:06:12 -0000 1.1
> +++ net/ifq.c 8 Dec 2015 11:31:09 -0000
> @@ -1,4 +1,4 @@
> -/* $OpenBSD: ifq.c,v 1.1 2015/12/08 10:06:12 dlg Exp $ */
> +/* $OpenBSD$ */
>
> /*
> * Copyright (c) 2015 David Gwynne <[email protected]>
> @@ -64,6 +64,12 @@ struct priq {
> * ifqueue serialiser
> */
>
> +void ifq_start_task(void *);
> +void ifq_restart_task(void *);
> +void ifq_barrier_task(void *);
> +
> +#define TASK_ONQUEUE 0x1
> +
> static inline unsigned int
> ifq_enter(struct ifqueue *ifq)
> {
> @@ -73,57 +79,108 @@ ifq_enter(struct ifqueue *ifq)
> static inline unsigned int
> ifq_leave(struct ifqueue *ifq)
> {
> - if (atomic_cas_uint(&ifq->ifq_serializer, 1, 0) == 1)
> - return (1);
> + return (atomic_cas_uint(&ifq->ifq_serializer, 1, 0) == 1);
> +}
> +
> +static inline int
> +ifq_next_task(struct ifqueue *ifq, struct task *work)
> +{
> + struct task *t;
> + int rv = 0;
>
> ifq->ifq_serializer = 1;
>
> - return (0);
> + mtx_enter(&ifq->ifq_task_mtx);
> + t = TAILQ_FIRST(&ifq->ifq_task_list);
> + if (t != NULL) {
> + TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
> + CLR(t->t_flags, TASK_ONQUEUE);
> +
> + *work = *t; /* copy to caller to avoid races */
> +
> + rv = 1;
> + }
> + mtx_leave(&ifq->ifq_task_mtx);
> +
> + return (rv);
> }
>
> void
> -if_start_mpsafe(struct ifnet *ifp)
> +ifq_serialize(struct ifqueue *ifq, struct task *t)
> {
> - struct ifqueue *ifq = &ifp->if_snd;
> + struct task work;
> +
> + if (ISSET(t->t_flags, TASK_ONQUEUE))
> + return;
> +
> + mtx_enter(&ifq->ifq_task_mtx);
> + if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
> + SET(t->t_flags, TASK_ONQUEUE);
> + TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
> + }
> + mtx_leave(&ifq->ifq_task_mtx);
>
> if (!ifq_enter(ifq))
> return;
>
> do {
> - if (__predict_false(!ISSET(ifp->if_flags, IFF_RUNNING))) {
> - ifq->ifq_serializer = 0;
> - wakeup_one(&ifq->ifq_serializer);
> - return;
> - }
> + while (ifq_next_task(ifq, &work))
> + (*work.t_func)(work.t_arg);
>
> - if (ifq_empty(ifq) || ifq_is_oactive(ifq))
> - continue;
> + } while (!ifq_leave(ifq));
> +}
> +
> +void
> +ifq_start_task(void *p)
> +{
> + struct ifqueue *ifq = p;
> + struct ifnet *ifp = ifq->ifq_if;
>
> - ifp->if_start(ifp);
> + if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
> + ifq_empty(ifq) || ifq_is_oactive(ifq))
> + return;
>
> - } while (!ifq_leave(ifq));
> + ifp->if_start(ifp);
> +}
> +
> +void
> +ifq_restart_task(void *p)
> +{
> + struct ifqueue *ifq = p;
> + struct ifnet *ifp = ifq->ifq_if;
> +
> + ifq_clr_oactive(ifq);
> + ifp->if_start(ifp);
> }
>
> void
> -if_start_barrier(struct ifnet *ifp)
> +ifq_barrier(struct ifqueue *ifq)
> {
> struct sleep_state sls;
> - struct ifqueue *ifq = &ifp->if_snd;
> + unsigned int notdone = 1;
> + struct task t = TASK_INITIALIZER(ifq_barrier_task, ¬done);
>
> /* this should only be called from converted drivers */
> - KASSERT(ISSET(ifp->if_xflags, IFXF_MPSAFE));
> -
> - /* drivers should only call this on the way down */
> - KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
> + KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE));
>
> if (ifq->ifq_serializer == 0)
> return;
>
> - if_start_mpsafe(ifp); /* spin the wheel to guarantee a wakeup */
> - do {
> - sleep_setup(&sls, &ifq->ifq_serializer, PWAIT, "ifbar");
> - sleep_finish(&sls, ifq->ifq_serializer != 0);
> - } while (ifq->ifq_serializer != 0);
> + ifq_serialize(ifq, &t);
> +
> + while (notdone) {
> + sleep_setup(&sls, ¬done, PWAIT, "ifqbar");
> + sleep_finish(&sls, notdone);
> + }
> +}
> +
> +void
> +ifq_barrier_task(void *p)
> +{
> + unsigned int *notdone = p;
> +
> + *notdone = 0;
> + wakeup_one(notdone);
> }
>
> /*
> @@ -131,8 +188,10 @@ if_start_barrier(struct ifnet *ifp)
> */
>
> void
> -ifq_init(struct ifqueue *ifq)
> +ifq_init(struct ifqueue *ifq, struct ifnet *ifp)
> {
> + ifq->ifq_if = ifp;
> +
> mtx_init(&ifq->ifq_mtx, IPL_NET);
> ifq->ifq_drops = 0;
>
> @@ -140,8 +199,14 @@ ifq_init(struct ifqueue *ifq)
> ifq->ifq_ops = &priq_ops;
> ifq->ifq_q = priq_ops.ifqop_alloc(NULL);
>
> - ifq->ifq_serializer = 0;
> ifq->ifq_len = 0;
> +
> + mtx_init(&ifq->ifq_task_mtx, IPL_NET);
> + TAILQ_INIT(&ifq->ifq_task_list);
> + ifq->ifq_serializer = 0;
> +
> + task_set(&ifq->ifq_start, ifq_start_task, ifq);
> + task_set(&ifq->ifq_restart, ifq_restart_task, ifq);
>
> if (ifq->ifq_maxlen == 0)
> ifq_set_maxlen(ifq, IFQ_MAXLEN);
> Index: net/ifq.h
> ===================================================================
> RCS file: /cvs/src/sys/net/ifq.h,v
> retrieving revision 1.1
> diff -u -p -r1.1 ifq.h
> --- net/ifq.h 8 Dec 2015 10:06:12 -0000 1.1
> +++ net/ifq.h 8 Dec 2015 11:31:09 -0000
> @@ -1,4 +1,4 @@
> -/* $OpenBSD: ifq.h,v 1.1 2015/12/08 10:06:12 dlg Exp $ */
> +/* $OpenBSD$ */
>
> /*
> * Copyright (c) 2015 David Gwynne <[email protected]>
> @@ -24,14 +24,25 @@ struct ifnet;
> struct ifq_ops;
>
> struct ifqueue {
> + struct ifnet *ifq_if;
> +
> + /* mbuf handling */
> struct mutex ifq_mtx;
> uint64_t ifq_drops;
> const struct ifq_ops *ifq_ops;
> void *ifq_q;
> unsigned int ifq_len;
> - unsigned int ifq_serializer;
> unsigned int ifq_oactive;
>
> + /* work serialisation */
> + struct mutex ifq_task_mtx;
> + struct task_list ifq_task_list;
> + unsigned int ifq_serializer;
> +
> + /* work to be serialised */
> + struct task ifq_start;
> + struct task ifq_restart;
> +
> unsigned int ifq_maxlen;
> };
>
> @@ -54,7 +65,7 @@ struct ifq_ops {
> * Interface send queues.
> */
>
> -void ifq_init(struct ifqueue *);
> +void ifq_init(struct ifqueue *, struct ifnet *);
> void ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
> void ifq_destroy(struct ifqueue *);
> int ifq_enqueue_try(struct ifqueue *, struct mbuf *);
> @@ -66,6 +77,8 @@ struct mbuf *ifq_dequeue(struct ifqueue
> unsigned int ifq_purge(struct ifqueue *);
> void *ifq_q_enter(struct ifqueue *, const struct ifq_ops *);
> void ifq_q_leave(struct ifqueue *, void *);
> +void ifq_serialize(struct ifqueue *, struct task *);
> +void ifq_barrier(struct ifqueue *);
>
> #define ifq_len(_ifq) ((_ifq)->ifq_len)
> #define ifq_empty(_ifq) (ifq_len(_ifq) == 0)
> @@ -87,6 +100,18 @@ static inline unsigned int
> ifq_is_oactive(struct ifqueue *ifq)
> {
> return (ifq->ifq_oactive);
> +}
> +
> +static inline void
> +ifq_start(struct ifqueue *ifq)
> +{
> + ifq_serialize(ifq, &ifq->ifq_start);
> +}
> +
> +static inline void
> +ifq_restart(struct ifqueue *ifq)
> +{
> + ifq_serialize(ifq, &ifq->ifq_restart);
> }
>
> extern const struct ifq_ops * const ifq_priq_ops;
>
>