On Sun, Dec 06, 2015 at 02:00:16PM +1000, David Gwynne wrote:
> the current code for serialising if_start calls for mpsafe nics does what it 
> says.

as per mpi@s suggestion, this makes the ifq code responsible for
the task serialisation.

all the machinery is there, but it provides a minimal step toward
coordination of the oactive flag. the only concession is if a driver
wants to clear oactive and run its start routine again it can call
ifq_restart() to have it serialised with the stacks calls to the
start routine.

if there's a desire for a driver to run all of its txeof serialised
with its start routine, we'll figure out the best way to provide
that when the problem really occurs.

if_start_barrier is now called ifq_barrier too.i

Index: dev/pci/if_myx.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
retrieving revision 1.90
diff -u -p -r1.90 if_myx.c
--- dev/pci/if_myx.c    3 Dec 2015 12:45:56 -0000       1.90
+++ dev/pci/if_myx.c    8 Dec 2015 11:31:09 -0000
@@ -1208,7 +1208,7 @@ myx_up(struct myx_softc *sc)
        ifq_clr_oactive(&ifp->if_snd);
        SET(ifp->if_flags, IFF_RUNNING);
        myx_iff(sc);
-       myx_start(ifp);
+       if_start(ifp);
 
        return;
 
@@ -1330,6 +1330,8 @@ myx_down(struct myx_softc *sc)
        int                      s;
        int                      ring;
 
+       CLR(ifp->if_flags, IFF_RUNNING);
+
        bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
            BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
        sc->sc_linkdown = sts->ms_linkdown;
@@ -1362,8 +1364,7 @@ myx_down(struct myx_softc *sc)
        }
 
        ifq_clr_oactive(&ifp->if_snd);
-       CLR(ifp->if_flags, IFF_RUNNING);
-       if_start_barrier(ifp);
+       ifq_barrier(&ifp->if_snd);
 
        for (ring = 0; ring < 2; ring++) {
                struct myx_rx_ring *mrr = &sc->sc_rx_ring[ring];
@@ -1702,9 +1703,8 @@ myx_txeof(struct myx_softc *sc, u_int32_
        sc->sc_tx_ring_cons = idx;
        sc->sc_tx_cons = cons;
 
-       ifq_clr_oactive(&ifp->if_snd);
-       if (!ifq_empty(&ifp->if_snd))
-               if_start(ifp);
+       if (ifq_is_oactive(&ifp->if_snd))
+               ifq_restart(&ifp->if_snd);
 }
 
 void
Index: dev/pci/if_bnx.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_bnx.c,v
retrieving revision 1.118
diff -u -p -r1.118 if_bnx.c
--- dev/pci/if_bnx.c    5 Dec 2015 16:23:37 -0000       1.118
+++ dev/pci/if_bnx.c    8 Dec 2015 11:31:09 -0000
@@ -871,6 +871,7 @@ bnx_attachhook(void *xsc)
        ifp = &sc->arpcom.ac_if;
        ifp->if_softc = sc;
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+       ifp->if_xflags = IFXF_MPSAFE;
        ifp->if_ioctl = bnx_ioctl;
        ifp->if_start = bnx_start;
        ifp->if_watchdog = bnx_watchdog;
@@ -4573,20 +4574,14 @@ bnx_tx_intr(struct bnx_softc *sc)
 
        used = atomic_sub_int_nv(&sc->used_tx_bd, freed);
 
+       sc->tx_cons = sw_tx_cons;
+
        /* Clear the TX timeout timer. */
        if (used == 0)
                ifp->if_timer = 0;
 
-       /* Clear the tx hardware queue full flag. */
-       if (used < sc->max_tx_bd) {
-               DBRUNIF(ifq_is_oactive(&ifp->if_snd),
-                   printf("%s: Open TX chain! %d/%d (used/total)\n",
-                       sc->bnx_dev.dv_xname, used,
-                       sc->max_tx_bd));
-               ifq_clr_oactive(&ifp->if_snd);
-       }
-
-       sc->tx_cons = sw_tx_cons;
+       if (ifq_is_oactive(&ifp->if_snd))
+               ifq_restart(&ifp->if_snd);
 }
 
 /****************************************************************************/
@@ -4880,10 +4875,8 @@ bnx_start(struct ifnet *ifp)
        int                     used;
        u_int16_t               tx_prod, tx_chain_prod;
 
-       /* If there's no link or the transmit queue is empty then just exit. */
-       if (!sc->bnx_link || IFQ_IS_EMPTY(&ifp->if_snd)) {
-               DBPRINT(sc, BNX_INFO_SEND,
-                   "%s(): No link or transmit queue empty.\n", __FUNCTION__);
+       if (!sc->bnx_link) {
+               ifq_purge(&ifp->if_snd);
                goto bnx_start_exit;
        }
 
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.423
diff -u -p -r1.423 if.c
--- net/if.c    8 Dec 2015 10:06:12 -0000       1.423
+++ net/if.c    8 Dec 2015 11:31:09 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: if.c,v 1.423 2015/12/08 10:06:12 dlg Exp $    */
+/*     $OpenBSD: if.c,v 1.422 2015/12/05 10:07:55 tedu Exp $   */
 /*     $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $  */
 
 /*
@@ -153,7 +153,6 @@ void        if_input_process(void *);
 void   ifa_print_all(void);
 #endif
 
-void   if_start_mpsafe(struct ifnet *ifp);
 void   if_start_locked(struct ifnet *ifp);
 
 /*
@@ -512,7 +511,7 @@ if_attach_common(struct ifnet *ifp)
        TAILQ_INIT(&ifp->if_addrlist);
        TAILQ_INIT(&ifp->if_maddrlist);
 
-       ifq_init(&ifp->if_snd);
+       ifq_init(&ifp->if_snd, ifp);
 
        ifp->if_addrhooks = malloc(sizeof(*ifp->if_addrhooks),
            M_TEMP, M_WAITOK);
@@ -540,7 +539,7 @@ void
 if_start(struct ifnet *ifp)
 {
        if (ISSET(ifp->if_xflags, IFXF_MPSAFE))
-               if_start_mpsafe(ifp);
+               ifq_start(&ifp->if_snd);
        else
                if_start_locked(ifp);
 }
Index: net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.66
diff -u -p -r1.66 if_var.h
--- net/if_var.h        8 Dec 2015 10:14:58 -0000       1.66
+++ net/if_var.h        8 Dec 2015 11:31:09 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: if_var.h,v 1.66 2015/12/08 10:14:58 dlg Exp $ */
+/*     $OpenBSD: if_var.h,v 1.64 2015/12/05 16:24:59 mpi Exp $ */
 /*     $NetBSD: if.h,v 1.23 1996/05/07 02:40:27 thorpej Exp $  */
 
 /*
@@ -42,6 +42,7 @@
 #include <sys/mbuf.h>
 #include <sys/srp.h>
 #include <sys/refcnt.h>
+#include <sys/task.h>
 #include <sys/time.h>
 
 #include <net/ifq.h>
Index: net/ifq.c
===================================================================
RCS file: /cvs/src/sys/net/ifq.c,v
retrieving revision 1.1
diff -u -p -r1.1 ifq.c
--- net/ifq.c   8 Dec 2015 10:06:12 -0000       1.1
+++ net/ifq.c   8 Dec 2015 11:31:09 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: ifq.c,v 1.1 2015/12/08 10:06:12 dlg Exp $ */
+/*     $OpenBSD$ */
 
 /*
  * Copyright (c) 2015 David Gwynne <d...@openbsd.org>
@@ -64,6 +64,12 @@ struct priq {
  * ifqueue serialiser
  */
 
+void   ifq_start_task(void *);
+void   ifq_restart_task(void *);
+void   ifq_barrier_task(void *);
+
+#define TASK_ONQUEUE 0x1
+
 static inline unsigned int
 ifq_enter(struct ifqueue *ifq)
 {
@@ -73,57 +79,108 @@ ifq_enter(struct ifqueue *ifq)
 static inline unsigned int
 ifq_leave(struct ifqueue *ifq)
 {
-       if (atomic_cas_uint(&ifq->ifq_serializer, 1, 0) == 1)
-               return (1);
+       return (atomic_cas_uint(&ifq->ifq_serializer, 1, 0) == 1);
+}
+
+static inline int
+ifq_next_task(struct ifqueue *ifq, struct task *work)
+{
+       struct task *t;
+       int rv = 0;
 
        ifq->ifq_serializer = 1;
 
-       return (0);
+       mtx_enter(&ifq->ifq_task_mtx);
+       t = TAILQ_FIRST(&ifq->ifq_task_list);
+       if (t != NULL) {
+               TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
+               CLR(t->t_flags, TASK_ONQUEUE);
+
+               *work = *t; /* copy to caller to avoid races */
+
+               rv = 1;
+       }
+       mtx_leave(&ifq->ifq_task_mtx);
+
+       return (rv);
 }
 
 void
-if_start_mpsafe(struct ifnet *ifp)
+ifq_serialize(struct ifqueue *ifq, struct task *t)
 {
-       struct ifqueue *ifq = &ifp->if_snd;
+       struct task work;
+
+       if (ISSET(t->t_flags, TASK_ONQUEUE))
+               return;
+
+       mtx_enter(&ifq->ifq_task_mtx);
+       if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
+               SET(t->t_flags, TASK_ONQUEUE);
+               TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
+       }
+       mtx_leave(&ifq->ifq_task_mtx);
 
        if (!ifq_enter(ifq))
                return;
 
        do {
-               if (__predict_false(!ISSET(ifp->if_flags, IFF_RUNNING))) {
-                       ifq->ifq_serializer = 0;
-                       wakeup_one(&ifq->ifq_serializer);
-                       return;
-               }
+               while (ifq_next_task(ifq, &work))
+                       (*work.t_func)(work.t_arg);
 
-               if (ifq_empty(ifq) || ifq_is_oactive(ifq))
-                       continue;
+       } while (!ifq_leave(ifq));
+}
+
+void
+ifq_start_task(void *p)
+{
+       struct ifqueue *ifq = p;
+       struct ifnet *ifp = ifq->ifq_if;
 
-               ifp->if_start(ifp);
+       if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
+           ifq_empty(ifq) || ifq_is_oactive(ifq))
+               return;
 
-       } while (!ifq_leave(ifq));
+       ifp->if_start(ifp);
+}
+
+void
+ifq_restart_task(void *p)
+{
+       struct ifqueue *ifq = p;
+       struct ifnet *ifp = ifq->ifq_if;
+
+       ifq_clr_oactive(ifq);
+       ifp->if_start(ifp);
 }
 
 void
-if_start_barrier(struct ifnet *ifp)
+ifq_barrier(struct ifqueue *ifq)
 {
        struct sleep_state sls;
-       struct ifqueue *ifq = &ifp->if_snd;
+       unsigned int notdone = 1;
+       struct task t = TASK_INITIALIZER(ifq_barrier_task, &notdone);
 
        /* this should only be called from converted drivers */
-       KASSERT(ISSET(ifp->if_xflags, IFXF_MPSAFE));
-
-       /* drivers should only call this on the way down */
-       KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
+       KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE));
 
        if (ifq->ifq_serializer == 0)
                return;
 
-       if_start_mpsafe(ifp); /* spin the wheel to guarantee a wakeup */
-       do {
-               sleep_setup(&sls, &ifq->ifq_serializer, PWAIT, "ifbar");
-               sleep_finish(&sls, ifq->ifq_serializer != 0);
-       } while (ifq->ifq_serializer != 0);
+       ifq_serialize(ifq, &t);
+
+       while (notdone) {
+               sleep_setup(&sls, &notdone, PWAIT, "ifqbar");
+               sleep_finish(&sls, notdone);
+       }
+}
+
+void
+ifq_barrier_task(void *p)
+{
+       unsigned int *notdone = p;
+
+       *notdone = 0;
+       wakeup_one(notdone);
 }
 
 /*
@@ -131,8 +188,10 @@ if_start_barrier(struct ifnet *ifp)
  */
 
 void
-ifq_init(struct ifqueue *ifq)
+ifq_init(struct ifqueue *ifq, struct ifnet *ifp)
 {
+       ifq->ifq_if = ifp;
+
        mtx_init(&ifq->ifq_mtx, IPL_NET);
        ifq->ifq_drops = 0;
 
@@ -140,8 +199,14 @@ ifq_init(struct ifqueue *ifq)
        ifq->ifq_ops = &priq_ops;
        ifq->ifq_q = priq_ops.ifqop_alloc(NULL);
 
-       ifq->ifq_serializer = 0;
        ifq->ifq_len = 0;
+
+       mtx_init(&ifq->ifq_task_mtx, IPL_NET);
+       TAILQ_INIT(&ifq->ifq_task_list);
+       ifq->ifq_serializer = 0;
+
+       task_set(&ifq->ifq_start, ifq_start_task, ifq);
+       task_set(&ifq->ifq_restart, ifq_restart_task, ifq);
 
        if (ifq->ifq_maxlen == 0)
                ifq_set_maxlen(ifq, IFQ_MAXLEN);
Index: net/ifq.h
===================================================================
RCS file: /cvs/src/sys/net/ifq.h,v
retrieving revision 1.1
diff -u -p -r1.1 ifq.h
--- net/ifq.h   8 Dec 2015 10:06:12 -0000       1.1
+++ net/ifq.h   8 Dec 2015 11:31:09 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: ifq.h,v 1.1 2015/12/08 10:06:12 dlg Exp $ */
+/*     $OpenBSD$ */
 
 /*
  * Copyright (c) 2015 David Gwynne <d...@openbsd.org>
@@ -24,14 +24,25 @@ struct ifnet;
 struct ifq_ops;
 
 struct ifqueue {
+       struct ifnet            *ifq_if;
+
+       /* mbuf handling */
        struct mutex             ifq_mtx;
        uint64_t                 ifq_drops;
        const struct ifq_ops    *ifq_ops;
        void                    *ifq_q;
        unsigned int             ifq_len;
-       unsigned int             ifq_serializer;
        unsigned int             ifq_oactive;
 
+       /* work serialisation */
+       struct mutex             ifq_task_mtx;
+       struct task_list         ifq_task_list;
+       unsigned int             ifq_serializer;
+
+       /* work to be serialised */
+       struct task              ifq_start;
+       struct task              ifq_restart;
+
        unsigned int             ifq_maxlen;
 };
 
@@ -54,7 +65,7 @@ struct ifq_ops {
  * Interface send queues.
  */
 
-void            ifq_init(struct ifqueue *);
+void            ifq_init(struct ifqueue *, struct ifnet *);
 void            ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
 void            ifq_destroy(struct ifqueue *);
 int             ifq_enqueue_try(struct ifqueue *, struct mbuf *);
@@ -66,6 +77,8 @@ struct mbuf   *ifq_dequeue(struct ifqueue 
 unsigned int    ifq_purge(struct ifqueue *);
 void           *ifq_q_enter(struct ifqueue *, const struct ifq_ops *);
 void            ifq_q_leave(struct ifqueue *, void *);
+void            ifq_serialize(struct ifqueue *, struct task *);
+void            ifq_barrier(struct ifqueue *);
 
 #define        ifq_len(_ifq)                   ((_ifq)->ifq_len)
 #define        ifq_empty(_ifq)                 (ifq_len(_ifq) == 0)
@@ -87,6 +100,18 @@ static inline unsigned int
 ifq_is_oactive(struct ifqueue *ifq)
 {
        return (ifq->ifq_oactive);
+}
+
+static inline void
+ifq_start(struct ifqueue *ifq)
+{
+       ifq_serialize(ifq, &ifq->ifq_start);
+}
+
+static inline void
+ifq_restart(struct ifqueue *ifq)
+{
+       ifq_serialize(ifq, &ifq->ifq_restart);
 }
 
 extern const struct ifq_ops * const ifq_priq_ops;

Reply via email to