This is another go at implementing tx mitigation. It is mostly the same as the previous attempts in that the actual tx is deferred to a network taskq unless a backlog of packets is reached. When the task runs or the backlog is reached, the actual hardware transmit routine is called. This is all hidden behind the existing API.
The big difference in this version is that an ifq_barrier call no longer implies a taskq_barrier in the nettq. This avoids a deadlock that NICs can cause if (when) they call ifq_barrier with NET_LOCK held. Instead we just rely on the ifq serialiser barrier to do its thing. We do care that the task isn't being run when the ifq is about to be freed, so we do the taskq_barrier when the ifq is being shut down, which already happens without NET_LOCK held. This still gives a significant performance improvement in some situations, e.g. Hrvoje Popovski goes from 740kpps to 1Mpps when forwarding between ix interfaces running this code. I'd like to get it in now so we can shake any issues out of it. OK? Index: ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v retrieving revision 1.30 diff -u -p -r1.30 ifq.c --- ifq.c 29 Mar 2019 04:21:55 -0000 1.30 +++ ifq.c 2 Apr 2019 22:16:19 -0000 @@ -70,6 +70,13 @@ struct priq { void ifq_start_task(void *); void ifq_restart_task(void *); void ifq_barrier_task(void *); +void ifq_bundle_task(void *); + +static inline void +ifq_run_start(struct ifqueue *ifq) +{ + ifq_serialize(ifq, &ifq->ifq_start); +} void ifq_serialize(struct ifqueue *ifq, struct task *t) @@ -112,6 +119,16 @@ ifq_is_serialized(struct ifqueue *ifq) } void +ifq_start(struct ifqueue *ifq) +{ + if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) { + task_del(ifq->ifq_softnet, &ifq->ifq_bundle); + ifq_run_start(ifq); + } else + task_add(ifq->ifq_softnet, &ifq->ifq_bundle); +} + +void ifq_start_task(void *p) { struct ifqueue *ifq = p; @@ -135,11 +152,21 @@ ifq_restart_task(void *p) } void +ifq_bundle_task(void *p) +{ + struct ifqueue *ifq = p; + + ifq_run_start(ifq); +} + +void ifq_barrier(struct ifqueue *ifq) { struct cond c = COND_INITIALIZER(); struct task t = TASK_INITIALIZER(ifq_barrier_task, &c); + task_del(ifq->ifq_softnet, &ifq->ifq_bundle); + if (ifq->ifq_serializer == NULL) return; @@ -164,6 +191,7 
@@ void ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx) { ifq->ifq_if = ifp; + ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */ ifq->ifq_softc = NULL; mtx_init(&ifq->ifq_mtx, IPL_NET); @@ -184,6 +212,7 @@ ifq_init(struct ifqueue *ifq, struct ifn mtx_init(&ifq->ifq_task_mtx, IPL_NET); TAILQ_INIT(&ifq->ifq_task_list); ifq->ifq_serializer = NULL; + task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq); task_set(&ifq->ifq_start, ifq_start_task, ifq); task_set(&ifq->ifq_restart, ifq_restart_task, ifq); @@ -234,6 +263,10 @@ void ifq_destroy(struct ifqueue *ifq) { struct mbuf_list ml = MBUF_LIST_INITIALIZER(); + + NET_ASSERT_UNLOCKED(); + if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle)) + taskq_barrier(ifq->ifq_softnet); /* don't need to lock because this is the last use of the ifq */ Index: ifq.h =================================================================== RCS file: /cvs/src/sys/net/ifq.h,v retrieving revision 1.25 diff -u -p -r1.25 ifq.h --- ifq.h 29 Mar 2019 04:21:55 -0000 1.25 +++ ifq.h 2 Apr 2019 22:16:19 -0000 @@ -25,6 +25,7 @@ struct ifq_ops; struct ifqueue { struct ifnet *ifq_if; + struct taskq *ifq_softnet; union { void *_ifq_softc; /* @@ -57,6 +58,7 @@ struct ifqueue { struct mutex ifq_task_mtx; struct task_list ifq_task_list; void *ifq_serializer; + struct task ifq_bundle; /* work to be serialised */ struct task ifq_start; @@ -397,6 +399,7 @@ void ifq_attach(struct ifqueue *, cons void ifq_destroy(struct ifqueue *); void ifq_add_data(struct ifqueue *, struct if_data *); int ifq_enqueue(struct ifqueue *, struct mbuf *); +void ifq_start(struct ifqueue *); struct mbuf *ifq_deq_begin(struct ifqueue *); void ifq_deq_commit(struct ifqueue *, struct mbuf *); void ifq_deq_rollback(struct ifqueue *, struct mbuf *); @@ -436,12 +439,6 @@ static inline unsigned int ifq_is_oactive(struct ifqueue *ifq) { return (ifq->ifq_oactive); -} - -static inline void -ifq_start(struct ifqueue *ifq) -{ - ifq_serialize(ifq, &ifq->ifq_start); } static inline 
void Index: if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.95 diff -u -p -r1.95 if_var.h --- if_var.h 31 Mar 2019 13:58:18 -0000 1.95 +++ if_var.h 2 Apr 2019 22:16:19 -0000 @@ -176,6 +176,7 @@ struct ifnet { /* and the entries */ struct ifqueue **if_ifqs; /* [I] pointer to an array of sndqs */ void (*if_qstart)(struct ifqueue *); unsigned int if_nifqs; /* [I] number of output queues */ + unsigned int if_txmit; /* [c] txmitigation amount */ struct ifiqueue if_rcv; /* rx/input queue */ struct ifiqueue **if_iqs; /* [I] pointer to the array of iqs */ @@ -303,6 +304,9 @@ do { \ #define IFQ_IS_EMPTY(ifq) ifq_empty(ifq) #define IFQ_SET_MAXLEN(ifq, len) ifq_set_maxlen(ifq, len) +#define IF_TXMIT_MIN 1 +#define IF_TXMIT_DEFAULT 16 + /* default interface priorities */ #define IF_WIRED_DEFAULT_PRIORITY 0 #define IF_WIRELESS_DEFAULT_PRIORITY 4 @@ -337,6 +341,7 @@ void if_start(struct ifnet *); int if_enqueue(struct ifnet *, struct mbuf *); int if_enqueue_ifq(struct ifnet *, struct mbuf *); void if_input(struct ifnet *, struct mbuf_list *); +void if_vinput(struct ifnet *, struct mbuf *); void if_input_process(struct ifnet *, struct mbuf_list *); int if_input_local(struct ifnet *, struct mbuf *, sa_family_t); int if_output_local(struct ifnet *, struct mbuf *, sa_family_t); Index: if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.573 diff -u -p -r1.573 if.c --- if.c 1 Mar 2019 04:47:32 -0000 1.573 +++ if.c 2 Apr 2019 22:16:19 -0000 @@ -613,6 +613,8 @@ if_attach_common(struct ifnet *ifp) ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd; ifp->if_ifqs = ifp->if_snd.ifq_ifqs; ifp->if_nifqs = 1; + if (ifp->if_txmit == 0) + ifp->if_txmit = IF_TXMIT_DEFAULT; ifiq_init(&ifp->if_rcv, ifp, 0);