Hi,

I took an old idea and diff [1] from bluhm and finished it for the IPv4
case.

This change uses mbuf chains in tcp/ip/ether_output instead of single
mbufs.  This approach takes less processing power per packet that we
want to send out.  In several code paths it optimises the processing
because we only have to look at the first mbuf of a chain.  In other
cases we process several packets in a row in one step, which saves
context switches between functions.

Thus, we gain a performance improvement of around 36% with relayd(8).
The measurements were made with iperf using 30 streams:

        2823.0 MBit/s   current
        3844.4 MBit/s   current + mbuf-chaining diff below

The ip6_output() counterpart is not implemented yet.  I would do that
in a follow-up diff later, if this one is successful.

I tested the diff on amd64 in several test environments without any
problems.

What's your opinion about this change?

bye,
Jan

[1]: https://github.com/bluhm/sys/tree/tcp-outlist

Index: net/fq_codel.c
===================================================================
RCS file: /cvs/src/sys/net/fq_codel.c,v
retrieving revision 1.14
diff -u -p -r1.14 fq_codel.c
--- net/fq_codel.c      10 Dec 2020 06:53:38 -0000      1.14
+++ net/fq_codel.c      15 Dec 2020 09:37:36 -0000
@@ -138,6 +138,7 @@ void                 fqcodel_purge(struct fqcodel *, s
 static const struct ifq_ops fqcodel_ops = {
        fqcodel_idx,
        fqcodel_if_enq,
+       NULL,
        fqcodel_if_deq_begin,
        fqcodel_if_deq_commit,
        fqcodel_if_purge,
Index: net/hfsc.c
===================================================================
RCS file: /cvs/src/sys/net/hfsc.c,v
retrieving revision 1.48
diff -u -p -r1.48 hfsc.c
--- net/hfsc.c  22 Oct 2018 23:44:53 -0000      1.48
+++ net/hfsc.c  15 Dec 2020 09:37:36 -0000
@@ -270,6 +270,7 @@ void                 hfsc_free(unsigned int, void *);
 const struct ifq_ops hfsc_ops = {
        hfsc_idx,
        hfsc_enq,
+       NULL,
        hfsc_deq_begin,
        hfsc_deq_commit,
        hfsc_purge,
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.620
diff -u -p -r1.620 if.c
--- net/if.c    3 Oct 2020 00:23:55 -0000       1.620
+++ net/if.c    15 Dec 2020 09:37:36 -0000
@@ -633,6 +633,8 @@ if_attach_common(struct ifnet *ifp)
                ifp->if_rtrequest = if_rtrequest_dummy;
        if (ifp->if_enqueue == NULL)
                ifp->if_enqueue = if_enqueue_ifq;
+       if (ifp->if_enqueue_ml == NULL)
+               ifp->if_enqueue_ml = if_enqueue_ifq_ml;
        ifp->if_llprio = IFQ_DEFPRIO;
 }
 
@@ -682,35 +684,72 @@ if_qstart_compat(struct ifqueue *ifq)
 int
 if_enqueue(struct ifnet *ifp, struct mbuf *m)
 {
+       struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+
+       ml_enqueue(&ml, m);
+       return (if_enqueue_ml(ifp, &ml));
+}
+
+int
+if_enqueue_ml(struct ifnet *ifp, struct mbuf_list *ml)
+{
+       int error;
+       struct mbuf *m;
+
+       while ((m = ml_dequeue(ml)) != NULL) {
 #if NPF > 0
-       if (m->m_pkthdr.pf.delay > 0)
-               return (pf_delay_pkt(m, ifp->if_index));
+               if (m->m_pkthdr.pf.delay > 0) {
+                       error = pf_delay_pkt(m, ifp->if_index);
+                       if (error != 0)
+                               goto bad;
+                       continue;
+               }
 #endif
 
 #if NBRIDGE > 0
-       if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
-               int error;
-
-               error = bridge_enqueue(ifp, m);
-               return (error);
-       }
+               if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
+                       error = bridge_enqueue(ifp, m);
+                       if (error != 0)
+                               goto bad;
+                       continue;
+               }
 #endif
 
 #if NPF > 0
-       pf_pkt_addr_changed(m);
+               pf_pkt_addr_changed(m);
 #endif /* NPF > 0 */
 
-       return ((*ifp->if_enqueue)(ifp, m));
+               error = (*ifp->if_enqueue)(ifp, m);
+               if (error != 0)
+                       goto bad;
+       }
+
+       return 0;
+ bad:
+       ml_purge(ml);
+       return error;
 }
 
 int
 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
 {
+       struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+
+       ml_enqueue(&ml, m);
+       return (if_enqueue_ifq_ml(ifp, &ml));
+}
+
+int
+if_enqueue_ifq_ml(struct ifnet *ifp, struct mbuf_list *ml)
+{
        struct ifqueue *ifq = &ifp->if_snd;
        int error;
 
        if (ifp->if_nifqs > 1) {
                unsigned int idx;
+               struct mbuf *m;
+
+               m = MBUF_LIST_FIRST(ml);
 
                /*
                 * use the operations on the first ifq to pick which of
@@ -721,13 +760,16 @@ if_enqueue_ifq(struct ifnet *ifp, struct
                ifq = ifp->if_ifqs[idx];
        }
 
-       error = ifq_enqueue(ifq, m);
-       if (error)
-               return (error);
+       error = ifq_enqueue_ml(ifq, ml);
+       if (error != 0)
+               goto bad;
 
        ifq_start(ifq);
 
        return (0);
+ bad:
+       ml_purge(ml);
+       return (error);
 }
 
 void
Index: net/if_ethersubr.c
===================================================================
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.267
diff -u -p -r1.267 if_ethersubr.c
--- net/if_ethersubr.c  1 Oct 2020 05:14:10 -0000       1.267
+++ net/if_ethersubr.c  15 Dec 2020 09:37:36 -0000
@@ -311,47 +311,74 @@ bad:
        return (error);
 }
 
-struct mbuf*
-ether_encap(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
-    struct rtentry *rt, int *errorp)
+int
+ether_encap(struct ifnet *ifp, struct mbuf_list *ml, struct sockaddr *dst,
+    struct rtentry *rt)
 {
+       struct mbuf_list nl = MBUF_LIST_INITIALIZER();
        struct ether_header eh;
+       struct mbuf *m;
        int error;
 
+       m = ml_dequeue(ml);
+
        error = ether_resolve(ifp, m, dst, rt, &eh);
        switch (error) {
        case 0:
                break;
        case EAGAIN:
                error = 0;
+               /* FALLTHROUGH */
        default:
-               *errorp = error;
-               return (NULL);
+               goto bad;
        }
 
-       m = m_prepend(m, ETHER_ALIGN + sizeof(eh), M_DONTWAIT);
-       if (m == NULL) {
-               *errorp = ENOBUFS;
-               return (NULL);
-       }
+       do {
+               m = m_prepend(m, ETHER_ALIGN + sizeof(eh), M_DONTWAIT);
+               if (m == NULL) {
+                       error = ENOBUFS;
+                       goto bad;
+               }
+               m_adj(m, ETHER_ALIGN);
+               memcpy(mtod(m, struct ether_header *), &eh, sizeof(eh));
+
+               ml_enqueue(&nl, m);
+       } while ((m = ml_dequeue(ml)) != NULL);
 
-       m_adj(m, ETHER_ALIGN);
-       memcpy(mtod(m, struct ether_header *), &eh, sizeof(eh));
+       ml_enlist(ml, &nl);
 
-       return (m);
+       return (0);
+ bad:
+       ml_purge(ml);
+       ml_enlist(ml, &nl);
+       return (error);
 }
 
 int
 ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
     struct rtentry *rt)
 {
-       int error;
+       struct mbuf_list ml = MBUF_LIST_INITIALIZER();
 
-       m = ether_encap(ifp, m, dst, rt, &error);
-       if (m == NULL)
-               return (error);
+       ml_enqueue(&ml, m);
+       return (ether_output_ml(ifp, &ml, dst, rt));
+}
+
+int
+ether_output_ml(struct ifnet *ifp, struct mbuf_list *ml, struct sockaddr *dst,
+    struct rtentry *rt)
+{
+       int errcap;
+       int errque;
 
-       return (if_enqueue(ifp, m));
+       errcap = ether_encap(ifp, ml, dst, rt);
+
+       if (ml_empty(ml))
+               return errcap;
+
+       errque = (if_enqueue_ml(ifp, ml));
+
+       return errcap != 0 ? errcap : errque;
 }
 
 /*
@@ -670,8 +697,11 @@ ether_ifattach(struct ifnet *ifp)
        ifp->if_hdrlen = ETHER_HDR_LEN;
        ifp->if_mtu = ETHERMTU;
        ifp->if_input = ether_input;
+
        if (ifp->if_output == NULL)
                ifp->if_output = ether_output;
+       if (ifp->if_output_ml == NULL)
+               ifp->if_output_ml = ether_output_ml;
        ifp->if_rtrequest = ether_rtrequest;
 
        if (ifp->if_hardmtu == 0)
Index: net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.112
diff -u -p -r1.112 if_var.h
--- net/if_var.h        29 Jul 2020 12:09:31 -0000      1.112
+++ net/if_var.h        15 Dec 2020 09:37:36 -0000
@@ -161,12 +161,15 @@ struct ifnet {                            /* and the entries */
 
        /* procedure handles */
        void    (*if_input)(struct ifnet *, struct mbuf *);
+       int     (*if_output_ml)(struct ifnet *, struct mbuf_list *,
+                   struct sockaddr *, struct rtentry *);
        int     (*if_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
                     struct rtentry *); /* output routine (enqueue) */
                                        /* link level output function */
        int     (*if_ll_output)(struct ifnet *, struct mbuf *,
                    struct sockaddr *, struct rtentry *);
        int     (*if_enqueue)(struct ifnet *, struct mbuf *);
+       int     (*if_enqueue_ml)(struct ifnet *, struct mbuf_list *);
        void    (*if_start)(struct ifnet *);    /* initiate output */
        int     (*if_ioctl)(struct ifnet *, u_long, caddr_t); /* ioctl hook */
        void    (*if_watchdog)(struct ifnet *); /* timer routine */
@@ -318,7 +321,9 @@ extern struct ifnet_head ifnet;
 
 void   if_start(struct ifnet *);
 int    if_enqueue(struct ifnet *, struct mbuf *);
+int    if_enqueue_ml(struct ifnet *, struct mbuf_list *);
 int    if_enqueue_ifq(struct ifnet *, struct mbuf *);
+int    if_enqueue_ifq_ml(struct ifnet *, struct mbuf_list *);
 void   if_input(struct ifnet *, struct mbuf_list *);
 void   if_vinput(struct ifnet *, struct mbuf *);
 void   if_input_process(struct ifnet *, struct mbuf_list *);
Index: net/ifq.c
===================================================================
RCS file: /cvs/src/sys/net/ifq.c,v
retrieving revision 1.41
diff -u -p -r1.41 ifq.c
--- net/ifq.c   7 Jul 2020 00:00:03 -0000       1.41
+++ net/ifq.c   15 Dec 2020 09:37:36 -0000
@@ -42,6 +42,7 @@
  */
 unsigned int    priq_idx(unsigned int, const struct mbuf *);
 struct mbuf    *priq_enq(struct ifqueue *, struct mbuf *);
+void            priq_enq_ml(struct ifqueue *, struct mbuf_list *);
 struct mbuf    *priq_deq_begin(struct ifqueue *, void **);
 void            priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
 void            priq_purge(struct ifqueue *, struct mbuf_list *);
@@ -52,6 +53,7 @@ void           priq_free(unsigned int, void *);
 const struct ifq_ops priq_ops = {
        priq_idx,
        priq_enq,
+       priq_enq_ml,
        priq_deq_begin,
        priq_deq_commit,
        priq_purge,
@@ -359,27 +361,45 @@ ifq_add_data(struct ifqueue *ifq, struct
 int
 ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
 {
-       struct mbuf *dm;
+       struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+
+       ml_enqueue(&ml, m);
+       return (ifq_enqueue_ml(ifq, &ml));
+}
+
+int
+ifq_enqueue_ml(struct ifqueue *ifq, struct mbuf_list *ml)
+{
+       struct mbuf *m;
+       int error = 0;
+
+       m = MBUF_LIST_FIRST(ml);
 
        mtx_enter(&ifq->ifq_mtx);
-       dm = ifq->ifq_ops->ifqop_enq(ifq, m);
-       if (dm != m) {
-               ifq->ifq_packets++;
+
+       /* pre-stats */
+       ifq->ifq_packets += ml_len(ml);
+       ifq->ifq_len += ml_len(ml);
+       if (ISSET(m->m_flags, M_MCAST))
+               ifq->ifq_mcasts += ml_len(ml);
+       MBUF_LIST_FOREACH(ml, m)
                ifq->ifq_bytes += m->m_pkthdr.len;
-               if (ISSET(m->m_flags, M_MCAST))
-                       ifq->ifq_mcasts++;
-       }
 
-       if (dm == NULL)
-               ifq->ifq_len++;
-       else
-               ifq->ifq_qdrops++;
+       ifq->ifq_ops->ifqop_enq_ml(ifq, ml);
+       if (!ml_empty(ml))
+               error = ENOBUFS;
+
+       /* post-stats */
+       ifq->ifq_len -= ml_len(ml);
+       ifq->ifq_qdrops -= ml_len(ml);
+       MBUF_LIST_FOREACH(ml, m)
+               ifq->ifq_bytes -= m->m_pkthdr.len;
+
        mtx_leave(&ifq->ifq_mtx);
 
-       if (dm != NULL)
-               m_freem(dm);
+       ml_purge(ml);
 
-       return (dm == m ? ENOBUFS : 0);
+       return error;
 }
 
 static inline void
@@ -841,38 +861,54 @@ priq_free(unsigned int idx, void *pq)
        free(pq, M_DEVBUF, sizeof(struct priq));
 }
 
-struct mbuf *
-priq_enq(struct ifqueue *ifq, struct mbuf *m)
+void
+priq_enq_ml(struct ifqueue *ifq, struct mbuf_list *ml)
 {
        struct priq *pq;
        struct mbuf_list *pl;
-       struct mbuf *n = NULL;
-       unsigned int prio;
+       struct mbuf *m;
+       unsigned int prio, free, rest, len;
 
-       pq = ifq->ifq_q;
+       m = MBUF_LIST_FIRST(ml);
        KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);
 
+       pq = ifq->ifq_q;
+       pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
+
+       free = ifq->ifq_maxlen - ifq_len(ifq);
+
+       if (ml_len(ml) < free) {
+               ml_enlist(pl, ml);
+               return;
+       }
+
+       for (; free > 0; free--)
+               ml_enqueue(pl, ml_dequeue(ml));
+       rest = ml_len(ml);
+
        /* Find a lower priority queue to drop from */
-       if (ifq_len(ifq) >= ifq->ifq_maxlen) {
-               for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
-                       pl = &pq->pq_lists[prio];
-                       if (ml_len(pl) > 0) {
-                               n = ml_dequeue(pl);
-                               goto enqueue;
-                       }
+       for (prio = 0; prio < m->m_pkthdr.pf.prio && rest > 0; prio++) {
+               pl = &pq->pq_lists[prio];
+               len = ml_len(pl);
+
+               while (len > 0 && rest > 0) {
+                       ml_enqueue(ml, ml_dequeue(pl));
+                       len--;
+                       ml_enqueue(pl, ml_dequeue(ml));
+                       rest--;
                }
-               /*
-                * There's no lower priority queue that we can
-                * drop from so don't enqueue this one.
-                */
-               return (m);
        }
+}
 
- enqueue:
-       pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
-       ml_enqueue(pl, m);
+struct mbuf *
+priq_enq(struct ifqueue *ifq, struct mbuf *m)
+{
+       struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+
+       ml_enqueue(&ml, m);
+       priq_enq_ml(ifq, &ml);
 
-       return (n);
+       return (MBUF_LIST_FIRST(&ml));
 }
 
 struct mbuf *
Index: net/ifq.h
===================================================================
RCS file: /cvs/src/sys/net/ifq.h,v
retrieving revision 1.32
diff -u -p -r1.32 ifq.h
--- net/ifq.h   7 Jul 2020 00:00:03 -0000       1.32
+++ net/ifq.h   15 Dec 2020 09:37:36 -0000
@@ -168,6 +168,12 @@ struct ifiqueue {
  * current traffic conditioner may drop a packet to make space on the
  * queue.
  *
+ * === ifq_enqueue_ml()
+ *
+ * ifq_enqueue_ml() attempts to fit an mbuf_list onto the ifqueue. The
+ * current traffic conditioner may drop a packet to make space on the
+ * queue.
+ *
  * === ifq_start()
  *
  * Once a packet has been successfully queued with ifq_enqueue(),
@@ -402,6 +408,7 @@ struct ifq_ops {
        unsigned int             (*ifqop_idx)(unsigned int,
                                    const struct mbuf *);
        struct mbuf             *(*ifqop_enq)(struct ifqueue *, struct mbuf *);
+       void                     (*ifqop_enq_ml)(struct ifqueue *, struct mbuf_list *);
        struct mbuf             *(*ifqop_deq_begin)(struct ifqueue *, void **);
        void                     (*ifqop_deq_commit)(struct ifqueue *,
                                    struct mbuf *, void *);
@@ -422,6 +429,7 @@ void                 ifq_attach(struct ifqueue *, cons
 void            ifq_destroy(struct ifqueue *);
 void            ifq_add_data(struct ifqueue *, struct if_data *);
 int             ifq_enqueue(struct ifqueue *, struct mbuf *);
+int             ifq_enqueue_ml(struct ifqueue *, struct mbuf_list *);
 void            ifq_start(struct ifqueue *);
 struct mbuf    *ifq_deq_begin(struct ifqueue *);
 void            ifq_deq_commit(struct ifqueue *, struct mbuf *);
Index: netinet/if_ether.h
===================================================================
RCS file: /cvs/src/sys/netinet/if_ether.h,v
retrieving revision 1.78
diff -u -p -r1.78 if_ether.h
--- netinet/if_ether.h  22 Jul 2020 02:16:02 -0000      1.78
+++ netinet/if_ether.h  15 Dec 2020 09:37:36 -0000
@@ -256,10 +256,11 @@ int       ether_ioctl(struct ifnet *, struct a
 void   ether_input(struct ifnet *, struct mbuf *);
 int    ether_resolve(struct ifnet *, struct mbuf *, struct sockaddr *,
            struct rtentry *, struct ether_header *);
-struct mbuf *
-       ether_encap(struct ifnet *, struct mbuf *, struct sockaddr *,
-           struct rtentry *, int *);
+int    ether_encap(struct ifnet *, struct mbuf_list *, struct sockaddr *,
+           struct rtentry *);
 int    ether_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+           struct rtentry *);
+int    ether_output_ml(struct ifnet *, struct mbuf_list *, struct sockaddr *,
            struct rtentry *);
 void   ether_rtrequest(struct ifnet *, int, struct rtentry *);
 char   *ether_sprintf(u_char *);
Index: netinet/ip_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.357
diff -u -p -r1.357 ip_output.c
--- netinet/ip_output.c 24 Jun 2020 22:03:43 -0000      1.357
+++ netinet/ip_output.c 15 Dec 2020 09:37:36 -0000
@@ -87,6 +87,17 @@ int
 ip_output_ipsec_send(struct tdb *, struct mbuf *, struct route *, int);
 #endif /* IPSEC */
 
+int
+ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro,
+    int flags, struct ip_moptions *imo, struct inpcb *inp,
+    u_int32_t ipsecflowinfo)
+{
+       struct mbuf_list ml = MBUF_LIST_INITIALIZER();
+
+       ml_enqueue(&ml, m);
+       return (ip_output_ml(&ml, opt, ro, flags, imo, inp, ipsecflowinfo));
+}
+
 /*
  * IP output.  The packet in mbuf chain m contains a skeletal IP
  * header (with len, off, ttl, proto, tos, src, dst).
@@ -94,12 +105,13 @@ ip_output_ipsec_send(struct tdb *, struc
  * The mbuf opt, if present, will not be freed.
  */
 int
-ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
-    struct ip_moptions *imo, struct inpcb *inp, u_int32_t ipsecflowinfo)
+ip_output_ml(struct mbuf_list *ml, struct mbuf *opt, struct route *ro,
+    int flags, struct ip_moptions *imo, struct inpcb *inp,
+    u_int32_t ipsecflowinfo)
 {
        struct ip *ip;
        struct ifnet *ifp = NULL;
-       struct mbuf *m = m0;
+       struct mbuf *m, *n;
        int hlen = sizeof (struct ip);
        int error = 0;
        struct route iproute;
@@ -118,24 +130,37 @@ ip_output(struct mbuf *m0, struct mbuf *
 #endif /* IPSEC */
 
 #ifdef DIAGNOSTIC
-       if ((m->m_flags & M_PKTHDR) == 0)
-               panic("ip_output no HDR");
+       MBUF_LIST_FOREACH(ml, m) {
+               if ((m->m_flags & M_PKTHDR) == 0)
+                       panic("ip_output no HDR");
+       }
 #endif
-       if (opt)
-               m = ip_insertoptions(m, opt, &hlen);
-
-       ip = mtod(m, struct ip *);
+       if (opt) {
+               for (m = ml_dechain(ml); m; m = n) {
+                       n = m->m_nextpkt;
+                       m->m_nextpkt = NULL;
+                       m = ip_insertoptions(m, opt, &hlen);
+                       ml_enqueue(ml, m);
+               }
+       }
 
        /*
         * Fill in IP header.
         */
        if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
-               ip->ip_v = IPVERSION;
-               ip->ip_off &= htons(IP_DF);
-               ip->ip_id = htons(ip_randomid());
-               ip->ip_hl = hlen >> 2;
-               ipstat_inc(ips_localout);
+               MBUF_LIST_FOREACH(ml, m) {
+                       ip = mtod(m, struct ip *);
+                       ip->ip_v = IPVERSION;
+                       ip->ip_off &= htons(IP_DF);
+                       ip->ip_id = htons(ip_randomid());
+                       ip->ip_hl = hlen >> 2;
+                       ipstat_inc(ips_localout);
+               }
+               m = MBUF_LIST_FIRST(ml);
+               ip = mtod(m, struct ip *);
        } else {
+               m = MBUF_LIST_FIRST(ml);
+               ip = mtod(m, struct ip *);
                hlen = ip->ip_hl << 2;
        }
 
@@ -231,8 +256,14 @@ reroute:
                        dst = satosin(ro->ro_rt->rt_gateway);
 
                /* Set the source IP address */
-               if (ip->ip_src.s_addr == INADDR_ANY && ia)
-                       ip->ip_src = ia->ia_addr.sin_addr;
+               if (ip->ip_src.s_addr == INADDR_ANY && ia) {
+                       MBUF_LIST_FOREACH(ml, m) {
+                               ip = mtod(m, struct ip *);
+                               ip->ip_src = ia->ia_addr.sin_addr;
+                       }
+                       m = MBUF_LIST_FIRST(ml);
+                       ip = mtod(m, struct ip *);
+               }
        }
 
 #ifdef IPSEC
@@ -244,15 +275,18 @@ reroute:
                        /* Should silently drop packet */
                        if (error == -EINVAL)
                                error = 0;
-                       m_freem(m);
-                       goto done;
+                       goto bad;
                }
                if (tdb != NULL) {
                        /*
                         * If it needs TCP/UDP hardware-checksumming, do the
                         * computation now.
                         */
-                       in_proto_cksum_out(m, NULL);
+                       MBUF_LIST_FOREACH(ml, m) {
+                               in_proto_cksum_out(m, NULL);
+                       }
+                       m = MBUF_LIST_FIRST(ml);
+                       ip = mtod(m, struct ip *);
                }
        }
 #endif /* IPSEC */
@@ -260,8 +294,9 @@ reroute:
        if (IN_MULTICAST(ip->ip_dst.s_addr) ||
            (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
 
-               m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
-                       M_BCAST : M_MCAST;
+               MBUF_LIST_FOREACH(ml, m)
+                       m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
+                           M_BCAST : M_MCAST;
 
                /*
                 * IP destination address is multicast.  Make sure "dst"
@@ -273,10 +308,15 @@ reroute:
                /*
                 * See if the caller provided any multicast options
                 */
-               if (imo != NULL)
-                       ip->ip_ttl = imo->imo_ttl;
-               else
-                       ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
+               MBUF_LIST_FOREACH(ml, m) {
+                       ip = mtod(m, struct ip *);
+                       if (imo != NULL)
+                               ip->ip_ttl = imo->imo_ttl;
+                       else
+                               ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
+               }
+               m = MBUF_LIST_FIRST(ml);
+               ip = mtod(m, struct ip *);
 
                /*
                 * if we don't know the outgoing ifp yet, we can't generate
@@ -418,14 +458,20 @@ sendit:
         * Packet filter
         */
 #if NPF > 0
-       if (pf_test(AF_INET, (flags & IP_FORWARDING) ? PF_FWD : PF_OUT,
-           ifp, &m) != PF_PASS) {
-               error = EACCES;
-               m_freem(m);
-               goto done;
+       for (m = ml_dechain(ml); m != NULL; m = n) {
+               n = m->m_nextpkt;
+               m->m_nextpkt = NULL;
+               if (pf_test(AF_INET, (flags & IP_FORWARDING) ? PF_FWD : PF_OUT,
+                   ifp, &m) != PF_PASS) {
+                       error = EACCES;
+                       m_freem(m);
+               } else if (m != NULL) {
+                       ml_enqueue(ml, m);
+               }
        }
-       if (m == NULL)
+       if (ml_empty(ml))
                goto done;
+       m = MBUF_LIST_FIRST(ml);
        ip = mtod(m, struct ip *);
        hlen = ip->ip_hl << 2;
        if ((m->m_pkthdr.pf.flags & (PF_TAG_REROUTE | PF_TAG_GENERATED)) ==
@@ -441,14 +487,16 @@ sendit:
                goto reroute;
        }
 #endif
-       in_proto_cksum_out(m, ifp);
+       MBUF_LIST_FOREACH(ml, m)
+               in_proto_cksum_out(m, ifp);
+       m = MBUF_LIST_FIRST(ml);
+       ip = mtod(m, struct ip *);
 
 #ifdef IPSEC
        if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) &&
            (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) == NULL)) {
                error = EHOSTUNREACH;
-               m_freem(m);
-               goto done;
+               goto bad;
        }
 #endif
 
@@ -456,16 +504,29 @@ sendit:
         * If small enough for interface, can just send directly.
         */
        if (ntohs(ip->ip_len) <= mtu) {
-               ip->ip_sum = 0;
                if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
-                   (ifp->if_bridgeidx == 0))
-                       m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
-               else {
-                       ipstat_inc(ips_outswcsum);
-                       ip->ip_sum = in_cksum(m, hlen);
+                   (ifp->if_bridgeidx == 0)) {
+                       MBUF_LIST_FOREACH(ml, m) {
+                               ip = mtod(m, struct ip *);
+                               ip->ip_sum = 0;
+                               m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
+                       }
+               } else {
+                       MBUF_LIST_FOREACH(ml, m) {
+                               ip = mtod(m, struct ip *);
+                               ip->ip_sum = 0;
+                               ip->ip_sum = in_cksum(m, hlen);
+                       }
+                       counters_add(ipcounters, ips_outswcsum, ml_len(ml));
                }
 
-               error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt);
+               if (ifp->if_output_ml != NULL)
+                       error = ifp->if_output_ml(ifp, ml, sintosa(dst), ro->ro_rt);
+               else if (ifp->if_output != NULL)
+                       while ((m = ml_dequeue(ml)) != NULL)
+                               error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt);
+               if (error)
+                       goto bad;
                goto done;
        }
 
@@ -496,31 +557,31 @@ sendit:
                goto bad;
        }
 
-       error = ip_fragment(m, ifp, mtu);
-       if (error) {
-               m = m0 = NULL;
-               goto bad;
-       }
+       while ((m = ml_dequeue(ml)) != NULL) {
+               error = ip_fragment(m, ifp, mtu);
+               if (error)
+                       goto bad;
 
-       for (; m; m = m0) {
-               m0 = m->m_nextpkt;
-               m->m_nextpkt = 0;
+               for (; m; m = n) {
+                       n = m->m_nextpkt;
+                       m->m_nextpkt = NULL;
+                       if (error == 0)
+                               error = ifp->if_output(ifp, m, sintosa(dst),
+                                   ro->ro_rt);
+                       else
+                               m_freem(m);
+               }
                if (error == 0)
-                       error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt);
-               else
-                       m_freem(m);
+                       ipstat_inc(ips_fragmented);
        }
 
-       if (error == 0)
-               ipstat_inc(ips_fragmented);
-
 done:
        if (ro == &iproute && ro->ro_rt)
                rtfree(ro->ro_rt);
        if_put(ifp);
        return (error);
 bad:
-       m_freem(m0);
+       ml_purge(ml);
        goto done;
 }
 
Index: netinet/ip_var.h
===================================================================
RCS file: /cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.86
diff -u -p -r1.86 ip_var.h
--- netinet/ip_var.h    8 Dec 2019 11:08:22 -0000       1.86
+++ netinet/ip_var.h    15 Dec 2020 09:37:36 -0000
@@ -227,6 +227,8 @@ struct mbuf*
         ip_insertoptions(struct mbuf *, struct mbuf *, int *);
 int     ip_mforward(struct mbuf *, struct ifnet *);
 int     ip_optcopy(struct ip *, struct ip *);
+int     ip_output_ml(struct mbuf_list *, struct mbuf *, struct route *, int,
+    struct ip_moptions *, struct inpcb *, u_int32_t);
 int     ip_output(struct mbuf *, struct mbuf *, struct route *, int,
            struct ip_moptions *, struct inpcb *, u_int32_t);
 struct mbuf *
Index: netinet/tcp_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.128
diff -u -p -r1.128 tcp_output.c
--- netinet/tcp_output.c        10 Nov 2018 18:40:34 -0000      1.128
+++ netinet/tcp_output.c        15 Dec 2020 09:37:36 -0000
@@ -193,7 +193,8 @@ tcp_output(struct tcpcb *tp)
 {
        struct socket *so = tp->t_inpcb->inp_socket;
        long len, win, txmaxseg;
-       int off, flags, error;
+       int off, flags, error = 0;
+       struct mbuf_list ml = MBUF_LIST_INITIALIZER();
        struct mbuf *m;
        struct tcphdr *th;
        u_int32_t optbuf[howmany(MAX_TCPOPTLEN, sizeof(u_int32_t))];
@@ -426,7 +427,7 @@ again:
            TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0 &&
            TCP_TIMER_ISARMED(tp, TCPT_PERSIST) == 0) {
                TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
-               return (0);
+               goto out;
        }
 
        /*
@@ -460,7 +461,7 @@ again:
        /*
         * No reason to send a segment, just return.
         */
-       return (0);
+       goto out;
 
 send:
        /*
@@ -485,7 +486,8 @@ send:
                break;
 #endif /* INET6 */
        default:
-               return (EPFNOSUPPORT);
+               error = EPFNOSUPPORT;
+               goto out;
        }
 
        if (flags & TH_SYN) {
@@ -873,13 +875,15 @@ send:
                    0, &src, &dst, IPPROTO_TCP);
                if (tdb == NULL) {
                        m_freem(m);
-                       return (EPERM);
+                       error = EPERM;
+                       goto out;
                }
 
                if (tcp_signature(tdb, tp->pf, m, th, iphlen, 0,
                    mtod(m, caddr_t) + hdrlen - optlen + sigoff) < 0) {
                        m_freem(m);
-                       return (EINVAL);
+                       error = EINVAL;
+                       goto out;
                }
        }
 #endif /* TCP_SIGNATURE */
@@ -1037,9 +1041,6 @@ send:
                                ip->ip_tos |= IPTOS_ECN_ECT0;
 #endif
                }
-               error = ip_output(m, tp->t_inpcb->inp_options,
-                       &tp->t_inpcb->inp_route,
-                       (ip_mtudisc ? IP_MTUDISC : 0), NULL, tp->t_inpcb, 0);
                break;
 #ifdef INET6
        case AF_INET6:
@@ -1057,45 +1058,11 @@ send:
                                ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
 #endif
                }
-               error = ip6_output(m, tp->t_inpcb->inp_outputopts6,
-                         &tp->t_inpcb->inp_route6,
-                         0, NULL, tp->t_inpcb);
                break;
 #endif /* INET6 */
        }
 
-       if (error) {
-out:
-               if (error == ENOBUFS) {
-                       /*
-                        * If the interface queue is full, or IP cannot
-                        * get an mbuf, trigger TCP slow start.
-                        */
-                       tp->snd_cwnd = tp->t_maxseg;
-                       return (0);
-               }
-               if (error == EMSGSIZE) {
-                       /*
-                        * ip_output() will have already fixed the route
-                        * for us.  tcp_mtudisc() will, as its last action,
-                        * initiate retransmission, so it is important to
-                        * not do so here.
-                        */
-                       tcp_mtudisc(tp->t_inpcb, -1);
-                       return (0);
-               }
-               if ((error == EHOSTUNREACH || error == ENETDOWN) &&
-                   TCPS_HAVERCVDSYN(tp->t_state)) {
-                       tp->t_softerror = error;
-                       return (0);
-               }
-
-               /* Restart the delayed ACK timer, if necessary. */
-               if (TCP_TIMER_ISARMED(tp, TCPT_DELACK))
-                       TCP_TIMER_ARM_MSEC(tp, TCPT_DELACK, tcp_delack_msecs);
-
-               return (error);
-       }
+       ml_enqueue(&ml, m);
 
        if (packetlen > tp->t_pmtud_mtu_sent)
                tp->t_pmtud_mtu_sent = packetlen;
@@ -1117,7 +1084,61 @@ out:
        TCP_TIMER_DISARM(tp, TCPT_DELACK);
        if (sendalot && --maxburst)
                goto again;
-       return (0);
+
+ out:
+       if (!ml_empty(&ml)) {
+               int outerr;
+
+               switch (tp->pf) {
+               case 0:     /*default to PF_INET*/
+               case AF_INET:
+                       outerr = ip_output_ml(&ml,
+                           tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
+                           (ip_mtudisc ? IP_MTUDISC : 0), NULL, tp->t_inpcb,
+                           0);
+                       break;
+#ifdef INET6
+               case AF_INET6:
+                       outerr = ip6_output_ml(&ml,
+                           tp->t_inpcb->inp_outputopts6,
+                           &tp->t_inpcb->inp_route6, 0, NULL, tp->t_inpcb);
+                       break;
+#endif /* INET6 */
+               }
+               if (error == 0)
+                       error = outerr;
+       }
+       if (error == 0)
+               return (0);
+       if (error == ENOBUFS) {
+               /*
+                * If the interface queue is full, or IP cannot
+                * get an mbuf, trigger TCP slow start.
+                */
+               tp->snd_cwnd = tp->t_maxseg;
+               return (0);
+       }
+       if (error == EMSGSIZE) {
+               /*
+                * ip_output() will have already fixed the route
+                * for us.  tcp_mtudisc() will, as its last action,
+                * initiate retransmission, so it is important to
+                * not do so here.
+                */
+               tcp_mtudisc(tp->t_inpcb, -1);
+               return (0);
+       }
+       if ((error == EHOSTUNREACH || error == ENETDOWN) &&
+           TCPS_HAVERCVDSYN(tp->t_state)) {
+               tp->t_softerror = error;
+               return (0);
+       }
+
+       /* Restart the delayed ACK timer, if necessary. */
+       if (TCP_TIMER_ISARMED(tp, TCPT_DELACK))
+               TCP_TIMER_ARM_MSEC(tp, TCPT_DELACK, tcp_delack_msecs);
+
+       return (error);
 }
 
 void
Index: netinet6/ip6_id.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_id.c,v
retrieving revision 1.14
diff -u -p -r1.14 ip6_id.c
--- netinet6/ip6_id.c   24 Jun 2020 22:03:44 -0000      1.14
+++ netinet6/ip6_id.c   15 Dec 2020 09:37:36 -0000
@@ -82,6 +82,7 @@
  */
 
 #include <sys/param.h>
+#include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
Index: netinet6/ip6_output.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.247
diff -u -p -r1.247 ip6_output.c
--- netinet6/ip6_output.c       17 Jul 2020 15:21:36 -0000      1.247
+++ netinet6/ip6_output.c       15 Dec 2020 09:37:36 -0000
@@ -136,6 +136,23 @@ void in6_delayed_cksum(struct mbuf *, u_
 /* Context for non-repeating IDs */
 struct idgen32_ctx ip6_id_ctx;
 
+int
+ip6_output_ml(struct mbuf_list *ml, struct ip6_pktopts *opt,
+    struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
+    struct inpcb *inp)
+{
+       struct mbuf *m;
+       int error = 0;
+
+       while ((m = ml_dequeue(ml)) != NULL) {
+               error = ip6_output(m, opt, ro, flags, im6o, inp);
+               if (error)
+                       break;
+       }
+       ml_purge(ml);
+       return (error);
+}
+
 /*
  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
  * header (with pri, len, nxt, hlim, src, dst).
Index: netinet6/ip6_var.h
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_var.h,v
retrieving revision 1.86
diff -u -p -r1.86 ip6_var.h
--- netinet6/ip6_var.h  8 Dec 2019 11:08:22 -0000       1.86
+++ netinet6/ip6_var.h  15 Dec 2020 09:37:36 -0000
@@ -320,6 +320,8 @@ int ip6_sysctl(int *, u_int, void *, siz
 void   ip6_forward(struct mbuf *, struct rtentry *, int);
 
 void   ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *);
+int    ip6_output_ml(struct mbuf_list *, struct ip6_pktopts *,
+    struct route_in6 *, int, struct ip6_moptions *, struct inpcb *);
 int    ip6_output(struct mbuf *, struct ip6_pktopts *, struct route_in6 *, int,
            struct ip6_moptions *, struct inpcb *);
 int    ip6_fragment(struct mbuf *, int, u_char, u_long);

Reply via email to