Hi,

As the final step before making LRO (Large Receive Offload) the default,
we have to fix path MTU discovery when forwarding.

The drivers, currently only ix(4) and lo(4), record an upper bound
on the size of the original packets in ph_mss.  When sending, we
must chop the packets with TSO (TCP Segmentation Offload) to that
size.  That means we have to call tcp_if_output_tso() before
ifp->if_output().  I have put that logic into if_output_tso() to
avoid code duplication.

ok?

bluhm

Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
retrieving revision 1.702
diff -u -p -r1.702 if.c
--- net/if.c    2 Jul 2023 19:59:15 -0000       1.702
+++ net/if.c    3 Jul 2023 10:28:30 -0000
@@ -109,6 +109,9 @@
 #include <netinet/tcp.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
 
 #ifdef INET6
 #include <netinet6/in6_var.h>
@@ -883,6 +886,57 @@ if_output_ml(struct ifnet *ifp, struct m
                ml_purge(ml);
 
        return error;
+}
+
+int
+if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst,
+    struct rtentry *rt, u_int mtu)
+{
+       uint32_t ifcap;
+       int error;
+
+       switch (dst->sa_family) {
+       case AF_INET:
+               ifcap = IFCAP_TSOv4;
+               break;
+#ifdef INET6
+       case AF_INET6:
+               ifcap = IFCAP_TSOv6;
+               break;
+#endif
+       default:
+               unhandled_af(dst->sa_family);
+       }
+
+       /*
+        * Try to send with TSO first.  When forwarding LRO may set
+        * maximum segment size in mbuf header.  Chop TCP segment
+        * even if it would fit interface MTU to preserve maximum
+        * path MTU.
+        */
+       error = tcp_if_output_tso(ifp, mp, dst, rt, ifcap, mtu);
+       if (error || *mp == NULL)
+               return error;
+
+       if ((*mp)->m_pkthdr.len <= mtu) {
+               switch (dst->sa_family) {
+               case AF_INET:
+                       in_hdr_cksum_out(*mp, ifp);
+                       in_proto_cksum_out(*mp, ifp);
+                       break;
+#ifdef INET6
+               case AF_INET6:
+                       in6_proto_cksum_out(*mp, ifp);
+                       break;
+#endif
+               }
+               error = ifp->if_output(ifp, *mp, dst, rt);
+               *mp = NULL;
+               return error;
+       }
+
+       /* mp still contains mbuf that has to be fragmented or dropped. */
+       return 0;
 }
 
 int
Index: net/if_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v
retrieving revision 1.128
diff -u -p -r1.128 if_var.h
--- net/if_var.h        28 Jun 2023 11:49:49 -0000      1.128
+++ net/if_var.h        3 Jul 2023 10:04:17 -0000
@@ -329,6 +329,8 @@ int if_output_ml(struct ifnet *, struct 
            struct sockaddr *, struct rtentry *);
 int    if_output_mq(struct ifnet *, struct mbuf_queue *, unsigned int *,
            struct sockaddr *, struct rtentry *);
+int    if_output_tso(struct ifnet *, struct mbuf **, struct sockaddr *,
+           struct rtentry *, u_int);
 int    if_output_local(struct ifnet *, struct mbuf *, sa_family_t);
 void   if_rtrequest_dummy(struct ifnet *, int, struct rtentry *);
 void   p2p_rtrequest(struct ifnet *, int, struct rtentry *);
Index: net/pf.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.1181
diff -u -p -r1.1181 pf.c
--- net/pf.c    5 Jun 2023 08:37:27 -0000       1.1181
+++ net/pf.c    3 Jul 2023 10:04:17 -0000
@@ -6551,15 +6551,8 @@ pf_route(struct pf_pdesc *pd, struct pf_
                ip = mtod(m0, struct ip *);
        }
 
-       if (ntohs(ip->ip_len) <= ifp->if_mtu) {
-               in_hdr_cksum_out(m0, ifp);
-               in_proto_cksum_out(m0, ifp);
-               ifp->if_output(ifp, m0, sintosa(dst), rt);
-               goto done;
-       }
-
-       if (tcp_if_output_tso(ifp, &m0, sintosa(dst), rt,
-           IFCAP_TSOv4, ifp->if_mtu) || m0 == NULL)
+       if (if_output_tso(ifp, &m0, sintosa(dst), rt, ifp->if_mtu) ||
+           m0 == NULL)
                goto done;
 
        /*
@@ -6686,14 +6679,8 @@ pf_route6(struct pf_pdesc *pd, struct pf
                goto done;
        }
 
-       if (m0->m_pkthdr.len <= ifp->if_mtu) {
-               in6_proto_cksum_out(m0, ifp);
-               ifp->if_output(ifp, m0, sin6tosa(dst), rt);
-               goto done;
-       }
-
-       if (tcp_if_output_tso(ifp, &m0, sin6tosa(dst), rt,
-           IFCAP_TSOv6, ifp->if_mtu) || m0 == NULL)
+       if (if_output_tso(ifp, &m0, sin6tosa(dst), rt, ifp->if_mtu) ||
+           m0 == NULL)
                goto done;
 
        ip6stat_inc(ip6s_cantfrag);
Index: netinet/ip_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.388
diff -u -p -r1.388 ip_output.c
--- netinet/ip_output.c 22 May 2023 16:08:34 -0000      1.388
+++ netinet/ip_output.c 3 Jul 2023 10:04:17 -0000
@@ -451,17 +451,9 @@ sendit:
 #endif
 
        /*
-        * If small enough for interface, can just send directly.
+        * If TSO or small enough for interface, can just send directly.
         */
-       if (ntohs(ip->ip_len) <= mtu) {
-               in_hdr_cksum_out(m, ifp);
-               in_proto_cksum_out(m, ifp);
-               error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt);
-               goto done;
-       }
-
-       error = tcp_if_output_tso(ifp, &m, sintosa(dst), ro->ro_rt,
-           IFCAP_TSOv4, mtu);
+       error = if_output_tso(ifp, &m, sintosa(dst), ro->ro_rt, mtu);
        if (error || m == NULL)
                goto done;
 
Index: netinet6/ip6_forward.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
retrieving revision 1.111
diff -u -p -r1.111 ip6_forward.c
--- netinet6/ip6_forward.c      16 Jun 2023 19:18:56 -0000      1.111
+++ netinet6/ip6_forward.c      3 Jul 2023 10:04:17 -0000
@@ -319,25 +319,13 @@ reroute:
        }
 #endif
 
-       error = tcp_if_output_tso(ifp, &m, sin6tosa(sin6), rt, IFCAP_TSOv6,
-           ifp->if_mtu);
+       error = if_output_tso(ifp, &m, sin6tosa(sin6), rt, ifp->if_mtu);
        if (error)
                ip6stat_inc(ip6s_cantforward);
        else if (m == NULL)
                ip6stat_inc(ip6s_forward);
        if (error || m == NULL)
                goto senderr;
-
-       /* Check the size after pf_test to give pf a chance to refragment. */
-       if (m->m_pkthdr.len <= ifp->if_mtu) {
-               in6_proto_cksum_out(m, ifp);
-               error = ifp->if_output(ifp, m, sin6tosa(sin6), rt);
-               if (error)
-                       ip6stat_inc(ip6s_cantforward);
-               else
-                       ip6stat_inc(ip6s_forward);
-               goto senderr;
-       }
 
        if (mcopy != NULL)
                icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
Index: netinet6/ip6_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.278
diff -u -p -r1.278 ip6_output.c
--- netinet6/ip6_output.c       13 Jun 2023 19:34:12 -0000      1.278
+++ netinet6/ip6_output.c       3 Jul 2023 11:07:13 -0000
@@ -677,7 +677,8 @@ reroute:
         * 2-a: send as is if tlen <= interface mtu
         * 2-b: error if tlen > interface mtu
         */
-       tlen = m->m_pkthdr.len;
+       tlen = ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) ?
+           m->m_pkthdr.ph_mss : m->m_pkthdr.len;
 
        if (ISSET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT)) {
                CLR(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
@@ -686,9 +687,8 @@ reroute:
                dontfrag = 1;
        else
                dontfrag = 0;
-       if (dontfrag &&                                 /* case 2-b */
-           (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) ?
-           m->m_pkthdr.ph_mss : tlen) > ifp->if_mtu) {
+
+       if (dontfrag && tlen > ifp->if_mtu) {           /* case 2-b */
 #ifdef IPSEC
                if (ip_mtudisc)
                        ipsec_adjust_mtu(m, mtu);
@@ -701,15 +701,12 @@ reroute:
         * transmit packet without fragmentation
         */
        if (dontfrag || tlen <= mtu) {                  /* case 1-a and 2-a */
-               in6_proto_cksum_out(m, ifp);
-               error = ifp->if_output(ifp, m, sin6tosa(dst), ro->ro_rt);
-               goto done;
+               error = if_output_tso(ifp, &m, sin6tosa(dst), ro->ro_rt,
+                   ifp->if_mtu);
+               if (error || m == NULL)
+                       goto done;
+               goto bad;                               /* should not happen */
        }
-
-       error = tcp_if_output_tso(ifp, &m, sin6tosa(dst), ro->ro_rt,
-           IFCAP_TSOv6, mtu);
-       if (error || m == NULL)
-               goto done;
 
        /*
         * try to fragment the packet.  case 1-b

Reply via email to