Hi, As the final step before making LRO (Large Receive Offload) the default, we have to fix path MTU discovery when forwarding.
The drivers, currently ix(4) and lo(4) only, record an upper bound on the size of the original packets in ph_mss. When sending, we must chop the packets with TSO (TCP Segmentation Offload) to that size. That means we have to call tcp_if_output_tso() before ifp->if_output(). I have put that logic into if_output_tso() to avoid code duplication. ok? bluhm Index: net/if.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v retrieving revision 1.702 diff -u -p -r1.702 if.c --- net/if.c 2 Jul 2023 19:59:15 -0000 1.702 +++ net/if.c 3 Jul 2023 10:28:30 -0000 @@ -109,6 +109,9 @@ #include <netinet/tcp.h> #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> +#include <netinet/tcp.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> #ifdef INET6 #include <netinet6/in6_var.h> @@ -883,6 +886,57 @@ if_output_ml(struct ifnet *ifp, struct m ml_purge(ml); return error; +} + +int +if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst, + struct rtentry *rt, u_int mtu) +{ + uint32_t ifcap; + int error; + + switch (dst->sa_family) { + case AF_INET: + ifcap = IFCAP_TSOv4; + break; +#ifdef INET6 + case AF_INET6: + ifcap = IFCAP_TSOv6; + break; +#endif + default: + unhandled_af(dst->sa_family); + } + + /* + * Try to send with TSO first. When forwarding LRO may set + * maximum segment size in mbuf header. Chop TCP segment + * even if it would fit interface MTU to preserve maximum + * path MTU. + */ + error = tcp_if_output_tso(ifp, mp, dst, rt, ifcap, mtu); + if (error || *mp == NULL) + return error; + + if ((*mp)->m_pkthdr.len <= mtu) { + switch (dst->sa_family) { + case AF_INET: + in_hdr_cksum_out(*mp, ifp); + in_proto_cksum_out(*mp, ifp); + break; +#ifdef INET6 + case AF_INET6: + in6_proto_cksum_out(*mp, ifp); + break; +#endif + } + error = ifp->if_output(ifp, *mp, dst, rt); + *mp = NULL; + return error; + } + + /* mp still contains mbuf that has to be fragmented or dropped. 
*/ + return 0; } int Index: net/if_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v retrieving revision 1.128 diff -u -p -r1.128 if_var.h --- net/if_var.h 28 Jun 2023 11:49:49 -0000 1.128 +++ net/if_var.h 3 Jul 2023 10:04:17 -0000 @@ -329,6 +329,8 @@ int if_output_ml(struct ifnet *, struct struct sockaddr *, struct rtentry *); int if_output_mq(struct ifnet *, struct mbuf_queue *, unsigned int *, struct sockaddr *, struct rtentry *); +int if_output_tso(struct ifnet *, struct mbuf **, struct sockaddr *, + struct rtentry *, u_int); int if_output_local(struct ifnet *, struct mbuf *, sa_family_t); void if_rtrequest_dummy(struct ifnet *, int, struct rtentry *); void p2p_rtrequest(struct ifnet *, int, struct rtentry *); Index: net/pf.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v retrieving revision 1.1181 diff -u -p -r1.1181 pf.c --- net/pf.c 5 Jun 2023 08:37:27 -0000 1.1181 +++ net/pf.c 3 Jul 2023 10:04:17 -0000 @@ -6551,15 +6551,8 @@ pf_route(struct pf_pdesc *pd, struct pf_ ip = mtod(m0, struct ip *); } - if (ntohs(ip->ip_len) <= ifp->if_mtu) { - in_hdr_cksum_out(m0, ifp); - in_proto_cksum_out(m0, ifp); - ifp->if_output(ifp, m0, sintosa(dst), rt); - goto done; - } - - if (tcp_if_output_tso(ifp, &m0, sintosa(dst), rt, - IFCAP_TSOv4, ifp->if_mtu) || m0 == NULL) + if (if_output_tso(ifp, &m0, sintosa(dst), rt, ifp->if_mtu) || + m0 == NULL) goto done; /* @@ -6686,14 +6679,8 @@ pf_route6(struct pf_pdesc *pd, struct pf goto done; } - if (m0->m_pkthdr.len <= ifp->if_mtu) { - in6_proto_cksum_out(m0, ifp); - ifp->if_output(ifp, m0, sin6tosa(dst), rt); - goto done; - } - - if (tcp_if_output_tso(ifp, &m0, sin6tosa(dst), rt, - IFCAP_TSOv6, ifp->if_mtu) || m0 == NULL) + if (if_output_tso(ifp, &m0, sin6tosa(dst), rt, ifp->if_mtu) || + m0 == NULL) goto done; ip6stat_inc(ip6s_cantfrag); Index: netinet/ip_output.c 
=================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v retrieving revision 1.388 diff -u -p -r1.388 ip_output.c --- netinet/ip_output.c 22 May 2023 16:08:34 -0000 1.388 +++ netinet/ip_output.c 3 Jul 2023 10:04:17 -0000 @@ -451,17 +451,9 @@ sendit: #endif /* - * If small enough for interface, can just send directly. + * If TSO or small enough for interface, can just send directly. */ - if (ntohs(ip->ip_len) <= mtu) { - in_hdr_cksum_out(m, ifp); - in_proto_cksum_out(m, ifp); - error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); - goto done; - } - - error = tcp_if_output_tso(ifp, &m, sintosa(dst), ro->ro_rt, - IFCAP_TSOv4, mtu); + error = if_output_tso(ifp, &m, sintosa(dst), ro->ro_rt, mtu); if (error || m == NULL) goto done; Index: netinet6/ip6_forward.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v retrieving revision 1.111 diff -u -p -r1.111 ip6_forward.c --- netinet6/ip6_forward.c 16 Jun 2023 19:18:56 -0000 1.111 +++ netinet6/ip6_forward.c 3 Jul 2023 10:04:17 -0000 @@ -319,25 +319,13 @@ reroute: } #endif - error = tcp_if_output_tso(ifp, &m, sin6tosa(sin6), rt, IFCAP_TSOv6, - ifp->if_mtu); + error = if_output_tso(ifp, &m, sin6tosa(sin6), rt, ifp->if_mtu); if (error) ip6stat_inc(ip6s_cantforward); else if (m == NULL) ip6stat_inc(ip6s_forward); if (error || m == NULL) goto senderr; - - /* Check the size after pf_test to give pf a chance to refragment. 
*/ - if (m->m_pkthdr.len <= ifp->if_mtu) { - in6_proto_cksum_out(m, ifp); - error = ifp->if_output(ifp, m, sin6tosa(sin6), rt); - if (error) - ip6stat_inc(ip6s_cantforward); - else - ip6stat_inc(ip6s_forward); - goto senderr; - } if (mcopy != NULL) icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); Index: netinet6/ip6_output.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v retrieving revision 1.278 diff -u -p -r1.278 ip6_output.c --- netinet6/ip6_output.c 13 Jun 2023 19:34:12 -0000 1.278 +++ netinet6/ip6_output.c 3 Jul 2023 11:07:13 -0000 @@ -677,7 +677,8 @@ reroute: * 2-a: send as is if tlen <= interface mtu * 2-b: error if tlen > interface mtu */ - tlen = m->m_pkthdr.len; + tlen = ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) ? + m->m_pkthdr.ph_mss : m->m_pkthdr.len; if (ISSET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT)) { CLR(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); @@ -686,9 +687,8 @@ reroute: dontfrag = 1; else dontfrag = 0; - if (dontfrag && /* case 2-b */ - (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) ? - m->m_pkthdr.ph_mss : tlen) > ifp->if_mtu) { + + if (dontfrag && tlen > ifp->if_mtu) { /* case 2-b */ #ifdef IPSEC if (ip_mtudisc) ipsec_adjust_mtu(m, mtu); @@ -701,15 +701,12 @@ reroute: * transmit packet without fragmentation */ if (dontfrag || tlen <= mtu) { /* case 1-a and 2-a */ - in6_proto_cksum_out(m, ifp); - error = ifp->if_output(ifp, m, sin6tosa(dst), ro->ro_rt); - goto done; + error = if_output_tso(ifp, &m, sin6tosa(dst), ro->ro_rt, + ifp->if_mtu); + if (error || m == NULL) + goto done; + goto bad; /* should not happen */ } - - error = tcp_if_output_tso(ifp, &m, sin6tosa(dst), ro->ro_rt, - IFCAP_TSOv6, mtu); - if (error || m == NULL) - goto done; /* * try to fragment the packet. case 1-b