Hi, I would like to commit the TCP/IP layer for TCP segmentation offload.
The driver is not part of this diff; it should be reviewed separately. The discussion of ifconfig flags and pseudo-interface capabilities is also excluded. I kept the wrong names IFXF_TSO and IFCAP_TSO, as jan@'s diff will make them consistent. The numbers for the defines are not changed, so my commit should not break anything. This diff has no effect, as drivers do not set TSO capabilities yet. I would like to commit it anyway to see that nothing else breaks. Note that hardware might support IPv4 only, so I add two separate capabilities. ok? bluhm Index: net/if.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.h,v retrieving revision 1.211 diff -u -p -r1.211 if.h --- net/if.h 7 Mar 2023 20:09:48 -0000 1.211 +++ net/if.h 14 May 2023 19:56:02 -0000 @@ -231,7 +231,7 @@ struct if_status_description { #define IFXF_INET6_NOSOII 0x40 /* [N] don't do RFC 7217 */ #define IFXF_AUTOCONF4 0x80 /* [N] v4 autoconf (aka dhcp) enabled */ #define IFXF_MONITOR 0x100 /* [N] only used for bpf */ -#define IFXF_TSO 0x200 /* [N] TCP segment offloading */ +#define IFXF_TSO 0x200 /* [N] XXX missnamed, should be LRO */ #define IFXF_CANTCHANGE \ (IFXF_MPSAFE|IFXF_CLONED) @@ -251,7 +251,9 @@ struct if_status_description { #define IFCAP_VLAN_HWTAGGING 0x00000020 /* hardware VLAN tag support */ #define IFCAP_CSUM_TCPv6 0x00000080 /* can do IPv6/TCP checksums */ #define IFCAP_CSUM_UDPv6 0x00000100 /* can do IPv6/UDP checksums */ -#define IFCAP_TSO 0x00004000 /* TCP segment offloading */ +#define IFCAP_TSOv4 0x00001000 /* IPv4/TCP segment offload */ +#define IFCAP_TSOv6 0x00002000 /* IPv6/TCP segment offload */ +#define IFCAP_TSO 0x00004000 /* XXX should be LRO */ #define IFCAP_WOL 0x00008000 /* can do wake on lan */ #define IFCAP_CSUM_MASK (IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | \ Index: net/pf.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v retrieving 
revision 1.1179 diff -u -p -r1.1179 pf.c --- net/pf.c 13 May 2023 13:35:17 -0000 1.1179 +++ net/pf.c 14 May 2023 19:56:02 -0000 @@ -6555,15 +6555,9 @@ pf_route(struct pf_pdesc *pd, struct pf_ goto done; } - if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO) && - m0->m_pkthdr.ph_mss <= ifp->if_mtu) { - if (tcp_chopper(m0, &ml, ifp, m0->m_pkthdr.ph_mss) || - if_output_ml(ifp, &ml, sintosa(dst), rt)) - goto done; - tcpstat_inc(tcps_outswtso); + if (tcp_if_output_tso(ifp, &m0, sintosa(dst), rt, + IFCAP_TSOv4, ifp->if_mtu) || m0 == NULL) goto done; - } - CLR(m0->m_pkthdr.csum_flags, M_TCP_TSO); /* * Too large for interface; fragment if possible. @@ -6598,7 +6592,6 @@ void pf_route6(struct pf_pdesc *pd, struct pf_state *st) { struct mbuf *m0; - struct mbuf_list ml; struct sockaddr_in6 *dst, sin6; struct rtentry *rt = NULL; struct ip6_hdr *ip6; @@ -6696,15 +6689,9 @@ pf_route6(struct pf_pdesc *pd, struct pf goto done; } - if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO) && - m0->m_pkthdr.ph_mss <= ifp->if_mtu) { - if (tcp_chopper(m0, &ml, ifp, m0->m_pkthdr.ph_mss) || - if_output_ml(ifp, &ml, sin6tosa(dst), rt)) - goto done; - tcpstat_inc(tcps_outswtso); + if (tcp_if_output_tso(ifp, &m0, sin6tosa(dst), rt, + IFCAP_TSOv6, ifp->if_mtu) || m0 == NULL) goto done; - } - CLR(m0->m_pkthdr.csum_flags, M_TCP_TSO); ip6stat_inc(ip6s_cantfrag); if (st->rt != PF_DUPTO) Index: netinet/ip_output.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v retrieving revision 1.386 diff -u -p -r1.386 ip_output.c --- netinet/ip_output.c 13 May 2023 13:35:17 -0000 1.386 +++ netinet/ip_output.c 14 May 2023 19:56:02 -0000 @@ -460,15 +460,10 @@ sendit: goto done; } - if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) && - m->m_pkthdr.ph_mss <= mtu) { - if ((error = tcp_chopper(m, &ml, ifp, m->m_pkthdr.ph_mss)) || - (error = if_output_ml(ifp, &ml, sintosa(dst), ro->ro_rt))) - goto done; - tcpstat_inc(tcps_outswtso); + error = 
tcp_if_output_tso(ifp, &m, sintosa(dst), ro->ro_rt, + IFCAP_TSOv4, mtu); + if (error || m == NULL) goto done; - } - CLR(m->m_pkthdr.csum_flags, M_TCP_TSO); /* * Too large for interface; fragment if possible. @@ -1887,10 +1882,15 @@ in_proto_cksum_out(struct mbuf *m, struc u_int16_t csum = 0, offset; offset = ip->ip_hl << 2; - if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) + if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) { + csum = in_cksum_phdr(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htonl(ip->ip_p)); + } else if (ISSET(m->m_pkthdr.csum_flags, + M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) { csum = in_cksum_phdr(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) - offset + ip->ip_p)); + } if (ip->ip_p == IPPROTO_TCP) offset += offsetof(struct tcphdr, th_sum); else if (ip->ip_p == IPPROTO_UDP) Index: netinet/tcp_output.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_output.c,v retrieving revision 1.137 diff -u -p -r1.137 tcp_output.c --- netinet/tcp_output.c 13 May 2023 13:35:18 -0000 1.137 +++ netinet/tcp_output.c 14 May 2023 19:56:02 -0000 @@ -80,6 +80,7 @@ #include <sys/kernel.h> #include <net/if.h> +#include <net/if_var.h> #include <net/route.h> #if NPF > 0 #include <net/pfvar.h> @@ -753,7 +754,7 @@ send: /* Enable TSO and specify the size of the resulting segments. 
*/ if (tso) { - m->m_pkthdr.csum_flags |= M_TCP_TSO; + SET(m->m_pkthdr.csum_flags, M_TCP_TSO); m->m_pkthdr.ph_mss = tp->t_maxseg; } @@ -1347,5 +1348,45 @@ tcp_chopper(struct mbuf *m0, struct mbuf bad: tcpstat_inc(tcps_outbadtso); ml_purge(ml); + return error; +} + +int +tcp_if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst, + struct rtentry *rt, uint32_t ifcap, u_int mtu) +{ + struct mbuf_list ml; + int error; + + /* caller must fail later or fragment */ + if (!ISSET((*mp)->m_pkthdr.csum_flags, M_TCP_TSO)) + return 0; + if ((*mp)->m_pkthdr.ph_mss > mtu) { + CLR((*mp)->m_pkthdr.csum_flags, M_TCP_TSO); + return 0; + } + + /* network interface hardware will do TSO */ + if (in_ifcap_cksum(*mp, ifp, ifcap)) { + if (ISSET(ifcap, IFCAP_TSOv4)) { + in_hdr_cksum_out(*mp, ifp); + in_proto_cksum_out(*mp, ifp); + } + if (ISSET(ifcap, IFCAP_TSOv6)) + in6_proto_cksum_out(*mp, ifp); + error = ifp->if_output(ifp, *mp, dst, rt); + if (!error) + tcpstat_inc(tcps_outhwtso); + goto done; + } + + /* as fallback do TSO in software */ + if ((error = tcp_chopper(*mp, &ml, ifp, (*mp)->m_pkthdr.ph_mss)) || + (error = if_output_ml(ifp, &ml, dst, rt))) + goto done; + tcpstat_inc(tcps_outswtso); + + done: + *mp = NULL; return error; } Index: netinet/tcp_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v retrieving revision 1.164 diff -u -p -r1.164 tcp_var.h --- netinet/tcp_var.h 10 May 2023 12:07:16 -0000 1.164 +++ netinet/tcp_var.h 14 May 2023 19:56:02 -0000 @@ -719,6 +719,8 @@ struct tcpcb * void tcp_notify(struct inpcb *, int); int tcp_output(struct tcpcb *); int tcp_chopper(struct mbuf *, struct mbuf_list *, struct ifnet *, u_int); +int tcp_if_output_tso(struct ifnet *, struct mbuf **, struct sockaddr *, + struct rtentry *, uint32_t, u_int); void tcp_pulloutofband(struct socket *, u_int, struct mbuf *, int); int tcp_reass(struct tcpcb *, struct tcphdr *, struct mbuf *, int *); void 
tcp_rscale(struct tcpcb *, u_long); Index: netinet6/ip6_output.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v retrieving revision 1.275 diff -u -p -r1.275 ip6_output.c --- netinet6/ip6_output.c 10 May 2023 12:07:17 -0000 1.275 +++ netinet6/ip6_output.c 14 May 2023 19:56:02 -0000 @@ -706,15 +706,10 @@ reroute: goto done; } - if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) && - m->m_pkthdr.ph_mss <= mtu) { - if ((error = tcp_chopper(m, &ml, ifp, m->m_pkthdr.ph_mss)) || - (error = if_output_ml(ifp, &ml, sin6tosa(dst), ro->ro_rt))) - goto done; - tcpstat_inc(tcps_outswtso); + error = tcp_if_output_tso(ifp, &m, sin6tosa(dst), ro->ro_rt, + IFCAP_TSOv6, mtu); + if (error || m == NULL) goto done; - } - CLR(m->m_pkthdr.csum_flags, M_TCP_TSO); /* * try to fragment the packet. case 1-b @@ -2715,8 +2710,13 @@ in6_proto_cksum_out(struct mbuf *m, stru u_int16_t csum; offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); - csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, - htonl(m->m_pkthdr.len - offset), htonl(nxt)); + if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) { + csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, + htonl(0), htonl(nxt)); + } else { + csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, + htonl(m->m_pkthdr.len - offset), htonl(nxt)); + } if (nxt == IPPROTO_TCP) offset += offsetof(struct tcphdr, th_sum); else if (nxt == IPPROTO_UDP)