On Sat, May 13, 2023 at 01:32:07AM +0200, Alexander Bluhm wrote: > I have not yet investigated where the dropped counter 83 comes from. > If you see that also, please report what you did.
This is an ENOBUFS error in this chunk. /* network interface hardware will do TSO */ if (in_ifcap_cksum(*mp, ifp, ifcap)) { if (ISSET(ifcap, IFCAP_TSOv4)) { in_hdr_cksum_out(*mp, ifp); in_proto_cksum_out(*mp, ifp); } if (ISSET(ifcap, IFCAP_TSOv6)) in6_proto_cksum_out(*mp, ifp); if ((error = ifp->if_output(ifp, *mp, dst, rt))) { tcpstat_inc(tcps_outbadtso); goto done; } tcpstat_inc(tcps_outhwtso); goto done; } As the error from ifp->if_output() has nothing to do with TSO, I remove the counting there. Updated diff, please test if you have ix(4) interfaces doing TCP output. bluhm Index: dev/pci/if_ix.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.193 diff -u -p -r1.193 if_ix.c --- dev/pci/if_ix.c 28 Apr 2023 10:18:57 -0000 1.193 +++ dev/pci/if_ix.c 14 May 2023 09:11:33 -0000 @@ -1924,8 +1924,9 @@ ixgbe_setup_interface(struct ix_softc *s ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; ifp->if_capabilities |= IFCAP_CSUM_IPv4; + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; if (sc->hw.mac.type != ixgbe_mac_82598EB) - ifp->if_capabilities |= IFCAP_TSO; + ifp->if_capabilities |= IFCAP_LRO; /* * Specify the media types supported by this sc and register @@ -2344,6 +2345,7 @@ ixgbe_initialize_transmit_units(struct i int i; uint64_t tdba; uint32_t txctrl; + uint32_t hlreg; /* Setup the Base and Length of the Tx Descriptor Ring */ @@ -2405,6 +2407,11 @@ ixgbe_initialize_transmit_units(struct i rttdcs &= ~IXGBE_RTTDCS_ARBDIS; IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); } + + /* Enable TCP/UDP padding when using TSO */ + hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); + hlreg |= IXGBE_HLREG0_TXPADEN; + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); } /********************************************************************* @@ -2473,16 +2480,18 @@ ixgbe_free_transmit_buffers(struct tx_ri **********************************************************************/ static inline 
int -ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, - uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status) +ixgbe_tx_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, + uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status, uint32_t *cmd_type_len, + uint32_t *mss_l4len_idx) { struct ether_extracted ext; int offload = 0; - uint32_t iphlen; + uint32_t ethlen, iphlen; ether_extract_headers(mp, &ext); + ethlen = sizeof(*ext.eh); - *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + *vlan_macip_lens |= (ethlen << IXGBE_ADVTXD_MACLEN_SHIFT); if (ext.ip4) { iphlen = ext.ip4->ip_hl << 2; @@ -2500,6 +2509,8 @@ ixgbe_csum_offload(struct mbuf *mp, uint *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; #endif } else { + if (mp->m_pkthdr.csum_flags & M_TCP_TSO) + tcpstat_inc(tcps_outbadtso); return offload; } @@ -2519,6 +2530,32 @@ ixgbe_csum_offload(struct mbuf *mp, uint } } + if (mp->m_pkthdr.csum_flags & M_TCP_TSO) { + if (ext.tcp) { + uint32_t pktlen, hdrlen, thlen, outlen; + + thlen = ext.tcp->th_off << 2; + + *mss_l4len_idx |= (uint32_t)(mp->m_pkthdr.ph_mss + << IXGBE_ADVTXD_MSS_SHIFT); + *mss_l4len_idx |= thlen << IXGBE_ADVTXD_L4LEN_SHIFT; + + hdrlen = ethlen + iphlen + thlen; + pktlen = mp->m_pkthdr.len - hdrlen; + CLR(*olinfo_status, IXGBE_ADVTXD_PAYLEN_MASK + << IXGBE_ADVTXD_PAYLEN_SHIFT); + *olinfo_status |= pktlen << IXGBE_ADVTXD_PAYLEN_SHIFT; + + *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; + offload = 1; + + outlen = hdrlen + mp->m_pkthdr.ph_mss; + tcpstat_add(tcps_outpkttso, + (pktlen + outlen - 1) / outlen); + } else + tcpstat_inc(tcps_outbadtso); + } + return offload; } @@ -2529,6 +2566,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, struct ixgbe_adv_tx_context_desc *TXD; struct ixgbe_tx_buf *tx_buffer; uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0; + uint32_t mss_l4len_idx = 0; int ctxd = txr->next_avail_desc; int offload = 0; @@ -2544,8 +2582,8 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, } #endif - offload |= ixgbe_csum_offload(mp, 
&vlan_macip_lens, &type_tucmd_mlhl, - olinfo_status); + offload |= ixgbe_tx_offload(mp, &vlan_macip_lens, &type_tucmd_mlhl, + olinfo_status, cmd_type_len, &mss_l4len_idx); if (!offload) return (0); @@ -2559,7 +2597,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); - TXD->mss_l4len_idx = htole32(0); + TXD->mss_l4len_idx = htole32(mss_l4len_idx); tx_buffer->m_head = NULL; tx_buffer->eop_index = -1; @@ -2868,18 +2906,20 @@ ixgbe_initialize_receive_units(struct ix } IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); - /* Always enable jumbo frame reception */ hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); + /* Always enable jumbo frame reception */ hlreg |= IXGBE_HLREG0_JUMBOEN; + /* Always enable CRC stripping */ + hlreg |= IXGBE_HLREG0_RXCRCSTRP; IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); - if (ISSET(ifp->if_xflags, IFXF_TSO)) { + if (ISSET(ifp->if_xflags, IFXF_LRO)) { rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); /* This field has to be set to zero. */ rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; - /* Enable TSO Receive Offloading */ + /* RSC Coalescing on ACK Change */ rdrxctl |= IXGBE_RDRXCTL_RSCACKC; rdrxctl |= IXGBE_RDRXCTL_FCOE_WRFIX; @@ -2902,10 +2942,10 @@ ixgbe_initialize_receive_units(struct ix srrctl = bufsz | IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); - if (ISSET(ifp->if_xflags, IFXF_TSO)) { + if (ISSET(ifp->if_xflags, IFXF_LRO)) { rdrxctl = IXGBE_READ_REG(&sc->hw, IXGBE_RSCCTL(i)); - /* Enable TSO Receive Side Coalescing */ + /* Enable Receive Side Coalescing */ rdrxctl |= IXGBE_RSCCTL_RSCEN; rdrxctl |= IXGBE_RSCCTL_MAXDESC_16; @@ -3263,7 +3303,7 @@ ixgbe_setup_vlan_hw_support(struct ix_so * We have to disable VLAN striping when using TCP offloading, due to a * firmware bug. 
*/ - if (ISSET(ifp->if_xflags, IFXF_TSO)) { + if (ISSET(ifp->if_xflags, IFXF_LRO)) { sc->vlan_stripping = 0; return; } Index: dev/pci/ixgbe.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/ixgbe.h,v retrieving revision 1.33 diff -u -p -r1.33 ixgbe.h --- dev/pci/ixgbe.h 8 Feb 2022 03:38:00 -0000 1.33 +++ dev/pci/ixgbe.h 14 May 2023 09:11:33 -0000 @@ -60,12 +60,18 @@ #include <net/if.h> #include <net/if_media.h> +#include <net/route.h> #include <net/toeplitz.h> +struct tdb; + #include <netinet/in.h> #include <netinet/if_ether.h> #include <netinet/ip.h> #include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> #if NBPFILTER > 0 #include <net/bpf.h> Index: dev/pci/ixgbe_type.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/ixgbe_type.h,v retrieving revision 1.36 diff -u -p -r1.36 ixgbe_type.h --- dev/pci/ixgbe_type.h 9 Jan 2022 05:42:56 -0000 1.36 +++ dev/pci/ixgbe_type.h 14 May 2023 09:11:33 -0000 @@ -3355,6 +3355,7 @@ struct ixgbe_adv_tx_context_desc { /* 1st&Last TSO-full iSCSI PDU */ #define IXGBE_ADVTXD_POPTS_ISCO_FULL 0x00001800 #define IXGBE_ADVTXD_POPTS_RSV 0x00002000 /* POPTS Reserved */ +#define IXGBE_ADVTXD_PAYLEN_MASK 0x0003FFFF /* Adv desc PAYLEN */ #define IXGBE_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ #define IXGBE_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ #define IXGBE_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */ Index: net/if.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.h,v retrieving revision 1.211 diff -u -p -r1.211 if.h --- net/if.h 7 Mar 2023 20:09:48 -0000 1.211 +++ net/if.h 14 May 2023 09:11:33 -0000 @@ -231,7 +231,7 @@ struct if_status_description { #define IFXF_INET6_NOSOII 0x40 /* [N] don't do RFC 7217 */ #define IFXF_AUTOCONF4 0x80 /* [N] v4 
autoconf (aka dhcp) enabled */ #define IFXF_MONITOR 0x100 /* [N] only used for bpf */ -#define IFXF_TSO 0x200 /* [N] TCP segment offloading */ +#define IFXF_LRO 0x200 /* [N] TCP large recv offload */ #define IFXF_CANTCHANGE \ (IFXF_MPSAFE|IFXF_CLONED) @@ -251,11 +251,17 @@ struct if_status_description { #define IFCAP_VLAN_HWTAGGING 0x00000020 /* hardware VLAN tag support */ #define IFCAP_CSUM_TCPv6 0x00000080 /* can do IPv6/TCP checksums */ #define IFCAP_CSUM_UDPv6 0x00000100 /* can do IPv6/UDP checksums */ -#define IFCAP_TSO 0x00004000 /* TCP segment offloading */ +#define IFCAP_LRO 0x00001000 /* TCP large recv offload */ +#define IFCAP_TSOv4 0x00002000 /* TCP segmentation offload */ +#define IFCAP_TSOv6 0x00004000 /* TCP segmentation offload */ #define IFCAP_WOL 0x00008000 /* can do wake on lan */ #define IFCAP_CSUM_MASK (IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | \ IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6) + +/* XXX feature flags are misnamed */ +#define IFCAP_TSO IFCAP_LRO +#define IFXF_TSO IFXF_LRO /* symbolic names for terminal (per-protocol) CTL_IFQ_ nodes */ #define IFQCTL_LEN 1 Index: net/pf.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v retrieving revision 1.1179 diff -u -p -r1.1179 pf.c --- net/pf.c 13 May 2023 13:35:17 -0000 1.1179 +++ net/pf.c 14 May 2023 09:11:33 -0000 @@ -6555,15 +6555,9 @@ pf_route(struct pf_pdesc *pd, struct pf_ goto done; } - if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO) && - m0->m_pkthdr.ph_mss <= ifp->if_mtu) { - if (tcp_chopper(m0, &ml, ifp, m0->m_pkthdr.ph_mss) || - if_output_ml(ifp, &ml, sintosa(dst), rt)) - goto done; - tcpstat_inc(tcps_outswtso); + if (tcp_if_output_tso(ifp, &m0, sintosa(dst), rt, + IFCAP_TSOv4, ifp->if_mtu) || m0 == NULL) goto done; - } - CLR(m0->m_pkthdr.csum_flags, M_TCP_TSO); /* * Too large for interface; fragment if possible. 
@@ -6598,7 +6592,6 @@ void pf_route6(struct pf_pdesc *pd, struct pf_state *st) { struct mbuf *m0; - struct mbuf_list ml; struct sockaddr_in6 *dst, sin6; struct rtentry *rt = NULL; struct ip6_hdr *ip6; @@ -6696,15 +6689,9 @@ pf_route6(struct pf_pdesc *pd, struct pf goto done; } - if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO) && - m0->m_pkthdr.ph_mss <= ifp->if_mtu) { - if (tcp_chopper(m0, &ml, ifp, m0->m_pkthdr.ph_mss) || - if_output_ml(ifp, &ml, sin6tosa(dst), rt)) - goto done; - tcpstat_inc(tcps_outswtso); + if (tcp_if_output_tso(ifp, &m0, sin6tosa(dst), rt, + IFCAP_TSOv6, ifp->if_mtu) || m0 == NULL) goto done; - } - CLR(m0->m_pkthdr.csum_flags, M_TCP_TSO); ip6stat_inc(ip6s_cantfrag); if (st->rt != PF_DUPTO) Index: netinet/ip_output.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v retrieving revision 1.386 diff -u -p -r1.386 ip_output.c --- netinet/ip_output.c 13 May 2023 13:35:17 -0000 1.386 +++ netinet/ip_output.c 14 May 2023 09:11:33 -0000 @@ -460,15 +460,10 @@ sendit: goto done; } - if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) && - m->m_pkthdr.ph_mss <= mtu) { - if ((error = tcp_chopper(m, &ml, ifp, m->m_pkthdr.ph_mss)) || - (error = if_output_ml(ifp, &ml, sintosa(dst), ro->ro_rt))) - goto done; - tcpstat_inc(tcps_outswtso); + error = tcp_if_output_tso(ifp, &m, sintosa(dst), ro->ro_rt, + IFCAP_TSOv4, mtu); + if (error || m == NULL) goto done; - } - CLR(m->m_pkthdr.csum_flags, M_TCP_TSO); /* * Too large for interface; fragment if possible. 
@@ -1887,10 +1882,15 @@ in_proto_cksum_out(struct mbuf *m, struc u_int16_t csum = 0, offset; offset = ip->ip_hl << 2; - if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) + if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) { + csum = in_cksum_phdr(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htonl(ip->ip_p)); + } else if (ISSET(m->m_pkthdr.csum_flags, + M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) { csum = in_cksum_phdr(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) - offset + ip->ip_p)); + } if (ip->ip_p == IPPROTO_TCP) offset += offsetof(struct tcphdr, th_sum); else if (ip->ip_p == IPPROTO_UDP) Index: netinet/tcp_output.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_output.c,v retrieving revision 1.137 diff -u -p -r1.137 tcp_output.c --- netinet/tcp_output.c 13 May 2023 13:35:18 -0000 1.137 +++ netinet/tcp_output.c 14 May 2023 09:20:15 -0000 @@ -80,6 +80,7 @@ #include <sys/kernel.h> #include <net/if.h> +#include <net/if_var.h> #include <net/route.h> #if NPF > 0 #include <net/pfvar.h> @@ -753,7 +754,7 @@ send: /* Enable TSO and specify the size of the resulting segments. 
*/ if (tso) { - m->m_pkthdr.csum_flags |= M_TCP_TSO; + SET(m->m_pkthdr.csum_flags, M_TCP_TSO); m->m_pkthdr.ph_mss = tp->t_maxseg; } @@ -1347,5 +1348,45 @@ tcp_chopper(struct mbuf *m0, struct mbuf bad: tcpstat_inc(tcps_outbadtso); ml_purge(ml); + return error; +} + +int +tcp_if_output_tso(struct ifnet *ifp, struct mbuf **mp, struct sockaddr *dst, + struct rtentry *rt, uint32_t ifcap, u_int mtu) +{ + struct mbuf_list ml; + int error; + + /* caller must fail later or fragment */ + if (!ISSET((*mp)->m_pkthdr.csum_flags, M_TCP_TSO)) + return 0; + if ((*mp)->m_pkthdr.ph_mss > mtu) { + CLR((*mp)->m_pkthdr.csum_flags, M_TCP_TSO); + return 0; + } + + /* network interface hardware will do TSO */ + if (in_ifcap_cksum(*mp, ifp, ifcap)) { + if (ISSET(ifcap, IFCAP_TSOv4)) { + in_hdr_cksum_out(*mp, ifp); + in_proto_cksum_out(*mp, ifp); + } + if (ISSET(ifcap, IFCAP_TSOv6)) + in6_proto_cksum_out(*mp, ifp); + if ((error = ifp->if_output(ifp, *mp, dst, rt))) + goto done; + tcpstat_inc(tcps_outhwtso); + goto done; + } + + /* as fallback do TSO in software */ + if ((error = tcp_chopper(*mp, &ml, ifp, (*mp)->m_pkthdr.ph_mss)) || + (error = if_output_ml(ifp, &ml, dst, rt))) + goto done; + tcpstat_inc(tcps_outswtso); + + done: + *mp = NULL; return error; } Index: netinet/tcp_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v retrieving revision 1.164 diff -u -p -r1.164 tcp_var.h --- netinet/tcp_var.h 10 May 2023 12:07:16 -0000 1.164 +++ netinet/tcp_var.h 14 May 2023 09:11:33 -0000 @@ -719,6 +719,8 @@ struct tcpcb * void tcp_notify(struct inpcb *, int); int tcp_output(struct tcpcb *); int tcp_chopper(struct mbuf *, struct mbuf_list *, struct ifnet *, u_int); +int tcp_if_output_tso(struct ifnet *, struct mbuf **, struct sockaddr *, + struct rtentry *, uint32_t, u_int); void tcp_pulloutofband(struct socket *, u_int, struct mbuf *, int); int tcp_reass(struct tcpcb *, struct tcphdr *, struct mbuf *, int *); 
void tcp_rscale(struct tcpcb *, u_long); Index: netinet6/ip6_output.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v retrieving revision 1.275 diff -u -p -r1.275 ip6_output.c --- netinet6/ip6_output.c 10 May 2023 12:07:17 -0000 1.275 +++ netinet6/ip6_output.c 14 May 2023 09:11:33 -0000 @@ -706,15 +706,10 @@ reroute: goto done; } - if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) && - m->m_pkthdr.ph_mss <= mtu) { - if ((error = tcp_chopper(m, &ml, ifp, m->m_pkthdr.ph_mss)) || - (error = if_output_ml(ifp, &ml, sin6tosa(dst), ro->ro_rt))) - goto done; - tcpstat_inc(tcps_outswtso); + error = tcp_if_output_tso(ifp, &m, sin6tosa(dst), ro->ro_rt, + IFCAP_TSOv6, mtu); + if (error || m == NULL) goto done; - } - CLR(m->m_pkthdr.csum_flags, M_TCP_TSO); /* * try to fragment the packet. case 1-b @@ -2715,8 +2710,13 @@ in6_proto_cksum_out(struct mbuf *m, stru u_int16_t csum; offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); - csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, - htonl(m->m_pkthdr.len - offset), htonl(nxt)); + if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) { + csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, + htonl(0), htonl(nxt)); + } else { + csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, + htonl(m->m_pkthdr.len - offset), htonl(nxt)); + } if (nxt == IPPROTO_TCP) offset += offsetof(struct tcphdr, th_sum); else if (nxt == IPPROTO_UDP)