On Sun, May 14, 2023 at 11:39:01PM +0200, Hrvoje Popovski wrote: > I've tested this on an OpenBSD box with 4 iperf3's, 2 for ip4 and 2 for ip6 > and with 16 tcp streams per iperf. When testing over ix(4) there are big > differences in output performance. When testing ix/veb/vport there are > differences in output performance, but not that big.
Thanks a lot for testing. I have also created some numbers which can be seen here. http://bluhm.genua.de/perform/results/2023-05-14T09:14:59Z/perform.html Sending TCP to Linux host and socket splicing gets faster. > When testing over vport I'm getting "software chopped" which should be > expected. Yes, we cannot do hardware TSO in a bridge. Maybe we could if all bridge members support it. Next diff that should go in is where jan@ renames flags, cleans up ifconfig(8), and fixes pseudo interface devices. Updated ix(4) driver diff after TCP/IP commit is below. bluhm Index: dev/pci/if_ix.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v retrieving revision 1.193 diff -u -p -r1.193 if_ix.c --- dev/pci/if_ix.c 28 Apr 2023 10:18:57 -0000 1.193 +++ dev/pci/if_ix.c 15 May 2023 17:27:09 -0000 @@ -1924,8 +1924,9 @@ ixgbe_setup_interface(struct ix_softc *s ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; ifp->if_capabilities |= IFCAP_CSUM_IPv4; + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; if (sc->hw.mac.type != ixgbe_mac_82598EB) - ifp->if_capabilities |= IFCAP_TSO; + ifp->if_capabilities |= IFCAP_LRO; /* * Specify the media types supported by this sc and register @@ -2344,6 +2345,7 @@ ixgbe_initialize_transmit_units(struct i int i; uint64_t tdba; uint32_t txctrl; + uint32_t hlreg; /* Setup the Base and Length of the Tx Descriptor Ring */ @@ -2405,6 +2407,11 @@ ixgbe_initialize_transmit_units(struct i rttdcs &= ~IXGBE_RTTDCS_ARBDIS; IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); } + + /* Enable TCP/UDP padding when using TSO */ + hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); + hlreg |= IXGBE_HLREG0_TXPADEN; + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); } /********************************************************************* @@ -2473,16 +2480,18 @@ ixgbe_free_transmit_buffers(struct tx_ri **********************************************************************/ static inline int 
-ixgbe_csum_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, - uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status) +ixgbe_tx_offload(struct mbuf *mp, uint32_t *vlan_macip_lens, + uint32_t *type_tucmd_mlhl, uint32_t *olinfo_status, uint32_t *cmd_type_len, + uint32_t *mss_l4len_idx) { struct ether_extracted ext; int offload = 0; - uint32_t iphlen; + uint32_t ethlen, iphlen; ether_extract_headers(mp, &ext); + ethlen = sizeof(*ext.eh); - *vlan_macip_lens |= (sizeof(*ext.eh) << IXGBE_ADVTXD_MACLEN_SHIFT); + *vlan_macip_lens |= (ethlen << IXGBE_ADVTXD_MACLEN_SHIFT); if (ext.ip4) { iphlen = ext.ip4->ip_hl << 2; @@ -2500,6 +2509,8 @@ ixgbe_csum_offload(struct mbuf *mp, uint *type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; #endif } else { + if (mp->m_pkthdr.csum_flags & M_TCP_TSO) + tcpstat_inc(tcps_outbadtso); return offload; } @@ -2519,6 +2530,32 @@ ixgbe_csum_offload(struct mbuf *mp, uint } } + if (mp->m_pkthdr.csum_flags & M_TCP_TSO) { + if (ext.tcp) { + uint32_t pktlen, hdrlen, thlen, outlen; + + thlen = ext.tcp->th_off << 2; + + *mss_l4len_idx |= (uint32_t)(mp->m_pkthdr.ph_mss + << IXGBE_ADVTXD_MSS_SHIFT); + *mss_l4len_idx |= thlen << IXGBE_ADVTXD_L4LEN_SHIFT; + + hdrlen = ethlen + iphlen + thlen; + pktlen = mp->m_pkthdr.len - hdrlen; + CLR(*olinfo_status, IXGBE_ADVTXD_PAYLEN_MASK + << IXGBE_ADVTXD_PAYLEN_SHIFT); + *olinfo_status |= pktlen << IXGBE_ADVTXD_PAYLEN_SHIFT; + + *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; + offload = 1; + + outlen = hdrlen + mp->m_pkthdr.ph_mss; + tcpstat_add(tcps_outpkttso, + (pktlen + outlen - 1) / outlen); + } else + tcpstat_inc(tcps_outbadtso); + } + return offload; } @@ -2529,6 +2566,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, struct ixgbe_adv_tx_context_desc *TXD; struct ixgbe_tx_buf *tx_buffer; uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0; + uint32_t mss_l4len_idx = 0; int ctxd = txr->next_avail_desc; int offload = 0; @@ -2544,8 +2582,8 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, } #endif - offload |= ixgbe_csum_offload(mp, 
&vlan_macip_lens, &type_tucmd_mlhl, - olinfo_status); + offload |= ixgbe_tx_offload(mp, &vlan_macip_lens, &type_tucmd_mlhl, + olinfo_status, cmd_type_len, &mss_l4len_idx); if (!offload) return (0); @@ -2559,7 +2597,7 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); - TXD->mss_l4len_idx = htole32(0); + TXD->mss_l4len_idx = htole32(mss_l4len_idx); tx_buffer->m_head = NULL; tx_buffer->eop_index = -1; @@ -2868,18 +2906,20 @@ ixgbe_initialize_receive_units(struct ix } IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); - /* Always enable jumbo frame reception */ hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); + /* Always enable jumbo frame reception */ hlreg |= IXGBE_HLREG0_JUMBOEN; + /* Always enable CRC stripping */ + hlreg |= IXGBE_HLREG0_RXCRCSTRP; IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); - if (ISSET(ifp->if_xflags, IFXF_TSO)) { + if (ISSET(ifp->if_xflags, IFXF_LRO)) { rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); /* This field has to be set to zero. */ rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; - /* Enable TSO Receive Offloading */ + /* RSC Coalescing on ACK Change */ rdrxctl |= IXGBE_RDRXCTL_RSCACKC; rdrxctl |= IXGBE_RDRXCTL_FCOE_WRFIX; @@ -2902,10 +2942,10 @@ ixgbe_initialize_receive_units(struct ix srrctl = bufsz | IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); - if (ISSET(ifp->if_xflags, IFXF_TSO)) { + if (ISSET(ifp->if_xflags, IFXF_LRO)) { rdrxctl = IXGBE_READ_REG(&sc->hw, IXGBE_RSCCTL(i)); - /* Enable TSO Receive Side Coalescing */ + /* Enable Receive Side Coalescing */ rdrxctl |= IXGBE_RSCCTL_RSCEN; rdrxctl |= IXGBE_RSCCTL_MAXDESC_16; @@ -3263,7 +3303,7 @@ ixgbe_setup_vlan_hw_support(struct ix_so * We have to disable VLAN striping when using TCP offloading, due to a * firmware bug. 
*/ - if (ISSET(ifp->if_xflags, IFXF_TSO)) { + if (ISSET(ifp->if_xflags, IFXF_LRO)) { sc->vlan_stripping = 0; return; } Index: dev/pci/ixgbe.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/ixgbe.h,v retrieving revision 1.33 diff -u -p -r1.33 ixgbe.h --- dev/pci/ixgbe.h 8 Feb 2022 03:38:00 -0000 1.33 +++ dev/pci/ixgbe.h 15 May 2023 17:27:09 -0000 @@ -60,12 +60,18 @@ #include <net/if.h> #include <net/if_media.h> +#include <net/route.h> #include <net/toeplitz.h> +struct tdb; + #include <netinet/in.h> #include <netinet/if_ether.h> #include <netinet/ip.h> #include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/tcp_timer.h> +#include <netinet/tcp_var.h> #if NBPFILTER > 0 #include <net/bpf.h> Index: dev/pci/ixgbe_type.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/ixgbe_type.h,v retrieving revision 1.36 diff -u -p -r1.36 ixgbe_type.h --- dev/pci/ixgbe_type.h 9 Jan 2022 05:42:56 -0000 1.36 +++ dev/pci/ixgbe_type.h 15 May 2023 17:27:09 -0000 @@ -3355,6 +3355,7 @@ struct ixgbe_adv_tx_context_desc { /* 1st&Last TSO-full iSCSI PDU */ #define IXGBE_ADVTXD_POPTS_ISCO_FULL 0x00001800 #define IXGBE_ADVTXD_POPTS_RSV 0x00002000 /* POPTS Reserved */ +#define IXGBE_ADVTXD_PAYLEN_MASK 0x0003FFFF /* Adv desc PAYLEN */ #define IXGBE_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ #define IXGBE_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ #define IXGBE_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */ Index: net/if.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.h,v retrieving revision 1.212 diff -u -p -r1.212 if.h --- net/if.h 15 May 2023 16:34:56 -0000 1.212 +++ net/if.h 15 May 2023 17:27:09 -0000 @@ -231,7 +231,7 @@ struct if_status_description { #define IFXF_INET6_NOSOII 0x40 /* [N] don't do RFC 7217 */ #define IFXF_AUTOCONF4 0x80 /* [N] v4 
autoconf (aka dhcp) enabled */ #define IFXF_MONITOR 0x100 /* [N] only used for bpf */ -#define IFXF_TSO 0x200 /* [N] XXX missnamed, should be LRO */ +#define IFXF_LRO 0x200 /* [N] TCP large recv offload */ #define IFXF_CANTCHANGE \ (IFXF_MPSAFE|IFXF_CLONED) @@ -253,11 +253,15 @@ struct if_status_description { #define IFCAP_CSUM_UDPv6 0x00000100 /* can do IPv6/UDP checksums */ #define IFCAP_TSOv4 0x00001000 /* IPv4/TCP segment offload */ #define IFCAP_TSOv6 0x00002000 /* IPv6/TCP segment offload */ -#define IFCAP_TSO 0x00004000 /* XXX should be LRO */ +#define IFCAP_LRO 0x00004000 /* TCP large recv offload */ #define IFCAP_WOL 0x00008000 /* can do wake on lan */ #define IFCAP_CSUM_MASK (IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | \ IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6) + +/* XXX feature flags are misnamed */ +#define IFCAP_TSO IFCAP_LRO +#define IFXF_TSO IFXF_LRO /* symbolic names for terminal (per-protocol) CTL_IFQ_ nodes */ #define IFQCTL_LEN 1