On Tue, Oct 11, 2022 at 04:16:15PM +0100, Stuart Henderson wrote:
> On 2022/10/11 15:03, Moritz Buhl wrote:
> > Here is a new diff for checksum offloading (ipv4, udp, tcp) for em(4).
> >
> > The previous diff didn't implement hardware vlan tagging for >em82578
> > which should result in variable ethernet header lengths and thus
> > wrong checksums inserted at wrong places.
> >
> > The diff below addresses this.
> > I would appreciate further testing reports with different controllers.
> >
> > mbuhl
>
> I tried this on my laptop which has I219-V em (I run it in a trunk
> with iwm). It breaks tx (packets don't show up on the other side).
> rx seems ok.
The following diff will restrict the usage of the advanced
descriptors to 82575, 82576, i350 and i210, and fix what the
last diff broke for i219.
Index: dev/pci/if_em.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
retrieving revision 1.362
diff -u -p -r1.362 if_em.c
--- dev/pci/if_em.c 23 Jun 2022 09:38:28 -0000 1.362
+++ dev/pci/if_em.c 11 Oct 2022 16:05:43 -0000
@@ -37,6 +37,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include <dev/pci/if_em.h>
#include <dev/pci/if_em_soc.h>
+#include <netinet/ip6.h>
+
/*********************************************************************
* Driver version
*********************************************************************/
@@ -278,6 +280,8 @@ void em_receive_checksum(struct em_softc
struct mbuf *);
u_int em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
u_int32_t *, u_int32_t *);
+u_int em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
+ u_int32_t *);
void em_iff(struct em_softc *);
void em_update_link_status(struct em_softc *);
int em_get_buf(struct em_queue *, int);
@@ -1220,10 +1224,9 @@ em_encap(struct em_queue *que, struct mb
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
}
- if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
- sc->hw.mac_type != em_82576 &&
- sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
- sc->hw.mac_type != em_i350) {
+ if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
+ used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower);
+ } else if (sc->hw.mac_type >= em_82543) {
used += em_transmit_checksum_setup(que, m, head,
&txd_upper, &txd_lower);
} else {
@@ -1278,7 +1281,7 @@ em_encap(struct em_queue *que, struct mb
#if NVLAN > 0
/* Find out if we are in VLAN mode */
- if (m->m_flags & M_VLANTAG) {
+ if (m->m_flags & M_VLANTAG && sc->hw.mac_type < em_82575) {
/* Set the VLAN id */
desc->upper.fields.special = htole16(m->m_pkthdr.ether_vtag);
@@ -1964,17 +1967,14 @@ em_setup_interface(struct em_softc *sc)
ifp->if_capabilities = IFCAP_VLAN_MTU;
#if NVLAN > 0
- if (sc->hw.mac_type != em_82575 && sc->hw.mac_type != em_82580 &&
- sc->hw.mac_type != em_82576 &&
- sc->hw.mac_type != em_i210 && sc->hw.mac_type != em_i350)
- ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
+ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
#endif
- if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
- sc->hw.mac_type != em_82576 &&
- sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
- sc->hw.mac_type != em_i350)
+ if (sc->hw.mac_type >= em_82543) {
+ ifp->if_capabilities |= IFCAP_CSUM_IPv4;
ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
+ ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
+ }
/*
* Specify the media types supported by this adapter and register
@@ -2391,6 +2391,108 @@ em_free_transmit_structures(struct em_so
}
}
+/*
+ * Prepare the advanced transmit descriptors for one outgoing packet.
+ *
+ * Always fills in *olinfo_status and *cmd_type_len, the two words the
+ * caller places in the advanced data descriptor for mp.  When VLAN tag
+ * insertion or IPv4/TCP/UDP checksum offload is requested for mp, a
+ * context descriptor is additionally written into ring slot "head".
+ *
+ * em_encap() uses this path for MACs from em_82575 up to em_i210.
+ *
+ * Returns the number of ring slots consumed by the context descriptor:
+ * 1 if one was written, 0 if none was needed.
+ */
+u_int
+em_tx_ctx_setup(struct em_queue *que, struct mbuf *mp, u_int head,
+    u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
+{
+	struct e1000_adv_tx_context_desc *TD;
+	struct ether_header *eh = mtod(mp, struct ether_header *);
+	struct mbuf *m;
+	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
+	int off = 0, hoff;	/* off: set once any offload needs a context */
+	uint8_t ipproto, iphlen;
+
+	*olinfo_status = 0;
+	*cmd_type_len = 0;
+	TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
+
+#if NVLAN > 0
+	if (ISSET(mp->m_flags, M_VLANTAG)) {
+		/*
+		 * Keep the tag in host byte order and widen it to 32 bits.
+		 * The whole vlan_macip_lens word is converted exactly once
+		 * by htolem32() below; swapping the tag here as well would
+		 * corrupt it on big-endian hosts.  The 32-bit type also
+		 * avoids shifting a promoted (signed) int past its sign bit
+		 * when the top bit of the tag is set.
+		 */
+		uint32_t vtag = mp->m_pkthdr.ether_vtag;
+
+		vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT;
+		*cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
+		off = 1;
+	}
+#endif
+
+	vlan_macip_lens |= (sizeof(*eh) << E1000_ADVTXD_MACLEN_SHIFT);
+
+	/* Determine L3 header length and L4 protocol from the ethertype. */
+	switch (ntohs(eh->ether_type)) {
+	case ETHERTYPE_IP: {
+		struct ip *ip;
+
+		/*
+		 * NOTE(review): the m_getptr() result is dereferenced without
+		 * a NULL check; presumably callers only pass packets that
+		 * contain a complete IP header -- confirm.
+		 */
+		m = m_getptr(mp, sizeof(*eh), &hoff);
+		ip = (struct ip *)(mtod(m, caddr_t) + hoff);
+
+		iphlen = ip->ip_hl << 2;
+		ipproto = ip->ip_p;
+
+		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
+		if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
+			*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
+			off = 1;
+		}
+
+		break;
+	}
+#ifdef INET6
+	case ETHERTYPE_IPV6: {
+		struct ip6_hdr *ip6;
+
+		/* NOTE(review): same unchecked m_getptr() use as for IPv4. */
+		m = m_getptr(mp, sizeof(*eh), &hoff);
+		ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
+
+		/* Only the fixed IPv6 header is accounted for here. */
+		iphlen = sizeof(*ip6);
+		ipproto = ip6->ip6_nxt;
+
+		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
+		break;
+	}
+#endif
+	default:
+		/* Not IP: no L3/L4 offload is possible. */
+		iphlen = 0;
+		ipproto = 0;
+		break;
+	}
+
+	/* The data descriptor words are filled in even without a context. */
+	*cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
+	*cmd_type_len |= E1000_ADVTXD_DCMD_DEXT;
+	*olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
+	vlan_macip_lens |= iphlen;
+	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
+
+	switch (ipproto) {
+	case IPPROTO_TCP:
+		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
+		if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
+			*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+			off = 1;
+		}
+		break;
+	case IPPROTO_UDP:
+		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
+		if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
+			*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+			off = 1;
+		}
+		break;
+	}
+
+	/* Nothing requested: no context descriptor, no ring slot used. */
+	if (!off)
+		return 0;
+
+	/* 82575 needs the queue index added */
+	if (que->sc->hw.mac_type == em_82575)
+		mss_l4len_idx |= (que->me & 0xff) << 4;
+
+	/* Store the context descriptor words in little-endian order. */
+	htolem32(&TD->vlan_macip_lens, vlan_macip_lens);
+	htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl);
+	htolem32(&TD->u.seqnum_seed, 0);
+	htolem32(&TD->mss_l4len_idx, mss_l4len_idx);
+
+	return 1;
+}
+
/*********************************************************************
*
* The offload context needs to be set when we transfer the first
@@ -2403,51 +2505,93 @@ em_transmit_checksum_setup(struct em_que
u_int32_t *txd_upper, u_int32_t *txd_lower)
{
struct em_context_desc *TXD;
+ XSUM_CONTEXT_T off = OFFLOAD_NONE;
+ uint8_t tucss = 0;
+ uint8_t tucso = 0;
+
+ *txd_upper = 0;
+ *txd_lower = 0;
+
+ if (mp->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT) {
+ *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
+ *txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+ off = OFFLOAD_IP;
+ }
if (mp->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
- *txd_upper = E1000_TXD_POPTS_TXSM << 8;
- *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
- if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
- return (0);
- else
- que->tx.active_checksum_context = OFFLOAD_TCP_IP;
+ *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
+ *txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+
+ if (que->tx.active_checksum_context == OFFLOAD_TCP_IPv6 &&
+ off != OFFLOAD_IP) {
+ return 0;
+ } else if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
+ return 0;
+
+ if (off == OFFLOAD_IP) {
+ off = OFFLOAD_TCP_IP;
+ tucss = ETHER_HDR_LEN + sizeof(struct ip);
+ tucso = tucss + offsetof(struct tcphdr, th_sum);
+ } else {
+ off = OFFLOAD_TCP_IPv6;
+ tucss = ETHER_HDR_LEN + sizeof(struct ip6_hdr);
+ tucso = tucss + offsetof(struct tcphdr, th_sum);
+ }
+
+ que->tx.active_checksum_context = off;
} else if (mp->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
- *txd_upper = E1000_TXD_POPTS_TXSM << 8;
- *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
- if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
+ *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
+ *txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+
+ if (que->tx.active_checksum_context == OFFLOAD_UDP_IPv6 &&
+ off != OFFLOAD_IP) {
+ return 0;
+ } else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
+ return 0;
+
+ if (off == OFFLOAD_IP) {
+ off = OFFLOAD_UDP_IP;
+ tucss = ETHER_HDR_LEN + sizeof(struct ip);
+ tucso = tucss + offsetof(struct udphdr, uh_sum);
+ } else {
+ off = OFFLOAD_UDP_IPv6;
+ tucss = ETHER_HDR_LEN + sizeof(struct ip6_hdr);
+ tucso = tucss + offsetof(struct udphdr, uh_sum);
+ }
+
+ que->tx.active_checksum_context = off;
+ } else if (off == OFFLOAD_IP) {
+ if (que->tx.active_checksum_context == OFFLOAD_IP)
return (0);
else
- que->tx.active_checksum_context = OFFLOAD_UDP_IP;
- } else {
- *txd_upper = 0;
- *txd_lower = 0;
- return (0);
+ que->tx.active_checksum_context = OFFLOAD_IP;
}
+ if (off == OFFLOAD_NONE)
+ return 0;
+
/* If we reach this point, the checksum offload context
* needs to be reset.
*/
+
TXD = (struct em_context_desc *)&que->tx.sc_tx_desc_ring[head];
- TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
- TXD->lower_setup.ip_fields.ipcso =
- ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
- TXD->lower_setup.ip_fields.ipcse =
- htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
+ if (off == OFFLOAD_IP || off == OFFLOAD_TCP_IP ||
+ off == OFFLOAD_UDP_IP) {
+ TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
+ TXD->lower_setup.ip_fields.ipcso =
+ ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
+ TXD->lower_setup.ip_fields.ipcse =
+ htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
+ } else {
+ TXD->lower_setup.ip_fields.ipcss = 0;
+ TXD->lower_setup.ip_fields.ipcso = 0;
+ TXD->lower_setup.ip_fields.ipcse = 0;
+ }
- TXD->upper_setup.tcp_fields.tucss =
- ETHER_HDR_LEN + sizeof(struct ip);
+ TXD->upper_setup.tcp_fields.tucss = tucss;
+ TXD->upper_setup.tcp_fields.tucso = tucso;
TXD->upper_setup.tcp_fields.tucse = htole16(0);
-
- if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) {
- TXD->upper_setup.tcp_fields.tucso =
- ETHER_HDR_LEN + sizeof(struct ip) +
- offsetof(struct tcphdr, th_sum);
- } else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) {
- TXD->upper_setup.tcp_fields.tucso =
- ETHER_HDR_LEN + sizeof(struct ip) +
- offsetof(struct udphdr, uh_sum);
- }
TXD->tcp_seg_setup.data = htole32(0);
TXD->cmd_and_length = htole32(que->tx.sc_txd_cmd | E1000_TXD_CMD_DEXT);
Index: dev/pci/if_em.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.h,v
retrieving revision 1.80
diff -u -p -r1.80 if_em.h
--- dev/pci/if_em.h 9 Jan 2022 05:42:50 -0000 1.80
+++ dev/pci/if_em.h 11 Oct 2022 12:59:19 -0000
@@ -290,8 +290,11 @@ struct em_dma_alloc {
typedef enum _XSUM_CONTEXT_T {
OFFLOAD_NONE,
+ OFFLOAD_IP,
OFFLOAD_TCP_IP,
- OFFLOAD_UDP_IP
+ OFFLOAD_UDP_IP,
+ OFFLOAD_TCP_IPv6,
+ OFFLOAD_UDP_IPv6
} XSUM_CONTEXT_T;
/* For 82544 PCI-X Workaround */
Index: dev/pci/if_em_hw.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em_hw.h,v
retrieving revision 1.87
diff -u -p -r1.87 if_em_hw.h
--- dev/pci/if_em_hw.h 23 Jun 2022 09:38:28 -0000 1.87
+++ dev/pci/if_em_hw.h 11 Oct 2022 12:59:19 -0000
@@ -2123,6 +2123,33 @@ struct em_hw {
#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */
#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */
+/* Context descriptors */
+struct e1000_adv_tx_context_desc { /* layout written by em_tx_ctx_setup() */
+ uint32_t vlan_macip_lens; /* VLAN tag, MAC header length, IP header length */
+ union {
+ uint32_t launch_time;
+ uint32_t seqnum_seed; /* em_tx_ctx_setup() stores 0 here */
+ } u;
+ uint32_t type_tucmd_mlhl; /* DEXT/DTYP_CTXT plus IPv4/IPv6 and TCP/UDP type bits */
+ uint32_t mss_l4len_idx; /* on 82575, queue index shifted left by 4 */
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */
+#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */
+#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
+#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */
+#define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */
+#define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
+
+/* Adv Transmit Descriptor Config Masks */
+#define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */
+#define E1000_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */
+#define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */
+#define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */
+#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */
+#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */
+
/* Multiple Receive Queue Control */
#define E1000_MRQC_ENABLE_MASK 0x00000003
#define E1000_MRQC_ENABLE_RSS_2Q 0x00000001