On Mon, Jun 27, 2022 at 08:07:32AM +1000, Jonathan Gray wrote:
> On Sun, Jun 26, 2022 at 04:43:59PM +0200, Moritz Buhl wrote:
> > On Sun, Jun 26, 2022 at 12:23:58PM +0200, Moritz Buhl wrote:
> > > Hi,
> > >
> > > I noticed that for some offloading-capable em controllers checksum
> > > offloading is still disabled and I couldn't find a reason for that.
>
> There are two descriptor formats on 82575/82576/i350/i354/i210/i211.
> The older one we use and the newer igb/ix style one we don't use in em.
> A lot of the offloading options are in the newer descriptor format
> from memory.
Thanks, that helped a lot. Below is a diff that implements offloading
for i350 and i210 (I haven't checked it for the others you mentioned).
It also does TCP and UDP checksums for the older controllers, as well as IPv4 checksums.
I didn't feel confident in the diff at first, but I currently cannot
provoke any misbehavior.
Using VLAN with i350 and i210 should still break because
IFCAP_VLAN_HWTAGGING is not fully implemented yet.
I would appreciate any feedback and comments.
mbuhl
Index: dev/pci/if_em.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
retrieving revision 1.362
diff -u -p -r1.362 if_em.c
--- dev/pci/if_em.c 23 Jun 2022 09:38:28 -0000 1.362
+++ dev/pci/if_em.c 22 Jul 2022 12:37:27 -0000
@@ -37,6 +37,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include <dev/pci/if_em.h>
#include <dev/pci/if_em_soc.h>
+#include <netinet/ip6.h>
+
/*********************************************************************
* Driver version
*********************************************************************/
@@ -278,6 +280,8 @@ void em_receive_checksum(struct em_softc
struct mbuf *);
u_int em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
u_int32_t *, u_int32_t *);
+u_int em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
+ u_int32_t *);
void em_iff(struct em_softc *);
void em_update_link_status(struct em_softc *);
int em_get_buf(struct em_queue *, int);
@@ -1221,11 +1225,12 @@ em_encap(struct em_queue *que, struct mb
}
if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
- sc->hw.mac_type != em_82576 &&
- sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
- sc->hw.mac_type != em_i350) {
+ sc->hw.mac_type != em_82576 && sc->hw.mac_type != em_82580 &&
+ sc->hw.mac_type != em_i210 && sc->hw.mac_type != em_i350) {
used += em_transmit_checksum_setup(que, m, head,
&txd_upper, &txd_lower);
+ } else if (sc->hw.mac_type == em_i210 || sc->hw.mac_type == em_i350) {
+ used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower);
} else {
txd_upper = txd_lower = 0;
}
@@ -1971,10 +1976,11 @@ em_setup_interface(struct em_softc *sc)
#endif
if (sc->hw.mac_type >= em_82543 && sc->hw.mac_type != em_82575 &&
- sc->hw.mac_type != em_82576 &&
- sc->hw.mac_type != em_82580 && sc->hw.mac_type != em_i210 &&
- sc->hw.mac_type != em_i350)
+ sc->hw.mac_type != em_82576 && sc->hw.mac_type != em_82580) {
+ ifp->if_capabilities |= IFCAP_CSUM_IPv4;
ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
+ ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
+ }
/*
* Specify the media types supported by this adapter and register
@@ -2391,6 +2397,96 @@ em_free_transmit_structures(struct em_so
}
}
+u_int
+em_tx_ctx_setup(struct em_queue *que, struct mbuf *mp, u_int head,
+ u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
+{
+ struct e1000_adv_tx_context_desc *TD;
+ struct ether_header *eh = mtod(mp, struct ether_header *);
+ struct mbuf *m;
+ uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
+ uint32_t iphlen;
+ int off = 0, hoff;
+ uint8_t ipproto;
+
+ *olinfo_status = 0;
+ *cmd_type_len = 0;
+ TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
+
+ // XXX: VLAN TAGGING
+
+ vlan_macip_lens |= (sizeof(*eh) << E1000_ADVTXD_MACLEN_SHIFT);
+
+ switch (ntohs(eh->ether_type)) {
+ case ETHERTYPE_IP: {
+ struct ip *ip;
+
+ m = m_getptr(mp, sizeof(*eh), &hoff);
+ ip = (struct ip *)(mtod(m, caddr_t) + hoff);
+
+ iphlen = ip->ip_hl << 2;
+ ipproto = ip->ip_p;
+
+ type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
+ if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
+ *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
+ off = 1;
+ }
+
+ break;
+ }
+#ifdef INET6
+ case ETHERTYPE_IPV6: {
+ struct ip6_hdr *ip6;
+
+ m = m_getptr(mp, sizeof(*eh), &hoff);
+ ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
+
+ iphlen = sizeof(*ip6);
+ ipproto = ip6->ip6_nxt;
+
+ type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
+ break;
+ }
+#endif
+ default:
+ return 0;
+ }
+
+ *cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
+ *cmd_type_len |= E1000_ADVTXD_DCMD_DEXT;
+ *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
+ vlan_macip_lens |= iphlen;
+ type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
+
+ switch (ipproto) {
+ case IPPROTO_TCP:
+ type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
+ if (ISSET(mp->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
+ *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+ off = 1;
+ }
+ break;
+ case IPPROTO_UDP:
+ type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
+ if (ISSET(mp->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
+ *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+ off = 1;
+ }
+ break;
+ }
+
+ if (off == 0)
+ return 0;
+
+ htolem32(&TD->vlan_macip_lens, vlan_macip_lens);
+ htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl);
+ htolem32(&TD->u.seqnum_seed, 0);
+ htolem32(&TD->mss_l4len_idx, 0);
+
+ return 1;
+}
+
/*********************************************************************
*
* The offload context needs to be set when we transfer the first
@@ -2403,51 +2499,93 @@ em_transmit_checksum_setup(struct em_que
u_int32_t *txd_upper, u_int32_t *txd_lower)
{
struct em_context_desc *TXD;
+ XSUM_CONTEXT_T off = OFFLOAD_NONE;
+ uint8_t tucss = 0;
+ uint8_t tucso = 0;
+
+ *txd_upper = 0;
+ *txd_lower = 0;
+
+ if (mp->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT) {
+ *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
+ *txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+ off = OFFLOAD_IP;
+ }
if (mp->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
- *txd_upper = E1000_TXD_POPTS_TXSM << 8;
- *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
- if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
- return (0);
- else
- que->tx.active_checksum_context = OFFLOAD_TCP_IP;
+ *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
+ *txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+
+ if (que->tx.active_checksum_context == OFFLOAD_TCP_IPv6 &&
+ off != OFFLOAD_IP) {
+ return 0;
+ } else if (que->tx.active_checksum_context == OFFLOAD_TCP_IP)
+ return 0;
+
+ if (off == OFFLOAD_IP) {
+ off = OFFLOAD_TCP_IP;
+ tucss = ETHER_HDR_LEN + sizeof(struct ip);
+ tucso = tucss + offsetof(struct tcphdr, th_sum);
+ } else {
+ off = OFFLOAD_TCP_IPv6;
+ tucss = ETHER_HDR_LEN + sizeof(struct ip6_hdr);
+ tucso = tucss + offsetof(struct tcphdr, th_sum);
+ }
+
+ que->tx.active_checksum_context = off;
} else if (mp->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
- *txd_upper = E1000_TXD_POPTS_TXSM << 8;
- *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
- if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
+ *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
+ *txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+
+ if (que->tx.active_checksum_context == OFFLOAD_UDP_IPv6 &&
+ off != OFFLOAD_IP) {
+ return 0;
+ } else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP)
+ return 0;
+
+ if (off == OFFLOAD_IP) {
+ off = OFFLOAD_UDP_IP;
+ tucss = ETHER_HDR_LEN + sizeof(struct ip);
+ tucso = tucss + offsetof(struct udphdr, uh_sum);
+ } else {
+ off = OFFLOAD_UDP_IPv6;
+ tucss = ETHER_HDR_LEN + sizeof(struct ip6_hdr);
+ tucso = tucss + offsetof(struct udphdr, uh_sum);
+ }
+
+ que->tx.active_checksum_context = off;
+ } else if (off == OFFLOAD_IP) {
+ if (que->tx.active_checksum_context == OFFLOAD_IP)
return (0);
else
- que->tx.active_checksum_context = OFFLOAD_UDP_IP;
- } else {
- *txd_upper = 0;
- *txd_lower = 0;
- return (0);
+ que->tx.active_checksum_context = OFFLOAD_IP;
}
+ if (off == OFFLOAD_NONE)
+ return 0;
+
/* If we reach this point, the checksum offload context
* needs to be reset.
*/
+
TXD = (struct em_context_desc *)&que->tx.sc_tx_desc_ring[head];
- TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
- TXD->lower_setup.ip_fields.ipcso =
- ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
- TXD->lower_setup.ip_fields.ipcse =
- htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
+ if (off == OFFLOAD_IP || off == OFFLOAD_TCP_IP ||
+ off == OFFLOAD_UDP_IP) {
+ TXD->lower_setup.ip_fields.ipcss = ETHER_HDR_LEN;
+ TXD->lower_setup.ip_fields.ipcso =
+ ETHER_HDR_LEN + offsetof(struct ip, ip_sum);
+ TXD->lower_setup.ip_fields.ipcse =
+ htole16(ETHER_HDR_LEN + sizeof(struct ip) - 1);
+ } else {
+ TXD->lower_setup.ip_fields.ipcss = 0;
+ TXD->lower_setup.ip_fields.ipcso = 0;
+ TXD->lower_setup.ip_fields.ipcse = 0;
+ }
- TXD->upper_setup.tcp_fields.tucss =
- ETHER_HDR_LEN + sizeof(struct ip);
+ TXD->upper_setup.tcp_fields.tucss = tucss;
+ TXD->upper_setup.tcp_fields.tucso = tucso;
TXD->upper_setup.tcp_fields.tucse = htole16(0);
-
- if (que->tx.active_checksum_context == OFFLOAD_TCP_IP) {
- TXD->upper_setup.tcp_fields.tucso =
- ETHER_HDR_LEN + sizeof(struct ip) +
- offsetof(struct tcphdr, th_sum);
- } else if (que->tx.active_checksum_context == OFFLOAD_UDP_IP) {
- TXD->upper_setup.tcp_fields.tucso =
- ETHER_HDR_LEN + sizeof(struct ip) +
- offsetof(struct udphdr, uh_sum);
- }
TXD->tcp_seg_setup.data = htole32(0);
TXD->cmd_and_length = htole32(que->tx.sc_txd_cmd | E1000_TXD_CMD_DEXT);
Index: dev/pci/if_em.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.h,v
retrieving revision 1.80
diff -u -p -r1.80 if_em.h
--- dev/pci/if_em.h 9 Jan 2022 05:42:50 -0000 1.80
+++ dev/pci/if_em.h 22 Jul 2022 12:30:31 -0000
@@ -290,8 +290,11 @@ struct em_dma_alloc {
typedef enum _XSUM_CONTEXT_T {
OFFLOAD_NONE,
+ OFFLOAD_IP,
OFFLOAD_TCP_IP,
- OFFLOAD_UDP_IP
+ OFFLOAD_UDP_IP,
+ OFFLOAD_TCP_IPv6,
+ OFFLOAD_UDP_IPv6
} XSUM_CONTEXT_T;
/* For 82544 PCI-X Workaround */
Index: dev/pci/if_em_hw.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em_hw.h,v
retrieving revision 1.87
diff -u -p -r1.87 if_em_hw.h
--- dev/pci/if_em_hw.h 23 Jun 2022 09:38:28 -0000 1.87
+++ dev/pci/if_em_hw.h 22 Jul 2022 12:30:31 -0000
@@ -2123,6 +2123,32 @@ struct em_hw {
#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */
#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */
+/* Context descriptors */
+struct e1000_adv_tx_context_desc {
+ uint32_t vlan_macip_lens;
+ union {
+ uint32_t launch_time;
+ uint32_t seqnum_seed;
+ } u;
+ uint32_t type_tucmd_mlhl;
+ uint32_t mss_l4len_idx;
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */
+#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */
+#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
+#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */
+#define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
+
+/* Adv Transmit Descriptor Config Masks */
+#define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */
+#define E1000_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */
+#define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */
+#define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */
+#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */
+#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */
+
/* Multiple Receive Queue Control */
#define E1000_MRQC_ENABLE_MASK 0x00000003
#define E1000_MRQC_ENABLE_RSS_2Q 0x00000001