The branch main has been updated by tuexen: URL: https://cgit.FreeBSD.org/src/commit/?id=bcb298fa9e23c1192c5707086a67d3b396186abc
commit bcb298fa9e23c1192c5707086a67d3b396186abc Author: Timo Völker <timo.voel...@fh-muenster.de> AuthorDate: 2025-08-01 10:09:47 +0000 Commit: Michael Tuexen <tue...@freebsd.org> CommitDate: 2025-08-01 10:09:47 +0000 sctp, tcp, udp: improve deferred computation of checksums When the SCTP, TCP, or UDP implementation sends a packet, it does not compute the corresponding checksum but defers that. The network layer will determine whether the network interface selected for the packet has the requested capability and compute the checksum in software if the selected network interface doesn't have the requested capability. Do this not only for packets being sent by the local SCTP, TCP, and UDP stack, but also when forwarding packets. Furthermore, when such packets are delivered to a local SCTP, TCP, or UDP stack, do not compute or validate the checksum, since such packets have never been on the wire. This allows supporting checksum offloading also in the case of local virtual machines or jails. Support for epair, vtnet, and tap interfaces will be added in separate commits. Reviewed by: kp, rgrimes, tuexen, manpages MFC after: 4 weeks Differential Revision: https://reviews.freebsd.org/D51475 --- share/man/man9/mbuf.9 | 32 +++++++++++++++++++++++--------- sys/netinet/ip_fastfwd.c | 22 ++++++++++++++++++++++ sys/netinet/sctp_input.c | 6 +++++- sys/netinet/tcp_input.c | 12 ++++++++++++ sys/netinet/udp_usrreq.c | 6 ++++++ sys/netinet6/ip6_fastfwd.c | 28 ++++++++++++++++++++++++++++ sys/netinet6/ip6_forward.c | 27 +++++++++++++++++++++++++++ sys/netinet6/sctp6_usrreq.c | 6 +++++- sys/netinet6/udp6_usrreq.c | 6 ++++++ sys/sys/mbuf.h | 17 ++++++++--------- 10 files changed, 142 insertions(+), 20 deletions(-) diff --git a/share/man/man9/mbuf.9 b/share/man/man9/mbuf.9 index c05505716a30..e4f30962ccab 100644 --- a/share/man/man9/mbuf.9 +++ b/share/man/man9/mbuf.9 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE.
.\" -.Dd July 29, 2025 +.Dd August 1, 2025 .Dt MBUF 9 .Os .\" @@ -1102,8 +1102,7 @@ of a packet contains two fields used for that purpose, .Vt int Va csum_flags and .Vt int Va csum_data . -The meaning of those fields depends on the direction a packet flows in, -and on whether the packet is fragmented. +The meaning of those fields depends on whether the packet is fragmented. Henceforth, .Va csum_flags or @@ -1117,14 +1116,14 @@ in the .Vt mbuf chain containing the packet. .Pp -On output, the computation of the checksum is delayed until the outgoing -interface has been determined for a packet. +When a packet is sent by SCTP, TCP, or UDP, the computation of the checksum +is delayed until the outgoing interface has been determined for a packet. The interface-specific field .Va ifnet.if_data.ifi_hwassist (see .Xr ifnet 9 ) -is consulted for the capabilities of the interface to assist in -computing checksums. +is consulted by IP for the capabilities of the network interface selected for +output to assist in computing checksums. The .Va csum_flags field of the packet header is set to indicate which actions the interface @@ -1163,8 +1162,8 @@ defined by the TCP and UDP specifications. In the case of SCTP, the checksum field will be initially set by the SCTP implementation to 0. .Pp -On input, an interface indicates the actions it has performed -on a packet by setting one or more of the following flags in +When a packet is received by an interface, it indicates the actions it has +performed on a packet by setting one or more of the following flags in .Va csum_flags associated with the packet: .Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent @@ -1215,6 +1214,21 @@ is not relevant and in .Va csum_flags is not set, since SCTP does not use a pseudo header checksum. 
+.Pp +If IP delivers a packet with the flags +.Dv CSUM_SCTP , +.Dv CSUM_TCP , +or +.Dv CSUM_UDP +set in +.Va csum_flags +to a local SCTP, TCP, or UDP stack, the packet will be processed without +computing or validating the checksum, since the packet has not been on the +wire. +This can happen if the packet was handled by a virtual interface such as +.Xr tap 4 +or +.Xr epair 4 . .Sh STRESS TESTING When running a kernel compiled with the option .Dv MBUF_STRESS_TEST , diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index 9b81760e58f3..51e7c2fbc4b0 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -69,6 +69,7 @@ #include <sys/cdefs.h> #include "opt_ipstealth.h" +#include "opt_sctp.h" #include <sys/param.h> #include <sys/systm.h> @@ -102,6 +103,10 @@ #include <machine/in_cksum.h> +#if defined(SCTP) || defined(SCTP_SUPPORT) +#include <netinet/sctp_crc32.h> +#endif + #define V_ipsendredirects VNET(ipsendredirects) static struct mbuf * @@ -460,6 +465,23 @@ passout: } else gw = (const struct sockaddr *)dst; + /* + * If TCP/UDP header still needs a valid checksum and interface will not + * calculate it for us, do it here. + */ + if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & + ~nh->nh_ifp->if_hwassist)) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } +#if defined(SCTP) || defined(SCTP_SUPPORT) + if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP_SCTP & + ~nh->nh_ifp->if_hwassist)) { + sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); + m->m_pkthdr.csum_flags &= ~CSUM_IP_SCTP; + } +#endif + /* Handle redirect case. 
*/ redest.s_addr = 0; if (V_ipsendredirects && osrc.s_addr == ip->ip_src.s_addr && diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c index dc31ffbc2161..5f637cc63df5 100644 --- a/sys/netinet/sctp_input.c +++ b/sys/netinet/sctp_input.c @@ -5780,7 +5780,11 @@ sctp_input_with_port(struct mbuf *i_pak, int off, uint16_t port) goto out; } ecn_bits = ip->ip_tos; - if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) { + if (m->m_pkthdr.csum_flags & (CSUM_SCTP_VALID | CSUM_IP_SCTP)) { + /* + * Packets with CSUM_IP_SCTP were sent from local host using + * checksum offloading. Checksum not required. + */ SCTP_STAT_INCR(sctps_recvhwcrc); compute_crc = 0; } else { diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index de428ae1af6f..d58cc69b7625 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -650,6 +650,12 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) th->th_sum = in6_cksum_pseudo(ip6, tlen, IPPROTO_TCP, m->m_pkthdr.csum_data); th->th_sum ^= 0xffff; + } else if (m->m_pkthdr.csum_flags & CSUM_IP6_TCP) { + /* + * Packet from local host (maybe from a VM). + * Checksum not required. + */ + th->th_sum = 0; } else th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen); if (th->th_sum) { @@ -710,6 +716,12 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) htonl(m->m_pkthdr.csum_data + tlen + IPPROTO_TCP)); th->th_sum ^= 0xffff; + } else if (m->m_pkthdr.csum_flags & CSUM_IP_TCP) { + /* + * Packet from local host (maybe from a VM). + * Checksum not required.
+ */ + th->th_sum = 0; } else { struct ipovly *ipov = (struct ipovly *)ip; diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 42cfb919e263..df8f293f9426 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -560,6 +560,12 @@ udp_input(struct mbuf **mp, int *offp, int proto) ip->ip_dst.s_addr, htonl((u_short)len + m->m_pkthdr.csum_data + proto)); uh_sum ^= 0xffff; + } else if (m->m_pkthdr.csum_flags & CSUM_IP_UDP) { + /* + * Packet from local host (maybe from a VM). + * Checksum not required. + */ + uh_sum = 0; } else { char b[offsetof(struct ipovly, ih_src)]; struct ipovly *ipov = (struct ipovly *)ip; diff --git a/sys/netinet6/ip6_fastfwd.c b/sys/netinet6/ip6_fastfwd.c index 0ed313bd49a5..7139267722b7 100644 --- a/sys/netinet6/ip6_fastfwd.c +++ b/sys/netinet6/ip6_fastfwd.c @@ -27,6 +27,7 @@ #include <sys/cdefs.h> #include "opt_inet6.h" #include "opt_ipstealth.h" +#include "opt_sctp.h" #include <sys/param.h> #include <sys/systm.h> @@ -54,6 +55,10 @@ #include <netinet6/ip6_var.h> #include <netinet6/nd6.h> +#if defined(SCTP) || defined(SCTP_SUPPORT) +#include <netinet/sctp_crc32.h> +#endif + static int ip6_findroute(struct nhop_object **pnh, const struct sockaddr_in6 *dst, struct mbuf *m) @@ -277,6 +282,29 @@ passout: ip6->ip6_hlim -= IPV6_HLIMDEC; } + /* + * If TCP/UDP header still needs a valid checksum and interface will not + * calculate it for us, do it here. 
+ */ + if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & + ~nh->nh_ifp->if_hwassist)) { + int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL); + + if (offset < sizeof(struct ip6_hdr) || offset > m->m_pkthdr.len) + goto drop; + in6_delayed_cksum(m, m->m_pkthdr.len - offset, offset); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; + } +#if defined(SCTP) || defined(SCTP_SUPPORT) + if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP6_SCTP & + ~nh->nh_ifp->if_hwassist)) { + int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL); + + sctp_delayed_cksum(m, offset); + m->m_pkthdr.csum_flags &= ~CSUM_IP6_SCTP; + } +#endif + m_clrprotoflags(m); /* Avoid confusing lower layers. */ IP_PROBE(send, NULL, NULL, ip6, nh->nh_ifp, NULL, ip6); diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c index ad8c95c9363c..f6c09b0ac7bc 100644 --- a/sys/netinet6/ip6_forward.c +++ b/sys/netinet6/ip6_forward.c @@ -75,6 +75,10 @@ #include <netipsec/ipsec_support.h> +#if defined(SCTP) || defined(SCTP_SUPPORT) +#include <netinet/sctp_crc32.h> +#endif + /* * Forward a packet. If some error occurs return the sender * an icmp packet. Note we can't always generate a meaningful @@ -389,6 +393,29 @@ pass: goto bad; } + /* + * If TCP/UDP header still needs a valid checksum and interface will not + * calculate it for us, do it here. 
+ */ + if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & + ~nh->nh_ifp->if_hwassist)) { + int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL); + + if (offset < sizeof(struct ip6_hdr) || offset > m->m_pkthdr.len) + goto bad; + in6_delayed_cksum(m, m->m_pkthdr.len - offset, offset); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; + } +#if defined(SCTP) || defined(SCTP_SUPPORT) + if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP6_SCTP & + ~nh->nh_ifp->if_hwassist)) { + int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL); + + sctp_delayed_cksum(m, offset); + m->m_pkthdr.csum_flags &= ~CSUM_IP6_SCTP; + } +#endif + /* Currently LLE layer stores embedded IPv6 addresses */ if (IN6_IS_SCOPE_LINKLOCAL(&dst.sin6_addr)) { in6_set_unicast_scopeid(&dst.sin6_addr, dst.sin6_scope_id); diff --git a/sys/netinet6/sctp6_usrreq.c b/sys/netinet6/sctp6_usrreq.c index 8964ccf54c54..c4716fdafb6e 100644 --- a/sys/netinet6/sctp6_usrreq.c +++ b/sys/netinet6/sctp6_usrreq.c @@ -139,7 +139,11 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port) goto out; } ecn_bits = IPV6_TRAFFIC_CLASS(ip6); - if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) { + if (m->m_pkthdr.csum_flags & (CSUM_SCTP_VALID | CSUM_IP6_SCTP)) { + /* + * Packets with CSUM_IP6_SCTP were sent from local host using + * checksum offloading. Checksum not required. + */ SCTP_STAT_INCR(sctps_recvhwcrc); compute_crc = 0; } else { diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index b3ed16fda713..0027cf3bd230 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -434,6 +434,12 @@ udp6_input(struct mbuf **mp, int *offp, int proto) uh_sum = in6_cksum_pseudo(ip6, ulen, nxt, m->m_pkthdr.csum_data); uh_sum ^= 0xffff; + } else if (m->m_pkthdr.csum_flags & CSUM_IP6_UDP) { + /* + * Packet from local host (maybe from a VM). + * Checksum not required.
+ */ + uh_sum = 0; } else uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen); diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 304bd019c9fc..f9141bf70742 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -641,16 +641,15 @@ m_epg_pagelen(const struct mbuf *m, int pidx, int pgoff) /* * Flags indicating checksum, segmentation and other offload work to be - * done, or already done, by hardware or lower layers. It is split into - * separate inbound and outbound flags. + * done, or already done, by hardware or lower layers. * - * Outbound flags that are set by upper protocol layers requesting lower + * Flags that are set by upper protocol layers requesting lower * layers, or ideally the hardware, to perform these offloading tasks. - * For outbound packets this field and its flags can be directly tested - * against ifnet if_hwassist. Note that the outbound and the inbound flags do - * not collide right now but they could be allowed to (as long as the flags are - * scrubbed appropriately when the direction of an mbuf changes). CSUM_BITS - * would also have to split into CSUM_BITS_TX and CSUM_BITS_RX. + * Before passing packets to a network interface this field and its flags can + * be directly tested against ifnet if_hwassist. Note that the flags + * CSUM_IP_SCTP, CSUM_IP_TCP, and CSUM_IP_UDP can appear on input processing + * of SCTP, TCP, and UDP. In such a case the checksum will not be computed or + * validated by SCTP, TCP, or UDP, since the packet has not been on the wire. * * CSUM_INNER_<x> is the same as CSUM_<x> but it applies to the inner frame. * The CSUM_ENCAP_<x> bits identify the outer encapsulation. @@ -679,7 +678,7 @@ m_epg_pagelen(const struct mbuf *m, int pidx, int pgoff) #define CSUM_ENCAP_VXLAN 0x00040000 /* VXLAN outer encapsulation */ #define CSUM_ENCAP_RSVD1 0x00080000 -/* Inbound checksum support where the checksum was verified by hardware. */ +/* Flags used to indicate that the checksum was verified by hardware.
*/ #define CSUM_INNER_L3_CALC 0x00100000 #define CSUM_INNER_L3_VALID 0x00200000 #define CSUM_INNER_L4_CALC 0x00400000