The branch main has been updated by gallatin:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=c224b2ce7de0faa28ea87edf6e74de0e4e9d33f9

commit c224b2ce7de0faa28ea87edf6e74de0e4e9d33f9
Author:     Andrew Gallatin <galla...@freebsd.org>
AuthorDate: 2025-08-07 14:48:45 +0000
Commit:     Andrew Gallatin <galla...@freebsd.org>
CommitDate: 2025-08-07 14:55:16 +0000

    iflib: don't pullup UDP payloads to the TCP header size
    
    The IPv4 packet parsing logic in iflib is incredibly complex,
    prematurely optimized, and believes all the world is TCP.
    This causes it to pullup part of the UDP payload into
    the packet header, causing unneeded memory copies.
    This impacts a project I'm working on, and also impacts
    nearly any kernel user of UDP, like NFS.  Eg, NFS over UDP
    will result in pullups for every datagram sent over an iflib NIC.
    
    This patch:
    
      - adds parsing for UDP to iflib
      - attempts to pull up the correct header size, based on UDP or
            TCP protocol type.
      - simplifies packet parsing in iflib by
        - no longer special casing having an ethernet header in a packet by
            itself
        - no longer checking that we're trying to pullup something beyond the
            end of the packet.  Since we're no longer trying to pull up a
            larger TCP header, attempting to pullup something larger than
            the packet should no longer happen.  If it does, the packet is
            malformed and m_pullup will return an error when it runs out of
            data in the mbuf chain
    
    Reviewed by: erj, glebius, kbowling
    Sponsored by:   Netflix
    Differential Revision:  https://reviews.freebsd.org/D51748
---
 sys/net/iflib.c | 63 ++++++++++++++++++++-------------------------------------
 1 file changed, 22 insertions(+), 41 deletions(-)

diff --git a/sys/net/iflib.c b/sys/net/iflib.c
index 2b8f0e617df3..2eca81d54f99 100644
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -70,6 +70,7 @@
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
+#include <netinet/udp.h>
 #include <netinet/ip_var.h>
 #include <netinet6/ip6_var.h>
 
@@ -3372,42 +3373,28 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, 
struct mbuf **mp)
 #ifdef INET
        case ETHERTYPE_IP:
        {
-               struct mbuf *n;
-               struct ip *ip = NULL;
-               struct tcphdr *th = NULL;
-               int minthlen;
+               struct ip *ip;
+               struct tcphdr *th;
+               uint8_t hlen;
 
-               minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + 
sizeof(*th));
-               if (__predict_false(m->m_len < minthlen)) {
-                       /*
-                        * if this code bloat is causing too much of a hit
-                        * move it to a separate function and mark it noinline
-                        */
-                       if (m->m_len == pi->ipi_ehdrlen) {
-                               n = m->m_next;
-                               MPASS(n);
-                               if (n->m_len >= sizeof(*ip))  {
-                                       ip = (struct ip *)n->m_data;
-                                       if (n->m_len >= (ip->ip_hl << 2) + 
sizeof(*th))
-                                               th = (struct tcphdr 
*)((caddr_t)ip + (ip->ip_hl << 2));
-                               } else {
-                                       txq->ift_pullups++;
-                                       if (__predict_false((m = m_pullup(m, 
minthlen)) == NULL))
-                                               return (ENOMEM);
-                                       ip = (struct ip *)(m->m_data + 
pi->ipi_ehdrlen);
-                               }
-                       } else {
-                               txq->ift_pullups++;
-                               if (__predict_false((m = m_pullup(m, minthlen)) 
== NULL))
-                                       return (ENOMEM);
-                               ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
-                               if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th))
-                                       th = (struct tcphdr *)((caddr_t)ip + 
(ip->ip_hl << 2));
-                       }
-               } else {
-                       ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
-                       if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th))
-                               th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl 
<< 2));
+               hlen = pi->ipi_ehdrlen + sizeof(*ip);
+               if (__predict_false(m->m_len < hlen)) {
+                       txq->ift_pullups++;
+                       if (__predict_false((m = m_pullup(m, hlen)) == NULL))
+                               return (ENOMEM);
+               }
+               ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
+               hlen = pi->ipi_ehdrlen + (ip->ip_hl << 2);
+               if (ip->ip_p == IPPROTO_TCP) {
+                       hlen += sizeof(*th);
+                       th = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
+               } else if (ip->ip_p == IPPROTO_UDP) {
+                       hlen += sizeof(struct udphdr);
+               }
+               if (__predict_false(m->m_len < hlen)) {
+                       txq->ift_pullups++;
+                       if ((m = m_pullup(m, hlen)) == NULL)
+                               return (ENOMEM);
                }
                pi->ipi_ip_hlen = ip->ip_hl << 2;
                pi->ipi_ipproto = ip->ip_p;
@@ -3417,12 +3404,6 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, 
struct mbuf **mp)
                /* TCP checksum offload may require TCP header length */
                if (IS_TX_OFFLOAD4(pi)) {
                        if (__predict_true(pi->ipi_ipproto == IPPROTO_TCP)) {
-                               if (__predict_false(th == NULL)) {
-                                       txq->ift_pullups++;
-                                       if (__predict_false((m = m_pullup(m, 
(ip->ip_hl << 2) + sizeof(*th))) == NULL))
-                                               return (ENOMEM);
-                                       th = (struct tcphdr *)((caddr_t)ip + 
pi->ipi_ip_hlen);
-                               }
                                pi->ipi_tcp_hflags = tcp_get_flags(th);
                                pi->ipi_tcp_hlen = th->th_off << 2;
                                pi->ipi_tcp_seq = th->th_seq;

Reply via email to