On Thu,  8 Jan 2026 16:47:13 -0500
[email protected] wrote:

> diff --git a/lib/net/rte_ip4.h b/lib/net/rte_ip4.h
> index 822a660cfb..63852717c9 100644
> --- a/lib/net/rte_ip4.h
> +++ b/lib/net/rte_ip4.h
> @@ -223,21 +223,17 @@ rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr 
> *ipv4_hdr, uint64_t ol_flags)
>               uint8_t  zero;     /* zero. */
>               uint8_t  proto;    /* L4 protocol type. */
>               uint16_t len;      /* L4 length. */
> -     } psd_hdr;
> -
> -     uint32_t l3_len;
> -
> -     psd_hdr.src_addr = ipv4_hdr->src_addr;
> -     psd_hdr.dst_addr = ipv4_hdr->dst_addr;
> -     psd_hdr.zero = 0;
> -     psd_hdr.proto = ipv4_hdr->next_proto_id;
> -     if (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) {
> -             psd_hdr.len = 0;
> -     } else {
> -             l3_len = rte_be_to_cpu_16(ipv4_hdr->total_length);
> -             psd_hdr.len = rte_cpu_to_be_16((uint16_t)(l3_len -
> -                     rte_ipv4_hdr_len(ipv4_hdr)));
> -     }
> +     } psd_hdr = {
> +             .src_addr = ipv4_hdr->src_addr,
> +             .dst_addr = ipv4_hdr->dst_addr,
> +             .proto = ipv4_hdr->next_proto_id,
> +             .len = (ol_flags & (RTE_MBUF_F_TX_TCP_SEG | 
> RTE_MBUF_F_TX_UDP_SEG))
> +                     ? (uint16_t)0
> +                     : 
> rte_cpu_to_be_16((uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) -
> +                                     rte_ipv4_hdr_len(ipv4_hdr)))
> +     };
> +     RTE_SUPPRESS_UNINITIALIZED_WARNING(psd_hdr);
> +
>       return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
>  }

Since the pseudo-header is a fixed size, why not hand-unroll the sum?
That is what the Linux kernel does for the IPv4 header checksum, and this
would be another such case. No loop is really necessary.

/*
 * Compute the folded (not complemented) 16-bit one's-complement sum of the
 * IPv4 pseudo-header built from ipv4_hdr, without calling rte_raw_cksum().
 *
 * When ol_flags has RTE_MBUF_F_TX_TCP_SEG or RTE_MBUF_F_TX_UDP_SEG set, the
 * pseudo-header length field is left at zero; otherwise it is the IPv4
 * total length minus the IPv4 header length, stored big-endian.
 */
static inline uint16_t
rte_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
{
        /* Pseudo-header overlaid with the six 16-bit words that get summed. */
        union {
                struct {
                        uint32_t src_addr;
                        uint32_t dst_addr;
                        uint8_t  zero;
                        uint8_t  proto;
                        rte_be16_t len;
                } psd;
                uint16_t u16[6];
        } pseudo = {
                /* Members not named here (zero, len) are zero-initialized. */
                .psd = {
                        .src_addr = ipv4_hdr->src_addr,
                        .dst_addr = ipv4_hdr->dst_addr,
                        .proto = ipv4_hdr->next_proto_id,
                }
        };
        const uint64_t seg_flags = RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG;
        uint32_t acc;

        /* Only fill in the L4 length when no segmentation offload is requested. */
        if ((ol_flags & seg_flags) == 0) {
                const uint16_t total_len = rte_be_to_cpu_16(ipv4_hdr->total_length);

                pseudo.psd.len = rte_cpu_to_be_16(
                        (uint16_t)(total_len - rte_ipv4_hdr_len(ipv4_hdr)));
        }

        /* Straight-line sum of the six words; no loop needed for a fixed size. */
        acc  = pseudo.u16[0];
        acc += pseudo.u16[1];
        acc += pseudo.u16[2];
        acc += pseudo.u16[3];
        acc += pseudo.u16[4];
        acc += pseudo.u16[5];

        /*
         * Fold the carries back into the low 16 bits. The first fold can
         * itself carry out of bit 15, so a second fold is required.
         */
        acc = (acc >> 16) + (acc & 0xffff);
        acc = (acc >> 16) + (acc & 0xffff);

        return (uint16_t)acc;
}

I.e., don't use rte_raw_cksum at all.

Same for IPv6.

Reply via email to