The branch main has been updated by tuexen:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=1c23d8f9f39870951c1d0dfbb112fc4e53237737

commit 1c23d8f9f39870951c1d0dfbb112fc4e53237737
Author:     Michael Tuexen <tue...@freebsd.org>
AuthorDate: 2025-09-04 11:16:46 +0000
Commit:     Michael Tuexen <tue...@freebsd.org>
CommitDate: 2025-09-04 11:16:46 +0000

    vtnet: improve checksum offloading
    
    When transmitting a packet over the vtnet interface, map the
    csum flags CSUM_DATA_VALID | CSUM_PSEUDO_HDR to the virtio
    flag VIRTIO_NET_HDR_F_DATA_VALID.
    When receiving a packet over the virtio network channel, translate
    the virtio flag VIRTIO_NET_HDR_F_NEEDS_CSUM not to CSUM_DATA_VALID |
    CSUM_PSEUDO_HDR, but to CSUM_TCP, CSUM_TCP_IPV6, CSUM_UDP, or
    CSUM_UDP_IPV6.
    The second change fixes a series of issue related to checksum
    offloading for if_vtnet.
    While there, improve the stats counters to allow a detailed view
    on what is going on in relation to checksum offloading.
    
    PR:                     165059
    Reviewed by:            tuexen, manpages
    MFC after:              1 week
    Differential Revision:  https://reviews.freebsd.org/D51686
---
 share/man/man4/vtnet.4               |  28 +++--
 sys/dev/virtio/network/if_vtnet.c    | 220 ++++++++++++++++++-----------------
 sys/dev/virtio/network/if_vtnetvar.h |   2 +-
 3 files changed, 132 insertions(+), 118 deletions(-)

diff --git a/share/man/man4/vtnet.4 b/share/man/man4/vtnet.4
index 073383df11ff..67a835050422 100644
--- a/share/man/man4/vtnet.4
+++ b/share/man/man4/vtnet.4
@@ -22,7 +22,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd September 3, 2025
+.Dd September 4, 2025
 .Dt VTNET 4
 .Os
 .Sh NAME
@@ -153,7 +153,14 @@ The number of times the receive interrupt handler was 
rescheduled.
 .It Va dev.vtnet. Ns Ar X Ns Va .rxq Ns Ar Y Ns Va .host_lro
 The number of times TCP large receive offload was performed.
 .It Va dev.vtnet. Ns Ar X Ns Va .rxq Ns Ar Y Ns Va .csum_failed
-Currently not used.
+The number of times a packet with a request for receive or transmit checksum
+offloading was received and this request failed.
+The different reasons for the failure are counted by
+.Va dev.vtnet. Ns Ar X Ns Va .rx_csum_inaccessible_ipproto ,
+.Va dev.vtnet. Ns Ar X Ns Va .rx_csum_bad_ipproto ,
+.Va dev.vtnet. Ns Ar X Ns Va .rx_csum_bad_ethtype ,
+and
+.Va dev.vtnet. Ns Ar X Ns Va .rx_csum_bad_offset .
 .It Va dev.vtnet. Ns Ar X Ns Va .rxq Ns Ar Y Ns Va .csum
 The number of times receive checksum offloading for UDP or TCP was performed.
 .It Va dev.vtnet. Ns Ar X Ns Va .rxq Ns Ar Y Ns Va .ierrors
@@ -213,18 +220,21 @@ over all receive queues of the interface.
 The sum of
 .Va dev.vtnet. Ns Ar X Ns Va .rxq Ns Ar Y Ns Va .csum_failed
 over all receive queues of the interface.
-.It Va dev.vtnet. Ns Ar X Ns Va .rx_csum_bad_proto
-Currently unused.
+.It Va dev.vtnet. Ns Ar X Ns Va .rx_csum_inaccessible_ipproto
+The number of times a packet with a request for receive or transmit checksum
+offloading was received where the IP protocol was not accessible.
 .It Va dev.vtnet. Ns Ar X Ns Va .rx_csum_bad_offset
-Currently unused.
-.It Va dev.vtnet. Ns Ar X Ns Va .rx_csum_bad_ipproto
-Currently unused.
-.It Va dev.vtnet. Ns Ar X Ns Va .rx_csum_bad_ethtype
 The number of times fixing the checksum required by
 .Va hw.vtnet.fixup_needs_csum
 or
 .Va hw.vtnet. Ns Ar X Ns Va .fixup_needs_csum
-was attempted for a packet with an EtherType other than IPv4 or IPv6.
+was attempted for a packet where the csum is not located in the first mbuf.
+.It Va dev.vtnet. Ns Ar X Ns Va .rx_csum_bad_ipproto
+The number of times a packet with a request for receive or transmit checksum
+offloading was received where the IP protocol was neither TCP nor UDP.
+.It Va dev.vtnet. Ns Ar X Ns Va .rx_csum_bad_ethtype
+The number of times a packet with a request for receive or transmit checksum
+offloading was received where the EtherType was neither IPv4 nor IPv6.
 .It Va dev.vtnet. Ns Ar X Ns Va .rx_mergeable_failed
 The number of times receiving a mergable buffer failed.
 .It Va dev.vtnet. Ns Ar X Ns Va .rx_enq_replacement_failed
diff --git a/sys/dev/virtio/network/if_vtnet.c 
b/sys/dev/virtio/network/if_vtnet.c
index 867da80a53a8..4f19af6281a3 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -134,9 +134,9 @@ static int  vtnet_rxq_replace_buf(struct vtnet_rxq *, 
struct mbuf *, int);
 static int     vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
 static int     vtnet_rxq_new_buf(struct vtnet_rxq *);
 static int     vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
-                    uint16_t, int, struct virtio_net_hdr *);
-static int     vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
-                    uint16_t, int, struct virtio_net_hdr *);
+                    bool, int, struct virtio_net_hdr *);
+static void    vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
+                   int);
 static int     vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
                     struct virtio_net_hdr *);
 static void    vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
@@ -1761,162 +1761,161 @@ vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
 }
 
 static int
-vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t 
etype,
-    int hoff, struct virtio_net_hdr *hdr)
+vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, bool isipv6,
+    int protocol, struct virtio_net_hdr *hdr)
 {
        struct vtnet_softc *sc;
-       int error;
 
-       sc = rxq->vtnrx_sc;
+       /*
+        * The packet is likely from another VM on the same host or from the
+        * host that itself performed checksum offloading so Tx/Rx is basically
+        * a memcpy and the checksum has little value so far.
+        */
+
+       KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP,
+           ("%s: unsupported IP protocol %d", __func__, protocol));
 
        /*
-        * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
-        * not have an analogous CSUM flag. The checksum has been validated,
-        * but is incomplete (TCP/UDP pseudo header).
-        *
-        * The packet is likely from another VM on the same host that itself
-        * performed checksum offloading so Tx/Rx is basically a memcpy and
-        * the checksum has little value.
-        *
-        * Default to receiving the packet as-is for performance reasons, but
-        * this can cause issues if the packet is to be forwarded because it
-        * does not contain a valid checksum. This patch may be helpful:
-        * https://reviews.freebsd.org/D6611. In the meantime, have the driver
-        * compute the checksum if requested.
-        *
-        * BMV: Need to add an CSUM_PARTIAL flag?
+        * If the user don't want us to fix it up here by computing the
+        * checksum, just forward the order to compute the checksum by setting
+        * the corresponding mbuf flag (e.g., CSUM_TCP).
         */
+       sc = rxq->vtnrx_sc;
        if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
-               error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
-               return (error);
+               switch (protocol) {
+               case IPPROTO_TCP:
+                       m->m_pkthdr.csum_flags |=
+                           (isipv6 ? CSUM_TCP_IPV6 : CSUM_TCP);
+                       break;
+               case IPPROTO_UDP:
+                       m->m_pkthdr.csum_flags |=
+                           (isipv6 ? CSUM_UDP_IPV6 : CSUM_UDP);
+                       break;
+               }
+               m->m_pkthdr.csum_data = hdr->csum_offset;
+               return (0);
        }
 
        /*
         * Compute the checksum in the driver so the packet will contain a
         * valid checksum. The checksum is at csum_offset from csum_start.
         */
-       switch (etype) {
-#if defined(INET) || defined(INET6)
-       case ETHERTYPE_IP:
-       case ETHERTYPE_IPV6: {
-               int csum_off, csum_end;
-               uint16_t csum;
+       int csum_off, csum_end;
+       uint16_t csum;
 
-               csum_off = hdr->csum_start + hdr->csum_offset;
-               csum_end = csum_off + sizeof(uint16_t);
+       csum_off = hdr->csum_start + hdr->csum_offset;
+       csum_end = csum_off + sizeof(uint16_t);
 
-               /* Assume checksum will be in the first mbuf. */
-               if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
-                       return (1);
-
-               /*
-                * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
-                * checksum and write it at the specified offset. We could
-                * try to verify the packet: csum_start should probably
-                * correspond to the start of the TCP/UDP header.
-                *
-                * BMV: Need to properly handle UDP with zero checksum. Is
-                * the IPv4 header checksum implicitly validated?
-                */
-               csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
-               *(uint16_t *)(mtodo(m, csum_off)) = csum;
-               m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
-               m->m_pkthdr.csum_data = 0xFFFF;
-               break;
-       }
-#endif
-       default:
-               sc->vtnet_stats.rx_csum_bad_ethtype++;
+       /* Assume checksum will be in the first mbuf. */
+       if (m->m_len < csum_end || m->m_pkthdr.len < csum_end) {
+               sc->vtnet_stats.rx_csum_bad_offset++;
                return (1);
        }
 
+       /*
+        * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
+        * checksum and write it at the specified offset. We could
+        * try to verify the packet: csum_start should probably
+        * correspond to the start of the TCP/UDP header.
+        *
+        * BMV: Need to properly handle UDP with zero checksum. Is
+        * the IPv4 header checksum implicitly validated?
+        */
+       csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
+       *(uint16_t *)(mtodo(m, csum_off)) = csum;
+       m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+       m->m_pkthdr.csum_data = 0xFFFF;
+
        return (0);
 }
 
+static void
+vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m, int protocol)
+{
+       KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP,
+           ("%s: unsupported IP protocol %d", __func__, protocol));
+
+       m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+       m->m_pkthdr.csum_data = 0xFFFF;
+}
+
 static int
-vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
-    uint16_t etype, int hoff, struct virtio_net_hdr *hdr __unused)
+vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
+    struct virtio_net_hdr *hdr)
 {
-#if 0
+       const struct ether_header *eh;
        struct vtnet_softc *sc;
-#endif
-       int protocol;
+       int hoff, protocol;
+       uint16_t etype;
+       bool isipv6;
+
+       KASSERT(hdr->flags &
+           (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID),
+           ("%s: missing checksum offloading flag %x", __func__, hdr->flags));
+
+       eh = mtod(m, const struct ether_header *);
+       etype = ntohs(eh->ether_type);
+       if (etype == ETHERTYPE_VLAN) {
+               /* TODO BMV: Handle QinQ. */
+               const struct ether_vlan_header *evh =
+                   mtod(m, const struct ether_vlan_header *);
+               etype = ntohs(evh->evl_proto);
+               hoff = sizeof(struct ether_vlan_header);
+       } else
+               hoff = sizeof(struct ether_header);
 
-#if 0
        sc = rxq->vtnrx_sc;
-#endif
 
+       /* Check whether ethernet type is IP or IPv6, and get protocol. */
        switch (etype) {
 #if defined(INET)
        case ETHERTYPE_IP:
-               if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
-                       protocol = IPPROTO_DONE;
-               else {
+               if (__predict_false(m->m_len < hoff + sizeof(struct ip))) {
+                       sc->vtnet_stats.rx_csum_inaccessible_ipproto++;
+                       return (1);
+               } else {
                        struct ip *ip = (struct ip *)(m->m_data + hoff);
                        protocol = ip->ip_p;
                }
+               isipv6 = false;
                break;
 #endif
 #if defined(INET6)
        case ETHERTYPE_IPV6:
                if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
-                   || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
-                       protocol = IPPROTO_DONE;
+                   || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0) {
+                       sc->vtnet_stats.rx_csum_inaccessible_ipproto++;
+                       return (1);
+               }
+               isipv6 = true;
                break;
 #endif
        default:
-               protocol = IPPROTO_DONE;
-               break;
+               sc->vtnet_stats.rx_csum_bad_ethtype++;
+               return (1);
        }
 
+       /* Check whether protocol is TCP or UDP. */
        switch (protocol) {
        case IPPROTO_TCP:
        case IPPROTO_UDP:
-               m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
-               m->m_pkthdr.csum_data = 0xFFFF;
                break;
        default:
                /*
                 * FreeBSD does not support checksum offloading of this
-                * protocol. Let the stack re-verify the checksum later
-                * if the protocol is supported.
+                * protocol here.
                 */
-#if 0
-               if_printf(sc->vtnet_ifp,
-                   "%s: checksum offload of unsupported protocol "
-                   "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n",
-                   __func__, etype, protocol, hdr->csum_start,
-                   hdr->csum_offset);
-#endif
-               break;
+               sc->vtnet_stats.rx_csum_bad_ipproto++;
+               return (1);
        }
 
-       return (0);
-}
-
-static int
-vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
-    struct virtio_net_hdr *hdr)
-{
-       const struct ether_header *eh;
-       int hoff;
-       uint16_t etype;
-
-       eh = mtod(m, const struct ether_header *);
-       etype = ntohs(eh->ether_type);
-       if (etype == ETHERTYPE_VLAN) {
-               /* TODO BMV: Handle QinQ. */
-               const struct ether_vlan_header *evh =
-                   mtod(m, const struct ether_vlan_header *);
-               etype = ntohs(evh->evl_proto);
-               hoff = sizeof(struct ether_vlan_header);
-       } else
-               hoff = sizeof(struct ether_header);
-
        if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
-               return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr));
+               return (vtnet_rxq_csum_needs_csum(rxq, m, isipv6, protocol,
+                   hdr));
        else /* VIRTIO_NET_HDR_F_DATA_VALID */
-               return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr));
+               vtnet_rxq_csum_data_valid(rxq, m, protocol);
+
+       return (0);
 }
 
 static void
@@ -2497,6 +2496,10 @@ vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
                hdr->csum_start = vtnet_gtoh16(sc, csum_start);
                hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
                txq->vtntx_stats.vtxs_csum++;
+       } else if ((flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) &&
+                  (proto == IPPROTO_TCP || proto == IPPROTO_UDP) &&
+                  (m->m_pkthdr.csum_data == 0xFFFF)) {
+               hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
        }
 
        if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
@@ -2610,7 +2613,8 @@ vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf 
**m_head, int flags)
                m->m_flags &= ~M_VLANTAG;
        }
 
-       if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
+       if (m->m_pkthdr.csum_flags &
+           (VTNET_CSUM_ALL_OFFLOAD | CSUM_DATA_VALID)) {
                m = vtnet_txq_offload(txq, m, hdr);
                if ((*m_head = m) == NULL) {
                        error = ENOBUFS;
@@ -4322,9 +4326,9 @@ vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
        SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
            CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_offset,
            "Received checksum offloaded buffer with incorrect offset");
-       SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
-           CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_proto,
-           "Received checksum offloaded buffer with incorrect protocol");
+       SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_inaccessible_ipproto",
+           CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_inaccessible_ipproto,
+           "Received checksum offloaded buffer with inaccessible IP protocol");
        SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_failed",
            CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
            sc, 0, vtnet_sysctl_rx_csum_failed, "QU",
diff --git a/sys/dev/virtio/network/if_vtnetvar.h 
b/sys/dev/virtio/network/if_vtnetvar.h
index 0144b0f3232d..cab7ced639a7 100644
--- a/sys/dev/virtio/network/if_vtnetvar.h
+++ b/sys/dev/virtio/network/if_vtnetvar.h
@@ -46,7 +46,7 @@ struct vtnet_statistics {
        uint64_t        rx_csum_bad_ethtype;
        uint64_t        rx_csum_bad_ipproto;
        uint64_t        rx_csum_bad_offset;
-       uint64_t        rx_csum_bad_proto;
+       uint64_t        rx_csum_inaccessible_ipproto;
        uint64_t        tx_csum_unknown_ethtype;
        uint64_t        tx_csum_proto_mismatch;
        uint64_t        tx_tso_not_tcp;

Reply via email to