Author: pkelsey
Date: Sat Mar 14 20:08:04 2020
New Revision: 359000
URL: https://svnweb.freebsd.org/changeset/base/359000

Log:
  Fix if_vmx receive checksum offload bug and harden against the device 
skipping receive descriptors
  
  This fixes a bug where the checksum offload status of received packets
  was being taken from the first descriptor instead of the last, which
  affected LRO packets.
  
  The driver has been hardened against the device skipping receive
  descriptors, although it is not believed that this can occur given the
  way this implementation configures the receive rings.
  
  Additionally, for packets received with the error indicator set, the
  driver now forces the length of all fragments in that packet to zero
  prior to passing it to iflib.  Such packets should wind up being
  discarded at some point in the stack anyway, but this removes any
  questions by killing them in the driver.
  
  Counters have been added (and exposed via sysctls) for skipped receive
  descriptors, zero-length packets received, and packets received with
  the error indicator set so that these conditions can be easily
  observed in the field.
  
  PR:           243126, 243392, 240628
  Reported by:  avg, alexandr.oleyni...@gmail.com, Harald Schmalzbauer
  Reviewed by:  gallatin
  MFC after:    1 week
  Differential Revision:        https://reviews.freebsd.org/D23949

Modified:
  head/sys/dev/vmware/vmxnet3/if_vmx.c
  head/sys/dev/vmware/vmxnet3/if_vmxvar.h

Modified: head/sys/dev/vmware/vmxnet3/if_vmx.c
==============================================================================
--- head/sys/dev/vmware/vmxnet3/if_vmx.c        Sat Mar 14 19:58:50 2020        
(r358999)
+++ head/sys/dev/vmware/vmxnet3/if_vmx.c        Sat Mar 14 20:08:04 2020        
(r359000)
@@ -1494,6 +1494,7 @@ vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
        int cqidx;
        uint16_t total_len;
        uint8_t nfrags;
+       uint8_t i;
        uint8_t flid;
 
        sc = vsc;
@@ -1517,6 +1518,7 @@ vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
                KASSERT(rxcd->sop && rxcd->eop,
                    ("%s: zero-length packet without both sop and eop set",
                        __func__));
+               rxc->vxcr_zero_length++;
                if (++cqidx == rxc->vxcr_ndesc) {
                        cqidx = 0;
                        rxc->vxcr_gen ^= 1;
@@ -1572,31 +1574,6 @@ vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
                }
        }
 
-       /* VLAN */
-       if (rxcd->vlan) {
-               ri->iri_flags |= M_VLANTAG;
-               ri->iri_vtag = rxcd->vtag;
-       }
-
-       /* Checksum offload */
-       if (!rxcd->no_csum) {
-               uint32_t csum_flags = 0;
-
-               if (rxcd->ipv4) {
-                       csum_flags |= CSUM_IP_CHECKED;
-                       if (rxcd->ipcsum_ok)
-                               csum_flags |= CSUM_IP_VALID;
-               }
-               if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
-                       csum_flags |= CSUM_L4_CALC;
-                       if (rxcd->csum_ok) {
-                               csum_flags |= CSUM_L4_VALID;
-                               ri->iri_csum_data = 0xffff;
-                       }
-               }
-               ri->iri_csum_flags = csum_flags;
-       }
-
        /*
         * The queue numbering scheme used for rxcd->qid is as follows:
         *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
@@ -1632,6 +1609,46 @@ vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
        ri->iri_nfrags = nfrags;
        ri->iri_len = total_len;
 
+       /*
+        * If there's an error, the last descriptor in the packet will
+        * have the error indicator set.  In this case, set all
+        * fragment lengths to zero.  This will cause iflib to discard
+        * the packet, but process all associated descriptors through
+        * the refill mechanism.
+        */
+       if (__predict_false(rxcd->error)) {
+               rxc->vxcr_pkt_errors++;
+               for (i = 0; i < nfrags; i++) {
+                       frag = &ri->iri_frags[i];
+                       frag->irf_len = 0;
+               }
+       } else {
+               /* Checksum offload information is in the last descriptor. */
+               if (!rxcd->no_csum) {
+                       uint32_t csum_flags = 0;
+
+                       if (rxcd->ipv4) {
+                               csum_flags |= CSUM_IP_CHECKED;
+                               if (rxcd->ipcsum_ok)
+                                       csum_flags |= CSUM_IP_VALID;
+                       }
+                       if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
+                               csum_flags |= CSUM_L4_CALC;
+                               if (rxcd->csum_ok) {
+                                       csum_flags |= CSUM_L4_VALID;
+                                       ri->iri_csum_data = 0xffff;
+                               }
+                       }
+                       ri->iri_csum_flags = csum_flags;
+               }
+
+               /* VLAN information is in the last descriptor. */
+               if (rxcd->vlan) {
+                       ri->iri_flags |= M_VLANTAG;
+                       ri->iri_vtag = rxcd->vtag;
+               }
+       }
+
        return (0);
 }
 
@@ -1645,14 +1662,13 @@ vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
        uint64_t *paddrs;
        int count;
        int len;
-       int pidx;
+       int idx;
        int i;
        uint8_t flid;
        uint8_t btype;
 
        count = iru->iru_count;
        len = iru->iru_buf_size;
-       pidx = iru->iru_pidx;
        flid = iru->iru_flidx;
        paddrs = iru->iru_paddrs;
 
@@ -1666,17 +1682,32 @@ vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
         * command ring 1 is filled with BTYPE_BODY descriptors.
         */
        btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
-       for (i = 0; i < count; i++) {
-               rxd[pidx].addr = paddrs[i];
-               rxd[pidx].len = len;
-               rxd[pidx].btype = btype;
-               rxd[pidx].gen = rxr->vxrxr_gen;
+       /*
+        * The refill entries from iflib will advance monotonically,
+        * but the refilled descriptors may not be contiguous due to
+        * earlier skipping of descriptors by the device.  The refill
+        * entries from iflib need an entire state update, while the
+        * descriptors previously skipped by the device only need to
+        * have their generation numbers updated.
+        */
+       idx = rxr->vxrxr_refill_start;
+       i = 0;
+       do {
+               if (idx == iru->iru_idxs[i]) {
+                       rxd[idx].addr = paddrs[i];
+                       rxd[idx].len = len;
+                       rxd[idx].btype = btype;
+                       i++;
+               } else
+                       rxr->vxrxr_desc_skips++;
+               rxd[idx].gen = rxr->vxrxr_gen;
 
-               if (++pidx == rxr->vxrxr_ndesc) {
-                       pidx = 0;
+               if (++idx == rxr->vxrxr_ndesc) {
+                       idx = 0;
                        rxr->vxrxr_gen ^= 1;
                }
-       }
+       } while (i != count);
+       rxr->vxrxr_refill_start = idx;
 }
 
 static void
@@ -1825,6 +1856,8 @@ vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet
        for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
                rxr = &rxq->vxrxq_cmd_ring[i];
                rxr->vxrxr_gen = VMXNET3_INIT_GEN;
+               rxr->vxrxr_desc_skips = 0;
+               rxr->vxrxr_refill_start = 0;
                /*
                 * iflib has zeroed out the descriptor array during the
                 * prior attach or stop
@@ -1834,6 +1867,8 @@ vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet
        for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
                rxr = &rxq->vxrxq_cmd_ring[i];
                rxr->vxrxr_gen = 0;
+               rxr->vxrxr_desc_skips = 0;
+               rxr->vxrxr_refill_start = 0;
                bzero(rxr->vxrxr_rxd,
                    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
        }
@@ -1841,6 +1876,8 @@ vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet
        rxc = &rxq->vxrxq_comp_ring;
        rxc->vxcr_next = 0;
        rxc->vxcr_gen = VMXNET3_INIT_GEN;
+       rxc->vxcr_zero_length = 0;
+       rxc->vxcr_pkt_errors = 0;
        /*
         * iflib has zeroed out the descriptor array during the prior attach
         * or stop
@@ -2285,14 +2322,22 @@ vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
                    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
                SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
                    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
+               SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", 
CTLFLAG_RD,
+                   &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
                SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
                    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
                SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
                    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
+               SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", 
CTLFLAG_RD,
+                   &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
                SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
                    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,"");
                SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
                    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
+               SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", 
CTLFLAG_RD,
+                   &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
+               SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", 
CTLFLAG_RD,
+                   &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
        }
 }
 

Modified: head/sys/dev/vmware/vmxnet3/if_vmxvar.h
==============================================================================
--- head/sys/dev/vmware/vmxnet3/if_vmxvar.h     Sat Mar 14 19:58:50 2020        
(r358999)
+++ head/sys/dev/vmware/vmxnet3/if_vmxvar.h     Sat Mar 14 20:08:04 2020        
(r359000)
@@ -63,6 +63,8 @@ struct vmxnet3_rxring {
        u_int                    vxrxr_ndesc;
        int                      vxrxr_gen;
        bus_addr_t               vxrxr_paddr;
+       uint64_t                 vxrxr_desc_skips;
+       uint16_t                 vxrxr_refill_start;
 };
 
 struct vmxnet3_comp_ring {
@@ -78,6 +80,8 @@ struct vmxnet3_comp_ring {
        u_int                    vxcr_ndesc;
        int                      vxcr_gen;
        bus_addr_t               vxcr_paddr;
+       uint64_t                 vxcr_zero_length;
+       uint64_t                 vxcr_pkt_errors;
 };
 
 struct vmxnet3_txqueue {
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to