Author: np
Date: Tue Aug 25 02:14:36 2020
New Revision: 364745
URL: https://svnweb.freebsd.org/changeset/base/364745

Log:
  MFC r351444, r357475, r357479, r357481-r357482, r358859, and r364497.
  
  All these are rx improvements in the cxgbe(4) driver.
  
  r351444:
  cxgbe(4): Use the same buffer size for TOE rx queues as the NIC rx queues.
  
  This is a minor simplification.
  
  r357475:
  cxgbe(4): Initialize the rx buffer's metadata on first-use and not on
  allocation.
  
  refill_fl doesn't touch any part of a freshly allocated cluster after
  this change.
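
  The shape of the change, condensed from the t4_sge.c hunk further down
  (a sketch, not the verbatim driver code): the metadata at the tail of a
  packed cluster is written only when the first mbuf takes a reference on
  it, so refill_fl can post a freshly allocated cluster without touching
  its memory.

	/* get_scatter_segment(): first reference initializes metadata. */
	clm = cl_metadata(sd);
	if (sd->nmbuf++ == 0) {
		clm->refcount = 1;	/* written on first use, not in refill_fl */
		clm->zone = rxb->zone;
		clm->cl = sd->cl;
		counter_u64_add(extfree_refs, 1);
	}
	m_extaddref(m, payload, blen, &clm->refcount, rxb_free, clm, NULL);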
  
  r357479:
  cxgbe(4): Avoid ext_arg2 in rxb_free.
  
  ext_arg2 is the only item in the third cacheline in an mbuf and could be
  cold by the time rxb_free runs.  Put the information needed by rxb_free
  in the same cacheline as the refcount, which is very likely to be hot given
  that rxb_free runs when the refcount is decremented and reaches 0.
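
  The new layout, as it appears in the adapter.h hunk below: everything
  rxb_free needs is stored next to the refcount, and the free routine
  dereferences only ext_arg1 (condensed sketch):

	struct cluster_metadata {
		uma_zone_t zone;	/* shares a cacheline with refcount */
		caddr_t cl;
		u_int refcount;
	};

	static void
	rxb_free(struct mbuf *m)
	{
		struct cluster_metadata *clm = m->m_ext.ext_arg1;

		uma_zfree(clm->zone, clm->cl);	/* no ext_arg2 access */
		counter_u64_add(extfree_rels, 1);
	}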
  
  r357481:
  cxgbe(4): Retire the allow_mbufs_in_cluster optimization.
  
  This simplifies the driver's rx fast path as well as the bookkeeping
  code that tracks various rx buffer sizes and layouts.
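
  With the optimization gone, every rx mbuf comes from zone_mbuf and the
  payload is attached to it in one of just three ways (condensed from the
  new get_scatter_segment() below):

	if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD)
		bcopy(payload, mtod(m, caddr_t), len);	/* a) copy into mbuf */
	else if (fl->flags & FL_BUF_PACKING)
		m_extaddref(m, payload, blen, &clm->refcount,
		    rxb_free, clm, NULL);		/* b) shared, refcounted */
	else
		m_cljset(m, sd->cl, rxb->type);		/* c) cluster handed over */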
  
  r357482:
  cxgbe(4): Treat NIC rx as special and run its handler directly and not
  via the t4_cpl_handler dispatch table.
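
  In service_iq_fl the opcode is now checked inline and CPL_RX_PKT is
  handed straight to eth_rx() instead of being looked up in the
  t4_cpl_handler table (sketch of the hunk below):

	case X_RSPD_TYPE_FLBUF:
		/* ... newbuf and refill handling elided ... */
		if (d->rss.opcode == CPL_RX_PKT) {
			/* NIC rx: bypass the dispatch table. */
			if (__predict_true(eth_rx(sc, rxq, d, lq) == 0))
				break;
			goto out;
		}
		m0 = get_fl_payload(sc, fl, lq);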
  
  r358859:
  cxgbe(4): Do not try to use 0 as an rx buffer address when the driver is
  already allocating from the safe zone and the allocation fails.
  
  This bug was introduced in r357481.
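
  The refill_fl change itself lies past the truncation point of this
  diff, but the fix has roughly this shape (an illustrative sketch only;
  the identifiers follow the rx_buf_info naming used elsewhere in the
  patch): fall back to the safe zone at most once, and bail out rather
  than post a NULL cluster, i.e. bus address 0, to the hardware.

	cl = uma_zalloc(rxb->zone, M_NOWAIT);
	if (__predict_false(cl == NULL)) {
		if (zidx != fl->safe_zidx) {
			/* Retry once from the safe zone. */
			zidx = fl->safe_zidx;
			rxb = &sc->sge.rx_buf_info[zidx];
			cl = uma_zalloc(rxb->zone, M_NOWAIT);
		}
		if (cl == NULL)
			break;	/* never hand address 0 to the chip */
	}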
  
  r364497:
  cxgbe(4): Use large clusters for TOE rx queues when TOE+TLS is enabled.
  
  Rx is more efficient within the chip when the receive buffer size
  matches the TLS PDU size.
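
  max_rx_payload() in the t4_sge.c hunk below implements this: when TOE
  TLS is enabled, the target rx payload is raised to the chip's maximum
  rx PDU (read from TP registers in t4_init_tp_params), which makes the
  freelist code select correspondingly large clusters.

	maxp = sc->params.sge.fl_pktshift + ETHER_HDR_LEN +
	    ETHER_VLAN_ENCAP_LEN + ifp->if_mtu;
	if (ofld && sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS &&
	    maxp < sc->params.tp.max_rx_pdu)
		maxp = sc->params.tp.max_rx_pdu;
	return (maxp);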
  
  Sponsored by: Chelsio Communications

Modified:
  stable/12/share/man/man4/cxgbe.4
  stable/12/sys/dev/cxgbe/adapter.h
  stable/12/sys/dev/cxgbe/common/common.h
  stable/12/sys/dev/cxgbe/common/t4_hw.c
  stable/12/sys/dev/cxgbe/t4_main.c
  stable/12/sys/dev/cxgbe/t4_netmap.c
  stable/12/sys/dev/cxgbe/t4_sge.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/share/man/man4/cxgbe.4
==============================================================================
--- stable/12/share/man/man4/cxgbe.4    Tue Aug 25 00:58:14 2020        (r364744)
+++ stable/12/share/man/man4/cxgbe.4    Tue Aug 25 02:14:36 2020        (r364745)
@@ -317,11 +317,6 @@ Allow the hardware to deliver multiple frames in the s
 opportunistically.
 The default is -1 which lets the driver decide.
 0 or 1 explicitly disable or enable this feature.
-.It Va hw.cxgbe.allow_mbufs_in_cluster
-1 allows the driver to lay down one or more mbufs within the receive buffer
-opportunistically.
-This is the default.
-0 prohibits the driver from doing so.
 .It Va hw.cxgbe.largest_rx_cluster
 .It Va hw.cxgbe.safest_rx_cluster
 Sizes of rx clusters.

Modified: stable/12/sys/dev/cxgbe/adapter.h
==============================================================================
--- stable/12/sys/dev/cxgbe/adapter.h   Tue Aug 25 00:58:14 2020        (r364744)
+++ stable/12/sys/dev/cxgbe/adapter.h   Tue Aug 25 02:14:36 2020        (r364745)
@@ -314,24 +314,17 @@ struct port_info {
 
 #define        IS_MAIN_VI(vi)          ((vi) == &((vi)->pi->vi[0]))
 
-/* Where the cluster came from, how it has been carved up. */
-struct cluster_layout {
-       int8_t zidx;
-       int8_t hwidx;
-       uint16_t region1;       /* mbufs laid out within this region */
-                               /* region2 is the DMA region */
-       uint16_t region3;       /* cluster_metadata within this region */
-};
-
 struct cluster_metadata {
+       uma_zone_t zone;
+       caddr_t cl;
        u_int refcount;
-       struct fl_sdesc *sd;    /* For debug only.  Could easily be stale */
 };
 
 struct fl_sdesc {
        caddr_t cl;
        uint16_t nmbuf; /* # of driver originated mbufs with ref on cluster */
-       struct cluster_layout cll;
+       int16_t moff;   /* offset of metadata from cl */
+       uint8_t zidx;
 };
 
 struct tx_desc {
@@ -463,20 +456,17 @@ struct sge_eq {
        char lockname[16];
 };
 
-struct sw_zone_info {
+struct rx_buf_info {
        uma_zone_t zone;        /* zone that this cluster comes from */
-       int size;               /* size of cluster: 2K, 4K, 9K, 16K, etc. */
-       int type;               /* EXT_xxx type of the cluster */
-       int8_t head_hwidx;
-       int8_t tail_hwidx;
+       uint16_t size1;         /* same as size of cluster: 2K/4K/9K/16K.
+                                * hwsize[hwidx1] = size1.  No spare. */
+       uint16_t size2;         /* hwsize[hwidx2] = size2.
+                                * spare in cluster = size1 - size2. */
+       int8_t hwidx1;          /* SGE bufsize idx for size1 */
+       int8_t hwidx2;          /* SGE bufsize idx for size2 */
+       uint8_t type;           /* EXT_xxx type of the cluster */
 };
 
-struct hw_buf_info {
-       int8_t zidx;            /* backpointer to zone; -ve means unused */
-       int8_t next;            /* next hwidx for this zone; -1 means no more */
-       int size;
-};
-
 enum {
        NUM_MEMWIN = 3,
 
@@ -516,7 +506,8 @@ struct sge_fl {
        struct mtx fl_lock;
        __be64 *desc;           /* KVA of descriptor ring, ptr to addresses */
        struct fl_sdesc *sdesc; /* KVA of software descriptor ring */
-       struct cluster_layout cll_def;  /* default refill zone, layout */
+       uint16_t zidx;          /* refill zone idx */
+       uint16_t safe_zidx;
        uint16_t lowat;         /* # of buffers <= this means fl needs help */
        int flags;
        uint16_t buf_boundary;
@@ -534,8 +525,6 @@ struct sge_fl {
        u_int rx_offset;        /* offset in fl buf (when buffer packing) */
        volatile uint32_t *udb;
 
-       uint64_t mbuf_allocated;/* # of mbuf allocated from zone_mbuf */
-       uint64_t mbuf_inlined;  /* # of mbuf created within clusters */
        uint64_t cl_allocated;  /* # of clusters allocated */
        uint64_t cl_recycled;   /* # of clusters recycled */
        uint64_t cl_fast_recycled; /* # of clusters recycled (fast) */
@@ -552,7 +541,6 @@ struct sge_fl {
        bus_dmamap_t desc_map;
        char lockname[16];
        bus_addr_t ba;          /* bus address of descriptor ring */
-       struct cluster_layout cll_alt;  /* alternate refill zone, layout */
 };
 
 struct mp_ring;
@@ -766,10 +754,8 @@ struct sge {
        struct sge_iq **iqmap;  /* iq->cntxt_id to iq mapping */
        struct sge_eq **eqmap;  /* eq->cntxt_id to eq mapping */
 
-       int8_t safe_hwidx1;     /* may not have room for metadata */
-       int8_t safe_hwidx2;     /* with room for metadata and maybe more */
-       struct sw_zone_info sw_zone_info[SW_ZONE_SIZES];
-       struct hw_buf_info hw_buf_info[SGE_FLBUF_SIZES];
+       int8_t safe_zidx;
+       struct rx_buf_info rx_buf_info[SW_ZONE_SIZES];
 };
 
 struct devnames {

Modified: stable/12/sys/dev/cxgbe/common/common.h
==============================================================================
--- stable/12/sys/dev/cxgbe/common/common.h     Tue Aug 25 00:58:14 2020        (r364744)
+++ stable/12/sys/dev/cxgbe/common/common.h     Tue Aug 25 02:14:36 2020        (r364745)
@@ -246,6 +246,8 @@ struct tp_params {
 
        uint32_t vlan_pri_map;
        uint32_t ingress_config;
+       uint32_t max_rx_pdu;
+       uint32_t max_tx_pdu;
        uint64_t hash_filter_mask;
        __be16 err_vec_mask;
 

Modified: stable/12/sys/dev/cxgbe/common/t4_hw.c
==============================================================================
--- stable/12/sys/dev/cxgbe/common/t4_hw.c      Tue Aug 25 00:58:14 2020        (r364744)
+++ stable/12/sys/dev/cxgbe/common/t4_hw.c      Tue Aug 25 02:14:36 2020        (r364745)
@@ -9348,7 +9348,7 @@ static void read_filter_mode_and_ingress_config(struct
 int t4_init_tp_params(struct adapter *adap, bool sleep_ok)
 {
        int chan;
-       u32 v;
+       u32 tx_len, rx_len, r, v;
        struct tp_params *tpp = &adap->params.tp;
 
        v = t4_read_reg(adap, A_TP_TIMER_RESOLUTION);
@@ -9374,6 +9374,21 @@ int t4_init_tp_params(struct adapter *adap, bool sleep
                            htobe16(V_T6_COMPR_RXERR_VEC(M_T6_COMPR_RXERR_VEC));
                }
        }
+
+       rx_len = t4_read_reg(adap, A_TP_PMM_RX_PAGE_SIZE);
+       tx_len = t4_read_reg(adap, A_TP_PMM_TX_PAGE_SIZE);
+
+       r = t4_read_reg(adap, A_TP_PARA_REG2);
+       rx_len = min(rx_len, G_MAXRXDATA(r));
+       tx_len = min(tx_len, G_MAXTXDATA(r));
+
+       r = t4_read_reg(adap, A_TP_PARA_REG7);
+       v = min(G_PMMAXXFERLEN0(r), G_PMMAXXFERLEN1(r));
+       rx_len = min(rx_len, v);
+       tx_len = min(tx_len, v);
+
+       tpp->max_tx_pdu = tx_len;
+       tpp->max_rx_pdu = rx_len;
 
        return 0;
 }

Modified: stable/12/sys/dev/cxgbe/t4_main.c
==============================================================================
--- stable/12/sys/dev/cxgbe/t4_main.c   Tue Aug 25 00:58:14 2020        (r364744)
+++ stable/12/sys/dev/cxgbe/t4_main.c   Tue Aug 25 02:14:36 2020        (r364745)
@@ -695,6 +695,7 @@ static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
 static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
 static int sysctl_cpus(SYSCTL_HANDLER_ARGS);
 #ifdef TCP_OFFLOAD
+static int sysctl_tls(SYSCTL_HANDLER_ARGS);
 static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS);
 static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
 static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
@@ -6293,8 +6294,8 @@ t4_sysctls(struct adapter *sc)
                    CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
 
                sc->tt.tls = 0;
-               SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tls", CTLFLAG_RW,
-                   &sc->tt.tls, 0, "Inline TLS allowed");
+               SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls", CTLTYPE_INT |
+                   CTLFLAG_RW, sc, 0, sysctl_tls, "I", "Inline TLS allowed");
 
                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports",
                    CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tls_rx_ports,
@@ -9364,6 +9365,37 @@ sysctl_cpus(SYSCTL_HANDLER_ARGS)
 
 #ifdef TCP_OFFLOAD
 static int
+sysctl_tls(SYSCTL_HANDLER_ARGS)
+{
+       struct adapter *sc = arg1;
+       int i, j, v, rc;
+       struct vi_info *vi;
+
+       v = sc->tt.tls;
+       rc = sysctl_handle_int(oidp, &v, 0, req);
+       if (rc != 0 || req->newptr == NULL)
+               return (rc);
+
+       if (v != 0 && !(sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS))
+               return (ENOTSUP);
+
+       rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4stls");
+       if (rc)
+               return (rc);
+       sc->tt.tls = !!v;
+       for_each_port(sc, i) {
+               for_each_vi(sc->port[i], j, vi) {
+                       if (vi->flags & VI_INIT_DONE)
+                               t4_update_fl_bufsize(vi->ifp);
+               }
+       }
+       end_synchronized_op(sc, 0);
+
+       return (0);
+}
+
+static int
 sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS)
 {
        struct adapter *sc = arg1;
@@ -10039,8 +10071,6 @@ clear_stats(struct adapter *sc, u_int port_id)
                                rxq->rxcsum = 0;
                                rxq->vlan_extraction = 0;
 
-                               rxq->fl.mbuf_allocated = 0;
-                               rxq->fl.mbuf_inlined = 0;
                                rxq->fl.cl_allocated = 0;
                                rxq->fl.cl_recycled = 0;
                                rxq->fl.cl_fast_recycled = 0;
@@ -10069,8 +10099,6 @@ clear_stats(struct adapter *sc, u_int port_id)
 #endif
 #ifdef TCP_OFFLOAD
                        for_each_ofld_rxq(vi, i, ofld_rxq) {
-                               ofld_rxq->fl.mbuf_allocated = 0;
-                               ofld_rxq->fl.mbuf_inlined = 0;
                                ofld_rxq->fl.cl_allocated = 0;
                                ofld_rxq->fl.cl_recycled = 0;
                                ofld_rxq->fl.cl_fast_recycled = 0;

Modified: stable/12/sys/dev/cxgbe/t4_netmap.c
==============================================================================
--- stable/12/sys/dev/cxgbe/t4_netmap.c Tue Aug 25 00:58:14 2020        (r364744)
+++ stable/12/sys/dev/cxgbe/t4_netmap.c Tue Aug 25 02:14:36 2020        (r364745)
@@ -355,7 +355,7 @@ cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi
        struct sge_nm_rxq *nm_rxq;
        struct sge_nm_txq *nm_txq;
        int rc, i, j, hwidx, defq, nrssq;
-       struct hw_buf_info *hwb;
+       struct rx_buf_info *rxb;
 
        ASSERT_SYNCHRONIZED_OP(sc);
 
@@ -363,17 +363,22 @@ cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi
            (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                return (EAGAIN);
 
-       hwb = &sc->sge.hw_buf_info[0];
-       for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) {
-               if (hwb->size == NETMAP_BUF_SIZE(na))
+       rxb = &sc->sge.rx_buf_info[0];
+       for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) {
+               if (rxb->size1 == NETMAP_BUF_SIZE(na)) {
+                       hwidx = rxb->hwidx1;
                        break;
+               }
+               if (rxb->size2 == NETMAP_BUF_SIZE(na)) {
+                       hwidx = rxb->hwidx2;
+                       break;
+               }
        }
-       if (i >= SGE_FLBUF_SIZES) {
+       if (i >= SW_ZONE_SIZES) {
                if_printf(ifp, "no hwidx for netmap buffer size %d.\n",
                    NETMAP_BUF_SIZE(na));
                return (ENXIO);
        }
-       hwidx = i;
 
        /* Must set caps before calling netmap_reset */
        nm_set_native_flags(na);

Modified: stable/12/sys/dev/cxgbe/t4_sge.c
==============================================================================
--- stable/12/sys/dev/cxgbe/t4_sge.c    Tue Aug 25 00:58:14 2020        (r364744)
+++ stable/12/sys/dev/cxgbe/t4_sge.c    Tue Aug 25 02:14:36 2020        (r364745)
@@ -143,16 +143,6 @@ SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pack, CTLFLAG_RDTUN
     "payload pack boundary (bytes)");
 
 /*
- * Allow the driver to create mbuf(s) in a cluster allocated for rx.
- * 0: never; always allocate mbufs from the zone_mbuf UMA zone.
- * 1: ok to create mbuf(s) within a cluster if there is room.
- */
-static int allow_mbufs_in_cluster = 1;
-SYSCTL_INT(_hw_cxgbe, OID_AUTO, allow_mbufs_in_cluster, CTLFLAG_RDTUN,
-    &allow_mbufs_in_cluster, 0,
-    "Allow driver to create mbufs within a rx cluster");
-
-/*
  * Largest rx cluster size that the driver is allowed to allocate.
  */
 static int largest_rx_cluster = MJUM16BYTES;
@@ -224,7 +214,8 @@ struct sgl {
 static int service_iq(struct sge_iq *, int);
 static int service_iq_fl(struct sge_iq *, int);
 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t);
-static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
+static int eth_rx(struct adapter *, struct sge_rxq *, const struct iq_desc *,
+    u_int);
 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int);
 static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *);
 static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t,
@@ -279,8 +270,7 @@ static int refill_fl(struct adapter *, struct sge_fl *
 static void refill_sfl(void *);
 static int alloc_fl_sdesc(struct sge_fl *);
 static void free_fl_sdesc(struct adapter *, struct sge_fl *);
-static void find_best_refill_source(struct adapter *, struct sge_fl *, int);
-static void find_safe_refill_source(struct adapter *, struct sge_fl *);
+static int find_refill_source(struct adapter *, int, bool);
 static void add_fl_to_sfl(struct adapter *, struct sge_fl *);
 
 static inline void get_pkt_gl(struct mbuf *, struct sglist *);
@@ -556,7 +546,6 @@ t4_sge_modload(void)
        t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg);
        t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg);
        t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
-       t4_register_cpl_handler(CPL_RX_PKT, t4_eth_rx);
 #ifdef RATELIMIT
        t4_register_shared_cpl_handler(CPL_FW4_ACK, ethofld_fw4_ack,
            CPL_COOKIE_ETHOFLD);
@@ -665,24 +654,19 @@ setup_pad_and_pack_boundaries(struct adapter *sc)
 void
 t4_tweak_chip_settings(struct adapter *sc)
 {
-       int i;
+       int i, reg;
        uint32_t v, m;
        int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
        int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk;
        int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */
        uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
-       static int sge_flbuf_sizes[] = {
+       static int sw_buf_sizes[] = {
                MCLBYTES,
 #if MJUMPAGESIZE != MCLBYTES
                MJUMPAGESIZE,
-               MJUMPAGESIZE - CL_METADATA_SIZE,
-               MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE,
 #endif
                MJUM9BYTES,
-               MJUM16BYTES,
-               MCLBYTES - MSIZE - CL_METADATA_SIZE,
-               MJUM9BYTES - CL_METADATA_SIZE,
-               MJUM16BYTES - CL_METADATA_SIZE,
+               MJUM16BYTES
        };
 
        KASSERT(sc->flags & MASTER_PF,
@@ -705,13 +689,16 @@ t4_tweak_chip_settings(struct adapter *sc)
            V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
        t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v);
 
-       KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES,
-           ("%s: hw buffer size table too big", __func__));
        t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0, 4096);
        t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE1, 65536);
-       for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) {
-               t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE15 - (4 * i),
-                   sge_flbuf_sizes[i]);
+       reg = A_SGE_FL_BUFFER_SIZE2;
+       for (i = 0; i < nitems(sw_buf_sizes); i++) {
+               MPASS(reg <= A_SGE_FL_BUFFER_SIZE15);
+               t4_write_reg(sc, reg, sw_buf_sizes[i]);
+               reg += 4;
+               MPASS(reg <= A_SGE_FL_BUFFER_SIZE15);
+               t4_write_reg(sc, reg, sw_buf_sizes[i] - CL_METADATA_SIZE);
+               reg += 4;
        }
 
        v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) |
@@ -788,11 +775,11 @@ t4_tweak_chip_settings(struct adapter *sc)
 }
 
 /*
- * SGE wants the buffer to be at least 64B and then a multiple of 16.  If
- * padding is in use, the buffer's start and end need to be aligned to the pad
- * boundary as well.  We'll just make sure that the size is a multiple of the
- * boundary here, it is up to the buffer allocation code to make sure the start
- * of the buffer is aligned as well.
+ * SGE wants the buffer to be at least 64B and then a multiple of 16.  Its
+ * address must be 16B aligned.  If padding is in use the buffer's start and end
+ * need to be aligned to the pad boundary as well.  We'll just make sure that
+ * the size is a multiple of the pad boundary here; it is up to the buffer
+ * allocation code to make sure the start of the buffer is aligned.
  */
 static inline int
 hwsz_ok(struct adapter *sc, int hwsz)
@@ -821,8 +808,7 @@ t4_read_chip_settings(struct adapter *sc)
                MJUM9BYTES,
                MJUM16BYTES
        };
-       struct sw_zone_info *swz, *safe_swz;
-       struct hw_buf_info *hwb;
+       struct rx_buf_info *rxb;
 
        m = F_RXPKTCPLMODE;
        v = F_RXPKTCPLMODE;
@@ -841,114 +827,51 @@ t4_read_chip_settings(struct adapter *sc)
                rc = EINVAL;
        }
 
-       /* Filter out unusable hw buffer sizes entirely (mark with -2). */
-       hwb = &s->hw_buf_info[0];
-       for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) {
-               r = sc->params.sge.sge_fl_buffer_size[i];
-               hwb->size = r;
-               hwb->zidx = hwsz_ok(sc, r) ? -1 : -2;
-               hwb->next = -1;
-       }
+       s->safe_zidx = -1;
+       rxb = &s->rx_buf_info[0];
+       for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) {
+               rxb->size1 = sw_buf_sizes[i];
+               rxb->zone = m_getzone(rxb->size1);
+               rxb->type = m_gettype(rxb->size1);
+               rxb->size2 = 0;
+               rxb->hwidx1 = -1;
+               rxb->hwidx2 = -1;
+               for (j = 0; j < SGE_FLBUF_SIZES; j++) {
+                       int hwsize = sp->sge_fl_buffer_size[j];
 
-       /*
-        * Create a sorted list in decreasing order of hw buffer sizes (and so
-        * increasing order of spare area) for each software zone.
-        *
-        * If padding is enabled then the start and end of the buffer must align
-        * to the pad boundary; if packing is enabled then they must align with
-        * the pack boundary as well.  Allocations from the cluster zones are
-        * aligned to min(size, 4K), so the buffer starts at that alignment and
-        * ends at hwb->size alignment.  If mbuf inlining is allowed the
-        * starting alignment will be reduced to MSIZE and the driver will
-        * exercise appropriate caution when deciding on the best buffer layout
-        * to use.
-        */
-       n = 0;  /* no usable buffer size to begin with */
-       swz = &s->sw_zone_info[0];
-       safe_swz = NULL;
-       for (i = 0; i < SW_ZONE_SIZES; i++, swz++) {
-               int8_t head = -1, tail = -1;
-
-               swz->size = sw_buf_sizes[i];
-               swz->zone = m_getzone(swz->size);
-               swz->type = m_gettype(swz->size);
-
-               if (swz->size < PAGE_SIZE) {
-                       MPASS(powerof2(swz->size));
-                       if (fl_pad && (swz->size % sp->pad_boundary != 0))
+                       if (!hwsz_ok(sc, hwsize))
                                continue;
-               }
 
-               if (swz->size == safest_rx_cluster)
-                       safe_swz = swz;
+                       /* hwidx for size1 */
+                       if (rxb->hwidx1 == -1 && rxb->size1 == hwsize)
+                               rxb->hwidx1 = j;
 
-               hwb = &s->hw_buf_info[0];
-               for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) {
-                       if (hwb->zidx != -1 || hwb->size > swz->size)
+                       /* hwidx for size2 (buffer packing) */
+                       if (rxb->size1 - CL_METADATA_SIZE < hwsize)
                                continue;
-#ifdef INVARIANTS
-                       if (fl_pad)
-                               MPASS(hwb->size % sp->pad_boundary == 0);
-#endif
-                       hwb->zidx = i;
-                       if (head == -1)
-                               head = tail = j;
-                       else if (hwb->size < s->hw_buf_info[tail].size) {
-                               s->hw_buf_info[tail].next = j;
-                               tail = j;
-                       } else {
-                               int8_t *cur;
-                               struct hw_buf_info *t;
-
-                               for (cur = &head; *cur != -1; cur = &t->next) {
-                                       t = &s->hw_buf_info[*cur];
-                                       if (hwb->size == t->size) {
-                                               hwb->zidx = -2;
-                                               break;
-                                       }
-                                       if (hwb->size > t->size) {
-                                               hwb->next = *cur;
-                                               *cur = j;
-                                               break;
-                                       }
+                       n = rxb->size1 - hwsize - CL_METADATA_SIZE;
+                       if (n == 0) {
+                               rxb->hwidx2 = j;
+                               rxb->size2 = hwsize;
+                               break;  /* stop looking */
+                       }
+                       if (rxb->hwidx2 != -1) {
+                               if (n < sp->sge_fl_buffer_size[rxb->hwidx2] -
+                                   hwsize - CL_METADATA_SIZE) {
+                                       rxb->hwidx2 = j;
+                                       rxb->size2 = hwsize;
                                }
+                       } else if (n <= 2 * CL_METADATA_SIZE) {
+                               rxb->hwidx2 = j;
+                               rxb->size2 = hwsize;
                        }
                }
-               swz->head_hwidx = head;
-               swz->tail_hwidx = tail;
-
-               if (tail != -1) {
-                       n++;
-                       if (swz->size - s->hw_buf_info[tail].size >=
-                           CL_METADATA_SIZE)
-                               sc->flags |= BUF_PACKING_OK;
-               }
+               if (rxb->hwidx2 != -1)
+                       sc->flags |= BUF_PACKING_OK;
+               if (s->safe_zidx == -1 && rxb->size1 == safest_rx_cluster)
+                       s->safe_zidx = i;
        }
-       if (n == 0) {
-               device_printf(sc->dev, "no usable SGE FL buffer size.\n");
-               rc = EINVAL;
-       }
 
-       s->safe_hwidx1 = -1;
-       s->safe_hwidx2 = -1;
-       if (safe_swz != NULL) {
-               s->safe_hwidx1 = safe_swz->head_hwidx;
-               for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) {
-                       int spare;
-
-                       hwb = &s->hw_buf_info[i];
-#ifdef INVARIANTS
-                       if (fl_pad)
-                               MPASS(hwb->size % sp->pad_boundary == 0);
-#endif
-                       spare = safe_swz->size - hwb->size;
-                       if (spare >= CL_METADATA_SIZE) {
-                               s->safe_hwidx2 = i;
-                               break;
-                       }
-               }
-       }
-
        if (sc->flags & IS_VF)
                return (0);
 
@@ -1007,7 +930,7 @@ t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_l
        struct sge_params *sp = &sc->params.sge;
 
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes",
-           CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A",
+           CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_bufsizes, "A",
            "freelist buffer sizes");
 
        SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD,
@@ -1115,28 +1038,19 @@ t4_teardown_adapter_queues(struct adapter *sc)
        return (0);
 }
 
-/* Maximum payload that can be delivered with a single iq descriptor */
+/* Maximum payload that could arrive with a single iq descriptor. */
 static inline int
-mtu_to_max_payload(struct adapter *sc, int mtu, const int toe)
+max_rx_payload(struct adapter *sc, struct ifnet *ifp, const bool ofld)
 {
-       int payload;
+       int maxp;
 
-#ifdef TCP_OFFLOAD
-       if (toe) {
-               int rxcs = G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2));
-
-               /* Note that COP can set rx_coalesce on/off per connection. */
-               payload = max(mtu, rxcs);
-       } else {
-#endif
-               /* large enough even when hw VLAN extraction is disabled */
-               payload = sc->params.sge.fl_pktshift + ETHER_HDR_LEN +
-                   ETHER_VLAN_ENCAP_LEN + mtu;
-#ifdef TCP_OFFLOAD
-       }
-#endif
-
-       return (payload);
+       /* large enough even when hw VLAN extraction is disabled */
+       maxp = sc->params.sge.fl_pktshift + ETHER_HDR_LEN +
+           ETHER_VLAN_ENCAP_LEN + ifp->if_mtu;
+       if (ofld && sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS &&
+           maxp < sc->params.tp.max_rx_pdu)
+               maxp = sc->params.tp.max_rx_pdu;
+       return (maxp);
 }
 
 int
@@ -1162,7 +1076,7 @@ t4_setup_vi_queues(struct vi_info *vi)
        struct ifnet *ifp = vi->ifp;
        struct sysctl_oid *oid = device_get_sysctl_tree(vi->dev);
        struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
-       int maxp, mtu = ifp->if_mtu;
+       int maxp;
 
        /* Interrupt vector to start from (when using multiple vectors) */
        intr_idx = vi->first_intr;
@@ -1206,7 +1120,7 @@ t4_setup_vi_queues(struct vi_info *vi)
         * Allocate rx queues first because a default iqid is required when
         * creating a tx queue.
         */
-       maxp = mtu_to_max_payload(sc, mtu, 0);
+       maxp = max_rx_payload(sc, ifp, false);
        oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq",
            CTLFLAG_RD, NULL, "rx queues");
        for_each_rxq(vi, i, rxq) {
@@ -1228,7 +1142,7 @@ t4_setup_vi_queues(struct vi_info *vi)
                intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq);
 #endif
 #ifdef TCP_OFFLOAD
-       maxp = mtu_to_max_payload(sc, mtu, 1);
+       maxp = max_rx_payload(sc, ifp, true);
        oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq",
            CTLFLAG_RD, NULL, "rx queues for offloaded TCP connections");
        for_each_ofld_rxq(vi, i, ofld_rxq) {
@@ -1618,6 +1532,20 @@ last_flit_to_ns(struct adapter *sc, uint64_t lf)
                return (n * 1000000 / sc->params.vpd.cclk);
 }
 
+static inline void
+move_to_next_rxbuf(struct sge_fl *fl)
+{
+
+       fl->rx_offset = 0;
+       if (__predict_false((++fl->cidx & 7) == 0)) {
+               uint16_t cidx = fl->cidx >> 3;
+
+               if (__predict_false(cidx == fl->sidx))
+                       fl->cidx = cidx = 0;
+               fl->hw_cidx = cidx;
+       }
+}
+
 /*
  * Deals with interrupts on an iq+fl queue.
  */
@@ -1628,8 +1556,8 @@ service_iq_fl(struct sge_iq *iq, int budget)
        struct sge_fl *fl;
        struct adapter *sc = iq->adapter;
        struct iq_desc *d = &iq->desc[iq->cidx];
-       int ndescs = 0, limit;
-       int rsp_type, refill, starved;
+       int ndescs, limit;
+       int rsp_type, starved;
        uint32_t lq;
        uint16_t fl_hw_cidx;
        struct mbuf *m0;
@@ -1641,10 +1569,7 @@ service_iq_fl(struct sge_iq *iq, int budget)
        KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));
        MPASS(iq->flags & IQ_HAS_FL);
 
-       limit = budget ? budget : iq->qsize / 16;
-       fl = &rxq->fl;
-       fl_hw_cidx = fl->hw_cidx;       /* stable snapshot */
-
+       ndescs = 0;
 #if defined(INET) || defined(INET6)
        if (iq->flags & IQ_ADJ_CREDIT) {
                MPASS(sort_before_lro(lro));
@@ -1662,38 +1587,40 @@ service_iq_fl(struct sge_iq *iq, int budget)
        MPASS((iq->flags & IQ_ADJ_CREDIT) == 0);
 #endif
 
+       limit = budget ? budget : iq->qsize / 16;
+       fl = &rxq->fl;
+       fl_hw_cidx = fl->hw_cidx;       /* stable snapshot */
        while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) {
 
                rmb();
 
-               refill = 0;
                m0 = NULL;
                rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen);
                lq = be32toh(d->rsp.pldbuflen_qid);
 
                switch (rsp_type) {
                case X_RSPD_TYPE_FLBUF:
+                       if (lq & F_RSPD_NEWBUF) {
+                               if (fl->rx_offset > 0)
+                                       move_to_next_rxbuf(fl);
+                               lq = G_RSPD_LEN(lq);
+                       }
+                       if (IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 4) {
+                               FL_LOCK(fl);
+                               refill_fl(sc, fl, 64);
+                               FL_UNLOCK(fl);
+                               fl_hw_cidx = fl->hw_cidx;
+                       }
 
+                       if (d->rss.opcode == CPL_RX_PKT) {
+                               if (__predict_true(eth_rx(sc, rxq, d, lq) == 0))
+                                       break;
+                               goto out;
+                       }
                        m0 = get_fl_payload(sc, fl, lq);
                        if (__predict_false(m0 == NULL))
                                goto out;
-                       refill = IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 2;
 
-                       if (iq->flags & IQ_RX_TIMESTAMP) {
-                               /*
-                                * Fill up rcv_tstmp but do not set M_TSTMP.
-                                * rcv_tstmp is not in the format that the
-                                * kernel expects and we don't want to mislead
-                                * it.  For now this is only for custom code
-                                * that knows how to interpret cxgbe's stamp.
-                                */
-                               m0->m_pkthdr.rcv_tstmp =
-                                   last_flit_to_ns(sc, d->rsp.u.last_flit);
-#ifdef notyet
-                               m0->m_flags |= M_TSTMP;
-#endif
-                       }
-
                        /* fall through */
 
                case X_RSPD_TYPE_CPL:
@@ -1737,7 +1664,6 @@ service_iq_fl(struct sge_iq *iq, int budget)
                        t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
                            V_INGRESSQID(iq->cntxt_id) |
                            V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
-                       ndescs = 0;
 
 #if defined(INET) || defined(INET6)
                        if (iq->flags & IQ_LRO_ENABLED &&
@@ -1746,20 +1672,10 @@ service_iq_fl(struct sge_iq *iq, int budget)
                                tcp_lro_flush_inactive(lro, &lro_timeout);
                        }
 #endif
-                       if (budget) {
-                               FL_LOCK(fl);
-                               refill_fl(sc, fl, 32);
-                               FL_UNLOCK(fl);
-
+                       if (budget)
                                return (EINPROGRESS);
-                       }
+                       ndescs = 0;
                }
-               if (refill) {
-                       FL_LOCK(fl);
-                       refill_fl(sc, fl, 32);
-                       FL_UNLOCK(fl);
-                       fl_hw_cidx = fl->hw_cidx;
-               }
        }
 out:
 #if defined(INET) || defined(INET6)
@@ -1787,49 +1703,28 @@ out:
        return (0);
 }
 
-static inline int
-cl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll)
-{
-       int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0;
-
-       if (rc)
-               MPASS(cll->region3 >= CL_METADATA_SIZE);
-
-       return (rc);
-}
-
 static inline struct cluster_metadata *
-cl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll,
-    caddr_t cl)
+cl_metadata(struct fl_sdesc *sd)
 {
 
-       if (cl_has_metadata(fl, cll)) {
-               struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
-
-               return ((struct cluster_metadata *)(cl + swz->size) - 1);
-       }
-       return (NULL);
+       return ((void *)(sd->cl + sd->moff));
 }
 
 static void
 rxb_free(struct mbuf *m)
 {
-       uma_zone_t zone = m->m_ext.ext_arg1;
-       void *cl = m->m_ext.ext_arg2;
+       struct cluster_metadata *clm = m->m_ext.ext_arg1;
 
-       uma_zfree(zone, cl);
+       uma_zfree(clm->zone, clm->cl);
        counter_u64_add(extfree_rels, 1);
 }
 
 /*
- * The mbuf returned by this function could be allocated from zone_mbuf or
- * constructed in spare room in the cluster.
- *
- * The mbuf carries the payload in one of these ways
- * a) frame inside the mbuf (mbuf from zone_mbuf)
- * b) m_cljset (for clusters without metadata) zone_mbuf
- * c) m_extaddref (cluster with metadata) inline mbuf
- * d) m_extaddref (cluster with metadata) zone_mbuf
+ * The mbuf returned comes from zone_mbuf and carries the payload in one of these
+ * ways
+ * a) complete frame inside the mbuf
+ * b) m_cljset (for clusters without metadata)
+ * c) m_extaddref (cluster with metadata)
  */
 static struct mbuf *
 get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset,
@@ -1837,118 +1732,86 @@ get_scatter_segment(struct adapter *sc, struct sge_fl 
 {
        struct mbuf *m;
        struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
-       struct cluster_layout *cll = &sd->cll;
-       struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
-       struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx];
-       struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl);
+       struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx];
+       struct cluster_metadata *clm;
        int len, blen;
        caddr_t payload;
 
-       blen = hwb->size - fl->rx_offset;       /* max possible in this buf */
-       len = min(remaining, blen);
-       payload = sd->cl + cll->region1 + fl->rx_offset;
        if (fl->flags & FL_BUF_PACKING) {
-               const u_int l = fr_offset + len;
-               const u_int pad = roundup2(l, fl->buf_boundary) - l;
+               u_int l, pad;
 
-               if (fl->rx_offset + len + pad < hwb->size)
+               blen = rxb->size2 - fl->rx_offset;      /* max possible in this buf */
+               len = min(remaining, blen);
+               payload = sd->cl + fl->rx_offset;
+
+               l = fr_offset + len;
+               pad = roundup2(l, fl->buf_boundary) - l;
+               if (fl->rx_offset + len + pad < rxb->size2)
                        blen = len + pad;
-               MPASS(fl->rx_offset + blen <= hwb->size);
+               MPASS(fl->rx_offset + blen <= rxb->size2);
        } else {
                MPASS(fl->rx_offset == 0);      /* not packing */
+               blen = rxb->size1;
+               len = min(remaining, blen);
+               payload = sd->cl;
        }
 
-
-       if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {
-
-               /*
-                * Copy payload into a freshly allocated mbuf.
-                */
-
-               m = fr_offset == 0 ?
-                   m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
-               if (m == NULL)
+       if (fr_offset == 0) {
+               m = m_gethdr(M_NOWAIT, MT_DATA);
+               if (__predict_false(m == NULL))
                        return (NULL);
-               fl->mbuf_allocated++;
+               m->m_pkthdr.len = remaining;
+       } else {
+               m = m_get(M_NOWAIT, MT_DATA);
+               if (__predict_false(m == NULL))
+                       return (NULL);
+       }
+       m->m_len = len;
 
+       if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {
                /* copy data to mbuf */
                bcopy(payload, mtod(m, caddr_t), len);
-
-       } else if (sd->nmbuf * MSIZE < cll->region1) {
-
-               /*
-                * There's spare room in the cluster for an mbuf.  Create one
-                * and associate it with the payload that's in the cluster.
-                */
-
-               MPASS(clm != NULL);
-               m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE);
-               /* No bzero required */
-               if (m_init(m, M_NOWAIT, MT_DATA,
-                   fr_offset == 0 ? M_PKTHDR | M_NOFREE : M_NOFREE))
-                       return (NULL);
-               fl->mbuf_inlined++;
-               m_extaddref(m, payload, blen, &clm->refcount, rxb_free,
-                   swz->zone, sd->cl);
-               if (sd->nmbuf++ == 0)
+               if (fl->flags & FL_BUF_PACKING) {
+                       fl->rx_offset += blen;
+                       MPASS(fl->rx_offset <= rxb->size2);
+                       if (fl->rx_offset < rxb->size2)
+                               return (m);     /* without advancing the cidx */
+               }
+       } else if (fl->flags & FL_BUF_PACKING) {
+               clm = cl_metadata(sd);
+               if (sd->nmbuf++ == 0) {
+                       clm->refcount = 1;
+                       clm->zone = rxb->zone;
+                       clm->cl = sd->cl;
                        counter_u64_add(extfree_refs, 1);
-
-       } else {
-
-               /*
-                * Grab an mbuf from zone_mbuf and associate it with the
-                * payload in the cluster.
-                */
-
-               m = fr_offset == 0 ?
-                   m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
-               if (m == NULL)
-                       return (NULL);
-               fl->mbuf_allocated++;
-               if (clm != NULL) {
-                       m_extaddref(m, payload, blen, &clm->refcount,
-                           rxb_free, swz->zone, sd->cl);
-                       if (sd->nmbuf++ == 0)
-                               counter_u64_add(extfree_refs, 1);
-               } else {
-                       m_cljset(m, sd->cl, swz->type);
-                       sd->cl = NULL;  /* consumed, not a recycle candidate */
                }
-       }
-       if (fr_offset == 0)
-               m->m_pkthdr.len = remaining;
-       m->m_len = len;
+               m_extaddref(m, payload, blen, &clm->refcount, rxb_free, clm,
+                   NULL);
 
-       if (fl->flags & FL_BUF_PACKING) {
                fl->rx_offset += blen;
-               MPASS(fl->rx_offset <= hwb->size);
-               if (fl->rx_offset < hwb->size)
+               MPASS(fl->rx_offset <= rxb->size2);
+               if (fl->rx_offset < rxb->size2)
                        return (m);     /* without advancing the cidx */
+       } else {
+               m_cljset(m, sd->cl, rxb->type);
+               sd->cl = NULL;  /* consumed, not a recycle candidate */
        }
 
-       if (__predict_false(++fl->cidx % 8 == 0)) {
-               uint16_t cidx = fl->cidx / 8;
+       move_to_next_rxbuf(fl);
 
-               if (__predict_false(cidx == fl->sidx))
-                       fl->cidx = cidx = 0;
-               fl->hw_cidx = cidx;
-       }
-       fl->rx_offset = 0;
-
        return (m);
 }
 
 static struct mbuf *
-get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf)
+get_fl_payload(struct adapter *sc, struct sge_fl *fl, const u_int plen)
 {
        struct mbuf *m0, *m, **pnext;
        u_int remaining;
-       const u_int total = G_RSPD_LEN(len_newbuf);
 
        if (__predict_false(fl->flags & FL_BUF_RESUME)) {
                M_ASSERTPKTHDR(fl->m0);
-               MPASS(fl->m0->m_pkthdr.len == total);
-               MPASS(fl->remaining < total);
+               MPASS(fl->m0->m_pkthdr.len == plen);
+               MPASS(fl->remaining < plen);
 
                m0 = fl->m0;
                pnext = fl->pnext;
@@ -1957,31 +1820,20 @@ get_fl_payload(struct adapter *sc, struct sge_fl *fl, 
                goto get_segment;
        }
 
-       if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) {
-               fl->rx_offset = 0;
-               if (__predict_false(++fl->cidx % 8 == 0)) {
-                       uint16_t cidx = fl->cidx / 8;
-
-                       if (__predict_false(cidx == fl->sidx))
-                               fl->cidx = cidx = 0;
-                       fl->hw_cidx = cidx;
-               }
-       }
-
        /*
         * Payload starts at rx_offset in the current hw buffer.  Its length is
         * 'len' and it may span multiple hw buffers.

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***