Reduce host CPU overhead of Tx packet processing:
* Use local variables inside the per-packet loop instead of struct fields.
* Factor bookkeeping and conditionals out of the per-packet loop where
  possible.
* Handle at most 64 packets per transmit call and post buffers to the NIC
  once per call instead of once per threshold crossing.

Signed-off-by: Nelson Escobar <neescoba at cisco.com>
Signed-off-by: John Daley <johndale at cisco.com>
---
 drivers/net/enic/base/vnic_wq.h |   1 +
 drivers/net/enic/enic_res.h     |   2 +-
 drivers/net/enic/enic_rxtx.c    | 167 +++++++++++++++++++---------------------
 3 files changed, 83 insertions(+), 87 deletions(-)

diff --git a/drivers/net/enic/base/vnic_wq.h b/drivers/net/enic/base/vnic_wq.h
index 689b81c..7a66813 100644
--- a/drivers/net/enic/base/vnic_wq.h
+++ b/drivers/net/enic/base/vnic_wq.h
@@ -67,6 +67,7 @@ struct vnic_wq_ctrl {

 /* 16 bytes */
 struct vnic_wq_buf {
+       struct rte_mempool *pool;
        void *mb;
 };

diff --git a/drivers/net/enic/enic_res.h b/drivers/net/enic/enic_res.h
index 955db71..3c8e303 100644
--- a/drivers/net/enic/enic_res.h
+++ b/drivers/net/enic/enic_res.h
@@ -53,7 +53,7 @@

 #define ENIC_NON_TSO_MAX_DESC          16
 #define ENIC_DEFAULT_RX_FREE_THRESH    32
-#define ENIC_TX_POST_THRESH            (ENIC_MIN_WQ_DESCS / 2)
+#define ENIC_TX_XMIT_MAX               64

 #define ENIC_SETTING(enic, f) ((enic->config.flags & VENETF_##f) ? 1 : 0)

diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c
index ec8d90a..ba15670 100644
--- a/drivers/net/enic/enic_rxtx.c
+++ b/drivers/net/enic/enic_rxtx.c
@@ -374,114 +374,109 @@ unsigned int enic_cleanup_wq(__rte_unused struct enic *enic, struct vnic_wq *wq)
        return 0;
 }

-void enic_post_wq_index(struct vnic_wq *wq)
-{
-       enic_vnic_post_wq_index(wq);
-}
-
-void enic_send_pkt(struct enic *enic, struct vnic_wq *wq,
-                  struct rte_mbuf *tx_pkt, unsigned short len,
-                  uint8_t sop, uint8_t eop, uint8_t cq_entry,
-                  uint16_t ol_flags, uint16_t vlan_tag)
-{
-       struct wq_enet_desc *desc, *descs;
-       uint16_t mss = 0;
-       uint8_t vlan_tag_insert = 0;
-       uint64_t bus_addr = (dma_addr_t)
-           (tx_pkt->buf_physaddr + tx_pkt->data_off);
-
-       descs = (struct wq_enet_desc *)wq->ring.descs;
-       desc = descs + wq->head_idx;
-
-       if (sop) {
-               if (ol_flags & PKT_TX_VLAN_PKT)
-                       vlan_tag_insert = 1;
-
-               if (enic->hw_ip_checksum) {
-                       if (ol_flags & PKT_TX_IP_CKSUM)
-                               mss |= ENIC_CALC_IP_CKSUM;
-
-                       if (ol_flags & PKT_TX_TCP_UDP_CKSUM)
-                               mss |= ENIC_CALC_TCP_UDP_CKSUM;
-               }
-       }
-
-       wq_enet_desc_enc(desc,
-               bus_addr,
-               len,
-               mss,
-               0 /* header_length */,
-               0 /* offload_mode WQ_ENET_OFFLOAD_MODE_CSUM */,
-               eop,
-               cq_entry,
-               0 /* fcoe_encap */,
-               vlan_tag_insert,
-               vlan_tag,
-               0 /* loopback */);
-
-       enic_vnic_post_wq(wq, (void *)tx_pkt, cq_entry);
-}
-
 uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        uint16_t nb_pkts)
 {
        uint16_t index;
-       unsigned int frags;
-       unsigned int pkt_len;
-       unsigned int seg_len;
-       unsigned int inc_len;
+       unsigned int pkt_len, data_len;
        unsigned int nb_segs;
-       struct rte_mbuf *tx_pkt, *next_tx_pkt;
+       struct rte_mbuf *tx_pkt;
        struct vnic_wq *wq = (struct vnic_wq *)tx_queue;
        struct enic *enic = vnic_dev_priv(wq->vdev);
        unsigned short vlan_id;
        unsigned short ol_flags;
-       uint8_t last_seg, eop;
-       unsigned int host_tx_descs = 0;
+       unsigned int wq_desc_avail;
+       int head_idx;
+       struct vnic_wq_buf *buf;
+       unsigned int hw_ip_cksum_enabled;
+       unsigned int desc_count;
+       struct wq_enet_desc *descs, *desc_p, desc_tmp;
+       uint16_t mss;
+       uint8_t vlan_tag_insert;
+       uint8_t eop;
+       uint64_t bus_addr;

+       enic_cleanup_wq(enic, wq);
+       wq_desc_avail = vnic_wq_desc_avail(wq);
+       head_idx = wq->head_idx;
+       desc_count = wq->ring.desc_count;
+
+       nb_pkts = RTE_MIN(nb_pkts, ENIC_TX_XMIT_MAX);
+
+       hw_ip_cksum_enabled = enic->hw_ip_checksum;
        for (index = 0; index < nb_pkts; index++) {
                tx_pkt = *tx_pkts++;
-               inc_len = 0;
                nb_segs = tx_pkt->nb_segs;
-               if (nb_segs > vnic_wq_desc_avail(wq)) {
+               if (nb_segs > wq_desc_avail) {
                        if (index > 0)
-                               enic_post_wq_index(wq);
-
-                       /* wq cleanup and try again */
-                       if (!enic_cleanup_wq(enic, wq) ||
-                               (nb_segs > vnic_wq_desc_avail(wq))) {
-                               return index;
-                       }
+                               goto post;
+                       goto done;
                }

                pkt_len = tx_pkt->pkt_len;
+               data_len = tx_pkt->data_len;
                vlan_id = tx_pkt->vlan_tci;
                ol_flags = tx_pkt->ol_flags;
-               for (frags = 0; inc_len < pkt_len; frags++) {
-                       if (!tx_pkt)
-                               break;
-                       next_tx_pkt = tx_pkt->next;
-                       seg_len = tx_pkt->data_len;
-                       inc_len += seg_len;
-
-                       host_tx_descs++;
-                       last_seg = 0;
-                       eop = 0;
-                       if ((pkt_len == inc_len) || !next_tx_pkt) {
-                               eop = 1;
-                               /* post if last packet in batch or > thresh */
-                               if ((index == (nb_pkts - 1)) ||
-                                  (host_tx_descs > ENIC_TX_POST_THRESH)) {
-                                       last_seg = 1;
-                                       host_tx_descs = 0;
-                               }
+
+               mss = 0;
+               vlan_tag_insert = 0;
+               bus_addr = (dma_addr_t)
+                          (tx_pkt->buf_physaddr + tx_pkt->data_off);
+
+               descs = (struct wq_enet_desc *)wq->ring.descs;
+               desc_p = descs + head_idx;
+
+               eop = (data_len == pkt_len);
+
+               if (ol_flags & PKT_TX_VLAN_PKT)
+                       vlan_tag_insert = 1;
+
+               if (hw_ip_cksum_enabled && (ol_flags & PKT_TX_IP_CKSUM))
+                       mss |= ENIC_CALC_IP_CKSUM;
+
+               if (hw_ip_cksum_enabled && (ol_flags & PKT_TX_TCP_UDP_CKSUM))
+                       mss |= ENIC_CALC_TCP_UDP_CKSUM;
+
+               wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, 0, 0, eop,
+                                eop, 0, vlan_tag_insert, vlan_id, 0);
+
+               *desc_p = desc_tmp;
+               buf = &wq->bufs[head_idx];
+               buf->mb = (void *)tx_pkt;
+               head_idx = enic_ring_incr(desc_count, head_idx);
+               wq_desc_avail--;
+
+               if (!eop) {
+                       for (tx_pkt = tx_pkt->next; tx_pkt; tx_pkt =
+                           tx_pkt->next) {
+                               data_len = tx_pkt->data_len;
+
+                               if (tx_pkt->next == NULL)
+                                       eop = 1;
+                               desc_p = descs + head_idx;
+                               bus_addr = (dma_addr_t)(tx_pkt->buf_physaddr
+                                          + tx_pkt->data_off);
+                               wq_enet_desc_enc((struct wq_enet_desc *)
+                                                &desc_tmp, bus_addr, data_len,
+                                                mss, 0, 0, eop, eop, 0,
+                                                vlan_tag_insert, vlan_id, 0);
+
+                               *desc_p = desc_tmp;
+                               buf = &wq->bufs[head_idx];
+                               buf->mb = (void *)tx_pkt;
+                               head_idx = enic_ring_incr(desc_count, head_idx);
+                               wq_desc_avail--;
                        }
-                       enic_send_pkt(enic, wq, tx_pkt, (unsigned short)seg_len,
-                                     !frags, eop, last_seg, ol_flags, vlan_id);
-                       tx_pkt = next_tx_pkt;
                }
        }
+ post:
+       rte_wmb();
+       iowrite32(head_idx, &wq->ctrl->posted_index);
+ done:
+       wq->ring.desc_avail = wq_desc_avail;
+       wq->head_idx = head_idx;

-       enic_cleanup_wq(enic, wq);
        return index;
 }
+
+
-- 
2.7.0

Reply via email to