From: Jason Xing <[email protected]>

Like what the i40e driver did in commit 3106c580fb7cf
("i40e: Use batched xsk Tx interfaces to increase performance"), use
the batched xsk Tx interfaces to transmit packets.

Signed-off-by: Jason Xing <[email protected]>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 106 +++++++++++++------
 1 file changed, 72 insertions(+), 34 deletions(-)
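
A note on the batching math for reviewers (illustrative only, not part
of the change itself): ixgbe_fill_tx_hw_ring() splits the peeked
descriptors into full batches of PKTS_PER_BATCH plus a remainder, which
relies on PKTS_PER_BATCH staying a power of two. A minimal userspace
sketch of that split, with a made-up send_one() standing in for
ixgbe_xmit_pkt(), looks roughly like this:

#include <stdio.h>

#define PKTS_PER_BATCH 4

/* Stand-in for ixgbe_xmit_pkt(): just report which descriptor is filled. */
static void send_one(unsigned int i)
{
	printf("fill desc %u\n", i);
}

static void fill_ring(unsigned int nb_pkts)
{
	/* Round down to a whole number of PKTS_PER_BATCH-sized batches. */
	unsigned int batched = nb_pkts & ~(PKTS_PER_BATCH - 1);
	/* The low bits are whatever does not fill a complete batch. */
	unsigned int leftover = nb_pkts & (PKTS_PER_BATCH - 1);
	unsigned int i, j;

	for (i = 0; i < batched; i += PKTS_PER_BATCH)
		for (j = 0; j < PKTS_PER_BATCH; j++)	/* unrolled in the driver */
			send_one(i + j);

	for (i = batched; i < batched + leftover; i++)
		send_one(i);
}

int main(void)
{
	fill_ring(10);	/* two full batches, then two leftover packets */
	return 0;
}

The caller (ixgbe_xmit_zc()) additionally splits the work at the ring
wrap point before calling ixgbe_fill_tx_hw_ring(), so the per-packet
path no longer needs to check next_to_use against the ring size, and
the RS bit is set once on the last filled descriptor rather than on
every packet.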

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index f3d3f5c1cdc7..9fe2c4bf8bc5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -2,12 +2,15 @@
 /* Copyright(c) 2018 Intel Corporation. */
 
 #include <linux/bpf_trace.h>
+#include <linux/unroll.h>
 #include <net/xdp_sock_drv.h>
 #include <net/xdp.h>
 
 #include "ixgbe.h"
 #include "ixgbe_txrx_common.h"
 
+#define PKTS_PER_BATCH 4
+
 struct xsk_buff_pool *ixgbe_xsk_pool(struct ixgbe_adapter *adapter,
                                     struct ixgbe_ring *ring)
 {
@@ -388,58 +391,93 @@ void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring)
        }
 }
 
-static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
+static void ixgbe_set_rs_bit(struct ixgbe_ring *xdp_ring)
+{
+       u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
+       union ixgbe_adv_tx_desc *tx_desc;
+
+       tx_desc = IXGBE_TX_DESC(xdp_ring, ntu);
+       tx_desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD_RS);
+}
+
+static void ixgbe_xmit_pkt(struct ixgbe_ring *xdp_ring, struct xdp_desc *desc,
+                          int i)
+
 {
        struct xsk_buff_pool *pool = xdp_ring->xsk_pool;
        union ixgbe_adv_tx_desc *tx_desc = NULL;
        struct ixgbe_tx_buffer *tx_bi;
-       struct xdp_desc desc;
        dma_addr_t dma;
        u32 cmd_type;
 
-       if (!budget)
-               return true;
+       dma = xsk_buff_raw_get_dma(pool, desc[i].addr);
+       xsk_buff_raw_dma_sync_for_device(pool, dma, desc[i].len);
 
-       while (likely(budget)) {
-               if (!netif_carrier_ok(xdp_ring->netdev))
-                       break;
+       tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
+       tx_bi->bytecount = desc[i].len;
+       tx_bi->xdpf = NULL;
+       tx_bi->gso_segs = 1;
 
-               if (!xsk_tx_peek_desc(pool, &desc))
-                       break;
+       tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+       tx_desc->read.buffer_addr = cpu_to_le64(dma);
 
-               dma = xsk_buff_raw_get_dma(pool, desc.addr);
-               xsk_buff_raw_dma_sync_for_device(pool, dma, desc.len);
+       cmd_type = IXGBE_ADVTXD_DTYP_DATA |
+                  IXGBE_ADVTXD_DCMD_DEXT |
+                  IXGBE_ADVTXD_DCMD_IFCS;
+       cmd_type |= desc[i].len | IXGBE_TXD_CMD_EOP;
+       tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+       tx_desc->read.olinfo_status =
+               cpu_to_le32(desc[i].len << IXGBE_ADVTXD_PAYLEN_SHIFT);
 
-               tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
-               tx_bi->bytecount = desc.len;
-               tx_bi->xdpf = NULL;
-               tx_bi->gso_segs = 1;
+       xdp_ring->next_to_use++;
+}
 
-               tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
-               tx_desc->read.buffer_addr = cpu_to_le64(dma);
+static void ixgbe_xmit_pkt_batch(struct ixgbe_ring *xdp_ring, struct xdp_desc *desc)
+{
+       u32 i;
 
-               /* put descriptor type bits */
-               cmd_type = IXGBE_ADVTXD_DTYP_DATA |
-                          IXGBE_ADVTXD_DCMD_DEXT |
-                          IXGBE_ADVTXD_DCMD_IFCS;
-               cmd_type |= desc.len | IXGBE_TXD_CMD;
-               tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
-               tx_desc->read.olinfo_status =
-                       cpu_to_le32(desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+       unrolled_count(PKTS_PER_BATCH)
+       for (i = 0; i < PKTS_PER_BATCH; i++)
+               ixgbe_xmit_pkt(xdp_ring, desc, i);
+}
 
-               xdp_ring->next_to_use++;
-               if (xdp_ring->next_to_use == xdp_ring->count)
-                       xdp_ring->next_to_use = 0;
+static void ixgbe_fill_tx_hw_ring(struct ixgbe_ring *xdp_ring,
+                                 struct xdp_desc *descs, u32 nb_pkts)
+{
+       u32 batched, leftover, i;
+
+       batched = nb_pkts & ~(PKTS_PER_BATCH - 1);
+       leftover = nb_pkts & (PKTS_PER_BATCH - 1);
+       for (i = 0; i < batched; i += PKTS_PER_BATCH)
+               ixgbe_xmit_pkt_batch(xdp_ring, &descs[i]);
+       for (i = batched; i < batched + leftover; i++)
+               ixgbe_xmit_pkt(xdp_ring, &descs[i], 0);
+}
 
-               budget--;
-       }
+static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
+{
+       struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
+       u32 nb_pkts, nb_processed = 0;
 
-       if (tx_desc) {
-               ixgbe_xdp_ring_update_tail(xdp_ring);
-               xsk_tx_release(pool);
+       if (!netif_carrier_ok(xdp_ring->netdev))
+               return true;
+
+       nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
+       if (!nb_pkts)
+               return true;
+
+       if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
+               nb_processed = xdp_ring->count - xdp_ring->next_to_use;
+               ixgbe_fill_tx_hw_ring(xdp_ring, descs, nb_processed);
+               xdp_ring->next_to_use = 0;
        }
 
-       return !!budget;
+       ixgbe_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed);
+
+       ixgbe_set_rs_bit(xdp_ring);
+       ixgbe_xdp_ring_update_tail(xdp_ring);
+
+       return nb_pkts < budget;
 }
 
 static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring,
-- 
2.41.3
