Define the buffer recycle function implementations for the i40e
driver. Currently, buffer recycle mode supports the 128-bit vector
path and can be enabled in both fast-free and non-fast-free mode.
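
For reference, the call sequence these two callbacks are designed for
looks roughly as follows. This is only a sketch: the rx_queue/tx_queue
handles and the per-burst wrapper that applications actually call come
from the ethdev patches of this series, and the variable names below
are illustrative.

    struct rte_eth_rxq_buf_recycle_info rxq_buf_recycle_info;
    uint16_t nb;

    /* Queried once at setup time for the chosen Rx queue. */
    i40e_rxq_buf_recycle_info_get(dev, rx_queue_id, &rxq_buf_recycle_info);

    /* Per burst: stash freed Tx mbufs straight into the Rx buffer ring,
     * then refill the corresponding Rx descriptors.
     */
    nb = i40e_tx_buf_stash_vec(tx_queue, &rxq_buf_recycle_info);
    if (nb != 0)
        i40e_rx_descriptors_refill_vec(rx_queue, nb);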

Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
Signed-off-by: Feifei Wang <feifei.wa...@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
---
 drivers/net/i40e/i40e_ethdev.c          |   1 +
 drivers/net/i40e/i40e_ethdev.h          |   2 +
 drivers/net/i40e/i40e_rxtx.c            |  24 +++++
 drivers/net/i40e/i40e_rxtx.h            |   4 +
 drivers/net/i40e/i40e_rxtx_vec_common.h | 128 ++++++++++++++++++++++++
 5 files changed, 159 insertions(+)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index cb0070f94b..456fb256f5 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -496,6 +496,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
        .flow_ops_get                 = i40e_dev_flow_ops_get,
        .rxq_info_get                 = i40e_rxq_info_get,
        .txq_info_get                 = i40e_txq_info_get,
+       .rxq_buf_recycle_info_get     = i40e_rxq_buf_recycle_info_get,
        .rx_burst_mode_get            = i40e_rx_burst_mode_get,
        .tx_burst_mode_get            = i40e_tx_burst_mode_get,
        .timesync_enable              = i40e_timesync_enable,
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 9b806d130e..83c5ff5859 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        struct rte_eth_rxq_info *qinfo);
 void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        struct rte_eth_txq_info *qinfo);
+void i40e_rxq_buf_recycle_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+       struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info);
 int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
                           struct rte_eth_burst_mode *mode);
 int i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 788ffb51c2..479964c6c4 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -3197,6 +3197,28 @@ i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        qinfo->conf.offloads = txq->offloads;
 }
 
+void
+i40e_rxq_buf_recycle_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+       struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
+{
+       struct i40e_rx_queue *rxq;
+
+       rxq = dev->data->rx_queues[queue_id];
+
+       rxq_buf_recycle_info->buf_ring = (void *)rxq->sw_ring;
+       rxq_buf_recycle_info->mp = rxq->mp;
+       rxq_buf_recycle_info->buf_ring_size = rxq->nb_rx_desc;
+       rxq_buf_recycle_info->refill_request = RTE_I40E_RXQ_REARM_THRESH;
+
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+       rxq_buf_recycle_info->refill_head = &rxq->rxrearm_start + 0xF;
+       rxq_buf_recycle_info->receive_tail = &rxq->rx_tail + 0xF;
+#else
+       rxq_buf_recycle_info->refill_head = &rxq->rxrearm_start;
+       rxq_buf_recycle_info->receive_tail = &rxq->rx_tail;
+#endif
+}
+
 #ifdef RTE_ARCH_X86
 static inline bool
 get_avx_supported(bool request_avx512)
@@ -3273,6 +3295,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 
        if (ad->rx_vec_allowed  &&
            rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+               dev->rx_descriptors_refill = i40e_rx_descriptors_refill_vec;
 #ifdef RTE_ARCH_X86
                if (dev->data->scattered_rx) {
                        if (ad->rx_use_avx512) {
@@ -3465,6 +3488,7 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
        if (ad->tx_simple_allowed) {
                if (ad->tx_vec_allowed &&
                    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+                       dev->tx_buf_stash = i40e_tx_buf_stash_vec;
 #ifdef RTE_ARCH_X86
                        if (ad->tx_use_avx512) {
 #ifdef CC_AVX512_SUPPORT
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index 5e6eecc501..0ad8f530f9 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -233,6 +233,10 @@ uint32_t i40e_dev_rx_queue_count(void *rx_queue);
 int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset);
 int i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset);
 
+uint16_t i40e_tx_buf_stash_vec(void *tx_queue,
+               struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info);
+uint16_t i40e_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb);
+
 uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                            uint16_t nb_pkts);
 uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue,
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index fe1a6ec75e..068ce694f2 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -156,6 +156,134 @@ tx_backlog_entry(struct i40e_tx_entry *txep,
                txep[i].mbuf = tx_pkts[i];
 }
 
+uint16_t
+i40e_tx_buf_stash_vec(void *tx_queue,
+       struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
+{
+       struct i40e_tx_queue *txq = tx_queue;
+       struct i40e_tx_entry *txep;
+       struct rte_mbuf **rxep;
+       struct rte_mbuf *m[RTE_I40E_TX_MAX_FREE_BUF_SZ];
+       int i, j, n;
+       uint16_t avail = 0;
+       uint16_t buf_ring_size = rxq_buf_recycle_info->buf_ring_size;
+       uint16_t mask = rxq_buf_recycle_info->buf_ring_size - 1;
+       uint16_t refill_request = rxq_buf_recycle_info->refill_request;
+       uint16_t refill_head = *rxq_buf_recycle_info->refill_head;
+       uint16_t receive_tail = *rxq_buf_recycle_info->receive_tail;
+
+       /* Get available recycling Rx buffers. */
+       avail = (buf_ring_size - (refill_head - receive_tail)) & mask;
+
+       /* Check Tx free thresh and Rx available space. */
+       if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
+               return 0;
+
+       /* check DD bits on threshold descriptor */
+       if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+                               rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+                       rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+               return 0;
+
+       n = txq->tx_rs_thresh;
+
+       /* Buffer recycle can only support the case where the Rx buffer
+        * ring does not wrap around. Two cases for this:
+        *
+        * case 1: The refill head of the Rx buffer ring needs to be aligned
+        * with the buffer ring size. In this case, the number of Tx buffers
+        * to free should be equal to refill_request.
+        *
+        * case 2: The refill head of the Rx buffer ring does not need to be
+        * aligned with the buffer ring size. In this case, the update of the
+        * refill head can not exceed the Rx buffer ring size.
+        */
+       if ((refill_request && refill_request != n) ||
+               (!refill_request && (refill_head + n > buf_ring_size)))
+               return 0;
+
+       /* First buffer to free from S/W ring is at index
+        * tx_next_dd - (tx_rs_thresh-1).
+        */
+       txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+       rxep = rxq_buf_recycle_info->buf_ring;
+       rxep += refill_head;
+
+       if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+               /* Directly put mbufs from Tx to Rx. */
+               for (i = 0; i < n; i++, rxep++, txep++)
+                       *rxep = txep[0].mbuf;
+       } else {
+               for (i = 0, j = 0; i < n; i++) {
+                       /* Bail out on mbufs from an unexpected mempool. */
+                       if (unlikely(rxq_buf_recycle_info->mp
+                                               != txep[i].mbuf->pool))
+                               return 0;
+
+                       m[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+
+                       /* In case 1, each Tx buffer should be the
+                        * last reference.
+                        */
+                       if (unlikely(m[j] == NULL && refill_request))
+                               return 0;
+                       /* In case 2, the number of valid Tx free
+                        * buffers should be recorded.
+                        */
+                       j++;
+               }
+               rte_memcpy(rxep, m, sizeof(void *) * j);
+       }
+
+       /* Update counters for Tx. */
+       txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+       txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+       if (txq->tx_next_dd >= txq->nb_tx_desc)
+               txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+       return n;
+}
+
+uint16_t
+i40e_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb)
+{
+       struct i40e_rx_queue *rxq = rx_queue;
+       struct i40e_rx_entry *rxep;
+       volatile union i40e_rx_desc *rxdp;
+       uint16_t rx_id;
+       uint64_t paddr;
+       uint64_t dma_addr;
+       uint16_t i;
+
+       rxdp = rxq->rx_ring + rxq->rxrearm_start;
+       rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+       for (i = 0; i < nb; i++) {
+               /* Initialize rxdp descs. */
+               paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
+               dma_addr = rte_cpu_to_le_64(paddr);
+               /* flush desc with pa dma_addr */
+               rxdp[i].read.hdr_addr = 0;
+               rxdp[i].read.pkt_addr = dma_addr;
+       }
+
+       /* Update the descriptor initializer index */
+       rxq->rxrearm_start += nb;
+       if (rxq->rxrearm_start >= rxq->nb_rx_desc)
+               rxq->rxrearm_start = 0;
+
+       rxq->rxrearm_nb -= nb;
+
+       rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
+                       (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
+
+       rte_io_wmb();
+       /* Update the tail pointer on the NIC */
+       I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+
+       return nb;
+}
+
 static inline void
 _i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
 {
-- 
2.25.1
