The buffer freeing functions for the simple scalar Tx path are almost identical in the ice and i40e drivers; the only real difference is that the i40e version batches the frees in the FAST_FREE case. Consolidate the two functions into a single common one, based on the better i40e version.
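The old ice version returned each mbuf to its mempool individually via rte_mempool_put(); the i40e version instead stages the mbuf pointers into a local array and frees them in chunks of up to CI_TX_MAX_FREE_BUF_SZ (64) with rte_mbuf_raw_free_bulk(), amortising the mempool access cost. For illustration, here is a minimal standalone sketch of that batching pattern (BATCH, free_bulk() and entries[] are hypothetical stand-ins for CI_TX_MAX_FREE_BUF_SZ, rte_mbuf_raw_free_bulk() and the completed sw_ring slice; this is not driver code):

    #include <stdint.h>
    #include <stdio.h>

    #define BATCH 64 /* stand-in for CI_TX_MAX_FREE_BUF_SZ */

    /* Stub for rte_mbuf_raw_free_bulk(): return n buffers in one call. */
    static void free_bulk(void **bufs, unsigned int n)
    {
        (void)bufs;
        printf("bulk free of %u buffers\n", n);
    }

    int main(void)
    {
        void *entries[100] = {0};   /* hypothetical slice of completed entries */
        const uint16_t rs_thresh = 100;
        /* k: largest multiple of BATCH <= rs_thresh (what RTE_ALIGN_FLOOR computes) */
        const uint16_t k = rs_thresh - (rs_thresh % BATCH);
        const uint16_t m = rs_thresh % BATCH;   /* leftover entries */
        void *stage[BATCH];
        void **p = entries;

        /* Free the full batches of BATCH buffers each. */
        for (uint16_t j = 0; j != k; j += BATCH) {
            for (uint16_t i = 0; i < BATCH; i++)
                stage[i] = *p++;
            free_bulk(stage, BATCH);
        }
        /* Free the remainder in one smaller bulk call. */
        if (m) {
            for (uint16_t i = 0; i < m; i++)
                stage[i] = *p++;
            free_bulk(stage, m);
        }
        return 0;   /* prints: bulk free of 64 buffers, then of 36 buffers */
    }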
Signed-off-by: Bruce Richardson <[email protected]>
---
 drivers/net/intel/common/tx.h            |  3 ++
 drivers/net/intel/common/tx_scalar_fns.h | 58 ++++++++++++++++++++++
 drivers/net/intel/i40e/i40e_rxtx.c       | 63 +-----------------------
 drivers/net/intel/ice/ice_rxtx.c         | 45 +----------------
 4 files changed, 65 insertions(+), 104 deletions(-)

diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h
index 0f545631af..3c388857a7 100644
--- a/drivers/net/intel/common/tx.h
+++ b/drivers/net/intel/common/tx.h
@@ -66,6 +66,9 @@ enum ci_tx_l2tag1_field {
 /* Common TX maximum burst size for chunked transmission in simple paths */
 #define CI_TX_MAX_BURST 32
 
+/* Common TX maximum free buffer size for batched bulk freeing */
+#define CI_TX_MAX_FREE_BUF_SZ 64
+
 /* Common TX descriptor command flags for simple transmit */
 #define CI_TX_DESC_CMD_DEFAULT (CI_TX_DESC_CMD_ICRC | CI_TX_DESC_CMD_EOP)
 
diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index d09d118197..185fcdfa72 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -72,6 +72,64 @@ ci_tx_fill_hw_ring(struct ci_tx_queue *txq, struct rte_mbuf **pkts,
 	}
 }
 
+/* Free transmitted mbufs from descriptor ring with bulk freeing for Tx simple path */
+static __rte_always_inline int
+ci_tx_free_bufs(struct ci_tx_queue *txq)
+{
+	const uint16_t rs_thresh = txq->tx_rs_thresh;
+	const uint16_t k = RTE_ALIGN_FLOOR(rs_thresh, CI_TX_MAX_FREE_BUF_SZ);
+	const uint16_t m = rs_thresh % CI_TX_MAX_FREE_BUF_SZ;
+	struct rte_mbuf *free[CI_TX_MAX_FREE_BUF_SZ];
+	struct ci_tx_entry *txep;
+
+	if ((txq->ci_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+			rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
+			rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE))
+		return 0;
+
+	txep = &txq->sw_ring[txq->tx_next_dd - (rs_thresh - 1)];
+
+	struct rte_mempool *fast_free_mp =
+			likely(txq->fast_free_mp != (void *)UINTPTR_MAX) ?
+			txq->fast_free_mp :
+			(txq->fast_free_mp = txep[0].mbuf->pool);
+
+	if (fast_free_mp) {
+		if (k) {
+			for (uint16_t j = 0; j != k; j += CI_TX_MAX_FREE_BUF_SZ) {
+				for (uint16_t i = 0; i < CI_TX_MAX_FREE_BUF_SZ; ++i, ++txep) {
+					free[i] = txep->mbuf;
+					txep->mbuf = NULL;
+				}
+				rte_mbuf_raw_free_bulk(fast_free_mp, free, CI_TX_MAX_FREE_BUF_SZ);
+			}
+		}
+
+		if (m) {
+			for (uint16_t i = 0; i < m; ++i, ++txep) {
+				free[i] = txep->mbuf;
+				txep->mbuf = NULL;
+			}
+			rte_mbuf_raw_free_bulk(fast_free_mp, free, m);
+		}
+	} else {
+		for (uint16_t i = 0; i < rs_thresh; i++)
+			rte_prefetch0((txep + i)->mbuf);
+
+		for (uint16_t i = 0; i < rs_thresh; ++i, ++txep) {
+			rte_pktmbuf_free_seg(txep->mbuf);
+			txep->mbuf = NULL;
+		}
+	}
+
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + rs_thresh);
+	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + rs_thresh);
+	if (txq->tx_next_dd >= txq->nb_tx_desc)
+		txq->tx_next_dd = (uint16_t)(rs_thresh - 1);
+
+	return rs_thresh;
+}
+
 /*
  * Common transmit descriptor cleanup function for Intel drivers.
  * Used by ice, i40e, iavf, and idpf drivers.
diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c
index bd85c7324d..395808ff7c 100644
--- a/drivers/net/intel/i40e/i40e_rxtx.c
+++ b/drivers/net/intel/i40e/i40e_rxtx.c
@@ -1010,65 +1010,6 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 			get_context_desc, NULL, NULL);
 }
 
-static __rte_always_inline int
-i40e_tx_free_bufs(struct ci_tx_queue *txq)
-{
-	struct ci_tx_entry *txep;
-	const uint16_t tx_rs_thresh = txq->tx_rs_thresh;
-	uint16_t i, j;
-	struct rte_mbuf *free[I40E_TX_MAX_FREE_BUF_SZ];
-	const uint16_t k = RTE_ALIGN_FLOOR(tx_rs_thresh, I40E_TX_MAX_FREE_BUF_SZ);
-	const uint16_t m = tx_rs_thresh % I40E_TX_MAX_FREE_BUF_SZ;
-
-	if ((txq->ci_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
-			rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
-			rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE))
-		return 0;
-
-	txep = &txq->sw_ring[txq->tx_next_dd - (tx_rs_thresh - 1)];
-
-	struct rte_mempool *fast_free_mp =
-			likely(txq->fast_free_mp != (void *)UINTPTR_MAX) ?
-			txq->fast_free_mp :
-			(txq->fast_free_mp = txep[0].mbuf->pool);
-
-	if (fast_free_mp != NULL) {
-		if (k) {
-			for (j = 0; j != k; j += I40E_TX_MAX_FREE_BUF_SZ) {
-				for (i = 0; i < I40E_TX_MAX_FREE_BUF_SZ; ++i, ++txep) {
-					free[i] = txep->mbuf;
-					txep->mbuf = NULL;
-				}
-				rte_mbuf_raw_free_bulk(fast_free_mp, free,
-						I40E_TX_MAX_FREE_BUF_SZ);
-			}
-		}
-
-		if (m) {
-			for (i = 0; i < m; ++i, ++txep) {
-				free[i] = txep->mbuf;
-				txep->mbuf = NULL;
-			}
-			rte_mbuf_raw_free_bulk(fast_free_mp, free, m);
-		}
-	} else {
-		for (i = 0; i < tx_rs_thresh; i++)
-			rte_prefetch0((txep + i)->mbuf);
-
-		for (i = 0; i < tx_rs_thresh; ++i, ++txep) {
-			rte_pktmbuf_free_seg(txep->mbuf);
-			txep->mbuf = NULL;
-		}
-	}
-
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + tx_rs_thresh);
-	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + tx_rs_thresh);
-	if (txq->tx_next_dd >= txq->nb_tx_desc)
-		txq->tx_next_dd = (uint16_t)(tx_rs_thresh - 1);
-
-	return tx_rs_thresh;
-}
-
 static inline uint16_t
 tx_xmit_pkts(struct ci_tx_queue *txq,
 	     struct rte_mbuf **tx_pkts,
@@ -1083,7 +1024,7 @@ tx_xmit_pkts(struct ci_tx_queue *txq,
 	 * descriptor, free the associated buffer.
 	 */
 	if (txq->nb_tx_free < txq->tx_free_thresh)
-		i40e_tx_free_bufs(txq);
+		ci_tx_free_bufs(txq);
 
 	/* Use available descriptor only */
 	nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
@@ -2508,7 +2449,7 @@ i40e_tx_done_cleanup_simple(struct ci_tx_queue *txq,
 		if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
 			break;
 
-		n = i40e_tx_free_bufs(txq);
+		n = ci_tx_free_bufs(txq);
 
 		if (n == 0)
 			break;
diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
index 26b4c73eb6..c1477f3e87 100644
--- a/drivers/net/intel/ice/ice_rxtx.c
+++ b/drivers/net/intel/ice/ice_rxtx.c
@@ -3129,47 +3129,6 @@ ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	return ci_xmit_pkts(txq, tx_pkts, nb_pkts, CI_VLAN_IN_L2TAG1,
 			get_context_desc, NULL, NULL);
 }
 
-static __rte_always_inline int
-ice_tx_free_bufs(struct ci_tx_queue *txq)
-{
-	struct ci_tx_entry *txep;
-	uint16_t i;
-
-	if ((txq->ci_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
-			rte_cpu_to_le_64(CI_TXD_QW1_DTYPE_M)) !=
-			rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DESC_DONE))
-		return 0;
-
-	txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)];
-
-	struct rte_mempool *fast_free_mp =
-			likely(txq->fast_free_mp != (void *)UINTPTR_MAX) ?
-			txq->fast_free_mp :
-			(txq->fast_free_mp = txep[0].mbuf->pool);
-
-	if (fast_free_mp != NULL) {
-		for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
-			rte_mempool_put(fast_free_mp, txep->mbuf);
-			txep->mbuf = NULL;
-		}
-	} else {
-		for (i = 0; i < txq->tx_rs_thresh; i++)
-			rte_prefetch0((txep + i)->mbuf);
-
-		for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
-			rte_pktmbuf_free_seg(txep->mbuf);
-			txep->mbuf = NULL;
-		}
-	}
-
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-	if (txq->tx_next_dd >= txq->nb_tx_desc)
-		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-	return txq->tx_rs_thresh;
-}
-
 static int
 ice_tx_done_cleanup_full(struct ci_tx_queue *txq,
 			uint32_t free_cnt)
@@ -3259,7 +3218,7 @@ ice_tx_done_cleanup_simple(struct ci_tx_queue *txq,
 		if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
 			break;
 
-		n = ice_tx_free_bufs(txq);
+		n = ci_tx_free_bufs(txq);
 
 		if (n == 0)
 			break;
@@ -3300,7 +3259,7 @@ tx_xmit_pkts(struct ci_tx_queue *txq,
 	 * descriptor, free the associated buffer.
 	 */
 	if (txq->nb_tx_free < txq->tx_free_thresh)
-		ice_tx_free_bufs(txq);
+		ci_tx_free_bufs(txq);
 
 	/* Use available descriptor only */
 	nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
-- 
2.51.0

