The olx parameter is intended to be known at compile time and widely used for static optimizations while generating the tx_burst routines code from the template.
However, in the mlx5_tx_free_mbuf routine we have the olx parameter provided in runtime only, for all possible execution paths. And the only intended optimization (that actually does not happen) is to check whether multi-buf packets are supported. As fast free offload is not supported for multi-buf packets, we can simplify the code and get rid of the unused olx parameter in the entire call chain. Signed-off-by: Viacheslav Ovsiienko <[email protected]> Acked-by: Dariusz Sosnowski <[email protected]> --- v2: updated commit message --- drivers/net/mlx5/mlx5_tx.c | 18 +++++------------- drivers/net/mlx5/mlx5_tx.h | 28 +++++++++------------------- 2 files changed, 14 insertions(+), 32 deletions(-) diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c index 8085b5c306..94644bc3b9 100644 --- a/drivers/net/mlx5/mlx5_tx.c +++ b/drivers/net/mlx5/mlx5_tx.c @@ -144,14 +144,10 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq, * Pointer to TX queue structure. * @param last_cqe * valid CQE pointer, if not NULL update txq->wqe_pi and flush the buffers. - * @param olx - * Configured Tx offloads mask. It is fully defined at - * compile time and may be used for optimization. */ static __rte_always_inline void mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq, - volatile struct mlx5_cqe *last_cqe, - unsigned int olx __rte_unused) + volatile struct mlx5_cqe *last_cqe) { if (likely(last_cqe != NULL)) { uint16_t tail; @@ -159,7 +155,7 @@ mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq, txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter); tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m]; if (likely(tail != txq->elts_tail)) { - mlx5_tx_free_elts(txq, tail, olx); + mlx5_tx_free_elts(txq, tail); MLX5_ASSERT(tail == txq->elts_tail); } } @@ -172,16 +168,12 @@ mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq, * * @param txq * Pointer to TX queue structure. - * @param olx - * Configured Tx offloads mask. 
It is fully defined at - * compile time and may be used for optimization. * * NOTE: not inlined intentionally, it makes tx_burst * routine smaller, simple and faster - from experiments. */ void -mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, - unsigned int olx __rte_unused) +mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq) { unsigned int count = MLX5_TX_COMP_MAX_CQE; volatile struct mlx5_cqe *last_cqe = NULL; @@ -259,7 +251,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, /* Ring doorbell to notify hardware. */ rte_compiler_barrier(); *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); - mlx5_tx_comp_flush(txq, last_cqe, olx); + mlx5_tx_comp_flush(txq, last_cqe); } } @@ -280,7 +272,7 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) struct mlx5_txq_data *__rte_restrict txq = tx_queue; uint16_t used; - mlx5_tx_handle_completion(txq, 0); + mlx5_tx_handle_completion(txq); used = txq->elts_head - txq->elts_tail; if (offset < used) return RTE_ETH_TX_DESC_FULL; diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h index 2f4402eb50..016dba0b03 100644 --- a/drivers/net/mlx5/mlx5_tx.h +++ b/drivers/net/mlx5/mlx5_tx.h @@ -234,8 +234,7 @@ struct mlx5_external_q *mlx5_ext_txq_get(struct rte_eth_dev *dev, uint16_t idx); /* mlx5_tx.c */ -void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, - unsigned int olx __rte_unused); +void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq); int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, struct rte_eth_txq_info *qinfo); @@ -511,15 +510,11 @@ txq_ol_cksum_to_cs(struct rte_mbuf *buf) * Pointer to array of packets to be free. * @param pkts_n * Number of packets to be freed. - * @param olx - * Configured Tx offloads mask. It is fully defined at - * compile time and may be used for optimization. 
*/ static __rte_always_inline void mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, struct rte_mbuf **__rte_restrict pkts, - unsigned int pkts_n, - unsigned int olx __rte_unused) + unsigned int pkts_n) { struct rte_mempool *pool = NULL; struct rte_mbuf **p_free = NULL; @@ -537,7 +532,7 @@ mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, * Free mbufs directly to the pool in bulk * if fast free offload is engaged */ - if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { + if (txq->fast_free) { mbuf = *pkts; pool = mbuf->pool; rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); @@ -627,10 +622,9 @@ mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, static __rte_noinline void __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, struct rte_mbuf **__rte_restrict pkts, - unsigned int pkts_n, - unsigned int olx __rte_unused) + unsigned int pkts_n) { - mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); + mlx5_tx_free_mbuf(txq, pkts, pkts_n); } /** @@ -640,14 +634,10 @@ __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, * Pointer to Tx queue structure. * @param tail * Index in elts to free up to, becomes new elts tail. - * @param olx - * Configured Tx offloads mask. It is fully defined at - * compile time and may be used for optimization. 
*/ static __rte_always_inline void mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, - uint16_t tail, - unsigned int olx __rte_unused) + uint16_t tail) { uint16_t n_elts = tail - txq->elts_tail; @@ -666,7 +656,7 @@ mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, MLX5_ASSERT(part <= txq->elts_s); mlx5_tx_free_mbuf(txq, &txq->elts[txq->elts_tail & txq->elts_m], - part, olx); + part); txq->elts_tail += part; n_elts -= part; } while (n_elts); @@ -3565,7 +3555,7 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, * - doorbell the NIC about processed CQEs */ rte_prefetch0(*(pkts + loc.pkts_sent)); - mlx5_tx_handle_completion(txq, olx); + mlx5_tx_handle_completion(txq); /* * Calculate the number of available resources - elts and WQEs. * There are two possible different scenarios: @@ -3814,7 +3804,7 @@ mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, txq->stats.opackets += loc.pkts_sent; #endif if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free) - __mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx); + __mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free); /* Trace productive bursts only. */ if (__rte_trace_point_fp_is_enabled() && loc.pkts_sent) rte_pmd_mlx5_trace_tx_exit(mlx5_read_pcibar_clock_from_txq(txq), -- 2.34.1

