The PMD uses only power-of-two numbers of Work Queue Elements; storing
the element count as its log2 reduces the size of the field needed to
hold it.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
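Not part of the patch: a minimal sketch of the idiom being applied, with
illustrative names (struct ring, ring_size() and ring_mask() are not
driver identifiers). Because the element count is always a power of two,
4 bits of log2 are enough to describe it, and the wrap-around mask
becomes ((1 << n) - 1) instead of (n - 1):

#include <stdint.h>

struct ring {
	uint16_t n_log2:4; /* Number of elements, stored as log2. */
};

static inline uint16_t
ring_size(const struct ring *r)
{
	return 1 << r->n_log2; /* Recover the element count. */
}

static inline uint16_t
ring_mask(const struct ring *r)
{
	return (1 << r->n_log2) - 1; /* Index mask for wrap-around. */
}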
drivers/net/mlx5/mlx5_rxtx.c | 23 ++++++++++++-----------
drivers/net/mlx5/mlx5_rxtx.h | 2 +-
drivers/net/mlx5/mlx5_txq.c | 4 ++--
3 files changed, 15 insertions(+), 14 deletions(-)
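The conversion in txq_setup() relies on log2above() to turn the WQE
count into its log2. A rough stand-alone equivalent (a sketch of the
expected behaviour, not necessarily the driver's exact helper from
mlx5_utils.h):

static inline unsigned int
log2above_sketch(unsigned int v)
{
	unsigned int l;
	unsigned int r;

	/* l tracks the position of the highest set bit; r becomes 1 if
	 * any lower bit is set, rounding non-powers-of-two upward. */
	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
		r |= (v & 1);
	return l + r; /* Smallest n such that (1 << n) >= v. */
}

Since qp->sq.wqe_cnt is a power of two here, the rounding term is zero
and the result is the exact log2.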
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 214922b..9d00ddc 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -199,9 +199,10 @@ txq_complete(struct txq *txq)
} while (1);
if (unlikely(cqe == NULL))
return;
- wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)].hdr;
+ wqe = &(*txq->wqes)[htons(cqe->wqe_counter) &
+ ((1 << txq->wqe_n) - 1)].hdr;
elts_tail = wqe->ctrl[3];
- assert(elts_tail < txq->wqe_n);
+ assert(elts_tail < (1 << txq->wqe_n));
/* Free buffers. */
while (elts_free != elts_tail) {
struct rte_mbuf *elt = (*txq->elts)[elts_free];
@@ -335,7 +336,7 @@ mlx5_wqe_write(struct txq *txq, volatile struct mlx5_wqe *wqe,
}
/* Inline if enough room. */
if (txq->max_inline != 0) {
- uintptr_t end = (uintptr_t)&(*txq->wqes)[txq->wqe_n];
+ uintptr_t end = (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
uint16_t max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
uint16_t room;
@@ -446,7 +447,7 @@ tx_prefetch_wqe(struct txq *txq, uint16_t ci)
{
volatile struct mlx5_wqe64 *wqe;
- wqe = &(*txq->wqes)[ci & (txq->wqe_n - 1)];
+ wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)];
rte_prefetch0(wqe);
}
@@ -504,7 +505,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
max -= segs_n;
--pkts_n;
elts_head_next = (elts_head + 1) & (elts_n - 1);
- wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)].hdr;
+ wqe = &(*txq->wqes)[txq->wqe_ci & ((1 << txq->wqe_n) - 1)].hdr;
tx_prefetch_wqe(txq, txq->wqe_ci);
tx_prefetch_wqe(txq, txq->wqe_ci + 1);
if (pkts_n)
@@ -540,7 +541,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE)))
dseg = (volatile void *)
&(*txq->wqes)[txq->wqe_ci++ &
- (txq->wqe_n - 1)];
+ ((1 << txq->wqe_n) - 1)];
else
++dseg;
++ds;
@@ -607,10 +608,10 @@ skip_segs:
static inline void
mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
{
- uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+ uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
(volatile struct mlx5_wqe_data_seg (*)[])
- (uintptr_t)&(*txq->wqes)[(idx + 1) & (txq->wqe_n - 1)];
+ (uintptr_t)&(*txq->wqes)[(idx + 1) & ((1 << txq->wqe_n) - 1)];
mpw->state = MLX5_MPW_STATE_OPENED;
mpw->pkts_n = 0;
@@ -815,7 +816,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
static inline void
mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
{
- uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
+ uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
struct mlx5_wqe_inl_small *inl;
mpw->state = MLX5_MPW_INL_STATE_OPENED;
@@ -1000,7 +1001,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
addr = rte_pktmbuf_mtod(buf, uintptr_t);
(*txq->elts)[elts_head] = buf;
/* Maximum number of bytes before wrapping. */
- max = ((uintptr_t)&(*txq->wqes)[txq->wqe_n] -
+ max = ((uintptr_t)&(*txq->wqes)[1 << txq->wqe_n] -
(uintptr_t)mpw.data.raw);
if (length > max) {
rte_memcpy((void *)(uintptr_t)mpw.data.raw,
@@ -1019,7 +1020,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
mpw.data.raw += length;
}
if ((uintptr_t)mpw.data.raw ==
- (uintptr_t)&(*txq->wqes)[txq->wqe_n])
+ (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n])
mpw.data.raw =
(volatile void *)&(*txq->wqes)[0];
++mpw.pkts_n;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 3dca8ca..9828aef 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -246,9 +246,9 @@ struct txq {
uint16_t elts_comp; /* Counter since last completion request. */
uint16_t cq_ci; /* Consumer index for completion queue. */
uint16_t wqe_ci; /* Consumer index for work queue. */
- uint16_t wqe_n; /* Number of WQ elements. */
uint16_t elts_n:4; /* (*elts)[] length (in log2). */
uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
+ uint16_t wqe_n:4; /* Number of WQ elements (in log2). */
uint16_t bf_buf_size:4; /* Log2 Blueflame size. */
uint16_t bf_offset; /* Blueflame offset. */
uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 9919e37..3d2d132 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -81,7 +81,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
for (i = 0; (i != elts_n); ++i)
(*txq_ctrl->txq.elts)[i] = NULL;
- for (i = 0; (i != txq_ctrl->txq.wqe_n); ++i) {
+ for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) {
volatile struct mlx5_wqe64 *wqe = &(*txq_ctrl->txq.wqes)[i];
memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe));
@@ -217,7 +217,7 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
tmpl->txq.wqes =
(volatile struct mlx5_wqe64 (*)[])
(uintptr_t)qp->gen_data.sqstart;
- tmpl->txq.wqe_n = qp->sq.wqe_cnt;
+ tmpl->txq.wqe_n = log2above(qp->sq.wqe_cnt);
tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR];
tmpl->txq.bf_reg = qp->gen_data.bf->reg;
tmpl->txq.bf_offset = qp->gen_data.bf->offset;
--
2.1.4