Use a non-volatile uint64_t pointer to store to the descriptor ring. This will allow the compiler to optionally merge the stores as it sees best.
Signed-off-by: Bruce Richardson <[email protected]> --- drivers/net/intel/common/tx_scalar_fns.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h index 7b643fcf44..95e9acbe60 100644 --- a/drivers/net/intel/common/tx_scalar_fns.h +++ b/drivers/net/intel/common/tx_scalar_fns.h @@ -184,6 +184,15 @@ struct ci_timesstamp_queue_fns { write_ts_tail_t write_ts_tail; }; +static inline void +write_txd(volatile void *txd, uint64_t qw0, uint64_t qw1) +{ + uint64_t *txd_qw = RTE_CAST_PTR(void *, txd); + + txd_qw[0] = rte_cpu_to_le_64(qw0); + txd_qw[1] = rte_cpu_to_le_64(qw1); +} + static inline uint16_t ci_xmit_pkts(struct ci_tx_queue *txq, struct rte_mbuf **tx_pkts, @@ -313,8 +322,7 @@ ci_xmit_pkts(struct ci_tx_queue *txq, txe->mbuf = NULL; } - ctx_txd[0] = cd_qw0; - ctx_txd[1] = cd_qw1; + write_txd(ctx_txd, cd_qw0, cd_qw1); txe->last_id = tx_last; tx_id = txe->next_id; @@ -361,12 +369,12 @@ ci_xmit_pkts(struct ci_tx_queue *txq, while ((ol_flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG)) && unlikely(slen > CI_MAX_DATA_PER_TXD)) { - txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr); - txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA | + const uint64_t cmd_type_offset_bsz = CI_TX_DESC_DTYPE_DATA | ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) | ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) | ((uint64_t)CI_MAX_DATA_PER_TXD << CI_TXD_QW1_TX_BUF_SZ_S) | - ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S)); + ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S); + write_txd(txd, buf_dma_addr, cmd_type_offset_bsz); buf_dma_addr += CI_MAX_DATA_PER_TXD; slen -= CI_MAX_DATA_PER_TXD; @@ -382,12 +390,12 @@ ci_xmit_pkts(struct ci_tx_queue *txq, if (m_seg->next == NULL) td_cmd |= CI_TX_DESC_CMD_EOP; - txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr); - txd->cmd_type_offset_bsz = rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA | + const uint64_t cmd_type_offset_bsz = CI_TX_DESC_DTYPE_DATA | ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) | ((uint64_t)td_offset << CI_TXD_QW1_OFFSET_S) | ((uint64_t)slen << CI_TXD_QW1_TX_BUF_SZ_S) | - ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S)); + ((uint64_t)td_tag << CI_TXD_QW1_L2TAG1_S); + write_txd(txd, buf_dma_addr, cmd_type_offset_bsz); txe->last_id = tx_last; tx_id = txe->next_id; -- 2.51.0

