In the scalar datapath, there is a loop to handle multi-segment and
multi-descriptor packets on Tx. After that loop, the End of Packet (EOP)
bit was written to the descriptor separately, meaning that for each
single-descriptor packet there were two writes to the second quad-word,
i.e. three 64-bit writes per descriptor rather than just two. Adjusting
the code to compute the EOP bit inside the loop saves that extra write
per packet and so improves performance. The RS bit is still OR-ed into
the last descriptor separately, but only on the packets where the
tx_rs_thresh threshold is reached.

Signed-off-by: Bruce Richardson <[email protected]>
---
 drivers/net/intel/common/tx_scalar_fns.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/intel/common/tx_scalar_fns.h b/drivers/net/intel/common/tx_scalar_fns.h
index 6079a558e4..7b643fcf44 100644
--- a/drivers/net/intel/common/tx_scalar_fns.h
+++ b/drivers/net/intel/common/tx_scalar_fns.h
@@ -378,6 +378,10 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
                                txn = &sw_ring[txe->next_id];
                        }
 
+                       /* fill the last descriptor with End of Packet (EOP) 
bit */
+                       if (m_seg->next == NULL)
+                               td_cmd |= CI_TX_DESC_CMD_EOP;
+
                        txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
                        txd->cmd_type_offset_bsz = 
rte_cpu_to_le_64(CI_TX_DESC_DTYPE_DATA |
                                ((uint64_t)td_cmd << CI_TXD_QW1_CMD_S) |
@@ -390,21 +394,17 @@ ci_xmit_pkts(struct ci_tx_queue *txq,
                        txe = txn;
                        m_seg = m_seg->next;
                } while (m_seg);
-
-               /* fill the last descriptor with End of Packet (EOP) bit */
-               td_cmd |= CI_TX_DESC_CMD_EOP;
                txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
                txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
 
                /* set RS bit on the last descriptor of one packet */
                if (txq->nb_tx_used >= txq->tx_rs_thresh) {
-                       td_cmd |= CI_TX_DESC_CMD_RS;
+                       txd->cmd_type_offset_bsz |=
+                                       rte_cpu_to_le_64(CI_TX_DESC_CMD_RS << 
CI_TXD_QW1_CMD_S);
 
                        /* Update txq RS bit counters */
                        txq->nb_tx_used = 0;
                }
-               txd->cmd_type_offset_bsz |=
-                               rte_cpu_to_le_64(((uint64_t)td_cmd) << 
CI_TXD_QW1_CMD_S);
 
                if (ts_fns != NULL)
                        ts_id = ts_fns->write_ts_desc(txq, tx_pkt, tx_id, 
ts_id);
-- 
2.51.0

Reply via email to