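Prepare the code to write the Work Queue Element (WQE) with vectorized
instructions: the known and common fields of the control and Ethernet
segments are now initialized together once the packet has been processed,
and the first two bytes of the Ethernet header are saved so that they can
be written into the inline header at that same point.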

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com>
Signed-off-by: Elad Persiko <eladpe at mellanox.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 44 ++++++++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ffd09ac..5dacd93 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -391,6 +391,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                uint32_t length;
                unsigned int ds = 0;
                uintptr_t addr;
+               uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
+               uint8_t ehdr[2];
 #ifdef MLX5_PMD_SOFT_COUNTERS
                uint32_t total_length = 0;
 #endif
@@ -416,6 +418,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        rte_prefetch0(*pkts);
                addr = rte_pktmbuf_mtod(buf, uintptr_t);
                length = DATA_LEN(buf);
+               ehdr[0] = ((uint8_t *)addr)[0];
+               ehdr[1] = ((uint8_t *)addr)[1];
 #ifdef MLX5_PMD_SOFT_COUNTERS
                total_length = length;
 #endif
@@ -439,24 +443,20 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                } else {
                        wqe->eseg.cs_flags = 0;
                }
-               raw  = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
-               /* Start the know and common part of the WQE structure. */
-               wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
-               wqe->ctrl[2] = 0;
-               wqe->ctrl[3] = 0;
-               wqe->eseg.rsvd0 = 0;
-               wqe->eseg.rsvd1 = 0;
-               wqe->eseg.mss = 0;
-               wqe->eseg.rsvd2 = 0;
-               /* Start by copying the Ethernet Header. */
-               memcpy((uint8_t *)raw, ((uint8_t *)addr), 16);
+               raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
+               /*
+                * Start by copying the Ethernet header minus the first two
+                * bytes which will be appended at the end of the Ethernet
+                * segment.
+                */
+               memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2, 16);
                length -= MLX5_WQE_DWORD_SIZE;
                addr += MLX5_WQE_DWORD_SIZE;
                /* Replace the Ethernet type by the VLAN if necessary. */
                if (buf->ol_flags & PKT_TX_VLAN_PKT) {
                        uint32_t vlan = htonl(0x81000000 | buf->vlan_tci);

-                       memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE -
+                       memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE - 2 -
                                           sizeof(vlan)),
                               &vlan, sizeof(vlan));
                        addr -= sizeof(vlan);
@@ -468,10 +468,13 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n];
                        uint16_t max_inline =
                                txq->max_inline * RTE_CACHE_LINE_SIZE;
-                       uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
                        uint16_t room;

-                       raw += MLX5_WQE_DWORD_SIZE;
+                       /*
+                        * raw starts two bytes before the boundary to
+                        * continue the above copy of packet data.
+                        */
+                       raw += MLX5_WQE_DWORD_SIZE - 2;
                        room = end - (uintptr_t)raw;
                        if (room > max_inline) {
                                uintptr_t addr_end = (addr + max_inline) &
@@ -487,8 +490,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                /* Sanity check. */
                                assert(addr <= addr_end);
                        }
-                       /* Store the inlined packet size in the WQE. */
-                       wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz);
                        /*
                         * 2 DWORDs consumed by the WQE header + 1 DSEG +
                         * the size of the inline part of the packet.
@@ -570,7 +571,18 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        --pkts_n;
 next_pkt:
                ++i;
+               /* Initialize known and common part of the WQE structure. */
+               wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
                wqe->ctrl[1] = htonl(txq->qp_num_8s | ds);
+               wqe->ctrl[2] = 0;
+               wqe->ctrl[3] = 0;
+               wqe->eseg.rsvd0 = 0;
+               wqe->eseg.rsvd1 = 0;
+               wqe->eseg.mss = 0;
+               wqe->eseg.rsvd2 = 0;
+               wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz);
+               wqe->eseg.inline_hdr[0] = ehdr[0];
+               wqe->eseg.inline_hdr[1] = ehdr[1];
                txq->wqe_ci += (ds + 3) / 4;
 #ifdef MLX5_PMD_SOFT_COUNTERS
                /* Increment sent bytes counter. */
-- 
2.1.4

Reply via email to