In memif non-zero-copy mode, the Rx copy loop contains a branch that
compares the mbuf size with the memif buffer size on every copy. Add
a fast memory copy path that removes this branch from the loop by
comparing the two sizes once, before the loop is entered. Removing
the branch leads to a considerable performance uplift. The Rx fast
path does not change the way an mbuf stores the contents of a memif
buffer.

When the memif buffer size is <= the mbuf size, Rx takes the fast
memcpy path; otherwise it takes the original path.

Tested with 1p1q on an Ampere Altra AArch64 server:
----------------------------------------------
|  buf size   | memif <= mbuf | memif > mbuf |
----------------------------------------------
| non-zc gain |     4.30%     |    -0.52%    |
----------------------------------------------
|   zc gain   |     2.46%     |     0.70%    |
----------------------------------------------

Tested with 1p1q on a Cascade Lake Xeon x86 server:
----------------------------------------------
|   buf size  | memif <= mbuf | memif > mbuf |
----------------------------------------------
| non-zc gain |     2.13%     |    -1.40%    |
----------------------------------------------
|   zc gain   |     0.18%     |     0.48%    |
----------------------------------------------

Signed-off-by: Joyce Kong <joyce.k...@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
Acked-by: Morten Brørup <m...@smartsharesystems.com>
---
 drivers/net/memif/rte_eth_memif.c | 123 ++++++++++++++++++++----------
 1 file changed, 83 insertions(+), 40 deletions(-)

diff --git a/drivers/net/memif/rte_eth_memif.c 
b/drivers/net/memif/rte_eth_memif.c
index dd951b8296..24fc8b13fa 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -341,67 +341,111 @@ eth_memif_rx(void *queue, struct rte_mbuf **bufs, 
uint16_t nb_pkts)
        if (cur_slot == last_slot)
                goto refill;
        n_slots = last_slot - cur_slot;
+       if (likely(mbuf_size >= pmd->cfg.pkt_buffer_size)) {
+               while (n_slots && n_rx_pkts < nb_pkts) {
+                       mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+                       if (unlikely(mbuf_head == NULL))
+                               goto no_free_bufs;
+                       mbuf = mbuf_head;
+
+next_slot1:
+                       mbuf->port = mq->in_port;
+                       s0 = cur_slot & mask;
+                       d0 = &ring->desc[s0];
 
-       while (n_slots && n_rx_pkts < nb_pkts) {
-               mbuf_head = rte_pktmbuf_alloc(mq->mempool);
-               if (unlikely(mbuf_head == NULL))
-                       goto no_free_bufs;
-               mbuf = mbuf_head;
-               mbuf->port = mq->in_port;
-               dst_off = 0;
+                       cp_len = d0->length;
 
-next_slot:
-               s0 = cur_slot & mask;
-               d0 = &ring->desc[s0];
+                       rte_pktmbuf_data_len(mbuf) = cp_len;
+                       rte_pktmbuf_pkt_len(mbuf) = cp_len;
+                       if (mbuf != mbuf_head)
+                               rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
 
-               src_len = d0->length;
-               src_off = 0;
+                       rte_memcpy(rte_pktmbuf_mtod(mbuf, void *),
+                               (uint8_t *)memif_get_buffer(proc_private, d0), 
cp_len);
 
-               do {
-                       dst_len = mbuf_size - dst_off;
-                       if (dst_len == 0) {
-                               dst_off = 0;
-                               dst_len = mbuf_size;
+                       cur_slot++;
+                       n_slots--;
 
-                               /* store pointer to tail */
+                       if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
                                mbuf_tail = mbuf;
                                mbuf = rte_pktmbuf_alloc(mq->mempool);
                                if (unlikely(mbuf == NULL))
                                        goto no_free_bufs;
-                               mbuf->port = mq->in_port;
                                ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, 
mbuf);
                                if (unlikely(ret < 0)) {
                                        MIF_LOG(ERR, 
"number-of-segments-overflow");
                                        rte_pktmbuf_free(mbuf);
                                        goto no_free_bufs;
                                }
+                               goto next_slot1;
                        }
-                       cp_len = RTE_MIN(dst_len, src_len);
 
-                       rte_pktmbuf_data_len(mbuf) += cp_len;
-                       rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
-                       if (mbuf != mbuf_head)
-                               rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
+                       mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+                       *bufs++ = mbuf_head;
+                       n_rx_pkts++;
+               }
+       } else {
+               while (n_slots && n_rx_pkts < nb_pkts) {
+                       mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+                       if (unlikely(mbuf_head == NULL))
+                               goto no_free_bufs;
+                       mbuf = mbuf_head;
+                       mbuf->port = mq->in_port;
+
+next_slot2:
+                       s0 = cur_slot & mask;
+                       d0 = &ring->desc[s0];
 
-                       rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
-                                                          dst_off),
-                               (uint8_t *)memif_get_buffer(proc_private, d0) +
-                               src_off, cp_len);
+                       src_len = d0->length;
+                       dst_off = 0;
+                       src_off = 0;
 
-                       src_off += cp_len;
-                       dst_off += cp_len;
-                       src_len -= cp_len;
-               } while (src_len);
+                       do {
+                               dst_len = mbuf_size - dst_off;
+                               if (dst_len == 0) {
+                                       dst_off = 0;
+                                       dst_len = mbuf_size;
+
+                                       /* store pointer to tail */
+                                       mbuf_tail = mbuf;
+                                       mbuf = rte_pktmbuf_alloc(mq->mempool);
+                                       if (unlikely(mbuf == NULL))
+                                               goto no_free_bufs;
+                                       mbuf->port = mq->in_port;
+                                       ret = memif_pktmbuf_chain(mbuf_head, 
mbuf_tail, mbuf);
+                                       if (unlikely(ret < 0)) {
+                                               MIF_LOG(ERR, 
"number-of-segments-overflow");
+                                               rte_pktmbuf_free(mbuf);
+                                               goto no_free_bufs;
+                                       }
+                               }
+                               cp_len = RTE_MIN(dst_len, src_len);
 
-               cur_slot++;
-               n_slots--;
+                               rte_pktmbuf_data_len(mbuf) += cp_len;
+                               rte_pktmbuf_pkt_len(mbuf) = 
rte_pktmbuf_data_len(mbuf);
+                               if (mbuf != mbuf_head)
+                                       rte_pktmbuf_pkt_len(mbuf_head) += 
cp_len;
 
-               if (d0->flags & MEMIF_DESC_FLAG_NEXT)
-                       goto next_slot;
+                               rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
+                                                                  dst_off),
+                                       (uint8_t 
*)memif_get_buffer(proc_private, d0) +
+                                       src_off, cp_len);
 
-               mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
-               *bufs++ = mbuf_head;
-               n_rx_pkts++;
+                               src_off += cp_len;
+                               dst_off += cp_len;
+                               src_len -= cp_len;
+                       } while (src_len);
+
+                       cur_slot++;
+                       n_slots--;
+
+                       if (d0->flags & MEMIF_DESC_FLAG_NEXT)
+                               goto next_slot2;
+
+                       mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+                       *bufs++ = mbuf_head;
+                       n_rx_pkts++;
+               }
        }
 
 no_free_bufs:
@@ -694,7 +738,6 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t 
nb_pkts)
        return n_tx_pkts;
 }
 
-
 static int
 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue 
*mq,
                memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
-- 
2.25.1

Reply via email to