CQEs are not cache-line aligned, so polling Rx CQs at a high packet
rate (PPS) can cause cache conflicts.

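This patch reads the CQ producer index from a device register and
compares it with the software consumer index before processing packets.
The Rx burst returns immediately when the two are equal; otherwise it
handles at most the number of pending CQEs, reducing cache conflicts.
The added MMIO read overhead is minor compared to the latency caused by
cache conflicts, so observed small-packet performance improves.

The check is enabled for PF devices only; for VF the producer index
pointer is left NULL and the previous polling path is used. The Rx CQ
is also created with twice as many entries (cqe_n = wqe_s * 2).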

Signed-off-by: Rong Qian <qi...@yunsilicon.com>
Signed-off-by: Renyong Wan <wa...@yunsilicon.com>
---
 drivers/net/xsc/xsc_defs.h |  2 ++
 drivers/net/xsc/xsc_rx.c   | 18 ++++++++++++++++--
 drivers/net/xsc/xsc_rx.h   |  1 +
 drivers/net/xsc/xsc_vfio.c |  2 +-
 4 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/net/xsc/xsc_defs.h b/drivers/net/xsc/xsc_defs.h
index 4b95a0521d..73941901e7 100644
--- a/drivers/net/xsc/xsc_defs.h
+++ b/drivers/net/xsc/xsc_defs.h
@@ -46,10 +46,12 @@
 #define XSC_PF_TX_DB_ADDR              0x4802000
 #define XSC_PF_RX_DB_ADDR              0x4804000
 #define XSC_PF_CQ_DB_ADDR              0x2120000
+#define XSC_PF_CQ_PID_START_ADDR       0x2080000
 
 #define XSC_VF_RX_DB_ADDR              0x8d4
 #define XSC_VF_TX_DB_ADDR              0x8d0
 #define XSC_VF_CQ_DB_ADDR              0x8c4
+#define XSC_VF_CQ_PID_START_ADDR       0x60000
 
 #define XSC_HIF_CMDQM_VECTOR_ID_MEM_ADDR       0x1034000
 
diff --git a/drivers/net/xsc/xsc_rx.c b/drivers/net/xsc/xsc_rx.c
index 332ceb9606..5f8003a1f6 100644
--- a/drivers/net/xsc/xsc_rx.c
+++ b/drivers/net/xsc/xsc_rx.c
@@ -83,6 +83,16 @@ xsc_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, 
uint16_t pkts_n)
        int cqe_msg_len = 0;
        volatile struct xsc_cqe_u64 *cqe_u64 = NULL;
        struct rte_mbuf *rep;
+       uint16_t cq_pi;
+       uint16_t cqe_pkts_n = 0;
+
+       if (rxq->cq_pi != NULL) {
+               cq_pi = (*(volatile uint32_t *)(rxq->cq_pi)) & 0xFFFF;
+               if (cq_pi == rxq->cq_ci)
+                       return 0;
+               cqe_pkts_n = (uint16_t)((cq_pi - rxq->cq_ci) & 0xFFFF);
+               pkts_n = pkts_n < cqe_pkts_n ? pkts_n : cqe_pkts_n;
+       }
 
        while (pkts_n) {
                uint32_t idx = rq_ci & wqe_m;
@@ -463,12 +473,16 @@ xsc_rss_qp_create(struct xsc_ethdev_priv *priv, int 
port_id)
                }
 
                rxq_data->wqes = rxq_data->rq_pas->addr;
-               if (!xsc_dev_is_vf(xdev))
+               if (!xsc_dev_is_vf(xdev)) {
                        rxq_data->rq_db = (uint32_t *)((uint8_t 
*)xdev->bar_addr +
                                          XSC_PF_RX_DB_ADDR);
-               else
+                       rxq_data->cq_pi = (uint32_t *)((uint8_t 
*)xdev->bar_addr +
+                                         XSC_PF_CQ_PID_START_ADDR + 
rxq_data->cqn * 4);
+               } else {
                        rxq_data->rq_db = (uint32_t *)((uint8_t 
*)xdev->bar_addr +
                                          XSC_VF_RX_DB_ADDR);
+                       rxq_data->cq_pi = NULL;
+               }
 
                rxq_data->qpn = rqn_base + i;
                xsc_dev_modify_qp_status(xdev, rxq_data->qpn, 1, 
XSC_CMD_OP_RTR2RTS_QP);
diff --git a/drivers/net/xsc/xsc_rx.h b/drivers/net/xsc/xsc_rx.h
index 90fbb89197..59cbbb5141 100644
--- a/drivers/net/xsc/xsc_rx.h
+++ b/drivers/net/xsc/xsc_rx.h
@@ -31,6 +31,7 @@ struct __rte_cache_aligned xsc_rxq_data {
        uint16_t rsv0:4;
        volatile uint32_t *rq_db;
        volatile uint32_t *cq_db;
+       volatile uint32_t *cq_pi;
        uint32_t rq_ci;
        uint32_t rq_pi;
        uint16_t cq_ci;
diff --git a/drivers/net/xsc/xsc_vfio.c b/drivers/net/xsc/xsc_vfio.c
index 1650a3ab2b..c233728c5f 100644
--- a/drivers/net/xsc/xsc_vfio.c
+++ b/drivers/net/xsc/xsc_vfio.c
@@ -562,7 +562,7 @@ xsc_vfio_rx_cq_create(struct xsc_dev *xdev, struct 
xsc_rx_cq_params *cq_params,
                PMD_DRV_LOG(WARNING, "Port %u rxq %u: cq numa_node=%u, device 
numa_node=%u",
                            port_id, idx, cq_params->socket_id, numa_node);
 
-       cqe_n = cq_params->wqe_s;
+       cqe_n = cq_params->wqe_s * 2;
        log_cq_sz = rte_log2_u32(cqe_n);
        cqe_total_sz = cqe_n * sizeof(struct xsc_cqe);
        pa_num = (cqe_total_sz + XSC_PAGE_SIZE - 1) / XSC_PAGE_SIZE;
-- 
2.25.1

Reply via email to