From: Jie Liu <[email protected]>

Remove the optional drv-sw-stats device argument and make software
statistics always-on. Per-queue software statistics are only read as
point-in-time snapshots when they are accumulated at queue stop/dump,
so the atomic operations with rte_memory_order_relaxed add overhead
without any correctness benefit.

Also rename the high_performance_mode field to no_sched_mode to match
the devargs string definition.

Changes:
- Remove sw_stats_en field from struct sxe2_devargs
- Remove RTE_ATOMIC qualifiers from sxe2_rxq_sw_stats fields
- Replace rte_atomic_fetch_add_explicit(relaxed) with plain addition
- Replace rte_atomic_store/load_explicit(relaxed) with plain assignment
- Remove sw_stats_en conditional checks in Rx fast path, including the
  stats-disabled shortcut in sxe2_rx_pkts_scattered_batch_vec_sse
- Always pass umbcast_flags to vec Rx functions
- Remove unused #include <rte_stdatomic.h>
- Rename high_performance_mode → no_sched_mode in devargs struct
- Change local variable ret from int to int32_t in
  sxe2_parse_eth_devargs to match the function's return type

Signed-off-by: Jie Liu <[email protected]>
---
 drivers/net/sxe2/sxe2_ethdev.c          |  2 +-
 drivers/net/sxe2/sxe2_ethdev.h          |  3 +-
 drivers/net/sxe2/sxe2_queue.h           | 15 ++++---
 drivers/net/sxe2/sxe2_rx.c              | 55 +++++++------------------
 drivers/net/sxe2/sxe2_txrx_poll.c       | 38 ++++++-----------
 drivers/net/sxe2/sxe2_txrx_vec_common.h | 52 ++++++++++-------------
 drivers/net/sxe2/sxe2_txrx_vec_sse.c    | 29 +------------
 7 files changed, 61 insertions(+), 133 deletions(-)

diff --git a/drivers/net/sxe2/sxe2_ethdev.c b/drivers/net/sxe2/sxe2_ethdev.c
index b6cc8703a7..066e1faf7e 100644
--- a/drivers/net/sxe2/sxe2_ethdev.c
+++ b/drivers/net/sxe2/sxe2_ethdev.c
@@ -891,7 +891,7 @@ static int32_t sxe2_eth_pmd_probe_pf(struct 
sxe2_common_device *cdev,
 static int32_t sxe2_parse_eth_devargs(struct rte_device *dev,
                          struct rte_eth_devargs *eth_da)
 {
-       int ret = 0;
+       int32_t ret = 0;
 
        if (dev->devargs == NULL)
                return 0;
diff --git a/drivers/net/sxe2/sxe2_ethdev.h b/drivers/net/sxe2/sxe2_ethdev.h
index a3706945e8..8015d9a064 100644
--- a/drivers/net/sxe2/sxe2_ethdev.h
+++ b/drivers/net/sxe2/sxe2_ethdev.h
@@ -130,9 +130,8 @@ struct sxe2_devargs {
        uint8_t flow_dup_pattern_mode;
        uint8_t func_flow_direct_en;
        uint8_t fnav_stat_type;
-       uint8_t high_performance_mode;
+       uint8_t no_sched_mode;
        uint8_t sched_layer_mode;
-       uint8_t sw_stats_en;
        uint8_t rx_low_latency;
 };
 
diff --git a/drivers/net/sxe2/sxe2_queue.h b/drivers/net/sxe2/sxe2_queue.h
index adb4be1214..a300b66771 100644
--- a/drivers/net/sxe2/sxe2_queue.h
+++ b/drivers/net/sxe2/sxe2_queue.h
@@ -7,7 +7,6 @@
 
 #include <rte_ethdev.h>
 #include <rte_io.h>
-#include <rte_stdatomic.h>
 #include <ethdev_driver.h>
 
 #include "sxe2_drv_cmd.h"
@@ -123,13 +122,13 @@ struct sxe2_rxq_stats {
 };
 
 struct sxe2_rxq_sw_stats {
-       RTE_ATOMIC(uint64_t)pkts;
-       RTE_ATOMIC(uint64_t)bytes;
-       RTE_ATOMIC(uint64_t)drop_pkts;
-       RTE_ATOMIC(uint64_t)drop_bytes;
-       RTE_ATOMIC(uint64_t)unicast_pkts;
-       RTE_ATOMIC(uint64_t)multicast_pkts;
-       RTE_ATOMIC(uint64_t)broadcast_pkts;
+       uint64_t pkts;
+       uint64_t bytes;
+       uint64_t drop_pkts;
+       uint64_t drop_bytes;
+       uint64_t unicast_pkts;
+       uint64_t multicast_pkts;
+       uint64_t broadcast_pkts;
 };
 
 struct sxe2_rx_queue {
diff --git a/drivers/net/sxe2/sxe2_rx.c b/drivers/net/sxe2/sxe2_rx.c
index 28832d5f71..543d825166 100644
--- a/drivers/net/sxe2/sxe2_rx.c
+++ b/drivers/net/sxe2/sxe2_rx.c
@@ -479,20 +479,13 @@ int32_t __rte_cold sxe2_rxqs_all_start(struct rte_eth_dev 
*dev)
                        goto l_free_started_queue;
                }
 
-               rte_atomic_store_explicit(&rxq->sw_stats.pkts, 0,
-                       rte_memory_order_relaxed);
-               rte_atomic_store_explicit(&rxq->sw_stats.bytes, 0,
-                       rte_memory_order_relaxed);
-               rte_atomic_store_explicit(&rxq->sw_stats.drop_pkts, 0,
-                       rte_memory_order_relaxed);
-               rte_atomic_store_explicit(&rxq->sw_stats.drop_bytes, 0,
-                       rte_memory_order_relaxed);
-               rte_atomic_store_explicit(&rxq->sw_stats.unicast_pkts, 0,
-                       rte_memory_order_relaxed);
-               rte_atomic_store_explicit(&rxq->sw_stats.broadcast_pkts, 0,
-                       rte_memory_order_relaxed);
-               rte_atomic_store_explicit(&rxq->sw_stats.multicast_pkts, 0,
-                       rte_memory_order_relaxed);
+               rxq->sw_stats.pkts = 0;
+               rxq->sw_stats.bytes = 0;
+               rxq->sw_stats.drop_pkts = 0;
+               rxq->sw_stats.drop_bytes = 0;
+               rxq->sw_stats.unicast_pkts = 0;
+               rxq->sw_stats.broadcast_pkts = 0;
+               rxq->sw_stats.multicast_pkts = 0;
        }
        ret = 0;
        goto l_end;
@@ -524,31 +517,15 @@ void __rte_cold sxe2_rxqs_all_stop(struct rte_eth_dev 
*dev)
 
                rxq = dev->data->rx_queues[nb_rxq];
                if (rxq) {
-                       sw_stats_prev->ipackets +=
-                               rte_atomic_load_explicit(&rxq->sw_stats.pkts,
-                                       rte_memory_order_relaxed);
-                       sw_stats_prev->ierrors +=
-                               
rte_atomic_load_explicit(&rxq->sw_stats.drop_pkts,
-                                       rte_memory_order_relaxed);
-                       sw_stats_prev->ibytes +=
-                               rte_atomic_load_explicit(&rxq->sw_stats.bytes,
-                                       rte_memory_order_relaxed);
-
-                       sw_stats_prev->rx_sw_unicast_packets +=
-                               
rte_atomic_load_explicit(&rxq->sw_stats.unicast_pkts,
-                                       rte_memory_order_relaxed);
-                       sw_stats_prev->rx_sw_broadcast_packets +=
-                               
rte_atomic_load_explicit(&rxq->sw_stats.broadcast_pkts,
-                                       rte_memory_order_relaxed);
-                       sw_stats_prev->rx_sw_multicast_packets +=
-                               
rte_atomic_load_explicit(&rxq->sw_stats.multicast_pkts,
-                                       rte_memory_order_relaxed);
-                       sw_stats_prev->rx_sw_drop_packets +=
-                               
rte_atomic_load_explicit(&rxq->sw_stats.drop_pkts,
-                                       rte_memory_order_relaxed);
-                       sw_stats_prev->rx_sw_drop_bytes +=
-                               
rte_atomic_load_explicit(&rxq->sw_stats.drop_bytes,
-                                       rte_memory_order_relaxed);
+                       sw_stats_prev->ipackets += rxq->sw_stats.pkts;
+                       sw_stats_prev->ierrors += rxq->sw_stats.drop_pkts;
+                       sw_stats_prev->ibytes += rxq->sw_stats.bytes;
+
+                       sw_stats_prev->rx_sw_unicast_packets += 
rxq->sw_stats.unicast_pkts;
+                       sw_stats_prev->rx_sw_broadcast_packets += 
rxq->sw_stats.broadcast_pkts;
+                       sw_stats_prev->rx_sw_multicast_packets += 
rxq->sw_stats.multicast_pkts;
+                       sw_stats_prev->rx_sw_drop_packets += 
rxq->sw_stats.drop_pkts;
+                       sw_stats_prev->rx_sw_drop_bytes += 
rxq->sw_stats.drop_bytes;
                }
        }
 }
diff --git a/drivers/net/sxe2/sxe2_txrx_poll.c 
b/drivers/net/sxe2/sxe2_txrx_poll.c
index b9d34afb31..947a5247ed 100644
--- a/drivers/net/sxe2/sxe2_txrx_poll.c
+++ b/drivers/net/sxe2/sxe2_txrx_poll.c
@@ -682,23 +682,17 @@ sxe2_rx_sw_stats_update(struct sxe2_rx_queue *rxq, struct 
rte_mbuf *mbuf,
                union sxe2_rx_desc *rxd)
 {
        uint64_t qword1 = rte_le_to_cpu_64(rxd->wb.status_err_ptype_len);
-       rte_atomic_fetch_add_explicit(&rxq->sw_stats.pkts, 1,
-               rte_memory_order_relaxed);
-       rte_atomic_fetch_add_explicit(&rxq->sw_stats.bytes,
-                       mbuf->pkt_len + RTE_ETHER_CRC_LEN,
-                       rte_memory_order_relaxed);
+       rxq->sw_stats.pkts += 1;
+       rxq->sw_stats.bytes += mbuf->pkt_len + RTE_ETHER_CRC_LEN;
        switch (SXE2_RX_DESC_STATUS_UMBCAST_VAL_GET(qword1)) {
        case SXE2_RX_DESC_STATUS_UNICAST:
-               rte_atomic_fetch_add_explicit(&rxq->sw_stats.unicast_pkts, 1,
-                       rte_memory_order_relaxed);
+               rxq->sw_stats.unicast_pkts += 1;
                break;
        case SXE2_RX_DESC_STATUS_MULTICAST:
-               rte_atomic_fetch_add_explicit(&rxq->sw_stats.multicast_pkts, 1,
-                       rte_memory_order_relaxed);
+               rxq->sw_stats.multicast_pkts += 1;
                break;
        case SXE2_RX_DESC_STATUS_BROADCAST:
-               rte_atomic_fetch_add_explicit(&rxq->sw_stats.broadcast_pkts, 1,
-                       rte_memory_order_relaxed);
+               rxq->sw_stats.broadcast_pkts += 1;
                break;
        default:
                break;
@@ -787,11 +781,9 @@ uint16_t sxe2_rx_pkts_scattered(void *rx_queue, struct 
rte_mbuf **rx_pkts, uint1
 
                if (unlikely(qword1 & SXE2_RX_DESC_ERROR_RXE_MASK) ||
                        unlikely(qword1 & SXE2_RX_DESC_ERROR_OVERSIZE_MASK)) {
-                       rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_pkts, 
1,
-                               rte_memory_order_relaxed);
-                       rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_bytes,
-                               first_seg->pkt_len - rxq->crc_len + 
RTE_ETHER_CRC_LEN,
-                               rte_memory_order_relaxed);
+                       rxq->sw_stats.drop_pkts += 1;
+                       rxq->sw_stats.drop_bytes +=
+                               first_seg->pkt_len - rxq->crc_len + 
RTE_ETHER_CRC_LEN;
                        rte_pktmbuf_free(first_seg);
                        first_seg = NULL;
                        continue;
@@ -822,8 +814,7 @@ uint16_t sxe2_rx_pkts_scattered(void *rx_queue, struct 
rte_mbuf **rx_pkts, uint1
 
                sxe2_rx_mbuf_common_fields_fill(rxq, first_seg, &desc_tmp);
 
-               if (rxq->vsi->adapter->devargs.sw_stats_en)
-                       sxe2_rx_sw_stats_update(rxq, first_seg, &desc_tmp);
+               sxe2_rx_sw_stats_update(rxq, first_seg, &desc_tmp);
 
                rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr, 
first_seg->data_off));
 
@@ -990,11 +981,9 @@ uint16_t sxe2_rx_pkts_scattered_split(void *rx_queue, 
struct rte_mbuf **rx_pkts,
 
                if (unlikely(qword1 & SXE2_RX_DESC_ERROR_RXE_MASK) ||
                        unlikely(qword1 & SXE2_RX_DESC_ERROR_OVERSIZE_MASK)) {
-                       rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_pkts, 
1,
-                               rte_memory_order_relaxed);
-                       rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_bytes,
-                               first_seg->pkt_len - rxq->crc_len + 
RTE_ETHER_CRC_LEN,
-                               rte_memory_order_relaxed);
+                       rxq->sw_stats.drop_pkts += 1;
+                       rxq->sw_stats.drop_bytes +=
+                               first_seg->pkt_len - rxq->crc_len + 
RTE_ETHER_CRC_LEN;
                        rte_pktmbuf_free(first_seg);
                        first_seg = NULL;
                        continue;
@@ -1023,8 +1012,7 @@ uint16_t sxe2_rx_pkts_scattered_split(void *rx_queue, 
struct rte_mbuf **rx_pkts,
                first_seg->port = rxq->port_id;
                sxe2_rx_mbuf_common_fields_fill(rxq, first_seg, &desc_tmp);
 
-               if (rxq->vsi->adapter->devargs.sw_stats_en)
-                       sxe2_rx_sw_stats_update(rxq, first_seg, &desc_tmp);
+               sxe2_rx_sw_stats_update(rxq, first_seg, &desc_tmp);
 
                rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr, 
first_seg->data_off));
 
diff --git a/drivers/net/sxe2/sxe2_txrx_vec_common.h 
b/drivers/net/sxe2/sxe2_txrx_vec_common.h
index 6b1649c390..cc74f6e582 100644
--- a/drivers/net/sxe2/sxe2_txrx_vec_common.h
+++ b/drivers/net/sxe2/sxe2_txrx_vec_common.h
@@ -130,27 +130,20 @@ sxe2_tx_desc_fill_offloads(struct rte_mbuf *mbuf, 
uint64_t *desc_qw1)
 static inline void sxe2_vf_rx_vec_sw_stats_cnt(struct sxe2_rx_queue *rxq,
                struct rte_mbuf *mbuf, uint8_t umbcast_flag)
 {
-       if (rxq->vsi->adapter->devargs.sw_stats_en) {
-               rte_atomic_fetch_add_explicit(&rxq->sw_stats.pkts, 1,
-                                       rte_memory_order_relaxed);
-               rte_atomic_fetch_add_explicit(&rxq->sw_stats.bytes,
-                                mbuf->pkt_len + RTE_ETHER_CRC_LEN, 
rte_memory_order_relaxed);
-               switch (SXE2_RX_UMBCAST_FLAGS_VAL_GET(umbcast_flag)) {
-               case SXE2_RX_DESC_STATUS_UNICAST:
-                       
rte_atomic_fetch_add_explicit(&rxq->sw_stats.unicast_pkts, 1,
-                                       rte_memory_order_relaxed);
-                       break;
-               case SXE2_RX_DESC_STATUS_MULTICAST:
-                       
rte_atomic_fetch_add_explicit(&rxq->sw_stats.multicast_pkts, 1,
-                                       rte_memory_order_relaxed);
-                       break;
-               case SXE2_RX_DESC_STATUS_BROADCAST:
-                       
rte_atomic_fetch_add_explicit(&rxq->sw_stats.broadcast_pkts, 1,
-                                       rte_memory_order_relaxed);
-                       break;
-               default:
-                       break;
-               }
+       rxq->sw_stats.pkts += 1;
+       rxq->sw_stats.bytes += mbuf->pkt_len + RTE_ETHER_CRC_LEN;
+       switch (SXE2_RX_UMBCAST_FLAGS_VAL_GET(umbcast_flag)) {
+       case SXE2_RX_DESC_STATUS_UNICAST:
+               rxq->sw_stats.unicast_pkts += 1;
+               break;
+       case SXE2_RX_DESC_STATUS_MULTICAST:
+               rxq->sw_stats.multicast_pkts += 1;
+               break;
+       case SXE2_RX_DESC_STATUS_BROADCAST:
+               rxq->sw_stats.broadcast_pkts += 1;
+               break;
+       default:
+               break;
        }
 }
 
@@ -196,11 +189,9 @@ sxe2_rx_pkts_refactor(struct sxe2_rx_queue *rxq,
                        } else if (split_rxe_flags[buf_idx] & 
SXE2_RX_DESC_STATUS_EOP_MASK) {
                                continue;
                        } else {
-                               
rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_pkts, 1,
-                                       rte_memory_order_relaxed);
-                               
rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_bytes,
-                                first_seg->pkt_len - rxq->crc_len + 
RTE_ETHER_CRC_LEN,
-                                rte_memory_order_relaxed);
+                               rxq->sw_stats.drop_pkts += 1;
+                               rxq->sw_stats.drop_bytes +=
+                                       first_seg->pkt_len - rxq->crc_len + 
RTE_ETHER_CRC_LEN;
                                rte_pktmbuf_free(first_seg);
                                first_seg = NULL;
                                last_seg  = NULL;
@@ -218,11 +209,10 @@ sxe2_rx_pkts_refactor(struct sxe2_rx_queue *rxq,
                                mbuf_bufs[buf_idx]->data_len += rxq->crc_len;
                                mbuf_bufs[buf_idx]->pkt_len  += rxq->crc_len;
                        } else {
-                               
rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_pkts, 1,
-                                       rte_memory_order_relaxed);
-                               
rte_atomic_fetch_add_explicit(&rxq->sw_stats.drop_bytes,
-                                mbuf_bufs[buf_idx]->pkt_len - rxq->crc_len + 
RTE_ETHER_CRC_LEN,
-                                rte_memory_order_relaxed);
+                               rxq->sw_stats.drop_pkts += 1;
+                               rxq->sw_stats.drop_bytes +=
+                                       mbuf_bufs[buf_idx]->pkt_len - 
rxq->crc_len +
+                                       RTE_ETHER_CRC_LEN;
                                rte_pktmbuf_free_seg(mbuf_bufs[buf_idx]);
                                continue;
                        }
diff --git a/drivers/net/sxe2/sxe2_txrx_vec_sse.c 
b/drivers/net/sxe2/sxe2_txrx_vec_sse.c
index f6e3f45937..182a7dfc17 100644
--- a/drivers/net/sxe2/sxe2_txrx_vec_sse.c
+++ b/drivers/net/sxe2/sxe2_txrx_vec_sse.c
@@ -483,41 +483,16 @@ static __rte_always_inline uint16_t
 sxe2_rx_pkts_scattered_batch_vec_sse(struct sxe2_rx_queue *rxq,
                struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
-       const uint64_t *split_rxe_flags64;
        uint8_t split_rxe_flags[SXE2_RX_PKTS_BURST_BATCH_NUM_VEC] = {0};
        uint8_t umbcast_flags[SXE2_RX_PKTS_BURST_BATCH_NUM_VEC] = {0};
        uint16_t rx_done_num;
        uint16_t rx_pkt_done_num;
        rx_pkt_done_num = 0;
 
-       if (rxq->vsi->adapter->devargs.sw_stats_en) {
-               rx_done_num = sxe2_rx_pkts_common_vec_sse(rxq, rx_pkts,
-                               nb_pkts, split_rxe_flags, umbcast_flags);
-       } else {
-               rx_done_num = sxe2_rx_pkts_common_vec_sse(rxq, rx_pkts,
-                               nb_pkts, split_rxe_flags, NULL);
-       }
+       rx_done_num = sxe2_rx_pkts_common_vec_sse(rxq, rx_pkts,
+                       nb_pkts, split_rxe_flags, umbcast_flags);
        if (rx_done_num == 0)
                goto l_end;
-       if (!rxq->vsi->adapter->devargs.sw_stats_en) {
-               split_rxe_flags64 = (uint64_t *)split_rxe_flags;
-               if (rxq->pkt_first_seg == NULL &&
-                       split_rxe_flags64[0] == 0 &&
-                       split_rxe_flags64[1] == 0 &&
-                       split_rxe_flags64[2] == 0 &&
-                       split_rxe_flags64[3] == 0) {
-                       rx_pkt_done_num = rx_done_num;
-                       goto l_end;
-               }
-               if (rxq->pkt_first_seg == NULL) {
-                       while (rx_pkt_done_num < rx_done_num &&
-                              split_rxe_flags[rx_pkt_done_num] == 0)
-                               rx_pkt_done_num++;
-                       if (rx_pkt_done_num == rx_done_num)
-                               goto l_end;
-                       rxq->pkt_first_seg = rx_pkts[rx_pkt_done_num];
-               }
-       }
        rx_pkt_done_num += sxe2_rx_pkts_refactor(rxq, &rx_pkts[rx_pkt_done_num],
                        rx_done_num - rx_pkt_done_num, 
&split_rxe_flags[rx_pkt_done_num],
                        &umbcast_flags[rx_pkt_done_num]);
-- 
2.52.0

Reply via email to