Track transmit counters per TX queue to avoid cache line contention in the xmit hot path. Expose the per-queue counters via ethtool -S, and sum them in ndo_get_stats64() to report the device-level TX totals.
Global tx_large_packets and tx_send_failed continue to be aggregated on the ethtool read path for backward compatibility with existing tools. Signed-off-by: Mingming Cao <[email protected]> Reviewed-by: Dave Marquardt <[email protected]> --- drivers/net/ethernet/ibm/ibmveth.c | 129 +++++++++++++++++++++++++---- drivers/net/ethernet/ibm/ibmveth.h | 13 +++ 2 files changed, 124 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 1c08082ffbd6..4e3f49b6346f 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -252,6 +252,33 @@ static void ibmveth_free_rx_qstats(struct ibmveth_adapter *adapter) adapter->rx_qstats = NULL; } +/** + * ibmveth_alloc_tx_qstats - Allocate per-queue TX statistics + * @adapter: ibmveth adapter structure + * + * Return: 0 on success, -ENOMEM on failure + */ +static int ibmveth_alloc_tx_qstats(struct ibmveth_adapter *adapter) +{ + adapter->tx_qstats = kcalloc(IBMVETH_MAX_QUEUES, + sizeof(struct ibmveth_tx_queue_stats), + GFP_KERNEL); + if (!adapter->tx_qstats) + return -ENOMEM; + + return 0; +} + +/** + * ibmveth_free_tx_qstats - Free per-queue TX statistics + * @adapter: ibmveth adapter structure + */ +static void ibmveth_free_tx_qstats(struct ibmveth_adapter *adapter) +{ + kfree(adapter->tx_qstats); + adapter->tx_qstats = NULL; +} + /** * ibmveth_alloc_rx_queues - Allocate per-queue RX resources * @adapter: ibmveth adapter structure @@ -1628,6 +1655,10 @@ static int ibmveth_open(struct net_device *netdev) if (rc) goto out_cleanup_rx_interrupts; + rc = ibmveth_alloc_tx_qstats(adapter); + if (rc) + goto out_free_tx_resources; + netif_tx_start_all_queues(netdev); netdev_dbg(netdev, "open complete\n"); @@ -1668,6 +1699,7 @@ static int ibmveth_close(struct net_device *netdev) } } + ibmveth_free_tx_qstats(adapter); ibmveth_free_tx_resources(adapter); ibmveth_cleanup_rx_interrupts(adapter); ibmveth_update_rx_no_buffer(adapter); @@ -1960,6 
+1992,32 @@ static void ibmveth_aggregate_rx_qstats(struct ibmveth_adapter *adapter) adapter->rx_large_packets = total_large; } +/** + * ibmveth_aggregate_tx_qstats - Sum per-queue TX stats into globals + * @adapter: ibmveth adapter + * + * Cold path only (ethtool). Keeps legacy global counters meaningful for + * tools that read the adapter-level fields in ibmveth_stats[]. + */ +static void ibmveth_aggregate_tx_qstats(struct ibmveth_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + u64 total_large = 0; + u64 total_send_failed = 0; + int i; + + if (!adapter->tx_qstats) + return; + + for (i = 0; i < netdev->real_num_tx_queues; i++) { + total_large += adapter->tx_qstats[i].large_packets; + total_send_failed += adapter->tx_qstats[i].send_failures; + } + + adapter->tx_large_packets = total_large; + adapter->tx_send_failed = total_send_failed; +} + static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct ibmveth_adapter *adapter = netdev_priv(dev); @@ -1984,6 +2042,15 @@ static void ibmveth_get_strings(struct net_device *dev, u32 stringset, u8 *data) ethtool_sprintf(&p, "rx%d_no_buffer_drops", i); } + for (i = 0; i < dev->real_num_tx_queues; i++) { + ethtool_sprintf(&p, "tx%d_packets", i); + ethtool_sprintf(&p, "tx%d_bytes", i); + ethtool_sprintf(&p, "tx%d_large_packets", i); + ethtool_sprintf(&p, "tx%d_dropped_packets", i); + ethtool_sprintf(&p, "tx%d_send_failures", i); + ethtool_sprintf(&p, "tx%d_checksum_offload", i); + } + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { ethtool_sprintf(&p, "pool%d_size", i); ethtool_sprintf(&p, "pool%d_active", i); @@ -1999,6 +2066,7 @@ static int ibmveth_get_sset_count(struct net_device *dev, int sset) case ETH_SS_STATS: return ARRAY_SIZE(ibmveth_stats) + adapter->num_rx_queues * IBMVETH_NUM_RX_QSTATS + + dev->real_num_tx_queues * IBMVETH_NUM_TX_QSTATS + IBMVETH_NUM_BUFF_POOLS * 3; default: return -EOPNOTSUPP; @@ -2012,6 +2080,7 @@ static void ibmveth_get_ethtool_stats(struct 
net_device *dev, int i, j; ibmveth_aggregate_rx_qstats(adapter); + ibmveth_aggregate_tx_qstats(adapter); for (i = 0; i < ARRAY_SIZE(ibmveth_stats); i++) data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset); @@ -2030,6 +2099,19 @@ static void ibmveth_get_ethtool_stats(struct net_device *dev, } } + for (j = 0; j < dev->real_num_tx_queues; j++) { + if (adapter->tx_qstats) { + data[i++] = adapter->tx_qstats[j].packets; + data[i++] = adapter->tx_qstats[j].bytes; + data[i++] = adapter->tx_qstats[j].large_packets; + data[i++] = adapter->tx_qstats[j].dropped_packets; + data[i++] = adapter->tx_qstats[j].send_failures; + data[i++] = adapter->tx_qstats[j].checksum_offload; + } else { + i += IBMVETH_NUM_TX_QSTATS; + } + } + for (j = 0; j < IBMVETH_NUM_BUFF_POOLS; j++) { data[i++] = adapter->rx_buff_pool[0][j].size; data[i++] = adapter->rx_buff_pool[0][j].active; @@ -2152,8 +2234,10 @@ static int ibmveth_send(struct ibmveth_adapter *adapter, } static int ibmveth_is_packet_unsupported(struct sk_buff *skb, - struct net_device *netdev) + struct ibmveth_adapter *adapter, + int queue_num) { + struct net_device *netdev = adapter->netdev; struct ethhdr *ether_header; int ret = 0; @@ -2161,7 +2245,8 @@ static int ibmveth_is_packet_unsupported(struct sk_buff *skb, if (ether_addr_equal(ether_header->h_dest, netdev->dev_addr)) { netdev_dbg(netdev, "veth doesn't support loopback packets, dropping packet.\n"); - netdev->stats.tx_dropped++; + if (adapter->tx_qstats) + adapter->tx_qstats[queue_num].dropped_packets++; ret = -EOPNOTSUPP; } @@ -2177,7 +2262,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, int i, queue_num = skb_get_queue_mapping(skb); unsigned long mss = 0; - if (ibmveth_is_packet_unsupported(skb, netdev)) + if (ibmveth_is_packet_unsupported(skb, adapter, queue_num)) goto out; /* veth can't checksum offload UDP */ if (skb->ip_summed == CHECKSUM_PARTIAL && @@ -2188,7 +2273,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, 
skb_checksum_help(skb)) { netdev_err(netdev, "tx: failed to checksum packet\n"); - netdev->stats.tx_dropped++; + adapter->tx_qstats[queue_num].dropped_packets++; goto out; } @@ -2200,6 +2285,8 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, desc_flags |= (IBMVETH_BUF_NO_CSUM | IBMVETH_BUF_CSUM_GOOD); + adapter->tx_qstats[queue_num].checksum_offload++; + /* Need to zero out the checksum */ buf[0] = 0; buf[1] = 0; @@ -2211,7 +2298,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) { if (adapter->fw_large_send_support) { mss = (unsigned long)skb_shinfo(skb)->gso_size; - adapter->tx_large_packets++; + adapter->tx_qstats[queue_num].large_packets++; } else if (!skb_is_gso_v6(skb)) { /* Put -1 in the IP checksum to tell phyp it * is a largesend packet. Put the mss in @@ -2220,7 +2307,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, ip_hdr(skb)->check = 0xffff; tcp_hdr(skb)->check = cpu_to_be16(skb_shinfo(skb)->gso_size); - adapter->tx_large_packets++; + adapter->tx_qstats[queue_num].large_packets++; } } @@ -2228,7 +2315,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, if (unlikely(skb->len > adapter->tx_ltb_size)) { netdev_err(adapter->netdev, "tx: packet size (%u) exceeds ltb (%u)\n", skb->len, adapter->tx_ltb_size); - netdev->stats.tx_dropped++; + adapter->tx_qstats[queue_num].dropped_packets++; goto out; } memcpy(adapter->tx_ltb_ptr[queue_num], skb->data, skb_headlen(skb)); @@ -2245,7 +2332,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, if (unlikely(total_bytes != skb->len)) { netdev_err(adapter->netdev, "tx: incorrect packet len copied into ltb (%u != %u)\n", skb->len, total_bytes); - netdev->stats.tx_dropped++; + adapter->tx_qstats[queue_num].dropped_packets++; goto out; } desc.fields.flags_len = desc_flags | skb->len; @@ -2254,11 +2341,11 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, dma_wmb(); if (ibmveth_send(adapter, 
desc.desc, mss)) { - adapter->tx_send_failed++; - netdev->stats.tx_dropped++; + adapter->tx_qstats[queue_num].send_failures++; + adapter->tx_qstats[queue_num].dropped_packets++; } else { - netdev->stats.tx_packets++; - netdev->stats.tx_bytes += skb->len; + adapter->tx_qstats[queue_num].packets++; + adapter->tx_qstats[queue_num].bytes += skb->len; } out: @@ -2759,12 +2846,13 @@ static netdev_features_t ibmveth_features_check(struct sk_buff *skb, } /** - * ibmveth_get_stats64 - Return aggregated per-queue RX statistics + * ibmveth_get_stats64 - Return aggregated per-queue statistics * @dev: network device * @stats: rtnl link statistics storage * - * Sums per-queue rx_qstats into rx_packets/rx_bytes for multi-queue mode. - * TX counters continue to come from netdev->stats (updated in start_xmit). + * Sums per-queue rx_qstats and tx_qstats into the rtnl counters. + * Callers use ndo_get_stats64(); avoid updating netdev->stats on the + * xmit/poll paths to keep per-queue counters off the hot cache line. 
*/ static void ibmveth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) @@ -2779,9 +2867,14 @@ static void ibmveth_get_stats64(struct net_device *dev, } } - stats->tx_packets = dev->stats.tx_packets; - stats->tx_bytes = dev->stats.tx_bytes; - stats->tx_dropped = dev->stats.tx_dropped; + if (adapter->tx_qstats) { + for (i = 0; i < dev->real_num_tx_queues; i++) { + stats->tx_packets += adapter->tx_qstats[i].packets; + stats->tx_bytes += adapter->tx_qstats[i].bytes; + stats->tx_dropped += adapter->tx_qstats[i].dropped_packets; + } + } + stats->tx_errors = dev->stats.tx_errors; } diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index f7b20fd01acb..390c660af979 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -316,9 +316,21 @@ struct ibmveth_rx_queue_stats { u64 no_buffer_drops; }; +struct ibmveth_tx_queue_stats { + u64 packets; + u64 bytes; + u64 large_packets; + u64 dropped_packets; + u64 send_failures; + u64 checksum_offload; +}; + #define IBMVETH_NUM_RX_QSTATS \ (sizeof(struct ibmveth_rx_queue_stats) / sizeof(u64)) +#define IBMVETH_NUM_TX_QSTATS \ + (sizeof(struct ibmveth_tx_queue_stats) / sizeof(u64)) + struct ibmveth_buff_pool { u32 size; u32 index; @@ -386,6 +398,7 @@ struct ibmveth_adapter { /* Multi-queue statistics */ struct ibmveth_hcall_stats hcall_stats; struct ibmveth_rx_queue_stats *rx_qstats; + struct ibmveth_tx_queue_stats *tx_qstats; /* Ethtool settings */ u8 duplex; -- 2.39.3 (Apple Git-146)
