Allow per-NAPI gro_flush_timeout setting.

The existing sysfs parameter is respected; writes to sysfs will write to
all NAPI structs for the device and the net_device gro_flush_timeout
field. Reads from sysfs will read from the net_device field.

The ability to set gro_flush_timeout on specific NAPI instances will be
added in a later commit, via netdev-genl.

Note that idpf has embedded napi_struct in its internals and has
established some series of asserts that involve the size of napi
structure. Since this change increases the napi_struct size from 400 to
416 (according to pahole on my system), I've increased the assertion in
idpf by 16 bytes. No attention whatsoever was paid to the cacheline
placement of idpf internals as a result of this change.

Signed-off-by: Joe Damato <[email protected]>
---
 .../networking/net_cachelines/net_device.rst  |  2 +-
 drivers/net/ethernet/intel/idpf/idpf_txrx.h   |  2 +-
 include/linux/netdevice.h                     |  3 +-
 net/core/dev.c                                | 12 +++---
 net/core/dev.h                                | 40 +++++++++++++++++++
 net/core/net-sysfs.c                          |  2 +-
 6 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst
index eeeb7c925ec5..3d02ae79c850 100644
--- a/Documentation/networking/net_cachelines/net_device.rst
+++ b/Documentation/networking/net_cachelines/net_device.rst
@@ -98,7 +98,6 @@ struct_netdev_queue*                _rx                     read_mostly
 unsigned_int                        num_rx_queues
 unsigned_int                        real_num_rx_queues      -                   read_mostly         get_rps_cpu
 struct_bpf_prog*                    xdp_prog                -                   read_mostly         netif_elide_gro()
-unsigned_long                       gro_flush_timeout       -                   read_mostly         napi_complete_done
 unsigned_int                        gro_max_size            -                   read_mostly         skb_gro_receive
 unsigned_int                        gro_ipv4_max_size       -                   read_mostly         skb_gro_receive
 rx_handler_func_t*                  rx_handler              read_mostly         -                   __netif_receive_skb_core
@@ -182,4 +181,5 @@ struct_devlink_port*                devlink_port
 struct_dpll_pin*                    dpll_pin
 struct hlist_head                   page_pools
 struct dim_irq_moder*               irq_moder
+unsigned_long                       gro_flush_timeout
 u32                                 napi_defer_hard_irqs
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index f0537826f840..fcdf73486d46 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -438,7 +438,7 @@ struct idpf_q_vector {
        __cacheline_group_end_aligned(cold);
 };
 libeth_cacheline_set_assert(struct idpf_q_vector, 112,
-                           424 + 2 * sizeof(struct dim),
+                           440 + 2 * sizeof(struct dim),
                            8 + sizeof(cpumask_var_t));
 
 struct idpf_rx_queue_stats {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 55764efc5c93..33897edd16c8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -377,6 +377,7 @@ struct napi_struct {
        struct list_head        dev_list;
        struct hlist_node       napi_hash_node;
        int                     irq;
+       unsigned long           gro_flush_timeout;
        u32                     defer_hard_irqs;
 };
 
@@ -2075,7 +2076,6 @@ struct net_device {
        int                     ifindex;
        unsigned int            real_num_rx_queues;
        struct netdev_rx_queue  *_rx;
-       unsigned long           gro_flush_timeout;
        unsigned int            gro_max_size;
        unsigned int            gro_ipv4_max_size;
        rx_handler_func_t __rcu *rx_handler;
@@ -2398,6 +2398,7 @@ struct net_device {
 
        /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */
        struct dim_irq_moder    *irq_moder;
+       unsigned long           gro_flush_timeout;
        u32                     napi_defer_hard_irqs;
 
        u8                      priv[] ____cacheline_aligned
diff --git a/net/core/dev.c b/net/core/dev.c
index 748739958d2a..056ed44f766f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6226,12 +6226,12 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 
        if (work_done) {
                if (n->gro_bitmask)
-                       timeout = READ_ONCE(n->dev->gro_flush_timeout);
+                       timeout = napi_get_gro_flush_timeout(n);
                n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n);
        }
        if (n->defer_hard_irqs_count > 0) {
                n->defer_hard_irqs_count--;
-               timeout = READ_ONCE(n->dev->gro_flush_timeout);
+               timeout = napi_get_gro_flush_timeout(n);
                if (timeout)
                        ret = false;
        }
@@ -6366,7 +6366,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
 
        if (flags & NAPI_F_PREFER_BUSY_POLL) {
                napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
-               timeout = READ_ONCE(napi->dev->gro_flush_timeout);
+               timeout = napi_get_gro_flush_timeout(napi);
                if (napi->defer_hard_irqs_count && timeout) {
 			hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
                        skip_schedule = true;
@@ -6648,6 +6648,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
        hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
        napi->timer.function = napi_watchdog;
        napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs));
+       napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout));
        init_gro_hash(napi);
        napi->skb = NULL;
        INIT_LIST_HEAD(&napi->rx_list);
@@ -11053,7 +11054,7 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
        WARN_ON(dev->reg_state == NETREG_REGISTERED);
 
        if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
-               dev->gro_flush_timeout = 20000;
+               netdev_set_gro_flush_timeout(dev, 20000);
                netdev_set_defer_hard_irqs(dev, 1);
        }
 }
@@ -11991,7 +11992,6 @@ static void __init net_dev_struct_check(void)
 	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex);
 	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
 	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
-	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
 	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
 	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
 	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
@@ -12003,7 +12003,7 @@ static void __init net_dev_struct_check(void)
 #ifdef CONFIG_NET_XGRESS
 	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
 #endif
-	CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 100);
+	CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 92);
 }
 
 /*
diff --git a/net/core/dev.h b/net/core/dev.h
index b3792219879b..26e598aa56c3 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -174,6 +174,46 @@ static inline void netdev_set_defer_hard_irqs(struct net_device *netdev,
                napi_set_defer_hard_irqs(napi, defer);
 }
 
+/**
+ * napi_get_gro_flush_timeout - get the gro_flush_timeout
+ * @n: napi struct to get the gro_flush_timeout from
+ *
+ * Return: the per-NAPI value of the gro_flush_timeout field.
+ */
+static inline unsigned long
+napi_get_gro_flush_timeout(const struct napi_struct *n)
+{
+       return READ_ONCE(n->gro_flush_timeout);
+}
+
+/**
+ * napi_set_gro_flush_timeout - set the gro_flush_timeout for a napi
+ * @n: napi struct to set the gro_flush_timeout
+ * @timeout: timeout value to set
+ *
+ * napi_set_gro_flush_timeout sets the per-NAPI gro_flush_timeout
+ */
+static inline void napi_set_gro_flush_timeout(struct napi_struct *n,
+                                             unsigned long timeout)
+{
+       WRITE_ONCE(n->gro_flush_timeout, timeout);
+}
+
+/**
+ * netdev_set_gro_flush_timeout - set gro_flush_timeout of a netdev's NAPIs
+ * @netdev: the net_device for which all NAPIs will have gro_flush_timeout set
+ * @timeout: the timeout value to set
+ */
+static inline void netdev_set_gro_flush_timeout(struct net_device *netdev,
+                                               unsigned long timeout)
+{
+       struct napi_struct *napi;
+
+       WRITE_ONCE(netdev->gro_flush_timeout, timeout);
+       list_for_each_entry(napi, &netdev->napi_list, dev_list)
+               napi_set_gro_flush_timeout(napi, timeout);
+}
+
 int rps_cpumask_housekeeping(struct cpumask *mask);
 
 #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 25125f356a15..2d9afc6e2161 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -409,7 +409,7 @@ NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);
 
 static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
 {
-       WRITE_ONCE(dev->gro_flush_timeout, val);
+       netdev_set_gro_flush_timeout(dev, val);
        return 0;
 }
 
-- 
2.25.1
