The merge commit 043f84176868 ("Merge branch 'v6.1/standard/base'
into v6.1/standard/cn-sdkv5.15/octeon") incorrectly updated
net/core/dev.c to the version from the v6.6/standard/preempt-rt/base
branch. This change restores the file to its original state as found
in the v6.1/standard/base branch.Signed-off-by: Kevin Hao <[email protected]> --- Hi Bruce, Please merge this into the following branch: v6.1/standard/cn-sdkv5.15/octeon --- net/core/dev.c | 944 ++++++++++++++++++++++++--------------------------------- 1 file changed, 403 insertions(+), 541 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 5b53496b2666fa710bb75e1d5ef403e15f1dd974..69bb7ac73d047aa4428b4d4eaa67381850cf2b2d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -69,7 +69,7 @@ */ #include <linux/uaccess.h> -#include <linux/bitmap.h> +#include <linux/bitops.h> #include <linux/capability.h> #include <linux/cpu.h> #include <linux/types.h> @@ -107,7 +107,6 @@ #include <net/pkt_cls.h> #include <net/checksum.h> #include <net/xfrm.h> -#include <net/tcx.h> #include <linux/highmem.h> #include <linux/init.h> #include <linux/module.h> @@ -133,7 +132,6 @@ #include <trace/events/net.h> #include <trace/events/skb.h> #include <trace/events/qdisc.h> -#include <trace/events/xdp.h> #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> #include <linux/static_key.h> @@ -152,16 +150,18 @@ #include <linux/pm_runtime.h> #include <linux/prandom.h> #include <linux/once_lite.h> -#include <net/netdev_rx_queue.h> #include "dev.h" #include "net-sysfs.h" + static DEFINE_SPINLOCK(ptype_lock); struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; struct list_head ptype_all __read_mostly; /* Taps */ static int netif_rx_internal(struct sk_buff *skb); +static int call_netdevice_notifiers_info(unsigned long val, + struct netdev_notifier_info *info); static int call_netdevice_notifiers_extack(unsigned long val, struct net_device *dev, struct netlink_ext_ack *extack); @@ -396,9 +396,6 @@ static void list_netdevice(struct net_device *dev) netdev_for_each_altname(dev, name_node) netdev_name_node_add(net, name_node); - /* We reserved the ifindex, this can't fail */ - WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL)); - dev_base_seq_inc(net); } @@ -408,12 +405,9 @@ static void list_netdevice(struct net_device *dev) static void unlist_netdevice(struct net_device *dev, bool lock) { struct netdev_name_node *name_node; - struct net *net = dev_net(dev); ASSERT_RTNL(); - xa_erase(&net->dev_by_index, dev->ifindex); - netdev_for_each_altname(dev, name_node) netdev_name_node_del(name_node); @@ -789,7 +783,18 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) } EXPORT_SYMBOL(dev_get_by_name_rcu); -/* Deprecated for new users, call netdev_get_by_name() instead */ +/** + * dev_get_by_name - find a device by its name + * @net: the applicable net namespace + * @name: name to find + * + * Find an interface by name. This can be called from any + * context and does its own locking. The returned handle has + * the usage count incremented and the caller must use dev_put() to + * release it when it is no longer needed. %NULL is returned if no + * matching device is found. + */ + struct net_device *dev_get_by_name(struct net *net, const char *name) { struct net_device *dev; @@ -802,31 +807,6 @@ struct net_device *dev_get_by_name(struct net *net, const char *name) } EXPORT_SYMBOL(dev_get_by_name); -/** - * netdev_get_by_name() - find a device by its name - * @net: the applicable net namespace - * @name: name to find - * @tracker: tracking object for the acquired reference - * @gfp: allocation flags for the tracker - * - * Find an interface by name. This can be called from any - * context and does its own locking. 
The returned handle has - * the usage count incremented and the caller must use netdev_put() to - * release it when it is no longer needed. %NULL is returned if no - * matching device is found. - */ -struct net_device *netdev_get_by_name(struct net *net, const char *name, - netdevice_tracker *tracker, gfp_t gfp) -{ - struct net_device *dev; - - dev = dev_get_by_name(net, name); - if (dev) - netdev_tracker_alloc(dev, tracker, gfp); - return dev; -} -EXPORT_SYMBOL(netdev_get_by_name); - /** * __dev_get_by_index - find a device by its ifindex * @net: the applicable net namespace @@ -876,7 +856,18 @@ struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) } EXPORT_SYMBOL(dev_get_by_index_rcu); -/* Deprecated for new users, call netdev_get_by_index() instead */ + +/** + * dev_get_by_index - find a device by its ifindex + * @net: the applicable net namespace + * @ifindex: index of device + * + * Search for an interface by index. Returns NULL if the device + * is not found or a pointer to the device. The device returned has + * had a reference added and the pointer is safe until the user calls + * dev_put to indicate they have finished with it. + */ + struct net_device *dev_get_by_index(struct net *net, int ifindex) { struct net_device *dev; @@ -889,30 +880,6 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) } EXPORT_SYMBOL(dev_get_by_index); -/** - * netdev_get_by_index() - find a device by its ifindex - * @net: the applicable net namespace - * @ifindex: index of device - * @tracker: tracking object for the acquired reference - * @gfp: allocation flags for the tracker - * - * Search for an interface by index. Returns NULL if the device - * is not found or a pointer to the device. The device returned has - * had a reference added and the pointer is safe until the user calls - * netdev_put() to indicate they have finished with it. - */ -struct net_device *netdev_get_by_index(struct net *net, int ifindex, - netdevice_tracker *tracker, gfp_t gfp) -{ - struct net_device *dev; - - dev = dev_get_by_index(net, ifindex); - if (dev) - netdev_tracker_alloc(dev, tracker, gfp); - return dev; -} -EXPORT_SYMBOL(netdev_get_by_index); - /** * dev_get_by_napi_id - find a device by napi_id * @napi_id: ID of the NAPI struct @@ -1134,7 +1101,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) return -EINVAL; /* Use one page as a bit array of possible slots */ - inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC); + inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC); if (!inuse) return -ENOMEM; @@ -1164,7 +1131,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) } i = find_first_zero_bit(inuse, max_netdevices); - bitmap_free(inuse); + free_page((unsigned long) inuse); } snprintf(buf, IFNAMSIZ, name, i); @@ -1265,6 +1232,22 @@ int dev_change_name(struct net_device *dev, const char *newname) net = dev_net(dev); + /* Some auto-enslaved devices e.g. failover slaves are + * special, as userspace might rename the device after + * the interface had been brought up and running since + * the point kernel initiated auto-enslavement. Allow + * live name change even when these slave devices are + * up and running. + * + * Typically, users of these auto-enslaving devices + * don't actually care about slave name change, as + * they are supposed to operate on master interface + * directly. 
+ */ + if (dev->flags & IFF_UP && + likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK))) + return -EBUSY; + down_write(&devnet_rename_sem); if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { @@ -1281,8 +1264,7 @@ int dev_change_name(struct net_device *dev, const char *newname) } if (oldname[0] && !strchr(oldname, '%')) - netdev_info(dev, "renamed from %s%s\n", oldname, - dev->flags & IFF_UP ? " (while UP)" : ""); + netdev_info(dev, "renamed from %s\n", oldname); old_assign_type = dev->name_assign_type; dev->name_assign_type = NET_NAME_RENAMED; @@ -1420,7 +1402,7 @@ void netdev_state_change(struct net_device *dev) call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info); - rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL, 0, NULL); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); } } EXPORT_SYMBOL(netdev_state_change); @@ -1556,7 +1538,7 @@ int dev_open(struct net_device *dev, struct netlink_ext_ack *extack) if (ret < 0) return ret; - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP | IFF_RUNNING, GFP_KERNEL, 0, NULL); + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); call_netdevice_notifiers(NETDEV_UP, dev); return ret; @@ -1628,7 +1610,7 @@ void dev_close_many(struct list_head *head, bool unlink) __dev_close_many(head); list_for_each_entry_safe(dev, tmp, head, close_list) { - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP | IFF_RUNNING, GFP_KERNEL, 0, NULL); + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); call_netdevice_notifiers(NETDEV_DOWN, dev); if (unlink) list_del_init(&dev->close_list); @@ -1708,15 +1690,14 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd) N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER) N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE) N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE) - N(POST_INIT) N(PRE_UNINIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) - N(CHANGEUPPER) N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) - N(BONDING_INFO) N(PRECHANGEUPPER) N(CHANGELOWERSTATE) - N(UDP_TUNNEL_PUSH_INFO) N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) + N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER) + N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO) + N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO) + N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO) N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO) N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE) N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA) - N(XDP_FEAT_CHANGE) } #undef N return "UNKNOWN_NETDEV_EVENT"; @@ -1943,7 +1924,7 @@ EXPORT_SYMBOL(register_netdevice_notifier_net); * @nb: notifier * * Unregister a notifier previously registered by - * register_netdevice_notifier_net(). The notifier is unlinked from the + * register_netdevice_notifier(). The notifier is unlinked into the * kernel structures and may then be reused. A negative errno code * is returned on a failure. 
* @@ -1964,14 +1945,6 @@ int unregister_netdevice_notifier_net(struct net *net, } EXPORT_SYMBOL(unregister_netdevice_notifier_net); -static void __move_netdevice_notifier_net(struct net *src_net, - struct net *dst_net, - struct notifier_block *nb) -{ - __unregister_netdevice_notifier_net(src_net, nb); - __register_netdevice_notifier_net(dst_net, nb, true); -} - int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) @@ -2008,8 +1981,10 @@ static void move_netdevice_notifiers_dev_net(struct net_device *dev, { struct netdev_net_notifier *nn; - list_for_each_entry(nn, &dev->net_notifier_list, list) - __move_netdevice_notifier_net(dev_net(dev), net, nn->nb); + list_for_each_entry(nn, &dev->net_notifier_list, list) { + __unregister_netdevice_notifier_net(dev_net(dev), nn->nb); + __register_netdevice_notifier_net(net, nn->nb, true); + } } /** @@ -2021,8 +1996,8 @@ static void move_netdevice_notifiers_dev_net(struct net_device *dev, * are as for raw_notifier_call_chain(). */ -int call_netdevice_notifiers_info(unsigned long val, - struct netdev_notifier_info *info) +static int call_netdevice_notifiers_info(unsigned long val, + struct netdev_notifier_info *info) { struct net *net = dev_net(info->dev); int ret; @@ -2168,10 +2143,13 @@ static DECLARE_WORK(netstamp_work, netstamp_clear); void net_enable_timestamp(void) { #ifdef CONFIG_JUMP_LABEL - int wanted = atomic_read(&netstamp_wanted); + int wanted; - while (wanted > 0) { - if (atomic_try_cmpxchg(&netstamp_wanted, &wanted, wanted + 1)) + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 0) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted) return; } atomic_inc(&netstamp_needed_deferred); @@ -2185,10 +2163,13 @@ EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef CONFIG_JUMP_LABEL - int wanted = atomic_read(&netstamp_wanted); + int wanted; - while (wanted > 1) { - if (atomic_try_cmpxchg(&netstamp_wanted, &wanted, wanted - 1)) + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 1) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted) return; } atomic_dec(&netstamp_needed_deferred); @@ -2202,7 +2183,7 @@ EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp_set(struct sk_buff *skb) { skb->tstamp = 0; - skb->tstamp_type = SKB_CLOCK_REALTIME; + skb->mono_delivery_time = 0; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); } @@ -2461,7 +2442,8 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps, struct xps_map *map = NULL; int pos; - map = xmap_dereference(dev_maps->attr_map[tci]); + if (dev_maps) + map = xmap_dereference(dev_maps->attr_map[tci]); if (!map) return false; @@ -3097,8 +3079,6 @@ void netif_set_tso_max_size(struct net_device *dev, unsigned int size) dev->tso_max_size = min(GSO_MAX_SIZE, size); if (size < READ_ONCE(dev->gso_max_size)) netif_set_gso_max_size(dev, size); - if (size < READ_ONCE(dev->gso_ipv4_max_size)) - netif_set_gso_ipv4_max_size(dev, size); } EXPORT_SYMBOL(netif_set_tso_max_size); @@ -3178,7 +3158,7 @@ void __netif_schedule(struct Qdisc *q) EXPORT_SYMBOL(__netif_schedule); struct dev_kfree_skb_cb { - enum skb_drop_reason reason; + enum skb_free_reason reason; }; static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) @@ -3211,7 +3191,7 @@ void netif_tx_wake_queue(struct netdev_queue *dev_queue) } EXPORT_SYMBOL(netif_tx_wake_queue); -void 
dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason) +void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) { unsigned long flags; @@ -3231,16 +3211,18 @@ void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason) raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } -EXPORT_SYMBOL(dev_kfree_skb_irq_reason); +EXPORT_SYMBOL(__dev_kfree_skb_irq); -void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason) +void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason) { if (in_hardirq() || irqs_disabled()) - dev_kfree_skb_irq_reason(skb, reason); + __dev_kfree_skb_irq(skb, reason); + else if (unlikely(reason == SKB_REASON_DROPPED)) + kfree_skb(skb); else - kfree_skb_reason(skb, reason); + consume_skb(skb); } -EXPORT_SYMBOL(dev_kfree_skb_any_reason); +EXPORT_SYMBOL(__dev_kfree_skb_any); /** @@ -3312,7 +3294,7 @@ static u16 skb_tx_hash(const struct net_device *dev, return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; } -void skb_warn_bad_offload(const struct sk_buff *skb) +static void skb_warn_bad_offload(const struct sk_buff *skb) { static const netdev_features_t null_features; struct net_device *dev = skb->dev; @@ -3422,7 +3404,8 @@ int skb_crc32c_csum_help(struct sk_buff *skb) skb->len - start, ~(__u32)0, crc32c_csum_stub)); *(__le32 *)(skb->data + offset) = crc32c_csum; - skb_reset_csum_not_inet(skb); + skb->ip_summed = CHECKSUM_NONE; + skb->csum_not_inet = 0; out: return ret; } @@ -3445,6 +3428,74 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) return vlan_get_protocol_and_depth(skb, type, depth); } +/* openvswitch calls this on rx path, so we need a different check. + */ +static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) +{ + if (tx_path) + return skb->ip_summed != CHECKSUM_PARTIAL && + skb->ip_summed != CHECKSUM_UNNECESSARY; + + return skb->ip_summed == CHECKSUM_NONE; +} + +/** + * __skb_gso_segment - Perform segmentation on skb. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * @tx_path: whether it is called in TX path + * + * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. + * + * Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb. + */ +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path) +{ + struct sk_buff *segs; + + if (unlikely(skb_needs_check(skb, tx_path))) { + int err; + + /* We're going to init ->check field in TCP or UDP header */ + err = skb_cow_head(skb, 0); + if (err < 0) + return ERR_PTR(err); + } + + /* Only report GSO partial support if it will enable us to + * support segmentation on this frame without needing additional + * work. 
+ */ + if (features & NETIF_F_GSO_PARTIAL) { + netdev_features_t partial_features = NETIF_F_GSO_ROBUST; + struct net_device *dev = skb->dev; + + partial_features |= dev->features & dev->gso_partial_features; + if (!skb_gso_ok(skb, features | partial_features)) + features &= ~NETIF_F_GSO_PARTIAL; + } + + BUILD_BUG_ON(SKB_GSO_CB_OFFSET + + sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); + + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); + SKB_GSO_CB(skb)->encap_level = 0; + + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + segs = skb_mac_gso_segment(skb, features); + + if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) + skb_warn_bad_offload(skb); + + return segs; +} +EXPORT_SYMBOL(__skb_gso_segment); /* Take action when hardware reception checksum errors are detected. */ #ifdef CONFIG_BUG @@ -3544,7 +3595,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, if (gso_segs > READ_ONCE(dev->gso_max_segs)) return features & ~NETIF_F_GSO_MASK; - if (unlikely(skb->len >= netif_get_gso_max_size(dev, skb))) + if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size))) return features & ~NETIF_F_GSO_MASK; if (!skb_shinfo(skb)->gso_type) { @@ -3793,25 +3844,25 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) * we add to pkt_len the headers size of all segments */ if (shinfo->gso_size && skb_transport_header_was_set(skb)) { - u16 gso_segs = shinfo->gso_segs; unsigned int hdr_len; + u16 gso_segs = shinfo->gso_segs; /* mac layer + network layer */ - hdr_len = skb_transport_offset(skb); + hdr_len = skb_transport_header(skb) - skb_mac_header(skb); /* + transport layer */ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { const struct tcphdr *th; struct tcphdr _tcphdr; - th = skb_header_pointer(skb, hdr_len, + th = skb_header_pointer(skb, skb_transport_offset(skb), sizeof(_tcphdr), &_tcphdr); if (likely(th)) hdr_len += __tcp_hdrlen(th); } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { struct udphdr _udphdr; - if (skb_header_pointer(skb, hdr_len, + if (skb_header_pointer(skb, skb_transport_offset(skb), sizeof(_udphdr), &_udphdr)) hdr_len += sizeof(struct udphdr); } @@ -3987,6 +4038,50 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) EXPORT_SYMBOL(dev_loopback_xmit); #ifdef CONFIG_NET_EGRESS +static struct sk_buff * +sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) +{ +#ifdef CONFIG_NET_CLS_ACT + struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress); + struct tcf_result cl_res; + + if (!miniq) + return skb; + + /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ + tc_skb_cb(skb)->mru = 0; + tc_skb_cb(skb)->post_ct = false; + mini_qdisc_bstats_cpu_update(miniq, skb); + + switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) { + case TC_ACT_OK: + case TC_ACT_RECLASSIFY: + skb->tc_index = TC_H_MIN(cl_res.classid); + break; + case TC_ACT_SHOT: + mini_qdisc_qstats_cpu_drop(miniq); + *ret = NET_XMIT_DROP; + kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS); + return NULL; + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + case TC_ACT_TRAP: + *ret = NET_XMIT_SUCCESS; + consume_skb(skb); + return NULL; + case TC_ACT_REDIRECT: + /* No need to push/pop skb's mac_header here on egress! 
*/ + skb_do_redirect(skb); + *ret = NET_XMIT_SUCCESS; + return NULL; + default: + break; + } +#endif /* CONFIG_NET_CLS_ACT */ + + return skb; +} + static struct netdev_queue * netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb) { @@ -4007,182 +4102,6 @@ void netdev_xmit_skip_txqueue(bool skip) EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); #endif /* CONFIG_NET_EGRESS */ -#ifdef CONFIG_NET_XGRESS -static int tc_run(struct tcx_entry *entry, struct sk_buff *skb) -{ - int ret = TC_ACT_UNSPEC; -#ifdef CONFIG_NET_CLS_ACT - struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq); - struct tcf_result res; - - if (!miniq) - return ret; - - tc_skb_cb(skb)->mru = 0; - tc_skb_cb(skb)->post_ct = false; - - mini_qdisc_bstats_cpu_update(miniq, skb); - ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false); - /* Only tcf related quirks below. */ - switch (ret) { - case TC_ACT_SHOT: - mini_qdisc_qstats_cpu_drop(miniq); - break; - case TC_ACT_OK: - case TC_ACT_RECLASSIFY: - skb->tc_index = TC_H_MIN(res.classid); - break; - } -#endif /* CONFIG_NET_CLS_ACT */ - return ret; -} - -static DEFINE_STATIC_KEY_FALSE(tcx_needed_key); - -void tcx_inc(void) -{ - static_branch_inc(&tcx_needed_key); -} - -void tcx_dec(void) -{ - static_branch_dec(&tcx_needed_key); -} - -static __always_inline enum tcx_action_base -tcx_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb, - const bool needs_mac) -{ - const struct bpf_mprog_fp *fp; - const struct bpf_prog *prog; - int ret = TCX_NEXT; - - if (needs_mac) - __skb_push(skb, skb->mac_len); - bpf_mprog_foreach_prog(entry, fp, prog) { - bpf_compute_data_pointers(skb); - ret = bpf_prog_run(prog, skb); - if (ret != TCX_NEXT) - break; - } - if (needs_mac) - __skb_pull(skb, skb->mac_len); - return tcx_action_code(skb, ret); -} - -static __always_inline struct sk_buff * -sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, - struct net_device *orig_dev, bool *another) -{ - struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress); - int sch_ret; - - if (!entry) - return skb; - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } - - qdisc_skb_cb(skb)->pkt_len = skb->len; - tcx_set_ingress(skb, true); - - if (static_branch_unlikely(&tcx_needed_key)) { - sch_ret = tcx_run(entry, skb, true); - if (sch_ret != TC_ACT_UNSPEC) - goto ingress_verdict; - } - sch_ret = tc_run(tcx_entry(entry), skb); -ingress_verdict: - switch (sch_ret) { - case TC_ACT_REDIRECT: - /* skb_mac_header check was done by BPF, so we can safely - * push the L2 header back before redirecting to another - * netdev. - */ - __skb_push(skb, skb->mac_len); - if (skb_do_redirect(skb) == -EAGAIN) { - __skb_pull(skb, skb->mac_len); - *another = true; - break; - } - *ret = NET_RX_SUCCESS; - return NULL; - case TC_ACT_SHOT: - kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS); - *ret = NET_RX_DROP; - return NULL; - /* used by tc_run */ - case TC_ACT_STOLEN: - case TC_ACT_QUEUED: - case TC_ACT_TRAP: - consume_skb(skb); - fallthrough; - case TC_ACT_CONSUMED: - *ret = NET_RX_SUCCESS; - return NULL; - } - - return skb; -} - -static __always_inline struct sk_buff * -sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) -{ - struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress); - int sch_ret; - - if (!entry) - return skb; - - /* qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was - * already set by the caller. 
- */ - if (static_branch_unlikely(&tcx_needed_key)) { - sch_ret = tcx_run(entry, skb, false); - if (sch_ret != TC_ACT_UNSPEC) - goto egress_verdict; - } - sch_ret = tc_run(tcx_entry(entry), skb); -egress_verdict: - switch (sch_ret) { - case TC_ACT_REDIRECT: - /* No need to push/pop skb's mac_header here on egress! */ - skb_do_redirect(skb); - *ret = NET_XMIT_SUCCESS; - return NULL; - case TC_ACT_SHOT: - kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS); - *ret = NET_XMIT_DROP; - return NULL; - /* used by tc_run */ - case TC_ACT_STOLEN: - case TC_ACT_QUEUED: - case TC_ACT_TRAP: - consume_skb(skb); - fallthrough; - case TC_ACT_CONSUMED: - *ret = NET_XMIT_SUCCESS; - return NULL; - } - - return skb; -} -#else -static __always_inline struct sk_buff * -sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, - struct net_device *orig_dev, bool *another) -{ - return skb; -} - -static __always_inline struct sk_buff * -sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) -{ - return skb; -} -#endif /* CONFIG_NET_XGRESS */ - #ifdef CONFIG_XPS static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb, struct xps_dev_maps *dev_maps, unsigned int tci) @@ -4365,7 +4284,9 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) skb_update_prio(skb); qdisc_pkt_len_init(skb); - tcx_set_ingress(skb, false); +#ifdef CONFIG_NET_CLS_ACT + skb->tc_at_ingress = 0; +#endif #ifdef CONFIG_NET_EGRESS if (static_branch_unlikely(&egress_needed_key)) { if (nf_hook_egress_active()) { @@ -4552,12 +4473,7 @@ static inline void ____napi_schedule(struct softnet_data *sd, } list_add_tail(&napi->poll_list, &sd->poll_list); - WRITE_ONCE(napi->list_owner, smp_processor_id()); - /* If not called from net_rx_action() - * we have to raise NET_RX_SOFTIRQ. - */ - if (!sd->in_net_rx_action) - raise_softirq_irqoff(NET_RX_SOFTIRQ); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); } #ifdef CONFIG_RPS @@ -4771,17 +4687,21 @@ static void rps_trigger_softirq(void *data) #endif /* CONFIG_RPS */ +/* Called from hardirq (IPI) context */ +static void trigger_rx_softirq(void *data) +{ + struct softnet_data *sd = data; + + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + smp_store_release(&sd->defer_ipi_scheduled, 0); +} + /* - * After we queued a packet into sd->input_pkt_queue, - * we need to make sure this queue is serviced soon. - * - * - If this is another cpu queue, link it to our rps_ipi_list, - * and make sure we will process rps_ipi_list from net_rx_action(). - * - * - If this is our own queue, NAPI schedule our backlog. - * Note that this also raises NET_RX_SOFTIRQ. + * Check if this softnet_data structure is another cpu one + * If yes, queue it to our IPI list and return 1 + * If no, return 0 */ -static void napi_schedule_rps(struct softnet_data *sd) +static int napi_schedule_rps(struct softnet_data *sd) { struct softnet_data *mysd = this_cpu_ptr(&softnet_data); @@ -4790,15 +4710,12 @@ static void napi_schedule_rps(struct softnet_data *sd) sd->rps_ipi_next = mysd->rps_ipi_list; mysd->rps_ipi_list = sd; - /* If not called from net_rx_action() or napi_threaded_poll() - * we have to raise NET_RX_SOFTIRQ. 
- */ - if (!mysd->in_net_rx_action && !mysd->in_napi_threaded_poll) - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - return; + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + return 1; } #endif /* CONFIG_RPS */ __napi_schedule_irqoff(&mysd->backlog); + return 0; } #ifdef CONFIG_NET_FLOW_LIMIT @@ -5218,17 +5135,16 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(refcount_read(&skb->users)); - if (likely(get_kfree_skb_cb(skb)->reason == SKB_CONSUMED)) - trace_consume_skb(skb, net_tx_action); + if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) + trace_consume_skb(skb); else trace_kfree_skb(skb, net_tx_action, - get_kfree_skb_cb(skb)->reason); + SKB_DROP_REASON_NOT_SPECIFIED); if (skb->fclone != SKB_FCLONE_UNAVAILABLE) __kfree_skb(skb); else - __napi_kfree_skb(skb, - get_kfree_skb_cb(skb)->reason); + __kfree_skb_defer(skb); } } @@ -5290,6 +5206,72 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev, EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); #endif +static inline struct sk_buff * +sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, + struct net_device *orig_dev, bool *another) +{ +#ifdef CONFIG_NET_CLS_ACT + struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress); + struct tcf_result cl_res; + + /* If there's at least one ingress present somewhere (so + * we get here via enabled static key), remaining devices + * that are not configured with an ingress qdisc will bail + * out here. + */ + if (!miniq) + return skb; + + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } + + qdisc_skb_cb(skb)->pkt_len = skb->len; + tc_skb_cb(skb)->mru = 0; + tc_skb_cb(skb)->post_ct = false; + skb->tc_at_ingress = 1; + mini_qdisc_bstats_cpu_update(miniq, skb); + + switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) { + case TC_ACT_OK: + case TC_ACT_RECLASSIFY: + skb->tc_index = TC_H_MIN(cl_res.classid); + break; + case TC_ACT_SHOT: + mini_qdisc_qstats_cpu_drop(miniq); + kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS); + *ret = NET_RX_DROP; + return NULL; + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + case TC_ACT_TRAP: + consume_skb(skb); + *ret = NET_RX_SUCCESS; + return NULL; + case TC_ACT_REDIRECT: + /* skb_mac_header check was done by cls/act_bpf, so + * we can safely push the L2 header back before + * redirecting to another netdev + */ + __skb_push(skb, skb->mac_len); + if (skb_do_redirect(skb) == -EAGAIN) { + __skb_pull(skb, skb->mac_len); + *another = true; + break; + } + *ret = NET_RX_SUCCESS; + return NULL; + case TC_ACT_CONSUMED: + *ret = NET_RX_SUCCESS; + return NULL; + default: + break; + } +#endif /* CONFIG_NET_CLS_ACT */ + return skb; +} + /** * netdev_is_rx_handler_busy - check if receive handler is registered * @dev: device to check @@ -6109,9 +6091,10 @@ EXPORT_SYMBOL(__napi_schedule); */ bool napi_schedule_prep(struct napi_struct *n) { - unsigned long new, val = READ_ONCE(n->state); + unsigned long val, new; do { + val = READ_ONCE(n->state); if (unlikely(val & NAPIF_STATE_DISABLE)) return false; new = val | NAPIF_STATE_SCHED; @@ -6124,7 +6107,7 @@ bool napi_schedule_prep(struct napi_struct *n) */ new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED * NAPIF_STATE_MISSED; - } while (!try_cmpxchg(&n->state, &val, new)); + } while (cmpxchg(&n->state, val, new) != val); return !(val & NAPIF_STATE_SCHED); } @@ -6191,10 +6174,10 @@ bool napi_complete_done(struct napi_struct *n, int work_done) list_del_init(&n->poll_list); 
local_irq_restore(flags); } - WRITE_ONCE(n->list_owner, -1); - val = READ_ONCE(n->state); do { + val = READ_ONCE(n->state); + WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED | @@ -6207,7 +6190,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done) */ new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED * NAPIF_STATE_SCHED; - } while (!try_cmpxchg(&n->state, &val, new)); + } while (cmpxchg(&n->state, val, new) != val); if (unlikely(val & NAPIF_STATE_MISSED)) { __napi_schedule(n); @@ -6318,8 +6301,7 @@ void napi_busy_loop(unsigned int napi_id, if (!napi) goto out; - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_disable(); + preempt_disable(); for (;;) { int work = 0; @@ -6361,8 +6343,7 @@ void napi_busy_loop(unsigned int napi_id, if (unlikely(need_resched())) { if (napi_poll) busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget); - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); + preempt_enable(); rcu_read_unlock(); cond_resched(); if (loop_end(loop_end_arg, start_time)) @@ -6373,8 +6354,7 @@ void napi_busy_loop(unsigned int napi_id, } if (napi_poll) busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget); - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) - preempt_enable(); + preempt_enable(); out: rcu_read_unlock(); } @@ -6476,8 +6456,12 @@ int dev_set_threaded(struct net_device *dev, bool threaded) * softirq mode will happen in the next round of napi_schedule(). * This should not cause hiccups/stalls to the live traffic. */ - list_for_each_entry(napi, &dev->napi_list, dev_list) - assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (threaded) + set_bit(NAPI_STATE_THREADED, &napi->state); + else + clear_bit(NAPI_STATE_THREADED, &napi->state); + } return err; } @@ -6506,7 +6490,6 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, #ifdef CONFIG_NETPOLL napi->poll_owner = -1; #endif - napi->list_owner = -1; set_bit(NAPI_STATE_SCHED, &napi->state); set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); @@ -6528,16 +6511,19 @@ void napi_disable(struct napi_struct *n) might_sleep(); set_bit(NAPI_STATE_DISABLE, &n->state); - val = READ_ONCE(n->state); - do { - while (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) { + for ( ; ; ) { + val = READ_ONCE(n->state); + if (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) { usleep_range(20, 200); - val = READ_ONCE(n->state); + continue; } new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC; new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL); - } while (!try_cmpxchg(&n->state, &val, new)); + + if (cmpxchg(&n->state, val, new) == val) + break; + } hrtimer_cancel(&n->timer); @@ -6554,15 +6540,16 @@ EXPORT_SYMBOL(napi_disable); */ void napi_enable(struct napi_struct *n) { - unsigned long new, val = READ_ONCE(n->state); + unsigned long val, new; do { + val = READ_ONCE(n->state); BUG_ON(!test_bit(NAPI_STATE_SCHED, &val)); new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC); if (n->dev->threaded && n->thread) new |= NAPIF_STATE_THREADED; - } while (!try_cmpxchg(&n->state, &val, new)); + } while (cmpxchg(&n->state, val, new) != val); } EXPORT_SYMBOL(napi_enable); @@ -6718,57 +6705,9 @@ static int napi_thread_wait(struct napi_struct *napi) return -1; } -static void skb_defer_free_flush(struct softnet_data *sd) -{ - struct sk_buff *skb, *next; - - /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */ - if (!READ_ONCE(sd->defer_list)) - return; - - 
spin_lock(&sd->defer_lock); - skb = sd->defer_list; - sd->defer_list = NULL; - sd->defer_count = 0; - spin_unlock(&sd->defer_lock); - - while (skb != NULL) { - next = skb->next; - napi_consume_skb(skb, 1); - skb = next; - } -} - -#ifndef CONFIG_PREEMPT_RT - -/* Called from hardirq (IPI) context */ -static void trigger_rx_softirq(void *data) -{ - struct softnet_data *sd = data; - - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - smp_store_release(&sd->defer_ipi_scheduled, 0); -} - -#else - -static void trigger_rx_softirq(struct work_struct *defer_work) -{ - struct softnet_data *sd; - - sd = container_of(defer_work, struct softnet_data, defer_work); - smp_store_release(&sd->defer_ipi_scheduled, 0); - local_bh_disable(); - skb_defer_free_flush(sd); - local_bh_enable(); -} - -#endif - static int napi_threaded_poll(void *data) { struct napi_struct *napi = data; - struct softnet_data *sd; void *have; while (!napi_thread_wait(napi)) { @@ -6778,21 +6717,11 @@ static int napi_threaded_poll(void *data) bool repoll = false; local_bh_disable(); - sd = this_cpu_ptr(&softnet_data); - sd->in_napi_threaded_poll = true; have = netpoll_poll_lock(napi); __napi_poll(napi, &repoll); netpoll_poll_unlock(have); - sd->in_napi_threaded_poll = false; - barrier(); - - if (sd_has_rps_ipi_waiting(sd)) { - local_irq_disable(); - net_rps_action_and_irq_enable(sd); - } - skb_defer_free_flush(sd); local_bh_enable(); if (!repoll) @@ -6805,6 +6734,28 @@ static int napi_threaded_poll(void *data) return 0; } +static void skb_defer_free_flush(struct softnet_data *sd) +{ + struct sk_buff *skb, *next; + unsigned long flags; + + /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */ + if (!READ_ONCE(sd->defer_list)) + return; + + spin_lock_irqsave(&sd->defer_lock, flags); + skb = sd->defer_list; + sd->defer_list = NULL; + sd->defer_count = 0; + spin_unlock_irqrestore(&sd->defer_lock, flags); + + while (skb != NULL) { + next = skb->next; + napi_consume_skb(skb, 1); + skb = next; + } +} + static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); @@ -6814,8 +6765,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) LIST_HEAD(list); LIST_HEAD(repoll); -start: - sd->in_net_rx_action = true; local_irq_disable(); list_splice_init(&sd->poll_list, &list); local_irq_enable(); @@ -6826,18 +6775,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) skb_defer_free_flush(sd); if (list_empty(&list)) { - if (list_empty(&repoll)) { - sd->in_net_rx_action = false; - barrier(); - /* We need to check if ____napi_schedule() - * had refilled poll_list while - * sd->in_net_rx_action was true. - */ - if (!list_empty(&sd->poll_list)) - goto start; - if (!sd_has_rps_ipi_waiting(sd)) - goto end; - } + if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) + goto end; break; } @@ -6862,8 +6801,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) list_splice(&list, &sd->poll_list); if (!list_empty(&sd->poll_list)) __raise_softirq_irqoff(NET_RX_SOFTIRQ); - else - sd->in_net_rx_action = false; net_rps_action_and_irq_enable(sd); end:; @@ -8503,8 +8440,9 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) } } if (dev->flags != old_flags) { - netdev_info(dev, "%s promiscuous mode\n", - dev->flags & IFF_PROMISC ? "entered" : "left"); + pr_info("device %s %s promiscuous mode\n", + dev->name, + dev->flags & IFF_PROMISC ? 
"entered" : "left"); if (audit_enabled) { current_uid_gid(&uid, &gid); audit_log(audit_context(), GFP_ATOMIC, @@ -8521,7 +8459,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) dev_change_rx_flags(dev, IFF_PROMISC); } if (notify) - __dev_notify_flags(dev, old_flags, IFF_PROMISC, 0, NULL); + __dev_notify_flags(dev, old_flags, IFF_PROMISC); return 0; } @@ -8572,13 +8510,11 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) } } if (dev->flags ^ old_flags) { - netdev_info(dev, "%s allmulticast mode\n", - dev->flags & IFF_ALLMULTI ? "entered" : "left"); dev_change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); if (notify) __dev_notify_flags(dev, old_flags, - dev->gflags ^ old_gflags, 0, NULL); + dev->gflags ^ old_gflags); } return 0; } @@ -8741,13 +8677,12 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags, } void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, - unsigned int gchanges, u32 portid, - const struct nlmsghdr *nlh) + unsigned int gchanges) { unsigned int changes = dev->flags ^ old_flags; if (gchanges) - rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC, portid, nlh); + rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC); if (changes & IFF_UP) { if (dev->flags & IFF_UP) @@ -8789,7 +8724,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, return ret; changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags); - __dev_notify_flags(dev, old_flags, changes, 0, NULL); + __dev_notify_flags(dev, old_flags, changes); return ret; } EXPORT_SYMBOL(dev_change_flags); @@ -8969,11 +8904,9 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack); if (err) return err; - if (memcmp(dev->dev_addr, sa->sa_data, dev->addr_len)) { - err = ops->ndo_set_mac_address(dev, sa); - if (err) - return err; - } + err = ops->ndo_set_mac_address(dev, sa); + if (err) + return err; dev->addr_assign_type = NET_ADDR_SET; call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); add_device_randomness(dev->dev_addr, dev->addr_len); @@ -9411,16 +9344,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time"); return -EEXIST; } - if (!offload && bpf_prog_is_offloaded(new_prog->aux)) { - NL_SET_ERR_MSG(extack, "Using offloaded program without HW_MODE flag is not supported"); - return -EINVAL; - } - if (bpf_prog_is_dev_bound(new_prog->aux) && !bpf_offload_dev_match(new_prog, dev)) { - NL_SET_ERR_MSG(extack, "Program bound to different device"); - return -EINVAL; - } - if (bpf_prog_is_dev_bound(new_prog->aux) && mode == XDP_MODE_SKB) { - NL_SET_ERR_MSG(extack, "Can't attach device-bound programs in generic mode"); + if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) { + NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported"); return -EINVAL; } if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { @@ -9602,7 +9527,6 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct net *net = current->nsproxy->net_ns; struct bpf_link_primer link_primer; - struct netlink_ext_ack extack = {}; struct bpf_xdp_link *link; struct net_device *dev; int err, fd; @@ -9630,13 +9554,12 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) goto unlock; } - err = dev_xdp_attach_link(dev, &extack, link); + err = dev_xdp_attach_link(dev, NULL, link); 
rtnl_unlock(); if (err) { link->dev = NULL; bpf_link_cleanup(&link_primer); - trace_bpf_xdp_link_attach_failed(extack._msg); goto out_put_dev; } @@ -9700,40 +9623,23 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, } /** - * dev_index_reserve() - allocate an ifindex in a namespace - * @net: the applicable net namespace - * @ifindex: requested ifindex, pass %0 to get one allocated + * dev_new_index - allocate an ifindex + * @net: the applicable net namespace * - * Allocate a ifindex for a new device. Caller must either use the ifindex - * to store the device (via list_netdevice()) or call dev_index_release() - * to give the index up. - * - * Return: a suitable unique value for a new device interface number or -errno. + * Returns a suitable unique value for a new device interface + * number. The caller must hold the rtnl semaphore or the + * dev_base_lock to be sure it remains unique. */ -static int dev_index_reserve(struct net *net, u32 ifindex) +static int dev_new_index(struct net *net) { - int err; + int ifindex = net->ifindex; - if (ifindex > INT_MAX) { - DEBUG_NET_WARN_ON_ONCE(1); - return -EINVAL; + for (;;) { + if (++ifindex <= 0) + ifindex = 1; + if (!__dev_get_by_index(net, ifindex)) + return net->ifindex = ifindex; } - - if (!ifindex) - err = xa_alloc_cyclic(&net->dev_by_index, &ifindex, NULL, - xa_limit_31b, &net->ifindex, GFP_KERNEL); - else - err = xa_insert(&net->dev_by_index, ifindex, NULL, GFP_KERNEL); - if (err < 0) - return err; - - return ifindex; -} - -static void dev_index_release(struct net *net, int ifindex) -{ - /* Expect only unused indexes, unlist_netdevice() removes the used */ - WARN_ON(xa_erase(&net->dev_by_index, ifindex)); } /* Delayed registration/unregisteration */ @@ -10255,10 +10161,11 @@ int register_netdevice(struct net_device *dev) if (ret) goto err_uninit; - ret = dev_index_reserve(net, dev->ifindex); - if (ret < 0) + ret = -EBUSY; + if (!dev->ifindex) + dev->ifindex = dev_new_index(net); + else if (__dev_get_by_index(net, dev->ifindex)) goto err_free_pcpu; - dev->ifindex = ret; /* Transfer changeable features to wanted_features and enable * software offloads (GSO and GRO). @@ -10305,14 +10212,14 @@ int register_netdevice(struct net_device *dev) ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); if (ret) - goto err_ifindex_release; + goto err_free_pcpu; ret = netdev_register_kobject(dev); write_lock(&dev_base_lock); dev->reg_state = ret ? 
NETREG_UNREGISTERED : NETREG_REGISTERED; write_unlock(&dev_base_lock); if (ret) - goto err_uninit_notify; + goto err_free_pcpu; __netdev_update_features(dev); @@ -10354,15 +10261,11 @@ int register_netdevice(struct net_device *dev) */ if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL); + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); out: return ret; -err_uninit_notify: - call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); -err_ifindex_release: - dev_index_release(net, dev->ifindex); err_free_pcpu: netdev_do_free_pcpu_stats(dev); err_uninit: @@ -10730,12 +10633,12 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, stats = per_cpu_ptr(netstats, cpu); do { - start = u64_stats_fetch_begin(&stats->syncp); + start = u64_stats_fetch_begin_irq(&stats->syncp); rx_packets = u64_stats_read(&stats->rx_packets); rx_bytes = u64_stats_read(&stats->rx_bytes); tx_packets = u64_stats_read(&stats->tx_packets); tx_bytes = u64_stats_read(&stats->tx_bytes); - } while (u64_stats_fetch_retry(&stats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); s->rx_packets += rx_packets; s->rx_bytes += rx_bytes; @@ -10788,24 +10691,6 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, } EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); -/** - * netdev_sw_irq_coalesce_default_on() - enable SW IRQ coalescing by default - * @dev: netdev to enable the IRQ coalescing on - * - * Sets a conservative default for SW IRQ coalescing. Users can use - * sysfs attributes to override the default values. - */ -void netdev_sw_irq_coalesce_default_on(struct net_device *dev) -{ - WARN_ON(dev->reg_state == NETREG_REGISTERED); - - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { - dev->gro_flush_timeout = 20000; - dev->napi_defer_hard_irqs = 1; - } -} -EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on); - void netdev_freemem(struct net_device *dev) { char *addr = (char *)dev - dev->padded; @@ -10863,7 +10748,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p; - ref_tracker_dir_init(&dev->refcnt_tracker, 128, name); + ref_tracker_dir_init(&dev->refcnt_tracker, 128); #ifdef CONFIG_PCPU_DEV_REFCNT dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) @@ -10882,11 +10767,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); dev->gso_max_size = GSO_LEGACY_MAX_SIZE; - dev->xdp_zc_max_segs = 1; dev->gso_max_segs = GSO_MAX_SEGS; dev->gro_max_size = GRO_LEGACY_MAX_SIZE; - dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE; - dev->gro_ipv4_max_size = GRO_LEGACY_MAX_SIZE; dev->tso_max_size = TSO_LEGACY_MAX_SIZE; dev->tso_max_segs = TSO_MAX_SEGS; dev->upper_level = 1; @@ -11054,8 +10936,14 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) } EXPORT_SYMBOL(unregister_netdevice_queue); -void unregister_netdevice_many_notify(struct list_head *head, - u32 portid, const struct nlmsghdr *nlh) +/** + * unregister_netdevice_many - unregister many devices + * @head: list of devices + * + * Note: As most callers use a stack allocated list_head, + * we force a list_del() to make sure stack wont be corrupted later. + */ +void unregister_netdevice_many(struct list_head *head) { struct net_device *dev, *tmp; LIST_HEAD(close_head); @@ -11104,9 +10992,8 @@ void unregister_netdevice_many_notify(struct list_head *head, /* Shutdown queueing discipline. 
*/ dev_shutdown(dev); - dev_tcx_uninstall(dev); + dev_xdp_uninstall(dev); - bpf_dev_bound_netdev_unregister(dev); netdev_offload_xstats_disable_all(dev); @@ -11118,8 +11005,7 @@ void unregister_netdevice_many_notify(struct list_head *head, if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, - GFP_KERNEL, NULL, 0, - portid, nlh); + GFP_KERNEL, NULL, 0); /* * Flush the unicast and multicast chains @@ -11130,13 +11016,11 @@ void unregister_netdevice_many_notify(struct list_head *head, netdev_name_node_alt_flush(dev); netdev_name_node_free(dev->name_node); - call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); - if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); if (skb) - rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); + rtmsg_ifinfo_send(skb, dev, GFP_KERNEL); /* Notifier chain MUST detach us all upper devices. */ WARN_ON(netdev_has_any_upper_dev(dev)); @@ -11159,18 +11043,6 @@ void unregister_netdevice_many_notify(struct list_head *head, list_del(head); } - -/** - * unregister_netdevice_many - unregister many devices - * @head: list of devices - * - * Note: As most callers use a stack allocated list_head, - * we force a list_del() to make sure stack wont be corrupted later. - */ -void unregister_netdevice_many(struct list_head *head) -{ - unregister_netdevice_many_notify(head, 0, NULL); -} EXPORT_SYMBOL(unregister_netdevice_many); /** @@ -11251,19 +11123,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, goto out; /* Check that new_ifindex isn't used yet. */ - if (new_ifindex) { - err = dev_index_reserve(net, new_ifindex); - if (err < 0) - goto out; - } else { - /* If there is an ifindex conflict assign a new one */ - err = dev_index_reserve(net, dev->ifindex); - if (err == -EBUSY) - err = dev_index_reserve(net, 0); - if (err < 0) - goto out; - new_ifindex = err; - } + err = -EBUSY; + if (new_ifindex && __dev_get_by_index(net, new_ifindex)) + goto out; /* * And now a mini version of register_netdevice unregister_netdevice. @@ -11291,6 +11153,13 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, rcu_barrier(); new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL); + /* If there is an ifindex conflict assign a new one */ + if (!new_ifindex) { + if (__dev_get_by_index(net, dev->ifindex)) + new_ifindex = dev_new_index(net); + else + new_ifindex = dev->ifindex; + } rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid, new_ifindex); @@ -11339,7 +11208,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, * Prevent userspace races by waiting until the network * device is fully setup before sending notifications. 
*/ - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL); + rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL); synchronize_net(); err = 0; @@ -11471,8 +11340,6 @@ static int __net_init netdev_init(struct net *net) if (net->dev_index_head == NULL) goto err_idx; - xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC1); - RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain); return 0; @@ -11570,7 +11437,6 @@ static void __net_exit netdev_exit(struct net *net) { kfree(net->dev_name_head); kfree(net->dev_index_head); - xa_destroy(&net->dev_by_index); if (net != &init_net) WARN_ON_ONCE(!list_empty(&net->dev_base_head)); } @@ -11706,11 +11572,7 @@ static int __init net_dev_init(void) INIT_CSD(&sd->csd, rps_trigger_softirq, sd); sd->cpu = i; #endif -#ifndef CONFIG_PREEMPT_RT INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd); -#else - INIT_WORK(&sd->defer_work, trigger_rx_softirq); -#endif spin_lock_init(&sd->defer_lock); init_gro_hash(&sd->backlog); --- base-commit: 043f841768688ddc914578849f2b9cc11757a816 change-id: 20260210-v6-1-octeon-3e6680539e84 Best regards, -- Kevin Hao <[email protected]>
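For anyone who wants to double-check the restore locally, one quick sanity check is to diff the file against the v6.1/standard/base branch once this patch is applied. A minimal sketch, assuming a local clone of the linux-yocto tree with a remote (called "origin" here only for illustration) that carries both branches:

    # Fetch the reference branch and compare the restored file against it.
    git fetch origin v6.1/standard/base
    git diff FETCH_HEAD -- net/core/dev.c

If the restore is complete, the second command should produce no output.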
